summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_file.c3
-rw-r--r--fs/affs/bitmap.c28
-rw-r--r--fs/autofs4/expire.c36
-rw-r--r--fs/bio.c2
-rw-r--r--fs/btrfs/ctree.h13
-rw-r--r--fs/btrfs/disk-io.c5
-rw-r--r--fs/btrfs/file.c3
-rw-r--r--fs/btrfs/inode.c9
-rw-r--r--fs/btrfs/ioctl.c10
-rw-r--r--fs/btrfs/ordered-data.c2
-rw-r--r--fs/btrfs/relocation.c2
-rw-r--r--fs/btrfs/super.c45
-rw-r--r--fs/btrfs/transaction.c7
-rw-r--r--fs/btrfs/volumes.c4
-rw-r--r--fs/buffer.c28
-rw-r--r--fs/cachefiles/rdwr.c2
-rw-r--r--fs/ceph/addr.c3
-rw-r--r--fs/ceph/dir.c45
-rw-r--r--fs/ceph/file.c62
-rw-r--r--fs/ceph/mds_client.c23
-rw-r--r--fs/ceph/snap.c18
-rw-r--r--fs/ceph/super.c1
-rw-r--r--fs/ceph/super.h10
-rw-r--r--fs/ceph/xattr.c1
-rw-r--r--fs/cifs/cifsglob.h10
-rw-r--r--fs/cifs/cifsproto.h11
-rw-r--r--fs/cifs/cifssmb.c31
-rw-r--r--fs/cifs/inode.c302
-rw-r--r--fs/cifs/smb1ops.c24
-rw-r--r--fs/cifs/smb2inode.c39
-rw-r--r--fs/cifs/smb2ops.c3
-rw-r--r--fs/cifs/smb2proto.h8
-rw-r--r--fs/compat.c10
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h24
-rw-r--r--fs/ecryptfs/file.c90
-rw-r--r--fs/ecryptfs/inode.c95
-rw-r--r--fs/ecryptfs/main.c22
-rw-r--r--fs/ecryptfs/messaging.c136
-rw-r--r--fs/ecryptfs/miscdev.c98
-rw-r--r--fs/ecryptfs/mmap.c39
-rw-r--r--fs/exec.c61
-rw-r--r--fs/exofs/inode.c27
-rw-r--r--fs/exofs/ore.c14
-rw-r--r--fs/exofs/super.c11
-rw-r--r--fs/ext2/balloc.c14
-rw-r--r--fs/ext2/ialloc.c1
-rw-r--r--fs/ext2/inode.c5
-rw-r--r--fs/ext2/super.c33
-rw-r--r--fs/ext3/balloc.c2
-rw-r--r--fs/ext3/bitmap.c12
-rw-r--r--fs/ext3/inode.c8
-rw-r--r--fs/ext3/super.c11
-rw-r--r--fs/ext4/balloc.c62
-rw-r--r--fs/ext4/bitmap.c10
-rw-r--r--fs/ext4/extents.c1
-rw-r--r--fs/ext4/inode.c25
-rw-r--r--fs/ext4/mmp.c6
-rw-r--r--fs/ext4/super.c48
-rw-r--r--fs/fat/dir.c255
-rw-r--r--fs/fat/fat.h15
-rw-r--r--fs/fat/file.c15
-rw-r--r--fs/fat/inode.c12
-rw-r--r--fs/fat/namei_msdos.c11
-rw-r--r--fs/fat/namei_vfat.c11
-rw-r--r--fs/fcntl.c29
-rw-r--r--fs/file_table.c4
-rw-r--r--fs/fs-writeback.c9
-rw-r--r--fs/fuse/dir.c3
-rw-r--r--fs/fuse/file.c19
-rw-r--r--fs/fuse/fuse_i.h3
-rw-r--r--fs/fuse/inode.c32
-rw-r--r--fs/gfs2/file.c18
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/gfs2/trans.c4
-rw-r--r--fs/hfs/mdb.c4
-rw-r--r--fs/hfsplus/super.c2
-rw-r--r--fs/hugetlbfs/inode.c4
-rw-r--r--fs/inode.c12
-rw-r--r--fs/internal.h4
-rw-r--r--fs/jbd/journal.c4
-rw-r--r--fs/jbd2/journal.c7
-rw-r--r--fs/lockd/clntproc.c14
-rw-r--r--fs/lockd/grace.c16
-rw-r--r--fs/lockd/host.c92
-rw-r--r--fs/lockd/netns.h7
-rw-r--r--fs/lockd/svc.c43
-rw-r--r--fs/lockd/svc4proc.c14
-rw-r--r--fs/lockd/svclock.c17
-rw-r--r--fs/lockd/svcproc.c16
-rw-r--r--fs/lockd/svcsubs.c19
-rw-r--r--fs/locks.c34
-rw-r--r--fs/minix/itree_v2.c3
-rw-r--r--fs/namei.c321
-rw-r--r--fs/namespace.c97
-rw-r--r--fs/nfs/Kconfig23
-rw-r--r--fs/nfs/Makefile24
-rw-r--r--fs/nfs/blocklayout/blocklayout.c39
-rw-r--r--fs/nfs/callback.c28
-rw-r--r--fs/nfs/callback.h2
-rw-r--r--fs/nfs/callback_xdr.c4
-rw-r--r--fs/nfs/client.c874
-rw-r--r--fs/nfs/delegation.c7
-rw-r--r--fs/nfs/delegation.h21
-rw-r--r--fs/nfs/dir.c127
-rw-r--r--fs/nfs/direct.c90
-rw-r--r--fs/nfs/dns_resolve.c4
-rw-r--r--fs/nfs/file.c238
-rw-r--r--fs/nfs/getroot.c50
-rw-r--r--fs/nfs/idmap.c29
-rw-r--r--fs/nfs/inode.c110
-rw-r--r--fs/nfs/internal.h126
-rw-r--r--fs/nfs/namespace.c17
-rw-r--r--fs/nfs/netns.h2
-rw-r--r--fs/nfs/nfs.h29
-rw-r--r--fs/nfs/nfs2super.c31
-rw-r--r--fs/nfs/nfs2xdr.c28
-rw-r--r--fs/nfs/nfs3client.c65
-rw-r--r--fs/nfs/nfs3proc.c49
-rw-r--r--fs/nfs/nfs3super.c31
-rw-r--r--fs/nfs/nfs3xdr.c43
-rw-r--r--fs/nfs/nfs4_fs.h34
-rw-r--r--fs/nfs/nfs4client.c656
-rw-r--r--fs/nfs/nfs4file.c126
-rw-r--r--fs/nfs/nfs4filelayout.c10
-rw-r--r--fs/nfs/nfs4filelayoutdev.c2
-rw-r--r--fs/nfs/nfs4getroot.c49
-rw-r--r--fs/nfs/nfs4proc.c312
-rw-r--r--fs/nfs/nfs4state.c114
-rw-r--r--fs/nfs/nfs4super.c372
-rw-r--r--fs/nfs/nfs4sysctl.c68
-rw-r--r--fs/nfs/nfs4xdr.c90
-rw-r--r--fs/nfs/pagelist.c8
-rw-r--r--fs/nfs/pnfs.c57
-rw-r--r--fs/nfs/pnfs.h31
-rw-r--r--fs/nfs/proc.c41
-rw-r--r--fs/nfs/read.c27
-rw-r--r--fs/nfs/super.c594
-rw-r--r--fs/nfs/sysctl.c26
-rw-r--r--fs/nfs/unlink.c2
-rw-r--r--fs/nfs/write.c142
-rw-r--r--fs/nfsd/export.c10
-rw-r--r--fs/nfsd/netns.h4
-rw-r--r--fs/nfsd/nfs4callback.c1
-rw-r--r--fs/nfsd/nfs4idmap.c4
-rw-r--r--fs/nfsd/nfs4proc.c18
-rw-r--r--fs/nfsd/nfs4recover.c9
-rw-r--r--fs/nfsd/nfs4state.c201
-rw-r--r--fs/nfsd/nfs4xdr.c2
-rw-r--r--fs/nfsd/nfsctl.c8
-rw-r--r--fs/nfsd/nfsd.h13
-rw-r--r--fs/nfsd/nfsfh.c1
-rw-r--r--fs/nfsd/nfsproc.c9
-rw-r--r--fs/nfsd/nfssvc.c24
-rw-r--r--fs/nfsd/state.h5
-rw-r--r--fs/nfsd/vfs.c89
-rw-r--r--fs/nfsd/vfs.h11
-rw-r--r--fs/nilfs2/alloc.h14
-rw-r--r--fs/nilfs2/bmap.h7
-rw-r--r--fs/nilfs2/btnode.h8
-rw-r--r--fs/nilfs2/cpfile.c10
-rw-r--r--fs/nilfs2/dat.c6
-rw-r--r--fs/nilfs2/export.h8
-rw-r--r--fs/nilfs2/file.c18
-rw-r--r--fs/nilfs2/ifile.c6
-rw-r--r--fs/nilfs2/inode.c7
-rw-r--r--fs/nilfs2/ioctl.c6
-rw-r--r--fs/nilfs2/mdt.h7
-rw-r--r--fs/nilfs2/nilfs.h17
-rw-r--r--fs/nilfs2/segment.c5
-rw-r--r--fs/nilfs2/sufile.c8
-rw-r--r--fs/nilfs2/super.c10
-rw-r--r--fs/nilfs2/the_nilfs.c1
-rw-r--r--fs/nilfs2/the_nilfs.h8
-rw-r--r--fs/ntfs/file.c3
-rw-r--r--fs/ntfs/super.c17
-rw-r--r--fs/ocfs2/file.c11
-rw-r--r--fs/ocfs2/ioctl.c14
-rw-r--r--fs/ocfs2/journal.c7
-rw-r--r--fs/ocfs2/localalloc.c8
-rw-r--r--fs/ocfs2/mmap.c2
-rw-r--r--fs/ocfs2/refcounttree.c11
-rw-r--r--fs/open.c24
-rw-r--r--fs/pipe.c75
-rw-r--r--fs/proc/base.c22
-rw-r--r--fs/qnx4/bitmap.c24
-rw-r--r--fs/splice.c3
-rw-r--r--fs/super.c296
-rw-r--r--fs/sysfs/bin.c2
-rw-r--r--fs/ubifs/file.c10
-rw-r--r--fs/ubifs/super.c2
-rw-r--r--fs/xattr.c16
-rw-r--r--fs/xfs/xfs_alloc_btree.h14
-rw-r--r--fs/xfs/xfs_aops.c97
-rw-r--r--fs/xfs/xfs_aops.h14
-rw-r--r--fs/xfs/xfs_attr.c78
-rw-r--r--fs/xfs/xfs_attr_leaf.c255
-rw-r--r--fs/xfs/xfs_attr_leaf.h21
-rw-r--r--fs/xfs/xfs_bmap.c2
-rw-r--r--fs/xfs/xfs_buf.c240
-rw-r--r--fs/xfs/xfs_buf.h116
-rw-r--r--fs/xfs/xfs_buf_item.c345
-rw-r--r--fs/xfs/xfs_buf_item.h38
-rw-r--r--fs/xfs/xfs_da_btree.c823
-rw-r--r--fs/xfs/xfs_da_btree.h38
-rw-r--r--fs/xfs/xfs_dinode.h2
-rw-r--r--fs/xfs/xfs_dir2.c4
-rw-r--r--fs/xfs/xfs_dir2_block.c118
-rw-r--r--fs/xfs/xfs_dir2_data.c50
-rw-r--r--fs/xfs/xfs_dir2_leaf.c621
-rw-r--r--fs/xfs/xfs_dir2_node.c236
-rw-r--r--fs/xfs/xfs_dir2_priv.h46
-rw-r--r--fs/xfs/xfs_dir2_sf.c4
-rw-r--r--fs/xfs/xfs_file.c29
-rw-r--r--fs/xfs/xfs_ialloc.c446
-rw-r--r--fs/xfs/xfs_ialloc.h2
-rw-r--r--fs/xfs/xfs_iget.c15
-rw-r--r--fs/xfs/xfs_inode.c208
-rw-r--r--fs/xfs/xfs_inode.h13
-rw-r--r--fs/xfs/xfs_ioctl.c55
-rw-r--r--fs/xfs/xfs_ioctl32.c12
-rw-r--r--fs/xfs/xfs_iomap.c10
-rw-r--r--fs/xfs/xfs_iops.c45
-rw-r--r--fs/xfs/xfs_itable.c2
-rw-r--r--fs/xfs/xfs_log.c223
-rw-r--r--fs/xfs/xfs_log_priv.h18
-rw-r--r--fs/xfs/xfs_log_recover.c142
-rw-r--r--fs/xfs/xfs_mount.c13
-rw-r--r--fs/xfs/xfs_mount.h6
-rw-r--r--fs/xfs/xfs_qm.c2
-rw-r--r--fs/xfs/xfs_super.c88
-rw-r--r--fs/xfs/xfs_sync.c15
-rw-r--r--fs/xfs/xfs_trace.h2
-rw-r--r--fs/xfs/xfs_trans.c17
-rw-r--r--fs/xfs/xfs_trans.h52
-rw-r--r--fs/xfs/xfs_trans_ail.c35
-rw-r--r--fs/xfs/xfs_trans_buf.c68
-rw-r--r--fs/xfs/xfs_trans_priv.h1
-rw-r--r--fs/xfs/xfs_types.h14
-rw-r--r--fs/xfs/xfs_utils.c17
-rw-r--r--fs/xfs/xfs_vnodeops.c285
240 files changed, 7556 insertions, 6222 deletions
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index fc06fd27065e..dd6f7ee1e312 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -610,6 +610,9 @@ v9fs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
p9_debug(P9_DEBUG_VFS, "page %p fid %lx\n",
page, (unsigned long)filp->private_data);
+ /* Update file times before taking page lock */
+ file_update_time(filp);
+
v9inode = V9FS_I(inode);
/* make sure the cache has finished storing the page */
v9fs_fscache_wait_on_page_write(inode, page);
diff --git a/fs/affs/bitmap.c b/fs/affs/bitmap.c
index 6e0be43ef6ef..a32246b8359e 100644
--- a/fs/affs/bitmap.c
+++ b/fs/affs/bitmap.c
@@ -10,30 +10,6 @@
#include <linux/slab.h>
#include "affs.h"
-/* This is, of course, shamelessly stolen from fs/minix */
-
-static const int nibblemap[] = { 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4 };
-
-static u32
-affs_count_free_bits(u32 blocksize, const void *data)
-{
- const u32 *map;
- u32 free;
- u32 tmp;
-
- map = data;
- free = 0;
- for (blocksize /= 4; blocksize > 0; blocksize--) {
- tmp = *map++;
- while (tmp) {
- free += nibblemap[tmp & 0xf];
- tmp >>= 4;
- }
- }
-
- return free;
-}
-
u32
affs_count_free_blocks(struct super_block *sb)
{
@@ -317,7 +293,7 @@ int affs_init_bitmap(struct super_block *sb, int *flags)
goto out;
}
pr_debug("AFFS: read bitmap block %d: %d\n", blk, bm->bm_key);
- bm->bm_free = affs_count_free_bits(sb->s_blocksize - 4, bh->b_data + 4);
+ bm->bm_free = memweight(bh->b_data + 4, sb->s_blocksize - 4);
/* Don't try read the extension if this is the last block,
* but we also need the right bm pointer below
@@ -367,7 +343,7 @@ int affs_init_bitmap(struct super_block *sb, int *flags)
/* recalculate bitmap count for last block */
bm--;
- bm->bm_free = affs_count_free_bits(sb->s_blocksize - 4, bh->b_data + 4);
+ bm->bm_free = memweight(bh->b_data + 4, sb->s_blocksize - 4);
out:
affs_brelse(bh);
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 1feb68ecef95..842d00048a65 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -94,25 +94,21 @@ static struct dentry *get_next_positive_subdir(struct dentry *prev,
{
struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
struct list_head *next;
- struct dentry *p, *q;
+ struct dentry *q;
spin_lock(&sbi->lookup_lock);
+ spin_lock(&root->d_lock);
- if (prev == NULL) {
- spin_lock(&root->d_lock);
+ if (prev)
+ next = prev->d_u.d_child.next;
+ else {
prev = dget_dlock(root);
next = prev->d_subdirs.next;
- p = prev;
- goto start;
}
- p = prev;
- spin_lock(&p->d_lock);
-again:
- next = p->d_u.d_child.next;
-start:
+cont:
if (next == &root->d_subdirs) {
- spin_unlock(&p->d_lock);
+ spin_unlock(&root->d_lock);
spin_unlock(&sbi->lookup_lock);
dput(prev);
return NULL;
@@ -121,16 +117,15 @@ start:
q = list_entry(next, struct dentry, d_u.d_child);
spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED);
- /* Negative dentry - try next */
- if (!simple_positive(q)) {
- spin_unlock(&p->d_lock);
- lock_set_subclass(&q->d_lock.dep_map, 0, _RET_IP_);
- p = q;
- goto again;
+ /* Already gone or negative dentry (under construction) - try next */
+ if (q->d_count == 0 || !simple_positive(q)) {
+ spin_unlock(&q->d_lock);
+ next = q->d_u.d_child.next;
+ goto cont;
}
dget_dlock(q);
spin_unlock(&q->d_lock);
- spin_unlock(&p->d_lock);
+ spin_unlock(&root->d_lock);
spin_unlock(&sbi->lookup_lock);
dput(prev);
@@ -404,11 +399,6 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
DPRINTK("checking mountpoint %p %.*s",
dentry, (int)dentry->d_name.len, dentry->d_name.name);
- /* Path walk currently on this dentry? */
- ino_count = atomic_read(&ino->count) + 2;
- if (dentry->d_count > ino_count)
- goto next;
-
/* Can we umount this guy */
if (autofs4_mount_busy(mnt, dentry))
goto next;
diff --git a/fs/bio.c b/fs/bio.c
index 73922abba832..5eaa70c9d96e 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -1312,7 +1312,7 @@ EXPORT_SYMBOL(bio_copy_kern);
* Note that this code is very hard to test under normal circumstances because
* direct-io pins the pages with get_user_pages(). This makes
* is_page_cache_freeable return false, and the VM will not clean the pages.
- * But other code (eg, pdflush) could clean the pages if they are mapped
+ * But other code (eg, flusher threads) could clean the pages if they are mapped
* pagecache.
*
* Simply disabling the call to bio_set_pages_dirty() is a good way to test the
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index adb1cd7ceb9b..4bab807227ad 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3342,10 +3342,22 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
/* super.c */
int btrfs_parse_options(struct btrfs_root *root, char *options);
int btrfs_sync_fs(struct super_block *sb, int wait);
+
+#ifdef CONFIG_PRINTK
+__printf(2, 3)
void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...);
+#else
+static inline __printf(2, 3)
+void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...)
+{
+}
+#endif
+
+__printf(5, 6)
void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
unsigned int line, int errno, const char *fmt, ...);
+
void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
struct btrfs_root *root, const char *function,
unsigned int line, int errno);
@@ -3386,6 +3398,7 @@ do { \
(errno), fmt, ##args); \
} while (0)
+__printf(5, 6)
void __btrfs_panic(struct btrfs_fs_info *fs_info, const char *function,
unsigned int line, int errno, const char *fmt, ...);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 502b20c56e84..62e0cafd6e25 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1114,7 +1114,7 @@ void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
spin_unlock(&root->fs_info->delalloc_lock);
btrfs_panic(root->fs_info, -EOVERFLOW,
"Can't clear %lu bytes from "
- " dirty_mdatadata_bytes (%lu)",
+ " dirty_mdatadata_bytes (%llu)",
buf->len,
root->fs_info->dirty_metadata_bytes);
}
@@ -1614,8 +1614,6 @@ static int cleaner_kthread(void *arg)
struct btrfs_root *root = arg;
do {
- vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
-
if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
mutex_trylock(&root->fs_info->cleaner_mutex)) {
btrfs_run_delayed_iputs(root);
@@ -1647,7 +1645,6 @@ static int transaction_kthread(void *arg)
do {
cannot_commit = false;
delay = HZ * 30;
- vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE);
mutex_lock(&root->fs_info->transaction_kthread_mutex);
spin_lock(&root->fs_info->trans_lock);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9aa01ec2138d..5caf285c6e4d 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1379,7 +1379,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
ssize_t err = 0;
size_t count, ocount;
- vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+ sb_start_write(inode->i_sb);
mutex_lock(&inode->i_mutex);
@@ -1469,6 +1469,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
num_written = err;
}
out:
+ sb_end_write(inode->i_sb);
current->backing_dev_info = NULL;
return num_written ? num_written : err;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 48bdfd2591c2..6e8f416773d4 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -324,7 +324,8 @@ static noinline int add_async_extent(struct async_cow *cow,
* If this code finds it can't get good compression, it puts an
* entry onto the work queue to write the uncompressed bytes. This
* makes sure that both compressed inodes and uncompressed inodes
- * are written in the same order that pdflush sent them down.
+ * are written in the same order that the flusher thread sent them
+ * down.
*/
static noinline int compress_file_range(struct inode *inode,
struct page *locked_page,
@@ -6629,6 +6630,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
u64 page_start;
u64 page_end;
+ sb_start_pagefault(inode->i_sb);
ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE);
if (!ret) {
ret = file_update_time(vma->vm_file);
@@ -6718,12 +6720,15 @@ again:
unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS);
out_unlock:
- if (!ret)
+ if (!ret) {
+ sb_end_pagefault(inode->i_sb);
return VM_FAULT_LOCKED;
+ }
unlock_page(page);
out:
btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE);
out_noreserve:
+ sb_end_pagefault(inode->i_sb);
return ret;
}
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 43f0012016e3..7bb755677a22 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -195,6 +195,10 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
if (!inode_owner_or_capable(inode))
return -EACCES;
+ ret = mnt_want_write_file(file);
+ if (ret)
+ return ret;
+
mutex_lock(&inode->i_mutex);
ip_oldflags = ip->flags;
@@ -209,10 +213,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
}
}
- ret = mnt_want_write_file(file);
- if (ret)
- goto out_unlock;
-
if (flags & FS_SYNC_FL)
ip->flags |= BTRFS_INODE_SYNC;
else
@@ -275,9 +275,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
inode->i_flags = i_oldflags;
}
- mnt_drop_write_file(file);
out_unlock:
mutex_unlock(&inode->i_mutex);
+ mnt_drop_write_file(file);
return ret;
}
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 643335a4fe3c..051c7fe551dd 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -596,7 +596,7 @@ void btrfs_start_ordered_extent(struct inode *inode,
/*
* pages in the range can be dirty, clean or writeback. We
* start IO on any dirty ones so the wait doesn't stall waiting
- * for pdflush to find them
+ * for the flusher thread to find them
*/
if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags))
filemap_fdatawrite_range(inode->i_mapping, start, end);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index c5dbd9149679..4da08652004d 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1241,7 +1241,7 @@ static int __must_check __add_reloc_root(struct btrfs_root *root)
if (rb_node) {
btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found "
"for start=%llu while inserting into relocation "
- "tree\n");
+ "tree\n", node->bytenr);
kfree(node);
return -EEXIST;
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index fa61ef59cd61..f2eb24c477a3 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -100,10 +100,6 @@ static void __save_error_info(struct btrfs_fs_info *fs_info)
fs_info->fs_state = BTRFS_SUPER_FLAG_ERROR;
}
-/* NOTE:
- * We move write_super stuff at umount in order to avoid deadlock
- * for umount hold all lock.
- */
static void save_error_info(struct btrfs_fs_info *fs_info)
{
__save_error_info(fs_info);
@@ -125,6 +121,7 @@ static void btrfs_handle_error(struct btrfs_fs_info *fs_info)
}
}
+#ifdef CONFIG_PRINTK
/*
* __btrfs_std_error decodes expected errors from the caller and
* invokes the approciate error response.
@@ -167,7 +164,7 @@ void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
va_end(args);
}
-const char *logtypes[] = {
+static const char * const logtypes[] = {
"emergency",
"alert",
"critical",
@@ -185,21 +182,49 @@ void btrfs_printk(struct btrfs_fs_info *fs_info, const char *fmt, ...)
struct va_format vaf;
va_list args;
const char *type = logtypes[4];
+ int kern_level;
va_start(args, fmt);
- if (fmt[0] == '<' && isdigit(fmt[1]) && fmt[2] == '>') {
- memcpy(lvl, fmt, 3);
- lvl[3] = '\0';
- fmt += 3;
- type = logtypes[fmt[1] - '0'];
+ kern_level = printk_get_level(fmt);
+ if (kern_level) {
+ size_t size = printk_skip_level(fmt) - fmt;
+ memcpy(lvl, fmt, size);
+ lvl[size] = '\0';
+ fmt += size;
+ type = logtypes[kern_level - '0'];
} else
*lvl = '\0';
vaf.fmt = fmt;
vaf.va = &args;
+
printk("%sBTRFS %s (device %s): %pV", lvl, type, sb->s_id, &vaf);
+
+ va_end(args);
+}
+
+#else
+
+void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
+ unsigned int line, int errno, const char *fmt, ...)
+{
+ struct super_block *sb = fs_info->sb;
+
+ /*
+ * Special case: if the error is EROFS, and we're already
+ * under MS_RDONLY, then it is safe here.
+ */
+ if (errno == -EROFS && (sb->s_flags & MS_RDONLY))
+ return;
+
+ /* Don't go through full error handling during mount */
+ if (sb->s_flags & MS_BORN) {
+ save_error_info(fs_info);
+ btrfs_handle_error(fs_info);
+ }
}
+#endif
/*
* We only mark the transaction aborted and then set the file system read-only.
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 7ac7cdcc294e..17be3dedacba 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -335,6 +335,8 @@ again:
if (!h)
return ERR_PTR(-ENOMEM);
+ sb_start_intwrite(root->fs_info->sb);
+
if (may_wait_transaction(root, type))
wait_current_trans(root);
@@ -345,6 +347,7 @@ again:
} while (ret == -EBUSY);
if (ret < 0) {
+ sb_end_intwrite(root->fs_info->sb);
kmem_cache_free(btrfs_trans_handle_cachep, h);
return ERR_PTR(ret);
}
@@ -548,6 +551,8 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
btrfs_trans_release_metadata(trans, root);
trans->block_rsv = NULL;
+ sb_end_intwrite(root->fs_info->sb);
+
if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
should_end_transaction(trans, root)) {
trans->transaction->blocked = 1;
@@ -1578,6 +1583,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
put_transaction(cur_trans);
put_transaction(cur_trans);
+ sb_end_intwrite(root->fs_info->sb);
+
trace_btrfs_transaction_commit(root);
btrfs_scrub_continue(root);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index b8708f994e67..e86ae04abe6a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1744,10 +1744,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
device->fs_devices = root->fs_info->fs_devices;
- /*
- * we don't want write_supers to jump in here with our device
- * half setup
- */
mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
list_add_rcu(&device->dev_list, &root->fs_info->fs_devices->devices);
list_add(&device->dev_alloc_list,
diff --git a/fs/buffer.c b/fs/buffer.c
index c7062c896d7c..9f6d2e41281d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2306,8 +2306,8 @@ EXPORT_SYMBOL(block_commit_write);
* beyond EOF, then the page is guaranteed safe against truncation until we
* unlock the page.
*
- * Direct callers of this function should call vfs_check_frozen() so that page
- * fault does not busyloop until the fs is thawed.
+ * Direct callers of this function should protect against filesystem freezing
+ * using sb_start_write() - sb_end_write() functions.
*/
int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
get_block_t get_block)
@@ -2318,6 +2318,12 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
loff_t size;
int ret;
+ /*
+ * Update file times before taking page lock. We may end up failing the
+ * fault so this update may be superfluous but who really cares...
+ */
+ file_update_time(vma->vm_file);
+
lock_page(page);
size = i_size_read(inode);
if ((page->mapping != inode->i_mapping) ||
@@ -2339,18 +2345,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
if (unlikely(ret < 0))
goto out_unlock;
- /*
- * Freezing in progress? We check after the page is marked dirty and
- * with page lock held so if the test here fails, we are sure freezing
- * code will wait during syncing until the page fault is done - at that
- * point page will be dirty and unlocked so freezing code will write it
- * and writeprotect it again.
- */
set_page_dirty(page);
- if (inode->i_sb->s_frozen != SB_UNFROZEN) {
- ret = -EAGAIN;
- goto out_unlock;
- }
wait_on_page_writeback(page);
return 0;
out_unlock:
@@ -2365,12 +2360,9 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
int ret;
struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
- /*
- * This check is racy but catches the common case. The check in
- * __block_page_mkwrite() is reliable.
- */
- vfs_check_frozen(sb, SB_FREEZE_WRITE);
+ sb_start_pagefault(sb);
ret = __block_page_mkwrite(vma, vmf, get_block);
+ sb_end_pagefault(sb);
return block_page_mkwrite_return(ret);
}
EXPORT_SYMBOL(block_page_mkwrite);
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index c0353dfac51f..c994691d9445 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -919,7 +919,7 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
* own time */
path.mnt = cache->mnt;
path.dentry = object->backer;
- file = dentry_open(&path, O_RDWR, cache->cache_cred);
+ file = dentry_open(&path, O_RDWR | O_LARGEFILE, cache->cache_cred);
if (IS_ERR(file)) {
ret = PTR_ERR(file);
} else {
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 8b67304e4b80..452e71a1b753 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1184,6 +1184,9 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
loff_t size, len;
int ret;
+ /* Update time before taking page lock */
+ file_update_time(vma->vm_file);
+
size = i_size_read(inode);
if (off + PAGE_CACHE_SIZE <= size)
len = PAGE_CACHE_SIZE;
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 00894ff9246c..e5b77319c97b 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -51,8 +51,7 @@ int ceph_init_dentry(struct dentry *dentry)
goto out_unlock;
}
- if (dentry->d_parent == NULL || /* nfs fh_to_dentry */
- ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
+ if (ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
d_set_d_op(dentry, &ceph_dentry_ops);
else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
d_set_d_op(dentry, &ceph_snapdir_dentry_ops);
@@ -79,7 +78,7 @@ struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry)
return NULL;
spin_lock(&dentry->d_lock);
- if (dentry->d_parent) {
+ if (!IS_ROOT(dentry)) {
inode = dentry->d_parent->d_inode;
ihold(inode);
}
@@ -634,44 +633,6 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
return dentry;
}
-int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
- struct file *file, unsigned flags, umode_t mode,
- int *opened)
-{
- int err;
- struct dentry *res = NULL;
-
- if (!(flags & O_CREAT)) {
- if (dentry->d_name.len > NAME_MAX)
- return -ENAMETOOLONG;
-
- err = ceph_init_dentry(dentry);
- if (err < 0)
- return err;
-
- return ceph_lookup_open(dir, dentry, file, flags, mode, opened);
- }
-
- if (d_unhashed(dentry)) {
- res = ceph_lookup(dir, dentry, 0);
- if (IS_ERR(res))
- return PTR_ERR(res);
-
- if (res)
- dentry = res;
- }
-
- /* We don't deal with positive dentries here */
- if (dentry->d_inode)
- return finish_no_open(file, res);
-
- *opened |= FILE_CREATED;
- err = ceph_lookup_open(dir, dentry, file, flags, mode, opened);
- dput(res);
-
- return err;
-}
-
/*
* If we do a create but get no trace back from the MDS, follow up with
* a lookup (the VFS expects us to link up the provided dentry).
@@ -1154,7 +1115,7 @@ static void ceph_d_prune(struct dentry *dentry)
dout("ceph_d_prune %p\n", dentry);
/* do we have a valid parent? */
- if (!dentry->d_parent || IS_ROOT(dentry))
+ if (IS_ROOT(dentry))
return;
/* if we are not hashed, we don't affect D_COMPLETE */
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 1b81d6c31878..ecebbc09bfc7 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -4,6 +4,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/file.h>
+#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/writeback.h>
@@ -106,9 +107,6 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
}
/*
- * If the filp already has private_data, that means the file was
- * already opened by intent during lookup, and we do nothing.
- *
* If we already have the requisite capabilities, we can satisfy
* the open request locally (no need to request new caps from the
* MDS). We do, however, need to inform the MDS (asynchronously)
@@ -207,24 +205,29 @@ out:
/*
- * Do a lookup + open with a single request.
- *
- * If this succeeds, but some subsequent check in the vfs
- * may_open() fails, the struct *file gets cleaned up (i.e.
- * ceph_release gets called). So fear not!
+ * Do a lookup + open with a single request. If we get a non-existent
+ * file or symlink, return 1 so the VFS can retry.
*/
-int ceph_lookup_open(struct inode *dir, struct dentry *dentry,
+int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
struct file *file, unsigned flags, umode_t mode,
int *opened)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
- struct dentry *ret;
+ struct dentry *dn;
int err;
- dout("ceph_lookup_open dentry %p '%.*s' flags %d mode 0%o\n",
- dentry, dentry->d_name.len, dentry->d_name.name, flags, mode);
+ dout("atomic_open %p dentry %p '%.*s' %s flags %d mode 0%o\n",
+ dir, dentry, dentry->d_name.len, dentry->d_name.name,
+ d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode);
+
+ if (dentry->d_name.len > NAME_MAX)
+ return -ENAMETOOLONG;
+
+ err = ceph_init_dentry(dentry);
+ if (err < 0)
+ return err;
/* do the open */
req = prepare_open_request(dir->i_sb, flags, mode);
@@ -241,22 +244,31 @@ int ceph_lookup_open(struct inode *dir, struct dentry *dentry,
(flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
req);
err = ceph_handle_snapdir(req, dentry, err);
- if (err)
- goto out;
- if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
+ if (err == 0 && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
err = ceph_handle_notrace_create(dir, dentry);
- if (err)
- goto out;
- err = finish_open(file, req->r_dentry, ceph_open, opened);
-out:
- ret = ceph_finish_lookup(req, dentry, err);
- ceph_mdsc_put_request(req);
- dout("ceph_lookup_open result=%p\n", ret);
- if (IS_ERR(ret))
- return PTR_ERR(ret);
+ if (d_unhashed(dentry)) {
+ dn = ceph_finish_lookup(req, dentry, err);
+ if (IS_ERR(dn))
+ err = PTR_ERR(dn);
+ } else {
+ /* we were given a hashed negative dentry */
+ dn = NULL;
+ }
+ if (err)
+ goto out_err;
+ if (dn || dentry->d_inode == NULL || S_ISLNK(dentry->d_inode->i_mode)) {
+ /* make vfs retry on splice, ENOENT, or symlink */
+ dout("atomic_open finish_no_open on dn %p\n", dn);
+ err = finish_no_open(file, dn);
+ } else {
+ dout("atomic_open finish_open on dn %p\n", dn);
+ err = finish_open(file, dentry, ceph_open, opened);
+ }
- dput(ret);
+out_err:
+ ceph_mdsc_put_request(req);
+ dout("atomic_open result=%d\n", err);
return err;
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 200bc87eceb1..a5a735422aa7 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -10,6 +10,7 @@
#include "super.h"
#include "mds_client.h"
+#include <linux/ceph/ceph_features.h>
#include <linux/ceph/messenger.h>
#include <linux/ceph/decode.h>
#include <linux/ceph/pagelist.h>
@@ -394,11 +395,7 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
s->s_seq = 0;
mutex_init(&s->s_mutex);
- ceph_con_init(mdsc->fsc->client->msgr, &s->s_con);
- s->s_con.private = s;
- s->s_con.ops = &mds_con_ops;
- s->s_con.peer_name.type = CEPH_ENTITY_TYPE_MDS;
- s->s_con.peer_name.num = cpu_to_le64(mds);
+ ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr);
spin_lock_init(&s->s_gen_ttl_lock);
s->s_cap_gen = 0;
@@ -440,7 +437,8 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
mdsc->sessions[mds] = s;
atomic_inc(&s->s_ref); /* one ref to sessions[], one to caller */
- ceph_con_open(&s->s_con, ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
+ ceph_con_open(&s->s_con, CEPH_ENTITY_TYPE_MDS, mds,
+ ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
return s;
@@ -1472,11 +1470,6 @@ retry:
else
len += 1 + temp->d_name.len;
temp = temp->d_parent;
- if (temp == NULL) {
- rcu_read_unlock();
- pr_err("build_path corrupt dentry %p\n", dentry);
- return ERR_PTR(-EINVAL);
- }
}
rcu_read_unlock();
if (len)
@@ -1513,12 +1506,6 @@ retry:
if (pos)
path[--pos] = '/';
temp = temp->d_parent;
- if (temp == NULL) {
- rcu_read_unlock();
- pr_err("build_path corrupt dentry\n");
- kfree(path);
- return ERR_PTR(-EINVAL);
- }
}
rcu_read_unlock();
if (pos != 0 || read_seqretry(&rename_lock, seq)) {
@@ -2531,7 +2518,9 @@ static void send_mds_reconnect(struct ceph_mds_client *mdsc,
session->s_state = CEPH_MDS_SESSION_RECONNECTING;
session->s_seq = 0;
+ ceph_con_close(&session->s_con);
ceph_con_open(&session->s_con,
+ CEPH_ENTITY_TYPE_MDS, mds,
ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
/* replay unsafe requests */
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index e5206fc76562..cbb2f54a3019 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -296,8 +296,7 @@ static int build_snap_context(struct ceph_snap_realm *realm)
struct ceph_snap_realm *parent = realm->parent;
struct ceph_snap_context *snapc;
int err = 0;
- int i;
- int num = realm->num_prior_parent_snaps + realm->num_snaps;
+ u32 num = realm->num_prior_parent_snaps + realm->num_snaps;
/*
* build parent context, if it hasn't been built.
@@ -321,11 +320,11 @@ static int build_snap_context(struct ceph_snap_realm *realm)
realm->cached_context->seq == realm->seq &&
(!parent ||
realm->cached_context->seq >= parent->cached_context->seq)) {
- dout("build_snap_context %llx %p: %p seq %lld (%d snaps)"
+ dout("build_snap_context %llx %p: %p seq %lld (%u snaps)"
" (unchanged)\n",
realm->ino, realm, realm->cached_context,
realm->cached_context->seq,
- realm->cached_context->num_snaps);
+ (unsigned int) realm->cached_context->num_snaps);
return 0;
}
@@ -342,6 +341,8 @@ static int build_snap_context(struct ceph_snap_realm *realm)
num = 0;
snapc->seq = realm->seq;
if (parent) {
+ u32 i;
+
/* include any of parent's snaps occurring _after_ my
parent became my parent */
for (i = 0; i < parent->cached_context->num_snaps; i++)
@@ -361,8 +362,9 @@ static int build_snap_context(struct ceph_snap_realm *realm)
sort(snapc->snaps, num, sizeof(u64), cmpu64_rev, NULL);
snapc->num_snaps = num;
- dout("build_snap_context %llx %p: %p seq %lld (%d snaps)\n",
- realm->ino, realm, snapc, snapc->seq, snapc->num_snaps);
+ dout("build_snap_context %llx %p: %p seq %lld (%u snaps)\n",
+ realm->ino, realm, snapc, snapc->seq,
+ (unsigned int) snapc->num_snaps);
if (realm->cached_context)
ceph_put_snap_context(realm->cached_context);
@@ -402,9 +404,9 @@ static void rebuild_snap_realms(struct ceph_snap_realm *realm)
* helper to allocate and decode an array of snapids. free prior
* instance, if any.
*/
-static int dup_array(u64 **dst, __le64 *src, int num)
+static int dup_array(u64 **dst, __le64 *src, u32 num)
{
- int i;
+ u32 i;
kfree(*dst);
if (num) {
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 7076109f014d..b982239f38f9 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -18,6 +18,7 @@
#include "super.h"
#include "mds_client.h"
+#include <linux/ceph/ceph_features.h>
#include <linux/ceph/decode.h>
#include <linux/ceph/mon_client.h>
#include <linux/ceph/auth.h>
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index f4d5522cb619..66ebe720e40d 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -612,9 +612,9 @@ struct ceph_snap_realm {
u64 parent_since; /* snapid when our current parent became so */
u64 *prior_parent_snaps; /* snaps inherited from any parents we */
- int num_prior_parent_snaps; /* had prior to parent_since */
+ u32 num_prior_parent_snaps; /* had prior to parent_since */
u64 *snaps; /* snaps specific to this realm */
- int num_snaps;
+ u32 num_snaps;
struct ceph_snap_realm *parent;
struct list_head children; /* list of child realms */
@@ -806,9 +806,9 @@ extern int ceph_copy_from_page_vector(struct page **pages,
loff_t off, size_t len);
extern struct page **ceph_alloc_page_vector(int num_pages, gfp_t flags);
extern int ceph_open(struct inode *inode, struct file *file);
-extern int ceph_lookup_open(struct inode *dir, struct dentry *dentry,
- struct file *od, unsigned flags,
- umode_t mode, int *opened);
+extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
+ struct file *file, unsigned flags, umode_t mode,
+ int *opened);
extern int ceph_release(struct inode *inode, struct file *filp);
/* dir.c */
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 785cb3057c95..2c2ae5be9902 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -457,6 +457,7 @@ start:
for (i = 0; i < numattr; i++)
kfree(xattrs[i]);
kfree(xattrs);
+ xattrs = NULL;
goto start;
}
err = -EIO;
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 497da5ce704c..977dc0e85ccb 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -246,6 +246,16 @@ struct smb_version_operations {
bool (*can_echo)(struct TCP_Server_Info *);
/* send echo request */
int (*echo)(struct TCP_Server_Info *);
+ /* create directory */
+ int (*mkdir)(const unsigned int, struct cifs_tcon *, const char *,
+ struct cifs_sb_info *);
+ /* set info on created directory */
+ void (*mkdir_setinfo)(struct inode *, const char *,
+ struct cifs_sb_info *, struct cifs_tcon *,
+ const unsigned int);
+ /* remove directory */
+ int (*rmdir)(const unsigned int, struct cifs_tcon *, const char *,
+ struct cifs_sb_info *);
};
struct smb_version_values {
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index cf7fb185103c..f1bbf8305d3a 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -289,18 +289,15 @@ extern int CIFSSMBUnixSetFileInfo(const unsigned int xid,
u16 fid, u32 pid_of_opener);
extern int CIFSSMBUnixSetPathInfo(const unsigned int xid,
- struct cifs_tcon *tcon, char *file_name,
+ struct cifs_tcon *tcon, const char *file_name,
const struct cifs_unix_set_info_args *args,
const struct nls_table *nls_codepage,
- int remap_special_chars);
+ int remap);
extern int CIFSSMBMkDir(const unsigned int xid, struct cifs_tcon *tcon,
- const char *newName,
- const struct nls_table *nls_codepage,
- int remap_special_chars);
+ const char *name, struct cifs_sb_info *cifs_sb);
extern int CIFSSMBRmDir(const unsigned int xid, struct cifs_tcon *tcon,
- const char *name, const struct nls_table *nls_codepage,
- int remap_special_chars);
+ const char *name, struct cifs_sb_info *cifs_sb);
extern int CIFSPOSIXDelFile(const unsigned int xid, struct cifs_tcon *tcon,
const char *name, __u16 type,
const struct nls_table *nls_codepage,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index cabc7a01f5df..074923ce593d 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -948,15 +948,15 @@ DelFileRetry:
}
int
-CIFSSMBRmDir(const unsigned int xid, struct cifs_tcon *tcon,
- const char *dirName, const struct nls_table *nls_codepage,
- int remap)
+CIFSSMBRmDir(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
+ struct cifs_sb_info *cifs_sb)
{
DELETE_DIRECTORY_REQ *pSMB = NULL;
DELETE_DIRECTORY_RSP *pSMBr = NULL;
int rc = 0;
int bytes_returned;
int name_len;
+ int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR;
cFYI(1, "In CIFSSMBRmDir");
RmDirRetry:
@@ -966,14 +966,15 @@ RmDirRetry:
return rc;
if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
- name_len = cifsConvertToUTF16((__le16 *) pSMB->DirName, dirName,
- PATH_MAX, nls_codepage, remap);
+ name_len = cifsConvertToUTF16((__le16 *) pSMB->DirName, name,
+ PATH_MAX, cifs_sb->local_nls,
+ remap);
name_len++; /* trailing null */
name_len *= 2;
} else { /* BB improve check for buffer overruns BB */
- name_len = strnlen(dirName, PATH_MAX);
+ name_len = strnlen(name, PATH_MAX);
name_len++; /* trailing null */
- strncpy(pSMB->DirName, dirName, name_len);
+ strncpy(pSMB->DirName, name, name_len);
}
pSMB->BufferFormat = 0x04;
@@ -992,14 +993,15 @@ RmDirRetry:
}
int
-CIFSSMBMkDir(const unsigned int xid, struct cifs_tcon *tcon,
- const char *name, const struct nls_table *nls_codepage, int remap)
+CIFSSMBMkDir(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
+ struct cifs_sb_info *cifs_sb)
{
int rc = 0;
CREATE_DIRECTORY_REQ *pSMB = NULL;
CREATE_DIRECTORY_RSP *pSMBr = NULL;
int bytes_returned;
int name_len;
+ int remap = cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR;
cFYI(1, "In CIFSSMBMkDir");
MkDirRetry:
@@ -1010,7 +1012,8 @@ MkDirRetry:
if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
name_len = cifsConvertToUTF16((__le16 *) pSMB->DirName, name,
- PATH_MAX, nls_codepage, remap);
+ PATH_MAX, cifs_sb->local_nls,
+ remap);
name_len++; /* trailing null */
name_len *= 2;
} else { /* BB improve check for buffer overruns BB */
@@ -5943,7 +5946,7 @@ CIFSSMBUnixSetFileInfo(const unsigned int xid, struct cifs_tcon *tcon,
int
CIFSSMBUnixSetPathInfo(const unsigned int xid, struct cifs_tcon *tcon,
- char *fileName,
+ const char *file_name,
const struct cifs_unix_set_info_args *args,
const struct nls_table *nls_codepage, int remap)
{
@@ -5964,14 +5967,14 @@ setPermsRetry:
if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) {
name_len =
- cifsConvertToUTF16((__le16 *) pSMB->FileName, fileName,
+ cifsConvertToUTF16((__le16 *) pSMB->FileName, file_name,
PATH_MAX, nls_codepage, remap);
name_len++; /* trailing null */
name_len *= 2;
} else { /* BB improve the check for buffer overruns BB */
- name_len = strnlen(fileName, PATH_MAX);
+ name_len = strnlen(file_name, PATH_MAX);
name_len++; /* trailing null */
- strncpy(pSMB->FileName, fileName, name_len);
+ strncpy(pSMB->FileName, file_name, name_len);
}
params = 6 + name_len;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 35cb6a374a45..7354877fa3bd 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1219,16 +1219,153 @@ unlink_out:
return rc;
}
+static int
+cifs_mkdir_qinfo(struct inode *inode, struct dentry *dentry, umode_t mode,
+ const char *full_path, struct cifs_sb_info *cifs_sb,
+ struct cifs_tcon *tcon, const unsigned int xid)
+{
+ int rc = 0;
+ struct inode *newinode = NULL;
+
+ if (tcon->unix_ext)
+ rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb,
+ xid);
+ else
+ rc = cifs_get_inode_info(&newinode, full_path, NULL,
+ inode->i_sb, xid, NULL);
+ if (rc)
+ return rc;
+
+ d_instantiate(dentry, newinode);
+ /*
+ * setting nlink not necessary except in cases where we failed to get it
+ * from the server or was set bogus
+ */
+ if ((dentry->d_inode) && (dentry->d_inode->i_nlink < 2))
+ set_nlink(dentry->d_inode, 2);
+
+ mode &= ~current_umask();
+ /* must turn on setgid bit if parent dir has it */
+ if (inode->i_mode & S_ISGID)
+ mode |= S_ISGID;
+
+ if (tcon->unix_ext) {
+ struct cifs_unix_set_info_args args = {
+ .mode = mode,
+ .ctime = NO_CHANGE_64,
+ .atime = NO_CHANGE_64,
+ .mtime = NO_CHANGE_64,
+ .device = 0,
+ };
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
+ args.uid = (__u64)current_fsuid();
+ if (inode->i_mode & S_ISGID)
+ args.gid = (__u64)inode->i_gid;
+ else
+ args.gid = (__u64)current_fsgid();
+ } else {
+ args.uid = NO_CHANGE_64;
+ args.gid = NO_CHANGE_64;
+ }
+ CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
+ cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
+ } else {
+ struct TCP_Server_Info *server = tcon->ses->server;
+ if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) &&
+ (mode & S_IWUGO) == 0 && server->ops->mkdir_setinfo)
+ server->ops->mkdir_setinfo(newinode, full_path, cifs_sb,
+ tcon, xid);
+ if (dentry->d_inode) {
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)
+ dentry->d_inode->i_mode = (mode | S_IFDIR);
+
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
+ dentry->d_inode->i_uid = current_fsuid();
+ if (inode->i_mode & S_ISGID)
+ dentry->d_inode->i_gid = inode->i_gid;
+ else
+ dentry->d_inode->i_gid =
+ current_fsgid();
+ }
+ }
+ }
+ return rc;
+}
+
+static int
+cifs_posix_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode,
+ const char *full_path, struct cifs_sb_info *cifs_sb,
+ struct cifs_tcon *tcon, const unsigned int xid)
+{
+ int rc = 0;
+ u32 oplock = 0;
+ FILE_UNIX_BASIC_INFO *info = NULL;
+ struct inode *newinode = NULL;
+ struct cifs_fattr fattr;
+
+ info = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
+ if (info == NULL) {
+ rc = -ENOMEM;
+ goto posix_mkdir_out;
+ }
+
+ mode &= ~current_umask();
+ rc = CIFSPOSIXCreate(xid, tcon, SMB_O_DIRECTORY | SMB_O_CREAT, mode,
+ NULL /* netfid */, info, &oplock, full_path,
+ cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (rc == -EOPNOTSUPP)
+ goto posix_mkdir_out;
+ else if (rc) {
+ cFYI(1, "posix mkdir returned 0x%x", rc);
+ d_drop(dentry);
+ goto posix_mkdir_out;
+ }
+
+ if (info->Type == cpu_to_le32(-1))
+ /* no return info, go query for it */
+ goto posix_mkdir_get_info;
+ /*
+ * BB check (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID ) to see if
+ * need to set uid/gid.
+ */
+
+ cifs_unix_basic_to_fattr(&fattr, info, cifs_sb);
+ cifs_fill_uniqueid(inode->i_sb, &fattr);
+ newinode = cifs_iget(inode->i_sb, &fattr);
+ if (!newinode)
+ goto posix_mkdir_get_info;
+
+ d_instantiate(dentry, newinode);
+
+#ifdef CONFIG_CIFS_DEBUG2
+ cFYI(1, "instantiated dentry %p %s to inode %p", dentry,
+ dentry->d_name.name, newinode);
+
+ if (newinode->i_nlink != 2)
+ cFYI(1, "unexpected number of links %d", newinode->i_nlink);
+#endif
+
+posix_mkdir_out:
+ kfree(info);
+ return rc;
+posix_mkdir_get_info:
+ rc = cifs_mkdir_qinfo(inode, dentry, mode, full_path, cifs_sb, tcon,
+ xid);
+ goto posix_mkdir_out;
+}
+
int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode)
{
- int rc = 0, tmprc;
+ int rc = 0;
unsigned int xid;
struct cifs_sb_info *cifs_sb;
struct tcon_link *tlink;
struct cifs_tcon *tcon;
- char *full_path = NULL;
- struct inode *newinode = NULL;
- struct cifs_fattr fattr;
+ struct TCP_Server_Info *server;
+ char *full_path;
cFYI(1, "In cifs_mkdir, mode = 0x%hx inode = 0x%p", mode, inode);
@@ -1248,145 +1385,29 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode)
if (cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
le64_to_cpu(tcon->fsUnixInfo.Capability))) {
- u32 oplock = 0;
- FILE_UNIX_BASIC_INFO *pInfo =
- kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
- if (pInfo == NULL) {
- rc = -ENOMEM;
+ rc = cifs_posix_mkdir(inode, direntry, mode, full_path, cifs_sb,
+ tcon, xid);
+ if (rc != -EOPNOTSUPP)
goto mkdir_out;
- }
-
- mode &= ~current_umask();
- rc = CIFSPOSIXCreate(xid, tcon, SMB_O_DIRECTORY | SMB_O_CREAT,
- mode, NULL /* netfid */, pInfo, &oplock,
- full_path, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
- if (rc == -EOPNOTSUPP) {
- kfree(pInfo);
- goto mkdir_retry_old;
- } else if (rc) {
- cFYI(1, "posix mkdir returned 0x%x", rc);
- d_drop(direntry);
- } else {
- if (pInfo->Type == cpu_to_le32(-1)) {
- /* no return info, go query for it */
- kfree(pInfo);
- goto mkdir_get_info;
- }
-/*BB check (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID ) to see if need
- to set uid/gid */
-
- cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb);
- cifs_fill_uniqueid(inode->i_sb, &fattr);
- newinode = cifs_iget(inode->i_sb, &fattr);
- if (!newinode) {
- kfree(pInfo);
- goto mkdir_get_info;
- }
-
- d_instantiate(direntry, newinode);
+ }
-#ifdef CONFIG_CIFS_DEBUG2
- cFYI(1, "instantiated dentry %p %s to inode %p",
- direntry, direntry->d_name.name, newinode);
+ server = tcon->ses->server;
- if (newinode->i_nlink != 2)
- cFYI(1, "unexpected number of links %d",
- newinode->i_nlink);
-#endif
- }
- kfree(pInfo);
+ if (!server->ops->mkdir) {
+ rc = -ENOSYS;
goto mkdir_out;
}
-mkdir_retry_old:
+
/* BB add setting the equivalent of mode via CreateX w/ACLs */
- rc = CIFSSMBMkDir(xid, tcon, full_path, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+ rc = server->ops->mkdir(xid, tcon, full_path, cifs_sb);
if (rc) {
cFYI(1, "cifs_mkdir returned 0x%x", rc);
d_drop(direntry);
- } else {
-mkdir_get_info:
- if (tcon->unix_ext)
- rc = cifs_get_inode_info_unix(&newinode, full_path,
- inode->i_sb, xid);
- else
- rc = cifs_get_inode_info(&newinode, full_path, NULL,
- inode->i_sb, xid, NULL);
-
- d_instantiate(direntry, newinode);
- /* setting nlink not necessary except in cases where we
- * failed to get it from the server or was set bogus */
- if ((direntry->d_inode) && (direntry->d_inode->i_nlink < 2))
- set_nlink(direntry->d_inode, 2);
-
- mode &= ~current_umask();
- /* must turn on setgid bit if parent dir has it */
- if (inode->i_mode & S_ISGID)
- mode |= S_ISGID;
-
- if (tcon->unix_ext) {
- struct cifs_unix_set_info_args args = {
- .mode = mode,
- .ctime = NO_CHANGE_64,
- .atime = NO_CHANGE_64,
- .mtime = NO_CHANGE_64,
- .device = 0,
- };
- if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID) {
- args.uid = (__u64)current_fsuid();
- if (inode->i_mode & S_ISGID)
- args.gid = (__u64)inode->i_gid;
- else
- args.gid = (__u64)current_fsgid();
- } else {
- args.uid = NO_CHANGE_64;
- args.gid = NO_CHANGE_64;
- }
- CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
- cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
- } else {
- if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) &&
- (mode & S_IWUGO) == 0) {
- FILE_BASIC_INFO pInfo;
- struct cifsInodeInfo *cifsInode;
- u32 dosattrs;
-
- memset(&pInfo, 0, sizeof(pInfo));
- cifsInode = CIFS_I(newinode);
- dosattrs = cifsInode->cifsAttrs|ATTR_READONLY;
- pInfo.Attributes = cpu_to_le32(dosattrs);
- tmprc = CIFSSMBSetPathInfo(xid, tcon,
- full_path, &pInfo,
- cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
- if (tmprc == 0)
- cifsInode->cifsAttrs = dosattrs;
- }
- if (direntry->d_inode) {
- if (cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_DYNPERM)
- direntry->d_inode->i_mode =
- (mode | S_IFDIR);
-
- if (cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_SET_UID) {
- direntry->d_inode->i_uid =
- current_fsuid();
- if (inode->i_mode & S_ISGID)
- direntry->d_inode->i_gid =
- inode->i_gid;
- else
- direntry->d_inode->i_gid =
- current_fsgid();
- }
- }
- }
+ goto mkdir_out;
}
+
+ rc = cifs_mkdir_qinfo(inode, direntry, mode, full_path, cifs_sb, tcon,
+ xid);
mkdir_out:
/*
* Force revalidate to get parent dir info when needed since cached
@@ -1405,7 +1426,8 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
unsigned int xid;
struct cifs_sb_info *cifs_sb;
struct tcon_link *tlink;
- struct cifs_tcon *pTcon;
+ struct cifs_tcon *tcon;
+ struct TCP_Server_Info *server;
char *full_path = NULL;
struct cifsInodeInfo *cifsInode;
@@ -1425,10 +1447,16 @@ int cifs_rmdir(struct inode *inode, struct dentry *direntry)
rc = PTR_ERR(tlink);
goto rmdir_exit;
}
- pTcon = tlink_tcon(tlink);
+ tcon = tlink_tcon(tlink);
+ server = tcon->ses->server;
+
+ if (!server->ops->rmdir) {
+ rc = -ENOSYS;
+ cifs_put_tlink(tlink);
+ goto rmdir_exit;
+ }
- rc = CIFSSMBRmDir(xid, pTcon, full_path, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+ rc = server->ops->rmdir(xid, tcon, full_path, cifs_sb);
cifs_put_tlink(tlink);
if (!rc) {
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index c40356d24c5c..3129ac74b819 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -586,6 +586,27 @@ cifs_print_stats(struct seq_file *m, struct cifs_tcon *tcon)
#endif
}
+static void
+cifs_mkdir_setinfo(struct inode *inode, const char *full_path,
+ struct cifs_sb_info *cifs_sb, struct cifs_tcon *tcon,
+ const unsigned int xid)
+{
+ FILE_BASIC_INFO info;
+ struct cifsInodeInfo *cifsInode;
+ u32 dosattrs;
+ int rc;
+
+ memset(&info, 0, sizeof(info));
+ cifsInode = CIFS_I(inode);
+ dosattrs = cifsInode->cifsAttrs|ATTR_READONLY;
+ info.Attributes = cpu_to_le32(dosattrs);
+ rc = CIFSSMBSetPathInfo(xid, tcon, full_path, &info, cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (rc == 0)
+ cifsInode->cifsAttrs = dosattrs;
+}
+
struct smb_version_operations smb1_operations = {
.send_cancel = send_nt_cancel,
.compare_fids = cifs_compare_fids,
@@ -620,6 +641,9 @@ struct smb_version_operations smb1_operations = {
.get_srv_inum = cifs_get_srv_inum,
.build_path_to_root = cifs_build_path_to_root,
.echo = CIFSSMBEcho,
+ .mkdir = CIFSSMBMkDir,
+ .mkdir_setinfo = cifs_mkdir_setinfo,
+ .rmdir = CIFSSMBRmDir,
};
struct smb_version_values smb1_values = {
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 1ba5c405315c..2aa5cb08c526 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -122,3 +122,42 @@ out:
kfree(smb2_data);
return rc;
}
+
+int
+smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
+ struct cifs_sb_info *cifs_sb)
+{
+ return smb2_open_op_close(xid, tcon, cifs_sb, name,
+ FILE_WRITE_ATTRIBUTES, FILE_CREATE, 0,
+ CREATE_NOT_FILE, NULL, SMB2_OP_MKDIR);
+}
+
+void
+smb2_mkdir_setinfo(struct inode *inode, const char *name,
+ struct cifs_sb_info *cifs_sb, struct cifs_tcon *tcon,
+ const unsigned int xid)
+{
+ FILE_BASIC_INFO data;
+ struct cifsInodeInfo *cifs_i;
+ u32 dosattrs;
+ int tmprc;
+
+ memset(&data, 0, sizeof(data));
+ cifs_i = CIFS_I(inode);
+ dosattrs = cifs_i->cifsAttrs | ATTR_READONLY;
+ data.Attributes = cpu_to_le32(dosattrs);
+ tmprc = smb2_open_op_close(xid, tcon, cifs_sb, name,
+ FILE_WRITE_ATTRIBUTES, FILE_CREATE, 0,
+ CREATE_NOT_FILE, &data, SMB2_OP_SET_INFO);
+ if (tmprc == 0)
+ cifs_i->cifsAttrs = dosattrs;
+}
+
+int
+smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
+ struct cifs_sb_info *cifs_sb)
+{
+ return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
+ 0, CREATE_NOT_FILE | CREATE_DELETE_ON_CLOSE,
+ NULL, SMB2_OP_DELETE);
+}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 410cf925ea26..826209bf3684 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -318,6 +318,9 @@ struct smb_version_operations smb21_operations = {
.query_path_info = smb2_query_path_info,
.get_srv_inum = smb2_get_srv_inum,
.build_path_to_root = smb2_build_path_to_root,
+ .mkdir = smb2_mkdir,
+ .mkdir_setinfo = smb2_mkdir_setinfo,
+ .rmdir = smb2_rmdir,
};
struct smb_version_values smb21_values = {
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 902bbe2b5ad3..bfaa7b148afd 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -52,6 +52,14 @@ extern int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb,
const char *full_path, FILE_ALL_INFO *data,
bool *adjust_tz);
+extern int smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon,
+ const char *name, struct cifs_sb_info *cifs_sb);
+extern void smb2_mkdir_setinfo(struct inode *inode, const char *full_path,
+ struct cifs_sb_info *cifs_sb,
+ struct cifs_tcon *tcon, const unsigned int xid);
+extern int smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon,
+ const char *name, struct cifs_sb_info *cifs_sb);
+
/*
* SMB2 Worker functions - most of protocol specific implementation details
* are contained within these calls.
diff --git a/fs/compat.c b/fs/compat.c
index 6161255fac45..1bdb350ea5d3 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1155,11 +1155,14 @@ compat_sys_readv(unsigned long fd, const struct compat_iovec __user *vec,
struct file *file;
int fput_needed;
ssize_t ret;
+ loff_t pos;
file = fget_light(fd, &fput_needed);
if (!file)
return -EBADF;
- ret = compat_readv(file, vec, vlen, &file->f_pos);
+ pos = file->f_pos;
+ ret = compat_readv(file, vec, vlen, &pos);
+ file->f_pos = pos;
fput_light(file, fput_needed);
return ret;
}
@@ -1221,11 +1224,14 @@ compat_sys_writev(unsigned long fd, const struct compat_iovec __user *vec,
struct file *file;
int fput_needed;
ssize_t ret;
+ loff_t pos;
file = fget_light(fd, &fput_needed);
if (!file)
return -EBADF;
- ret = compat_writev(file, vec, vlen, &file->f_pos);
+ pos = file->f_pos;
+ ret = compat_writev(file, vec, vlen, &pos);
+ file->f_pos = pos;
fput_light(file, fput_needed);
return ret;
}
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 989e034f02bd..cfb4b9fed520 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -385,8 +385,6 @@ struct ecryptfs_msg_ctx {
struct mutex mux;
};
-struct ecryptfs_daemon;
-
struct ecryptfs_daemon {
#define ECRYPTFS_DAEMON_IN_READ 0x00000001
#define ECRYPTFS_DAEMON_IN_POLL 0x00000002
@@ -394,10 +392,7 @@ struct ecryptfs_daemon {
#define ECRYPTFS_DAEMON_MISCDEV_OPEN 0x00000008
u32 flags;
u32 num_queued_msg_ctx;
- struct pid *pid;
- uid_t euid;
- struct user_namespace *user_ns;
- struct task_struct *task;
+ struct file *file;
struct mutex mux;
struct list_head msg_ctx_out_queue;
wait_queue_head_t wait;
@@ -554,6 +549,8 @@ extern struct kmem_cache *ecryptfs_key_tfm_cache;
struct inode *ecryptfs_get_inode(struct inode *lower_inode,
struct super_block *sb);
void ecryptfs_i_size_init(const char *page_virt, struct inode *inode);
+int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry,
+ struct inode *ecryptfs_inode);
int ecryptfs_decode_and_decrypt_filename(char **decrypted_name,
size_t *decrypted_name_size,
struct dentry *ecryptfs_dentry,
@@ -607,13 +604,8 @@ int
ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
size_t size, int flags);
int ecryptfs_read_xattr_region(char *page_virt, struct inode *ecryptfs_inode);
-int ecryptfs_process_helo(uid_t euid, struct user_namespace *user_ns,
- struct pid *pid);
-int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns,
- struct pid *pid);
-int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
- struct user_namespace *user_ns, struct pid *pid,
- u32 seq);
+int ecryptfs_process_response(struct ecryptfs_daemon *daemon,
+ struct ecryptfs_message *msg, u32 seq);
int ecryptfs_send_message(char *data, int data_len,
struct ecryptfs_msg_ctx **msg_ctx);
int ecryptfs_wait_for_response(struct ecryptfs_msg_ctx *msg_ctx,
@@ -658,8 +650,7 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs,
struct inode *ecryptfs_inode);
struct page *ecryptfs_get_locked_page(struct inode *inode, loff_t index);
int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon);
-int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid,
- struct user_namespace *user_ns);
+int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon);
int ecryptfs_parse_packet_length(unsigned char *data, size_t *size,
size_t *length_size);
int ecryptfs_write_packet_length(char *dest, size_t size,
@@ -671,8 +662,7 @@ int ecryptfs_send_miscdev(char *data, size_t data_size,
u16 msg_flags, struct ecryptfs_daemon *daemon);
void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx);
int
-ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid,
- struct user_namespace *user_ns, struct pid *pid);
+ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, struct file *file);
int ecryptfs_init_kthread(void);
void ecryptfs_destroy_kthread(void);
int ecryptfs_privileged_open(struct file **lower_file,
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 2b17f2f9b121..44ce5c6a541d 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -138,29 +138,50 @@ out:
return rc;
}
-static void ecryptfs_vma_close(struct vm_area_struct *vma)
-{
- filemap_write_and_wait(vma->vm_file->f_mapping);
-}
-
-static const struct vm_operations_struct ecryptfs_file_vm_ops = {
- .close = ecryptfs_vma_close,
- .fault = filemap_fault,
-};
+struct kmem_cache *ecryptfs_file_info_cache;
-static int ecryptfs_file_mmap(struct file *file, struct vm_area_struct *vma)
+static int read_or_initialize_metadata(struct dentry *dentry)
{
+ struct inode *inode = dentry->d_inode;
+ struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
+ struct ecryptfs_crypt_stat *crypt_stat;
int rc;
- rc = generic_file_mmap(file, vma);
+ crypt_stat = &ecryptfs_inode_to_private(inode)->crypt_stat;
+ mount_crypt_stat = &ecryptfs_superblock_to_private(
+ inode->i_sb)->mount_crypt_stat;
+ mutex_lock(&crypt_stat->cs_mutex);
+
+ if (crypt_stat->flags & ECRYPTFS_POLICY_APPLIED &&
+ crypt_stat->flags & ECRYPTFS_KEY_VALID) {
+ rc = 0;
+ goto out;
+ }
+
+ rc = ecryptfs_read_metadata(dentry);
if (!rc)
- vma->vm_ops = &ecryptfs_file_vm_ops;
+ goto out;
+
+ if (mount_crypt_stat->flags & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED) {
+ crypt_stat->flags &= ~(ECRYPTFS_I_SIZE_INITIALIZED
+ | ECRYPTFS_ENCRYPTED);
+ rc = 0;
+ goto out;
+ }
+ if (!(mount_crypt_stat->flags & ECRYPTFS_XATTR_METADATA_ENABLED) &&
+ !i_size_read(ecryptfs_inode_to_lower(inode))) {
+ rc = ecryptfs_initialize_file(dentry, inode);
+ if (!rc)
+ goto out;
+ }
+
+ rc = -EIO;
+out:
+ mutex_unlock(&crypt_stat->cs_mutex);
return rc;
}
-struct kmem_cache *ecryptfs_file_info_cache;
-
/**
* ecryptfs_open
* @inode: inode speciying file to open
@@ -236,32 +257,9 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
rc = 0;
goto out;
}
- mutex_lock(&crypt_stat->cs_mutex);
- if (!(crypt_stat->flags & ECRYPTFS_POLICY_APPLIED)
- || !(crypt_stat->flags & ECRYPTFS_KEY_VALID)) {
- rc = ecryptfs_read_metadata(ecryptfs_dentry);
- if (rc) {
- ecryptfs_printk(KERN_DEBUG,
- "Valid headers not found\n");
- if (!(mount_crypt_stat->flags
- & ECRYPTFS_PLAINTEXT_PASSTHROUGH_ENABLED)) {
- rc = -EIO;
- printk(KERN_WARNING "Either the lower file "
- "is not in a valid eCryptfs format, "
- "or the key could not be retrieved. "
- "Plaintext passthrough mode is not "
- "enabled; returning -EIO\n");
- mutex_unlock(&crypt_stat->cs_mutex);
- goto out_put;
- }
- rc = 0;
- crypt_stat->flags &= ~(ECRYPTFS_I_SIZE_INITIALIZED
- | ECRYPTFS_ENCRYPTED);
- mutex_unlock(&crypt_stat->cs_mutex);
- goto out;
- }
- }
- mutex_unlock(&crypt_stat->cs_mutex);
+ rc = read_or_initialize_metadata(ecryptfs_dentry);
+ if (rc)
+ goto out_put;
ecryptfs_printk(KERN_DEBUG, "inode w/ addr = [0x%p], i_ino = "
"[0x%.16lx] size: [0x%.16llx]\n", inode, inode->i_ino,
(unsigned long long)i_size_read(inode));
@@ -292,15 +290,7 @@ static int ecryptfs_release(struct inode *inode, struct file *file)
static int
ecryptfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
- int rc = 0;
-
- rc = generic_file_fsync(file, start, end, datasync);
- if (rc)
- goto out;
- rc = vfs_fsync_range(ecryptfs_file_to_lower(file), start, end,
- datasync);
-out:
- return rc;
+ return vfs_fsync(ecryptfs_file_to_lower(file), datasync);
}
static int ecryptfs_fasync(int fd, struct file *file, int flag)
@@ -369,7 +359,7 @@ const struct file_operations ecryptfs_main_fops = {
#ifdef CONFIG_COMPAT
.compat_ioctl = ecryptfs_compat_ioctl,
#endif
- .mmap = ecryptfs_file_mmap,
+ .mmap = generic_file_mmap,
.open = ecryptfs_open,
.flush = ecryptfs_flush,
.release = ecryptfs_release,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index ffa2be57804d..534b129ea676 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -143,6 +143,31 @@ static int ecryptfs_interpose(struct dentry *lower_dentry,
return 0;
}
+static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry,
+ struct inode *inode)
+{
+ struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
+ struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir);
+ struct dentry *lower_dir_dentry;
+ int rc;
+
+ dget(lower_dentry);
+ lower_dir_dentry = lock_parent(lower_dentry);
+ rc = vfs_unlink(lower_dir_inode, lower_dentry);
+ if (rc) {
+ printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc);
+ goto out_unlock;
+ }
+ fsstack_copy_attr_times(dir, lower_dir_inode);
+ set_nlink(inode, ecryptfs_inode_to_lower(inode)->i_nlink);
+ inode->i_ctime = dir->i_ctime;
+ d_drop(dentry);
+out_unlock:
+ unlock_dir(lower_dir_dentry);
+ dput(lower_dentry);
+ return rc;
+}
+
/**
* ecryptfs_do_create
* @directory_inode: inode of the new file's dentry's parent in ecryptfs
@@ -182,8 +207,10 @@ ecryptfs_do_create(struct inode *directory_inode,
}
inode = __ecryptfs_get_inode(lower_dentry->d_inode,
directory_inode->i_sb);
- if (IS_ERR(inode))
+ if (IS_ERR(inode)) {
+ vfs_unlink(lower_dir_dentry->d_inode, lower_dentry);
goto out_lock;
+ }
fsstack_copy_attr_times(directory_inode, lower_dir_dentry->d_inode);
fsstack_copy_inode_size(directory_inode, lower_dir_dentry->d_inode);
out_lock:
@@ -200,8 +227,8 @@ out:
*
* Returns zero on success
*/
-static int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry,
- struct inode *ecryptfs_inode)
+int ecryptfs_initialize_file(struct dentry *ecryptfs_dentry,
+ struct inode *ecryptfs_inode)
{
struct ecryptfs_crypt_stat *crypt_stat =
&ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
@@ -264,7 +291,9 @@ ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
* that this on disk file is prepared to be an ecryptfs file */
rc = ecryptfs_initialize_file(ecryptfs_dentry, ecryptfs_inode);
if (rc) {
- drop_nlink(ecryptfs_inode);
+ ecryptfs_do_unlink(directory_inode, ecryptfs_dentry,
+ ecryptfs_inode);
+ make_bad_inode(ecryptfs_inode);
unlock_new_inode(ecryptfs_inode);
iput(ecryptfs_inode);
goto out;
@@ -318,21 +347,20 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry,
struct vfsmount *lower_mnt;
int rc = 0;
- lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent));
- fsstack_copy_attr_atime(dir_inode, lower_dentry->d_parent->d_inode);
- BUG_ON(!lower_dentry->d_count);
-
dentry_info = kmem_cache_alloc(ecryptfs_dentry_info_cache, GFP_KERNEL);
- ecryptfs_set_dentry_private(dentry, dentry_info);
if (!dentry_info) {
printk(KERN_ERR "%s: Out of memory whilst attempting "
"to allocate ecryptfs_dentry_info struct\n",
__func__);
dput(lower_dentry);
- mntput(lower_mnt);
- d_drop(dentry);
return -ENOMEM;
}
+
+ lower_mnt = mntget(ecryptfs_dentry_to_lower_mnt(dentry->d_parent));
+ fsstack_copy_attr_atime(dir_inode, lower_dentry->d_parent->d_inode);
+ BUG_ON(!lower_dentry->d_count);
+
+ ecryptfs_set_dentry_private(dentry, dentry_info);
ecryptfs_set_dentry_lower(dentry, lower_dentry);
ecryptfs_set_dentry_lower_mnt(dentry, lower_mnt);
@@ -381,12 +409,6 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
struct dentry *lower_dir_dentry, *lower_dentry;
int rc = 0;
- if ((ecryptfs_dentry->d_name.len == 1
- && !strcmp(ecryptfs_dentry->d_name.name, "."))
- || (ecryptfs_dentry->d_name.len == 2
- && !strcmp(ecryptfs_dentry->d_name.name, ".."))) {
- goto out_d_drop;
- }
lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
@@ -397,8 +419,8 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
rc = PTR_ERR(lower_dentry);
ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
"[%d] on lower_dentry = [%s]\n", __func__, rc,
- encrypted_and_encoded_name);
- goto out_d_drop;
+ ecryptfs_dentry->d_name.name);
+ goto out;
}
if (lower_dentry->d_inode)
goto interpose;
@@ -415,7 +437,7 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
if (rc) {
printk(KERN_ERR "%s: Error attempting to encrypt and encode "
"filename; rc = [%d]\n", __func__, rc);
- goto out_d_drop;
+ goto out;
}
mutex_lock(&lower_dir_dentry->d_inode->i_mutex);
lower_dentry = lookup_one_len(encrypted_and_encoded_name,
@@ -427,14 +449,11 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
"[%d] on lower_dentry = [%s]\n", __func__, rc,
encrypted_and_encoded_name);
- goto out_d_drop;
+ goto out;
}
interpose:
rc = ecryptfs_lookup_interpose(ecryptfs_dentry, lower_dentry,
ecryptfs_dir_inode);
- goto out;
-out_d_drop:
- d_drop(ecryptfs_dentry);
out:
kfree(encrypted_and_encoded_name);
return ERR_PTR(rc);
@@ -476,27 +495,7 @@ out_lock:
static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry)
{
- int rc = 0;
- struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
- struct inode *lower_dir_inode = ecryptfs_inode_to_lower(dir);
- struct dentry *lower_dir_dentry;
-
- dget(lower_dentry);
- lower_dir_dentry = lock_parent(lower_dentry);
- rc = vfs_unlink(lower_dir_inode, lower_dentry);
- if (rc) {
- printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc);
- goto out_unlock;
- }
- fsstack_copy_attr_times(dir, lower_dir_inode);
- set_nlink(dentry->d_inode,
- ecryptfs_inode_to_lower(dentry->d_inode)->i_nlink);
- dentry->d_inode->i_ctime = dir->i_ctime;
- d_drop(dentry);
-out_unlock:
- unlock_dir(lower_dir_dentry);
- dput(lower_dentry);
- return rc;
+ return ecryptfs_do_unlink(dir, dentry, dentry->d_inode);
}
static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
@@ -971,12 +970,6 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
goto out;
}
- if (S_ISREG(inode->i_mode)) {
- rc = filemap_write_and_wait(inode->i_mapping);
- if (rc)
- goto out;
- fsstack_copy_attr_all(inode, lower_inode);
- }
memcpy(&lower_ia, ia, sizeof(lower_ia));
if (ia->ia_valid & ATTR_FILE)
lower_ia.ia_file = ecryptfs_file_to_lower(ia->ia_file);
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index 1c0b3b6b75c6..2768138eefee 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -279,6 +279,7 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
char *fnek_src;
char *cipher_key_bytes_src;
char *fn_cipher_key_bytes_src;
+ u8 cipher_code;
*check_ruid = 0;
@@ -420,6 +421,18 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options,
&& !fn_cipher_key_bytes_set)
mount_crypt_stat->global_default_fn_cipher_key_bytes =
mount_crypt_stat->global_default_cipher_key_size;
+
+ cipher_code = ecryptfs_code_for_cipher_string(
+ mount_crypt_stat->global_default_cipher_name,
+ mount_crypt_stat->global_default_cipher_key_size);
+ if (!cipher_code) {
+ ecryptfs_printk(KERN_ERR,
+ "eCryptfs doesn't support cipher: %s",
+ mount_crypt_stat->global_default_cipher_name);
+ rc = -EINVAL;
+ goto out;
+ }
+
mutex_lock(&key_tfm_list_mutex);
if (!ecryptfs_tfm_exists(mount_crypt_stat->global_default_cipher_name,
NULL)) {
@@ -540,6 +553,15 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
}
ecryptfs_set_superblock_lower(s, path.dentry->d_sb);
+
+ /**
+ * Set the POSIX ACL flag based on whether they're enabled in the lower
+ * mount. Force a read-only eCryptfs mount if the lower mount is ro.
+ * Allow a ro eCryptfs mount even when the lower mount is rw.
+ */
+ s->s_flags = flags & ~MS_POSIXACL;
+ s->s_flags |= path.dentry->d_sb->s_flags & (MS_RDONLY | MS_POSIXACL);
+
s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
s->s_blocksize = path.dentry->d_sb->s_blocksize;
s->s_magic = ECRYPTFS_SUPER_MAGIC;
diff --git a/fs/ecryptfs/messaging.c b/fs/ecryptfs/messaging.c
index a750f957b145..b29bb8bfa8d9 100644
--- a/fs/ecryptfs/messaging.c
+++ b/fs/ecryptfs/messaging.c
@@ -32,8 +32,8 @@ static struct mutex ecryptfs_msg_ctx_lists_mux;
static struct hlist_head *ecryptfs_daemon_hash;
struct mutex ecryptfs_daemon_hash_mux;
static int ecryptfs_hash_bits;
-#define ecryptfs_uid_hash(uid) \
- hash_long((unsigned long)uid, ecryptfs_hash_bits)
+#define ecryptfs_current_euid_hash(uid) \
+ hash_long((unsigned long)current_euid(), ecryptfs_hash_bits)
static u32 ecryptfs_msg_counter;
static struct ecryptfs_msg_ctx *ecryptfs_msg_ctx_arr;
@@ -105,26 +105,24 @@ void ecryptfs_msg_ctx_alloc_to_free(struct ecryptfs_msg_ctx *msg_ctx)
/**
* ecryptfs_find_daemon_by_euid
- * @euid: The effective user id which maps to the desired daemon id
- * @user_ns: The namespace in which @euid applies
* @daemon: If return value is zero, points to the desired daemon pointer
*
* Must be called with ecryptfs_daemon_hash_mux held.
*
- * Search the hash list for the given user id.
+ * Search the hash list for the current effective user id.
*
* Returns zero if the user id exists in the list; non-zero otherwise.
*/
-int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon, uid_t euid,
- struct user_namespace *user_ns)
+int ecryptfs_find_daemon_by_euid(struct ecryptfs_daemon **daemon)
{
struct hlist_node *elem;
int rc;
hlist_for_each_entry(*daemon, elem,
- &ecryptfs_daemon_hash[ecryptfs_uid_hash(euid)],
- euid_chain) {
- if ((*daemon)->euid == euid && (*daemon)->user_ns == user_ns) {
+ &ecryptfs_daemon_hash[ecryptfs_current_euid_hash()],
+ euid_chain) {
+ if ((*daemon)->file->f_cred->euid == current_euid() &&
+ (*daemon)->file->f_cred->user_ns == current_user_ns()) {
rc = 0;
goto out;
}
@@ -137,9 +135,7 @@ out:
/**
* ecryptfs_spawn_daemon - Create and initialize a new daemon struct
* @daemon: Pointer to set to newly allocated daemon struct
- * @euid: Effective user id for the daemon
- * @user_ns: The namespace in which @euid applies
- * @pid: Process id for the daemon
+ * @file: File used when opening /dev/ecryptfs
*
* Must be called ceremoniously while in possession of
* ecryptfs_sacred_daemon_hash_mux
@@ -147,8 +143,7 @@ out:
* Returns zero on success; non-zero otherwise
*/
int
-ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid,
- struct user_namespace *user_ns, struct pid *pid)
+ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, struct file *file)
{
int rc = 0;
@@ -159,16 +154,13 @@ ecryptfs_spawn_daemon(struct ecryptfs_daemon **daemon, uid_t euid,
"GFP_KERNEL memory\n", __func__, sizeof(**daemon));
goto out;
}
- (*daemon)->euid = euid;
- (*daemon)->user_ns = get_user_ns(user_ns);
- (*daemon)->pid = get_pid(pid);
- (*daemon)->task = current;
+ (*daemon)->file = file;
mutex_init(&(*daemon)->mux);
INIT_LIST_HEAD(&(*daemon)->msg_ctx_out_queue);
init_waitqueue_head(&(*daemon)->wait);
(*daemon)->num_queued_msg_ctx = 0;
hlist_add_head(&(*daemon)->euid_chain,
- &ecryptfs_daemon_hash[ecryptfs_uid_hash(euid)]);
+ &ecryptfs_daemon_hash[ecryptfs_current_euid_hash()]);
out:
return rc;
}
@@ -188,9 +180,6 @@ int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon)
if ((daemon->flags & ECRYPTFS_DAEMON_IN_READ)
|| (daemon->flags & ECRYPTFS_DAEMON_IN_POLL)) {
rc = -EBUSY;
- printk(KERN_WARNING "%s: Attempt to destroy daemon with pid "
- "[0x%p], but it is in the midst of a read or a poll\n",
- __func__, daemon->pid);
mutex_unlock(&daemon->mux);
goto out;
}
@@ -203,12 +192,6 @@ int ecryptfs_exorcise_daemon(struct ecryptfs_daemon *daemon)
ecryptfs_msg_ctx_alloc_to_free(msg_ctx);
}
hlist_del(&daemon->euid_chain);
- if (daemon->task)
- wake_up_process(daemon->task);
- if (daemon->pid)
- put_pid(daemon->pid);
- if (daemon->user_ns)
- put_user_ns(daemon->user_ns);
mutex_unlock(&daemon->mux);
kzfree(daemon);
out:
@@ -216,42 +199,9 @@ out:
}
/**
- * ecryptfs_process_quit
- * @euid: The user ID owner of the message
- * @user_ns: The namespace in which @euid applies
- * @pid: The process ID for the userspace program that sent the
- * message
- *
- * Deletes the corresponding daemon for the given euid and pid, if
- * it is the registered that is requesting the deletion. Returns zero
- * after deleting the desired daemon; non-zero otherwise.
- */
-int ecryptfs_process_quit(uid_t euid, struct user_namespace *user_ns,
- struct pid *pid)
-{
- struct ecryptfs_daemon *daemon;
- int rc;
-
- mutex_lock(&ecryptfs_daemon_hash_mux);
- rc = ecryptfs_find_daemon_by_euid(&daemon, euid, user_ns);
- if (rc || !daemon) {
- rc = -EINVAL;
- printk(KERN_ERR "Received request from user [%d] to "
- "unregister unrecognized daemon [0x%p]\n", euid, pid);
- goto out_unlock;
- }
- rc = ecryptfs_exorcise_daemon(daemon);
-out_unlock:
- mutex_unlock(&ecryptfs_daemon_hash_mux);
- return rc;
-}
-
-/**
* ecryptfs_process_reponse
* @msg: The ecryptfs message received; the caller should sanity check
* msg->data_len and free the memory
- * @pid: The process ID of the userspace application that sent the
- * message
* @seq: The sequence number of the message; must match the sequence
* number for the existing message context waiting for this
* response
@@ -270,16 +220,11 @@ out_unlock:
*
* Returns zero on success; non-zero otherwise
*/
-int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
- struct user_namespace *user_ns, struct pid *pid,
- u32 seq)
+int ecryptfs_process_response(struct ecryptfs_daemon *daemon,
+ struct ecryptfs_message *msg, u32 seq)
{
- struct ecryptfs_daemon *uninitialized_var(daemon);
struct ecryptfs_msg_ctx *msg_ctx;
size_t msg_size;
- struct nsproxy *nsproxy;
- struct user_namespace *tsk_user_ns;
- uid_t ctx_euid;
int rc;
if (msg->index >= ecryptfs_message_buf_len) {
@@ -292,51 +237,6 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
}
msg_ctx = &ecryptfs_msg_ctx_arr[msg->index];
mutex_lock(&msg_ctx->mux);
- mutex_lock(&ecryptfs_daemon_hash_mux);
- rcu_read_lock();
- nsproxy = task_nsproxy(msg_ctx->task);
- if (nsproxy == NULL) {
- rc = -EBADMSG;
- printk(KERN_ERR "%s: Receiving process is a zombie. Dropping "
- "message.\n", __func__);
- rcu_read_unlock();
- mutex_unlock(&ecryptfs_daemon_hash_mux);
- goto wake_up;
- }
- tsk_user_ns = __task_cred(msg_ctx->task)->user_ns;
- ctx_euid = task_euid(msg_ctx->task);
- rc = ecryptfs_find_daemon_by_euid(&daemon, ctx_euid, tsk_user_ns);
- rcu_read_unlock();
- mutex_unlock(&ecryptfs_daemon_hash_mux);
- if (rc) {
- rc = -EBADMSG;
- printk(KERN_WARNING "%s: User [%d] received a "
- "message response from process [0x%p] but does "
- "not have a registered daemon\n", __func__,
- ctx_euid, pid);
- goto wake_up;
- }
- if (ctx_euid != euid) {
- rc = -EBADMSG;
- printk(KERN_WARNING "%s: Received message from user "
- "[%d]; expected message from user [%d]\n", __func__,
- euid, ctx_euid);
- goto unlock;
- }
- if (tsk_user_ns != user_ns) {
- rc = -EBADMSG;
- printk(KERN_WARNING "%s: Received message from user_ns "
- "[0x%p]; expected message from user_ns [0x%p]\n",
- __func__, user_ns, tsk_user_ns);
- goto unlock;
- }
- if (daemon->pid != pid) {
- rc = -EBADMSG;
- printk(KERN_ERR "%s: User [%d] sent a message response "
- "from an unrecognized process [0x%p]\n",
- __func__, ctx_euid, pid);
- goto unlock;
- }
if (msg_ctx->state != ECRYPTFS_MSG_CTX_STATE_PENDING) {
rc = -EINVAL;
printk(KERN_WARNING "%s: Desired context element is not "
@@ -359,9 +259,8 @@ int ecryptfs_process_response(struct ecryptfs_message *msg, uid_t euid,
}
memcpy(msg_ctx->msg, msg, msg_size);
msg_ctx->state = ECRYPTFS_MSG_CTX_STATE_DONE;
- rc = 0;
-wake_up:
wake_up_process(msg_ctx->task);
+ rc = 0;
unlock:
mutex_unlock(&msg_ctx->mux);
out:
@@ -383,14 +282,11 @@ ecryptfs_send_message_locked(char *data, int data_len, u8 msg_type,
struct ecryptfs_msg_ctx **msg_ctx)
{
struct ecryptfs_daemon *daemon;
- uid_t euid = current_euid();
int rc;
- rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
+ rc = ecryptfs_find_daemon_by_euid(&daemon);
if (rc || !daemon) {
rc = -ENOTCONN;
- printk(KERN_ERR "%s: User [%d] does not have a daemon "
- "registered\n", __func__, euid);
goto out;
}
mutex_lock(&ecryptfs_msg_ctx_lists_mux);
diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c
index c0038f6566d4..412e6eda25f8 100644
--- a/fs/ecryptfs/miscdev.c
+++ b/fs/ecryptfs/miscdev.c
@@ -33,7 +33,7 @@ static atomic_t ecryptfs_num_miscdev_opens;
/**
* ecryptfs_miscdev_poll
- * @file: dev file (ignored)
+ * @file: dev file
* @pt: dev poll table (ignored)
*
* Returns the poll mask
@@ -41,20 +41,10 @@ static atomic_t ecryptfs_num_miscdev_opens;
static unsigned int
ecryptfs_miscdev_poll(struct file *file, poll_table *pt)
{
- struct ecryptfs_daemon *daemon;
+ struct ecryptfs_daemon *daemon = file->private_data;
unsigned int mask = 0;
- uid_t euid = current_euid();
- int rc;
- mutex_lock(&ecryptfs_daemon_hash_mux);
- /* TODO: Just use file->private_data? */
- rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
- if (rc || !daemon) {
- mutex_unlock(&ecryptfs_daemon_hash_mux);
- return -EINVAL;
- }
mutex_lock(&daemon->mux);
- mutex_unlock(&ecryptfs_daemon_hash_mux);
if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
printk(KERN_WARNING "%s: Attempt to poll on zombified "
"daemon\n", __func__);
@@ -79,7 +69,7 @@ out_unlock_daemon:
/**
* ecryptfs_miscdev_open
* @inode: inode of miscdev handle (ignored)
- * @file: file for miscdev handle (ignored)
+ * @file: file for miscdev handle
*
* Returns zero on success; non-zero otherwise
*/
@@ -87,7 +77,6 @@ static int
ecryptfs_miscdev_open(struct inode *inode, struct file *file)
{
struct ecryptfs_daemon *daemon = NULL;
- uid_t euid = current_euid();
int rc;
mutex_lock(&ecryptfs_daemon_hash_mux);
@@ -98,30 +87,20 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file)
"count; rc = [%d]\n", __func__, rc);
goto out_unlock_daemon_list;
}
- rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
- if (rc || !daemon) {
- rc = ecryptfs_spawn_daemon(&daemon, euid, current_user_ns(),
- task_pid(current));
- if (rc) {
- printk(KERN_ERR "%s: Error attempting to spawn daemon; "
- "rc = [%d]\n", __func__, rc);
- goto out_module_put_unlock_daemon_list;
- }
- }
- mutex_lock(&daemon->mux);
- if (daemon->pid != task_pid(current)) {
+ rc = ecryptfs_find_daemon_by_euid(&daemon);
+ if (!rc) {
rc = -EINVAL;
- printk(KERN_ERR "%s: pid [0x%p] has registered with euid [%d], "
- "but pid [0x%p] has attempted to open the handle "
- "instead\n", __func__, daemon->pid, daemon->euid,
- task_pid(current));
- goto out_unlock_daemon;
+ goto out_unlock_daemon_list;
+ }
+ rc = ecryptfs_spawn_daemon(&daemon, file);
+ if (rc) {
+ printk(KERN_ERR "%s: Error attempting to spawn daemon; "
+ "rc = [%d]\n", __func__, rc);
+ goto out_module_put_unlock_daemon_list;
}
+ mutex_lock(&daemon->mux);
if (daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN) {
rc = -EBUSY;
- printk(KERN_ERR "%s: Miscellaneous device handle may only be "
- "opened once per daemon; pid [0x%p] already has this "
- "handle open\n", __func__, daemon->pid);
goto out_unlock_daemon;
}
daemon->flags |= ECRYPTFS_DAEMON_MISCDEV_OPEN;
@@ -140,7 +119,7 @@ out_unlock_daemon_list:
/**
* ecryptfs_miscdev_release
* @inode: inode of fs/ecryptfs/euid handle (ignored)
- * @file: file for fs/ecryptfs/euid handle (ignored)
+ * @file: file for fs/ecryptfs/euid handle
*
* This keeps the daemon registered until the daemon sends another
* ioctl to fs/ecryptfs/ctl or until the kernel module unregisters.
@@ -150,20 +129,18 @@ out_unlock_daemon_list:
static int
ecryptfs_miscdev_release(struct inode *inode, struct file *file)
{
- struct ecryptfs_daemon *daemon = NULL;
- uid_t euid = current_euid();
+ struct ecryptfs_daemon *daemon = file->private_data;
int rc;
- mutex_lock(&ecryptfs_daemon_hash_mux);
- rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
- if (rc || !daemon)
- daemon = file->private_data;
mutex_lock(&daemon->mux);
BUG_ON(!(daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN));
daemon->flags &= ~ECRYPTFS_DAEMON_MISCDEV_OPEN;
atomic_dec(&ecryptfs_num_miscdev_opens);
mutex_unlock(&daemon->mux);
+
+ mutex_lock(&ecryptfs_daemon_hash_mux);
rc = ecryptfs_exorcise_daemon(daemon);
+ mutex_unlock(&ecryptfs_daemon_hash_mux);
if (rc) {
printk(KERN_CRIT "%s: Fatal error whilst attempting to "
"shut down daemon; rc = [%d]. Please report this "
@@ -171,7 +148,6 @@ ecryptfs_miscdev_release(struct inode *inode, struct file *file)
BUG();
}
module_put(THIS_MODULE);
- mutex_unlock(&ecryptfs_daemon_hash_mux);
return rc;
}
@@ -248,7 +224,7 @@ int ecryptfs_send_miscdev(char *data, size_t data_size,
/**
* ecryptfs_miscdev_read - format and send message from queue
- * @file: fs/ecryptfs/euid miscdevfs handle (ignored)
+ * @file: miscdevfs handle
* @buf: User buffer into which to copy the next message on the daemon queue
* @count: Amount of space available in @buf
* @ppos: Offset in file (ignored)
@@ -262,43 +238,27 @@ static ssize_t
ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count,
loff_t *ppos)
{
- struct ecryptfs_daemon *daemon;
+ struct ecryptfs_daemon *daemon = file->private_data;
struct ecryptfs_msg_ctx *msg_ctx;
size_t packet_length_size;
char packet_length[ECRYPTFS_MAX_PKT_LEN_SIZE];
size_t i;
size_t total_length;
- uid_t euid = current_euid();
int rc;
- mutex_lock(&ecryptfs_daemon_hash_mux);
- /* TODO: Just use file->private_data? */
- rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns());
- if (rc || !daemon) {
- mutex_unlock(&ecryptfs_daemon_hash_mux);
- return -EINVAL;
- }
mutex_lock(&daemon->mux);
- if (task_pid(current) != daemon->pid) {
- mutex_unlock(&daemon->mux);
- mutex_unlock(&ecryptfs_daemon_hash_mux);
- return -EPERM;
- }
if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) {
rc = 0;
- mutex_unlock(&ecryptfs_daemon_hash_mux);
printk(KERN_WARNING "%s: Attempt to read from zombified "
"daemon\n", __func__);
goto out_unlock_daemon;
}
if (daemon->flags & ECRYPTFS_DAEMON_IN_READ) {
rc = 0;
- mutex_unlock(&ecryptfs_daemon_hash_mux);
goto out_unlock_daemon;
}
/* This daemon will not go away so long as this flag is set */
daemon->flags |= ECRYPTFS_DAEMON_IN_READ;
- mutex_unlock(&ecryptfs_daemon_hash_mux);
check_list:
if (list_empty(&daemon->msg_ctx_out_queue)) {
mutex_unlock(&daemon->mux);
@@ -382,16 +342,12 @@ out_unlock_daemon:
* ecryptfs_miscdev_response - miscdevess response to message previously sent to daemon
* @data: Bytes comprising struct ecryptfs_message
* @data_size: sizeof(struct ecryptfs_message) + data len
- * @euid: Effective user id of miscdevess sending the miscdev response
- * @user_ns: The namespace in which @euid applies
- * @pid: Miscdevess id of miscdevess sending the miscdev response
* @seq: Sequence number for miscdev response packet
*
* Returns zero on success; non-zero otherwise
*/
-static int ecryptfs_miscdev_response(char *data, size_t data_size,
- uid_t euid, struct user_namespace *user_ns,
- struct pid *pid, u32 seq)
+static int ecryptfs_miscdev_response(struct ecryptfs_daemon *daemon, char *data,
+ size_t data_size, u32 seq)
{
struct ecryptfs_message *msg = (struct ecryptfs_message *)data;
int rc;
@@ -403,7 +359,7 @@ static int ecryptfs_miscdev_response(char *data, size_t data_size,
rc = -EINVAL;
goto out;
}
- rc = ecryptfs_process_response(msg, euid, user_ns, pid, seq);
+ rc = ecryptfs_process_response(daemon, msg, seq);
if (rc)
printk(KERN_ERR
"Error processing response message; rc = [%d]\n", rc);
@@ -413,7 +369,7 @@ out:
/**
* ecryptfs_miscdev_write - handle write to daemon miscdev handle
- * @file: File for misc dev handle (ignored)
+ * @file: File for misc dev handle
* @buf: Buffer containing user data
* @count: Amount of data in @buf
* @ppos: Pointer to offset in file (ignored)
@@ -428,7 +384,6 @@ ecryptfs_miscdev_write(struct file *file, const char __user *buf,
u32 seq;
size_t packet_size, packet_size_length;
char *data;
- uid_t euid = current_euid();
unsigned char packet_size_peek[ECRYPTFS_MAX_PKT_LEN_SIZE];
ssize_t rc;
@@ -488,10 +443,9 @@ memdup:
}
memcpy(&counter_nbo, &data[PKT_CTR_OFFSET], PKT_CTR_SIZE);
seq = be32_to_cpu(counter_nbo);
- rc = ecryptfs_miscdev_response(
+ rc = ecryptfs_miscdev_response(file->private_data,
&data[PKT_LEN_OFFSET + packet_size_length],
- packet_size, euid, current_user_ns(),
- task_pid(current), seq);
+ packet_size, seq);
if (rc) {
printk(KERN_WARNING "%s: Failed to deliver miscdev "
"response to requesting operation; rc = [%zd]\n",
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index a46b3a8fee1e..bd1d57f98f74 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -66,18 +66,6 @@ static int ecryptfs_writepage(struct page *page, struct writeback_control *wbc)
{
int rc;
- /*
- * Refuse to write the page out if we are called from reclaim context
- * since our writepage() path may potentially allocate memory when
- * calling into the lower fs vfs_write() which may in turn invoke
- * us again.
- */
- if (current->flags & PF_MEMALLOC) {
- redirty_page_for_writepage(wbc, page);
- rc = 0;
- goto out;
- }
-
rc = ecryptfs_encrypt_page(page);
if (rc) {
ecryptfs_printk(KERN_WARNING, "Error encrypting "
@@ -498,7 +486,6 @@ static int ecryptfs_write_end(struct file *file,
struct ecryptfs_crypt_stat *crypt_stat =
&ecryptfs_inode_to_private(ecryptfs_inode)->crypt_stat;
int rc;
- int need_unlock_page = 1;
ecryptfs_printk(KERN_DEBUG, "Calling fill_zeros_to_end_of_page"
"(page w/ index = [0x%.16lx], to = [%d])\n", index, to);
@@ -519,26 +506,26 @@ static int ecryptfs_write_end(struct file *file,
"zeros in page with index = [0x%.16lx]\n", index);
goto out;
}
- set_page_dirty(page);
- unlock_page(page);
- need_unlock_page = 0;
+ rc = ecryptfs_encrypt_page(page);
+ if (rc) {
+ ecryptfs_printk(KERN_WARNING, "Error encrypting page (upper "
+ "index [0x%.16lx])\n", index);
+ goto out;
+ }
if (pos + copied > i_size_read(ecryptfs_inode)) {
i_size_write(ecryptfs_inode, pos + copied);
ecryptfs_printk(KERN_DEBUG, "Expanded file size to "
"[0x%.16llx]\n",
(unsigned long long)i_size_read(ecryptfs_inode));
- balance_dirty_pages_ratelimited(mapping);
- rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode);
- if (rc) {
- printk(KERN_ERR "Error writing inode size to metadata; "
- "rc = [%d]\n", rc);
- goto out;
- }
}
- rc = copied;
+ rc = ecryptfs_write_inode_size_to_metadata(ecryptfs_inode);
+ if (rc)
+ printk(KERN_ERR "Error writing inode size to metadata; "
+ "rc = [%d]\n", rc);
+ else
+ rc = copied;
out:
- if (need_unlock_page)
- unlock_page(page);
+ unlock_page(page);
page_cache_release(page);
return rc;
}
diff --git a/fs/exec.c b/fs/exec.c
index e95aeeddd25c..574cf4de4ec3 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -2002,17 +2002,17 @@ static void coredump_finish(struct mm_struct *mm)
void set_dumpable(struct mm_struct *mm, int value)
{
switch (value) {
- case 0:
+ case SUID_DUMPABLE_DISABLED:
clear_bit(MMF_DUMPABLE, &mm->flags);
smp_wmb();
clear_bit(MMF_DUMP_SECURELY, &mm->flags);
break;
- case 1:
+ case SUID_DUMPABLE_ENABLED:
set_bit(MMF_DUMPABLE, &mm->flags);
smp_wmb();
clear_bit(MMF_DUMP_SECURELY, &mm->flags);
break;
- case 2:
+ case SUID_DUMPABLE_SAFE:
set_bit(MMF_DUMP_SECURELY, &mm->flags);
smp_wmb();
set_bit(MMF_DUMPABLE, &mm->flags);
@@ -2025,7 +2025,7 @@ static int __get_dumpable(unsigned long mm_flags)
int ret;
ret = mm_flags & MMF_DUMPABLE_MASK;
- return (ret >= 2) ? 2 : ret;
+ return (ret > SUID_DUMPABLE_ENABLED) ? SUID_DUMPABLE_SAFE : ret;
}
int get_dumpable(struct mm_struct *mm)
@@ -2069,25 +2069,18 @@ static void wait_for_dump_helpers(struct file *file)
*/
static int umh_pipe_setup(struct subprocess_info *info, struct cred *new)
{
- struct file *rp, *wp;
+ struct file *files[2];
struct fdtable *fdt;
struct coredump_params *cp = (struct coredump_params *)info->data;
struct files_struct *cf = current->files;
+ int err = create_pipe_files(files, 0);
+ if (err)
+ return err;
- wp = create_write_pipe(0);
- if (IS_ERR(wp))
- return PTR_ERR(wp);
-
- rp = create_read_pipe(wp, 0);
- if (IS_ERR(rp)) {
- free_write_pipe(wp);
- return PTR_ERR(rp);
- }
-
- cp->file = wp;
+ cp->file = files[1];
sys_close(0);
- fd_install(0, rp);
+ fd_install(0, files[0]);
spin_lock(&cf->file_lock);
fdt = files_fdtable(cf);
__set_open_fd(0, fdt);
@@ -2111,6 +2104,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
int retval = 0;
int flag = 0;
int ispipe;
+ bool need_nonrelative = false;
static atomic_t core_dump_count = ATOMIC_INIT(0);
struct coredump_params cprm = {
.signr = signr,
@@ -2136,14 +2130,16 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
if (!cred)
goto fail;
/*
- * We cannot trust fsuid as being the "true" uid of the
- * process nor do we know its entire history. We only know it
- * was tainted so we dump it as root in mode 2.
+ * We cannot trust fsuid as being the "true" uid of the process
+ * nor do we know its entire history. We only know it was tainted
+ * so we dump it as root in mode 2, and only into a controlled
+ * environment (pipe handler or fully qualified path).
*/
- if (__get_dumpable(cprm.mm_flags) == 2) {
+ if (__get_dumpable(cprm.mm_flags) == SUID_DUMPABLE_SAFE) {
/* Setuid core dump mode */
flag = O_EXCL; /* Stop rewrite attacks */
cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
+ need_nonrelative = true;
}
retval = coredump_wait(exit_code, &core_state);
@@ -2171,15 +2167,16 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
}
if (cprm.limit == 1) {
- /*
+ /* See umh_pipe_setup() which sets RLIMIT_CORE = 1.
+ *
* Normally core limits are irrelevant to pipes, since
* we're not writing to the file system, but we use
- * cprm.limit of 1 here as a speacial value. Any
- * non-1 limit gets set to RLIM_INFINITY below, but
- * a limit of 0 skips the dump. This is a consistent
- * way to catch recursive crashes. We can still crash
- * if the core_pattern binary sets RLIM_CORE = !1
- * but it runs as root, and can do lots of stupid things
+ * cprm.limit of 1 here as a speacial value, this is a
+ * consistent way to catch recursive crashes.
+ * We can still crash if the core_pattern binary sets
+ * RLIM_CORE = !1, but it runs as root, and can do
+ * lots of stupid things.
+ *
* Note that we use task_tgid_vnr here to grab the pid
* of the process group leader. That way we get the
* right pid if a thread in a multi-threaded
@@ -2223,6 +2220,14 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
if (cprm.limit < binfmt->min_coredump)
goto fail_unlock;
+ if (need_nonrelative && cn.corename[0] != '/') {
+ printk(KERN_WARNING "Pid %d(%s) can only dump core "\
+ "to fully qualified path!\n",
+ task_tgid_vnr(current), current->comm);
+ printk(KERN_WARNING "Skipping core dump\n");
+ goto fail_unlock;
+ }
+
cprm.file = filp_open(cn.corename,
O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
0600);
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 5badb0c039de..1562c27a2fab 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -37,15 +37,12 @@
#define EXOFS_DBGMSG2(M...) do {} while (0)
-enum {MAX_PAGES_KMALLOC = PAGE_SIZE / sizeof(struct page *), };
-
unsigned exofs_max_io_pages(struct ore_layout *layout,
unsigned expected_pages)
{
- unsigned pages = min_t(unsigned, expected_pages, MAX_PAGES_KMALLOC);
+ unsigned pages = min_t(unsigned, expected_pages,
+ layout->max_io_length / PAGE_SIZE);
- /* TODO: easily support bio chaining */
- pages = min_t(unsigned, pages, layout->max_io_length / PAGE_SIZE);
return pages;
}
@@ -101,7 +98,8 @@ static void _pcol_reset(struct page_collect *pcol)
* it might not end here. don't be left with nothing
*/
if (!pcol->expected_pages)
- pcol->expected_pages = MAX_PAGES_KMALLOC;
+ pcol->expected_pages =
+ exofs_max_io_pages(&pcol->sbi->layout, ~0);
}
static int pcol_try_alloc(struct page_collect *pcol)
@@ -389,6 +387,8 @@ static int readpage_strip(void *data, struct page *page)
size_t len;
int ret;
+ BUG_ON(!PageLocked(page));
+
/* FIXME: Just for debugging, will be removed */
if (PageUptodate(page))
EXOFS_ERR("PageUptodate(0x%lx, 0x%lx)\n", pcol->inode->i_ino,
@@ -572,8 +572,16 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
if (!pcol->that_locked_page ||
(pcol->that_locked_page->index != index)) {
- struct page *page = find_get_page(pcol->inode->i_mapping, index);
+ struct page *page;
+ loff_t i_size = i_size_read(pcol->inode);
+
+ if (offset >= i_size) {
+ *uptodate = true;
+ EXOFS_DBGMSG("offset >= i_size index=0x%lx\n", index);
+ return ZERO_PAGE(0);
+ }
+ page = find_get_page(pcol->inode->i_mapping, index);
if (!page) {
page = find_or_create_page(pcol->inode->i_mapping,
index, GFP_NOFS);
@@ -602,12 +610,13 @@ static void __r4w_put_page(void *priv, struct page *page)
{
struct page_collect *pcol = priv;
- if (pcol->that_locked_page != page) {
+ if ((pcol->that_locked_page != page) && (ZERO_PAGE(0) != page)) {
EXOFS_DBGMSG("index=0x%lx\n", page->index);
page_cache_release(page);
return;
}
- EXOFS_DBGMSG("that_locked_page index=0x%lx\n", page->index);
+ EXOFS_DBGMSG("that_locked_page index=0x%lx\n",
+ ZERO_PAGE(0) == page ? -1 : page->index);
}
static const struct _ore_r4w_op _r4w_op = {
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index 24a49d47e935..1585db1aa365 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -837,11 +837,11 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp)
bio->bi_rw |= REQ_WRITE;
}
- osd_req_write(or, _ios_obj(ios, dev), per_dev->offset,
- bio, per_dev->length);
+ osd_req_write(or, _ios_obj(ios, cur_comp),
+ per_dev->offset, bio, per_dev->length);
ORE_DBGMSG("write(0x%llx) offset=0x%llx "
"length=0x%llx dev=%d\n",
- _LLU(_ios_obj(ios, dev)->id),
+ _LLU(_ios_obj(ios, cur_comp)->id),
_LLU(per_dev->offset),
_LLU(per_dev->length), dev);
} else if (ios->kern_buff) {
@@ -853,20 +853,20 @@ static int _write_mirror(struct ore_io_state *ios, int cur_comp)
(ios->si.unit_off + ios->length >
ios->layout->stripe_unit));
- ret = osd_req_write_kern(or, _ios_obj(ios, per_dev->dev),
+ ret = osd_req_write_kern(or, _ios_obj(ios, cur_comp),
per_dev->offset,
ios->kern_buff, ios->length);
if (unlikely(ret))
goto out;
ORE_DBGMSG2("write_kern(0x%llx) offset=0x%llx "
"length=0x%llx dev=%d\n",
- _LLU(_ios_obj(ios, dev)->id),
+ _LLU(_ios_obj(ios, cur_comp)->id),
_LLU(per_dev->offset),
_LLU(ios->length), per_dev->dev);
} else {
- osd_req_set_attributes(or, _ios_obj(ios, dev));
+ osd_req_set_attributes(or, _ios_obj(ios, cur_comp));
ORE_DBGMSG2("obj(0x%llx) set_attributes=%d dev=%d\n",
- _LLU(_ios_obj(ios, dev)->id),
+ _LLU(_ios_obj(ios, cur_comp)->id),
ios->out_attr_len, dev);
}
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 433783624d10..dde41a75c7c8 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -400,8 +400,6 @@ static int exofs_sync_fs(struct super_block *sb, int wait)
ret = ore_write(ios);
if (unlikely(ret))
EXOFS_ERR("%s: ore_write failed.\n", __func__);
- else
- sb->s_dirt = 0;
unlock_super(sb);
@@ -412,14 +410,6 @@ out:
return ret;
}
-static void exofs_write_super(struct super_block *sb)
-{
- if (!(sb->s_flags & MS_RDONLY))
- exofs_sync_fs(sb, 1);
- else
- sb->s_dirt = 0;
-}
-
static void _exofs_print_device(const char *msg, const char *dev_path,
struct osd_dev *od, u64 pid)
{
@@ -952,7 +942,6 @@ static const struct super_operations exofs_sops = {
.write_inode = exofs_write_inode,
.evict_inode = exofs_evict_inode,
.put_super = exofs_put_super,
- .write_super = exofs_write_super,
.sync_fs = exofs_sync_fs,
.statfs = exofs_statfs,
};
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index 1c3613998862..376aa77f3ca7 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -1444,19 +1444,9 @@ ext2_fsblk_t ext2_new_block(struct inode *inode, unsigned long goal, int *errp)
#ifdef EXT2FS_DEBUG
-static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
-
-unsigned long ext2_count_free (struct buffer_head * map, unsigned int numchars)
+unsigned long ext2_count_free(struct buffer_head *map, unsigned int numchars)
{
- unsigned int i;
- unsigned long sum = 0;
-
- if (!map)
- return (0);
- for (i = 0; i < numchars; i++)
- sum += nibblemap[map->b_data[i] & 0xf] +
- nibblemap[(map->b_data[i] >> 4) & 0xf];
- return (sum);
+ return numchars * BITS_PER_BYTE - memweight(map->b_data, numchars);
}
#endif /* EXT2FS_DEBUG */
diff --git a/fs/ext2/ialloc.c b/fs/ext2/ialloc.c
index c13eb7b91a11..8f370e012e61 100644
--- a/fs/ext2/ialloc.c
+++ b/fs/ext2/ialloc.c
@@ -644,6 +644,7 @@ unsigned long ext2_count_free_inodes (struct super_block * sb)
}
brelse(bitmap_bh);
printk("ext2_count_free_inodes: stored = %lu, computed = %lu, %lu\n",
+ (unsigned long)
percpu_counter_read(&EXT2_SB(sb)->s_freeinodes_counter),
desc_count, bitmap_count);
return desc_count;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 264d315f6c47..6363ac66fafa 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -79,6 +79,7 @@ void ext2_evict_inode(struct inode * inode)
truncate_inode_pages(&inode->i_data, 0);
if (want_delete) {
+ sb_start_intwrite(inode->i_sb);
/* set dtime */
EXT2_I(inode)->i_dtime = get_seconds();
mark_inode_dirty(inode);
@@ -98,8 +99,10 @@ void ext2_evict_inode(struct inode * inode)
if (unlikely(rsv))
kfree(rsv);
- if (want_delete)
+ if (want_delete) {
ext2_free_inode(inode);
+ sb_end_intwrite(inode->i_sb);
+ }
}
typedef struct {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 9f311d27b16f..af74d9e27b71 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -42,6 +42,8 @@ static void ext2_sync_super(struct super_block *sb,
static int ext2_remount (struct super_block * sb, int * flags, char * data);
static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf);
static int ext2_sync_fs(struct super_block *sb, int wait);
+static int ext2_freeze(struct super_block *sb);
+static int ext2_unfreeze(struct super_block *sb);
void ext2_error(struct super_block *sb, const char *function,
const char *fmt, ...)
@@ -305,6 +307,8 @@ static const struct super_operations ext2_sops = {
.evict_inode = ext2_evict_inode,
.put_super = ext2_put_super,
.sync_fs = ext2_sync_fs,
+ .freeze_fs = ext2_freeze,
+ .unfreeze_fs = ext2_unfreeze,
.statfs = ext2_statfs,
.remount_fs = ext2_remount,
.show_options = ext2_show_options,
@@ -1200,6 +1204,35 @@ static int ext2_sync_fs(struct super_block *sb, int wait)
return 0;
}
+static int ext2_freeze(struct super_block *sb)
+{
+ struct ext2_sb_info *sbi = EXT2_SB(sb);
+
+ /*
+ * Open but unlinked files present? Keep EXT2_VALID_FS flag cleared
+ * because we have unattached inodes and thus filesystem is not fully
+ * consistent.
+ */
+ if (atomic_long_read(&sb->s_remove_count)) {
+ ext2_sync_fs(sb, 1);
+ return 0;
+ }
+ /* Set EXT2_FS_VALID flag */
+ spin_lock(&sbi->s_lock);
+ sbi->s_es->s_state = cpu_to_le16(sbi->s_mount_state);
+ spin_unlock(&sbi->s_lock);
+ ext2_sync_super(sb, sbi->s_es, 1);
+
+ return 0;
+}
+
+static int ext2_unfreeze(struct super_block *sb)
+{
+ /* Just write sb to clear EXT2_VALID_FS flag */
+ ext2_write_super(sb);
+
+ return 0;
+}
void ext2_write_super(struct super_block *sb)
{
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 25cd60892116..90d901f0486b 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -1813,7 +1813,7 @@ ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb)
brelse(bitmap_bh);
printk("ext3_count_free_blocks: stored = "E3FSBLK
", computed = "E3FSBLK", "E3FSBLK"\n",
- le32_to_cpu(es->s_free_blocks_count),
+ (ext3_fsblk_t)le32_to_cpu(es->s_free_blocks_count),
desc_count, bitmap_count);
return bitmap_count;
#else
diff --git a/fs/ext3/bitmap.c b/fs/ext3/bitmap.c
index 909d13e26560..ef9c643e8e9d 100644
--- a/fs/ext3/bitmap.c
+++ b/fs/ext3/bitmap.c
@@ -11,19 +11,9 @@
#ifdef EXT3FS_DEBUG
-static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
-
unsigned long ext3_count_free (struct buffer_head * map, unsigned int numchars)
{
- unsigned int i;
- unsigned long sum = 0;
-
- if (!map)
- return (0);
- for (i = 0; i < numchars; i++)
- sum += nibblemap[map->b_data[i] & 0xf] +
- nibblemap[(map->b_data[i] >> 4) & 0xf];
- return (sum);
+ return numchars * BITS_PER_BYTE - memweight(map->b_data, numchars);
}
#endif /* EXT3FS_DEBUG */
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 9a4a5c48b1c9..a07597307fd1 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -3459,14 +3459,6 @@ ext3_reserve_inode_write(handle_t *handle, struct inode *inode,
* inode out, but prune_icache isn't a user-visible syncing function.
* Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync)
* we start and wait on commits.
- *
- * Is this efficient/effective? Well, we're being nice to the system
- * by cleaning up our inodes proactively so they can be reaped
- * without I/O. But we are potentially leaving up to five seconds'
- * worth of inodes floating about which prune_icache wants us to
- * write out. One way to fix that would be to get prune_icache()
- * to do a write_super() to free up some memory. It has the desired
- * effect.
*/
int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode)
{
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index ff9bcdc5b0d5..8c892e93d8e7 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -64,11 +64,6 @@ static int ext3_freeze(struct super_block *sb);
/*
* Wrappers for journal_start/end.
- *
- * The only special thing we need to do here is to make sure that all
- * journal_end calls result in the superblock being marked dirty, so
- * that sync() will call the filesystem's write_super callback if
- * appropriate.
*/
handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
{
@@ -90,12 +85,6 @@ handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
return journal_start(journal, nblocks);
}
-/*
- * The only special thing we need to do here is to make sure that all
- * journal_stop calls result in the superblock being marked dirty, so
- * that sync() will call the filesystem's write_super callback if
- * appropriate.
- */
int __ext3_journal_stop(const char *where, handle_t *handle)
{
struct super_block *sb;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index d23b31ca9d7a..1b5089067d01 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -280,14 +280,18 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block *sb,
return desc;
}
-static int ext4_valid_block_bitmap(struct super_block *sb,
- struct ext4_group_desc *desc,
- unsigned int block_group,
- struct buffer_head *bh)
+/*
+ * Return the block number which was discovered to be invalid, or 0 if
+ * the block bitmap is valid.
+ */
+static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
+ struct ext4_group_desc *desc,
+ unsigned int block_group,
+ struct buffer_head *bh)
{
ext4_grpblk_t offset;
ext4_grpblk_t next_zero_bit;
- ext4_fsblk_t bitmap_blk;
+ ext4_fsblk_t blk;
ext4_fsblk_t group_first_block;
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
@@ -297,37 +301,33 @@ static int ext4_valid_block_bitmap(struct super_block *sb,
* or it has to also read the block group where the bitmaps
* are located to verify they are set.
*/
- return 1;
+ return 0;
}
group_first_block = ext4_group_first_block_no(sb, block_group);
/* check whether block bitmap block number is set */
- bitmap_blk = ext4_block_bitmap(sb, desc);
- offset = bitmap_blk - group_first_block;
+ blk = ext4_block_bitmap(sb, desc);
+ offset = blk - group_first_block;
if (!ext4_test_bit(offset, bh->b_data))
/* bad block bitmap */
- goto err_out;
+ return blk;
/* check whether the inode bitmap block number is set */
- bitmap_blk = ext4_inode_bitmap(sb, desc);
- offset = bitmap_blk - group_first_block;
+ blk = ext4_inode_bitmap(sb, desc);
+ offset = blk - group_first_block;
if (!ext4_test_bit(offset, bh->b_data))
/* bad block bitmap */
- goto err_out;
+ return blk;
/* check whether the inode table block number is set */
- bitmap_blk = ext4_inode_table(sb, desc);
- offset = bitmap_blk - group_first_block;
+ blk = ext4_inode_table(sb, desc);
+ offset = blk - group_first_block;
next_zero_bit = ext4_find_next_zero_bit(bh->b_data,
offset + EXT4_SB(sb)->s_itb_per_group,
offset);
- if (next_zero_bit >= offset + EXT4_SB(sb)->s_itb_per_group)
- /* good bitmap for inode tables */
- return 1;
-
-err_out:
- ext4_error(sb, "Invalid block bitmap - block_group = %d, block = %llu",
- block_group, bitmap_blk);
+ if (next_zero_bit < offset + EXT4_SB(sb)->s_itb_per_group)
+ /* bad bitmap for inode tables */
+ return blk;
return 0;
}
@@ -336,14 +336,26 @@ void ext4_validate_block_bitmap(struct super_block *sb,
unsigned int block_group,
struct buffer_head *bh)
{
+ ext4_fsblk_t blk;
+
if (buffer_verified(bh))
return;
ext4_lock_group(sb, block_group);
- if (ext4_valid_block_bitmap(sb, desc, block_group, bh) &&
- ext4_block_bitmap_csum_verify(sb, block_group, desc, bh,
- EXT4_BLOCKS_PER_GROUP(sb) / 8))
- set_buffer_verified(bh);
+ blk = ext4_valid_block_bitmap(sb, desc, block_group, bh);
+ if (unlikely(blk != 0)) {
+ ext4_unlock_group(sb, block_group);
+ ext4_error(sb, "bg %u: block %llu: invalid block bitmap",
+ block_group, blk);
+ return;
+ }
+ if (unlikely(!ext4_block_bitmap_csum_verify(sb, block_group,
+ desc, bh, EXT4_BLOCKS_PER_GROUP(sb) / 8))) {
+ ext4_unlock_group(sb, block_group);
+ ext4_error(sb, "bg %u: bad block bitmap checksum", block_group);
+ return;
+ }
+ set_buffer_verified(bh);
ext4_unlock_group(sb, block_group);
}
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index a94b9c63ee5c..5c2d1813ebe9 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -11,16 +11,9 @@
#include <linux/jbd2.h>
#include "ext4.h"
-static const int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0};
-
unsigned int ext4_count_free(char *bitmap, unsigned int numchars)
{
- unsigned int i, sum = 0;
-
- for (i = 0; i < numchars; i++)
- sum += nibblemap[bitmap[i] & 0xf] +
- nibblemap[(bitmap[i] >> 4) & 0xf];
- return sum;
+ return numchars * BITS_PER_BYTE - memweight(bitmap, numchars);
}
int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
@@ -86,7 +79,6 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
if (provided == calculated)
return 1;
- ext4_error(sb, "Bad block bitmap checksum: block_group = %u", group);
return 0;
}
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index cd0c7ed06772..aabbb3f53683 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -2662,6 +2662,7 @@ cont:
}
path[0].p_depth = depth;
path[0].p_hdr = ext_inode_hdr(inode);
+ i = 0;
if (ext4_ext_check(inode, path[0].p_hdr, depth)) {
err = -EIO;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 89b59cb7f9b8..dff171c3a123 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -233,6 +233,11 @@ void ext4_evict_inode(struct inode *inode)
if (is_bad_inode(inode))
goto no_delete;
+ /*
+ * Protect us against freezing - iput() caller didn't have to have any
+ * protection against it
+ */
+ sb_start_intwrite(inode->i_sb);
handle = ext4_journal_start(inode, ext4_blocks_for_truncate(inode)+3);
if (IS_ERR(handle)) {
ext4_std_error(inode->i_sb, PTR_ERR(handle));
@@ -242,6 +247,7 @@ void ext4_evict_inode(struct inode *inode)
* cleaned up.
*/
ext4_orphan_del(NULL, inode);
+ sb_end_intwrite(inode->i_sb);
goto no_delete;
}
@@ -273,6 +279,7 @@ void ext4_evict_inode(struct inode *inode)
stop_handle:
ext4_journal_stop(handle);
ext4_orphan_del(NULL, inode);
+ sb_end_intwrite(inode->i_sb);
goto no_delete;
}
}
@@ -301,6 +308,7 @@ void ext4_evict_inode(struct inode *inode)
else
ext4_free_inode(handle, inode);
ext4_journal_stop(handle);
+ sb_end_intwrite(inode->i_sb);
return;
no_delete:
ext4_clear_inode(inode); /* We must guarantee clearing of inode... */
@@ -1962,7 +1970,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
* This function can get called via...
* - ext4_da_writepages after taking page lock (have journal handle)
* - journal_submit_inode_data_buffers (no journal handle)
- * - shrink_page_list via pdflush (no journal handle)
+ * - shrink_page_list via the kswapd/direct reclaim (no journal handle)
* - grab_page_cache when doing write_begin (have journal handle)
*
* We don't do any block allocation in this function. If we have page with
@@ -4581,14 +4589,6 @@ static int ext4_expand_extra_isize(struct inode *inode,
* inode out, but prune_icache isn't a user-visible syncing function.
* Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync)
* we start and wait on commits.
- *
- * Is this efficient/effective? Well, we're being nice to the system
- * by cleaning up our inodes proactively so they can be reaped
- * without I/O. But we are potentially leaving up to five seconds'
- * worth of inodes floating about which prune_icache wants us to
- * write out. One way to fix that would be to get prune_icache()
- * to do a write_super() to free up some memory. It has the desired
- * effect.
*/
int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
{
@@ -4779,11 +4779,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
get_block_t *get_block;
int retries = 0;
- /*
- * This check is racy but catches the common case. We rely on
- * __block_page_mkwrite() to do a reliable check.
- */
- vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+ sb_start_pagefault(inode->i_sb);
/* Delalloc case is easy... */
if (test_opt(inode->i_sb, DELALLOC) &&
!ext4_should_journal_data(inode) &&
@@ -4851,5 +4847,6 @@ retry_alloc:
out_ret:
ret = block_page_mkwrite_return(ret);
out:
+ sb_end_pagefault(inode->i_sb);
return ret;
}
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index f99a1311e847..fe7c63f4717e 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -44,6 +44,11 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
{
struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);
+ /*
+ * We protect against freezing so that we don't create dirty buffers
+ * on frozen filesystem.
+ */
+ sb_start_write(sb);
ext4_mmp_csum_set(sb, mmp);
mark_buffer_dirty(bh);
lock_buffer(bh);
@@ -51,6 +56,7 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
get_bh(bh);
submit_bh(WRITE_SYNC, bh);
wait_on_buffer(bh);
+ sb_end_write(sb);
if (unlikely(!buffer_uptodate(bh)))
return 1;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 2d51cd9af225..c6e0cb3d1f4a 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -326,38 +326,17 @@ static void ext4_put_nojournal(handle_t *handle)
/*
* Wrappers for jbd2_journal_start/end.
- *
- * The only special thing we need to do here is to make sure that all
- * journal_end calls result in the superblock being marked dirty, so
- * that sync() will call the filesystem's write_super callback if
- * appropriate.
- *
- * To avoid j_barrier hold in userspace when a user calls freeze(),
- * ext4 prevents a new handle from being started by s_frozen, which
- * is in an upper layer.
*/
handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
{
journal_t *journal;
- handle_t *handle;
trace_ext4_journal_start(sb, nblocks, _RET_IP_);
if (sb->s_flags & MS_RDONLY)
return ERR_PTR(-EROFS);
+ WARN_ON(sb->s_writers.frozen == SB_FREEZE_COMPLETE);
journal = EXT4_SB(sb)->s_journal;
- handle = ext4_journal_current_handle();
-
- /*
- * If a handle has been started, it should be allowed to
- * finish, otherwise deadlock could happen between freeze
- * and others(e.g. truncate) due to the restart of the
- * journal handle if the filesystem is forzen and active
- * handles are not stopped.
- */
- if (!handle)
- vfs_check_frozen(sb, SB_FREEZE_TRANS);
-
if (!journal)
return ext4_get_nojournal();
/*
@@ -372,12 +351,6 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
return jbd2_journal_start(journal, nblocks);
}
-/*
- * The only special thing we need to do here is to make sure that all
- * jbd2_journal_stop calls result in the superblock being marked dirty, so
- * that sync() will call the filesystem's write_super callback if
- * appropriate.
- */
int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle)
{
struct super_block *sb;
@@ -975,6 +948,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
ei->i_reserved_meta_blocks = 0;
ei->i_allocated_meta_blocks = 0;
ei->i_da_metadata_calc_len = 0;
+ ei->i_da_metadata_calc_last_lblock = 0;
spin_lock_init(&(ei->i_block_reservation_lock));
#ifdef CONFIG_QUOTA
ei->i_reserved_quota = 0;
@@ -2747,6 +2721,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
sb = elr->lr_super;
ngroups = EXT4_SB(sb)->s_groups_count;
+ sb_start_write(sb);
for (group = elr->lr_next_group; group < ngroups; group++) {
gdp = ext4_get_group_desc(sb, group, NULL);
if (!gdp) {
@@ -2773,6 +2748,7 @@ static int ext4_run_li_request(struct ext4_li_request *elr)
elr->lr_next_sched = jiffies + elr->lr_timeout;
elr->lr_next_group = group + 1;
}
+ sb_end_write(sb);
return ret;
}
@@ -3133,6 +3109,10 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp,
ext4_group_t i, ngroups = ext4_get_groups_count(sb);
int s, j, count = 0;
+ if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_BIGALLOC))
+ return (ext4_bg_has_super(sb, grp) + ext4_bg_num_gdb(sb, grp) +
+ sbi->s_itb_per_group + 2);
+
first_block = le32_to_cpu(sbi->s_es->s_first_data_block) +
(grp * EXT4_BLOCKS_PER_GROUP(sb));
last_block = first_block + EXT4_BLOCKS_PER_GROUP(sb) - 1;
@@ -4444,6 +4424,7 @@ static void ext4_clear_journal_err(struct super_block *sb,
ext4_commit_super(sb, 1);
jbd2_journal_clear_err(journal);
+ jbd2_journal_update_sb_errno(journal);
}
}
@@ -4460,10 +4441,8 @@ int ext4_force_commit(struct super_block *sb)
return 0;
journal = EXT4_SB(sb)->s_journal;
- if (journal) {
- vfs_check_frozen(sb, SB_FREEZE_TRANS);
+ if (journal)
ret = ext4_journal_force_commit(journal);
- }
return ret;
}
@@ -4493,9 +4472,8 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
* gives us a chance to flush the journal completely and mark the fs clean.
*
* Note that only this function cannot bring a filesystem to be in a clean
- * state independently, because ext4 prevents a new handle from being started
- * by @sb->s_frozen, which stays in an upper layer. It thus needs help from
- * the upper layer.
+ * state independently. It relies on upper layer to stop all data & metadata
+ * modifications.
*/
static int ext4_freeze(struct super_block *sb)
{
@@ -4522,7 +4500,7 @@ static int ext4_freeze(struct super_block *sb)
EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
error = ext4_commit_super(sb, 1);
out:
- /* we rely on s_frozen to stop further updates */
+ /* we rely on upper layer to stop further updates */
jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
return error;
}
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 6eaa28c98ad1..dc49ed2cbffa 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -35,6 +35,11 @@
#define FAT_MAX_UNI_CHARS ((MSDOS_SLOTS - 1) * 13 + 1)
#define FAT_MAX_UNI_SIZE (FAT_MAX_UNI_CHARS * sizeof(wchar_t))
+static inline unsigned char fat_tolower(unsigned char c)
+{
+ return ((c >= 'A') && (c <= 'Z')) ? c+32 : c;
+}
+
static inline loff_t fat_make_i_pos(struct super_block *sb,
struct buffer_head *bh,
struct msdos_dir_entry *de)
@@ -333,6 +338,124 @@ parse_long:
return 0;
}
+/**
+ * fat_parse_short - Parse MS-DOS (short) directory entry.
+ * @sb: superblock
+ * @de: directory entry to parse
+ * @name: FAT_MAX_SHORT_SIZE array in which to place extracted name
+ * @dot_hidden: Nonzero == prepend '.' to names with ATTR_HIDDEN
+ *
+ * Returns the number of characters extracted into 'name'.
+ */
+static int fat_parse_short(struct super_block *sb,
+ const struct msdos_dir_entry *de,
+ unsigned char *name, int dot_hidden)
+{
+ const struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ int isvfat = sbi->options.isvfat;
+ int nocase = sbi->options.nocase;
+ unsigned short opt_shortname = sbi->options.shortname;
+ struct nls_table *nls_disk = sbi->nls_disk;
+ wchar_t uni_name[14];
+ unsigned char c, work[MSDOS_NAME];
+ unsigned char *ptname = name;
+ int chi, chl, i, j, k;
+ int dotoffset = 0;
+ int name_len = 0, uni_len = 0;
+
+ if (!isvfat && dot_hidden && (de->attr & ATTR_HIDDEN)) {
+ *ptname++ = '.';
+ dotoffset = 1;
+ }
+
+ memcpy(work, de->name, sizeof(work));
+ /* see namei.c, msdos_format_name */
+ if (work[0] == 0x05)
+ work[0] = 0xE5;
+
+ /* Filename */
+ for (i = 0, j = 0; i < 8;) {
+ c = work[i];
+ if (!c)
+ break;
+ chl = fat_shortname2uni(nls_disk, &work[i], 8 - i,
+ &uni_name[j++], opt_shortname,
+ de->lcase & CASE_LOWER_BASE);
+ if (chl <= 1) {
+ if (!isvfat)
+ ptname[i] = nocase ? c : fat_tolower(c);
+ i++;
+ if (c != ' ') {
+ name_len = i;
+ uni_len = j;
+ }
+ } else {
+ uni_len = j;
+ if (isvfat)
+ i += min(chl, 8-i);
+ else {
+ for (chi = 0; chi < chl && i < 8; chi++, i++)
+ ptname[i] = work[i];
+ }
+ if (chl)
+ name_len = i;
+ }
+ }
+
+ i = name_len;
+ j = uni_len;
+ fat_short2uni(nls_disk, ".", 1, &uni_name[j++]);
+ if (!isvfat)
+ ptname[i] = '.';
+ i++;
+
+ /* Extension */
+ for (k = 8; k < MSDOS_NAME;) {
+ c = work[k];
+ if (!c)
+ break;
+ chl = fat_shortname2uni(nls_disk, &work[k], MSDOS_NAME - k,
+ &uni_name[j++], opt_shortname,
+ de->lcase & CASE_LOWER_EXT);
+ if (chl <= 1) {
+ k++;
+ if (!isvfat)
+ ptname[i] = nocase ? c : fat_tolower(c);
+ i++;
+ if (c != ' ') {
+ name_len = i;
+ uni_len = j;
+ }
+ } else {
+ uni_len = j;
+ if (isvfat) {
+ int offset = min(chl, MSDOS_NAME-k);
+ k += offset;
+ i += offset;
+ } else {
+ for (chi = 0; chi < chl && k < MSDOS_NAME;
+ chi++, i++, k++) {
+ ptname[i] = work[k];
+ }
+ }
+ if (chl)
+ name_len = i;
+ }
+ }
+
+ if (name_len > 0) {
+ name_len += dotoffset;
+
+ if (sbi->options.isvfat) {
+ uni_name[uni_len] = 0x0000;
+ name_len = fat_uni_to_x8(sb, uni_name, name,
+ FAT_MAX_SHORT_SIZE);
+ }
+ }
+
+ return name_len;
+}
+
/*
* Return values: negative -> error, 0 -> not found, positive -> found,
* value is the total amount of slots, including the shortname entry.
@@ -344,15 +467,11 @@ int fat_search_long(struct inode *inode, const unsigned char *name,
struct msdos_sb_info *sbi = MSDOS_SB(sb);
struct buffer_head *bh = NULL;
struct msdos_dir_entry *de;
- struct nls_table *nls_disk = sbi->nls_disk;
unsigned char nr_slots;
- wchar_t bufuname[14];
wchar_t *unicode = NULL;
- unsigned char work[MSDOS_NAME];
unsigned char bufname[FAT_MAX_SHORT_SIZE];
- unsigned short opt_shortname = sbi->options.shortname;
loff_t cpos = 0;
- int chl, i, j, last_u, err, len;
+ int err, len;
err = -ENOENT;
while (1) {
@@ -380,47 +499,16 @@ parse_record:
goto end_of_dir;
}
- memcpy(work, de->name, sizeof(de->name));
- /* see namei.c, msdos_format_name */
- if (work[0] == 0x05)
- work[0] = 0xE5;
- for (i = 0, j = 0, last_u = 0; i < 8;) {
- if (!work[i])
- break;
- chl = fat_shortname2uni(nls_disk, &work[i], 8 - i,
- &bufuname[j++], opt_shortname,
- de->lcase & CASE_LOWER_BASE);
- if (chl <= 1) {
- if (work[i] != ' ')
- last_u = j;
- } else {
- last_u = j;
- }
- i += chl;
- }
- j = last_u;
- fat_short2uni(nls_disk, ".", 1, &bufuname[j++]);
- for (i = 8; i < MSDOS_NAME;) {
- if (!work[i])
- break;
- chl = fat_shortname2uni(nls_disk, &work[i],
- MSDOS_NAME - i,
- &bufuname[j++], opt_shortname,
- de->lcase & CASE_LOWER_EXT);
- if (chl <= 1) {
- if (work[i] != ' ')
- last_u = j;
- } else {
- last_u = j;
- }
- i += chl;
- }
- if (!last_u)
+ /* Never prepend '.' to hidden files here.
+ * That is done only for msdos mounts (and only when
+ * 'dotsOK=yes'); if we are executing here, it is in the
+ * context of a vfat mount.
+ */
+ len = fat_parse_short(sb, de, bufname, 0);
+ if (len == 0)
continue;
/* Compare shortname */
- bufuname[last_u] = 0x0000;
- len = fat_uni_to_x8(sb, bufuname, bufname, sizeof(bufname));
if (fat_name_match(sbi, name, name_len, bufname, len))
goto found;
@@ -469,20 +557,15 @@ static int __fat_readdir(struct inode *inode, struct file *filp, void *dirent,
struct msdos_sb_info *sbi = MSDOS_SB(sb);
struct buffer_head *bh;
struct msdos_dir_entry *de;
- struct nls_table *nls_disk = sbi->nls_disk;
unsigned char nr_slots;
- wchar_t bufuname[14];
wchar_t *unicode = NULL;
- unsigned char c, work[MSDOS_NAME];
- unsigned char bufname[FAT_MAX_SHORT_SIZE], *ptname = bufname;
- unsigned short opt_shortname = sbi->options.shortname;
+ unsigned char bufname[FAT_MAX_SHORT_SIZE];
int isvfat = sbi->options.isvfat;
- int nocase = sbi->options.nocase;
const char *fill_name = NULL;
unsigned long inum;
unsigned long lpos, dummy, *furrfu = &lpos;
loff_t cpos;
- int chi, chl, i, i2, j, last, last_u, dotoffset = 0, fill_len = 0;
+ int short_len = 0, fill_len = 0;
int ret = 0;
lock_super(sb);
@@ -556,74 +639,10 @@ parse_record:
}
}
- if (sbi->options.dotsOK) {
- ptname = bufname;
- dotoffset = 0;
- if (de->attr & ATTR_HIDDEN) {
- *ptname++ = '.';
- dotoffset = 1;
- }
- }
-
- memcpy(work, de->name, sizeof(de->name));
- /* see namei.c, msdos_format_name */
- if (work[0] == 0x05)
- work[0] = 0xE5;
- for (i = 0, j = 0, last = 0, last_u = 0; i < 8;) {
- if (!(c = work[i]))
- break;
- chl = fat_shortname2uni(nls_disk, &work[i], 8 - i,
- &bufuname[j++], opt_shortname,
- de->lcase & CASE_LOWER_BASE);
- if (chl <= 1) {
- ptname[i++] = (!nocase && c>='A' && c<='Z') ? c+32 : c;
- if (c != ' ') {
- last = i;
- last_u = j;
- }
- } else {
- last_u = j;
- for (chi = 0; chi < chl && i < 8; chi++) {
- ptname[i] = work[i];
- i++; last = i;
- }
- }
- }
- i = last;
- j = last_u;
- fat_short2uni(nls_disk, ".", 1, &bufuname[j++]);
- ptname[i++] = '.';
- for (i2 = 8; i2 < MSDOS_NAME;) {
- if (!(c = work[i2]))
- break;
- chl = fat_shortname2uni(nls_disk, &work[i2], MSDOS_NAME - i2,
- &bufuname[j++], opt_shortname,
- de->lcase & CASE_LOWER_EXT);
- if (chl <= 1) {
- i2++;
- ptname[i++] = (!nocase && c>='A' && c<='Z') ? c+32 : c;
- if (c != ' ') {
- last = i;
- last_u = j;
- }
- } else {
- last_u = j;
- for (chi = 0; chi < chl && i2 < MSDOS_NAME; chi++) {
- ptname[i++] = work[i2++];
- last = i;
- }
- }
- }
- if (!last)
+ short_len = fat_parse_short(sb, de, bufname, sbi->options.dotsOK);
+ if (short_len == 0)
goto record_end;
- i = last + dotoffset;
- j = last_u;
-
- if (isvfat) {
- bufuname[j] = 0x0000;
- i = fat_uni_to_x8(sb, bufuname, bufname, sizeof(bufname));
- }
if (nr_slots) {
/* hack for fat_ioctl_filldir() */
struct fat_ioctl_filldir_callback *p = dirent;
@@ -631,12 +650,12 @@ parse_record:
p->longname = fill_name;
p->long_len = fill_len;
p->shortname = bufname;
- p->short_len = i;
+ p->short_len = short_len;
fill_name = NULL;
fill_len = 0;
} else {
fill_name = bufname;
- fill_len = i;
+ fill_len = short_len;
}
start_filldir:
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index fc35c5c69136..2deeeb86f331 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -217,6 +217,21 @@ static inline void fat16_towchar(wchar_t *dst, const __u8 *src, size_t len)
#endif
}
+static inline int fat_get_start(const struct msdos_sb_info *sbi,
+ const struct msdos_dir_entry *de)
+{
+ int cluster = le16_to_cpu(de->start);
+ if (sbi->fat_bits == 32)
+ cluster |= (le16_to_cpu(de->starthi) << 16);
+ return cluster;
+}
+
+static inline void fat_set_start(struct msdos_dir_entry *de, int cluster)
+{
+ de->start = cpu_to_le16(cluster);
+ de->starthi = cpu_to_le16(cluster >> 16);
+}
+
static inline void fatwchar_to16(__u8 *dst, const wchar_t *src, size_t len)
{
#ifdef __BIG_ENDIAN
diff --git a/fs/fat/file.c b/fs/fat/file.c
index a71fe3715ee8..e007b8bd8e5e 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -43,10 +43,10 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
if (err)
goto out;
- mutex_lock(&inode->i_mutex);
err = mnt_want_write_file(file);
if (err)
- goto out_unlock_inode;
+ goto out;
+ mutex_lock(&inode->i_mutex);
/*
* ATTR_VOLUME and ATTR_DIR cannot be changed; this also
@@ -73,14 +73,14 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
/* The root directory has no attributes */
if (inode->i_ino == MSDOS_ROOT_INO && attr != ATTR_DIR) {
err = -EINVAL;
- goto out_drop_write;
+ goto out_unlock_inode;
}
if (sbi->options.sys_immutable &&
((attr | oldattr) & ATTR_SYS) &&
!capable(CAP_LINUX_IMMUTABLE)) {
err = -EPERM;
- goto out_drop_write;
+ goto out_unlock_inode;
}
/*
@@ -90,12 +90,12 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
*/
err = security_inode_setattr(file->f_path.dentry, &ia);
if (err)
- goto out_drop_write;
+ goto out_unlock_inode;
/* This MUST be done before doing anything irreversible... */
err = fat_setattr(file->f_path.dentry, &ia);
if (err)
- goto out_drop_write;
+ goto out_unlock_inode;
fsnotify_change(file->f_path.dentry, ia.ia_valid);
if (sbi->options.sys_immutable) {
@@ -107,10 +107,9 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
fat_save_attrs(inode, attr);
mark_inode_dirty(inode);
-out_drop_write:
- mnt_drop_write_file(file);
out_unlock_inode:
mutex_unlock(&inode->i_mutex);
+ mnt_drop_write_file(file);
out:
return err;
}
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 0038b32cb362..05e897fe9866 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -369,10 +369,7 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
inode->i_op = sbi->dir_ops;
inode->i_fop = &fat_dir_operations;
- MSDOS_I(inode)->i_start = le16_to_cpu(de->start);
- if (sbi->fat_bits == 32)
- MSDOS_I(inode)->i_start |= (le16_to_cpu(de->starthi) << 16);
-
+ MSDOS_I(inode)->i_start = fat_get_start(sbi, de);
MSDOS_I(inode)->i_logstart = MSDOS_I(inode)->i_start;
error = fat_calc_dir_size(inode);
if (error < 0)
@@ -385,9 +382,7 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
inode->i_mode = fat_make_mode(sbi, de->attr,
((sbi->options.showexec && !is_exec(de->name + 8))
? S_IRUGO|S_IWUGO : S_IRWXUGO));
- MSDOS_I(inode)->i_start = le16_to_cpu(de->start);
- if (sbi->fat_bits == 32)
- MSDOS_I(inode)->i_start |= (le16_to_cpu(de->starthi) << 16);
+ MSDOS_I(inode)->i_start = fat_get_start(sbi, de);
MSDOS_I(inode)->i_logstart = MSDOS_I(inode)->i_start;
inode->i_size = le32_to_cpu(de->size);
@@ -613,8 +608,7 @@ retry:
else
raw_entry->size = cpu_to_le32(inode->i_size);
raw_entry->attr = fat_make_attrs(inode);
- raw_entry->start = cpu_to_le16(MSDOS_I(inode)->i_logstart);
- raw_entry->starthi = cpu_to_le16(MSDOS_I(inode)->i_logstart >> 16);
+ fat_set_start(raw_entry, MSDOS_I(inode)->i_logstart);
fat_time_unix2fat(sbi, &inode->i_mtime, &raw_entry->time,
&raw_entry->date, NULL);
if (sbi->options.isvfat) {
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index 70d993a93805..b0e12bf9f4a1 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -246,8 +246,7 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
de.ctime_cs = 0;
de.time = time;
de.date = date;
- de.start = cpu_to_le16(cluster);
- de.starthi = cpu_to_le16(cluster >> 16);
+ fat_set_start(&de, cluster);
de.size = 0;
err = fat_add_entries(dir, &de, 1, sinfo);
@@ -530,9 +529,7 @@ static int do_msdos_rename(struct inode *old_dir, unsigned char *old_name,
mark_inode_dirty(old_inode);
if (update_dotdot) {
- int start = MSDOS_I(new_dir)->i_logstart;
- dotdot_de->start = cpu_to_le16(start);
- dotdot_de->starthi = cpu_to_le16(start >> 16);
+ fat_set_start(dotdot_de, MSDOS_I(new_dir)->i_logstart);
mark_buffer_dirty_inode(dotdot_bh, old_inode);
if (IS_DIRSYNC(new_dir)) {
err = sync_dirty_buffer(dotdot_bh);
@@ -572,9 +569,7 @@ error_dotdot:
corrupt = 1;
if (update_dotdot) {
- int start = MSDOS_I(old_dir)->i_logstart;
- dotdot_de->start = cpu_to_le16(start);
- dotdot_de->starthi = cpu_to_le16(start >> 16);
+ fat_set_start(dotdot_de, MSDOS_I(old_dir)->i_logstart);
mark_buffer_dirty_inode(dotdot_bh, old_inode);
corrupt |= sync_dirty_buffer(dotdot_bh);
}
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 6cc480652433..6a6d8c0715a1 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -651,8 +651,7 @@ shortname:
de->time = de->ctime = time;
de->date = de->cdate = de->adate = date;
de->ctime_cs = time_cs;
- de->start = cpu_to_le16(cluster);
- de->starthi = cpu_to_le16(cluster >> 16);
+ fat_set_start(de, cluster);
de->size = 0;
out_free:
__putname(uname);
@@ -965,9 +964,7 @@ static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry,
mark_inode_dirty(old_inode);
if (update_dotdot) {
- int start = MSDOS_I(new_dir)->i_logstart;
- dotdot_de->start = cpu_to_le16(start);
- dotdot_de->starthi = cpu_to_le16(start >> 16);
+ fat_set_start(dotdot_de, MSDOS_I(new_dir)->i_logstart);
mark_buffer_dirty_inode(dotdot_bh, old_inode);
if (IS_DIRSYNC(new_dir)) {
err = sync_dirty_buffer(dotdot_bh);
@@ -1009,9 +1006,7 @@ error_dotdot:
corrupt = 1;
if (update_dotdot) {
- int start = MSDOS_I(old_dir)->i_logstart;
- dotdot_de->start = cpu_to_le16(start);
- dotdot_de->starthi = cpu_to_le16(start >> 16);
+ fat_set_start(dotdot_de, MSDOS_I(old_dir)->i_logstart);
mark_buffer_dirty_inode(dotdot_bh, old_inode);
corrupt |= sync_dirty_buffer(dotdot_bh);
}
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 81b70e665bf0..887b5ba8c9b5 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -20,6 +20,7 @@
#include <linux/signal.h>
#include <linux/rcupdate.h>
#include <linux/pid_namespace.h>
+#include <linux/user_namespace.h>
#include <asm/poll.h>
#include <asm/siginfo.h>
@@ -340,6 +341,31 @@ static int f_getown_ex(struct file *filp, unsigned long arg)
return ret;
}
+#ifdef CONFIG_CHECKPOINT_RESTORE
+static int f_getowner_uids(struct file *filp, unsigned long arg)
+{
+ struct user_namespace *user_ns = current_user_ns();
+ uid_t * __user dst = (void * __user)arg;
+ uid_t src[2];
+ int err;
+
+ read_lock(&filp->f_owner.lock);
+ src[0] = from_kuid(user_ns, filp->f_owner.uid);
+ src[1] = from_kuid(user_ns, filp->f_owner.euid);
+ read_unlock(&filp->f_owner.lock);
+
+ err = put_user(src[0], &dst[0]);
+ err |= put_user(src[1], &dst[1]);
+
+ return err;
+}
+#else
+static int f_getowner_uids(struct file *filp, unsigned long arg)
+{
+ return -EINVAL;
+}
+#endif
+
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
struct file *filp)
{
@@ -396,6 +422,9 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
case F_SETOWN_EX:
err = f_setown_ex(filp, arg);
break;
+ case F_GETOWNER_UIDS:
+ err = f_getowner_uids(filp, arg);
+ break;
case F_GETSIG:
err = filp->f_owner.signum;
break;
diff --git a/fs/file_table.c b/fs/file_table.c
index b3fc4d67a26b..701985e4ccda 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -43,7 +43,7 @@ static struct kmem_cache *filp_cachep __read_mostly;
static struct percpu_counter nr_files __cacheline_aligned_in_smp;
-static inline void file_free_rcu(struct rcu_head *head)
+static void file_free_rcu(struct rcu_head *head)
{
struct file *f = container_of(head, struct file, f_u.fu_rcuhead);
@@ -217,7 +217,7 @@ static void drop_file_write_access(struct file *file)
return;
if (file_check_writeable(file) != 0)
return;
- mnt_drop_write(mnt);
+ __mnt_drop_write(mnt);
file_release_write(file);
}
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 8f660dd6137a..be3efc4f64f4 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -52,11 +52,6 @@ struct wb_writeback_work {
struct completion *done; /* set if the caller waits */
};
-/*
- * We don't actually have pdflush, but this one is exported though /proc...
- */
-int nr_pdflush_threads;
-
/**
* writeback_in_progress - determine whether there is writeback in progress
* @bdi: the device's backing_dev_info structure.
@@ -628,8 +623,8 @@ static long writeback_sb_inodes(struct super_block *sb,
}
/*
- * Don't bother with new inodes or inodes beeing freed, first
- * kind does not need peridic writeout yet, and for the latter
+ * Don't bother with new inodes or inodes being freed, first
+ * kind does not need periodic writeout yet, and for the latter
* kind writeout is handled by the freer.
*/
spin_lock(&inode->i_lock);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 8964cf3999b2..324bc0850534 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -383,6 +383,9 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
struct fuse_entry_out outentry;
struct fuse_file *ff;
+ /* Userspace expects S_IFREG in create mode */
+ BUG_ON((mode & S_IFMT) != S_IFREG);
+
forget = fuse_alloc_forget();
err = -ENOMEM;
if (!forget)
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index b321a688cde7..aba15f1b7ad2 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -703,13 +703,16 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct inode *inode = iocb->ki_filp->f_mapping->host;
+ struct fuse_conn *fc = get_fuse_conn(inode);
- if (pos + iov_length(iov, nr_segs) > i_size_read(inode)) {
+ /*
+ * In auto invalidate mode, always update attributes on read.
+ * Otherwise, only update if we attempt to read past EOF (to ensure
+ * i_size is up to date).
+ */
+ if (fc->auto_inval_data ||
+ (pos + iov_length(iov, nr_segs) > i_size_read(inode))) {
int err;
- /*
- * If trying to read past EOF, make sure the i_size
- * attribute is up-to-date.
- */
err = fuse_update_attributes(inode, NULL, iocb->ki_filp, NULL);
if (err)
return err;
@@ -944,9 +947,8 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
return err;
count = ocount;
-
+ sb_start_write(inode->i_sb);
mutex_lock(&inode->i_mutex);
- vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
/* We can write back this queue in page reclaim */
current->backing_dev_info = mapping->backing_dev_info;
@@ -1004,6 +1006,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
out:
current->backing_dev_info = NULL;
mutex_unlock(&inode->i_mutex);
+ sb_end_write(inode->i_sb);
return written ? written : err;
}
@@ -1700,7 +1703,7 @@ static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count)
size_t n;
u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT;
- for (n = 0; n < count; n++) {
+ for (n = 0; n < count; n++, iov++) {
if (iov->iov_len > (size_t) max)
return -ENOMEM;
max -= iov->iov_len;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 771fb6322c07..e24dd74e3068 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -484,6 +484,9 @@ struct fuse_conn {
/** Is fallocate not implemented by fs? */
unsigned no_fallocate:1;
+ /** Use enhanced/automatic page cache invalidation. */
+ unsigned auto_inval_data:1;
+
/** The number of requests waiting for completion */
atomic_t num_waiting;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1cd61652018c..ce0a2838ccd0 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -197,6 +197,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
loff_t oldsize;
+ struct timespec old_mtime;
spin_lock(&fc->lock);
if (attr_version != 0 && fi->attr_version > attr_version) {
@@ -204,15 +205,35 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
return;
}
+ old_mtime = inode->i_mtime;
fuse_change_attributes_common(inode, attr, attr_valid);
oldsize = inode->i_size;
i_size_write(inode, attr->size);
spin_unlock(&fc->lock);
- if (S_ISREG(inode->i_mode) && oldsize != attr->size) {
- truncate_pagecache(inode, oldsize, attr->size);
- invalidate_inode_pages2(inode->i_mapping);
+ if (S_ISREG(inode->i_mode)) {
+ bool inval = false;
+
+ if (oldsize != attr->size) {
+ truncate_pagecache(inode, oldsize, attr->size);
+ inval = true;
+ } else if (fc->auto_inval_data) {
+ struct timespec new_mtime = {
+ .tv_sec = attr->mtime,
+ .tv_nsec = attr->mtimensec,
+ };
+
+ /*
+ * Auto inval mode also checks and invalidates if mtime
+ * has changed.
+ */
+ if (!timespec_equal(&old_mtime, &new_mtime))
+ inval = true;
+ }
+
+ if (inval)
+ invalidate_inode_pages2(inode->i_mapping);
}
}
@@ -834,6 +855,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->big_writes = 1;
if (arg->flags & FUSE_DONT_MASK)
fc->dont_mask = 1;
+ if (arg->flags & FUSE_AUTO_INVAL_DATA)
+ fc->auto_inval_data = 1;
} else {
ra_pages = fc->max_read / PAGE_CACHE_SIZE;
fc->no_lock = 1;
@@ -859,7 +882,8 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
- FUSE_FLOCK_LOCKS;
+ FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
+ FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA;
req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 9aa6af13823c..d1d791ef38de 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -373,11 +373,10 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
loff_t size;
int ret;
- /* Wait if fs is frozen. This is racy so we check again later on
- * and retry if the fs has been frozen after the page lock has
- * been acquired
- */
- vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+ sb_start_pagefault(inode->i_sb);
+
+ /* Update file times before taking page lock */
+ file_update_time(vma->vm_file);
ret = gfs2_rs_alloc(ip);
if (ret)
@@ -462,14 +461,9 @@ out:
gfs2_holder_uninit(&gh);
if (ret == 0) {
set_page_dirty(page);
- /* This check must be post dropping of transaction lock */
- if (inode->i_sb->s_frozen == SB_UNFROZEN) {
- wait_on_page_writeback(page);
- } else {
- ret = -EAGAIN;
- unlock_page(page);
- }
+ wait_on_page_writeback(page);
}
+ sb_end_pagefault(inode->i_sb);
return block_page_mkwrite_return(ret);
}
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 3a56c8d94de0..22255d96b27e 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -52,7 +52,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
/*
* If it's a fully non-blocking write attempt and we cannot
* lock the buffer then redirty the page. Note that this can
- * potentially cause a busy-wait loop from pdflush and kswapd
+ * potentially cause a busy-wait loop from flusher thread and kswapd
* activity, but those code paths have their own higher-level
* throttling.
*/
diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c
index ad3e2fb763d7..adbd27875ef9 100644
--- a/fs/gfs2/trans.c
+++ b/fs/gfs2/trans.c
@@ -50,6 +50,7 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
if (revokes)
tr->tr_reserved += gfs2_struct2blk(sdp, revokes,
sizeof(u64));
+ sb_start_intwrite(sdp->sd_vfs);
gfs2_holder_init(sdp->sd_trans_gl, LM_ST_SHARED, 0, &tr->tr_t_gh);
error = gfs2_glock_nq(&tr->tr_t_gh);
@@ -68,6 +69,7 @@ fail_gunlock:
gfs2_glock_dq(&tr->tr_t_gh);
fail_holder_uninit:
+ sb_end_intwrite(sdp->sd_vfs);
gfs2_holder_uninit(&tr->tr_t_gh);
kfree(tr);
@@ -116,6 +118,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
gfs2_holder_uninit(&tr->tr_t_gh);
kfree(tr);
}
+ sb_end_intwrite(sdp->sd_vfs);
return;
}
@@ -136,6 +139,7 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
if (sdp->sd_vfs->s_flags & MS_SYNCHRONOUS)
gfs2_log_flush(sdp, NULL);
+ sb_end_intwrite(sdp->sd_vfs);
}
/**
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 5fd51a5833ff..b7ec224910c5 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -236,10 +236,10 @@ out:
* hfs_mdb_commit()
*
* Description:
- * This updates the MDB on disk (look also at hfs_write_super()).
+ * This updates the MDB on disk.
* It does not check, if the superblock has been modified, or
* if the filesystem has been mounted read-only. It is mainly
- * called by hfs_write_super() and hfs_btree_extend().
+ * called by hfs_sync_fs() and flush_mdb().
* Input Variable(s):
* struct hfs_mdb *mdb: Pointer to the hfs MDB
* int backup;
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 473332098013..fdafb2d71654 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -365,7 +365,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
u64 last_fs_block, last_fs_page;
int err;
- err = -EINVAL;
+ err = -ENOMEM;
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
goto out;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index e13e9bdb0bf5..8349a899912e 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -416,8 +416,8 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, pgoff_t pgoff)
else
v_offset = 0;
- __unmap_hugepage_range(vma,
- vma->vm_start + v_offset, vma->vm_end, NULL);
+ unmap_hugepage_range(vma, vma->vm_start + v_offset,
+ vma->vm_end, NULL);
}
}
diff --git a/fs/inode.c b/fs/inode.c
index 3cc504320467..ac8d904b3f16 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1542,9 +1542,11 @@ void touch_atime(struct path *path)
if (timespec_equal(&inode->i_atime, &now))
return;
- if (mnt_want_write(mnt))
+ if (!sb_start_write_trylock(inode->i_sb))
return;
+ if (__mnt_want_write(mnt))
+ goto skip_update;
/*
* File systems can error out when updating inodes if they need to
* allocate new space to modify an inode (such is the case for
@@ -1555,7 +1557,9 @@ void touch_atime(struct path *path)
* of the fs read only, e.g. subvolumes in Btrfs.
*/
update_time(inode, &now, S_ATIME);
- mnt_drop_write(mnt);
+ __mnt_drop_write(mnt);
+skip_update:
+ sb_end_write(inode->i_sb);
}
EXPORT_SYMBOL(touch_atime);
@@ -1662,11 +1666,11 @@ int file_update_time(struct file *file)
return 0;
/* Finally allowed to write? Takes lock. */
- if (mnt_want_write_file(file))
+ if (__mnt_want_write_file(file))
return 0;
ret = update_time(inode, &now, sync_it);
- mnt_drop_write_file(file);
+ __mnt_drop_write_file(file);
return ret;
}
diff --git a/fs/internal.h b/fs/internal.h
index a6fd56c68b11..371bcc4b1697 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -61,6 +61,10 @@ extern void __init mnt_init(void);
extern struct lglock vfsmount_lock;
+extern int __mnt_want_write(struct vfsmount *);
+extern int __mnt_want_write_file(struct file *);
+extern void __mnt_drop_write(struct vfsmount *);
+extern void __mnt_drop_write_file(struct file *);
/*
* fs_struct.c
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 425c2f2cf170..09357508ec9a 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -534,8 +534,8 @@ int journal_start_commit(journal_t *journal, tid_t *ptid)
ret = 1;
} else if (journal->j_committing_transaction) {
/*
- * If ext3_write_super() recently started a commit, then we
- * have to wait for completion of that transaction
+ * If commit has been started, then we have to wait for
+ * completion of that transaction.
*/
if (ptid)
*ptid = journal->j_committing_transaction->t_tid;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index e9a3c4c85594..e149b99a7ffb 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -612,8 +612,8 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
ret = 1;
} else if (journal->j_committing_transaction) {
/*
- * If ext3_write_super() recently started a commit, then we
- * have to wait for completion of that transaction
+ * If commit has been started, then we have to wait for
+ * completion of that transaction.
*/
if (ptid)
*ptid = journal->j_committing_transaction->t_tid;
@@ -1377,7 +1377,7 @@ static void jbd2_mark_journal_empty(journal_t *journal)
* Update a journal's errno. Write updated superblock to disk waiting for IO
* to complete.
*/
-static void jbd2_journal_update_sb_errno(journal_t *journal)
+void jbd2_journal_update_sb_errno(journal_t *journal)
{
journal_superblock_t *sb = journal->j_superblock;
@@ -1390,6 +1390,7 @@ static void jbd2_journal_update_sb_errno(journal_t *journal)
jbd2_write_superblock(journal, WRITE_SYNC);
}
+EXPORT_SYMBOL(jbd2_journal_update_sb_errno);
/*
* Read the superblock for a given journal, performing initial
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 8392cb85bd54..05d29124c6ab 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -156,12 +156,16 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
struct nlm_rqst *call;
int status;
- nlm_get_host(host);
call = nlm_alloc_call(host);
if (call == NULL)
return -ENOMEM;
nlmclnt_locks_init_private(fl, host);
+ if (!fl->fl_u.nfs_fl.owner) {
+ /* lockowner allocation has failed */
+ nlmclnt_release_call(call);
+ return -ENOMEM;
+ }
/* Set up the argument struct */
nlmclnt_setlockargs(call, fl);
@@ -185,9 +189,6 @@ EXPORT_SYMBOL_GPL(nlmclnt_proc);
/*
* Allocate an NLM RPC call struct
- *
- * Note: the caller must hold a reference to host. In case of failure,
- * this reference will be released.
*/
struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
{
@@ -199,7 +200,7 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
atomic_set(&call->a_count, 1);
locks_init_lock(&call->a_args.lock.fl);
locks_init_lock(&call->a_res.lock.fl);
- call->a_host = host;
+ call->a_host = nlm_get_host(host);
return call;
}
if (signalled())
@@ -207,7 +208,6 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
printk("nlm_alloc_call: failed, waiting for memory\n");
schedule_timeout_interruptible(5*HZ);
}
- nlmclnt_release_host(host);
return NULL;
}
@@ -750,7 +750,7 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
dprintk("lockd: blocking lock attempt was interrupted by a signal.\n"
" Attempting to cancel lock.\n");
- req = nlm_alloc_call(nlm_get_host(host));
+ req = nlm_alloc_call(host);
if (!req)
return -ENOMEM;
req->a_flags = RPC_TASK_ASYNC;
diff --git a/fs/lockd/grace.c b/fs/lockd/grace.c
index 183cc1f0af1c..6d1ee7204c88 100644
--- a/fs/lockd/grace.c
+++ b/fs/lockd/grace.c
@@ -4,8 +4,10 @@
#include <linux/module.h>
#include <linux/lockd/bind.h>
+#include <net/net_namespace.h>
+
+#include "netns.h"
-static LIST_HEAD(grace_list);
static DEFINE_SPINLOCK(grace_lock);
/**
@@ -19,10 +21,12 @@ static DEFINE_SPINLOCK(grace_lock);
*
* This function is called to start a grace period.
*/
-void locks_start_grace(struct lock_manager *lm)
+void locks_start_grace(struct net *net, struct lock_manager *lm)
{
+ struct lockd_net *ln = net_generic(net, lockd_net_id);
+
spin_lock(&grace_lock);
- list_add(&lm->list, &grace_list);
+ list_add(&lm->list, &ln->grace_list);
spin_unlock(&grace_lock);
}
EXPORT_SYMBOL_GPL(locks_start_grace);
@@ -52,8 +56,10 @@ EXPORT_SYMBOL_GPL(locks_end_grace);
* to answer ordinary lock requests, and when they should accept only
* lock reclaims.
*/
-int locks_in_grace(void)
+int locks_in_grace(struct net *net)
{
- return !list_empty(&grace_list);
+ struct lockd_net *ln = net_generic(net, lockd_net_id);
+
+ return !list_empty(&ln->grace_list);
}
EXPORT_SYMBOL_GPL(locks_in_grace);
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index eb75ca7c2d6e..f9b22e58f78f 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -21,6 +21,8 @@
#include <net/ipv6.h>
+#include "netns.h"
+
#define NLMDBG_FACILITY NLMDBG_HOSTCACHE
#define NLM_HOST_NRHASH 32
#define NLM_HOST_REBIND (60 * HZ)
@@ -41,11 +43,10 @@ static struct hlist_head nlm_client_hosts[NLM_HOST_NRHASH];
hlist_for_each_entry_safe((host), (pos), (next), \
(chain), h_hash)
-static unsigned long next_gc;
static unsigned long nrhosts;
static DEFINE_MUTEX(nlm_host_mutex);
-static void nlm_gc_hosts(void);
+static void nlm_gc_hosts(struct net *net);
struct nlm_lookup_host_info {
const int server; /* search for server|client */
@@ -172,6 +173,7 @@ out:
static void nlm_destroy_host_locked(struct nlm_host *host)
{
struct rpc_clnt *clnt;
+ struct lockd_net *ln = net_generic(host->net, lockd_net_id);
dprintk("lockd: destroy host %s\n", host->h_name);
@@ -188,6 +190,7 @@ static void nlm_destroy_host_locked(struct nlm_host *host)
rpc_shutdown_client(clnt);
kfree(host);
+ ln->nrhosts--;
nrhosts--;
}
@@ -228,6 +231,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
struct hlist_node *pos;
struct nlm_host *host;
struct nsm_handle *nsm = NULL;
+ struct lockd_net *ln = net_generic(net, lockd_net_id);
dprintk("lockd: %s(host='%s', vers=%u, proto=%s)\n", __func__,
(hostname ? hostname : "<none>"), version,
@@ -262,6 +266,7 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
goto out;
hlist_add_head(&host->h_hash, chain);
+ ln->nrhosts++;
nrhosts++;
dprintk("lockd: %s created host %s (%s)\n", __func__,
@@ -326,7 +331,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
struct nsm_handle *nsm = NULL;
struct sockaddr *src_sap = svc_daddr(rqstp);
size_t src_len = rqstp->rq_daddrlen;
- struct net *net = rqstp->rq_xprt->xpt_net;
+ struct net *net = SVC_NET(rqstp);
struct nlm_lookup_host_info ni = {
.server = 1,
.sap = svc_addr(rqstp),
@@ -337,6 +342,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
.hostname_len = hostname_len,
.net = net,
};
+ struct lockd_net *ln = net_generic(net, lockd_net_id);
dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__,
(int)hostname_len, hostname, rqstp->rq_vers,
@@ -344,8 +350,8 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
mutex_lock(&nlm_host_mutex);
- if (time_after_eq(jiffies, next_gc))
- nlm_gc_hosts();
+ if (time_after_eq(jiffies, ln->next_gc))
+ nlm_gc_hosts(net);
chain = &nlm_server_hosts[nlm_hash_address(ni.sap)];
hlist_for_each_entry(host, pos, chain, h_hash) {
@@ -382,6 +388,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
memcpy(nlm_srcaddr(host), src_sap, src_len);
host->h_srcaddrlen = src_len;
hlist_add_head(&host->h_hash, chain);
+ ln->nrhosts++;
nrhosts++;
dprintk("lockd: %s created host %s (%s)\n",
@@ -565,6 +572,35 @@ void nlm_host_rebooted(const struct nlm_reboot *info)
nsm_release(nsm);
}
+static void nlm_complain_hosts(struct net *net)
+{
+ struct hlist_head *chain;
+ struct hlist_node *pos;
+ struct nlm_host *host;
+
+ if (net) {
+ struct lockd_net *ln = net_generic(net, lockd_net_id);
+
+ if (ln->nrhosts == 0)
+ return;
+ printk(KERN_WARNING "lockd: couldn't shutdown host module for net %p!\n", net);
+ dprintk("lockd: %lu hosts left in net %p:\n", ln->nrhosts, net);
+ } else {
+ if (nrhosts == 0)
+ return;
+ printk(KERN_WARNING "lockd: couldn't shutdown host module!\n");
+ dprintk("lockd: %lu hosts left:\n", nrhosts);
+ }
+
+ for_each_host(host, pos, chain, nlm_server_hosts) {
+ if (net && host->net != net)
+ continue;
+ dprintk(" %s (cnt %d use %d exp %ld net %p)\n",
+ host->h_name, atomic_read(&host->h_count),
+ host->h_inuse, host->h_expires, host->net);
+ }
+}
+
void
nlm_shutdown_hosts_net(struct net *net)
{
@@ -572,11 +608,10 @@ nlm_shutdown_hosts_net(struct net *net)
struct hlist_node *pos;
struct nlm_host *host;
- dprintk("lockd: shutting down host module\n");
mutex_lock(&nlm_host_mutex);
/* First, make all hosts eligible for gc */
- dprintk("lockd: nuking all hosts...\n");
+ dprintk("lockd: nuking all hosts in net %p...\n", net);
for_each_host(host, pos, chain, nlm_server_hosts) {
if (net && host->net != net)
continue;
@@ -588,8 +623,10 @@ nlm_shutdown_hosts_net(struct net *net)
}
/* Then, perform a garbage collection pass */
- nlm_gc_hosts();
+ nlm_gc_hosts(net);
mutex_unlock(&nlm_host_mutex);
+
+ nlm_complain_hosts(net);
}
/*
@@ -599,22 +636,8 @@ nlm_shutdown_hosts_net(struct net *net)
void
nlm_shutdown_hosts(void)
{
- struct hlist_head *chain;
- struct hlist_node *pos;
- struct nlm_host *host;
-
+ dprintk("lockd: shutting down host module\n");
nlm_shutdown_hosts_net(NULL);
-
- /* complain if any hosts are left */
- if (nrhosts != 0) {
- printk(KERN_WARNING "lockd: couldn't shutdown host module!\n");
- dprintk("lockd: %lu hosts left:\n", nrhosts);
- for_each_host(host, pos, chain, nlm_server_hosts) {
- dprintk(" %s (cnt %d use %d exp %ld net %p)\n",
- host->h_name, atomic_read(&host->h_count),
- host->h_inuse, host->h_expires, host->net);
- }
- }
}
/*
@@ -623,30 +646,39 @@ nlm_shutdown_hosts(void)
* mark & sweep for resources held by remote clients.
*/
static void
-nlm_gc_hosts(void)
+nlm_gc_hosts(struct net *net)
{
struct hlist_head *chain;
struct hlist_node *pos, *next;
struct nlm_host *host;
- dprintk("lockd: host garbage collection\n");
- for_each_host(host, pos, chain, nlm_server_hosts)
+ dprintk("lockd: host garbage collection for net %p\n", net);
+ for_each_host(host, pos, chain, nlm_server_hosts) {
+ if (net && host->net != net)
+ continue;
host->h_inuse = 0;
+ }
/* Mark all hosts that hold locks, blocks or shares */
- nlmsvc_mark_resources();
+ nlmsvc_mark_resources(net);
for_each_host_safe(host, pos, next, chain, nlm_server_hosts) {
+ if (net && host->net != net)
+ continue;
if (atomic_read(&host->h_count) || host->h_inuse
|| time_before(jiffies, host->h_expires)) {
dprintk("nlm_gc_hosts skipping %s "
- "(cnt %d use %d exp %ld)\n",
+ "(cnt %d use %d exp %ld net %p)\n",
host->h_name, atomic_read(&host->h_count),
- host->h_inuse, host->h_expires);
+ host->h_inuse, host->h_expires, host->net);
continue;
}
nlm_destroy_host_locked(host);
}
- next_gc = jiffies + NLM_HOST_COLLECT;
+ if (net) {
+ struct lockd_net *ln = net_generic(net, lockd_net_id);
+
+ ln->next_gc = jiffies + NLM_HOST_COLLECT;
+ }
}
diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h
index ce227e0fbc5c..4eee248ba96e 100644
--- a/fs/lockd/netns.h
+++ b/fs/lockd/netns.h
@@ -1,10 +1,17 @@
#ifndef __LOCKD_NETNS_H__
#define __LOCKD_NETNS_H__
+#include <linux/fs.h>
#include <net/netns/generic.h>
struct lockd_net {
unsigned int nlmsvc_users;
+ unsigned long next_gc;
+ unsigned long nrhosts;
+
+ struct delayed_work grace_period_end;
+ struct lock_manager lockd_manager;
+ struct list_head grace_list;
};
extern int lockd_net_id;
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 80938fda67e0..31a63f87b806 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -87,32 +87,36 @@ static unsigned long get_lockd_grace_period(void)
return nlm_timeout * 5 * HZ;
}
-static struct lock_manager lockd_manager = {
-};
-
-static void grace_ender(struct work_struct *not_used)
+static void grace_ender(struct work_struct *grace)
{
- locks_end_grace(&lockd_manager);
-}
+ struct delayed_work *dwork = container_of(grace, struct delayed_work,
+ work);
+ struct lockd_net *ln = container_of(dwork, struct lockd_net,
+ grace_period_end);
-static DECLARE_DELAYED_WORK(grace_period_end, grace_ender);
+ locks_end_grace(&ln->lockd_manager);
+}
-static void set_grace_period(void)
+static void set_grace_period(struct net *net)
{
unsigned long grace_period = get_lockd_grace_period();
+ struct lockd_net *ln = net_generic(net, lockd_net_id);
- locks_start_grace(&lockd_manager);
- cancel_delayed_work_sync(&grace_period_end);
- schedule_delayed_work(&grace_period_end, grace_period);
+ locks_start_grace(net, &ln->lockd_manager);
+ cancel_delayed_work_sync(&ln->grace_period_end);
+ schedule_delayed_work(&ln->grace_period_end, grace_period);
}
static void restart_grace(void)
{
if (nlmsvc_ops) {
- cancel_delayed_work_sync(&grace_period_end);
- locks_end_grace(&lockd_manager);
+ struct net *net = &init_net;
+ struct lockd_net *ln = net_generic(net, lockd_net_id);
+
+ cancel_delayed_work_sync(&ln->grace_period_end);
+ locks_end_grace(&ln->lockd_manager);
nlmsvc_invalidate_all();
- set_grace_period();
+ set_grace_period(net);
}
}
@@ -137,8 +141,6 @@ lockd(void *vrqstp)
nlm_timeout = LOCKD_DFLT_TIMEO;
nlmsvc_timeout = nlm_timeout * HZ;
- set_grace_period();
-
/*
* The main request loop. We don't terminate until the last
* NFS mount or NFS daemon has gone away.
@@ -184,8 +186,6 @@ lockd(void *vrqstp)
svc_process(rqstp);
}
flush_signals(current);
- cancel_delayed_work_sync(&grace_period_end);
- locks_end_grace(&lockd_manager);
if (nlmsvc_ops)
nlmsvc_invalidate_all();
nlm_shutdown_hosts();
@@ -266,6 +266,7 @@ static int lockd_up_net(struct svc_serv *serv, struct net *net)
error = make_socks(serv, net);
if (error < 0)
goto err_socks;
+ set_grace_period(net);
dprintk("lockd_up_net: per-net data created; net=%p\n", net);
return 0;
@@ -283,6 +284,8 @@ static void lockd_down_net(struct svc_serv *serv, struct net *net)
if (ln->nlmsvc_users) {
if (--ln->nlmsvc_users == 0) {
nlm_shutdown_hosts_net(net);
+ cancel_delayed_work_sync(&ln->grace_period_end);
+ locks_end_grace(&ln->lockd_manager);
svc_shutdown_net(serv, net);
dprintk("lockd_down_net: per-net data destroyed; net=%p\n", net);
}
@@ -589,6 +592,10 @@ module_param(nlm_max_connections, uint, 0644);
static int lockd_init_net(struct net *net)
{
+ struct lockd_net *ln = net_generic(net, lockd_net_id);
+
+ INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender);
+ INIT_LIST_HEAD(&ln->grace_list);
return 0;
}
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 9a41fdc19511..b147d1ae71fd 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -11,6 +11,7 @@
#include <linux/time.h>
#include <linux/lockd/lockd.h>
#include <linux/lockd/share.h>
+#include <linux/sunrpc/svc_xprt.h>
#define NLMDBG_FACILITY NLMDBG_CLIENT
@@ -151,7 +152,7 @@ nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->cookie = argp->cookie;
/* Don't accept requests during grace period */
- if (locks_in_grace()) {
+ if (locks_in_grace(SVC_NET(rqstp))) {
resp->status = nlm_lck_denied_grace_period;
return rpc_success;
}
@@ -161,7 +162,7 @@ nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
/* Try to cancel request. */
- resp->status = nlmsvc_cancel_blocked(file, &argp->lock);
+ resp->status = nlmsvc_cancel_blocked(SVC_NET(rqstp), file, &argp->lock);
dprintk("lockd: CANCEL status %d\n", ntohl(resp->status));
nlmsvc_release_host(host);
@@ -184,7 +185,7 @@ nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->cookie = argp->cookie;
/* Don't accept new lock requests during grace period */
- if (locks_in_grace()) {
+ if (locks_in_grace(SVC_NET(rqstp))) {
resp->status = nlm_lck_denied_grace_period;
return rpc_success;
}
@@ -194,7 +195,7 @@ nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
/* Now try to remove the lock */
- resp->status = nlmsvc_unlock(file, &argp->lock);
+ resp->status = nlmsvc_unlock(SVC_NET(rqstp), file, &argp->lock);
dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status));
nlmsvc_release_host(host);
@@ -256,6 +257,7 @@ static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
return rpc_system_err;
call = nlm_alloc_call(host);
+ nlmsvc_release_host(host);
if (call == NULL)
return rpc_system_err;
@@ -321,7 +323,7 @@ nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->cookie = argp->cookie;
/* Don't accept new lock requests during grace period */
- if (locks_in_grace() && !argp->reclaim) {
+ if (locks_in_grace(SVC_NET(rqstp)) && !argp->reclaim) {
resp->status = nlm_lck_denied_grace_period;
return rpc_success;
}
@@ -354,7 +356,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->cookie = argp->cookie;
/* Don't accept requests during grace period */
- if (locks_in_grace()) {
+ if (locks_in_grace(SVC_NET(rqstp))) {
resp->status = nlm_lck_denied_grace_period;
return rpc_success;
}
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index e46353f41a42..fb1a2bedbe97 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -26,7 +26,7 @@
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/svc_xprt.h>
#include <linux/lockd/nlm.h>
#include <linux/lockd/lockd.h>
#include <linux/kthread.h>
@@ -219,7 +219,6 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host,
struct nlm_block *block;
struct nlm_rqst *call = NULL;
- nlm_get_host(host);
call = nlm_alloc_call(host);
if (call == NULL)
return NULL;
@@ -447,11 +446,11 @@ nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file,
goto out;
}
- if (locks_in_grace() && !reclaim) {
+ if (locks_in_grace(SVC_NET(rqstp)) && !reclaim) {
ret = nlm_lck_denied_grace_period;
goto out;
}
- if (reclaim && !locks_in_grace()) {
+ if (reclaim && !locks_in_grace(SVC_NET(rqstp))) {
ret = nlm_lck_denied_grace_period;
goto out;
}
@@ -559,7 +558,7 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file,
goto out;
}
- if (locks_in_grace()) {
+ if (locks_in_grace(SVC_NET(rqstp))) {
ret = nlm_lck_denied_grace_period;
goto out;
}
@@ -603,7 +602,7 @@ out:
* must be removed.
*/
__be32
-nlmsvc_unlock(struct nlm_file *file, struct nlm_lock *lock)
+nlmsvc_unlock(struct net *net, struct nlm_file *file, struct nlm_lock *lock)
{
int error;
@@ -615,7 +614,7 @@ nlmsvc_unlock(struct nlm_file *file, struct nlm_lock *lock)
(long long)lock->fl.fl_end);
/* First, cancel any lock that might be there */
- nlmsvc_cancel_blocked(file, lock);
+ nlmsvc_cancel_blocked(net, file, lock);
lock->fl.fl_type = F_UNLCK;
error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
@@ -631,7 +630,7 @@ nlmsvc_unlock(struct nlm_file *file, struct nlm_lock *lock)
* The calling procedure must check whether the file can be closed.
*/
__be32
-nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
+nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *lock)
{
struct nlm_block *block;
int status = 0;
@@ -643,7 +642,7 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
(long long)lock->fl.fl_start,
(long long)lock->fl.fl_end);
- if (locks_in_grace())
+ if (locks_in_grace(net))
return nlm_lck_denied_grace_period;
mutex_lock(&file->f_mutex);
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index d27aab11f324..3009a365e082 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -11,6 +11,7 @@
#include <linux/time.h>
#include <linux/lockd/lockd.h>
#include <linux/lockd/share.h>
+#include <linux/sunrpc/svc_xprt.h>
#define NLMDBG_FACILITY NLMDBG_CLIENT
@@ -175,13 +176,14 @@ nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
{
struct nlm_host *host;
struct nlm_file *file;
+ struct net *net = SVC_NET(rqstp);
dprintk("lockd: CANCEL called\n");
resp->cookie = argp->cookie;
/* Don't accept requests during grace period */
- if (locks_in_grace()) {
+ if (locks_in_grace(net)) {
resp->status = nlm_lck_denied_grace_period;
return rpc_success;
}
@@ -191,7 +193,7 @@ nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp,
return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
/* Try to cancel request. */
- resp->status = cast_status(nlmsvc_cancel_blocked(file, &argp->lock));
+ resp->status = cast_status(nlmsvc_cancel_blocked(net, file, &argp->lock));
dprintk("lockd: CANCEL status %d\n", ntohl(resp->status));
nlmsvc_release_host(host);
@@ -208,13 +210,14 @@ nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
{
struct nlm_host *host;
struct nlm_file *file;
+ struct net *net = SVC_NET(rqstp);
dprintk("lockd: UNLOCK called\n");
resp->cookie = argp->cookie;
/* Don't accept new lock requests during grace period */
- if (locks_in_grace()) {
+ if (locks_in_grace(net)) {
resp->status = nlm_lck_denied_grace_period;
return rpc_success;
}
@@ -224,7 +227,7 @@ nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp,
return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success;
/* Now try to remove the lock */
- resp->status = cast_status(nlmsvc_unlock(file, &argp->lock));
+ resp->status = cast_status(nlmsvc_unlock(net, file, &argp->lock));
dprintk("lockd: UNLOCK status %d\n", ntohl(resp->status));
nlmsvc_release_host(host);
@@ -294,6 +297,7 @@ static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args
return rpc_system_err;
call = nlm_alloc_call(host);
+ nlmsvc_release_host(host);
if (call == NULL)
return rpc_system_err;
@@ -361,7 +365,7 @@ nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->cookie = argp->cookie;
/* Don't accept new lock requests during grace period */
- if (locks_in_grace() && !argp->reclaim) {
+ if (locks_in_grace(SVC_NET(rqstp)) && !argp->reclaim) {
resp->status = nlm_lck_denied_grace_period;
return rpc_success;
}
@@ -394,7 +398,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp,
resp->cookie = argp->cookie;
/* Don't accept requests during grace period */
- if (locks_in_grace()) {
+ if (locks_in_grace(SVC_NET(rqstp))) {
resp->status = nlm_lck_denied_grace_period;
return rpc_success;
}
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 2240d384d787..0deb5f6c9dd4 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -309,7 +309,8 @@ nlm_release_file(struct nlm_file *file)
* Helpers function for resource traversal
*
* nlmsvc_mark_host:
- * used by the garbage collector; simply sets h_inuse.
+ * used by the garbage collector; simply sets h_inuse only for those
+ * hosts, which passed network check.
* Always returns 0.
*
* nlmsvc_same_host:
@@ -320,12 +321,15 @@ nlm_release_file(struct nlm_file *file)
* returns 1 iff the host is a client.
* Used by nlmsvc_invalidate_all
*/
+
static int
-nlmsvc_mark_host(void *data, struct nlm_host *dummy)
+nlmsvc_mark_host(void *data, struct nlm_host *hint)
{
struct nlm_host *host = data;
- host->h_inuse = 1;
+ if ((hint->net == NULL) ||
+ (host->net == hint->net))
+ host->h_inuse = 1;
return 0;
}
@@ -358,10 +362,13 @@ nlmsvc_is_client(void *data, struct nlm_host *dummy)
* Mark all hosts that still hold resources
*/
void
-nlmsvc_mark_resources(void)
+nlmsvc_mark_resources(struct net *net)
{
- dprintk("lockd: nlmsvc_mark_resources\n");
- nlm_traverse_files(NULL, nlmsvc_mark_host, NULL);
+ struct nlm_host hint;
+
+ dprintk("lockd: nlmsvc_mark_resources for net %p\n", net);
+ hint.net = net;
+ nlm_traverse_files(&hint, nlmsvc_mark_host, NULL);
}
/*
diff --git a/fs/locks.c b/fs/locks.c
index 82c353304f9e..7e81bfc75164 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -200,11 +200,7 @@ void locks_release_private(struct file_lock *fl)
fl->fl_ops->fl_release_private(fl);
fl->fl_ops = NULL;
}
- if (fl->fl_lmops) {
- if (fl->fl_lmops->lm_release_private)
- fl->fl_lmops->lm_release_private(fl);
- fl->fl_lmops = NULL;
- }
+ fl->fl_lmops = NULL;
}
EXPORT_SYMBOL_GPL(locks_release_private);
@@ -427,18 +423,8 @@ static void lease_break_callback(struct file_lock *fl)
kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG);
}
-static void lease_release_private_callback(struct file_lock *fl)
-{
- if (!fl->fl_file)
- return;
-
- f_delown(fl->fl_file);
- fl->fl_file->f_owner.signum = 0;
-}
-
static const struct lock_manager_operations lease_manager_ops = {
.lm_break = lease_break_callback,
- .lm_release_private = lease_release_private_callback,
.lm_change = lease_modify,
};
@@ -580,12 +566,6 @@ static void locks_delete_lock(struct file_lock **thisfl_p)
fl->fl_next = NULL;
list_del_init(&fl->fl_link);
- fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync);
- if (fl->fl_fasync != NULL) {
- printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync);
- fl->fl_fasync = NULL;
- }
-
if (fl->fl_nspid) {
put_pid(fl->fl_nspid);
fl->fl_nspid = NULL;
@@ -1155,8 +1135,18 @@ int lease_modify(struct file_lock **before, int arg)
return error;
lease_clear_pending(fl, arg);
locks_wake_up_blocks(fl);
- if (arg == F_UNLCK)
+ if (arg == F_UNLCK) {
+ struct file *filp = fl->fl_file;
+
+ f_delown(filp);
+ filp->f_owner.signum = 0;
+ fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync);
+ if (fl->fl_fasync != NULL) {
+ printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync);
+ fl->fl_fasync = NULL;
+ }
locks_delete_lock(before);
+ }
return 0;
}
diff --git a/fs/minix/itree_v2.c b/fs/minix/itree_v2.c
index 13487ad16894..78e2d93e5c83 100644
--- a/fs/minix/itree_v2.c
+++ b/fs/minix/itree_v2.c
@@ -32,7 +32,8 @@ static int block_to_path(struct inode * inode, long block, int offsets[DEPTH])
if (block < 0) {
printk("MINIX-fs: block_to_path: block %ld < 0 on dev %s\n",
block, bdevname(sb->s_bdev, b));
- } else if (block >= (minix_sb(inode->i_sb)->s_max_size/sb->s_blocksize)) {
+ } else if ((u64)block * (u64)sb->s_blocksize >=
+ minix_sb(sb)->s_max_size) {
if (printk_ratelimit())
printk("MINIX-fs: block_to_path: "
"block %ld too big on dev %s\n",
diff --git a/fs/namei.c b/fs/namei.c
index 2ccc35c4dc24..db76b866a097 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -650,6 +650,121 @@ static inline void put_link(struct nameidata *nd, struct path *link, void *cooki
path_put(link);
}
+int sysctl_protected_symlinks __read_mostly = 1;
+int sysctl_protected_hardlinks __read_mostly = 1;
+
+/**
+ * may_follow_link - Check symlink following for unsafe situations
+ * @link: The path of the symlink
+ *
+ * In the case of the sysctl_protected_symlinks sysctl being enabled,
+ * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is
+ * in a sticky world-writable directory. This is to protect privileged
+ * processes from failing races against path names that may change out
+ * from under them by way of other users creating malicious symlinks.
+ * It will permit symlinks to be followed only when outside a sticky
+ * world-writable directory, or when the uid of the symlink and follower
+ * match, or when the directory owner matches the symlink's owner.
+ *
+ * Returns 0 if following the symlink is allowed, -ve on error.
+ */
+static inline int may_follow_link(struct path *link, struct nameidata *nd)
+{
+ const struct inode *inode;
+ const struct inode *parent;
+
+ if (!sysctl_protected_symlinks)
+ return 0;
+
+ /* Allowed if owner and follower match. */
+ inode = link->dentry->d_inode;
+ if (current_cred()->fsuid == inode->i_uid)
+ return 0;
+
+ /* Allowed if parent directory not sticky and world-writable. */
+ parent = nd->path.dentry->d_inode;
+ if ((parent->i_mode & (S_ISVTX|S_IWOTH)) != (S_ISVTX|S_IWOTH))
+ return 0;
+
+ /* Allowed if parent directory and link owner match. */
+ if (parent->i_uid == inode->i_uid)
+ return 0;
+
+ path_put_conditional(link, nd);
+ path_put(&nd->path);
+ audit_log_link_denied("follow_link", link);
+ return -EACCES;
+}
+
+/**
+ * safe_hardlink_source - Check for safe hardlink conditions
+ * @inode: the source inode to hardlink from
+ *
+ * Return false if at least one of the following conditions:
+ * - inode is not a regular file
+ * - inode is setuid
+ * - inode is setgid and group-exec
+ * - access failure for read and write
+ *
+ * Otherwise returns true.
+ */
+static bool safe_hardlink_source(struct inode *inode)
+{
+ umode_t mode = inode->i_mode;
+
+ /* Special files should not get pinned to the filesystem. */
+ if (!S_ISREG(mode))
+ return false;
+
+ /* Setuid files should not get pinned to the filesystem. */
+ if (mode & S_ISUID)
+ return false;
+
+ /* Executable setgid files should not get pinned to the filesystem. */
+ if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
+ return false;
+
+ /* Hardlinking to unreadable or unwritable sources is dangerous. */
+ if (inode_permission(inode, MAY_READ | MAY_WRITE))
+ return false;
+
+ return true;
+}
+
+/**
+ * may_linkat - Check permissions for creating a hardlink
+ * @link: the source to hardlink from
+ *
+ * Block hardlink when all of:
+ * - sysctl_protected_hardlinks enabled
+ * - fsuid does not match inode
+ * - hardlink source is unsafe (see safe_hardlink_source() above)
+ * - not CAP_FOWNER
+ *
+ * Returns 0 if successful, -ve on error.
+ */
+static int may_linkat(struct path *link)
+{
+ const struct cred *cred;
+ struct inode *inode;
+
+ if (!sysctl_protected_hardlinks)
+ return 0;
+
+ cred = current_cred();
+ inode = link->dentry->d_inode;
+
+ /* Source inode owner (or CAP_FOWNER) can hardlink all they like,
+ * otherwise, it must be a safe source.
+ */
+ if (cred->fsuid == inode->i_uid || safe_hardlink_source(inode) ||
+ capable(CAP_FOWNER))
+ return 0;
+
+ audit_log_link_denied("linkat", link);
+ return -EPERM;
+}
+
static __always_inline int
follow_link(struct path *link, struct nameidata *nd, void **p)
{
@@ -1818,6 +1933,9 @@ static int path_lookupat(int dfd, const char *name,
while (err > 0) {
void *cookie;
struct path link = path;
+ err = may_follow_link(&link, nd);
+ if (unlikely(err))
+ break;
nd->flags |= LOOKUP_PARENT;
err = follow_link(&link, nd, &cookie);
if (err)
@@ -2277,7 +2395,7 @@ static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode)
static int atomic_open(struct nameidata *nd, struct dentry *dentry,
struct path *path, struct file *file,
const struct open_flags *op,
- bool *want_write, bool need_lookup,
+ bool got_write, bool need_lookup,
int *opened)
{
struct inode *dir = nd->path.dentry->d_inode;
@@ -2296,11 +2414,11 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
goto out;
}
- mode = op->mode & S_IALLUGO;
+ mode = op->mode;
if ((open_flag & O_CREAT) && !IS_POSIXACL(dir))
mode &= ~current_umask();
- if (open_flag & O_EXCL) {
+ if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT)) {
open_flag &= ~O_TRUNC;
*opened |= FILE_CREATED;
}
@@ -2314,12 +2432,9 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
* Another problem is returing the "right" error value (e.g. for an
* O_EXCL open we want to return EEXIST not EROFS).
*/
- if ((open_flag & (O_CREAT | O_TRUNC)) ||
- (open_flag & O_ACCMODE) != O_RDONLY) {
- error = mnt_want_write(nd->path.mnt);
- if (!error) {
- *want_write = true;
- } else if (!(open_flag & O_CREAT)) {
+ if (((open_flag & (O_CREAT | O_TRUNC)) ||
+ (open_flag & O_ACCMODE) != O_RDONLY) && unlikely(!got_write)) {
+ if (!(open_flag & O_CREAT)) {
/*
* No O_CREATE -> atomicity not a requirement -> fall
* back to lookup + open
@@ -2327,17 +2442,17 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
goto no_open;
} else if (open_flag & (O_EXCL | O_TRUNC)) {
/* Fall back and fail with the right error */
- create_error = error;
+ create_error = -EROFS;
goto no_open;
} else {
/* No side effects, safe to clear O_CREAT */
- create_error = error;
+ create_error = -EROFS;
open_flag &= ~O_CREAT;
}
}
if (open_flag & O_CREAT) {
- error = may_o_create(&nd->path, dentry, op->mode);
+ error = may_o_create(&nd->path, dentry, mode);
if (error) {
create_error = error;
if (open_flag & O_EXCL)
@@ -2374,6 +2489,10 @@ static int atomic_open(struct nameidata *nd, struct dentry *dentry,
dput(dentry);
dentry = file->f_path.dentry;
}
+ if (create_error && dentry->d_inode == NULL) {
+ error = create_error;
+ goto out;
+ }
goto looked_up;
}
@@ -2438,7 +2557,7 @@ looked_up:
static int lookup_open(struct nameidata *nd, struct path *path,
struct file *file,
const struct open_flags *op,
- bool *want_write, int *opened)
+ bool got_write, int *opened)
{
struct dentry *dir = nd->path.dentry;
struct inode *dir_inode = dir->d_inode;
@@ -2456,7 +2575,7 @@ static int lookup_open(struct nameidata *nd, struct path *path,
goto out_no_open;
if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) {
- return atomic_open(nd, dentry, path, file, op, want_write,
+ return atomic_open(nd, dentry, path, file, op, got_write,
need_lookup, opened);
}
@@ -2480,10 +2599,10 @@ static int lookup_open(struct nameidata *nd, struct path *path,
* a permanent write count is taken through
* the 'struct file' in finish_open().
*/
- error = mnt_want_write(nd->path.mnt);
- if (error)
+ if (!got_write) {
+ error = -EROFS;
goto out_dput;
- *want_write = true;
+ }
*opened |= FILE_CREATED;
error = security_path_mknod(&nd->path, dentry, mode, 0);
if (error)
@@ -2513,7 +2632,7 @@ static int do_last(struct nameidata *nd, struct path *path,
struct dentry *dir = nd->path.dentry;
int open_flag = op->open_flag;
bool will_truncate = (open_flag & O_TRUNC) != 0;
- bool want_write = false;
+ bool got_write = false;
int acc_mode = op->acc_mode;
struct inode *inode;
bool symlink_ok = false;
@@ -2582,8 +2701,18 @@ static int do_last(struct nameidata *nd, struct path *path,
}
retry_lookup:
+ if (op->open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
+ error = mnt_want_write(nd->path.mnt);
+ if (!error)
+ got_write = true;
+ /*
+ * do _not_ fail yet - we might not need that or fail with
+ * a different error; let lookup_open() decide; we'll be
+ * dropping this one anyway.
+ */
+ }
mutex_lock(&dir->d_inode->i_mutex);
- error = lookup_open(nd, path, file, op, &want_write, opened);
+ error = lookup_open(nd, path, file, op, got_write, opened);
mutex_unlock(&dir->d_inode->i_mutex);
if (error <= 0) {
@@ -2608,22 +2737,23 @@ retry_lookup:
}
/*
- * It already exists.
+ * create/update audit record if it already exists.
*/
- audit_inode(pathname, path->dentry);
+ if (path->dentry->d_inode)
+ audit_inode(pathname, path->dentry);
/*
* If atomic_open() acquired write access it is dropped now due to
* possible mount and symlink following (this might be optimized away if
* necessary...)
*/
- if (want_write) {
+ if (got_write) {
mnt_drop_write(nd->path.mnt);
- want_write = false;
+ got_write = false;
}
error = -EEXIST;
- if (open_flag & O_EXCL)
+ if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT))
goto exit_dput;
error = follow_managed(path, nd->flags);
@@ -2684,7 +2814,7 @@ finish_open:
error = mnt_want_write(nd->path.mnt);
if (error)
goto out;
- want_write = true;
+ got_write = true;
}
finish_open_created:
error = may_open(&nd->path, acc_mode, open_flag);
@@ -2711,7 +2841,7 @@ opened:
goto exit_fput;
}
out:
- if (want_write)
+ if (got_write)
mnt_drop_write(nd->path.mnt);
path_put(&save_parent);
terminate_walk(nd);
@@ -2735,9 +2865,9 @@ stale_open:
nd->inode = dir->d_inode;
save_parent.mnt = NULL;
save_parent.dentry = NULL;
- if (want_write) {
+ if (got_write) {
mnt_drop_write(nd->path.mnt);
- want_write = false;
+ got_write = false;
}
retried = true;
goto retry_lookup;
@@ -2777,6 +2907,9 @@ static struct file *path_openat(int dfd, const char *pathname,
error = -ELOOP;
break;
}
+ error = may_follow_link(&link, nd);
+ if (unlikely(error))
+ break;
nd->flags |= LOOKUP_PARENT;
nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL);
error = follow_link(&link, nd, &cookie);
@@ -2846,6 +2979,7 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path
{
struct dentry *dentry = ERR_PTR(-EEXIST);
struct nameidata nd;
+ int err2;
int error = do_path_lookup(dfd, pathname, LOOKUP_PARENT, &nd);
if (error)
return ERR_PTR(error);
@@ -2859,16 +2993,19 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path
nd.flags &= ~LOOKUP_PARENT;
nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL;
+ /* don't fail immediately if it's r/o, at least try to report other errors */
+ err2 = mnt_want_write(nd.path.mnt);
/*
* Do the final lookup.
*/
mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
dentry = lookup_hash(&nd);
if (IS_ERR(dentry))
- goto fail;
+ goto unlock;
+ error = -EEXIST;
if (dentry->d_inode)
- goto eexist;
+ goto fail;
/*
* Special case - lookup gave negative, but... we had foo/bar/
* From the vfs_mknod() POV we just have a negative dentry -
@@ -2876,23 +3013,37 @@ struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path
* been asking for (non-existent) directory. -ENOENT for you.
*/
if (unlikely(!is_dir && nd.last.name[nd.last.len])) {
- dput(dentry);
- dentry = ERR_PTR(-ENOENT);
+ error = -ENOENT;
+ goto fail;
+ }
+ if (unlikely(err2)) {
+ error = err2;
goto fail;
}
*path = nd.path;
return dentry;
-eexist:
- dput(dentry);
- dentry = ERR_PTR(-EEXIST);
fail:
+ dput(dentry);
+ dentry = ERR_PTR(error);
+unlock:
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+ if (!err2)
+ mnt_drop_write(nd.path.mnt);
out:
path_put(&nd.path);
return dentry;
}
EXPORT_SYMBOL(kern_path_create);
+void done_path_create(struct path *path, struct dentry *dentry)
+{
+ dput(dentry);
+ mutex_unlock(&path->dentry->d_inode->i_mutex);
+ mnt_drop_write(path->mnt);
+ path_put(path);
+}
+EXPORT_SYMBOL(done_path_create);
+
struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, int is_dir)
{
char *tmp = getname(pathname);
@@ -2956,8 +3107,9 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
struct path path;
int error;
- if (S_ISDIR(mode))
- return -EPERM;
+ error = may_mknod(mode);
+ if (error)
+ return error;
dentry = user_path_create(dfd, filename, &path, 0);
if (IS_ERR(dentry))
@@ -2965,15 +3117,9 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
if (!IS_POSIXACL(path.dentry->d_inode))
mode &= ~current_umask();
- error = may_mknod(mode);
- if (error)
- goto out_dput;
- error = mnt_want_write(path.mnt);
- if (error)
- goto out_dput;
error = security_path_mknod(&path, dentry, mode, dev);
if (error)
- goto out_drop_write;
+ goto out;
switch (mode & S_IFMT) {
case 0: case S_IFREG:
error = vfs_create(path.dentry->d_inode,dentry,mode,true);
@@ -2986,13 +3132,8 @@ SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
error = vfs_mknod(path.dentry->d_inode,dentry,mode,0);
break;
}
-out_drop_write:
- mnt_drop_write(path.mnt);
-out_dput:
- dput(dentry);
- mutex_unlock(&path.dentry->d_inode->i_mutex);
- path_put(&path);
-
+out:
+ done_path_create(&path, dentry);
return error;
}
@@ -3038,19 +3179,10 @@ SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
if (!IS_POSIXACL(path.dentry->d_inode))
mode &= ~current_umask();
- error = mnt_want_write(path.mnt);
- if (error)
- goto out_dput;
error = security_path_mkdir(&path, dentry, mode);
- if (error)
- goto out_drop_write;
- error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
-out_drop_write:
- mnt_drop_write(path.mnt);
-out_dput:
- dput(dentry);
- mutex_unlock(&path.dentry->d_inode->i_mutex);
- path_put(&path);
+ if (!error)
+ error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
+ done_path_create(&path, dentry);
return error;
}
@@ -3144,6 +3276,9 @@ static long do_rmdir(int dfd, const char __user *pathname)
}
nd.flags &= ~LOOKUP_PARENT;
+ error = mnt_want_write(nd.path.mnt);
+ if (error)
+ goto exit1;
mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
dentry = lookup_hash(&nd);
@@ -3154,19 +3289,15 @@ static long do_rmdir(int dfd, const char __user *pathname)
error = -ENOENT;
goto exit3;
}
- error = mnt_want_write(nd.path.mnt);
- if (error)
- goto exit3;
error = security_path_rmdir(&nd.path, dentry);
if (error)
- goto exit4;
+ goto exit3;
error = vfs_rmdir(nd.path.dentry->d_inode, dentry);
-exit4:
- mnt_drop_write(nd.path.mnt);
exit3:
dput(dentry);
exit2:
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
+ mnt_drop_write(nd.path.mnt);
exit1:
path_put(&nd.path);
putname(name);
@@ -3233,6 +3364,9 @@ static long do_unlinkat(int dfd, const char __user *pathname)
goto exit1;
nd.flags &= ~LOOKUP_PARENT;
+ error = mnt_want_write(nd.path.mnt);
+ if (error)
+ goto exit1;
mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT);
dentry = lookup_hash(&nd);
@@ -3245,21 +3379,17 @@ static long do_unlinkat(int dfd, const char __user *pathname)
if (!inode)
goto slashes;
ihold(inode);
- error = mnt_want_write(nd.path.mnt);
- if (error)
- goto exit2;
error = security_path_unlink(&nd.path, dentry);
if (error)
- goto exit3;
+ goto exit2;
error = vfs_unlink(nd.path.dentry->d_inode, dentry);
-exit3:
- mnt_drop_write(nd.path.mnt);
- exit2:
+exit2:
dput(dentry);
}
mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
if (inode)
iput(inode); /* truncate the inode here */
+ mnt_drop_write(nd.path.mnt);
exit1:
path_put(&nd.path);
putname(name);
@@ -3324,19 +3454,10 @@ SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
if (IS_ERR(dentry))
goto out_putname;
- error = mnt_want_write(path.mnt);
- if (error)
- goto out_dput;
error = security_path_symlink(&path, dentry, from);
- if (error)
- goto out_drop_write;
- error = vfs_symlink(path.dentry->d_inode, dentry, from);
-out_drop_write:
- mnt_drop_write(path.mnt);
-out_dput:
- dput(dentry);
- mutex_unlock(&path.dentry->d_inode->i_mutex);
- path_put(&path);
+ if (!error)
+ error = vfs_symlink(path.dentry->d_inode, dentry, from);
+ done_path_create(&path, dentry);
out_putname:
putname(from);
return error;
@@ -3436,19 +3557,15 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
error = -EXDEV;
if (old_path.mnt != new_path.mnt)
goto out_dput;
- error = mnt_want_write(new_path.mnt);
- if (error)
+ error = may_linkat(&old_path);
+ if (unlikely(error))
goto out_dput;
error = security_path_link(old_path.dentry, &new_path, new_dentry);
if (error)
- goto out_drop_write;
+ goto out_dput;
error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry);
-out_drop_write:
- mnt_drop_write(new_path.mnt);
out_dput:
- dput(new_dentry);
- mutex_unlock(&new_path.dentry->d_inode->i_mutex);
- path_put(&new_path);
+ done_path_create(&new_path, new_dentry);
out:
path_put(&old_path);
@@ -3644,6 +3761,10 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
if (newnd.last_type != LAST_NORM)
goto exit2;
+ error = mnt_want_write(oldnd.path.mnt);
+ if (error)
+ goto exit2;
+
oldnd.flags &= ~LOOKUP_PARENT;
newnd.flags &= ~LOOKUP_PARENT;
newnd.flags |= LOOKUP_RENAME_TARGET;
@@ -3679,23 +3800,19 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
if (new_dentry == trap)
goto exit5;
- error = mnt_want_write(oldnd.path.mnt);
- if (error)
- goto exit5;
error = security_path_rename(&oldnd.path, old_dentry,
&newnd.path, new_dentry);
if (error)
- goto exit6;
+ goto exit5;
error = vfs_rename(old_dir->d_inode, old_dentry,
new_dir->d_inode, new_dentry);
-exit6:
- mnt_drop_write(oldnd.path.mnt);
exit5:
dput(new_dentry);
exit4:
dput(old_dentry);
exit3:
unlock_rename(new_dir, old_dir);
+ mnt_drop_write(oldnd.path.mnt);
exit2:
path_put(&newnd.path);
putname(to);
diff --git a/fs/namespace.c b/fs/namespace.c
index c53d3381b0d0..4d31f73e2561 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -283,24 +283,22 @@ static int mnt_is_readonly(struct vfsmount *mnt)
}
/*
- * Most r/o checks on a fs are for operations that take
- * discrete amounts of time, like a write() or unlink().
- * We must keep track of when those operations start
- * (for permission checks) and when they end, so that
- * we can determine when writes are able to occur to
- * a filesystem.
+ * Most r/o & frozen checks on a fs are for operations that take discrete
+ * amounts of time, like a write() or unlink(). We must keep track of when
+ * those operations start (for permission checks) and when they end, so that we
+ * can determine when writes are able to occur to a filesystem.
*/
/**
- * mnt_want_write - get write access to a mount
+ * __mnt_want_write - get write access to a mount without freeze protection
* @m: the mount on which to take a write
*
- * This tells the low-level filesystem that a write is
- * about to be performed to it, and makes sure that
- * writes are allowed before returning success. When
- * the write operation is finished, mnt_drop_write()
- * must be called. This is effectively a refcount.
+ * This tells the low-level filesystem that a write is about to be performed to
+ * it, and makes sure that writes are allowed (mnt it read-write) before
+ * returning success. This operation does not protect against filesystem being
+ * frozen. When the write operation is finished, __mnt_drop_write() must be
+ * called. This is effectively a refcount.
*/
-int mnt_want_write(struct vfsmount *m)
+int __mnt_want_write(struct vfsmount *m)
{
struct mount *mnt = real_mount(m);
int ret = 0;
@@ -326,6 +324,27 @@ int mnt_want_write(struct vfsmount *m)
ret = -EROFS;
}
preempt_enable();
+
+ return ret;
+}
+
+/**
+ * mnt_want_write - get write access to a mount
+ * @m: the mount on which to take a write
+ *
+ * This tells the low-level filesystem that a write is about to be performed to
+ * it, and makes sure that writes are allowed (mount is read-write, filesystem
+ * is not frozen) before returning success. When the write operation is
+ * finished, mnt_drop_write() must be called. This is effectively a refcount.
+ */
+int mnt_want_write(struct vfsmount *m)
+{
+ int ret;
+
+ sb_start_write(m->mnt_sb);
+ ret = __mnt_want_write(m);
+ if (ret)
+ sb_end_write(m->mnt_sb);
return ret;
}
EXPORT_SYMBOL_GPL(mnt_want_write);
@@ -355,38 +374,76 @@ int mnt_clone_write(struct vfsmount *mnt)
EXPORT_SYMBOL_GPL(mnt_clone_write);
/**
- * mnt_want_write_file - get write access to a file's mount
+ * __mnt_want_write_file - get write access to a file's mount
* @file: the file who's mount on which to take a write
*
- * This is like mnt_want_write, but it takes a file and can
+ * This is like __mnt_want_write, but it takes a file and can
* do some optimisations if the file is open for write already
*/
-int mnt_want_write_file(struct file *file)
+int __mnt_want_write_file(struct file *file)
{
struct inode *inode = file->f_dentry->d_inode;
+
if (!(file->f_mode & FMODE_WRITE) || special_file(inode->i_mode))
- return mnt_want_write(file->f_path.mnt);
+ return __mnt_want_write(file->f_path.mnt);
else
return mnt_clone_write(file->f_path.mnt);
}
+
+/**
+ * mnt_want_write_file - get write access to a file's mount
+ * @file: the file who's mount on which to take a write
+ *
+ * This is like mnt_want_write, but it takes a file and can
+ * do some optimisations if the file is open for write already
+ */
+int mnt_want_write_file(struct file *file)
+{
+ int ret;
+
+ sb_start_write(file->f_path.mnt->mnt_sb);
+ ret = __mnt_want_write_file(file);
+ if (ret)
+ sb_end_write(file->f_path.mnt->mnt_sb);
+ return ret;
+}
EXPORT_SYMBOL_GPL(mnt_want_write_file);
/**
- * mnt_drop_write - give up write access to a mount
+ * __mnt_drop_write - give up write access to a mount
* @mnt: the mount on which to give up write access
*
* Tells the low-level filesystem that we are done
* performing writes to it. Must be matched with
- * mnt_want_write() call above.
+ * __mnt_want_write() call above.
*/
-void mnt_drop_write(struct vfsmount *mnt)
+void __mnt_drop_write(struct vfsmount *mnt)
{
preempt_disable();
mnt_dec_writers(real_mount(mnt));
preempt_enable();
}
+
+/**
+ * mnt_drop_write - give up write access to a mount
+ * @mnt: the mount on which to give up write access
+ *
+ * Tells the low-level filesystem that we are done performing writes to it and
+ * also allows filesystem to be frozen again. Must be matched with
+ * mnt_want_write() call above.
+ */
+void mnt_drop_write(struct vfsmount *mnt)
+{
+ __mnt_drop_write(mnt);
+ sb_end_write(mnt->mnt_sb);
+}
EXPORT_SYMBOL_GPL(mnt_drop_write);
+void __mnt_drop_write_file(struct file *file)
+{
+ __mnt_drop_write(file->f_path.mnt);
+}
+
void mnt_drop_write_file(struct file *file)
{
mnt_drop_write(file->f_path.mnt);
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index f90f4f5cd421..db7ad719628a 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -30,7 +30,7 @@ config NFS_FS
If unsure, say N.
config NFS_V2
- bool "NFS client support for NFS version 2"
+ tristate "NFS client support for NFS version 2"
depends on NFS_FS
default y
help
@@ -40,7 +40,7 @@ config NFS_V2
If unsure, say Y.
config NFS_V3
- bool "NFS client support for NFS version 3"
+ tristate "NFS client support for NFS version 3"
depends on NFS_FS
default y
help
@@ -72,7 +72,7 @@ config NFS_V3_ACL
If unsure, say N.
config NFS_V4
- bool "NFS client support for NFS version 4"
+ tristate "NFS client support for NFS version 4"
depends on NFS_FS
select SUNRPC_GSS
select KEYS
@@ -86,11 +86,18 @@ config NFS_V4
If unsure, say Y.
+config NFS_SWAP
+ bool "Provide swap over NFS support"
+ default n
+ depends on NFS_FS
+ select SUNRPC_SWAP
+ help
+ This option enables swapon to work on files located on NFS mounts.
+
config NFS_V4_1
bool "NFS client support for NFSv4.1 (EXPERIMENTAL)"
- depends on NFS_FS && NFS_V4 && EXPERIMENTAL
+ depends on NFS_V4 && EXPERIMENTAL
select SUNRPC_BACKCHANNEL
- select PNFS_FILE_LAYOUT
help
This option enables support for minor version 1 of the NFSv4 protocol
(RFC 5661) in the kernel's NFS client.
@@ -99,15 +106,17 @@ config NFS_V4_1
config PNFS_FILE_LAYOUT
tristate
+ depends on NFS_V4_1
+ default m
config PNFS_BLOCK
tristate
- depends on NFS_FS && NFS_V4_1 && BLK_DEV_DM
+ depends on NFS_V4_1 && BLK_DEV_DM
default m
config PNFS_OBJLAYOUT
tristate
- depends on NFS_FS && NFS_V4_1 && SCSI_OSD_ULD
+ depends on NFS_V4_1 && SCSI_OSD_ULD
default m
config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 7ddd45d9f170..8bf3a3f6925a 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -9,17 +9,23 @@ nfs-y := client.o dir.o file.o getroot.o inode.o super.o \
write.o namespace.o mount_clnt.o \
dns_resolve.o cache_lib.o
nfs-$(CONFIG_ROOT_NFS) += nfsroot.o
-nfs-$(CONFIG_NFS_V2) += proc.o nfs2xdr.o
-nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o
-nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
-nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
- delegation.o idmap.o \
- callback.o callback_xdr.o callback_proc.o \
- nfs4namespace.o
-nfs-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o
-nfs-$(CONFIG_SYSCTL) += sysctl.o
+nfs-$(CONFIG_SYSCTL) += sysctl.o
nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o
+obj-$(CONFIG_NFS_V2) += nfs2.o
+nfs2-y := nfs2super.o proc.o nfs2xdr.o
+
+obj-$(CONFIG_NFS_V3) += nfs3.o
+nfs3-y := nfs3super.o nfs3client.o nfs3proc.o nfs3xdr.o
+nfs3-$(CONFIG_NFS_V3_ACL) += nfs3acl.o
+
+obj-$(CONFIG_NFS_V4) += nfs4.o
+nfs4-y := nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o nfs4super.o nfs4file.o \
+ delegation.o idmap.o callback.o callback_xdr.o callback_proc.o \
+ nfs4namespace.o nfs4getroot.o nfs4client.o
+nfs4-$(CONFIG_SYSCTL) += nfs4sysctl.o
+nfs4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o
+
obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 7ae8a608956f..dd392ed5f2e2 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -228,6 +228,14 @@ bl_end_par_io_read(void *data, int unused)
schedule_work(&rdata->task.u.tk_work);
}
+static bool
+bl_check_alignment(u64 offset, u32 len, unsigned long blkmask)
+{
+ if ((offset & blkmask) || (len & blkmask))
+ return false;
+ return true;
+}
+
static enum pnfs_try_status
bl_read_pagelist(struct nfs_read_data *rdata)
{
@@ -244,6 +252,9 @@ bl_read_pagelist(struct nfs_read_data *rdata)
dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);
+ if (!bl_check_alignment(f_offset, rdata->args.count, PAGE_CACHE_MASK))
+ goto use_mds;
+
par = alloc_parallel(rdata);
if (!par)
goto use_mds;
@@ -552,7 +563,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
struct bio *bio = NULL;
struct pnfs_block_extent *be = NULL, *cow_read = NULL;
sector_t isect, last_isect = 0, extent_length = 0;
- struct parallel_io *par;
+ struct parallel_io *par = NULL;
loff_t offset = wdata->args.offset;
size_t count = wdata->args.count;
struct page **pages = wdata->args.pages;
@@ -563,6 +574,10 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
+ /* Check for alignment first */
+ if (!bl_check_alignment(offset, count, PAGE_CACHE_MASK))
+ goto out_mds;
+
/* At this point, wdata->pages is a (sequential) list of nfs_pages.
* We want to write each, and if there is an error set pnfs_error
* to have it redone using nfs.
@@ -996,14 +1011,32 @@ bl_clear_layoutdriver(struct nfs_server *server)
return 0;
}
+static void
+bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+ if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK))
+ nfs_pageio_reset_read_mds(pgio);
+ else
+ pnfs_generic_pg_init_read(pgio, req);
+}
+
+static void
+bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
+{
+ if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK))
+ nfs_pageio_reset_write_mds(pgio);
+ else
+ pnfs_generic_pg_init_write(pgio, req);
+}
+
static const struct nfs_pageio_ops bl_pg_read_ops = {
- .pg_init = pnfs_generic_pg_init_read,
+ .pg_init = bl_pg_init_read,
.pg_test = pnfs_generic_pg_test,
.pg_doio = pnfs_generic_pg_readpages,
};
static const struct nfs_pageio_ops bl_pg_write_ops = {
- .pg_init = pnfs_generic_pg_init_write,
+ .pg_init = bl_pg_init_write,
.pg_test = pnfs_generic_pg_test,
.pg_doio = pnfs_generic_pg_writepages,
};
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 23ff18fe080a..4c8459e5bdee 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -37,31 +37,7 @@ static struct nfs_callback_data nfs_callback_info[NFS4_MAX_MINOR_VERSION + 1];
static DEFINE_MUTEX(nfs_callback_mutex);
static struct svc_program nfs4_callback_program;
-unsigned int nfs_callback_set_tcpport;
-unsigned short nfs_callback_tcpport;
unsigned short nfs_callback_tcpport6;
-#define NFS_CALLBACK_MAXPORTNR (65535U)
-
-static int param_set_portnr(const char *val, const struct kernel_param *kp)
-{
- unsigned long num;
- int ret;
-
- if (!val)
- return -EINVAL;
- ret = strict_strtoul(val, 0, &num);
- if (ret == -EINVAL || num > NFS_CALLBACK_MAXPORTNR)
- return -EINVAL;
- *((unsigned int *)kp->arg) = num;
- return 0;
-}
-static struct kernel_param_ops param_ops_portnr = {
- .set = param_set_portnr,
- .get = param_get_uint,
-};
-#define param_check_portnr(name, p) __param_check(name, p, unsigned int);
-
-module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644);
/*
* This is the NFSv4 callback kernel thread.
@@ -265,6 +241,10 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
ret = -ENOMEM;
goto out_err;
}
+ /* As there is only one thread we need to over-ride the
+ * default maximum of 80 connections
+ */
+ serv->sv_maxconn = 1024;
ret = svc_bind(serv, net);
if (ret < 0) {
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index a5527c90a5aa..b44d7b128b71 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -192,7 +192,7 @@ extern __be32 nfs4_callback_getattr(struct cb_getattrargs *args,
struct cb_process_state *cps);
extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
struct cb_process_state *cps);
-#ifdef CONFIG_NFS_V4
+#if IS_ENABLED(CONFIG_NFS_V4)
extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt);
extern void nfs_callback_down(int minorversion);
extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation,
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index e64b01d2a338..742ff4ffced7 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -863,7 +863,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
.drc_status = 0,
.clp = NULL,
.slotid = NFS4_NO_SLOT,
- .net = rqstp->rq_xprt->xpt_net,
+ .net = SVC_NET(rqstp),
};
unsigned int nops = 0;
@@ -879,7 +879,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
return rpc_garbage_args;
if (hdr_arg.minorversion == 0) {
- cps.clp = nfs4_find_client_ident(rqstp->rq_xprt->xpt_net, hdr_arg.cb_ident);
+ cps.clp = nfs4_find_client_ident(SVC_NET(rqstp), hdr_arg.cb_ident);
if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp))
return rpc_drop_reply;
}
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index f005b5bebdc7..9fc0d9dfc91b 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -51,54 +51,23 @@
#include "internal.h"
#include "fscache.h"
#include "pnfs.h"
+#include "nfs.h"
#include "netns.h"
#define NFSDBG_FACILITY NFSDBG_CLIENT
static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
-#ifdef CONFIG_NFS_V4
-
-/*
- * Get a unique NFSv4.0 callback identifier which will be used
- * by the V4.0 callback service to lookup the nfs_client struct
- */
-static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
-{
- int ret = 0;
- struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
-
- if (clp->rpc_ops->version != 4 || minorversion != 0)
- return ret;
-retry:
- if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL))
- return -ENOMEM;
- spin_lock(&nn->nfs_client_lock);
- ret = idr_get_new(&nn->cb_ident_idr, clp, &clp->cl_cb_ident);
- spin_unlock(&nn->nfs_client_lock);
- if (ret == -EAGAIN)
- goto retry;
- return ret;
-}
-#endif /* CONFIG_NFS_V4 */
-
-/*
- * Turn off NFSv4 uid/gid mapping when using AUTH_SYS
- */
-static bool nfs4_disable_idmapping = true;
+static DEFINE_SPINLOCK(nfs_version_lock);
+static DEFINE_MUTEX(nfs_version_mutex);
+static LIST_HEAD(nfs_versions);
/*
* RPC cruft for NFS
*/
static const struct rpc_version *nfs_version[5] = {
-#ifdef CONFIG_NFS_V2
- [2] = &nfs_version2,
-#endif
-#ifdef CONFIG_NFS_V3
- [3] = &nfs_version3,
-#endif
-#ifdef CONFIG_NFS_V4
- [4] = &nfs_version4,
-#endif
+ [2] = NULL,
+ [3] = NULL,
+ [4] = NULL,
};
const struct rpc_program nfs_program = {
@@ -114,32 +83,64 @@ struct rpc_stat nfs_rpcstat = {
.program = &nfs_program
};
+static struct nfs_subversion *find_nfs_version(unsigned int version)
+{
+ struct nfs_subversion *nfs;
+ spin_lock(&nfs_version_lock);
-#ifdef CONFIG_NFS_V3_ACL
-static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program };
-static const struct rpc_version *nfsacl_version[] = {
- [3] = &nfsacl_version3,
-};
+ list_for_each_entry(nfs, &nfs_versions, list) {
+ if (nfs->rpc_ops->version == version) {
+ spin_unlock(&nfs_version_lock);
+ return nfs;
+ }
+ };
-const struct rpc_program nfsacl_program = {
- .name = "nfsacl",
- .number = NFS_ACL_PROGRAM,
- .nrvers = ARRAY_SIZE(nfsacl_version),
- .version = nfsacl_version,
- .stats = &nfsacl_rpcstat,
-};
-#endif /* CONFIG_NFS_V3_ACL */
-
-struct nfs_client_initdata {
- unsigned long init_flags;
- const char *hostname;
- const struct sockaddr *addr;
- size_t addrlen;
- const struct nfs_rpc_ops *rpc_ops;
- int proto;
- u32 minorversion;
- struct net *net;
-};
+ spin_unlock(&nfs_version_lock);
+ return ERR_PTR(-EPROTONOSUPPORT);;
+}
+
+struct nfs_subversion *get_nfs_version(unsigned int version)
+{
+ struct nfs_subversion *nfs = find_nfs_version(version);
+
+ if (IS_ERR(nfs)) {
+ mutex_lock(&nfs_version_mutex);
+ request_module("nfs%d", version);
+ nfs = find_nfs_version(version);
+ mutex_unlock(&nfs_version_mutex);
+ }
+
+ if (!IS_ERR(nfs))
+ try_module_get(nfs->owner);
+ return nfs;
+}
+
+void put_nfs_version(struct nfs_subversion *nfs)
+{
+ module_put(nfs->owner);
+}
+
+void register_nfs_version(struct nfs_subversion *nfs)
+{
+ spin_lock(&nfs_version_lock);
+
+ list_add(&nfs->list, &nfs_versions);
+ nfs_version[nfs->rpc_ops->version] = nfs->rpc_vers;
+
+ spin_unlock(&nfs_version_lock);
+}
+EXPORT_SYMBOL_GPL(register_nfs_version);
+
+void unregister_nfs_version(struct nfs_subversion *nfs)
+{
+ spin_lock(&nfs_version_lock);
+
+ nfs_version[nfs->rpc_ops->version] = NULL;
+ list_del(&nfs->list);
+
+ spin_unlock(&nfs_version_lock);
+}
+EXPORT_SYMBOL_GPL(unregister_nfs_version);
/*
* Allocate a shared client record
@@ -147,7 +148,7 @@ struct nfs_client_initdata {
* Since these are allocated/deallocated very rarely, we don't
* bother putting them in a slab cache...
*/
-static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
+struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
{
struct nfs_client *clp;
struct rpc_cred *cred;
@@ -156,7 +157,10 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
goto error_0;
- clp->rpc_ops = cl_init->rpc_ops;
+ clp->cl_nfs_mod = cl_init->nfs_mod;
+ try_module_get(clp->cl_nfs_mod->owner);
+
+ clp->rpc_ops = clp->cl_nfs_mod->rpc_ops;
atomic_set(&clp->cl_count, 1);
clp->cl_cons_state = NFS_CS_INITING;
@@ -177,18 +181,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
clp->cl_proto = cl_init->proto;
clp->cl_net = get_net(cl_init->net);
-#ifdef CONFIG_NFS_V4
- err = nfs_get_cb_ident_idr(clp, cl_init->minorversion);
- if (err)
- goto error_cleanup;
-
- spin_lock_init(&clp->cl_lock);
- INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
- rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
- clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
- clp->cl_minorversion = cl_init->minorversion;
- clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
-#endif
cred = rpc_lookup_machine_cred("*");
if (!IS_ERR(cred))
clp->cl_machine_cred = cred;
@@ -197,51 +189,14 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
return clp;
error_cleanup:
+ put_nfs_version(clp->cl_nfs_mod);
kfree(clp);
error_0:
return ERR_PTR(err);
}
+EXPORT_SYMBOL_GPL(nfs_alloc_client);
-#ifdef CONFIG_NFS_V4
-#ifdef CONFIG_NFS_V4_1
-static void nfs4_shutdown_session(struct nfs_client *clp)
-{
- if (nfs4_has_session(clp)) {
- nfs4_destroy_session(clp->cl_session);
- nfs4_destroy_clientid(clp);
- }
-
-}
-#else /* CONFIG_NFS_V4_1 */
-static void nfs4_shutdown_session(struct nfs_client *clp)
-{
-}
-#endif /* CONFIG_NFS_V4_1 */
-
-/*
- * Destroy the NFS4 callback service
- */
-static void nfs4_destroy_callback(struct nfs_client *clp)
-{
- if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
- nfs_callback_down(clp->cl_mvops->minor_version);
-}
-
-static void nfs4_shutdown_client(struct nfs_client *clp)
-{
- if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
- nfs4_kill_renewd(clp);
- nfs4_shutdown_session(clp);
- nfs4_destroy_callback(clp);
- if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
- nfs_idmap_delete(clp);
-
- rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
- kfree(clp->cl_serverowner);
- kfree(clp->cl_serverscope);
- kfree(clp->cl_implid);
-}
-
+#if IS_ENABLED(CONFIG_NFS_V4)
/* idr_remove_all is not needed as all id's are removed by nfs_put_client */
void nfs_cleanup_cb_ident_idr(struct net *net)
{
@@ -264,16 +219,7 @@ static void pnfs_init_server(struct nfs_server *server)
rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC");
}
-static void nfs4_destroy_server(struct nfs_server *server)
-{
- nfs4_purge_state_owners(server);
-}
-
#else
-static void nfs4_shutdown_client(struct nfs_client *clp)
-{
-}
-
void nfs_cleanup_cb_ident_idr(struct net *net)
{
}
@@ -291,12 +237,10 @@ static void pnfs_init_server(struct nfs_server *server)
/*
* Destroy a shared client record
*/
-static void nfs_free_client(struct nfs_client *clp)
+void nfs_free_client(struct nfs_client *clp)
{
dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version);
- nfs4_shutdown_client(clp);
-
nfs_fscache_release_client_cookie(clp);
/* -EIO all pending I/O */
@@ -307,11 +251,13 @@ static void nfs_free_client(struct nfs_client *clp)
put_rpccred(clp->cl_machine_cred);
put_net(clp->cl_net);
+ put_nfs_version(clp->cl_nfs_mod);
kfree(clp->cl_hostname);
kfree(clp);
dprintk("<-- nfs_free_client()\n");
}
+EXPORT_SYMBOL_GPL(nfs_free_client);
/*
* Release a reference to a shared client record
@@ -333,7 +279,7 @@ void nfs_put_client(struct nfs_client *clp)
BUG_ON(!list_empty(&clp->cl_superblocks));
- nfs_free_client(clp);
+ clp->rpc_ops->free_client(clp);
}
}
EXPORT_SYMBOL_GPL(nfs_put_client);
@@ -412,8 +358,8 @@ static int nfs_sockaddr_cmp_ip4(const struct sockaddr *sa1,
* Test if two socket addresses represent the same actual socket,
* by comparing (only) relevant fields, excluding the port number.
*/
-static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
- const struct sockaddr *sa2)
+int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
+ const struct sockaddr *sa2)
{
if (sa1->sa_family != sa2->sa_family)
return 0;
@@ -426,6 +372,7 @@ static int nfs_sockaddr_match_ipaddr(const struct sockaddr *sa1,
}
return 0;
}
+EXPORT_SYMBOL_GPL(nfs_sockaddr_match_ipaddr);
#endif /* CONFIG_NFS_V4_1 */
/*
@@ -447,33 +394,6 @@ static int nfs_sockaddr_cmp(const struct sockaddr *sa1,
return 0;
}
-#if defined(CONFIG_NFS_V4_1)
-/* Common match routine for v4.0 and v4.1 callback services */
-static bool nfs4_cb_match_client(const struct sockaddr *addr,
- struct nfs_client *clp, u32 minorversion)
-{
- struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
-
- /* Don't match clients that failed to initialise */
- if (!(clp->cl_cons_state == NFS_CS_READY ||
- clp->cl_cons_state == NFS_CS_SESSION_INITING))
- return false;
-
- smp_rmb();
-
- /* Match the version and minorversion */
- if (clp->rpc_ops->version != 4 ||
- clp->cl_minorversion != minorversion)
- return false;
-
- /* Match only the IP address, not the port number */
- if (!nfs_sockaddr_match_ipaddr(addr, clap))
- return false;
-
- return true;
-}
-#endif /* CONFIG_NFS_V4_1 */
-
/*
* Find an nfs_client on the list that matches the initialisation data
* that is supplied.
@@ -491,7 +411,7 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
continue;
/* Different NFS versions cannot share the same nfs_client */
- if (clp->rpc_ops != data->rpc_ops)
+ if (clp->rpc_ops != data->nfs_mod->rpc_ops)
continue;
if (clp->cl_proto != data->proto)
@@ -519,6 +439,7 @@ int nfs_wait_client_init_complete(const struct nfs_client *clp)
return wait_event_killable(nfs_client_active_wq,
nfs_client_init_is_complete(clp));
}
+EXPORT_SYMBOL_GPL(nfs_wait_client_init_complete);
/*
* Found an existing client. Make sure it's ready before returning.
@@ -552,7 +473,7 @@ nfs_found_client(const struct nfs_client_initdata *cl_init,
* Look up a client by IP address and protocol version
* - creates a new record if one doesn't yet exist
*/
-static struct nfs_client *
+struct nfs_client *
nfs_get_client(const struct nfs_client_initdata *cl_init,
const struct rpc_timeout *timeparms,
const char *ip_addr,
@@ -560,9 +481,10 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
{
struct nfs_client *clp, *new = NULL;
struct nfs_net *nn = net_generic(cl_init->net, nfs_net_id);
+ const struct nfs_rpc_ops *rpc_ops = cl_init->nfs_mod->rpc_ops;
dprintk("--> nfs_get_client(%s,v%u)\n",
- cl_init->hostname ?: "", cl_init->rpc_ops->version);
+ cl_init->hostname ?: "", rpc_ops->version);
/* see if the client already exists */
do {
@@ -572,27 +494,27 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
if (clp) {
spin_unlock(&nn->nfs_client_lock);
if (new)
- nfs_free_client(new);
+ new->rpc_ops->free_client(new);
return nfs_found_client(cl_init, clp);
}
if (new) {
list_add(&new->cl_share_link, &nn->nfs_client_list);
spin_unlock(&nn->nfs_client_lock);
new->cl_flags = cl_init->init_flags;
- return cl_init->rpc_ops->init_client(new,
- timeparms, ip_addr,
- authflavour);
+ return rpc_ops->init_client(new, timeparms, ip_addr,
+ authflavour);
}
spin_unlock(&nn->nfs_client_lock);
- new = nfs_alloc_client(cl_init);
+ new = rpc_ops->alloc_client(cl_init);
} while (!IS_ERR(new));
dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n",
cl_init->hostname ?: "", PTR_ERR(new));
return new;
}
+EXPORT_SYMBOL_GPL(nfs_get_client);
/*
* Mark a server as ready or failed
@@ -603,11 +525,12 @@ void nfs_mark_client_ready(struct nfs_client *clp, int state)
clp->cl_cons_state = state;
wake_up_all(&nfs_client_active_wq);
}
+EXPORT_SYMBOL_GPL(nfs_mark_client_ready);
/*
* Initialise the timeout values for a connection
*/
-static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
+void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
unsigned int timeo, unsigned int retrans)
{
to->to_initval = timeo * HZ / 10;
@@ -644,13 +567,14 @@ static void nfs_init_timeout_values(struct rpc_timeout *to, int proto,
BUG();
}
}
+EXPORT_SYMBOL_GPL(nfs_init_timeout_values);
/*
* Create an RPC client handle
*/
-static int nfs_create_rpc_client(struct nfs_client *clp,
- const struct rpc_timeout *timeparms,
- rpc_authflavor_t flavor)
+int nfs_create_rpc_client(struct nfs_client *clp,
+ const struct rpc_timeout *timeparms,
+ rpc_authflavor_t flavor)
{
struct rpc_clnt *clnt = NULL;
struct rpc_create_args args = {
@@ -683,6 +607,7 @@ static int nfs_create_rpc_client(struct nfs_client *clp,
clp->cl_rpcclient = clnt;
return 0;
}
+EXPORT_SYMBOL_GPL(nfs_create_rpc_client);
/*
* Version 2 or 3 client destruction
@@ -735,39 +660,9 @@ static int nfs_start_lockd(struct nfs_server *server)
}
/*
- * Initialise an NFSv3 ACL client connection
- */
-#ifdef CONFIG_NFS_V3_ACL
-static void nfs_init_server_aclclient(struct nfs_server *server)
-{
- if (server->nfs_client->rpc_ops->version != 3)
- goto out_noacl;
- if (server->flags & NFS_MOUNT_NOACL)
- goto out_noacl;
-
- server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3);
- if (IS_ERR(server->client_acl))
- goto out_noacl;
-
- /* No errors! Assume that Sun nfsacls are supported */
- server->caps |= NFS_CAP_ACLS;
- return;
-
-out_noacl:
- server->caps &= ~NFS_CAP_ACLS;
-}
-#else
-static inline void nfs_init_server_aclclient(struct nfs_server *server)
-{
- server->flags &= ~NFS_MOUNT_NOACL;
- server->caps &= ~NFS_CAP_ACLS;
-}
-#endif
-
-/*
* Create a general RPC client
*/
-static int nfs_init_server_rpcclient(struct nfs_server *server,
+int nfs_init_server_rpcclient(struct nfs_server *server,
const struct rpc_timeout *timeo,
rpc_authflavor_t pseudoflavour)
{
@@ -799,6 +694,7 @@ static int nfs_init_server_rpcclient(struct nfs_server *server,
return 0;
}
+EXPORT_SYMBOL_GPL(nfs_init_server_rpcclient);
/**
* nfs_init_client - Initialise an NFS2 or NFS3 client
@@ -838,18 +734,20 @@ error:
dprintk("<-- nfs_init_client() = xerror %d\n", error);
return ERR_PTR(error);
}
+EXPORT_SYMBOL_GPL(nfs_init_client);
/*
* Create a version 2 or 3 client
*/
static int nfs_init_server(struct nfs_server *server,
- const struct nfs_parsed_mount_data *data)
+ const struct nfs_parsed_mount_data *data,
+ struct nfs_subversion *nfs_mod)
{
struct nfs_client_initdata cl_init = {
.hostname = data->nfs_server.hostname,
.addr = (const struct sockaddr *)&data->nfs_server.address,
.addrlen = data->nfs_server.addrlen,
- .rpc_ops = NULL,
+ .nfs_mod = nfs_mod,
.proto = data->nfs_server.protocol,
.net = data->net,
};
@@ -859,21 +757,6 @@ static int nfs_init_server(struct nfs_server *server,
dprintk("--> nfs_init_server()\n");
- switch (data->version) {
-#ifdef CONFIG_NFS_V2
- case 2:
- cl_init.rpc_ops = &nfs_v2_clientops;
- break;
-#endif
-#ifdef CONFIG_NFS_V3
- case 3:
- cl_init.rpc_ops = &nfs_v3_clientops;
- break;
-#endif
- default:
- return -EPROTONOSUPPORT;
- }
-
nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
data->timeo, data->retrans);
if (data->flags & NFS_MOUNT_NORESVPORT)
@@ -927,8 +810,6 @@ static int nfs_init_server(struct nfs_server *server,
server->mountd_protocol = data->mount_server.protocol;
server->namelen = data->namlen;
- /* Create a client RPC handle for the NFSv3 ACL management interface */
- nfs_init_server_aclclient(server);
dprintk("<-- nfs_init_server() = 0 [new %p]\n", clp);
return 0;
@@ -975,7 +856,6 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
server->wsize = NFS_MAX_FILE_IO_SIZE;
server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
server->pnfs_blksize = fsinfo->blksize;
- set_pnfs_layoutdriver(server, mntfh, fsinfo->layouttype);
server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
@@ -1001,7 +881,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
/*
* Probe filesystem information, including the FSID on v2/v3
*/
-static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr)
+int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr)
{
struct nfs_fsinfo fsinfo;
struct nfs_client *clp = server->nfs_client;
@@ -1041,11 +921,12 @@ out_error:
dprintk("nfs_probe_fsinfo: error = %d\n", -error);
return error;
}
+EXPORT_SYMBOL_GPL(nfs_probe_fsinfo);
/*
* Copy useful information when duplicating a server record
*/
-static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source)
+void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *source)
{
target->flags = source->flags;
target->rsize = source->rsize;
@@ -1057,8 +938,9 @@ static void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_serve
target->caps = source->caps;
target->options = source->options;
}
+EXPORT_SYMBOL_GPL(nfs_server_copy_userdata);
-static void nfs_server_insert_lists(struct nfs_server *server)
+void nfs_server_insert_lists(struct nfs_server *server)
{
struct nfs_client *clp = server->nfs_client;
struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
@@ -1070,6 +952,7 @@ static void nfs_server_insert_lists(struct nfs_server *server)
spin_unlock(&nn->nfs_client_lock);
}
+EXPORT_SYMBOL_GPL(nfs_server_insert_lists);
static void nfs_server_remove_lists(struct nfs_server *server)
{
@@ -1092,7 +975,7 @@ static void nfs_server_remove_lists(struct nfs_server *server)
/*
* Allocate and initialise a server record
*/
-static struct nfs_server *nfs_alloc_server(void)
+struct nfs_server *nfs_alloc_server(void)
{
struct nfs_server *server;
@@ -1129,6 +1012,7 @@ static struct nfs_server *nfs_alloc_server(void)
return server;
}
+EXPORT_SYMBOL_GPL(nfs_alloc_server);
/*
* Free up a server record
@@ -1138,7 +1022,6 @@ void nfs_free_server(struct nfs_server *server)
dprintk("--> nfs_free_server()\n");
nfs_server_remove_lists(server);
- unset_pnfs_layoutdriver(server);
if (server->destroy != NULL)
server->destroy(server);
@@ -1158,13 +1041,14 @@ void nfs_free_server(struct nfs_server *server)
nfs_release_automount_timer();
dprintk("<-- nfs_free_server()\n");
}
+EXPORT_SYMBOL_GPL(nfs_free_server);
/*
* Create a version 2 or 3 volume record
* - keyed on server and FSID
*/
-struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
- struct nfs_fh *mntfh)
+struct nfs_server *nfs_create_server(struct nfs_mount_info *mount_info,
+ struct nfs_subversion *nfs_mod)
{
struct nfs_server *server;
struct nfs_fattr *fattr;
@@ -1180,7 +1064,7 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
goto error;
/* Get a client representation */
- error = nfs_init_server(server, data);
+ error = nfs_init_server(server, mount_info->parsed, nfs_mod);
if (error < 0)
goto error;
@@ -1189,13 +1073,13 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
/* Probe the root fh to retrieve its FSID */
- error = nfs_probe_fsinfo(server, mntfh, fattr);
+ error = nfs_probe_fsinfo(server, mount_info->mntfh, fattr);
if (error < 0)
goto error;
if (server->nfs_client->rpc_ops->version == 3) {
if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN)
server->namelen = NFS3_MAXNAMLEN;
- if (!(data->flags & NFS_MOUNT_NORDIRPLUS))
+ if (!(mount_info->parsed->flags & NFS_MOUNT_NORDIRPLUS))
server->caps |= NFS_CAP_READDIRPLUS;
} else {
if (server->namelen == 0 || server->namelen > NFS2_MAXNAMLEN)
@@ -1203,7 +1087,7 @@ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data,
}
if (!(fattr->valid & NFS_ATTR_FATTR)) {
- error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr);
+ error = nfs_mod->rpc_ops->getattr(server, mount_info->mntfh, fattr);
if (error < 0) {
dprintk("nfs_create_server: getattr error = %d\n", -error);
goto error;
@@ -1225,522 +1109,7 @@ error:
nfs_free_server(server);
return ERR_PTR(error);
}
-
-#ifdef CONFIG_NFS_V4
-/*
- * NFSv4.0 callback thread helper
- *
- * Find a client by callback identifier
- */
-struct nfs_client *
-nfs4_find_client_ident(struct net *net, int cb_ident)
-{
- struct nfs_client *clp;
- struct nfs_net *nn = net_generic(net, nfs_net_id);
-
- spin_lock(&nn->nfs_client_lock);
- clp = idr_find(&nn->cb_ident_idr, cb_ident);
- if (clp)
- atomic_inc(&clp->cl_count);
- spin_unlock(&nn->nfs_client_lock);
- return clp;
-}
-
-#if defined(CONFIG_NFS_V4_1)
-/*
- * NFSv4.1 callback thread helper
- * For CB_COMPOUND calls, find a client by IP address, protocol version,
- * minorversion, and sessionID
- *
- * Returns NULL if no such client
- */
-struct nfs_client *
-nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
- struct nfs4_sessionid *sid)
-{
- struct nfs_client *clp;
- struct nfs_net *nn = net_generic(net, nfs_net_id);
-
- spin_lock(&nn->nfs_client_lock);
- list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
- if (nfs4_cb_match_client(addr, clp, 1) == false)
- continue;
-
- if (!nfs4_has_session(clp))
- continue;
-
- /* Match sessionid*/
- if (memcmp(clp->cl_session->sess_id.data,
- sid->data, NFS4_MAX_SESSIONID_LEN) != 0)
- continue;
-
- atomic_inc(&clp->cl_count);
- spin_unlock(&nn->nfs_client_lock);
- return clp;
- }
- spin_unlock(&nn->nfs_client_lock);
- return NULL;
-}
-
-#else /* CONFIG_NFS_V4_1 */
-
-struct nfs_client *
-nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
- struct nfs4_sessionid *sid)
-{
- return NULL;
-}
-#endif /* CONFIG_NFS_V4_1 */
-
-/*
- * Initialize the NFS4 callback service
- */
-static int nfs4_init_callback(struct nfs_client *clp)
-{
- int error;
-
- if (clp->rpc_ops->version == 4) {
- struct rpc_xprt *xprt;
-
- xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt);
-
- if (nfs4_has_session(clp)) {
- error = xprt_setup_backchannel(xprt,
- NFS41_BC_MIN_CALLBACKS);
- if (error < 0)
- return error;
- }
-
- error = nfs_callback_up(clp->cl_mvops->minor_version, xprt);
- if (error < 0) {
- dprintk("%s: failed to start callback. Error = %d\n",
- __func__, error);
- return error;
- }
- __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state);
- }
- return 0;
-}
-
-/*
- * Initialize the minor version specific parts of an NFS4 client record
- */
-static int nfs4_init_client_minor_version(struct nfs_client *clp)
-{
-#if defined(CONFIG_NFS_V4_1)
- if (clp->cl_mvops->minor_version) {
- struct nfs4_session *session = NULL;
- /*
- * Create the session and mark it expired.
- * When a SEQUENCE operation encounters the expired session
- * it will do session recovery to initialize it.
- */
- session = nfs4_alloc_session(clp);
- if (!session)
- return -ENOMEM;
-
- clp->cl_session = session;
- /*
- * The create session reply races with the server back
- * channel probe. Mark the client NFS_CS_SESSION_INITING
- * so that the client back channel can find the
- * nfs_client struct
- */
- nfs_mark_client_ready(clp, NFS_CS_SESSION_INITING);
- }
-#endif /* CONFIG_NFS_V4_1 */
-
- return nfs4_init_callback(clp);
-}
-
-/**
- * nfs4_init_client - Initialise an NFS4 client record
- *
- * @clp: nfs_client to initialise
- * @timeparms: timeout parameters for underlying RPC transport
- * @ip_addr: callback IP address in presentation format
- * @authflavor: authentication flavor for underlying RPC transport
- *
- * Returns pointer to an NFS client, or an ERR_PTR value.
- */
-struct nfs_client *nfs4_init_client(struct nfs_client *clp,
- const struct rpc_timeout *timeparms,
- const char *ip_addr,
- rpc_authflavor_t authflavour)
-{
- char buf[INET6_ADDRSTRLEN + 1];
- int error;
-
- if (clp->cl_cons_state == NFS_CS_READY) {
- /* the client is initialised already */
- dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp);
- return clp;
- }
-
- /* Check NFS protocol revision and initialize RPC op vector */
- clp->rpc_ops = &nfs_v4_clientops;
-
- __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
- error = nfs_create_rpc_client(clp, timeparms, authflavour);
- if (error < 0)
- goto error;
-
- /* If no clientaddr= option was specified, find a usable cb address */
- if (ip_addr == NULL) {
- struct sockaddr_storage cb_addr;
- struct sockaddr *sap = (struct sockaddr *)&cb_addr;
-
- error = rpc_localaddr(clp->cl_rpcclient, sap, sizeof(cb_addr));
- if (error < 0)
- goto error;
- error = rpc_ntop(sap, buf, sizeof(buf));
- if (error < 0)
- goto error;
- ip_addr = (const char *)buf;
- }
- strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr));
-
- error = nfs_idmap_new(clp);
- if (error < 0) {
- dprintk("%s: failed to create idmapper. Error = %d\n",
- __func__, error);
- goto error;
- }
- __set_bit(NFS_CS_IDMAP, &clp->cl_res_state);
-
- error = nfs4_init_client_minor_version(clp);
- if (error < 0)
- goto error;
-
- if (!nfs4_has_session(clp))
- nfs_mark_client_ready(clp, NFS_CS_READY);
- return clp;
-
-error:
- nfs_mark_client_ready(clp, error);
- nfs_put_client(clp);
- dprintk("<-- nfs4_init_client() = xerror %d\n", error);
- return ERR_PTR(error);
-}
-
-/*
- * Set up an NFS4 client
- */
-static int nfs4_set_client(struct nfs_server *server,
- const char *hostname,
- const struct sockaddr *addr,
- const size_t addrlen,
- const char *ip_addr,
- rpc_authflavor_t authflavour,
- int proto, const struct rpc_timeout *timeparms,
- u32 minorversion, struct net *net)
-{
- struct nfs_client_initdata cl_init = {
- .hostname = hostname,
- .addr = addr,
- .addrlen = addrlen,
- .rpc_ops = &nfs_v4_clientops,
- .proto = proto,
- .minorversion = minorversion,
- .net = net,
- };
- struct nfs_client *clp;
- int error;
-
- dprintk("--> nfs4_set_client()\n");
-
- if (server->flags & NFS_MOUNT_NORESVPORT)
- set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
-
- /* Allocate or find a client reference we can use */
- clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour);
- if (IS_ERR(clp)) {
- error = PTR_ERR(clp);
- goto error;
- }
-
- /*
- * Query for the lease time on clientid setup or renewal
- *
- * Note that this will be set on nfs_clients that were created
- * only for the DS role and did not set this bit, but now will
- * serve a dual role.
- */
- set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state);
-
- server->nfs_client = clp;
- dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp);
- return 0;
-error:
- dprintk("<-- nfs4_set_client() = xerror %d\n", error);
- return error;
-}
-
-/*
- * Set up a pNFS Data Server client.
- *
- * Return any existing nfs_client that matches server address,port,version
- * and minorversion.
- *
- * For a new nfs_client, use a soft mount (default), a low retrans and a
- * low timeout interval so that if a connection is lost, we retry through
- * the MDS.
- */
-struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
- const struct sockaddr *ds_addr, int ds_addrlen,
- int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans)
-{
- struct nfs_client_initdata cl_init = {
- .addr = ds_addr,
- .addrlen = ds_addrlen,
- .rpc_ops = &nfs_v4_clientops,
- .proto = ds_proto,
- .minorversion = mds_clp->cl_minorversion,
- .net = mds_clp->cl_net,
- };
- struct rpc_timeout ds_timeout;
- struct nfs_client *clp;
-
- /*
- * Set an authflavor equual to the MDS value. Use the MDS nfs_client
- * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS
- * (section 13.1 RFC 5661).
- */
- nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans);
- clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr,
- mds_clp->cl_rpcclient->cl_auth->au_flavor);
-
- dprintk("<-- %s %p\n", __func__, clp);
- return clp;
-}
-EXPORT_SYMBOL_GPL(nfs4_set_ds_client);
-
-/*
- * Session has been established, and the client marked ready.
- * Set the mount rsize and wsize with negotiated fore channel
- * attributes which will be bound checked in nfs_server_set_fsinfo.
- */
-static void nfs4_session_set_rwsize(struct nfs_server *server)
-{
-#ifdef CONFIG_NFS_V4_1
- struct nfs4_session *sess;
- u32 server_resp_sz;
- u32 server_rqst_sz;
-
- if (!nfs4_has_session(server->nfs_client))
- return;
- sess = server->nfs_client->cl_session;
- server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead;
- server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead;
-
- if (server->rsize > server_resp_sz)
- server->rsize = server_resp_sz;
- if (server->wsize > server_rqst_sz)
- server->wsize = server_rqst_sz;
-#endif /* CONFIG_NFS_V4_1 */
-}
-
-static int nfs4_server_common_setup(struct nfs_server *server,
- struct nfs_fh *mntfh)
-{
- struct nfs_fattr *fattr;
- int error;
-
- BUG_ON(!server->nfs_client);
- BUG_ON(!server->nfs_client->rpc_ops);
- BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
-
- /* data servers support only a subset of NFSv4.1 */
- if (is_ds_only_client(server->nfs_client))
- return -EPROTONOSUPPORT;
-
- fattr = nfs_alloc_fattr();
- if (fattr == NULL)
- return -ENOMEM;
-
- /* We must ensure the session is initialised first */
- error = nfs4_init_session(server);
- if (error < 0)
- goto out;
-
- /* Probe the root fh to retrieve its FSID and filehandle */
- error = nfs4_get_rootfh(server, mntfh);
- if (error < 0)
- goto out;
-
- dprintk("Server FSID: %llx:%llx\n",
- (unsigned long long) server->fsid.major,
- (unsigned long long) server->fsid.minor);
- dprintk("Mount FH: %d\n", mntfh->size);
-
- nfs4_session_set_rwsize(server);
-
- error = nfs_probe_fsinfo(server, mntfh, fattr);
- if (error < 0)
- goto out;
-
- if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
- server->namelen = NFS4_MAXNAMLEN;
-
- nfs_server_insert_lists(server);
- server->mount_time = jiffies;
- server->destroy = nfs4_destroy_server;
-out:
- nfs_free_fattr(fattr);
- return error;
-}
-
-/*
- * Create a version 4 volume record
- */
-static int nfs4_init_server(struct nfs_server *server,
- const struct nfs_parsed_mount_data *data)
-{
- struct rpc_timeout timeparms;
- int error;
-
- dprintk("--> nfs4_init_server()\n");
-
- nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
- data->timeo, data->retrans);
-
- /* Initialise the client representation from the mount data */
- server->flags = data->flags;
- server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|NFS_CAP_POSIX_LOCK;
- if (!(data->flags & NFS_MOUNT_NORDIRPLUS))
- server->caps |= NFS_CAP_READDIRPLUS;
- server->options = data->options;
-
- /* Get a client record */
- error = nfs4_set_client(server,
- data->nfs_server.hostname,
- (const struct sockaddr *)&data->nfs_server.address,
- data->nfs_server.addrlen,
- data->client_address,
- data->auth_flavors[0],
- data->nfs_server.protocol,
- &timeparms,
- data->minorversion,
- data->net);
- if (error < 0)
- goto error;
-
- /*
- * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
- * authentication.
- */
- if (nfs4_disable_idmapping && data->auth_flavors[0] == RPC_AUTH_UNIX)
- server->caps |= NFS_CAP_UIDGID_NOMAP;
-
- if (data->rsize)
- server->rsize = nfs_block_size(data->rsize, NULL);
- if (data->wsize)
- server->wsize = nfs_block_size(data->wsize, NULL);
-
- server->acregmin = data->acregmin * HZ;
- server->acregmax = data->acregmax * HZ;
- server->acdirmin = data->acdirmin * HZ;
- server->acdirmax = data->acdirmax * HZ;
-
- server->port = data->nfs_server.port;
-
- error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]);
-
-error:
- /* Done */
- dprintk("<-- nfs4_init_server() = %d\n", error);
- return error;
-}
-
-/*
- * Create a version 4 volume record
- * - keyed on server and FSID
- */
-struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
- struct nfs_fh *mntfh)
-{
- struct nfs_server *server;
- int error;
-
- dprintk("--> nfs4_create_server()\n");
-
- server = nfs_alloc_server();
- if (!server)
- return ERR_PTR(-ENOMEM);
-
- /* set up the general RPC client */
- error = nfs4_init_server(server, data);
- if (error < 0)
- goto error;
-
- error = nfs4_server_common_setup(server, mntfh);
- if (error < 0)
- goto error;
-
- dprintk("<-- nfs4_create_server() = %p\n", server);
- return server;
-
-error:
- nfs_free_server(server);
- dprintk("<-- nfs4_create_server() = error %d\n", error);
- return ERR_PTR(error);
-}
-
-/*
- * Create an NFS4 referral server record
- */
-struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
- struct nfs_fh *mntfh)
-{
- struct nfs_client *parent_client;
- struct nfs_server *server, *parent_server;
- int error;
-
- dprintk("--> nfs4_create_referral_server()\n");
-
- server = nfs_alloc_server();
- if (!server)
- return ERR_PTR(-ENOMEM);
-
- parent_server = NFS_SB(data->sb);
- parent_client = parent_server->nfs_client;
-
- /* Initialise the client representation from the parent server */
- nfs_server_copy_userdata(server, parent_server);
- server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR;
-
- /* Get a client representation.
- * Note: NFSv4 always uses TCP, */
- error = nfs4_set_client(server, data->hostname,
- data->addr,
- data->addrlen,
- parent_client->cl_ipaddr,
- data->authflavor,
- rpc_protocol(parent_server->client),
- parent_server->client->cl_timeout,
- parent_client->cl_mvops->minor_version,
- parent_client->cl_net);
- if (error < 0)
- goto error;
-
- error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor);
- if (error < 0)
- goto error;
-
- error = nfs4_server_common_setup(server, mntfh);
- if (error < 0)
- goto error;
-
- dprintk("<-- nfs_create_referral_server() = %p\n", server);
- return server;
-
-error:
- nfs_free_server(server);
- dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
- return ERR_PTR(error);
-}
-
-#endif /* CONFIG_NFS_V4 */
+EXPORT_SYMBOL_GPL(nfs_create_server);
/*
* Clone an NFS2, NFS3 or NFS4 server record
@@ -1780,8 +1149,6 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
flavor);
if (error < 0)
goto out_free_server;
- if (!IS_ERR(source->client_acl))
- nfs_init_server_aclclient(server);
/* probe the filesystem info for this server filesystem */
error = nfs_probe_fsinfo(server, fh, fattr_fsinfo);
@@ -1812,6 +1179,7 @@ out_free_server:
dprintk("<-- nfs_clone_server() = error %d\n", error);
return ERR_PTR(error);
}
+EXPORT_SYMBOL_GPL(nfs_clone_server);
void nfs_clients_init(struct net *net)
{
@@ -1819,7 +1187,7 @@ void nfs_clients_init(struct net *net)
INIT_LIST_HEAD(&nn->nfs_client_list);
INIT_LIST_HEAD(&nn->nfs_volume_list);
-#ifdef CONFIG_NFS_V4
+#if IS_ENABLED(CONFIG_NFS_V4)
idr_init(&nn->cb_ident_idr);
#endif
spin_lock_init(&nn->nfs_client_lock);
@@ -2091,7 +1459,3 @@ void nfs_fs_proc_exit(void)
}
#endif /* CONFIG_PROC_FS */
-
-module_param(nfs4_disable_idmapping, bool, 0644);
-MODULE_PARM_DESC(nfs4_disable_idmapping,
- "Turn off NFSv4 idmapping when using 'sec=sys'");
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index bd3a9601d32d..81c5eec3cf38 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -47,7 +47,7 @@ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
*
* Returns one if inode has the indicated delegation, otherwise zero.
*/
-int nfs_have_delegation(struct inode *inode, fmode_t flags)
+int nfs4_have_delegation(struct inode *inode, fmode_t flags)
{
struct nfs_delegation *delegation;
int ret = 0;
@@ -388,7 +388,7 @@ void nfs_inode_return_delegation_noreclaim(struct inode *inode)
*
* Returns zero on success, or a negative errno value.
*/
-int nfs_inode_return_delegation(struct inode *inode)
+int nfs4_inode_return_delegation(struct inode *inode)
{
struct nfs_server *server = NFS_SERVER(inode);
struct nfs_inode *nfsi = NFS_I(inode);
@@ -417,9 +417,8 @@ static void nfs_mark_return_delegation(struct nfs_server *server,
* @sb: sb to process
*
*/
-void nfs_super_return_all_delegations(struct super_block *sb)
+void nfs_server_return_all_delegations(struct nfs_server *server)
{
- struct nfs_server *server = NFS_SB(sb);
struct nfs_client *clp = server->nfs_client;
struct nfs_delegation *delegation;
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 72709c4193fa..bbc6a4dba0d8 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -8,7 +8,7 @@
#ifndef FS_NFS_DELEGATION_H
#define FS_NFS_DELEGATION_H
-#if defined(CONFIG_NFS_V4)
+#if IS_ENABLED(CONFIG_NFS_V4)
/*
* NFSv4 delegation
*/
@@ -33,12 +33,12 @@ enum {
int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
-int nfs_inode_return_delegation(struct inode *inode);
+int nfs4_inode_return_delegation(struct inode *inode);
int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid);
void nfs_inode_return_delegation_noreclaim(struct inode *inode);
struct inode *nfs_delegation_find_inode(struct nfs_client *clp, const struct nfs_fh *fhandle);
-void nfs_super_return_all_delegations(struct super_block *sb);
+void nfs_server_return_all_delegations(struct nfs_server *);
void nfs_expire_all_delegations(struct nfs_client *clp);
void nfs_expire_all_delegation_types(struct nfs_client *clp, fmode_t flags);
void nfs_expire_unreferenced_delegations(struct nfs_client *clp);
@@ -56,24 +56,13 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl);
bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode, fmode_t flags);
void nfs_mark_delegation_referenced(struct nfs_delegation *delegation);
-int nfs_have_delegation(struct inode *inode, fmode_t flags);
+int nfs4_have_delegation(struct inode *inode, fmode_t flags);
-#else
-static inline int nfs_have_delegation(struct inode *inode, fmode_t flags)
-{
- return 0;
-}
-
-static inline int nfs_inode_return_delegation(struct inode *inode)
-{
- nfs_wb_all(inode);
- return 0;
-}
#endif
static inline int nfs_have_delegated_attributes(struct inode *inode)
{
- return nfs_have_delegation(inode, FMODE_READ) &&
+ return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) &&
!(NFS_I(inode)->cache_validity & NFS_INO_REVAL_FORCED);
}
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index a6b1c7fb8232..627f108ede23 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -17,6 +17,7 @@
* 6 Jun 1999 Cache readdir lookups in the page cache. -DaveM
*/
+#include <linux/module.h>
#include <linux/time.h>
#include <linux/errno.h>
#include <linux/stat.h>
@@ -46,16 +47,6 @@
static int nfs_opendir(struct inode *, struct file *);
static int nfs_closedir(struct inode *, struct file *);
static int nfs_readdir(struct file *, void *, filldir_t);
-static struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int);
-static int nfs_create(struct inode *, struct dentry *, umode_t, bool);
-static int nfs_mkdir(struct inode *, struct dentry *, umode_t);
-static int nfs_rmdir(struct inode *, struct dentry *);
-static int nfs_unlink(struct inode *, struct dentry *);
-static int nfs_symlink(struct inode *, struct dentry *, const char *);
-static int nfs_link(struct dentry *, struct inode *, struct dentry *);
-static int nfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
-static int nfs_rename(struct inode *, struct dentry *,
- struct inode *, struct dentry *);
static int nfs_fsync_dir(struct file *, loff_t, loff_t, int);
static loff_t nfs_llseek_dir(struct file *, loff_t, int);
static void nfs_readdir_clear_array(struct page*);
@@ -69,73 +60,10 @@ const struct file_operations nfs_dir_operations = {
.fsync = nfs_fsync_dir,
};
-const struct inode_operations nfs_dir_inode_operations = {
- .create = nfs_create,
- .lookup = nfs_lookup,
- .link = nfs_link,
- .unlink = nfs_unlink,
- .symlink = nfs_symlink,
- .mkdir = nfs_mkdir,
- .rmdir = nfs_rmdir,
- .mknod = nfs_mknod,
- .rename = nfs_rename,
- .permission = nfs_permission,
- .getattr = nfs_getattr,
- .setattr = nfs_setattr,
-};
-
const struct address_space_operations nfs_dir_aops = {
.freepage = nfs_readdir_clear_array,
};
-#ifdef CONFIG_NFS_V3
-const struct inode_operations nfs3_dir_inode_operations = {
- .create = nfs_create,
- .lookup = nfs_lookup,
- .link = nfs_link,
- .unlink = nfs_unlink,
- .symlink = nfs_symlink,
- .mkdir = nfs_mkdir,
- .rmdir = nfs_rmdir,
- .mknod = nfs_mknod,
- .rename = nfs_rename,
- .permission = nfs_permission,
- .getattr = nfs_getattr,
- .setattr = nfs_setattr,
- .listxattr = nfs3_listxattr,
- .getxattr = nfs3_getxattr,
- .setxattr = nfs3_setxattr,
- .removexattr = nfs3_removexattr,
-};
-#endif /* CONFIG_NFS_V3 */
-
-#ifdef CONFIG_NFS_V4
-
-static int nfs_atomic_open(struct inode *, struct dentry *,
- struct file *, unsigned, umode_t,
- int *);
-const struct inode_operations nfs4_dir_inode_operations = {
- .create = nfs_create,
- .lookup = nfs_lookup,
- .atomic_open = nfs_atomic_open,
- .link = nfs_link,
- .unlink = nfs_unlink,
- .symlink = nfs_symlink,
- .mkdir = nfs_mkdir,
- .rmdir = nfs_rmdir,
- .mknod = nfs_mknod,
- .rename = nfs_rename,
- .permission = nfs_permission,
- .getattr = nfs_getattr,
- .setattr = nfs_setattr,
- .getxattr = generic_getxattr,
- .setxattr = generic_setxattr,
- .listxattr = generic_listxattr,
- .removexattr = generic_removexattr,
-};
-
-#endif /* CONFIG_NFS_V4 */
-
static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, struct rpc_cred *cred)
{
struct nfs_open_dir_context *ctx;
@@ -1008,6 +936,7 @@ void nfs_force_lookup_revalidate(struct inode *dir)
{
NFS_I(dir)->cache_change_attribute++;
}
+EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
/*
* A check for whether or not the parent directory has changed.
@@ -1128,7 +1057,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
goto out_bad;
}
- if (nfs_have_delegation(inode, FMODE_READ))
+ if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
goto out_set_verifier;
/* Force a full look up iff the parent directory has changed */
@@ -1269,8 +1198,9 @@ const struct dentry_operations nfs_dentry_operations = {
.d_automount = nfs_d_automount,
.d_release = nfs_d_release,
};
+EXPORT_SYMBOL_GPL(nfs_dentry_operations);
-static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
+struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
{
struct dentry *res;
struct dentry *parent;
@@ -1336,8 +1266,9 @@ out:
nfs_free_fhandle(fhandle);
return res;
}
+EXPORT_SYMBOL_GPL(nfs_lookup);
-#ifdef CONFIG_NFS_V4
+#if IS_ENABLED(CONFIG_NFS_V4)
static int nfs4_lookup_revalidate(struct dentry *, unsigned int);
const struct dentry_operations nfs4_dentry_operations = {
@@ -1347,6 +1278,7 @@ const struct dentry_operations nfs4_dentry_operations = {
.d_automount = nfs_d_automount,
.d_release = nfs_d_release,
};
+EXPORT_SYMBOL_GPL(nfs4_dentry_operations);
static fmode_t flags_to_mode(int flags)
{
@@ -1398,9 +1330,9 @@ out:
return err;
}
-static int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
- struct file *file, unsigned open_flags,
- umode_t mode, int *opened)
+int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
+ struct file *file, unsigned open_flags,
+ umode_t mode, int *opened)
{
struct nfs_open_context *ctx;
struct dentry *res;
@@ -1489,6 +1421,7 @@ no_open:
return finish_no_open(file, res);
}
+EXPORT_SYMBOL_GPL(nfs_atomic_open);
static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
{
@@ -1581,6 +1514,7 @@ out_error:
dput(parent);
return error;
}
+EXPORT_SYMBOL_GPL(nfs_instantiate);
/*
* Following a failed create operation, we drop the dentry rather
@@ -1588,7 +1522,7 @@ out_error:
* that the operation succeeded on the server, but an error in the
* reply path made it appear to have failed.
*/
-static int nfs_create(struct inode *dir, struct dentry *dentry,
+int nfs_create(struct inode *dir, struct dentry *dentry,
umode_t mode, bool excl)
{
struct iattr attr;
@@ -1609,11 +1543,12 @@ out_err:
d_drop(dentry);
return error;
}
+EXPORT_SYMBOL_GPL(nfs_create);
/*
* See comments for nfs_proc_create regarding failed operations.
*/
-static int
+int
nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
{
struct iattr attr;
@@ -1636,11 +1571,12 @@ out_err:
d_drop(dentry);
return status;
}
+EXPORT_SYMBOL_GPL(nfs_mknod);
/*
* See comments for nfs_proc_create regarding failed operations.
*/
-static int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
struct iattr attr;
int error;
@@ -1659,6 +1595,7 @@ out_err:
d_drop(dentry);
return error;
}
+EXPORT_SYMBOL_GPL(nfs_mkdir);
static void nfs_dentry_handle_enoent(struct dentry *dentry)
{
@@ -1666,7 +1603,7 @@ static void nfs_dentry_handle_enoent(struct dentry *dentry)
d_delete(dentry);
}
-static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
+int nfs_rmdir(struct inode *dir, struct dentry *dentry)
{
int error;
@@ -1682,6 +1619,7 @@ static int nfs_rmdir(struct inode *dir, struct dentry *dentry)
return error;
}
+EXPORT_SYMBOL_GPL(nfs_rmdir);
/*
* Remove a file after making sure there are no pending writes,
@@ -1706,7 +1644,7 @@ static int nfs_safe_remove(struct dentry *dentry)
}
if (inode != NULL) {
- nfs_inode_return_delegation(inode);
+ NFS_PROTO(inode)->return_delegation(inode);
error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
/* The VFS may want to delete this inode */
if (error == 0)
@@ -1725,7 +1663,7 @@ out:
*
* If sillyrename() returns 0, we do nothing, otherwise we unlink.
*/
-static int nfs_unlink(struct inode *dir, struct dentry *dentry)
+int nfs_unlink(struct inode *dir, struct dentry *dentry)
{
int error;
int need_rehash = 0;
@@ -1753,6 +1691,7 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
d_rehash(dentry);
return error;
}
+EXPORT_SYMBOL_GPL(nfs_unlink);
/*
* To create a symbolic link, most file systems instantiate a new inode,
@@ -1769,7 +1708,7 @@ static int nfs_unlink(struct inode *dir, struct dentry *dentry)
* now have a new file handle and can instantiate an in-core NFS inode
* and move the raw page into its mapping.
*/
-static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
{
struct pagevec lru_pvec;
struct page *page;
@@ -1823,8 +1762,9 @@ static int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *sym
return 0;
}
+EXPORT_SYMBOL_GPL(nfs_symlink);
-static int
+int
nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
{
struct inode *inode = old_dentry->d_inode;
@@ -1834,7 +1774,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
old_dentry->d_parent->d_name.name, old_dentry->d_name.name,
dentry->d_parent->d_name.name, dentry->d_name.name);
- nfs_inode_return_delegation(inode);
+ NFS_PROTO(inode)->return_delegation(inode);
d_drop(dentry);
error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
@@ -1844,6 +1784,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
}
return error;
}
+EXPORT_SYMBOL_GPL(nfs_link);
/*
* RENAME
@@ -1869,7 +1810,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
* If these conditions are met, we can drop the dentries before doing
* the rename.
*/
-static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct inode *new_dir, struct dentry *new_dentry)
{
struct inode *old_inode = old_dentry->d_inode;
@@ -1918,9 +1859,9 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
}
}
- nfs_inode_return_delegation(old_inode);
+ NFS_PROTO(old_inode)->return_delegation(old_inode);
if (new_inode != NULL)
- nfs_inode_return_delegation(new_inode);
+ NFS_PROTO(new_inode)->return_delegation(new_inode);
error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name,
new_dir, &new_dentry->d_name);
@@ -1942,6 +1883,7 @@ out:
dput(dentry);
return error;
}
+EXPORT_SYMBOL_GPL(nfs_rename);
static DEFINE_SPINLOCK(nfs_access_lru_lock);
static LIST_HEAD(nfs_access_lru_list);
@@ -2042,6 +1984,7 @@ void nfs_access_zap_cache(struct inode *inode)
spin_unlock(&nfs_access_lru_lock);
nfs_access_free_list(&head);
}
+EXPORT_SYMBOL_GPL(nfs_access_zap_cache);
static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred)
{
@@ -2202,6 +2145,7 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags)
{
return nfs_do_access(inode, cred, nfs_open_permission_mask(openflags));
}
+EXPORT_SYMBOL_GPL(nfs_may_open);
int nfs_permission(struct inode *inode, int mask)
{
@@ -2261,6 +2205,7 @@ out_notsup:
res = generic_permission(inode, mask);
goto out;
}
+EXPORT_SYMBOL_GPL(nfs_permission);
/*
* Local variables:
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 48253372ab1d..1ba385b7c90d 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -115,17 +115,28 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
* @nr_segs: size of iovec array
*
* The presence of this routine in the address space ops vector means
- * the NFS client supports direct I/O. However, we shunt off direct
- * read and write requests before the VFS gets them, so this method
- * should never be called.
+ * the NFS client supports direct I/O. However, for most direct IO, we
+ * shunt off direct read and write requests before the VFS gets them,
+ * so this method is only ever called for swap.
*/
ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
{
+#ifndef CONFIG_NFS_SWAP
dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
iocb->ki_filp->f_path.dentry->d_name.name,
(long long) pos, nr_segs);
return -EINVAL;
+#else
+ VM_BUG_ON(iocb->ki_left != PAGE_SIZE);
+ VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
+
+ if (rw == READ || rw == KERNEL_READ)
+ return nfs_file_direct_read(iocb, iov, nr_segs, pos,
+ rw == READ ? true : false);
+ return nfs_file_direct_write(iocb, iov, nr_segs, pos,
+ rw == WRITE ? true : false);
+#endif /* CONFIG_NFS_SWAP */
}
static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
@@ -303,7 +314,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
*/
static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
const struct iovec *iov,
- loff_t pos)
+ loff_t pos, bool uio)
{
struct nfs_direct_req *dreq = desc->pg_dreq;
struct nfs_open_context *ctx = dreq->ctx;
@@ -331,12 +342,20 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
GFP_KERNEL);
if (!pagevec)
break;
- down_read(&current->mm->mmap_sem);
- result = get_user_pages(current, current->mm, user_addr,
+ if (uio) {
+ down_read(&current->mm->mmap_sem);
+ result = get_user_pages(current, current->mm, user_addr,
npages, 1, 0, pagevec, NULL);
- up_read(&current->mm->mmap_sem);
- if (result < 0)
- break;
+ up_read(&current->mm->mmap_sem);
+ if (result < 0)
+ break;
+ } else {
+ WARN_ON(npages != 1);
+ result = get_kernel_page(user_addr, 1, pagevec);
+ if (WARN_ON(result != 1))
+ break;
+ }
+
if ((unsigned)result < npages) {
bytes = result * PAGE_SIZE;
if (bytes <= pgbase) {
@@ -386,21 +405,21 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
const struct iovec *iov,
unsigned long nr_segs,
- loff_t pos)
+ loff_t pos, bool uio)
{
struct nfs_pageio_descriptor desc;
ssize_t result = -EINVAL;
size_t requested_bytes = 0;
unsigned long seg;
- nfs_pageio_init_read(&desc, dreq->inode,
+ NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode,
&nfs_direct_read_completion_ops);
get_dreq(dreq);
desc.pg_dreq = dreq;
for (seg = 0; seg < nr_segs; seg++) {
const struct iovec *vec = &iov[seg];
- result = nfs_direct_read_schedule_segment(&desc, vec, pos);
+ result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio);
if (result < 0)
break;
requested_bytes += result;
@@ -426,7 +445,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
}
static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+ unsigned long nr_segs, loff_t pos, bool uio)
{
ssize_t result = -ENOMEM;
struct inode *inode = iocb->ki_filp->f_mapping->host;
@@ -444,7 +463,7 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
- result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos);
+ result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio);
if (!result)
result = nfs_direct_wait(dreq);
NFS_I(inode)->read_io += result;
@@ -460,7 +479,7 @@ static void nfs_inode_dio_write_done(struct inode *inode)
inode_dio_done(inode);
}
-#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
{
struct nfs_pageio_descriptor desc;
@@ -478,7 +497,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
dreq->count = 0;
get_dreq(dreq);
- nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE,
+ NFS_PROTO(dreq->inode)->write_pageio_init(&desc, dreq->inode, FLUSH_STABLE,
&nfs_direct_write_completion_ops);
desc.pg_dreq = dreq;
@@ -610,7 +629,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
*/
static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
const struct iovec *iov,
- loff_t pos)
+ loff_t pos, bool uio)
{
struct nfs_direct_req *dreq = desc->pg_dreq;
struct nfs_open_context *ctx = dreq->ctx;
@@ -638,12 +657,19 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d
if (!pagevec)
break;
- down_read(&current->mm->mmap_sem);
- result = get_user_pages(current, current->mm, user_addr,
- npages, 0, 0, pagevec, NULL);
- up_read(&current->mm->mmap_sem);
- if (result < 0)
- break;
+ if (uio) {
+ down_read(&current->mm->mmap_sem);
+ result = get_user_pages(current, current->mm, user_addr,
+ npages, 0, 0, pagevec, NULL);
+ up_read(&current->mm->mmap_sem);
+ if (result < 0)
+ break;
+ } else {
+ WARN_ON(npages != 1);
+ result = get_kernel_page(user_addr, 0, pagevec);
+ if (WARN_ON(result != 1))
+ break;
+ }
if ((unsigned)result < npages) {
bytes = result * PAGE_SIZE;
@@ -774,7 +800,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
const struct iovec *iov,
unsigned long nr_segs,
- loff_t pos)
+ loff_t pos, bool uio)
{
struct nfs_pageio_descriptor desc;
struct inode *inode = dreq->inode;
@@ -782,7 +808,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
size_t requested_bytes = 0;
unsigned long seg;
- nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE,
+ NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE,
&nfs_direct_write_completion_ops);
desc.pg_dreq = dreq;
get_dreq(dreq);
@@ -790,7 +816,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
for (seg = 0; seg < nr_segs; seg++) {
const struct iovec *vec = &iov[seg];
- result = nfs_direct_write_schedule_segment(&desc, vec, pos);
+ result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio);
if (result < 0)
break;
requested_bytes += result;
@@ -818,7 +844,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos,
- size_t count)
+ size_t count, bool uio)
{
ssize_t result = -ENOMEM;
struct inode *inode = iocb->ki_filp->f_mapping->host;
@@ -836,7 +862,7 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
- result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos);
+ result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio);
if (!result)
result = nfs_direct_wait(dreq);
out_release:
@@ -867,7 +893,7 @@ out:
* cache.
*/
ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+ unsigned long nr_segs, loff_t pos, bool uio)
{
ssize_t retval = -EINVAL;
struct file *file = iocb->ki_filp;
@@ -892,7 +918,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
task_io_account_read(count);
- retval = nfs_direct_read(iocb, iov, nr_segs, pos);
+ retval = nfs_direct_read(iocb, iov, nr_segs, pos, uio);
if (retval > 0)
iocb->ki_pos = pos + retval;
@@ -923,7 +949,7 @@ out:
* is no atomic O_APPEND write facility in the NFS protocol.
*/
ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+ unsigned long nr_segs, loff_t pos, bool uio)
{
ssize_t retval = -EINVAL;
struct file *file = iocb->ki_filp;
@@ -955,7 +981,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
task_io_account_write(count);
- retval = nfs_direct_write(iocb, iov, nr_segs, pos, count);
+ retval = nfs_direct_write(iocb, iov, nr_segs, pos, count, uio);
if (retval > 0) {
struct inode *inode = mapping->host;
diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c
index b3924b8a6000..31c26c4dcc23 100644
--- a/fs/nfs/dns_resolve.c
+++ b/fs/nfs/dns_resolve.c
@@ -8,6 +8,7 @@
#ifdef CONFIG_NFS_USE_KERNEL_DNS
+#include <linux/module.h>
#include <linux/sunrpc/clnt.h>
#include <linux/dns_resolver.h>
#include "dns_resolve.h"
@@ -27,9 +28,11 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen,
kfree(ip_addr);
return ret;
}
+EXPORT_SYMBOL_GPL(nfs_dns_resolve_name);
#else
+#include <linux/module.h>
#include <linux/hash.h>
#include <linux/string.h>
#include <linux/kmod.h>
@@ -345,6 +348,7 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name,
ret = -ESRCH;
return ret;
}
+EXPORT_SYMBOL_GPL(nfs_dns_resolve_name);
int nfs_dns_resolver_cache_init(struct net *net)
{
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index a6708e6b438d..75d6d0a3d32e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -16,6 +16,7 @@
* nfs regular file handling functions
*/
+#include <linux/module.h>
#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
@@ -35,42 +36,24 @@
#include "internal.h"
#include "iostat.h"
#include "fscache.h"
-#include "pnfs.h"
#define NFSDBG_FACILITY NFSDBG_FILE
static const struct vm_operations_struct nfs_file_vm_ops;
-const struct inode_operations nfs_file_inode_operations = {
- .permission = nfs_permission,
- .getattr = nfs_getattr,
- .setattr = nfs_setattr,
-};
-
-#ifdef CONFIG_NFS_V3
-const struct inode_operations nfs3_file_inode_operations = {
- .permission = nfs_permission,
- .getattr = nfs_getattr,
- .setattr = nfs_setattr,
- .listxattr = nfs3_listxattr,
- .getxattr = nfs3_getxattr,
- .setxattr = nfs3_setxattr,
- .removexattr = nfs3_removexattr,
-};
-#endif /* CONFIG_NFS_v3 */
-
/* Hack for future NFS swap support */
#ifndef IS_SWAPFILE
# define IS_SWAPFILE(inode) (0)
#endif
-static int nfs_check_flags(int flags)
+int nfs_check_flags(int flags)
{
if ((flags & (O_APPEND | O_DIRECT)) == (O_APPEND | O_DIRECT))
return -EINVAL;
return 0;
}
+EXPORT_SYMBOL_GPL(nfs_check_flags);
/*
* Open file
@@ -93,7 +76,7 @@ nfs_file_open(struct inode *inode, struct file *filp)
return res;
}
-static int
+int
nfs_file_release(struct inode *inode, struct file *filp)
{
dprintk("NFS: release(%s/%s)\n",
@@ -103,6 +86,7 @@ nfs_file_release(struct inode *inode, struct file *filp)
nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
return nfs_release(inode, filp);
}
+EXPORT_SYMBOL_GPL(nfs_file_release);
/**
* nfs_revalidate_size - Revalidate the file size
@@ -135,7 +119,7 @@ force_reval:
return __nfs_revalidate_inode(server, inode);
}
-static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
+loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
{
dprintk("NFS: llseek file(%s/%s, %lld, %d)\n",
filp->f_path.dentry->d_parent->d_name.name,
@@ -156,11 +140,12 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
return generic_file_llseek(filp, offset, origin);
}
+EXPORT_SYMBOL_GPL(nfs_file_llseek);
/*
* Flush all dirty pages, and check for write errors.
*/
-static int
+int
nfs_file_flush(struct file *file, fl_owner_t id)
{
struct dentry *dentry = file->f_path.dentry;
@@ -178,14 +163,15 @@ nfs_file_flush(struct file *file, fl_owner_t id)
* If we're holding a write delegation, then just start the i/o
* but don't wait for completion (or send a commit).
*/
- if (nfs_have_delegation(inode, FMODE_WRITE))
+ if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
return filemap_fdatawrite(file->f_mapping);
/* Flush writes to the server and return any errors */
return vfs_fsync(file, 0);
}
+EXPORT_SYMBOL_GPL(nfs_file_flush);
-static ssize_t
+ssize_t
nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
@@ -194,7 +180,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
ssize_t result;
if (iocb->ki_filp->f_flags & O_DIRECT)
- return nfs_file_direct_read(iocb, iov, nr_segs, pos);
+ return nfs_file_direct_read(iocb, iov, nr_segs, pos, true);
dprintk("NFS: read(%s/%s, %lu@%lu)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -208,8 +194,9 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
}
return result;
}
+EXPORT_SYMBOL_GPL(nfs_file_read);
-static ssize_t
+ssize_t
nfs_file_splice_read(struct file *filp, loff_t *ppos,
struct pipe_inode_info *pipe, size_t count,
unsigned int flags)
@@ -230,8 +217,9 @@ nfs_file_splice_read(struct file *filp, loff_t *ppos,
}
return res;
}
+EXPORT_SYMBOL_GPL(nfs_file_splice_read);
-static int
+int
nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
{
struct dentry *dentry = file->f_path.dentry;
@@ -251,6 +239,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
}
return status;
}
+EXPORT_SYMBOL_GPL(nfs_file_mmap);
/*
* Flush any dirty pages for this process, and check for write errors.
@@ -264,8 +253,8 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
* nfs_file_write() that a write error occurred, and hence cause it to
* fall back to doing a synchronous write.
*/
-static int
-nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+int
+nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync)
{
struct dentry *dentry = file->f_path.dentry;
struct nfs_open_context *ctx = nfs_file_open_context(file);
@@ -277,9 +266,6 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
dentry->d_parent->d_name.name, dentry->d_name.name,
datasync);
- ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
- mutex_lock(&inode->i_mutex);
-
nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
status = nfs_commit_inode(inode, FLUSH_SYNC);
@@ -290,10 +276,21 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
ret = xchg(&ctx->error, 0);
if (!ret && status < 0)
ret = status;
- if (!ret && !datasync)
- /* application has asked for meta-data sync */
- ret = pnfs_layoutcommit_inode(inode, true);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nfs_file_fsync_commit);
+
+static int
+nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+ int ret;
+ struct inode *inode = file->f_path.dentry->d_inode;
+
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ mutex_lock(&inode->i_mutex);
+ ret = nfs_file_fsync_commit(file, start, end, datasync);
mutex_unlock(&inode->i_mutex);
+
return ret;
}
@@ -442,7 +439,7 @@ static void nfs_invalidate_page(struct page *page, unsigned long offset)
if (offset != 0)
return;
/* Cancel any unstarted writes on this page */
- nfs_wb_page_cancel(page->mapping->host, page);
+ nfs_wb_page_cancel(page_file_mapping(page)->host, page);
nfs_fscache_invalidate_page(page, page->mapping->host);
}
@@ -459,8 +456,11 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
- /* Only do I/O if gfp is a superset of GFP_KERNEL */
- if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL) {
+ /* Only do I/O if gfp is a superset of GFP_KERNEL, and we're not
+ * doing this memory reclaim for a fs-related allocation.
+ */
+ if (mapping && (gfp & GFP_KERNEL) == GFP_KERNEL &&
+ !(current->flags & PF_FSTRANS)) {
int how = FLUSH_SYNC;
/* Don't let kswapd deadlock waiting for OOM RPC calls */
@@ -484,7 +484,7 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
*/
static int nfs_launder_page(struct page *page)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = page_file_mapping(page)->host;
struct nfs_inode *nfsi = NFS_I(inode);
dfprintk(PAGECACHE, "NFS: launder_page(%ld, %llu)\n",
@@ -494,6 +494,20 @@ static int nfs_launder_page(struct page *page)
return nfs_wb_page(inode, page);
}
+#ifdef CONFIG_NFS_SWAP
+static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
+ sector_t *span)
+{
+ *span = sis->pages;
+ return xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 1);
+}
+
+static void nfs_swap_deactivate(struct file *file)
+{
+ xs_swapper(NFS_CLIENT(file->f_mapping->host)->cl_xprt, 0);
+}
+#endif
+
const struct address_space_operations nfs_file_aops = {
.readpage = nfs_readpage,
.readpages = nfs_readpages,
@@ -508,6 +522,10 @@ const struct address_space_operations nfs_file_aops = {
.migratepage = nfs_migrate_page,
.launder_page = nfs_launder_page,
.error_remove_page = generic_error_remove_page,
+#ifdef CONFIG_NFS_SWAP
+ .swap_activate = nfs_swap_activate,
+ .swap_deactivate = nfs_swap_deactivate,
+#endif
};
/*
@@ -533,7 +551,7 @@ static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
nfs_fscache_wait_on_page_write(NFS_I(dentry->d_inode), page);
lock_page(page);
- mapping = page->mapping;
+ mapping = page_file_mapping(page);
if (mapping != dentry->d_inode->i_mapping)
goto out_unlock;
@@ -572,8 +590,8 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode)
return 0;
}
-static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
{
struct dentry * dentry = iocb->ki_filp->f_path.dentry;
struct inode * inode = dentry->d_inode;
@@ -582,7 +600,7 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
size_t count = iov_length(iov, nr_segs);
if (iocb->ki_filp->f_flags & O_DIRECT)
- return nfs_file_direct_write(iocb, iov, nr_segs, pos);
+ return nfs_file_direct_write(iocb, iov, nr_segs, pos, true);
dprintk("NFS: write(%s/%s, %lu@%Ld)\n",
dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -623,10 +641,11 @@ out_swapfile:
printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
goto out;
}
+EXPORT_SYMBOL_GPL(nfs_file_write);
-static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
- struct file *filp, loff_t *ppos,
- size_t count, unsigned int flags)
+ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
+ struct file *filp, loff_t *ppos,
+ size_t count, unsigned int flags)
{
struct dentry *dentry = filp->f_path.dentry;
struct inode *inode = dentry->d_inode;
@@ -654,6 +673,7 @@ static ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe,
nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);
return ret;
}
+EXPORT_SYMBOL_GPL(nfs_file_splice_write);
static int
do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
@@ -670,7 +690,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
}
fl->fl_type = saved_type;
- if (nfs_have_delegation(inode, FMODE_READ))
+ if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
goto out_noconflict;
if (is_local)
@@ -765,7 +785,7 @@ do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
* This makes locking act as a cache coherency point.
*/
nfs_sync_mapping(filp->f_mapping);
- if (!nfs_have_delegation(inode, FMODE_READ)) {
+ if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ)) {
if (is_time_granular(&NFS_SERVER(inode)->time_delta))
__nfs_revalidate_inode(NFS_SERVER(inode), inode);
else
@@ -778,7 +798,7 @@ out:
/*
* Lock a (portion of) a file
*/
-static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
+int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
{
struct inode *inode = filp->f_mapping->host;
int ret = -ENOLCK;
@@ -814,11 +834,12 @@ static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
out_err:
return ret;
}
+EXPORT_SYMBOL_GPL(nfs_lock);
/*
* Lock a (portion of) a file
*/
-static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
+int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
{
struct inode *inode = filp->f_mapping->host;
int is_local = 0;
@@ -831,6 +852,15 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
if (!(fl->fl_flags & FL_FLOCK))
return -ENOLCK;
+ /*
+ * The NFSv4 protocol doesn't support LOCK_MAND, which is not part of
+ * any standard. In principle we might be able to support LOCK_MAND
+ * on NFSv2/3 since NLMv3/4 support DOS share modes, but for now the
+ * NFS code is not set up for it.
+ */
+ if (fl->fl_type & LOCK_MAND)
+ return -EINVAL;
+
if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK)
is_local = 1;
@@ -843,18 +873,20 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
return do_unlk(filp, cmd, fl, is_local);
return do_setlk(filp, cmd, fl, is_local);
}
+EXPORT_SYMBOL_GPL(nfs_flock);
/*
* There is no protocol support for leases, so we have no way to implement
* them correctly in the face of opens by other clients.
*/
-static int nfs_setlease(struct file *file, long arg, struct file_lock **fl)
+int nfs_setlease(struct file *file, long arg, struct file_lock **fl)
{
dprintk("NFS: setlease(%s/%s, arg=%ld)\n",
file->f_path.dentry->d_parent->d_name.name,
file->f_path.dentry->d_name.name, arg);
return -EINVAL;
}
+EXPORT_SYMBOL_GPL(nfs_setlease);
const struct file_operations nfs_file_operations = {
.llseek = nfs_file_llseek,
@@ -874,104 +906,4 @@ const struct file_operations nfs_file_operations = {
.check_flags = nfs_check_flags,
.setlease = nfs_setlease,
};
-
-#ifdef CONFIG_NFS_V4
-static int
-nfs4_file_open(struct inode *inode, struct file *filp)
-{
- struct nfs_open_context *ctx;
- struct dentry *dentry = filp->f_path.dentry;
- struct dentry *parent = NULL;
- struct inode *dir;
- unsigned openflags = filp->f_flags;
- struct iattr attr;
- int err;
-
- BUG_ON(inode != dentry->d_inode);
- /*
- * If no cached dentry exists or if it's negative, NFSv4 handled the
- * opens in ->lookup() or ->create().
- *
- * We only get this far for a cached positive dentry. We skipped
- * revalidation, so handle it here by dropping the dentry and returning
- * -EOPENSTALE. The VFS will retry the lookup/create/open.
- */
-
- dprintk("NFS: open file(%s/%s)\n",
- dentry->d_parent->d_name.name,
- dentry->d_name.name);
-
- if ((openflags & O_ACCMODE) == 3)
- openflags--;
-
- /* We can't create new files here */
- openflags &= ~(O_CREAT|O_EXCL);
-
- parent = dget_parent(dentry);
- dir = parent->d_inode;
-
- ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode);
- err = PTR_ERR(ctx);
- if (IS_ERR(ctx))
- goto out;
-
- attr.ia_valid = ATTR_OPEN;
- if (openflags & O_TRUNC) {
- attr.ia_valid |= ATTR_SIZE;
- attr.ia_size = 0;
- nfs_wb_all(inode);
- }
-
- inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr);
- if (IS_ERR(inode)) {
- err = PTR_ERR(inode);
- switch (err) {
- case -EPERM:
- case -EACCES:
- case -EDQUOT:
- case -ENOSPC:
- case -EROFS:
- goto out_put_ctx;
- default:
- goto out_drop;
- }
- }
- iput(inode);
- if (inode != dentry->d_inode)
- goto out_drop;
-
- nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
- nfs_file_set_open_context(filp, ctx);
- err = 0;
-
-out_put_ctx:
- put_nfs_open_context(ctx);
-out:
- dput(parent);
- return err;
-
-out_drop:
- d_drop(dentry);
- err = -EOPENSTALE;
- goto out_put_ctx;
-}
-
-const struct file_operations nfs4_file_operations = {
- .llseek = nfs_file_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
- .aio_read = nfs_file_read,
- .aio_write = nfs_file_write,
- .mmap = nfs_file_mmap,
- .open = nfs4_file_open,
- .flush = nfs_file_flush,
- .release = nfs_file_release,
- .fsync = nfs_file_fsync,
- .lock = nfs_lock,
- .flock = nfs_flock,
- .splice_read = nfs_file_splice_read,
- .splice_write = nfs_file_splice_write,
- .check_flags = nfs_check_flags,
- .setlease = nfs_setlease,
-};
-#endif /* CONFIG_NFS_V4 */
+EXPORT_SYMBOL_GPL(nfs_file_operations);
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index a67990f90bd7..4654ced096a6 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -23,21 +23,15 @@
#include <linux/sunrpc/stats.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
-#include <linux/nfs4_mount.h>
#include <linux/lockd/bind.h>
#include <linux/seq_file.h>
#include <linux/mount.h>
-#include <linux/nfs_idmap.h>
#include <linux/vfs.h>
#include <linux/namei.h>
#include <linux/security.h>
#include <asm/uaccess.h>
-#include "nfs4_fs.h"
-#include "delegation.h"
-#include "internal.h"
-
#define NFSDBG_FACILITY NFSDBG_CLIENT
/*
@@ -135,47 +129,3 @@ out:
nfs_free_fattr(fsinfo.fattr);
return ret;
}
-
-#ifdef CONFIG_NFS_V4
-
-int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)
-{
- struct nfs_fsinfo fsinfo;
- int ret = -ENOMEM;
-
- dprintk("--> nfs4_get_rootfh()\n");
-
- fsinfo.fattr = nfs_alloc_fattr();
- if (fsinfo.fattr == NULL)
- goto out;
-
- /* Start by getting the root filehandle from the server */
- ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo);
- if (ret < 0) {
- dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret);
- goto out;
- }
-
- if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_TYPE)
- || !S_ISDIR(fsinfo.fattr->mode)) {
- printk(KERN_ERR "nfs4_get_rootfh:"
- " getroot encountered non-directory\n");
- ret = -ENOTDIR;
- goto out;
- }
-
- if (fsinfo.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
- printk(KERN_ERR "nfs4_get_rootfh:"
- " getroot obtained referral\n");
- ret = -EREMOTE;
- goto out;
- }
-
- memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid));
-out:
- nfs_free_fattr(fsinfo.fattr);
- dprintk("<-- nfs4_get_rootfh() = %d\n", ret);
- return ret;
-}
-
-#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 864c51e4b400..b701358c39c3 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -52,8 +52,6 @@
#define NFS_UINT_MAXLEN 11
-/* Default cache timeout is 10 minutes */
-unsigned int nfs_idmap_cache_timeout = 600;
static const struct cred *id_resolver_cache;
static struct key_type key_type_id_resolver_legacy;
@@ -205,12 +203,18 @@ static int nfs_idmap_init_keyring(void)
if (ret < 0)
goto failed_put_key;
+ ret = register_key_type(&key_type_id_resolver_legacy);
+ if (ret < 0)
+ goto failed_reg_legacy;
+
set_bit(KEY_FLAG_ROOT_CAN_CLEAR, &keyring->flags);
cred->thread_keyring = keyring;
cred->jit_keyring = KEY_REQKEY_DEFL_THREAD_KEYRING;
id_resolver_cache = cred;
return 0;
+failed_reg_legacy:
+ unregister_key_type(&key_type_id_resolver);
failed_put_key:
key_put(keyring);
failed_put_cred:
@@ -222,6 +226,7 @@ static void nfs_idmap_quit_keyring(void)
{
key_revoke(id_resolver_cache->thread_keyring);
unregister_key_type(&key_type_id_resolver);
+ unregister_key_type(&key_type_id_resolver_legacy);
put_cred(id_resolver_cache);
}
@@ -359,7 +364,6 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *typ
}
/* idmap classic begins here */
-module_param(nfs_idmap_cache_timeout, int, 0644);
enum {
Opt_find_uid, Opt_find_gid, Opt_find_user, Opt_find_group, Opt_find_err
@@ -385,7 +389,7 @@ static const struct rpc_pipe_ops idmap_upcall_ops = {
};
static struct key_type key_type_id_resolver_legacy = {
- .name = "id_resolver",
+ .name = "id_legacy",
.instantiate = user_instantiate,
.match = user_match,
.revoke = user_revoke,
@@ -674,6 +678,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
if (ret < 0)
goto out2;
+ BUG_ON(idmap->idmap_key_cons != NULL);
idmap->idmap_key_cons = cons;
ret = rpc_queue_upcall(idmap->idmap_pipe, msg);
@@ -687,8 +692,7 @@ out2:
out1:
kfree(msg);
out0:
- key_revoke(cons->key);
- key_revoke(cons->authkey);
+ complete_request_key(cons, ret);
return ret;
}
@@ -722,11 +726,18 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
{
struct rpc_inode *rpci = RPC_I(filp->f_path.dentry->d_inode);
struct idmap *idmap = (struct idmap *)rpci->private;
- struct key_construction *cons = idmap->idmap_key_cons;
+ struct key_construction *cons;
struct idmap_msg im;
size_t namelen_in;
int ret;
+ /* If instantiation is successful, anyone waiting for key construction
+ * will have been woken up and someone else may now have used
+ * idmap_key_cons - so after this point we may no longer touch it.
+ */
+ cons = ACCESS_ONCE(idmap->idmap_key_cons);
+ idmap->idmap_key_cons = NULL;
+
if (mlen != sizeof(im)) {
ret = -ENOSPC;
goto out;
@@ -739,7 +750,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
if (!(im.im_status & IDMAP_STATUS_SUCCESS)) {
ret = mlen;
- complete_request_key(idmap->idmap_key_cons, -ENOKEY);
+ complete_request_key(cons, -ENOKEY);
goto out_incomplete;
}
@@ -756,7 +767,7 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
}
out:
- complete_request_key(idmap->idmap_key_cons, ret);
+ complete_request_key(cons, ret);
out_incomplete:
return ret;
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index f7296983eba6..c6e895f0fbf3 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -32,7 +32,6 @@
#include <linux/lockd/bind.h>
#include <linux/seq_file.h>
#include <linux/mount.h>
-#include <linux/nfs_idmap.h>
#include <linux/vfs.h>
#include <linux/inet.h>
#include <linux/nfs_xdr.h>
@@ -51,6 +50,7 @@
#include "fscache.h"
#include "dns_resolve.h"
#include "pnfs.h"
+#include "nfs.h"
#include "netns.h"
#define NFSDBG_FACILITY NFSDBG_VFS
@@ -82,6 +82,7 @@ int nfs_wait_bit_killable(void *word)
freezable_schedule();
return 0;
}
+EXPORT_SYMBOL_GPL(nfs_wait_bit_killable);
/**
* nfs_compat_user_ino64 - returns the user-visible inode number
@@ -106,7 +107,7 @@ u64 nfs_compat_user_ino64(u64 fileid)
return ino;
}
-static void nfs_clear_inode(struct inode *inode)
+void nfs_clear_inode(struct inode *inode)
{
/*
* The following should never happen...
@@ -117,6 +118,7 @@ static void nfs_clear_inode(struct inode *inode)
nfs_access_zap_cache(inode);
nfs_fscache_release_inode_cookie(inode);
}
+EXPORT_SYMBOL_GPL(nfs_clear_inode);
void nfs_evict_inode(struct inode *inode)
{
@@ -186,6 +188,7 @@ void nfs_zap_acl_cache(struct inode *inode)
NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_ACL;
spin_unlock(&inode->i_lock);
}
+EXPORT_SYMBOL_GPL(nfs_zap_acl_cache);
void nfs_invalidate_atime(struct inode *inode)
{
@@ -193,6 +196,7 @@ void nfs_invalidate_atime(struct inode *inode)
NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
spin_unlock(&inode->i_lock);
}
+EXPORT_SYMBOL_GPL(nfs_invalidate_atime);
/*
* Invalidate, but do not unhash, the inode.
@@ -391,6 +395,7 @@ out_no_inode:
dprintk("nfs_fhget: iget failed with error %ld\n", PTR_ERR(inode));
goto out;
}
+EXPORT_SYMBOL_GPL(nfs_fhget);
#define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN)
@@ -430,7 +435,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
* Return any delegations if we're going to change ACLs
*/
if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0)
- nfs_inode_return_delegation(inode);
+ NFS_PROTO(inode)->return_delegation(inode);
error = NFS_PROTO(inode)->setattr(dentry, fattr, attr);
if (error == 0)
nfs_refresh_inode(inode, fattr);
@@ -438,6 +443,7 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
out:
return error;
}
+EXPORT_SYMBOL_GPL(nfs_setattr);
/**
* nfs_vmtruncate - unmap mappings "freed" by truncate() syscall
@@ -496,6 +502,7 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr)
nfs_vmtruncate(inode, attr->ia_size);
}
}
+EXPORT_SYMBOL_GPL(nfs_setattr_update_inode);
int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
{
@@ -535,6 +542,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
out:
return err;
}
+EXPORT_SYMBOL_GPL(nfs_getattr);
static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
{
@@ -623,6 +631,7 @@ void nfs_close_context(struct nfs_open_context *ctx, int is_sync)
return;
nfs_revalidate_inode(server, inode);
}
+EXPORT_SYMBOL_GPL(nfs_close_context);
struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f_mode)
{
@@ -649,6 +658,7 @@ struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, fmode_t f
ctx->mdsthreshold = NULL;
return ctx;
}
+EXPORT_SYMBOL_GPL(alloc_nfs_open_context);
struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
{
@@ -656,6 +666,7 @@ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
atomic_inc(&ctx->lock_context.count);
return ctx;
}
+EXPORT_SYMBOL_GPL(get_nfs_open_context);
static void __put_nfs_open_context(struct nfs_open_context *ctx, int is_sync)
{
@@ -683,6 +694,7 @@ void put_nfs_open_context(struct nfs_open_context *ctx)
{
__put_nfs_open_context(ctx, 0);
}
+EXPORT_SYMBOL_GPL(put_nfs_open_context);
/*
* Ensure that mmap has a recent RPC credential for use when writing out
@@ -698,6 +710,7 @@ void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx)
list_add(&ctx->list, &nfsi->open_files);
spin_unlock(&inode->i_lock);
}
+EXPORT_SYMBOL_GPL(nfs_file_set_open_context);
/*
* Given an inode, search for an open context with the desired characteristics
@@ -842,6 +855,7 @@ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
return NFS_STALE(inode) ? -ESTALE : 0;
return __nfs_revalidate_inode(server, inode);
}
+EXPORT_SYMBOL_GPL(nfs_revalidate_inode);
static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
{
@@ -883,6 +897,10 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
struct nfs_inode *nfsi = NFS_I(inode);
int ret = 0;
+ /* swapfiles are not supposed to be shared. */
+ if (IS_SWAPFILE(inode))
+ goto out;
+
if (nfs_mapping_need_revalidate_inode(inode)) {
ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
if (ret < 0)
@@ -1028,6 +1046,7 @@ void nfs_fattr_init(struct nfs_fattr *fattr)
fattr->owner_name = NULL;
fattr->group_name = NULL;
}
+EXPORT_SYMBOL_GPL(nfs_fattr_init);
struct nfs_fattr *nfs_alloc_fattr(void)
{
@@ -1038,6 +1057,7 @@ struct nfs_fattr *nfs_alloc_fattr(void)
nfs_fattr_init(fattr);
return fattr;
}
+EXPORT_SYMBOL_GPL(nfs_alloc_fattr);
struct nfs_fh *nfs_alloc_fhandle(void)
{
@@ -1048,6 +1068,7 @@ struct nfs_fh *nfs_alloc_fhandle(void)
fh->size = 0;
return fh;
}
+EXPORT_SYMBOL_GPL(nfs_alloc_fhandle);
#ifdef NFS_DEBUG
/*
@@ -1168,6 +1189,7 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
return status;
}
+EXPORT_SYMBOL_GPL(nfs_refresh_inode);
static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
{
@@ -1204,6 +1226,7 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr)
spin_unlock(&inode->i_lock);
return status;
}
+EXPORT_SYMBOL_GPL(nfs_post_op_update_inode);
/**
* nfs_post_op_update_inode_force_wcc - try to update the inode attribute cache
@@ -1255,6 +1278,7 @@ out_noforce:
spin_unlock(&inode->i_lock);
return status;
}
+EXPORT_SYMBOL_GPL(nfs_post_op_update_inode_force_wcc);
/*
* Many nfs protocol calls return the new file attributes after
@@ -1457,7 +1481,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
|| S_ISLNK(inode->i_mode)))
invalid &= ~NFS_INO_INVALID_DATA;
- if (!nfs_have_delegation(inode, FMODE_READ) ||
+ if (!NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) ||
(save_cache_validity & NFS_INO_REVAL_FORCED))
nfsi->cache_validity |= invalid;
@@ -1472,27 +1496,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
return -ESTALE;
}
-
-#ifdef CONFIG_NFS_V4
-
-/*
- * Clean out any remaining NFSv4 state that might be left over due
- * to open() calls that passed nfs_atomic_lookup, but failed to call
- * nfs_open().
- */
-void nfs4_evict_inode(struct inode *inode)
-{
- truncate_inode_pages(&inode->i_data, 0);
- clear_inode(inode);
- pnfs_return_layout(inode);
- pnfs_destroy_layout(NFS_I(inode));
- /* If we are holding a delegation, return it! */
- nfs_inode_return_delegation_noreclaim(inode);
- /* First call standard NFS clear_inode() code */
- nfs_clear_inode(inode);
-}
-#endif
-
struct inode *nfs_alloc_inode(struct super_block *sb)
{
struct nfs_inode *nfsi;
@@ -1505,11 +1508,12 @@ struct inode *nfs_alloc_inode(struct super_block *sb)
nfsi->acl_access = ERR_PTR(-EAGAIN);
nfsi->acl_default = ERR_PTR(-EAGAIN);
#endif
-#ifdef CONFIG_NFS_V4
+#if IS_ENABLED(CONFIG_NFS_V4)
nfsi->nfs4_acl = NULL;
#endif /* CONFIG_NFS_V4 */
return &nfsi->vfs_inode;
}
+EXPORT_SYMBOL_GPL(nfs_alloc_inode);
static void nfs_i_callback(struct rcu_head *head)
{
@@ -1521,10 +1525,11 @@ void nfs_destroy_inode(struct inode *inode)
{
call_rcu(&inode->i_rcu, nfs_i_callback);
}
+EXPORT_SYMBOL_GPL(nfs_destroy_inode);
static inline void nfs4_init_once(struct nfs_inode *nfsi)
{
-#ifdef CONFIG_NFS_V4
+#if IS_ENABLED(CONFIG_NFS_V4)
INIT_LIST_HEAD(&nfsi->open_states);
nfsi->delegation = NULL;
nfsi->delegation_state = 0;
@@ -1570,6 +1575,7 @@ static void nfs_destroy_inodecache(void)
}
struct workqueue_struct *nfsiod_workqueue;
+EXPORT_SYMBOL_GPL(nfsiod_workqueue);
/*
* start up the nfsiod workqueue
@@ -1628,81 +1634,76 @@ static int __init init_nfs_fs(void)
{
int err;
- err = nfs_idmap_init();
- if (err < 0)
- goto out10;
-
err = nfs_dns_resolver_init();
if (err < 0)
- goto out9;
+ goto out10;;
err = register_pernet_subsys(&nfs_net_ops);
if (err < 0)
- goto out8;
+ goto out9;
err = nfs_fscache_register();
if (err < 0)
- goto out7;
+ goto out8;
err = nfsiod_start();
if (err)
- goto out6;
+ goto out7;
err = nfs_fs_proc_init();
if (err)
- goto out5;
+ goto out6;
err = nfs_init_nfspagecache();
if (err)
- goto out4;
+ goto out5;
err = nfs_init_inodecache();
if (err)
- goto out3;
+ goto out4;
err = nfs_init_readpagecache();
if (err)
- goto out2;
+ goto out3;
err = nfs_init_writepagecache();
if (err)
- goto out1;
+ goto out2;
err = nfs_init_directcache();
if (err)
- goto out0;
+ goto out1;
#ifdef CONFIG_PROC_FS
rpc_proc_register(&init_net, &nfs_rpcstat);
#endif
if ((err = register_nfs_fs()) != 0)
- goto out;
+ goto out0;
+
return 0;
-out:
+out0:
#ifdef CONFIG_PROC_FS
rpc_proc_unregister(&init_net, "nfs");
#endif
nfs_destroy_directcache();
-out0:
- nfs_destroy_writepagecache();
out1:
- nfs_destroy_readpagecache();
+ nfs_destroy_writepagecache();
out2:
- nfs_destroy_inodecache();
+ nfs_destroy_readpagecache();
out3:
- nfs_destroy_nfspagecache();
+ nfs_destroy_inodecache();
out4:
- nfs_fs_proc_exit();
+ nfs_destroy_nfspagecache();
out5:
- nfsiod_stop();
+ nfs_fs_proc_exit();
out6:
- nfs_fscache_unregister();
+ nfsiod_stop();
out7:
- unregister_pernet_subsys(&nfs_net_ops);
+ nfs_fscache_unregister();
out8:
- nfs_dns_resolver_destroy();
+ unregister_pernet_subsys(&nfs_net_ops);
out9:
- nfs_idmap_quit();
+ nfs_dns_resolver_destroy();
out10:
return err;
}
@@ -1717,7 +1718,6 @@ static void __exit exit_nfs_fs(void)
nfs_fscache_unregister();
unregister_pernet_subsys(&nfs_net_ops);
nfs_dns_resolver_destroy();
- nfs_idmap_quit();
#ifdef CONFIG_PROC_FS
rpc_proc_unregister(&init_net, "nfs");
#endif
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 18f99ef71343..31fdb03225cd 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -85,6 +85,17 @@ struct nfs_clone_mount {
*/
#define NFS_MAX_READDIR_PAGES 8
+struct nfs_client_initdata {
+ unsigned long init_flags;
+ const char *hostname;
+ const struct sockaddr *addr;
+ size_t addrlen;
+ struct nfs_subversion *nfs_mod;
+ int proto;
+ u32 minorversion;
+ struct net *net;
+};
+
/*
* In-kernel mount arguments
*/
@@ -142,25 +153,45 @@ struct nfs_mount_request {
struct net *net;
};
+struct nfs_mount_info {
+ void (*fill_super)(struct super_block *, struct nfs_mount_info *);
+ int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *);
+ struct nfs_parsed_mount_data *parsed;
+ struct nfs_clone_mount *cloned;
+ struct nfs_fh *mntfh;
+};
+
extern int nfs_mount(struct nfs_mount_request *info);
extern void nfs_umount(const struct nfs_mount_request *info);
/* client.c */
extern const struct rpc_program nfs_program;
extern void nfs_clients_init(struct net *net);
+extern struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *);
+int nfs_create_rpc_client(struct nfs_client *, const struct rpc_timeout *, rpc_authflavor_t);
+struct nfs_client *nfs_get_client(const struct nfs_client_initdata *,
+ const struct rpc_timeout *, const char *,
+ rpc_authflavor_t);
+int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *);
+void nfs_server_insert_lists(struct nfs_server *);
+void nfs_init_timeout_values(struct rpc_timeout *, int, unsigned int, unsigned int);
+int nfs_init_server_rpcclient(struct nfs_server *, const struct rpc_timeout *t,
+ rpc_authflavor_t);
+struct nfs_server *nfs_alloc_server(void);
+void nfs_server_copy_userdata(struct nfs_server *, struct nfs_server *);
extern void nfs_cleanup_cb_ident_idr(struct net *);
extern void nfs_put_client(struct nfs_client *);
+extern void nfs_free_client(struct nfs_client *);
extern struct nfs_client *nfs4_find_client_ident(struct net *, int);
extern struct nfs_client *
nfs4_find_client_sessionid(struct net *, const struct sockaddr *,
struct nfs4_sessionid *);
-extern struct nfs_server *nfs_create_server(
- const struct nfs_parsed_mount_data *,
- struct nfs_fh *);
+extern struct nfs_server *nfs_create_server(struct nfs_mount_info *,
+ struct nfs_subversion *);
extern struct nfs_server *nfs4_create_server(
- const struct nfs_parsed_mount_data *,
- struct nfs_fh *);
+ struct nfs_mount_info *,
+ struct nfs_subversion *);
extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *,
struct nfs_fh *);
extern void nfs_free_server(struct nfs_server *server);
@@ -188,6 +219,17 @@ static inline void nfs_fs_proc_exit(void)
}
#endif
+#ifdef CONFIG_NFS_V4_1
+int nfs_sockaddr_match_ipaddr(const struct sockaddr *, const struct sockaddr *);
+#endif
+
+/* nfs3client.c */
+#if IS_ENABLED(CONFIG_NFS_V3)
+struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subversion *);
+struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *,
+ struct nfs_fattr *, rpc_authflavor_t);
+#endif
+
/* callback_xdr.c */
extern struct svc_version nfs4_callback_version1;
extern struct svc_version nfs4_callback_version4;
@@ -220,7 +262,7 @@ extern int nfs3_decode_dirent(struct xdr_stream *,
struct nfs_entry *, int);
/* nfs4xdr.c */
-#ifdef CONFIG_NFS_V4
+#if IS_ENABLED(CONFIG_NFS_V4)
extern int nfs4_decode_dirent(struct xdr_stream *,
struct nfs_entry *, int);
#endif
@@ -230,7 +272,7 @@ extern const u32 nfs41_maxwrite_overhead;
#endif
/* nfs4proc.c */
-#ifdef CONFIG_NFS_V4
+#if IS_ENABLED(CONFIG_NFS_V4)
extern struct rpc_procinfo nfs4_procedures[];
#endif
@@ -245,25 +287,63 @@ extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
/* dir.c */
extern int nfs_access_cache_shrinker(struct shrinker *shrink,
struct shrink_control *sc);
+struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int);
+int nfs_create(struct inode *, struct dentry *, umode_t, bool);
+int nfs_mkdir(struct inode *, struct dentry *, umode_t);
+int nfs_rmdir(struct inode *, struct dentry *);
+int nfs_unlink(struct inode *, struct dentry *);
+int nfs_symlink(struct inode *, struct dentry *, const char *);
+int nfs_link(struct dentry *, struct inode *, struct dentry *);
+int nfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
+int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
+
+/* file.c */
+int nfs_file_fsync_commit(struct file *, loff_t, loff_t, int);
+loff_t nfs_file_llseek(struct file *, loff_t, int);
+int nfs_file_flush(struct file *, fl_owner_t);
+ssize_t nfs_file_read(struct kiocb *, const struct iovec *, unsigned long, loff_t);
+ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *,
+ size_t, unsigned int);
+int nfs_file_mmap(struct file *, struct vm_area_struct *);
+ssize_t nfs_file_write(struct kiocb *, const struct iovec *, unsigned long, loff_t);
+int nfs_file_release(struct inode *, struct file *);
+int nfs_lock(struct file *, int, struct file_lock *);
+int nfs_flock(struct file *, int, struct file_lock *);
+ssize_t nfs_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *,
+ size_t, unsigned int);
+int nfs_check_flags(int);
+int nfs_setlease(struct file *, long, struct file_lock **);
/* inode.c */
extern struct workqueue_struct *nfsiod_workqueue;
extern struct inode *nfs_alloc_inode(struct super_block *sb);
extern void nfs_destroy_inode(struct inode *);
extern int nfs_write_inode(struct inode *, struct writeback_control *);
+extern void nfs_clear_inode(struct inode *);
extern void nfs_evict_inode(struct inode *);
-#ifdef CONFIG_NFS_V4
-extern void nfs4_evict_inode(struct inode *);
-#endif
void nfs_zap_acl_cache(struct inode *inode);
extern int nfs_wait_bit_killable(void *word);
/* super.c */
+extern const struct super_operations nfs_sops;
+extern struct file_system_type nfs_fs_type;
extern struct file_system_type nfs_xdev_fs_type;
-#ifdef CONFIG_NFS_V4
+#if IS_ENABLED(CONFIG_NFS_V4)
extern struct file_system_type nfs4_xdev_fs_type;
extern struct file_system_type nfs4_referral_fs_type;
#endif
+struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *,
+ struct nfs_subversion *);
+void nfs_initialise_sb(struct super_block *);
+int nfs_set_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *);
+int nfs_clone_sb_security(struct super_block *, struct dentry *, struct nfs_mount_info *);
+struct dentry *nfs_fs_mount_common(struct nfs_server *, int, const char *,
+ struct nfs_mount_info *, struct nfs_subversion *);
+struct dentry *nfs_fs_mount(struct file_system_type *, int, const char *, void *);
+struct dentry * nfs_xdev_mount_common(struct file_system_type *, int,
+ const char *, struct nfs_mount_info *);
+void nfs_kill_super(struct super_block *);
+void nfs_fill_super(struct super_block *, struct nfs_mount_info *);
extern struct rpc_stat nfs_rpcstat;
@@ -284,7 +364,7 @@ struct vfsmount *nfs_do_submount(struct dentry *, struct nfs_fh *,
/* getroot.c */
extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
const char *);
-#ifdef CONFIG_NFS_V4
+#if IS_ENABLED(CONFIG_NFS_V4)
extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
const char *);
@@ -304,12 +384,23 @@ extern int nfs_initiate_read(struct rpc_clnt *clnt,
extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr);
-extern void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
+extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
struct inode *inode,
const struct nfs_pgio_completion_ops *compl_ops);
extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
extern void nfs_readdata_release(struct nfs_read_data *rdata);
+/* super.c */
+void nfs_clone_super(struct super_block *, struct nfs_mount_info *);
+void nfs_umount_begin(struct super_block *);
+int nfs_statfs(struct dentry *, struct kstatfs *);
+int nfs_show_options(struct seq_file *, struct dentry *);
+int nfs_show_devname(struct seq_file *, struct dentry *);
+int nfs_show_path(struct seq_file *, struct dentry *);
+int nfs_show_stats(struct seq_file *, struct dentry *);
+void nfs_put_super(struct super_block *);
+int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
+
/* write.c */
extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
struct inode *inode, int ioflags,
@@ -318,7 +409,7 @@ extern struct nfs_write_header *nfs_writehdr_alloc(void);
extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr);
-extern void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
+extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
struct inode *inode, int ioflags,
const struct nfs_pgio_completion_ops *compl_ops);
extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
@@ -463,13 +554,14 @@ void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize)
static inline
unsigned int nfs_page_length(struct page *page)
{
- loff_t i_size = i_size_read(page->mapping->host);
+ loff_t i_size = i_size_read(page_file_mapping(page)->host);
if (i_size > 0) {
+ pgoff_t page_index = page_file_index(page);
pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
- if (page->index < end_index)
+ if (page_index < end_index)
return PAGE_CACHE_SIZE;
- if (page->index == end_index)
+ if (page_index == end_index)
return ((i_size - 1) & ~PAGE_CACHE_MASK) + 1;
}
return 0;
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 08b9c93675da..655925373b91 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -7,6 +7,7 @@
* NFS namespace
*/
+#include <linux/module.h>
#include <linux/dcache.h>
#include <linux/gfp.h>
#include <linux/mount.h>
@@ -112,6 +113,7 @@ Elong_unlock:
Elong:
return ERR_PTR(-ENAMETOOLONG);
}
+EXPORT_SYMBOL_GPL(nfs_path);
/*
* nfs_d_automount - Handle crossing a mountpoint on the server
@@ -195,20 +197,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
const char *devname,
struct nfs_clone_mount *mountdata)
{
-#ifdef CONFIG_NFS_V4
- struct vfsmount *mnt = ERR_PTR(-EINVAL);
- switch (server->nfs_client->rpc_ops->version) {
- case 2:
- case 3:
- mnt = vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata);
- break;
- case 4:
- mnt = vfs_kern_mount(&nfs4_xdev_fs_type, 0, devname, mountdata);
- }
- return mnt;
-#else
return vfs_kern_mount(&nfs_xdev_fs_type, 0, devname, mountdata);
-#endif
}
/**
@@ -253,6 +242,7 @@ out:
dprintk("<-- nfs_do_submount() = %p\n", mnt);
return mnt;
}
+EXPORT_SYMBOL_GPL(nfs_do_submount);
struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry,
struct nfs_fh *fh, struct nfs_fattr *fattr)
@@ -268,3 +258,4 @@ struct vfsmount *nfs_submount(struct nfs_server *server, struct dentry *dentry,
return nfs_do_submount(dentry, fh, fattr, server->client->cl_auth->au_flavor);
}
+EXPORT_SYMBOL_GPL(nfs_submount);
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h
index 8a6394edb8b0..0539de1b8d1f 100644
--- a/fs/nfs/netns.h
+++ b/fs/nfs/netns.h
@@ -20,7 +20,7 @@ struct nfs_net {
wait_queue_head_t bl_wq;
struct list_head nfs_client_list;
struct list_head nfs_volume_list;
-#ifdef CONFIG_NFS_V4
+#if IS_ENABLED(CONFIG_NFS_V4)
struct idr cb_ident_idr; /* Protected by nfs_client_lock */
#endif
spinlock_t nfs_client_lock;
diff --git a/fs/nfs/nfs.h b/fs/nfs/nfs.h
new file mode 100644
index 000000000000..43679df56cd0
--- /dev/null
+++ b/fs/nfs/nfs.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2012 Netapp, Inc. All rights reserved.
+ *
+ * Function and structures exported by the NFS module
+ * for use by NFS version-specific modules.
+ */
+#ifndef __LINUX_INTERNAL_NFS_H
+#define __LINUX_INTERNAL_NFS_H
+
+#include <linux/fs.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/nfs_xdr.h>
+
+struct nfs_subversion {
+ struct module *owner; /* THIS_MODULE pointer */
+ struct file_system_type *nfs_fs; /* NFS filesystem type */
+ const struct rpc_version *rpc_vers; /* NFS version information */
+ const struct nfs_rpc_ops *rpc_ops; /* NFS operations */
+ const struct super_operations *sops; /* NFS Super operations */
+ const struct xattr_handler **xattr; /* NFS xattr handlers */
+ struct list_head list; /* List of NFS versions */
+};
+
+struct nfs_subversion *get_nfs_version(unsigned int);
+void put_nfs_version(struct nfs_subversion *);
+void register_nfs_version(struct nfs_subversion *);
+void unregister_nfs_version(struct nfs_subversion *);
+
+#endif /* __LINUX_INTERNAL_NFS_H */
diff --git a/fs/nfs/nfs2super.c b/fs/nfs/nfs2super.c
new file mode 100644
index 000000000000..0a9782c9171a
--- /dev/null
+++ b/fs/nfs/nfs2super.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2012 Netapp, Inc. All rights reserved.
+ */
+#include <linux/module.h>
+#include <linux/nfs_fs.h>
+#include "internal.h"
+#include "nfs.h"
+
+static struct nfs_subversion nfs_v2 = {
+ .owner = THIS_MODULE,
+ .nfs_fs = &nfs_fs_type,
+ .rpc_vers = &nfs_version2,
+ .rpc_ops = &nfs_v2_clientops,
+ .sops = &nfs_sops,
+};
+
+static int __init init_nfs_v2(void)
+{
+ register_nfs_version(&nfs_v2);
+ return 0;
+}
+
+static void __exit exit_nfs_v2(void)
+{
+ unregister_nfs_version(&nfs_v2);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_nfs_v2);
+module_exit(exit_nfs_v2);
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index baf759bccd05..d04f0df7be55 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -106,19 +106,16 @@ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_readres *result)
{
u32 recvd, count;
- size_t hdrlen;
__be32 *p;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
goto out_overflow;
count = be32_to_cpup(p);
- hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
- recvd = xdr->buf->len - hdrlen;
+ recvd = xdr_read_pages(xdr, count);
if (unlikely(count > recvd))
goto out_cheating;
out:
- xdr_read_pages(xdr, count);
result->eof = 0; /* NFSv2 does not pass EOF flag on the wire. */
result->count = count;
return count;
@@ -440,7 +437,6 @@ static void encode_path(struct xdr_stream *xdr, struct page **pages, u32 length)
static int decode_path(struct xdr_stream *xdr)
{
u32 length, recvd;
- size_t hdrlen;
__be32 *p;
p = xdr_inline_decode(xdr, 4);
@@ -449,12 +445,9 @@ static int decode_path(struct xdr_stream *xdr)
length = be32_to_cpup(p);
if (unlikely(length >= xdr->buf->page_len || length > NFS_MAXPATHLEN))
goto out_size;
- hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
- recvd = xdr->buf->len - hdrlen;
+ recvd = xdr_read_pages(xdr, length);
if (unlikely(length > recvd))
goto out_cheating;
-
- xdr_read_pages(xdr, length);
xdr_terminate_string(xdr->buf, length);
return 0;
out_size:
@@ -972,22 +965,7 @@ out_overflow:
*/
static int decode_readdirok(struct xdr_stream *xdr)
{
- u32 recvd, pglen;
- size_t hdrlen;
-
- pglen = xdr->buf->page_len;
- hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
- recvd = xdr->buf->len - hdrlen;
- if (unlikely(pglen > recvd))
- goto out_cheating;
-out:
- xdr_read_pages(xdr, pglen);
- return pglen;
-out_cheating:
- dprintk("NFS: server cheating in readdir result: "
- "pglen %u > recvd %u\n", pglen, recvd);
- pglen = recvd;
- goto out;
+ return xdr_read_pages(xdr, xdr->buf->page_len);
}
static int nfs2_xdr_dec_readdirres(struct rpc_rqst *req,
diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c
new file mode 100644
index 000000000000..b3fc65ef39ca
--- /dev/null
+++ b/fs/nfs/nfs3client.c
@@ -0,0 +1,65 @@
+#include <linux/nfs_fs.h>
+#include <linux/nfs_mount.h>
+#include "internal.h"
+
+#ifdef CONFIG_NFS_V3_ACL
+static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program };
+static const struct rpc_version *nfsacl_version[] = {
+ [3] = &nfsacl_version3,
+};
+
+const struct rpc_program nfsacl_program = {
+ .name = "nfsacl",
+ .number = NFS_ACL_PROGRAM,
+ .nrvers = ARRAY_SIZE(nfsacl_version),
+ .version = nfsacl_version,
+ .stats = &nfsacl_rpcstat,
+};
+
+/*
+ * Initialise an NFSv3 ACL client connection
+ */
+static void nfs_init_server_aclclient(struct nfs_server *server)
+{
+ if (server->flags & NFS_MOUNT_NOACL)
+ goto out_noacl;
+
+ server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3);
+ if (IS_ERR(server->client_acl))
+ goto out_noacl;
+
+ /* No errors! Assume that Sun nfsacls are supported */
+ server->caps |= NFS_CAP_ACLS;
+ return;
+
+out_noacl:
+ server->caps &= ~NFS_CAP_ACLS;
+}
+#else
+static inline void nfs_init_server_aclclient(struct nfs_server *server)
+{
+ server->flags &= ~NFS_MOUNT_NOACL;
+ server->caps &= ~NFS_CAP_ACLS;
+}
+#endif
+
+struct nfs_server *nfs3_create_server(struct nfs_mount_info *mount_info,
+ struct nfs_subversion *nfs_mod)
+{
+ struct nfs_server *server = nfs_create_server(mount_info, nfs_mod);
+ /* Create a client RPC handle for the NFS v3 ACL management interface */
+ if (!IS_ERR(server))
+ nfs_init_server_aclclient(server);
+ return server;
+}
+
+struct nfs_server *nfs3_clone_server(struct nfs_server *source,
+ struct nfs_fh *fh,
+ struct nfs_fattr *fattr,
+ rpc_authflavor_t flavor)
+{
+ struct nfs_server *server = nfs_clone_server(source, fh, fattr, flavor);
+ if (!IS_ERR(server) && !IS_ERR(source->client_acl))
+ nfs_init_server_aclclient(server);
+ return server;
+}
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 3187e24e8f78..0952c791df36 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -877,6 +877,46 @@ nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl);
}
+static int nfs3_have_delegation(struct inode *inode, fmode_t flags)
+{
+ return 0;
+}
+
+static int nfs3_return_delegation(struct inode *inode)
+{
+ nfs_wb_all(inode);
+ return 0;
+}
+
+static const struct inode_operations nfs3_dir_inode_operations = {
+ .create = nfs_create,
+ .lookup = nfs_lookup,
+ .link = nfs_link,
+ .unlink = nfs_unlink,
+ .symlink = nfs_symlink,
+ .mkdir = nfs_mkdir,
+ .rmdir = nfs_rmdir,
+ .mknod = nfs_mknod,
+ .rename = nfs_rename,
+ .permission = nfs_permission,
+ .getattr = nfs_getattr,
+ .setattr = nfs_setattr,
+ .listxattr = nfs3_listxattr,
+ .getxattr = nfs3_getxattr,
+ .setxattr = nfs3_setxattr,
+ .removexattr = nfs3_removexattr,
+};
+
+static const struct inode_operations nfs3_file_inode_operations = {
+ .permission = nfs_permission,
+ .getattr = nfs_getattr,
+ .setattr = nfs_setattr,
+ .listxattr = nfs3_listxattr,
+ .getxattr = nfs3_getxattr,
+ .setxattr = nfs3_setxattr,
+ .removexattr = nfs3_removexattr,
+};
+
const struct nfs_rpc_ops nfs_v3_clientops = {
.version = 3, /* protocol version */
.dentry_ops = &nfs_dentry_operations,
@@ -885,6 +925,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.file_ops = &nfs_file_operations,
.getroot = nfs3_proc_get_root,
.submount = nfs_submount,
+ .try_mount = nfs_try_mount,
.getattr = nfs3_proc_getattr,
.setattr = nfs3_proc_setattr,
.lookup = nfs3_proc_lookup,
@@ -910,9 +951,11 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.pathconf = nfs3_proc_pathconf,
.decode_dirent = nfs3_decode_dirent,
.read_setup = nfs3_proc_read_setup,
+ .read_pageio_init = nfs_pageio_init_read,
.read_rpc_prepare = nfs3_proc_read_rpc_prepare,
.read_done = nfs3_read_done,
.write_setup = nfs3_proc_write_setup,
+ .write_pageio_init = nfs_pageio_init_write,
.write_rpc_prepare = nfs3_proc_write_rpc_prepare,
.write_done = nfs3_write_done,
.commit_setup = nfs3_proc_commit_setup,
@@ -921,5 +964,11 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.lock = nfs3_proc_lock,
.clear_acl_cache = nfs3_forget_cached_acls,
.close_context = nfs_close_context,
+ .have_delegation = nfs3_have_delegation,
+ .return_delegation = nfs3_return_delegation,
+ .alloc_client = nfs_alloc_client,
.init_client = nfs_init_client,
+ .free_client = nfs_free_client,
+ .create_server = nfs3_create_server,
+ .clone_server = nfs3_clone_server,
};
diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c
new file mode 100644
index 000000000000..cc471c725230
--- /dev/null
+++ b/fs/nfs/nfs3super.c
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2012 Netapp, Inc. All rights reserved.
+ */
+#include <linux/module.h>
+#include <linux/nfs_fs.h>
+#include "internal.h"
+#include "nfs.h"
+
+static struct nfs_subversion nfs_v3 = {
+ .owner = THIS_MODULE,
+ .nfs_fs = &nfs_fs_type,
+ .rpc_vers = &nfs_version3,
+ .rpc_ops = &nfs_v3_clientops,
+ .sops = &nfs_sops,
+};
+
+static int __init init_nfs_v3(void)
+{
+ register_nfs_version(&nfs_v3);
+ return 0;
+}
+
+static void __exit exit_nfs_v3(void)
+{
+ unregister_nfs_version(&nfs_v3);
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_nfs_v3);
+module_exit(exit_nfs_v3);
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 902de489ec9b..6cbe89400dfc 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -246,7 +246,6 @@ static void encode_nfspath3(struct xdr_stream *xdr, struct page **pages,
static int decode_nfspath3(struct xdr_stream *xdr)
{
u32 recvd, count;
- size_t hdrlen;
__be32 *p;
p = xdr_inline_decode(xdr, 4);
@@ -255,12 +254,9 @@ static int decode_nfspath3(struct xdr_stream *xdr)
count = be32_to_cpup(p);
if (unlikely(count >= xdr->buf->page_len || count > NFS3_MAXPATHLEN))
goto out_nametoolong;
- hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
- recvd = xdr->buf->len - hdrlen;
+ recvd = xdr_read_pages(xdr, count);
if (unlikely(count > recvd))
goto out_cheating;
-
- xdr_read_pages(xdr, count);
xdr_terminate_string(xdr->buf, count);
return 0;
@@ -329,14 +325,14 @@ static void encode_createverf3(struct xdr_stream *xdr, const __be32 *verifier)
memcpy(p, verifier, NFS3_CREATEVERFSIZE);
}
-static int decode_writeverf3(struct xdr_stream *xdr, __be32 *verifier)
+static int decode_writeverf3(struct xdr_stream *xdr, struct nfs_write_verifier *verifier)
{
__be32 *p;
p = xdr_inline_decode(xdr, NFS3_WRITEVERFSIZE);
if (unlikely(p == NULL))
goto out_overflow;
- memcpy(verifier, p, NFS3_WRITEVERFSIZE);
+ memcpy(verifier->data, p, NFS3_WRITEVERFSIZE);
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
@@ -1587,7 +1583,6 @@ static int decode_read3resok(struct xdr_stream *xdr,
struct nfs_readres *result)
{
u32 eof, count, ocount, recvd;
- size_t hdrlen;
__be32 *p;
p = xdr_inline_decode(xdr, 4 + 4 + 4);
@@ -1598,13 +1593,10 @@ static int decode_read3resok(struct xdr_stream *xdr,
ocount = be32_to_cpup(p++);
if (unlikely(ocount != count))
goto out_mismatch;
- hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
- recvd = xdr->buf->len - hdrlen;
+ recvd = xdr_read_pages(xdr, count);
if (unlikely(count > recvd))
goto out_cheating;
-
out:
- xdr_read_pages(xdr, count);
result->eof = eof;
result->count = count;
return count;
@@ -1676,20 +1668,22 @@ static int decode_write3resok(struct xdr_stream *xdr,
{
__be32 *p;
- p = xdr_inline_decode(xdr, 4 + 4 + NFS3_WRITEVERFSIZE);
+ p = xdr_inline_decode(xdr, 4 + 4);
if (unlikely(p == NULL))
goto out_overflow;
result->count = be32_to_cpup(p++);
result->verf->committed = be32_to_cpup(p++);
if (unlikely(result->verf->committed > NFS_FILE_SYNC))
goto out_badvalue;
- memcpy(result->verf->verifier, p, NFS3_WRITEVERFSIZE);
+ if (decode_writeverf3(xdr, &result->verf->verifier))
+ goto out_eio;
return result->count;
out_badvalue:
dprintk("NFS: bad stable_how value: %u\n", result->verf->committed);
return -EIO;
out_overflow:
print_overflow_msg(__func__, xdr);
+out_eio:
return -EIO;
}
@@ -2039,22 +2033,7 @@ out_truncated:
*/
static int decode_dirlist3(struct xdr_stream *xdr)
{
- u32 recvd, pglen;
- size_t hdrlen;
-
- pglen = xdr->buf->page_len;
- hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
- recvd = xdr->buf->len - hdrlen;
- if (unlikely(pglen > recvd))
- goto out_cheating;
-out:
- xdr_read_pages(xdr, pglen);
- return pglen;
-out_cheating:
- dprintk("NFS: server cheating in readdir result: "
- "pglen %u > recvd %u\n", pglen, recvd);
- pglen = recvd;
- goto out;
+ return xdr_read_pages(xdr, xdr->buf->page_len);
}
static int decode_readdir3resok(struct xdr_stream *xdr,
@@ -2337,7 +2316,7 @@ static int nfs3_xdr_dec_commit3res(struct rpc_rqst *req,
goto out;
if (status != NFS3_OK)
goto out_status;
- error = decode_writeverf3(xdr, result->verf->verifier);
+ error = decode_writeverf3(xdr, &result->verf->verifier);
out:
return error;
out_status:
@@ -2364,7 +2343,7 @@ static inline int decode_getacl3resok(struct xdr_stream *xdr,
if (result->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
goto out;
- hdrlen = (u8 *)xdr->p - (u8 *)xdr->iov->iov_base;
+ hdrlen = xdr_stream_pos(xdr);
acl = NULL;
if (result->mask & NFS_ACL)
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index cc5900ac61b5..3b950dd81e81 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -9,7 +9,7 @@
#ifndef __LINUX_FS_NFS_NFS4_FS_H
#define __LINUX_FS_NFS_NFS4_FS_H
-#ifdef CONFIG_NFS_V4
+#if IS_ENABLED(CONFIG_NFS_V4)
struct idmap;
@@ -200,7 +200,10 @@ struct nfs4_state_maintenance_ops {
};
extern const struct dentry_operations nfs4_dentry_operations;
-extern const struct inode_operations nfs4_dir_inode_operations;
+
+/* dir.c */
+int nfs_atomic_open(struct inode *, struct dentry *, struct file *,
+ unsigned, umode_t, int *);
/* nfs4namespace.c */
rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
@@ -301,6 +304,10 @@ extern const u32 nfs4_pathconf_bitmap[2];
extern const u32 nfs4_fsinfo_bitmap[3];
extern const u32 nfs4_fs_locations_bitmap[2];
+void nfs4_free_client(struct nfs_client *);
+
+struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *);
+
/* nfs4renewd.c */
extern void nfs4_schedule_state_renewal(struct nfs_client *);
extern void nfs4_renewd_prepare_shutdown(struct nfs_server *);
@@ -354,6 +361,29 @@ extern void nfs4_free_lock_state(struct nfs_server *server, struct nfs4_lock_sta
extern const nfs4_stateid zero_stateid;
+/* nfs4super.c */
+struct nfs_mount_info;
+extern struct nfs_subversion nfs_v4;
+struct dentry *nfs4_try_mount(int, const char *, struct nfs_mount_info *, struct nfs_subversion *);
+extern bool nfs4_disable_idmapping;
+extern unsigned short max_session_slots;
+extern unsigned short send_implementation_id;
+
+/* nfs4sysctl.c */
+#ifdef CONFIG_SYSCTL
+int nfs4_register_sysctl(void);
+void nfs4_unregister_sysctl(void);
+#else
+static inline int nfs4_register_sysctl(void)
+{
+ return 0;
+}
+
+static inline void nfs4_unregister_sysctl(void)
+{
+}
+#endif
+
/* nfs4xdr.c */
extern struct rpc_procinfo nfs4_procedures[];
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
new file mode 100644
index 000000000000..cbcdfaf32505
--- /dev/null
+++ b/fs/nfs/nfs4client.c
@@ -0,0 +1,656 @@
+/*
+ * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+#include <linux/module.h>
+#include <linux/nfs_fs.h>
+#include <linux/nfs_idmap.h>
+#include <linux/nfs_mount.h>
+#include <linux/sunrpc/auth.h>
+#include <linux/sunrpc/xprt.h>
+#include <linux/sunrpc/bc_xprt.h>
+#include "internal.h"
+#include "callback.h"
+#include "delegation.h"
+#include "pnfs.h"
+#include "netns.h"
+
+#define NFSDBG_FACILITY NFSDBG_CLIENT
+
+/*
+ * Get a unique NFSv4.0 callback identifier which will be used
+ * by the V4.0 callback service to lookup the nfs_client struct
+ */
+static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion)
+{
+ int ret = 0;
+ struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
+
+ if (clp->rpc_ops->version != 4 || minorversion != 0)
+ return ret;
+retry:
+ if (!idr_pre_get(&nn->cb_ident_idr, GFP_KERNEL))
+ return -ENOMEM;
+ spin_lock(&nn->nfs_client_lock);
+ ret = idr_get_new(&nn->cb_ident_idr, clp, &clp->cl_cb_ident);
+ spin_unlock(&nn->nfs_client_lock);
+ if (ret == -EAGAIN)
+ goto retry;
+ return ret;
+}
+
+#ifdef CONFIG_NFS_V4_1
+static void nfs4_shutdown_session(struct nfs_client *clp)
+{
+ if (nfs4_has_session(clp)) {
+ nfs4_destroy_session(clp->cl_session);
+ nfs4_destroy_clientid(clp);
+ }
+
+}
+#else /* CONFIG_NFS_V4_1 */
+static void nfs4_shutdown_session(struct nfs_client *clp)
+{
+}
+#endif /* CONFIG_NFS_V4_1 */
+
+struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
+{
+ int err;
+ struct nfs_client *clp = nfs_alloc_client(cl_init);
+ if (IS_ERR(clp))
+ return clp;
+
+ err = nfs_get_cb_ident_idr(clp, cl_init->minorversion);
+ if (err)
+ goto error;
+
+ spin_lock_init(&clp->cl_lock);
+ INIT_DELAYED_WORK(&clp->cl_renewd, nfs4_renew_state);
+ rpc_init_wait_queue(&clp->cl_rpcwaitq, "NFS client");
+ clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
+ clp->cl_minorversion = cl_init->minorversion;
+ clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
+ return clp;
+
+error:
+ kfree(clp);
+ return ERR_PTR(err);
+}
+
+/*
+ * Destroy the NFS4 callback service
+ */
+static void nfs4_destroy_callback(struct nfs_client *clp)
+{
+ if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
+ nfs_callback_down(clp->cl_mvops->minor_version);
+}
+
+static void nfs4_shutdown_client(struct nfs_client *clp)
+{
+ if (__test_and_clear_bit(NFS_CS_RENEWD, &clp->cl_res_state))
+ nfs4_kill_renewd(clp);
+ nfs4_shutdown_session(clp);
+ nfs4_destroy_callback(clp);
+ if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
+ nfs_idmap_delete(clp);
+
+ rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
+ kfree(clp->cl_serverowner);
+ kfree(clp->cl_serverscope);
+ kfree(clp->cl_implid);
+}
+
+void nfs4_free_client(struct nfs_client *clp)
+{
+ nfs4_shutdown_client(clp);
+ nfs_free_client(clp);
+}
+
+/*
+ * Initialize the NFS4 callback service
+ */
+static int nfs4_init_callback(struct nfs_client *clp)
+{
+ int error;
+
+ if (clp->rpc_ops->version == 4) {
+ struct rpc_xprt *xprt;
+
+ xprt = rcu_dereference_raw(clp->cl_rpcclient->cl_xprt);
+
+ if (nfs4_has_session(clp)) {
+ error = xprt_setup_backchannel(xprt,
+ NFS41_BC_MIN_CALLBACKS);
+ if (error < 0)
+ return error;
+ }
+
+ error = nfs_callback_up(clp->cl_mvops->minor_version, xprt);
+ if (error < 0) {
+ dprintk("%s: failed to start callback. Error = %d\n",
+ __func__, error);
+ return error;
+ }
+ __set_bit(NFS_CS_CALLBACK, &clp->cl_res_state);
+ }
+ return 0;
+}
+
+/*
+ * Initialize the minor version specific parts of an NFS4 client record
+ */
+static int nfs4_init_client_minor_version(struct nfs_client *clp)
+{
+#if defined(CONFIG_NFS_V4_1)
+ if (clp->cl_mvops->minor_version) {
+ struct nfs4_session *session = NULL;
+ /*
+ * Create the session and mark it expired.
+ * When a SEQUENCE operation encounters the expired session
+ * it will do session recovery to initialize it.
+ */
+ session = nfs4_alloc_session(clp);
+ if (!session)
+ return -ENOMEM;
+
+ clp->cl_session = session;
+ /*
+ * The create session reply races with the server back
+ * channel probe. Mark the client NFS_CS_SESSION_INITING
+ * so that the client back channel can find the
+ * nfs_client struct
+ */
+ nfs_mark_client_ready(clp, NFS_CS_SESSION_INITING);
+ }
+#endif /* CONFIG_NFS_V4_1 */
+
+ return nfs4_init_callback(clp);
+}
+
+/**
+ * nfs4_init_client - Initialise an NFS4 client record
+ *
+ * @clp: nfs_client to initialise
+ * @timeparms: timeout parameters for underlying RPC transport
+ * @ip_addr: callback IP address in presentation format
+ * @authflavor: authentication flavor for underlying RPC transport
+ *
+ * Returns pointer to an NFS client, or an ERR_PTR value.
+ */
+struct nfs_client *nfs4_init_client(struct nfs_client *clp,
+ const struct rpc_timeout *timeparms,
+ const char *ip_addr,
+ rpc_authflavor_t authflavour)
+{
+ char buf[INET6_ADDRSTRLEN + 1];
+ int error;
+
+ if (clp->cl_cons_state == NFS_CS_READY) {
+ /* the client is initialised already */
+ dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp);
+ return clp;
+ }
+
+ /* Check NFS protocol revision and initialize RPC op vector */
+ clp->rpc_ops = &nfs_v4_clientops;
+
+ __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
+ error = nfs_create_rpc_client(clp, timeparms, authflavour);
+ if (error < 0)
+ goto error;
+
+ /* If no clientaddr= option was specified, find a usable cb address */
+ if (ip_addr == NULL) {
+ struct sockaddr_storage cb_addr;
+ struct sockaddr *sap = (struct sockaddr *)&cb_addr;
+
+ error = rpc_localaddr(clp->cl_rpcclient, sap, sizeof(cb_addr));
+ if (error < 0)
+ goto error;
+ error = rpc_ntop(sap, buf, sizeof(buf));
+ if (error < 0)
+ goto error;
+ ip_addr = (const char *)buf;
+ }
+ strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr));
+
+ error = nfs_idmap_new(clp);
+ if (error < 0) {
+ dprintk("%s: failed to create idmapper. Error = %d\n",
+ __func__, error);
+ goto error;
+ }
+ __set_bit(NFS_CS_IDMAP, &clp->cl_res_state);
+
+ error = nfs4_init_client_minor_version(clp);
+ if (error < 0)
+ goto error;
+
+ if (!nfs4_has_session(clp))
+ nfs_mark_client_ready(clp, NFS_CS_READY);
+ return clp;
+
+error:
+ nfs_mark_client_ready(clp, error);
+ nfs_put_client(clp);
+ dprintk("<-- nfs4_init_client() = xerror %d\n", error);
+ return ERR_PTR(error);
+}
+
+static void nfs4_destroy_server(struct nfs_server *server)
+{
+ nfs_server_return_all_delegations(server);
+ unset_pnfs_layoutdriver(server);
+ nfs4_purge_state_owners(server);
+}
+
+/*
+ * NFSv4.0 callback thread helper
+ *
+ * Find a client by callback identifier
+ */
+struct nfs_client *
+nfs4_find_client_ident(struct net *net, int cb_ident)
+{
+ struct nfs_client *clp;
+ struct nfs_net *nn = net_generic(net, nfs_net_id);
+
+ spin_lock(&nn->nfs_client_lock);
+ clp = idr_find(&nn->cb_ident_idr, cb_ident);
+ if (clp)
+ atomic_inc(&clp->cl_count);
+ spin_unlock(&nn->nfs_client_lock);
+ return clp;
+}
+
+#if defined(CONFIG_NFS_V4_1)
+/* Common match routine for v4.0 and v4.1 callback services */
+static bool nfs4_cb_match_client(const struct sockaddr *addr,
+ struct nfs_client *clp, u32 minorversion)
+{
+ struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
+
+ /* Don't match clients that failed to initialise */
+ if (!(clp->cl_cons_state == NFS_CS_READY ||
+ clp->cl_cons_state == NFS_CS_SESSION_INITING))
+ return false;
+
+ smp_rmb();
+
+ /* Match the version and minorversion */
+ if (clp->rpc_ops->version != 4 ||
+ clp->cl_minorversion != minorversion)
+ return false;
+
+ /* Match only the IP address, not the port number */
+ if (!nfs_sockaddr_match_ipaddr(addr, clap))
+ return false;
+
+ return true;
+}
+
+/*
+ * NFSv4.1 callback thread helper
+ * For CB_COMPOUND calls, find a client by IP address, protocol version,
+ * minorversion, and sessionID
+ *
+ * Returns NULL if no such client
+ */
+struct nfs_client *
+nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
+ struct nfs4_sessionid *sid)
+{
+ struct nfs_client *clp;
+ struct nfs_net *nn = net_generic(net, nfs_net_id);
+
+ spin_lock(&nn->nfs_client_lock);
+ list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
+ if (nfs4_cb_match_client(addr, clp, 1) == false)
+ continue;
+
+ if (!nfs4_has_session(clp))
+ continue;
+
+ /* Match sessionid*/
+ if (memcmp(clp->cl_session->sess_id.data,
+ sid->data, NFS4_MAX_SESSIONID_LEN) != 0)
+ continue;
+
+ atomic_inc(&clp->cl_count);
+ spin_unlock(&nn->nfs_client_lock);
+ return clp;
+ }
+ spin_unlock(&nn->nfs_client_lock);
+ return NULL;
+}
+
+#else /* CONFIG_NFS_V4_1 */
+
+struct nfs_client *
+nfs4_find_client_sessionid(struct net *net, const struct sockaddr *addr,
+ struct nfs4_sessionid *sid)
+{
+ return NULL;
+}
+#endif /* CONFIG_NFS_V4_1 */
+
+/*
+ * Set up an NFS4 client
+ */
+static int nfs4_set_client(struct nfs_server *server,
+ const char *hostname,
+ const struct sockaddr *addr,
+ const size_t addrlen,
+ const char *ip_addr,
+ rpc_authflavor_t authflavour,
+ int proto, const struct rpc_timeout *timeparms,
+ u32 minorversion, struct net *net)
+{
+ struct nfs_client_initdata cl_init = {
+ .hostname = hostname,
+ .addr = addr,
+ .addrlen = addrlen,
+ .nfs_mod = &nfs_v4,
+ .proto = proto,
+ .minorversion = minorversion,
+ .net = net,
+ };
+ struct nfs_client *clp;
+ int error;
+
+ dprintk("--> nfs4_set_client()\n");
+
+ if (server->flags & NFS_MOUNT_NORESVPORT)
+ set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
+
+ /* Allocate or find a client reference we can use */
+ clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour);
+ if (IS_ERR(clp)) {
+ error = PTR_ERR(clp);
+ goto error;
+ }
+
+ /*
+ * Query for the lease time on clientid setup or renewal
+ *
+ * Note that this will be set on nfs_clients that were created
+ * only for the DS role and did not set this bit, but now will
+ * serve a dual role.
+ */
+ set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state);
+
+ server->nfs_client = clp;
+ dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp);
+ return 0;
+error:
+ dprintk("<-- nfs4_set_client() = xerror %d\n", error);
+ return error;
+}
+
+/*
+ * Set up a pNFS Data Server client.
+ *
+ * Return any existing nfs_client that matches server address,port,version
+ * and minorversion.
+ *
+ * For a new nfs_client, use a soft mount (default), a low retrans and a
+ * low timeout interval so that if a connection is lost, we retry through
+ * the MDS.
+ */
+struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
+ const struct sockaddr *ds_addr, int ds_addrlen,
+ int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans)
+{
+ struct nfs_client_initdata cl_init = {
+ .addr = ds_addr,
+ .addrlen = ds_addrlen,
+ .nfs_mod = &nfs_v4,
+ .proto = ds_proto,
+ .minorversion = mds_clp->cl_minorversion,
+ .net = mds_clp->cl_net,
+ };
+ struct rpc_timeout ds_timeout;
+ struct nfs_client *clp;
+
+ /*
+ * Set an authflavor equual to the MDS value. Use the MDS nfs_client
+ * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS
+ * (section 13.1 RFC 5661).
+ */
+ nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans);
+ clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr,
+ mds_clp->cl_rpcclient->cl_auth->au_flavor);
+
+ dprintk("<-- %s %p\n", __func__, clp);
+ return clp;
+}
+EXPORT_SYMBOL_GPL(nfs4_set_ds_client);
+
+/*
+ * Session has been established, and the client marked ready.
+ * Set the mount rsize and wsize with negotiated fore channel
+ * attributes which will be bound checked in nfs_server_set_fsinfo.
+ */
+static void nfs4_session_set_rwsize(struct nfs_server *server)
+{
+#ifdef CONFIG_NFS_V4_1
+ struct nfs4_session *sess;
+ u32 server_resp_sz;
+ u32 server_rqst_sz;
+
+ if (!nfs4_has_session(server->nfs_client))
+ return;
+ sess = server->nfs_client->cl_session;
+ server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead;
+ server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead;
+
+ if (server->rsize > server_resp_sz)
+ server->rsize = server_resp_sz;
+ if (server->wsize > server_rqst_sz)
+ server->wsize = server_rqst_sz;
+#endif /* CONFIG_NFS_V4_1 */
+}
+
+static int nfs4_server_common_setup(struct nfs_server *server,
+ struct nfs_fh *mntfh)
+{
+ struct nfs_fattr *fattr;
+ int error;
+
+ BUG_ON(!server->nfs_client);
+ BUG_ON(!server->nfs_client->rpc_ops);
+ BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
+
+ /* data servers support only a subset of NFSv4.1 */
+ if (is_ds_only_client(server->nfs_client))
+ return -EPROTONOSUPPORT;
+
+ fattr = nfs_alloc_fattr();
+ if (fattr == NULL)
+ return -ENOMEM;
+
+ /* We must ensure the session is initialised first */
+ error = nfs4_init_session(server);
+ if (error < 0)
+ goto out;
+
+ /* Probe the root fh to retrieve its FSID and filehandle */
+ error = nfs4_get_rootfh(server, mntfh);
+ if (error < 0)
+ goto out;
+
+ dprintk("Server FSID: %llx:%llx\n",
+ (unsigned long long) server->fsid.major,
+ (unsigned long long) server->fsid.minor);
+ dprintk("Mount FH: %d\n", mntfh->size);
+
+ nfs4_session_set_rwsize(server);
+
+ error = nfs_probe_fsinfo(server, mntfh, fattr);
+ if (error < 0)
+ goto out;
+
+ if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
+ server->namelen = NFS4_MAXNAMLEN;
+
+ nfs_server_insert_lists(server);
+ server->mount_time = jiffies;
+ server->destroy = nfs4_destroy_server;
+out:
+ nfs_free_fattr(fattr);
+ return error;
+}
+
+/*
+ * Create a version 4 volume record
+ */
+static int nfs4_init_server(struct nfs_server *server,
+ const struct nfs_parsed_mount_data *data)
+{
+ struct rpc_timeout timeparms;
+ int error;
+
+ dprintk("--> nfs4_init_server()\n");
+
+ nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
+ data->timeo, data->retrans);
+
+ /* Initialise the client representation from the mount data */
+ server->flags = data->flags;
+ server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR|NFS_CAP_POSIX_LOCK;
+ if (!(data->flags & NFS_MOUNT_NORDIRPLUS))
+ server->caps |= NFS_CAP_READDIRPLUS;
+ server->options = data->options;
+
+ /* Get a client record */
+ error = nfs4_set_client(server,
+ data->nfs_server.hostname,
+ (const struct sockaddr *)&data->nfs_server.address,
+ data->nfs_server.addrlen,
+ data->client_address,
+ data->auth_flavors[0],
+ data->nfs_server.protocol,
+ &timeparms,
+ data->minorversion,
+ data->net);
+ if (error < 0)
+ goto error;
+
+ /*
+ * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
+ * authentication.
+ */
+ if (nfs4_disable_idmapping && data->auth_flavors[0] == RPC_AUTH_UNIX)
+ server->caps |= NFS_CAP_UIDGID_NOMAP;
+
+ if (data->rsize)
+ server->rsize = nfs_block_size(data->rsize, NULL);
+ if (data->wsize)
+ server->wsize = nfs_block_size(data->wsize, NULL);
+
+ server->acregmin = data->acregmin * HZ;
+ server->acregmax = data->acregmax * HZ;
+ server->acdirmin = data->acdirmin * HZ;
+ server->acdirmax = data->acdirmax * HZ;
+
+ server->port = data->nfs_server.port;
+
+ error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]);
+
+error:
+ /* Done */
+ dprintk("<-- nfs4_init_server() = %d\n", error);
+ return error;
+}
+
+/*
+ * Create a version 4 volume record
+ * - keyed on server and FSID
+ */
+/*struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data,
+ struct nfs_fh *mntfh)*/
+struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info,
+ struct nfs_subversion *nfs_mod)
+{
+ struct nfs_server *server;
+ int error;
+
+ dprintk("--> nfs4_create_server()\n");
+
+ server = nfs_alloc_server();
+ if (!server)
+ return ERR_PTR(-ENOMEM);
+
+ /* set up the general RPC client */
+ error = nfs4_init_server(server, mount_info->parsed);
+ if (error < 0)
+ goto error;
+
+ error = nfs4_server_common_setup(server, mount_info->mntfh);
+ if (error < 0)
+ goto error;
+
+ dprintk("<-- nfs4_create_server() = %p\n", server);
+ return server;
+
+error:
+ nfs_free_server(server);
+ dprintk("<-- nfs4_create_server() = error %d\n", error);
+ return ERR_PTR(error);
+}
+
+/*
+ * Create an NFS4 referral server record
+ */
+struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
+ struct nfs_fh *mntfh)
+{
+ struct nfs_client *parent_client;
+ struct nfs_server *server, *parent_server;
+ int error;
+
+ dprintk("--> nfs4_create_referral_server()\n");
+
+ server = nfs_alloc_server();
+ if (!server)
+ return ERR_PTR(-ENOMEM);
+
+ parent_server = NFS_SB(data->sb);
+ parent_client = parent_server->nfs_client;
+
+ /* Initialise the client representation from the parent server */
+ nfs_server_copy_userdata(server, parent_server);
+ server->caps |= NFS_CAP_ATOMIC_OPEN|NFS_CAP_CHANGE_ATTR;
+
+ /* Get a client representation.
+ * Note: NFSv4 always uses TCP, */
+ error = nfs4_set_client(server, data->hostname,
+ data->addr,
+ data->addrlen,
+ parent_client->cl_ipaddr,
+ data->authflavor,
+ rpc_protocol(parent_server->client),
+ parent_server->client->cl_timeout,
+ parent_client->cl_mvops->minor_version,
+ parent_client->cl_net);
+ if (error < 0)
+ goto error;
+
+ error = nfs_init_server_rpcclient(server, parent_server->client->cl_timeout, data->authflavor);
+ if (error < 0)
+ goto error;
+
+ error = nfs4_server_common_setup(server, mntfh);
+ if (error < 0)
+ goto error;
+
+ dprintk("<-- nfs_create_referral_server() = %p\n", server);
+ return server;
+
+error:
+ nfs_free_server(server);
+ dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
+ return ERR_PTR(error);
+}
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
new file mode 100644
index 000000000000..acb65e7887f8
--- /dev/null
+++ b/fs/nfs/nfs4file.c
@@ -0,0 +1,126 @@
+/*
+ * linux/fs/nfs/file.c
+ *
+ * Copyright (C) 1992 Rick Sladkey
+ */
+#include <linux/nfs_fs.h>
+#include "internal.h"
+#include "pnfs.h"
+
+#define NFSDBG_FACILITY NFSDBG_FILE
+
+static int
+nfs4_file_open(struct inode *inode, struct file *filp)
+{
+ struct nfs_open_context *ctx;
+ struct dentry *dentry = filp->f_path.dentry;
+ struct dentry *parent = NULL;
+ struct inode *dir;
+ unsigned openflags = filp->f_flags;
+ struct iattr attr;
+ int err;
+
+ BUG_ON(inode != dentry->d_inode);
+ /*
+ * If no cached dentry exists or if it's negative, NFSv4 handled the
+ * opens in ->lookup() or ->create().
+ *
+ * We only get this far for a cached positive dentry. We skipped
+ * revalidation, so handle it here by dropping the dentry and returning
+ * -EOPENSTALE. The VFS will retry the lookup/create/open.
+ */
+
+ dprintk("NFS: open file(%s/%s)\n",
+ dentry->d_parent->d_name.name,
+ dentry->d_name.name);
+
+ if ((openflags & O_ACCMODE) == 3)
+ openflags--;
+
+ /* We can't create new files here */
+ openflags &= ~(O_CREAT|O_EXCL);
+
+ parent = dget_parent(dentry);
+ dir = parent->d_inode;
+
+ ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode);
+ err = PTR_ERR(ctx);
+ if (IS_ERR(ctx))
+ goto out;
+
+ attr.ia_valid = ATTR_OPEN;
+ if (openflags & O_TRUNC) {
+ attr.ia_valid |= ATTR_SIZE;
+ attr.ia_size = 0;
+ nfs_wb_all(inode);
+ }
+
+ inode = NFS_PROTO(dir)->open_context(dir, ctx, openflags, &attr);
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ switch (err) {
+ case -EPERM:
+ case -EACCES:
+ case -EDQUOT:
+ case -ENOSPC:
+ case -EROFS:
+ goto out_put_ctx;
+ default:
+ goto out_drop;
+ }
+ }
+ iput(inode);
+ if (inode != dentry->d_inode)
+ goto out_drop;
+
+ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ nfs_file_set_open_context(filp, ctx);
+ err = 0;
+
+out_put_ctx:
+ put_nfs_open_context(ctx);
+out:
+ dput(parent);
+ return err;
+
+out_drop:
+ d_drop(dentry);
+ err = -EOPENSTALE;
+ goto out_put_ctx;
+}
+
+static int
+nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+ int ret;
+ struct inode *inode = file->f_path.dentry->d_inode;
+
+ ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
+ mutex_lock(&inode->i_mutex);
+ ret = nfs_file_fsync_commit(file, start, end, datasync);
+ if (!ret && !datasync)
+ /* application has asked for meta-data sync */
+ ret = pnfs_layoutcommit_inode(inode, true);
+ mutex_unlock(&inode->i_mutex);
+
+ return ret;
+}
+
+const struct file_operations nfs4_file_operations = {
+ .llseek = nfs_file_llseek,
+ .read = do_sync_read,
+ .write = do_sync_write,
+ .aio_read = nfs_file_read,
+ .aio_write = nfs_file_write,
+ .mmap = nfs_file_mmap,
+ .open = nfs4_file_open,
+ .flush = nfs_file_flush,
+ .release = nfs_file_release,
+ .fsync = nfs4_file_fsync,
+ .lock = nfs_lock,
+ .flock = nfs_flock,
+ .splice_read = nfs_file_splice_read,
+ .splice_write = nfs_file_splice_write,
+ .check_flags = nfs_check_flags,
+ .setlease = nfs_setlease,
+};
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index e1340293872c..53f94d915bd1 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -205,9 +205,9 @@ static int filelayout_async_handle_error(struct rpc_task *task,
case -EPIPE:
dprintk("%s DS connection error %d\n", __func__,
task->tk_status);
- if (!filelayout_test_devid_invalid(devid))
- _pnfs_return_layout(inode);
filelayout_mark_devid_invalid(devid);
+ clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags);
+ _pnfs_return_layout(inode);
rpc_wake_up(&tbl->slot_tbl_waitq);
nfs4_ds_disconnect(clp);
/* fall through */
@@ -351,9 +351,9 @@ static void prepare_to_resend_writes(struct nfs_commit_data *data)
struct nfs_page *first = nfs_list_entry(data->pages.next);
data->task.tk_status = 0;
- memcpy(data->verf.verifier, first->wb_verf.verifier,
- sizeof(first->wb_verf.verifier));
- data->verf.verifier[0]++; /* ensure verifier mismatch */
+ memcpy(&data->verf.verifier, &first->wb_verf,
+ sizeof(data->verf.verifier));
+ data->verf.verifier.data[0]++; /* ensure verifier mismatch */
}
static int filelayout_commit_done_cb(struct rpc_task *task,
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index a1fab8da7f03..f81231f30d94 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -728,7 +728,7 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_fla
pdev->layout_type = LAYOUT_NFSV4_1_FILES;
pdev->pages = pages;
pdev->pgbase = 0;
- pdev->pglen = PAGE_SIZE * max_pages;
+ pdev->pglen = max_resp_sz;
pdev->mincount = 0;
rc = nfs4_proc_getdeviceinfo(server, pdev);
diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c
new file mode 100644
index 000000000000..6a83780e0ce6
--- /dev/null
+++ b/fs/nfs/nfs4getroot.c
@@ -0,0 +1,49 @@
+/*
+* Copyright (C) 2006 Red Hat, Inc. All Rights Reserved.
+* Written by David Howells (dhowells@redhat.com)
+*/
+
+#include <linux/nfs_fs.h>
+#include "nfs4_fs.h"
+
+#define NFSDBG_FACILITY NFSDBG_CLIENT
+
+int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh)
+{
+ struct nfs_fsinfo fsinfo;
+ int ret = -ENOMEM;
+
+ dprintk("--> nfs4_get_rootfh()\n");
+
+ fsinfo.fattr = nfs_alloc_fattr();
+ if (fsinfo.fattr == NULL)
+ goto out;
+
+ /* Start by getting the root filehandle from the server */
+ ret = nfs4_proc_get_rootfh(server, mntfh, &fsinfo);
+ if (ret < 0) {
+ dprintk("nfs4_get_rootfh: getroot error = %d\n", -ret);
+ goto out;
+ }
+
+ if (!(fsinfo.fattr->valid & NFS_ATTR_FATTR_TYPE)
+ || !S_ISDIR(fsinfo.fattr->mode)) {
+ printk(KERN_ERR "nfs4_get_rootfh:"
+ " getroot encountered non-directory\n");
+ ret = -ENOTDIR;
+ goto out;
+ }
+
+ if (fsinfo.fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
+ printk(KERN_ERR "nfs4_get_rootfh:"
+ " getroot obtained referral\n");
+ ret = -EREMOTE;
+ goto out;
+ }
+
+ memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid));
+out:
+ nfs_free_fattr(fsinfo.fattr);
+ dprintk("<-- nfs4_get_rootfh() = %d\n", ret);
+ return ret;
+}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c157b2089b47..a99a8d948721 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -43,7 +43,6 @@
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/gss_api.h>
#include <linux/nfs.h>
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
@@ -73,8 +72,6 @@
#define NFS4_MAX_LOOP_ON_RECOVER (10)
-static unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE;
-
struct nfs4_opendata;
static int _nfs4_proc_open(struct nfs4_opendata *data);
static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
@@ -259,7 +256,12 @@ static int nfs4_wait_clnt_recover(struct nfs_client *clp)
res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING,
nfs_wait_bit_killable, TASK_KILLABLE);
- return res;
+ if (res)
+ return res;
+
+ if (clp->cl_cons_state < 0)
+ return clp->cl_cons_state;
+ return 0;
}
static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
@@ -294,8 +296,8 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc
case 0:
return 0;
case -NFS4ERR_OPENMODE:
- if (inode && nfs_have_delegation(inode, FMODE_READ)) {
- nfs_inode_return_delegation(inode);
+ if (inode && nfs4_have_delegation(inode, FMODE_READ)) {
+ nfs4_inode_return_delegation(inode);
exception->retry = 1;
return 0;
}
@@ -1065,7 +1067,7 @@ static void nfs4_return_incompatible_delegation(struct inode *inode, fmode_t fmo
return;
}
rcu_read_unlock();
- nfs_inode_return_delegation(inode);
+ nfs4_inode_return_delegation(inode);
}
static struct nfs4_state *nfs4_try_open_cached(struct nfs4_opendata *opendata)
@@ -1756,33 +1758,70 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
}
#if defined(CONFIG_NFS_V4_1)
-static int nfs41_check_expired_stateid(struct nfs4_state *state, nfs4_stateid *stateid, unsigned int flags)
+static void nfs41_clear_delegation_stateid(struct nfs4_state *state)
{
- int status = NFS_OK;
struct nfs_server *server = NFS_SERVER(state->inode);
+ nfs4_stateid *stateid = &state->stateid;
+ int status;
- if (state->flags & flags) {
- status = nfs41_test_stateid(server, stateid);
- if (status != NFS_OK) {
+ /* If a state reset has been done, test_stateid is unneeded */
+ if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0)
+ return;
+
+ status = nfs41_test_stateid(server, stateid);
+ if (status != NFS_OK) {
+ /* Free the stateid unless the server explicitly
+ * informs us the stateid is unrecognized. */
+ if (status != -NFS4ERR_BAD_STATEID)
nfs41_free_stateid(server, stateid);
- state->flags &= ~flags;
- }
+
+ clear_bit(NFS_DELEGATED_STATE, &state->flags);
+ }
+}
+
+/**
+ * nfs41_check_open_stateid - possibly free an open stateid
+ *
+ * @state: NFSv4 state for an inode
+ *
+ * Returns NFS_OK if recovery for this stateid is now finished.
+ * Otherwise a negative NFS4ERR value is returned.
+ */
+static int nfs41_check_open_stateid(struct nfs4_state *state)
+{
+ struct nfs_server *server = NFS_SERVER(state->inode);
+ nfs4_stateid *stateid = &state->stateid;
+ int status;
+
+ /* If a state reset has been done, test_stateid is unneeded */
+ if ((test_bit(NFS_O_RDONLY_STATE, &state->flags) == 0) &&
+ (test_bit(NFS_O_WRONLY_STATE, &state->flags) == 0) &&
+ (test_bit(NFS_O_RDWR_STATE, &state->flags) == 0))
+ return -NFS4ERR_BAD_STATEID;
+
+ status = nfs41_test_stateid(server, stateid);
+ if (status != NFS_OK) {
+ /* Free the stateid unless the server explicitly
+ * informs us the stateid is unrecognized. */
+ if (status != -NFS4ERR_BAD_STATEID)
+ nfs41_free_stateid(server, stateid);
+
+ clear_bit(NFS_O_RDONLY_STATE, &state->flags);
+ clear_bit(NFS_O_WRONLY_STATE, &state->flags);
+ clear_bit(NFS_O_RDWR_STATE, &state->flags);
}
return status;
}
static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
{
- int deleg_status, open_status;
- int deleg_flags = 1 << NFS_DELEGATED_STATE;
- int open_flags = (1 << NFS_O_RDONLY_STATE) | (1 << NFS_O_WRONLY_STATE) | (1 << NFS_O_RDWR_STATE);
-
- deleg_status = nfs41_check_expired_stateid(state, &state->stateid, deleg_flags);
- open_status = nfs41_check_expired_stateid(state, &state->open_stateid, open_flags);
+ int status;
- if ((deleg_status == NFS_OK) && (open_status == NFS_OK))
- return NFS_OK;
- return nfs4_open_expired(sp, state);
+ nfs41_clear_delegation_stateid(state);
+ status = nfs41_check_open_stateid(state);
+ if (status != NFS_OK)
+ status = nfs4_open_expired(sp, state);
+ return status;
}
#endif
@@ -2375,11 +2414,15 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
int i, len, status = 0;
rpc_authflavor_t flav_array[NFS_MAX_SECFLAVORS];
- len = gss_mech_list_pseudoflavors(&flav_array[0]);
- flav_array[len] = RPC_AUTH_NULL;
- len += 1;
+ len = rpcauth_list_flavors(flav_array, ARRAY_SIZE(flav_array));
+ BUG_ON(len < 0);
for (i = 0; i < len; i++) {
+ /* AUTH_UNIX is the default flavor if none was specified,
+ * thus has already been tried. */
+ if (flav_array[i] == RPC_AUTH_UNIX)
+ continue;
+
status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]);
if (status == -NFS4ERR_WRONGSEC || status == -EACCES)
continue;
@@ -2766,9 +2809,7 @@ static int nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry)
*
* In the case of WRITE, we also want to put the GETATTR after
* the operation -- in this case because we want to make sure
- * we get the post-operation mtime and size. This means that
- * we can't use xdr_encode_pages() as written: we need a variant
- * of it which would leave room in the 'tail' iovec.
+ * we get the post-operation mtime and size.
*
* Both of these changes to the XDR layer would in fact be quite
* minor, but I decided to leave them for a subsequent patch.
@@ -2821,7 +2862,9 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
return PTR_ERR(ctx);
sattr->ia_mode &= ~current_umask();
- state = nfs4_do_open(dir, dentry, ctx->mode, flags, sattr, ctx->cred, NULL);
+ state = nfs4_do_open(dir, dentry, ctx->mode,
+ flags, sattr, ctx->cred,
+ &ctx->mdsthreshold);
d_drop(dentry);
if (IS_ERR(state)) {
status = PTR_ERR(state);
@@ -3315,8 +3358,14 @@ static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, str
static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo)
{
+ int error;
+
nfs_fattr_init(fsinfo->fattr);
- return nfs4_do_fsinfo(server, fhandle, fsinfo);
+ error = nfs4_do_fsinfo(server, fhandle, fsinfo);
+ if (error == 0)
+ set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype);
+
+ return error;
}
static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
@@ -3443,7 +3492,7 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data)
/* Otherwise, request attributes if and only if we don't hold
* a delegation
*/
- return nfs_have_delegation(hdr->inode, FMODE_READ) == 0;
+ return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
}
static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
@@ -3732,7 +3781,8 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
.rpc_argp = &args,
.rpc_resp = &res,
};
- int ret = -ENOMEM, npages, i, acl_len = 0;
+ int ret = -ENOMEM, npages, i;
+ size_t acl_len = 0;
npages = (buflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
/* As long as we're doing a round trip to the server anyway,
@@ -3847,7 +3897,7 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
if (i < 0)
return i;
- nfs_inode_return_delegation(inode);
+ nfs4_inode_return_delegation(inode);
ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
/*
@@ -3961,6 +4011,16 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp,
memcpy(bootverf->data, verf, sizeof(bootverf->data));
}
+/**
+ * nfs4_proc_setclientid - Negotiate client ID
+ * @clp: state data structure
+ * @program: RPC program for NFSv4 callback service
+ * @port: IP port number for NFS4 callback service
+ * @cred: RPC credential to use for this call
+ * @res: where to place the result
+ *
+ * Returns zero, a negative errno, or a negative NFS4ERR status code.
+ */
int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
unsigned short port, struct rpc_cred *cred,
struct nfs4_setclientid_res *res)
@@ -3977,44 +4037,44 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
.rpc_resp = res,
.rpc_cred = cred,
};
- int loop = 0;
int status;
+ /* nfs_client_id4 */
nfs4_init_boot_verifier(clp, &sc_verifier);
-
- for(;;) {
- rcu_read_lock();
- setclientid.sc_name_len = scnprintf(setclientid.sc_name,
- sizeof(setclientid.sc_name), "%s/%s %s %s %u",
- clp->cl_ipaddr,
- rpc_peeraddr2str(clp->cl_rpcclient,
- RPC_DISPLAY_ADDR),
- rpc_peeraddr2str(clp->cl_rpcclient,
- RPC_DISPLAY_PROTO),
- clp->cl_rpcclient->cl_auth->au_ops->au_name,
- clp->cl_id_uniquifier);
- setclientid.sc_netid_len = scnprintf(setclientid.sc_netid,
+ rcu_read_lock();
+ setclientid.sc_name_len = scnprintf(setclientid.sc_name,
+ sizeof(setclientid.sc_name), "%s/%s %s",
+ clp->cl_ipaddr,
+ rpc_peeraddr2str(clp->cl_rpcclient,
+ RPC_DISPLAY_ADDR),
+ rpc_peeraddr2str(clp->cl_rpcclient,
+ RPC_DISPLAY_PROTO));
+ /* cb_client4 */
+ setclientid.sc_netid_len = scnprintf(setclientid.sc_netid,
sizeof(setclientid.sc_netid),
rpc_peeraddr2str(clp->cl_rpcclient,
RPC_DISPLAY_NETID));
- setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr,
+ rcu_read_unlock();
+ setclientid.sc_uaddr_len = scnprintf(setclientid.sc_uaddr,
sizeof(setclientid.sc_uaddr), "%s.%u.%u",
clp->cl_ipaddr, port >> 8, port & 255);
- rcu_read_unlock();
- status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
- if (status != -NFS4ERR_CLID_INUSE)
- break;
- if (loop != 0) {
- ++clp->cl_id_uniquifier;
- break;
- }
- ++loop;
- ssleep(clp->cl_lease_time / HZ + 1);
- }
+ dprintk("NFS call setclientid auth=%s, '%.*s'\n",
+ clp->cl_rpcclient->cl_auth->au_ops->au_name,
+ setclientid.sc_name_len, setclientid.sc_name);
+ status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+ dprintk("NFS reply setclientid: %d\n", status);
return status;
}
+/**
+ * nfs4_proc_setclientid_confirm - Confirm client ID
+ * @clp: state data structure
+ * @res: result of a previous SETCLIENTID
+ * @cred: RPC credential to use for this call
+ *
+ * Returns zero, a negative errno, or a negative NFS4ERR status code.
+ */
int nfs4_proc_setclientid_confirm(struct nfs_client *clp,
struct nfs4_setclientid_res *arg,
struct rpc_cred *cred)
@@ -4029,6 +4089,9 @@ int nfs4_proc_setclientid_confirm(struct nfs_client *clp,
unsigned long now;
int status;
+ dprintk("NFS call setclientid_confirm auth=%s, (client ID %llx)\n",
+ clp->cl_rpcclient->cl_auth->au_ops->au_name,
+ clp->cl_clientid);
now = jiffies;
status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
if (status == 0) {
@@ -4037,6 +4100,7 @@ int nfs4_proc_setclientid_confirm(struct nfs_client *clp,
clp->cl_last_renewal = now;
spin_unlock(&clp->cl_lock);
}
+ dprintk("NFS reply setclientid_confirm: %d\n", status);
return status;
}
@@ -4681,9 +4745,17 @@ out:
}
#if defined(CONFIG_NFS_V4_1)
+/**
+ * nfs41_check_expired_locks - possibly free a lock stateid
+ *
+ * @state: NFSv4 state for an inode
+ *
+ * Returns NFS_OK if recovery for this stateid is now finished.
+ * Otherwise a negative NFS4ERR value is returned.
+ */
static int nfs41_check_expired_locks(struct nfs4_state *state)
{
- int status, ret = NFS_OK;
+ int status, ret = -NFS4ERR_BAD_STATEID;
struct nfs4_lock_state *lsp;
struct nfs_server *server = NFS_SERVER(state->inode);
@@ -4691,7 +4763,11 @@ static int nfs41_check_expired_locks(struct nfs4_state *state)
if (lsp->ls_flags & NFS_LOCK_INITIALIZED) {
status = nfs41_test_stateid(server, &lsp->ls_stateid);
if (status != NFS_OK) {
- nfs41_free_stateid(server, &lsp->ls_stateid);
+ /* Free the stateid unless the server
+ * informs us the stateid is unrecognized. */
+ if (status != -NFS4ERR_BAD_STATEID)
+ nfs41_free_stateid(server,
+ &lsp->ls_stateid);
lsp->ls_flags &= ~NFS_LOCK_INITIALIZED;
ret = status;
}
@@ -4707,9 +4783,9 @@ static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *reques
if (test_bit(LK_STATE_IN_USE, &state->flags))
status = nfs41_check_expired_locks(state);
- if (status == NFS_OK)
- return status;
- return nfs4_lock_expired(state, request);
+ if (status != NFS_OK)
+ status = nfs4_lock_expired(state, request);
+ return status;
}
#endif
@@ -4807,7 +4883,7 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
* Don't rely on the VFS having checked the file open mode,
* since it won't do this for flock() locks.
*/
- switch (request->fl_type & (F_RDLCK|F_WRLCK|F_UNLCK)) {
+ switch (request->fl_type) {
case F_RDLCK:
if (!(filp->f_mode & FMODE_READ))
return -EBADF;
@@ -5168,6 +5244,8 @@ out:
/*
* nfs4_proc_exchange_id()
*
+ * Returns zero, a negative errno, or a negative NFS4ERR status code.
+ *
* Since the clientid has expired, all compounds using sessions
* associated with the stale clientid will be returning
* NFS4ERR_BADSESSION in the sequence operation, and will therefore
@@ -5192,16 +5270,14 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
.rpc_cred = cred,
};
- dprintk("--> %s\n", __func__);
- BUG_ON(clp == NULL);
-
nfs4_init_boot_verifier(clp, &verifier);
-
args.id_len = scnprintf(args.id, sizeof(args.id),
- "%s/%s/%u",
+ "%s/%s",
clp->cl_ipaddr,
- clp->cl_rpcclient->cl_nodename,
- clp->cl_rpcclient->cl_auth->au_flavor);
+ clp->cl_rpcclient->cl_nodename);
+ dprintk("NFS call exchange_id auth=%s, '%.*s'\n",
+ clp->cl_rpcclient->cl_auth->au_ops->au_name,
+ args.id_len, args.id);
res.server_owner = kzalloc(sizeof(struct nfs41_server_owner),
GFP_NOFS);
@@ -5264,12 +5340,12 @@ out_server_scope:
kfree(res.server_scope);
out:
if (clp->cl_implid != NULL)
- dprintk("%s: Server Implementation ID: "
+ dprintk("NFS reply exchange_id: Server Implementation ID: "
"domain: %s, name: %s, date: %llu,%u\n",
- __func__, clp->cl_implid->domain, clp->cl_implid->name,
+ clp->cl_implid->domain, clp->cl_implid->name,
clp->cl_implid->date.seconds,
clp->cl_implid->date.nseconds);
- dprintk("<-- %s status= %d\n", __func__, status);
+ dprintk("NFS reply exchange_id: %d\n", status);
return status;
}
@@ -6570,22 +6646,36 @@ static int _nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid)
.rpc_resp = &res,
};
+ dprintk("NFS call test_stateid %p\n", stateid);
nfs41_init_sequence(&args.seq_args, &res.seq_res, 0);
status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
-
- if (status == NFS_OK)
- return res.status;
- return status;
+ if (status != NFS_OK) {
+ dprintk("NFS reply test_stateid: failed, %d\n", status);
+ return status;
+ }
+ dprintk("NFS reply test_stateid: succeeded, %d\n", -res.status);
+ return -res.status;
}
+/**
+ * nfs41_test_stateid - perform a TEST_STATEID operation
+ *
+ * @server: server / transport on which to perform the operation
+ * @stateid: state ID to test
+ *
+ * Returns NFS_OK if the server recognizes that "stateid" is valid.
+ * Otherwise a negative NFS4ERR value is returned if the operation
+ * failed or the state ID is not currently valid.
+ */
static int nfs41_test_stateid(struct nfs_server *server, nfs4_stateid *stateid)
{
struct nfs4_exception exception = { };
int err;
do {
- err = nfs4_handle_exception(server,
- _nfs41_test_stateid(server, stateid),
- &exception);
+ err = _nfs41_test_stateid(server, stateid);
+ if (err != -NFS4ERR_DELAY)
+ break;
+ nfs4_handle_exception(server, err, &exception);
} while (exception.retry);
return err;
}
@@ -6601,19 +6691,34 @@ static int _nfs4_free_stateid(struct nfs_server *server, nfs4_stateid *stateid)
.rpc_argp = &args,
.rpc_resp = &res,
};
+ int status;
+ dprintk("NFS call free_stateid %p\n", stateid);
nfs41_init_sequence(&args.seq_args, &res.seq_res, 0);
- return nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
+ status = nfs4_call_sync_sequence(server->client, server, &msg,
+ &args.seq_args, &res.seq_res, 1);
+ dprintk("NFS reply free_stateid: %d\n", status);
+ return status;
}
+/**
+ * nfs41_free_stateid - perform a FREE_STATEID operation
+ *
+ * @server: server / transport on which to perform the operation
+ * @stateid: state ID to release
+ *
+ * Returns NFS_OK if the server freed "stateid". Otherwise a
+ * negative NFS4ERR value is returned.
+ */
static int nfs41_free_stateid(struct nfs_server *server, nfs4_stateid *stateid)
{
struct nfs4_exception exception = { };
int err;
do {
- err = nfs4_handle_exception(server,
- _nfs4_free_stateid(server, stateid),
- &exception);
+ err = _nfs4_free_stateid(server, stateid);
+ if (err != -NFS4ERR_DELAY)
+ break;
+ nfs4_handle_exception(server, err, &exception);
} while (exception.retry);
return err;
}
@@ -6725,6 +6830,26 @@ const struct nfs4_minor_version_ops *nfs_v4_minor_ops[] = {
#endif
};
+const struct inode_operations nfs4_dir_inode_operations = {
+ .create = nfs_create,
+ .lookup = nfs_lookup,
+ .atomic_open = nfs_atomic_open,
+ .link = nfs_link,
+ .unlink = nfs_unlink,
+ .symlink = nfs_symlink,
+ .mkdir = nfs_mkdir,
+ .rmdir = nfs_rmdir,
+ .mknod = nfs_mknod,
+ .rename = nfs_rename,
+ .permission = nfs_permission,
+ .getattr = nfs_getattr,
+ .setattr = nfs_setattr,
+ .getxattr = generic_getxattr,
+ .setxattr = generic_setxattr,
+ .listxattr = generic_listxattr,
+ .removexattr = generic_removexattr,
+};
+
static const struct inode_operations nfs4_file_inode_operations = {
.permission = nfs_permission,
.getattr = nfs_getattr,
@@ -6743,6 +6868,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.file_ops = &nfs4_file_operations,
.getroot = nfs4_proc_get_root,
.submount = nfs4_submount,
+ .try_mount = nfs4_try_mount,
.getattr = nfs4_proc_getattr,
.setattr = nfs4_proc_setattr,
.lookup = nfs4_proc_lookup,
@@ -6769,9 +6895,11 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.set_capabilities = nfs4_server_capabilities,
.decode_dirent = nfs4_decode_dirent,
.read_setup = nfs4_proc_read_setup,
+ .read_pageio_init = pnfs_pageio_init_read,
.read_rpc_prepare = nfs4_proc_read_rpc_prepare,
.read_done = nfs4_read_done,
.write_setup = nfs4_proc_write_setup,
+ .write_pageio_init = pnfs_pageio_init_write,
.write_rpc_prepare = nfs4_proc_write_rpc_prepare,
.write_done = nfs4_write_done,
.commit_setup = nfs4_proc_commit_setup,
@@ -6781,7 +6909,13 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.clear_acl_cache = nfs4_zap_acl_attr,
.close_context = nfs4_close_context,
.open_context = nfs4_atomic_open,
+ .have_delegation = nfs4_have_delegation,
+ .return_delegation = nfs4_inode_return_delegation,
+ .alloc_client = nfs4_alloc_client,
.init_client = nfs4_init_client,
+ .free_client = nfs4_free_client,
+ .create_server = nfs4_create_server,
+ .clone_server = nfs_clone_server,
};
static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
@@ -6796,10 +6930,6 @@ const struct xattr_handler *nfs4_xattr_handlers[] = {
NULL
};
-module_param(max_session_slots, ushort, 0644);
-MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 "
- "requests the client will negotiate");
-
/*
* Local variables:
* c-basic-offset: 8
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index f38300e9f171..55148def5540 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1606,10 +1606,15 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status)
return -ESERVERFAULT;
/* Lease confirmation error: retry after purging the lease */
ssleep(1);
- case -NFS4ERR_CLID_INUSE:
case -NFS4ERR_STALE_CLIENTID:
clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
break;
+ case -NFS4ERR_CLID_INUSE:
+ pr_err("NFS: Server %s reports our clientid is in use\n",
+ clp->cl_hostname);
+ nfs_mark_client_ready(clp, -EPERM);
+ clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
+ return -EPERM;
case -EACCES:
if (clp->cl_machine_cred == NULL)
return -EACCES;
@@ -1642,7 +1647,7 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status)
return 0;
}
-static int nfs4_reclaim_lease(struct nfs_client *clp)
+static int nfs4_establish_lease(struct nfs_client *clp)
{
struct rpc_cred *cred;
const struct nfs4_state_recovery_ops *ops =
@@ -1655,7 +1660,41 @@ static int nfs4_reclaim_lease(struct nfs_client *clp)
status = ops->establish_clid(clp, cred);
put_rpccred(cred);
if (status != 0)
+ return status;
+ pnfs_destroy_all_layouts(clp);
+ return 0;
+}
+
+/*
+ * Returns zero or a negative errno. NFS4ERR values are converted
+ * to local errno values.
+ */
+static int nfs4_reclaim_lease(struct nfs_client *clp)
+{
+ int status;
+
+ status = nfs4_establish_lease(clp);
+ if (status < 0)
+ return nfs4_handle_reclaim_lease_error(clp, status);
+ if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state))
+ nfs4_state_start_reclaim_nograce(clp);
+ if (!test_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state))
+ set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
+ clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
+ clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+ return 0;
+}
+
+static int nfs4_purge_lease(struct nfs_client *clp)
+{
+ int status;
+
+ status = nfs4_establish_lease(clp);
+ if (status < 0)
return nfs4_handle_reclaim_lease_error(clp, status);
+ clear_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
+ set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+ nfs4_state_start_reclaim_nograce(clp);
return 0;
}
@@ -1764,6 +1803,8 @@ static int nfs4_reset_session(struct nfs_client *clp)
struct rpc_cred *cred;
int status;
+ if (!nfs4_has_session(clp))
+ return 0;
nfs4_begin_drain_session(clp);
cred = nfs4_get_exchange_id_cred(clp);
status = nfs4_proc_destroy_session(clp->cl_session, cred);
@@ -1792,12 +1833,14 @@ out:
static int nfs4_recall_slot(struct nfs_client *clp)
{
- struct nfs4_slot_table *fc_tbl = &clp->cl_session->fc_slot_table;
- struct nfs4_channel_attrs *fc_attrs = &clp->cl_session->fc_attrs;
+ struct nfs4_slot_table *fc_tbl;
struct nfs4_slot *new, *old;
int i;
+ if (!nfs4_has_session(clp))
+ return 0;
nfs4_begin_drain_session(clp);
+ fc_tbl = &clp->cl_session->fc_slot_table;
new = kmalloc(fc_tbl->target_max_slots * sizeof(struct nfs4_slot),
GFP_NOFS);
if (!new)
@@ -1810,11 +1853,10 @@ static int nfs4_recall_slot(struct nfs_client *clp)
fc_tbl->slots = new;
fc_tbl->max_slots = fc_tbl->target_max_slots;
fc_tbl->target_max_slots = 0;
- fc_attrs->max_reqs = fc_tbl->max_slots;
+ clp->cl_session->fc_attrs.max_reqs = fc_tbl->max_slots;
spin_unlock(&fc_tbl->slot_tbl_lock);
kfree(old);
- nfs4_end_drain_session(clp);
return 0;
}
@@ -1823,6 +1865,8 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp)
struct rpc_cred *cred;
int ret;
+ if (!nfs4_has_session(clp))
+ return 0;
nfs4_begin_drain_session(clp);
cred = nfs4_get_exchange_id_cred(clp);
ret = nfs4_proc_bind_conn_to_session(clp, cred);
@@ -1857,37 +1901,29 @@ static int nfs4_bind_conn_to_session(struct nfs_client *clp)
static void nfs4_state_manager(struct nfs_client *clp)
{
int status = 0;
+ const char *section = "", *section_sep = "";
/* Ensure exclusive access to NFSv4 state */
do {
if (test_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state)) {
- status = nfs4_reclaim_lease(clp);
+ section = "purge state";
+ status = nfs4_purge_lease(clp);
if (status < 0)
goto out_error;
- clear_bit(NFS4CLNT_PURGE_STATE, &clp->cl_state);
- set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+ continue;
}
- if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {
+ if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {
+ section = "lease expired";
/* We're going to have to re-establish a clientid */
status = nfs4_reclaim_lease(clp);
if (status < 0)
goto out_error;
- if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
- continue;
- clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
-
- if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH,
- &clp->cl_state))
- nfs4_state_start_reclaim_nograce(clp);
- else
- set_bit(NFS4CLNT_RECLAIM_REBOOT,
- &clp->cl_state);
-
- pnfs_destroy_all_layouts(clp);
+ continue;
}
if (test_and_clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state)) {
+ section = "check lease";
status = nfs4_check_lease(clp);
if (status < 0)
goto out_error;
@@ -1896,8 +1932,8 @@ static void nfs4_state_manager(struct nfs_client *clp)
}
/* Initialize or reset the session */
- if (test_and_clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)
- && nfs4_has_session(clp)) {
+ if (test_and_clear_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state)) {
+ section = "reset session";
status = nfs4_reset_session(clp);
if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state))
continue;
@@ -1907,15 +1943,26 @@ static void nfs4_state_manager(struct nfs_client *clp)
/* Send BIND_CONN_TO_SESSION */
if (test_and_clear_bit(NFS4CLNT_BIND_CONN_TO_SESSION,
- &clp->cl_state) && nfs4_has_session(clp)) {
+ &clp->cl_state)) {
+ section = "bind conn to session";
status = nfs4_bind_conn_to_session(clp);
if (status < 0)
goto out_error;
continue;
}
+ /* Recall session slots */
+ if (test_and_clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state)) {
+ section = "recall slot";
+ status = nfs4_recall_slot(clp);
+ if (status < 0)
+ goto out_error;
+ continue;
+ }
+
/* First recover reboot state... */
if (test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state)) {
+ section = "reclaim reboot";
status = nfs4_do_reclaim(clp,
clp->cl_mvops->reboot_recovery_ops);
if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
@@ -1930,6 +1977,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
/* Now recover expired state... */
if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) {
+ section = "reclaim nograce";
status = nfs4_do_reclaim(clp,
clp->cl_mvops->nograce_recovery_ops);
if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
@@ -1945,15 +1993,6 @@ static void nfs4_state_manager(struct nfs_client *clp)
nfs_client_return_marked_delegations(clp);
continue;
}
- /* Recall session slots */
- if (test_and_clear_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state)
- && nfs4_has_session(clp)) {
- status = nfs4_recall_slot(clp);
- if (status < 0)
- goto out_error;
- continue;
- }
-
nfs4_clear_state_manager_bit(clp);
/* Did we race with an attempt to give us more work? */
@@ -1964,8 +2003,11 @@ static void nfs4_state_manager(struct nfs_client *clp)
} while (atomic_read(&clp->cl_count) > 1);
return;
out_error:
- pr_warn_ratelimited("NFS: state manager failed on NFSv4 server %s"
- " with error %d\n", clp->cl_hostname, -status);
+ if (strlen(section))
+ section_sep = ": ";
+ pr_warn_ratelimited("NFS: state manager%s%s failed on NFSv4 server %s"
+ " with error %d\n", section_sep, section,
+ clp->cl_hostname, -status);
nfs4_end_drain_session(clp);
nfs4_clear_state_manager_bit(clp);
}
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
new file mode 100644
index 000000000000..12a31a9dbcdd
--- /dev/null
+++ b/fs/nfs/nfs4super.c
@@ -0,0 +1,372 @@
+/*
+ * Copyright (c) 2012 Bryan Schumaker <bjschuma@netapp.com>
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/nfs_idmap.h>
+#include <linux/nfs4_mount.h>
+#include <linux/nfs_fs.h>
+#include "delegation.h"
+#include "internal.h"
+#include "nfs4_fs.h"
+#include "pnfs.h"
+#include "nfs.h"
+
+#define NFSDBG_FACILITY NFSDBG_VFS
+
+static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc);
+static void nfs4_evict_inode(struct inode *inode);
+static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data);
+static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data);
+static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data);
+
+static struct file_system_type nfs4_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "nfs4",
+ .mount = nfs_fs_mount,
+ .kill_sb = nfs_kill_super,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+};
+
+static struct file_system_type nfs4_remote_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "nfs4",
+ .mount = nfs4_remote_mount,
+ .kill_sb = nfs_kill_super,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+};
+
+static struct file_system_type nfs4_remote_referral_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "nfs4",
+ .mount = nfs4_remote_referral_mount,
+ .kill_sb = nfs_kill_super,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+};
+
+struct file_system_type nfs4_referral_fs_type = {
+ .owner = THIS_MODULE,
+ .name = "nfs4",
+ .mount = nfs4_referral_mount,
+ .kill_sb = nfs_kill_super,
+ .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
+};
+
+static const struct super_operations nfs4_sops = {
+ .alloc_inode = nfs_alloc_inode,
+ .destroy_inode = nfs_destroy_inode,
+ .write_inode = nfs4_write_inode,
+ .put_super = nfs_put_super,
+ .statfs = nfs_statfs,
+ .evict_inode = nfs4_evict_inode,
+ .umount_begin = nfs_umount_begin,
+ .show_options = nfs_show_options,
+ .show_devname = nfs_show_devname,
+ .show_path = nfs_show_path,
+ .show_stats = nfs_show_stats,
+ .remount_fs = nfs_remount,
+};
+
+struct nfs_subversion nfs_v4 = {
+ .owner = THIS_MODULE,
+ .nfs_fs = &nfs4_fs_type,
+ .rpc_vers = &nfs_version4,
+ .rpc_ops = &nfs_v4_clientops,
+ .sops = &nfs4_sops,
+ .xattr = nfs4_xattr_handlers,
+};
+
+static int nfs4_write_inode(struct inode *inode, struct writeback_control *wbc)
+{
+ int ret = nfs_write_inode(inode, wbc);
+
+ if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) {
+ int status;
+ bool sync = true;
+
+ if (wbc->sync_mode == WB_SYNC_NONE)
+ sync = false;
+
+ status = pnfs_layoutcommit_inode(inode, sync);
+ if (status < 0)
+ return status;
+ }
+ return ret;
+}
+
+/*
+ * Clean out any remaining NFSv4 state that might be left over due
+ * to open() calls that passed nfs_atomic_lookup, but failed to call
+ * nfs_open().
+ */
+static void nfs4_evict_inode(struct inode *inode)
+{
+ truncate_inode_pages(&inode->i_data, 0);
+ clear_inode(inode);
+ pnfs_return_layout(inode);
+ pnfs_destroy_layout(NFS_I(inode));
+ /* If we are holding a delegation, return it! */
+ nfs_inode_return_delegation_noreclaim(inode);
+ /* First call standard NFS clear_inode() code */
+ nfs_clear_inode(inode);
+}
+
+/*
+ * Get the superblock for the NFS4 root partition
+ */
+static struct dentry *
+nfs4_remote_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *info)
+{
+ struct nfs_mount_info *mount_info = info;
+ struct nfs_server *server;
+ struct dentry *mntroot = ERR_PTR(-ENOMEM);
+
+ mount_info->set_security = nfs_set_sb_security;
+
+ /* Get a volume representation */
+ server = nfs4_create_server(mount_info, &nfs_v4);
+ if (IS_ERR(server)) {
+ mntroot = ERR_CAST(server);
+ goto out;
+ }
+
+ mntroot = nfs_fs_mount_common(server, flags, dev_name, mount_info, &nfs_v4);
+
+out:
+ return mntroot;
+}
+
+static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
+ int flags, void *data, const char *hostname)
+{
+ struct vfsmount *root_mnt;
+ char *root_devname;
+ size_t len;
+
+ len = strlen(hostname) + 5;
+ root_devname = kmalloc(len, GFP_KERNEL);
+ if (root_devname == NULL)
+ return ERR_PTR(-ENOMEM);
+ /* Does hostname needs to be enclosed in brackets? */
+ if (strchr(hostname, ':'))
+ snprintf(root_devname, len, "[%s]:/", hostname);
+ else
+ snprintf(root_devname, len, "%s:/", hostname);
+ root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data);
+ kfree(root_devname);
+ return root_mnt;
+}
+
+struct nfs_referral_count {
+ struct list_head list;
+ const struct task_struct *task;
+ unsigned int referral_count;
+};
+
+static LIST_HEAD(nfs_referral_count_list);
+static DEFINE_SPINLOCK(nfs_referral_count_list_lock);
+
+static struct nfs_referral_count *nfs_find_referral_count(void)
+{
+ struct nfs_referral_count *p;
+
+ list_for_each_entry(p, &nfs_referral_count_list, list) {
+ if (p->task == current)
+ return p;
+ }
+ return NULL;
+}
+
+#define NFS_MAX_NESTED_REFERRALS 2
+
+static int nfs_referral_loop_protect(void)
+{
+ struct nfs_referral_count *p, *new;
+ int ret = -ENOMEM;
+
+ new = kmalloc(sizeof(*new), GFP_KERNEL);
+ if (!new)
+ goto out;
+ new->task = current;
+ new->referral_count = 1;
+
+ ret = 0;
+ spin_lock(&nfs_referral_count_list_lock);
+ p = nfs_find_referral_count();
+ if (p != NULL) {
+ if (p->referral_count >= NFS_MAX_NESTED_REFERRALS)
+ ret = -ELOOP;
+ else
+ p->referral_count++;
+ } else {
+ list_add(&new->list, &nfs_referral_count_list);
+ new = NULL;
+ }
+ spin_unlock(&nfs_referral_count_list_lock);
+ kfree(new);
+out:
+ return ret;
+}
+
+static void nfs_referral_loop_unprotect(void)
+{
+ struct nfs_referral_count *p;
+
+ spin_lock(&nfs_referral_count_list_lock);
+ p = nfs_find_referral_count();
+ p->referral_count--;
+ if (p->referral_count == 0)
+ list_del(&p->list);
+ else
+ p = NULL;
+ spin_unlock(&nfs_referral_count_list_lock);
+ kfree(p);
+}
+
+static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
+ const char *export_path)
+{
+ struct dentry *dentry;
+ int err;
+
+ if (IS_ERR(root_mnt))
+ return ERR_CAST(root_mnt);
+
+ err = nfs_referral_loop_protect();
+ if (err) {
+ mntput(root_mnt);
+ return ERR_PTR(err);
+ }
+
+ dentry = mount_subtree(root_mnt, export_path);
+ nfs_referral_loop_unprotect();
+
+ return dentry;
+}
+
+struct dentry *nfs4_try_mount(int flags, const char *dev_name,
+ struct nfs_mount_info *mount_info,
+ struct nfs_subversion *nfs_mod)
+{
+ char *export_path;
+ struct vfsmount *root_mnt;
+ struct dentry *res;
+ struct nfs_parsed_mount_data *data = mount_info->parsed;
+
+ dfprintk(MOUNT, "--> nfs4_try_mount()\n");
+
+ export_path = data->nfs_server.export_path;
+ data->nfs_server.export_path = "/";
+ root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info,
+ data->nfs_server.hostname);
+ data->nfs_server.export_path = export_path;
+
+ res = nfs_follow_remote_path(root_mnt, export_path);
+
+ dfprintk(MOUNT, "<-- nfs4_try_mount() = %ld%s\n",
+ IS_ERR(res) ? PTR_ERR(res) : 0,
+ IS_ERR(res) ? " [error]" : "");
+ return res;
+}
+
+static struct dentry *
+nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *raw_data)
+{
+ struct nfs_mount_info mount_info = {
+ .fill_super = nfs_fill_super,
+ .set_security = nfs_clone_sb_security,
+ .cloned = raw_data,
+ };
+ struct nfs_server *server;
+ struct dentry *mntroot = ERR_PTR(-ENOMEM);
+
+ dprintk("--> nfs4_referral_get_sb()\n");
+
+ mount_info.mntfh = nfs_alloc_fhandle();
+ if (mount_info.cloned == NULL || mount_info.mntfh == NULL)
+ goto out;
+
+ /* create a new volume representation */
+ server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh);
+ if (IS_ERR(server)) {
+ mntroot = ERR_CAST(server);
+ goto out;
+ }
+
+ mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, &nfs_v4);
+out:
+ nfs_free_fhandle(mount_info.mntfh);
+ return mntroot;
+}
+
+/*
+ * Create an NFS4 server record on referral traversal
+ */
+static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *raw_data)
+{
+ struct nfs_clone_mount *data = raw_data;
+ char *export_path;
+ struct vfsmount *root_mnt;
+ struct dentry *res;
+
+ dprintk("--> nfs4_referral_mount()\n");
+
+ export_path = data->mnt_path;
+ data->mnt_path = "/";
+
+ root_mnt = nfs_do_root_mount(&nfs4_remote_referral_fs_type,
+ flags, data, data->hostname);
+ data->mnt_path = export_path;
+
+ res = nfs_follow_remote_path(root_mnt, export_path);
+ dprintk("<-- nfs4_referral_mount() = %ld%s\n",
+ IS_ERR(res) ? PTR_ERR(res) : 0,
+ IS_ERR(res) ? " [error]" : "");
+ return res;
+}
+
+
+static int __init init_nfs_v4(void)
+{
+ int err;
+
+ err = nfs_idmap_init();
+ if (err)
+ goto out;
+
+ err = nfs4_register_sysctl();
+ if (err)
+ goto out1;
+
+ err = register_filesystem(&nfs4_fs_type);
+ if (err < 0)
+ goto out2;
+
+ register_nfs_version(&nfs_v4);
+ return 0;
+out2:
+ nfs4_unregister_sysctl();
+out1:
+ nfs_idmap_quit();
+out:
+ return err;
+}
+
+static void __exit exit_nfs_v4(void)
+{
+ unregister_nfs_version(&nfs_v4);
+ unregister_filesystem(&nfs4_fs_type);
+ nfs4_unregister_sysctl();
+ nfs_idmap_quit();
+}
+
+MODULE_LICENSE("GPL");
+
+module_init(init_nfs_v4);
+module_exit(exit_nfs_v4);
diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c
new file mode 100644
index 000000000000..5729bc8aa75d
--- /dev/null
+++ b/fs/nfs/nfs4sysctl.c
@@ -0,0 +1,68 @@
+/*
+ * linux/fs/nfs/nfs4sysctl.c
+ *
+ * Sysctl interface to NFS v4 parameters
+ *
+ * Copyright (c) 2006 Trond Myklebust <Trond.Myklebust@netapp.com>
+ */
+#include <linux/sysctl.h>
+#include <linux/nfs_idmap.h>
+#include <linux/nfs_fs.h>
+
+#include "callback.h"
+
+static const int nfs_set_port_min = 0;
+static const int nfs_set_port_max = 65535;
+static struct ctl_table_header *nfs4_callback_sysctl_table;
+
+static ctl_table nfs4_cb_sysctls[] = {
+ {
+ .procname = "nfs_callback_tcpport",
+ .data = &nfs_callback_set_tcpport,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (int *)&nfs_set_port_min,
+ .extra2 = (int *)&nfs_set_port_max,
+ },
+ {
+ .procname = "idmap_cache_timeout",
+ .data = &nfs_idmap_cache_timeout,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ { }
+};
+
+static ctl_table nfs4_cb_sysctl_dir[] = {
+ {
+ .procname = "nfs",
+ .mode = 0555,
+ .child = nfs4_cb_sysctls,
+ },
+ { }
+};
+
+static ctl_table nfs4_cb_sysctl_root[] = {
+ {
+ .procname = "fs",
+ .mode = 0555,
+ .child = nfs4_cb_sysctl_dir,
+ },
+ { }
+};
+
+int nfs4_register_sysctl(void)
+{
+ nfs4_callback_sysctl_table = register_sysctl_table(nfs4_cb_sysctl_root);
+ if (nfs4_callback_sysctl_table == NULL)
+ return -ENOMEM;
+ return 0;
+}
+
+void nfs4_unregister_sysctl(void)
+{
+ unregister_sysctl_table(nfs4_callback_sysctl_table);
+ nfs4_callback_sysctl_table = NULL;
+}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 18fae29b0301..ca13483edd60 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -852,12 +852,6 @@ const u32 nfs41_maxread_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
XDR_UNIT);
#endif /* CONFIG_NFS_V4_1 */
-static unsigned short send_implementation_id = 1;
-
-module_param(send_implementation_id, ushort, 0644);
-MODULE_PARM_DESC(send_implementation_id,
- "Send implementation ID with NFSv4.1 exchange_id");
-
static const umode_t nfs_type2fmt[] = {
[NF4BAD] = 0,
[NF4REG] = S_IFREG,
@@ -1236,7 +1230,7 @@ static void encode_link(struct xdr_stream *xdr, const struct qstr *name, struct
static inline int nfs4_lock_type(struct file_lock *fl, int block)
{
- if ((fl->fl_type & (F_RDLCK|F_WRLCK|F_UNLCK)) == F_RDLCK)
+ if (fl->fl_type == F_RDLCK)
return block ? NFS4_READW_LT : NFS4_READ_LT;
return block ? NFS4_WRITEW_LT : NFS4_WRITE_LT;
}
@@ -3078,7 +3072,7 @@ out_overflow:
return -EIO;
}
-static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, __be32 **savep)
+static int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, unsigned int *savep)
{
__be32 *p;
@@ -3086,7 +3080,7 @@ static inline int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen,
if (unlikely(!p))
goto out_overflow;
*attrlen = be32_to_cpup(p);
- *savep = xdr->p;
+ *savep = xdr_stream_pos(xdr);
return 0;
out_overflow:
print_overflow_msg(__func__, xdr);
@@ -4068,10 +4062,10 @@ static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, str
return status;
}
-static int verify_attr_len(struct xdr_stream *xdr, __be32 *savep, uint32_t attrlen)
+static int verify_attr_len(struct xdr_stream *xdr, unsigned int savep, uint32_t attrlen)
{
unsigned int attrwords = XDR_QUADLEN(attrlen);
- unsigned int nwords = xdr->p - savep;
+ unsigned int nwords = (xdr_stream_pos(xdr) - savep) >> 2;
if (unlikely(attrwords != nwords)) {
dprintk("%s: server returned incorrect attribute length: "
@@ -4158,13 +4152,18 @@ static int decode_verifier(struct xdr_stream *xdr, void *verifier)
return decode_opaque_fixed(xdr, verifier, NFS4_VERIFIER_SIZE);
}
+static int decode_write_verifier(struct xdr_stream *xdr, struct nfs_write_verifier *verifier)
+{
+ return decode_opaque_fixed(xdr, verifier->data, NFS4_VERIFIER_SIZE);
+}
+
static int decode_commit(struct xdr_stream *xdr, struct nfs_commitres *res)
{
int status;
status = decode_op_hdr(xdr, OP_COMMIT);
if (!status)
- status = decode_verifier(xdr, res->verf->verifier);
+ status = decode_write_verifier(xdr, &res->verf->verifier);
return status;
}
@@ -4193,7 +4192,7 @@ out_overflow:
static int decode_server_caps(struct xdr_stream *xdr, struct nfs4_server_caps_res *res)
{
- __be32 *savep;
+ unsigned int savep;
uint32_t attrlen, bitmap[3] = {0};
int status;
@@ -4222,7 +4221,7 @@ xdr_error:
static int decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat)
{
- __be32 *savep;
+ unsigned int savep;
uint32_t attrlen, bitmap[3] = {0};
int status;
@@ -4254,7 +4253,7 @@ xdr_error:
static int decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf)
{
- __be32 *savep;
+ unsigned int savep;
uint32_t attrlen, bitmap[3] = {0};
int status;
@@ -4299,7 +4298,8 @@ out_overflow:
static int decode_first_threshold_item4(struct xdr_stream *xdr,
struct nfs4_threshold *res)
{
- __be32 *p, *savep;
+ __be32 *p;
+ unsigned int savep;
uint32_t bitmap[3] = {0,}, attrlen;
int status;
@@ -4503,7 +4503,7 @@ static int decode_getfattr_generic(struct xdr_stream *xdr, struct nfs_fattr *fat
struct nfs_fh *fh, struct nfs4_fs_locations *fs_loc,
const struct nfs_server *server)
{
- __be32 *savep;
+ unsigned int savep;
uint32_t attrlen,
bitmap[3] = {0};
int status;
@@ -4615,7 +4615,7 @@ static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap,
static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
{
- __be32 *savep;
+ unsigned int savep;
uint32_t attrlen, bitmap[3];
int status;
@@ -4920,9 +4920,8 @@ static int decode_putrootfh(struct xdr_stream *xdr)
static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_readres *res)
{
- struct kvec *iov = req->rq_rcv_buf.head;
__be32 *p;
- uint32_t count, eof, recvd, hdrlen;
+ uint32_t count, eof, recvd;
int status;
status = decode_op_hdr(xdr, OP_READ);
@@ -4933,15 +4932,13 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_
goto out_overflow;
eof = be32_to_cpup(p++);
count = be32_to_cpup(p);
- hdrlen = (u8 *) xdr->p - (u8 *) iov->iov_base;
- recvd = req->rq_rcv_buf.len - hdrlen;
+ recvd = xdr_read_pages(xdr, count);
if (count > recvd) {
dprintk("NFS: server cheating in read reply: "
"count %u > recvd %u\n", count, recvd);
count = recvd;
eof = 0;
}
- xdr_read_pages(xdr, count);
res->eof = eof;
res->count = count;
return 0;
@@ -4952,10 +4949,6 @@ out_overflow:
static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir)
{
- struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
- struct kvec *iov = rcvbuf->head;
- size_t hdrlen;
- u32 recvd, pglen = rcvbuf->page_len;
int status;
__be32 verf[2];
@@ -4967,22 +4960,12 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
memcpy(verf, readdir->verifier.data, sizeof(verf));
dprintk("%s: verifier = %08x:%08x\n",
__func__, verf[0], verf[1]);
-
- hdrlen = (char *) xdr->p - (char *) iov->iov_base;
- recvd = rcvbuf->len - hdrlen;
- if (pglen > recvd)
- pglen = recvd;
- xdr_read_pages(xdr, pglen);
-
-
- return pglen;
+ return xdr_read_pages(xdr, xdr->buf->page_len);
}
static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
{
struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
- struct kvec *iov = rcvbuf->head;
- size_t hdrlen;
u32 len, recvd;
__be32 *p;
int status;
@@ -5000,14 +4983,12 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
dprintk("nfs: server returned giant symlink!\n");
return -ENAMETOOLONG;
}
- hdrlen = (char *) xdr->p - (char *) iov->iov_base;
- recvd = req->rq_rcv_buf.len - hdrlen;
+ recvd = xdr_read_pages(xdr, len);
if (recvd < len) {
dprintk("NFS: server cheating in readlink reply: "
"count %u > recvd %u\n", len, recvd);
return -EIO;
}
- xdr_read_pages(xdr, len);
/*
* The XDR encode routine has set things up so that
* the link text will be copied directly into the
@@ -5063,10 +5044,10 @@ decode_restorefh(struct xdr_stream *xdr)
static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
struct nfs_getaclres *res)
{
- __be32 *savep, *bm_p;
+ unsigned int savep;
+ __be32 *bm_p;
uint32_t attrlen,
bitmap[3] = {0};
- struct kvec *iov = req->rq_rcv_buf.head;
int status;
size_t page_len = xdr->buf->page_len;
@@ -5089,7 +5070,6 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
if (unlikely(bitmap[0] & (FATTR4_WORD0_ACL - 1U)))
return -EIO;
if (likely(bitmap[0] & FATTR4_WORD0_ACL)) {
- size_t hdrlen;
/* The bitmap (xdr len + bitmaps) and the attr xdr len words
* are stored with the acl data to handle the problem of
@@ -5098,7 +5078,6 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
/* We ignore &savep and don't do consistency checks on
* the attr length. Let userspace figure it out.... */
- hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base;
attrlen += res->acl_data_offset;
if (attrlen > page_len) {
if (res->acl_flags & NFS4_ACL_LEN_REQUEST) {
@@ -5212,13 +5191,12 @@ static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res)
if (status)
return status;
- p = xdr_inline_decode(xdr, 16);
+ p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
goto out_overflow;
res->count = be32_to_cpup(p++);
res->verf->committed = be32_to_cpup(p++);
- memcpy(res->verf->verifier, p, NFS4_VERIFIER_SIZE);
- return 0;
+ return decode_write_verifier(xdr, &res->verf->verifier);
out_overflow:
print_overflow_msg(__func__, xdr);
return -EIO;
@@ -5599,7 +5577,7 @@ static int decode_getdevicelist(struct xdr_stream *xdr,
{
__be32 *p;
int status, i;
- struct nfs_writeverf verftemp;
+ nfs4_verifier verftemp;
status = decode_op_hdr(xdr, OP_GETDEVICELIST);
if (status)
@@ -5613,7 +5591,7 @@ static int decode_getdevicelist(struct xdr_stream *xdr,
p += 2;
/* Read verifier */
- p = xdr_decode_opaque_fixed(p, verftemp.verifier, NFS4_VERIFIER_SIZE);
+ p = xdr_decode_opaque_fixed(p, verftemp.data, NFS4_VERIFIER_SIZE);
res->num_devs = be32_to_cpup(p);
@@ -5707,9 +5685,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
__be32 *p;
int status;
u32 layout_count;
- struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
- struct kvec *iov = rcvbuf->head;
- u32 hdrlen, recvd;
+ u32 recvd;
status = decode_op_hdr(xdr, OP_LAYOUTGET);
if (status)
@@ -5746,8 +5722,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
res->type,
res->layoutp->len);
- hdrlen = (u8 *) xdr->p - (u8 *) iov->iov_base;
- recvd = req->rq_rcv_buf.len - hdrlen;
+ recvd = xdr_read_pages(xdr, res->layoutp->len);
if (res->layoutp->len > recvd) {
dprintk("NFS: server cheating in layoutget reply: "
"layout len %u > recvd %u\n",
@@ -5755,8 +5730,6 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
return -EINVAL;
}
- xdr_read_pages(xdr, res->layoutp->len);
-
if (layout_count > 1) {
/* We only handle a length one array at the moment. Any
* further entries are just ignored. Note that this means
@@ -7103,6 +7076,7 @@ out:
int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
int plus)
{
+ unsigned int savep;
uint32_t bitmap[3] = {0};
uint32_t len;
__be32 *p = xdr_inline_decode(xdr, 4);
@@ -7141,7 +7115,7 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
if (decode_attr_bitmap(xdr, bitmap) < 0)
goto out_overflow;
- if (decode_attr_length(xdr, &len, &p) < 0)
+ if (decode_attr_length(xdr, &len, &savep) < 0)
goto out_overflow;
if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh,
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index aed913c833f4..1a6732ed04a4 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -54,6 +54,7 @@ void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
if (hdr->completion_ops->init_hdr)
hdr->completion_ops->init_hdr(hdr);
}
+EXPORT_SYMBOL_GPL(nfs_pgheader_init);
void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
{
@@ -70,7 +71,7 @@ void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
static inline struct nfs_page *
nfs_page_alloc(void)
{
- struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_KERNEL);
+ struct nfs_page *p = kmem_cache_zalloc(nfs_page_cachep, GFP_NOIO);
if (p)
INIT_LIST_HEAD(&p->wb_list);
return p;
@@ -117,7 +118,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
* long write-back delay. This will be adjusted in
* update_nfs_request below if the region is not locked. */
req->wb_page = page;
- req->wb_index = page->index;
+ req->wb_index = page_file_index(page);
page_cache_get(page);
req->wb_offset = offset;
req->wb_pgbase = offset;
@@ -268,6 +269,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_lseg = NULL;
desc->pg_dreq = NULL;
}
+EXPORT_SYMBOL_GPL(nfs_pageio_init);
/**
* nfs_can_coalesce_requests - test two requests for compatibility
@@ -409,6 +411,7 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
} while (ret);
return ret;
}
+EXPORT_SYMBOL_GPL(nfs_pageio_add_request);
/**
* nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
@@ -424,6 +427,7 @@ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
break;
}
}
+EXPORT_SYMBOL_GPL(nfs_pageio_complete);
/**
* nfs_pageio_cond_complete - Conditional I/O completion
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index bbc49caa7a82..76875bfcf19c 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -651,7 +651,14 @@ out_err_free:
return NULL;
}
-/* Initiates a LAYOUTRETURN(FILE) */
+/*
+ * Initiates a LAYOUTRETURN(FILE), and removes the pnfs_layout_hdr
+ * when the layout segment list is empty.
+ *
+ * Note that a pnfs_layout_hdr can exist with an empty layout segment
+ * list when LAYOUTGET has failed, or when LAYOUTGET succeeded, but the
+ * deviceid is marked invalid.
+ */
int
_pnfs_return_layout(struct inode *ino)
{
@@ -660,22 +667,31 @@ _pnfs_return_layout(struct inode *ino)
LIST_HEAD(tmp_list);
struct nfs4_layoutreturn *lrp;
nfs4_stateid stateid;
- int status = 0;
+ int status = 0, empty;
- dprintk("--> %s\n", __func__);
+ dprintk("NFS: %s for inode %lu\n", __func__, ino->i_ino);
spin_lock(&ino->i_lock);
lo = nfsi->layout;
- if (!lo) {
+ if (!lo || pnfs_test_layout_returned(lo)) {
spin_unlock(&ino->i_lock);
- dprintk("%s: no layout to return\n", __func__);
- return status;
+ dprintk("NFS: %s no layout to return\n", __func__);
+ goto out;
}
stateid = nfsi->layout->plh_stateid;
/* Reference matched in nfs4_layoutreturn_release */
get_layout_hdr(lo);
+ empty = list_empty(&lo->plh_segs);
mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
+ /* Don't send a LAYOUTRETURN if list was initially empty */
+ if (empty) {
+ spin_unlock(&ino->i_lock);
+ put_layout_hdr(lo);
+ dprintk("NFS: %s no layout segments to return\n", __func__);
+ goto out;
+ }
lo->plh_block_lgets++;
+ pnfs_mark_layout_returned(lo);
spin_unlock(&ino->i_lock);
pnfs_free_lseg_list(&tmp_list);
@@ -686,6 +702,7 @@ _pnfs_return_layout(struct inode *ino)
status = -ENOMEM;
set_bit(NFS_LAYOUT_RW_FAILED, &lo->plh_flags);
set_bit(NFS_LAYOUT_RO_FAILED, &lo->plh_flags);
+ pnfs_clear_layout_returned(lo);
put_layout_hdr(lo);
goto out;
}
@@ -1075,6 +1092,10 @@ pnfs_update_layout(struct inode *ino,
get_layout_hdr(lo);
if (list_empty(&lo->plh_segs))
first = true;
+
+ /* Enable LAYOUTRETURNs */
+ pnfs_clear_layout_returned(lo);
+
spin_unlock(&ino->i_lock);
if (first) {
/* The lo must be on the clp list if there is any
@@ -1209,7 +1230,7 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
-bool
+void
pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
const struct nfs_pgio_completion_ops *compl_ops)
{
@@ -1217,13 +1238,12 @@ pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
if (ld == NULL)
- return false;
- nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops,
- server->rsize, 0);
- return true;
+ nfs_pageio_init_read(pgio, inode, compl_ops);
+ else
+ nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0);
}
-bool
+void
pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
int ioflags,
const struct nfs_pgio_completion_ops *compl_ops)
@@ -1232,10 +1252,9 @@ pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
if (ld == NULL)
- return false;
- nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops,
- server->wsize, ioflags);
- return true;
+ nfs_pageio_init_write(pgio, inode, ioflags, compl_ops);
+ else
+ nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, server->wsize, ioflags);
}
bool
@@ -1272,7 +1291,7 @@ int pnfs_write_done_resend_to_mds(struct inode *inode,
LIST_HEAD(failed);
/* Resend all requests through the MDS */
- nfs_pageio_init_write_mds(&pgio, inode, FLUSH_STABLE, compl_ops);
+ nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, compl_ops);
while (!list_empty(head)) {
struct nfs_page *req = nfs_list_entry(head->next);
@@ -1388,6 +1407,7 @@ static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
put_lseg(hdr->lseg);
nfs_writehdr_free(hdr);
}
+EXPORT_SYMBOL_GPL(pnfs_writehdr_free);
int
pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
@@ -1427,7 +1447,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode,
LIST_HEAD(failed);
/* Resend all requests through the MDS */
- nfs_pageio_init_read_mds(&pgio, inode, compl_ops);
+ nfs_pageio_init_read(&pgio, inode, compl_ops);
while (!list_empty(head)) {
struct nfs_page *req = nfs_list_entry(head->next);
@@ -1542,6 +1562,7 @@ static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
put_lseg(hdr->lseg);
nfs_readhdr_free(hdr);
}
+EXPORT_SYMBOL_GPL(pnfs_readhdr_free);
int
pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 64f90d845f6a..2c6c80503ba4 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -64,6 +64,7 @@ enum {
NFS_LAYOUT_ROC, /* some lseg had roc bit set */
NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */
NFS_LAYOUT_INVALID, /* layout is being destroyed */
+ NFS_LAYOUT_RETURNED, /* layout has already been returned */
};
enum layoutdriver_policy_flags {
@@ -178,9 +179,9 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
void get_layout_hdr(struct pnfs_layout_hdr *lo);
void put_lseg(struct pnfs_layout_segment *lseg);
-bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
+void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
const struct nfs_pgio_completion_ops *);
-bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *,
+void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *,
int, const struct nfs_pgio_completion_ops *);
void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
@@ -255,6 +256,24 @@ struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *
bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
void nfs4_deviceid_purge_client(const struct nfs_client *);
+static inline void
+pnfs_mark_layout_returned(struct pnfs_layout_hdr *lo)
+{
+ set_bit(NFS_LAYOUT_RETURNED, &lo->plh_flags);
+}
+
+static inline void
+pnfs_clear_layout_returned(struct pnfs_layout_hdr *lo)
+{
+ clear_bit(NFS_LAYOUT_RETURNED, &lo->plh_flags);
+}
+
+static inline bool
+pnfs_test_layout_returned(struct pnfs_layout_hdr *lo)
+{
+ return test_bit(NFS_LAYOUT_RETURNED, &lo->plh_flags);
+}
+
static inline int lo_fail_bit(u32 iomode)
{
return iomode == IOMODE_RW ?
@@ -438,16 +457,16 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
{
}
-static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
+static inline void pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
const struct nfs_pgio_completion_ops *compl_ops)
{
- return false;
+ nfs_pageio_init_read(pgio, inode, compl_ops);
}
-static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags,
+static inline void pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags,
const struct nfs_pgio_completion_ops *compl_ops)
{
- return false;
+ nfs_pageio_init_write(pgio, inode, ioflags, compl_ops);
}
static inline int
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 4433806e116f..50a88c3546ed 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -734,6 +734,38 @@ out_einval:
return -EINVAL;
}
+static int nfs_have_delegation(struct inode *inode, fmode_t flags)
+{
+ return 0;
+}
+
+static int nfs_return_delegation(struct inode *inode)
+{
+ nfs_wb_all(inode);
+ return 0;
+}
+
+static const struct inode_operations nfs_dir_inode_operations = {
+ .create = nfs_create,
+ .lookup = nfs_lookup,
+ .link = nfs_link,
+ .unlink = nfs_unlink,
+ .symlink = nfs_symlink,
+ .mkdir = nfs_mkdir,
+ .rmdir = nfs_rmdir,
+ .mknod = nfs_mknod,
+ .rename = nfs_rename,
+ .permission = nfs_permission,
+ .getattr = nfs_getattr,
+ .setattr = nfs_setattr,
+};
+
+static const struct inode_operations nfs_file_inode_operations = {
+ .permission = nfs_permission,
+ .getattr = nfs_getattr,
+ .setattr = nfs_setattr,
+};
+
const struct nfs_rpc_ops nfs_v2_clientops = {
.version = 2, /* protocol version */
.dentry_ops = &nfs_dentry_operations,
@@ -742,6 +774,7 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
.file_ops = &nfs_file_operations,
.getroot = nfs_proc_get_root,
.submount = nfs_submount,
+ .try_mount = nfs_try_mount,
.getattr = nfs_proc_getattr,
.setattr = nfs_proc_setattr,
.lookup = nfs_proc_lookup,
@@ -767,9 +800,11 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
.pathconf = nfs_proc_pathconf,
.decode_dirent = nfs2_decode_dirent,
.read_setup = nfs_proc_read_setup,
+ .read_pageio_init = nfs_pageio_init_read,
.read_rpc_prepare = nfs_proc_read_rpc_prepare,
.read_done = nfs_read_done,
.write_setup = nfs_proc_write_setup,
+ .write_pageio_init = nfs_pageio_init_write,
.write_rpc_prepare = nfs_proc_write_rpc_prepare,
.write_done = nfs_write_done,
.commit_setup = nfs_proc_commit_setup,
@@ -777,5 +812,11 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
.lock = nfs_proc_lock,
.lock_check_bounds = nfs_lock_check_bounds,
.close_context = nfs_close_context,
+ .have_delegation = nfs_have_delegation,
+ .return_delegation = nfs_return_delegation,
+ .alloc_client = nfs_alloc_client,
.init_client = nfs_init_client,
+ .free_client = nfs_free_client,
+ .create_server = nfs_create_server,
+ .clone_server = nfs_clone_server,
};
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 86ced7836214..b6bdb18e892c 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -20,8 +20,6 @@
#include <linux/nfs_page.h>
#include <linux/module.h>
-#include "pnfs.h"
-
#include "nfs4_fs.h"
#include "internal.h"
#include "iostat.h"
@@ -50,6 +48,7 @@ struct nfs_read_header *nfs_readhdr_alloc(void)
}
return rhdr;
}
+EXPORT_SYMBOL_GPL(nfs_readhdr_alloc);
static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
unsigned int pagecount)
@@ -82,6 +81,7 @@ void nfs_readhdr_free(struct nfs_pgio_header *hdr)
kmem_cache_free(nfs_rdata_cachep, rhdr);
}
+EXPORT_SYMBOL_GPL(nfs_readhdr_free);
void nfs_readdata_release(struct nfs_read_data *rdata)
{
@@ -98,6 +98,7 @@ void nfs_readdata_release(struct nfs_read_data *rdata)
if (atomic_dec_and_test(&hdr->refcnt))
hdr->completion_ops->completion(hdr);
}
+EXPORT_SYMBOL_GPL(nfs_readdata_release);
static
int nfs_return_empty_page(struct page *page)
@@ -108,13 +109,14 @@ int nfs_return_empty_page(struct page *page)
return 0;
}
-void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
+void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
struct inode *inode,
const struct nfs_pgio_completion_ops *compl_ops)
{
nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops,
NFS_SERVER(inode)->rsize, 0);
}
+EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
{
@@ -123,14 +125,6 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
-void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
- struct inode *inode,
- const struct nfs_pgio_completion_ops *compl_ops)
-{
- if (!pnfs_pageio_init_read(pgio, inode, compl_ops))
- nfs_pageio_init_read_mds(pgio, inode, compl_ops);
-}
-
int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
struct page *page)
{
@@ -149,7 +143,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
if (len < PAGE_CACHE_SIZE)
zero_user_segment(page, len, PAGE_CACHE_SIZE);
- nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops);
+ NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops);
nfs_pageio_add_request(&pgio, new);
nfs_pageio_complete(&pgio);
NFS_I(inode)->read_io += pgio.pg_bytes_written;
@@ -407,6 +401,7 @@ int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
return nfs_pagein_multi(desc, hdr);
return nfs_pagein_one(desc, hdr);
}
+EXPORT_SYMBOL_GPL(nfs_generic_pagein);
static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
@@ -532,11 +527,11 @@ static const struct rpc_call_ops nfs_read_common_ops = {
int nfs_readpage(struct file *file, struct page *page)
{
struct nfs_open_context *ctx;
- struct inode *inode = page->mapping->host;
+ struct inode *inode = page_file_mapping(page)->host;
int error;
dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
- page, PAGE_CACHE_SIZE, page->index);
+ page, PAGE_CACHE_SIZE, page_file_index(page));
nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
nfs_add_stats(inode, NFSIOS_READPAGES, 1);
@@ -590,7 +585,7 @@ static int
readpage_async_filler(void *data, struct page *page)
{
struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
- struct inode *inode = page->mapping->host;
+ struct inode *inode = page_file_mapping(page)->host;
struct nfs_page *new;
unsigned int len;
int error;
@@ -652,7 +647,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
if (ret == 0)
goto read_complete; /* all pages were read */
- nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops);
+ NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops);
ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 8b2a2977b720..ac6a3c55dce4 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -64,11 +64,12 @@
#include "internal.h"
#include "fscache.h"
#include "pnfs.h"
+#include "nfs.h"
#define NFSDBG_FACILITY NFSDBG_VFS
#define NFS_TEXT_DATA 1
-#ifdef CONFIG_NFS_V3
+#if IS_ENABLED(CONFIG_NFS_V3)
#define NFS_DEFAULT_VERSION 3
#else
#define NFS_DEFAULT_VERSION 2
@@ -278,37 +279,17 @@ static match_table_t nfs_vers_tokens = {
{ Opt_vers_err, NULL }
};
-struct nfs_mount_info {
- void (*fill_super)(struct super_block *, struct nfs_mount_info *);
- int (*set_security)(struct super_block *, struct dentry *, struct nfs_mount_info *);
- struct nfs_parsed_mount_data *parsed;
- struct nfs_clone_mount *cloned;
- struct nfs_fh *mntfh;
-};
-
-static void nfs_umount_begin(struct super_block *);
-static int nfs_statfs(struct dentry *, struct kstatfs *);
-static int nfs_show_options(struct seq_file *, struct dentry *);
-static int nfs_show_devname(struct seq_file *, struct dentry *);
-static int nfs_show_path(struct seq_file *, struct dentry *);
-static int nfs_show_stats(struct seq_file *, struct dentry *);
-static struct dentry *nfs_fs_mount_common(struct file_system_type *,
- struct nfs_server *, int, const char *, struct nfs_mount_info *);
-static struct dentry *nfs_fs_mount(struct file_system_type *,
- int, const char *, void *);
static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data);
-static void nfs_put_super(struct super_block *);
-static void nfs_kill_super(struct super_block *);
-static int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
-static struct file_system_type nfs_fs_type = {
+struct file_system_type nfs_fs_type = {
.owner = THIS_MODULE,
.name = "nfs",
.mount = nfs_fs_mount,
.kill_sb = nfs_kill_super,
.fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
};
+EXPORT_SYMBOL_GPL(nfs_fs_type);
struct file_system_type nfs_xdev_fs_type = {
.owner = THIS_MODULE,
@@ -318,7 +299,7 @@ struct file_system_type nfs_xdev_fs_type = {
.fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
};
-static const struct super_operations nfs_sops = {
+const struct super_operations nfs_sops = {
.alloc_inode = nfs_alloc_inode,
.destroy_inode = nfs_destroy_inode,
.write_inode = nfs_write_inode,
@@ -332,77 +313,12 @@ static const struct super_operations nfs_sops = {
.show_stats = nfs_show_stats,
.remount_fs = nfs_remount,
};
+EXPORT_SYMBOL_GPL(nfs_sops);
-#ifdef CONFIG_NFS_V4
+#if IS_ENABLED(CONFIG_NFS_V4)
static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *);
static int nfs4_validate_mount_data(void *options,
struct nfs_parsed_mount_data *args, const char *dev_name);
-static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
- struct nfs_mount_info *mount_info);
-static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data);
-static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data);
-static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data);
-static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data);
-static void nfs4_kill_super(struct super_block *sb);
-
-static struct file_system_type nfs4_fs_type = {
- .owner = THIS_MODULE,
- .name = "nfs4",
- .mount = nfs_fs_mount,
- .kill_sb = nfs4_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
-};
-
-static struct file_system_type nfs4_remote_fs_type = {
- .owner = THIS_MODULE,
- .name = "nfs4",
- .mount = nfs4_remote_mount,
- .kill_sb = nfs4_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
-};
-
-struct file_system_type nfs4_xdev_fs_type = {
- .owner = THIS_MODULE,
- .name = "nfs4",
- .mount = nfs4_xdev_mount,
- .kill_sb = nfs4_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
-};
-
-static struct file_system_type nfs4_remote_referral_fs_type = {
- .owner = THIS_MODULE,
- .name = "nfs4",
- .mount = nfs4_remote_referral_mount,
- .kill_sb = nfs4_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
-};
-
-struct file_system_type nfs4_referral_fs_type = {
- .owner = THIS_MODULE,
- .name = "nfs4",
- .mount = nfs4_referral_mount,
- .kill_sb = nfs4_kill_super,
- .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
-};
-
-static const struct super_operations nfs4_sops = {
- .alloc_inode = nfs_alloc_inode,
- .destroy_inode = nfs_destroy_inode,
- .write_inode = nfs_write_inode,
- .put_super = nfs_put_super,
- .statfs = nfs_statfs,
- .evict_inode = nfs4_evict_inode,
- .umount_begin = nfs_umount_begin,
- .show_options = nfs_show_options,
- .show_devname = nfs_show_devname,
- .show_path = nfs_show_path,
- .show_stats = nfs_show_stats,
- .remount_fs = nfs_remount,
-};
#endif
static struct shrinker acl_shrinker = {
@@ -424,18 +340,9 @@ int __init register_nfs_fs(void)
ret = nfs_register_sysctl();
if (ret < 0)
goto error_1;
-#ifdef CONFIG_NFS_V4
- ret = register_filesystem(&nfs4_fs_type);
- if (ret < 0)
- goto error_2;
-#endif
register_shrinker(&acl_shrinker);
return 0;
-#ifdef CONFIG_NFS_V4
-error_2:
- nfs_unregister_sysctl();
-#endif
error_1:
unregister_filesystem(&nfs_fs_type);
error_0:
@@ -448,9 +355,6 @@ error_0:
void __exit unregister_nfs_fs(void)
{
unregister_shrinker(&acl_shrinker);
-#ifdef CONFIG_NFS_V4
- unregister_filesystem(&nfs4_fs_type);
-#endif
nfs_unregister_sysctl();
unregister_filesystem(&nfs_fs_type);
}
@@ -462,6 +366,7 @@ void nfs_sb_active(struct super_block *sb)
if (atomic_inc_return(&server->active) == 1)
atomic_inc(&sb->s_active);
}
+EXPORT_SYMBOL_GPL(nfs_sb_active);
void nfs_sb_deactive(struct super_block *sb)
{
@@ -470,11 +375,12 @@ void nfs_sb_deactive(struct super_block *sb)
if (atomic_dec_and_test(&server->active))
deactivate_super(sb);
}
+EXPORT_SYMBOL_GPL(nfs_sb_deactive);
/*
* Deliver file system statistics to userspace
*/
-static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
+int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct nfs_server *server = NFS_SB(dentry->d_sb);
unsigned char blockbits;
@@ -535,6 +441,7 @@ static int nfs_statfs(struct dentry *dentry, struct kstatfs *buf)
dprintk("%s: statfs error = %d\n", __func__, -error);
return error;
}
+EXPORT_SYMBOL_GPL(nfs_statfs);
/*
* Map the security flavour number to a name
@@ -640,7 +547,7 @@ static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
nfs_show_mountd_netid(m, nfss, showdefaults);
}
-#ifdef CONFIG_NFS_V4
+#if IS_ENABLED(CONFIG_NFS_V4)
static void nfs_show_nfsv4_options(struct seq_file *m, struct nfs_server *nfss,
int showdefaults)
{
@@ -757,7 +664,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
/*
* Describe the mount options on this VFS mountpoint
*/
-static int nfs_show_options(struct seq_file *m, struct dentry *root)
+int nfs_show_options(struct seq_file *m, struct dentry *root)
{
struct nfs_server *nfss = NFS_SB(root->d_sb);
@@ -771,8 +678,9 @@ static int nfs_show_options(struct seq_file *m, struct dentry *root)
return 0;
}
+EXPORT_SYMBOL_GPL(nfs_show_options);
-#ifdef CONFIG_NFS_V4
+#if IS_ENABLED(CONFIG_NFS_V4)
#ifdef CONFIG_NFS_V4_1
static void show_sessions(struct seq_file *m, struct nfs_server *server)
{
@@ -805,7 +713,7 @@ static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss)
}
}
#else
-#ifdef CONFIG_NFS_V4
+#if IS_ENABLED(CONFIG_NFS_V4)
static void show_pnfs(struct seq_file *m, struct nfs_server *server)
{
}
@@ -815,7 +723,7 @@ static void show_implementation_id(struct seq_file *m, struct nfs_server *nfss)
}
#endif
-static int nfs_show_devname(struct seq_file *m, struct dentry *root)
+int nfs_show_devname(struct seq_file *m, struct dentry *root)
{
char *page = (char *) __get_free_page(GFP_KERNEL);
char *devname, *dummy;
@@ -830,17 +738,19 @@ static int nfs_show_devname(struct seq_file *m, struct dentry *root)
free_page((unsigned long)page);
return err;
}
+EXPORT_SYMBOL_GPL(nfs_show_devname);
-static int nfs_show_path(struct seq_file *m, struct dentry *dentry)
+int nfs_show_path(struct seq_file *m, struct dentry *dentry)
{
seq_puts(m, "/");
return 0;
}
+EXPORT_SYMBOL_GPL(nfs_show_path);
/*
* Present statistical information for this VFS mountpoint
*/
-static int nfs_show_stats(struct seq_file *m, struct dentry *root)
+int nfs_show_stats(struct seq_file *m, struct dentry *root)
{
int i, cpu;
struct nfs_server *nfss = NFS_SB(root->d_sb);
@@ -870,7 +780,7 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root)
seq_printf(m, ",bsize=%u", nfss->bsize);
seq_printf(m, ",namlen=%u", nfss->namelen);
-#ifdef CONFIG_NFS_V4
+#if IS_ENABLED(CONFIG_NFS_V4)
if (nfss->nfs_client->rpc_ops->version == 4) {
seq_printf(m, "\n\tnfsv4:\t");
seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]);
@@ -928,12 +838,13 @@ static int nfs_show_stats(struct seq_file *m, struct dentry *root)
return 0;
}
+EXPORT_SYMBOL_GPL(nfs_show_stats);
/*
* Begin unmount by attempting to remove all automounted mountpoints we added
* in response to xdev traversals and referrals
*/
-static void nfs_umount_begin(struct super_block *sb)
+void nfs_umount_begin(struct super_block *sb)
{
struct nfs_server *server;
struct rpc_clnt *rpc;
@@ -947,6 +858,7 @@ static void nfs_umount_begin(struct super_block *sb)
if (!IS_ERR(rpc))
rpc_killall_tasks(rpc);
}
+EXPORT_SYMBOL_GPL(nfs_umount_begin);
static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void)
{
@@ -1748,8 +1660,9 @@ static int nfs_request_mount(struct nfs_parsed_mount_data *args,
return nfs_walk_authlist(args, &request);
}
-static struct dentry *nfs_try_mount(int flags, const char *dev_name,
- struct nfs_mount_info *mount_info)
+struct dentry *nfs_try_mount(int flags, const char *dev_name,
+ struct nfs_mount_info *mount_info,
+ struct nfs_subversion *nfs_mod)
{
int status;
struct nfs_server *server;
@@ -1761,12 +1674,13 @@ static struct dentry *nfs_try_mount(int flags, const char *dev_name,
}
/* Get a volume representation */
- server = nfs_create_server(mount_info->parsed, mount_info->mntfh);
+ server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod);
if (IS_ERR(server))
return ERR_CAST(server);
- return nfs_fs_mount_common(&nfs_fs_type, server, flags, dev_name, mount_info);
+ return nfs_fs_mount_common(server, flags, dev_name, mount_info, nfs_mod);
}
+EXPORT_SYMBOL_GPL(nfs_try_mount);
/*
* Split "dev_name" into "hostname:export_path".
@@ -1970,7 +1884,7 @@ static int nfs23_validate_mount_data(void *options,
return NFS_TEXT_DATA;
}
-#ifndef CONFIG_NFS_V3
+#if !IS_ENABLED(CONFIG_NFS_V3)
if (args->version == 3)
goto out_v3_not_compiled;
#endif /* !CONFIG_NFS_V3 */
@@ -1990,7 +1904,7 @@ out_no_sec:
dfprintk(MOUNT, "NFS: nfs_mount_data version supports only AUTH_SYS\n");
return -EINVAL;
-#ifndef CONFIG_NFS_V3
+#if !IS_ENABLED(CONFIG_NFS_V3)
out_v3_not_compiled:
dfprintk(MOUNT, "NFS: NFSv3 is not compiled into kernel\n");
return -EPROTONOSUPPORT;
@@ -2009,7 +1923,7 @@ out_invalid_fh:
return -EINVAL;
}
-#ifdef CONFIG_NFS_V4
+#if IS_ENABLED(CONFIG_NFS_V4)
static int nfs_validate_mount_data(struct file_system_type *fs_type,
void *options,
struct nfs_parsed_mount_data *args,
@@ -2047,7 +1961,7 @@ static int nfs_validate_text_mount_data(void *options,
goto out_no_address;
if (args->version == 4) {
-#ifdef CONFIG_NFS_V4
+#if IS_ENABLED(CONFIG_NFS_V4)
port = NFS_PORT;
max_namelen = NFS4_MAXNAMLEN;
max_pathlen = NFS4_MAXPATHLEN;
@@ -2070,7 +1984,7 @@ static int nfs_validate_text_mount_data(void *options,
&args->nfs_server.export_path,
max_pathlen);
-#ifndef CONFIG_NFS_V4
+#if !IS_ENABLED(CONFIG_NFS_V4)
out_v4_not_compiled:
dfprintk(MOUNT, "NFS: NFSv4 is not compiled into kernel\n");
return -EPROTONOSUPPORT;
@@ -2108,7 +2022,7 @@ nfs_compare_remount_data(struct nfs_server *nfss,
return 0;
}
-static int
+int
nfs_remount(struct super_block *sb, int *flags, char *raw_data)
{
int error;
@@ -2169,11 +2083,12 @@ out:
kfree(data);
return error;
}
+EXPORT_SYMBOL_GPL(nfs_remount);
/*
* Initialise the common bits of the superblock
*/
-static inline void nfs_initialise_sb(struct super_block *sb)
+inline void nfs_initialise_sb(struct super_block *sb)
{
struct nfs_server *server = NFS_SB(sb);
@@ -2195,18 +2110,19 @@ static inline void nfs_initialise_sb(struct super_block *sb)
/*
* Finish setting up an NFS2/3 superblock
*/
-static void nfs_fill_super(struct super_block *sb,
- struct nfs_mount_info *mount_info)
+void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info)
{
struct nfs_parsed_mount_data *data = mount_info->parsed;
struct nfs_server *server = NFS_SB(sb);
sb->s_blocksize_bits = 0;
sb->s_blocksize = 0;
- if (data->bsize)
+ sb->s_xattr = server->nfs_client->cl_nfs_mod->xattr;
+ sb->s_op = server->nfs_client->cl_nfs_mod->sops;
+ if (data && data->bsize)
sb->s_blocksize = nfs_block_size(data->bsize, &sb->s_blocksize_bits);
- if (server->nfs_client->rpc_ops->version == 3) {
+ if (server->nfs_client->rpc_ops->version != 2) {
/* The VFS shouldn't apply the umask to mode bits. We will do
* so ourselves when necessary.
*/
@@ -2214,15 +2130,14 @@ static void nfs_fill_super(struct super_block *sb,
sb->s_time_gran = 1;
}
- sb->s_op = &nfs_sops;
nfs_initialise_sb(sb);
}
+EXPORT_SYMBOL_GPL(nfs_fill_super);
/*
- * Finish setting up a cloned NFS2/3 superblock
+ * Finish setting up a cloned NFS2/3/4 superblock
*/
-static void nfs_clone_super(struct super_block *sb,
- struct nfs_mount_info *mount_info)
+void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info)
{
const struct super_block *old_sb = mount_info->cloned->sb;
struct nfs_server *server = NFS_SB(sb);
@@ -2230,16 +2145,17 @@ static void nfs_clone_super(struct super_block *sb,
sb->s_blocksize_bits = old_sb->s_blocksize_bits;
sb->s_blocksize = old_sb->s_blocksize;
sb->s_maxbytes = old_sb->s_maxbytes;
+ sb->s_xattr = old_sb->s_xattr;
+ sb->s_op = old_sb->s_op;
+ sb->s_time_gran = 1;
- if (server->nfs_client->rpc_ops->version == 3) {
+ if (server->nfs_client->rpc_ops->version != 2) {
/* The VFS shouldn't apply the umask to mode bits. We will do
* so ourselves when necessary.
*/
sb->s_flags |= MS_POSIXACL;
- sb->s_time_gran = 1;
}
- sb->s_op = old_sb->s_op;
nfs_initialise_sb(sb);
}
@@ -2381,14 +2297,15 @@ static int nfs_bdi_register(struct nfs_server *server)
return bdi_register_dev(&server->backing_dev_info, server->s_dev);
}
-static int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot,
- struct nfs_mount_info *mount_info)
+int nfs_set_sb_security(struct super_block *s, struct dentry *mntroot,
+ struct nfs_mount_info *mount_info)
{
return security_sb_set_mnt_opts(s, &mount_info->parsed->lsm_opts);
}
+EXPORT_SYMBOL_GPL(nfs_set_sb_security);
-static int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot,
- struct nfs_mount_info *mount_info)
+int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot,
+ struct nfs_mount_info *mount_info)
{
/* clone any lsm security options from the parent to the new sb */
security_sb_clone_mnt_opts(mount_info->cloned->sb, s);
@@ -2396,11 +2313,12 @@ static int nfs_clone_sb_security(struct super_block *s, struct dentry *mntroot,
return -ESTALE;
return 0;
}
+EXPORT_SYMBOL_GPL(nfs_clone_sb_security);
-static struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type,
- struct nfs_server *server,
- int flags, const char *dev_name,
- struct nfs_mount_info *mount_info)
+struct dentry *nfs_fs_mount_common(struct nfs_server *server,
+ int flags, const char *dev_name,
+ struct nfs_mount_info *mount_info,
+ struct nfs_subversion *nfs_mod)
{
struct super_block *s;
struct dentry *mntroot = ERR_PTR(-ENOMEM);
@@ -2419,7 +2337,7 @@ static struct dentry *nfs_fs_mount_common(struct file_system_type *fs_type,
sb_mntdata.mntflags |= MS_SYNCHRONOUS;
/* Get a superblock - note that we may end up sharing one that already exists */
- s = sget(fs_type, compare_super, nfs_set_super, flags, &sb_mntdata);
+ s = sget(nfs_mod->nfs_fs, compare_super, nfs_set_super, flags, &sb_mntdata);
if (IS_ERR(s)) {
mntroot = ERR_CAST(s);
goto out_err_nosb;
@@ -2469,8 +2387,9 @@ error_splat_bdi:
deactivate_locked_super(s);
goto out;
}
+EXPORT_SYMBOL_GPL(nfs_fs_mount_common);
-static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
+struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data)
{
struct nfs_mount_info mount_info = {
@@ -2478,6 +2397,7 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
.set_security = nfs_set_sb_security,
};
struct dentry *mntroot = ERR_PTR(-ENOMEM);
+ struct nfs_subversion *nfs_mod;
int error;
mount_info.parsed = nfs_alloc_parsed_mount_data();
@@ -2494,34 +2414,38 @@ static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
goto out;
}
-#ifdef CONFIG_NFS_V4
- if (mount_info.parsed->version == 4)
- mntroot = nfs4_try_mount(flags, dev_name, &mount_info);
- else
-#endif /* CONFIG_NFS_V4 */
- mntroot = nfs_try_mount(flags, dev_name, &mount_info);
+ nfs_mod = get_nfs_version(mount_info.parsed->version);
+ if (IS_ERR(nfs_mod)) {
+ mntroot = ERR_CAST(nfs_mod);
+ goto out;
+ }
+
+ mntroot = nfs_mod->rpc_ops->try_mount(flags, dev_name, &mount_info, nfs_mod);
+ put_nfs_version(nfs_mod);
out:
nfs_free_parsed_mount_data(mount_info.parsed);
nfs_free_fhandle(mount_info.mntfh);
return mntroot;
}
+EXPORT_SYMBOL_GPL(nfs_fs_mount);
/*
* Ensure that we unregister the bdi before kill_anon_super
* releases the device name
*/
-static void nfs_put_super(struct super_block *s)
+void nfs_put_super(struct super_block *s)
{
struct nfs_server *server = NFS_SB(s);
bdi_unregister(&server->backing_dev_info);
}
+EXPORT_SYMBOL_GPL(nfs_put_super);
/*
* Destroy an NFS2/3 superblock
*/
-static void nfs_kill_super(struct super_block *s)
+void nfs_kill_super(struct super_block *s)
{
struct nfs_server *server = NFS_SB(s);
@@ -2529,31 +2453,38 @@ static void nfs_kill_super(struct super_block *s)
nfs_fscache_release_super_cookie(s);
nfs_free_server(server);
}
+EXPORT_SYMBOL_GPL(nfs_kill_super);
/*
* Clone an NFS2/3/4 server record on xdev traversal (FSID-change)
*/
-static struct dentry *
-nfs_xdev_mount_common(struct file_system_type *fs_type, int flags,
- const char *dev_name, struct nfs_mount_info *mount_info)
+struct dentry *
+nfs_xdev_mount(struct file_system_type *fs_type, int flags,
+ const char *dev_name, void *raw_data)
{
- struct nfs_clone_mount *data = mount_info->cloned;
+ struct nfs_clone_mount *data = raw_data;
+ struct nfs_mount_info mount_info = {
+ .fill_super = nfs_clone_super,
+ .set_security = nfs_clone_sb_security,
+ .cloned = data,
+ };
struct nfs_server *server;
struct dentry *mntroot = ERR_PTR(-ENOMEM);
+ struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod;
int error;
dprintk("--> nfs_xdev_mount_common()\n");
- mount_info->mntfh = data->fh;
+ mount_info.mntfh = mount_info.cloned->fh;
/* create a new volume representation */
- server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
+ server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
if (IS_ERR(server)) {
error = PTR_ERR(server);
goto out_err;
}
- mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info);
+ mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, nfs_mod);
dprintk("<-- nfs_xdev_mount_common() = 0\n");
out:
return mntroot;
@@ -2563,60 +2494,7 @@ out_err:
goto out;
}
-/*
- * Clone an NFS2/3 server record on xdev traversal (FSID-change)
- */
-static struct dentry *
-nfs_xdev_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *raw_data)
-{
- struct nfs_mount_info mount_info = {
- .fill_super = nfs_clone_super,
- .set_security = nfs_clone_sb_security,
- .cloned = raw_data,
- };
- return nfs_xdev_mount_common(&nfs_fs_type, flags, dev_name, &mount_info);
-}
-
-#ifdef CONFIG_NFS_V4
-
-/*
- * Finish setting up a cloned NFS4 superblock
- */
-static void nfs4_clone_super(struct super_block *sb,
- struct nfs_mount_info *mount_info)
-{
- const struct super_block *old_sb = mount_info->cloned->sb;
- sb->s_blocksize_bits = old_sb->s_blocksize_bits;
- sb->s_blocksize = old_sb->s_blocksize;
- sb->s_maxbytes = old_sb->s_maxbytes;
- sb->s_time_gran = 1;
- sb->s_op = old_sb->s_op;
- /*
- * The VFS shouldn't apply the umask to mode bits. We will do
- * so ourselves when necessary.
- */
- sb->s_flags |= MS_POSIXACL;
- sb->s_xattr = old_sb->s_xattr;
- nfs_initialise_sb(sb);
-}
-
-/*
- * Set up an NFS4 superblock
- */
-static void nfs4_fill_super(struct super_block *sb,
- struct nfs_mount_info *mount_info)
-{
- sb->s_time_gran = 1;
- sb->s_op = &nfs4_sops;
- /*
- * The VFS shouldn't apply the umask to mode bits. We will do
- * so ourselves when necessary.
- */
- sb->s_flags |= MS_POSIXACL;
- sb->s_xattr = nfs4_xattr_handlers;
- nfs_initialise_sb(sb);
-}
+#if IS_ENABLED(CONFIG_NFS_V4)
static void nfs4_validate_mount_flags(struct nfs_parsed_mount_data *args)
{
@@ -2716,249 +2594,55 @@ out_no_address:
}
/*
- * Get the superblock for the NFS4 root partition
+ * NFS v4 module parameters need to stay in the
+ * NFS client for backwards compatibility
*/
-static struct dentry *
-nfs4_remote_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *info)
-{
- struct nfs_mount_info *mount_info = info;
- struct nfs_server *server;
- struct dentry *mntroot = ERR_PTR(-ENOMEM);
-
- mount_info->fill_super = nfs4_fill_super;
- mount_info->set_security = nfs_set_sb_security;
-
- /* Get a volume representation */
- server = nfs4_create_server(mount_info->parsed, mount_info->mntfh);
- if (IS_ERR(server)) {
- mntroot = ERR_CAST(server);
- goto out;
- }
-
- mntroot = nfs_fs_mount_common(fs_type, server, flags, dev_name, mount_info);
-
-out:
- return mntroot;
-}
-
-static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
- int flags, void *data, const char *hostname)
-{
- struct vfsmount *root_mnt;
- char *root_devname;
- size_t len;
+unsigned int nfs_callback_set_tcpport;
+unsigned short nfs_callback_tcpport;
+/* Default cache timeout is 10 minutes */
+unsigned int nfs_idmap_cache_timeout = 600;
+/* Turn off NFSv4 uid/gid mapping when using AUTH_SYS */
+bool nfs4_disable_idmapping = true;
+unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE;
+unsigned short send_implementation_id = 1;
+
+EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport);
+EXPORT_SYMBOL_GPL(nfs_callback_tcpport);
+EXPORT_SYMBOL_GPL(nfs_idmap_cache_timeout);
+EXPORT_SYMBOL_GPL(nfs4_disable_idmapping);
+EXPORT_SYMBOL_GPL(max_session_slots);
+EXPORT_SYMBOL_GPL(send_implementation_id);
+
+#define NFS_CALLBACK_MAXPORTNR (65535U)
+
+static int param_set_portnr(const char *val, const struct kernel_param *kp)
+{
+ unsigned long num;
+ int ret;
- len = strlen(hostname) + 5;
- root_devname = kmalloc(len, GFP_KERNEL);
- if (root_devname == NULL)
- return ERR_PTR(-ENOMEM);
- /* Does hostname needs to be enclosed in brackets? */
- if (strchr(hostname, ':'))
- snprintf(root_devname, len, "[%s]:/", hostname);
- else
- snprintf(root_devname, len, "%s:/", hostname);
- root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data);
- kfree(root_devname);
- return root_mnt;
+ if (!val)
+ return -EINVAL;
+ ret = strict_strtoul(val, 0, &num);
+ if (ret == -EINVAL || num > NFS_CALLBACK_MAXPORTNR)
+ return -EINVAL;
+ *((unsigned int *)kp->arg) = num;
+ return 0;
}
-
-struct nfs_referral_count {
- struct list_head list;
- const struct task_struct *task;
- unsigned int referral_count;
+static struct kernel_param_ops param_ops_portnr = {
+ .set = param_set_portnr,
+ .get = param_get_uint,
};
-
-static LIST_HEAD(nfs_referral_count_list);
-static DEFINE_SPINLOCK(nfs_referral_count_list_lock);
-
-static struct nfs_referral_count *nfs_find_referral_count(void)
-{
- struct nfs_referral_count *p;
-
- list_for_each_entry(p, &nfs_referral_count_list, list) {
- if (p->task == current)
- return p;
- }
- return NULL;
-}
-
-#define NFS_MAX_NESTED_REFERRALS 2
-
-static int nfs_referral_loop_protect(void)
-{
- struct nfs_referral_count *p, *new;
- int ret = -ENOMEM;
-
- new = kmalloc(sizeof(*new), GFP_KERNEL);
- if (!new)
- goto out;
- new->task = current;
- new->referral_count = 1;
-
- ret = 0;
- spin_lock(&nfs_referral_count_list_lock);
- p = nfs_find_referral_count();
- if (p != NULL) {
- if (p->referral_count >= NFS_MAX_NESTED_REFERRALS)
- ret = -ELOOP;
- else
- p->referral_count++;
- } else {
- list_add(&new->list, &nfs_referral_count_list);
- new = NULL;
- }
- spin_unlock(&nfs_referral_count_list_lock);
- kfree(new);
-out:
- return ret;
-}
-
-static void nfs_referral_loop_unprotect(void)
-{
- struct nfs_referral_count *p;
-
- spin_lock(&nfs_referral_count_list_lock);
- p = nfs_find_referral_count();
- p->referral_count--;
- if (p->referral_count == 0)
- list_del(&p->list);
- else
- p = NULL;
- spin_unlock(&nfs_referral_count_list_lock);
- kfree(p);
-}
-
-static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
- const char *export_path)
-{
- struct dentry *dentry;
- int err;
-
- if (IS_ERR(root_mnt))
- return ERR_CAST(root_mnt);
-
- err = nfs_referral_loop_protect();
- if (err) {
- mntput(root_mnt);
- return ERR_PTR(err);
- }
-
- dentry = mount_subtree(root_mnt, export_path);
- nfs_referral_loop_unprotect();
-
- return dentry;
-}
-
-static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
- struct nfs_mount_info *mount_info)
-{
- char *export_path;
- struct vfsmount *root_mnt;
- struct dentry *res;
- struct nfs_parsed_mount_data *data = mount_info->parsed;
-
- dfprintk(MOUNT, "--> nfs4_try_mount()\n");
-
- mount_info->fill_super = nfs4_fill_super;
-
- export_path = data->nfs_server.export_path;
- data->nfs_server.export_path = "/";
- root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info,
- data->nfs_server.hostname);
- data->nfs_server.export_path = export_path;
-
- res = nfs_follow_remote_path(root_mnt, export_path);
-
- dfprintk(MOUNT, "<-- nfs4_try_mount() = %ld%s\n",
- IS_ERR(res) ? PTR_ERR(res) : 0,
- IS_ERR(res) ? " [error]" : "");
- return res;
-}
-
-static void nfs4_kill_super(struct super_block *sb)
-{
- struct nfs_server *server = NFS_SB(sb);
-
- dprintk("--> %s\n", __func__);
- nfs_super_return_all_delegations(sb);
- kill_anon_super(sb);
- nfs_fscache_release_super_cookie(sb);
- nfs_free_server(server);
- dprintk("<-- %s\n", __func__);
-}
-
-/*
- * Clone an NFS4 server record on xdev traversal (FSID-change)
- */
-static struct dentry *
-nfs4_xdev_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *raw_data)
-{
- struct nfs_mount_info mount_info = {
- .fill_super = nfs4_clone_super,
- .set_security = nfs_clone_sb_security,
- .cloned = raw_data,
- };
- return nfs_xdev_mount_common(&nfs4_fs_type, flags, dev_name, &mount_info);
-}
-
-static struct dentry *
-nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
- const char *dev_name, void *raw_data)
-{
- struct nfs_mount_info mount_info = {
- .fill_super = nfs4_fill_super,
- .set_security = nfs_clone_sb_security,
- .cloned = raw_data,
- };
- struct nfs_server *server;
- struct dentry *mntroot = ERR_PTR(-ENOMEM);
-
- dprintk("--> nfs4_referral_get_sb()\n");
-
- mount_info.mntfh = nfs_alloc_fhandle();
- if (mount_info.cloned == NULL || mount_info.mntfh == NULL)
- goto out;
-
- /* create a new volume representation */
- server = nfs4_create_referral_server(mount_info.cloned, mount_info.mntfh);
- if (IS_ERR(server)) {
- mntroot = ERR_CAST(server);
- goto out;
- }
-
- mntroot = nfs_fs_mount_common(&nfs4_fs_type, server, flags, dev_name, &mount_info);
-out:
- nfs_free_fhandle(mount_info.mntfh);
- return mntroot;
-}
-
-/*
- * Create an NFS4 server record on referral traversal
- */
-static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
- int flags, const char *dev_name, void *raw_data)
-{
- struct nfs_clone_mount *data = raw_data;
- char *export_path;
- struct vfsmount *root_mnt;
- struct dentry *res;
-
- dprintk("--> nfs4_referral_mount()\n");
-
- export_path = data->mnt_path;
- data->mnt_path = "/";
-
- root_mnt = nfs_do_root_mount(&nfs4_remote_referral_fs_type,
- flags, data, data->hostname);
- data->mnt_path = export_path;
-
- res = nfs_follow_remote_path(root_mnt, export_path);
- dprintk("<-- nfs4_referral_mount() = %ld%s\n",
- IS_ERR(res) ? PTR_ERR(res) : 0,
- IS_ERR(res) ? " [error]" : "");
- return res;
-}
-
+#define param_check_portnr(name, p) __param_check(name, p, unsigned int);
+
+module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644);
+module_param(nfs_idmap_cache_timeout, int, 0644);
+module_param(nfs4_disable_idmapping, bool, 0644);
+MODULE_PARM_DESC(nfs4_disable_idmapping,
+ "Turn off NFSv4 idmapping when using 'sec=sys'");
+module_param(max_session_slots, ushort, 0644);
+MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 "
+ "requests the client will negotiate");
+module_param(send_implementation_id, ushort, 0644);
+MODULE_PARM_DESC(send_implementation_id,
+ "Send implementation ID with NFSv4.1 exchange_id");
#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index ad4d2e787b20..6b3f2535a3ec 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -9,37 +9,11 @@
#include <linux/fs.h>
#include <linux/sysctl.h>
#include <linux/module.h>
-#include <linux/nfs4.h>
-#include <linux/nfs_idmap.h>
#include <linux/nfs_fs.h>
-#include "callback.h"
-
-#ifdef CONFIG_NFS_V4
-static const int nfs_set_port_min = 0;
-static const int nfs_set_port_max = 65535;
-#endif
static struct ctl_table_header *nfs_callback_sysctl_table;
static ctl_table nfs_cb_sysctls[] = {
-#ifdef CONFIG_NFS_V4
- {
- .procname = "nfs_callback_tcpport",
- .data = &nfs_callback_set_tcpport,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = (int *)&nfs_set_port_min,
- .extra2 = (int *)&nfs_set_port_max,
- },
- {
- .procname = "idmap_cache_timeout",
- .data = &nfs_idmap_cache_timeout,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
-#endif
{
.procname = "nfs_mountpoint_timeout",
.data = &nfs_mountpoint_expiry_timeout,
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 3210a03342f9..13cea637eff8 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -501,7 +501,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
(unsigned long long)NFS_FILEID(dentry->d_inode));
/* Return delegation in anticipation of the rename */
- nfs_inode_return_delegation(dentry->d_inode);
+ NFS_PROTO(dentry->d_inode)->return_delegation(dentry->d_inode);
sdentry = NULL;
do {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 4d6861c0dc14..5829d0ce7cfb 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -52,7 +52,7 @@ static mempool_t *nfs_commit_mempool;
struct nfs_commit_data *nfs_commitdata_alloc(void)
{
- struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);
+ struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOIO);
if (p) {
memset(p, 0, sizeof(*p));
@@ -70,7 +70,7 @@ EXPORT_SYMBOL_GPL(nfs_commit_free);
struct nfs_write_header *nfs_writehdr_alloc(void)
{
- struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
+ struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
if (p) {
struct nfs_pgio_header *hdr = &p->header;
@@ -84,6 +84,7 @@ struct nfs_write_header *nfs_writehdr_alloc(void)
}
return p;
}
+EXPORT_SYMBOL_GPL(nfs_writehdr_alloc);
static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
unsigned int pagecount)
@@ -115,6 +116,7 @@ void nfs_writehdr_free(struct nfs_pgio_header *hdr)
struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
mempool_free(whdr, nfs_wdata_mempool);
}
+EXPORT_SYMBOL_GPL(nfs_writehdr_free);
void nfs_writedata_release(struct nfs_write_data *wdata)
{
@@ -131,6 +133,7 @@ void nfs_writedata_release(struct nfs_write_data *wdata)
if (atomic_dec_and_test(&hdr->refcnt))
hdr->completion_ops->completion(hdr);
}
+EXPORT_SYMBOL_GPL(nfs_writedata_release);
static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
{
@@ -139,25 +142,38 @@ static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
}
-static struct nfs_page *nfs_page_find_request_locked(struct page *page)
+static struct nfs_page *
+nfs_page_find_request_locked(struct nfs_inode *nfsi, struct page *page)
{
struct nfs_page *req = NULL;
- if (PagePrivate(page)) {
+ if (PagePrivate(page))
req = (struct nfs_page *)page_private(page);
- if (req != NULL)
- kref_get(&req->wb_kref);
+ else if (unlikely(PageSwapCache(page))) {
+ struct nfs_page *freq, *t;
+
+ /* Linearly search the commit list for the correct req */
+ list_for_each_entry_safe(freq, t, &nfsi->commit_info.list, wb_list) {
+ if (freq->wb_page == page) {
+ req = freq;
+ break;
+ }
+ }
}
+
+ if (req)
+ kref_get(&req->wb_kref);
+
return req;
}
static struct nfs_page *nfs_page_find_request(struct page *page)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = page_file_mapping(page)->host;
struct nfs_page *req = NULL;
spin_lock(&inode->i_lock);
- req = nfs_page_find_request_locked(page);
+ req = nfs_page_find_request_locked(NFS_I(inode), page);
spin_unlock(&inode->i_lock);
return req;
}
@@ -165,16 +181,16 @@ static struct nfs_page *nfs_page_find_request(struct page *page)
/* Adjust the file length if we're writing beyond the end */
static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = page_file_mapping(page)->host;
loff_t end, i_size;
pgoff_t end_index;
spin_lock(&inode->i_lock);
i_size = i_size_read(inode);
end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
- if (i_size > 0 && page->index < end_index)
+ if (i_size > 0 && page_file_index(page) < end_index)
goto out;
- end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
+ end = page_file_offset(page) + ((loff_t)offset+count);
if (i_size >= end)
goto out;
i_size_write(inode, end);
@@ -187,7 +203,7 @@ out:
static void nfs_set_pageerror(struct page *page)
{
SetPageError(page);
- nfs_zap_mapping(page->mapping->host, page->mapping);
+ nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page));
}
/* We can set the PG_uptodate flag if we see that a write request
@@ -228,7 +244,7 @@ static int nfs_set_page_writeback(struct page *page)
int ret = test_set_page_writeback(page);
if (!ret) {
- struct inode *inode = page->mapping->host;
+ struct inode *inode = page_file_mapping(page)->host;
struct nfs_server *nfss = NFS_SERVER(inode);
if (atomic_long_inc_return(&nfss->writeback) >
@@ -242,7 +258,7 @@ static int nfs_set_page_writeback(struct page *page)
static void nfs_end_page_writeback(struct page *page)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = page_file_mapping(page)->host;
struct nfs_server *nfss = NFS_SERVER(inode);
end_page_writeback(page);
@@ -252,13 +268,13 @@ static void nfs_end_page_writeback(struct page *page)
static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = page_file_mapping(page)->host;
struct nfs_page *req;
int ret;
spin_lock(&inode->i_lock);
for (;;) {
- req = nfs_page_find_request_locked(page);
+ req = nfs_page_find_request_locked(NFS_I(inode), page);
if (req == NULL)
break;
if (nfs_lock_request(req))
@@ -313,13 +329,13 @@ out:
static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = page_file_mapping(page)->host;
int ret;
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
- nfs_pageio_cond_complete(pgio, page->index);
+ nfs_pageio_cond_complete(pgio, page_file_index(page));
ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
if (ret == -EAGAIN) {
redirty_page_for_writepage(wbc, page);
@@ -336,8 +352,10 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
struct nfs_pageio_descriptor pgio;
int err;
- nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc),
- &nfs_async_write_completion_ops);
+ NFS_PROTO(page_file_mapping(page)->host)->write_pageio_init(&pgio,
+ page->mapping->host,
+ wb_priority(wbc),
+ &nfs_async_write_completion_ops);
err = nfs_do_writepage(page, wbc, &pgio);
nfs_pageio_complete(&pgio);
if (err < 0)
@@ -380,8 +398,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
- nfs_pageio_init_write(&pgio, inode, wb_priority(wbc),
- &nfs_async_write_completion_ops);
+ NFS_PROTO(inode)->write_pageio_init(&pgio, inode, wb_priority(wbc), &nfs_async_write_completion_ops);
err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
nfs_pageio_complete(&pgio);
@@ -410,11 +427,17 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
nfs_lock_request(req);
spin_lock(&inode->i_lock);
- if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE))
+ if (!nfsi->npages && NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
inode->i_version++;
- set_bit(PG_MAPPED, &req->wb_flags);
- SetPagePrivate(req->wb_page);
- set_page_private(req->wb_page, (unsigned long)req);
+ /*
+ * Swap-space should not get truncated. Hence no need to plug the race
+ * with invalidate/truncate.
+ */
+ if (likely(!PageSwapCache(req->wb_page))) {
+ set_bit(PG_MAPPED, &req->wb_flags);
+ SetPagePrivate(req->wb_page);
+ set_page_private(req->wb_page, (unsigned long)req);
+ }
nfsi->npages++;
kref_get(&req->wb_kref);
spin_unlock(&inode->i_lock);
@@ -431,9 +454,11 @@ static void nfs_inode_remove_request(struct nfs_page *req)
BUG_ON (!NFS_WBACK_BUSY(req));
spin_lock(&inode->i_lock);
- set_page_private(req->wb_page, 0);
- ClearPagePrivate(req->wb_page);
- clear_bit(PG_MAPPED, &req->wb_flags);
+ if (likely(!PageSwapCache(req->wb_page))) {
+ set_page_private(req->wb_page, 0);
+ ClearPagePrivate(req->wb_page);
+ clear_bit(PG_MAPPED, &req->wb_flags);
+ }
nfsi->npages--;
spin_unlock(&inode->i_lock);
nfs_release_request(req);
@@ -445,7 +470,7 @@ nfs_mark_request_dirty(struct nfs_page *req)
__set_page_dirty_nobuffers(req->wb_page);
}
-#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
/**
* nfs_request_add_commit_list - add request to a commit list
* @req: pointer to a struct nfs_page
@@ -470,7 +495,7 @@ nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
spin_unlock(cinfo->lock);
if (!cinfo->dreq) {
inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
- inc_bdi_stat(req->wb_page->mapping->backing_dev_info,
+ inc_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info,
BDI_RECLAIMABLE);
__mark_inode_dirty(req->wb_context->dentry->d_inode,
I_DIRTY_DATASYNC);
@@ -537,7 +562,7 @@ static void
nfs_clear_page_commit(struct page *page)
{
dec_zone_page_state(page, NR_UNSTABLE_NFS);
- dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE);
+ dec_bdi_stat(page_file_mapping(page)->backing_dev_info, BDI_RECLAIMABLE);
}
static void
@@ -620,7 +645,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
goto next;
}
if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
- memcpy(&req->wb_verf, hdr->verf, sizeof(req->wb_verf));
+ memcpy(&req->wb_verf, &hdr->verf->verifier, sizeof(req->wb_verf));
nfs_mark_request_commit(req, hdr->lseg, &cinfo);
goto next;
}
@@ -635,7 +660,7 @@ out:
hdr->release(hdr);
}
-#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
static unsigned long
nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
{
@@ -729,7 +754,7 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
spin_lock(&inode->i_lock);
for (;;) {
- req = nfs_page_find_request_locked(page);
+ req = nfs_page_find_request_locked(NFS_I(inode), page);
if (req == NULL)
goto out_unlock;
@@ -788,7 +813,7 @@ out_err:
static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
struct page *page, unsigned int offset, unsigned int bytes)
{
- struct inode *inode = page->mapping->host;
+ struct inode *inode = page_file_mapping(page)->host;
struct nfs_page *req;
req = nfs_try_to_update_request(inode, page, offset, bytes);
@@ -841,7 +866,7 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
nfs_release_request(req);
if (!do_flush)
return 0;
- status = nfs_wb_page(page->mapping->host, page);
+ status = nfs_wb_page(page_file_mapping(page)->host, page);
} while (status == 0);
return status;
}
@@ -871,7 +896,7 @@ int nfs_updatepage(struct file *file, struct page *page,
unsigned int offset, unsigned int count)
{
struct nfs_open_context *ctx = nfs_file_open_context(file);
- struct inode *inode = page->mapping->host;
+ struct inode *inode = page_file_mapping(page)->host;
int status = 0;
nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
@@ -879,7 +904,7 @@ int nfs_updatepage(struct file *file, struct page *page,
dprintk("NFS: nfs_updatepage(%s/%s %d@%lld)\n",
file->f_path.dentry->d_parent->d_name.name,
file->f_path.dentry->d_name.name, count,
- (long long)(page_offset(page) + offset));
+ (long long)(page_file_offset(page) + offset));
/* If we're not using byte range locks, and we know the page
* is up to date, it may be more efficient to extend the write
@@ -1172,6 +1197,7 @@ int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
return nfs_flush_multi(desc, hdr);
return nfs_flush_one(desc, hdr);
}
+EXPORT_SYMBOL_GPL(nfs_generic_flush);
static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
@@ -1202,13 +1228,14 @@ static const struct nfs_pageio_ops nfs_pageio_write_ops = {
.pg_doio = nfs_generic_pg_writepages,
};
-void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
+void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
struct inode *inode, int ioflags,
const struct nfs_pgio_completion_ops *compl_ops)
{
nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops,
NFS_SERVER(inode)->wsize, ioflags);
}
+EXPORT_SYMBOL_GPL(nfs_pageio_init_write);
void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
{
@@ -1217,13 +1244,6 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
-void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
- struct inode *inode, int ioflags,
- const struct nfs_pgio_completion_ops *compl_ops)
-{
- if (!pnfs_pageio_init_write(pgio, inode, ioflags, compl_ops))
- nfs_pageio_init_write_mds(pgio, inode, ioflags, compl_ops);
-}
void nfs_write_prepare(struct rpc_task *task, void *calldata)
{
@@ -1303,7 +1323,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
return;
nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
-#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
/* We tried a write call, but the server did not
* commit data to stable storage even though we
@@ -1363,7 +1383,7 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
}
-#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
{
int ret;
@@ -1475,7 +1495,7 @@ void nfs_retry_commit(struct list_head *page_list,
nfs_mark_request_commit(req, lseg, cinfo);
if (!cinfo->dreq) {
dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
- dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
+ dec_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info,
BDI_RECLAIMABLE);
}
nfs_unlock_and_release_request(req);
@@ -1547,7 +1567,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
/* Okay, COMMIT succeeded, apparently. Check the verifier
* returned by the server against all stored verfs. */
- if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
+ if (!memcmp(&req->wb_verf, &data->verf.verifier, sizeof(req->wb_verf))) {
/* We have a match */
nfs_inode_remove_request(req);
dprintk(" OK\n");
@@ -1677,22 +1697,9 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
- int ret;
-
- ret = nfs_commit_unstable_pages(inode, wbc);
- if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) {
- int status;
- bool sync = true;
-
- if (wbc->sync_mode == WB_SYNC_NONE)
- sync = false;
-
- status = pnfs_layoutcommit_inode(inode, sync);
- if (status < 0)
- return status;
- }
- return ret;
+ return nfs_commit_unstable_pages(inode, wbc);
}
+EXPORT_SYMBOL_GPL(nfs_write_inode);
/*
* flush the inode to disk.
@@ -1708,6 +1715,7 @@ int nfs_wb_all(struct inode *inode)
return sync_inode(inode, &wbc);
}
+EXPORT_SYMBOL_GPL(nfs_wb_all);
int nfs_wb_page_cancel(struct inode *inode, struct page *page)
{
@@ -1744,7 +1752,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
*/
int nfs_wb_page(struct inode *inode, struct page *page)
{
- loff_t range_start = page_offset(page);
+ loff_t range_start = page_file_offset(page);
loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index ba233499b9a5..a3946cf13fc8 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -398,7 +398,7 @@ fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc)
int migrated, i, err;
/* listsize */
- err = get_int(mesg, &fsloc->locations_count);
+ err = get_uint(mesg, &fsloc->locations_count);
if (err)
return err;
if (fsloc->locations_count > MAX_FS_LOCATIONS)
@@ -456,7 +456,7 @@ static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp)
return -EINVAL;
for (f = exp->ex_flavors; f < exp->ex_flavors + listsize; f++) {
- err = get_int(mesg, &f->pseudoflavor);
+ err = get_uint(mesg, &f->pseudoflavor);
if (err)
return err;
/*
@@ -465,7 +465,7 @@ static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp)
* problem at export time instead of when a client fails
* to authenticate.
*/
- err = get_int(mesg, &f->flags);
+ err = get_uint(mesg, &f->flags);
if (err)
return err;
/* Only some flags are allowed to differ between flavors: */
@@ -929,7 +929,7 @@ struct svc_export *
rqst_exp_get_by_name(struct svc_rqst *rqstp, struct path *path)
{
struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT);
- struct nfsd_net *nn = net_generic(rqstp->rq_xprt->xpt_net, nfsd_net_id);
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct cache_detail *cd = nn->svc_export_cache;
if (rqstp->rq_client == NULL)
@@ -960,7 +960,7 @@ struct svc_export *
rqst_exp_find(struct svc_rqst *rqstp, int fsid_type, u32 *fsidv)
{
struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT);
- struct nfsd_net *nn = net_generic(rqstp->rq_xprt->xpt_net, nfsd_net_id);
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct cache_detail *cd = nn->svc_export_cache;
if (rqstp->rq_client == NULL)
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 39365636b244..65c2431ea32f 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -34,6 +34,10 @@ struct nfsd_net {
struct cache_detail *idtoname_cache;
struct cache_detail *nametoid_cache;
+
+ struct lock_manager nfsd4_manager;
+ bool grace_ended;
+ time_t boot_time;
};
extern int nfsd_net_id;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index a5fd6b982f27..cbaf4f8bb7b7 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -756,7 +756,6 @@ static void do_probe_callback(struct nfs4_client *clp)
*/
void nfsd4_probe_callback(struct nfs4_client *clp)
{
- /* XXX: atomicity? Also, should we be using cl_flags? */
clp->cl_cb_state = NFSD4_CB_UNKNOWN;
set_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags);
do_probe_callback(clp);
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index dae36f1dee95..fdc91a6fc9c4 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -546,7 +546,7 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen
.type = type,
};
int ret;
- struct nfsd_net *nn = net_generic(rqstp->rq_xprt->xpt_net, nfsd_net_id);
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
if (namelen + 1 > sizeof(key.name))
return nfserr_badowner;
@@ -571,7 +571,7 @@ idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name)
.type = type,
};
int ret;
- struct nfsd_net *nn = net_generic(rqstp->rq_xprt->xpt_net, nfsd_net_id);
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
ret = idmap_lookup(rqstp, idtoname_lookup, &key, nn->idtoname_cache, &item);
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 987e719fbae8..c9c1c0a25417 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -354,10 +354,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
/* Openowner is now set, so sequence id will get bumped. Now we need
* these checks before we do any creates: */
status = nfserr_grace;
- if (locks_in_grace() && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
+ if (locks_in_grace(SVC_NET(rqstp)) && open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS)
goto out;
status = nfserr_no_grace;
- if (!locks_in_grace() && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
+ if (!locks_in_grace(SVC_NET(rqstp)) && open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS)
goto out;
switch (open->op_claim_type) {
@@ -686,7 +686,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nfs4_lock_state();
/* check stateid */
- if ((status = nfs4_preprocess_stateid_op(cstate, &read->rd_stateid,
+ if ((status = nfs4_preprocess_stateid_op(SVC_NET(rqstp),
+ cstate, &read->rd_stateid,
RD_STATE, &read->rd_filp))) {
dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
goto out;
@@ -741,7 +742,7 @@ nfsd4_remove(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
{
__be32 status;
- if (locks_in_grace())
+ if (locks_in_grace(SVC_NET(rqstp)))
return nfserr_grace;
status = nfsd_unlink(rqstp, &cstate->current_fh, 0,
remove->rm_name, remove->rm_namelen);
@@ -760,8 +761,8 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (!cstate->save_fh.fh_dentry)
return status;
- if (locks_in_grace() && !(cstate->save_fh.fh_export->ex_flags
- & NFSEXP_NOSUBTREECHECK))
+ if (locks_in_grace(SVC_NET(rqstp)) &&
+ !(cstate->save_fh.fh_export->ex_flags & NFSEXP_NOSUBTREECHECK))
return nfserr_grace;
status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname,
rename->rn_snamelen, &cstate->current_fh,
@@ -845,7 +846,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
nfs4_lock_state();
- status = nfs4_preprocess_stateid_op(cstate,
+ status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate,
&setattr->sa_stateid, WR_STATE, NULL);
nfs4_unlock_state();
if (status) {
@@ -890,7 +891,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return nfserr_inval;
nfs4_lock_state();
- status = nfs4_preprocess_stateid_op(cstate, stateid, WR_STATE, &filp);
+ status = nfs4_preprocess_stateid_op(SVC_NET(rqstp),
+ cstate, stateid, WR_STATE, &filp);
if (filp)
get_file(filp);
nfs4_unlock_state();
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index 5ff0b7b9fc08..43295d45cc2b 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -154,6 +154,10 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
if (status < 0)
return;
+ status = mnt_want_write_file(rec_file);
+ if (status)
+ return;
+
dir = rec_file->f_path.dentry;
/* lock the parent */
mutex_lock(&dir->d_inode->i_mutex);
@@ -173,11 +177,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
* as well be forgiving and just succeed silently.
*/
goto out_put;
- status = mnt_want_write_file(rec_file);
- if (status)
- goto out_put;
status = vfs_mkdir(dir->d_inode, dentry, S_IRWXU);
- mnt_drop_write_file(rec_file);
out_put:
dput(dentry);
out_unlock:
@@ -189,6 +189,7 @@ out_unlock:
" (err %d); please check that %s exists"
" and is writeable", status,
user_recovery_dirname);
+ mnt_drop_write_file(rec_file);
nfs4_reset_creds(original_cred);
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 94effd5bc4a1..cc894eda385a 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -38,18 +38,21 @@
#include <linux/namei.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
+#include <linux/ratelimit.h>
#include <linux/sunrpc/svcauth_gss.h>
#include <linux/sunrpc/clnt.h>
#include "xdr4.h"
#include "vfs.h"
#include "current_stateid.h"
+#include "fault_inject.h"
+
+#include "netns.h"
#define NFSDDBG_FACILITY NFSDDBG_PROC
/* Globals */
time_t nfsd4_lease = 90; /* default lease time */
time_t nfsd4_grace = 90;
-static time_t boot_time;
#define all_ones {{~0,~0},~0}
static const stateid_t one_stateid = {
@@ -862,6 +865,11 @@ static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses,
if (ret)
/* oops; xprt is already down: */
nfsd4_conn_lost(&conn->cn_xpt_user);
+ if (ses->se_client->cl_cb_state == NFSD4_CB_DOWN &&
+ dir & NFS4_CDFC4_BACK) {
+ /* callback channel may be back up */
+ nfsd4_probe_callback(ses->se_client);
+ }
return nfs_ok;
}
@@ -1047,12 +1055,12 @@ renew_client(struct nfs4_client *clp)
/* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
static int
-STALE_CLIENTID(clientid_t *clid)
+STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn)
{
- if (clid->cl_boot == boot_time)
+ if (clid->cl_boot == nn->boot_time)
return 0;
dprintk("NFSD stale clientid (%08x/%08x) boot_time %08lx\n",
- clid->cl_boot, clid->cl_id, boot_time);
+ clid->cl_boot, clid->cl_id, nn->boot_time);
return 1;
}
@@ -1215,7 +1223,7 @@ static bool groups_equal(struct group_info *g1, struct group_info *g2)
return true;
}
-static int
+static bool
same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
{
if ((cr1->cr_flavor != cr2->cr_flavor)
@@ -1227,14 +1235,15 @@ same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
return true;
if (!cr1->cr_principal || !cr2->cr_principal)
return false;
- return 0 == strcmp(cr1->cr_principal, cr1->cr_principal);
+ return 0 == strcmp(cr1->cr_principal, cr2->cr_principal);
}
static void gen_clid(struct nfs4_client *clp)
{
static u32 current_clientid = 1;
+ struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
- clp->cl_clientid.cl_boot = boot_time;
+ clp->cl_clientid.cl_boot = nn->boot_time;
clp->cl_clientid.cl_id = current_clientid++;
}
@@ -2217,8 +2226,9 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
nfs4_verifier confirm = setclientid_confirm->sc_confirm;
clientid_t * clid = &setclientid_confirm->sc_clientid;
__be32 status;
+ struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
- if (STALE_CLIENTID(clid))
+ if (STALE_CLIENTID(clid, nn))
return nfserr_stale_clientid;
nfs4_lock_state();
@@ -2577,8 +2587,9 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
unsigned int strhashval;
struct nfs4_openowner *oo = NULL;
__be32 status;
+ struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
- if (STALE_CLIENTID(&open->op_clientid))
+ if (STALE_CLIENTID(&open->op_clientid, nn))
return nfserr_stale_clientid;
/*
* In case we need it later, after we've already created the
@@ -2876,7 +2887,8 @@ static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
* Attempt to hand out a delegation.
*/
static void
-nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_ol_stateid *stp)
+nfs4_open_delegation(struct net *net, struct svc_fh *fh,
+ struct nfsd4_open *open, struct nfs4_ol_stateid *stp)
{
struct nfs4_delegation *dp;
struct nfs4_openowner *oo = container_of(stp->st_stateowner, struct nfs4_openowner, oo_owner);
@@ -2897,7 +2909,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_ol_
case NFS4_OPEN_CLAIM_NULL:
/* Let's not give out any delegations till everyone's
* had the chance to reclaim theirs.... */
- if (locks_in_grace())
+ if (locks_in_grace(net))
goto out;
if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED))
goto out;
@@ -3007,14 +3019,12 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
status = nfs4_get_vfs_file(rqstp, fp, current_fh, open);
if (status)
goto out;
+ status = nfsd4_truncate(rqstp, current_fh, open);
+ if (status)
+ goto out;
stp = open->op_stp;
open->op_stp = NULL;
init_open_stateid(stp, fp, open);
- status = nfsd4_truncate(rqstp, current_fh, open);
- if (status) {
- release_open_stateid(stp);
- goto out;
- }
}
update_stateid(&stp->st_stid.sc_stateid);
memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
@@ -3033,7 +3043,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
* Attempt to hand out a delegation. No error return, because the
* OPEN succeeds even if we fail.
*/
- nfs4_open_delegation(current_fh, open, stp);
+ nfs4_open_delegation(SVC_NET(rqstp), current_fh, open, stp);
nodeleg:
status = nfs_ok;
@@ -3087,12 +3097,13 @@ nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
{
struct nfs4_client *clp;
__be32 status;
+ struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
nfs4_lock_state();
dprintk("process_renew(%08x/%08x): starting\n",
clid->cl_boot, clid->cl_id);
status = nfserr_stale_clientid;
- if (STALE_CLIENTID(clid))
+ if (STALE_CLIENTID(clid, nn))
goto out;
clp = find_confirmed_client(clid);
status = nfserr_expired;
@@ -3111,22 +3122,19 @@ out:
return status;
}
-static struct lock_manager nfsd4_manager = {
-};
-
-static bool grace_ended;
-
static void
-nfsd4_end_grace(void)
+nfsd4_end_grace(struct net *net)
{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
/* do nothing if grace period already ended */
- if (grace_ended)
+ if (nn->grace_ended)
return;
dprintk("NFSD: end of grace period\n");
- grace_ended = true;
- nfsd4_record_grace_done(&init_net, boot_time);
- locks_end_grace(&nfsd4_manager);
+ nn->grace_ended = true;
+ nfsd4_record_grace_done(net, nn->boot_time);
+ locks_end_grace(&nn->nfsd4_manager);
/*
* Now that every NFSv4 client has had the chance to recover and
* to see the (possibly new, possibly shorter) lease time, we
@@ -3149,7 +3157,7 @@ nfs4_laundromat(void)
nfs4_lock_state();
dprintk("NFSD: laundromat service - starting\n");
- nfsd4_end_grace();
+ nfsd4_end_grace(&init_net);
INIT_LIST_HEAD(&reaplist);
spin_lock(&client_lock);
list_for_each_safe(pos, next, &client_lru) {
@@ -3231,9 +3239,9 @@ static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *s
}
static int
-STALE_STATEID(stateid_t *stateid)
+STALE_STATEID(stateid_t *stateid, struct nfsd_net *nn)
{
- if (stateid->si_opaque.so_clid.cl_boot == boot_time)
+ if (stateid->si_opaque.so_clid.cl_boot == nn->boot_time)
return 0;
dprintk("NFSD: stale stateid " STATEID_FMT "!\n",
STATEID_VAL(stateid));
@@ -3273,11 +3281,11 @@ out:
}
static inline __be32
-check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags)
+check_special_stateids(struct net *net, svc_fh *current_fh, stateid_t *stateid, int flags)
{
if (ONE_STATEID(stateid) && (flags & RD_STATE))
return nfs_ok;
- else if (locks_in_grace()) {
+ else if (locks_in_grace(net)) {
/* Answer in remaining cases depends on existence of
* conflicting state; so we must wait out the grace period. */
return nfserr_grace;
@@ -3294,9 +3302,9 @@ check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags)
* that are not able to provide mandatory locking.
*/
static inline int
-grace_disallows_io(struct inode *inode)
+grace_disallows_io(struct net *net, struct inode *inode)
{
- return locks_in_grace() && mandatory_lock(inode);
+ return locks_in_grace(net) && mandatory_lock(inode);
}
/* Returns true iff a is later than b: */
@@ -3333,18 +3341,26 @@ static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_s
return nfserr_old_stateid;
}
-__be32 nfs4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
+static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
{
struct nfs4_stid *s;
struct nfs4_ol_stateid *ols;
__be32 status;
- if (STALE_STATEID(stateid))
- return nfserr_stale_stateid;
-
+ if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
+ return nfserr_bad_stateid;
+ /* Client debugging aid. */
+ if (!same_clid(&stateid->si_opaque.so_clid, &cl->cl_clientid)) {
+ char addr_str[INET6_ADDRSTRLEN];
+ rpc_ntop((struct sockaddr *)&cl->cl_addr, addr_str,
+ sizeof(addr_str));
+ pr_warn_ratelimited("NFSD: client %s testing state ID "
+ "with incorrect client ID\n", addr_str);
+ return nfserr_bad_stateid;
+ }
s = find_stateid(cl, stateid);
if (!s)
- return nfserr_stale_stateid;
+ return nfserr_bad_stateid;
status = check_stateid_generation(stateid, &s->sc_stateid, 1);
if (status)
return status;
@@ -3360,10 +3376,11 @@ __be32 nfs4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, struct nfs4_stid **s)
{
struct nfs4_client *cl;
+ struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
return nfserr_bad_stateid;
- if (STALE_STATEID(stateid))
+ if (STALE_STATEID(stateid, nn))
return nfserr_stale_stateid;
cl = find_confirmed_client(&stateid->si_opaque.so_clid);
if (!cl)
@@ -3379,7 +3396,7 @@ static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask, s
* Checks for stateid operations
*/
__be32
-nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
+nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
stateid_t *stateid, int flags, struct file **filpp)
{
struct nfs4_stid *s;
@@ -3392,11 +3409,11 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
if (filpp)
*filpp = NULL;
- if (grace_disallows_io(ino))
+ if (grace_disallows_io(net, ino))
return nfserr_grace;
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
- return check_special_stateids(current_fh, stateid, flags);
+ return check_special_stateids(net, current_fh, stateid, flags);
status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID, &s);
if (status)
@@ -3463,7 +3480,8 @@ nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nfs4_lock_state();
list_for_each_entry(stateid, &test_stateid->ts_stateid_list, ts_id_list)
- stateid->ts_id_status = nfs4_validate_stateid(cl, &stateid->ts_id_stateid);
+ stateid->ts_id_status =
+ nfsd4_validate_stateid(cl, &stateid->ts_id_stateid);
nfs4_unlock_state();
return nfs_ok;
@@ -3750,12 +3768,19 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nfsd4_close_open_stateid(stp);
oo->oo_last_closed_stid = stp;
- /* place unused nfs4_stateowners on so_close_lru list to be
- * released by the laundromat service after the lease period
- * to enable us to handle CLOSE replay
- */
- if (list_empty(&oo->oo_owner.so_stateids))
- move_to_close_lru(oo);
+ if (list_empty(&oo->oo_owner.so_stateids)) {
+ if (cstate->minorversion) {
+ release_openowner(oo);
+ cstate->replay_owner = NULL;
+ } else {
+ /*
+ * In the 4.0 case we need to keep the owners around a
+ * little while to handle CLOSE replay.
+ */
+ if (list_empty(&oo->oo_owner.so_stateids))
+ move_to_close_lru(oo);
+ }
+ }
out:
if (!cstate->replay_owner)
nfs4_unlock_state();
@@ -4027,6 +4052,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
bool new_state = false;
int lkflg;
int err;
+ struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n",
(long long) lock->lk_offset,
@@ -4044,11 +4070,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nfs4_lock_state();
if (lock->lk_is_new) {
- /*
- * Client indicates that this is a new lockowner.
- * Use open owner and open stateid to create lock owner and
- * lock stateid.
- */
struct nfs4_ol_stateid *open_stp = NULL;
if (nfsd4_has_session(cstate))
@@ -4058,7 +4079,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
sizeof(clientid_t));
status = nfserr_stale_clientid;
- if (STALE_CLIENTID(&lock->lk_new_clientid))
+ if (STALE_CLIENTID(&lock->lk_new_clientid, nn))
goto out;
/* validate and update open stateid and open seqid */
@@ -4075,17 +4096,13 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
status = lookup_or_create_lock_state(cstate, open_stp, lock,
&lock_stp, &new_state);
- if (status)
- goto out;
- } else {
- /* lock (lock owner + lock stateid) already exists */
+ } else
status = nfs4_preprocess_seqid_op(cstate,
lock->lk_old_lock_seqid,
&lock->lk_old_lock_stateid,
NFS4_LOCK_STID, &lock_stp);
- if (status)
- goto out;
- }
+ if (status)
+ goto out;
lock_sop = lockowner(lock_stp->st_stateowner);
lkflg = setlkflg(lock->lk_type);
@@ -4094,10 +4111,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
status = nfserr_grace;
- if (locks_in_grace() && !lock->lk_reclaim)
+ if (locks_in_grace(SVC_NET(rqstp)) && !lock->lk_reclaim)
goto out;
status = nfserr_no_grace;
- if (!locks_in_grace() && lock->lk_reclaim)
+ if (!locks_in_grace(SVC_NET(rqstp)) && lock->lk_reclaim)
goto out;
locks_init_lock(&file_lock);
@@ -4196,8 +4213,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct file_lock file_lock;
struct nfs4_lockowner *lo;
__be32 status;
+ struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
- if (locks_in_grace())
+ if (locks_in_grace(SVC_NET(rqstp)))
return nfserr_grace;
if (check_lock_length(lockt->lt_offset, lockt->lt_length))
@@ -4206,7 +4224,7 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
nfs4_lock_state();
status = nfserr_stale_clientid;
- if (!nfsd4_has_session(cstate) && STALE_CLIENTID(&lockt->lt_clientid))
+ if (!nfsd4_has_session(cstate) && STALE_CLIENTID(&lockt->lt_clientid, nn))
goto out;
if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
@@ -4355,6 +4373,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
struct list_head matches;
unsigned int hashval = ownerstr_hashval(clid->cl_id, owner);
__be32 status;
+ struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
clid->cl_boot, clid->cl_id);
@@ -4362,7 +4381,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
/* XXX check for lease expiration */
status = nfserr_stale_clientid;
- if (STALE_CLIENTID(clid))
+ if (STALE_CLIENTID(clid, nn))
return status;
nfs4_lock_state();
@@ -4564,7 +4583,7 @@ void nfsd_forget_openowners(u64 num)
printk(KERN_INFO "NFSD: Forgot %d open owners", count);
}
-int nfsd_process_n_delegations(u64 num, void (*deleg_func)(struct nfs4_delegation *))
+int nfsd_process_n_delegations(u64 num, struct list_head *list)
{
int i, count = 0;
struct nfs4_file *fp, *fnext;
@@ -4573,7 +4592,7 @@ int nfsd_process_n_delegations(u64 num, void (*deleg_func)(struct nfs4_delegatio
for (i = 0; i < FILE_HASH_SIZE; i++) {
list_for_each_entry_safe(fp, fnext, &file_hashtbl[i], fi_hash) {
list_for_each_entry_safe(dp, dnext, &fp->fi_delegations, dl_perfile) {
- deleg_func(dp);
+ list_move(&dp->dl_recall_lru, list);
if (++count == num)
return count;
}
@@ -4586,9 +4605,16 @@ int nfsd_process_n_delegations(u64 num, void (*deleg_func)(struct nfs4_delegatio
void nfsd_forget_delegations(u64 num)
{
unsigned int count;
+ LIST_HEAD(victims);
+ struct nfs4_delegation *dp, *dnext;
+
+ spin_lock(&recall_lock);
+ count = nfsd_process_n_delegations(num, &victims);
+ spin_unlock(&recall_lock);
nfs4_lock_state();
- count = nfsd_process_n_delegations(num, unhash_delegation);
+ list_for_each_entry_safe(dp, dnext, &victims, dl_recall_lru)
+ unhash_delegation(dp);
nfs4_unlock_state();
printk(KERN_INFO "NFSD: Forgot %d delegations", count);
@@ -4597,12 +4623,16 @@ void nfsd_forget_delegations(u64 num)
void nfsd_recall_delegations(u64 num)
{
unsigned int count;
+ LIST_HEAD(victims);
+ struct nfs4_delegation *dp, *dnext;
- nfs4_lock_state();
spin_lock(&recall_lock);
- count = nfsd_process_n_delegations(num, nfsd_break_one_deleg);
+ count = nfsd_process_n_delegations(num, &victims);
+ list_for_each_entry_safe(dp, dnext, &victims, dl_recall_lru) {
+ list_del(&dp->dl_recall_lru);
+ nfsd_break_one_deleg(dp);
+ }
spin_unlock(&recall_lock);
- nfs4_unlock_state();
printk(KERN_INFO "NFSD: Recalled %d delegations", count);
}
@@ -4665,6 +4695,8 @@ set_max_delegations(void)
int
nfs4_state_start(void)
{
+ struct net *net = &init_net;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
int ret;
/*
@@ -4674,11 +4706,11 @@ nfs4_state_start(void)
* to that instead and then do most of the rest of this on a per-net
* basis.
*/
- get_net(&init_net);
- nfsd4_client_tracking_init(&init_net);
- boot_time = get_seconds();
- locks_start_grace(&nfsd4_manager);
- grace_ended = false;
+ get_net(net);
+ nfsd4_client_tracking_init(net);
+ nn->boot_time = get_seconds();
+ locks_start_grace(net, &nn->nfsd4_manager);
+ nn->grace_ended = false;
printk(KERN_INFO "NFSD: starting %ld-second grace period\n",
nfsd4_grace);
ret = set_callback_cred();
@@ -4700,8 +4732,8 @@ nfs4_state_start(void)
out_free_laundry:
destroy_workqueue(laundry_wq);
out_recovery:
- nfsd4_client_tracking_exit(&init_net);
- put_net(&init_net);
+ nfsd4_client_tracking_exit(net);
+ put_net(net);
return ret;
}
@@ -4742,9 +4774,12 @@ __nfs4_state_shutdown(void)
void
nfs4_state_shutdown(void)
{
+ struct net *net = &init_net;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
cancel_delayed_work_sync(&laundromat_work);
destroy_workqueue(laundry_wq);
- locks_end_grace(&nfsd4_manager);
+ locks_end_grace(&nn->nfsd4_manager);
nfs4_lock_state();
__nfs4_state_shutdown();
nfs4_unlock_state();
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 4949667c84ea..6322df36031f 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2259,7 +2259,7 @@ out_acl:
if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE) {
if ((buflen -= 4) < 0)
goto out_resource;
- WRITE32(1);
+ WRITE32(0);
}
if (bmval0 & FATTR4_WORD0_CASE_PRESERVING) {
if ((buflen -= 4) < 0)
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index c55298ed5772..fa49cff5ee65 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -673,9 +673,7 @@ static ssize_t __write_ports_addfd(char *buf)
err = svc_addsock(nfsd_serv, fd, buf, SIMPLE_TRANSACTION_LIMIT);
if (err < 0) {
- if (nfsd_serv->sv_nrthreads == 1)
- svc_shutdown_net(nfsd_serv, net);
- svc_destroy(nfsd_serv);
+ nfsd_destroy(net);
return err;
}
@@ -744,9 +742,7 @@ out_close:
svc_xprt_put(xprt);
}
out_err:
- if (nfsd_serv->sv_nrthreads == 1)
- svc_shutdown_net(nfsd_serv, net);
- svc_destroy(nfsd_serv);
+ nfsd_destroy(net);
return err;
}
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 1671429ffa66..2244222368ab 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -72,6 +72,19 @@ int nfsd_nrthreads(void);
int nfsd_nrpools(void);
int nfsd_get_nrthreads(int n, int *);
int nfsd_set_nrthreads(int n, int *);
+int nfsd_pool_stats_open(struct inode *, struct file *);
+int nfsd_pool_stats_release(struct inode *, struct file *);
+
+static inline void nfsd_destroy(struct net *net)
+{
+ int destroy = (nfsd_serv->sv_nrthreads == 1);
+
+ if (destroy)
+ svc_shutdown_net(nfsd_serv, net);
+ svc_destroy(nfsd_serv);
+ if (destroy)
+ nfsd_serv = NULL;
+}
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
#ifdef CONFIG_NFSD_V2_ACL
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index cc793005a87c..032af381b3aa 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -635,6 +635,7 @@ fh_put(struct svc_fh *fhp)
fhp->fh_post_saved = 0;
#endif
}
+ fh_drop_write(fhp);
if (exp) {
exp_put(exp);
fhp->fh_export = NULL;
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index e15dc45fc5ec..aad6d457b9e8 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -196,6 +196,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
struct dentry *dchild;
int type, mode;
__be32 nfserr;
+ int hosterr;
dev_t rdev = 0, wanted = new_decode_dev(attr->ia_size);
dprintk("nfsd: CREATE %s %.*s\n",
@@ -214,6 +215,12 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
nfserr = nfserr_exist;
if (isdotent(argp->name, argp->len))
goto done;
+ hosterr = fh_want_write(dirfhp);
+ if (hosterr) {
+ nfserr = nfserrno(hosterr);
+ goto done;
+ }
+
fh_lock_nested(dirfhp, I_MUTEX_PARENT);
dchild = lookup_one_len(argp->name, dirfhp->fh_dentry, argp->len);
if (IS_ERR(dchild)) {
@@ -330,7 +337,7 @@ nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp,
out_unlock:
/* We don't really need to unlock, as fh_put does it. */
fh_unlock(dirfhp);
-
+ fh_drop_write(dirfhp);
done:
fh_put(dirfhp);
return nfsd_return_dirop(nfserr, resp);
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index ee709fc8f58b..240473cb708f 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -254,8 +254,6 @@ static void nfsd_shutdown(void)
static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
{
- /* When last nfsd thread exits we need to do some clean-up */
- nfsd_serv = NULL;
nfsd_shutdown();
svc_rpcb_cleanup(serv, net);
@@ -332,6 +330,7 @@ static int nfsd_get_default_max_blksize(void)
int nfsd_create_serv(void)
{
int error;
+ struct net *net = current->nsproxy->net_ns;
WARN_ON(!mutex_is_locked(&nfsd_mutex));
if (nfsd_serv) {
@@ -346,7 +345,7 @@ int nfsd_create_serv(void)
if (nfsd_serv == NULL)
return -ENOMEM;
- error = svc_bind(nfsd_serv, current->nsproxy->net_ns);
+ error = svc_bind(nfsd_serv, net);
if (error < 0) {
svc_destroy(nfsd_serv);
return error;
@@ -427,11 +426,7 @@ int nfsd_set_nrthreads(int n, int *nthreads)
if (err)
break;
}
-
- if (nfsd_serv->sv_nrthreads == 1)
- svc_shutdown_net(nfsd_serv, net);
- svc_destroy(nfsd_serv);
-
+ nfsd_destroy(net);
return err;
}
@@ -478,9 +473,7 @@ out_shutdown:
if (error < 0 && !nfsd_up_before)
nfsd_shutdown();
out_destroy:
- if (nfsd_serv->sv_nrthreads == 1)
- svc_shutdown_net(nfsd_serv, net);
- svc_destroy(nfsd_serv); /* Release server */
+ nfsd_destroy(net); /* Release server */
out:
mutex_unlock(&nfsd_mutex);
return error;
@@ -563,12 +556,13 @@ nfsd(void *vrqstp)
nfsdstats.th_cnt --;
out:
- if (rqstp->rq_server->sv_nrthreads == 1)
- svc_shutdown_net(rqstp->rq_server, &init_net);
+ rqstp->rq_server = NULL;
/* Release the thread */
svc_exit_thread(rqstp);
+ nfsd_destroy(&init_net);
+
/* Release module */
mutex_unlock(&nfsd_mutex);
module_put_and_exit(0);
@@ -682,9 +676,7 @@ int nfsd_pool_stats_release(struct inode *inode, struct file *file)
mutex_lock(&nfsd_mutex);
/* this function really, really should have been called svc_put() */
- if (nfsd_serv->sv_nrthreads == 1)
- svc_shutdown_net(nfsd_serv, net);
- svc_destroy(nfsd_serv);
+ nfsd_destroy(net);
mutex_unlock(&nfsd_mutex);
return ret;
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 849091e16ea6..e6173147f982 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -450,8 +450,10 @@ static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s)
#define WR_STATE 0x00000020
struct nfsd4_compound_state;
+struct nfsd_net;
-extern __be32 nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
+extern __be32 nfs4_preprocess_stateid_op(struct net *net,
+ struct nfsd4_compound_state *cstate,
stateid_t *stateid, int flags, struct file **filp);
extern void nfs4_lock_state(void);
extern void nfs4_unlock_state(void);
@@ -475,7 +477,6 @@ extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
extern int nfs4_client_to_reclaim(const char *name);
extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id);
extern void release_session_client(struct nfsd4_session *);
-extern __be32 nfs4_validate_stateid(struct nfs4_client *, stateid_t *);
extern void nfsd4_purge_closed_stateid(struct nfs4_stateowner *);
/* nfs4recover operations */
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 4700a0a929d7..a9269f142cc4 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -757,8 +757,16 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
* If we get here, then the client has already done an "open",
* and (hopefully) checked permission - so allow OWNER_OVERRIDE
* in case a chmod has now revoked permission.
+ *
+ * Arguably we should also allow the owner override for
+ * directories, but we never have and it doesn't seem to have
+ * caused anyone a problem. If we were to change this, note
+ * also that our filldir callbacks would need a variant of
+ * lookup_one_len that doesn't check permissions.
*/
- err = fh_verify(rqstp, fhp, type, may_flags | NFSD_MAY_OWNER_OVERRIDE);
+ if (type == S_IFREG)
+ may_flags |= NFSD_MAY_OWNER_OVERRIDE;
+ err = fh_verify(rqstp, fhp, type, may_flags);
if (err)
goto out;
@@ -1276,6 +1284,10 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
* If it has, the parent directory should already be locked.
*/
if (!resfhp->fh_dentry) {
+ host_err = fh_want_write(fhp);
+ if (host_err)
+ goto out_nfserr;
+
/* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */
fh_lock_nested(fhp, I_MUTEX_PARENT);
dchild = lookup_one_len(fname, dentry, flen);
@@ -1319,14 +1331,11 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
goto out;
}
- host_err = fh_want_write(fhp);
- if (host_err)
- goto out_nfserr;
-
/*
* Get the dir op function pointer.
*/
err = 0;
+ host_err = 0;
switch (type) {
case S_IFREG:
host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
@@ -1343,10 +1352,8 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
break;
}
- if (host_err < 0) {
- fh_drop_write(fhp);
+ if (host_err < 0)
goto out_nfserr;
- }
err = nfsd_create_setattr(rqstp, resfhp, iap);
@@ -1358,7 +1365,6 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
err2 = nfserrno(commit_metadata(fhp));
if (err2)
err = err2;
- fh_drop_write(fhp);
/*
* Update the file handle to get the new inode info.
*/
@@ -1417,6 +1423,11 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
err = nfserr_notdir;
if (!dirp->i_op->lookup)
goto out;
+
+ host_err = fh_want_write(fhp);
+ if (host_err)
+ goto out_nfserr;
+
fh_lock_nested(fhp, I_MUTEX_PARENT);
/*
@@ -1449,9 +1460,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
v_atime = verifier[1]&0x7fffffff;
}
- host_err = fh_want_write(fhp);
- if (host_err)
- goto out_nfserr;
if (dchild->d_inode) {
err = 0;
@@ -1522,7 +1530,6 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (!err)
err = nfserrno(commit_metadata(fhp));
- fh_drop_write(fhp);
/*
* Update the filehandle to get the new inode info.
*/
@@ -1533,6 +1540,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
fh_unlock(fhp);
if (dchild && !IS_ERR(dchild))
dput(dchild);
+ fh_drop_write(fhp);
return err;
out_nfserr:
@@ -1613,6 +1621,11 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
if (err)
goto out;
+
+ host_err = fh_want_write(fhp);
+ if (host_err)
+ goto out_nfserr;
+
fh_lock(fhp);
dentry = fhp->fh_dentry;
dnew = lookup_one_len(fname, dentry, flen);
@@ -1620,10 +1633,6 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (IS_ERR(dnew))
goto out_nfserr;
- host_err = fh_want_write(fhp);
- if (host_err)
- goto out_nfserr;
-
if (unlikely(path[plen] != 0)) {
char *path_alloced = kmalloc(plen+1, GFP_KERNEL);
if (path_alloced == NULL)
@@ -1683,6 +1692,12 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
if (isdotent(name, len))
goto out;
+ host_err = fh_want_write(tfhp);
+ if (host_err) {
+ err = nfserrno(host_err);
+ goto out;
+ }
+
fh_lock_nested(ffhp, I_MUTEX_PARENT);
ddir = ffhp->fh_dentry;
dirp = ddir->d_inode;
@@ -1694,18 +1709,13 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
dold = tfhp->fh_dentry;
- host_err = fh_want_write(tfhp);
- if (host_err) {
- err = nfserrno(host_err);
- goto out_dput;
- }
err = nfserr_noent;
if (!dold->d_inode)
- goto out_drop_write;
+ goto out_dput;
host_err = nfsd_break_lease(dold->d_inode);
if (host_err) {
err = nfserrno(host_err);
- goto out_drop_write;
+ goto out_dput;
}
host_err = vfs_link(dold, dirp, dnew);
if (!host_err) {
@@ -1718,12 +1728,11 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
else
err = nfserrno(host_err);
}
-out_drop_write:
- fh_drop_write(tfhp);
out_dput:
dput(dnew);
out_unlock:
fh_unlock(ffhp);
+ fh_drop_write(tfhp);
out:
return err;
@@ -1766,6 +1775,12 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
goto out;
+ host_err = fh_want_write(ffhp);
+ if (host_err) {
+ err = nfserrno(host_err);
+ goto out;
+ }
+
/* cannot use fh_lock as we need deadlock protective ordering
* so do it by hand */
trap = lock_rename(tdentry, fdentry);
@@ -1796,17 +1811,14 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
host_err = -EXDEV;
if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
goto out_dput_new;
- host_err = fh_want_write(ffhp);
- if (host_err)
- goto out_dput_new;
host_err = nfsd_break_lease(odentry->d_inode);
if (host_err)
- goto out_drop_write;
+ goto out_dput_new;
if (ndentry->d_inode) {
host_err = nfsd_break_lease(ndentry->d_inode);
if (host_err)
- goto out_drop_write;
+ goto out_dput_new;
}
host_err = vfs_rename(fdir, odentry, tdir, ndentry);
if (!host_err) {
@@ -1814,8 +1826,6 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
if (!host_err)
host_err = commit_metadata(ffhp);
}
-out_drop_write:
- fh_drop_write(ffhp);
out_dput_new:
dput(ndentry);
out_dput_old:
@@ -1831,6 +1841,7 @@ out_drop_write:
fill_post_wcc(tfhp);
unlock_rename(tdentry, fdentry);
ffhp->fh_locked = tfhp->fh_locked = 0;
+ fh_drop_write(ffhp);
out:
return err;
@@ -1856,6 +1867,10 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
if (err)
goto out;
+ host_err = fh_want_write(fhp);
+ if (host_err)
+ goto out_nfserr;
+
fh_lock_nested(fhp, I_MUTEX_PARENT);
dentry = fhp->fh_dentry;
dirp = dentry->d_inode;
@@ -1874,21 +1889,15 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
if (!type)
type = rdentry->d_inode->i_mode & S_IFMT;
- host_err = fh_want_write(fhp);
- if (host_err)
- goto out_put;
-
host_err = nfsd_break_lease(rdentry->d_inode);
if (host_err)
- goto out_drop_write;
+ goto out_put;
if (type != S_IFDIR)
host_err = vfs_unlink(dirp, rdentry);
else
host_err = vfs_rmdir(dirp, rdentry);
if (!host_err)
host_err = commit_metadata(fhp);
-out_drop_write:
- fh_drop_write(fhp);
out_put:
dput(rdentry);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index ec0611b2b738..359594c393d2 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -110,12 +110,19 @@ int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *);
static inline int fh_want_write(struct svc_fh *fh)
{
- return mnt_want_write(fh->fh_export->ex_path.mnt);
+ int ret = mnt_want_write(fh->fh_export->ex_path.mnt);
+
+ if (!ret)
+ fh->fh_want_write = 1;
+ return ret;
}
static inline void fh_drop_write(struct svc_fh *fh)
{
- mnt_drop_write(fh->fh_export->ex_path.mnt);
+ if (fh->fh_want_write) {
+ fh->fh_want_write = 0;
+ mnt_drop_write(fh->fh_export->ex_path.mnt);
+ }
}
#endif /* LINUX_NFSD_VFS_H */
diff --git a/fs/nilfs2/alloc.h b/fs/nilfs2/alloc.h
index f5fde36b9e28..fb7238100548 100644
--- a/fs/nilfs2/alloc.h
+++ b/fs/nilfs2/alloc.h
@@ -76,15 +76,23 @@ int nilfs_palloc_freev(struct inode *, __u64 *, size_t);
#define nilfs_clear_bit_atomic ext2_clear_bit_atomic
#define nilfs_find_next_zero_bit find_next_zero_bit_le
-/*
- * persistent object allocator cache
+/**
+ * struct nilfs_bh_assoc - block offset and buffer head association
+ * @blkoff: block offset
+ * @bh: buffer head
*/
-
struct nilfs_bh_assoc {
unsigned long blkoff;
struct buffer_head *bh;
};
+/**
+ * struct nilfs_palloc_cache - persistent object allocator cache
+ * @lock: cache protecting lock
+ * @prev_desc: blockgroup descriptors cache
+ * @prev_bitmap: blockgroup bitmap cache
+ * @prev_entry: translation entries cache
+ */
struct nilfs_palloc_cache {
spinlock_t lock;
struct nilfs_bh_assoc prev_desc;
diff --git a/fs/nilfs2/bmap.h b/fs/nilfs2/bmap.h
index 40d9f453d31c..b89e68076adc 100644
--- a/fs/nilfs2/bmap.h
+++ b/fs/nilfs2/bmap.h
@@ -135,6 +135,13 @@ struct nilfs_bmap {
/* state */
#define NILFS_BMAP_DIRTY 0x00000001
+/**
+ * struct nilfs_bmap_store - shadow copy of bmap state
+ * @data: cached raw block mapping of on-disk inode
+ * @last_allocated_key: cached value of last allocated key for data block
+ * @last_allocated_ptr: cached value of last allocated ptr for data block
+ * @state: cached value of state field of bmap structure
+ */
struct nilfs_bmap_store {
__le64 data[NILFS_BMAP_SIZE / sizeof(__le64)];
__u64 last_allocated_key;
diff --git a/fs/nilfs2/btnode.h b/fs/nilfs2/btnode.h
index 3a4dd2d8d3fc..d876b565ce64 100644
--- a/fs/nilfs2/btnode.h
+++ b/fs/nilfs2/btnode.h
@@ -29,7 +29,13 @@
#include <linux/fs.h>
#include <linux/backing-dev.h>
-
+/**
+ * struct nilfs_btnode_chkey_ctxt - change key context
+ * @oldkey: old key of block's moving content
+ * @newkey: new key for block's content
+ * @bh: buffer head of old buffer
+ * @newbh: buffer head of new buffer
+ */
struct nilfs_btnode_chkey_ctxt {
__u64 oldkey;
__u64 newkey;
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index dab5c4c6dfaf..deaa3d33a0aa 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -286,7 +286,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
__u64 cno;
void *kaddr;
unsigned long tnicps;
- int ret, ncps, nicps, count, i;
+ int ret, ncps, nicps, nss, count, i;
if (unlikely(start == 0 || start > end)) {
printk(KERN_ERR "%s: invalid range of checkpoint numbers: "
@@ -301,6 +301,7 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
if (ret < 0)
goto out_sem;
tnicps = 0;
+ nss = 0;
for (cno = start; cno < end; cno += ncps) {
ncps = nilfs_cpfile_checkpoints_in_block(cpfile, cno, end);
@@ -318,8 +319,9 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
cpfile, cno, cp_bh, kaddr);
nicps = 0;
for (i = 0; i < ncps; i++, cp = (void *)cp + cpsz) {
- WARN_ON(nilfs_checkpoint_snapshot(cp));
- if (!nilfs_checkpoint_invalid(cp)) {
+ if (nilfs_checkpoint_snapshot(cp)) {
+ nss++;
+ } else if (!nilfs_checkpoint_invalid(cp)) {
nilfs_checkpoint_set_invalid(cp);
nicps++;
}
@@ -364,6 +366,8 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
}
brelse(header_bh);
+ if (nss > 0)
+ ret = -EBUSY;
out_sem:
up_write(&NILFS_MDT(cpfile)->mi_sem);
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index b5c13f3576b9..fa0f80308c2d 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -33,6 +33,12 @@
#define NILFS_CNO_MIN ((__u64)1)
#define NILFS_CNO_MAX (~(__u64)0)
+/**
+ * struct nilfs_dat_info - on-memory private data of DAT file
+ * @mi: on-memory private data of metadata file
+ * @palloc_cache: persistent object allocator cache of DAT file
+ * @shadow: shadow map of DAT file
+ */
struct nilfs_dat_info {
struct nilfs_mdt_info mi;
struct nilfs_palloc_cache palloc_cache;
diff --git a/fs/nilfs2/export.h b/fs/nilfs2/export.h
index a71cc412b651..19ccbf9522ab 100644
--- a/fs/nilfs2/export.h
+++ b/fs/nilfs2/export.h
@@ -5,6 +5,14 @@
extern const struct export_operations nilfs_export_ops;
+/**
+ * struct nilfs_fid - NILFS file id type
+ * @cno: checkpoint number
+ * @ino: inode number
+ * @gen: file generation (version) for NFS
+ * @parent_gen: parent generation (version) for NFS
+ * @parent_ino: parent inode number
+ */
struct nilfs_fid {
u64 cno;
u64 ino;
diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c
index 62cebc8e1a1f..a4d56ac02e6c 100644
--- a/fs/nilfs2/file.c
+++ b/fs/nilfs2/file.c
@@ -69,16 +69,18 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
struct page *page = vmf->page;
struct inode *inode = vma->vm_file->f_dentry->d_inode;
struct nilfs_transaction_info ti;
- int ret;
+ int ret = 0;
if (unlikely(nilfs_near_disk_full(inode->i_sb->s_fs_info)))
return VM_FAULT_SIGBUS; /* -ENOSPC */
+ sb_start_pagefault(inode->i_sb);
lock_page(page);
if (page->mapping != inode->i_mapping ||
page_offset(page) >= i_size_read(inode) || !PageUptodate(page)) {
unlock_page(page);
- return VM_FAULT_NOPAGE; /* make the VM retry the fault */
+ ret = -EFAULT; /* make the VM retry the fault */
+ goto out;
}
/*
@@ -112,19 +114,21 @@ static int nilfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
ret = nilfs_transaction_begin(inode->i_sb, &ti, 1);
/* never returns -ENOMEM, but may return -ENOSPC */
if (unlikely(ret))
- return VM_FAULT_SIGBUS;
+ goto out;
- ret = block_page_mkwrite(vma, vmf, nilfs_get_block);
- if (ret != VM_FAULT_LOCKED) {
+ ret = __block_page_mkwrite(vma, vmf, nilfs_get_block);
+ if (ret) {
nilfs_transaction_abort(inode->i_sb);
- return ret;
+ goto out;
}
nilfs_set_file_dirty(inode, 1 << (PAGE_SHIFT - inode->i_blkbits));
nilfs_transaction_commit(inode->i_sb);
mapped:
wait_on_page_writeback(page);
- return VM_FAULT_LOCKED;
+ out:
+ sb_end_pagefault(inode->i_sb);
+ return block_page_mkwrite_return(ret);
}
static const struct vm_operations_struct nilfs_file_vm_ops = {
diff --git a/fs/nilfs2/ifile.c b/fs/nilfs2/ifile.c
index 5a48df79d674..d8e65bde083c 100644
--- a/fs/nilfs2/ifile.c
+++ b/fs/nilfs2/ifile.c
@@ -29,7 +29,11 @@
#include "alloc.h"
#include "ifile.h"
-
+/**
+ * struct nilfs_ifile_info - on-memory private data of ifile
+ * @mi: on-memory private data of metadata file
+ * @palloc_cache: persistent object allocator cache of ifile
+ */
struct nilfs_ifile_info {
struct nilfs_mdt_info mi;
struct nilfs_palloc_cache palloc_cache;
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 7cc64465ec26..6e2c3db976b2 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -34,6 +34,13 @@
#include "cpfile.h"
#include "ifile.h"
+/**
+ * struct nilfs_iget_args - arguments used during comparison between inodes
+ * @ino: inode number
+ * @cno: checkpoint number
+ * @root: pointer on NILFS root object (mounted checkpoint)
+ * @for_gc: inode for GC flag
+ */
struct nilfs_iget_args {
u64 ino;
__u64 cno;
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 06658caa18bd..fdb180769485 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -182,7 +182,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp,
if (copy_from_user(&cpmode, argp, sizeof(cpmode)))
goto out;
- down_read(&inode->i_sb->s_umount);
+ mutex_lock(&nilfs->ns_snapshot_mount_mutex);
nilfs_transaction_begin(inode->i_sb, &ti, 0);
ret = nilfs_cpfile_change_cpmode(
@@ -192,7 +192,7 @@ static int nilfs_ioctl_change_cpmode(struct inode *inode, struct file *filp,
else
nilfs_transaction_commit(inode->i_sb); /* never fails */
- up_read(&inode->i_sb->s_umount);
+ mutex_unlock(&nilfs->ns_snapshot_mount_mutex);
out:
mnt_drop_write_file(filp);
return ret;
@@ -660,8 +660,6 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
goto out_free;
}
- vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
-
ret = nilfs_ioctl_move_blocks(inode->i_sb, &argv[0], kbufs[0]);
if (ret < 0)
printk(KERN_ERR "NILFS: GC failed during preparation: "
diff --git a/fs/nilfs2/mdt.h b/fs/nilfs2/mdt.h
index ab20a4baa50f..ab172e8549c5 100644
--- a/fs/nilfs2/mdt.h
+++ b/fs/nilfs2/mdt.h
@@ -28,6 +28,13 @@
#include "nilfs.h"
#include "page.h"
+/**
+ * struct nilfs_shadow_map - shadow mapping of meta data file
+ * @bmap_store: shadow copy of bmap state
+ * @frozen_data: shadowed dirty data pages
+ * @frozen_btnodes: shadowed dirty b-tree nodes' pages
+ * @frozen_buffers: list of frozen buffers
+ */
struct nilfs_shadow_map {
struct nilfs_bmap_store bmap_store;
struct address_space frozen_data;
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 250add84da76..74cece80e9a3 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -32,8 +32,21 @@
#include "the_nilfs.h"
#include "bmap.h"
-/*
- * nilfs inode data in memory
+/**
+ * struct nilfs_inode_info - nilfs inode data in memory
+ * @i_flags: inode flags
+ * @i_state: dynamic state flags
+ * @i_bmap: pointer on i_bmap_data
+ * @i_bmap_data: raw block mapping
+ * @i_xattr: <TODO>
+ * @i_dir_start_lookup: page index of last successful search
+ * @i_cno: checkpoint number for GC inode
+ * @i_btnode_cache: cached pages of b-tree nodes
+ * @i_dirty: list for connecting dirty files
+ * @xattr_sem: semaphore for extended attributes processing
+ * @i_bh: buffer contains disk inode
+ * @i_root: root object of the current filesystem tree
+ * @vfs_inode: VFS inode object
*/
struct nilfs_inode_info {
__u32 i_flags;
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 88e11fb346b6..a5752a589932 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -189,7 +189,7 @@ int nilfs_transaction_begin(struct super_block *sb,
if (ret > 0)
return 0;
- vfs_check_frozen(sb, SB_FREEZE_WRITE);
+ sb_start_intwrite(sb);
nilfs = sb->s_fs_info;
down_read(&nilfs->ns_segctor_sem);
@@ -205,6 +205,7 @@ int nilfs_transaction_begin(struct super_block *sb,
current->journal_info = ti->ti_save;
if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
kmem_cache_free(nilfs_transaction_cachep, ti);
+ sb_end_intwrite(sb);
return ret;
}
@@ -246,6 +247,7 @@ int nilfs_transaction_commit(struct super_block *sb)
err = nilfs_construct_segment(sb);
if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
kmem_cache_free(nilfs_transaction_cachep, ti);
+ sb_end_intwrite(sb);
return err;
}
@@ -264,6 +266,7 @@ void nilfs_transaction_abort(struct super_block *sb)
current->journal_info = ti->ti_save;
if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
kmem_cache_free(nilfs_transaction_cachep, ti);
+ sb_end_intwrite(sb);
}
void nilfs_relax_pressure_in_lock(struct super_block *sb)
diff --git a/fs/nilfs2/sufile.c b/fs/nilfs2/sufile.c
index c5b7653a4391..3127e9f438a7 100644
--- a/fs/nilfs2/sufile.c
+++ b/fs/nilfs2/sufile.c
@@ -30,7 +30,13 @@
#include "mdt.h"
#include "sufile.h"
-
+/**
+ * struct nilfs_sufile_info - on-memory private data of sufile
+ * @mi: on-memory private data of metadata file
+ * @ncleansegs: number of clean segments
+ * @allocmin: lower limit of allocatable segment range
+ * @allocmax: upper limit of allocatable segment range
+ */
struct nilfs_sufile_info {
struct nilfs_mdt_info mi;
unsigned long ncleansegs;/* number of clean segments */
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index d57c42f974ea..6a10812711c1 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -676,20 +676,13 @@ static const struct super_operations nilfs_sops = {
.alloc_inode = nilfs_alloc_inode,
.destroy_inode = nilfs_destroy_inode,
.dirty_inode = nilfs_dirty_inode,
- /* .write_inode = nilfs_write_inode, */
- /* .put_inode = nilfs_put_inode, */
- /* .drop_inode = nilfs_drop_inode, */
.evict_inode = nilfs_evict_inode,
.put_super = nilfs_put_super,
- /* .write_super = nilfs_write_super, */
.sync_fs = nilfs_sync_fs,
.freeze_fs = nilfs_freeze,
.unfreeze_fs = nilfs_unfreeze,
- /* .write_super_lockfs */
- /* .unlockfs */
.statfs = nilfs_statfs,
.remount_fs = nilfs_remount,
- /* .umount_begin */
.show_options = nilfs_show_options
};
@@ -948,6 +941,8 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno,
struct nilfs_root *root;
int ret;
+ mutex_lock(&nilfs->ns_snapshot_mount_mutex);
+
down_read(&nilfs->ns_segctor_sem);
ret = nilfs_cpfile_is_snapshot(nilfs->ns_cpfile, cno);
up_read(&nilfs->ns_segctor_sem);
@@ -972,6 +967,7 @@ static int nilfs_attach_snapshot(struct super_block *s, __u64 cno,
ret = nilfs_get_root_dentry(s, root, root_dentry);
nilfs_put_root(root);
out:
+ mutex_unlock(&nilfs->ns_snapshot_mount_mutex);
return ret;
}
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 501b7f8b739f..41e6a04a561f 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -76,6 +76,7 @@ struct the_nilfs *alloc_nilfs(struct block_device *bdev)
nilfs->ns_bdev = bdev;
atomic_set(&nilfs->ns_ndirtyblks, 0);
init_rwsem(&nilfs->ns_sem);
+ mutex_init(&nilfs->ns_snapshot_mount_mutex);
INIT_LIST_HEAD(&nilfs->ns_dirty_files);
INIT_LIST_HEAD(&nilfs->ns_gc_inodes);
spin_lock_init(&nilfs->ns_inode_lock);
diff --git a/fs/nilfs2/the_nilfs.h b/fs/nilfs2/the_nilfs.h
index 9992b11312ff..be1267a34cea 100644
--- a/fs/nilfs2/the_nilfs.h
+++ b/fs/nilfs2/the_nilfs.h
@@ -47,11 +47,13 @@ enum {
* @ns_flags: flags
* @ns_bdev: block device
* @ns_sem: semaphore for shared states
+ * @ns_snapshot_mount_mutex: mutex to protect snapshot mounts
* @ns_sbh: buffer heads of on-disk super blocks
* @ns_sbp: pointers to super block data
* @ns_sbwtime: previous write time of super block
* @ns_sbwcount: write count of super block
* @ns_sbsize: size of valid data in super block
+ * @ns_mount_state: file system state
* @ns_seg_seq: segment sequence counter
* @ns_segnum: index number of the latest full segment.
* @ns_nextnum: index number of the full segment index to be used next
@@ -99,13 +101,12 @@ struct the_nilfs {
struct block_device *ns_bdev;
struct rw_semaphore ns_sem;
+ struct mutex ns_snapshot_mount_mutex;
/*
* used for
* - loading the latest checkpoint exclusively.
* - allocating a new full segment.
- * - protecting s_dirt in the super_block struct
- * (see nilfs_write_super) and the following fields.
*/
struct buffer_head *ns_sbh[2];
struct nilfs_super_block *ns_sbp[2];
@@ -229,9 +230,8 @@ THE_NILFS_FNS(SB_DIRTY, sb_dirty)
* @count: refcount of this structure
* @nilfs: nilfs object
* @ifile: inode file
- * @root: root inode
* @inodes_count: number of inodes
- * @blocks_count: number of blocks (Reserved)
+ * @blocks_count: number of blocks
*/
struct nilfs_root {
__u64 cno;
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 7389d2d5e51d..1ecf46448f85 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2084,7 +2084,6 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
if (err)
return err;
pos = *ppos;
- vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
/* We can write back this queue in page reclaim. */
current->backing_dev_info = mapping->backing_dev_info;
written = 0;
@@ -2119,6 +2118,7 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
BUG_ON(iocb->ki_pos != pos);
+ sb_start_write(inode->i_sb);
mutex_lock(&inode->i_mutex);
ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
mutex_unlock(&inode->i_mutex);
@@ -2127,6 +2127,7 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
if (err < 0)
ret = err;
}
+ sb_end_write(inode->i_sb);
return ret;
}
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index b341492542ca..2bc149d6a784 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2660,31 +2660,14 @@ static const struct super_operations ntfs_sops = {
.alloc_inode = ntfs_alloc_big_inode, /* VFS: Allocate new inode. */
.destroy_inode = ntfs_destroy_big_inode, /* VFS: Deallocate inode. */
#ifdef NTFS_RW
- //.dirty_inode = NULL, /* VFS: Called from
- // __mark_inode_dirty(). */
.write_inode = ntfs_write_inode, /* VFS: Write dirty inode to
disk. */
- //.drop_inode = NULL, /* VFS: Called just after the
- // inode reference count has
- // been decreased to zero.
- // NOTE: The inode lock is
- // held. See fs/inode.c::
- // generic_drop_inode(). */
- //.delete_inode = NULL, /* VFS: Delete inode from disk.
- // Called when i_count becomes
- // 0 and i_nlink is also 0. */
- //.write_super = NULL, /* Flush dirty super block to
- // disk. */
- //.sync_fs = NULL, /* ? */
- //.write_super_lockfs = NULL, /* ? */
- //.unlockfs = NULL, /* ? */
#endif /* NTFS_RW */
.put_super = ntfs_put_super, /* Syscall: umount. */
.statfs = ntfs_statfs, /* Syscall: statfs */
.remount_fs = ntfs_remount, /* Syscall: mount -o remount. */
.evict_inode = ntfs_evict_big_inode, /* VFS: Called when an inode is
removed from memory. */
- //.umount_begin = NULL, /* Forced umount. */
.show_options = ntfs_show_options, /* Show mount options in
proc. */
};
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 7602783d7f41..46a1f6d75104 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1971,6 +1971,7 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd,
{
struct inode *inode = file->f_path.dentry->d_inode;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ int ret;
if ((cmd == OCFS2_IOC_RESVSP || cmd == OCFS2_IOC_RESVSP64) &&
!ocfs2_writes_unwritten_extents(osb))
@@ -1985,7 +1986,12 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd,
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
- return __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0);
+ ret = mnt_want_write_file(file);
+ if (ret)
+ return ret;
+ ret = __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0);
+ mnt_drop_write_file(file);
+ return ret;
}
static long ocfs2_fallocate(struct file *file, int mode, loff_t offset,
@@ -2261,7 +2267,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
if (iocb->ki_left == 0)
return 0;
- vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);
+ sb_start_write(inode->i_sb);
appending = file->f_flags & O_APPEND ? 1 : 0;
direct_io = file->f_flags & O_DIRECT ? 1 : 0;
@@ -2436,6 +2442,7 @@ out_sems:
ocfs2_iocb_clear_sem_locked(iocb);
mutex_unlock(&inode->i_mutex);
+ sb_end_write(inode->i_sb);
if (written)
ret = written;
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index d96f7f81d8dd..f20edcbfe700 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -928,7 +928,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (get_user(new_clusters, (int __user *)arg))
return -EFAULT;
- return ocfs2_group_extend(inode, new_clusters);
+ status = mnt_want_write_file(filp);
+ if (status)
+ return status;
+ status = ocfs2_group_extend(inode, new_clusters);
+ mnt_drop_write_file(filp);
+ return status;
case OCFS2_IOC_GROUP_ADD:
case OCFS2_IOC_GROUP_ADD64:
if (!capable(CAP_SYS_RESOURCE))
@@ -937,7 +942,12 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
if (copy_from_user(&input, (int __user *) arg, sizeof(input)))
return -EFAULT;
- return ocfs2_group_add(inode, &input);
+ status = mnt_want_write_file(filp);
+ if (status)
+ return status;
+ status = ocfs2_group_add(inode, &input);
+ mnt_drop_write_file(filp);
+ return status;
case OCFS2_IOC_REFLINK:
if (copy_from_user(&args, argp, sizeof(args)))
return -EFAULT;
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 0a42ae96dca7..2dd36af79e26 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -355,11 +355,14 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs)
if (journal_current_handle())
return jbd2_journal_start(journal, max_buffs);
+ sb_start_intwrite(osb->sb);
+
down_read(&osb->journal->j_trans_barrier);
handle = jbd2_journal_start(journal, max_buffs);
if (IS_ERR(handle)) {
up_read(&osb->journal->j_trans_barrier);
+ sb_end_intwrite(osb->sb);
mlog_errno(PTR_ERR(handle));
@@ -388,8 +391,10 @@ int ocfs2_commit_trans(struct ocfs2_super *osb,
if (ret < 0)
mlog_errno(ret);
- if (!nested)
+ if (!nested) {
up_read(&journal->j_trans_barrier);
+ sb_end_intwrite(osb->sb);
+ }
return ret;
}
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 210c35237548..a9f78c74d687 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -784,14 +784,10 @@ bail:
static u32 ocfs2_local_alloc_count_bits(struct ocfs2_dinode *alloc)
{
- int i;
- u8 *buffer;
- u32 count = 0;
+ u32 count;
struct ocfs2_local_alloc *la = OCFS2_LOCAL_ALLOC(alloc);
- buffer = la->la_bitmap;
- for (i = 0; i < le16_to_cpu(la->la_size); i++)
- count += hweight8(buffer[i]);
+ count = memweight(la->la_bitmap, le16_to_cpu(la->la_size));
trace_ocfs2_local_alloc_count_bits(count);
return count;
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 9cd41083e991..d150372fd81d 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -136,6 +136,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
sigset_t oldset;
int ret;
+ sb_start_pagefault(inode->i_sb);
ocfs2_block_signals(&oldset);
/*
@@ -165,6 +166,7 @@ static int ocfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
out:
ocfs2_unblock_signals(&oldset);
+ sb_end_pagefault(inode->i_sb);
return ret;
}
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 9f32d7cbb7a3..30a055049e16 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4466,20 +4466,11 @@ int ocfs2_reflink_ioctl(struct inode *inode,
goto out_dput;
}
- error = mnt_want_write(new_path.mnt);
- if (error) {
- mlog_errno(error);
- goto out_dput;
- }
-
error = ocfs2_vfs_reflink(old_path.dentry,
new_path.dentry->d_inode,
new_dentry, preserve);
- mnt_drop_write(new_path.mnt);
out_dput:
- dput(new_dentry);
- mutex_unlock(&new_path.dentry->d_inode->i_mutex);
- path_put(&new_path);
+ done_path_create(&new_path, new_dentry);
out:
path_put(&old_path);
diff --git a/fs/open.c b/fs/open.c
index 1e914b397e12..e1f2cdb91a4d 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -164,11 +164,13 @@ static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
if (IS_APPEND(inode))
goto out_putf;
+ sb_start_write(inode->i_sb);
error = locks_verify_truncate(inode, file, length);
if (!error)
error = security_path_truncate(&file->f_path);
if (!error)
error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, file);
+ sb_end_write(inode->i_sb);
out_putf:
fput(file);
out:
@@ -266,7 +268,10 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (!file->f_op->fallocate)
return -EOPNOTSUPP;
- return file->f_op->fallocate(file, mode, offset, len);
+ sb_start_write(inode->i_sb);
+ ret = file->f_op->fallocate(file, mode, offset, len);
+ sb_end_write(inode->i_sb);
+ return ret;
}
SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
@@ -620,7 +625,7 @@ static inline int __get_file_write_access(struct inode *inode,
/*
* Balanced in __fput()
*/
- error = mnt_want_write(mnt);
+ error = __mnt_want_write(mnt);
if (error)
put_write_access(inode);
}
@@ -654,6 +659,7 @@ static int do_dentry_open(struct file *f,
if (unlikely(f->f_flags & O_PATH))
f->f_mode = FMODE_PATH;
+ path_get(&f->f_path);
inode = f->f_path.dentry->d_inode;
if (f->f_mode & FMODE_WRITE) {
error = __get_file_write_access(inode, f->f_path.mnt);
@@ -711,7 +717,7 @@ cleanup_all:
* here, so just reset the state.
*/
file_reset_write(f);
- mnt_drop_write(f->f_path.mnt);
+ __mnt_drop_write(f->f_path.mnt);
}
}
cleanup_file:
@@ -739,9 +745,7 @@ int finish_open(struct file *file, struct dentry *dentry,
int error;
BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
- mntget(file->f_path.mnt);
- file->f_path.dentry = dget(dentry);
-
+ file->f_path.dentry = dentry;
error = do_dentry_open(file, open, current_cred());
if (!error)
*opened |= FILE_OPENED;
@@ -784,7 +788,6 @@ struct file *dentry_open(const struct path *path, int flags,
f->f_flags = flags;
f->f_path = *path;
- path_get(&f->f_path);
error = do_dentry_open(f, NULL, cred);
if (!error) {
error = open_check_o_direct(f);
@@ -849,9 +852,10 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
int lookup_flags = 0;
int acc_mode;
- if (!(flags & O_CREAT))
- mode = 0;
- op->mode = mode;
+ if (flags & O_CREAT)
+ op->mode = (mode & S_IALLUGO) | S_IFREG;
+ else
+ op->mode = 0;
/* Must never be set by userspace */
flags &= ~FMODE_NONOTIFY;
diff --git a/fs/pipe.c b/fs/pipe.c
index 95cbd6b227e6..8d85d7068c1e 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1016,18 +1016,16 @@ fail_inode:
return NULL;
}
-struct file *create_write_pipe(int flags)
+int create_pipe_files(struct file **res, int flags)
{
int err;
- struct inode *inode;
+ struct inode *inode = get_pipe_inode();
struct file *f;
struct path path;
- struct qstr name = { .name = "" };
+ static struct qstr name = { .name = "" };
- err = -ENFILE;
- inode = get_pipe_inode();
if (!inode)
- goto err;
+ return -ENFILE;
err = -ENOMEM;
path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name);
@@ -1041,62 +1039,43 @@ struct file *create_write_pipe(int flags)
f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops);
if (!f)
goto err_dentry;
- f->f_mapping = inode->i_mapping;
f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT));
- f->f_version = 0;
- return f;
+ res[0] = alloc_file(&path, FMODE_READ, &read_pipefifo_fops);
+ if (!res[0])
+ goto err_file;
+
+ path_get(&path);
+ res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK);
+ res[1] = f;
+ return 0;
- err_dentry:
+err_file:
+ put_filp(f);
+err_dentry:
free_pipe_info(inode);
path_put(&path);
- return ERR_PTR(err);
+ return err;
- err_inode:
+err_inode:
free_pipe_info(inode);
iput(inode);
- err:
- return ERR_PTR(err);
-}
-
-void free_write_pipe(struct file *f)
-{
- free_pipe_info(f->f_dentry->d_inode);
- path_put(&f->f_path);
- put_filp(f);
-}
-
-struct file *create_read_pipe(struct file *wrf, int flags)
-{
- /* Grab pipe from the writer */
- struct file *f = alloc_file(&wrf->f_path, FMODE_READ,
- &read_pipefifo_fops);
- if (!f)
- return ERR_PTR(-ENFILE);
-
- path_get(&wrf->f_path);
- f->f_flags = O_RDONLY | (flags & O_NONBLOCK);
-
- return f;
+ return err;
}
int do_pipe_flags(int *fd, int flags)
{
- struct file *fw, *fr;
+ struct file *files[2];
int error;
int fdw, fdr;
if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT))
return -EINVAL;
- fw = create_write_pipe(flags);
- if (IS_ERR(fw))
- return PTR_ERR(fw);
- fr = create_read_pipe(fw, flags);
- error = PTR_ERR(fr);
- if (IS_ERR(fr))
- goto err_write_pipe;
+ error = create_pipe_files(files, flags);
+ if (error)
+ return error;
error = get_unused_fd_flags(flags);
if (error < 0)
@@ -1109,8 +1088,8 @@ int do_pipe_flags(int *fd, int flags)
fdw = error;
audit_fd_pair(fdr, fdw);
- fd_install(fdr, fr);
- fd_install(fdw, fw);
+ fd_install(fdr, files[0]);
+ fd_install(fdw, files[1]);
fd[0] = fdr;
fd[1] = fdw;
@@ -1119,10 +1098,8 @@ int do_pipe_flags(int *fd, int flags)
err_fdr:
put_unused_fd(fdr);
err_read_pipe:
- path_put(&fr->f_path);
- put_filp(fr);
- err_write_pipe:
- free_write_pipe(fw);
+ fput(files[0]);
+ fput(files[1]);
return error;
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 2772208338f8..1b6c84cbdb73 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -695,8 +695,6 @@ static int __mem_open(struct inode *inode, struct file *file, unsigned int mode)
mmput(mm);
}
- /* OK to pass negative loff_t, we can catch out-of-range */
- file->f_mode |= FMODE_UNSIGNED_OFFSET;
file->private_data = mm;
return 0;
@@ -704,7 +702,12 @@ static int __mem_open(struct inode *inode, struct file *file, unsigned int mode)
static int mem_open(struct inode *inode, struct file *file)
{
- return __mem_open(inode, file, PTRACE_MODE_ATTACH);
+ int ret = __mem_open(inode, file, PTRACE_MODE_ATTACH);
+
+ /* OK to pass negative loff_t, we can catch out-of-range */
+ file->f_mode |= FMODE_UNSIGNED_OFFSET;
+
+ return ret;
}
static ssize_t mem_rw(struct file *file, char __user *buf,
@@ -827,15 +830,16 @@ static ssize_t environ_read(struct file *file, char __user *buf,
if (!atomic_inc_not_zero(&mm->mm_users))
goto free;
while (count > 0) {
- int this_len, retval, max_len;
-
- this_len = mm->env_end - (mm->env_start + src);
+ size_t this_len, max_len;
+ int retval;
- if (this_len <= 0)
+ if (src >= (mm->env_end - mm->env_start))
break;
- max_len = (count > PAGE_SIZE) ? PAGE_SIZE : count;
- this_len = (this_len > max_len) ? max_len : this_len;
+ this_len = mm->env_end - (mm->env_start + src);
+
+ max_len = min_t(size_t, PAGE_SIZE, count);
+ this_len = min(max_len, this_len);
retval = access_remote_vm(mm, (mm->env_start + src),
page, this_len, 0);
diff --git a/fs/qnx4/bitmap.c b/fs/qnx4/bitmap.c
index 22e0d60e53ef..76a7a697b778 100644
--- a/fs/qnx4/bitmap.c
+++ b/fs/qnx4/bitmap.c
@@ -17,23 +17,6 @@
#include <linux/bitops.h>
#include "qnx4.h"
-static void count_bits(register const char *bmPart, register int size,
- int *const tf)
-{
- char b;
- int tot = *tf;
-
- if (size > QNX4_BLOCK_SIZE) {
- size = QNX4_BLOCK_SIZE;
- }
- do {
- b = *bmPart++;
- tot += 8 - hweight8(b);
- size--;
- } while (size != 0);
- *tf = tot;
-}
-
unsigned long qnx4_count_free_blocks(struct super_block *sb)
{
int start = le32_to_cpu(qnx4_sb(sb)->BitMap->di_first_xtnt.xtnt_blk) - 1;
@@ -44,13 +27,16 @@ unsigned long qnx4_count_free_blocks(struct super_block *sb)
struct buffer_head *bh;
while (total < size) {
+ int bytes = min(size - total, QNX4_BLOCK_SIZE);
+
if ((bh = sb_bread(sb, start + offset)) == NULL) {
printk(KERN_ERR "qnx4: I/O error in counting free blocks\n");
break;
}
- count_bits(bh->b_data, size - total, &total_free);
+ total_free += bytes * BITS_PER_BYTE -
+ memweight(bh->b_data, bytes);
brelse(bh);
- total += QNX4_BLOCK_SIZE;
+ total += bytes;
offset++;
}
diff --git a/fs/splice.c b/fs/splice.c
index 7bf08fa22ec9..41514dd89462 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -996,6 +996,8 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
};
ssize_t ret;
+ sb_start_write(inode->i_sb);
+
pipe_lock(pipe);
splice_from_pipe_begin(&sd);
@@ -1034,6 +1036,7 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
*ppos += ret;
balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
}
+ sb_end_write(inode->i_sb);
return ret;
}
diff --git a/fs/super.c b/fs/super.c
index c743fb3be4b8..0902cfa6a12e 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -33,12 +33,19 @@
#include <linux/rculist_bl.h>
#include <linux/cleancache.h>
#include <linux/fsnotify.h>
+#include <linux/lockdep.h>
#include "internal.h"
LIST_HEAD(super_blocks);
DEFINE_SPINLOCK(sb_lock);
+static char *sb_writers_name[SB_FREEZE_LEVELS] = {
+ "sb_writers",
+ "sb_pagefaults",
+ "sb_internal",
+};
+
/*
* One thing we have to be careful of with a per-sb shrinker is that we don't
* drop the last active reference to the superblock from within the shrinker.
@@ -62,7 +69,7 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
return -1;
if (!grab_super_passive(sb))
- return !sc->nr_to_scan ? 0 : -1;
+ return -1;
if (sb->s_op && sb->s_op->nr_cached_objects)
fs_objects = sb->s_op->nr_cached_objects(sb);
@@ -102,6 +109,35 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
return total_objects;
}
+static int init_sb_writers(struct super_block *s, struct file_system_type *type)
+{
+ int err;
+ int i;
+
+ for (i = 0; i < SB_FREEZE_LEVELS; i++) {
+ err = percpu_counter_init(&s->s_writers.counter[i], 0);
+ if (err < 0)
+ goto err_out;
+ lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i],
+ &type->s_writers_key[i], 0);
+ }
+ init_waitqueue_head(&s->s_writers.wait);
+ init_waitqueue_head(&s->s_writers.wait_unfrozen);
+ return 0;
+err_out:
+ while (--i >= 0)
+ percpu_counter_destroy(&s->s_writers.counter[i]);
+ return err;
+}
+
+static void destroy_sb_writers(struct super_block *s)
+{
+ int i;
+
+ for (i = 0; i < SB_FREEZE_LEVELS; i++)
+ percpu_counter_destroy(&s->s_writers.counter[i]);
+}
+
/**
* alloc_super - create new superblock
* @type: filesystem type superblock should belong to
@@ -117,18 +153,19 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
if (s) {
if (security_sb_alloc(s)) {
+ /*
+ * We cannot call security_sb_free() without
+ * security_sb_alloc() succeeding. So bail out manually
+ */
kfree(s);
s = NULL;
goto out;
}
#ifdef CONFIG_SMP
s->s_files = alloc_percpu(struct list_head);
- if (!s->s_files) {
- security_sb_free(s);
- kfree(s);
- s = NULL;
- goto out;
- } else {
+ if (!s->s_files)
+ goto err_out;
+ else {
int i;
for_each_possible_cpu(i)
@@ -137,6 +174,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
#else
INIT_LIST_HEAD(&s->s_files);
#endif
+ if (init_sb_writers(s, type))
+ goto err_out;
s->s_flags = flags;
s->s_bdi = &default_backing_dev_info;
INIT_HLIST_NODE(&s->s_instances);
@@ -178,7 +217,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
mutex_init(&s->s_dquot.dqio_mutex);
mutex_init(&s->s_dquot.dqonoff_mutex);
init_rwsem(&s->s_dquot.dqptr_sem);
- init_waitqueue_head(&s->s_wait_unfrozen);
s->s_maxbytes = MAX_NON_LFS;
s->s_op = &default_op;
s->s_time_gran = 1000000000;
@@ -190,6 +228,16 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
}
out:
return s;
+err_out:
+ security_sb_free(s);
+#ifdef CONFIG_SMP
+ if (s->s_files)
+ free_percpu(s->s_files);
+#endif
+ destroy_sb_writers(s);
+ kfree(s);
+ s = NULL;
+ goto out;
}
/**
@@ -203,6 +251,7 @@ static inline void destroy_super(struct super_block *s)
#ifdef CONFIG_SMP
free_percpu(s->s_files);
#endif
+ destroy_sb_writers(s);
security_sb_free(s);
WARN_ON(!list_empty(&s->s_mounts));
kfree(s->s_subtype);
@@ -320,7 +369,7 @@ static int grab_super(struct super_block *s) __releases(sb_lock)
/*
* grab_super_passive - acquire a passive reference
- * @s: reference we are trying to grab
+ * @sb: reference we are trying to grab
*
* Tries to acquire a passive reference. This is used in places where we
* cannot take an active reference but we need to ensure that the
@@ -488,46 +537,6 @@ void drop_super(struct super_block *sb)
EXPORT_SYMBOL(drop_super);
/**
- * sync_supers - helper for periodic superblock writeback
- *
- * Call the write_super method if present on all dirty superblocks in
- * the system. This is for the periodic writeback used by most older
- * filesystems. For data integrity superblock writeback use
- * sync_filesystems() instead.
- *
- * Note: check the dirty flag before waiting, so we don't
- * hold up the sync while mounting a device. (The newly
- * mounted device won't need syncing.)
- */
-void sync_supers(void)
-{
- struct super_block *sb, *p = NULL;
-
- spin_lock(&sb_lock);
- list_for_each_entry(sb, &super_blocks, s_list) {
- if (hlist_unhashed(&sb->s_instances))
- continue;
- if (sb->s_op->write_super && sb->s_dirt) {
- sb->s_count++;
- spin_unlock(&sb_lock);
-
- down_read(&sb->s_umount);
- if (sb->s_root && sb->s_dirt && (sb->s_flags & MS_BORN))
- sb->s_op->write_super(sb);
- up_read(&sb->s_umount);
-
- spin_lock(&sb_lock);
- if (p)
- __put_super(p);
- p = sb;
- }
- }
- if (p)
- __put_super(p);
- spin_unlock(&sb_lock);
-}
-
-/**
* iterate_supers - call function for all active superblocks
* @f: function to call
* @arg: argument to pass to it
@@ -651,10 +660,11 @@ struct super_block *get_super_thawed(struct block_device *bdev)
{
while (1) {
struct super_block *s = get_super(bdev);
- if (!s || s->s_frozen == SB_UNFROZEN)
+ if (!s || s->s_writers.frozen == SB_UNFROZEN)
return s;
up_read(&s->s_umount);
- vfs_check_frozen(s, SB_FREEZE_WRITE);
+ wait_event(s->s_writers.wait_unfrozen,
+ s->s_writers.frozen == SB_UNFROZEN);
put_super(s);
}
}
@@ -732,7 +742,7 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
int retval;
int remount_ro;
- if (sb->s_frozen != SB_UNFROZEN)
+ if (sb->s_writers.frozen != SB_UNFROZEN)
return -EBUSY;
#ifdef CONFIG_BLOCK
@@ -1163,6 +1173,120 @@ out:
return ERR_PTR(error);
}
+/*
+ * This is an internal function, please use sb_end_{write,pagefault,intwrite}
+ * instead.
+ */
+void __sb_end_write(struct super_block *sb, int level)
+{
+ percpu_counter_dec(&sb->s_writers.counter[level-1]);
+ /*
+ * Make sure s_writers are updated before we wake up waiters in
+ * freeze_super().
+ */
+ smp_mb();
+ if (waitqueue_active(&sb->s_writers.wait))
+ wake_up(&sb->s_writers.wait);
+ rwsem_release(&sb->s_writers.lock_map[level-1], 1, _RET_IP_);
+}
+EXPORT_SYMBOL(__sb_end_write);
+
+#ifdef CONFIG_LOCKDEP
+/*
+ * We want lockdep to tell us about possible deadlocks with freezing but
+ * it's it bit tricky to properly instrument it. Getting a freeze protection
+ * works as getting a read lock but there are subtle problems. XFS for example
+ * gets freeze protection on internal level twice in some cases, which is OK
+ * only because we already hold a freeze protection also on higher level. Due
+ * to these cases we have to tell lockdep we are doing trylock when we
+ * already hold a freeze protection for a higher freeze level.
+ */
+static void acquire_freeze_lock(struct super_block *sb, int level, bool trylock,
+ unsigned long ip)
+{
+ int i;
+
+ if (!trylock) {
+ for (i = 0; i < level - 1; i++)
+ if (lock_is_held(&sb->s_writers.lock_map[i])) {
+ trylock = true;
+ break;
+ }
+ }
+ rwsem_acquire_read(&sb->s_writers.lock_map[level-1], 0, trylock, ip);
+}
+#endif
+
+/*
+ * This is an internal function, please use sb_start_{write,pagefault,intwrite}
+ * instead.
+ */
+int __sb_start_write(struct super_block *sb, int level, bool wait)
+{
+retry:
+ if (unlikely(sb->s_writers.frozen >= level)) {
+ if (!wait)
+ return 0;
+ wait_event(sb->s_writers.wait_unfrozen,
+ sb->s_writers.frozen < level);
+ }
+
+#ifdef CONFIG_LOCKDEP
+ acquire_freeze_lock(sb, level, !wait, _RET_IP_);
+#endif
+ percpu_counter_inc(&sb->s_writers.counter[level-1]);
+ /*
+ * Make sure counter is updated before we check for frozen.
+ * freeze_super() first sets frozen and then checks the counter.
+ */
+ smp_mb();
+ if (unlikely(sb->s_writers.frozen >= level)) {
+ __sb_end_write(sb, level);
+ goto retry;
+ }
+ return 1;
+}
+EXPORT_SYMBOL(__sb_start_write);
+
+/**
+ * sb_wait_write - wait until all writers to given file system finish
+ * @sb: the super for which we wait
+ * @level: type of writers we wait for (normal vs page fault)
+ *
+ * This function waits until there are no writers of given type to given file
+ * system. Caller of this function should make sure there can be no new writers
+ * of type @level before calling this function. Otherwise this function can
+ * livelock.
+ */
+static void sb_wait_write(struct super_block *sb, int level)
+{
+ s64 writers;
+
+ /*
+ * We just cycle-through lockdep here so that it does not complain
+ * about returning with lock to userspace
+ */
+ rwsem_acquire(&sb->s_writers.lock_map[level-1], 0, 0, _THIS_IP_);
+ rwsem_release(&sb->s_writers.lock_map[level-1], 1, _THIS_IP_);
+
+ do {
+ DEFINE_WAIT(wait);
+
+ /*
+ * We use a barrier in prepare_to_wait() to separate setting
+ * of frozen and checking of the counter
+ */
+ prepare_to_wait(&sb->s_writers.wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+
+ writers = percpu_counter_sum(&sb->s_writers.counter[level-1]);
+ if (writers)
+ schedule();
+
+ finish_wait(&sb->s_writers.wait, &wait);
+ } while (writers);
+}
+
/**
* freeze_super - lock the filesystem and force it into a consistent state
* @sb: the super to lock
@@ -1170,6 +1294,31 @@ out:
* Syncs the super to make sure the filesystem is consistent and calls the fs's
* freeze_fs. Subsequent calls to this without first thawing the fs will return
* -EBUSY.
+ *
+ * During this function, sb->s_writers.frozen goes through these values:
+ *
+ * SB_UNFROZEN: File system is normal, all writes progress as usual.
+ *
+ * SB_FREEZE_WRITE: The file system is in the process of being frozen. New
+ * writes should be blocked, though page faults are still allowed. We wait for
+ * all writes to complete and then proceed to the next stage.
+ *
+ * SB_FREEZE_PAGEFAULT: Freezing continues. Now also page faults are blocked
+ * but internal fs threads can still modify the filesystem (although they
+ * should not dirty new pages or inodes), writeback can run etc. After waiting
+ * for all running page faults we sync the filesystem which will clean all
+ * dirty pages and inodes (no new dirty pages or inodes can be created when
+ * sync is running).
+ *
+ * SB_FREEZE_FS: The file system is frozen. Now all internal sources of fs
+ * modification are blocked (e.g. XFS preallocation truncation on inode
+ * reclaim). This is usually implemented by blocking new transactions for
+ * filesystems that have them and need this additional guard. After all
+ * internal writers are finished we call ->freeze_fs() to finish filesystem
+ * freezing. Then we transition to SB_FREEZE_COMPLETE state. This state is
+ * mostly auxiliary for filesystems to verify they do not modify frozen fs.
+ *
+ * sb->s_writers.frozen is protected by sb->s_umount.
*/
int freeze_super(struct super_block *sb)
{
@@ -1177,7 +1326,7 @@ int freeze_super(struct super_block *sb)
atomic_inc(&sb->s_active);
down_write(&sb->s_umount);
- if (sb->s_frozen) {
+ if (sb->s_writers.frozen != SB_UNFROZEN) {
deactivate_locked_super(sb);
return -EBUSY;
}
@@ -1188,33 +1337,53 @@ int freeze_super(struct super_block *sb)
}
if (sb->s_flags & MS_RDONLY) {
- sb->s_frozen = SB_FREEZE_TRANS;
- smp_wmb();
+ /* Nothing to do really... */
+ sb->s_writers.frozen = SB_FREEZE_COMPLETE;
up_write(&sb->s_umount);
return 0;
}
- sb->s_frozen = SB_FREEZE_WRITE;
+ /* From now on, no new normal writers can start */
+ sb->s_writers.frozen = SB_FREEZE_WRITE;
smp_wmb();
+ /* Release s_umount to preserve sb_start_write -> s_umount ordering */
+ up_write(&sb->s_umount);
+
+ sb_wait_write(sb, SB_FREEZE_WRITE);
+
+ /* Now we go and block page faults... */
+ down_write(&sb->s_umount);
+ sb->s_writers.frozen = SB_FREEZE_PAGEFAULT;
+ smp_wmb();
+
+ sb_wait_write(sb, SB_FREEZE_PAGEFAULT);
+
+ /* All writers are done so after syncing there won't be dirty data */
sync_filesystem(sb);
- sb->s_frozen = SB_FREEZE_TRANS;
+ /* Now wait for internal filesystem counter */
+ sb->s_writers.frozen = SB_FREEZE_FS;
smp_wmb();
+ sb_wait_write(sb, SB_FREEZE_FS);
- sync_blockdev(sb->s_bdev);
if (sb->s_op->freeze_fs) {
ret = sb->s_op->freeze_fs(sb);
if (ret) {
printk(KERN_ERR
"VFS:Filesystem freeze failed\n");
- sb->s_frozen = SB_UNFROZEN;
+ sb->s_writers.frozen = SB_UNFROZEN;
smp_wmb();
- wake_up(&sb->s_wait_unfrozen);
+ wake_up(&sb->s_writers.wait_unfrozen);
deactivate_locked_super(sb);
return ret;
}
}
+ /*
+ * This is just for debugging purposes so that fs can warn if it
+ * sees write activity when frozen is set to SB_FREEZE_COMPLETE.
+ */
+ sb->s_writers.frozen = SB_FREEZE_COMPLETE;
up_write(&sb->s_umount);
return 0;
}
@@ -1231,7 +1400,7 @@ int thaw_super(struct super_block *sb)
int error;
down_write(&sb->s_umount);
- if (sb->s_frozen == SB_UNFROZEN) {
+ if (sb->s_writers.frozen == SB_UNFROZEN) {
up_write(&sb->s_umount);
return -EINVAL;
}
@@ -1244,16 +1413,15 @@ int thaw_super(struct super_block *sb)
if (error) {
printk(KERN_ERR
"VFS:Filesystem thaw failed\n");
- sb->s_frozen = SB_FREEZE_TRANS;
up_write(&sb->s_umount);
return error;
}
}
out:
- sb->s_frozen = SB_UNFROZEN;
+ sb->s_writers.frozen = SB_UNFROZEN;
smp_wmb();
- wake_up(&sb->s_wait_unfrozen);
+ wake_up(&sb->s_writers.wait_unfrozen);
deactivate_locked_super(sb);
return 0;
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index a4759833d62d..614b2b544880 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -228,6 +228,8 @@ static int bin_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
ret = 0;
if (bb->vm_ops->page_mkwrite)
ret = bb->vm_ops->page_mkwrite(vma, vmf);
+ else
+ file_update_time(file);
sysfs_put_active(attr_sd);
return ret;
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 35389ca2d267..7bd6e72afd11 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -37,11 +37,11 @@
*
* A thing to keep in mind: inode @i_mutex is locked in most VFS operations we
* implement. However, this is not true for 'ubifs_writepage()', which may be
- * called with @i_mutex unlocked. For example, when pdflush is doing background
- * write-back, it calls 'ubifs_writepage()' with unlocked @i_mutex. At "normal"
- * work-paths the @i_mutex is locked in 'ubifs_writepage()', e.g. in the
- * "sys_write -> alloc_pages -> direct reclaim path". So, in 'ubifs_writepage()'
- * we are only guaranteed that the page is locked.
+ * called with @i_mutex unlocked. For example, when flusher thread is doing
+ * background write-back, it calls 'ubifs_writepage()' with unlocked @i_mutex.
+ * At "normal" work-paths the @i_mutex is locked in 'ubifs_writepage()', e.g.
+ * in the "sys_write -> alloc_pages -> direct reclaim path". So, in
+ * 'ubifs_writepage()' we are only guaranteed that the page is locked.
*
* Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the
* read-ahead path does not lock it ("sys_read -> generic_file_aio_read ->
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 1c766c39c038..c3fa6c5327a3 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -303,7 +303,7 @@ static int ubifs_write_inode(struct inode *inode, struct writeback_control *wbc)
mutex_lock(&ui->ui_mutex);
/*
* Due to races between write-back forced by budgeting
- * (see 'sync_some_inodes()') and pdflush write-back, the inode may
+ * (see 'sync_some_inodes()') and background write-back, the inode may
* have already been synchronized, do not do this again. This might
* also happen if it was synchronized in an VFS operation, e.g.
* 'ubifs_link()'.
diff --git a/fs/xattr.c b/fs/xattr.c
index 1d7ac3790458..4d45b7189e7e 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -427,6 +427,7 @@ getxattr(struct dentry *d, const char __user *name, void __user *value,
{
ssize_t error;
void *kvalue = NULL;
+ void *vvalue = NULL;
char kname[XATTR_NAME_MAX + 1];
error = strncpy_from_user(kname, name, sizeof(kname));
@@ -438,9 +439,13 @@ getxattr(struct dentry *d, const char __user *name, void __user *value,
if (size) {
if (size > XATTR_SIZE_MAX)
size = XATTR_SIZE_MAX;
- kvalue = kzalloc(size, GFP_KERNEL);
- if (!kvalue)
- return -ENOMEM;
+ kvalue = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
+ if (!kvalue) {
+ vvalue = vmalloc(size);
+ if (!vvalue)
+ return -ENOMEM;
+ kvalue = vvalue;
+ }
}
error = vfs_getxattr(d, kname, kvalue, size);
@@ -452,7 +457,10 @@ getxattr(struct dentry *d, const char __user *name, void __user *value,
than XATTR_SIZE_MAX bytes. Not possible. */
error = -E2BIG;
}
- kfree(kvalue);
+ if (vvalue)
+ vfree(vvalue);
+ else
+ kfree(kvalue);
return error;
}
diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h
index a6caa0022c9b..359fb86ed876 100644
--- a/fs/xfs/xfs_alloc_btree.h
+++ b/fs/xfs/xfs_alloc_btree.h
@@ -51,20 +51,6 @@ typedef struct xfs_alloc_rec_incore {
typedef __be32 xfs_alloc_ptr_t;
/*
- * Minimum and maximum blocksize and sectorsize.
- * The blocksize upper limit is pretty much arbitrary.
- * The sectorsize upper limit is due to sizeof(sb_sectsize).
- */
-#define XFS_MIN_BLOCKSIZE_LOG 9 /* i.e. 512 bytes */
-#define XFS_MAX_BLOCKSIZE_LOG 16 /* i.e. 65536 bytes */
-#define XFS_MIN_BLOCKSIZE (1 << XFS_MIN_BLOCKSIZE_LOG)
-#define XFS_MAX_BLOCKSIZE (1 << XFS_MAX_BLOCKSIZE_LOG)
-#define XFS_MIN_SECTORSIZE_LOG 9 /* i.e. 512 bytes */
-#define XFS_MAX_SECTORSIZE_LOG 15 /* i.e. 32768 bytes */
-#define XFS_MIN_SECTORSIZE (1 << XFS_MIN_SECTORSIZE_LOG)
-#define XFS_MAX_SECTORSIZE (1 << XFS_MAX_SECTORSIZE_LOG)
-
-/*
* Block numbers in the AG:
* SB is sector 0, AGF is sector 1, AGI is sector 2, AGFL is sector 3.
*/
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 8dad722c0041..e562dd43f41f 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -124,6 +124,12 @@ xfs_setfilesize_trans_alloc(
ioend->io_append_trans = tp;
/*
+ * We will pass freeze protection with a transaction. So tell lockdep
+ * we released it.
+ */
+ rwsem_release(&ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
+ 1, _THIS_IP_);
+ /*
* We hand off the transaction to the completion thread now, so
* clear the flag here.
*/
@@ -179,7 +185,7 @@ xfs_finish_ioend(
if (atomic_dec_and_test(&ioend->io_remaining)) {
struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
- if (ioend->io_type == IO_UNWRITTEN)
+ if (ioend->io_type == XFS_IO_UNWRITTEN)
queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
else if (ioend->io_append_trans)
queue_work(mp->m_data_workqueue, &ioend->io_work);
@@ -199,6 +205,15 @@ xfs_end_io(
struct xfs_inode *ip = XFS_I(ioend->io_inode);
int error = 0;
+ if (ioend->io_append_trans) {
+ /*
+ * We've got freeze protection passed with the transaction.
+ * Tell lockdep about it.
+ */
+ rwsem_acquire_read(
+ &ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
+ 0, 1, _THIS_IP_);
+ }
if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
ioend->io_error = -EIO;
goto done;
@@ -210,7 +225,7 @@ xfs_end_io(
* For unwritten extents we need to issue transactions to convert a
* range to normal written extens after the data I/O has finished.
*/
- if (ioend->io_type == IO_UNWRITTEN) {
+ if (ioend->io_type == XFS_IO_UNWRITTEN) {
/*
* For buffered I/O we never preallocate a transaction when
* doing the unwritten extent conversion, but for direct I/O
@@ -312,7 +327,7 @@ xfs_map_blocks(
if (XFS_FORCED_SHUTDOWN(mp))
return -XFS_ERROR(EIO);
- if (type == IO_UNWRITTEN)
+ if (type == XFS_IO_UNWRITTEN)
bmapi_flags |= XFS_BMAPI_IGSTATE;
if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
@@ -323,10 +338,10 @@ xfs_map_blocks(
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
(ip->i_df.if_flags & XFS_IFEXTENTS));
- ASSERT(offset <= mp->m_maxioffset);
+ ASSERT(offset <= mp->m_super->s_maxbytes);
- if (offset + count > mp->m_maxioffset)
- count = mp->m_maxioffset - offset;
+ if (offset + count > mp->m_super->s_maxbytes)
+ count = mp->m_super->s_maxbytes - offset;
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
@@ -336,7 +351,7 @@ xfs_map_blocks(
if (error)
return -XFS_ERROR(error);
- if (type == IO_DELALLOC &&
+ if (type == XFS_IO_DELALLOC &&
(!nimaps || isnullstartblock(imap->br_startblock))) {
error = xfs_iomap_write_allocate(ip, offset, count, imap);
if (!error)
@@ -345,7 +360,7 @@ xfs_map_blocks(
}
#ifdef DEBUG
- if (type == IO_UNWRITTEN) {
+ if (type == XFS_IO_UNWRITTEN) {
ASSERT(nimaps);
ASSERT(imap->br_startblock != HOLESTARTBLOCK);
ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
@@ -634,11 +649,11 @@ xfs_check_page_type(
bh = head = page_buffers(page);
do {
if (buffer_unwritten(bh))
- acceptable += (type == IO_UNWRITTEN);
+ acceptable += (type == XFS_IO_UNWRITTEN);
else if (buffer_delay(bh))
- acceptable += (type == IO_DELALLOC);
+ acceptable += (type == XFS_IO_DELALLOC);
else if (buffer_dirty(bh) && buffer_mapped(bh))
- acceptable += (type == IO_OVERWRITE);
+ acceptable += (type == XFS_IO_OVERWRITE);
else
break;
} while ((bh = bh->b_this_page) != head);
@@ -721,11 +736,11 @@ xfs_convert_page(
if (buffer_unwritten(bh) || buffer_delay(bh) ||
buffer_mapped(bh)) {
if (buffer_unwritten(bh))
- type = IO_UNWRITTEN;
+ type = XFS_IO_UNWRITTEN;
else if (buffer_delay(bh))
- type = IO_DELALLOC;
+ type = XFS_IO_DELALLOC;
else
- type = IO_OVERWRITE;
+ type = XFS_IO_OVERWRITE;
if (!xfs_imap_valid(inode, imap, offset)) {
done = 1;
@@ -733,7 +748,7 @@ xfs_convert_page(
}
lock_buffer(bh);
- if (type != IO_OVERWRITE)
+ if (type != XFS_IO_OVERWRITE)
xfs_map_at_offset(inode, bh, imap, offset);
xfs_add_to_ioend(inode, bh, offset, type,
ioendp, done);
@@ -831,7 +846,7 @@ xfs_aops_discard_page(
struct buffer_head *bh, *head;
loff_t offset = page_offset(page);
- if (!xfs_check_page_type(page, IO_DELALLOC))
+ if (!xfs_check_page_type(page, XFS_IO_DELALLOC))
goto out_invalidate;
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
@@ -927,11 +942,26 @@ xfs_vm_writepage(
end_index = offset >> PAGE_CACHE_SHIFT;
last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
if (page->index >= end_index) {
- if ((page->index >= end_index + 1) ||
- !(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
+ unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1);
+
+ /*
+ * Just skip the page if it is fully outside i_size, e.g. due
+ * to a truncate operation that is in progress.
+ */
+ if (page->index >= end_index + 1 || offset_into_page == 0) {
unlock_page(page);
return 0;
}
+
+ /*
+ * The page straddles i_size. It must be zeroed out on each
+ * and every writepage invocation because it may be mmapped.
+ * "A file is mapped in multiples of the page size. For a file
+ * that is not a multiple of the page size, the remaining
+ * memory is zeroed when mapped, and writes to that region are
+ * not written out to the file."
+ */
+ zero_user_segment(page, offset_into_page, PAGE_CACHE_SIZE);
}
end_offset = min_t(unsigned long long,
@@ -941,7 +971,7 @@ xfs_vm_writepage(
bh = head = page_buffers(page);
offset = page_offset(page);
- type = IO_OVERWRITE;
+ type = XFS_IO_OVERWRITE;
if (wbc->sync_mode == WB_SYNC_NONE)
nonblocking = 1;
@@ -966,18 +996,18 @@ xfs_vm_writepage(
}
if (buffer_unwritten(bh)) {
- if (type != IO_UNWRITTEN) {
- type = IO_UNWRITTEN;
+ if (type != XFS_IO_UNWRITTEN) {
+ type = XFS_IO_UNWRITTEN;
imap_valid = 0;
}
} else if (buffer_delay(bh)) {
- if (type != IO_DELALLOC) {
- type = IO_DELALLOC;
+ if (type != XFS_IO_DELALLOC) {
+ type = XFS_IO_DELALLOC;
imap_valid = 0;
}
} else if (buffer_uptodate(bh)) {
- if (type != IO_OVERWRITE) {
- type = IO_OVERWRITE;
+ if (type != XFS_IO_OVERWRITE) {
+ type = XFS_IO_OVERWRITE;
imap_valid = 0;
}
} else {
@@ -1013,7 +1043,7 @@ xfs_vm_writepage(
}
if (imap_valid) {
lock_buffer(bh);
- if (type != IO_OVERWRITE)
+ if (type != XFS_IO_OVERWRITE)
xfs_map_at_offset(inode, bh, &imap, offset);
xfs_add_to_ioend(inode, bh, offset, type, &ioend,
new_ioend);
@@ -1054,7 +1084,7 @@ xfs_vm_writepage(
* Reserve log space if we might write beyond the on-disk
* inode size.
*/
- if (ioend->io_type != IO_UNWRITTEN &&
+ if (ioend->io_type != XFS_IO_UNWRITTEN &&
xfs_ioend_is_append(ioend)) {
err = xfs_setfilesize_trans_alloc(ioend);
if (err)
@@ -1162,9 +1192,9 @@ __xfs_get_blocks(
lockmode = xfs_ilock_map_shared(ip);
}
- ASSERT(offset <= mp->m_maxioffset);
- if (offset + size > mp->m_maxioffset)
- size = mp->m_maxioffset - offset;
+ ASSERT(offset <= mp->m_super->s_maxbytes);
+ if (offset + size > mp->m_super->s_maxbytes)
+ size = mp->m_super->s_maxbytes - offset;
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
offset_fsb = XFS_B_TO_FSBT(mp, offset);
@@ -1351,7 +1381,7 @@ xfs_end_io_direct_write(
ioend->io_iocb = iocb;
ioend->io_result = ret;
if (private && size > 0)
- ioend->io_type = IO_UNWRITTEN;
+ ioend->io_type = XFS_IO_UNWRITTEN;
if (is_async) {
ioend->io_isasync = 1;
@@ -1383,7 +1413,7 @@ xfs_vm_direct_IO(
* and converts at least on unwritten extent we will cancel
* the still clean transaction after the I/O has finished.
*/
- iocb->private = ioend = xfs_alloc_ioend(inode, IO_DIRECT);
+ iocb->private = ioend = xfs_alloc_ioend(inode, XFS_IO_DIRECT);
if (offset + size > XFS_I(inode)->i_d.di_size) {
ret = xfs_setfilesize_trans_alloc(ioend);
if (ret)
@@ -1410,6 +1440,9 @@ out_trans_cancel:
if (ioend->io_append_trans) {
current_set_flags_nested(&ioend->io_append_trans->t_pflags,
PF_FSTRANS);
+ rwsem_acquire_read(
+ &inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
+ 0, 1, _THIS_IP_);
xfs_trans_cancel(ioend->io_append_trans, 0);
}
out_destroy_ioend:
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 84eafbcb0d9d..c325abb8d61a 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -24,17 +24,17 @@ extern mempool_t *xfs_ioend_pool;
* Types of I/O for bmap clustering and I/O completion tracking.
*/
enum {
- IO_DIRECT = 0, /* special case for direct I/O ioends */
- IO_DELALLOC, /* mapping covers delalloc region */
- IO_UNWRITTEN, /* mapping covers allocated but uninitialized data */
- IO_OVERWRITE, /* mapping covers already allocated extent */
+ XFS_IO_DIRECT = 0, /* special case for direct I/O ioends */
+ XFS_IO_DELALLOC, /* covers delalloc region */
+ XFS_IO_UNWRITTEN, /* covers allocated but uninitialized data */
+ XFS_IO_OVERWRITE, /* covers already allocated extent */
};
#define XFS_IO_TYPES \
{ 0, "" }, \
- { IO_DELALLOC, "delalloc" }, \
- { IO_UNWRITTEN, "unwritten" }, \
- { IO_OVERWRITE, "overwrite" }
+ { XFS_IO_DELALLOC, "delalloc" }, \
+ { XFS_IO_UNWRITTEN, "unwritten" }, \
+ { XFS_IO_OVERWRITE, "overwrite" }
/*
* xfs_ioend struct manages large extent writes for XFS.
diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c
index a17ff01b5adf..0ca1f0be62d2 100644
--- a/fs/xfs/xfs_attr.c
+++ b/fs/xfs/xfs_attr.c
@@ -893,7 +893,7 @@ STATIC int
xfs_attr_leaf_addname(xfs_da_args_t *args)
{
xfs_inode_t *dp;
- xfs_dabuf_t *bp;
+ struct xfs_buf *bp;
int retval, error, committed, forkoff;
trace_xfs_attr_leaf_addname(args);
@@ -915,11 +915,11 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
*/
retval = xfs_attr_leaf_lookup_int(bp, args);
if ((args->flags & ATTR_REPLACE) && (retval == ENOATTR)) {
- xfs_da_brelse(args->trans, bp);
+ xfs_trans_brelse(args->trans, bp);
return(retval);
} else if (retval == EEXIST) {
if (args->flags & ATTR_CREATE) { /* pure create op */
- xfs_da_brelse(args->trans, bp);
+ xfs_trans_brelse(args->trans, bp);
return(retval);
}
@@ -937,7 +937,6 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
* if required.
*/
retval = xfs_attr_leaf_add(bp, args);
- xfs_da_buf_done(bp);
if (retval == ENOSPC) {
/*
* Promote the attribute list to the Btree format, then
@@ -1065,8 +1064,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)
*/
if (committed)
xfs_trans_ijoin(args->trans, dp, 0);
- } else
- xfs_da_buf_done(bp);
+ }
/*
* Commit the remove and start the next trans in series.
@@ -1092,7 +1090,7 @@ STATIC int
xfs_attr_leaf_removename(xfs_da_args_t *args)
{
xfs_inode_t *dp;
- xfs_dabuf_t *bp;
+ struct xfs_buf *bp;
int error, committed, forkoff;
trace_xfs_attr_leaf_removename(args);
@@ -1111,7 +1109,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
ASSERT(bp != NULL);
error = xfs_attr_leaf_lookup_int(bp, args);
if (error == ENOATTR) {
- xfs_da_brelse(args->trans, bp);
+ xfs_trans_brelse(args->trans, bp);
return(error);
}
@@ -1141,8 +1139,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
*/
if (committed)
xfs_trans_ijoin(args->trans, dp, 0);
- } else
- xfs_da_buf_done(bp);
+ }
return(0);
}
@@ -1155,7 +1152,7 @@ xfs_attr_leaf_removename(xfs_da_args_t *args)
STATIC int
xfs_attr_leaf_get(xfs_da_args_t *args)
{
- xfs_dabuf_t *bp;
+ struct xfs_buf *bp;
int error;
args->blkno = 0;
@@ -1167,11 +1164,11 @@ xfs_attr_leaf_get(xfs_da_args_t *args)
error = xfs_attr_leaf_lookup_int(bp, args);
if (error != EEXIST) {
- xfs_da_brelse(args->trans, bp);
+ xfs_trans_brelse(args->trans, bp);
return(error);
}
error = xfs_attr_leaf_getvalue(bp, args);
- xfs_da_brelse(args->trans, bp);
+ xfs_trans_brelse(args->trans, bp);
if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) {
error = xfs_attr_rmtval_get(args);
}
@@ -1186,23 +1183,23 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context)
{
xfs_attr_leafblock_t *leaf;
int error;
- xfs_dabuf_t *bp;
+ struct xfs_buf *bp;
context->cursor->blkno = 0;
error = xfs_da_read_buf(NULL, context->dp, 0, -1, &bp, XFS_ATTR_FORK);
if (error)
return XFS_ERROR(error);
ASSERT(bp != NULL);
- leaf = bp->data;
+ leaf = bp->b_addr;
if (unlikely(leaf->hdr.info.magic != cpu_to_be16(XFS_ATTR_LEAF_MAGIC))) {
XFS_CORRUPTION_ERROR("xfs_attr_leaf_list", XFS_ERRLEVEL_LOW,
context->dp->i_mount, leaf);
- xfs_da_brelse(NULL, bp);
+ xfs_trans_brelse(NULL, bp);
return XFS_ERROR(EFSCORRUPTED);
}
error = xfs_attr_leaf_list_int(bp, context);
- xfs_da_brelse(NULL, bp);
+ xfs_trans_brelse(NULL, bp);
return XFS_ERROR(error);
}
@@ -1489,7 +1486,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
xfs_da_state_t *state;
xfs_da_state_blk_t *blk;
xfs_inode_t *dp;
- xfs_dabuf_t *bp;
+ struct xfs_buf *bp;
int retval, error, committed, forkoff;
trace_xfs_attr_node_removename(args);
@@ -1601,14 +1598,13 @@ xfs_attr_node_removename(xfs_da_args_t *args)
*/
ASSERT(state->path.active == 1);
ASSERT(state->path.blk[0].bp);
- xfs_da_buf_done(state->path.blk[0].bp);
state->path.blk[0].bp = NULL;
error = xfs_da_read_buf(args->trans, args->dp, 0, -1, &bp,
XFS_ATTR_FORK);
if (error)
goto out;
- ASSERT((((xfs_attr_leafblock_t *)bp->data)->hdr.info.magic) ==
+ ASSERT((((xfs_attr_leafblock_t *)bp->b_addr)->hdr.info.magic) ==
cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
if ((forkoff = xfs_attr_shortform_allfit(bp, dp))) {
@@ -1635,7 +1631,7 @@ xfs_attr_node_removename(xfs_da_args_t *args)
if (committed)
xfs_trans_ijoin(args->trans, dp, 0);
} else
- xfs_da_brelse(args->trans, bp);
+ xfs_trans_brelse(args->trans, bp);
}
error = 0;
@@ -1665,8 +1661,7 @@ xfs_attr_fillstate(xfs_da_state_t *state)
ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
if (blk->bp) {
- blk->disk_blkno = xfs_da_blkno(blk->bp);
- xfs_da_buf_done(blk->bp);
+ blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
blk->bp = NULL;
} else {
blk->disk_blkno = 0;
@@ -1681,8 +1676,7 @@ xfs_attr_fillstate(xfs_da_state_t *state)
ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
if (blk->bp) {
- blk->disk_blkno = xfs_da_blkno(blk->bp);
- xfs_da_buf_done(blk->bp);
+ blk->disk_blkno = XFS_BUF_ADDR(blk->bp);
blk->bp = NULL;
} else {
blk->disk_blkno = 0;
@@ -1792,7 +1786,7 @@ xfs_attr_node_get(xfs_da_args_t *args)
* If not in a transaction, we have to release all the buffers.
*/
for (i = 0; i < state->path.active; i++) {
- xfs_da_brelse(args->trans, state->path.blk[i].bp);
+ xfs_trans_brelse(args->trans, state->path.blk[i].bp);
state->path.blk[i].bp = NULL;
}
@@ -1808,7 +1802,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
xfs_da_intnode_t *node;
xfs_da_node_entry_t *btree;
int error, i;
- xfs_dabuf_t *bp;
+ struct xfs_buf *bp;
cursor = context->cursor;
cursor->initted = 1;
@@ -1825,30 +1819,30 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
if ((error != 0) && (error != EFSCORRUPTED))
return(error);
if (bp) {
- node = bp->data;
+ node = bp->b_addr;
switch (be16_to_cpu(node->hdr.info.magic)) {
case XFS_DA_NODE_MAGIC:
trace_xfs_attr_list_wrong_blk(context);
- xfs_da_brelse(NULL, bp);
+ xfs_trans_brelse(NULL, bp);
bp = NULL;
break;
case XFS_ATTR_LEAF_MAGIC:
- leaf = bp->data;
+ leaf = bp->b_addr;
if (cursor->hashval > be32_to_cpu(leaf->entries[
be16_to_cpu(leaf->hdr.count)-1].hashval)) {
trace_xfs_attr_list_wrong_blk(context);
- xfs_da_brelse(NULL, bp);
+ xfs_trans_brelse(NULL, bp);
bp = NULL;
} else if (cursor->hashval <=
be32_to_cpu(leaf->entries[0].hashval)) {
trace_xfs_attr_list_wrong_blk(context);
- xfs_da_brelse(NULL, bp);
+ xfs_trans_brelse(NULL, bp);
bp = NULL;
}
break;
default:
trace_xfs_attr_list_wrong_blk(context);
- xfs_da_brelse(NULL, bp);
+ xfs_trans_brelse(NULL, bp);
bp = NULL;
}
}
@@ -1873,7 +1867,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
context->dp->i_mount);
return(XFS_ERROR(EFSCORRUPTED));
}
- node = bp->data;
+ node = bp->b_addr;
if (node->hdr.info.magic ==
cpu_to_be16(XFS_ATTR_LEAF_MAGIC))
break;
@@ -1883,7 +1877,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
XFS_ERRLEVEL_LOW,
context->dp->i_mount,
node);
- xfs_da_brelse(NULL, bp);
+ xfs_trans_brelse(NULL, bp);
return(XFS_ERROR(EFSCORRUPTED));
}
btree = node->btree;
@@ -1898,10 +1892,10 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
}
}
if (i == be16_to_cpu(node->hdr.count)) {
- xfs_da_brelse(NULL, bp);
+ xfs_trans_brelse(NULL, bp);
return(0);
}
- xfs_da_brelse(NULL, bp);
+ xfs_trans_brelse(NULL, bp);
}
}
ASSERT(bp != NULL);
@@ -1912,24 +1906,24 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
* adding the information.
*/
for (;;) {
- leaf = bp->data;
+ leaf = bp->b_addr;
if (unlikely(leaf->hdr.info.magic !=
cpu_to_be16(XFS_ATTR_LEAF_MAGIC))) {
XFS_CORRUPTION_ERROR("xfs_attr_node_list(4)",
XFS_ERRLEVEL_LOW,
context->dp->i_mount, leaf);
- xfs_da_brelse(NULL, bp);
+ xfs_trans_brelse(NULL, bp);
return(XFS_ERROR(EFSCORRUPTED));
}
error = xfs_attr_leaf_list_int(bp, context);
if (error) {
- xfs_da_brelse(NULL, bp);
+ xfs_trans_brelse(NULL, bp);
return error;
}
if (context->seen_enough || leaf->hdr.info.forw == 0)
break;
cursor->blkno = be32_to_cpu(leaf->hdr.info.forw);
- xfs_da_brelse(NULL, bp);
+ xfs_trans_brelse(NULL, bp);
error = xfs_da_read_buf(NULL, context->dp, cursor->blkno, -1,
&bp, XFS_ATTR_FORK);
if (error)
@@ -1941,7 +1935,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)
return(XFS_ERROR(EFSCORRUPTED));
}
}
- xfs_da_brelse(NULL, bp);
+ xfs_trans_brelse(NULL, bp);
return(0);
}
diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c
index 7d89d800f517..d330111ca738 100644
--- a/fs/xfs/xfs_attr_leaf.c
+++ b/fs/xfs/xfs_attr_leaf.c
@@ -54,10 +54,10 @@
* Routines used for growing the Btree.
*/
STATIC int xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t which_block,
- xfs_dabuf_t **bpp);
-STATIC int xfs_attr_leaf_add_work(xfs_dabuf_t *leaf_buffer, xfs_da_args_t *args,
- int freemap_index);
-STATIC void xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *leaf_buffer);
+ struct xfs_buf **bpp);
+STATIC int xfs_attr_leaf_add_work(struct xfs_buf *leaf_buffer,
+ xfs_da_args_t *args, int freemap_index);
+STATIC void xfs_attr_leaf_compact(xfs_trans_t *tp, struct xfs_buf *leaf_buffer);
STATIC void xfs_attr_leaf_rebalance(xfs_da_state_t *state,
xfs_da_state_blk_t *blk1,
xfs_da_state_blk_t *blk2);
@@ -71,9 +71,9 @@ STATIC int xfs_attr_leaf_figure_balance(xfs_da_state_t *state,
* Routines used for shrinking the Btree.
*/
STATIC int xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp,
- xfs_dabuf_t *bp, int level);
+ struct xfs_buf *bp, int level);
STATIC int xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp,
- xfs_dabuf_t *bp);
+ struct xfs_buf *bp);
STATIC int xfs_attr_leaf_freextent(xfs_trans_t **trans, xfs_inode_t *dp,
xfs_dablk_t blkno, int blkcnt);
@@ -480,7 +480,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
char *tmpbuffer;
int error, i, size;
xfs_dablk_t blkno;
- xfs_dabuf_t *bp;
+ struct xfs_buf *bp;
xfs_ifork_t *ifp;
trace_xfs_attr_sf_to_leaf(args);
@@ -550,8 +550,6 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
error = 0;
out:
- if(bp)
- xfs_da_buf_done(bp);
kmem_free(tmpbuffer);
return(error);
}
@@ -737,14 +735,16 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
* a shortform attribute list.
*/
int
-xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp)
+xfs_attr_shortform_allfit(
+ struct xfs_buf *bp,
+ struct xfs_inode *dp)
{
xfs_attr_leafblock_t *leaf;
xfs_attr_leaf_entry_t *entry;
xfs_attr_leaf_name_local_t *name_loc;
int bytes, i;
- leaf = bp->data;
+ leaf = bp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
entry = &leaf->entries[0];
@@ -774,7 +774,10 @@ xfs_attr_shortform_allfit(xfs_dabuf_t *bp, xfs_inode_t *dp)
* Convert a leaf attribute list to shortform attribute list
*/
int
-xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
+xfs_attr_leaf_to_shortform(
+ struct xfs_buf *bp,
+ xfs_da_args_t *args,
+ int forkoff)
{
xfs_attr_leafblock_t *leaf;
xfs_attr_leaf_entry_t *entry;
@@ -791,10 +794,10 @@ xfs_attr_leaf_to_shortform(xfs_dabuf_t *bp, xfs_da_args_t *args, int forkoff)
ASSERT(tmpbuffer != NULL);
ASSERT(bp != NULL);
- memcpy(tmpbuffer, bp->data, XFS_LBSIZE(dp->i_mount));
+ memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(dp->i_mount));
leaf = (xfs_attr_leafblock_t *)tmpbuffer;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
- memset(bp->data, 0, XFS_LBSIZE(dp->i_mount));
+ memset(bp->b_addr, 0, XFS_LBSIZE(dp->i_mount));
/*
* Clean out the prior contents of the attribute list.
@@ -855,7 +858,7 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args)
xfs_attr_leafblock_t *leaf;
xfs_da_intnode_t *node;
xfs_inode_t *dp;
- xfs_dabuf_t *bp1, *bp2;
+ struct xfs_buf *bp1, *bp2;
xfs_dablk_t blkno;
int error;
@@ -877,10 +880,9 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args)
if (error)
goto out;
ASSERT(bp2 != NULL);
- memcpy(bp2->data, bp1->data, XFS_LBSIZE(dp->i_mount));
- xfs_da_buf_done(bp1);
+ memcpy(bp2->b_addr, bp1->b_addr, XFS_LBSIZE(dp->i_mount));
bp1 = NULL;
- xfs_da_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1);
+ xfs_trans_log_buf(args->trans, bp2, 0, XFS_LBSIZE(dp->i_mount) - 1);
/*
* Set up the new root node.
@@ -888,21 +890,17 @@ xfs_attr_leaf_to_node(xfs_da_args_t *args)
error = xfs_da_node_create(args, 0, 1, &bp1, XFS_ATTR_FORK);
if (error)
goto out;
- node = bp1->data;
- leaf = bp2->data;
+ node = bp1->b_addr;
+ leaf = bp2->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
/* both on-disk, don't endian-flip twice */
node->btree[0].hashval =
leaf->entries[be16_to_cpu(leaf->hdr.count)-1 ].hashval;
node->btree[0].before = cpu_to_be32(blkno);
node->hdr.count = cpu_to_be16(1);
- xfs_da_log_buf(args->trans, bp1, 0, XFS_LBSIZE(dp->i_mount) - 1);
+ xfs_trans_log_buf(args->trans, bp1, 0, XFS_LBSIZE(dp->i_mount) - 1);
error = 0;
out:
- if (bp1)
- xfs_da_buf_done(bp1);
- if (bp2)
- xfs_da_buf_done(bp2);
return(error);
}
@@ -916,12 +914,15 @@ out:
* or a leaf in a node attribute list.
*/
STATIC int
-xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
+xfs_attr_leaf_create(
+ xfs_da_args_t *args,
+ xfs_dablk_t blkno,
+ struct xfs_buf **bpp)
{
xfs_attr_leafblock_t *leaf;
xfs_attr_leaf_hdr_t *hdr;
xfs_inode_t *dp;
- xfs_dabuf_t *bp;
+ struct xfs_buf *bp;
int error;
trace_xfs_attr_leaf_create(args);
@@ -933,7 +934,7 @@ xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
if (error)
return(error);
ASSERT(bp != NULL);
- leaf = bp->data;
+ leaf = bp->b_addr;
memset((char *)leaf, 0, XFS_LBSIZE(dp->i_mount));
hdr = &leaf->hdr;
hdr->info.magic = cpu_to_be16(XFS_ATTR_LEAF_MAGIC);
@@ -947,7 +948,7 @@ xfs_attr_leaf_create(xfs_da_args_t *args, xfs_dablk_t blkno, xfs_dabuf_t **bpp)
hdr->freemap[0].size = cpu_to_be16(be16_to_cpu(hdr->firstused) -
sizeof(xfs_attr_leaf_hdr_t));
- xfs_da_log_buf(args->trans, bp, 0, XFS_LBSIZE(dp->i_mount) - 1);
+ xfs_trans_log_buf(args->trans, bp, 0, XFS_LBSIZE(dp->i_mount) - 1);
*bpp = bp;
return(0);
@@ -1014,7 +1015,9 @@ xfs_attr_leaf_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
* Add a name to the leaf attribute list structure.
*/
int
-xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args)
+xfs_attr_leaf_add(
+ struct xfs_buf *bp,
+ struct xfs_da_args *args)
{
xfs_attr_leafblock_t *leaf;
xfs_attr_leaf_hdr_t *hdr;
@@ -1023,7 +1026,7 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args)
trace_xfs_attr_leaf_add(args);
- leaf = bp->data;
+ leaf = bp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT((args->index >= 0)
&& (args->index <= be16_to_cpu(leaf->hdr.count)));
@@ -1085,7 +1088,10 @@ xfs_attr_leaf_add(xfs_dabuf_t *bp, xfs_da_args_t *args)
* Add a name to a leaf attribute list structure.
*/
STATIC int
-xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
+xfs_attr_leaf_add_work(
+ struct xfs_buf *bp,
+ xfs_da_args_t *args,
+ int mapindex)
{
xfs_attr_leafblock_t *leaf;
xfs_attr_leaf_hdr_t *hdr;
@@ -1096,7 +1102,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
xfs_mount_t *mp;
int tmp, i;
- leaf = bp->data;
+ leaf = bp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
hdr = &leaf->hdr;
ASSERT((mapindex >= 0) && (mapindex < XFS_ATTR_LEAF_MAPSIZE));
@@ -1110,7 +1116,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
tmp = be16_to_cpu(hdr->count) - args->index;
tmp *= sizeof(xfs_attr_leaf_entry_t);
memmove((char *)(entry+1), (char *)entry, tmp);
- xfs_da_log_buf(args->trans, bp,
+ xfs_trans_log_buf(args->trans, bp,
XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry)));
}
be16_add_cpu(&hdr->count, 1);
@@ -1142,7 +1148,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
args->index2++;
}
}
- xfs_da_log_buf(args->trans, bp,
+ xfs_trans_log_buf(args->trans, bp,
XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
ASSERT((args->index == 0) ||
(be32_to_cpu(entry->hashval) >= be32_to_cpu((entry-1)->hashval)));
@@ -1174,7 +1180,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
args->rmtblkno = 1;
args->rmtblkcnt = XFS_B_TO_FSB(mp, args->valuelen);
}
- xfs_da_log_buf(args->trans, bp,
+ xfs_trans_log_buf(args->trans, bp,
XFS_DA_LOGRANGE(leaf, xfs_attr_leaf_name(leaf, args->index),
xfs_attr_leaf_entsize(leaf, args->index)));
@@ -1198,7 +1204,7 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
}
}
be16_add_cpu(&hdr->usedbytes, xfs_attr_leaf_entsize(leaf, args->index));
- xfs_da_log_buf(args->trans, bp,
+ xfs_trans_log_buf(args->trans, bp,
XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr)));
return(0);
}
@@ -1207,7 +1213,9 @@ xfs_attr_leaf_add_work(xfs_dabuf_t *bp, xfs_da_args_t *args, int mapindex)
* Garbage collect a leaf attribute list block by copying it to a new buffer.
*/
STATIC void
-xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp)
+xfs_attr_leaf_compact(
+ struct xfs_trans *trans,
+ struct xfs_buf *bp)
{
xfs_attr_leafblock_t *leaf_s, *leaf_d;
xfs_attr_leaf_hdr_t *hdr_s, *hdr_d;
@@ -1217,14 +1225,14 @@ xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp)
mp = trans->t_mountp;
tmpbuffer = kmem_alloc(XFS_LBSIZE(mp), KM_SLEEP);
ASSERT(tmpbuffer != NULL);
- memcpy(tmpbuffer, bp->data, XFS_LBSIZE(mp));
- memset(bp->data, 0, XFS_LBSIZE(mp));
+ memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(mp));
+ memset(bp->b_addr, 0, XFS_LBSIZE(mp));
/*
* Copy basic information
*/
leaf_s = (xfs_attr_leafblock_t *)tmpbuffer;
- leaf_d = bp->data;
+ leaf_d = bp->b_addr;
hdr_s = &leaf_s->hdr;
hdr_d = &leaf_d->hdr;
hdr_d->info = hdr_s->info; /* struct copy */
@@ -1247,7 +1255,7 @@ xfs_attr_leaf_compact(xfs_trans_t *trans, xfs_dabuf_t *bp)
*/
xfs_attr_leaf_moveents(leaf_s, 0, leaf_d, 0,
be16_to_cpu(hdr_s->count), mp);
- xfs_da_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1);
+ xfs_trans_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1);
kmem_free(tmpbuffer);
}
@@ -1279,8 +1287,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
*/
ASSERT(blk1->magic == XFS_ATTR_LEAF_MAGIC);
ASSERT(blk2->magic == XFS_ATTR_LEAF_MAGIC);
- leaf1 = blk1->bp->data;
- leaf2 = blk2->bp->data;
+ leaf1 = blk1->bp->b_addr;
+ leaf2 = blk2->bp->b_addr;
ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
args = state->args;
@@ -1298,8 +1306,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
tmp_blk = blk1;
blk1 = blk2;
blk2 = tmp_blk;
- leaf1 = blk1->bp->data;
- leaf2 = blk2->bp->data;
+ leaf1 = blk1->bp->b_addr;
+ leaf2 = blk2->bp->b_addr;
swap = 1;
}
hdr1 = &leaf1->hdr;
@@ -1346,8 +1354,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
xfs_attr_leaf_moveents(leaf1, be16_to_cpu(hdr1->count) - count,
leaf2, 0, count, state->mp);
- xfs_da_log_buf(args->trans, blk1->bp, 0, state->blocksize-1);
- xfs_da_log_buf(args->trans, blk2->bp, 0, state->blocksize-1);
+ xfs_trans_log_buf(args->trans, blk1->bp, 0, state->blocksize-1);
+ xfs_trans_log_buf(args->trans, blk2->bp, 0, state->blocksize-1);
} else if (count > be16_to_cpu(hdr1->count)) {
/*
* I assert that since all callers pass in an empty
@@ -1378,8 +1386,8 @@ xfs_attr_leaf_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
xfs_attr_leaf_moveents(leaf2, 0, leaf1,
be16_to_cpu(hdr1->count), count, state->mp);
- xfs_da_log_buf(args->trans, blk1->bp, 0, state->blocksize-1);
- xfs_da_log_buf(args->trans, blk2->bp, 0, state->blocksize-1);
+ xfs_trans_log_buf(args->trans, blk1->bp, 0, state->blocksize-1);
+ xfs_trans_log_buf(args->trans, blk2->bp, 0, state->blocksize-1);
}
/*
@@ -1448,8 +1456,8 @@ xfs_attr_leaf_figure_balance(xfs_da_state_t *state,
/*
* Set up environment.
*/
- leaf1 = blk1->bp->data;
- leaf2 = blk2->bp->data;
+ leaf1 = blk1->bp->b_addr;
+ leaf2 = blk2->bp->b_addr;
hdr1 = &leaf1->hdr;
hdr2 = &leaf2->hdr;
foundit = 0;
@@ -1551,7 +1559,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
xfs_da_blkinfo_t *info;
int count, bytes, forward, error, retval, i;
xfs_dablk_t blkno;
- xfs_dabuf_t *bp;
+ struct xfs_buf *bp;
/*
* Check for the degenerate case of the block being over 50% full.
@@ -1559,7 +1567,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
* to coalesce with a sibling.
*/
blk = &state->path.blk[ state->path.active-1 ];
- info = blk->bp->data;
+ info = blk->bp->b_addr;
ASSERT(info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
leaf = (xfs_attr_leafblock_t *)info;
count = be16_to_cpu(leaf->hdr.count);
@@ -1622,13 +1630,13 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
count = be16_to_cpu(leaf->hdr.count);
bytes = state->blocksize - (state->blocksize>>2);
bytes -= be16_to_cpu(leaf->hdr.usedbytes);
- leaf = bp->data;
+ leaf = bp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
count += be16_to_cpu(leaf->hdr.count);
bytes -= be16_to_cpu(leaf->hdr.usedbytes);
bytes -= count * sizeof(xfs_attr_leaf_entry_t);
bytes -= sizeof(xfs_attr_leaf_hdr_t);
- xfs_da_brelse(state->args->trans, bp);
+ xfs_trans_brelse(state->args->trans, bp);
if (bytes >= 0)
break; /* fits with at least 25% to spare */
}
@@ -1666,7 +1674,9 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
* If two leaves are 37% full, when combined they will leave 25% free.
*/
int
-xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
+xfs_attr_leaf_remove(
+ struct xfs_buf *bp,
+ xfs_da_args_t *args)
{
xfs_attr_leafblock_t *leaf;
xfs_attr_leaf_hdr_t *hdr;
@@ -1676,7 +1686,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
int tablesize, tmp, i;
xfs_mount_t *mp;
- leaf = bp->data;
+ leaf = bp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
hdr = &leaf->hdr;
mp = args->trans->t_mountp;
@@ -1769,7 +1779,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
*/
memset(xfs_attr_leaf_name(leaf, args->index), 0, entsize);
be16_add_cpu(&hdr->usedbytes, -entsize);
- xfs_da_log_buf(args->trans, bp,
+ xfs_trans_log_buf(args->trans, bp,
XFS_DA_LOGRANGE(leaf, xfs_attr_leaf_name(leaf, args->index),
entsize));
@@ -1777,7 +1787,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
* sizeof(xfs_attr_leaf_entry_t);
memmove((char *)entry, (char *)(entry+1), tmp);
be16_add_cpu(&hdr->count, -1);
- xfs_da_log_buf(args->trans, bp,
+ xfs_trans_log_buf(args->trans, bp,
XFS_DA_LOGRANGE(leaf, entry, tmp + sizeof(*entry)));
entry = &leaf->entries[be16_to_cpu(hdr->count)];
memset((char *)entry, 0, sizeof(xfs_attr_leaf_entry_t));
@@ -1807,7 +1817,7 @@ xfs_attr_leaf_remove(xfs_dabuf_t *bp, xfs_da_args_t *args)
} else {
hdr->holes = 1; /* mark as needing compaction */
}
- xfs_da_log_buf(args->trans, bp,
+ xfs_trans_log_buf(args->trans, bp,
XFS_DA_LOGRANGE(leaf, hdr, sizeof(*hdr)));
/*
@@ -1840,8 +1850,8 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
mp = state->mp;
ASSERT(drop_blk->magic == XFS_ATTR_LEAF_MAGIC);
ASSERT(save_blk->magic == XFS_ATTR_LEAF_MAGIC);
- drop_leaf = drop_blk->bp->data;
- save_leaf = save_blk->bp->data;
+ drop_leaf = drop_blk->bp->b_addr;
+ save_leaf = save_blk->bp->b_addr;
ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
drop_hdr = &drop_leaf->hdr;
@@ -1906,7 +1916,7 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
kmem_free(tmpbuffer);
}
- xfs_da_log_buf(state->args->trans, save_blk->bp, 0,
+ xfs_trans_log_buf(state->args->trans, save_blk->bp, 0,
state->blocksize - 1);
/*
@@ -1934,7 +1944,9 @@ xfs_attr_leaf_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
* Don't change the args->value unless we find the attribute.
*/
int
-xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
+xfs_attr_leaf_lookup_int(
+ struct xfs_buf *bp,
+ xfs_da_args_t *args)
{
xfs_attr_leafblock_t *leaf;
xfs_attr_leaf_entry_t *entry;
@@ -1945,7 +1957,7 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
trace_xfs_attr_leaf_lookup(args);
- leaf = bp->data;
+ leaf = bp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT(be16_to_cpu(leaf->hdr.count)
< (XFS_LBSIZE(args->dp->i_mount)/8));
@@ -2041,7 +2053,9 @@ xfs_attr_leaf_lookup_int(xfs_dabuf_t *bp, xfs_da_args_t *args)
* list structure.
*/
int
-xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args)
+xfs_attr_leaf_getvalue(
+ struct xfs_buf *bp,
+ xfs_da_args_t *args)
{
int valuelen;
xfs_attr_leafblock_t *leaf;
@@ -2049,7 +2063,7 @@ xfs_attr_leaf_getvalue(xfs_dabuf_t *bp, xfs_da_args_t *args)
xfs_attr_leaf_name_local_t *name_loc;
xfs_attr_leaf_name_remote_t *name_rmt;
- leaf = bp->data;
+ leaf = bp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT(be16_to_cpu(leaf->hdr.count)
< (XFS_LBSIZE(args->dp->i_mount)/8));
@@ -2247,12 +2261,14 @@ xfs_attr_leaf_moveents(xfs_attr_leafblock_t *leaf_s, int start_s,
* Return 0 unless leaf2 should go before leaf1.
*/
int
-xfs_attr_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp)
+xfs_attr_leaf_order(
+ struct xfs_buf *leaf1_bp,
+ struct xfs_buf *leaf2_bp)
{
xfs_attr_leafblock_t *leaf1, *leaf2;
- leaf1 = leaf1_bp->data;
- leaf2 = leaf2_bp->data;
+ leaf1 = leaf1_bp->b_addr;
+ leaf2 = leaf2_bp->b_addr;
ASSERT((leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) &&
(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)));
if ((be16_to_cpu(leaf1->hdr.count) > 0) &&
@@ -2272,11 +2288,13 @@ xfs_attr_leaf_order(xfs_dabuf_t *leaf1_bp, xfs_dabuf_t *leaf2_bp)
* Pick up the last hashvalue from a leaf block.
*/
xfs_dahash_t
-xfs_attr_leaf_lasthash(xfs_dabuf_t *bp, int *count)
+xfs_attr_leaf_lasthash(
+ struct xfs_buf *bp,
+ int *count)
{
xfs_attr_leafblock_t *leaf;
- leaf = bp->data;
+ leaf = bp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
if (count)
*count = be16_to_cpu(leaf->hdr.count);
@@ -2337,7 +2355,9 @@ xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, int *local)
* Copy out attribute list entries for attr_list(), for leaf attribute lists.
*/
int
-xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
+xfs_attr_leaf_list_int(
+ struct xfs_buf *bp,
+ xfs_attr_list_context_t *context)
{
attrlist_cursor_kern_t *cursor;
xfs_attr_leafblock_t *leaf;
@@ -2345,7 +2365,7 @@ xfs_attr_leaf_list_int(xfs_dabuf_t *bp, xfs_attr_list_context_t *context)
int retval, i;
ASSERT(bp != NULL);
- leaf = bp->data;
+ leaf = bp->b_addr;
cursor = context->cursor;
cursor->initted = 1;
@@ -2463,7 +2483,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
xfs_attr_leafblock_t *leaf;
xfs_attr_leaf_entry_t *entry;
xfs_attr_leaf_name_remote_t *name_rmt;
- xfs_dabuf_t *bp;
+ struct xfs_buf *bp;
int error;
#ifdef DEBUG
xfs_attr_leaf_name_local_t *name_loc;
@@ -2482,7 +2502,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
}
ASSERT(bp != NULL);
- leaf = bp->data;
+ leaf = bp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
ASSERT(args->index >= 0);
@@ -2505,7 +2525,7 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
#endif /* DEBUG */
entry->flags &= ~XFS_ATTR_INCOMPLETE;
- xfs_da_log_buf(args->trans, bp,
+ xfs_trans_log_buf(args->trans, bp,
XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
if (args->rmtblkno) {
@@ -2513,10 +2533,9 @@ xfs_attr_leaf_clearflag(xfs_da_args_t *args)
name_rmt = xfs_attr_leaf_name_remote(leaf, args->index);
name_rmt->valueblk = cpu_to_be32(args->rmtblkno);
name_rmt->valuelen = cpu_to_be32(args->valuelen);
- xfs_da_log_buf(args->trans, bp,
+ xfs_trans_log_buf(args->trans, bp,
XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt)));
}
- xfs_da_buf_done(bp);
/*
* Commit the flag value change and start the next trans in series.
@@ -2533,7 +2552,7 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args)
xfs_attr_leafblock_t *leaf;
xfs_attr_leaf_entry_t *entry;
xfs_attr_leaf_name_remote_t *name_rmt;
- xfs_dabuf_t *bp;
+ struct xfs_buf *bp;
int error;
trace_xfs_attr_leaf_setflag(args);
@@ -2548,7 +2567,7 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args)
}
ASSERT(bp != NULL);
- leaf = bp->data;
+ leaf = bp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT(args->index < be16_to_cpu(leaf->hdr.count));
ASSERT(args->index >= 0);
@@ -2556,16 +2575,15 @@ xfs_attr_leaf_setflag(xfs_da_args_t *args)
ASSERT((entry->flags & XFS_ATTR_INCOMPLETE) == 0);
entry->flags |= XFS_ATTR_INCOMPLETE;
- xfs_da_log_buf(args->trans, bp,
+ xfs_trans_log_buf(args->trans, bp,
XFS_DA_LOGRANGE(leaf, entry, sizeof(*entry)));
if ((entry->flags & XFS_ATTR_LOCAL) == 0) {
name_rmt = xfs_attr_leaf_name_remote(leaf, args->index);
name_rmt->valueblk = 0;
name_rmt->valuelen = 0;
- xfs_da_log_buf(args->trans, bp,
+ xfs_trans_log_buf(args->trans, bp,
XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt)));
}
- xfs_da_buf_done(bp);
/*
* Commit the flag value change and start the next trans in series.
@@ -2586,7 +2604,7 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
xfs_attr_leafblock_t *leaf1, *leaf2;
xfs_attr_leaf_entry_t *entry1, *entry2;
xfs_attr_leaf_name_remote_t *name_rmt;
- xfs_dabuf_t *bp1, *bp2;
+ struct xfs_buf *bp1, *bp2;
int error;
#ifdef DEBUG
xfs_attr_leaf_name_local_t *name_loc;
@@ -2620,13 +2638,13 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
bp2 = bp1;
}
- leaf1 = bp1->data;
+ leaf1 = bp1->b_addr;
ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT(args->index < be16_to_cpu(leaf1->hdr.count));
ASSERT(args->index >= 0);
entry1 = &leaf1->entries[ args->index ];
- leaf2 = bp2->data;
+ leaf2 = bp2->b_addr;
ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
ASSERT(args->index2 < be16_to_cpu(leaf2->hdr.count));
ASSERT(args->index2 >= 0);
@@ -2660,30 +2678,27 @@ xfs_attr_leaf_flipflags(xfs_da_args_t *args)
ASSERT((entry2->flags & XFS_ATTR_INCOMPLETE) == 0);
entry1->flags &= ~XFS_ATTR_INCOMPLETE;
- xfs_da_log_buf(args->trans, bp1,
+ xfs_trans_log_buf(args->trans, bp1,
XFS_DA_LOGRANGE(leaf1, entry1, sizeof(*entry1)));
if (args->rmtblkno) {
ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0);
name_rmt = xfs_attr_leaf_name_remote(leaf1, args->index);
name_rmt->valueblk = cpu_to_be32(args->rmtblkno);
name_rmt->valuelen = cpu_to_be32(args->valuelen);
- xfs_da_log_buf(args->trans, bp1,
+ xfs_trans_log_buf(args->trans, bp1,
XFS_DA_LOGRANGE(leaf1, name_rmt, sizeof(*name_rmt)));
}
entry2->flags |= XFS_ATTR_INCOMPLETE;
- xfs_da_log_buf(args->trans, bp2,
+ xfs_trans_log_buf(args->trans, bp2,
XFS_DA_LOGRANGE(leaf2, entry2, sizeof(*entry2)));
if ((entry2->flags & XFS_ATTR_LOCAL) == 0) {
name_rmt = xfs_attr_leaf_name_remote(leaf2, args->index2);
name_rmt->valueblk = 0;
name_rmt->valuelen = 0;
- xfs_da_log_buf(args->trans, bp2,
+ xfs_trans_log_buf(args->trans, bp2,
XFS_DA_LOGRANGE(leaf2, name_rmt, sizeof(*name_rmt)));
}
- xfs_da_buf_done(bp1);
- if (bp1 != bp2)
- xfs_da_buf_done(bp2);
/*
* Commit the flag value change and start the next trans in series.
@@ -2706,7 +2721,7 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp)
{
xfs_da_blkinfo_t *info;
xfs_daddr_t blkno;
- xfs_dabuf_t *bp;
+ struct xfs_buf *bp;
int error;
/*
@@ -2718,20 +2733,20 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp)
error = xfs_da_read_buf(*trans, dp, 0, -1, &bp, XFS_ATTR_FORK);
if (error)
return(error);
- blkno = xfs_da_blkno(bp);
+ blkno = XFS_BUF_ADDR(bp);
/*
* Invalidate the tree, even if the "tree" is only a single leaf block.
* This is a depth-first traversal!
*/
- info = bp->data;
+ info = bp->b_addr;
if (info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) {
error = xfs_attr_node_inactive(trans, dp, bp, 1);
} else if (info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)) {
error = xfs_attr_leaf_inactive(trans, dp, bp);
} else {
error = XFS_ERROR(EIO);
- xfs_da_brelse(*trans, bp);
+ xfs_trans_brelse(*trans, bp);
}
if (error)
return(error);
@@ -2742,7 +2757,7 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp)
error = xfs_da_get_buf(*trans, dp, 0, blkno, &bp, XFS_ATTR_FORK);
if (error)
return(error);
- xfs_da_binval(*trans, bp); /* remove from cache */
+ xfs_trans_binval(*trans, bp); /* remove from cache */
/*
* Commit the invalidate and start the next transaction.
*/
@@ -2756,34 +2771,37 @@ xfs_attr_root_inactive(xfs_trans_t **trans, xfs_inode_t *dp)
* We're doing a depth-first traversal in order to invalidate everything.
*/
STATIC int
-xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
- int level)
+xfs_attr_node_inactive(
+ struct xfs_trans **trans,
+ struct xfs_inode *dp,
+ struct xfs_buf *bp,
+ int level)
{
xfs_da_blkinfo_t *info;
xfs_da_intnode_t *node;
xfs_dablk_t child_fsb;
xfs_daddr_t parent_blkno, child_blkno;
int error, count, i;
- xfs_dabuf_t *child_bp;
+ struct xfs_buf *child_bp;
/*
* Since this code is recursive (gasp!) we must protect ourselves.
*/
if (level > XFS_DA_NODE_MAXDEPTH) {
- xfs_da_brelse(*trans, bp); /* no locks for later trans */
+ xfs_trans_brelse(*trans, bp); /* no locks for later trans */
return(XFS_ERROR(EIO));
}
- node = bp->data;
+ node = bp->b_addr;
ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
- parent_blkno = xfs_da_blkno(bp); /* save for re-read later */
+ parent_blkno = XFS_BUF_ADDR(bp); /* save for re-read later */
count = be16_to_cpu(node->hdr.count);
if (!count) {
- xfs_da_brelse(*trans, bp);
+ xfs_trans_brelse(*trans, bp);
return(0);
}
child_fsb = be32_to_cpu(node->btree[0].before);
- xfs_da_brelse(*trans, bp); /* no locks for later trans */
+ xfs_trans_brelse(*trans, bp); /* no locks for later trans */
/*
* If this is the node level just above the leaves, simply loop
@@ -2803,12 +2821,12 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
return(error);
if (child_bp) {
/* save for re-read later */
- child_blkno = xfs_da_blkno(child_bp);
+ child_blkno = XFS_BUF_ADDR(child_bp);
/*
* Invalidate the subtree, however we have to.
*/
- info = child_bp->data;
+ info = child_bp->b_addr;
if (info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) {
error = xfs_attr_node_inactive(trans, dp,
child_bp, level+1);
@@ -2817,7 +2835,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
child_bp);
} else {
error = XFS_ERROR(EIO);
- xfs_da_brelse(*trans, child_bp);
+ xfs_trans_brelse(*trans, child_bp);
}
if (error)
return(error);
@@ -2830,7 +2848,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
&child_bp, XFS_ATTR_FORK);
if (error)
return(error);
- xfs_da_binval(*trans, child_bp);
+ xfs_trans_binval(*trans, child_bp);
}
/*
@@ -2843,7 +2861,7 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
if (error)
return(error);
child_fsb = be32_to_cpu(node->btree[i+1].before);
- xfs_da_brelse(*trans, bp);
+ xfs_trans_brelse(*trans, bp);
}
/*
* Atomically commit the whole invalidate stuff.
@@ -2863,7 +2881,10 @@ xfs_attr_node_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp,
* caught holding something that the logging code wants to flush to disk.
*/
STATIC int
-xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
+xfs_attr_leaf_inactive(
+ struct xfs_trans **trans,
+ struct xfs_inode *dp,
+ struct xfs_buf *bp)
{
xfs_attr_leafblock_t *leaf;
xfs_attr_leaf_entry_t *entry;
@@ -2871,7 +2892,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
xfs_attr_inactive_list_t *list, *lp;
int error, count, size, tmp, i;
- leaf = bp->data;
+ leaf = bp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
/*
@@ -2892,7 +2913,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
* If there are no "remote" values, we're done.
*/
if (count == 0) {
- xfs_da_brelse(*trans, bp);
+ xfs_trans_brelse(*trans, bp);
return(0);
}
@@ -2919,7 +2940,7 @@ xfs_attr_leaf_inactive(xfs_trans_t **trans, xfs_inode_t *dp, xfs_dabuf_t *bp)
}
}
}
- xfs_da_brelse(*trans, bp); /* unlock for trans. in freextent() */
+ xfs_trans_brelse(*trans, bp); /* unlock for trans. in freextent() */
/*
* Invalidate each of the "remote" value extents.
diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h
index 9c7d22fdcf4d..dea17722945e 100644
--- a/fs/xfs/xfs_attr_leaf.h
+++ b/fs/xfs/xfs_attr_leaf.h
@@ -31,7 +31,6 @@
struct attrlist;
struct attrlist_cursor_kern;
struct xfs_attr_list_context;
-struct xfs_dabuf;
struct xfs_da_args;
struct xfs_da_state;
struct xfs_da_state_blk;
@@ -215,7 +214,7 @@ int xfs_attr_shortform_getvalue(struct xfs_da_args *args);
int xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
int xfs_attr_shortform_remove(struct xfs_da_args *args);
int xfs_attr_shortform_list(struct xfs_attr_list_context *context);
-int xfs_attr_shortform_allfit(struct xfs_dabuf *bp, struct xfs_inode *dp);
+int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
int xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes);
@@ -223,7 +222,7 @@ int xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes);
* Internal routines when attribute fork size == XFS_LBSIZE(mp).
*/
int xfs_attr_leaf_to_node(struct xfs_da_args *args);
-int xfs_attr_leaf_to_shortform(struct xfs_dabuf *bp,
+int xfs_attr_leaf_to_shortform(struct xfs_buf *bp,
struct xfs_da_args *args, int forkoff);
int xfs_attr_leaf_clearflag(struct xfs_da_args *args);
int xfs_attr_leaf_setflag(struct xfs_da_args *args);
@@ -235,14 +234,14 @@ int xfs_attr_leaf_flipflags(xfs_da_args_t *args);
int xfs_attr_leaf_split(struct xfs_da_state *state,
struct xfs_da_state_blk *oldblk,
struct xfs_da_state_blk *newblk);
-int xfs_attr_leaf_lookup_int(struct xfs_dabuf *leaf,
+int xfs_attr_leaf_lookup_int(struct xfs_buf *leaf,
struct xfs_da_args *args);
-int xfs_attr_leaf_getvalue(struct xfs_dabuf *bp, struct xfs_da_args *args);
-int xfs_attr_leaf_add(struct xfs_dabuf *leaf_buffer,
+int xfs_attr_leaf_getvalue(struct xfs_buf *bp, struct xfs_da_args *args);
+int xfs_attr_leaf_add(struct xfs_buf *leaf_buffer,
struct xfs_da_args *args);
-int xfs_attr_leaf_remove(struct xfs_dabuf *leaf_buffer,
+int xfs_attr_leaf_remove(struct xfs_buf *leaf_buffer,
struct xfs_da_args *args);
-int xfs_attr_leaf_list_int(struct xfs_dabuf *bp,
+int xfs_attr_leaf_list_int(struct xfs_buf *bp,
struct xfs_attr_list_context *context);
/*
@@ -257,9 +256,9 @@ int xfs_attr_root_inactive(struct xfs_trans **trans, struct xfs_inode *dp);
/*
* Utility routines.
*/
-xfs_dahash_t xfs_attr_leaf_lasthash(struct xfs_dabuf *bp, int *count);
-int xfs_attr_leaf_order(struct xfs_dabuf *leaf1_bp,
- struct xfs_dabuf *leaf2_bp);
+xfs_dahash_t xfs_attr_leaf_lasthash(struct xfs_buf *bp, int *count);
+int xfs_attr_leaf_order(struct xfs_buf *leaf1_bp,
+ struct xfs_buf *leaf2_bp);
int xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize,
int *local);
#endif /* __XFS_ATTR_LEAF_H__ */
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 58b815ec8c91..848ffa77707b 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5517,7 +5517,7 @@ xfs_getbmap(
if (xfs_get_extsz_hint(ip) ||
ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)){
prealloced = 1;
- fixlen = XFS_MAXIOFFSET(mp);
+ fixlen = mp->m_super->s_maxbytes;
} else {
prealloced = 0;
fixlen = XFS_ISIZE(ip);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 269b35c084da..d7a9dd735e1e 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -164,14 +164,49 @@ xfs_buf_stale(
ASSERT(atomic_read(&bp->b_hold) >= 1);
}
+static int
+xfs_buf_get_maps(
+ struct xfs_buf *bp,
+ int map_count)
+{
+ ASSERT(bp->b_maps == NULL);
+ bp->b_map_count = map_count;
+
+ if (map_count == 1) {
+ bp->b_maps = &bp->b_map;
+ return 0;
+ }
+
+ bp->b_maps = kmem_zalloc(map_count * sizeof(struct xfs_buf_map),
+ KM_NOFS);
+ if (!bp->b_maps)
+ return ENOMEM;
+ return 0;
+}
+
+/*
+ * Frees b_pages if it was allocated.
+ */
+static void
+xfs_buf_free_maps(
+ struct xfs_buf *bp)
+{
+ if (bp->b_maps != &bp->b_map) {
+ kmem_free(bp->b_maps);
+ bp->b_maps = NULL;
+ }
+}
+
struct xfs_buf *
-xfs_buf_alloc(
+_xfs_buf_alloc(
struct xfs_buftarg *target,
- xfs_daddr_t blkno,
- size_t numblks,
+ struct xfs_buf_map *map,
+ int nmaps,
xfs_buf_flags_t flags)
{
struct xfs_buf *bp;
+ int error;
+ int i;
bp = kmem_zone_zalloc(xfs_buf_zone, KM_NOFS);
if (unlikely(!bp))
@@ -192,16 +227,28 @@ xfs_buf_alloc(
sema_init(&bp->b_sema, 0); /* held, no waiters */
XB_SET_OWNER(bp);
bp->b_target = target;
+ bp->b_flags = flags;
/*
* Set length and io_length to the same value initially.
* I/O routines should use io_length, which will be the same in
* most cases but may be reset (e.g. XFS recovery).
*/
- bp->b_length = numblks;
- bp->b_io_length = numblks;
- bp->b_flags = flags;
- bp->b_bn = blkno;
+ error = xfs_buf_get_maps(bp, nmaps);
+ if (error) {
+ kmem_zone_free(xfs_buf_zone, bp);
+ return NULL;
+ }
+
+ bp->b_bn = map[0].bm_bn;
+ bp->b_length = 0;
+ for (i = 0; i < nmaps; i++) {
+ bp->b_maps[i].bm_bn = map[i].bm_bn;
+ bp->b_maps[i].bm_len = map[i].bm_len;
+ bp->b_length += map[i].bm_len;
+ }
+ bp->b_io_length = bp->b_length;
+
atomic_set(&bp->b_pin_count, 0);
init_waitqueue_head(&bp->b_waiters);
@@ -280,6 +327,7 @@ xfs_buf_free(
} else if (bp->b_flags & _XBF_KMEM)
kmem_free(bp->b_addr);
_xfs_buf_free_pages(bp);
+ xfs_buf_free_maps(bp);
kmem_zone_free(xfs_buf_zone, bp);
}
@@ -327,8 +375,9 @@ xfs_buf_allocate_memory(
}
use_alloc_page:
- start = BBTOB(bp->b_bn) >> PAGE_SHIFT;
- end = (BBTOB(bp->b_bn + bp->b_length) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ start = BBTOB(bp->b_map.bm_bn) >> PAGE_SHIFT;
+ end = (BBTOB(bp->b_map.bm_bn + bp->b_length) + PAGE_SIZE - 1)
+ >> PAGE_SHIFT;
page_count = end - start;
error = _xfs_buf_get_pages(bp, page_count, flags);
if (unlikely(error))
@@ -425,8 +474,8 @@ _xfs_buf_map_pages(
xfs_buf_t *
_xfs_buf_find(
struct xfs_buftarg *btp,
- xfs_daddr_t blkno,
- size_t numblks,
+ struct xfs_buf_map *map,
+ int nmaps,
xfs_buf_flags_t flags,
xfs_buf_t *new_bp)
{
@@ -435,7 +484,12 @@ _xfs_buf_find(
struct rb_node **rbp;
struct rb_node *parent;
xfs_buf_t *bp;
+ xfs_daddr_t blkno = map[0].bm_bn;
+ int numblks = 0;
+ int i;
+ for (i = 0; i < nmaps; i++)
+ numblks += map[i].bm_len;
numbytes = BBTOB(numblks);
/* Check for IOs smaller than the sector size / not sector aligned */
@@ -527,31 +581,31 @@ found:
* more hits than misses.
*/
struct xfs_buf *
-xfs_buf_get(
- xfs_buftarg_t *target,
- xfs_daddr_t blkno,
- size_t numblks,
+xfs_buf_get_map(
+ struct xfs_buftarg *target,
+ struct xfs_buf_map *map,
+ int nmaps,
xfs_buf_flags_t flags)
{
struct xfs_buf *bp;
struct xfs_buf *new_bp;
int error = 0;
- bp = _xfs_buf_find(target, blkno, numblks, flags, NULL);
+ bp = _xfs_buf_find(target, map, nmaps, flags, NULL);
if (likely(bp))
goto found;
- new_bp = xfs_buf_alloc(target, blkno, numblks, flags);
+ new_bp = _xfs_buf_alloc(target, map, nmaps, flags);
if (unlikely(!new_bp))
return NULL;
error = xfs_buf_allocate_memory(new_bp, flags);
if (error) {
- kmem_zone_free(xfs_buf_zone, new_bp);
+ xfs_buf_free(new_bp);
return NULL;
}
- bp = _xfs_buf_find(target, blkno, numblks, flags, new_bp);
+ bp = _xfs_buf_find(target, map, nmaps, flags, new_bp);
if (!bp) {
xfs_buf_free(new_bp);
return NULL;
@@ -560,8 +614,6 @@ xfs_buf_get(
if (bp != new_bp)
xfs_buf_free(new_bp);
- bp->b_io_length = bp->b_length;
-
found:
if (!bp->b_addr) {
error = _xfs_buf_map_pages(bp, flags);
@@ -584,7 +636,7 @@ _xfs_buf_read(
xfs_buf_flags_t flags)
{
ASSERT(!(flags & XBF_WRITE));
- ASSERT(bp->b_bn != XFS_BUF_DADDR_NULL);
+ ASSERT(bp->b_map.bm_bn != XFS_BUF_DADDR_NULL);
bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD);
bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
@@ -596,17 +648,17 @@ _xfs_buf_read(
}
xfs_buf_t *
-xfs_buf_read(
- xfs_buftarg_t *target,
- xfs_daddr_t blkno,
- size_t numblks,
+xfs_buf_read_map(
+ struct xfs_buftarg *target,
+ struct xfs_buf_map *map,
+ int nmaps,
xfs_buf_flags_t flags)
{
- xfs_buf_t *bp;
+ struct xfs_buf *bp;
flags |= XBF_READ;
- bp = xfs_buf_get(target, blkno, numblks, flags);
+ bp = xfs_buf_get_map(target, map, nmaps, flags);
if (bp) {
trace_xfs_buf_read(bp, flags, _RET_IP_);
@@ -634,15 +686,15 @@ xfs_buf_read(
* safe manner.
*/
void
-xfs_buf_readahead(
- xfs_buftarg_t *target,
- xfs_daddr_t blkno,
- size_t numblks)
+xfs_buf_readahead_map(
+ struct xfs_buftarg *target,
+ struct xfs_buf_map *map,
+ int nmaps)
{
if (bdi_read_congested(target->bt_bdi))
return;
- xfs_buf_read(target, blkno, numblks,
+ xfs_buf_read_map(target, map, nmaps,
XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
}
@@ -665,8 +717,10 @@ xfs_buf_read_uncached(
return NULL;
/* set up the buffer for a read IO */
- XFS_BUF_SET_ADDR(bp, daddr);
- XFS_BUF_READ(bp);
+ ASSERT(bp->b_map_count == 1);
+ bp->b_bn = daddr;
+ bp->b_maps[0].bm_bn = daddr;
+ bp->b_flags |= XBF_READ;
xfsbdstrat(target->bt_mount, bp);
error = xfs_buf_iowait(bp);
@@ -694,7 +748,11 @@ xfs_buf_set_empty(
bp->b_addr = NULL;
bp->b_length = numblks;
bp->b_io_length = numblks;
+
+ ASSERT(bp->b_map_count == 1);
bp->b_bn = XFS_BUF_DADDR_NULL;
+ bp->b_maps[0].bm_bn = XFS_BUF_DADDR_NULL;
+ bp->b_maps[0].bm_len = bp->b_length;
}
static inline struct page *
@@ -758,9 +816,10 @@ xfs_buf_get_uncached(
{
unsigned long page_count;
int error, i;
- xfs_buf_t *bp;
+ struct xfs_buf *bp;
+ DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);
- bp = xfs_buf_alloc(target, XFS_BUF_DADDR_NULL, numblks, 0);
+ bp = _xfs_buf_alloc(target, &map, 1, 0);
if (unlikely(bp == NULL))
goto fail;
@@ -791,6 +850,7 @@ xfs_buf_get_uncached(
__free_page(bp->b_pages[i]);
_xfs_buf_free_pages(bp);
fail_free_buf:
+ xfs_buf_free_maps(bp);
kmem_zone_free(xfs_buf_zone, bp);
fail:
return NULL;
@@ -1144,36 +1204,39 @@ xfs_buf_bio_end_io(
bio_put(bio);
}
-STATIC void
-_xfs_buf_ioapply(
- xfs_buf_t *bp)
+static void
+xfs_buf_ioapply_map(
+ struct xfs_buf *bp,
+ int map,
+ int *buf_offset,
+ int *count,
+ int rw)
{
- int rw, map_i, total_nr_pages, nr_pages;
- struct bio *bio;
- int offset = bp->b_offset;
- int size = BBTOB(bp->b_io_length);
- sector_t sector = bp->b_bn;
+ int page_index;
+ int total_nr_pages = bp->b_page_count;
+ int nr_pages;
+ struct bio *bio;
+ sector_t sector = bp->b_maps[map].bm_bn;
+ int size;
+ int offset;
total_nr_pages = bp->b_page_count;
- map_i = 0;
- if (bp->b_flags & XBF_WRITE) {
- if (bp->b_flags & XBF_SYNCIO)
- rw = WRITE_SYNC;
- else
- rw = WRITE;
- if (bp->b_flags & XBF_FUA)
- rw |= REQ_FUA;
- if (bp->b_flags & XBF_FLUSH)
- rw |= REQ_FLUSH;
- } else if (bp->b_flags & XBF_READ_AHEAD) {
- rw = READA;
- } else {
- rw = READ;
+ /* skip the pages in the buffer before the start offset */
+ page_index = 0;
+ offset = *buf_offset;
+ while (offset >= PAGE_SIZE) {
+ page_index++;
+ offset -= PAGE_SIZE;
}
- /* we only use the buffer cache for meta-data */
- rw |= REQ_META;
+ /*
+ * Limit the IO size to the length of the current vector, and update the
+ * remaining IO count for the next time around.
+ */
+ size = min_t(int, BBTOB(bp->b_maps[map].bm_len), *count);
+ *count -= size;
+ *buf_offset += size;
next_chunk:
atomic_inc(&bp->b_io_remaining);
@@ -1188,13 +1251,14 @@ next_chunk:
bio->bi_private = bp;
- for (; size && nr_pages; nr_pages--, map_i++) {
+ for (; size && nr_pages; nr_pages--, page_index++) {
int rbytes, nbytes = PAGE_SIZE - offset;
if (nbytes > size)
nbytes = size;
- rbytes = bio_add_page(bio, bp->b_pages[map_i], nbytes, offset);
+ rbytes = bio_add_page(bio, bp->b_pages[page_index], nbytes,
+ offset);
if (rbytes < nbytes)
break;
@@ -1216,6 +1280,54 @@ next_chunk:
xfs_buf_ioerror(bp, EIO);
bio_put(bio);
}
+
+}
+
+STATIC void
+_xfs_buf_ioapply(
+ struct xfs_buf *bp)
+{
+ struct blk_plug plug;
+ int rw;
+ int offset;
+ int size;
+ int i;
+
+ if (bp->b_flags & XBF_WRITE) {
+ if (bp->b_flags & XBF_SYNCIO)
+ rw = WRITE_SYNC;
+ else
+ rw = WRITE;
+ if (bp->b_flags & XBF_FUA)
+ rw |= REQ_FUA;
+ if (bp->b_flags & XBF_FLUSH)
+ rw |= REQ_FLUSH;
+ } else if (bp->b_flags & XBF_READ_AHEAD) {
+ rw = READA;
+ } else {
+ rw = READ;
+ }
+
+ /* we only use the buffer cache for meta-data */
+ rw |= REQ_META;
+
+ /*
+ * Walk all the vectors issuing IO on them. Set up the initial offset
+ * into the buffer and the desired IO size before we start -
+ * _xfs_buf_ioapply_vec() will modify them appropriately for each
+ * subsequent call.
+ */
+ offset = bp->b_offset;
+ size = BBTOB(bp->b_io_length);
+ blk_start_plug(&plug);
+ for (i = 0; i < bp->b_map_count; i++) {
+ xfs_buf_ioapply_map(bp, i, &offset, &size, rw);
+ if (bp->b_error)
+ break;
+ if (size <= 0)
+ break; /* all done */
+ }
+ blk_finish_plug(&plug);
}
void
@@ -1557,7 +1669,7 @@ xfs_buf_cmp(
struct xfs_buf *bp = container_of(b, struct xfs_buf, b_list);
xfs_daddr_t diff;
- diff = ap->b_bn - bp->b_bn;
+ diff = ap->b_map.bm_bn - bp->b_map.bm_bn;
if (diff < 0)
return -1;
if (diff > 0)
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 79344c48008e..d03b73b9604e 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -58,6 +58,7 @@ typedef enum {
#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */
#define _XBF_KMEM (1 << 21)/* backed by heap memory */
#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */
+#define _XBF_COMPOUND (1 << 23)/* compound buffer */
typedef unsigned int xfs_buf_flags_t;
@@ -75,7 +76,8 @@ typedef unsigned int xfs_buf_flags_t;
{ XBF_UNMAPPED, "UNMAPPED" }, /* ditto */\
{ _XBF_PAGES, "PAGES" }, \
{ _XBF_KMEM, "KMEM" }, \
- { _XBF_DELWRI_Q, "DELWRI_Q" }
+ { _XBF_DELWRI_Q, "DELWRI_Q" }, \
+ { _XBF_COMPOUND, "COMPOUND" }
typedef struct xfs_buftarg {
dev_t bt_dev;
@@ -98,6 +100,14 @@ typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
#define XB_PAGES 2
+struct xfs_buf_map {
+ xfs_daddr_t bm_bn; /* block number for I/O */
+ int bm_len; /* size of I/O */
+};
+
+#define DEFINE_SINGLE_BUF_MAP(map, blkno, numblk) \
+ struct xfs_buf_map (map) = { .bm_bn = (blkno), .bm_len = (numblk) };
+
typedef struct xfs_buf {
/*
* first cacheline holds all the fields needed for an uncontended cache
@@ -107,7 +117,7 @@ typedef struct xfs_buf {
* fast-path on locking.
*/
struct rb_node b_rbnode; /* rbtree node */
- xfs_daddr_t b_bn; /* block number for I/O */
+ xfs_daddr_t b_bn; /* block number of buffer */
int b_length; /* size of buffer in BBs */
atomic_t b_hold; /* reference count */
atomic_t b_lru_ref; /* lru reclaim ref count */
@@ -127,12 +137,16 @@ typedef struct xfs_buf {
struct xfs_trans *b_transp;
struct page **b_pages; /* array of page pointers */
struct page *b_page_array[XB_PAGES]; /* inline pages */
+ struct xfs_buf_map *b_maps; /* compound buffer map */
+ struct xfs_buf_map b_map; /* inline compound buffer map */
+ int b_map_count;
int b_io_length; /* IO size in BBs */
atomic_t b_pin_count; /* pin count */
atomic_t b_io_remaining; /* #outstanding I/O requests */
unsigned int b_page_count; /* size of page array */
unsigned int b_offset; /* page offset in first page */
unsigned short b_error; /* error code on I/O */
+
#ifdef XFS_BUF_LOCK_TRACKING
int b_last_holder;
#endif
@@ -140,22 +154,78 @@ typedef struct xfs_buf {
/* Finding and Reading Buffers */
-struct xfs_buf *_xfs_buf_find(struct xfs_buftarg *target, xfs_daddr_t blkno,
- size_t numblks, xfs_buf_flags_t flags,
- struct xfs_buf *new_bp);
-#define xfs_incore(buftarg,blkno,len,lockit) \
- _xfs_buf_find(buftarg, blkno ,len, lockit, NULL)
-
-struct xfs_buf *xfs_buf_get(struct xfs_buftarg *target, xfs_daddr_t blkno,
- size_t numblks, xfs_buf_flags_t flags);
-struct xfs_buf *xfs_buf_read(struct xfs_buftarg *target, xfs_daddr_t blkno,
- size_t numblks, xfs_buf_flags_t flags);
-void xfs_buf_readahead(struct xfs_buftarg *target, xfs_daddr_t blkno,
- size_t numblks);
+struct xfs_buf *_xfs_buf_find(struct xfs_buftarg *target,
+ struct xfs_buf_map *map, int nmaps,
+ xfs_buf_flags_t flags, struct xfs_buf *new_bp);
+
+static inline struct xfs_buf *
+xfs_incore(
+ struct xfs_buftarg *target,
+ xfs_daddr_t blkno,
+ size_t numblks,
+ xfs_buf_flags_t flags)
+{
+ DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
+ return _xfs_buf_find(target, &map, 1, flags, NULL);
+}
+
+struct xfs_buf *_xfs_buf_alloc(struct xfs_buftarg *target,
+ struct xfs_buf_map *map, int nmaps,
+ xfs_buf_flags_t flags);
+
+static inline struct xfs_buf *
+xfs_buf_alloc(
+ struct xfs_buftarg *target,
+ xfs_daddr_t blkno,
+ size_t numblks,
+ xfs_buf_flags_t flags)
+{
+ DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
+ return _xfs_buf_alloc(target, &map, 1, flags);
+}
+
+struct xfs_buf *xfs_buf_get_map(struct xfs_buftarg *target,
+ struct xfs_buf_map *map, int nmaps,
+ xfs_buf_flags_t flags);
+struct xfs_buf *xfs_buf_read_map(struct xfs_buftarg *target,
+ struct xfs_buf_map *map, int nmaps,
+ xfs_buf_flags_t flags);
+void xfs_buf_readahead_map(struct xfs_buftarg *target,
+ struct xfs_buf_map *map, int nmaps);
+
+static inline struct xfs_buf *
+xfs_buf_get(
+ struct xfs_buftarg *target,
+ xfs_daddr_t blkno,
+ size_t numblks,
+ xfs_buf_flags_t flags)
+{
+ DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
+ return xfs_buf_get_map(target, &map, 1, flags);
+}
+
+static inline struct xfs_buf *
+xfs_buf_read(
+ struct xfs_buftarg *target,
+ xfs_daddr_t blkno,
+ size_t numblks,
+ xfs_buf_flags_t flags)
+{
+ DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
+ return xfs_buf_read_map(target, &map, 1, flags);
+}
+
+static inline void
+xfs_buf_readahead(
+ struct xfs_buftarg *target,
+ xfs_daddr_t blkno,
+ size_t numblks)
+{
+ DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
+ return xfs_buf_readahead_map(target, &map, 1);
+}
struct xfs_buf *xfs_buf_get_empty(struct xfs_buftarg *target, size_t numblks);
-struct xfs_buf *xfs_buf_alloc(struct xfs_buftarg *target, xfs_daddr_t blkno,
- size_t numblks, xfs_buf_flags_t flags);
void xfs_buf_set_empty(struct xfs_buf *bp, size_t numblks);
int xfs_buf_associate_memory(struct xfs_buf *bp, void *mem, size_t length);
@@ -232,8 +302,18 @@ void xfs_buf_stale(struct xfs_buf *bp);
#define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE)
#define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE)
-#define XFS_BUF_ADDR(bp) ((bp)->b_bn)
-#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_bn = (xfs_daddr_t)(bno))
+/*
+ * These macros use the IO block map rather than b_bn. b_bn is now really
+ * just for the buffer cache index for cached buffers. As IO does not use b_bn
+ * anymore, uncached buffers do not use b_bn at all and hence must modify the IO
+ * map directly. Uncached buffers are not allowed to be discontiguous, so this
+ * is safe to do.
+ *
+ * In future, uncached buffers will pass the block number directly to the io
+ * request function and hence these macros will go away at that point.
+ */
+#define XFS_BUF_ADDR(bp) ((bp)->b_map.bm_bn)
+#define XFS_BUF_SET_ADDR(bp, bno) ((bp)->b_map.bm_bn = (xfs_daddr_t)(bno))
static inline void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
{
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index d9e451115f98..a8d0ed911196 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -153,33 +153,25 @@ STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp);
* If the XFS_BLI_STALE flag has been set, then log nothing.
*/
STATIC uint
-xfs_buf_item_size(
- struct xfs_log_item *lip)
+xfs_buf_item_size_segment(
+ struct xfs_buf_log_item *bip,
+ struct xfs_buf_log_format *blfp)
{
- struct xfs_buf_log_item *bip = BUF_ITEM(lip);
struct xfs_buf *bp = bip->bli_buf;
uint nvecs;
int next_bit;
int last_bit;
- ASSERT(atomic_read(&bip->bli_refcount) > 0);
- if (bip->bli_flags & XFS_BLI_STALE) {
- /*
- * The buffer is stale, so all we need to log
- * is the buf log format structure with the
- * cancel flag in it.
- */
- trace_xfs_buf_item_size_stale(bip);
- ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
- return 1;
- }
+ last_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
+ if (last_bit == -1)
+ return 0;
+
+ /*
+ * initial count for a dirty buffer is 2 vectors - the format structure
+ * and the first dirty region.
+ */
+ nvecs = 2;
- ASSERT(bip->bli_flags & XFS_BLI_LOGGED);
- nvecs = 1;
- last_bit = xfs_next_bit(bip->bli_format.blf_data_map,
- bip->bli_format.blf_map_size, 0);
- ASSERT(last_bit != -1);
- nvecs++;
while (last_bit != -1) {
/*
* This takes the bit number to start looking from and
@@ -187,16 +179,15 @@ xfs_buf_item_size(
* if there are no more bits set or the start bit is
* beyond the end of the bitmap.
*/
- next_bit = xfs_next_bit(bip->bli_format.blf_data_map,
- bip->bli_format.blf_map_size,
- last_bit + 1);
+ next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size,
+ last_bit + 1);
/*
* If we run out of bits, leave the loop,
* else if we find a new set of bits bump the number of vecs,
* else keep scanning the current set of bits.
*/
if (next_bit == -1) {
- last_bit = -1;
+ break;
} else if (next_bit != last_bit + 1) {
last_bit = next_bit;
nvecs++;
@@ -210,22 +201,73 @@ xfs_buf_item_size(
}
}
- trace_xfs_buf_item_size(bip);
return nvecs;
}
/*
- * This is called to fill in the vector of log iovecs for the
- * given log buf item. It fills the first entry with a buf log
- * format structure, and the rest point to contiguous chunks
- * within the buffer.
+ * This returns the number of log iovecs needed to log the given buf log item.
+ *
+ * It calculates this as 1 iovec for the buf log format structure and 1 for each
+ * stretch of non-contiguous chunks to be logged. Contiguous chunks are logged
+ * in a single iovec.
+ *
+ * Discontiguous buffers need a format structure per region that that is being
+ * logged. This makes the changes in the buffer appear to log recovery as though
+ * they came from separate buffers, just like would occur if multiple buffers
+ * were used instead of a single discontiguous buffer. This enables
+ * discontiguous buffers to be in-memory constructs, completely transparent to
+ * what ends up on disk.
+ *
+ * If the XFS_BLI_STALE flag has been set, then log nothing but the buf log
+ * format structures.
*/
-STATIC void
-xfs_buf_item_format(
- struct xfs_log_item *lip,
- struct xfs_log_iovec *vecp)
+STATIC uint
+xfs_buf_item_size(
+ struct xfs_log_item *lip)
{
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
+ uint nvecs;
+ int i;
+
+ ASSERT(atomic_read(&bip->bli_refcount) > 0);
+ if (bip->bli_flags & XFS_BLI_STALE) {
+ /*
+ * The buffer is stale, so all we need to log
+ * is the buf log format structure with the
+ * cancel flag in it.
+ */
+ trace_xfs_buf_item_size_stale(bip);
+ ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
+ return bip->bli_format_count;
+ }
+
+ ASSERT(bip->bli_flags & XFS_BLI_LOGGED);
+
+ /*
+ * the vector count is based on the number of buffer vectors we have
+ * dirty bits in. This will only be greater than one when we have a
+ * compound buffer with more than one segment dirty. Hence for compound
+ * buffers we need to track which segment the dirty bits correspond to,
+ * and when we move from one segment to the next increment the vector
+ * count for the extra buf log format structure that will need to be
+ * written.
+ */
+ nvecs = 0;
+ for (i = 0; i < bip->bli_format_count; i++) {
+ nvecs += xfs_buf_item_size_segment(bip, &bip->bli_formats[i]);
+ }
+
+ trace_xfs_buf_item_size(bip);
+ return nvecs;
+}
+
+static struct xfs_log_iovec *
+xfs_buf_item_format_segment(
+ struct xfs_buf_log_item *bip,
+ struct xfs_log_iovec *vecp,
+ uint offset,
+ struct xfs_buf_log_format *blfp)
+{
struct xfs_buf *bp = bip->bli_buf;
uint base_size;
uint nvecs;
@@ -235,40 +277,22 @@ xfs_buf_item_format(
uint nbits;
uint buffer_offset;
- ASSERT(atomic_read(&bip->bli_refcount) > 0);
- ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
- (bip->bli_flags & XFS_BLI_STALE));
+ /* copy the flags across from the base format item */
+ blfp->blf_flags = bip->bli_format.blf_flags;
/*
- * The size of the base structure is the size of the
- * declared structure plus the space for the extra words
- * of the bitmap. We subtract one from the map size, because
- * the first element of the bitmap is accounted for in the
- * size of the base structure.
+ * Base size is the actual size of the ondisk structure - it reflects
+ * the actual size of the dirty bitmap rather than the size of the in
+ * memory structure.
*/
- base_size =
- (uint)(sizeof(xfs_buf_log_format_t) +
- ((bip->bli_format.blf_map_size - 1) * sizeof(uint)));
- vecp->i_addr = &bip->bli_format;
+ base_size = offsetof(struct xfs_buf_log_format, blf_data_map) +
+ (blfp->blf_map_size * sizeof(blfp->blf_data_map[0]));
+ vecp->i_addr = blfp;
vecp->i_len = base_size;
vecp->i_type = XLOG_REG_TYPE_BFORMAT;
vecp++;
nvecs = 1;
- /*
- * If it is an inode buffer, transfer the in-memory state to the
- * format flags and clear the in-memory state. We do not transfer
- * this state if the inode buffer allocation has not yet been committed
- * to the log as setting the XFS_BLI_INODE_BUF flag will prevent
- * correct replay of the inode allocation.
- */
- if (bip->bli_flags & XFS_BLI_INODE_BUF) {
- if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
- xfs_log_item_in_current_chkpt(lip)))
- bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF;
- bip->bli_flags &= ~XFS_BLI_INODE_BUF;
- }
-
if (bip->bli_flags & XFS_BLI_STALE) {
/*
* The buffer is stale, so all we need to log
@@ -276,16 +300,15 @@ xfs_buf_item_format(
* cancel flag in it.
*/
trace_xfs_buf_item_format_stale(bip);
- ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
- bip->bli_format.blf_size = nvecs;
- return;
+ ASSERT(blfp->blf_flags & XFS_BLF_CANCEL);
+ blfp->blf_size = nvecs;
+ return vecp;
}
/*
* Fill in an iovec for each set of contiguous chunks.
*/
- first_bit = xfs_next_bit(bip->bli_format.blf_data_map,
- bip->bli_format.blf_map_size, 0);
+ first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
ASSERT(first_bit != -1);
last_bit = first_bit;
nbits = 1;
@@ -296,9 +319,8 @@ xfs_buf_item_format(
* if there are no more bits set or the start bit is
* beyond the end of the bitmap.
*/
- next_bit = xfs_next_bit(bip->bli_format.blf_data_map,
- bip->bli_format.blf_map_size,
- (uint)last_bit + 1);
+ next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size,
+ (uint)last_bit + 1);
/*
* If we run out of bits fill in the last iovec and get
* out of the loop.
@@ -309,14 +331,14 @@ xfs_buf_item_format(
* keep counting and scanning.
*/
if (next_bit == -1) {
- buffer_offset = first_bit * XFS_BLF_CHUNK;
+ buffer_offset = offset + first_bit * XFS_BLF_CHUNK;
vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
vecp->i_len = nbits * XFS_BLF_CHUNK;
vecp->i_type = XLOG_REG_TYPE_BCHUNK;
nvecs++;
break;
} else if (next_bit != last_bit + 1) {
- buffer_offset = first_bit * XFS_BLF_CHUNK;
+ buffer_offset = offset + first_bit * XFS_BLF_CHUNK;
vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
vecp->i_len = nbits * XFS_BLF_CHUNK;
vecp->i_type = XLOG_REG_TYPE_BCHUNK;
@@ -325,14 +347,17 @@ xfs_buf_item_format(
first_bit = next_bit;
last_bit = next_bit;
nbits = 1;
- } else if (xfs_buf_offset(bp, next_bit << XFS_BLF_SHIFT) !=
- (xfs_buf_offset(bp, last_bit << XFS_BLF_SHIFT) +
+ } else if (xfs_buf_offset(bp, offset +
+ (next_bit << XFS_BLF_SHIFT)) !=
+ (xfs_buf_offset(bp, offset +
+ (last_bit << XFS_BLF_SHIFT)) +
XFS_BLF_CHUNK)) {
- buffer_offset = first_bit * XFS_BLF_CHUNK;
+ buffer_offset = offset + first_bit * XFS_BLF_CHUNK;
vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
vecp->i_len = nbits * XFS_BLF_CHUNK;
vecp->i_type = XLOG_REG_TYPE_BCHUNK;
-/* You would think we need to bump the nvecs here too, but we do not
+/*
+ * You would think we need to bump the nvecs here too, but we do not
* this number is used by recovery, and it gets confused by the boundary
* split here
* nvecs++;
@@ -347,6 +372,48 @@ xfs_buf_item_format(
}
}
bip->bli_format.blf_size = nvecs;
+ return vecp;
+}
+
+/*
+ * This is called to fill in the vector of log iovecs for the
+ * given log buf item. It fills the first entry with a buf log
+ * format structure, and the rest point to contiguous chunks
+ * within the buffer.
+ */
+STATIC void
+xfs_buf_item_format(
+ struct xfs_log_item *lip,
+ struct xfs_log_iovec *vecp)
+{
+ struct xfs_buf_log_item *bip = BUF_ITEM(lip);
+ struct xfs_buf *bp = bip->bli_buf;
+ uint offset = 0;
+ int i;
+
+ ASSERT(atomic_read(&bip->bli_refcount) > 0);
+ ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
+ (bip->bli_flags & XFS_BLI_STALE));
+
+ /*
+ * If it is an inode buffer, transfer the in-memory state to the
+ * format flags and clear the in-memory state. We do not transfer
+ * this state if the inode buffer allocation has not yet been committed
+ * to the log as setting the XFS_BLI_INODE_BUF flag will prevent
+ * correct replay of the inode allocation.
+ */
+ if (bip->bli_flags & XFS_BLI_INODE_BUF) {
+ if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
+ xfs_log_item_in_current_chkpt(lip)))
+ bip->bli_format.blf_flags |= XFS_BLF_INODE_BUF;
+ bip->bli_flags &= ~XFS_BLI_INODE_BUF;
+ }
+
+ for (i = 0; i < bip->bli_format_count; i++) {
+ vecp = xfs_buf_item_format_segment(bip, vecp, offset,
+ &bip->bli_formats[i]);
+ offset += bp->b_maps[i].bm_len;
+ }
/*
* Check to make sure everything is consistent.
@@ -622,6 +689,35 @@ static const struct xfs_item_ops xfs_buf_item_ops = {
.iop_committing = xfs_buf_item_committing
};
+STATIC int
+xfs_buf_item_get_format(
+ struct xfs_buf_log_item *bip,
+ int count)
+{
+ ASSERT(bip->bli_formats == NULL);
+ bip->bli_format_count = count;
+
+ if (count == 1) {
+ bip->bli_formats = &bip->bli_format;
+ return 0;
+ }
+
+ bip->bli_formats = kmem_zalloc(count * sizeof(struct xfs_buf_log_format),
+ KM_SLEEP);
+ if (!bip->bli_formats)
+ return ENOMEM;
+ return 0;
+}
+
+STATIC void
+xfs_buf_item_free_format(
+ struct xfs_buf_log_item *bip)
+{
+ if (bip->bli_formats != &bip->bli_format) {
+ kmem_free(bip->bli_formats);
+ bip->bli_formats = NULL;
+ }
+}
/*
* Allocate a new buf log item to go with the given buffer.
@@ -639,6 +735,8 @@ xfs_buf_item_init(
xfs_buf_log_item_t *bip;
int chunks;
int map_size;
+ int error;
+ int i;
/*
* Check to see if there is already a buf log item for
@@ -650,25 +748,33 @@ xfs_buf_item_init(
if (lip != NULL && lip->li_type == XFS_LI_BUF)
return;
- /*
- * chunks is the number of XFS_BLF_CHUNK size pieces
- * the buffer can be divided into. Make sure not to
- * truncate any pieces. map_size is the size of the
- * bitmap needed to describe the chunks of the buffer.
- */
- chunks = (int)((BBTOB(bp->b_length) + (XFS_BLF_CHUNK - 1)) >>
- XFS_BLF_SHIFT);
- map_size = (int)((chunks + NBWORD) >> BIT_TO_WORD_SHIFT);
-
- bip = (xfs_buf_log_item_t*)kmem_zone_zalloc(xfs_buf_item_zone,
- KM_SLEEP);
+ bip = kmem_zone_zalloc(xfs_buf_item_zone, KM_SLEEP);
xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops);
bip->bli_buf = bp;
xfs_buf_hold(bp);
- bip->bli_format.blf_type = XFS_LI_BUF;
- bip->bli_format.blf_blkno = (__int64_t)XFS_BUF_ADDR(bp);
- bip->bli_format.blf_len = (ushort)bp->b_length;
- bip->bli_format.blf_map_size = map_size;
+
+ /*
+ * chunks is the number of XFS_BLF_CHUNK size pieces the buffer
+ * can be divided into. Make sure not to truncate any pieces.
+ * map_size is the size of the bitmap needed to describe the
+ * chunks of the buffer.
+ *
+ * Discontiguous buffer support follows the layout of the underlying
+ * buffer. This makes the implementation as simple as possible.
+ */
+ error = xfs_buf_item_get_format(bip, bp->b_map_count);
+ ASSERT(error == 0);
+
+ for (i = 0; i < bip->bli_format_count; i++) {
+ chunks = DIV_ROUND_UP(BBTOB(bp->b_maps[i].bm_len),
+ XFS_BLF_CHUNK);
+ map_size = DIV_ROUND_UP(chunks, NBWORD);
+
+ bip->bli_formats[i].blf_type = XFS_LI_BUF;
+ bip->bli_formats[i].blf_blkno = bp->b_maps[i].bm_bn;
+ bip->bli_formats[i].blf_len = bp->b_maps[i].bm_len;
+ bip->bli_formats[i].blf_map_size = map_size;
+ }
#ifdef XFS_TRANS_DEBUG
/*
@@ -699,10 +805,11 @@ xfs_buf_item_init(
* item's bitmap.
*/
void
-xfs_buf_item_log(
- xfs_buf_log_item_t *bip,
+xfs_buf_item_log_segment(
+ struct xfs_buf_log_item *bip,
uint first,
- uint last)
+ uint last,
+ uint *map)
{
uint first_bit;
uint last_bit;
@@ -715,12 +822,6 @@ xfs_buf_item_log(
uint mask;
/*
- * Mark the item as having some dirty data for
- * quick reference in xfs_buf_item_dirty.
- */
- bip->bli_flags |= XFS_BLI_DIRTY;
-
- /*
* Convert byte offsets to bit numbers.
*/
first_bit = first >> XFS_BLF_SHIFT;
@@ -736,7 +837,7 @@ xfs_buf_item_log(
* to set a bit in.
*/
word_num = first_bit >> BIT_TO_WORD_SHIFT;
- wordp = &(bip->bli_format.blf_data_map[word_num]);
+ wordp = &map[word_num];
/*
* Calculate the starting bit in the first word.
@@ -783,6 +884,51 @@ xfs_buf_item_log(
xfs_buf_item_log_debug(bip, first, last);
}
+/*
+ * Mark bytes first through last inclusive as dirty in the buf
+ * item's bitmap.
+ */
+void
+xfs_buf_item_log(
+ xfs_buf_log_item_t *bip,
+ uint first,
+ uint last)
+{
+ int i;
+ uint start;
+ uint end;
+ struct xfs_buf *bp = bip->bli_buf;
+
+ /*
+ * Mark the item as having some dirty data for
+ * quick reference in xfs_buf_item_dirty.
+ */
+ bip->bli_flags |= XFS_BLI_DIRTY;
+
+ /*
+ * walk each buffer segment and mark them dirty appropriately.
+ */
+ start = 0;
+ for (i = 0; i < bip->bli_format_count; i++) {
+ if (start > last)
+ break;
+ end = start + BBTOB(bp->b_maps[i].bm_len);
+ if (first > end) {
+ start += BBTOB(bp->b_maps[i].bm_len);
+ continue;
+ }
+ if (first < start)
+ first = start;
+ if (end > last)
+ end = last;
+
+ xfs_buf_item_log_segment(bip, first, end,
+ &bip->bli_formats[i].blf_data_map[0]);
+
+ start += bp->b_maps[i].bm_len;
+ }
+}
+
/*
* Return 1 if the buffer has some data that has been logged (at any
@@ -804,6 +950,7 @@ xfs_buf_item_free(
kmem_free(bip->bli_logged);
#endif /* XFS_TRANS_DEBUG */
+ xfs_buf_item_free_format(bip);
kmem_zone_free(xfs_buf_item_zone, bip);
}
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index b6ecd2061e7c..6850f49f4af3 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -21,23 +21,6 @@
extern kmem_zone_t *xfs_buf_item_zone;
/*
- * This is the structure used to lay out a buf log item in the
- * log. The data map describes which 128 byte chunks of the buffer
- * have been logged.
- * For 6.2 and beyond, this is XFS_LI_BUF. We use this to log everything.
- */
-typedef struct xfs_buf_log_format {
- unsigned short blf_type; /* buf log item type indicator */
- unsigned short blf_size; /* size of this item */
- ushort blf_flags; /* misc state */
- ushort blf_len; /* number of blocks in this buf */
- __int64_t blf_blkno; /* starting blkno of this buf */
- unsigned int blf_map_size; /* size of data bitmap in words */
- unsigned int blf_data_map[1];/* variable size bitmap of */
- /* regions of buffer in this item */
-} xfs_buf_log_format_t;
-
-/*
* This flag indicates that the buffer contains on disk inodes
* and requires special recovery handling.
*/
@@ -61,6 +44,23 @@ typedef struct xfs_buf_log_format {
#define NBWORD (NBBY * sizeof(unsigned int))
/*
+ * This is the structure used to lay out a buf log item in the
+ * log. The data map describes which 128 byte chunks of the buffer
+ * have been logged.
+ */
+#define XFS_BLF_DATAMAP_SIZE ((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / NBWORD)
+
+typedef struct xfs_buf_log_format {
+ unsigned short blf_type; /* buf log item type indicator */
+ unsigned short blf_size; /* size of this item */
+ ushort blf_flags; /* misc state */
+ ushort blf_len; /* number of blocks in this buf */
+ __int64_t blf_blkno; /* starting blkno of this buf */
+ unsigned int blf_map_size; /* used size of data bitmap in words */
+ unsigned int blf_data_map[XFS_BLF_DATAMAP_SIZE]; /* dirty bitmap */
+} xfs_buf_log_format_t;
+
+/*
* buf log item flags
*/
#define XFS_BLI_HOLD 0x01
@@ -102,7 +102,9 @@ typedef struct xfs_buf_log_item {
char *bli_orig; /* original buffer copy */
char *bli_logged; /* bytes logged (bitmap) */
#endif
- xfs_buf_log_format_t bli_format; /* in-log header */
+ int bli_format_count; /* count of headers */
+ struct xfs_buf_log_format *bli_formats; /* array of in-log header ptrs */
+ struct xfs_buf_log_format bli_format; /* embedded in-log header */
} xfs_buf_log_item_t;
void xfs_buf_item_init(struct xfs_buf *, struct xfs_mount *);
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 015b946c5808..7bfb7dd334fc 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -83,9 +83,9 @@ STATIC void xfs_da_node_unbalance(xfs_da_state_t *state,
/*
* Utility routines.
*/
-STATIC uint xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count);
-STATIC int xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp);
-STATIC xfs_dabuf_t *xfs_da_buf_make(int nbuf, xfs_buf_t **bps);
+STATIC uint xfs_da_node_lasthash(struct xfs_buf *bp, int *count);
+STATIC int xfs_da_node_order(struct xfs_buf *node1_bp,
+ struct xfs_buf *node2_bp);
STATIC int xfs_da_blk_unlink(xfs_da_state_t *state,
xfs_da_state_blk_t *drop_blk,
xfs_da_state_blk_t *save_blk);
@@ -100,10 +100,10 @@ STATIC void xfs_da_state_kill_altpath(xfs_da_state_t *state);
*/
int
xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
- xfs_dabuf_t **bpp, int whichfork)
+ struct xfs_buf **bpp, int whichfork)
{
xfs_da_intnode_t *node;
- xfs_dabuf_t *bp;
+ struct xfs_buf *bp;
int error;
xfs_trans_t *tp;
@@ -114,7 +114,7 @@ xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
if (error)
return(error);
ASSERT(bp != NULL);
- node = bp->data;
+ node = bp->b_addr;
node->hdr.info.forw = 0;
node->hdr.info.back = 0;
node->hdr.info.magic = cpu_to_be16(XFS_DA_NODE_MAGIC);
@@ -122,7 +122,7 @@ xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
node->hdr.count = 0;
node->hdr.level = cpu_to_be16(level);
- xfs_da_log_buf(tp, bp,
+ xfs_trans_log_buf(tp, bp,
XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
*bpp = bp;
@@ -138,7 +138,7 @@ xfs_da_split(xfs_da_state_t *state)
{
xfs_da_state_blk_t *oldblk, *newblk, *addblk;
xfs_da_intnode_t *node;
- xfs_dabuf_t *bp;
+ struct xfs_buf *bp;
int max, action, error, i;
trace_xfs_da_split(state->args);
@@ -203,7 +203,6 @@ xfs_da_split(xfs_da_state_t *state)
case XFS_DA_NODE_MAGIC:
error = xfs_da_node_split(state, oldblk, newblk, addblk,
max - i, &action);
- xfs_da_buf_done(addblk->bp);
addblk->bp = NULL;
if (error)
return(error); /* GROT: dir is inconsistent */
@@ -221,13 +220,6 @@ xfs_da_split(xfs_da_state_t *state)
* Update the btree to show the new hashval for this child.
*/
xfs_da_fixhashpath(state, &state->path);
- /*
- * If we won't need this block again, it's getting dropped
- * from the active path by the loop control, so we need
- * to mark it done now.
- */
- if (i > 0 || !addblk)
- xfs_da_buf_done(oldblk->bp);
}
if (!addblk)
return(0);
@@ -239,8 +231,6 @@ xfs_da_split(xfs_da_state_t *state)
oldblk = &state->path.blk[0];
error = xfs_da_root_split(state, oldblk, addblk);
if (error) {
- xfs_da_buf_done(oldblk->bp);
- xfs_da_buf_done(addblk->bp);
addblk->bp = NULL;
return(error); /* GROT: dir is inconsistent */
}
@@ -252,7 +242,7 @@ xfs_da_split(xfs_da_state_t *state)
* and the original block 0 could be at any position in the list.
*/
- node = oldblk->bp->data;
+ node = oldblk->bp->b_addr;
if (node->hdr.info.forw) {
if (be32_to_cpu(node->hdr.info.forw) == addblk->blkno) {
bp = addblk->bp;
@@ -260,13 +250,13 @@ xfs_da_split(xfs_da_state_t *state)
ASSERT(state->extravalid);
bp = state->extrablk.bp;
}
- node = bp->data;
+ node = bp->b_addr;
node->hdr.info.back = cpu_to_be32(oldblk->blkno);
- xfs_da_log_buf(state->args->trans, bp,
+ xfs_trans_log_buf(state->args->trans, bp,
XFS_DA_LOGRANGE(node, &node->hdr.info,
sizeof(node->hdr.info)));
}
- node = oldblk->bp->data;
+ node = oldblk->bp->b_addr;
if (node->hdr.info.back) {
if (be32_to_cpu(node->hdr.info.back) == addblk->blkno) {
bp = addblk->bp;
@@ -274,14 +264,12 @@ xfs_da_split(xfs_da_state_t *state)
ASSERT(state->extravalid);
bp = state->extrablk.bp;
}
- node = bp->data;
+ node = bp->b_addr;
node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
- xfs_da_log_buf(state->args->trans, bp,
+ xfs_trans_log_buf(state->args->trans, bp,
XFS_DA_LOGRANGE(node, &node->hdr.info,
sizeof(node->hdr.info)));
}
- xfs_da_buf_done(oldblk->bp);
- xfs_da_buf_done(addblk->bp);
addblk->bp = NULL;
return(0);
}
@@ -298,7 +286,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
xfs_da_intnode_t *node, *oldroot;
xfs_da_args_t *args;
xfs_dablk_t blkno;
- xfs_dabuf_t *bp;
+ struct xfs_buf *bp;
int error, size;
xfs_inode_t *dp;
xfs_trans_t *tp;
@@ -323,8 +311,8 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
if (error)
return(error);
ASSERT(bp != NULL);
- node = bp->data;
- oldroot = blk1->bp->data;
+ node = bp->b_addr;
+ oldroot = blk1->bp->b_addr;
if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)) {
size = (int)((char *)&oldroot->btree[be16_to_cpu(oldroot->hdr.count)] -
(char *)oldroot);
@@ -335,8 +323,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
(char *)leaf);
}
memcpy(node, oldroot, size);
- xfs_da_log_buf(tp, bp, 0, size - 1);
- xfs_da_buf_done(blk1->bp);
+ xfs_trans_log_buf(tp, bp, 0, size - 1);
blk1->bp = bp;
blk1->blkno = blkno;
@@ -348,7 +335,7 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
be16_to_cpu(node->hdr.level) + 1, &bp, args->whichfork);
if (error)
return(error);
- node = bp->data;
+ node = bp->b_addr;
node->btree[0].hashval = cpu_to_be32(blk1->hashval);
node->btree[0].before = cpu_to_be32(blk1->blkno);
node->btree[1].hashval = cpu_to_be32(blk2->hashval);
@@ -365,10 +352,9 @@ xfs_da_root_split(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
#endif
/* Header is already logged by xfs_da_node_create */
- xfs_da_log_buf(tp, bp,
+ xfs_trans_log_buf(tp, bp,
XFS_DA_LOGRANGE(node, node->btree,
sizeof(xfs_da_node_entry_t) * 2));
- xfs_da_buf_done(bp);
return(0);
}
@@ -389,7 +375,7 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
trace_xfs_da_node_split(state->args);
- node = oldblk->bp->data;
+ node = oldblk->bp->b_addr;
ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
/*
@@ -436,7 +422,7 @@ xfs_da_node_split(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
*
* If we had double-split op below us, then add the extra block too.
*/
- node = oldblk->bp->data;
+ node = oldblk->bp->b_addr;
if (oldblk->index <= be16_to_cpu(node->hdr.count)) {
oldblk->index++;
xfs_da_node_add(state, oldblk, addblk);
@@ -477,8 +463,8 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
trace_xfs_da_node_rebalance(state->args);
- node1 = blk1->bp->data;
- node2 = blk2->bp->data;
+ node1 = blk1->bp->b_addr;
+ node2 = blk2->bp->b_addr;
/*
* Figure out how many entries need to move, and in which direction.
* Swap the nodes around if that makes it simpler.
@@ -532,7 +518,7 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
btree_d = &node1->btree[be16_to_cpu(node1->hdr.count)];
memcpy(btree_d, btree_s, tmp);
be16_add_cpu(&node1->hdr.count, count);
- xfs_da_log_buf(tp, blk1->bp,
+ xfs_trans_log_buf(tp, blk1->bp,
XFS_DA_LOGRANGE(node1, btree_d, tmp));
/*
@@ -549,9 +535,9 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
/*
* Log header of node 1 and all current bits of node 2.
*/
- xfs_da_log_buf(tp, blk1->bp,
+ xfs_trans_log_buf(tp, blk1->bp,
XFS_DA_LOGRANGE(node1, &node1->hdr, sizeof(node1->hdr)));
- xfs_da_log_buf(tp, blk2->bp,
+ xfs_trans_log_buf(tp, blk2->bp,
XFS_DA_LOGRANGE(node2, &node2->hdr,
sizeof(node2->hdr) +
sizeof(node2->btree[0]) * be16_to_cpu(node2->hdr.count)));
@@ -560,8 +546,8 @@ xfs_da_node_rebalance(xfs_da_state_t *state, xfs_da_state_blk_t *blk1,
* Record the last hashval from each block for upward propagation.
* (note: don't use the swapped node pointers)
*/
- node1 = blk1->bp->data;
- node2 = blk2->bp->data;
+ node1 = blk1->bp->b_addr;
+ node2 = blk2->bp->b_addr;
blk1->hashval = be32_to_cpu(node1->btree[be16_to_cpu(node1->hdr.count)-1].hashval);
blk2->hashval = be32_to_cpu(node2->btree[be16_to_cpu(node2->hdr.count)-1].hashval);
@@ -587,7 +573,7 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
trace_xfs_da_node_add(state->args);
- node = oldblk->bp->data;
+ node = oldblk->bp->b_addr;
ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
ASSERT((oldblk->index >= 0) && (oldblk->index <= be16_to_cpu(node->hdr.count)));
ASSERT(newblk->blkno != 0);
@@ -606,10 +592,10 @@ xfs_da_node_add(xfs_da_state_t *state, xfs_da_state_blk_t *oldblk,
}
btree->hashval = cpu_to_be32(newblk->hashval);
btree->before = cpu_to_be32(newblk->blkno);
- xfs_da_log_buf(state->args->trans, oldblk->bp,
+ xfs_trans_log_buf(state->args->trans, oldblk->bp,
XFS_DA_LOGRANGE(node, btree, tmp + sizeof(*btree)));
be16_add_cpu(&node->hdr.count, 1);
- xfs_da_log_buf(state->args->trans, oldblk->bp,
+ xfs_trans_log_buf(state->args->trans, oldblk->bp,
XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
/*
@@ -735,7 +721,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
xfs_da_intnode_t *oldroot;
xfs_da_args_t *args;
xfs_dablk_t child;
- xfs_dabuf_t *bp;
+ struct xfs_buf *bp;
int error;
trace_xfs_da_root_join(state->args);
@@ -743,7 +729,7 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
args = state->args;
ASSERT(args != NULL);
ASSERT(root_blk->magic == XFS_DA_NODE_MAGIC);
- oldroot = root_blk->bp->data;
+ oldroot = root_blk->bp->b_addr;
ASSERT(oldroot->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
ASSERT(!oldroot->hdr.info.forw);
ASSERT(!oldroot->hdr.info.back);
@@ -765,11 +751,11 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk)
if (error)
return(error);
ASSERT(bp != NULL);
- xfs_da_blkinfo_onlychild_validate(bp->data,
+ xfs_da_blkinfo_onlychild_validate(bp->b_addr,
be16_to_cpu(oldroot->hdr.level));
- memcpy(root_blk->bp->data, bp->data, state->blocksize);
- xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
+ memcpy(root_blk->bp->b_addr, bp->b_addr, state->blocksize);
+ xfs_trans_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1);
error = xfs_da_shrink_inode(args, child, bp);
return(error);
}
@@ -791,7 +777,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
xfs_da_blkinfo_t *info;
int count, forward, error, retval, i;
xfs_dablk_t blkno;
- xfs_dabuf_t *bp;
+ struct xfs_buf *bp;
/*
* Check for the degenerate case of the block being over 50% full.
@@ -799,7 +785,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
* to coalesce with a sibling.
*/
blk = &state->path.blk[ state->path.active-1 ];
- info = blk->bp->data;
+ info = blk->bp->b_addr;
ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
node = (xfs_da_intnode_t *)info;
count = be16_to_cpu(node->hdr.count);
@@ -859,10 +845,10 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
count = state->node_ents;
count -= state->node_ents >> 2;
count -= be16_to_cpu(node->hdr.count);
- node = bp->data;
+ node = bp->b_addr;
ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
count -= be16_to_cpu(node->hdr.count);
- xfs_da_brelse(state->args->trans, bp);
+ xfs_trans_brelse(state->args->trans, bp);
if (count >= 0)
break; /* fits with at least 25% to spare */
}
@@ -934,14 +920,14 @@ xfs_da_fixhashpath(xfs_da_state_t *state, xfs_da_state_path_t *path)
break;
}
for (blk--, level--; level >= 0; blk--, level--) {
- node = blk->bp->data;
+ node = blk->bp->b_addr;
ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
btree = &node->btree[ blk->index ];
if (be32_to_cpu(btree->hashval) == lasthash)
break;
blk->hashval = lasthash;
btree->hashval = cpu_to_be32(lasthash);
- xfs_da_log_buf(state->args->trans, blk->bp,
+ xfs_trans_log_buf(state->args->trans, blk->bp,
XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
lasthash = be32_to_cpu(node->btree[be16_to_cpu(node->hdr.count)-1].hashval);
@@ -960,7 +946,7 @@ xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
trace_xfs_da_node_remove(state->args);
- node = drop_blk->bp->data;
+ node = drop_blk->bp->b_addr;
ASSERT(drop_blk->index < be16_to_cpu(node->hdr.count));
ASSERT(drop_blk->index >= 0);
@@ -972,15 +958,15 @@ xfs_da_node_remove(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk)
tmp = be16_to_cpu(node->hdr.count) - drop_blk->index - 1;
tmp *= (uint)sizeof(xfs_da_node_entry_t);
memmove(btree, btree + 1, tmp);
- xfs_da_log_buf(state->args->trans, drop_blk->bp,
+ xfs_trans_log_buf(state->args->trans, drop_blk->bp,
XFS_DA_LOGRANGE(node, btree, tmp));
btree = &node->btree[be16_to_cpu(node->hdr.count)-1];
}
memset((char *)btree, 0, sizeof(xfs_da_node_entry_t));
- xfs_da_log_buf(state->args->trans, drop_blk->bp,
+ xfs_trans_log_buf(state->args->trans, drop_blk->bp,
XFS_DA_LOGRANGE(node, btree, sizeof(*btree)));
be16_add_cpu(&node->hdr.count, -1);
- xfs_da_log_buf(state->args->trans, drop_blk->bp,
+ xfs_trans_log_buf(state->args->trans, drop_blk->bp,
XFS_DA_LOGRANGE(node, &node->hdr, sizeof(node->hdr)));
/*
@@ -1005,8 +991,8 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
trace_xfs_da_node_unbalance(state->args);
- drop_node = drop_blk->bp->data;
- save_node = save_blk->bp->data;
+ drop_node = drop_blk->bp->b_addr;
+ save_node = save_blk->bp->b_addr;
ASSERT(drop_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
ASSERT(save_node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
tp = state->args->trans;
@@ -1023,13 +1009,13 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
tmp = be16_to_cpu(save_node->hdr.count) * (uint)sizeof(xfs_da_node_entry_t);
memmove(btree, &save_node->btree[0], tmp);
btree = &save_node->btree[0];
- xfs_da_log_buf(tp, save_blk->bp,
+ xfs_trans_log_buf(tp, save_blk->bp,
XFS_DA_LOGRANGE(save_node, btree,
(be16_to_cpu(save_node->hdr.count) + be16_to_cpu(drop_node->hdr.count)) *
sizeof(xfs_da_node_entry_t)));
} else {
btree = &save_node->btree[be16_to_cpu(save_node->hdr.count)];
- xfs_da_log_buf(tp, save_blk->bp,
+ xfs_trans_log_buf(tp, save_blk->bp,
XFS_DA_LOGRANGE(save_node, btree,
be16_to_cpu(drop_node->hdr.count) *
sizeof(xfs_da_node_entry_t)));
@@ -1042,7 +1028,7 @@ xfs_da_node_unbalance(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
memcpy(btree, &drop_node->btree[0], tmp);
be16_add_cpu(&save_node->hdr.count, be16_to_cpu(drop_node->hdr.count));
- xfs_da_log_buf(tp, save_blk->bp,
+ xfs_trans_log_buf(tp, save_blk->bp,
XFS_DA_LOGRANGE(save_node, &save_node->hdr,
sizeof(save_node->hdr)));
@@ -1100,7 +1086,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
state->path.active--;
return(error);
}
- curr = blk->bp->data;
+ curr = blk->bp->b_addr;
blk->magic = be16_to_cpu(curr->magic);
ASSERT(blk->magic == XFS_DA_NODE_MAGIC ||
blk->magic == XFS_DIR2_LEAFN_MAGIC ||
@@ -1110,7 +1096,7 @@ xfs_da_node_lookup_int(xfs_da_state_t *state, int *result)
* Search an intermediate node for a match.
*/
if (blk->magic == XFS_DA_NODE_MAGIC) {
- node = blk->bp->data;
+ node = blk->bp->b_addr;
max = be16_to_cpu(node->hdr.count);
blk->hashval = be32_to_cpu(node->btree[max-1].hashval);
@@ -1216,15 +1202,15 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
xfs_da_blkinfo_t *old_info, *new_info, *tmp_info;
xfs_da_args_t *args;
int before=0, error;
- xfs_dabuf_t *bp;
+ struct xfs_buf *bp;
/*
* Set up environment.
*/
args = state->args;
ASSERT(args != NULL);
- old_info = old_blk->bp->data;
- new_info = new_blk->bp->data;
+ old_info = old_blk->bp->b_addr;
+ new_info = new_blk->bp->b_addr;
ASSERT(old_blk->magic == XFS_DA_NODE_MAGIC ||
old_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
old_blk->magic == XFS_ATTR_LEAF_MAGIC);
@@ -1261,12 +1247,11 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
if (error)
return(error);
ASSERT(bp != NULL);
- tmp_info = bp->data;
+ tmp_info = bp->b_addr;
ASSERT(be16_to_cpu(tmp_info->magic) == be16_to_cpu(old_info->magic));
ASSERT(be32_to_cpu(tmp_info->forw) == old_blk->blkno);
tmp_info->forw = cpu_to_be32(new_blk->blkno);
- xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
- xfs_da_buf_done(bp);
+ xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
}
old_info->back = cpu_to_be32(new_blk->blkno);
} else {
@@ -1283,18 +1268,17 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
if (error)
return(error);
ASSERT(bp != NULL);
- tmp_info = bp->data;
+ tmp_info = bp->b_addr;
ASSERT(tmp_info->magic == old_info->magic);
ASSERT(be32_to_cpu(tmp_info->back) == old_blk->blkno);
tmp_info->back = cpu_to_be32(new_blk->blkno);
- xfs_da_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
- xfs_da_buf_done(bp);
+ xfs_trans_log_buf(args->trans, bp, 0, sizeof(*tmp_info)-1);
}
old_info->forw = cpu_to_be32(new_blk->blkno);
}
- xfs_da_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
- xfs_da_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
+ xfs_trans_log_buf(args->trans, old_blk->bp, 0, sizeof(*tmp_info) - 1);
+ xfs_trans_log_buf(args->trans, new_blk->bp, 0, sizeof(*tmp_info) - 1);
return(0);
}
@@ -1302,12 +1286,14 @@ xfs_da_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
* Compare two intermediate nodes for "order".
*/
STATIC int
-xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp)
+xfs_da_node_order(
+ struct xfs_buf *node1_bp,
+ struct xfs_buf *node2_bp)
{
xfs_da_intnode_t *node1, *node2;
- node1 = node1_bp->data;
- node2 = node2_bp->data;
+ node1 = node1_bp->b_addr;
+ node2 = node2_bp->b_addr;
ASSERT(node1->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) &&
node2->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
if ((be16_to_cpu(node1->hdr.count) > 0) && (be16_to_cpu(node2->hdr.count) > 0) &&
@@ -1324,11 +1310,13 @@ xfs_da_node_order(xfs_dabuf_t *node1_bp, xfs_dabuf_t *node2_bp)
* Pick up the last hashvalue from an intermediate node.
*/
STATIC uint
-xfs_da_node_lasthash(xfs_dabuf_t *bp, int *count)
+xfs_da_node_lasthash(
+ struct xfs_buf *bp,
+ int *count)
{
xfs_da_intnode_t *node;
- node = bp->data;
+ node = bp->b_addr;
ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
if (count)
*count = be16_to_cpu(node->hdr.count);
@@ -1346,7 +1334,7 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
{
xfs_da_blkinfo_t *drop_info, *save_info, *tmp_info;
xfs_da_args_t *args;
- xfs_dabuf_t *bp;
+ struct xfs_buf *bp;
int error;
/*
@@ -1354,8 +1342,8 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
*/
args = state->args;
ASSERT(args != NULL);
- save_info = save_blk->bp->data;
- drop_info = drop_blk->bp->data;
+ save_info = save_blk->bp->b_addr;
+ drop_info = drop_blk->bp->b_addr;
ASSERT(save_blk->magic == XFS_DA_NODE_MAGIC ||
save_blk->magic == XFS_DIR2_LEAFN_MAGIC ||
save_blk->magic == XFS_ATTR_LEAF_MAGIC);
@@ -1380,13 +1368,12 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
if (error)
return(error);
ASSERT(bp != NULL);
- tmp_info = bp->data;
+ tmp_info = bp->b_addr;
ASSERT(tmp_info->magic == save_info->magic);
ASSERT(be32_to_cpu(tmp_info->forw) == drop_blk->blkno);
tmp_info->forw = cpu_to_be32(save_blk->blkno);
- xfs_da_log_buf(args->trans, bp, 0,
+ xfs_trans_log_buf(args->trans, bp, 0,
sizeof(*tmp_info) - 1);
- xfs_da_buf_done(bp);
}
} else {
trace_xfs_da_unlink_forward(args);
@@ -1398,17 +1385,16 @@ xfs_da_blk_unlink(xfs_da_state_t *state, xfs_da_state_blk_t *drop_blk,
if (error)
return(error);
ASSERT(bp != NULL);
- tmp_info = bp->data;
+ tmp_info = bp->b_addr;
ASSERT(tmp_info->magic == save_info->magic);
ASSERT(be32_to_cpu(tmp_info->back) == drop_blk->blkno);
tmp_info->back = cpu_to_be32(save_blk->blkno);
- xfs_da_log_buf(args->trans, bp, 0,
+ xfs_trans_log_buf(args->trans, bp, 0,
sizeof(*tmp_info) - 1);
- xfs_da_buf_done(bp);
}
}
- xfs_da_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
+ xfs_trans_log_buf(args->trans, save_blk->bp, 0, sizeof(*save_info) - 1);
return(0);
}
@@ -1443,7 +1429,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
level = (path->active-1) - 1; /* skip bottom layer in path */
for (blk = &path->blk[level]; level >= 0; blk--, level--) {
ASSERT(blk->bp != NULL);
- node = blk->bp->data;
+ node = blk->bp->b_addr;
ASSERT(node->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
if (forward && (blk->index < be16_to_cpu(node->hdr.count)-1)) {
blk->index++;
@@ -1471,7 +1457,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
* (if it's dirty, trans won't actually let go)
*/
if (release)
- xfs_da_brelse(args->trans, blk->bp);
+ xfs_trans_brelse(args->trans, blk->bp);
/*
* Read the next child block.
@@ -1482,7 +1468,7 @@ xfs_da_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
if (error)
return(error);
ASSERT(blk->bp != NULL);
- info = blk->bp->data;
+ info = blk->bp->b_addr;
ASSERT(info->magic == cpu_to_be16(XFS_DA_NODE_MAGIC) ||
info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
info->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC));
@@ -1702,11 +1688,13 @@ xfs_da_grow_inode(
* a bmap btree split to do that.
*/
STATIC int
-xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
- xfs_dabuf_t **dead_bufp)
+xfs_da_swap_lastblock(
+ xfs_da_args_t *args,
+ xfs_dablk_t *dead_blknop,
+ struct xfs_buf **dead_bufp)
{
xfs_dablk_t dead_blkno, last_blkno, sib_blkno, par_blkno;
- xfs_dabuf_t *dead_buf, *last_buf, *sib_buf, *par_buf;
+ struct xfs_buf *dead_buf, *last_buf, *sib_buf, *par_buf;
xfs_fileoff_t lastoff;
xfs_inode_t *ip;
xfs_trans_t *tp;
@@ -1744,9 +1732,9 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
/*
* Copy the last block into the dead buffer and log it.
*/
- memcpy(dead_buf->data, last_buf->data, mp->m_dirblksize);
- xfs_da_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1);
- dead_info = dead_buf->data;
+ memcpy(dead_buf->b_addr, last_buf->b_addr, mp->m_dirblksize);
+ xfs_trans_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1);
+ dead_info = dead_buf->b_addr;
/*
* Get values from the moved block.
*/
@@ -1767,7 +1755,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
if ((sib_blkno = be32_to_cpu(dead_info->back))) {
if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
goto done;
- sib_info = sib_buf->data;
+ sib_info = sib_buf->b_addr;
if (unlikely(
be32_to_cpu(sib_info->forw) != last_blkno ||
sib_info->magic != dead_info->magic)) {
@@ -1777,10 +1765,9 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
goto done;
}
sib_info->forw = cpu_to_be32(dead_blkno);
- xfs_da_log_buf(tp, sib_buf,
+ xfs_trans_log_buf(tp, sib_buf,
XFS_DA_LOGRANGE(sib_info, &sib_info->forw,
sizeof(sib_info->forw)));
- xfs_da_buf_done(sib_buf);
sib_buf = NULL;
}
/*
@@ -1789,7 +1776,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
if ((sib_blkno = be32_to_cpu(dead_info->forw))) {
if ((error = xfs_da_read_buf(tp, ip, sib_blkno, -1, &sib_buf, w)))
goto done;
- sib_info = sib_buf->data;
+ sib_info = sib_buf->b_addr;
if (unlikely(
be32_to_cpu(sib_info->back) != last_blkno ||
sib_info->magic != dead_info->magic)) {
@@ -1799,10 +1786,9 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
goto done;
}
sib_info->back = cpu_to_be32(dead_blkno);
- xfs_da_log_buf(tp, sib_buf,
+ xfs_trans_log_buf(tp, sib_buf,
XFS_DA_LOGRANGE(sib_info, &sib_info->back,
sizeof(sib_info->back)));
- xfs_da_buf_done(sib_buf);
sib_buf = NULL;
}
par_blkno = mp->m_dirleafblk;
@@ -1813,7 +1799,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
for (;;) {
if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
goto done;
- par_node = par_buf->data;
+ par_node = par_buf->b_addr;
if (unlikely(par_node->hdr.info.magic !=
cpu_to_be16(XFS_DA_NODE_MAGIC) ||
(level >= 0 && level != be16_to_cpu(par_node->hdr.level) + 1))) {
@@ -1837,7 +1823,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
par_blkno = be32_to_cpu(par_node->btree[entno].before);
if (level == dead_level + 1)
break;
- xfs_da_brelse(tp, par_buf);
+ xfs_trans_brelse(tp, par_buf);
par_buf = NULL;
}
/*
@@ -1853,7 +1839,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
if (entno < be16_to_cpu(par_node->hdr.count))
break;
par_blkno = be32_to_cpu(par_node->hdr.info.forw);
- xfs_da_brelse(tp, par_buf);
+ xfs_trans_brelse(tp, par_buf);
par_buf = NULL;
if (unlikely(par_blkno == 0)) {
XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)",
@@ -1863,7 +1849,7 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
}
if ((error = xfs_da_read_buf(tp, ip, par_blkno, -1, &par_buf, w)))
goto done;
- par_node = par_buf->data;
+ par_node = par_buf->b_addr;
if (unlikely(
be16_to_cpu(par_node->hdr.level) != level ||
par_node->hdr.info.magic != cpu_to_be16(XFS_DA_NODE_MAGIC))) {
@@ -1878,20 +1864,18 @@ xfs_da_swap_lastblock(xfs_da_args_t *args, xfs_dablk_t *dead_blknop,
* Update the parent entry pointing to the moved block.
*/
par_node->btree[entno].before = cpu_to_be32(dead_blkno);
- xfs_da_log_buf(tp, par_buf,
+ xfs_trans_log_buf(tp, par_buf,
XFS_DA_LOGRANGE(par_node, &par_node->btree[entno].before,
sizeof(par_node->btree[entno].before)));
- xfs_da_buf_done(par_buf);
- xfs_da_buf_done(dead_buf);
*dead_blknop = last_blkno;
*dead_bufp = last_buf;
return 0;
done:
if (par_buf)
- xfs_da_brelse(tp, par_buf);
+ xfs_trans_brelse(tp, par_buf);
if (sib_buf)
- xfs_da_brelse(tp, sib_buf);
- xfs_da_brelse(tp, last_buf);
+ xfs_trans_brelse(tp, sib_buf);
+ xfs_trans_brelse(tp, last_buf);
return error;
}
@@ -1899,8 +1883,10 @@ done:
* Remove a btree block from a directory or attribute.
*/
int
-xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
- xfs_dabuf_t *dead_buf)
+xfs_da_shrink_inode(
+ xfs_da_args_t *args,
+ xfs_dablk_t dead_blkno,
+ struct xfs_buf *dead_buf)
{
xfs_inode_t *dp;
int done, error, w, count;
@@ -1935,7 +1921,7 @@ xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
break;
}
}
- xfs_da_binval(tp, dead_buf);
+ xfs_trans_binval(tp, dead_buf);
return error;
}
@@ -1967,35 +1953,75 @@ xfs_da_map_covers_blocks(
}
/*
- * Make a dabuf.
- * Used for get_buf, read_buf, read_bufr, and reada_buf.
+ * Convert a struct xfs_bmbt_irec to a struct xfs_buf_map.
+ *
+ * For the single map case, it is assumed that the caller has provided a pointer
+ * to a valid xfs_buf_map. For the multiple map case, this function will
+ * allocate the xfs_buf_map to hold all the maps and replace the caller's single
+ * map pointer with the allocated map.
*/
-STATIC int
-xfs_da_do_buf(
- xfs_trans_t *trans,
- xfs_inode_t *dp,
- xfs_dablk_t bno,
- xfs_daddr_t *mappedbnop,
- xfs_dabuf_t **bpp,
- int whichfork,
- int caller)
+static int
+xfs_buf_map_from_irec(
+ struct xfs_mount *mp,
+ struct xfs_buf_map **mapp,
+ unsigned int *nmaps,
+ struct xfs_bmbt_irec *irecs,
+ unsigned int nirecs)
{
- xfs_buf_t *bp = NULL;
- xfs_buf_t **bplist;
- int error=0;
- int i;
- xfs_bmbt_irec_t map;
- xfs_bmbt_irec_t *mapp;
- xfs_daddr_t mappedbno;
- xfs_mount_t *mp;
- int nbplist=0;
- int nfsb;
- int nmap;
- xfs_dabuf_t *rbp;
+ struct xfs_buf_map *map;
+ int i;
+
+ ASSERT(*nmaps == 1);
+ ASSERT(nirecs >= 1);
+
+ if (nirecs > 1) {
+ map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), KM_SLEEP);
+ if (!map)
+ return ENOMEM;
+ *mapp = map;
+ }
+
+ *nmaps = nirecs;
+ map = *mapp;
+ for (i = 0; i < *nmaps; i++) {
+ ASSERT(irecs[i].br_startblock != DELAYSTARTBLOCK &&
+ irecs[i].br_startblock != HOLESTARTBLOCK);
+ map[i].bm_bn = XFS_FSB_TO_DADDR(mp, irecs[i].br_startblock);
+ map[i].bm_len = XFS_FSB_TO_BB(mp, irecs[i].br_blockcount);
+ }
+ return 0;
+}
+
+/*
+ * Map the block we are given ready for reading. There are three possible return
+ * values:
+ * -1 - will be returned if we land in a hole and mappedbno == -2 so the
+ * caller knows not to execute a subsequent read.
+ * 0 - if we mapped the block successfully
+ * >0 - positive error number if there was an error.
+ */
+static int
+xfs_dabuf_map(
+ struct xfs_trans *trans,
+ struct xfs_inode *dp,
+ xfs_dablk_t bno,
+ xfs_daddr_t mappedbno,
+ int whichfork,
+ struct xfs_buf_map **map,
+ int *nmaps)
+{
+ struct xfs_mount *mp = dp->i_mount;
+ int nfsb;
+ int error = 0;
+ struct xfs_bmbt_irec irec;
+ struct xfs_bmbt_irec *irecs = &irec;
+ int nirecs;
+
+ ASSERT(map && *map);
+ ASSERT(*nmaps == 1);
- mp = dp->i_mount;
nfsb = (whichfork == XFS_DATA_FORK) ? mp->m_dirblkfsbs : 1;
- mappedbno = *mappedbnop;
+
/*
* Caller doesn't have a mapping. -2 means don't complain
* if we land in a hole.
@@ -2004,112 +2030,150 @@ xfs_da_do_buf(
/*
* Optimize the one-block case.
*/
- if (nfsb == 1)
- mapp = &map;
- else
- mapp = kmem_alloc(sizeof(*mapp) * nfsb, KM_SLEEP);
+ if (nfsb != 1)
+ irecs = kmem_zalloc(sizeof(irec) * nfsb, KM_SLEEP);
- nmap = nfsb;
- error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, mapp,
- &nmap, xfs_bmapi_aflag(whichfork));
+ nirecs = nfsb;
+ error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, irecs,
+ &nirecs, xfs_bmapi_aflag(whichfork));
if (error)
- goto exit0;
+ goto out;
} else {
- map.br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
- map.br_startoff = (xfs_fileoff_t)bno;
- map.br_blockcount = nfsb;
- mapp = &map;
- nmap = 1;
+ irecs->br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
+ irecs->br_startoff = (xfs_fileoff_t)bno;
+ irecs->br_blockcount = nfsb;
+ irecs->br_state = 0;
+ nirecs = 1;
}
- if (!xfs_da_map_covers_blocks(nmap, mapp, bno, nfsb)) {
- error = mappedbno == -2 ? 0 : XFS_ERROR(EFSCORRUPTED);
+
+ if (!xfs_da_map_covers_blocks(nirecs, irecs, bno, nfsb)) {
+ error = mappedbno == -2 ? -1 : XFS_ERROR(EFSCORRUPTED);
if (unlikely(error == EFSCORRUPTED)) {
if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
+ int i;
xfs_alert(mp, "%s: bno %lld dir: inode %lld",
__func__, (long long)bno,
(long long)dp->i_ino);
- for (i = 0; i < nmap; i++) {
+ for (i = 0; i < *nmaps; i++) {
xfs_alert(mp,
"[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d",
i,
- (long long)mapp[i].br_startoff,
- (long long)mapp[i].br_startblock,
- (long long)mapp[i].br_blockcount,
- mapp[i].br_state);
+ (long long)irecs[i].br_startoff,
+ (long long)irecs[i].br_startblock,
+ (long long)irecs[i].br_blockcount,
+ irecs[i].br_state);
}
}
XFS_ERROR_REPORT("xfs_da_do_buf(1)",
XFS_ERRLEVEL_LOW, mp);
}
- goto exit0;
+ goto out;
}
- if (caller != 3 && nmap > 1) {
- bplist = kmem_alloc(sizeof(*bplist) * nmap, KM_SLEEP);
- nbplist = 0;
- } else
- bplist = NULL;
- /*
- * Turn the mapping(s) into buffer(s).
- */
- for (i = 0; i < nmap; i++) {
- int nmapped;
-
- mappedbno = XFS_FSB_TO_DADDR(mp, mapp[i].br_startblock);
- if (i == 0)
- *mappedbnop = mappedbno;
- nmapped = (int)XFS_FSB_TO_BB(mp, mapp[i].br_blockcount);
- switch (caller) {
- case 0:
- bp = xfs_trans_get_buf(trans, mp->m_ddev_targp,
- mappedbno, nmapped, 0);
- error = bp ? bp->b_error : XFS_ERROR(EIO);
- break;
- case 1:
- case 2:
- bp = NULL;
- error = xfs_trans_read_buf(mp, trans, mp->m_ddev_targp,
- mappedbno, nmapped, 0, &bp);
- break;
- case 3:
- xfs_buf_readahead(mp->m_ddev_targp, mappedbno, nmapped);
+ error = xfs_buf_map_from_irec(mp, map, nmaps, irecs, nirecs);
+out:
+ if (irecs != &irec)
+ kmem_free(irecs);
+ return error;
+}
+
+/*
+ * Get a buffer for the dir/attr block.
+ */
+int
+xfs_da_get_buf(
+ struct xfs_trans *trans,
+ struct xfs_inode *dp,
+ xfs_dablk_t bno,
+ xfs_daddr_t mappedbno,
+ struct xfs_buf **bpp,
+ int whichfork)
+{
+ struct xfs_buf *bp;
+ struct xfs_buf_map map;
+ struct xfs_buf_map *mapp;
+ int nmap;
+ int error;
+
+ *bpp = NULL;
+ mapp = &map;
+ nmap = 1;
+ error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork,
+ &mapp, &nmap);
+ if (error) {
+ /* mapping a hole is not an error, but we don't continue */
+ if (error == -1)
error = 0;
- bp = NULL;
- break;
- }
- if (error) {
- if (bp)
- xfs_trans_brelse(trans, bp);
- goto exit1;
- }
- if (!bp)
- continue;
- if (caller == 1) {
- if (whichfork == XFS_ATTR_FORK)
- xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
- else
- xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
- }
- if (bplist) {
- bplist[nbplist++] = bp;
- }
+ goto out_free;
}
- /*
- * Build a dabuf structure.
- */
- if (bplist) {
- rbp = xfs_da_buf_make(nbplist, bplist);
- } else if (bp)
- rbp = xfs_da_buf_make(1, &bp);
+
+ bp = xfs_trans_get_buf_map(trans, dp->i_mount->m_ddev_targp,
+ mapp, nmap, 0);
+ error = bp ? bp->b_error : XFS_ERROR(EIO);
+ if (error) {
+ xfs_trans_brelse(trans, bp);
+ goto out_free;
+ }
+
+ *bpp = bp;
+
+out_free:
+ if (mapp != &map)
+ kmem_free(mapp);
+
+ return error;
+}
+
+/*
+ * Get a buffer for the dir/attr block, fill in the contents.
+ */
+int
+xfs_da_read_buf(
+ struct xfs_trans *trans,
+ struct xfs_inode *dp,
+ xfs_dablk_t bno,
+ xfs_daddr_t mappedbno,
+ struct xfs_buf **bpp,
+ int whichfork)
+{
+ struct xfs_buf *bp;
+ struct xfs_buf_map map;
+ struct xfs_buf_map *mapp;
+ int nmap;
+ int error;
+
+ *bpp = NULL;
+ mapp = &map;
+ nmap = 1;
+ error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork,
+ &mapp, &nmap);
+ if (error) {
+ /* mapping a hole is not an error, but we don't continue */
+ if (error == -1)
+ error = 0;
+ goto out_free;
+ }
+
+ error = xfs_trans_read_buf_map(dp->i_mount, trans,
+ dp->i_mount->m_ddev_targp,
+ mapp, nmap, 0, &bp);
+ if (error)
+ goto out_free;
+
+ if (whichfork == XFS_ATTR_FORK)
+ xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
else
- rbp = NULL;
+ xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
+
/*
- * For read_buf, check the magic number.
+ * This verification code will be moved to a CRC verification callback
+ * function so just leave it here unchanged until then.
*/
- if (caller == 1) {
- xfs_dir2_data_hdr_t *hdr = rbp->data;
- xfs_dir2_free_t *free = rbp->data;
- xfs_da_blkinfo_t *info = rbp->data;
+ {
+ xfs_dir2_data_hdr_t *hdr = bp->b_addr;
+ xfs_dir2_free_t *free = bp->b_addr;
+ xfs_da_blkinfo_t *info = bp->b_addr;
uint magic, magic1;
+ struct xfs_mount *mp = dp->i_mount;
magic = be16_to_cpu(info->magic);
magic1 = be32_to_cpu(hdr->magic);
@@ -2123,66 +2187,20 @@ xfs_da_do_buf(
(free->hdr.magic != cpu_to_be32(XFS_DIR2_FREE_MAGIC)),
mp, XFS_ERRTAG_DA_READ_BUF,
XFS_RANDOM_DA_READ_BUF))) {
- trace_xfs_da_btree_corrupt(rbp->bps[0], _RET_IP_);
+ trace_xfs_da_btree_corrupt(bp, _RET_IP_);
XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)",
XFS_ERRLEVEL_LOW, mp, info);
error = XFS_ERROR(EFSCORRUPTED);
- xfs_da_brelse(trans, rbp);
- nbplist = 0;
- goto exit1;
+ xfs_trans_brelse(trans, bp);
+ goto out_free;
}
}
- if (bplist) {
- kmem_free(bplist);
- }
- if (mapp != &map) {
- kmem_free(mapp);
- }
- if (bpp)
- *bpp = rbp;
- return 0;
-exit1:
- if (bplist) {
- for (i = 0; i < nbplist; i++)
- xfs_trans_brelse(trans, bplist[i]);
- kmem_free(bplist);
- }
-exit0:
+ *bpp = bp;
+out_free:
if (mapp != &map)
kmem_free(mapp);
- if (bpp)
- *bpp = NULL;
- return error;
-}
-
-/*
- * Get a buffer for the dir/attr block.
- */
-int
-xfs_da_get_buf(
- xfs_trans_t *trans,
- xfs_inode_t *dp,
- xfs_dablk_t bno,
- xfs_daddr_t mappedbno,
- xfs_dabuf_t **bpp,
- int whichfork)
-{
- return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 0);
-}
-/*
- * Get a buffer for the dir/attr block, fill in the contents.
- */
-int
-xfs_da_read_buf(
- xfs_trans_t *trans,
- xfs_inode_t *dp,
- xfs_dablk_t bno,
- xfs_daddr_t mappedbno,
- xfs_dabuf_t **bpp,
- int whichfork)
-{
- return xfs_da_do_buf(trans, dp, bno, &mappedbno, bpp, whichfork, 1);
+ return error;
}
/*
@@ -2190,22 +2208,41 @@ xfs_da_read_buf(
*/
xfs_daddr_t
xfs_da_reada_buf(
- xfs_trans_t *trans,
- xfs_inode_t *dp,
- xfs_dablk_t bno,
- int whichfork)
+ struct xfs_trans *trans,
+ struct xfs_inode *dp,
+ xfs_dablk_t bno,
+ int whichfork)
{
- xfs_daddr_t rval;
+ xfs_daddr_t mappedbno = -1;
+ struct xfs_buf_map map;
+ struct xfs_buf_map *mapp;
+ int nmap;
+ int error;
+
+ mapp = &map;
+ nmap = 1;
+ error = xfs_dabuf_map(trans, dp, bno, -1, whichfork,
+ &mapp, &nmap);
+ if (error) {
+ /* mapping a hole is not an error, but we don't continue */
+ if (error == -1)
+ error = 0;
+ goto out_free;
+ }
- rval = -1;
- if (xfs_da_do_buf(trans, dp, bno, &rval, NULL, whichfork, 3))
+ mappedbno = mapp[0].bm_bn;
+ xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap);
+
+out_free:
+ if (mapp != &map)
+ kmem_free(mapp);
+
+ if (error)
return -1;
- else
- return rval;
+ return mappedbno;
}
kmem_zone_t *xfs_da_state_zone; /* anchor for state struct zone */
-kmem_zone_t *xfs_dabuf_zone; /* dabuf zone */
/*
* Allocate a dir-state structure.
@@ -2225,13 +2262,8 @@ xfs_da_state_kill_altpath(xfs_da_state_t *state)
{
int i;
- for (i = 0; i < state->altpath.active; i++) {
- if (state->altpath.blk[i].bp) {
- if (state->altpath.blk[i].bp != state->path.blk[i].bp)
- xfs_da_buf_done(state->altpath.blk[i].bp);
- state->altpath.blk[i].bp = NULL;
- }
- }
+ for (i = 0; i < state->altpath.active; i++)
+ state->altpath.blk[i].bp = NULL;
state->altpath.active = 0;
}
@@ -2241,204 +2273,9 @@ xfs_da_state_kill_altpath(xfs_da_state_t *state)
void
xfs_da_state_free(xfs_da_state_t *state)
{
- int i;
-
xfs_da_state_kill_altpath(state);
- for (i = 0; i < state->path.active; i++) {
- if (state->path.blk[i].bp)
- xfs_da_buf_done(state->path.blk[i].bp);
- }
- if (state->extravalid && state->extrablk.bp)
- xfs_da_buf_done(state->extrablk.bp);
#ifdef DEBUG
memset((char *)state, 0, sizeof(*state));
#endif /* DEBUG */
kmem_zone_free(xfs_da_state_zone, state);
}
-
-/*
- * Create a dabuf.
- */
-/* ARGSUSED */
-STATIC xfs_dabuf_t *
-xfs_da_buf_make(int nbuf, xfs_buf_t **bps)
-{
- xfs_buf_t *bp;
- xfs_dabuf_t *dabuf;
- int i;
- int off;
-
- if (nbuf == 1)
- dabuf = kmem_zone_alloc(xfs_dabuf_zone, KM_NOFS);
- else
- dabuf = kmem_alloc(XFS_DA_BUF_SIZE(nbuf), KM_NOFS);
- dabuf->dirty = 0;
- if (nbuf == 1) {
- dabuf->nbuf = 1;
- bp = bps[0];
- dabuf->bbcount = bp->b_length;
- dabuf->data = bp->b_addr;
- dabuf->bps[0] = bp;
- } else {
- dabuf->nbuf = nbuf;
- for (i = 0, dabuf->bbcount = 0; i < nbuf; i++) {
- dabuf->bps[i] = bp = bps[i];
- dabuf->bbcount += bp->b_length;
- }
- dabuf->data = kmem_alloc(BBTOB(dabuf->bbcount), KM_SLEEP);
- for (i = off = 0; i < nbuf; i++, off += BBTOB(bp->b_length)) {
- bp = bps[i];
- memcpy((char *)dabuf->data + off, bp->b_addr,
- BBTOB(bp->b_length));
- }
- }
- return dabuf;
-}
-
-/*
- * Un-dirty a dabuf.
- */
-STATIC void
-xfs_da_buf_clean(xfs_dabuf_t *dabuf)
-{
- xfs_buf_t *bp;
- int i;
- int off;
-
- if (dabuf->dirty) {
- ASSERT(dabuf->nbuf > 1);
- dabuf->dirty = 0;
- for (i = off = 0; i < dabuf->nbuf;
- i++, off += BBTOB(bp->b_length)) {
- bp = dabuf->bps[i];
- memcpy(bp->b_addr, dabuf->data + off,
- BBTOB(bp->b_length));
- }
- }
-}
-
-/*
- * Release a dabuf.
- */
-void
-xfs_da_buf_done(xfs_dabuf_t *dabuf)
-{
- ASSERT(dabuf);
- ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
- if (dabuf->dirty)
- xfs_da_buf_clean(dabuf);
- if (dabuf->nbuf > 1) {
- kmem_free(dabuf->data);
- kmem_free(dabuf);
- } else {
- kmem_zone_free(xfs_dabuf_zone, dabuf);
- }
-}
-
-/*
- * Log transaction from a dabuf.
- */
-void
-xfs_da_log_buf(xfs_trans_t *tp, xfs_dabuf_t *dabuf, uint first, uint last)
-{
- xfs_buf_t *bp;
- uint f;
- int i;
- uint l;
- int off;
-
- ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
- if (dabuf->nbuf == 1) {
- ASSERT(dabuf->data == dabuf->bps[0]->b_addr);
- xfs_trans_log_buf(tp, dabuf->bps[0], first, last);
- return;
- }
- dabuf->dirty = 1;
- ASSERT(first <= last);
- for (i = off = 0; i < dabuf->nbuf; i++, off += BBTOB(bp->b_length)) {
- bp = dabuf->bps[i];
- f = off;
- l = f + BBTOB(bp->b_length) - 1;
- if (f < first)
- f = first;
- if (l > last)
- l = last;
- if (f <= l)
- xfs_trans_log_buf(tp, bp, f - off, l - off);
- /*
- * B_DONE is set by xfs_trans_log buf.
- * If we don't set it on a new buffer (get not read)
- * then if we don't put anything in the buffer it won't
- * be set, and at commit it it released into the cache,
- * and then a read will fail.
- */
- else if (!(XFS_BUF_ISDONE(bp)))
- XFS_BUF_DONE(bp);
- }
- ASSERT(last < off);
-}
-
-/*
- * Release dabuf from a transaction.
- * Have to free up the dabuf before the buffers are released,
- * since the synchronization on the dabuf is really the lock on the buffer.
- */
-void
-xfs_da_brelse(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
-{
- xfs_buf_t *bp;
- xfs_buf_t **bplist;
- int i;
- int nbuf;
-
- ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
- if ((nbuf = dabuf->nbuf) == 1) {
- bplist = &bp;
- bp = dabuf->bps[0];
- } else {
- bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
- memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
- }
- xfs_da_buf_done(dabuf);
- for (i = 0; i < nbuf; i++)
- xfs_trans_brelse(tp, bplist[i]);
- if (bplist != &bp)
- kmem_free(bplist);
-}
-
-/*
- * Invalidate dabuf from a transaction.
- */
-void
-xfs_da_binval(xfs_trans_t *tp, xfs_dabuf_t *dabuf)
-{
- xfs_buf_t *bp;
- xfs_buf_t **bplist;
- int i;
- int nbuf;
-
- ASSERT(dabuf->nbuf && dabuf->data && dabuf->bbcount && dabuf->bps[0]);
- if ((nbuf = dabuf->nbuf) == 1) {
- bplist = &bp;
- bp = dabuf->bps[0];
- } else {
- bplist = kmem_alloc(nbuf * sizeof(*bplist), KM_SLEEP);
- memcpy(bplist, dabuf->bps, nbuf * sizeof(*bplist));
- }
- xfs_da_buf_done(dabuf);
- for (i = 0; i < nbuf; i++)
- xfs_trans_binval(tp, bplist[i]);
- if (bplist != &bp)
- kmem_free(bplist);
-}
-
-/*
- * Get the first daddr from a dabuf.
- */
-xfs_daddr_t
-xfs_da_blkno(xfs_dabuf_t *dabuf)
-{
- ASSERT(dabuf->nbuf);
- ASSERT(dabuf->data);
- return XFS_BUF_ADDR(dabuf->bps[0]);
-}
diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h
index dbf7c074ae73..132adafb041e 100644
--- a/fs/xfs/xfs_da_btree.h
+++ b/fs/xfs/xfs_da_btree.h
@@ -32,7 +32,7 @@ struct zone;
/*
* This structure is common to both leaf nodes and non-leaf nodes in the Btree.
*
- * Is is used to manage a doubly linked list of all blocks at the same
+ * It is used to manage a doubly linked list of all blocks at the same
* level in the Btree, and to identify which type of block this is.
*/
#define XFS_DA_NODE_MAGIC 0xfebe /* magic number: non-leaf blocks */
@@ -133,24 +133,6 @@ typedef struct xfs_da_args {
{ XFS_DA_OP_CILOOKUP, "CILOOKUP" }
/*
- * Structure to describe buffer(s) for a block.
- * This is needed in the directory version 2 format case, when
- * multiple non-contiguous fsblocks might be needed to cover one
- * logical directory block.
- * If the buffer count is 1 then the data pointer points to the
- * same place as the b_addr field for the buffer, else to kmem_alloced memory.
- */
-typedef struct xfs_dabuf {
- int nbuf; /* number of buffer pointers present */
- short dirty; /* data needs to be copied back */
- short bbcount; /* how large is data in bbs */
- void *data; /* pointer for buffers' data */
- struct xfs_buf *bps[1]; /* actually nbuf of these */
-} xfs_dabuf_t;
-#define XFS_DA_BUF_SIZE(n) \
- (sizeof(xfs_dabuf_t) + sizeof(struct xfs_buf *) * ((n) - 1))
-
-/*
* Storage for holding state during Btree searches and split/join ops.
*
* Only need space for 5 intermediate nodes. With a minimum of 62-way
@@ -158,7 +140,7 @@ typedef struct xfs_dabuf {
* which is slightly more than enough.
*/
typedef struct xfs_da_state_blk {
- xfs_dabuf_t *bp; /* buffer containing block */
+ struct xfs_buf *bp; /* buffer containing block */
xfs_dablk_t blkno; /* filesystem blkno of buffer */
xfs_daddr_t disk_blkno; /* on-disk blkno (in BBs) of buffer */
int index; /* relevant index into block */
@@ -211,7 +193,7 @@ struct xfs_nameops {
* Routines used for growing the Btree.
*/
int xfs_da_node_create(xfs_da_args_t *args, xfs_dablk_t blkno, int level,
- xfs_dabuf_t **bpp, int whichfork);
+ struct xfs_buf **bpp, int whichfork);
int xfs_da_split(xfs_da_state_t *state);
/*
@@ -241,14 +223,14 @@ int xfs_da_grow_inode_int(struct xfs_da_args *args, xfs_fileoff_t *bno,
int count);
int xfs_da_get_buf(struct xfs_trans *trans, struct xfs_inode *dp,
xfs_dablk_t bno, xfs_daddr_t mappedbno,
- xfs_dabuf_t **bp, int whichfork);
+ struct xfs_buf **bp, int whichfork);
int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp,
xfs_dablk_t bno, xfs_daddr_t mappedbno,
- xfs_dabuf_t **bpp, int whichfork);
+ struct xfs_buf **bpp, int whichfork);
xfs_daddr_t xfs_da_reada_buf(struct xfs_trans *trans, struct xfs_inode *dp,
xfs_dablk_t bno, int whichfork);
int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
- xfs_dabuf_t *dead_buf);
+ struct xfs_buf *dead_buf);
uint xfs_da_hashname(const __uint8_t *name_string, int name_length);
enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args,
@@ -258,15 +240,7 @@ enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args,
xfs_da_state_t *xfs_da_state_alloc(void);
void xfs_da_state_free(xfs_da_state_t *state);
-void xfs_da_buf_done(xfs_dabuf_t *dabuf);
-void xfs_da_log_buf(struct xfs_trans *tp, xfs_dabuf_t *dabuf, uint first,
- uint last);
-void xfs_da_brelse(struct xfs_trans *tp, xfs_dabuf_t *dabuf);
-void xfs_da_binval(struct xfs_trans *tp, xfs_dabuf_t *dabuf);
-xfs_daddr_t xfs_da_blkno(xfs_dabuf_t *dabuf);
-
extern struct kmem_zone *xfs_da_state_zone;
-extern struct kmem_zone *xfs_dabuf_zone;
extern const struct xfs_nameops xfs_default_nameops;
#endif /* __XFS_DA_BTREE_H__ */
diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h
index a3721633abc8..1d9643b3dce6 100644
--- a/fs/xfs/xfs_dinode.h
+++ b/fs/xfs/xfs_dinode.h
@@ -33,7 +33,7 @@ typedef struct xfs_timestamp {
* variable size the leftover area split into a data and an attribute fork.
* The format of the data and attribute fork depends on the format of the
* inode as indicated by di_format and di_aformat. To access the data and
- * attribute use the XFS_DFORK_PTR, XFS_DFORK_DPTR, and XFS_DFORK_PTR macros
+ * attribute use the XFS_DFORK_DPTR, XFS_DFORK_APTR, and XFS_DFORK_PTR macros
* below.
*
* There is a very similar struct icdinode in xfs_inode which matches the
diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c
index 67a250c36d41..b26a50f9921d 100644
--- a/fs/xfs/xfs_dir2.c
+++ b/fs/xfs/xfs_dir2.c
@@ -592,7 +592,7 @@ int
xfs_dir2_shrink_inode(
xfs_da_args_t *args,
xfs_dir2_db_t db,
- xfs_dabuf_t *bp)
+ struct xfs_buf *bp)
{
xfs_fileoff_t bno; /* directory file offset */
xfs_dablk_t da; /* directory file offset */
@@ -634,7 +634,7 @@ xfs_dir2_shrink_inode(
/*
* Invalidate the buffer from the transaction.
*/
- xfs_da_binval(tp, bp);
+ xfs_trans_binval(tp, bp);
/*
* If it's not a data block, we're done.
*/
diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c
index 586732f2d80d..e93ca8f054f4 100644
--- a/fs/xfs/xfs_dir2_block.c
+++ b/fs/xfs/xfs_dir2_block.c
@@ -37,10 +37,10 @@
/*
* Local function prototypes.
*/
-static void xfs_dir2_block_log_leaf(xfs_trans_t *tp, xfs_dabuf_t *bp, int first,
- int last);
-static void xfs_dir2_block_log_tail(xfs_trans_t *tp, xfs_dabuf_t *bp);
-static int xfs_dir2_block_lookup_int(xfs_da_args_t *args, xfs_dabuf_t **bpp,
+static void xfs_dir2_block_log_leaf(xfs_trans_t *tp, struct xfs_buf *bp,
+ int first, int last);
+static void xfs_dir2_block_log_tail(xfs_trans_t *tp, struct xfs_buf *bp);
+static int xfs_dir2_block_lookup_int(xfs_da_args_t *args, struct xfs_buf **bpp,
int *entno);
static int xfs_dir2_block_sort(const void *a, const void *b);
@@ -66,7 +66,7 @@ xfs_dir2_block_addname(
xfs_dir2_data_free_t *bf; /* bestfree table in block */
xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
- xfs_dabuf_t *bp; /* buffer for block */
+ struct xfs_buf *bp; /* buffer for block */
xfs_dir2_block_tail_t *btp; /* block tail */
int compact; /* need to compact leaf ents */
xfs_dir2_data_entry_t *dep; /* block data entry */
@@ -102,14 +102,14 @@ xfs_dir2_block_addname(
return error;
}
ASSERT(bp != NULL);
- hdr = bp->data;
+ hdr = bp->b_addr;
/*
* Check the magic number, corrupted if wrong.
*/
if (unlikely(hdr->magic != cpu_to_be32(XFS_DIR2_BLOCK_MAGIC))) {
XFS_CORRUPTION_ERROR("xfs_dir2_block_addname",
XFS_ERRLEVEL_LOW, mp, hdr);
- xfs_da_brelse(tp, bp);
+ xfs_trans_brelse(tp, bp);
return XFS_ERROR(EFSCORRUPTED);
}
len = xfs_dir2_data_entsize(args->namelen);
@@ -212,7 +212,7 @@ xfs_dir2_block_addname(
* If this isn't a real add, we're done with the buffer.
*/
if (args->op_flags & XFS_DA_OP_JUSTCHECK)
- xfs_da_brelse(tp, bp);
+ xfs_trans_brelse(tp, bp);
/*
* If we don't have space for the new entry & leaf ...
*/
@@ -228,7 +228,6 @@ xfs_dir2_block_addname(
* Then add the new entry in that format.
*/
error = xfs_dir2_block_to_leaf(args, bp);
- xfs_da_buf_done(bp);
if (error)
return error;
return xfs_dir2_leaf_addname(args);
@@ -422,7 +421,6 @@ xfs_dir2_block_addname(
xfs_dir2_block_log_tail(tp, bp);
xfs_dir2_data_log_entry(tp, bp, dep);
xfs_dir2_data_check(dp, bp);
- xfs_da_buf_done(bp);
return 0;
}
@@ -437,7 +435,7 @@ xfs_dir2_block_getdents(
filldir_t filldir)
{
xfs_dir2_data_hdr_t *hdr; /* block header */
- xfs_dabuf_t *bp; /* buffer for block */
+ struct xfs_buf *bp; /* buffer for block */
xfs_dir2_block_tail_t *btp; /* block tail */
xfs_dir2_data_entry_t *dep; /* block data entry */
xfs_dir2_data_unused_t *dup; /* block unused entry */
@@ -469,7 +467,7 @@ xfs_dir2_block_getdents(
* We'll skip entries before this.
*/
wantoff = xfs_dir2_dataptr_to_off(mp, *offset);
- hdr = bp->data;
+ hdr = bp->b_addr;
xfs_dir2_data_check(dp, bp);
/*
* Set up values for the loop.
@@ -514,7 +512,7 @@ xfs_dir2_block_getdents(
cook & 0x7fffffff, be64_to_cpu(dep->inumber),
DT_UNKNOWN)) {
*offset = cook & 0x7fffffff;
- xfs_da_brelse(NULL, bp);
+ xfs_trans_brelse(NULL, bp);
return 0;
}
}
@@ -525,7 +523,7 @@ xfs_dir2_block_getdents(
*/
*offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) &
0x7fffffff;
- xfs_da_brelse(NULL, bp);
+ xfs_trans_brelse(NULL, bp);
return 0;
}
@@ -535,17 +533,17 @@ xfs_dir2_block_getdents(
static void
xfs_dir2_block_log_leaf(
xfs_trans_t *tp, /* transaction structure */
- xfs_dabuf_t *bp, /* block buffer */
+ struct xfs_buf *bp, /* block buffer */
int first, /* index of first logged leaf */
int last) /* index of last logged leaf */
{
- xfs_dir2_data_hdr_t *hdr = bp->data;
+ xfs_dir2_data_hdr_t *hdr = bp->b_addr;
xfs_dir2_leaf_entry_t *blp;
xfs_dir2_block_tail_t *btp;
btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr);
blp = xfs_dir2_block_leaf_p(btp);
- xfs_da_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)hdr),
+ xfs_trans_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)hdr),
(uint)((char *)&blp[last + 1] - (char *)hdr - 1));
}
@@ -555,13 +553,13 @@ xfs_dir2_block_log_leaf(
static void
xfs_dir2_block_log_tail(
xfs_trans_t *tp, /* transaction structure */
- xfs_dabuf_t *bp) /* block buffer */
+ struct xfs_buf *bp) /* block buffer */
{
- xfs_dir2_data_hdr_t *hdr = bp->data;
+ xfs_dir2_data_hdr_t *hdr = bp->b_addr;
xfs_dir2_block_tail_t *btp;
btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr);
- xfs_da_log_buf(tp, bp, (uint)((char *)btp - (char *)hdr),
+ xfs_trans_log_buf(tp, bp, (uint)((char *)btp - (char *)hdr),
(uint)((char *)(btp + 1) - (char *)hdr - 1));
}
@@ -575,7 +573,7 @@ xfs_dir2_block_lookup(
{
xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
- xfs_dabuf_t *bp; /* block buffer */
+ struct xfs_buf *bp; /* block buffer */
xfs_dir2_block_tail_t *btp; /* block tail */
xfs_dir2_data_entry_t *dep; /* block data entry */
xfs_inode_t *dp; /* incore inode */
@@ -593,7 +591,7 @@ xfs_dir2_block_lookup(
return error;
dp = args->dp;
mp = dp->i_mount;
- hdr = bp->data;
+ hdr = bp->b_addr;
xfs_dir2_data_check(dp, bp);
btp = xfs_dir2_block_tail_p(mp, hdr);
blp = xfs_dir2_block_leaf_p(btp);
@@ -607,7 +605,7 @@ xfs_dir2_block_lookup(
*/
args->inumber = be64_to_cpu(dep->inumber);
error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
- xfs_da_brelse(args->trans, bp);
+ xfs_trans_brelse(args->trans, bp);
return XFS_ERROR(error);
}
@@ -617,13 +615,13 @@ xfs_dir2_block_lookup(
static int /* error */
xfs_dir2_block_lookup_int(
xfs_da_args_t *args, /* dir lookup arguments */
- xfs_dabuf_t **bpp, /* returned block buffer */
+ struct xfs_buf **bpp, /* returned block buffer */
int *entno) /* returned entry number */
{
xfs_dir2_dataptr_t addr; /* data entry address */
xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
- xfs_dabuf_t *bp; /* block buffer */
+ struct xfs_buf *bp; /* block buffer */
xfs_dir2_block_tail_t *btp; /* block tail */
xfs_dir2_data_entry_t *dep; /* block data entry */
xfs_inode_t *dp; /* incore inode */
@@ -647,7 +645,7 @@ xfs_dir2_block_lookup_int(
return error;
}
ASSERT(bp != NULL);
- hdr = bp->data;
+ hdr = bp->b_addr;
xfs_dir2_data_check(dp, bp);
btp = xfs_dir2_block_tail_p(mp, hdr);
blp = xfs_dir2_block_leaf_p(btp);
@@ -666,7 +664,7 @@ xfs_dir2_block_lookup_int(
high = mid - 1;
if (low > high) {
ASSERT(args->op_flags & XFS_DA_OP_OKNOENT);
- xfs_da_brelse(tp, bp);
+ xfs_trans_brelse(tp, bp);
return XFS_ERROR(ENOENT);
}
}
@@ -714,7 +712,7 @@ xfs_dir2_block_lookup_int(
/*
* No match, release the buffer and return ENOENT.
*/
- xfs_da_brelse(tp, bp);
+ xfs_trans_brelse(tp, bp);
return XFS_ERROR(ENOENT);
}
@@ -728,7 +726,7 @@ xfs_dir2_block_removename(
{
xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dir2_leaf_entry_t *blp; /* block leaf pointer */
- xfs_dabuf_t *bp; /* block buffer */
+ struct xfs_buf *bp; /* block buffer */
xfs_dir2_block_tail_t *btp; /* block tail */
xfs_dir2_data_entry_t *dep; /* block data entry */
xfs_inode_t *dp; /* incore inode */
@@ -753,7 +751,7 @@ xfs_dir2_block_removename(
dp = args->dp;
tp = args->trans;
mp = dp->i_mount;
- hdr = bp->data;
+ hdr = bp->b_addr;
btp = xfs_dir2_block_tail_p(mp, hdr);
blp = xfs_dir2_block_leaf_p(btp);
/*
@@ -790,10 +788,9 @@ xfs_dir2_block_removename(
* See if the size as a shortform is good enough.
*/
size = xfs_dir2_block_sfsize(dp, hdr, &sfh);
- if (size > XFS_IFORK_DSIZE(dp)) {
- xfs_da_buf_done(bp);
+ if (size > XFS_IFORK_DSIZE(dp))
return 0;
- }
+
/*
* If it works, do the conversion.
*/
@@ -810,7 +807,7 @@ xfs_dir2_block_replace(
{
xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
- xfs_dabuf_t *bp; /* block buffer */
+ struct xfs_buf *bp; /* block buffer */
xfs_dir2_block_tail_t *btp; /* block tail */
xfs_dir2_data_entry_t *dep; /* block data entry */
xfs_inode_t *dp; /* incore inode */
@@ -829,7 +826,7 @@ xfs_dir2_block_replace(
}
dp = args->dp;
mp = dp->i_mount;
- hdr = bp->data;
+ hdr = bp->b_addr;
btp = xfs_dir2_block_tail_p(mp, hdr);
blp = xfs_dir2_block_leaf_p(btp);
/*
@@ -844,7 +841,6 @@ xfs_dir2_block_replace(
dep->inumber = cpu_to_be64(args->inumber);
xfs_dir2_data_log_entry(args->trans, bp, dep);
xfs_dir2_data_check(dp, bp);
- xfs_da_buf_done(bp);
return 0;
}
@@ -871,8 +867,8 @@ xfs_dir2_block_sort(
int /* error */
xfs_dir2_leaf_to_block(
xfs_da_args_t *args, /* operation arguments */
- xfs_dabuf_t *lbp, /* leaf buffer */
- xfs_dabuf_t *dbp) /* data buffer */
+ struct xfs_buf *lbp, /* leaf buffer */
+ struct xfs_buf *dbp) /* data buffer */
{
__be16 *bestsp; /* leaf bests table */
xfs_dir2_data_hdr_t *hdr; /* block header */
@@ -898,7 +894,7 @@ xfs_dir2_leaf_to_block(
dp = args->dp;
tp = args->trans;
mp = dp->i_mount;
- leaf = lbp->data;
+ leaf = lbp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
ltp = xfs_dir2_leaf_tail_p(mp, leaf);
/*
@@ -914,11 +910,9 @@ xfs_dir2_leaf_to_block(
if ((error =
xfs_dir2_leaf_trim_data(args, lbp,
(xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1))))
- goto out;
- } else {
- error = 0;
- goto out;
- }
+ return error;
+ } else
+ return 0;
}
/*
* Read the data block if we don't already have it, give up if it fails.
@@ -926,9 +920,9 @@ xfs_dir2_leaf_to_block(
if (dbp == NULL &&
(error = xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, &dbp,
XFS_DATA_FORK))) {
- goto out;
+ return error;
}
- hdr = dbp->data;
+ hdr = dbp->b_addr;
ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
/*
* Size of the "leaf" area in the block.
@@ -944,10 +938,9 @@ xfs_dir2_leaf_to_block(
* If it's not free or is too short we can't do it.
*/
if (be16_to_cpu(dup->freetag) != XFS_DIR2_DATA_FREE_TAG ||
- be16_to_cpu(dup->length) < size) {
- error = 0;
- goto out;
- }
+ be16_to_cpu(dup->length) < size)
+ return 0;
+
/*
* Start converting it to block form.
*/
@@ -989,25 +982,17 @@ xfs_dir2_leaf_to_block(
* Pitch the old leaf block.
*/
error = xfs_da_shrink_inode(args, mp->m_dirleafblk, lbp);
- lbp = NULL;
- if (error) {
- goto out;
- }
+ if (error)
+ return error;
+
/*
* Now see if the resulting block can be shrunken to shortform.
*/
size = xfs_dir2_block_sfsize(dp, hdr, &sfh);
- if (size > XFS_IFORK_DSIZE(dp)) {
- error = 0;
- goto out;
- }
+ if (size > XFS_IFORK_DSIZE(dp))
+ return 0;
+
return xfs_dir2_block_to_sf(args, dbp, size, &sfh);
-out:
- if (lbp)
- xfs_da_buf_done(lbp);
- if (dbp)
- xfs_da_buf_done(dbp);
- return error;
}
/*
@@ -1020,7 +1005,7 @@ xfs_dir2_sf_to_block(
xfs_dir2_db_t blkno; /* dir-relative block # (0) */
xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
- xfs_dabuf_t *bp; /* block buffer */
+ struct xfs_buf *bp; /* block buffer */
xfs_dir2_block_tail_t *btp; /* block tail pointer */
xfs_dir2_data_entry_t *dep; /* data entry pointer */
xfs_inode_t *dp; /* incore directory inode */
@@ -1088,7 +1073,7 @@ xfs_dir2_sf_to_block(
kmem_free(sfp);
return error;
}
- hdr = bp->data;
+ hdr = bp->b_addr;
hdr->magic = cpu_to_be32(XFS_DIR2_BLOCK_MAGIC);
/*
* Compute size of block "tail" area.
@@ -1217,6 +1202,5 @@ xfs_dir2_sf_to_block(
xfs_dir2_block_log_leaf(tp, bp, 0, be32_to_cpu(btp->count) - 1);
xfs_dir2_block_log_tail(tp, bp);
xfs_dir2_data_check(dp, bp);
- xfs_da_buf_done(bp);
return 0;
}
diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c
index 2046988e9eb2..44ffd4d6bc91 100644
--- a/fs/xfs/xfs_dir2_data.c
+++ b/fs/xfs/xfs_dir2_data.c
@@ -42,8 +42,8 @@ xfs_dir2_data_freefind(xfs_dir2_data_hdr_t *hdr, xfs_dir2_data_unused_t *dup);
*/
void
xfs_dir2_data_check(
- xfs_inode_t *dp, /* incore inode pointer */
- xfs_dabuf_t *bp) /* data block's buffer */
+ struct xfs_inode *dp, /* incore inode pointer */
+ struct xfs_buf *bp) /* data block's buffer */
{
xfs_dir2_dataptr_t addr; /* addr for leaf lookup */
xfs_dir2_data_free_t *bf; /* bestfree table */
@@ -65,7 +65,7 @@ xfs_dir2_data_check(
struct xfs_name name;
mp = dp->i_mount;
- hdr = bp->data;
+ hdr = bp->b_addr;
bf = hdr->bestfree;
p = (char *)(hdr + 1);
@@ -389,9 +389,9 @@ int /* error */
xfs_dir2_data_init(
xfs_da_args_t *args, /* directory operation args */
xfs_dir2_db_t blkno, /* logical dir block number */
- xfs_dabuf_t **bpp) /* output block buffer */
+ struct xfs_buf **bpp) /* output block buffer */
{
- xfs_dabuf_t *bp; /* block buffer */
+ struct xfs_buf *bp; /* block buffer */
xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_inode_t *dp; /* incore directory inode */
xfs_dir2_data_unused_t *dup; /* unused entry pointer */
@@ -417,7 +417,7 @@ xfs_dir2_data_init(
/*
* Initialize the header.
*/
- hdr = bp->data;
+ hdr = bp->b_addr;
hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
hdr->bestfree[0].offset = cpu_to_be16(sizeof(*hdr));
for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) {
@@ -449,16 +449,16 @@ xfs_dir2_data_init(
*/
void
xfs_dir2_data_log_entry(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_dabuf_t *bp, /* block buffer */
+ struct xfs_trans *tp,
+ struct xfs_buf *bp,
xfs_dir2_data_entry_t *dep) /* data entry pointer */
{
- xfs_dir2_data_hdr_t *hdr = bp->data;
+ xfs_dir2_data_hdr_t *hdr = bp->b_addr;
ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
- xfs_da_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr),
+ xfs_trans_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr),
(uint)((char *)(xfs_dir2_data_entry_tag_p(dep) + 1) -
(char *)hdr - 1));
}
@@ -468,15 +468,15 @@ xfs_dir2_data_log_entry(
*/
void
xfs_dir2_data_log_header(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_dabuf_t *bp) /* block buffer */
+ struct xfs_trans *tp,
+ struct xfs_buf *bp)
{
- xfs_dir2_data_hdr_t *hdr = bp->data;
+ xfs_dir2_data_hdr_t *hdr = bp->b_addr;
ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
- xfs_da_log_buf(tp, bp, 0, sizeof(*hdr) - 1);
+ xfs_trans_log_buf(tp, bp, 0, sizeof(*hdr) - 1);
}
/*
@@ -484,11 +484,11 @@ xfs_dir2_data_log_header(
*/
void
xfs_dir2_data_log_unused(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_dabuf_t *bp, /* block buffer */
+ struct xfs_trans *tp,
+ struct xfs_buf *bp,
xfs_dir2_data_unused_t *dup) /* data unused pointer */
{
- xfs_dir2_data_hdr_t *hdr = bp->data;
+ xfs_dir2_data_hdr_t *hdr = bp->b_addr;
ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
@@ -496,13 +496,13 @@ xfs_dir2_data_log_unused(
/*
* Log the first part of the unused entry.
*/
- xfs_da_log_buf(tp, bp, (uint)((char *)dup - (char *)hdr),
+ xfs_trans_log_buf(tp, bp, (uint)((char *)dup - (char *)hdr),
(uint)((char *)&dup->length + sizeof(dup->length) -
1 - (char *)hdr));
/*
* Log the end (tag) of the unused entry.
*/
- xfs_da_log_buf(tp, bp,
+ xfs_trans_log_buf(tp, bp,
(uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr),
(uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr +
sizeof(xfs_dir2_data_off_t) - 1));
@@ -514,8 +514,8 @@ xfs_dir2_data_log_unused(
*/
void
xfs_dir2_data_make_free(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_dabuf_t *bp, /* block buffer */
+ struct xfs_trans *tp,
+ struct xfs_buf *bp,
xfs_dir2_data_aoff_t offset, /* starting byte offset */
xfs_dir2_data_aoff_t len, /* length in bytes */
int *needlogp, /* out: log header */
@@ -531,7 +531,7 @@ xfs_dir2_data_make_free(
xfs_dir2_data_unused_t *prevdup; /* unused entry before us */
mp = tp->t_mountp;
- hdr = bp->data;
+ hdr = bp->b_addr;
/*
* Figure out where the end of the data area is.
@@ -696,8 +696,8 @@ xfs_dir2_data_make_free(
*/
void
xfs_dir2_data_use_free(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_dabuf_t *bp, /* data block buffer */
+ struct xfs_trans *tp,
+ struct xfs_buf *bp,
xfs_dir2_data_unused_t *dup, /* unused entry */
xfs_dir2_data_aoff_t offset, /* starting offset to use */
xfs_dir2_data_aoff_t len, /* length to use */
@@ -713,7 +713,7 @@ xfs_dir2_data_use_free(
xfs_dir2_data_unused_t *newdup2; /* another new unused entry */
int oldlen; /* old unused entry's length */
- hdr = bp->data;
+ hdr = bp->b_addr;
ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC));
ASSERT(be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG);
diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c
index 397ffbcbab1d..0b296253bd01 100644
--- a/fs/xfs/xfs_dir2_leaf.c
+++ b/fs/xfs/xfs_dir2_leaf.c
@@ -38,15 +38,15 @@
* Local function declarations.
*/
#ifdef DEBUG
-static void xfs_dir2_leaf_check(xfs_inode_t *dp, xfs_dabuf_t *bp);
+static void xfs_dir2_leaf_check(struct xfs_inode *dp, struct xfs_buf *bp);
#else
#define xfs_dir2_leaf_check(dp, bp)
#endif
-static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, xfs_dabuf_t **lbpp,
- int *indexp, xfs_dabuf_t **dbpp);
-static void xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_dabuf *bp,
+static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp,
+ int *indexp, struct xfs_buf **dbpp);
+static void xfs_dir2_leaf_log_bests(struct xfs_trans *tp, struct xfs_buf *bp,
int first, int last);
-static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_dabuf *bp);
+static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_buf *bp);
/*
@@ -55,7 +55,7 @@ static void xfs_dir2_leaf_log_tail(struct xfs_trans *tp, struct xfs_dabuf *bp);
int /* error */
xfs_dir2_block_to_leaf(
xfs_da_args_t *args, /* operation arguments */
- xfs_dabuf_t *dbp) /* input block's buffer */
+ struct xfs_buf *dbp) /* input block's buffer */
{
__be16 *bestsp; /* leaf's bestsp entries */
xfs_dablk_t blkno; /* leaf block's bno */
@@ -64,7 +64,7 @@ xfs_dir2_block_to_leaf(
xfs_dir2_block_tail_t *btp; /* block's tail */
xfs_inode_t *dp; /* incore directory inode */
int error; /* error return code */
- xfs_dabuf_t *lbp; /* leaf block's buffer */
+ struct xfs_buf *lbp; /* leaf block's buffer */
xfs_dir2_db_t ldb; /* leaf block's bno */
xfs_dir2_leaf_t *leaf; /* leaf structure */
xfs_dir2_leaf_tail_t *ltp; /* leaf's tail */
@@ -95,8 +95,8 @@ xfs_dir2_block_to_leaf(
return error;
}
ASSERT(lbp != NULL);
- leaf = lbp->data;
- hdr = dbp->data;
+ leaf = lbp->b_addr;
+ hdr = dbp->b_addr;
xfs_dir2_data_check(dp, dbp);
btp = xfs_dir2_block_tail_p(mp, hdr);
blp = xfs_dir2_block_leaf_p(btp);
@@ -143,7 +143,6 @@ xfs_dir2_block_to_leaf(
xfs_dir2_leaf_check(dp, lbp);
xfs_dir2_data_check(dp, dbp);
xfs_dir2_leaf_log_bests(tp, lbp, 0, 0);
- xfs_da_buf_done(lbp);
return 0;
}
@@ -282,7 +281,7 @@ xfs_dir2_leaf_addname(
__be16 *bestsp; /* freespace table in leaf */
int compact; /* need to compact leaves */
xfs_dir2_data_hdr_t *hdr; /* data block header */
- xfs_dabuf_t *dbp; /* data block buffer */
+ struct xfs_buf *dbp; /* data block buffer */
xfs_dir2_data_entry_t *dep; /* data block entry */
xfs_inode_t *dp; /* incore directory inode */
xfs_dir2_data_unused_t *dup; /* data unused entry */
@@ -291,7 +290,7 @@ xfs_dir2_leaf_addname(
int highstale; /* index of next stale leaf */
int i; /* temporary, index */
int index; /* leaf table position */
- xfs_dabuf_t *lbp; /* leaf's buffer */
+ struct xfs_buf *lbp; /* leaf's buffer */
xfs_dir2_leaf_t *leaf; /* leaf structure */
int length; /* length of new entry */
xfs_dir2_leaf_entry_t *lep; /* leaf entry table pointer */
@@ -328,7 +327,7 @@ xfs_dir2_leaf_addname(
* But if there are dup hash values the index is of the first of those.
*/
index = xfs_dir2_leaf_search_hash(args, lbp);
- leaf = lbp->data;
+ leaf = lbp->b_addr;
ltp = xfs_dir2_leaf_tail_p(mp, leaf);
bestsp = xfs_dir2_leaf_bests_p(ltp);
length = xfs_dir2_data_entsize(args->namelen);
@@ -402,14 +401,13 @@ xfs_dir2_leaf_addname(
*/
if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
args->total == 0) {
- xfs_da_brelse(tp, lbp);
+ xfs_trans_brelse(tp, lbp);
return XFS_ERROR(ENOSPC);
}
/*
* Convert to node form.
*/
error = xfs_dir2_leaf_to_node(args, lbp);
- xfs_da_buf_done(lbp);
if (error)
return error;
/*
@@ -427,7 +425,7 @@ xfs_dir2_leaf_addname(
* a new data block.
*/
if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
- xfs_da_brelse(tp, lbp);
+ xfs_trans_brelse(tp, lbp);
return use_block == -1 ? XFS_ERROR(ENOSPC) : 0;
}
/*
@@ -435,7 +433,7 @@ xfs_dir2_leaf_addname(
* changed anything.
*/
if (args->total == 0 && use_block == -1) {
- xfs_da_brelse(tp, lbp);
+ xfs_trans_brelse(tp, lbp);
return XFS_ERROR(ENOSPC);
}
/*
@@ -466,14 +464,14 @@ xfs_dir2_leaf_addname(
*/
if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE,
&use_block))) {
- xfs_da_brelse(tp, lbp);
+ xfs_trans_brelse(tp, lbp);
return error;
}
/*
* Initialize the block.
*/
if ((error = xfs_dir2_data_init(args, use_block, &dbp))) {
- xfs_da_brelse(tp, lbp);
+ xfs_trans_brelse(tp, lbp);
return error;
}
/*
@@ -493,7 +491,7 @@ xfs_dir2_leaf_addname(
*/
else
xfs_dir2_leaf_log_bests(tp, lbp, use_block, use_block);
- hdr = dbp->data;
+ hdr = dbp->b_addr;
bestsp[use_block] = hdr->bestfree[0].length;
grown = 1;
}
@@ -505,10 +503,10 @@ xfs_dir2_leaf_addname(
if ((error =
xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, use_block),
-1, &dbp, XFS_DATA_FORK))) {
- xfs_da_brelse(tp, lbp);
+ xfs_trans_brelse(tp, lbp);
return error;
}
- hdr = dbp->data;
+ hdr = dbp->b_addr;
grown = 0;
}
xfs_dir2_data_check(dp, dbp);
@@ -570,9 +568,7 @@ xfs_dir2_leaf_addname(
xfs_dir2_leaf_log_header(tp, lbp);
xfs_dir2_leaf_log_ents(tp, lbp, lfloglow, lfloghigh);
xfs_dir2_leaf_check(dp, lbp);
- xfs_da_buf_done(lbp);
xfs_dir2_data_check(dp, dbp);
- xfs_da_buf_done(dbp);
return 0;
}
@@ -583,8 +579,8 @@ xfs_dir2_leaf_addname(
*/
STATIC void
xfs_dir2_leaf_check(
- xfs_inode_t *dp, /* incore directory inode */
- xfs_dabuf_t *bp) /* leaf's buffer */
+ struct xfs_inode *dp, /* incore directory inode */
+ struct xfs_buf *bp) /* leaf's buffer */
{
int i; /* leaf index */
xfs_dir2_leaf_t *leaf; /* leaf structure */
@@ -592,7 +588,7 @@ xfs_dir2_leaf_check(
xfs_mount_t *mp; /* filesystem mount point */
int stale; /* count of stale leaves */
- leaf = bp->data;
+ leaf = bp->b_addr;
mp = dp->i_mount;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
/*
@@ -628,14 +624,14 @@ xfs_dir2_leaf_check(
void
xfs_dir2_leaf_compact(
xfs_da_args_t *args, /* operation arguments */
- xfs_dabuf_t *bp) /* leaf buffer */
+ struct xfs_buf *bp) /* leaf buffer */
{
int from; /* source leaf index */
xfs_dir2_leaf_t *leaf; /* leaf structure */
int loglow; /* first leaf entry to log */
int to; /* target leaf index */
- leaf = bp->data;
+ leaf = bp->b_addr;
if (!leaf->hdr.stale) {
return;
}
@@ -677,7 +673,7 @@ xfs_dir2_leaf_compact(
*/
void
xfs_dir2_leaf_compact_x1(
- xfs_dabuf_t *bp, /* leaf buffer */
+ struct xfs_buf *bp, /* leaf buffer */
int *indexp, /* insertion index */
int *lowstalep, /* out: stale entry before us */
int *highstalep, /* out: stale entry after us */
@@ -693,7 +689,7 @@ xfs_dir2_leaf_compact_x1(
int newindex=0; /* new insertion index */
int to; /* destination copy index */
- leaf = bp->data;
+ leaf = bp->b_addr;
ASSERT(be16_to_cpu(leaf->hdr.stale) > 1);
index = *indexp;
@@ -763,6 +759,218 @@ xfs_dir2_leaf_compact_x1(
*highstalep = highstale;
}
+struct xfs_dir2_leaf_map_info {
+ xfs_extlen_t map_blocks; /* number of fsbs in map */
+ xfs_dablk_t map_off; /* last mapped file offset */
+ int map_size; /* total entries in *map */
+ int map_valid; /* valid entries in *map */
+ int nmap; /* mappings to ask xfs_bmapi */
+ xfs_dir2_db_t curdb; /* db for current block */
+ int ra_current; /* number of read-ahead blks */
+ int ra_index; /* *map index for read-ahead */
+ int ra_offset; /* map entry offset for ra */
+ int ra_want; /* readahead count wanted */
+ struct xfs_bmbt_irec map[]; /* map vector for blocks */
+};
+
+STATIC int
+xfs_dir2_leaf_readbuf(
+ struct xfs_inode *dp,
+ size_t bufsize,
+ struct xfs_dir2_leaf_map_info *mip,
+ xfs_dir2_off_t *curoff,
+ struct xfs_buf **bpp)
+{
+ struct xfs_mount *mp = dp->i_mount;
+ struct xfs_buf *bp = *bpp;
+ struct xfs_bmbt_irec *map = mip->map;
+ int error = 0;
+ int length;
+ int i;
+ int j;
+
+ /*
+ * If we have a buffer, we need to release it and
+ * take it out of the mapping.
+ */
+
+ if (bp) {
+ xfs_trans_brelse(NULL, bp);
+ bp = NULL;
+ mip->map_blocks -= mp->m_dirblkfsbs;
+ /*
+ * Loop to get rid of the extents for the
+ * directory block.
+ */
+ for (i = mp->m_dirblkfsbs; i > 0; ) {
+ j = min_t(int, map->br_blockcount, i);
+ map->br_blockcount -= j;
+ map->br_startblock += j;
+ map->br_startoff += j;
+ /*
+ * If mapping is done, pitch it from
+ * the table.
+ */
+ if (!map->br_blockcount && --mip->map_valid)
+ memmove(&map[0], &map[1],
+ sizeof(map[0]) * mip->map_valid);
+ i -= j;
+ }
+ }
+
+ /*
+ * Recalculate the readahead blocks wanted.
+ */
+ mip->ra_want = howmany(bufsize + mp->m_dirblksize,
+ mp->m_sb.sb_blocksize) - 1;
+ ASSERT(mip->ra_want >= 0);
+
+ /*
+ * If we don't have as many as we want, and we haven't
+ * run out of data blocks, get some more mappings.
+ */
+ if (1 + mip->ra_want > mip->map_blocks &&
+ mip->map_off < xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) {
+ /*
+ * Get more bmaps, fill in after the ones
+ * we already have in the table.
+ */
+ mip->nmap = mip->map_size - mip->map_valid;
+ error = xfs_bmapi_read(dp, mip->map_off,
+ xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET) -
+ mip->map_off,
+ &map[mip->map_valid], &mip->nmap, 0);
+
+ /*
+ * Don't know if we should ignore this or try to return an
+ * error. The trouble with returning errors is that readdir
+ * will just stop without actually passing the error through.
+ */
+ if (error)
+ goto out; /* XXX */
+
+ /*
+ * If we got all the mappings we asked for, set the final map
+ * offset based on the last bmap value received. Otherwise,
+ * we've reached the end.
+ */
+ if (mip->nmap == mip->map_size - mip->map_valid) {
+ i = mip->map_valid + mip->nmap - 1;
+ mip->map_off = map[i].br_startoff + map[i].br_blockcount;
+ } else
+ mip->map_off = xfs_dir2_byte_to_da(mp,
+ XFS_DIR2_LEAF_OFFSET);
+
+ /*
+ * Look for holes in the mapping, and eliminate them. Count up
+ * the valid blocks.
+ */
+ for (i = mip->map_valid; i < mip->map_valid + mip->nmap; ) {
+ if (map[i].br_startblock == HOLESTARTBLOCK) {
+ mip->nmap--;
+ length = mip->map_valid + mip->nmap - i;
+ if (length)
+ memmove(&map[i], &map[i + 1],
+ sizeof(map[i]) * length);
+ } else {
+ mip->map_blocks += map[i].br_blockcount;
+ i++;
+ }
+ }
+ mip->map_valid += mip->nmap;
+ }
+
+ /*
+ * No valid mappings, so no more data blocks.
+ */
+ if (!mip->map_valid) {
+ *curoff = xfs_dir2_da_to_byte(mp, mip->map_off);
+ goto out;
+ }
+
+ /*
+ * Read the directory block starting at the first mapping.
+ */
+ mip->curdb = xfs_dir2_da_to_db(mp, map->br_startoff);
+ error = xfs_da_read_buf(NULL, dp, map->br_startoff,
+ map->br_blockcount >= mp->m_dirblkfsbs ?
+ XFS_FSB_TO_DADDR(mp, map->br_startblock) : -1,
+ &bp, XFS_DATA_FORK);
+
+ /*
+ * Should just skip over the data block instead of giving up.
+ */
+ if (error)
+ goto out; /* XXX */
+
+ /*
+ * Adjust the current amount of read-ahead: we just read a block that
+ * was previously ra.
+ */
+ if (mip->ra_current)
+ mip->ra_current -= mp->m_dirblkfsbs;
+
+ /*
+ * Do we need more readahead?
+ */
+ for (mip->ra_index = mip->ra_offset = i = 0;
+ mip->ra_want > mip->ra_current && i < mip->map_blocks;
+ i += mp->m_dirblkfsbs) {
+ ASSERT(mip->ra_index < mip->map_valid);
+ /*
+ * Read-ahead a contiguous directory block.
+ */
+ if (i > mip->ra_current &&
+ map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) {
+ xfs_buf_readahead(mp->m_ddev_targp,
+ XFS_FSB_TO_DADDR(mp,
+ map[mip->ra_index].br_startblock +
+ mip->ra_offset),
+ (int)BTOBB(mp->m_dirblksize));
+ mip->ra_current = i;
+ }
+
+ /*
+ * Read-ahead a non-contiguous directory block. This doesn't
+ * use our mapping, but this is a very rare case.
+ */
+ else if (i > mip->ra_current) {
+ xfs_da_reada_buf(NULL, dp,
+ map[mip->ra_index].br_startoff +
+ mip->ra_offset,
+ XFS_DATA_FORK);
+ mip->ra_current = i;
+ }
+
+ /*
+ * Advance offset through the mapping table.
+ */
+ for (j = 0; j < mp->m_dirblkfsbs; j++) {
+ /*
+ * The rest of this extent but not more than a dir
+ * block.
+ */
+ length = min_t(int, mp->m_dirblkfsbs,
+ map[mip->ra_index].br_blockcount -
+ mip->ra_offset);
+ j += length;
+ mip->ra_offset += length;
+
+ /*
+ * Advance to the next mapping if this one is used up.
+ */
+ if (mip->ra_offset == map[mip->ra_index].br_blockcount) {
+ mip->ra_offset = 0;
+ mip->ra_index++;
+ }
+ }
+ }
+
+out:
+ *bpp = bp;
+ return error;
+}
+
/*
* Getdents (readdir) for leaf and node directories.
* This reads the data blocks only, so is the same for both forms.
@@ -775,30 +983,18 @@ xfs_dir2_leaf_getdents(
xfs_off_t *offset,
filldir_t filldir)
{
- xfs_dabuf_t *bp; /* data block buffer */
- int byteoff; /* offset in current block */
- xfs_dir2_db_t curdb; /* db for current block */
- xfs_dir2_off_t curoff; /* current overall offset */
+ struct xfs_buf *bp = NULL; /* data block buffer */
xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_data_entry_t *dep; /* data entry */
xfs_dir2_data_unused_t *dup; /* unused entry */
int error = 0; /* error return value */
- int i; /* temporary loop index */
- int j; /* temporary loop index */
int length; /* temporary length value */
- xfs_bmbt_irec_t *map; /* map vector for blocks */
- xfs_extlen_t map_blocks; /* number of fsbs in map */
- xfs_dablk_t map_off; /* last mapped file offset */
- int map_size; /* total entries in *map */
- int map_valid; /* valid entries in *map */
xfs_mount_t *mp; /* filesystem mount point */
+ int byteoff; /* offset in current block */
+ xfs_dir2_off_t curoff; /* current overall offset */
xfs_dir2_off_t newoff; /* new curoff after new blk */
- int nmap; /* mappings to ask xfs_bmapi */
char *ptr = NULL; /* pointer to current data */
- int ra_current; /* number of read-ahead blks */
- int ra_index; /* *map index for read-ahead */
- int ra_offset; /* map entry offset for ra */
- int ra_want; /* readahead count wanted */
+ struct xfs_dir2_leaf_map_info *map_info;
/*
* If the offset is at or past the largest allowed value,
@@ -814,10 +1010,12 @@ xfs_dir2_leaf_getdents(
* buffer size, the directory block size, and the filesystem
* block size.
*/
- map_size = howmany(bufsize + mp->m_dirblksize, mp->m_sb.sb_blocksize);
- map = kmem_alloc(map_size * sizeof(*map), KM_SLEEP);
- map_valid = ra_index = ra_offset = ra_current = map_blocks = 0;
- bp = NULL;
+ length = howmany(bufsize + mp->m_dirblksize,
+ mp->m_sb.sb_blocksize);
+ map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) +
+ (length * sizeof(struct xfs_bmbt_irec)),
+ KM_SLEEP);
+ map_info->map_size = length;
/*
* Inside the loop we keep the main offset value as a byte offset
@@ -829,7 +1027,9 @@ xfs_dir2_leaf_getdents(
* Force this conversion through db so we truncate the offset
* down to get the start of the data block.
*/
- map_off = xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, curoff));
+ map_info->map_off = xfs_dir2_db_to_da(mp,
+ xfs_dir2_byte_to_db(mp, curoff));
+
/*
* Loop over directory entries until we reach the end offset.
* Get more blocks and readahead as necessary.
@@ -839,191 +1039,17 @@ xfs_dir2_leaf_getdents(
* If we have no buffer, or we're off the end of the
* current buffer, need to get another one.
*/
- if (!bp || ptr >= (char *)bp->data + mp->m_dirblksize) {
- /*
- * If we have a buffer, we need to release it and
- * take it out of the mapping.
- */
- if (bp) {
- xfs_da_brelse(NULL, bp);
- bp = NULL;
- map_blocks -= mp->m_dirblkfsbs;
- /*
- * Loop to get rid of the extents for the
- * directory block.
- */
- for (i = mp->m_dirblkfsbs; i > 0; ) {
- j = MIN((int)map->br_blockcount, i);
- map->br_blockcount -= j;
- map->br_startblock += j;
- map->br_startoff += j;
- /*
- * If mapping is done, pitch it from
- * the table.
- */
- if (!map->br_blockcount && --map_valid)
- memmove(&map[0], &map[1],
- sizeof(map[0]) *
- map_valid);
- i -= j;
- }
- }
- /*
- * Recalculate the readahead blocks wanted.
- */
- ra_want = howmany(bufsize + mp->m_dirblksize,
- mp->m_sb.sb_blocksize) - 1;
- ASSERT(ra_want >= 0);
+ if (!bp || ptr >= (char *)bp->b_addr + mp->m_dirblksize) {
- /*
- * If we don't have as many as we want, and we haven't
- * run out of data blocks, get some more mappings.
- */
- if (1 + ra_want > map_blocks &&
- map_off <
- xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) {
- /*
- * Get more bmaps, fill in after the ones
- * we already have in the table.
- */
- nmap = map_size - map_valid;
- error = xfs_bmapi_read(dp, map_off,
- xfs_dir2_byte_to_da(mp,
- XFS_DIR2_LEAF_OFFSET) - map_off,
- &map[map_valid], &nmap, 0);
- /*
- * Don't know if we should ignore this or
- * try to return an error.
- * The trouble with returning errors
- * is that readdir will just stop without
- * actually passing the error through.
- */
- if (error)
- break; /* XXX */
- /*
- * If we got all the mappings we asked for,
- * set the final map offset based on the
- * last bmap value received.
- * Otherwise, we've reached the end.
- */
- if (nmap == map_size - map_valid)
- map_off =
- map[map_valid + nmap - 1].br_startoff +
- map[map_valid + nmap - 1].br_blockcount;
- else
- map_off =
- xfs_dir2_byte_to_da(mp,
- XFS_DIR2_LEAF_OFFSET);
- /*
- * Look for holes in the mapping, and
- * eliminate them. Count up the valid blocks.
- */
- for (i = map_valid; i < map_valid + nmap; ) {
- if (map[i].br_startblock ==
- HOLESTARTBLOCK) {
- nmap--;
- length = map_valid + nmap - i;
- if (length)
- memmove(&map[i],
- &map[i + 1],
- sizeof(map[i]) *
- length);
- } else {
- map_blocks +=
- map[i].br_blockcount;
- i++;
- }
- }
- map_valid += nmap;
- }
- /*
- * No valid mappings, so no more data blocks.
- */
- if (!map_valid) {
- curoff = xfs_dir2_da_to_byte(mp, map_off);
+ error = xfs_dir2_leaf_readbuf(dp, bufsize, map_info,
+ &curoff, &bp);
+ if (error || !map_info->map_valid)
break;
- }
- /*
- * Read the directory block starting at the first
- * mapping.
- */
- curdb = xfs_dir2_da_to_db(mp, map->br_startoff);
- error = xfs_da_read_buf(NULL, dp, map->br_startoff,
- map->br_blockcount >= mp->m_dirblkfsbs ?
- XFS_FSB_TO_DADDR(mp, map->br_startblock) :
- -1,
- &bp, XFS_DATA_FORK);
- /*
- * Should just skip over the data block instead
- * of giving up.
- */
- if (error)
- break; /* XXX */
- /*
- * Adjust the current amount of read-ahead: we just
- * read a block that was previously ra.
- */
- if (ra_current)
- ra_current -= mp->m_dirblkfsbs;
- /*
- * Do we need more readahead?
- */
- for (ra_index = ra_offset = i = 0;
- ra_want > ra_current && i < map_blocks;
- i += mp->m_dirblkfsbs) {
- ASSERT(ra_index < map_valid);
- /*
- * Read-ahead a contiguous directory block.
- */
- if (i > ra_current &&
- map[ra_index].br_blockcount >=
- mp->m_dirblkfsbs) {
- xfs_buf_readahead(mp->m_ddev_targp,
- XFS_FSB_TO_DADDR(mp,
- map[ra_index].br_startblock +
- ra_offset),
- (int)BTOBB(mp->m_dirblksize));
- ra_current = i;
- }
- /*
- * Read-ahead a non-contiguous directory block.
- * This doesn't use our mapping, but this
- * is a very rare case.
- */
- else if (i > ra_current) {
- (void)xfs_da_reada_buf(NULL, dp,
- map[ra_index].br_startoff +
- ra_offset, XFS_DATA_FORK);
- ra_current = i;
- }
- /*
- * Advance offset through the mapping table.
- */
- for (j = 0; j < mp->m_dirblkfsbs; j++) {
- /*
- * The rest of this extent but not
- * more than a dir block.
- */
- length = MIN(mp->m_dirblkfsbs,
- (int)(map[ra_index].br_blockcount -
- ra_offset));
- j += length;
- ra_offset += length;
- /*
- * Advance to the next mapping if
- * this one is used up.
- */
- if (ra_offset ==
- map[ra_index].br_blockcount) {
- ra_offset = 0;
- ra_index++;
- }
- }
- }
+
/*
* Having done a read, we need to set a new offset.
*/
- newoff = xfs_dir2_db_off_to_byte(mp, curdb, 0);
+ newoff = xfs_dir2_db_off_to_byte(mp, map_info->curdb, 0);
/*
* Start of the current block.
*/
@@ -1034,8 +1060,8 @@ xfs_dir2_leaf_getdents(
*/
else if (curoff > newoff)
ASSERT(xfs_dir2_byte_to_db(mp, curoff) ==
- curdb);
- hdr = bp->data;
+ map_info->curdb);
+ hdr = bp->b_addr;
xfs_dir2_data_check(dp, bp);
/*
* Find our position in the block.
@@ -1117,9 +1143,9 @@ xfs_dir2_leaf_getdents(
*offset = XFS_DIR2_MAX_DATAPTR & 0x7fffffff;
else
*offset = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff;
- kmem_free(map);
+ kmem_free(map_info);
if (bp)
- xfs_da_brelse(NULL, bp);
+ xfs_trans_brelse(NULL, bp);
return error;
}
@@ -1130,10 +1156,10 @@ int
xfs_dir2_leaf_init(
xfs_da_args_t *args, /* operation arguments */
xfs_dir2_db_t bno, /* directory block number */
- xfs_dabuf_t **bpp, /* out: leaf buffer */
+ struct xfs_buf **bpp, /* out: leaf buffer */
int magic) /* magic number for block */
{
- xfs_dabuf_t *bp; /* leaf buffer */
+ struct xfs_buf *bp; /* leaf buffer */
xfs_inode_t *dp; /* incore directory inode */
int error; /* error return code */
xfs_dir2_leaf_t *leaf; /* leaf structure */
@@ -1156,7 +1182,7 @@ xfs_dir2_leaf_init(
return error;
}
ASSERT(bp != NULL);
- leaf = bp->data;
+ leaf = bp->b_addr;
/*
* Initialize the header.
*/
@@ -1186,7 +1212,7 @@ xfs_dir2_leaf_init(
static void
xfs_dir2_leaf_log_bests(
xfs_trans_t *tp, /* transaction pointer */
- xfs_dabuf_t *bp, /* leaf buffer */
+ struct xfs_buf *bp, /* leaf buffer */
int first, /* first entry to log */
int last) /* last entry to log */
{
@@ -1195,12 +1221,12 @@ xfs_dir2_leaf_log_bests(
xfs_dir2_leaf_t *leaf; /* leaf structure */
xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */
- leaf = bp->data;
+ leaf = bp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf);
firstb = xfs_dir2_leaf_bests_p(ltp) + first;
lastb = xfs_dir2_leaf_bests_p(ltp) + last;
- xfs_da_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf),
+ xfs_trans_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf),
(uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1));
}
@@ -1210,7 +1236,7 @@ xfs_dir2_leaf_log_bests(
void
xfs_dir2_leaf_log_ents(
xfs_trans_t *tp, /* transaction pointer */
- xfs_dabuf_t *bp, /* leaf buffer */
+ struct xfs_buf *bp, /* leaf buffer */
int first, /* first entry to log */
int last) /* last entry to log */
{
@@ -1218,12 +1244,12 @@ xfs_dir2_leaf_log_ents(
xfs_dir2_leaf_entry_t *lastlep; /* pointer to last entry */
xfs_dir2_leaf_t *leaf; /* leaf structure */
- leaf = bp->data;
+ leaf = bp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
firstlep = &leaf->ents[first];
lastlep = &leaf->ents[last];
- xfs_da_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf),
+ xfs_trans_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf),
(uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1));
}
@@ -1232,15 +1258,15 @@ xfs_dir2_leaf_log_ents(
*/
void
xfs_dir2_leaf_log_header(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_dabuf_t *bp) /* leaf buffer */
+ struct xfs_trans *tp,
+ struct xfs_buf *bp)
{
xfs_dir2_leaf_t *leaf; /* leaf structure */
- leaf = bp->data;
+ leaf = bp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
- xfs_da_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf),
+ xfs_trans_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf),
(uint)(sizeof(leaf->hdr) - 1));
}
@@ -1249,18 +1275,18 @@ xfs_dir2_leaf_log_header(
*/
STATIC void
xfs_dir2_leaf_log_tail(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_dabuf_t *bp) /* leaf buffer */
+ struct xfs_trans *tp,
+ struct xfs_buf *bp)
{
xfs_dir2_leaf_t *leaf; /* leaf structure */
xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */
xfs_mount_t *mp; /* filesystem mount point */
mp = tp->t_mountp;
- leaf = bp->data;
+ leaf = bp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC));
ltp = xfs_dir2_leaf_tail_p(mp, leaf);
- xfs_da_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
+ xfs_trans_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf),
(uint)(mp->m_dirblksize - 1));
}
@@ -1273,12 +1299,12 @@ int
xfs_dir2_leaf_lookup(
xfs_da_args_t *args) /* operation arguments */
{
- xfs_dabuf_t *dbp; /* data block buffer */
+ struct xfs_buf *dbp; /* data block buffer */
xfs_dir2_data_entry_t *dep; /* data block entry */
xfs_inode_t *dp; /* incore directory inode */
int error; /* error return code */
int index; /* found entry index */
- xfs_dabuf_t *lbp; /* leaf buffer */
+ struct xfs_buf *lbp; /* leaf buffer */
xfs_dir2_leaf_t *leaf; /* leaf structure */
xfs_dir2_leaf_entry_t *lep; /* leaf entry */
xfs_trans_t *tp; /* transaction pointer */
@@ -1294,7 +1320,7 @@ xfs_dir2_leaf_lookup(
tp = args->trans;
dp = args->dp;
xfs_dir2_leaf_check(dp, lbp);
- leaf = lbp->data;
+ leaf = lbp->b_addr;
/*
* Get to the leaf entry and contained data entry address.
*/
@@ -1303,15 +1329,15 @@ xfs_dir2_leaf_lookup(
* Point to the data entry.
*/
dep = (xfs_dir2_data_entry_t *)
- ((char *)dbp->data +
+ ((char *)dbp->b_addr +
xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
/*
* Return the found inode number & CI name if appropriate
*/
args->inumber = be64_to_cpu(dep->inumber);
error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
- xfs_da_brelse(tp, dbp);
- xfs_da_brelse(tp, lbp);
+ xfs_trans_brelse(tp, dbp);
+ xfs_trans_brelse(tp, lbp);
return XFS_ERROR(error);
}
@@ -1324,17 +1350,17 @@ xfs_dir2_leaf_lookup(
static int /* error */
xfs_dir2_leaf_lookup_int(
xfs_da_args_t *args, /* operation arguments */
- xfs_dabuf_t **lbpp, /* out: leaf buffer */
+ struct xfs_buf **lbpp, /* out: leaf buffer */
int *indexp, /* out: index in leaf block */
- xfs_dabuf_t **dbpp) /* out: data buffer */
+ struct xfs_buf **dbpp) /* out: data buffer */
{
xfs_dir2_db_t curdb = -1; /* current data block number */
- xfs_dabuf_t *dbp = NULL; /* data buffer */
+ struct xfs_buf *dbp = NULL; /* data buffer */
xfs_dir2_data_entry_t *dep; /* data entry */
xfs_inode_t *dp; /* incore directory inode */
int error; /* error return code */
int index; /* index in leaf block */
- xfs_dabuf_t *lbp; /* leaf buffer */
+ struct xfs_buf *lbp; /* leaf buffer */
xfs_dir2_leaf_entry_t *lep; /* leaf entry */
xfs_dir2_leaf_t *leaf; /* leaf structure */
xfs_mount_t *mp; /* filesystem mount point */
@@ -1354,7 +1380,7 @@ xfs_dir2_leaf_lookup_int(
if (error)
return error;
*lbpp = lbp;
- leaf = lbp->data;
+ leaf = lbp->b_addr;
xfs_dir2_leaf_check(dp, lbp);
/*
* Look for the first leaf entry with our hash value.
@@ -1382,12 +1408,12 @@ xfs_dir2_leaf_lookup_int(
*/
if (newdb != curdb) {
if (dbp)
- xfs_da_brelse(tp, dbp);
+ xfs_trans_brelse(tp, dbp);
error = xfs_da_read_buf(tp, dp,
xfs_dir2_db_to_da(mp, newdb),
-1, &dbp, XFS_DATA_FORK);
if (error) {
- xfs_da_brelse(tp, lbp);
+ xfs_trans_brelse(tp, lbp);
return error;
}
xfs_dir2_data_check(dp, dbp);
@@ -1396,7 +1422,7 @@ xfs_dir2_leaf_lookup_int(
/*
* Point to the data entry.
*/
- dep = (xfs_dir2_data_entry_t *)((char *)dbp->data +
+ dep = (xfs_dir2_data_entry_t *)((char *)dbp->b_addr +
xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
/*
* Compare name and if it's an exact match, return the index
@@ -1424,12 +1450,12 @@ xfs_dir2_leaf_lookup_int(
if (args->cmpresult == XFS_CMP_CASE) {
ASSERT(cidb != -1);
if (cidb != curdb) {
- xfs_da_brelse(tp, dbp);
+ xfs_trans_brelse(tp, dbp);
error = xfs_da_read_buf(tp, dp,
xfs_dir2_db_to_da(mp, cidb),
-1, &dbp, XFS_DATA_FORK);
if (error) {
- xfs_da_brelse(tp, lbp);
+ xfs_trans_brelse(tp, lbp);
return error;
}
}
@@ -1441,8 +1467,8 @@ xfs_dir2_leaf_lookup_int(
*/
ASSERT(cidb == -1);
if (dbp)
- xfs_da_brelse(tp, dbp);
- xfs_da_brelse(tp, lbp);
+ xfs_trans_brelse(tp, dbp);
+ xfs_trans_brelse(tp, lbp);
return XFS_ERROR(ENOENT);
}
@@ -1456,13 +1482,13 @@ xfs_dir2_leaf_removename(
__be16 *bestsp; /* leaf block best freespace */
xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_db_t db; /* data block number */
- xfs_dabuf_t *dbp; /* data block buffer */
+ struct xfs_buf *dbp; /* data block buffer */
xfs_dir2_data_entry_t *dep; /* data entry structure */
xfs_inode_t *dp; /* incore directory inode */
int error; /* error return code */
xfs_dir2_db_t i; /* temporary data block # */
int index; /* index into leaf entries */
- xfs_dabuf_t *lbp; /* leaf buffer */
+ struct xfs_buf *lbp; /* leaf buffer */
xfs_dir2_leaf_t *leaf; /* leaf structure */
xfs_dir2_leaf_entry_t *lep; /* leaf entry */
xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */
@@ -1483,8 +1509,8 @@ xfs_dir2_leaf_removename(
dp = args->dp;
tp = args->trans;
mp = dp->i_mount;
- leaf = lbp->data;
- hdr = dbp->data;
+ leaf = lbp->b_addr;
+ hdr = dbp->b_addr;
xfs_dir2_data_check(dp, dbp);
/*
* Point to the leaf entry, use that to point to the data entry.
@@ -1541,12 +1567,9 @@ xfs_dir2_leaf_removename(
* Just go on, returning success, leaving the
* empty block in place.
*/
- if (error == ENOSPC && args->total == 0) {
- xfs_da_buf_done(dbp);
+ if (error == ENOSPC && args->total == 0)
error = 0;
- }
xfs_dir2_leaf_check(dp, lbp);
- xfs_da_buf_done(lbp);
return error;
}
dbp = NULL;
@@ -1577,10 +1600,9 @@ xfs_dir2_leaf_removename(
/*
* If the data block was not the first one, drop it.
*/
- else if (db != mp->m_dirdatablk && dbp != NULL) {
- xfs_da_buf_done(dbp);
+ else if (db != mp->m_dirdatablk)
dbp = NULL;
- }
+
xfs_dir2_leaf_check(dp, lbp);
/*
* See if we can convert to block form.
@@ -1595,12 +1617,12 @@ int /* error */
xfs_dir2_leaf_replace(
xfs_da_args_t *args) /* operation arguments */
{
- xfs_dabuf_t *dbp; /* data block buffer */
+ struct xfs_buf *dbp; /* data block buffer */
xfs_dir2_data_entry_t *dep; /* data block entry */
xfs_inode_t *dp; /* incore directory inode */
int error; /* error return code */
int index; /* index of leaf entry */
- xfs_dabuf_t *lbp; /* leaf buffer */
+ struct xfs_buf *lbp; /* leaf buffer */
xfs_dir2_leaf_t *leaf; /* leaf structure */
xfs_dir2_leaf_entry_t *lep; /* leaf entry */
xfs_trans_t *tp; /* transaction pointer */
@@ -1614,7 +1636,7 @@ xfs_dir2_leaf_replace(
return error;
}
dp = args->dp;
- leaf = lbp->data;
+ leaf = lbp->b_addr;
/*
* Point to the leaf entry, get data address from it.
*/
@@ -1623,7 +1645,7 @@ xfs_dir2_leaf_replace(
* Point to the data entry.
*/
dep = (xfs_dir2_data_entry_t *)
- ((char *)dbp->data +
+ ((char *)dbp->b_addr +
xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address)));
ASSERT(args->inumber != be64_to_cpu(dep->inumber));
/*
@@ -1632,9 +1654,8 @@ xfs_dir2_leaf_replace(
dep->inumber = cpu_to_be64(args->inumber);
tp = args->trans;
xfs_dir2_data_log_entry(tp, dbp, dep);
- xfs_da_buf_done(dbp);
xfs_dir2_leaf_check(dp, lbp);
- xfs_da_brelse(tp, lbp);
+ xfs_trans_brelse(tp, lbp);
return 0;
}
@@ -1646,7 +1667,7 @@ xfs_dir2_leaf_replace(
int /* index value */
xfs_dir2_leaf_search_hash(
xfs_da_args_t *args, /* operation arguments */
- xfs_dabuf_t *lbp) /* leaf buffer */
+ struct xfs_buf *lbp) /* leaf buffer */
{
xfs_dahash_t hash=0; /* hash from this entry */
xfs_dahash_t hashwant; /* hash value looking for */
@@ -1656,7 +1677,7 @@ xfs_dir2_leaf_search_hash(
xfs_dir2_leaf_entry_t *lep; /* leaf entry */
int mid=0; /* current leaf index */
- leaf = lbp->data;
+ leaf = lbp->b_addr;
#ifndef __KERNEL__
if (!leaf->hdr.count)
return 0;
@@ -1699,11 +1720,11 @@ xfs_dir2_leaf_search_hash(
int /* error */
xfs_dir2_leaf_trim_data(
xfs_da_args_t *args, /* operation arguments */
- xfs_dabuf_t *lbp, /* leaf buffer */
+ struct xfs_buf *lbp, /* leaf buffer */
xfs_dir2_db_t db) /* data block number */
{
__be16 *bestsp; /* leaf bests table */
- xfs_dabuf_t *dbp; /* data block buffer */
+ struct xfs_buf *dbp; /* data block buffer */
xfs_inode_t *dp; /* incore directory inode */
int error; /* error return value */
xfs_dir2_leaf_t *leaf; /* leaf structure */
@@ -1722,12 +1743,12 @@ xfs_dir2_leaf_trim_data(
return error;
}
- leaf = lbp->data;
+ leaf = lbp->b_addr;
ltp = xfs_dir2_leaf_tail_p(mp, leaf);
#ifdef DEBUG
{
- struct xfs_dir2_data_hdr *hdr = dbp->data;
+ struct xfs_dir2_data_hdr *hdr = dbp->b_addr;
ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
ASSERT(be16_to_cpu(hdr->bestfree[0].length) ==
@@ -1741,7 +1762,7 @@ xfs_dir2_leaf_trim_data(
*/
if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
ASSERT(error != ENOSPC);
- xfs_da_brelse(tp, dbp);
+ xfs_trans_brelse(tp, dbp);
return error;
}
/*
@@ -1781,10 +1802,10 @@ xfs_dir2_node_to_leaf(
xfs_da_args_t *args; /* operation arguments */
xfs_inode_t *dp; /* incore directory inode */
int error; /* error return code */
- xfs_dabuf_t *fbp; /* buffer for freespace block */
+ struct xfs_buf *fbp; /* buffer for freespace block */
xfs_fileoff_t fo; /* freespace file offset */
xfs_dir2_free_t *free; /* freespace structure */
- xfs_dabuf_t *lbp; /* buffer for leaf block */
+ struct xfs_buf *lbp; /* buffer for leaf block */
xfs_dir2_leaf_tail_t *ltp; /* tail of leaf structure */
xfs_dir2_leaf_t *leaf; /* leaf structure */
xfs_mount_t *mp; /* filesystem mount point */
@@ -1838,7 +1859,7 @@ xfs_dir2_node_to_leaf(
if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + mp->m_dirblksize)
return 0;
lbp = state->path.blk[0].bp;
- leaf = lbp->data;
+ leaf = lbp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
/*
* Read the freespace block.
@@ -1847,7 +1868,7 @@ xfs_dir2_node_to_leaf(
XFS_DATA_FORK))) {
return error;
}
- free = fbp->data;
+ free = fbp->b_addr;
ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
ASSERT(!free->hdr.firstdb);
@@ -1857,7 +1878,7 @@ xfs_dir2_node_to_leaf(
*/
if (xfs_dir2_leaf_size(&leaf->hdr, be32_to_cpu(free->hdr.nvalid)) >
mp->m_dirblksize) {
- xfs_da_brelse(tp, fbp);
+ xfs_trans_brelse(tp, fbp);
return 0;
}
diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c
index b0f26780449d..6c7052406605 100644
--- a/fs/xfs/xfs_dir2_node.c
+++ b/fs/xfs/xfs_dir2_node.c
@@ -36,20 +36,20 @@
/*
* Function declarations.
*/
-static void xfs_dir2_free_log_header(xfs_trans_t *tp, xfs_dabuf_t *bp);
-static int xfs_dir2_leafn_add(xfs_dabuf_t *bp, xfs_da_args_t *args, int index);
+static int xfs_dir2_leafn_add(struct xfs_buf *bp, xfs_da_args_t *args,
+ int index);
#ifdef DEBUG
-static void xfs_dir2_leafn_check(xfs_inode_t *dp, xfs_dabuf_t *bp);
+static void xfs_dir2_leafn_check(struct xfs_inode *dp, struct xfs_buf *bp);
#else
#define xfs_dir2_leafn_check(dp, bp)
#endif
-static void xfs_dir2_leafn_moveents(xfs_da_args_t *args, xfs_dabuf_t *bp_s,
- int start_s, xfs_dabuf_t *bp_d, int start_d,
- int count);
+static void xfs_dir2_leafn_moveents(xfs_da_args_t *args, struct xfs_buf *bp_s,
+ int start_s, struct xfs_buf *bp_d,
+ int start_d, int count);
static void xfs_dir2_leafn_rebalance(xfs_da_state_t *state,
xfs_da_state_blk_t *blk1,
xfs_da_state_blk_t *blk2);
-static int xfs_dir2_leafn_remove(xfs_da_args_t *args, xfs_dabuf_t *bp,
+static int xfs_dir2_leafn_remove(xfs_da_args_t *args, struct xfs_buf *bp,
int index, xfs_da_state_blk_t *dblk,
int *rval);
static int xfs_dir2_node_addname_int(xfs_da_args_t *args,
@@ -60,16 +60,16 @@ static int xfs_dir2_node_addname_int(xfs_da_args_t *args,
*/
STATIC void
xfs_dir2_free_log_bests(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_dabuf_t *bp, /* freespace buffer */
+ struct xfs_trans *tp,
+ struct xfs_buf *bp,
int first, /* first entry to log */
int last) /* last entry to log */
{
xfs_dir2_free_t *free; /* freespace structure */
- free = bp->data;
+ free = bp->b_addr;
ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
- xfs_da_log_buf(tp, bp,
+ xfs_trans_log_buf(tp, bp,
(uint)((char *)&free->bests[first] - (char *)free),
(uint)((char *)&free->bests[last] - (char *)free +
sizeof(free->bests[0]) - 1));
@@ -80,14 +80,14 @@ xfs_dir2_free_log_bests(
*/
static void
xfs_dir2_free_log_header(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_dabuf_t *bp) /* freespace buffer */
+ struct xfs_trans *tp,
+ struct xfs_buf *bp)
{
xfs_dir2_free_t *free; /* freespace structure */
- free = bp->data;
+ free = bp->b_addr;
ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
- xfs_da_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free),
+ xfs_trans_log_buf(tp, bp, (uint)((char *)&free->hdr - (char *)free),
(uint)(sizeof(xfs_dir2_free_hdr_t) - 1));
}
@@ -99,11 +99,11 @@ xfs_dir2_free_log_header(
int /* error */
xfs_dir2_leaf_to_node(
xfs_da_args_t *args, /* operation arguments */
- xfs_dabuf_t *lbp) /* leaf buffer */
+ struct xfs_buf *lbp) /* leaf buffer */
{
xfs_inode_t *dp; /* incore directory inode */
int error; /* error return value */
- xfs_dabuf_t *fbp; /* freespace buffer */
+ struct xfs_buf *fbp; /* freespace buffer */
xfs_dir2_db_t fdb; /* freespace block number */
xfs_dir2_free_t *free; /* freespace structure */
__be16 *from; /* pointer to freespace entry */
@@ -136,8 +136,8 @@ xfs_dir2_leaf_to_node(
return error;
}
ASSERT(fbp != NULL);
- free = fbp->data;
- leaf = lbp->data;
+ free = fbp->b_addr;
+ leaf = lbp->b_addr;
ltp = xfs_dir2_leaf_tail_p(mp, leaf);
/*
* Initialize the freespace block header.
@@ -164,7 +164,6 @@ xfs_dir2_leaf_to_node(
xfs_dir2_leaf_log_header(tp, lbp);
xfs_dir2_free_log_header(tp, fbp);
xfs_dir2_free_log_bests(tp, fbp, 0, be32_to_cpu(free->hdr.nvalid) - 1);
- xfs_da_buf_done(fbp);
xfs_dir2_leafn_check(dp, lbp);
return 0;
}
@@ -175,7 +174,7 @@ xfs_dir2_leaf_to_node(
*/
static int /* error */
xfs_dir2_leafn_add(
- xfs_dabuf_t *bp, /* leaf buffer */
+ struct xfs_buf *bp, /* leaf buffer */
xfs_da_args_t *args, /* operation arguments */
int index) /* insertion pt for new entry */
{
@@ -195,7 +194,7 @@ xfs_dir2_leafn_add(
dp = args->dp;
mp = dp->i_mount;
tp = args->trans;
- leaf = bp->data;
+ leaf = bp->b_addr;
/*
* Quick check just to make sure we are not going to index
@@ -261,15 +260,15 @@ xfs_dir2_leafn_add(
*/
void
xfs_dir2_leafn_check(
- xfs_inode_t *dp, /* incore directory inode */
- xfs_dabuf_t *bp) /* leaf buffer */
+ struct xfs_inode *dp,
+ struct xfs_buf *bp)
{
int i; /* leaf index */
xfs_dir2_leaf_t *leaf; /* leaf structure */
xfs_mount_t *mp; /* filesystem mount point */
int stale; /* count of stale leaves */
- leaf = bp->data;
+ leaf = bp->b_addr;
mp = dp->i_mount;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
ASSERT(be16_to_cpu(leaf->hdr.count) <= xfs_dir2_max_leaf_ents(mp));
@@ -291,12 +290,12 @@ xfs_dir2_leafn_check(
*/
xfs_dahash_t /* hash value */
xfs_dir2_leafn_lasthash(
- xfs_dabuf_t *bp, /* leaf buffer */
+ struct xfs_buf *bp, /* leaf buffer */
int *count) /* count of entries in leaf */
{
xfs_dir2_leaf_t *leaf; /* leaf structure */
- leaf = bp->data;
+ leaf = bp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
if (count)
*count = be16_to_cpu(leaf->hdr.count);
@@ -311,12 +310,12 @@ xfs_dir2_leafn_lasthash(
*/
STATIC int
xfs_dir2_leafn_lookup_for_addname(
- xfs_dabuf_t *bp, /* leaf buffer */
+ struct xfs_buf *bp, /* leaf buffer */
xfs_da_args_t *args, /* operation arguments */
int *indexp, /* out: leaf entry index */
xfs_da_state_t *state) /* state to fill in */
{
- xfs_dabuf_t *curbp = NULL; /* current data/free buffer */
+ struct xfs_buf *curbp = NULL; /* current data/free buffer */
xfs_dir2_db_t curdb = -1; /* current data block number */
xfs_dir2_db_t curfdb = -1; /* current free block number */
xfs_inode_t *dp; /* incore directory inode */
@@ -335,7 +334,7 @@ xfs_dir2_leafn_lookup_for_addname(
dp = args->dp;
tp = args->trans;
mp = dp->i_mount;
- leaf = bp->data;
+ leaf = bp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
#ifdef __KERNEL__
ASSERT(be16_to_cpu(leaf->hdr.count) > 0);
@@ -352,7 +351,7 @@ xfs_dir2_leafn_lookup_for_addname(
/* If so, it's a free block buffer, get the block number. */
curbp = state->extrablk.bp;
curfdb = state->extrablk.blkno;
- free = curbp->data;
+ free = curbp->b_addr;
ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
}
length = xfs_dir2_data_entsize(args->namelen);
@@ -394,7 +393,7 @@ xfs_dir2_leafn_lookup_for_addname(
* If we had one before, drop it.
*/
if (curbp)
- xfs_da_brelse(tp, curbp);
+ xfs_trans_brelse(tp, curbp);
/*
* Read the free block.
*/
@@ -403,7 +402,7 @@ xfs_dir2_leafn_lookup_for_addname(
-1, &curbp, XFS_DATA_FORK);
if (error)
return error;
- free = curbp->data;
+ free = curbp->b_addr;
ASSERT(be32_to_cpu(free->hdr.magic) ==
XFS_DIR2_FREE_MAGIC);
ASSERT((be32_to_cpu(free->hdr.firstdb) %
@@ -424,7 +423,7 @@ xfs_dir2_leafn_lookup_for_addname(
XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
XFS_ERRLEVEL_LOW, mp);
if (curfdb != newfdb)
- xfs_da_brelse(tp, curbp);
+ xfs_trans_brelse(tp, curbp);
return XFS_ERROR(EFSCORRUPTED);
}
curfdb = newfdb;
@@ -459,12 +458,12 @@ out:
*/
STATIC int
xfs_dir2_leafn_lookup_for_entry(
- xfs_dabuf_t *bp, /* leaf buffer */
+ struct xfs_buf *bp, /* leaf buffer */
xfs_da_args_t *args, /* operation arguments */
int *indexp, /* out: leaf entry index */
xfs_da_state_t *state) /* state to fill in */
{
- xfs_dabuf_t *curbp = NULL; /* current data/free buffer */
+ struct xfs_buf *curbp = NULL; /* current data/free buffer */
xfs_dir2_db_t curdb = -1; /* current data block number */
xfs_dir2_data_entry_t *dep; /* data block entry */
xfs_inode_t *dp; /* incore directory inode */
@@ -480,7 +479,7 @@ xfs_dir2_leafn_lookup_for_entry(
dp = args->dp;
tp = args->trans;
mp = dp->i_mount;
- leaf = bp->data;
+ leaf = bp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
#ifdef __KERNEL__
ASSERT(be16_to_cpu(leaf->hdr.count) > 0);
@@ -525,7 +524,7 @@ xfs_dir2_leafn_lookup_for_entry(
*/
if (curbp && (args->cmpresult == XFS_CMP_DIFFERENT ||
curdb != state->extrablk.blkno))
- xfs_da_brelse(tp, curbp);
+ xfs_trans_brelse(tp, curbp);
/*
* If needing the block that is saved with a CI match,
* use it otherwise read in the new data block.
@@ -547,7 +546,7 @@ xfs_dir2_leafn_lookup_for_entry(
/*
* Point to the data entry.
*/
- dep = (xfs_dir2_data_entry_t *)((char *)curbp->data +
+ dep = (xfs_dir2_data_entry_t *)((char *)curbp->b_addr +
xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)));
/*
* Compare the entry and if it's an exact match, return
@@ -559,7 +558,7 @@ xfs_dir2_leafn_lookup_for_entry(
/* If there is a CI match block, drop it */
if (args->cmpresult != XFS_CMP_DIFFERENT &&
curdb != state->extrablk.blkno)
- xfs_da_brelse(tp, state->extrablk.bp);
+ xfs_trans_brelse(tp, state->extrablk.bp);
args->cmpresult = cmp;
args->inumber = be64_to_cpu(dep->inumber);
*indexp = index;
@@ -567,7 +566,7 @@ xfs_dir2_leafn_lookup_for_entry(
state->extrablk.bp = curbp;
state->extrablk.blkno = curdb;
state->extrablk.index = (int)((char *)dep -
- (char *)curbp->data);
+ (char *)curbp->b_addr);
state->extrablk.magic = XFS_DIR2_DATA_MAGIC;
if (cmp == XFS_CMP_EXACT)
return XFS_ERROR(EEXIST);
@@ -586,7 +585,7 @@ xfs_dir2_leafn_lookup_for_entry(
} else {
/* If the curbp is not the CI match block, drop it */
if (state->extrablk.bp != curbp)
- xfs_da_brelse(tp, curbp);
+ xfs_trans_brelse(tp, curbp);
}
} else {
state->extravalid = 0;
@@ -602,7 +601,7 @@ xfs_dir2_leafn_lookup_for_entry(
*/
int
xfs_dir2_leafn_lookup_int(
- xfs_dabuf_t *bp, /* leaf buffer */
+ struct xfs_buf *bp, /* leaf buffer */
xfs_da_args_t *args, /* operation arguments */
int *indexp, /* out: leaf entry index */
xfs_da_state_t *state) /* state to fill in */
@@ -620,9 +619,9 @@ xfs_dir2_leafn_lookup_int(
static void
xfs_dir2_leafn_moveents(
xfs_da_args_t *args, /* operation arguments */
- xfs_dabuf_t *bp_s, /* source leaf buffer */
+ struct xfs_buf *bp_s, /* source leaf buffer */
int start_s, /* source leaf index */
- xfs_dabuf_t *bp_d, /* destination leaf buffer */
+ struct xfs_buf *bp_d, /* destination leaf buffer */
int start_d, /* destination leaf index */
int count) /* count of leaves to copy */
{
@@ -640,8 +639,8 @@ xfs_dir2_leafn_moveents(
return;
}
tp = args->trans;
- leaf_s = bp_s->data;
- leaf_d = bp_d->data;
+ leaf_s = bp_s->b_addr;
+ leaf_d = bp_d->b_addr;
/*
* If the destination index is not the end of the current
* destination leaf entries, open up a hole in the destination
@@ -702,14 +701,14 @@ xfs_dir2_leafn_moveents(
*/
int /* sort order */
xfs_dir2_leafn_order(
- xfs_dabuf_t *leaf1_bp, /* leaf1 buffer */
- xfs_dabuf_t *leaf2_bp) /* leaf2 buffer */
+ struct xfs_buf *leaf1_bp, /* leaf1 buffer */
+ struct xfs_buf *leaf2_bp) /* leaf2 buffer */
{
xfs_dir2_leaf_t *leaf1; /* leaf1 structure */
xfs_dir2_leaf_t *leaf2; /* leaf2 structure */
- leaf1 = leaf1_bp->data;
- leaf2 = leaf2_bp->data;
+ leaf1 = leaf1_bp->b_addr;
+ leaf2 = leaf2_bp->b_addr;
ASSERT(leaf1->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
ASSERT(leaf2->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
if (be16_to_cpu(leaf1->hdr.count) > 0 &&
@@ -757,8 +756,8 @@ xfs_dir2_leafn_rebalance(
blk1 = blk2;
blk2 = tmp;
}
- leaf1 = blk1->bp->data;
- leaf2 = blk2->bp->data;
+ leaf1 = blk1->bp->b_addr;
+ leaf2 = blk2->bp->b_addr;
oldsum = be16_to_cpu(leaf1->hdr.count) + be16_to_cpu(leaf2->hdr.count);
#ifdef DEBUG
oldstale = be16_to_cpu(leaf1->hdr.stale) + be16_to_cpu(leaf2->hdr.stale);
@@ -834,14 +833,14 @@ xfs_dir2_leafn_rebalance(
static int /* error */
xfs_dir2_leafn_remove(
xfs_da_args_t *args, /* operation arguments */
- xfs_dabuf_t *bp, /* leaf buffer */
+ struct xfs_buf *bp, /* leaf buffer */
int index, /* leaf entry index */
xfs_da_state_blk_t *dblk, /* data block */
int *rval) /* resulting block needs join */
{
xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_db_t db; /* data block number */
- xfs_dabuf_t *dbp; /* data block buffer */
+ struct xfs_buf *dbp; /* data block buffer */
xfs_dir2_data_entry_t *dep; /* data block entry */
xfs_inode_t *dp; /* incore directory inode */
xfs_dir2_leaf_t *leaf; /* leaf structure */
@@ -858,7 +857,7 @@ xfs_dir2_leafn_remove(
dp = args->dp;
tp = args->trans;
mp = dp->i_mount;
- leaf = bp->data;
+ leaf = bp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
/*
* Point to the entry we're removing.
@@ -884,7 +883,7 @@ xfs_dir2_leafn_remove(
* in the data block in case it changes.
*/
dbp = dblk->bp;
- hdr = dbp->data;
+ hdr = dbp->b_addr;
dep = (xfs_dir2_data_entry_t *)((char *)hdr + off);
longest = be16_to_cpu(hdr->bestfree[0].length);
needlog = needscan = 0;
@@ -905,7 +904,7 @@ xfs_dir2_leafn_remove(
*/
if (longest < be16_to_cpu(hdr->bestfree[0].length)) {
int error; /* error return value */
- xfs_dabuf_t *fbp; /* freeblock buffer */
+ struct xfs_buf *fbp; /* freeblock buffer */
xfs_dir2_db_t fdb; /* freeblock block number */
int findex; /* index in freeblock entries */
xfs_dir2_free_t *free; /* freeblock structure */
@@ -920,7 +919,7 @@ xfs_dir2_leafn_remove(
-1, &fbp, XFS_DATA_FORK))) {
return error;
}
- free = fbp->data;
+ free = fbp->b_addr;
ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
ASSERT(be32_to_cpu(free->hdr.firstdb) ==
xfs_dir2_free_max_bests(mp) *
@@ -948,9 +947,7 @@ xfs_dir2_leafn_remove(
* In this case just drop the buffer and some one else
* will eventually get rid of the empty block.
*/
- else if (error == ENOSPC && args->total == 0)
- xfs_da_buf_done(dbp);
- else
+ else if (!(error == ENOSPC && args->total == 0))
return error;
}
/*
@@ -1018,11 +1015,6 @@ xfs_dir2_leafn_remove(
*/
if (logfree)
xfs_dir2_free_log_bests(tp, fbp, findex, findex);
- /*
- * Drop the buffer if we still have it.
- */
- if (fbp)
- xfs_da_buf_done(fbp);
}
xfs_dir2_leafn_check(dp, bp);
/*
@@ -1114,7 +1106,7 @@ xfs_dir2_leafn_toosmall(
{
xfs_da_state_blk_t *blk; /* leaf block */
xfs_dablk_t blkno; /* leaf block number */
- xfs_dabuf_t *bp; /* leaf buffer */
+ struct xfs_buf *bp; /* leaf buffer */
int bytes; /* bytes in use */
int count; /* leaf live entry count */
int error; /* error return value */
@@ -1130,7 +1122,7 @@ xfs_dir2_leafn_toosmall(
* to coalesce with a sibling.
*/
blk = &state->path.blk[state->path.active - 1];
- info = blk->bp->data;
+ info = blk->bp->b_addr;
ASSERT(info->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
leaf = (xfs_dir2_leaf_t *)info;
count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
@@ -1189,7 +1181,7 @@ xfs_dir2_leafn_toosmall(
leaf = (xfs_dir2_leaf_t *)info;
count = be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
bytes = state->blocksize - (state->blocksize >> 2);
- leaf = bp->data;
+ leaf = bp->b_addr;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
count += be16_to_cpu(leaf->hdr.count) - be16_to_cpu(leaf->hdr.stale);
bytes -= count * (uint)sizeof(leaf->ents[0]);
@@ -1198,7 +1190,7 @@ xfs_dir2_leafn_toosmall(
*/
if (bytes >= 0)
break;
- xfs_da_brelse(state->args->trans, bp);
+ xfs_trans_brelse(state->args->trans, bp);
}
/*
* Didn't like either block, give up.
@@ -1207,11 +1199,7 @@ xfs_dir2_leafn_toosmall(
*action = 0;
return 0;
}
- /*
- * Done with the sibling leaf block here, drop the dabuf
- * so path_shift can get it.
- */
- xfs_da_buf_done(bp);
+
/*
* Make altpath point to the block we want to keep (the lower
* numbered block) and path point to the block we want to drop.
@@ -1247,8 +1235,8 @@ xfs_dir2_leafn_unbalance(
args = state->args;
ASSERT(drop_blk->magic == XFS_DIR2_LEAFN_MAGIC);
ASSERT(save_blk->magic == XFS_DIR2_LEAFN_MAGIC);
- drop_leaf = drop_blk->bp->data;
- save_leaf = save_blk->bp->data;
+ drop_leaf = drop_blk->bp->b_addr;
+ save_leaf = save_blk->bp->b_addr;
ASSERT(drop_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
ASSERT(save_leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC));
/*
@@ -1356,13 +1344,13 @@ xfs_dir2_node_addname_int(
{
xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_db_t dbno; /* data block number */
- xfs_dabuf_t *dbp; /* data block buffer */
+ struct xfs_buf *dbp; /* data block buffer */
xfs_dir2_data_entry_t *dep; /* data entry pointer */
xfs_inode_t *dp; /* incore directory inode */
xfs_dir2_data_unused_t *dup; /* data unused entry pointer */
int error; /* error return value */
xfs_dir2_db_t fbno; /* freespace block number */
- xfs_dabuf_t *fbp; /* freespace buffer */
+ struct xfs_buf *fbp; /* freespace buffer */
int findex; /* freespace entry index */
xfs_dir2_free_t *free=NULL; /* freespace block structure */
xfs_dir2_db_t ifbno; /* initial freespace block no */
@@ -1390,7 +1378,7 @@ xfs_dir2_node_addname_int(
* Remember initial freespace block number.
*/
ifbno = fblk->blkno;
- free = fbp->data;
+ free = fbp->b_addr;
ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
findex = fblk->index;
/*
@@ -1474,7 +1462,7 @@ xfs_dir2_node_addname_int(
if (unlikely(fbp == NULL)) {
continue;
}
- free = fbp->data;
+ free = fbp->b_addr;
ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
findex = 0;
}
@@ -1492,7 +1480,7 @@ xfs_dir2_node_addname_int(
/*
* Drop the block.
*/
- xfs_da_brelse(tp, fbp);
+ xfs_trans_brelse(tp, fbp);
fbp = NULL;
if (fblk && fblk->bp)
fblk->bp = NULL;
@@ -1507,36 +1495,23 @@ xfs_dir2_node_addname_int(
/*
* Not allowed to allocate, return failure.
*/
- if ((args->op_flags & XFS_DA_OP_JUSTCHECK) ||
- args->total == 0) {
- /*
- * Drop the freespace buffer unless it came from our
- * caller.
- */
- if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
- xfs_da_buf_done(fbp);
+ if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
return XFS_ERROR(ENOSPC);
- }
+
/*
* Allocate and initialize the new data block.
*/
if (unlikely((error = xfs_dir2_grow_inode(args,
XFS_DIR2_DATA_SPACE,
&dbno)) ||
- (error = xfs_dir2_data_init(args, dbno, &dbp)))) {
- /*
- * Drop the freespace buffer unless it came from our
- * caller.
- */
- if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
- xfs_da_buf_done(fbp);
+ (error = xfs_dir2_data_init(args, dbno, &dbp))))
return error;
- }
+
/*
* If (somehow) we have a freespace block, get rid of it.
*/
if (fbp)
- xfs_da_brelse(tp, fbp);
+ xfs_trans_brelse(tp, fbp);
if (fblk && fblk->bp)
fblk->bp = NULL;
@@ -1547,10 +1522,9 @@ xfs_dir2_node_addname_int(
fbno = xfs_dir2_db_to_fdb(mp, dbno);
if (unlikely(error = xfs_da_read_buf(tp, dp,
xfs_dir2_db_to_da(mp, fbno), -2, &fbp,
- XFS_DATA_FORK))) {
- xfs_da_buf_done(dbp);
+ XFS_DATA_FORK)))
return error;
- }
+
/*
* If there wasn't a freespace block, the read will
* return a NULL fbp. Allocate and initialize a new one.
@@ -1598,7 +1572,7 @@ xfs_dir2_node_addname_int(
* Initialize the new block to be empty, and remember
* its first slot as our empty slot.
*/
- free = fbp->data;
+ free = fbp->b_addr;
free->hdr.magic = cpu_to_be32(XFS_DIR2_FREE_MAGIC);
free->hdr.firstdb = cpu_to_be32(
(fbno - XFS_DIR2_FREE_FIRSTDB(mp)) *
@@ -1606,7 +1580,7 @@ xfs_dir2_node_addname_int(
free->hdr.nvalid = 0;
free->hdr.nused = 0;
} else {
- free = fbp->data;
+ free = fbp->b_addr;
ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
}
@@ -1639,7 +1613,7 @@ xfs_dir2_node_addname_int(
* We haven't allocated the data entry yet so this will
* change again.
*/
- hdr = dbp->data;
+ hdr = dbp->b_addr;
free->bests[findex] = hdr->bestfree[0].length;
logfree = 1;
}
@@ -1650,22 +1624,17 @@ xfs_dir2_node_addname_int(
/*
* If just checking, we succeeded.
*/
- if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
- if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
- xfs_da_buf_done(fbp);
+ if (args->op_flags & XFS_DA_OP_JUSTCHECK)
return 0;
- }
+
/*
* Read the data block in.
*/
- if (unlikely(
- error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, dbno),
- -1, &dbp, XFS_DATA_FORK))) {
- if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
- xfs_da_buf_done(fbp);
+ error = xfs_da_read_buf(tp, dp, xfs_dir2_db_to_da(mp, dbno),
+ -1, &dbp, XFS_DATA_FORK);
+ if (error)
return error;
- }
- hdr = dbp->data;
+ hdr = dbp->b_addr;
logfree = 0;
}
ASSERT(be16_to_cpu(hdr->bestfree[0].length) >= length);
@@ -1714,16 +1683,10 @@ xfs_dir2_node_addname_int(
if (logfree)
xfs_dir2_free_log_bests(tp, fbp, findex, findex);
/*
- * If the caller didn't hand us the freespace block, drop it.
- */
- if ((fblk == NULL || fblk->bp == NULL) && fbp != NULL)
- xfs_da_buf_done(fbp);
- /*
* Return the data block and offset in args, then drop the data block.
*/
args->blkno = (xfs_dablk_t)dbno;
args->index = be16_to_cpu(*tagp);
- xfs_da_buf_done(dbp);
return 0;
}
@@ -1761,22 +1724,23 @@ xfs_dir2_node_lookup(
/* If a CI match, dup the actual name and return EEXIST */
xfs_dir2_data_entry_t *dep;
- dep = (xfs_dir2_data_entry_t *)((char *)state->extrablk.bp->
- data + state->extrablk.index);
+ dep = (xfs_dir2_data_entry_t *)
+ ((char *)state->extrablk.bp->b_addr +
+ state->extrablk.index);
rval = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
}
/*
* Release the btree blocks and leaf block.
*/
for (i = 0; i < state->path.active; i++) {
- xfs_da_brelse(args->trans, state->path.blk[i].bp);
+ xfs_trans_brelse(args->trans, state->path.blk[i].bp);
state->path.blk[i].bp = NULL;
}
/*
* Release the data block if we have it.
*/
if (state->extravalid && state->extrablk.bp) {
- xfs_da_brelse(args->trans, state->extrablk.bp);
+ xfs_trans_brelse(args->trans, state->extrablk.bp);
state->extrablk.bp = NULL;
}
xfs_da_state_free(state);
@@ -1893,13 +1857,13 @@ xfs_dir2_node_replace(
*/
blk = &state->path.blk[state->path.active - 1];
ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
- leaf = blk->bp->data;
+ leaf = blk->bp->b_addr;
lep = &leaf->ents[blk->index];
ASSERT(state->extravalid);
/*
* Point to the data entry.
*/
- hdr = state->extrablk.bp->data;
+ hdr = state->extrablk.bp->b_addr;
ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC));
dep = (xfs_dir2_data_entry_t *)
((char *)hdr +
@@ -1916,14 +1880,14 @@ xfs_dir2_node_replace(
* Didn't find it, and we're holding a data block. Drop it.
*/
else if (state->extravalid) {
- xfs_da_brelse(args->trans, state->extrablk.bp);
+ xfs_trans_brelse(args->trans, state->extrablk.bp);
state->extrablk.bp = NULL;
}
/*
* Release all the buffers in the cursor.
*/
for (i = 0; i < state->path.active; i++) {
- xfs_da_brelse(args->trans, state->path.blk[i].bp);
+ xfs_trans_brelse(args->trans, state->path.blk[i].bp);
state->path.blk[i].bp = NULL;
}
xfs_da_state_free(state);
@@ -1940,7 +1904,7 @@ xfs_dir2_node_trim_free(
xfs_fileoff_t fo, /* free block number */
int *rvalp) /* out: did something */
{
- xfs_dabuf_t *bp; /* freespace buffer */
+ struct xfs_buf *bp; /* freespace buffer */
xfs_inode_t *dp; /* incore directory inode */
int error; /* error return code */
xfs_dir2_free_t *free; /* freespace structure */
@@ -1965,13 +1929,13 @@ xfs_dir2_node_trim_free(
if (bp == NULL) {
return 0;
}
- free = bp->data;
+ free = bp->b_addr;
ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC));
/*
* If there are used entries, there's nothing to do.
*/
if (be32_to_cpu(free->hdr.nused) > 0) {
- xfs_da_brelse(tp, bp);
+ xfs_trans_brelse(tp, bp);
*rvalp = 0;
return 0;
}
@@ -1987,7 +1951,7 @@ xfs_dir2_node_trim_free(
* pieces. This is the last block of an extent.
*/
ASSERT(error != ENOSPC);
- xfs_da_brelse(tp, bp);
+ xfs_trans_brelse(tp, bp);
return error;
}
/*
diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/xfs_dir2_priv.h
index 067f403ecf8a..3523d3e15aa8 100644
--- a/fs/xfs/xfs_dir2_priv.h
+++ b/fs/xfs/xfs_dir2_priv.h
@@ -25,7 +25,7 @@ extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r);
extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
xfs_dir2_db_t *dbp);
extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
- struct xfs_dabuf *bp);
+ struct xfs_buf *bp);
extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
const unsigned char *name, int len);
@@ -37,11 +37,11 @@ extern int xfs_dir2_block_lookup(struct xfs_da_args *args);
extern int xfs_dir2_block_removename(struct xfs_da_args *args);
extern int xfs_dir2_block_replace(struct xfs_da_args *args);
extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
- struct xfs_dabuf *lbp, struct xfs_dabuf *dbp);
+ struct xfs_buf *lbp, struct xfs_buf *dbp);
/* xfs_dir2_data.c */
#ifdef DEBUG
-extern void xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_dabuf *bp);
+extern void xfs_dir2_data_check(struct xfs_inode *dp, struct xfs_buf *bp);
#else
#define xfs_dir2_data_check(dp,bp)
#endif
@@ -51,43 +51,43 @@ xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr,
extern void xfs_dir2_data_freescan(struct xfs_mount *mp,
struct xfs_dir2_data_hdr *hdr, int *loghead);
extern int xfs_dir2_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
- struct xfs_dabuf **bpp);
-extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_dabuf *bp,
+ struct xfs_buf **bpp);
+extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_buf *bp,
struct xfs_dir2_data_entry *dep);
extern void xfs_dir2_data_log_header(struct xfs_trans *tp,
- struct xfs_dabuf *bp);
-extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_dabuf *bp,
+ struct xfs_buf *bp);
+extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_buf *bp,
struct xfs_dir2_data_unused *dup);
-extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
+extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_buf *bp,
xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len,
int *needlogp, int *needscanp);
-extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_dabuf *bp,
+extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_buf *bp,
struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset,
xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp);
/* xfs_dir2_leaf.c */
extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
- struct xfs_dabuf *dbp);
+ struct xfs_buf *dbp);
extern int xfs_dir2_leaf_addname(struct xfs_da_args *args);
extern void xfs_dir2_leaf_compact(struct xfs_da_args *args,
- struct xfs_dabuf *bp);
-extern void xfs_dir2_leaf_compact_x1(struct xfs_dabuf *bp, int *indexp,
+ struct xfs_buf *bp);
+extern void xfs_dir2_leaf_compact_x1(struct xfs_buf *bp, int *indexp,
int *lowstalep, int *highstalep, int *lowlogp, int *highlogp);
extern int xfs_dir2_leaf_getdents(struct xfs_inode *dp, void *dirent,
size_t bufsize, xfs_off_t *offset, filldir_t filldir);
extern int xfs_dir2_leaf_init(struct xfs_da_args *args, xfs_dir2_db_t bno,
- struct xfs_dabuf **bpp, int magic);
-extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_dabuf *bp,
+ struct xfs_buf **bpp, int magic);
+extern void xfs_dir2_leaf_log_ents(struct xfs_trans *tp, struct xfs_buf *bp,
int first, int last);
extern void xfs_dir2_leaf_log_header(struct xfs_trans *tp,
- struct xfs_dabuf *bp);
+ struct xfs_buf *bp);
extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args);
extern int xfs_dir2_leaf_removename(struct xfs_da_args *args);
extern int xfs_dir2_leaf_replace(struct xfs_da_args *args);
extern int xfs_dir2_leaf_search_hash(struct xfs_da_args *args,
- struct xfs_dabuf *lbp);
+ struct xfs_buf *lbp);
extern int xfs_dir2_leaf_trim_data(struct xfs_da_args *args,
- struct xfs_dabuf *lbp, xfs_dir2_db_t db);
+ struct xfs_buf *lbp, xfs_dir2_db_t db);
extern struct xfs_dir2_leaf_entry *
xfs_dir2_leaf_find_entry(struct xfs_dir2_leaf *leaf, int index, int compact,
int lowstale, int highstale,
@@ -96,13 +96,13 @@ extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state);
/* xfs_dir2_node.c */
extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
- struct xfs_dabuf *lbp);
-extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_dabuf *bp, int *count);
-extern int xfs_dir2_leafn_lookup_int(struct xfs_dabuf *bp,
+ struct xfs_buf *lbp);
+extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_buf *bp, int *count);
+extern int xfs_dir2_leafn_lookup_int(struct xfs_buf *bp,
struct xfs_da_args *args, int *indexp,
struct xfs_da_state *state);
-extern int xfs_dir2_leafn_order(struct xfs_dabuf *leaf1_bp,
- struct xfs_dabuf *leaf2_bp);
+extern int xfs_dir2_leafn_order(struct xfs_buf *leaf1_bp,
+ struct xfs_buf *leaf2_bp);
extern int xfs_dir2_leafn_split(struct xfs_da_state *state,
struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk);
extern int xfs_dir2_leafn_toosmall(struct xfs_da_state *state, int *action);
@@ -122,7 +122,7 @@ extern xfs_ino_t xfs_dir2_sfe_get_ino(struct xfs_dir2_sf_hdr *sfp,
struct xfs_dir2_sf_entry *sfep);
extern int xfs_dir2_block_sfsize(struct xfs_inode *dp,
struct xfs_dir2_data_hdr *block, struct xfs_dir2_sf_hdr *sfhp);
-extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_dabuf *bp,
+extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_buf *bp,
int size, xfs_dir2_sf_hdr_t *sfhp);
extern int xfs_dir2_sf_addname(struct xfs_da_args *args);
extern int xfs_dir2_sf_create(struct xfs_da_args *args, xfs_ino_t pino);
diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c
index 19bf0c5e38f4..1b9fc3ec7e4b 100644
--- a/fs/xfs/xfs_dir2_sf.c
+++ b/fs/xfs/xfs_dir2_sf.c
@@ -222,7 +222,7 @@ xfs_dir2_block_sfsize(
int /* error */
xfs_dir2_block_to_sf(
xfs_da_args_t *args, /* operation arguments */
- xfs_dabuf_t *bp, /* block buffer */
+ struct xfs_buf *bp,
int size, /* shortform directory size */
xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */
{
@@ -249,7 +249,7 @@ xfs_dir2_block_to_sf(
* and add local data.
*/
hdr = kmem_alloc(mp->m_dirblksize, KM_SLEEP);
- memcpy(hdr, bp->data, mp->m_dirblksize);
+ memcpy(hdr, bp->b_addr, mp->m_dirblksize);
logflags = XFS_ILOG_CORE;
if ((error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp))) {
ASSERT(error != ENOSPC);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 9f7ec15a6522..56afcdb2377d 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -236,7 +236,6 @@ xfs_file_aio_read(
ssize_t ret = 0;
int ioflags = 0;
xfs_fsize_t n;
- unsigned long seg;
XFS_STATS_INC(xs_read_calls);
@@ -247,19 +246,9 @@ xfs_file_aio_read(
if (file->f_mode & FMODE_NOCMTIME)
ioflags |= IO_INVIS;
- /* START copy & waste from filemap.c */
- for (seg = 0; seg < nr_segs; seg++) {
- const struct iovec *iv = &iovp[seg];
-
- /*
- * If any segment has a negative length, or the cumulative
- * length ever wraps negative then return -EINVAL.
- */
- size += iv->iov_len;
- if (unlikely((ssize_t)(size|iv->iov_len) < 0))
- return XFS_ERROR(-EINVAL);
- }
- /* END copy & waste from filemap.c */
+ ret = generic_segment_checks(iovp, &nr_segs, &size, VERIFY_WRITE);
+ if (ret < 0)
+ return ret;
if (unlikely(ioflags & IO_ISDIRECT)) {
xfs_buftarg_t *target =
@@ -273,7 +262,7 @@ xfs_file_aio_read(
}
}
- n = XFS_MAXIOFFSET(mp) - iocb->ki_pos;
+ n = mp->m_super->s_maxbytes - iocb->ki_pos;
if (n <= 0 || size == 0)
return 0;
@@ -781,10 +770,12 @@ xfs_file_aio_write(
if (ocount == 0)
return 0;
- xfs_wait_for_freeze(ip->i_mount, SB_FREEZE_WRITE);
+ sb_start_write(inode->i_sb);
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
- return -EIO;
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+ ret = -EIO;
+ goto out;
+ }
if (unlikely(file->f_flags & O_DIRECT))
ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount);
@@ -803,6 +794,8 @@ xfs_file_aio_write(
ret = err;
}
+out:
+ sb_end_write(inode->i_sb);
return ret;
}
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 177a21a7ac49..21e37b55f7e5 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -442,14 +442,13 @@ xfs_ialloc_next_ag(
* Select an allocation group to look for a free inode in, based on the parent
* inode and then mode. Return the allocation group buffer.
*/
-STATIC xfs_buf_t * /* allocation group buffer */
+STATIC xfs_agnumber_t
xfs_ialloc_ag_select(
xfs_trans_t *tp, /* transaction pointer */
xfs_ino_t parent, /* parent directory inode number */
umode_t mode, /* bits set to indicate file type */
int okalloc) /* ok to allocate more space */
{
- xfs_buf_t *agbp; /* allocation group header buffer */
xfs_agnumber_t agcount; /* number of ag's in the filesystem */
xfs_agnumber_t agno; /* current ag number */
int flags; /* alloc buffer locking flags */
@@ -459,6 +458,7 @@ xfs_ialloc_ag_select(
int needspace; /* file mode implies space allocated */
xfs_perag_t *pag; /* per allocation group data */
xfs_agnumber_t pagno; /* parent (starting) ag number */
+ int error;
/*
* Files of these types need at least one block if length > 0
@@ -474,7 +474,9 @@ xfs_ialloc_ag_select(
if (pagno >= agcount)
pagno = 0;
}
+
ASSERT(pagno < agcount);
+
/*
* Loop through allocation groups, looking for one with a little
* free space in it. Note we don't look for free inodes, exactly.
@@ -486,51 +488,45 @@ xfs_ialloc_ag_select(
flags = XFS_ALLOC_FLAG_TRYLOCK;
for (;;) {
pag = xfs_perag_get(mp, agno);
+ if (!pag->pagi_inodeok) {
+ xfs_ialloc_next_ag(mp);
+ goto nextag;
+ }
+
if (!pag->pagi_init) {
- if (xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
- agbp = NULL;
+ error = xfs_ialloc_pagi_init(mp, tp, agno);
+ if (error)
goto nextag;
- }
- } else
- agbp = NULL;
+ }
- if (!pag->pagi_inodeok) {
- xfs_ialloc_next_ag(mp);
- goto unlock_nextag;
+ if (pag->pagi_freecount) {
+ xfs_perag_put(pag);
+ return agno;
}
- /*
- * Is there enough free space for the file plus a block
- * of inodes (if we need to allocate some)?
- */
- ineed = pag->pagi_freecount ? 0 : XFS_IALLOC_BLOCKS(mp);
- if (ineed && !pag->pagf_init) {
- if (agbp == NULL &&
- xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
- agbp = NULL;
+ if (!okalloc)
+ goto nextag;
+
+ if (!pag->pagf_init) {
+ error = xfs_alloc_pagf_init(mp, tp, agno, flags);
+ if (error)
goto nextag;
- }
- (void)xfs_alloc_pagf_init(mp, tp, agno, flags);
}
- if (!ineed || pag->pagf_init) {
- if (ineed && !(longest = pag->pagf_longest))
- longest = pag->pagf_flcount > 0;
- if (!ineed ||
- (pag->pagf_freeblks >= needspace + ineed &&
- longest >= ineed &&
- okalloc)) {
- if (agbp == NULL &&
- xfs_ialloc_read_agi(mp, tp, agno, &agbp)) {
- agbp = NULL;
- goto nextag;
- }
- xfs_perag_put(pag);
- return agbp;
- }
+
+ /*
+ * Is there enough free space for the file plus a block of
+ * inodes? (if we need to allocate some)?
+ */
+ ineed = XFS_IALLOC_BLOCKS(mp);
+ longest = pag->pagf_longest;
+ if (!longest)
+ longest = pag->pagf_flcount > 0;
+
+ if (pag->pagf_freeblks >= needspace + ineed &&
+ longest >= ineed) {
+ xfs_perag_put(pag);
+ return agno;
}
-unlock_nextag:
- if (agbp)
- xfs_trans_brelse(tp, agbp);
nextag:
xfs_perag_put(pag);
/*
@@ -538,13 +534,13 @@ nextag:
* down.
*/
if (XFS_FORCED_SHUTDOWN(mp))
- return NULL;
+ return NULLAGNUMBER;
agno++;
if (agno >= agcount)
agno = 0;
if (agno == pagno) {
if (flags == 0)
- return NULL;
+ return NULLAGNUMBER;
flags = 0;
}
}
@@ -607,195 +603,39 @@ xfs_ialloc_get_rec(
}
/*
- * Visible inode allocation functions.
- */
-/*
- * Find a free (set) bit in the inode bitmask.
- */
-static inline int xfs_ialloc_find_free(xfs_inofree_t *fp)
-{
- return xfs_lowbit64(*fp);
-}
-
-/*
- * Allocate an inode on disk.
- * Mode is used to tell whether the new inode will need space, and whether
- * it is a directory.
- *
- * The arguments IO_agbp and alloc_done are defined to work within
- * the constraint of one allocation per transaction.
- * xfs_dialloc() is designed to be called twice if it has to do an
- * allocation to make more free inodes. On the first call,
- * IO_agbp should be set to NULL. If an inode is available,
- * i.e., xfs_dialloc() did not need to do an allocation, an inode
- * number is returned. In this case, IO_agbp would be set to the
- * current ag_buf and alloc_done set to false.
- * If an allocation needed to be done, xfs_dialloc would return
- * the current ag_buf in IO_agbp and set alloc_done to true.
- * The caller should then commit the current transaction, allocate a new
- * transaction, and call xfs_dialloc() again, passing in the previous
- * value of IO_agbp. IO_agbp should be held across the transactions.
- * Since the agbp is locked across the two calls, the second call is
- * guaranteed to have a free inode available.
+ * Allocate an inode.
*
- * Once we successfully pick an inode its number is returned and the
- * on-disk data structures are updated. The inode itself is not read
- * in, since doing so would break ordering constraints with xfs_reclaim.
+ * The caller selected an AG for us, and made sure that free inodes are
+ * available.
*/
-int
-xfs_dialloc(
- xfs_trans_t *tp, /* transaction pointer */
- xfs_ino_t parent, /* parent inode (directory) */
- umode_t mode, /* mode bits for new inode */
- int okalloc, /* ok to allocate more space */
- xfs_buf_t **IO_agbp, /* in/out ag header's buffer */
- boolean_t *alloc_done, /* true if we needed to replenish
- inode freelist */
- xfs_ino_t *inop) /* inode number allocated */
+STATIC int
+xfs_dialloc_ag(
+ struct xfs_trans *tp,
+ struct xfs_buf *agbp,
+ xfs_ino_t parent,
+ xfs_ino_t *inop)
{
- xfs_agnumber_t agcount; /* number of allocation groups */
- xfs_buf_t *agbp; /* allocation group header's buffer */
- xfs_agnumber_t agno; /* allocation group number */
- xfs_agi_t *agi; /* allocation group header structure */
- xfs_btree_cur_t *cur; /* inode allocation btree cursor */
- int error; /* error return value */
- int i; /* result code */
- int ialloced; /* inode allocation status */
- int noroom = 0; /* no space for inode blk allocation */
- xfs_ino_t ino; /* fs-relative inode to be returned */
- /* REFERENCED */
- int j; /* result code */
- xfs_mount_t *mp; /* file system mount structure */
- int offset; /* index of inode in chunk */
- xfs_agino_t pagino; /* parent's AG relative inode # */
- xfs_agnumber_t pagno; /* parent's AG number */
- xfs_inobt_rec_incore_t rec; /* inode allocation record */
- xfs_agnumber_t tagno; /* testing allocation group number */
- xfs_btree_cur_t *tcur; /* temp cursor */
- xfs_inobt_rec_incore_t trec; /* temp inode allocation record */
- struct xfs_perag *pag;
-
-
- if (*IO_agbp == NULL) {
- /*
- * We do not have an agbp, so select an initial allocation
- * group for inode allocation.
- */
- agbp = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
- /*
- * Couldn't find an allocation group satisfying the
- * criteria, give up.
- */
- if (!agbp) {
- *inop = NULLFSINO;
- return 0;
- }
- agi = XFS_BUF_TO_AGI(agbp);
- ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
- } else {
- /*
- * Continue where we left off before. In this case, we
- * know that the allocation group has free inodes.
- */
- agbp = *IO_agbp;
- agi = XFS_BUF_TO_AGI(agbp);
- ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
- ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
- }
- mp = tp->t_mountp;
- agcount = mp->m_sb.sb_agcount;
- agno = be32_to_cpu(agi->agi_seqno);
- tagno = agno;
- pagno = XFS_INO_TO_AGNO(mp, parent);
- pagino = XFS_INO_TO_AGINO(mp, parent);
-
- /*
- * If we have already hit the ceiling of inode blocks then clear
- * okalloc so we scan all available agi structures for a free
- * inode.
- */
-
- if (mp->m_maxicount &&
- mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) {
- noroom = 1;
- okalloc = 0;
- }
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_agi *agi = XFS_BUF_TO_AGI(agbp);
+ xfs_agnumber_t agno = be32_to_cpu(agi->agi_seqno);
+ xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent);
+ xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent);
+ struct xfs_perag *pag;
+ struct xfs_btree_cur *cur, *tcur;
+ struct xfs_inobt_rec_incore rec, trec;
+ xfs_ino_t ino;
+ int error;
+ int offset;
+ int i, j;
- /*
- * Loop until we find an allocation group that either has free inodes
- * or in which we can allocate some inodes. Iterate through the
- * allocation groups upward, wrapping at the end.
- */
- *alloc_done = B_FALSE;
- while (!agi->agi_freecount) {
- /*
- * Don't do anything if we're not supposed to allocate
- * any blocks, just go on to the next ag.
- */
- if (okalloc) {
- /*
- * Try to allocate some new inodes in the allocation
- * group.
- */
- if ((error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced))) {
- xfs_trans_brelse(tp, agbp);
- if (error == ENOSPC) {
- *inop = NULLFSINO;
- return 0;
- } else
- return error;
- }
- if (ialloced) {
- /*
- * We successfully allocated some inodes, return
- * the current context to the caller so that it
- * can commit the current transaction and call
- * us again where we left off.
- */
- ASSERT(be32_to_cpu(agi->agi_freecount) > 0);
- *alloc_done = B_TRUE;
- *IO_agbp = agbp;
- *inop = NULLFSINO;
- return 0;
- }
- }
- /*
- * If it failed, give up on this ag.
- */
- xfs_trans_brelse(tp, agbp);
- /*
- * Go on to the next ag: get its ag header.
- */
-nextag:
- if (++tagno == agcount)
- tagno = 0;
- if (tagno == agno) {
- *inop = NULLFSINO;
- return noroom ? ENOSPC : 0;
- }
- pag = xfs_perag_get(mp, tagno);
- if (pag->pagi_inodeok == 0) {
- xfs_perag_put(pag);
- goto nextag;
- }
- error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp);
- xfs_perag_put(pag);
- if (error)
- goto nextag;
- agi = XFS_BUF_TO_AGI(agbp);
- ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
- }
- /*
- * Here with an allocation group that has a free inode.
- * Reset agno since we may have chosen a new ag in the
- * loop above.
- */
- agno = tagno;
- *IO_agbp = NULL;
pag = xfs_perag_get(mp, agno);
+ ASSERT(pag->pagi_init);
+ ASSERT(pag->pagi_inodeok);
+ ASSERT(pag->pagi_freecount > 0);
+
restart_pagno:
- cur = xfs_inobt_init_cursor(mp, tp, agbp, be32_to_cpu(agi->agi_seqno));
+ cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
/*
* If pagino is 0 (this is the root inode allocation) use newino.
* This must work because we've just allocated some.
@@ -995,7 +835,7 @@ newino:
}
alloc_inode:
- offset = xfs_ialloc_find_free(&rec.ir_free);
+ offset = xfs_lowbit64(rec.ir_free);
ASSERT(offset >= 0);
ASSERT(offset < XFS_INODES_PER_CHUNK);
ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
@@ -1028,6 +868,164 @@ error0:
}
/*
+ * Allocate an inode on disk.
+ *
+ * Mode is used to tell whether the new inode will need space, and whether it
+ * is a directory.
+ *
+ * This function is designed to be called twice if it has to do an allocation
+ * to make more free inodes. On the first call, *IO_agbp should be set to NULL.
+ * If an inode is available without having to performn an allocation, an inode
+ * number is returned. In this case, *IO_agbp would be NULL. If an allocation
+ * needes to be done, xfs_dialloc would return the current AGI buffer in
+ * *IO_agbp. The caller should then commit the current transaction, allocate a
+ * new transaction, and call xfs_dialloc() again, passing in the previous value
+ * of *IO_agbp. IO_agbp should be held across the transactions. Since the AGI
+ * buffer is locked across the two calls, the second call is guaranteed to have
+ * a free inode available.
+ *
+ * Once we successfully pick an inode its number is returned and the on-disk
+ * data structures are updated. The inode itself is not read in, since doing so
+ * would break ordering constraints with xfs_reclaim.
+ */
+int
+xfs_dialloc(
+ struct xfs_trans *tp,
+ xfs_ino_t parent,
+ umode_t mode,
+ int okalloc,
+ struct xfs_buf **IO_agbp,
+ xfs_ino_t *inop)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_buf *agbp;
+ xfs_agnumber_t agno;
+ int error;
+ int ialloced;
+ int noroom = 0;
+ xfs_agnumber_t start_agno;
+ struct xfs_perag *pag;
+
+ if (*IO_agbp) {
+ /*
+ * If the caller passes in a pointer to the AGI buffer,
+ * continue where we left off before. In this case, we
+ * know that the allocation group has free inodes.
+ */
+ agbp = *IO_agbp;
+ goto out_alloc;
+ }
+
+ /*
+ * We do not have an agbp, so select an initial allocation
+ * group for inode allocation.
+ */
+ start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc);
+ if (start_agno == NULLAGNUMBER) {
+ *inop = NULLFSINO;
+ return 0;
+ }
+
+ /*
+ * If we have already hit the ceiling of inode blocks then clear
+ * okalloc so we scan all available agi structures for a free
+ * inode.
+ */
+ if (mp->m_maxicount &&
+ mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) {
+ noroom = 1;
+ okalloc = 0;
+ }
+
+ /*
+ * Loop until we find an allocation group that either has free inodes
+ * or in which we can allocate some inodes. Iterate through the
+ * allocation groups upward, wrapping at the end.
+ */
+ agno = start_agno;
+ for (;;) {
+ pag = xfs_perag_get(mp, agno);
+ if (!pag->pagi_inodeok) {
+ xfs_ialloc_next_ag(mp);
+ goto nextag;
+ }
+
+ if (!pag->pagi_init) {
+ error = xfs_ialloc_pagi_init(mp, tp, agno);
+ if (error)
+ goto out_error;
+ }
+
+ /*
+ * Do a first racy fast path check if this AG is usable.
+ */
+ if (!pag->pagi_freecount && !okalloc)
+ goto nextag;
+
+ error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
+ if (error)
+ goto out_error;
+
+ /*
+ * Once the AGI has been read in we have to recheck
+ * pagi_freecount with the AGI buffer lock held.
+ */
+ if (pag->pagi_freecount) {
+ xfs_perag_put(pag);
+ goto out_alloc;
+ }
+
+ if (!okalloc) {
+ xfs_trans_brelse(tp, agbp);
+ goto nextag;
+ }
+
+ error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced);
+ if (error) {
+ xfs_trans_brelse(tp, agbp);
+
+ if (error != ENOSPC)
+ goto out_error;
+
+ xfs_perag_put(pag);
+ *inop = NULLFSINO;
+ return 0;
+ }
+
+ if (ialloced) {
+ /*
+ * We successfully allocated some inodes, return
+ * the current context to the caller so that it
+ * can commit the current transaction and call
+ * us again where we left off.
+ */
+ ASSERT(pag->pagi_freecount > 0);
+ xfs_perag_put(pag);
+
+ *IO_agbp = agbp;
+ *inop = NULLFSINO;
+ return 0;
+ }
+
+nextag:
+ xfs_perag_put(pag);
+ if (++agno == mp->m_sb.sb_agcount)
+ agno = 0;
+ if (agno == start_agno) {
+ *inop = NULLFSINO;
+ return noroom ? ENOSPC : 0;
+ }
+ }
+
+out_alloc:
+ *IO_agbp = NULL;
+ return xfs_dialloc_ag(tp, agbp, parent, inop);
+out_error:
+ xfs_perag_put(pag);
+ return XFS_ERROR(error);
+}
+
+/*
* Free disk inode. Carefully avoids touching the incore inode, all
* manipulations incore are the caller's responsibility.
* The on-disk inode is not changed by this operation, only the
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h
index 65ac57c8063c..1fd6ea4e9c91 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/xfs_ialloc.h
@@ -75,8 +75,6 @@ xfs_dialloc(
umode_t mode, /* mode bits for new inode */
int okalloc, /* ok to allocate more space */
struct xfs_buf **agbp, /* buf for a.g. inode header */
- boolean_t *alloc_done, /* an allocation was done to replenish
- the free inodes */
xfs_ino_t *inop); /* inode number allocated */
/*
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index 1bb4365e8c25..784a803383ec 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -41,17 +41,6 @@
/*
- * Define xfs inode iolock lockdep classes. We need to ensure that all active
- * inodes are considered the same for lockdep purposes, including inodes that
- * are recycled through the XFS_IRECLAIMABLE state. This is the the only way to
- * guarantee the locks are considered the same when there are multiple lock
- * initialisation siteѕ. Also, define a reclaimable inode class so it is
- * obvious in lockdep reports which class the report is against.
- */
-static struct lock_class_key xfs_iolock_active;
-struct lock_class_key xfs_iolock_reclaimable;
-
-/*
* Allocate and initialise an xfs_inode.
*/
STATIC struct xfs_inode *
@@ -80,8 +69,6 @@ xfs_inode_alloc(
ASSERT(ip->i_ino == 0);
mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
- lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
- &xfs_iolock_active, "xfs_iolock_active");
/* initialise the xfs inode */
ip->i_ino = ino;
@@ -250,8 +237,6 @@ xfs_iget_cache_hit(
ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
- lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
- &xfs_iolock_active, "xfs_iolock_active");
spin_unlock(&ip->i_flags_lock);
spin_unlock(&pag->pag_ici_lock);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a59eea09930a..2778258fcfa2 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -132,23 +132,28 @@ xfs_inobp_check(
#endif
/*
- * Find the buffer associated with the given inode map
- * We do basic validation checks on the buffer once it has been
- * retrieved from disk.
+ * This routine is called to map an inode to the buffer containing the on-disk
+ * version of the inode. It returns a pointer to the buffer containing the
+ * on-disk inode in the bpp parameter, and in the dipp parameter it returns a
+ * pointer to the on-disk inode within that buffer.
+ *
+ * If a non-zero error is returned, then the contents of bpp and dipp are
+ * undefined.
*/
-STATIC int
+int
xfs_imap_to_bp(
- xfs_mount_t *mp,
- xfs_trans_t *tp,
- struct xfs_imap *imap,
- xfs_buf_t **bpp,
- uint buf_flags,
- uint iget_flags)
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xfs_imap *imap,
+ struct xfs_dinode **dipp,
+ struct xfs_buf **bpp,
+ uint buf_flags,
+ uint iget_flags)
{
- int error;
- int i;
- int ni;
- xfs_buf_t *bp;
+ struct xfs_buf *bp;
+ int error;
+ int i;
+ int ni;
buf_flags |= XBF_UNMAPPED;
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, imap->im_blkno,
@@ -189,8 +194,8 @@ xfs_imap_to_bp(
xfs_trans_brelse(tp, bp);
return XFS_ERROR(EINVAL);
}
- XFS_CORRUPTION_ERROR("xfs_imap_to_bp",
- XFS_ERRLEVEL_HIGH, mp, dip);
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH,
+ mp, dip);
#ifdef DEBUG
xfs_emerg(mp,
"bad inode magic/vsn daddr %lld #%d (magic=%x)",
@@ -204,96 +209,9 @@ xfs_imap_to_bp(
}
xfs_inobp_check(mp, bp);
- *bpp = bp;
- return 0;
-}
-
-/*
- * This routine is called to map an inode number within a file
- * system to the buffer containing the on-disk version of the
- * inode. It returns a pointer to the buffer containing the
- * on-disk inode in the bpp parameter, and in the dip parameter
- * it returns a pointer to the on-disk inode within that buffer.
- *
- * If a non-zero error is returned, then the contents of bpp and
- * dipp are undefined.
- *
- * Use xfs_imap() to determine the size and location of the
- * buffer to read from disk.
- */
-int
-xfs_inotobp(
- xfs_mount_t *mp,
- xfs_trans_t *tp,
- xfs_ino_t ino,
- xfs_dinode_t **dipp,
- xfs_buf_t **bpp,
- int *offset,
- uint imap_flags)
-{
- struct xfs_imap imap;
- xfs_buf_t *bp;
- int error;
-
- imap.im_blkno = 0;
- error = xfs_imap(mp, tp, ino, &imap, imap_flags);
- if (error)
- return error;
-
- error = xfs_imap_to_bp(mp, tp, &imap, &bp, 0, imap_flags);
- if (error)
- return error;
-
- *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, imap.im_boffset);
- *bpp = bp;
- *offset = imap.im_boffset;
- return 0;
-}
-
-
-/*
- * This routine is called to map an inode to the buffer containing
- * the on-disk version of the inode. It returns a pointer to the
- * buffer containing the on-disk inode in the bpp parameter, and in
- * the dip parameter it returns a pointer to the on-disk inode within
- * that buffer.
- *
- * If a non-zero error is returned, then the contents of bpp and
- * dipp are undefined.
- *
- * The inode is expected to already been mapped to its buffer and read
- * in once, thus we can use the mapping information stored in the inode
- * rather than calling xfs_imap(). This allows us to avoid the overhead
- * of looking at the inode btree for small block file systems
- * (see xfs_imap()).
- */
-int
-xfs_itobp(
- xfs_mount_t *mp,
- xfs_trans_t *tp,
- xfs_inode_t *ip,
- xfs_dinode_t **dipp,
- xfs_buf_t **bpp,
- uint buf_flags)
-{
- xfs_buf_t *bp;
- int error;
-
- ASSERT(ip->i_imap.im_blkno != 0);
-
- error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, buf_flags, 0);
- if (error)
- return error;
- if (!bp) {
- ASSERT(buf_flags & XBF_TRYLOCK);
- ASSERT(tp == NULL);
- *bpp = NULL;
- return EAGAIN;
- }
-
- *dipp = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
*bpp = bp;
+ *dipp = (struct xfs_dinode *)xfs_buf_offset(bp, imap->im_boffset);
return 0;
}
@@ -796,10 +714,9 @@ xfs_iread(
/*
* Get pointers to the on-disk inode and the buffer containing it.
*/
- error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &bp, 0, iget_flags);
+ error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &bp, 0, iget_flags);
if (error)
return error;
- dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset);
/*
* If we got something that isn't an inode it means someone
@@ -876,7 +793,7 @@ xfs_iread(
/*
* Use xfs_trans_brelse() to release the buffer containing the
* on-disk inode, because it was acquired with xfs_trans_read_buf()
- * in xfs_itobp() above. If tp is NULL, this is just a normal
+ * in xfs_imap_to_bp() above. If tp is NULL, this is just a normal
* brelse(). If we're within a transaction, then xfs_trans_brelse()
* will only release the buffer if it is not dirty within the
* transaction. It will be OK to release the buffer in this case,
@@ -970,7 +887,6 @@ xfs_ialloc(
prid_t prid,
int okalloc,
xfs_buf_t **ialloc_context,
- boolean_t *call_again,
xfs_inode_t **ipp)
{
xfs_ino_t ino;
@@ -985,10 +901,10 @@ xfs_ialloc(
* the on-disk inode to be allocated.
*/
error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc,
- ialloc_context, call_again, &ino);
+ ialloc_context, &ino);
if (error)
return error;
- if (*call_again || ino == NULLFSINO) {
+ if (*ialloc_context || ino == NULLFSINO) {
*ipp = NULL;
return 0;
}
@@ -1207,7 +1123,9 @@ xfs_itruncate_extents(
int error = 0;
int done = 0;
- ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+ ASSERT(!atomic_read(&VFS_I(ip)->i_count) ||
+ xfs_isilocked(ip, XFS_IOLOCK_EXCL));
ASSERT(new_size <= XFS_ISIZE(ip));
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
ASSERT(ip->i_itemp != NULL);
@@ -1226,7 +1144,7 @@ xfs_itruncate_extents(
* then there is nothing to do.
*/
first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
- last_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
+ last_block = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
if (first_unmap_block == last_block)
return 0;
@@ -1355,7 +1273,8 @@ xfs_iunlink(
* Here we put the head pointer into our next pointer,
* and then we fall through to point the head at us.
*/
- error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0);
+ error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
+ 0, 0);
if (error)
return error;
@@ -1429,16 +1348,16 @@ xfs_iunlink_remove(
if (be32_to_cpu(agi->agi_unlinked[bucket_index]) == agino) {
/*
- * We're at the head of the list. Get the inode's
- * on-disk buffer to see if there is anyone after us
- * on the list. Only modify our next pointer if it
- * is not already NULLAGINO. This saves us the overhead
- * of dealing with the buffer when there is no need to
- * change it.
+ * We're at the head of the list. Get the inode's on-disk
+ * buffer to see if there is anyone after us on the list.
+ * Only modify our next pointer if it is not already NULLAGINO.
+ * This saves us the overhead of dealing with the buffer when
+ * there is no need to change it.
*/
- error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0);
+ error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
+ 0, 0);
if (error) {
- xfs_warn(mp, "%s: xfs_itobp() returned error %d.",
+ xfs_warn(mp, "%s: xfs_imap_to_bp returned error %d.",
__func__, error);
return error;
}
@@ -1472,34 +1391,45 @@ xfs_iunlink_remove(
next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
last_ibp = NULL;
while (next_agino != agino) {
- /*
- * If the last inode wasn't the one pointing to
- * us, then release its buffer since we're not
- * going to do anything with it.
- */
- if (last_ibp != NULL) {
+ struct xfs_imap imap;
+
+ if (last_ibp)
xfs_trans_brelse(tp, last_ibp);
- }
+
+ imap.im_blkno = 0;
next_ino = XFS_AGINO_TO_INO(mp, agno, next_agino);
- error = xfs_inotobp(mp, tp, next_ino, &last_dip,
- &last_ibp, &last_offset, 0);
+
+ error = xfs_imap(mp, tp, next_ino, &imap, 0);
+ if (error) {
+ xfs_warn(mp,
+ "%s: xfs_imap returned error %d.",
+ __func__, error);
+ return error;
+ }
+
+ error = xfs_imap_to_bp(mp, tp, &imap, &last_dip,
+ &last_ibp, 0, 0);
if (error) {
xfs_warn(mp,
- "%s: xfs_inotobp() returned error %d.",
+ "%s: xfs_imap_to_bp returned error %d.",
__func__, error);
return error;
}
+
+ last_offset = imap.im_boffset;
next_agino = be32_to_cpu(last_dip->di_next_unlinked);
ASSERT(next_agino != NULLAGINO);
ASSERT(next_agino != 0);
}
+
/*
- * Now last_ibp points to the buffer previous to us on
- * the unlinked list. Pull us from the list.
+ * Now last_ibp points to the buffer previous to us on the
+ * unlinked list. Pull us from the list.
*/
- error = xfs_itobp(mp, tp, ip, &dip, &ibp, 0);
+ error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &dip, &ibp,
+ 0, 0);
if (error) {
- xfs_warn(mp, "%s: xfs_itobp(2) returned error %d.",
+ xfs_warn(mp, "%s: xfs_imap_to_bp(2) returned error %d.",
__func__, error);
return error;
}
@@ -1749,7 +1679,8 @@ xfs_ifree(
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- error = xfs_itobp(ip->i_mount, tp, ip, &dip, &ibp, 0);
+ error = xfs_imap_to_bp(ip->i_mount, tp, &ip->i_imap, &dip, &ibp,
+ 0, 0);
if (error)
return error;
@@ -2428,7 +2359,7 @@ xfs_iflush(
/*
* For stale inodes we cannot rely on the backing buffer remaining
* stale in cache for the remaining life of the stale inode and so
- * xfs_itobp() below may give us a buffer that no longer contains
+ * xfs_imap_to_bp() below may give us a buffer that no longer contains
* inodes below. We have to check this after ensuring the inode is
* unpinned so that it is safe to reclaim the stale inode after the
* flush call.
@@ -2454,7 +2385,8 @@ xfs_iflush(
/*
* Get the buffer containing the on-disk inode.
*/
- error = xfs_itobp(mp, NULL, ip, &dip, &bp, XBF_TRYLOCK);
+ error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK,
+ 0);
if (error || !bp) {
xfs_ifunlock(ip);
return error;
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 1efff36a75b6..94b32f906e79 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -487,8 +487,6 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
#define XFS_IOLOCK_DEP(flags) (((flags) & XFS_IOLOCK_DEP_MASK) >> XFS_IOLOCK_SHIFT)
#define XFS_ILOCK_DEP(flags) (((flags) & XFS_ILOCK_DEP_MASK) >> XFS_ILOCK_SHIFT)
-extern struct lock_class_key xfs_iolock_reclaimable;
-
/*
* For multiple groups support: if S_ISGID bit is set in the parent
* directory, group of new file is set to that of the parent, and
@@ -517,7 +515,7 @@ void xfs_inode_free(struct xfs_inode *ip);
*/
int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, umode_t,
xfs_nlink_t, xfs_dev_t, prid_t, int,
- struct xfs_buf **, boolean_t *, xfs_inode_t **);
+ struct xfs_buf **, xfs_inode_t **);
uint xfs_ip2xflags(struct xfs_inode *);
uint xfs_dic2xflags(struct xfs_dinode *);
@@ -557,12 +555,9 @@ do { \
#define XFS_IGET_UNTRUSTED 0x2
#define XFS_IGET_DONTCACHE 0x4
-int xfs_inotobp(struct xfs_mount *, struct xfs_trans *,
- xfs_ino_t, struct xfs_dinode **,
- struct xfs_buf **, int *, uint);
-int xfs_itobp(struct xfs_mount *, struct xfs_trans *,
- struct xfs_inode *, struct xfs_dinode **,
- struct xfs_buf **, uint);
+int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
+ struct xfs_imap *, struct xfs_dinode **,
+ struct xfs_buf **, uint, uint);
int xfs_iread(struct xfs_mount *, struct xfs_trans *,
struct xfs_inode *, uint);
void xfs_dinode_to_disk(struct xfs_dinode *,
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 1f1535d25a9b..0e0232c3b6d9 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -364,9 +364,15 @@ xfs_fssetdm_by_handle(
if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t)))
return -XFS_ERROR(EFAULT);
+ error = mnt_want_write_file(parfilp);
+ if (error)
+ return error;
+
dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq);
- if (IS_ERR(dentry))
+ if (IS_ERR(dentry)) {
+ mnt_drop_write_file(parfilp);
return PTR_ERR(dentry);
+ }
if (IS_IMMUTABLE(dentry->d_inode) || IS_APPEND(dentry->d_inode)) {
error = -XFS_ERROR(EPERM);
@@ -382,6 +388,7 @@ xfs_fssetdm_by_handle(
fsd.fsd_dmstate);
out:
+ mnt_drop_write_file(parfilp);
dput(dentry);
return error;
}
@@ -634,7 +641,11 @@ xfs_ioc_space(
if (ioflags & IO_INVIS)
attr_flags |= XFS_ATTR_DMI;
+ error = mnt_want_write_file(filp);
+ if (error)
+ return error;
error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags);
+ mnt_drop_write_file(filp);
return -error;
}
@@ -1163,6 +1174,7 @@ xfs_ioc_fssetxattr(
{
struct fsxattr fa;
unsigned int mask;
+ int error;
if (copy_from_user(&fa, arg, sizeof(fa)))
return -EFAULT;
@@ -1171,7 +1183,12 @@ xfs_ioc_fssetxattr(
if (filp->f_flags & (O_NDELAY|O_NONBLOCK))
mask |= FSX_NONBLOCK;
- return -xfs_ioctl_setattr(ip, &fa, mask);
+ error = mnt_want_write_file(filp);
+ if (error)
+ return error;
+ error = xfs_ioctl_setattr(ip, &fa, mask);
+ mnt_drop_write_file(filp);
+ return -error;
}
STATIC int
@@ -1196,6 +1213,7 @@ xfs_ioc_setxflags(
struct fsxattr fa;
unsigned int flags;
unsigned int mask;
+ int error;
if (copy_from_user(&flags, arg, sizeof(flags)))
return -EFAULT;
@@ -1210,7 +1228,12 @@ xfs_ioc_setxflags(
mask |= FSX_NONBLOCK;
fa.fsx_xflags = xfs_merge_ioc_xflags(flags, xfs_ip2xflags(ip));
- return -xfs_ioctl_setattr(ip, &fa, mask);
+ error = mnt_want_write_file(filp);
+ if (error)
+ return error;
+ error = xfs_ioctl_setattr(ip, &fa, mask);
+ mnt_drop_write_file(filp);
+ return -error;
}
STATIC int
@@ -1385,8 +1408,13 @@ xfs_file_ioctl(
if (copy_from_user(&dmi, arg, sizeof(dmi)))
return -XFS_ERROR(EFAULT);
+ error = mnt_want_write_file(filp);
+ if (error)
+ return error;
+
error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
dmi.fsd_dmstate);
+ mnt_drop_write_file(filp);
return -error;
}
@@ -1434,7 +1462,11 @@ xfs_file_ioctl(
if (copy_from_user(&sxp, arg, sizeof(xfs_swapext_t)))
return -XFS_ERROR(EFAULT);
+ error = mnt_want_write_file(filp);
+ if (error)
+ return error;
error = xfs_swapext(&sxp);
+ mnt_drop_write_file(filp);
return -error;
}
@@ -1463,9 +1495,14 @@ xfs_file_ioctl(
if (copy_from_user(&inout, arg, sizeof(inout)))
return -XFS_ERROR(EFAULT);
+ error = mnt_want_write_file(filp);
+ if (error)
+ return error;
+
/* input parameter is passed in resblks field of structure */
in = inout.resblks;
error = xfs_reserve_blocks(mp, &in, &inout);
+ mnt_drop_write_file(filp);
if (error)
return -error;
@@ -1496,7 +1533,11 @@ xfs_file_ioctl(
if (copy_from_user(&in, arg, sizeof(in)))
return -XFS_ERROR(EFAULT);
+ error = mnt_want_write_file(filp);
+ if (error)
+ return error;
error = xfs_growfs_data(mp, &in);
+ mnt_drop_write_file(filp);
return -error;
}
@@ -1506,7 +1547,11 @@ xfs_file_ioctl(
if (copy_from_user(&in, arg, sizeof(in)))
return -XFS_ERROR(EFAULT);
+ error = mnt_want_write_file(filp);
+ if (error)
+ return error;
error = xfs_growfs_log(mp, &in);
+ mnt_drop_write_file(filp);
return -error;
}
@@ -1516,7 +1561,11 @@ xfs_file_ioctl(
if (copy_from_user(&in, arg, sizeof(in)))
return -XFS_ERROR(EFAULT);
+ error = mnt_want_write_file(filp);
+ if (error)
+ return error;
error = xfs_growfs_rt(mp, &in);
+ mnt_drop_write_file(filp);
return -error;
}
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index c4f2da0d2bf5..1244274a5674 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -600,7 +600,11 @@ xfs_file_compat_ioctl(
if (xfs_compat_growfs_data_copyin(&in, arg))
return -XFS_ERROR(EFAULT);
+ error = mnt_want_write_file(filp);
+ if (error)
+ return error;
error = xfs_growfs_data(mp, &in);
+ mnt_drop_write_file(filp);
return -error;
}
case XFS_IOC_FSGROWFSRT_32: {
@@ -608,7 +612,11 @@ xfs_file_compat_ioctl(
if (xfs_compat_growfs_rt_copyin(&in, arg))
return -XFS_ERROR(EFAULT);
+ error = mnt_want_write_file(filp);
+ if (error)
+ return error;
error = xfs_growfs_rt(mp, &in);
+ mnt_drop_write_file(filp);
return -error;
}
#endif
@@ -627,7 +635,11 @@ xfs_file_compat_ioctl(
offsetof(struct xfs_swapext, sx_stat)) ||
xfs_ioctl32_bstat_copyin(&sxp.sx_stat, &sxu->sx_stat))
return -XFS_ERROR(EFAULT);
+ error = mnt_want_write_file(filp);
+ if (error)
+ return error;
error = xfs_swapext(&sxp);
+ mnt_drop_write_file(filp);
return -error;
}
case XFS_IOC_FSBULKSTAT_32:
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index aadfce6681ee..973dff6ad935 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -285,7 +285,7 @@ xfs_iomap_eof_want_preallocate(
* do any speculative allocation.
*/
start_fsb = XFS_B_TO_FSBT(mp, ((xfs_ufsize_t)(offset + count - 1)));
- count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
+ count_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
while (count_fsb > 0) {
imaps = nimaps;
firstblock = NULLFSBLOCK;
@@ -416,8 +416,8 @@ retry:
* Make sure preallocation does not create extents beyond the range we
* actually support in this filesystem.
*/
- if (last_fsb > XFS_B_TO_FSB(mp, mp->m_maxioffset))
- last_fsb = XFS_B_TO_FSB(mp, mp->m_maxioffset);
+ if (last_fsb > XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes))
+ last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
ASSERT(last_fsb > offset_fsb);
@@ -680,9 +680,9 @@ xfs_iomap_write_unwritten(
* the same inode that we complete here and might deadlock
* on the iolock.
*/
- xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
+ sb_start_intwrite(mp->m_super);
tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS);
- tp->t_flags |= XFS_TRANS_RESERVE;
+ tp->t_flags |= XFS_TRANS_RESERVE | XFS_TRANS_FREEZE_PROT;
error = xfs_trans_reserve(tp, resblks,
XFS_WRITE_LOG_RES(mp), 0,
XFS_TRANS_PERM_LOG_RES,
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 9c4340f5c3e0..4e00cf091d2c 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -897,6 +897,47 @@ xfs_vn_setattr(
return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0);
}
+STATIC int
+xfs_vn_update_time(
+ struct inode *inode,
+ struct timespec *now,
+ int flags)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp;
+ int error;
+
+ trace_xfs_update_time(ip);
+
+ tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+ error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ return -error;
+ }
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ if (flags & S_CTIME) {
+ inode->i_ctime = *now;
+ ip->i_d.di_ctime.t_sec = (__int32_t)now->tv_sec;
+ ip->i_d.di_ctime.t_nsec = (__int32_t)now->tv_nsec;
+ }
+ if (flags & S_MTIME) {
+ inode->i_mtime = *now;
+ ip->i_d.di_mtime.t_sec = (__int32_t)now->tv_sec;
+ ip->i_d.di_mtime.t_nsec = (__int32_t)now->tv_nsec;
+ }
+ if (flags & S_ATIME) {
+ inode->i_atime = *now;
+ ip->i_d.di_atime.t_sec = (__int32_t)now->tv_sec;
+ ip->i_d.di_atime.t_nsec = (__int32_t)now->tv_nsec;
+ }
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
+ return -xfs_trans_commit(tp, 0);
+}
+
#define XFS_FIEMAP_FLAGS (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
/*
@@ -991,6 +1032,7 @@ static const struct inode_operations xfs_inode_operations = {
.removexattr = generic_removexattr,
.listxattr = xfs_vn_listxattr,
.fiemap = xfs_vn_fiemap,
+ .update_time = xfs_vn_update_time,
};
static const struct inode_operations xfs_dir_inode_operations = {
@@ -1016,6 +1058,7 @@ static const struct inode_operations xfs_dir_inode_operations = {
.getxattr = generic_getxattr,
.removexattr = generic_removexattr,
.listxattr = xfs_vn_listxattr,
+ .update_time = xfs_vn_update_time,
};
static const struct inode_operations xfs_dir_ci_inode_operations = {
@@ -1041,6 +1084,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {
.getxattr = generic_getxattr,
.removexattr = generic_removexattr,
.listxattr = xfs_vn_listxattr,
+ .update_time = xfs_vn_update_time,
};
static const struct inode_operations xfs_symlink_inode_operations = {
@@ -1054,6 +1098,7 @@ static const struct inode_operations xfs_symlink_inode_operations = {
.getxattr = generic_getxattr,
.removexattr = generic_removexattr,
.listxattr = xfs_vn_listxattr,
+ .update_time = xfs_vn_update_time,
};
STATIC void
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index eff577a9b67f..01d10a66e302 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -555,7 +555,7 @@ xfs_bulkstat_single(
/*
* note that requesting valid inode numbers which are not allocated
- * to inodes will most likely cause xfs_itobp to generate warning
+ * to inodes will most likely cause xfs_imap_to_bp to generate warning
* messages about bad magic numbers. This is ok. The fact that
* the inode isn't actually an inode is handled by the
* error check below. Done this way to make the usual case faster
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index d90d4a388609..7f4f9370d0e7 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -45,51 +45,85 @@ xlog_commit_record(
struct xlog_in_core **iclog,
xfs_lsn_t *commitlsnp);
-STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp,
- xfs_buftarg_t *log_target,
- xfs_daddr_t blk_offset,
- int num_bblks);
+STATIC struct xlog *
+xlog_alloc_log(
+ struct xfs_mount *mp,
+ struct xfs_buftarg *log_target,
+ xfs_daddr_t blk_offset,
+ int num_bblks);
STATIC int
xlog_space_left(
struct xlog *log,
atomic64_t *head);
-STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
-STATIC void xlog_dealloc_log(xlog_t *log);
+STATIC int
+xlog_sync(
+ struct xlog *log,
+ struct xlog_in_core *iclog);
+STATIC void
+xlog_dealloc_log(
+ struct xlog *log);
/* local state machine functions */
STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int);
-STATIC void xlog_state_do_callback(xlog_t *log,int aborted, xlog_in_core_t *iclog);
-STATIC int xlog_state_get_iclog_space(xlog_t *log,
- int len,
- xlog_in_core_t **iclog,
- xlog_ticket_t *ticket,
- int *continued_write,
- int *logoffsetp);
-STATIC int xlog_state_release_iclog(xlog_t *log,
- xlog_in_core_t *iclog);
-STATIC void xlog_state_switch_iclogs(xlog_t *log,
- xlog_in_core_t *iclog,
- int eventual_size);
-STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog);
+STATIC void
+xlog_state_do_callback(
+ struct xlog *log,
+ int aborted,
+ struct xlog_in_core *iclog);
+STATIC int
+xlog_state_get_iclog_space(
+ struct xlog *log,
+ int len,
+ struct xlog_in_core **iclog,
+ struct xlog_ticket *ticket,
+ int *continued_write,
+ int *logoffsetp);
+STATIC int
+xlog_state_release_iclog(
+ struct xlog *log,
+ struct xlog_in_core *iclog);
+STATIC void
+xlog_state_switch_iclogs(
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ int eventual_size);
+STATIC void
+xlog_state_want_sync(
+ struct xlog *log,
+ struct xlog_in_core *iclog);
STATIC void
xlog_grant_push_ail(
- struct xlog *log,
- int need_bytes);
-STATIC void xlog_regrant_reserve_log_space(xlog_t *log,
- xlog_ticket_t *ticket);
-STATIC void xlog_ungrant_log_space(xlog_t *log,
- xlog_ticket_t *ticket);
+ struct xlog *log,
+ int need_bytes);
+STATIC void
+xlog_regrant_reserve_log_space(
+ struct xlog *log,
+ struct xlog_ticket *ticket);
+STATIC void
+xlog_ungrant_log_space(
+ struct xlog *log,
+ struct xlog_ticket *ticket);
#if defined(DEBUG)
-STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr);
+STATIC void
+xlog_verify_dest_ptr(
+ struct xlog *log,
+ char *ptr);
STATIC void
xlog_verify_grant_tail(
- struct xlog *log);
-STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog,
- int count, boolean_t syncing);
-STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
- xfs_lsn_t tail_lsn);
+ struct xlog *log);
+STATIC void
+xlog_verify_iclog(
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ int count,
+ boolean_t syncing);
+STATIC void
+xlog_verify_tail_lsn(
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ xfs_lsn_t tail_lsn);
#else
#define xlog_verify_dest_ptr(a,b)
#define xlog_verify_grant_tail(a)
@@ -97,7 +131,9 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
#define xlog_verify_tail_lsn(a,b,c)
#endif
-STATIC int xlog_iclogs_empty(xlog_t *log);
+STATIC int
+xlog_iclogs_empty(
+ struct xlog *log);
static void
xlog_grant_sub_space(
@@ -684,7 +720,7 @@ xfs_log_mount_finish(xfs_mount_t *mp)
int
xfs_log_unmount_write(xfs_mount_t *mp)
{
- xlog_t *log = mp->m_log;
+ struct xlog *log = mp->m_log;
xlog_in_core_t *iclog;
#ifdef DEBUG
xlog_in_core_t *first_iclog;
@@ -893,7 +929,7 @@ int
xfs_log_need_covered(xfs_mount_t *mp)
{
int needed = 0;
- xlog_t *log = mp->m_log;
+ struct xlog *log = mp->m_log;
if (!xfs_fs_writable(mp))
return 0;
@@ -1024,9 +1060,9 @@ xlog_space_left(
void
xlog_iodone(xfs_buf_t *bp)
{
- xlog_in_core_t *iclog = bp->b_fspriv;
- xlog_t *l = iclog->ic_log;
- int aborted = 0;
+ struct xlog_in_core *iclog = bp->b_fspriv;
+ struct xlog *l = iclog->ic_log;
+ int aborted = 0;
/*
* Race to shutdown the filesystem if we see an error.
@@ -1067,8 +1103,9 @@ xlog_iodone(xfs_buf_t *bp)
*/
STATIC void
-xlog_get_iclog_buffer_size(xfs_mount_t *mp,
- xlog_t *log)
+xlog_get_iclog_buffer_size(
+ struct xfs_mount *mp,
+ struct xlog *log)
{
int size;
int xhdrs;
@@ -1129,13 +1166,14 @@ done:
* Its primary purpose is to fill in enough, so recovery can occur. However,
* some other stuff may be filled in too.
*/
-STATIC xlog_t *
-xlog_alloc_log(xfs_mount_t *mp,
- xfs_buftarg_t *log_target,
- xfs_daddr_t blk_offset,
- int num_bblks)
+STATIC struct xlog *
+xlog_alloc_log(
+ struct xfs_mount *mp,
+ struct xfs_buftarg *log_target,
+ xfs_daddr_t blk_offset,
+ int num_bblks)
{
- xlog_t *log;
+ struct xlog *log;
xlog_rec_header_t *head;
xlog_in_core_t **iclogp;
xlog_in_core_t *iclog, *prev_iclog=NULL;
@@ -1144,7 +1182,7 @@ xlog_alloc_log(xfs_mount_t *mp,
int error = ENOMEM;
uint log2_size = 0;
- log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL);
+ log = kmem_zalloc(sizeof(struct xlog), KM_MAYFAIL);
if (!log) {
xfs_warn(mp, "Log allocation failed: No memory!");
goto out;
@@ -1434,8 +1472,9 @@ xlog_bdstrat(
*/
STATIC int
-xlog_sync(xlog_t *log,
- xlog_in_core_t *iclog)
+xlog_sync(
+ struct xlog *log,
+ struct xlog_in_core *iclog)
{
xfs_caddr_t dptr; /* pointer to byte sized element */
xfs_buf_t *bp;
@@ -1584,7 +1623,8 @@ xlog_sync(xlog_t *log,
* Deallocate a log structure
*/
STATIC void
-xlog_dealloc_log(xlog_t *log)
+xlog_dealloc_log(
+ struct xlog *log)
{
xlog_in_core_t *iclog, *next_iclog;
int i;
@@ -1616,10 +1656,11 @@ xlog_dealloc_log(xlog_t *log)
*/
/* ARGSUSED */
static inline void
-xlog_state_finish_copy(xlog_t *log,
- xlog_in_core_t *iclog,
- int record_cnt,
- int copy_bytes)
+xlog_state_finish_copy(
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ int record_cnt,
+ int copy_bytes)
{
spin_lock(&log->l_icloglock);
@@ -2142,7 +2183,8 @@ xlog_write(
* State Change: DIRTY -> ACTIVE
*/
STATIC void
-xlog_state_clean_log(xlog_t *log)
+xlog_state_clean_log(
+ struct xlog *log)
{
xlog_in_core_t *iclog;
int changed = 0;
@@ -2222,7 +2264,7 @@ xlog_state_clean_log(xlog_t *log)
STATIC xfs_lsn_t
xlog_get_lowest_lsn(
- xlog_t *log)
+ struct xlog *log)
{
xlog_in_core_t *lsn_log;
xfs_lsn_t lowest_lsn, lsn;
@@ -2245,9 +2287,9 @@ xlog_get_lowest_lsn(
STATIC void
xlog_state_do_callback(
- xlog_t *log,
- int aborted,
- xlog_in_core_t *ciclog)
+ struct xlog *log,
+ int aborted,
+ struct xlog_in_core *ciclog)
{
xlog_in_core_t *iclog;
xlog_in_core_t *first_iclog; /* used to know when we've
@@ -2467,7 +2509,7 @@ xlog_state_done_syncing(
xlog_in_core_t *iclog,
int aborted)
{
- xlog_t *log = iclog->ic_log;
+ struct xlog *log = iclog->ic_log;
spin_lock(&log->l_icloglock);
@@ -2521,12 +2563,13 @@ xlog_state_done_syncing(
* is copied.
*/
STATIC int
-xlog_state_get_iclog_space(xlog_t *log,
- int len,
- xlog_in_core_t **iclogp,
- xlog_ticket_t *ticket,
- int *continued_write,
- int *logoffsetp)
+xlog_state_get_iclog_space(
+ struct xlog *log,
+ int len,
+ struct xlog_in_core **iclogp,
+ struct xlog_ticket *ticket,
+ int *continued_write,
+ int *logoffsetp)
{
int log_offset;
xlog_rec_header_t *head;
@@ -2631,8 +2674,9 @@ restart:
* move grant reservation head forward.
*/
STATIC void
-xlog_regrant_reserve_log_space(xlog_t *log,
- xlog_ticket_t *ticket)
+xlog_regrant_reserve_log_space(
+ struct xlog *log,
+ struct xlog_ticket *ticket)
{
trace_xfs_log_regrant_reserve_enter(log, ticket);
@@ -2677,8 +2721,9 @@ xlog_regrant_reserve_log_space(xlog_t *log,
* in the current reservation field.
*/
STATIC void
-xlog_ungrant_log_space(xlog_t *log,
- xlog_ticket_t *ticket)
+xlog_ungrant_log_space(
+ struct xlog *log,
+ struct xlog_ticket *ticket)
{
int bytes;
@@ -2717,8 +2762,8 @@ xlog_ungrant_log_space(xlog_t *log,
*/
STATIC int
xlog_state_release_iclog(
- xlog_t *log,
- xlog_in_core_t *iclog)
+ struct xlog *log,
+ struct xlog_in_core *iclog)
{
int sync = 0; /* do we sync? */
@@ -2768,9 +2813,10 @@ xlog_state_release_iclog(
* that every data block. We have run out of space in this log record.
*/
STATIC void
-xlog_state_switch_iclogs(xlog_t *log,
- xlog_in_core_t *iclog,
- int eventual_size)
+xlog_state_switch_iclogs(
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ int eventual_size)
{
ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
if (!eventual_size)
@@ -3114,7 +3160,9 @@ xfs_log_force_lsn(
* disk.
*/
STATIC void
-xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog)
+xlog_state_want_sync(
+ struct xlog *log,
+ struct xlog_in_core *iclog)
{
assert_spin_locked(&log->l_icloglock);
@@ -3158,7 +3206,7 @@ xfs_log_ticket_get(
/*
* Allocate and initialise a new log ticket.
*/
-xlog_ticket_t *
+struct xlog_ticket *
xlog_ticket_alloc(
struct xlog *log,
int unit_bytes,
@@ -3346,9 +3394,10 @@ xlog_verify_grant_tail(
/* check if it will fit */
STATIC void
-xlog_verify_tail_lsn(xlog_t *log,
- xlog_in_core_t *iclog,
- xfs_lsn_t tail_lsn)
+xlog_verify_tail_lsn(
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ xfs_lsn_t tail_lsn)
{
int blocks;
@@ -3385,10 +3434,11 @@ xlog_verify_tail_lsn(xlog_t *log,
* the cycle numbers agree with the current cycle number.
*/
STATIC void
-xlog_verify_iclog(xlog_t *log,
- xlog_in_core_t *iclog,
- int count,
- boolean_t syncing)
+xlog_verify_iclog(
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ int count,
+ boolean_t syncing)
{
xlog_op_header_t *ophead;
xlog_in_core_t *icptr;
@@ -3482,7 +3532,7 @@ xlog_verify_iclog(xlog_t *log,
*/
STATIC int
xlog_state_ioerror(
- xlog_t *log)
+ struct xlog *log)
{
xlog_in_core_t *iclog, *ic;
@@ -3527,7 +3577,7 @@ xfs_log_force_umount(
struct xfs_mount *mp,
int logerror)
{
- xlog_t *log;
+ struct xlog *log;
int retval;
log = mp->m_log;
@@ -3634,7 +3684,8 @@ xfs_log_force_umount(
}
STATIC int
-xlog_iclogs_empty(xlog_t *log)
+xlog_iclogs_empty(
+ struct xlog *log)
{
xlog_in_core_t *iclog;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 72eba2201b14..18a801d76a42 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -487,7 +487,7 @@ struct xlog_grant_head {
* overflow 31 bits worth of byte offset, so using a byte number will mean
* that round off problems won't occur when releasing partial reservations.
*/
-typedef struct xlog {
+struct xlog {
/* The following fields don't need locking */
struct xfs_mount *l_mp; /* mount point */
struct xfs_ail *l_ailp; /* AIL log is working with */
@@ -540,7 +540,7 @@ typedef struct xlog {
char *l_iclog_bak[XLOG_MAX_ICLOGS];
#endif
-} xlog_t;
+};
#define XLOG_BUF_CANCEL_BUCKET(log, blkno) \
((log)->l_buf_cancel_table + ((__uint64_t)blkno % XLOG_BC_TABLE_SIZE))
@@ -548,9 +548,17 @@ typedef struct xlog {
#define XLOG_FORCED_SHUTDOWN(log) ((log)->l_flags & XLOG_IO_ERROR)
/* common routines */
-extern int xlog_recover(xlog_t *log);
-extern int xlog_recover_finish(xlog_t *log);
-extern void xlog_pack_data(xlog_t *log, xlog_in_core_t *iclog, int);
+extern int
+xlog_recover(
+ struct xlog *log);
+extern int
+xlog_recover_finish(
+ struct xlog *log);
+extern void
+xlog_pack_data(
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ int);
extern kmem_zone_t *xfs_log_ticket_zone;
struct xlog_ticket *
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index a7be98abd6a9..5da3ace352bf 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -43,10 +43,18 @@
#include "xfs_utils.h"
#include "xfs_trace.h"
-STATIC int xlog_find_zeroed(xlog_t *, xfs_daddr_t *);
-STATIC int xlog_clear_stale_blocks(xlog_t *, xfs_lsn_t);
+STATIC int
+xlog_find_zeroed(
+ struct xlog *,
+ xfs_daddr_t *);
+STATIC int
+xlog_clear_stale_blocks(
+ struct xlog *,
+ xfs_lsn_t);
#if defined(DEBUG)
-STATIC void xlog_recover_check_summary(xlog_t *);
+STATIC void
+xlog_recover_check_summary(
+ struct xlog *);
#else
#define xlog_recover_check_summary(log)
#endif
@@ -74,7 +82,7 @@ struct xfs_buf_cancel {
static inline int
xlog_buf_bbcount_valid(
- xlog_t *log,
+ struct xlog *log,
int bbcount)
{
return bbcount > 0 && bbcount <= log->l_logBBsize;
@@ -87,7 +95,7 @@ xlog_buf_bbcount_valid(
*/
STATIC xfs_buf_t *
xlog_get_bp(
- xlog_t *log,
+ struct xlog *log,
int nbblks)
{
struct xfs_buf *bp;
@@ -138,10 +146,10 @@ xlog_put_bp(
*/
STATIC xfs_caddr_t
xlog_align(
- xlog_t *log,
+ struct xlog *log,
xfs_daddr_t blk_no,
int nbblks,
- xfs_buf_t *bp)
+ struct xfs_buf *bp)
{
xfs_daddr_t offset = blk_no & ((xfs_daddr_t)log->l_sectBBsize - 1);
@@ -155,10 +163,10 @@ xlog_align(
*/
STATIC int
xlog_bread_noalign(
- xlog_t *log,
+ struct xlog *log,
xfs_daddr_t blk_no,
int nbblks,
- xfs_buf_t *bp)
+ struct xfs_buf *bp)
{
int error;
@@ -189,10 +197,10 @@ xlog_bread_noalign(
STATIC int
xlog_bread(
- xlog_t *log,
+ struct xlog *log,
xfs_daddr_t blk_no,
int nbblks,
- xfs_buf_t *bp,
+ struct xfs_buf *bp,
xfs_caddr_t *offset)
{
int error;
@@ -211,10 +219,10 @@ xlog_bread(
*/
STATIC int
xlog_bread_offset(
- xlog_t *log,
+ struct xlog *log,
xfs_daddr_t blk_no, /* block to read from */
int nbblks, /* blocks to read */
- xfs_buf_t *bp,
+ struct xfs_buf *bp,
xfs_caddr_t offset)
{
xfs_caddr_t orig_offset = bp->b_addr;
@@ -241,10 +249,10 @@ xlog_bread_offset(
*/
STATIC int
xlog_bwrite(
- xlog_t *log,
+ struct xlog *log,
xfs_daddr_t blk_no,
int nbblks,
- xfs_buf_t *bp)
+ struct xfs_buf *bp)
{
int error;
@@ -378,8 +386,8 @@ xlog_recover_iodone(
*/
STATIC int
xlog_find_cycle_start(
- xlog_t *log,
- xfs_buf_t *bp,
+ struct xlog *log,
+ struct xfs_buf *bp,
xfs_daddr_t first_blk,
xfs_daddr_t *last_blk,
uint cycle)
@@ -421,7 +429,7 @@ xlog_find_cycle_start(
*/
STATIC int
xlog_find_verify_cycle(
- xlog_t *log,
+ struct xlog *log,
xfs_daddr_t start_blk,
int nbblks,
uint stop_on_cycle_no,
@@ -490,7 +498,7 @@ out:
*/
STATIC int
xlog_find_verify_log_record(
- xlog_t *log,
+ struct xlog *log,
xfs_daddr_t start_blk,
xfs_daddr_t *last_blk,
int extra_bblks)
@@ -600,7 +608,7 @@ out:
*/
STATIC int
xlog_find_head(
- xlog_t *log,
+ struct xlog *log,
xfs_daddr_t *return_head_blk)
{
xfs_buf_t *bp;
@@ -871,7 +879,7 @@ validate_head:
*/
STATIC int
xlog_find_tail(
- xlog_t *log,
+ struct xlog *log,
xfs_daddr_t *head_blk,
xfs_daddr_t *tail_blk)
{
@@ -1080,7 +1088,7 @@ done:
*/
STATIC int
xlog_find_zeroed(
- xlog_t *log,
+ struct xlog *log,
xfs_daddr_t *blk_no)
{
xfs_buf_t *bp;
@@ -1183,7 +1191,7 @@ bp_err:
*/
STATIC void
xlog_add_record(
- xlog_t *log,
+ struct xlog *log,
xfs_caddr_t buf,
int cycle,
int block,
@@ -1205,7 +1213,7 @@ xlog_add_record(
STATIC int
xlog_write_log_records(
- xlog_t *log,
+ struct xlog *log,
int cycle,
int start_block,
int blocks,
@@ -1305,7 +1313,7 @@ xlog_write_log_records(
*/
STATIC int
xlog_clear_stale_blocks(
- xlog_t *log,
+ struct xlog *log,
xfs_lsn_t tail_lsn)
{
int tail_cycle, head_cycle;
@@ -2050,11 +2058,11 @@ xfs_qm_dqcheck(
*/
STATIC void
xlog_recover_do_dquot_buffer(
- xfs_mount_t *mp,
- xlog_t *log,
- xlog_recover_item_t *item,
- xfs_buf_t *bp,
- xfs_buf_log_format_t *buf_f)
+ struct xfs_mount *mp,
+ struct xlog *log,
+ struct xlog_recover_item *item,
+ struct xfs_buf *bp,
+ struct xfs_buf_log_format *buf_f)
{
uint type;
@@ -2108,9 +2116,9 @@ xlog_recover_do_dquot_buffer(
*/
STATIC int
xlog_recover_buffer_pass2(
- xlog_t *log,
- struct list_head *buffer_list,
- xlog_recover_item_t *item)
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item)
{
xfs_buf_log_format_t *buf_f = item->ri_buf[0].i_addr;
xfs_mount_t *mp = log->l_mp;
@@ -2189,9 +2197,9 @@ xlog_recover_buffer_pass2(
STATIC int
xlog_recover_inode_pass2(
- xlog_t *log,
- struct list_head *buffer_list,
- xlog_recover_item_t *item)
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item)
{
xfs_inode_log_format_t *in_f;
xfs_mount_t *mp = log->l_mp;
@@ -2452,14 +2460,14 @@ error:
}
/*
- * Recover QUOTAOFF records. We simply make a note of it in the xlog_t
+ * Recover QUOTAOFF records. We simply make a note of it in the xlog
* structure, so that we know not to do any dquot item or dquot buffer recovery,
* of that type.
*/
STATIC int
xlog_recover_quotaoff_pass1(
- xlog_t *log,
- xlog_recover_item_t *item)
+ struct xlog *log,
+ struct xlog_recover_item *item)
{
xfs_qoff_logformat_t *qoff_f = item->ri_buf[0].i_addr;
ASSERT(qoff_f);
@@ -2483,9 +2491,9 @@ xlog_recover_quotaoff_pass1(
*/
STATIC int
xlog_recover_dquot_pass2(
- xlog_t *log,
- struct list_head *buffer_list,
- xlog_recover_item_t *item)
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item)
{
xfs_mount_t *mp = log->l_mp;
xfs_buf_t *bp;
@@ -2578,9 +2586,9 @@ xlog_recover_dquot_pass2(
*/
STATIC int
xlog_recover_efi_pass2(
- xlog_t *log,
- xlog_recover_item_t *item,
- xfs_lsn_t lsn)
+ struct xlog *log,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
{
int error;
xfs_mount_t *mp = log->l_mp;
@@ -2616,8 +2624,8 @@ xlog_recover_efi_pass2(
*/
STATIC int
xlog_recover_efd_pass2(
- xlog_t *log,
- xlog_recover_item_t *item)
+ struct xlog *log,
+ struct xlog_recover_item *item)
{
xfs_efd_log_format_t *efd_formatp;
xfs_efi_log_item_t *efip = NULL;
@@ -2812,9 +2820,9 @@ xlog_recover_unmount_trans(
*/
STATIC int
xlog_recover_process_data(
- xlog_t *log,
+ struct xlog *log,
struct hlist_head rhash[],
- xlog_rec_header_t *rhead,
+ struct xlog_rec_header *rhead,
xfs_caddr_t dp,
int pass)
{
@@ -2986,7 +2994,7 @@ abort_error:
*/
STATIC int
xlog_recover_process_efis(
- xlog_t *log)
+ struct xlog *log)
{
xfs_log_item_t *lip;
xfs_efi_log_item_t *efip;
@@ -3098,7 +3106,7 @@ xlog_recover_process_one_iunlink(
/*
* Get the on disk inode to find the next inode in the bucket.
*/
- error = xfs_itobp(mp, NULL, ip, &dip, &ibp, 0);
+ error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &ibp, 0, 0);
if (error)
goto fail_iput;
@@ -3147,7 +3155,7 @@ xlog_recover_process_one_iunlink(
*/
STATIC void
xlog_recover_process_iunlinks(
- xlog_t *log)
+ struct xlog *log)
{
xfs_mount_t *mp;
xfs_agnumber_t agno;
@@ -3209,9 +3217,9 @@ xlog_recover_process_iunlinks(
#ifdef DEBUG
STATIC void
xlog_pack_data_checksum(
- xlog_t *log,
- xlog_in_core_t *iclog,
- int size)
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ int size)
{
int i;
__be32 *up;
@@ -3234,8 +3242,8 @@ xlog_pack_data_checksum(
*/
void
xlog_pack_data(
- xlog_t *log,
- xlog_in_core_t *iclog,
+ struct xlog *log,
+ struct xlog_in_core *iclog,
int roundoff)
{
int i, j, k;
@@ -3274,9 +3282,9 @@ xlog_pack_data(
STATIC void
xlog_unpack_data(
- xlog_rec_header_t *rhead,
+ struct xlog_rec_header *rhead,
xfs_caddr_t dp,
- xlog_t *log)
+ struct xlog *log)
{
int i, j, k;
@@ -3299,8 +3307,8 @@ xlog_unpack_data(
STATIC int
xlog_valid_rec_header(
- xlog_t *log,
- xlog_rec_header_t *rhead,
+ struct xlog *log,
+ struct xlog_rec_header *rhead,
xfs_daddr_t blkno)
{
int hlen;
@@ -3343,7 +3351,7 @@ xlog_valid_rec_header(
*/
STATIC int
xlog_do_recovery_pass(
- xlog_t *log,
+ struct xlog *log,
xfs_daddr_t head_blk,
xfs_daddr_t tail_blk,
int pass)
@@ -3595,7 +3603,7 @@ xlog_do_recovery_pass(
*/
STATIC int
xlog_do_log_recovery(
- xlog_t *log,
+ struct xlog *log,
xfs_daddr_t head_blk,
xfs_daddr_t tail_blk)
{
@@ -3646,7 +3654,7 @@ xlog_do_log_recovery(
*/
STATIC int
xlog_do_recover(
- xlog_t *log,
+ struct xlog *log,
xfs_daddr_t head_blk,
xfs_daddr_t tail_blk)
{
@@ -3721,7 +3729,7 @@ xlog_do_recover(
*/
int
xlog_recover(
- xlog_t *log)
+ struct xlog *log)
{
xfs_daddr_t head_blk, tail_blk;
int error;
@@ -3767,7 +3775,7 @@ xlog_recover(
*/
int
xlog_recover_finish(
- xlog_t *log)
+ struct xlog *log)
{
/*
* Now we're ready to do the transactions needed for the
@@ -3814,7 +3822,7 @@ xlog_recover_finish(
*/
void
xlog_recover_check_summary(
- xlog_t *log)
+ struct xlog *log)
{
xfs_mount_t *mp;
xfs_agf_t *agfp;
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 536021fb3d4e..29c2f83d4147 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1200,8 +1200,6 @@ xfs_mountfs(
xfs_set_maxicount(mp);
- mp->m_maxioffset = xfs_max_file_offset(sbp->sb_blocklog);
-
error = xfs_uuid_mount(mp);
if (error)
goto out;
@@ -1531,6 +1529,15 @@ xfs_unmountfs(
xfs_ail_push_all_sync(mp->m_ail);
xfs_wait_buftarg(mp->m_ddev_targp);
+ /*
+ * The superblock buffer is uncached and xfsaild_push() will lock and
+ * set the XBF_ASYNC flag on the buffer. We cannot do xfs_buf_iowait()
+ * here but a lock on the superblock buffer will block until iodone()
+ * has completed.
+ */
+ xfs_buf_lock(mp->m_sb_bp);
+ xfs_buf_unlock(mp->m_sb_bp);
+
xfs_log_unmount_write(mp);
xfs_log_unmount(mp);
xfs_uuid_unmount(mp);
@@ -1544,7 +1551,7 @@ xfs_unmountfs(
int
xfs_fs_writable(xfs_mount_t *mp)
{
- return !(xfs_test_for_freeze(mp) || XFS_FORCED_SHUTDOWN(mp) ||
+ return !(mp->m_super->s_writers.frozen || XFS_FORCED_SHUTDOWN(mp) ||
(mp->m_flags & XFS_MOUNT_RDONLY));
}
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 90c1fc9eaea4..05a05a7b6119 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -176,7 +176,6 @@ typedef struct xfs_mount {
uint m_qflags; /* quota status flags */
xfs_trans_reservations_t m_reservations;/* precomputed res values */
__uint64_t m_maxicount; /* maximum inode count */
- __uint64_t m_maxioffset; /* maximum inode offset */
__uint64_t m_resblks; /* total reserved blocks */
__uint64_t m_resblks_avail;/* available reserved blocks */
__uint64_t m_resblks_save; /* reserved blks @ remount,ro */
@@ -297,8 +296,6 @@ xfs_preferred_iosize(xfs_mount_t *mp)
PAGE_CACHE_SIZE));
}
-#define XFS_MAXIOFFSET(mp) ((mp)->m_maxioffset)
-
#define XFS_LAST_UNMOUNT_WAS_CLEAN(mp) \
((mp)->m_flags & XFS_MOUNT_WAS_CLEAN)
#define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN)
@@ -314,9 +311,6 @@ void xfs_do_force_shutdown(struct xfs_mount *mp, int flags, char *fname,
#define SHUTDOWN_REMOTE_REQ 0x0010 /* shutdown came from remote cell */
#define SHUTDOWN_DEVICE_REQ 0x0020 /* failed all paths to the device */
-#define xfs_test_for_freeze(mp) ((mp)->m_super->s_frozen)
-#define xfs_wait_for_freeze(mp,l) vfs_check_frozen((mp)->m_super, (l))
-
/*
* Flags for xfs_mountfs
*/
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 249db1987764..2e86fa0cfc0d 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -940,7 +940,7 @@ xfs_qm_dqiterate(
map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
lblkno = 0;
- maxlblkcnt = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
+ maxlblkcnt = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
do {
nmaps = XFS_DQITER_MAP_SIZE;
/*
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 0d9de41a7151..bdaf4cb9f4a2 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -868,67 +868,14 @@ xfs_fs_inode_init_once(
"xfsino", ip->i_ino);
}
-/*
- * This is called by the VFS when dirtying inode metadata. This can happen
- * for a few reasons, but we only care about timestamp updates, given that
- * we handled the rest ourselves. In theory no other calls should happen,
- * but for example generic_write_end() keeps dirtying the inode after
- * updating i_size. Thus we check that the flags are exactly I_DIRTY_SYNC,
- * and skip this call otherwise.
- *
- * We'll hopefull get a different method just for updating timestamps soon,
- * at which point this hack can go away, and maybe we'll also get real
- * error handling here.
- */
-STATIC void
-xfs_fs_dirty_inode(
- struct inode *inode,
- int flags)
-{
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_trans *tp;
- int error;
-
- if (flags != I_DIRTY_SYNC)
- return;
-
- trace_xfs_dirty_inode(ip);
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
- error = xfs_trans_reserve(tp, 0, XFS_FSYNC_TS_LOG_RES(mp), 0, 0, 0);
- if (error) {
- xfs_trans_cancel(tp, 0);
- goto trouble;
- }
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- /*
- * Grab all the latest timestamps from the Linux inode.
- */
- ip->i_d.di_atime.t_sec = (__int32_t)inode->i_atime.tv_sec;
- ip->i_d.di_atime.t_nsec = (__int32_t)inode->i_atime.tv_nsec;
- ip->i_d.di_ctime.t_sec = (__int32_t)inode->i_ctime.tv_sec;
- ip->i_d.di_ctime.t_nsec = (__int32_t)inode->i_ctime.tv_nsec;
- ip->i_d.di_mtime.t_sec = (__int32_t)inode->i_mtime.tv_sec;
- ip->i_d.di_mtime.t_nsec = (__int32_t)inode->i_mtime.tv_nsec;
-
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
- xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
- error = xfs_trans_commit(tp, 0);
- if (error)
- goto trouble;
- return;
-
-trouble:
- xfs_warn(mp, "failed to update timestamps for inode 0x%llx", ip->i_ino);
-}
-
STATIC void
xfs_fs_evict_inode(
struct inode *inode)
{
xfs_inode_t *ip = XFS_I(inode);
+ ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
+
trace_xfs_evict_inode(ip);
truncate_inode_pages(&inode->i_data, 0);
@@ -937,22 +884,6 @@ xfs_fs_evict_inode(
XFS_STATS_INC(vn_remove);
XFS_STATS_DEC(vn_active);
- /*
- * The iolock is used by the file system to coordinate reads,
- * writes, and block truncates. Up to this point the lock
- * protected concurrent accesses by users of the inode. But
- * from here forward we're doing some final processing of the
- * inode because we're done with it, and although we reuse the
- * iolock for protection it is really a distinct lock class
- * (in the lockdep sense) from before. To keep lockdep happy
- * (and basically indicate what we are doing), we explicitly
- * re-init the iolock here.
- */
- ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
- mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
- lockdep_set_class_and_name(&ip->i_iolock.mr_lock,
- &xfs_iolock_reclaimable, "xfs_iolock_reclaimable");
-
xfs_inactive(ip);
}
@@ -1436,7 +1367,6 @@ xfs_fs_free_cached_objects(
static const struct super_operations xfs_super_operations = {
.alloc_inode = xfs_fs_alloc_inode,
.destroy_inode = xfs_fs_destroy_inode,
- .dirty_inode = xfs_fs_dirty_inode,
.evict_inode = xfs_fs_evict_inode,
.drop_inode = xfs_fs_drop_inode,
.put_super = xfs_fs_put_super,
@@ -1491,13 +1421,9 @@ xfs_init_zones(void)
if (!xfs_da_state_zone)
goto out_destroy_btree_cur_zone;
- xfs_dabuf_zone = kmem_zone_init(sizeof(xfs_dabuf_t), "xfs_dabuf");
- if (!xfs_dabuf_zone)
- goto out_destroy_da_state_zone;
-
xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
if (!xfs_ifork_zone)
- goto out_destroy_dabuf_zone;
+ goto out_destroy_da_state_zone;
xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
if (!xfs_trans_zone)
@@ -1514,9 +1440,8 @@ xfs_init_zones(void)
* size possible under XFS. This wastes a little bit of memory,
* but it is much faster.
*/
- xfs_buf_item_zone = kmem_zone_init((sizeof(xfs_buf_log_item_t) +
- (((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) /
- NBWORD) * sizeof(int))), "xfs_buf_item");
+ xfs_buf_item_zone = kmem_zone_init(sizeof(struct xfs_buf_log_item),
+ "xfs_buf_item");
if (!xfs_buf_item_zone)
goto out_destroy_log_item_desc_zone;
@@ -1561,8 +1486,6 @@ xfs_init_zones(void)
kmem_zone_destroy(xfs_trans_zone);
out_destroy_ifork_zone:
kmem_zone_destroy(xfs_ifork_zone);
- out_destroy_dabuf_zone:
- kmem_zone_destroy(xfs_dabuf_zone);
out_destroy_da_state_zone:
kmem_zone_destroy(xfs_da_state_zone);
out_destroy_btree_cur_zone:
@@ -1590,7 +1513,6 @@ xfs_destroy_zones(void)
kmem_zone_destroy(xfs_log_item_desc_zone);
kmem_zone_destroy(xfs_trans_zone);
kmem_zone_destroy(xfs_ifork_zone);
- kmem_zone_destroy(xfs_dabuf_zone);
kmem_zone_destroy(xfs_da_state_zone);
kmem_zone_destroy(xfs_btree_cur_zone);
kmem_zone_destroy(xfs_bmap_free_item_zone);
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 1e9ee064dbb2..96548176db80 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -359,6 +359,15 @@ xfs_quiesce_attr(
* added an item to the AIL, thus flush it again.
*/
xfs_ail_push_all_sync(mp->m_ail);
+
+ /*
+ * The superblock buffer is uncached and xfsaild_push() will lock and
+ * set the XBF_ASYNC flag on the buffer. We cannot do xfs_buf_iowait()
+ * here but a lock on the superblock buffer will block until iodone()
+ * has completed.
+ */
+ xfs_buf_lock(mp->m_sb_bp);
+ xfs_buf_unlock(mp->m_sb_bp);
}
static void
@@ -394,7 +403,7 @@ xfs_sync_worker(
if (!(mp->m_super->s_flags & MS_ACTIVE) &&
!(mp->m_flags & XFS_MOUNT_RDONLY)) {
/* dgc: errors ignored here */
- if (mp->m_super->s_frozen == SB_UNFROZEN &&
+ if (mp->m_super->s_writers.frozen == SB_UNFROZEN &&
xfs_log_need_covered(mp))
error = xfs_fs_log_dummy(mp);
else
@@ -712,8 +721,8 @@ restart:
* Note that xfs_iflush will never block on the inode buffer lock, as
* xfs_ifree_cluster() can lock the inode buffer before it locks the
* ip->i_lock, and we are doing the exact opposite here. As a result,
- * doing a blocking xfs_itobp() to get the cluster buffer would result
- * in an ABBA deadlock with xfs_ifree_cluster().
+ * doing a blocking xfs_imap_to_bp() to get the cluster buffer would
+ * result in an ABBA deadlock with xfs_ifree_cluster().
*
* As xfs_ifree_cluser() must gather all inodes that are active in the
* cache to mark them stale, if we hit this case we don't actually want
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index caf5dabfd553..e5795dd6013a 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -578,8 +578,8 @@ DEFINE_INODE_EVENT(xfs_ioctl_setattr);
DEFINE_INODE_EVENT(xfs_dir_fsync);
DEFINE_INODE_EVENT(xfs_file_fsync);
DEFINE_INODE_EVENT(xfs_destroy_inode);
-DEFINE_INODE_EVENT(xfs_dirty_inode);
DEFINE_INODE_EVENT(xfs_evict_inode);
+DEFINE_INODE_EVENT(xfs_update_time);
DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
DEFINE_INODE_EVENT(xfs_dquot_dqdetach);
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index fdf324508c5e..06ed520a767f 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -576,8 +576,12 @@ xfs_trans_alloc(
xfs_mount_t *mp,
uint type)
{
- xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
- return _xfs_trans_alloc(mp, type, KM_SLEEP);
+ xfs_trans_t *tp;
+
+ sb_start_intwrite(mp->m_super);
+ tp = _xfs_trans_alloc(mp, type, KM_SLEEP);
+ tp->t_flags |= XFS_TRANS_FREEZE_PROT;
+ return tp;
}
xfs_trans_t *
@@ -588,6 +592,7 @@ _xfs_trans_alloc(
{
xfs_trans_t *tp;
+ WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
atomic_inc(&mp->m_active_trans);
tp = kmem_zone_zalloc(xfs_trans_zone, memflags);
@@ -611,6 +616,8 @@ xfs_trans_free(
xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
atomic_dec(&tp->t_mountp->m_active_trans);
+ if (tp->t_flags & XFS_TRANS_FREEZE_PROT)
+ sb_end_intwrite(tp->t_mountp->m_super);
xfs_trans_free_dqinfo(tp);
kmem_zone_free(xfs_trans_zone, tp);
}
@@ -643,7 +650,11 @@ xfs_trans_dup(
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
ASSERT(tp->t_ticket != NULL);
- ntp->t_flags = XFS_TRANS_PERM_LOG_RES | (tp->t_flags & XFS_TRANS_RESERVE);
+ ntp->t_flags = XFS_TRANS_PERM_LOG_RES |
+ (tp->t_flags & XFS_TRANS_RESERVE) |
+ (tp->t_flags & XFS_TRANS_FREEZE_PROT);
+ /* We gave our writer reference to the new transaction */
+ tp->t_flags &= ~XFS_TRANS_FREEZE_PROT;
ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket);
ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used;
tp->t_blk_res = tp->t_blk_res_used;
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 7c37b533aa8e..db056544cbb5 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -179,6 +179,8 @@ struct xfs_log_item_desc {
#define XFS_TRANS_SYNC 0x08 /* make commit synchronous */
#define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */
#define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */
+#define XFS_TRANS_FREEZE_PROT 0x40 /* Transaction has elevated writer
+ count in superblock */
/*
* Values for call flags parameter.
@@ -448,11 +450,51 @@ xfs_trans_t *xfs_trans_dup(xfs_trans_t *);
int xfs_trans_reserve(xfs_trans_t *, uint, uint, uint,
uint, uint);
void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);
-struct xfs_buf *xfs_trans_get_buf(xfs_trans_t *, struct xfs_buftarg *, xfs_daddr_t,
- int, uint);
-int xfs_trans_read_buf(struct xfs_mount *, xfs_trans_t *,
- struct xfs_buftarg *, xfs_daddr_t, int, uint,
- struct xfs_buf **);
+
+struct xfs_buf *xfs_trans_get_buf_map(struct xfs_trans *tp,
+ struct xfs_buftarg *target,
+ struct xfs_buf_map *map, int nmaps,
+ uint flags);
+
+static inline struct xfs_buf *
+xfs_trans_get_buf(
+ struct xfs_trans *tp,
+ struct xfs_buftarg *target,
+ xfs_daddr_t blkno,
+ int numblks,
+ uint flags)
+{
+ struct xfs_buf_map map = {
+ .bm_bn = blkno,
+ .bm_len = numblks,
+ };
+ return xfs_trans_get_buf_map(tp, target, &map, 1, flags);
+}
+
+int xfs_trans_read_buf_map(struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xfs_buftarg *target,
+ struct xfs_buf_map *map, int nmaps,
+ xfs_buf_flags_t flags,
+ struct xfs_buf **bpp);
+
+static inline int
+xfs_trans_read_buf(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xfs_buftarg *target,
+ xfs_daddr_t blkno,
+ int numblks,
+ xfs_buf_flags_t flags,
+ struct xfs_buf **bpp)
+{
+ struct xfs_buf_map map = {
+ .bm_bn = blkno,
+ .bm_len = numblks,
+ };
+ return xfs_trans_read_buf_map(mp, tp, target, &map, 1, flags, bpp);
+}
+
struct xfs_buf *xfs_trans_getsb(xfs_trans_t *, struct xfs_mount *, int);
void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *);
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 9c514483e599..6011ee661339 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -383,6 +383,12 @@ xfsaild_push(
}
spin_lock(&ailp->xa_lock);
+
+ /* barrier matches the xa_target update in xfs_ail_push() */
+ smp_rmb();
+ target = ailp->xa_target;
+ ailp->xa_target_prev = target;
+
lip = xfs_trans_ail_cursor_first(ailp, &cur, ailp->xa_last_pushed_lsn);
if (!lip) {
/*
@@ -397,7 +403,6 @@ xfsaild_push(
XFS_STATS_INC(xs_push_ail);
lsn = lip->li_lsn;
- target = ailp->xa_target;
while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) {
int lock_result;
@@ -527,8 +532,32 @@ xfsaild(
__set_current_state(TASK_KILLABLE);
else
__set_current_state(TASK_INTERRUPTIBLE);
- schedule_timeout(tout ?
- msecs_to_jiffies(tout) : MAX_SCHEDULE_TIMEOUT);
+
+ spin_lock(&ailp->xa_lock);
+
+ /*
+ * Idle if the AIL is empty and we are not racing with a target
+ * update. We check the AIL after we set the task to a sleep
+ * state to guarantee that we either catch an xa_target update
+ * or that a wake_up resets the state to TASK_RUNNING.
+ * Otherwise, we run the risk of sleeping indefinitely.
+ *
+ * The barrier matches the xa_target update in xfs_ail_push().
+ */
+ smp_rmb();
+ if (!xfs_ail_min(ailp) &&
+ ailp->xa_target == ailp->xa_target_prev) {
+ spin_unlock(&ailp->xa_lock);
+ schedule();
+ tout = 0;
+ continue;
+ }
+ spin_unlock(&ailp->xa_lock);
+
+ if (tout)
+ schedule_timeout(msecs_to_jiffies(tout));
+
+ __set_current_state(TASK_RUNNING);
try_to_freeze();
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 21c5a5e3700d..6311b99c267f 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -41,20 +41,26 @@ STATIC struct xfs_buf *
xfs_trans_buf_item_match(
struct xfs_trans *tp,
struct xfs_buftarg *target,
- xfs_daddr_t blkno,
- int len)
+ struct xfs_buf_map *map,
+ int nmaps)
{
struct xfs_log_item_desc *lidp;
struct xfs_buf_log_item *blip;
+ int len = 0;
+ int i;
+
+ for (i = 0; i < nmaps; i++)
+ len += map[i].bm_len;
- len = BBTOB(len);
list_for_each_entry(lidp, &tp->t_items, lid_trans) {
blip = (struct xfs_buf_log_item *)lidp->lid_item;
if (blip->bli_item.li_type == XFS_LI_BUF &&
blip->bli_buf->b_target == target &&
- XFS_BUF_ADDR(blip->bli_buf) == blkno &&
- BBTOB(blip->bli_buf->b_length) == len)
+ XFS_BUF_ADDR(blip->bli_buf) == map[0].bm_bn &&
+ blip->bli_buf->b_length == len) {
+ ASSERT(blip->bli_buf->b_map_count == nmaps);
return blip->bli_buf;
+ }
}
return NULL;
@@ -128,21 +134,19 @@ xfs_trans_bjoin(
* If the transaction pointer is NULL, make this just a normal
* get_buf() call.
*/
-xfs_buf_t *
-xfs_trans_get_buf(xfs_trans_t *tp,
- xfs_buftarg_t *target_dev,
- xfs_daddr_t blkno,
- int len,
- uint flags)
+struct xfs_buf *
+xfs_trans_get_buf_map(
+ struct xfs_trans *tp,
+ struct xfs_buftarg *target,
+ struct xfs_buf_map *map,
+ int nmaps,
+ xfs_buf_flags_t flags)
{
xfs_buf_t *bp;
xfs_buf_log_item_t *bip;
- /*
- * Default to a normal get_buf() call if the tp is NULL.
- */
- if (tp == NULL)
- return xfs_buf_get(target_dev, blkno, len, flags);
+ if (!tp)
+ return xfs_buf_get_map(target, map, nmaps, flags);
/*
* If we find the buffer in the cache with this transaction
@@ -150,7 +154,7 @@ xfs_trans_get_buf(xfs_trans_t *tp,
* have it locked. In this case we just increment the lock
* recursion count and return the buffer to the caller.
*/
- bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len);
+ bp = xfs_trans_buf_item_match(tp, target, map, nmaps);
if (bp != NULL) {
ASSERT(xfs_buf_islocked(bp));
if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) {
@@ -167,7 +171,7 @@ xfs_trans_get_buf(xfs_trans_t *tp,
return (bp);
}
- bp = xfs_buf_get(target_dev, blkno, len, flags);
+ bp = xfs_buf_get_map(target, map, nmaps, flags);
if (bp == NULL) {
return NULL;
}
@@ -246,26 +250,22 @@ int xfs_error_mod = 33;
* read_buf() call.
*/
int
-xfs_trans_read_buf(
- xfs_mount_t *mp,
- xfs_trans_t *tp,
- xfs_buftarg_t *target,
- xfs_daddr_t blkno,
- int len,
- uint flags,
- xfs_buf_t **bpp)
+xfs_trans_read_buf_map(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xfs_buftarg *target,
+ struct xfs_buf_map *map,
+ int nmaps,
+ xfs_buf_flags_t flags,
+ struct xfs_buf **bpp)
{
xfs_buf_t *bp;
xfs_buf_log_item_t *bip;
int error;
*bpp = NULL;
-
- /*
- * Default to a normal get_buf() call if the tp is NULL.
- */
- if (tp == NULL) {
- bp = xfs_buf_read(target, blkno, len, flags);
+ if (!tp) {
+ bp = xfs_buf_read_map(target, map, nmaps, flags);
if (!bp)
return (flags & XBF_TRYLOCK) ?
EAGAIN : XFS_ERROR(ENOMEM);
@@ -303,7 +303,7 @@ xfs_trans_read_buf(
* If the buffer is not yet read in, then we read it in, increment
* the lock recursion count, and return it to the caller.
*/
- bp = xfs_trans_buf_item_match(tp, target, blkno, len);
+ bp = xfs_trans_buf_item_match(tp, target, map, nmaps);
if (bp != NULL) {
ASSERT(xfs_buf_islocked(bp));
ASSERT(bp->b_transp == tp);
@@ -349,7 +349,7 @@ xfs_trans_read_buf(
return 0;
}
- bp = xfs_buf_read(target, blkno, len, flags);
+ bp = xfs_buf_read_map(target, map, nmaps, flags);
if (bp == NULL) {
*bpp = NULL;
return (flags & XBF_TRYLOCK) ?
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index fb62377d1cbc..53b7c9b0f8f7 100644
--- a/fs/xfs/xfs_trans_priv.h
+++ b/fs/xfs/xfs_trans_priv.h
@@ -67,6 +67,7 @@ struct xfs_ail {
struct task_struct *xa_task;
struct list_head xa_ail;
xfs_lsn_t xa_target;
+ xfs_lsn_t xa_target_prev;
struct list_head xa_cursors;
spinlock_t xa_lock;
xfs_lsn_t xa_last_pushed_lsn;
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 398cf681d025..7a41874f4c20 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -133,6 +133,20 @@ typedef __uint64_t xfs_filblks_t; /* number of blocks in a file */
#define MAXAEXTNUM ((xfs_aextnum_t)0x7fff) /* signed short */
/*
+ * Minimum and maximum blocksize and sectorsize.
+ * The blocksize upper limit is pretty much arbitrary.
+ * The sectorsize upper limit is due to sizeof(sb_sectsize).
+ */
+#define XFS_MIN_BLOCKSIZE_LOG 9 /* i.e. 512 bytes */
+#define XFS_MAX_BLOCKSIZE_LOG 16 /* i.e. 65536 bytes */
+#define XFS_MIN_BLOCKSIZE (1 << XFS_MIN_BLOCKSIZE_LOG)
+#define XFS_MAX_BLOCKSIZE (1 << XFS_MAX_BLOCKSIZE_LOG)
+#define XFS_MIN_SECTORSIZE_LOG 9 /* i.e. 512 bytes */
+#define XFS_MAX_SECTORSIZE_LOG 15 /* i.e. 32768 bytes */
+#define XFS_MIN_SECTORSIZE (1 << XFS_MIN_SECTORSIZE_LOG)
+#define XFS_MAX_SECTORSIZE (1 << XFS_MAX_SECTORSIZE_LOG)
+
+/*
* Min numbers of data/attr fork btree root pointers.
*/
#define MINDBTPTRS 3
diff --git a/fs/xfs/xfs_utils.c b/fs/xfs/xfs_utils.c
index 4e5b9ad5cb97..0025c78ac03c 100644
--- a/fs/xfs/xfs_utils.c
+++ b/fs/xfs/xfs_utils.c
@@ -65,7 +65,6 @@ xfs_dir_ialloc(
xfs_trans_t *ntp;
xfs_inode_t *ip;
xfs_buf_t *ialloc_context = NULL;
- boolean_t call_again = B_FALSE;
int code;
uint log_res;
uint log_count;
@@ -91,7 +90,7 @@ xfs_dir_ialloc(
* the inode(s) that we've just allocated.
*/
code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc,
- &ialloc_context, &call_again, &ip);
+ &ialloc_context, &ip);
/*
* Return an error if we were unable to allocate a new inode.
@@ -102,19 +101,18 @@ xfs_dir_ialloc(
*ipp = NULL;
return code;
}
- if (!call_again && (ip == NULL)) {
+ if (!ialloc_context && !ip) {
*ipp = NULL;
return XFS_ERROR(ENOSPC);
}
/*
- * If call_again is set, then we were unable to get an
+ * If the AGI buffer is non-NULL, then we were unable to get an
* inode in one operation. We need to commit the current
* transaction and call xfs_ialloc() again. It is guaranteed
* to succeed the second time.
*/
- if (call_again) {
-
+ if (ialloc_context) {
/*
* Normally, xfs_trans_commit releases all the locks.
* We call bhold to hang on to the ialloc_context across
@@ -195,7 +193,7 @@ xfs_dir_ialloc(
* this call should always succeed.
*/
code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid,
- okalloc, &ialloc_context, &call_again, &ip);
+ okalloc, &ialloc_context, &ip);
/*
* If we get an error at this point, return to the caller
@@ -206,12 +204,11 @@ xfs_dir_ialloc(
*ipp = NULL;
return code;
}
- ASSERT ((!call_again) && (ip != NULL));
+ ASSERT(!ialloc_context && ip);
} else {
- if (committed != NULL) {
+ if (committed != NULL)
*committed = 0;
- }
}
*ipp = ip;
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index b6a82d817a82..2a5c637344b4 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -146,11 +146,6 @@ xfs_readlink(
}
/*
- * Flags for xfs_free_eofblocks
- */
-#define XFS_FREE_EOF_TRYLOCK (1<<0)
-
-/*
* This is called by xfs_inactive to free any blocks beyond eof
* when the link count isn't zero and by xfs_dm_punch_hole() when
* punching a hole to EOF.
@@ -159,7 +154,7 @@ STATIC int
xfs_free_eofblocks(
xfs_mount_t *mp,
xfs_inode_t *ip,
- int flags)
+ bool need_iolock)
{
xfs_trans_t *tp;
int error;
@@ -174,7 +169,7 @@ xfs_free_eofblocks(
* of the file. If not, then there is nothing to do.
*/
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
- last_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
+ last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
if (last_fsb <= end_fsb)
return 0;
map_len = last_fsb - end_fsb;
@@ -201,13 +196,11 @@ xfs_free_eofblocks(
*/
tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
- if (flags & XFS_FREE_EOF_TRYLOCK) {
+ if (need_iolock) {
if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL)) {
xfs_trans_cancel(tp, 0);
return 0;
}
- } else {
- xfs_ilock(ip, XFS_IOLOCK_EXCL);
}
error = xfs_trans_reserve(tp, 0,
@@ -217,7 +210,8 @@ xfs_free_eofblocks(
if (error) {
ASSERT(XFS_FORCED_SHUTDOWN(mp));
xfs_trans_cancel(tp, 0);
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ if (need_iolock)
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return error;
}
@@ -244,7 +238,10 @@ xfs_free_eofblocks(
error = xfs_trans_commit(tp,
XFS_TRANS_RELEASE_LOG_RES);
}
- xfs_iunlock(ip, XFS_IOLOCK_EXCL|XFS_ILOCK_EXCL);
+
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ if (need_iolock)
+ xfs_iunlock(ip, XFS_IOLOCK_EXCL);
}
return error;
}
@@ -282,23 +279,15 @@ xfs_inactive_symlink_rmt(
* free them all in one bunmapi call.
*/
ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2);
- if ((error = xfs_trans_reserve(tp, 0, XFS_ITRUNCATE_LOG_RES(mp), 0,
- XFS_TRANS_PERM_LOG_RES, XFS_ITRUNCATE_LOG_COUNT))) {
- ASSERT(XFS_FORCED_SHUTDOWN(mp));
- xfs_trans_cancel(tp, 0);
- *tpp = NULL;
- return error;
- }
+
/*
* Lock the inode, fix the size, and join it to the transaction.
* Hold it so in the normal path, we still have it locked for
* the second transaction. In the error paths we need it
* held so the cancel won't rele it, see below.
*/
- xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
size = (int)ip->i_d.di_size;
ip->i_d.di_size = 0;
- xfs_trans_ijoin(tp, ip, 0);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
/*
* Find the block(s) so we can inval and unmap them.
@@ -385,114 +374,14 @@ xfs_inactive_symlink_rmt(
ASSERT(XFS_FORCED_SHUTDOWN(mp));
goto error0;
}
- /*
- * Return with the inode locked but not joined to the transaction.
- */
+
+ xfs_trans_ijoin(tp, ip, 0);
*tpp = tp;
return 0;
error1:
xfs_bmap_cancel(&free_list);
error0:
- /*
- * Have to come here with the inode locked and either
- * (held and in the transaction) or (not in the transaction).
- * If the inode isn't held then cancel would iput it, but
- * that's wrong since this is inactive and the vnode ref
- * count is 0 already.
- * Cancel won't do anything to the inode if held, but it still
- * needs to be locked until the cancel is done, if it was
- * joined to the transaction.
- */
- xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
- xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
- *tpp = NULL;
- return error;
-
-}
-
-STATIC int
-xfs_inactive_symlink_local(
- xfs_inode_t *ip,
- xfs_trans_t **tpp)
-{
- int error;
-
- ASSERT(ip->i_d.di_size <= XFS_IFORK_DSIZE(ip));
- /*
- * We're freeing a symlink which fit into
- * the inode. Just free the memory used
- * to hold the old symlink.
- */
- error = xfs_trans_reserve(*tpp, 0,
- XFS_ITRUNCATE_LOG_RES(ip->i_mount),
- 0, XFS_TRANS_PERM_LOG_RES,
- XFS_ITRUNCATE_LOG_COUNT);
-
- if (error) {
- xfs_trans_cancel(*tpp, 0);
- *tpp = NULL;
- return error;
- }
- xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-
- /*
- * Zero length symlinks _can_ exist.
- */
- if (ip->i_df.if_bytes > 0) {
- xfs_idata_realloc(ip,
- -(ip->i_df.if_bytes),
- XFS_DATA_FORK);
- ASSERT(ip->i_df.if_bytes == 0);
- }
- return 0;
-}
-
-STATIC int
-xfs_inactive_attrs(
- xfs_inode_t *ip,
- xfs_trans_t **tpp)
-{
- xfs_trans_t *tp;
- int error;
- xfs_mount_t *mp;
-
- ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
- tp = *tpp;
- mp = ip->i_mount;
- ASSERT(ip->i_d.di_forkoff != 0);
- error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- if (error)
- goto error_unlock;
-
- error = xfs_attr_inactive(ip);
- if (error)
- goto error_unlock;
-
- tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
- error = xfs_trans_reserve(tp, 0,
- XFS_IFREE_LOG_RES(mp),
- 0, XFS_TRANS_PERM_LOG_RES,
- XFS_INACTIVE_LOG_COUNT);
- if (error)
- goto error_cancel;
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip, 0);
- xfs_idestroy_fork(ip, XFS_ATTR_FORK);
-
- ASSERT(ip->i_d.di_anextents == 0);
-
- *tpp = tp;
- return 0;
-
-error_cancel:
- ASSERT(XFS_FORCED_SHUTDOWN(mp));
- xfs_trans_cancel(tp, 0);
-error_unlock:
- *tpp = NULL;
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
return error;
}
@@ -574,8 +463,7 @@ xfs_release(
if (xfs_iflags_test(ip, XFS_IDIRTY_RELEASE))
return 0;
- error = xfs_free_eofblocks(mp, ip,
- XFS_FREE_EOF_TRYLOCK);
+ error = xfs_free_eofblocks(mp, ip, true);
if (error)
return error;
@@ -604,7 +492,7 @@ xfs_inactive(
xfs_trans_t *tp;
xfs_mount_t *mp;
int error;
- int truncate;
+ int truncate = 0;
/*
* If the inode is already free, then there can be nothing
@@ -616,17 +504,6 @@ xfs_inactive(
return VN_INACTIVE_CACHE;
}
- /*
- * Only do a truncate if it's a regular file with
- * some actual space in it. It's OK to look at the
- * inode's fields without the lock because we're the
- * only one with a reference to the inode.
- */
- truncate = ((ip->i_d.di_nlink == 0) &&
- ((ip->i_d.di_size != 0) || XFS_ISIZE(ip) != 0 ||
- (ip->i_d.di_nextents > 0) || (ip->i_delayed_blks > 0)) &&
- S_ISREG(ip->i_d.di_mode));
-
mp = ip->i_mount;
error = 0;
@@ -643,99 +520,100 @@ xfs_inactive(
(!(ip->i_d.di_flags &
(XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) ||
ip->i_delayed_blks != 0))) {
- error = xfs_free_eofblocks(mp, ip, 0);
+ error = xfs_free_eofblocks(mp, ip, false);
if (error)
return VN_INACTIVE_CACHE;
}
goto out;
}
- ASSERT(ip->i_d.di_nlink == 0);
+ if (S_ISREG(ip->i_d.di_mode) &&
+ (ip->i_d.di_size != 0 || XFS_ISIZE(ip) != 0 ||
+ ip->i_d.di_nextents > 0 || ip->i_delayed_blks > 0))
+ truncate = 1;
error = xfs_qm_dqattach(ip, 0);
if (error)
return VN_INACTIVE_CACHE;
tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
- if (truncate) {
- xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
- error = xfs_trans_reserve(tp, 0,
- XFS_ITRUNCATE_LOG_RES(mp),
- 0, XFS_TRANS_PERM_LOG_RES,
- XFS_ITRUNCATE_LOG_COUNT);
- if (error) {
- /* Don't call itruncate_cleanup */
- ASSERT(XFS_FORCED_SHUTDOWN(mp));
- xfs_trans_cancel(tp, 0);
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- return VN_INACTIVE_CACHE;
- }
+ error = xfs_trans_reserve(tp, 0,
+ (truncate || S_ISLNK(ip->i_d.di_mode)) ?
+ XFS_ITRUNCATE_LOG_RES(mp) :
+ XFS_IFREE_LOG_RES(mp),
+ 0,
+ XFS_TRANS_PERM_LOG_RES,
+ XFS_ITRUNCATE_LOG_COUNT);
+ if (error) {
+ ASSERT(XFS_FORCED_SHUTDOWN(mp));
+ xfs_trans_cancel(tp, 0);
+ return VN_INACTIVE_CACHE;
+ }
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip, 0);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, 0);
+ if (S_ISLNK(ip->i_d.di_mode)) {
+ /*
+ * Zero length symlinks _can_ exist.
+ */
+ if (ip->i_d.di_size > XFS_IFORK_DSIZE(ip)) {
+ error = xfs_inactive_symlink_rmt(ip, &tp);
+ if (error)
+ goto out_cancel;
+ } else if (ip->i_df.if_bytes > 0) {
+ xfs_idata_realloc(ip, -(ip->i_df.if_bytes),
+ XFS_DATA_FORK);
+ ASSERT(ip->i_df.if_bytes == 0);
+ }
+ } else if (truncate) {
ip->i_d.di_size = 0;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0);
- if (error) {
- xfs_trans_cancel(tp,
- XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
- xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
- return VN_INACTIVE_CACHE;
- }
+ if (error)
+ goto out_cancel;
ASSERT(ip->i_d.di_nextents == 0);
- } else if (S_ISLNK(ip->i_d.di_mode)) {
+ }
- /*
- * If we get an error while cleaning up a
- * symlink we bail out.
- */
- error = (ip->i_d.di_size > XFS_IFORK_DSIZE(ip)) ?
- xfs_inactive_symlink_rmt(ip, &tp) :
- xfs_inactive_symlink_local(ip, &tp);
+ /*
+ * If there are attributes associated with the file then blow them away
+ * now. The code calls a routine that recursively deconstructs the
+ * attribute fork. We need to just commit the current transaction
+ * because we can't use it for xfs_attr_inactive().
+ */
+ if (ip->i_d.di_anextents > 0) {
+ ASSERT(ip->i_d.di_forkoff != 0);
- if (error) {
- ASSERT(tp == NULL);
- return VN_INACTIVE_CACHE;
- }
+ error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+ if (error)
+ goto out_unlock;
- xfs_trans_ijoin(tp, ip, 0);
- } else {
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+ error = xfs_attr_inactive(ip);
+ if (error)
+ goto out;
+
+ tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE);
error = xfs_trans_reserve(tp, 0,
XFS_IFREE_LOG_RES(mp),
0, XFS_TRANS_PERM_LOG_RES,
XFS_INACTIVE_LOG_COUNT);
if (error) {
- ASSERT(XFS_FORCED_SHUTDOWN(mp));
xfs_trans_cancel(tp, 0);
- return VN_INACTIVE_CACHE;
+ goto out;
}
- xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
}
- /*
- * If there are attributes associated with the file
- * then blow them away now. The code calls a routine
- * that recursively deconstructs the attribute fork.
- * We need to just commit the current transaction
- * because we can't use it for xfs_attr_inactive().
- */
- if (ip->i_d.di_anextents > 0) {
- error = xfs_inactive_attrs(ip, &tp);
- /*
- * If we got an error, the transaction is already
- * cancelled, and the inode is unlocked. Just get out.
- */
- if (error)
- return VN_INACTIVE_CACHE;
- } else if (ip->i_afp) {
+ if (ip->i_afp)
xfs_idestroy_fork(ip, XFS_ATTR_FORK);
- }
+
+ ASSERT(ip->i_d.di_anextents == 0);
/*
* Free the inode.
@@ -779,10 +657,13 @@ xfs_inactive(
* Release the dquots held by inode, if any.
*/
xfs_qm_dqdetach(ip);
- xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
-
- out:
+out_unlock:
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out:
return VN_INACTIVE_CACHE;
+out_cancel:
+ xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+ goto out_unlock;
}
/*
@@ -2262,10 +2143,10 @@ xfs_change_file_space(
llen = bf->l_len > 0 ? bf->l_len - 1 : bf->l_len;
- if ( (bf->l_start < 0)
- || (bf->l_start > XFS_MAXIOFFSET(mp))
- || (bf->l_start + llen < 0)
- || (bf->l_start + llen > XFS_MAXIOFFSET(mp)))
+ if (bf->l_start < 0 ||
+ bf->l_start > mp->m_super->s_maxbytes ||
+ bf->l_start + llen < 0 ||
+ bf->l_start + llen > mp->m_super->s_maxbytes)
return XFS_ERROR(EINVAL);
bf->l_whence = 0;
OpenPOWER on IntegriCloud