From d5e66348bbe39dc78509e7561f7252aa443df8c0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Sep 2008 17:28:35 -0400 Subject: NFS: Fix nfs_file_llseek() After the BKL removal patches were applied to the rest of the NFS code, the BKL protection in nfs_file_llseek() is no longer sufficient to ensure that inode->i_size is read safely in generic_file_llseek_unlocked(). In order to fix the situation, we either have to replace the naked read of inode->i_size in generic_file_llseek_unlocked() with i_size_read(), or the whole thing needs to be executed under the inode->i_lock; In order to avoid disrupting other filesystems, avoid touching generic_file_llseek_unlocked() for now... Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 78460657f5cb..3ddb00433f4f 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -188,13 +188,16 @@ static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) /* origin == SEEK_END => we must revalidate the cached file length */ if (origin == SEEK_END) { struct inode *inode = filp->f_mapping->host; + int retval = nfs_revalidate_file_size(inode, filp); if (retval < 0) return (loff_t)retval; - } - lock_kernel(); /* BKL needed? */ - loff = generic_file_llseek_unlocked(filp, offset, origin); - unlock_kernel(); + + spin_lock(&inode->i_lock); + loff = generic_file_llseek_unlocked(filp, offset, origin); + spin_unlock(&inode->i_lock); + } else + loff = generic_file_llseek_unlocked(filp, offset, origin); return loff; } -- cgit v1.2.1 From 1daef0a868370c5a96d031b9202e3354bea060e6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 27 Jul 2008 18:19:01 -0400 Subject: NFS: Clean up nfs_sb_active/nfs_sb_deactive Instead of causing umount requests to block on server->active_wq while the asynchronous sillyrename deletes are executing, we can use the sb->s_active counter to obtain a reference to the super_block, and then release that reference in nfs_async_unlink_release(). Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 1 - fs/nfs/internal.h | 4 ++-- fs/nfs/super.c | 24 ++++++++---------------- fs/nfs/unlink.c | 5 +++-- 4 files changed, 13 insertions(+), 21 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 5ee23e7058b3..2accb67427c6 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -850,7 +850,6 @@ static struct nfs_server *nfs_alloc_server(void) INIT_LIST_HEAD(&server->client_link); INIT_LIST_HEAD(&server->master_link); - init_waitqueue_head(&server->active_wq); atomic_set(&server->active, 0); server->io_stats = nfs_alloc_iostats(); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 24241fcbb98d..7bcf6ec2d458 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -163,8 +163,8 @@ extern struct rpc_stat nfs_rpcstat; extern int __init register_nfs_fs(void); extern void __exit unregister_nfs_fs(void); -extern void nfs_sb_active(struct nfs_server *server); -extern void nfs_sb_deactive(struct nfs_server *server); +extern void nfs_sb_active(struct super_block *sb); +extern void nfs_sb_deactive(struct super_block *sb); /* namespace.c */ extern char *nfs_path(const char *base, diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e9b20173fef3..e527fab40419 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -209,7 +209,6 @@ static int nfs_get_sb(struct file_system_type *, int, const char *, void *, stru static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); static void nfs_kill_super(struct super_block *); -static void nfs_put_super(struct super_block *); static int nfs_remount(struct super_block *sb, int *flags, char *raw_data); static struct file_system_type nfs_fs_type = { @@ -232,7 +231,6 @@ static const struct super_operations nfs_sops = { .alloc_inode = nfs_alloc_inode, .destroy_inode = nfs_destroy_inode, .write_inode = nfs_write_inode, - .put_super = nfs_put_super, .statfs = nfs_statfs, .clear_inode = nfs_clear_inode, .umount_begin = nfs_umount_begin, @@ -337,26 +335,20 @@ void __exit unregister_nfs_fs(void) unregister_filesystem(&nfs_fs_type); } -void nfs_sb_active(struct nfs_server *server) +void nfs_sb_active(struct super_block *sb) { - atomic_inc(&server->active); -} + struct nfs_server *server = NFS_SB(sb); -void nfs_sb_deactive(struct nfs_server *server) -{ - if (atomic_dec_and_test(&server->active)) - wake_up(&server->active_wq); + if (atomic_inc_return(&server->active) == 1) + atomic_inc(&sb->s_active); } -static void nfs_put_super(struct super_block *sb) +void nfs_sb_deactive(struct super_block *sb) { struct nfs_server *server = NFS_SB(sb); - /* - * Make sure there are no outstanding ops to this server. - * If so, wait for them to finish before allowing the - * unmount to continue. - */ - wait_event(server->active_wq, atomic_read(&server->active) == 0); + + if (atomic_dec_and_test(&server->active)) + deactivate_super(sb); } /* diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index f089e5839d7d..ecc295347775 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -99,7 +99,7 @@ static void nfs_async_unlink_release(void *calldata) nfs_dec_sillycount(data->dir); nfs_free_unlinkdata(data); - nfs_sb_deactive(NFS_SB(sb)); + nfs_sb_deactive(sb); } static const struct rpc_call_ops nfs_unlink_ops = { @@ -118,6 +118,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n .rpc_message = &msg, .callback_ops = &nfs_unlink_ops, .callback_data = data, + .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC, }; struct rpc_task *task; @@ -149,7 +150,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n nfs_dec_sillycount(dir); return 0; } - nfs_sb_active(NFS_SERVER(dir)); + nfs_sb_active(dir->i_sb); data->args.fh = NFS_FH(dir); nfs_fattr_init(&data->res.dir_attr); -- cgit v1.2.1 From 4eec952e42314b53e48fef1f54dd89cbf9789734 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 15 Jul 2008 17:58:13 -0400 Subject: NFS: Add options for finer control of the lookup cache Add the flag NFS_MOUNT_LOOKUP_CACHE_NONEG to turn off the caching of negative dentries. In reality what we do is to force nfs_lookup_revalidate() to always discard negative dentries. Add the flag NFS_MOUNT_LOOKUP_CACHE_NONE for enforcing stricter revalidation of dentries. It forces the revalidate code to always do a lookup instead of just checking the cached mtime of the parent directory. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 74f92b717f78..49d565412827 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -667,6 +667,8 @@ static int nfs_check_verifier(struct inode *dir, struct dentry *dentry) { if (IS_ROOT(dentry)) return 1; + if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) + return 0; if (!nfs_verify_change_attribute(dir, dentry->d_time)) return 0; /* Revalidate nfsi->cache_change_attribute before we declare a match */ @@ -750,6 +752,8 @@ int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry, /* Don't revalidate a negative dentry if we're creating a new file */ if (nd != NULL && nfs_lookup_check_intent(nd, LOOKUP_CREATE) != 0) return 0; + if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) + return 1; return !nfs_check_verifier(dir, dentry); } -- cgit v1.2.1 From ff3525a539f5cc81970d08304bdedb4ffba984da Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 15 Aug 2008 16:59:14 -0400 Subject: NFS: Don't apply NFS_MOUNT_FLAGMASK to text-based mounts The point of introducing text-based mounts was to allow us to add functionality without having to worry about legacy binary mount formats. The mask should be there in order to ensure that binary formats don't start enabling features that they cannot support. There is no justification for applying it to the text mount path. Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 4 ++-- fs/nfs/super.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 2accb67427c6..7547600b6174 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -675,7 +675,7 @@ static int nfs_init_server(struct nfs_server *server, server->nfs_client = clp; /* Initialise the client representation from the mount data */ - server->flags = data->flags & NFS_MOUNT_FLAGMASK; + server->flags = data->flags; if (data->rsize) server->rsize = nfs_block_size(data->rsize, NULL); @@ -1072,7 +1072,7 @@ static int nfs4_init_server(struct nfs_server *server, goto error; /* Initialise the client representation from the mount data */ - server->flags = data->flags & NFS_MOUNT_FLAGMASK; + server->flags = data->flags; server->caps |= NFS_CAP_ATOMIC_OPEN; if (data->rsize) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index e527fab40419..81686aeb1b5d 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -1550,7 +1550,7 @@ static int nfs_validate_mount_data(void *options, * Translate to nfs_parsed_mount_data, which nfs_fill_super * can deal with. */ - args->flags = data->flags; + args->flags = data->flags & NFS_MOUNT_FLAGMASK; args->rsize = data->rsize; args->wsize = data->wsize; args->timeo = data->timeo; -- cgit v1.2.1 From 7973c1f15a0687f47ed70e591e4642d6fc4334d0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 15 Jul 2008 17:58:14 -0400 Subject: NFS: Add mount options for controlling the lookup cache Add the following NFS-specific mount options to the parser. -o lookupcache=all /* Default: cache positive & negative dentries */ -o lookupcache=pos[itive] /* Don't cache negative dentries */ -o lookupcache=none /* Strict revalidation of all dentries */ Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 81686aeb1b5d..1e3558697219 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -91,6 +91,7 @@ enum { /* Mount options that take string arguments */ Opt_sec, Opt_proto, Opt_mountproto, Opt_mounthost, Opt_addr, Opt_mountaddr, Opt_clientaddr, + Opt_lookupcache, /* Special mount options */ Opt_userspace, Opt_deprecated, Opt_sloppy, @@ -154,6 +155,8 @@ static match_table_t nfs_mount_option_tokens = { { Opt_mounthost, "mounthost=%s" }, { Opt_mountaddr, "mountaddr=%s" }, + { Opt_lookupcache, "lookupcache=%s" }, + { Opt_err, NULL } }; @@ -200,6 +203,22 @@ static match_table_t nfs_secflavor_tokens = { { Opt_sec_err, NULL } }; +enum { + Opt_lookupcache_all, Opt_lookupcache_positive, + Opt_lookupcache_none, + + Opt_lookupcache_err +}; + +static match_table_t nfs_lookupcache_tokens = { + { Opt_lookupcache_all, "all" }, + { Opt_lookupcache_positive, "pos" }, + { Opt_lookupcache_positive, "positive" }, + { Opt_lookupcache_none, "none" }, + + { Opt_lookupcache_err, NULL } +}; + static void nfs_umount_begin(struct super_block *); static int nfs_statfs(struct dentry *, struct kstatfs *); @@ -1250,6 +1269,30 @@ static int nfs_parse_mount_options(char *raw, &mnt->mount_server.addrlen); kfree(string); break; + case Opt_lookupcache: + string = match_strdup(args); + if (string == NULL) + goto out_nomem; + token = match_token(string, + nfs_lookupcache_tokens, args); + kfree(string); + switch (token) { + case Opt_lookupcache_all: + mnt->flags &= ~(NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE); + break; + case Opt_lookupcache_positive: + mnt->flags &= ~NFS_MOUNT_LOOKUP_CACHE_NONE; + mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG; + break; + case Opt_lookupcache_none: + mnt->flags |= NFS_MOUNT_LOOKUP_CACHE_NONEG|NFS_MOUNT_LOOKUP_CACHE_NONE; + break; + default: + errors++; + dfprintk(MOUNT, "NFS: invalid " + "lookupcache argument\n"); + }; + break; /* * Special options -- cgit v1.2.1 From 870a5be8b92151332da65021b7b21104e9c1de07 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 5 Oct 2008 12:07:23 -0400 Subject: NFS: Clean up nfs_refresh_inode() and nfs_post_op_update_inode() Try to avoid taking and dropping the inode->i_lock more than once. Do so by moving the code in nfs_refresh_inode() that needs to be done under the spinlock into a function nfs_refresh_inode_locked(), and then having both nfs_refresh_inode() and nfs_post_op_update_inode() call it directly. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 52daefa2f521..f189169348b1 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -948,6 +948,15 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat return 0; } +static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if (time_after(fattr->time_start, nfsi->last_updated)) + return nfs_update_inode(inode, fattr); + return nfs_check_inode_attributes(inode, fattr); +} + /** * nfs_refresh_inode - try to update the inode attribute cache * @inode - pointer to inode @@ -960,17 +969,12 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat */ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) { - struct nfs_inode *nfsi = NFS_I(inode); int status; if ((fattr->valid & NFS_ATTR_FATTR) == 0) return 0; spin_lock(&inode->i_lock); - if (time_after(fattr->time_start, nfsi->last_updated)) - status = nfs_update_inode(inode, fattr); - else - status = nfs_check_inode_attributes(inode, fattr); - + status = nfs_refresh_inode_locked(inode, fattr); spin_unlock(&inode->i_lock); return status; } @@ -992,13 +996,16 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_inode *nfsi = NFS_I(inode); + int status = 0; spin_lock(&inode->i_lock); nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; if (S_ISDIR(inode->i_mode)) nfsi->cache_validity |= NFS_INO_INVALID_DATA; + if ((fattr->valid & NFS_ATTR_FATTR) != 0) + status = nfs_refresh_inode_locked(inode, fattr); spin_unlock(&inode->i_lock); - return nfs_refresh_inode(inode, fattr); + return status; } /** -- cgit v1.2.1 From a10ad17630024bf7aae8e7f18352f816ee483091 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Sep 2008 17:28:41 -0400 Subject: NFS: Fix the NFS attribute update Currently nfs_refresh_inode() will only update the inode metadata if it sees that the RPC call that returned the nfs_fattr was started after the last update of the inode. This means that if we have parallel RPC calls to the same inode (when sending WRITE calls, for instance), we may often miss updates. This patch attempts to recover those missed updates by also accepting them if the ctime in the nfs_fattr is more recent than the inode's cached ctime. It also recovers the case where the file size has increased, but the ctime has not been updated due to limited ctime resolution. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 44 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f189169348b1..8c514a1353c0 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -948,11 +948,49 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat return 0; } -static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr) +static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fattr *fattr) { - struct nfs_inode *nfsi = NFS_I(inode); + return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0; +} + +static int nfs_size_need_update(const struct inode *inode, const struct nfs_fattr *fattr) +{ + return nfs_size_to_loff_t(fattr->size) > i_size_read(inode); +} - if (time_after(fattr->time_start, nfsi->last_updated)) +/** + * nfs_inode_attrs_need_update - check if the inode attributes need updating + * @inode - pointer to inode + * @fattr - attributes + * + * Attempt to divine whether or not an RPC call reply carrying stale + * attributes got scheduled after another call carrying updated ones. + * + * To do so, the function first assumes that a more recent ctime means + * that the attributes in fattr are newer, however it also attempt to + * catch the case where ctime either didn't change, or went backwards + * (if someone reset the clock on the server) by looking at whether + * or not this RPC call was started after the inode was last updated. + * Note also the check for jiffy wraparound if the last_updated timestamp + * is later than 'jiffies'. + * + * The function returns 'true' if it thinks the attributes in 'fattr' are + * more recent than the ones cached in the inode. + * + */ +static int nfs_inode_attrs_need_update(const struct inode *inode, const struct nfs_fattr *fattr) +{ + const struct nfs_inode *nfsi = NFS_I(inode); + + return nfs_ctime_need_update(inode, fattr) || + nfs_size_need_update(inode, fattr) || + time_after(fattr->time_start, nfsi->last_updated) || + time_after(nfsi->last_updated, jiffies); +} + +static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr) +{ + if (nfs_inode_attrs_need_update(inode, fattr)) return nfs_update_inode(inode, fattr); return nfs_check_inode_attributes(inode, fattr); } -- cgit v1.2.1 From d65f557f39448c2d9e58cd564037b81e646aed2c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 5 Oct 2008 12:27:55 -0400 Subject: NFS: Fix nfs_post_op_update_inode_force_wcc() If we believe that the attributes are old (see nfs_refresh_inode()), then we shouldn't force an update. Also ensure that we hold the inode->i_lock across attribute checks and the call to nfs_refresh_inode_locked() to ensure that we don't race with other attribute updates. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 8c514a1353c0..610d022fc7a5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1017,6 +1017,18 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) return status; } +static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr *fattr) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; + if (S_ISDIR(inode->i_mode)) + nfsi->cache_validity |= NFS_INO_INVALID_DATA; + if ((fattr->valid & NFS_ATTR_FATTR) == 0) + return 0; + return nfs_refresh_inode_locked(inode, fattr); +} + /** * nfs_post_op_update_inode - try to update the inode attribute cache * @inode - pointer to inode @@ -1033,15 +1045,10 @@ int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) */ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) { - struct nfs_inode *nfsi = NFS_I(inode); - int status = 0; + int status; spin_lock(&inode->i_lock); - nfsi->cache_validity |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; - if (S_ISDIR(inode->i_mode)) - nfsi->cache_validity |= NFS_INO_INVALID_DATA; - if ((fattr->valid & NFS_ATTR_FATTR) != 0) - status = nfs_refresh_inode_locked(inode, fattr); + status = nfs_post_op_update_inode_locked(inode, fattr); spin_unlock(&inode->i_lock); return status; } @@ -1059,6 +1066,15 @@ int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr) */ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr) { + int status; + + spin_lock(&inode->i_lock); + /* Don't do a WCC update if these attributes are already stale */ + if ((fattr->valid & NFS_ATTR_FATTR) == 0 || + !nfs_inode_attrs_need_update(inode, fattr)) { + fattr->valid &= ~(NFS_ATTR_WCC_V4|NFS_ATTR_WCC); + goto out_noforce; + } if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && (fattr->valid & NFS_ATTR_WCC_V4) == 0) { fattr->pre_change_attr = NFS_I(inode)->change_attr; @@ -1071,7 +1087,10 @@ int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fa fattr->pre_size = i_size_read(inode); fattr->valid |= NFS_ATTR_WCC; } - return nfs_post_op_update_inode(inode, fattr); +out_noforce: + status = nfs_post_op_update_inode_locked(inode, fattr); + spin_unlock(&inode->i_lock); + return status; } /* -- cgit v1.2.1 From 4dc05efb86239321d43a9d74fd2ecd5c21bfc2ad Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 23 Sep 2008 17:28:42 -0400 Subject: NFS: Convert __nfs_revalidate_inode() to use nfs_refresh_inode() In the case where there are parallel RPC calls to the same inode, we may receive stale metadata due to the lack of ordering, hence the sanity checking of metadata in nfs_refresh_inode(). Currently, __nfs_revalidate_inode() is calling nfs_update_inode() directly, without any further sanity checks, and hence may end up setting the inode up with stale metadata. Fix is to use nfs_refresh_inode() instead of nfs_update_inode(). Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 610d022fc7a5..697157c1fdd1 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -724,16 +724,13 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) goto out; } - spin_lock(&inode->i_lock); - status = nfs_update_inode(inode, &fattr); + status = nfs_refresh_inode(inode, &fattr); if (status) { - spin_unlock(&inode->i_lock); dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode), status); goto out; } - spin_unlock(&inode->i_lock); if (nfsi->cache_validity & NFS_INO_INVALID_ACL) nfs_zap_acl_cache(inode); -- cgit v1.2.1 From 076f1fc94c44be2664172c63b4a2b51ae2d265ea Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 5 Oct 2008 13:31:21 -0400 Subject: NFS: Don't clear nfsi->cache_validity in nfs_check_inode_attributes() If we're merely checking the inode attributes because we suspect that the 'updated' attributes returned by the RPC call are stale, then we shouldn't be doing weak cache consistency updates or clearing the cache_validity flags. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 697157c1fdd1..a2f54154d825 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -905,9 +905,6 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat return -EIO; } - /* Do atomic weak cache consistency updates */ - nfs_wcc_update_inode(inode, fattr); - if ((fattr->valid & NFS_ATTR_FATTR_V4) != 0 && nfsi->change_attr != fattr->change_attr) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE; @@ -936,10 +933,6 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat if (invalid != 0) nfsi->cache_validity |= invalid; - else - nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_ATIME - | NFS_INO_REVAL_PAGECACHE); nfsi->read_cache_jiffies = fattr->time_start; return 0; -- cgit v1.2.1 From 2f28ea614ff497202d5a52af82da523ae4a20718 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 5 Oct 2008 14:26:11 -0400 Subject: NFS: Fix up nfs_setattr_update_inode() Ensure that it sets the inode metadata under the correct spinlock. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a2f54154d825..f3b8ed904df7 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -453,6 +453,7 @@ out_big: void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) { if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { + spin_lock(&inode->i_lock); if ((attr->ia_valid & ATTR_MODE) != 0) { int mode = attr->ia_mode & S_IALLUGO; mode |= inode->i_mode & ~S_IALLUGO; @@ -462,7 +463,6 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) inode->i_uid = attr->ia_uid; if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = attr->ia_gid; - spin_lock(&inode->i_lock); NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; spin_unlock(&inode->i_lock); } -- cgit v1.2.1 From 691beb13cdc88358334ef0ba867c080a247a760f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 5 Oct 2008 14:48:22 -0400 Subject: NFS: Allow concurrent inode revalidation Currently, if two processes are both trying to revalidate metadata for the same inode, they will find themselves being serialised. There is no good justification for this now that we have improved our ability to detect stale attribute data, so we should remove that serialisation. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 43 ++----------------------------------------- 1 file changed, 2 insertions(+), 41 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index f3b8ed904df7..e25009f35cc2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -472,37 +472,6 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr) } } -static int nfs_wait_schedule(void *word) -{ - if (signal_pending(current)) - return -ERESTARTSYS; - schedule(); - return 0; -} - -/* - * Wait for the inode to get unlocked. - */ -static int nfs_wait_on_inode(struct inode *inode) -{ - struct nfs_inode *nfsi = NFS_I(inode); - int error; - - error = wait_on_bit_lock(&nfsi->flags, NFS_INO_REVALIDATING, - nfs_wait_schedule, TASK_KILLABLE); - - return error; -} - -static void nfs_wake_up_inode(struct inode *inode) -{ - struct nfs_inode *nfsi = NFS_I(inode); - - clear_bit(NFS_INO_REVALIDATING, &nfsi->flags); - smp_mb__after_clear_bit(); - wake_up_bit(&nfsi->flags, NFS_INO_REVALIDATING); -} - int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; @@ -697,20 +666,15 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); - nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); if (is_bad_inode(inode)) - goto out_nowait; + goto out; if (NFS_STALE(inode)) - goto out_nowait; - - status = nfs_wait_on_inode(inode); - if (status < 0) goto out; - status = -ESTALE; if (NFS_STALE(inode)) goto out; + nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); if (status != 0) { dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", @@ -740,9 +704,6 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) (long long)NFS_FILEID(inode)); out: - nfs_wake_up_inode(inode); - - out_nowait: return status; } -- cgit v1.2.1 From bb8a3b53c20f2c07164a23ff6c320794fee8b95f Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Fri, 25 Jul 2008 02:55:49 +0300 Subject: fix fs/nfs/nfsroot.c compilation This patch fixes the following compile error caused by commit f9247273cb69ba101877e946d2d83044409cc8c5 (UFS: add const to parser token tabl): <-- snip --> ... CC fs/nfs/nfsroot.o /home/bunk/linux/kernel-2.6/git/linux-2.6/fs/nfs/nfsroot.c:130: error: tokens causes a section type conflict make[3]: *** [fs/nfs/nfsroot.o] Error 1 <-- snip --> Signed-off-by: Adrian Bunk Signed-off-by: Trond Myklebust --- fs/nfs/nfsroot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c index 46763d1cd397..8478fc25daee 100644 --- a/fs/nfs/nfsroot.c +++ b/fs/nfs/nfsroot.c @@ -127,7 +127,7 @@ enum { Opt_err }; -static match_table_t __initdata tokens = { +static match_table_t __initconst tokens = { {Opt_port, "port=%u"}, {Opt_rsize, "rsize=%u"}, {Opt_wsize, "wsize=%u"}, -- cgit v1.2.1 From fd08d7e9d196ca49afcce0181f1f0ca68f241aa2 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Thu, 31 Jul 2008 09:38:55 +0400 Subject: nfs: ERR_PTR is expected on failure from nfs_do_clone_mount Replace NULL with ERR_PTR(-EINVAL). Signed-off-by: Denis V. Lunev Signed-off-by: Trond Myklebust --- fs/nfs/namespace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 66df08dd1caf..d398775a3af5 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -189,7 +189,7 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server, struct nfs_clone_mount *mountdata) { #ifdef CONFIG_NFS_V4 - struct vfsmount *mnt = NULL; + struct vfsmount *mnt = ERR_PTR(-EINVAL); switch (server->nfs_client->rpc_ops->version) { case 2: case 3: -- cgit v1.2.1 From 44d5759d3fdad660f000ef319f0ec33a6ac6ae28 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 11 Aug 2008 12:02:34 +0400 Subject: nfs: BUG_ON in nfs_follow_mountpoint Unfortunately, BUG_ON(IS_ROOT(dentry)) can happen inside nfs_follow_mountpoint with NFS running Fedora 8 using a specific setup. https://bugzilla.redhat.com/show_bug.cgi?id=458622 So, the situation should be handled on NFS client gracefully. Signed-off-by: Denis V. Lunev CC: Trond Myklebust CC: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/namespace.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index d398775a3af5..64a288ee046d 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -105,7 +105,10 @@ static void * nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) dprintk("--> nfs_follow_mountpoint()\n"); - BUG_ON(IS_ROOT(dentry)); + err = -ESTALE; + if (IS_ROOT(dentry)) + goto out_err; + dprintk("%s: enter\n", __func__); dput(nd->path.dentry); nd->path.dentry = dget(dentry); -- cgit v1.2.1 From f200c11c257b8db5c49dfc0b7f84bceae3109779 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 14 Aug 2008 18:32:55 -0400 Subject: nfs: remove an obsolete nfs_flock comment We *do* now allow bsd flocks over nfs. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 3ddb00433f4f..d319b49f8f06 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -702,13 +702,6 @@ static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) filp->f_path.dentry->d_name.name, fl->fl_type, fl->fl_flags); - /* - * No BSD flocks over NFS allowed. - * Note: we could try to fake a POSIX lock request here by - * using ((u32) filp | 0x80000000) or some such as the pid. - * Not sure whether that would be unique, though, or whether - * that would break in other places. - */ if (!(fl->fl_flags & FL_FLOCK)) return -ENOLCK; -- cgit v1.2.1 From f25b874d39461935b1b5bbffaa622e735e79d49e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 18 Aug 2008 09:17:58 -0400 Subject: NFS: missing nfs_fattr_init in nfs3_proc_getacl and nfs3_proc_setacls (resend #2) The fattrs used in the NFSv3 getacl/setacl calls are not being properly initialized. This occasionally causes nfs_update_inode to fall into NFSv4 specific codepaths when handling post-op attrs from these calls. Thanks to Cai Qian for noticing the spurious NFSv4 messages in debug output from a v3 mount... Signed-off-by: Jeff Layton Signed-off-by: Trond Myklebust --- fs/nfs/nfs3acl.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 423842f51ac9..cef62557c87d 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -229,6 +229,7 @@ struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) dprintk("NFS call getacl\n"); msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_GETACL]; + nfs_fattr_init(&fattr); status = rpc_call_sync(server->client_acl, &msg, 0); dprintk("NFS reply getacl: %d\n", status); @@ -322,6 +323,7 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, dprintk("NFS call setacl\n"); msg.rpc_proc = &server->client_acl->cl_procinfo[ACLPROC3_SETACL]; + nfs_fattr_init(&fattr); status = rpc_call_sync(server->client_acl, &msg, 0); nfs_access_zap_cache(inode); nfs_zap_acl_cache(inode); -- cgit v1.2.1 From 37ca8f5c6041516aac603a5abb89b05675493802 Mon Sep 17 00:00:00 2001 From: EG Keizer Date: Tue, 19 Aug 2008 16:34:36 -0400 Subject: nfs: authenticated deep mounting Allow mount to do authenticated mounts below the root of the exported tree. The wording in RFC 2623, sec 2.3.2. allows fsinfo with UNIX authentication on the root of the export. Mounts are not always done on the root of the exported tree. Especially autoumounts often mount below the root of the exported tree. Some server implementations (justly) require full authentication for the so-called deep mounts. The old code used AUTH_SYS only. This caused deep mounts to fail on systems requiring stronger authentication.. The client should try both authentication types and use the first one that succeeds. This method was already partially implemented. This patch completes the implementation for NFS2 and NFS3. This patch was developed to allow Debian systems to automount home directories on Solaris servers with krb5 authentication. Tested on kernel 2.6.24-etchnhalf.1 Signed-off-by: E.G. Keizer Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/nfs3proc.c | 20 ++++++++++++++++++-- fs/nfs/proc.c | 10 ++++++++-- 2 files changed, 26 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 1e750e4574a9..c55be7a7679e 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -699,7 +699,7 @@ nfs3_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, } static int -nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, +do_proc_fsinfo(struct rpc_clnt *client, struct nfs_fh *fhandle, struct nfs_fsinfo *info) { struct rpc_message msg = { @@ -711,11 +711,27 @@ nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, dprintk("NFS call fsinfo\n"); nfs_fattr_init(info->fattr); - status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); + status = rpc_call_sync(client, &msg, 0); dprintk("NFS reply fsinfo: %d\n", status); return status; } +/* + * Bare-bones access to fsinfo: this is for nfs_get_root/nfs_get_sb via + * nfs_create_server + */ +static int +nfs3_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + int status; + + status = do_proc_fsinfo(server->client, fhandle, info); + if (status && server->nfs_client->cl_rpcclient != server->client) + status = do_proc_fsinfo(server->nfs_client->cl_rpcclient, fhandle, info); + return status; +} + static int nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_pathconf *info) diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 4dbb84df1b68..193465210d7c 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -65,14 +65,20 @@ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, dprintk("%s: call getattr\n", __func__); nfs_fattr_init(fattr); - status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); + status = rpc_call_sync(server->client, &msg, 0); + /* Retry with default authentication if different */ + if (status && server->nfs_client->cl_rpcclient != server->client) + status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); dprintk("%s: reply getattr: %d\n", __func__, status); if (status) return status; dprintk("%s: call statfs\n", __func__); msg.rpc_proc = &nfs_procedures[NFSPROC_STATFS]; msg.rpc_resp = &fsinfo; - status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); + status = rpc_call_sync(server->client, &msg, 0); + /* Retry with default authentication if different */ + if (status && server->nfs_client->cl_rpcclient != server->client) + status = rpc_call_sync(server->nfs_client->cl_rpcclient, &msg, 0); dprintk("%s: reply statfs: %d\n", __func__, status); if (status) return status; -- cgit v1.2.1 From 4ada29d5c4dd2d3ba89510bdbc64be22961fd1cb Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 20 Aug 2008 16:10:20 -0400 Subject: nfs: break up nfs_follow_referral This function is a little longer and more deeply nested than necessary. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/nfs4namespace.c | 84 +++++++++++++++++++++++++++----------------------- 1 file changed, 46 insertions(+), 38 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index b112857301f7..956cbbc2ae9f 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -110,6 +110,48 @@ static inline int valid_ipaddr4(const char *buf) return 0; } +static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, + char *page, char *page2, + const struct nfs4_fs_location *location) +{ + struct vfsmount *mnt = ERR_PTR(-ENOENT); + char *mnt_path; + unsigned int s = 0; + + mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE); + if (IS_ERR(mnt_path)) + return mnt; + mountdata->mnt_path = mnt_path; + + while (s < location->nservers) { + struct sockaddr_in addr = { + .sin_family = AF_INET, + .sin_port = htons(NFS_PORT), + }; + + if (location->servers[s].len <= 0 || + valid_ipaddr4(location->servers[s].data) < 0) { + s++; + continue; + } + + mountdata->hostname = location->servers[s].data; + addr.sin_addr.s_addr = in_aton(mountdata->hostname), + mountdata->addr = (struct sockaddr *)&addr; + mountdata->addrlen = sizeof(addr); + + snprintf(page, PAGE_SIZE, "%s:%s", + mountdata->hostname, + mountdata->mnt_path); + + mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, mountdata); + if (!IS_ERR(mnt)) + break; + s++; + } + return mnt; +} + /** * nfs_follow_referral - set up mountpoint when hitting a referral on moved error * @mnt_parent - mountpoint of parent directory @@ -128,7 +170,6 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, }; char *page = NULL, *page2 = NULL; - unsigned int s; int loc, error; if (locations == NULL || locations->nlocations <= 0) @@ -153,9 +194,8 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, } loc = 0; - while (loc < locations->nlocations && IS_ERR(mnt)) { + while (loc < locations->nlocations) { const struct nfs4_fs_location *location = &locations->locations[loc]; - char *mnt_path; if (location == NULL || location->nservers <= 0 || location->rootpath.ncomponents == 0) { @@ -163,41 +203,9 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, continue; } - mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE); - if (IS_ERR(mnt_path)) { - loc++; - continue; - } - mountdata.mnt_path = mnt_path; - - s = 0; - while (s < location->nservers) { - struct sockaddr_in addr = { - .sin_family = AF_INET, - .sin_port = htons(NFS_PORT), - }; - - if (location->servers[s].len <= 0 || - valid_ipaddr4(location->servers[s].data) < 0) { - s++; - continue; - } - - mountdata.hostname = location->servers[s].data; - addr.sin_addr.s_addr = in_aton(mountdata.hostname), - mountdata.addr = (struct sockaddr *)&addr; - mountdata.addrlen = sizeof(addr); - - snprintf(page, PAGE_SIZE, "%s:%s", - mountdata.hostname, - mountdata.mnt_path); - - mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, &mountdata); - if (!IS_ERR(mnt)) { - break; - } - s++; - } + mnt = try_location(&mountdata, page, page2, location); + if (!IS_ERR(mnt)) + break; loc++; } -- cgit v1.2.1 From 460cdbc83268dd9641b57d893b03ef52fcc3f96d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 20 Aug 2008 16:10:21 -0400 Subject: nfs: replace while loop by for loops in nfs_follow_referral Whoever wrote this had a bizarre allergy to for loops. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/nfs4namespace.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 956cbbc2ae9f..6bcc5696f911 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -116,24 +116,22 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, { struct vfsmount *mnt = ERR_PTR(-ENOENT); char *mnt_path; - unsigned int s = 0; + unsigned int s; mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE); if (IS_ERR(mnt_path)) return mnt; mountdata->mnt_path = mnt_path; - while (s < location->nservers) { + for (s = 0; s < location->nservers; s++) { struct sockaddr_in addr = { .sin_family = AF_INET, .sin_port = htons(NFS_PORT), }; if (location->servers[s].len <= 0 || - valid_ipaddr4(location->servers[s].data) < 0) { - s++; + valid_ipaddr4(location->servers[s].data) < 0) continue; - } mountdata->hostname = location->servers[s].data; addr.sin_addr.s_addr = in_aton(mountdata->hostname), @@ -147,7 +145,6 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, mnt = vfs_kern_mount(&nfs4_referral_fs_type, 0, page, mountdata); if (!IS_ERR(mnt)) break; - s++; } return mnt; } @@ -193,20 +190,16 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, goto out; } - loc = 0; - while (loc < locations->nlocations) { + for (loc = 0; loc < locations->nlocations; loc++) { const struct nfs4_fs_location *location = &locations->locations[loc]; if (location == NULL || location->nservers <= 0 || - location->rootpath.ncomponents == 0) { - loc++; + location->rootpath.ncomponents == 0) continue; - } mnt = try_location(&mountdata, page, page2, location); if (!IS_ERR(mnt)) break; - loc++; } out: -- cgit v1.2.1 From f0c929251e01a7a86b6254c775cb6b65c6457f10 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 20 Aug 2008 16:10:22 -0400 Subject: nfs: prepare to share nfs_set_port We plan to use this function elsewhere. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 20 ++++++++++++++++++++ fs/nfs/super.c | 19 ------------------- 2 files changed, 20 insertions(+), 19 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 7bcf6ec2d458..8d91bd88e310 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -276,3 +276,23 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len) PAGE_SIZE - 1) >> PAGE_SHIFT; } + +/* + * Set the port number in an address. Be agnostic about the address + * family. + */ +static inline void nfs_set_port(struct sockaddr *sap, unsigned short port) +{ + switch (sap->sa_family) { + case AF_INET: { + struct sockaddr_in *ap = (struct sockaddr_in *)sap; + ap->sin_port = htons(port); + break; + } + case AF_INET6: { + struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap; + ap->sin6_port = htons(port); + break; + } + } +} diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 1e3558697219..b99096b8e827 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -674,25 +674,6 @@ static void nfs_umount_begin(struct super_block *sb) rpc_killall_tasks(rpc); } -/* - * Set the port number in an address. Be agnostic about the address family. - */ -static void nfs_set_port(struct sockaddr *sap, unsigned short port) -{ - switch (sap->sa_family) { - case AF_INET: { - struct sockaddr_in *ap = (struct sockaddr_in *)sap; - ap->sin_port = htons(port); - break; - } - case AF_INET6: { - struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap; - ap->sin6_port = htons(port); - break; - } - } -} - /* * Sanity-check a server address provided by the mount command. * -- cgit v1.2.1 From ea31a4437c59219bf3ea946d58984b01a45a289c Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 20 Aug 2008 16:10:23 -0400 Subject: nfs: Fix misparsing of nfsv4 fs_locations attribute The code incorrectly assumes here that the server name (or ip address) is null-terminated. This can cause referrals to fail in some cases. Also support ipv6 addresses. Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 2 ++ fs/nfs/nfs4namespace.c | 44 ++++++++++++++++++-------------------------- fs/nfs/super.c | 4 +--- 3 files changed, 21 insertions(+), 29 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 8d91bd88e310..5d2a5d3c4241 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -153,6 +153,7 @@ extern void nfs4_clear_inode(struct inode *); void nfs_zap_acl_cache(struct inode *inode); /* super.c */ +void nfs_parse_ip_address(char *, size_t, struct sockaddr *, size_t *); extern struct file_system_type nfs_xdev_fs_type; #ifdef CONFIG_NFS_V4 extern struct file_system_type nfs4_xdev_fs_type; @@ -276,6 +277,7 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len) PAGE_SIZE - 1) >> PAGE_SHIFT; } +#define IPV6_SCOPE_DELIMITER '%' /* * Set the port number in an address. Be agnostic about the address diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 6bcc5696f911..30befc39b3c6 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -93,50 +93,42 @@ static int nfs4_validate_fspath(const struct vfsmount *mnt_parent, return 0; } -/* - * Check if the string represents a "valid" IPv4 address - */ -static inline int valid_ipaddr4(const char *buf) -{ - int rc, count, in[4]; - - rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]); - if (rc != 4) - return -EINVAL; - for (count = 0; count < 4; count++) { - if (in[count] > 255) - return -EINVAL; - } - return 0; -} - static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, char *page, char *page2, const struct nfs4_fs_location *location) { struct vfsmount *mnt = ERR_PTR(-ENOENT); char *mnt_path; + int page2len; unsigned int s; mnt_path = nfs4_pathname_string(&location->rootpath, page2, PAGE_SIZE); if (IS_ERR(mnt_path)) return mnt; mountdata->mnt_path = mnt_path; + page2 += strlen(mnt_path) + 1; + page2len = PAGE_SIZE - strlen(mnt_path) - 1; for (s = 0; s < location->nservers; s++) { - struct sockaddr_in addr = { - .sin_family = AF_INET, - .sin_port = htons(NFS_PORT), - }; + const struct nfs4_string *buf = &location->servers[s]; + struct sockaddr_storage addr; - if (location->servers[s].len <= 0 || - valid_ipaddr4(location->servers[s].data) < 0) + if (buf->len <= 0 || buf->len >= PAGE_SIZE) continue; - mountdata->hostname = location->servers[s].data; - addr.sin_addr.s_addr = in_aton(mountdata->hostname), mountdata->addr = (struct sockaddr *)&addr; - mountdata->addrlen = sizeof(addr); + + if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len)) + continue; + nfs_parse_ip_address(buf->data, buf->len, + mountdata->addr, &mountdata->addrlen); + if (mountdata->addr->sa_family == AF_UNSPEC) + continue; + nfs_set_port(mountdata->addr, NFS_PORT); + + strncpy(page2, buf->data, page2len); + page2[page2len] = '\0'; + mountdata->hostname = page2; snprintf(page, PAGE_SIZE, "%s:%s", mountdata->hostname, diff --git a/fs/nfs/super.c b/fs/nfs/super.c index b99096b8e827..20dc4ccdff56 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -716,8 +716,6 @@ static void nfs_parse_ipv4_address(char *string, size_t str_len, *addr_len = 0; } -#define IPV6_SCOPE_DELIMITER '%' - #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) static void nfs_parse_ipv6_scope_id(const char *string, const size_t str_len, const char *delim, @@ -790,7 +788,7 @@ static void nfs_parse_ipv6_address(char *string, size_t str_len, * If there is a problem constructing the new sockaddr, set the address * family to AF_UNSPEC. */ -static void nfs_parse_ip_address(char *string, size_t str_len, +void nfs_parse_ip_address(char *string, size_t str_len, struct sockaddr *sap, size_t *addr_len) { unsigned int i, colons; -- cgit v1.2.1 From 8491945f11c227400ef294d560f6d7aace76bc33 Mon Sep 17 00:00:00 2001 From: Steve Dickson Date: Fri, 11 Apr 2008 20:03:06 -0400 Subject: NFS: Client mounts hang when exported directory do not exist This patch fixes a regression that was introduced by the string based mounts. nfs_mount() statically returns -EACCES for every error returned by the remote mounted. This is incorrect because -EACCES is an non-fatal error to the mount.nfs command. This error causes mount.nfs to retry the mount even in the case when the exported directory does not exist. This patch maps the errors returned by the remote mountd into valid errno values, exactly how it was done pre-string based mounts. By returning the correct errno enables mount.nfs to do the right thing. Signed-off-by: Steve Dickson [Trond.Myklebust@netapp.com: nfs_stat_to_errno() now correctly returns negative errors, so remove the sign change.] Signed-off-by: Trond Myklebust --- fs/nfs/mount_clnt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 779d2eb649c5..086a6830d785 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -14,6 +14,7 @@ #include #include #include +#include "internal.h" #ifdef RPC_DEBUG # define NFSDBG_FACILITY NFSDBG_MOUNT @@ -98,7 +99,7 @@ out_call_err: out_mnt_err: dprintk("NFS: MNT server returned result %d\n", result.status); - status = -EACCES; + status = nfs_stat_to_errno(result.status); goto out; } -- cgit v1.2.1 From d7fb120774f062ce7db439863ab5d4190d6f989c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 6 Oct 2008 20:08:56 -0400 Subject: NFS: Don't use range_cyclic for data integrity syncs It is more efficient to write linearly starting from the beginning of the file. Signed-off-by: Trond Myklebust --- fs/nfs/write.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 3229e217c773..9f9845859fc1 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1427,8 +1427,9 @@ static int nfs_write_mapping(struct address_space *mapping, int how) .bdi = mapping->backing_dev_info, .sync_mode = WB_SYNC_NONE, .nr_to_write = LONG_MAX, + .range_start = 0, + .range_end = LLONG_MAX, .for_writepages = 1, - .range_cyclic = 1, }; int ret; -- cgit v1.2.1 From 03254e65a60d3113164672dbbadc023c4a56ecd1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 9 Oct 2008 13:27:55 -0400 Subject: NFS: Fix attribute updates This fixes a regression seen when running the Connectathon testsuite against an ext3 filesystem. The reason was that the inode was constantly being marked as 'just updated' by the jiffy wraparound test. This again meant that newer GETATTR calls were failing to pass the nfs_inode_attrs_need_update() test unless the changes caused a ctime update on the server, since they were perceived as having been started before the latest inode update. Given that nfs_inode_attrs_need_update() already checks for wraparound of nfsi->last_updated, we can drop the buggy "protection" in nfs_update_inode(). Also make a slight micro-optimisation of nfs_inode_attrs_need_update(): we are more often going to see time_after(fattr->time_start, nfsi->last_updated) be true, rather than seeing an update of ctime/size, so put that test first to ensure that we optimise away the ctime/size tests. Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index e25009f35cc2..6554281e24a2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -933,10 +933,10 @@ static int nfs_inode_attrs_need_update(const struct inode *inode, const struct n { const struct nfs_inode *nfsi = NFS_I(inode); - return nfs_ctime_need_update(inode, fattr) || - nfs_size_need_update(inode, fattr) || - time_after(fattr->time_start, nfsi->last_updated) || - time_after(nfsi->last_updated, jiffies); + return time_after(fattr->time_start, nfsi->last_updated) || + nfs_ctime_need_update(inode, fattr) || + nfs_size_need_update(inode, fattr) || + time_after(nfsi->last_updated, jiffies); } static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr) @@ -1167,11 +1167,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; } - /* - * Avoid jiffy wraparound issues with nfsi->last_updated - */ - if (!time_in_range(nfsi->last_updated, nfsi->read_cache_jiffies, now)) - nfsi->last_updated = nfsi->read_cache_jiffies; } invalid &= ~NFS_INO_INVALID_ATTR; /* Don't invalidate the data if we were to blame */ -- cgit v1.2.1 From 456018d791ff4ef03d610f72486c637056bcd749 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 8 Oct 2008 15:31:14 -0400 Subject: NFS: Cleanup nfs_set_port Signed-off-by: "J. Bruce Fields" Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 5d2a5d3c4241..d212ee41caf2 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -285,16 +285,15 @@ unsigned int nfs_page_array_len(unsigned int base, size_t len) */ static inline void nfs_set_port(struct sockaddr *sap, unsigned short port) { + struct sockaddr_in *ap = (struct sockaddr_in *)sap; + struct sockaddr_in6 *ap6 = (struct sockaddr_in6 *)sap; + switch (sap->sa_family) { - case AF_INET: { - struct sockaddr_in *ap = (struct sockaddr_in *)sap; - ap->sin_port = htons(port); - break; - } - case AF_INET6: { - struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap; - ap->sin6_port = htons(port); - break; - } + case AF_INET: + ap->sin_port = htons(port); + break; + case AF_INET6: + ap6->sin6_port = htons(port); + break; } } -- cgit v1.2.1 From 5e2e7721f04c11e6dc4a74b33f05a0e1c0381e2e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 8 Oct 2008 15:38:10 -0400 Subject: NFS: fix nfs_parse_ip_address() corner case Bruce observed that nfs_parse_ip_address() will successfully parse an IPv6 address that looks like this: "::1%" A scope delimiter is present, but there is no scope ID following it. This is harmless, as it would simply set the scope ID to zero. However, in some cases we would like to flag this as an improperly formed address. We are now also careful to reject addresses where garbage follows the address (up to the length of the string), instead of ignoring the non-address characters; and where the scope ID is nonsense (not a valid device name, but also not numeric). Before, both of these cases would result in a harmless zero scope ID. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 20dc4ccdff56..d496e4016224 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -717,17 +717,21 @@ static void nfs_parse_ipv4_address(char *string, size_t str_len, } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -static void nfs_parse_ipv6_scope_id(const char *string, const size_t str_len, - const char *delim, - struct sockaddr_in6 *sin6) +static int nfs_parse_ipv6_scope_id(const char *string, const size_t str_len, + const char *delim, + struct sockaddr_in6 *sin6) { char *p; size_t len; - if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) - return ; + if ((string + str_len) == delim) + return 1; + if (*delim != IPV6_SCOPE_DELIMITER) - return; + return 0; + + if (!(ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)) + return 0; len = (string + str_len) - delim - 1; p = kstrndup(delim + 1, len, GFP_KERNEL); @@ -740,14 +744,20 @@ static void nfs_parse_ipv6_scope_id(const char *string, const size_t str_len, scope_id = dev->ifindex; dev_put(dev); } else { - /* scope_id is set to zero on error */ - strict_strtoul(p, 10, &scope_id); + if (strict_strtoul(p, 10, &scope_id) == 0) { + kfree(p); + return 0; + } } kfree(p); + sin6->sin6_scope_id = scope_id; dfprintk(MOUNT, "NFS: IPv6 scope ID = %lu\n", scope_id); + return 1; } + + return 0; } static void nfs_parse_ipv6_address(char *string, size_t str_len, @@ -763,9 +773,11 @@ static void nfs_parse_ipv6_address(char *string, size_t str_len, sin6->sin6_family = AF_INET6; *addr_len = sizeof(*sin6); - if (in6_pton(string, str_len, addr, IPV6_SCOPE_DELIMITER, &delim)) { - nfs_parse_ipv6_scope_id(string, str_len, delim, sin6); - return; + if (in6_pton(string, str_len, addr, + IPV6_SCOPE_DELIMITER, &delim) != 0) { + if (nfs_parse_ipv6_scope_id(string, str_len, + delim, sin6) != 0) + return; } } -- cgit v1.2.1 From 921615f111108258820226a3258a047d9bf1d96a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Oct 2008 19:23:07 -0400 Subject: NFS: Changes to inode->i_nlinks must set the NFS_INO_INVALID_ATTR flag Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6554281e24a2..de3f11e6234e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1141,6 +1141,9 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_gid != fattr->gid) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + if (inode->i_nlink != fattr->nlink) + invalid |= NFS_INO_INVALID_ATTR; + inode->i_mode = fattr->mode; inode->i_nlink = fattr->nlink; inode->i_uid = fattr->uid; -- cgit v1.2.1 From 4704f0e274829e3af00737d2d9adace2d71a9605 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Oct 2008 19:16:07 -0400 Subject: NFS: Fix the resolution problem with nfs_inode_attrs_need_update() It appears that 'jiffies' timestamps do not have high enough resolution for nfs_inode_attrs_need_update(). One problem is that a GETATTR can be launched within < 1 jiffy of the last operation that updated the attribute. Another problem is that RPC calls can take < 1 jiffy to execute. We can fix this by switching the variables to use a simple global counter that gets incremented every time we start another GETATTR call. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 14 ++++++++++---- fs/nfs/inode.c | 37 ++++++++++++++++++++++++++++++------- 2 files changed, 40 insertions(+), 11 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 49d565412827..4807074ada8c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -156,6 +156,7 @@ typedef struct { decode_dirent_t decode; int plus; unsigned long timestamp; + unsigned long gencount; int timestamp_valid; } nfs_readdir_descriptor_t; @@ -177,7 +178,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) struct file *file = desc->file; struct inode *inode = file->f_path.dentry->d_inode; struct rpc_cred *cred = nfs_file_cred(file); - unsigned long timestamp; + unsigned long timestamp, gencount; int error; dfprintk(DIRCACHE, "NFS: %s: reading cookie %Lu into page %lu\n", @@ -186,6 +187,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) again: timestamp = jiffies; + gencount = nfs_inc_attr_generation_counter(); error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, desc->entry->cookie, page, NFS_SERVER(inode)->dtsize, desc->plus); if (error < 0) { @@ -199,6 +201,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) goto error; } desc->timestamp = timestamp; + desc->gencount = gencount; desc->timestamp_valid = 1; SetPageUptodate(page); /* Ensure consistent page alignment of the data. @@ -224,9 +227,10 @@ int dir_decode(nfs_readdir_descriptor_t *desc) if (IS_ERR(p)) return PTR_ERR(p); desc->ptr = p; - if (desc->timestamp_valid) + if (desc->timestamp_valid) { desc->entry->fattr->time_start = desc->timestamp; - else + desc->entry->fattr->gencount = desc->gencount; + } else desc->entry->fattr->valid &= ~NFS_ATTR_FATTR; return 0; } @@ -471,7 +475,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, struct rpc_cred *cred = nfs_file_cred(file); struct page *page = NULL; int status; - unsigned long timestamp; + unsigned long timestamp, gencount; dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); @@ -482,6 +486,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, goto out; } timestamp = jiffies; + gencount = nfs_inc_attr_generation_counter(); status = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, *desc->dir_cookie, page, NFS_SERVER(inode)->dtsize, @@ -490,6 +495,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent, desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ if (status >= 0) { desc->timestamp = timestamp; + desc->gencount = gencount; desc->timestamp_valid = 1; if ((status = dir_decode(desc)) == 0) desc->entry->prev_cookie = *desc->dir_cookie; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index de3f11e6234e..116a3bd2bc9b 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -305,7 +305,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) init_special_inode(inode, inode->i_mode, fattr->rdev); nfsi->read_cache_jiffies = fattr->time_start; - nfsi->last_updated = now; + nfsi->attr_gencount = fattr->gencount; nfsi->cache_change_attribute = now; inode->i_atime = fattr->atime; inode->i_mtime = fattr->mtime; @@ -909,6 +909,30 @@ static int nfs_size_need_update(const struct inode *inode, const struct nfs_fatt return nfs_size_to_loff_t(fattr->size) > i_size_read(inode); } +static unsigned long nfs_attr_generation_counter; + +static unsigned long nfs_read_attr_generation_counter(void) +{ + smp_rmb(); + return nfs_attr_generation_counter; +} + +unsigned long nfs_inc_attr_generation_counter(void) +{ + unsigned long ret; + smp_rmb(); + ret = ++nfs_attr_generation_counter; + smp_wmb(); + return ret; +} + +void nfs_fattr_init(struct nfs_fattr *fattr) +{ + fattr->valid = 0; + fattr->time_start = jiffies; + fattr->gencount = nfs_inc_attr_generation_counter(); +} + /** * nfs_inode_attrs_need_update - check if the inode attributes need updating * @inode - pointer to inode @@ -922,8 +946,7 @@ static int nfs_size_need_update(const struct inode *inode, const struct nfs_fatt * catch the case where ctime either didn't change, or went backwards * (if someone reset the clock on the server) by looking at whether * or not this RPC call was started after the inode was last updated. - * Note also the check for jiffy wraparound if the last_updated timestamp - * is later than 'jiffies'. + * Note also the check for wraparound of 'attr_gencount' * * The function returns 'true' if it thinks the attributes in 'fattr' are * more recent than the ones cached in the inode. @@ -933,10 +956,10 @@ static int nfs_inode_attrs_need_update(const struct inode *inode, const struct n { const struct nfs_inode *nfsi = NFS_I(inode); - return time_after(fattr->time_start, nfsi->last_updated) || + return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 || nfs_ctime_need_update(inode, fattr) || nfs_size_need_update(inode, fattr) || - time_after(nfsi->last_updated, jiffies); + ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0); } static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr) @@ -1107,7 +1130,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) } /* If ctime has changed we should definitely clear access+acl caches */ if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) - invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; } else if (nfsi->change_attr != fattr->change_attr) { dprintk("NFS: change_attr change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); @@ -1163,7 +1186,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; - nfsi->last_updated = now; + nfsi->attr_gencount = nfs_inc_attr_generation_counter(); } else { if (!time_in_range(now, nfsi->attrtimeo_timestamp, nfsi->attrtimeo_timestamp + nfsi->attrtimeo)) { if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) -- cgit v1.2.1 From 011935a0a710c20bb7ae63523b78856848db1926 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 14 Oct 2008 19:24:50 -0400 Subject: NFS: Fix a resolution problem with nfs_inode->cache_change_attribute The cache_change_attribute is used to decide whether or not a directory has changed, in which case we may need to look it up again. Again, the use of 'jiffies' leads to an issue of resolution. Once again, the fix is to change nfs_inode->cache_change_attribute, and just make it a simple counter. Signed-off-by: Trond Myklebust --- fs/nfs/dir.c | 2 +- fs/nfs/inode.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 4807074ada8c..2ab70d46ecbc 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -661,7 +661,7 @@ static int nfs_fsync_dir(struct file *filp, struct dentry *dentry, int datasync) */ void nfs_force_lookup_revalidate(struct inode *dir) { - NFS_I(dir)->cache_change_attribute = jiffies; + NFS_I(dir)->cache_change_attribute++; } /* diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 116a3bd2bc9b..b9195c02a863 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -306,7 +306,6 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) nfsi->read_cache_jiffies = fattr->time_start; nfsi->attr_gencount = fattr->gencount; - nfsi->cache_change_attribute = now; inode->i_atime = fattr->atime; inode->i_mtime = fattr->mtime; inode->i_ctime = fattr->ctime; -- cgit v1.2.1