diff options
Diffstat (limited to 'fs')
42 files changed, 2381 insertions, 1470 deletions
diff --git a/fs/9p/cache.c b/fs/9p/cache.c index a9ea73d6dcf3..2b7a032c37bc 100644 --- a/fs/9p/cache.c +++ b/fs/9p/cache.c @@ -90,7 +90,7 @@ void v9fs_cache_session_get_cookie(struct v9fs_session_info *v9ses) v9ses->fscache = fscache_acquire_cookie(v9fs_cache_netfs.primary_index, &v9fs_cache_session_index_def, - v9ses); + v9ses, true); p9_debug(P9_DEBUG_FSC, "session %p get cookie %p\n", v9ses, v9ses->fscache); } @@ -204,7 +204,7 @@ void v9fs_cache_inode_get_cookie(struct inode *inode) v9ses = v9fs_inode2v9ses(inode); v9inode->fscache = fscache_acquire_cookie(v9ses->fscache, &v9fs_cache_inode_index_def, - v9inode); + v9inode, true); p9_debug(P9_DEBUG_FSC, "inode %p get cookie %p\n", inode, v9inode->fscache); @@ -271,7 +271,7 @@ void v9fs_cache_inode_reset_cookie(struct inode *inode) v9ses = v9fs_inode2v9ses(inode); v9inode->fscache = fscache_acquire_cookie(v9ses->fscache, &v9fs_cache_inode_index_def, - v9inode); + v9inode, true); p9_debug(P9_DEBUG_FSC, "inode %p revalidating cookie old %p new %p\n", inode, old, v9inode->fscache); diff --git a/fs/afs/cell.c b/fs/afs/cell.c index 3c090b7555ea..ca0a3cf93791 100644 --- a/fs/afs/cell.c +++ b/fs/afs/cell.c @@ -179,7 +179,7 @@ struct afs_cell *afs_cell_create(const char *name, unsigned namesz, /* put it up for caching (this never returns an error) */ cell->cache = fscache_acquire_cookie(afs_cache_netfs.primary_index, &afs_cell_cache_index_def, - cell); + cell, true); #endif /* add to the cell lists */ diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 789bc253b5f6..ce25d755b7aa 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -259,7 +259,7 @@ struct inode *afs_iget(struct super_block *sb, struct key *key, #ifdef CONFIG_AFS_FSCACHE vnode->cache = fscache_acquire_cookie(vnode->volume->cache, &afs_vnode_cache_index_def, - vnode); + vnode, true); #endif ret = afs_inode_map_status(vnode, key); diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index 57bcb1596530..b6df2e83809f 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -308,7 +308,8 @@ static int afs_vlocation_fill_in_record(struct afs_vlocation *vl, /* see if we have an in-cache copy (will set vl->valid if there is) */ #ifdef CONFIG_AFS_FSCACHE vl->cache = fscache_acquire_cookie(vl->cell->cache, - &afs_vlocation_cache_index_def, vl); + &afs_vlocation_cache_index_def, vl, + true); #endif if (vl->valid) { diff --git a/fs/afs/volume.c b/fs/afs/volume.c index 401eeb21869f..2b607257820c 100644 --- a/fs/afs/volume.c +++ b/fs/afs/volume.c @@ -131,7 +131,7 @@ struct afs_volume *afs_volume_lookup(struct afs_mount_params *params) #ifdef CONFIG_AFS_FSCACHE volume->cache = fscache_acquire_cookie(vlocation->cache, &afs_volume_cache_index_def, - volume); + volume, true); #endif afs_get_vlocation(vlocation); volume->vlocation = vlocation; diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 43eb5592cdea..00baf1419989 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -270,7 +270,7 @@ static void cachefiles_drop_object(struct fscache_object *_object) #endif /* delete retired objects */ - if (test_bit(FSCACHE_COOKIE_RETIRED, &object->fscache.cookie->flags) && + if (test_bit(FSCACHE_OBJECT_RETIRED, &object->fscache.flags) && _object != cache->cache.fsdef ) { _debug("- retire object OBJ%x", object->fscache.debug_id); diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c index 6bfe65e0b038..7db2e6ca4b8f 100644 --- a/fs/ceph/cache.c +++ b/fs/ceph/cache.c @@ -68,7 +68,7 @@ int ceph_fscache_register_fs(struct ceph_fs_client* fsc) { fsc->fscache = fscache_acquire_cookie(ceph_cache_netfs.primary_index, &ceph_fscache_fsid_object_def, - fsc); + fsc, true); if (fsc->fscache == NULL) { pr_err("Unable to resgister fsid: %p fscache cookie", fsc); @@ -204,7 +204,7 @@ void ceph_fscache_register_inode_cookie(struct ceph_fs_client* fsc, ci->fscache = fscache_acquire_cookie(fsc->fscache, &ceph_fscache_inode_object_def, - ci); + ci, true); done: mutex_unlock(&inode->i_mutex); diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c index b3258f35e88a..8d4b7bc8ae91 100644 --- a/fs/cifs/fscache.c +++ b/fs/cifs/fscache.c @@ -27,7 +27,7 @@ void cifs_fscache_get_client_cookie(struct TCP_Server_Info *server) { server->fscache = fscache_acquire_cookie(cifs_fscache_netfs.primary_index, - &cifs_fscache_server_index_def, server); + &cifs_fscache_server_index_def, server, true); cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, server, server->fscache); } @@ -46,7 +46,7 @@ void cifs_fscache_get_super_cookie(struct cifs_tcon *tcon) tcon->fscache = fscache_acquire_cookie(server->fscache, - &cifs_fscache_super_index_def, tcon); + &cifs_fscache_super_index_def, tcon, true); cifs_dbg(FYI, "%s: (0x%p/0x%p)\n", __func__, server->fscache, tcon->fscache); } @@ -69,7 +69,7 @@ static void cifs_fscache_enable_inode_cookie(struct inode *inode) if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) { cifsi->fscache = fscache_acquire_cookie(tcon->fscache, - &cifs_fscache_inode_object_def, cifsi); + &cifs_fscache_inode_object_def, cifsi, true); cifs_dbg(FYI, "%s: got FH cookie (0x%p/0x%p)\n", __func__, tcon->fscache, cifsi->fscache); } @@ -119,7 +119,7 @@ void cifs_fscache_reset_inode_cookie(struct inode *inode) cifsi->fscache = fscache_acquire_cookie( cifs_sb_master_tcon(cifs_sb)->fscache, &cifs_fscache_inode_object_def, - cifsi); + cifsi, true); cifs_dbg(FYI, "%s: new cookie 0x%p oldcookie 0x%p\n", __func__, cifsi->fscache, old); } diff --git a/fs/dcache.c b/fs/dcache.c index 20532cb0b06e..ae6ebb88ceff 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -542,7 +542,7 @@ EXPORT_SYMBOL(d_drop); * If ref is non-zero, then decrement the refcount too. * Returns dentry requiring refcount drop, or NULL if we're done. */ -static inline struct dentry * +static struct dentry * dentry_kill(struct dentry *dentry, int unlock_on_failure) __releases(dentry->d_lock) { @@ -630,7 +630,8 @@ repeat: goto kill_it; } - dentry->d_flags |= DCACHE_REFERENCED; + if (!(dentry->d_flags & DCACHE_REFERENCED)) + dentry->d_flags |= DCACHE_REFERENCED; dentry_lru_add(dentry); dentry->d_lockref.count--; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 473e09da7d02..810c28fb8c3c 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -34,7 +34,6 @@ #include <linux/mutex.h> #include <linux/anon_inodes.h> #include <linux/device.h> -#include <linux/freezer.h> #include <asm/uaccess.h> #include <asm/io.h> #include <asm/mman.h> @@ -1605,8 +1604,7 @@ fetch_events: } spin_unlock_irqrestore(&ep->lock, flags); - if (!freezable_schedule_hrtimeout_range(to, slack, - HRTIMER_MODE_ABS)) + if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) timed_out = 1; spin_lock_irqsave(&ep->lock, flags); diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c index b2a86e324aac..29d7feb62cf7 100644 --- a/fs/fscache/cookie.c +++ b/fs/fscache/cookie.c @@ -58,15 +58,16 @@ void fscache_cookie_init_once(void *_cookie) struct fscache_cookie *__fscache_acquire_cookie( struct fscache_cookie *parent, const struct fscache_cookie_def *def, - void *netfs_data) + void *netfs_data, + bool enable) { struct fscache_cookie *cookie; BUG_ON(!def); - _enter("{%s},{%s},%p", + _enter("{%s},{%s},%p,%u", parent ? (char *) parent->def->name : "<no-parent>", - def->name, netfs_data); + def->name, netfs_data, enable); fscache_stat(&fscache_n_acquires); @@ -106,7 +107,7 @@ struct fscache_cookie *__fscache_acquire_cookie( cookie->def = def; cookie->parent = parent; cookie->netfs_data = netfs_data; - cookie->flags = 0; + cookie->flags = (1 << FSCACHE_COOKIE_NO_DATA_YET); /* radix tree insertion won't use the preallocation pool unless it's * told it may not wait */ @@ -124,16 +125,22 @@ struct fscache_cookie *__fscache_acquire_cookie( break; } - /* if the object is an index then we need do nothing more here - we - * create indices on disk when we need them as an index may exist in - * multiple caches */ - if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) { - if (fscache_acquire_non_index_cookie(cookie) < 0) { - atomic_dec(&parent->n_children); - __fscache_cookie_put(cookie); - fscache_stat(&fscache_n_acquires_nobufs); - _leave(" = NULL"); - return NULL; + if (enable) { + /* if the object is an index then we need do nothing more here + * - we create indices on disk when we need them as an index + * may exist in multiple caches */ + if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) { + if (fscache_acquire_non_index_cookie(cookie) == 0) { + set_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags); + } else { + atomic_dec(&parent->n_children); + __fscache_cookie_put(cookie); + fscache_stat(&fscache_n_acquires_nobufs); + _leave(" = NULL"); + return NULL; + } + } else { + set_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags); } } @@ -144,6 +151,39 @@ struct fscache_cookie *__fscache_acquire_cookie( EXPORT_SYMBOL(__fscache_acquire_cookie); /* + * Enable a cookie to permit it to accept new operations. + */ +void __fscache_enable_cookie(struct fscache_cookie *cookie, + bool (*can_enable)(void *data), + void *data) +{ + _enter("%p", cookie); + + wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK, + fscache_wait_bit, TASK_UNINTERRUPTIBLE); + + if (test_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags)) + goto out_unlock; + + if (can_enable && !can_enable(data)) { + /* The netfs decided it didn't want to enable after all */ + } else if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX) { + /* Wait for outstanding disablement to complete */ + __fscache_wait_on_invalidate(cookie); + + if (fscache_acquire_non_index_cookie(cookie) == 0) + set_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags); + } else { + set_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags); + } + +out_unlock: + clear_bit_unlock(FSCACHE_COOKIE_ENABLEMENT_LOCK, &cookie->flags); + wake_up_bit(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK); +} +EXPORT_SYMBOL(__fscache_enable_cookie); + +/* * acquire a non-index cookie * - this must make sure the index chain is instantiated and instantiate the * object representation too @@ -157,7 +197,7 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie) _enter(""); - cookie->flags = 1 << FSCACHE_COOKIE_UNAVAILABLE; + set_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags); /* now we need to see whether the backing objects for this cookie yet * exist, if not there'll be nothing to search */ @@ -180,9 +220,7 @@ static int fscache_acquire_non_index_cookie(struct fscache_cookie *cookie) _debug("cache %s", cache->tag->name); - cookie->flags = - (1 << FSCACHE_COOKIE_LOOKING_UP) | - (1 << FSCACHE_COOKIE_NO_DATA_YET); + set_bit(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags); /* ask the cache to allocate objects for this cookie and its parent * chain */ @@ -398,7 +436,8 @@ void __fscache_invalidate(struct fscache_cookie *cookie) if (!hlist_empty(&cookie->backing_objects)) { spin_lock(&cookie->lock); - if (!hlist_empty(&cookie->backing_objects) && + if (fscache_cookie_enabled(cookie) && + !hlist_empty(&cookie->backing_objects) && !test_and_set_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) { object = hlist_entry(cookie->backing_objects.first, @@ -452,10 +491,14 @@ void __fscache_update_cookie(struct fscache_cookie *cookie) spin_lock(&cookie->lock); - /* update the index entry on disk in each cache backing this cookie */ - hlist_for_each_entry(object, - &cookie->backing_objects, cookie_link) { - fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE); + if (fscache_cookie_enabled(cookie)) { + /* update the index entry on disk in each cache backing this + * cookie. + */ + hlist_for_each_entry(object, + &cookie->backing_objects, cookie_link) { + fscache_raise_event(object, FSCACHE_OBJECT_EV_UPDATE); + } } spin_unlock(&cookie->lock); @@ -464,28 +507,14 @@ void __fscache_update_cookie(struct fscache_cookie *cookie) EXPORT_SYMBOL(__fscache_update_cookie); /* - * release a cookie back to the cache - * - the object will be marked as recyclable on disk if retire is true - * - all dependents of this cookie must have already been unregistered - * (indices/files/pages) + * Disable a cookie to stop it from accepting new requests from the netfs. */ -void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) +void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate) { struct fscache_object *object; + bool awaken = false; - fscache_stat(&fscache_n_relinquishes); - if (retire) - fscache_stat(&fscache_n_relinquishes_retire); - - if (!cookie) { - fscache_stat(&fscache_n_relinquishes_null); - _leave(" [no cookie]"); - return; - } - - _enter("%p{%s,%p,%d},%d", - cookie, cookie->def->name, cookie->netfs_data, - atomic_read(&cookie->n_active), retire); + _enter("%p,%u", cookie, invalidate); ASSERTCMP(atomic_read(&cookie->n_active), >, 0); @@ -495,24 +524,82 @@ void __fscache_relinquish_cookie(struct fscache_cookie *cookie, int retire) BUG(); } - /* No further netfs-accessing operations on this cookie permitted */ - set_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags); - if (retire) - set_bit(FSCACHE_COOKIE_RETIRED, &cookie->flags); + wait_on_bit_lock(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK, + fscache_wait_bit, TASK_UNINTERRUPTIBLE); + if (!test_and_clear_bit(FSCACHE_COOKIE_ENABLED, &cookie->flags)) + goto out_unlock_enable; + + /* If the cookie is being invalidated, wait for that to complete first + * so that we can reuse the flag. + */ + __fscache_wait_on_invalidate(cookie); + + /* Dispose of the backing objects */ + set_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags); spin_lock(&cookie->lock); - hlist_for_each_entry(object, &cookie->backing_objects, cookie_link) { - fscache_raise_event(object, FSCACHE_OBJECT_EV_KILL); + if (!hlist_empty(&cookie->backing_objects)) { + hlist_for_each_entry(object, &cookie->backing_objects, cookie_link) { + if (invalidate) + set_bit(FSCACHE_OBJECT_RETIRED, &object->flags); + fscache_raise_event(object, FSCACHE_OBJECT_EV_KILL); + } + } else { + if (test_and_clear_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) + awaken = true; } spin_unlock(&cookie->lock); + if (awaken) + wake_up_bit(&cookie->flags, FSCACHE_COOKIE_INVALIDATING); /* Wait for cessation of activity requiring access to the netfs (when - * n_active reaches 0). + * n_active reaches 0). This makes sure outstanding reads and writes + * have completed. */ if (!atomic_dec_and_test(&cookie->n_active)) wait_on_atomic_t(&cookie->n_active, fscache_wait_atomic_t, TASK_UNINTERRUPTIBLE); + /* Reset the cookie state if it wasn't relinquished */ + if (!test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags)) { + atomic_inc(&cookie->n_active); + set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); + } + +out_unlock_enable: + clear_bit_unlock(FSCACHE_COOKIE_ENABLEMENT_LOCK, &cookie->flags); + wake_up_bit(&cookie->flags, FSCACHE_COOKIE_ENABLEMENT_LOCK); + _leave(""); +} +EXPORT_SYMBOL(__fscache_disable_cookie); + +/* + * release a cookie back to the cache + * - the object will be marked as recyclable on disk if retire is true + * - all dependents of this cookie must have already been unregistered + * (indices/files/pages) + */ +void __fscache_relinquish_cookie(struct fscache_cookie *cookie, bool retire) +{ + fscache_stat(&fscache_n_relinquishes); + if (retire) + fscache_stat(&fscache_n_relinquishes_retire); + + if (!cookie) { + fscache_stat(&fscache_n_relinquishes_null); + _leave(" [no cookie]"); + return; + } + + _enter("%p{%s,%p,%d},%d", + cookie, cookie->def->name, cookie->netfs_data, + atomic_read(&cookie->n_active), retire); + + /* No further netfs-accessing operations on this cookie permitted */ + set_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags); + + __fscache_disable_cookie(cookie, retire); + /* Clear pointers back to the netfs */ cookie->netfs_data = NULL; cookie->def = NULL; @@ -568,6 +655,7 @@ int __fscache_check_consistency(struct fscache_cookie *cookie) { struct fscache_operation *op; struct fscache_object *object; + bool wake_cookie = false; int ret; _enter("%p,", cookie); @@ -591,7 +679,8 @@ int __fscache_check_consistency(struct fscache_cookie *cookie) spin_lock(&cookie->lock); - if (hlist_empty(&cookie->backing_objects)) + if (!fscache_cookie_enabled(cookie) || + hlist_empty(&cookie->backing_objects)) goto inconsistent; object = hlist_entry(cookie->backing_objects.first, struct fscache_object, cookie_link); @@ -600,7 +689,7 @@ int __fscache_check_consistency(struct fscache_cookie *cookie) op->debug_id = atomic_inc_return(&fscache_op_debug_id); - atomic_inc(&cookie->n_active); + __fscache_use_cookie(cookie); if (fscache_submit_op(object, op) < 0) goto submit_failed; @@ -622,9 +711,11 @@ int __fscache_check_consistency(struct fscache_cookie *cookie) return ret; submit_failed: - atomic_dec(&cookie->n_active); + wake_cookie = __fscache_unuse_cookie(cookie); inconsistent: spin_unlock(&cookie->lock); + if (wake_cookie) + __fscache_wake_unused_cookie(cookie); kfree(op); _leave(" = -ESTALE"); return -ESTALE; diff --git a/fs/fscache/fsdef.c b/fs/fscache/fsdef.c index 10a2ade0bdf8..5a117df2a9ef 100644 --- a/fs/fscache/fsdef.c +++ b/fs/fscache/fsdef.c @@ -59,6 +59,7 @@ struct fscache_cookie fscache_fsdef_index = { .lock = __SPIN_LOCK_UNLOCKED(fscache_fsdef_index.lock), .backing_objects = HLIST_HEAD_INIT, .def = &fscache_fsdef_index_def, + .flags = 1 << FSCACHE_COOKIE_ENABLED, }; EXPORT_SYMBOL(fscache_fsdef_index); diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c index b1bb6117473a..989f39401547 100644 --- a/fs/fscache/netfs.c +++ b/fs/fscache/netfs.c @@ -45,6 +45,7 @@ int __fscache_register_netfs(struct fscache_netfs *netfs) netfs->primary_index->def = &fscache_fsdef_netfs_def; netfs->primary_index->parent = &fscache_fsdef_index; netfs->primary_index->netfs_data = netfs; + netfs->primary_index->flags = 1 << FSCACHE_COOKIE_ENABLED; atomic_inc(&netfs->primary_index->parent->usage); atomic_inc(&netfs->primary_index->parent->n_children); diff --git a/fs/fscache/object.c b/fs/fscache/object.c index 86d75a60b20c..dcb821617774 100644 --- a/fs/fscache/object.c +++ b/fs/fscache/object.c @@ -495,6 +495,7 @@ void fscache_object_lookup_negative(struct fscache_object *object) * returning ENODATA. */ set_bit(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); + clear_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags); _debug("wake up lookup %p", &cookie->flags); clear_bit_unlock(FSCACHE_COOKIE_LOOKING_UP, &cookie->flags); @@ -527,6 +528,7 @@ void fscache_obtained_object(struct fscache_object *object) /* We do (presumably) have data */ clear_bit_unlock(FSCACHE_COOKIE_NO_DATA_YET, &cookie->flags); + clear_bit(FSCACHE_COOKIE_UNAVAILABLE, &cookie->flags); /* Allow write requests to begin stacking up and read requests * to begin shovelling data. @@ -679,7 +681,8 @@ static const struct fscache_state *fscache_drop_object(struct fscache_object *ob */ spin_lock(&cookie->lock); hlist_del_init(&object->cookie_link); - if (test_and_clear_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) + if (hlist_empty(&cookie->backing_objects) && + test_and_clear_bit(FSCACHE_COOKIE_INVALIDATING, &cookie->flags)) awaken = true; spin_unlock(&cookie->lock); @@ -927,7 +930,7 @@ static const struct fscache_state *_fscache_invalidate_object(struct fscache_obj */ if (!fscache_use_cookie(object)) { ASSERT(object->cookie->stores.rnode == NULL); - set_bit(FSCACHE_COOKIE_RETIRED, &cookie->flags); + set_bit(FSCACHE_OBJECT_RETIRED, &object->flags); _leave(" [no cookie]"); return transit_to(KILL_OBJECT); } diff --git a/fs/fscache/page.c b/fs/fscache/page.c index 73899c1c3449..7f5c658af755 100644 --- a/fs/fscache/page.c +++ b/fs/fscache/page.c @@ -163,12 +163,10 @@ static void fscache_attr_changed_op(struct fscache_operation *op) fscache_stat(&fscache_n_attr_changed_calls); - if (fscache_object_is_active(object) && - fscache_use_cookie(object)) { + if (fscache_object_is_active(object)) { fscache_stat(&fscache_n_cop_attr_changed); ret = object->cache->ops->attr_changed(object); fscache_stat_d(&fscache_n_cop_attr_changed); - fscache_unuse_cookie(object); if (ret < 0) fscache_abort_object(object); } @@ -184,6 +182,7 @@ int __fscache_attr_changed(struct fscache_cookie *cookie) { struct fscache_operation *op; struct fscache_object *object; + bool wake_cookie; _enter("%p", cookie); @@ -199,15 +198,19 @@ int __fscache_attr_changed(struct fscache_cookie *cookie) } fscache_operation_init(op, fscache_attr_changed_op, NULL); - op->flags = FSCACHE_OP_ASYNC | (1 << FSCACHE_OP_EXCLUSIVE); + op->flags = FSCACHE_OP_ASYNC | + (1 << FSCACHE_OP_EXCLUSIVE) | + (1 << FSCACHE_OP_UNUSE_COOKIE); spin_lock(&cookie->lock); - if (hlist_empty(&cookie->backing_objects)) + if (!fscache_cookie_enabled(cookie) || + hlist_empty(&cookie->backing_objects)) goto nobufs; object = hlist_entry(cookie->backing_objects.first, struct fscache_object, cookie_link); + __fscache_use_cookie(cookie); if (fscache_submit_exclusive_op(object, op) < 0) goto nobufs; spin_unlock(&cookie->lock); @@ -217,8 +220,11 @@ int __fscache_attr_changed(struct fscache_cookie *cookie) return 0; nobufs: + wake_cookie = __fscache_unuse_cookie(cookie); spin_unlock(&cookie->lock); kfree(op); + if (wake_cookie) + __fscache_wake_unused_cookie(cookie); fscache_stat(&fscache_n_attr_changed_nobufs); _leave(" = %d", -ENOBUFS); return -ENOBUFS; @@ -263,7 +269,6 @@ static struct fscache_retrieval *fscache_alloc_retrieval( } fscache_operation_init(&op->op, NULL, fscache_release_retrieval_op); - atomic_inc(&cookie->n_active); op->op.flags = FSCACHE_OP_MYTHREAD | (1UL << FSCACHE_OP_WAITING) | (1UL << FSCACHE_OP_UNUSE_COOKIE); @@ -384,6 +389,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, { struct fscache_retrieval *op; struct fscache_object *object; + bool wake_cookie = false; int ret; _enter("%p,%p,,,", cookie, page); @@ -405,7 +411,7 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, return -ERESTARTSYS; op = fscache_alloc_retrieval(cookie, page->mapping, - end_io_func,context); + end_io_func, context); if (!op) { _leave(" = -ENOMEM"); return -ENOMEM; @@ -414,13 +420,15 @@ int __fscache_read_or_alloc_page(struct fscache_cookie *cookie, spin_lock(&cookie->lock); - if (hlist_empty(&cookie->backing_objects)) + if (!fscache_cookie_enabled(cookie) || + hlist_empty(&cookie->backing_objects)) goto nobufs_unlock; object = hlist_entry(cookie->backing_objects.first, struct fscache_object, cookie_link); ASSERT(test_bit(FSCACHE_OBJECT_IS_LOOKED_UP, &object->flags)); + __fscache_use_cookie(cookie); atomic_inc(&object->n_reads); __set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); @@ -475,9 +483,11 @@ error: nobufs_unlock_dec: atomic_dec(&object->n_reads); + wake_cookie = __fscache_unuse_cookie(cookie); nobufs_unlock: spin_unlock(&cookie->lock); - atomic_dec(&cookie->n_active); + if (wake_cookie) + __fscache_wake_unused_cookie(cookie); kfree(op); nobufs: fscache_stat(&fscache_n_retrievals_nobufs); @@ -514,6 +524,7 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, { struct fscache_retrieval *op; struct fscache_object *object; + bool wake_cookie = false; int ret; _enter("%p,,%d,,,", cookie, *nr_pages); @@ -542,11 +553,13 @@ int __fscache_read_or_alloc_pages(struct fscache_cookie *cookie, spin_lock(&cookie->lock); - if (hlist_empty(&cookie->backing_objects)) + if (!fscache_cookie_enabled(cookie) || + hlist_empty(&cookie->backing_objects)) goto nobufs_unlock; object = hlist_entry(cookie->backing_objects.first, struct fscache_object, cookie_link); + __fscache_use_cookie(cookie); atomic_inc(&object->n_reads); __set_bit(FSCACHE_OP_DEC_READ_CNT, &op->op.flags); @@ -601,10 +614,12 @@ error: nobufs_unlock_dec: atomic_dec(&object->n_reads); + wake_cookie = __fscache_unuse_cookie(cookie); nobufs_unlock: spin_unlock(&cookie->lock); - atomic_dec(&cookie->n_active); kfree(op); + if (wake_cookie) + __fscache_wake_unused_cookie(cookie); nobufs: fscache_stat(&fscache_n_retrievals_nobufs); _leave(" = -ENOBUFS"); @@ -626,6 +641,7 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, { struct fscache_retrieval *op; struct fscache_object *object; + bool wake_cookie = false; int ret; _enter("%p,%p,,,", cookie, page); @@ -653,13 +669,15 @@ int __fscache_alloc_page(struct fscache_cookie *cookie, spin_lock(&cookie->lock); - if (hlist_empty(&cookie->backing_objects)) + if (!fscache_cookie_enabled(cookie) || + hlist_empty(&cookie->backing_objects)) goto nobufs_unlock; object = hlist_entry(cookie->backing_objects.first, struct fscache_object, cookie_link); + __fscache_use_cookie(cookie); if (fscache_submit_op(object, &op->op) < 0) - goto nobufs_unlock; + goto nobufs_unlock_dec; spin_unlock(&cookie->lock); fscache_stat(&fscache_n_alloc_ops); @@ -689,10 +707,13 @@ error: _leave(" = %d", ret); return ret; +nobufs_unlock_dec: + wake_cookie = __fscache_unuse_cookie(cookie); nobufs_unlock: spin_unlock(&cookie->lock); - atomic_dec(&cookie->n_active); kfree(op); + if (wake_cookie) + __fscache_wake_unused_cookie(cookie); nobufs: fscache_stat(&fscache_n_allocs_nobufs); _leave(" = -ENOBUFS"); @@ -889,6 +910,7 @@ int __fscache_write_page(struct fscache_cookie *cookie, { struct fscache_storage *op; struct fscache_object *object; + bool wake_cookie = false; int ret; _enter("%p,%x,", cookie, (u32) page->flags); @@ -920,7 +942,8 @@ int __fscache_write_page(struct fscache_cookie *cookie, ret = -ENOBUFS; spin_lock(&cookie->lock); - if (hlist_empty(&cookie->backing_objects)) + if (!fscache_cookie_enabled(cookie) || + hlist_empty(&cookie->backing_objects)) goto nobufs; object = hlist_entry(cookie->backing_objects.first, struct fscache_object, cookie_link); @@ -957,7 +980,7 @@ int __fscache_write_page(struct fscache_cookie *cookie, op->op.debug_id = atomic_inc_return(&fscache_op_debug_id); op->store_limit = object->store_limit; - atomic_inc(&cookie->n_active); + __fscache_use_cookie(cookie); if (fscache_submit_op(object, &op->op) < 0) goto submit_failed; @@ -984,10 +1007,10 @@ already_pending: return 0; submit_failed: - atomic_dec(&cookie->n_active); spin_lock(&cookie->stores_lock); radix_tree_delete(&cookie->stores, page->index); spin_unlock(&cookie->stores_lock); + wake_cookie = __fscache_unuse_cookie(cookie); page_cache_release(page); ret = -ENOBUFS; goto nobufs; @@ -999,6 +1022,8 @@ nobufs: spin_unlock(&cookie->lock); radix_tree_preload_end(); kfree(op); + if (wake_cookie) + __fscache_wake_unused_cookie(cookie); fscache_stat(&fscache_n_stores_nobufs); _leave(" = -ENOBUFS"); return -ENOBUFS; diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index b5e80b0af315..38c1768b4142 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -140,6 +140,17 @@ config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN If the NFS client is unchanged from the upstream kernel, this option should be set to the default "kernel.org". +config NFS_V4_1_MIGRATION + bool "NFSv4.1 client support for migration" + depends on NFS_V4_1 + default n + help + This option makes the NFS client advertise to NFSv4.1 servers that + it can support NFSv4 migration. + + The NFSv4.1 pieces of the Linux NFSv4 migration implementation are + still experimental. If you are not an NFSv4 developer, say N here. + config NFS_V4_SECURITY_LABEL bool depends on NFS_V4_2 && SECURITY diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 67cd73213168..073b4cf67ed9 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -164,8 +164,7 @@ nfs41_callback_up(struct svc_serv *serv) svc_xprt_put(serv->sv_bc_xprt); serv->sv_bc_xprt = NULL; } - dprintk("--> %s return %ld\n", __func__, - IS_ERR(rqstp) ? PTR_ERR(rqstp) : 0); + dprintk("--> %s return %d\n", __func__, PTR_ERR_OR_ZERO(rqstp)); return rqstp; } diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 2dceee4db076..1d09289c8f0e 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -590,6 +590,8 @@ int nfs_create_rpc_client(struct nfs_client *clp, if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags)) args.flags |= RPC_CLNT_CREATE_DISCRTRY; + if (test_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags)) + args.flags |= RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT; if (test_bit(NFS_CS_NORESVPORT, &clp->cl_flags)) args.flags |= RPC_CLNT_CREATE_NONPRIVPORT; if (test_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags)) @@ -784,8 +786,10 @@ static int nfs_init_server(struct nfs_server *server, goto error; server->port = data->nfs_server.port; + server->auth_info = data->auth_info; - error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]); + error = nfs_init_server_rpcclient(server, &timeparms, + data->selected_flavor); if (error < 0) goto error; @@ -926,6 +930,7 @@ void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *sour target->acdirmax = source->acdirmax; target->caps = source->caps; target->options = source->options; + target->auth_info = source->auth_info; } EXPORT_SYMBOL_GPL(nfs_server_copy_userdata); @@ -943,7 +948,7 @@ void nfs_server_insert_lists(struct nfs_server *server) } EXPORT_SYMBOL_GPL(nfs_server_insert_lists); -static void nfs_server_remove_lists(struct nfs_server *server) +void nfs_server_remove_lists(struct nfs_server *server) { struct nfs_client *clp = server->nfs_client; struct nfs_net *nn; @@ -960,6 +965,7 @@ static void nfs_server_remove_lists(struct nfs_server *server) synchronize_rcu(); } +EXPORT_SYMBOL_GPL(nfs_server_remove_lists); /* * Allocate and initialise a server record diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 02b0df769e2d..9a8676f33350 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1139,7 +1139,13 @@ out_zap_parent: if (inode && S_ISDIR(inode->i_mode)) { /* Purge readdir caches. */ nfs_zap_caches(inode); - if (dentry->d_flags & DCACHE_DISCONNECTED) + /* + * We can't d_drop the root of a disconnected tree: + * its d_hash is on the s_anon list and d_drop() would hide + * it from shrink_dcache_for_unmount(), leading to busy + * inodes on unmount and further oopses. + */ + if (IS_ROOT(dentry)) goto out_valid; } /* If we have submounts, don't unhash ! */ @@ -1381,7 +1387,7 @@ static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, i static int do_open(struct inode *inode, struct file *filp) { - nfs_fscache_set_inode_cookie(inode, filp); + nfs_fscache_open_file(inode, filp); return 0; } diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c index 24d1d1c5fcaf..3ef01f0ba0bc 100644 --- a/fs/nfs/fscache.c +++ b/fs/nfs/fscache.c @@ -39,7 +39,7 @@ void nfs_fscache_get_client_cookie(struct nfs_client *clp) /* create a cache index for looking up filehandles */ clp->fscache = fscache_acquire_cookie(nfs_fscache_netfs.primary_index, &nfs_fscache_server_index_def, - clp); + clp, true); dfprintk(FSCACHE, "NFS: get client cookie (0x%p/0x%p)\n", clp, clp->fscache); } @@ -139,7 +139,7 @@ void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int /* create a cache index for looking up filehandles */ nfss->fscache = fscache_acquire_cookie(nfss->nfs_client->fscache, &nfs_fscache_super_index_def, - nfss); + nfss, true); dfprintk(FSCACHE, "NFS: get superblock cookie (0x%p/0x%p)\n", nfss, nfss->fscache); return; @@ -178,163 +178,79 @@ void nfs_fscache_release_super_cookie(struct super_block *sb) /* * Initialise the per-inode cache cookie pointer for an NFS inode. */ -void nfs_fscache_init_inode_cookie(struct inode *inode) +void nfs_fscache_init_inode(struct inode *inode) { - NFS_I(inode)->fscache = NULL; - if (S_ISREG(inode->i_mode)) - set_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags); -} - -/* - * Get the per-inode cache cookie for an NFS inode. - */ -static void nfs_fscache_enable_inode_cookie(struct inode *inode) -{ - struct super_block *sb = inode->i_sb; struct nfs_inode *nfsi = NFS_I(inode); - if (nfsi->fscache || !NFS_FSCACHE(inode)) + nfsi->fscache = NULL; + if (!S_ISREG(inode->i_mode)) return; - - if ((NFS_SB(sb)->options & NFS_OPTION_FSCACHE)) { - nfsi->fscache = fscache_acquire_cookie( - NFS_SB(sb)->fscache, - &nfs_fscache_inode_object_def, - nfsi); - - dfprintk(FSCACHE, "NFS: get FH cookie (0x%p/0x%p/0x%p)\n", - sb, nfsi, nfsi->fscache); - } + nfsi->fscache = fscache_acquire_cookie(NFS_SB(inode->i_sb)->fscache, + &nfs_fscache_inode_object_def, + nfsi, false); } /* * Release a per-inode cookie. */ -void nfs_fscache_release_inode_cookie(struct inode *inode) +void nfs_fscache_clear_inode(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); + struct fscache_cookie *cookie = nfs_i_fscache(inode); - dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", - nfsi, nfsi->fscache); + dfprintk(FSCACHE, "NFS: clear cookie (0x%p/0x%p)\n", nfsi, cookie); - fscache_relinquish_cookie(nfsi->fscache, 0); + fscache_relinquish_cookie(cookie, false); nfsi->fscache = NULL; } -/* - * Retire a per-inode cookie, destroying the data attached to it. - */ -void nfs_fscache_zap_inode_cookie(struct inode *inode) +static bool nfs_fscache_can_enable(void *data) { - struct nfs_inode *nfsi = NFS_I(inode); + struct inode *inode = data; - dfprintk(FSCACHE, "NFS: zapping cookie (0x%p/0x%p)\n", - nfsi, nfsi->fscache); - - fscache_relinquish_cookie(nfsi->fscache, 1); - nfsi->fscache = NULL; + return !inode_is_open_for_write(inode); } /* - * Turn off the cache with regard to a per-inode cookie if opened for writing, - * invalidating all the pages in the page cache relating to the associated - * inode to clear the per-page caching. - */ -static void nfs_fscache_disable_inode_cookie(struct inode *inode) -{ - clear_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags); - - if (NFS_I(inode)->fscache) { - dfprintk(FSCACHE, - "NFS: nfsi 0x%p turning cache off\n", NFS_I(inode)); - - /* Need to uncache any pages attached to this inode that - * fscache knows about before turning off the cache. - */ - fscache_uncache_all_inode_pages(NFS_I(inode)->fscache, inode); - nfs_fscache_zap_inode_cookie(inode); - } -} - -/* - * wait_on_bit() sleep function for uninterruptible waiting - */ -static int nfs_fscache_wait_bit(void *flags) -{ - schedule(); - return 0; -} - -/* - * Lock against someone else trying to also acquire or relinquish a cookie - */ -static inline void nfs_fscache_inode_lock(struct inode *inode) -{ - struct nfs_inode *nfsi = NFS_I(inode); - - while (test_and_set_bit(NFS_INO_FSCACHE_LOCK, &nfsi->flags)) - wait_on_bit(&nfsi->flags, NFS_INO_FSCACHE_LOCK, - nfs_fscache_wait_bit, TASK_UNINTERRUPTIBLE); -} - -/* - * Unlock cookie management lock - */ -static inline void nfs_fscache_inode_unlock(struct inode *inode) -{ - struct nfs_inode *nfsi = NFS_I(inode); - - smp_mb__before_clear_bit(); - clear_bit(NFS_INO_FSCACHE_LOCK, &nfsi->flags); - smp_mb__after_clear_bit(); - wake_up_bit(&nfsi->flags, NFS_INO_FSCACHE_LOCK); -} - -/* - * Decide if we should enable or disable local caching for this inode. - * - For now, with NFS, only regular files that are open read-only will be able - * to use the cache. - * - May be invoked multiple times in parallel by parallel nfs_open() functions. - */ -void nfs_fscache_set_inode_cookie(struct inode *inode, struct file *filp) -{ - if (NFS_FSCACHE(inode)) { - nfs_fscache_inode_lock(inode); - if ((filp->f_flags & O_ACCMODE) != O_RDONLY) - nfs_fscache_disable_inode_cookie(inode); - else - nfs_fscache_enable_inode_cookie(inode); - nfs_fscache_inode_unlock(inode); - } -} -EXPORT_SYMBOL_GPL(nfs_fscache_set_inode_cookie); - -/* - * Replace a per-inode cookie due to revalidation detecting a file having - * changed on the server. + * Enable or disable caching for a file that is being opened as appropriate. + * The cookie is allocated when the inode is initialised, but is not enabled at + * that time. Enablement is deferred to file-open time to avoid stat() and + * access() thrashing the cache. + * + * For now, with NFS, only regular files that are open read-only will be able + * to use the cache. + * + * We enable the cache for an inode if we open it read-only and it isn't + * currently open for writing. We disable the cache if the inode is open + * write-only. + * + * The caller uses the file struct to pin i_writecount on the inode before + * calling us when a file is opened for writing, so we can make use of that. + * + * Note that this may be invoked multiple times in parallel by parallel + * nfs_open() functions. */ -void nfs_fscache_reset_inode_cookie(struct inode *inode) +void nfs_fscache_open_file(struct inode *inode, struct file *filp) { struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_server *nfss = NFS_SERVER(inode); - NFS_IFDEBUG(struct fscache_cookie *old = nfsi->fscache); + struct fscache_cookie *cookie = nfs_i_fscache(inode); - nfs_fscache_inode_lock(inode); - if (nfsi->fscache) { - /* retire the current fscache cache and get a new one */ - fscache_relinquish_cookie(nfsi->fscache, 1); - - nfsi->fscache = fscache_acquire_cookie( - nfss->nfs_client->fscache, - &nfs_fscache_inode_object_def, - nfsi); + if (!fscache_cookie_valid(cookie)) + return; - dfprintk(FSCACHE, - "NFS: revalidation new cookie (0x%p/0x%p/0x%p/0x%p)\n", - nfss, nfsi, old, nfsi->fscache); + if (inode_is_open_for_write(inode)) { + dfprintk(FSCACHE, "NFS: nfsi 0x%p disabling cache\n", nfsi); + clear_bit(NFS_INO_FSCACHE, &nfsi->flags); + fscache_disable_cookie(cookie, true); + fscache_uncache_all_inode_pages(cookie, inode); + } else { + dfprintk(FSCACHE, "NFS: nfsi 0x%p enabling cache\n", nfsi); + fscache_enable_cookie(cookie, nfs_fscache_can_enable, inode); + if (fscache_cookie_enabled(cookie)) + set_bit(NFS_INO_FSCACHE, &NFS_I(inode)->flags); } - nfs_fscache_inode_unlock(inode); } +EXPORT_SYMBOL_GPL(nfs_fscache_open_file); /* * Release the caching state associated with a page, if the page isn't busy @@ -344,12 +260,11 @@ void nfs_fscache_reset_inode_cookie(struct inode *inode) int nfs_fscache_release_page(struct page *page, gfp_t gfp) { if (PageFsCache(page)) { - struct nfs_inode *nfsi = NFS_I(page->mapping->host); - struct fscache_cookie *cookie = nfsi->fscache; + struct fscache_cookie *cookie = nfs_i_fscache(page->mapping->host); BUG_ON(!cookie); dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n", - cookie, page, nfsi); + cookie, page, NFS_I(page->mapping->host)); if (!fscache_maybe_release_page(cookie, page, gfp)) return 0; @@ -367,13 +282,12 @@ int nfs_fscache_release_page(struct page *page, gfp_t gfp) */ void __nfs_fscache_invalidate_page(struct page *page, struct inode *inode) { - struct nfs_inode *nfsi = NFS_I(inode); - struct fscache_cookie *cookie = nfsi->fscache; + struct fscache_cookie *cookie = nfs_i_fscache(inode); BUG_ON(!cookie); dfprintk(FSCACHE, "NFS: fscache invalidatepage (0x%p/0x%p/0x%p)\n", - cookie, page, nfsi); + cookie, page, NFS_I(inode)); fscache_wait_on_page_write(cookie, page); @@ -417,9 +331,9 @@ int __nfs_readpage_from_fscache(struct nfs_open_context *ctx, dfprintk(FSCACHE, "NFS: readpage_from_fscache(fsc:%p/p:%p(i:%lx f:%lx)/0x%p)\n", - NFS_I(inode)->fscache, page, page->index, page->flags, inode); + nfs_i_fscache(inode), page, page->index, page->flags, inode); - ret = fscache_read_or_alloc_page(NFS_I(inode)->fscache, + ret = fscache_read_or_alloc_page(nfs_i_fscache(inode), page, nfs_readpage_from_fscache_complete, ctx, @@ -459,9 +373,9 @@ int __nfs_readpages_from_fscache(struct nfs_open_context *ctx, int ret; dfprintk(FSCACHE, "NFS: nfs_getpages_from_fscache (0x%p/%u/0x%p)\n", - NFS_I(inode)->fscache, npages, inode); + nfs_i_fscache(inode), npages, inode); - ret = fscache_read_or_alloc_pages(NFS_I(inode)->fscache, + ret = fscache_read_or_alloc_pages(nfs_i_fscache(inode), mapping, pages, nr_pages, nfs_readpage_from_fscache_complete, ctx, @@ -506,15 +420,15 @@ void __nfs_readpage_to_fscache(struct inode *inode, struct page *page, int sync) dfprintk(FSCACHE, "NFS: readpage_to_fscache(fsc:%p/p:%p(i:%lx f:%lx)/%d)\n", - NFS_I(inode)->fscache, page, page->index, page->flags, sync); + nfs_i_fscache(inode), page, page->index, page->flags, sync); - ret = fscache_write_page(NFS_I(inode)->fscache, page, GFP_KERNEL); + ret = fscache_write_page(nfs_i_fscache(inode), page, GFP_KERNEL); dfprintk(FSCACHE, "NFS: readpage_to_fscache: p:%p(i:%lu f:%lx) ret %d\n", page, page->index, page->flags, ret); if (ret != 0) { - fscache_uncache_page(NFS_I(inode)->fscache, page); + fscache_uncache_page(nfs_i_fscache(inode), page); nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_WRITTEN_FAIL, 1); nfs_add_fscache_stats(inode, NFSIOS_FSCACHE_PAGES_UNCACHED, 1); diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index 4ecb76652eba..d7fe3e799f2f 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -76,11 +76,9 @@ extern void nfs_fscache_release_client_cookie(struct nfs_client *); extern void nfs_fscache_get_super_cookie(struct super_block *, const char *, int); extern void nfs_fscache_release_super_cookie(struct super_block *); -extern void nfs_fscache_init_inode_cookie(struct inode *); -extern void nfs_fscache_release_inode_cookie(struct inode *); -extern void nfs_fscache_zap_inode_cookie(struct inode *); -extern void nfs_fscache_set_inode_cookie(struct inode *, struct file *); -extern void nfs_fscache_reset_inode_cookie(struct inode *); +extern void nfs_fscache_init_inode(struct inode *); +extern void nfs_fscache_clear_inode(struct inode *); +extern void nfs_fscache_open_file(struct inode *, struct file *); extern void __nfs_fscache_invalidate_page(struct page *, struct inode *); extern int nfs_fscache_release_page(struct page *, gfp_t); @@ -187,12 +185,10 @@ static inline void nfs_fscache_release_client_cookie(struct nfs_client *clp) {} static inline void nfs_fscache_release_super_cookie(struct super_block *sb) {} -static inline void nfs_fscache_init_inode_cookie(struct inode *inode) {} -static inline void nfs_fscache_release_inode_cookie(struct inode *inode) {} -static inline void nfs_fscache_zap_inode_cookie(struct inode *inode) {} -static inline void nfs_fscache_set_inode_cookie(struct inode *inode, - struct file *filp) {} -static inline void nfs_fscache_reset_inode_cookie(struct inode *inode) {} +static inline void nfs_fscache_init_inode(struct inode *inode) {} +static inline void nfs_fscache_clear_inode(struct inode *inode) {} +static inline void nfs_fscache_open_file(struct inode *inode, + struct file *filp) {} static inline int nfs_fscache_release_page(struct page *page, gfp_t gfp) { diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index eda8879171c4..18ab2da4eeb6 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -122,7 +122,7 @@ void nfs_clear_inode(struct inode *inode) WARN_ON_ONCE(!list_empty(&NFS_I(inode)->open_files)); nfs_zap_acl_cache(inode); nfs_access_zap_cache(inode); - nfs_fscache_release_inode_cookie(inode); + nfs_fscache_clear_inode(inode); } EXPORT_SYMBOL_GPL(nfs_clear_inode); @@ -274,12 +274,6 @@ void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, if (label == NULL) return; - if (nfs_server_capable(inode, NFS_CAP_SECURITY_LABEL) == 0) - return; - - if (NFS_SERVER(inode)->nfs_client->cl_minorversion < 2) - return; - if ((fattr->valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL) && inode->i_security) { error = security_inode_notifysecctx(inode, label->label, label->len); @@ -459,7 +453,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr, st nfsi->attrtimeo_timestamp = now; nfsi->access_cache = RB_ROOT; - nfs_fscache_init_inode_cookie(inode); + nfs_fscache_init_inode(inode); unlock_new_inode(inode); } else @@ -854,7 +848,7 @@ int nfs_open(struct inode *inode, struct file *filp) return PTR_ERR(ctx); nfs_file_set_open_context(filp, ctx); put_nfs_open_context(ctx); - nfs_fscache_set_inode_cookie(inode, filp); + nfs_fscache_open_file(inode, filp); return 0; } @@ -923,6 +917,8 @@ __nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) if (nfsi->cache_validity & NFS_INO_INVALID_ACL) nfs_zap_acl_cache(inode); + nfs_setsecurity(inode, fattr, label); + dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -1209,6 +1205,7 @@ u32 _nfs_display_fhandle_hash(const struct nfs_fh *fh) * not on the result */ return nfs_fhandle_hash(fh); } +EXPORT_SYMBOL_GPL(_nfs_display_fhandle_hash); /* * _nfs_display_fhandle - display an NFS file handle on the console @@ -1253,6 +1250,7 @@ void _nfs_display_fhandle(const struct nfs_fh *fh, const char *caption) } } } +EXPORT_SYMBOL_GPL(_nfs_display_fhandle); #endif /** diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 38da8c2b81ac..bca6a3e3c49c 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -88,8 +88,8 @@ struct nfs_parsed_mount_data { unsigned int namlen; unsigned int options; unsigned int bsize; - unsigned int auth_flavor_len; - rpc_authflavor_t auth_flavors[1]; + struct nfs_auth_info auth_info; + rpc_authflavor_t selected_flavor; char *client_address; unsigned int version; unsigned int minorversion; @@ -154,6 +154,7 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *, rpc_authflavor_t); int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *, struct nfs_fattr *); void nfs_server_insert_lists(struct nfs_server *); +void nfs_server_remove_lists(struct nfs_server *); void nfs_init_timeout_values(struct rpc_timeout *, int, unsigned int, unsigned int); int nfs_init_server_rpcclient(struct nfs_server *, const struct rpc_timeout *t, rpc_authflavor_t); @@ -174,6 +175,8 @@ extern struct nfs_server *nfs4_create_server( struct nfs_subversion *); extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, struct nfs_fh *); +extern int nfs4_update_server(struct nfs_server *server, const char *hostname, + struct sockaddr *sap, size_t salen); extern void nfs_free_server(struct nfs_server *server); extern struct nfs_server *nfs_clone_server(struct nfs_server *, struct nfs_fh *, @@ -323,6 +326,7 @@ extern struct file_system_type nfs_xdev_fs_type; extern struct file_system_type nfs4_xdev_fs_type; extern struct file_system_type nfs4_referral_fs_type; #endif +bool nfs_auth_info_match(const struct nfs_auth_info *, rpc_authflavor_t); struct dentry *nfs_try_mount(int, const char *, struct nfs_mount_info *, struct nfs_subversion *); void nfs_initialise_sb(struct super_block *); diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 28842abafab4..3ce79b04522e 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -29,6 +29,8 @@ enum nfs4_client_state { NFS4CLNT_SERVER_SCOPE_MISMATCH, NFS4CLNT_PURGE_STATE, NFS4CLNT_BIND_CONN_TO_SESSION, + NFS4CLNT_MOVED, + NFS4CLNT_LEASE_MOVED, }; #define NFS4_RENEW_TIMEOUT 0x01 @@ -50,6 +52,7 @@ struct nfs4_minor_version_ops { const struct nfs4_state_recovery_ops *reboot_recovery_ops; const struct nfs4_state_recovery_ops *nograce_recovery_ops; const struct nfs4_state_maintenance_ops *state_renewal_ops; + const struct nfs4_mig_recovery_ops *mig_recovery_ops; }; #define NFS_SEQID_CONFIRMED 1 @@ -203,6 +206,12 @@ struct nfs4_state_maintenance_ops { int (*renew_lease)(struct nfs_client *, struct rpc_cred *); }; +struct nfs4_mig_recovery_ops { + int (*get_locations)(struct inode *, struct nfs4_fs_locations *, + struct page *, struct rpc_cred *); + int (*fsid_present)(struct inode *, struct rpc_cred *); +}; + extern const struct dentry_operations nfs4_dentry_operations; /* dir.c */ @@ -213,10 +222,11 @@ int nfs_atomic_open(struct inode *, struct dentry *, struct file *, extern struct file_system_type nfs4_fs_type; /* nfs4namespace.c */ -rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *, struct nfs_fh *, struct nfs_fattr *); +int nfs4_replace_transport(struct nfs_server *server, + const struct nfs4_fs_locations *locations); /* nfs4proc.c */ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); @@ -231,6 +241,9 @@ extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait); extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *, struct nfs4_fs_locations *, struct page *); +extern int nfs4_proc_get_locations(struct inode *, struct nfs4_fs_locations *, + struct page *page, struct rpc_cred *); +extern int nfs4_proc_fsid_present(struct inode *, struct rpc_cred *); extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, struct qstr *, struct nfs_fh *, struct nfs_fattr *); extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *); @@ -411,6 +424,8 @@ extern int nfs4_client_recover_expired_lease(struct nfs_client *clp); extern void nfs4_schedule_state_manager(struct nfs_client *); extern void nfs4_schedule_path_down_recovery(struct nfs_client *clp); extern int nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); +extern int nfs4_schedule_migration_recovery(const struct nfs_server *); +extern void nfs4_schedule_lease_moved_recovery(struct nfs_client *); extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); extern void nfs41_handle_server_scope(struct nfs_client *, struct nfs41_server_scope **); diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index a860ab566d6e..b4a160a405ce 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -197,6 +197,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED; clp->cl_minorversion = cl_init->minorversion; clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion]; + clp->cl_mig_gen = 1; return clp; error: @@ -368,6 +369,7 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, if (clp->cl_minorversion != 0) __set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags); __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags); + __set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags); error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_GSS_KRB5I); if (error == -EINVAL) error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX); @@ -924,7 +926,7 @@ static int nfs4_server_common_setup(struct nfs_server *server, dprintk("Server FSID: %llx:%llx\n", (unsigned long long) server->fsid.major, (unsigned long long) server->fsid.minor); - dprintk("Mount FH: %d\n", mntfh->size); + nfs_display_fhandle(mntfh, "Pseudo-fs root FH"); nfs4_session_set_rwsize(server); @@ -947,9 +949,8 @@ out: * Create a version 4 volume record */ static int nfs4_init_server(struct nfs_server *server, - const struct nfs_parsed_mount_data *data) + struct nfs_parsed_mount_data *data) { - rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX; struct rpc_timeout timeparms; int error; @@ -961,9 +962,15 @@ static int nfs4_init_server(struct nfs_server *server, /* Initialise the client representation from the mount data */ server->flags = data->flags; server->options = data->options; + server->auth_info = data->auth_info; - if (data->auth_flavor_len >= 1) - pseudoflavor = data->auth_flavors[0]; + /* Use the first specified auth flavor. If this flavor isn't + * allowed by the server, use the SECINFO path to try the + * other specified flavors */ + if (data->auth_info.flavor_len >= 1) + data->selected_flavor = data->auth_info.flavors[0]; + else + data->selected_flavor = RPC_AUTH_UNIX; /* Get a client record */ error = nfs4_set_client(server, @@ -971,7 +978,7 @@ static int nfs4_init_server(struct nfs_server *server, (const struct sockaddr *)&data->nfs_server.address, data->nfs_server.addrlen, data->client_address, - pseudoflavor, + data->selected_flavor, data->nfs_server.protocol, &timeparms, data->minorversion, @@ -991,7 +998,8 @@ static int nfs4_init_server(struct nfs_server *server, server->port = data->nfs_server.port; - error = nfs_init_server_rpcclient(server, &timeparms, pseudoflavor); + error = nfs_init_server_rpcclient(server, &timeparms, + data->selected_flavor); error: /* Done */ @@ -1018,7 +1026,7 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info, if (!server) return ERR_PTR(-ENOMEM); - auth_probe = mount_info->parsed->auth_flavor_len < 1; + auth_probe = mount_info->parsed->auth_info.flavor_len < 1; /* set up the general RPC client */ error = nfs4_init_server(server, mount_info->parsed); @@ -1046,6 +1054,7 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, { struct nfs_client *parent_client; struct nfs_server *server, *parent_server; + bool auth_probe; int error; dprintk("--> nfs4_create_referral_server()\n"); @@ -1078,8 +1087,9 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, if (error < 0) goto error; - error = nfs4_server_common_setup(server, mntfh, - !(parent_server->flags & NFS_MOUNT_SECFLAVOUR)); + auth_probe = parent_server->auth_info.flavor_len < 1; + + error = nfs4_server_common_setup(server, mntfh, auth_probe); if (error < 0) goto error; @@ -1091,3 +1101,111 @@ error: dprintk("<-- nfs4_create_referral_server() = error %d\n", error); return ERR_PTR(error); } + +/* + * Grab the destination's particulars, including lease expiry time. + * + * Returns zero if probe succeeded and retrieved FSID matches the FSID + * we have cached. + */ +static int nfs_probe_destination(struct nfs_server *server) +{ + struct inode *inode = server->super->s_root->d_inode; + struct nfs_fattr *fattr; + int error; + + fattr = nfs_alloc_fattr(); + if (fattr == NULL) + return -ENOMEM; + + /* Sanity: the probe won't work if the destination server + * does not recognize the migrated FH. */ + error = nfs_probe_fsinfo(server, NFS_FH(inode), fattr); + + nfs_free_fattr(fattr); + return error; +} + +/** + * nfs4_update_server - Move an nfs_server to a different nfs_client + * + * @server: represents FSID to be moved + * @hostname: new end-point's hostname + * @sap: new end-point's socket address + * @salen: size of "sap" + * + * The nfs_server must be quiescent before this function is invoked. + * Either its session is drained (NFSv4.1+), or its transport is + * plugged and drained (NFSv4.0). + * + * Returns zero on success, or a negative errno value. + */ +int nfs4_update_server(struct nfs_server *server, const char *hostname, + struct sockaddr *sap, size_t salen) +{ + struct nfs_client *clp = server->nfs_client; + struct rpc_clnt *clnt = server->client; + struct xprt_create xargs = { + .ident = clp->cl_proto, + .net = &init_net, + .dstaddr = sap, + .addrlen = salen, + .servername = hostname, + }; + char buf[INET6_ADDRSTRLEN + 1]; + struct sockaddr_storage address; + struct sockaddr *localaddr = (struct sockaddr *)&address; + int error; + + dprintk("--> %s: move FSID %llx:%llx to \"%s\")\n", __func__, + (unsigned long long)server->fsid.major, + (unsigned long long)server->fsid.minor, + hostname); + + error = rpc_switch_client_transport(clnt, &xargs, clnt->cl_timeout); + if (error != 0) { + dprintk("<-- %s(): rpc_switch_client_transport returned %d\n", + __func__, error); + goto out; + } + + error = rpc_localaddr(clnt, localaddr, sizeof(address)); + if (error != 0) { + dprintk("<-- %s(): rpc_localaddr returned %d\n", + __func__, error); + goto out; + } + + error = -EAFNOSUPPORT; + if (rpc_ntop(localaddr, buf, sizeof(buf)) == 0) { + dprintk("<-- %s(): rpc_ntop returned %d\n", + __func__, error); + goto out; + } + + nfs_server_remove_lists(server); + error = nfs4_set_client(server, hostname, sap, salen, buf, + clp->cl_rpcclient->cl_auth->au_flavor, + clp->cl_proto, clnt->cl_timeout, + clp->cl_minorversion, clp->cl_net); + nfs_put_client(clp); + if (error != 0) { + nfs_server_insert_lists(server); + dprintk("<-- %s(): nfs4_set_client returned %d\n", + __func__, error); + goto out; + } + + if (server->nfs_client->cl_hostname == NULL) + server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL); + nfs_server_insert_lists(server); + + error = nfs_probe_destination(server); + if (error < 0) + goto out; + + dprintk("<-- %s() succeeded\n", __func__); + +out: + return error; +} diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 77efaf15ec90..1f01b55692ee 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -75,7 +75,7 @@ nfs4_file_open(struct inode *inode, struct file *filp) nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); nfs_file_set_open_context(filp, ctx); - nfs_fscache_set_inode_cookie(inode, filp); + nfs_fscache_open_file(inode, filp); err = 0; out_put_ctx: diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 2288cd3c9278..c08cbf40c59e 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -137,6 +137,7 @@ static size_t nfs_parse_server_name(char *string, size_t len, /** * nfs_find_best_sec - Find a security mechanism supported locally + * @server: NFS server struct * @flavors: List of security tuples returned by SECINFO procedure * * Return the pseudoflavor of the first security mechanism in @@ -145,7 +146,8 @@ static size_t nfs_parse_server_name(char *string, size_t len, * is searched in the order returned from the server, per RFC 3530 * recommendation. */ -rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) +static rpc_authflavor_t nfs_find_best_sec(struct nfs_server *server, + struct nfs4_secinfo_flavors *flavors) { rpc_authflavor_t pseudoflavor; struct nfs4_secinfo4 *secinfo; @@ -160,12 +162,19 @@ rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) case RPC_AUTH_GSS: pseudoflavor = rpcauth_get_pseudoflavor(secinfo->flavor, &secinfo->flavor_info); - if (pseudoflavor != RPC_AUTH_MAXFLAVOR) + /* make sure pseudoflavor matches sec= mount opt */ + if (pseudoflavor != RPC_AUTH_MAXFLAVOR && + nfs_auth_info_match(&server->auth_info, + pseudoflavor)) return pseudoflavor; break; } } + /* if there were any sec= options then nothing matched */ + if (server->auth_info.flavor_len > 0) + return -EPERM; + return RPC_AUTH_UNIX; } @@ -187,7 +196,7 @@ static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr goto out; } - flavor = nfs_find_best_sec(flavors); + flavor = nfs_find_best_sec(NFS_SERVER(inode), flavors); out: put_page(page); @@ -390,7 +399,7 @@ struct vfsmount *nfs4_submount(struct nfs_server *server, struct dentry *dentry, if (client->cl_auth->au_flavor != flavor) flavor = client->cl_auth->au_flavor; - else if (!(server->flags & NFS_MOUNT_SECFLAVOUR)) { + else { rpc_authflavor_t new = nfs4_negotiate_security(dir, name); if ((int)new >= 0) flavor = new; @@ -400,3 +409,104 @@ out: rpc_shutdown_client(client); return mnt; } + +/* + * Try one location from the fs_locations array. + * + * Returns zero on success, or a negative errno value. + */ +static int nfs4_try_replacing_one_location(struct nfs_server *server, + char *page, char *page2, + const struct nfs4_fs_location *location) +{ + const size_t addr_bufsize = sizeof(struct sockaddr_storage); + struct sockaddr *sap; + unsigned int s; + size_t salen; + int error; + + sap = kmalloc(addr_bufsize, GFP_KERNEL); + if (sap == NULL) + return -ENOMEM; + + error = -ENOENT; + for (s = 0; s < location->nservers; s++) { + const struct nfs4_string *buf = &location->servers[s]; + char *hostname; + + if (buf->len <= 0 || buf->len > PAGE_SIZE) + continue; + + if (memchr(buf->data, IPV6_SCOPE_DELIMITER, buf->len) != NULL) + continue; + + salen = nfs_parse_server_name(buf->data, buf->len, + sap, addr_bufsize, server); + if (salen == 0) + continue; + rpc_set_port(sap, NFS_PORT); + + error = -ENOMEM; + hostname = kstrndup(buf->data, buf->len, GFP_KERNEL); + if (hostname == NULL) + break; + + error = nfs4_update_server(server, hostname, sap, salen); + kfree(hostname); + if (error == 0) + break; + } + + kfree(sap); + return error; +} + +/** + * nfs4_replace_transport - set up transport to destination server + * + * @server: export being migrated + * @locations: fs_locations array + * + * Returns zero on success, or a negative errno value. + * + * The client tries all the entries in the "locations" array, in the + * order returned by the server, until one works or the end of the + * array is reached. + */ +int nfs4_replace_transport(struct nfs_server *server, + const struct nfs4_fs_locations *locations) +{ + char *page = NULL, *page2 = NULL; + int loc, error; + + error = -ENOENT; + if (locations == NULL || locations->nlocations <= 0) + goto out; + + error = -ENOMEM; + page = (char *) __get_free_page(GFP_USER); + if (!page) + goto out; + page2 = (char *) __get_free_page(GFP_USER); + if (!page2) + goto out; + + for (loc = 0; loc < locations->nlocations; loc++) { + const struct nfs4_fs_location *location = + &locations->locations[loc]; + + if (location == NULL || location->nservers <= 0 || + location->rootpath.ncomponents == 0) + continue; + + error = nfs4_try_replacing_one_location(server, page, + page2, location); + if (error == 0) + break; + } + +out: + free_page((unsigned long)page); + free_page((unsigned long)page2); + return error; +} diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d53d6785cba2..5ab33c0792df 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -105,9 +105,6 @@ nfs4_label_init_security(struct inode *dir, struct dentry *dentry, if (nfs_server_capable(dir, NFS_CAP_SECURITY_LABEL) == 0) return NULL; - if (NFS_SERVER(dir)->nfs_client->cl_minorversion < 2) - return NULL; - err = security_dentry_init_security(dentry, sattr->ia_mode, &dentry->d_name, (void **)&label->label, &label->len); if (err == 0) @@ -384,6 +381,14 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc case -NFS4ERR_STALE_CLIENTID: nfs4_schedule_lease_recovery(clp); goto wait_on_recovery; + case -NFS4ERR_MOVED: + ret = nfs4_schedule_migration_recovery(server); + if (ret < 0) + break; + goto wait_on_recovery; + case -NFS4ERR_LEASE_MOVED: + nfs4_schedule_lease_moved_recovery(clp); + goto wait_on_recovery; #if defined(CONFIG_NFS_V4_1) case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSLOT: @@ -431,6 +436,8 @@ static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struc return nfs4_map_errors(ret); wait_on_recovery: ret = nfs4_wait_clnt_recover(clp); + if (test_bit(NFS_MIG_FAILED, &server->mig_status)) + return -EIO; if (ret == 0) exception->retry = 1; return ret; @@ -1318,31 +1325,24 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) int ret; if (!data->rpc_done) { - ret = data->rpc_status; - goto err; + if (data->rpc_status) { + ret = data->rpc_status; + goto err; + } + /* cached opens have already been processed */ + goto update; } - ret = -ESTALE; - if (!(data->f_attr.valid & NFS_ATTR_FATTR_TYPE) || - !(data->f_attr.valid & NFS_ATTR_FATTR_FILEID) || - !(data->f_attr.valid & NFS_ATTR_FATTR_CHANGE)) - goto err; - - ret = -ENOMEM; - state = nfs4_get_open_state(inode, data->owner); - if (state == NULL) - goto err; - ret = nfs_refresh_inode(inode, &data->f_attr); if (ret) goto err; - nfs_setsecurity(inode, &data->f_attr, data->f_label); - if (data->o_res.delegation_type != 0) nfs4_opendata_check_deleg(data, state); +update: update_open_stateid(state, &data->o_res.stateid, NULL, data->o_arg.fmode); + atomic_inc(&state->count); return state; err: @@ -1575,6 +1575,12 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct /* Don't recall a delegation if it was lost */ nfs4_schedule_lease_recovery(server->nfs_client); return -EAGAIN; + case -NFS4ERR_MOVED: + nfs4_schedule_migration_recovery(server); + return -EAGAIN; + case -NFS4ERR_LEASE_MOVED: + nfs4_schedule_lease_moved_recovery(server->nfs_client); + return -EAGAIN; case -NFS4ERR_DELEG_REVOKED: case -NFS4ERR_ADMIN_REVOKED: case -NFS4ERR_BAD_STATEID: @@ -2697,6 +2703,10 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync) nfs4_close_state(ctx->state, ctx->mode); } +#define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL) +#define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL) +#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_CHANGE_SECURITY_LABEL - 1UL) + static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle) { struct nfs4_server_caps_arg args = { @@ -2712,12 +2722,25 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); if (status == 0) { + /* Sanity check the server answers */ + switch (server->nfs_client->cl_minorversion) { + case 0: + res.attr_bitmask[1] &= FATTR4_WORD1_NFS40_MASK; + res.attr_bitmask[2] = 0; + break; + case 1: + res.attr_bitmask[2] &= FATTR4_WORD2_NFS41_MASK; + break; + case 2: + res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK; + } memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS| NFS_CAP_SYMLINKS|NFS_CAP_FILEID| NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER| NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME| - NFS_CAP_CTIME|NFS_CAP_MTIME); + NFS_CAP_CTIME|NFS_CAP_MTIME| + NFS_CAP_SECURITY_LABEL); if (res.attr_bitmask[0] & FATTR4_WORD0_ACL) server->caps |= NFS_CAP_ACLS; if (res.has_links != 0) @@ -2746,14 +2769,12 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f #endif memcpy(server->attr_bitmask_nl, res.attr_bitmask, sizeof(server->attr_bitmask)); + server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL; - if (server->caps & NFS_CAP_SECURITY_LABEL) { - server->attr_bitmask_nl[2] &= ~FATTR4_WORD2_SECURITY_LABEL; - res.attr_bitmask[2] &= ~FATTR4_WORD2_SECURITY_LABEL; - } memcpy(server->cache_consistency_bitmask, res.attr_bitmask, sizeof(server->cache_consistency_bitmask)); server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; + server->cache_consistency_bitmask[2] = 0; server->acl_bitmask = res.acl_bitmask; server->fh_expire_type = res.fh_expire_type; } @@ -2864,11 +2885,24 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, int status = -EPERM; size_t i; - for (i = 0; i < ARRAY_SIZE(flav_array); i++) { - status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]); - if (status == -NFS4ERR_WRONGSEC || status == -EACCES) - continue; - break; + if (server->auth_info.flavor_len > 0) { + /* try each flavor specified by user */ + for (i = 0; i < server->auth_info.flavor_len; i++) { + status = nfs4_lookup_root_sec(server, fhandle, info, + server->auth_info.flavors[i]); + if (status == -NFS4ERR_WRONGSEC || status == -EACCES) + continue; + break; + } + } else { + /* no flavors specified by user, try default list */ + for (i = 0; i < ARRAY_SIZE(flav_array); i++) { + status = nfs4_lookup_root_sec(server, fhandle, info, + flav_array[i]); + if (status == -NFS4ERR_WRONGSEC || status == -EACCES) + continue; + break; + } } /* @@ -2910,9 +2944,6 @@ int nfs4_proc_get_rootfh(struct nfs_server *server, struct nfs_fh *fhandle, status = nfs4_lookup_root(server, fhandle, info); if (status != -NFS4ERR_WRONGSEC) break; - /* Did user force a 'sec=' mount option? */ - if (server->flags & NFS_MOUNT_SECFLAVOUR) - break; default: status = nfs4_do_find_root_sec(server, fhandle, info); } @@ -2981,11 +3012,16 @@ static int nfs4_get_referral(struct rpc_clnt *client, struct inode *dir, status = nfs4_proc_fs_locations(client, dir, name, locations, page); if (status != 0) goto out; - /* Make sure server returned a different fsid for the referral */ + + /* + * If the fsid didn't change, this is a migration event, not a + * referral. Cause us to drop into the exception handler, which + * will kick off migration recovery. + */ if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &locations->fattr.fsid)) { dprintk("%s: server did not return a different fsid for" " a referral at %s\n", __func__, name->name); - status = -EIO; + status = -NFS4ERR_MOVED; goto out; } /* Fixup attributes for the nfs_lookup() call to nfs_fhget() */ @@ -3165,9 +3201,6 @@ static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, err = -EPERM; if (client != *clnt) goto out; - /* No security negotiation if the user specified 'sec=' */ - if (NFS_SERVER(dir)->flags & NFS_MOUNT_SECFLAVOUR) - goto out; client = nfs4_create_sec_client(client, dir, name); if (IS_ERR(client)) return PTR_ERR(client); @@ -4221,7 +4254,13 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata) unsigned long timestamp = data->timestamp; trace_nfs4_renew_async(clp, task->tk_status); - if (task->tk_status < 0) { + switch (task->tk_status) { + case 0: + break; + case -NFS4ERR_LEASE_MOVED: + nfs4_schedule_lease_moved_recovery(clp); + break; + default: /* Unless we're shutting down, schedule state recovery! */ if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) == 0) return; @@ -4575,7 +4614,7 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf, struct nfs4_label label = {0, 0, buflen, buf}; u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL }; - struct nfs4_getattr_arg args = { + struct nfs4_getattr_arg arg = { .fh = NFS_FH(inode), .bitmask = bitmask, }; @@ -4586,14 +4625,14 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf, }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETATTR], - .rpc_argp = &args, + .rpc_argp = &arg, .rpc_resp = &res, }; int ret; nfs_fattr_init(&fattr); - ret = rpc_call_sync(server->client, &msg, 0); + ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 0); if (ret) return ret; if (!(fattr.valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL)) @@ -4630,7 +4669,7 @@ static int _nfs4_do_set_security_label(struct inode *inode, struct iattr sattr = {0}; struct nfs_server *server = NFS_SERVER(inode); const u32 bitmask[3] = { 0, 0, FATTR4_WORD2_SECURITY_LABEL }; - struct nfs_setattrargs args = { + struct nfs_setattrargs arg = { .fh = NFS_FH(inode), .iap = &sattr, .server = server, @@ -4644,14 +4683,14 @@ static int _nfs4_do_set_security_label(struct inode *inode, }; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETATTR], - .rpc_argp = &args, + .rpc_argp = &arg, .rpc_resp = &res, }; int status; - nfs4_stateid_copy(&args.stateid, &zero_stateid); + nfs4_stateid_copy(&arg.stateid, &zero_stateid); - status = rpc_call_sync(server->client, &msg, 0); + status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1); if (status) dprintk("%s failed: %d\n", __func__, status); @@ -4735,17 +4774,24 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, if (state == NULL) break; if (nfs4_schedule_stateid_recovery(server, state) < 0) - goto stateid_invalid; + goto recovery_failed; goto wait_on_recovery; case -NFS4ERR_EXPIRED: if (state != NULL) { if (nfs4_schedule_stateid_recovery(server, state) < 0) - goto stateid_invalid; + goto recovery_failed; } case -NFS4ERR_STALE_STATEID: case -NFS4ERR_STALE_CLIENTID: nfs4_schedule_lease_recovery(clp); goto wait_on_recovery; + case -NFS4ERR_MOVED: + if (nfs4_schedule_migration_recovery(server) < 0) + goto recovery_failed; + goto wait_on_recovery; + case -NFS4ERR_LEASE_MOVED: + nfs4_schedule_lease_moved_recovery(clp); + goto wait_on_recovery; #if defined(CONFIG_NFS_V4_1) case -NFS4ERR_BADSESSION: case -NFS4ERR_BADSLOT: @@ -4757,29 +4803,28 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, dprintk("%s ERROR %d, Reset session\n", __func__, task->tk_status); nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); - task->tk_status = 0; - return -EAGAIN; + goto restart_call; #endif /* CONFIG_NFS_V4_1 */ case -NFS4ERR_DELAY: nfs_inc_server_stats(server, NFSIOS_DELAY); case -NFS4ERR_GRACE: rpc_delay(task, NFS4_POLL_RETRY_MAX); - task->tk_status = 0; - return -EAGAIN; case -NFS4ERR_RETRY_UNCACHED_REP: case -NFS4ERR_OLD_STATEID: - task->tk_status = 0; - return -EAGAIN; + goto restart_call; } task->tk_status = nfs4_map_errors(task->tk_status); return 0; -stateid_invalid: +recovery_failed: task->tk_status = -EIO; return 0; wait_on_recovery: rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); + if (test_bit(NFS_MIG_FAILED, &server->mig_status)) + goto recovery_failed; +restart_call: task->tk_status = 0; return -EAGAIN; } @@ -5106,6 +5151,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock status = 0; } request->fl_ops->fl_release_private(request); + request->fl_ops = NULL; out: return status; } @@ -5779,6 +5825,7 @@ struct nfs_release_lockowner_data { struct nfs_release_lockowner_args args; struct nfs4_sequence_args seq_args; struct nfs4_sequence_res seq_res; + unsigned long timestamp; }; static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata) @@ -5786,12 +5833,27 @@ static void nfs4_release_lockowner_prepare(struct rpc_task *task, void *calldata struct nfs_release_lockowner_data *data = calldata; nfs40_setup_sequence(data->server, &data->seq_args, &data->seq_res, task); + data->timestamp = jiffies; } static void nfs4_release_lockowner_done(struct rpc_task *task, void *calldata) { struct nfs_release_lockowner_data *data = calldata; + struct nfs_server *server = data->server; + nfs40_sequence_done(task, &data->seq_res); + + switch (task->tk_status) { + case 0: + renew_lease(server, data->timestamp); + break; + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_EXPIRED: + case -NFS4ERR_LEASE_MOVED: + case -NFS4ERR_DELAY: + if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) + rpc_restart_call_prepare(task); + } } static void nfs4_release_lockowner_release(void *calldata) @@ -5990,6 +6052,283 @@ int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, return err; } +/* + * This operation also signals the server that this client is + * performing migration recovery. The server can stop returning + * NFS4ERR_LEASE_MOVED to this client. A RENEW operation is + * appended to this compound to identify the client ID which is + * performing recovery. + */ +static int _nfs40_proc_get_locations(struct inode *inode, + struct nfs4_fs_locations *locations, + struct page *page, struct rpc_cred *cred) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct rpc_clnt *clnt = server->client; + u32 bitmask[2] = { + [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, + }; + struct nfs4_fs_locations_arg args = { + .clientid = server->nfs_client->cl_clientid, + .fh = NFS_FH(inode), + .page = page, + .bitmask = bitmask, + .migration = 1, /* skip LOOKUP */ + .renew = 1, /* append RENEW */ + }; + struct nfs4_fs_locations_res res = { + .fs_locations = locations, + .migration = 1, + .renew = 1, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS], + .rpc_argp = &args, + .rpc_resp = &res, + .rpc_cred = cred, + }; + unsigned long now = jiffies; + int status; + + nfs_fattr_init(&locations->fattr); + locations->server = server; + locations->nlocations = 0; + + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); + nfs4_set_sequence_privileged(&args.seq_args); + status = nfs4_call_sync_sequence(clnt, server, &msg, + &args.seq_args, &res.seq_res); + if (status) + return status; + + renew_lease(server, now); + return 0; +} + +#ifdef CONFIG_NFS_V4_1 + +/* + * This operation also signals the server that this client is + * performing migration recovery. The server can stop asserting + * SEQ4_STATUS_LEASE_MOVED for this client. The client ID + * performing this operation is identified in the SEQUENCE + * operation in this compound. + * + * When the client supports GETATTR(fs_locations_info), it can + * be plumbed in here. + */ +static int _nfs41_proc_get_locations(struct inode *inode, + struct nfs4_fs_locations *locations, + struct page *page, struct rpc_cred *cred) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct rpc_clnt *clnt = server->client; + u32 bitmask[2] = { + [0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS, + }; + struct nfs4_fs_locations_arg args = { + .fh = NFS_FH(inode), + .page = page, + .bitmask = bitmask, + .migration = 1, /* skip LOOKUP */ + }; + struct nfs4_fs_locations_res res = { + .fs_locations = locations, + .migration = 1, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FS_LOCATIONS], + .rpc_argp = &args, + .rpc_resp = &res, + .rpc_cred = cred, + }; + int status; + + nfs_fattr_init(&locations->fattr); + locations->server = server; + locations->nlocations = 0; + + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); + nfs4_set_sequence_privileged(&args.seq_args); + status = nfs4_call_sync_sequence(clnt, server, &msg, + &args.seq_args, &res.seq_res); + if (status == NFS4_OK && + res.seq_res.sr_status_flags & SEQ4_STATUS_LEASE_MOVED) + status = -NFS4ERR_LEASE_MOVED; + return status; +} + +#endif /* CONFIG_NFS_V4_1 */ + +/** + * nfs4_proc_get_locations - discover locations for a migrated FSID + * @inode: inode on FSID that is migrating + * @locations: result of query + * @page: buffer + * @cred: credential to use for this operation + * + * Returns NFS4_OK on success, a negative NFS4ERR status code if the + * operation failed, or a negative errno if a local error occurred. + * + * On success, "locations" is filled in, but if the server has + * no locations information, NFS_ATTR_FATTR_V4_LOCATIONS is not + * asserted. + * + * -NFS4ERR_LEASE_MOVED is returned if the server still has leases + * from this client that require migration recovery. + */ +int nfs4_proc_get_locations(struct inode *inode, + struct nfs4_fs_locations *locations, + struct page *page, struct rpc_cred *cred) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_client *clp = server->nfs_client; + const struct nfs4_mig_recovery_ops *ops = + clp->cl_mvops->mig_recovery_ops; + struct nfs4_exception exception = { }; + int status; + + dprintk("%s: FSID %llx:%llx on \"%s\"\n", __func__, + (unsigned long long)server->fsid.major, + (unsigned long long)server->fsid.minor, + clp->cl_hostname); + nfs_display_fhandle(NFS_FH(inode), __func__); + + do { + status = ops->get_locations(inode, locations, page, cred); + if (status != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, status, &exception); + } while (exception.retry); + return status; +} + +/* + * This operation also signals the server that this client is + * performing "lease moved" recovery. The server can stop + * returning NFS4ERR_LEASE_MOVED to this client. A RENEW operation + * is appended to this compound to identify the client ID which is + * performing recovery. + */ +static int _nfs40_proc_fsid_present(struct inode *inode, struct rpc_cred *cred) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct rpc_clnt *clnt = server->client; + struct nfs4_fsid_present_arg args = { + .fh = NFS_FH(inode), + .clientid = clp->cl_clientid, + .renew = 1, /* append RENEW */ + }; + struct nfs4_fsid_present_res res = { + .renew = 1, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSID_PRESENT], + .rpc_argp = &args, + .rpc_resp = &res, + .rpc_cred = cred, + }; + unsigned long now = jiffies; + int status; + + res.fh = nfs_alloc_fhandle(); + if (res.fh == NULL) + return -ENOMEM; + + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); + nfs4_set_sequence_privileged(&args.seq_args); + status = nfs4_call_sync_sequence(clnt, server, &msg, + &args.seq_args, &res.seq_res); + nfs_free_fhandle(res.fh); + if (status) + return status; + + do_renew_lease(clp, now); + return 0; +} + +#ifdef CONFIG_NFS_V4_1 + +/* + * This operation also signals the server that this client is + * performing "lease moved" recovery. The server can stop asserting + * SEQ4_STATUS_LEASE_MOVED for this client. The client ID performing + * this operation is identified in the SEQUENCE operation in this + * compound. + */ +static int _nfs41_proc_fsid_present(struct inode *inode, struct rpc_cred *cred) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct rpc_clnt *clnt = server->client; + struct nfs4_fsid_present_arg args = { + .fh = NFS_FH(inode), + }; + struct nfs4_fsid_present_res res = { + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSID_PRESENT], + .rpc_argp = &args, + .rpc_resp = &res, + .rpc_cred = cred, + }; + int status; + + res.fh = nfs_alloc_fhandle(); + if (res.fh == NULL) + return -ENOMEM; + + nfs4_init_sequence(&args.seq_args, &res.seq_res, 0); + nfs4_set_sequence_privileged(&args.seq_args); + status = nfs4_call_sync_sequence(clnt, server, &msg, + &args.seq_args, &res.seq_res); + nfs_free_fhandle(res.fh); + if (status == NFS4_OK && + res.seq_res.sr_status_flags & SEQ4_STATUS_LEASE_MOVED) + status = -NFS4ERR_LEASE_MOVED; + return status; +} + +#endif /* CONFIG_NFS_V4_1 */ + +/** + * nfs4_proc_fsid_present - Is this FSID present or absent on server? + * @inode: inode on FSID to check + * @cred: credential to use for this operation + * + * Server indicates whether the FSID is present, moved, or not + * recognized. This operation is necessary to clear a LEASE_MOVED + * condition for this client ID. + * + * Returns NFS4_OK if the FSID is present on this server, + * -NFS4ERR_MOVED if the FSID is no longer present, a negative + * NFS4ERR code if some error occurred on the server, or a + * negative errno if a local failure occurred. + */ +int nfs4_proc_fsid_present(struct inode *inode, struct rpc_cred *cred) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_client *clp = server->nfs_client; + const struct nfs4_mig_recovery_ops *ops = + clp->cl_mvops->mig_recovery_ops; + struct nfs4_exception exception = { }; + int status; + + dprintk("%s: FSID %llx:%llx on \"%s\"\n", __func__, + (unsigned long long)server->fsid.major, + (unsigned long long)server->fsid.minor, + clp->cl_hostname); + nfs_display_fhandle(NFS_FH(inode), __func__); + + do { + status = ops->fsid_present(inode, cred); + if (status != -NFS4ERR_DELAY) + break; + nfs4_handle_exception(server, status, &exception); + } while (exception.retry); + return status; +} + /** * If 'use_integrity' is true and the state managment nfs_client * cl_rpcclient is using krb5i/p, use the integrity protected cl_rpcclient @@ -6276,8 +6615,14 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred, struct nfs41_exchange_id_args args = { .verifier = &verifier, .client = clp, +#ifdef CONFIG_NFS_V4_1_MIGRATION .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER | - EXCHGID4_FLAG_BIND_PRINC_STATEID, + EXCHGID4_FLAG_BIND_PRINC_STATEID | + EXCHGID4_FLAG_SUPP_MOVED_MIGR, +#else + .flags = EXCHGID4_FLAG_SUPP_MOVED_REFER | + EXCHGID4_FLAG_BIND_PRINC_STATEID, +#endif }; struct nfs41_exchange_id_res res = { 0 @@ -7616,6 +7961,9 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, break; } + if (!nfs_auth_info_match(&server->auth_info, flavor)) + flavor = RPC_AUTH_MAXFLAVOR; + if (flavor != RPC_AUTH_MAXFLAVOR) { err = nfs4_lookup_root_sec(server, fhandle, info, flavor); @@ -7887,6 +8235,18 @@ static const struct nfs4_state_maintenance_ops nfs41_state_renewal_ops = { }; #endif +static const struct nfs4_mig_recovery_ops nfs40_mig_recovery_ops = { + .get_locations = _nfs40_proc_get_locations, + .fsid_present = _nfs40_proc_fsid_present, +}; + +#if defined(CONFIG_NFS_V4_1) +static const struct nfs4_mig_recovery_ops nfs41_mig_recovery_ops = { + .get_locations = _nfs41_proc_get_locations, + .fsid_present = _nfs41_proc_fsid_present, +}; +#endif /* CONFIG_NFS_V4_1 */ + static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { .minor_version = 0, .init_caps = NFS_CAP_READDIRPLUS @@ -7902,6 +8262,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { .reboot_recovery_ops = &nfs40_reboot_recovery_ops, .nograce_recovery_ops = &nfs40_nograce_recovery_ops, .state_renewal_ops = &nfs40_state_renewal_ops, + .mig_recovery_ops = &nfs40_mig_recovery_ops, }; #if defined(CONFIG_NFS_V4_1) @@ -7922,6 +8283,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .reboot_recovery_ops = &nfs41_reboot_recovery_ops, .nograce_recovery_ops = &nfs41_nograce_recovery_ops, .state_renewal_ops = &nfs41_state_renewal_ops, + .mig_recovery_ops = &nfs41_mig_recovery_ops, }; #endif diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index cc14cbb78b73..c8e729deb4f7 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -239,8 +239,6 @@ static void nfs4_end_drain_session(struct nfs_client *clp) } } -#if defined(CONFIG_NFS_V4_1) - static int nfs4_drain_slot_tbl(struct nfs4_slot_table *tbl) { set_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state); @@ -270,6 +268,8 @@ static int nfs4_begin_drain_session(struct nfs_client *clp) return nfs4_drain_slot_tbl(&ses->fc_slot_table); } +#if defined(CONFIG_NFS_V4_1) + static int nfs41_setup_state_renewal(struct nfs_client *clp) { int status; @@ -1197,20 +1197,74 @@ void nfs4_schedule_lease_recovery(struct nfs_client *clp) } EXPORT_SYMBOL_GPL(nfs4_schedule_lease_recovery); +/** + * nfs4_schedule_migration_recovery - trigger migration recovery + * + * @server: FSID that is migrating + * + * Returns zero if recovery has started, otherwise a negative NFS4ERR + * value is returned. + */ +int nfs4_schedule_migration_recovery(const struct nfs_server *server) +{ + struct nfs_client *clp = server->nfs_client; + + if (server->fh_expire_type != NFS4_FH_PERSISTENT) { + pr_err("NFS: volatile file handles not supported (server %s)\n", + clp->cl_hostname); + return -NFS4ERR_IO; + } + + if (test_bit(NFS_MIG_FAILED, &server->mig_status)) + return -NFS4ERR_IO; + + dprintk("%s: scheduling migration recovery for (%llx:%llx) on %s\n", + __func__, + (unsigned long long)server->fsid.major, + (unsigned long long)server->fsid.minor, + clp->cl_hostname); + + set_bit(NFS_MIG_IN_TRANSITION, + &((struct nfs_server *)server)->mig_status); + set_bit(NFS4CLNT_MOVED, &clp->cl_state); + + nfs4_schedule_state_manager(clp); + return 0; +} +EXPORT_SYMBOL_GPL(nfs4_schedule_migration_recovery); + +/** + * nfs4_schedule_lease_moved_recovery - start lease-moved recovery + * + * @clp: server to check for moved leases + * + */ +void nfs4_schedule_lease_moved_recovery(struct nfs_client *clp) +{ + dprintk("%s: scheduling lease-moved recovery for client ID %llx on %s\n", + __func__, clp->cl_clientid, clp->cl_hostname); + + set_bit(NFS4CLNT_LEASE_MOVED, &clp->cl_state); + nfs4_schedule_state_manager(clp); +} +EXPORT_SYMBOL_GPL(nfs4_schedule_lease_moved_recovery); + int nfs4_wait_clnt_recover(struct nfs_client *clp) { int res; might_sleep(); + atomic_inc(&clp->cl_count); res = wait_on_bit(&clp->cl_state, NFS4CLNT_MANAGER_RUNNING, nfs_wait_bit_killable, TASK_KILLABLE); if (res) - return res; - + goto out; if (clp->cl_cons_state < 0) - return clp->cl_cons_state; - return 0; + res = clp->cl_cons_state; +out: + nfs_put_client(clp); + return res; } int nfs4_client_recover_expired_lease(struct nfs_client *clp) @@ -1375,8 +1429,8 @@ static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: goto out; default: - printk(KERN_ERR "NFS: %s: unhandled error %d. " - "Zeroing state\n", __func__, status); + printk(KERN_ERR "NFS: %s: unhandled error %d\n", + __func__, status); case -ENOMEM: case -NFS4ERR_DENIED: case -NFS4ERR_RECLAIM_BAD: @@ -1422,7 +1476,7 @@ restart: if (status >= 0) { status = nfs4_reclaim_locks(state, ops); if (status >= 0) { - if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0) { + if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) { spin_lock(&state->state_lock); list_for_each_entry(lock, &state->lock_states, ls_locks) { if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags)) @@ -1439,15 +1493,12 @@ restart: } switch (status) { default: - printk(KERN_ERR "NFS: %s: unhandled error %d. " - "Zeroing state\n", __func__, status); + printk(KERN_ERR "NFS: %s: unhandled error %d\n", + __func__, status); case -ENOENT: case -ENOMEM: case -ESTALE: - /* - * Open state on this file cannot be recovered - * All we can do is revert to using the zero stateid. - */ + /* Open state on this file cannot be recovered */ nfs4_state_mark_recovery_failed(state, status); break; case -EAGAIN: @@ -1628,7 +1679,6 @@ static int nfs4_recovery_handle_error(struct nfs_client *clp, int error) nfs4_state_end_reclaim_reboot(clp); break; case -NFS4ERR_STALE_CLIENTID: - case -NFS4ERR_LEASE_MOVED: set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); nfs4_state_clear_reclaim_reboot(clp); nfs4_state_start_reclaim_reboot(clp); @@ -1829,6 +1879,168 @@ static int nfs4_purge_lease(struct nfs_client *clp) return 0; } +/* + * Try remote migration of one FSID from a source server to a + * destination server. The source server provides a list of + * potential destinations. + * + * Returns zero or a negative NFS4ERR status code. + */ +static int nfs4_try_migration(struct nfs_server *server, struct rpc_cred *cred) +{ + struct nfs_client *clp = server->nfs_client; + struct nfs4_fs_locations *locations = NULL; + struct inode *inode; + struct page *page; + int status, result; + + dprintk("--> %s: FSID %llx:%llx on \"%s\"\n", __func__, + (unsigned long long)server->fsid.major, + (unsigned long long)server->fsid.minor, + clp->cl_hostname); + + result = 0; + page = alloc_page(GFP_KERNEL); + locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); + if (page == NULL || locations == NULL) { + dprintk("<-- %s: no memory\n", __func__); + goto out; + } + + inode = server->super->s_root->d_inode; + result = nfs4_proc_get_locations(inode, locations, page, cred); + if (result) { + dprintk("<-- %s: failed to retrieve fs_locations: %d\n", + __func__, result); + goto out; + } + + result = -NFS4ERR_NXIO; + if (!(locations->fattr.valid & NFS_ATTR_FATTR_V4_LOCATIONS)) { + dprintk("<-- %s: No fs_locations data, migration skipped\n", + __func__); + goto out; + } + + nfs4_begin_drain_session(clp); + + status = nfs4_replace_transport(server, locations); + if (status != 0) { + dprintk("<-- %s: failed to replace transport: %d\n", + __func__, status); + goto out; + } + + result = 0; + dprintk("<-- %s: migration succeeded\n", __func__); + +out: + if (page != NULL) + __free_page(page); + kfree(locations); + if (result) { + pr_err("NFS: migration recovery failed (server %s)\n", + clp->cl_hostname); + set_bit(NFS_MIG_FAILED, &server->mig_status); + } + return result; +} + +/* + * Returns zero or a negative NFS4ERR status code. + */ +static int nfs4_handle_migration(struct nfs_client *clp) +{ + const struct nfs4_state_maintenance_ops *ops = + clp->cl_mvops->state_renewal_ops; + struct nfs_server *server; + struct rpc_cred *cred; + + dprintk("%s: migration reported on \"%s\"\n", __func__, + clp->cl_hostname); + + spin_lock(&clp->cl_lock); + cred = ops->get_state_renewal_cred_locked(clp); + spin_unlock(&clp->cl_lock); + if (cred == NULL) + return -NFS4ERR_NOENT; + + clp->cl_mig_gen++; +restart: + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + int status; + + if (server->mig_gen == clp->cl_mig_gen) + continue; + server->mig_gen = clp->cl_mig_gen; + + if (!test_and_clear_bit(NFS_MIG_IN_TRANSITION, + &server->mig_status)) + continue; + + rcu_read_unlock(); + status = nfs4_try_migration(server, cred); + if (status < 0) { + put_rpccred(cred); + return status; + } + goto restart; + } + rcu_read_unlock(); + put_rpccred(cred); + return 0; +} + +/* + * Test each nfs_server on the clp's cl_superblocks list to see + * if it's moved to another server. Stop when the server no longer + * returns NFS4ERR_LEASE_MOVED. + */ +static int nfs4_handle_lease_moved(struct nfs_client *clp) +{ + const struct nfs4_state_maintenance_ops *ops = + clp->cl_mvops->state_renewal_ops; + struct nfs_server *server; + struct rpc_cred *cred; + + dprintk("%s: lease moved reported on \"%s\"\n", __func__, + clp->cl_hostname); + + spin_lock(&clp->cl_lock); + cred = ops->get_state_renewal_cred_locked(clp); + spin_unlock(&clp->cl_lock); + if (cred == NULL) + return -NFS4ERR_NOENT; + + clp->cl_mig_gen++; +restart: + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + struct inode *inode; + int status; + + if (server->mig_gen == clp->cl_mig_gen) + continue; + server->mig_gen = clp->cl_mig_gen; + + rcu_read_unlock(); + + inode = server->super->s_root->d_inode; + status = nfs4_proc_fsid_present(inode, cred); + if (status != -NFS4ERR_MOVED) + goto restart; /* wasn't this one */ + if (nfs4_try_migration(server, cred) == -NFS4ERR_LEASE_MOVED) + goto restart; /* there are more */ + goto out; + } + rcu_read_unlock(); + +out: + put_rpccred(cred); + return 0; +} + /** * nfs4_discover_server_trunking - Detect server IP address trunking * @@ -2017,9 +2229,10 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) nfs41_handle_server_reboot(clp); if (flags & (SEQ4_STATUS_EXPIRED_ALL_STATE_REVOKED | SEQ4_STATUS_EXPIRED_SOME_STATE_REVOKED | - SEQ4_STATUS_ADMIN_STATE_REVOKED | - SEQ4_STATUS_LEASE_MOVED)) + SEQ4_STATUS_ADMIN_STATE_REVOKED)) nfs41_handle_state_revoked(clp); + if (flags & SEQ4_STATUS_LEASE_MOVED) + nfs4_schedule_lease_moved_recovery(clp); if (flags & SEQ4_STATUS_RECALLABLE_STATE_REVOKED) nfs41_handle_recallable_state_revoked(clp); if (flags & SEQ4_STATUS_BACKCHANNEL_FAULT) @@ -2157,7 +2370,20 @@ static void nfs4_state_manager(struct nfs_client *clp) status = nfs4_check_lease(clp); if (status < 0) goto out_error; - continue; + } + + if (test_and_clear_bit(NFS4CLNT_MOVED, &clp->cl_state)) { + section = "migration"; + status = nfs4_handle_migration(clp); + if (status < 0) + goto out_error; + } + + if (test_and_clear_bit(NFS4CLNT_LEASE_MOVED, &clp->cl_state)) { + section = "lease moved"; + status = nfs4_handle_lease_moved(clp); + if (status < 0) + goto out_error; } /* First recover reboot state... */ diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index e26acdd1a645..65ab0a0ca1c4 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -261,9 +261,9 @@ struct dentry *nfs4_try_mount(int flags, const char *dev_name, res = nfs_follow_remote_path(root_mnt, export_path); - dfprintk(MOUNT, "<-- nfs4_try_mount() = %ld%s\n", - IS_ERR(res) ? PTR_ERR(res) : 0, - IS_ERR(res) ? " [error]" : ""); + dfprintk(MOUNT, "<-- nfs4_try_mount() = %d%s\n", + PTR_ERR_OR_ZERO(res), + IS_ERR(res) ? " [error]" : ""); return res; } @@ -319,9 +319,9 @@ static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type, data->mnt_path = export_path; res = nfs_follow_remote_path(root_mnt, export_path); - dprintk("<-- nfs4_referral_mount() = %ld%s\n", - IS_ERR(res) ? PTR_ERR(res) : 0, - IS_ERR(res) ? " [error]" : ""); + dprintk("<-- nfs4_referral_mount() = %d%s\n", + PTR_ERR_OR_ZERO(res), + IS_ERR(res) ? " [error]" : ""); return res; } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 79210d23f607..5be2868c02f1 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -105,12 +105,8 @@ static int nfs4_stat_to_errno(int); #ifdef CONFIG_NFS_V4_SECURITY_LABEL /* PI(4 bytes) + LFS(4 bytes) + 1(for null terminator?) + MAXLABELLEN */ #define nfs4_label_maxsz (4 + 4 + 1 + XDR_QUADLEN(NFS4_MAXLABELLEN)) -#define encode_readdir_space 24 -#define encode_readdir_bitmask_sz 3 #else #define nfs4_label_maxsz 0 -#define encode_readdir_space 20 -#define encode_readdir_bitmask_sz 2 #endif /* We support only one layout type per file system */ #define decode_mdsthreshold_maxsz (1 + 1 + nfs4_fattr_bitmap_maxsz + 1 + 8) @@ -595,11 +591,13 @@ static int nfs4_stat_to_errno(int); #define NFS4_enc_getattr_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ - encode_getattr_maxsz) + encode_getattr_maxsz + \ + encode_renew_maxsz) #define NFS4_dec_getattr_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ - decode_getattr_maxsz) + decode_getattr_maxsz + \ + decode_renew_maxsz) #define NFS4_enc_lookup_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ @@ -736,13 +734,15 @@ static int nfs4_stat_to_errno(int); encode_sequence_maxsz + \ encode_putfh_maxsz + \ encode_lookup_maxsz + \ - encode_fs_locations_maxsz) + encode_fs_locations_maxsz + \ + encode_renew_maxsz) #define NFS4_dec_fs_locations_sz \ (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_lookup_maxsz + \ - decode_fs_locations_maxsz) + decode_fs_locations_maxsz + \ + decode_renew_maxsz) #define NFS4_enc_secinfo_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz + \ encode_putfh_maxsz + \ @@ -751,6 +751,18 @@ static int nfs4_stat_to_errno(int); decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_secinfo_maxsz) +#define NFS4_enc_fsid_present_sz \ + (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putfh_maxsz + \ + encode_getfh_maxsz + \ + encode_renew_maxsz) +#define NFS4_dec_fsid_present_sz \ + (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putfh_maxsz + \ + decode_getfh_maxsz + \ + decode_renew_maxsz) #if defined(CONFIG_NFS_V4_1) #define NFS4_enc_bind_conn_to_session_sz \ (compound_encode_hdr_maxsz + \ @@ -1565,6 +1577,8 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg }; uint32_t dircount = readdir->count >> 1; __be32 *p, verf[2]; + uint32_t attrlen = 0; + unsigned int i; if (readdir->plus) { attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE| @@ -1573,26 +1587,27 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV| FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS| FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; + attrs[2] |= FATTR4_WORD2_SECURITY_LABEL; dircount >>= 1; } /* Use mounted_on_fileid only if the server supports it */ if (!(readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) attrs[0] |= FATTR4_WORD0_FILEID; + for (i = 0; i < ARRAY_SIZE(attrs); i++) { + attrs[i] &= readdir->bitmask[i]; + if (attrs[i] != 0) + attrlen = i+1; + } encode_op_hdr(xdr, OP_READDIR, decode_readdir_maxsz, hdr); encode_uint64(xdr, readdir->cookie); encode_nfs4_verifier(xdr, &readdir->verifier); - p = reserve_space(xdr, encode_readdir_space); + p = reserve_space(xdr, 12 + (attrlen << 2)); *p++ = cpu_to_be32(dircount); *p++ = cpu_to_be32(readdir->count); - *p++ = cpu_to_be32(encode_readdir_bitmask_sz); - *p++ = cpu_to_be32(attrs[0] & readdir->bitmask[0]); - *p = cpu_to_be32(attrs[1] & readdir->bitmask[1]); - if (encode_readdir_bitmask_sz > 2) { - if (hdr->minorversion > 1) - attrs[2] |= FATTR4_WORD2_SECURITY_LABEL; - p++, *p++ = cpu_to_be32(attrs[2] & readdir->bitmask[2]); - } + *p++ = cpu_to_be32(attrlen); + for (i = 0; i < attrlen; i++) + *p++ = cpu_to_be32(attrs[i]); memcpy(verf, readdir->verifier.data, sizeof(verf)); dprintk("%s: cookie = %llu, verifier = %08x:%08x, bitmap = %08x:%08x:%08x\n", @@ -2687,11 +2702,20 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); - encode_putfh(xdr, args->dir_fh, &hdr); - encode_lookup(xdr, args->name, &hdr); - replen = hdr.replen; /* get the attribute into args->page */ - encode_fs_locations(xdr, args->bitmask, &hdr); + if (args->migration) { + encode_putfh(xdr, args->fh, &hdr); + replen = hdr.replen; + encode_fs_locations(xdr, args->bitmask, &hdr); + if (args->renew) + encode_renew(xdr, args->clientid, &hdr); + } else { + encode_putfh(xdr, args->dir_fh, &hdr); + encode_lookup(xdr, args->name, &hdr); + replen = hdr.replen; + encode_fs_locations(xdr, args->bitmask, &hdr); + } + /* Set up reply kvec to capture returned fs_locations array. */ xdr_inline_pages(&req->rq_rcv_buf, replen << 2, &args->page, 0, PAGE_SIZE); encode_nops(&hdr); @@ -2715,6 +2739,26 @@ static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req, encode_nops(&hdr); } +/* + * Encode FSID_PRESENT request + */ +static void nfs4_xdr_enc_fsid_present(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs4_fsid_present_arg *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putfh(xdr, args->fh, &hdr); + encode_getfh(xdr, &hdr); + if (args->renew) + encode_renew(xdr, args->clientid, &hdr); + encode_nops(&hdr); +} + #if defined(CONFIG_NFS_V4_1) /* * BIND_CONN_TO_SESSION request @@ -6824,13 +6868,26 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req, status = decode_putfh(xdr); if (status) goto out; - status = decode_lookup(xdr); - if (status) - goto out; - xdr_enter_page(xdr, PAGE_SIZE); - status = decode_getfattr_generic(xdr, &res->fs_locations->fattr, + if (res->migration) { + xdr_enter_page(xdr, PAGE_SIZE); + status = decode_getfattr_generic(xdr, + &res->fs_locations->fattr, + NULL, res->fs_locations, + NULL, res->fs_locations->server); + if (status) + goto out; + if (res->renew) + status = decode_renew(xdr); + } else { + status = decode_lookup(xdr); + if (status) + goto out; + xdr_enter_page(xdr, PAGE_SIZE); + status = decode_getfattr_generic(xdr, + &res->fs_locations->fattr, NULL, res->fs_locations, NULL, res->fs_locations->server); + } out: return status; } @@ -6859,6 +6916,34 @@ out: return status; } +/* + * Decode FSID_PRESENT response + */ +static int nfs4_xdr_dec_fsid_present(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs4_fsid_present_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putfh(xdr); + if (status) + goto out; + status = decode_getfh(xdr, res->fh); + if (status) + goto out; + if (res->renew) + status = decode_renew(xdr); +out: + return status; +} + #if defined(CONFIG_NFS_V4_1) /* * Decode BIND_CONN_TO_SESSION response @@ -7373,6 +7458,7 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner), PROC(SECINFO, enc_secinfo, dec_secinfo), + PROC(FSID_PRESENT, enc_fsid_present, dec_fsid_present), #if defined(CONFIG_NFS_V4_1) PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), PROC(CREATE_SESSION, enc_create_session, dec_create_session), diff --git a/fs/nfs/super.c b/fs/nfs/super.c index a03b9c6f9489..317d6fc2160e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -497,7 +497,8 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour) static const struct { rpc_authflavor_t flavour; const char *str; - } sec_flavours[] = { + } sec_flavours[NFS_AUTH_INFO_MAX_FLAVORS] = { + /* update NFS_AUTH_INFO_MAX_FLAVORS when this list changes! */ { RPC_AUTH_NULL, "null" }, { RPC_AUTH_UNIX, "sys" }, { RPC_AUTH_GSS_KRB5, "krb5" }, @@ -923,8 +924,7 @@ static struct nfs_parsed_mount_data *nfs_alloc_parsed_mount_data(void) data->mount_server.port = NFS_UNSPEC_PORT; data->nfs_server.port = NFS_UNSPEC_PORT; data->nfs_server.protocol = XPRT_TRANSPORT_TCP; - data->auth_flavors[0] = RPC_AUTH_MAXFLAVOR; - data->auth_flavor_len = 0; + data->selected_flavor = RPC_AUTH_MAXFLAVOR; data->minorversion = 0; data->need_mount = true; data->net = current->nsproxy->net_ns; @@ -1019,12 +1019,51 @@ static void nfs_set_mount_transport_protocol(struct nfs_parsed_mount_data *mnt) } } -static void nfs_set_auth_parsed_mount_data(struct nfs_parsed_mount_data *data, - rpc_authflavor_t pseudoflavor) +/* + * Add 'flavor' to 'auth_info' if not already present. + * Returns true if 'flavor' ends up in the list, false otherwise + */ +static bool nfs_auth_info_add(struct nfs_auth_info *auth_info, + rpc_authflavor_t flavor) +{ + unsigned int i; + unsigned int max_flavor_len = (sizeof(auth_info->flavors) / + sizeof(auth_info->flavors[0])); + + /* make sure this flavor isn't already in the list */ + for (i = 0; i < auth_info->flavor_len; i++) { + if (flavor == auth_info->flavors[i]) + return true; + } + + if (auth_info->flavor_len + 1 >= max_flavor_len) { + dfprintk(MOUNT, "NFS: too many sec= flavors\n"); + return false; + } + + auth_info->flavors[auth_info->flavor_len++] = flavor; + return true; +} + +/* + * Return true if 'match' is in auth_info or auth_info is empty. + * Return false otherwise. + */ +bool nfs_auth_info_match(const struct nfs_auth_info *auth_info, + rpc_authflavor_t match) { - data->auth_flavors[0] = pseudoflavor; - data->auth_flavor_len = 1; + int i; + + if (!auth_info->flavor_len) + return true; + + for (i = 0; i < auth_info->flavor_len; i++) { + if (auth_info->flavors[i] == match) + return true; + } + return false; } +EXPORT_SYMBOL_GPL(nfs_auth_info_match); /* * Parse the value of the 'sec=' option. @@ -1034,49 +1073,55 @@ static int nfs_parse_security_flavors(char *value, { substring_t args[MAX_OPT_ARGS]; rpc_authflavor_t pseudoflavor; + char *p; dfprintk(MOUNT, "NFS: parsing sec=%s option\n", value); - switch (match_token(value, nfs_secflavor_tokens, args)) { - case Opt_sec_none: - pseudoflavor = RPC_AUTH_NULL; - break; - case Opt_sec_sys: - pseudoflavor = RPC_AUTH_UNIX; - break; - case Opt_sec_krb5: - pseudoflavor = RPC_AUTH_GSS_KRB5; - break; - case Opt_sec_krb5i: - pseudoflavor = RPC_AUTH_GSS_KRB5I; - break; - case Opt_sec_krb5p: - pseudoflavor = RPC_AUTH_GSS_KRB5P; - break; - case Opt_sec_lkey: - pseudoflavor = RPC_AUTH_GSS_LKEY; - break; - case Opt_sec_lkeyi: - pseudoflavor = RPC_AUTH_GSS_LKEYI; - break; - case Opt_sec_lkeyp: - pseudoflavor = RPC_AUTH_GSS_LKEYP; - break; - case Opt_sec_spkm: - pseudoflavor = RPC_AUTH_GSS_SPKM; - break; - case Opt_sec_spkmi: - pseudoflavor = RPC_AUTH_GSS_SPKMI; - break; - case Opt_sec_spkmp: - pseudoflavor = RPC_AUTH_GSS_SPKMP; - break; - default: - return 0; + while ((p = strsep(&value, ":")) != NULL) { + switch (match_token(p, nfs_secflavor_tokens, args)) { + case Opt_sec_none: + pseudoflavor = RPC_AUTH_NULL; + break; + case Opt_sec_sys: + pseudoflavor = RPC_AUTH_UNIX; + break; + case Opt_sec_krb5: + pseudoflavor = RPC_AUTH_GSS_KRB5; + break; + case Opt_sec_krb5i: + pseudoflavor = RPC_AUTH_GSS_KRB5I; + break; + case Opt_sec_krb5p: + pseudoflavor = RPC_AUTH_GSS_KRB5P; + break; + case Opt_sec_lkey: + pseudoflavor = RPC_AUTH_GSS_LKEY; + break; + case Opt_sec_lkeyi: + pseudoflavor = RPC_AUTH_GSS_LKEYI; + break; + case Opt_sec_lkeyp: + pseudoflavor = RPC_AUTH_GSS_LKEYP; + break; + case Opt_sec_spkm: + pseudoflavor = RPC_AUTH_GSS_SPKM; + break; + case Opt_sec_spkmi: + pseudoflavor = RPC_AUTH_GSS_SPKMI; + break; + case Opt_sec_spkmp: + pseudoflavor = RPC_AUTH_GSS_SPKMP; + break; + default: + dfprintk(MOUNT, + "NFS: sec= option '%s' not recognized\n", p); + return 0; + } + + if (!nfs_auth_info_add(&mnt->auth_info, pseudoflavor)) + return 0; } - mnt->flags |= NFS_MOUNT_SECFLAVOUR; - nfs_set_auth_parsed_mount_data(mnt, pseudoflavor); return 1; } @@ -1623,12 +1668,14 @@ out_security_failure: } /* - * Ensure that the specified authtype in args->auth_flavors[0] is supported by - * the server. Returns 0 if it's ok, and -EACCES if not. + * Ensure that a specified authtype in args->auth_info is supported by + * the server. Returns 0 and sets args->selected_flavor if it's ok, and + * -EACCES if not. */ -static int nfs_verify_authflavor(struct nfs_parsed_mount_data *args, +static int nfs_verify_authflavors(struct nfs_parsed_mount_data *args, rpc_authflavor_t *server_authlist, unsigned int count) { + rpc_authflavor_t flavor = RPC_AUTH_MAXFLAVOR; unsigned int i; /* @@ -1640,17 +1687,20 @@ static int nfs_verify_authflavor(struct nfs_parsed_mount_data *args, * can be used. */ for (i = 0; i < count; i++) { - if (args->auth_flavors[0] == server_authlist[i] || - server_authlist[i] == RPC_AUTH_NULL) + flavor = server_authlist[i]; + + if (nfs_auth_info_match(&args->auth_info, flavor) || + flavor == RPC_AUTH_NULL) goto out; } - dfprintk(MOUNT, "NFS: auth flavor %u not supported by server\n", - args->auth_flavors[0]); + dfprintk(MOUNT, + "NFS: specified auth flavors not supported by server\n"); return -EACCES; out: - dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->auth_flavors[0]); + args->selected_flavor = flavor; + dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->selected_flavor); return 0; } @@ -1738,9 +1788,10 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf * Was a sec= authflavor specified in the options? First, verify * whether the server supports it, and then just try to use it if so. */ - if (args->auth_flavor_len > 0) { - status = nfs_verify_authflavor(args, authlist, authlist_len); - dfprintk(MOUNT, "NFS: using auth flavor %u\n", args->auth_flavors[0]); + if (args->auth_info.flavor_len > 0) { + status = nfs_verify_authflavors(args, authlist, authlist_len); + dfprintk(MOUNT, "NFS: using auth flavor %u\n", + args->selected_flavor); if (status) return ERR_PTR(status); return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); @@ -1769,7 +1820,7 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf /* Fallthrough */ } dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", flavor); - nfs_set_auth_parsed_mount_data(args, flavor); + args->selected_flavor = flavor; server = nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); if (!IS_ERR(server)) return server; @@ -1785,7 +1836,7 @@ static struct nfs_server *nfs_try_mount_request(struct nfs_mount_info *mount_inf /* Last chance! Try AUTH_UNIX */ dfprintk(MOUNT, "NFS: attempting to use auth flavor %u\n", RPC_AUTH_UNIX); - nfs_set_auth_parsed_mount_data(args, RPC_AUTH_UNIX); + args->selected_flavor = RPC_AUTH_UNIX; return nfs_mod->rpc_ops->create_server(mount_info, nfs_mod); } @@ -1972,9 +2023,9 @@ static int nfs23_validate_mount_data(void *options, args->bsize = data->bsize; if (data->flags & NFS_MOUNT_SECFLAVOUR) - nfs_set_auth_parsed_mount_data(args, data->pseudoflavor); + args->selected_flavor = data->pseudoflavor; else - nfs_set_auth_parsed_mount_data(args, RPC_AUTH_UNIX); + args->selected_flavor = RPC_AUTH_UNIX; if (!args->nfs_server.hostname) goto out_nomem; @@ -2108,9 +2159,6 @@ static int nfs_validate_text_mount_data(void *options, nfs_set_port(sap, &args->nfs_server.port, port); - if (args->auth_flavor_len > 1) - goto out_bad_auth; - return nfs_parse_devname(dev_name, &args->nfs_server.hostname, max_namelen, @@ -2130,10 +2178,6 @@ out_invalid_transport_udp: out_no_address: dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n"); return -EINVAL; - -out_bad_auth: - dfprintk(MOUNT, "NFS: Too many RPC auth flavours specified\n"); - return -EINVAL; } static int @@ -2143,8 +2187,10 @@ nfs_compare_remount_data(struct nfs_server *nfss, if (data->flags != nfss->flags || data->rsize != nfss->rsize || data->wsize != nfss->wsize || + data->version != nfss->nfs_client->rpc_ops->version || + data->minorversion != nfss->nfs_client->cl_minorversion || data->retrans != nfss->client->cl_timeout->to_retries || - data->auth_flavors[0] != nfss->client->cl_auth->au_flavor || + data->selected_flavor != nfss->client->cl_auth->au_flavor || data->acregmin != nfss->acregmin / HZ || data->acregmax != nfss->acregmax / HZ || data->acdirmin != nfss->acdirmin / HZ || @@ -2189,7 +2235,8 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) data->rsize = nfss->rsize; data->wsize = nfss->wsize; data->retrans = nfss->client->cl_timeout->to_retries; - nfs_set_auth_parsed_mount_data(data, nfss->client->cl_auth->au_flavor); + data->selected_flavor = nfss->client->cl_auth->au_flavor; + data->auth_info = nfss->auth_info; data->acregmin = nfss->acregmin / HZ; data->acregmax = nfss->acregmax / HZ; data->acdirmin = nfss->acdirmin / HZ; @@ -2197,12 +2244,14 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data) data->timeo = 10U * nfss->client->cl_timeout->to_initval / HZ; data->nfs_server.port = nfss->port; data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen; + data->version = nfsvers; + data->minorversion = nfss->nfs_client->cl_minorversion; memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr, data->nfs_server.addrlen); /* overwrite those values with any that were specified */ - error = nfs_parse_mount_options((char *)options, data); - if (error < 0) + error = -EINVAL; + if (!nfs_parse_mount_options((char *)options, data)) goto out; /* @@ -2332,7 +2381,7 @@ static int nfs_compare_mount_options(const struct super_block *s, const struct n goto Ebusy; if (a->acdirmax != b->acdirmax) goto Ebusy; - if (b->flags & NFS_MOUNT_SECFLAVOUR && + if (b->auth_info.flavor_len > 0 && clnt_a->cl_auth->au_flavor != clnt_b->cl_auth->au_flavor) goto Ebusy; return 1; @@ -2530,6 +2579,7 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server, mntroot = ERR_PTR(error); goto error_splat_bdi; } + server->super = s; } if (!s->s_root) { @@ -2713,9 +2763,9 @@ static int nfs4_validate_mount_data(void *options, data->auth_flavours, sizeof(pseudoflavor))) return -EFAULT; - nfs_set_auth_parsed_mount_data(args, pseudoflavor); + args->selected_flavor = pseudoflavor; } else - nfs_set_auth_parsed_mount_data(args, RPC_AUTH_UNIX); + args->selected_flavor = RPC_AUTH_UNIX; c = strndup_user(data->hostname.data, NFS4_MAXNAMLEN); if (IS_ERR(c)) diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index bb939edd4c99..0c29b1bb3936 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -493,7 +493,7 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) unsigned long long fileid; struct dentry *sdentry; struct rpc_task *task; - int error = -EIO; + int error = -EBUSY; dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", dentry->d_parent->d_name.name, dentry->d_name.name, @@ -503,7 +503,6 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) /* * We don't allow a dentry to be silly-renamed twice. */ - error = -EBUSY; if (dentry->d_flags & DCACHE_NFSFS_RENAMED) goto out; diff --git a/fs/select.c b/fs/select.c index 35d4adc749d9..dfd5cb18c012 100644 --- a/fs/select.c +++ b/fs/select.c @@ -238,8 +238,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state, set_current_state(state); if (!pwq->triggered) - rc = freezable_schedule_hrtimeout_range(expires, slack, - HRTIMER_MODE_ABS); + rc = schedule_hrtimeout_range(expires, slack, HRTIMER_MODE_ABS); __set_current_state(TASK_RUNNING); /* diff --git a/fs/sysfs/Makefile b/fs/sysfs/Makefile index 7a1ceb946b80..8876ac183373 100644 --- a/fs/sysfs/Makefile +++ b/fs/sysfs/Makefile @@ -2,5 +2,4 @@ # Makefile for the sysfs virtual filesystem # -obj-y := inode.o file.o dir.o symlink.o mount.o bin.o \ - group.o +obj-y := inode.o file.o dir.o symlink.o mount.o group.o diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c deleted file mode 100644 index c590cabd57bb..000000000000 --- a/fs/sysfs/bin.c +++ /dev/null @@ -1,502 +0,0 @@ -/* - * fs/sysfs/bin.c - sysfs binary file implementation - * - * Copyright (c) 2003 Patrick Mochel - * Copyright (c) 2003 Matthew Wilcox - * Copyright (c) 2004 Silicon Graphics, Inc. - * Copyright (c) 2007 SUSE Linux Products GmbH - * Copyright (c) 2007 Tejun Heo <teheo@suse.de> - * - * This file is released under the GPLv2. - * - * Please see Documentation/filesystems/sysfs.txt for more information. - */ - -#undef DEBUG - -#include <linux/errno.h> -#include <linux/fs.h> -#include <linux/kernel.h> -#include <linux/kobject.h> -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/mutex.h> -#include <linux/mm.h> -#include <linux/uaccess.h> - -#include "sysfs.h" - -/* - * There's one bin_buffer for each open file. - * - * filp->private_data points to bin_buffer and - * sysfs_dirent->s_bin_attr.buffers points to a the bin_buffer s - * sysfs_dirent->s_bin_attr.buffers is protected by sysfs_bin_lock - */ -static DEFINE_MUTEX(sysfs_bin_lock); - -struct bin_buffer { - struct mutex mutex; - void *buffer; - int mmapped; - const struct vm_operations_struct *vm_ops; - struct file *file; - struct hlist_node list; -}; - -static int -fill_read(struct file *file, char *buffer, loff_t off, size_t count) -{ - struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; - struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr; - struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; - int rc; - - /* need attr_sd for attr, its parent for kobj */ - if (!sysfs_get_active(attr_sd)) - return -ENODEV; - - rc = -EIO; - if (attr->read) - rc = attr->read(file, kobj, attr, buffer, off, count); - - sysfs_put_active(attr_sd); - - return rc; -} - -static ssize_t -read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off) -{ - struct bin_buffer *bb = file->private_data; - int size = file_inode(file)->i_size; - loff_t offs = *off; - int count = min_t(size_t, bytes, PAGE_SIZE); - char *temp; - - if (!bytes) - return 0; - - if (size) { - if (offs > size) - return 0; - if (offs + count > size) - count = size - offs; - } - - temp = kmalloc(count, GFP_KERNEL); - if (!temp) - return -ENOMEM; - - mutex_lock(&bb->mutex); - - count = fill_read(file, bb->buffer, offs, count); - if (count < 0) { - mutex_unlock(&bb->mutex); - goto out_free; - } - - memcpy(temp, bb->buffer, count); - - mutex_unlock(&bb->mutex); - - if (copy_to_user(userbuf, temp, count)) { - count = -EFAULT; - goto out_free; - } - - pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count); - - *off = offs + count; - - out_free: - kfree(temp); - return count; -} - -static int -flush_write(struct file *file, char *buffer, loff_t offset, size_t count) -{ - struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; - struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr; - struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; - int rc; - - /* need attr_sd for attr, its parent for kobj */ - if (!sysfs_get_active(attr_sd)) - return -ENODEV; - - rc = -EIO; - if (attr->write) - rc = attr->write(file, kobj, attr, buffer, offset, count); - - sysfs_put_active(attr_sd); - - return rc; -} - -static ssize_t write(struct file *file, const char __user *userbuf, - size_t bytes, loff_t *off) -{ - struct bin_buffer *bb = file->private_data; - int size = file_inode(file)->i_size; - loff_t offs = *off; - int count = min_t(size_t, bytes, PAGE_SIZE); - char *temp; - - if (!bytes) - return 0; - - if (size) { - if (offs > size) - return 0; - if (offs + count > size) - count = size - offs; - } - - temp = memdup_user(userbuf, count); - if (IS_ERR(temp)) - return PTR_ERR(temp); - - mutex_lock(&bb->mutex); - - memcpy(bb->buffer, temp, count); - - count = flush_write(file, bb->buffer, offs, count); - mutex_unlock(&bb->mutex); - - if (count > 0) - *off = offs + count; - - kfree(temp); - return count; -} - -static void bin_vma_open(struct vm_area_struct *vma) -{ - struct file *file = vma->vm_file; - struct bin_buffer *bb = file->private_data; - struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; - - if (!bb->vm_ops) - return; - - if (!sysfs_get_active(attr_sd)) - return; - - if (bb->vm_ops->open) - bb->vm_ops->open(vma); - - sysfs_put_active(attr_sd); -} - -static int bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct file *file = vma->vm_file; - struct bin_buffer *bb = file->private_data; - struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; - int ret; - - if (!bb->vm_ops) - return VM_FAULT_SIGBUS; - - if (!sysfs_get_active(attr_sd)) - return VM_FAULT_SIGBUS; - - ret = VM_FAULT_SIGBUS; - if (bb->vm_ops->fault) - ret = bb->vm_ops->fault(vma, vmf); - - sysfs_put_active(attr_sd); - return ret; -} - -static int bin_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) -{ - struct file *file = vma->vm_file; - struct bin_buffer *bb = file->private_data; - struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; - int ret; - - if (!bb->vm_ops) - return VM_FAULT_SIGBUS; - - if (!sysfs_get_active(attr_sd)) - return VM_FAULT_SIGBUS; - - ret = 0; - if (bb->vm_ops->page_mkwrite) - ret = bb->vm_ops->page_mkwrite(vma, vmf); - else - file_update_time(file); - - sysfs_put_active(attr_sd); - return ret; -} - -static int bin_access(struct vm_area_struct *vma, unsigned long addr, - void *buf, int len, int write) -{ - struct file *file = vma->vm_file; - struct bin_buffer *bb = file->private_data; - struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; - int ret; - - if (!bb->vm_ops) - return -EINVAL; - - if (!sysfs_get_active(attr_sd)) - return -EINVAL; - - ret = -EINVAL; - if (bb->vm_ops->access) - ret = bb->vm_ops->access(vma, addr, buf, len, write); - - sysfs_put_active(attr_sd); - return ret; -} - -#ifdef CONFIG_NUMA -static int bin_set_policy(struct vm_area_struct *vma, struct mempolicy *new) -{ - struct file *file = vma->vm_file; - struct bin_buffer *bb = file->private_data; - struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; - int ret; - - if (!bb->vm_ops) - return 0; - - if (!sysfs_get_active(attr_sd)) - return -EINVAL; - - ret = 0; - if (bb->vm_ops->set_policy) - ret = bb->vm_ops->set_policy(vma, new); - - sysfs_put_active(attr_sd); - return ret; -} - -static struct mempolicy *bin_get_policy(struct vm_area_struct *vma, - unsigned long addr) -{ - struct file *file = vma->vm_file; - struct bin_buffer *bb = file->private_data; - struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; - struct mempolicy *pol; - - if (!bb->vm_ops) - return vma->vm_policy; - - if (!sysfs_get_active(attr_sd)) - return vma->vm_policy; - - pol = vma->vm_policy; - if (bb->vm_ops->get_policy) - pol = bb->vm_ops->get_policy(vma, addr); - - sysfs_put_active(attr_sd); - return pol; -} - -static int bin_migrate(struct vm_area_struct *vma, const nodemask_t *from, - const nodemask_t *to, unsigned long flags) -{ - struct file *file = vma->vm_file; - struct bin_buffer *bb = file->private_data; - struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; - int ret; - - if (!bb->vm_ops) - return 0; - - if (!sysfs_get_active(attr_sd)) - return 0; - - ret = 0; - if (bb->vm_ops->migrate) - ret = bb->vm_ops->migrate(vma, from, to, flags); - - sysfs_put_active(attr_sd); - return ret; -} -#endif - -static const struct vm_operations_struct bin_vm_ops = { - .open = bin_vma_open, - .fault = bin_fault, - .page_mkwrite = bin_page_mkwrite, - .access = bin_access, -#ifdef CONFIG_NUMA - .set_policy = bin_set_policy, - .get_policy = bin_get_policy, - .migrate = bin_migrate, -#endif -}; - -static int mmap(struct file *file, struct vm_area_struct *vma) -{ - struct bin_buffer *bb = file->private_data; - struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; - struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr; - struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; - int rc; - - mutex_lock(&bb->mutex); - - /* need attr_sd for attr, its parent for kobj */ - rc = -ENODEV; - if (!sysfs_get_active(attr_sd)) - goto out_unlock; - - rc = -EINVAL; - if (!attr->mmap) - goto out_put; - - rc = attr->mmap(file, kobj, attr, vma); - if (rc) - goto out_put; - - /* - * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() - * to satisfy versions of X which crash if the mmap fails: that - * substitutes a new vm_file, and we don't then want bin_vm_ops. - */ - if (vma->vm_file != file) - goto out_put; - - rc = -EINVAL; - if (bb->mmapped && bb->vm_ops != vma->vm_ops) - goto out_put; - - /* - * It is not possible to successfully wrap close. - * So error if someone is trying to use close. - */ - rc = -EINVAL; - if (vma->vm_ops && vma->vm_ops->close) - goto out_put; - - rc = 0; - bb->mmapped = 1; - bb->vm_ops = vma->vm_ops; - vma->vm_ops = &bin_vm_ops; -out_put: - sysfs_put_active(attr_sd); -out_unlock: - mutex_unlock(&bb->mutex); - - return rc; -} - -static int open(struct inode *inode, struct file *file) -{ - struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; - struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr; - struct bin_buffer *bb = NULL; - int error; - - /* binary file operations requires both @sd and its parent */ - if (!sysfs_get_active(attr_sd)) - return -ENODEV; - - error = -EACCES; - if ((file->f_mode & FMODE_WRITE) && !(attr->write || attr->mmap)) - goto err_out; - if ((file->f_mode & FMODE_READ) && !(attr->read || attr->mmap)) - goto err_out; - - error = -ENOMEM; - bb = kzalloc(sizeof(*bb), GFP_KERNEL); - if (!bb) - goto err_out; - - bb->buffer = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (!bb->buffer) - goto err_out; - - mutex_init(&bb->mutex); - bb->file = file; - file->private_data = bb; - - mutex_lock(&sysfs_bin_lock); - hlist_add_head(&bb->list, &attr_sd->s_bin_attr.buffers); - mutex_unlock(&sysfs_bin_lock); - - /* open succeeded, put active references */ - sysfs_put_active(attr_sd); - return 0; - - err_out: - sysfs_put_active(attr_sd); - kfree(bb); - return error; -} - -static int release(struct inode *inode, struct file *file) -{ - struct bin_buffer *bb = file->private_data; - - mutex_lock(&sysfs_bin_lock); - hlist_del(&bb->list); - mutex_unlock(&sysfs_bin_lock); - - kfree(bb->buffer); - kfree(bb); - return 0; -} - -const struct file_operations bin_fops = { - .read = read, - .write = write, - .mmap = mmap, - .llseek = generic_file_llseek, - .open = open, - .release = release, -}; - - -void unmap_bin_file(struct sysfs_dirent *attr_sd) -{ - struct bin_buffer *bb; - - if (sysfs_type(attr_sd) != SYSFS_KOBJ_BIN_ATTR) - return; - - mutex_lock(&sysfs_bin_lock); - - hlist_for_each_entry(bb, &attr_sd->s_bin_attr.buffers, list) { - struct inode *inode = file_inode(bb->file); - - unmap_mapping_range(inode->i_mapping, 0, 0, 1); - } - - mutex_unlock(&sysfs_bin_lock); -} - -/** - * sysfs_create_bin_file - create binary file for object. - * @kobj: object. - * @attr: attribute descriptor. - */ -int sysfs_create_bin_file(struct kobject *kobj, - const struct bin_attribute *attr) -{ - BUG_ON(!kobj || !kobj->sd || !attr); - - return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR); -} -EXPORT_SYMBOL_GPL(sysfs_create_bin_file); - -/** - * sysfs_remove_bin_file - remove binary file for object. - * @kobj: object. - * @attr: attribute descriptor. - */ -void sysfs_remove_bin_file(struct kobject *kobj, - const struct bin_attribute *attr) -{ - sysfs_hash_and_remove(kobj->sd, NULL, attr->attr.name); -} -EXPORT_SYMBOL_GPL(sysfs_remove_bin_file); diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 4d83cedb9fcb..5e73d6626e50 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -26,21 +26,21 @@ #include "sysfs.h" DEFINE_MUTEX(sysfs_mutex); -DEFINE_SPINLOCK(sysfs_assoc_lock); +DEFINE_SPINLOCK(sysfs_symlink_target_lock); -#define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb); +#define to_sysfs_dirent(X) rb_entry((X), struct sysfs_dirent, s_rb) static DEFINE_SPINLOCK(sysfs_ino_lock); static DEFINE_IDA(sysfs_ino_ida); /** * sysfs_name_hash - * @ns: Namespace tag to hash * @name: Null terminated string to hash + * @ns: Namespace tag to hash * * Returns 31 bit hash of ns + name (so it fits in an off_t ) */ -static unsigned int sysfs_name_hash(const void *ns, const char *name) +static unsigned int sysfs_name_hash(const char *name, const void *ns) { unsigned long hash = init_name_hash(); unsigned int len = strlen(name); @@ -56,8 +56,8 @@ static unsigned int sysfs_name_hash(const void *ns, const char *name) return hash; } -static int sysfs_name_compare(unsigned int hash, const void *ns, - const char *name, const struct sysfs_dirent *sd) +static int sysfs_name_compare(unsigned int hash, const char *name, + const void *ns, const struct sysfs_dirent *sd) { if (hash != sd->s_hash) return hash - sd->s_hash; @@ -69,7 +69,7 @@ static int sysfs_name_compare(unsigned int hash, const void *ns, static int sysfs_sd_compare(const struct sysfs_dirent *left, const struct sysfs_dirent *right) { - return sysfs_name_compare(left->s_hash, left->s_ns, left->s_name, + return sysfs_name_compare(left->s_hash, left->s_name, left->s_ns, right); } @@ -132,24 +132,6 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd) rb_erase(&sd->s_rb, &sd->s_parent->s_dir.children); } -#ifdef CONFIG_DEBUG_LOCK_ALLOC - -/* Test for attributes that want to ignore lockdep for read-locking */ -static bool ignore_lockdep(struct sysfs_dirent *sd) -{ - return sysfs_type(sd) == SYSFS_KOBJ_ATTR && - sd->s_attr.attr->ignore_lockdep; -} - -#else - -static inline bool ignore_lockdep(struct sysfs_dirent *sd) -{ - return true; -} - -#endif - /** * sysfs_get_active - get an active reference to sysfs_dirent * @sd: sysfs_dirent to get an active reference to @@ -168,7 +150,7 @@ struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd) if (!atomic_inc_unless_negative(&sd->s_active)) return NULL; - if (likely(!ignore_lockdep(sd))) + if (likely(!sysfs_ignore_lockdep(sd))) rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_); return sd; } @@ -187,7 +169,7 @@ void sysfs_put_active(struct sysfs_dirent *sd) if (unlikely(!sd)) return; - if (likely(!ignore_lockdep(sd))) + if (likely(!sysfs_ignore_lockdep(sd))) rwsem_release(&sd->dep_map, 1, _RET_IP_); v = atomic_dec_return(&sd->s_active); if (likely(v != SD_DEACTIVATED_BIAS)) @@ -400,22 +382,19 @@ struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type) /** * sysfs_addrm_start - prepare for sysfs_dirent add/remove * @acxt: pointer to sysfs_addrm_cxt to be used - * @parent_sd: parent sysfs_dirent * - * This function is called when the caller is about to add or - * remove sysfs_dirent under @parent_sd. This function acquires - * sysfs_mutex. @acxt is used to keep and pass context to - * other addrm functions. + * This function is called when the caller is about to add or remove + * sysfs_dirent. This function acquires sysfs_mutex. @acxt is used + * to keep and pass context to other addrm functions. * * LOCKING: * Kernel thread context (may sleep). sysfs_mutex is locked on * return. */ -void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, - struct sysfs_dirent *parent_sd) +void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt) + __acquires(sysfs_mutex) { memset(acxt, 0, sizeof(*acxt)); - acxt->parent_sd = parent_sd; mutex_lock(&sysfs_mutex); } @@ -424,10 +403,11 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, * __sysfs_add_one - add sysfs_dirent to parent without warning * @acxt: addrm context to use * @sd: sysfs_dirent to be added + * @parent_sd: the parent sysfs_dirent to add @sd to * - * Get @acxt->parent_sd and set sd->s_parent to it and increment - * nlink of parent inode if @sd is a directory and link into the - * children list of the parent. + * Get @parent_sd and set @sd->s_parent to it and increment nlink of + * the parent inode if @sd is a directory and link into the children + * list of the parent. * * This function should be called between calls to * sysfs_addrm_start() and sysfs_addrm_finish() and should be @@ -440,27 +420,28 @@ void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, * 0 on success, -EEXIST if entry with the given name already * exists. */ -int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) +int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, + struct sysfs_dirent *parent_sd) { struct sysfs_inode_attrs *ps_iattr; int ret; - if (!!sysfs_ns_type(acxt->parent_sd) != !!sd->s_ns) { + if (!!sysfs_ns_type(parent_sd) != !!sd->s_ns) { WARN(1, KERN_WARNING "sysfs: ns %s in '%s' for '%s'\n", - sysfs_ns_type(acxt->parent_sd) ? "required" : "invalid", - acxt->parent_sd->s_name, sd->s_name); + sysfs_ns_type(parent_sd) ? "required" : "invalid", + parent_sd->s_name, sd->s_name); return -EINVAL; } - sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name); - sd->s_parent = sysfs_get(acxt->parent_sd); + sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns); + sd->s_parent = sysfs_get(parent_sd); ret = sysfs_link_sibling(sd); if (ret) return ret; /* Update timestamps on the parent */ - ps_iattr = acxt->parent_sd->s_iattr; + ps_iattr = parent_sd->s_iattr; if (ps_iattr) { struct iattr *ps_iattrs = &ps_iattr->ia_iattr; ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; @@ -490,14 +471,32 @@ static char *sysfs_pathname(struct sysfs_dirent *sd, char *path) return path; } +void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name) +{ + char *path; + + path = kzalloc(PATH_MAX, GFP_KERNEL); + if (path) { + sysfs_pathname(parent, path); + strlcat(path, "/", PATH_MAX); + strlcat(path, name, PATH_MAX); + } + + WARN(1, KERN_WARNING "sysfs: cannot create duplicate filename '%s'\n", + path ? path : name); + + kfree(path); +} + /** * sysfs_add_one - add sysfs_dirent to parent * @acxt: addrm context to use * @sd: sysfs_dirent to be added + * @parent_sd: the parent sysfs_dirent to add @sd to * - * Get @acxt->parent_sd and set sd->s_parent to it and increment - * nlink of parent inode if @sd is a directory and link into the - * children list of the parent. + * Get @parent_sd and set @sd->s_parent to it and increment nlink of + * the parent inode if @sd is a directory and link into the children + * list of the parent. * * This function should be called between calls to * sysfs_addrm_start() and sysfs_addrm_finish() and should be @@ -510,23 +509,15 @@ static char *sysfs_pathname(struct sysfs_dirent *sd, char *path) * 0 on success, -EEXIST if entry with the given name already * exists. */ -int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) +int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, + struct sysfs_dirent *parent_sd) { int ret; - ret = __sysfs_add_one(acxt, sd); - if (ret == -EEXIST) { - char *path = kzalloc(PATH_MAX, GFP_KERNEL); - WARN(1, KERN_WARNING - "sysfs: cannot create duplicate filename '%s'\n", - (path == NULL) ? sd->s_name - : (sysfs_pathname(acxt->parent_sd, path), - strlcat(path, "/", PATH_MAX), - strlcat(path, sd->s_name, PATH_MAX), - path)); - kfree(path); - } + ret = __sysfs_add_one(acxt, sd, parent_sd); + if (ret == -EEXIST) + sysfs_warn_dup(parent_sd, sd->s_name); return ret; } @@ -545,16 +536,22 @@ int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) * LOCKING: * Determined by sysfs_addrm_start(). */ -void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) +static void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, + struct sysfs_dirent *sd) { struct sysfs_inode_attrs *ps_iattr; - BUG_ON(sd->s_flags & SYSFS_FLAG_REMOVED); + /* + * Removal can be called multiple times on the same node. Only the + * first invocation is effective and puts the base ref. + */ + if (sd->s_flags & SYSFS_FLAG_REMOVED) + return; sysfs_unlink_sibling(sd); /* Update timestamps on the parent */ - ps_iattr = acxt->parent_sd->s_iattr; + ps_iattr = sd->s_parent->s_iattr; if (ps_iattr) { struct iattr *ps_iattrs = &ps_iattr->ia_iattr; ps_iattrs->ia_ctime = ps_iattrs->ia_mtime = CURRENT_TIME; @@ -577,6 +574,7 @@ void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd) * sysfs_mutex is released. */ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) + __releases(sysfs_mutex) { /* release resources acquired by sysfs_addrm_start() */ mutex_unlock(&sysfs_mutex); @@ -588,7 +586,7 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) acxt->removed = sd->u.removed_list; sysfs_deactivate(sd); - unmap_bin_file(sd); + sysfs_unmap_bin_file(sd); sysfs_put(sd); } } @@ -597,6 +595,7 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) * sysfs_find_dirent - find sysfs_dirent with the given name * @parent_sd: sysfs_dirent to search under * @name: name to look for + * @ns: the namespace tag to use * * Look for sysfs_dirent with name @name under @parent_sd. * @@ -607,8 +606,8 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt) * Pointer to sysfs_dirent if found, NULL if not. */ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, - const void *ns, - const unsigned char *name) + const unsigned char *name, + const void *ns) { struct rb_node *node = parent_sd->s_dir.children.rb_node; unsigned int hash; @@ -620,13 +619,13 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, return NULL; } - hash = sysfs_name_hash(ns, name); + hash = sysfs_name_hash(name, ns); while (node) { struct sysfs_dirent *sd; int result; sd = to_sysfs_dirent(node); - result = sysfs_name_compare(hash, ns, name, sd); + result = sysfs_name_compare(hash, name, ns, sd); if (result < 0) node = node->rb_left; else if (result > 0) @@ -638,9 +637,10 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, } /** - * sysfs_get_dirent - find and get sysfs_dirent with the given name + * sysfs_get_dirent_ns - find and get sysfs_dirent with the given name * @parent_sd: sysfs_dirent to search under * @name: name to look for + * @ns: the namespace tag to use * * Look for sysfs_dirent with name @name under @parent_sd and get * it if found. @@ -651,24 +651,25 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, * RETURNS: * Pointer to sysfs_dirent if found, NULL if not. */ -struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, - const void *ns, - const unsigned char *name) +struct sysfs_dirent *sysfs_get_dirent_ns(struct sysfs_dirent *parent_sd, + const unsigned char *name, + const void *ns) { struct sysfs_dirent *sd; mutex_lock(&sysfs_mutex); - sd = sysfs_find_dirent(parent_sd, ns, name); + sd = sysfs_find_dirent(parent_sd, name, ns); sysfs_get(sd); mutex_unlock(&sysfs_mutex); return sd; } -EXPORT_SYMBOL_GPL(sysfs_get_dirent); +EXPORT_SYMBOL_GPL(sysfs_get_dirent_ns); static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, - enum kobj_ns_type type, const void *ns, const char *name, - struct sysfs_dirent **p_sd) + enum kobj_ns_type type, + const char *name, const void *ns, + struct sysfs_dirent **p_sd) { umode_t mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO; struct sysfs_addrm_cxt acxt; @@ -685,8 +686,8 @@ static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd, sd->s_dir.kobj = kobj; /* link in */ - sysfs_addrm_start(&acxt, parent_sd); - rc = sysfs_add_one(&acxt, sd); + sysfs_addrm_start(&acxt); + rc = sysfs_add_one(&acxt, sd, parent_sd); sysfs_addrm_finish(&acxt); if (rc == 0) @@ -701,7 +702,7 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name, struct sysfs_dirent **p_sd) { return create_dir(kobj, kobj->sd, - KOBJ_NS_TYPE_NONE, NULL, name, p_sd); + KOBJ_NS_TYPE_NONE, name, NULL, p_sd); } /** @@ -730,14 +731,14 @@ static enum kobj_ns_type sysfs_read_ns_type(struct kobject *kobj) } /** - * sysfs_create_dir - create a directory for an object. - * @kobj: object we're creating directory for. + * sysfs_create_dir_ns - create a directory for an object with a namespace tag + * @kobj: object we're creating directory for + * @ns: the namespace tag to use */ -int sysfs_create_dir(struct kobject *kobj) +int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) { enum kobj_ns_type type; struct sysfs_dirent *parent_sd, *sd; - const void *ns = NULL; int error = 0; BUG_ON(!kobj); @@ -750,11 +751,9 @@ int sysfs_create_dir(struct kobject *kobj) if (!parent_sd) return -ENOENT; - if (sysfs_ns_type(parent_sd)) - ns = kobj->ktype->namespace(kobj); type = sysfs_read_ns_type(kobj); - error = create_dir(kobj, parent_sd, type, ns, kobject_name(kobj), &sd); + error = create_dir(kobj, parent_sd, type, kobject_name(kobj), ns, &sd); if (!error) kobj->sd = sd; return error; @@ -776,7 +775,7 @@ static struct dentry *sysfs_lookup(struct inode *dir, struct dentry *dentry, type = sysfs_ns_type(parent_sd); ns = sysfs_info(dir->i_sb)->ns[type]; - sd = sysfs_find_dirent(parent_sd, ns, dentry->d_name.name); + sd = sysfs_find_dirent(parent_sd, dentry->d_name.name, ns); /* no such entry */ if (!sd) { @@ -807,41 +806,128 @@ const struct inode_operations sysfs_dir_inode_operations = { .setxattr = sysfs_setxattr, }; -static void remove_dir(struct sysfs_dirent *sd) +static struct sysfs_dirent *sysfs_leftmost_descendant(struct sysfs_dirent *pos) { - struct sysfs_addrm_cxt acxt; + struct sysfs_dirent *last; - sysfs_addrm_start(&acxt, sd->s_parent); - sysfs_remove_one(&acxt, sd); - sysfs_addrm_finish(&acxt); + while (true) { + struct rb_node *rbn; + + last = pos; + + if (sysfs_type(pos) != SYSFS_DIR) + break; + + rbn = rb_first(&pos->s_dir.children); + if (!rbn) + break; + + pos = to_sysfs_dirent(rbn); + } + + return last; } -void sysfs_remove_subdir(struct sysfs_dirent *sd) +/** + * sysfs_next_descendant_post - find the next descendant for post-order walk + * @pos: the current position (%NULL to initiate traversal) + * @root: sysfs_dirent whose descendants to walk + * + * Find the next descendant to visit for post-order traversal of @root's + * descendants. @root is included in the iteration and the last node to be + * visited. + */ +static struct sysfs_dirent *sysfs_next_descendant_post(struct sysfs_dirent *pos, + struct sysfs_dirent *root) { - remove_dir(sd); + struct rb_node *rbn; + + lockdep_assert_held(&sysfs_mutex); + + /* if first iteration, visit leftmost descendant which may be root */ + if (!pos) + return sysfs_leftmost_descendant(root); + + /* if we visited @root, we're done */ + if (pos == root) + return NULL; + + /* if there's an unvisited sibling, visit its leftmost descendant */ + rbn = rb_next(&pos->s_rb); + if (rbn) + return sysfs_leftmost_descendant(to_sysfs_dirent(rbn)); + + /* no sibling left, visit parent */ + return pos->s_parent; } +static void __sysfs_remove(struct sysfs_addrm_cxt *acxt, + struct sysfs_dirent *sd) +{ + struct sysfs_dirent *pos, *next; + + if (!sd) + return; -static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd) + pr_debug("sysfs %s: removing\n", sd->s_name); + + next = NULL; + do { + pos = next; + next = sysfs_next_descendant_post(pos, sd); + if (pos) + sysfs_remove_one(acxt, pos); + } while (next); +} + +/** + * sysfs_remove - remove a sysfs_dirent recursively + * @sd: the sysfs_dirent to remove + * + * Remove @sd along with all its subdirectories and files. + */ +void sysfs_remove(struct sysfs_dirent *sd) { struct sysfs_addrm_cxt acxt; - struct rb_node *pos; - if (!dir_sd) - return; + sysfs_addrm_start(&acxt); + __sysfs_remove(&acxt, sd); + sysfs_addrm_finish(&acxt); +} - pr_debug("sysfs %s: removing dir\n", dir_sd->s_name); - sysfs_addrm_start(&acxt, dir_sd); - pos = rb_first(&dir_sd->s_dir.children); - while (pos) { - struct sysfs_dirent *sd = to_sysfs_dirent(pos); - pos = rb_next(pos); - if (sysfs_type(sd) != SYSFS_DIR) - sysfs_remove_one(&acxt, sd); +/** + * sysfs_hash_and_remove - find a sysfs_dirent by name and remove it + * @dir_sd: parent of the target + * @name: name of the sysfs_dirent to remove + * @ns: namespace tag of the sysfs_dirent to remove + * + * Look for the sysfs_dirent with @name and @ns under @dir_sd and remove + * it. Returns 0 on success, -ENOENT if such entry doesn't exist. + */ +int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name, + const void *ns) +{ + struct sysfs_addrm_cxt acxt; + struct sysfs_dirent *sd; + + if (!dir_sd) { + WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n", + name); + return -ENOENT; } + + sysfs_addrm_start(&acxt); + + sd = sysfs_find_dirent(dir_sd, name, ns); + if (sd) + __sysfs_remove(&acxt, sd); + sysfs_addrm_finish(&acxt); - remove_dir(dir_sd); + if (sd) + return 0; + else + return -ENOENT; } /** @@ -852,21 +938,34 @@ static void __sysfs_remove_dir(struct sysfs_dirent *dir_sd) * the directory before we remove the directory, and we've inlined * what used to be sysfs_rmdir() below, instead of calling separately. */ - void sysfs_remove_dir(struct kobject *kobj) { struct sysfs_dirent *sd = kobj->sd; - spin_lock(&sysfs_assoc_lock); + /* + * In general, kboject owner is responsible for ensuring removal + * doesn't race with other operations and sysfs doesn't provide any + * protection; however, when @kobj is used as a symlink target, the + * symlinking entity usually doesn't own @kobj and thus has no + * control over removal. @kobj->sd may be removed anytime and + * symlink code may end up dereferencing an already freed sd. + * + * sysfs_symlink_target_lock synchronizes @kobj->sd disassociation + * against symlink operations so that symlink code can safely + * dereference @kobj->sd. + */ + spin_lock(&sysfs_symlink_target_lock); kobj->sd = NULL; - spin_unlock(&sysfs_assoc_lock); + spin_unlock(&sysfs_symlink_target_lock); - __sysfs_remove_dir(sd); + if (sd) { + WARN_ON_ONCE(sysfs_type(sd) != SYSFS_DIR); + sysfs_remove(sd); + } } -int sysfs_rename(struct sysfs_dirent *sd, - struct sysfs_dirent *new_parent_sd, const void *new_ns, - const char *new_name) +int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd, + const char *new_name, const void *new_ns) { int error; @@ -878,7 +977,7 @@ int sysfs_rename(struct sysfs_dirent *sd, goto out; /* nothing to rename */ error = -EEXIST; - if (sysfs_find_dirent(new_parent_sd, new_ns, new_name)) + if (sysfs_find_dirent(new_parent_sd, new_name, new_ns)) goto out; /* rename sysfs_dirent */ @@ -899,7 +998,7 @@ int sysfs_rename(struct sysfs_dirent *sd, sysfs_get(new_parent_sd); sysfs_put(sd->s_parent); sd->s_ns = new_ns; - sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name); + sd->s_hash = sysfs_name_hash(sd->s_name, sd->s_ns); sd->s_parent = new_parent_sd; sysfs_link_sibling(sd); @@ -909,30 +1008,25 @@ int sysfs_rename(struct sysfs_dirent *sd, return error; } -int sysfs_rename_dir(struct kobject *kobj, const char *new_name) +int sysfs_rename_dir_ns(struct kobject *kobj, const char *new_name, + const void *new_ns) { struct sysfs_dirent *parent_sd = kobj->sd->s_parent; - const void *new_ns = NULL; - - if (sysfs_ns_type(parent_sd)) - new_ns = kobj->ktype->namespace(kobj); - return sysfs_rename(kobj->sd, parent_sd, new_ns, new_name); + return sysfs_rename(kobj->sd, parent_sd, new_name, new_ns); } -int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) +int sysfs_move_dir_ns(struct kobject *kobj, struct kobject *new_parent_kobj, + const void *new_ns) { struct sysfs_dirent *sd = kobj->sd; struct sysfs_dirent *new_parent_sd; - const void *new_ns = NULL; BUG_ON(!sd->s_parent); - if (sysfs_ns_type(sd->s_parent)) - new_ns = kobj->ktype->namespace(kobj); new_parent_sd = new_parent_kobj && new_parent_kobj->sd ? new_parent_kobj->sd : &sysfs_root; - return sysfs_rename(sd, new_parent_sd, new_ns, sd->s_name); + return sysfs_rename(sd, new_parent_sd, sd->s_name, new_ns); } /* Relationship between s_mode and the DT_xxx types */ diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 15ef5eb13663..79b5da2acbe1 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -21,70 +21,114 @@ #include <linux/mutex.h> #include <linux/limits.h> #include <linux/uaccess.h> +#include <linux/seq_file.h> +#include <linux/mm.h> #include "sysfs.h" /* - * There's one sysfs_buffer for each open file and one - * sysfs_open_dirent for each sysfs_dirent with one or more open - * files. + * There's one sysfs_open_file for each open file and one sysfs_open_dirent + * for each sysfs_dirent with one or more open files. * - * filp->private_data points to sysfs_buffer and - * sysfs_dirent->s_attr.open points to sysfs_open_dirent. s_attr.open - * is protected by sysfs_open_dirent_lock. + * sysfs_dirent->s_attr.open points to sysfs_open_dirent. s_attr.open is + * protected by sysfs_open_dirent_lock. + * + * filp->private_data points to seq_file whose ->private points to + * sysfs_open_file. sysfs_open_files are chained at + * sysfs_open_dirent->files, which is protected by sysfs_open_file_mutex. */ static DEFINE_SPINLOCK(sysfs_open_dirent_lock); +static DEFINE_MUTEX(sysfs_open_file_mutex); struct sysfs_open_dirent { atomic_t refcnt; atomic_t event; wait_queue_head_t poll; - struct list_head buffers; /* goes through sysfs_buffer.list */ + struct list_head files; /* goes through sysfs_open_file.list */ }; -struct sysfs_buffer { - size_t count; - loff_t pos; - char *page; - const struct sysfs_ops *ops; +struct sysfs_open_file { + struct sysfs_dirent *sd; + struct file *file; struct mutex mutex; - int needs_read_fill; int event; struct list_head list; + + bool mmapped; + const struct vm_operations_struct *vm_ops; }; -/** - * fill_read_buffer - allocate and fill buffer from object. - * @dentry: dentry pointer. - * @buffer: data buffer for file. - * - * Allocate @buffer->page, if it hasn't been already, then call the - * kobject's show() method to fill the buffer with this attribute's - * data. - * This is called only once, on the file's first read unless an error - * is returned. +static bool sysfs_is_bin(struct sysfs_dirent *sd) +{ + return sysfs_type(sd) == SYSFS_KOBJ_BIN_ATTR; +} + +static struct sysfs_open_file *sysfs_of(struct file *file) +{ + return ((struct seq_file *)file->private_data)->private; +} + +/* + * Determine ktype->sysfs_ops for the given sysfs_dirent. This function + * must be called while holding an active reference. */ -static int fill_read_buffer(struct dentry *dentry, struct sysfs_buffer *buffer) +static const struct sysfs_ops *sysfs_file_ops(struct sysfs_dirent *sd) { - struct sysfs_dirent *attr_sd = dentry->d_fsdata; - struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; - const struct sysfs_ops *ops = buffer->ops; - int ret = 0; + struct kobject *kobj = sd->s_parent->s_dir.kobj; + + if (!sysfs_ignore_lockdep(sd)) + lockdep_assert_held(sd); + return kobj->ktype ? kobj->ktype->sysfs_ops : NULL; +} + +/* + * Reads on sysfs are handled through seq_file, which takes care of hairy + * details like buffering and seeking. The following function pipes + * sysfs_ops->show() result through seq_file. + */ +static int sysfs_seq_show(struct seq_file *sf, void *v) +{ + struct sysfs_open_file *of = sf->private; + struct kobject *kobj = of->sd->s_parent->s_dir.kobj; + const struct sysfs_ops *ops; + char *buf; ssize_t count; - if (!buffer->page) - buffer->page = (char *) get_zeroed_page(GFP_KERNEL); - if (!buffer->page) - return -ENOMEM; + /* acquire buffer and ensure that it's >= PAGE_SIZE */ + count = seq_get_buf(sf, &buf); + if (count < PAGE_SIZE) { + seq_commit(sf, -1); + return 0; + } - /* need attr_sd for attr and ops, its parent for kobj */ - if (!sysfs_get_active(attr_sd)) + /* + * Need @of->sd for attr and ops, its parent for kobj. @of->mutex + * nests outside active ref and is just to ensure that the ops + * aren't called concurrently for the same open file. + */ + mutex_lock(&of->mutex); + if (!sysfs_get_active(of->sd)) { + mutex_unlock(&of->mutex); return -ENODEV; + } - buffer->event = atomic_read(&attr_sd->s_attr.open->event); - count = ops->show(kobj, attr_sd->s_attr.attr, buffer->page); + of->event = atomic_read(&of->sd->s_attr.open->event); - sysfs_put_active(attr_sd); + /* + * Lookup @ops and invoke show(). Control may reach here via seq + * file lseek even if @ops->show() isn't implemented. + */ + ops = sysfs_file_ops(of->sd); + if (ops->show) + count = ops->show(kobj, of->sd->s_attr.attr, buf); + else + count = 0; + + sysfs_put_active(of->sd); + mutex_unlock(&of->mutex); + + if (count < 0) + return count; /* * The code works fine with PAGE_SIZE return but it's likely to @@ -96,155 +140,389 @@ static int fill_read_buffer(struct dentry *dentry, struct sysfs_buffer *buffer) /* Try to struggle along */ count = PAGE_SIZE - 1; } - if (count >= 0) { - buffer->needs_read_fill = 0; - buffer->count = count; - } else { - ret = count; - } - return ret; + seq_commit(sf, count); + return 0; } -/** - * sysfs_read_file - read an attribute. - * @file: file pointer. - * @buf: buffer to fill. - * @count: number of bytes to read. - * @ppos: starting offset in file. - * - * Userspace wants to read an attribute file. The attribute descriptor - * is in the file's ->d_fsdata. The target object is in the directory's - * ->d_fsdata. - * - * We call fill_read_buffer() to allocate and fill the buffer from the - * object's show() method exactly once (if the read is happening from - * the beginning of the file). That should fill the entire buffer with - * all the data the object has to offer for that attribute. - * We then call flush_read_buffer() to copy the buffer to userspace - * in the increments specified. +/* + * Read method for bin files. As reading a bin file can have side-effects, + * the exact offset and bytes specified in read(2) call should be passed to + * the read callback making it difficult to use seq_file. Implement + * simplistic custom buffering for bin files. */ - -static ssize_t -sysfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos) +static ssize_t sysfs_bin_read(struct file *file, char __user *userbuf, + size_t bytes, loff_t *off) { - struct sysfs_buffer *buffer = file->private_data; - ssize_t retval = 0; + struct sysfs_open_file *of = sysfs_of(file); + struct bin_attribute *battr = of->sd->s_attr.bin_attr; + struct kobject *kobj = of->sd->s_parent->s_dir.kobj; + loff_t size = file_inode(file)->i_size; + int count = min_t(size_t, bytes, PAGE_SIZE); + loff_t offs = *off; + char *buf; + + if (!bytes) + return 0; - mutex_lock(&buffer->mutex); - if (buffer->needs_read_fill || *ppos == 0) { - retval = fill_read_buffer(file->f_path.dentry, buffer); - if (retval) - goto out; + if (size) { + if (offs > size) + return 0; + if (offs + count > size) + count = size - offs; } - pr_debug("%s: count = %zd, ppos = %lld, buf = %s\n", - __func__, count, *ppos, buffer->page); - retval = simple_read_from_buffer(buf, count, ppos, buffer->page, - buffer->count); -out: - mutex_unlock(&buffer->mutex); - return retval; -} -/** - * fill_write_buffer - copy buffer from userspace. - * @buffer: data buffer for file. - * @buf: data from user. - * @count: number of bytes in @userbuf. - * - * Allocate @buffer->page if it hasn't been already, then - * copy the user-supplied buffer into it. - */ -static int fill_write_buffer(struct sysfs_buffer *buffer, - const char __user *buf, size_t count) -{ - int error; - - if (!buffer->page) - buffer->page = (char *)get_zeroed_page(GFP_KERNEL); - if (!buffer->page) + buf = kmalloc(count, GFP_KERNEL); + if (!buf) return -ENOMEM; - if (count >= PAGE_SIZE) - count = PAGE_SIZE - 1; - error = copy_from_user(buffer->page, buf, count); - buffer->needs_read_fill = 1; - /* if buf is assumed to contain a string, terminate it by \0, - so e.g. sscanf() can scan the string easily */ - buffer->page[count] = 0; - return error ? -EFAULT : count; -} + /* need of->sd for battr, its parent for kobj */ + mutex_lock(&of->mutex); + if (!sysfs_get_active(of->sd)) { + count = -ENODEV; + mutex_unlock(&of->mutex); + goto out_free; + } + + if (battr->read) + count = battr->read(file, kobj, battr, buf, offs, count); + else + count = -EIO; + sysfs_put_active(of->sd); + mutex_unlock(&of->mutex); + + if (count < 0) + goto out_free; + + if (copy_to_user(userbuf, buf, count)) { + count = -EFAULT; + goto out_free; + } + + pr_debug("offs = %lld, *off = %lld, count = %d\n", offs, *off, count); + + *off = offs + count; + + out_free: + kfree(buf); + return count; +} /** - * flush_write_buffer - push buffer to kobject. - * @dentry: dentry to the attribute - * @buffer: data buffer for file. - * @count: number of bytes + * flush_write_buffer - push buffer to kobject + * @of: open file + * @buf: data buffer for file + * @off: file offset to write to + * @count: number of bytes * - * Get the correct pointers for the kobject and the attribute we're - * dealing with, then call the store() method for the attribute, - * passing the buffer that we acquired in fill_write_buffer(). + * Get the correct pointers for the kobject and the attribute we're dealing + * with, then call the store() method for it with @buf. */ -static int flush_write_buffer(struct dentry *dentry, - struct sysfs_buffer *buffer, size_t count) +static int flush_write_buffer(struct sysfs_open_file *of, char *buf, loff_t off, + size_t count) { - struct sysfs_dirent *attr_sd = dentry->d_fsdata; - struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; - const struct sysfs_ops *ops = buffer->ops; - int rc; + struct kobject *kobj = of->sd->s_parent->s_dir.kobj; + int rc = 0; - /* need attr_sd for attr and ops, its parent for kobj */ - if (!sysfs_get_active(attr_sd)) + /* + * Need @of->sd for attr and ops, its parent for kobj. @of->mutex + * nests outside active ref and is just to ensure that the ops + * aren't called concurrently for the same open file. + */ + mutex_lock(&of->mutex); + if (!sysfs_get_active(of->sd)) { + mutex_unlock(&of->mutex); return -ENODEV; + } - rc = ops->store(kobj, attr_sd->s_attr.attr, buffer->page, count); + if (sysfs_is_bin(of->sd)) { + struct bin_attribute *battr = of->sd->s_attr.bin_attr; - sysfs_put_active(attr_sd); + rc = -EIO; + if (battr->write) + rc = battr->write(of->file, kobj, battr, buf, off, + count); + } else { + const struct sysfs_ops *ops = sysfs_file_ops(of->sd); + + rc = ops->store(kobj, of->sd->s_attr.attr, buf, count); + } + + sysfs_put_active(of->sd); + mutex_unlock(&of->mutex); return rc; } - /** - * sysfs_write_file - write an attribute. - * @file: file pointer - * @buf: data to write - * @count: number of bytes - * @ppos: starting offset + * sysfs_write_file - write an attribute + * @file: file pointer + * @user_buf: data to write + * @count: number of bytes + * @ppos: starting offset + * + * Copy data in from userland and pass it to the matching + * sysfs_ops->store() by invoking flush_write_buffer(). * - * Similar to sysfs_read_file(), though working in the opposite direction. - * We allocate and fill the data from the user in fill_write_buffer(), - * then push it to the kobject in flush_write_buffer(). - * There is no easy way for us to know if userspace is only doing a partial - * write, so we don't support them. We expect the entire buffer to come - * on the first write. - * Hint: if you're writing a value, first read the file, modify only the - * the value you're changing, then write entire buffer back. + * There is no easy way for us to know if userspace is only doing a partial + * write, so we don't support them. We expect the entire buffer to come on + * the first write. Hint: if you're writing a value, first read the file, + * modify only the the value you're changing, then write entire buffer + * back. */ -static ssize_t sysfs_write_file(struct file *file, const char __user *buf, +static ssize_t sysfs_write_file(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos) { - struct sysfs_buffer *buffer = file->private_data; - ssize_t len; + struct sysfs_open_file *of = sysfs_of(file); + ssize_t len = min_t(size_t, count, PAGE_SIZE); + loff_t size = file_inode(file)->i_size; + char *buf; + + if (sysfs_is_bin(of->sd) && size) { + if (size <= *ppos) + return 0; + len = min_t(ssize_t, len, size - *ppos); + } - mutex_lock(&buffer->mutex); - len = fill_write_buffer(buffer, buf, count); - if (len > 0) - len = flush_write_buffer(file->f_path.dentry, buffer, len); + if (!len) + return 0; + + buf = kmalloc(len + 1, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + if (copy_from_user(buf, user_buf, len)) { + len = -EFAULT; + goto out_free; + } + buf[len] = '\0'; /* guarantee string termination */ + + len = flush_write_buffer(of, buf, *ppos, len); if (len > 0) *ppos += len; - mutex_unlock(&buffer->mutex); +out_free: + kfree(buf); return len; } +static void sysfs_bin_vma_open(struct vm_area_struct *vma) +{ + struct file *file = vma->vm_file; + struct sysfs_open_file *of = sysfs_of(file); + + if (!of->vm_ops) + return; + + if (!sysfs_get_active(of->sd)) + return; + + if (of->vm_ops->open) + of->vm_ops->open(vma); + + sysfs_put_active(of->sd); +} + +static int sysfs_bin_fault(struct vm_area_struct *vma, struct vm_fault *vmf) +{ + struct file *file = vma->vm_file; + struct sysfs_open_file *of = sysfs_of(file); + int ret; + + if (!of->vm_ops) + return VM_FAULT_SIGBUS; + + if (!sysfs_get_active(of->sd)) + return VM_FAULT_SIGBUS; + + ret = VM_FAULT_SIGBUS; + if (of->vm_ops->fault) + ret = of->vm_ops->fault(vma, vmf); + + sysfs_put_active(of->sd); + return ret; +} + +static int sysfs_bin_page_mkwrite(struct vm_area_struct *vma, + struct vm_fault *vmf) +{ + struct file *file = vma->vm_file; + struct sysfs_open_file *of = sysfs_of(file); + int ret; + + if (!of->vm_ops) + return VM_FAULT_SIGBUS; + + if (!sysfs_get_active(of->sd)) + return VM_FAULT_SIGBUS; + + ret = 0; + if (of->vm_ops->page_mkwrite) + ret = of->vm_ops->page_mkwrite(vma, vmf); + else + file_update_time(file); + + sysfs_put_active(of->sd); + return ret; +} + +static int sysfs_bin_access(struct vm_area_struct *vma, unsigned long addr, + void *buf, int len, int write) +{ + struct file *file = vma->vm_file; + struct sysfs_open_file *of = sysfs_of(file); + int ret; + + if (!of->vm_ops) + return -EINVAL; + + if (!sysfs_get_active(of->sd)) + return -EINVAL; + + ret = -EINVAL; + if (of->vm_ops->access) + ret = of->vm_ops->access(vma, addr, buf, len, write); + + sysfs_put_active(of->sd); + return ret; +} + +#ifdef CONFIG_NUMA +static int sysfs_bin_set_policy(struct vm_area_struct *vma, + struct mempolicy *new) +{ + struct file *file = vma->vm_file; + struct sysfs_open_file *of = sysfs_of(file); + int ret; + + if (!of->vm_ops) + return 0; + + if (!sysfs_get_active(of->sd)) + return -EINVAL; + + ret = 0; + if (of->vm_ops->set_policy) + ret = of->vm_ops->set_policy(vma, new); + + sysfs_put_active(of->sd); + return ret; +} + +static struct mempolicy *sysfs_bin_get_policy(struct vm_area_struct *vma, + unsigned long addr) +{ + struct file *file = vma->vm_file; + struct sysfs_open_file *of = sysfs_of(file); + struct mempolicy *pol; + + if (!of->vm_ops) + return vma->vm_policy; + + if (!sysfs_get_active(of->sd)) + return vma->vm_policy; + + pol = vma->vm_policy; + if (of->vm_ops->get_policy) + pol = of->vm_ops->get_policy(vma, addr); + + sysfs_put_active(of->sd); + return pol; +} + +static int sysfs_bin_migrate(struct vm_area_struct *vma, const nodemask_t *from, + const nodemask_t *to, unsigned long flags) +{ + struct file *file = vma->vm_file; + struct sysfs_open_file *of = sysfs_of(file); + int ret; + + if (!of->vm_ops) + return 0; + + if (!sysfs_get_active(of->sd)) + return 0; + + ret = 0; + if (of->vm_ops->migrate) + ret = of->vm_ops->migrate(vma, from, to, flags); + + sysfs_put_active(of->sd); + return ret; +} +#endif + +static const struct vm_operations_struct sysfs_bin_vm_ops = { + .open = sysfs_bin_vma_open, + .fault = sysfs_bin_fault, + .page_mkwrite = sysfs_bin_page_mkwrite, + .access = sysfs_bin_access, +#ifdef CONFIG_NUMA + .set_policy = sysfs_bin_set_policy, + .get_policy = sysfs_bin_get_policy, + .migrate = sysfs_bin_migrate, +#endif +}; + +static int sysfs_bin_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct sysfs_open_file *of = sysfs_of(file); + struct bin_attribute *battr = of->sd->s_attr.bin_attr; + struct kobject *kobj = of->sd->s_parent->s_dir.kobj; + int rc; + + mutex_lock(&of->mutex); + + /* need of->sd for battr, its parent for kobj */ + rc = -ENODEV; + if (!sysfs_get_active(of->sd)) + goto out_unlock; + + if (!battr->mmap) + goto out_put; + + rc = battr->mmap(file, kobj, battr, vma); + if (rc) + goto out_put; + + /* + * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup() + * to satisfy versions of X which crash if the mmap fails: that + * substitutes a new vm_file, and we don't then want bin_vm_ops. + */ + if (vma->vm_file != file) + goto out_put; + + rc = -EINVAL; + if (of->mmapped && of->vm_ops != vma->vm_ops) + goto out_put; + + /* + * It is not possible to successfully wrap close. + * So error if someone is trying to use close. + */ + rc = -EINVAL; + if (vma->vm_ops && vma->vm_ops->close) + goto out_put; + + rc = 0; + of->mmapped = 1; + of->vm_ops = vma->vm_ops; + vma->vm_ops = &sysfs_bin_vm_ops; +out_put: + sysfs_put_active(of->sd); +out_unlock: + mutex_unlock(&of->mutex); + + return rc; +} + /** * sysfs_get_open_dirent - get or create sysfs_open_dirent * @sd: target sysfs_dirent - * @buffer: sysfs_buffer for this instance of open + * @of: sysfs_open_file for this instance of open * * If @sd->s_attr.open exists, increment its reference count; - * otherwise, create one. @buffer is chained to the buffers - * list. + * otherwise, create one. @of is chained to the files list. * * LOCKING: * Kernel thread context (may sleep). @@ -253,11 +531,12 @@ static ssize_t sysfs_write_file(struct file *file, const char __user *buf, * 0 on success, -errno on failure. */ static int sysfs_get_open_dirent(struct sysfs_dirent *sd, - struct sysfs_buffer *buffer) + struct sysfs_open_file *of) { struct sysfs_open_dirent *od, *new_od = NULL; retry: + mutex_lock(&sysfs_open_file_mutex); spin_lock_irq(&sysfs_open_dirent_lock); if (!sd->s_attr.open && new_od) { @@ -268,10 +547,11 @@ static int sysfs_get_open_dirent(struct sysfs_dirent *sd, od = sd->s_attr.open; if (od) { atomic_inc(&od->refcnt); - list_add_tail(&buffer->list, &od->buffers); + list_add_tail(&of->list, &od->files); } spin_unlock_irq(&sysfs_open_dirent_lock); + mutex_unlock(&sysfs_open_file_mutex); if (od) { kfree(new_od); @@ -286,36 +566,40 @@ static int sysfs_get_open_dirent(struct sysfs_dirent *sd, atomic_set(&new_od->refcnt, 0); atomic_set(&new_od->event, 1); init_waitqueue_head(&new_od->poll); - INIT_LIST_HEAD(&new_od->buffers); + INIT_LIST_HEAD(&new_od->files); goto retry; } /** * sysfs_put_open_dirent - put sysfs_open_dirent * @sd: target sysfs_dirent - * @buffer: associated sysfs_buffer + * @of: associated sysfs_open_file * - * Put @sd->s_attr.open and unlink @buffer from the buffers list. - * If reference count reaches zero, disassociate and free it. + * Put @sd->s_attr.open and unlink @of from the files list. If + * reference count reaches zero, disassociate and free it. * * LOCKING: * None. */ static void sysfs_put_open_dirent(struct sysfs_dirent *sd, - struct sysfs_buffer *buffer) + struct sysfs_open_file *of) { struct sysfs_open_dirent *od = sd->s_attr.open; unsigned long flags; + mutex_lock(&sysfs_open_file_mutex); spin_lock_irqsave(&sysfs_open_dirent_lock, flags); - list_del(&buffer->list); + if (of) + list_del(&of->list); + if (atomic_dec_and_test(&od->refcnt)) sd->s_attr.open = NULL; else od = NULL; spin_unlock_irqrestore(&sysfs_open_dirent_lock, flags); + mutex_unlock(&sysfs_open_file_mutex); kfree(od); } @@ -324,67 +608,81 @@ static int sysfs_open_file(struct inode *inode, struct file *file) { struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata; struct kobject *kobj = attr_sd->s_parent->s_dir.kobj; - struct sysfs_buffer *buffer; - const struct sysfs_ops *ops; + struct sysfs_open_file *of; + bool has_read, has_write; int error = -EACCES; /* need attr_sd for attr and ops, its parent for kobj */ if (!sysfs_get_active(attr_sd)) return -ENODEV; - /* every kobject with an attribute needs a ktype assigned */ - if (kobj->ktype && kobj->ktype->sysfs_ops) - ops = kobj->ktype->sysfs_ops; - else { - WARN(1, KERN_ERR - "missing sysfs attribute operations for kobject: %s\n", - kobject_name(kobj)); - goto err_out; - } + if (sysfs_is_bin(attr_sd)) { + struct bin_attribute *battr = attr_sd->s_attr.bin_attr; - /* File needs write support. - * The inode's perms must say it's ok, - * and we must have a store method. - */ - if (file->f_mode & FMODE_WRITE) { - if (!(inode->i_mode & S_IWUGO) || !ops->store) - goto err_out; - } + has_read = battr->read || battr->mmap; + has_write = battr->write || battr->mmap; + } else { + const struct sysfs_ops *ops = sysfs_file_ops(attr_sd); - /* File needs read support. - * The inode's perms must say it's ok, and we there - * must be a show method for it. - */ - if (file->f_mode & FMODE_READ) { - if (!(inode->i_mode & S_IRUGO) || !ops->show) + /* every kobject with an attribute needs a ktype assigned */ + if (WARN(!ops, KERN_ERR + "missing sysfs attribute operations for kobject: %s\n", + kobject_name(kobj))) goto err_out; + + has_read = ops->show; + has_write = ops->store; } - /* No error? Great, allocate a buffer for the file, and store it - * it in file->private_data for easy access. - */ + /* check perms and supported operations */ + if ((file->f_mode & FMODE_WRITE) && + (!(inode->i_mode & S_IWUGO) || !has_write)) + goto err_out; + + if ((file->f_mode & FMODE_READ) && + (!(inode->i_mode & S_IRUGO) || !has_read)) + goto err_out; + + /* allocate a sysfs_open_file for the file */ error = -ENOMEM; - buffer = kzalloc(sizeof(struct sysfs_buffer), GFP_KERNEL); - if (!buffer) + of = kzalloc(sizeof(struct sysfs_open_file), GFP_KERNEL); + if (!of) goto err_out; - mutex_init(&buffer->mutex); - buffer->needs_read_fill = 1; - buffer->ops = ops; - file->private_data = buffer; + mutex_init(&of->mutex); + of->sd = attr_sd; + of->file = file; - /* make sure we have open dirent struct */ - error = sysfs_get_open_dirent(attr_sd, buffer); + /* + * Always instantiate seq_file even if read access doesn't use + * seq_file or is not requested. This unifies private data access + * and readable regular files are the vast majority anyway. + */ + if (sysfs_is_bin(attr_sd)) + error = single_open(file, NULL, of); + else + error = single_open(file, sysfs_seq_show, of); if (error) goto err_free; + /* seq_file clears PWRITE unconditionally, restore it if WRITE */ + if (file->f_mode & FMODE_WRITE) + file->f_mode |= FMODE_PWRITE; + + /* make sure we have open dirent struct */ + error = sysfs_get_open_dirent(attr_sd, of); + if (error) + goto err_close; + /* open succeeded, put active references */ sysfs_put_active(attr_sd); return 0; - err_free: - kfree(buffer); - err_out: +err_close: + single_release(inode, file); +err_free: + kfree(of); +err_out: sysfs_put_active(attr_sd); return error; } @@ -392,17 +690,41 @@ static int sysfs_open_file(struct inode *inode, struct file *file) static int sysfs_release(struct inode *inode, struct file *filp) { struct sysfs_dirent *sd = filp->f_path.dentry->d_fsdata; - struct sysfs_buffer *buffer = filp->private_data; + struct sysfs_open_file *of = sysfs_of(filp); - sysfs_put_open_dirent(sd, buffer); - - if (buffer->page) - free_page((unsigned long)buffer->page); - kfree(buffer); + sysfs_put_open_dirent(sd, of); + single_release(inode, filp); + kfree(of); return 0; } +void sysfs_unmap_bin_file(struct sysfs_dirent *sd) +{ + struct sysfs_open_dirent *od; + struct sysfs_open_file *of; + + if (!sysfs_is_bin(sd)) + return; + + spin_lock_irq(&sysfs_open_dirent_lock); + od = sd->s_attr.open; + if (od) + atomic_inc(&od->refcnt); + spin_unlock_irq(&sysfs_open_dirent_lock); + if (!od) + return; + + mutex_lock(&sysfs_open_file_mutex); + list_for_each_entry(of, &od->files, list) { + struct inode *inode = file_inode(of->file); + unmap_mapping_range(inode->i_mapping, 0, 0, 1); + } + mutex_unlock(&sysfs_open_file_mutex); + + sysfs_put_open_dirent(sd, NULL); +} + /* Sysfs attribute files are pollable. The idea is that you read * the content and then you use 'poll' or 'select' to wait for * the content to change. When the content changes (assuming the @@ -418,7 +740,7 @@ static int sysfs_release(struct inode *inode, struct file *filp) */ static unsigned int sysfs_poll(struct file *filp, poll_table *wait) { - struct sysfs_buffer *buffer = filp->private_data; + struct sysfs_open_file *of = sysfs_of(filp); struct sysfs_dirent *attr_sd = filp->f_path.dentry->d_fsdata; struct sysfs_open_dirent *od = attr_sd->s_attr.open; @@ -430,13 +752,12 @@ static unsigned int sysfs_poll(struct file *filp, poll_table *wait) sysfs_put_active(attr_sd); - if (buffer->event != atomic_read(&od->event)) + if (of->event != atomic_read(&od->event)) goto trigger; return DEFAULT_POLLMASK; trigger: - buffer->needs_read_fill = 1; return DEFAULT_POLLMASK|POLLERR|POLLPRI; } @@ -466,9 +787,9 @@ void sysfs_notify(struct kobject *k, const char *dir, const char *attr) mutex_lock(&sysfs_mutex); if (sd && dir) - sd = sysfs_find_dirent(sd, NULL, dir); + sd = sysfs_find_dirent(sd, dir, NULL); if (sd && attr) - sd = sysfs_find_dirent(sd, NULL, attr); + sd = sysfs_find_dirent(sd, attr, NULL); if (sd) sysfs_notify_dirent(sd); @@ -477,7 +798,7 @@ void sysfs_notify(struct kobject *k, const char *dir, const char *attr) EXPORT_SYMBOL_GPL(sysfs_notify); const struct file_operations sysfs_file_operations = { - .read = sysfs_read_file, + .read = seq_read, .write = sysfs_write_file, .llseek = generic_file_llseek, .open = sysfs_open_file, @@ -485,58 +806,25 @@ const struct file_operations sysfs_file_operations = { .poll = sysfs_poll, }; -static int sysfs_attr_ns(struct kobject *kobj, const struct attribute *attr, - const void **pns) -{ - struct sysfs_dirent *dir_sd = kobj->sd; - const struct sysfs_ops *ops; - const void *ns = NULL; - int err; - - if (!dir_sd) { - WARN(1, KERN_ERR "sysfs: kobject %s without dirent\n", - kobject_name(kobj)); - return -ENOENT; - } - - err = 0; - if (!sysfs_ns_type(dir_sd)) - goto out; - - err = -EINVAL; - if (!kobj->ktype) - goto out; - ops = kobj->ktype->sysfs_ops; - if (!ops) - goto out; - if (!ops->namespace) - goto out; - - err = 0; - ns = ops->namespace(kobj, attr); -out: - if (err) { - WARN(1, KERN_ERR - "missing sysfs namespace attribute operation for kobject: %s\n", - kobject_name(kobj)); - } - *pns = ns; - return err; -} +const struct file_operations sysfs_bin_operations = { + .read = sysfs_bin_read, + .write = sysfs_write_file, + .llseek = generic_file_llseek, + .mmap = sysfs_bin_mmap, + .open = sysfs_open_file, + .release = sysfs_release, + .poll = sysfs_poll, +}; -int sysfs_add_file_mode(struct sysfs_dirent *dir_sd, - const struct attribute *attr, int type, umode_t amode) +int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd, + const struct attribute *attr, int type, + umode_t amode, const void *ns) { umode_t mode = (amode & S_IALLUGO) | S_IFREG; struct sysfs_addrm_cxt acxt; struct sysfs_dirent *sd; - const void *ns; int rc; - rc = sysfs_attr_ns(dir_sd->s_dir.kobj, attr, &ns); - if (rc) - return rc; - sd = sysfs_new_dirent(attr->name, mode, type); if (!sd) return -ENOMEM; @@ -545,8 +833,8 @@ int sysfs_add_file_mode(struct sysfs_dirent *dir_sd, sd->s_attr.attr = (void *)attr; sysfs_dirent_init_lockdep(sd); - sysfs_addrm_start(&acxt, dir_sd); - rc = sysfs_add_one(&acxt, sd); + sysfs_addrm_start(&acxt); + rc = sysfs_add_one(&acxt, sd, dir_sd); sysfs_addrm_finish(&acxt); if (rc) @@ -559,23 +847,25 @@ int sysfs_add_file_mode(struct sysfs_dirent *dir_sd, int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr, int type) { - return sysfs_add_file_mode(dir_sd, attr, type, attr->mode); + return sysfs_add_file_mode_ns(dir_sd, attr, type, attr->mode, NULL); } - /** - * sysfs_create_file - create an attribute file for an object. - * @kobj: object we're creating for. - * @attr: attribute descriptor. + * sysfs_create_file_ns - create an attribute file for an object with custom ns + * @kobj: object we're creating for + * @attr: attribute descriptor + * @ns: namespace the new file should belong to */ -int sysfs_create_file(struct kobject *kobj, const struct attribute *attr) +int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, + const void *ns) { BUG_ON(!kobj || !kobj->sd || !attr); - return sysfs_add_file(kobj->sd, attr, SYSFS_KOBJ_ATTR); + return sysfs_add_file_mode_ns(kobj->sd, attr, SYSFS_KOBJ_ATTR, + attr->mode, ns); } -EXPORT_SYMBOL_GPL(sysfs_create_file); +EXPORT_SYMBOL_GPL(sysfs_create_file_ns); int sysfs_create_files(struct kobject *kobj, const struct attribute **ptr) { @@ -604,7 +894,7 @@ int sysfs_add_file_to_group(struct kobject *kobj, int error; if (group) - dir_sd = sysfs_get_dirent(kobj->sd, NULL, group); + dir_sd = sysfs_get_dirent(kobj->sd, group); else dir_sd = sysfs_get(kobj->sd); @@ -630,17 +920,12 @@ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, { struct sysfs_dirent *sd; struct iattr newattrs; - const void *ns; int rc; - rc = sysfs_attr_ns(kobj, attr, &ns); - if (rc) - return rc; - mutex_lock(&sysfs_mutex); rc = -ENOENT; - sd = sysfs_find_dirent(kobj->sd, ns, attr->name); + sd = sysfs_find_dirent(kobj->sd, attr->name, NULL); if (!sd) goto out; @@ -655,22 +940,21 @@ int sysfs_chmod_file(struct kobject *kobj, const struct attribute *attr, EXPORT_SYMBOL_GPL(sysfs_chmod_file); /** - * sysfs_remove_file - remove an object attribute. - * @kobj: object we're acting for. - * @attr: attribute descriptor. + * sysfs_remove_file_ns - remove an object attribute with a custom ns tag + * @kobj: object we're acting for + * @attr: attribute descriptor + * @ns: namespace tag of the file to remove * - * Hash the attribute name and kill the victim. + * Hash the attribute name and namespace tag and kill the victim. */ -void sysfs_remove_file(struct kobject *kobj, const struct attribute *attr) +void sysfs_remove_file_ns(struct kobject *kobj, const struct attribute *attr, + const void *ns) { - const void *ns; - - if (sysfs_attr_ns(kobj, attr, &ns)) - return; + struct sysfs_dirent *dir_sd = kobj->sd; - sysfs_hash_and_remove(kobj->sd, ns, attr->name); + sysfs_hash_and_remove(dir_sd, attr->name, ns); } -EXPORT_SYMBOL_GPL(sysfs_remove_file); +EXPORT_SYMBOL_GPL(sysfs_remove_file_ns); void sysfs_remove_files(struct kobject *kobj, const struct attribute **ptr) { @@ -692,16 +976,42 @@ void sysfs_remove_file_from_group(struct kobject *kobj, struct sysfs_dirent *dir_sd; if (group) - dir_sd = sysfs_get_dirent(kobj->sd, NULL, group); + dir_sd = sysfs_get_dirent(kobj->sd, group); else dir_sd = sysfs_get(kobj->sd); if (dir_sd) { - sysfs_hash_and_remove(dir_sd, NULL, attr->name); + sysfs_hash_and_remove(dir_sd, attr->name, NULL); sysfs_put(dir_sd); } } EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group); +/** + * sysfs_create_bin_file - create binary file for object. + * @kobj: object. + * @attr: attribute descriptor. + */ +int sysfs_create_bin_file(struct kobject *kobj, + const struct bin_attribute *attr) +{ + BUG_ON(!kobj || !kobj->sd || !attr); + + return sysfs_add_file(kobj->sd, &attr->attr, SYSFS_KOBJ_BIN_ATTR); +} +EXPORT_SYMBOL_GPL(sysfs_create_bin_file); + +/** + * sysfs_remove_bin_file - remove binary file for object. + * @kobj: object. + * @attr: attribute descriptor. + */ +void sysfs_remove_bin_file(struct kobject *kobj, + const struct bin_attribute *attr) +{ + sysfs_hash_and_remove(kobj->sd, attr->attr.name, NULL); +} +EXPORT_SYMBOL_GPL(sysfs_remove_bin_file); + struct sysfs_schedule_callback_struct { struct list_head workq_list; struct kobject *kobj; diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 5f92cd2f61c1..1898a10e38ce 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -26,7 +26,7 @@ static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, if (grp->attrs) for (attr = grp->attrs; *attr; attr++) - sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name); + sysfs_hash_and_remove(dir_sd, (*attr)->name, NULL); if (grp->bin_attrs) for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) sysfs_remove_bin_file(kobj, *bin_attr); @@ -49,16 +49,17 @@ static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj, * re-adding (if required) the file. */ if (update) - sysfs_hash_and_remove(dir_sd, NULL, - (*attr)->name); + sysfs_hash_and_remove(dir_sd, (*attr)->name, + NULL); if (grp->is_visible) { mode = grp->is_visible(kobj, *attr, i); if (!mode) continue; } - error = sysfs_add_file_mode(dir_sd, *attr, - SYSFS_KOBJ_ATTR, - (*attr)->mode | mode); + error = sysfs_add_file_mode_ns(dir_sd, *attr, + SYSFS_KOBJ_ATTR, + (*attr)->mode | mode, + NULL); if (unlikely(error)) break; } @@ -110,7 +111,7 @@ static int internal_create_group(struct kobject *kobj, int update, error = create_files(sd, kobj, grp, update); if (error) { if (grp->name) - sysfs_remove_subdir(sd); + sysfs_remove(sd); } sysfs_put(sd); return error; @@ -206,7 +207,7 @@ void sysfs_remove_group(struct kobject *kobj, struct sysfs_dirent *sd; if (grp->name) { - sd = sysfs_get_dirent(dir_sd, NULL, grp->name); + sd = sysfs_get_dirent(dir_sd, grp->name); if (!sd) { WARN(!sd, KERN_WARNING "sysfs group %p not found for kobject '%s'\n", @@ -218,7 +219,7 @@ void sysfs_remove_group(struct kobject *kobj, remove_files(sd, kobj, grp); if (grp->name) - sysfs_remove_subdir(sd); + sysfs_remove(sd); sysfs_put(sd); } @@ -261,7 +262,7 @@ int sysfs_merge_group(struct kobject *kobj, struct attribute *const *attr; int i; - dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name); + dir_sd = sysfs_get_dirent(kobj->sd, grp->name); if (!dir_sd) return -ENOENT; @@ -269,7 +270,7 @@ int sysfs_merge_group(struct kobject *kobj, error = sysfs_add_file(dir_sd, *attr, SYSFS_KOBJ_ATTR); if (error) { while (--i >= 0) - sysfs_hash_and_remove(dir_sd, NULL, (*--attr)->name); + sysfs_hash_and_remove(dir_sd, (*--attr)->name, NULL); } sysfs_put(dir_sd); @@ -288,10 +289,10 @@ void sysfs_unmerge_group(struct kobject *kobj, struct sysfs_dirent *dir_sd; struct attribute *const *attr; - dir_sd = sysfs_get_dirent(kobj->sd, NULL, grp->name); + dir_sd = sysfs_get_dirent(kobj->sd, grp->name); if (dir_sd) { for (attr = grp->attrs; *attr; ++attr) - sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name); + sysfs_hash_and_remove(dir_sd, (*attr)->name, NULL); sysfs_put(dir_sd); } } @@ -310,7 +311,7 @@ int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name, struct sysfs_dirent *dir_sd; int error = 0; - dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name); + dir_sd = sysfs_get_dirent(kobj->sd, group_name); if (!dir_sd) return -ENOENT; @@ -332,9 +333,9 @@ void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name, { struct sysfs_dirent *dir_sd; - dir_sd = sysfs_get_dirent(kobj->sd, NULL, group_name); + dir_sd = sysfs_get_dirent(kobj->sd, group_name); if (dir_sd) { - sysfs_hash_and_remove(dir_sd, NULL, link_name); + sysfs_hash_and_remove(dir_sd, link_name, NULL); sysfs_put(dir_sd); } } diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c index 963f910c8034..1750f790af3b 100644 --- a/fs/sysfs/inode.c +++ b/fs/sysfs/inode.c @@ -258,9 +258,9 @@ static void sysfs_init_inode(struct sysfs_dirent *sd, struct inode *inode) inode->i_fop = &sysfs_file_operations; break; case SYSFS_KOBJ_BIN_ATTR: - bin_attr = sd->s_bin_attr.bin_attr; + bin_attr = sd->s_attr.bin_attr; inode->i_size = bin_attr->size; - inode->i_fop = &bin_fops; + inode->i_fop = &sysfs_bin_operations; break; case SYSFS_KOBJ_LINK: inode->i_op = &sysfs_symlink_inode_operations; @@ -314,32 +314,6 @@ void sysfs_evict_inode(struct inode *inode) sysfs_put(sd); } -int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, - const char *name) -{ - struct sysfs_addrm_cxt acxt; - struct sysfs_dirent *sd; - - if (!dir_sd) { - WARN(1, KERN_WARNING "sysfs: can not remove '%s', no directory\n", - name); - return -ENOENT; - } - - sysfs_addrm_start(&acxt, dir_sd); - - sd = sysfs_find_dirent(dir_sd, ns, name); - if (sd) - sysfs_remove_one(&acxt, sd); - - sysfs_addrm_finish(&acxt); - - if (sd) - return 0; - else - return -ENOENT; -} - int sysfs_permission(struct inode *inode, int mask) { struct sysfs_dirent *sd; diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c index 2dd4507d9edd..3ae3f1bf1a09 100644 --- a/fs/sysfs/symlink.c +++ b/fs/sysfs/symlink.c @@ -33,13 +33,15 @@ static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd, BUG_ON(!name || !parent_sd); - /* target->sd can go away beneath us but is protected with - * sysfs_assoc_lock. Fetch target_sd from it. + /* + * We don't own @target and it may be removed at any time. + * Synchronize using sysfs_symlink_target_lock. See + * sysfs_remove_dir() for details. */ - spin_lock(&sysfs_assoc_lock); + spin_lock(&sysfs_symlink_target_lock); if (target->sd) target_sd = sysfs_get(target->sd); - spin_unlock(&sysfs_assoc_lock); + spin_unlock(&sysfs_symlink_target_lock); error = -ENOENT; if (!target_sd) @@ -52,18 +54,18 @@ static int sysfs_do_create_link_sd(struct sysfs_dirent *parent_sd, ns_type = sysfs_ns_type(parent_sd); if (ns_type) - sd->s_ns = target->ktype->namespace(target); + sd->s_ns = target_sd->s_ns; sd->s_symlink.target_sd = target_sd; target_sd = NULL; /* reference is now owned by the symlink */ - sysfs_addrm_start(&acxt, parent_sd); + sysfs_addrm_start(&acxt); /* Symlinks must be between directories with the same ns_type */ if (!ns_type || (ns_type == sysfs_ns_type(sd->s_symlink.target_sd->s_parent))) { if (warn) - error = sysfs_add_one(&acxt, sd); + error = sysfs_add_one(&acxt, sd, parent_sd); else - error = __sysfs_add_one(&acxt, sd); + error = __sysfs_add_one(&acxt, sd, parent_sd); } else { error = -EINVAL; WARN(1, KERN_WARNING @@ -155,11 +157,17 @@ void sysfs_delete_link(struct kobject *kobj, struct kobject *targ, const char *name) { const void *ns = NULL; - spin_lock(&sysfs_assoc_lock); + + /* + * We don't own @target and it may be removed at any time. + * Synchronize using sysfs_symlink_target_lock. See + * sysfs_remove_dir() for details. + */ + spin_lock(&sysfs_symlink_target_lock); if (targ->sd && sysfs_ns_type(kobj->sd)) ns = targ->sd->s_ns; - spin_unlock(&sysfs_assoc_lock); - sysfs_hash_and_remove(kobj->sd, ns, name); + spin_unlock(&sysfs_symlink_target_lock); + sysfs_hash_and_remove(kobj->sd, name, ns); } /** @@ -176,24 +184,25 @@ void sysfs_remove_link(struct kobject *kobj, const char *name) else parent_sd = kobj->sd; - sysfs_hash_and_remove(parent_sd, NULL, name); + sysfs_hash_and_remove(parent_sd, name, NULL); } EXPORT_SYMBOL_GPL(sysfs_remove_link); /** - * sysfs_rename_link - rename symlink in object's directory. + * sysfs_rename_link_ns - rename symlink in object's directory. * @kobj: object we're acting for. * @targ: object we're pointing to. * @old: previous name of the symlink. * @new: new name of the symlink. + * @new_ns: new namespace of the symlink. * * A helper function for the common rename symlink idiom. */ -int sysfs_rename_link(struct kobject *kobj, struct kobject *targ, - const char *old, const char *new) +int sysfs_rename_link_ns(struct kobject *kobj, struct kobject *targ, + const char *old, const char *new, const void *new_ns) { struct sysfs_dirent *parent_sd, *sd = NULL; - const void *old_ns = NULL, *new_ns = NULL; + const void *old_ns = NULL; int result; if (!kobj) @@ -205,7 +214,7 @@ int sysfs_rename_link(struct kobject *kobj, struct kobject *targ, old_ns = targ->sd->s_ns; result = -ENOENT; - sd = sysfs_get_dirent(parent_sd, old_ns, old); + sd = sysfs_get_dirent_ns(parent_sd, old, old_ns); if (!sd) goto out; @@ -215,16 +224,13 @@ int sysfs_rename_link(struct kobject *kobj, struct kobject *targ, if (sd->s_symlink.target_sd->s_dir.kobj != targ) goto out; - if (sysfs_ns_type(parent_sd)) - new_ns = targ->ktype->namespace(targ); - - result = sysfs_rename(sd, parent_sd, new_ns, new); + result = sysfs_rename(sd, parent_sd, new, new_ns); out: sysfs_put(sd); return result; } -EXPORT_SYMBOL_GPL(sysfs_rename_link); +EXPORT_SYMBOL_GPL(sysfs_rename_link_ns); static int sysfs_get_target_path(struct sysfs_dirent *parent_sd, struct sysfs_dirent *target_sd, char *path) diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index b6deca3e301d..0af09fbfb3f6 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -29,15 +29,13 @@ struct sysfs_elem_symlink { }; struct sysfs_elem_attr { - struct attribute *attr; + union { + struct attribute *attr; + struct bin_attribute *bin_attr; + }; struct sysfs_open_dirent *open; }; -struct sysfs_elem_bin_attr { - struct bin_attribute *bin_attr; - struct hlist_head buffers; -}; - struct sysfs_inode_attrs { struct iattr ia_iattr; void *ia_secdata; @@ -74,7 +72,6 @@ struct sysfs_dirent { struct sysfs_elem_dir s_dir; struct sysfs_elem_symlink s_symlink; struct sysfs_elem_attr s_attr; - struct sysfs_elem_bin_attr s_bin_attr; }; unsigned short s_flags; @@ -115,6 +112,7 @@ static inline enum kobj_ns_type sysfs_ns_type(struct sysfs_dirent *sd) } #ifdef CONFIG_DEBUG_LOCK_ALLOC + #define sysfs_dirent_init_lockdep(sd) \ do { \ struct attribute *attr = sd->s_attr.attr; \ @@ -124,15 +122,31 @@ do { \ \ lockdep_init_map(&sd->dep_map, "s_active", key, 0); \ } while (0) + +/* Test for attributes that want to ignore lockdep for read-locking */ +static inline bool sysfs_ignore_lockdep(struct sysfs_dirent *sd) +{ + int type = sysfs_type(sd); + + return (type == SYSFS_KOBJ_ATTR || type == SYSFS_KOBJ_BIN_ATTR) && + sd->s_attr.attr->ignore_lockdep; +} + #else + #define sysfs_dirent_init_lockdep(sd) do {} while (0) + +static inline bool sysfs_ignore_lockdep(struct sysfs_dirent *sd) +{ + return true; +} + #endif /* * Context structure to be used while adding/removing nodes. */ struct sysfs_addrm_cxt { - struct sysfs_dirent *parent_sd; struct sysfs_dirent *removed; }; @@ -156,38 +170,37 @@ extern struct kmem_cache *sysfs_dir_cachep; * dir.c */ extern struct mutex sysfs_mutex; -extern spinlock_t sysfs_assoc_lock; +extern spinlock_t sysfs_symlink_target_lock; extern const struct dentry_operations sysfs_dentry_ops; extern const struct file_operations sysfs_dir_operations; extern const struct inode_operations sysfs_dir_inode_operations; -struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd); struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd); void sysfs_put_active(struct sysfs_dirent *sd); -void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt, - struct sysfs_dirent *parent_sd); -int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd); -int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd); -void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd); +void sysfs_addrm_start(struct sysfs_addrm_cxt *acxt); +void sysfs_warn_dup(struct sysfs_dirent *parent, const char *name); +int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, + struct sysfs_dirent *parent_sd); +int sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd, + struct sysfs_dirent *parent_sd); +void sysfs_remove(struct sysfs_dirent *sd); +int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name, + const void *ns); void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt); struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd, - const void *ns, - const unsigned char *name); -struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd, - const void *ns, - const unsigned char *name); + const unsigned char *name, + const void *ns); struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type); void release_sysfs_dirent(struct sysfs_dirent *sd); int sysfs_create_subdir(struct kobject *kobj, const char *name, struct sysfs_dirent **p_sd); -void sysfs_remove_subdir(struct sysfs_dirent *sd); int sysfs_rename(struct sysfs_dirent *sd, struct sysfs_dirent *new_parent_sd, - const void *ns, const char *new_name); + const char *new_name, const void *new_ns); static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd) { @@ -218,25 +231,21 @@ int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags); -int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, - const char *name); int sysfs_inode_init(void); /* * file.c */ extern const struct file_operations sysfs_file_operations; +extern const struct file_operations sysfs_bin_operations; int sysfs_add_file(struct sysfs_dirent *dir_sd, const struct attribute *attr, int type); -int sysfs_add_file_mode(struct sysfs_dirent *dir_sd, - const struct attribute *attr, int type, umode_t amode); -/* - * bin.c - */ -extern const struct file_operations bin_fops; -void unmap_bin_file(struct sysfs_dirent *attr_sd); +int sysfs_add_file_mode_ns(struct sysfs_dirent *dir_sd, + const struct attribute *attr, int type, + umode_t amode, const void *ns); +void sysfs_unmap_bin_file(struct sysfs_dirent *sd); /* * symlink.c |