summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/buffer.c69
-rw-r--r--fs/cramfs/inode.c2
-rw-r--r--fs/dcache.c71
-rw-r--r--fs/exec.c4
-rw-r--r--fs/fat/misc.c4
-rw-r--r--fs/file_table.c124
-rw-r--r--fs/fs_struct.c32
-rw-r--r--fs/generic_acl.c1
-rw-r--r--fs/hostfs/hostfs_kern.c4
-rw-r--r--fs/internal.h7
-rw-r--r--fs/jbd/checkpoint.c4
-rw-r--r--fs/jbd/commit.c49
-rw-r--r--fs/jbd/journal.c2
-rw-r--r--fs/jbd/revoke.c2
-rw-r--r--fs/jbd2/checkpoint.c4
-rw-r--r--fs/jbd2/commit.c39
-rw-r--r--fs/jbd2/journal.c2
-rw-r--r--fs/jbd2/revoke.c2
-rw-r--r--fs/mbcache.c30
-rw-r--r--fs/namei.c119
-rw-r--r--fs/namespace.c177
-rw-r--r--fs/nfs/Kconfig1
-rw-r--r--fs/nfs/dir.c9
-rw-r--r--fs/nfs/file.c2
-rw-r--r--fs/nfs/nfs4proc.c11
-rw-r--r--fs/nfs/super.c7
-rw-r--r--fs/nfsd/Kconfig1
-rw-r--r--fs/nilfs2/super.c28
-rw-r--r--fs/open.c4
-rw-r--r--fs/pnode.c11
-rw-r--r--fs/reiserfs/inode.c1
-rw-r--r--fs/reiserfs/journal.c2
-rw-r--r--fs/super.c18
-rw-r--r--fs/ufs/balloc.c24
-rw-r--r--fs/ufs/ialloc.c18
-rw-r--r--fs/ufs/truncate.c18
-rw-r--r--fs/ufs/util.c20
-rw-r--r--fs/ufs/util.h3
38 files changed, 540 insertions, 386 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 50efa339e051..3e7dca279d1c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -770,11 +770,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
spin_unlock(lock);
/*
* Ensure any pending I/O completes so that
- * ll_rw_block() actually writes the current
- * contents - it is a noop if I/O is still in
- * flight on potentially older contents.
+ * write_dirty_buffer() actually writes the
+ * current contents - it is a noop if I/O is
+ * still in flight on potentially older
+ * contents.
*/
- ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh);
+ write_dirty_buffer(bh, WRITE_SYNC_PLUG);
/*
* Kick off IO for the previous mapping. Note
@@ -2912,13 +2913,6 @@ int submit_bh(int rw, struct buffer_head * bh)
BUG_ON(buffer_unwritten(bh));
/*
- * Mask in barrier bit for a write (could be either a WRITE or a
- * WRITE_SYNC
- */
- if (buffer_ordered(bh) && (rw & WRITE))
- rw |= WRITE_BARRIER;
-
- /*
* Only clear out a write error when rewriting
*/
if (test_set_buffer_req(bh) && (rw & WRITE))
@@ -2956,22 +2950,21 @@ EXPORT_SYMBOL(submit_bh);
/**
* ll_rw_block: low-level access to block devices (DEPRECATED)
- * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead)
+ * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
* @nr: number of &struct buffer_heads in the array
* @bhs: array of pointers to &struct buffer_head
*
* ll_rw_block() takes an array of pointers to &struct buffer_heads, and
* requests an I/O operation on them, either a %READ or a %WRITE. The third
- * %SWRITE is like %WRITE only we make sure that the *current* data in buffers
- * are sent to disk. The fourth %READA option is described in the documentation
- * for generic_make_request() which ll_rw_block() calls.
+ * %READA option is described in the documentation for generic_make_request()
+ * which ll_rw_block() calls.
*
* This function drops any buffer that it cannot get a lock on (with the
- * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be
- * clean when doing a write request, and any buffer that appears to be
- * up-to-date when doing read request. Further it marks as clean buffers that
- * are processed for writing (the buffer cache won't assume that they are
- * actually clean until the buffer gets unlocked).
+ * BH_Lock state bit), any buffer that appears to be clean when doing a write
+ * request, and any buffer that appears to be up-to-date when doing read
+ * request. Further it marks as clean buffers that are processed for
+ * writing (the buffer cache won't assume that they are actually clean
+ * until the buffer gets unlocked).
*
* ll_rw_block sets b_end_io to simple completion handler that marks
* the buffer up-to-date (if approriate), unlocks the buffer and wakes
@@ -2987,20 +2980,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
for (i = 0; i < nr; i++) {
struct buffer_head *bh = bhs[i];
- if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG)
- lock_buffer(bh);
- else if (!trylock_buffer(bh))
+ if (!trylock_buffer(bh))
continue;
-
- if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC ||
- rw == SWRITE_SYNC_PLUG) {
+ if (rw == WRITE) {
if (test_clear_buffer_dirty(bh)) {
bh->b_end_io = end_buffer_write_sync;
get_bh(bh);
- if (rw == SWRITE_SYNC)
- submit_bh(WRITE_SYNC, bh);
- else
- submit_bh(WRITE, bh);
+ submit_bh(WRITE, bh);
continue;
}
} else {
@@ -3016,12 +3002,25 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
}
EXPORT_SYMBOL(ll_rw_block);
+void write_dirty_buffer(struct buffer_head *bh, int rw)
+{
+ lock_buffer(bh);
+ if (!test_clear_buffer_dirty(bh)) {
+ unlock_buffer(bh);
+ return;
+ }
+ bh->b_end_io = end_buffer_write_sync;
+ get_bh(bh);
+ submit_bh(rw, bh);
+}
+EXPORT_SYMBOL(write_dirty_buffer);
+
/*
* For a data-integrity writeout, we need to wait upon any in-progress I/O
* and then start new I/O and then wait upon it. The caller must have a ref on
* the buffer_head.
*/
-int sync_dirty_buffer(struct buffer_head *bh)
+int __sync_dirty_buffer(struct buffer_head *bh, int rw)
{
int ret = 0;
@@ -3030,7 +3029,7 @@ int sync_dirty_buffer(struct buffer_head *bh)
if (test_clear_buffer_dirty(bh)) {
get_bh(bh);
bh->b_end_io = end_buffer_write_sync;
- ret = submit_bh(WRITE_SYNC, bh);
+ ret = submit_bh(rw, bh);
wait_on_buffer(bh);
if (buffer_eopnotsupp(bh)) {
clear_buffer_eopnotsupp(bh);
@@ -3043,6 +3042,12 @@ int sync_dirty_buffer(struct buffer_head *bh)
}
return ret;
}
+EXPORT_SYMBOL(__sync_dirty_buffer);
+
+int sync_dirty_buffer(struct buffer_head *bh)
+{
+ return __sync_dirty_buffer(bh, WRITE_SYNC);
+}
EXPORT_SYMBOL(sync_dirty_buffer);
/*
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index a53b130b366c..1e7a33028d33 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -80,7 +80,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
}
} else {
inode = iget_locked(sb, CRAMINO(cramfs_inode));
- if (inode) {
+ if (inode && (inode->i_state & I_NEW)) {
setup_inode(inode, cramfs_inode);
unlock_new_inode(inode);
}
diff --git a/fs/dcache.c b/fs/dcache.c
index 4d13bf50b7b1..83293be48149 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1332,31 +1332,13 @@ EXPORT_SYMBOL(d_add_ci);
* d_lookup - search for a dentry
* @parent: parent dentry
* @name: qstr of name we wish to find
+ * Returns: dentry, or NULL
*
- * Searches the children of the parent dentry for the name in question. If
- * the dentry is found its reference count is incremented and the dentry
- * is returned. The caller must use dput to free the entry when it has
- * finished using it. %NULL is returned on failure.
- *
- * __d_lookup is dcache_lock free. The hash list is protected using RCU.
- * Memory barriers are used while updating and doing lockless traversal.
- * To avoid races with d_move while rename is happening, d_lock is used.
- *
- * Overflows in memcmp(), while d_move, are avoided by keeping the length
- * and name pointer in one structure pointed by d_qstr.
- *
- * rcu_read_lock() and rcu_read_unlock() are used to disable preemption while
- * lookup is going on.
- *
- * The dentry unused LRU is not updated even if lookup finds the required dentry
- * in there. It is updated in places such as prune_dcache, shrink_dcache_sb,
- * select_parent and __dget_locked. This laziness saves lookup from dcache_lock
- * acquisition.
- *
- * d_lookup() is protected against the concurrent renames in some unrelated
- * directory using the seqlockt_t rename_lock.
+ * d_lookup searches the children of the parent dentry for the name in
+ * question. If the dentry is found its reference count is incremented and the
+ * dentry is returned. The caller must use dput to free the entry when it has
+ * finished using it. %NULL is returned if the dentry does not exist.
*/
-
struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
{
struct dentry * dentry = NULL;
@@ -1372,6 +1354,21 @@ struct dentry * d_lookup(struct dentry * parent, struct qstr * name)
}
EXPORT_SYMBOL(d_lookup);
+/*
+ * __d_lookup - search for a dentry (racy)
+ * @parent: parent dentry
+ * @name: qstr of name we wish to find
+ * Returns: dentry, or NULL
+ *
+ * __d_lookup is like d_lookup, however it may (rarely) return a
+ * false-negative result due to unrelated rename activity.
+ *
+ * __d_lookup is slightly faster by avoiding rename_lock read seqlock,
+ * however it must be used carefully, eg. with a following d_lookup in
+ * the case of failure.
+ *
+ * __d_lookup callers must be commented.
+ */
struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
{
unsigned int len = name->len;
@@ -1382,6 +1379,19 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
struct hlist_node *node;
struct dentry *dentry;
+ /*
+ * The hash list is protected using RCU.
+ *
+ * Take d_lock when comparing a candidate dentry, to avoid races
+ * with d_move().
+ *
+ * It is possible that concurrent renames can mess up our list
+ * walk here and result in missing our dentry, resulting in the
+ * false-negative result. d_lookup() protects against concurrent
+ * renames using rename_lock seqlock.
+ *
+ * See Documentation/vfs/dcache-locking.txt for more details.
+ */
rcu_read_lock();
hlist_for_each_entry_rcu(dentry, node, head, d_hash) {
@@ -1396,8 +1406,8 @@ struct dentry * __d_lookup(struct dentry * parent, struct qstr * name)
/*
* Recheck the dentry after taking the lock - d_move may have
- * changed things. Don't bother checking the hash because we're
- * about to compare the whole name anyway.
+ * changed things. Don't bother checking the hash because
+ * we're about to compare the whole name anyway.
*/
if (dentry->d_parent != parent)
goto next;
@@ -1925,7 +1935,7 @@ static int prepend_path(const struct path *path, struct path *root,
bool slash = false;
int error = 0;
- spin_lock(&vfsmount_lock);
+ br_read_lock(vfsmount_lock);
while (dentry != root->dentry || vfsmnt != root->mnt) {
struct dentry * parent;
@@ -1954,7 +1964,7 @@ out:
if (!error && !slash)
error = prepend(buffer, buflen, "/", 1);
- spin_unlock(&vfsmount_lock);
+ br_read_unlock(vfsmount_lock);
return error;
global_root:
@@ -2292,11 +2302,12 @@ int path_is_under(struct path *path1, struct path *path2)
struct vfsmount *mnt = path1->mnt;
struct dentry *dentry = path1->dentry;
int res;
- spin_lock(&vfsmount_lock);
+
+ br_read_lock(vfsmount_lock);
if (mnt != path2->mnt) {
for (;;) {
if (mnt->mnt_parent == mnt) {
- spin_unlock(&vfsmount_lock);
+ br_read_unlock(vfsmount_lock);
return 0;
}
if (mnt->mnt_parent == path2->mnt)
@@ -2306,7 +2317,7 @@ int path_is_under(struct path *path1, struct path *path2)
dentry = mnt->mnt_mountpoint;
}
res = is_subdir(dentry, path2->dentry);
- spin_unlock(&vfsmount_lock);
+ br_read_unlock(vfsmount_lock);
return res;
}
EXPORT_SYMBOL(path_is_under);
diff --git a/fs/exec.c b/fs/exec.c
index 05c7d6b84df7..2d9455282744 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1118,7 +1118,7 @@ int check_unsafe_exec(struct linux_binprm *bprm)
bprm->unsafe = tracehook_unsafe_exec(p);
n_fs = 1;
- write_lock(&p->fs->lock);
+ spin_lock(&p->fs->lock);
rcu_read_lock();
for (t = next_thread(p); t != p; t = next_thread(t)) {
if (t->fs == p->fs)
@@ -1135,7 +1135,7 @@ int check_unsafe_exec(struct linux_binprm *bprm)
res = 1;
}
}
- write_unlock(&p->fs->lock);
+ spin_unlock(&p->fs->lock);
return res;
}
diff --git a/fs/fat/misc.c b/fs/fat/misc.c
index 1fa23f6ffba5..1736f2356388 100644
--- a/fs/fat/misc.c
+++ b/fs/fat/misc.c
@@ -250,7 +250,9 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs)
{
int i, err = 0;
- ll_rw_block(SWRITE, nr_bhs, bhs);
+ for (i = 0; i < nr_bhs; i++)
+ write_dirty_buffer(bhs[i], WRITE);
+
for (i = 0; i < nr_bhs; i++) {
wait_on_buffer(bhs[i]);
if (buffer_eopnotsupp(bhs[i])) {
diff --git a/fs/file_table.c b/fs/file_table.c
index edecd36fed9b..a04bdd81c11c 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -20,7 +20,9 @@
#include <linux/cdev.h>
#include <linux/fsnotify.h>
#include <linux/sysctl.h>
+#include <linux/lglock.h>
#include <linux/percpu_counter.h>
+#include <linux/percpu.h>
#include <linux/ima.h>
#include <asm/atomic.h>
@@ -32,8 +34,8 @@ struct files_stat_struct files_stat = {
.max_files = NR_FILE
};
-/* public. Not pretty! */
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(files_lock);
+DECLARE_LGLOCK(files_lglock);
+DEFINE_LGLOCK(files_lglock);
/* SLAB cache for file structures */
static struct kmem_cache *filp_cachep __read_mostly;
@@ -249,7 +251,7 @@ static void __fput(struct file *file)
cdev_put(inode->i_cdev);
fops_put(file->f_op);
put_pid(file->f_owner.pid);
- file_kill(file);
+ file_sb_list_del(file);
if (file->f_mode & FMODE_WRITE)
drop_file_write_access(file);
file->f_path.dentry = NULL;
@@ -328,41 +330,107 @@ struct file *fget_light(unsigned int fd, int *fput_needed)
return file;
}
-
void put_filp(struct file *file)
{
if (atomic_long_dec_and_test(&file->f_count)) {
security_file_free(file);
- file_kill(file);
+ file_sb_list_del(file);
file_free(file);
}
}
-void file_move(struct file *file, struct list_head *list)
+static inline int file_list_cpu(struct file *file)
{
- if (!list)
- return;
- file_list_lock();
- list_move(&file->f_u.fu_list, list);
- file_list_unlock();
+#ifdef CONFIG_SMP
+ return file->f_sb_list_cpu;
+#else
+ return smp_processor_id();
+#endif
+}
+
+/* helper for file_sb_list_add to reduce ifdefs */
+static inline void __file_sb_list_add(struct file *file, struct super_block *sb)
+{
+ struct list_head *list;
+#ifdef CONFIG_SMP
+ int cpu;
+ cpu = smp_processor_id();
+ file->f_sb_list_cpu = cpu;
+ list = per_cpu_ptr(sb->s_files, cpu);
+#else
+ list = &sb->s_files;
+#endif
+ list_add(&file->f_u.fu_list, list);
}
-void file_kill(struct file *file)
+/**
+ * file_sb_list_add - add a file to the sb's file list
+ * @file: file to add
+ * @sb: sb to add it to
+ *
+ * Use this function to associate a file with the superblock of the inode it
+ * refers to.
+ */
+void file_sb_list_add(struct file *file, struct super_block *sb)
+{
+ lg_local_lock(files_lglock);
+ __file_sb_list_add(file, sb);
+ lg_local_unlock(files_lglock);
+}
+
+/**
+ * file_sb_list_del - remove a file from the sb's file list
+ * @file: file to remove
+ * @sb: sb to remove it from
+ *
+ * Use this function to remove a file from its superblock.
+ */
+void file_sb_list_del(struct file *file)
{
if (!list_empty(&file->f_u.fu_list)) {
- file_list_lock();
+ lg_local_lock_cpu(files_lglock, file_list_cpu(file));
list_del_init(&file->f_u.fu_list);
- file_list_unlock();
+ lg_local_unlock_cpu(files_lglock, file_list_cpu(file));
}
}
+#ifdef CONFIG_SMP
+
+/*
+ * These macros iterate all files on all CPUs for a given superblock.
+ * files_lglock must be held globally.
+ */
+#define do_file_list_for_each_entry(__sb, __file) \
+{ \
+ int i; \
+ for_each_possible_cpu(i) { \
+ struct list_head *list; \
+ list = per_cpu_ptr((__sb)->s_files, i); \
+ list_for_each_entry((__file), list, f_u.fu_list)
+
+#define while_file_list_for_each_entry \
+ } \
+}
+
+#else
+
+#define do_file_list_for_each_entry(__sb, __file) \
+{ \
+ struct list_head *list; \
+ list = &(sb)->s_files; \
+ list_for_each_entry((__file), list, f_u.fu_list)
+
+#define while_file_list_for_each_entry \
+}
+
+#endif
+
int fs_may_remount_ro(struct super_block *sb)
{
struct file *file;
-
/* Check that no files are currently opened for writing. */
- file_list_lock();
- list_for_each_entry(file, &sb->s_files, f_u.fu_list) {
+ lg_global_lock(files_lglock);
+ do_file_list_for_each_entry(sb, file) {
struct inode *inode = file->f_path.dentry->d_inode;
/* File with pending delete? */
@@ -372,11 +440,11 @@ int fs_may_remount_ro(struct super_block *sb)
/* Writeable file? */
if (S_ISREG(inode->i_mode) && (file->f_mode & FMODE_WRITE))
goto too_bad;
- }
- file_list_unlock();
+ } while_file_list_for_each_entry;
+ lg_global_unlock(files_lglock);
return 1; /* Tis' cool bro. */
too_bad:
- file_list_unlock();
+ lg_global_unlock(files_lglock);
return 0;
}
@@ -392,8 +460,8 @@ void mark_files_ro(struct super_block *sb)
struct file *f;
retry:
- file_list_lock();
- list_for_each_entry(f, &sb->s_files, f_u.fu_list) {
+ lg_global_lock(files_lglock);
+ do_file_list_for_each_entry(sb, f) {
struct vfsmount *mnt;
if (!S_ISREG(f->f_path.dentry->d_inode->i_mode))
continue;
@@ -408,16 +476,13 @@ retry:
continue;
file_release_write(f);
mnt = mntget(f->f_path.mnt);
- file_list_unlock();
- /*
- * This can sleep, so we can't hold
- * the file_list_lock() spinlock.
- */
+ /* This can sleep, so we can't hold the spinlock. */
+ lg_global_unlock(files_lglock);
mnt_drop_write(mnt);
mntput(mnt);
goto retry;
- }
- file_list_unlock();
+ } while_file_list_for_each_entry;
+ lg_global_unlock(files_lglock);
}
void __init files_init(unsigned long mempages)
@@ -437,5 +502,6 @@ void __init files_init(unsigned long mempages)
if (files_stat.max_files < NR_FILE)
files_stat.max_files = NR_FILE;
files_defer_init();
+ lg_lock_init(files_lglock);
percpu_counter_init(&nr_files, 0);
}
diff --git a/fs/fs_struct.c b/fs/fs_struct.c
index 1ee40eb9a2c0..ed45a9cf5f3d 100644
--- a/fs/fs_struct.c
+++ b/fs/fs_struct.c
@@ -13,11 +13,11 @@ void set_fs_root(struct fs_struct *fs, struct path *path)
{
struct path old_root;
- write_lock(&fs->lock);
+ spin_lock(&fs->lock);
old_root = fs->root;
fs->root = *path;
path_get(path);
- write_unlock(&fs->lock);
+ spin_unlock(&fs->lock);
if (old_root.dentry)
path_put(&old_root);
}
@@ -30,11 +30,11 @@ void set_fs_pwd(struct fs_struct *fs, struct path *path)
{
struct path old_pwd;
- write_lock(&fs->lock);
+ spin_lock(&fs->lock);
old_pwd = fs->pwd;
fs->pwd = *path;
path_get(path);
- write_unlock(&fs->lock);
+ spin_unlock(&fs->lock);
if (old_pwd.dentry)
path_put(&old_pwd);
@@ -51,7 +51,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
task_lock(p);
fs = p->fs;
if (fs) {
- write_lock(&fs->lock);
+ spin_lock(&fs->lock);
if (fs->root.dentry == old_root->dentry
&& fs->root.mnt == old_root->mnt) {
path_get(new_root);
@@ -64,7 +64,7 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root)
fs->pwd = *new_root;
count++;
}
- write_unlock(&fs->lock);
+ spin_unlock(&fs->lock);
}
task_unlock(p);
} while_each_thread(g, p);
@@ -87,10 +87,10 @@ void exit_fs(struct task_struct *tsk)
if (fs) {
int kill;
task_lock(tsk);
- write_lock(&fs->lock);
+ spin_lock(&fs->lock);
tsk->fs = NULL;
kill = !--fs->users;
- write_unlock(&fs->lock);
+ spin_unlock(&fs->lock);
task_unlock(tsk);
if (kill)
free_fs_struct(fs);
@@ -104,7 +104,7 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old)
if (fs) {
fs->users = 1;
fs->in_exec = 0;
- rwlock_init(&fs->lock);
+ spin_lock_init(&fs->lock);
fs->umask = old->umask;
get_fs_root_and_pwd(old, &fs->root, &fs->pwd);
}
@@ -121,10 +121,10 @@ int unshare_fs_struct(void)
return -ENOMEM;
task_lock(current);
- write_lock(&fs->lock);
+ spin_lock(&fs->lock);
kill = !--fs->users;
current->fs = new_fs;
- write_unlock(&fs->lock);
+ spin_unlock(&fs->lock);
task_unlock(current);
if (kill)
@@ -143,7 +143,7 @@ EXPORT_SYMBOL(current_umask);
/* to be mentioned only in INIT_TASK */
struct fs_struct init_fs = {
.users = 1,
- .lock = __RW_LOCK_UNLOCKED(init_fs.lock),
+ .lock = __SPIN_LOCK_UNLOCKED(init_fs.lock),
.umask = 0022,
};
@@ -156,14 +156,14 @@ void daemonize_fs_struct(void)
task_lock(current);
- write_lock(&init_fs.lock);
+ spin_lock(&init_fs.lock);
init_fs.users++;
- write_unlock(&init_fs.lock);
+ spin_unlock(&init_fs.lock);
- write_lock(&fs->lock);
+ spin_lock(&fs->lock);
current->fs = &init_fs;
kill = !--fs->users;
- write_unlock(&fs->lock);
+ spin_unlock(&fs->lock);
task_unlock(current);
if (kill)
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index 99800e564157..6bc9e3a5a693 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -94,6 +94,7 @@ generic_acl_set(struct dentry *dentry, const char *name, const void *value,
if (error < 0)
goto failed;
inode->i_mode = mode;
+ inode->i_ctime = CURRENT_TIME;
if (error == 0) {
posix_acl_release(acl);
acl = NULL;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index dd1e55535a4e..f7dc9b5f9ef8 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -104,7 +104,7 @@ static char *__dentry_name(struct dentry *dentry, char *name)
__putname(name);
return NULL;
}
- strncpy(name, root, PATH_MAX);
+ strlcpy(name, root, PATH_MAX);
if (len > p - name) {
__putname(name);
return NULL;
@@ -876,7 +876,7 @@ static void *hostfs_follow_link(struct dentry *dentry, struct nameidata *nd)
char *path = dentry_name(dentry);
int err = -ENOMEM;
if (path) {
- int err = hostfs_do_readlink(path, link, PATH_MAX);
+ err = hostfs_do_readlink(path, link, PATH_MAX);
if (err == PATH_MAX)
err = -E2BIG;
__putname(path);
diff --git a/fs/internal.h b/fs/internal.h
index 6b706bc60a66..a6910e91cee8 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -9,6 +9,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#include <linux/lglock.h>
+
struct super_block;
struct linux_binprm;
struct path;
@@ -70,7 +72,8 @@ extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int);
extern void __init mnt_init(void);
-extern spinlock_t vfsmount_lock;
+DECLARE_BRLOCK(vfsmount_lock);
+
/*
* fs_struct.c
@@ -80,6 +83,8 @@ extern void chroot_fs_refs(struct path *, struct path *);
/*
* file_table.c
*/
+extern void file_sb_list_add(struct file *f, struct super_block *sb);
+extern void file_sb_list_del(struct file *f);
extern void mark_files_ro(struct super_block *);
extern struct file *get_empty_filp(void);
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index b0435dd0654d..05a38b9c4c0e 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -254,7 +254,9 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
{
int i;
- ll_rw_block(SWRITE, *batch_count, bhs);
+ for (i = 0; i < *batch_count; i++)
+ write_dirty_buffer(bhs[i], WRITE);
+
for (i = 0; i < *batch_count; i++) {
struct buffer_head *bh = bhs[i];
clear_buffer_jwrite(bh);
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 28a9ddaa0c49..95d8c11c929e 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -119,7 +119,6 @@ static int journal_write_commit_record(journal_t *journal,
struct buffer_head *bh;
journal_header_t *header;
int ret;
- int barrier_done = 0;
if (is_journal_aborted(journal))
return 0;
@@ -137,34 +136,36 @@ static int journal_write_commit_record(journal_t *journal,
JBUFFER_TRACE(descriptor, "write commit block");
set_buffer_dirty(bh);
+
if (journal->j_flags & JFS_BARRIER) {
- set_buffer_ordered(bh);
- barrier_done = 1;
- }
- ret = sync_dirty_buffer(bh);
- if (barrier_done)
- clear_buffer_ordered(bh);
- /* is it possible for another commit to fail at roughly
- * the same time as this one? If so, we don't want to
- * trust the barrier flag in the super, but instead want
- * to remember if we sent a barrier request
- */
- if (ret == -EOPNOTSUPP && barrier_done) {
- char b[BDEVNAME_SIZE];
+ ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_BARRIER);
- printk(KERN_WARNING
- "JBD: barrier-based sync failed on %s - "
- "disabling barriers\n",
- bdevname(journal->j_dev, b));
- spin_lock(&journal->j_state_lock);
- journal->j_flags &= ~JFS_BARRIER;
- spin_unlock(&journal->j_state_lock);
+ /*
+ * Is it possible for another commit to fail at roughly
+ * the same time as this one? If so, we don't want to
+ * trust the barrier flag in the super, but instead want
+ * to remember if we sent a barrier request
+ */
+ if (ret == -EOPNOTSUPP) {
+ char b[BDEVNAME_SIZE];
- /* And try again, without the barrier */
- set_buffer_uptodate(bh);
- set_buffer_dirty(bh);
+ printk(KERN_WARNING
+ "JBD: barrier-based sync failed on %s - "
+ "disabling barriers\n",
+ bdevname(journal->j_dev, b));
+ spin_lock(&journal->j_state_lock);
+ journal->j_flags &= ~JFS_BARRIER;
+ spin_unlock(&journal->j_state_lock);
+
+ /* And try again, without the barrier */
+ set_buffer_uptodate(bh);
+ set_buffer_dirty(bh);
+ ret = sync_dirty_buffer(bh);
+ }
+ } else {
ret = sync_dirty_buffer(bh);
}
+
put_bh(bh); /* One for getblk() */
journal_put_journal_head(descriptor);
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index f19ce94693d8..2c4b1f109da9 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1024,7 +1024,7 @@ void journal_update_superblock(journal_t *journal, int wait)
if (wait)
sync_dirty_buffer(bh);
else
- ll_rw_block(SWRITE, 1, &bh);
+ write_dirty_buffer(bh, WRITE);
out:
/* If we have just flushed the log (by marking s_start==0), then
diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c
index ad717328343a..d29018307e2e 100644
--- a/fs/jbd/revoke.c
+++ b/fs/jbd/revoke.c
@@ -617,7 +617,7 @@ static void flush_descriptor(journal_t *journal,
set_buffer_jwrite(bh);
BUFFER_TRACE(bh, "write");
set_buffer_dirty(bh);
- ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh);
+ write_dirty_buffer(bh, write_op);
}
#endif
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 1c23a0f4e8a3..5247e7ffdcb4 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -255,7 +255,9 @@ __flush_batch(journal_t *journal, int *batch_count)
{
int i;
- ll_rw_block(SWRITE, *batch_count, journal->j_chkpt_bhs);
+ for (i = 0; i < *batch_count; i++)
+ write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE);
+
for (i = 0; i < *batch_count; i++) {
struct buffer_head *bh = journal->j_chkpt_bhs[i];
clear_buffer_jwrite(bh);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index f52e5e8049f1..7c068c189d80 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -101,7 +101,6 @@ static int journal_submit_commit_record(journal_t *journal,
struct commit_header *tmp;
struct buffer_head *bh;
int ret;
- int barrier_done = 0;
struct timespec now = current_kernel_time();
if (is_journal_aborted(journal))
@@ -136,30 +135,22 @@ static int journal_submit_commit_record(journal_t *journal,
if (journal->j_flags & JBD2_BARRIER &&
!JBD2_HAS_INCOMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
- set_buffer_ordered(bh);
- barrier_done = 1;
- }
- ret = submit_bh(WRITE_SYNC_PLUG, bh);
- if (barrier_done)
- clear_buffer_ordered(bh);
-
- /* is it possible for another commit to fail at roughly
- * the same time as this one? If so, we don't want to
- * trust the barrier flag in the super, but instead want
- * to remember if we sent a barrier request
- */
- if (ret == -EOPNOTSUPP && barrier_done) {
- printk(KERN_WARNING
- "JBD2: Disabling barriers on %s, "
- "not supported by device\n", journal->j_devname);
- write_lock(&journal->j_state_lock);
- journal->j_flags &= ~JBD2_BARRIER;
- write_unlock(&journal->j_state_lock);
+ ret = submit_bh(WRITE_SYNC_PLUG | WRITE_BARRIER, bh);
+ if (ret == -EOPNOTSUPP) {
+ printk(KERN_WARNING
+ "JBD2: Disabling barriers on %s, "
+ "not supported by device\n", journal->j_devname);
+ write_lock(&journal->j_state_lock);
+ journal->j_flags &= ~JBD2_BARRIER;
+ write_unlock(&journal->j_state_lock);
- /* And try again, without the barrier */
- lock_buffer(bh);
- set_buffer_uptodate(bh);
- clear_buffer_dirty(bh);
+ /* And try again, without the barrier */
+ lock_buffer(bh);
+ set_buffer_uptodate(bh);
+ clear_buffer_dirty(bh);
+ ret = submit_bh(WRITE_SYNC_PLUG, bh);
+ }
+ } else {
ret = submit_bh(WRITE_SYNC_PLUG, bh);
}
*cbh = bh;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index ad5866aaf0f9..0e8014ea6b94 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1124,7 +1124,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait)
set_buffer_uptodate(bh);
}
} else
- ll_rw_block(SWRITE, 1, &bh);
+ write_dirty_buffer(bh, WRITE);
out:
/* If we have just flushed the log (by marking s_start==0), then
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index a360b06af2e3..9ad321fd63fd 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -625,7 +625,7 @@ static void flush_descriptor(journal_t *journal,
set_buffer_jwrite(bh);
BUFFER_TRACE(bh, "write");
set_buffer_dirty(bh);
- ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh);
+ write_dirty_buffer(bh, write_op);
}
#endif
diff --git a/fs/mbcache.c b/fs/mbcache.c
index cf4e6cdfd15b..93444747237b 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -80,6 +80,7 @@ struct mb_cache {
struct list_head c_cache_list;
const char *c_name;
atomic_t c_entry_count;
+ int c_max_entries;
int c_bucket_bits;
struct kmem_cache *c_entry_cache;
struct list_head *c_block_hash;
@@ -243,6 +244,12 @@ mb_cache_create(const char *name, int bucket_bits)
if (!cache->c_entry_cache)
goto fail2;
+ /*
+ * Set an upper limit on the number of cache entries so that the hash
+ * chains won't grow too long.
+ */
+ cache->c_max_entries = bucket_count << 4;
+
spin_lock(&mb_cache_spinlock);
list_add(&cache->c_cache_list, &mb_cache_list);
spin_unlock(&mb_cache_spinlock);
@@ -333,7 +340,6 @@ mb_cache_destroy(struct mb_cache *cache)
kfree(cache);
}
-
/*
* mb_cache_entry_alloc()
*
@@ -345,17 +351,29 @@ mb_cache_destroy(struct mb_cache *cache)
struct mb_cache_entry *
mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
{
- struct mb_cache_entry *ce;
-
- ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags);
- if (ce) {
+ struct mb_cache_entry *ce = NULL;
+
+ if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) {
+ spin_lock(&mb_cache_spinlock);
+ if (!list_empty(&mb_cache_lru_list)) {
+ ce = list_entry(mb_cache_lru_list.next,
+ struct mb_cache_entry, e_lru_list);
+ list_del_init(&ce->e_lru_list);
+ __mb_cache_entry_unhash(ce);
+ }
+ spin_unlock(&mb_cache_spinlock);
+ }
+ if (!ce) {
+ ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags);
+ if (!ce)
+ return NULL;
atomic_inc(&cache->c_entry_count);
INIT_LIST_HEAD(&ce->e_lru_list);
INIT_LIST_HEAD(&ce->e_block_list);
ce->e_cache = cache;
- ce->e_used = 1 + MB_CACHE_WRITER;
ce->e_queued = 0;
}
+ ce->e_used = 1 + MB_CACHE_WRITER;
return ce;
}
diff --git a/fs/namei.c b/fs/namei.c
index 17ea76bf2fbe..24896e833565 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -595,15 +595,16 @@ int follow_up(struct path *path)
{
struct vfsmount *parent;
struct dentry *mountpoint;
- spin_lock(&vfsmount_lock);
+
+ br_read_lock(vfsmount_lock);
parent = path->mnt->mnt_parent;
if (parent == path->mnt) {
- spin_unlock(&vfsmount_lock);
+ br_read_unlock(vfsmount_lock);
return 0;
}
mntget(parent);
mountpoint = dget(path->mnt->mnt_mountpoint);
- spin_unlock(&vfsmount_lock);
+ br_read_unlock(vfsmount_lock);
dput(path->dentry);
path->dentry = mountpoint;
mntput(path->mnt);
@@ -686,6 +687,35 @@ static __always_inline void follow_dotdot(struct nameidata *nd)
}
/*
+ * Allocate a dentry with name and parent, and perform a parent
+ * directory ->lookup on it. Returns the new dentry, or ERR_PTR
+ * on error. parent->d_inode->i_mutex must be held. d_lookup must
+ * have verified that no child exists while under i_mutex.
+ */
+static struct dentry *d_alloc_and_lookup(struct dentry *parent,
+ struct qstr *name, struct nameidata *nd)
+{
+ struct inode *inode = parent->d_inode;
+ struct dentry *dentry;
+ struct dentry *old;
+
+ /* Don't create child dentry for a dead directory. */
+ if (unlikely(IS_DEADDIR(inode)))
+ return ERR_PTR(-ENOENT);
+
+ dentry = d_alloc(parent, name);
+ if (unlikely(!dentry))
+ return ERR_PTR(-ENOMEM);
+
+ old = inode->i_op->lookup(inode, dentry, nd);
+ if (unlikely(old)) {
+ dput(dentry);
+ dentry = old;
+ }
+ return dentry;
+}
+
+/*
* It's more convoluted than I'd like it to be, but... it's still fairly
* small and for now I'd prefer to have fast path as straight as possible.
* It _is_ time-critical.
@@ -706,9 +736,15 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
return err;
}
+ /*
+ * Rename seqlock is not required here because in the off chance
+ * of a false negative due to a concurrent rename, we're going to
+ * do the non-racy lookup, below.
+ */
dentry = __d_lookup(nd->path.dentry, name);
if (!dentry)
goto need_lookup;
+found:
if (dentry->d_op && dentry->d_op->d_revalidate)
goto need_revalidate;
done:
@@ -724,56 +760,28 @@ need_lookup:
mutex_lock(&dir->i_mutex);
/*
* First re-do the cached lookup just in case it was created
- * while we waited for the directory semaphore..
+ * while we waited for the directory semaphore, or the first
+ * lookup failed due to an unrelated rename.
*
- * FIXME! This could use version numbering or similar to
- * avoid unnecessary cache lookups.
- *
- * The "dcache_lock" is purely to protect the RCU list walker
- * from concurrent renames at this point (we mustn't get false
- * negatives from the RCU list walk here, unlike the optimistic
- * fast walk).
- *
- * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
+ * This could use version numbering or similar to avoid unnecessary
+ * cache lookups, but then we'd have to do the first lookup in the
+ * non-racy way. However in the common case here, everything should
+ * be hot in cache, so would it be a big win?
*/
dentry = d_lookup(parent, name);
- if (!dentry) {
- struct dentry *new;
-
- /* Don't create child dentry for a dead directory. */
- dentry = ERR_PTR(-ENOENT);
- if (IS_DEADDIR(dir))
- goto out_unlock;
-
- new = d_alloc(parent, name);
- dentry = ERR_PTR(-ENOMEM);
- if (new) {
- dentry = dir->i_op->lookup(dir, new, nd);
- if (dentry)
- dput(new);
- else
- dentry = new;
- }
-out_unlock:
+ if (likely(!dentry)) {
+ dentry = d_alloc_and_lookup(parent, name, nd);
mutex_unlock(&dir->i_mutex);
if (IS_ERR(dentry))
goto fail;
goto done;
}
-
/*
* Uhhuh! Nasty case: the cache was re-populated while
* we waited on the semaphore. Need to revalidate.
*/
mutex_unlock(&dir->i_mutex);
- if (dentry->d_op && dentry->d_op->d_revalidate) {
- dentry = do_revalidate(dentry, nd);
- if (!dentry)
- dentry = ERR_PTR(-ENOENT);
- }
- if (IS_ERR(dentry))
- goto fail;
- goto done;
+ goto found;
need_revalidate:
dentry = do_revalidate(dentry, nd);
@@ -1130,35 +1138,18 @@ static struct dentry *__lookup_hash(struct qstr *name,
goto out;
}
- dentry = __d_lookup(base, name);
-
- /* lockess __d_lookup may fail due to concurrent d_move()
- * in some unrelated directory, so try with d_lookup
+ /*
+ * Don't bother with __d_lookup: callers are for creat as
+ * well as unlink, so a lot of the time it would cost
+ * a double lookup.
*/
- if (!dentry)
- dentry = d_lookup(base, name);
+ dentry = d_lookup(base, name);
if (dentry && dentry->d_op && dentry->d_op->d_revalidate)
dentry = do_revalidate(dentry, nd);
- if (!dentry) {
- struct dentry *new;
-
- /* Don't create child dentry for a dead directory. */
- dentry = ERR_PTR(-ENOENT);
- if (IS_DEADDIR(inode))
- goto out;
-
- new = d_alloc(base, name);
- dentry = ERR_PTR(-ENOMEM);
- if (!new)
- goto out;
- dentry = inode->i_op->lookup(inode, new, nd);
- if (!dentry)
- dentry = new;
- else
- dput(new);
- }
+ if (!dentry)
+ dentry = d_alloc_and_lookup(base, name, nd);
out:
return dentry;
}
diff --git a/fs/namespace.c b/fs/namespace.c
index 2e10cb19c5b0..de402eb6eafb 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -11,6 +11,8 @@
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
#include <linux/kernel.h>
@@ -38,12 +40,10 @@
#define HASH_SHIFT ilog2(PAGE_SIZE / sizeof(struct list_head))
#define HASH_SIZE (1UL << HASH_SHIFT)
-/* spinlock for vfsmount related operations, inplace of dcache_lock */
-__cacheline_aligned_in_smp DEFINE_SPINLOCK(vfsmount_lock);
-
static int event;
static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida);
+static DEFINE_SPINLOCK(mnt_id_lock);
static int mnt_id_start = 0;
static int mnt_group_start = 1;
@@ -55,6 +55,16 @@ static struct rw_semaphore namespace_sem;
struct kobject *fs_kobj;
EXPORT_SYMBOL_GPL(fs_kobj);
+/*
+ * vfsmount lock may be taken for read to prevent changes to the
+ * vfsmount hash, ie. during mountpoint lookups or walking back
+ * up the tree.
+ *
+ * It should be taken for write in all cases where the vfsmount
+ * tree or hash is modified or when a vfsmount structure is modified.
+ */
+DEFINE_BRLOCK(vfsmount_lock);
+
static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
{
unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
@@ -65,18 +75,21 @@ static inline unsigned long hash(struct vfsmount *mnt, struct dentry *dentry)
#define MNT_WRITER_UNDERFLOW_LIMIT -(1<<16)
-/* allocation is serialized by namespace_sem */
+/*
+ * allocation is serialized by namespace_sem, but we need the spinlock to
+ * serialize with freeing.
+ */
static int mnt_alloc_id(struct vfsmount *mnt)
{
int res;
retry:
ida_pre_get(&mnt_id_ida, GFP_KERNEL);
- spin_lock(&vfsmount_lock);
+ spin_lock(&mnt_id_lock);
res = ida_get_new_above(&mnt_id_ida, mnt_id_start, &mnt->mnt_id);
if (!res)
mnt_id_start = mnt->mnt_id + 1;
- spin_unlock(&vfsmount_lock);
+ spin_unlock(&mnt_id_lock);
if (res == -EAGAIN)
goto retry;
@@ -86,11 +99,11 @@ retry:
static void mnt_free_id(struct vfsmount *mnt)
{
int id = mnt->mnt_id;
- spin_lock(&vfsmount_lock);
+ spin_lock(&mnt_id_lock);
ida_remove(&mnt_id_ida, id);
if (mnt_id_start > id)
mnt_id_start = id;
- spin_unlock(&vfsmount_lock);
+ spin_unlock(&mnt_id_lock);
}
/*
@@ -348,7 +361,7 @@ static int mnt_make_readonly(struct vfsmount *mnt)
{
int ret = 0;
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
mnt->mnt_flags |= MNT_WRITE_HOLD;
/*
* After storing MNT_WRITE_HOLD, we'll read the counters. This store
@@ -382,15 +395,15 @@ static int mnt_make_readonly(struct vfsmount *mnt)
*/
smp_wmb();
mnt->mnt_flags &= ~MNT_WRITE_HOLD;
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
return ret;
}
static void __mnt_unmake_readonly(struct vfsmount *mnt)
{
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
mnt->mnt_flags &= ~MNT_READONLY;
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
}
void simple_set_mnt(struct vfsmount *mnt, struct super_block *sb)
@@ -414,6 +427,7 @@ void free_vfsmnt(struct vfsmount *mnt)
/*
* find the first or last mount at @dentry on vfsmount @mnt depending on
* @dir. If @dir is set return the first mount else return the last mount.
+ * vfsmount_lock must be held for read or write.
*/
struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
int dir)
@@ -443,10 +457,11 @@ struct vfsmount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry,
struct vfsmount *lookup_mnt(struct path *path)
{
struct vfsmount *child_mnt;
- spin_lock(&vfsmount_lock);
+
+ br_read_lock(vfsmount_lock);
if ((child_mnt = __lookup_mnt(path->mnt, path->dentry, 1)))
mntget(child_mnt);
- spin_unlock(&vfsmount_lock);
+ br_read_unlock(vfsmount_lock);
return child_mnt;
}
@@ -455,6 +470,9 @@ static inline int check_mnt(struct vfsmount *mnt)
return mnt->mnt_ns == current->nsproxy->mnt_ns;
}
+/*
+ * vfsmount lock must be held for write
+ */
static void touch_mnt_namespace(struct mnt_namespace *ns)
{
if (ns) {
@@ -463,6 +481,9 @@ static void touch_mnt_namespace(struct mnt_namespace *ns)
}
}
+/*
+ * vfsmount lock must be held for write
+ */
static void __touch_mnt_namespace(struct mnt_namespace *ns)
{
if (ns && ns->event != event) {
@@ -471,6 +492,9 @@ static void __touch_mnt_namespace(struct mnt_namespace *ns)
}
}
+/*
+ * vfsmount lock must be held for write
+ */
static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
{
old_path->dentry = mnt->mnt_mountpoint;
@@ -482,6 +506,9 @@ static void detach_mnt(struct vfsmount *mnt, struct path *old_path)
old_path->dentry->d_mounted--;
}
+/*
+ * vfsmount lock must be held for write
+ */
void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
struct vfsmount *child_mnt)
{
@@ -490,6 +517,9 @@ void mnt_set_mountpoint(struct vfsmount *mnt, struct dentry *dentry,
dentry->d_mounted++;
}
+/*
+ * vfsmount lock must be held for write
+ */
static void attach_mnt(struct vfsmount *mnt, struct path *path)
{
mnt_set_mountpoint(path->mnt, path->dentry, mnt);
@@ -499,7 +529,7 @@ static void attach_mnt(struct vfsmount *mnt, struct path *path)
}
/*
- * the caller must hold vfsmount_lock
+ * vfsmount lock must be held for write
*/
static void commit_tree(struct vfsmount *mnt)
{
@@ -623,39 +653,43 @@ static inline void __mntput(struct vfsmount *mnt)
void mntput_no_expire(struct vfsmount *mnt)
{
repeat:
- if (atomic_dec_and_lock(&mnt->mnt_count, &vfsmount_lock)) {
- if (likely(!mnt->mnt_pinned)) {
- spin_unlock(&vfsmount_lock);
- __mntput(mnt);
- return;
- }
- atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
- mnt->mnt_pinned = 0;
- spin_unlock(&vfsmount_lock);
- acct_auto_close_mnt(mnt);
- goto repeat;
+ if (atomic_add_unless(&mnt->mnt_count, -1, 1))
+ return;
+ br_write_lock(vfsmount_lock);
+ if (!atomic_dec_and_test(&mnt->mnt_count)) {
+ br_write_unlock(vfsmount_lock);
+ return;
+ }
+ if (likely(!mnt->mnt_pinned)) {
+ br_write_unlock(vfsmount_lock);
+ __mntput(mnt);
+ return;
}
+ atomic_add(mnt->mnt_pinned + 1, &mnt->mnt_count);
+ mnt->mnt_pinned = 0;
+ br_write_unlock(vfsmount_lock);
+ acct_auto_close_mnt(mnt);
+ goto repeat;
}
-
EXPORT_SYMBOL(mntput_no_expire);
void mnt_pin(struct vfsmount *mnt)
{
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
mnt->mnt_pinned++;
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
}
EXPORT_SYMBOL(mnt_pin);
void mnt_unpin(struct vfsmount *mnt)
{
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
if (mnt->mnt_pinned) {
atomic_inc(&mnt->mnt_count);
mnt->mnt_pinned--;
}
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
}
EXPORT_SYMBOL(mnt_unpin);
@@ -746,12 +780,12 @@ int mnt_had_events(struct proc_mounts *p)
struct mnt_namespace *ns = p->ns;
int res = 0;
- spin_lock(&vfsmount_lock);
+ br_read_lock(vfsmount_lock);
if (p->event != ns->event) {
p->event = ns->event;
res = 1;
}
- spin_unlock(&vfsmount_lock);
+ br_read_unlock(vfsmount_lock);
return res;
}
@@ -952,12 +986,12 @@ int may_umount_tree(struct vfsmount *mnt)
int minimum_refs = 0;
struct vfsmount *p;
- spin_lock(&vfsmount_lock);
+ br_read_lock(vfsmount_lock);
for (p = mnt; p; p = next_mnt(p, mnt)) {
actual_refs += atomic_read(&p->mnt_count);
minimum_refs += 2;
}
- spin_unlock(&vfsmount_lock);
+ br_read_unlock(vfsmount_lock);
if (actual_refs > minimum_refs)
return 0;
@@ -984,10 +1018,10 @@ int may_umount(struct vfsmount *mnt)
{
int ret = 1;
down_read(&namespace_sem);
- spin_lock(&vfsmount_lock);
+ br_read_lock(vfsmount_lock);
if (propagate_mount_busy(mnt, 2))
ret = 0;
- spin_unlock(&vfsmount_lock);
+ br_read_unlock(vfsmount_lock);
up_read(&namespace_sem);
return ret;
}
@@ -1003,13 +1037,14 @@ void release_mounts(struct list_head *head)
if (mnt->mnt_parent != mnt) {
struct dentry *dentry;
struct vfsmount *m;
- spin_lock(&vfsmount_lock);
+
+ br_write_lock(vfsmount_lock);
dentry = mnt->mnt_mountpoint;
m = mnt->mnt_parent;
mnt->mnt_mountpoint = mnt->mnt_root;
mnt->mnt_parent = mnt;
m->mnt_ghosts--;
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
dput(dentry);
mntput(m);
}
@@ -1017,6 +1052,10 @@ void release_mounts(struct list_head *head)
}
}
+/*
+ * vfsmount lock must be held for write
+ * namespace_sem must be held for write
+ */
void umount_tree(struct vfsmount *mnt, int propagate, struct list_head *kill)
{
struct vfsmount *p;
@@ -1107,7 +1146,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
}
down_write(&namespace_sem);
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
event++;
if (!(flags & MNT_DETACH))
@@ -1119,7 +1158,7 @@ static int do_umount(struct vfsmount *mnt, int flags)
umount_tree(mnt, 1, &umount_list);
retval = 0;
}
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
up_write(&namespace_sem);
release_mounts(&umount_list);
return retval;
@@ -1231,19 +1270,19 @@ struct vfsmount *copy_tree(struct vfsmount *mnt, struct dentry *dentry,
q = clone_mnt(p, p->mnt_root, flag);
if (!q)
goto Enomem;
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
list_add_tail(&q->mnt_list, &res->mnt_list);
attach_mnt(q, &path);
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
}
}
return res;
Enomem:
if (res) {
LIST_HEAD(umount_list);
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
umount_tree(res, 0, &umount_list);
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
release_mounts(&umount_list);
}
return NULL;
@@ -1262,9 +1301,9 @@ void drop_collected_mounts(struct vfsmount *mnt)
{
LIST_HEAD(umount_list);
down_write(&namespace_sem);
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
umount_tree(mnt, 0, &umount_list);
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
up_write(&namespace_sem);
release_mounts(&umount_list);
}
@@ -1392,7 +1431,7 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
if (err)
goto out_cleanup_ids;
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
if (IS_MNT_SHARED(dest_mnt)) {
for (p = source_mnt; p; p = next_mnt(p, source_mnt))
@@ -1411,7 +1450,8 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
list_del_init(&child->mnt_hash);
commit_tree(child);
}
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
+
return 0;
out_cleanup_ids:
@@ -1466,10 +1506,10 @@ static int do_change_type(struct path *path, int flag)
goto out_unlock;
}
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
for (m = mnt; m; m = (recurse ? next_mnt(m, mnt) : NULL))
change_mnt_propagation(m, type);
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
out_unlock:
up_write(&namespace_sem);
@@ -1513,9 +1553,10 @@ static int do_loopback(struct path *path, char *old_name,
err = graft_tree(mnt, path);
if (err) {
LIST_HEAD(umount_list);
- spin_lock(&vfsmount_lock);
+
+ br_write_lock(vfsmount_lock);
umount_tree(mnt, 0, &umount_list);
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
release_mounts(&umount_list);
}
@@ -1568,16 +1609,16 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
else
err = do_remount_sb(sb, flags, data, 0);
if (!err) {
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
mnt_flags |= path->mnt->mnt_flags & MNT_PROPAGATION_MASK;
path->mnt->mnt_flags = mnt_flags;
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
}
up_write(&sb->s_umount);
if (!err) {
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
touch_mnt_namespace(path->mnt->mnt_ns);
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
}
return err;
}
@@ -1754,7 +1795,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
return;
down_write(&namespace_sem);
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
/* extract from the expiration list every vfsmount that matches the
* following criteria:
@@ -1773,7 +1814,7 @@ void mark_mounts_for_expiry(struct list_head *mounts)
touch_mnt_namespace(mnt->mnt_ns);
umount_tree(mnt, 1, &umounts);
}
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
up_write(&namespace_sem);
release_mounts(&umounts);
@@ -1830,6 +1871,8 @@ resume:
/*
* process a list of expirable mountpoints with the intent of discarding any
* submounts of a specific parent mountpoint
+ *
+ * vfsmount_lock must be held for write
*/
static void shrink_submounts(struct vfsmount *mnt, struct list_head *umounts)
{
@@ -2048,9 +2091,9 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns,
kfree(new_ns);
return ERR_PTR(-ENOMEM);
}
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
list_add_tail(&new_ns->list, &new_ns->root->mnt_list);
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
/*
* Second pass: switch the tsk->fs->* elements and mark new vfsmounts
@@ -2244,7 +2287,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
goto out2; /* not attached */
/* make sure we can reach put_old from new_root */
tmp = old.mnt;
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
if (tmp != new.mnt) {
for (;;) {
if (tmp->mnt_parent == tmp)
@@ -2264,7 +2307,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
/* mount new_root on / */
attach_mnt(new.mnt, &root_parent);
touch_mnt_namespace(current->nsproxy->mnt_ns);
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
chroot_fs_refs(&root, &new);
error = 0;
path_put(&root_parent);
@@ -2279,7 +2322,7 @@ out1:
out0:
return error;
out3:
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
goto out2;
}
@@ -2326,6 +2369,8 @@ void __init mnt_init(void)
for (u = 0; u < HASH_SIZE; u++)
INIT_LIST_HEAD(&mount_hashtable[u]);
+ br_lock_init(vfsmount_lock);
+
err = sysfs_init();
if (err)
printk(KERN_WARNING "%s: sysfs_init error: %d\n",
@@ -2344,9 +2389,9 @@ void put_mnt_ns(struct mnt_namespace *ns)
if (!atomic_dec_and_test(&ns->count))
return;
down_write(&namespace_sem);
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
umount_tree(ns->root, 0, &umount_list);
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
up_write(&namespace_sem);
release_mounts(&umount_list);
kfree(ns);
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 26a510a7be09..6c2aad49d731 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -63,7 +63,6 @@ config NFS_V3_ACL
config NFS_V4
bool "NFS client support for NFS version 4"
depends on NFS_FS
- select RPCSEC_GSS_KRB5
help
This option enables support for version 4 of the NFS protocol
(RFC 3530) in the kernel's NFS client.
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 29539ceeb745..e257172d438c 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -140,6 +140,13 @@ nfs_opendir(struct inode *inode, struct file *filp)
/* Call generic open code in order to cache credentials */
res = nfs_open(inode, filp);
+ if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) {
+ /* This is a mountpoint, so d_revalidate will never
+ * have been called, so we need to refresh the
+ * inode (for close-open consistency) ourselves.
+ */
+ __nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ }
return res;
}
@@ -1103,7 +1110,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd)
if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
goto no_open_dput;
/* We can't create new files, or truncate existing ones here */
- openflags &= ~(O_CREAT|O_TRUNC);
+ openflags &= ~(O_CREAT|O_EXCL|O_TRUNC);
/*
* Note: we're not holding inode->i_mutex and so may be racing with
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 2d141a74ae82..eb51bd6201da 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -323,7 +323,7 @@ nfs_file_fsync(struct file *file, int datasync)
have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
if (have_error)
ret = xchg(&ctx->error, 0);
- if (!ret)
+ if (!ret && status < 0)
ret = status;
return ret;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7ffbb98ddec3..089da5b5d20a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2036,7 +2036,8 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
struct rpc_cred *cred;
struct nfs4_state *state;
struct dentry *res;
- fmode_t fmode = nd->intent.open.flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
+ int open_flags = nd->intent.open.flags;
+ fmode_t fmode = open_flags & (FMODE_READ | FMODE_WRITE | FMODE_EXEC);
if (nd->flags & LOOKUP_CREATE) {
attr.ia_mode = nd->intent.open.create_mode;
@@ -2044,8 +2045,9 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
if (!IS_POSIXACL(dir))
attr.ia_mode &= ~current_umask();
} else {
+ open_flags &= ~O_EXCL;
attr.ia_valid = 0;
- BUG_ON(nd->intent.open.flags & O_CREAT);
+ BUG_ON(open_flags & O_CREAT);
}
cred = rpc_lookup_cred();
@@ -2054,7 +2056,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
parent = dentry->d_parent;
/* Protect against concurrent sillydeletes */
nfs_block_sillyrename(parent);
- state = nfs4_do_open(dir, &path, fmode, nd->intent.open.flags, &attr, cred);
+ state = nfs4_do_open(dir, &path, fmode, open_flags, &attr, cred);
put_rpccred(cred);
if (IS_ERR(state)) {
if (PTR_ERR(state) == -ENOENT) {
@@ -2273,8 +2275,7 @@ static int nfs4_get_referral(struct inode *dir, const struct qstr *name, struct
out:
if (page)
__free_page(page);
- if (locations)
- kfree(locations);
+ kfree(locations);
return status;
}
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ee26316ad1f4..ec3966e4706b 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -655,6 +655,13 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
if (nfss->options & NFS_OPTION_FSCACHE)
seq_printf(m, ",fsc");
+
+ if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) {
+ if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
+ seq_printf(m, ",lookupcache=none");
+ else
+ seq_printf(m, ",lookupcache=pos");
+ }
}
/*
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 503b9da159a3..95932f523aef 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -69,7 +69,6 @@ config NFSD_V4
depends on NFSD && PROC_FS && EXPERIMENTAL
select NFSD_V3
select FS_POSIX_ACL
- select RPCSEC_GSS_KRB5
help
This option enables support in your system's NFS server for
version 4 of the NFS protocol (RFC 3530).
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index bee60c04109a..922263393c76 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -175,24 +175,24 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag)
{
struct the_nilfs *nilfs = sbi->s_nilfs;
int err;
- int barrier_done = 0;
- if (nilfs_test_opt(sbi, BARRIER)) {
- set_buffer_ordered(nilfs->ns_sbh[0]);
- barrier_done = 1;
- }
retry:
set_buffer_dirty(nilfs->ns_sbh[0]);
- err = sync_dirty_buffer(nilfs->ns_sbh[0]);
- if (err == -EOPNOTSUPP && barrier_done) {
- nilfs_warning(sbi->s_super, __func__,
- "barrier-based sync failed. "
- "disabling barriers\n");
- nilfs_clear_opt(sbi, BARRIER);
- barrier_done = 0;
- clear_buffer_ordered(nilfs->ns_sbh[0]);
- goto retry;
+
+ if (nilfs_test_opt(sbi, BARRIER)) {
+ err = __sync_dirty_buffer(nilfs->ns_sbh[0],
+ WRITE_SYNC | WRITE_BARRIER);
+ if (err == -EOPNOTSUPP) {
+ nilfs_warning(sbi->s_super, __func__,
+ "barrier-based sync failed. "
+ "disabling barriers\n");
+ nilfs_clear_opt(sbi, BARRIER);
+ goto retry;
+ }
+ } else {
+ err = sync_dirty_buffer(nilfs->ns_sbh[0]);
}
+
if (unlikely(err)) {
printk(KERN_ERR
"NILFS: unable to write superblock (err=%d)\n", err);
diff --git a/fs/open.c b/fs/open.c
index 630715f9f73d..d74e1983e8dc 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -675,7 +675,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
f->f_path.mnt = mnt;
f->f_pos = 0;
f->f_op = fops_get(inode->i_fop);
- file_move(f, &inode->i_sb->s_files);
+ file_sb_list_add(f, inode->i_sb);
error = security_dentry_open(f, cred);
if (error)
@@ -721,7 +721,7 @@ cleanup_all:
mnt_drop_write(mnt);
}
}
- file_kill(f);
+ file_sb_list_del(f);
f->f_path.dentry = NULL;
f->f_path.mnt = NULL;
cleanup_file:
diff --git a/fs/pnode.c b/fs/pnode.c
index 5cc564a83149..8066b8dd748f 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -126,6 +126,9 @@ static int do_make_slave(struct vfsmount *mnt)
return 0;
}
+/*
+ * vfsmount lock must be held for write
+ */
void change_mnt_propagation(struct vfsmount *mnt, int type)
{
if (type == MS_SHARED) {
@@ -270,12 +273,12 @@ int propagate_mnt(struct vfsmount *dest_mnt, struct dentry *dest_dentry,
prev_src_mnt = child;
}
out:
- spin_lock(&vfsmount_lock);
+ br_write_lock(vfsmount_lock);
while (!list_empty(&tmp_list)) {
child = list_first_entry(&tmp_list, struct vfsmount, mnt_hash);
umount_tree(child, 0, &umount_list);
}
- spin_unlock(&vfsmount_lock);
+ br_write_unlock(vfsmount_lock);
release_mounts(&umount_list);
return ret;
}
@@ -296,6 +299,8 @@ static inline int do_refcount_check(struct vfsmount *mnt, int count)
* other mounts its parent propagates to.
* Check if any of these mounts that **do not have submounts**
* have more references than 'refcnt'. If so return busy.
+ *
+ * vfsmount lock must be held for read or write
*/
int propagate_mount_busy(struct vfsmount *mnt, int refcnt)
{
@@ -353,6 +358,8 @@ static void __propagate_umount(struct vfsmount *mnt)
* collect all mounts that receive propagation from the mount in @list,
* and return these additional mounts in the same list.
* @list: the list of mounts to be unmounted.
+ *
+ * vfsmount lock must be held for write
*/
int propagate_umount(struct list_head *list)
{
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index ae35413dcbe1..caa758377d66 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -83,6 +83,7 @@ void reiserfs_evict_inode(struct inode *inode)
dquot_drop(inode);
inode->i_blocks = 0;
reiserfs_write_unlock_once(inode->i_sb, depth);
+ return;
no_delete:
end_writeback(inode);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 1ec952b1f036..812e2c05aa29 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2311,7 +2311,7 @@ static int journal_read_transaction(struct super_block *sb,
/* flush out the real blocks */
for (i = 0; i < get_desc_trans_len(desc); i++) {
set_buffer_dirty(real_blocks[i]);
- ll_rw_block(SWRITE, 1, real_blocks + i);
+ write_dirty_buffer(real_blocks[i], WRITE);
}
for (i = 0; i < get_desc_trans_len(desc); i++) {
wait_on_buffer(real_blocks[i]);
diff --git a/fs/super.c b/fs/super.c
index 9674ab2c8718..8819e3a7ff20 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -54,7 +54,22 @@ static struct super_block *alloc_super(struct file_system_type *type)
s = NULL;
goto out;
}
+#ifdef CONFIG_SMP
+ s->s_files = alloc_percpu(struct list_head);
+ if (!s->s_files) {
+ security_sb_free(s);
+ kfree(s);
+ s = NULL;
+ goto out;
+ } else {
+ int i;
+
+ for_each_possible_cpu(i)
+ INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i));
+ }
+#else
INIT_LIST_HEAD(&s->s_files);
+#endif
INIT_LIST_HEAD(&s->s_instances);
INIT_HLIST_HEAD(&s->s_anon);
INIT_LIST_HEAD(&s->s_inodes);
@@ -108,6 +123,9 @@ out:
*/
static inline void destroy_super(struct super_block *s)
{
+#ifdef CONFIG_SMP
+ free_percpu(s->s_files);
+#endif
security_sb_free(s);
kfree(s->s_subtype);
kfree(s->s_options);
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index 048484fb10d2..46f7a807bbc1 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -114,10 +114,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
ubh_mark_buffer_dirty (USPI_UBH(uspi));
ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
- if (sb->s_flags & MS_SYNCHRONOUS) {
- ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
- ubh_wait_on_buffer (UCPI_UBH(ucpi));
- }
+ if (sb->s_flags & MS_SYNCHRONOUS)
+ ubh_sync_block(UCPI_UBH(ucpi));
sb->s_dirt = 1;
unlock_super (sb);
@@ -207,10 +205,8 @@ do_more:
ubh_mark_buffer_dirty (USPI_UBH(uspi));
ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
- if (sb->s_flags & MS_SYNCHRONOUS) {
- ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
- ubh_wait_on_buffer (UCPI_UBH(ucpi));
- }
+ if (sb->s_flags & MS_SYNCHRONOUS)
+ ubh_sync_block(UCPI_UBH(ucpi));
if (overflow) {
fragment += count;
@@ -558,10 +554,8 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
ubh_mark_buffer_dirty (USPI_UBH(uspi));
ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
- if (sb->s_flags & MS_SYNCHRONOUS) {
- ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
- ubh_wait_on_buffer (UCPI_UBH(ucpi));
- }
+ if (sb->s_flags & MS_SYNCHRONOUS)
+ ubh_sync_block(UCPI_UBH(ucpi));
sb->s_dirt = 1;
UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment);
@@ -680,10 +674,8 @@ cg_found:
succed:
ubh_mark_buffer_dirty (USPI_UBH(uspi));
ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
- if (sb->s_flags & MS_SYNCHRONOUS) {
- ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
- ubh_wait_on_buffer (UCPI_UBH(ucpi));
- }
+ if (sb->s_flags & MS_SYNCHRONOUS)
+ ubh_sync_block(UCPI_UBH(ucpi));
sb->s_dirt = 1;
result += cgno * uspi->s_fpg;
diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index 428017e018fe..2eabf04af3de 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -113,10 +113,8 @@ void ufs_free_inode (struct inode * inode)
ubh_mark_buffer_dirty (USPI_UBH(uspi));
ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
- if (sb->s_flags & MS_SYNCHRONOUS) {
- ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
- ubh_wait_on_buffer (UCPI_UBH(ucpi));
- }
+ if (sb->s_flags & MS_SYNCHRONOUS)
+ ubh_sync_block(UCPI_UBH(ucpi));
sb->s_dirt = 1;
unlock_super (sb);
@@ -156,10 +154,8 @@ static void ufs2_init_inodes_chunk(struct super_block *sb,
fs32_add(sb, &ucg->cg_u.cg_u2.cg_initediblk, uspi->s_inopb);
ubh_mark_buffer_dirty(UCPI_UBH(ucpi));
- if (sb->s_flags & MS_SYNCHRONOUS) {
- ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
- ubh_wait_on_buffer(UCPI_UBH(ucpi));
- }
+ if (sb->s_flags & MS_SYNCHRONOUS)
+ ubh_sync_block(UCPI_UBH(ucpi));
UFSD("EXIT\n");
}
@@ -290,10 +286,8 @@ cg_found:
}
ubh_mark_buffer_dirty (USPI_UBH(uspi));
ubh_mark_buffer_dirty (UCPI_UBH(ucpi));
- if (sb->s_flags & MS_SYNCHRONOUS) {
- ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi));
- ubh_wait_on_buffer (UCPI_UBH(ucpi));
- }
+ if (sb->s_flags & MS_SYNCHRONOUS)
+ ubh_sync_block(UCPI_UBH(ucpi));
sb->s_dirt = 1;
inode->i_ino = cg * uspi->s_ipg + bit;
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index 34d5cb135320..a58f9155fc9a 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -243,10 +243,8 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p)
ubh_bforget(ind_ubh);
ind_ubh = NULL;
}
- if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) {
- ubh_ll_rw_block(SWRITE, ind_ubh);
- ubh_wait_on_buffer (ind_ubh);
- }
+ if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh))
+ ubh_sync_block(ind_ubh);
ubh_brelse (ind_ubh);
UFSD("EXIT: ino %lu\n", inode->i_ino);
@@ -307,10 +305,8 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p)
ubh_bforget(dind_bh);
dind_bh = NULL;
}
- if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) {
- ubh_ll_rw_block(SWRITE, dind_bh);
- ubh_wait_on_buffer (dind_bh);
- }
+ if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh))
+ ubh_sync_block(dind_bh);
ubh_brelse (dind_bh);
UFSD("EXIT: ino %lu\n", inode->i_ino);
@@ -367,10 +363,8 @@ static int ufs_trunc_tindirect(struct inode *inode)
ubh_bforget(tind_bh);
tind_bh = NULL;
}
- if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) {
- ubh_ll_rw_block(SWRITE, tind_bh);
- ubh_wait_on_buffer (tind_bh);
- }
+ if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh))
+ ubh_sync_block(tind_bh);
ubh_brelse (tind_bh);
UFSD("EXIT: ino %lu\n", inode->i_ino);
diff --git a/fs/ufs/util.c b/fs/ufs/util.c
index 85a7fc9e4a4e..d2c36d53fe66 100644
--- a/fs/ufs/util.c
+++ b/fs/ufs/util.c
@@ -113,21 +113,17 @@ void ubh_mark_buffer_uptodate (struct ufs_buffer_head * ubh, int flag)
}
}
-void ubh_ll_rw_block(int rw, struct ufs_buffer_head *ubh)
+void ubh_sync_block(struct ufs_buffer_head *ubh)
{
- if (!ubh)
- return;
+ if (ubh) {
+ unsigned i;
- ll_rw_block(rw, ubh->count, ubh->bh);
-}
+ for (i = 0; i < ubh->count; i++)
+ write_dirty_buffer(ubh->bh[i], WRITE);
-void ubh_wait_on_buffer (struct ufs_buffer_head * ubh)
-{
- unsigned i;
- if (!ubh)
- return;
- for ( i = 0; i < ubh->count; i++ )
- wait_on_buffer (ubh->bh[i]);
+ for (i = 0; i < ubh->count; i++)
+ wait_on_buffer(ubh->bh[i]);
+ }
}
void ubh_bforget (struct ufs_buffer_head * ubh)
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 0466036912f1..9f8775ce381c 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -269,8 +269,7 @@ extern void ubh_brelse (struct ufs_buffer_head *);
extern void ubh_brelse_uspi (struct ufs_sb_private_info *);
extern void ubh_mark_buffer_dirty (struct ufs_buffer_head *);
extern void ubh_mark_buffer_uptodate (struct ufs_buffer_head *, int);
-extern void ubh_ll_rw_block(int, struct ufs_buffer_head *);
-extern void ubh_wait_on_buffer (struct ufs_buffer_head *);
+extern void ubh_sync_block(struct ufs_buffer_head *);
extern void ubh_bforget (struct ufs_buffer_head *);
extern int ubh_buffer_dirty (struct ufs_buffer_head *);
#define ubh_ubhcpymem(mem,ubh,size) _ubh_ubhcpymem_(uspi,mem,ubh,size)
OpenPOWER on IntegriCloud