diff options
author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-05-07 12:26:27 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-05-07 12:26:27 -0700 |
commit | 5cefcab3db2b13093480f2a42bf081574dd72d3d (patch) | |
tree | c3755a241553436a1b84d65ad3c00f77ce6d02ad /fs/gfs2 | |
parent | 5f757f91e70a97eda8f0cc13bddc853209b2d173 (diff) | |
parent | 37fde8ca6c60ea61f5e9d7cb877c25ac60e74167 (diff) | |
download | blackbird-obmc-linux-5cefcab3db2b13093480f2a42bf081574dd72d3d.tar.gz blackbird-obmc-linux-5cefcab3db2b13093480f2a42bf081574dd72d3d.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw
* git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-2.6-nmw: (34 commits)
[GFS2] Uncomment sprintf_symbol calling code
[DLM] lowcomms style
[GFS2] printk warning fixes
[GFS2] Patch to fix mmap of stuffed files
[GFS2] use lib/parser for parsing mount options
[DLM] Lowcomms nodeid range & initialisation fixes
[DLM] Fix dlm_lowcoms_stop hang
[DLM] fix mode munging
[GFS2] lockdump improvements
[GFS2] Patch to detect corrupt number of dir entries in leaf and/or inode blocks
[GFS2] bz 236008: Kernel gpf doing cat /debugfs/gfs2/xxx (lock dump)
[DLM] fs/dlm/ast.c should #include "ast.h"
[DLM] Consolidate transport protocols
[DLM] Remove redundant assignment
[GFS2] Fix bz 234168 (ignoring rgrp flags)
[DLM] change lkid format
[DLM] interface for purge (2/2)
[DLM] add orphan purging code (1/2)
[DLM] split create_message function
[GFS2] Set drop_count to 0 (off) by default
...
Diffstat (limited to 'fs/gfs2')
-rw-r--r-- | fs/gfs2/dir.c | 38 | ||||
-rw-r--r-- | fs/gfs2/glock.c | 619 | ||||
-rw-r--r-- | fs/gfs2/glock.h | 8 | ||||
-rw-r--r-- | fs/gfs2/incore.h | 14 | ||||
-rw-r--r-- | fs/gfs2/locking/dlm/lock.c | 14 | ||||
-rw-r--r-- | fs/gfs2/locking/dlm/lock_dlm.h | 3 | ||||
-rw-r--r-- | fs/gfs2/lops.c | 20 | ||||
-rw-r--r-- | fs/gfs2/main.c | 4 | ||||
-rw-r--r-- | fs/gfs2/mount.c | 239 | ||||
-rw-r--r-- | fs/gfs2/ops_address.c | 21 | ||||
-rw-r--r-- | fs/gfs2/ops_fstype.c | 4 | ||||
-rw-r--r-- | fs/gfs2/ops_super.c | 28 | ||||
-rw-r--r-- | fs/gfs2/rgrp.c | 12 |
13 files changed, 642 insertions, 382 deletions
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 82a1ac7895a2..a96fa07b3f3b 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -1262,9 +1262,10 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, u64 leaf_no) { struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); struct buffer_head *bh; struct gfs2_leaf *lf; - unsigned entries = 0; + unsigned entries = 0, entries2 = 0; unsigned leaves = 0; const struct gfs2_dirent **darr, *dent; struct dirent_gather g; @@ -1290,7 +1291,13 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, return 0; error = -ENOMEM; - larr = vmalloc((leaves + entries) * sizeof(void *)); + /* + * The extra 99 entries are not normally used, but are a buffer + * zone in case the number of entries in the leaf is corrupt. + * 99 is the maximum number of entries that can fit in a single + * leaf block. + */ + larr = vmalloc((leaves + entries + 99) * sizeof(void *)); if (!larr) goto out; darr = (const struct gfs2_dirent **)(larr + leaves); @@ -1305,10 +1312,20 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, lf = (struct gfs2_leaf *)bh->b_data; lfn = be64_to_cpu(lf->lf_next); if (lf->lf_entries) { + entries2 += be16_to_cpu(lf->lf_entries); dent = gfs2_dirent_scan(inode, bh->b_data, bh->b_size, gfs2_dirent_gather, NULL, &g); error = PTR_ERR(dent); - if (IS_ERR(dent)) { + if (IS_ERR(dent)) + goto out_kfree; + if (entries2 != g.offset) { + fs_warn(sdp, "Number of entries corrupt in dir " + "leaf %llu, entries2 (%u) != " + "g.offset (%u)\n", + (unsigned long long)bh->b_blocknr, + entries2, g.offset); + + error = -EIO; goto out_kfree; } error = 0; @@ -1318,6 +1335,7 @@ static int gfs2_dir_read_leaf(struct inode *inode, u64 *offset, void *opaque, } } while(lfn); + BUG_ON(entries2 != entries); error = do_filldir_main(ip, offset, opaque, filldir, darr, entries, copied); out_kfree: @@ -1401,6 +1419,7 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, filldir_t filldir) { struct gfs2_inode *dip = GFS2_I(inode); + struct gfs2_sbd *sdp = GFS2_SB(inode); struct dirent_gather g; const struct gfs2_dirent **darr, *dent; struct buffer_head *dibh; @@ -1423,8 +1442,8 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, return error; error = -ENOMEM; - darr = kmalloc(dip->i_di.di_entries * sizeof(struct gfs2_dirent *), - GFP_KERNEL); + /* 96 is max number of dirents which can be stuffed into an inode */ + darr = kmalloc(96 * sizeof(struct gfs2_dirent *), GFP_KERNEL); if (darr) { g.pdent = darr; g.offset = 0; @@ -1434,6 +1453,15 @@ int gfs2_dir_read(struct inode *inode, u64 *offset, void *opaque, error = PTR_ERR(dent); goto out; } + if (dip->i_di.di_entries != g.offset) { + fs_warn(sdp, "Number of entries corrupt in dir %llu, " + "ip->i_di.di_entries (%u) != g.offset (%u)\n", + (unsigned long long)dip->i_num.no_addr, + dip->i_di.di_entries, + g.offset); + error = -EIO; + goto out; + } error = do_filldir_main(dip, offset, opaque, filldir, darr, dip->i_di.di_entries, &copied); out: diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 12accb08fe02..1815429a2978 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -23,6 +23,10 @@ #include <linux/module.h> #include <linux/rwsem.h> #include <asm/uaccess.h> +#include <linux/seq_file.h> +#include <linux/debugfs.h> +#include <linux/module.h> +#include <linux/kallsyms.h> #include "gfs2.h" #include "incore.h" @@ -40,20 +44,30 @@ struct gfs2_gl_hash_bucket { struct hlist_head hb_list; }; +struct glock_iter { + int hash; /* hash bucket index */ + struct gfs2_sbd *sdp; /* incore superblock */ + struct gfs2_glock *gl; /* current glock struct */ + struct hlist_head *hb_list; /* current hash bucket ptr */ + struct seq_file *seq; /* sequence file for debugfs */ + char string[512]; /* scratch space */ +}; + typedef void (*glock_examiner) (struct gfs2_glock * gl); static int gfs2_dump_lockstate(struct gfs2_sbd *sdp); -static int dump_glock(struct gfs2_glock *gl); -static int dump_inode(struct gfs2_inode *ip); -static void gfs2_glock_xmote_th(struct gfs2_holder *gh); +static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl); +static void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh); static void gfs2_glock_drop_th(struct gfs2_glock *gl); static DECLARE_RWSEM(gfs2_umount_flush_sem); +static struct dentry *gfs2_root; #define GFS2_GL_HASH_SHIFT 15 #define GFS2_GL_HASH_SIZE (1 << GFS2_GL_HASH_SHIFT) #define GFS2_GL_HASH_MASK (GFS2_GL_HASH_SIZE - 1) static struct gfs2_gl_hash_bucket gl_hash_table[GFS2_GL_HASH_SIZE]; +static struct dentry *gfs2_root; /* * Despite what you might think, the numbers below are not arbitrary :-) @@ -202,7 +216,6 @@ int gfs2_glock_put(struct gfs2_glock *gl) gfs2_assert(sdp, list_empty(&gl->gl_reclaim)); gfs2_assert(sdp, list_empty(&gl->gl_holders)); gfs2_assert(sdp, list_empty(&gl->gl_waiters1)); - gfs2_assert(sdp, list_empty(&gl->gl_waiters2)); gfs2_assert(sdp, list_empty(&gl->gl_waiters3)); glock_free(gl); rv = 1; @@ -303,7 +316,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number, atomic_set(&gl->gl_ref, 1); gl->gl_state = LM_ST_UNLOCKED; gl->gl_hash = hash; - gl->gl_owner = NULL; + gl->gl_owner_pid = 0; gl->gl_ip = 0; gl->gl_ops = glops; gl->gl_req_gh = NULL; @@ -367,7 +380,7 @@ void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, unsigned flags, INIT_LIST_HEAD(&gh->gh_list); gh->gh_gl = gl; gh->gh_ip = (unsigned long)__builtin_return_address(0); - gh->gh_owner = current; + gh->gh_owner_pid = current->pid; gh->gh_state = state; gh->gh_flags = flags; gh->gh_error = 0; @@ -389,7 +402,7 @@ void gfs2_holder_reinit(unsigned int state, unsigned flags, struct gfs2_holder * { gh->gh_state = state; gh->gh_flags = flags; - gh->gh_iflags &= 1 << HIF_ALLOCED; + gh->gh_iflags = 0; gh->gh_ip = (unsigned long)__builtin_return_address(0); } @@ -406,54 +419,8 @@ void gfs2_holder_uninit(struct gfs2_holder *gh) gh->gh_ip = 0; } -/** - * gfs2_holder_get - get a struct gfs2_holder structure - * @gl: the glock - * @state: the state we're requesting - * @flags: the modifier flags - * @gfp_flags: - * - * Figure out how big an impact this function has. Either: - * 1) Replace it with a cache of structures hanging off the struct gfs2_sbd - * 2) Leave it like it is - * - * Returns: the holder structure, NULL on ENOMEM - */ - -static struct gfs2_holder *gfs2_holder_get(struct gfs2_glock *gl, - unsigned int state, - int flags, gfp_t gfp_flags) -{ - struct gfs2_holder *gh; - - gh = kmalloc(sizeof(struct gfs2_holder), gfp_flags); - if (!gh) - return NULL; - - gfs2_holder_init(gl, state, flags, gh); - set_bit(HIF_ALLOCED, &gh->gh_iflags); - gh->gh_ip = (unsigned long)__builtin_return_address(0); - return gh; -} - -/** - * gfs2_holder_put - get rid of a struct gfs2_holder structure - * @gh: the holder structure - * - */ - -static void gfs2_holder_put(struct gfs2_holder *gh) +static void gfs2_holder_wake(struct gfs2_holder *gh) { - gfs2_holder_uninit(gh); - kfree(gh); -} - -static void gfs2_holder_dispose_or_wake(struct gfs2_holder *gh) -{ - if (test_bit(HIF_DEALLOC, &gh->gh_iflags)) { - gfs2_holder_put(gh); - return; - } clear_bit(HIF_WAIT, &gh->gh_iflags); smp_mb(); wake_up_bit(&gh->gh_iflags, HIF_WAIT); @@ -519,7 +486,7 @@ static int rq_promote(struct gfs2_holder *gh) gfs2_reclaim_glock(sdp); } - gfs2_glock_xmote_th(gh); + gfs2_glock_xmote_th(gh->gh_gl, gh); spin_lock(&gl->gl_spin); } return 1; @@ -542,7 +509,7 @@ static int rq_promote(struct gfs2_holder *gh) gh->gh_error = 0; set_bit(HIF_HOLDER, &gh->gh_iflags); - gfs2_holder_dispose_or_wake(gh); + gfs2_holder_wake(gh); return 0; } @@ -554,32 +521,24 @@ static int rq_promote(struct gfs2_holder *gh) * Returns: 1 if the queue is blocked */ -static int rq_demote(struct gfs2_holder *gh) +static int rq_demote(struct gfs2_glock *gl) { - struct gfs2_glock *gl = gh->gh_gl; - if (!list_empty(&gl->gl_holders)) return 1; - if (gl->gl_state == gh->gh_state || gl->gl_state == LM_ST_UNLOCKED) { - list_del_init(&gh->gh_list); - gh->gh_error = 0; - spin_unlock(&gl->gl_spin); - gfs2_holder_dispose_or_wake(gh); - spin_lock(&gl->gl_spin); - } else { - gl->gl_req_gh = gh; - set_bit(GLF_LOCK, &gl->gl_flags); - spin_unlock(&gl->gl_spin); - - if (gh->gh_state == LM_ST_UNLOCKED || - gl->gl_state != LM_ST_EXCLUSIVE) - gfs2_glock_drop_th(gl); - else - gfs2_glock_xmote_th(gh); - - spin_lock(&gl->gl_spin); + if (gl->gl_state == gl->gl_demote_state || + gl->gl_state == LM_ST_UNLOCKED) { + clear_bit(GLF_DEMOTE, &gl->gl_flags); + return 0; } + set_bit(GLF_LOCK, &gl->gl_flags); + spin_unlock(&gl->gl_spin); + if (gl->gl_demote_state == LM_ST_UNLOCKED || + gl->gl_state != LM_ST_EXCLUSIVE) + gfs2_glock_drop_th(gl); + else + gfs2_glock_xmote_th(gl, NULL); + spin_lock(&gl->gl_spin); return 0; } @@ -607,16 +566,8 @@ static void run_queue(struct gfs2_glock *gl) else gfs2_assert_warn(gl->gl_sbd, 0); - } else if (!list_empty(&gl->gl_waiters2) && - !test_bit(GLF_SKIP_WAITERS2, &gl->gl_flags)) { - gh = list_entry(gl->gl_waiters2.next, - struct gfs2_holder, gh_list); - - if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) - blocked = rq_demote(gh); - else - gfs2_assert_warn(gl->gl_sbd, 0); - + } else if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { + blocked = rq_demote(gl); } else if (!list_empty(&gl->gl_waiters3)) { gh = list_entry(gl->gl_waiters3.next, struct gfs2_holder, gh_list); @@ -654,7 +605,7 @@ static void gfs2_glmutex_lock(struct gfs2_glock *gl) if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { list_add_tail(&gh.gh_list, &gl->gl_waiters1); } else { - gl->gl_owner = current; + gl->gl_owner_pid = current->pid; gl->gl_ip = (unsigned long)__builtin_return_address(0); clear_bit(HIF_WAIT, &gh.gh_iflags); smp_mb(); @@ -681,7 +632,7 @@ static int gfs2_glmutex_trylock(struct gfs2_glock *gl) if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) { acquired = 0; } else { - gl->gl_owner = current; + gl->gl_owner_pid = current->pid; gl->gl_ip = (unsigned long)__builtin_return_address(0); } spin_unlock(&gl->gl_spin); @@ -699,7 +650,7 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl) { spin_lock(&gl->gl_spin); clear_bit(GLF_LOCK, &gl->gl_flags); - gl->gl_owner = NULL; + gl->gl_owner_pid = 0; gl->gl_ip = 0; run_queue(gl); BUG_ON(!spin_is_locked(&gl->gl_spin)); @@ -707,50 +658,24 @@ static void gfs2_glmutex_unlock(struct gfs2_glock *gl) } /** - * handle_callback - add a demote request to a lock's queue + * handle_callback - process a demote request * @gl: the glock * @state: the state the caller wants us to change to * - * Note: This may fail sliently if we are out of memory. + * There are only two requests that we are going to see in actual + * practise: LM_ST_SHARED and LM_ST_UNLOCKED */ static void handle_callback(struct gfs2_glock *gl, unsigned int state) { - struct gfs2_holder *gh, *new_gh = NULL; - -restart: spin_lock(&gl->gl_spin); - - list_for_each_entry(gh, &gl->gl_waiters2, gh_list) { - if (test_bit(HIF_DEMOTE, &gh->gh_iflags) && - gl->gl_req_gh != gh) { - if (gh->gh_state != state) - gh->gh_state = LM_ST_UNLOCKED; - goto out; - } - } - - if (new_gh) { - list_add_tail(&new_gh->gh_list, &gl->gl_waiters2); - new_gh = NULL; - } else { - spin_unlock(&gl->gl_spin); - - new_gh = gfs2_holder_get(gl, state, LM_FLAG_TRY, GFP_NOFS); - if (!new_gh) - return; - set_bit(HIF_DEMOTE, &new_gh->gh_iflags); - set_bit(HIF_DEALLOC, &new_gh->gh_iflags); - set_bit(HIF_WAIT, &new_gh->gh_iflags); - - goto restart; + if (test_and_set_bit(GLF_DEMOTE, &gl->gl_flags) == 0) { + gl->gl_demote_state = state; + gl->gl_demote_time = jiffies; + } else if (gl->gl_demote_state != LM_ST_UNLOCKED) { + gl->gl_demote_state = state; } - -out: spin_unlock(&gl->gl_spin); - - if (new_gh) - gfs2_holder_put(new_gh); } /** @@ -810,56 +735,37 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) /* Deal with each possible exit condition */ - if (!gh) + if (!gh) { gl->gl_stamp = jiffies; - else if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) { + if (ret & LM_OUT_CANCELED) + op_done = 0; + else + clear_bit(GLF_DEMOTE, &gl->gl_flags); + } else { spin_lock(&gl->gl_spin); list_del_init(&gh->gh_list); gh->gh_error = -EIO; - spin_unlock(&gl->gl_spin); - } else if (test_bit(HIF_DEMOTE, &gh->gh_iflags)) { - spin_lock(&gl->gl_spin); - list_del_init(&gh->gh_list); - if (gl->gl_state == gh->gh_state || - gl->gl_state == LM_ST_UNLOCKED) { + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + goto out; + gh->gh_error = GLR_CANCELED; + if (ret & LM_OUT_CANCELED) + goto out; + if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { + list_add_tail(&gh->gh_list, &gl->gl_holders); gh->gh_error = 0; - } else { - if (gfs2_assert_warn(sdp, gh->gh_flags & - (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) == -1) - fs_warn(sdp, "ret = 0x%.8X\n", ret); - gh->gh_error = GLR_TRYFAILED; + set_bit(HIF_HOLDER, &gh->gh_iflags); + set_bit(HIF_FIRST, &gh->gh_iflags); + op_done = 0; + goto out; } - spin_unlock(&gl->gl_spin); - - if (ret & LM_OUT_CANCELED) - handle_callback(gl, LM_ST_UNLOCKED); - - } else if (ret & LM_OUT_CANCELED) { - spin_lock(&gl->gl_spin); - list_del_init(&gh->gh_list); - gh->gh_error = GLR_CANCELED; - spin_unlock(&gl->gl_spin); - - } else if (relaxed_state_ok(gl->gl_state, gh->gh_state, gh->gh_flags)) { - spin_lock(&gl->gl_spin); - list_move_tail(&gh->gh_list, &gl->gl_holders); - gh->gh_error = 0; - set_bit(HIF_HOLDER, &gh->gh_iflags); - spin_unlock(&gl->gl_spin); - - set_bit(HIF_FIRST, &gh->gh_iflags); - - op_done = 0; - - } else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { - spin_lock(&gl->gl_spin); - list_del_init(&gh->gh_list); gh->gh_error = GLR_TRYFAILED; - spin_unlock(&gl->gl_spin); - - } else { + if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) + goto out; + gh->gh_error = -EINVAL; if (gfs2_assert_withdraw(sdp, 0) == -1) fs_err(sdp, "ret = 0x%.8X\n", ret); +out: + spin_unlock(&gl->gl_spin); } if (glops->go_xmote_bh) @@ -877,7 +783,7 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) gfs2_glock_put(gl); if (gh) - gfs2_holder_dispose_or_wake(gh); + gfs2_holder_wake(gh); } /** @@ -888,12 +794,11 @@ static void xmote_bh(struct gfs2_glock *gl, unsigned int ret) * */ -void gfs2_glock_xmote_th(struct gfs2_holder *gh) +void gfs2_glock_xmote_th(struct gfs2_glock *gl, struct gfs2_holder *gh) { - struct gfs2_glock *gl = gh->gh_gl; struct gfs2_sbd *sdp = gl->gl_sbd; - int flags = gh->gh_flags; - unsigned state = gh->gh_state; + int flags = gh ? gh->gh_flags : 0; + unsigned state = gh ? gh->gh_state : gl->gl_demote_state; const struct gfs2_glock_operations *glops = gl->gl_ops; int lck_flags = flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP | LM_FLAG_ANY | @@ -943,6 +848,7 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) gfs2_assert_warn(sdp, !ret); state_change(gl, LM_ST_UNLOCKED); + clear_bit(GLF_DEMOTE, &gl->gl_flags); if (glops->go_inval) glops->go_inval(gl, DIO_METADATA); @@ -964,7 +870,7 @@ static void drop_bh(struct gfs2_glock *gl, unsigned int ret) gfs2_glock_put(gl); if (gh) - gfs2_holder_dispose_or_wake(gh); + gfs2_holder_wake(gh); } /** @@ -1097,18 +1003,32 @@ static int glock_wait_internal(struct gfs2_holder *gh) } static inline struct gfs2_holder * -find_holder_by_owner(struct list_head *head, struct task_struct *owner) +find_holder_by_owner(struct list_head *head, pid_t pid) { struct gfs2_holder *gh; list_for_each_entry(gh, head, gh_list) { - if (gh->gh_owner == owner) + if (gh->gh_owner_pid == pid) return gh; } return NULL; } +static void print_dbg(struct glock_iter *gi, const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + if (gi) { + vsprintf(gi->string, fmt, args); + seq_printf(gi->seq, gi->string); + } + else + vprintk(fmt, args); + va_end(args); +} + /** * add_to_queue - Add a holder to the wait queue (but look for recursion) * @gh: the holder structure to add @@ -1120,24 +1040,24 @@ static void add_to_queue(struct gfs2_holder *gh) struct gfs2_glock *gl = gh->gh_gl; struct gfs2_holder *existing; - BUG_ON(!gh->gh_owner); + BUG_ON(!gh->gh_owner_pid); if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) BUG(); - existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner); + existing = find_holder_by_owner(&gl->gl_holders, gh->gh_owner_pid); if (existing) { print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip); - printk(KERN_INFO "pid : %d\n", existing->gh_owner->pid); + printk(KERN_INFO "pid : %d\n", existing->gh_owner_pid); printk(KERN_INFO "lock type : %d lock state : %d\n", existing->gh_gl->gl_name.ln_type, existing->gh_gl->gl_state); print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); - printk(KERN_INFO "pid : %d\n", gh->gh_owner->pid); + printk(KERN_INFO "pid : %d\n", gh->gh_owner_pid); printk(KERN_INFO "lock type : %d lock state : %d\n", gl->gl_name.ln_type, gl->gl_state); BUG(); } - existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner); + existing = find_holder_by_owner(&gl->gl_waiters3, gh->gh_owner_pid); if (existing) { print_symbol(KERN_WARNING "original: %s\n", existing->gh_ip); print_symbol(KERN_WARNING "new: %s\n", gh->gh_ip); @@ -1267,9 +1187,8 @@ void gfs2_glock_dq(struct gfs2_holder *gh) if (glops->go_unlock) glops->go_unlock(gh); - gl->gl_stamp = jiffies; - spin_lock(&gl->gl_spin); + gl->gl_stamp = jiffies; } clear_bit(GLF_LOCK, &gl->gl_flags); @@ -1841,6 +1760,15 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) * Diagnostic routines to help debug distributed deadlock */ +static void gfs2_print_symbol(struct glock_iter *gi, const char *fmt, + unsigned long address) +{ + char buffer[KSYM_SYMBOL_LEN]; + + sprint_symbol(buffer, address); + print_dbg(gi, fmt, buffer); +} + /** * dump_holder - print information about a glock holder * @str: a string naming the type of holder @@ -1849,31 +1777,37 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait) * Returns: 0 on success, -ENOBUFS when we run out of space */ -static int dump_holder(char *str, struct gfs2_holder *gh) +static int dump_holder(struct glock_iter *gi, char *str, + struct gfs2_holder *gh) { unsigned int x; - int error = -ENOBUFS; - - printk(KERN_INFO " %s\n", str); - printk(KERN_INFO " owner = %ld\n", - (gh->gh_owner) ? (long)gh->gh_owner->pid : -1); - printk(KERN_INFO " gh_state = %u\n", gh->gh_state); - printk(KERN_INFO " gh_flags ="); + struct task_struct *gh_owner; + + print_dbg(gi, " %s\n", str); + if (gh->gh_owner_pid) { + print_dbg(gi, " owner = %ld ", (long)gh->gh_owner_pid); + gh_owner = find_task_by_pid(gh->gh_owner_pid); + if (gh_owner) + print_dbg(gi, "(%s)\n", gh_owner->comm); + else + print_dbg(gi, "(ended)\n"); + } else + print_dbg(gi, " owner = -1\n"); + print_dbg(gi, " gh_state = %u\n", gh->gh_state); + print_dbg(gi, " gh_flags ="); for (x = 0; x < 32; x++) if (gh->gh_flags & (1 << x)) - printk(" %u", x); - printk(" \n"); - printk(KERN_INFO " error = %d\n", gh->gh_error); - printk(KERN_INFO " gh_iflags ="); + print_dbg(gi, " %u", x); + print_dbg(gi, " \n"); + print_dbg(gi, " error = %d\n", gh->gh_error); + print_dbg(gi, " gh_iflags ="); for (x = 0; x < 32; x++) if (test_bit(x, &gh->gh_iflags)) - printk(" %u", x); - printk(" \n"); - print_symbol(KERN_INFO " initialized at: %s\n", gh->gh_ip); - - error = 0; + print_dbg(gi, " %u", x); + print_dbg(gi, " \n"); + gfs2_print_symbol(gi, " initialized at: %s\n", gh->gh_ip); - return error; + return 0; } /** @@ -1883,25 +1817,20 @@ static int dump_holder(char *str, struct gfs2_holder *gh) * Returns: 0 on success, -ENOBUFS when we run out of space */ -static int dump_inode(struct gfs2_inode *ip) +static int dump_inode(struct glock_iter *gi, struct gfs2_inode *ip) { unsigned int x; - int error = -ENOBUFS; - printk(KERN_INFO " Inode:\n"); - printk(KERN_INFO " num = %llu %llu\n", - (unsigned long long)ip->i_num.no_formal_ino, - (unsigned long long)ip->i_num.no_addr); - printk(KERN_INFO " type = %u\n", IF2DT(ip->i_inode.i_mode)); - printk(KERN_INFO " i_flags ="); + print_dbg(gi, " Inode:\n"); + print_dbg(gi, " num = %llu/%llu\n", + ip->i_num.no_formal_ino, ip->i_num.no_addr); + print_dbg(gi, " type = %u\n", IF2DT(ip->i_inode.i_mode)); + print_dbg(gi, " i_flags ="); for (x = 0; x < 32; x++) if (test_bit(x, &ip->i_flags)) - printk(" %u", x); - printk(" \n"); - - error = 0; - - return error; + print_dbg(gi, " %u", x); + print_dbg(gi, " \n"); + return 0; } /** @@ -1912,74 +1841,86 @@ static int dump_inode(struct gfs2_inode *ip) * Returns: 0 on success, -ENOBUFS when we run out of space */ -static int dump_glock(struct gfs2_glock *gl) +static int dump_glock(struct glock_iter *gi, struct gfs2_glock *gl) { struct gfs2_holder *gh; unsigned int x; int error = -ENOBUFS; + struct task_struct *gl_owner; spin_lock(&gl->gl_spin); - printk(KERN_INFO "Glock 0x%p (%u, %llu)\n", gl, gl->gl_name.ln_type, - (unsigned long long)gl->gl_name.ln_number); - printk(KERN_INFO " gl_flags ="); + print_dbg(gi, "Glock 0x%p (%u, %llu)\n", gl, gl->gl_name.ln_type, + (unsigned long long)gl->gl_name.ln_number); + print_dbg(gi, " gl_flags ="); for (x = 0; x < 32; x++) { if (test_bit(x, &gl->gl_flags)) - printk(" %u", x); + print_dbg(gi, " %u", x); } - printk(" \n"); - printk(KERN_INFO " gl_ref = %d\n", atomic_read(&gl->gl_ref)); - printk(KERN_INFO " gl_state = %u\n", gl->gl_state); - printk(KERN_INFO " gl_owner = %s\n", gl->gl_owner->comm); - print_symbol(KERN_INFO " gl_ip = %s\n", gl->gl_ip); - printk(KERN_INFO " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no"); - printk(KERN_INFO " req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no"); - printk(KERN_INFO " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); - printk(KERN_INFO " object = %s\n", (gl->gl_object) ? "yes" : "no"); - printk(KERN_INFO " le = %s\n", + if (!test_bit(GLF_LOCK, &gl->gl_flags)) + print_dbg(gi, " (unlocked)"); + print_dbg(gi, " \n"); + print_dbg(gi, " gl_ref = %d\n", atomic_read(&gl->gl_ref)); + print_dbg(gi, " gl_state = %u\n", gl->gl_state); + if (gl->gl_owner_pid) { + gl_owner = find_task_by_pid(gl->gl_owner_pid); + if (gl_owner) + print_dbg(gi, " gl_owner = pid %d (%s)\n", + gl->gl_owner_pid, gl_owner->comm); + else + print_dbg(gi, " gl_owner = %d (ended)\n", + gl->gl_owner_pid); + } else + print_dbg(gi, " gl_owner = -1\n"); + print_dbg(gi, " gl_ip = %lu\n", gl->gl_ip); + print_dbg(gi, " req_gh = %s\n", (gl->gl_req_gh) ? "yes" : "no"); + print_dbg(gi, " req_bh = %s\n", (gl->gl_req_bh) ? "yes" : "no"); + print_dbg(gi, " lvb_count = %d\n", atomic_read(&gl->gl_lvb_count)); + print_dbg(gi, " object = %s\n", (gl->gl_object) ? "yes" : "no"); + print_dbg(gi, " le = %s\n", (list_empty(&gl->gl_le.le_list)) ? "no" : "yes"); - printk(KERN_INFO " reclaim = %s\n", - (list_empty(&gl->gl_reclaim)) ? "no" : "yes"); + print_dbg(gi, " reclaim = %s\n", + (list_empty(&gl->gl_reclaim)) ? "no" : "yes"); if (gl->gl_aspace) - printk(KERN_INFO " aspace = 0x%p nrpages = %lu\n", gl->gl_aspace, - gl->gl_aspace->i_mapping->nrpages); + print_dbg(gi, " aspace = 0x%p nrpages = %lu\n", gl->gl_aspace, + gl->gl_aspace->i_mapping->nrpages); else - printk(KERN_INFO " aspace = no\n"); - printk(KERN_INFO " ail = %d\n", atomic_read(&gl->gl_ail_count)); + print_dbg(gi, " aspace = no\n"); + print_dbg(gi, " ail = %d\n", atomic_read(&gl->gl_ail_count)); if (gl->gl_req_gh) { - error = dump_holder("Request", gl->gl_req_gh); + error = dump_holder(gi, "Request", gl->gl_req_gh); if (error) goto out; } list_for_each_entry(gh, &gl->gl_holders, gh_list) { - error = dump_holder("Holder", gh); + error = dump_holder(gi, "Holder", gh); if (error) goto out; } list_for_each_entry(gh, &gl->gl_waiters1, gh_list) { - error = dump_holder("Waiter1", gh); - if (error) - goto out; - } - list_for_each_entry(gh, &gl->gl_waiters2, gh_list) { - error = dump_holder("Waiter2", gh); + error = dump_holder(gi, "Waiter1", gh); if (error) goto out; } list_for_each_entry(gh, &gl->gl_waiters3, gh_list) { - error = dump_holder("Waiter3", gh); + error = dump_holder(gi, "Waiter3", gh); if (error) goto out; } + if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { + print_dbg(gi, " Demotion req to state %u (%llu uS ago)\n", + gl->gl_demote_state, + (u64)(jiffies - gl->gl_demote_time)*(1000000/HZ)); + } if (gl->gl_ops == &gfs2_inode_glops && gl->gl_object) { if (!test_bit(GLF_LOCK, &gl->gl_flags) && - list_empty(&gl->gl_holders)) { - error = dump_inode(gl->gl_object); + list_empty(&gl->gl_holders)) { + error = dump_inode(gi, gl->gl_object); if (error) goto out; } else { error = -ENOBUFS; - printk(KERN_INFO " Inode: busy\n"); + print_dbg(gi, " Inode: busy\n"); } } @@ -2014,7 +1955,7 @@ static int gfs2_dump_lockstate(struct gfs2_sbd *sdp) if (gl->gl_sbd != sdp) continue; - error = dump_glock(gl); + error = dump_glock(NULL, gl); if (error) break; } @@ -2043,3 +1984,189 @@ int __init gfs2_glock_init(void) return 0; } +static int gfs2_glock_iter_next(struct glock_iter *gi) +{ + read_lock(gl_lock_addr(gi->hash)); + while (1) { + if (!gi->hb_list) { /* If we don't have a hash bucket yet */ + gi->hb_list = &gl_hash_table[gi->hash].hb_list; + if (hlist_empty(gi->hb_list)) { + read_unlock(gl_lock_addr(gi->hash)); + gi->hash++; + read_lock(gl_lock_addr(gi->hash)); + gi->hb_list = NULL; + if (gi->hash >= GFS2_GL_HASH_SIZE) { + read_unlock(gl_lock_addr(gi->hash)); + return 1; + } + else + continue; + } + if (!hlist_empty(gi->hb_list)) { + gi->gl = list_entry(gi->hb_list->first, + struct gfs2_glock, + gl_list); + } + } else { + if (gi->gl->gl_list.next == NULL) { + read_unlock(gl_lock_addr(gi->hash)); + gi->hash++; + read_lock(gl_lock_addr(gi->hash)); + gi->hb_list = NULL; + continue; + } + gi->gl = list_entry(gi->gl->gl_list.next, + struct gfs2_glock, gl_list); + } + if (gi->gl) + break; + } + read_unlock(gl_lock_addr(gi->hash)); + return 0; +} + +static void gfs2_glock_iter_free(struct glock_iter *gi) +{ + kfree(gi); +} + +static struct glock_iter *gfs2_glock_iter_init(struct gfs2_sbd *sdp) +{ + struct glock_iter *gi; + + gi = kmalloc(sizeof (*gi), GFP_KERNEL); + if (!gi) + return NULL; + + gi->sdp = sdp; + gi->hash = 0; + gi->gl = NULL; + gi->hb_list = NULL; + gi->seq = NULL; + memset(gi->string, 0, sizeof(gi->string)); + + if (gfs2_glock_iter_next(gi)) { + gfs2_glock_iter_free(gi); + return NULL; + } + + return gi; +} + +static void *gfs2_glock_seq_start(struct seq_file *file, loff_t *pos) +{ + struct glock_iter *gi; + loff_t n = *pos; + + gi = gfs2_glock_iter_init(file->private); + if (!gi) + return NULL; + + while (n--) { + if (gfs2_glock_iter_next(gi)) { + gfs2_glock_iter_free(gi); + return NULL; + } + } + + return gi; +} + +static void *gfs2_glock_seq_next(struct seq_file *file, void *iter_ptr, + loff_t *pos) +{ + struct glock_iter *gi = iter_ptr; + + (*pos)++; + + if (gfs2_glock_iter_next(gi)) { + gfs2_glock_iter_free(gi); + return NULL; + } + + return gi; +} + +static void gfs2_glock_seq_stop(struct seq_file *file, void *iter_ptr) +{ + /* nothing for now */ +} + +static int gfs2_glock_seq_show(struct seq_file *file, void *iter_ptr) +{ + struct glock_iter *gi = iter_ptr; + + gi->seq = file; + dump_glock(gi, gi->gl); + + return 0; +} + +static struct seq_operations gfs2_glock_seq_ops = { + .start = gfs2_glock_seq_start, + .next = gfs2_glock_seq_next, + .stop = gfs2_glock_seq_stop, + .show = gfs2_glock_seq_show, +}; + +static int gfs2_debugfs_open(struct inode *inode, struct file *file) +{ + struct seq_file *seq; + int ret; + + ret = seq_open(file, &gfs2_glock_seq_ops); + if (ret) + return ret; + + seq = file->private_data; + seq->private = inode->i_private; + + return 0; +} + +static const struct file_operations gfs2_debug_fops = { + .owner = THIS_MODULE, + .open = gfs2_debugfs_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +int gfs2_create_debugfs_file(struct gfs2_sbd *sdp) +{ + sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root); + if (!sdp->debugfs_dir) + return -ENOMEM; + sdp->debugfs_dentry_glocks = debugfs_create_file("glocks", + S_IFREG | S_IRUGO, + sdp->debugfs_dir, sdp, + &gfs2_debug_fops); + if (!sdp->debugfs_dentry_glocks) + return -ENOMEM; + + return 0; +} + +void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp) +{ + if (sdp && sdp->debugfs_dir) { + if (sdp->debugfs_dentry_glocks) { + debugfs_remove(sdp->debugfs_dentry_glocks); + sdp->debugfs_dentry_glocks = NULL; + } + debugfs_remove(sdp->debugfs_dir); + sdp->debugfs_dir = NULL; + } +} + +int gfs2_register_debugfs(void) +{ + gfs2_root = debugfs_create_dir("gfs2", NULL); + return gfs2_root ? 0 : -ENOMEM; +} + +void gfs2_unregister_debugfs(void) +{ + debugfs_remove(gfs2_root); + gfs2_root = NULL; +} diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index f50e40ceca43..11477ca3a3c0 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -38,7 +38,7 @@ static inline int gfs2_glock_is_locked_by_me(struct gfs2_glock *gl) /* Look in glock's list of holders for one with current task as owner */ spin_lock(&gl->gl_spin); list_for_each_entry(gh, &gl->gl_holders, gh_list) { - if (gh->gh_owner == current) { + if (gh->gh_owner_pid == current->pid) { locked = 1; break; } @@ -67,7 +67,7 @@ static inline int gfs2_glock_is_blocking(struct gfs2_glock *gl) { int ret; spin_lock(&gl->gl_spin); - ret = !list_empty(&gl->gl_waiters2) || !list_empty(&gl->gl_waiters3); + ret = test_bit(GLF_DEMOTE, &gl->gl_flags) || !list_empty(&gl->gl_waiters3); spin_unlock(&gl->gl_spin); return ret; } @@ -135,5 +135,9 @@ void gfs2_scand_internal(struct gfs2_sbd *sdp); void gfs2_gl_hash_clear(struct gfs2_sbd *sdp, int wait); int __init gfs2_glock_init(void); +int gfs2_create_debugfs_file(struct gfs2_sbd *sdp); +void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp); +int gfs2_register_debugfs(void); +void gfs2_unregister_debugfs(void); #endif /* __GLOCK_DOT_H__ */ diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 49f0dbf40d86..d995441373ab 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -115,11 +115,8 @@ enum { /* Actions */ HIF_MUTEX = 0, HIF_PROMOTE = 1, - HIF_DEMOTE = 2, /* States */ - HIF_ALLOCED = 4, - HIF_DEALLOC = 5, HIF_HOLDER = 6, HIF_FIRST = 7, HIF_ABORTED = 9, @@ -130,7 +127,7 @@ struct gfs2_holder { struct list_head gh_list; struct gfs2_glock *gh_gl; - struct task_struct *gh_owner; + pid_t gh_owner_pid; unsigned int gh_state; unsigned gh_flags; @@ -142,8 +139,8 @@ struct gfs2_holder { enum { GLF_LOCK = 1, GLF_STICKY = 2, + GLF_DEMOTE = 3, GLF_DIRTY = 5, - GLF_SKIP_WAITERS2 = 6, }; struct gfs2_glock { @@ -156,11 +153,12 @@ struct gfs2_glock { unsigned int gl_state; unsigned int gl_hash; - struct task_struct *gl_owner; + unsigned int gl_demote_state; /* state requested by remote node */ + unsigned long gl_demote_time; /* time of first demote request */ + pid_t gl_owner_pid; unsigned long gl_ip; struct list_head gl_holders; struct list_head gl_waiters1; /* HIF_MUTEX */ - struct list_head gl_waiters2; /* HIF_DEMOTE */ struct list_head gl_waiters3; /* HIF_PROMOTE */ const struct gfs2_glock_operations *gl_ops; @@ -611,6 +609,8 @@ struct gfs2_sbd { unsigned long sd_last_warning; struct vfsmount *sd_gfs2mnt; + struct dentry *debugfs_dir; /* debugfs directory */ + struct dentry *debugfs_dentry_glocks; /* for debugfs */ }; #endif /* __INCORE_DOT_H__ */ diff --git a/fs/gfs2/locking/dlm/lock.c b/fs/gfs2/locking/dlm/lock.c index b167addf9fd1..c305255bfe8a 100644 --- a/fs/gfs2/locking/dlm/lock.c +++ b/fs/gfs2/locking/dlm/lock.c @@ -151,7 +151,7 @@ static inline unsigned int make_flags(struct gdlm_lock *lp, /* make_strname - convert GFS lock numbers to a string */ -static inline void make_strname(struct lm_lockname *lockname, +static inline void make_strname(const struct lm_lockname *lockname, struct gdlm_strname *str) { sprintf(str->name, "%8x%16llx", lockname->ln_type, @@ -169,6 +169,7 @@ static int gdlm_create_lp(struct gdlm_ls *ls, struct lm_lockname *name, return -ENOMEM; lp->lockname = *name; + make_strname(name, &lp->strname); lp->ls = ls; lp->cur = DLM_LOCK_IV; lp->lvb = NULL; @@ -227,7 +228,6 @@ void gdlm_put_lock(void *lock) unsigned int gdlm_do_lock(struct gdlm_lock *lp) { struct gdlm_ls *ls = lp->ls; - struct gdlm_strname str; int error, bast = 1; /* @@ -249,8 +249,6 @@ unsigned int gdlm_do_lock(struct gdlm_lock *lp) if (test_bit(LFL_NOBAST, &lp->flags)) bast = 0; - make_strname(&lp->lockname, &str); - set_bit(LFL_ACTIVE, &lp->flags); log_debug("lk %x,%llx id %x %d,%d %x", lp->lockname.ln_type, @@ -258,8 +256,8 @@ unsigned int gdlm_do_lock(struct gdlm_lock *lp) lp->cur, lp->req, lp->lkf); error = dlm_lock(ls->dlm_lockspace, lp->req, &lp->lksb, lp->lkf, - str.name, str.namelen, 0, gdlm_ast, lp, - bast ? gdlm_bast : NULL); + lp->strname.name, lp->strname.namelen, 0, gdlm_ast, + lp, bast ? gdlm_bast : NULL); if ((error == -EAGAIN) && (lp->lkf & DLM_LKF_NOQUEUE)) { lp->lksb.sb_status = -EAGAIN; @@ -268,7 +266,7 @@ unsigned int gdlm_do_lock(struct gdlm_lock *lp) } if (error) { - log_debug("%s: gdlm_lock %x,%llx err=%d cur=%d req=%d lkf=%x " + log_error("%s: gdlm_lock %x,%llx err=%d cur=%d req=%d lkf=%x " "flags=%lx", ls->fsname, lp->lockname.ln_type, (unsigned long long)lp->lockname.ln_number, error, lp->cur, lp->req, lp->lkf, lp->flags); @@ -296,7 +294,7 @@ static unsigned int gdlm_do_unlock(struct gdlm_lock *lp) error = dlm_unlock(ls->dlm_lockspace, lp->lksb.sb_lkid, lkf, NULL, lp); if (error) { - log_debug("%s: gdlm_unlock %x,%llx err=%d cur=%d req=%d lkf=%x " + log_error("%s: gdlm_unlock %x,%llx err=%d cur=%d req=%d lkf=%x " "flags=%lx", ls->fsname, lp->lockname.ln_type, (unsigned long long)lp->lockname.ln_number, error, lp->cur, lp->req, lp->lkf, lp->flags); diff --git a/fs/gfs2/locking/dlm/lock_dlm.h b/fs/gfs2/locking/dlm/lock_dlm.h index a87c7bf3c568..d074c6e6f9bf 100644 --- a/fs/gfs2/locking/dlm/lock_dlm.h +++ b/fs/gfs2/locking/dlm/lock_dlm.h @@ -36,7 +36,7 @@ #define GDLM_STRNAME_BYTES 24 #define GDLM_LVB_SIZE 32 -#define GDLM_DROP_COUNT 200000 +#define GDLM_DROP_COUNT 0 #define GDLM_DROP_PERIOD 60 #define GDLM_NAME_LEN 128 @@ -106,6 +106,7 @@ enum { struct gdlm_lock { struct gdlm_ls *ls; struct lm_lockname lockname; + struct gdlm_strname strname; char *lvb; struct dlm_lksb lksb; diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 16bb4b4561ae..f82d84d05d23 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -33,16 +33,17 @@ static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) tr->tr_touched = 1; - if (!list_empty(&le->le_list)) - return; - gl = container_of(le, struct gfs2_glock, gl_le); if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl))) return; - gfs2_glock_hold(gl); - set_bit(GLF_DIRTY, &gl->gl_flags); gfs2_log_lock(sdp); + if (!list_empty(&le->le_list)){ + gfs2_log_unlock(sdp); + return; + } + gfs2_glock_hold(gl); + set_bit(GLF_DIRTY, &gl->gl_flags); sdp->sd_log_num_gl++; list_add(&le->le_list, &sdp->sd_log_le_gl); gfs2_log_unlock(sdp); @@ -415,13 +416,14 @@ static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) tr->tr_touched = 1; - if (!list_empty(&le->le_list)) - return; - rgd = container_of(le, struct gfs2_rgrpd, rd_le); - gfs2_rgrp_bh_hold(rgd); gfs2_log_lock(sdp); + if (!list_empty(&le->le_list)){ + gfs2_log_unlock(sdp); + return; + } + gfs2_rgrp_bh_hold(rgd); sdp->sd_log_num_rg++; list_add(&le->le_list, &sdp->sd_log_le_rg); gfs2_log_unlock(sdp); diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index e2bffae683cc..e460487c0557 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -43,7 +43,6 @@ static void gfs2_init_glock_once(void *foo, struct kmem_cache *cachep, unsigned spin_lock_init(&gl->gl_spin); INIT_LIST_HEAD(&gl->gl_holders); INIT_LIST_HEAD(&gl->gl_waiters1); - INIT_LIST_HEAD(&gl->gl_waiters2); INIT_LIST_HEAD(&gl->gl_waiters3); gl->gl_lvb = NULL; atomic_set(&gl->gl_lvb_count, 0); @@ -101,6 +100,8 @@ static int __init init_gfs2_fs(void) if (error) goto fail_unregister; + gfs2_register_debugfs(); + printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__); return 0; @@ -128,6 +129,7 @@ fail: static void __exit exit_gfs2_fs(void) { + gfs2_unregister_debugfs(); unregister_filesystem(&gfs2_fs_type); unregister_filesystem(&gfs2meta_fs_type); diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c index 32caecd20300..4864659555d4 100644 --- a/fs/gfs2/mount.c +++ b/fs/gfs2/mount.c @@ -13,6 +13,7 @@ #include <linux/buffer_head.h> #include <linux/gfs2_ondisk.h> #include <linux/lm_interface.h> +#include <linux/parser.h> #include "gfs2.h" #include "incore.h" @@ -20,6 +21,52 @@ #include "sys.h" #include "util.h" +enum { + Opt_lockproto, + Opt_locktable, + Opt_hostdata, + Opt_spectator, + Opt_ignore_local_fs, + Opt_localflocks, + Opt_localcaching, + Opt_debug, + Opt_nodebug, + Opt_upgrade, + Opt_num_glockd, + Opt_acl, + Opt_noacl, + Opt_quota_off, + Opt_quota_account, + Opt_quota_on, + Opt_suiddir, + Opt_nosuiddir, + Opt_data_writeback, + Opt_data_ordered, +}; + +static match_table_t tokens = { + {Opt_lockproto, "lockproto=%s"}, + {Opt_locktable, "locktable=%s"}, + {Opt_hostdata, "hostdata=%s"}, + {Opt_spectator, "spectator"}, + {Opt_ignore_local_fs, "ignore_local_fs"}, + {Opt_localflocks, "localflocks"}, + {Opt_localcaching, "localcaching"}, + {Opt_debug, "debug"}, + {Opt_nodebug, "nodebug"}, + {Opt_upgrade, "upgrade"}, + {Opt_num_glockd, "num_glockd=%d"}, + {Opt_acl, "acl"}, + {Opt_noacl, "noacl"}, + {Opt_quota_off, "quota=off"}, + {Opt_quota_account, "quota=account"}, + {Opt_quota_on, "quota=on"}, + {Opt_suiddir, "suiddir"}, + {Opt_nosuiddir, "nosuiddir"}, + {Opt_data_writeback, "data=writeback"}, + {Opt_data_ordered, "data=ordered"} +}; + /** * gfs2_mount_args - Parse mount options * @sdp: @@ -54,146 +101,150 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount) process them */ for (options = data; (o = strsep(&options, ",")); ) { + int token, option; + substring_t tmp[MAX_OPT_ARGS]; + if (!*o) continue; - v = strchr(o, '='); - if (v) - *v++ = 0; + token = match_token(o, tokens, tmp); + switch (token) { + case Opt_lockproto: + v = match_strdup(&tmp[0]); + if (!v) { + fs_info(sdp, "no memory for lockproto\n"); + error = -ENOMEM; + goto out_error; + } - if (!strcmp(o, "lockproto")) { - if (!v) - goto need_value; - if (remount && strcmp(v, args->ar_lockproto)) + if (remount && strcmp(v, args->ar_lockproto)) { + kfree(v); goto cant_remount; + } + strncpy(args->ar_lockproto, v, GFS2_LOCKNAME_LEN); args->ar_lockproto[GFS2_LOCKNAME_LEN - 1] = 0; - } + kfree(v); + break; + case Opt_locktable: + v = match_strdup(&tmp[0]); + if (!v) { + fs_info(sdp, "no memory for locktable\n"); + error = -ENOMEM; + goto out_error; + } - else if (!strcmp(o, "locktable")) { - if (!v) - goto need_value; - if (remount && strcmp(v, args->ar_locktable)) + if (remount && strcmp(v, args->ar_locktable)) { + kfree(v); goto cant_remount; + } + strncpy(args->ar_locktable, v, GFS2_LOCKNAME_LEN); - args->ar_locktable[GFS2_LOCKNAME_LEN - 1] = 0; - } + args->ar_locktable[GFS2_LOCKNAME_LEN - 1] = 0; + kfree(v); + break; + case Opt_hostdata: + v = match_strdup(&tmp[0]); + if (!v) { + fs_info(sdp, "no memory for hostdata\n"); + error = -ENOMEM; + goto out_error; + } - else if (!strcmp(o, "hostdata")) { - if (!v) - goto need_value; - if (remount && strcmp(v, args->ar_hostdata)) + if (remount && strcmp(v, args->ar_hostdata)) { + kfree(v); goto cant_remount; + } + strncpy(args->ar_hostdata, v, GFS2_LOCKNAME_LEN); args->ar_hostdata[GFS2_LOCKNAME_LEN - 1] = 0; - } - - else if (!strcmp(o, "spectator")) { + kfree(v); + break; + case Opt_spectator: if (remount && !args->ar_spectator) goto cant_remount; args->ar_spectator = 1; sdp->sd_vfs->s_flags |= MS_RDONLY; - } - - else if (!strcmp(o, "ignore_local_fs")) { + break; + case Opt_ignore_local_fs: if (remount && !args->ar_ignore_local_fs) goto cant_remount; args->ar_ignore_local_fs = 1; - } - - else if (!strcmp(o, "localflocks")) { + break; + case Opt_localflocks: if (remount && !args->ar_localflocks) goto cant_remount; args->ar_localflocks = 1; - } - - else if (!strcmp(o, "localcaching")) { + break; + case Opt_localcaching: if (remount && !args->ar_localcaching) goto cant_remount; args->ar_localcaching = 1; - } - - else if (!strcmp(o, "debug")) + break; + case Opt_debug: args->ar_debug = 1; - - else if (!strcmp(o, "nodebug")) + break; + case Opt_nodebug: args->ar_debug = 0; - - else if (!strcmp(o, "upgrade")) { + break; + case Opt_upgrade: if (remount && !args->ar_upgrade) goto cant_remount; args->ar_upgrade = 1; - } + break; + case Opt_num_glockd: + if ((error = match_int(&tmp[0], &option))) { + fs_info(sdp, "problem getting num_glockd\n"); + goto out_error; + } - else if (!strcmp(o, "num_glockd")) { - unsigned int x; - if (!v) - goto need_value; - sscanf(v, "%u", &x); - if (remount && x != args->ar_num_glockd) + if (remount && option != args->ar_num_glockd) goto cant_remount; - if (!x || x > GFS2_GLOCKD_MAX) { + if (!option || option > GFS2_GLOCKD_MAX) { fs_info(sdp, "0 < num_glockd <= %u (not %u)\n", - GFS2_GLOCKD_MAX, x); + GFS2_GLOCKD_MAX, option); error = -EINVAL; - break; + goto out_error; } - args->ar_num_glockd = x; - } - - else if (!strcmp(o, "acl")) { + args->ar_num_glockd = option; + break; + case Opt_acl: args->ar_posix_acl = 1; sdp->sd_vfs->s_flags |= MS_POSIXACL; - } - - else if (!strcmp(o, "noacl")) { + break; + case Opt_noacl: args->ar_posix_acl = 0; sdp->sd_vfs->s_flags &= ~MS_POSIXACL; - } - - else if (!strcmp(o, "quota")) { - if (!v) - goto need_value; - if (!strcmp(v, "off")) - args->ar_quota = GFS2_QUOTA_OFF; - else if (!strcmp(v, "account")) - args->ar_quota = GFS2_QUOTA_ACCOUNT; - else if (!strcmp(v, "on")) - args->ar_quota = GFS2_QUOTA_ON; - else { - fs_info(sdp, "invalid value for quota\n"); - error = -EINVAL; - break; - } - } - - else if (!strcmp(o, "suiddir")) + break; + case Opt_quota_off: + args->ar_quota = GFS2_QUOTA_OFF; + break; + case Opt_quota_account: + args->ar_quota = GFS2_QUOTA_ACCOUNT; + break; + case Opt_quota_on: + args->ar_quota = GFS2_QUOTA_ON; + break; + case Opt_suiddir: args->ar_suiddir = 1; - - else if (!strcmp(o, "nosuiddir")) + break; + case Opt_nosuiddir: args->ar_suiddir = 0; - - else if (!strcmp(o, "data")) { - if (!v) - goto need_value; - if (!strcmp(v, "writeback")) - args->ar_data = GFS2_DATA_WRITEBACK; - else if (!strcmp(v, "ordered")) - args->ar_data = GFS2_DATA_ORDERED; - else { - fs_info(sdp, "invalid value for data\n"); - error = -EINVAL; - break; - } - } - - else { + break; + case Opt_data_writeback: + args->ar_data = GFS2_DATA_WRITEBACK; + break; + case Opt_data_ordered: + args->ar_data = GFS2_DATA_ORDERED; + break; + default: fs_info(sdp, "unknown option: %s\n", o); error = -EINVAL; - break; + goto out_error; } } +out_error: if (error) fs_info(sdp, "invalid mount option(s)\n"); @@ -202,10 +253,6 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, char *data_arg, int remount) return error; -need_value: - fs_info(sdp, "need value for option %s\n", o); - return -EINVAL; - cant_remount: fs_info(sdp, "can't remount with option %s\n", o); return -EINVAL; diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index b3b7e8475359..30c15622174f 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -197,7 +197,19 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) void *kaddr; int error; - BUG_ON(page->index); + /* + * Due to the order of unstuffing files and ->nopage(), we can be + * asked for a zero page in the case of a stuffed file being extended, + * so we need to supply one here. It doesn't happen often. + */ + if (unlikely(page->index)) { + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr, 0, PAGE_CACHE_SIZE); + kunmap_atomic(kaddr, KM_USER0); + flush_dcache_page(page); + SetPageUptodate(page); + return 0; + } error = gfs2_meta_inode_buffer(ip, &dibh); if (error) @@ -208,9 +220,8 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page) ip->i_di.di_size); memset(kaddr + ip->i_di.di_size, 0, PAGE_CACHE_SIZE - ip->i_di.di_size); kunmap_atomic(kaddr, KM_USER0); - + flush_dcache_page(page); brelse(dibh); - SetPageUptodate(page); return 0; @@ -507,7 +518,9 @@ static int gfs2_commit_write(struct file *file, struct page *page, gfs2_quota_unlock(ip); gfs2_alloc_put(ip); } + unlock_page(page); gfs2_glock_dq_m(1, &ip->i_gh); + lock_page(page); gfs2_holder_uninit(&ip->i_gh); return 0; @@ -520,7 +533,9 @@ fail_endtrans: gfs2_quota_unlock(ip); gfs2_alloc_put(ip); } + unlock_page(page); gfs2_glock_dq_m(1, &ip->i_gh); + lock_page(page); gfs2_holder_uninit(&ip->i_gh); fail_nounlock: ClearPageUptodate(page); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index ee54cb667083..2c5f8e7def0d 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -690,6 +690,8 @@ static int fill_super(struct super_block *sb, void *data, int silent) if (error) goto fail; + gfs2_create_debugfs_file(sdp); + error = gfs2_sys_fs_add(sdp); if (error) goto fail; @@ -754,6 +756,7 @@ fail_lm: fail_sys: gfs2_sys_fs_del(sdp); fail: + gfs2_delete_debugfs_file(sdp); kfree(sdp); sb->s_fs_info = NULL; return error; @@ -896,6 +899,7 @@ error: static void gfs2_kill_sb(struct super_block *sb) { + gfs2_delete_debugfs_file(sb->s_fs_info); kill_block_super(sb); } diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index b89999d3a767..485ce3d49923 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c @@ -284,6 +284,31 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) } /** + * gfs2_drop_inode - Drop an inode (test for remote unlink) + * @inode: The inode to drop + * + * If we've received a callback on an iopen lock then its because a + * remote node tried to deallocate the inode but failed due to this node + * still having the inode open. Here we mark the link count zero + * since we know that it must have reached zero if the GLF_DEMOTE flag + * is set on the iopen glock. If we didn't do a disk read since the + * remote node removed the final link then we might otherwise miss + * this event. This check ensures that this node will deallocate the + * inode's blocks, or alternatively pass the baton on to another + * node for later deallocation. + */ +static void gfs2_drop_inode(struct inode *inode) +{ + if (inode->i_private && inode->i_nlink) { + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl; + if (gl && test_bit(GLF_DEMOTE, &gl->gl_flags)) + clear_nlink(inode); + } + generic_drop_inode(inode); +} + +/** * gfs2_clear_inode - Deallocate an inode when VFS is done with it * @inode: The VFS inode * @@ -441,7 +466,7 @@ out_unlock: out_uninit: gfs2_holder_uninit(&ip->i_iopen_gh); gfs2_glock_dq_uninit(&gh); - if (error) + if (error && error != GLR_TRYFAILED) fs_warn(sdp, "gfs2_delete_inode: %d\n", error); out: truncate_inode_pages(&inode->i_data, 0); @@ -481,6 +506,7 @@ const struct super_operations gfs2_super_ops = { .statfs = gfs2_statfs, .remount_fs = gfs2_remount_fs, .clear_inode = gfs2_clear_inode, + .drop_inode = gfs2_drop_inode, .show_options = gfs2_show_options, }; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 8d9c08b5c4b6..1727f5012efe 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -27,6 +27,7 @@ #include "trans.h" #include "ops_file.h" #include "util.h" +#include "log.h" #define BFITNOENT ((u32)~0) @@ -697,8 +698,6 @@ struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip) * @al: the struct gfs2_alloc structure describing the reservation * * If there's room for the requested blocks to be allocated from the RG: - * Sets the $al_reserved_data field in @al. - * Sets the $al_reserved_meta field in @al. * Sets the $al_rgd field in @al. * * Returns: 1 on success (it fits), 0 on failure (it doesn't fit) @@ -709,6 +708,9 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al) struct gfs2_sbd *sdp = rgd->rd_sbd; int ret = 0; + if (rgd->rd_rg.rg_flags & GFS2_RGF_NOALLOC) + return 0; + spin_lock(&sdp->sd_rindex_spin); if (rgd->rd_free_clone >= al->al_requested) { al->al_rgd = rgd; @@ -941,9 +943,13 @@ static int get_local_rgrp(struct gfs2_inode *ip) rgd = gfs2_rgrpd_get_first(sdp); if (rgd == begin) { - if (++loops >= 2 || !skipped) + if (++loops >= 3) return -ENOSPC; + if (!skipped) + loops++; flags = 0; + if (loops == 2) + gfs2_log_flush(sdp, NULL); } } |