From 5e64b0d9e86ffff8b299556341d85319117539e9 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Tue, 7 Sep 2010 13:30:05 +0800 Subject: ocfs2/lockdep: Move ip_xattr_sem out of ocfs2_xattr_get_nolock. As the name shows, we shouldn't have any lock in ocfs2_xattr_get_nolock. so lift ip_xattr_sem to the caller. This should be safe for us since the only 2 callers are: 1. ocfs2_xattr_get which will lock the resources. 2. ocfs2_mknod which don't need this locking. And this also resolves the following lockdep warning. ======================================================= [ INFO: possible circular locking dependency detected ] 2.6.35+ #5 ------------------------------------------------------- reflink/30027 is trying to acquire lock: (&oi->ip_alloc_sem){+.+.+.}, at: [] ocfs2_reflink_ioctl+0x69a/0x1226 [ocfs2] but task is already holding lock: (&oi->ip_xattr_sem){++++..}, at: [] ocfs2_reflink_ioctl+0x68b/0x1226 [ocfs2] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #3 (&oi->ip_xattr_sem){++++..}: [] __lock_acquire+0x79a/0x7f1 [] lock_acquire+0xc6/0xed [] down_read+0x34/0x47 [] ocfs2_xattr_get_nolock+0xa0/0x4e6 [ocfs2] [] ocfs2_get_acl_nolock+0x5c/0x132 [ocfs2] [] ocfs2_init_acl+0x60/0x243 [ocfs2] [] ocfs2_mknod+0xae8/0xfea [ocfs2] [] ocfs2_create+0x9d/0x105 [ocfs2] [] vfs_create+0x9b/0xf4 [] do_last+0x2fd/0x5be [] do_filp_open+0x1fb/0x572 [] do_sys_open+0x5a/0xe7 [] sys_open+0x1b/0x1d [] system_call_fastpath+0x16/0x1b -> #2 (jbd2_handle){+.+...}: [] __lock_acquire+0x79a/0x7f1 [] lock_acquire+0xc6/0xed [] start_this_handle+0x4a3/0x4bc [jbd2] [] jbd2__journal_start+0xba/0xee [jbd2] [] jbd2_journal_start+0xe/0x10 [jbd2] [] ocfs2_start_trans+0xb7/0x19b [ocfs2] [] ocfs2_mknod+0x73e/0xfea [ocfs2] [] ocfs2_create+0x9d/0x105 [ocfs2] [] vfs_create+0x9b/0xf4 [] do_last+0x2fd/0x5be [] do_filp_open+0x1fb/0x572 [] do_sys_open+0x5a/0xe7 [] sys_open+0x1b/0x1d [] system_call_fastpath+0x16/0x1b -> #1 (&journal->j_trans_barrier){.+.+..}: [] 
__lock_acquire+0x79a/0x7f1 [] lock_release_non_nested+0x1e5/0x24b [] lock_release+0x158/0x17a [] __mutex_unlock_slowpath+0xbf/0x11b [] mutex_unlock+0x9/0xb [] ocfs2_free_ac_resource+0x31/0x67 [ocfs2] [] ocfs2_free_alloc_context+0x11/0x1d [ocfs2] [] ocfs2_write_begin_nolock+0x141e/0x159b [ocfs2] [] ocfs2_write_begin+0x11e/0x1e7 [ocfs2] [] generic_file_buffered_write+0x10c/0x210 [] ocfs2_file_aio_write+0x4cc/0x6d3 [ocfs2] [] do_sync_write+0xc2/0x106 [] vfs_write+0xae/0x131 [] sys_write+0x47/0x6f [] system_call_fastpath+0x16/0x1b -> #0 (&oi->ip_alloc_sem){+.+.+.}: [] validate_chain+0x727/0xd68 [] __lock_acquire+0x79a/0x7f1 [] lock_acquire+0xc6/0xed [] down_write+0x31/0x52 [] ocfs2_reflink_ioctl+0x69a/0x1226 [ocfs2] [] ocfs2_ioctl+0x61a/0x656 [ocfs2] [] vfs_ioctl+0x2a/0x9d [] do_vfs_ioctl+0x45d/0x4ae [] sys_ioctl+0x57/0x7a [] system_call_fastpath+0x16/0x1b Signed-off-by: Tao Ma Signed-off-by: Joel Becker --- fs/ocfs2/xattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index d03469f61801..06fa5e77c40e 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -1286,13 +1286,11 @@ int ocfs2_xattr_get_nolock(struct inode *inode, xis.inode_bh = xbs.inode_bh = di_bh; di = (struct ocfs2_dinode *)di_bh->b_data; - down_read(&oi->ip_xattr_sem); ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer, buffer_size, &xis); if (ret == -ENODATA && di->i_xattr_loc) ret = ocfs2_xattr_block_get(inode, name_index, name, buffer, buffer_size, &xbs); - up_read(&oi->ip_xattr_sem); return ret; } @@ -1316,8 +1314,10 @@ static int ocfs2_xattr_get(struct inode *inode, mlog_errno(ret); return ret; } + down_read(&OCFS2_I(inode)->ip_xattr_sem); ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index, name, buffer, buffer_size); + up_read(&OCFS2_I(inode)->ip_xattr_sem); ocfs2_inode_unlock(inode, 0); -- cgit v1.2.1 From 07eaac9438b13ec0b863111698b91ccec8f3b8d4 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Tue, 7 Sep 
2010 13:30:06 +0800 Subject: ocfs2: Fix lockdep warning in reflink. This patch change mutex_lock to a new subclass and add a new inode lock subclass for the target inode which caused this lockdep warning. ============================================= [ INFO: possible recursive locking detected ] 2.6.35+ #5 --------------------------------------------- reflink/11086 is trying to acquire lock: (Meta){+++++.}, at: [] ocfs2_reflink_ioctl+0x898/0x1229 [ocfs2] but task is already holding lock: (Meta){+++++.}, at: [] ocfs2_reflink_ioctl+0x5d3/0x1229 [ocfs2] other info that might help us debug this: 6 locks held by reflink/11086: #0: (&sb->s_type->i_mutex_key#15/1){+.+.+.}, at: [] lookup_create+0x26/0x97 #1: (&sb->s_type->i_mutex_key#15){+.+.+.}, at: [] ocfs2_reflink_ioctl+0x4d3/0x1229 [ocfs2] #2: (Meta){+++++.}, at: [] ocfs2_reflink_ioctl+0x5d3/0x1229 [ocfs2] #3: (&oi->ip_xattr_sem){+.+.+.}, at: [] ocfs2_reflink_ioctl+0x68b/0x1229 [ocfs2] #4: (&oi->ip_alloc_sem){+.+.+.}, at: [] ocfs2_reflink_ioctl+0x69a/0x1229 [ocfs2] #5: (&sb->s_type->i_mutex_key#15/2){+.+...}, at: [] ocfs2_reflink_ioctl+0x882/0x1229 [ocfs2] stack backtrace: Pid: 11086, comm: reflink Not tainted 2.6.35+ #5 Call Trace: [] validate_chain+0x56e/0xd68 [] ? mark_held_locks+0x49/0x69 [] __lock_acquire+0x79a/0x7f1 [] lock_acquire+0xc6/0xed [] ? ocfs2_reflink_ioctl+0x898/0x1229 [ocfs2] [] __ocfs2_cluster_lock+0x975/0xa0d [ocfs2] [] ? ocfs2_reflink_ioctl+0x898/0x1229 [ocfs2] [] ? ocfs2_wait_for_recovery+0x15/0x8a [ocfs2] [] ocfs2_inode_lock_full_nested+0x1ac/0xdc5 [ocfs2] [] ? ocfs2_reflink_ioctl+0x898/0x1229 [ocfs2] [] ? trace_hardirqs_on_caller+0x10b/0x12f [] ? debug_mutex_free_waiter+0x4f/0x53 [] ocfs2_reflink_ioctl+0x898/0x1229 [ocfs2] [] ? ocfs2_file_lock_res_init+0x66/0x78 [ocfs2] [] ? might_fault+0x40/0x8d [] ocfs2_ioctl+0x61a/0x656 [ocfs2] [] ? mntput_no_expire+0x1d/0xb0 [] ? path_put+0x2c/0x31 [] vfs_ioctl+0x2a/0x9d [] do_vfs_ioctl+0x45d/0x4ae [] ? _raw_spin_unlock+0x26/0x2a [] ? 
sysret_check+0x27/0x62 [] sys_ioctl+0x57/0x7a [] system_call_fastpath+0x16/0x1b Signed-off-by: Tao Ma Signed-off-by: Joel Becker --- fs/ocfs2/dlmglue.h | 1 + fs/ocfs2/refcounttree.c | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/dlmglue.h b/fs/ocfs2/dlmglue.h index d1ce48e1b3d6..1d596d8c4a4a 100644 --- a/fs/ocfs2/dlmglue.h +++ b/fs/ocfs2/dlmglue.h @@ -84,6 +84,7 @@ enum { OI_LS_PARENT, OI_LS_RENAME1, OI_LS_RENAME2, + OI_LS_REFLINK_TARGET, }; int ocfs2_dlm_init(struct ocfs2_super *osb); diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 0afeda83120f..efdd75607406 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4201,8 +4201,9 @@ static int __ocfs2_reflink(struct dentry *old_dentry, goto out; } - mutex_lock(&new_inode->i_mutex); - ret = ocfs2_inode_lock(new_inode, &new_bh, 1); + mutex_lock_nested(&new_inode->i_mutex, I_MUTEX_CHILD); + ret = ocfs2_inode_lock_nested(new_inode, &new_bh, 1, + OI_LS_REFLINK_TARGET); if (ret) { mlog_errno(ret); goto out_unlock; -- cgit v1.2.1 From 0f4da216b8c3c35c90ecd18e1899c6f125957c2b Mon Sep 17 00:00:00 2001 From: Tristan Ye Date: Wed, 8 Sep 2010 17:12:38 +0800 Subject: Ocfs2: Re-access the journal after ocfs2_insert_extent() in dxdir codes. In ocfs2_dx_dir_rebalance(), we need to redo the journal access on the blocks after calling ocfs2_insert_extent(), since growing an extent tree may trigger ocfs2_extend_trans(), which makes the previous journal_access calls meaningless.
Signed-off-by: Tristan Ye Signed-off-by: Joel Becker --- fs/ocfs2/dir.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index f04ebcfffc4a..c49f6de0e7ab 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -3931,6 +3931,15 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir, goto out_commit; } + cpos = split_hash; + ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle, + data_ac, meta_ac, new_dx_leaves, + num_dx_leaves); + if (ret) { + mlog_errno(ret); + goto out_commit; + } + for (i = 0; i < num_dx_leaves; i++) { ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), orig_dx_leaves[i], @@ -3939,15 +3948,14 @@ static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir, mlog_errno(ret); goto out_commit; } - } - cpos = split_hash; - ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle, - data_ac, meta_ac, new_dx_leaves, - num_dx_leaves); - if (ret) { - mlog_errno(ret); - goto out_commit; + ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), + new_dx_leaves[i], + OCFS2_JOURNAL_ACCESS_WRITE); + if (ret) { + mlog_errno(ret); + goto out_commit; + } } ocfs2_dx_dir_transfer_leaf(dir, split_hash, handle, tmp_dx_leaf, -- cgit v1.2.1 From 228ac6357718df2d5c8d70210fa51b2225aab5ee Mon Sep 17 00:00:00 2001 From: Tristan Ye Date: Fri, 10 Sep 2010 10:16:33 +0800 Subject: Ocfs2: Handle empty list in lockres_seq_start() for dlmdebug.c This patch tries to handle the case in which list 'dlm->tracking_list' is empty, to avoid accessing an invalid pointer. 
It fixes the following oops: http://oss.oracle.com/bugzilla/show_bug.cgi?id=1287 Signed-off-by: Tristan Ye Signed-off-by: Joel Becker --- fs/ocfs2/dlm/dlmdebug.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 5efdd37dfe48..901ca52bf86b 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -636,8 +636,14 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos) spin_lock(&dlm->track_lock); if (oldres) track_list = &oldres->tracking; - else + else { track_list = &dlm->tracking_list; + if (list_empty(track_list)) { + dl = NULL; + spin_unlock(&dlm->track_lock); + goto bail; + } + } list_for_each_entry(res, track_list, tracking) { if (&res->tracking == &dlm->tracking_list) @@ -660,6 +666,7 @@ static void *lockres_seq_start(struct seq_file *m, loff_t *pos) } else dl = NULL; +bail: /* passed to seq_show */ return dl; } -- cgit v1.2.1 From 50aff040363d31f87e94f38f1710973d99489951 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Sat, 21 Aug 2010 14:40:20 +0800 Subject: ocfs2/net: fix uninitialized ret in o2net_send_message_vec() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mmotm/fs/ocfs2/cluster/tcp.c: In function ‘o2net_send_message_vec’: mmotm/fs/ocfs2/cluster/tcp.c:980:6: warning: ‘ret’ may be used uninitialized in this function It seems a real bug introduced by commit 9af0b38ff3 (ocfs2/net: Use wait_event() in o2net_send_message_vec()). 
cc: Sunil Mushran Signed-off-by: Wu Fengguang Signed-off-by: Joel Becker --- fs/ocfs2/cluster/tcp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 1361997cf205..cbe2f057cc28 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -977,7 +977,7 @@ static int o2net_tx_can_proceed(struct o2net_node *nn, int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, size_t caller_veclen, u8 target_node, int *status) { - int ret; + int ret = 0; struct o2net_msg *msg = NULL; size_t veclen, caller_bytes = 0; struct kvec *vec = NULL; -- cgit v1.2.1 From 12828061cdacfb1db3eb03fd71952d5ebc555bbb Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 13 Sep 2010 14:00:23 +0800 Subject: ocfs2: update ctime when changing the file's permission by setfacl In commit 30e2bab, ext3 fixed it. So change it accordingly in ocfs2. Steps to reproduce: # touch aaa # stat -c %Z aaa 1283760364 # setfacl -m 'u::x,g::x,o::x' aaa # stat -c %Z aaa 1283760364 Signed-off-by: Tao Ma Signed-off-by: Joel Becker --- fs/ocfs2/acl.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index a76e0aa5cd3f..391915093fe1 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -209,7 +209,10 @@ static int ocfs2_acl_set_mode(struct inode *inode, struct buffer_head *di_bh, } inode->i_mode = new_mode; + inode->i_ctime = CURRENT_TIME; di->i_mode = cpu_to_le16(inode->i_mode); + di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec); + di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec); ocfs2_journal_dirty(handle, di_bh); -- cgit v1.2.1 From 47dea423799d98c53793237ab386a94976f305d5 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Mon, 13 Sep 2010 15:13:50 +0800 Subject: ocfs2: Use cpu_to_le16 for e_leaf_clusters in ocfs2_bg_discontig_add_extent. e_leaf_clusters is a le16, so use cpu_to_le16 instead of cpu_to_le32. 
What's more, we change 'clusters' to unsigned int to signify that the size of 'clusters' isn't important here. Signed-off-by: Tao Ma Signed-off-by: Joel Becker --- fs/ocfs2/suballoc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 8a286f54dca1..849c2f0e0a0e 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -357,7 +357,7 @@ out: static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb, struct ocfs2_group_desc *bg, struct ocfs2_chain_list *cl, - u64 p_blkno, u32 clusters) + u64 p_blkno, unsigned int clusters) { struct ocfs2_extent_list *el = &bg->bg_list; struct ocfs2_extent_rec *rec; @@ -369,7 +369,7 @@ static void ocfs2_bg_discontig_add_extent(struct ocfs2_super *osb, rec->e_blkno = cpu_to_le64(p_blkno); rec->e_cpos = cpu_to_le32(le16_to_cpu(bg->bg_bits) / le16_to_cpu(cl->cl_bpc)); - rec->e_leaf_clusters = cpu_to_le32(clusters); + rec->e_leaf_clusters = cpu_to_le16(clusters); le16_add_cpu(&bg->bg_bits, clusters * le16_to_cpu(cl->cl_bpc)); le16_add_cpu(&bg->bg_free_bits_count, clusters * le16_to_cpu(cl->cl_bpc)); -- cgit v1.2.1 From 4a452de4fdfe4dbb27e491904d8bfaf1262bdff4 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Sun, 19 Sep 2010 13:42:28 +0800 Subject: ocfs2: Move 'wanted' into parens of ocfs2_resmap_resv_bits. The first time I read the function ocfs2_resmap_resv_bits, I wondered what 'wanted' was used for and what its comment referred to. Then I found that it is only used if the reservation is empty. ;) So we'd better move it into the parens of that branch so that the code is more readable. What's more, ocfs2_resmap_resv_bits is called very frequently, so we should save some CPU cycles.
Acked-by: Mark Fasheh Signed-off-by: Tao Ma Signed-off-by: Joel Becker --- fs/ocfs2/reservations.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c index d8b6e4259b80..3e78db361bc7 100644 --- a/fs/ocfs2/reservations.c +++ b/fs/ocfs2/reservations.c @@ -732,25 +732,23 @@ int ocfs2_resmap_resv_bits(struct ocfs2_reservation_map *resmap, struct ocfs2_alloc_reservation *resv, int *cstart, int *clen) { - unsigned int wanted = *clen; - if (resv == NULL || ocfs2_resmap_disabled(resmap)) return -ENOSPC; spin_lock(&resv_lock); - /* - * We don't want to over-allocate for temporary - * windows. Otherwise, we run the risk of fragmenting the - * allocation space. - */ - wanted = ocfs2_resv_window_bits(resmap, resv); - if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen) - wanted = *clen; - if (ocfs2_resv_empty(resv)) { - mlog(0, "empty reservation, find new window\n"); + /* + * We don't want to over-allocate for temporary + * windows. Otherwise, we run the risk of fragmenting the + * allocation space. + */ + unsigned int wanted = ocfs2_resv_window_bits(resmap, resv); + if ((resv->r_flags & OCFS2_RESV_FLAG_TMP) || wanted < *clen) + wanted = *clen; + + mlog(0, "empty reservation, find new window\n"); /* * Try to get a window here. If it works, we must fall * through and test the bitmap . This avoids some -- cgit v1.2.1 From 0000b862027d624ac564609b87c1aa4d14dd1e46 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Sun, 19 Sep 2010 13:42:29 +0800 Subject: ocfs2: Sync inode flags with ext2. We sync our inode flags with ext2 and define them by hex values. But actually in commit 3669567(4 years ago), all these values are moved to include/linux/fs.h. So we'd better also use them as what ext2 did. So sync our inode flags with ext2 by using FS_*. 
Signed-off-by: Tao Ma Signed-off-by: Joel Becker --- fs/ocfs2/ocfs2_fs.h | 37 +++++++++++++++++++++++++------------ fs/ocfs2/ocfs2_ioctl.h | 8 ++++---- 2 files changed, 29 insertions(+), 16 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index 33f1c9a8258d..fa31d05e41b7 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -235,18 +235,31 @@ #define OCFS2_HAS_REFCOUNT_FL (0x0010) /* Inode attributes, keep in sync with EXT2 */ -#define OCFS2_SECRM_FL (0x00000001) /* Secure deletion */ -#define OCFS2_UNRM_FL (0x00000002) /* Undelete */ -#define OCFS2_COMPR_FL (0x00000004) /* Compress file */ -#define OCFS2_SYNC_FL (0x00000008) /* Synchronous updates */ -#define OCFS2_IMMUTABLE_FL (0x00000010) /* Immutable file */ -#define OCFS2_APPEND_FL (0x00000020) /* writes to file may only append */ -#define OCFS2_NODUMP_FL (0x00000040) /* do not dump file */ -#define OCFS2_NOATIME_FL (0x00000080) /* do not update atime */ -#define OCFS2_DIRSYNC_FL (0x00010000) /* dirsync behaviour (directories only) */ - -#define OCFS2_FL_VISIBLE (0x000100FF) /* User visible flags */ -#define OCFS2_FL_MODIFIABLE (0x000100FF) /* User modifiable flags */ +#define OCFS2_SECRM_FL FS_SECRM_FL /* Secure deletion */ +#define OCFS2_UNRM_FL FS_UNRM_FL /* Undelete */ +#define OCFS2_COMPR_FL FS_COMPR_FL /* Compress file */ +#define OCFS2_SYNC_FL FS_SYNC_FL /* Synchronous updates */ +#define OCFS2_IMMUTABLE_FL FS_IMMUTABLE_FL /* Immutable file */ +#define OCFS2_APPEND_FL FS_APPEND_FL /* writes to file may only append */ +#define OCFS2_NODUMP_FL FS_NODUMP_FL /* do not dump file */ +#define OCFS2_NOATIME_FL FS_NOATIME_FL /* do not update atime */ +/* Reserved for compression usage... 
*/ +#define OCFS2_DIRTY_FL FS_DIRTY_FL +#define OCFS2_COMPRBLK_FL FS_COMPRBLK_FL /* One or more compressed clusters */ +#define OCFS2_NOCOMP_FL FS_NOCOMP_FL /* Don't compress */ +#define OCFS2_ECOMPR_FL FS_ECOMPR_FL /* Compression error */ +/* End compression flags --- maybe not all used */ +#define OCFS2_BTREE_FL FS_BTREE_FL /* btree format dir */ +#define OCFS2_INDEX_FL FS_INDEX_FL /* hash-indexed directory */ +#define OCFS2_IMAGIC_FL FS_IMAGIC_FL /* AFS directory */ +#define OCFS2_JOURNAL_DATA_FL FS_JOURNAL_DATA_FL /* Reserved for ext3 */ +#define OCFS2_NOTAIL_FL FS_NOTAIL_FL /* file tail should not be merged */ +#define OCFS2_DIRSYNC_FL FS_DIRSYNC_FL /* dirsync behaviour (directories only) */ +#define OCFS2_TOPDIR_FL FS_TOPDIR_FL /* Top of directory hierarchies*/ +#define OCFS2_RESERVED_FL FS_RESERVED_FL /* reserved for ext2 lib */ + +#define OCFS2_FL_VISIBLE FS_FL_USER_VISIBLE /* User visible flags */ +#define OCFS2_FL_MODIFIABLE FS_FL_USER_MODIFIABLE /* User modifiable flags */ /* * Extent record flags (e_node.leaf.flags) diff --git a/fs/ocfs2/ocfs2_ioctl.h b/fs/ocfs2/ocfs2_ioctl.h index 2d3420af1a83..5d241505690b 100644 --- a/fs/ocfs2/ocfs2_ioctl.h +++ b/fs/ocfs2/ocfs2_ioctl.h @@ -23,10 +23,10 @@ /* * ioctl commands */ -#define OCFS2_IOC_GETFLAGS _IOR('f', 1, long) -#define OCFS2_IOC_SETFLAGS _IOW('f', 2, long) -#define OCFS2_IOC32_GETFLAGS _IOR('f', 1, int) -#define OCFS2_IOC32_SETFLAGS _IOW('f', 2, int) +#define OCFS2_IOC_GETFLAGS FS_IOC_GETFLAGS +#define OCFS2_IOC_SETFLAGS FS_IOC_SETFLAGS +#define OCFS2_IOC32_GETFLAGS FS_IOC32_GETFLAGS +#define OCFS2_IOC32_SETFLAGS FS_IOC32_SETFLAGS /* * Space reservation / allocation / free ioctls and argument structure -- cgit v1.2.1 From 5dad6c39d156fbbde0b0ef170d9173feffdeb546 Mon Sep 17 00:00:00 2001 From: Srinivas Eeda Date: Tue, 21 Sep 2010 16:27:26 -0700 Subject: o2dlm: force free mles during dlm exit While umounting, a block mle doesn't get freed if dlm is shutdown after master request is received but before 
assert master. This results in unclean shutdown of dlm domain. This patch frees all mles that lie around after other nodes were notified about exiting the dlm and marking dlm state as leaving. Only block mles are expected to be around, so we log ERROR for other mles but still free them. Signed-off-by: Srinivas Eeda Signed-off-by: Joel Becker --- fs/ocfs2/dlm/dlmcommon.h | 1 + fs/ocfs2/dlm/dlmdomain.c | 1 + fs/ocfs2/dlm/dlmmaster.c | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 42 insertions(+) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 4b6ae2c13b47..765298908f1d 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -1030,6 +1030,7 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res); void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node); +void dlm_force_free_mles(struct dlm_ctxt *dlm); int dlm_lock_basts_flushed(struct dlm_ctxt *dlm, struct dlm_lock *lock); int __dlm_lockres_has_locks(struct dlm_lock_resource *res); int __dlm_lockres_unused(struct dlm_lock_resource *res); diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 153abb5abef0..11a5c87fd7f7 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -693,6 +693,7 @@ void dlm_unregister_domain(struct dlm_ctxt *dlm) dlm_mark_domain_leaving(dlm); dlm_leave_domain(dlm); + dlm_force_free_mles(dlm); dlm_complete_dlm_shutdown(dlm); } dlm_put(dlm); diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index ffb4c68dafa4..f564b0e5f80d 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -3433,3 +3433,43 @@ void dlm_lockres_release_ast(struct dlm_ctxt *dlm, wake_up(&res->wq); wake_up(&dlm->migration_wq); } + +void dlm_force_free_mles(struct dlm_ctxt *dlm) +{ + int i; + struct hlist_head *bucket; + struct dlm_master_list_entry *mle; + struct hlist_node *tmp, *list; + + /* + * We notified all other nodes that we are exiting the domain 
and + * marked the dlm state to DLM_CTXT_LEAVING. If any mles are still + * around we force free them and wake any processes that are waiting + * on the mles + */ + spin_lock(&dlm->spinlock); + spin_lock(&dlm->master_lock); + + BUG_ON(dlm->dlm_state != DLM_CTXT_LEAVING); + BUG_ON((find_next_bit(dlm->domain_map, O2NM_MAX_NODES, 0) < O2NM_MAX_NODES)); + + for (i = 0; i < DLM_HASH_BUCKETS; i++) { + bucket = dlm_master_hash(dlm, i); + hlist_for_each_safe(list, tmp, bucket) { + mle = hlist_entry(list, struct dlm_master_list_entry, + master_hash_node); + if (mle->type != DLM_MLE_BLOCK) { + mlog(ML_ERROR, "bad mle: %p\n", mle); + dlm_print_one_mle(mle); + } + atomic_set(&mle->woken, 1); + wake_up(&mle->wq); + + __dlm_unlink_mle(dlm, mle); + __dlm_mle_detach_hb_events(dlm, mle); + __dlm_put_mle(mle); + } + } + spin_unlock(&dlm->master_lock); + spin_unlock(&dlm->spinlock); +} -- cgit v1.2.1 From 1fc8a117865b54590acd773a55fbac9221b018f0 Mon Sep 17 00:00:00 2001 From: Joel Becker Date: Wed, 29 Sep 2010 17:33:05 -0700 Subject: ocfs2: Don't walk off the end of fast symlinks. ocfs2 fast symlinks are NUL terminated strings stored inline in the inode data area. However, disk corruption or a local attacker could, in theory, remove that NUL. Because we're using strlen() (my fault, introduced in a731d1 when removing vfs_follow_link()), we could walk off the end of that string. 
Signed-off-by: Joel Becker Cc: stable@kernel.org --- fs/ocfs2/symlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c index 32499d213fc4..9975457c981f 100644 --- a/fs/ocfs2/symlink.c +++ b/fs/ocfs2/symlink.c @@ -128,7 +128,7 @@ static void *ocfs2_fast_follow_link(struct dentry *dentry, } /* Fast symlinks can't be large */ - len = strlen(target); + len = strnlen(target, ocfs2_fast_symlink_chars(inode->i_sb)); link = kzalloc(len + 1, GFP_NOFS); if (!link) { status = -ENOMEM; -- cgit v1.2.1 From 54b5187b5a1ad6573ade8b18e065dda92501fc52 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Thu, 7 Oct 2010 15:26:08 -0700 Subject: ocfs2/cluster: Add heartbeat mode configfs parameter Add heartbeat mode parameter to the configfs tree. This will be used to set/show the heartbeat mode. The user is free to toggle the mode between local and global as long as there is no active heartbeat region. Signed-off-by: Sunil Mushran --- fs/ocfs2/cluster/heartbeat.c | 72 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 41d5f1f92d56..4d36459a8343 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -77,7 +77,19 @@ static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type); #define O2HB_DEFAULT_BLOCK_BITS 9 +enum o2hb_heartbeat_modes { + O2HB_HEARTBEAT_LOCAL = 0, + O2HB_HEARTBEAT_GLOBAL, + O2HB_HEARTBEAT_NUM_MODES, +}; + +char *o2hb_heartbeat_mode_desc[O2HB_HEARTBEAT_NUM_MODES] = { + "local", /* O2HB_HEARTBEAT_LOCAL */ + "global", /* O2HB_HEARTBEAT_GLOBAL */ +}; + unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD; +unsigned int o2hb_heartbeat_mode = O2HB_HEARTBEAT_LOCAL; /* Only sets a new threshold if there are no active regions. 
* @@ -94,6 +106,22 @@ static void o2hb_dead_threshold_set(unsigned int threshold) } } +static int o2hb_global_hearbeat_mode_set(unsigned int hb_mode) +{ + int ret = -1; + + if (hb_mode < O2HB_HEARTBEAT_NUM_MODES) { + spin_lock(&o2hb_live_lock); + if (list_empty(&o2hb_all_regions)) { + o2hb_heartbeat_mode = hb_mode; + ret = 0; + } + spin_unlock(&o2hb_live_lock); + } + + return ret; +} + struct o2hb_node_event { struct list_head hn_item; enum o2hb_callback_type hn_event_type; @@ -1688,6 +1716,41 @@ static ssize_t o2hb_heartbeat_group_threshold_store(struct o2hb_heartbeat_group return count; } +static +ssize_t o2hb_heartbeat_group_mode_show(struct o2hb_heartbeat_group *group, + char *page) +{ + return sprintf(page, "%s\n", + o2hb_heartbeat_mode_desc[o2hb_heartbeat_mode]); +} + +static +ssize_t o2hb_heartbeat_group_mode_store(struct o2hb_heartbeat_group *group, + const char *page, size_t count) +{ + unsigned int i; + int ret; + size_t len; + + len = (page[count - 1] == '\n') ? count - 1 : count; + if (!len) + return -EINVAL; + + for (i = 0; i < O2HB_HEARTBEAT_NUM_MODES; ++i) { + if (strnicmp(page, o2hb_heartbeat_mode_desc[i], len)) + continue; + + ret = o2hb_global_hearbeat_mode_set(i); + if (!ret) + printk(KERN_NOTICE "ocfs2: Heartbeat mode set to %s\n", + o2hb_heartbeat_mode_desc[i]); + return count; + } + + return -EINVAL; + +} + static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_threshold = { .attr = { .ca_owner = THIS_MODULE, .ca_name = "dead_threshold", @@ -1696,8 +1759,17 @@ static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_threshold .store = o2hb_heartbeat_group_threshold_store, }; +static struct o2hb_heartbeat_group_attribute o2hb_heartbeat_group_attr_mode = { + .attr = { .ca_owner = THIS_MODULE, + .ca_name = "mode", + .ca_mode = S_IRUGO | S_IWUSR }, + .show = o2hb_heartbeat_group_mode_show, + .store = o2hb_heartbeat_group_mode_store, +}; + static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = { 
&o2hb_heartbeat_group_attr_threshold.attr, + &o2hb_heartbeat_group_attr_mode.attr, NULL, }; -- cgit v1.2.1 From 98f486f23bc5b6a6fa90e1a0707b7e9fe0e7f3e4 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Sat, 9 Oct 2010 10:24:46 -0700 Subject: ocfs2: Add an incompat feature flag OCFS2_FEATURE_INCOMPAT_CLUSTERINFO OCFS2_FEATURE_INCOMPAT_CLUSTERINFO allows us to use sb->s_cluster_info for both userspace and o2cb cluster stacks. It also allows us to extend cluster info to include stack flags. This patch also adds stackflags to sb->s_clusterinfo. It also introduces a clusterinfo flag OCFS2_CLUSTER_O2CB_GLOBAL_HEARTBEAT to denote the enabled global heartbeat mode. This incompat flag can be set/cleared using tunefs.ocfs2 --fs-features. The clusterinfo flag is set/cleared using tunefs.ocfs2 --update-cluster-stack. Signed-off-by: Sunil Mushran --- fs/ocfs2/ocfs2.h | 31 +++++++++++++++++++++++++++++-- fs/ocfs2/ocfs2_fs.h | 40 ++++++++++++++++++++++++++++++++++------ fs/ocfs2/super.c | 4 +++- 3 files changed, 66 insertions(+), 9 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index c67003b6b5a2..d5496a792bdb 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -368,6 +368,8 @@ struct ocfs2_super struct ocfs2_alloc_stats alloc_stats; char dev_str[20]; /* "major,minor" of the device */ + u8 osb_stackflags; + char osb_cluster_stack[OCFS2_STACK_LABEL_LEN + 1]; struct ocfs2_cluster_connection *cconn; struct ocfs2_lock_res osb_super_lockres; @@ -601,10 +603,35 @@ static inline int ocfs2_is_soft_readonly(struct ocfs2_super *osb) return ret; } -static inline int ocfs2_userspace_stack(struct ocfs2_super *osb) +static inline int ocfs2_clusterinfo_valid(struct ocfs2_super *osb) { return (osb->s_feature_incompat & - OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK); + (OCFS2_FEATURE_INCOMPAT_USERSPACE_STACK | + OCFS2_FEATURE_INCOMPAT_CLUSTERINFO)); +} + +static inline int ocfs2_userspace_stack(struct ocfs2_super *osb) +{ + if (ocfs2_clusterinfo_valid(osb) 
&& + memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK, + OCFS2_STACK_LABEL_LEN)) + return 1; + return 0; +} + +static inline int ocfs2_o2cb_stack(struct ocfs2_super *osb) +{ + if (ocfs2_clusterinfo_valid(osb) && + !memcmp(osb->osb_cluster_stack, OCFS2_CLASSIC_CLUSTER_STACK, + OCFS2_STACK_LABEL_LEN)) + return 1; + return 0; +} + +static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb) +{ + return ocfs2_o2cb_stack(osb) && + (osb->osb_stackflags & OCFS2_CLUSTER_O2CB_GLOBAL_HEARTBEAT); } static inline int ocfs2_mount_local(struct ocfs2_super *osb) diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index fa31d05e41b7..d5b1d99abc3c 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -101,7 +101,8 @@ | OCFS2_FEATURE_INCOMPAT_META_ECC \ | OCFS2_FEATURE_INCOMPAT_INDEXED_DIRS \ | OCFS2_FEATURE_INCOMPAT_REFCOUNT_TREE \ - | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG) + | OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG \ + | OCFS2_FEATURE_INCOMPAT_CLUSTERINFO) #define OCFS2_FEATURE_RO_COMPAT_SUPP (OCFS2_FEATURE_RO_COMPAT_UNWRITTEN \ | OCFS2_FEATURE_RO_COMPAT_USRQUOTA \ | OCFS2_FEATURE_RO_COMPAT_GRPQUOTA) @@ -169,6 +170,13 @@ /* Discontigous block groups */ #define OCFS2_FEATURE_INCOMPAT_DISCONTIG_BG 0x2000 +/* + * Incompat bit to indicate useable clusterinfo with stackflags for all + * cluster stacks (userspace and o2cb). If this bit is set, + * INCOMPAT_USERSPACE_STACK becomes superfluous and thus should not be set. + */ +#define OCFS2_FEATURE_INCOMPAT_CLUSTERINFO 0x4000 + /* * backup superblock flag is used to indicate that this volume * has backup superblocks.
@@ -292,10 +300,13 @@ #define OCFS2_VOL_UUID_LEN 16 #define OCFS2_MAX_VOL_LABEL_LEN 64 -/* The alternate, userspace stack fields */ +/* The cluster stack fields */ #define OCFS2_STACK_LABEL_LEN 4 #define OCFS2_CLUSTER_NAME_LEN 16 +/* Classic (historically speaking) cluster stack */ +#define OCFS2_CLASSIC_CLUSTER_STACK "o2cb" + /* Journal limits (in bytes) */ #define OCFS2_MIN_JOURNAL_SIZE (4 * 1024 * 1024) @@ -305,6 +316,11 @@ */ #define OCFS2_MIN_XATTR_INLINE_SIZE 256 +/* + * Cluster info flags (ocfs2_cluster_info.ci_stackflags) + */ +#define OCFS2_CLUSTER_O2CB_GLOBAL_HEARTBEAT (0x01) + struct ocfs2_system_inode_info { char *si_name; int si_iflags; @@ -566,9 +582,21 @@ struct ocfs2_slot_map_extended { */ }; +/* + * ci_stackflags is only valid if the incompat bit + * OCFS2_FEATURE_INCOMPAT_CLUSTERINFO is set. + */ struct ocfs2_cluster_info { /*00*/ __u8 ci_stack[OCFS2_STACK_LABEL_LEN]; - __le32 ci_reserved; + union { + __le32 ci_reserved; + struct { + __u8 ci_stackflags; + __u8 ci_reserved1; + __u8 ci_reserved2; + __u8 ci_reserved3; + }; + }; /*08*/ __u8 ci_cluster[OCFS2_CLUSTER_NAME_LEN]; /*18*/ }; @@ -605,9 +633,9 @@ struct ocfs2_super_block { * group header */ /*50*/ __u8 s_label[OCFS2_MAX_VOL_LABEL_LEN]; /* Label for mounting, etc. */ /*90*/ __u8 s_uuid[OCFS2_VOL_UUID_LEN]; /* 128-bit uuid */ -/*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Selected userspace - stack. Only valid - with INCOMPAT flag. */ +/*A0*/ struct ocfs2_cluster_info s_cluster_info; /* Only valid if either + userspace or clusterinfo + INCOMPAT flag set. 
*/ /*B8*/ __le16 s_xattr_inline_size; /* extended attribute inline size for this fs*/ __le16 s_reserved0; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index fa1be1b304d1..755431739396 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -2149,7 +2149,9 @@ static int ocfs2_initialize_super(struct super_block *sb, goto bail; } - if (ocfs2_userspace_stack(osb)) { + if (ocfs2_clusterinfo_valid(osb)) { + osb->osb_stackflags = + OCFS2_RAW_SB(di)->s_cluster_info.ci_stackflags; memcpy(osb->osb_cluster_stack, OCFS2_RAW_SB(di)->s_cluster_info.ci_stack, OCFS2_STACK_LABEL_LEN); -- cgit v1.2.1 From 2c442719e90a44a6982c033d69df4aae4b167cfa Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Thu, 7 Oct 2010 15:23:50 -0700 Subject: ocfs2: Add support for heartbeat=global mount option Adds support for heartbeat=global mount option. It ensures that the heartbeat mode passed matches the one enabled on disk. Signed-off-by: Sunil Mushran --- fs/ocfs2/ocfs2.h | 4 +++- fs/ocfs2/ocfs2_fs.h | 1 + fs/ocfs2/super.c | 55 +++++++++++++++++++++++++++++++++++++++-------------- 3 files changed, 45 insertions(+), 15 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index d5496a792bdb..481387b90b21 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -243,7 +243,7 @@ enum ocfs2_local_alloc_state enum ocfs2_mount_options { - OCFS2_MOUNT_HB_LOCAL = 1 << 0, /* Heartbeat started in local mode */ + OCFS2_MOUNT_HB_LOCAL = 1 << 0, /* Local heartbeat */ OCFS2_MOUNT_BARRIER = 1 << 1, /* Use block barriers */ OCFS2_MOUNT_NOINTR = 1 << 2, /* Don't catch signals */ OCFS2_MOUNT_ERRORS_PANIC = 1 << 3, /* Panic on errors */ @@ -256,6 +256,8 @@ enum ocfs2_mount_options control lists */ OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */ OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */ + OCFS2_MOUNT_HB_NONE = 1 << 12, /* No heartbeat */ + OCFS2_MOUNT_HB_GLOBAL = 1 << 13, /* Global heartbeat */ }; #define OCFS2_OSB_SOFT_RO 0x0001 diff --git 
a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h index d5b1d99abc3c..28ff536b4f8d 100644 --- a/fs/ocfs2/ocfs2_fs.h +++ b/fs/ocfs2/ocfs2_fs.h @@ -376,6 +376,7 @@ static struct ocfs2_system_inode_info ocfs2_system_inodes[NUM_SYSTEM_INODES] = { /* Parameter passed from mount.ocfs2 to module */ #define OCFS2_HB_NONE "heartbeat=none" #define OCFS2_HB_LOCAL "heartbeat=local" +#define OCFS2_HB_GLOBAL "heartbeat=global" /* * OCFS2 directory file types. Only the low 3 bits are used. The diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 755431739396..4e009ad303a1 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -162,6 +162,7 @@ enum { Opt_nointr, Opt_hb_none, Opt_hb_local, + Opt_hb_global, Opt_data_ordered, Opt_data_writeback, Opt_atime_quantum, @@ -190,6 +191,7 @@ static const match_table_t tokens = { {Opt_nointr, "nointr"}, {Opt_hb_none, OCFS2_HB_NONE}, {Opt_hb_local, OCFS2_HB_LOCAL}, + {Opt_hb_global, OCFS2_HB_GLOBAL}, {Opt_data_ordered, "data=ordered"}, {Opt_data_writeback, "data=writeback"}, {Opt_atime_quantum, "atime_quantum=%u"}, @@ -608,6 +610,7 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) int ret = 0; struct mount_options parsed_options; struct ocfs2_super *osb = OCFS2_SB(sb); + u32 tmp; lock_kernel(); @@ -617,8 +620,9 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) goto out; } - if ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) != - (parsed_options.mount_opt & OCFS2_MOUNT_HB_LOCAL)) { + tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | + OCFS2_MOUNT_HB_NONE; + if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { ret = -EINVAL; mlog(ML_ERROR, "Cannot change heartbeat mode on remount\n"); goto out; @@ -809,23 +813,29 @@ bail: static int ocfs2_verify_heartbeat(struct ocfs2_super *osb) { - if (ocfs2_mount_local(osb)) { - if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { + u32 hb_enabled = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL; + + if (osb->s_mount_opt & hb_enabled) { + if 
(ocfs2_mount_local(osb)) { mlog(ML_ERROR, "Cannot heartbeat on a locally " "mounted device.\n"); return -EINVAL; } - } - - if (ocfs2_userspace_stack(osb)) { - if (osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) { + if (ocfs2_userspace_stack(osb)) { mlog(ML_ERROR, "Userspace stack expected, but " "o2cb heartbeat arguments passed to mount\n"); return -EINVAL; } + if (((osb->s_mount_opt & OCFS2_MOUNT_HB_GLOBAL) && + !ocfs2_cluster_o2cb_global_heartbeat(osb)) || + ((osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL) && + ocfs2_cluster_o2cb_global_heartbeat(osb))) { + mlog(ML_ERROR, "Mismatching o2cb heartbeat modes\n"); + return -EINVAL; + } } - if (!(osb->s_mount_opt & OCFS2_MOUNT_HB_LOCAL)) { + if (!(osb->s_mount_opt & hb_enabled)) { if (!ocfs2_mount_local(osb) && !ocfs2_is_hard_readonly(osb) && !ocfs2_userspace_stack(osb)) { mlog(ML_ERROR, "Heartbeat has to be started to mount " @@ -1291,6 +1301,7 @@ static int ocfs2_parse_options(struct super_block *sb, { int status; char *p; + u32 tmp; mlog_entry("remount: %d, options: \"%s\"\n", is_remount, options ? 
options : "(none)"); @@ -1322,7 +1333,10 @@ static int ocfs2_parse_options(struct super_block *sb, mopt->mount_opt |= OCFS2_MOUNT_HB_LOCAL; break; case Opt_hb_none: - mopt->mount_opt &= ~OCFS2_MOUNT_HB_LOCAL; + mopt->mount_opt |= OCFS2_MOUNT_HB_NONE; + break; + case Opt_hb_global: + mopt->mount_opt |= OCFS2_MOUNT_HB_GLOBAL; break; case Opt_barrier: if (match_int(&args[0], &option)) { @@ -1477,6 +1491,15 @@ static int ocfs2_parse_options(struct super_block *sb, } } + /* Ensure only one heartbeat mode */ + tmp = mopt->mount_opt & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | + OCFS2_MOUNT_HB_NONE); + if (hweight32(tmp) != 1) { + mlog(ML_ERROR, "Invalid heartbeat mount options\n"); + status = 0; + goto bail; + } + status = 1; bail: @@ -1490,10 +1513,14 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt) unsigned long opts = osb->s_mount_opt; unsigned int local_alloc_megs; - if (opts & OCFS2_MOUNT_HB_LOCAL) - seq_printf(s, ",_netdev,heartbeat=local"); - else - seq_printf(s, ",heartbeat=none"); + if (opts & (OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL)) { + seq_printf(s, ",_netdev"); + if (opts & OCFS2_MOUNT_HB_LOCAL) + seq_printf(s, ",%s", OCFS2_HB_LOCAL); + else + seq_printf(s, ",%s", OCFS2_HB_GLOBAL); + } else + seq_printf(s, ",%s", OCFS2_HB_NONE); if (opts & OCFS2_MOUNT_NOINTR) seq_printf(s, ",nointr"); -- cgit v1.2.1 From b1365d0bd14b912cceb424cbeed9fe939a9038e3 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Wed, 6 Oct 2010 17:55:34 -0700 Subject: ocfs2/dlm: Expose dlm_protocol in dlm_state Add dlm_protocol to the list of info shown by the debugfs file, dlm_state. 
Signed-off-by: Sunil Mushran --- fs/ocfs2/dlm/dlmdebug.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index 901ca52bf86b..f693ab812f3e 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -782,7 +782,9 @@ static int debug_state_print(struct dlm_ctxt *dlm, struct debug_buffer *db) /* Domain: xxxxxxxxxx Key: 0xdfbac769 */ out += snprintf(db->buf + out, db->len - out, - "Domain: %s Key: 0x%08x\n", dlm->name, dlm->key); + "Domain: %s Key: 0x%08x Protocol: %d.%d\n", + dlm->name, dlm->key, dlm->dlm_locking_proto.pv_major, + dlm->dlm_locking_proto.pv_minor); /* Thread Pid: xxx Node: xxx State: xxxxx */ out += snprintf(db->buf + out, db->len - out, -- cgit v1.2.1 From b3c85c4cdf77154acc940dd0f14d1fb99cbbaf75 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Thu, 7 Oct 2010 14:31:06 -0700 Subject: ocfs2/cluster: Get all heartbeat regions Export function in o2hb to get a list of heartbeat regions. It also adds an upper limit to the length of the heartbeat region name. o2hb_global_heartbeat_active() currently disables global heartbeat. It will be enabled in a later patch after all the code is added. 
Signed-off-by: Sunil Mushran --- fs/ocfs2/cluster/heartbeat.c | 34 ++++++++++++++++++++++++++++++++++ fs/ocfs2/cluster/heartbeat.h | 4 ++++ 2 files changed, 38 insertions(+) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 4d36459a8343..3415e58ff77b 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1623,6 +1623,9 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g if (reg == NULL) return ERR_PTR(-ENOMEM); + if (strlen(name) > O2HB_MAX_REGION_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type); spin_lock(&o2hb_live_lock); @@ -2035,3 +2038,34 @@ void o2hb_stop_all_regions(void) spin_unlock(&o2hb_live_lock); } EXPORT_SYMBOL_GPL(o2hb_stop_all_regions); + +int o2hb_get_all_regions(char *region_uuids, u8 max_regions) +{ + struct o2hb_region *reg; + int numregs = 0; + char *p; + + spin_lock(&o2hb_live_lock); + + p = region_uuids; + list_for_each_entry(reg, &o2hb_all_regions, hr_all_item) { + mlog(0, "Region: %s\n", config_item_name(&reg->hr_item)); + if (numregs < max_regions) { + memcpy(p, config_item_name(&reg->hr_item), + O2HB_MAX_REGION_NAME_LEN); + p += O2HB_MAX_REGION_NAME_LEN; + } + numregs++; + } + + spin_unlock(&o2hb_live_lock); + + return numregs; +} +EXPORT_SYMBOL_GPL(o2hb_get_all_regions); + +int o2hb_global_heartbeat_active(void) +{ + return 0; +} +EXPORT_SYMBOL(o2hb_global_heartbeat_active); diff --git a/fs/ocfs2/cluster/heartbeat.h b/fs/ocfs2/cluster/heartbeat.h index 2f1649253b49..00ad8e8fea51 100644 --- a/fs/ocfs2/cluster/heartbeat.h +++ b/fs/ocfs2/cluster/heartbeat.h @@ -31,6 +31,8 @@ #define O2HB_REGION_TIMEOUT_MS 2000 +#define O2HB_MAX_REGION_NAME_LEN 32 + /* number of changes to be seen as live */ #define O2HB_LIVE_THRESHOLD 2 /* number of equal samples to be seen as dead */ @@ -81,5 +83,7 @@ int o2hb_check_node_heartbeating(u8 node_num); int
o2hb_check_node_heartbeating_from_callback(u8 node_num); int o2hb_check_local_node_heartbeating(void); void o2hb_stop_all_regions(void); +int o2hb_get_all_regions(char *region_uuids, u8 numregions); +int o2hb_global_heartbeat_active(void); #endif /* O2CLUSTER_HEARTBEAT_H */ -- cgit v1.2.1 From ea2034416b54700e30371f2ad6517cbb94674083 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Sat, 9 Oct 2010 10:26:23 -0700 Subject: ocfs2/dlm: Add message DLM_QUERY_REGION Adds new dlm message DLM_QUERY_REGION that sends the names of all active heartbeat regions. This message is only sent in the global heartbeat mode. If the regions in the joining node do not fully match the ones in the active nodes, the join domain request is rejected. Signed-off-by: Sunil Mushran --- fs/ocfs2/cluster/ocfs2_nodemanager.h | 6 + fs/ocfs2/dlm/dlmcommon.h | 12 +- fs/ocfs2/dlm/dlmdomain.c | 218 +++++++++++++++++++++++++++++++++++ 3 files changed, 235 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/cluster/ocfs2_nodemanager.h b/fs/ocfs2/cluster/ocfs2_nodemanager.h index 5b9854bad571..49b594325bec 100644 --- a/fs/ocfs2/cluster/ocfs2_nodemanager.h +++ b/fs/ocfs2/cluster/ocfs2_nodemanager.h @@ -36,4 +36,10 @@ /* host name, group name, cluster name all 64 bytes */ #define O2NM_MAX_NAME_LEN 64 // __NEW_UTS_LEN +/* + * Maximum number of global heartbeat regions allowed. + * **CAUTION** Changing this number will break dlm compatibility. 
+ */ +#define O2NM_MAX_REGIONS 32 + #endif /* _OCFS2_NODEMANAGER_H */ diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index 765298908f1d..aa506d3e2ae6 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -445,7 +445,8 @@ enum { DLM_LOCK_REQUEST_MSG, /* 515 */ DLM_RECO_DATA_DONE_MSG, /* 516 */ DLM_BEGIN_RECO_MSG, /* 517 */ - DLM_FINALIZE_RECO_MSG /* 518 */ + DLM_FINALIZE_RECO_MSG, /* 518 */ + DLM_QUERY_REGION, /* 519 */ }; struct dlm_reco_node_data @@ -727,6 +728,15 @@ struct dlm_cancel_join u8 domain[O2NM_MAX_NAME_LEN]; }; +struct dlm_query_region { + u8 qr_node; + u8 qr_numregions; + u8 qr_namelen; + u8 pad1; + u8 qr_domain[O2NM_MAX_NAME_LEN]; + u8 qr_regions[O2HB_MAX_REGION_NAME_LEN * O2NM_MAX_REGIONS]; +}; + struct dlm_exit_domain { u8 node_idx; diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 11a5c87fd7f7..49650756dfef 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -128,6 +128,9 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events); * will have a negotiated version with the same major number and a minor * number equal or smaller. The dlm_ctxt->dlm_locking_proto field should * be used to determine what a running domain is actually using. 
+ * + * New in version 1.1: + * - Message DLM_QUERY_REGION added to support global heartbeat */ static const struct dlm_protocol_version dlm_protocol = { .pv_major = 1, @@ -142,6 +145,8 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, void **ret_data); static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, void **ret_data); +static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, + void *data, void **ret_data); static int dlm_exit_domain_handler(struct o2net_msg *msg, u32 len, void *data, void **ret_data); static int dlm_protocol_compare(struct dlm_protocol_version *existing, @@ -921,6 +926,203 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, return 0; } +static int dlm_match_regions(struct dlm_ctxt *dlm, + struct dlm_query_region *qr) +{ + char *local = NULL, *remote = qr->qr_regions; + char *l, *r; + int localnr, i, j, foundit; + int status = 0; + + if (!o2hb_global_heartbeat_active()) { + if (qr->qr_numregions) { + mlog(ML_ERROR, "Domain %s: Joining node %d has global " + "heartbeat enabled but local node %d does not\n", + qr->qr_domain, qr->qr_node, dlm->node_num); + status = -EINVAL; + } + goto bail; + } + + if (o2hb_global_heartbeat_active() && !qr->qr_numregions) { + mlog(ML_ERROR, "Domain %s: Local node %d has global " + "heartbeat enabled but joining node %d does not\n", + qr->qr_domain, dlm->node_num, qr->qr_node); + status = -EINVAL; + goto bail; + } + + r = remote; + for (i = 0; i < qr->qr_numregions; ++i) { + mlog(0, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, r); + r += O2HB_MAX_REGION_NAME_LEN; + } + + local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL); + if (!local) { + status = -ENOMEM; + goto bail; + } + + localnr = o2hb_get_all_regions(local, O2NM_MAX_REGIONS); + + /* compare local regions with remote */ + l = local; + for (i = 0; i < localnr; ++i) { + foundit = 0; + r = remote; + for (j = 0; j <= qr->qr_numregions; ++j) { + if (!memcmp(l, r, 
O2HB_MAX_REGION_NAME_LEN)) { + foundit = 1; + break; + } + r += O2HB_MAX_REGION_NAME_LEN; + } + if (!foundit) { + status = -EINVAL; + mlog(ML_ERROR, "Domain %s: Region '%.*s' registered " + "in local node %d but not in joining node %d\n", + qr->qr_domain, O2HB_MAX_REGION_NAME_LEN, l, + dlm->node_num, qr->qr_node); + goto bail; + } + l += O2HB_MAX_REGION_NAME_LEN; + } + + /* compare remote with local regions */ + r = remote; + for (i = 0; i < qr->qr_numregions; ++i) { + foundit = 0; + l = local; + for (j = 0; j < localnr; ++j) { + if (!memcmp(r, l, O2HB_MAX_REGION_NAME_LEN)) { + foundit = 1; + break; + } + l += O2HB_MAX_REGION_NAME_LEN; + } + if (!foundit) { + status = -EINVAL; + mlog(ML_ERROR, "Domain %s: Region '%.*s' registered " + "in joining node %d but not in local node %d\n", + qr->qr_domain, O2HB_MAX_REGION_NAME_LEN, r, + qr->qr_node, dlm->node_num); + goto bail; + } + r += O2HB_MAX_REGION_NAME_LEN; + } + +bail: + kfree(local); + + return status; +} + +static int dlm_send_regions(struct dlm_ctxt *dlm, unsigned long *node_map) +{ + struct dlm_query_region *qr = NULL; + int status, ret = 0, i; + char *p; + + if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES) + goto bail; + + qr = kzalloc(sizeof(struct dlm_query_region), GFP_KERNEL); + if (!qr) { + ret = -ENOMEM; + mlog_errno(ret); + goto bail; + } + + qr->qr_node = dlm->node_num; + qr->qr_namelen = strlen(dlm->name); + memcpy(qr->qr_domain, dlm->name, qr->qr_namelen); + /* if local hb, the numregions will be zero */ + if (o2hb_global_heartbeat_active()) + qr->qr_numregions = o2hb_get_all_regions(qr->qr_regions, + O2NM_MAX_REGIONS); + + p = qr->qr_regions; + for (i = 0; i < qr->qr_numregions; ++i, p += O2HB_MAX_REGION_NAME_LEN) + mlog(0, "Region %.*s\n", O2HB_MAX_REGION_NAME_LEN, p); + + i = -1; + while ((i = find_next_bit(node_map, O2NM_MAX_NODES, + i + 1)) < O2NM_MAX_NODES) { + if (i == dlm->node_num) + continue; + + mlog(0, "Sending regions to node %d\n", i); + + ret = 
o2net_send_message(DLM_QUERY_REGION, DLM_MOD_KEY, qr, + sizeof(struct dlm_query_region), + i, &status); + if (ret >= 0) + ret = status; + if (ret) { + mlog(ML_ERROR, "Region mismatch %d, node %d\n", + ret, i); + break; + } + } + +bail: + kfree(qr); + return ret; +} + +static int dlm_query_region_handler(struct o2net_msg *msg, u32 len, + void *data, void **ret_data) +{ + struct dlm_query_region *qr; + struct dlm_ctxt *dlm = NULL; + int status = 0; + int locked = 0; + + qr = (struct dlm_query_region *) msg->buf; + + mlog(0, "Node %u queries hb regions on domain %s\n", qr->qr_node, + qr->qr_domain); + + status = -EINVAL; + + spin_lock(&dlm_domain_lock); + dlm = __dlm_lookup_domain_full(qr->qr_domain, qr->qr_namelen); + if (!dlm) { + mlog(ML_ERROR, "Node %d queried hb regions on domain %s " + "before join domain\n", qr->qr_node, qr->qr_domain); + goto bail; + } + + spin_lock(&dlm->spinlock); + locked = 1; + if (dlm->joining_node != qr->qr_node) { + mlog(ML_ERROR, "Node %d queried hb regions on domain %s " + "but joining node is %d\n", qr->qr_node, qr->qr_domain, + dlm->joining_node); + goto bail; + } + + /* Support for global heartbeat was added in 1.1 */ + if (dlm->dlm_locking_proto.pv_major == 1 && + dlm->dlm_locking_proto.pv_minor == 0) { + mlog(ML_ERROR, "Node %d queried hb regions on domain %s " + "but active dlm protocol is %d.%d\n", qr->qr_node, + qr->qr_domain, dlm->dlm_locking_proto.pv_major, + dlm->dlm_locking_proto.pv_minor); + goto bail; + } + + status = dlm_match_regions(dlm, qr); + +bail: + if (locked) + spin_unlock(&dlm->spinlock); + spin_unlock(&dlm_domain_lock); + + return status; +} + static int dlm_cancel_join_handler(struct o2net_msg *msg, u32 len, void *data, void **ret_data) { @@ -1241,6 +1443,15 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm) set_bit(dlm->node_num, dlm->domain_map); spin_unlock(&dlm->spinlock); + /* Support for global heartbeat was added in 1.1 */ + if (dlm_protocol.pv_major > 1 || dlm_protocol.pv_minor > 0) { + status 
= dlm_send_regions(dlm, ctxt->yes_resp_map); + if (status) { + mlog_errno(status); + goto bail; + } + } + dlm_send_join_asserts(dlm, ctxt->yes_resp_map); /* Joined state *must* be set before the joining node @@ -1807,6 +2018,13 @@ static int dlm_register_net_handlers(void) sizeof(struct dlm_cancel_join), dlm_cancel_join_handler, NULL, NULL, &dlm_join_handlers); + if (status) + goto bail; + + status = o2net_register_handler(DLM_QUERY_REGION, DLM_MOD_KEY, + sizeof(struct dlm_query_region), + dlm_query_region_handler, + NULL, NULL, &dlm_join_handlers); bail: if (status < 0) -- cgit v1.2.1 From 5f3c6d9c615770708df464c170316f83cf437242 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Wed, 6 Oct 2010 17:55:29 -0700 Subject: ocfs2: Print message if user mounts without starting global heartbeat In global heartbeat mode, the heartbeat is started by the user. This patch prints an error if the user attempts to mount a volume without starting the heartbeat. Signed-off-by: Sunil Mushran --- fs/ocfs2/stack_o2cb.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c index 0d3049f696c5..19965b00c43c 100644 --- a/fs/ocfs2/stack_o2cb.c +++ b/fs/ocfs2/stack_o2cb.c @@ -283,6 +283,8 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn) /* for now we only have one cluster/node, make sure we see it * in the heartbeat universe */ if (!o2hb_check_local_node_heartbeating()) { + if (o2hb_global_heartbeat_active()) + mlog(ML_ERROR, "Global heartbeat not started\n"); rc = -EINVAL; goto out; } -- cgit v1.2.1 From 18cfdf1b1a8e83b09e4185c02396257ce7e7bee3 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Thu, 7 Oct 2010 16:47:03 -0700 Subject: ocfs2/dlm: Add message DLM_QUERY_NODEINFO Adds new dlm message DLM_QUERY_NODEINFO that sends the attributes of all registered nodes. This message is sent if the negotiated dlm protocol is 1.1 or higher. 
If the information of the joining node does not match that of any existing nodes, the join domain request is rejected. Signed-off-by: Sunil Mushran --- fs/ocfs2/dlm/dlmcommon.h | 17 +++++ fs/ocfs2/dlm/dlmdomain.c | 182 ++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 198 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index aa506d3e2ae6..b36d0bf77a5a 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -447,6 +447,7 @@ enum { DLM_BEGIN_RECO_MSG, /* 517 */ DLM_FINALIZE_RECO_MSG, /* 518 */ DLM_QUERY_REGION, /* 519 */ + DLM_QUERY_NODEINFO, /* 520 */ }; struct dlm_reco_node_data @@ -737,6 +738,22 @@ struct dlm_query_region { u8 qr_regions[O2HB_MAX_REGION_NAME_LEN * O2NM_MAX_REGIONS]; }; +struct dlm_node_info { + u8 ni_nodenum; + u8 pad1; + u16 ni_ipv4_port; + u32 ni_ipv4_address; +}; + +struct dlm_query_nodeinfo { + u8 qn_nodenum; + u8 qn_numnodes; + u8 qn_namelen; + u8 pad1; + u8 qn_domain[O2NM_MAX_NAME_LEN]; + struct dlm_node_info qn_nodes[O2NM_MAX_NODES]; +}; + struct dlm_exit_domain { u8 node_idx; diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 49650756dfef..78d428f5e10e 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -131,6 +131,7 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events); * * New in version 1.1: * - Message DLM_QUERY_REGION added to support global heartbeat + * - Message DLM_QUERY_NODEINFO added to allow online node removes */ static const struct dlm_protocol_version dlm_protocol = { .pv_major = 1, @@ -1123,6 +1124,173 @@ bail: return status; } +static int dlm_match_nodes(struct dlm_ctxt *dlm, struct dlm_query_nodeinfo *qn) +{ + struct o2nm_node *local; + struct dlm_node_info *remote; + int i, j; + int status = 0; + + for (j = 0; j < qn->qn_numnodes; ++j) + mlog(0, "Node %3d, %pI4:%u\n", qn->qn_nodes[j].ni_nodenum, + &(qn->qn_nodes[j].ni_ipv4_address), + ntohs(qn->qn_nodes[j].ni_ipv4_port)); + + for (i 
= 0; i < O2NM_MAX_NODES && !status; ++i) { + local = o2nm_get_node_by_num(i); + remote = NULL; + for (j = 0; j < qn->qn_numnodes; ++j) { + if (qn->qn_nodes[j].ni_nodenum == i) { + remote = &(qn->qn_nodes[j]); + break; + } + } + + if (!local && !remote) + continue; + + if ((local && !remote) || (!local && remote)) + status = -EINVAL; + + if (!status && + ((remote->ni_nodenum != local->nd_num) || + (remote->ni_ipv4_port != local->nd_ipv4_port) || + (remote->ni_ipv4_address != local->nd_ipv4_address))) + status = -EINVAL; + + if (status) { + if (remote && !local) + mlog(ML_ERROR, "Domain %s: Node %d (%pI4:%u) " + "registered in joining node %d but not in " + "local node %d\n", qn->qn_domain, + remote->ni_nodenum, + &(remote->ni_ipv4_address), + ntohs(remote->ni_ipv4_port), + qn->qn_nodenum, dlm->node_num); + if (local && !remote) + mlog(ML_ERROR, "Domain %s: Node %d (%pI4:%u) " + "registered in local node %d but not in " + "joining node %d\n", qn->qn_domain, + local->nd_num, &(local->nd_ipv4_address), + ntohs(local->nd_ipv4_port), + dlm->node_num, qn->qn_nodenum); + BUG_ON((!local && !remote)); + } + + if (local) + o2nm_node_put(local); + } + + return status; +} + +static int dlm_send_nodeinfo(struct dlm_ctxt *dlm, unsigned long *node_map) +{ + struct dlm_query_nodeinfo *qn = NULL; + struct o2nm_node *node; + int ret = 0, status, count, i; + + if (find_next_bit(node_map, O2NM_MAX_NODES, 0) >= O2NM_MAX_NODES) + goto bail; + + qn = kzalloc(sizeof(struct dlm_query_nodeinfo), GFP_KERNEL); + if (!qn) { + ret = -ENOMEM; + mlog_errno(ret); + goto bail; + } + + for (i = 0, count = 0; i < O2NM_MAX_NODES; ++i) { + node = o2nm_get_node_by_num(i); + if (!node) + continue; + qn->qn_nodes[count].ni_nodenum = node->nd_num; + qn->qn_nodes[count].ni_ipv4_port = node->nd_ipv4_port; + qn->qn_nodes[count].ni_ipv4_address = node->nd_ipv4_address; + mlog(0, "Node %3d, %pI4:%u\n", node->nd_num, + &(node->nd_ipv4_address), ntohs(node->nd_ipv4_port)); + ++count; + o2nm_node_put(node); + } + + 
qn->qn_nodenum = dlm->node_num; + qn->qn_numnodes = count; + qn->qn_namelen = strlen(dlm->name); + memcpy(qn->qn_domain, dlm->name, qn->qn_namelen); + + i = -1; + while ((i = find_next_bit(node_map, O2NM_MAX_NODES, + i + 1)) < O2NM_MAX_NODES) { + if (i == dlm->node_num) + continue; + + mlog(0, "Sending nodeinfo to node %d\n", i); + + ret = o2net_send_message(DLM_QUERY_NODEINFO, DLM_MOD_KEY, + qn, sizeof(struct dlm_query_nodeinfo), + i, &status); + if (ret >= 0) + ret = status; + if (ret) { + mlog(ML_ERROR, "node mismatch %d, node %d\n", ret, i); + break; + } + } + +bail: + kfree(qn); + return ret; +} + +static int dlm_query_nodeinfo_handler(struct o2net_msg *msg, u32 len, + void *data, void **ret_data) +{ + struct dlm_query_nodeinfo *qn; + struct dlm_ctxt *dlm = NULL; + int locked = 0, status = -EINVAL; + + qn = (struct dlm_query_nodeinfo *) msg->buf; + + mlog(0, "Node %u queries nodes on domain %s\n", qn->qn_nodenum, + qn->qn_domain); + + spin_lock(&dlm_domain_lock); + dlm = __dlm_lookup_domain_full(qn->qn_domain, qn->qn_namelen); + if (!dlm) { + mlog(ML_ERROR, "Node %d queried nodes on domain %s before " + "join domain\n", qn->qn_nodenum, qn->qn_domain); + goto bail; + } + + spin_lock(&dlm->spinlock); + locked = 1; + if (dlm->joining_node != qn->qn_nodenum) { + mlog(ML_ERROR, "Node %d queried nodes on domain %s but " + "joining node is %d\n", qn->qn_nodenum, qn->qn_domain, + dlm->joining_node); + goto bail; + } + + /* Support for node query was added in 1.1 */ + if (dlm->dlm_locking_proto.pv_major == 1 && + dlm->dlm_locking_proto.pv_minor == 0) { + mlog(ML_ERROR, "Node %d queried nodes on domain %s " + "but active dlm protocol is %d.%d\n", qn->qn_nodenum, + qn->qn_domain, dlm->dlm_locking_proto.pv_major, + dlm->dlm_locking_proto.pv_minor); + goto bail; + } + + status = dlm_match_nodes(dlm, qn); + +bail: + if (locked) + spin_unlock(&dlm->spinlock); + spin_unlock(&dlm_domain_lock); + + return status; +} + static int dlm_cancel_join_handler(struct o2net_msg *msg, 
u32 len, void *data, void **ret_data) { @@ -1443,8 +1611,13 @@ static int dlm_try_to_join_domain(struct dlm_ctxt *dlm) set_bit(dlm->node_num, dlm->domain_map); spin_unlock(&dlm->spinlock); - /* Support for global heartbeat was added in 1.1 */ + /* Support for global heartbeat and node info was added in 1.1 */ if (dlm_protocol.pv_major > 1 || dlm_protocol.pv_minor > 0) { + status = dlm_send_nodeinfo(dlm, ctxt->yes_resp_map); + if (status) { + mlog_errno(status); + goto bail; + } status = dlm_send_regions(dlm, ctxt->yes_resp_map); if (status) { mlog_errno(status); @@ -2026,6 +2199,13 @@ static int dlm_register_net_handlers(void) dlm_query_region_handler, NULL, NULL, &dlm_join_handlers); + if (status) + goto bail; + + status = o2net_register_handler(DLM_QUERY_NODEINFO, DLM_MOD_KEY, + sizeof(struct dlm_query_nodeinfo), + dlm_query_nodeinfo_handler, + NULL, NULL, &dlm_join_handlers); bail: if (status < 0) dlm_unregister_net_handlers(); -- cgit v1.2.1 From 18c50cb0d3c293eabd6c2ef89c43f2a968e709ed Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Wed, 6 Oct 2010 18:26:59 -0700 Subject: ocfs2/cluster: Print messages when adding/removing heartbeat regions Prints messages when the user adds or removes heartbeat regions in global heartbeat mode. These messages are useful when debugging cluster related issues. 
Signed-off-by: Sunil Mushran --- fs/ocfs2/cluster/heartbeat.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 3415e58ff77b..12bb12ba8640 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1476,6 +1476,10 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, else ret = -EIO; + if (hb_task && o2hb_global_heartbeat_active()) + printk(KERN_NOTICE "o2hb: Heartbeat started on region %s\n", + config_item_name(&reg->hr_item)); + out: if (filp) fput(filp); @@ -1659,6 +1663,9 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, wake_up(&o2hb_steady_queue); } + if (o2hb_global_heartbeat_active()) + printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n", + config_item_name(&reg->hr_item)); config_item_put(item); } @@ -1745,7 +1752,7 @@ ssize_t o2hb_heartbeat_group_mode_store(struct o2hb_heartbeat_group *group, ret = o2hb_global_hearbeat_mode_set(i); if (!ret) - printk(KERN_NOTICE "ocfs2: Heartbeat mode set to %s\n", + printk(KERN_NOTICE "o2hb: Heartbeat mode set to %s\n", o2hb_heartbeat_mode_desc[i]); return count; } -- cgit v1.2.1 From 39a298563e0619b1b6e2e0974e58801de780621c Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Thu, 7 Oct 2010 17:30:17 -0700 Subject: ocfs2/cluster: Print messages when adding/removing nodes Prints messages when the user adds or removes nodes.
Signed-off-by: Sunil Mushran --- fs/ocfs2/cluster/masklog.h | 3 ++- fs/ocfs2/cluster/nodemanager.c | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index fd96e2a2fa56..ea2ed9f56c94 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -119,7 +119,8 @@ #define ML_ERROR 0x0000000100000000ULL /* sent to KERN_ERR */ #define ML_NOTICE 0x0000000200000000ULL /* setn to KERN_NOTICE */ #define ML_KTHREAD 0x0000000400000000ULL /* kernel thread activity */ -#define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */ +#define ML_RESERVATIONS 0x0000000800000000ULL /* ocfs2 alloc reservations */ +#define ML_CLUSTER 0x0000001000000000ULL /* cluster stack */ #define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE) #define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT) diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c index ed0c9f367fed..bb240647ca5f 100644 --- a/fs/ocfs2/cluster/nodemanager.c +++ b/fs/ocfs2/cluster/nodemanager.c @@ -711,6 +711,8 @@ static struct config_item *o2nm_node_group_make_item(struct config_group *group, config_item_init_type_name(&node->nd_item, name, &o2nm_node_type); spin_lock_init(&node->nd_lock); + mlog(ML_CLUSTER, "o2nm: Registering node %s\n", name); + return &node->nd_item; } @@ -744,6 +746,9 @@ static void o2nm_node_group_drop_item(struct config_group *group, } write_unlock(&cluster->cl_nodes_lock); + mlog(ML_CLUSTER, "o2nm: Unregistered node %s\n", + config_item_name(&node->nd_item)); + config_item_put(item); } -- cgit v1.2.1 From 0e105d37c2adb19cb777aa6701a866f211764a30 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Thu, 7 Oct 2010 17:00:16 -0700 Subject: ocfs2/cluster: Check slots for unconfigured live nodes o2hb currently checks slots for configured nodes only. 
This patch makes it check the slots for the live nodes too to take care of a race in which a node is removed from the configuration but not from the live map. Signed-off-by: Sunil Mushran --- fs/ocfs2/cluster/heartbeat.c | 38 +++++++++++++++++++++++++++++++------- fs/ocfs2/cluster/tcp.c | 5 +++++ 2 files changed, 36 insertions(+), 7 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 12bb12ba8640..a8f10649674d 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -541,6 +541,8 @@ static void o2hb_queue_node_event(struct o2hb_node_event *event, { assert_spin_locked(&o2hb_live_lock); + BUG_ON((!node) && (type != O2HB_NODE_DOWN_CB)); + event->hn_event_type = type; event->hn_node = node; event->hn_node_num = node_num; @@ -593,14 +595,22 @@ static int o2hb_check_slot(struct o2hb_region *reg, u64 cputime; unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS; unsigned int slot_dead_ms; + int tmp; memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes); - /* Is this correct? Do we assume that the node doesn't exist - * if we're not configured for him? */ + /* + * If a node is no longer configured but is still in the livemap, we + * may need to clear that bit from the livemap. 
+ */ node = o2nm_get_node_by_num(slot->ds_node_num); - if (!node) - return 0; + if (!node) { + spin_lock(&o2hb_live_lock); + tmp = test_bit(slot->ds_node_num, o2hb_live_node_bitmap); + spin_unlock(&o2hb_live_lock); + if (!tmp) + return 0; + } if (!o2hb_verify_crc(reg, hb_block)) { /* all paths from here will drop o2hb_live_lock for @@ -717,8 +727,9 @@ fire_callbacks: if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { clear_bit(slot->ds_node_num, o2hb_live_node_bitmap); - o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB, node, - slot->ds_node_num); + /* node can be null */ + o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB, + node, slot->ds_node_num); changed = 1; } @@ -738,7 +749,8 @@ out: o2hb_run_event_list(&event); - o2nm_node_put(node); + if (node) + o2nm_node_put(node); return changed; } @@ -765,6 +777,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) { int i, ret, highest_node, change = 0; unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)]; + unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; struct o2hb_bio_wait_ctxt write_wc; ret = o2nm_configured_node_map(configured_nodes, @@ -774,6 +787,17 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg) return ret; } + /* + * If a node is not configured but is in the livemap, we still need + * to read the slot so as to be able to remove it from the livemap. 
+ */ + o2hb_fill_node_map(live_node_bitmap, sizeof(live_node_bitmap)); + i = -1; + while ((i = find_next_bit(live_node_bitmap, + O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) { + set_bit(i, configured_nodes); + } + highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES); if (highest_node >= O2NM_MAX_NODES) { mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n"); diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index cbe2f057cc28..9aa426e42123 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1696,6 +1696,9 @@ static void o2net_hb_node_down_cb(struct o2nm_node *node, int node_num, { o2quo_hb_down(node_num); + if (!node) + return; + if (node_num != o2nm_this_node()) o2net_disconnect_node(node); @@ -1709,6 +1712,8 @@ static void o2net_hb_node_up_cb(struct o2nm_node *node, int node_num, o2quo_hb_up(node_num); + BUG_ON(!node); + /* ensure an immediate connect attempt */ nn->nn_last_connect_attempt = jiffies - (msecs_to_jiffies(o2net_reconnect_delay()) + 1); -- cgit v1.2.1 From 8ca8b0bbd841b6bcd8ac05e51b0143aa61cfeff3 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Thu, 7 Oct 2010 17:01:27 -0700 Subject: ocfs2/cluster: Reorganize o2hb debugfs init o2hb debugfs handling is reorganized to allow for easy expansion. 
Signed-off-by: Sunil Mushran --- fs/ocfs2/cluster/heartbeat.c | 101 +++++++++++++++++++++++++++++++++---------- 1 file changed, 78 insertions(+), 23 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index a8f10649674d..16e49765c853 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -62,8 +62,19 @@ static unsigned long o2hb_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; static LIST_HEAD(o2hb_node_events); static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue); +#define O2HB_DB_TYPE_LIVENODES 0 +struct o2hb_debug_buf { + int db_type; + int db_size; + int db_len; + void *db_data; +}; + +static struct o2hb_debug_buf *o2hb_db_livenodes; + #define O2HB_DEBUG_DIR "o2hb" #define O2HB_DEBUG_LIVENODES "livenodes" + static struct dentry *o2hb_debug_dir; static struct dentry *o2hb_debug_livenodes; @@ -969,21 +980,35 @@ static int o2hb_thread(void *data) #ifdef CONFIG_DEBUG_FS static int o2hb_debug_open(struct inode *inode, struct file *file) { + struct o2hb_debug_buf *db = inode->i_private; unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)]; char *buf = NULL; int i = -1; int out = 0; + /* max_nodes should be the largest bitmap we pass here */ + BUG_ON(sizeof(map) < db->db_size); + buf = kmalloc(PAGE_SIZE, GFP_KERNEL); if (!buf) goto bail; - o2hb_fill_node_map(map, sizeof(map)); + switch (db->db_type) { + case O2HB_DB_TYPE_LIVENODES: + spin_lock(&o2hb_live_lock); + memcpy(map, db->db_data, db->db_size); + spin_unlock(&o2hb_live_lock); + break; + + default: + goto done; + } - while ((i = find_next_bit(map, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) + while ((i = find_next_bit(map, db->db_len, i + 1)) < db->db_len) out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i); out += snprintf(buf + out, PAGE_SIZE - out, "\n"); +done: i_size_write(inode, out); file->private_data = buf; @@ -1030,10 +1055,56 @@ static const struct file_operations o2hb_debug_fops = { void o2hb_exit(void) { - if 
(o2hb_debug_livenodes) - debugfs_remove(o2hb_debug_livenodes); - if (o2hb_debug_dir) - debugfs_remove(o2hb_debug_dir); + kfree(o2hb_db_livenodes); + debugfs_remove(o2hb_debug_livenodes); + debugfs_remove(o2hb_debug_dir); +} + +static struct dentry *o2hb_debug_create(const char *name, struct dentry *dir, + struct o2hb_debug_buf **db, int db_len, + int type, int size, int len, void *data) +{ + *db = kmalloc(db_len, GFP_KERNEL); + if (!*db) + return NULL; + + (*db)->db_type = type; + (*db)->db_size = size; + (*db)->db_len = len; + (*db)->db_data = data; + + return debugfs_create_file(name, S_IFREG|S_IRUSR, dir, *db, + &o2hb_debug_fops); +} + +static int o2hb_debug_init(void) +{ + int ret = -ENOMEM; + + o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); + if (!o2hb_debug_dir) { + mlog_errno(ret); + goto bail; + } + + o2hb_debug_livenodes = o2hb_debug_create(O2HB_DEBUG_LIVENODES, + o2hb_debug_dir, + &o2hb_db_livenodes, + sizeof(*o2hb_db_livenodes), + O2HB_DB_TYPE_LIVENODES, + sizeof(o2hb_live_node_bitmap), + O2NM_MAX_NODES, + o2hb_live_node_bitmap); + if (!o2hb_debug_livenodes) { + mlog_errno(ret); + goto bail; + } + ret = 0; +bail: + if (ret) + o2hb_exit(); + + return ret; } int o2hb_init(void) @@ -1050,23 +1121,7 @@ int o2hb_init(void) memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap)); - o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL); - if (!o2hb_debug_dir) { - mlog_errno(-ENOMEM); - return -ENOMEM; - } - - o2hb_debug_livenodes = debugfs_create_file(O2HB_DEBUG_LIVENODES, - S_IFREG|S_IRUSR, - o2hb_debug_dir, NULL, - &o2hb_debug_fops); - if (!o2hb_debug_livenodes) { - mlog_errno(-ENOMEM); - debugfs_remove(o2hb_debug_dir); - return -ENOMEM; - } - - return 0; + return o2hb_debug_init(); } /* if we're already in a callback then we're already serialized by the sem */ -- cgit v1.2.1 From 823a637ae933fde8fdb280612dd3ff9912e301e3 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Wed, 6 Oct 2010 17:55:21 -0700 Subject: ocfs2/cluster: Maintain 
live node bitmap per heartbeat region Currently we track a global livenode bitmap that keeps track of all nodes that are heartbeating in all regions. This patch adds the ability to track the livenode bitmap on a per region basis. We will use this facility in a later patch to allow us to withstand the loss of a minority number of regions. Signed-off-by: Sunil Mushran --- fs/ocfs2/cluster/heartbeat.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 16e49765c853..188f50269b89 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -174,6 +174,9 @@ struct o2hb_region { struct block_device *hr_bdev; struct o2hb_disk_slot *hr_slots; + /* live node map of this region */ + unsigned long hr_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; + /* let the person setting up hb wait for it to return until it * has reached a 'steady' state. This will be fixed when we have * a more complete api that doesn't lead to this sort of fragility. 
*/ @@ -688,6 +691,8 @@ fire_callbacks: mlog(ML_HEARTBEAT, "Node %d (id 0x%llx) joined my region\n", slot->ds_node_num, (long long)slot->ds_last_generation); + set_bit(slot->ds_node_num, reg->hr_live_node_bitmap); + /* first on the list generates a callback */ if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { set_bit(slot->ds_node_num, o2hb_live_node_bitmap); @@ -733,6 +738,8 @@ fire_callbacks: mlog(ML_HEARTBEAT, "Node %d left my region\n", slot->ds_node_num); + clear_bit(slot->ds_node_num, reg->hr_live_node_bitmap); + /* last off the live_slot generates a callback */ list_del_init(&slot->ds_live_item); if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { -- cgit v1.2.1 From 536f0741f324f116d8b059295999945a2dac56bc Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Thu, 7 Oct 2010 17:03:07 -0700 Subject: ocfs2/cluster: Track number of global heartbeat regions In global heartbeat mode, we have an upper limit for the number of active regions. This patch adds the facility to track the number of active global heartbeat regions and fails to start heartbeat if the number exceeds the maximum. Signed-off-by: Sunil Mushran --- fs/ocfs2/cluster/heartbeat.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 188f50269b89..d66b17c000d4 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -62,6 +62,12 @@ static unsigned long o2hb_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; static LIST_HEAD(o2hb_node_events); static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue); +/* + * In global heartbeat, we maintain a series of region bitmaps. + * - o2hb_region_bitmap allows us to limit the region number to max region.
+ */ +static unsigned long o2hb_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; + #define O2HB_DB_TYPE_LIVENODES 0 struct o2hb_debug_buf { int db_type; @@ -176,6 +182,7 @@ struct o2hb_region { /* live node map of this region */ unsigned long hr_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; + unsigned int hr_region_num; /* let the person setting up hb wait for it to return until it * has reached a 'steady' state. This will be fixed when we have @@ -1127,6 +1134,7 @@ int o2hb_init(void) INIT_LIST_HEAD(&o2hb_node_events); memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap)); + memset(o2hb_region_bitmap, 0, sizeof(o2hb_region_bitmap)); return o2hb_debug_init(); } @@ -1716,12 +1724,22 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g if (strlen(name) > O2HB_MAX_REGION_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type); - spin_lock(&o2hb_live_lock); + reg->hr_region_num = 0; + if (o2hb_global_heartbeat_active()) { + reg->hr_region_num = find_first_zero_bit(o2hb_region_bitmap, + O2NM_MAX_REGIONS); + if (reg->hr_region_num >= O2NM_MAX_REGIONS) { + spin_unlock(&o2hb_live_lock); + return ERR_PTR(-EFBIG); + } + set_bit(reg->hr_region_num, o2hb_region_bitmap); + } list_add_tail(&reg->hr_all_item, &o2hb_all_regions); spin_unlock(&o2hb_live_lock); + config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type); + return &reg->hr_item; } @@ -1733,6 +1751,8 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, /* stop the thread when the user removes the region dir */ spin_lock(&o2hb_live_lock); + if (o2hb_global_heartbeat_active()) + clear_bit(reg->hr_region_num, o2hb_region_bitmap); hb_task = reg->hr_task; reg->hr_task = NULL; spin_unlock(&o2hb_live_lock); -- cgit v1.2.1 From e7d656baf6607a0775f4ca85464a4ead306741e5 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Wed, 6 Oct 2010 17:55:18 -0700 Subject: ocfs2/cluster: Track bitmap of live heartbeat regions A
heartbeat region becomes live (or active) after a fixed number of (steady) iterations. Signed-off-by: Sunil Mushran --- fs/ocfs2/cluster/heartbeat.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index d66b17c000d4..2a7cd17e96f0 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -65,8 +65,10 @@ static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue); /* * In global heartbeat, we maintain a series of region bitmaps. * - o2hb_region_bitmap allows us to limit the region number to max region. + * - o2hb_live_region_bitmap tracks live regions (seen steady iterations). */ static unsigned long o2hb_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; +static unsigned long o2hb_live_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; #define O2HB_DB_TYPE_LIVENODES 0 struct o2hb_debug_buf { @@ -1135,6 +1137,7 @@ int o2hb_init(void) memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap)); memset(o2hb_region_bitmap, 0, sizeof(o2hb_region_bitmap)); + memset(o2hb_live_region_bitmap, 0, sizeof(o2hb_live_region_bitmap)); return o2hb_debug_init(); } @@ -1563,6 +1566,8 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg, /* Ok, we were woken. 
Make sure it wasn't by drop_item() */ spin_lock(&o2hb_live_lock); hb_task = reg->hr_task; + if (o2hb_global_heartbeat_active()) + set_bit(reg->hr_region_num, o2hb_live_region_bitmap); spin_unlock(&o2hb_live_lock); if (hb_task) @@ -1751,8 +1756,10 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group, /* stop the thread when the user removes the region dir */ spin_lock(&o2hb_live_lock); - if (o2hb_global_heartbeat_active()) + if (o2hb_global_heartbeat_active()) { clear_bit(reg->hr_region_num, o2hb_region_bitmap); + clear_bit(reg->hr_region_num, o2hb_live_region_bitmap); + } hb_task = reg->hr_task; reg->hr_task = NULL; spin_unlock(&o2hb_live_lock); -- cgit v1.2.1 From 43182d2a799865872041b6e4d8387131e9462f56 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Wed, 6 Oct 2010 17:55:16 -0700 Subject: ocfs2/cluster: Maintain bitmap of quorum regions o2hb allows online adding of regions. However, a newly added region is not used in quorum calculations unless it has been added on all nodes. This patch tracks a bitmap of such quorum regions. Signed-off-by: Sunil Mushran --- fs/ocfs2/cluster/heartbeat.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 2a7cd17e96f0..62a8af271344 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -66,9 +66,12 @@ static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue); * In global heartbeat, we maintain a series of region bitmaps. * - o2hb_region_bitmap allows us to limit the region number to max region. * - o2hb_live_region_bitmap tracks live regions (seen steady iterations). + * - o2hb_quorum_region_bitmap tracks live regions that have seen all nodes + * heartbeat on it. 
*/ static unsigned long o2hb_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; static unsigned long o2hb_live_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; +static unsigned long o2hb_quorum_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; #define O2HB_DB_TYPE_LIVENODES 0 struct o2hb_debug_buf { @@ -607,6 +610,35 @@ static void o2hb_shutdown_slot(struct o2hb_disk_slot *slot) o2nm_node_put(node); } +static void o2hb_set_quorum_device(struct o2hb_region *reg, + struct o2hb_disk_slot *slot) +{ + assert_spin_locked(&o2hb_live_lock); + + if (!o2hb_global_heartbeat_active()) + return; + + if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap)) + return; + + /* + * A region can be added to the quorum only when it sees all + * live nodes heartbeat on it. In other words, the region has been + * added to all nodes. + */ + if (memcmp(reg->hr_live_node_bitmap, o2hb_live_node_bitmap, + sizeof(o2hb_live_node_bitmap))) + return; + + if (slot->ds_changed_samples < O2HB_LIVE_THRESHOLD) + return; + + printk(KERN_NOTICE "o2hb: Region %s is now a quorum device\n", + config_item_name(&reg->hr_item)); + + set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap); +} + static int o2hb_check_slot(struct o2hb_region *reg, struct o2hb_disk_slot *slot) { @@ -772,6 +804,8 @@ fire_callbacks: slot->ds_equal_samples = 0; } out: + o2hb_set_quorum_device(reg, slot); + spin_unlock(&o2hb_live_lock); o2hb_run_event_list(&event); @@ -1138,6 +1172,7 @@ int o2hb_init(void) memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap)); memset(o2hb_region_bitmap, 0, sizeof(o2hb_region_bitmap)); memset(o2hb_live_region_bitmap, 0, sizeof(o2hb_live_region_bitmap)); + memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap)); return o2hb_debug_init(); } -- cgit v1.2.1 From b1c5ebfbe398b3360614a4788c02061cd153e60a Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Thu, 7 Oct 2010 17:05:52 -0700 Subject: ocfs2/cluster: Maintain bitmap of failed regions In global heartbeat mode, we track the bitmap
of regions that have seen heartbeat timeouts. We fence if the number of such regions is greater than or equal to half the number of quorum regions. Signed-off-by: Sunil Mushran --- fs/ocfs2/cluster/heartbeat.c | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 62a8af271344..f890656127fa 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -68,10 +68,12 @@ static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue); * - o2hb_live_region_bitmap tracks live regions (seen steady iterations). * - o2hb_quorum_region_bitmap tracks live regions that have seen all nodes * heartbeat on it. + * - o2hb_failed_region_bitmap tracks the regions that have seen io timeouts. */ static unsigned long o2hb_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; static unsigned long o2hb_live_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; static unsigned long o2hb_quorum_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; +static unsigned long o2hb_failed_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; #define O2HB_DB_TYPE_LIVENODES 0 struct o2hb_debug_buf { @@ -217,8 +219,19 @@ struct o2hb_bio_wait_ctxt { int wc_error; }; +static int o2hb_pop_count(void *map, int count) +{ + int i = -1, pop = 0; + + while ((i = find_next_bit(map, count, i + 1)) < count) + pop++; + return pop; +} + static void o2hb_write_timeout(struct work_struct *work) { + int failed, quorum; + unsigned long flags; struct o2hb_region *reg = container_of(work, struct o2hb_region, hr_write_timeout_work.work); @@ -226,6 +239,28 @@ static void o2hb_write_timeout(struct work_struct *work) mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u " "milliseconds\n", reg->hr_dev_name, jiffies_to_msecs(jiffies - reg->hr_last_timeout_start)); + + if (o2hb_global_heartbeat_active()) { + spin_lock_irqsave(&o2hb_live_lock, flags); + if (test_bit(reg->hr_region_num, 
o2hb_quorum_region_bitmap)) + set_bit(reg->hr_region_num, o2hb_failed_region_bitmap); + failed = o2hb_pop_count(&o2hb_failed_region_bitmap, + O2NM_MAX_REGIONS); + quorum = o2hb_pop_count(&o2hb_quorum_region_bitmap, + O2NM_MAX_REGIONS); + spin_unlock_irqrestore(&o2hb_live_lock, flags); + + mlog(ML_HEARTBEAT, "Number of regions %d, failed regions %d\n", + quorum, failed); + + /* + * Fence if the number of failed regions >= half the number + * of quorum regions + */ + if ((failed << 1) < quorum) + return; + } + o2quo_disk_timeout(); } @@ -234,6 +269,11 @@ static void o2hb_arm_write_timeout(struct o2hb_region *reg) mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n", O2HB_MAX_WRITE_TIMEOUT_MS); + if (o2hb_global_heartbeat_active()) { + spin_lock(&o2hb_live_lock); + clear_bit(reg->hr_region_num, o2hb_failed_region_bitmap); + spin_unlock(&o2hb_live_lock); + } cancel_delayed_work(&reg->hr_write_timeout_work); reg->hr_last_timeout_start = jiffies; schedule_delayed_work(&reg->hr_write_timeout_work, @@ -1173,6 +1213,7 @@ int o2hb_init(void) memset(o2hb_region_bitmap, 0, sizeof(o2hb_region_bitmap)); memset(o2hb_live_region_bitmap, 0, sizeof(o2hb_live_region_bitmap)); memset(o2hb_quorum_region_bitmap, 0, sizeof(o2hb_quorum_region_bitmap)); + memset(o2hb_failed_region_bitmap, 0, sizeof(o2hb_failed_region_bitmap)); return o2hb_debug_init(); } -- cgit v1.2.1 From a6de013654b4839c8609e26241ebd9eb1ecc52e6 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Wed, 6 Oct 2010 17:55:13 -0700 Subject: ocfs2/cluster: Create debugfs files for live, quorum and failed region bitmaps This patch prints the bitmaps of live, quorum and failed regions. This information will be useful in debugging cluster issues.
Signed-off-by: Sunil Mushran --- fs/ocfs2/cluster/heartbeat.c | 63 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index f890656127fa..b06b9e52fba8 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -76,6 +76,9 @@ static unsigned long o2hb_quorum_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; static unsigned long o2hb_failed_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; #define O2HB_DB_TYPE_LIVENODES 0 +#define O2HB_DB_TYPE_LIVEREGIONS 1 +#define O2HB_DB_TYPE_QUORUMREGIONS 2 +#define O2HB_DB_TYPE_FAILEDREGIONS 3 struct o2hb_debug_buf { int db_type; int db_size; @@ -84,12 +87,21 @@ struct o2hb_debug_buf { }; static struct o2hb_debug_buf *o2hb_db_livenodes; +static struct o2hb_debug_buf *o2hb_db_liveregions; +static struct o2hb_debug_buf *o2hb_db_quorumregions; +static struct o2hb_debug_buf *o2hb_db_failedregions; #define O2HB_DEBUG_DIR "o2hb" #define O2HB_DEBUG_LIVENODES "livenodes" +#define O2HB_DEBUG_LIVEREGIONS "live_regions" +#define O2HB_DEBUG_QUORUMREGIONS "quorum_regions" +#define O2HB_DEBUG_FAILEDREGIONS "failed_regions" static struct dentry *o2hb_debug_dir; static struct dentry *o2hb_debug_livenodes; +static struct dentry *o2hb_debug_liveregions; +static struct dentry *o2hb_debug_quorumregions; +static struct dentry *o2hb_debug_failedregions; static LIST_HEAD(o2hb_all_regions); @@ -1085,6 +1097,9 @@ static int o2hb_debug_open(struct inode *inode, struct file *file) switch (db->db_type) { case O2HB_DB_TYPE_LIVENODES: + case O2HB_DB_TYPE_LIVEREGIONS: + case O2HB_DB_TYPE_QUORUMREGIONS: + case O2HB_DB_TYPE_FAILEDREGIONS: spin_lock(&o2hb_live_lock); memcpy(map, db->db_data, db->db_size); spin_unlock(&o2hb_live_lock); @@ -1146,6 +1161,12 @@ static const struct file_operations o2hb_debug_fops = { void o2hb_exit(void) { kfree(o2hb_db_livenodes); + kfree(o2hb_db_liveregions); + kfree(o2hb_db_quorumregions); + 
kfree(o2hb_db_failedregions); + debugfs_remove(o2hb_debug_failedregions); + debugfs_remove(o2hb_debug_quorumregions); + debugfs_remove(o2hb_debug_liveregions); debugfs_remove(o2hb_debug_livenodes); debugfs_remove(o2hb_debug_dir); } @@ -1189,6 +1210,48 @@ static int o2hb_debug_init(void) mlog_errno(ret); goto bail; } + + o2hb_debug_liveregions = o2hb_debug_create(O2HB_DEBUG_LIVEREGIONS, + o2hb_debug_dir, + &o2hb_db_liveregions, + sizeof(*o2hb_db_liveregions), + O2HB_DB_TYPE_LIVEREGIONS, + sizeof(o2hb_live_region_bitmap), + O2NM_MAX_REGIONS, + o2hb_live_region_bitmap); + if (!o2hb_debug_liveregions) { + mlog_errno(ret); + goto bail; + } + + o2hb_debug_quorumregions = + o2hb_debug_create(O2HB_DEBUG_QUORUMREGIONS, + o2hb_debug_dir, + &o2hb_db_quorumregions, + sizeof(*o2hb_db_quorumregions), + O2HB_DB_TYPE_QUORUMREGIONS, + sizeof(o2hb_quorum_region_bitmap), + O2NM_MAX_REGIONS, + o2hb_quorum_region_bitmap); + if (!o2hb_debug_quorumregions) { + mlog_errno(ret); + goto bail; + } + + o2hb_debug_failedregions = + o2hb_debug_create(O2HB_DEBUG_FAILEDREGIONS, + o2hb_debug_dir, + &o2hb_db_failedregions, + sizeof(*o2hb_db_failedregions), + O2HB_DB_TYPE_FAILEDREGIONS, + sizeof(o2hb_failed_region_bitmap), + O2NM_MAX_REGIONS, + o2hb_failed_region_bitmap); + if (!o2hb_debug_failedregions) { + mlog_errno(ret); + goto bail; + } + ret = 0; bail: if (ret) -- cgit v1.2.1 From 1f28530537f106f83e5cf7ef0193075667b6d520 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Wed, 6 Oct 2010 17:55:12 -0700 Subject: ocfs2/cluster: Create debugfs dir/files for each region This patch creates debugfs directory for each o2hb region and creates files to expose the region number and the per region live node bitmap. This information will be useful in debugging cluster issues. 
Signed-off-by: Sunil Mushran --- fs/ocfs2/cluster/heartbeat.c | 77 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 77 insertions(+) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index b06b9e52fba8..f28de4b09c6b 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -79,6 +79,8 @@ static unsigned long o2hb_failed_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; #define O2HB_DB_TYPE_LIVEREGIONS 1 #define O2HB_DB_TYPE_QUORUMREGIONS 2 #define O2HB_DB_TYPE_FAILEDREGIONS 3 +#define O2HB_DB_TYPE_REGION_LIVENODES 4 +#define O2HB_DB_TYPE_REGION_NUMBER 5 struct o2hb_debug_buf { int db_type; int db_size; @@ -96,6 +98,7 @@ static struct o2hb_debug_buf *o2hb_db_failedregions; #define O2HB_DEBUG_LIVEREGIONS "live_regions" #define O2HB_DEBUG_QUORUMREGIONS "quorum_regions" #define O2HB_DEBUG_FAILEDREGIONS "failed_regions" +#define O2HB_DEBUG_REGION_NUMBER "num" static struct dentry *o2hb_debug_dir; static struct dentry *o2hb_debug_livenodes; @@ -203,6 +206,12 @@ struct o2hb_region { unsigned long hr_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)]; unsigned int hr_region_num; + struct dentry *hr_debug_dir; + struct dentry *hr_debug_livenodes; + struct dentry *hr_debug_regnum; + struct o2hb_debug_buf *hr_db_livenodes; + struct o2hb_debug_buf *hr_db_regnum; + /* let the person setting up hb wait for it to return until it * has reached a 'steady' state. This will be fixed when we have * a more complete api that doesn't lead to this sort of fragility. 
*/ @@ -1083,6 +1092,7 @@ static int o2hb_thread(void *data) static int o2hb_debug_open(struct inode *inode, struct file *file) { struct o2hb_debug_buf *db = inode->i_private; + struct o2hb_region *reg; unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)]; char *buf = NULL; int i = -1; @@ -1105,6 +1115,19 @@ static int o2hb_debug_open(struct inode *inode, struct file *file) spin_unlock(&o2hb_live_lock); break; + case O2HB_DB_TYPE_REGION_LIVENODES: + spin_lock(&o2hb_live_lock); + reg = (struct o2hb_region *)db->db_data; + memcpy(map, reg->hr_live_node_bitmap, db->db_size); + spin_unlock(&o2hb_live_lock); + break; + + case O2HB_DB_TYPE_REGION_NUMBER: + reg = (struct o2hb_region *)db->db_data; + out += snprintf(buf + out, PAGE_SIZE - out, "%d\n", + reg->hr_region_num); + goto done; + default: goto done; } @@ -1342,6 +1365,12 @@ static void o2hb_region_release(struct config_item *item) if (reg->hr_slots) kfree(reg->hr_slots); + kfree(reg->hr_db_regnum); + kfree(reg->hr_db_livenodes); + debugfs_remove(reg->hr_debug_livenodes); + debugfs_remove(reg->hr_debug_regnum); + debugfs_remove(reg->hr_debug_dir); + spin_lock(&o2hb_live_lock); list_del(&reg->hr_all_item); spin_unlock(&o2hb_live_lock); @@ -1856,10 +1885,52 @@ static struct o2hb_heartbeat_group *to_o2hb_heartbeat_group(struct config_group : NULL; } +static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) +{ + int ret = -ENOMEM; + + reg->hr_debug_dir = + debugfs_create_dir(config_item_name(&reg->hr_item), dir); + if (!reg->hr_debug_dir) { + mlog_errno(ret); + goto bail; + } + + reg->hr_debug_livenodes = + o2hb_debug_create(O2HB_DEBUG_LIVENODES, + reg->hr_debug_dir, + &(reg->hr_db_livenodes), + sizeof(*(reg->hr_db_livenodes)), + O2HB_DB_TYPE_REGION_LIVENODES, + sizeof(reg->hr_live_node_bitmap), + O2NM_MAX_NODES, reg); + if (!reg->hr_debug_livenodes) { + mlog_errno(ret); + goto bail; + } + + reg->hr_debug_regnum = + o2hb_debug_create(O2HB_DEBUG_REGION_NUMBER, + reg->hr_debug_dir, + &(reg->hr_db_regnum), +
sizeof(*(reg->hr_db_regnum)), + O2HB_DB_TYPE_REGION_NUMBER, + 0, O2NM_MAX_NODES, reg); + if (!reg->hr_debug_regnum) { + mlog_errno(ret); + goto bail; + } + + ret = 0; +bail: + return ret; +} + static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *group, const char *name) { struct o2hb_region *reg = NULL; + int ret; reg = kzalloc(sizeof(struct o2hb_region), GFP_KERNEL); if (reg == NULL) @@ -1884,6 +1955,12 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type); + ret = o2hb_debug_region_init(reg, o2hb_debug_dir); + if (ret) { + config_item_put(&reg->hr_item); + return ERR_PTR(ret); + } + return &reg->hr_item; } -- cgit v1.2.1 From d6aa1c7c9e4b48081c2302e14b0f857017461efd Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Wed, 6 Oct 2010 18:50:50 -0700 Subject: ocfs2/cluster: Add mlogs for heartbeat up/down events This patch adds mlogs for o2hb up and down events. Signed-off-by: Sunil Mushran --- fs/ocfs2/cluster/heartbeat.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index f28de4b09c6b..e8676accf902 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -797,6 +797,8 @@ fire_callbacks: /* first on the list generates a callback */ if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { + mlog(ML_HEARTBEAT, "o2hb: Add node %d to live nodes " + "bitmap\n", slot->ds_node_num); set_bit(slot->ds_node_num, o2hb_live_node_bitmap); o2hb_queue_node_event(&event, O2HB_NODE_UP_CB, node, @@ -845,6 +847,8 @@ fire_callbacks: /* last off the live_slot generates a callback */ list_del_init(&slot->ds_live_item); if (list_empty(&o2hb_live_slots[slot->ds_node_num])) { + mlog(ML_HEARTBEAT, "o2hb: Remove node %d from live " + "nodes bitmap\n", slot->ds_node_num); clear_bit(slot->ds_node_num, o2hb_live_node_bitmap); /* node can be null */ -- cgit v1.2.1 From
43695d095dfaf266a8a940d9b07eed7f46076b49 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Wed, 6 Oct 2010 17:55:09 -0700 Subject: ocfs2/cluster: Show per region heartbeat elapsed time This patch adds a per region debugfs file that shows the elapsed time since the time the o2hb timer was last armed. Signed-off-by: Sunil Mushran --- fs/ocfs2/cluster/heartbeat.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index e8676accf902..29aee2128edb 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -81,6 +81,7 @@ static unsigned long o2hb_failed_region_bitmap[BITS_TO_LONGS(O2NM_MAX_REGIONS)]; #define O2HB_DB_TYPE_FAILEDREGIONS 3 #define O2HB_DB_TYPE_REGION_LIVENODES 4 #define O2HB_DB_TYPE_REGION_NUMBER 5 +#define O2HB_DB_TYPE_REGION_ELAPSED_TIME 6 struct o2hb_debug_buf { int db_type; int db_size; @@ -99,6 +100,7 @@ static struct o2hb_debug_buf *o2hb_db_failedregions; #define O2HB_DEBUG_QUORUMREGIONS "quorum_regions" #define O2HB_DEBUG_FAILEDREGIONS "failed_regions" #define O2HB_DEBUG_REGION_NUMBER "num" +#define O2HB_DEBUG_REGION_ELAPSED_TIME "elapsed_time_in_ms" static struct dentry *o2hb_debug_dir; static struct dentry *o2hb_debug_livenodes; @@ -209,8 +211,10 @@ struct o2hb_region { struct dentry *hr_debug_dir; struct dentry *hr_debug_livenodes; struct dentry *hr_debug_regnum; + struct dentry *hr_debug_elapsed_time; struct o2hb_debug_buf *hr_db_livenodes; struct o2hb_debug_buf *hr_db_regnum; + struct o2hb_debug_buf *hr_db_elapsed_time; /* let the person setting up hb wait for it to return until it * has reached a 'steady' state. 
This will be fixed when we have @@ -1132,6 +1136,13 @@ static int o2hb_debug_open(struct inode *inode, struct file *file) reg->hr_region_num); goto done; + case O2HB_DB_TYPE_REGION_ELAPSED_TIME: + reg = (struct o2hb_region *)db->db_data; + out += snprintf(buf + out, PAGE_SIZE - out, "%u\n", + jiffies_to_msecs(jiffies - + reg->hr_last_timeout_start)); + goto done; + default: goto done; } @@ -1925,6 +1936,18 @@ static int o2hb_debug_region_init(struct o2hb_region *reg, struct dentry *dir) goto bail; } + reg->hr_debug_elapsed_time = + o2hb_debug_create(O2HB_DEBUG_REGION_ELAPSED_TIME, + reg->hr_debug_dir, + &(reg->hr_db_elapsed_time), + sizeof(*(reg->hr_db_elapsed_time)), + O2HB_DB_TYPE_REGION_ELAPSED_TIME, + 0, 0, reg); + if (!reg->hr_debug_elapsed_time) { + mlog_errno(ret); + goto bail; + } + ret = 0; bail: return ret; -- cgit v1.2.1 From 4d94aa1b1d437f9513ddc89974d8bd214b8304f6 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Sat, 9 Oct 2010 10:27:04 -0700 Subject: ocfs2/cluster: Bump up dlm protocol to version 1.1 dlm protocol 1.1. activates messages DLM_QUERY_REGION and DLM_QUERY_NODEINFO that are a must for global heartbeat. It also activates o2hb_global_heartbeat_active(). 
Signed-off-by: Sunil Mushran --- fs/ocfs2/cluster/heartbeat.c | 2 +- fs/ocfs2/dlm/dlmdomain.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 29aee2128edb..6a1280a013ea 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -2429,6 +2429,6 @@ EXPORT_SYMBOL_GPL(o2hb_get_all_regions); int o2hb_global_heartbeat_active(void) { - return 0; + return (o2hb_heartbeat_mode == O2HB_HEARTBEAT_GLOBAL); } EXPORT_SYMBOL(o2hb_global_heartbeat_active); diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 78d428f5e10e..58a93b953735 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -135,7 +135,7 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events); */ static const struct dlm_protocol_version dlm_protocol = { .pv_major = 1, - .pv_minor = 0, + .pv_minor = 1, }; #define DLM_DOMAIN_BACKOFF_MS 200 -- cgit v1.2.1 From d4396eafe402b710a8535137b3bf2abe6c059a15 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Fri, 15 Oct 2010 11:57:21 -0700 Subject: ocfs2/cluster: Release debugfs file elapsed_time_in_ms An earlier commit forgot to remove a debugfs file, elapsed_time_in_ms. Signed-off-by: Sunil Mushran --- fs/ocfs2/cluster/heartbeat.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ocfs2') diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index 6a1280a013ea..52c7557f3e25 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1384,6 +1384,7 @@ static void o2hb_region_release(struct config_item *item) kfree(reg->hr_db_livenodes); debugfs_remove(reg->hr_debug_livenodes); debugfs_remove(reg->hr_debug_regnum); + debugfs_remove(reg->hr_debug_elapsed_time); debugfs_remove(reg->hr_debug_dir); spin_lock(&o2hb_live_lock); -- cgit v1.2.1