15 files changed, 964 insertions, 179 deletions
diff --git a/fs/dlm/Makefile b/fs/dlm/Makefile
index 604cf7dc5f39..d248e60951ba 100644
--- a/fs/dlm/Makefile
+++ b/fs/dlm/Makefile
@@ -8,6 +8,7 @@ dlm-y :=			ast.o \
 				member.o \
 				memory.o \
 				midcomms.o \
+				netlink.o \
 				lowcomms.o \
 				rcom.o \
 				recover.o \
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 822abdcd1434..5069b2cb5a1f 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -90,6 +90,7 @@ struct cluster {
 	unsigned int cl_scan_secs;
 	unsigned int cl_log_debug;
 	unsigned int cl_protocol;
+	unsigned int cl_timewarn_cs;
 };
 
 enum {
@@ -103,6 +104,7 @@ enum {
 	CLUSTER_ATTR_SCAN_SECS,
 	CLUSTER_ATTR_LOG_DEBUG,
 	CLUSTER_ATTR_PROTOCOL,
+	CLUSTER_ATTR_TIMEWARN_CS,
 };
 
 struct cluster_attribute {
@@ -162,6 +164,7 @@ CLUSTER_ATTR(toss_secs, 1);
 CLUSTER_ATTR(scan_secs, 1);
 CLUSTER_ATTR(log_debug, 0);
 CLUSTER_ATTR(protocol, 0);
+CLUSTER_ATTR(timewarn_cs, 1);
 
 static struct configfs_attribute *cluster_attrs[] = {
 	[CLUSTER_ATTR_TCP_PORT] = &cluster_attr_tcp_port.attr,
@@ -174,6 +177,7 @@ static struct configfs_attribute *cluster_attrs[] = {
 	[CLUSTER_ATTR_SCAN_SECS] = &cluster_attr_scan_secs.attr,
 	[CLUSTER_ATTR_LOG_DEBUG] = &cluster_attr_log_debug.attr,
 	[CLUSTER_ATTR_PROTOCOL] = &cluster_attr_protocol.attr,
+	[CLUSTER_ATTR_TIMEWARN_CS] = &cluster_attr_timewarn_cs.attr,
 	NULL,
 };
 
@@ -429,6 +433,8 @@ static struct config_group *make_cluster(struct config_group *g,
 	cl->cl_toss_secs = dlm_config.ci_toss_secs;
 	cl->cl_scan_secs = dlm_config.ci_scan_secs;
 	cl->cl_log_debug = dlm_config.ci_log_debug;
+	cl->cl_protocol = dlm_config.ci_protocol;
+	cl->cl_timewarn_cs = dlm_config.ci_timewarn_cs;
 
 	space_list = &sps->ss_group;
 	comm_list = &cms->cs_group;
@@ -748,9 +754,16 @@ static ssize_t node_weight_write(struct node *nd, const char *buf, size_t len)
 
 static struct space *get_space(char *name)
 {
+	struct config_item *i;
+
 	if (!space_list)
 		return NULL;
-	return to_space(config_group_find_obj(space_list, name));
+
+	down(&space_list->cg_subsys->su_sem);
+	i = config_group_find_obj(space_list, name);
+	up(&space_list->cg_subsys->su_sem);
+
+	return to_space(i);
 }
 
 static void put_space(struct space *sp)
@@ -776,20 +789,20 @@ static struct comm *get_comm(int nodeid, struct sockaddr_storage *addr)
 			if (cm->nodeid != nodeid)
 				continue;
 			found = 1;
+			config_item_get(i);
 			break;
 		} else {
 			if (!cm->addr_count ||
 			    memcmp(cm->addr[0], addr, sizeof(*addr)))
 				continue;
 			found = 1;
+			config_item_get(i);
 			break;
 		}
 	}
 	up(&clusters_root.subsys.su_sem);
 
-	if (found)
-		config_item_get(i);
-	else
+	if (!found)
 		cm = NULL;
 	return cm;
 }
@@ -909,6 +922,7 @@ int dlm_our_addr(struct sockaddr_storage *addr, int num)
 #define DEFAULT_SCAN_SECS          5
 #define DEFAULT_LOG_DEBUG          0
 #define DEFAULT_PROTOCOL           0
+#define DEFAULT_TIMEWARN_CS      500 /* 5 sec = 500 centiseconds */
 
 struct dlm_config_info dlm_config = {
 	.ci_tcp_port = DEFAULT_TCP_PORT,
@@ -920,6 +934,7 @@ struct dlm_config_info dlm_config = {
 	.ci_toss_secs = DEFAULT_TOSS_SECS,
 	.ci_scan_secs = DEFAULT_SCAN_SECS,
 	.ci_log_debug = DEFAULT_LOG_DEBUG,
-	.ci_protocol = DEFAULT_PROTOCOL
+	.ci_protocol = DEFAULT_PROTOCOL,
+	.ci_timewarn_cs = DEFAULT_TIMEWARN_CS
 };
 
diff --git a/fs/dlm/config.h b/fs/dlm/config.h
index 967cc3d72e5e..a3170fe22090 100644
--- a/fs/dlm/config.h
+++ b/fs/dlm/config.h
@@ -27,6 +27,7 @@ struct dlm_config_info {
 	int ci_scan_secs;
 	int ci_log_debug;
 	int ci_protocol;
+	int ci_timewarn_cs;
 };
 
 extern struct dlm_config_info dlm_config;
diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
index 61ba670b9e02..12c3bfd5e660 100644
--- a/fs/dlm/debug_fs.c
+++ b/fs/dlm/debug_fs.c
@@ -17,6 +17,7 @@
 #include <linux/debugfs.h>
 
 #include "dlm_internal.h"
+#include "lock.h"
 
 #define DLM_DEBUG_BUF_LEN 4096
 static char debug_buf[DLM_DEBUG_BUF_LEN];
@@ -26,6 +27,8 @@ static struct dentry *dlm_root;
 
 struct rsb_iter {
 	int entry;
+	int locks;
+	int header;
 	struct dlm_ls *ls;
 	struct list_head *next;
 	struct dlm_rsb *rsb;
@@ -57,8 +60,8 @@ static char *print_lockmode(int mode)
 	}
 }
 
-static void print_lock(struct seq_file *s, struct dlm_lkb *lkb,
-		       struct dlm_rsb *res)
+static void print_resource_lock(struct seq_file *s, struct dlm_lkb *lkb,
+				struct dlm_rsb *res)
 {
 	seq_printf(s, "%08x %s", lkb->lkb_id, print_lockmode(lkb->lkb_grmode));
 
@@ -85,6 +88,8 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
 	struct dlm_lkb *lkb;
 	int i, lvblen = res->res_ls->ls_lvblen, recover_list, root_list;
 
+	lock_rsb(res);
+
 	seq_printf(s, "\nResource %p Name (len=%d) \"", res, res->res_length);
 	for (i = 0; i < res->res_length; i++) {
 		if (isprint(res->res_name[i]))
@@ -129,15 +134,15 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
 	/* Print the locks attached to this resource */
 	seq_printf(s, "Granted Queue\n");
 	list_for_each_entry(lkb, &res->res_grantqueue, lkb_statequeue)
-		print_lock(s, lkb, res);
+		print_resource_lock(s, lkb, res);
 
 	seq_printf(s, "Conversion Queue\n");
 	list_for_each_entry(lkb, &res->res_convertqueue, lkb_statequeue)
-		print_lock(s, lkb, res);
+		print_resource_lock(s, lkb, res);
 
 	seq_printf(s, "Waiting Queue\n");
 	list_for_each_entry(lkb, &res->res_waitqueue, lkb_statequeue)
-		print_lock(s, lkb, res);
+		print_resource_lock(s, lkb, res);
 
 	if (list_empty(&res->res_lookup))
 		goto out;
@@ -151,6 +156,61 @@ static int print_resource(struct dlm_rsb *res, struct seq_file *s)
 		seq_printf(s, "\n");
 	}
  out:
+	unlock_rsb(res);
+	return 0;
+}
+
+static void print_lock(struct seq_file *s, struct dlm_lkb *lkb, struct dlm_rsb *r)
+{
+	struct dlm_user_args *ua;
+	unsigned int waiting = 0;
+	uint64_t xid = 0;
+
+	if (lkb->lkb_flags & DLM_IFL_USER) {
+		ua = (struct dlm_user_args *) lkb->lkb_astparam;
+		if (ua)
+			xid = ua->xid;
+	}
+
+	if (lkb->lkb_timestamp)
+		waiting = jiffies_to_msecs(jiffies - lkb->lkb_timestamp);
+
+	/* id nodeid remid pid xid exflags flags sts grmode rqmode time_ms
+	   r_nodeid r_len r_name */
+
+	seq_printf(s, "%x %d %x %u %llu %x %x %d %d %d %u %u %d \"%s\"\n",
+		   lkb->lkb_id,
+		   lkb->lkb_nodeid,
+		   lkb->lkb_remid,
+		   lkb->lkb_ownpid,
+		   (unsigned long long)xid,
+		   lkb->lkb_exflags,
+		   lkb->lkb_flags,
+		   lkb->lkb_status,
+		   lkb->lkb_grmode,
+		   lkb->lkb_rqmode,
+		   waiting,
+		   r->res_nodeid,
+		   r->res_length,
+		   r->res_name);
+}
+
+static int print_locks(struct dlm_rsb *r, struct seq_file *s)
+{
+	struct dlm_lkb *lkb;
+
+	lock_rsb(r);
+
+	list_for_each_entry(lkb, &r->res_grantqueue, lkb_statequeue)
+		print_lock(s, lkb, r);
+
+	list_for_each_entry(lkb, &r->res_convertqueue, lkb_statequeue)
+		print_lock(s, lkb, r);
+
+	list_for_each_entry(lkb, &r->res_waitqueue, lkb_statequeue)
+		print_lock(s, lkb, r);
+
+	unlock_rsb(r);
 	return 0;
 }
 
@@ -166,6 +226,9 @@ static int rsb_iter_next(struct rsb_iter *ri)
 			read_lock(&ls->ls_rsbtbl[i].lock);
 			if (!list_empty(&ls->ls_rsbtbl[i].list)) {
 				ri->next = ls->ls_rsbtbl[i].list.next;
+				ri->rsb = list_entry(ri->next, struct dlm_rsb,
+							res_hashchain);
+				dlm_hold_rsb(ri->rsb);
 				read_unlock(&ls->ls_rsbtbl[i].lock);
 				break;
 			}
@@ -176,6 +239,7 @@ static int rsb_iter_next(struct rsb_iter *ri)
 		if (ri->entry >= ls->ls_rsbtbl_size)
 			return 1;
 	} else {
+		struct dlm_rsb *old = ri->rsb;
 		i = ri->entry;
 		read_lock(&ls->ls_rsbtbl[i].lock);
 		ri->next = ri->next->next;
@@ -184,11 +248,14 @@ static int rsb_iter_next(struct rsb_iter *ri)
 			ri->next = NULL;
 			ri->entry++;
 			read_unlock(&ls->ls_rsbtbl[i].lock);
+			dlm_put_rsb(old);
 			goto top;
                 }
+		ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain);
+		dlm_hold_rsb(ri->rsb);
 		read_unlock(&ls->ls_rsbtbl[i].lock);
+		dlm_put_rsb(old);
 	}
-	ri->rsb = list_entry(ri->next, struct dlm_rsb, res_hashchain);
 
 	return 0;
 }
@@ -202,7 +269,7 @@ static struct rsb_iter *rsb_iter_init(struct dlm_ls *ls)
 {
 	struct rsb_iter *ri;
 
-	ri = kmalloc(sizeof *ri, GFP_KERNEL);
+	ri = kzalloc(sizeof *ri, GFP_KERNEL);
 	if (!ri)
 		return NULL;
 
@@ -260,7 +327,17 @@ static int rsb_seq_show(struct seq_file *file, void *iter_ptr)
 {
 	struct rsb_iter *ri = iter_ptr;
 
-	print_resource(ri->rsb, file);
+	if (ri->locks) {
+		if (ri->header) {
+			seq_printf(file, "id nodeid remid pid xid exflags flags "
+					 "sts grmode rqmode time_ms r_nodeid "
+					 "r_len r_name\n");
+			ri->header = 0;
+		}
+		print_locks(ri->rsb, file);
+	} else {
+		print_resource(ri->rsb, file);
+	}
 
 	return 0;
 }
@@ -296,6 +373,83 @@ static const struct file_operations rsb_fops = {
 };
 
 /*
+ * Dump state in compact per-lock listing
+ */
+
+static struct rsb_iter *locks_iter_init(struct dlm_ls *ls, loff_t *pos)
+{
+	struct rsb_iter *ri;
+
+	ri = kzalloc(sizeof *ri, GFP_KERNEL);
+	if (!ri)
+		return NULL;
+
+	ri->ls = ls;
+	ri->entry = 0;
+	ri->next = NULL;
+	ri->locks = 1;
+
+	if (*pos == 0)
+		ri->header = 1;
+
+	if (rsb_iter_next(ri)) {
+		rsb_iter_free(ri);
+		return NULL;
+	}
+
+	return ri;
+}
+
+static void *locks_seq_start(struct seq_file *file, loff_t *pos)
+{
+	struct rsb_iter *ri;
+	loff_t n = *pos;
+
+	ri = locks_iter_init(file->private, pos);
+	if (!ri)
+		return NULL;
+
+	while (n--) {
+		if (rsb_iter_next(ri)) {
+			rsb_iter_free(ri);
+			return NULL;
+		}
+	}
+
+	return ri;
+}
+
+static struct seq_operations locks_seq_ops = {
+	.start = locks_seq_start,
+	.next  = rsb_seq_next,
+	.stop  = rsb_seq_stop,
+	.show  = rsb_seq_show,
+};
+
+static int locks_open(struct inode *inode, struct file *file)
+{
+	struct seq_file *seq;
+	int ret;
+
+	ret = seq_open(file, &locks_seq_ops);
+	if (ret)
+		return ret;
+
+	seq = file->private_data;
+	seq->private = inode->i_private;
+
+	return 0;
+}
+
+static const struct file_operations locks_fops = {
+	.owner   = THIS_MODULE,
+	.open    = locks_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release
+};
+
+/*
  * dump lkb's on the ls_waiters list
  */
 
@@ -362,6 +516,20 @@ int dlm_create_debug_file(struct dlm_ls *ls)
 		return -ENOMEM;
 	}
 
+	memset(name, 0, sizeof(name));
+	snprintf(name, DLM_LOCKSPACE_LEN+8, "%s_locks", ls->ls_name);
+
+	ls->ls_debug_locks_dentry = debugfs_create_file(name,
+							S_IFREG | S_IRUGO,
+							dlm_root,
+							ls,
+							&locks_fops);
+	if (!ls->ls_debug_locks_dentry) {
+		debugfs_remove(ls->ls_debug_waiters_dentry);
+		debugfs_remove(ls->ls_debug_rsb_dentry);
+		return -ENOMEM;
+	}
+
 	return 0;
 }
 
@@ -371,6 +539,8 @@ void dlm_delete_debug_file(struct dlm_ls *ls)
 		debugfs_remove(ls->ls_debug_rsb_dentry);
 	if (ls->ls_debug_waiters_dentry)
 		debugfs_remove(ls->ls_debug_waiters_dentry);
+	if (ls->ls_debug_locks_dentry)
+		debugfs_remove(ls->ls_debug_locks_dentry);
 }
 
 int dlm_register_debugfs(void)
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
index 30994d68f6a0..74901e981e10 100644
--- a/fs/dlm/dlm_internal.h
+++ b/fs/dlm/dlm_internal.h
@@ -151,6 +151,7 @@ struct dlm_args {
 	void			*bastaddr;
 	int			mode;
 	struct dlm_lksb		*lksb;
+	unsigned long		timeout;
 };
 
 
@@ -213,6 +214,9 @@ struct dlm_args {
 #define DLM_IFL_OVERLAP_UNLOCK  0x00080000
 #define DLM_IFL_OVERLAP_CANCEL  0x00100000
 #define DLM_IFL_ENDOFLIFE	0x00200000
+#define DLM_IFL_WATCH_TIMEWARN	0x00400000
+#define DLM_IFL_TIMEOUT_CANCEL	0x00800000
+#define DLM_IFL_DEADLOCK_CANCEL	0x01000000
 #define DLM_IFL_USER		0x00000001
 #define DLM_IFL_ORPHAN		0x00000002
 
@@ -243,6 +247,9 @@ struct dlm_lkb {
 	struct list_head	lkb_wait_reply;	/* waiting for remote reply */
 	struct list_head	lkb_astqueue;	/* need ast to be sent */
 	struct list_head	lkb_ownqueue;	/* list of locks for a process */
+	struct list_head	lkb_time_list;
+	unsigned long		lkb_timestamp;
+	unsigned long		lkb_timeout_cs;
 
 	char			*lkb_lvbptr;
 	struct dlm_lksb		*lkb_lksb;      /* caller's status block */
@@ -447,12 +454,16 @@ struct dlm_ls {
 	struct mutex		ls_orphans_mutex;
 	struct list_head	ls_orphans;
 
+	struct mutex		ls_timeout_mutex;
+	struct list_head	ls_timeout;
+
 	struct list_head	ls_nodes;	/* current nodes in ls */
 	struct list_head	ls_nodes_gone;	/* dead node list, recovery */
 	int			ls_num_nodes;	/* number of nodes in ls */
 	int			ls_low_nodeid;
 	int			ls_total_weight;
 	int			*ls_node_array;
+	gfp_t			ls_allocation;
 
 	struct dlm_rsb		ls_stub_rsb;	/* for returning errors */
 	struct dlm_lkb		ls_stub_lkb;	/* for returning errors */
@@ -460,9 +471,12 @@ struct dlm_ls {
 
 	struct dentry		*ls_debug_rsb_dentry; /* debugfs */
 	struct dentry		*ls_debug_waiters_dentry; /* debugfs */
+	struct dentry		*ls_debug_locks_dentry; /* debugfs */
 
 	wait_queue_head_t	ls_uevent_wait;	/* user part of join/leave */
 	int			ls_uevent_result;
+	struct completion	ls_members_done;
+	int			ls_members_result;
 
 	struct miscdevice       ls_device;
 
@@ -472,6 +486,7 @@ struct dlm_ls {
 	struct task_struct	*ls_recoverd_task;
 	struct mutex		ls_recoverd_active;
 	spinlock_t		ls_recover_lock;
+	unsigned long		ls_recover_begin; /* jiffies timestamp */
 	uint32_t		ls_recover_status; /* DLM_RS_ */
 	uint64_t		ls_recover_seq;
 	struct dlm_recover	*ls_recover_args;
@@ -501,6 +516,7 @@ struct dlm_ls {
 #define LSFL_RCOM_READY		3
 #define LSFL_RCOM_WAIT		4
 #define LSFL_UEVENT_WAIT	5
+#define LSFL_TIMEWARN		6
 
 /* much of this is just saving user space pointers associated with the
    lock that we pass back to the user lib with an ast */
@@ -518,6 +534,7 @@ struct dlm_user_args {
 	void __user		*castaddr;
 	void __user		*bastparam;
 	void __user		*bastaddr;
+	uint64_t		xid;
 };
 
 #define DLM_PROC_FLAGS_CLOSING 1
diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
index d8d6e729f96b..b455919c1998 100644
--- a/fs/dlm/lock.c
+++ b/fs/dlm/lock.c
@@ -82,10 +82,13 @@ static int send_bast(struct dlm_rsb *r, struct dlm_lkb *lkb, int mode);
 static int send_lookup(struct dlm_rsb *r, struct dlm_lkb *lkb);
 static int send_remove(struct dlm_rsb *r);
 static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
+static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb);
 static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
 				    struct dlm_message *ms);
 static int receive_extralen(struct dlm_message *ms);
 static void do_purge(struct dlm_ls *ls, int nodeid, int pid);
+static void del_timeout(struct dlm_lkb *lkb);
+void dlm_timeout_warn(struct dlm_lkb *lkb);
 
 /*
  * Lock compatibilty matrix - thanks Steve
@@ -194,17 +197,17 @@ void dlm_dump_rsb(struct dlm_rsb *r)
 
 /* Threads cannot use the lockspace while it's being recovered */
 
-static inline void lock_recovery(struct dlm_ls *ls)
+static inline void dlm_lock_recovery(struct dlm_ls *ls)
 {
 	down_read(&ls->ls_in_recovery);
 }
 
-static inline void unlock_recovery(struct dlm_ls *ls)
+void dlm_unlock_recovery(struct dlm_ls *ls)
 {
 	up_read(&ls->ls_in_recovery);
 }
 
-static inline int lock_recovery_try(struct dlm_ls *ls)
+int dlm_lock_recovery_try(struct dlm_ls *ls)
 {
 	return down_read_trylock(&ls->ls_in_recovery);
 }
@@ -286,8 +289,22 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
 	if (is_master_copy(lkb))
 		return;
 
+	del_timeout(lkb);
+
 	DLM_ASSERT(lkb->lkb_lksb, dlm_print_lkb(lkb););
 
+	/* if the operation was a cancel, then return -DLM_ECANCEL, if a
+	   timeout caused the cancel then return -ETIMEDOUT */
+	if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_TIMEOUT_CANCEL)) {
+		lkb->lkb_flags &= ~DLM_IFL_TIMEOUT_CANCEL;
+		rv = -ETIMEDOUT;
+	}
+
+	if (rv == -DLM_ECANCEL && (lkb->lkb_flags & DLM_IFL_DEADLOCK_CANCEL)) {
+		lkb->lkb_flags &= ~DLM_IFL_DEADLOCK_CANCEL;
+		rv = -EDEADLK;
+	}
+
 	lkb->lkb_lksb->sb_status = rv;
 	lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags;
 
@@ -581,6 +598,7 @@ static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret)
 	kref_init(&lkb->lkb_ref);
 	INIT_LIST_HEAD(&lkb->lkb_ownqueue);
 	INIT_LIST_HEAD(&lkb->lkb_rsb_lookup);
+	INIT_LIST_HEAD(&lkb->lkb_time_list);
 
 	get_random_bytes(&bucket, sizeof(bucket));
 	bucket &= (ls->ls_lkbtbl_size - 1);
@@ -985,15 +1003,136 @@ void dlm_scan_rsbs(struct dlm_ls *ls)
 {
 	int i;
 
-	if (dlm_locking_stopped(ls))
-		return;
-
 	for (i = 0; i < ls->ls_rsbtbl_size; i++) {
 		shrink_bucket(ls, i);
+		if (dlm_locking_stopped(ls))
+			break;
 		cond_resched();
 	}
 }
 
+static void add_timeout(struct dlm_lkb *lkb)
+{
+	struct dlm_ls *ls = lkb->lkb_resource->res_ls;
+
+	if (is_master_copy(lkb)) {
+		lkb->lkb_timestamp = jiffies;
+		return;
+	}
+
+	if (test_bit(LSFL_TIMEWARN, &ls->ls_flags) &&
+	    !(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
+		lkb->lkb_flags |= DLM_IFL_WATCH_TIMEWARN;
+		goto add_it;
+	}
+	if (lkb->lkb_exflags & DLM_LKF_TIMEOUT)
+		goto add_it;
+	return;
+
+ add_it:
+	DLM_ASSERT(list_empty(&lkb->lkb_time_list), dlm_print_lkb(lkb););
+	mutex_lock(&ls->ls_timeout_mutex);
+	hold_lkb(lkb);
+	lkb->lkb_timestamp = jiffies;
+	list_add_tail(&lkb->lkb_time_list, &ls->ls_timeout);
+	mutex_unlock(&ls->ls_timeout_mutex);
+}
+
+static void del_timeout(struct dlm_lkb *lkb)
+{
+	struct dlm_ls *ls = lkb->lkb_resource->res_ls;
+
+	mutex_lock(&ls->ls_timeout_mutex);
+	if (!list_empty(&lkb->lkb_time_list)) {
+		list_del_init(&lkb->lkb_time_list);
+		unhold_lkb(lkb);
+	}
+	mutex_unlock(&ls->ls_timeout_mutex);
+}
+
+/* FIXME: is it safe to look at lkb_exflags, lkb_flags, lkb_timestamp, and
+   lkb_lksb_timeout without lock_rsb?  Note: we can't lock timeout_mutex
+   and then lock rsb because of lock ordering in add_timeout.  We may need
+   to specify some special timeout-related bits in the lkb that are just to
+   be accessed under the timeout_mutex. */
+
+void dlm_scan_timeout(struct dlm_ls *ls)
+{
+	struct dlm_rsb *r;
+	struct dlm_lkb *lkb;
+	int do_cancel, do_warn;
+
+	for (;;) {
+		if (dlm_locking_stopped(ls))
+			break;
+
+		do_cancel = 0;
+		do_warn = 0;
+		mutex_lock(&ls->ls_timeout_mutex);
+		list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list) {
+
+			if ((lkb->lkb_exflags & DLM_LKF_TIMEOUT) &&
+			    time_after_eq(jiffies, lkb->lkb_timestamp +
+					  lkb->lkb_timeout_cs * HZ/100))
+				do_cancel = 1;
+
+			if ((lkb->lkb_flags & DLM_IFL_WATCH_TIMEWARN) &&
+			    time_after_eq(jiffies, lkb->lkb_timestamp +
+				   	   dlm_config.ci_timewarn_cs * HZ/100))
+				do_warn = 1;
+
+			if (!do_cancel && !do_warn)
+				continue;
+			hold_lkb(lkb);
+			break;
+		}
+		mutex_unlock(&ls->ls_timeout_mutex);
+
+		if (!do_cancel && !do_warn)
+			break;
+
+		r = lkb->lkb_resource;
+		hold_rsb(r);
+		lock_rsb(r);
+
+		if (do_warn) {
+			/* clear flag so we only warn once */
+			lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
+			if (!(lkb->lkb_exflags & DLM_LKF_TIMEOUT))
+				del_timeout(lkb);
+			dlm_timeout_warn(lkb);
+		}
+
+		if (do_cancel) {
+			log_debug(ls, "timeout cancel %x node %d %s",
+				  lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
+			lkb->lkb_flags &= ~DLM_IFL_WATCH_TIMEWARN;
+			lkb->lkb_flags |= DLM_IFL_TIMEOUT_CANCEL;
+			del_timeout(lkb);
+			_cancel_lock(r, lkb);
+		}
+
+		unlock_rsb(r);
+		unhold_rsb(r);
+		dlm_put_lkb(lkb);
+	}
+}
+
+/* This is only called by dlm_recoverd, and we rely on dlm_ls_stop() stopping
+   dlm_recoverd before checking/setting ls_recover_begin. */
+
+void dlm_adjust_timeouts(struct dlm_ls *ls)
+{
+	struct dlm_lkb *lkb;
+	long adj = jiffies - ls->ls_recover_begin;
+
+	ls->ls_recover_begin = 0;
+	mutex_lock(&ls->ls_timeout_mutex);
+	list_for_each_entry(lkb, &ls->ls_timeout, lkb_time_list)
+		lkb->lkb_timestamp += adj;
+	mutex_unlock(&ls->ls_timeout_mutex);
+}
+
 /* lkb is master or local copy */
 
 static void set_lvb_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
@@ -1275,10 +1414,8 @@ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb)
  * queue for one resource.  The granted mode of each lock blocks the requested
  * mode of the other lock."
  *
- * Part 2: if the granted mode of lkb is preventing the first lkb in the
- * convert queue from being granted, then demote lkb (set grmode to NL).
- * This second form requires that we check for conv-deadlk even when
- * now == 0 in _can_be_granted().
+ * Part 2: if the granted mode of lkb is preventing an earlier lkb in the
+ * convert queue from being granted, then deadlk/demote lkb.
  *
  * Example:
  * Granted Queue: empty
@@ -1287,41 +1424,52 @@ static int queue_conflict(struct list_head *head, struct dlm_lkb *lkb)
  *
  * The first lock can't be granted because of the granted mode of the second
  * lock and the second lock can't be granted because it's not first in the
- * list.  We demote the granted mode of the second lock (the lkb passed to this
- * function).
+ * list.  We either cancel lkb's conversion (PR->EX) and return EDEADLK, or we
+ * demote the granted mode of lkb (from PR to NL) if it has the CONVDEADLK
+ * flag set and return DEMOTED in the lksb flags.
+ *
+ * Originally, this function detected conv-deadlk in a more limited scope:
+ * - if !modes_compat(lkb1, lkb2) && !modes_compat(lkb2, lkb1), or
+ * - if lkb1 was the first entry in the queue (not just earlier), and was
+ *   blocked by the granted mode of lkb2, and there was nothing on the
+ *   granted queue preventing lkb1 from being granted immediately, i.e.
+ *   lkb2 was the only thing preventing lkb1 from being granted.
+ *
+ * That second condition meant we'd only say there was conv-deadlk if
+ * resolving it (by demotion) would lead to the first lock on the convert
+ * queue being granted right away.  It allowed conversion deadlocks to exist
+ * between locks on the convert queue while they couldn't be granted anyway.
  *
- * After the resolution, the "grant pending" function needs to go back and try
- * to grant locks on the convert queue again since the first lock can now be
- * granted.
+ * Now, we detect and take action on conversion deadlocks immediately when
+ * they're created, even if they may not be immediately consequential.  If
+ * lkb1 exists anywhere in the convert queue and lkb2 comes in with a granted
+ * mode that would prevent lkb1's conversion from being granted, we do a
+ * deadlk/demote on lkb2 right away and don't let it onto the convert queue.
+ * I think this means that the lkb_is_ahead condition below should always
+ * be zero, i.e. there will never be conv-deadlk between two locks that are
+ * both already on the convert queue.
  */
 
-static int conversion_deadlock_detect(struct dlm_rsb *rsb, struct dlm_lkb *lkb)
+static int conversion_deadlock_detect(struct dlm_rsb *r, struct dlm_lkb *lkb2)
 {
-	struct dlm_lkb *this, *first = NULL, *self = NULL;
+	struct dlm_lkb *lkb1;
+	int lkb_is_ahead = 0;
 
-	list_for_each_entry(this, &rsb->res_convertqueue, lkb_statequeue) {
-		if (!first)
-			first = this;
-		if (this == lkb) {
-			self = lkb;
+	list_for_each_entry(lkb1, &r->res_convertqueue, lkb_statequeue) {
+		if (lkb1 == lkb2) {
+			lkb_is_ahead = 1;
 			continue;
 		}
 
-		if (!modes_compat(this, lkb) && !modes_compat(lkb, this))
-			return 1;
-	}
-
-	/* if lkb is on the convert queue and is preventing the first
-	   from being granted, then there's deadlock and we demote lkb.
-	   multiple converting locks may need to do this before the first
-	   converting lock can be granted. */
-
-	if (self && self != first) {
-		if (!modes_compat(lkb, first) &&
-		    !queue_conflict(&rsb->res_grantqueue, first))
-			return 1;
+		if (!lkb_is_ahead) {
+			if (!modes_compat(lkb2, lkb1))
+				return 1;
+		} else {
+			if (!modes_compat(lkb2, lkb1) &&
+			    !modes_compat(lkb1, lkb2))
+				return 1;
+		}
 	}
-
 	return 0;
 }
 
@@ -1450,42 +1598,57 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
 	if (!now && !conv && list_empty(&r->res_convertqueue) &&
 	    first_in_list(lkb, &r->res_waitqueue))
 		return 1;
-
  out:
-	/*
-	 * The following, enabled by CONVDEADLK, departs from VMS.
-	 */
-
-	if (conv && (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) &&
-	    conversion_deadlock_detect(r, lkb)) {
-		lkb->lkb_grmode = DLM_LOCK_NL;
-		lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
-	}
-
 	return 0;
 }
 
-/*
- * The ALTPR and ALTCW flags aren't traditional lock manager flags, but are a
- * simple way to provide a big optimization to applications that can use them.
- */
-
-static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
+static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now,
+			  int *err)
 {
-	uint32_t flags = lkb->lkb_exflags;
 	int rv;
 	int8_t alt = 0, rqmode = lkb->lkb_rqmode;
+	int8_t is_convert = (lkb->lkb_grmode != DLM_LOCK_IV);
+
+	if (err)
+		*err = 0;
 
 	rv = _can_be_granted(r, lkb, now);
 	if (rv)
 		goto out;
 
-	if (lkb->lkb_sbflags & DLM_SBF_DEMOTED)
+	/*
+	 * The CONVDEADLK flag is non-standard and tells the dlm to resolve
+	 * conversion deadlocks by demoting grmode to NL, otherwise the dlm
+	 * cancels one of the locks.
+	 */
+
+	if (is_convert && can_be_queued(lkb) &&
+	    conversion_deadlock_detect(r, lkb)) {
+		if (lkb->lkb_exflags & DLM_LKF_CONVDEADLK) {
+			lkb->lkb_grmode = DLM_LOCK_NL;
+			lkb->lkb_sbflags |= DLM_SBF_DEMOTED;
+		} else if (!(lkb->lkb_exflags & DLM_LKF_NODLCKWT)) {
+			if (err)
+				*err = -EDEADLK;
+			else {
+				log_print("can_be_granted deadlock %x now %d",
+					  lkb->lkb_id, now);
+				dlm_dump_rsb(r);
+			}
+		}
 		goto out;
+	}
 
-	if (rqmode != DLM_LOCK_PR && flags & DLM_LKF_ALTPR)
+	/*
+	 * The ALTPR and ALTCW flags are non-standard and tell the dlm to try
+	 * to grant a request in a mode other than the normal rqmode.  It's a
+	 * simple way to provide a big optimization to applications that can
+	 * use them.
+	 */
+
+	if (rqmode != DLM_LOCK_PR && (lkb->lkb_exflags & DLM_LKF_ALTPR))
 		alt = DLM_LOCK_PR;
-	else if (rqmode != DLM_LOCK_CW && flags & DLM_LKF_ALTCW)
+	else if (rqmode != DLM_LOCK_CW && (lkb->lkb_exflags & DLM_LKF_ALTCW))
 		alt = DLM_LOCK_CW;
 
 	if (alt) {
@@ -1500,10 +1663,20 @@ static int can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now)
 	return rv;
 }
 
+/* FIXME: I don't think that can_be_granted() can/will demote or find deadlock
+   for locks pending on the convert list.  Once verified (watch for these
+   log_prints), we should be able to just call _can_be_granted() and not
+   bother with the demote/deadlk cases here (and there's no easy way to deal
+   with a deadlk here, we'd have to generate something like grant_lock with
+   the deadlk error.) */
+
+/* returns the highest requested mode of all blocked conversions */
+
 static int grant_pending_convert(struct dlm_rsb *r, int high)
 {
 	struct dlm_lkb *lkb, *s;
 	int hi, demoted, quit, grant_restart, demote_restart;
+	int deadlk;
 
 	quit = 0;
  restart:
@@ -1513,14 +1686,29 @@ static int grant_pending_convert(struct dlm_rsb *r, int high)
 
 	list_for_each_entry_safe(lkb, s, &r->res_convertqueue, lkb_statequeue) {
 		demoted = is_demoted(lkb);
-		if (can_be_granted(r, lkb, 0)) {
+		deadlk = 0;
+
+		if (can_be_granted(r, lkb, 0, &deadlk)) {
 			grant_lock_pending(r, lkb);
 			grant_restart = 1;
-		} else {
-			hi = max_t(int, lkb->lkb_rqmode, hi);
-			if (!demoted && is_demoted(lkb))
-				demote_restart = 1;
+			continue;
 		}
+
+		if (!demoted && is_demoted(lkb)) {
+			log_print("WARN: pending demoted %x node %d %s",
+				  lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
+			demote_restart = 1;
+			continue;
+		}
+
+		if (deadlk) {
+			log_print("WARN: pending deadlock %x node %d %s",
+				  lkb->lkb_id, lkb->lkb_nodeid, r->res_name);
+			dlm_dump_rsb(r);
+			continue;
+		}
+
+		hi = max_t(int, lkb->lkb_rqmode, hi);
 	}
 
 	if (grant_restart)
@@ -1538,7 +1726,7 @@ static int grant_pending_wait(struct dlm_rsb *r, int high)
 	struct dlm_lkb *lkb, *s;
 
 	list_for_each_entry_safe(lkb, s, &r->res_waitqueue, lkb_statequeue) {
-		if (can_be_granted(r, lkb, 0))
+		if (can_be_granted(r, lkb, 0, NULL))
 			grant_lock_pending(r, lkb);
                 else
 			high = max_t(int, lkb->lkb_rqmode, high);
@@ -1733,7 +1921,7 @@ static void confirm_master(struct dlm_rsb *r, int error)
 }
 
 static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
-			 int namelen, uint32_t parent_lkid, void *ast,
+			 int namelen, unsigned long timeout_cs, void *ast,
 			 void *astarg, void *bast, struct dlm_args *args)
 {
 	int rv = -EINVAL;
@@ -1776,10 +1964,6 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
 	if (flags & DLM_LKF_VALBLK && !lksb->sb_lvbptr)
 		goto out;
 
-	/* parent/child locks not yet supported */
-	if (parent_lkid)
-		goto out;
-
 	if (flags & DLM_LKF_CONVERT && !lksb->sb_lkid)
 		goto out;
 
@@ -1791,6 +1975,7 @@ static int set_lock_args(int mode, struct dlm_lksb *lksb, uint32_t flags,
 	args->astaddr = ast;
 	args->astparam = (long) astarg;
 	args->bastaddr = bast;
+	args->timeout = timeout_cs;
 	args->mode = mode;
 	args->lksb = lksb;
 	rv = 0;
@@ -1845,6 +2030,7 @@ static int validate_lock_args(struct dlm_ls *ls, struct dlm_lkb *lkb,
 	lkb->lkb_lksb = args->lksb;
 	lkb->lkb_lvbptr = args->lksb->sb_lvbptr;
 	lkb->lkb_ownpid = (int) current->pid;
+	lkb->lkb_timeout_cs = args->timeout;
 	rv = 0;
  out:
 	return rv;
@@ -1903,6 +2089,9 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
 		if (is_overlap(lkb))
 			goto out;
 
+		/* don't let scand try to do a cancel */
+		del_timeout(lkb);
+
 		if (lkb->lkb_flags & DLM_IFL_RESEND) {
 			lkb->lkb_flags |= DLM_IFL_OVERLAP_CANCEL;
 			rv = -EBUSY;
@@ -1934,6 +2123,9 @@ static int validate_unlock_args(struct dlm_lkb *lkb, struct dlm_args *args)
 		if (is_overlap_unlock(lkb))
 			goto out;
 
+		/* don't let scand try to do a cancel */
+		del_timeout(lkb);
+
 		if (lkb->lkb_flags & DLM_IFL_RESEND) {
 			lkb->lkb_flags |= DLM_IFL_OVERLAP_UNLOCK;
 			rv = -EBUSY;
@@ -1984,7 +2176,7 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
 {
 	int error = 0;
 
-	if (can_be_granted(r, lkb, 1)) {
+	if (can_be_granted(r, lkb, 1, NULL)) {
 		grant_lock(r, lkb);
 		queue_cast(r, lkb, 0);
 		goto out;
@@ -1994,6 +2186,7 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
 		error = -EINPROGRESS;
 		add_lkb(r, lkb, DLM_LKSTS_WAITING);
 		send_blocking_asts(r, lkb);
+		add_timeout(lkb);
 		goto out;
 	}
 
@@ -2009,16 +2202,32 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
 static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
 {
 	int error = 0;
+	int deadlk = 0;
 
 	/* changing an existing lock may allow others to be granted */
 
-	if (can_be_granted(r, lkb, 1)) {
+	if (can_be_granted(r, lkb, 1, &deadlk)) {
 		grant_lock(r, lkb);
 		queue_cast(r, lkb, 0);
 		grant_pending_locks(r);
 		goto out;
 	}
 
+	/* can_be_granted() detected that this lock would block in a conversion
+	   deadlock, so we leave it on the granted queue and return EDEADLK in
+	   the ast for the convert. */
+
+	if (deadlk) {
+		/* it's left on the granted queue */
+		log_debug(r->res_ls, "deadlock %x node %d sts%d g%d r%d %s",
+			  lkb->lkb_id, lkb->lkb_nodeid, lkb->lkb_status,
+			  lkb->lkb_grmode, lkb->lkb_rqmode, r->res_name);
+		revert_lock(r, lkb);
+		queue_cast(r, lkb, -EDEADLK);
+		error = -EDEADLK;
+		goto out;
+	}
+
 	/* is_demoted() means the can_be_granted() above set the grmode
 	   to NL, and left us on the granted queue.  This auto-demotion
 	   (due to CONVDEADLK) might mean other locks, and/or this lock, are
@@ -2041,6 +2250,7 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
 		del_lkb(r, lkb);
 		add_lkb(r, lkb, DLM_LKSTS_CONVERT);
 		send_blocking_asts(r, lkb);
+		add_timeout(lkb);
 		goto out;
 	}
 
@@ -2274,7 +2484,7 @@ int dlm_lock(dlm_lockspace_t *lockspace,
 	if (!ls)
 		return -EINVAL;
 
-	lock_recovery(ls);
+	dlm_lock_recovery(ls);
 
 	if (convert)
 		error = find_lkb(ls, lksb->sb_lkid, &lkb);
@@ -2284,7 +2494,7 @@ int dlm_lock(dlm_lockspace_t *lockspace,
 	if (error)
 		goto out;
 
-	error = set_lock_args(mode, lksb, flags, namelen, parent_lkid, ast,
+	error = set_lock_args(mode, lksb, flags, namelen, 0, ast,
 			      astarg, bast, &args);
 	if (error)
 		goto out_put;
@@ -2299,10 +2509,10 @@ int dlm_lock(dlm_lockspace_t *lockspace,
  out_put:
 	if (convert || error)
 		__put_lkb(ls, lkb);
-	if (error == -EAGAIN)
+	if (error == -EAGAIN || error == -EDEADLK)
 		error = 0;
  out:
-	unlock_recovery(ls);
+	dlm_unlock_recovery(ls);
 	dlm_put_lockspace(ls);
 	return error;
 }
@@ -2322,7 +2532,7 @@ int dlm_unlock(dlm_lockspace_t *lockspace,
 	if (!ls)
 		return -EINVAL;
 
-	lock_recovery(ls);
+	dlm_lock_recovery(ls);
 
 	error = find_lkb(ls, lkid, &lkb);
 	if (error)
@@ -2344,7 +2554,7 @@ int dlm_unlock(dlm_lockspace_t *lockspace,
  out_put:
 	dlm_put_lkb(lkb);
  out:
-	unlock_recovery(ls);
+	dlm_unlock_recovery(ls);
 	dlm_put_lockspace(ls);
 	return error;
 }
@@ -2384,7 +2594,7 @@ static int _create_message(struct dlm_ls *ls, int mb_len,
 	   pass into lowcomms_commit and a message buffer (mb) that we
 	   write our data into */
 
-	mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb);
+	mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb);
 	if (!mh)
 		return -ENOBUFS;
 
@@ -3111,9 +3321,10 @@ static void receive_request_reply(struct dlm_ls *ls, struct dlm_message *ms)
 		lkb->lkb_remid = ms->m_lkid;
 		if (is_altmode(lkb))
 			munge_altmode(lkb, ms);
-		if (result)
+		if (result) {
 			add_lkb(r, lkb, DLM_LKSTS_WAITING);
-		else {
+			add_timeout(lkb);
+		} else {
 			grant_lock_pc(r, lkb, ms);
 			queue_cast(r, lkb, 0);
 		}
@@ -3172,6 +3383,12 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
 		queue_cast(r, lkb, -EAGAIN);
 		break;
 
+	case -EDEADLK:
+		receive_flags_reply(lkb, ms);
+		revert_lock_pc(r, lkb);
+		queue_cast(r, lkb, -EDEADLK);
+		break;
+
 	case -EINPROGRESS:
 		/* convert was queued on remote master */
 		receive_flags_reply(lkb, ms);
@@ -3179,6 +3396,7 @@ static void __receive_convert_reply(struct dlm_rsb *r, struct dlm_lkb *lkb,
 			munge_demoted(lkb, ms);
 		del_lkb(r, lkb);
 		add_lkb(r, lkb, DLM_LKSTS_CONVERT);
+		add_timeout(lkb);
 		break;
 
 	case 0:
@@ -3298,8 +3516,7 @@ static void _receive_cancel_reply(struct dlm_lkb *lkb, struct dlm_message *ms)
 	case -DLM_ECANCEL:
 		receive_flags_reply(lkb, ms);
 		revert_lock_pc(r, lkb);
-		if (ms->m_result)
-			queue_cast(r, lkb, -DLM_ECANCEL);
+		queue_cast(r, lkb, -DLM_ECANCEL);
 		break;
 	case 0:
 		break;
@@ -3424,7 +3641,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
 			}
 		}
 
-		if (lock_recovery_try(ls))
+		if (dlm_lock_recovery_try(ls))
 			break;
 		schedule();
 	}
@@ -3503,7 +3720,7 @@ int dlm_receive_message(struct dlm_header *hd, int nodeid, int recovery)
 		log_error(ls, "unknown message type %d", ms->m_type);
 	}
 
-	unlock_recovery(ls);
+	dlm_unlock_recovery(ls);
  out:
 	dlm_put_lockspace(ls);
 	dlm_astd_wake();
@@ -4034,13 +4251,13 @@ int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc)
 
 int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
 		     int mode, uint32_t flags, void *name, unsigned int namelen,
-		     uint32_t parent_lkid)
+		     unsigned long timeout_cs)
 {
 	struct dlm_lkb *lkb;
 	struct dlm_args args;
 	int error;
 
-	lock_recovery(ls);
+	dlm_lock_recovery(ls);
 
 	error = create_lkb(ls, &lkb);
 	if (error) {
@@ -4062,7 +4279,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
 	   When DLM_IFL_USER is set, the dlm knows that this is a userspace
 	   lock and that lkb_astparam is the dlm_user_args structure. */
 
-	error = set_lock_args(mode, &ua->lksb, flags, namelen, parent_lkid,
+	error = set_lock_args(mode, &ua->lksb, flags, namelen, timeout_cs,
 			      DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args);
 	lkb->lkb_flags |= DLM_IFL_USER;
 	ua->old_mode = DLM_LOCK_IV;
@@ -4094,19 +4311,20 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
 	list_add_tail(&lkb->lkb_ownqueue, &ua->proc->locks);
 	spin_unlock(&ua->proc->locks_spin);
  out:
-	unlock_recovery(ls);
+	dlm_unlock_recovery(ls);
 	return error;
 }
 
 int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
-		     int mode, uint32_t flags, uint32_t lkid, char *lvb_in)
+		     int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
+		     unsigned long timeout_cs)
 {
 	struct dlm_lkb *lkb;
 	struct dlm_args args;
 	struct dlm_user_args *ua;
 	int error;
 
-	lock_recovery(ls);
+	dlm_lock_recovery(ls);
 
 	error = find_lkb(ls, lkid, &lkb);
 	if (error)
@@ -4127,6 +4345,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 	if (lvb_in && ua->lksb.sb_lvbptr)
 		memcpy(ua->lksb.sb_lvbptr, lvb_in, DLM_USER_LVB_LEN);
 
+	ua->xid = ua_tmp->xid;
 	ua->castparam = ua_tmp->castparam;
 	ua->castaddr = ua_tmp->castaddr;
 	ua->bastparam = ua_tmp->bastparam;
@@ -4134,19 +4353,19 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 	ua->user_lksb = ua_tmp->user_lksb;
 	ua->old_mode = lkb->lkb_grmode;
 
-	error = set_lock_args(mode, &ua->lksb, flags, 0, 0, DLM_FAKE_USER_AST,
-			      ua, DLM_FAKE_USER_AST, &args);
+	error = set_lock_args(mode, &ua->lksb, flags, 0, timeout_cs,
+			      DLM_FAKE_USER_AST, ua, DLM_FAKE_USER_AST, &args);
 	if (error)
 		goto out_put;
 
 	error = convert_lock(ls, lkb, &args);
 
-	if (error == -EINPROGRESS || error == -EAGAIN)
+	if (error == -EINPROGRESS || error == -EAGAIN || error == -EDEADLK)
 		error = 0;
  out_put:
 	dlm_put_lkb(lkb);
  out:
-	unlock_recovery(ls);
+	dlm_unlock_recovery(ls);
 	kfree(ua_tmp);
 	return error;
 }
@@ -4159,7 +4378,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 	struct dlm_user_args *ua;
 	int error;
 
-	lock_recovery(ls);
+	dlm_lock_recovery(ls);
 
 	error = find_lkb(ls, lkid, &lkb);
 	if (error)
@@ -4194,7 +4413,7 @@ int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
  out_put:
 	dlm_put_lkb(lkb);
  out:
-	unlock_recovery(ls);
+	dlm_unlock_recovery(ls);
 	kfree(ua_tmp);
 	return error;
 }
@@ -4207,7 +4426,7 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 	struct dlm_user_args *ua;
 	int error;
 
-	lock_recovery(ls);
+	dlm_lock_recovery(ls);
 
 	error = find_lkb(ls, lkid, &lkb);
 	if (error)
@@ -4231,11 +4450,59 @@ int dlm_user_cancel(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
  out_put:
 	dlm_put_lkb(lkb);
  out:
-	unlock_recovery(ls);
+	dlm_unlock_recovery(ls);
 	kfree(ua_tmp);
 	return error;
 }
 
+int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid)
+{
+	struct dlm_lkb *lkb;
+	struct dlm_args args;
+	struct dlm_user_args *ua;
+	struct dlm_rsb *r;
+	int error;
+
+	dlm_lock_recovery(ls);
+
+	error = find_lkb(ls, lkid, &lkb);
+	if (error)
+		goto out;
+
+	ua = (struct dlm_user_args *)lkb->lkb_astparam;
+
+	error = set_unlock_args(flags, ua, &args);
+	if (error)
+		goto out_put;
+
+	/* same as cancel_lock(), but set DEADLOCK_CANCEL after lock_rsb */
+
+	r = lkb->lkb_resource;
+	hold_rsb(r);
+	lock_rsb(r);
+
+	error = validate_unlock_args(lkb, &args);
+	if (error)
+		goto out_r;
+	lkb->lkb_flags |= DLM_IFL_DEADLOCK_CANCEL;
+
+	error = _cancel_lock(r, lkb);
+ out_r:
+	unlock_rsb(r);
+	put_rsb(r);
+
+	if (error == -DLM_ECANCEL)
+		error = 0;
+	/* from validate_unlock_args() */
+	if (error == -EBUSY)
+		error = 0;
+ out_put:
+	dlm_put_lkb(lkb);
+ out:
+	dlm_unlock_recovery(ls);
+	return error;
+}
+
 /* lkb's that are removed from the waiters list by revert are just left on the
    orphans list with the granted orphan locks, to be freed by purge */
 
@@ -4314,12 +4581,13 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
 {
 	struct dlm_lkb *lkb, *safe;
 
-	lock_recovery(ls);
+	dlm_lock_recovery(ls);
 
 	while (1) {
 		lkb = del_proc_lock(ls, proc);
 		if (!lkb)
 			break;
+		del_timeout(lkb);
 		if (lkb->lkb_exflags & DLM_LKF_PERSISTENT)
 			orphan_proc_lock(ls, lkb);
 		else
@@ -4347,7 +4615,7 @@ void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
 	}
 
 	mutex_unlock(&ls->ls_clear_proc_locks);
-	unlock_recovery(ls);
+	dlm_unlock_recovery(ls);
 }
 
 static void purge_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc)
@@ -4429,12 +4697,12 @@ int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
 	if (nodeid != dlm_our_nodeid()) {
 		error = send_purge(ls, nodeid, pid);
 	} else {
-		lock_recovery(ls);
+		dlm_lock_recovery(ls);
 		if (pid == current->pid)
 			purge_proc_locks(ls, proc);
 		else
 			do_purge(ls, nodeid, pid);
-		unlock_recovery(ls);
+		dlm_unlock_recovery(ls);
 	}
 	return error;
 }
diff --git a/fs/dlm/lock.h b/fs/dlm/lock.h
index 64fc4ec40668..1720313c22df 100644
--- a/fs/dlm/lock.h
+++ b/fs/dlm/lock.h
@@ -1,7 +1,7 @@
 /******************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -24,6 +24,10 @@ void dlm_put_rsb(struct dlm_rsb *r);
 void dlm_hold_rsb(struct dlm_rsb *r);
 int dlm_put_lkb(struct dlm_lkb *lkb);
 void dlm_scan_rsbs(struct dlm_ls *ls);
+int dlm_lock_recovery_try(struct dlm_ls *ls);
+void dlm_unlock_recovery(struct dlm_ls *ls);
+void dlm_scan_timeout(struct dlm_ls *ls);
+void dlm_adjust_timeouts(struct dlm_ls *ls);
 
 int dlm_purge_locks(struct dlm_ls *ls);
 void dlm_purge_mstcpy_locks(struct dlm_rsb *r);
@@ -34,15 +38,18 @@ int dlm_recover_master_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
 int dlm_recover_process_copy(struct dlm_ls *ls, struct dlm_rcom *rc);
 
 int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua, int mode,
-	uint32_t flags, void *name, unsigned int namelen, uint32_t parent_lkid);
+	uint32_t flags, void *name, unsigned int namelen,
+	unsigned long timeout_cs);
 int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
-	int mode, uint32_t flags, uint32_t lkid, char *lvb_in);
+	int mode, uint32_t flags, uint32_t lkid, char *lvb_in,
+	unsigned long timeout_cs);
 int dlm_user_unlock(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
 	uint32_t flags, uint32_t lkid, char *lvb_in);
 int dlm_user_cancel(struct dlm_ls *ls,  struct dlm_user_args *ua_tmp,
 	uint32_t flags, uint32_t lkid);
 int dlm_user_purge(struct dlm_ls *ls, struct dlm_user_proc *proc,
 	int nodeid, int pid);
+int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid);
 void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc);
 
 static inline int is_master(struct dlm_rsb *r)
diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
index a677b2a5eed4..1dc72105ab12 100644
--- a/fs/dlm/lockspace.c
+++ b/fs/dlm/lockspace.c
@@ -197,13 +197,24 @@ static int do_uevent(struct dlm_ls *ls, int in)
 	else
 		kobject_uevent(&ls->ls_kobj, KOBJ_OFFLINE);
 
+	log_debug(ls, "%s the lockspace group...", in ? "joining" : "leaving");
+
+	/* dlm_controld will see the uevent, do the necessary group management
+	   and then write to sysfs to wake us */
+
 	error = wait_event_interruptible(ls->ls_uevent_wait,
 			test_and_clear_bit(LSFL_UEVENT_WAIT, &ls->ls_flags));
+
+	log_debug(ls, "group event done %d %d", error, ls->ls_uevent_result);
+
 	if (error)
 		goto out;
 
 	error = ls->ls_uevent_result;
  out:
+	if (error)
+		log_error(ls, "group %s failed %d %d", in ? "join" : "leave",
+			  error, ls->ls_uevent_result);
 	return error;
 }
 
@@ -234,8 +245,13 @@ static int dlm_scand(void *data)
 	struct dlm_ls *ls;
 
 	while (!kthread_should_stop()) {
-		list_for_each_entry(ls, &lslist, ls_list)
-			dlm_scan_rsbs(ls);
+		list_for_each_entry(ls, &lslist, ls_list) {
+			if (dlm_lock_recovery_try(ls)) {
+				dlm_scan_rsbs(ls);
+				dlm_scan_timeout(ls);
+				dlm_unlock_recovery(ls);
+			}
+		}
 		schedule_timeout_interruptible(dlm_config.ci_scan_secs * HZ);
 	}
 	return 0;
@@ -395,6 +411,7 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 {
 	struct dlm_ls *ls;
 	int i, size, error = -ENOMEM;
+	int do_unreg = 0;
 
 	if (namelen > DLM_LOCKSPACE_LEN)
 		return -EINVAL;
@@ -417,11 +434,22 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 		goto out;
 	memcpy(ls->ls_name, name, namelen);
 	ls->ls_namelen = namelen;
-	ls->ls_exflags = flags;
 	ls->ls_lvblen = lvblen;
 	ls->ls_count = 0;
 	ls->ls_flags = 0;
 
+	if (flags & DLM_LSFL_TIMEWARN)
+		set_bit(LSFL_TIMEWARN, &ls->ls_flags);
+
+	if (flags & DLM_LSFL_FS)
+		ls->ls_allocation = GFP_NOFS;
+	else
+		ls->ls_allocation = GFP_KERNEL;
+
+	/* ls_exflags are forced to match among nodes, and we don't
+	   need to require all nodes to have TIMEWARN or FS set */
+	ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS));
+
 	size = dlm_config.ci_rsbtbl_size;
 	ls->ls_rsbtbl_size = size;
 
@@ -461,6 +489,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 	mutex_init(&ls->ls_waiters_mutex);
 	INIT_LIST_HEAD(&ls->ls_orphans);
 	mutex_init(&ls->ls_orphans_mutex);
+	INIT_LIST_HEAD(&ls->ls_timeout);
+	mutex_init(&ls->ls_timeout_mutex);
 
 	INIT_LIST_HEAD(&ls->ls_nodes);
 	INIT_LIST_HEAD(&ls->ls_nodes_gone);
@@ -477,6 +507,8 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 
 	init_waitqueue_head(&ls->ls_uevent_wait);
 	ls->ls_uevent_result = 0;
+	init_completion(&ls->ls_members_done);
+	ls->ls_members_result = -1;
 
 	ls->ls_recoverd_task = NULL;
 	mutex_init(&ls->ls_recoverd_active);
@@ -513,32 +545,49 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
 	error = dlm_recoverd_start(ls);
 	if (error) {
 		log_error(ls, "can't start dlm_recoverd %d", error);
-		goto out_rcomfree;
+		goto out_delist;
 	}
 
-	dlm_create_debug_file(ls);
-
 	error = kobject_setup(ls);
 	if (error)
-		goto out_del;
+		goto out_stop;
 
 	error = kobject_register(&ls->ls_kobj);
 	if (error)
-		goto out_del;
+		goto out_stop;
+
+	/* let kobject handle freeing of ls if there's an error */
+	do_unreg = 1;
+
+	/* This uevent triggers dlm_controld in userspace to add us to the
+	   group of nodes that are members of this lockspace (managed by the
+	   cluster infrastructure.)  Once it's done that, it tells us who the
+	   current lockspace members are (via configfs) and then tells the
+	   lockspace to start running (via sysfs) in dlm_ls_start(). */
 
 	error = do_uevent(ls, 1);
 	if (error)
-		goto out_unreg;
+		goto out_stop;
+
+	wait_for_completion(&ls->ls_members_done);
+	error = ls->ls_members_result;
+	if (error)
+		goto out_members;
+
+	dlm_create_debug_file(ls);
+
+	log_debug(ls, "join complete");
 
 	*lockspace = ls;
 	return 0;
 
- out_unreg:
-	kobject_unregister(&ls->ls_kobj);
- out_del:
-	dlm_delete_debug_file(ls);
+ out_members:
+	do_uevent(ls, 0);
+	dlm_clear_members(ls);
+	kfree(ls->ls_node_array);
+ out_stop:
 	dlm_recoverd_stop(ls);
- out_rcomfree:
+ out_delist:
 	spin_lock(&lslist_lock);
 	list_del(&ls->ls_list);
 	spin_unlock(&lslist_lock);
@@ -550,7 +599,10 @@ static int new_lockspace(char *name, int namelen, void **lockspace,
  out_rsbfree:
 	kfree(ls->ls_rsbtbl);
  out_lsfree:
-	kfree(ls);
+	if (do_unreg)
+		kobject_unregister(&ls->ls_kobj);
+	else
+		kfree(ls);
  out:
 	module_put(THIS_MODULE);
 	return error;
@@ -570,6 +622,8 @@ int dlm_new_lockspace(char *name, int namelen, void **lockspace,
 	error = new_lockspace(name, namelen, lockspace, flags, lvblen);
 	if (!error)
 		ls_count++;
+	else if (!ls_count)
+		threads_stop();
  out:
 	mutex_unlock(&ls_lock);
 	return error;
@@ -696,7 +750,7 @@ static int release_lockspace(struct dlm_ls *ls, int force)
 	dlm_clear_members_gone(ls);
 	kfree(ls->ls_node_array);
 	kobject_unregister(&ls->ls_kobj);
-        /* The ls structure will be freed when the kobject is done with */
+	/* The ls structure will be freed when the kobject is done with */
 
 	mutex_lock(&ls_lock);
 	ls_count--;
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 27970a58d29b..0553a6158dcb 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -260,7 +260,7 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
 static void lowcomms_data_ready(struct sock *sk, int count_unused)
 {
 	struct connection *con = sock2con(sk);
-	if (!test_and_set_bit(CF_READ_PENDING, &con->flags))
+	if (con && !test_and_set_bit(CF_READ_PENDING, &con->flags))
 		queue_work(recv_workqueue, &con->rwork);
 }
 
@@ -268,7 +268,7 @@ static void lowcomms_write_space(struct sock *sk)
 {
 	struct connection *con = sock2con(sk);
 
-	if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags))
+	if (con && !test_and_set_bit(CF_WRITE_PENDING, &con->flags))
 		queue_work(send_workqueue, &con->swork);
 }
 
@@ -720,11 +720,17 @@ static int tcp_accept_from_sock(struct connection *con)
 			INIT_WORK(&othercon->rwork, process_recv_sockets);
 			set_bit(CF_IS_OTHERCON, &othercon->flags);
 			newcon->othercon = othercon;
+			othercon->sock = newsock;
+			newsock->sk->sk_user_data = othercon;
+			add_sock(newsock, othercon);
+			addcon = othercon;
+		}
+		else {
+			printk("Extra connection from node %d attempted\n", nodeid);
+			result = -EAGAIN;
+			mutex_unlock(&newcon->sock_mutex);
+			goto accept_err;
 		}
-		othercon->sock = newsock;
-		newsock->sk->sk_user_data = othercon;
-		add_sock(newsock, othercon);
-		addcon = othercon;
 	}
 	else {
 		newsock->sk->sk_user_data = newcon;
@@ -1400,8 +1406,11 @@ void dlm_lowcomms_stop(void)
 	down(&connections_lock);
 	for (i = 0; i <= max_nodeid; i++) {
 		con = __nodeid2con(i, 0);
-		if (con)
+		if (con) {
 			con->flags |= 0xFF;
+			if (con->sock)
+				con->sock->sk->sk_user_data = NULL;
+		}
 	}
 	up(&connections_lock);
 
diff --git a/fs/dlm/main.c b/fs/dlm/main.c
index 162fbae58fe5..eca2907f2386 100644
--- a/fs/dlm/main.c
+++ b/fs/dlm/main.c
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -25,6 +25,8 @@ void dlm_unregister_debugfs(void);
 static inline int dlm_register_debugfs(void) { return 0; }
 static inline void dlm_unregister_debugfs(void) { }
 #endif
+int dlm_netlink_init(void);
+void dlm_netlink_exit(void);
 
 static int __init init_dlm(void)
 {
@@ -50,10 +52,16 @@ static int __init init_dlm(void)
 	if (error)
 		goto out_debug;
 
+	error = dlm_netlink_init();
+	if (error)
+		goto out_user;
+
 	printk("DLM (built %s %s) installed\n", __DATE__, __TIME__);
 
 	return 0;
 
+ out_user:
+	dlm_user_exit();
  out_debug:
 	dlm_unregister_debugfs();
  out_config:
@@ -68,6 +76,7 @@ static int __init init_dlm(void)
 
 static void __exit exit_dlm(void)
 {
+	dlm_netlink_exit();
 	dlm_user_exit();
 	dlm_config_exit();
 	dlm_memory_exit();
diff --git a/fs/dlm/member.c b/fs/dlm/member.c
index 85e2897bd740..073599dced2a 100644
--- a/fs/dlm/member.c
+++ b/fs/dlm/member.c
@@ -1,7 +1,7 @@
 /******************************************************************************
 *******************************************************************************
 **
-**  Copyright (C) 2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2005-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -233,6 +233,12 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
 	*neg_out = neg;
 
 	error = ping_members(ls);
+	if (!error || error == -EPROTO) {
+		/* new_lockspace() may be waiting to know if the config
+		   is good or bad */
+		ls->ls_members_result = error;
+		complete(&ls->ls_members_done);
+	}
 	if (error)
 		goto out;
 
@@ -284,6 +290,9 @@ int dlm_ls_stop(struct dlm_ls *ls)
 	dlm_recoverd_suspend(ls);
 	ls->ls_recover_status = 0;
 	dlm_recoverd_resume(ls);
+
+	if (!ls->ls_recover_begin)
+		ls->ls_recover_begin = jiffies;
 	return 0;
 }
 
diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
new file mode 100644
index 000000000000..863b87d0dc71
--- /dev/null
+++ b/fs/dlm/netlink.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2007 Red Hat, Inc.  All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+ * of the GNU General Public License v.2.
+ */
+
+#include <net/genetlink.h>
+#include <linux/dlm.h>
+#include <linux/dlm_netlink.h>
+
+#include "dlm_internal.h"
+
+static uint32_t dlm_nl_seqnum;
+static uint32_t listener_nlpid;
+
+static struct genl_family family = {
+	.id		= GENL_ID_GENERATE,
+	.name		= DLM_GENL_NAME,
+	.version	= DLM_GENL_VERSION,
+};
+
+static int prepare_data(u8 cmd, struct sk_buff **skbp, size_t size)
+{
+	struct sk_buff *skb;
+	void *data;
+
+	skb = genlmsg_new(size, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	/* add the message headers */
+	data = genlmsg_put(skb, 0, dlm_nl_seqnum++, &family, 0, cmd);
+	if (!data) {
+		nlmsg_free(skb);
+		return -EINVAL;
+	}
+
+	*skbp = skb;
+	return 0;
+}
+
+static struct dlm_lock_data *mk_data(struct sk_buff *skb)
+{
+	struct nlattr *ret;
+
+	ret = nla_reserve(skb, DLM_TYPE_LOCK, sizeof(struct dlm_lock_data));
+	if (!ret)
+		return NULL;
+	return nla_data(ret);
+}
+
+static int send_data(struct sk_buff *skb)
+{
+	struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
+	void *data = genlmsg_data(genlhdr);
+	int rv;
+
+	rv = genlmsg_end(skb, data);
+	if (rv < 0) {
+		nlmsg_free(skb);
+		return rv;
+	}
+
+	return genlmsg_unicast(skb, listener_nlpid);
+}
+
+static int user_cmd(struct sk_buff *skb, struct genl_info *info)
+{
+	listener_nlpid = info->snd_pid;
+	printk("user_cmd nlpid %u\n", listener_nlpid);
+	return 0;
+}
+
+static struct genl_ops dlm_nl_ops = {
+	.cmd		= DLM_CMD_HELLO,
+	.doit		= user_cmd,
+};
+
+int dlm_netlink_init(void)
+{
+	int rv;
+
+	rv = genl_register_family(&family);
+	if (rv)
+		return rv;
+
+	rv = genl_register_ops(&family, &dlm_nl_ops);
+	if (rv < 0)
+		goto err;
+	return 0;
+ err:
+	genl_unregister_family(&family);
+	return rv;
+}
+
+void dlm_netlink_exit(void)
+{
+	genl_unregister_ops(&family, &dlm_nl_ops);
+	genl_unregister_family(&family);
+}
+
+static void fill_data(struct dlm_lock_data *data, struct dlm_lkb *lkb)
+{
+	struct dlm_rsb *r = lkb->lkb_resource;
+	struct dlm_user_args *ua = (struct dlm_user_args *) lkb->lkb_astparam;
+
+	memset(data, 0, sizeof(struct dlm_lock_data));
+
+	data->version = DLM_LOCK_DATA_VERSION;
+	data->nodeid = lkb->lkb_nodeid;
+	data->ownpid = lkb->lkb_ownpid;
+	data->id = lkb->lkb_id;
+	data->remid = lkb->lkb_remid;
+	data->status = lkb->lkb_status;
+	data->grmode = lkb->lkb_grmode;
+	data->rqmode = lkb->lkb_rqmode;
+	data->timestamp = lkb->lkb_timestamp;
+	if (ua)
+		data->xid = ua->xid;
+	if (r) {
+		data->lockspace_id = r->res_ls->ls_global_id;
+		data->resource_namelen = r->res_length;
+		memcpy(data->resource_name, r->res_name, r->res_length);
+	}
+}
+
+void dlm_timeout_warn(struct dlm_lkb *lkb)
+{
+	struct dlm_lock_data *data;
+	struct sk_buff *send_skb;
+	size_t size;
+	int rv;
+
+	size = nla_total_size(sizeof(struct dlm_lock_data)) +
+	       nla_total_size(0); /* why this? */
+
+	rv = prepare_data(DLM_CMD_TIMEOUT, &send_skb, size);
+	if (rv < 0)
+		return;
+
+	data = mk_data(send_skb);
+	if (!data) {
+		nlmsg_free(send_skb);
+		return;
+	}
+
+	fill_data(data, lkb);
+
+	send_data(send_skb);
+}
+
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 6bfbd6153809..e3a1527cbdbe 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -38,7 +38,7 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
 	char *mb;
 	int mb_len = sizeof(struct dlm_rcom) + len;
 
-	mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_KERNEL, &mb);
+	mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb);
 	if (!mh) {
 		log_print("create_rcom to %d type %d len %d ENOBUFS",
 			  to_nodeid, type, len);
@@ -90,7 +90,7 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
 		log_error(ls, "version mismatch: %x nodeid %d: %x",
 			  DLM_HEADER_MAJOR | DLM_HEADER_MINOR, nodeid,
 			  rc->rc_header.h_version);
-		return -EINVAL;
+		return -EPROTO;
 	}
 
 	if (rf->rf_lvblen != ls->ls_lvblen ||
@@ -98,7 +98,7 @@ static int check_config(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
 		log_error(ls, "config mismatch: %d,%x nodeid %d: %d,%x",
 			  ls->ls_lvblen, ls->ls_exflags,
 			  nodeid, rf->rf_lvblen, rf->rf_lsflags);
-		return -EINVAL;
+		return -EPROTO;
 	}
 	return 0;
 }
@@ -386,7 +386,8 @@ static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 	dlm_recover_process_copy(ls, rc_in);
 }
 
-static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
+static int send_ls_not_ready(struct dlm_ls *ls, int nodeid,
+			     struct dlm_rcom *rc_in)
 {
 	struct dlm_rcom *rc;
 	struct rcom_config *rf;
@@ -394,7 +395,7 @@ static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
 	char *mb;
 	int mb_len = sizeof(struct dlm_rcom) + sizeof(struct rcom_config);
 
-	mh = dlm_lowcomms_get_buffer(nodeid, mb_len, GFP_KERNEL, &mb);
+	mh = dlm_lowcomms_get_buffer(nodeid, mb_len, ls->ls_allocation, &mb);
 	if (!mh)
 		return -ENOBUFS;
 	memset(mb, 0, mb_len);
@@ -464,7 +465,7 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
 		log_print("lockspace %x from %d type %x not found",
 			  hd->h_lockspace, nodeid, rc->rc_type);
 		if (rc->rc_type == DLM_RCOM_STATUS)
-			send_ls_not_ready(nodeid, rc);
+			send_ls_not_ready(ls, nodeid, rc);
 		return;
 	}
 
diff --git a/fs/dlm/recoverd.c b/fs/dlm/recoverd.c
index 3cb636d60249..66575997861c 100644
--- a/fs/dlm/recoverd.c
+++ b/fs/dlm/recoverd.c
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 **  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
-**  Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
+**  Copyright (C) 2004-2007 Red Hat, Inc.  All rights reserved.
 **
 **  This copyrighted material is made available to anyone wishing to use,
 **  modify, copy, or redistribute it subject to the terms and conditions
@@ -190,6 +190,8 @@ static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
 
 	dlm_clear_members_gone(ls);
 
+	dlm_adjust_timeouts(ls);
+
 	error = enable_locking(ls, rv->seq);
 	if (error) {
 		log_debug(ls, "enable_locking failed %d", error);
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index b0201ec325a7..6438941ab1f8 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -33,16 +33,17 @@ static const struct file_operations device_fops;
 struct dlm_lock_params32 {
 	__u8 mode;
 	__u8 namelen;
-	__u16 flags;
+	__u16 unused;
+	__u32 flags;
 	__u32 lkid;
 	__u32 parent;
-
+	__u64 xid;
+	__u64 timeout;
 	__u32 castparam;
 	__u32 castaddr;
 	__u32 bastparam;
 	__u32 bastaddr;
 	__u32 lksb;
-
 	char lvb[DLM_USER_LVB_LEN];
 	char name[0];
 };
@@ -68,6 +69,7 @@ struct dlm_lksb32 {
 };
 
 struct dlm_lock_result32 {
+	__u32 version[3];
 	__u32 length;
 	__u32 user_astaddr;
 	__u32 user_astparam;
@@ -102,6 +104,8 @@ static void compat_input(struct dlm_write_request *kb,
 		kb->i.lock.flags = kb32->i.lock.flags;
 		kb->i.lock.lkid = kb32->i.lock.lkid;
 		kb->i.lock.parent = kb32->i.lock.parent;
+		kb->i.lock.xid = kb32->i.lock.xid;
+		kb->i.lock.timeout = kb32->i.lock.timeout;
 		kb->i.lock.castparam = (void *)(long)kb32->i.lock.castparam;
 		kb->i.lock.castaddr = (void *)(long)kb32->i.lock.castaddr;
 		kb->i.lock.bastparam = (void *)(long)kb32->i.lock.bastparam;
@@ -115,6 +119,10 @@ static void compat_input(struct dlm_write_request *kb,
 static void compat_output(struct dlm_lock_result *res,
 			  struct dlm_lock_result32 *res32)
 {
+	res32->version[0] = res->version[0];
+	res32->version[1] = res->version[1];
+	res32->version[2] = res->version[2];
+
 	res32->user_astaddr = (__u32)(long)res->user_astaddr;
 	res32->user_astparam = (__u32)(long)res->user_astparam;
 	res32->user_lksb = (__u32)(long)res->user_lksb;
@@ -130,6 +138,36 @@ static void compat_output(struct dlm_lock_result *res,
 }
 #endif
 
+/* Figure out if this lock is at the end of its life and no longer
+   available for the application to use.  The lkb still exists until
+   the final ast is read.  A lock becomes EOL in three situations:
+     1. a noqueue request fails with EAGAIN
+     2. an unlock completes with EUNLOCK
+     3. a cancel of a waiting request completes with ECANCEL/EDEADLK
+   An EOL lock needs to be removed from the process's list of locks.
+   And we can't allow any new operation on an EOL lock.  This is
+   not related to the lifetime of the lkb struct which is managed
+   entirely by refcount. */
+
+static int lkb_is_endoflife(struct dlm_lkb *lkb, int sb_status, int type)
+{
+	switch (sb_status) {
+	case -DLM_EUNLOCK:
+		return 1;
+	case -DLM_ECANCEL:
+	case -ETIMEDOUT:
+	case -EDEADLK:
+		if (lkb->lkb_grmode == DLM_LOCK_IV)
+			return 1;
+		break;
+	case -EAGAIN:
+		if (type == AST_COMP && lkb->lkb_grmode == DLM_LOCK_IV)
+			return 1;
+		break;
+	}
+	return 0;
+}
+
 /* we could possibly check if the cancel of an orphan has resulted in the lkb
    being removed and then remove that lkb from the orphans list and free it */
 
@@ -176,25 +214,7 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type)
 		log_debug(ls, "ast overlap %x status %x %x",
 			  lkb->lkb_id, ua->lksb.sb_status, lkb->lkb_flags);
 
-	/* Figure out if this lock is at the end of its life and no longer
-	   available for the application to use.  The lkb still exists until
-	   the final ast is read.  A lock becomes EOL in three situations:
-	     1. a noqueue request fails with EAGAIN
-	     2. an unlock completes with EUNLOCK
-	     3. a cancel of a waiting request completes with ECANCEL
-	   An EOL lock needs to be removed from the process's list of locks.
-	   And we can't allow any new operation on an EOL lock.  This is
-	   not related to the lifetime of the lkb struct which is managed
-	   entirely by refcount. */
-
-	if (type == AST_COMP &&
-	    lkb->lkb_grmode == DLM_LOCK_IV &&
-	    ua->lksb.sb_status == -EAGAIN)
-		eol = 1;
-	else if (ua->lksb.sb_status == -DLM_EUNLOCK ||
-	    (ua->lksb.sb_status == -DLM_ECANCEL &&
-	     lkb->lkb_grmode == DLM_LOCK_IV))
-		eol = 1;
+	eol = lkb_is_endoflife(lkb, ua->lksb.sb_status, type);
 	if (eol) {
 		lkb->lkb_ast_type &= ~AST_BAST;
 		lkb->lkb_flags |= DLM_IFL_ENDOFLIFE;
@@ -252,16 +272,18 @@ static int device_user_lock(struct dlm_user_proc *proc,
 	ua->castaddr = params->castaddr;
 	ua->bastparam = params->bastparam;
 	ua->bastaddr = params->bastaddr;
+	ua->xid = params->xid;
 
 	if (params->flags & DLM_LKF_CONVERT)
 		error = dlm_user_convert(ls, ua,
 				         params->mode, params->flags,
-				         params->lkid, params->lvb);
+				         params->lkid, params->lvb,
+					 (unsigned long) params->timeout);
 	else {
 		error = dlm_user_request(ls, ua,
 					 params->mode, params->flags,
 					 params->name, params->namelen,
-					 params->parent);
+					 (unsigned long) params->timeout);
 		if (!error)
 			error = ua->lksb.sb_lkid;
 	}
@@ -299,6 +321,22 @@ static int device_user_unlock(struct dlm_user_proc *proc,
 	return error;
 }
 
+static int device_user_deadlock(struct dlm_user_proc *proc,
+				struct dlm_lock_params *params)
+{
+	struct dlm_ls *ls;
+	int error;
+
+	ls = dlm_find_lockspace_local(proc->lockspace);
+	if (!ls)
+		return -ENOENT;
+
+	error = dlm_user_deadlock(ls, params->flags, params->lkid);
+
+	dlm_put_lockspace(ls);
+	return error;
+}
+
 static int create_misc_device(struct dlm_ls *ls, char *name)
 {
 	int error, len;
@@ -348,7 +386,7 @@ static int device_create_lockspace(struct dlm_lspace_params *params)
 		return -EPERM;
 
 	error = dlm_new_lockspace(params->name, strlen(params->name),
-				  &lockspace, 0, DLM_USER_LVB_LEN);
+				  &lockspace, params->flags, DLM_USER_LVB_LEN);
 	if (error)
 		return error;
 
@@ -524,6 +562,14 @@ static ssize_t device_write(struct file *file, const char __user *buf,
 		error = device_user_unlock(proc, &kbuf->i.lock);
 		break;
 
+	case DLM_USER_DEADLOCK:
+		if (!proc) {
+			log_print("no locking on control device");
+			goto out_sig;
+		}
+		error = device_user_deadlock(proc, &kbuf->i.lock);
+		break;
+
 	case DLM_USER_CREATE_LOCKSPACE:
 		if (proc) {
 			log_print("create/remove only on control device");
@@ -641,6 +687,9 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
 	int struct_len;
 
 	memset(&result, 0, sizeof(struct dlm_lock_result));
+	result.version[0] = DLM_DEVICE_VERSION_MAJOR;
+	result.version[1] = DLM_DEVICE_VERSION_MINOR;
+	result.version[2] = DLM_DEVICE_VERSION_PATCH;
 	memcpy(&result.lksb, &ua->lksb, sizeof(struct dlm_lksb));
 	result.user_lksb = ua->user_lksb;
 
@@ -699,6 +748,20 @@ static int copy_result_to_user(struct dlm_user_args *ua, int compat, int type,
 	return error;
 }
 
+static int copy_version_to_user(char __user *buf, size_t count)
+{
+	struct dlm_device_version ver;
+
+	memset(&ver, 0, sizeof(struct dlm_device_version));
+	ver.version[0] = DLM_DEVICE_VERSION_MAJOR;
+	ver.version[1] = DLM_DEVICE_VERSION_MINOR;
+	ver.version[2] = DLM_DEVICE_VERSION_PATCH;
+
+	if (copy_to_user(buf, &ver, sizeof(struct dlm_device_version)))
+		return -EFAULT;
+	return sizeof(struct dlm_device_version);
+}
+
 /* a read returns a single ast described in a struct dlm_lock_result */
 
 static ssize_t device_read(struct file *file, char __user *buf, size_t count,
@@ -710,6 +773,16 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
 	DECLARE_WAITQUEUE(wait, current);
 	int error, type=0, bmode=0, removed = 0;
 
+	if (count == sizeof(struct dlm_device_version)) {
+		error = copy_version_to_user(buf, count);
+		return error;
+	}
+
+	if (!proc) {
+		log_print("non-version read from control device %zu", count);
+		return -EINVAL;
+	}
+
 #ifdef CONFIG_COMPAT
 	if (count < sizeof(struct dlm_lock_result32))
 #else
@@ -747,11 +820,6 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count,
 		}
 	}
 
-	if (list_empty(&proc->asts)) {
-		spin_unlock(&proc->asts_spin);
-		return -EAGAIN;
-	}
-
 	/* there may be both completion and blocking asts to return for
 	   the lkb, don't remove lkb from asts list unless no asts remain */
 
@@ -823,6 +891,7 @@ static const struct file_operations device_fops = {
 static const struct file_operations ctl_device_fops = {
 	.open    = ctl_device_open,
 	.release = ctl_device_close,
+	.read    = device_read,
 	.write   = device_write,
 	.owner   = THIS_MODULE,
 };