From 13a4214cd9ec14d7b77e98bd3ee51f60f868a6e5 Mon Sep 17 00:00:00 2001 From: Henry C Chang Date: Tue, 1 Jun 2010 11:31:08 -0700 Subject: ceph: fix d_subdirs ordering problem We misused list_move_tail() to order the dentry in d_subdirs. This will screw up the d_subdirs order. This bug can be reliably reproduced by: 1. mount ceph fs. 2. on ceph fs, git clone git://ceph.newdream.net/git/ceph.git 3. Run autogen.sh in ceph directory. (Note: Errors only occur at the first time you run autogen.sh.) Signed-off-by: Henry C Chang Signed-off-by: Sage Weil --- fs/ceph/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 226f5a50d362..ab47f46ca282 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -827,7 +827,7 @@ static void ceph_set_dentry_offset(struct dentry *dn) spin_lock(&dcache_lock); spin_lock(&dn->d_lock); - list_move_tail(&dir->d_subdirs, &dn->d_u.d_child); + list_move(&dn->d_u.d_child, &dir->d_subdirs); dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset, dn->d_u.d_child.prev, dn->d_u.d_child.next); spin_unlock(&dn->d_lock); -- cgit v1.2.1 From 205475679a74fe40b63a1c7f41110fdb64daa8b9 Mon Sep 17 00:00:00 2001 From: Yehuda Sadeh Date: Tue, 1 Jun 2010 10:37:40 -0700 Subject: ceph: fix memory leak in statfs Freeing the statfs request structure when required. Signed-off-by: Yehuda Sadeh Signed-off-by: Sage Weil --- fs/ceph/mon_client.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/ceph/mon_client.c b/fs/ceph/mon_client.c index 21c62e9b7d1d..07a539906e67 100644 --- a/fs/ceph/mon_client.c +++ b/fs/ceph/mon_client.c @@ -400,6 +400,8 @@ static void release_generic_request(struct kref *kref) ceph_msg_put(req->reply); if (req->request) ceph_msg_put(req->request); + + kfree(req); } static void put_generic_request(struct ceph_mon_generic_request *req) -- cgit v1.2.1 From 558d3499bd059d4534b1f2b69dc1c562acc733fe Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 1 Jun 2010 12:51:12 -0700 Subject: ceph: fix f_namelen reported by statfs We were setting f_namelen in kstatfs to PATH_MAX instead of NAME_MAX. That disagrees with ceph_lookup behavior (which checks against NAME_MAX), and also makes the pjd posix test suite spit out ugly errors because with can't clean up its temporary files. Signed-off-by: Sage Weil --- fs/ceph/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 4e0bee240b9d..8db88792b5ad 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -89,7 +89,7 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) buf->f_files = le64_to_cpu(st.num_objects); buf->f_ffree = -1; - buf->f_namelen = PATH_MAX; + buf->f_namelen = NAME_MAX; buf->f_frsize = PAGE_CACHE_SIZE; /* leave fsid little-endian, regardless of host endianness */ -- cgit v1.2.1 From 1e5ea23df11c7c90c7e7268dd3a6603bfa5aadf7 Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Fri, 4 Jun 2010 10:05:40 -0700 Subject: ceph: fix lease revocation when seq doesn't match If the client revokes a lease with a higher seq than what we have, keep the mds's seq, so that it honors our release. Otherwise, we can hang indefinitely. Signed-off-by: Sage Weil --- fs/ceph/mds_client.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index b49f12822cbc..29b4485cf1ca 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2433,6 +2433,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, struct ceph_dentry_info *di; int mds = session->s_mds; struct ceph_mds_lease *h = msg->front.iov_base; + u32 seq; struct ceph_vino vino; int mask; struct qstr dname; @@ -2446,6 +2447,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, vino.ino = le64_to_cpu(h->ino); vino.snap = CEPH_NOSNAP; mask = le16_to_cpu(h->mask); + seq = le32_to_cpu(h->seq); dname.name = (void *)h + sizeof(*h) + sizeof(u32); dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32); if (dname.len != get_unaligned_le32(h+1)) @@ -2456,8 +2458,9 @@ static void handle_lease(struct ceph_mds_client *mdsc, /* lookup inode */ inode = ceph_find_inode(sb, vino); - dout("handle_lease '%s', mask %d, ino %llx %p\n", - ceph_lease_op_name(h->action), mask, vino.ino, inode); + dout("handle_lease %s, mask %d, ino %llx %p %.*s\n", + ceph_lease_op_name(h->action), mask, vino.ino, inode, + dname.len, dname.name); if (inode == NULL) { dout("handle_lease no inode %llx\n", vino.ino); goto release; @@ -2482,7 +2485,8 @@ static void handle_lease(struct ceph_mds_client *mdsc, switch (h->action) { case CEPH_MDS_LEASE_REVOKE: if (di && di->lease_session == session) { - h->seq = cpu_to_le32(di->lease_seq); + if (ceph_seq_cmp(di->lease_seq, seq) > 0) + h->seq = cpu_to_le32(di->lease_seq); __ceph_mdsc_drop_dentry_lease(dentry); } release = 1; @@ -2496,7 +2500,7 @@ static void handle_lease(struct ceph_mds_client *mdsc, unsigned long duration = le32_to_cpu(h->duration_ms) * HZ / 1000; - di->lease_seq = le32_to_cpu(h->seq); + di->lease_seq = seq; dentry->d_time = di->lease_renew_from + duration; di->lease_renew_after = di->lease_renew_from + (duration >> 1); -- cgit v1.2.1 From 00d5643e7c5ed4ae1bb0b385fe2f41bb951cc3cd Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Thu, 10 Jun 2010 11:13:58 -0400 Subject: ceph: fix atomic64_t initialization on ia64 bdi_seq is an atomic_long_t but we're using ATOMIC_INIT, which causes build failures on ia64. This patch fixes it to use ATOMIC_LONG_INIT. Signed-off-by: Jeff Mahoney Signed-off-by: Sage Weil --- fs/ceph/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 8db88792b5ad..fa87f51e38e1 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -926,7 +926,7 @@ static int ceph_compare_super(struct super_block *sb, void *data) /* * construct our own bdi so we can control readahead, etc. */ -static atomic_long_t bdi_seq = ATOMIC_INIT(0); +static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0); static int ceph_register_bdi(struct super_block *sb, struct ceph_client *client) { -- cgit v1.2.1 From 9dbd412f56c453f15014396c6024b895c1485ccb Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Thu, 10 Jun 2010 13:21:20 -0700 Subject: ceph: fix misleading/incorrect debug message Nothing is released here: the caps message is simply ignored in this case. Signed-off-by: Sage Weil --- fs/ceph/caps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index ae3e3a306445..da2a0e3cb200 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2714,7 +2714,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, spin_lock(&inode->i_lock); cap = __get_cap_for_mds(ceph_inode(inode), mds); if (!cap) { - dout("no cap on %p ino %llx.%llx from mds%d, releasing\n", + dout(" no cap on %p ino %llx.%llx from mds%d\n", inode, ceph_ino(inode), ceph_snap(inode), mds); spin_unlock(&inode->i_lock); goto done; -- cgit v1.2.1 From 3d7ded4d81d807c2f75f310a8d74a5d72be13a1b Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 9 Jun 2010 16:47:10 -0700 Subject: ceph: release cap on import if we don't have the inode If we get an IMPORT that give us a cap, but we don't have the inode, queue a release (and try to send it immediately) so that the MDS doesn't get stuck waiting for us. Signed-off-by: Sage Weil --- fs/ceph/caps.c | 90 ++++++++++++++++++++++++++++++++-------------------- fs/ceph/mds_client.c | 6 ++-- fs/ceph/mds_client.h | 3 ++ 3 files changed, 61 insertions(+), 38 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index da2a0e3cb200..7c692e746237 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -981,6 +981,46 @@ static int send_cap_msg(struct ceph_mds_session *session, return 0; } +static void __queue_cap_release(struct ceph_mds_session *session, + u64 ino, u64 cap_id, u32 migrate_seq, + u32 issue_seq) +{ + struct ceph_msg *msg; + struct ceph_mds_cap_release *head; + struct ceph_mds_cap_item *item; + + spin_lock(&session->s_cap_lock); + BUG_ON(!session->s_num_cap_releases); + msg = list_first_entry(&session->s_cap_releases, + struct ceph_msg, list_head); + + dout(" adding %llx release to mds%d msg %p (%d left)\n", + ino, session->s_mds, msg, session->s_num_cap_releases); + + BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE); + head = msg->front.iov_base; + head->num = cpu_to_le32(le32_to_cpu(head->num) + 1); + item = msg->front.iov_base + msg->front.iov_len; + item->ino = cpu_to_le64(ino); + item->cap_id = cpu_to_le64(cap_id); + item->migrate_seq = cpu_to_le32(migrate_seq); + item->seq = cpu_to_le32(issue_seq); + + session->s_num_cap_releases--; + + msg->front.iov_len += sizeof(*item); + if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) { + dout(" release msg %p full\n", msg); + list_move_tail(&msg->list_head, &session->s_cap_releases_done); + } else { + dout(" release msg %p at %d/%d (%d)\n", msg, + (int)le32_to_cpu(head->num), + (int)CEPH_CAPS_PER_RELEASE, + (int)msg->front.iov_len); + } + spin_unlock(&session->s_cap_lock); +} + /* * Queue cap releases when an inode is dropped from our cache. Since * inode is about to be destroyed, there is no need for i_lock. @@ -994,41 +1034,9 @@ void ceph_queue_caps_release(struct inode *inode) while (p) { struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node); struct ceph_mds_session *session = cap->session; - struct ceph_msg *msg; - struct ceph_mds_cap_release *head; - struct ceph_mds_cap_item *item; - spin_lock(&session->s_cap_lock); - BUG_ON(!session->s_num_cap_releases); - msg = list_first_entry(&session->s_cap_releases, - struct ceph_msg, list_head); - - dout(" adding %p release to mds%d msg %p (%d left)\n", - inode, session->s_mds, msg, session->s_num_cap_releases); - - BUG_ON(msg->front.iov_len + sizeof(*item) > PAGE_CACHE_SIZE); - head = msg->front.iov_base; - head->num = cpu_to_le32(le32_to_cpu(head->num) + 1); - item = msg->front.iov_base + msg->front.iov_len; - item->ino = cpu_to_le64(ceph_ino(inode)); - item->cap_id = cpu_to_le64(cap->cap_id); - item->migrate_seq = cpu_to_le32(cap->mseq); - item->seq = cpu_to_le32(cap->issue_seq); - - session->s_num_cap_releases--; - - msg->front.iov_len += sizeof(*item); - if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) { - dout(" release msg %p full\n", msg); - list_move_tail(&msg->list_head, - &session->s_cap_releases_done); - } else { - dout(" release msg %p at %d/%d (%d)\n", msg, - (int)le32_to_cpu(head->num), - (int)CEPH_CAPS_PER_RELEASE, - (int)msg->front.iov_len); - } - spin_unlock(&session->s_cap_lock); + __queue_cap_release(session, ceph_ino(inode), cap->cap_id, + cap->mseq, cap->issue_seq); p = rb_next(p); __ceph_remove_cap(cap); } @@ -2655,7 +2663,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, struct ceph_mds_caps *h; int mds = session->s_mds; int op; - u32 seq; + u32 seq, mseq; struct ceph_vino vino; u64 cap_id; u64 size, max_size; @@ -2675,6 +2683,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, vino.snap = CEPH_NOSNAP; cap_id = le64_to_cpu(h->cap_id); seq = le32_to_cpu(h->seq); + mseq = le32_to_cpu(h->migrate_seq); size = le64_to_cpu(h->size); max_size = le64_to_cpu(h->max_size); @@ -2689,6 +2698,17 @@ void ceph_handle_caps(struct ceph_mds_session *session, vino.snap, inode); if (!inode) { dout(" i don't have ino %llx\n", vino.ino); + + if (op == CEPH_CAP_OP_IMPORT) + __queue_cap_release(session, vino.ino, cap_id, + mseq, seq); + + /* + * send any full release message to try to move things + * along for the mds (who clearly thinks we still have this + * cap). + */ + ceph_send_cap_releases(mdsc, session); goto done; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 29b4485cf1ca..d28b6a9c0f96 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1176,8 +1176,8 @@ static int check_cap_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq) /* * called under s_mutex */ -static void send_cap_releases(struct ceph_mds_client *mdsc, - struct ceph_mds_session *session) +void ceph_send_cap_releases(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session) { struct ceph_msg *msg; @@ -2693,7 +2693,7 @@ static void delayed_work(struct work_struct *work) add_cap_releases(mdsc, s, -1); if (s->s_state == CEPH_MDS_SESSION_OPEN || s->s_state == CEPH_MDS_SESSION_HUNG) - send_cap_releases(mdsc, s); + ceph_send_cap_releases(mdsc, s); mutex_unlock(&s->s_mutex); ceph_put_mds_session(s); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index d9936c4f1212..e43752b52635 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -322,6 +322,9 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req) kref_put(&req->r_kref, ceph_mdsc_release_request); } +extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session); + extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc); extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base, -- cgit v1.2.1 From 2b2300d62ea413bec631d5b880effa2cc5363acb Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Wed, 9 Jun 2010 16:52:04 -0700 Subject: ceph: try to send partial cap release on cap message on missing inode If we have enough memory to allocate a new cap release message, do so, so that we can send a partial release message immediately. This keeps us from making the MDS wait when the cap release it needs is in a partially full release message. If we fail because of ENOMEM, oh well, they'll just have to wait a bit longer. Signed-off-by: Sage Weil --- fs/ceph/caps.c | 1 + fs/ceph/mds_client.c | 10 +++++----- fs/ceph/mds_client.h | 3 +++ 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 7c692e746237..619b61655ee5 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2708,6 +2708,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, * along for the mds (who clearly thinks we still have this * cap). */ + ceph_add_cap_releases(mdsc, session, -1); ceph_send_cap_releases(mdsc, session); goto done; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index d28b6a9c0f96..1766947fc07a 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1066,9 +1066,9 @@ static int trim_caps(struct ceph_mds_client *mdsc, * * Called under s_mutex. */ -static int add_cap_releases(struct ceph_mds_client *mdsc, - struct ceph_mds_session *session, - int extra) +int ceph_add_cap_releases(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session, + int extra) { struct ceph_msg *msg; struct ceph_mds_cap_release *head; @@ -1980,7 +1980,7 @@ out_err: } mutex_unlock(&mdsc->mutex); - add_cap_releases(mdsc, req->r_session, -1); + ceph_add_cap_releases(mdsc, req->r_session, -1); mutex_unlock(&session->s_mutex); /* kick calling process */ @@ -2690,7 +2690,7 @@ static void delayed_work(struct work_struct *work) send_renew_caps(mdsc, s); else ceph_con_keepalive(&s->s_con); - add_cap_releases(mdsc, s, -1); + ceph_add_cap_releases(mdsc, s, -1); if (s->s_state == CEPH_MDS_SESSION_OPEN || s->s_state == CEPH_MDS_SESSION_HUNG) ceph_send_cap_releases(mdsc, s); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index e43752b52635..b292fa42a66d 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -322,6 +322,9 @@ static inline void ceph_mdsc_put_request(struct ceph_mds_request *req) kref_put(&req->r_kref, ceph_mdsc_release_request); } +extern int ceph_add_cap_releases(struct ceph_mds_client *mdsc, + struct ceph_mds_session *session, + int extra); extern void ceph_send_cap_releases(struct ceph_mds_client *mdsc, struct ceph_mds_session *session); -- cgit v1.2.1