diff options
author | Sage Weil <sage@newdream.net> | 2010-03-01 15:16:56 -0800 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2010-03-01 15:28:02 -0800 |
commit | e9964c102312967a4bc1fd501cb628c4a3b19034 (patch) | |
tree | 7bccb0c4e29d94baaf8c30e008ec5aebdccf9c57 | |
parent | 7af8f1e4aa86720840d3318e4dc225c3c7e5a6d0 (diff) | |
download | blackbird-op-linux-e9964c102312967a4bc1fd501cb628c4a3b19034.tar.gz blackbird-op-linux-e9964c102312967a4bc1fd501cb628c4a3b19034.zip |
ceph: fix flush_dirty_caps race with caps migration
flush_dirty_caps() used to loop over the first entry of the cap_dirty
list on the assumption that after calling ceph_check_caps() it would
be removed from the list. This isn't true for caps that are being
migrated between MDSs, where we've received the EXPORT but not the IMPORT.
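Instead, do a safe list iteration, and keep the next inode on the list
while the lock is dropped by taking a reference to it and setting the
CEPH_I_NOFLUSH flag, which makes the flush paths skip that inode.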
Signed-off-by: Sage Weil <sage@newdream.net>
-rw-r--r-- | fs/ceph/caps.c | 45 | ||||
-rw-r--r-- | fs/ceph/super.h | 1 |
2 files changed, 39 insertions, 7 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 295b7e547a31..8b89b9123252 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -1573,6 +1573,11 @@ retry_locked: } ack: + if (ci->i_ceph_flags & CEPH_I_NOFLUSH) { + dout(" skipping %p I_NOFLUSH set\n", inode); + continue; + } + if (session && session != cap->session) { dout("oops, wrong session %p mutex\n", session); mutex_unlock(&session->s_mutex); @@ -1652,6 +1657,10 @@ static int try_flush_caps(struct inode *inode, struct ceph_mds_session *session, retry: spin_lock(&inode->i_lock); + if (ci->i_ceph_flags & CEPH_I_NOFLUSH) { + dout("try_flush_caps skipping %p I_NOFLUSH set\n", inode); + goto out; + } if (ci->i_dirty_caps && ci->i_auth_cap) { struct ceph_cap *cap = ci->i_auth_cap; int used = __ceph_caps_used(ci); @@ -2747,16 +2756,38 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc) */ void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc) { - struct ceph_inode_info *ci; - struct inode *inode; + struct ceph_inode_info *ci, *nci = NULL; + struct inode *inode, *ninode = NULL; + struct list_head *p, *n; dout("flush_dirty_caps\n"); spin_lock(&mdsc->cap_dirty_lock); - while (!list_empty(&mdsc->cap_dirty)) { - ci = list_first_entry(&mdsc->cap_dirty, - struct ceph_inode_info, - i_dirty_item); - inode = igrab(&ci->vfs_inode); + list_for_each_safe(p, n, &mdsc->cap_dirty) { + if (nci) { + ci = nci; + inode = ninode; + ci->i_ceph_flags &= ~CEPH_I_NOFLUSH; + dout("flush_dirty_caps inode %p (was next inode)\n", + inode); + } else { + ci = list_entry(p, struct ceph_inode_info, + i_dirty_item); + inode = igrab(&ci->vfs_inode); + BUG_ON(!inode); + dout("flush_dirty_caps inode %p\n", inode); + } + if (n != &mdsc->cap_dirty) { + nci = list_entry(n, struct ceph_inode_info, + i_dirty_item); + ninode = igrab(&nci->vfs_inode); + BUG_ON(!ninode); + nci->i_ceph_flags |= CEPH_I_NOFLUSH; + dout("flush_dirty_caps next inode %p, noflush\n", + ninode); + } else { + nci = NULL; + ninode = NULL; + } 
spin_unlock(&mdsc->cap_dirty_lock); if (inode) { ceph_check_caps(ci, CHECK_CAPS_NODELAY|CHECK_CAPS_FLUSH, diff --git a/fs/ceph/super.h b/fs/ceph/super.h index ff7aaa32736c..6a778f2c3f6e 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -289,6 +289,7 @@ struct ceph_inode_xattrs_info { #define CEPH_I_COMPLETE 1 /* we have complete directory cached */ #define CEPH_I_NODELAY 4 /* do not delay cap release */ #define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */ +#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */ struct ceph_inode_info { struct ceph_vino i_vino; /* ceph ino + snap */ |