summaryrefslogtreecommitdiffstats
path: root/fs/ceph
diff options
context:
space:
mode:
authorYan, Zheng <zyan@redhat.com>2015-05-05 21:22:13 +0800
committerIlya Dryomov <idryomov@gmail.com>2015-06-25 11:49:29 +0300
commitaffbc19a68f9966ad65a773db405f78e2bafc07b (patch)
tree63c34c40700e8b1fe1a73f1df244f3143b7aa99f /fs/ceph
parent622f3e250f498976ad4cbae6f2be5cb359ded4f5 (diff)
downloadtalos-obmc-linux-affbc19a68f9966ad65a773db405f78e2bafc07b.tar.gz
talos-obmc-linux-affbc19a68f9966ad65a773db405f78e2bafc07b.zip
ceph: make sure syncfs flushes all cap snaps
Signed-off-by: Yan, Zheng <zyan@redhat.com>
Diffstat (limited to 'fs/ceph')
-rw-r--r--fs/ceph/caps.c18
-rw-r--r--fs/ceph/mds_client.c86
-rw-r--r--fs/ceph/mds_client.h1
-rw-r--r--fs/ceph/snap.c2
4 files changed, 76 insertions, 31 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 900c05fd77d8..bbd969e16a01 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1259,14 +1259,14 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
* asynchronously back to the MDS once sync writes complete and dirty
* data is written out.
*
- * Unless @again is true, skip cap_snaps that were already sent to
+ * Unless @kick is true, skip cap_snaps that were already sent to
* the MDS (i.e., during this session).
*
* Called under i_ceph_lock. Takes s_mutex as needed.
*/
void __ceph_flush_snaps(struct ceph_inode_info *ci,
struct ceph_mds_session **psession,
- int again)
+ int kick)
__releases(ci->i_ceph_lock)
__acquires(ci->i_ceph_lock)
{
@@ -1307,7 +1307,7 @@ retry:
}
/* only flush each capsnap once */
- if (!again && !list_empty(&capsnap->flushing_item)) {
+ if (!kick && !list_empty(&capsnap->flushing_item)) {
dout("already flushed %p, skipping\n", capsnap);
continue;
}
@@ -1317,6 +1317,9 @@ retry:
if (session && session->s_mds != mds) {
dout("oops, wrong session %p mutex\n", session);
+ if (kick)
+ goto out;
+
mutex_unlock(&session->s_mutex);
ceph_put_mds_session(session);
session = NULL;
@@ -1342,10 +1345,9 @@ retry:
capsnap->flush_tid = ++ci->i_cap_flush_last_tid;
atomic_inc(&capsnap->nref);
- if (!list_empty(&capsnap->flushing_item))
- list_del_init(&capsnap->flushing_item);
- list_add_tail(&capsnap->flushing_item,
- &session->s_cap_snaps_flushing);
+ if (list_empty(&capsnap->flushing_item))
+ list_add_tail(&capsnap->flushing_item,
+ &session->s_cap_snaps_flushing);
spin_unlock(&ci->i_ceph_lock);
dout("flush_snaps %p cap_snap %p follows %lld tid %llu\n",
@@ -2876,6 +2878,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
struct ceph_mds_session *session)
{
struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
u64 follows = le64_to_cpu(m->snap_follows);
struct ceph_cap_snap *capsnap;
int drop = 0;
@@ -2899,6 +2902,7 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
list_del(&capsnap->ci_item);
list_del(&capsnap->flushing_item);
ceph_put_cap_snap(capsnap);
+ wake_up_all(&mdsc->cap_flushing_wq);
drop = 1;
break;
} else {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 88010f9a254d..2bb9264b9225 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1488,17 +1488,22 @@ out_unlocked:
return err;
}
-static int check_cap_flush(struct inode *inode, u64 want_flush_seq)
+static int check_cap_flush(struct ceph_inode_info *ci,
+ u64 want_flush_seq, u64 want_snap_seq)
{
- struct ceph_inode_info *ci = ceph_inode(inode);
- int ret;
+ int ret1 = 1, ret2 = 1;
spin_lock(&ci->i_ceph_lock);
- if (ci->i_flushing_caps)
- ret = ci->i_cap_flush_seq >= want_flush_seq;
- else
- ret = 1;
+ if (want_flush_seq > 0 && ci->i_flushing_caps)
+ ret1 = ci->i_cap_flush_seq >= want_flush_seq;
+
+ if (want_snap_seq > 0 && !list_empty(&ci->i_cap_snaps)) {
+ struct ceph_cap_snap *capsnap =
+ list_first_entry(&ci->i_cap_snaps,
+ struct ceph_cap_snap, ci_item);
+ ret2 = capsnap->follows >= want_snap_seq;
+ }
spin_unlock(&ci->i_ceph_lock);
- return ret;
+ return ret1 && ret2;
}
/*
@@ -1506,45 +1511,72 @@ static int check_cap_flush(struct inode *inode, u64 want_flush_seq)
*
* returns true if we've flushed through want_flush_seq
*/
-static void wait_caps_flush(struct ceph_mds_client *mdsc, u64 want_flush_seq)
+static void wait_caps_flush(struct ceph_mds_client *mdsc,
+ u64 want_flush_seq, u64 want_snap_seq)
{
int mds;
dout("check_cap_flush want %lld\n", want_flush_seq);
mutex_lock(&mdsc->mutex);
- for (mds = 0; mds < mdsc->max_sessions; mds++) {
+ for (mds = 0; mds < mdsc->max_sessions; ) {
struct ceph_mds_session *session = mdsc->sessions[mds];
- struct inode *inode = NULL;
+ struct inode *inode1 = NULL, *inode2 = NULL;
- if (!session)
+ if (!session) {
+ mds++;
continue;
+ }
get_session(session);
mutex_unlock(&mdsc->mutex);
mutex_lock(&session->s_mutex);
if (!list_empty(&session->s_cap_flushing)) {
struct ceph_inode_info *ci =
- list_entry(session->s_cap_flushing.next,
- struct ceph_inode_info,
- i_flushing_item);
+ list_first_entry(&session->s_cap_flushing,
+ struct ceph_inode_info,
+ i_flushing_item);
- if (!check_cap_flush(&ci->vfs_inode, want_flush_seq)) {
+ if (!check_cap_flush(ci, want_flush_seq, 0)) {
dout("check_cap_flush still flushing %p "
"seq %lld <= %lld to mds%d\n",
&ci->vfs_inode, ci->i_cap_flush_seq,
- want_flush_seq, session->s_mds);
- inode = igrab(&ci->vfs_inode);
+ want_flush_seq, mds);
+ inode1 = igrab(&ci->vfs_inode);
+ }
+ }
+ if (!list_empty(&session->s_cap_snaps_flushing)) {
+ struct ceph_cap_snap *capsnap =
+ list_first_entry(&session->s_cap_snaps_flushing,
+ struct ceph_cap_snap,
+ flushing_item);
+ struct ceph_inode_info *ci = capsnap->ci;
+ if (!check_cap_flush(ci, 0, want_snap_seq)) {
+ dout("check_cap_flush still flushing snap %p "
+ "follows %lld <= %lld to mds%d\n",
+ &ci->vfs_inode, capsnap->follows,
+ want_snap_seq, mds);
+ inode2 = igrab(&ci->vfs_inode);
}
}
mutex_unlock(&session->s_mutex);
ceph_put_mds_session(session);
- if (inode) {
+ if (inode1) {
wait_event(mdsc->cap_flushing_wq,
- check_cap_flush(inode, want_flush_seq));
- iput(inode);
+ check_cap_flush(ceph_inode(inode1),
+ want_flush_seq, 0));
+ iput(inode1);
+ }
+ if (inode2) {
+ wait_event(mdsc->cap_flushing_wq,
+ check_cap_flush(ceph_inode(inode2),
+ 0, want_snap_seq));
+ iput(inode2);
}
+ if (!inode1 && !inode2)
+ mds++;
+
mutex_lock(&mdsc->mutex);
}
@@ -3391,6 +3423,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
atomic_set(&mdsc->num_sessions, 0);
mdsc->max_sessions = 0;
mdsc->stopping = 0;
+ mdsc->last_snap_seq = 0;
init_rwsem(&mdsc->snap_rwsem);
mdsc->snap_realms = RB_ROOT;
INIT_LIST_HEAD(&mdsc->snap_empty);
@@ -3517,7 +3550,7 @@ restart:
void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
{
- u64 want_tid, want_flush;
+ u64 want_tid, want_flush, want_snap;
if (mdsc->fsc->mount_state == CEPH_MOUNT_SHUTDOWN)
return;
@@ -3532,10 +3565,15 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
want_flush = mdsc->cap_flush_seq;
spin_unlock(&mdsc->cap_dirty_lock);
- dout("sync want tid %lld flush_seq %lld\n", want_tid, want_flush);
+ down_read(&mdsc->snap_rwsem);
+ want_snap = mdsc->last_snap_seq;
+ up_read(&mdsc->snap_rwsem);
+
+ dout("sync want tid %lld flush_seq %lld snap_seq %lld\n",
+ want_tid, want_flush, want_snap);
wait_unsafe_requests(mdsc, want_tid);
- wait_caps_flush(mdsc, want_flush);
+ wait_caps_flush(mdsc, want_flush, want_snap);
}
/*
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index d474141c034a..bf24d88cfeb2 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -290,6 +290,7 @@ struct ceph_mds_client {
* references (implying they contain no inodes with caps) that
* should be destroyed.
*/
+ u64 last_snap_seq;
struct rw_semaphore snap_rwsem;
struct rb_root snap_realms;
struct list_head snap_empty;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index ba708017d60b..233d906aec02 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -730,6 +730,8 @@ more:
/* queue realm for cap_snap creation */
list_add(&realm->dirty_item, &dirty_realms);
+ if (realm->seq > mdsc->last_snap_seq)
+ mdsc->last_snap_seq = realm->seq;
invalidate = 1;
} else if (!realm->cached_context) {
OpenPOWER on IntegriCloud