diff options
author | Joseph Qi <joseph.qi@huawei.com> | 2015-02-16 16:00:12 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-16 17:56:05 -0800 |
commit | 4813962beef7586f890a645a1bda77691da4b74a (patch) | |
tree | 75885da49ee8bc539aa41c6af8395a4f88c2a7d9 | |
parent | 3a83b342c87e6d21290de8dc76ec20a67821261d (diff) | |
download | blackbird-op-linux-4813962beef7586f890a645a1bda77691da4b74a.tar.gz blackbird-op-linux-4813962beef7586f890a645a1bda77691da4b74a.zip |
ocfs2: wait for orphan recovery first once append O_DIRECT write crash
If one node has crashed with an orphan entry left over, another node which
does an append O_DIRECT write to the same file will overwrite
i_dio_orphaned_slot. The old entry will then never be cleaned up. If
this case happens, we let the writer wait for orphan recovery first.
Signed-off-by: Joseph Qi <joseph.qi@huawei.com>
Cc: Weiwei Wang <wangww631@huawei.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Mark Fasheh <mfasheh@suse.com>
Cc: Xuejiufei <xuejiufei@huawei.com>
Cc: alex chen <alex.chen@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | fs/ocfs2/inode.h | 2 | ||||
-rw-r--r-- | fs/ocfs2/journal.c | 2 | ||||
-rw-r--r-- | fs/ocfs2/namei.c | 37 | ||||
-rw-r--r-- | fs/ocfs2/super.c | 2 |
4 files changed, 43 insertions, 0 deletions
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h index ca3431ee7f24..5e86b247c821 100644 --- a/fs/ocfs2/inode.h +++ b/fs/ocfs2/inode.h @@ -81,6 +81,8 @@ struct ocfs2_inode_info tid_t i_sync_tid; tid_t i_datasync_tid; + wait_queue_head_t append_dio_wq; + struct dquot *i_dquot[MAXQUOTAS]; }; diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 9730f5350ef4..ff531928269e 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -2204,6 +2204,8 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, ret = ocfs2_del_inode_from_orphan(osb, inode, 0, 0); if (ret) mlog_errno(ret); + + wake_up(&OCFS2_I(inode)->append_dio_wq); } /* else if ORPHAN_NO_NEED_TRUNCATE, do nothing */ next: diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 7eec45d0d85f..b5c3a5ea3ee6 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -2577,6 +2577,27 @@ leave: return status; } +static int ocfs2_dio_orphan_recovered(struct inode *inode) +{ + int ret; + struct buffer_head *di_bh = NULL; + struct ocfs2_dinode *di = NULL; + + ret = ocfs2_inode_lock(inode, &di_bh, 1); + if (ret < 0) { + mlog_errno(ret); + return 0; + } + + di = (struct ocfs2_dinode *) di_bh->b_data; + ret = !(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL)); + ocfs2_inode_unlock(inode, 1); + brelse(di_bh); + + return ret; +} + +#define OCFS2_DIO_ORPHANED_FL_CHECK_INTERVAL 10000 int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb, struct inode *inode) { @@ -2586,13 +2607,29 @@ int ocfs2_add_inode_to_orphan(struct ocfs2_super *osb, struct buffer_head *di_bh = NULL; int status = 0; handle_t *handle = NULL; + struct ocfs2_dinode *di = NULL; +restart: status = ocfs2_inode_lock(inode, &di_bh, 1); if (status < 0) { mlog_errno(status); goto bail; } + di = (struct ocfs2_dinode *) di_bh->b_data; + /* + * Another append dio crashed? + * If so, wait for recovery first. 
+ */ + if (unlikely(di->i_flags & cpu_to_le32(OCFS2_DIO_ORPHANED_FL))) { + ocfs2_inode_unlock(inode, 1); + brelse(di_bh); + wait_event_interruptible_timeout(OCFS2_I(inode)->append_dio_wq, + ocfs2_dio_orphan_recovered(inode), + msecs_to_jiffies(OCFS2_DIO_ORPHANED_FL_CHECK_INTERVAL)); + goto restart; + } + status = ocfs2_prepare_orphan_dir(osb, &orphan_dir_inode, OCFS2_I(inode)->ip_blkno, orphan_name, diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 87a1f7679d9b..26675185b886 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1746,6 +1746,8 @@ static void ocfs2_inode_init_once(void *data) ocfs2_lock_res_init_once(&oi->ip_inode_lockres); ocfs2_lock_res_init_once(&oi->ip_open_lockres); + init_waitqueue_head(&oi->append_dio_wq); + ocfs2_metadata_cache_init(INODE_CACHE(&oi->vfs_inode), &ocfs2_inode_caching_ops); |