From 7994e6f7254354e03028a11f98a27bd67dace9f1 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 3 May 2012 14:48:01 +0200 Subject: vfs: Move waiting for inode writeback from end_writeback() to evict_inode() Currently, I_SYNC can never be set when evict_inode() (and thus end_writeback()) is called, because the flusher thread holds an inode reference while the inode is under writeback. As a result, inode_sync_wait() in those places currently does nothing. However, that is going to change, and the change unveils problems with calling inode_sync_wait() from end_writeback(). Several filesystems (btrfs, gfs2, ...) call end_writeback() after they have deleted the inode, and other filesystems (ext3, ext4, reiserfs, ...) can deadlock when waiting for I_SYNC because they call end_writeback() from within a transaction. To avoid these issues, we move inode_sync_wait() into evict(), before the call to ->evict_inode(). That way we preserve the current property that ->evict_inode() and writeback never run in parallel, and all filesystems are safe. 
Signed-off-by: Jan Kara Signed-off-by: Fengguang Wu --- fs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 9f4f5fecc096..501fc5daf6f4 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -500,7 +500,6 @@ void end_writeback(struct inode *inode) BUG_ON(!list_empty(&inode->i_data.private_list)); BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(inode->i_state & I_CLEAR); - inode_sync_wait(inode); /* don't need i_lock here, no concurrent mods to i_state */ inode->i_state = I_FREEING | I_CLEAR; } @@ -531,6 +530,8 @@ static void evict(struct inode *inode) inode_sb_list_del(inode); + inode_sync_wait(inode); + if (op->evict_inode) { op->evict_inode(inode); } else { -- cgit v1.2.1 From dbd5768f87ff6fb0a4fe09c4d7b6c4a24de99430 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 3 May 2012 14:48:02 +0200 Subject: vfs: Rename end_writeback() to clear_inode() After we moved inode_sync_wait() out of end_writeback(), it no longer makes sense to call the function end_writeback(). Rename it to clear_inode(), which describes well what the function really does - set the I_CLEAR flag. 
Signed-off-by: Jan Kara Signed-off-by: Fengguang Wu --- fs/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 501fc5daf6f4..02c0fa5e16a4 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -486,7 +486,7 @@ void __remove_inode_hash(struct inode *inode) } EXPORT_SYMBOL(__remove_inode_hash); -void end_writeback(struct inode *inode) +void clear_inode(struct inode *inode) { might_sleep(); /* @@ -503,7 +503,7 @@ void end_writeback(struct inode *inode) /* don't need i_lock here, no concurrent mods to i_state */ inode->i_state = I_FREEING | I_CLEAR; } -EXPORT_SYMBOL(end_writeback); +EXPORT_SYMBOL(clear_inode); /* * Free the inode passed in, removing it from the lists it is still connected @@ -537,7 +537,7 @@ static void evict(struct inode *inode) } else { if (inode->i_data.nrpages) truncate_inode_pages(&inode->i_data, 0); - end_writeback(inode); + clear_inode(inode); } if (S_ISBLK(inode->i_mode) && inode->i_bdev) bd_forget(inode); -- cgit v1.2.1 From 169ebd90131b2ffca74bb2dbe7eeacd39fb83714 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 3 May 2012 14:48:03 +0200 Subject: writeback: Avoid iput() from flusher thread Doing iput() from the flusher thread (writeback_sb_inodes()) can create problems because iput() can do a lot of work - for example, truncate the inode if it's the last iput on an unlinked file. Some filesystems depend on the flusher thread progressing (e.g. because they need to flush delay-allocated blocks to reduce allocation uncertainty), and so the flusher thread doing a truncate creates interesting dependencies and possibilities for deadlocks. We get rid of iput() in the flusher thread by using the fact that the I_SYNC inode flag effectively pins the inode in memory. So if we take care to either hold i_lock or have I_SYNC set, we can get away without taking an inode reference in writeback_sb_inodes(). 
As a side effect of these changes, we also fix a possible use-after-free in wb_writeback(): the inode_wait_for_writeback() call could try to reacquire i_lock on an inode that had already been freed. Signed-off-by: Jan Kara Signed-off-by: Fengguang Wu --- fs/inode.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 02c0fa5e16a4..f4e145016611 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -530,7 +530,13 @@ static void evict(struct inode *inode) inode_sb_list_del(inode); - inode_sync_wait(inode); + /* + * Wait for flusher thread to be done with the inode so that filesystem + * does not start destroying it while writeback is still running. Since + * the inode has I_FREEING set, flusher thread won't start new work on + * the inode. We just have to wait for running writeback to finish. + */ + inode_wait_for_writeback(inode); if (op->evict_inode) { op->evict_inode(inode); -- cgit v1.2.1