summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@infradead.org>2012-04-23 15:58:35 +1000
committerBen Myers <bpm@sgi.com>2012-05-14 16:20:28 -0500
commit8a48088f6439249019b5e17f6391e710656879d9 (patch)
treeb898076f1f2245dc435fa8e64b1ddac2b8424c7e
parent211e4d434bd737be38aabad0247ce3da9964370e (diff)
downloadtalos-obmc-linux-8a48088f6439249019b5e17f6391e710656879d9.tar.gz
talos-obmc-linux-8a48088f6439249019b5e17f6391e710656879d9.zip
xfs: don't flush inodes from background inode reclaim
We already flush dirty inodes throug the AIL regularly, there is no reason to have second thread compete with it and disturb the I/O pattern. We still do write inodes when doing a synchronous reclaim from the shrinker or during unmount for now. Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Dave Chinner <dchinner@redhat.com> Reviewed-by: Mark Tinguely <tinguely@sgi.com> Signed-off-by: Ben Myers <bpm@sgi.com>
-rw-r--r--fs/xfs/xfs_sync.c102
1 files changed, 42 insertions, 60 deletions
diff --git a/fs/xfs/xfs_sync.c b/fs/xfs/xfs_sync.c
index 85d03e6a2677..7b2bccc4d67b 100644
--- a/fs/xfs/xfs_sync.c
+++ b/fs/xfs/xfs_sync.c
@@ -631,11 +631,8 @@ xfs_reclaim_inode_grab(
}
/*
- * Inodes in different states need to be treated differently, and the return
- * value of xfs_iflush is not sufficient to get this right. The following table
- * lists the inode states and the reclaim actions necessary for non-blocking
- * reclaim:
- *
+ * Inodes in different states need to be treated differently. The following
+ * table lists the inode states and the reclaim actions necessary:
*
* inode state iflush ret required action
* --------------- ---------- ---------------
@@ -645,9 +642,8 @@ xfs_reclaim_inode_grab(
* stale, unpinned 0 reclaim
* clean, pinned(*) 0 requeue
* stale, pinned EAGAIN requeue
- * dirty, delwri ok 0 requeue
- * dirty, delwri blocked EAGAIN requeue
- * dirty, sync flush 0 reclaim
+ * dirty, async - requeue
+ * dirty, sync 0 reclaim
*
* (*) dgc: I don't think the clean, pinned state is possible but it gets
* handled anyway given the order of checks implemented.
@@ -658,26 +654,23 @@ xfs_reclaim_inode_grab(
*
* Also, because we get the flush lock first, we know that any inode that has
* been flushed delwri has had the flush completed by the time we check that
- * the inode is clean. The clean inode check needs to be done before flushing
- * the inode delwri otherwise we would loop forever requeuing clean inodes as
- * we cannot tell apart a successful delwri flush and a clean inode from the
- * return value of xfs_iflush().
+ * the inode is clean.
*
- * Note that because the inode is flushed delayed write by background
- * writeback, the flush lock may already be held here and waiting on it can
- * result in very long latencies. Hence for sync reclaims, where we wait on the
- * flush lock, the caller should push out delayed write inodes first before
- * trying to reclaim them to minimise the amount of time spent waiting. For
- * background relaim, we just requeue the inode for the next pass.
+ * Note that because the inode is flushed delayed write by AIL pushing, the
+ * flush lock may already be held here and waiting on it can result in very
+ * long latencies. Hence for sync reclaims, where we wait on the flush lock,
+ * the caller should push the AIL first before trying to reclaim inodes to
+ * minimise the amount of time spent waiting. For background relaim, we only
+ * bother to reclaim clean inodes anyway.
*
* Hence the order of actions after gaining the locks should be:
* bad => reclaim
* shutdown => unpin and reclaim
- * pinned, delwri => requeue
+ * pinned, async => requeue
* pinned, sync => unpin
* stale => reclaim
* clean => reclaim
- * dirty, delwri => flush and requeue
+ * dirty, async => requeue
* dirty, sync => flush, wait and reclaim
*/
STATIC int
@@ -716,10 +709,8 @@ restart:
goto reclaim;
}
if (xfs_ipincount(ip)) {
- if (!(sync_mode & SYNC_WAIT)) {
- xfs_ifunlock(ip);
- goto out;
- }
+ if (!(sync_mode & SYNC_WAIT))
+ goto out_ifunlock;
xfs_iunpin_wait(ip);
}
if (xfs_iflags_test(ip, XFS_ISTALE))
@@ -728,6 +719,13 @@ restart:
goto reclaim;
/*
+ * Never flush out dirty data during non-blocking reclaim, as it would
+ * just contend with AIL pushing trying to do the same job.
+ */
+ if (!(sync_mode & SYNC_WAIT))
+ goto out_ifunlock;
+
+ /*
* Now we have an inode that needs flushing.
*
* We do a nonblocking flush here even if we are doing a SYNC_WAIT
@@ -745,42 +743,13 @@ restart:
* pass through will see the stale flag set on the inode.
*/
error = xfs_iflush(ip, SYNC_TRYLOCK | sync_mode);
- if (sync_mode & SYNC_WAIT) {
- if (error == EAGAIN) {
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- /* backoff longer than in xfs_ifree_cluster */
- delay(2);
- goto restart;
- }
- xfs_iflock(ip);
- goto reclaim;
- }
-
- /*
- * When we have to flush an inode but don't have SYNC_WAIT set, we
- * flush the inode out using a delwri buffer and wait for the next
- * call into reclaim to find it in a clean state instead of waiting for
- * it now. We also don't return errors here - if the error is transient
- * then the next reclaim pass will flush the inode, and if the error
- * is permanent then the next sync reclaim will reclaim the inode and
- * pass on the error.
- */
- if (error && error != EAGAIN && !XFS_FORCED_SHUTDOWN(ip->i_mount)) {
- xfs_warn(ip->i_mount,
- "inode 0x%llx background reclaim flush failed with %d",
- (long long)ip->i_ino, error);
+ if (error == EAGAIN) {
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ /* backoff longer than in xfs_ifree_cluster */
+ delay(2);
+ goto restart;
}
-out:
- xfs_iflags_clear(ip, XFS_IRECLAIM);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- /*
- * We could return EAGAIN here to make reclaim rescan the inode tree in
- * a short while. However, this just burns CPU time scanning the tree
- * waiting for IO to complete and xfssyncd never goes back to the idle
- * state. Instead, return 0 to let the next scheduled background reclaim
- * attempt to reclaim the inode again.
- */
- return 0;
+ xfs_iflock(ip);
reclaim:
xfs_ifunlock(ip);
@@ -814,8 +783,21 @@ reclaim:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_inode_free(ip);
-
return error;
+
+out_ifunlock:
+ xfs_ifunlock(ip);
+out:
+ xfs_iflags_clear(ip, XFS_IRECLAIM);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ /*
+ * We could return EAGAIN here to make reclaim rescan the inode tree in
+ * a short while. However, this just burns CPU time scanning the tree
+ * waiting for IO to complete and xfssyncd never goes back to the idle
+ * state. Instead, return 0 to let the next scheduled background reclaim
+ * attempt to reclaim the inode again.
+ */
+ return 0;
}
/*
OpenPOWER on IntegriCloud