Diffstat (limited to 'fs/xfs/linux-2.6/xfs_buf.c')
-rw-r--r--  fs/xfs/linux-2.6/xfs_buf.c  481
1 file changed, 282 insertions(+), 199 deletions(-)
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 286e36e21dae..ac1c7e8378dd 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -44,12 +44,7 @@
static kmem_zone_t *xfs_buf_zone;
STATIC int xfsbufd(void *);
-STATIC int xfsbufd_wakeup(struct shrinker *, int, gfp_t);
STATIC void xfs_buf_delwri_queue(xfs_buf_t *, int);
-static struct shrinker xfs_buf_shake = {
- .shrink = xfsbufd_wakeup,
- .seeks = DEFAULT_SEEKS,
-};
static struct workqueue_struct *xfslogd_workqueue;
struct workqueue_struct *xfsdatad_workqueue;
@@ -168,8 +163,79 @@ test_page_region(
}
/*
- * Internal xfs_buf_t object manipulation
+ * xfs_buf_lru_add - add a buffer to the LRU.
+ *
+ * The LRU takes a new reference to the buffer so that it will only be freed
+ * once the shrinker takes the buffer off the LRU.
*/
+STATIC void
+xfs_buf_lru_add(
+ struct xfs_buf *bp)
+{
+ struct xfs_buftarg *btp = bp->b_target;
+
+ spin_lock(&btp->bt_lru_lock);
+ if (list_empty(&bp->b_lru)) {
+ atomic_inc(&bp->b_hold);
+ list_add_tail(&bp->b_lru, &btp->bt_lru);
+ btp->bt_lru_nr++;
+ }
+ spin_unlock(&btp->bt_lru_lock);
+}
+
+/*
+ * xfs_buf_lru_del - remove a buffer from the LRU
+ *
+ * The unlocked check is safe here because it only occurs when there are
+ * no b_lru_ref counts left on the buffer under the pag->pag_buf_lock. It
+ * is there to optimise the shrinker removing the buffer from the LRU and
+ * calling xfs_buf_free(), i.e. it removes an unnecessary round trip on
+ * the bt_lru_lock.
+ */
+STATIC void
+xfs_buf_lru_del(
+ struct xfs_buf *bp)
+{
+ struct xfs_buftarg *btp = bp->b_target;
+
+ if (list_empty(&bp->b_lru))
+ return;
+
+ spin_lock(&btp->bt_lru_lock);
+ if (!list_empty(&bp->b_lru)) {
+ list_del_init(&bp->b_lru);
+ btp->bt_lru_nr--;
+ }
+ spin_unlock(&btp->bt_lru_lock);
+}
+
+/*
+ * When we mark a buffer stale, we remove the buffer from the LRU and clear the
+ * b_lru_ref count so that the buffer is freed immediately when the buffer
+ * reference count falls to zero. If the buffer is already on the LRU, we need
+ * to remove the reference that LRU holds on the buffer.
+ *
+ * This prevents build-up of stale buffers on the LRU.
+ */
+void
+xfs_buf_stale(
+ struct xfs_buf *bp)
+{
+ bp->b_flags |= XBF_STALE;
+ atomic_set(&(bp)->b_lru_ref, 0);
+ if (!list_empty(&bp->b_lru)) {
+ struct xfs_buftarg *btp = bp->b_target;
+
+ spin_lock(&btp->bt_lru_lock);
+ if (!list_empty(&bp->b_lru)) {
+ list_del_init(&bp->b_lru);
+ btp->bt_lru_nr--;
+ atomic_dec(&bp->b_hold);
+ }
+ spin_unlock(&btp->bt_lru_lock);
+ }
+ ASSERT(atomic_read(&bp->b_hold) >= 1);
+}
STATIC void
_xfs_buf_initialize(
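
The LRU helpers above use an unlocked list_empty() check followed by a re-check under bt_lru_lock, so the common "buffer was never on the LRU" case avoids the lock round trip entirely. A minimal userspace sketch of that check-lock-recheck pattern, with a pthread mutex standing in for bt_lru_lock (all names here are illustrative, not part of the patch; compile with -pthread):

#include <pthread.h>
#include <stdbool.h>

struct lru_node {
	struct lru_node		*prev, *next;	/* points at itself when off the LRU */
};

struct lru {
	pthread_mutex_t		lock;		/* stands in for bt_lru_lock */
	struct lru_node		head;
	unsigned long		nr;		/* stands in for bt_lru_nr */
};

static bool node_on_lru(struct lru_node *n)
{
	return n->next != n;			/* analogue of !list_empty() */
}

static void lru_del(struct lru *lru, struct lru_node *n)
{
	/* unlocked check: skip the lock entirely if we were never added */
	if (!node_on_lru(n))
		return;

	pthread_mutex_lock(&lru->lock);
	/* re-check under the lock; another thread may have removed us */
	if (node_on_lru(n)) {
		n->prev->next = n->next;
		n->next->prev = n->prev;
		n->next = n->prev = n;		/* like list_del_init() */
		lru->nr--;
	}
	pthread_mutex_unlock(&lru->lock);
}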
@@ -186,10 +252,12 @@ _xfs_buf_initialize(
memset(bp, 0, sizeof(xfs_buf_t));
atomic_set(&bp->b_hold, 1);
+ atomic_set(&bp->b_lru_ref, 1);
init_completion(&bp->b_iowait);
+ INIT_LIST_HEAD(&bp->b_lru);
INIT_LIST_HEAD(&bp->b_list);
- INIT_LIST_HEAD(&bp->b_hash_list);
- init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
+ RB_CLEAR_NODE(&bp->b_rbnode);
+ sema_init(&bp->b_sema, 0); /* held, no waiters */
XB_SET_OWNER(bp);
bp->b_target = target;
bp->b_file_offset = range_base;
@@ -262,7 +330,7 @@ xfs_buf_free(
{
trace_xfs_buf_free(bp, _RET_IP_);
- ASSERT(list_empty(&bp->b_hash_list));
+ ASSERT(list_empty(&bp->b_lru));
if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
uint i;
@@ -339,7 +407,6 @@ _xfs_buf_lookup_pages(
__func__, gfp_mask);
XFS_STATS_INC(xb_page_retries);
- xfsbufd_wakeup(NULL, 0, gfp_mask);
congestion_wait(BLK_RW_ASYNC, HZ/50);
goto retry;
}
@@ -422,8 +489,10 @@ _xfs_buf_find(
{
xfs_off_t range_base;
size_t range_length;
- xfs_bufhash_t *hash;
- xfs_buf_t *bp, *n;
+ struct xfs_perag *pag;
+ struct rb_node **rbp;
+ struct rb_node *parent;
+ xfs_buf_t *bp;
range_base = (ioff << BBSHIFT);
range_length = (isize << BBSHIFT);
@@ -432,14 +501,37 @@ _xfs_buf_find(
ASSERT(!(range_length < (1 << btp->bt_sshift)));
ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
- hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];
-
- spin_lock(&hash->bh_lock);
-
- list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
- ASSERT(btp == bp->b_target);
- if (bp->b_file_offset == range_base &&
- bp->b_buffer_length == range_length) {
+ /* get tree root */
+ pag = xfs_perag_get(btp->bt_mount,
+ xfs_daddr_to_agno(btp->bt_mount, ioff));
+
+ /* walk tree */
+ spin_lock(&pag->pag_buf_lock);
+ rbp = &pag->pag_buf_tree.rb_node;
+ parent = NULL;
+ bp = NULL;
+ while (*rbp) {
+ parent = *rbp;
+ bp = rb_entry(parent, struct xfs_buf, b_rbnode);
+
+ if (range_base < bp->b_file_offset)
+ rbp = &(*rbp)->rb_left;
+ else if (range_base > bp->b_file_offset)
+ rbp = &(*rbp)->rb_right;
+ else {
+ /*
+ * found a block offset match. If the range doesn't
+ * match, the only way this is allowed is if the buffer
+ * in the cache is stale and the transaction that made
+ * it stale has not yet committed. i.e. we are
+ * reallocating a busy extent. Skip this buffer and
+ * continue searching to the right for an exact match.
+ */
+ if (bp->b_buffer_length != range_length) {
+ ASSERT(bp->b_flags & XBF_STALE);
+ rbp = &(*rbp)->rb_right;
+ continue;
+ }
atomic_inc(&bp->b_hold);
goto found;
}
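
The lookup above keys buffers by block offset in a per-AG rbtree; a stale buffer left behind by a busy-extent reallocation can share the offset but have a different length, so the walk simply continues to the right until it finds an exact match. A simplified userspace sketch of that loop, with a plain (unbalanced) binary tree standing in for the kernel rbtree and all names hypothetical:

#include <stddef.h>
#include <stdbool.h>

struct cached_buf {
	struct cached_buf	*left, *right;
	unsigned long long	offset;		/* block offset: the tree key */
	size_t			length;
	bool			stale;		/* set when the extent was freed */
};

static struct cached_buf *
buf_lookup(struct cached_buf *root, unsigned long long offset, size_t length)
{
	struct cached_buf	*bp = root;

	while (bp) {
		if (offset < bp->offset)
			bp = bp->left;
		else if (offset > bp->offset)
			bp = bp->right;
		else if (bp->length != length) {
			/*
			 * Offset matches but the length does not: only a
			 * stale buffer from a reallocated extent can do
			 * this, so keep searching to the right.
			 */
			bp = bp->right;
		} else {
			return bp;	/* exact match */
		}
	}
	return NULL;
}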
@@ -449,41 +541,32 @@ _xfs_buf_find(
if (new_bp) {
_xfs_buf_initialize(new_bp, btp, range_base,
range_length, flags);
- new_bp->b_hash = hash;
- list_add(&new_bp->b_hash_list, &hash->bh_list);
+ rb_link_node(&new_bp->b_rbnode, parent, rbp);
+ rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
+ /* the buffer keeps the perag reference until it is freed */
+ new_bp->b_pag = pag;
+ spin_unlock(&pag->pag_buf_lock);
} else {
XFS_STATS_INC(xb_miss_locked);
+ spin_unlock(&pag->pag_buf_lock);
+ xfs_perag_put(pag);
}
-
- spin_unlock(&hash->bh_lock);
return new_bp;
found:
- spin_unlock(&hash->bh_lock);
+ spin_unlock(&pag->pag_buf_lock);
+ xfs_perag_put(pag);
- /* Attempt to get the semaphore without sleeping,
- * if this does not work then we need to drop the
- * spinlock and do a hard attempt on the semaphore.
- */
- if (down_trylock(&bp->b_sema)) {
+ if (xfs_buf_cond_lock(bp)) {
+ /* failed, so wait for the lock if requested. */
if (!(flags & XBF_TRYLOCK)) {
- /* wait for buffer ownership */
xfs_buf_lock(bp);
XFS_STATS_INC(xb_get_locked_waited);
} else {
- /* We asked for a trylock and failed, no need
- * to look at file offset and length here, we
- * know that this buffer at least overlaps our
- * buffer and is locked, therefore our buffer
- * either does not exist, or is this buffer.
- */
xfs_buf_rele(bp);
XFS_STATS_INC(xb_busy_locked);
return NULL;
}
- } else {
- /* trylock worked */
- XB_SET_OWNER(bp);
}
if (bp->b_flags & XBF_STALE) {
@@ -625,8 +708,7 @@ void
xfs_buf_readahead(
xfs_buftarg_t *target,
xfs_off_t ioff,
- size_t isize,
- xfs_buf_flags_t flags)
+ size_t isize)
{
struct backing_dev_info *bdi;
@@ -634,8 +716,42 @@ xfs_buf_readahead(
if (bdi_read_congested(bdi))
return;
- flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
- xfs_buf_read(target, ioff, isize, flags);
+ xfs_buf_read(target, ioff, isize,
+ XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK);
+}
+
+/*
+ * Read an uncached buffer from disk. Allocates and returns a locked
+ * buffer containing the disk contents or nothing.
+ */
+struct xfs_buf *
+xfs_buf_read_uncached(
+ struct xfs_mount *mp,
+ struct xfs_buftarg *target,
+ xfs_daddr_t daddr,
+ size_t length,
+ int flags)
+{
+ xfs_buf_t *bp;
+ int error;
+
+ bp = xfs_buf_get_uncached(target, length, flags);
+ if (!bp)
+ return NULL;
+
+ /* set up the buffer for a read IO */
+ xfs_buf_lock(bp);
+ XFS_BUF_SET_ADDR(bp, daddr);
+ XFS_BUF_READ(bp);
+ XFS_BUF_BUSY(bp);
+
+ xfsbdstrat(mp, bp);
+ error = xfs_buf_iowait(bp);
+ if (error || bp->b_error) {
+ xfs_buf_relse(bp);
+ return NULL;
+ }
+ return bp;
}
xfs_buf_t *
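
For reference, a hypothetical caller of the new xfs_buf_read_uncached() helper (this caller is not part of the patch; it is only a sketch of the intended usage). The helper returns a locked, uncached buffer containing the disk contents, or NULL if the allocation or the I/O failed, so a caller only has to test for NULL and release the buffer when finished:

STATIC int
example_read_disk_block(
	struct xfs_mount	*mp,
	xfs_daddr_t		daddr,
	size_t			len)
{
	struct xfs_buf		*bp;

	/* hypothetical usage; the target and flags depend on the caller */
	bp = xfs_buf_read_uncached(mp, mp->m_ddev_targp, daddr, len, 0);
	if (!bp)
		return EIO;

	/* ... examine the buffer contents via XFS_BUF_PTR(bp) ... */

	xfs_buf_relse(bp);	/* drops the lock and the reference */
	return 0;
}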
@@ -707,9 +823,10 @@ xfs_buf_associate_memory(
}
xfs_buf_t *
-xfs_buf_get_noaddr(
+xfs_buf_get_uncached(
+ struct xfs_buftarg *target,
size_t len,
- xfs_buftarg_t *target)
+ int flags)
{
unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
int error, i;
@@ -725,7 +842,7 @@ xfs_buf_get_noaddr(
goto fail_free_buf;
for (i = 0; i < page_count; i++) {
- bp->b_pages[i] = alloc_page(GFP_KERNEL);
+ bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
if (!bp->b_pages[i])
goto fail_free_mem;
}
@@ -740,7 +857,7 @@ xfs_buf_get_noaddr(
xfs_buf_unlock(bp);
- trace_xfs_buf_get_noaddr(bp, _RET_IP_);
+ trace_xfs_buf_get_uncached(bp, _RET_IP_);
return bp;
fail_free_mem:
@@ -774,29 +891,32 @@ void
xfs_buf_rele(
xfs_buf_t *bp)
{
- xfs_bufhash_t *hash = bp->b_hash;
+ struct xfs_perag *pag = bp->b_pag;
trace_xfs_buf_rele(bp, _RET_IP_);
- if (unlikely(!hash)) {
- ASSERT(!bp->b_relse);
+ if (!pag) {
+ ASSERT(list_empty(&bp->b_lru));
+ ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
if (atomic_dec_and_test(&bp->b_hold))
xfs_buf_free(bp);
return;
}
+ ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
+
ASSERT(atomic_read(&bp->b_hold) > 0);
- if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {
- if (bp->b_relse) {
- atomic_inc(&bp->b_hold);
- spin_unlock(&hash->bh_lock);
- (*(bp->b_relse)) (bp);
- } else if (bp->b_flags & XBF_FS_MANAGED) {
- spin_unlock(&hash->bh_lock);
+ if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
+ if (!(bp->b_flags & XBF_STALE) &&
+ atomic_read(&bp->b_lru_ref)) {
+ xfs_buf_lru_add(bp);
+ spin_unlock(&pag->pag_buf_lock);
} else {
+ xfs_buf_lru_del(bp);
ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
- list_del_init(&bp->b_hash_list);
- spin_unlock(&hash->bh_lock);
+ rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
+ spin_unlock(&pag->pag_buf_lock);
+ xfs_perag_put(pag);
xfs_buf_free(bp);
}
}
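
xfs_buf_rele() above only takes pag_buf_lock when the hold count is about to reach zero, via atomic_dec_and_lock(); dropping a non-final reference stays off the lock entirely. A self-contained userspace sketch of that idiom using C11 atomics and a pthread mutex (names are hypothetical; compile with -pthread):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

/* Returns true, with *lock held, only if the count dropped to zero. */
static bool dec_and_lock(atomic_int *cnt, pthread_mutex_t *lock)
{
	int old = atomic_load(cnt);

	/* fast path: decrement without the lock while the result stays > 0 */
	while (old > 1) {
		if (atomic_compare_exchange_weak(cnt, &old, old - 1))
			return false;
	}

	/* slow path: this looks like the final reference */
	pthread_mutex_lock(lock);
	if (atomic_fetch_sub(cnt, 1) == 1)
		return true;		/* now zero, lock stays held */
	pthread_mutex_unlock(lock);
	return false;
}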
@@ -814,10 +934,18 @@ xfs_buf_rele(
*/
/*
- * Locks a buffer object, if it is not already locked.
- * Note that this in no way locks the underlying pages, so it is only
- * useful for synchronizing concurrent use of buffer objects, not for
- * synchronizing independent access to the underlying pages.
+ * Locks a buffer object, if it is not already locked. Note that this in
+ * no way locks the underlying pages, so it is only useful for
+ * synchronizing concurrent use of buffer objects, not for synchronizing
+ * independent access to the underlying pages.
+ *
+ * If we come across a stale, pinned, locked buffer, we know that we are
+ * being asked to lock a buffer that has been reallocated. Because it is
+ * pinned, we know that the log has not been pushed to disk and hence it
+ * will still be locked. Rather than continuing to have trylock attempts
+ * fail until someone else pushes the log, push it ourselves before
+ * returning. This means that the xfsaild will not get stuck trying
+ * to push on stale inode buffers.
*/
int
xfs_buf_cond_lock(
@@ -828,6 +956,8 @@ xfs_buf_cond_lock(
locked = down_trylock(&bp->b_sema) == 0;
if (locked)
XB_SET_OWNER(bp);
+ else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
+ xfs_log_force(bp->b_target->bt_mount, 0);
trace_xfs_buf_cond_lock(bp, _RET_IP_);
return locked ? 0 : -EBUSY;
@@ -859,7 +989,7 @@ xfs_buf_lock(
trace_xfs_buf_lock(bp, _RET_IP_);
if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
- xfs_log_force(bp->b_mount, 0);
+ xfs_log_force(bp->b_target->bt_mount, 0);
if (atomic_read(&bp->b_io_remaining))
blk_run_address_space(bp->b_target->bt_mapping);
down(&bp->b_sema);
@@ -924,19 +1054,7 @@ xfs_buf_iodone_work(
xfs_buf_t *bp =
container_of(work, xfs_buf_t, b_iodone_work);
- /*
- * We can get an EOPNOTSUPP to ordered writes. Here we clear the
- * ordered flag and reissue them. Because we can't tell the higher
- * layers directly that they should not issue ordered I/O anymore, they
- * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion.
- */
- if ((bp->b_error == EOPNOTSUPP) &&
- (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) {
- trace_xfs_buf_ordered_retry(bp, _RET_IP_);
- bp->b_flags &= ~XBF_ORDERED;
- bp->b_flags |= _XFS_BARRIER_FAILED;
- xfs_buf_iorequest(bp);
- } else if (bp->b_iodone)
+ if (bp->b_iodone)
(*(bp->b_iodone))(bp);
else if (bp->b_flags & XBF_ASYNC)
xfs_buf_relse(bp);
@@ -982,7 +1100,6 @@ xfs_bwrite(
{
int error;
- bp->b_mount = mp;
bp->b_flags |= XBF_WRITE;
bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
@@ -1003,8 +1120,6 @@ xfs_bdwrite(
{
trace_xfs_buf_bdwrite(bp, _RET_IP_);
- bp->b_mount = mp;
-
bp->b_flags &= ~XBF_READ;
bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);
@@ -1013,7 +1128,7 @@ xfs_bdwrite(
/*
* Called when we want to stop a buffer from getting written or read.
- * We attach the EIO error, muck with its flags, and call biodone
+ * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
* so that the proper iodone callbacks get called.
*/
STATIC int
@@ -1030,21 +1145,21 @@ xfs_bioerror(
XFS_BUF_ERROR(bp, EIO);
/*
- * We're calling biodone, so delete XBF_DONE flag.
+ * We're calling xfs_buf_ioend, so delete XBF_DONE flag.
*/
XFS_BUF_UNREAD(bp);
XFS_BUF_UNDELAYWRITE(bp);
XFS_BUF_UNDONE(bp);
XFS_BUF_STALE(bp);
- xfs_biodone(bp);
+ xfs_buf_ioend(bp, 0);
return EIO;
}
/*
* Same as xfs_bioerror, except that we are releasing the buffer
- * here ourselves, and avoiding the biodone call.
+ * here ourselves, and avoiding the xfs_buf_ioend call.
* This is meant for userdata errors; metadata bufs come with
* iodone functions attached, so that we can track down errors.
*/
@@ -1093,7 +1208,7 @@ int
xfs_bdstrat_cb(
struct xfs_buf *bp)
{
- if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
+ if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
trace_xfs_bdstrat_shut(bp, _RET_IP_);
/*
* Metadata write that didn't get logged but
@@ -1195,7 +1310,7 @@ _xfs_buf_ioapply(
if (bp->b_flags & XBF_ORDERED) {
ASSERT(!(bp->b_flags & XBF_READ));
- rw = WRITE_BARRIER;
+ rw = WRITE_FLUSH_FUA;
} else if (bp->b_flags & XBF_LOG_BUFFER) {
ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
bp->b_flags &= ~_XBF_RUN_QUEUES;
@@ -1394,89 +1509,84 @@ xfs_buf_iomove(
*/
/*
- * Wait for any bufs with callbacks that have been submitted but
- * have not yet returned... walk the hash list for the target.
+ * Wait for any bufs with callbacks that have been submitted but have not yet
+ * returned. These buffers will have an elevated hold count, so wait on those
+ * while freeing all the buffers only held by the LRU.
*/
void
xfs_wait_buftarg(
- xfs_buftarg_t *btp)
+ struct xfs_buftarg *btp)
{
- xfs_buf_t *bp, *n;
- xfs_bufhash_t *hash;
- uint i;
-
- for (i = 0; i < (1 << btp->bt_hashshift); i++) {
- hash = &btp->bt_hash[i];
-again:
- spin_lock(&hash->bh_lock);
- list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
- ASSERT(btp == bp->b_target);
- if (!(bp->b_flags & XBF_FS_MANAGED)) {
- spin_unlock(&hash->bh_lock);
- /*
- * Catch superblock reference count leaks
- * immediately
- */
- BUG_ON(bp->b_bn == 0);
- delay(100);
- goto again;
- }
+ struct xfs_buf *bp;
+
+restart:
+ spin_lock(&btp->bt_lru_lock);
+ while (!list_empty(&btp->bt_lru)) {
+ bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
+ if (atomic_read(&bp->b_hold) > 1) {
+ spin_unlock(&btp->bt_lru_lock);
+ delay(100);
+ goto restart;
}
- spin_unlock(&hash->bh_lock);
+ /*
+ * clear the LRU reference count so the buffer doesn't get
+ * ignored in xfs_buf_rele().
+ */
+ atomic_set(&bp->b_lru_ref, 0);
+ spin_unlock(&btp->bt_lru_lock);
+ xfs_buf_rele(bp);
+ spin_lock(&btp->bt_lru_lock);
}
+ spin_unlock(&btp->bt_lru_lock);
}
-/*
- * Allocate buffer hash table for a given target.
- * For devices containing metadata (i.e. not the log/realtime devices)
- * we need to allocate a much larger hash table.
- */
-STATIC void
-xfs_alloc_bufhash(
- xfs_buftarg_t *btp,
- int external)
+int
+xfs_buftarg_shrink(
+ struct shrinker *shrink,
+ int nr_to_scan,
+ gfp_t mask)
{
- unsigned int i;
+ struct xfs_buftarg *btp = container_of(shrink,
+ struct xfs_buftarg, bt_shrinker);
+ struct xfs_buf *bp;
+ LIST_HEAD(dispose);
- btp->bt_hashshift = external ? 3 : 12; /* 8 or 4096 buckets */
- btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) *
- sizeof(xfs_bufhash_t));
- for (i = 0; i < (1 << btp->bt_hashshift); i++) {
- spin_lock_init(&btp->bt_hash[i].bh_lock);
- INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
- }
-}
+ if (!nr_to_scan)
+ return btp->bt_lru_nr;
-STATIC void
-xfs_free_bufhash(
- xfs_buftarg_t *btp)
-{
- kmem_free_large(btp->bt_hash);
- btp->bt_hash = NULL;
-}
+ spin_lock(&btp->bt_lru_lock);
+ while (!list_empty(&btp->bt_lru)) {
+ if (nr_to_scan-- <= 0)
+ break;
-/*
- * buftarg list for delwrite queue processing
- */
-static LIST_HEAD(xfs_buftarg_list);
-static DEFINE_SPINLOCK(xfs_buftarg_lock);
+ bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
-STATIC void
-xfs_register_buftarg(
- xfs_buftarg_t *btp)
-{
- spin_lock(&xfs_buftarg_lock);
- list_add(&btp->bt_list, &xfs_buftarg_list);
- spin_unlock(&xfs_buftarg_lock);
-}
+ /*
+ * Decrement the b_lru_ref count unless the value is already
+ * zero. If the value is already zero, we need to reclaim the
+ * buffer, otherwise it gets another trip through the LRU.
+ */
+ if (atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
+ list_move_tail(&bp->b_lru, &btp->bt_lru);
+ continue;
+ }
-STATIC void
-xfs_unregister_buftarg(
- xfs_buftarg_t *btp)
-{
- spin_lock(&xfs_buftarg_lock);
- list_del(&btp->bt_list);
- spin_unlock(&xfs_buftarg_lock);
+ /*
+ * remove the buffer from the LRU now to avoid needing another
+ * lock round trip inside xfs_buf_rele().
+ */
+ list_move(&bp->b_lru, &dispose);
+ btp->bt_lru_nr--;
+ }
+ spin_unlock(&btp->bt_lru_lock);
+
+ while (!list_empty(&dispose)) {
+ bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
+ list_del_init(&bp->b_lru);
+ xfs_buf_rele(bp);
+ }
+
+ return btp->bt_lru_nr;
}
void
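
The shrinker above ages buffers rather than freeing them outright: each scan decrements b_lru_ref with atomic_add_unless(), rotating the buffer to the tail for another trip around the LRU, and only a buffer whose count has already drained to zero is moved to the dispose list and freed. A C11 sketch of that "decrement unless already zero" primitive (the function name is hypothetical):

#include <stdatomic.h>
#include <stdbool.h>

/*
 * Analogue of atomic_add_unless(v, -1, 0): decrement *v unless it is
 * already zero. Returns true if the decrement happened (the buffer gets
 * another trip around the LRU), false if the count was already zero
 * (the buffer is ready to be reclaimed).
 */
static bool dec_unless_zero(atomic_int *v)
{
	int old = atomic_load(v);

	while (old != 0) {
		if (atomic_compare_exchange_weak(v, &old, old - 1))
			return true;
	}
	return false;
}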
@@ -1484,18 +1594,14 @@ xfs_free_buftarg(
struct xfs_mount *mp,
struct xfs_buftarg *btp)
{
+ unregister_shrinker(&btp->bt_shrinker);
+
xfs_flush_buftarg(btp, 1);
if (mp->m_flags & XFS_MOUNT_BARRIER)
xfs_blkdev_issue_flush(btp);
- xfs_free_bufhash(btp);
iput(btp->bt_mapping->host);
- /* Unregister the buftarg first so that we don't get a
- * wakeup finding a non-existent task
- */
- xfs_unregister_buftarg(btp);
kthread_stop(btp->bt_task);
-
kmem_free(btp);
}
@@ -1572,6 +1678,7 @@ xfs_mapping_buftarg(
XFS_BUFTARG_NAME(btp));
return ENOMEM;
}
+ inode->i_ino = get_next_ino();
inode->i_mode = S_IFBLK;
inode->i_bdev = bdev;
inode->i_rdev = bdev->bd_dev;
@@ -1591,24 +1698,18 @@ xfs_alloc_delwrite_queue(
xfs_buftarg_t *btp,
const char *fsname)
{
- int error = 0;
-
- INIT_LIST_HEAD(&btp->bt_list);
INIT_LIST_HEAD(&btp->bt_delwrite_queue);
spin_lock_init(&btp->bt_delwrite_lock);
btp->bt_flags = 0;
btp->bt_task = kthread_run(xfsbufd, btp, "xfsbufd/%s", fsname);
- if (IS_ERR(btp->bt_task)) {
- error = PTR_ERR(btp->bt_task);
- goto out_error;
- }
- xfs_register_buftarg(btp);
-out_error:
- return error;
+ if (IS_ERR(btp->bt_task))
+ return PTR_ERR(btp->bt_task);
+ return 0;
}
xfs_buftarg_t *
xfs_alloc_buftarg(
+ struct xfs_mount *mp,
struct block_device *bdev,
int external,
const char *fsname)
@@ -1617,15 +1718,20 @@ xfs_alloc_buftarg(
btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
+ btp->bt_mount = mp;
btp->bt_dev = bdev->bd_dev;
btp->bt_bdev = bdev;
+ INIT_LIST_HEAD(&btp->bt_lru);
+ spin_lock_init(&btp->bt_lru_lock);
if (xfs_setsize_buftarg_early(btp, bdev))
goto error;
if (xfs_mapping_buftarg(btp, bdev))
goto error;
if (xfs_alloc_delwrite_queue(btp, fsname))
goto error;
- xfs_alloc_bufhash(btp, external);
+ btp->bt_shrinker.shrink = xfs_buftarg_shrink;
+ btp->bt_shrinker.seeks = DEFAULT_SEEKS;
+ register_shrinker(&btp->bt_shrinker);
return btp;
error:
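
Because each buffer target now embeds its own struct shrinker, the callback recovers its xfs_buftarg with container_of() instead of walking a global buftarg list. A self-contained userspace demonstration of that embed-and-recover pattern (the callback signature and all names here are hypothetical):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct shrinker {
	int	(*shrink)(struct shrinker *self, int nr_to_scan);
};

struct buftarg {
	const char	*name;
	unsigned long	lru_nr;
	struct shrinker	shrinker;	/* embedded: one shrinker per target */
};

static int buftarg_shrink(struct shrinker *self, int nr_to_scan)
{
	/* recover the containing target from the embedded member */
	struct buftarg	*btp = container_of(self, struct buftarg, shrinker);

	printf("scan %d of %s: %lu buffers on the LRU\n",
	       nr_to_scan, btp->name, btp->lru_nr);
	return (int)btp->lru_nr;
}

int main(void)
{
	struct buftarg	bt = { .name = "datadev", .lru_nr = 42 };

	bt.shrinker.shrink = buftarg_shrink;
	return bt.shrinker.shrink(&bt.shrinker, 16) == 42 ? 0 : 1;
}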
@@ -1730,27 +1836,6 @@ xfs_buf_runall_queues(
flush_workqueue(queue);
}
-STATIC int
-xfsbufd_wakeup(
- struct shrinker *shrink,
- int priority,
- gfp_t mask)
-{
- xfs_buftarg_t *btp;
-
- spin_lock(&xfs_buftarg_lock);
- list_for_each_entry(btp, &xfs_buftarg_list, bt_list) {
- if (test_bit(XBT_FORCE_SLEEP, &btp->bt_flags))
- continue;
- if (list_empty(&btp->bt_delwrite_queue))
- continue;
- set_bit(XBT_FORCE_FLUSH, &btp->bt_flags);
- wake_up_process(btp->bt_task);
- }
- spin_unlock(&xfs_buftarg_lock);
- return 0;
-}
-
/*
* Move as many buffers as specified to the supplied list
* indicating if we skipped any buffers to prevent deadlocks.
@@ -1771,7 +1856,6 @@ xfs_buf_delwri_split(
INIT_LIST_HEAD(list);
spin_lock(dwlk);
list_for_each_entry_safe(bp, n, dwq, b_list) {
- trace_xfs_buf_delwri_split(bp, _RET_IP_);
ASSERT(bp->b_flags & XBF_DELWRI);
if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) {
@@ -1785,6 +1869,7 @@ xfs_buf_delwri_split(
_XBF_RUN_QUEUES);
bp->b_flags |= XBF_WRITE;
list_move_tail(&bp->b_list, list);
+ trace_xfs_buf_delwri_split(bp, _RET_IP_);
} else
skipped++;
}
@@ -1916,7 +2001,7 @@ xfs_flush_buftarg(
bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
list_del_init(&bp->b_list);
- xfs_iowait(bp);
+ xfs_buf_iowait(bp);
xfs_buf_relse(bp);
}
}
@@ -1933,7 +2018,7 @@ xfs_buf_init(void)
goto out;
xfslogd_workqueue = alloc_workqueue("xfslogd",
- WQ_RESCUER | WQ_HIGHPRI, 1);
+ WQ_MEM_RECLAIM | WQ_HIGHPRI, 1);
if (!xfslogd_workqueue)
goto out_free_buf_zone;
@@ -1945,7 +2030,6 @@ xfs_buf_init(void)
if (!xfsconvertd_workqueue)
goto out_destroy_xfsdatad_workqueue;
- register_shrinker(&xfs_buf_shake);
return 0;
out_destroy_xfsdatad_workqueue:
@@ -1961,7 +2045,6 @@ xfs_buf_init(void)
void
xfs_buf_terminate(void)
{
- unregister_shrinker(&xfs_buf_shake);
destroy_workqueue(xfsconvertd_workqueue);
destroy_workqueue(xfsdatad_workqueue);
destroy_workqueue(xfslogd_workqueue);