Diffstat (limited to 'fs/xfs/xfs_buf.c')
-rw-r--r-- | fs/xfs/xfs_buf.c | 214 |
1 file changed, 127 insertions, 87 deletions
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index ca0849043f54..217e4f82a44a 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -198,20 +198,22 @@ xfs_buf_free_maps(
 	}
 }
 
-static struct xfs_buf *
+static int
 _xfs_buf_alloc(
 	struct xfs_buftarg	*target,
 	struct xfs_buf_map	*map,
 	int			nmaps,
-	xfs_buf_flags_t		flags)
+	xfs_buf_flags_t		flags,
+	struct xfs_buf		**bpp)
 {
 	struct xfs_buf		*bp;
 	int			error;
 	int			i;
 
+	*bpp = NULL;
 	bp = kmem_zone_zalloc(xfs_buf_zone, KM_NOFS);
 	if (unlikely(!bp))
-		return NULL;
+		return -ENOMEM;
 
 	/*
 	 * We don't want certain flags to appear in b_flags unless they are
@@ -238,8 +240,8 @@ _xfs_buf_alloc(
 	 */
 	error = xfs_buf_get_maps(bp, nmaps);
 	if (error) {
-		kmem_zone_free(xfs_buf_zone, bp);
-		return NULL;
+		kmem_cache_free(xfs_buf_zone, bp);
+		return error;
 	}
 
 	bp->b_bn = map[0].bm_bn;
@@ -256,7 +258,8 @@ _xfs_buf_alloc(
 	XFS_STATS_INC(bp->b_mount, xb_create);
 	trace_xfs_buf_init(bp, _RET_IP_);
 
-	return bp;
+	*bpp = bp;
+	return 0;
 }
 
 /*
@@ -304,7 +307,7 @@ _xfs_buf_free_pages(
  *	The buffer must not be on any hash - use xfs_buf_rele instead for
  *	hashed and refcounted buffers
  */
-void
+static void
 xfs_buf_free(
 	xfs_buf_t		*bp)
 {
@@ -328,7 +331,7 @@ xfs_buf_free(
 			kmem_free(bp->b_addr);
 	_xfs_buf_free_pages(bp);
 	xfs_buf_free_maps(bp);
-	kmem_zone_free(xfs_buf_zone, bp);
+	kmem_cache_free(xfs_buf_zone, bp);
 }
 
 /*
@@ -345,6 +348,15 @@ xfs_buf_allocate_memory(
 	unsigned short		page_count, i;
 	xfs_off_t		start, end;
 	int			error;
+	xfs_km_flags_t		kmflag_mask = 0;
+
+	/*
+	 * assure zeroed buffer for non-read cases.
+	 */
+	if (!(flags & XBF_READ)) {
+		kmflag_mask |= KM_ZERO;
+		gfp_mask |= __GFP_ZERO;
+	}
 
 	/*
 	 * for buffers that are contained within a single page, just allocate
@@ -353,7 +365,9 @@ xfs_buf_allocate_memory(
 	 */
 	size = BBTOB(bp->b_length);
 	if (size < PAGE_SIZE) {
-		bp->b_addr = kmem_alloc(size, KM_NOFS);
+		int align_mask = xfs_buftarg_dma_alignment(bp->b_target);
+		bp->b_addr = kmem_alloc_io(size, align_mask,
+					   KM_NOFS | kmflag_mask);
 		if (!bp->b_addr) {
 			/* low memory - use alloc_page loop instead */
 			goto use_alloc_page;
@@ -368,7 +382,7 @@ xfs_buf_allocate_memory(
 		}
 		bp->b_offset = offset_in_page(bp->b_addr);
 		bp->b_pages = bp->b_page_array;
-		bp->b_pages[0] = virt_to_page(bp->b_addr);
+		bp->b_pages[0] = kmem_to_page(bp->b_addr);
 		bp->b_page_count = 1;
 		bp->b_flags |= _XBF_KMEM;
 		return 0;
@@ -450,7 +464,7 @@ _xfs_buf_map_pages(
 		unsigned nofs_flag;
 
 		/*
-		 * vm_map_ram() will allocate auxillary structures (e.g.
+		 * vm_map_ram() will allocate auxiliary structures (e.g.
 		 * pagetables) with GFP_KERNEL, yet we are likely to be under
 		 * GFP_NOFS context here. Hence we need to tell memory reclaim
 		 * that we are in such a context via PF_MEMALLOC_NOFS to prevent
@@ -671,53 +685,39 @@ xfs_buf_incore(
  * cache hits, as metadata intensive workloads will see 3 orders of magnitude
  * more hits than misses.
  */
-struct xfs_buf *
+int
 xfs_buf_get_map(
 	struct xfs_buftarg	*target,
 	struct xfs_buf_map	*map,
 	int			nmaps,
-	xfs_buf_flags_t		flags)
+	xfs_buf_flags_t		flags,
+	struct xfs_buf		**bpp)
 {
 	struct xfs_buf		*bp;
 	struct xfs_buf		*new_bp;
 	int			error = 0;
 
+	*bpp = NULL;
 	error = xfs_buf_find(target, map, nmaps, flags, NULL, &bp);
-
-	switch (error) {
-	case 0:
-		/* cache hit */
+	if (!error)
 		goto found;
-	case -EAGAIN:
-		/* cache hit, trylock failure, caller handles failure */
-		ASSERT(flags & XBF_TRYLOCK);
-		return NULL;
-	case -ENOENT:
-		/* cache miss, go for insert */
-		break;
-	case -EFSCORRUPTED:
-	default:
-		/*
-		 * None of the higher layers understand failure types
-		 * yet, so return NULL to signal a fatal lookup error.
-		 */
-		return NULL;
-	}
+	if (error != -ENOENT)
+		return error;
 
-	new_bp = _xfs_buf_alloc(target, map, nmaps, flags);
-	if (unlikely(!new_bp))
-		return NULL;
+	error = _xfs_buf_alloc(target, map, nmaps, flags, &new_bp);
+	if (error)
+		return error;
 
 	error = xfs_buf_allocate_memory(new_bp, flags);
 	if (error) {
 		xfs_buf_free(new_bp);
-		return NULL;
+		return error;
 	}
 
 	error = xfs_buf_find(target, map, nmaps, flags, new_bp, &bp);
 	if (error) {
 		xfs_buf_free(new_bp);
-		return NULL;
+		return error;
 	}
 
 	if (bp != new_bp)
@@ -730,7 +730,7 @@ found:
 			xfs_warn(target->bt_mount,
				"%s: failed to map pages\n", __func__);
 			xfs_buf_relse(bp);
-			return NULL;
+			return error;
 		}
 	}
 
@@ -743,7 +743,8 @@ found:
 
 	XFS_STATS_INC(target->bt_mount, xb_get);
 	trace_xfs_buf_get(bp, flags, _RET_IP_);
-	return bp;
+	*bpp = bp;
+	return 0;
 }
 
 STATIC int
@@ -795,46 +796,77 @@ xfs_buf_reverify(
 	return bp->b_error;
 }
 
-xfs_buf_t *
+int
 xfs_buf_read_map(
 	struct xfs_buftarg	*target,
 	struct xfs_buf_map	*map,
 	int			nmaps,
 	xfs_buf_flags_t		flags,
-	const struct xfs_buf_ops *ops)
+	struct xfs_buf		**bpp,
+	const struct xfs_buf_ops *ops,
+	xfs_failaddr_t		fa)
 {
 	struct xfs_buf		*bp;
+	int			error;
 
 	flags |= XBF_READ;
+	*bpp = NULL;
 
-	bp = xfs_buf_get_map(target, map, nmaps, flags);
-	if (!bp)
-		return NULL;
+	error = xfs_buf_get_map(target, map, nmaps, flags, &bp);
+	if (error)
+		return error;
 
 	trace_xfs_buf_read(bp, flags, _RET_IP_);
 
 	if (!(bp->b_flags & XBF_DONE)) {
+		/* Initiate the buffer read and wait. */
 		XFS_STATS_INC(target->bt_mount, xb_get_read);
 		bp->b_ops = ops;
-		_xfs_buf_read(bp, flags);
-		return bp;
+		error = _xfs_buf_read(bp, flags);
+
+		/* Readahead iodone already dropped the buffer, so exit. */
+		if (flags & XBF_ASYNC)
+			return 0;
+	} else {
+		/* Buffer already read; all we need to do is check it. */
+		error = xfs_buf_reverify(bp, ops);
+
+		/* Readahead already finished; drop the buffer and exit. */
+		if (flags & XBF_ASYNC) {
+			xfs_buf_relse(bp);
+			return 0;
+		}
+
+		/* We do not want read in the flags */
+		bp->b_flags &= ~XBF_READ;
+		ASSERT(bp->b_ops != NULL || ops == NULL);
 	}
 
-	xfs_buf_reverify(bp, ops);
+	/*
+	 * If we've had a read error, then the contents of the buffer are
+	 * invalid and should not be used. To ensure that a followup read tries
+	 * to pull the buffer from disk again, we clear the XBF_DONE flag and
+	 * mark the buffer stale. This ensures that anyone who has a current
+	 * reference to the buffer will interpret its contents correctly and
+	 * future cache lookups will also treat it as an empty, uninitialised
+	 * buffer.
+	 */
+	if (error) {
+		if (!XFS_FORCED_SHUTDOWN(target->bt_mount))
+			xfs_buf_ioerror_alert(bp, fa);
 
-	if (flags & XBF_ASYNC) {
-		/*
-		 * Read ahead call which is already satisfied,
-		 * drop the buffer
-		 */
+		bp->b_flags &= ~XBF_DONE;
+		xfs_buf_stale(bp);
 		xfs_buf_relse(bp);
-		return NULL;
+
+		/* bad CRC means corrupted metadata */
+		if (error == -EFSBADCRC)
+			error = -EFSCORRUPTED;
+		return error;
 	}
 
-	/* We do not want read in the flags */
-	bp->b_flags &= ~XBF_READ;
-	ASSERT(bp->b_ops != NULL || ops == NULL);
-	return bp;
+	*bpp = bp;
+	return 0;
 }
 
 /*
@@ -848,11 +880,14 @@ xfs_buf_readahead_map(
 	int			nmaps,
 	const struct xfs_buf_ops *ops)
 {
+	struct xfs_buf		*bp;
+
 	if (bdi_read_congested(target->bt_bdev->bd_bdi))
 		return;
 
 	xfs_buf_read_map(target, map, nmaps,
-		     XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD, ops);
+		     XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD, &bp, ops,
+		     __this_address);
 }
 
 /*
@@ -869,12 +904,13 @@ xfs_buf_read_uncached(
 	const struct xfs_buf_ops *ops)
 {
 	struct xfs_buf		*bp;
+	int			error;
 
 	*bpp = NULL;
 
-	bp = xfs_buf_get_uncached(target, numblks, flags);
-	if (!bp)
-		return -ENOMEM;
+	error = xfs_buf_get_uncached(target, numblks, flags, &bp);
+	if (error)
+		return error;
 
 	/* set up the buffer for a read IO */
 	ASSERT(bp->b_map_count == 1);
@@ -885,7 +921,7 @@ xfs_buf_read_uncached(
 
 	xfs_buf_submit(bp);
 	if (bp->b_error) {
-		int	error = bp->b_error;
+		error = bp->b_error;
 		xfs_buf_relse(bp);
 		return error;
 	}
@@ -894,20 +930,23 @@ xfs_buf_read_uncached(
 	return 0;
 }
 
-xfs_buf_t *
+int
 xfs_buf_get_uncached(
 	struct xfs_buftarg	*target,
 	size_t			numblks,
-	int			flags)
+	int			flags,
+	struct xfs_buf		**bpp)
 {
 	unsigned long		page_count;
 	int			error, i;
 	struct xfs_buf		*bp;
 	DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);
 
+	*bpp = NULL;
+
 	/* flags might contain irrelevant bits, pass only what we care about */
-	bp = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT);
-	if (unlikely(bp == NULL))
+	error = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT, &bp);
+	if (error)
 		goto fail;
 
 	page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT;
@@ -917,8 +956,10 @@ xfs_buf_get_uncached(
 
 	for (i = 0; i < page_count; i++) {
 		bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
-		if (!bp->b_pages[i])
+		if (!bp->b_pages[i]) {
+			error = -ENOMEM;
 			goto fail_free_mem;
+		}
 	}
 	bp->b_flags |= _XBF_PAGES;
 
@@ -930,7 +971,8 @@ xfs_buf_get_uncached(
 	}
 
 	trace_xfs_buf_get_uncached(bp, _RET_IP_);
-	return bp;
+	*bpp = bp;
+	return 0;
 
  fail_free_mem:
 	while (--i >= 0)
@@ -938,9 +980,9 @@ xfs_buf_get_uncached(
 	_xfs_buf_free_pages(bp);
  fail_free_buf:
 	xfs_buf_free_maps(bp);
-	kmem_zone_free(xfs_buf_zone, bp);
+	kmem_cache_free(xfs_buf_zone, bp);
  fail:
-	return NULL;
+	return error;
 }
 
 /*
@@ -1194,10 +1236,10 @@ __xfs_buf_ioerror(
 void
 xfs_buf_ioerror_alert(
 	struct xfs_buf		*bp,
-	const char		*func)
+	xfs_failaddr_t		func)
 {
 	xfs_alert(bp->b_mount,
-"metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d",
+"metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d",
 			func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length,
 			-bp->b_error);
 }
@@ -1250,8 +1292,7 @@ xfs_buf_ioapply_map(
 	int		map,
 	int		*buf_offset,
 	int		*count,
-	int		op,
-	int		op_flags)
+	int		op)
 {
 	int		page_index;
 	int		total_nr_pages = bp->b_page_count;
@@ -1286,7 +1327,7 @@ next_chunk:
 	bio->bi_iter.bi_sector = sector;
 	bio->bi_end_io = xfs_buf_bio_end_io;
 	bio->bi_private = bp;
-	bio_set_op_attrs(bio, op, op_flags);
+	bio->bi_opf = op;
 
 	for (; size && nr_pages; nr_pages--, page_index++) {
 		int	rbytes, nbytes = PAGE_SIZE - offset;
@@ -1331,7 +1372,6 @@ _xfs_buf_ioapply(
 {
 	struct blk_plug		plug;
 	int			op;
-	int			op_flags = 0;
 	int			offset;
 	int			size;
 	int			i;
@@ -1373,15 +1413,14 @@ _xfs_buf_ioapply(
 				dump_stack();
 			}
 		}
-	} else if (bp->b_flags & XBF_READ_AHEAD) {
-		op = REQ_OP_READ;
-		op_flags = REQ_RAHEAD;
 	} else {
 		op = REQ_OP_READ;
+		if (bp->b_flags & XBF_READ_AHEAD)
+			op |= REQ_RAHEAD;
 	}
 
 	/* we only use the buffer cache for meta-data */
-	op_flags |= REQ_META;
+	op |= REQ_META;
 
 	/*
 	 * Walk all the vectors issuing IO on them. Set up the initial offset
@@ -1393,7 +1432,7 @@ _xfs_buf_ioapply(
 	size = BBTOB(bp->b_length);
 	blk_start_plug(&plug);
 	for (i = 0; i < bp->b_map_count; i++) {
-		xfs_buf_ioapply_map(bp, i, &offset, &size, op, op_flags);
+		xfs_buf_ioapply_map(bp, i, &offset, &size, op);
 		if (bp->b_error)
 			break;
 		if (size <= 0)
@@ -1741,7 +1780,7 @@ xfs_alloc_buftarg(
 {
 	xfs_buftarg_t		*btp;
 
-	btp = kmem_zalloc(sizeof(*btp), KM_SLEEP | KM_NOFS);
+	btp = kmem_zalloc(sizeof(*btp), KM_NOFS);
 
 	btp->bt_mount = mp;
 	btp->bt_dev =  bdev->bd_dev;
@@ -2052,8 +2091,9 @@ xfs_buf_delwri_pushbuf(
 int __init
 xfs_buf_init(void)
 {
-	xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
-						KM_ZONE_HWALIGN, NULL);
+	xfs_buf_zone = kmem_cache_create("xfs_buf",
+					 sizeof(struct xfs_buf), 0,
+					 SLAB_HWCACHE_ALIGN, NULL);
 	if (!xfs_buf_zone)
 		goto out;
 
@@ -2066,7 +2106,7 @@ xfs_buf_init(void)
 void
 xfs_buf_terminate(void)
 {
-	kmem_zone_destroy(xfs_buf_zone);
+	kmem_cache_destroy(xfs_buf_zone);
}
 
 void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
@@ -2096,7 +2136,7 @@ xfs_verify_magic(
 	int			idx;
 
 	idx = xfs_sb_version_hascrc(&mp->m_sb);
-	if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx])))
+	if (WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx]))
 		return false;
 	return dmagic == bp->b_ops->magic[idx];
 }
@@ -2114,7 +2154,7 @@ xfs_verify_magic16(
 	int			idx;
 
 	idx = xfs_sb_version_hascrc(&mp->m_sb);
-	if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic16[idx])))
+	if (WARN_ON(!bp->b_ops || !bp->b_ops->magic16[idx]))
 		return false;
 	return dmagic == bp->b_ops->magic16[idx];
 }
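
The bulk of this diff converts the buffer lookup and read API from returning a struct xfs_buf pointer (NULL on any failure) to returning a negative errno with the buffer handed back through an output parameter, so callers can tell -ENOMEM, -EAGAIN and -EFSCORRUPTED apart. As a rough sketch of what that means at a call site: example_read_block() below is hypothetical, and only the new xfs_buf_read_map() signature is taken from the patch above.

/*
 * Hypothetical caller, sketched against the new API; assumes the
 * declarations in fs/xfs/xfs_buf.h. Before this series the call was
 * "bp = xfs_buf_read_map(target, map, 1, 0, ops); if (!bp) ..." and
 * the caller had to guess which error the NULL stood for.
 */
static int
example_read_block(
	struct xfs_buftarg	*target,
	struct xfs_buf_map	*map,
	const struct xfs_buf_ops *ops,
	struct xfs_buf		**bpp)
{
	int			error;

	error = xfs_buf_read_map(target, map, 1, 0, bpp, ops,
			__this_address);
	if (error)
		return error;	/* -ENOMEM, -EAGAIN, -EFSCORRUPTED, ... */

	/* ... use *bpp, then xfs_buf_relse(*bpp) when finished ... */
	return 0;
}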
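A smaller theme is the bio submission path: the separate op/op_flags pair combined via bio_set_op_attrs() is replaced by a single value assigned straight to bi_opf. Below is a minimal sketch of how _xfs_buf_ioapply() now composes that word for reads; the helper name is invented, and the REQ_* bits come from linux/blk_types.h.

#include <linux/blk_types.h>

/* Sketch only: build the combined op word the way the patch does. */
static inline unsigned int
example_read_opf(bool readahead)
{
	unsigned int	op = REQ_OP_READ;

	if (readahead)		/* XBF_READ_AHEAD in the real code */
		op |= REQ_RAHEAD;
	op |= REQ_META;		/* the buffer cache is metadata only */
	return op;		/* assigned to bio->bi_opf */
}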