diff options
Diffstat (limited to 'fs/xfs')
33 files changed, 254 insertions, 165 deletions
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 758f37ac5ad3..4b87472f35bc 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -104,7 +104,7 @@ kmem_zone_init(int size, char *zone_name) } static inline kmem_zone_t * -kmem_zone_init_flags(int size, char *zone_name, unsigned long flags, +kmem_zone_init_flags(int size, char *zone_name, slab_flags_t flags, void (*construct)(void *)) { return kmem_cache_create(zone_name, size, 0, flags, construct); diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 08df809e2315..1210f684d3c2 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -5662,7 +5662,8 @@ xfs_bmap_collapse_extents( *done = true; goto del_cursor; } - XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock)); + XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock), + del_cursor); new_startoff = got.br_startoff - offset_shift_fsb; if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) { @@ -5767,7 +5768,8 @@ xfs_bmap_insert_extents( goto del_cursor; } } - XFS_WANT_CORRUPTED_RETURN(mp, !isnullstartblock(got.br_startblock)); + XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock), + del_cursor); if (stop_fsb >= got.br_startoff + got.br_blockcount) { error = -EIO; diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index de3f04a98656..3b57ef0f2f76 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -920,8 +920,7 @@ STATIC xfs_agnumber_t xfs_ialloc_ag_select( xfs_trans_t *tp, /* transaction pointer */ xfs_ino_t parent, /* parent directory inode number */ - umode_t mode, /* bits set to indicate file type */ - int okalloc) /* ok to allocate more space */ + umode_t mode) /* bits set to indicate file type */ { xfs_agnumber_t agcount; /* number of ag's in the filesystem */ xfs_agnumber_t agno; /* current ag number */ @@ -978,9 +977,6 @@ xfs_ialloc_ag_select( return agno; } - if (!okalloc) - goto nextag; - if (!pag->pagf_init) { error = xfs_alloc_pagf_init(mp, tp, agno, flags); if (error) @@ -1680,7 +1676,6 @@ xfs_dialloc( struct xfs_trans *tp, xfs_ino_t parent, umode_t mode, - int okalloc, struct xfs_buf **IO_agbp, xfs_ino_t *inop) { @@ -1692,6 +1687,7 @@ xfs_dialloc( int noroom = 0; xfs_agnumber_t start_agno; struct xfs_perag *pag; + int okalloc = 1; if (*IO_agbp) { /* @@ -1707,7 +1703,7 @@ xfs_dialloc( * We do not have an agbp, so select an initial allocation * group for inode allocation. */ - start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc); + start_agno = xfs_ialloc_ag_select(tp, parent, mode); if (start_agno == NULLAGNUMBER) { *inop = NULLFSINO; return 0; diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index d2bdcd5e7312..66a8de0b1caa 100644 --- a/fs/xfs/libxfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h @@ -81,7 +81,6 @@ xfs_dialloc( struct xfs_trans *tp, /* transaction pointer */ xfs_ino_t parent, /* parent inode (directory) */ umode_t mode, /* mode bits for new inode */ - int okalloc, /* ok to allocate more space */ struct xfs_buf **agbp, /* buf for a.g. inode header */ xfs_ino_t *inop); /* inode number allocated */ diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c index 343a94246f5b..89bf16b4d937 100644 --- a/fs/xfs/libxfs/xfs_iext_tree.c +++ b/fs/xfs/libxfs/xfs_iext_tree.c @@ -302,7 +302,7 @@ xfs_iext_rec_cmp( xfs_fileoff_t offset) { uint64_t rec_offset = rec->lo & XFS_IEXT_STARTOFF_MASK; - u32 rec_len = rec->hi & XFS_IEXT_LENGTH_MASK; + uint32_t rec_len = rec->hi & XFS_IEXT_LENGTH_MASK; if (rec_offset > offset) return 1; @@ -850,9 +850,9 @@ static void xfs_iext_free_last_leaf( struct xfs_ifork *ifp) { - ifp->if_u1.if_root = NULL; ifp->if_height--; kmem_free(ifp->if_u1.if_root); + ifp->if_u1.if_root = NULL; } void diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c index 1c90ec41e9df..c79a1616b79d 100644 --- a/fs/xfs/libxfs/xfs_inode_fork.c +++ b/fs/xfs/libxfs/xfs_inode_fork.c @@ -42,11 +42,6 @@ STATIC int xfs_iformat_local(xfs_inode_t *, xfs_dinode_t *, int, int); STATIC int xfs_iformat_extents(xfs_inode_t *, xfs_dinode_t *, int); STATIC int xfs_iformat_btree(xfs_inode_t *, xfs_dinode_t *, int); -static inline dev_t xfs_to_linux_dev_t(xfs_dev_t dev) -{ - return MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev)); -} - /* * Copy inode type and data and attr format specific information from the * on-disk inode to the in-core inode and fork structures. For fifos, devices, @@ -792,7 +787,8 @@ xfs_iflush_fork( case XFS_DINODE_FMT_DEV: if (iip->ili_fields & XFS_ILOG_DEV) { ASSERT(whichfork == XFS_DATA_FORK); - xfs_dinode_put_rdev(dip, sysv_encode_dev(VFS_I(ip)->i_rdev)); + xfs_dinode_put_rdev(dip, + linux_to_xfs_dev_t(VFS_I(ip)->i_rdev)); } break; diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 996f035ee205..349d9f8edb89 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -274,7 +274,7 @@ struct xfs_inode_log_format { uint64_t ilf_ino; /* inode number */ union { uint32_t ilfu_rdev; /* rdev value for dev inode*/ - u8 __pad[16]; /* unused */ + uint8_t __pad[16]; /* unused */ } ilf_u; int64_t ilf_blkno; /* blkno of inode buffer */ int32_t ilf_len; /* len of inode buffer */ @@ -295,7 +295,7 @@ struct xfs_inode_log_format_32 { uint64_t ilf_ino; /* inode number */ union { uint32_t ilfu_rdev; /* rdev value for dev inode*/ - u8 __pad[16]; /* unused */ + uint8_t __pad[16]; /* unused */ } ilf_u; int64_t ilf_blkno; /* blkno of inode buffer */ int32_t ilf_len; /* len of inode buffer */ diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index 637b7a892313..f120fb20452f 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -318,8 +318,20 @@ xfs_scrub_dinode( /* di_mode */ mode = be16_to_cpu(dip->di_mode); - if (mode & ~(S_IALLUGO | S_IFMT)) + switch (mode & S_IFMT) { + case S_IFLNK: + case S_IFREG: + case S_IFDIR: + case S_IFCHR: + case S_IFBLK: + case S_IFIFO: + case S_IFSOCK: + /* mode is recognized */ + break; + default: xfs_scrub_ino_set_corrupt(sc, ino, bp); + break; + } /* v1/v2 fields */ switch (dip->di_version) { diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c index 8e58ba842946..3d9037eceaf1 100644 --- a/fs/xfs/scrub/quota.c +++ b/fs/xfs/scrub/quota.c @@ -107,7 +107,7 @@ xfs_scrub_quota_item( unsigned long long rcount; xfs_ino_t fs_icount; - offset = id * qi->qi_dqperchunk; + offset = id / qi->qi_dqperchunk; /* * We fed $id and DQNEXT into the xfs_qm_dqget call, which means @@ -207,7 +207,7 @@ xfs_scrub_quota( xfs_dqid_t id = 0; uint dqtype; int nimaps; - int error; + int error = 0; if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) return -ENOENT; diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 9c42c4efd01e..ab3aef2ae823 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -46,7 +46,6 @@ #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" -#include "scrub/scrub.h" #include "scrub/btree.h" /* diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c index 472080e75788..86daed0e3a45 100644 --- a/fs/xfs/scrub/trace.c +++ b/fs/xfs/scrub/trace.c @@ -26,7 +26,6 @@ #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_da_format.h" -#include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_trans.h" diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index a3eeaba156c5..21e2d70884e1 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -399,7 +399,7 @@ xfs_map_blocks( (ip->i_df.if_flags & XFS_IFEXTENTS)); ASSERT(offset <= mp->m_super->s_maxbytes); - if (offset + count > mp->m_super->s_maxbytes) + if ((xfs_ufsize_t)offset + count > mp->m_super->s_maxbytes) count = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); offset_fsb = XFS_B_TO_FSBT(mp, offset); @@ -896,13 +896,13 @@ xfs_writepage_map( struct writeback_control *wbc, struct inode *inode, struct page *page, - loff_t offset, - uint64_t end_offset) + uint64_t end_offset) { LIST_HEAD(submit_list); struct xfs_ioend *ioend, *next; struct buffer_head *bh, *head; ssize_t len = i_blocksize(inode); + uint64_t offset; int error = 0; int count = 0; int uptodate = 1; @@ -1146,7 +1146,7 @@ xfs_do_writepage( end_offset = offset; } - return xfs_writepage_map(wpc, wbc, inode, page, offset, end_offset); + return xfs_writepage_map(wpc, wbc, inode, page, end_offset); redirty: redirty_page_for_writepage(wbc, page); @@ -1265,7 +1265,7 @@ xfs_map_trim_size( if (mapping_size > size) mapping_size = size; if (offset < i_size_read(inode) && - offset + mapping_size >= i_size_read(inode)) { + (xfs_ufsize_t)offset + mapping_size >= i_size_read(inode)) { /* limit mapping to block that spans EOF */ mapping_size = roundup_64(i_size_read(inode) - offset, i_blocksize(inode)); @@ -1312,7 +1312,7 @@ xfs_get_blocks( lockmode = xfs_ilock_data_map_shared(ip); ASSERT(offset <= mp->m_super->s_maxbytes); - if (offset + size > mp->m_super->s_maxbytes) + if ((xfs_ufsize_t)offset + size > mp->m_super->s_maxbytes) size = mp->m_super->s_maxbytes - offset; end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); offset_fsb = XFS_B_TO_FSBT(mp, offset); diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index dd136f7275e4..e5fb008d75e8 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -389,7 +389,8 @@ xfs_bud_init( int xfs_bui_recover( struct xfs_mount *mp, - struct xfs_bui_log_item *buip) + struct xfs_bui_log_item *buip, + struct xfs_defer_ops *dfops) { int error = 0; unsigned int bui_type; @@ -404,9 +405,7 @@ xfs_bui_recover( xfs_exntst_t state; struct xfs_trans *tp; struct xfs_inode *ip = NULL; - struct xfs_defer_ops dfops; struct xfs_bmbt_irec irec; - xfs_fsblock_t firstfsb; ASSERT(!test_bit(XFS_BUI_RECOVERED, &buip->bui_flags)); @@ -464,7 +463,6 @@ xfs_bui_recover( if (VFS_I(ip)->i_nlink == 0) xfs_iflags_set(ip, XFS_IRECOVERY); - xfs_defer_init(&dfops, &firstfsb); /* Process deferred bmap item. */ state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ? @@ -479,16 +477,16 @@ xfs_bui_recover( break; default: error = -EFSCORRUPTED; - goto err_dfops; + goto err_inode; } xfs_trans_ijoin(tp, ip, 0); count = bmap->me_len; - error = xfs_trans_log_finish_bmap_update(tp, budp, &dfops, type, + error = xfs_trans_log_finish_bmap_update(tp, budp, dfops, type, ip, whichfork, bmap->me_startoff, bmap->me_startblock, &count, state); if (error) - goto err_dfops; + goto err_inode; if (count > 0) { ASSERT(type == XFS_BMAP_UNMAP); @@ -496,16 +494,11 @@ xfs_bui_recover( irec.br_blockcount = count; irec.br_startoff = bmap->me_startoff; irec.br_state = state; - error = xfs_bmap_unmap_extent(tp->t_mountp, &dfops, ip, &irec); + error = xfs_bmap_unmap_extent(tp->t_mountp, dfops, ip, &irec); if (error) - goto err_dfops; + goto err_inode; } - /* Finish transaction, free inodes. */ - error = xfs_defer_finish(&tp, &dfops); - if (error) - goto err_dfops; - set_bit(XFS_BUI_RECOVERED, &buip->bui_flags); error = xfs_trans_commit(tp); xfs_iunlock(ip, XFS_ILOCK_EXCL); @@ -513,8 +506,6 @@ xfs_bui_recover( return error; -err_dfops: - xfs_defer_cancel(&dfops); err_inode: xfs_trans_cancel(tp); if (ip) { diff --git a/fs/xfs/xfs_bmap_item.h b/fs/xfs/xfs_bmap_item.h index c867daae4a3c..24b354a2c836 100644 --- a/fs/xfs/xfs_bmap_item.h +++ b/fs/xfs/xfs_bmap_item.h @@ -93,6 +93,7 @@ struct xfs_bud_log_item *xfs_bud_init(struct xfs_mount *, struct xfs_bui_log_item *); void xfs_bui_item_free(struct xfs_bui_log_item *); void xfs_bui_release(struct xfs_bui_log_item *); -int xfs_bui_recover(struct xfs_mount *mp, struct xfs_bui_log_item *buip); +int xfs_bui_recover(struct xfs_mount *mp, struct xfs_bui_log_item *buip, + struct xfs_defer_ops *dfops); #endif /* __XFS_BMAP_ITEM_H__ */ diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 4db6e8d780f6..4c6e86d861fd 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1815,22 +1815,27 @@ xfs_alloc_buftarg( btp->bt_daxdev = dax_dev; if (xfs_setsize_buftarg_early(btp, bdev)) - goto error; + goto error_free; if (list_lru_init(&btp->bt_lru)) - goto error; + goto error_free; if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL)) - goto error; + goto error_lru; btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count; btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan; btp->bt_shrinker.seeks = DEFAULT_SEEKS; btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE; - register_shrinker(&btp->bt_shrinker); + if (register_shrinker(&btp->bt_shrinker)) + goto error_pcpu; return btp; -error: +error_pcpu: + percpu_counter_destroy(&btp->bt_io_count); +error_lru: + list_lru_destroy(&btp->bt_lru); +error_free: kmem_free(btp); return NULL; } diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index d57c2db64e59..f248708c10ff 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -970,14 +970,22 @@ xfs_qm_dqflush_done( * holding the lock before removing the dquot from the AIL. */ if ((lip->li_flags & XFS_LI_IN_AIL) && - lip->li_lsn == qip->qli_flush_lsn) { + ((lip->li_lsn == qip->qli_flush_lsn) || + (lip->li_flags & XFS_LI_FAILED))) { /* xfs_trans_ail_delete() drops the AIL lock. */ spin_lock(&ailp->xa_lock); - if (lip->li_lsn == qip->qli_flush_lsn) + if (lip->li_lsn == qip->qli_flush_lsn) { xfs_trans_ail_delete(ailp, lip, SHUTDOWN_CORRUPT_INCORE); - else + } else { + /* + * Clear the failed state since we are about to drop the + * flush lock + */ + if (lip->li_flags & XFS_LI_FAILED) + xfs_clear_li_failed(lip); spin_unlock(&ailp->xa_lock); + } } /* diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index 2c7a1629e064..664dea105e76 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -137,6 +137,26 @@ xfs_qm_dqunpin_wait( wait_event(dqp->q_pinwait, (atomic_read(&dqp->q_pincount) == 0)); } +/* + * Callback used to mark a buffer with XFS_LI_FAILED when items in the buffer + * have been failed during writeback + * + * this informs the AIL that the dquot is already flush locked on the next push, + * and acquires a hold on the buffer to ensure that it isn't reclaimed before + * dirty data makes it to disk. + */ +STATIC void +xfs_dquot_item_error( + struct xfs_log_item *lip, + struct xfs_buf *bp) +{ + struct xfs_dquot *dqp; + + dqp = DQUOT_ITEM(lip)->qli_dquot; + ASSERT(!completion_done(&dqp->q_flush)); + xfs_set_li_failed(lip, bp); +} + STATIC uint xfs_qm_dquot_logitem_push( struct xfs_log_item *lip, @@ -144,13 +164,28 @@ xfs_qm_dquot_logitem_push( __acquires(&lip->li_ailp->xa_lock) { struct xfs_dquot *dqp = DQUOT_ITEM(lip)->qli_dquot; - struct xfs_buf *bp = NULL; + struct xfs_buf *bp = lip->li_buf; uint rval = XFS_ITEM_SUCCESS; int error; if (atomic_read(&dqp->q_pincount) > 0) return XFS_ITEM_PINNED; + /* + * The buffer containing this item failed to be written back + * previously. Resubmit the buffer for IO + */ + if (lip->li_flags & XFS_LI_FAILED) { + if (!xfs_buf_trylock(bp)) + return XFS_ITEM_LOCKED; + + if (!xfs_buf_resubmit_failed_buffers(bp, lip, buffer_list)) + rval = XFS_ITEM_FLUSHING; + + xfs_buf_unlock(bp); + return rval; + } + if (!xfs_dqlock_nowait(dqp)) return XFS_ITEM_LOCKED; @@ -242,7 +277,8 @@ static const struct xfs_item_ops xfs_dquot_item_ops = { .iop_unlock = xfs_qm_dquot_logitem_unlock, .iop_committed = xfs_qm_dquot_logitem_committed, .iop_push = xfs_qm_dquot_logitem_push, - .iop_committing = xfs_qm_dquot_logitem_committing + .iop_committing = xfs_qm_dquot_logitem_committing, + .iop_error = xfs_dquot_item_error }; /* diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 18146873a8b3..8601275cc5e6 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -44,6 +44,7 @@ #include <linux/falloc.h> #include <linux/pagevec.h> #include <linux/backing-dev.h> +#include <linux/mman.h> static const struct vm_operations_struct xfs_file_vm_ops; @@ -1045,7 +1046,11 @@ __xfs_filemap_fault( xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); if (IS_DAX(inode)) { - ret = dax_iomap_fault(vmf, pe_size, &xfs_iomap_ops); + pfn_t pfn; + + ret = dax_iomap_fault(vmf, pe_size, &pfn, &xfs_iomap_ops); + if (ret & VM_FAULT_NEEDDSYNC) + ret = dax_finish_sync_fault(vmf, pe_size, pfn); } else { if (write_fault) ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops); @@ -1090,37 +1095,16 @@ xfs_filemap_page_mkwrite( } /* - * pfn_mkwrite was originally inteneded to ensure we capture time stamp - * updates on write faults. In reality, it's need to serialise against - * truncate similar to page_mkwrite. Hence we cycle the XFS_MMAPLOCK_SHARED - * to ensure we serialise the fault barrier in place. + * pfn_mkwrite was originally intended to ensure we capture time stamp updates + * on write faults. In reality, it needs to serialise against truncate and + * prepare memory for writing so handle is as standard write fault. */ static int xfs_filemap_pfn_mkwrite( struct vm_fault *vmf) { - struct inode *inode = file_inode(vmf->vma->vm_file); - struct xfs_inode *ip = XFS_I(inode); - int ret = VM_FAULT_NOPAGE; - loff_t size; - - trace_xfs_filemap_pfn_mkwrite(ip); - - sb_start_pagefault(inode->i_sb); - file_update_time(vmf->vma->vm_file); - - /* check if the faulting page hasn't raced with truncate */ - xfs_ilock(ip, XFS_MMAPLOCK_SHARED); - size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT; - if (vmf->pgoff >= size) - ret = VM_FAULT_SIGBUS; - else if (IS_DAX(inode)) - ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops); - xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); - sb_end_pagefault(inode->i_sb); - return ret; - + return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true); } static const struct vm_operations_struct xfs_file_vm_ops = { @@ -1136,6 +1120,13 @@ xfs_file_mmap( struct file *filp, struct vm_area_struct *vma) { + /* + * We don't support synchronous mappings for non-DAX files. At least + * until someone comes with a sensible use case. + */ + if (!IS_DAX(file_inode(filp)) && (vma->vm_flags & VM_SYNC)) + return -EOPNOTSUPP; + file_accessed(filp); vma->vm_ops = &xfs_file_vm_ops; if (IS_DAX(file_inode(filp))) @@ -1154,6 +1145,7 @@ const struct file_operations xfs_file_operations = { .compat_ioctl = xfs_file_compat_ioctl, #endif .mmap = xfs_file_mmap, + .mmap_supported_flags = MAP_SYNC, .open = xfs_file_open, .release = xfs_file_release, .fsync = xfs_file_fsync, diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index d8226f7a5dde..b41952a4ddd8 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -749,7 +749,6 @@ xfs_ialloc( xfs_nlink_t nlink, dev_t rdev, prid_t prid, - int okalloc, xfs_buf_t **ialloc_context, xfs_inode_t **ipp) { @@ -765,7 +764,7 @@ xfs_ialloc( * Call the space management code to pick * the on-disk inode to be allocated. */ - error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc, + error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, ialloc_context, &ino); if (error) return error; @@ -957,7 +956,6 @@ xfs_dir_ialloc( xfs_nlink_t nlink, dev_t rdev, prid_t prid, /* project id */ - int okalloc, /* ok to allocate new space */ xfs_inode_t **ipp, /* pointer to inode; it will be locked. */ int *committed) @@ -988,8 +986,8 @@ xfs_dir_ialloc( * transaction commit so that no other process can steal * the inode(s) that we've just allocated. */ - code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc, - &ialloc_context, &ip); + code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, &ialloc_context, + &ip); /* * Return an error if we were unable to allocate a new inode. @@ -1061,7 +1059,7 @@ xfs_dir_ialloc( * this call should always succeed. */ code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, - okalloc, &ialloc_context, &ip); + &ialloc_context, &ip); /* * If we get an error at this point, return to the caller @@ -1182,11 +1180,6 @@ xfs_create( xfs_flush_inodes(mp); error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp); } - if (error == -ENOSPC) { - /* No space at all so try a "no-allocation" reservation */ - resblks = 0; - error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp); - } if (error) goto out_release_inode; @@ -1203,19 +1196,13 @@ xfs_create( if (error) goto out_trans_cancel; - if (!resblks) { - error = xfs_dir_canenter(tp, dp, name); - if (error) - goto out_trans_cancel; - } - /* * A newly created regular or special file just has one directory * entry pointing to them, but a directory also the "." entry * pointing to itself. */ - error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, - prid, resblks > 0, &ip, NULL); + error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, &ip, + NULL); if (error) goto out_trans_cancel; @@ -1340,11 +1327,6 @@ xfs_create_tmpfile( tres = &M_RES(mp)->tr_create_tmpfile; error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp); - if (error == -ENOSPC) { - /* No space at all so try a "no-allocation" reservation */ - resblks = 0; - error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp); - } if (error) goto out_release_inode; @@ -1353,8 +1335,7 @@ xfs_create_tmpfile( if (error) goto out_trans_cancel; - error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, - prid, resblks > 0, &ip, NULL); + error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, prid, &ip, NULL); if (error) goto out_trans_cancel; @@ -2357,6 +2338,7 @@ retry: */ if (ip->i_ino != inum + i) { xfs_iunlock(ip, XFS_ILOCK_EXCL); + rcu_read_unlock(); continue; } } @@ -2400,6 +2382,24 @@ retry: } /* + * Free any local-format buffers sitting around before we reset to + * extents format. + */ +static inline void +xfs_ifree_local_data( + struct xfs_inode *ip, + int whichfork) +{ + struct xfs_ifork *ifp; + + if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL) + return; + + ifp = XFS_IFORK_PTR(ip, whichfork); + xfs_idata_realloc(ip, -ifp->if_bytes, whichfork); +} + +/* * This is called to return an inode to the inode free list. * The inode should already be truncated to 0 length and have * no pages associated with it. This routine also assumes that @@ -2436,6 +2436,9 @@ xfs_ifree( if (error) return error; + xfs_ifree_local_data(ip, XFS_DATA_FORK); + xfs_ifree_local_data(ip, XFS_ATTR_FORK); + VFS_I(ip)->i_mode = 0; /* mark incore inode as free */ ip->i_d.di_flags = 0; ip->i_d.di_dmevmask = 0; diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index cc13c3763721..b2136af9289f 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -428,7 +428,7 @@ xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip); int xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t, - xfs_nlink_t, dev_t, prid_t, int, + xfs_nlink_t, dev_t, prid_t, struct xfs_inode **, int *); /* from xfs_file.c */ diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 18077e2189a9..7ab52a8bc0a9 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -34,6 +34,7 @@ #include "xfs_error.h" #include "xfs_trans.h" #include "xfs_trans_space.h" +#include "xfs_inode_item.h" #include "xfs_iomap.h" #include "xfs_trace.h" #include "xfs_icache.h" @@ -1089,6 +1090,10 @@ xfs_file_iomap_begin( trace_xfs_iomap_found(ip, offset, length, 0, &imap); } + if (xfs_ipincount(ip) && (ip->i_itemp->ili_fsync_fields + & ~XFS_ILOG_TIMESTAMP)) + iomap->flags |= IOMAP_F_DIRTY; + xfs_bmbt_to_iomap(ip, iomap, &imap); if (shared) @@ -1208,7 +1213,7 @@ xfs_xattr_iomap_begin( ASSERT(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL); error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, - &nimaps, XFS_BMAPI_ENTIRE | XFS_BMAPI_ATTRFORK); + &nimaps, XFS_BMAPI_ATTRFORK); out_unlock: xfs_iunlock(ip, lockmode); diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 6282bfc1afa9..99562ec0de56 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -204,6 +204,16 @@ static inline kgid_t xfs_gid_to_kgid(uint32_t gid) return make_kgid(&init_user_ns, gid); } +static inline dev_t xfs_to_linux_dev_t(xfs_dev_t dev) +{ + return MKDEV(sysv_major(dev) & 0x1ff, sysv_minor(dev)); +} + +static inline xfs_dev_t linux_to_xfs_dev_t(dev_t dev) +{ + return sysv_encode_dev(dev); +} + /* * Various platform dependent calls that don't fit anywhere else */ diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 38d4227895ae..a503af96d780 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -781,17 +781,17 @@ xfs_log_mount_finish( * something to an unlinked inode, the irele won't cause * premature truncation and freeing of the inode, which results * in log recovery failure. We have to evict the unreferenced - * lru inodes after clearing MS_ACTIVE because we don't + * lru inodes after clearing SB_ACTIVE because we don't * otherwise clean up the lru if there's a subsequent failure in * xfs_mountfs, which leads to us leaking the inodes if nothing * else (e.g. quotacheck) references the inodes before the * mount failure occurs. */ - mp->m_super->s_flags |= MS_ACTIVE; + mp->m_super->s_flags |= SB_ACTIVE; error = xlog_recover_finish(mp->m_log); if (!error) xfs_log_work_queue(mp); - mp->m_super->s_flags &= ~MS_ACTIVE; + mp->m_super->s_flags &= ~SB_ACTIVE; evict_inodes(mp->m_super); /* diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 87b1c331f9eb..28d1abfe835e 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -24,6 +24,7 @@ #include "xfs_bit.h" #include "xfs_sb.h" #include "xfs_mount.h" +#include "xfs_defer.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_inode.h" @@ -4716,7 +4717,8 @@ STATIC int xlog_recover_process_cui( struct xfs_mount *mp, struct xfs_ail *ailp, - struct xfs_log_item *lip) + struct xfs_log_item *lip, + struct xfs_defer_ops *dfops) { struct xfs_cui_log_item *cuip; int error; @@ -4729,7 +4731,7 @@ xlog_recover_process_cui( return 0; spin_unlock(&ailp->xa_lock); - error = xfs_cui_recover(mp, cuip); + error = xfs_cui_recover(mp, cuip, dfops); spin_lock(&ailp->xa_lock); return error; @@ -4756,7 +4758,8 @@ STATIC int xlog_recover_process_bui( struct xfs_mount *mp, struct xfs_ail *ailp, - struct xfs_log_item *lip) + struct xfs_log_item *lip, + struct xfs_defer_ops *dfops) { struct xfs_bui_log_item *buip; int error; @@ -4769,7 +4772,7 @@ xlog_recover_process_bui( return 0; spin_unlock(&ailp->xa_lock); - error = xfs_bui_recover(mp, buip); + error = xfs_bui_recover(mp, buip, dfops); spin_lock(&ailp->xa_lock); return error; @@ -4805,6 +4808,46 @@ static inline bool xlog_item_is_intent(struct xfs_log_item *lip) } } +/* Take all the collected deferred ops and finish them in order. */ +static int +xlog_finish_defer_ops( + struct xfs_mount *mp, + struct xfs_defer_ops *dfops) +{ + struct xfs_trans *tp; + int64_t freeblks; + uint resblks; + int error; + + /* + * We're finishing the defer_ops that accumulated as a result of + * recovering unfinished intent items during log recovery. We + * reserve an itruncate transaction because it is the largest + * permanent transaction type. Since we're the only user of the fs + * right now, take 93% (15/16) of the available free blocks. Use + * weird math to avoid a 64-bit division. + */ + freeblks = percpu_counter_sum(&mp->m_fdblocks); + if (freeblks <= 0) + return -ENOSPC; + resblks = min_t(int64_t, UINT_MAX, freeblks); + resblks = (resblks * 15) >> 4; + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, resblks, + 0, XFS_TRANS_RESERVE, &tp); + if (error) + return error; + + error = xfs_defer_finish(&tp, dfops); + if (error) + goto out_cancel; + + return xfs_trans_commit(tp); + +out_cancel: + xfs_trans_cancel(tp); + return error; +} + /* * When this is called, all of the log intent items which did not have * corresponding log done items should be in the AIL. What we do now @@ -4825,10 +4868,12 @@ STATIC int xlog_recover_process_intents( struct xlog *log) { - struct xfs_log_item *lip; - int error = 0; + struct xfs_defer_ops dfops; struct xfs_ail_cursor cur; + struct xfs_log_item *lip; struct xfs_ail *ailp; + xfs_fsblock_t firstfsb; + int error = 0; #if defined(DEBUG) || defined(XFS_WARN) xfs_lsn_t last_lsn; #endif @@ -4839,6 +4884,7 @@ xlog_recover_process_intents( #if defined(DEBUG) || defined(XFS_WARN) last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block); #endif + xfs_defer_init(&dfops, &firstfsb); while (lip != NULL) { /* * We're done when we see something other than an intent. @@ -4859,6 +4905,12 @@ xlog_recover_process_intents( */ ASSERT(XFS_LSN_CMP(last_lsn, lip->li_lsn) >= 0); + /* + * NOTE: If your intent processing routine can create more + * deferred ops, you /must/ attach them to the dfops in this + * routine or else those subsequent intents will get + * replayed in the wrong order! + */ switch (lip->li_type) { case XFS_LI_EFI: error = xlog_recover_process_efi(log->l_mp, ailp, lip); @@ -4867,10 +4919,12 @@ xlog_recover_process_intents( error = xlog_recover_process_rui(log->l_mp, ailp, lip); break; case XFS_LI_CUI: - error = xlog_recover_process_cui(log->l_mp, ailp, lip); + error = xlog_recover_process_cui(log->l_mp, ailp, lip, + &dfops); break; case XFS_LI_BUI: - error = xlog_recover_process_bui(log->l_mp, ailp, lip); + error = xlog_recover_process_bui(log->l_mp, ailp, lip, + &dfops); break; } if (error) @@ -4880,6 +4934,11 @@ xlog_recover_process_intents( out: xfs_trans_ail_cursor_done(&cur); spin_unlock(&ailp->xa_lock); + if (error) + xfs_defer_cancel(&dfops); + else + error = xlog_finish_defer_ops(log->l_mp, &dfops); + return error; } diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 010a13a201aa..ec952dfad359 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -793,8 +793,8 @@ xfs_qm_qino_alloc( return error; if (need_alloc) { - error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, - &committed); + error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ip, + &committed); if (error) { xfs_trans_cancel(tp); return error; diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 8f2e2fac4255..3a55d6fc271b 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -393,7 +393,8 @@ xfs_cud_init( int xfs_cui_recover( struct xfs_mount *mp, - struct xfs_cui_log_item *cuip) + struct xfs_cui_log_item *cuip, + struct xfs_defer_ops *dfops) { int i; int error = 0; @@ -405,11 +406,9 @@ xfs_cui_recover( struct xfs_trans *tp; struct xfs_btree_cur *rcur = NULL; enum xfs_refcount_intent_type type; - xfs_fsblock_t firstfsb; xfs_fsblock_t new_fsb; xfs_extlen_t new_len; struct xfs_bmbt_irec irec; - struct xfs_defer_ops dfops; bool requeue_only = false; ASSERT(!test_bit(XFS_CUI_RECOVERED, &cuip->cui_flags)); @@ -465,7 +464,6 @@ xfs_cui_recover( return error; cudp = xfs_trans_get_cud(tp, cuip); - xfs_defer_init(&dfops, &firstfsb); for (i = 0; i < cuip->cui_format.cui_nextents; i++) { refc = &cuip->cui_format.cui_extents[i]; refc_type = refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK; @@ -485,7 +483,7 @@ xfs_cui_recover( new_len = refc->pe_len; } else error = xfs_trans_log_finish_refcount_update(tp, cudp, - &dfops, type, refc->pe_startblock, refc->pe_len, + dfops, type, refc->pe_startblock, refc->pe_len, &new_fsb, &new_len, &rcur); if (error) goto abort_error; @@ -497,21 +495,21 @@ xfs_cui_recover( switch (type) { case XFS_REFCOUNT_INCREASE: error = xfs_refcount_increase_extent( - tp->t_mountp, &dfops, &irec); + tp->t_mountp, dfops, &irec); break; case XFS_REFCOUNT_DECREASE: error = xfs_refcount_decrease_extent( - tp->t_mountp, &dfops, &irec); + tp->t_mountp, dfops, &irec); break; case XFS_REFCOUNT_ALLOC_COW: error = xfs_refcount_alloc_cow_extent( - tp->t_mountp, &dfops, + tp->t_mountp, dfops, irec.br_startblock, irec.br_blockcount); break; case XFS_REFCOUNT_FREE_COW: error = xfs_refcount_free_cow_extent( - tp->t_mountp, &dfops, + tp->t_mountp, dfops, irec.br_startblock, irec.br_blockcount); break; @@ -525,17 +523,12 @@ xfs_cui_recover( } xfs_refcount_finish_one_cleanup(tp, rcur, error); - error = xfs_defer_finish(&tp, &dfops); - if (error) - goto abort_defer; set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags); error = xfs_trans_commit(tp); return error; abort_error: xfs_refcount_finish_one_cleanup(tp, rcur, error); -abort_defer: - xfs_defer_cancel(&dfops); xfs_trans_cancel(tp); return error; } diff --git a/fs/xfs/xfs_refcount_item.h b/fs/xfs/xfs_refcount_item.h index 5b74dddfa64b..0e5327349a13 100644 --- a/fs/xfs/xfs_refcount_item.h +++ b/fs/xfs/xfs_refcount_item.h @@ -96,6 +96,7 @@ struct xfs_cud_log_item *xfs_cud_init(struct xfs_mount *, struct xfs_cui_log_item *); void xfs_cui_item_free(struct xfs_cui_log_item *); void xfs_cui_release(struct xfs_cui_log_item *); -int xfs_cui_recover(struct xfs_mount *mp, struct xfs_cui_log_item *cuip); +int xfs_cui_recover(struct xfs_mount *mp, struct xfs_cui_log_item *cuip, + struct xfs_defer_ops *dfops); #endif /* __XFS_REFCOUNT_ITEM_H__ */ diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index cc041a29eb70..cf7c8f81bebb 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -49,8 +49,6 @@ #include "xfs_alloc.h" #include "xfs_quota_defs.h" #include "xfs_quota.h" -#include "xfs_btree.h" -#include "xfs_bmap_btree.h" #include "xfs_reflink.h" #include "xfs_iomap.h" #include "xfs_rmap_btree.h" diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index f663022353c0..5122d3021117 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -212,9 +212,9 @@ xfs_parseargs( */ if (sb_rdonly(sb)) mp->m_flags |= XFS_MOUNT_RDONLY; - if (sb->s_flags & MS_DIRSYNC) + if (sb->s_flags & SB_DIRSYNC) mp->m_flags |= XFS_MOUNT_DIRSYNC; - if (sb->s_flags & MS_SYNCHRONOUS) + if (sb->s_flags & SB_SYNCHRONOUS) mp->m_flags |= XFS_MOUNT_WSYNC; /* @@ -1312,7 +1312,7 @@ xfs_fs_remount( } /* ro -> rw */ - if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) { + if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & SB_RDONLY)) { if (mp->m_flags & XFS_MOUNT_NORECOVERY) { xfs_warn(mp, "ro->rw transition prohibited on norecovery mount"); @@ -1368,7 +1368,7 @@ xfs_fs_remount( } /* rw -> ro */ - if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) { + if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) { /* Free the per-AG metadata reservation pool. */ error = xfs_fs_unreserve_ag_blocks(mp); if (error) { diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h index 5f2f32408011..fcc5dfc70aa0 100644 --- a/fs/xfs/xfs_super.h +++ b/fs/xfs/xfs_super.h @@ -30,7 +30,7 @@ extern void xfs_qm_exit(void); #ifdef CONFIG_XFS_POSIX_ACL # define XFS_ACL_STRING "ACLs, " -# define set_posix_acl_flag(sb) ((sb)->s_flags |= MS_POSIXACL) +# define set_posix_acl_flag(sb) ((sb)->s_flags |= SB_POSIXACL) #else # define XFS_ACL_STRING # define set_posix_acl_flag(sb) do { } while (0) diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 68d3ca2c4968..2e9e793a8f9d 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -232,11 +232,6 @@ xfs_symlink( resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, resblks, 0, 0, &tp); - if (error == -ENOSPC && fs_blocks == 0) { - resblks = 0; - error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, 0, 0, 0, - &tp); - } if (error) goto out_release_inode; @@ -260,14 +255,6 @@ xfs_symlink( goto out_trans_cancel; /* - * Check for ability to enter directory entry, if no space reserved. - */ - if (!resblks) { - error = xfs_dir_canenter(tp, dp, link_name); - if (error) - goto out_trans_cancel; - } - /* * Initialize the bmap freelist prior to calling either * bmapi or the directory create code. */ @@ -277,7 +264,7 @@ xfs_symlink( * Allocate an inode for the symlink. */ error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0, - prid, resblks > 0, &ip, NULL); + prid, &ip, NULL); if (error) goto out_trans_cancel; diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index 5d95fe348294..35f3546b6af5 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -24,7 +24,6 @@ #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_da_format.h" -#include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_da_btree.h" diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 515ba042d75c..d718a10c2271 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -654,8 +654,6 @@ DEFINE_INODE_EVENT(xfs_inode_set_cowblocks_tag); DEFINE_INODE_EVENT(xfs_inode_clear_cowblocks_tag); DEFINE_INODE_EVENT(xfs_inode_free_cowblocks_invalid); -DEFINE_INODE_EVENT(xfs_filemap_pfn_mkwrite); - TRACE_EVENT(xfs_filemap_fault, TP_PROTO(struct xfs_inode *ip, enum page_entry_size pe_size, bool write_fault), |