summaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/Makefile1
-rw-r--r--fs/xfs/kmem.c79
-rw-r--r--fs/xfs/kmem.h45
-rw-r--r--fs/xfs/libxfs/xfs_ag.c22
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.c2
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c1303
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h16
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c1
-rw-r--r--fs/xfs/libxfs/xfs_attr.c117
-rw-r--r--fs/xfs/libxfs/xfs_attr.h17
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c287
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.h33
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c86
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.h2
-rw-r--r--fs/xfs/libxfs/xfs_bit.c1
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c837
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h17
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c16
-rw-r--r--fs/xfs/libxfs/xfs_btree.c158
-rw-r--r--fs/xfs/libxfs/xfs_btree.h68
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c674
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.h79
-rw-r--r--fs/xfs/libxfs/xfs_da_format.c888
-rw-r--r--fs/xfs/libxfs/xfs_da_format.h63
-rw-r--r--fs/xfs/libxfs/xfs_defer.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c107
-rw-r--r--fs/xfs/libxfs/xfs_dir2.h90
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c135
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c282
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c307
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c1007
-rw-r--r--fs/xfs/libxfs/xfs_dir2_priv.h103
-rw-r--r--fs/xfs/libxfs/xfs_dir2_sf.c430
-rw-r--r--fs/xfs/libxfs/xfs_dquot_buf.c8
-rw-r--r--fs/xfs/libxfs/xfs_format.h21
-rw-r--r--fs/xfs/libxfs/xfs_fs.h14
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c202
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.h1
-rw-r--r--fs/xfs/libxfs/xfs_iext_tree.c10
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c21
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.h5
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c38
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.h18
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h23
-rw-r--r--fs/xfs/libxfs/xfs_log_recover.h4
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c228
-rw-r--r--fs/xfs/libxfs/xfs_refcount.h12
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c436
-rw-r--r--fs/xfs/libxfs/xfs_rmap.h11
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c4
-rw-r--r--fs/xfs/libxfs/xfs_sb.c20
-rw-r--r--fs/xfs/libxfs/xfs_shared.h6
-rw-r--r--fs/xfs/libxfs/xfs_trans_inode.c8
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c102
-rw-r--r--fs/xfs/libxfs/xfs_types.h10
-rw-r--r--fs/xfs/scrub/agheader.c4
-rw-r--r--fs/xfs/scrub/agheader_repair.c4
-rw-r--r--fs/xfs/scrub/alloc.c3
-rw-r--r--fs/xfs/scrub/attr.c17
-rw-r--r--fs/xfs/scrub/bitmap.c3
-rw-r--r--fs/xfs/scrub/bmap.c81
-rw-r--r--fs/xfs/scrub/common.h9
-rw-r--r--fs/xfs/scrub/dabtree.c62
-rw-r--r--fs/xfs/scrub/dabtree.h3
-rw-r--r--fs/xfs/scrub/dir.c132
-rw-r--r--fs/xfs/scrub/fscounters.c13
-rw-r--r--fs/xfs/scrub/health.c1
-rw-r--r--fs/xfs/scrub/parent.c27
-rw-r--r--fs/xfs/scrub/quota.c7
-rw-r--r--fs/xfs/scrub/refcount.c3
-rw-r--r--fs/xfs/scrub/repair.c16
-rw-r--r--fs/xfs/scrub/repair.h1
-rw-r--r--fs/xfs/scrub/scrub.c1
-rw-r--r--fs/xfs/scrub/symlink.c2
-rw-r--r--fs/xfs/scrub/trace.h6
-rw-r--r--fs/xfs/xfs_acl.c41
-rw-r--r--fs/xfs/xfs_aops.c791
-rw-r--r--fs/xfs/xfs_aops.h20
-rw-r--r--fs/xfs/xfs_attr_inactive.c222
-rw-r--r--fs/xfs/xfs_attr_list.c77
-rw-r--r--fs/xfs/xfs_bmap_item.c19
-rw-r--r--fs/xfs/xfs_bmap_util.c293
-rw-r--r--fs/xfs/xfs_bmap_util.h4
-rw-r--r--fs/xfs/xfs_buf.c214
-rw-r--r--fs/xfs/xfs_buf.h40
-rw-r--r--fs/xfs/xfs_buf_item.c57
-rw-r--r--fs/xfs/xfs_buf_item.h1
-rw-r--r--fs/xfs/xfs_dir2_readdir.c137
-rw-r--r--fs/xfs/xfs_discard.c8
-rw-r--r--fs/xfs/xfs_dquot.c64
-rw-r--r--fs/xfs/xfs_dquot.h98
-rw-r--r--fs/xfs/xfs_dquot_item.c2
-rw-r--r--fs/xfs/xfs_dquot_item.h34
-rw-r--r--fs/xfs/xfs_error.c33
-rw-r--r--fs/xfs/xfs_error.h33
-rw-r--r--fs/xfs/xfs_extent_busy.c4
-rw-r--r--fs/xfs/xfs_extfree_item.c17
-rw-r--r--fs/xfs/xfs_file.c158
-rw-r--r--fs/xfs/xfs_filestream.c14
-rw-r--r--fs/xfs/xfs_fsmap.c13
-rw-r--r--fs/xfs/xfs_icache.c10
-rw-r--r--fs/xfs/xfs_icreate_item.c4
-rw-r--r--fs/xfs/xfs_inode.c168
-rw-r--r--fs/xfs/xfs_inode.h31
-rw-r--r--fs/xfs/xfs_inode_item.c17
-rw-r--r--fs/xfs/xfs_ioctl.c246
-rw-r--r--fs/xfs/xfs_ioctl.h7
-rw-r--r--fs/xfs/xfs_ioctl32.c116
-rw-r--r--fs/xfs/xfs_ioctl32.h15
-rw-r--r--fs/xfs/xfs_iomap.c871
-rw-r--r--fs/xfs/xfs_iomap.h13
-rw-r--r--fs/xfs/xfs_iops.c77
-rw-r--r--fs/xfs/xfs_itable.c16
-rw-r--r--fs/xfs/xfs_itable.h13
-rw-r--r--fs/xfs/xfs_iwalk.c7
-rw-r--r--fs/xfs/xfs_iwalk.h13
-rw-r--r--fs/xfs/xfs_linux.h14
-rw-r--r--fs/xfs/xfs_log.c710
-rw-r--r--fs/xfs/xfs_log_cil.c16
-rw-r--r--fs/xfs/xfs_log_priv.h33
-rw-r--r--fs/xfs/xfs_log_recover.c234
-rw-r--r--fs/xfs/xfs_message.c22
-rw-r--r--fs/xfs/xfs_message.h6
-rw-r--r--fs/xfs/xfs_mount.c222
-rw-r--r--fs/xfs/xfs_mount.h64
-rw-r--r--fs/xfs/xfs_mru_cache.c4
-rw-r--r--fs/xfs/xfs_ondisk.h1
-rw-r--r--fs/xfs/xfs_pnfs.c60
-rw-r--r--fs/xfs/xfs_qm.c69
-rw-r--r--fs/xfs/xfs_qm.h12
-rw-r--r--fs/xfs/xfs_qm_bhv.c8
-rw-r--r--fs/xfs/xfs_qm_syscalls.c139
-rw-r--r--fs/xfs/xfs_quotaops.c9
-rw-r--r--fs/xfs/xfs_refcount_item.c25
-rw-r--r--fs/xfs/xfs_reflink.c235
-rw-r--r--fs/xfs/xfs_reflink.h6
-rw-r--r--fs/xfs/xfs_rmap_item.c19
-rw-r--r--fs/xfs/xfs_rtalloc.c15
-rw-r--r--fs/xfs/xfs_super.c1517
-rw-r--r--fs/xfs/xfs_super.h10
-rw-r--r--fs/xfs/xfs_symlink.c38
-rw-r--r--fs/xfs/xfs_symlink.h2
-rw-r--r--fs/xfs/xfs_sysfs.c13
-rw-r--r--fs/xfs/xfs_trace.h159
-rw-r--r--fs/xfs/xfs_trans.c6
-rw-r--r--fs/xfs/xfs_trans.h14
-rw-r--r--fs/xfs/xfs_trans_ail.c10
-rw-r--r--fs/xfs/xfs_trans_buf.c61
-rw-r--r--fs/xfs/xfs_trans_dquot.c64
-rw-r--r--fs/xfs/xfs_xattr.c15
150 files changed, 8020 insertions, 8702 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 06b68b6115bc..aceca2f9a3db 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -27,7 +27,6 @@ xfs-y += $(addprefix libxfs/, \
xfs_bmap_btree.o \
xfs_btree.o \
xfs_da_btree.o \
- xfs_da_format.o \
xfs_defer.o \
xfs_dir2.o \
xfs_dir2_block.o \
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
index 16bb9a328678..1da94237a8cf 100644
--- a/fs/xfs/kmem.c
+++ b/fs/xfs/kmem.c
@@ -3,10 +3,10 @@
* Copyright (c) 2000-2005 Silicon Graphics, Inc.
* All Rights Reserved.
*/
-#include <linux/sched/mm.h>
+#include "xfs.h"
#include <linux/backing-dev.h>
-#include "kmem.h"
#include "xfs_message.h"
+#include "xfs_trace.h"
void *
kmem_alloc(size_t size, xfs_km_flags_t flags)
@@ -15,9 +15,11 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)
gfp_t lflags = kmem_flags_convert(flags);
void *ptr;
+ trace_kmem_alloc(size, flags, _RET_IP_);
+
do {
ptr = kmalloc(size, lflags);
- if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
+ if (ptr || (flags & KM_MAYFAIL))
return ptr;
if (!(++retries % 100))
xfs_err(NULL,
@@ -28,28 +30,24 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)
} while (1);
}
-void *
-kmem_alloc_large(size_t size, xfs_km_flags_t flags)
+
+/*
+ * __vmalloc() will allocate data pages and auxiliary structures (e.g.
+ * pagetables) with GFP_KERNEL, yet we may be under GFP_NOFS context here. Hence
+ * we need to tell memory reclaim that we are in such a context via
+ * PF_MEMALLOC_NOFS to prevent memory reclaim re-entering the filesystem here
+ * and potentially deadlocking.
+ */
+static void *
+__kmem_vmalloc(size_t size, xfs_km_flags_t flags)
{
unsigned nofs_flag = 0;
void *ptr;
- gfp_t lflags;
-
- ptr = kmem_alloc(size, flags | KM_MAYFAIL);
- if (ptr)
- return ptr;
+ gfp_t lflags = kmem_flags_convert(flags);
- /*
- * __vmalloc() will allocate data pages and auxillary structures (e.g.
- * pagetables) with GFP_KERNEL, yet we may be under GFP_NOFS context
- * here. Hence we need to tell memory reclaim that we are in such a
- * context via PF_MEMALLOC_NOFS to prevent memory reclaim re-entering
- * the filesystem here and potentially deadlocking.
- */
if (flags & KM_NOFS)
nofs_flag = memalloc_nofs_save();
- lflags = kmem_flags_convert(flags);
ptr = __vmalloc(size, lflags, PAGE_KERNEL);
if (flags & KM_NOFS)
@@ -58,6 +56,44 @@ kmem_alloc_large(size_t size, xfs_km_flags_t flags)
return ptr;
}
+/*
+ * Same as kmem_alloc_large, except we guarantee the buffer returned is aligned
+ * to the @align_mask. We only guarantee alignment up to page size, we'll clamp
+ * alignment at page size if it is larger. vmalloc always returns a PAGE_SIZE
+ * aligned region.
+ */
+void *
+kmem_alloc_io(size_t size, int align_mask, xfs_km_flags_t flags)
+{
+ void *ptr;
+
+ trace_kmem_alloc_io(size, flags, _RET_IP_);
+
+ if (WARN_ON_ONCE(align_mask >= PAGE_SIZE))
+ align_mask = PAGE_SIZE - 1;
+
+ ptr = kmem_alloc(size, flags | KM_MAYFAIL);
+ if (ptr) {
+ if (!((uintptr_t)ptr & align_mask))
+ return ptr;
+ kfree(ptr);
+ }
+ return __kmem_vmalloc(size, flags);
+}
+
+void *
+kmem_alloc_large(size_t size, xfs_km_flags_t flags)
+{
+ void *ptr;
+
+ trace_kmem_alloc_large(size, flags, _RET_IP_);
+
+ ptr = kmem_alloc(size, flags | KM_MAYFAIL);
+ if (ptr)
+ return ptr;
+ return __kmem_vmalloc(size, flags);
+}
+
void *
kmem_realloc(const void *old, size_t newsize, xfs_km_flags_t flags)
{
@@ -65,9 +101,11 @@ kmem_realloc(const void *old, size_t newsize, xfs_km_flags_t flags)
gfp_t lflags = kmem_flags_convert(flags);
void *ptr;
+ trace_kmem_realloc(newsize, flags, _RET_IP_);
+
do {
ptr = krealloc(old, newsize, lflags);
- if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
+ if (ptr || (flags & KM_MAYFAIL))
return ptr;
if (!(++retries % 100))
xfs_err(NULL,
@@ -85,9 +123,10 @@ kmem_zone_alloc(kmem_zone_t *zone, xfs_km_flags_t flags)
gfp_t lflags = kmem_flags_convert(flags);
void *ptr;
+ trace_kmem_zone_alloc(kmem_cache_size(zone), flags, _RET_IP_);
do {
ptr = kmem_cache_alloc(zone, lflags);
- if (ptr || (flags & (KM_MAYFAIL|KM_NOSLEEP)))
+ if (ptr || (flags & KM_MAYFAIL))
return ptr;
if (!(++retries % 100))
xfs_err(NULL,
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index 267655acd426..6143117770e9 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -16,8 +16,6 @@
*/
typedef unsigned __bitwise xfs_km_flags_t;
-#define KM_SLEEP ((__force xfs_km_flags_t)0x0001u)
-#define KM_NOSLEEP ((__force xfs_km_flags_t)0x0002u)
#define KM_NOFS ((__force xfs_km_flags_t)0x0004u)
#define KM_MAYFAIL ((__force xfs_km_flags_t)0x0008u)
#define KM_ZERO ((__force xfs_km_flags_t)0x0010u)
@@ -32,15 +30,11 @@ kmem_flags_convert(xfs_km_flags_t flags)
{
gfp_t lflags;
- BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL|KM_ZERO));
+ BUG_ON(flags & ~(KM_NOFS|KM_MAYFAIL|KM_ZERO));
- if (flags & KM_NOSLEEP) {
- lflags = GFP_ATOMIC | __GFP_NOWARN;
- } else {
- lflags = GFP_KERNEL | __GFP_NOWARN;
- if (flags & KM_NOFS)
- lflags &= ~__GFP_FS;
- }
+ lflags = GFP_KERNEL | __GFP_NOWARN;
+ if (flags & KM_NOFS)
+ lflags &= ~__GFP_FS;
/*
* Default page/slab allocator behavior is to retry for ever
@@ -59,6 +53,7 @@ kmem_flags_convert(xfs_km_flags_t flags)
}
extern void *kmem_alloc(size_t, xfs_km_flags_t);
+extern void *kmem_alloc_io(size_t size, int align_mask, xfs_km_flags_t flags);
extern void *kmem_alloc_large(size_t size, xfs_km_flags_t);
extern void *kmem_realloc(const void *, size_t, xfs_km_flags_t);
static inline void kmem_free(const void *ptr)
@@ -83,39 +78,9 @@ kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
* Zone interfaces
*/
-#define KM_ZONE_HWALIGN SLAB_HWCACHE_ALIGN
-#define KM_ZONE_RECLAIM SLAB_RECLAIM_ACCOUNT
-#define KM_ZONE_SPREAD SLAB_MEM_SPREAD
-#define KM_ZONE_ACCOUNT SLAB_ACCOUNT
-
#define kmem_zone kmem_cache
#define kmem_zone_t struct kmem_cache
-static inline kmem_zone_t *
-kmem_zone_init(int size, char *zone_name)
-{
- return kmem_cache_create(zone_name, size, 0, 0, NULL);
-}
-
-static inline kmem_zone_t *
-kmem_zone_init_flags(int size, char *zone_name, slab_flags_t flags,
- void (*construct)(void *))
-{
- return kmem_cache_create(zone_name, size, 0, flags, construct);
-}
-
-static inline void
-kmem_zone_free(kmem_zone_t *zone, void *ptr)
-{
- kmem_cache_free(zone, ptr);
-}
-
-static inline void
-kmem_zone_destroy(kmem_zone_t *zone)
-{
- kmem_cache_destroy(zone);
-}
-
extern void *kmem_zone_alloc(kmem_zone_t *, xfs_km_flags_t);
static inline void *
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 5de296b34ab1..08d6beb54f8c 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -23,26 +23,28 @@
#include "xfs_ag_resv.h"
#include "xfs_health.h"
-static struct xfs_buf *
+static int
xfs_get_aghdr_buf(
struct xfs_mount *mp,
xfs_daddr_t blkno,
size_t numblks,
- int flags,
+ struct xfs_buf **bpp,
const struct xfs_buf_ops *ops)
{
struct xfs_buf *bp;
+ int error;
- bp = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, flags);
- if (!bp)
- return NULL;
+ error = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, 0, &bp);
+ if (error)
+ return error;
xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
bp->b_bn = blkno;
bp->b_maps[0].bm_bn = blkno;
bp->b_ops = ops;
- return bp;
+ *bpp = bp;
+ return 0;
}
static inline bool is_log_ag(struct xfs_mount *mp, struct aghdr_init_data *id)
@@ -341,13 +343,13 @@ xfs_ag_init_hdr(
struct aghdr_init_data *id,
aghdr_init_work_f work,
const struct xfs_buf_ops *ops)
-
{
struct xfs_buf *bp;
+ int error;
- bp = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, 0, ops);
- if (!bp)
- return -ENOMEM;
+ error = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, &bp, ops);
+ if (error)
+ return error;
(*work)(mp, bp, id);
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index 87a9747f1d36..fdfe6dc0d307 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -19,6 +19,8 @@
#include "xfs_btree.h"
#include "xfs_refcount_btree.h"
#include "xfs_ialloc_btree.h"
+#include "xfs_sb.h"
+#include "xfs_ag_resv.h"
/*
* Per-AG Block Reservations
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 372ad55631fc..d8053bc96c4d 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -146,9 +146,13 @@ xfs_alloc_lookup_eq(
xfs_extlen_t len, /* length of extent */
int *stat) /* success/failure */
{
+ int error;
+
cur->bc_rec.a.ar_startblock = bno;
cur->bc_rec.a.ar_blockcount = len;
- return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
+ error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
+ cur->bc_private.a.priv.abt.active = (*stat == 1);
+ return error;
}
/*
@@ -162,9 +166,13 @@ xfs_alloc_lookup_ge(
xfs_extlen_t len, /* length of extent */
int *stat) /* success/failure */
{
+ int error;
+
cur->bc_rec.a.ar_startblock = bno;
cur->bc_rec.a.ar_blockcount = len;
- return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
+ error = xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
+ cur->bc_private.a.priv.abt.active = (*stat == 1);
+ return error;
}
/*
@@ -178,9 +186,19 @@ xfs_alloc_lookup_le(
xfs_extlen_t len, /* length of extent */
int *stat) /* success/failure */
{
+ int error;
cur->bc_rec.a.ar_startblock = bno;
cur->bc_rec.a.ar_blockcount = len;
- return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
+ error = xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
+ cur->bc_private.a.priv.abt.active = (*stat == 1);
+ return error;
+}
+
+static inline bool
+xfs_alloc_cur_active(
+ struct xfs_btree_cur *cur)
+{
+ return cur && cur->bc_private.a.priv.abt.active;
}
/*
@@ -313,7 +331,7 @@ xfs_alloc_compute_diff(
xfs_extlen_t newlen1=0; /* length with newbno1 */
xfs_extlen_t newlen2=0; /* length with newbno2 */
xfs_agblock_t wantend; /* end of target extent */
- bool userdata = xfs_alloc_is_userdata(datatype);
+ bool userdata = datatype & XFS_ALLOC_USERDATA;
ASSERT(freelen >= wantlen);
freeend = freebno + freelen;
@@ -433,13 +451,17 @@ xfs_alloc_fixup_trees(
#ifdef DEBUG
if ((error = xfs_alloc_get_rec(cnt_cur, &nfbno1, &nflen1, &i)))
return error;
- XFS_WANT_CORRUPTED_RETURN(mp,
- i == 1 && nfbno1 == fbno && nflen1 == flen);
+ if (XFS_IS_CORRUPT(mp,
+ i != 1 ||
+ nfbno1 != fbno ||
+ nflen1 != flen))
+ return -EFSCORRUPTED;
#endif
} else {
if ((error = xfs_alloc_lookup_eq(cnt_cur, fbno, flen, &i)))
return error;
- XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
+ if (XFS_IS_CORRUPT(mp, i != 1))
+ return -EFSCORRUPTED;
}
/*
* Look up the record in the by-block tree if necessary.
@@ -448,13 +470,17 @@ xfs_alloc_fixup_trees(
#ifdef DEBUG
if ((error = xfs_alloc_get_rec(bno_cur, &nfbno1, &nflen1, &i)))
return error;
- XFS_WANT_CORRUPTED_RETURN(mp,
- i == 1 && nfbno1 == fbno && nflen1 == flen);
+ if (XFS_IS_CORRUPT(mp,
+ i != 1 ||
+ nfbno1 != fbno ||
+ nflen1 != flen))
+ return -EFSCORRUPTED;
#endif
} else {
if ((error = xfs_alloc_lookup_eq(bno_cur, fbno, flen, &i)))
return error;
- XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
+ if (XFS_IS_CORRUPT(mp, i != 1))
+ return -EFSCORRUPTED;
}
#ifdef DEBUG
@@ -465,8 +491,10 @@ xfs_alloc_fixup_trees(
bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]);
cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
- XFS_WANT_CORRUPTED_RETURN(mp,
- bnoblock->bb_numrecs == cntblock->bb_numrecs);
+ if (XFS_IS_CORRUPT(mp,
+ bnoblock->bb_numrecs !=
+ cntblock->bb_numrecs))
+ return -EFSCORRUPTED;
}
#endif
@@ -496,25 +524,30 @@ xfs_alloc_fixup_trees(
*/
if ((error = xfs_btree_delete(cnt_cur, &i)))
return error;
- XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
+ if (XFS_IS_CORRUPT(mp, i != 1))
+ return -EFSCORRUPTED;
/*
* Add new by-size btree entry(s).
*/
if (nfbno1 != NULLAGBLOCK) {
if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
return error;
- XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
+ if (XFS_IS_CORRUPT(mp, i != 0))
+ return -EFSCORRUPTED;
if ((error = xfs_btree_insert(cnt_cur, &i)))
return error;
- XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
+ if (XFS_IS_CORRUPT(mp, i != 1))
+ return -EFSCORRUPTED;
}
if (nfbno2 != NULLAGBLOCK) {
if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
return error;
- XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
+ if (XFS_IS_CORRUPT(mp, i != 0))
+ return -EFSCORRUPTED;
if ((error = xfs_btree_insert(cnt_cur, &i)))
return error;
- XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
+ if (XFS_IS_CORRUPT(mp, i != 1))
+ return -EFSCORRUPTED;
}
/*
* Fix up the by-block btree entry(s).
@@ -525,7 +558,8 @@ xfs_alloc_fixup_trees(
*/
if ((error = xfs_btree_delete(bno_cur, &i)))
return error;
- XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
+ if (XFS_IS_CORRUPT(mp, i != 1))
+ return -EFSCORRUPTED;
} else {
/*
* Update the by-block entry to start later|be shorter.
@@ -539,10 +573,12 @@ xfs_alloc_fixup_trees(
*/
if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i)))
return error;
- XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
+ if (XFS_IS_CORRUPT(mp, i != 0))
+ return -EFSCORRUPTED;
if ((error = xfs_btree_insert(bno_cur, &i)))
return error;
- XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
+ if (XFS_IS_CORRUPT(mp, i != 1))
+ return -EFSCORRUPTED;
}
return 0;
}
@@ -684,16 +720,298 @@ xfs_alloc_update_counters(
xfs_trans_agblocks_delta(tp, len);
if (unlikely(be32_to_cpu(agf->agf_freeblks) >
- be32_to_cpu(agf->agf_length)))
+ be32_to_cpu(agf->agf_length))) {
+ xfs_buf_corruption_error(agbp);
return -EFSCORRUPTED;
+ }
xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS);
return 0;
}
/*
- * Allocation group level functions.
+ * Block allocation algorithm and data structures.
+ */
+struct xfs_alloc_cur {
+ struct xfs_btree_cur *cnt; /* btree cursors */
+ struct xfs_btree_cur *bnolt;
+ struct xfs_btree_cur *bnogt;
+ xfs_extlen_t cur_len;/* current search length */
+ xfs_agblock_t rec_bno;/* extent startblock */
+ xfs_extlen_t rec_len;/* extent length */
+ xfs_agblock_t bno; /* alloc bno */
+ xfs_extlen_t len; /* alloc len */
+ xfs_extlen_t diff; /* diff from search bno */
+ unsigned int busy_gen;/* busy state */
+ bool busy;
+};
+
+/*
+ * Set up cursors, etc. in the extent allocation cursor. This function can be
+ * called multiple times to reset an initialized structure without having to
+ * reallocate cursors.
+ */
+static int
+xfs_alloc_cur_setup(
+ struct xfs_alloc_arg *args,
+ struct xfs_alloc_cur *acur)
+{
+ int error;
+ int i;
+
+ ASSERT(args->alignment == 1 || args->type != XFS_ALLOCTYPE_THIS_BNO);
+
+ acur->cur_len = args->maxlen;
+ acur->rec_bno = 0;
+ acur->rec_len = 0;
+ acur->bno = 0;
+ acur->len = 0;
+ acur->diff = -1;
+ acur->busy = false;
+ acur->busy_gen = 0;
+
+ /*
+ * Perform an initial cntbt lookup to check for availability of maxlen
+ * extents. If this fails, we'll return -ENOSPC to signal the caller to
+ * attempt a small allocation.
+ */
+ if (!acur->cnt)
+ acur->cnt = xfs_allocbt_init_cursor(args->mp, args->tp,
+ args->agbp, args->agno, XFS_BTNUM_CNT);
+ error = xfs_alloc_lookup_ge(acur->cnt, 0, args->maxlen, &i);
+ if (error)
+ return error;
+
+ /*
+ * Allocate the bnobt left and right search cursors.
+ */
+ if (!acur->bnolt)
+ acur->bnolt = xfs_allocbt_init_cursor(args->mp, args->tp,
+ args->agbp, args->agno, XFS_BTNUM_BNO);
+ if (!acur->bnogt)
+ acur->bnogt = xfs_allocbt_init_cursor(args->mp, args->tp,
+ args->agbp, args->agno, XFS_BTNUM_BNO);
+ return i == 1 ? 0 : -ENOSPC;
+}
+
+static void
+xfs_alloc_cur_close(
+ struct xfs_alloc_cur *acur,
+ bool error)
+{
+ int cur_error = XFS_BTREE_NOERROR;
+
+ if (error)
+ cur_error = XFS_BTREE_ERROR;
+
+ if (acur->cnt)
+ xfs_btree_del_cursor(acur->cnt, cur_error);
+ if (acur->bnolt)
+ xfs_btree_del_cursor(acur->bnolt, cur_error);
+ if (acur->bnogt)
+ xfs_btree_del_cursor(acur->bnogt, cur_error);
+ acur->cnt = acur->bnolt = acur->bnogt = NULL;
+}
+
+/*
+ * Check an extent for allocation and track the best available candidate in the
+ * allocation structure. The cursor is deactivated if it has entered an out of
+ * range state based on allocation arguments. Optionally return the extent
+ * extent geometry and allocation status if requested by the caller.
+ */
+static int
+xfs_alloc_cur_check(
+ struct xfs_alloc_arg *args,
+ struct xfs_alloc_cur *acur,
+ struct xfs_btree_cur *cur,
+ int *new)
+{
+ int error, i;
+ xfs_agblock_t bno, bnoa, bnew;
+ xfs_extlen_t len, lena, diff = -1;
+ bool busy;
+ unsigned busy_gen = 0;
+ bool deactivate = false;
+ bool isbnobt = cur->bc_btnum == XFS_BTNUM_BNO;
+
+ *new = 0;
+
+ error = xfs_alloc_get_rec(cur, &bno, &len, &i);
+ if (error)
+ return error;
+ if (XFS_IS_CORRUPT(args->mp, i != 1))
+ return -EFSCORRUPTED;
+
+ /*
+ * Check minlen and deactivate a cntbt cursor if out of acceptable size
+ * range (i.e., walking backwards looking for a minlen extent).
+ */
+ if (len < args->minlen) {
+ deactivate = !isbnobt;
+ goto out;
+ }
+
+ busy = xfs_alloc_compute_aligned(args, bno, len, &bnoa, &lena,
+ &busy_gen);
+ acur->busy |= busy;
+ if (busy)
+ acur->busy_gen = busy_gen;
+ /* deactivate a bnobt cursor outside of locality range */
+ if (bnoa < args->min_agbno || bnoa > args->max_agbno) {
+ deactivate = isbnobt;
+ goto out;
+ }
+ if (lena < args->minlen)
+ goto out;
+
+ args->len = XFS_EXTLEN_MIN(lena, args->maxlen);
+ xfs_alloc_fix_len(args);
+ ASSERT(args->len >= args->minlen);
+ if (args->len < acur->len)
+ goto out;
+
+ /*
+ * We have an aligned record that satisfies minlen and beats or matches
+ * the candidate extent size. Compare locality for near allocation mode.
+ */
+ ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO);
+ diff = xfs_alloc_compute_diff(args->agbno, args->len,
+ args->alignment, args->datatype,
+ bnoa, lena, &bnew);
+ if (bnew == NULLAGBLOCK)
+ goto out;
+
+ /*
+ * Deactivate a bnobt cursor with worse locality than the current best.
+ */
+ if (diff > acur->diff) {
+ deactivate = isbnobt;
+ goto out;
+ }
+
+ ASSERT(args->len > acur->len ||
+ (args->len == acur->len && diff <= acur->diff));
+ acur->rec_bno = bno;
+ acur->rec_len = len;
+ acur->bno = bnew;
+ acur->len = args->len;
+ acur->diff = diff;
+ *new = 1;
+
+ /*
+ * We're done if we found a perfect allocation. This only deactivates
+ * the current cursor, but this is just an optimization to terminate a
+ * cntbt search that otherwise runs to the edge of the tree.
+ */
+ if (acur->diff == 0 && acur->len == args->maxlen)
+ deactivate = true;
+out:
+ if (deactivate)
+ cur->bc_private.a.priv.abt.active = false;
+ trace_xfs_alloc_cur_check(args->mp, cur->bc_btnum, bno, len, diff,
+ *new);
+ return 0;
+}
+
+/*
+ * Complete an allocation of a candidate extent. Remove the extent from both
+ * trees and update the args structure.
*/
+STATIC int
+xfs_alloc_cur_finish(
+ struct xfs_alloc_arg *args,
+ struct xfs_alloc_cur *acur)
+{
+ int error;
+
+ ASSERT(acur->cnt && acur->bnolt);
+ ASSERT(acur->bno >= acur->rec_bno);
+ ASSERT(acur->bno + acur->len <= acur->rec_bno + acur->rec_len);
+ ASSERT(acur->rec_bno + acur->rec_len <=
+ be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
+
+ error = xfs_alloc_fixup_trees(acur->cnt, acur->bnolt, acur->rec_bno,
+ acur->rec_len, acur->bno, acur->len, 0);
+ if (error)
+ return error;
+
+ args->agbno = acur->bno;
+ args->len = acur->len;
+ args->wasfromfl = 0;
+
+ trace_xfs_alloc_cur(args);
+ return 0;
+}
+
+/*
+ * Locality allocation lookup algorithm. This expects a cntbt cursor and uses
+ * bno optimized lookup to search for extents with ideal size and locality.
+ */
+STATIC int
+xfs_alloc_cntbt_iter(
+ struct xfs_alloc_arg *args,
+ struct xfs_alloc_cur *acur)
+{
+ struct xfs_btree_cur *cur = acur->cnt;
+ xfs_agblock_t bno;
+ xfs_extlen_t len, cur_len;
+ int error;
+ int i;
+
+ if (!xfs_alloc_cur_active(cur))
+ return 0;
+
+ /* locality optimized lookup */
+ cur_len = acur->cur_len;
+ error = xfs_alloc_lookup_ge(cur, args->agbno, cur_len, &i);
+ if (error)
+ return error;
+ if (i == 0)
+ return 0;
+ error = xfs_alloc_get_rec(cur, &bno, &len, &i);
+ if (error)
+ return error;
+
+ /* check the current record and update search length from it */
+ error = xfs_alloc_cur_check(args, acur, cur, &i);
+ if (error)
+ return error;
+ ASSERT(len >= acur->cur_len);
+ acur->cur_len = len;
+
+ /*
+ * We looked up the first record >= [agbno, len] above. The agbno is a
+ * secondary key and so the current record may lie just before or after
+ * agbno. If it is past agbno, check the previous record too so long as
+ * the length matches as it may be closer. Don't check a smaller record
+ * because that could deactivate our cursor.
+ */
+ if (bno > args->agbno) {
+ error = xfs_btree_decrement(cur, 0, &i);
+ if (!error && i) {
+ error = xfs_alloc_get_rec(cur, &bno, &len, &i);
+ if (!error && i && len == acur->cur_len)
+ error = xfs_alloc_cur_check(args, acur, cur,
+ &i);
+ }
+ if (error)
+ return error;
+ }
+
+ /*
+ * Increment the search key until we find at least one allocation
+ * candidate or if the extent we found was larger. Otherwise, double the
+ * search key to optimize the search. Efficiency is more important here
+ * than absolute best locality.
+ */
+ cur_len <<= 1;
+ if (!acur->len || acur->cur_len >= cur_len)
+ acur->cur_len++;
+ else
+ acur->cur_len = cur_len;
+
+ return error;
+}
/*
* Deal with the case where only small freespaces remain. Either return the
@@ -727,7 +1045,10 @@ xfs_alloc_ag_vextent_small(
error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i);
if (error)
goto error;
- XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error);
+ if (XFS_IS_CORRUPT(args->mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error;
+ }
goto out;
}
@@ -744,23 +1065,26 @@ xfs_alloc_ag_vextent_small(
goto out;
xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1,
- xfs_alloc_allow_busy_reuse(args->datatype));
+ (args->datatype & XFS_ALLOC_NOBUSY));
- if (xfs_alloc_is_userdata(args->datatype)) {
+ if (args->datatype & XFS_ALLOC_USERDATA) {
struct xfs_buf *bp;
- bp = xfs_btree_get_bufs(args->mp, args->tp, args->agno, fbno);
- if (!bp) {
- error = -EFSCORRUPTED;
+ error = xfs_trans_get_buf(args->tp, args->mp->m_ddev_targp,
+ XFS_AGB_TO_DADDR(args->mp, args->agno, fbno),
+ args->mp->m_bsize, 0, &bp);
+ if (error)
goto error;
- }
xfs_trans_binval(args->tp, bp);
}
*fbnop = args->agbno = fbno;
*flenp = args->len = 1;
- XFS_WANT_CORRUPTED_GOTO(args->mp,
- fbno < be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
- error);
+ if (XFS_IS_CORRUPT(args->mp,
+ fbno >= be32_to_cpu(
+ XFS_BUF_TO_AGF(args->agbp)->agf_length))) {
+ error = -EFSCORRUPTED;
+ goto error;
+ }
args->wasfromfl = 1;
trace_xfs_alloc_small_freelist(args);
@@ -915,7 +1239,10 @@ xfs_alloc_ag_vextent_exact(
error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i);
if (error)
goto error0;
- XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(args->mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
ASSERT(fbno <= args->agbno);
/*
@@ -984,98 +1311,243 @@ error0:
}
/*
- * Search the btree in a given direction via the search cursor and compare
- * the records found against the good extent we've already found.
+ * Search a given number of btree records in a given direction. Check each
+ * record against the good extent we've already found.
*/
STATIC int
-xfs_alloc_find_best_extent(
- struct xfs_alloc_arg *args, /* allocation argument structure */
- struct xfs_btree_cur **gcur, /* good cursor */
- struct xfs_btree_cur **scur, /* searching cursor */
- xfs_agblock_t gdiff, /* difference for search comparison */
- xfs_agblock_t *sbno, /* extent found by search */
- xfs_extlen_t *slen, /* extent length */
- xfs_agblock_t *sbnoa, /* aligned extent found by search */
- xfs_extlen_t *slena, /* aligned extent length */
- int dir) /* 0 = search right, 1 = search left */
+xfs_alloc_walk_iter(
+ struct xfs_alloc_arg *args,
+ struct xfs_alloc_cur *acur,
+ struct xfs_btree_cur *cur,
+ bool increment,
+ bool find_one, /* quit on first candidate */
+ int count, /* rec count (-1 for infinite) */
+ int *stat)
{
- xfs_agblock_t new;
- xfs_agblock_t sdiff;
int error;
int i;
- unsigned busy_gen;
- /* The good extent is perfect, no need to search. */
- if (!gdiff)
- goto out_use_good;
+ *stat = 0;
/*
- * Look until we find a better one, run out of space or run off the end.
+ * Search so long as the cursor is active or we find a better extent.
+ * The cursor is deactivated if it extends beyond the range of the
+ * current allocation candidate.
*/
- do {
- error = xfs_alloc_get_rec(*scur, sbno, slen, &i);
+ while (xfs_alloc_cur_active(cur) && count) {
+ error = xfs_alloc_cur_check(args, acur, cur, &i);
if (error)
- goto error0;
- XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
- xfs_alloc_compute_aligned(args, *sbno, *slen,
- sbnoa, slena, &busy_gen);
+ return error;
+ if (i == 1) {
+ *stat = 1;
+ if (find_one)
+ break;
+ }
+ if (!xfs_alloc_cur_active(cur))
+ break;
+
+ if (increment)
+ error = xfs_btree_increment(cur, 0, &i);
+ else
+ error = xfs_btree_decrement(cur, 0, &i);
+ if (error)
+ return error;
+ if (i == 0)
+ cur->bc_private.a.priv.abt.active = false;
+
+ if (count > 0)
+ count--;
+ }
+
+ return 0;
+}
+
+/*
+ * Search the by-bno and by-size btrees in parallel in search of an extent with
+ * ideal locality based on the NEAR mode ->agbno locality hint.
+ */
+STATIC int
+xfs_alloc_ag_vextent_locality(
+ struct xfs_alloc_arg *args,
+ struct xfs_alloc_cur *acur,
+ int *stat)
+{
+ struct xfs_btree_cur *fbcur = NULL;
+ int error;
+ int i;
+ bool fbinc;
+
+ ASSERT(acur->len == 0);
+ ASSERT(args->type == XFS_ALLOCTYPE_NEAR_BNO);
+
+ *stat = 0;
+
+ error = xfs_alloc_lookup_ge(acur->cnt, args->agbno, acur->cur_len, &i);
+ if (error)
+ return error;
+ error = xfs_alloc_lookup_le(acur->bnolt, args->agbno, 0, &i);
+ if (error)
+ return error;
+ error = xfs_alloc_lookup_ge(acur->bnogt, args->agbno, 0, &i);
+ if (error)
+ return error;
+
+ /*
+ * Search the bnobt and cntbt in parallel. Search the bnobt left and
+ * right and lookup the closest extent to the locality hint for each
+ * extent size key in the cntbt. The entire search terminates
+ * immediately on a bnobt hit because that means we've found best case
+ * locality. Otherwise the search continues until the cntbt cursor runs
+ * off the end of the tree. If no allocation candidate is found at this
+ * point, give up on locality, walk backwards from the end of the cntbt
+ * and take the first available extent.
+ *
+ * The parallel tree searches balance each other out to provide fairly
+ * consistent performance for various situations. The bnobt search can
+ * have pathological behavior in the worst case scenario of larger
+ * allocation requests and fragmented free space. On the other hand, the
+ * bnobt is able to satisfy most smaller allocation requests much more
+ * quickly than the cntbt. The cntbt search can sift through fragmented
+ * free space and sets of free extents for larger allocation requests
+ * more quickly than the bnobt. Since the locality hint is just a hint
+ * and we don't want to scan the entire bnobt for perfect locality, the
+ * cntbt search essentially bounds the bnobt search such that we can
+ * find good enough locality at reasonable performance in most cases.
+ */
+ while (xfs_alloc_cur_active(acur->bnolt) ||
+ xfs_alloc_cur_active(acur->bnogt) ||
+ xfs_alloc_cur_active(acur->cnt)) {
+
+ trace_xfs_alloc_cur_lookup(args);
/*
- * The good extent is closer than this one.
+ * Search the bnobt left and right. In the case of a hit, finish
+ * the search in the opposite direction and we're done.
*/
- if (!dir) {
- if (*sbnoa > args->max_agbno)
- goto out_use_good;
- if (*sbnoa >= args->agbno + gdiff)
- goto out_use_good;
- } else {
- if (*sbnoa < args->min_agbno)
- goto out_use_good;
- if (*sbnoa <= args->agbno - gdiff)
- goto out_use_good;
+ error = xfs_alloc_walk_iter(args, acur, acur->bnolt, false,
+ true, 1, &i);
+ if (error)
+ return error;
+ if (i == 1) {
+ trace_xfs_alloc_cur_left(args);
+ fbcur = acur->bnogt;
+ fbinc = true;
+ break;
+ }
+ error = xfs_alloc_walk_iter(args, acur, acur->bnogt, true, true,
+ 1, &i);
+ if (error)
+ return error;
+ if (i == 1) {
+ trace_xfs_alloc_cur_right(args);
+ fbcur = acur->bnolt;
+ fbinc = false;
+ break;
}
/*
- * Same distance, compare length and pick the best.
+ * Check the extent with best locality based on the current
+ * extent size search key and keep track of the best candidate.
*/
- if (*slena >= args->minlen) {
- args->len = XFS_EXTLEN_MIN(*slena, args->maxlen);
- xfs_alloc_fix_len(args);
-
- sdiff = xfs_alloc_compute_diff(args->agbno, args->len,
- args->alignment,
- args->datatype, *sbnoa,
- *slena, &new);
+ error = xfs_alloc_cntbt_iter(args, acur);
+ if (error)
+ return error;
+ if (!xfs_alloc_cur_active(acur->cnt)) {
+ trace_xfs_alloc_cur_lookup_done(args);
+ break;
+ }
+ }
- /*
- * Choose closer size and invalidate other cursor.
- */
- if (sdiff < gdiff)
- goto out_use_search;
- goto out_use_good;
+ /*
+ * If we failed to find anything due to busy extents, return empty
+ * handed so the caller can flush and retry. If no busy extents were
+ * found, walk backwards from the end of the cntbt as a last resort.
+ */
+ if (!xfs_alloc_cur_active(acur->cnt) && !acur->len && !acur->busy) {
+ error = xfs_btree_decrement(acur->cnt, 0, &i);
+ if (error)
+ return error;
+ if (i) {
+ acur->cnt->bc_private.a.priv.abt.active = true;
+ fbcur = acur->cnt;
+ fbinc = false;
}
+ }
- if (!dir)
- error = xfs_btree_increment(*scur, 0, &i);
- else
- error = xfs_btree_decrement(*scur, 0, &i);
+ /*
+ * Search in the opposite direction for a better entry in the case of
+ * a bnobt hit or walk backwards from the end of the cntbt.
+ */
+ if (fbcur) {
+ error = xfs_alloc_walk_iter(args, acur, fbcur, fbinc, true, -1,
+ &i);
if (error)
- goto error0;
- } while (i);
+ return error;
+ }
-out_use_good:
- xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR);
- *scur = NULL;
- return 0;
+ if (acur->len)
+ *stat = 1;
-out_use_search:
- xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR);
- *gcur = NULL;
return 0;
+}
-error0:
- /* caller invalidates cursors */
- return error;
+/* Check the last block of the cnt btree for allocations. */
+static int
+xfs_alloc_ag_vextent_lastblock(
+ struct xfs_alloc_arg *args,
+ struct xfs_alloc_cur *acur,
+ xfs_agblock_t *bno,
+ xfs_extlen_t *len,
+ bool *allocated)
+{
+ int error;
+ int i;
+
+#ifdef DEBUG
+ /* Randomly don't execute the first algorithm. */
+ if (prandom_u32() & 1)
+ return 0;
+#endif
+
+ /*
+ * Start from the entry that lookup found, sequence through all larger
+ * free blocks. If we're actually pointing at a record smaller than
+ * maxlen, go to the start of this block, and skip all those smaller
+ * than minlen.
+ */
+ if (len || args->alignment > 1) {
+ acur->cnt->bc_ptrs[0] = 1;
+ do {
+ error = xfs_alloc_get_rec(acur->cnt, bno, len, &i);
+ if (error)
+ return error;
+ if (XFS_IS_CORRUPT(args->mp, i != 1))
+ return -EFSCORRUPTED;
+ if (*len >= args->minlen)
+ break;
+ error = xfs_btree_increment(acur->cnt, 0, &i);
+ if (error)
+ return error;
+ } while (i);
+ ASSERT(*len >= args->minlen);
+ if (!i)
+ return 0;
+ }
+
+ error = xfs_alloc_walk_iter(args, acur, acur->cnt, true, false, -1, &i);
+ if (error)
+ return error;
+
+ /*
+ * It didn't work. We COULD be in a case where there's a good record
+ * somewhere, so try again.
+ */
+ if (acur->len == 0)
+ return 0;
+
+ trace_xfs_alloc_near_first(args);
+ *allocated = true;
+ return 0;
}
/*
@@ -1084,41 +1556,17 @@ error0:
* and of the form k * prod + mod unless there's nothing that large.
* Return the starting a.g. block, or NULLAGBLOCK if we can't do it.
*/
-STATIC int /* error */
+STATIC int
xfs_alloc_ag_vextent_near(
- xfs_alloc_arg_t *args) /* allocation argument structure */
+ struct xfs_alloc_arg *args)
{
- xfs_btree_cur_t *bno_cur_gt; /* cursor for bno btree, right side */
- xfs_btree_cur_t *bno_cur_lt; /* cursor for bno btree, left side */
- xfs_btree_cur_t *cnt_cur; /* cursor for count btree */
- xfs_agblock_t gtbno; /* start bno of right side entry */
- xfs_agblock_t gtbnoa; /* aligned ... */
- xfs_extlen_t gtdiff; /* difference to right side entry */
- xfs_extlen_t gtlen; /* length of right side entry */
- xfs_extlen_t gtlena; /* aligned ... */
- xfs_agblock_t gtnew; /* useful start bno of right side */
- int error; /* error code */
- int i; /* result code, temporary */
- int j; /* result code, temporary */
- xfs_agblock_t ltbno; /* start bno of left side entry */
- xfs_agblock_t ltbnoa; /* aligned ... */
- xfs_extlen_t ltdiff; /* difference to left side entry */
- xfs_extlen_t ltlen; /* length of left side entry */
- xfs_extlen_t ltlena; /* aligned ... */
- xfs_agblock_t ltnew; /* useful start bno of left side */
- xfs_extlen_t rlen; /* length of returned extent */
- bool busy;
- unsigned busy_gen;
-#ifdef DEBUG
- /*
- * Randomly don't execute the first algorithm.
- */
- int dofirst; /* set to do first algorithm */
+ struct xfs_alloc_cur acur = {};
+ int error; /* error code */
+ int i; /* result code, temporary */
+ xfs_agblock_t bno;
+ xfs_extlen_t len;
- dofirst = prandom_u32() & 1;
-#endif
-
- /* handle unitialized agbno range so caller doesn't have to */
+ /* handle uninitialized agbno range so caller doesn't have to */
if (!args->min_agbno && !args->max_agbno)
args->max_agbno = args->mp->m_sb.sb_agblocks - 1;
ASSERT(args->min_agbno <= args->max_agbno);
@@ -1130,40 +1578,27 @@ xfs_alloc_ag_vextent_near(
args->agbno = args->max_agbno;
restart:
- bno_cur_lt = NULL;
- bno_cur_gt = NULL;
- ltlen = 0;
- gtlena = 0;
- ltlena = 0;
- busy = false;
+ len = 0;
/*
- * Get a cursor for the by-size btree.
+ * Set up cursors and see if there are any free extents as big as
+ * maxlen. If not, pick the last entry in the tree unless the tree is
+ * empty.
*/
- cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
- args->agno, XFS_BTNUM_CNT);
-
- /*
- * See if there are any free extents as big as maxlen.
- */
- if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen, &i)))
- goto error0;
- /*
- * If none, then pick up the last entry in the tree unless the
- * tree is empty.
- */
- if (!i) {
- if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &ltbno,
- &ltlen, &i)))
- goto error0;
- if (i == 0 || ltlen == 0) {
- xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
+ error = xfs_alloc_cur_setup(args, &acur);
+ if (error == -ENOSPC) {
+ error = xfs_alloc_ag_vextent_small(args, acur.cnt, &bno,
+ &len, &i);
+ if (error)
+ goto out;
+ if (i == 0 || len == 0) {
trace_xfs_alloc_near_noentry(args);
- return 0;
+ goto out;
}
ASSERT(i == 1);
+ } else if (error) {
+ goto out;
}
- args->wasfromfl = 0;
/*
* First algorithm.
@@ -1172,311 +1607,47 @@ restart:
* near the right edge of the tree. If it's in the last btree leaf
* block, then we just examine all the entries in that block
* that are big enough, and pick the best one.
- * This is written as a while loop so we can break out of it,
- * but we never loop back to the top.
*/
- while (xfs_btree_islastblock(cnt_cur, 0)) {
- xfs_extlen_t bdiff;
- int besti=0;
- xfs_extlen_t blen=0;
- xfs_agblock_t bnew=0;
+ if (xfs_btree_islastblock(acur.cnt, 0)) {
+ bool allocated = false;
-#ifdef DEBUG
- if (dofirst)
- break;
-#endif
- /*
- * Start from the entry that lookup found, sequence through
- * all larger free blocks. If we're actually pointing at a
- * record smaller than maxlen, go to the start of this block,
- * and skip all those smaller than minlen.
- */
- if (ltlen || args->alignment > 1) {
- cnt_cur->bc_ptrs[0] = 1;
- do {
- if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno,
- &ltlen, &i)))
- goto error0;
- XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
- if (ltlen >= args->minlen)
- break;
- if ((error = xfs_btree_increment(cnt_cur, 0, &i)))
- goto error0;
- } while (i);
- ASSERT(ltlen >= args->minlen);
- if (!i)
- break;
- }
- i = cnt_cur->bc_ptrs[0];
- for (j = 1, blen = 0, bdiff = 0;
- !error && j && (blen < args->maxlen || bdiff > 0);
- error = xfs_btree_increment(cnt_cur, 0, &j)) {
- /*
- * For each entry, decide if it's better than
- * the previous best entry.
- */
- if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
- goto error0;
- XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
- busy = xfs_alloc_compute_aligned(args, ltbno, ltlen,
- &ltbnoa, &ltlena, &busy_gen);
- if (ltlena < args->minlen)
- continue;
- if (ltbnoa < args->min_agbno || ltbnoa > args->max_agbno)
- continue;
- args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
- xfs_alloc_fix_len(args);
- ASSERT(args->len >= args->minlen);
- if (args->len < blen)
- continue;
- ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
- args->alignment, args->datatype, ltbnoa,
- ltlena, &ltnew);
- if (ltnew != NULLAGBLOCK &&
- (args->len > blen || ltdiff < bdiff)) {
- bdiff = ltdiff;
- bnew = ltnew;
- blen = args->len;
- besti = cnt_cur->bc_ptrs[0];
- }
- }
- /*
- * It didn't work. We COULD be in a case where
- * there's a good record somewhere, so try again.
- */
- if (blen == 0)
- break;
- /*
- * Point at the best entry, and retrieve it again.
- */
- cnt_cur->bc_ptrs[0] = besti;
- if ((error = xfs_alloc_get_rec(cnt_cur, &ltbno, &ltlen, &i)))
- goto error0;
- XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
- ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
- args->len = blen;
-
- /*
- * We are allocating starting at bnew for blen blocks.
- */
- args->agbno = bnew;
- ASSERT(bnew >= ltbno);
- ASSERT(bnew + blen <= ltbno + ltlen);
- /*
- * Set up a cursor for the by-bno tree.
- */
- bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp,
- args->agbp, args->agno, XFS_BTNUM_BNO);
- /*
- * Fix up the btree entries.
- */
- if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
- ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
- goto error0;
- xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
- xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
-
- trace_xfs_alloc_near_first(args);
- return 0;
- }
- /*
- * Second algorithm.
- * Search in the by-bno tree to the left and to the right
- * simultaneously, until in each case we find a space big enough,
- * or run into the edge of the tree. When we run into the edge,
- * we deallocate that cursor.
- * If both searches succeed, we compare the two spaces and pick
- * the better one.
- * With alignment, it's possible for both to fail; the upper
- * level algorithm that picks allocation groups for allocations
- * is not supposed to do this.
- */
- /*
- * Allocate and initialize the cursor for the leftward search.
- */
- bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
- args->agno, XFS_BTNUM_BNO);
- /*
- * Lookup <= bno to find the leftward search's starting point.
- */
- if ((error = xfs_alloc_lookup_le(bno_cur_lt, args->agbno, args->maxlen, &i)))
- goto error0;
- if (!i) {
- /*
- * Didn't find anything; use this cursor for the rightward
- * search.
- */
- bno_cur_gt = bno_cur_lt;
- bno_cur_lt = NULL;
- }
- /*
- * Found something. Duplicate the cursor for the rightward search.
- */
- else if ((error = xfs_btree_dup_cursor(bno_cur_lt, &bno_cur_gt)))
- goto error0;
- /*
- * Increment the cursor, so we will point at the entry just right
- * of the leftward entry if any, or to the leftmost entry.
- */
- if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
- goto error0;
- if (!i) {
- /*
- * It failed, there are no rightward entries.
- */
- xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_NOERROR);
- bno_cur_gt = NULL;
+ error = xfs_alloc_ag_vextent_lastblock(args, &acur, &bno, &len,
+ &allocated);
+ if (error)
+ goto out;
+ if (allocated)
+ goto alloc_finish;
}
- /*
- * Loop going left with the leftward cursor, right with the
- * rightward cursor, until either both directions give up or
- * we find an entry at least as big as minlen.
- */
- do {
- if (bno_cur_lt) {
- if ((error = xfs_alloc_get_rec(bno_cur_lt, &ltbno, &ltlen, &i)))
- goto error0;
- XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
- busy |= xfs_alloc_compute_aligned(args, ltbno, ltlen,
- &ltbnoa, &ltlena, &busy_gen);
- if (ltlena >= args->minlen && ltbnoa >= args->min_agbno)
- break;
- if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i)))
- goto error0;
- if (!i || ltbnoa < args->min_agbno) {
- xfs_btree_del_cursor(bno_cur_lt,
- XFS_BTREE_NOERROR);
- bno_cur_lt = NULL;
- }
- }
- if (bno_cur_gt) {
- if ((error = xfs_alloc_get_rec(bno_cur_gt, &gtbno, &gtlen, &i)))
- goto error0;
- XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
- busy |= xfs_alloc_compute_aligned(args, gtbno, gtlen,
- &gtbnoa, &gtlena, &busy_gen);
- if (gtlena >= args->minlen && gtbnoa <= args->max_agbno)
- break;
- if ((error = xfs_btree_increment(bno_cur_gt, 0, &i)))
- goto error0;
- if (!i || gtbnoa > args->max_agbno) {
- xfs_btree_del_cursor(bno_cur_gt,
- XFS_BTREE_NOERROR);
- bno_cur_gt = NULL;
- }
- }
- } while (bno_cur_lt || bno_cur_gt);
/*
- * Got both cursors still active, need to find better entry.
+ * Second algorithm. Combined cntbt and bnobt search to find ideal
+ * locality.
*/
- if (bno_cur_lt && bno_cur_gt) {
- if (ltlena >= args->minlen) {
- /*
- * Left side is good, look for a right side entry.
- */
- args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
- xfs_alloc_fix_len(args);
- ltdiff = xfs_alloc_compute_diff(args->agbno, args->len,
- args->alignment, args->datatype, ltbnoa,
- ltlena, &ltnew);
-
- error = xfs_alloc_find_best_extent(args,
- &bno_cur_lt, &bno_cur_gt,
- ltdiff, &gtbno, &gtlen,
- &gtbnoa, &gtlena,
- 0 /* search right */);
- } else {
- ASSERT(gtlena >= args->minlen);
-
- /*
- * Right side is good, look for a left side entry.
- */
- args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen);
- xfs_alloc_fix_len(args);
- gtdiff = xfs_alloc_compute_diff(args->agbno, args->len,
- args->alignment, args->datatype, gtbnoa,
- gtlena, &gtnew);
-
- error = xfs_alloc_find_best_extent(args,
- &bno_cur_gt, &bno_cur_lt,
- gtdiff, &ltbno, &ltlen,
- &ltbnoa, &ltlena,
- 1 /* search left */);
- }
-
- if (error)
- goto error0;
- }
+ error = xfs_alloc_ag_vextent_locality(args, &acur, &i);
+ if (error)
+ goto out;
/*
* If we couldn't get anything, give up.
*/
- if (bno_cur_lt == NULL && bno_cur_gt == NULL) {
- xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
-
- if (busy) {
+ if (!acur.len) {
+ if (acur.busy) {
trace_xfs_alloc_near_busy(args);
- xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
+ xfs_extent_busy_flush(args->mp, args->pag,
+ acur.busy_gen);
goto restart;
}
trace_xfs_alloc_size_neither(args);
args->agbno = NULLAGBLOCK;
- return 0;
+ goto out;
}
- /*
- * At this point we have selected a freespace entry, either to the
- * left or to the right. If it's on the right, copy all the
- * useful variables to the "left" set so we only have one
- * copy of this code.
- */
- if (bno_cur_gt) {
- bno_cur_lt = bno_cur_gt;
- bno_cur_gt = NULL;
- ltbno = gtbno;
- ltbnoa = gtbnoa;
- ltlen = gtlen;
- ltlena = gtlena;
- j = 1;
- } else
- j = 0;
+alloc_finish:
+ /* fix up btrees on a successful allocation */
+ error = xfs_alloc_cur_finish(args, &acur);
- /*
- * Fix up the length and compute the useful address.
- */
- args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen);
- xfs_alloc_fix_len(args);
- rlen = args->len;
- (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment,
- args->datatype, ltbnoa, ltlena, &ltnew);
- ASSERT(ltnew >= ltbno);
- ASSERT(ltnew + rlen <= ltbnoa + ltlena);
- ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
- ASSERT(ltnew >= args->min_agbno && ltnew <= args->max_agbno);
- args->agbno = ltnew;
-
- if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
- ltnew, rlen, XFSA_FIXUP_BNO_OK)))
- goto error0;
-
- if (j)
- trace_xfs_alloc_near_greater(args);
- else
- trace_xfs_alloc_near_lesser(args);
-
- xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
- xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
- return 0;
-
- error0:
- trace_xfs_alloc_near_error(args);
- if (cnt_cur != NULL)
- xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
- if (bno_cur_lt != NULL)
- xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_ERROR);
- if (bno_cur_gt != NULL)
- xfs_btree_del_cursor(bno_cur_gt, XFS_BTREE_ERROR);
+out:
+ xfs_alloc_cur_close(&acur, error);
return error;
}
@@ -1545,7 +1716,10 @@ restart:
error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i);
if (error)
goto error0;
- XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(args->mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
busy = xfs_alloc_compute_aligned(args, fbno, flen,
&rbno, &rlen, &busy_gen);
@@ -1579,8 +1753,13 @@ restart:
* This can't happen in the second case above.
*/
rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
- XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
- (rlen <= flen && rbno + rlen <= fbno + flen), error0);
+ if (XFS_IS_CORRUPT(args->mp,
+ rlen != 0 &&
+ (rlen > flen ||
+ rbno + rlen > fbno + flen))) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
if (rlen < args->maxlen) {
xfs_agblock_t bestfbno;
xfs_extlen_t bestflen;
@@ -1599,15 +1778,22 @@ restart:
if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen,
&i)))
goto error0;
- XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(args->mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
if (flen < bestrlen)
break;
busy = xfs_alloc_compute_aligned(args, fbno, flen,
&rbno, &rlen, &busy_gen);
rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
- XFS_WANT_CORRUPTED_GOTO(args->mp, rlen == 0 ||
- (rlen <= flen && rbno + rlen <= fbno + flen),
- error0);
+ if (XFS_IS_CORRUPT(args->mp,
+ rlen != 0 &&
+ (rlen > flen ||
+ rbno + rlen > fbno + flen))) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
if (rlen > bestrlen) {
bestrlen = rlen;
bestrbno = rbno;
@@ -1620,7 +1806,10 @@ restart:
if ((error = xfs_alloc_lookup_eq(cnt_cur, bestfbno, bestflen,
&i)))
goto error0;
- XFS_WANT_CORRUPTED_GOTO(args->mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(args->mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
rlen = bestrlen;
rbno = bestrbno;
flen = bestflen;
@@ -1643,7 +1832,10 @@ restart:
xfs_alloc_fix_len(args);
rlen = args->len;
- XFS_WANT_CORRUPTED_GOTO(args->mp, rlen <= flen, error0);
+ if (XFS_IS_CORRUPT(args->mp, rlen > flen)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
/*
* Allocate and initialize a cursor for the by-block tree.
*/
@@ -1657,10 +1849,13 @@ restart:
cnt_cur = bno_cur = NULL;
args->len = rlen;
args->agbno = rbno;
- XFS_WANT_CORRUPTED_GOTO(args->mp,
- args->agbno + args->len <=
- be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length),
- error0);
+ if (XFS_IS_CORRUPT(args->mp,
+ args->agbno + args->len >
+ be32_to_cpu(
+ XFS_BUF_TO_AGF(args->agbp)->agf_length))) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
trace_xfs_alloc_size_done(args);
return 0;
@@ -1732,7 +1927,10 @@ xfs_free_ag_extent(
*/
if ((error = xfs_alloc_get_rec(bno_cur, &ltbno, &ltlen, &i)))
goto error0;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
/*
* It's not contiguous, though.
*/
@@ -1744,8 +1942,10 @@ xfs_free_ag_extent(
* space was invalid, it's (partly) already free.
* Very bad.
*/
- XFS_WANT_CORRUPTED_GOTO(mp,
- ltbno + ltlen <= bno, error0);
+ if (XFS_IS_CORRUPT(mp, ltbno + ltlen > bno)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
}
}
/*
@@ -1760,7 +1960,10 @@ xfs_free_ag_extent(
*/
if ((error = xfs_alloc_get_rec(bno_cur, &gtbno, &gtlen, &i)))
goto error0;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
/*
* It's not contiguous, though.
*/
@@ -1772,7 +1975,10 @@ xfs_free_ag_extent(
* space was invalid, it's (partly) already free.
* Very bad.
*/
- XFS_WANT_CORRUPTED_GOTO(mp, gtbno >= bno + len, error0);
+ if (XFS_IS_CORRUPT(mp, bno + len > gtbno)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
}
}
/*
@@ -1789,31 +1995,49 @@ xfs_free_ag_extent(
*/
if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
goto error0;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
if ((error = xfs_btree_delete(cnt_cur, &i)))
goto error0;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
/*
* Delete the old by-size entry on the right.
*/
if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
goto error0;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
if ((error = xfs_btree_delete(cnt_cur, &i)))
goto error0;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
/*
* Delete the old by-block entry for the right block.
*/
if ((error = xfs_btree_delete(bno_cur, &i)))
goto error0;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
/*
* Move the by-block cursor back to the left neighbor.
*/
if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
goto error0;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
#ifdef DEBUG
/*
* Check that this is the right record: delete didn't
@@ -1826,9 +2050,13 @@ xfs_free_ag_extent(
if ((error = xfs_alloc_get_rec(bno_cur, &xxbno, &xxlen,
&i)))
goto error0;
- XFS_WANT_CORRUPTED_GOTO(mp,
- i == 1 && xxbno == ltbno && xxlen == ltlen,
- error0);
+ if (XFS_IS_CORRUPT(mp,
+ i != 1 ||
+ xxbno != ltbno ||
+ xxlen != ltlen)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
}
#endif
/*
@@ -1849,17 +2077,26 @@ xfs_free_ag_extent(
*/
if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i)))
goto error0;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
if ((error = xfs_btree_delete(cnt_cur, &i)))
goto error0;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
/*
* Back up the by-block cursor to the left neighbor, and
* update its length.
*/
if ((error = xfs_btree_decrement(bno_cur, 0, &i)))
goto error0;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
nbno = ltbno;
nlen = len + ltlen;
if ((error = xfs_alloc_update(bno_cur, nbno, nlen)))
@@ -1875,10 +2112,16 @@ xfs_free_ag_extent(
*/
if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i)))
goto error0;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
if ((error = xfs_btree_delete(cnt_cur, &i)))
goto error0;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
/*
* Update the starting block and length of the right
* neighbor in the by-block tree.
@@ -1897,7 +2140,10 @@ xfs_free_ag_extent(
nlen = len;
if ((error = xfs_btree_insert(bno_cur, &i)))
goto error0;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
}
xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
bno_cur = NULL;
@@ -1906,10 +2152,16 @@ xfs_free_ag_extent(
*/
if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i)))
goto error0;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 0, error0);
+ if (XFS_IS_CORRUPT(mp, i != 0)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
if ((error = xfs_btree_insert(cnt_cur, &i)))
goto error0;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
cnt_cur = NULL;
@@ -1989,30 +2241,39 @@ xfs_alloc_longest_free_extent(
* reservations and AGFL rules in place, we can return this extent.
*/
if (pag->pagf_longest > delta)
- return pag->pagf_longest - delta;
+ return min_t(xfs_extlen_t, pag->pag_mount->m_ag_max_usable,
+ pag->pagf_longest - delta);
/* Otherwise, let the caller try for 1 block if there's space. */
return pag->pagf_flcount > 0 || pag->pagf_longest > 0;
}
+/*
+ * Compute the minimum length of the AGFL in the given AG. If @pag is NULL,
+ * return the largest possible minimum length.
+ */
unsigned int
xfs_alloc_min_freelist(
struct xfs_mount *mp,
struct xfs_perag *pag)
{
+ /* AG btrees have at least 1 level. */
+ static const uint8_t fake_levels[XFS_BTNUM_AGF] = {1, 1, 1};
+ const uint8_t *levels = pag ? pag->pagf_levels : fake_levels;
unsigned int min_free;
+ ASSERT(mp->m_ag_maxlevels > 0);
+
/* space needed by-bno freespace btree */
- min_free = min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_BNOi] + 1,
+ min_free = min_t(unsigned int, levels[XFS_BTNUM_BNOi] + 1,
mp->m_ag_maxlevels);
/* space needed by-size freespace btree */
- min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1,
+ min_free += min_t(unsigned int, levels[XFS_BTNUM_CNTi] + 1,
mp->m_ag_maxlevels);
/* space needed reverse mapping used space btree */
if (xfs_sb_version_hasrmapbt(&mp->m_sb))
- min_free += min_t(unsigned int,
- pag->pagf_levels[XFS_BTNUM_RMAPi] + 1,
- mp->m_rmap_maxlevels);
+ min_free += min_t(unsigned int, levels[XFS_BTNUM_RMAPi] + 1,
+ mp->m_rmap_maxlevels);
return min_free;
}
@@ -2086,9 +2347,11 @@ xfs_free_agfl_block(
if (error)
return error;
- bp = xfs_btree_get_bufs(tp->t_mountp, tp, agno, agbno);
- if (!bp)
- return -EFSCORRUPTED;
+ error = xfs_trans_get_buf(tp, tp->t_mountp->m_ddev_targp,
+ XFS_AGB_TO_DADDR(tp->t_mountp, agno, agbno),
+ tp->t_mountp->m_bsize, 0, &bp);
+ if (error)
+ return error;
xfs_trans_binval(tp, bp);
return 0;
@@ -2205,7 +2468,7 @@ xfs_defer_agfl_block(
ASSERT(xfs_bmap_free_item_zone != NULL);
ASSERT(oinfo != NULL);
- new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
+ new = kmem_zone_alloc(xfs_bmap_free_item_zone, 0);
new->xefi_startblock = XFS_AGB_TO_FSB(mp, agno, agbno);
new->xefi_blockcount = 1;
new->xefi_oinfo = *oinfo;
@@ -2239,12 +2502,11 @@ xfs_alloc_fix_freelist(
if (!pag->pagf_init) {
error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
- if (error)
+ if (error) {
+ /* Couldn't lock the AGF so skip this AG. */
+ if (error == -EAGAIN)
+ error = 0;
goto out_no_agbp;
- if (!pag->pagf_init) {
- ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
- ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
- goto out_agbp_relse;
}
}
@@ -2253,7 +2515,7 @@ xfs_alloc_fix_freelist(
* somewhere else if we are not being asked to try harder at this
* point
*/
- if (pag->pagf_metadata && xfs_alloc_is_userdata(args->datatype) &&
+ if (pag->pagf_metadata && (args->datatype & XFS_ALLOC_USERDATA) &&
(flags & XFS_ALLOC_FLAG_TRYLOCK)) {
ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
goto out_agbp_relse;
@@ -2270,11 +2532,10 @@ xfs_alloc_fix_freelist(
*/
if (!agbp) {
error = xfs_alloc_read_agf(mp, tp, args->agno, flags, &agbp);
- if (error)
- goto out_no_agbp;
- if (!agbp) {
- ASSERT(flags & XFS_ALLOC_FLAG_TRYLOCK);
- ASSERT(!(flags & XFS_ALLOC_FLAG_FREEING));
+ if (error) {
+ /* Couldn't lock the AGF so skip this AG. */
+ if (error == -EAGAIN)
+ error = 0;
goto out_no_agbp;
}
}
@@ -2505,11 +2766,10 @@ xfs_alloc_pagf_init(
xfs_buf_t *bp;
int error;
- if ((error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp)))
- return error;
- if (bp)
+ error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp);
+ if (!error)
xfs_trans_brelse(tp, bp);
- return 0;
+ return error;
}
/*
@@ -2695,14 +2955,11 @@ xfs_read_agf(
trace_xfs_read_agf(mp, agno);
ASSERT(agno != NULLAGNUMBER);
- error = xfs_trans_read_buf(
- mp, tp, mp->m_ddev_targp,
+ error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops);
if (error)
return error;
- if (!*bpp)
- return 0;
ASSERT(!(*bpp)->b_error);
xfs_buf_set_ref(*bpp, XFS_AGF_REF);
@@ -2726,14 +2983,15 @@ xfs_alloc_read_agf(
trace_xfs_alloc_read_agf(mp, agno);
+ /* We don't support trylock when freeing. */
+ ASSERT((flags & (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)) !=
+ (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK));
ASSERT(agno != NULLAGNUMBER);
error = xfs_read_agf(mp, tp, agno,
(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
bpp);
if (error)
return error;
- if (!*bpp)
- return 0;
ASSERT(!(*bpp)->b_error);
agf = XFS_BUF_TO_AGF(*bpp);
@@ -2956,13 +3214,6 @@ xfs_alloc_vextent(
args->len);
#endif
- /* Zero the extent if we were asked to do so */
- if (args->datatype & XFS_ALLOC_USERDATA_ZERO) {
- error = xfs_zero_extent(args->ip, args->fsbno, args->len);
- if (error)
- goto error0;
- }
-
}
xfs_perag_put(args->pag);
return 0;
@@ -3038,12 +3289,18 @@ __xfs_free_extent(
if (error)
return error;
- XFS_WANT_CORRUPTED_GOTO(mp, agbno < mp->m_sb.sb_agblocks, err);
+ if (XFS_IS_CORRUPT(mp, agbno >= mp->m_sb.sb_agblocks)) {
+ error = -EFSCORRUPTED;
+ goto err;
+ }
/* validate the extent size is legal now we have the agf locked */
- XFS_WANT_CORRUPTED_GOTO(mp,
- agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length),
- err);
+ if (XFS_IS_CORRUPT(mp,
+ agbno + len >
+ be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length))) {
+ error = -EFSCORRUPTED;
+ goto err;
+ }
error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, type);
if (error)
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index d6ed5d2c07c2..7380fbe4a3ff 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -54,7 +54,6 @@ typedef struct xfs_alloc_arg {
struct xfs_mount *mp; /* file system mount point */
struct xfs_buf *agbp; /* buffer for a.g. freelist header */
struct xfs_perag *pag; /* per-ag struct for this agno */
- struct xfs_inode *ip; /* for userdata zeroing method */
xfs_fsblock_t fsbno; /* file system block number */
xfs_agnumber_t agno; /* allocation group number */
xfs_agblock_t agbno; /* allocation group-relative block # */
@@ -83,20 +82,7 @@ typedef struct xfs_alloc_arg {
*/
#define XFS_ALLOC_USERDATA (1 << 0)/* allocation is for user data*/
#define XFS_ALLOC_INITIAL_USER_DATA (1 << 1)/* special case start of file */
-#define XFS_ALLOC_USERDATA_ZERO (1 << 2)/* zero extent on allocation */
-#define XFS_ALLOC_NOBUSY (1 << 3)/* Busy extents not allowed */
-
-static inline bool
-xfs_alloc_is_userdata(int datatype)
-{
- return (datatype & ~XFS_ALLOC_NOBUSY) != 0;
-}
-
-static inline bool
-xfs_alloc_allow_busy_reuse(int datatype)
-{
- return (datatype & XFS_ALLOC_NOBUSY) == 0;
-}
+#define XFS_ALLOC_NOBUSY (1 << 2)/* Busy extents not allowed */
/* freespace limit calculations */
#define XFS_ALLOC_AGFL_RESERVE 4
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 2a94543857a1..279694d73e4e 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -507,6 +507,7 @@ xfs_allocbt_init_cursor(
cur->bc_private.a.agbp = agbp;
cur->bc_private.a.agno = agno;
+ cur->bc_private.a.priv.abt.active = false;
if (xfs_sb_version_hascrc(&mp->m_sb))
cur->bc_flags |= XFS_BTREE_CRC_BLOCKS;
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index d48fcf11cc35..e6149720ce02 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -62,6 +62,7 @@ xfs_attr_args_init(
struct xfs_da_args *args,
struct xfs_inode *dp,
const unsigned char *name,
+ size_t namelen,
int flags)
{
@@ -74,7 +75,7 @@ xfs_attr_args_init(
args->dp = dp;
args->flags = flags;
args->name = name;
- args->namelen = strlen((const char *)name);
+ args->namelen = namelen;
if (args->namelen >= MAXNAMELEN)
return -EFAULT; /* match IRIX behaviour */
@@ -97,7 +98,10 @@ xfs_inode_hasattr(
* Overall external interface routines.
*========================================================================*/
-/* Retrieve an extended attribute and its value. Must have ilock. */
+/*
+ * Retrieve an extended attribute and its value. Must have ilock.
+ * Returns 0 on successful retrieval, otherwise an error.
+ */
int
xfs_attr_get_ilocked(
struct xfs_inode *ip,
@@ -115,12 +119,29 @@ xfs_attr_get_ilocked(
return xfs_attr_node_get(args);
}
-/* Retrieve an extended attribute by name, and its value. */
+/*
+ * Retrieve an extended attribute by name, and its value if requested.
+ *
+ * If ATTR_KERNOVAL is set in @flags, then the caller does not want the value,
+ * just an indication whether the attribute exists and the size of the value if
+ * it exists. The size is returned in @valuelenp,
+ *
+ * If the attribute is found, but exceeds the size limit set by the caller in
+ * @valuelenp, return -ERANGE with the size of the attribute that was found in
+ * @valuelenp.
+ *
+ * If ATTR_ALLOC is set in @flags, allocate the buffer for the value after
+ * existence of the attribute has been determined. On success, return that
+ * buffer to the caller and leave them to free it. On failure, free any
+ * allocated buffer and ensure the buffer pointer returned to the caller is
+ * null.
+ */
int
xfs_attr_get(
struct xfs_inode *ip,
const unsigned char *name,
- unsigned char *value,
+ size_t namelen,
+ unsigned char **value,
int *valuelenp,
int flags)
{
@@ -128,26 +149,40 @@ xfs_attr_get(
uint lock_mode;
int error;
+ ASSERT((flags & (ATTR_ALLOC | ATTR_KERNOVAL)) || *value);
+
XFS_STATS_INC(ip->i_mount, xs_attr_get);
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
- error = xfs_attr_args_init(&args, ip, name, flags);
+ error = xfs_attr_args_init(&args, ip, name, namelen, flags);
if (error)
return error;
- args.value = value;
- args.valuelen = *valuelenp;
/* Entirely possible to look up a name which doesn't exist */
args.op_flags = XFS_DA_OP_OKNOENT;
+ if (flags & ATTR_ALLOC)
+ args.op_flags |= XFS_DA_OP_ALLOCVAL;
+ else
+ args.value = *value;
+ args.valuelen = *valuelenp;
lock_mode = xfs_ilock_attr_map_shared(ip);
error = xfs_attr_get_ilocked(ip, &args);
xfs_iunlock(ip, lock_mode);
-
*valuelenp = args.valuelen;
- return error == -EEXIST ? 0 : error;
+
+ /* on error, we have to clean up allocated value buffers */
+ if (error) {
+ if (flags & ATTR_ALLOC) {
+ kmem_free(args.value);
+ *value = NULL;
+ }
+ return error;
+ }
+ *value = args.value;
+ return 0;
}
/*
@@ -305,6 +340,7 @@ int
xfs_attr_set(
struct xfs_inode *dp,
const unsigned char *name,
+ size_t namelen,
unsigned char *value,
int valuelen,
int flags)
@@ -320,7 +356,7 @@ xfs_attr_set(
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
return -EIO;
- error = xfs_attr_args_init(&args, dp, name, flags);
+ error = xfs_attr_args_init(&args, dp, name, namelen, flags);
if (error)
return error;
@@ -409,6 +445,7 @@ int
xfs_attr_remove(
struct xfs_inode *dp,
const unsigned char *name,
+ size_t namelen,
int flags)
{
struct xfs_mount *mp = dp->i_mount;
@@ -420,7 +457,7 @@ xfs_attr_remove(
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
return -EIO;
- error = xfs_attr_args_init(&args, dp, name, flags);
+ error = xfs_attr_args_init(&args, dp, name, namelen, flags);
if (error)
return error;
@@ -556,7 +593,7 @@ xfs_attr_leaf_addname(
*/
dp = args->dp;
args->blkno = 0;
- error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, &bp);
if (error)
return error;
@@ -682,7 +719,7 @@ xfs_attr_leaf_addname(
* remove the "old" attr from that block (neat, huh!)
*/
error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno,
- -1, &bp);
+ &bp);
if (error)
return error;
@@ -736,7 +773,7 @@ xfs_attr_leaf_removename(
*/
dp = args->dp;
args->blkno = 0;
- error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, &bp);
if (error)
return error;
@@ -768,6 +805,8 @@ xfs_attr_leaf_removename(
*
* This leaf block cannot have a "remote" value, we only call this routine
* if bmap_one_block() says there is only one block (ie: no remote blks).
+ *
+ * Returns 0 on successful retrieval, otherwise an error.
*/
STATIC int
xfs_attr_leaf_get(xfs_da_args_t *args)
@@ -778,7 +817,7 @@ xfs_attr_leaf_get(xfs_da_args_t *args)
trace_xfs_attr_leaf_get(args);
args->blkno = 0;
- error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, &bp);
if (error)
return error;
@@ -789,9 +828,6 @@ xfs_attr_leaf_get(xfs_da_args_t *args)
}
error = xfs_attr3_leaf_getvalue(bp, args);
xfs_trans_brelse(args->trans, bp);
- if (!error && (args->rmtblkno > 0) && !(args->flags & ATTR_KERNOVAL)) {
- error = xfs_attr_rmtval_get(args);
- }
return error;
}
@@ -975,7 +1011,7 @@ restart:
* The INCOMPLETE flag means that we will find the "old"
* attr, not the "new" one.
*/
- args->flags |= XFS_ATTR_INCOMPLETE;
+ args->op_flags |= XFS_DA_OP_INCOMPLETE;
state = xfs_da_state_alloc();
state->args = args;
state->mp = mp;
@@ -1141,7 +1177,7 @@ xfs_attr_node_removename(
ASSERT(state->path.blk[0].bp);
state->path.blk[0].bp = NULL;
- error = xfs_attr3_leaf_read(args->trans, args->dp, 0, -1, &bp);
+ error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp);
if (error)
goto out;
@@ -1234,10 +1270,9 @@ xfs_attr_refillstate(xfs_da_state_t *state)
ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
if (blk->disk_blkno) {
- error = xfs_da3_node_read(state->args->trans,
- state->args->dp,
- blk->blkno, blk->disk_blkno,
- &blk->bp, XFS_ATTR_FORK);
+ error = xfs_da3_node_read_mapped(state->args->trans,
+ state->args->dp, blk->disk_blkno,
+ &blk->bp, XFS_ATTR_FORK);
if (error)
return error;
} else {
@@ -1253,10 +1288,9 @@ xfs_attr_refillstate(xfs_da_state_t *state)
ASSERT((path->active >= 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
for (blk = path->blk, level = 0; level < path->active; blk++, level++) {
if (blk->disk_blkno) {
- error = xfs_da3_node_read(state->args->trans,
- state->args->dp,
- blk->blkno, blk->disk_blkno,
- &blk->bp, XFS_ATTR_FORK);
+ error = xfs_da3_node_read_mapped(state->args->trans,
+ state->args->dp, blk->disk_blkno,
+ &blk->bp, XFS_ATTR_FORK);
if (error)
return error;
} else {
@@ -1268,11 +1302,13 @@ xfs_attr_refillstate(xfs_da_state_t *state)
}
/*
- * Look up a filename in a node attribute list.
+ * Retrieve the attribute data from a node attribute list.
*
* This routine gets called for any attribute fork that has more than one
* block, ie: both true Btree attr lists and for single-leaf-blocks with
* "remote" values taking up more blocks.
+ *
+ * Returns 0 on successful retrieval, otherwise an error.
*/
STATIC int
xfs_attr_node_get(xfs_da_args_t *args)
@@ -1294,24 +1330,21 @@ xfs_attr_node_get(xfs_da_args_t *args)
error = xfs_da3_node_lookup_int(state, &retval);
if (error) {
retval = error;
- } else if (retval == -EEXIST) {
- blk = &state->path.blk[ state->path.active-1 ];
- ASSERT(blk->bp != NULL);
- ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
-
- /*
- * Get the value, local or "remote"
- */
- retval = xfs_attr3_leaf_getvalue(blk->bp, args);
- if (!retval && (args->rmtblkno > 0)
- && !(args->flags & ATTR_KERNOVAL)) {
- retval = xfs_attr_rmtval_get(args);
- }
+ goto out_release;
}
+ if (retval != -EEXIST)
+ goto out_release;
+
+ /*
+ * Get the value, local or "remote"
+ */
+ blk = &state->path.blk[state->path.active - 1];
+ retval = xfs_attr3_leaf_getvalue(blk->bp, args);
/*
* If not in a transaction, we have to release all the buffers.
*/
+out_release:
for (i = 0; i < state->path.active; i++) {
xfs_trans_brelse(args->trans, state->path.blk[i].bp);
state->path.blk[i].bp = NULL;
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
index ff28ebf3b635..4243b2272642 100644
--- a/fs/xfs/libxfs/xfs_attr.h
+++ b/fs/xfs/libxfs/xfs_attr.h
@@ -26,7 +26,7 @@ struct xfs_attr_list_context;
*========================================================================*/
-#define ATTR_DONTFOLLOW 0x0001 /* -- unused, from IRIX -- */
+#define ATTR_DONTFOLLOW 0x0001 /* -- ignored, from IRIX -- */
#define ATTR_ROOT 0x0002 /* use attrs in root (trusted) namespace */
#define ATTR_TRUST 0x0004 /* -- unused, from IRIX -- */
#define ATTR_SECURE 0x0008 /* use attrs in security namespace */
@@ -37,6 +37,10 @@ struct xfs_attr_list_context;
#define ATTR_KERNOVAL 0x2000 /* [kernel] get attr size only, not value */
#define ATTR_INCOMPLETE 0x4000 /* [kernel] return INCOMPLETE attr keys */
+#define ATTR_ALLOC 0x8000 /* [kernel] allocate xattr buffer on demand */
+
+#define ATTR_KERNEL_FLAGS \
+ (ATTR_KERNOTIME | ATTR_KERNOVAL | ATTR_INCOMPLETE | ATTR_ALLOC)
#define XFS_ATTR_FLAGS \
{ ATTR_DONTFOLLOW, "DONTFOLLOW" }, \
@@ -47,7 +51,8 @@ struct xfs_attr_list_context;
{ ATTR_REPLACE, "REPLACE" }, \
{ ATTR_KERNOTIME, "KERNOTIME" }, \
{ ATTR_KERNOVAL, "KERNOVAL" }, \
- { ATTR_INCOMPLETE, "INCOMPLETE" }
+ { ATTR_INCOMPLETE, "INCOMPLETE" }, \
+ { ATTR_ALLOC, "ALLOC" }
/*
* The maximum size (into the kernel or returned from the kernel) of an
@@ -143,11 +148,13 @@ int xfs_attr_list_int(struct xfs_attr_list_context *);
int xfs_inode_hasattr(struct xfs_inode *ip);
int xfs_attr_get_ilocked(struct xfs_inode *ip, struct xfs_da_args *args);
int xfs_attr_get(struct xfs_inode *ip, const unsigned char *name,
- unsigned char *value, int *valuelenp, int flags);
+ size_t namelen, unsigned char **value, int *valuelenp,
+ int flags);
int xfs_attr_set(struct xfs_inode *dp, const unsigned char *name,
- unsigned char *value, int valuelen, int flags);
+ size_t namelen, unsigned char *value, int valuelen, int flags);
int xfs_attr_set_args(struct xfs_da_args *args);
-int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name, int flags);
+int xfs_attr_remove(struct xfs_inode *dp, const unsigned char *name,
+ size_t namelen, int flags);
int xfs_attr_remove_args(struct xfs_da_args *args);
int xfs_attr_list(struct xfs_inode *dp, char *buffer, int bufsize,
int flags, struct attrlist_cursor_kern *cursor);
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 70eb941d02e4..fed537a4353d 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -233,6 +233,61 @@ xfs_attr3_leaf_hdr_to_disk(
}
static xfs_failaddr_t
+xfs_attr3_leaf_verify_entry(
+ struct xfs_mount *mp,
+ char *buf_end,
+ struct xfs_attr_leafblock *leaf,
+ struct xfs_attr3_icleaf_hdr *leafhdr,
+ struct xfs_attr_leaf_entry *ent,
+ int idx,
+ __u32 *last_hashval)
+{
+ struct xfs_attr_leaf_name_local *lentry;
+ struct xfs_attr_leaf_name_remote *rentry;
+ char *name_end;
+ unsigned int nameidx;
+ unsigned int namesize;
+ __u32 hashval;
+
+ /* hash order check */
+ hashval = be32_to_cpu(ent->hashval);
+ if (hashval < *last_hashval)
+ return __this_address;
+ *last_hashval = hashval;
+
+ nameidx = be16_to_cpu(ent->nameidx);
+ if (nameidx < leafhdr->firstused || nameidx >= mp->m_attr_geo->blksize)
+ return __this_address;
+
+ /*
+ * Check the name information. The namelen fields are u8 so we can't
+ * possibly exceed the maximum name length of 255 bytes.
+ */
+ if (ent->flags & XFS_ATTR_LOCAL) {
+ lentry = xfs_attr3_leaf_name_local(leaf, idx);
+ namesize = xfs_attr_leaf_entsize_local(lentry->namelen,
+ be16_to_cpu(lentry->valuelen));
+ name_end = (char *)lentry + namesize;
+ if (lentry->namelen == 0)
+ return __this_address;
+ } else {
+ rentry = xfs_attr3_leaf_name_remote(leaf, idx);
+ namesize = xfs_attr_leaf_entsize_remote(rentry->namelen);
+ name_end = (char *)rentry + namesize;
+ if (rentry->namelen == 0)
+ return __this_address;
+ if (!(ent->flags & XFS_ATTR_INCOMPLETE) &&
+ rentry->valueblk == 0)
+ return __this_address;
+ }
+
+ if (name_end > buf_end)
+ return __this_address;
+
+ return NULL;
+}
+
+static xfs_failaddr_t
xfs_attr3_leaf_verify(
struct xfs_buf *bp)
{
@@ -240,7 +295,10 @@ xfs_attr3_leaf_verify(
struct xfs_mount *mp = bp->b_mount;
struct xfs_attr_leafblock *leaf = bp->b_addr;
struct xfs_attr_leaf_entry *entries;
+ struct xfs_attr_leaf_entry *ent;
+ char *buf_end;
uint32_t end; /* must be 32bit - see below */
+ __u32 last_hashval = 0;
int i;
xfs_failaddr_t fa;
@@ -273,8 +331,13 @@ xfs_attr3_leaf_verify(
(char *)bp->b_addr + ichdr.firstused)
return __this_address;
- /* XXX: need to range check rest of attr header values */
- /* XXX: hash order check? */
+ buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize;
+ for (i = 0, ent = entries; i < ichdr.count; ent++, i++) {
+ fa = xfs_attr3_leaf_verify_entry(mp, buf_end, leaf, &ichdr,
+ ent, i, &last_hashval);
+ if (fa)
+ return fa;
+ }
/*
* Quickly check the freemap information. Attribute data has to be
@@ -367,13 +430,12 @@ xfs_attr3_leaf_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
xfs_dablk_t bno,
- xfs_daddr_t mappedbno,
struct xfs_buf **bpp)
{
int err;
- err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp,
- XFS_ATTR_FORK, &xfs_attr3_leaf_buf_ops);
+ err = xfs_da_read_buf(tp, dp, bno, 0, bpp, XFS_ATTR_FORK,
+ &xfs_attr3_leaf_buf_ops);
if (!err && tp && *bpp)
xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_ATTR_LEAF_BUF);
return err;
@@ -393,6 +455,50 @@ xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
return XFS_ATTR_NSP_ONDISK(ondisk_flags) == XFS_ATTR_NSP_ARGS_TO_ONDISK(arg_flags);
}
+static int
+xfs_attr_copy_value(
+ struct xfs_da_args *args,
+ unsigned char *value,
+ int valuelen)
+{
+ /*
+ * No copy if all we have to do is get the length
+ */
+ if (args->flags & ATTR_KERNOVAL) {
+ args->valuelen = valuelen;
+ return 0;
+ }
+
+ /*
+ * No copy if the length of the existing buffer is too small
+ */
+ if (args->valuelen < valuelen) {
+ args->valuelen = valuelen;
+ return -ERANGE;
+ }
+
+ if (args->op_flags & XFS_DA_OP_ALLOCVAL) {
+ args->value = kmem_alloc_large(valuelen, 0);
+ if (!args->value)
+ return -ENOMEM;
+ }
+ args->valuelen = valuelen;
+
+ /* remote block xattr requires IO for copy-in */
+ if (args->rmtblkno)
+ return xfs_attr_rmtval_get(args);
+
+ /*
+ * This is to prevent a GCC warning because the remote xattr case
+ * doesn't have a value to pass in. In that case, we never reach here,
+ * but GCC can't work that out and so throws a "passing NULL to
+ * memcpy" warning.
+ */
+ if (!value)
+ return -EINVAL;
+ memcpy(args->value, value, valuelen);
+ return 0;
+}
/*========================================================================
* External routines when attribute fork size < XFS_LITINO(mp).
@@ -409,13 +515,15 @@ xfs_attr_namesp_match(int arg_flags, int ondisk_flags)
* special case for dev/uuid inodes, they have fixed size data forks.
*/
int
-xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
+xfs_attr_shortform_bytesfit(
+ struct xfs_inode *dp,
+ int bytes)
{
- int offset;
- int minforkoff; /* lower limit on valid forkoff locations */
- int maxforkoff; /* upper limit on valid forkoff locations */
- int dsize;
- xfs_mount_t *mp = dp->i_mount;
+ struct xfs_mount *mp = dp->i_mount;
+ int64_t dsize;
+ int minforkoff;
+ int maxforkoff;
+ int offset;
/* rounded down */
offset = (XFS_LITINO(mp, dp->i_d.di_version) - bytes) >> 3;
@@ -481,7 +589,7 @@ xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes)
* A data fork btree root must have space for at least
* MINDBTPTRS key/ptr pairs if the data fork is small or empty.
*/
- minforkoff = max(dsize, XFS_BMDR_SPACE_CALC(MINDBTPTRS));
+ minforkoff = max_t(int64_t, dsize, XFS_BMDR_SPACE_CALC(MINDBTPTRS));
minforkoff = roundup(minforkoff, 8) >> 3;
/* attr fork btree root can have at least this many key/ptr pairs */
@@ -720,15 +828,19 @@ xfs_attr_shortform_lookup(xfs_da_args_t *args)
}
/*
- * Look up a name in a shortform attribute list structure.
+ * Retrieve the attribute value and length.
+ *
+ * If ATTR_KERNOVAL is specified, only the length needs to be returned.
+ * Unlike a lookup, we only return an error if the attribute does not
+ * exist or we can't retrieve the value.
*/
-/*ARGSUSED*/
int
-xfs_attr_shortform_getvalue(xfs_da_args_t *args)
+xfs_attr_shortform_getvalue(
+ struct xfs_da_args *args)
{
- xfs_attr_shortform_t *sf;
- xfs_attr_sf_entry_t *sfe;
- int i;
+ struct xfs_attr_shortform *sf;
+ struct xfs_attr_sf_entry *sfe;
+ int i;
ASSERT(args->dp->i_afp->if_flags == XFS_IFINLINE);
sf = (xfs_attr_shortform_t *)args->dp->i_afp->if_u1.if_data;
@@ -741,18 +853,8 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
continue;
if (!xfs_attr_namesp_match(args->flags, sfe->flags))
continue;
- if (args->flags & ATTR_KERNOVAL) {
- args->valuelen = sfe->valuelen;
- return -EEXIST;
- }
- if (args->valuelen < sfe->valuelen) {
- args->valuelen = sfe->valuelen;
- return -ERANGE;
- }
- args->valuelen = sfe->valuelen;
- memcpy(args->value, &sfe->nameval[args->namelen],
- args->valuelen);
- return -EEXIST;
+ return xfs_attr_copy_value(args, &sfe->nameval[args->namelen],
+ sfe->valuelen);
}
return -ENOATTR;
}
@@ -782,38 +884,23 @@ xfs_attr_shortform_to_leaf(
ifp = dp->i_afp;
sf = (xfs_attr_shortform_t *)ifp->if_u1.if_data;
size = be16_to_cpu(sf->hdr.totsize);
- tmpbuffer = kmem_alloc(size, KM_SLEEP);
+ tmpbuffer = kmem_alloc(size, 0);
ASSERT(tmpbuffer != NULL);
memcpy(tmpbuffer, ifp->if_u1.if_data, size);
sf = (xfs_attr_shortform_t *)tmpbuffer;
xfs_idata_realloc(dp, -size, XFS_ATTR_FORK);
- xfs_bmap_local_to_extents_empty(dp, XFS_ATTR_FORK);
+ xfs_bmap_local_to_extents_empty(args->trans, dp, XFS_ATTR_FORK);
bp = NULL;
error = xfs_da_grow_inode(args, &blkno);
- if (error) {
- /*
- * If we hit an IO error middle of the transaction inside
- * grow_inode(), we may have inconsistent data. Bail out.
- */
- if (error == -EIO)
- goto out;
- xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */
- memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */
+ if (error)
goto out;
- }
ASSERT(blkno == 0);
error = xfs_attr3_leaf_create(args, blkno, &bp);
- if (error) {
- /* xfs_attr3_leaf_create may not have instantiated a block */
- if (bp && (xfs_da_shrink_inode(args, 0, bp) != 0))
- goto out;
- xfs_idata_realloc(dp, size, XFS_ATTR_FORK); /* try to put */
- memcpy(ifp->if_u1.if_data, tmpbuffer, size); /* it back */
+ if (error)
goto out;
- }
memset((char *)&nargs, 0, sizeof(nargs));
nargs.dp = dp;
@@ -901,7 +988,7 @@ xfs_attr_shortform_verify(
char *endp;
struct xfs_ifork *ifp;
int i;
- int size;
+ int64_t size;
ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL);
ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK);
@@ -985,7 +1072,7 @@ xfs_attr3_leaf_to_shortform(
trace_xfs_attr_leaf_to_sf(args);
- tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP);
+ tmpbuffer = kmem_alloc(args->geo->blksize, 0);
if (!tmpbuffer)
return -ENOMEM;
@@ -1057,7 +1144,6 @@ xfs_attr3_leaf_to_node(
struct xfs_attr_leafblock *leaf;
struct xfs_attr3_icleaf_hdr icleafhdr;
struct xfs_attr_leaf_entry *entries;
- struct xfs_da_node_entry *btree;
struct xfs_da3_icnode_hdr icnodehdr;
struct xfs_da_intnode *node;
struct xfs_inode *dp = args->dp;
@@ -1072,11 +1158,11 @@ xfs_attr3_leaf_to_node(
error = xfs_da_grow_inode(args, &blkno);
if (error)
goto out;
- error = xfs_attr3_leaf_read(args->trans, dp, 0, -1, &bp1);
+ error = xfs_attr3_leaf_read(args->trans, dp, 0, &bp1);
if (error)
goto out;
- error = xfs_da_get_buf(args->trans, dp, blkno, -1, &bp2, XFS_ATTR_FORK);
+ error = xfs_da_get_buf(args->trans, dp, blkno, &bp2, XFS_ATTR_FORK);
if (error)
goto out;
@@ -1097,18 +1183,17 @@ xfs_attr3_leaf_to_node(
if (error)
goto out;
node = bp1->b_addr;
- dp->d_ops->node_hdr_from_disk(&icnodehdr, node);
- btree = dp->d_ops->node_tree_p(node);
+ xfs_da3_node_hdr_from_disk(mp, &icnodehdr, node);
leaf = bp2->b_addr;
xfs_attr3_leaf_hdr_from_disk(args->geo, &icleafhdr, leaf);
entries = xfs_attr3_leaf_entryp(leaf);
/* both on-disk, don't endian-flip twice */
- btree[0].hashval = entries[icleafhdr.count - 1].hashval;
- btree[0].before = cpu_to_be32(blkno);
+ icnodehdr.btree[0].hashval = entries[icleafhdr.count - 1].hashval;
+ icnodehdr.btree[0].before = cpu_to_be32(blkno);
icnodehdr.count = 1;
- dp->d_ops->node_hdr_to_disk(node, &icnodehdr);
+ xfs_da3_node_hdr_to_disk(dp->i_mount, node, &icnodehdr);
xfs_trans_log_buf(args->trans, bp1, 0, args->geo->blksize - 1);
error = 0;
out:
@@ -1138,7 +1223,7 @@ xfs_attr3_leaf_create(
trace_xfs_attr_leaf_create(args);
- error = xfs_da_get_buf(args->trans, args->dp, blkno, -1, &bp,
+ error = xfs_da_get_buf(args->trans, args->dp, blkno, &bp,
XFS_ATTR_FORK);
if (error)
return error;
@@ -1424,7 +1509,9 @@ xfs_attr3_leaf_add_work(
for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) {
if (ichdr->freemap[i].base == tmp) {
ichdr->freemap[i].base += sizeof(xfs_attr_leaf_entry_t);
- ichdr->freemap[i].size -= sizeof(xfs_attr_leaf_entry_t);
+ ichdr->freemap[i].size -=
+ min_t(uint16_t, ichdr->freemap[i].size,
+ sizeof(xfs_attr_leaf_entry_t));
}
}
ichdr->usedbytes += xfs_attr_leaf_entsize(leaf, args->index);
@@ -1448,7 +1535,7 @@ xfs_attr3_leaf_compact(
trace_xfs_attr_leaf_compact(args);
- tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP);
+ tmpbuffer = kmem_alloc(args->geo->blksize, 0);
memcpy(tmpbuffer, bp->b_addr, args->geo->blksize);
memset(bp->b_addr, 0, args->geo->blksize);
leaf_src = (xfs_attr_leafblock_t *)tmpbuffer;
@@ -1908,7 +1995,7 @@ xfs_attr3_leaf_toosmall(
if (blkno == 0)
continue;
error = xfs_attr3_leaf_read(state->args->trans, state->args->dp,
- blkno, -1, &bp);
+ blkno, &bp);
if (error)
return error;
@@ -2167,7 +2254,7 @@ xfs_attr3_leaf_unbalance(
struct xfs_attr_leafblock *tmp_leaf;
struct xfs_attr3_icleaf_hdr tmphdr;
- tmp_leaf = kmem_zalloc(state->args->geo->blksize, KM_SLEEP);
+ tmp_leaf = kmem_zalloc(state->args->geo->blksize, 0);
/*
* Copy the header into the temp leaf so that all the stuff
@@ -2258,8 +2345,10 @@ xfs_attr3_leaf_lookup_int(
leaf = bp->b_addr;
xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
entries = xfs_attr3_leaf_entryp(leaf);
- if (ichdr.count >= args->geo->blksize / 8)
+ if (ichdr.count >= args->geo->blksize / 8) {
+ xfs_buf_corruption_error(bp);
return -EFSCORRUPTED;
+ }
/*
* Binary search. (note: small blocks will skip this loop)
@@ -2275,10 +2364,14 @@ xfs_attr3_leaf_lookup_int(
else
break;
}
- if (!(probe >= 0 && (!ichdr.count || probe < ichdr.count)))
+ if (!(probe >= 0 && (!ichdr.count || probe < ichdr.count))) {
+ xfs_buf_corruption_error(bp);
return -EFSCORRUPTED;
- if (!(span <= 4 || be32_to_cpu(entry->hashval) == hashval))
+ }
+ if (!(span <= 4 || be32_to_cpu(entry->hashval) == hashval)) {
+ xfs_buf_corruption_error(bp);
return -EFSCORRUPTED;
+ }
/*
* Since we may have duplicate hashval's, find the first matching
@@ -2310,8 +2403,8 @@ xfs_attr3_leaf_lookup_int(
* If we are looking for INCOMPLETE entries, show only those.
* If we are looking for complete entries, show only those.
*/
- if ((args->flags & XFS_ATTR_INCOMPLETE) !=
- (entry->flags & XFS_ATTR_INCOMPLETE)) {
+ if (!!(args->op_flags & XFS_DA_OP_INCOMPLETE) !=
+ !!(entry->flags & XFS_ATTR_INCOMPLETE)) {
continue;
}
if (entry->flags & XFS_ATTR_LOCAL) {
@@ -2350,6 +2443,10 @@ xfs_attr3_leaf_lookup_int(
/*
* Get the value associated with an attribute name from a leaf attribute
* list structure.
+ *
+ * If ATTR_KERNOVAL is specified, only the length needs to be returned.
+ * Unlike a lookup, we only return an error if the attribute does not
+ * exist or we can't retrieve the value.
*/
int
xfs_attr3_leaf_getvalue(
@@ -2361,7 +2458,6 @@ xfs_attr3_leaf_getvalue(
struct xfs_attr_leaf_entry *entry;
struct xfs_attr_leaf_name_local *name_loc;
struct xfs_attr_leaf_name_remote *name_rmt;
- int valuelen;
leaf = bp->b_addr;
xfs_attr3_leaf_hdr_from_disk(args->geo, &ichdr, leaf);
@@ -2373,36 +2469,19 @@ xfs_attr3_leaf_getvalue(
name_loc = xfs_attr3_leaf_name_local(leaf, args->index);
ASSERT(name_loc->namelen == args->namelen);
ASSERT(memcmp(args->name, name_loc->nameval, args->namelen) == 0);
- valuelen = be16_to_cpu(name_loc->valuelen);
- if (args->flags & ATTR_KERNOVAL) {
- args->valuelen = valuelen;
- return 0;
- }
- if (args->valuelen < valuelen) {
- args->valuelen = valuelen;
- return -ERANGE;
- }
- args->valuelen = valuelen;
- memcpy(args->value, &name_loc->nameval[args->namelen], valuelen);
- } else {
- name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
- ASSERT(name_rmt->namelen == args->namelen);
- ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0);
- args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen);
- args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
- args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount,
- args->rmtvaluelen);
- if (args->flags & ATTR_KERNOVAL) {
- args->valuelen = args->rmtvaluelen;
- return 0;
- }
- if (args->valuelen < args->rmtvaluelen) {
- args->valuelen = args->rmtvaluelen;
- return -ERANGE;
- }
- args->valuelen = args->rmtvaluelen;
- }
- return 0;
+ return xfs_attr_copy_value(args,
+ &name_loc->nameval[args->namelen],
+ be16_to_cpu(name_loc->valuelen));
+ }
+
+ name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);
+ ASSERT(name_rmt->namelen == args->namelen);
+ ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0);
+ args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen);
+ args->rmtblkno = be32_to_cpu(name_rmt->valueblk);
+ args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount,
+ args->rmtvaluelen);
+ return xfs_attr_copy_value(args, NULL, args->rmtvaluelen);
}
/*========================================================================
@@ -2652,7 +2731,7 @@ xfs_attr3_leaf_clearflag(
/*
* Set up the operation.
*/
- error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, &bp);
if (error)
return error;
@@ -2719,7 +2798,7 @@ xfs_attr3_leaf_setflag(
/*
* Set up the operation.
*/
- error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp);
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, &bp);
if (error)
return error;
@@ -2781,7 +2860,7 @@ xfs_attr3_leaf_flipflags(
/*
* Read the block containing the "old" attr
*/
- error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, -1, &bp1);
+ error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno, &bp1);
if (error)
return error;
@@ -2790,7 +2869,7 @@ xfs_attr3_leaf_flipflags(
*/
if (args->blkno2 != args->blkno) {
error = xfs_attr3_leaf_read(args->trans, args->dp, args->blkno2,
- -1, &bp2);
+ &bp2);
if (error)
return error;
} else {
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index 7b74e18becff..73615b1dd1a8 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -17,13 +17,27 @@ struct xfs_inode;
struct xfs_trans;
/*
- * Used to keep a list of "remote value" extents when unlinking an inode.
+ * Incore version of the attribute leaf header.
*/
-typedef struct xfs_attr_inactive_list {
- xfs_dablk_t valueblk; /* block number of value bytes */
- int valuelen; /* number of bytes in value */
-} xfs_attr_inactive_list_t;
-
+struct xfs_attr3_icleaf_hdr {
+ uint32_t forw;
+ uint32_t back;
+ uint16_t magic;
+ uint16_t count;
+ uint16_t usedbytes;
+ /*
+ * Firstused is 32-bit here instead of 16-bit like the on-disk variant
+ * to support maximum fsb size of 64k without overflow issues throughout
+ * the attr code. Instead, the overflow condition is handled on
+ * conversion to/from disk.
+ */
+ uint32_t firstused;
+ __u8 holes;
+ struct {
+ uint16_t base;
+ uint16_t size;
+ } freemap[XFS_ATTR_LEAF_MAPSIZE];
+};
/*========================================================================
* Function prototypes for the kernel.
@@ -67,8 +81,8 @@ int xfs_attr3_leaf_add(struct xfs_buf *leaf_buffer,
struct xfs_da_args *args);
int xfs_attr3_leaf_remove(struct xfs_buf *leaf_buffer,
struct xfs_da_args *args);
-void xfs_attr3_leaf_list_int(struct xfs_buf *bp,
- struct xfs_attr_list_context *context);
+int xfs_attr3_leaf_list_int(struct xfs_buf *bp,
+ struct xfs_attr_list_context *context);
/*
* Routines used for shrinking the Btree.
@@ -85,8 +99,7 @@ int xfs_attr_leaf_order(struct xfs_buf *leaf1_bp,
struct xfs_buf *leaf2_bp);
int xfs_attr_leaf_newentsize(struct xfs_da_args *args, int *local);
int xfs_attr3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,
- xfs_dablk_t bno, xfs_daddr_t mappedbno,
- struct xfs_buf **bpp);
+ xfs_dablk_t bno, struct xfs_buf **bpp);
void xfs_attr3_leaf_hdr_from_disk(struct xfs_da_geometry *geo,
struct xfs_attr3_icleaf_hdr *to,
struct xfs_attr_leafblock *from);
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 4eb30d357045..8b7f74b3bea2 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -19,12 +19,30 @@
#include "xfs_trans.h"
#include "xfs_bmap.h"
#include "xfs_attr.h"
+#include "xfs_attr_remote.h"
#include "xfs_trace.h"
#include "xfs_error.h"
#define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */
/*
+ * Remote Attribute Values
+ * =======================
+ *
+ * Remote extended attribute values are conceptually simple -- they're written
+ * to data blocks mapped by an inode's attribute fork, and they have an upper
+ * size limit of 64k. Setting a value does not involve the XFS log.
+ *
+ * However, on a v5 filesystem, maximally sized remote attr values require one
+ * block more than 64k worth of space to hold both the remote attribute value
+ * header (64 bytes). On a 4k block filesystem this results in a 68k buffer;
+ * on a 64k block filesystem, this would be a 128k buffer. Note that the log
+ * format can only handle a dirty buffer of XFS_MAX_BLOCKSIZE length (64k).
+ * Therefore, we /must/ ensure that remote attribute value buffers never touch
+ * the logging system and therefore never have a log item.
+ */
+
+/*
* Each contiguous block has a header, so it is not just a simple attribute
* length to FSB conversion.
*/
@@ -358,6 +376,8 @@ xfs_attr_rmtval_copyin(
/*
* Read the value associated with an attribute from the out-of-line buffer
* that we stored it in.
+ *
+ * Returns 0 on successful retrieval, otherwise an error.
*/
int
xfs_attr_rmtval_get(
@@ -398,17 +418,15 @@ xfs_attr_rmtval_get(
(map[i].br_startblock != HOLESTARTBLOCK));
dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock);
dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount);
- error = xfs_trans_read_buf(mp, args->trans,
- mp->m_ddev_targp,
- dblkno, dblkcnt, 0, &bp,
- &xfs_attr3_rmt_buf_ops);
+ error = xfs_buf_read(mp->m_ddev_targp, dblkno, dblkcnt,
+ 0, &bp, &xfs_attr3_rmt_buf_ops);
if (error)
return error;
error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino,
&offset, &valuelen,
&dst);
- xfs_trans_brelse(args->trans, bp);
+ xfs_buf_relse(bp);
if (error)
return error;
@@ -527,9 +545,9 @@ xfs_attr_rmtval_set(
dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
- bp = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt);
- if (!bp)
- return -ENOMEM;
+ error = xfs_buf_get(mp->m_ddev_targp, dblkno, dblkcnt, &bp);
+ if (error)
+ return error;
bp->b_ops = &xfs_attr3_rmt_buf_ops;
xfs_attr_rmtval_copyin(mp, bp, args->dp->i_ino, &offset,
@@ -549,6 +567,33 @@ xfs_attr_rmtval_set(
return 0;
}
+/* Mark stale any incore buffers for the remote value. */
+int
+xfs_attr_rmtval_stale(
+ struct xfs_inode *ip,
+ struct xfs_bmbt_irec *map,
+ xfs_buf_flags_t incore_flags)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_buf *bp;
+
+ ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
+ if (XFS_IS_CORRUPT(mp, map->br_startblock == DELAYSTARTBLOCK) ||
+ XFS_IS_CORRUPT(mp, map->br_startblock == HOLESTARTBLOCK))
+ return -EFSCORRUPTED;
+
+ bp = xfs_buf_incore(mp->m_ddev_targp,
+ XFS_FSB_TO_DADDR(mp, map->br_startblock),
+ XFS_FSB_TO_BB(mp, map->br_blockcount), incore_flags);
+ if (bp) {
+ xfs_buf_stale(bp);
+ xfs_buf_relse(bp);
+ }
+
+ return 0;
+}
+
/*
* Remove the value associated with an attribute by deleting the
* out-of-line buffer that it is stored on.
@@ -557,7 +602,6 @@ int
xfs_attr_rmtval_remove(
struct xfs_da_args *args)
{
- struct xfs_mount *mp = args->dp->i_mount;
xfs_dablk_t lblkno;
int blkcnt;
int error;
@@ -572,9 +616,6 @@ xfs_attr_rmtval_remove(
blkcnt = args->rmtblkcnt;
while (blkcnt > 0) {
struct xfs_bmbt_irec map;
- struct xfs_buf *bp;
- xfs_daddr_t dblkno;
- int dblkcnt;
int nmap;
/*
@@ -585,22 +626,11 @@ xfs_attr_rmtval_remove(
blkcnt, &map, &nmap, XFS_BMAPI_ATTRFORK);
if (error)
return error;
- ASSERT(nmap == 1);
- ASSERT((map.br_startblock != DELAYSTARTBLOCK) &&
- (map.br_startblock != HOLESTARTBLOCK));
-
- dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock),
- dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
-
- /*
- * If the "remote" value is in the cache, remove it.
- */
- bp = xfs_buf_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK);
- if (bp) {
- xfs_buf_stale(bp);
- xfs_buf_relse(bp);
- bp = NULL;
- }
+ if (XFS_IS_CORRUPT(args->dp->i_mount, nmap != 1))
+ return -EFSCORRUPTED;
+ error = xfs_attr_rmtval_stale(args->dp, &map, XBF_TRYLOCK);
+ if (error)
+ return error;
lblkno += map.br_blockcount;
blkcnt -= map.br_blockcount;
diff --git a/fs/xfs/libxfs/xfs_attr_remote.h b/fs/xfs/libxfs/xfs_attr_remote.h
index 9d20b66ad379..6fb4572845ce 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.h
+++ b/fs/xfs/libxfs/xfs_attr_remote.h
@@ -11,5 +11,7 @@ int xfs_attr3_rmt_blocks(struct xfs_mount *mp, int attrlen);
int xfs_attr_rmtval_get(struct xfs_da_args *args);
int xfs_attr_rmtval_set(struct xfs_da_args *args);
int xfs_attr_rmtval_remove(struct xfs_da_args *args);
+int xfs_attr_rmtval_stale(struct xfs_inode *ip, struct xfs_bmbt_irec *map,
+ xfs_buf_flags_t incore_flags);
#endif /* __XFS_ATTR_REMOTE_H__ */
diff --git a/fs/xfs/libxfs/xfs_bit.c b/fs/xfs/libxfs/xfs_bit.c
index 7071ff98fdbc..40ce5f3094d1 100644
--- a/fs/xfs/libxfs/xfs_bit.c
+++ b/fs/xfs/libxfs/xfs_bit.c
@@ -5,6 +5,7 @@
*/
#include "xfs.h"
#include "xfs_log_format.h"
+#include "xfs_bit.h"
/*
* XFS bit manipulation routines, used in non-realtime code.
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 07aad70f3931..9a6d7a84689a 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -34,6 +34,7 @@
#include "xfs_ag_resv.h"
#include "xfs_refcount.h"
#include "xfs_icache.h"
+#include "xfs_iomap.h"
kmem_zone_t *xfs_bmap_free_item_zone;
@@ -383,8 +384,10 @@ xfs_bmap_check_leaf_extents(
xfs_check_block(block, mp, 0, 0);
pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
bno = be64_to_cpu(*pp);
- XFS_WANT_CORRUPTED_GOTO(mp,
- xfs_verify_fsbno(mp, bno), error0);
+ if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbno(mp, bno))) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
if (bp_release) {
bp_release = 0;
xfs_trans_brelse(NULL, bp);
@@ -553,7 +556,7 @@ __xfs_bmap_add_free(
#endif
ASSERT(xfs_bmap_free_item_zone != NULL);
- new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP);
+ new = kmem_zone_alloc(xfs_bmap_free_item_zone, 0);
new->xefi_startblock = bno;
new->xefi_blockcount = (xfs_extlen_t)len;
if (oinfo)
@@ -611,8 +614,8 @@ xfs_bmap_btree_to_extents(
pp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, ifp->if_broot_bytes);
cbno = be64_to_cpu(*pp);
#ifdef DEBUG
- XFS_WANT_CORRUPTED_RETURN(cur->bc_mp,
- xfs_btree_check_lptr(cur, cbno, 1));
+ if (XFS_IS_CORRUPT(cur->bc_mp, !xfs_btree_check_lptr(cur, cbno, 1)))
+ return -EFSCORRUPTED;
#endif
error = xfs_btree_read_bufl(mp, tp, cbno, &cbp, XFS_BMAP_BTREE_REF,
&xfs_bmbt_buf_ops);
@@ -727,11 +730,11 @@ xfs_bmap_extents_to_btree(
cur->bc_private.b.allocated++;
ip->i_d.di_nblocks++;
xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, 1L);
- abp = xfs_btree_get_bufl(mp, tp, args.fsbno);
- if (!abp) {
- error = -EFSCORRUPTED;
+ error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
+ XFS_FSB_TO_DADDR(mp, args.fsbno),
+ mp->m_bsize, 0, &abp);
+ if (error)
goto out_unreserve_dquot;
- }
/*
* Fill in the child block.
@@ -792,6 +795,7 @@ out_root_realloc:
*/
void
xfs_bmap_local_to_extents_empty(
+ struct xfs_trans *tp,
struct xfs_inode *ip,
int whichfork)
{
@@ -808,6 +812,7 @@ xfs_bmap_local_to_extents_empty(
ifp->if_u1.if_root = NULL;
ifp->if_height = 0;
XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
}
@@ -840,7 +845,7 @@ xfs_bmap_local_to_extents(
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
if (!ifp->if_bytes) {
- xfs_bmap_local_to_extents_empty(ip, whichfork);
+ xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
flags = XFS_ILOG_CORE;
goto done;
}
@@ -873,7 +878,11 @@ xfs_bmap_local_to_extents(
ASSERT(args.fsbno != NULLFSBLOCK);
ASSERT(args.len == 1);
tp->t_firstblock = args.fsbno;
- bp = xfs_btree_get_bufl(args.mp, tp, args.fsbno);
+ error = xfs_trans_get_buf(tp, args.mp->m_ddev_targp,
+ XFS_FSB_TO_DADDR(args.mp, args.fsbno),
+ args.mp->m_bsize, 0, &bp);
+ if (error)
+ goto done;
/*
* Initialize the block, copy the data and log the remote buffer.
@@ -887,7 +896,7 @@ xfs_bmap_local_to_extents(
/* account for the change in fork size */
xfs_idata_realloc(ip, -ifp->if_bytes, whichfork);
- xfs_bmap_local_to_extents_empty(ip, whichfork);
+ xfs_bmap_local_to_extents_empty(tp, ip, whichfork);
flags |= XFS_ILOG_CORE;
ifp->if_u1.if_root = NULL;
@@ -934,7 +943,10 @@ xfs_bmap_add_attrfork_btree(
if (error)
goto error0;
/* must be at least one entry */
- XFS_WANT_CORRUPTED_GOTO(mp, stat == 1, error0);
+ if (XFS_IS_CORRUPT(mp, stat != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
if ((error = xfs_btree_new_iroot(cur, flags, &stat)))
goto error0;
if (stat == 0) {
@@ -1081,7 +1093,7 @@ xfs_bmap_add_attrfork(
goto trans_cancel;
if (XFS_IFORK_Q(ip))
goto trans_cancel;
- if (ip->i_d.di_anextents != 0) {
+ if (XFS_IS_CORRUPT(mp, ip->i_d.di_anextents != 0)) {
error = -EFSCORRUPTED;
goto trans_cancel;
}
@@ -1099,7 +1111,7 @@ xfs_bmap_add_attrfork(
if (error)
goto trans_cancel;
ASSERT(ip->i_afp == NULL);
- ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP);
+ ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, 0);
ip->i_afp->if_flags = XFS_IFEXTENTS;
logflags = 0;
switch (ip->i_d.di_format) {
@@ -1152,6 +1164,65 @@ trans_cancel:
* Internal and external extent tree search functions.
*/
+struct xfs_iread_state {
+ struct xfs_iext_cursor icur;
+ xfs_extnum_t loaded;
+};
+
+/* Stuff every bmbt record from this block into the incore extent map. */
+static int
+xfs_iread_bmbt_block(
+ struct xfs_btree_cur *cur,
+ int level,
+ void *priv)
+{
+ struct xfs_iread_state *ir = priv;
+ struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_inode *ip = cur->bc_private.b.ip;
+ struct xfs_btree_block *block;
+ struct xfs_buf *bp;
+ struct xfs_bmbt_rec *frp;
+ xfs_extnum_t num_recs;
+ xfs_extnum_t j;
+ int whichfork = cur->bc_private.b.whichfork;
+
+ block = xfs_btree_get_block(cur, level, &bp);
+
+ /* Abort if we find more records than nextents. */
+ num_recs = xfs_btree_get_numrecs(block);
+ if (unlikely(ir->loaded + num_recs >
+ XFS_IFORK_NEXTENTS(ip, whichfork))) {
+ xfs_warn(ip->i_mount, "corrupt dinode %llu, (btree extents).",
+ (unsigned long long)ip->i_ino);
+ xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, block,
+ sizeof(*block), __this_address);
+ return -EFSCORRUPTED;
+ }
+
+ /* Copy records into the incore cache. */
+ frp = XFS_BMBT_REC_ADDR(mp, block, 1);
+ for (j = 0; j < num_recs; j++, frp++, ir->loaded++) {
+ struct xfs_bmbt_irec new;
+ xfs_failaddr_t fa;
+
+ xfs_bmbt_disk_get_all(frp, &new);
+ fa = xfs_bmap_validate_extent(ip, whichfork, &new);
+ if (fa) {
+ xfs_inode_verifier_error(ip, -EFSCORRUPTED,
+ "xfs_iread_extents(2)", frp,
+ sizeof(*frp), fa);
+ return -EFSCORRUPTED;
+ }
+ xfs_iext_insert(ip, &ir->icur, &new,
+ xfs_bmap_fork_to_state(whichfork));
+ trace_xfs_read_extent(ip, &ir->icur,
+ xfs_bmap_fork_to_state(whichfork), _THIS_IP_);
+ xfs_iext_next(XFS_IFORK_PTR(ip, whichfork), &ir->icur);
+ }
+
+ return 0;
+}
+
/*
* Read in extents from a btree-format inode.
*/
@@ -1161,134 +1232,39 @@ xfs_iread_extents(
struct xfs_inode *ip,
int whichfork)
{
- struct xfs_mount *mp = ip->i_mount;
- int state = xfs_bmap_fork_to_state(whichfork);
+ struct xfs_iread_state ir;
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
- xfs_extnum_t nextents = XFS_IFORK_NEXTENTS(ip, whichfork);
- struct xfs_btree_block *block = ifp->if_broot;
- struct xfs_iext_cursor icur;
- struct xfs_bmbt_irec new;
- xfs_fsblock_t bno;
- struct xfs_buf *bp;
- xfs_extnum_t i, j;
- int level;
- __be64 *pp;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_btree_cur *cur;
int error;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
- return -EFSCORRUPTED;
- }
-
- /*
- * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
- */
- level = be16_to_cpu(block->bb_level);
- if (unlikely(level == 0)) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
- return -EFSCORRUPTED;
- }
- pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
- bno = be64_to_cpu(*pp);
-
- /*
- * Go down the tree until leaf level is reached, following the first
- * pointer (leftmost) at each level.
- */
- while (level-- > 0) {
- error = xfs_btree_read_bufl(mp, tp, bno, &bp,
- XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
- if (error)
- goto out;
- block = XFS_BUF_TO_BLOCK(bp);
- if (level == 0)
- break;
- pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
- bno = be64_to_cpu(*pp);
- XFS_WANT_CORRUPTED_GOTO(mp,
- xfs_verify_fsbno(mp, bno), out_brelse);
- xfs_trans_brelse(tp, bp);
+ if (XFS_IS_CORRUPT(mp,
+ XFS_IFORK_FORMAT(ip, whichfork) !=
+ XFS_DINODE_FMT_BTREE)) {
+ error = -EFSCORRUPTED;
+ goto out;
}
- /*
- * Here with bp and block set to the leftmost leaf node in the tree.
- */
- i = 0;
- xfs_iext_first(ifp, &icur);
-
- /*
- * Loop over all leaf nodes. Copy information to the extent records.
- */
- for (;;) {
- xfs_bmbt_rec_t *frp;
- xfs_fsblock_t nextbno;
- xfs_extnum_t num_recs;
-
- num_recs = xfs_btree_get_numrecs(block);
- if (unlikely(i + num_recs > nextents)) {
- xfs_warn(ip->i_mount,
- "corrupt dinode %Lu, (btree extents).",
- (unsigned long long) ip->i_ino);
- xfs_inode_verifier_error(ip, -EFSCORRUPTED,
- __func__, block, sizeof(*block),
- __this_address);
- error = -EFSCORRUPTED;
- goto out_brelse;
- }
- /*
- * Read-ahead the next leaf block, if any.
- */
- nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
- if (nextbno != NULLFSBLOCK)
- xfs_btree_reada_bufl(mp, nextbno, 1,
- &xfs_bmbt_buf_ops);
- /*
- * Copy records into the extent records.
- */
- frp = XFS_BMBT_REC_ADDR(mp, block, 1);
- for (j = 0; j < num_recs; j++, frp++, i++) {
- xfs_failaddr_t fa;
-
- xfs_bmbt_disk_get_all(frp, &new);
- fa = xfs_bmap_validate_extent(ip, whichfork, &new);
- if (fa) {
- error = -EFSCORRUPTED;
- xfs_inode_verifier_error(ip, error,
- "xfs_iread_extents(2)",
- frp, sizeof(*frp), fa);
- goto out_brelse;
- }
- xfs_iext_insert(ip, &icur, &new, state);
- trace_xfs_read_extent(ip, &icur, state, _THIS_IP_);
- xfs_iext_next(ifp, &icur);
- }
- xfs_trans_brelse(tp, bp);
- bno = nextbno;
- /*
- * If we've reached the end, stop.
- */
- if (bno == NULLFSBLOCK)
- break;
- error = xfs_btree_read_bufl(mp, tp, bno, &bp,
- XFS_BMAP_BTREE_REF, &xfs_bmbt_buf_ops);
- if (error)
- goto out;
- block = XFS_BUF_TO_BLOCK(bp);
- }
+ ir.loaded = 0;
+ xfs_iext_first(ifp, &ir.icur);
+ cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
+ error = xfs_btree_visit_blocks(cur, xfs_iread_bmbt_block,
+ XFS_BTREE_VISIT_RECORDS, &ir);
+ xfs_btree_del_cursor(cur, error);
+ if (error)
+ goto out;
- if (i != XFS_IFORK_NEXTENTS(ip, whichfork)) {
+ if (XFS_IS_CORRUPT(mp,
+ ir.loaded != XFS_IFORK_NEXTENTS(ip, whichfork))) {
error = -EFSCORRUPTED;
goto out;
}
- ASSERT(i == xfs_iext_count(ifp));
+ ASSERT(ir.loaded == xfs_iext_count(ifp));
ifp->if_flags |= XFS_IFEXTENTS;
return 0;
-
-out_brelse:
- xfs_trans_brelse(tp, bp);
out:
xfs_iext_destroy(ifp);
return error;
@@ -1315,8 +1291,7 @@ xfs_bmap_first_unused(
xfs_fileoff_t lowest, max;
int error;
- ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_BTREE ||
- XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS ||
+ ASSERT(xfs_ifork_has_extents(ip, whichfork) ||
XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL) {
@@ -1372,7 +1347,8 @@ xfs_bmap_last_before(
case XFS_DINODE_FMT_EXTENTS:
break;
default:
- return -EIO;
+ ASSERT(0);
+ return -EFSCORRUPTED;
}
if (!(ifp->if_flags & XFS_IFEXTENTS)) {
@@ -1471,9 +1447,8 @@ xfs_bmap_last_offset(
if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL)
return 0;
- if (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE &&
- XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS)
- return -EIO;
+ if (XFS_IS_CORRUPT(ip->i_mount, !xfs_ifork_has_extents(ip, whichfork)))
+ return -EFSCORRUPTED;
error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, &is_empty);
if (error || is_empty)
@@ -1650,15 +1625,24 @@ xfs_bmap_add_extent_delay_real(
error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_btree_delete(bma->cur, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_btree_decrement(bma->cur, 0, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_bmbt_update(bma->cur, &LEFT);
if (error)
goto done;
@@ -1684,7 +1668,10 @@ xfs_bmap_add_extent_delay_real(
error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_bmbt_update(bma->cur, &LEFT);
if (error)
goto done;
@@ -1714,7 +1701,10 @@ xfs_bmap_add_extent_delay_real(
error = xfs_bmbt_lookup_eq(bma->cur, &RIGHT, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_bmbt_update(bma->cur, &PREV);
if (error)
goto done;
@@ -1739,11 +1729,17 @@ xfs_bmap_add_extent_delay_real(
error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
+ if (XFS_IS_CORRUPT(mp, i != 0)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_btree_insert(bma->cur, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
}
break;
@@ -1774,7 +1770,10 @@ xfs_bmap_add_extent_delay_real(
error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_bmbt_update(bma->cur, &LEFT);
if (error)
goto done;
@@ -1795,11 +1794,17 @@ xfs_bmap_add_extent_delay_real(
error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
+ if (XFS_IS_CORRUPT(mp, i != 0)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_btree_insert(bma->cur, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
}
if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
@@ -1840,7 +1845,10 @@ xfs_bmap_add_extent_delay_real(
error = xfs_bmbt_lookup_eq(bma->cur, &old, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_bmbt_update(bma->cur, &RIGHT);
if (error)
goto done;
@@ -1872,11 +1880,17 @@ xfs_bmap_add_extent_delay_real(
error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
+ if (XFS_IS_CORRUPT(mp, i != 0)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_btree_insert(bma->cur, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
}
if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
@@ -1952,11 +1966,17 @@ xfs_bmap_add_extent_delay_real(
error = xfs_bmbt_lookup_eq(bma->cur, new, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
+ if (XFS_IS_CORRUPT(mp, i != 0)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_btree_insert(bma->cur, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
}
if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
@@ -1985,11 +2005,8 @@ xfs_bmap_add_extent_delay_real(
}
/* add reverse mapping unless caller opted out */
- if (!(bma->flags & XFS_BMAPI_NORMAP)) {
- error = xfs_rmap_map_extent(bma->tp, bma->ip, whichfork, new);
- if (error)
- goto done;
- }
+ if (!(bma->flags & XFS_BMAPI_NORMAP))
+ xfs_rmap_map_extent(bma->tp, bma->ip, whichfork, new);
/* convert to a btree if necessary */
if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
@@ -2153,19 +2170,34 @@ xfs_bmap_add_extent_unwritten_real(
error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
if ((error = xfs_btree_delete(cur, &i)))
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
if ((error = xfs_btree_delete(cur, &i)))
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_bmbt_update(cur, &LEFT);
if (error)
goto done;
@@ -2191,13 +2223,22 @@ xfs_bmap_add_extent_unwritten_real(
error = xfs_bmbt_lookup_eq(cur, &PREV, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
if ((error = xfs_btree_delete(cur, &i)))
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_bmbt_update(cur, &LEFT);
if (error)
goto done;
@@ -2226,13 +2267,22 @@ xfs_bmap_add_extent_unwritten_real(
error = xfs_bmbt_lookup_eq(cur, &RIGHT, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
if ((error = xfs_btree_delete(cur, &i)))
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
if ((error = xfs_btree_decrement(cur, 0, &i)))
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_bmbt_update(cur, &PREV);
if (error)
goto done;
@@ -2255,7 +2305,10 @@ xfs_bmap_add_extent_unwritten_real(
error = xfs_bmbt_lookup_eq(cur, new, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_bmbt_update(cur, &PREV);
if (error)
goto done;
@@ -2285,7 +2338,10 @@ xfs_bmap_add_extent_unwritten_real(
error = xfs_bmbt_lookup_eq(cur, &old, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_bmbt_update(cur, &PREV);
if (error)
goto done;
@@ -2319,14 +2375,20 @@ xfs_bmap_add_extent_unwritten_real(
error = xfs_bmbt_lookup_eq(cur, &old, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_bmbt_update(cur, &PREV);
if (error)
goto done;
cur->bc_rec.b = *new;
if ((error = xfs_btree_insert(cur, &i)))
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
}
break;
@@ -2353,7 +2415,10 @@ xfs_bmap_add_extent_unwritten_real(
error = xfs_bmbt_lookup_eq(cur, &old, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_bmbt_update(cur, &PREV);
if (error)
goto done;
@@ -2387,17 +2452,26 @@ xfs_bmap_add_extent_unwritten_real(
error = xfs_bmbt_lookup_eq(cur, &old, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_bmbt_update(cur, &PREV);
if (error)
goto done;
error = xfs_bmbt_lookup_eq(cur, new, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
+ if (XFS_IS_CORRUPT(mp, i != 0)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
if ((error = xfs_btree_insert(cur, &i)))
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
}
break;
@@ -2431,7 +2505,10 @@ xfs_bmap_add_extent_unwritten_real(
error = xfs_bmbt_lookup_eq(cur, &old, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
/* new right extent - oldext */
error = xfs_bmbt_update(cur, &r[1]);
if (error)
@@ -2440,7 +2517,10 @@ xfs_bmap_add_extent_unwritten_real(
cur->bc_rec.b = PREV;
if ((error = xfs_btree_insert(cur, &i)))
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
/*
* Reset the cursor to the position of the new extent
* we are about to insert as we can't trust it after
@@ -2449,11 +2529,17 @@ xfs_bmap_add_extent_unwritten_real(
error = xfs_bmbt_lookup_eq(cur, new, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
+ if (XFS_IS_CORRUPT(mp, i != 0)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
/* new middle extent - newext */
if ((error = xfs_btree_insert(cur, &i)))
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
}
break;
@@ -2471,9 +2557,7 @@ xfs_bmap_add_extent_unwritten_real(
}
/* update reverse mappings */
- error = xfs_rmap_convert_extent(mp, tp, ip, whichfork, new);
- if (error)
- goto done;
+ xfs_rmap_convert_extent(mp, tp, ip, whichfork, new);
/* convert to a btree if necessary */
if (xfs_bmap_needs_btree(ip, whichfork)) {
@@ -2738,15 +2822,24 @@ xfs_bmap_add_extent_hole_real(
error = xfs_bmbt_lookup_eq(cur, &right, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_btree_delete(cur, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_btree_decrement(cur, 0, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_bmbt_update(cur, &left);
if (error)
goto done;
@@ -2772,7 +2865,10 @@ xfs_bmap_add_extent_hole_real(
error = xfs_bmbt_lookup_eq(cur, &old, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_bmbt_update(cur, &left);
if (error)
goto done;
@@ -2799,7 +2895,10 @@ xfs_bmap_add_extent_hole_real(
error = xfs_bmbt_lookup_eq(cur, &old, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_bmbt_update(cur, &right);
if (error)
goto done;
@@ -2822,21 +2921,24 @@ xfs_bmap_add_extent_hole_real(
error = xfs_bmbt_lookup_eq(cur, new, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
+ if (XFS_IS_CORRUPT(mp, i != 0)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_btree_insert(cur, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
}
break;
}
/* add reverse mapping unless caller opted out */
- if (!(flags & XFS_BMAPI_NORMAP)) {
- error = xfs_rmap_map_extent(tp, ip, whichfork, new);
- if (error)
- goto done;
- }
+ if (!(flags & XFS_BMAPI_NORMAP))
+ xfs_rmap_map_extent(tp, ip, whichfork, new);
/* convert to a btree if necessary */
if (xfs_bmap_needs_btree(ip, whichfork)) {
@@ -3064,7 +3166,7 @@ xfs_bmap_adjacent(
mp = ap->ip->i_mount;
nullfb = ap->tp->t_firstblock == NULLFSBLOCK;
rt = XFS_IS_REALTIME_INODE(ap->ip) &&
- xfs_alloc_is_userdata(ap->datatype);
+ (ap->datatype & XFS_ALLOC_USERDATA);
fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
ap->tp->t_firstblock);
/*
@@ -3209,11 +3311,12 @@ xfs_bmap_longest_free_extent(
pag = xfs_perag_get(mp, ag);
if (!pag->pagf_init) {
error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK);
- if (error)
- goto out;
-
- if (!pag->pagf_init) {
- *notinit = 1;
+ if (error) {
+ /* Couldn't lock the AGF, so skip this AG. */
+ if (error == -EAGAIN) {
+ *notinit = 1;
+ error = 0;
+ }
goto out;
}
}
@@ -3417,7 +3520,7 @@ xfs_bmap_btalloc(
if (ap->flags & XFS_BMAPI_COWFORK)
align = xfs_get_cowextsz_hint(ap->ip);
- else if (xfs_alloc_is_userdata(ap->datatype))
+ else if (ap->datatype & XFS_ALLOC_USERDATA)
align = xfs_get_extsz_hint(ap->ip);
if (align) {
error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
@@ -3432,7 +3535,7 @@ xfs_bmap_btalloc(
fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp,
ap->tp->t_firstblock);
if (nullfb) {
- if (xfs_alloc_is_userdata(ap->datatype) &&
+ if ((ap->datatype & XFS_ALLOC_USERDATA) &&
xfs_inode_is_filestream(ap->ip)) {
ag = xfs_filestream_lookup_ag(ap->ip);
ag = (ag != NULLAGNUMBER) ? ag : 0;
@@ -3472,7 +3575,7 @@ xfs_bmap_btalloc(
* enough for the request. If one isn't found, then adjust
* the minimum allocation size to the largest space found.
*/
- if (xfs_alloc_is_userdata(ap->datatype) &&
+ if ((ap->datatype & XFS_ALLOC_USERDATA) &&
xfs_inode_is_filestream(ap->ip))
error = xfs_bmap_btalloc_filestreams(ap, &args, &blen);
else
@@ -3506,13 +3609,11 @@ xfs_bmap_btalloc(
args.mod = args.prod - args.mod;
}
/*
- * If we are not low on available data blocks, and the
- * underlying logical volume manager is a stripe, and
- * the file offset is zero then try to allocate data
- * blocks on stripe unit boundary.
- * NOTE: ap->aeof is only set if the allocation length
- * is >= the stripe unit and the allocation offset is
- * at the end of file.
+ * If we are not low on available data blocks, and the underlying
+ * logical volume manager is a stripe, and the file offset is zero then
+ * try to allocate data blocks on stripe unit boundary. NOTE: ap->aeof
+ * is only set if the allocation length is >= the stripe unit and the
+ * allocation offset is at the end of file.
*/
if (!(ap->tp->t_flags & XFS_TRANS_LOWMODE) && ap->aeof) {
if (!ap->offset) {
@@ -3520,9 +3621,11 @@ xfs_bmap_btalloc(
atype = args.type;
isaligned = 1;
/*
- * Adjust for alignment
+ * Adjust minlen to try and preserve alignment if we
+ * can't guarantee an aligned maxlen extent.
*/
- if (blen > args.alignment && blen <= args.maxlen)
+ if (blen > args.alignment &&
+ blen <= args.maxlen + args.alignment)
args.minlen = blen - args.alignment;
args.minalignslop = 0;
} else {
@@ -3560,8 +3663,6 @@ xfs_bmap_btalloc(
args.wasdel = ap->wasdel;
args.resv = XFS_AG_RESV_NONE;
args.datatype = ap->datatype;
- if (ap->datatype & XFS_ALLOC_USERDATA_ZERO)
- args.ip = ap->ip;
error = xfs_alloc_vextent(&args);
if (error)
@@ -3646,20 +3747,6 @@ xfs_bmap_btalloc(
return 0;
}
-/*
- * xfs_bmap_alloc is called by xfs_bmapi to allocate an extent for a file.
- * It figures out where to ask the underlying allocator to put the new extent.
- */
-STATIC int
-xfs_bmap_alloc(
- struct xfs_bmalloca *ap) /* bmap alloc argument struct */
-{
- if (XFS_IS_REALTIME_INODE(ap->ip) &&
- xfs_alloc_is_userdata(ap->datatype))
- return xfs_bmap_rtalloc(ap);
- return xfs_bmap_btalloc(ap);
-}
-
/* Trim extent to fit a logical block range. */
void
xfs_trim_extent(
@@ -3821,11 +3908,8 @@ xfs_bmapi_read(
XFS_BMAPI_COWFORK)));
ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));
- if (unlikely(XFS_TEST_ERROR(
- (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
- XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
- mp, XFS_ERRTAG_BMAPIFORMAT))) {
- XFS_ERROR_REPORT("xfs_bmapi_read", XFS_ERRLEVEL_LOW, mp);
+ if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) ||
+ XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
return -EFSCORRUPTED;
}
@@ -4016,6 +4100,39 @@ out_unreserve_quota:
}
static int
+xfs_bmap_alloc_userdata(
+ struct xfs_bmalloca *bma)
+{
+ struct xfs_mount *mp = bma->ip->i_mount;
+ int whichfork = xfs_bmapi_whichfork(bma->flags);
+ int error;
+
+ /*
+ * Set the data type being allocated. For the data fork, the first data
+ * in the file is treated differently to all other allocations. For the
+ * attribute fork, we only need to ensure the allocated range is not on
+ * the busy list.
+ */
+ bma->datatype = XFS_ALLOC_NOBUSY;
+ if (whichfork == XFS_DATA_FORK) {
+ bma->datatype |= XFS_ALLOC_USERDATA;
+ if (bma->offset == 0)
+ bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
+
+ if (mp->m_dalign && bma->length >= mp->m_dalign) {
+ error = xfs_bmap_isaeof(bma, whichfork);
+ if (error)
+ return error;
+ }
+
+ if (XFS_IS_REALTIME_INODE(bma->ip))
+ return xfs_bmap_rtalloc(bma);
+ }
+
+ return xfs_bmap_btalloc(bma);
+}
+
+static int
xfs_bmapi_allocate(
struct xfs_bmalloca *bma)
{
@@ -4034,7 +4151,8 @@ xfs_bmapi_allocate(
if (bma->wasdel) {
bma->length = (xfs_extlen_t)bma->got.br_blockcount;
bma->offset = bma->got.br_startoff;
- xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev);
+ if (!xfs_iext_peek_prev_extent(ifp, &bma->icur, &bma->prev))
+ bma->prev.br_startoff = NULLFILEOFF;
} else {
bma->length = XFS_FILBLKS_MIN(bma->length, MAXEXTLEN);
if (!bma->eof)
@@ -4042,43 +4160,24 @@ xfs_bmapi_allocate(
bma->got.br_startoff - bma->offset);
}
- /*
- * Set the data type being allocated. For the data fork, the first data
- * in the file is treated differently to all other allocations. For the
- * attribute fork, we only need to ensure the allocated range is not on
- * the busy list.
- */
- if (!(bma->flags & XFS_BMAPI_METADATA)) {
- bma->datatype = XFS_ALLOC_NOBUSY;
- if (whichfork == XFS_DATA_FORK) {
- if (bma->offset == 0)
- bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
- else
- bma->datatype |= XFS_ALLOC_USERDATA;
- }
- if (bma->flags & XFS_BMAPI_ZERO)
- bma->datatype |= XFS_ALLOC_USERDATA_ZERO;
- }
+ if (bma->flags & XFS_BMAPI_CONTIG)
+ bma->minlen = bma->length;
+ else
+ bma->minlen = 1;
- bma->minlen = (bma->flags & XFS_BMAPI_CONTIG) ? bma->length : 1;
+ if (bma->flags & XFS_BMAPI_METADATA)
+ error = xfs_bmap_btalloc(bma);
+ else
+ error = xfs_bmap_alloc_userdata(bma);
+ if (error || bma->blkno == NULLFSBLOCK)
+ return error;
- /*
- * Only want to do the alignment at the eof if it is userdata and
- * allocation length is larger than a stripe unit.
- */
- if (mp->m_dalign && bma->length >= mp->m_dalign &&
- !(bma->flags & XFS_BMAPI_METADATA) && whichfork == XFS_DATA_FORK) {
- error = xfs_bmap_isaeof(bma, whichfork);
+ if (bma->flags & XFS_BMAPI_ZERO) {
+ error = xfs_zero_extent(bma->ip, bma->blkno, bma->length);
if (error)
return error;
}
- error = xfs_bmap_alloc(bma);
- if (error)
- return error;
-
- if (bma->blkno == NULLFSBLOCK)
- return 0;
if ((ifp->if_flags & XFS_IFBROOT) && !bma->cur)
bma->cur = xfs_bmbt_init_cursor(mp, bma->tp, bma->ip, whichfork);
/*
@@ -4318,11 +4417,8 @@ xfs_bmapi_write(
ASSERT((flags & (XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO)) !=
(XFS_BMAPI_PREALLOC | XFS_BMAPI_ZERO));
- if (unlikely(XFS_TEST_ERROR(
- (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
- XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
- mp, XFS_ERRTAG_BMAPIFORMAT))) {
- XFS_ERROR_REPORT("xfs_bmapi_write", XFS_ERRLEVEL_LOW, mp);
+ if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) ||
+ XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
return -EFSCORRUPTED;
}
@@ -4401,12 +4497,9 @@ xfs_bmapi_write(
* If this is a CoW allocation, record the data in
* the refcount btree for orphan recovery.
*/
- if (whichfork == XFS_COW_FORK) {
- error = xfs_refcount_alloc_cow_extent(tp,
- bma.blkno, bma.length);
- if (error)
- goto error0;
- }
+ if (whichfork == XFS_COW_FORK)
+ xfs_refcount_alloc_cow_extent(tp, bma.blkno,
+ bma.length);
}
/* Deal with the allocated space we found. */
@@ -4465,16 +4558,21 @@ int
xfs_bmapi_convert_delalloc(
struct xfs_inode *ip,
int whichfork,
- xfs_fileoff_t offset_fsb,
- struct xfs_bmbt_irec *imap,
+ xfs_off_t offset,
+ struct iomap *iomap,
unsigned int *seq)
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_mount *mp = ip->i_mount;
+ xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
struct xfs_bmalloca bma = { NULL };
+ uint16_t flags = 0;
struct xfs_trans *tp;
int error;
+ if (whichfork == XFS_COW_FORK)
+ flags |= IOMAP_F_SHARED;
+
/*
* Space for the extent and indirect blocks was reserved when the
* delalloc extent was created so there's no need to do so here.
@@ -4504,7 +4602,7 @@ xfs_bmapi_convert_delalloc(
* the extent. Just return the real extent at this offset.
*/
if (!isnullstartblock(bma.got.br_startblock)) {
- *imap = bma.got;
+ xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
*seq = READ_ONCE(ifp->if_seq);
goto out_trans_cancel;
}
@@ -4514,7 +4612,6 @@ xfs_bmapi_convert_delalloc(
bma.wasdel = true;
bma.offset = bma.got.br_startoff;
bma.length = max_t(xfs_filblks_t, bma.got.br_blockcount, MAXEXTLEN);
- bma.total = XFS_EXTENTADD_SPACE_RES(ip->i_mount, XFS_DATA_FORK);
bma.minleft = xfs_bmapi_minleft(tp, ip, whichfork);
if (whichfork == XFS_COW_FORK)
bma.flags = XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC;
@@ -4530,22 +4627,18 @@ xfs_bmapi_convert_delalloc(
if (WARN_ON_ONCE(bma.blkno == NULLFSBLOCK))
goto out_finish;
error = -EFSCORRUPTED;
- if (WARN_ON_ONCE(!bma.got.br_startblock && !XFS_IS_REALTIME_INODE(ip)))
+ if (WARN_ON_ONCE(!xfs_valid_startblock(ip, bma.got.br_startblock)))
goto out_finish;
XFS_STATS_ADD(mp, xs_xstrat_bytes, XFS_FSB_TO_B(mp, bma.length));
XFS_STATS_INC(mp, xs_xstrat_quick);
ASSERT(!isnullstartblock(bma.got.br_startblock));
- *imap = bma.got;
+ xfs_bmbt_to_iomap(ip, iomap, &bma.got, flags);
*seq = READ_ONCE(ifp->if_seq);
- if (whichfork == XFS_COW_FORK) {
- error = xfs_refcount_alloc_cow_extent(tp, bma.blkno,
- bma.length);
- if (error)
- goto out_finish;
- }
+ if (whichfork == XFS_COW_FORK)
+ xfs_refcount_alloc_cow_extent(tp, bma.blkno, bma.length);
error = xfs_bmap_btree_to_extents(tp, ip, bma.cur, &bma.logflags,
whichfork);
@@ -4591,11 +4684,8 @@ xfs_bmapi_remap(
ASSERT((flags & (XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC)) !=
(XFS_BMAPI_ATTRFORK | XFS_BMAPI_PREALLOC));
- if (unlikely(XFS_TEST_ERROR(
- (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
- XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
- mp, XFS_ERRTAG_BMAPIFORMAT))) {
- XFS_ERROR_REPORT("xfs_bmapi_remap", XFS_ERRLEVEL_LOW, mp);
+ if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) ||
+ XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
return -EFSCORRUPTED;
}
@@ -5026,7 +5116,10 @@ xfs_bmap_del_extent_real(
error = xfs_bmbt_lookup_eq(cur, &got, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
}
if (got.br_startoff == del->br_startoff)
@@ -5050,7 +5143,10 @@ xfs_bmap_del_extent_real(
}
if ((error = xfs_btree_delete(cur, &i)))
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
break;
case BMAP_LEFT_FILLING:
/*
@@ -5121,7 +5217,10 @@ xfs_bmap_del_extent_real(
error = xfs_bmbt_lookup_eq(cur, &got, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
/*
* Update the btree record back
* to the original value.
@@ -5138,7 +5237,10 @@ xfs_bmap_del_extent_real(
error = -ENOSPC;
goto done;
}
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
} else
flags |= xfs_ilog_fext(whichfork);
XFS_IFORK_NEXT_SET(ip, whichfork,
@@ -5149,18 +5251,14 @@ xfs_bmap_del_extent_real(
}
/* remove reverse mapping */
- error = xfs_rmap_unmap_extent(tp, ip, whichfork, del);
- if (error)
- goto done;
+ xfs_rmap_unmap_extent(tp, ip, whichfork, del);
/*
* If we need to, add to list of extents to delete.
*/
if (do_fx && !(bflags & XFS_BMAPI_REMAP)) {
if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
- error = xfs_refcount_decrease_extent(tp, del);
- if (error)
- goto done;
+ xfs_refcount_decrease_extent(tp, del);
} else {
__xfs_bmap_add_free(tp, del->br_startblock,
del->br_blockcount, NULL,
@@ -5209,7 +5307,7 @@ __xfs_bunmapi(
int isrt; /* freeing in rt area */
int logflags; /* transaction logging flags */
xfs_extlen_t mod; /* rt extent offset */
- struct xfs_mount *mp; /* mount structure */
+ struct xfs_mount *mp = ip->i_mount;
int tmp_logflags; /* partial logging flags */
int wasdel; /* was a delayed alloc extent */
int whichfork; /* data or attribute fork */
@@ -5226,14 +5324,8 @@ __xfs_bunmapi(
whichfork = xfs_bmapi_whichfork(flags);
ASSERT(whichfork != XFS_COW_FORK);
ifp = XFS_IFORK_PTR(ip, whichfork);
- if (unlikely(
- XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
- XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {
- XFS_ERROR_REPORT("xfs_bunmapi", XFS_ERRLEVEL_LOW,
- ip->i_mount);
+ if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)))
return -EFSCORRUPTED;
- }
- mp = ip->i_mount;
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
@@ -5317,7 +5409,7 @@ __xfs_bunmapi(
* Make sure we don't touch multiple AGF headers out of order
* in a single transaction, as that could cause AB-BA deadlocks.
*/
- if (!wasdel) {
+ if (!wasdel && !isrt) {
agno = XFS_FSB_TO_AGNO(mp, del.br_startblock);
if (prev_agno != NULLAGNUMBER && prev_agno > agno)
break;
@@ -5393,16 +5485,17 @@ __xfs_bunmapi(
}
div_u64_rem(del.br_startblock, mp->m_sb.sb_rextsize, &mod);
if (mod) {
+ xfs_extlen_t off = mp->m_sb.sb_rextsize - mod;
+
/*
* Realtime extent is lined up at the end but not
* at the front. We'll get rid of full extents if
* we can.
*/
- mod = mp->m_sb.sb_rextsize - mod;
- if (del.br_blockcount > mod) {
- del.br_blockcount -= mod;
- del.br_startoff += mod;
- del.br_startblock += mod;
+ if (del.br_blockcount > off) {
+ del.br_blockcount -= off;
+ del.br_startoff += off;
+ del.br_startblock += off;
} else if (del.br_startoff == start &&
(del.br_state == XFS_EXT_UNWRITTEN ||
tp->t_blk_res == 0)) {
@@ -5420,6 +5513,7 @@ __xfs_bunmapi(
continue;
} else if (del.br_state == XFS_EXT_UNWRITTEN) {
struct xfs_bmbt_irec prev;
+ xfs_fileoff_t unwrite_start;
/*
* This one is already unwritten.
@@ -5433,12 +5527,13 @@ __xfs_bunmapi(
ASSERT(!isnullstartblock(prev.br_startblock));
ASSERT(del.br_startblock ==
prev.br_startblock + prev.br_blockcount);
- if (prev.br_startoff < start) {
- mod = start - prev.br_startoff;
- prev.br_blockcount -= mod;
- prev.br_startblock += mod;
- prev.br_startoff = start;
- }
+ unwrite_start = max3(start,
+ del.br_startoff - mod,
+ prev.br_startoff);
+ mod = unwrite_start - prev.br_startoff;
+ prev.br_startoff = unwrite_start;
+ prev.br_startblock += mod;
+ prev.br_blockcount -= mod;
prev.br_state = XFS_EXT_UNWRITTEN;
error = xfs_bmap_add_extent_unwritten_real(tp,
ip, whichfork, &icur, &cur,
@@ -5627,23 +5722,31 @@ xfs_bmse_merge(
error = xfs_bmbt_lookup_eq(cur, got, &i);
if (error)
return error;
- XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
+ if (XFS_IS_CORRUPT(mp, i != 1))
+ return -EFSCORRUPTED;
error = xfs_btree_delete(cur, &i);
if (error)
return error;
- XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
+ if (XFS_IS_CORRUPT(mp, i != 1))
+ return -EFSCORRUPTED;
/* lookup and update size of the previous extent */
error = xfs_bmbt_lookup_eq(cur, left, &i);
if (error)
return error;
- XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
+ if (XFS_IS_CORRUPT(mp, i != 1))
+ return -EFSCORRUPTED;
error = xfs_bmbt_update(cur, &new);
if (error)
return error;
+ /* change to extent format if required after extent removal */
+ error = xfs_bmap_btree_to_extents(tp, ip, cur, logflags, whichfork);
+ if (error)
+ return error;
+
done:
xfs_iext_remove(ip, icur, 0);
xfs_iext_prev(XFS_IFORK_PTR(ip, whichfork), icur);
@@ -5651,12 +5754,11 @@ done:
&new);
/* update reverse mapping. rmap functions merge the rmaps for us */
- error = xfs_rmap_unmap_extent(tp, ip, whichfork, got);
- if (error)
- return error;
+ xfs_rmap_unmap_extent(tp, ip, whichfork, got);
memcpy(&new, got, sizeof(new));
new.br_startoff = left->br_startoff + left->br_blockcount;
- return xfs_rmap_map_extent(tp, ip, whichfork, &new);
+ xfs_rmap_map_extent(tp, ip, whichfork, &new);
+ return 0;
}
static int
@@ -5682,7 +5784,8 @@ xfs_bmap_shift_update_extent(
error = xfs_bmbt_lookup_eq(cur, &prev, &i);
if (error)
return error;
- XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
+ if (XFS_IS_CORRUPT(mp, i != 1))
+ return -EFSCORRUPTED;
error = xfs_bmbt_update(cur, got);
if (error)
@@ -5695,10 +5798,9 @@ xfs_bmap_shift_update_extent(
got);
/* update reverse mapping */
- error = xfs_rmap_unmap_extent(tp, ip, whichfork, &prev);
- if (error)
- return error;
- return xfs_rmap_map_extent(tp, ip, whichfork, got);
+ xfs_rmap_unmap_extent(tp, ip, whichfork, &prev);
+ xfs_rmap_map_extent(tp, ip, whichfork, got);
+ return 0;
}
int
@@ -5719,11 +5821,8 @@ xfs_bmap_collapse_extents(
int error = 0;
int logflags = 0;
- if (unlikely(XFS_TEST_ERROR(
- (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
- XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
- mp, XFS_ERRTAG_BMAPIFORMAT))) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+ if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) ||
+ XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
return -EFSCORRUPTED;
}
@@ -5747,8 +5846,10 @@ xfs_bmap_collapse_extents(
*done = true;
goto del_cursor;
}
- XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock),
- del_cursor);
+ if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
+ error = -EFSCORRUPTED;
+ goto del_cursor;
+ }
new_startoff = got.br_startoff - offset_shift_fsb;
if (xfs_iext_peek_prev_extent(ifp, &icur, &prev)) {
@@ -5837,11 +5938,8 @@ xfs_bmap_insert_extents(
int error = 0;
int logflags = 0;
- if (unlikely(XFS_TEST_ERROR(
- (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
- XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
- mp, XFS_ERRTAG_BMAPIFORMAT))) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
+ if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) ||
+ XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
return -EFSCORRUPTED;
}
@@ -5874,11 +5972,13 @@ xfs_bmap_insert_extents(
goto del_cursor;
}
}
- XFS_WANT_CORRUPTED_GOTO(mp, !isnullstartblock(got.br_startblock),
- del_cursor);
+ if (XFS_IS_CORRUPT(mp, isnullstartblock(got.br_startblock))) {
+ error = -EFSCORRUPTED;
+ goto del_cursor;
+ }
- if (stop_fsb >= got.br_startoff + got.br_blockcount) {
- error = -EIO;
+ if (XFS_IS_CORRUPT(mp, stop_fsb > got.br_startoff)) {
+ error = -EFSCORRUPTED;
goto del_cursor;
}
@@ -5943,12 +6043,8 @@ xfs_bmap_split_extent_at(
int logflags = 0;
int i = 0;
- if (unlikely(XFS_TEST_ERROR(
- (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
- XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE),
- mp, XFS_ERRTAG_BMAPIFORMAT))) {
- XFS_ERROR_REPORT("xfs_bmap_split_extent_at",
- XFS_ERRLEVEL_LOW, mp);
+ if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, whichfork)) ||
+ XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
return -EFSCORRUPTED;
}
@@ -5982,7 +6078,10 @@ xfs_bmap_split_extent_at(
error = xfs_bmbt_lookup_eq(cur, &got, &i);
if (error)
goto del_cursor;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto del_cursor;
+ }
}
got.br_blockcount = gotblkcnt;
@@ -6007,11 +6106,17 @@ xfs_bmap_split_extent_at(
error = xfs_bmbt_lookup_eq(cur, &new, &i);
if (error)
goto del_cursor;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 0, del_cursor);
+ if (XFS_IS_CORRUPT(mp, i != 0)) {
+ error = -EFSCORRUPTED;
+ goto del_cursor;
+ }
error = xfs_btree_insert(cur, &i);
if (error)
goto del_cursor;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, del_cursor);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto del_cursor;
+ }
}
/*
@@ -6094,7 +6199,7 @@ __xfs_bmap_add(
bmap->br_blockcount,
bmap->br_state);
- bi = kmem_alloc(sizeof(struct xfs_bmap_intent), KM_SLEEP | KM_NOFS);
+ bi = kmem_alloc(sizeof(struct xfs_bmap_intent), KM_NOFS);
INIT_LIST_HEAD(&bi->bi_list);
bi->bi_type = type;
bi->bi_owner = ip;
@@ -6106,29 +6211,29 @@ __xfs_bmap_add(
}
/* Map an extent into a file. */
-int
+void
xfs_bmap_map_extent(
struct xfs_trans *tp,
struct xfs_inode *ip,
struct xfs_bmbt_irec *PREV)
{
if (!xfs_bmap_is_update_needed(PREV))
- return 0;
+ return;
- return __xfs_bmap_add(tp, XFS_BMAP_MAP, ip, XFS_DATA_FORK, PREV);
+ __xfs_bmap_add(tp, XFS_BMAP_MAP, ip, XFS_DATA_FORK, PREV);
}
/* Unmap an extent out of a file. */
-int
+void
xfs_bmap_unmap_extent(
struct xfs_trans *tp,
struct xfs_inode *ip,
struct xfs_bmbt_irec *PREV)
{
if (!xfs_bmap_is_update_needed(PREV))
- return 0;
+ return;
- return __xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, XFS_DATA_FORK, PREV);
+ __xfs_bmap_add(tp, XFS_BMAP_UNMAP, ip, XFS_DATA_FORK, PREV);
}
/*
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 8f597f9abdbe..14d25e0b7d9c 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -171,11 +171,19 @@ static inline bool xfs_bmap_is_real_extent(struct xfs_bmbt_irec *irec)
!isnullstartblock(irec->br_startblock);
}
+/*
+ * Check the mapping for obviously garbage allocations that could trash the
+ * filesystem immediately.
+ */
+#define xfs_valid_startblock(ip, startblock) \
+ ((startblock) != 0 || XFS_IS_REALTIME_INODE(ip))
+
void xfs_trim_extent(struct xfs_bmbt_irec *irec, xfs_fileoff_t bno,
xfs_filblks_t len);
int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd);
int xfs_bmap_set_attrforkoff(struct xfs_inode *ip, int size, int *version);
-void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork);
+void xfs_bmap_local_to_extents_empty(struct xfs_trans *tp,
+ struct xfs_inode *ip, int whichfork);
void __xfs_bmap_add_free(struct xfs_trans *tp, xfs_fsblock_t bno,
xfs_filblks_t len, const struct xfs_owner_info *oinfo,
bool skip_discard);
@@ -220,8 +228,7 @@ int xfs_bmapi_reserve_delalloc(struct xfs_inode *ip, int whichfork,
struct xfs_bmbt_irec *got, struct xfs_iext_cursor *cur,
int eof);
int xfs_bmapi_convert_delalloc(struct xfs_inode *ip, int whichfork,
- xfs_fileoff_t offset_fsb, struct xfs_bmbt_irec *imap,
- unsigned int *seq);
+ xfs_off_t offset, struct iomap *iomap, unsigned int *seq);
int xfs_bmap_add_extent_unwritten_real(struct xfs_trans *tp,
struct xfs_inode *ip, int whichfork,
struct xfs_iext_cursor *icur, struct xfs_btree_cur **curp,
@@ -254,9 +261,9 @@ int xfs_bmap_finish_one(struct xfs_trans *tp, struct xfs_inode *ip,
enum xfs_bmap_intent_type type, int whichfork,
xfs_fileoff_t startoff, xfs_fsblock_t startblock,
xfs_filblks_t *blockcount, xfs_exntst_t state);
-int xfs_bmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip,
+void xfs_bmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip,
struct xfs_bmbt_irec *imap);
-int xfs_bmap_unmap_extent(struct xfs_trans *tp, struct xfs_inode *ip,
+void xfs_bmap_unmap_extent(struct xfs_trans *tp, struct xfs_inode *ip,
struct xfs_bmbt_irec *imap);
static inline int xfs_bmap_fork_to_state(int whichfork)
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index fbb18ba5d905..ffe608d2a2d9 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -400,8 +400,20 @@ xfs_bmbt_diff_two_keys(
union xfs_btree_key *k1,
union xfs_btree_key *k2)
{
- return (int64_t)be64_to_cpu(k1->bmbt.br_startoff) -
- be64_to_cpu(k2->bmbt.br_startoff);
+ uint64_t a = be64_to_cpu(k1->bmbt.br_startoff);
+ uint64_t b = be64_to_cpu(k2->bmbt.br_startoff);
+
+ /*
+ * Note: This routine previously casted a and b to int64 and subtracted
+ * them to generate a result. This lead to problems if b was the
+ * "maximum" key value (all ones) being signed incorrectly, hence this
+ * somewhat less efficient version.
+ */
+ if (a > b)
+ return 1;
+ if (b > a)
+ return -1;
+ return 0;
}
static xfs_failaddr_t
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index f1048efa4268..fd300dc93ca4 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -105,11 +105,10 @@ xfs_btree_check_lblock(
xfs_failaddr_t fa;
fa = __xfs_btree_check_lblock(cur, block, level, bp);
- if (unlikely(XFS_TEST_ERROR(fa != NULL, mp,
- XFS_ERRTAG_BTREE_CHECK_LBLOCK))) {
+ if (XFS_IS_CORRUPT(mp, fa != NULL) ||
+ XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK)) {
if (bp)
trace_xfs_btree_corrupt(bp, _RET_IP_);
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
return -EFSCORRUPTED;
}
return 0;
@@ -169,11 +168,10 @@ xfs_btree_check_sblock(
xfs_failaddr_t fa;
fa = __xfs_btree_check_sblock(cur, block, level, bp);
- if (unlikely(XFS_TEST_ERROR(fa != NULL, mp,
- XFS_ERRTAG_BTREE_CHECK_SBLOCK))) {
+ if (XFS_IS_CORRUPT(mp, fa != NULL) ||
+ XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BTREE_CHECK_SBLOCK)) {
if (bp)
trace_xfs_btree_corrupt(bp, _RET_IP_);
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
return -EFSCORRUPTED;
}
return 0;
@@ -384,7 +382,7 @@ xfs_btree_del_cursor(
/*
* Free the cursor.
*/
- kmem_zone_free(xfs_btree_cur_zone, cur);
+ kmem_cache_free(xfs_btree_cur_zone, cur);
}
/*
@@ -681,61 +679,6 @@ xfs_btree_get_block(
}
/*
- * Get a buffer for the block, return it with no data read.
- * Long-form addressing.
- */
-xfs_buf_t * /* buffer for fsbno */
-xfs_btree_get_bufl(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_fsblock_t fsbno) /* file system block number */
-{
- xfs_daddr_t d; /* real disk block address */
-
- ASSERT(fsbno != NULLFSBLOCK);
- d = XFS_FSB_TO_DADDR(mp, fsbno);
- return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0);
-}
-
-/*
- * Get a buffer for the block, return it with no data read.
- * Short-form addressing.
- */
-xfs_buf_t * /* buffer for agno/agbno */
-xfs_btree_get_bufs(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
- xfs_agnumber_t agno, /* allocation group number */
- xfs_agblock_t agbno) /* allocation group block number */
-{
- xfs_daddr_t d; /* real disk block address */
-
- ASSERT(agno != NULLAGNUMBER);
- ASSERT(agbno != NULLAGBLOCK);
- d = XFS_AGB_TO_DADDR(mp, agno, agbno);
- return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, 0);
-}
-
-/*
- * Check for the cursor referring to the last block at the given level.
- */
-int /* 1=is last block, 0=not last block */
-xfs_btree_islastblock(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level) /* level to check */
-{
- struct xfs_btree_block *block; /* generic btree block pointer */
- xfs_buf_t *bp; /* buffer containing block */
-
- block = xfs_btree_get_block(cur, level, &bp);
- xfs_btree_check_block(cur, block, level, bp);
- if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
- return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK);
- else
- return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
-}
-
-/*
* Change the cursor to point to the first record at the given level.
* Other levels are unaffected.
*/
@@ -1291,11 +1234,10 @@ xfs_btree_get_buf_block(
error = xfs_btree_ptr_to_daddr(cur, ptr, &d);
if (error)
return error;
- *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
- mp->m_bsize, 0);
-
- if (!*bpp)
- return -ENOMEM;
+ error = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, mp->m_bsize,
+ 0, bpp);
+ if (error)
+ return error;
(*bpp)->b_ops = cur->bc_ops->buf_ops;
*block = XFS_BUF_TO_BLOCK(*bpp);
@@ -1820,6 +1762,7 @@ xfs_btree_lookup_get_block(
out_bad:
*blkp = NULL;
+ xfs_buf_corruption_error(bp);
xfs_trans_brelse(cur->bc_tp, bp);
return -EFSCORRUPTED;
}
@@ -1867,7 +1810,7 @@ xfs_btree_lookup(
XFS_BTREE_STATS_INC(cur, lookup);
/* No such thing as a zero-level tree. */
- if (cur->bc_nlevels == 0)
+ if (XFS_IS_CORRUPT(cur->bc_mp, cur->bc_nlevels == 0))
return -EFSCORRUPTED;
block = NULL;
@@ -1987,7 +1930,8 @@ xfs_btree_lookup(
error = xfs_btree_increment(cur, 0, &i);
if (error)
goto error0;
- XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
+ return -EFSCORRUPTED;
*stat = 1;
return 0;
}
@@ -2408,8 +2352,6 @@ xfs_btree_lshift(
XFS_BTREE_STATS_ADD(cur, moves, rrecs - 1);
if (level > 0) {
/* It's a nonleaf. operate on keys and ptrs */
- int i; /* loop index */
-
for (i = 0; i < rrecs; i++) {
error = xfs_btree_debug_check_ptr(cur, rpp, i + 1, level);
if (error)
@@ -2442,7 +2384,10 @@ xfs_btree_lshift(
if (error)
goto error0;
i = xfs_btree_firstrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
error = xfs_btree_decrement(tcur, level, &i);
if (error)
@@ -2609,7 +2554,10 @@ xfs_btree_rshift(
if (error)
goto error0;
i = xfs_btree_lastrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(tcur->bc_mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(tcur->bc_mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
error = xfs_btree_increment(tcur, level, &i);
if (error)
@@ -3463,7 +3411,10 @@ xfs_btree_insert(
goto error0;
}
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
level++;
/*
@@ -3867,15 +3818,24 @@ xfs_btree_delrec(
* Actually any entry but the first would suffice.
*/
i = xfs_btree_lastrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
error = xfs_btree_increment(tcur, level, &i);
if (error)
goto error0;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
i = xfs_btree_lastrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
/* Grab a pointer to the block. */
right = xfs_btree_get_block(tcur, level, &rbp);
@@ -3919,12 +3879,18 @@ xfs_btree_delrec(
rrecs = xfs_btree_get_numrecs(right);
if (!xfs_btree_ptr_is_null(cur, &lptr)) {
i = xfs_btree_firstrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
error = xfs_btree_decrement(tcur, level, &i);
if (error)
goto error0;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
}
}
@@ -3938,13 +3904,19 @@ xfs_btree_delrec(
* previous block.
*/
i = xfs_btree_firstrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
error = xfs_btree_decrement(tcur, level, &i);
if (error)
goto error0;
i = xfs_btree_firstrec(tcur, level);
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
/* Grab a pointer to the block. */
left = xfs_btree_get_block(tcur, level, &lbp);
@@ -4286,6 +4258,7 @@ int
xfs_btree_visit_blocks(
struct xfs_btree_cur *cur,
xfs_btree_visit_blocks_fn fn,
+ unsigned int flags,
void *data)
{
union xfs_btree_ptr lptr;
@@ -4311,6 +4284,11 @@ xfs_btree_visit_blocks(
/* save for the next iteration of the loop */
xfs_btree_copy_ptrs(cur, &lptr, ptr, 1);
+
+ if (!(flags & XFS_BTREE_VISIT_LEAVES))
+ continue;
+ } else if (!(flags & XFS_BTREE_VISIT_RECORDS)) {
+ continue;
}
/* for each buffer in the level */
@@ -4413,7 +4391,7 @@ xfs_btree_change_owner(
bbcoi.buffer_list = buffer_list;
return xfs_btree_visit_blocks(cur, xfs_btree_block_change_owner,
- &bbcoi);
+ XFS_BTREE_VISIT_ALL, &bbcoi);
}
/* Verify the v5 fields of a long-format btree block. */
@@ -4466,8 +4444,6 @@ xfs_btree_lblock_verify(
* btree block
*
* @bp: buffer containing the btree block
- * @max_recs: pointer to the m_*_mxr max records field in the xfs mount
- * @pag_max_level: pointer to the per-ag max level field
*/
xfs_failaddr_t
xfs_btree_sblock_v5hdr_verify(
@@ -4600,7 +4576,7 @@ xfs_btree_simple_query_range(
/* Callback */
error = fn(cur, recp, priv);
- if (error < 0 || error == XFS_BTREE_QUERY_RANGE_ABORT)
+ if (error)
break;
advloop:
@@ -4702,8 +4678,7 @@ pop_up:
*/
if (ldiff >= 0 && hdiff >= 0) {
error = fn(cur, recp, priv);
- if (error < 0 ||
- error == XFS_BTREE_QUERY_RANGE_ABORT)
+ if (error)
break;
} else if (hdiff < 0) {
/* Record is larger than high key; pop. */
@@ -4774,8 +4749,7 @@ out:
* Query a btree for all records overlapping a given interval of keys. The
* supplied function will be called with each record found; return one of the
* XFS_BTREE_QUERY_RANGE_{CONTINUE,ABORT} values or the usual negative error
- * code. This function returns XFS_BTREE_QUERY_RANGE_ABORT, zero, or a
- * negative error code.
+ * code. This function returns -ECANCELED, zero, or a negative error code.
*/
int
xfs_btree_query_range(
@@ -4869,7 +4843,7 @@ xfs_btree_count_blocks(
{
*blocks = 0;
return xfs_btree_visit_blocks(cur, xfs_btree_count_blocks_helper,
- blocks);
+ XFS_BTREE_VISIT_ALL, blocks);
}
/* Compare two btree pointers. */
@@ -4891,7 +4865,7 @@ xfs_btree_has_record_helper(
union xfs_btree_rec *rec,
void *priv)
{
- return XFS_BTREE_QUERY_RANGE_ABORT;
+ return -ECANCELED;
}
/* Is there a record covering a given range of keys? */
@@ -4906,7 +4880,7 @@ xfs_btree_has_record(
error = xfs_btree_query_range(cur, low, high,
&xfs_btree_has_record_helper, NULL);
- if (error == XFS_BTREE_QUERY_RANGE_ABORT) {
+ if (error == -ECANCELED) {
*exists = true;
return 0;
}
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index fa3cd8ab9aba..3eff7c321d43 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -183,6 +183,9 @@ union xfs_btree_cur_private {
unsigned long nr_ops; /* # record updates */
int shape_changes; /* # of extent splits */
} refc;
+ struct {
+ bool active; /* allocation cursor state */
+ } abt;
};
/*
@@ -294,35 +297,6 @@ xfs_btree_dup_cursor(
xfs_btree_cur_t **ncur);/* output cursor */
/*
- * Get a buffer for the block, return it with no data read.
- * Long-form addressing.
- */
-struct xfs_buf * /* buffer for fsbno */
-xfs_btree_get_bufl(
- struct xfs_mount *mp, /* file system mount point */
- struct xfs_trans *tp, /* transaction pointer */
- xfs_fsblock_t fsbno); /* file system block number */
-
-/*
- * Get a buffer for the block, return it with no data read.
- * Short-form addressing.
- */
-struct xfs_buf * /* buffer for agno/agbno */
-xfs_btree_get_bufs(
- struct xfs_mount *mp, /* file system mount point */
- struct xfs_trans *tp, /* transaction pointer */
- xfs_agnumber_t agno, /* allocation group number */
- xfs_agblock_t agbno); /* allocation group block number */
-
-/*
- * Check for the cursor referring to the last block at the given level.
- */
-int /* 1=is last block, 0=not last block */
-xfs_btree_islastblock(
- xfs_btree_cur_t *cur, /* btree cursor */
- int level); /* level to check */
-
-/*
* Compute first and last byte offsets for the fields given.
* Interprets the offsets table, which contains struct field offsets.
*/
@@ -464,9 +438,13 @@ xfs_failaddr_t xfs_btree_lblock_verify(struct xfs_buf *bp,
uint xfs_btree_compute_maxlevels(uint *limits, unsigned long len);
unsigned long long xfs_btree_calc_size(uint *limits, unsigned long long len);
-/* return codes */
-#define XFS_BTREE_QUERY_RANGE_CONTINUE (XFS_ITER_CONTINUE) /* keep iterating */
-#define XFS_BTREE_QUERY_RANGE_ABORT (XFS_ITER_ABORT) /* stop iterating */
+/*
+ * Return codes for the query range iterator function are 0 to continue
+ * iterating, and non-zero to stop iterating. Any non-zero value will be
+ * passed up to the _query_range caller. The special value -ECANCELED can be
+ * used to stop iteration, because _query_range never generates that error
+ * code on its own.
+ */
typedef int (*xfs_btree_query_range_fn)(struct xfs_btree_cur *cur,
union xfs_btree_rec *rec, void *priv);
@@ -478,8 +456,15 @@ int xfs_btree_query_all(struct xfs_btree_cur *cur, xfs_btree_query_range_fn fn,
typedef int (*xfs_btree_visit_blocks_fn)(struct xfs_btree_cur *cur, int level,
void *data);
+/* Visit record blocks. */
+#define XFS_BTREE_VISIT_RECORDS (1 << 0)
+/* Visit leaf blocks. */
+#define XFS_BTREE_VISIT_LEAVES (1 << 1)
+/* Visit all blocks. */
+#define XFS_BTREE_VISIT_ALL (XFS_BTREE_VISIT_RECORDS | \
+ XFS_BTREE_VISIT_LEAVES)
int xfs_btree_visit_blocks(struct xfs_btree_cur *cur,
- xfs_btree_visit_blocks_fn fn, void *data);
+ xfs_btree_visit_blocks_fn fn, unsigned int flags, void *data);
int xfs_btree_count_blocks(struct xfs_btree_cur *cur, xfs_extlen_t *blocks);
@@ -510,4 +495,21 @@ int xfs_btree_has_record(struct xfs_btree_cur *cur, union xfs_btree_irec *low,
union xfs_btree_irec *high, bool *exists);
bool xfs_btree_has_more_records(struct xfs_btree_cur *cur);
+/* Does this cursor point to the last block in the given level? */
+static inline bool
+xfs_btree_islastblock(
+ xfs_btree_cur_t *cur,
+ int level)
+{
+ struct xfs_btree_block *block;
+ struct xfs_buf *bp;
+
+ block = xfs_btree_get_block(cur, level, &bp);
+ ASSERT(block && xfs_btree_check_block(cur, block, level, bp) == 0);
+
+ if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+ return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK);
+ return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
+}
+
#endif /* __XFS_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 0bf56e94bfe9..875e04f82541 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -12,9 +12,9 @@
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
+#include "xfs_inode.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
-#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_bmap.h"
#include "xfs_attr_leaf.h"
@@ -107,7 +107,66 @@ xfs_da_state_free(xfs_da_state_t *state)
#ifdef DEBUG
memset((char *)state, 0, sizeof(*state));
#endif /* DEBUG */
- kmem_zone_free(xfs_da_state_zone, state);
+ kmem_cache_free(xfs_da_state_zone, state);
+}
+
+static inline int xfs_dabuf_nfsb(struct xfs_mount *mp, int whichfork)
+{
+ if (whichfork == XFS_DATA_FORK)
+ return mp->m_dir_geo->fsbcount;
+ return mp->m_attr_geo->fsbcount;
+}
+
+void
+xfs_da3_node_hdr_from_disk(
+ struct xfs_mount *mp,
+ struct xfs_da3_icnode_hdr *to,
+ struct xfs_da_intnode *from)
+{
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ struct xfs_da3_intnode *from3 = (struct xfs_da3_intnode *)from;
+
+ to->forw = be32_to_cpu(from3->hdr.info.hdr.forw);
+ to->back = be32_to_cpu(from3->hdr.info.hdr.back);
+ to->magic = be16_to_cpu(from3->hdr.info.hdr.magic);
+ to->count = be16_to_cpu(from3->hdr.__count);
+ to->level = be16_to_cpu(from3->hdr.__level);
+ to->btree = from3->__btree;
+ ASSERT(to->magic == XFS_DA3_NODE_MAGIC);
+ } else {
+ to->forw = be32_to_cpu(from->hdr.info.forw);
+ to->back = be32_to_cpu(from->hdr.info.back);
+ to->magic = be16_to_cpu(from->hdr.info.magic);
+ to->count = be16_to_cpu(from->hdr.__count);
+ to->level = be16_to_cpu(from->hdr.__level);
+ to->btree = from->__btree;
+ ASSERT(to->magic == XFS_DA_NODE_MAGIC);
+ }
+}
+
+void
+xfs_da3_node_hdr_to_disk(
+ struct xfs_mount *mp,
+ struct xfs_da_intnode *to,
+ struct xfs_da3_icnode_hdr *from)
+{
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ struct xfs_da3_intnode *to3 = (struct xfs_da3_intnode *)to;
+
+ ASSERT(from->magic == XFS_DA3_NODE_MAGIC);
+ to3->hdr.info.hdr.forw = cpu_to_be32(from->forw);
+ to3->hdr.info.hdr.back = cpu_to_be32(from->back);
+ to3->hdr.info.hdr.magic = cpu_to_be16(from->magic);
+ to3->hdr.__count = cpu_to_be16(from->count);
+ to3->hdr.__level = cpu_to_be16(from->level);
+ } else {
+ ASSERT(from->magic == XFS_DA_NODE_MAGIC);
+ to->hdr.info.forw = cpu_to_be32(from->forw);
+ to->hdr.info.back = cpu_to_be32(from->back);
+ to->hdr.info.magic = cpu_to_be16(from->magic);
+ to->hdr.__count = cpu_to_be16(from->count);
+ to->hdr.__level = cpu_to_be16(from->level);
+ }
}
/*
@@ -145,12 +204,9 @@ xfs_da3_node_verify(
struct xfs_mount *mp = bp->b_mount;
struct xfs_da_intnode *hdr = bp->b_addr;
struct xfs_da3_icnode_hdr ichdr;
- const struct xfs_dir_ops *ops;
xfs_failaddr_t fa;
- ops = xfs_dir_get_ops(mp, NULL);
-
- ops->node_hdr_from_disk(&ichdr, hdr);
+ xfs_da3_node_hdr_from_disk(mp, &ichdr, hdr);
fa = xfs_da3_blkinfo_verify(bp, bp->b_addr);
if (fa)
@@ -275,46 +331,76 @@ const struct xfs_buf_ops xfs_da3_node_buf_ops = {
.verify_struct = xfs_da3_node_verify_struct,
};
+static int
+xfs_da3_node_set_type(
+ struct xfs_trans *tp,
+ struct xfs_buf *bp)
+{
+ struct xfs_da_blkinfo *info = bp->b_addr;
+
+ switch (be16_to_cpu(info->magic)) {
+ case XFS_DA_NODE_MAGIC:
+ case XFS_DA3_NODE_MAGIC:
+ xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
+ return 0;
+ case XFS_ATTR_LEAF_MAGIC:
+ case XFS_ATTR3_LEAF_MAGIC:
+ xfs_trans_buf_set_type(tp, bp, XFS_BLFT_ATTR_LEAF_BUF);
+ return 0;
+ case XFS_DIR2_LEAFN_MAGIC:
+ case XFS_DIR3_LEAFN_MAGIC:
+ xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAFN_BUF);
+ return 0;
+ default:
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, tp->t_mountp,
+ info, sizeof(*info));
+ xfs_trans_brelse(tp, bp);
+ return -EFSCORRUPTED;
+ }
+}
+
int
xfs_da3_node_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
xfs_dablk_t bno,
+ struct xfs_buf **bpp,
+ int whichfork)
+{
+ int error;
+
+ error = xfs_da_read_buf(tp, dp, bno, 0, bpp, whichfork,
+ &xfs_da3_node_buf_ops);
+ if (error || !*bpp || !tp)
+ return error;
+ return xfs_da3_node_set_type(tp, *bpp);
+}
+
+int
+xfs_da3_node_read_mapped(
+ struct xfs_trans *tp,
+ struct xfs_inode *dp,
xfs_daddr_t mappedbno,
struct xfs_buf **bpp,
- int which_fork)
+ int whichfork)
{
- int err;
+ struct xfs_mount *mp = dp->i_mount;
+ int error;
- err = xfs_da_read_buf(tp, dp, bno, mappedbno, bpp,
- which_fork, &xfs_da3_node_buf_ops);
- if (!err && tp && *bpp) {
- struct xfs_da_blkinfo *info = (*bpp)->b_addr;
- int type;
+ error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, mappedbno,
+ XFS_FSB_TO_BB(mp, xfs_dabuf_nfsb(mp, whichfork)), 0,
+ bpp, &xfs_da3_node_buf_ops);
+ if (error || !*bpp)
+ return error;
- switch (be16_to_cpu(info->magic)) {
- case XFS_DA_NODE_MAGIC:
- case XFS_DA3_NODE_MAGIC:
- type = XFS_BLFT_DA_NODE_BUF;
- break;
- case XFS_ATTR_LEAF_MAGIC:
- case XFS_ATTR3_LEAF_MAGIC:
- type = XFS_BLFT_ATTR_LEAF_BUF;
- break;
- case XFS_DIR2_LEAFN_MAGIC:
- case XFS_DIR3_LEAFN_MAGIC:
- type = XFS_BLFT_DIR_LEAFN_BUF;
- break;
- default:
- XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW,
- tp->t_mountp, info, sizeof(*info));
- xfs_trans_brelse(tp, *bpp);
- *bpp = NULL;
- return -EFSCORRUPTED;
- }
- xfs_trans_buf_set_type(tp, *bpp, type);
- }
- return err;
+ if (whichfork == XFS_ATTR_FORK)
+ xfs_buf_set_ref(*bpp, XFS_ATTR_BTREE_REF);
+ else
+ xfs_buf_set_ref(*bpp, XFS_DIR_BTREE_REF);
+
+ if (!tp)
+ return 0;
+ return xfs_da3_node_set_type(tp, *bpp);
}
/*========================================================================
@@ -343,7 +429,7 @@ xfs_da3_node_create(
trace_xfs_da_node_create(args);
ASSERT(level <= XFS_DA_NODE_MAXDEPTH);
- error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, whichfork);
+ error = xfs_da_get_buf(tp, dp, blkno, &bp, whichfork);
if (error)
return error;
bp->b_ops = &xfs_da3_node_buf_ops;
@@ -363,9 +449,9 @@ xfs_da3_node_create(
}
ichdr.level = level;
- dp->d_ops->node_hdr_to_disk(node, &ichdr);
+ xfs_da3_node_hdr_to_disk(dp->i_mount, node, &ichdr);
xfs_trans_log_buf(tp, bp,
- XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));
+ XFS_DA_LOGRANGE(node, &node->hdr, args->geo->node_hdr_size));
*bpp = bp;
return 0;
@@ -504,6 +590,7 @@ xfs_da3_split(
node = oldblk->bp->b_addr;
if (node->hdr.info.forw) {
if (be32_to_cpu(node->hdr.info.forw) != addblk->blkno) {
+ xfs_buf_corruption_error(oldblk->bp);
error = -EFSCORRUPTED;
goto out;
}
@@ -516,6 +603,7 @@ xfs_da3_split(
node = oldblk->bp->b_addr;
if (node->hdr.info.back) {
if (be32_to_cpu(node->hdr.info.back) != addblk->blkno) {
+ xfs_buf_corruption_error(oldblk->bp);
error = -EFSCORRUPTED;
goto out;
}
@@ -568,7 +656,7 @@ xfs_da3_root_split(
dp = args->dp;
tp = args->trans;
- error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, args->whichfork);
+ error = xfs_da_get_buf(tp, dp, blkno, &bp, args->whichfork);
if (error)
return error;
node = bp->b_addr;
@@ -577,8 +665,8 @@ xfs_da3_root_split(
oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {
struct xfs_da3_icnode_hdr icnodehdr;
- dp->d_ops->node_hdr_from_disk(&icnodehdr, oldroot);
- btree = dp->d_ops->node_tree_p(oldroot);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &icnodehdr, oldroot);
+ btree = icnodehdr.btree;
size = (int)((char *)&btree[icnodehdr.count] - (char *)oldroot);
level = icnodehdr.level;
@@ -589,15 +677,14 @@ xfs_da3_root_split(
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DA_NODE_BUF);
} else {
struct xfs_dir3_icleaf_hdr leafhdr;
- struct xfs_dir2_leaf_entry *ents;
leaf = (xfs_dir2_leaf_t *)oldroot;
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
- ents = dp->d_ops->leaf_ents_p(leaf);
+ xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, leaf);
ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
- size = (int)((char *)&ents[leafhdr.count] - (char *)leaf);
+ size = (int)((char *)&leafhdr.ents[leafhdr.count] -
+ (char *)leaf);
level = 0;
/*
@@ -637,14 +724,14 @@ xfs_da3_root_split(
return error;
node = bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&nodehdr, node);
- btree = dp->d_ops->node_tree_p(node);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
+ btree = nodehdr.btree;
btree[0].hashval = cpu_to_be32(blk1->hashval);
btree[0].before = cpu_to_be32(blk1->blkno);
btree[1].hashval = cpu_to_be32(blk2->hashval);
btree[1].before = cpu_to_be32(blk2->blkno);
nodehdr.count = 2;
- dp->d_ops->node_hdr_to_disk(node, &nodehdr);
+ xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr);
#ifdef DEBUG
if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
@@ -686,7 +773,7 @@ xfs_da3_node_split(
trace_xfs_da_node_split(state->args);
node = oldblk->bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&nodehdr, node);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
/*
* With V2 dirs the extra block is data or freespace.
@@ -733,7 +820,7 @@ xfs_da3_node_split(
* If we had double-split op below us, then add the extra block too.
*/
node = oldblk->bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&nodehdr, node);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
if (oldblk->index <= nodehdr.count) {
oldblk->index++;
xfs_da3_node_add(state, oldblk, addblk);
@@ -788,10 +875,10 @@ xfs_da3_node_rebalance(
node1 = blk1->bp->b_addr;
node2 = blk2->bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&nodehdr1, node1);
- dp->d_ops->node_hdr_from_disk(&nodehdr2, node2);
- btree1 = dp->d_ops->node_tree_p(node1);
- btree2 = dp->d_ops->node_tree_p(node2);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2);
+ btree1 = nodehdr1.btree;
+ btree2 = nodehdr2.btree;
/*
* Figure out how many entries need to move, and in which direction.
@@ -804,10 +891,10 @@ xfs_da3_node_rebalance(
tmpnode = node1;
node1 = node2;
node2 = tmpnode;
- dp->d_ops->node_hdr_from_disk(&nodehdr1, node1);
- dp->d_ops->node_hdr_from_disk(&nodehdr2, node2);
- btree1 = dp->d_ops->node_tree_p(node1);
- btree2 = dp->d_ops->node_tree_p(node2);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2);
+ btree1 = nodehdr1.btree;
+ btree2 = nodehdr2.btree;
swap = 1;
}
@@ -869,14 +956,15 @@ xfs_da3_node_rebalance(
/*
* Log header of node 1 and all current bits of node 2.
*/
- dp->d_ops->node_hdr_to_disk(node1, &nodehdr1);
+ xfs_da3_node_hdr_to_disk(dp->i_mount, node1, &nodehdr1);
xfs_trans_log_buf(tp, blk1->bp,
- XFS_DA_LOGRANGE(node1, &node1->hdr, dp->d_ops->node_hdr_size));
+ XFS_DA_LOGRANGE(node1, &node1->hdr,
+ state->args->geo->node_hdr_size));
- dp->d_ops->node_hdr_to_disk(node2, &nodehdr2);
+ xfs_da3_node_hdr_to_disk(dp->i_mount, node2, &nodehdr2);
xfs_trans_log_buf(tp, blk2->bp,
XFS_DA_LOGRANGE(node2, &node2->hdr,
- dp->d_ops->node_hdr_size +
+ state->args->geo->node_hdr_size +
(sizeof(btree2[0]) * nodehdr2.count)));
/*
@@ -886,10 +974,10 @@ xfs_da3_node_rebalance(
if (swap) {
node1 = blk1->bp->b_addr;
node2 = blk2->bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&nodehdr1, node1);
- dp->d_ops->node_hdr_from_disk(&nodehdr2, node2);
- btree1 = dp->d_ops->node_tree_p(node1);
- btree2 = dp->d_ops->node_tree_p(node2);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr1, node1);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr2, node2);
+ btree1 = nodehdr1.btree;
+ btree2 = nodehdr2.btree;
}
blk1->hashval = be32_to_cpu(btree1[nodehdr1.count - 1].hashval);
blk2->hashval = be32_to_cpu(btree2[nodehdr2.count - 1].hashval);
@@ -921,8 +1009,8 @@ xfs_da3_node_add(
trace_xfs_da_node_add(state->args);
node = oldblk->bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&nodehdr, node);
- btree = dp->d_ops->node_tree_p(node);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
+ btree = nodehdr.btree;
ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count);
ASSERT(newblk->blkno != 0);
@@ -945,9 +1033,10 @@ xfs_da3_node_add(
tmp + sizeof(*btree)));
nodehdr.count += 1;
- dp->d_ops->node_hdr_to_disk(node, &nodehdr);
+ xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr);
xfs_trans_log_buf(state->args->trans, oldblk->bp,
- XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));
+ XFS_DA_LOGRANGE(node, &node->hdr,
+ state->args->geo->node_hdr_size));
/*
* Copy the last hash value from the oldblk to propagate upwards.
@@ -1082,7 +1171,6 @@ xfs_da3_root_join(
xfs_dablk_t child;
struct xfs_buf *bp;
struct xfs_da3_icnode_hdr oldroothdr;
- struct xfs_da_node_entry *btree;
int error;
struct xfs_inode *dp = state->args->dp;
@@ -1092,7 +1180,7 @@ xfs_da3_root_join(
args = state->args;
oldroot = root_blk->bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&oldroothdr, oldroot);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &oldroothdr, oldroot);
ASSERT(oldroothdr.forw == 0);
ASSERT(oldroothdr.back == 0);
@@ -1106,11 +1194,9 @@ xfs_da3_root_join(
* Read in the (only) child block, then copy those bytes into
* the root block's buffer and free the original child block.
*/
- btree = dp->d_ops->node_tree_p(oldroot);
- child = be32_to_cpu(btree[0].before);
+ child = be32_to_cpu(oldroothdr.btree[0].before);
ASSERT(child != 0);
- error = xfs_da3_node_read(args->trans, dp, child, -1, &bp,
- args->whichfork);
+ error = xfs_da3_node_read(args->trans, dp, child, &bp, args->whichfork);
if (error)
return error;
xfs_da_blkinfo_onlychild_validate(bp->b_addr, oldroothdr.level);
@@ -1172,7 +1258,7 @@ xfs_da3_node_toosmall(
blk = &state->path.blk[ state->path.active-1 ];
info = blk->bp->b_addr;
node = (xfs_da_intnode_t *)info;
- dp->d_ops->node_hdr_from_disk(&nodehdr, node);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
if (nodehdr.count > (state->args->geo->node_ents >> 1)) {
*action = 0; /* blk over 50%, don't try to join */
return 0; /* blk over 50%, don't try to join */
@@ -1224,13 +1310,13 @@ xfs_da3_node_toosmall(
blkno = nodehdr.back;
if (blkno == 0)
continue;
- error = xfs_da3_node_read(state->args->trans, dp,
- blkno, -1, &bp, state->args->whichfork);
+ error = xfs_da3_node_read(state->args->trans, dp, blkno, &bp,
+ state->args->whichfork);
if (error)
return error;
node = bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&thdr, node);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &thdr, node);
xfs_trans_brelse(state->args->trans, bp);
if (count - thdr.count >= 0)
@@ -1272,18 +1358,14 @@ xfs_da3_node_lasthash(
struct xfs_buf *bp,
int *count)
{
- struct xfs_da_intnode *node;
- struct xfs_da_node_entry *btree;
struct xfs_da3_icnode_hdr nodehdr;
- node = bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&nodehdr, node);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, bp->b_addr);
if (count)
*count = nodehdr.count;
if (!nodehdr.count)
return 0;
- btree = dp->d_ops->node_tree_p(node);
- return be32_to_cpu(btree[nodehdr.count - 1].hashval);
+ return be32_to_cpu(nodehdr.btree[nodehdr.count - 1].hashval);
}
/*
@@ -1328,8 +1410,8 @@ xfs_da3_fixhashpath(
struct xfs_da3_icnode_hdr nodehdr;
node = blk->bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&nodehdr, node);
- btree = dp->d_ops->node_tree_p(node);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
+ btree = nodehdr.btree;
if (be32_to_cpu(btree[blk->index].hashval) == lasthash)
break;
blk->hashval = lasthash;
@@ -1360,7 +1442,7 @@ xfs_da3_node_remove(
trace_xfs_da_node_remove(state->args);
node = drop_blk->bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&nodehdr, node);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
ASSERT(drop_blk->index < nodehdr.count);
ASSERT(drop_blk->index >= 0);
@@ -1368,7 +1450,7 @@ xfs_da3_node_remove(
* Copy over the offending entry, or just zero it out.
*/
index = drop_blk->index;
- btree = dp->d_ops->node_tree_p(node);
+ btree = nodehdr.btree;
if (index < nodehdr.count - 1) {
tmp = nodehdr.count - index - 1;
tmp *= (uint)sizeof(xfs_da_node_entry_t);
@@ -1381,9 +1463,9 @@ xfs_da3_node_remove(
xfs_trans_log_buf(state->args->trans, drop_blk->bp,
XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index])));
nodehdr.count -= 1;
- dp->d_ops->node_hdr_to_disk(node, &nodehdr);
+ xfs_da3_node_hdr_to_disk(dp->i_mount, node, &nodehdr);
xfs_trans_log_buf(state->args->trans, drop_blk->bp,
- XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));
+ XFS_DA_LOGRANGE(node, &node->hdr, state->args->geo->node_hdr_size));
/*
* Copy the last hash value from the block to propagate upwards.
@@ -1416,10 +1498,10 @@ xfs_da3_node_unbalance(
drop_node = drop_blk->bp->b_addr;
save_node = save_blk->bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&drop_hdr, drop_node);
- dp->d_ops->node_hdr_from_disk(&save_hdr, save_node);
- drop_btree = dp->d_ops->node_tree_p(drop_node);
- save_btree = dp->d_ops->node_tree_p(save_node);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &drop_hdr, drop_node);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &save_hdr, save_node);
+ drop_btree = drop_hdr.btree;
+ save_btree = save_hdr.btree;
tp = state->args->trans;
/*
@@ -1453,10 +1535,10 @@ xfs_da3_node_unbalance(
memcpy(&save_btree[sindex], &drop_btree[0], tmp);
save_hdr.count += drop_hdr.count;
- dp->d_ops->node_hdr_to_disk(save_node, &save_hdr);
+ xfs_da3_node_hdr_to_disk(dp->i_mount, save_node, &save_hdr);
xfs_trans_log_buf(tp, save_blk->bp,
XFS_DA_LOGRANGE(save_node, &save_node->hdr,
- dp->d_ops->node_hdr_size));
+ state->args->geo->node_hdr_size));
/*
* Save the last hashval in the remaining block for upward propagation.
@@ -1517,7 +1599,7 @@ xfs_da3_node_lookup_int(
*/
blk->blkno = blkno;
error = xfs_da3_node_read(args->trans, args->dp, blkno,
- -1, &blk->bp, args->whichfork);
+ &blk->bp, args->whichfork);
if (error) {
blk->blkno = 0;
state->path.active--;
@@ -1541,8 +1623,10 @@ xfs_da3_node_lookup_int(
break;
}
- if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC)
+ if (magic != XFS_DA_NODE_MAGIC && magic != XFS_DA3_NODE_MAGIC) {
+ xfs_buf_corruption_error(blk->bp);
return -EFSCORRUPTED;
+ }
blk->magic = XFS_DA_NODE_MAGIC;
@@ -1550,19 +1634,22 @@ xfs_da3_node_lookup_int(
* Search an intermediate node for a match.
*/
node = blk->bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&nodehdr, node);
- btree = dp->d_ops->node_tree_p(node);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr, node);
+ btree = nodehdr.btree;
/* Tree taller than we can handle; bail out! */
- if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH)
+ if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
+ xfs_buf_corruption_error(blk->bp);
return -EFSCORRUPTED;
+ }
/* Check the level from the root. */
if (blkno == args->geo->leafblk)
expected_level = nodehdr.level - 1;
- else if (expected_level != nodehdr.level)
+ else if (expected_level != nodehdr.level) {
+ xfs_buf_corruption_error(blk->bp);
return -EFSCORRUPTED;
- else
+ } else
expected_level--;
max = nodehdr.count;
@@ -1612,11 +1699,11 @@ xfs_da3_node_lookup_int(
}
/* We can't point back to the root. */
- if (blkno == args->geo->leafblk)
+ if (XFS_IS_CORRUPT(dp->i_mount, blkno == args->geo->leafblk))
return -EFSCORRUPTED;
}
- if (expected_level != 0)
+ if (XFS_IS_CORRUPT(dp->i_mount, expected_level != 0))
return -EFSCORRUPTED;
/*
@@ -1678,10 +1765,10 @@ xfs_da3_node_order(
node1 = node1_bp->b_addr;
node2 = node2_bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&node1hdr, node1);
- dp->d_ops->node_hdr_from_disk(&node2hdr, node2);
- btree1 = dp->d_ops->node_tree_p(node1);
- btree2 = dp->d_ops->node_tree_p(node2);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &node1hdr, node1);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &node2hdr, node2);
+ btree1 = node1hdr.btree;
+ btree2 = node2hdr.btree;
if (node1hdr.count > 0 && node2hdr.count > 0 &&
((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) ||
@@ -1746,7 +1833,7 @@ xfs_da3_blk_link(
if (old_info->back) {
error = xfs_da3_node_read(args->trans, dp,
be32_to_cpu(old_info->back),
- -1, &bp, args->whichfork);
+ &bp, args->whichfork);
if (error)
return error;
ASSERT(bp != NULL);
@@ -1767,7 +1854,7 @@ xfs_da3_blk_link(
if (old_info->forw) {
error = xfs_da3_node_read(args->trans, dp,
be32_to_cpu(old_info->forw),
- -1, &bp, args->whichfork);
+ &bp, args->whichfork);
if (error)
return error;
ASSERT(bp != NULL);
@@ -1826,7 +1913,7 @@ xfs_da3_blk_unlink(
if (drop_info->back) {
error = xfs_da3_node_read(args->trans, args->dp,
be32_to_cpu(drop_info->back),
- -1, &bp, args->whichfork);
+ &bp, args->whichfork);
if (error)
return error;
ASSERT(bp != NULL);
@@ -1843,7 +1930,7 @@ xfs_da3_blk_unlink(
if (drop_info->forw) {
error = xfs_da3_node_read(args->trans, args->dp,
be32_to_cpu(drop_info->forw),
- -1, &bp, args->whichfork);
+ &bp, args->whichfork);
if (error)
return error;
ASSERT(bp != NULL);
@@ -1878,7 +1965,6 @@ xfs_da3_path_shift(
{
struct xfs_da_state_blk *blk;
struct xfs_da_blkinfo *info;
- struct xfs_da_intnode *node;
struct xfs_da_args *args;
struct xfs_da_node_entry *btree;
struct xfs_da3_icnode_hdr nodehdr;
@@ -1901,17 +1987,16 @@ xfs_da3_path_shift(
ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH));
level = (path->active-1) - 1; /* skip bottom layer in path */
for (blk = &path->blk[level]; level >= 0; blk--, level--) {
- node = blk->bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&nodehdr, node);
- btree = dp->d_ops->node_tree_p(node);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr,
+ blk->bp->b_addr);
if (forward && (blk->index < nodehdr.count - 1)) {
blk->index++;
- blkno = be32_to_cpu(btree[blk->index].before);
+ blkno = be32_to_cpu(nodehdr.btree[blk->index].before);
break;
} else if (!forward && (blk->index > 0)) {
blk->index--;
- blkno = be32_to_cpu(btree[blk->index].before);
+ blkno = be32_to_cpu(nodehdr.btree[blk->index].before);
break;
}
}
@@ -1929,7 +2014,7 @@ xfs_da3_path_shift(
/*
* Read the next child block into a local buffer.
*/
- error = xfs_da3_node_read(args->trans, dp, blkno, -1, &bp,
+ error = xfs_da3_node_read(args->trans, dp, blkno, &bp,
args->whichfork);
if (error)
return error;
@@ -1962,9 +2047,9 @@ xfs_da3_path_shift(
case XFS_DA_NODE_MAGIC:
case XFS_DA3_NODE_MAGIC:
blk->magic = XFS_DA_NODE_MAGIC;
- node = (xfs_da_intnode_t *)info;
- dp->d_ops->node_hdr_from_disk(&nodehdr, node);
- btree = dp->d_ops->node_tree_p(node);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &nodehdr,
+ bp->b_addr);
+ btree = nodehdr.btree;
blk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);
if (forward)
blk->index = 0;
@@ -2044,18 +2129,6 @@ xfs_da_compname(
XFS_CMP_EXACT : XFS_CMP_DIFFERENT;
}
-static xfs_dahash_t
-xfs_default_hashname(
- struct xfs_name *name)
-{
- return xfs_da_hashname(name->name, name->len);
-}
-
-const struct xfs_nameops xfs_default_nameops = {
- .hashname = xfs_default_hashname,
- .compname = xfs_da_compname
-};
-
int
xfs_da_grow_inode_int(
struct xfs_da_args *args,
@@ -2098,7 +2171,7 @@ xfs_da_grow_inode_int(
* If we didn't get it and the block might work if fragmented,
* try without the CONTIG flag. Loop until we get it all.
*/
- mapp = kmem_alloc(sizeof(*mapp) * count, KM_SLEEP);
+ mapp = kmem_alloc(sizeof(*mapp) * count, 0);
for (b = *bno, mapi = 0; b < *bno + count; ) {
nmap = min(XFS_BMAP_MAX_NMAP, count);
c = (int)(*bno + count - b);
@@ -2213,16 +2286,13 @@ xfs_da3_swap_lastblock(
error = xfs_bmap_last_before(tp, dp, &lastoff, w);
if (error)
return error;
- if (unlikely(lastoff == 0)) {
- XFS_ERROR_REPORT("xfs_da_swap_lastblock(1)", XFS_ERRLEVEL_LOW,
- mp);
+ if (XFS_IS_CORRUPT(mp, lastoff == 0))
return -EFSCORRUPTED;
- }
/*
* Read the last block in the btree space.
*/
last_blkno = (xfs_dablk_t)lastoff - args->geo->fsbcount;
- error = xfs_da3_node_read(tp, dp, last_blkno, -1, &last_buf, w);
+ error = xfs_da3_node_read(tp, dp, last_blkno, &last_buf, w);
if (error)
return error;
/*
@@ -2240,16 +2310,17 @@ xfs_da3_swap_lastblock(
struct xfs_dir2_leaf_entry *ents;
dead_leaf2 = (xfs_dir2_leaf_t *)dead_info;
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, dead_leaf2);
- ents = dp->d_ops->leaf_ents_p(dead_leaf2);
+ xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr,
+ dead_leaf2);
+ ents = leafhdr.ents;
dead_level = 0;
dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval);
} else {
struct xfs_da3_icnode_hdr deadhdr;
dead_node = (xfs_da_intnode_t *)dead_info;
- dp->d_ops->node_hdr_from_disk(&deadhdr, dead_node);
- btree = dp->d_ops->node_tree_p(dead_node);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &deadhdr, dead_node);
+ btree = deadhdr.btree;
dead_level = deadhdr.level;
dead_hash = be32_to_cpu(btree[deadhdr.count - 1].hashval);
}
@@ -2258,15 +2329,13 @@ xfs_da3_swap_lastblock(
* If the moved block has a left sibling, fix up the pointers.
*/
if ((sib_blkno = be32_to_cpu(dead_info->back))) {
- error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w);
+ error = xfs_da3_node_read(tp, dp, sib_blkno, &sib_buf, w);
if (error)
goto done;
sib_info = sib_buf->b_addr;
- if (unlikely(
- be32_to_cpu(sib_info->forw) != last_blkno ||
- sib_info->magic != dead_info->magic)) {
- XFS_ERROR_REPORT("xfs_da_swap_lastblock(2)",
- XFS_ERRLEVEL_LOW, mp);
+ if (XFS_IS_CORRUPT(mp,
+ be32_to_cpu(sib_info->forw) != last_blkno ||
+ sib_info->magic != dead_info->magic)) {
error = -EFSCORRUPTED;
goto done;
}
@@ -2280,15 +2349,13 @@ xfs_da3_swap_lastblock(
* If the moved block has a right sibling, fix up the pointers.
*/
if ((sib_blkno = be32_to_cpu(dead_info->forw))) {
- error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w);
+ error = xfs_da3_node_read(tp, dp, sib_blkno, &sib_buf, w);
if (error)
goto done;
sib_info = sib_buf->b_addr;
- if (unlikely(
- be32_to_cpu(sib_info->back) != last_blkno ||
- sib_info->magic != dead_info->magic)) {
- XFS_ERROR_REPORT("xfs_da_swap_lastblock(3)",
- XFS_ERRLEVEL_LOW, mp);
+ if (XFS_IS_CORRUPT(mp,
+ be32_to_cpu(sib_info->back) != last_blkno ||
+ sib_info->magic != dead_info->magic)) {
error = -EFSCORRUPTED;
goto done;
}
@@ -2304,27 +2371,24 @@ xfs_da3_swap_lastblock(
* Walk down the tree looking for the parent of the moved block.
*/
for (;;) {
- error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w);
+ error = xfs_da3_node_read(tp, dp, par_blkno, &par_buf, w);
if (error)
goto done;
par_node = par_buf->b_addr;
- dp->d_ops->node_hdr_from_disk(&par_hdr, par_node);
- if (level >= 0 && level != par_hdr.level + 1) {
- XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",
- XFS_ERRLEVEL_LOW, mp);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
+ if (XFS_IS_CORRUPT(mp,
+ level >= 0 && level != par_hdr.level + 1)) {
error = -EFSCORRUPTED;
goto done;
}
level = par_hdr.level;
- btree = dp->d_ops->node_tree_p(par_node);
+ btree = par_hdr.btree;
for (entno = 0;
entno < par_hdr.count &&
be32_to_cpu(btree[entno].hashval) < dead_hash;
entno++)
continue;
- if (entno == par_hdr.count) {
- XFS_ERROR_REPORT("xfs_da_swap_lastblock(5)",
- XFS_ERRLEVEL_LOW, mp);
+ if (XFS_IS_CORRUPT(mp, entno == par_hdr.count)) {
error = -EFSCORRUPTED;
goto done;
}
@@ -2349,24 +2413,20 @@ xfs_da3_swap_lastblock(
par_blkno = par_hdr.forw;
xfs_trans_brelse(tp, par_buf);
par_buf = NULL;
- if (unlikely(par_blkno == 0)) {
- XFS_ERROR_REPORT("xfs_da_swap_lastblock(6)",
- XFS_ERRLEVEL_LOW, mp);
+ if (XFS_IS_CORRUPT(mp, par_blkno == 0)) {
error = -EFSCORRUPTED;
goto done;
}
- error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w);
+ error = xfs_da3_node_read(tp, dp, par_blkno, &par_buf, w);
if (error)
goto done;
par_node = par_buf->b_addr;
- dp->d_ops->node_hdr_from_disk(&par_hdr, par_node);
- if (par_hdr.level != level) {
- XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",
- XFS_ERRLEVEL_LOW, mp);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &par_hdr, par_node);
+ if (XFS_IS_CORRUPT(mp, par_hdr.level != level)) {
error = -EFSCORRUPTED;
goto done;
}
- btree = dp->d_ops->node_tree_p(par_node);
+ btree = par_hdr.btree;
entno = 0;
}
/*
@@ -2429,159 +2489,84 @@ xfs_da_shrink_inode(
return error;
}
-/*
- * See if the mapping(s) for this btree block are valid, i.e.
- * don't contain holes, are logically contiguous, and cover the whole range.
- */
-STATIC int
-xfs_da_map_covers_blocks(
- int nmap,
- xfs_bmbt_irec_t *mapp,
- xfs_dablk_t bno,
- int count)
-{
- int i;
- xfs_fileoff_t off;
-
- for (i = 0, off = bno; i < nmap; i++) {
- if (mapp[i].br_startblock == HOLESTARTBLOCK ||
- mapp[i].br_startblock == DELAYSTARTBLOCK) {
- return 0;
- }
- if (off != mapp[i].br_startoff) {
- return 0;
- }
- off += mapp[i].br_blockcount;
- }
- return off == bno + count;
-}
-
-/*
- * Convert a struct xfs_bmbt_irec to a struct xfs_buf_map.
- *
- * For the single map case, it is assumed that the caller has provided a pointer
- * to a valid xfs_buf_map. For the multiple map case, this function will
- * allocate the xfs_buf_map to hold all the maps and replace the caller's single
- * map pointer with the allocated map.
- */
static int
-xfs_buf_map_from_irec(
- struct xfs_mount *mp,
+xfs_dabuf_map(
+ struct xfs_inode *dp,
+ xfs_dablk_t bno,
+ unsigned int flags,
+ int whichfork,
struct xfs_buf_map **mapp,
- int *nmaps,
- struct xfs_bmbt_irec *irecs,
- int nirecs)
+ int *nmaps)
{
- struct xfs_buf_map *map;
- int i;
-
- ASSERT(*nmaps == 1);
- ASSERT(nirecs >= 1);
+ struct xfs_mount *mp = dp->i_mount;
+ int nfsb = xfs_dabuf_nfsb(mp, whichfork);
+ struct xfs_bmbt_irec irec, *irecs = &irec;
+ struct xfs_buf_map *map = *mapp;
+ xfs_fileoff_t off = bno;
+ int error = 0, nirecs, i;
+
+ if (nfsb > 1)
+ irecs = kmem_zalloc(sizeof(irec) * nfsb, KM_NOFS);
+
+ nirecs = nfsb;
+ error = xfs_bmapi_read(dp, bno, nfsb, irecs, &nirecs,
+ xfs_bmapi_aflag(whichfork));
+ if (error)
+ goto out_free_irecs;
+ /*
+ * Use the caller provided map for the single map case, else allocate a
+ * larger one that needs to be free by the caller.
+ */
if (nirecs > 1) {
- map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map),
- KM_SLEEP | KM_NOFS);
+ map = kmem_zalloc(nirecs * sizeof(struct xfs_buf_map), KM_NOFS);
if (!map)
- return -ENOMEM;
+ goto out_free_irecs;
*mapp = map;
}
- *nmaps = nirecs;
- map = *mapp;
- for (i = 0; i < *nmaps; i++) {
- ASSERT(irecs[i].br_startblock != DELAYSTARTBLOCK &&
- irecs[i].br_startblock != HOLESTARTBLOCK);
+ for (i = 0; i < nirecs; i++) {
+ if (irecs[i].br_startblock == HOLESTARTBLOCK ||
+ irecs[i].br_startblock == DELAYSTARTBLOCK)
+ goto invalid_mapping;
+ if (off != irecs[i].br_startoff)
+ goto invalid_mapping;
+
map[i].bm_bn = XFS_FSB_TO_DADDR(mp, irecs[i].br_startblock);
map[i].bm_len = XFS_FSB_TO_BB(mp, irecs[i].br_blockcount);
+ off += irecs[i].br_blockcount;
}
- return 0;
-}
-/*
- * Map the block we are given ready for reading. There are three possible return
- * values:
- * -1 - will be returned if we land in a hole and mappedbno == -2 so the
- * caller knows not to execute a subsequent read.
- * 0 - if we mapped the block successfully
- * >0 - positive error number if there was an error.
- */
-static int
-xfs_dabuf_map(
- struct xfs_inode *dp,
- xfs_dablk_t bno,
- xfs_daddr_t mappedbno,
- int whichfork,
- struct xfs_buf_map **map,
- int *nmaps)
-{
- struct xfs_mount *mp = dp->i_mount;
- int nfsb;
- int error = 0;
- struct xfs_bmbt_irec irec;
- struct xfs_bmbt_irec *irecs = &irec;
- int nirecs;
+ if (off != bno + nfsb)
+ goto invalid_mapping;
- ASSERT(map && *map);
- ASSERT(*nmaps == 1);
-
- if (whichfork == XFS_DATA_FORK)
- nfsb = mp->m_dir_geo->fsbcount;
- else
- nfsb = mp->m_attr_geo->fsbcount;
-
- /*
- * Caller doesn't have a mapping. -2 means don't complain
- * if we land in a hole.
- */
- if (mappedbno == -1 || mappedbno == -2) {
- /*
- * Optimize the one-block case.
- */
- if (nfsb != 1)
- irecs = kmem_zalloc(sizeof(irec) * nfsb,
- KM_SLEEP | KM_NOFS);
+ *nmaps = nirecs;
+out_free_irecs:
+ if (irecs != &irec)
+ kmem_free(irecs);
+ return error;
- nirecs = nfsb;
- error = xfs_bmapi_read(dp, (xfs_fileoff_t)bno, nfsb, irecs,
- &nirecs, xfs_bmapi_aflag(whichfork));
- if (error)
- goto out;
- } else {
- irecs->br_startblock = XFS_DADDR_TO_FSB(mp, mappedbno);
- irecs->br_startoff = (xfs_fileoff_t)bno;
- irecs->br_blockcount = nfsb;
- irecs->br_state = 0;
- nirecs = 1;
- }
+invalid_mapping:
+ /* Caller ok with no mapping. */
+ if (XFS_IS_CORRUPT(mp, !(flags & XFS_DABUF_MAP_HOLE_OK))) {
+ error = -EFSCORRUPTED;
+ if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
+ xfs_alert(mp, "%s: bno %u inode %llu",
+ __func__, bno, dp->i_ino);
- if (!xfs_da_map_covers_blocks(nirecs, irecs, bno, nfsb)) {
- error = mappedbno == -2 ? -1 : -EFSCORRUPTED;
- if (unlikely(error == -EFSCORRUPTED)) {
- if (xfs_error_level >= XFS_ERRLEVEL_LOW) {
- int i;
- xfs_alert(mp, "%s: bno %lld dir: inode %lld",
- __func__, (long long)bno,
- (long long)dp->i_ino);
- for (i = 0; i < *nmaps; i++) {
- xfs_alert(mp,
+ for (i = 0; i < nirecs; i++) {
+ xfs_alert(mp,
"[%02d] br_startoff %lld br_startblock %lld br_blockcount %lld br_state %d",
- i,
- (long long)irecs[i].br_startoff,
- (long long)irecs[i].br_startblock,
- (long long)irecs[i].br_blockcount,
- irecs[i].br_state);
- }
+ i, irecs[i].br_startoff,
+ irecs[i].br_startblock,
+ irecs[i].br_blockcount,
+ irecs[i].br_state);
}
- XFS_ERROR_REPORT("xfs_da_do_buf(1)",
- XFS_ERRLEVEL_LOW, mp);
}
- goto out;
+ } else {
+ *nmaps = 0;
}
- error = xfs_buf_map_from_irec(mp, map, nmaps, irecs, nirecs);
-out:
- if (irecs != &irec)
- kmem_free(irecs);
- return error;
+ goto out_free_irecs;
}
/*
@@ -2589,39 +2574,26 @@ out:
*/
int
xfs_da_get_buf(
- struct xfs_trans *trans,
+ struct xfs_trans *tp,
struct xfs_inode *dp,
xfs_dablk_t bno,
- xfs_daddr_t mappedbno,
struct xfs_buf **bpp,
int whichfork)
{
+ struct xfs_mount *mp = dp->i_mount;
struct xfs_buf *bp;
- struct xfs_buf_map map;
- struct xfs_buf_map *mapp;
- int nmap;
+ struct xfs_buf_map map, *mapp = &map;
+ int nmap = 1;
int error;
*bpp = NULL;
- mapp = &map;
- nmap = 1;
- error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
- &mapp, &nmap);
- if (error) {
- /* mapping a hole is not an error, but we don't continue */
- if (error == -1)
- error = 0;
+ error = xfs_dabuf_map(dp, bno, 0, whichfork, &mapp, &nmap);
+ if (error || nmap == 0)
goto out_free;
- }
- bp = xfs_trans_get_buf_map(trans, dp->i_mount->m_ddev_targp,
- mapp, nmap, 0);
- error = bp ? bp->b_error : -EIO;
- if (error) {
- if (bp)
- xfs_trans_brelse(trans, bp);
+ error = xfs_trans_get_buf_map(tp, mp->m_ddev_targp, mapp, nmap, 0, &bp);
+ if (error)
goto out_free;
- }
*bpp = bp;
@@ -2637,35 +2609,27 @@ out_free:
*/
int
xfs_da_read_buf(
- struct xfs_trans *trans,
+ struct xfs_trans *tp,
struct xfs_inode *dp,
xfs_dablk_t bno,
- xfs_daddr_t mappedbno,
+ unsigned int flags,
struct xfs_buf **bpp,
int whichfork,
const struct xfs_buf_ops *ops)
{
+ struct xfs_mount *mp = dp->i_mount;
struct xfs_buf *bp;
- struct xfs_buf_map map;
- struct xfs_buf_map *mapp;
- int nmap;
+ struct xfs_buf_map map, *mapp = &map;
+ int nmap = 1;
int error;
*bpp = NULL;
- mapp = &map;
- nmap = 1;
- error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
- &mapp, &nmap);
- if (error) {
- /* mapping a hole is not an error, but we don't continue */
- if (error == -1)
- error = 0;
+ error = xfs_dabuf_map(dp, bno, flags, whichfork, &mapp, &nmap);
+ if (error || !nmap)
goto out_free;
- }
- error = xfs_trans_read_buf_map(dp->i_mount, trans,
- dp->i_mount->m_ddev_targp,
- mapp, nmap, 0, &bp, ops);
+ error = xfs_trans_read_buf_map(mp, tp, mp->m_ddev_targp, mapp, nmap, 0,
+ &bp, ops);
if (error)
goto out_free;
@@ -2688,7 +2652,7 @@ int
xfs_da_reada_buf(
struct xfs_inode *dp,
xfs_dablk_t bno,
- xfs_daddr_t mappedbno,
+ unsigned int flags,
int whichfork,
const struct xfs_buf_ops *ops)
{
@@ -2699,16 +2663,10 @@ xfs_da_reada_buf(
mapp = &map;
nmap = 1;
- error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,
- &mapp, &nmap);
- if (error) {
- /* mapping a hole is not an error, but we don't continue */
- if (error == -1)
- error = 0;
+ error = xfs_dabuf_map(dp, bno, flags, whichfork, &mapp, &nmap);
+ if (error || !nmap)
goto out_free;
- }
- mappedbno = mapp[0].bm_bn;
xfs_buf_readahead_map(dp->i_mount->m_ddev_targp, mapp, nmap, ops);
out_free:
diff --git a/fs/xfs/libxfs/xfs_da_btree.h b/fs/xfs/libxfs/xfs_da_btree.h
index 84dd865b6c3d..0f4fbb0889ff 100644
--- a/fs/xfs/libxfs/xfs_da_btree.h
+++ b/fs/xfs/libxfs/xfs_da_btree.h
@@ -10,7 +10,6 @@
struct xfs_inode;
struct xfs_trans;
struct zone;
-struct xfs_dir_ops;
/*
* Directory/attribute geometry information. There will be one of these for each
@@ -18,15 +17,23 @@ struct xfs_dir_ops;
* structures will be attached to the xfs_mount.
*/
struct xfs_da_geometry {
- int blksize; /* da block size in bytes */
- int fsbcount; /* da block size in filesystem blocks */
+ unsigned int blksize; /* da block size in bytes */
+ unsigned int fsbcount; /* da block size in filesystem blocks */
uint8_t fsblog; /* log2 of _filesystem_ block size */
uint8_t blklog; /* log2 of da block size */
- uint node_ents; /* # of entries in a danode */
- int magicpct; /* 37% of block size in bytes */
+ unsigned int node_hdr_size; /* danode header size in bytes */
+ unsigned int node_ents; /* # of entries in a danode */
+ unsigned int magicpct; /* 37% of block size in bytes */
xfs_dablk_t datablk; /* blockno of dir data v2 */
+ unsigned int leaf_hdr_size; /* dir2 leaf header size */
+ unsigned int leaf_max_ents; /* # of entries in dir2 leaf */
xfs_dablk_t leafblk; /* blockno of leaf data v2 */
+ unsigned int free_hdr_size; /* dir2 free header size */
+ unsigned int free_max_bests; /* # of bests entries in dir2 free */
xfs_dablk_t freeblk; /* blockno of free data v2 */
+
+ xfs_dir2_data_aoff_t data_first_offset;
+ size_t data_entry_offset;
};
/*========================================================================
@@ -81,13 +88,17 @@ typedef struct xfs_da_args {
#define XFS_DA_OP_ADDNAME 0x0004 /* this is an add operation */
#define XFS_DA_OP_OKNOENT 0x0008 /* lookup/add op, ENOENT ok, else die */
#define XFS_DA_OP_CILOOKUP 0x0010 /* lookup to return CI name if found */
+#define XFS_DA_OP_ALLOCVAL 0x0020 /* lookup to alloc buffer if found */
+#define XFS_DA_OP_INCOMPLETE 0x0040 /* lookup INCOMPLETE attr keys */
#define XFS_DA_OP_FLAGS \
{ XFS_DA_OP_JUSTCHECK, "JUSTCHECK" }, \
{ XFS_DA_OP_RENAME, "RENAME" }, \
{ XFS_DA_OP_ADDNAME, "ADDNAME" }, \
{ XFS_DA_OP_OKNOENT, "OKNOENT" }, \
- { XFS_DA_OP_CILOOKUP, "CILOOKUP" }
+ { XFS_DA_OP_CILOOKUP, "CILOOKUP" }, \
+ { XFS_DA_OP_ALLOCVAL, "ALLOCVAL" }, \
+ { XFS_DA_OP_INCOMPLETE, "INCOMPLETE" }
/*
* Storage for holding state during Btree searches and split/join ops.
@@ -123,6 +134,25 @@ typedef struct xfs_da_state {
} xfs_da_state_t;
/*
+ * In-core version of the node header to abstract the differences in the v2 and
+ * v3 disk format of the headers. Callers need to convert to/from disk format as
+ * appropriate.
+ */
+struct xfs_da3_icnode_hdr {
+ uint32_t forw;
+ uint32_t back;
+ uint16_t magic;
+ uint16_t count;
+ uint16_t level;
+
+ /*
+ * Pointer to the on-disk format entries, which are behind the
+ * variable size (v4 vs v5) header in the on-disk block.
+ */
+ struct xfs_da_node_entry *btree;
+};
+
+/*
* Utility macros to aid in logging changed structure fields.
*/
#define XFS_DA_LOGOFF(BASE, ADDR) ((char *)(ADDR) - (char *)(BASE))
@@ -130,16 +160,6 @@ typedef struct xfs_da_state {
(uint)(XFS_DA_LOGOFF(BASE, ADDR)), \
(uint)(XFS_DA_LOGOFF(BASE, ADDR)+(SIZE)-1)
-/*
- * Name ops for directory and/or attr name operations
- */
-struct xfs_nameops {
- xfs_dahash_t (*hashname)(struct xfs_name *);
- enum xfs_dacmp (*compname)(struct xfs_da_args *,
- const unsigned char *, int);
-};
-
-
/*========================================================================
* Function prototypes.
*========================================================================*/
@@ -170,25 +190,28 @@ int xfs_da3_path_shift(xfs_da_state_t *state, xfs_da_state_path_t *path,
int xfs_da3_blk_link(xfs_da_state_t *state, xfs_da_state_blk_t *old_blk,
xfs_da_state_blk_t *new_blk);
int xfs_da3_node_read(struct xfs_trans *tp, struct xfs_inode *dp,
- xfs_dablk_t bno, xfs_daddr_t mappedbno,
- struct xfs_buf **bpp, int which_fork);
+ xfs_dablk_t bno, struct xfs_buf **bpp, int whichfork);
+int xfs_da3_node_read_mapped(struct xfs_trans *tp, struct xfs_inode *dp,
+ xfs_daddr_t mappedbno, struct xfs_buf **bpp,
+ int whichfork);
/*
* Utility routines.
*/
+
+#define XFS_DABUF_MAP_HOLE_OK (1 << 0)
+
int xfs_da_grow_inode(xfs_da_args_t *args, xfs_dablk_t *new_blkno);
int xfs_da_grow_inode_int(struct xfs_da_args *args, xfs_fileoff_t *bno,
int count);
int xfs_da_get_buf(struct xfs_trans *trans, struct xfs_inode *dp,
- xfs_dablk_t bno, xfs_daddr_t mappedbno,
- struct xfs_buf **bp, int whichfork);
+ xfs_dablk_t bno, struct xfs_buf **bp, int whichfork);
int xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp,
- xfs_dablk_t bno, xfs_daddr_t mappedbno,
- struct xfs_buf **bpp, int whichfork,
- const struct xfs_buf_ops *ops);
+ xfs_dablk_t bno, unsigned int flags, struct xfs_buf **bpp,
+ int whichfork, const struct xfs_buf_ops *ops);
int xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno,
- xfs_daddr_t mapped_bno, int whichfork,
- const struct xfs_buf_ops *ops);
+ unsigned int flags, int whichfork,
+ const struct xfs_buf_ops *ops);
int xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,
struct xfs_buf *dead_buf);
@@ -200,7 +223,11 @@ enum xfs_dacmp xfs_da_compname(struct xfs_da_args *args,
xfs_da_state_t *xfs_da_state_alloc(void);
void xfs_da_state_free(xfs_da_state_t *state);
+void xfs_da3_node_hdr_from_disk(struct xfs_mount *mp,
+ struct xfs_da3_icnode_hdr *to, struct xfs_da_intnode *from);
+void xfs_da3_node_hdr_to_disk(struct xfs_mount *mp,
+ struct xfs_da_intnode *to, struct xfs_da3_icnode_hdr *from);
+
extern struct kmem_zone *xfs_da_state_zone;
-extern const struct xfs_nameops xfs_default_nameops;
#endif /* __XFS_DA_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_da_format.c b/fs/xfs/libxfs/xfs_da_format.c
deleted file mode 100644
index b1ae572496b6..000000000000
--- a/fs/xfs/libxfs/xfs_da_format.c
+++ /dev/null
@@ -1,888 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
- * Copyright (c) 2013 Red Hat, Inc.
- * All Rights Reserved.
- */
-#include "xfs.h"
-#include "xfs_fs.h"
-#include "xfs_shared.h"
-#include "xfs_format.h"
-#include "xfs_log_format.h"
-#include "xfs_trans_resv.h"
-#include "xfs_mount.h"
-#include "xfs_inode.h"
-#include "xfs_dir2.h"
-
-/*
- * Shortform directory ops
- */
-static int
-xfs_dir2_sf_entsize(
- struct xfs_dir2_sf_hdr *hdr,
- int len)
-{
- int count = sizeof(struct xfs_dir2_sf_entry); /* namelen + offset */
-
- count += len; /* name */
- count += hdr->i8count ? XFS_INO64_SIZE : XFS_INO32_SIZE; /* ino # */
- return count;
-}
-
-static int
-xfs_dir3_sf_entsize(
- struct xfs_dir2_sf_hdr *hdr,
- int len)
-{
- return xfs_dir2_sf_entsize(hdr, len) + sizeof(uint8_t);
-}
-
-static struct xfs_dir2_sf_entry *
-xfs_dir2_sf_nextentry(
- struct xfs_dir2_sf_hdr *hdr,
- struct xfs_dir2_sf_entry *sfep)
-{
- return (struct xfs_dir2_sf_entry *)
- ((char *)sfep + xfs_dir2_sf_entsize(hdr, sfep->namelen));
-}
-
-static struct xfs_dir2_sf_entry *
-xfs_dir3_sf_nextentry(
- struct xfs_dir2_sf_hdr *hdr,
- struct xfs_dir2_sf_entry *sfep)
-{
- return (struct xfs_dir2_sf_entry *)
- ((char *)sfep + xfs_dir3_sf_entsize(hdr, sfep->namelen));
-}
-
-
-/*
- * For filetype enabled shortform directories, the file type field is stored at
- * the end of the name. Because it's only a single byte, endian conversion is
- * not necessary. For non-filetype enable directories, the type is always
- * unknown and we never store the value.
- */
-static uint8_t
-xfs_dir2_sfe_get_ftype(
- struct xfs_dir2_sf_entry *sfep)
-{
- return XFS_DIR3_FT_UNKNOWN;
-}
-
-static void
-xfs_dir2_sfe_put_ftype(
- struct xfs_dir2_sf_entry *sfep,
- uint8_t ftype)
-{
- ASSERT(ftype < XFS_DIR3_FT_MAX);
-}
-
-static uint8_t
-xfs_dir3_sfe_get_ftype(
- struct xfs_dir2_sf_entry *sfep)
-{
- uint8_t ftype;
-
- ftype = sfep->name[sfep->namelen];
- if (ftype >= XFS_DIR3_FT_MAX)
- return XFS_DIR3_FT_UNKNOWN;
- return ftype;
-}
-
-static void
-xfs_dir3_sfe_put_ftype(
- struct xfs_dir2_sf_entry *sfep,
- uint8_t ftype)
-{
- ASSERT(ftype < XFS_DIR3_FT_MAX);
-
- sfep->name[sfep->namelen] = ftype;
-}
-
-/*
- * Inode numbers in short-form directories can come in two versions,
- * either 4 bytes or 8 bytes wide. These helpers deal with the
- * two forms transparently by looking at the headers i8count field.
- *
- * For 64-bit inode number the most significant byte must be zero.
- */
-static xfs_ino_t
-xfs_dir2_sf_get_ino(
- struct xfs_dir2_sf_hdr *hdr,
- uint8_t *from)
-{
- if (hdr->i8count)
- return get_unaligned_be64(from) & 0x00ffffffffffffffULL;
- else
- return get_unaligned_be32(from);
-}
-
-static void
-xfs_dir2_sf_put_ino(
- struct xfs_dir2_sf_hdr *hdr,
- uint8_t *to,
- xfs_ino_t ino)
-{
- ASSERT((ino & 0xff00000000000000ULL) == 0);
-
- if (hdr->i8count)
- put_unaligned_be64(ino, to);
- else
- put_unaligned_be32(ino, to);
-}
-
-static xfs_ino_t
-xfs_dir2_sf_get_parent_ino(
- struct xfs_dir2_sf_hdr *hdr)
-{
- return xfs_dir2_sf_get_ino(hdr, hdr->parent);
-}
-
-static void
-xfs_dir2_sf_put_parent_ino(
- struct xfs_dir2_sf_hdr *hdr,
- xfs_ino_t ino)
-{
- xfs_dir2_sf_put_ino(hdr, hdr->parent, ino);
-}
-
-/*
- * In short-form directory entries the inode numbers are stored at variable
- * offset behind the entry name. If the entry stores a filetype value, then it
- * sits between the name and the inode number. Hence the inode numbers may only
- * be accessed through the helpers below.
- */
-static xfs_ino_t
-xfs_dir2_sfe_get_ino(
- struct xfs_dir2_sf_hdr *hdr,
- struct xfs_dir2_sf_entry *sfep)
-{
- return xfs_dir2_sf_get_ino(hdr, &sfep->name[sfep->namelen]);
-}
-
-static void
-xfs_dir2_sfe_put_ino(
- struct xfs_dir2_sf_hdr *hdr,
- struct xfs_dir2_sf_entry *sfep,
- xfs_ino_t ino)
-{
- xfs_dir2_sf_put_ino(hdr, &sfep->name[sfep->namelen], ino);
-}
-
-static xfs_ino_t
-xfs_dir3_sfe_get_ino(
- struct xfs_dir2_sf_hdr *hdr,
- struct xfs_dir2_sf_entry *sfep)
-{
- return xfs_dir2_sf_get_ino(hdr, &sfep->name[sfep->namelen + 1]);
-}
-
-static void
-xfs_dir3_sfe_put_ino(
- struct xfs_dir2_sf_hdr *hdr,
- struct xfs_dir2_sf_entry *sfep,
- xfs_ino_t ino)
-{
- xfs_dir2_sf_put_ino(hdr, &sfep->name[sfep->namelen + 1], ino);
-}
-
-
-/*
- * Directory data block operations
- */
-
-/*
- * For special situations, the dirent size ends up fixed because we always know
- * what the size of the entry is. That's true for the "." and "..", and
- * therefore we know that they are a fixed size and hence their offsets are
- * constant, as is the first entry.
- *
- * Hence, this calculation is written as a macro to be able to be calculated at
- * compile time and so certain offsets can be calculated directly in the
- * structure initaliser via the macro. There are two macros - one for dirents
- * with ftype and without so there are no unresolvable conditionals in the
- * calculations. We also use round_up() as XFS_DIR2_DATA_ALIGN is always a power
- * of 2 and the compiler doesn't reject it (unlike roundup()).
- */
-#define XFS_DIR2_DATA_ENTSIZE(n) \
- round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) + \
- sizeof(xfs_dir2_data_off_t)), XFS_DIR2_DATA_ALIGN)
-
-#define XFS_DIR3_DATA_ENTSIZE(n) \
- round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) + \
- sizeof(xfs_dir2_data_off_t) + sizeof(uint8_t)), \
- XFS_DIR2_DATA_ALIGN)
-
-static int
-xfs_dir2_data_entsize(
- int n)
-{
- return XFS_DIR2_DATA_ENTSIZE(n);
-}
-
-static int
-xfs_dir3_data_entsize(
- int n)
-{
- return XFS_DIR3_DATA_ENTSIZE(n);
-}
-
-static uint8_t
-xfs_dir2_data_get_ftype(
- struct xfs_dir2_data_entry *dep)
-{
- return XFS_DIR3_FT_UNKNOWN;
-}
-
-static void
-xfs_dir2_data_put_ftype(
- struct xfs_dir2_data_entry *dep,
- uint8_t ftype)
-{
- ASSERT(ftype < XFS_DIR3_FT_MAX);
-}
-
-static uint8_t
-xfs_dir3_data_get_ftype(
- struct xfs_dir2_data_entry *dep)
-{
- uint8_t ftype = dep->name[dep->namelen];
-
- if (ftype >= XFS_DIR3_FT_MAX)
- return XFS_DIR3_FT_UNKNOWN;
- return ftype;
-}
-
-static void
-xfs_dir3_data_put_ftype(
- struct xfs_dir2_data_entry *dep,
- uint8_t type)
-{
- ASSERT(type < XFS_DIR3_FT_MAX);
- ASSERT(dep->namelen != 0);
-
- dep->name[dep->namelen] = type;
-}
-
-/*
- * Pointer to an entry's tag word.
- */
-static __be16 *
-xfs_dir2_data_entry_tag_p(
- struct xfs_dir2_data_entry *dep)
-{
- return (__be16 *)((char *)dep +
- xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16));
-}
-
-static __be16 *
-xfs_dir3_data_entry_tag_p(
- struct xfs_dir2_data_entry *dep)
-{
- return (__be16 *)((char *)dep +
- xfs_dir3_data_entsize(dep->namelen) - sizeof(__be16));
-}
-
-/*
- * location of . and .. in data space (always block 0)
- */
-static struct xfs_dir2_data_entry *
-xfs_dir2_data_dot_entry_p(
- struct xfs_dir2_data_hdr *hdr)
-{
- return (struct xfs_dir2_data_entry *)
- ((char *)hdr + sizeof(struct xfs_dir2_data_hdr));
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir2_data_dotdot_entry_p(
- struct xfs_dir2_data_hdr *hdr)
-{
- return (struct xfs_dir2_data_entry *)
- ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) +
- XFS_DIR2_DATA_ENTSIZE(1));
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir2_data_first_entry_p(
- struct xfs_dir2_data_hdr *hdr)
-{
- return (struct xfs_dir2_data_entry *)
- ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) +
- XFS_DIR2_DATA_ENTSIZE(1) +
- XFS_DIR2_DATA_ENTSIZE(2));
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir2_ftype_data_dotdot_entry_p(
- struct xfs_dir2_data_hdr *hdr)
-{
- return (struct xfs_dir2_data_entry *)
- ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) +
- XFS_DIR3_DATA_ENTSIZE(1));
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir2_ftype_data_first_entry_p(
- struct xfs_dir2_data_hdr *hdr)
-{
- return (struct xfs_dir2_data_entry *)
- ((char *)hdr + sizeof(struct xfs_dir2_data_hdr) +
- XFS_DIR3_DATA_ENTSIZE(1) +
- XFS_DIR3_DATA_ENTSIZE(2));
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir3_data_dot_entry_p(
- struct xfs_dir2_data_hdr *hdr)
-{
- return (struct xfs_dir2_data_entry *)
- ((char *)hdr + sizeof(struct xfs_dir3_data_hdr));
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir3_data_dotdot_entry_p(
- struct xfs_dir2_data_hdr *hdr)
-{
- return (struct xfs_dir2_data_entry *)
- ((char *)hdr + sizeof(struct xfs_dir3_data_hdr) +
- XFS_DIR3_DATA_ENTSIZE(1));
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir3_data_first_entry_p(
- struct xfs_dir2_data_hdr *hdr)
-{
- return (struct xfs_dir2_data_entry *)
- ((char *)hdr + sizeof(struct xfs_dir3_data_hdr) +
- XFS_DIR3_DATA_ENTSIZE(1) +
- XFS_DIR3_DATA_ENTSIZE(2));
-}
-
-static struct xfs_dir2_data_free *
-xfs_dir2_data_bestfree_p(struct xfs_dir2_data_hdr *hdr)
-{
- return hdr->bestfree;
-}
-
-static struct xfs_dir2_data_free *
-xfs_dir3_data_bestfree_p(struct xfs_dir2_data_hdr *hdr)
-{
- return ((struct xfs_dir3_data_hdr *)hdr)->best_free;
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir2_data_entry_p(struct xfs_dir2_data_hdr *hdr)
-{
- return (struct xfs_dir2_data_entry *)
- ((char *)hdr + sizeof(struct xfs_dir2_data_hdr));
-}
-
-static struct xfs_dir2_data_unused *
-xfs_dir2_data_unused_p(struct xfs_dir2_data_hdr *hdr)
-{
- return (struct xfs_dir2_data_unused *)
- ((char *)hdr + sizeof(struct xfs_dir2_data_hdr));
-}
-
-static struct xfs_dir2_data_entry *
-xfs_dir3_data_entry_p(struct xfs_dir2_data_hdr *hdr)
-{
- return (struct xfs_dir2_data_entry *)
- ((char *)hdr + sizeof(struct xfs_dir3_data_hdr));
-}
-
-static struct xfs_dir2_data_unused *
-xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr)
-{
- return (struct xfs_dir2_data_unused *)
- ((char *)hdr + sizeof(struct xfs_dir3_data_hdr));
-}
-
-
-/*
- * Directory Leaf block operations
- */
-static int
-xfs_dir2_max_leaf_ents(struct xfs_da_geometry *geo)
-{
- return (geo->blksize - sizeof(struct xfs_dir2_leaf_hdr)) /
- (uint)sizeof(struct xfs_dir2_leaf_entry);
-}
-
-static struct xfs_dir2_leaf_entry *
-xfs_dir2_leaf_ents_p(struct xfs_dir2_leaf *lp)
-{
- return lp->__ents;
-}
-
-static int
-xfs_dir3_max_leaf_ents(struct xfs_da_geometry *geo)
-{
- return (geo->blksize - sizeof(struct xfs_dir3_leaf_hdr)) /
- (uint)sizeof(struct xfs_dir2_leaf_entry);
-}
-
-static struct xfs_dir2_leaf_entry *
-xfs_dir3_leaf_ents_p(struct xfs_dir2_leaf *lp)
-{
- return ((struct xfs_dir3_leaf *)lp)->__ents;
-}
-
-static void
-xfs_dir2_leaf_hdr_from_disk(
- struct xfs_dir3_icleaf_hdr *to,
- struct xfs_dir2_leaf *from)
-{
- to->forw = be32_to_cpu(from->hdr.info.forw);
- to->back = be32_to_cpu(from->hdr.info.back);
- to->magic = be16_to_cpu(from->hdr.info.magic);
- to->count = be16_to_cpu(from->hdr.count);
- to->stale = be16_to_cpu(from->hdr.stale);
-
- ASSERT(to->magic == XFS_DIR2_LEAF1_MAGIC ||
- to->magic == XFS_DIR2_LEAFN_MAGIC);
-}
-
-static void
-xfs_dir2_leaf_hdr_to_disk(
- struct xfs_dir2_leaf *to,
- struct xfs_dir3_icleaf_hdr *from)
-{
- ASSERT(from->magic == XFS_DIR2_LEAF1_MAGIC ||
- from->magic == XFS_DIR2_LEAFN_MAGIC);
-
- to->hdr.info.forw = cpu_to_be32(from->forw);
- to->hdr.info.back = cpu_to_be32(from->back);
- to->hdr.info.magic = cpu_to_be16(from->magic);
- to->hdr.count = cpu_to_be16(from->count);
- to->hdr.stale = cpu_to_be16(from->stale);
-}
-
-static void
-xfs_dir3_leaf_hdr_from_disk(
- struct xfs_dir3_icleaf_hdr *to,
- struct xfs_dir2_leaf *from)
-{
- struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)from;
-
- to->forw = be32_to_cpu(hdr3->info.hdr.forw);
- to->back = be32_to_cpu(hdr3->info.hdr.back);
- to->magic = be16_to_cpu(hdr3->info.hdr.magic);
- to->count = be16_to_cpu(hdr3->count);
- to->stale = be16_to_cpu(hdr3->stale);
-
- ASSERT(to->magic == XFS_DIR3_LEAF1_MAGIC ||
- to->magic == XFS_DIR3_LEAFN_MAGIC);
-}
-
-static void
-xfs_dir3_leaf_hdr_to_disk(
- struct xfs_dir2_leaf *to,
- struct xfs_dir3_icleaf_hdr *from)
-{
- struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)to;
-
- ASSERT(from->magic == XFS_DIR3_LEAF1_MAGIC ||
- from->magic == XFS_DIR3_LEAFN_MAGIC);
-
- hdr3->info.hdr.forw = cpu_to_be32(from->forw);
- hdr3->info.hdr.back = cpu_to_be32(from->back);
- hdr3->info.hdr.magic = cpu_to_be16(from->magic);
- hdr3->count = cpu_to_be16(from->count);
- hdr3->stale = cpu_to_be16(from->stale);
-}
-
-
-/*
- * Directory/Attribute Node block operations
- */
-static struct xfs_da_node_entry *
-xfs_da2_node_tree_p(struct xfs_da_intnode *dap)
-{
- return dap->__btree;
-}
-
-static struct xfs_da_node_entry *
-xfs_da3_node_tree_p(struct xfs_da_intnode *dap)
-{
- return ((struct xfs_da3_intnode *)dap)->__btree;
-}
-
-static void
-xfs_da2_node_hdr_from_disk(
- struct xfs_da3_icnode_hdr *to,
- struct xfs_da_intnode *from)
-{
- ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
- to->forw = be32_to_cpu(from->hdr.info.forw);
- to->back = be32_to_cpu(from->hdr.info.back);
- to->magic = be16_to_cpu(from->hdr.info.magic);
- to->count = be16_to_cpu(from->hdr.__count);
- to->level = be16_to_cpu(from->hdr.__level);
-}
-
-static void
-xfs_da2_node_hdr_to_disk(
- struct xfs_da_intnode *to,
- struct xfs_da3_icnode_hdr *from)
-{
- ASSERT(from->magic == XFS_DA_NODE_MAGIC);
- to->hdr.info.forw = cpu_to_be32(from->forw);
- to->hdr.info.back = cpu_to_be32(from->back);
- to->hdr.info.magic = cpu_to_be16(from->magic);
- to->hdr.__count = cpu_to_be16(from->count);
- to->hdr.__level = cpu_to_be16(from->level);
-}
-
-static void
-xfs_da3_node_hdr_from_disk(
- struct xfs_da3_icnode_hdr *to,
- struct xfs_da_intnode *from)
-{
- struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)from;
-
- ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC));
- to->forw = be32_to_cpu(hdr3->info.hdr.forw);
- to->back = be32_to_cpu(hdr3->info.hdr.back);
- to->magic = be16_to_cpu(hdr3->info.hdr.magic);
- to->count = be16_to_cpu(hdr3->__count);
- to->level = be16_to_cpu(hdr3->__level);
-}
-
-static void
-xfs_da3_node_hdr_to_disk(
- struct xfs_da_intnode *to,
- struct xfs_da3_icnode_hdr *from)
-{
- struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)to;
-
- ASSERT(from->magic == XFS_DA3_NODE_MAGIC);
- hdr3->info.hdr.forw = cpu_to_be32(from->forw);
- hdr3->info.hdr.back = cpu_to_be32(from->back);
- hdr3->info.hdr.magic = cpu_to_be16(from->magic);
- hdr3->__count = cpu_to_be16(from->count);
- hdr3->__level = cpu_to_be16(from->level);
-}
-
-
-/*
- * Directory free space block operations
- */
-static int
-xfs_dir2_free_max_bests(struct xfs_da_geometry *geo)
-{
- return (geo->blksize - sizeof(struct xfs_dir2_free_hdr)) /
- sizeof(xfs_dir2_data_off_t);
-}
-
-static __be16 *
-xfs_dir2_free_bests_p(struct xfs_dir2_free *free)
-{
- return (__be16 *)((char *)free + sizeof(struct xfs_dir2_free_hdr));
-}
-
-/*
- * Convert data space db to the corresponding free db.
- */
-static xfs_dir2_db_t
-xfs_dir2_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
-{
- return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) +
- (db / xfs_dir2_free_max_bests(geo));
-}
-
-/*
- * Convert data space db to the corresponding index in a free db.
- */
-static int
-xfs_dir2_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
-{
- return db % xfs_dir2_free_max_bests(geo);
-}
-
-static int
-xfs_dir3_free_max_bests(struct xfs_da_geometry *geo)
-{
- return (geo->blksize - sizeof(struct xfs_dir3_free_hdr)) /
- sizeof(xfs_dir2_data_off_t);
-}
-
-static __be16 *
-xfs_dir3_free_bests_p(struct xfs_dir2_free *free)
-{
- return (__be16 *)((char *)free + sizeof(struct xfs_dir3_free_hdr));
-}
-
-/*
- * Convert data space db to the corresponding free db.
- */
-static xfs_dir2_db_t
-xfs_dir3_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
-{
- return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) +
- (db / xfs_dir3_free_max_bests(geo));
-}
-
-/*
- * Convert data space db to the corresponding index in a free db.
- */
-static int
-xfs_dir3_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
-{
- return db % xfs_dir3_free_max_bests(geo);
-}
-
-static void
-xfs_dir2_free_hdr_from_disk(
- struct xfs_dir3_icfree_hdr *to,
- struct xfs_dir2_free *from)
-{
- to->magic = be32_to_cpu(from->hdr.magic);
- to->firstdb = be32_to_cpu(from->hdr.firstdb);
- to->nvalid = be32_to_cpu(from->hdr.nvalid);
- to->nused = be32_to_cpu(from->hdr.nused);
- ASSERT(to->magic == XFS_DIR2_FREE_MAGIC);
-}
-
-static void
-xfs_dir2_free_hdr_to_disk(
- struct xfs_dir2_free *to,
- struct xfs_dir3_icfree_hdr *from)
-{
- ASSERT(from->magic == XFS_DIR2_FREE_MAGIC);
-
- to->hdr.magic = cpu_to_be32(from->magic);
- to->hdr.firstdb = cpu_to_be32(from->firstdb);
- to->hdr.nvalid = cpu_to_be32(from->nvalid);
- to->hdr.nused = cpu_to_be32(from->nused);
-}
-
-static void
-xfs_dir3_free_hdr_from_disk(
- struct xfs_dir3_icfree_hdr *to,
- struct xfs_dir2_free *from)
-{
- struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)from;
-
- to->magic = be32_to_cpu(hdr3->hdr.magic);
- to->firstdb = be32_to_cpu(hdr3->firstdb);
- to->nvalid = be32_to_cpu(hdr3->nvalid);
- to->nused = be32_to_cpu(hdr3->nused);
-
- ASSERT(to->magic == XFS_DIR3_FREE_MAGIC);
-}
-
-static void
-xfs_dir3_free_hdr_to_disk(
- struct xfs_dir2_free *to,
- struct xfs_dir3_icfree_hdr *from)
-{
- struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)to;
-
- ASSERT(from->magic == XFS_DIR3_FREE_MAGIC);
-
- hdr3->hdr.magic = cpu_to_be32(from->magic);
- hdr3->firstdb = cpu_to_be32(from->firstdb);
- hdr3->nvalid = cpu_to_be32(from->nvalid);
- hdr3->nused = cpu_to_be32(from->nused);
-}
-
-static const struct xfs_dir_ops xfs_dir2_ops = {
- .sf_entsize = xfs_dir2_sf_entsize,
- .sf_nextentry = xfs_dir2_sf_nextentry,
- .sf_get_ftype = xfs_dir2_sfe_get_ftype,
- .sf_put_ftype = xfs_dir2_sfe_put_ftype,
- .sf_get_ino = xfs_dir2_sfe_get_ino,
- .sf_put_ino = xfs_dir2_sfe_put_ino,
- .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino,
- .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino,
-
- .data_entsize = xfs_dir2_data_entsize,
- .data_get_ftype = xfs_dir2_data_get_ftype,
- .data_put_ftype = xfs_dir2_data_put_ftype,
- .data_entry_tag_p = xfs_dir2_data_entry_tag_p,
- .data_bestfree_p = xfs_dir2_data_bestfree_p,
-
- .data_dot_offset = sizeof(struct xfs_dir2_data_hdr),
- .data_dotdot_offset = sizeof(struct xfs_dir2_data_hdr) +
- XFS_DIR2_DATA_ENTSIZE(1),
- .data_first_offset = sizeof(struct xfs_dir2_data_hdr) +
- XFS_DIR2_DATA_ENTSIZE(1) +
- XFS_DIR2_DATA_ENTSIZE(2),
- .data_entry_offset = sizeof(struct xfs_dir2_data_hdr),
-
- .data_dot_entry_p = xfs_dir2_data_dot_entry_p,
- .data_dotdot_entry_p = xfs_dir2_data_dotdot_entry_p,
- .data_first_entry_p = xfs_dir2_data_first_entry_p,
- .data_entry_p = xfs_dir2_data_entry_p,
- .data_unused_p = xfs_dir2_data_unused_p,
-
- .leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr),
- .leaf_hdr_to_disk = xfs_dir2_leaf_hdr_to_disk,
- .leaf_hdr_from_disk = xfs_dir2_leaf_hdr_from_disk,
- .leaf_max_ents = xfs_dir2_max_leaf_ents,
- .leaf_ents_p = xfs_dir2_leaf_ents_p,
-
- .node_hdr_size = sizeof(struct xfs_da_node_hdr),
- .node_hdr_to_disk = xfs_da2_node_hdr_to_disk,
- .node_hdr_from_disk = xfs_da2_node_hdr_from_disk,
- .node_tree_p = xfs_da2_node_tree_p,
-
- .free_hdr_size = sizeof(struct xfs_dir2_free_hdr),
- .free_hdr_to_disk = xfs_dir2_free_hdr_to_disk,
- .free_hdr_from_disk = xfs_dir2_free_hdr_from_disk,
- .free_max_bests = xfs_dir2_free_max_bests,
- .free_bests_p = xfs_dir2_free_bests_p,
- .db_to_fdb = xfs_dir2_db_to_fdb,
- .db_to_fdindex = xfs_dir2_db_to_fdindex,
-};
-
-static const struct xfs_dir_ops xfs_dir2_ftype_ops = {
- .sf_entsize = xfs_dir3_sf_entsize,
- .sf_nextentry = xfs_dir3_sf_nextentry,
- .sf_get_ftype = xfs_dir3_sfe_get_ftype,
- .sf_put_ftype = xfs_dir3_sfe_put_ftype,
- .sf_get_ino = xfs_dir3_sfe_get_ino,
- .sf_put_ino = xfs_dir3_sfe_put_ino,
- .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino,
- .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino,
-
- .data_entsize = xfs_dir3_data_entsize,
- .data_get_ftype = xfs_dir3_data_get_ftype,
- .data_put_ftype = xfs_dir3_data_put_ftype,
- .data_entry_tag_p = xfs_dir3_data_entry_tag_p,
- .data_bestfree_p = xfs_dir2_data_bestfree_p,
-
- .data_dot_offset = sizeof(struct xfs_dir2_data_hdr),
- .data_dotdot_offset = sizeof(struct xfs_dir2_data_hdr) +
- XFS_DIR3_DATA_ENTSIZE(1),
- .data_first_offset = sizeof(struct xfs_dir2_data_hdr) +
- XFS_DIR3_DATA_ENTSIZE(1) +
- XFS_DIR3_DATA_ENTSIZE(2),
- .data_entry_offset = sizeof(struct xfs_dir2_data_hdr),
-
- .data_dot_entry_p = xfs_dir2_data_dot_entry_p,
- .data_dotdot_entry_p = xfs_dir2_ftype_data_dotdot_entry_p,
- .data_first_entry_p = xfs_dir2_ftype_data_first_entry_p,
- .data_entry_p = xfs_dir2_data_entry_p,
- .data_unused_p = xfs_dir2_data_unused_p,
-
- .leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr),
- .leaf_hdr_to_disk = xfs_dir2_leaf_hdr_to_disk,
- .leaf_hdr_from_disk = xfs_dir2_leaf_hdr_from_disk,
- .leaf_max_ents = xfs_dir2_max_leaf_ents,
- .leaf_ents_p = xfs_dir2_leaf_ents_p,
-
- .node_hdr_size = sizeof(struct xfs_da_node_hdr),
- .node_hdr_to_disk = xfs_da2_node_hdr_to_disk,
- .node_hdr_from_disk = xfs_da2_node_hdr_from_disk,
- .node_tree_p = xfs_da2_node_tree_p,
-
- .free_hdr_size = sizeof(struct xfs_dir2_free_hdr),
- .free_hdr_to_disk = xfs_dir2_free_hdr_to_disk,
- .free_hdr_from_disk = xfs_dir2_free_hdr_from_disk,
- .free_max_bests = xfs_dir2_free_max_bests,
- .free_bests_p = xfs_dir2_free_bests_p,
- .db_to_fdb = xfs_dir2_db_to_fdb,
- .db_to_fdindex = xfs_dir2_db_to_fdindex,
-};
-
-static const struct xfs_dir_ops xfs_dir3_ops = {
- .sf_entsize = xfs_dir3_sf_entsize,
- .sf_nextentry = xfs_dir3_sf_nextentry,
- .sf_get_ftype = xfs_dir3_sfe_get_ftype,
- .sf_put_ftype = xfs_dir3_sfe_put_ftype,
- .sf_get_ino = xfs_dir3_sfe_get_ino,
- .sf_put_ino = xfs_dir3_sfe_put_ino,
- .sf_get_parent_ino = xfs_dir2_sf_get_parent_ino,
- .sf_put_parent_ino = xfs_dir2_sf_put_parent_ino,
-
- .data_entsize = xfs_dir3_data_entsize,
- .data_get_ftype = xfs_dir3_data_get_ftype,
- .data_put_ftype = xfs_dir3_data_put_ftype,
- .data_entry_tag_p = xfs_dir3_data_entry_tag_p,
- .data_bestfree_p = xfs_dir3_data_bestfree_p,
-
- .data_dot_offset = sizeof(struct xfs_dir3_data_hdr),
- .data_dotdot_offset = sizeof(struct xfs_dir3_data_hdr) +
- XFS_DIR3_DATA_ENTSIZE(1),
- .data_first_offset = sizeof(struct xfs_dir3_data_hdr) +
- XFS_DIR3_DATA_ENTSIZE(1) +
- XFS_DIR3_DATA_ENTSIZE(2),
- .data_entry_offset = sizeof(struct xfs_dir3_data_hdr),
-
- .data_dot_entry_p = xfs_dir3_data_dot_entry_p,
- .data_dotdot_entry_p = xfs_dir3_data_dotdot_entry_p,
- .data_first_entry_p = xfs_dir3_data_first_entry_p,
- .data_entry_p = xfs_dir3_data_entry_p,
- .data_unused_p = xfs_dir3_data_unused_p,
-
- .leaf_hdr_size = sizeof(struct xfs_dir3_leaf_hdr),
- .leaf_hdr_to_disk = xfs_dir3_leaf_hdr_to_disk,
- .leaf_hdr_from_disk = xfs_dir3_leaf_hdr_from_disk,
- .leaf_max_ents = xfs_dir3_max_leaf_ents,
- .leaf_ents_p = xfs_dir3_leaf_ents_p,
-
- .node_hdr_size = sizeof(struct xfs_da3_node_hdr),
- .node_hdr_to_disk = xfs_da3_node_hdr_to_disk,
- .node_hdr_from_disk = xfs_da3_node_hdr_from_disk,
- .node_tree_p = xfs_da3_node_tree_p,
-
- .free_hdr_size = sizeof(struct xfs_dir3_free_hdr),
- .free_hdr_to_disk = xfs_dir3_free_hdr_to_disk,
- .free_hdr_from_disk = xfs_dir3_free_hdr_from_disk,
- .free_max_bests = xfs_dir3_free_max_bests,
- .free_bests_p = xfs_dir3_free_bests_p,
- .db_to_fdb = xfs_dir3_db_to_fdb,
- .db_to_fdindex = xfs_dir3_db_to_fdindex,
-};
-
-static const struct xfs_dir_ops xfs_dir2_nondir_ops = {
- .node_hdr_size = sizeof(struct xfs_da_node_hdr),
- .node_hdr_to_disk = xfs_da2_node_hdr_to_disk,
- .node_hdr_from_disk = xfs_da2_node_hdr_from_disk,
- .node_tree_p = xfs_da2_node_tree_p,
-};
-
-static const struct xfs_dir_ops xfs_dir3_nondir_ops = {
- .node_hdr_size = sizeof(struct xfs_da3_node_hdr),
- .node_hdr_to_disk = xfs_da3_node_hdr_to_disk,
- .node_hdr_from_disk = xfs_da3_node_hdr_from_disk,
- .node_tree_p = xfs_da3_node_tree_p,
-};
-
-/*
- * Return the ops structure according to the current config. If we are passed
- * an inode, then that overrides the default config we use which is based on
- * feature bits.
- */
-const struct xfs_dir_ops *
-xfs_dir_get_ops(
- struct xfs_mount *mp,
- struct xfs_inode *dp)
-{
- if (dp)
- return dp->d_ops;
- if (mp->m_dir_inode_ops)
- return mp->m_dir_inode_ops;
- if (xfs_sb_version_hascrc(&mp->m_sb))
- return &xfs_dir3_ops;
- if (xfs_sb_version_hasftype(&mp->m_sb))
- return &xfs_dir2_ftype_ops;
- return &xfs_dir2_ops;
-}
-
-const struct xfs_dir_ops *
-xfs_nondir_get_ops(
- struct xfs_mount *mp,
- struct xfs_inode *dp)
-{
- if (dp)
- return dp->d_ops;
- if (mp->m_nondir_inode_ops)
- return mp->m_nondir_inode_ops;
- if (xfs_sb_version_hascrc(&mp->m_sb))
- return &xfs_dir3_nondir_ops;
- return &xfs_dir2_nondir_ops;
-}
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index ae654e06b2fb..734837a9b51a 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -94,19 +94,6 @@ struct xfs_da3_intnode {
};
/*
- * In-core version of the node header to abstract the differences in the v2 and
- * v3 disk format of the headers. Callers need to convert to/from disk format as
- * appropriate.
- */
-struct xfs_da3_icnode_hdr {
- uint32_t forw;
- uint32_t back;
- uint16_t magic;
- uint16_t count;
- uint16_t level;
-};
-
-/*
* Directory version 2.
*
* There are 4 possible formats:
@@ -230,7 +217,7 @@ typedef struct xfs_dir2_sf_entry {
* A 64-bit or 32-bit inode number follows here, at a variable offset
* after the name.
*/
-} xfs_dir2_sf_entry_t;
+} __packed xfs_dir2_sf_entry_t;
static inline int xfs_dir2_sf_hdr_size(int i8count)
{
@@ -434,14 +421,6 @@ struct xfs_dir3_leaf_hdr {
__be32 pad; /* 64 bit alignment */
};
-struct xfs_dir3_icleaf_hdr {
- uint32_t forw;
- uint32_t back;
- uint16_t magic;
- uint16_t count;
- uint16_t stale;
-};
-
/*
* Leaf block entry.
*/
@@ -482,7 +461,7 @@ xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail *ltp)
}
/*
- * Free space block defintions for the node format.
+ * Free space block definitions for the node format.
*/
/*
@@ -521,19 +500,6 @@ struct xfs_dir3_free {
#define XFS_DIR3_FREE_CRC_OFF offsetof(struct xfs_dir3_free, hdr.hdr.crc)
/*
- * In core version of the free block header, abstracted away from on-disk format
- * differences. Use this in the code, and convert to/from the disk version using
- * xfs_dir3_free_hdr_from_disk/xfs_dir3_free_hdr_to_disk.
- */
-struct xfs_dir3_icfree_hdr {
- uint32_t magic;
- uint32_t firstdb;
- uint32_t nvalid;
- uint32_t nused;
-
-};
-
-/*
* Single block format.
*
* The single block format looks like the following drawing on disk:
@@ -710,29 +676,6 @@ struct xfs_attr3_leafblock {
};
/*
- * incore, neutral version of the attribute leaf header
- */
-struct xfs_attr3_icleaf_hdr {
- uint32_t forw;
- uint32_t back;
- uint16_t magic;
- uint16_t count;
- uint16_t usedbytes;
- /*
- * firstused is 32-bit here instead of 16-bit like the on-disk variant
- * to support maximum fsb size of 64k without overflow issues throughout
- * the attr code. Instead, the overflow condition is handled on
- * conversion to/from disk.
- */
- uint32_t firstused;
- __u8 holes;
- struct {
- uint16_t base;
- uint16_t size;
- } freemap[XFS_ATTR_LEAF_MAPSIZE];
-};
-
-/*
* Special value to represent fs block size in the leaf header firstused field.
* Only used when block size overflows the 2-bytes available on disk.
*/
@@ -740,8 +683,6 @@ struct xfs_attr3_icleaf_hdr {
/*
* Flags used in the leaf_entry[i].flags field.
- * NOTE: the INCOMPLETE bit must not collide with the flags bits specified
- * on the system call, they are "or"ed together for various operations.
*/
#define XFS_ATTR_LOCAL_BIT 0 /* attr is stored locally */
#define XFS_ATTR_ROOT_BIT 1 /* limit access to trusted attrs */
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index eb2be2a6a25a..22557527cfdb 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -517,7 +517,7 @@ xfs_defer_add(
}
if (!dfp) {
dfp = kmem_alloc(sizeof(struct xfs_defer_pending),
- KM_SLEEP | KM_NOFS);
+ KM_NOFS);
dfp->dfp_type = type;
dfp->dfp_intent = NULL;
dfp->dfp_done = NULL;
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 67840723edbb..dd6fcaaea318 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -52,7 +52,7 @@ xfs_mode_to_ftype(
* ASCII case-insensitive (ie. A-Z) support for directories that was
* used in IRIX.
*/
-STATIC xfs_dahash_t
+xfs_dahash_t
xfs_ascii_ci_hashname(
struct xfs_name *name)
{
@@ -65,14 +65,14 @@ xfs_ascii_ci_hashname(
return hash;
}
-STATIC enum xfs_dacmp
+enum xfs_dacmp
xfs_ascii_ci_compname(
- struct xfs_da_args *args,
- const unsigned char *name,
- int len)
+ struct xfs_da_args *args,
+ const unsigned char *name,
+ int len)
{
- enum xfs_dacmp result;
- int i;
+ enum xfs_dacmp result;
+ int i;
if (args->namelen != len)
return XFS_CMP_DIFFERENT;
@@ -89,30 +89,20 @@ xfs_ascii_ci_compname(
return result;
}
-static const struct xfs_nameops xfs_ascii_ci_nameops = {
- .hashname = xfs_ascii_ci_hashname,
- .compname = xfs_ascii_ci_compname,
-};
-
int
xfs_da_mount(
struct xfs_mount *mp)
{
struct xfs_da_geometry *dageo;
- int nodehdr_size;
ASSERT(mp->m_sb.sb_versionnum & XFS_SB_VERSION_DIRV2BIT);
ASSERT(xfs_dir2_dirblock_bytes(&mp->m_sb) <= XFS_MAX_BLOCKSIZE);
- mp->m_dir_inode_ops = xfs_dir_get_ops(mp, NULL);
- mp->m_nondir_inode_ops = xfs_nondir_get_ops(mp, NULL);
-
- nodehdr_size = mp->m_dir_inode_ops->node_hdr_size;
mp->m_dir_geo = kmem_zalloc(sizeof(struct xfs_da_geometry),
- KM_SLEEP | KM_MAYFAIL);
+ KM_MAYFAIL);
mp->m_attr_geo = kmem_zalloc(sizeof(struct xfs_da_geometry),
- KM_SLEEP | KM_MAYFAIL);
+ KM_MAYFAIL);
if (!mp->m_dir_geo || !mp->m_attr_geo) {
kmem_free(mp->m_dir_geo);
kmem_free(mp->m_attr_geo);
@@ -125,6 +115,27 @@ xfs_da_mount(
dageo->fsblog = mp->m_sb.sb_blocklog;
dageo->blksize = xfs_dir2_dirblock_bytes(&mp->m_sb);
dageo->fsbcount = 1 << mp->m_sb.sb_dirblklog;
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ dageo->node_hdr_size = sizeof(struct xfs_da3_node_hdr);
+ dageo->leaf_hdr_size = sizeof(struct xfs_dir3_leaf_hdr);
+ dageo->free_hdr_size = sizeof(struct xfs_dir3_free_hdr);
+ dageo->data_entry_offset =
+ sizeof(struct xfs_dir3_data_hdr);
+ } else {
+ dageo->node_hdr_size = sizeof(struct xfs_da_node_hdr);
+ dageo->leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr);
+ dageo->free_hdr_size = sizeof(struct xfs_dir2_free_hdr);
+ dageo->data_entry_offset =
+ sizeof(struct xfs_dir2_data_hdr);
+ }
+ dageo->leaf_max_ents = (dageo->blksize - dageo->leaf_hdr_size) /
+ sizeof(struct xfs_dir2_leaf_entry);
+ dageo->free_max_bests = (dageo->blksize - dageo->free_hdr_size) /
+ sizeof(xfs_dir2_data_off_t);
+
+ dageo->data_first_offset = dageo->data_entry_offset +
+ xfs_dir2_data_entsize(mp, 1) +
+ xfs_dir2_data_entsize(mp, 2);
/*
* Now we've set up the block conversion variables, we can calculate the
@@ -133,7 +144,7 @@ xfs_da_mount(
dageo->datablk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_DATA_OFFSET);
dageo->leafblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_LEAF_OFFSET);
dageo->freeblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_FREE_OFFSET);
- dageo->node_ents = (dageo->blksize - nodehdr_size) /
+ dageo->node_ents = (dageo->blksize - dageo->node_hdr_size) /
(uint)sizeof(xfs_da_node_entry_t);
dageo->magicpct = (dageo->blksize * 37) / 100;
@@ -143,15 +154,10 @@ xfs_da_mount(
dageo->fsblog = mp->m_sb.sb_blocklog;
dageo->blksize = 1 << dageo->blklog;
dageo->fsbcount = 1;
- dageo->node_ents = (dageo->blksize - nodehdr_size) /
+ dageo->node_hdr_size = mp->m_dir_geo->node_hdr_size;
+ dageo->node_ents = (dageo->blksize - dageo->node_hdr_size) /
(uint)sizeof(xfs_da_node_entry_t);
dageo->magicpct = (dageo->blksize * 37) / 100;
-
- if (xfs_sb_version_hasasciici(&mp->m_sb))
- mp->m_dirnameops = &xfs_ascii_ci_nameops;
- else
- mp->m_dirnameops = &xfs_default_nameops;
-
return 0;
}
@@ -191,10 +197,10 @@ xfs_dir_ino_validate(
{
bool ino_ok = xfs_verify_dir_ino(mp, ino);
- if (unlikely(XFS_TEST_ERROR(!ino_ok, mp, XFS_ERRTAG_DIR_INO_VALIDATE))) {
+ if (XFS_IS_CORRUPT(mp, !ino_ok) ||
+ XFS_TEST_ERROR(false, mp, XFS_ERRTAG_DIR_INO_VALIDATE)) {
xfs_warn(mp, "Invalid inode number 0x%Lx",
(unsigned long long) ino);
- XFS_ERROR_REPORT("xfs_dir_ino_validate", XFS_ERRLEVEL_LOW, mp);
return -EFSCORRUPTED;
}
return 0;
@@ -217,7 +223,7 @@ xfs_dir_init(
if (error)
return error;
- args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ args = kmem_zalloc(sizeof(*args), KM_NOFS);
if (!args)
return -ENOMEM;
@@ -254,7 +260,7 @@ xfs_dir_createname(
XFS_STATS_INC(dp->i_mount, xs_dir_create);
}
- args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ args = kmem_zalloc(sizeof(*args), KM_NOFS);
if (!args)
return -ENOMEM;
@@ -262,7 +268,7 @@ xfs_dir_createname(
args->name = name->name;
args->namelen = name->len;
args->filetype = name->type;
- args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+ args->hashval = xfs_dir2_hashname(dp->i_mount, name);
args->inumber = inum;
args->dp = dp;
args->total = total;
@@ -353,12 +359,12 @@ xfs_dir_lookup(
* lockdep Doing this avoids having to add a bunch of lockdep class
* annotations into the reclaim path for the ilock.
*/
- args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ args = kmem_zalloc(sizeof(*args), KM_NOFS);
args->geo = dp->i_mount->m_dir_geo;
args->name = name->name;
args->namelen = name->len;
args->filetype = name->type;
- args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+ args->hashval = xfs_dir2_hashname(dp->i_mount, name);
args->dp = dp;
args->whichfork = XFS_DATA_FORK;
args->trans = tp;
@@ -422,7 +428,7 @@ xfs_dir_removename(
ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
XFS_STATS_INC(dp->i_mount, xs_dir_remove);
- args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ args = kmem_zalloc(sizeof(*args), KM_NOFS);
if (!args)
return -ENOMEM;
@@ -430,7 +436,7 @@ xfs_dir_removename(
args->name = name->name;
args->namelen = name->len;
args->filetype = name->type;
- args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+ args->hashval = xfs_dir2_hashname(dp->i_mount, name);
args->inumber = ino;
args->dp = dp;
args->total = total;
@@ -483,7 +489,7 @@ xfs_dir_replace(
if (rval)
return rval;
- args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
+ args = kmem_zalloc(sizeof(*args), KM_NOFS);
if (!args)
return -ENOMEM;
@@ -491,7 +497,7 @@ xfs_dir_replace(
args->name = name->name;
args->namelen = name->len;
args->filetype = name->type;
- args->hashval = dp->i_mount->m_dirnameops->hashname(name);
+ args->hashval = xfs_dir2_hashname(dp->i_mount, name);
args->inumber = inum;
args->dp = dp;
args->total = total;
@@ -600,7 +606,9 @@ xfs_dir2_isblock(
if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK)))
return rval;
rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize;
- if (rval != 0 && args->dp->i_d.di_size != args->geo->blksize)
+ if (XFS_IS_CORRUPT(args->dp->i_mount,
+ rval != 0 &&
+ args->dp->i_d.di_size != args->geo->blksize))
return -EFSCORRUPTED;
*vp = rval;
return 0;
@@ -716,3 +724,24 @@ xfs_dir2_namecheck(
/* There shouldn't be any slashes or nulls here */
return !memchr(name, '/', length) && !memchr(name, 0, length);
}
+
+xfs_dahash_t
+xfs_dir2_hashname(
+ struct xfs_mount *mp,
+ struct xfs_name *name)
+{
+ if (unlikely(xfs_sb_version_hasasciici(&mp->m_sb)))
+ return xfs_ascii_ci_hashname(name);
+ return xfs_da_hashname(name->name, name->len);
+}
+
+enum xfs_dacmp
+xfs_dir2_compname(
+ struct xfs_da_args *args,
+ const unsigned char *name,
+ int len)
+{
+ if (unlikely(xfs_sb_version_hasasciici(&args->dp->i_mount->m_sb)))
+ return xfs_ascii_ci_compname(args, name, len);
+ return xfs_da_compname(args, name, len);
+}
diff --git a/fs/xfs/libxfs/xfs_dir2.h b/fs/xfs/libxfs/xfs_dir2.h
index f54244779492..033777e282f2 100644
--- a/fs/xfs/libxfs/xfs_dir2.h
+++ b/fs/xfs/libxfs/xfs_dir2.h
@@ -18,6 +18,8 @@ struct xfs_dir2_sf_entry;
struct xfs_dir2_data_hdr;
struct xfs_dir2_data_entry;
struct xfs_dir2_data_unused;
+struct xfs_dir3_icfree_hdr;
+struct xfs_dir3_icleaf_hdr;
extern struct xfs_name xfs_name_dotdot;
@@ -27,85 +29,6 @@ extern struct xfs_name xfs_name_dotdot;
extern unsigned char xfs_mode_to_ftype(int mode);
/*
- * directory operations vector for encode/decode routines
- */
-struct xfs_dir_ops {
- int (*sf_entsize)(struct xfs_dir2_sf_hdr *hdr, int len);
- struct xfs_dir2_sf_entry *
- (*sf_nextentry)(struct xfs_dir2_sf_hdr *hdr,
- struct xfs_dir2_sf_entry *sfep);
- uint8_t (*sf_get_ftype)(struct xfs_dir2_sf_entry *sfep);
- void (*sf_put_ftype)(struct xfs_dir2_sf_entry *sfep,
- uint8_t ftype);
- xfs_ino_t (*sf_get_ino)(struct xfs_dir2_sf_hdr *hdr,
- struct xfs_dir2_sf_entry *sfep);
- void (*sf_put_ino)(struct xfs_dir2_sf_hdr *hdr,
- struct xfs_dir2_sf_entry *sfep,
- xfs_ino_t ino);
- xfs_ino_t (*sf_get_parent_ino)(struct xfs_dir2_sf_hdr *hdr);
- void (*sf_put_parent_ino)(struct xfs_dir2_sf_hdr *hdr,
- xfs_ino_t ino);
-
- int (*data_entsize)(int len);
- uint8_t (*data_get_ftype)(struct xfs_dir2_data_entry *dep);
- void (*data_put_ftype)(struct xfs_dir2_data_entry *dep,
- uint8_t ftype);
- __be16 * (*data_entry_tag_p)(struct xfs_dir2_data_entry *dep);
- struct xfs_dir2_data_free *
- (*data_bestfree_p)(struct xfs_dir2_data_hdr *hdr);
-
- xfs_dir2_data_aoff_t data_dot_offset;
- xfs_dir2_data_aoff_t data_dotdot_offset;
- xfs_dir2_data_aoff_t data_first_offset;
- size_t data_entry_offset;
-
- struct xfs_dir2_data_entry *
- (*data_dot_entry_p)(struct xfs_dir2_data_hdr *hdr);
- struct xfs_dir2_data_entry *
- (*data_dotdot_entry_p)(struct xfs_dir2_data_hdr *hdr);
- struct xfs_dir2_data_entry *
- (*data_first_entry_p)(struct xfs_dir2_data_hdr *hdr);
- struct xfs_dir2_data_entry *
- (*data_entry_p)(struct xfs_dir2_data_hdr *hdr);
- struct xfs_dir2_data_unused *
- (*data_unused_p)(struct xfs_dir2_data_hdr *hdr);
-
- int leaf_hdr_size;
- void (*leaf_hdr_to_disk)(struct xfs_dir2_leaf *to,
- struct xfs_dir3_icleaf_hdr *from);
- void (*leaf_hdr_from_disk)(struct xfs_dir3_icleaf_hdr *to,
- struct xfs_dir2_leaf *from);
- int (*leaf_max_ents)(struct xfs_da_geometry *geo);
- struct xfs_dir2_leaf_entry *
- (*leaf_ents_p)(struct xfs_dir2_leaf *lp);
-
- int node_hdr_size;
- void (*node_hdr_to_disk)(struct xfs_da_intnode *to,
- struct xfs_da3_icnode_hdr *from);
- void (*node_hdr_from_disk)(struct xfs_da3_icnode_hdr *to,
- struct xfs_da_intnode *from);
- struct xfs_da_node_entry *
- (*node_tree_p)(struct xfs_da_intnode *dap);
-
- int free_hdr_size;
- void (*free_hdr_to_disk)(struct xfs_dir2_free *to,
- struct xfs_dir3_icfree_hdr *from);
- void (*free_hdr_from_disk)(struct xfs_dir3_icfree_hdr *to,
- struct xfs_dir2_free *from);
- int (*free_max_bests)(struct xfs_da_geometry *geo);
- __be16 * (*free_bests_p)(struct xfs_dir2_free *free);
- xfs_dir2_db_t (*db_to_fdb)(struct xfs_da_geometry *geo,
- xfs_dir2_db_t db);
- int (*db_to_fdindex)(struct xfs_da_geometry *geo,
- xfs_dir2_db_t db);
-};
-
-extern const struct xfs_dir_ops *
- xfs_dir_get_ops(struct xfs_mount *mp, struct xfs_inode *dp);
-extern const struct xfs_dir_ops *
- xfs_nondir_get_ops(struct xfs_mount *mp, struct xfs_inode *dp);
-
-/*
* Generic directory interface routines
*/
extern void xfs_dir_startup(void);
@@ -124,6 +47,8 @@ extern int xfs_dir_lookup(struct xfs_trans *tp, struct xfs_inode *dp,
extern int xfs_dir_removename(struct xfs_trans *tp, struct xfs_inode *dp,
struct xfs_name *name, xfs_ino_t ino,
xfs_extlen_t tot);
+extern bool xfs_dir2_sf_replace_needblock(struct xfs_inode *dp,
+ xfs_ino_t inum);
extern int xfs_dir_replace(struct xfs_trans *tp, struct xfs_inode *dp,
struct xfs_name *name, xfs_ino_t inum,
xfs_extlen_t tot);
@@ -143,10 +68,7 @@ extern int xfs_dir2_isleaf(struct xfs_da_args *args, int *r);
extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,
struct xfs_buf *bp);
-extern void xfs_dir2_data_freescan_int(struct xfs_da_geometry *geo,
- const struct xfs_dir_ops *ops,
- struct xfs_dir2_data_hdr *hdr, int *loghead);
-extern void xfs_dir2_data_freescan(struct xfs_inode *dp,
+extern void xfs_dir2_data_freescan(struct xfs_mount *mp,
struct xfs_dir2_data_hdr *hdr, int *loghead);
extern void xfs_dir2_data_log_entry(struct xfs_da_args *args,
struct xfs_buf *bp, struct xfs_dir2_data_entry *dep);
@@ -324,7 +246,7 @@ xfs_dir2_leaf_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_leaf *lp)
#define XFS_READDIR_BUFSIZE (32768)
unsigned char xfs_dir3_get_dtype(struct xfs_mount *mp, uint8_t filetype);
-void *xfs_dir3_data_endp(struct xfs_da_geometry *geo,
+unsigned int xfs_dir3_data_end_offset(struct xfs_da_geometry *geo,
struct xfs_dir2_data_hdr *hdr);
bool xfs_dir2_namecheck(const void *name, size_t length);
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index a6fb0cc2085e..d6ced59b9567 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -123,7 +123,7 @@ xfs_dir3_block_read(
struct xfs_mount *mp = dp->i_mount;
int err;
- err = xfs_da_read_buf(tp, dp, mp->m_dir_geo->datablk, -1, bpp,
+ err = xfs_da_read_buf(tp, dp, mp->m_dir_geo->datablk, 0, bpp,
XFS_DATA_FORK, &xfs_dir3_block_buf_ops);
if (!err && tp && *bpp)
xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF);
@@ -172,7 +172,7 @@ xfs_dir2_block_need_space(
struct xfs_dir2_data_unused *enddup = NULL;
*compact = 0;
- bf = dp->d_ops->data_bestfree_p(hdr);
+ bf = xfs_dir2_data_bestfree_p(dp->i_mount, hdr);
/*
* If there are stale entries we'll use one for the leaf.
@@ -311,7 +311,7 @@ xfs_dir2_block_compact(
* This needs to happen before the next call to use_free.
*/
if (needscan)
- xfs_dir2_data_freescan(args->dp, hdr, needlog);
+ xfs_dir2_data_freescan(args->dp->i_mount, hdr, needlog);
}
/*
@@ -355,7 +355,7 @@ xfs_dir2_block_addname(
if (error)
return error;
- len = dp->d_ops->data_entsize(args->namelen);
+ len = xfs_dir2_data_entsize(dp->i_mount, args->namelen);
/*
* Set up pointers to parts of the block.
@@ -458,7 +458,7 @@ xfs_dir2_block_addname(
* This needs to happen before the next call to use_free.
*/
if (needscan) {
- xfs_dir2_data_freescan(dp, hdr, &needlog);
+ xfs_dir2_data_freescan(dp->i_mount, hdr, &needlog);
needscan = 0;
}
/*
@@ -541,14 +541,14 @@ xfs_dir2_block_addname(
dep->inumber = cpu_to_be64(args->inumber);
dep->namelen = args->namelen;
memcpy(dep->name, args->name, args->namelen);
- dp->d_ops->data_put_ftype(dep, args->filetype);
- tagp = dp->d_ops->data_entry_tag_p(dep);
+ xfs_dir2_data_put_ftype(dp->i_mount, dep, args->filetype);
+ tagp = xfs_dir2_data_entry_tag_p(dp->i_mount, dep);
*tagp = cpu_to_be16((char *)dep - (char *)hdr);
/*
* Clean up the bestfree array and log the header, tail, and entry.
*/
if (needscan)
- xfs_dir2_data_freescan(dp, hdr, &needlog);
+ xfs_dir2_data_freescan(dp->i_mount, hdr, &needlog);
if (needlog)
xfs_dir2_data_log_header(args, bp);
xfs_dir2_block_log_tail(tp, bp);
@@ -633,7 +633,7 @@ xfs_dir2_block_lookup(
* Fill in inode number, CI name if appropriate, release the block.
*/
args->inumber = be64_to_cpu(dep->inumber);
- args->filetype = dp->d_ops->data_get_ftype(dep);
+ args->filetype = xfs_dir2_data_get_ftype(dp->i_mount, dep);
error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
xfs_trans_brelse(args->trans, bp);
return error;
@@ -660,13 +660,11 @@ xfs_dir2_block_lookup_int(
int high; /* binary search high index */
int low; /* binary search low index */
int mid; /* binary search current idx */
- xfs_mount_t *mp; /* filesystem mount point */
xfs_trans_t *tp; /* transaction pointer */
enum xfs_dacmp cmp; /* comparison result */
dp = args->dp;
tp = args->trans;
- mp = dp->i_mount;
error = xfs_dir3_block_read(tp, dp, &bp);
if (error)
@@ -718,7 +716,7 @@ xfs_dir2_block_lookup_int(
* and buffer. If it's the first case-insensitive match, store
* the index and buffer and continue looking for an exact match.
*/
- cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
+ cmp = xfs_dir2_compname(args, dep->name, dep->namelen);
if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
args->cmpresult = cmp;
*bpp = bp;
@@ -791,7 +789,8 @@ xfs_dir2_block_removename(
needlog = needscan = 0;
xfs_dir2_data_make_free(args, bp,
(xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
- dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan);
+ xfs_dir2_data_entsize(dp->i_mount, dep->namelen), &needlog,
+ &needscan);
/*
* Fix up the block tail.
*/
@@ -806,7 +805,7 @@ xfs_dir2_block_removename(
* Fix up bestfree, log the header if necessary.
*/
if (needscan)
- xfs_dir2_data_freescan(dp, hdr, &needlog);
+ xfs_dir2_data_freescan(dp->i_mount, hdr, &needlog);
if (needlog)
xfs_dir2_data_log_header(args, bp);
xfs_dir3_data_check(dp, bp);
@@ -864,7 +863,7 @@ xfs_dir2_block_replace(
* Change the inode number to the new value.
*/
dep->inumber = cpu_to_be64(args->inumber);
- dp->d_ops->data_put_ftype(dep, args->filetype);
+ xfs_dir2_data_put_ftype(dp->i_mount, dep, args->filetype);
xfs_dir2_data_log_entry(args, bp, dep);
xfs_dir3_data_check(dp, bp);
return 0;
@@ -914,7 +913,6 @@ xfs_dir2_leaf_to_block(
__be16 *tagp; /* end of entry (tag) */
int to; /* block/leaf to index */
xfs_trans_t *tp; /* transaction pointer */
- struct xfs_dir2_leaf_entry *ents;
struct xfs_dir3_icleaf_hdr leafhdr;
trace_xfs_dir2_leaf_to_block(args);
@@ -923,8 +921,7 @@ xfs_dir2_leaf_to_block(
tp = args->trans;
mp = dp->i_mount;
leaf = lbp->b_addr;
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
- ents = dp->d_ops->leaf_ents_p(leaf);
+ xfs_dir2_leaf_hdr_from_disk(mp, &leafhdr, leaf);
ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
ASSERT(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC ||
@@ -938,7 +935,7 @@ xfs_dir2_leaf_to_block(
while (dp->i_d.di_size > args->geo->blksize) {
int hdrsz;
- hdrsz = dp->d_ops->data_entry_offset;
+ hdrsz = args->geo->data_entry_offset;
bestsp = xfs_dir2_leaf_bests_p(ltp);
if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) ==
args->geo->blksize - hdrsz) {
@@ -953,7 +950,7 @@ xfs_dir2_leaf_to_block(
* Read the data block if we don't already have it, give up if it fails.
*/
if (!dbp) {
- error = xfs_dir3_data_read(tp, dp, args->geo->datablk, -1, &dbp);
+ error = xfs_dir3_data_read(tp, dp, args->geo->datablk, 0, &dbp);
if (error)
return error;
}
@@ -1004,9 +1001,10 @@ xfs_dir2_leaf_to_block(
*/
lep = xfs_dir2_block_leaf_p(btp);
for (from = to = 0; from < leafhdr.count; from++) {
- if (ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
+ if (leafhdr.ents[from].address ==
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
continue;
- lep[to++] = ents[from];
+ lep[to++] = leafhdr.ents[from];
}
ASSERT(to == be32_to_cpu(btp->count));
xfs_dir2_block_log_leaf(tp, dbp, 0, be32_to_cpu(btp->count) - 1);
@@ -1014,7 +1012,7 @@ xfs_dir2_leaf_to_block(
* Scan the bestfree if we need it and log the data block header.
*/
if (needscan)
- xfs_dir2_data_freescan(dp, hdr, &needlog);
+ xfs_dir2_data_freescan(dp->i_mount, hdr, &needlog);
if (needlog)
xfs_dir2_data_log_header(args, dbp);
/*
@@ -1039,47 +1037,38 @@ xfs_dir2_leaf_to_block(
*/
int /* error */
xfs_dir2_sf_to_block(
- xfs_da_args_t *args) /* operation arguments */
+ struct xfs_da_args *args)
{
+ struct xfs_trans *tp = args->trans;
+ struct xfs_inode *dp = args->dp;
+ struct xfs_mount *mp = dp->i_mount;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(dp, XFS_DATA_FORK);
+ struct xfs_da_geometry *geo = args->geo;
xfs_dir2_db_t blkno; /* dir-relative block # (0) */
xfs_dir2_data_hdr_t *hdr; /* block header */
xfs_dir2_leaf_entry_t *blp; /* block leaf entries */
struct xfs_buf *bp; /* block buffer */
xfs_dir2_block_tail_t *btp; /* block tail pointer */
xfs_dir2_data_entry_t *dep; /* data entry pointer */
- xfs_inode_t *dp; /* incore directory inode */
int dummy; /* trash */
xfs_dir2_data_unused_t *dup; /* unused entry pointer */
int endoffset; /* end of data objects */
int error; /* error return value */
int i; /* index */
- xfs_mount_t *mp; /* filesystem mount point */
int needlog; /* need to log block header */
int needscan; /* need to scan block freespc */
int newoffset; /* offset from current entry */
- int offset; /* target block offset */
+ unsigned int offset = geo->data_entry_offset;
xfs_dir2_sf_entry_t *sfep; /* sf entry pointer */
xfs_dir2_sf_hdr_t *oldsfp; /* old shortform header */
xfs_dir2_sf_hdr_t *sfp; /* shortform header */
__be16 *tagp; /* end of data entry */
- xfs_trans_t *tp; /* transaction pointer */
struct xfs_name name;
- struct xfs_ifork *ifp;
trace_xfs_dir2_sf_to_block(args);
- dp = args->dp;
- tp = args->trans;
- mp = dp->i_mount;
- ifp = XFS_IFORK_PTR(dp, XFS_DATA_FORK);
ASSERT(ifp->if_flags & XFS_IFINLINE);
- /*
- * Bomb out if the shortform directory is way too short.
- */
- if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
- ASSERT(XFS_FORCED_SHUTDOWN(mp));
- return -EIO;
- }
+ ASSERT(dp->i_d.di_size >= offsetof(struct xfs_dir2_sf_hdr, parent));
oldsfp = (xfs_dir2_sf_hdr_t *)ifp->if_u1.if_data;
@@ -1092,11 +1081,11 @@ xfs_dir2_sf_to_block(
* Copy the directory into a temporary buffer.
* Then pitch the incore inode data so we can make extents.
*/
- sfp = kmem_alloc(ifp->if_bytes, KM_SLEEP);
+ sfp = kmem_alloc(ifp->if_bytes, 0);
memcpy(sfp, oldsfp, ifp->if_bytes);
xfs_idata_realloc(dp, -ifp->if_bytes, XFS_DATA_FORK);
- xfs_bmap_local_to_extents_empty(dp, XFS_DATA_FORK);
+ xfs_bmap_local_to_extents_empty(tp, dp, XFS_DATA_FORK);
dp->i_d.di_size = 0;
/*
@@ -1123,7 +1112,7 @@ xfs_dir2_sf_to_block(
* The whole thing is initialized to free by the init routine.
* Say we're using the leaf and tail area.
*/
- dup = dp->d_ops->data_unused_p(hdr);
+ dup = bp->b_addr + offset;
needlog = needscan = 0;
error = xfs_dir2_data_use_free(args, bp, dup, args->geo->blksize - i,
i, &needlog, &needscan);
@@ -1146,35 +1135,37 @@ xfs_dir2_sf_to_block(
be16_to_cpu(dup->length), &needlog, &needscan);
if (error)
goto out_free;
+
/*
* Create entry for .
*/
- dep = dp->d_ops->data_dot_entry_p(hdr);
+ dep = bp->b_addr + offset;
dep->inumber = cpu_to_be64(dp->i_ino);
dep->namelen = 1;
dep->name[0] = '.';
- dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR);
- tagp = dp->d_ops->data_entry_tag_p(dep);
- *tagp = cpu_to_be16((char *)dep - (char *)hdr);
+ xfs_dir2_data_put_ftype(mp, dep, XFS_DIR3_FT_DIR);
+ tagp = xfs_dir2_data_entry_tag_p(mp, dep);
+ *tagp = cpu_to_be16(offset);
xfs_dir2_data_log_entry(args, bp, dep);
blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot);
- blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
- (char *)dep - (char *)hdr));
+ blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(offset));
+ offset += xfs_dir2_data_entsize(mp, dep->namelen);
+
/*
* Create entry for ..
*/
- dep = dp->d_ops->data_dotdot_entry_p(hdr);
- dep->inumber = cpu_to_be64(dp->d_ops->sf_get_parent_ino(sfp));
+ dep = bp->b_addr + offset;
+ dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp));
dep->namelen = 2;
dep->name[0] = dep->name[1] = '.';
- dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR);
- tagp = dp->d_ops->data_entry_tag_p(dep);
- *tagp = cpu_to_be16((char *)dep - (char *)hdr);
+ xfs_dir2_data_put_ftype(mp, dep, XFS_DIR3_FT_DIR);
+ tagp = xfs_dir2_data_entry_tag_p(mp, dep);
+ *tagp = cpu_to_be16(offset);
xfs_dir2_data_log_entry(args, bp, dep);
blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot);
- blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
- (char *)dep - (char *)hdr));
- offset = dp->d_ops->data_first_offset;
+ blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(offset));
+ offset += xfs_dir2_data_entsize(mp, dep->namelen);
+
/*
* Loop over existing entries, stuff them in.
*/
@@ -1183,6 +1174,7 @@ xfs_dir2_sf_to_block(
sfep = NULL;
else
sfep = xfs_dir2_sf_firstentry(sfp);
+
/*
* Need to preserve the existing offset values in the sf directory.
* Insert holes (unused entries) where necessary.
@@ -1199,40 +1191,39 @@ xfs_dir2_sf_to_block(
* There should be a hole here, make one.
*/
if (offset < newoffset) {
- dup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);
+ dup = bp->b_addr + offset;
dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
dup->length = cpu_to_be16(newoffset - offset);
- *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16(
- ((char *)dup - (char *)hdr));
+ *xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16(offset);
xfs_dir2_data_log_unused(args, bp, dup);
xfs_dir2_data_freeinsert(hdr,
- dp->d_ops->data_bestfree_p(hdr),
- dup, &dummy);
+ xfs_dir2_data_bestfree_p(mp, hdr),
+ dup, &dummy);
offset += be16_to_cpu(dup->length);
continue;
}
/*
* Copy a real entry.
*/
- dep = (xfs_dir2_data_entry_t *)((char *)hdr + newoffset);
- dep->inumber = cpu_to_be64(dp->d_ops->sf_get_ino(sfp, sfep));
+ dep = bp->b_addr + newoffset;
+ dep->inumber = cpu_to_be64(xfs_dir2_sf_get_ino(mp, sfp, sfep));
dep->namelen = sfep->namelen;
- dp->d_ops->data_put_ftype(dep, dp->d_ops->sf_get_ftype(sfep));
+ xfs_dir2_data_put_ftype(mp, dep,
+ xfs_dir2_sf_get_ftype(mp, sfep));
memcpy(dep->name, sfep->name, dep->namelen);
- tagp = dp->d_ops->data_entry_tag_p(dep);
- *tagp = cpu_to_be16((char *)dep - (char *)hdr);
+ tagp = xfs_dir2_data_entry_tag_p(mp, dep);
+ *tagp = cpu_to_be16(newoffset);
xfs_dir2_data_log_entry(args, bp, dep);
name.name = sfep->name;
name.len = sfep->namelen;
- blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops->
- hashname(&name));
- blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(
- (char *)dep - (char *)hdr));
+ blp[2 + i].hashval = cpu_to_be32(xfs_dir2_hashname(mp, &name));
+ blp[2 + i].address =
+ cpu_to_be32(xfs_dir2_byte_to_dataptr(newoffset));
offset = (int)((char *)(tagp + 1) - (char *)hdr);
if (++i == sfp->count)
sfep = NULL;
else
- sfep = dp->d_ops->sf_nextentry(sfp, sfep);
+ sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
}
/* Done with the temporary buffer */
kmem_free(sfp);
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index 2c79be4c3153..b9eba8213180 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -13,6 +13,7 @@
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
#include "xfs_error.h"
#include "xfs_trans.h"
#include "xfs_buf_item.h"
@@ -23,6 +24,71 @@ static xfs_failaddr_t xfs_dir2_data_freefind_verify(
struct xfs_dir2_data_unused *dup,
struct xfs_dir2_data_free **bf_ent);
+struct xfs_dir2_data_free *
+xfs_dir2_data_bestfree_p(
+ struct xfs_mount *mp,
+ struct xfs_dir2_data_hdr *hdr)
+{
+ if (xfs_sb_version_hascrc(&mp->m_sb))
+ return ((struct xfs_dir3_data_hdr *)hdr)->best_free;
+ return hdr->bestfree;
+}
+
+/*
+ * Pointer to an entry's tag word.
+ */
+__be16 *
+xfs_dir2_data_entry_tag_p(
+ struct xfs_mount *mp,
+ struct xfs_dir2_data_entry *dep)
+{
+ return (__be16 *)((char *)dep +
+ xfs_dir2_data_entsize(mp, dep->namelen) - sizeof(__be16));
+}
+
+uint8_t
+xfs_dir2_data_get_ftype(
+ struct xfs_mount *mp,
+ struct xfs_dir2_data_entry *dep)
+{
+ if (xfs_sb_version_hasftype(&mp->m_sb)) {
+ uint8_t ftype = dep->name[dep->namelen];
+
+ if (likely(ftype < XFS_DIR3_FT_MAX))
+ return ftype;
+ }
+
+ return XFS_DIR3_FT_UNKNOWN;
+}
+
+void
+xfs_dir2_data_put_ftype(
+ struct xfs_mount *mp,
+ struct xfs_dir2_data_entry *dep,
+ uint8_t ftype)
+{
+ ASSERT(ftype < XFS_DIR3_FT_MAX);
+ ASSERT(dep->namelen != 0);
+
+ if (xfs_sb_version_hasftype(&mp->m_sb))
+ dep->name[dep->namelen] = ftype;
+}
+
+/*
+ * The number of leaf entries is limited by the size of the block and the amount
+ * of space used by the data entries. We don't know how much space is used by
+ * the data entries yet, so just ensure that the count falls somewhere inside
+ * the block right now.
+ */
+static inline unsigned int
+xfs_dir2_data_max_leaf_entries(
+ struct xfs_da_geometry *geo)
+{
+ return (geo->blksize - sizeof(struct xfs_dir2_block_tail) -
+ geo->data_entry_offset) /
+ sizeof(struct xfs_dir2_leaf_entry);
+}
+
/*
* Check the consistency of the data block.
* The input can also be a block-format directory.
@@ -38,40 +104,27 @@ __xfs_dir3_data_check(
xfs_dir2_block_tail_t *btp=NULL; /* block tail */
int count; /* count of entries found */
xfs_dir2_data_hdr_t *hdr; /* data block header */
- xfs_dir2_data_entry_t *dep; /* data entry */
xfs_dir2_data_free_t *dfp; /* bestfree entry */
- xfs_dir2_data_unused_t *dup; /* unused entry */
- char *endp; /* end of useful data */
int freeseen; /* mask of bestfrees seen */
xfs_dahash_t hash; /* hash of current name */
int i; /* leaf index */
int lastfree; /* last entry was unused */
xfs_dir2_leaf_entry_t *lep=NULL; /* block leaf entries */
struct xfs_mount *mp = bp->b_mount;
- char *p; /* current data position */
int stale; /* count of stale leaves */
struct xfs_name name;
- const struct xfs_dir_ops *ops;
- struct xfs_da_geometry *geo;
-
- geo = mp->m_dir_geo;
+ unsigned int offset;
+ unsigned int end;
+ struct xfs_da_geometry *geo = mp->m_dir_geo;
/*
- * We can be passed a null dp here from a verifier, so we need to go the
- * hard way to get them.
+ * If this isn't a directory, something is seriously wrong. Bail out.
*/
- ops = xfs_dir_get_ops(mp, dp);
-
- /*
- * If this isn't a directory, or we don't get handed the dir ops,
- * something is seriously wrong. Bail out.
- */
- if ((dp && !S_ISDIR(VFS_I(dp)->i_mode)) ||
- ops != xfs_dir_get_ops(mp, NULL))
+ if (dp && !S_ISDIR(VFS_I(dp)->i_mode))
return __this_address;
hdr = bp->b_addr;
- p = (char *)ops->data_entry_p(hdr);
+ offset = geo->data_entry_offset;
switch (hdr->magic) {
case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
@@ -79,15 +132,8 @@ __xfs_dir3_data_check(
btp = xfs_dir2_block_tail_p(geo, hdr);
lep = xfs_dir2_block_leaf_p(btp);
- /*
- * The number of leaf entries is limited by the size of the
- * block and the amount of space used by the data entries.
- * We don't know how much space is used by the data entries yet,
- * so just ensure that the count falls somewhere inside the
- * block right now.
- */
if (be32_to_cpu(btp->count) >=
- ((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry))
+ xfs_dir2_data_max_leaf_entries(geo))
return __this_address;
break;
case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
@@ -96,14 +142,14 @@ __xfs_dir3_data_check(
default:
return __this_address;
}
- endp = xfs_dir3_data_endp(geo, hdr);
- if (!endp)
+ end = xfs_dir3_data_end_offset(geo, hdr);
+ if (!end)
return __this_address;
/*
* Account for zero bestfree entries.
*/
- bf = ops->data_bestfree_p(hdr);
+ bf = xfs_dir2_data_bestfree_p(mp, hdr);
count = lastfree = freeseen = 0;
if (!bf[0].length) {
if (bf[0].offset)
@@ -128,8 +174,10 @@ __xfs_dir3_data_check(
/*
* Loop over the data/unused entries.
*/
- while (p < endp) {
- dup = (xfs_dir2_data_unused_t *)p;
+ while (offset < end) {
+ struct xfs_dir2_data_unused *dup = bp->b_addr + offset;
+ struct xfs_dir2_data_entry *dep = bp->b_addr + offset;
+
/*
* If it's unused, look for the space in the bestfree table.
* If we find it, account for that, else make sure it
@@ -140,10 +188,10 @@ __xfs_dir3_data_check(
if (lastfree != 0)
return __this_address;
- if (endp < p + be16_to_cpu(dup->length))
+ if (offset + be16_to_cpu(dup->length) > end)
return __this_address;
if (be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) !=
- (char *)dup - (char *)hdr)
+ offset)
return __this_address;
fa = xfs_dir2_data_freefind_verify(hdr, bf, dup, &dfp);
if (fa)
@@ -158,7 +206,7 @@ __xfs_dir3_data_check(
be16_to_cpu(bf[2].length))
return __this_address;
}
- p += be16_to_cpu(dup->length);
+ offset += be16_to_cpu(dup->length);
lastfree = 1;
continue;
}
@@ -168,17 +216,15 @@ __xfs_dir3_data_check(
* in the leaf section of the block.
* The linear search is crude but this is DEBUG code.
*/
- dep = (xfs_dir2_data_entry_t *)p;
if (dep->namelen == 0)
return __this_address;
if (xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)))
return __this_address;
- if (endp < p + ops->data_entsize(dep->namelen))
+ if (offset + xfs_dir2_data_entsize(mp, dep->namelen) > end)
return __this_address;
- if (be16_to_cpu(*ops->data_entry_tag_p(dep)) !=
- (char *)dep - (char *)hdr)
+ if (be16_to_cpu(*xfs_dir2_data_entry_tag_p(mp, dep)) != offset)
return __this_address;
- if (ops->data_get_ftype(dep) >= XFS_DIR3_FT_MAX)
+ if (xfs_dir2_data_get_ftype(mp, dep) >= XFS_DIR3_FT_MAX)
return __this_address;
count++;
lastfree = 0;
@@ -189,7 +235,7 @@ __xfs_dir3_data_check(
((char *)dep - (char *)hdr));
name.name = dep->name;
name.len = dep->namelen;
- hash = mp->m_dirnameops->hashname(&name);
+ hash = xfs_dir2_hashname(mp, &name);
for (i = 0; i < be32_to_cpu(btp->count); i++) {
if (be32_to_cpu(lep[i].address) == addr &&
be32_to_cpu(lep[i].hashval) == hash)
@@ -198,7 +244,7 @@ __xfs_dir3_data_check(
if (i >= be32_to_cpu(btp->count))
return __this_address;
}
- p += ops->data_entsize(dep->namelen);
+ offset += xfs_dir2_data_entsize(mp, dep->namelen);
}
/*
* Need to have seen all the entries and all the bestfree slots.
@@ -354,13 +400,13 @@ xfs_dir3_data_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
xfs_dablk_t bno,
- xfs_daddr_t mapped_bno,
+ unsigned int flags,
struct xfs_buf **bpp)
{
int err;
- err = xfs_da_read_buf(tp, dp, bno, mapped_bno, bpp,
- XFS_DATA_FORK, &xfs_dir3_data_buf_ops);
+ err = xfs_da_read_buf(tp, dp, bno, flags, bpp, XFS_DATA_FORK,
+ &xfs_dir3_data_buf_ops);
if (!err && tp && *bpp)
xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_DATA_BUF);
return err;
@@ -370,10 +416,10 @@ int
xfs_dir3_data_readahead(
struct xfs_inode *dp,
xfs_dablk_t bno,
- xfs_daddr_t mapped_bno)
+ unsigned int flags)
{
- return xfs_da_reada_buf(dp, bno, mapped_bno,
- XFS_DATA_FORK, &xfs_dir3_data_reada_buf_ops);
+ return xfs_da_reada_buf(dp, bno, flags, XFS_DATA_FORK,
+ &xfs_dir3_data_reada_buf_ops);
}
/*
@@ -561,17 +607,16 @@ xfs_dir2_data_freeremove(
* Given a data block, reconstruct its bestfree map.
*/
void
-xfs_dir2_data_freescan_int(
- struct xfs_da_geometry *geo,
- const struct xfs_dir_ops *ops,
- struct xfs_dir2_data_hdr *hdr,
- int *loghead)
+xfs_dir2_data_freescan(
+ struct xfs_mount *mp,
+ struct xfs_dir2_data_hdr *hdr,
+ int *loghead)
{
- xfs_dir2_data_entry_t *dep; /* active data entry */
- xfs_dir2_data_unused_t *dup; /* unused data entry */
- struct xfs_dir2_data_free *bf;
- char *endp; /* end of block's data */
- char *p; /* current entry pointer */
+ struct xfs_da_geometry *geo = mp->m_dir_geo;
+ struct xfs_dir2_data_free *bf = xfs_dir2_data_bestfree_p(mp, hdr);
+ void *addr = hdr;
+ unsigned int offset = geo->data_entry_offset;
+ unsigned int end;
ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||
@@ -581,79 +626,60 @@ xfs_dir2_data_freescan_int(
/*
* Start by clearing the table.
*/
- bf = ops->data_bestfree_p(hdr);
memset(bf, 0, sizeof(*bf) * XFS_DIR2_DATA_FD_COUNT);
*loghead = 1;
- /*
- * Set up pointers.
- */
- p = (char *)ops->data_entry_p(hdr);
- endp = xfs_dir3_data_endp(geo, hdr);
- /*
- * Loop over the block's entries.
- */
- while (p < endp) {
- dup = (xfs_dir2_data_unused_t *)p;
+
+ end = xfs_dir3_data_end_offset(geo, addr);
+ while (offset < end) {
+ struct xfs_dir2_data_unused *dup = addr + offset;
+ struct xfs_dir2_data_entry *dep = addr + offset;
+
/*
* If it's a free entry, insert it.
*/
if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
- ASSERT((char *)dup - (char *)hdr ==
+ ASSERT(offset ==
be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)));
xfs_dir2_data_freeinsert(hdr, bf, dup, loghead);
- p += be16_to_cpu(dup->length);
+ offset += be16_to_cpu(dup->length);
+ continue;
}
+
/*
* For active entries, check their tags and skip them.
*/
- else {
- dep = (xfs_dir2_data_entry_t *)p;
- ASSERT((char *)dep - (char *)hdr ==
- be16_to_cpu(*ops->data_entry_tag_p(dep)));
- p += ops->data_entsize(dep->namelen);
- }
+ ASSERT(offset ==
+ be16_to_cpu(*xfs_dir2_data_entry_tag_p(mp, dep)));
+ offset += xfs_dir2_data_entsize(mp, dep->namelen);
}
}
-void
-xfs_dir2_data_freescan(
- struct xfs_inode *dp,
- struct xfs_dir2_data_hdr *hdr,
- int *loghead)
-{
- return xfs_dir2_data_freescan_int(dp->i_mount->m_dir_geo, dp->d_ops,
- hdr, loghead);
-}
-
/*
* Initialize a data block at the given block number in the directory.
* Give back the buffer for the created block.
*/
int /* error */
xfs_dir3_data_init(
- xfs_da_args_t *args, /* directory operation args */
- xfs_dir2_db_t blkno, /* logical dir block number */
- struct xfs_buf **bpp) /* output block buffer */
+ struct xfs_da_args *args, /* directory operation args */
+ xfs_dir2_db_t blkno, /* logical dir block number */
+ struct xfs_buf **bpp) /* output block buffer */
{
- struct xfs_buf *bp; /* block buffer */
- xfs_dir2_data_hdr_t *hdr; /* data block header */
- xfs_inode_t *dp; /* incore directory inode */
- xfs_dir2_data_unused_t *dup; /* unused entry pointer */
- struct xfs_dir2_data_free *bf;
- int error; /* error return value */
- int i; /* bestfree index */
- xfs_mount_t *mp; /* filesystem mount point */
- xfs_trans_t *tp; /* transaction pointer */
- int t; /* temp */
-
- dp = args->dp;
- mp = dp->i_mount;
- tp = args->trans;
+ struct xfs_trans *tp = args->trans;
+ struct xfs_inode *dp = args->dp;
+ struct xfs_mount *mp = dp->i_mount;
+ struct xfs_da_geometry *geo = args->geo;
+ struct xfs_buf *bp;
+ struct xfs_dir2_data_hdr *hdr;
+ struct xfs_dir2_data_unused *dup;
+ struct xfs_dir2_data_free *bf;
+ int error;
+ int i;
+
/*
* Get the buffer set up for the block.
*/
error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, blkno),
- -1, &bp, XFS_DATA_FORK);
+ &bp, XFS_DATA_FORK);
if (error)
return error;
bp->b_ops = &xfs_dir3_data_buf_ops;
@@ -675,8 +701,9 @@ xfs_dir3_data_init(
} else
hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC);
- bf = dp->d_ops->data_bestfree_p(hdr);
- bf[0].offset = cpu_to_be16(dp->d_ops->data_entry_offset);
+ bf = xfs_dir2_data_bestfree_p(mp, hdr);
+ bf[0].offset = cpu_to_be16(geo->data_entry_offset);
+ bf[0].length = cpu_to_be16(geo->blksize - geo->data_entry_offset);
for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) {
bf[i].length = 0;
bf[i].offset = 0;
@@ -685,13 +712,11 @@ xfs_dir3_data_init(
/*
* Set up an unused entry for the block's body.
*/
- dup = dp->d_ops->data_unused_p(hdr);
+ dup = bp->b_addr + geo->data_entry_offset;
dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);
-
- t = args->geo->blksize - (uint)dp->d_ops->data_entry_offset;
- bf[0].length = cpu_to_be16(t);
- dup->length = cpu_to_be16(t);
+ dup->length = bf[0].length;
*xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr);
+
/*
* Log it and return it.
*/
@@ -710,6 +735,7 @@ xfs_dir2_data_log_entry(
struct xfs_buf *bp,
xfs_dir2_data_entry_t *dep) /* data entry pointer */
{
+ struct xfs_mount *mp = bp->b_mount;
struct xfs_dir2_data_hdr *hdr = bp->b_addr;
ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
@@ -718,7 +744,7 @@ xfs_dir2_data_log_entry(
hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
xfs_trans_log_buf(args->trans, bp, (uint)((char *)dep - (char *)hdr),
- (uint)((char *)(args->dp->d_ops->data_entry_tag_p(dep) + 1) -
+ (uint)((char *)(xfs_dir2_data_entry_tag_p(mp, dep) + 1) -
(char *)hdr - 1));
}
@@ -739,8 +765,7 @@ xfs_dir2_data_log_header(
hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC));
#endif
- xfs_trans_log_buf(args->trans, bp, 0,
- args->dp->d_ops->data_entry_offset - 1);
+ xfs_trans_log_buf(args->trans, bp, 0, args->geo->data_entry_offset - 1);
}
/*
@@ -789,11 +814,11 @@ xfs_dir2_data_make_free(
{
xfs_dir2_data_hdr_t *hdr; /* data block pointer */
xfs_dir2_data_free_t *dfp; /* bestfree pointer */
- char *endptr; /* end of data area */
int needscan; /* need to regen bestfree */
xfs_dir2_data_unused_t *newdup; /* new unused entry */
xfs_dir2_data_unused_t *postdup; /* unused entry after us */
xfs_dir2_data_unused_t *prevdup; /* unused entry before us */
+ unsigned int end;
struct xfs_dir2_data_free *bf;
hdr = bp->b_addr;
@@ -801,14 +826,14 @@ xfs_dir2_data_make_free(
/*
* Figure out where the end of the data area is.
*/
- endptr = xfs_dir3_data_endp(args->geo, hdr);
- ASSERT(endptr != NULL);
+ end = xfs_dir3_data_end_offset(args->geo, hdr);
+ ASSERT(end != 0);
/*
* If this isn't the start of the block, then back up to
* the previous entry and see if it's free.
*/
- if (offset > args->dp->d_ops->data_entry_offset) {
+ if (offset > args->geo->data_entry_offset) {
__be16 *tagp; /* tag just before us */
tagp = (__be16 *)((char *)hdr + offset) - 1;
@@ -821,7 +846,7 @@ xfs_dir2_data_make_free(
* If this isn't the end of the block, see if the entry after
* us is free.
*/
- if ((char *)hdr + offset + len < endptr) {
+ if (offset + len < end) {
postdup =
(xfs_dir2_data_unused_t *)((char *)hdr + offset + len);
if (be16_to_cpu(postdup->freetag) != XFS_DIR2_DATA_FREE_TAG)
@@ -834,7 +859,7 @@ xfs_dir2_data_make_free(
* Previous and following entries are both free,
* merge everything into a single free entry.
*/
- bf = args->dp->d_ops->data_bestfree_p(hdr);
+ bf = xfs_dir2_data_bestfree_p(args->dp->i_mount, hdr);
if (prevdup && postdup) {
xfs_dir2_data_free_t *dfp2; /* another bestfree pointer */
@@ -1025,7 +1050,7 @@ xfs_dir2_data_use_free(
* Look up the entry in the bestfree table.
*/
oldlen = be16_to_cpu(dup->length);
- bf = args->dp->d_ops->data_bestfree_p(hdr);
+ bf = xfs_dir2_data_bestfree_p(args->dp->i_mount, hdr);
dfp = xfs_dir2_data_freefind(hdr, bf, dup);
ASSERT(dfp || oldlen <= be16_to_cpu(bf[2].length));
/*
@@ -1149,19 +1174,22 @@ corrupt:
}
/* Find the end of the entry data in a data/block format dir block. */
-void *
-xfs_dir3_data_endp(
+unsigned int
+xfs_dir3_data_end_offset(
struct xfs_da_geometry *geo,
struct xfs_dir2_data_hdr *hdr)
{
+ void *p;
+
switch (hdr->magic) {
case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):
case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC):
- return xfs_dir2_block_leaf_p(xfs_dir2_block_tail_p(geo, hdr));
+ p = xfs_dir2_block_leaf_p(xfs_dir2_block_tail_p(geo, hdr));
+ return p - (void *)hdr;
case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
- return (char *)hdr + geo->blksize;
+ return geo->blksize;
default:
- return NULL;
+ return 0;
}
}
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index a53e4585a2f3..a131b520aac7 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -24,12 +24,73 @@
* Local function declarations.
*/
static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp,
- int *indexp, struct xfs_buf **dbpp);
+ int *indexp, struct xfs_buf **dbpp,
+ struct xfs_dir3_icleaf_hdr *leafhdr);
static void xfs_dir3_leaf_log_bests(struct xfs_da_args *args,
struct xfs_buf *bp, int first, int last);
static void xfs_dir3_leaf_log_tail(struct xfs_da_args *args,
struct xfs_buf *bp);
+void
+xfs_dir2_leaf_hdr_from_disk(
+ struct xfs_mount *mp,
+ struct xfs_dir3_icleaf_hdr *to,
+ struct xfs_dir2_leaf *from)
+{
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ struct xfs_dir3_leaf *from3 = (struct xfs_dir3_leaf *)from;
+
+ to->forw = be32_to_cpu(from3->hdr.info.hdr.forw);
+ to->back = be32_to_cpu(from3->hdr.info.hdr.back);
+ to->magic = be16_to_cpu(from3->hdr.info.hdr.magic);
+ to->count = be16_to_cpu(from3->hdr.count);
+ to->stale = be16_to_cpu(from3->hdr.stale);
+ to->ents = from3->__ents;
+
+ ASSERT(to->magic == XFS_DIR3_LEAF1_MAGIC ||
+ to->magic == XFS_DIR3_LEAFN_MAGIC);
+ } else {
+ to->forw = be32_to_cpu(from->hdr.info.forw);
+ to->back = be32_to_cpu(from->hdr.info.back);
+ to->magic = be16_to_cpu(from->hdr.info.magic);
+ to->count = be16_to_cpu(from->hdr.count);
+ to->stale = be16_to_cpu(from->hdr.stale);
+ to->ents = from->__ents;
+
+ ASSERT(to->magic == XFS_DIR2_LEAF1_MAGIC ||
+ to->magic == XFS_DIR2_LEAFN_MAGIC);
+ }
+}
+
+void
+xfs_dir2_leaf_hdr_to_disk(
+ struct xfs_mount *mp,
+ struct xfs_dir2_leaf *to,
+ struct xfs_dir3_icleaf_hdr *from)
+{
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ struct xfs_dir3_leaf *to3 = (struct xfs_dir3_leaf *)to;
+
+ ASSERT(from->magic == XFS_DIR3_LEAF1_MAGIC ||
+ from->magic == XFS_DIR3_LEAFN_MAGIC);
+
+ to3->hdr.info.hdr.forw = cpu_to_be32(from->forw);
+ to3->hdr.info.hdr.back = cpu_to_be32(from->back);
+ to3->hdr.info.hdr.magic = cpu_to_be16(from->magic);
+ to3->hdr.count = cpu_to_be16(from->count);
+ to3->hdr.stale = cpu_to_be16(from->stale);
+ } else {
+ ASSERT(from->magic == XFS_DIR2_LEAF1_MAGIC ||
+ from->magic == XFS_DIR2_LEAFN_MAGIC);
+
+ to->hdr.info.forw = cpu_to_be32(from->forw);
+ to->hdr.info.back = cpu_to_be32(from->back);
+ to->hdr.info.magic = cpu_to_be16(from->magic);
+ to->hdr.count = cpu_to_be16(from->count);
+ to->hdr.stale = cpu_to_be16(from->stale);
+ }
+}
+
/*
* Check the internal consistency of a leaf1 block.
* Pop an assert if something is wrong.
@@ -43,7 +104,7 @@ xfs_dir3_leaf1_check(
struct xfs_dir2_leaf *leaf = bp->b_addr;
struct xfs_dir3_icleaf_hdr leafhdr;
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+ xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, leaf);
if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) {
struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
@@ -52,7 +113,7 @@ xfs_dir3_leaf1_check(
} else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC)
return __this_address;
- return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf);
+ return xfs_dir3_leaf_check_int(dp->i_mount, &leafhdr, leaf);
}
static inline void
@@ -76,31 +137,15 @@ xfs_dir3_leaf_check(
xfs_failaddr_t
xfs_dir3_leaf_check_int(
- struct xfs_mount *mp,
- struct xfs_inode *dp,
- struct xfs_dir3_icleaf_hdr *hdr,
- struct xfs_dir2_leaf *leaf)
+ struct xfs_mount *mp,
+ struct xfs_dir3_icleaf_hdr *hdr,
+ struct xfs_dir2_leaf *leaf)
{
- struct xfs_dir2_leaf_entry *ents;
- xfs_dir2_leaf_tail_t *ltp;
- int stale;
- int i;
- const struct xfs_dir_ops *ops;
- struct xfs_dir3_icleaf_hdr leafhdr;
- struct xfs_da_geometry *geo = mp->m_dir_geo;
-
- /*
- * we can be passed a null dp here from a verifier, so we need to go the
- * hard way to get them.
- */
- ops = xfs_dir_get_ops(mp, dp);
+ struct xfs_da_geometry *geo = mp->m_dir_geo;
+ xfs_dir2_leaf_tail_t *ltp;
+ int stale;
+ int i;
- if (!hdr) {
- ops->leaf_hdr_from_disk(&leafhdr, leaf);
- hdr = &leafhdr;
- }
-
- ents = ops->leaf_ents_p(leaf);
ltp = xfs_dir2_leaf_tail_p(geo, leaf);
/*
@@ -108,23 +153,23 @@ xfs_dir3_leaf_check_int(
* Should factor in the size of the bests table as well.
* We can deduce a value for that from di_size.
*/
- if (hdr->count > ops->leaf_max_ents(geo))
+ if (hdr->count > geo->leaf_max_ents)
return __this_address;
/* Leaves and bests don't overlap in leaf format. */
if ((hdr->magic == XFS_DIR2_LEAF1_MAGIC ||
hdr->magic == XFS_DIR3_LEAF1_MAGIC) &&
- (char *)&ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp))
+ (char *)&hdr->ents[hdr->count] > (char *)xfs_dir2_leaf_bests_p(ltp))
return __this_address;
/* Check hash value order, count stale entries. */
for (i = stale = 0; i < hdr->count; i++) {
if (i + 1 < hdr->count) {
- if (be32_to_cpu(ents[i].hashval) >
- be32_to_cpu(ents[i + 1].hashval))
+ if (be32_to_cpu(hdr->ents[i].hashval) >
+ be32_to_cpu(hdr->ents[i + 1].hashval))
return __this_address;
}
- if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
+ if (hdr->ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
stale++;
}
if (hdr->stale != stale)
@@ -139,17 +184,18 @@ xfs_dir3_leaf_check_int(
*/
static xfs_failaddr_t
xfs_dir3_leaf_verify(
- struct xfs_buf *bp)
+ struct xfs_buf *bp)
{
- struct xfs_mount *mp = bp->b_mount;
- struct xfs_dir2_leaf *leaf = bp->b_addr;
- xfs_failaddr_t fa;
+ struct xfs_mount *mp = bp->b_mount;
+ struct xfs_dir3_icleaf_hdr leafhdr;
+ xfs_failaddr_t fa;
fa = xfs_da3_blkinfo_verify(bp, bp->b_addr);
if (fa)
return fa;
- return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf);
+ xfs_dir2_leaf_hdr_from_disk(mp, &leafhdr, bp->b_addr);
+ return xfs_dir3_leaf_check_int(mp, &leafhdr, bp->b_addr);
}
static void
@@ -216,13 +262,12 @@ xfs_dir3_leaf_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
xfs_dablk_t fbno,
- xfs_daddr_t mappedbno,
struct xfs_buf **bpp)
{
int err;
- err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
- XFS_DATA_FORK, &xfs_dir3_leaf1_buf_ops);
+ err = xfs_da_read_buf(tp, dp, fbno, 0, bpp, XFS_DATA_FORK,
+ &xfs_dir3_leaf1_buf_ops);
if (!err && tp && *bpp)
xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAF1_BUF);
return err;
@@ -233,13 +278,12 @@ xfs_dir3_leafn_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
xfs_dablk_t fbno,
- xfs_daddr_t mappedbno,
struct xfs_buf **bpp)
{
int err;
- err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
- XFS_DATA_FORK, &xfs_dir3_leafn_buf_ops);
+ err = xfs_da_read_buf(tp, dp, fbno, 0, bpp, XFS_DATA_FORK,
+ &xfs_dir3_leafn_buf_ops);
if (!err && tp && *bpp)
xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_LEAFN_BUF);
return err;
@@ -311,7 +355,7 @@ xfs_dir3_leaf_get_buf(
bno < xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET));
error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, bno),
- -1, &bp, XFS_DATA_FORK);
+ &bp, XFS_DATA_FORK);
if (error)
return error;
@@ -346,7 +390,6 @@ xfs_dir2_block_to_leaf(
int needscan; /* need to rescan bestfree */
xfs_trans_t *tp; /* transaction pointer */
struct xfs_dir2_data_free *bf;
- struct xfs_dir2_leaf_entry *ents;
struct xfs_dir3_icleaf_hdr leafhdr;
trace_xfs_dir2_block_to_leaf(args);
@@ -375,24 +418,24 @@ xfs_dir2_block_to_leaf(
xfs_dir3_data_check(dp, dbp);
btp = xfs_dir2_block_tail_p(args->geo, hdr);
blp = xfs_dir2_block_leaf_p(btp);
- bf = dp->d_ops->data_bestfree_p(hdr);
- ents = dp->d_ops->leaf_ents_p(leaf);
+ bf = xfs_dir2_data_bestfree_p(dp->i_mount, hdr);
/*
* Set the counts in the leaf header.
*/
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+ xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, leaf);
leafhdr.count = be32_to_cpu(btp->count);
leafhdr.stale = be32_to_cpu(btp->stale);
- dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
+ xfs_dir2_leaf_hdr_to_disk(dp->i_mount, leaf, &leafhdr);
xfs_dir3_leaf_log_header(args, lbp);
/*
* Could compact these but I think we always do the conversion
* after squeezing out stale entries.
*/
- memcpy(ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t));
- xfs_dir3_leaf_log_ents(args, lbp, 0, leafhdr.count - 1);
+ memcpy(leafhdr.ents, blp,
+ be32_to_cpu(btp->count) * sizeof(struct xfs_dir2_leaf_entry));
+ xfs_dir3_leaf_log_ents(args, &leafhdr, lbp, 0, leafhdr.count - 1);
needscan = 0;
needlog = 1;
/*
@@ -415,7 +458,7 @@ xfs_dir2_block_to_leaf(
hdr->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC);
if (needscan)
- xfs_dir2_data_freescan(dp, hdr, &needlog);
+ xfs_dir2_data_freescan(dp->i_mount, hdr, &needlog);
/*
* Set up leaf tail and bests table.
*/
@@ -594,7 +637,7 @@ xfs_dir2_leaf_addname(
trace_xfs_dir2_leaf_addname(args);
- error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp);
+ error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, &lbp);
if (error)
return error;
@@ -607,10 +650,10 @@ xfs_dir2_leaf_addname(
index = xfs_dir2_leaf_search_hash(args, lbp);
leaf = lbp->b_addr;
ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
- ents = dp->d_ops->leaf_ents_p(leaf);
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+ xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, leaf);
+ ents = leafhdr.ents;
bestsp = xfs_dir2_leaf_bests_p(ltp);
- length = dp->d_ops->data_entsize(args->namelen);
+ length = xfs_dir2_data_entsize(dp->i_mount, args->namelen);
/*
* See if there are any entries with the same hash value
@@ -773,7 +816,7 @@ xfs_dir2_leaf_addname(
else
xfs_dir3_leaf_log_bests(args, lbp, use_block, use_block);
hdr = dbp->b_addr;
- bf = dp->d_ops->data_bestfree_p(hdr);
+ bf = xfs_dir2_data_bestfree_p(dp->i_mount, hdr);
bestsp[use_block] = bf[0].length;
grown = 1;
} else {
@@ -783,13 +826,13 @@ xfs_dir2_leaf_addname(
*/
error = xfs_dir3_data_read(tp, dp,
xfs_dir2_db_to_da(args->geo, use_block),
- -1, &dbp);
+ 0, &dbp);
if (error) {
xfs_trans_brelse(tp, lbp);
return error;
}
hdr = dbp->b_addr;
- bf = dp->d_ops->data_bestfree_p(hdr);
+ bf = xfs_dir2_data_bestfree_p(dp->i_mount, hdr);
grown = 0;
}
/*
@@ -815,14 +858,14 @@ xfs_dir2_leaf_addname(
dep->inumber = cpu_to_be64(args->inumber);
dep->namelen = args->namelen;
memcpy(dep->name, args->name, dep->namelen);
- dp->d_ops->data_put_ftype(dep, args->filetype);
- tagp = dp->d_ops->data_entry_tag_p(dep);
+ xfs_dir2_data_put_ftype(dp->i_mount, dep, args->filetype);
+ tagp = xfs_dir2_data_entry_tag_p(dp->i_mount, dep);
*tagp = cpu_to_be16((char *)dep - (char *)hdr);
/*
* Need to scan fix up the bestfree table.
*/
if (needscan)
- xfs_dir2_data_freescan(dp, hdr, &needlog);
+ xfs_dir2_data_freescan(dp->i_mount, hdr, &needlog);
/*
* Need to log the data block's header.
*/
@@ -852,9 +895,9 @@ xfs_dir2_leaf_addname(
/*
* Log the leaf fields and give up the buffers.
*/
- dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
+ xfs_dir2_leaf_hdr_to_disk(dp->i_mount, leaf, &leafhdr);
xfs_dir3_leaf_log_header(args, lbp);
- xfs_dir3_leaf_log_ents(args, lbp, lfloglow, lfloghigh);
+ xfs_dir3_leaf_log_ents(args, &leafhdr, lbp, lfloglow, lfloghigh);
xfs_dir3_leaf_check(dp, lbp);
xfs_dir3_data_check(dp, dbp);
return 0;
@@ -874,7 +917,6 @@ xfs_dir3_leaf_compact(
xfs_dir2_leaf_t *leaf; /* leaf structure */
int loglow; /* first leaf entry to log */
int to; /* target leaf index */
- struct xfs_dir2_leaf_entry *ents;
struct xfs_inode *dp = args->dp;
leaf = bp->b_addr;
@@ -884,9 +926,9 @@ xfs_dir3_leaf_compact(
/*
* Compress out the stale entries in place.
*/
- ents = dp->d_ops->leaf_ents_p(leaf);
for (from = to = 0, loglow = -1; from < leafhdr->count; from++) {
- if (ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
+ if (leafhdr->ents[from].address ==
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
continue;
/*
* Only actually copy the entries that are different.
@@ -894,7 +936,7 @@ xfs_dir3_leaf_compact(
if (from > to) {
if (loglow == -1)
loglow = to;
- ents[to] = ents[from];
+ leafhdr->ents[to] = leafhdr->ents[from];
}
to++;
}
@@ -905,10 +947,10 @@ xfs_dir3_leaf_compact(
leafhdr->count -= leafhdr->stale;
leafhdr->stale = 0;
- dp->d_ops->leaf_hdr_to_disk(leaf, leafhdr);
+ xfs_dir2_leaf_hdr_to_disk(dp->i_mount, leaf, leafhdr);
xfs_dir3_leaf_log_header(args, bp);
if (loglow != -1)
- xfs_dir3_leaf_log_ents(args, bp, loglow, to - 1);
+ xfs_dir3_leaf_log_ents(args, leafhdr, bp, loglow, to - 1);
}
/*
@@ -1037,6 +1079,7 @@ xfs_dir3_leaf_log_bests(
void
xfs_dir3_leaf_log_ents(
struct xfs_da_args *args,
+ struct xfs_dir3_icleaf_hdr *hdr,
struct xfs_buf *bp,
int first,
int last)
@@ -1044,16 +1087,14 @@ xfs_dir3_leaf_log_ents(
xfs_dir2_leaf_entry_t *firstlep; /* pointer to first entry */
xfs_dir2_leaf_entry_t *lastlep; /* pointer to last entry */
struct xfs_dir2_leaf *leaf = bp->b_addr;
- struct xfs_dir2_leaf_entry *ents;
ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||
leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||
leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||
leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC));
- ents = args->dp->d_ops->leaf_ents_p(leaf);
- firstlep = &ents[first];
- lastlep = &ents[last];
+ firstlep = &hdr->ents[first];
+ lastlep = &hdr->ents[last];
xfs_trans_log_buf(args->trans, bp,
(uint)((char *)firstlep - (char *)leaf),
(uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1));
@@ -1076,7 +1117,7 @@ xfs_dir3_leaf_log_header(
xfs_trans_log_buf(args->trans, bp,
(uint)((char *)&leaf->hdr - (char *)leaf),
- args->dp->d_ops->leaf_hdr_size - 1);
+ args->geo->leaf_hdr_size - 1);
}
/*
@@ -1115,28 +1156,27 @@ xfs_dir2_leaf_lookup(
int error; /* error return code */
int index; /* found entry index */
struct xfs_buf *lbp; /* leaf buffer */
- xfs_dir2_leaf_t *leaf; /* leaf structure */
xfs_dir2_leaf_entry_t *lep; /* leaf entry */
xfs_trans_t *tp; /* transaction pointer */
- struct xfs_dir2_leaf_entry *ents;
+ struct xfs_dir3_icleaf_hdr leafhdr;
trace_xfs_dir2_leaf_lookup(args);
/*
* Look up name in the leaf block, returning both buffers and index.
*/
- if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
+ error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp, &leafhdr);
+ if (error)
return error;
- }
+
tp = args->trans;
dp = args->dp;
xfs_dir3_leaf_check(dp, lbp);
- leaf = lbp->b_addr;
- ents = dp->d_ops->leaf_ents_p(leaf);
+
/*
* Get to the leaf entry and contained data entry address.
*/
- lep = &ents[index];
+ lep = &leafhdr.ents[index];
/*
* Point to the data entry.
@@ -1148,7 +1188,7 @@ xfs_dir2_leaf_lookup(
* Return the found inode number & CI name if appropriate
*/
args->inumber = be64_to_cpu(dep->inumber);
- args->filetype = dp->d_ops->data_get_ftype(dep);
+ args->filetype = xfs_dir2_data_get_ftype(dp->i_mount, dep);
error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);
xfs_trans_brelse(tp, dbp);
xfs_trans_brelse(tp, lbp);
@@ -1166,7 +1206,8 @@ xfs_dir2_leaf_lookup_int(
xfs_da_args_t *args, /* operation arguments */
struct xfs_buf **lbpp, /* out: leaf buffer */
int *indexp, /* out: index in leaf block */
- struct xfs_buf **dbpp) /* out: data buffer */
+ struct xfs_buf **dbpp, /* out: data buffer */
+ struct xfs_dir3_icleaf_hdr *leafhdr)
{
xfs_dir2_db_t curdb = -1; /* current data block number */
struct xfs_buf *dbp = NULL; /* data buffer */
@@ -1182,22 +1223,19 @@ xfs_dir2_leaf_lookup_int(
xfs_trans_t *tp; /* transaction pointer */
xfs_dir2_db_t cidb = -1; /* case match data block no. */
enum xfs_dacmp cmp; /* name compare result */
- struct xfs_dir2_leaf_entry *ents;
- struct xfs_dir3_icleaf_hdr leafhdr;
dp = args->dp;
tp = args->trans;
mp = dp->i_mount;
- error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp);
+ error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, &lbp);
if (error)
return error;
*lbpp = lbp;
leaf = lbp->b_addr;
xfs_dir3_leaf_check(dp, lbp);
- ents = dp->d_ops->leaf_ents_p(leaf);
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+ xfs_dir2_leaf_hdr_from_disk(mp, leafhdr, leaf);
/*
* Look for the first leaf entry with our hash value.
@@ -1207,8 +1245,9 @@ xfs_dir2_leaf_lookup_int(
* Loop over all the entries with the right hash value
* looking to match the name.
*/
- for (lep = &ents[index];
- index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval;
+ for (lep = &leafhdr->ents[index];
+ index < leafhdr->count &&
+ be32_to_cpu(lep->hashval) == args->hashval;
lep++, index++) {
/*
* Skip over stale leaf entries.
@@ -1229,7 +1268,7 @@ xfs_dir2_leaf_lookup_int(
xfs_trans_brelse(tp, dbp);
error = xfs_dir3_data_read(tp, dp,
xfs_dir2_db_to_da(args->geo, newdb),
- -1, &dbp);
+ 0, &dbp);
if (error) {
xfs_trans_brelse(tp, lbp);
return error;
@@ -1247,7 +1286,7 @@ xfs_dir2_leaf_lookup_int(
* and buffer. If it's the first case-insensitive match, store
* the index and buffer and continue looking for an exact match.
*/
- cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
+ cmp = xfs_dir2_compname(args, dep->name, dep->namelen);
if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
args->cmpresult = cmp;
*indexp = index;
@@ -1271,7 +1310,7 @@ xfs_dir2_leaf_lookup_int(
xfs_trans_brelse(tp, dbp);
error = xfs_dir3_data_read(tp, dp,
xfs_dir2_db_to_da(args->geo, cidb),
- -1, &dbp);
+ 0, &dbp);
if (error) {
xfs_trans_brelse(tp, lbp);
return error;
@@ -1297,6 +1336,7 @@ int /* error */
xfs_dir2_leaf_removename(
xfs_da_args_t *args) /* operation arguments */
{
+ struct xfs_da_geometry *geo = args->geo;
__be16 *bestsp; /* leaf block best freespace */
xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_db_t db; /* data block number */
@@ -1314,7 +1354,6 @@ xfs_dir2_leaf_removename(
int needscan; /* need to rescan data frees */
xfs_dir2_data_off_t oldbest; /* old value of best free */
struct xfs_dir2_data_free *bf; /* bestfree table */
- struct xfs_dir2_leaf_entry *ents;
struct xfs_dir3_icleaf_hdr leafhdr;
trace_xfs_dir2_leaf_removename(args);
@@ -1322,51 +1361,54 @@ xfs_dir2_leaf_removename(
/*
* Lookup the leaf entry, get the leaf and data blocks read in.
*/
- if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
+ error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp, &leafhdr);
+ if (error)
return error;
- }
+
dp = args->dp;
leaf = lbp->b_addr;
hdr = dbp->b_addr;
xfs_dir3_data_check(dp, dbp);
- bf = dp->d_ops->data_bestfree_p(hdr);
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
- ents = dp->d_ops->leaf_ents_p(leaf);
+ bf = xfs_dir2_data_bestfree_p(dp->i_mount, hdr);
+
/*
* Point to the leaf entry, use that to point to the data entry.
*/
- lep = &ents[index];
- db = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address));
+ lep = &leafhdr.ents[index];
+ db = xfs_dir2_dataptr_to_db(geo, be32_to_cpu(lep->address));
dep = (xfs_dir2_data_entry_t *)((char *)hdr +
- xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)));
+ xfs_dir2_dataptr_to_off(geo, be32_to_cpu(lep->address)));
needscan = needlog = 0;
oldbest = be16_to_cpu(bf[0].length);
- ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
+ ltp = xfs_dir2_leaf_tail_p(geo, leaf);
bestsp = xfs_dir2_leaf_bests_p(ltp);
- if (be16_to_cpu(bestsp[db]) != oldbest)
+ if (be16_to_cpu(bestsp[db]) != oldbest) {
+ xfs_buf_corruption_error(lbp);
return -EFSCORRUPTED;
+ }
/*
* Mark the former data entry unused.
*/
xfs_dir2_data_make_free(args, dbp,
(xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr),
- dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan);
+ xfs_dir2_data_entsize(dp->i_mount, dep->namelen), &needlog,
+ &needscan);
/*
* We just mark the leaf entry stale by putting a null in it.
*/
leafhdr.stale++;
- dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
+ xfs_dir2_leaf_hdr_to_disk(dp->i_mount, leaf, &leafhdr);
xfs_dir3_leaf_log_header(args, lbp);
lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
- xfs_dir3_leaf_log_ents(args, lbp, index, index);
+ xfs_dir3_leaf_log_ents(args, &leafhdr, lbp, index, index);
/*
* Scan the freespace in the data block again if necessary,
* log the data block header if necessary.
*/
if (needscan)
- xfs_dir2_data_freescan(dp, hdr, &needlog);
+ xfs_dir2_data_freescan(dp->i_mount, hdr, &needlog);
if (needlog)
xfs_dir2_data_log_header(args, dbp);
/*
@@ -1382,8 +1424,8 @@ xfs_dir2_leaf_removename(
* If the data block is now empty then get rid of the data block.
*/
if (be16_to_cpu(bf[0].length) ==
- args->geo->blksize - dp->d_ops->data_entry_offset) {
- ASSERT(db != args->geo->datablk);
+ geo->blksize - geo->data_entry_offset) {
+ ASSERT(db != geo->datablk);
if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {
/*
* Nope, can't get rid of it because it caused
@@ -1425,7 +1467,7 @@ xfs_dir2_leaf_removename(
/*
* If the data block was not the first one, drop it.
*/
- else if (db != args->geo->datablk)
+ else if (db != geo->datablk)
dbp = NULL;
xfs_dir3_leaf_check(dp, lbp);
@@ -1448,26 +1490,24 @@ xfs_dir2_leaf_replace(
int error; /* error return code */
int index; /* index of leaf entry */
struct xfs_buf *lbp; /* leaf buffer */
- xfs_dir2_leaf_t *leaf; /* leaf structure */
xfs_dir2_leaf_entry_t *lep; /* leaf entry */
xfs_trans_t *tp; /* transaction pointer */
- struct xfs_dir2_leaf_entry *ents;
+ struct xfs_dir3_icleaf_hdr leafhdr;
trace_xfs_dir2_leaf_replace(args);
/*
* Look up the entry.
*/
- if ((error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp))) {
+ error = xfs_dir2_leaf_lookup_int(args, &lbp, &index, &dbp, &leafhdr);
+ if (error)
return error;
- }
+
dp = args->dp;
- leaf = lbp->b_addr;
- ents = dp->d_ops->leaf_ents_p(leaf);
/*
* Point to the leaf entry, get data address from it.
*/
- lep = &ents[index];
+ lep = &leafhdr.ents[index];
/*
* Point to the data entry.
*/
@@ -1479,7 +1519,7 @@ xfs_dir2_leaf_replace(
* Put the new inode number in, log it.
*/
dep->inumber = cpu_to_be64(args->inumber);
- dp->d_ops->data_put_ftype(dep, args->filetype);
+ xfs_dir2_data_put_ftype(dp->i_mount, dep, args->filetype);
tp = args->trans;
xfs_dir2_data_log_entry(args, dbp, dep);
xfs_dir3_leaf_check(dp, lbp);
@@ -1501,21 +1541,17 @@ xfs_dir2_leaf_search_hash(
xfs_dahash_t hashwant; /* hash value looking for */
int high; /* high leaf index */
int low; /* low leaf index */
- xfs_dir2_leaf_t *leaf; /* leaf structure */
xfs_dir2_leaf_entry_t *lep; /* leaf entry */
int mid=0; /* current leaf index */
- struct xfs_dir2_leaf_entry *ents;
struct xfs_dir3_icleaf_hdr leafhdr;
- leaf = lbp->b_addr;
- ents = args->dp->d_ops->leaf_ents_p(leaf);
- args->dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+ xfs_dir2_leaf_hdr_from_disk(args->dp->i_mount, &leafhdr, lbp->b_addr);
/*
* Note, the table cannot be empty, so we have to go through the loop.
* Binary search the leaf entries looking for our hash value.
*/
- for (lep = ents, low = 0, high = leafhdr.count - 1,
+ for (lep = leafhdr.ents, low = 0, high = leafhdr.count - 1,
hashwant = args->hashval;
low <= high; ) {
mid = (low + high) >> 1;
@@ -1552,6 +1588,7 @@ xfs_dir2_leaf_trim_data(
struct xfs_buf *lbp, /* leaf buffer */
xfs_dir2_db_t db) /* data block number */
{
+ struct xfs_da_geometry *geo = args->geo;
__be16 *bestsp; /* leaf bests table */
struct xfs_buf *dbp; /* data block buffer */
xfs_inode_t *dp; /* incore directory inode */
@@ -1565,23 +1602,23 @@ xfs_dir2_leaf_trim_data(
/*
* Read the offending data block. We need its buffer.
*/
- error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(args->geo, db),
- -1, &dbp);
+ error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(geo, db), 0, &dbp);
if (error)
return error;
leaf = lbp->b_addr;
- ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
+ ltp = xfs_dir2_leaf_tail_p(geo, leaf);
#ifdef DEBUG
{
struct xfs_dir2_data_hdr *hdr = dbp->b_addr;
- struct xfs_dir2_data_free *bf = dp->d_ops->data_bestfree_p(hdr);
+ struct xfs_dir2_data_free *bf =
+ xfs_dir2_data_bestfree_p(dp->i_mount, hdr);
ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||
hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC));
ASSERT(be16_to_cpu(bf[0].length) ==
- args->geo->blksize - dp->d_ops->data_entry_offset);
+ geo->blksize - geo->data_entry_offset);
ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);
}
#endif
@@ -1639,7 +1676,6 @@ xfs_dir2_node_to_leaf(
int error; /* error return code */
struct xfs_buf *fbp; /* buffer for freespace block */
xfs_fileoff_t fo; /* freespace file offset */
- xfs_dir2_free_t *free; /* freespace structure */
struct xfs_buf *lbp; /* buffer for leaf block */
xfs_dir2_leaf_tail_t *ltp; /* tail of leaf structure */
xfs_dir2_leaf_t *leaf; /* leaf structure */
@@ -1697,7 +1733,7 @@ xfs_dir2_node_to_leaf(
return 0;
lbp = state->path.blk[0].bp;
leaf = lbp->b_addr;
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+ xfs_dir2_leaf_hdr_from_disk(mp, &leafhdr, leaf);
ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
leafhdr.magic == XFS_DIR3_LEAFN_MAGIC);
@@ -1708,8 +1744,7 @@ xfs_dir2_node_to_leaf(
error = xfs_dir2_free_read(tp, dp, args->geo->freeblk, &fbp);
if (error)
return error;
- free = fbp->b_addr;
- dp->d_ops->free_hdr_from_disk(&freehdr, free);
+ xfs_dir2_free_hdr_from_disk(mp, &freehdr, fbp->b_addr);
ASSERT(!freehdr.firstdb);
@@ -1743,10 +1778,10 @@ xfs_dir2_node_to_leaf(
/*
* Set up the leaf bests table.
*/
- memcpy(xfs_dir2_leaf_bests_p(ltp), dp->d_ops->free_bests_p(free),
+ memcpy(xfs_dir2_leaf_bests_p(ltp), freehdr.bests,
freehdr.nvalid * sizeof(xfs_dir2_data_off_t));
- dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
+ xfs_dir2_leaf_hdr_to_disk(mp, leaf, &leafhdr);
xfs_dir3_leaf_log_header(args, lbp);
xfs_dir3_leaf_log_bests(args, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);
xfs_dir3_leaf_log_tail(args, lbp);
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 1fc44efc344d..a0cc5e240306 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -32,8 +32,25 @@ static void xfs_dir2_leafn_rebalance(xfs_da_state_t *state,
static int xfs_dir2_leafn_remove(xfs_da_args_t *args, struct xfs_buf *bp,
int index, xfs_da_state_blk_t *dblk,
int *rval);
-static int xfs_dir2_node_addname_int(xfs_da_args_t *args,
- xfs_da_state_blk_t *fblk);
+
+/*
+ * Convert data space db to the corresponding free db.
+ */
+static xfs_dir2_db_t
+xfs_dir2_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
+{
+ return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) +
+ (db / geo->free_max_bests);
+}
+
+/*
+ * Convert data space db to the corresponding index in a free db.
+ */
+static int
+xfs_dir2_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db)
+{
+ return db % geo->free_max_bests;
+}
/*
* Check internal consistency of a leafn block.
@@ -47,7 +64,7 @@ xfs_dir3_leafn_check(
struct xfs_dir2_leaf *leaf = bp->b_addr;
struct xfs_dir3_icleaf_hdr leafhdr;
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+ xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, leaf);
if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC) {
struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
@@ -56,7 +73,7 @@ xfs_dir3_leafn_check(
} else if (leafhdr.magic != XFS_DIR2_LEAFN_MAGIC)
return __this_address;
- return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf);
+ return xfs_dir3_leaf_check_int(dp->i_mount, &leafhdr, leaf);
}
static inline void
@@ -162,10 +179,9 @@ xfs_dir3_free_header_check(
struct xfs_buf *bp)
{
struct xfs_mount *mp = dp->i_mount;
+ int maxbests = mp->m_dir_geo->free_max_bests;
unsigned int firstdb;
- int maxbests;
- maxbests = dp->d_ops->free_max_bests(mp->m_dir_geo);
firstdb = (xfs_dir2_da_to_db(mp->m_dir_geo, fbno) -
xfs_dir2_byte_to_db(mp->m_dir_geo, XFS_DIR2_FREE_OFFSET)) *
maxbests;
@@ -196,14 +212,14 @@ __xfs_dir3_free_read(
struct xfs_trans *tp,
struct xfs_inode *dp,
xfs_dablk_t fbno,
- xfs_daddr_t mappedbno,
+ unsigned int flags,
struct xfs_buf **bpp)
{
xfs_failaddr_t fa;
int err;
- err = xfs_da_read_buf(tp, dp, fbno, mappedbno, bpp,
- XFS_DATA_FORK, &xfs_dir3_free_buf_ops);
+ err = xfs_da_read_buf(tp, dp, fbno, flags, bpp, XFS_DATA_FORK,
+ &xfs_dir3_free_buf_ops);
if (err || !*bpp)
return err;
@@ -222,6 +238,58 @@ __xfs_dir3_free_read(
return 0;
}
+void
+xfs_dir2_free_hdr_from_disk(
+ struct xfs_mount *mp,
+ struct xfs_dir3_icfree_hdr *to,
+ struct xfs_dir2_free *from)
+{
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ struct xfs_dir3_free *from3 = (struct xfs_dir3_free *)from;
+
+ to->magic = be32_to_cpu(from3->hdr.hdr.magic);
+ to->firstdb = be32_to_cpu(from3->hdr.firstdb);
+ to->nvalid = be32_to_cpu(from3->hdr.nvalid);
+ to->nused = be32_to_cpu(from3->hdr.nused);
+ to->bests = from3->bests;
+
+ ASSERT(to->magic == XFS_DIR3_FREE_MAGIC);
+ } else {
+ to->magic = be32_to_cpu(from->hdr.magic);
+ to->firstdb = be32_to_cpu(from->hdr.firstdb);
+ to->nvalid = be32_to_cpu(from->hdr.nvalid);
+ to->nused = be32_to_cpu(from->hdr.nused);
+ to->bests = from->bests;
+
+ ASSERT(to->magic == XFS_DIR2_FREE_MAGIC);
+ }
+}
+
+static void
+xfs_dir2_free_hdr_to_disk(
+ struct xfs_mount *mp,
+ struct xfs_dir2_free *to,
+ struct xfs_dir3_icfree_hdr *from)
+{
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ struct xfs_dir3_free *to3 = (struct xfs_dir3_free *)to;
+
+ ASSERT(from->magic == XFS_DIR3_FREE_MAGIC);
+
+ to3->hdr.hdr.magic = cpu_to_be32(from->magic);
+ to3->hdr.firstdb = cpu_to_be32(from->firstdb);
+ to3->hdr.nvalid = cpu_to_be32(from->nvalid);
+ to3->hdr.nused = cpu_to_be32(from->nused);
+ } else {
+ ASSERT(from->magic == XFS_DIR2_FREE_MAGIC);
+
+ to->hdr.magic = cpu_to_be32(from->magic);
+ to->hdr.firstdb = cpu_to_be32(from->firstdb);
+ to->hdr.nvalid = cpu_to_be32(from->nvalid);
+ to->hdr.nused = cpu_to_be32(from->nused);
+ }
+}
+
int
xfs_dir2_free_read(
struct xfs_trans *tp,
@@ -229,7 +297,7 @@ xfs_dir2_free_read(
xfs_dablk_t fbno,
struct xfs_buf **bpp)
{
- return __xfs_dir3_free_read(tp, dp, fbno, -1, bpp);
+ return __xfs_dir3_free_read(tp, dp, fbno, 0, bpp);
}
static int
@@ -239,7 +307,7 @@ xfs_dir2_free_try_read(
xfs_dablk_t fbno,
struct xfs_buf **bpp)
{
- return __xfs_dir3_free_read(tp, dp, fbno, -2, bpp);
+ return __xfs_dir3_free_read(tp, dp, fbno, XFS_DABUF_MAP_HOLE_OK, bpp);
}
static int
@@ -256,7 +324,7 @@ xfs_dir3_free_get_buf(
struct xfs_dir3_icfree_hdr hdr;
error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, fbno),
- -1, &bp, XFS_DATA_FORK);
+ &bp, XFS_DATA_FORK);
if (error)
return error;
@@ -280,7 +348,7 @@ xfs_dir3_free_get_buf(
uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_meta_uuid);
} else
hdr.magic = XFS_DIR2_FREE_MAGIC;
- dp->d_ops->free_hdr_to_disk(bp->b_addr, &hdr);
+ xfs_dir2_free_hdr_to_disk(mp, bp->b_addr, &hdr);
*bpp = bp;
return 0;
}
@@ -291,21 +359,19 @@ xfs_dir3_free_get_buf(
STATIC void
xfs_dir2_free_log_bests(
struct xfs_da_args *args,
+ struct xfs_dir3_icfree_hdr *hdr,
struct xfs_buf *bp,
int first, /* first entry to log */
int last) /* last entry to log */
{
- xfs_dir2_free_t *free; /* freespace structure */
- __be16 *bests;
+ struct xfs_dir2_free *free = bp->b_addr;
- free = bp->b_addr;
- bests = args->dp->d_ops->free_bests_p(free);
ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||
free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));
xfs_trans_log_buf(args->trans, bp,
- (uint)((char *)&bests[first] - (char *)free),
- (uint)((char *)&bests[last] - (char *)free +
- sizeof(bests[0]) - 1));
+ (char *)&hdr->bests[first] - (char *)free,
+ (char *)&hdr->bests[last] - (char *)free +
+ sizeof(hdr->bests[0]) - 1);
}
/*
@@ -324,7 +390,7 @@ xfs_dir2_free_log_header(
free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));
#endif
xfs_trans_log_buf(args->trans, bp, 0,
- args->dp->d_ops->free_hdr_size - 1);
+ args->geo->free_hdr_size - 1);
}
/*
@@ -341,14 +407,12 @@ xfs_dir2_leaf_to_node(
int error; /* error return value */
struct xfs_buf *fbp; /* freespace buffer */
xfs_dir2_db_t fdb; /* freespace block number */
- xfs_dir2_free_t *free; /* freespace structure */
__be16 *from; /* pointer to freespace entry */
int i; /* leaf freespace index */
xfs_dir2_leaf_t *leaf; /* leaf structure */
xfs_dir2_leaf_tail_t *ltp; /* leaf tail structure */
int n; /* count of live freespc ents */
xfs_dir2_data_off_t off; /* freespace entry value */
- __be16 *to; /* pointer to freespace entry */
xfs_trans_t *tp; /* transaction pointer */
struct xfs_dir3_icfree_hdr freehdr;
@@ -370,24 +434,25 @@ xfs_dir2_leaf_to_node(
if (error)
return error;
- free = fbp->b_addr;
- dp->d_ops->free_hdr_from_disk(&freehdr, free);
+ xfs_dir2_free_hdr_from_disk(dp->i_mount, &freehdr, fbp->b_addr);
leaf = lbp->b_addr;
ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);
if (be32_to_cpu(ltp->bestcount) >
- (uint)dp->i_d.di_size / args->geo->blksize)
+ (uint)dp->i_d.di_size / args->geo->blksize) {
+ xfs_buf_corruption_error(lbp);
return -EFSCORRUPTED;
+ }
/*
* Copy freespace entries from the leaf block to the new block.
* Count active entries.
*/
from = xfs_dir2_leaf_bests_p(ltp);
- to = dp->d_ops->free_bests_p(free);
- for (i = n = 0; i < be32_to_cpu(ltp->bestcount); i++, from++, to++) {
- if ((off = be16_to_cpu(*from)) != NULLDATAOFF)
+ for (i = n = 0; i < be32_to_cpu(ltp->bestcount); i++, from++) {
+ off = be16_to_cpu(*from);
+ if (off != NULLDATAOFF)
n++;
- *to = cpu_to_be16(off);
+ freehdr.bests[i] = cpu_to_be16(off);
}
/*
@@ -396,8 +461,8 @@ xfs_dir2_leaf_to_node(
freehdr.nused = n;
freehdr.nvalid = be32_to_cpu(ltp->bestcount);
- dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr);
- xfs_dir2_free_log_bests(args, fbp, 0, freehdr.nvalid - 1);
+ xfs_dir2_free_hdr_to_disk(dp->i_mount, fbp->b_addr, &freehdr);
+ xfs_dir2_free_log_bests(args, &freehdr, fbp, 0, freehdr.nvalid - 1);
xfs_dir2_free_log_header(args, fbp);
/*
@@ -440,15 +505,17 @@ xfs_dir2_leafn_add(
trace_xfs_dir2_leafn_add(args, index);
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
- ents = dp->d_ops->leaf_ents_p(leaf);
+ xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, leaf);
+ ents = leafhdr.ents;
/*
* Quick check just to make sure we are not going to index
* into other peoples memory
*/
- if (index < 0)
+ if (index < 0) {
+ xfs_buf_corruption_error(bp);
return -EFSCORRUPTED;
+ }
/*
* If there are already the maximum number of leaf entries in
@@ -457,7 +524,7 @@ xfs_dir2_leafn_add(
* a compact.
*/
- if (leafhdr.count == dp->d_ops->leaf_max_ents(args->geo)) {
+ if (leafhdr.count == args->geo->leaf_max_ents) {
if (!leafhdr.stale)
return -ENOSPC;
compact = leafhdr.stale > 1;
@@ -495,9 +562,9 @@ xfs_dir2_leafn_add(
lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(args->geo,
args->blkno, args->index));
- dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
+ xfs_dir2_leaf_hdr_to_disk(dp->i_mount, leaf, &leafhdr);
xfs_dir3_leaf_log_header(args, bp);
- xfs_dir3_leaf_log_ents(args, bp, lfloglow, lfloghigh);
+ xfs_dir3_leaf_log_ents(args, &leafhdr, bp, lfloglow, lfloghigh);
xfs_dir3_leaf_check(dp, bp);
return 0;
}
@@ -511,10 +578,9 @@ xfs_dir2_free_hdr_check(
{
struct xfs_dir3_icfree_hdr hdr;
- dp->d_ops->free_hdr_from_disk(&hdr, bp->b_addr);
+ xfs_dir2_free_hdr_from_disk(dp->i_mount, &hdr, bp->b_addr);
- ASSERT((hdr.firstdb %
- dp->d_ops->free_max_bests(dp->i_mount->m_dir_geo)) == 0);
+ ASSERT((hdr.firstdb % dp->i_mount->m_dir_geo->free_max_bests) == 0);
ASSERT(hdr.firstdb <= db);
ASSERT(db < hdr.firstdb + hdr.nvalid);
}
@@ -532,11 +598,9 @@ xfs_dir2_leaf_lasthash(
struct xfs_buf *bp, /* leaf buffer */
int *count) /* count of entries in leaf */
{
- struct xfs_dir2_leaf *leaf = bp->b_addr;
- struct xfs_dir2_leaf_entry *ents;
struct xfs_dir3_icleaf_hdr leafhdr;
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
+ xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, bp->b_addr);
ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||
leafhdr.magic == XFS_DIR3_LEAFN_MAGIC ||
@@ -547,9 +611,7 @@ xfs_dir2_leaf_lasthash(
*count = leafhdr.count;
if (!leafhdr.count)
return 0;
-
- ents = dp->d_ops->leaf_ents_p(leaf);
- return be32_to_cpu(ents[leafhdr.count - 1].hashval);
+ return be32_to_cpu(leafhdr.ents[leafhdr.count - 1].hashval);
}
/*
@@ -578,15 +640,13 @@ xfs_dir2_leafn_lookup_for_addname(
xfs_dir2_db_t newdb; /* new data block number */
xfs_dir2_db_t newfdb; /* new free block number */
xfs_trans_t *tp; /* transaction pointer */
- struct xfs_dir2_leaf_entry *ents;
struct xfs_dir3_icleaf_hdr leafhdr;
dp = args->dp;
tp = args->trans;
mp = dp->i_mount;
leaf = bp->b_addr;
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
- ents = dp->d_ops->leaf_ents_p(leaf);
+ xfs_dir2_leaf_hdr_from_disk(mp, &leafhdr, leaf);
xfs_dir3_leaf_check(dp, bp);
ASSERT(leafhdr.count > 0);
@@ -606,11 +666,11 @@ xfs_dir2_leafn_lookup_for_addname(
ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||
free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));
}
- length = dp->d_ops->data_entsize(args->namelen);
+ length = xfs_dir2_data_entsize(mp, args->namelen);
/*
* Loop over leaf entries with the right hash value.
*/
- for (lep = &ents[index];
+ for (lep = &leafhdr.ents[index];
index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval;
lep++, index++) {
/*
@@ -632,14 +692,14 @@ xfs_dir2_leafn_lookup_for_addname(
* in hand, take a look at it.
*/
if (newdb != curdb) {
- __be16 *bests;
+ struct xfs_dir3_icfree_hdr freehdr;
curdb = newdb;
/*
* Convert the data block to the free block
* holding its freespace information.
*/
- newfdb = dp->d_ops->db_to_fdb(args->geo, newdb);
+ newfdb = xfs_dir2_db_to_fdb(args->geo, newdb);
/*
* If it's not the one we have in hand, read it in.
*/
@@ -663,20 +723,20 @@ xfs_dir2_leafn_lookup_for_addname(
/*
* Get the index for our entry.
*/
- fi = dp->d_ops->db_to_fdindex(args->geo, curdb);
+ fi = xfs_dir2_db_to_fdindex(args->geo, curdb);
/*
* If it has room, return it.
*/
- bests = dp->d_ops->free_bests_p(free);
- if (unlikely(bests[fi] == cpu_to_be16(NULLDATAOFF))) {
- XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",
- XFS_ERRLEVEL_LOW, mp);
+ xfs_dir2_free_hdr_from_disk(mp, &freehdr, free);
+ if (XFS_IS_CORRUPT(mp,
+ freehdr.bests[fi] ==
+ cpu_to_be16(NULLDATAOFF))) {
if (curfdb != newfdb)
xfs_trans_brelse(tp, curbp);
return -EFSCORRUPTED;
}
curfdb = newfdb;
- if (be16_to_cpu(bests[fi]) >= length)
+ if (be16_to_cpu(freehdr.bests[fi]) >= length)
goto out;
}
}
@@ -730,19 +790,19 @@ xfs_dir2_leafn_lookup_for_entry(
xfs_dir2_db_t newdb; /* new data block number */
xfs_trans_t *tp; /* transaction pointer */
enum xfs_dacmp cmp; /* comparison result */
- struct xfs_dir2_leaf_entry *ents;
struct xfs_dir3_icleaf_hdr leafhdr;
dp = args->dp;
tp = args->trans;
mp = dp->i_mount;
leaf = bp->b_addr;
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
- ents = dp->d_ops->leaf_ents_p(leaf);
+ xfs_dir2_leaf_hdr_from_disk(mp, &leafhdr, leaf);
xfs_dir3_leaf_check(dp, bp);
- if (leafhdr.count <= 0)
+ if (leafhdr.count <= 0) {
+ xfs_buf_corruption_error(bp);
return -EFSCORRUPTED;
+ }
/*
* Look up the hash value in the leaf entries.
@@ -758,7 +818,7 @@ xfs_dir2_leafn_lookup_for_entry(
/*
* Loop over leaf entries with the right hash value.
*/
- for (lep = &ents[index];
+ for (lep = &leafhdr.ents[index];
index < leafhdr.count && be32_to_cpu(lep->hashval) == args->hashval;
lep++, index++) {
/*
@@ -797,7 +857,7 @@ xfs_dir2_leafn_lookup_for_entry(
error = xfs_dir3_data_read(tp, dp,
xfs_dir2_db_to_da(args->geo,
newdb),
- -1, &curbp);
+ 0, &curbp);
if (error)
return error;
}
@@ -815,7 +875,7 @@ xfs_dir2_leafn_lookup_for_entry(
* EEXIST immediately. If it's the first case-insensitive
* match, store the block & inode number and continue looking.
*/
- cmp = mp->m_dirnameops->compname(args, dep->name, dep->namelen);
+ cmp = xfs_dir2_compname(args, dep->name, dep->namelen);
if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
/* If there is a CI match block, drop it */
if (args->cmpresult != XFS_CMP_DIFFERENT &&
@@ -823,7 +883,7 @@ xfs_dir2_leafn_lookup_for_entry(
xfs_trans_brelse(tp, state->extrablk.bp);
args->cmpresult = cmp;
args->inumber = be64_to_cpu(dep->inumber);
- args->filetype = dp->d_ops->data_get_ftype(dep);
+ args->filetype = xfs_dir2_data_get_ftype(mp, dep);
*indexp = index;
state->extravalid = 1;
state->extrablk.bp = curbp;
@@ -913,7 +973,7 @@ xfs_dir3_leafn_moveents(
if (start_d < dhdr->count) {
memmove(&dents[start_d + count], &dents[start_d],
(dhdr->count - start_d) * sizeof(xfs_dir2_leaf_entry_t));
- xfs_dir3_leaf_log_ents(args, bp_d, start_d + count,
+ xfs_dir3_leaf_log_ents(args, dhdr, bp_d, start_d + count,
count + dhdr->count - 1);
}
/*
@@ -935,7 +995,7 @@ xfs_dir3_leafn_moveents(
*/
memcpy(&dents[start_d], &sents[start_s],
count * sizeof(xfs_dir2_leaf_entry_t));
- xfs_dir3_leaf_log_ents(args, bp_d, start_d, start_d + count - 1);
+ xfs_dir3_leaf_log_ents(args, dhdr, bp_d, start_d, start_d + count - 1);
/*
* If there are source entries after the ones we copied,
@@ -944,7 +1004,8 @@ xfs_dir3_leafn_moveents(
if (start_s + count < shdr->count) {
memmove(&sents[start_s], &sents[start_s + count],
count * sizeof(xfs_dir2_leaf_entry_t));
- xfs_dir3_leaf_log_ents(args, bp_s, start_s, start_s + count - 1);
+ xfs_dir3_leaf_log_ents(args, shdr, bp_s, start_s,
+ start_s + count - 1);
}
/*
@@ -973,10 +1034,10 @@ xfs_dir2_leafn_order(
struct xfs_dir3_icleaf_hdr hdr1;
struct xfs_dir3_icleaf_hdr hdr2;
- dp->d_ops->leaf_hdr_from_disk(&hdr1, leaf1);
- dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf2);
- ents1 = dp->d_ops->leaf_ents_p(leaf1);
- ents2 = dp->d_ops->leaf_ents_p(leaf2);
+ xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &hdr1, leaf1);
+ xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &hdr2, leaf2);
+ ents1 = hdr1.ents;
+ ents2 = hdr2.ents;
if (hdr1.count > 0 && hdr2.count > 0 &&
(be32_to_cpu(ents2[0].hashval) < be32_to_cpu(ents1[0].hashval) ||
@@ -1026,10 +1087,10 @@ xfs_dir2_leafn_rebalance(
leaf1 = blk1->bp->b_addr;
leaf2 = blk2->bp->b_addr;
- dp->d_ops->leaf_hdr_from_disk(&hdr1, leaf1);
- dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf2);
- ents1 = dp->d_ops->leaf_ents_p(leaf1);
- ents2 = dp->d_ops->leaf_ents_p(leaf2);
+ xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &hdr1, leaf1);
+ xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &hdr2, leaf2);
+ ents1 = hdr1.ents;
+ ents2 = hdr2.ents;
oldsum = hdr1.count + hdr2.count;
#if defined(DEBUG) || defined(XFS_WARN)
@@ -1075,8 +1136,8 @@ xfs_dir2_leafn_rebalance(
ASSERT(hdr1.stale + hdr2.stale == oldstale);
/* log the changes made when moving the entries */
- dp->d_ops->leaf_hdr_to_disk(leaf1, &hdr1);
- dp->d_ops->leaf_hdr_to_disk(leaf2, &hdr2);
+ xfs_dir2_leaf_hdr_to_disk(dp->i_mount, leaf1, &hdr1);
+ xfs_dir2_leaf_hdr_to_disk(dp->i_mount, leaf2, &hdr2);
xfs_dir3_leaf_log_header(args, blk1->bp);
xfs_dir3_leaf_log_header(args, blk2->bp);
@@ -1122,19 +1183,17 @@ xfs_dir3_data_block_free(
int longest)
{
int logfree = 0;
- __be16 *bests;
struct xfs_dir3_icfree_hdr freehdr;
struct xfs_inode *dp = args->dp;
- dp->d_ops->free_hdr_from_disk(&freehdr, free);
- bests = dp->d_ops->free_bests_p(free);
+ xfs_dir2_free_hdr_from_disk(dp->i_mount, &freehdr, free);
if (hdr) {
/*
* Data block is not empty, just set the free entry to the new
* value.
*/
- bests[findex] = cpu_to_be16(longest);
- xfs_dir2_free_log_bests(args, fbp, findex, findex);
+ freehdr.bests[findex] = cpu_to_be16(longest);
+ xfs_dir2_free_log_bests(args, &freehdr, fbp, findex, findex);
return 0;
}
@@ -1150,18 +1209,18 @@ xfs_dir3_data_block_free(
int i; /* free entry index */
for (i = findex - 1; i >= 0; i--) {
- if (bests[i] != cpu_to_be16(NULLDATAOFF))
+ if (freehdr.bests[i] != cpu_to_be16(NULLDATAOFF))
break;
}
freehdr.nvalid = i + 1;
logfree = 0;
} else {
/* Not the last entry, just punch it out. */
- bests[findex] = cpu_to_be16(NULLDATAOFF);
+ freehdr.bests[findex] = cpu_to_be16(NULLDATAOFF);
logfree = 1;
}
- dp->d_ops->free_hdr_to_disk(free, &freehdr);
+ xfs_dir2_free_hdr_to_disk(dp->i_mount, free, &freehdr);
xfs_dir2_free_log_header(args, fbp);
/*
@@ -1186,7 +1245,7 @@ xfs_dir3_data_block_free(
/* Log the free entry that changed, unless we got rid of it. */
if (logfree)
- xfs_dir2_free_log_bests(args, fbp, findex, findex);
+ xfs_dir2_free_log_bests(args, &freehdr, fbp, findex, findex);
return 0;
}
@@ -1203,6 +1262,7 @@ xfs_dir2_leafn_remove(
xfs_da_state_blk_t *dblk, /* data block */
int *rval) /* resulting block needs join */
{
+ struct xfs_da_geometry *geo = args->geo;
xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_db_t db; /* data block number */
struct xfs_buf *dbp; /* data block buffer */
@@ -1217,27 +1277,25 @@ xfs_dir2_leafn_remove(
xfs_trans_t *tp; /* transaction pointer */
struct xfs_dir2_data_free *bf; /* bestfree table */
struct xfs_dir3_icleaf_hdr leafhdr;
- struct xfs_dir2_leaf_entry *ents;
trace_xfs_dir2_leafn_remove(args, index);
dp = args->dp;
tp = args->trans;
leaf = bp->b_addr;
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
- ents = dp->d_ops->leaf_ents_p(leaf);
+ xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, leaf);
/*
* Point to the entry we're removing.
*/
- lep = &ents[index];
+ lep = &leafhdr.ents[index];
/*
* Extract the data block and offset from the entry.
*/
- db = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address));
+ db = xfs_dir2_dataptr_to_db(geo, be32_to_cpu(lep->address));
ASSERT(dblk->blkno == db);
- off = xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address));
+ off = xfs_dir2_dataptr_to_off(geo, be32_to_cpu(lep->address));
ASSERT(dblk->index == off);
/*
@@ -1245,11 +1303,11 @@ xfs_dir2_leafn_remove(
* Log the leaf block changes.
*/
leafhdr.stale++;
- dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr);
+ xfs_dir2_leaf_hdr_to_disk(dp->i_mount, leaf, &leafhdr);
xfs_dir3_leaf_log_header(args, bp);
lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR);
- xfs_dir3_leaf_log_ents(args, bp, index, index);
+ xfs_dir3_leaf_log_ents(args, &leafhdr, bp, index, index);
/*
* Make the data entry free. Keep track of the longest freespace
@@ -1258,17 +1316,18 @@ xfs_dir2_leafn_remove(
dbp = dblk->bp;
hdr = dbp->b_addr;
dep = (xfs_dir2_data_entry_t *)((char *)hdr + off);
- bf = dp->d_ops->data_bestfree_p(hdr);
+ bf = xfs_dir2_data_bestfree_p(dp->i_mount, hdr);
longest = be16_to_cpu(bf[0].length);
needlog = needscan = 0;
xfs_dir2_data_make_free(args, dbp, off,
- dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan);
+ xfs_dir2_data_entsize(dp->i_mount, dep->namelen), &needlog,
+ &needscan);
/*
* Rescan the data block freespaces for bestfree.
* Log the data block header if needed.
*/
if (needscan)
- xfs_dir2_data_freescan(dp, hdr, &needlog);
+ xfs_dir2_data_freescan(dp->i_mount, hdr, &needlog);
if (needlog)
xfs_dir2_data_log_header(args, dbp);
xfs_dir3_data_check(dp, dbp);
@@ -1287,9 +1346,8 @@ xfs_dir2_leafn_remove(
* Convert the data block number to a free block,
* read in the free block.
*/
- fdb = dp->d_ops->db_to_fdb(args->geo, db);
- error = xfs_dir2_free_read(tp, dp,
- xfs_dir2_db_to_da(args->geo, fdb),
+ fdb = xfs_dir2_db_to_fdb(geo, db);
+ error = xfs_dir2_free_read(tp, dp, xfs_dir2_db_to_da(geo, fdb),
&fbp);
if (error)
return error;
@@ -1297,23 +1355,22 @@ xfs_dir2_leafn_remove(
#ifdef DEBUG
{
struct xfs_dir3_icfree_hdr freehdr;
- dp->d_ops->free_hdr_from_disk(&freehdr, free);
- ASSERT(freehdr.firstdb == dp->d_ops->free_max_bests(args->geo) *
- (fdb - xfs_dir2_byte_to_db(args->geo,
- XFS_DIR2_FREE_OFFSET)));
+
+ xfs_dir2_free_hdr_from_disk(dp->i_mount, &freehdr, free);
+ ASSERT(freehdr.firstdb == geo->free_max_bests *
+ (fdb - xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET)));
}
#endif
/*
* Calculate which entry we need to fix.
*/
- findex = dp->d_ops->db_to_fdindex(args->geo, db);
+ findex = xfs_dir2_db_to_fdindex(geo, db);
longest = be16_to_cpu(bf[0].length);
/*
* If the data block is now empty we can get rid of it
* (usually).
*/
- if (longest == args->geo->blksize -
- dp->d_ops->data_entry_offset) {
+ if (longest == geo->blksize - geo->data_entry_offset) {
/*
* Try to punch out the data block.
*/
@@ -1345,9 +1402,9 @@ xfs_dir2_leafn_remove(
* Return indication of whether this leaf block is empty enough
* to justify trying to join it with a neighbor.
*/
- *rval = (dp->d_ops->leaf_hdr_size +
- (uint)sizeof(ents[0]) * (leafhdr.count - leafhdr.stale)) <
- args->geo->magicpct;
+ *rval = (geo->leaf_hdr_size +
+ (uint)sizeof(leafhdr.ents) * (leafhdr.count - leafhdr.stale)) <
+ geo->magicpct;
return 0;
}
@@ -1446,12 +1503,12 @@ xfs_dir2_leafn_toosmall(
*/
blk = &state->path.blk[state->path.active - 1];
leaf = blk->bp->b_addr;
- dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
- ents = dp->d_ops->leaf_ents_p(leaf);
+ xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &leafhdr, leaf);
+ ents = leafhdr.ents;
xfs_dir3_leaf_check(dp, blk->bp);
count = leafhdr.count - leafhdr.stale;
- bytes = dp->d_ops->leaf_hdr_size + count * sizeof(ents[0]);
+ bytes = state->args->geo->leaf_hdr_size + count * sizeof(ents[0]);
if (bytes > (state->args->geo->blksize >> 1)) {
/*
* Blk over 50%, don't try to join.
@@ -1496,8 +1553,7 @@ xfs_dir2_leafn_toosmall(
/*
* Read the sibling leaf block.
*/
- error = xfs_dir3_leafn_read(state->args->trans, dp,
- blkno, -1, &bp);
+ error = xfs_dir3_leafn_read(state->args->trans, dp, blkno, &bp);
if (error)
return error;
@@ -1509,8 +1565,8 @@ xfs_dir2_leafn_toosmall(
(state->args->geo->blksize >> 2);
leaf = bp->b_addr;
- dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf);
- ents = dp->d_ops->leaf_ents_p(leaf);
+ xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &hdr2, leaf);
+ ents = hdr2.ents;
count += hdr2.count - hdr2.stale;
bytes -= count * sizeof(ents[0]);
@@ -1572,10 +1628,10 @@ xfs_dir2_leafn_unbalance(
drop_leaf = drop_blk->bp->b_addr;
save_leaf = save_blk->bp->b_addr;
- dp->d_ops->leaf_hdr_from_disk(&savehdr, save_leaf);
- dp->d_ops->leaf_hdr_from_disk(&drophdr, drop_leaf);
- sents = dp->d_ops->leaf_ents_p(save_leaf);
- dents = dp->d_ops->leaf_ents_p(drop_leaf);
+ xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &savehdr, save_leaf);
+ xfs_dir2_leaf_hdr_from_disk(dp->i_mount, &drophdr, drop_leaf);
+ sents = savehdr.ents;
+ dents = drophdr.ents;
/*
* If there are any stale leaf entries, take this opportunity
@@ -1601,8 +1657,8 @@ xfs_dir2_leafn_unbalance(
save_blk->hashval = be32_to_cpu(sents[savehdr.count - 1].hashval);
/* log the changes made when moving the entries */
- dp->d_ops->leaf_hdr_to_disk(save_leaf, &savehdr);
- dp->d_ops->leaf_hdr_to_disk(drop_leaf, &drophdr);
+ xfs_dir2_leaf_hdr_to_disk(dp->i_mount, save_leaf, &savehdr);
+ xfs_dir2_leaf_hdr_to_disk(dp->i_mount, drop_leaf, &drophdr);
xfs_dir3_leaf_log_header(args, save_blk->bp);
xfs_dir3_leaf_log_header(args, drop_blk->bp);
@@ -1611,113 +1667,143 @@ xfs_dir2_leafn_unbalance(
}
/*
- * Top-level node form directory addname routine.
+ * Add a new data block to the directory at the free space index that the caller
+ * has specified.
*/
-int /* error */
-xfs_dir2_node_addname(
- xfs_da_args_t *args) /* operation arguments */
+static int
+xfs_dir2_node_add_datablk(
+ struct xfs_da_args *args,
+ struct xfs_da_state_blk *fblk,
+ xfs_dir2_db_t *dbno,
+ struct xfs_buf **dbpp,
+ struct xfs_buf **fbpp,
+ struct xfs_dir3_icfree_hdr *hdr,
+ int *findex)
{
- xfs_da_state_blk_t *blk; /* leaf block for insert */
- int error; /* error return value */
- int rval; /* sub-return value */
- xfs_da_state_t *state; /* btree cursor */
+ struct xfs_inode *dp = args->dp;
+ struct xfs_trans *tp = args->trans;
+ struct xfs_mount *mp = dp->i_mount;
+ struct xfs_dir2_data_free *bf;
+ xfs_dir2_db_t fbno;
+ struct xfs_buf *fbp;
+ struct xfs_buf *dbp;
+ int error;
- trace_xfs_dir2_node_addname(args);
+ /* Not allowed to allocate, return failure. */
+ if (args->total == 0)
+ return -ENOSPC;
+
+ /* Allocate and initialize the new data block. */
+ error = xfs_dir2_grow_inode(args, XFS_DIR2_DATA_SPACE, dbno);
+ if (error)
+ return error;
+ error = xfs_dir3_data_init(args, *dbno, &dbp);
+ if (error)
+ return error;
/*
- * Allocate and initialize the state (btree cursor).
- */
- state = xfs_da_state_alloc();
- state->args = args;
- state->mp = args->dp->i_mount;
- /*
- * Look up the name. We're not supposed to find it, but
- * this gives us the insertion point.
+ * Get the freespace block corresponding to the data block
+ * that was just allocated.
*/
- error = xfs_da3_node_lookup_int(state, &rval);
+ fbno = xfs_dir2_db_to_fdb(args->geo, *dbno);
+ error = xfs_dir2_free_try_read(tp, dp,
+ xfs_dir2_db_to_da(args->geo, fbno), &fbp);
if (error)
- rval = error;
- if (rval != -ENOENT) {
- goto done;
- }
+ return error;
+
/*
- * Add the data entry to a data block.
- * Extravalid is set to a freeblock found by lookup.
+ * If there wasn't a freespace block, the read will
+ * return a NULL fbp. Allocate and initialize a new one.
*/
- rval = xfs_dir2_node_addname_int(args,
- state->extravalid ? &state->extrablk : NULL);
- if (rval) {
- goto done;
+ if (!fbp) {
+ error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, &fbno);
+ if (error)
+ return error;
+
+ if (XFS_IS_CORRUPT(mp,
+ xfs_dir2_db_to_fdb(args->geo, *dbno) !=
+ fbno)) {
+ xfs_alert(mp,
+"%s: dir ino %llu needed freesp block %lld for data block %lld, got %lld",
+ __func__, (unsigned long long)dp->i_ino,
+ (long long)xfs_dir2_db_to_fdb(args->geo, *dbno),
+ (long long)*dbno, (long long)fbno);
+ if (fblk) {
+ xfs_alert(mp,
+ " fblk "PTR_FMT" blkno %llu index %d magic 0x%x",
+ fblk, (unsigned long long)fblk->blkno,
+ fblk->index, fblk->magic);
+ } else {
+ xfs_alert(mp, " ... fblk is NULL");
+ }
+ return -EFSCORRUPTED;
+ }
+
+ /* Get a buffer for the new block. */
+ error = xfs_dir3_free_get_buf(args, fbno, &fbp);
+ if (error)
+ return error;
+ xfs_dir2_free_hdr_from_disk(mp, hdr, fbp->b_addr);
+
+ /* Remember the first slot as our empty slot. */
+ hdr->firstdb = (fbno - xfs_dir2_byte_to_db(args->geo,
+ XFS_DIR2_FREE_OFFSET)) *
+ args->geo->free_max_bests;
+ } else {
+ xfs_dir2_free_hdr_from_disk(mp, hdr, fbp->b_addr);
}
- blk = &state->path.blk[state->path.active - 1];
- ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
+
+ /* Set the freespace block index from the data block number. */
+ *findex = xfs_dir2_db_to_fdindex(args->geo, *dbno);
+
+ /* Extend the freespace table if the new data block is off the end. */
+ if (*findex >= hdr->nvalid) {
+ ASSERT(*findex < args->geo->free_max_bests);
+ hdr->nvalid = *findex + 1;
+ hdr->bests[*findex] = cpu_to_be16(NULLDATAOFF);
+ }
+
/*
- * Add the new leaf entry.
+ * If this entry was for an empty data block (this should always be
+ * true) then update the header.
*/
- rval = xfs_dir2_leafn_add(blk->bp, args, blk->index);
- if (rval == 0) {
- /*
- * It worked, fix the hash values up the btree.
- */
- if (!(args->op_flags & XFS_DA_OP_JUSTCHECK))
- xfs_da3_fixhashpath(state, &state->path);
- } else {
- /*
- * It didn't work, we need to split the leaf block.
- */
- if (args->total == 0) {
- ASSERT(rval == -ENOSPC);
- goto done;
- }
- /*
- * Split the leaf block and insert the new entry.
- */
- rval = xfs_da3_split(state);
+ if (hdr->bests[*findex] == cpu_to_be16(NULLDATAOFF)) {
+ hdr->nused++;
+ xfs_dir2_free_hdr_to_disk(mp, fbp->b_addr, hdr);
+ xfs_dir2_free_log_header(args, fbp);
}
-done:
- xfs_da_state_free(state);
- return rval;
+
+ /* Update the freespace value for the new block in the table. */
+ bf = xfs_dir2_data_bestfree_p(mp, dbp->b_addr);
+ hdr->bests[*findex] = bf[0].length;
+
+ *dbpp = dbp;
+ *fbpp = fbp;
+ return 0;
}
-/*
- * Add the data entry for a node-format directory name addition.
- * The leaf entry is added in xfs_dir2_leafn_add.
- * We may enter with a freespace block that the lookup found.
- */
-static int /* error */
-xfs_dir2_node_addname_int(
- xfs_da_args_t *args, /* operation arguments */
- xfs_da_state_blk_t *fblk) /* optional freespace block */
+static int
+xfs_dir2_node_find_freeblk(
+ struct xfs_da_args *args,
+ struct xfs_da_state_blk *fblk,
+ xfs_dir2_db_t *dbnop,
+ struct xfs_buf **fbpp,
+ struct xfs_dir3_icfree_hdr *hdr,
+ int *findexp,
+ int length)
{
- xfs_dir2_data_hdr_t *hdr; /* data block header */
- xfs_dir2_db_t dbno; /* data block number */
- struct xfs_buf *dbp; /* data block buffer */
- xfs_dir2_data_entry_t *dep; /* data entry pointer */
- xfs_inode_t *dp; /* incore directory inode */
- xfs_dir2_data_unused_t *dup; /* data unused entry pointer */
- int error; /* error return value */
- xfs_dir2_db_t fbno; /* freespace block number */
- struct xfs_buf *fbp; /* freespace buffer */
- int findex; /* freespace entry index */
- xfs_dir2_free_t *free=NULL; /* freespace block structure */
- xfs_dir2_db_t ifbno; /* initial freespace block no */
- xfs_dir2_db_t lastfbno=0; /* highest freespace block no */
- int length; /* length of the new entry */
- int logfree; /* need to log free entry */
- xfs_mount_t *mp; /* filesystem mount point */
- int needlog; /* need to log data header */
- int needscan; /* need to rescan data frees */
- __be16 *tagp; /* data entry tag pointer */
- xfs_trans_t *tp; /* transaction pointer */
- __be16 *bests;
- struct xfs_dir3_icfree_hdr freehdr;
- struct xfs_dir2_data_free *bf;
- xfs_dir2_data_aoff_t aoff;
+ struct xfs_inode *dp = args->dp;
+ struct xfs_trans *tp = args->trans;
+ struct xfs_buf *fbp = NULL;
+ xfs_dir2_db_t firstfbno;
+ xfs_dir2_db_t lastfbno;
+ xfs_dir2_db_t ifbno = -1;
+ xfs_dir2_db_t dbno = -1;
+ xfs_dir2_db_t fbno;
+ xfs_fileoff_t fo;
+ int findex = 0;
+ int error;
- dp = args->dp;
- mp = dp->i_mount;
- tp = args->trans;
- length = dp->d_ops->data_entsize(args->namelen);
/*
* If we came in with a freespace block that means that lookup
* found an entry with our hash value. This is the freespace
@@ -1725,288 +1811,150 @@ xfs_dir2_node_addname_int(
*/
if (fblk) {
fbp = fblk->bp;
- /*
- * Remember initial freespace block number.
- */
- ifbno = fblk->blkno;
- free = fbp->b_addr;
findex = fblk->index;
- bests = dp->d_ops->free_bests_p(free);
- dp->d_ops->free_hdr_from_disk(&freehdr, free);
-
- /*
- * This means the free entry showed that the data block had
- * space for our entry, so we remembered it.
- * Use that data block.
- */
+ xfs_dir2_free_hdr_from_disk(dp->i_mount, hdr, fbp->b_addr);
if (findex >= 0) {
- ASSERT(findex < freehdr.nvalid);
- ASSERT(be16_to_cpu(bests[findex]) != NULLDATAOFF);
- ASSERT(be16_to_cpu(bests[findex]) >= length);
- dbno = freehdr.firstdb + findex;
- } else {
- /*
- * The data block looked at didn't have enough room.
- * We'll start at the beginning of the freespace entries.
- */
- dbno = -1;
- findex = 0;
+ /* caller already found the freespace for us. */
+ ASSERT(findex < hdr->nvalid);
+ ASSERT(be16_to_cpu(hdr->bests[findex]) != NULLDATAOFF);
+ ASSERT(be16_to_cpu(hdr->bests[findex]) >= length);
+ dbno = hdr->firstdb + findex;
+ goto found_block;
}
- } else {
+
/*
- * Didn't come in with a freespace block, so no data block.
+ * The data block looked at didn't have enough room.
+ * We'll start at the beginning of the freespace entries.
*/
- ifbno = dbno = -1;
+ ifbno = fblk->blkno;
+ xfs_trans_brelse(tp, fbp);
fbp = NULL;
- findex = 0;
+ fblk->bp = NULL;
}
/*
- * If we don't have a data block yet, we're going to scan the
- * freespace blocks looking for one. Figure out what the
- * highest freespace block number is.
- */
- if (dbno == -1) {
- xfs_fileoff_t fo; /* freespace block number */
-
- if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK)))
- return error;
- lastfbno = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo);
- fbno = ifbno;
- }
- /*
- * While we haven't identified a data block, search the freeblock
- * data for a good data block. If we find a null freeblock entry,
- * indicating a hole in the data blocks, remember that.
- */
- while (dbno == -1) {
- /*
- * If we don't have a freeblock in hand, get the next one.
- */
- if (fbp == NULL) {
- /*
- * Happens the first time through unless lookup gave
- * us a freespace block to start with.
- */
- if (++fbno == 0)
- fbno = xfs_dir2_byte_to_db(args->geo,
- XFS_DIR2_FREE_OFFSET);
- /*
- * If it's ifbno we already looked at it.
- */
- if (fbno == ifbno)
- fbno++;
- /*
- * If it's off the end we're done.
- */
- if (fbno >= lastfbno)
- break;
- /*
- * Read the block. There can be holes in the
- * freespace blocks, so this might not succeed.
- * This should be really rare, so there's no reason
- * to avoid it.
- */
- error = xfs_dir2_free_try_read(tp, dp,
- xfs_dir2_db_to_da(args->geo, fbno),
- &fbp);
- if (error)
- return error;
- if (!fbp)
- continue;
- free = fbp->b_addr;
- findex = 0;
- }
- /*
- * Look at the current free entry. Is it good enough?
- *
- * The bests initialisation should be where the bufer is read in
- * the above branch. But gcc is too stupid to realise that bests
- * and the freehdr are actually initialised if they are placed
- * there, so we have to do it here to avoid warnings. Blech.
- */
- bests = dp->d_ops->free_bests_p(free);
- dp->d_ops->free_hdr_from_disk(&freehdr, free);
- if (be16_to_cpu(bests[findex]) != NULLDATAOFF &&
- be16_to_cpu(bests[findex]) >= length)
- dbno = freehdr.firstdb + findex;
- else {
- /*
- * Are we done with the freeblock?
- */
- if (++findex == freehdr.nvalid) {
- /*
- * Drop the block.
- */
- xfs_trans_brelse(tp, fbp);
- fbp = NULL;
- if (fblk && fblk->bp)
- fblk->bp = NULL;
- }
- }
- }
- /*
- * If we don't have a data block, we need to allocate one and make
- * the freespace entries refer to it.
+ * If we don't have a data block yet, we're going to scan the freespace
+ * data for a data block with enough free space in it.
*/
- if (unlikely(dbno == -1)) {
- /*
- * Not allowed to allocate, return failure.
- */
- if ((args->op_flags & XFS_DA_OP_JUSTCHECK) || args->total == 0)
- return -ENOSPC;
-
- /*
- * Allocate and initialize the new data block.
- */
- if (unlikely((error = xfs_dir2_grow_inode(args,
- XFS_DIR2_DATA_SPACE,
- &dbno)) ||
- (error = xfs_dir3_data_init(args, dbno, &dbp))))
- return error;
+ error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK);
+ if (error)
+ return error;
+ lastfbno = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo);
+ firstfbno = xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET);
- /*
- * If (somehow) we have a freespace block, get rid of it.
- */
- if (fbp)
- xfs_trans_brelse(tp, fbp);
- if (fblk && fblk->bp)
- fblk->bp = NULL;
+ for (fbno = lastfbno - 1; fbno >= firstfbno; fbno--) {
+ /* If it's ifbno we already looked at it. */
+ if (fbno == ifbno)
+ continue;
/*
- * Get the freespace block corresponding to the data block
- * that was just allocated.
+ * Read the block. There can be holes in the freespace blocks,
+ * so this might not succeed. This should be really rare, so
+ * there's no reason to avoid it.
*/
- fbno = dp->d_ops->db_to_fdb(args->geo, dbno);
error = xfs_dir2_free_try_read(tp, dp,
- xfs_dir2_db_to_da(args->geo, fbno),
- &fbp);
+ xfs_dir2_db_to_da(args->geo, fbno),
+ &fbp);
if (error)
return error;
+ if (!fbp)
+ continue;
- /*
- * If there wasn't a freespace block, the read will
- * return a NULL fbp. Allocate and initialize a new one.
- */
- if (!fbp) {
- error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE,
- &fbno);
- if (error)
- return error;
+ xfs_dir2_free_hdr_from_disk(dp->i_mount, hdr, fbp->b_addr);
- if (dp->d_ops->db_to_fdb(args->geo, dbno) != fbno) {
- xfs_alert(mp,
-"%s: dir ino %llu needed freesp block %lld for data block %lld, got %lld ifbno %llu lastfbno %d",
- __func__, (unsigned long long)dp->i_ino,
- (long long)dp->d_ops->db_to_fdb(
- args->geo, dbno),
- (long long)dbno, (long long)fbno,
- (unsigned long long)ifbno, lastfbno);
- if (fblk) {
- xfs_alert(mp,
- " fblk "PTR_FMT" blkno %llu index %d magic 0x%x",
- fblk,
- (unsigned long long)fblk->blkno,
- fblk->index,
- fblk->magic);
- } else {
- xfs_alert(mp, " ... fblk is NULL");
- }
- XFS_ERROR_REPORT("xfs_dir2_node_addname_int",
- XFS_ERRLEVEL_LOW, mp);
- return -EFSCORRUPTED;
+ /* Scan the free entry array for a large enough free space. */
+ for (findex = hdr->nvalid - 1; findex >= 0; findex--) {
+ if (be16_to_cpu(hdr->bests[findex]) != NULLDATAOFF &&
+ be16_to_cpu(hdr->bests[findex]) >= length) {
+ dbno = hdr->firstdb + findex;
+ goto found_block;
}
-
- /*
- * Get a buffer for the new block.
- */
- error = xfs_dir3_free_get_buf(args, fbno, &fbp);
- if (error)
- return error;
- free = fbp->b_addr;
- bests = dp->d_ops->free_bests_p(free);
- dp->d_ops->free_hdr_from_disk(&freehdr, free);
-
- /*
- * Remember the first slot as our empty slot.
- */
- freehdr.firstdb =
- (fbno - xfs_dir2_byte_to_db(args->geo,
- XFS_DIR2_FREE_OFFSET)) *
- dp->d_ops->free_max_bests(args->geo);
- } else {
- free = fbp->b_addr;
- bests = dp->d_ops->free_bests_p(free);
- dp->d_ops->free_hdr_from_disk(&freehdr, free);
}
- /*
- * Set the freespace block index from the data block number.
- */
- findex = dp->d_ops->db_to_fdindex(args->geo, dbno);
- /*
- * If it's after the end of the current entries in the
- * freespace block, extend that table.
- */
- if (findex >= freehdr.nvalid) {
- ASSERT(findex < dp->d_ops->free_max_bests(args->geo));
- freehdr.nvalid = findex + 1;
- /*
- * Tag new entry so nused will go up.
- */
- bests[findex] = cpu_to_be16(NULLDATAOFF);
- }
- /*
- * If this entry was for an empty data block
- * (this should always be true) then update the header.
- */
- if (bests[findex] == cpu_to_be16(NULLDATAOFF)) {
- freehdr.nused++;
- dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr);
- xfs_dir2_free_log_header(args, fbp);
- }
- /*
- * Update the real value in the table.
- * We haven't allocated the data entry yet so this will
- * change again.
- */
- hdr = dbp->b_addr;
- bf = dp->d_ops->data_bestfree_p(hdr);
- bests[findex] = bf[0].length;
- logfree = 1;
+ /* Didn't find free space, go on to next free block */
+ xfs_trans_brelse(tp, fbp);
}
+
+found_block:
+ *dbnop = dbno;
+ *fbpp = fbp;
+ *findexp = findex;
+ return 0;
+}
+
+/*
+ * Add the data entry for a node-format directory name addition.
+ * The leaf entry is added in xfs_dir2_leafn_add.
+ * We may enter with a freespace block that the lookup found.
+ */
+static int
+xfs_dir2_node_addname_int(
+ struct xfs_da_args *args, /* operation arguments */
+ struct xfs_da_state_blk *fblk) /* optional freespace block */
+{
+ struct xfs_dir2_data_unused *dup; /* data unused entry pointer */
+ struct xfs_dir2_data_entry *dep; /* data entry pointer */
+ struct xfs_dir2_data_hdr *hdr; /* data block header */
+ struct xfs_dir2_data_free *bf;
+ struct xfs_trans *tp = args->trans;
+ struct xfs_inode *dp = args->dp;
+ struct xfs_dir3_icfree_hdr freehdr;
+ struct xfs_buf *dbp; /* data block buffer */
+ struct xfs_buf *fbp; /* freespace buffer */
+ xfs_dir2_data_aoff_t aoff;
+ xfs_dir2_db_t dbno; /* data block number */
+ int error; /* error return value */
+ int findex; /* freespace entry index */
+ int length; /* length of the new entry */
+ int logfree = 0; /* need to log free entry */
+ int needlog = 0; /* need to log data header */
+ int needscan = 0; /* need to rescan data frees */
+ __be16 *tagp; /* data entry tag pointer */
+
+ length = xfs_dir2_data_entsize(dp->i_mount, args->namelen);
+ error = xfs_dir2_node_find_freeblk(args, fblk, &dbno, &fbp, &freehdr,
+ &findex, length);
+ if (error)
+ return error;
+
/*
- * We had a data block so we don't have to make a new one.
+ * Now we know if we must allocate blocks, so if we are checking whether
+ * we can insert without allocation then we can return now.
*/
- else {
- /*
- * If just checking, we succeeded.
- */
- if (args->op_flags & XFS_DA_OP_JUSTCHECK)
- return 0;
+ if (args->op_flags & XFS_DA_OP_JUSTCHECK) {
+ if (dbno == -1)
+ return -ENOSPC;
+ return 0;
+ }
- /*
- * Read the data block in.
- */
+ /*
+ * If we don't have a data block, we need to allocate one and make
+ * the freespace entries refer to it.
+ */
+ if (dbno == -1) {
+ /* we're going to have to log the free block index later */
+ logfree = 1;
+ error = xfs_dir2_node_add_datablk(args, fblk, &dbno, &dbp, &fbp,
+ &freehdr, &findex);
+ } else {
+ /* Read the data block in. */
error = xfs_dir3_data_read(tp, dp,
xfs_dir2_db_to_da(args->geo, dbno),
- -1, &dbp);
- if (error)
- return error;
- hdr = dbp->b_addr;
- bf = dp->d_ops->data_bestfree_p(hdr);
- logfree = 0;
+ 0, &dbp);
}
+ if (error)
+ return error;
+
+ /* setup for data block up now */
+ hdr = dbp->b_addr;
+ bf = xfs_dir2_data_bestfree_p(dp->i_mount, hdr);
ASSERT(be16_to_cpu(bf[0].length) >= length);
- /*
- * Point to the existing unused space.
- */
+
+ /* Point to the existing unused space. */
dup = (xfs_dir2_data_unused_t *)
((char *)hdr + be16_to_cpu(bf[0].offset));
- needscan = needlog = 0;
- /*
- * Mark the first part of the unused space, inuse for us.
- */
+
+ /* Mark the first part of the unused space, inuse for us. */
aoff = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr);
error = xfs_dir2_data_use_free(args, dbp, dup, aoff, length,
&needlog, &needscan);
@@ -2014,46 +1962,106 @@ xfs_dir2_node_addname_int(
xfs_trans_brelse(tp, dbp);
return error;
}
- /*
- * Fill in the new entry and log it.
- */
+
+ /* Fill in the new entry and log it. */
dep = (xfs_dir2_data_entry_t *)dup;
dep->inumber = cpu_to_be64(args->inumber);
dep->namelen = args->namelen;
memcpy(dep->name, args->name, dep->namelen);
- dp->d_ops->data_put_ftype(dep, args->filetype);
- tagp = dp->d_ops->data_entry_tag_p(dep);
+ xfs_dir2_data_put_ftype(dp->i_mount, dep, args->filetype);
+ tagp = xfs_dir2_data_entry_tag_p(dp->i_mount, dep);
*tagp = cpu_to_be16((char *)dep - (char *)hdr);
xfs_dir2_data_log_entry(args, dbp, dep);
- /*
- * Rescan the block for bestfree if needed.
- */
+
+ /* Rescan the freespace and log the data block if needed. */
if (needscan)
- xfs_dir2_data_freescan(dp, hdr, &needlog);
- /*
- * Log the data block header if needed.
- */
+ xfs_dir2_data_freescan(dp->i_mount, hdr, &needlog);
if (needlog)
xfs_dir2_data_log_header(args, dbp);
+
+ /* If the freespace block entry is now wrong, update it. */
+ if (freehdr.bests[findex] != bf[0].length) {
+ freehdr.bests[findex] = bf[0].length;
+ logfree = 1;
+ }
+
+ /* Log the freespace entry if needed. */
+ if (logfree)
+ xfs_dir2_free_log_bests(args, &freehdr, fbp, findex, findex);
+
+ /* Return the data block and offset in args. */
+ args->blkno = (xfs_dablk_t)dbno;
+ args->index = be16_to_cpu(*tagp);
+ return 0;
+}
+
+/*
+ * Top-level node form directory addname routine.
+ */
+int /* error */
+xfs_dir2_node_addname(
+ xfs_da_args_t *args) /* operation arguments */
+{
+ xfs_da_state_blk_t *blk; /* leaf block for insert */
+ int error; /* error return value */
+ int rval; /* sub-return value */
+ xfs_da_state_t *state; /* btree cursor */
+
+ trace_xfs_dir2_node_addname(args);
+
/*
- * If the freespace entry is now wrong, update it.
+ * Allocate and initialize the state (btree cursor).
*/
- bests = dp->d_ops->free_bests_p(free); /* gcc is so stupid */
- if (be16_to_cpu(bests[findex]) != be16_to_cpu(bf[0].length)) {
- bests[findex] = bf[0].length;
- logfree = 1;
+ state = xfs_da_state_alloc();
+ state->args = args;
+ state->mp = args->dp->i_mount;
+ /*
+ * Look up the name. We're not supposed to find it, but
+ * this gives us the insertion point.
+ */
+ error = xfs_da3_node_lookup_int(state, &rval);
+ if (error)
+ rval = error;
+ if (rval != -ENOENT) {
+ goto done;
}
/*
- * Log the freespace entry if needed.
+ * Add the data entry to a data block.
+ * Extravalid is set to a freeblock found by lookup.
*/
- if (logfree)
- xfs_dir2_free_log_bests(args, fbp, findex, findex);
+ rval = xfs_dir2_node_addname_int(args,
+ state->extravalid ? &state->extrablk : NULL);
+ if (rval) {
+ goto done;
+ }
+ blk = &state->path.blk[state->path.active - 1];
+ ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
/*
- * Return the data block and offset in args, then drop the data block.
+ * Add the new leaf entry.
*/
- args->blkno = (xfs_dablk_t)dbno;
- args->index = be16_to_cpu(*tagp);
- return 0;
+ rval = xfs_dir2_leafn_add(blk->bp, args, blk->index);
+ if (rval == 0) {
+ /*
+ * It worked, fix the hash values up the btree.
+ */
+ if (!(args->op_flags & XFS_DA_OP_JUSTCHECK))
+ xfs_da3_fixhashpath(state, &state->path);
+ } else {
+ /*
+ * It didn't work, we need to split the leaf block.
+ */
+ if (args->total == 0) {
+ ASSERT(rval == -ENOSPC);
+ goto done;
+ }
+ /*
+ * Split the leaf block and insert the new entry.
+ */
+ rval = xfs_da3_split(state);
+ }
+done:
+ xfs_da_state_free(state);
+ return rval;
}
/*
@@ -2187,8 +2195,6 @@ xfs_dir2_node_replace(
int i; /* btree level */
xfs_ino_t inum; /* new inode number */
int ftype; /* new file type */
- xfs_dir2_leaf_t *leaf; /* leaf structure */
- xfs_dir2_leaf_entry_t *lep; /* leaf entry being changed */
int rval; /* internal return value */
xfs_da_state_t *state; /* btree cursor */
@@ -2220,16 +2226,17 @@ xfs_dir2_node_replace(
* and locked it. But paranoia is good.
*/
if (rval == -EEXIST) {
- struct xfs_dir2_leaf_entry *ents;
+ struct xfs_dir3_icleaf_hdr leafhdr;
+
/*
* Find the leaf entry.
*/
blk = &state->path.blk[state->path.active - 1];
ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);
- leaf = blk->bp->b_addr;
- ents = args->dp->d_ops->leaf_ents_p(leaf);
- lep = &ents[blk->index];
ASSERT(state->extravalid);
+
+ xfs_dir2_leaf_hdr_from_disk(state->mp, &leafhdr,
+ blk->bp->b_addr);
/*
* Point to the data entry.
*/
@@ -2239,13 +2246,13 @@ xfs_dir2_node_replace(
dep = (xfs_dir2_data_entry_t *)
((char *)hdr +
xfs_dir2_dataptr_to_off(args->geo,
- be32_to_cpu(lep->address)));
+ be32_to_cpu(leafhdr.ents[blk->index].address)));
ASSERT(inum != be64_to_cpu(dep->inumber));
/*
* Fill in the new inode number and log the entry.
*/
dep->inumber = cpu_to_be64(inum);
- args->dp->d_ops->data_put_ftype(dep, ftype);
+ xfs_dir2_data_put_ftype(state->mp, dep, ftype);
xfs_dir2_data_log_entry(args, state->extrablk.bp, dep);
rval = 0;
}
@@ -2302,7 +2309,7 @@ xfs_dir2_node_trim_free(
if (!bp)
return 0;
free = bp->b_addr;
- dp->d_ops->free_hdr_from_disk(&freehdr, free);
+ xfs_dir2_free_hdr_from_disk(dp->i_mount, &freehdr, free);
/*
* If there are used entries, there's nothing to do.
diff --git a/fs/xfs/libxfs/xfs_dir2_priv.h b/fs/xfs/libxfs/xfs_dir2_priv.h
index 59f9fb2241a5..01ee0b926572 100644
--- a/fs/xfs/libxfs/xfs_dir2_priv.h
+++ b/fs/xfs/libxfs/xfs_dir2_priv.h
@@ -8,7 +8,41 @@
struct dir_context;
+/*
+ * In-core version of the leaf and free block headers to abstract the
+ * differences in the v2 and v3 disk format of the headers.
+ */
+struct xfs_dir3_icleaf_hdr {
+ uint32_t forw;
+ uint32_t back;
+ uint16_t magic;
+ uint16_t count;
+ uint16_t stale;
+
+ /*
+ * Pointer to the on-disk format entries, which are behind the
+ * variable size (v4 vs v5) header in the on-disk block.
+ */
+ struct xfs_dir2_leaf_entry *ents;
+};
+
+struct xfs_dir3_icfree_hdr {
+ uint32_t magic;
+ uint32_t firstdb;
+ uint32_t nvalid;
+ uint32_t nused;
+
+ /*
+ * Pointer to the on-disk format entries, which are behind the
+ * variable size (v4 vs v5) header in the on-disk block.
+ */
+ __be16 *bests;
+};
+
/* xfs_dir2.c */
+xfs_dahash_t xfs_ascii_ci_hashname(struct xfs_name *name);
+enum xfs_dacmp xfs_ascii_ci_compname(struct xfs_da_args *args,
+ const unsigned char *name, int len);
extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space,
xfs_dir2_db_t *dbp);
extern int xfs_dir_cilookup_result(struct xfs_da_args *args,
@@ -26,6 +60,15 @@ extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,
struct xfs_buf *lbp, struct xfs_buf *dbp);
/* xfs_dir2_data.c */
+struct xfs_dir2_data_free *xfs_dir2_data_bestfree_p(struct xfs_mount *mp,
+ struct xfs_dir2_data_hdr *hdr);
+__be16 *xfs_dir2_data_entry_tag_p(struct xfs_mount *mp,
+ struct xfs_dir2_data_entry *dep);
+uint8_t xfs_dir2_data_get_ftype(struct xfs_mount *mp,
+ struct xfs_dir2_data_entry *dep);
+void xfs_dir2_data_put_ftype(struct xfs_mount *mp,
+ struct xfs_dir2_data_entry *dep, uint8_t ftype);
+
#ifdef DEBUG
extern void xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp);
#else
@@ -34,10 +77,10 @@ extern void xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp);
extern xfs_failaddr_t __xfs_dir3_data_check(struct xfs_inode *dp,
struct xfs_buf *bp);
-extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp,
- xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp);
-extern int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno,
- xfs_daddr_t mapped_bno);
+int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp,
+ xfs_dablk_t bno, unsigned int flags, struct xfs_buf **bpp);
+int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno,
+ unsigned int flags);
extern struct xfs_dir2_data_free *
xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr,
@@ -47,10 +90,14 @@ extern int xfs_dir3_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,
struct xfs_buf **bpp);
/* xfs_dir2_leaf.c */
-extern int xfs_dir3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,
- xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp);
-extern int xfs_dir3_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp,
- xfs_dablk_t fbno, xfs_daddr_t mappedbno, struct xfs_buf **bpp);
+void xfs_dir2_leaf_hdr_from_disk(struct xfs_mount *mp,
+ struct xfs_dir3_icleaf_hdr *to, struct xfs_dir2_leaf *from);
+void xfs_dir2_leaf_hdr_to_disk(struct xfs_mount *mp, struct xfs_dir2_leaf *to,
+ struct xfs_dir3_icleaf_hdr *from);
+int xfs_dir3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,
+ xfs_dablk_t fbno, struct xfs_buf **bpp);
+int xfs_dir3_leafn_read(struct xfs_trans *tp, struct xfs_inode *dp,
+ xfs_dablk_t fbno, struct xfs_buf **bpp);
extern int xfs_dir2_block_to_leaf(struct xfs_da_args *args,
struct xfs_buf *dbp);
extern int xfs_dir2_leaf_addname(struct xfs_da_args *args);
@@ -62,7 +109,8 @@ extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr,
extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno,
struct xfs_buf **bpp, uint16_t magic);
extern void xfs_dir3_leaf_log_ents(struct xfs_da_args *args,
- struct xfs_buf *bp, int first, int last);
+ struct xfs_dir3_icleaf_hdr *hdr, struct xfs_buf *bp, int first,
+ int last);
extern void xfs_dir3_leaf_log_header(struct xfs_da_args *args,
struct xfs_buf *bp);
extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args);
@@ -79,10 +127,11 @@ xfs_dir3_leaf_find_entry(struct xfs_dir3_icleaf_hdr *leafhdr,
extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state);
extern xfs_failaddr_t xfs_dir3_leaf_check_int(struct xfs_mount *mp,
- struct xfs_inode *dp, struct xfs_dir3_icleaf_hdr *hdr,
- struct xfs_dir2_leaf *leaf);
+ struct xfs_dir3_icleaf_hdr *hdr, struct xfs_dir2_leaf *leaf);
/* xfs_dir2_node.c */
+void xfs_dir2_free_hdr_from_disk(struct xfs_mount *mp,
+ struct xfs_dir3_icfree_hdr *to, struct xfs_dir2_free *from);
extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,
struct xfs_buf *lbp);
extern xfs_dahash_t xfs_dir2_leaf_lasthash(struct xfs_inode *dp,
@@ -108,6 +157,14 @@ extern int xfs_dir2_free_read(struct xfs_trans *tp, struct xfs_inode *dp,
xfs_dablk_t fbno, struct xfs_buf **bpp);
/* xfs_dir2_sf.c */
+xfs_ino_t xfs_dir2_sf_get_ino(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *hdr,
+ struct xfs_dir2_sf_entry *sfep);
+xfs_ino_t xfs_dir2_sf_get_parent_ino(struct xfs_dir2_sf_hdr *hdr);
+void xfs_dir2_sf_put_parent_ino(struct xfs_dir2_sf_hdr *hdr, xfs_ino_t ino);
+uint8_t xfs_dir2_sf_get_ftype(struct xfs_mount *mp,
+ struct xfs_dir2_sf_entry *sfep);
+struct xfs_dir2_sf_entry *xfs_dir2_sf_nextentry(struct xfs_mount *mp,
+ struct xfs_dir2_sf_hdr *hdr, struct xfs_dir2_sf_entry *sfep);
extern int xfs_dir2_block_sfsize(struct xfs_inode *dp,
struct xfs_dir2_data_hdr *block, struct xfs_dir2_sf_hdr *sfhp);
extern int xfs_dir2_block_to_sf(struct xfs_da_args *args, struct xfs_buf *bp,
@@ -118,9 +175,33 @@ extern int xfs_dir2_sf_lookup(struct xfs_da_args *args);
extern int xfs_dir2_sf_removename(struct xfs_da_args *args);
extern int xfs_dir2_sf_replace(struct xfs_da_args *args);
extern xfs_failaddr_t xfs_dir2_sf_verify(struct xfs_inode *ip);
+int xfs_dir2_sf_entsize(struct xfs_mount *mp,
+ struct xfs_dir2_sf_hdr *hdr, int len);
+void xfs_dir2_sf_put_ino(struct xfs_mount *mp, struct xfs_dir2_sf_hdr *hdr,
+ struct xfs_dir2_sf_entry *sfep, xfs_ino_t ino);
+void xfs_dir2_sf_put_ftype(struct xfs_mount *mp,
+ struct xfs_dir2_sf_entry *sfep, uint8_t ftype);
/* xfs_dir2_readdir.c */
extern int xfs_readdir(struct xfs_trans *tp, struct xfs_inode *dp,
struct dir_context *ctx, size_t bufsize);
+static inline unsigned int
+xfs_dir2_data_entsize(
+ struct xfs_mount *mp,
+ unsigned int namelen)
+{
+ unsigned int len;
+
+ len = offsetof(struct xfs_dir2_data_entry, name[0]) + namelen +
+ sizeof(xfs_dir2_data_off_t) /* tag */;
+ if (xfs_sb_version_hasftype(&mp->m_sb))
+ len += sizeof(uint8_t);
+ return round_up(len, XFS_DIR2_DATA_ALIGN);
+}
+
+xfs_dahash_t xfs_dir2_hashname(struct xfs_mount *mp, struct xfs_name *name);
+enum xfs_dacmp xfs_dir2_compname(struct xfs_da_args *args,
+ const unsigned char *name, int len);
+
#endif /* __XFS_DIR2_PRIV_H__ */
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index 033589257f54..7b7f6fb2ea3b 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -37,6 +37,126 @@ static void xfs_dir2_sf_check(xfs_da_args_t *args);
static void xfs_dir2_sf_toino4(xfs_da_args_t *args);
static void xfs_dir2_sf_toino8(xfs_da_args_t *args);
+int
+xfs_dir2_sf_entsize(
+ struct xfs_mount *mp,
+ struct xfs_dir2_sf_hdr *hdr,
+ int len)
+{
+ int count = len;
+
+ count += sizeof(struct xfs_dir2_sf_entry); /* namelen + offset */
+ count += hdr->i8count ? XFS_INO64_SIZE : XFS_INO32_SIZE; /* ino # */
+
+ if (xfs_sb_version_hasftype(&mp->m_sb))
+ count += sizeof(uint8_t);
+ return count;
+}
+
+struct xfs_dir2_sf_entry *
+xfs_dir2_sf_nextentry(
+ struct xfs_mount *mp,
+ struct xfs_dir2_sf_hdr *hdr,
+ struct xfs_dir2_sf_entry *sfep)
+{
+ return (void *)sfep + xfs_dir2_sf_entsize(mp, hdr, sfep->namelen);
+}
+
+/*
+ * In short-form directory entries the inode numbers are stored at variable
+ * offset behind the entry name. If the entry stores a filetype value, then it
+ * sits between the name and the inode number. The actual inode numbers can
+ * come in two formats as well, either 4 bytes or 8 bytes wide.
+ */
+xfs_ino_t
+xfs_dir2_sf_get_ino(
+ struct xfs_mount *mp,
+ struct xfs_dir2_sf_hdr *hdr,
+ struct xfs_dir2_sf_entry *sfep)
+{
+ uint8_t *from = sfep->name + sfep->namelen;
+
+ if (xfs_sb_version_hasftype(&mp->m_sb))
+ from++;
+
+ if (!hdr->i8count)
+ return get_unaligned_be32(from);
+ return get_unaligned_be64(from) & XFS_MAXINUMBER;
+}
+
+void
+xfs_dir2_sf_put_ino(
+ struct xfs_mount *mp,
+ struct xfs_dir2_sf_hdr *hdr,
+ struct xfs_dir2_sf_entry *sfep,
+ xfs_ino_t ino)
+{
+ uint8_t *to = sfep->name + sfep->namelen;
+
+ ASSERT(ino <= XFS_MAXINUMBER);
+
+ if (xfs_sb_version_hasftype(&mp->m_sb))
+ to++;
+
+ if (hdr->i8count)
+ put_unaligned_be64(ino, to);
+ else
+ put_unaligned_be32(ino, to);
+}
+
+xfs_ino_t
+xfs_dir2_sf_get_parent_ino(
+ struct xfs_dir2_sf_hdr *hdr)
+{
+ if (!hdr->i8count)
+ return get_unaligned_be32(hdr->parent);
+ return get_unaligned_be64(hdr->parent) & XFS_MAXINUMBER;
+}
+
+void
+xfs_dir2_sf_put_parent_ino(
+ struct xfs_dir2_sf_hdr *hdr,
+ xfs_ino_t ino)
+{
+ ASSERT(ino <= XFS_MAXINUMBER);
+
+ if (hdr->i8count)
+ put_unaligned_be64(ino, hdr->parent);
+ else
+ put_unaligned_be32(ino, hdr->parent);
+}
+
+/*
+ * The file type field is stored at the end of the name for filetype enabled
+ * shortform directories, or not at all otherwise.
+ */
+uint8_t
+xfs_dir2_sf_get_ftype(
+ struct xfs_mount *mp,
+ struct xfs_dir2_sf_entry *sfep)
+{
+ if (xfs_sb_version_hasftype(&mp->m_sb)) {
+ uint8_t ftype = sfep->name[sfep->namelen];
+
+ if (ftype < XFS_DIR3_FT_MAX)
+ return ftype;
+ }
+
+ return XFS_DIR3_FT_UNKNOWN;
+}
+
+void
+xfs_dir2_sf_put_ftype(
+ struct xfs_mount *mp,
+ struct xfs_dir2_sf_entry *sfep,
+ uint8_t ftype)
+{
+ ASSERT(ftype < XFS_DIR3_FT_MAX);
+
+ if (xfs_sb_version_hasftype(&mp->m_sb))
+ sfep->name[sfep->namelen] = ftype;
+}
+
/*
* Given a block directory (dp/block), calculate its size as a shortform (sf)
* directory and a header for the sf directory, if it will fit it the
@@ -125,7 +245,7 @@ xfs_dir2_block_sfsize(
*/
sfhp->count = count;
sfhp->i8count = i8count;
- dp->d_ops->sf_put_parent_ino(sfhp, parent);
+ xfs_dir2_sf_put_parent_ino(sfhp, parent);
return size;
}
@@ -135,64 +255,48 @@ xfs_dir2_block_sfsize(
*/
int /* error */
xfs_dir2_block_to_sf(
- xfs_da_args_t *args, /* operation arguments */
+ struct xfs_da_args *args, /* operation arguments */
struct xfs_buf *bp,
int size, /* shortform directory size */
- xfs_dir2_sf_hdr_t *sfhp) /* shortform directory hdr */
+ struct xfs_dir2_sf_hdr *sfhp) /* shortform directory hdr */
{
- xfs_dir2_data_hdr_t *hdr; /* block header */
- xfs_dir2_data_entry_t *dep; /* data entry pointer */
- xfs_inode_t *dp; /* incore directory inode */
- xfs_dir2_data_unused_t *dup; /* unused data pointer */
- char *endptr; /* end of data entries */
+ struct xfs_inode *dp = args->dp;
+ struct xfs_mount *mp = dp->i_mount;
int error; /* error return value */
int logflags; /* inode logging flags */
- xfs_mount_t *mp; /* filesystem mount point */
- char *ptr; /* current data pointer */
- xfs_dir2_sf_entry_t *sfep; /* shortform entry */
- xfs_dir2_sf_hdr_t *sfp; /* shortform directory header */
- xfs_dir2_sf_hdr_t *dst; /* temporary data buffer */
+ struct xfs_dir2_sf_entry *sfep; /* shortform entry */
+ struct xfs_dir2_sf_hdr *sfp; /* shortform directory header */
+ unsigned int offset = args->geo->data_entry_offset;
+ unsigned int end;
trace_xfs_dir2_block_to_sf(args);
- dp = args->dp;
- mp = dp->i_mount;
-
- /*
- * allocate a temporary destination buffer the size of the inode
- * to format the data into. Once we have formatted the data, we
- * can free the block and copy the formatted data into the inode literal
- * area.
- */
- dst = kmem_alloc(mp->m_sb.sb_inodesize, KM_SLEEP);
- hdr = bp->b_addr;
-
/*
- * Copy the header into the newly allocate local space.
+ * Allocate a temporary destination buffer the size of the inode to
+ * format the data into. Once we have formatted the data, we can free
+ * the block and copy the formatted data into the inode literal area.
*/
- sfp = (xfs_dir2_sf_hdr_t *)dst;
+ sfp = kmem_alloc(mp->m_sb.sb_inodesize, 0);
memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count));
/*
- * Set up to loop over the block's entries.
+ * Loop over the active and unused entries. Stop when we reach the
+ * leaf/tail portion of the block.
*/
- ptr = (char *)dp->d_ops->data_entry_p(hdr);
- endptr = xfs_dir3_data_endp(args->geo, hdr);
+ end = xfs_dir3_data_end_offset(args->geo, bp->b_addr);
sfep = xfs_dir2_sf_firstentry(sfp);
- /*
- * Loop over the active and unused entries.
- * Stop when we reach the leaf/tail portion of the block.
- */
- while (ptr < endptr) {
+ while (offset < end) {
+ struct xfs_dir2_data_unused *dup = bp->b_addr + offset;
+ struct xfs_dir2_data_entry *dep = bp->b_addr + offset;
+
/*
* If it's unused, just skip over it.
*/
- dup = (xfs_dir2_data_unused_t *)ptr;
if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
- ptr += be16_to_cpu(dup->length);
+ offset += be16_to_cpu(dup->length);
continue;
}
- dep = (xfs_dir2_data_entry_t *)ptr;
+
/*
* Skip .
*/
@@ -204,24 +308,22 @@ xfs_dir2_block_to_sf(
else if (dep->namelen == 2 &&
dep->name[0] == '.' && dep->name[1] == '.')
ASSERT(be64_to_cpu(dep->inumber) ==
- dp->d_ops->sf_get_parent_ino(sfp));
+ xfs_dir2_sf_get_parent_ino(sfp));
/*
* Normal entry, copy it into shortform.
*/
else {
sfep->namelen = dep->namelen;
- xfs_dir2_sf_put_offset(sfep,
- (xfs_dir2_data_aoff_t)
- ((char *)dep - (char *)hdr));
+ xfs_dir2_sf_put_offset(sfep, offset);
memcpy(sfep->name, dep->name, dep->namelen);
- dp->d_ops->sf_put_ino(sfp, sfep,
+ xfs_dir2_sf_put_ino(mp, sfp, sfep,
be64_to_cpu(dep->inumber));
- dp->d_ops->sf_put_ftype(sfep,
- dp->d_ops->data_get_ftype(dep));
+ xfs_dir2_sf_put_ftype(mp, sfep,
+ xfs_dir2_data_get_ftype(mp, dep));
- sfep = dp->d_ops->sf_nextentry(sfp, sfep);
+ sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
}
- ptr += dp->d_ops->data_entsize(dep->namelen);
+ offset += xfs_dir2_data_entsize(mp, dep->namelen);
}
ASSERT((char *)sfep - (char *)sfp == size);
@@ -240,7 +342,7 @@ xfs_dir2_block_to_sf(
* Convert the inode to local format and copy the data in.
*/
ASSERT(dp->i_df.if_bytes == 0);
- xfs_init_local_fork(dp, XFS_DATA_FORK, dst, size);
+ xfs_init_local_fork(dp, XFS_DATA_FORK, sfp, size);
dp->i_d.di_format = XFS_DINODE_FMT_LOCAL;
dp->i_d.di_size = size;
@@ -248,7 +350,7 @@ xfs_dir2_block_to_sf(
xfs_dir2_sf_check(args);
out:
xfs_trans_log_inode(args->trans, dp, logflags);
- kmem_free(dst);
+ kmem_free(sfp);
return error;
}
@@ -277,13 +379,7 @@ xfs_dir2_sf_addname(
ASSERT(xfs_dir2_sf_lookup(args) == -ENOENT);
dp = args->dp;
ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
- /*
- * Make sure the shortform value has some of its header.
- */
- if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
- ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
- return -EIO;
- }
+ ASSERT(dp->i_d.di_size >= offsetof(struct xfs_dir2_sf_hdr, parent));
ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
ASSERT(dp->i_df.if_u1.if_data != NULL);
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
@@ -291,7 +387,7 @@ xfs_dir2_sf_addname(
/*
* Compute entry (and change in) size.
*/
- incr_isize = dp->d_ops->sf_entsize(sfp, args->namelen);
+ incr_isize = xfs_dir2_sf_entsize(dp->i_mount, sfp, args->namelen);
objchange = 0;
/*
@@ -364,18 +460,17 @@ xfs_dir2_sf_addname_easy(
xfs_dir2_data_aoff_t offset, /* offset to use for new ent */
int new_isize) /* new directory size */
{
+ struct xfs_inode *dp = args->dp;
+ struct xfs_mount *mp = dp->i_mount;
int byteoff; /* byte offset in sf dir */
- xfs_inode_t *dp; /* incore directory inode */
xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
- dp = args->dp;
-
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
byteoff = (int)((char *)sfep - (char *)sfp);
/*
* Grow the in-inode space.
*/
- xfs_idata_realloc(dp, dp->d_ops->sf_entsize(sfp, args->namelen),
+ xfs_idata_realloc(dp, xfs_dir2_sf_entsize(mp, sfp, args->namelen),
XFS_DATA_FORK);
/*
* Need to set up again due to realloc of the inode data.
@@ -388,8 +483,8 @@ xfs_dir2_sf_addname_easy(
sfep->namelen = args->namelen;
xfs_dir2_sf_put_offset(sfep, offset);
memcpy(sfep->name, args->name, sfep->namelen);
- dp->d_ops->sf_put_ino(sfp, sfep, args->inumber);
- dp->d_ops->sf_put_ftype(sfep, args->filetype);
+ xfs_dir2_sf_put_ino(mp, sfp, sfep, args->inumber);
+ xfs_dir2_sf_put_ftype(mp, sfep, args->filetype);
/*
* Update the header and inode.
@@ -416,9 +511,10 @@ xfs_dir2_sf_addname_hard(
int objchange, /* changing inode number size */
int new_isize) /* new directory size */
{
+ struct xfs_inode *dp = args->dp;
+ struct xfs_mount *mp = dp->i_mount;
int add_datasize; /* data size need for new ent */
char *buf; /* buffer for old */
- xfs_inode_t *dp; /* incore directory inode */
int eof; /* reached end of old dir */
int nbytes; /* temp for byte copies */
xfs_dir2_data_aoff_t new_offset; /* next offset value */
@@ -432,11 +528,9 @@ xfs_dir2_sf_addname_hard(
/*
* Copy the old directory to the stack buffer.
*/
- dp = args->dp;
-
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
old_isize = (int)dp->i_d.di_size;
- buf = kmem_alloc(old_isize, KM_SLEEP);
+ buf = kmem_alloc(old_isize, 0);
oldsfp = (xfs_dir2_sf_hdr_t *)buf;
memcpy(oldsfp, sfp, old_isize);
/*
@@ -444,13 +538,13 @@ xfs_dir2_sf_addname_hard(
* to insert the new entry.
* If it's going to end up at the end then oldsfep will point there.
*/
- for (offset = dp->d_ops->data_first_offset,
+ for (offset = args->geo->data_first_offset,
oldsfep = xfs_dir2_sf_firstentry(oldsfp),
- add_datasize = dp->d_ops->data_entsize(args->namelen),
+ add_datasize = xfs_dir2_data_entsize(mp, args->namelen),
eof = (char *)oldsfep == &buf[old_isize];
!eof;
- offset = new_offset + dp->d_ops->data_entsize(oldsfep->namelen),
- oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep),
+ offset = new_offset + xfs_dir2_data_entsize(mp, oldsfep->namelen),
+ oldsfep = xfs_dir2_sf_nextentry(mp, oldsfp, oldsfep),
eof = (char *)oldsfep == &buf[old_isize]) {
new_offset = xfs_dir2_sf_get_offset(oldsfep);
if (offset + add_datasize <= new_offset)
@@ -479,8 +573,8 @@ xfs_dir2_sf_addname_hard(
sfep->namelen = args->namelen;
xfs_dir2_sf_put_offset(sfep, offset);
memcpy(sfep->name, args->name, sfep->namelen);
- dp->d_ops->sf_put_ino(sfp, sfep, args->inumber);
- dp->d_ops->sf_put_ftype(sfep, args->filetype);
+ xfs_dir2_sf_put_ino(mp, sfp, sfep, args->inumber);
+ xfs_dir2_sf_put_ftype(mp, sfep, args->filetype);
sfp->count++;
if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange)
sfp->i8count++;
@@ -488,7 +582,7 @@ xfs_dir2_sf_addname_hard(
* If there's more left to copy, do that.
*/
if (!eof) {
- sfep = dp->d_ops->sf_nextentry(sfp, sfep);
+ sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
memcpy(sfep, oldsfep, old_isize - nbytes);
}
kmem_free(buf);
@@ -510,7 +604,8 @@ xfs_dir2_sf_addname_pick(
xfs_dir2_sf_entry_t **sfepp, /* out(1): new entry ptr */
xfs_dir2_data_aoff_t *offsetp) /* out(1): new offset */
{
- xfs_inode_t *dp; /* incore directory inode */
+ struct xfs_inode *dp = args->dp;
+ struct xfs_mount *mp = dp->i_mount;
int holefit; /* found hole it will fit in */
int i; /* entry number */
xfs_dir2_data_aoff_t offset; /* data block offset */
@@ -519,11 +614,9 @@ xfs_dir2_sf_addname_pick(
int size; /* entry's data size */
int used; /* data bytes used */
- dp = args->dp;
-
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
- size = dp->d_ops->data_entsize(args->namelen);
- offset = dp->d_ops->data_first_offset;
+ size = xfs_dir2_data_entsize(mp, args->namelen);
+ offset = args->geo->data_first_offset;
sfep = xfs_dir2_sf_firstentry(sfp);
holefit = 0;
/*
@@ -535,8 +628,8 @@ xfs_dir2_sf_addname_pick(
if (!holefit)
holefit = offset + size <= xfs_dir2_sf_get_offset(sfep);
offset = xfs_dir2_sf_get_offset(sfep) +
- dp->d_ops->data_entsize(sfep->namelen);
- sfep = dp->d_ops->sf_nextentry(sfp, sfep);
+ xfs_dir2_data_entsize(mp, sfep->namelen);
+ sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
}
/*
* Calculate data bytes used excluding the new entry, if this
@@ -578,7 +671,8 @@ static void
xfs_dir2_sf_check(
xfs_da_args_t *args) /* operation arguments */
{
- xfs_inode_t *dp; /* incore directory inode */
+ struct xfs_inode *dp = args->dp;
+ struct xfs_mount *mp = dp->i_mount;
int i; /* entry number */
int i8count; /* number of big inode#s */
xfs_ino_t ino; /* entry inode number */
@@ -586,23 +680,21 @@ xfs_dir2_sf_check(
xfs_dir2_sf_entry_t *sfep; /* shortform dir entry */
xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
- dp = args->dp;
-
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
- offset = dp->d_ops->data_first_offset;
- ino = dp->d_ops->sf_get_parent_ino(sfp);
+ offset = args->geo->data_first_offset;
+ ino = xfs_dir2_sf_get_parent_ino(sfp);
i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);
i < sfp->count;
- i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) {
+ i++, sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep)) {
ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset);
- ino = dp->d_ops->sf_get_ino(sfp, sfep);
+ ino = xfs_dir2_sf_get_ino(mp, sfp, sfep);
i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
offset =
xfs_dir2_sf_get_offset(sfep) +
- dp->d_ops->data_entsize(sfep->namelen);
- ASSERT(dp->d_ops->sf_get_ftype(sfep) < XFS_DIR3_FT_MAX);
+ xfs_dir2_data_entsize(mp, sfep->namelen);
+ ASSERT(xfs_dir2_sf_get_ftype(mp, sfep) < XFS_DIR3_FT_MAX);
}
ASSERT(i8count == sfp->i8count);
ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size);
@@ -622,22 +714,16 @@ xfs_dir2_sf_verify(
struct xfs_dir2_sf_entry *sfep;
struct xfs_dir2_sf_entry *next_sfep;
char *endp;
- const struct xfs_dir_ops *dops;
struct xfs_ifork *ifp;
xfs_ino_t ino;
int i;
int i8count;
int offset;
- int size;
+ int64_t size;
int error;
uint8_t filetype;
ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_LOCAL);
- /*
- * xfs_iread calls us before xfs_setup_inode sets up ip->d_ops,
- * so we can only trust the mountpoint to have the right pointer.
- */
- dops = xfs_dir_get_ops(mp, NULL);
ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
sfp = (struct xfs_dir2_sf_hdr *)ifp->if_u1.if_data;
@@ -653,12 +739,12 @@ xfs_dir2_sf_verify(
endp = (char *)sfp + size;
/* Check .. entry */
- ino = dops->sf_get_parent_ino(sfp);
+ ino = xfs_dir2_sf_get_parent_ino(sfp);
i8count = ino > XFS_DIR2_MAX_SHORT_INUM;
error = xfs_dir_ino_validate(mp, ino);
if (error)
return __this_address;
- offset = dops->data_first_offset;
+ offset = mp->m_dir_geo->data_first_offset;
/* Check all reported entries */
sfep = xfs_dir2_sf_firstentry(sfp);
@@ -680,7 +766,7 @@ xfs_dir2_sf_verify(
* within the data buffer. The next entry starts after the
* name component, so nextentry is an acceptable test.
*/
- next_sfep = dops->sf_nextentry(sfp, sfep);
+ next_sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
if (endp < (char *)next_sfep)
return __this_address;
@@ -689,19 +775,19 @@ xfs_dir2_sf_verify(
return __this_address;
/* Check the inode number. */
- ino = dops->sf_get_ino(sfp, sfep);
+ ino = xfs_dir2_sf_get_ino(mp, sfp, sfep);
i8count += ino > XFS_DIR2_MAX_SHORT_INUM;
error = xfs_dir_ino_validate(mp, ino);
if (error)
return __this_address;
/* Check the file type. */
- filetype = dops->sf_get_ftype(sfep);
+ filetype = xfs_dir2_sf_get_ftype(mp, sfep);
if (filetype >= XFS_DIR3_FT_MAX)
return __this_address;
offset = xfs_dir2_sf_get_offset(sfep) +
- dops->data_entsize(sfep->namelen);
+ xfs_dir2_data_entsize(mp, sfep->namelen);
sfep = next_sfep;
}
@@ -763,7 +849,7 @@ xfs_dir2_sf_create(
/*
* Now can put in the inode number, since i8count is set.
*/
- dp->d_ops->sf_put_parent_ino(sfp, pino);
+ xfs_dir2_sf_put_parent_ino(sfp, pino);
sfp->count = 0;
dp->i_d.di_size = size;
xfs_dir2_sf_check(args);
@@ -779,7 +865,8 @@ int /* error */
xfs_dir2_sf_lookup(
xfs_da_args_t *args) /* operation arguments */
{
- xfs_inode_t *dp; /* incore directory inode */
+ struct xfs_inode *dp = args->dp;
+ struct xfs_mount *mp = dp->i_mount;
int i; /* entry index */
int error;
xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
@@ -790,16 +877,9 @@ xfs_dir2_sf_lookup(
trace_xfs_dir2_sf_lookup(args);
xfs_dir2_sf_check(args);
- dp = args->dp;
ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
- /*
- * Bail out if the directory is way too short.
- */
- if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
- ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
- return -EIO;
- }
+ ASSERT(dp->i_d.di_size >= offsetof(struct xfs_dir2_sf_hdr, parent));
ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
ASSERT(dp->i_df.if_u1.if_data != NULL);
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
@@ -818,7 +898,7 @@ xfs_dir2_sf_lookup(
*/
if (args->namelen == 2 &&
args->name[0] == '.' && args->name[1] == '.') {
- args->inumber = dp->d_ops->sf_get_parent_ino(sfp);
+ args->inumber = xfs_dir2_sf_get_parent_ino(sfp);
args->cmpresult = XFS_CMP_EXACT;
args->filetype = XFS_DIR3_FT_DIR;
return -EEXIST;
@@ -828,18 +908,17 @@ xfs_dir2_sf_lookup(
*/
ci_sfep = NULL;
for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
- i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) {
+ i++, sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep)) {
/*
* Compare name and if it's an exact match, return the inode
* number. If it's the first case-insensitive match, store the
* inode number and continue looking for an exact match.
*/
- cmp = dp->i_mount->m_dirnameops->compname(args, sfep->name,
- sfep->namelen);
+ cmp = xfs_dir2_compname(args, sfep->name, sfep->namelen);
if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {
args->cmpresult = cmp;
- args->inumber = dp->d_ops->sf_get_ino(sfp, sfep);
- args->filetype = dp->d_ops->sf_get_ftype(sfep);
+ args->inumber = xfs_dir2_sf_get_ino(mp, sfp, sfep);
+ args->filetype = xfs_dir2_sf_get_ftype(mp, sfep);
if (cmp == XFS_CMP_EXACT)
return -EEXIST;
ci_sfep = sfep;
@@ -864,8 +943,9 @@ int /* error */
xfs_dir2_sf_removename(
xfs_da_args_t *args)
{
+ struct xfs_inode *dp = args->dp;
+ struct xfs_mount *mp = dp->i_mount;
int byteoff; /* offset of removed entry */
- xfs_inode_t *dp; /* incore directory inode */
int entsize; /* this entry's size */
int i; /* shortform entry index */
int newsize; /* new inode size */
@@ -875,17 +955,9 @@ xfs_dir2_sf_removename(
trace_xfs_dir2_sf_removename(args);
- dp = args->dp;
-
ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
oldsize = (int)dp->i_d.di_size;
- /*
- * Bail out if the directory is way too short.
- */
- if (oldsize < offsetof(xfs_dir2_sf_hdr_t, parent)) {
- ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
- return -EIO;
- }
+ ASSERT(oldsize >= offsetof(struct xfs_dir2_sf_hdr, parent));
ASSERT(dp->i_df.if_bytes == oldsize);
ASSERT(dp->i_df.if_u1.if_data != NULL);
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
@@ -895,10 +967,10 @@ xfs_dir2_sf_removename(
* Find the one we're deleting.
*/
for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
- i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) {
+ i++, sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep)) {
if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
XFS_CMP_EXACT) {
- ASSERT(dp->d_ops->sf_get_ino(sfp, sfep) ==
+ ASSERT(xfs_dir2_sf_get_ino(mp, sfp, sfep) ==
args->inumber);
break;
}
@@ -912,7 +984,7 @@ xfs_dir2_sf_removename(
* Calculate sizes.
*/
byteoff = (int)((char *)sfep - (char *)sfp);
- entsize = dp->d_ops->sf_entsize(sfp, args->namelen);
+ entsize = xfs_dir2_sf_entsize(mp, sfp, args->namelen);
newsize = oldsize - entsize;
/*
* Copy the part if any after the removed entry, sliding it down.
@@ -945,13 +1017,35 @@ xfs_dir2_sf_removename(
}
/*
+ * Check whether the sf dir replace operation need more blocks.
+ */
+bool
+xfs_dir2_sf_replace_needblock(
+ struct xfs_inode *dp,
+ xfs_ino_t inum)
+{
+ int newsize;
+ struct xfs_dir2_sf_hdr *sfp;
+
+ if (dp->i_d.di_format != XFS_DINODE_FMT_LOCAL)
+ return false;
+
+ sfp = (struct xfs_dir2_sf_hdr *)dp->i_df.if_u1.if_data;
+ newsize = dp->i_df.if_bytes + (sfp->count + 1) * XFS_INO64_DIFF;
+
+ return inum > XFS_DIR2_MAX_SHORT_INUM &&
+ sfp->i8count == 0 && newsize > XFS_IFORK_DSIZE(dp);
+}
+
+/*
* Replace the inode number of an entry in a shortform directory.
*/
int /* error */
xfs_dir2_sf_replace(
xfs_da_args_t *args) /* operation arguments */
{
- xfs_inode_t *dp; /* incore directory inode */
+ struct xfs_inode *dp = args->dp;
+ struct xfs_mount *mp = dp->i_mount;
int i; /* entry index */
xfs_ino_t ino=0; /* entry old inode number */
int i8elevated; /* sf_toino8 set i8count=1 */
@@ -960,16 +1054,8 @@ xfs_dir2_sf_replace(
trace_xfs_dir2_sf_replace(args);
- dp = args->dp;
-
ASSERT(dp->i_df.if_flags & XFS_IFINLINE);
- /*
- * Bail out if the shortform directory is way too small.
- */
- if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) {
- ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));
- return -EIO;
- }
+ ASSERT(dp->i_d.di_size >= offsetof(struct xfs_dir2_sf_hdr, parent));
ASSERT(dp->i_df.if_bytes == dp->i_d.di_size);
ASSERT(dp->i_df.if_u1.if_data != NULL);
sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
@@ -980,17 +1066,14 @@ xfs_dir2_sf_replace(
*/
if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) {
int error; /* error return value */
- int newsize; /* new inode size */
- newsize = dp->i_df.if_bytes + (sfp->count + 1) * XFS_INO64_DIFF;
/*
* Won't fit as shortform, convert to block then do replace.
*/
- if (newsize > XFS_IFORK_DSIZE(dp)) {
+ if (xfs_dir2_sf_replace_needblock(dp, args->inumber)) {
error = xfs_dir2_sf_to_block(args);
- if (error) {
+ if (error)
return error;
- }
return xfs_dir2_block_replace(args);
}
/*
@@ -1008,22 +1091,23 @@ xfs_dir2_sf_replace(
*/
if (args->namelen == 2 &&
args->name[0] == '.' && args->name[1] == '.') {
- ino = dp->d_ops->sf_get_parent_ino(sfp);
+ ino = xfs_dir2_sf_get_parent_ino(sfp);
ASSERT(args->inumber != ino);
- dp->d_ops->sf_put_parent_ino(sfp, args->inumber);
+ xfs_dir2_sf_put_parent_ino(sfp, args->inumber);
}
/*
* Normal entry, look for the name.
*/
else {
for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count;
- i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) {
+ i++, sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep)) {
if (xfs_da_compname(args, sfep->name, sfep->namelen) ==
XFS_CMP_EXACT) {
- ino = dp->d_ops->sf_get_ino(sfp, sfep);
+ ino = xfs_dir2_sf_get_ino(mp, sfp, sfep);
ASSERT(args->inumber != ino);
- dp->d_ops->sf_put_ino(sfp, sfep, args->inumber);
- dp->d_ops->sf_put_ftype(sfep, args->filetype);
+ xfs_dir2_sf_put_ino(mp, sfp, sfep,
+ args->inumber);
+ xfs_dir2_sf_put_ftype(mp, sfep, args->filetype);
break;
}
}
@@ -1076,8 +1160,9 @@ static void
xfs_dir2_sf_toino4(
xfs_da_args_t *args) /* operation arguments */
{
+ struct xfs_inode *dp = args->dp;
+ struct xfs_mount *mp = dp->i_mount;
char *buf; /* old dir's buffer */
- xfs_inode_t *dp; /* incore directory inode */
int i; /* entry index */
int newsize; /* new inode size */
xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */
@@ -1088,15 +1173,13 @@ xfs_dir2_sf_toino4(
trace_xfs_dir2_sf_toino4(args);
- dp = args->dp;
-
/*
* Copy the old directory to the buffer.
* Then nuke it from the inode, and add the new buffer to the inode.
* Don't want xfs_idata_realloc copying the data here.
*/
oldsize = dp->i_df.if_bytes;
- buf = kmem_alloc(oldsize, KM_SLEEP);
+ buf = kmem_alloc(oldsize, 0);
oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
ASSERT(oldsfp->i8count == 1);
memcpy(buf, oldsfp, oldsize);
@@ -1116,21 +1199,22 @@ xfs_dir2_sf_toino4(
*/
sfp->count = oldsfp->count;
sfp->i8count = 0;
- dp->d_ops->sf_put_parent_ino(sfp, dp->d_ops->sf_get_parent_ino(oldsfp));
+ xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp));
/*
* Copy the entries field by field.
*/
for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
oldsfep = xfs_dir2_sf_firstentry(oldsfp);
i < sfp->count;
- i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep),
- oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep)) {
+ i++, sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep),
+ oldsfep = xfs_dir2_sf_nextentry(mp, oldsfp, oldsfep)) {
sfep->namelen = oldsfep->namelen;
memcpy(sfep->offset, oldsfep->offset, sizeof(sfep->offset));
memcpy(sfep->name, oldsfep->name, sfep->namelen);
- dp->d_ops->sf_put_ino(sfp, sfep,
- dp->d_ops->sf_get_ino(oldsfp, oldsfep));
- dp->d_ops->sf_put_ftype(sfep, dp->d_ops->sf_get_ftype(oldsfep));
+ xfs_dir2_sf_put_ino(mp, sfp, sfep,
+ xfs_dir2_sf_get_ino(mp, oldsfp, oldsfep));
+ xfs_dir2_sf_put_ftype(mp, sfep,
+ xfs_dir2_sf_get_ftype(mp, oldsfep));
}
/*
* Clean up the inode.
@@ -1149,8 +1233,9 @@ static void
xfs_dir2_sf_toino8(
xfs_da_args_t *args) /* operation arguments */
{
+ struct xfs_inode *dp = args->dp;
+ struct xfs_mount *mp = dp->i_mount;
char *buf; /* old dir's buffer */
- xfs_inode_t *dp; /* incore directory inode */
int i; /* entry index */
int newsize; /* new inode size */
xfs_dir2_sf_entry_t *oldsfep; /* old sf entry */
@@ -1161,15 +1246,13 @@ xfs_dir2_sf_toino8(
trace_xfs_dir2_sf_toino8(args);
- dp = args->dp;
-
/*
* Copy the old directory to the buffer.
* Then nuke it from the inode, and add the new buffer to the inode.
* Don't want xfs_idata_realloc copying the data here.
*/
oldsize = dp->i_df.if_bytes;
- buf = kmem_alloc(oldsize, KM_SLEEP);
+ buf = kmem_alloc(oldsize, 0);
oldsfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data;
ASSERT(oldsfp->i8count == 0);
memcpy(buf, oldsfp, oldsize);
@@ -1189,21 +1272,22 @@ xfs_dir2_sf_toino8(
*/
sfp->count = oldsfp->count;
sfp->i8count = 1;
- dp->d_ops->sf_put_parent_ino(sfp, dp->d_ops->sf_get_parent_ino(oldsfp));
+ xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp));
/*
* Copy the entries field by field.
*/
for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),
oldsfep = xfs_dir2_sf_firstentry(oldsfp);
i < sfp->count;
- i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep),
- oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep)) {
+ i++, sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep),
+ oldsfep = xfs_dir2_sf_nextentry(mp, oldsfp, oldsfep)) {
sfep->namelen = oldsfep->namelen;
memcpy(sfep->offset, oldsfep->offset, sizeof(sfep->offset));
memcpy(sfep->name, oldsfep->name, sfep->namelen);
- dp->d_ops->sf_put_ino(sfp, sfep,
- dp->d_ops->sf_get_ino(oldsfp, oldsfep));
- dp->d_ops->sf_put_ftype(sfep, dp->d_ops->sf_get_ftype(oldsfep));
+ xfs_dir2_sf_put_ino(mp, sfp, sfep,
+ xfs_dir2_sf_get_ino(mp, oldsfp, oldsfep));
+ xfs_dir2_sf_put_ftype(mp, sfep,
+ xfs_dir2_sf_get_ftype(mp, oldsfep));
}
/*
* Clean up the inode.
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index e8bd688a4073..bedc1e752b60 100644
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -35,10 +35,10 @@ xfs_calc_dquots_per_chunk(
xfs_failaddr_t
xfs_dquot_verify(
- struct xfs_mount *mp,
- xfs_disk_dquot_t *ddq,
- xfs_dqid_t id,
- uint type) /* used only during quotacheck */
+ struct xfs_mount *mp,
+ struct xfs_disk_dquot *ddq,
+ xfs_dqid_t id,
+ uint type) /* used only during quotacheck */
{
/*
* We can encounter an uninitialized dquot buffer for 2 reasons:
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index c968b60cee15..77e9fa385980 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -920,13 +920,13 @@ static inline uint xfs_dinode_size(int version)
* This enum is used in string mapping in xfs_trace.h; please keep the
* TRACE_DEFINE_ENUMs for it up to date.
*/
-typedef enum xfs_dinode_fmt {
+enum xfs_dinode_fmt {
XFS_DINODE_FMT_DEV, /* xfs_dev_t */
XFS_DINODE_FMT_LOCAL, /* bulk data */
XFS_DINODE_FMT_EXTENTS, /* struct xfs_bmbt_rec */
XFS_DINODE_FMT_BTREE, /* struct xfs_bmdr_block */
XFS_DINODE_FMT_UUID /* added long ago, but never used */
-} xfs_dinode_fmt_t;
+};
#define XFS_INODE_FORMAT_STR \
{ XFS_DINODE_FMT_DEV, "dev" }, \
@@ -1144,11 +1144,11 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
/*
* This is the main portion of the on-disk representation of quota
- * information for a user. This is the q_core of the xfs_dquot_t that
+ * information for a user. This is the q_core of the struct xfs_dquot that
* is kept in kernel memory. We pad this with some more expansion room
* to construct the on disk structure.
*/
-typedef struct xfs_disk_dquot {
+struct xfs_disk_dquot {
__be16 d_magic; /* dquot magic = XFS_DQUOT_MAGIC */
__u8 d_version; /* dquot version */
__u8 d_flags; /* XFS_DQ_USER/PROJ/GROUP */
@@ -1171,15 +1171,15 @@ typedef struct xfs_disk_dquot {
__be32 d_rtbtimer; /* similar to above; for RT disk blocks */
__be16 d_rtbwarns; /* warnings issued wrt RT disk blocks */
__be16 d_pad;
-} xfs_disk_dquot_t;
+};
/*
* This is what goes on disk. This is separated from the xfs_disk_dquot because
* carrying the unnecessary padding would be a waste of memory.
*/
typedef struct xfs_dqblk {
- xfs_disk_dquot_t dd_diskdq; /* portion that lives incore as well */
- char dd_fill[4]; /* filling for posterity */
+ struct xfs_disk_dquot dd_diskdq; /* portion living incore as well */
+ char dd_fill[4];/* filling for posterity */
/*
* These two are only present on filesystems with the CRC bits set.
@@ -1540,6 +1540,13 @@ typedef struct xfs_bmdr_block {
#define BMBT_BLOCKCOUNT_BITLEN 21
#define BMBT_STARTOFF_MASK ((1ULL << BMBT_STARTOFF_BITLEN) - 1)
+#define BMBT_BLOCKCOUNT_MASK ((1ULL << BMBT_BLOCKCOUNT_BITLEN) - 1)
+
+/*
+ * bmbt records have a file offset (block) field that is 54 bits wide, so this
+ * is the largest xfs_fileoff_t that we ever expect to see.
+ */
+#define XFS_MAX_FILEOFF (BMBT_STARTOFF_MASK + BMBT_BLOCKCOUNT_MASK)
typedef struct xfs_bmbt_rec {
__be64 l0, l1;
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 52d03a3a02a4..ef95ca07d084 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -287,7 +287,7 @@ struct xfs_ag_geometry {
uint32_t ag_ifree; /* o: inodes free */
uint32_t ag_sick; /* o: sick things in ag */
uint32_t ag_checked; /* o: checked metadata in ag */
- uint32_t ag_reserved32; /* o: zero */
+ uint32_t ag_flags; /* i/o: flags for this ag */
uint64_t ag_reserved[12];/* o: zero */
};
#define XFS_AG_GEOM_SICK_SB (1 << 0) /* superblock */
@@ -324,7 +324,7 @@ typedef struct xfs_growfs_rt {
* Structures returned from ioctl XFS_IOC_FSBULKSTAT & XFS_IOC_FSBULKSTAT_SINGLE
*/
typedef struct xfs_bstime {
- time_t tv_sec; /* seconds */
+ __kernel_long_t tv_sec; /* seconds */
__s32 tv_nsec; /* and nanoseconds */
} xfs_bstime_t;
@@ -366,11 +366,11 @@ struct xfs_bulkstat {
uint64_t bs_blocks; /* number of blocks */
uint64_t bs_xflags; /* extended flags */
- uint64_t bs_atime; /* access time, seconds */
- uint64_t bs_mtime; /* modify time, seconds */
+ int64_t bs_atime; /* access time, seconds */
+ int64_t bs_mtime; /* modify time, seconds */
- uint64_t bs_ctime; /* inode change time, seconds */
- uint64_t bs_btime; /* creation time, seconds */
+ int64_t bs_ctime; /* inode change time, seconds */
+ int64_t bs_btime; /* creation time, seconds */
uint32_t bs_gen; /* generation count */
uint32_t bs_uid; /* user id */
@@ -416,7 +416,7 @@ struct xfs_bulkstat {
/*
* Project quota id helpers (previously projid was 16bit only
- * and using two 16bit values to hold new 32bit projid was choosen
+ * and using two 16bit values to hold new 32bit projid was chosen
* to retain compatibility with "old" filesystems).
*/
static inline uint32_t
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 04377ab75863..bf161e930f1d 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -276,6 +276,7 @@ xfs_ialloc_inode_init(
int i, j;
xfs_daddr_t d;
xfs_ino_t ino = 0;
+ int error;
/*
* Loop over the new block(s), filling in the inodes. For small block
@@ -327,12 +328,11 @@ xfs_ialloc_inode_init(
*/
d = XFS_AGB_TO_DADDR(mp, agno, agbno +
(j * M_IGEO(mp)->blocks_per_cluster));
- fbuf = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
- mp->m_bsize *
- M_IGEO(mp)->blocks_per_cluster,
- XBF_UNMAPPED);
- if (!fbuf)
- return -ENOMEM;
+ error = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
+ mp->m_bsize * M_IGEO(mp)->blocks_per_cluster,
+ XBF_UNMAPPED, &fbuf);
+ if (error)
+ return error;
/* Initialize the inode buffers and log them appropriately. */
fbuf->b_ops = &xfs_inode_buf_ops;
@@ -544,7 +544,10 @@ xfs_inobt_insert_sprec(
nrec->ir_free, &i);
if (error)
goto error;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error;
+ }
goto out;
}
@@ -557,17 +560,23 @@ xfs_inobt_insert_sprec(
error = xfs_inobt_get_rec(cur, &rec, &i);
if (error)
goto error;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error);
- XFS_WANT_CORRUPTED_GOTO(mp,
- rec.ir_startino == nrec->ir_startino,
- error);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error;
+ }
+ if (XFS_IS_CORRUPT(mp, rec.ir_startino != nrec->ir_startino)) {
+ error = -EFSCORRUPTED;
+ goto error;
+ }
/*
* This should never fail. If we have coexisting records that
* cannot merge, something is seriously wrong.
*/
- XFS_WANT_CORRUPTED_GOTO(mp, __xfs_inobt_can_merge(nrec, &rec),
- error);
+ if (XFS_IS_CORRUPT(mp, !__xfs_inobt_can_merge(nrec, &rec))) {
+ error = -EFSCORRUPTED;
+ goto error;
+ }
trace_xfs_irec_merge_pre(mp, agno, rec.ir_startino,
rec.ir_holemask, nrec->ir_startino,
@@ -1057,7 +1066,8 @@ xfs_ialloc_next_rec(
error = xfs_inobt_get_rec(cur, rec, &i);
if (error)
return error;
- XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
+ return -EFSCORRUPTED;
}
return 0;
@@ -1081,7 +1091,8 @@ xfs_ialloc_get_rec(
error = xfs_inobt_get_rec(cur, rec, &i);
if (error)
return error;
- XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
+ return -EFSCORRUPTED;
}
return 0;
@@ -1161,12 +1172,18 @@ xfs_dialloc_ag_inobt(
error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i);
if (error)
goto error0;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
error = xfs_inobt_get_rec(cur, &rec, &j);
if (error)
goto error0;
- XFS_WANT_CORRUPTED_GOTO(mp, j == 1, error0);
+ if (XFS_IS_CORRUPT(mp, j != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
if (rec.ir_freecount > 0) {
/*
@@ -1321,19 +1338,28 @@ xfs_dialloc_ag_inobt(
error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
if (error)
goto error0;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
for (;;) {
error = xfs_inobt_get_rec(cur, &rec, &i);
if (error)
goto error0;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
if (rec.ir_freecount > 0)
break;
error = xfs_btree_increment(cur, 0, &i);
if (error)
goto error0;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
}
alloc_inode:
@@ -1393,7 +1419,8 @@ xfs_dialloc_ag_finobt_near(
error = xfs_inobt_get_rec(lcur, rec, &i);
if (error)
return error;
- XFS_WANT_CORRUPTED_RETURN(lcur->bc_mp, i == 1);
+ if (XFS_IS_CORRUPT(lcur->bc_mp, i != 1))
+ return -EFSCORRUPTED;
/*
* See if we've landed in the parent inode record. The finobt
@@ -1416,10 +1443,16 @@ xfs_dialloc_ag_finobt_near(
error = xfs_inobt_get_rec(rcur, &rrec, &j);
if (error)
goto error_rcur;
- XFS_WANT_CORRUPTED_GOTO(lcur->bc_mp, j == 1, error_rcur);
+ if (XFS_IS_CORRUPT(lcur->bc_mp, j != 1)) {
+ error = -EFSCORRUPTED;
+ goto error_rcur;
+ }
}
- XFS_WANT_CORRUPTED_GOTO(lcur->bc_mp, i == 1 || j == 1, error_rcur);
+ if (XFS_IS_CORRUPT(lcur->bc_mp, i != 1 && j != 1)) {
+ error = -EFSCORRUPTED;
+ goto error_rcur;
+ }
if (i == 1 && j == 1) {
/*
* Both the left and right records are valid. Choose the closer
@@ -1472,7 +1505,8 @@ xfs_dialloc_ag_finobt_newino(
error = xfs_inobt_get_rec(cur, rec, &i);
if (error)
return error;
- XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
+ return -EFSCORRUPTED;
return 0;
}
}
@@ -1483,12 +1517,14 @@ xfs_dialloc_ag_finobt_newino(
error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
if (error)
return error;
- XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
+ return -EFSCORRUPTED;
error = xfs_inobt_get_rec(cur, rec, &i);
if (error)
return error;
- XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
+ return -EFSCORRUPTED;
return 0;
}
@@ -1510,20 +1546,24 @@ xfs_dialloc_ag_update_inobt(
error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i);
if (error)
return error;
- XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
+ return -EFSCORRUPTED;
error = xfs_inobt_get_rec(cur, &rec, &i);
if (error)
return error;
- XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, i == 1);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1))
+ return -EFSCORRUPTED;
ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) %
XFS_INODES_PER_CHUNK) == 0);
rec.ir_free &= ~XFS_INOBT_MASK(offset);
rec.ir_freecount--;
- XFS_WANT_CORRUPTED_RETURN(cur->bc_mp, (rec.ir_free == frec->ir_free) &&
- (rec.ir_freecount == frec->ir_freecount));
+ if (XFS_IS_CORRUPT(cur->bc_mp,
+ rec.ir_free != frec->ir_free ||
+ rec.ir_freecount != frec->ir_freecount))
+ return -EFSCORRUPTED;
return xfs_inobt_update(cur, &rec);
}
@@ -1933,14 +1973,20 @@ xfs_difree_inobt(
__func__, error);
goto error0;
}
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
error = xfs_inobt_get_rec(cur, &rec, &i);
if (error) {
xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.",
__func__, error);
goto error0;
}
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error0);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error0;
+ }
/*
* Get the offset in the inode chunk.
*/
@@ -2052,7 +2098,10 @@ xfs_difree_finobt(
* freed an inode in a previously fully allocated chunk. If not,
* something is out of sync.
*/
- XFS_WANT_CORRUPTED_GOTO(mp, ibtrec->ir_freecount == 1, error);
+ if (XFS_IS_CORRUPT(mp, ibtrec->ir_freecount != 1)) {
+ error = -EFSCORRUPTED;
+ goto error;
+ }
error = xfs_inobt_insert_rec(cur, ibtrec->ir_holemask,
ibtrec->ir_count,
@@ -2075,14 +2124,20 @@ xfs_difree_finobt(
error = xfs_inobt_get_rec(cur, &rec, &i);
if (error)
goto error;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, error);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto error;
+ }
rec.ir_free |= XFS_INOBT_MASK(offset);
rec.ir_freecount++;
- XFS_WANT_CORRUPTED_GOTO(mp, (rec.ir_free == ibtrec->ir_free) &&
- (rec.ir_freecount == ibtrec->ir_freecount),
- error);
+ if (XFS_IS_CORRUPT(mp,
+ rec.ir_free != ibtrec->ir_free ||
+ rec.ir_freecount != ibtrec->ir_freecount)) {
+ error = -EFSCORRUPTED;
+ goto error;
+ }
/*
* The content of inobt records should always match between the inobt
@@ -2787,8 +2842,13 @@ xfs_ialloc_setup_geometry(
igeo->inobt_maxlevels = xfs_btree_compute_maxlevels(igeo->inobt_mnr,
inodes);
- /* Set the maximum inode count for this filesystem. */
- if (sbp->sb_imax_pct) {
+ /*
+ * Set the maximum inode count for this filesystem, being careful not
+ * to use obviously garbage sb_inopblog/sb_inopblock values. Regular
+ * users should never get here due to failing sb verification, but
+ * certain users (xfs_db) need to be usable even with corrupt metadata.
+ */
+ if (sbp->sb_imax_pct && igeo->ialloc_blks) {
/*
* Make sure the maximum inode count is a multiple
* of the units we allocate inodes in.
@@ -2849,3 +2909,67 @@ xfs_ialloc_setup_geometry(
else
igeo->ialloc_align = 0;
}
+
+/* Compute the location of the root directory inode that is laid out by mkfs. */
+xfs_ino_t
+xfs_ialloc_calc_rootino(
+ struct xfs_mount *mp,
+ int sunit)
+{
+ struct xfs_ino_geometry *igeo = M_IGEO(mp);
+ xfs_agblock_t first_bno;
+
+ /*
+ * Pre-calculate the geometry of AG 0. We know what it looks like
+ * because libxfs knows how to create allocation groups now.
+ *
+ * first_bno is the first block in which mkfs could possibly have
+ * allocated the root directory inode, once we factor in the metadata
+ * that mkfs formats before it. Namely, the four AG headers...
+ */
+ first_bno = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);
+
+ /* ...the two free space btree roots... */
+ first_bno += 2;
+
+ /* ...the inode btree root... */
+ first_bno += 1;
+
+ /* ...the initial AGFL... */
+ first_bno += xfs_alloc_min_freelist(mp, NULL);
+
+ /* ...the free inode btree root... */
+ if (xfs_sb_version_hasfinobt(&mp->m_sb))
+ first_bno++;
+
+ /* ...the reverse mapping btree root... */
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+ first_bno++;
+
+ /* ...the reference count btree... */
+ if (xfs_sb_version_hasreflink(&mp->m_sb))
+ first_bno++;
+
+ /*
+ * ...and the log, if it is allocated in the first allocation group.
+ *
+ * This can happen with filesystems that only have a single
+ * allocation group, or very odd geometries created by old mkfs
+ * versions on very small filesystems.
+ */
+ if (mp->m_sb.sb_logstart &&
+ XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart) == 0)
+ first_bno += mp->m_sb.sb_logblocks;
+
+ /*
+ * Now round first_bno up to whatever allocation alignment is given
+ * by the filesystem or was passed in.
+ */
+ if (xfs_sb_version_hasdalign(&mp->m_sb) && igeo->ialloc_align > 0)
+ first_bno = roundup(first_bno, sunit);
+ else if (xfs_sb_version_hasalign(&mp->m_sb) &&
+ mp->m_sb.sb_inoalignmt > 1)
+ first_bno = roundup(first_bno, mp->m_sb.sb_inoalignmt);
+
+ return XFS_AGINO_TO_INO(mp, 0, XFS_AGB_TO_AGINO(mp, first_bno));
+}
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index 323592d563d5..72b3468b97b1 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -152,5 +152,6 @@ int xfs_inobt_insert_rec(struct xfs_btree_cur *cur, uint16_t holemask,
int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
void xfs_ialloc_setup_geometry(struct xfs_mount *mp);
+xfs_ino_t xfs_ialloc_calc_rootino(struct xfs_mount *mp, int sunit);
#endif /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
index 27aa3f2bc4bc..52451809c478 100644
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -596,7 +596,7 @@ xfs_iext_realloc_root(
struct xfs_ifork *ifp,
struct xfs_iext_cursor *cur)
{
- size_t new_size = ifp->if_bytes + sizeof(struct xfs_iext_rec);
+ int64_t new_size = ifp->if_bytes + sizeof(struct xfs_iext_rec);
void *new;
/* account for the prev/next pointers */
@@ -616,7 +616,7 @@ xfs_iext_realloc_root(
* sequence counter is seen before the modifications to the extent tree itself
* take effect.
*/
-static inline void xfs_iext_inc_seq(struct xfs_ifork *ifp, int state)
+static inline void xfs_iext_inc_seq(struct xfs_ifork *ifp)
{
WRITE_ONCE(ifp->if_seq, READ_ONCE(ifp->if_seq) + 1);
}
@@ -633,7 +633,7 @@ xfs_iext_insert(
struct xfs_iext_leaf *new = NULL;
int nr_entries, i;
- xfs_iext_inc_seq(ifp, state);
+ xfs_iext_inc_seq(ifp);
if (ifp->if_height == 0)
xfs_iext_alloc_root(ifp, cur);
@@ -875,7 +875,7 @@ xfs_iext_remove(
ASSERT(ifp->if_u1.if_root != NULL);
ASSERT(xfs_iext_valid(ifp, cur));
- xfs_iext_inc_seq(ifp, state);
+ xfs_iext_inc_seq(ifp);
nr_entries = xfs_iext_leaf_nr_entries(ifp, leaf, cur->pos) - 1;
for (i = cur->pos; i < nr_entries; i++)
@@ -983,7 +983,7 @@ xfs_iext_update_extent(
{
struct xfs_ifork *ifp = xfs_iext_state_to_fork(ip, state);
- xfs_iext_inc_seq(ifp, state);
+ xfs_iext_inc_seq(ifp);
if (cur->pos == 0) {
struct xfs_bmbt_irec old;
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 28ab3c5255e1..8afacfe4be0a 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -213,13 +213,12 @@ xfs_inode_from_disk(
to->di_version = from->di_version;
if (to->di_version == 1) {
set_nlink(inode, be16_to_cpu(from->di_onlink));
- to->di_projid_lo = 0;
- to->di_projid_hi = 0;
+ to->di_projid = 0;
to->di_version = 2;
} else {
set_nlink(inode, be32_to_cpu(from->di_nlink));
- to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
- to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
+ to->di_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 |
+ be16_to_cpu(from->di_projid_lo);
}
to->di_format = from->di_format;
@@ -256,8 +255,8 @@ xfs_inode_from_disk(
if (to->di_version == 3) {
inode_set_iversion_queried(inode,
be64_to_cpu(from->di_changecount));
- to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
- to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
+ to->di_crtime.tv_sec = be32_to_cpu(from->di_crtime.t_sec);
+ to->di_crtime.tv_nsec = be32_to_cpu(from->di_crtime.t_nsec);
to->di_flags2 = be64_to_cpu(from->di_flags2);
to->di_cowextsize = be32_to_cpu(from->di_cowextsize);
}
@@ -279,8 +278,8 @@ xfs_inode_to_disk(
to->di_format = from->di_format;
to->di_uid = cpu_to_be32(from->di_uid);
to->di_gid = cpu_to_be32(from->di_gid);
- to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
- to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
+ to->di_projid_lo = cpu_to_be16(from->di_projid & 0xffff);
+ to->di_projid_hi = cpu_to_be16(from->di_projid >> 16);
memset(to->di_pad, 0, sizeof(to->di_pad));
to->di_atime.t_sec = cpu_to_be32(inode->i_atime.tv_sec);
@@ -306,8 +305,8 @@ xfs_inode_to_disk(
if (from->di_version == 3) {
to->di_changecount = cpu_to_be64(inode_peek_iversion(inode));
- to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
- to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
+ to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.tv_sec);
+ to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.tv_nsec);
to->di_flags2 = cpu_to_be64(from->di_flags2);
to->di_cowextsize = cpu_to_be32(from->di_cowextsize);
to->di_ino = cpu_to_be64(ip->i_ino);
@@ -632,8 +631,6 @@ xfs_iread(
if ((iget_flags & XFS_IGET_CREATE) &&
xfs_sb_version_hascrc(&mp->m_sb) &&
!(mp->m_flags & XFS_MOUNT_IKEEP)) {
- /* initialise the on-disk inode core */
- memset(&ip->i_d, 0, sizeof(ip->i_d));
VFS_I(ip)->i_generation = prandom_u32();
ip->i_d.di_version = 3;
return 0;
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index ab0f84165317..fd94b1078722 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -21,8 +21,7 @@ struct xfs_icdinode {
uint16_t di_flushiter; /* incremented on flush */
uint32_t di_uid; /* owner's user id */
uint32_t di_gid; /* owner's group id */
- uint16_t di_projid_lo; /* lower part of owner's project id */
- uint16_t di_projid_hi; /* higher part of owner's project id */
+ uint32_t di_projid; /* owner's project id */
xfs_fsize_t di_size; /* number of bytes in file */
xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */
xfs_extlen_t di_extsize; /* basic/minimum extent size for file */
@@ -37,7 +36,7 @@ struct xfs_icdinode {
uint64_t di_flags2; /* more random flags */
uint32_t di_cowextsize; /* basic cow extent size for file */
- xfs_ictimestamp_t di_crtime; /* time created */
+ struct timespec64 di_crtime; /* time created */
};
/*
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index bf3e04018246..ad2b9c313fd2 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -75,11 +75,15 @@ xfs_iformat_fork(
error = xfs_iformat_btree(ip, dip, XFS_DATA_FORK);
break;
default:
+ xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__,
+ dip, sizeof(*dip), __this_address);
return -EFSCORRUPTED;
}
break;
default:
+ xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip,
+ sizeof(*dip), __this_address);
return -EFSCORRUPTED;
}
if (error)
@@ -94,7 +98,7 @@ xfs_iformat_fork(
return 0;
ASSERT(ip->i_afp == NULL);
- ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_SLEEP | KM_NOFS);
+ ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone, KM_NOFS);
switch (dip->di_aformat) {
case XFS_DINODE_FMT_LOCAL:
@@ -110,14 +114,16 @@ xfs_iformat_fork(
error = xfs_iformat_btree(ip, dip, XFS_ATTR_FORK);
break;
default:
+ xfs_inode_verifier_error(ip, error, __func__, dip,
+ sizeof(*dip), __this_address);
error = -EFSCORRUPTED;
break;
}
if (error) {
- kmem_zone_free(xfs_ifork_zone, ip->i_afp);
+ kmem_cache_free(xfs_ifork_zone, ip->i_afp);
ip->i_afp = NULL;
if (ip->i_cowfp)
- kmem_zone_free(xfs_ifork_zone, ip->i_cowfp);
+ kmem_cache_free(xfs_ifork_zone, ip->i_cowfp);
ip->i_cowfp = NULL;
xfs_idestroy_fork(ip, XFS_DATA_FORK);
}
@@ -129,7 +135,7 @@ xfs_init_local_fork(
struct xfs_inode *ip,
int whichfork,
const void *data,
- int size)
+ int64_t size)
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
int mem_size = size, real_size = 0;
@@ -147,7 +153,7 @@ xfs_init_local_fork(
if (size) {
real_size = roundup(mem_size, 4);
- ifp->if_u1.if_data = kmem_alloc(real_size, KM_SLEEP | KM_NOFS);
+ ifp->if_u1.if_data = kmem_alloc(real_size, KM_NOFS);
memcpy(ifp->if_u1.if_data, data, size);
if (zero_terminate)
ifp->if_u1.if_data[size] = '\0';
@@ -302,7 +308,7 @@ xfs_iformat_btree(
}
ifp->if_broot_bytes = size;
- ifp->if_broot = kmem_alloc(size, KM_SLEEP | KM_NOFS);
+ ifp->if_broot = kmem_alloc(size, KM_NOFS);
ASSERT(ifp->if_broot != NULL);
/*
* Copy and convert from the on-disk structure
@@ -367,7 +373,7 @@ xfs_iroot_realloc(
*/
if (ifp->if_broot_bytes == 0) {
new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
- ifp->if_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
+ ifp->if_broot = kmem_alloc(new_size, KM_NOFS);
ifp->if_broot_bytes = (int)new_size;
return;
}
@@ -382,7 +388,7 @@ xfs_iroot_realloc(
new_max = cur_max + rec_diff;
new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
ifp->if_broot = kmem_realloc(ifp->if_broot, new_size,
- KM_SLEEP | KM_NOFS);
+ KM_NOFS);
op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
ifp->if_broot_bytes);
np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
@@ -408,7 +414,7 @@ xfs_iroot_realloc(
else
new_size = 0;
if (new_size > 0) {
- new_broot = kmem_alloc(new_size, KM_SLEEP | KM_NOFS);
+ new_broot = kmem_alloc(new_size, KM_NOFS);
/*
* First copy over the btree block header.
*/
@@ -467,11 +473,11 @@ xfs_iroot_realloc(
void
xfs_idata_realloc(
struct xfs_inode *ip,
- int byte_diff,
+ int64_t byte_diff,
int whichfork)
{
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
- int new_size = (int)ifp->if_bytes + byte_diff;
+ int64_t new_size = ifp->if_bytes + byte_diff;
ASSERT(new_size >= 0);
ASSERT(new_size <= XFS_IFORK_SIZE(ip, whichfork));
@@ -492,7 +498,7 @@ xfs_idata_realloc(
* We enforce that here.
*/
ifp->if_u1.if_data = kmem_realloc(ifp->if_u1.if_data,
- roundup(new_size, 4), KM_SLEEP | KM_NOFS);
+ roundup(new_size, 4), KM_NOFS);
ifp->if_bytes = new_size;
}
@@ -525,10 +531,10 @@ xfs_idestroy_fork(
}
if (whichfork == XFS_ATTR_FORK) {
- kmem_zone_free(xfs_ifork_zone, ip->i_afp);
+ kmem_cache_free(xfs_ifork_zone, ip->i_afp);
ip->i_afp = NULL;
} else if (whichfork == XFS_COW_FORK) {
- kmem_zone_free(xfs_ifork_zone, ip->i_cowfp);
+ kmem_cache_free(xfs_ifork_zone, ip->i_cowfp);
ip->i_cowfp = NULL;
}
}
@@ -552,7 +558,7 @@ xfs_iextents_copy(
struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
struct xfs_iext_cursor icur;
struct xfs_bmbt_irec rec;
- int copied = 0;
+ int64_t copied = 0;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
ASSERT(ifp->if_bytes > 0);
@@ -683,7 +689,7 @@ xfs_ifork_init_cow(
return;
ip->i_cowfp = kmem_zone_zalloc(xfs_ifork_zone,
- KM_SLEEP | KM_NOFS);
+ KM_NOFS);
ip->i_cowfp->if_flags = XFS_IFEXTENTS;
ip->i_cformat = XFS_DINODE_FMT_EXTENTS;
ip->i_cnextents = 0;
diff --git a/fs/xfs/libxfs/xfs_inode_fork.h b/fs/xfs/libxfs/xfs_inode_fork.h
index 00c62ce170d0..500333d0101e 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.h
+++ b/fs/xfs/libxfs/xfs_inode_fork.h
@@ -13,16 +13,16 @@ struct xfs_dinode;
* File incore extent information, present for each of data & attr forks.
*/
struct xfs_ifork {
- int if_bytes; /* bytes in if_u1 */
- unsigned int if_seq; /* fork mod counter */
+ int64_t if_bytes; /* bytes in if_u1 */
struct xfs_btree_block *if_broot; /* file's incore btree root */
- short if_broot_bytes; /* bytes allocated for root */
- unsigned char if_flags; /* per-fork flags */
+ unsigned int if_seq; /* fork mod counter */
int if_height; /* height of the extent tree */
union {
void *if_root; /* extent tree root */
char *if_data; /* inline file data */
} if_u1;
+ short if_broot_bytes; /* bytes allocated for root */
+ unsigned char if_flags; /* per-fork flags */
};
/*
@@ -87,18 +87,24 @@ struct xfs_ifork {
#define XFS_IFORK_MAXEXT(ip, w) \
(XFS_IFORK_SIZE(ip, w) / sizeof(xfs_bmbt_rec_t))
+#define xfs_ifork_has_extents(ip, w) \
+ (XFS_IFORK_FORMAT((ip), (w)) == XFS_DINODE_FMT_EXTENTS || \
+ XFS_IFORK_FORMAT((ip), (w)) == XFS_DINODE_FMT_BTREE)
+
struct xfs_ifork *xfs_iext_state_to_fork(struct xfs_inode *ip, int state);
int xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *);
void xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *,
struct xfs_inode_log_item *, int);
void xfs_idestroy_fork(struct xfs_inode *, int);
-void xfs_idata_realloc(struct xfs_inode *, int, int);
+void xfs_idata_realloc(struct xfs_inode *ip, int64_t byte_diff,
+ int whichfork);
void xfs_iroot_realloc(struct xfs_inode *, int, int);
int xfs_iread_extents(struct xfs_trans *, struct xfs_inode *, int);
int xfs_iextents_copy(struct xfs_inode *, struct xfs_bmbt_rec *,
int);
-void xfs_init_local_fork(struct xfs_inode *, int, const void *, int);
+void xfs_init_local_fork(struct xfs_inode *ip, int whichfork,
+ const void *data, int64_t size);
xfs_extnum_t xfs_iext_count(struct xfs_ifork *ifp);
void xfs_iext_insert(struct xfs_inode *, struct xfs_iext_cursor *cur,
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index e5f97c69b320..9bac0d2e56dc 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -432,9 +432,9 @@ static inline uint xfs_log_dinode_size(int version)
}
/*
- * Buffer Log Format defintions
+ * Buffer Log Format definitions
*
- * These are the physical dirty bitmap defintions for the log format structure.
+ * These are the physical dirty bitmap definitions for the log format structure.
*/
#define XFS_BLF_CHUNK 128
#define XFS_BLF_SHIFT 7
@@ -462,11 +462,20 @@ static inline uint xfs_log_dinode_size(int version)
#define XFS_BLF_GDQUOT_BUF (1<<4)
/*
- * This is the structure used to lay out a buf log item in the
- * log. The data map describes which 128 byte chunks of the buffer
- * have been logged.
- */
-#define XFS_BLF_DATAMAP_SIZE ((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / NBWORD)
+ * This is the structure used to lay out a buf log item in the log. The data
+ * map describes which 128 byte chunks of the buffer have been logged.
+ *
+ * The placement of blf_map_size causes blf_data_map to start at an odd
+ * multiple of sizeof(unsigned int) offset within the struct. Because the data
+ * bitmap size will always be an even number, the end of the data_map (and
+ * therefore the structure) will also be at an odd multiple of sizeof(unsigned
+ * int). Some 64-bit compilers will insert padding at the end of the struct to
+ * ensure 64-bit alignment of blf_blkno, but 32-bit ones will not. Therefore,
+ * XFS_BLF_DATAMAP_SIZE must be an odd number to make the padding explicit and
+ * keep the structure size consistent between 32-bit and 64-bit platforms.
+ */
+#define __XFS_BLF_DATAMAP_SIZE ((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) / NBWORD)
+#define XFS_BLF_DATAMAP_SIZE (__XFS_BLF_DATAMAP_SIZE + 1)
typedef struct xfs_buf_log_format {
unsigned short blf_type; /* buf log item type indicator */
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index f3d18eaecebb..3bf671637a91 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -30,14 +30,14 @@ typedef struct xlog_recover_item {
xfs_log_iovec_t *ri_buf; /* ptr to regions buffer */
} xlog_recover_item_t;
-typedef struct xlog_recover {
+struct xlog_recover {
struct hlist_node r_list;
xlog_tid_t r_log_tid; /* log's transaction id */
xfs_trans_header_t r_theader; /* trans header for partial */
int r_state; /* not needed */
xfs_lsn_t r_lsn; /* xact lsn */
struct list_head r_itemq; /* q for items */
-} xlog_recover_t;
+};
#define ITEM_TYPE(i) (*(unsigned short *)(i)->ri_buf[0].i_addr)
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 51bb9bdb0e84..6e1665f2cb67 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -200,7 +200,10 @@ xfs_refcount_insert(
error = xfs_btree_insert(cur, i);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
out_error:
if (error)
@@ -227,10 +230,16 @@ xfs_refcount_delete(
error = xfs_refcount_get_rec(cur, &irec, &found_rec);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
trace_xfs_refcount_delete(cur->bc_mp, cur->bc_private.a.agno, &irec);
error = xfs_btree_delete(cur, i);
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, *i == 1, out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
if (error)
goto out_error;
error = xfs_refcount_lookup_ge(cur, irec.rc_startblock, &found_rec);
@@ -349,7 +358,10 @@ xfs_refcount_split_extent(
error = xfs_refcount_get_rec(cur, &rcext, &found_rec);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno)
return 0;
@@ -371,7 +383,10 @@ xfs_refcount_split_extent(
error = xfs_refcount_insert(cur, &tmp, &found_rec);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
return error;
out_error:
@@ -410,19 +425,27 @@ xfs_refcount_merge_center_extents(
&found_rec);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
error = xfs_refcount_delete(cur, &found_rec);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
if (center->rc_refcount > 1) {
error = xfs_refcount_delete(cur, &found_rec);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
- out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
}
/* Enlarge the left extent. */
@@ -430,7 +453,10 @@ xfs_refcount_merge_center_extents(
&found_rec);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
left->rc_blockcount = extlen;
error = xfs_refcount_update(cur, left);
@@ -469,14 +495,18 @@ xfs_refcount_merge_left_extent(
&found_rec);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
- out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
error = xfs_refcount_delete(cur, &found_rec);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
- out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
}
/* Enlarge the left extent. */
@@ -484,7 +514,10 @@ xfs_refcount_merge_left_extent(
&found_rec);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
left->rc_blockcount += cleft->rc_blockcount;
error = xfs_refcount_update(cur, left);
@@ -526,14 +559,18 @@ xfs_refcount_merge_right_extent(
&found_rec);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
- out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
error = xfs_refcount_delete(cur, &found_rec);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
- out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
}
/* Enlarge the right extent. */
@@ -541,7 +578,10 @@ xfs_refcount_merge_right_extent(
&found_rec);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
right->rc_startblock -= cright->rc_blockcount;
right->rc_blockcount += cright->rc_blockcount;
@@ -587,7 +627,10 @@ xfs_refcount_find_left_extents(
error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
if (xfs_refc_next(&tmp) != agbno)
return 0;
@@ -605,8 +648,10 @@ xfs_refcount_find_left_extents(
error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
- out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
/* if tmp starts at the end of our range, just use that */
if (tmp.rc_startblock == agbno)
@@ -671,7 +716,10 @@ xfs_refcount_find_right_extents(
error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1, out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
if (tmp.rc_startblock != agbno + aglen)
return 0;
@@ -689,8 +737,10 @@ xfs_refcount_find_right_extents(
error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, found_rec == 1,
- out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
/* if tmp ends at the end of our range, just use that */
if (xfs_refc_next(&tmp) == agbno + aglen)
@@ -913,8 +963,11 @@ xfs_refcount_adjust_extents(
&found_tmp);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
- found_tmp == 1, out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp,
+ found_tmp != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
cur->bc_private.a.priv.refc.nr_ops++;
} else {
fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
@@ -955,8 +1008,10 @@ xfs_refcount_adjust_extents(
error = xfs_refcount_delete(cur, &found_rec);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
- found_rec == 1, out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
cur->bc_private.a.priv.refc.nr_ops++;
goto advloop;
} else {
@@ -1122,8 +1177,6 @@ xfs_refcount_finish_one(
XFS_ALLOC_FLAG_FREEING, &agbp);
if (error)
return error;
- if (!agbp)
- return -EFSCORRUPTED;
rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno);
if (!rcur) {
@@ -1174,7 +1227,7 @@ out_cur:
/*
* Record a refcount intent for later processing.
*/
-static int
+static void
__xfs_refcount_add(
struct xfs_trans *tp,
enum xfs_refcount_intent_type type,
@@ -1189,44 +1242,43 @@ __xfs_refcount_add(
blockcount);
ri = kmem_alloc(sizeof(struct xfs_refcount_intent),
- KM_SLEEP | KM_NOFS);
+ KM_NOFS);
INIT_LIST_HEAD(&ri->ri_list);
ri->ri_type = type;
ri->ri_startblock = startblock;
ri->ri_blockcount = blockcount;
xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list);
- return 0;
}
/*
* Increase the reference count of the blocks backing a file's extent.
*/
-int
+void
xfs_refcount_increase_extent(
struct xfs_trans *tp,
struct xfs_bmbt_irec *PREV)
{
if (!xfs_sb_version_hasreflink(&tp->t_mountp->m_sb))
- return 0;
+ return;
- return __xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE,
- PREV->br_startblock, PREV->br_blockcount);
+ __xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE, PREV->br_startblock,
+ PREV->br_blockcount);
}
/*
* Decrease the reference count of the blocks backing a file's extent.
*/
-int
+void
xfs_refcount_decrease_extent(
struct xfs_trans *tp,
struct xfs_bmbt_irec *PREV)
{
if (!xfs_sb_version_hasreflink(&tp->t_mountp->m_sb))
- return 0;
+ return;
- return __xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE,
- PREV->br_startblock, PREV->br_blockcount);
+ __xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE, PREV->br_startblock,
+ PREV->br_blockcount);
}
/*
@@ -1273,7 +1325,10 @@ xfs_refcount_find_shared(
error = xfs_refcount_get_rec(cur, &tmp, &i);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
/* If the extent ends before the start, look at the next one */
if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) {
@@ -1285,7 +1340,10 @@ xfs_refcount_find_shared(
error = xfs_refcount_get_rec(cur, &tmp, &i);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
}
/* If the extent starts after the range we want, bail out */
@@ -1313,7 +1371,10 @@ xfs_refcount_find_shared(
error = xfs_refcount_get_rec(cur, &tmp, &i);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp, i == 1, out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
if (tmp.rc_startblock >= agbno + aglen ||
tmp.rc_startblock != *fbno + *flen)
break;
@@ -1414,8 +1475,11 @@ xfs_refcount_adjust_cow_extents(
switch (adj) {
case XFS_REFCOUNT_ADJUST_COW_ALLOC:
/* Adding a CoW reservation, there should be nothing here. */
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
- ext.rc_startblock >= agbno + aglen, out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp,
+ agbno + aglen > ext.rc_startblock)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
tmp.rc_startblock = agbno;
tmp.rc_blockcount = aglen;
@@ -1427,17 +1491,25 @@ xfs_refcount_adjust_cow_extents(
&found_tmp);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
- found_tmp == 1, out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp, found_tmp != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
break;
case XFS_REFCOUNT_ADJUST_COW_FREE:
/* Removing a CoW reservation, there should be one extent. */
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
- ext.rc_startblock == agbno, out_error);
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
- ext.rc_blockcount == aglen, out_error);
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
- ext.rc_refcount == 1, out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_startblock != agbno)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
+ if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount != aglen)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
+ if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_refcount != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
ext.rc_refcount = 0;
trace_xfs_refcount_modify_extent(cur->bc_mp,
@@ -1445,8 +1517,10 @@ xfs_refcount_adjust_cow_extents(
error = xfs_refcount_delete(cur, &found_rec);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(cur->bc_mp,
- found_rec == 1, out_error);
+ if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
break;
default:
ASSERT(0);
@@ -1541,47 +1615,40 @@ __xfs_refcount_cow_free(
}
/* Record a CoW staging extent in the refcount btree. */
-int
+void
xfs_refcount_alloc_cow_extent(
struct xfs_trans *tp,
xfs_fsblock_t fsb,
xfs_extlen_t len)
{
struct xfs_mount *mp = tp->t_mountp;
- int error;
if (!xfs_sb_version_hasreflink(&mp->m_sb))
- return 0;
+ return;
- error = __xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len);
- if (error)
- return error;
+ __xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len);
/* Add rmap entry */
- return xfs_rmap_alloc_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
+ xfs_rmap_alloc_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
}
/* Forget a CoW staging event in the refcount btree. */
-int
+void
xfs_refcount_free_cow_extent(
struct xfs_trans *tp,
xfs_fsblock_t fsb,
xfs_extlen_t len)
{
struct xfs_mount *mp = tp->t_mountp;
- int error;
if (!xfs_sb_version_hasreflink(&mp->m_sb))
- return 0;
+ return;
/* Remove rmap entry */
- error = xfs_rmap_free_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
+ xfs_rmap_free_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
- if (error)
- return error;
-
- return __xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, fsb, len);
+ __xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, fsb, len);
}
struct xfs_refcount_recovery {
@@ -1592,17 +1659,18 @@ struct xfs_refcount_recovery {
/* Stuff an extent on the recovery list. */
STATIC int
xfs_refcount_recover_extent(
- struct xfs_btree_cur *cur,
+ struct xfs_btree_cur *cur,
union xfs_btree_rec *rec,
void *priv)
{
struct list_head *debris = priv;
struct xfs_refcount_recovery *rr;
- if (be32_to_cpu(rec->refc.rc_refcount) != 1)
+ if (XFS_IS_CORRUPT(cur->bc_mp,
+ be32_to_cpu(rec->refc.rc_refcount) != 1))
return -EFSCORRUPTED;
- rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), KM_SLEEP);
+ rr = kmem_alloc(sizeof(struct xfs_refcount_recovery), 0);
xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);
list_add_tail(&rr->rr_list, debris);
@@ -1648,10 +1716,6 @@ xfs_refcount_recover_cow_leftovers(
error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
if (error)
goto out_trans;
- if (!agbp) {
- error = -ENOMEM;
- goto out_trans;
- }
cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno);
/* Find all the leftover CoW staging extents. */
@@ -1679,10 +1743,8 @@ xfs_refcount_recover_cow_leftovers(
/* Free the orphan record */
agbno = rr->rr_rrec.rc_startblock - XFS_REFC_COW_START;
fsb = XFS_AGB_TO_FSB(mp, agno, agbno);
- error = xfs_refcount_free_cow_extent(tp, fsb,
+ xfs_refcount_free_cow_extent(tp, fsb,
rr->rr_rrec.rc_blockcount);
- if (error)
- goto out_trans;
/* Free the block. */
xfs_bmap_add_free(tp, fsb, rr->rr_rrec.rc_blockcount, NULL);
diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h
index 1d9c518575e7..209795539c8d 100644
--- a/fs/xfs/libxfs/xfs_refcount.h
+++ b/fs/xfs/libxfs/xfs_refcount.h
@@ -29,9 +29,9 @@ struct xfs_refcount_intent {
xfs_extlen_t ri_blockcount;
};
-extern int xfs_refcount_increase_extent(struct xfs_trans *tp,
+void xfs_refcount_increase_extent(struct xfs_trans *tp,
struct xfs_bmbt_irec *irec);
-extern int xfs_refcount_decrease_extent(struct xfs_trans *tp,
+void xfs_refcount_decrease_extent(struct xfs_trans *tp,
struct xfs_bmbt_irec *irec);
extern void xfs_refcount_finish_one_cleanup(struct xfs_trans *tp,
@@ -45,10 +45,10 @@ extern int xfs_refcount_find_shared(struct xfs_btree_cur *cur,
xfs_agblock_t agbno, xfs_extlen_t aglen, xfs_agblock_t *fbno,
xfs_extlen_t *flen, bool find_end_of_shared);
-extern int xfs_refcount_alloc_cow_extent(struct xfs_trans *tp,
- xfs_fsblock_t fsb, xfs_extlen_t len);
-extern int xfs_refcount_free_cow_extent(struct xfs_trans *tp,
- xfs_fsblock_t fsb, xfs_extlen_t len);
+void xfs_refcount_alloc_cow_extent(struct xfs_trans *tp, xfs_fsblock_t fsb,
+ xfs_extlen_t len);
+void xfs_refcount_free_cow_extent(struct xfs_trans *tp, xfs_fsblock_t fsb,
+ xfs_extlen_t len);
extern int xfs_refcount_recover_cow_leftovers(struct xfs_mount *mp,
xfs_agnumber_t agno);
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index e6aeb390b2fb..ff9412f113c4 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -113,7 +113,10 @@ xfs_rmap_insert(
error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, i == 0, done);
+ if (XFS_IS_CORRUPT(rcur->bc_mp, i != 0)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
rcur->bc_rec.r.rm_startblock = agbno;
rcur->bc_rec.r.rm_blockcount = len;
@@ -123,7 +126,10 @@ xfs_rmap_insert(
error = xfs_btree_insert(rcur, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, i == 1, done);
+ if (XFS_IS_CORRUPT(rcur->bc_mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
done:
if (error)
trace_xfs_rmap_insert_error(rcur->bc_mp,
@@ -149,12 +155,18 @@ xfs_rmap_delete(
error = xfs_rmap_lookup_eq(rcur, agbno, len, owner, offset, flags, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, i == 1, done);
+ if (XFS_IS_CORRUPT(rcur->bc_mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_btree_delete(rcur, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(rcur->bc_mp, i == 1, done);
+ if (XFS_IS_CORRUPT(rcur->bc_mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
done:
if (error)
trace_xfs_rmap_delete_error(rcur->bc_mp,
@@ -168,7 +180,6 @@ xfs_rmap_btrec_to_irec(
union xfs_btree_rec *rec,
struct xfs_rmap_irec *irec)
{
- irec->rm_flags = 0;
irec->rm_startblock = be32_to_cpu(rec->rmap.rm_startblock);
irec->rm_blockcount = be32_to_cpu(rec->rmap.rm_blockcount);
irec->rm_owner = be64_to_cpu(rec->rmap.rm_owner);
@@ -254,15 +265,15 @@ xfs_rmap_find_left_neighbor_helper(
rec->rm_flags);
if (rec->rm_owner != info->high.rm_owner)
- return XFS_BTREE_QUERY_RANGE_CONTINUE;
+ return 0;
if (!XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) &&
!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) &&
rec->rm_offset + rec->rm_blockcount - 1 != info->high.rm_offset)
- return XFS_BTREE_QUERY_RANGE_CONTINUE;
+ return 0;
*info->irec = *rec;
*info->stat = 1;
- return XFS_BTREE_QUERY_RANGE_ABORT;
+ return -ECANCELED;
}
/*
@@ -305,7 +316,7 @@ xfs_rmap_find_left_neighbor(
error = xfs_rmap_query_range(cur, &info.high, &info.high,
xfs_rmap_find_left_neighbor_helper, &info);
- if (error == XFS_BTREE_QUERY_RANGE_ABORT)
+ if (error == -ECANCELED)
error = 0;
if (*stat)
trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp,
@@ -330,16 +341,16 @@ xfs_rmap_lookup_le_range_helper(
rec->rm_flags);
if (rec->rm_owner != info->high.rm_owner)
- return XFS_BTREE_QUERY_RANGE_CONTINUE;
+ return 0;
if (!XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) &&
!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) &&
(rec->rm_offset > info->high.rm_offset ||
rec->rm_offset + rec->rm_blockcount <= info->high.rm_offset))
- return XFS_BTREE_QUERY_RANGE_CONTINUE;
+ return 0;
*info->irec = *rec;
*info->stat = 1;
- return XFS_BTREE_QUERY_RANGE_ABORT;
+ return -ECANCELED;
}
/*
@@ -377,7 +388,7 @@ xfs_rmap_lookup_le_range(
cur->bc_private.a.agno, bno, 0, owner, offset, flags);
error = xfs_rmap_query_range(cur, &info.high, &info.high,
xfs_rmap_lookup_le_range_helper, &info);
- if (error == XFS_BTREE_QUERY_RANGE_ABORT)
+ if (error == -ECANCELED)
error = 0;
if (*stat)
trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
@@ -407,24 +418,39 @@ xfs_rmap_free_check_owner(
return 0;
/* Make sure the unwritten flag matches. */
- XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
- (rec->rm_flags & XFS_RMAP_UNWRITTEN), out);
+ if (XFS_IS_CORRUPT(mp,
+ (flags & XFS_RMAP_UNWRITTEN) !=
+ (rec->rm_flags & XFS_RMAP_UNWRITTEN))) {
+ error = -EFSCORRUPTED;
+ goto out;
+ }
/* Make sure the owner matches what we expect to find in the tree. */
- XFS_WANT_CORRUPTED_GOTO(mp, owner == rec->rm_owner, out);
+ if (XFS_IS_CORRUPT(mp, owner != rec->rm_owner)) {
+ error = -EFSCORRUPTED;
+ goto out;
+ }
/* Check the offset, if necessary. */
if (XFS_RMAP_NON_INODE_OWNER(owner))
goto out;
if (flags & XFS_RMAP_BMBT_BLOCK) {
- XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_flags & XFS_RMAP_BMBT_BLOCK,
- out);
+ if (XFS_IS_CORRUPT(mp,
+ !(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))) {
+ error = -EFSCORRUPTED;
+ goto out;
+ }
} else {
- XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_offset <= offset, out);
- XFS_WANT_CORRUPTED_GOTO(mp,
- ltoff + rec->rm_blockcount >= offset + len,
- out);
+ if (XFS_IS_CORRUPT(mp, rec->rm_offset > offset)) {
+ error = -EFSCORRUPTED;
+ goto out;
+ }
+ if (XFS_IS_CORRUPT(mp,
+ offset + len > ltoff + rec->rm_blockcount)) {
+ error = -EFSCORRUPTED;
+ goto out;
+ }
}
out:
@@ -483,12 +509,18 @@ xfs_rmap_unmap(
error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, flags, &i);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
error = xfs_rmap_get_rec(cur, &ltrec, &i);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
cur->bc_private.a.agno, ltrec.rm_startblock,
ltrec.rm_blockcount, ltrec.rm_owner,
@@ -503,8 +535,12 @@ xfs_rmap_unmap(
* be the case that the "left" extent goes all the way to EOFS.
*/
if (owner == XFS_RMAP_OWN_NULL) {
- XFS_WANT_CORRUPTED_GOTO(mp, bno >= ltrec.rm_startblock +
- ltrec.rm_blockcount, out_error);
+ if (XFS_IS_CORRUPT(mp,
+ bno <
+ ltrec.rm_startblock + ltrec.rm_blockcount)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
goto out_done;
}
@@ -527,15 +563,22 @@ xfs_rmap_unmap(
error = xfs_rmap_get_rec(cur, &rtrec, &i);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
if (rtrec.rm_startblock >= bno + len)
goto out_done;
}
/* Make sure the extent we found covers the entire freeing range. */
- XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno &&
- ltrec.rm_startblock + ltrec.rm_blockcount >=
- bno + len, out_error);
+ if (XFS_IS_CORRUPT(mp,
+ ltrec.rm_startblock > bno ||
+ ltrec.rm_startblock + ltrec.rm_blockcount <
+ bno + len)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
/* Check owner information. */
error = xfs_rmap_free_check_owner(mp, ltoff, &ltrec, len, owner,
@@ -552,7 +595,10 @@ xfs_rmap_unmap(
error = xfs_btree_delete(cur, &i);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
} else if (ltrec.rm_startblock == bno) {
/*
* overlap left hand side of extent: move the start, trim the
@@ -744,7 +790,10 @@ xfs_rmap_map(
error = xfs_rmap_get_rec(cur, &ltrec, &have_lt);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(mp, have_lt == 1, out_error);
+ if (XFS_IS_CORRUPT(mp, have_lt != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
cur->bc_private.a.agno, ltrec.rm_startblock,
ltrec.rm_blockcount, ltrec.rm_owner,
@@ -754,9 +803,12 @@ xfs_rmap_map(
have_lt = 0;
}
- XFS_WANT_CORRUPTED_GOTO(mp,
- have_lt == 0 ||
- ltrec.rm_startblock + ltrec.rm_blockcount <= bno, out_error);
+ if (XFS_IS_CORRUPT(mp,
+ have_lt != 0 &&
+ ltrec.rm_startblock + ltrec.rm_blockcount > bno)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
/*
* Increment the cursor to see if we have a right-adjacent record to our
@@ -770,9 +822,14 @@ xfs_rmap_map(
error = xfs_rmap_get_rec(cur, &gtrec, &have_gt);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(mp, have_gt == 1, out_error);
- XFS_WANT_CORRUPTED_GOTO(mp, bno + len <= gtrec.rm_startblock,
- out_error);
+ if (XFS_IS_CORRUPT(mp, have_gt != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
+ if (XFS_IS_CORRUPT(mp, bno + len > gtrec.rm_startblock)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
cur->bc_private.a.agno, gtrec.rm_startblock,
gtrec.rm_blockcount, gtrec.rm_owner,
@@ -822,7 +879,10 @@ xfs_rmap_map(
error = xfs_btree_delete(cur, &i);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
}
/* point the cursor back to the left record and update */
@@ -866,7 +926,10 @@ xfs_rmap_map(
error = xfs_btree_insert(cur, &i);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
}
trace_xfs_rmap_map_done(mp, cur->bc_private.a.agno, bno, len,
@@ -958,12 +1021,18 @@ xfs_rmap_convert(
error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, oldext, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_rmap_get_rec(cur, &PREV, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
trace_xfs_rmap_lookup_le_range_result(cur->bc_mp,
cur->bc_private.a.agno, PREV.rm_startblock,
PREV.rm_blockcount, PREV.rm_owner,
@@ -996,10 +1065,16 @@ xfs_rmap_convert(
error = xfs_rmap_get_rec(cur, &LEFT, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
- XFS_WANT_CORRUPTED_GOTO(mp,
- LEFT.rm_startblock + LEFT.rm_blockcount <= bno,
- done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
+ if (XFS_IS_CORRUPT(mp,
+ LEFT.rm_startblock + LEFT.rm_blockcount >
+ bno)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
trace_xfs_rmap_find_left_neighbor_result(cur->bc_mp,
cur->bc_private.a.agno, LEFT.rm_startblock,
LEFT.rm_blockcount, LEFT.rm_owner,
@@ -1018,7 +1093,10 @@ xfs_rmap_convert(
error = xfs_btree_increment(cur, 0, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_btree_increment(cur, 0, &i);
if (error)
goto done;
@@ -1027,9 +1105,14 @@ xfs_rmap_convert(
error = xfs_rmap_get_rec(cur, &RIGHT, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
- XFS_WANT_CORRUPTED_GOTO(mp, bno + len <= RIGHT.rm_startblock,
- done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
+ if (XFS_IS_CORRUPT(mp, bno + len > RIGHT.rm_startblock)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
cur->bc_private.a.agno, RIGHT.rm_startblock,
RIGHT.rm_blockcount, RIGHT.rm_owner,
@@ -1056,7 +1139,10 @@ xfs_rmap_convert(
error = xfs_rmap_lookup_le(cur, bno, len, owner, offset, oldext, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
/*
* Switch out based on the FILLING and CONTIG state bits.
@@ -1072,7 +1158,10 @@ xfs_rmap_convert(
error = xfs_btree_increment(cur, 0, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
trace_xfs_rmap_delete(mp, cur->bc_private.a.agno,
RIGHT.rm_startblock, RIGHT.rm_blockcount,
RIGHT.rm_owner, RIGHT.rm_offset,
@@ -1080,11 +1169,17 @@ xfs_rmap_convert(
error = xfs_btree_delete(cur, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_btree_decrement(cur, 0, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
trace_xfs_rmap_delete(mp, cur->bc_private.a.agno,
PREV.rm_startblock, PREV.rm_blockcount,
PREV.rm_owner, PREV.rm_offset,
@@ -1092,11 +1187,17 @@ xfs_rmap_convert(
error = xfs_btree_delete(cur, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_btree_decrement(cur, 0, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
NEW = LEFT;
NEW.rm_blockcount += PREV.rm_blockcount + RIGHT.rm_blockcount;
error = xfs_rmap_update(cur, &NEW);
@@ -1116,11 +1217,17 @@ xfs_rmap_convert(
error = xfs_btree_delete(cur, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_btree_decrement(cur, 0, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
NEW = LEFT;
NEW.rm_blockcount += PREV.rm_blockcount;
error = xfs_rmap_update(cur, &NEW);
@@ -1136,7 +1243,10 @@ xfs_rmap_convert(
error = xfs_btree_increment(cur, 0, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
trace_xfs_rmap_delete(mp, cur->bc_private.a.agno,
RIGHT.rm_startblock, RIGHT.rm_blockcount,
RIGHT.rm_owner, RIGHT.rm_offset,
@@ -1144,11 +1254,17 @@ xfs_rmap_convert(
error = xfs_btree_delete(cur, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
error = xfs_btree_decrement(cur, 0, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
NEW = PREV;
NEW.rm_blockcount = len + RIGHT.rm_blockcount;
NEW.rm_flags = newext;
@@ -1215,7 +1331,10 @@ xfs_rmap_convert(
error = xfs_btree_insert(cur, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
break;
case RMAP_RIGHT_FILLING | RMAP_RIGHT_CONTIG:
@@ -1254,7 +1373,10 @@ xfs_rmap_convert(
oldext, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
+ if (XFS_IS_CORRUPT(mp, i != 0)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
NEW.rm_startblock = bno;
NEW.rm_owner = owner;
NEW.rm_offset = offset;
@@ -1266,7 +1388,10 @@ xfs_rmap_convert(
error = xfs_btree_insert(cur, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
break;
case 0:
@@ -1296,7 +1421,10 @@ xfs_rmap_convert(
error = xfs_btree_insert(cur, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
/*
* Reset the cursor to the position of the new extent
* we are about to insert as we can't trust it after
@@ -1306,7 +1434,10 @@ xfs_rmap_convert(
oldext, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 0, done);
+ if (XFS_IS_CORRUPT(mp, i != 0)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
/* new middle extent - newext */
cur->bc_rec.r.rm_flags &= ~XFS_RMAP_UNWRITTEN;
cur->bc_rec.r.rm_flags |= newext;
@@ -1315,7 +1446,10 @@ xfs_rmap_convert(
error = xfs_btree_insert(cur, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
break;
case RMAP_LEFT_FILLING | RMAP_LEFT_CONTIG | RMAP_RIGHT_CONTIG:
@@ -1384,7 +1518,10 @@ xfs_rmap_convert_shared(
&PREV, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
ASSERT(PREV.rm_offset <= offset);
ASSERT(PREV.rm_offset + PREV.rm_blockcount >= new_endoff);
@@ -1407,9 +1544,12 @@ xfs_rmap_convert_shared(
goto done;
if (i) {
state |= RMAP_LEFT_VALID;
- XFS_WANT_CORRUPTED_GOTO(mp,
- LEFT.rm_startblock + LEFT.rm_blockcount <= bno,
- done);
+ if (XFS_IS_CORRUPT(mp,
+ LEFT.rm_startblock + LEFT.rm_blockcount >
+ bno)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
if (xfs_rmap_is_mergeable(&LEFT, owner, newext))
state |= RMAP_LEFT_CONTIG;
}
@@ -1424,9 +1564,14 @@ xfs_rmap_convert_shared(
error = xfs_rmap_get_rec(cur, &RIGHT, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
- XFS_WANT_CORRUPTED_GOTO(mp, bno + len <= RIGHT.rm_startblock,
- done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
+ if (XFS_IS_CORRUPT(mp, bno + len > RIGHT.rm_startblock)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
cur->bc_private.a.agno, RIGHT.rm_startblock,
RIGHT.rm_blockcount, RIGHT.rm_owner,
@@ -1473,7 +1618,10 @@ xfs_rmap_convert_shared(
NEW.rm_offset, NEW.rm_flags, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
NEW.rm_blockcount += PREV.rm_blockcount + RIGHT.rm_blockcount;
error = xfs_rmap_update(cur, &NEW);
if (error)
@@ -1496,7 +1644,10 @@ xfs_rmap_convert_shared(
NEW.rm_offset, NEW.rm_flags, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
NEW.rm_blockcount += PREV.rm_blockcount;
error = xfs_rmap_update(cur, &NEW);
if (error)
@@ -1519,7 +1670,10 @@ xfs_rmap_convert_shared(
NEW.rm_offset, NEW.rm_flags, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
NEW.rm_blockcount += RIGHT.rm_blockcount;
NEW.rm_flags = RIGHT.rm_flags;
error = xfs_rmap_update(cur, &NEW);
@@ -1539,7 +1693,10 @@ xfs_rmap_convert_shared(
NEW.rm_offset, NEW.rm_flags, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
NEW.rm_flags = newext;
error = xfs_rmap_update(cur, &NEW);
if (error)
@@ -1571,7 +1728,10 @@ xfs_rmap_convert_shared(
NEW.rm_offset, NEW.rm_flags, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
NEW.rm_blockcount += len;
error = xfs_rmap_update(cur, &NEW);
if (error)
@@ -1613,7 +1773,10 @@ xfs_rmap_convert_shared(
NEW.rm_offset, NEW.rm_flags, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
NEW.rm_blockcount = offset - NEW.rm_offset;
error = xfs_rmap_update(cur, &NEW);
if (error)
@@ -1645,7 +1808,10 @@ xfs_rmap_convert_shared(
NEW.rm_offset, NEW.rm_flags, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
NEW.rm_blockcount -= len;
error = xfs_rmap_update(cur, &NEW);
if (error)
@@ -1680,7 +1846,10 @@ xfs_rmap_convert_shared(
NEW.rm_offset, NEW.rm_flags, &i);
if (error)
goto done;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto done;
+ }
NEW.rm_blockcount = offset - NEW.rm_offset;
error = xfs_rmap_update(cur, &NEW);
if (error)
@@ -1766,25 +1935,44 @@ xfs_rmap_unmap_shared(
&ltrec, &i);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
ltoff = ltrec.rm_offset;
/* Make sure the extent we found covers the entire freeing range. */
- XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno &&
- ltrec.rm_startblock + ltrec.rm_blockcount >=
- bno + len, out_error);
+ if (XFS_IS_CORRUPT(mp,
+ ltrec.rm_startblock > bno ||
+ ltrec.rm_startblock + ltrec.rm_blockcount <
+ bno + len)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
/* Make sure the owner matches what we expect to find in the tree. */
- XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner, out_error);
+ if (XFS_IS_CORRUPT(mp, owner != ltrec.rm_owner)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
/* Make sure the unwritten flag matches. */
- XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
- (ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error);
+ if (XFS_IS_CORRUPT(mp,
+ (flags & XFS_RMAP_UNWRITTEN) !=
+ (ltrec.rm_flags & XFS_RMAP_UNWRITTEN))) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
/* Check the offset. */
- XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_offset <= offset, out_error);
- XFS_WANT_CORRUPTED_GOTO(mp, offset <= ltoff + ltrec.rm_blockcount,
- out_error);
+ if (XFS_IS_CORRUPT(mp, ltrec.rm_offset > offset)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
+ if (XFS_IS_CORRUPT(mp, offset > ltoff + ltrec.rm_blockcount)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
/* Exact match, simply remove the record from rmap tree. */
@@ -1837,7 +2025,10 @@ xfs_rmap_unmap_shared(
ltrec.rm_offset, ltrec.rm_flags, &i);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
ltrec.rm_blockcount -= len;
error = xfs_rmap_update(cur, &ltrec);
if (error)
@@ -1863,7 +2054,10 @@ xfs_rmap_unmap_shared(
ltrec.rm_offset, ltrec.rm_flags, &i);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
ltrec.rm_blockcount = bno - ltrec.rm_startblock;
error = xfs_rmap_update(cur, &ltrec);
if (error)
@@ -1939,7 +2133,10 @@ xfs_rmap_map_shared(
error = xfs_rmap_get_rec(cur, &gtrec, &have_gt);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(mp, have_gt == 1, out_error);
+ if (XFS_IS_CORRUPT(mp, have_gt != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
trace_xfs_rmap_find_right_neighbor_result(cur->bc_mp,
cur->bc_private.a.agno, gtrec.rm_startblock,
gtrec.rm_blockcount, gtrec.rm_owner,
@@ -1988,7 +2185,10 @@ xfs_rmap_map_shared(
ltrec.rm_offset, ltrec.rm_flags, &i);
if (error)
goto out_error;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_error;
+ }
error = xfs_rmap_update(cur, &ltrec);
if (error)
@@ -2200,7 +2400,7 @@ xfs_rmap_finish_one(
error = xfs_free_extent_fix_freelist(tp, agno, &agbp);
if (error)
return error;
- if (!agbp)
+ if (XFS_IS_CORRUPT(tp->t_mountp, !agbp))
return -EFSCORRUPTED;
rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
@@ -2268,7 +2468,7 @@ xfs_rmap_update_is_needed(
* Record a rmap intent; the list is kept sorted first by AG and then by
* increasing age.
*/
-static int
+static void
__xfs_rmap_add(
struct xfs_trans *tp,
enum xfs_rmap_intent_type type,
@@ -2287,7 +2487,7 @@ __xfs_rmap_add(
bmap->br_blockcount,
bmap->br_state);
- ri = kmem_alloc(sizeof(struct xfs_rmap_intent), KM_SLEEP | KM_NOFS);
+ ri = kmem_alloc(sizeof(struct xfs_rmap_intent), KM_NOFS);
INIT_LIST_HEAD(&ri->ri_list);
ri->ri_type = type;
ri->ri_owner = owner;
@@ -2295,11 +2495,10 @@ __xfs_rmap_add(
ri->ri_bmap = *bmap;
xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_RMAP, &ri->ri_list);
- return 0;
}
/* Map an extent into a file. */
-int
+void
xfs_rmap_map_extent(
struct xfs_trans *tp,
struct xfs_inode *ip,
@@ -2307,15 +2506,15 @@ xfs_rmap_map_extent(
struct xfs_bmbt_irec *PREV)
{
if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork))
- return 0;
+ return;
- return __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ?
+ __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ?
XFS_RMAP_MAP_SHARED : XFS_RMAP_MAP, ip->i_ino,
whichfork, PREV);
}
/* Unmap an extent out of a file. */
-int
+void
xfs_rmap_unmap_extent(
struct xfs_trans *tp,
struct xfs_inode *ip,
@@ -2323,9 +2522,9 @@ xfs_rmap_unmap_extent(
struct xfs_bmbt_irec *PREV)
{
if (!xfs_rmap_update_is_needed(tp->t_mountp, whichfork))
- return 0;
+ return;
- return __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ?
+ __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ?
XFS_RMAP_UNMAP_SHARED : XFS_RMAP_UNMAP, ip->i_ino,
whichfork, PREV);
}
@@ -2336,7 +2535,7 @@ xfs_rmap_unmap_extent(
* Note that tp can be NULL here as no transaction is used for COW fork
* unwritten conversion.
*/
-int
+void
xfs_rmap_convert_extent(
struct xfs_mount *mp,
struct xfs_trans *tp,
@@ -2345,15 +2544,15 @@ xfs_rmap_convert_extent(
struct xfs_bmbt_irec *PREV)
{
if (!xfs_rmap_update_is_needed(mp, whichfork))
- return 0;
+ return;
- return __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ?
+ __xfs_rmap_add(tp, xfs_is_reflink_inode(ip) ?
XFS_RMAP_CONVERT_SHARED : XFS_RMAP_CONVERT, ip->i_ino,
whichfork, PREV);
}
/* Schedule the creation of an rmap for non-file data. */
-int
+void
xfs_rmap_alloc_extent(
struct xfs_trans *tp,
xfs_agnumber_t agno,
@@ -2364,18 +2563,18 @@ xfs_rmap_alloc_extent(
struct xfs_bmbt_irec bmap;
if (!xfs_rmap_update_is_needed(tp->t_mountp, XFS_DATA_FORK))
- return 0;
+ return;
bmap.br_startblock = XFS_AGB_TO_FSB(tp->t_mountp, agno, bno);
bmap.br_blockcount = len;
bmap.br_startoff = 0;
bmap.br_state = XFS_EXT_NORM;
- return __xfs_rmap_add(tp, XFS_RMAP_ALLOC, owner, XFS_DATA_FORK, &bmap);
+ __xfs_rmap_add(tp, XFS_RMAP_ALLOC, owner, XFS_DATA_FORK, &bmap);
}
/* Schedule the deletion of an rmap for non-file data. */
-int
+void
xfs_rmap_free_extent(
struct xfs_trans *tp,
xfs_agnumber_t agno,
@@ -2386,14 +2585,14 @@ xfs_rmap_free_extent(
struct xfs_bmbt_irec bmap;
if (!xfs_rmap_update_is_needed(tp->t_mountp, XFS_DATA_FORK))
- return 0;
+ return;
bmap.br_startblock = XFS_AGB_TO_FSB(tp->t_mountp, agno, bno);
bmap.br_blockcount = len;
bmap.br_startoff = 0;
bmap.br_state = XFS_EXT_NORM;
- return __xfs_rmap_add(tp, XFS_RMAP_FREE, owner, XFS_DATA_FORK, &bmap);
+ __xfs_rmap_add(tp, XFS_RMAP_FREE, owner, XFS_DATA_FORK, &bmap);
}
/* Compare rmap records. Returns -1 if a < b, 1 if a > b, and 0 if equal. */
@@ -2511,7 +2710,7 @@ xfs_rmap_has_other_keys_helper(
((rks->flags & rec->rm_flags) & XFS_RMAP_KEY_FLAGS) == rks->flags)
return 0;
rks->has_rmap = true;
- return XFS_BTREE_QUERY_RANGE_ABORT;
+ return -ECANCELED;
}
/*
@@ -2540,8 +2739,11 @@ xfs_rmap_has_other_keys(
error = xfs_rmap_query_range(cur, &low, &high,
xfs_rmap_has_other_keys_helper, &rks);
+ if (error < 0)
+ return error;
+
*has_rmap = rks.has_rmap;
- return error;
+ return 0;
}
const struct xfs_owner_info XFS_RMAP_OINFO_SKIP_UPDATE = {
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index e21ed0294e5c..abe633403fd1 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -68,6 +68,7 @@ xfs_rmap_irec_offset_unpack(
if (offset & ~(XFS_RMAP_OFF_MASK | XFS_RMAP_OFF_FLAGS))
return -EFSCORRUPTED;
irec->rm_offset = XFS_RMAP_OFF(offset);
+ irec->rm_flags = 0;
if (offset & XFS_RMAP_OFF_ATTR_FORK)
irec->rm_flags |= XFS_RMAP_ATTR_FORK;
if (offset & XFS_RMAP_OFF_BMBT_BLOCK)
@@ -161,16 +162,16 @@ struct xfs_rmap_intent {
};
/* functions for updating the rmapbt based on bmbt map/unmap operations */
-int xfs_rmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip,
+void xfs_rmap_map_extent(struct xfs_trans *tp, struct xfs_inode *ip,
int whichfork, struct xfs_bmbt_irec *imap);
-int xfs_rmap_unmap_extent(struct xfs_trans *tp, struct xfs_inode *ip,
+void xfs_rmap_unmap_extent(struct xfs_trans *tp, struct xfs_inode *ip,
int whichfork, struct xfs_bmbt_irec *imap);
-int xfs_rmap_convert_extent(struct xfs_mount *mp, struct xfs_trans *tp,
+void xfs_rmap_convert_extent(struct xfs_mount *mp, struct xfs_trans *tp,
struct xfs_inode *ip, int whichfork,
struct xfs_bmbt_irec *imap);
-int xfs_rmap_alloc_extent(struct xfs_trans *tp, xfs_agnumber_t agno,
+void xfs_rmap_alloc_extent(struct xfs_trans *tp, xfs_agnumber_t agno,
xfs_agblock_t bno, xfs_extlen_t len, uint64_t owner);
-int xfs_rmap_free_extent(struct xfs_trans *tp, xfs_agnumber_t agno,
+void xfs_rmap_free_extent(struct xfs_trans *tp, xfs_agnumber_t agno,
xfs_agblock_t bno, xfs_extlen_t len, uint64_t owner);
void xfs_rmap_finish_one_cleanup(struct xfs_trans *tp,
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index 8ea1efc97b41..f42c74cb8be5 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -15,7 +15,7 @@
#include "xfs_bmap.h"
#include "xfs_trans.h"
#include "xfs_rtalloc.h"
-
+#include "xfs_error.h"
/*
* Realtime allocator bitmap functions shared with userspace.
@@ -70,7 +70,7 @@ xfs_rtbuf_get(
if (error)
return error;
- if (nmap == 0 || !xfs_bmap_is_real_extent(&map))
+ if (XFS_IS_CORRUPT(mp, nmap == 0 || !xfs_bmap_is_real_extent(&map)))
return -EFSCORRUPTED;
ASSERT(map.br_startblock != NULLFSBLOCK);
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index a08dd8f40346..2f60fc3c99a0 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -10,6 +10,7 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
+#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_ialloc.h"
#include "xfs_alloc.h"
@@ -928,7 +929,7 @@ xfs_log_sb(
xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb);
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
- xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb));
+ xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb) - 1);
}
/*
@@ -984,9 +985,9 @@ xfs_update_secondary_sbs(
for (agno = 1; agno < mp->m_sb.sb_agcount; agno++) {
struct xfs_buf *bp;
- bp = xfs_buf_get(mp->m_ddev_targp,
+ error = xfs_buf_get(mp->m_ddev_targp,
XFS_AG_DADDR(mp, agno, XFS_SB_DADDR),
- XFS_FSS_TO_BB(mp, 1));
+ XFS_FSS_TO_BB(mp, 1), &bp);
/*
* If we get an error reading or writing alternate superblocks,
* continue. xfs_repair chooses the "best" superblock based
@@ -994,12 +995,12 @@ xfs_update_secondary_sbs(
* superblocks un-updated than updated, and xfs_repair may
* pick them over the properly-updated primary.
*/
- if (!bp) {
+ if (error) {
xfs_warn(mp,
"error allocating secondary superblock for ag %d",
agno);
if (!saved_error)
- saved_error = -ENOMEM;
+ saved_error = error;
continue;
}
@@ -1184,13 +1185,14 @@ xfs_sb_get_secondary(
struct xfs_buf **bpp)
{
struct xfs_buf *bp;
+ int error;
ASSERT(agno != 0 && agno != NULLAGNUMBER);
- bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
+ error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
XFS_AG_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
- XFS_FSS_TO_BB(mp, 1), 0);
- if (!bp)
- return -ENOMEM;
+ XFS_FSS_TO_BB(mp, 1), 0, &bp);
+ if (error)
+ return error;
bp->b_ops = &xfs_sb_buf_ops;
xfs_buf_oneshot(bp);
*bpp = bp;
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index e0641b7337b3..c45acbd3add9 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -177,10 +177,4 @@ struct xfs_ino_geometry {
unsigned int agino_log; /* #bits for agino in inum */
};
-/* Keep iterating the data structure. */
-#define XFS_ITER_CONTINUE (0)
-
-/* Stop iterating the data structure. */
-#define XFS_ITER_ABORT (1)
-
#endif /* __XFS_SHARED_H__ */
diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index a9ad90926b87..2b8ccb5b975d 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -55,7 +55,7 @@ xfs_trans_ichgtime(
int flags)
{
struct inode *inode = VFS_I(ip);
- struct timespec64 tv;
+ struct timespec64 tv;
ASSERT(tp);
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
@@ -66,10 +66,8 @@ xfs_trans_ichgtime(
inode->i_mtime = tv;
if (flags & XFS_ICHGTIME_CHG)
inode->i_ctime = tv;
- if (flags & XFS_ICHGTIME_CREATE) {
- ip->i_d.di_crtime.t_sec = (int32_t)tv.tv_sec;
- ip->i_d.di_crtime.t_nsec = (int32_t)tv.tv_nsec;
- }
+ if (flags & XFS_ICHGTIME_CREATE)
+ ip->i_d.di_crtime = tv;
}
/*
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index d12bbd526e7c..7a9c04920505 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -197,6 +197,24 @@ xfs_calc_inode_chunk_res(
}
/*
+ * Per-extent log reservation for the btree changes involved in freeing or
+ * allocating a realtime extent. We have to be able to log as many rtbitmap
+ * blocks as needed to mark inuse MAXEXTLEN blocks' worth of realtime extents,
+ * as well as the realtime summary block.
+ */
+static unsigned int
+xfs_rtalloc_log_count(
+ struct xfs_mount *mp,
+ unsigned int num_ops)
+{
+ unsigned int blksz = XFS_FSB_TO_B(mp, 1);
+ unsigned int rtbmp_bytes;
+
+ rtbmp_bytes = (MAXEXTLEN / mp->m_sb.sb_rextsize) / NBBY;
+ return (howmany(rtbmp_bytes, blksz) + 1) * num_ops;
+}
+
+/*
* Various log reservation values.
*
* These are based on the size of the file system block because that is what
@@ -218,13 +236,21 @@ xfs_calc_inode_chunk_res(
/*
* In a write transaction we can allocate a maximum of 2
- * extents. This gives:
+ * extents. This gives (t1):
* the inode getting the new extents: inode size
* the inode's bmap btree: max depth * block size
* the agfs of the ags from which the extents are allocated: 2 * sector
* the superblock free block counter: sector size
* the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
- * And the bmap_finish transaction can free bmap blocks in a join:
+ * Or, if we're writing to a realtime file (t2):
+ * the inode getting the new extents: inode size
+ * the inode's bmap btree: max depth * block size
+ * the agfs of the ags from which the extents are allocated: 2 * sector
+ * the superblock free block counter: sector size
+ * the realtime bitmap: ((MAXEXTLEN / rtextsize) / NBBY) bytes
+ * the realtime summary: 1 block
+ * the allocation btrees: 2 trees * (2 * max depth - 1) * block size
+ * And the bmap_finish transaction can free bmap blocks in a join (t3):
* the agfs of the ags containing the blocks: 2 * sector size
* the agfls of the ags containing the blocks: 2 * sector size
* the super block free block counter: sector size
@@ -234,40 +260,72 @@ STATIC uint
xfs_calc_write_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
- max((xfs_calc_inode_res(mp, 1) +
+ unsigned int t1, t2, t3;
+ unsigned int blksz = XFS_FSB_TO_B(mp, 1);
+
+ t1 = xfs_calc_inode_res(mp, 1) +
+ xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), blksz) +
+ xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
+
+ if (xfs_sb_version_hasrealtime(&mp->m_sb)) {
+ t2 = xfs_calc_inode_res(mp, 1) +
xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK),
- XFS_FSB_TO_B(mp, 1)) +
+ blksz) +
xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
- XFS_FSB_TO_B(mp, 1))),
- (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2),
- XFS_FSB_TO_B(mp, 1))));
+ xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 1), blksz) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 1), blksz);
+ } else {
+ t2 = 0;
+ }
+
+ t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
+
+ return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
}
/*
- * In truncating a file we free up to two extents at once. We can modify:
+ * In truncating a file we free up to two extents at once. We can modify (t1):
* the inode being truncated: inode size
* the inode's bmap btree: (max depth + 1) * block size
- * And the bmap_finish transaction can free the blocks and bmap blocks:
+ * And the bmap_finish transaction can free the blocks and bmap blocks (t2):
* the agf for each of the ags: 4 * sector size
* the agfl for each of the ags: 4 * sector size
* the super block to reflect the freed blocks: sector size
* worst case split in allocation btrees per extent assuming 4 extents:
* 4 exts * 2 trees * (2 * max depth - 1) * block size
+ * Or, if it's a realtime file (t3):
+ * the agf for each of the ags: 2 * sector size
+ * the agfl for each of the ags: 2 * sector size
+ * the super block to reflect the freed blocks: sector size
+ * the realtime bitmap: 2 exts * ((MAXEXTLEN / rtextsize) / NBBY) bytes
+ * the realtime summary: 2 exts * 1 block
+ * worst case split in allocation btrees per extent assuming 2 extents:
+ * 2 exts * 2 trees * (2 * max depth - 1) * block size
*/
STATIC uint
xfs_calc_itruncate_reservation(
struct xfs_mount *mp)
{
- return XFS_DQUOT_LOGRES(mp) +
- max((xfs_calc_inode_res(mp, 1) +
- xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1,
- XFS_FSB_TO_B(mp, 1))),
- (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
- xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4),
- XFS_FSB_TO_B(mp, 1))));
+ unsigned int t1, t2, t3;
+ unsigned int blksz = XFS_FSB_TO_B(mp, 1);
+
+ t1 = xfs_calc_inode_res(mp, 1) +
+ xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, blksz);
+
+ t2 = xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 4), blksz);
+
+ if (xfs_sb_version_hasrealtime(&mp->m_sb)) {
+ t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+ xfs_calc_buf_res(xfs_rtalloc_log_count(mp, 2), blksz) +
+ xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
+ } else {
+ t3 = 0;
+ }
+
+ return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
}
/*
@@ -718,7 +776,7 @@ xfs_calc_clear_agi_bucket_reservation(
/*
* Adjusting quota limits.
- * the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot)
+ * the disk quota buffer: sizeof(struct xfs_disk_dquot)
*/
STATIC uint
xfs_calc_qm_setqlim_reservation(void)
@@ -742,7 +800,7 @@ xfs_calc_qm_dqalloc_reservation(
/*
* Turning off quotas.
- * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
+ * the quota off logitems: sizeof(struct xfs_qoff_logitem) * 2
* the superblock for the quota flags: sector size
*/
STATIC uint
@@ -755,7 +813,7 @@ xfs_calc_qm_quotaoff_reservation(
/*
* End of turning off quotas.
- * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2
+ * the quota off logitems: sizeof(struct xfs_qoff_logitem) * 2
*/
STATIC uint
xfs_calc_qm_quotaoff_end_reservation(void)
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index 802b34cd10fe..397d94775440 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -21,7 +21,6 @@ typedef int32_t xfs_suminfo_t; /* type of bitmap summary info */
typedef uint32_t xfs_rtword_t; /* word type for bitmap manipulations */
typedef int64_t xfs_lsn_t; /* log sequence number */
-typedef int32_t xfs_tid_t; /* transaction identifier */
typedef uint32_t xfs_dablk_t; /* dir/attr block number (in file) */
typedef uint32_t xfs_dahash_t; /* dir/attr hash value */
@@ -33,7 +32,6 @@ typedef uint64_t xfs_fileoff_t; /* block number in a file */
typedef uint64_t xfs_filblks_t; /* number of blocks in a file */
typedef int64_t xfs_srtblock_t; /* signed version of xfs_rtblock_t */
-typedef int64_t xfs_sfiloff_t; /* signed block number in a file */
/*
* New verifiers will return the instruction address of the failing check.
@@ -169,6 +167,14 @@ typedef struct xfs_bmbt_irec
xfs_exntst_t br_state; /* extent state */
} xfs_bmbt_irec_t;
+/* per-AG block reservation types */
+enum xfs_ag_resv_type {
+ XFS_AG_RESV_NONE = 0,
+ XFS_AG_RESV_AGFL,
+ XFS_AG_RESV_METADATA,
+ XFS_AG_RESV_RMAPBT,
+};
+
/*
* Type verifier functions
*/
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index 16b09b941441..ba0f747c82e8 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -639,7 +639,7 @@ xchk_agfl_block(
xchk_agfl_block_xref(sc, agbno);
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
- return XFS_ITER_ABORT;
+ return -ECANCELED;
return 0;
}
@@ -730,7 +730,7 @@ xchk_agfl(
/* Check the blocks in the AGFL. */
error = xfs_agfl_walk(sc->mp, XFS_BUF_TO_AGF(sc->sa.agf_bp),
sc->sa.agfl_bp, xchk_agfl_block, &sai);
- if (error == XFS_ITER_ABORT) {
+ if (error == -ECANCELED) {
error = 0;
goto out_free;
}
diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index 7a1a38b636a9..d5e6db9af434 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -659,8 +659,6 @@ xrep_agfl(
error = xfs_alloc_read_agf(mp, sc->tp, sc->sa.agno, 0, &agf_bp);
if (error)
return error;
- if (!agf_bp)
- return -ENOMEM;
/*
* Make sure we have the AGFL buffer, as scrub might have decided it
@@ -735,8 +733,6 @@ xrep_agi_find_btrees(
error = xfs_alloc_read_agf(mp, sc->tp, sc->sa.agno, 0, &agf_bp);
if (error)
return error;
- if (!agf_bp)
- return -ENOMEM;
/* Find the btree roots. */
error = xrep_find_ag_btree_roots(sc, agf_bp, fab, NULL);
diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c
index a43d1813c4ff..5533e48e605d 100644
--- a/fs/xfs/scrub/alloc.c
+++ b/fs/xfs/scrub/alloc.c
@@ -97,7 +97,6 @@ xchk_allocbt_rec(
xfs_agnumber_t agno = bs->cur->bc_private.a.agno;
xfs_agblock_t bno;
xfs_extlen_t len;
- int error = 0;
bno = be32_to_cpu(rec->alloc.ar_startblock);
len = be32_to_cpu(rec->alloc.ar_blockcount);
@@ -109,7 +108,7 @@ xchk_allocbt_rec(
xchk_allocbt_xref(bs->sc, bno, len);
- return error;
+ return 0;
}
/* Scrub the freespace btrees for some AG. */
diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c
index 1afc58bf71dd..d9f0dd444b80 100644
--- a/fs/xfs/scrub/attr.c
+++ b/fs/xfs/scrub/attr.c
@@ -80,7 +80,7 @@ xchk_setup_xattr(
* without the inode lock held, which means we can sleep.
*/
if (sc->flags & XCHK_TRY_HARDER) {
- error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX, KM_SLEEP);
+ error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX, 0);
if (error)
return error;
}
@@ -163,8 +163,6 @@ xchk_xattr_listent(
args.valuelen = valuelen;
error = xfs_attr_get_ilocked(context->dp, &args);
- if (error == -EEXIST)
- error = 0;
if (!xchk_fblock_process_error(sx->sc, XFS_ATTR_FORK, args.blkno,
&error))
goto fail_xref;
@@ -173,7 +171,7 @@ xchk_xattr_listent(
args.blkno);
fail_xref:
if (sx->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
- context->seen_enough = XFS_ITER_ABORT;
+ context->seen_enough = 1;
return;
}
@@ -400,15 +398,14 @@ out:
STATIC int
xchk_xattr_rec(
struct xchk_da_btree *ds,
- int level,
- void *rec)
+ int level)
{
struct xfs_mount *mp = ds->state->mp;
- struct xfs_attr_leaf_entry *ent = rec;
- struct xfs_da_state_blk *blk;
+ struct xfs_da_state_blk *blk = &ds->state->path.blk[level];
struct xfs_attr_leaf_name_local *lentry;
struct xfs_attr_leaf_name_remote *rentry;
struct xfs_buf *bp;
+ struct xfs_attr_leaf_entry *ent;
xfs_dahash_t calc_hash;
xfs_dahash_t hash;
int nameidx;
@@ -416,7 +413,9 @@ xchk_xattr_rec(
unsigned int badflags;
int error;
- blk = &ds->state->path.blk[level];
+ ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
+
+ ent = xfs_attr3_leaf_entryp(blk->bp->b_addr) + blk->index;
/* Check the whole block, if necessary. */
error = xchk_xattr_block(ds, level);
diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c
index 3d47d111be5a..18a684e18a69 100644
--- a/fs/xfs/scrub/bitmap.c
+++ b/fs/xfs/scrub/bitmap.c
@@ -294,5 +294,6 @@ xfs_bitmap_set_btblocks(
struct xfs_bitmap *bitmap,
struct xfs_btree_cur *cur)
{
- return xfs_btree_visit_blocks(cur, xfs_bitmap_collect_btblock, bitmap);
+ return xfs_btree_visit_blocks(cur, xfs_bitmap_collect_btblock,
+ XFS_BTREE_VISIT_ALL, bitmap);
}
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index 1bd29fdc2ab5..fa6ea6407992 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -75,6 +75,7 @@ struct xchk_bmap_info {
xfs_fileoff_t lastoff;
bool is_rt;
bool is_shared;
+ bool was_loaded;
int whichfork;
};
@@ -213,25 +214,20 @@ xchk_bmap_xref_rmap(
/* Cross-reference a single rtdev extent record. */
STATIC void
-xchk_bmap_rt_extent_xref(
- struct xchk_bmap_info *info,
+xchk_bmap_rt_iextent_xref(
struct xfs_inode *ip,
- struct xfs_btree_cur *cur,
+ struct xchk_bmap_info *info,
struct xfs_bmbt_irec *irec)
{
- if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
- return;
-
xchk_xref_is_used_rt_space(info->sc, irec->br_startblock,
irec->br_blockcount);
}
/* Cross-reference a single datadev extent record. */
STATIC void
-xchk_bmap_extent_xref(
- struct xchk_bmap_info *info,
+xchk_bmap_iextent_xref(
struct xfs_inode *ip,
- struct xfs_btree_cur *cur,
+ struct xchk_bmap_info *info,
struct xfs_bmbt_irec *irec)
{
struct xfs_mount *mp = info->sc->mp;
@@ -240,9 +236,6 @@ xchk_bmap_extent_xref(
xfs_extlen_t len;
int error;
- if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
- return;
-
agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
len = irec->br_blockcount;
@@ -300,20 +293,15 @@ xchk_bmap_dirattr_extent(
/* Scrub a single extent record. */
STATIC int
-xchk_bmap_extent(
+xchk_bmap_iextent(
struct xfs_inode *ip,
- struct xfs_btree_cur *cur,
struct xchk_bmap_info *info,
struct xfs_bmbt_irec *irec)
{
struct xfs_mount *mp = info->sc->mp;
- struct xfs_buf *bp = NULL;
xfs_filblks_t end;
int error = 0;
- if (cur)
- xfs_btree_get_block(cur, 0, &bp);
-
/*
* Check for out-of-order extents. This record could have come
* from the incore list, for which there is no ordering check.
@@ -364,10 +352,13 @@ xchk_bmap_extent(
xchk_fblock_set_corrupt(info->sc, info->whichfork,
irec->br_startoff);
+ if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return 0;
+
if (info->is_rt)
- xchk_bmap_rt_extent_xref(info, ip, cur, irec);
+ xchk_bmap_rt_iextent_xref(ip, info, irec);
else
- xchk_bmap_extent_xref(info, ip, cur, irec);
+ xchk_bmap_iextent_xref(ip, info, irec);
info->lastoff = irec->br_startoff + irec->br_blockcount;
return error;
@@ -380,10 +371,13 @@ xchk_bmapbt_rec(
union xfs_btree_rec *rec)
{
struct xfs_bmbt_irec irec;
+ struct xfs_bmbt_irec iext_irec;
+ struct xfs_iext_cursor icur;
struct xchk_bmap_info *info = bs->private;
struct xfs_inode *ip = bs->cur->bc_private.b.ip;
struct xfs_buf *bp = NULL;
struct xfs_btree_block *block;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, info->whichfork);
uint64_t owner;
int i;
@@ -402,9 +396,26 @@ xchk_bmapbt_rec(
}
}
- /* Set up the in-core record and scrub it. */
+ /*
+ * Check that the incore extent tree contains an extent that matches
+ * this one exactly. We validate those cached bmaps later, so we don't
+ * need to check them here. If the incore extent tree was just loaded
+ * from disk by the scrubber, we assume that its contents match what's
+ * on disk (we still hold the ILOCK) and skip the equivalence check.
+ */
+ if (!info->was_loaded)
+ return 0;
+
xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
- return xchk_bmap_extent(ip, bs->cur, info, &irec);
+ if (!xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur,
+ &iext_irec) ||
+ irec.br_startoff != iext_irec.br_startoff ||
+ irec.br_startblock != iext_irec.br_startblock ||
+ irec.br_blockcount != iext_irec.br_blockcount ||
+ irec.br_state != iext_irec.br_state)
+ xchk_fblock_set_corrupt(bs->sc, info->whichfork,
+ irec.br_startoff);
+ return 0;
}
/* Scan the btree records. */
@@ -415,15 +426,26 @@ xchk_bmap_btree(
struct xchk_bmap_info *info)
{
struct xfs_owner_info oinfo;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(sc->ip, whichfork);
struct xfs_mount *mp = sc->mp;
struct xfs_inode *ip = sc->ip;
struct xfs_btree_cur *cur;
int error;
+ /* Load the incore bmap cache if it's not loaded. */
+ info->was_loaded = ifp->if_flags & XFS_IFEXTENTS;
+ if (!info->was_loaded) {
+ error = xfs_iread_extents(sc->tp, ip, whichfork);
+ if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
+ goto out;
+ }
+
+ /* Check the btree structure. */
cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info);
xfs_btree_del_cursor(cur, error);
+out:
return error;
}
@@ -500,7 +522,7 @@ xchk_bmap_check_rmap(
out:
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
- return XFS_BTREE_QUERY_RANGE_ABORT;
+ return -ECANCELED;
return 0;
}
@@ -529,7 +551,7 @@ xchk_bmap_check_ag_rmaps(
sbcri.sc = sc;
sbcri.whichfork = whichfork;
error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri);
- if (error == XFS_BTREE_QUERY_RANGE_ABORT)
+ if (error == -ECANCELED)
error = 0;
xfs_btree_del_cursor(cur, error);
@@ -671,13 +693,6 @@ xchk_bmap(
if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
goto out;
- /* Now try to scrub the in-memory extent list. */
- if (!(ifp->if_flags & XFS_IFEXTENTS)) {
- error = xfs_iread_extents(sc->tp, ip, whichfork);
- if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
- goto out;
- }
-
/* Find the offset of the last extent in the mapping. */
error = xfs_bmap_last_offset(ip, &endoff, whichfork);
if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
@@ -689,7 +704,7 @@ xchk_bmap(
for_each_xfs_iext(ifp, &icur, &irec) {
if (xchk_should_terminate(sc, &error) ||
(sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
- break;
+ goto out;
if (isnullstartblock(irec.br_startblock))
continue;
if (irec.br_startoff >= endoff) {
@@ -697,7 +712,7 @@ xchk_bmap(
irec.br_startoff);
goto out;
}
- error = xchk_bmap_extent(ip, NULL, &info, &irec);
+ error = xchk_bmap_iextent(ip, &info, &irec);
if (error)
goto out;
}
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 003a772cd26c..2e50d146105d 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -14,8 +14,15 @@
static inline bool
xchk_should_terminate(
struct xfs_scrub *sc,
- int *error)
+ int *error)
{
+ /*
+ * If preemption is disabled, we need to yield to the scheduler every
+ * few seconds so that we don't run afoul of the soft lockup watchdog
+ * or RCU stall detector.
+ */
+ cond_resched();
+
if (fatal_signal_pending(current)) {
if (*error == 0)
*error = -EAGAIN;
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c
index 77ff9f97bcda..97a15b6f2865 100644
--- a/fs/xfs/scrub/dabtree.c
+++ b/fs/xfs/scrub/dabtree.c
@@ -77,40 +77,18 @@ xchk_da_set_corrupt(
__return_address);
}
-/* Find an entry at a certain level in a da btree. */
-STATIC void *
-xchk_da_btree_entry(
- struct xchk_da_btree *ds,
- int level,
- int rec)
+static struct xfs_da_node_entry *
+xchk_da_btree_node_entry(
+ struct xchk_da_btree *ds,
+ int level)
{
- char *ents;
- struct xfs_da_state_blk *blk;
- void *baddr;
+ struct xfs_da_state_blk *blk = &ds->state->path.blk[level];
+ struct xfs_da3_icnode_hdr hdr;
- /* Dispatch the entry finding function. */
- blk = &ds->state->path.blk[level];
- baddr = blk->bp->b_addr;
- switch (blk->magic) {
- case XFS_ATTR_LEAF_MAGIC:
- case XFS_ATTR3_LEAF_MAGIC:
- ents = (char *)xfs_attr3_leaf_entryp(baddr);
- return ents + (rec * sizeof(struct xfs_attr_leaf_entry));
- case XFS_DIR2_LEAFN_MAGIC:
- case XFS_DIR3_LEAFN_MAGIC:
- ents = (char *)ds->dargs.dp->d_ops->leaf_ents_p(baddr);
- return ents + (rec * sizeof(struct xfs_dir2_leaf_entry));
- case XFS_DIR2_LEAF1_MAGIC:
- case XFS_DIR3_LEAF1_MAGIC:
- ents = (char *)ds->dargs.dp->d_ops->leaf_ents_p(baddr);
- return ents + (rec * sizeof(struct xfs_dir2_leaf_entry));
- case XFS_DA_NODE_MAGIC:
- case XFS_DA3_NODE_MAGIC:
- ents = (char *)ds->dargs.dp->d_ops->node_tree_p(baddr);
- return ents + (rec * sizeof(struct xfs_da_node_entry));
- }
+ ASSERT(blk->magic == XFS_DA_NODE_MAGIC);
- return NULL;
+ xfs_da3_node_hdr_from_disk(ds->sc->mp, &hdr, blk->bp->b_addr);
+ return hdr.btree + blk->index;
}
/* Scrub a da btree hash (key). */
@@ -120,7 +98,6 @@ xchk_da_btree_hash(
int level,
__be32 *hashp)
{
- struct xfs_da_state_blk *blks;
struct xfs_da_node_entry *entry;
xfs_dahash_t hash;
xfs_dahash_t parent_hash;
@@ -135,8 +112,7 @@ xchk_da_btree_hash(
return 0;
/* Is this hash no larger than the parent hash? */
- blks = ds->state->path.blk;
- entry = xchk_da_btree_entry(ds, level - 1, blks[level - 1].index);
+ entry = xchk_da_btree_node_entry(ds, level - 1);
parent_hash = be32_to_cpu(entry->hashval);
if (parent_hash < hash)
xchk_da_set_corrupt(ds, level);
@@ -355,8 +331,8 @@ xchk_da_btree_block(
goto out_nobuf;
/* Read the buffer. */
- error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno, -2,
- &blk->bp, dargs->whichfork,
+ error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno,
+ XFS_DABUF_MAP_HOLE_OK, &blk->bp, dargs->whichfork,
&xchk_da_btree_buf_ops);
if (!xchk_da_process_error(ds, level, &error))
goto out_nobuf;
@@ -433,8 +409,8 @@ xchk_da_btree_block(
XFS_BLFT_DA_NODE_BUF);
blk->magic = XFS_DA_NODE_MAGIC;
node = blk->bp->b_addr;
- ip->d_ops->node_hdr_from_disk(&nodehdr, node);
- btree = ip->d_ops->node_tree_p(node);
+ xfs_da3_node_hdr_from_disk(ip->i_mount, &nodehdr, node);
+ btree = nodehdr.btree;
*pmaxrecs = nodehdr.count;
blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval);
if (level == 0) {
@@ -479,14 +455,12 @@ xchk_da_btree(
struct xfs_mount *mp = sc->mp;
struct xfs_da_state_blk *blks;
struct xfs_da_node_entry *key;
- void *rec;
xfs_dablk_t blkno;
int level;
int error;
/* Skip short format data structures; no btree to scan. */
- if (XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_EXTENTS &&
- XFS_IFORK_FORMAT(sc->ip, whichfork) != XFS_DINODE_FMT_BTREE)
+ if (!xfs_ifork_has_extents(sc->ip, whichfork))
return 0;
/* Set up initial da state. */
@@ -538,9 +512,7 @@ xchk_da_btree(
}
/* Dispatch record scrubbing. */
- rec = xchk_da_btree_entry(&ds, level,
- blks[level].index);
- error = scrub_fn(&ds, level, rec);
+ error = scrub_fn(&ds, level);
if (error)
break;
if (xchk_should_terminate(sc, &error) ||
@@ -562,7 +534,7 @@ xchk_da_btree(
}
/* Hashes in order for scrub? */
- key = xchk_da_btree_entry(&ds, level, blks[level].index);
+ key = xchk_da_btree_node_entry(&ds, level);
error = xchk_da_btree_hash(&ds, level, &key->hashval);
if (error)
goto out;
diff --git a/fs/xfs/scrub/dabtree.h b/fs/xfs/scrub/dabtree.h
index cb3f0003245b..1f3515c6d5a8 100644
--- a/fs/xfs/scrub/dabtree.h
+++ b/fs/xfs/scrub/dabtree.h
@@ -28,8 +28,7 @@ struct xchk_da_btree {
int tree_level;
};
-typedef int (*xchk_da_btree_rec_fn)(struct xchk_da_btree *ds,
- int level, void *rec);
+typedef int (*xchk_da_btree_rec_fn)(struct xchk_da_btree *ds, int level);
/* Check for da btree operation errors. */
bool xchk_da_process_error(struct xchk_da_btree *ds, int level, int *error);
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index 1e2e11721eb9..266da4e4bde6 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -113,6 +113,9 @@ xchk_dir_actor(
offset = xfs_dir2_db_to_da(mp->m_dir_geo,
xfs_dir2_dataptr_to_db(mp->m_dir_geo, pos));
+ if (xchk_should_terminate(sdc->sc, &error))
+ return error;
+
/* Does this inode number make sense? */
if (!xfs_verify_dir_ino(mp, ino)) {
xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset);
@@ -179,15 +182,17 @@ out:
STATIC int
xchk_dir_rec(
struct xchk_da_btree *ds,
- int level,
- void *rec)
+ int level)
{
+ struct xfs_da_state_blk *blk = &ds->state->path.blk[level];
struct xfs_mount *mp = ds->state->mp;
- struct xfs_dir2_leaf_entry *ent = rec;
struct xfs_inode *dp = ds->dargs.dp;
+ struct xfs_da_geometry *geo = mp->m_dir_geo;
struct xfs_dir2_data_entry *dent;
struct xfs_buf *bp;
- char *p, *endp;
+ struct xfs_dir2_leaf_entry *ent;
+ unsigned int end;
+ unsigned int iter_off;
xfs_ino_t ino;
xfs_dablk_t rec_bno;
xfs_dir2_db_t db;
@@ -195,9 +200,16 @@ xchk_dir_rec(
xfs_dir2_dataptr_t ptr;
xfs_dahash_t calc_hash;
xfs_dahash_t hash;
+ struct xfs_dir3_icleaf_hdr hdr;
unsigned int tag;
int error;
+ ASSERT(blk->magic == XFS_DIR2_LEAF1_MAGIC ||
+ blk->magic == XFS_DIR2_LEAFN_MAGIC);
+
+ xfs_dir2_leaf_hdr_from_disk(mp, &hdr, blk->bp->b_addr);
+ ent = hdr.ents + blk->index;
+
/* Check the hash of the entry. */
error = xchk_da_btree_hash(ds, level, &ent->hashval);
if (error)
@@ -209,15 +221,16 @@ xchk_dir_rec(
return 0;
/* Find the directory entry's location. */
- db = xfs_dir2_dataptr_to_db(mp->m_dir_geo, ptr);
- off = xfs_dir2_dataptr_to_off(mp->m_dir_geo, ptr);
- rec_bno = xfs_dir2_db_to_da(mp->m_dir_geo, db);
+ db = xfs_dir2_dataptr_to_db(geo, ptr);
+ off = xfs_dir2_dataptr_to_off(geo, ptr);
+ rec_bno = xfs_dir2_db_to_da(geo, db);
- if (rec_bno >= mp->m_dir_geo->leafblk) {
+ if (rec_bno >= geo->leafblk) {
xchk_da_set_corrupt(ds, level);
goto out;
}
- error = xfs_dir3_data_read(ds->dargs.trans, dp, rec_bno, -2, &bp);
+ error = xfs_dir3_data_read(ds->dargs.trans, dp, rec_bno,
+ XFS_DABUF_MAP_HOLE_OK, &bp);
if (!xchk_fblock_process_error(ds->sc, XFS_DATA_FORK, rec_bno,
&error))
goto out;
@@ -230,38 +243,37 @@ xchk_dir_rec(
if (ds->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
goto out_relse;
- dent = (struct xfs_dir2_data_entry *)(((char *)bp->b_addr) + off);
+ dent = bp->b_addr + off;
/* Make sure we got a real directory entry. */
- p = (char *)mp->m_dir_inode_ops->data_entry_p(bp->b_addr);
- endp = xfs_dir3_data_endp(mp->m_dir_geo, bp->b_addr);
- if (!endp) {
+ iter_off = geo->data_entry_offset;
+ end = xfs_dir3_data_end_offset(geo, bp->b_addr);
+ if (!end) {
xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
goto out_relse;
}
- while (p < endp) {
- struct xfs_dir2_data_entry *dep;
- struct xfs_dir2_data_unused *dup;
+ for (;;) {
+ struct xfs_dir2_data_entry *dep = bp->b_addr + iter_off;
+ struct xfs_dir2_data_unused *dup = bp->b_addr + iter_off;
+
+ if (iter_off >= end) {
+ xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
+ goto out_relse;
+ }
- dup = (struct xfs_dir2_data_unused *)p;
if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
- p += be16_to_cpu(dup->length);
+ iter_off += be16_to_cpu(dup->length);
continue;
}
- dep = (struct xfs_dir2_data_entry *)p;
if (dep == dent)
break;
- p += mp->m_dir_inode_ops->data_entsize(dep->namelen);
- }
- if (p >= endp) {
- xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
- goto out_relse;
+ iter_off += xfs_dir2_data_entsize(mp, dep->namelen);
}
/* Retrieve the entry, sanity check it, and compare hashes. */
ino = be64_to_cpu(dent->inumber);
hash = be32_to_cpu(ent->hashval);
- tag = be16_to_cpup(dp->d_ops->data_entry_tag_p(dent));
+ tag = be16_to_cpup(xfs_dir2_data_entry_tag_p(mp, dent));
if (!xfs_verify_dir_ino(mp, ino) || tag != off)
xchk_fblock_set_corrupt(ds->sc, XFS_DATA_FORK, rec_bno);
if (dent->namelen == 0) {
@@ -319,19 +331,15 @@ xchk_directory_data_bestfree(
struct xfs_buf *bp;
struct xfs_dir2_data_free *bf;
struct xfs_mount *mp = sc->mp;
- const struct xfs_dir_ops *d_ops;
- char *ptr;
- char *endptr;
u16 tag;
unsigned int nr_bestfrees = 0;
unsigned int nr_frees = 0;
unsigned int smallest_bestfree;
int newlen;
- int offset;
+ unsigned int offset;
+ unsigned int end;
int error;
- d_ops = sc->ip->d_ops;
-
if (is_block) {
/* dir block format */
if (lblk != XFS_B_TO_FSBT(mp, XFS_DIR2_DATA_OFFSET))
@@ -339,7 +347,7 @@ xchk_directory_data_bestfree(
error = xfs_dir3_block_read(sc->tp, sc->ip, &bp);
} else {
/* dir data format */
- error = xfs_dir3_data_read(sc->tp, sc->ip, lblk, -1, &bp);
+ error = xfs_dir3_data_read(sc->tp, sc->ip, lblk, 0, &bp);
}
if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
goto out;
@@ -351,7 +359,7 @@ xchk_directory_data_bestfree(
goto out_buf;
/* Do the bestfrees correspond to actual free space? */
- bf = d_ops->data_bestfree_p(bp->b_addr);
+ bf = xfs_dir2_data_bestfree_p(mp, bp->b_addr);
smallest_bestfree = UINT_MAX;
for (dfp = &bf[0]; dfp < &bf[XFS_DIR2_DATA_FD_COUNT]; dfp++) {
offset = be16_to_cpu(dfp->offset);
@@ -361,13 +369,13 @@ xchk_directory_data_bestfree(
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
goto out_buf;
}
- dup = (struct xfs_dir2_data_unused *)(bp->b_addr + offset);
+ dup = bp->b_addr + offset;
tag = be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup));
/* bestfree doesn't match the entry it points at? */
if (dup->freetag != cpu_to_be16(XFS_DIR2_DATA_FREE_TAG) ||
be16_to_cpu(dup->length) != be16_to_cpu(dfp->length) ||
- tag != ((char *)dup - (char *)bp->b_addr)) {
+ tag != offset) {
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
goto out_buf;
}
@@ -383,30 +391,30 @@ xchk_directory_data_bestfree(
}
/* Make sure the bestfrees are actually the best free spaces. */
- ptr = (char *)d_ops->data_entry_p(bp->b_addr);
- endptr = xfs_dir3_data_endp(mp->m_dir_geo, bp->b_addr);
+ offset = mp->m_dir_geo->data_entry_offset;
+ end = xfs_dir3_data_end_offset(mp->m_dir_geo, bp->b_addr);
/* Iterate the entries, stopping when we hit or go past the end. */
- while (ptr < endptr) {
- dup = (struct xfs_dir2_data_unused *)ptr;
+ while (offset < end) {
+ dup = bp->b_addr + offset;
+
/* Skip real entries */
if (dup->freetag != cpu_to_be16(XFS_DIR2_DATA_FREE_TAG)) {
- struct xfs_dir2_data_entry *dep;
+ struct xfs_dir2_data_entry *dep = bp->b_addr + offset;
- dep = (struct xfs_dir2_data_entry *)ptr;
- newlen = d_ops->data_entsize(dep->namelen);
+ newlen = xfs_dir2_data_entsize(mp, dep->namelen);
if (newlen <= 0) {
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK,
lblk);
goto out_buf;
}
- ptr += newlen;
+ offset += newlen;
continue;
}
/* Spot check this free entry */
tag = be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup));
- if (tag != ((char *)dup - (char *)bp->b_addr)) {
+ if (tag != offset) {
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
goto out_buf;
}
@@ -425,13 +433,13 @@ xchk_directory_data_bestfree(
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
goto out_buf;
}
- ptr += newlen;
- if (ptr <= endptr)
+ offset += newlen;
+ if (offset <= end)
nr_frees++;
}
/* We're required to fill all the space. */
- if (ptr != endptr)
+ if (offset != end)
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
/* Did we see at least as many free slots as there are bestfrees? */
@@ -458,7 +466,7 @@ xchk_directory_check_freesp(
{
struct xfs_dir2_data_free *dfp;
- dfp = sc->ip->d_ops->data_bestfree_p(dbp->b_addr);
+ dfp = xfs_dir2_data_bestfree_p(sc->mp, dbp->b_addr);
if (len != be16_to_cpu(dfp->length))
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
@@ -475,12 +483,10 @@ xchk_directory_leaf1_bestfree(
xfs_dablk_t lblk)
{
struct xfs_dir3_icleaf_hdr leafhdr;
- struct xfs_dir2_leaf_entry *ents;
struct xfs_dir2_leaf_tail *ltp;
struct xfs_dir2_leaf *leaf;
struct xfs_buf *dbp;
struct xfs_buf *bp;
- const struct xfs_dir_ops *d_ops = sc->ip->d_ops;
struct xfs_da_geometry *geo = sc->mp->m_dir_geo;
__be16 *bestp;
__u16 best;
@@ -492,14 +498,13 @@ xchk_directory_leaf1_bestfree(
int error;
/* Read the free space block. */
- error = xfs_dir3_leaf_read(sc->tp, sc->ip, lblk, -1, &bp);
+ error = xfs_dir3_leaf_read(sc->tp, sc->ip, lblk, &bp);
if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
goto out;
xchk_buffer_recheck(sc, bp);
leaf = bp->b_addr;
- d_ops->leaf_hdr_from_disk(&leafhdr, leaf);
- ents = d_ops->leaf_ents_p(leaf);
+ xfs_dir2_leaf_hdr_from_disk(sc->ip->i_mount, &leafhdr, leaf);
ltp = xfs_dir2_leaf_tail_p(geo, leaf);
bestcount = be32_to_cpu(ltp->bestcount);
bestp = xfs_dir2_leaf_bests_p(ltp);
@@ -521,24 +526,25 @@ xchk_directory_leaf1_bestfree(
}
/* Is the leaf count even remotely sane? */
- if (leafhdr.count > d_ops->leaf_max_ents(geo)) {
+ if (leafhdr.count > geo->leaf_max_ents) {
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
goto out;
}
/* Leaves and bests don't overlap in leaf format. */
- if ((char *)&ents[leafhdr.count] > (char *)bestp) {
+ if ((char *)&leafhdr.ents[leafhdr.count] > (char *)bestp) {
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
goto out;
}
/* Check hash value order, count stale entries. */
for (i = 0; i < leafhdr.count; i++) {
- hash = be32_to_cpu(ents[i].hashval);
+ hash = be32_to_cpu(leafhdr.ents[i].hashval);
if (i > 0 && lasthash > hash)
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, lblk);
lasthash = hash;
- if (ents[i].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
+ if (leafhdr.ents[i].address ==
+ cpu_to_be32(XFS_DIR2_NULL_DATAPTR))
stale++;
}
if (leafhdr.stale != stale)
@@ -552,7 +558,7 @@ xchk_directory_leaf1_bestfree(
if (best == NULLDATAOFF)
continue;
error = xfs_dir3_data_read(sc->tp, sc->ip,
- i * args->geo->fsbcount, -1, &dbp);
+ i * args->geo->fsbcount, 0, &dbp);
if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk,
&error))
break;
@@ -575,7 +581,6 @@ xchk_directory_free_bestfree(
struct xfs_dir3_icfree_hdr freehdr;
struct xfs_buf *dbp;
struct xfs_buf *bp;
- __be16 *bestp;
__u16 best;
unsigned int stale = 0;
int i;
@@ -595,17 +600,16 @@ xchk_directory_free_bestfree(
}
/* Check all the entries. */
- sc->ip->d_ops->free_hdr_from_disk(&freehdr, bp->b_addr);
- bestp = sc->ip->d_ops->free_bests_p(bp->b_addr);
- for (i = 0; i < freehdr.nvalid; i++, bestp++) {
- best = be16_to_cpu(*bestp);
+ xfs_dir2_free_hdr_from_disk(sc->ip->i_mount, &freehdr, bp->b_addr);
+ for (i = 0; i < freehdr.nvalid; i++) {
+ best = be16_to_cpu(freehdr.bests[i]);
if (best == NULLDATAOFF) {
stale++;
continue;
}
error = xfs_dir3_data_read(sc->tp, sc->ip,
(freehdr.firstdb + i) * args->geo->fsbcount,
- -1, &dbp);
+ 0, &dbp);
if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk,
&error))
break;
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index fc3f510c9034..ec2064ed3c30 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -83,9 +83,6 @@ xchk_fscount_warmup(
error = xfs_alloc_read_agf(mp, sc->tp, agno, 0, &agf_bp);
if (error)
break;
- error = -ENOMEM;
- if (!agf_bp || !agi_bp)
- break;
/*
* These are supposed to be initialized by the header read
@@ -104,7 +101,7 @@ next_loop_perag:
pag = NULL;
error = 0;
- if (fatal_signal_pending(current))
+ if (xchk_should_terminate(sc, &error))
break;
}
@@ -125,7 +122,7 @@ xchk_setup_fscounters(
struct xchk_fscounters *fsc;
int error;
- sc->buf = kmem_zalloc(sizeof(struct xchk_fscounters), KM_SLEEP);
+ sc->buf = kmem_zalloc(sizeof(struct xchk_fscounters), 0);
if (!sc->buf)
return -ENOMEM;
fsc = sc->buf;
@@ -163,6 +160,7 @@ xchk_fscount_aggregate_agcounts(
uint64_t delayed;
xfs_agnumber_t agno;
int tries = 8;
+ int error = 0;
retry:
fsc->icount = 0;
@@ -196,10 +194,13 @@ retry:
xfs_perag_put(pag);
- if (fatal_signal_pending(current))
+ if (xchk_should_terminate(sc, &error))
break;
}
+ if (error)
+ return error;
+
/*
* The global incore space reservation is taken from the incore
* counters, so leave that out of the computation.
diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c
index b2f602811e9d..83d27cdf579b 100644
--- a/fs/xfs/scrub/health.c
+++ b/fs/xfs/scrub/health.c
@@ -11,6 +11,7 @@
#include "xfs_sb.h"
#include "xfs_health.h"
#include "scrub/scrub.h"
+#include "scrub/health.h"
/*
* Scrub and In-Core Filesystem Health Assessments
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
index c962bd534690..5705adc43a75 100644
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -32,8 +32,10 @@ xchk_setup_parent(
struct xchk_parent_ctx {
struct dir_context dc;
+ struct xfs_scrub *sc;
xfs_ino_t ino;
xfs_nlink_t nlink;
+ bool cancelled;
};
/* Look for a single entry in a directory pointing to an inode. */
@@ -47,11 +49,21 @@ xchk_parent_actor(
unsigned type)
{
struct xchk_parent_ctx *spc;
+ int error = 0;
spc = container_of(dc, struct xchk_parent_ctx, dc);
if (spc->ino == ino)
spc->nlink++;
- return 0;
+
+ /*
+ * If we're facing a fatal signal, bail out. Store the cancellation
+ * status separately because the VFS readdir code squashes error codes
+ * into short directory reads.
+ */
+ if (xchk_should_terminate(spc->sc, &error))
+ spc->cancelled = true;
+
+ return error;
}
/* Count the number of dentries in the parent dir that point to this inode. */
@@ -62,10 +74,9 @@ xchk_parent_count_parent_dentries(
xfs_nlink_t *nlink)
{
struct xchk_parent_ctx spc = {
- .dc.actor = xchk_parent_actor,
- .dc.pos = 0,
- .ino = sc->ip->i_ino,
- .nlink = 0,
+ .dc.actor = xchk_parent_actor,
+ .ino = sc->ip->i_ino,
+ .sc = sc,
};
size_t bufsize;
loff_t oldpos;
@@ -80,7 +91,7 @@ xchk_parent_count_parent_dentries(
*/
lock_mode = xfs_ilock_data_map_shared(parent);
if (parent->i_d.di_nextents > 0)
- error = xfs_dir3_data_readahead(parent, 0, -1);
+ error = xfs_dir3_data_readahead(parent, 0, 0);
xfs_iunlock(parent, lock_mode);
if (error)
return error;
@@ -97,6 +108,10 @@ xchk_parent_count_parent_dentries(
error = xfs_readdir(sc->tp, parent, &spc.dc, bufsize);
if (error)
goto out;
+ if (spc.cancelled) {
+ error = -EAGAIN;
+ goto out;
+ }
if (oldpos == spc.dc.pos)
break;
oldpos = spc.dc.pos;
diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index 0a33b4421c32..905a34558361 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -93,6 +93,10 @@ xchk_quota_item(
unsigned long long rcount;
xfs_ino_t fs_icount;
xfs_dqid_t id = be32_to_cpu(d->d_id);
+ int error = 0;
+
+ if (xchk_should_terminate(sc, &error))
+ return error;
/*
* Except for the root dquot, the actual dquot we got must either have
@@ -178,6 +182,9 @@ xchk_quota_item(
if (id != 0 && rhard != 0 && rcount > rhard)
xchk_fblock_set_warning(sc, XFS_DATA_FORK, offset);
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return -EFSCORRUPTED;
+
return 0;
}
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index 93b3793bc5b3..0cab11a5d390 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -341,7 +341,6 @@ xchk_refcountbt_rec(
xfs_extlen_t len;
xfs_nlink_t refcount;
bool has_cowflag;
- int error = 0;
bno = be32_to_cpu(rec->refc.rc_startblock);
len = be32_to_cpu(rec->refc.rc_blockcount);
@@ -366,7 +365,7 @@ xchk_refcountbt_rec(
xchk_refcountbt_xref(bs->sc, bno, len, refcount);
- return error;
+ return 0;
}
/* Make sure we have as many refc blocks as the rmap says. */
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 4cfeec57fb05..e489d7a8446a 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -341,17 +341,21 @@ xrep_init_btblock(
struct xfs_trans *tp = sc->tp;
struct xfs_mount *mp = sc->mp;
struct xfs_buf *bp;
+ int error;
trace_xrep_init_btblock(mp, XFS_FSB_TO_AGNO(mp, fsb),
XFS_FSB_TO_AGBNO(mp, fsb), btnum);
ASSERT(XFS_FSB_TO_AGNO(mp, fsb) == sc->sa.agno);
- bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, fsb),
- XFS_FSB_TO_BB(mp, 1), 0);
+ error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
+ XFS_FSB_TO_DADDR(mp, fsb), XFS_FSB_TO_BB(mp, 1), 0,
+ &bp);
+ if (error)
+ return error;
xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
xfs_btree_init_block(mp, bp, btnum, 0, 0, sc->sa.agno);
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_BTREE_BUF);
- xfs_trans_log_buf(tp, bp, 0, bp->b_length);
+ xfs_trans_log_buf(tp, bp, 0, BBTOB(bp->b_length) - 1);
bp->b_ops = ops;
*bpp = bp;
@@ -542,8 +546,6 @@ xrep_reap_block(
error = xfs_alloc_read_agf(sc->mp, sc->tp, agno, 0, &agf_bp);
if (error)
return error;
- if (!agf_bp)
- return -ENOMEM;
} else {
agf_bp = sc->sa.agf_bp;
}
@@ -664,7 +666,7 @@ xrep_findroot_agfl_walk(
{
xfs_agblock_t *agbno = priv;
- return (*agbno == bno) ? XFS_ITER_ABORT : 0;
+ return (*agbno == bno) ? -ECANCELED : 0;
}
/* Does this block match the btree information passed in? */
@@ -694,7 +696,7 @@ xrep_findroot_block(
if (owner == XFS_RMAP_OWN_AG) {
error = xfs_agfl_walk(mp, ri->agf, ri->agfl_bp,
xrep_findroot_agfl_walk, &agbno);
- if (error == XFS_ITER_ABORT)
+ if (error == -ECANCELED)
return 0;
if (error)
return error;
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 60c61d7052a8..c3422403b169 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -75,7 +75,6 @@ static inline xfs_extlen_t
xrep_calc_ag_resblks(
struct xfs_scrub *sc)
{
- ASSERT(!(sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR));
return 0;
}
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 15c8c5f3f688..f1775bb19313 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -16,6 +16,7 @@
#include "xfs_qm.h"
#include "xfs_errortag.h"
#include "xfs_error.h"
+#include "xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
diff --git a/fs/xfs/scrub/symlink.c b/fs/xfs/scrub/symlink.c
index 99c0b1234c3c..5641ae512c9e 100644
--- a/fs/xfs/scrub/symlink.c
+++ b/fs/xfs/scrub/symlink.c
@@ -22,7 +22,7 @@ xchk_setup_symlink(
struct xfs_inode *ip)
{
/* Allocate the buffer without the inode lock held. */
- sc->buf = kmem_zalloc_large(XFS_SYMLINK_MAXLEN + 1, KM_SLEEP);
+ sc->buf = kmem_zalloc_large(XFS_SYMLINK_MAXLEN + 1, 0);
if (!sc->buf)
return -ENOMEM;
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 3362bae28b46..096203119934 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -329,7 +329,7 @@ TRACE_EVENT(xchk_btree_op_error,
__field(int, level)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, bno)
- __field(int, ptr);
+ __field(int, ptr)
__field(int, error)
__field(void *, ret_ip)
),
@@ -414,7 +414,7 @@ TRACE_EVENT(xchk_btree_error,
__field(int, level)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, bno)
- __field(int, ptr);
+ __field(int, ptr)
__field(void *, ret_ip)
),
TP_fast_assign(
@@ -452,7 +452,7 @@ TRACE_EVENT(xchk_ifork_btree_error,
__field(int, level)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, bno)
- __field(int, ptr);
+ __field(int, ptr)
__field(void *, ret_ip)
),
TP_fast_assign(
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index cbda40d40326..cd743fad8478 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -12,8 +12,10 @@
#include "xfs_inode.h"
#include "xfs_attr.h"
#include "xfs_trace.h"
-#include <linux/posix_acl_xattr.h>
+#include "xfs_error.h"
+#include "xfs_acl.h"
+#include <linux/posix_acl_xattr.h>
/*
* Locking scheme:
@@ -23,6 +25,7 @@
STATIC struct posix_acl *
xfs_acl_from_disk(
+ struct xfs_mount *mp,
const struct xfs_acl *aclp,
int len,
int max_entries)
@@ -32,11 +35,18 @@ xfs_acl_from_disk(
const struct xfs_acl_entry *ace;
unsigned int count, i;
- if (len < sizeof(*aclp))
+ if (len < sizeof(*aclp)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, aclp,
+ len);
return ERR_PTR(-EFSCORRUPTED);
+ }
+
count = be32_to_cpu(aclp->acl_cnt);
- if (count > max_entries || XFS_ACL_SIZE(count) != len)
+ if (count > max_entries || XFS_ACL_SIZE(count) != len) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, aclp,
+ len);
return ERR_PTR(-EFSCORRUPTED);
+ }
acl = posix_acl_alloc(count, GFP_KERNEL);
if (!acl)
@@ -112,7 +122,7 @@ xfs_get_acl(struct inode *inode, int type)
{
struct xfs_inode *ip = XFS_I(inode);
struct posix_acl *acl = NULL;
- struct xfs_acl *xfs_acl;
+ struct xfs_acl *xfs_acl = NULL;
unsigned char *ea_name;
int error;
int len;
@@ -135,12 +145,9 @@ xfs_get_acl(struct inode *inode, int type)
* go out to the disk.
*/
len = XFS_ACL_MAX_SIZE(ip->i_mount);
- xfs_acl = kmem_zalloc_large(len, KM_SLEEP);
- if (!xfs_acl)
- return ERR_PTR(-ENOMEM);
-
- error = xfs_attr_get(ip, ea_name, (unsigned char *)xfs_acl,
- &len, ATTR_ROOT);
+ error = xfs_attr_get(ip, ea_name, strlen(ea_name),
+ (unsigned char **)&xfs_acl, &len,
+ ATTR_ALLOC | ATTR_ROOT);
if (error) {
/*
* If the attribute doesn't exist make sure we have a negative
@@ -149,10 +156,10 @@ xfs_get_acl(struct inode *inode, int type)
if (error != -ENOATTR)
acl = ERR_PTR(error);
} else {
- acl = xfs_acl_from_disk(xfs_acl, len,
+ acl = xfs_acl_from_disk(ip->i_mount, xfs_acl, len,
XFS_ACL_MAX_ENTRIES(ip->i_mount));
+ kmem_free(xfs_acl);
}
- kmem_free(xfs_acl);
return acl;
}
@@ -180,7 +187,7 @@ __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
struct xfs_acl *xfs_acl;
int len = XFS_ACL_MAX_SIZE(ip->i_mount);
- xfs_acl = kmem_zalloc_large(len, KM_SLEEP);
+ xfs_acl = kmem_zalloc_large(len, 0);
if (!xfs_acl)
return -ENOMEM;
@@ -190,15 +197,17 @@ __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
len -= sizeof(struct xfs_acl_entry) *
(XFS_ACL_MAX_ENTRIES(ip->i_mount) - acl->a_count);
- error = xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
- len, ATTR_ROOT);
+ error = xfs_attr_set(ip, ea_name, strlen(ea_name),
+ (unsigned char *)xfs_acl, len, ATTR_ROOT);
kmem_free(xfs_acl);
} else {
/*
* A NULL ACL argument means we want to remove the ACL.
*/
- error = xfs_attr_remove(ip, ea_name, ATTR_ROOT);
+ error = xfs_attr_remove(ip, ea_name,
+ strlen(ea_name),
+ ATTR_ROOT);
/*
* If the attribute didn't exist to start with that's fine.
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index f16d5f196c6b..58e937be24ce 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -18,108 +18,22 @@
#include "xfs_bmap_util.h"
#include "xfs_reflink.h"
-/*
- * structure owned by writepages passed to individual writepage calls
- */
struct xfs_writepage_ctx {
- struct xfs_bmbt_irec imap;
- int fork;
+ struct iomap_writepage_ctx ctx;
unsigned int data_seq;
unsigned int cow_seq;
- struct xfs_ioend *ioend;
};
-struct block_device *
-xfs_find_bdev_for_inode(
- struct inode *inode)
-{
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
-
- if (XFS_IS_REALTIME_INODE(ip))
- return mp->m_rtdev_targp->bt_bdev;
- else
- return mp->m_ddev_targp->bt_bdev;
-}
-
-struct dax_device *
-xfs_find_daxdev_for_inode(
- struct inode *inode)
+static inline struct xfs_writepage_ctx *
+XFS_WPC(struct iomap_writepage_ctx *ctx)
{
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
-
- if (XFS_IS_REALTIME_INODE(ip))
- return mp->m_rtdev_targp->bt_daxdev;
- else
- return mp->m_ddev_targp->bt_daxdev;
-}
-
-static void
-xfs_finish_page_writeback(
- struct inode *inode,
- struct bio_vec *bvec,
- int error)
-{
- struct iomap_page *iop = to_iomap_page(bvec->bv_page);
-
- if (error) {
- SetPageError(bvec->bv_page);
- mapping_set_error(inode->i_mapping, -EIO);
- }
-
- ASSERT(iop || i_blocksize(inode) == PAGE_SIZE);
- ASSERT(!iop || atomic_read(&iop->write_count) > 0);
-
- if (!iop || atomic_dec_and_test(&iop->write_count))
- end_page_writeback(bvec->bv_page);
-}
-
-/*
- * We're now finished for good with this ioend structure. Update the page
- * state, release holds on bios, and finally free up memory. Do not use the
- * ioend after this.
- */
-STATIC void
-xfs_destroy_ioend(
- struct xfs_ioend *ioend,
- int error)
-{
- struct inode *inode = ioend->io_inode;
- struct bio *bio = &ioend->io_inline_bio;
- struct bio *last = ioend->io_bio, *next;
- u64 start = bio->bi_iter.bi_sector;
- bool quiet = bio_flagged(bio, BIO_QUIET);
-
- for (bio = &ioend->io_inline_bio; bio; bio = next) {
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
-
- /*
- * For the last bio, bi_private points to the ioend, so we
- * need to explicitly end the iteration here.
- */
- if (bio == last)
- next = NULL;
- else
- next = bio->bi_private;
-
- /* walk each page on bio, ending page IO on them */
- bio_for_each_segment_all(bvec, bio, iter_all)
- xfs_finish_page_writeback(inode, bvec, error);
- bio_put(bio);
- }
-
- if (unlikely(error && !quiet)) {
- xfs_err_ratelimited(XFS_I(inode)->i_mount,
- "writeback error on sector %llu", start);
- }
+ return container_of(ctx, struct xfs_writepage_ctx, ctx);
}
/*
* Fast and loose check if this write could update the on-disk inode size.
*/
-static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
+static inline bool xfs_ioend_is_append(struct iomap_ioend *ioend)
{
return ioend->io_offset + ioend->io_size >
XFS_I(ioend->io_inode)->i_d.di_size;
@@ -127,7 +41,7 @@ static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
STATIC int
xfs_setfilesize_trans_alloc(
- struct xfs_ioend *ioend)
+ struct iomap_ioend *ioend)
{
struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
struct xfs_trans *tp;
@@ -137,7 +51,7 @@ xfs_setfilesize_trans_alloc(
if (error)
return error;
- ioend->io_append_trans = tp;
+ ioend->io_private = tp;
/*
* We may pass freeze protection with a transaction. So tell lockdep
@@ -200,11 +114,11 @@ xfs_setfilesize(
STATIC int
xfs_setfilesize_ioend(
- struct xfs_ioend *ioend,
+ struct iomap_ioend *ioend,
int error)
{
struct xfs_inode *ip = XFS_I(ioend->io_inode);
- struct xfs_trans *tp = ioend->io_append_trans;
+ struct xfs_trans *tp = ioend->io_private;
/*
* The transaction may have been allocated in the I/O submission thread,
@@ -228,9 +142,8 @@ xfs_setfilesize_ioend(
*/
STATIC void
xfs_end_ioend(
- struct xfs_ioend *ioend)
+ struct iomap_ioend *ioend)
{
- struct list_head ioend_list;
struct xfs_inode *ip = XFS_I(ioend->io_inode);
xfs_off_t offset = ioend->io_offset;
size_t size = ioend->io_size;
@@ -257,7 +170,7 @@ xfs_end_ioend(
*/
error = blk_status_to_errno(ioend->io_bio->bi_status);
if (unlikely(error)) {
- if (ioend->io_fork == XFS_COW_FORK)
+ if (ioend->io_flags & IOMAP_F_SHARED)
xfs_reflink_cancel_cow_range(ip, offset, size, true);
goto done;
}
@@ -265,154 +178,86 @@ xfs_end_ioend(
/*
* Success: commit the COW or unwritten blocks if needed.
*/
- if (ioend->io_fork == XFS_COW_FORK)
+ if (ioend->io_flags & IOMAP_F_SHARED)
error = xfs_reflink_end_cow(ip, offset, size);
- else if (ioend->io_state == XFS_EXT_UNWRITTEN)
+ else if (ioend->io_type == IOMAP_UNWRITTEN)
error = xfs_iomap_write_unwritten(ip, offset, size, false);
else
- ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
+ ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_private);
done:
- if (ioend->io_append_trans)
+ if (ioend->io_private)
error = xfs_setfilesize_ioend(ioend, error);
- list_replace_init(&ioend->io_list, &ioend_list);
- xfs_destroy_ioend(ioend, error);
-
- while (!list_empty(&ioend_list)) {
- ioend = list_first_entry(&ioend_list, struct xfs_ioend,
- io_list);
- list_del_init(&ioend->io_list);
- xfs_destroy_ioend(ioend, error);
- }
-
+ iomap_finish_ioends(ioend, error);
memalloc_nofs_restore(nofs_flag);
}
/*
- * We can merge two adjacent ioends if they have the same set of work to do.
- */
-static bool
-xfs_ioend_can_merge(
- struct xfs_ioend *ioend,
- struct xfs_ioend *next)
-{
- if (ioend->io_bio->bi_status != next->io_bio->bi_status)
- return false;
- if ((ioend->io_fork == XFS_COW_FORK) ^ (next->io_fork == XFS_COW_FORK))
- return false;
- if ((ioend->io_state == XFS_EXT_UNWRITTEN) ^
- (next->io_state == XFS_EXT_UNWRITTEN))
- return false;
- if (ioend->io_offset + ioend->io_size != next->io_offset)
- return false;
- return true;
-}
-
-/*
* If the to be merged ioend has a preallocated transaction for file
* size updates we need to ensure the ioend it is merged into also
* has one. If it already has one we can simply cancel the transaction
* as it is guaranteed to be clean.
*/
static void
-xfs_ioend_merge_append_transactions(
- struct xfs_ioend *ioend,
- struct xfs_ioend *next)
+xfs_ioend_merge_private(
+ struct iomap_ioend *ioend,
+ struct iomap_ioend *next)
{
- if (!ioend->io_append_trans) {
- ioend->io_append_trans = next->io_append_trans;
- next->io_append_trans = NULL;
+ if (!ioend->io_private) {
+ ioend->io_private = next->io_private;
+ next->io_private = NULL;
} else {
xfs_setfilesize_ioend(next, -ECANCELED);
}
}
-/* Try to merge adjacent completions. */
-STATIC void
-xfs_ioend_try_merge(
- struct xfs_ioend *ioend,
- struct list_head *more_ioends)
-{
- struct xfs_ioend *next_ioend;
-
- while (!list_empty(more_ioends)) {
- next_ioend = list_first_entry(more_ioends, struct xfs_ioend,
- io_list);
- if (!xfs_ioend_can_merge(ioend, next_ioend))
- break;
- list_move_tail(&next_ioend->io_list, &ioend->io_list);
- ioend->io_size += next_ioend->io_size;
- if (next_ioend->io_append_trans)
- xfs_ioend_merge_append_transactions(ioend, next_ioend);
- }
-}
-
-/* list_sort compare function for ioends */
-static int
-xfs_ioend_compare(
- void *priv,
- struct list_head *a,
- struct list_head *b)
-{
- struct xfs_ioend *ia;
- struct xfs_ioend *ib;
-
- ia = container_of(a, struct xfs_ioend, io_list);
- ib = container_of(b, struct xfs_ioend, io_list);
- if (ia->io_offset < ib->io_offset)
- return -1;
- else if (ia->io_offset > ib->io_offset)
- return 1;
- return 0;
-}
-
/* Finish all pending io completions. */
void
xfs_end_io(
struct work_struct *work)
{
- struct xfs_inode *ip;
- struct xfs_ioend *ioend;
- struct list_head completion_list;
+ struct xfs_inode *ip =
+ container_of(work, struct xfs_inode, i_ioend_work);
+ struct iomap_ioend *ioend;
+ struct list_head tmp;
unsigned long flags;
- ip = container_of(work, struct xfs_inode, i_ioend_work);
-
spin_lock_irqsave(&ip->i_ioend_lock, flags);
- list_replace_init(&ip->i_ioend_list, &completion_list);
+ list_replace_init(&ip->i_ioend_list, &tmp);
spin_unlock_irqrestore(&ip->i_ioend_lock, flags);
- list_sort(NULL, &completion_list, xfs_ioend_compare);
-
- while (!list_empty(&completion_list)) {
- ioend = list_first_entry(&completion_list, struct xfs_ioend,
- io_list);
+ iomap_sort_ioends(&tmp);
+ while ((ioend = list_first_entry_or_null(&tmp, struct iomap_ioend,
+ io_list))) {
list_del_init(&ioend->io_list);
- xfs_ioend_try_merge(ioend, &completion_list);
+ iomap_ioend_try_merge(ioend, &tmp, xfs_ioend_merge_private);
xfs_end_ioend(ioend);
}
}
+static inline bool xfs_ioend_needs_workqueue(struct iomap_ioend *ioend)
+{
+ return ioend->io_private ||
+ ioend->io_type == IOMAP_UNWRITTEN ||
+ (ioend->io_flags & IOMAP_F_SHARED);
+}
+
STATIC void
xfs_end_bio(
struct bio *bio)
{
- struct xfs_ioend *ioend = bio->bi_private;
+ struct iomap_ioend *ioend = bio->bi_private;
struct xfs_inode *ip = XFS_I(ioend->io_inode);
- struct xfs_mount *mp = ip->i_mount;
unsigned long flags;
- if (ioend->io_fork == XFS_COW_FORK ||
- ioend->io_state == XFS_EXT_UNWRITTEN ||
- ioend->io_append_trans != NULL) {
- spin_lock_irqsave(&ip->i_ioend_lock, flags);
- if (list_empty(&ip->i_ioend_list))
- WARN_ON_ONCE(!queue_work(mp->m_unwritten_workqueue,
- &ip->i_ioend_work));
- list_add_tail(&ioend->io_list, &ip->i_ioend_list);
- spin_unlock_irqrestore(&ip->i_ioend_lock, flags);
- } else
- xfs_destroy_ioend(ioend, blk_status_to_errno(bio->bi_status));
+ ASSERT(xfs_ioend_needs_workqueue(ioend));
+
+ spin_lock_irqsave(&ip->i_ioend_lock, flags);
+ if (list_empty(&ip->i_ioend_list))
+ WARN_ON_ONCE(!queue_work(ip->i_mount->m_unwritten_workqueue,
+ &ip->i_ioend_work));
+ list_add_tail(&ioend->io_list, &ip->i_ioend_list);
+ spin_unlock_irqrestore(&ip->i_ioend_lock, flags);
}
/*
@@ -421,19 +266,19 @@ xfs_end_bio(
*/
static bool
xfs_imap_valid(
- struct xfs_writepage_ctx *wpc,
+ struct iomap_writepage_ctx *wpc,
struct xfs_inode *ip,
- xfs_fileoff_t offset_fsb)
+ loff_t offset)
{
- if (offset_fsb < wpc->imap.br_startoff ||
- offset_fsb >= wpc->imap.br_startoff + wpc->imap.br_blockcount)
+ if (offset < wpc->iomap.offset ||
+ offset >= wpc->iomap.offset + wpc->iomap.length)
return false;
/*
* If this is a COW mapping, it is sufficient to check that the mapping
* covers the offset. Be careful to check this first because the caller
* can revalidate a COW mapping without updating the data seqno.
*/
- if (wpc->fork == XFS_COW_FORK)
+ if (wpc->iomap.flags & IOMAP_F_SHARED)
return true;
/*
@@ -443,17 +288,17 @@ xfs_imap_valid(
* checked (and found nothing at this offset) could have added
* overlapping blocks.
*/
- if (wpc->data_seq != READ_ONCE(ip->i_df.if_seq))
+ if (XFS_WPC(wpc)->data_seq != READ_ONCE(ip->i_df.if_seq))
return false;
if (xfs_inode_has_cow_data(ip) &&
- wpc->cow_seq != READ_ONCE(ip->i_cowfp->if_seq))
+ XFS_WPC(wpc)->cow_seq != READ_ONCE(ip->i_cowfp->if_seq))
return false;
return true;
}
/*
* Pass in a dellalloc extent and convert it to real extents, return the real
- * extent that maps offset_fsb in wpc->imap.
+ * extent that maps offset_fsb in wpc->iomap.
*
* The current page is held locked so nothing could have removed the block
* backing offset_fsb, although it could have moved from the COW to the data
@@ -461,32 +306,38 @@ xfs_imap_valid(
*/
static int
xfs_convert_blocks(
- struct xfs_writepage_ctx *wpc,
+ struct iomap_writepage_ctx *wpc,
struct xfs_inode *ip,
- xfs_fileoff_t offset_fsb)
+ int whichfork,
+ loff_t offset)
{
int error;
+ unsigned *seq;
+
+ if (whichfork == XFS_COW_FORK)
+ seq = &XFS_WPC(wpc)->cow_seq;
+ else
+ seq = &XFS_WPC(wpc)->data_seq;
/*
- * Attempt to allocate whatever delalloc extent currently backs
- * offset_fsb and put the result into wpc->imap. Allocate in a loop
- * because it may take several attempts to allocate real blocks for a
- * contiguous delalloc extent if free space is sufficiently fragmented.
+ * Attempt to allocate whatever delalloc extent currently backs offset
+ * and put the result into wpc->iomap. Allocate in a loop because it
+ * may take several attempts to allocate real blocks for a contiguous
+ * delalloc extent if free space is sufficiently fragmented.
*/
do {
- error = xfs_bmapi_convert_delalloc(ip, wpc->fork, offset_fsb,
- &wpc->imap, wpc->fork == XFS_COW_FORK ?
- &wpc->cow_seq : &wpc->data_seq);
+ error = xfs_bmapi_convert_delalloc(ip, whichfork, offset,
+ &wpc->iomap, seq);
if (error)
return error;
- } while (wpc->imap.br_startoff + wpc->imap.br_blockcount <= offset_fsb);
+ } while (wpc->iomap.offset + wpc->iomap.length <= offset);
return 0;
}
-STATIC int
+static int
xfs_map_blocks(
- struct xfs_writepage_ctx *wpc,
+ struct iomap_writepage_ctx *wpc,
struct inode *inode,
loff_t offset)
{
@@ -496,6 +347,7 @@ xfs_map_blocks(
xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count);
xfs_fileoff_t cow_fsb = NULLFILEOFF;
+ int whichfork = XFS_DATA_FORK;
struct xfs_bmbt_irec imap;
struct xfs_iext_cursor icur;
int retries = 0;
@@ -519,7 +371,7 @@ xfs_map_blocks(
* against concurrent updates and provides a memory barrier on the way
* out that ensures that we always see the current value.
*/
- if (xfs_imap_valid(wpc, ip, offset_fsb))
+ if (xfs_imap_valid(wpc, ip, offset))
return 0;
/*
@@ -541,10 +393,10 @@ retry:
xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap))
cow_fsb = imap.br_startoff;
if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
- wpc->cow_seq = READ_ONCE(ip->i_cowfp->if_seq);
+ XFS_WPC(wpc)->cow_seq = READ_ONCE(ip->i_cowfp->if_seq);
xfs_iunlock(ip, XFS_ILOCK_SHARED);
- wpc->fork = XFS_COW_FORK;
+ whichfork = XFS_COW_FORK;
goto allocate_blocks;
}
@@ -552,7 +404,7 @@ retry:
* No COW extent overlap. Revalidate now that we may have updated
* ->cow_seq. If the data mapping is still valid, we're done.
*/
- if (xfs_imap_valid(wpc, ip, offset_fsb)) {
+ if (xfs_imap_valid(wpc, ip, offset)) {
xfs_iunlock(ip, XFS_ILOCK_SHARED);
return 0;
}
@@ -564,11 +416,9 @@ retry:
*/
if (!xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap))
imap.br_startoff = end_fsb; /* fake a hole past EOF */
- wpc->data_seq = READ_ONCE(ip->i_df.if_seq);
+ XFS_WPC(wpc)->data_seq = READ_ONCE(ip->i_df.if_seq);
xfs_iunlock(ip, XFS_ILOCK_SHARED);
- wpc->fork = XFS_DATA_FORK;
-
/* landed in a hole or beyond EOF? */
if (imap.br_startoff > offset_fsb) {
imap.br_blockcount = imap.br_startoff - offset_fsb;
@@ -592,11 +442,11 @@ retry:
isnullstartblock(imap.br_startblock))
goto allocate_blocks;
- wpc->imap = imap;
- trace_xfs_map_blocks_found(ip, offset, count, wpc->fork, &imap);
+ xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0);
+ trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap);
return 0;
allocate_blocks:
- error = xfs_convert_blocks(wpc, ip, offset_fsb);
+ error = xfs_convert_blocks(wpc, ip, whichfork, offset);
if (error) {
/*
* If we failed to find the extent in the COW fork we might have
@@ -605,7 +455,7 @@ allocate_blocks:
* the former case, but prevent additional retries to avoid
* looping forever for the latter case.
*/
- if (error == -EAGAIN && wpc->fork == XFS_COW_FORK && !retries++)
+ if (error == -EAGAIN && whichfork == XFS_COW_FORK && !retries++)
goto retry;
ASSERT(error != -EAGAIN);
return error;
@@ -616,34 +466,22 @@ allocate_blocks:
* original delalloc one. Trim the return extent to the next COW
* boundary again to force a re-lookup.
*/
- if (wpc->fork != XFS_COW_FORK && cow_fsb != NULLFILEOFF &&
- cow_fsb < wpc->imap.br_startoff + wpc->imap.br_blockcount)
- wpc->imap.br_blockcount = cow_fsb - wpc->imap.br_startoff;
+ if (whichfork != XFS_COW_FORK && cow_fsb != NULLFILEOFF) {
+ loff_t cow_offset = XFS_FSB_TO_B(mp, cow_fsb);
+
+ if (cow_offset < wpc->iomap.offset + wpc->iomap.length)
+ wpc->iomap.length = cow_offset - wpc->iomap.offset;
+ }
- ASSERT(wpc->imap.br_startoff <= offset_fsb);
- ASSERT(wpc->imap.br_startoff + wpc->imap.br_blockcount > offset_fsb);
- trace_xfs_map_blocks_alloc(ip, offset, count, wpc->fork, &imap);
+ ASSERT(wpc->iomap.offset <= offset);
+ ASSERT(wpc->iomap.offset + wpc->iomap.length > offset);
+ trace_xfs_map_blocks_alloc(ip, offset, count, whichfork, &imap);
return 0;
}
-/*
- * Submit the bio for an ioend. We are passed an ioend with a bio attached to
- * it, and we submit that bio. The ioend may be used for multiple bio
- * submissions, so we only want to allocate an append transaction for the ioend
- * once. In the case of multiple bio submission, each bio will take an IO
- * reference to the ioend to ensure that the ioend completion is only done once
- * all bios have been submitted and the ioend is really done.
- *
- * If @status is non-zero, it means that we have a situation where some part of
- * the submission process has failed after we have marked paged for writeback
- * and unlocked them. In this situation, we need to fail the bio and ioend
- * rather than submit it to IO. This typically only happens on a filesystem
- * shutdown.
- */
-STATIC int
-xfs_submit_ioend(
- struct writeback_control *wbc,
- struct xfs_ioend *ioend,
+static int
+xfs_prepare_ioend(
+ struct iomap_ioend *ioend,
int status)
{
unsigned int nofs_flag;
@@ -656,157 +494,24 @@ xfs_submit_ioend(
nofs_flag = memalloc_nofs_save();
/* Convert CoW extents to regular */
- if (!status && ioend->io_fork == XFS_COW_FORK) {
+ if (!status && (ioend->io_flags & IOMAP_F_SHARED)) {
status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
ioend->io_offset, ioend->io_size);
}
/* Reserve log space if we might write beyond the on-disk inode size. */
if (!status &&
- (ioend->io_fork == XFS_COW_FORK ||
- ioend->io_state != XFS_EXT_UNWRITTEN) &&
+ ((ioend->io_flags & IOMAP_F_SHARED) ||
+ ioend->io_type != IOMAP_UNWRITTEN) &&
xfs_ioend_is_append(ioend) &&
- !ioend->io_append_trans)
+ !ioend->io_private)
status = xfs_setfilesize_trans_alloc(ioend);
memalloc_nofs_restore(nofs_flag);
- ioend->io_bio->bi_private = ioend;
- ioend->io_bio->bi_end_io = xfs_end_bio;
-
- /*
- * If we are failing the IO now, just mark the ioend with an
- * error and finish it. This will run IO completion immediately
- * as there is only one reference to the ioend at this point in
- * time.
- */
- if (status) {
- ioend->io_bio->bi_status = errno_to_blk_status(status);
- bio_endio(ioend->io_bio);
- return status;
- }
-
- submit_bio(ioend->io_bio);
- return 0;
-}
-
-static struct xfs_ioend *
-xfs_alloc_ioend(
- struct inode *inode,
- int fork,
- xfs_exntst_t state,
- xfs_off_t offset,
- struct block_device *bdev,
- sector_t sector,
- struct writeback_control *wbc)
-{
- struct xfs_ioend *ioend;
- struct bio *bio;
-
- bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &xfs_ioend_bioset);
- bio_set_dev(bio, bdev);
- bio->bi_iter.bi_sector = sector;
- bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
- bio->bi_write_hint = inode->i_write_hint;
- wbc_init_bio(wbc, bio);
-
- ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
- INIT_LIST_HEAD(&ioend->io_list);
- ioend->io_fork = fork;
- ioend->io_state = state;
- ioend->io_inode = inode;
- ioend->io_size = 0;
- ioend->io_offset = offset;
- ioend->io_append_trans = NULL;
- ioend->io_bio = bio;
- return ioend;
-}
-
-/*
- * Allocate a new bio, and chain the old bio to the new one.
- *
- * Note that we have to do perform the chaining in this unintuitive order
- * so that the bi_private linkage is set up in the right direction for the
- * traversal in xfs_destroy_ioend().
- */
-static struct bio *
-xfs_chain_bio(
- struct bio *prev)
-{
- struct bio *new;
-
- new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
- bio_copy_dev(new, prev);/* also copies over blkcg information */
- new->bi_iter.bi_sector = bio_end_sector(prev);
- new->bi_opf = prev->bi_opf;
- new->bi_write_hint = prev->bi_write_hint;
-
- bio_chain(prev, new);
- bio_get(prev); /* for xfs_destroy_ioend */
- submit_bio(prev);
- return new;
-}
-
-/*
- * Test to see if we have an existing ioend structure that we could append to
- * first, otherwise finish off the current ioend and start another.
- */
-STATIC void
-xfs_add_to_ioend(
- struct inode *inode,
- xfs_off_t offset,
- struct page *page,
- struct iomap_page *iop,
- struct xfs_writepage_ctx *wpc,
- struct writeback_control *wbc,
- struct list_head *iolist)
-{
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- struct block_device *bdev = xfs_find_bdev_for_inode(inode);
- unsigned len = i_blocksize(inode);
- unsigned poff = offset & (PAGE_SIZE - 1);
- bool merged, same_page = false;
- sector_t sector;
-
- sector = xfs_fsb_to_db(ip, wpc->imap.br_startblock) +
- ((offset - XFS_FSB_TO_B(mp, wpc->imap.br_startoff)) >> 9);
-
- if (!wpc->ioend ||
- wpc->fork != wpc->ioend->io_fork ||
- wpc->imap.br_state != wpc->ioend->io_state ||
- sector != bio_end_sector(wpc->ioend->io_bio) ||
- offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
- if (wpc->ioend)
- list_add(&wpc->ioend->io_list, iolist);
- wpc->ioend = xfs_alloc_ioend(inode, wpc->fork,
- wpc->imap.br_state, offset, bdev, sector, wbc);
- }
-
- merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff,
- &same_page);
-
- if (iop && !same_page)
- atomic_inc(&iop->write_count);
-
- if (!merged) {
- if (bio_full(wpc->ioend->io_bio, len))
- wpc->ioend->io_bio = xfs_chain_bio(wpc->ioend->io_bio);
- bio_add_page(wpc->ioend->io_bio, page, len, poff);
- }
-
- wpc->ioend->io_size += len;
- wbc_account_cgroup_owner(wbc, page, len);
-}
-
-STATIC void
-xfs_vm_invalidatepage(
- struct page *page,
- unsigned int offset,
- unsigned int length)
-{
- trace_xfs_invalidatepage(page->mapping->host, page, offset, length);
- iomap_invalidatepage(page, offset, length);
+ if (xfs_ioend_needs_workqueue(ioend))
+ ioend->io_bio->bi_end_io = xfs_end_bio;
+ return status;
}
/*
@@ -820,8 +525,8 @@ xfs_vm_invalidatepage(
* transaction as there is no space left for block reservation (typically why we
* see a ENOSPC in writeback).
*/
-STATIC void
-xfs_aops_discard_page(
+static void
+xfs_discard_page(
struct page *page)
{
struct inode *inode = page->mapping->host;
@@ -843,246 +548,14 @@ xfs_aops_discard_page(
if (error && !XFS_FORCED_SHUTDOWN(mp))
xfs_alert(mp, "page discard unable to remove delalloc mapping.");
out_invalidate:
- xfs_vm_invalidatepage(page, 0, PAGE_SIZE);
-}
-
-/*
- * We implement an immediate ioend submission policy here to avoid needing to
- * chain multiple ioends and hence nest mempool allocations which can violate
- * forward progress guarantees we need to provide. The current ioend we are
- * adding blocks to is cached on the writepage context, and if the new block
- * does not append to the cached ioend it will create a new ioend and cache that
- * instead.
- *
- * If a new ioend is created and cached, the old ioend is returned and queued
- * locally for submission once the entire page is processed or an error has been
- * detected. While ioends are submitted immediately after they are completed,
- * batching optimisations are provided by higher level block plugging.
- *
- * At the end of a writeback pass, there will be a cached ioend remaining on the
- * writepage context that the caller will need to submit.
- */
-static int
-xfs_writepage_map(
- struct xfs_writepage_ctx *wpc,
- struct writeback_control *wbc,
- struct inode *inode,
- struct page *page,
- uint64_t end_offset)
-{
- LIST_HEAD(submit_list);
- struct iomap_page *iop = to_iomap_page(page);
- unsigned len = i_blocksize(inode);
- struct xfs_ioend *ioend, *next;
- uint64_t file_offset; /* file offset of page */
- int error = 0, count = 0, i;
-
- ASSERT(iop || i_blocksize(inode) == PAGE_SIZE);
- ASSERT(!iop || atomic_read(&iop->write_count) == 0);
-
- /*
- * Walk through the page to find areas to write back. If we run off the
- * end of the current map or find the current map invalid, grab a new
- * one.
- */
- for (i = 0, file_offset = page_offset(page);
- i < (PAGE_SIZE >> inode->i_blkbits) && file_offset < end_offset;
- i++, file_offset += len) {
- if (iop && !test_bit(i, iop->uptodate))
- continue;
-
- error = xfs_map_blocks(wpc, inode, file_offset);
- if (error)
- break;
- if (wpc->imap.br_startblock == HOLESTARTBLOCK)
- continue;
- xfs_add_to_ioend(inode, file_offset, page, iop, wpc, wbc,
- &submit_list);
- count++;
- }
-
- ASSERT(wpc->ioend || list_empty(&submit_list));
- ASSERT(PageLocked(page));
- ASSERT(!PageWriteback(page));
-
- /*
- * On error, we have to fail the ioend here because we may have set
- * pages under writeback, we have to make sure we run IO completion to
- * mark the error state of the IO appropriately, so we can't cancel the
- * ioend directly here. That means we have to mark this page as under
- * writeback if we included any blocks from it in the ioend chain so
- * that completion treats it correctly.
- *
- * If we didn't include the page in the ioend, the on error we can
- * simply discard and unlock it as there are no other users of the page
- * now. The caller will still need to trigger submission of outstanding
- * ioends on the writepage context so they are treated correctly on
- * error.
- */
- if (unlikely(error)) {
- if (!count) {
- xfs_aops_discard_page(page);
- ClearPageUptodate(page);
- unlock_page(page);
- goto done;
- }
-
- /*
- * If the page was not fully cleaned, we need to ensure that the
- * higher layers come back to it correctly. That means we need
- * to keep the page dirty, and for WB_SYNC_ALL writeback we need
- * to ensure the PAGECACHE_TAG_TOWRITE index mark is not removed
- * so another attempt to write this page in this writeback sweep
- * will be made.
- */
- set_page_writeback_keepwrite(page);
- } else {
- clear_page_dirty_for_io(page);
- set_page_writeback(page);
- }
-
- unlock_page(page);
-
- /*
- * Preserve the original error if there was one, otherwise catch
- * submission errors here and propagate into subsequent ioend
- * submissions.
- */
- list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
- int error2;
-
- list_del_init(&ioend->io_list);
- error2 = xfs_submit_ioend(wbc, ioend, error);
- if (error2 && !error)
- error = error2;
- }
-
- /*
- * We can end up here with no error and nothing to write only if we race
- * with a partial page truncate on a sub-page block sized filesystem.
- */
- if (!count)
- end_page_writeback(page);
-done:
- mapping_set_error(page->mapping, error);
- return error;
+ iomap_invalidatepage(page, 0, PAGE_SIZE);
}
-/*
- * Write out a dirty page.
- *
- * For delalloc space on the page we need to allocate space and flush it.
- * For unwritten space on the page we need to start the conversion to
- * regular allocated space.
- */
-STATIC int
-xfs_do_writepage(
- struct page *page,
- struct writeback_control *wbc,
- void *data)
-{
- struct xfs_writepage_ctx *wpc = data;
- struct inode *inode = page->mapping->host;
- loff_t offset;
- uint64_t end_offset;
- pgoff_t end_index;
-
- trace_xfs_writepage(inode, page, 0, 0);
-
- /*
- * Refuse to write the page out if we are called from reclaim context.
- *
- * This avoids stack overflows when called from deeply used stacks in
- * random callers for direct reclaim or memcg reclaim. We explicitly
- * allow reclaim from kswapd as the stack usage there is relatively low.
- *
- * This should never happen except in the case of a VM regression so
- * warn about it.
- */
- if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
- PF_MEMALLOC))
- goto redirty;
-
- /*
- * Given that we do not allow direct reclaim to call us, we should
- * never be called while in a filesystem transaction.
- */
- if (WARN_ON_ONCE(current->flags & PF_MEMALLOC_NOFS))
- goto redirty;
-
- /*
- * Is this page beyond the end of the file?
- *
- * The page index is less than the end_index, adjust the end_offset
- * to the highest offset that this page should represent.
- * -----------------------------------------------------
- * | file mapping | <EOF> |
- * -----------------------------------------------------
- * | Page ... | Page N-2 | Page N-1 | Page N | |
- * ^--------------------------------^----------|--------
- * | desired writeback range | see else |
- * ---------------------------------^------------------|
- */
- offset = i_size_read(inode);
- end_index = offset >> PAGE_SHIFT;
- if (page->index < end_index)
- end_offset = (xfs_off_t)(page->index + 1) << PAGE_SHIFT;
- else {
- /*
- * Check whether the page to write out is beyond or straddles
- * i_size or not.
- * -------------------------------------------------------
- * | file mapping | <EOF> |
- * -------------------------------------------------------
- * | Page ... | Page N-2 | Page N-1 | Page N | Beyond |
- * ^--------------------------------^-----------|---------
- * | | Straddles |
- * ---------------------------------^-----------|--------|
- */
- unsigned offset_into_page = offset & (PAGE_SIZE - 1);
-
- /*
- * Skip the page if it is fully outside i_size, e.g. due to a
- * truncate operation that is in progress. We must redirty the
- * page so that reclaim stops reclaiming it. Otherwise
- * xfs_vm_releasepage() is called on it and gets confused.
- *
- * Note that the end_index is unsigned long, it would overflow
- * if the given offset is greater than 16TB on 32-bit system
- * and if we do check the page is fully outside i_size or not
- * via "if (page->index >= end_index + 1)" as "end_index + 1"
- * will be evaluated to 0. Hence this page will be redirtied
- * and be written out repeatedly which would result in an
- * infinite loop, the user program that perform this operation
- * will hang. Instead, we can verify this situation by checking
- * if the page to write is totally beyond the i_size or if it's
- * offset is just equal to the EOF.
- */
- if (page->index > end_index ||
- (page->index == end_index && offset_into_page == 0))
- goto redirty;
-
- /*
- * The page straddles i_size. It must be zeroed out on each
- * and every writepage invocation because it may be mmapped.
- * "A file is mapped in multiples of the page size. For a file
- * that is not a multiple of the page size, the remaining
- * memory is zeroed when mapped, and writes to that region are
- * not written out to the file."
- */
- zero_user_segment(page, offset_into_page, PAGE_SIZE);
-
- /* Adjust the end_offset to the end of file */
- end_offset = offset;
- }
-
- return xfs_writepage_map(wpc, wbc, inode, page, end_offset);
-
-redirty:
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return 0;
-}
+static const struct iomap_writeback_ops xfs_writeback_ops = {
+ .map_blocks = xfs_map_blocks,
+ .prepare_ioend = xfs_prepare_ioend,
+ .discard_page = xfs_discard_page,
+};
STATIC int
xfs_vm_writepage(
@@ -1090,12 +563,8 @@ xfs_vm_writepage(
struct writeback_control *wbc)
{
struct xfs_writepage_ctx wpc = { };
- int ret;
- ret = xfs_do_writepage(page, wbc, &wpc);
- if (wpc.ioend)
- ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
- return ret;
+ return iomap_writepage(page, wbc, &wpc.ctx, &xfs_writeback_ops);
}
STATIC int
@@ -1104,13 +573,9 @@ xfs_vm_writepages(
struct writeback_control *wbc)
{
struct xfs_writepage_ctx wpc = { };
- int ret;
xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
- ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
- if (wpc.ioend)
- ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
- return ret;
+ return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops);
}
STATIC int
@@ -1118,18 +583,11 @@ xfs_dax_writepages(
struct address_space *mapping,
struct writeback_control *wbc)
{
- xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
- return dax_writeback_mapping_range(mapping,
- xfs_find_bdev_for_inode(mapping->host), wbc);
-}
+ struct xfs_inode *ip = XFS_I(mapping->host);
-STATIC int
-xfs_vm_releasepage(
- struct page *page,
- gfp_t gfp_mask)
-{
- trace_xfs_releasepage(page->mapping->host, page, 0, 0);
- return iomap_releasepage(page, gfp_mask);
+ xfs_iflags_clear(ip, XFS_ITRUNCATED);
+ return dax_writeback_mapping_range(mapping,
+ xfs_inode_buftarg(ip)->bt_daxdev, wbc);
}
STATIC sector_t
@@ -1152,7 +610,7 @@ xfs_vm_bmap(
*/
if (xfs_is_cow_inode(ip) || XFS_IS_REALTIME_INODE(ip))
return 0;
- return iomap_bmap(mapping, block, &xfs_iomap_ops);
+ return iomap_bmap(mapping, block, &xfs_read_iomap_ops);
}
STATIC int
@@ -1160,8 +618,7 @@ xfs_vm_readpage(
struct file *unused,
struct page *page)
{
- trace_xfs_vm_readpage(page->mapping->host, 1);
- return iomap_readpage(page, &xfs_iomap_ops);
+ return iomap_readpage(page, &xfs_read_iomap_ops);
}
STATIC int
@@ -1171,8 +628,7 @@ xfs_vm_readpages(
struct list_head *pages,
unsigned nr_pages)
{
- trace_xfs_vm_readpages(mapping->host, nr_pages);
- return iomap_readpages(mapping, pages, nr_pages, &xfs_iomap_ops);
+ return iomap_readpages(mapping, pages, nr_pages, &xfs_read_iomap_ops);
}
static int
@@ -1181,8 +637,9 @@ xfs_iomap_swapfile_activate(
struct file *swap_file,
sector_t *span)
{
- sis->bdev = xfs_find_bdev_for_inode(file_inode(swap_file));
- return iomap_swapfile_activate(sis, swap_file, span, &xfs_iomap_ops);
+ sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev;
+ return iomap_swapfile_activate(sis, swap_file, span,
+ &xfs_read_iomap_ops);
}
const struct address_space_operations xfs_address_space_operations = {
@@ -1191,8 +648,8 @@ const struct address_space_operations xfs_address_space_operations = {
.writepage = xfs_vm_writepage,
.writepages = xfs_vm_writepages,
.set_page_dirty = iomap_set_page_dirty,
- .releasepage = xfs_vm_releasepage,
- .invalidatepage = xfs_vm_invalidatepage,
+ .releasepage = iomap_releasepage,
+ .invalidatepage = iomap_invalidatepage,
.bmap = xfs_vm_bmap,
.direct_IO = noop_direct_IO,
.migratepage = iomap_migrate_page,
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 45a1ea240cbb..e0bd68419764 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -6,29 +6,9 @@
#ifndef __XFS_AOPS_H__
#define __XFS_AOPS_H__
-extern struct bio_set xfs_ioend_bioset;
-
-/*
- * Structure for buffered I/O completions.
- */
-struct xfs_ioend {
- struct list_head io_list; /* next ioend in chain */
- int io_fork; /* inode fork written back */
- xfs_exntst_t io_state; /* extent state */
- struct inode *io_inode; /* file being written to */
- size_t io_size; /* size of the extent */
- xfs_off_t io_offset; /* offset in the file */
- struct xfs_trans *io_append_trans;/* xact. for size update */
- struct bio *io_bio; /* bio being built */
- struct bio io_inline_bio; /* MUST BE LAST! */
-};
-
extern const struct address_space_operations xfs_address_space_operations;
extern const struct address_space_operations xfs_dax_aops;
int xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
-extern struct block_device *xfs_find_bdev_for_inode(struct inode *);
-extern struct dax_device *xfs_find_daxdev_for_inode(struct inode *);
-
#endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c
index dc93c51c17de..bbfa6ba84dcd 100644
--- a/fs/xfs/xfs_attr_inactive.c
+++ b/fs/xfs/xfs_attr_inactive.c
@@ -22,24 +22,21 @@
#include "xfs_attr_leaf.h"
#include "xfs_quota.h"
#include "xfs_dir2.h"
+#include "xfs_error.h"
/*
- * Look at all the extents for this logical region,
- * invalidate any buffers that are incore/in transactions.
+ * Invalidate any incore buffers associated with this remote attribute value
+ * extent. We never log remote attribute value buffers, which means that they
+ * won't be attached to a transaction and are therefore safe to mark stale.
+ * The actual bunmapi will be taken care of later.
*/
STATIC int
-xfs_attr3_leaf_freextent(
- struct xfs_trans **trans,
+xfs_attr3_rmt_stale(
struct xfs_inode *dp,
xfs_dablk_t blkno,
int blkcnt)
{
struct xfs_bmbt_irec map;
- struct xfs_buf *bp;
- xfs_dablk_t tblkno;
- xfs_daddr_t dblkno;
- int tblkcnt;
- int dblkcnt;
int nmap;
int error;
@@ -47,47 +44,29 @@ xfs_attr3_leaf_freextent(
* Roll through the "value", invalidating the attribute value's
* blocks.
*/
- tblkno = blkno;
- tblkcnt = blkcnt;
- while (tblkcnt > 0) {
+ while (blkcnt > 0) {
/*
* Try to remember where we decided to put the value.
*/
nmap = 1;
- error = xfs_bmapi_read(dp, (xfs_fileoff_t)tblkno, tblkcnt,
+ error = xfs_bmapi_read(dp, (xfs_fileoff_t)blkno, blkcnt,
&map, &nmap, XFS_BMAPI_ATTRFORK);
- if (error) {
+ if (error)
return error;
- }
- ASSERT(nmap == 1);
- ASSERT(map.br_startblock != DELAYSTARTBLOCK);
+ if (XFS_IS_CORRUPT(dp->i_mount, nmap != 1))
+ return -EFSCORRUPTED;
/*
- * If it's a hole, these are already unmapped
- * so there's nothing to invalidate.
+ * Mark any incore buffers for the remote value as stale. We
+ * never log remote attr value buffers, so the buffer should be
+ * easy to kill.
*/
- if (map.br_startblock != HOLESTARTBLOCK) {
-
- dblkno = XFS_FSB_TO_DADDR(dp->i_mount,
- map.br_startblock);
- dblkcnt = XFS_FSB_TO_BB(dp->i_mount,
- map.br_blockcount);
- bp = xfs_trans_get_buf(*trans,
- dp->i_mount->m_ddev_targp,
- dblkno, dblkcnt, 0);
- if (!bp)
- return -ENOMEM;
- xfs_trans_binval(*trans, bp);
- /*
- * Roll to next transaction.
- */
- error = xfs_trans_roll_inode(trans, dp);
- if (error)
- return error;
- }
+ error = xfs_attr_rmtval_stale(dp, &map, 0);
+ if (error)
+ return error;
- tblkno += map.br_blockcount;
- tblkcnt -= map.br_blockcount;
+ blkno += map.br_blockcount;
+ blkcnt -= map.br_blockcount;
}
return 0;
@@ -101,86 +80,45 @@ xfs_attr3_leaf_freextent(
*/
STATIC int
xfs_attr3_leaf_inactive(
- struct xfs_trans **trans,
- struct xfs_inode *dp,
- struct xfs_buf *bp)
+ struct xfs_trans **trans,
+ struct xfs_inode *dp,
+ struct xfs_buf *bp)
{
- struct xfs_attr_leafblock *leaf;
- struct xfs_attr3_icleaf_hdr ichdr;
- struct xfs_attr_leaf_entry *entry;
+ struct xfs_attr3_icleaf_hdr ichdr;
+ struct xfs_mount *mp = bp->b_mount;
+ struct xfs_attr_leafblock *leaf = bp->b_addr;
+ struct xfs_attr_leaf_entry *entry;
struct xfs_attr_leaf_name_remote *name_rmt;
- struct xfs_attr_inactive_list *list;
- struct xfs_attr_inactive_list *lp;
- int error;
- int count;
- int size;
- int tmp;
- int i;
- struct xfs_mount *mp = bp->b_mount;
+ int error = 0;
+ int i;
- leaf = bp->b_addr;
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &ichdr, leaf);
/*
- * Count the number of "remote" value extents.
+ * Find the remote value extents for this leaf and invalidate their
+ * incore buffers.
*/
- count = 0;
entry = xfs_attr3_leaf_entryp(leaf);
for (i = 0; i < ichdr.count; entry++, i++) {
- if (be16_to_cpu(entry->nameidx) &&
- ((entry->flags & XFS_ATTR_LOCAL) == 0)) {
- name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
- if (name_rmt->valueblk)
- count++;
- }
- }
-
- /*
- * If there are no "remote" values, we're done.
- */
- if (count == 0) {
- xfs_trans_brelse(*trans, bp);
- return 0;
- }
-
- /*
- * Allocate storage for a list of all the "remote" value extents.
- */
- size = count * sizeof(xfs_attr_inactive_list_t);
- list = kmem_alloc(size, KM_SLEEP);
+ int blkcnt;
- /*
- * Identify each of the "remote" value extents.
- */
- lp = list;
- entry = xfs_attr3_leaf_entryp(leaf);
- for (i = 0; i < ichdr.count; entry++, i++) {
- if (be16_to_cpu(entry->nameidx) &&
- ((entry->flags & XFS_ATTR_LOCAL) == 0)) {
- name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
- if (name_rmt->valueblk) {
- lp->valueblk = be32_to_cpu(name_rmt->valueblk);
- lp->valuelen = xfs_attr3_rmt_blocks(dp->i_mount,
- be32_to_cpu(name_rmt->valuelen));
- lp++;
- }
- }
- }
- xfs_trans_brelse(*trans, bp); /* unlock for trans. in freextent() */
+ if (!entry->nameidx || (entry->flags & XFS_ATTR_LOCAL))
+ continue;
- /*
- * Invalidate each of the "remote" value extents.
- */
- error = 0;
- for (lp = list, i = 0; i < count; i++, lp++) {
- tmp = xfs_attr3_leaf_freextent(trans, dp,
- lp->valueblk, lp->valuelen);
+ name_rmt = xfs_attr3_leaf_name_remote(leaf, i);
+ if (!name_rmt->valueblk)
+ continue;
- if (error == 0)
- error = tmp; /* save only the 1st errno */
+ blkcnt = xfs_attr3_rmt_blocks(dp->i_mount,
+ be32_to_cpu(name_rmt->valuelen));
+ error = xfs_attr3_rmt_stale(dp,
+ be32_to_cpu(name_rmt->valueblk), blkcnt);
+ if (error)
+ goto err;
}
- kmem_free(list);
+ xfs_trans_brelse(*trans, bp);
+err:
return error;
}
@@ -190,37 +128,35 @@ xfs_attr3_leaf_inactive(
*/
STATIC int
xfs_attr3_node_inactive(
- struct xfs_trans **trans,
- struct xfs_inode *dp,
- struct xfs_buf *bp,
- int level)
+ struct xfs_trans **trans,
+ struct xfs_inode *dp,
+ struct xfs_buf *bp,
+ int level)
{
- xfs_da_blkinfo_t *info;
- xfs_da_intnode_t *node;
- xfs_dablk_t child_fsb;
- xfs_daddr_t parent_blkno, child_blkno;
- int error, i;
- struct xfs_buf *child_bp;
- struct xfs_da_node_entry *btree;
+ struct xfs_mount *mp = dp->i_mount;
+ struct xfs_da_blkinfo *info;
+ xfs_dablk_t child_fsb;
+ xfs_daddr_t parent_blkno, child_blkno;
+ struct xfs_buf *child_bp;
struct xfs_da3_icnode_hdr ichdr;
+ int error, i;
/*
* Since this code is recursive (gasp!) we must protect ourselves.
*/
if (level > XFS_DA_NODE_MAXDEPTH) {
xfs_trans_brelse(*trans, bp); /* no locks for later trans */
- return -EIO;
+ xfs_buf_corruption_error(bp);
+ return -EFSCORRUPTED;
}
- node = bp->b_addr;
- dp->d_ops->node_hdr_from_disk(&ichdr, node);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &ichdr, bp->b_addr);
parent_blkno = bp->b_bn;
if (!ichdr.count) {
xfs_trans_brelse(*trans, bp);
return 0;
}
- btree = dp->d_ops->node_tree_p(node);
- child_fsb = be32_to_cpu(btree[0].before);
+ child_fsb = be32_to_cpu(ichdr.btree[0].before);
xfs_trans_brelse(*trans, bp); /* no locks for later trans */
/*
@@ -235,7 +171,7 @@ xfs_attr3_node_inactive(
* traversal of the tree so we may deal with many blocks
* before we come back to this one.
*/
- error = xfs_da3_node_read(*trans, dp, child_fsb, -1, &child_bp,
+ error = xfs_da3_node_read(*trans, dp, child_fsb, &child_bp,
XFS_ATTR_FORK);
if (error)
return error;
@@ -258,8 +194,9 @@ xfs_attr3_node_inactive(
error = xfs_attr3_leaf_inactive(trans, dp, child_bp);
break;
default:
- error = -EIO;
+ xfs_buf_corruption_error(child_bp);
xfs_trans_brelse(*trans, child_bp);
+ error = -EFSCORRUPTED;
break;
}
if (error)
@@ -268,10 +205,17 @@ xfs_attr3_node_inactive(
/*
* Remove the subsidiary block from the cache and from the log.
*/
- error = xfs_da_get_buf(*trans, dp, 0, child_blkno, &child_bp,
- XFS_ATTR_FORK);
+ error = xfs_trans_get_buf(*trans, mp->m_ddev_targp,
+ child_blkno,
+ XFS_FSB_TO_BB(mp, mp->m_attr_geo->fsbcount), 0,
+ &child_bp);
if (error)
return error;
+ error = bp->b_error;
+ if (error) {
+ xfs_trans_brelse(*trans, child_bp);
+ return error;
+ }
xfs_trans_binval(*trans, child_bp);
/*
@@ -279,13 +223,15 @@ xfs_attr3_node_inactive(
* child block number.
*/
if (i + 1 < ichdr.count) {
- error = xfs_da3_node_read(*trans, dp, 0, parent_blkno,
- &bp, XFS_ATTR_FORK);
+ struct xfs_da3_icnode_hdr phdr;
+
+ error = xfs_da3_node_read_mapped(*trans, dp,
+ parent_blkno, &bp, XFS_ATTR_FORK);
if (error)
return error;
- node = bp->b_addr;
- btree = dp->d_ops->node_tree_p(node);
- child_fsb = be32_to_cpu(btree[i + 1].before);
+ xfs_da3_node_hdr_from_disk(dp->i_mount, &phdr,
+ bp->b_addr);
+ child_fsb = be32_to_cpu(phdr.btree[i + 1].before);
xfs_trans_brelse(*trans, bp);
}
/*
@@ -310,6 +256,7 @@ xfs_attr3_root_inactive(
struct xfs_trans **trans,
struct xfs_inode *dp)
{
+ struct xfs_mount *mp = dp->i_mount;
struct xfs_da_blkinfo *info;
struct xfs_buf *bp;
xfs_daddr_t blkno;
@@ -321,7 +268,7 @@ xfs_attr3_root_inactive(
* the extents in reverse order the extent containing
* block 0 must still be there.
*/
- error = xfs_da3_node_read(*trans, dp, 0, -1, &bp, XFS_ATTR_FORK);
+ error = xfs_da3_node_read(*trans, dp, 0, &bp, XFS_ATTR_FORK);
if (error)
return error;
blkno = bp->b_bn;
@@ -341,7 +288,8 @@ xfs_attr3_root_inactive(
error = xfs_attr3_leaf_inactive(trans, dp, bp);
break;
default:
- error = -EIO;
+ error = -EFSCORRUPTED;
+ xfs_buf_corruption_error(bp);
xfs_trans_brelse(*trans, bp);
break;
}
@@ -351,9 +299,15 @@ xfs_attr3_root_inactive(
/*
* Invalidate the incore copy of the root block.
*/
- error = xfs_da_get_buf(*trans, dp, 0, blkno, &bp, XFS_ATTR_FORK);
+ error = xfs_trans_get_buf(*trans, mp->m_ddev_targp, blkno,
+ XFS_FSB_TO_BB(mp, mp->m_attr_geo->fsbcount), 0, &bp);
if (error)
return error;
+ error = bp->b_error;
+ if (error) {
+ xfs_trans_brelse(*trans, bp);
+ return error;
+ }
xfs_trans_binval(*trans, bp); /* remove from cache */
/*
* Commit the invalidate and start the next transaction.
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 58fc820a70c6..d37743bdf274 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -49,14 +49,16 @@ xfs_attr_shortform_compare(const void *a, const void *b)
* we can begin returning them to the user.
*/
static int
-xfs_attr_shortform_list(xfs_attr_list_context_t *context)
+xfs_attr_shortform_list(
+ struct xfs_attr_list_context *context)
{
- attrlist_cursor_kern_t *cursor;
- xfs_attr_sf_sort_t *sbuf, *sbp;
- xfs_attr_shortform_t *sf;
- xfs_attr_sf_entry_t *sfe;
- xfs_inode_t *dp;
- int sbsize, nsbuf, count, i;
+ struct attrlist_cursor_kern *cursor;
+ struct xfs_attr_sf_sort *sbuf, *sbp;
+ struct xfs_attr_shortform *sf;
+ struct xfs_attr_sf_entry *sfe;
+ struct xfs_inode *dp;
+ int sbsize, nsbuf, count, i;
+ int error = 0;
ASSERT(context != NULL);
dp = context->dp;
@@ -84,6 +86,10 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
(XFS_ISRESET_CURSOR(cursor) &&
(dp->i_afp->if_bytes + sf->hdr.count * 16) < context->bufsize)) {
for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
+ if (XFS_IS_CORRUPT(context->dp->i_mount,
+ !xfs_attr_namecheck(sfe->nameval,
+ sfe->namelen)))
+ return -EFSCORRUPTED;
context->put_listent(context,
sfe->flags,
sfe->nameval,
@@ -109,7 +115,7 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
* It didn't all fit, so we have to sort everything on hashval.
*/
sbsize = sf->hdr.count * sizeof(*sbuf);
- sbp = sbuf = kmem_alloc(sbsize, KM_SLEEP | KM_NOFS);
+ sbp = sbuf = kmem_alloc(sbsize, KM_NOFS);
/*
* Scan the attribute list for the rest of the entries, storing
@@ -161,10 +167,8 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
break;
}
}
- if (i == nsbuf) {
- kmem_free(sbuf);
- return 0;
- }
+ if (i == nsbuf)
+ goto out;
/*
* Loop putting entries into the user buffer.
@@ -174,6 +178,12 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
cursor->hashval = sbp->hash;
cursor->offset = 0;
}
+ if (XFS_IS_CORRUPT(context->dp->i_mount,
+ !xfs_attr_namecheck(sbp->name,
+ sbp->namelen))) {
+ error = -EFSCORRUPTED;
+ goto out;
+ }
context->put_listent(context,
sbp->flags,
sbp->name,
@@ -183,9 +193,9 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
break;
cursor->offset++;
}
-
+out:
kmem_free(sbuf);
- return 0;
+ return error;
}
/*
@@ -213,7 +223,7 @@ xfs_attr_node_list_lookup(
ASSERT(*pbp == NULL);
cursor->blkno = 0;
for (;;) {
- error = xfs_da3_node_read(tp, dp, cursor->blkno, -1, &bp,
+ error = xfs_da3_node_read(tp, dp, cursor->blkno, &bp,
XFS_ATTR_FORK);
if (error)
return error;
@@ -229,7 +239,7 @@ xfs_attr_node_list_lookup(
goto out_corruptbuf;
}
- dp->d_ops->node_hdr_from_disk(&nodehdr, node);
+ xfs_da3_node_hdr_from_disk(mp, &nodehdr, node);
/* Tree taller than we can handle; bail out! */
if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH)
@@ -243,7 +253,7 @@ xfs_attr_node_list_lookup(
else
expected_level--;
- btree = dp->d_ops->node_tree_p(node);
+ btree = nodehdr.btree;
for (i = 0; i < nodehdr.count; btree++, i++) {
if (cursor->hashval <= be32_to_cpu(btree->hashval)) {
cursor->blkno = be32_to_cpu(btree->before);
@@ -258,7 +268,7 @@ xfs_attr_node_list_lookup(
return 0;
/* We can't point back to the root. */
- if (cursor->blkno == 0)
+ if (XFS_IS_CORRUPT(mp, cursor->blkno == 0))
return -EFSCORRUPTED;
}
@@ -269,6 +279,7 @@ xfs_attr_node_list_lookup(
return 0;
out_corruptbuf:
+ xfs_buf_corruption_error(bp);
xfs_trans_brelse(tp, bp);
return -EFSCORRUPTED;
}
@@ -284,7 +295,7 @@ xfs_attr_node_list(
struct xfs_buf *bp;
struct xfs_inode *dp = context->dp;
struct xfs_mount *mp = dp->i_mount;
- int error;
+ int error = 0;
trace_xfs_attr_node_list(context);
@@ -298,8 +309,8 @@ xfs_attr_node_list(
*/
bp = NULL;
if (cursor->blkno > 0) {
- error = xfs_da3_node_read(context->tp, dp, cursor->blkno, -1,
- &bp, XFS_ATTR_FORK);
+ error = xfs_da3_node_read(context->tp, dp, cursor->blkno, &bp,
+ XFS_ATTR_FORK);
if ((error != 0) && (error != -EFSCORRUPTED))
return error;
if (bp) {
@@ -358,24 +369,27 @@ xfs_attr_node_list(
*/
for (;;) {
leaf = bp->b_addr;
- xfs_attr3_leaf_list_int(bp, context);
+ error = xfs_attr3_leaf_list_int(bp, context);
+ if (error)
+ break;
xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
if (context->seen_enough || leafhdr.forw == 0)
break;
cursor->blkno = leafhdr.forw;
xfs_trans_brelse(context->tp, bp);
- error = xfs_attr3_leaf_read(context->tp, dp, cursor->blkno, -1, &bp);
+ error = xfs_attr3_leaf_read(context->tp, dp, cursor->blkno,
+ &bp);
if (error)
return error;
}
xfs_trans_brelse(context->tp, bp);
- return 0;
+ return error;
}
/*
* Copy out attribute list entries for attr_list(), for leaf attribute lists.
*/
-void
+int
xfs_attr3_leaf_list_int(
struct xfs_buf *bp,
struct xfs_attr_list_context *context)
@@ -417,7 +431,7 @@ xfs_attr3_leaf_list_int(
}
if (i == ichdr.count) {
trace_xfs_attr_list_notfound(context);
- return;
+ return 0;
}
} else {
entry = &entries[0];
@@ -457,6 +471,9 @@ xfs_attr3_leaf_list_int(
valuelen = be32_to_cpu(name_rmt->valuelen);
}
+ if (XFS_IS_CORRUPT(context->dp->i_mount,
+ !xfs_attr_namecheck(name, namelen)))
+ return -EFSCORRUPTED;
context->put_listent(context, entry->flags,
name, namelen, valuelen);
if (context->seen_enough)
@@ -464,7 +481,7 @@ xfs_attr3_leaf_list_int(
cursor->offset++;
}
trace_xfs_attr_list_leaf_end(context);
- return;
+ return 0;
}
/*
@@ -479,13 +496,13 @@ xfs_attr_leaf_list(xfs_attr_list_context_t *context)
trace_xfs_attr_leaf_list(context);
context->cursor->blkno = 0;
- error = xfs_attr3_leaf_read(context->tp, context->dp, 0, -1, &bp);
+ error = xfs_attr3_leaf_read(context->tp, context->dp, 0, &bp);
if (error)
return error;
- xfs_attr3_leaf_list_int(bp, context);
+ error = xfs_attr3_leaf_list_int(bp, context);
xfs_trans_brelse(context->tp, bp);
- return 0;
+ return error;
}
int
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 9fa4a7ee8cfc..ee6f4229cebc 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -21,7 +21,7 @@
#include "xfs_icache.h"
#include "xfs_bmap_btree.h"
#include "xfs_trans_space.h"
-
+#include "xfs_error.h"
kmem_zone_t *xfs_bui_zone;
kmem_zone_t *xfs_bud_zone;
@@ -35,7 +35,7 @@ void
xfs_bui_item_free(
struct xfs_bui_log_item *buip)
{
- kmem_zone_free(xfs_bui_zone, buip);
+ kmem_cache_free(xfs_bui_zone, buip);
}
/*
@@ -141,7 +141,7 @@ xfs_bui_init(
{
struct xfs_bui_log_item *buip;
- buip = kmem_zone_zalloc(xfs_bui_zone, KM_SLEEP);
+ buip = kmem_zone_zalloc(xfs_bui_zone, 0);
xfs_log_item_init(mp, &buip->bui_item, XFS_LI_BUI, &xfs_bui_item_ops);
buip->bui_format.bui_nextents = XFS_BUI_MAX_FAST_EXTENTS;
@@ -201,7 +201,7 @@ xfs_bud_item_release(
struct xfs_bud_log_item *budp = BUD_ITEM(lip);
xfs_bui_release(budp->bud_buip);
- kmem_zone_free(xfs_bud_zone, budp);
+ kmem_cache_free(xfs_bud_zone, budp);
}
static const struct xfs_item_ops xfs_bud_item_ops = {
@@ -218,7 +218,7 @@ xfs_trans_get_bud(
{
struct xfs_bud_log_item *budp;
- budp = kmem_zone_zalloc(xfs_bud_zone, KM_SLEEP);
+ budp = kmem_zone_zalloc(xfs_bud_zone, 0);
xfs_log_item_init(tp->t_mountp, &budp->bud_item, XFS_LI_BUD,
&xfs_bud_item_ops);
budp->bud_buip = buip;
@@ -456,7 +456,7 @@ xfs_bui_recover(
if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
xfs_bui_release(buip);
- return -EIO;
+ return -EFSCORRUPTED;
}
/*
@@ -490,7 +490,7 @@ xfs_bui_recover(
*/
set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
xfs_bui_release(buip);
- return -EIO;
+ return -EFSCORRUPTED;
}
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
@@ -525,6 +525,7 @@ xfs_bui_recover(
type = bui_type;
break;
default:
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
error = -EFSCORRUPTED;
goto err_inode;
}
@@ -542,9 +543,7 @@ xfs_bui_recover(
irec.br_blockcount = count;
irec.br_startoff = bmap->me_startoff;
irec.br_state = state;
- error = xfs_bmap_unmap_extent(tp, ip, &irec);
- if (error)
- goto err_inode;
+ xfs_bmap_unmap_extent(tp, ip, &irec);
}
set_bit(XFS_BUI_RECOVERED, &buip->bui_flags);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 98c6a7a71427..e62fb5216341 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -39,9 +39,9 @@
xfs_daddr_t
xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
{
- return (XFS_IS_REALTIME_INODE(ip) ? \
- (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) : \
- XFS_FSB_TO_DADDR((ip)->i_mount, (fsb)));
+ if (XFS_IS_REALTIME_INODE(ip))
+ return XFS_FSB_TO_BB(ip->i_mount, fsb);
+ return XFS_FSB_TO_DADDR(ip->i_mount, fsb);
}
/*
@@ -53,15 +53,16 @@ xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
*/
int
xfs_zero_extent(
- struct xfs_inode *ip,
- xfs_fsblock_t start_fsb,
- xfs_off_t count_fsb)
+ struct xfs_inode *ip,
+ xfs_fsblock_t start_fsb,
+ xfs_off_t count_fsb)
{
- struct xfs_mount *mp = ip->i_mount;
- xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb);
- sector_t block = XFS_BB_TO_FSBT(mp, sector);
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_buftarg *target = xfs_inode_buftarg(ip);
+ xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb);
+ sector_t block = XFS_BB_TO_FSBT(mp, sector);
- return blkdev_issue_zeroout(xfs_find_bdev_for_inode(VFS_I(ip)),
+ return blkdev_issue_zeroout(target->bt_bdev,
block << (mp->m_super->s_blocksize_bits - 9),
count_fsb << (mp->m_super->s_blocksize_bits - 9),
GFP_NOFS, 0);
@@ -164,13 +165,6 @@ xfs_bmap_rtalloc(
xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
XFS_TRANS_DQ_RTBCOUNT, (long) ralen);
-
- /* Zero the extent if we were asked to do so */
- if (ap->datatype & XFS_ALLOC_USERDATA_ZERO) {
- error = xfs_zero_extent(ap->ip, ap->blkno, ap->length);
- if (error)
- return error;
- }
} else {
ap->length = 0;
}
@@ -179,29 +173,6 @@ xfs_bmap_rtalloc(
#endif /* CONFIG_XFS_RT */
/*
- * Check if the endoff is outside the last extent. If so the caller will grow
- * the allocation to a stripe unit boundary. All offsets are considered outside
- * the end of file for an empty fork, so 1 is returned in *eof in that case.
- */
-int
-xfs_bmap_eof(
- struct xfs_inode *ip,
- xfs_fileoff_t endoff,
- int whichfork,
- int *eof)
-{
- struct xfs_bmbt_irec rec;
- int error;
-
- error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, eof);
- if (error || *eof)
- return error;
-
- *eof = endoff >= rec.br_startoff + rec.br_blockcount;
- return 0;
-}
-
-/*
* Extent tree block counting routines.
*/
@@ -229,106 +200,6 @@ xfs_bmap_count_leaves(
}
/*
- * Count leaf blocks given a range of extent records originally
- * in btree format.
- */
-STATIC void
-xfs_bmap_disk_count_leaves(
- struct xfs_mount *mp,
- struct xfs_btree_block *block,
- int numrecs,
- xfs_filblks_t *count)
-{
- int b;
- xfs_bmbt_rec_t *frp;
-
- for (b = 1; b <= numrecs; b++) {
- frp = XFS_BMBT_REC_ADDR(mp, block, b);
- *count += xfs_bmbt_disk_get_blockcount(frp);
- }
-}
-
-/*
- * Recursively walks each level of a btree
- * to count total fsblocks in use.
- */
-STATIC int
-xfs_bmap_count_tree(
- struct xfs_mount *mp,
- struct xfs_trans *tp,
- struct xfs_ifork *ifp,
- xfs_fsblock_t blockno,
- int levelin,
- xfs_extnum_t *nextents,
- xfs_filblks_t *count)
-{
- int error;
- struct xfs_buf *bp, *nbp;
- int level = levelin;
- __be64 *pp;
- xfs_fsblock_t bno = blockno;
- xfs_fsblock_t nextbno;
- struct xfs_btree_block *block, *nextblock;
- int numrecs;
-
- error = xfs_btree_read_bufl(mp, tp, bno, &bp, XFS_BMAP_BTREE_REF,
- &xfs_bmbt_buf_ops);
- if (error)
- return error;
- *count += 1;
- block = XFS_BUF_TO_BLOCK(bp);
-
- if (--level) {
- /* Not at node above leaves, count this level of nodes */
- nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
- while (nextbno != NULLFSBLOCK) {
- error = xfs_btree_read_bufl(mp, tp, nextbno, &nbp,
- XFS_BMAP_BTREE_REF,
- &xfs_bmbt_buf_ops);
- if (error)
- return error;
- *count += 1;
- nextblock = XFS_BUF_TO_BLOCK(nbp);
- nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib);
- xfs_trans_brelse(tp, nbp);
- }
-
- /* Dive to the next level */
- pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
- bno = be64_to_cpu(*pp);
- error = xfs_bmap_count_tree(mp, tp, ifp, bno, level, nextents,
- count);
- if (error) {
- xfs_trans_brelse(tp, bp);
- XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
- XFS_ERRLEVEL_LOW, mp);
- return -EFSCORRUPTED;
- }
- xfs_trans_brelse(tp, bp);
- } else {
- /* count all level 1 nodes and their leaves */
- for (;;) {
- nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
- numrecs = be16_to_cpu(block->bb_numrecs);
- (*nextents) += numrecs;
- xfs_bmap_disk_count_leaves(mp, block, numrecs, count);
- xfs_trans_brelse(tp, bp);
- if (nextbno == NULLFSBLOCK)
- break;
- bno = nextbno;
- error = xfs_btree_read_bufl(mp, tp, bno, &bp,
- XFS_BMAP_BTREE_REF,
- &xfs_bmbt_buf_ops);
- if (error)
- return error;
- *count += 1;
- block = XFS_BUF_TO_BLOCK(bp);
- }
- }
- return 0;
-}
-
-/*
* Count fsblocks of the given fork. Delayed allocation extents are
* not counted towards the totals.
*/
@@ -340,26 +211,19 @@ xfs_bmap_count_blocks(
xfs_extnum_t *nextents,
xfs_filblks_t *count)
{
- struct xfs_mount *mp; /* file system mount structure */
- __be64 *pp; /* pointer to block address */
- struct xfs_btree_block *block; /* current btree block */
- struct xfs_ifork *ifp; /* fork structure */
- xfs_fsblock_t bno; /* block # of "block" */
- int level; /* btree level, for checking */
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork);
+ struct xfs_btree_cur *cur;
+ xfs_extlen_t btblocks = 0;
int error;
- bno = NULLFSBLOCK;
- mp = ip->i_mount;
*nextents = 0;
*count = 0;
- ifp = XFS_IFORK_PTR(ip, whichfork);
+
if (!ifp)
return 0;
switch (XFS_IFORK_FORMAT(ip, whichfork)) {
- case XFS_DINODE_FMT_EXTENTS:
- *nextents = xfs_bmap_count_leaves(ifp, count);
- return 0;
case XFS_DINODE_FMT_BTREE:
if (!(ifp->if_flags & XFS_IFEXTENTS)) {
error = xfs_iread_extents(tp, ip, whichfork);
@@ -367,26 +231,23 @@ xfs_bmap_count_blocks(
return error;
}
+ cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
+ error = xfs_btree_count_blocks(cur, &btblocks);
+ xfs_btree_del_cursor(cur, error);
+ if (error)
+ return error;
+
/*
- * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
+ * xfs_btree_count_blocks includes the root block contained in
+ * the inode fork in @btblocks, so subtract one because we're
+ * only interested in allocated disk blocks.
*/
- block = ifp->if_broot;
- level = be16_to_cpu(block->bb_level);
- ASSERT(level > 0);
- pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
- bno = be64_to_cpu(*pp);
- ASSERT(bno != NULLFSBLOCK);
- ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
- ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);
-
- error = xfs_bmap_count_tree(mp, tp, ifp, bno, level,
- nextents, count);
- if (error) {
- XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)",
- XFS_ERRLEVEL_LOW, mp);
- return -EFSCORRUPTED;
- }
- return 0;
+ *count += btblocks - 1;
+
+ /* fall through */
+ case XFS_DINODE_FMT_EXTENTS:
+ *nextents = xfs_bmap_count_leaves(ifp, count);
+ break;
}
return 0;
@@ -864,6 +725,7 @@ xfs_alloc_file_space(
xfs_filblks_t allocatesize_fsb;
xfs_extlen_t extsz, temp;
xfs_fileoff_t startoffset_fsb;
+ xfs_fileoff_t endoffset_fsb;
int nimaps;
int quota_flag;
int rt;
@@ -891,7 +753,8 @@ xfs_alloc_file_space(
imapp = &imaps[0];
nimaps = 1;
startoffset_fsb = XFS_B_TO_FSBT(mp, offset);
- allocatesize_fsb = XFS_B_TO_FSB(mp, count);
+ endoffset_fsb = XFS_B_TO_FSB(mp, offset + count);
+ allocatesize_fsb = endoffset_fsb - startoffset_fsb;
/*
* Allocate file space until done or until there is an error
@@ -962,8 +825,8 @@ xfs_alloc_file_space(
xfs_trans_ijoin(tp, ip, 0);
error = xfs_bmapi_write(tp, ip, startoffset_fsb,
- allocatesize_fsb, alloc_type, resblks,
- imapp, &nimaps);
+ allocatesize_fsb, alloc_type, 0, imapp,
+ &nimaps);
if (error)
goto error0;
@@ -1037,6 +900,7 @@ out_trans_cancel:
goto out_unlock;
}
+/* Caller must first wait for the completion of any pending DIOs if required. */
int
xfs_flush_unmap_range(
struct xfs_inode *ip,
@@ -1048,9 +912,6 @@ xfs_flush_unmap_range(
xfs_off_t rounding, start, end;
int error;
- /* wait for the completion of any pending DIOs */
- inode_dio_wait(inode);
-
rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE);
start = round_down(offset, rounding);
end = round_up(offset + len, rounding) - 1;
@@ -1082,10 +943,6 @@ xfs_free_file_space(
if (len <= 0) /* if nothing being freed */
return 0;
- error = xfs_flush_unmap_range(ip, offset, len);
- if (error)
- return error;
-
startoffset_fsb = XFS_B_TO_FSB(mp, offset);
endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
@@ -1111,7 +968,8 @@ xfs_free_file_space(
return 0;
if (offset + len > XFS_ISIZE(ip))
len = XFS_ISIZE(ip) - offset;
- error = iomap_zero_range(VFS_I(ip), offset, len, NULL, &xfs_iomap_ops);
+ error = iomap_zero_range(VFS_I(ip), offset, len, NULL,
+ &xfs_buffered_write_iomap_ops);
if (error)
return error;
@@ -1129,48 +987,12 @@ xfs_free_file_space(
return error;
}
-/*
- * Preallocate and zero a range of a file. This mechanism has the allocation
- * semantics of fallocate and in addition converts data in the range to zeroes.
- */
-int
-xfs_zero_file_space(
- struct xfs_inode *ip,
- xfs_off_t offset,
- xfs_off_t len)
-{
- struct xfs_mount *mp = ip->i_mount;
- uint blksize;
- int error;
-
- trace_xfs_zero_file_space(ip);
-
- blksize = 1 << mp->m_sb.sb_blocklog;
-
- /*
- * Punch a hole and prealloc the range. We use hole punch rather than
- * unwritten extent conversion for two reasons:
- *
- * 1.) Hole punch handles partial block zeroing for us.
- *
- * 2.) If prealloc returns ENOSPC, the file range is still zero-valued
- * by virtue of the hole punch.
- */
- error = xfs_free_file_space(ip, offset, len);
- if (error || xfs_is_always_cow_inode(ip))
- return error;
-
- return xfs_alloc_file_space(ip, round_down(offset, blksize),
- round_up(offset + len, blksize) -
- round_down(offset, blksize),
- XFS_BMAPI_PREALLOC);
-}
-
static int
xfs_prepare_shift(
struct xfs_inode *ip,
loff_t offset)
{
+ struct xfs_mount *mp = ip->i_mount;
int error;
/*
@@ -1184,6 +1006,17 @@ xfs_prepare_shift(
}
/*
+ * Shift operations must stabilize the start block offset boundary along
+ * with the full range of the operation. If we don't, a COW writeback
+ * completion could race with an insert, front merge with the start
+ * extent (after split) during the shift and corrupt the file. Start
+ * with the block just prior to the start to stabilize the boundary.
+ */
+ offset = round_down(offset, 1 << mp->m_sb.sb_blocklog);
+ if (offset)
+ offset -= (1 << mp->m_sb.sb_blocklog);
+
+ /*
* Writeback and invalidate cache for the remainder of the file as we're
* about to shift down every extent from offset to EOF.
*/
@@ -1532,24 +1365,16 @@ xfs_swap_extent_rmap(
trace_xfs_swap_extent_rmap_remap_piece(tip, &uirec);
/* Remove the mapping from the donor file. */
- error = xfs_bmap_unmap_extent(tp, tip, &uirec);
- if (error)
- goto out;
+ xfs_bmap_unmap_extent(tp, tip, &uirec);
/* Remove the mapping from the source file. */
- error = xfs_bmap_unmap_extent(tp, ip, &irec);
- if (error)
- goto out;
+ xfs_bmap_unmap_extent(tp, ip, &irec);
/* Map the donor file's blocks into the source file. */
- error = xfs_bmap_map_extent(tp, ip, &uirec);
- if (error)
- goto out;
+ xfs_bmap_map_extent(tp, ip, &uirec);
/* Map the source file's blocks into the donor file. */
- error = xfs_bmap_map_extent(tp, tip, &irec);
- if (error)
- goto out;
+ xfs_bmap_map_extent(tp, tip, &irec);
error = xfs_defer_finish(tpp);
tp = *tpp;
@@ -1756,6 +1581,14 @@ xfs_swap_extents(
goto out_unlock;
}
+ error = xfs_qm_dqattach(ip);
+ if (error)
+ goto out_unlock;
+
+ error = xfs_qm_dqattach(tip);
+ if (error)
+ goto out_unlock;
+
error = xfs_swap_extent_flush(ip);
if (error)
goto out_unlock;
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 7a78229cf1a7..9f993168b55b 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -30,8 +30,6 @@ xfs_bmap_rtalloc(struct xfs_bmalloca *ap)
}
#endif /* CONFIG_XFS_RT */
-int xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,
- int whichfork, int *eof);
int xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
xfs_fileoff_t start_fsb, xfs_fileoff_t length);
@@ -59,8 +57,6 @@ int xfs_alloc_file_space(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t len, int alloc_type);
int xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t len);
-int xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset,
- xfs_off_t len);
int xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset,
xfs_off_t len);
int xfs_insert_file_space(struct xfs_inode *, xfs_off_t offset,
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index ca0849043f54..217e4f82a44a 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -198,20 +198,22 @@ xfs_buf_free_maps(
}
}
-static struct xfs_buf *
+static int
_xfs_buf_alloc(
struct xfs_buftarg *target,
struct xfs_buf_map *map,
int nmaps,
- xfs_buf_flags_t flags)
+ xfs_buf_flags_t flags,
+ struct xfs_buf **bpp)
{
struct xfs_buf *bp;
int error;
int i;
+ *bpp = NULL;
bp = kmem_zone_zalloc(xfs_buf_zone, KM_NOFS);
if (unlikely(!bp))
- return NULL;
+ return -ENOMEM;
/*
* We don't want certain flags to appear in b_flags unless they are
@@ -238,8 +240,8 @@ _xfs_buf_alloc(
*/
error = xfs_buf_get_maps(bp, nmaps);
if (error) {
- kmem_zone_free(xfs_buf_zone, bp);
- return NULL;
+ kmem_cache_free(xfs_buf_zone, bp);
+ return error;
}
bp->b_bn = map[0].bm_bn;
@@ -256,7 +258,8 @@ _xfs_buf_alloc(
XFS_STATS_INC(bp->b_mount, xb_create);
trace_xfs_buf_init(bp, _RET_IP_);
- return bp;
+ *bpp = bp;
+ return 0;
}
/*
@@ -304,7 +307,7 @@ _xfs_buf_free_pages(
* The buffer must not be on any hash - use xfs_buf_rele instead for
* hashed and refcounted buffers
*/
-void
+static void
xfs_buf_free(
xfs_buf_t *bp)
{
@@ -328,7 +331,7 @@ xfs_buf_free(
kmem_free(bp->b_addr);
_xfs_buf_free_pages(bp);
xfs_buf_free_maps(bp);
- kmem_zone_free(xfs_buf_zone, bp);
+ kmem_cache_free(xfs_buf_zone, bp);
}
/*
@@ -345,6 +348,15 @@ xfs_buf_allocate_memory(
unsigned short page_count, i;
xfs_off_t start, end;
int error;
+ xfs_km_flags_t kmflag_mask = 0;
+
+ /*
+ * assure zeroed buffer for non-read cases.
+ */
+ if (!(flags & XBF_READ)) {
+ kmflag_mask |= KM_ZERO;
+ gfp_mask |= __GFP_ZERO;
+ }
/*
* for buffers that are contained within a single page, just allocate
@@ -353,7 +365,9 @@ xfs_buf_allocate_memory(
*/
size = BBTOB(bp->b_length);
if (size < PAGE_SIZE) {
- bp->b_addr = kmem_alloc(size, KM_NOFS);
+ int align_mask = xfs_buftarg_dma_alignment(bp->b_target);
+ bp->b_addr = kmem_alloc_io(size, align_mask,
+ KM_NOFS | kmflag_mask);
if (!bp->b_addr) {
/* low memory - use alloc_page loop instead */
goto use_alloc_page;
@@ -368,7 +382,7 @@ xfs_buf_allocate_memory(
}
bp->b_offset = offset_in_page(bp->b_addr);
bp->b_pages = bp->b_page_array;
- bp->b_pages[0] = virt_to_page(bp->b_addr);
+ bp->b_pages[0] = kmem_to_page(bp->b_addr);
bp->b_page_count = 1;
bp->b_flags |= _XBF_KMEM;
return 0;
@@ -450,7 +464,7 @@ _xfs_buf_map_pages(
unsigned nofs_flag;
/*
- * vm_map_ram() will allocate auxillary structures (e.g.
+ * vm_map_ram() will allocate auxiliary structures (e.g.
* pagetables) with GFP_KERNEL, yet we are likely to be under
* GFP_NOFS context here. Hence we need to tell memory reclaim
* that we are in such a context via PF_MEMALLOC_NOFS to prevent
@@ -671,53 +685,39 @@ xfs_buf_incore(
* cache hits, as metadata intensive workloads will see 3 orders of magnitude
* more hits than misses.
*/
-struct xfs_buf *
+int
xfs_buf_get_map(
struct xfs_buftarg *target,
struct xfs_buf_map *map,
int nmaps,
- xfs_buf_flags_t flags)
+ xfs_buf_flags_t flags,
+ struct xfs_buf **bpp)
{
struct xfs_buf *bp;
struct xfs_buf *new_bp;
int error = 0;
+ *bpp = NULL;
error = xfs_buf_find(target, map, nmaps, flags, NULL, &bp);
-
- switch (error) {
- case 0:
- /* cache hit */
+ if (!error)
goto found;
- case -EAGAIN:
- /* cache hit, trylock failure, caller handles failure */
- ASSERT(flags & XBF_TRYLOCK);
- return NULL;
- case -ENOENT:
- /* cache miss, go for insert */
- break;
- case -EFSCORRUPTED:
- default:
- /*
- * None of the higher layers understand failure types
- * yet, so return NULL to signal a fatal lookup error.
- */
- return NULL;
- }
+ if (error != -ENOENT)
+ return error;
- new_bp = _xfs_buf_alloc(target, map, nmaps, flags);
- if (unlikely(!new_bp))
- return NULL;
+ error = _xfs_buf_alloc(target, map, nmaps, flags, &new_bp);
+ if (error)
+ return error;
error = xfs_buf_allocate_memory(new_bp, flags);
if (error) {
xfs_buf_free(new_bp);
- return NULL;
+ return error;
}
error = xfs_buf_find(target, map, nmaps, flags, new_bp, &bp);
if (error) {
xfs_buf_free(new_bp);
- return NULL;
+ return error;
}
if (bp != new_bp)
@@ -730,7 +730,7 @@ found:
xfs_warn(target->bt_mount,
"%s: failed to map pagesn", __func__);
xfs_buf_relse(bp);
- return NULL;
+ return error;
}
}
@@ -743,7 +743,8 @@ found:
XFS_STATS_INC(target->bt_mount, xb_get);
trace_xfs_buf_get(bp, flags, _RET_IP_);
- return bp;
+ *bpp = bp;
+ return 0;
}
STATIC int
@@ -795,46 +796,77 @@ xfs_buf_reverify(
return bp->b_error;
}
-xfs_buf_t *
+int
xfs_buf_read_map(
struct xfs_buftarg *target,
struct xfs_buf_map *map,
int nmaps,
xfs_buf_flags_t flags,
- const struct xfs_buf_ops *ops)
+ struct xfs_buf **bpp,
+ const struct xfs_buf_ops *ops,
+ xfs_failaddr_t fa)
{
struct xfs_buf *bp;
+ int error;
flags |= XBF_READ;
+ *bpp = NULL;
- bp = xfs_buf_get_map(target, map, nmaps, flags);
- if (!bp)
- return NULL;
+ error = xfs_buf_get_map(target, map, nmaps, flags, &bp);
+ if (error)
+ return error;
trace_xfs_buf_read(bp, flags, _RET_IP_);
if (!(bp->b_flags & XBF_DONE)) {
+ /* Initiate the buffer read and wait. */
XFS_STATS_INC(target->bt_mount, xb_get_read);
bp->b_ops = ops;
- _xfs_buf_read(bp, flags);
- return bp;
+ error = _xfs_buf_read(bp, flags);
+
+ /* Readahead iodone already dropped the buffer, so exit. */
+ if (flags & XBF_ASYNC)
+ return 0;
+ } else {
+ /* Buffer already read; all we need to do is check it. */
+ error = xfs_buf_reverify(bp, ops);
+
+ /* Readahead already finished; drop the buffer and exit. */
+ if (flags & XBF_ASYNC) {
+ xfs_buf_relse(bp);
+ return 0;
+ }
+
+ /* We do not want read in the flags */
+ bp->b_flags &= ~XBF_READ;
+ ASSERT(bp->b_ops != NULL || ops == NULL);
}
- xfs_buf_reverify(bp, ops);
+ /*
+ * If we've had a read error, then the contents of the buffer are
+ * invalid and should not be used. To ensure that a followup read tries
+ * to pull the buffer from disk again, we clear the XBF_DONE flag and
+ * mark the buffer stale. This ensures that anyone who has a current
+ * reference to the buffer will interpret it's contents correctly and
+ * future cache lookups will also treat it as an empty, uninitialised
+ * buffer.
+ */
+ if (error) {
+ if (!XFS_FORCED_SHUTDOWN(target->bt_mount))
+ xfs_buf_ioerror_alert(bp, fa);
- if (flags & XBF_ASYNC) {
- /*
- * Read ahead call which is already satisfied,
- * drop the buffer
- */
+ bp->b_flags &= ~XBF_DONE;
+ xfs_buf_stale(bp);
xfs_buf_relse(bp);
- return NULL;
+
+ /* bad CRC means corrupted metadata */
+ if (error == -EFSBADCRC)
+ error = -EFSCORRUPTED;
+ return error;
}
- /* We do not want read in the flags */
- bp->b_flags &= ~XBF_READ;
- ASSERT(bp->b_ops != NULL || ops == NULL);
- return bp;
+ *bpp = bp;
+ return 0;
}
/*
@@ -848,11 +880,14 @@ xfs_buf_readahead_map(
int nmaps,
const struct xfs_buf_ops *ops)
{
+ struct xfs_buf *bp;
+
if (bdi_read_congested(target->bt_bdev->bd_bdi))
return;
xfs_buf_read_map(target, map, nmaps,
- XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD, ops);
+ XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD, &bp, ops,
+ __this_address);
}
/*
@@ -869,12 +904,13 @@ xfs_buf_read_uncached(
const struct xfs_buf_ops *ops)
{
struct xfs_buf *bp;
+ int error;
*bpp = NULL;
- bp = xfs_buf_get_uncached(target, numblks, flags);
- if (!bp)
- return -ENOMEM;
+ error = xfs_buf_get_uncached(target, numblks, flags, &bp);
+ if (error)
+ return error;
/* set up the buffer for a read IO */
ASSERT(bp->b_map_count == 1);
@@ -885,7 +921,7 @@ xfs_buf_read_uncached(
xfs_buf_submit(bp);
if (bp->b_error) {
- int error = bp->b_error;
+ error = bp->b_error;
xfs_buf_relse(bp);
return error;
}
@@ -894,20 +930,23 @@ xfs_buf_read_uncached(
return 0;
}
-xfs_buf_t *
+int
xfs_buf_get_uncached(
struct xfs_buftarg *target,
size_t numblks,
- int flags)
+ int flags,
+ struct xfs_buf **bpp)
{
unsigned long page_count;
int error, i;
struct xfs_buf *bp;
DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);
+ *bpp = NULL;
+
/* flags might contain irrelevant bits, pass only what we care about */
- bp = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT);
- if (unlikely(bp == NULL))
+ error = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT, &bp);
+ if (error)
goto fail;
page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT;
@@ -917,8 +956,10 @@ xfs_buf_get_uncached(
for (i = 0; i < page_count; i++) {
bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
- if (!bp->b_pages[i])
+ if (!bp->b_pages[i]) {
+ error = -ENOMEM;
goto fail_free_mem;
+ }
}
bp->b_flags |= _XBF_PAGES;
@@ -930,7 +971,8 @@ xfs_buf_get_uncached(
}
trace_xfs_buf_get_uncached(bp, _RET_IP_);
- return bp;
+ *bpp = bp;
+ return 0;
fail_free_mem:
while (--i >= 0)
@@ -938,9 +980,9 @@ xfs_buf_get_uncached(
_xfs_buf_free_pages(bp);
fail_free_buf:
xfs_buf_free_maps(bp);
- kmem_zone_free(xfs_buf_zone, bp);
+ kmem_cache_free(xfs_buf_zone, bp);
fail:
- return NULL;
+ return error;
}
/*
@@ -1194,10 +1236,10 @@ __xfs_buf_ioerror(
void
xfs_buf_ioerror_alert(
struct xfs_buf *bp,
- const char *func)
+ xfs_failaddr_t func)
{
xfs_alert(bp->b_mount,
-"metadata I/O error in \"%s\" at daddr 0x%llx len %d error %d",
+"metadata I/O error in \"%pS\" at daddr 0x%llx len %d error %d",
func, (uint64_t)XFS_BUF_ADDR(bp), bp->b_length,
-bp->b_error);
}
@@ -1250,8 +1292,7 @@ xfs_buf_ioapply_map(
int map,
int *buf_offset,
int *count,
- int op,
- int op_flags)
+ int op)
{
int page_index;
int total_nr_pages = bp->b_page_count;
@@ -1286,7 +1327,7 @@ next_chunk:
bio->bi_iter.bi_sector = sector;
bio->bi_end_io = xfs_buf_bio_end_io;
bio->bi_private = bp;
- bio_set_op_attrs(bio, op, op_flags);
+ bio->bi_opf = op;
for (; size && nr_pages; nr_pages--, page_index++) {
int rbytes, nbytes = PAGE_SIZE - offset;
@@ -1331,7 +1372,6 @@ _xfs_buf_ioapply(
{
struct blk_plug plug;
int op;
- int op_flags = 0;
int offset;
int size;
int i;
@@ -1373,15 +1413,14 @@ _xfs_buf_ioapply(
dump_stack();
}
}
- } else if (bp->b_flags & XBF_READ_AHEAD) {
- op = REQ_OP_READ;
- op_flags = REQ_RAHEAD;
} else {
op = REQ_OP_READ;
+ if (bp->b_flags & XBF_READ_AHEAD)
+ op |= REQ_RAHEAD;
}
/* we only use the buffer cache for meta-data */
- op_flags |= REQ_META;
+ op |= REQ_META;
/*
* Walk all the vectors issuing IO on them. Set up the initial offset
@@ -1393,7 +1432,7 @@ _xfs_buf_ioapply(
size = BBTOB(bp->b_length);
blk_start_plug(&plug);
for (i = 0; i < bp->b_map_count; i++) {
- xfs_buf_ioapply_map(bp, i, &offset, &size, op, op_flags);
+ xfs_buf_ioapply_map(bp, i, &offset, &size, op);
if (bp->b_error)
break;
if (size <= 0)
@@ -1741,7 +1780,7 @@ xfs_alloc_buftarg(
{
xfs_buftarg_t *btp;
- btp = kmem_zalloc(sizeof(*btp), KM_SLEEP | KM_NOFS);
+ btp = kmem_zalloc(sizeof(*btp), KM_NOFS);
btp->bt_mount = mp;
btp->bt_dev = bdev->bd_dev;
@@ -2052,8 +2091,9 @@ xfs_buf_delwri_pushbuf(
int __init
xfs_buf_init(void)
{
- xfs_buf_zone = kmem_zone_init_flags(sizeof(xfs_buf_t), "xfs_buf",
- KM_ZONE_HWALIGN, NULL);
+ xfs_buf_zone = kmem_cache_create("xfs_buf",
+ sizeof(struct xfs_buf), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
if (!xfs_buf_zone)
goto out;
@@ -2066,7 +2106,7 @@ xfs_buf_init(void)
void
xfs_buf_terminate(void)
{
- kmem_zone_destroy(xfs_buf_zone);
+ kmem_cache_destroy(xfs_buf_zone);
}
void xfs_buf_set_ref(struct xfs_buf *bp, int lru_ref)
@@ -2096,7 +2136,7 @@ xfs_verify_magic(
int idx;
idx = xfs_sb_version_hascrc(&mp->m_sb);
- if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx])))
+ if (WARN_ON(!bp->b_ops || !bp->b_ops->magic[idx]))
return false;
return dmagic == bp->b_ops->magic[idx];
}
@@ -2114,7 +2154,7 @@ xfs_verify_magic16(
int idx;
idx = xfs_sb_version_hascrc(&mp->m_sb);
- if (unlikely(WARN_ON(!bp->b_ops || !bp->b_ops->magic16[idx])))
+ if (WARN_ON(!bp->b_ops || !bp->b_ops->magic16[idx]))
return false;
return dmagic == bp->b_ops->magic16[idx];
}
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index c6e57a3f409e..d79a1fe5d738 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -192,37 +192,40 @@ struct xfs_buf *xfs_buf_incore(struct xfs_buftarg *target,
xfs_daddr_t blkno, size_t numblks,
xfs_buf_flags_t flags);
-struct xfs_buf *xfs_buf_get_map(struct xfs_buftarg *target,
- struct xfs_buf_map *map, int nmaps,
- xfs_buf_flags_t flags);
-struct xfs_buf *xfs_buf_read_map(struct xfs_buftarg *target,
- struct xfs_buf_map *map, int nmaps,
- xfs_buf_flags_t flags,
- const struct xfs_buf_ops *ops);
+int xfs_buf_get_map(struct xfs_buftarg *target, struct xfs_buf_map *map,
+ int nmaps, xfs_buf_flags_t flags, struct xfs_buf **bpp);
+int xfs_buf_read_map(struct xfs_buftarg *target, struct xfs_buf_map *map,
+ int nmaps, xfs_buf_flags_t flags, struct xfs_buf **bpp,
+ const struct xfs_buf_ops *ops, xfs_failaddr_t fa);
void xfs_buf_readahead_map(struct xfs_buftarg *target,
struct xfs_buf_map *map, int nmaps,
const struct xfs_buf_ops *ops);
-static inline struct xfs_buf *
+static inline int
xfs_buf_get(
struct xfs_buftarg *target,
xfs_daddr_t blkno,
- size_t numblks)
+ size_t numblks,
+ struct xfs_buf **bpp)
{
DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
- return xfs_buf_get_map(target, &map, 1, 0);
+
+ return xfs_buf_get_map(target, &map, 1, 0, bpp);
}
-static inline struct xfs_buf *
+static inline int
xfs_buf_read(
struct xfs_buftarg *target,
xfs_daddr_t blkno,
size_t numblks,
xfs_buf_flags_t flags,
+ struct xfs_buf **bpp,
const struct xfs_buf_ops *ops)
{
DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
- return xfs_buf_read_map(target, &map, 1, flags, ops);
+
+ return xfs_buf_read_map(target, &map, 1, flags, bpp, ops,
+ __builtin_return_address(0));
}
static inline void
@@ -236,15 +239,14 @@ xfs_buf_readahead(
return xfs_buf_readahead_map(target, &map, 1, ops);
}
-struct xfs_buf *xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks,
- int flags);
+int xfs_buf_get_uncached(struct xfs_buftarg *target, size_t numblks, int flags,
+ struct xfs_buf **bpp);
int xfs_buf_read_uncached(struct xfs_buftarg *target, xfs_daddr_t daddr,
size_t numblks, int flags, struct xfs_buf **bpp,
const struct xfs_buf_ops *ops);
void xfs_buf_hold(struct xfs_buf *bp);
/* Releasing Buffers */
-extern void xfs_buf_free(xfs_buf_t *);
extern void xfs_buf_rele(xfs_buf_t *);
/* Locking and Unlocking Buffers */
@@ -260,7 +262,7 @@ extern void xfs_buf_ioend(struct xfs_buf *bp);
extern void __xfs_buf_ioerror(struct xfs_buf *bp, int error,
xfs_failaddr_t failaddr);
#define xfs_buf_ioerror(bp, err) __xfs_buf_ioerror((bp), (err), __this_address)
-extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func);
+extern void xfs_buf_ioerror_alert(struct xfs_buf *bp, xfs_failaddr_t fa);
extern int __xfs_buf_submit(struct xfs_buf *bp, bool);
static inline int xfs_buf_submit(struct xfs_buf *bp)
@@ -350,6 +352,12 @@ extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int);
#define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev)
#define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev)
+static inline int
+xfs_buftarg_dma_alignment(struct xfs_buftarg *bt)
+{
+ return queue_dma_alignment(bt->bt_bdev->bd_disk->queue);
+}
+
int xfs_buf_reverify(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
bool xfs_verify_magic(struct xfs_buf *bp, __be32 dmagic);
bool xfs_verify_magic16(struct xfs_buf *bp, __be16 dmagic);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 7dcaec54a20b..663810e6cd59 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -27,6 +27,23 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip)
STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp);
+/* Is this log iovec plausibly large enough to contain the buffer log format? */
+bool
+xfs_buf_log_check_iovec(
+ struct xfs_log_iovec *iovec)
+{
+ struct xfs_buf_log_format *blfp = iovec->i_addr;
+ char *bmp_end;
+ char *item_end;
+
+ if (offsetof(struct xfs_buf_log_format, blf_data_map) > iovec->i_len)
+ return false;
+
+ item_end = (char *)iovec->i_addr + iovec->i_len;
+ bmp_end = (char *)&blfp->blf_data_map[blfp->blf_map_size];
+ return bmp_end <= item_end;
+}
+
static inline int
xfs_buf_log_format_size(
struct xfs_buf_log_format *blfp)
@@ -688,7 +705,7 @@ static const struct xfs_item_ops xfs_buf_item_ops = {
.iop_push = xfs_buf_item_push,
};
-STATIC int
+STATIC void
xfs_buf_item_get_format(
struct xfs_buf_log_item *bip,
int count)
@@ -698,14 +715,11 @@ xfs_buf_item_get_format(
if (count == 1) {
bip->bli_formats = &bip->__bli_format;
- return 0;
+ return;
}
bip->bli_formats = kmem_zalloc(count * sizeof(struct xfs_buf_log_format),
- KM_SLEEP);
- if (!bip->bli_formats)
- return -ENOMEM;
- return 0;
+ 0);
}
STATIC void
@@ -731,7 +745,6 @@ xfs_buf_item_init(
struct xfs_buf_log_item *bip = bp->b_log_item;
int chunks;
int map_size;
- int error;
int i;
/*
@@ -747,7 +760,7 @@ xfs_buf_item_init(
return 0;
}
- bip = kmem_zone_zalloc(xfs_buf_item_zone, KM_SLEEP);
+ bip = kmem_zone_zalloc(xfs_buf_item_zone, 0);
xfs_log_item_init(mp, &bip->bli_item, XFS_LI_BUF, &xfs_buf_item_ops);
bip->bli_buf = bp;
@@ -760,19 +773,22 @@ xfs_buf_item_init(
* Discontiguous buffer support follows the layout of the underlying
* buffer. This makes the implementation as simple as possible.
*/
- error = xfs_buf_item_get_format(bip, bp->b_map_count);
- ASSERT(error == 0);
- if (error) { /* to stop gcc throwing set-but-unused warnings */
- kmem_zone_free(xfs_buf_item_zone, bip);
- return error;
- }
-
+ xfs_buf_item_get_format(bip, bp->b_map_count);
for (i = 0; i < bip->bli_format_count; i++) {
chunks = DIV_ROUND_UP(BBTOB(bp->b_maps[i].bm_len),
XFS_BLF_CHUNK);
map_size = DIV_ROUND_UP(chunks, NBWORD);
+ if (map_size > XFS_BLF_DATAMAP_SIZE) {
+ kmem_cache_free(xfs_buf_item_zone, bip);
+ xfs_err(mp,
+ "buffer item dirty bitmap (%u uints) too small to reflect %u bytes!",
+ map_size,
+ BBTOB(bp->b_maps[i].bm_len));
+ return -EFSCORRUPTED;
+ }
+
bip->bli_formats[i].blf_type = XFS_LI_BUF;
bip->bli_formats[i].blf_blkno = bp->b_maps[i].bm_bn;
bip->bli_formats[i].blf_len = bp->b_maps[i].bm_len;
@@ -805,6 +821,9 @@ xfs_buf_item_log_segment(
uint end_bit;
uint mask;
+ ASSERT(first < XFS_BLF_DATAMAP_SIZE * XFS_BLF_CHUNK * NBWORD);
+ ASSERT(last < XFS_BLF_DATAMAP_SIZE * XFS_BLF_CHUNK * NBWORD);
+
/*
* Convert byte offsets to bit numbers.
*/
@@ -851,7 +870,7 @@ xfs_buf_item_log_segment(
* first_bit and last_bit.
*/
while ((bits_to_set - bits_set) >= NBWORD) {
- *wordp |= 0xffffffff;
+ *wordp = 0xffffffff;
bits_set += NBWORD;
wordp++;
}
@@ -939,7 +958,7 @@ xfs_buf_item_free(
{
xfs_buf_item_free_format(bip);
kmem_free(bip->bli_item.li_lv_shadow);
- kmem_zone_free(xfs_buf_item_zone, bip);
+ kmem_cache_free(xfs_buf_item_zone, bip);
}
/*
@@ -956,7 +975,7 @@ xfs_buf_item_relse(
struct xfs_buf_log_item *bip = bp->b_log_item;
trace_xfs_buf_item_relse(bp, _RET_IP_);
- ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL));
+ ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));
bp->b_log_item = NULL;
if (list_empty(&bp->b_li_list))
@@ -1094,7 +1113,7 @@ xfs_buf_iodone_callback_error(
if (bp->b_target != lasttarg ||
time_after(jiffies, (lasttime + 5*HZ))) {
lasttime = jiffies;
- xfs_buf_ioerror_alert(bp, __func__);
+ xfs_buf_ioerror_alert(bp, __this_address);
}
lasttarg = bp->b_target;
diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h
index 4a054b11011a..30114b510332 100644
--- a/fs/xfs/xfs_buf_item.h
+++ b/fs/xfs/xfs_buf_item.h
@@ -61,6 +61,7 @@ void xfs_buf_iodone_callbacks(struct xfs_buf *);
void xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *);
bool xfs_buf_resubmit_failed_buffers(struct xfs_buf *,
struct list_head *);
+bool xfs_buf_log_check_iovec(struct xfs_log_iovec *iovec);
extern kmem_zone_t *xfs_buf_item_zone;
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 283df898dd9f..0d3b640cf1cc 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -17,6 +17,7 @@
#include "xfs_trace.h"
#include "xfs_bmap.h"
#include "xfs_trans.h"
+#include "xfs_error.h"
/*
* Directory file type support functions
@@ -47,6 +48,7 @@ xfs_dir2_sf_getdents(
{
int i; /* shortform entry number */
struct xfs_inode *dp = args->dp; /* incore directory inode */
+ struct xfs_mount *mp = dp->i_mount;
xfs_dir2_dataptr_t off; /* current entry's offset */
xfs_dir2_sf_entry_t *sfep; /* shortform directory entry */
xfs_dir2_sf_hdr_t *sfp; /* shortform structure */
@@ -68,15 +70,15 @@ xfs_dir2_sf_getdents(
return 0;
/*
- * Precalculate offsets for . and .. as we will always need them.
- *
- * XXX(hch): the second argument is sometimes 0 and sometimes
- * geo->datablk
+ * Precalculate offsets for "." and ".." as we will always need them.
+ * This relies on the fact that directories always start with the
+ * entries for "." and "..".
*/
dot_offset = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
- dp->d_ops->data_dot_offset);
+ geo->data_entry_offset);
dotdot_offset = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
- dp->d_ops->data_dotdot_offset);
+ geo->data_entry_offset +
+ xfs_dir2_data_entsize(mp, sizeof(".") - 1));
/*
* Put . entry unless we're starting past it.
@@ -91,7 +93,7 @@ xfs_dir2_sf_getdents(
* Put .. entry unless we're starting past it.
*/
if (ctx->pos <= dotdot_offset) {
- ino = dp->d_ops->sf_get_parent_ino(sfp);
+ ino = xfs_dir2_sf_get_parent_ino(sfp);
ctx->pos = dotdot_offset & 0x7fffffff;
if (!dir_emit(ctx, "..", 2, ino, DT_DIR))
return 0;
@@ -108,17 +110,21 @@ xfs_dir2_sf_getdents(
xfs_dir2_sf_get_offset(sfep));
if (ctx->pos > off) {
- sfep = dp->d_ops->sf_nextentry(sfp, sfep);
+ sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
continue;
}
- ino = dp->d_ops->sf_get_ino(sfp, sfep);
- filetype = dp->d_ops->sf_get_ftype(sfep);
+ ino = xfs_dir2_sf_get_ino(mp, sfp, sfep);
+ filetype = xfs_dir2_sf_get_ftype(mp, sfep);
ctx->pos = off & 0x7fffffff;
+ if (XFS_IS_CORRUPT(dp->i_mount,
+ !xfs_dir2_namecheck(sfep->name,
+ sfep->namelen)))
+ return -EFSCORRUPTED;
if (!dir_emit(ctx, (char *)sfep->name, sfep->namelen, ino,
- xfs_dir3_get_dtype(dp->i_mount, filetype)))
+ xfs_dir3_get_dtype(mp, filetype)))
return 0;
- sfep = dp->d_ops->sf_nextentry(sfp, sfep);
+ sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
}
ctx->pos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk + 1, 0) &
@@ -135,17 +141,14 @@ xfs_dir2_block_getdents(
struct dir_context *ctx)
{
struct xfs_inode *dp = args->dp; /* incore directory inode */
- xfs_dir2_data_hdr_t *hdr; /* block header */
struct xfs_buf *bp; /* buffer for block */
- xfs_dir2_data_entry_t *dep; /* block data entry */
- xfs_dir2_data_unused_t *dup; /* block unused entry */
- char *endptr; /* end of the data entries */
int error; /* error return value */
- char *ptr; /* current data entry */
int wantoff; /* starting block offset */
xfs_off_t cook;
struct xfs_da_geometry *geo = args->geo;
int lock_mode;
+ unsigned int offset;
+ unsigned int end;
/*
* If the block number in the offset is out of range, we're done.
@@ -164,56 +167,55 @@ xfs_dir2_block_getdents(
* We'll skip entries before this.
*/
wantoff = xfs_dir2_dataptr_to_off(geo, ctx->pos);
- hdr = bp->b_addr;
xfs_dir3_data_check(dp, bp);
- /*
- * Set up values for the loop.
- */
- ptr = (char *)dp->d_ops->data_entry_p(hdr);
- endptr = xfs_dir3_data_endp(geo, hdr);
/*
* Loop over the data portion of the block.
* Each object is a real entry (dep) or an unused one (dup).
*/
- while (ptr < endptr) {
+ offset = geo->data_entry_offset;
+ end = xfs_dir3_data_end_offset(geo, bp->b_addr);
+ while (offset < end) {
+ struct xfs_dir2_data_unused *dup = bp->b_addr + offset;
+ struct xfs_dir2_data_entry *dep = bp->b_addr + offset;
uint8_t filetype;
- dup = (xfs_dir2_data_unused_t *)ptr;
/*
* Unused, skip it.
*/
if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
- ptr += be16_to_cpu(dup->length);
+ offset += be16_to_cpu(dup->length);
continue;
}
- dep = (xfs_dir2_data_entry_t *)ptr;
-
/*
* Bump pointer for the next iteration.
*/
- ptr += dp->d_ops->data_entsize(dep->namelen);
+ offset += xfs_dir2_data_entsize(dp->i_mount, dep->namelen);
+
/*
* The entry is before the desired starting point, skip it.
*/
- if ((char *)dep - (char *)hdr < wantoff)
+ if (offset < wantoff)
continue;
- cook = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,
- (char *)dep - (char *)hdr);
+ cook = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, offset);
ctx->pos = cook & 0x7fffffff;
- filetype = dp->d_ops->data_get_ftype(dep);
+ filetype = xfs_dir2_data_get_ftype(dp->i_mount, dep);
/*
* If it didn't fit, set the final offset to here & return.
*/
+ if (XFS_IS_CORRUPT(dp->i_mount,
+ !xfs_dir2_namecheck(dep->name,
+ dep->namelen))) {
+ error = -EFSCORRUPTED;
+ goto out_rele;
+ }
if (!dir_emit(ctx, (char *)dep->name, dep->namelen,
be64_to_cpu(dep->inumber),
- xfs_dir3_get_dtype(dp->i_mount, filetype))) {
- xfs_trans_brelse(args->trans, bp);
- return 0;
- }
+ xfs_dir3_get_dtype(dp->i_mount, filetype)))
+ goto out_rele;
}
/*
@@ -222,8 +224,9 @@ xfs_dir2_block_getdents(
*/
ctx->pos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk + 1, 0) &
0x7fffffff;
+out_rele:
xfs_trans_brelse(args->trans, bp);
- return 0;
+ return error;
}
/*
@@ -276,7 +279,7 @@ xfs_dir2_leaf_readbuf(
new_off = xfs_dir2_da_to_byte(geo, map.br_startoff);
if (new_off > *cur_off)
*cur_off = new_off;
- error = xfs_dir3_data_read(args->trans, dp, map.br_startoff, -1, &bp);
+ error = xfs_dir3_data_read(args->trans, dp, map.br_startoff, 0, &bp);
if (error)
goto out;
@@ -311,7 +314,8 @@ xfs_dir2_leaf_readbuf(
break;
}
if (next_ra > *ra_blk) {
- xfs_dir3_data_readahead(dp, next_ra, -2);
+ xfs_dir3_data_readahead(dp, next_ra,
+ XFS_DABUF_MAP_HOLE_OK);
*ra_blk = next_ra;
}
ra_want -= geo->fsbcount;
@@ -343,17 +347,17 @@ xfs_dir2_leaf_getdents(
size_t bufsize)
{
struct xfs_inode *dp = args->dp;
+ struct xfs_mount *mp = dp->i_mount;
struct xfs_buf *bp = NULL; /* data block buffer */
- xfs_dir2_data_hdr_t *hdr; /* data block header */
xfs_dir2_data_entry_t *dep; /* data entry */
xfs_dir2_data_unused_t *dup; /* unused entry */
- char *ptr = NULL; /* pointer to current data */
struct xfs_da_geometry *geo = args->geo;
xfs_dablk_t rablk = 0; /* current readahead block */
xfs_dir2_off_t curoff; /* current overall offset */
int length; /* temporary length value */
int byteoff; /* offset in current block */
int lock_mode;
+ unsigned int offset = 0;
int error = 0; /* error return value */
/*
@@ -380,7 +384,7 @@ xfs_dir2_leaf_getdents(
* If we have no buffer, or we're off the end of the
* current buffer, need to get another one.
*/
- if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) {
+ if (!bp || offset >= geo->blksize) {
if (bp) {
xfs_trans_brelse(args->trans, bp);
bp = NULL;
@@ -393,36 +397,35 @@ xfs_dir2_leaf_getdents(
if (error || !bp)
break;
- hdr = bp->b_addr;
xfs_dir3_data_check(dp, bp);
/*
* Find our position in the block.
*/
- ptr = (char *)dp->d_ops->data_entry_p(hdr);
+ offset = geo->data_entry_offset;
byteoff = xfs_dir2_byte_to_off(geo, curoff);
/*
* Skip past the header.
*/
if (byteoff == 0)
- curoff += dp->d_ops->data_entry_offset;
+ curoff += geo->data_entry_offset;
/*
* Skip past entries until we reach our offset.
*/
else {
- while ((char *)ptr - (char *)hdr < byteoff) {
- dup = (xfs_dir2_data_unused_t *)ptr;
+ while (offset < byteoff) {
+ dup = bp->b_addr + offset;
if (be16_to_cpu(dup->freetag)
== XFS_DIR2_DATA_FREE_TAG) {
length = be16_to_cpu(dup->length);
- ptr += length;
+ offset += length;
continue;
}
- dep = (xfs_dir2_data_entry_t *)ptr;
- length =
- dp->d_ops->data_entsize(dep->namelen);
- ptr += length;
+ dep = bp->b_addr + offset;
+ length = xfs_dir2_data_entsize(mp,
+ dep->namelen);
+ offset += length;
}
/*
* Now set our real offset.
@@ -430,32 +433,38 @@ xfs_dir2_leaf_getdents(
curoff =
xfs_dir2_db_off_to_byte(geo,
xfs_dir2_byte_to_db(geo, curoff),
- (char *)ptr - (char *)hdr);
- if (ptr >= (char *)hdr + geo->blksize) {
+ offset);
+ if (offset >= geo->blksize)
continue;
- }
}
}
+
/*
- * We have a pointer to an entry.
- * Is it a live one?
+ * We have a pointer to an entry. Is it a live one?
*/
- dup = (xfs_dir2_data_unused_t *)ptr;
+ dup = bp->b_addr + offset;
+
/*
* No, it's unused, skip over it.
*/
if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {
length = be16_to_cpu(dup->length);
- ptr += length;
+ offset += length;
curoff += length;
continue;
}
- dep = (xfs_dir2_data_entry_t *)ptr;
- length = dp->d_ops->data_entsize(dep->namelen);
- filetype = dp->d_ops->data_get_ftype(dep);
+ dep = bp->b_addr + offset;
+ length = xfs_dir2_data_entsize(mp, dep->namelen);
+ filetype = xfs_dir2_data_get_ftype(mp, dep);
ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff;
+ if (XFS_IS_CORRUPT(dp->i_mount,
+ !xfs_dir2_namecheck(dep->name,
+ dep->namelen))) {
+ error = -EFSCORRUPTED;
+ break;
+ }
if (!dir_emit(ctx, (char *)dep->name, dep->namelen,
be64_to_cpu(dep->inumber),
xfs_dir3_get_dtype(dp->i_mount, filetype)))
@@ -464,7 +473,7 @@ xfs_dir2_leaf_getdents(
/*
* Advance to next entry in the block.
*/
- ptr += length;
+ offset += length;
curoff += length;
/* bufsize may have just been a guess; don't go negative */
bufsize = bufsize > length ? bufsize - length : 0;
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index 8ec7aab89044..0b8350e84d28 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -13,6 +13,7 @@
#include "xfs_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_alloc.h"
+#include "xfs_discard.h"
#include "xfs_error.h"
#include "xfs_extent_busy.h"
#include "xfs_trace.h"
@@ -44,7 +45,7 @@ xfs_trim_extents(
xfs_log_force(mp, XFS_LOG_SYNC);
error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
- if (error || !agbp)
+ if (error)
goto out_put_perag;
cur = xfs_allocbt_init_cursor(mp, NULL, agbp, agno, XFS_BTNUM_CNT);
@@ -70,7 +71,10 @@ xfs_trim_extents(
error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
if (error)
goto out_del_cursor;
- XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_del_cursor);
+ if (XFS_IS_CORRUPT(mp, i != 1)) {
+ error = -EFSCORRUPTED;
+ goto out_del_cursor;
+ }
ASSERT(flen <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_longest));
/*
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index fb1ad4483081..d223e1ae90a6 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -48,7 +48,7 @@ static struct lock_class_key xfs_dquot_project_class;
*/
void
xfs_qm_dqdestroy(
- xfs_dquot_t *dqp)
+ struct xfs_dquot *dqp)
{
ASSERT(list_empty(&dqp->q_lru));
@@ -56,7 +56,7 @@ xfs_qm_dqdestroy(
mutex_destroy(&dqp->q_qlock);
XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot);
- kmem_zone_free(xfs_qm_dqzone, dqp);
+ kmem_cache_free(xfs_qm_dqzone, dqp);
}
/*
@@ -113,8 +113,8 @@ xfs_qm_adjust_dqlimits(
*/
void
xfs_qm_adjust_dqtimers(
- xfs_mount_t *mp,
- xfs_disk_dquot_t *d)
+ struct xfs_mount *mp,
+ struct xfs_disk_dquot *d)
{
ASSERT(d->d_id);
@@ -137,7 +137,7 @@ xfs_qm_adjust_dqtimers(
(d->d_blk_hardlimit &&
(be64_to_cpu(d->d_bcount) >
be64_to_cpu(d->d_blk_hardlimit)))) {
- d->d_btimer = cpu_to_be32(get_seconds() +
+ d->d_btimer = cpu_to_be32(ktime_get_real_seconds() +
mp->m_quotainfo->qi_btimelimit);
} else {
d->d_bwarns = 0;
@@ -160,7 +160,7 @@ xfs_qm_adjust_dqtimers(
(d->d_ino_hardlimit &&
(be64_to_cpu(d->d_icount) >
be64_to_cpu(d->d_ino_hardlimit)))) {
- d->d_itimer = cpu_to_be32(get_seconds() +
+ d->d_itimer = cpu_to_be32(ktime_get_real_seconds() +
mp->m_quotainfo->qi_itimelimit);
} else {
d->d_iwarns = 0;
@@ -183,7 +183,7 @@ xfs_qm_adjust_dqtimers(
(d->d_rtb_hardlimit &&
(be64_to_cpu(d->d_rtbcount) >
be64_to_cpu(d->d_rtb_hardlimit)))) {
- d->d_rtbtimer = cpu_to_be32(get_seconds() +
+ d->d_rtbtimer = cpu_to_be32(ktime_get_real_seconds() +
mp->m_quotainfo->qi_rtbtimelimit);
} else {
d->d_rtbwarns = 0;
@@ -305,8 +305,8 @@ xfs_dquot_disk_alloc(
/* Create the block mapping. */
xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL);
error = xfs_bmapi_write(tp, quotip, dqp->q_fileoffset,
- XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA,
- XFS_QM_DQALLOC_SPACE_RES(mp), &map, &nmaps);
+ XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, 0, &map,
+ &nmaps);
if (error)
return error;
ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB);
@@ -320,10 +320,10 @@ xfs_dquot_disk_alloc(
dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
/* now we can just get the buffer (there's nothing to read yet) */
- bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, dqp->q_blkno,
- mp->m_quotainfo->qi_dqchunklen, 0);
- if (!bp)
- return -ENOMEM;
+ error = xfs_trans_get_buf(tp, mp->m_ddev_targp, dqp->q_blkno,
+ mp->m_quotainfo->qi_dqchunklen, 0, &bp);
+ if (error)
+ return error;
bp->b_ops = &xfs_dquot_buf_ops;
/*
@@ -440,7 +440,7 @@ xfs_dquot_alloc(
{
struct xfs_dquot *dqp;
- dqp = kmem_zone_zalloc(xfs_qm_dqzone, KM_SLEEP);
+ dqp = kmem_zone_zalloc(xfs_qm_dqzone, 0);
dqp->dq_flags = type;
dqp->q_core.d_id = cpu_to_be32(id);
@@ -497,7 +497,7 @@ xfs_dquot_from_disk(
struct xfs_disk_dquot *ddqp = bp->b_addr + dqp->q_bufoffset;
/* copy everything from disk dquot to the incore dquot */
- memcpy(&dqp->q_core, ddqp, sizeof(xfs_disk_dquot_t));
+ memcpy(&dqp->q_core, ddqp, sizeof(struct xfs_disk_dquot));
/*
* Reservation counters are defined as reservation plus current usage
@@ -833,7 +833,7 @@ xfs_qm_id_for_quotatype(
case XFS_DQ_GROUP:
return ip->i_d.di_gid;
case XFS_DQ_PROJ:
- return xfs_get_projid(ip);
+ return ip->i_d.di_projid;
}
ASSERT(0);
return 0;
@@ -989,7 +989,7 @@ xfs_qm_dqput(
*/
void
xfs_qm_dqrele(
- xfs_dquot_t *dqp)
+ struct xfs_dquot *dqp)
{
if (!dqp)
return;
@@ -1018,8 +1018,8 @@ xfs_qm_dqflush_done(
struct xfs_buf *bp,
struct xfs_log_item *lip)
{
- xfs_dq_logitem_t *qip = (struct xfs_dq_logitem *)lip;
- xfs_dquot_t *dqp = qip->qli_dquot;
+ struct xfs_dq_logitem *qip = (struct xfs_dq_logitem *)lip;
+ struct xfs_dquot *dqp = qip->qli_dquot;
struct xfs_ail *ailp = lip->li_ailp;
/*
@@ -1126,11 +1126,11 @@ xfs_qm_dqflush(
xfs_buf_relse(bp);
xfs_dqfunlock(dqp);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
- return -EIO;
+ return -EFSCORRUPTED;
}
/* This is the only portion of data that needs to persist */
- memcpy(ddqp, &dqp->q_core, sizeof(xfs_disk_dquot_t));
+ memcpy(ddqp, &dqp->q_core, sizeof(struct xfs_disk_dquot));
/*
* Clear the dirty field and remember the flush lsn for later use.
@@ -1188,8 +1188,8 @@ out_unlock:
*/
void
xfs_dqlock2(
- xfs_dquot_t *d1,
- xfs_dquot_t *d2)
+ struct xfs_dquot *d1,
+ struct xfs_dquot *d2)
{
if (d1 && d2) {
ASSERT(d1 != d2);
@@ -1211,20 +1211,22 @@ xfs_dqlock2(
int __init
xfs_qm_init(void)
{
- xfs_qm_dqzone =
- kmem_zone_init(sizeof(struct xfs_dquot), "xfs_dquot");
+ xfs_qm_dqzone = kmem_cache_create("xfs_dquot",
+ sizeof(struct xfs_dquot),
+ 0, 0, NULL);
if (!xfs_qm_dqzone)
goto out;
- xfs_qm_dqtrxzone =
- kmem_zone_init(sizeof(struct xfs_dquot_acct), "xfs_dqtrx");
+ xfs_qm_dqtrxzone = kmem_cache_create("xfs_dqtrx",
+ sizeof(struct xfs_dquot_acct),
+ 0, 0, NULL);
if (!xfs_qm_dqtrxzone)
goto out_free_dqzone;
return 0;
out_free_dqzone:
- kmem_zone_destroy(xfs_qm_dqzone);
+ kmem_cache_destroy(xfs_qm_dqzone);
out:
return -ENOMEM;
}
@@ -1232,14 +1234,14 @@ out:
void
xfs_qm_exit(void)
{
- kmem_zone_destroy(xfs_qm_dqtrxzone);
- kmem_zone_destroy(xfs_qm_dqzone);
+ kmem_cache_destroy(xfs_qm_dqtrxzone);
+ kmem_cache_destroy(xfs_qm_dqzone);
}
/*
* Iterate every dquot of a particular type. The caller must ensure that the
* particular quota type is active. iter_fn can return negative error codes,
- * or XFS_ITER_ABORT to indicate that it wants to stop iterating.
+ * or -ECANCELED to indicate that it wants to stop iterating.
*/
int
xfs_qm_dqiterate(
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 4fe85709d55d..fe3e46df604b 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -30,33 +30,36 @@ enum {
/*
* The incore dquot structure
*/
-typedef struct xfs_dquot {
- uint dq_flags; /* various flags (XFS_DQ_*) */
- struct list_head q_lru; /* global free list of dquots */
- struct xfs_mount*q_mount; /* filesystem this relates to */
- uint q_nrefs; /* # active refs from inodes */
- xfs_daddr_t q_blkno; /* blkno of dquot buffer */
- int q_bufoffset; /* off of dq in buffer (# dquots) */
- xfs_fileoff_t q_fileoffset; /* offset in quotas file */
-
- xfs_disk_dquot_t q_core; /* actual usage & quotas */
- xfs_dq_logitem_t q_logitem; /* dquot log item */
- xfs_qcnt_t q_res_bcount; /* total regular nblks used+reserved */
- xfs_qcnt_t q_res_icount; /* total inos allocd+reserved */
- xfs_qcnt_t q_res_rtbcount;/* total realtime blks used+reserved */
- xfs_qcnt_t q_prealloc_lo_wmark;/* prealloc throttle wmark */
- xfs_qcnt_t q_prealloc_hi_wmark;/* prealloc disabled wmark */
- int64_t q_low_space[XFS_QLOWSP_MAX];
- struct mutex q_qlock; /* quota lock */
- struct completion q_flush; /* flush completion queue */
- atomic_t q_pincount; /* dquot pin count */
- wait_queue_head_t q_pinwait; /* dquot pinning wait queue */
-} xfs_dquot_t;
+struct xfs_dquot {
+ uint dq_flags;
+ struct list_head q_lru;
+ struct xfs_mount *q_mount;
+ uint q_nrefs;
+ xfs_daddr_t q_blkno;
+ int q_bufoffset;
+ xfs_fileoff_t q_fileoffset;
+
+ struct xfs_disk_dquot q_core;
+ struct xfs_dq_logitem q_logitem;
+ /* total regular nblks used+reserved */
+ xfs_qcnt_t q_res_bcount;
+ /* total inos allocd+reserved */
+ xfs_qcnt_t q_res_icount;
+ /* total realtime blks used+reserved */
+ xfs_qcnt_t q_res_rtbcount;
+ xfs_qcnt_t q_prealloc_lo_wmark;
+ xfs_qcnt_t q_prealloc_hi_wmark;
+ int64_t q_low_space[XFS_QLOWSP_MAX];
+ struct mutex q_qlock;
+ struct completion q_flush;
+ atomic_t q_pincount;
+ struct wait_queue_head q_pinwait;
+};
/*
* Lock hierarchy for q_qlock:
* XFS_QLOCK_NORMAL is the implicit default,
- * XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2
+ * XFS_QLOCK_NESTED is the dquot with the higher id in xfs_dqlock2
*/
enum {
XFS_QLOCK_NORMAL = 0,
@@ -64,21 +67,21 @@ enum {
};
/*
- * Manage the q_flush completion queue embedded in the dquot. This completion
+ * Manage the q_flush completion queue embedded in the dquot. This completion
* queue synchronizes processes attempting to flush the in-core dquot back to
* disk.
*/
-static inline void xfs_dqflock(xfs_dquot_t *dqp)
+static inline void xfs_dqflock(struct xfs_dquot *dqp)
{
wait_for_completion(&dqp->q_flush);
}
-static inline bool xfs_dqflock_nowait(xfs_dquot_t *dqp)
+static inline bool xfs_dqflock_nowait(struct xfs_dquot *dqp)
{
return try_wait_for_completion(&dqp->q_flush);
}
-static inline void xfs_dqfunlock(xfs_dquot_t *dqp)
+static inline void xfs_dqfunlock(struct xfs_dquot *dqp)
{
complete(&dqp->q_flush);
}
@@ -112,7 +115,7 @@ static inline int xfs_this_quota_on(struct xfs_mount *mp, int type)
}
}
-static inline xfs_dquot_t *xfs_inode_dquot(struct xfs_inode *ip, int type)
+static inline struct xfs_dquot *xfs_inode_dquot(struct xfs_inode *ip, int type)
{
switch (type & XFS_DQ_ALLTYPES) {
case XFS_DQ_USER:
@@ -147,31 +150,30 @@ static inline bool xfs_dquot_lowsp(struct xfs_dquot *dqp)
#define XFS_QM_ISPDQ(dqp) ((dqp)->dq_flags & XFS_DQ_PROJ)
#define XFS_QM_ISGDQ(dqp) ((dqp)->dq_flags & XFS_DQ_GROUP)
-extern void xfs_qm_dqdestroy(xfs_dquot_t *);
-extern int xfs_qm_dqflush(struct xfs_dquot *, struct xfs_buf **);
-extern void xfs_qm_dqunpin_wait(xfs_dquot_t *);
-extern void xfs_qm_adjust_dqtimers(xfs_mount_t *,
- xfs_disk_dquot_t *);
-extern void xfs_qm_adjust_dqlimits(struct xfs_mount *,
- struct xfs_dquot *);
-extern xfs_dqid_t xfs_qm_id_for_quotatype(struct xfs_inode *ip,
- uint type);
-extern int xfs_qm_dqget(struct xfs_mount *mp, xfs_dqid_t id,
+void xfs_qm_dqdestroy(struct xfs_dquot *dqp);
+int xfs_qm_dqflush(struct xfs_dquot *dqp, struct xfs_buf **bpp);
+void xfs_qm_dqunpin_wait(struct xfs_dquot *dqp);
+void xfs_qm_adjust_dqtimers(struct xfs_mount *mp,
+ struct xfs_disk_dquot *d);
+void xfs_qm_adjust_dqlimits(struct xfs_mount *mp,
+ struct xfs_dquot *d);
+xfs_dqid_t xfs_qm_id_for_quotatype(struct xfs_inode *ip, uint type);
+int xfs_qm_dqget(struct xfs_mount *mp, xfs_dqid_t id,
uint type, bool can_alloc,
struct xfs_dquot **dqpp);
-extern int xfs_qm_dqget_inode(struct xfs_inode *ip, uint type,
- bool can_alloc,
- struct xfs_dquot **dqpp);
-extern int xfs_qm_dqget_next(struct xfs_mount *mp, xfs_dqid_t id,
+int xfs_qm_dqget_inode(struct xfs_inode *ip, uint type,
+ bool can_alloc,
+ struct xfs_dquot **dqpp);
+int xfs_qm_dqget_next(struct xfs_mount *mp, xfs_dqid_t id,
uint type, struct xfs_dquot **dqpp);
-extern int xfs_qm_dqget_uncached(struct xfs_mount *mp,
- xfs_dqid_t id, uint type,
- struct xfs_dquot **dqpp);
-extern void xfs_qm_dqput(xfs_dquot_t *);
+int xfs_qm_dqget_uncached(struct xfs_mount *mp,
+ xfs_dqid_t id, uint type,
+ struct xfs_dquot **dqpp);
+void xfs_qm_dqput(struct xfs_dquot *dqp);
-extern void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *);
+void xfs_dqlock2(struct xfs_dquot *, struct xfs_dquot *);
-extern void xfs_dquot_set_prealloc_limits(struct xfs_dquot *);
+void xfs_dquot_set_prealloc_limits(struct xfs_dquot *);
static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp)
{
diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c
index 282ec5af293e..d60647d7197b 100644
--- a/fs/xfs/xfs_dquot_item.c
+++ b/fs/xfs/xfs_dquot_item.c
@@ -347,7 +347,7 @@ xfs_qm_qoff_logitem_init(
{
struct xfs_qoff_logitem *qf;
- qf = kmem_zalloc(sizeof(struct xfs_qoff_logitem), KM_SLEEP);
+ qf = kmem_zalloc(sizeof(struct xfs_qoff_logitem), 0);
xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ?
&xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops);
diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h
index 1aed34ccdabc..3bb19e556ade 100644
--- a/fs/xfs/xfs_dquot_item.h
+++ b/fs/xfs/xfs_dquot_item.h
@@ -11,25 +11,27 @@ struct xfs_trans;
struct xfs_mount;
struct xfs_qoff_logitem;
-typedef struct xfs_dq_logitem {
- struct xfs_log_item qli_item; /* common portion */
- struct xfs_dquot *qli_dquot; /* dquot ptr */
- xfs_lsn_t qli_flush_lsn; /* lsn at last flush */
-} xfs_dq_logitem_t;
+struct xfs_dq_logitem {
+ struct xfs_log_item qli_item; /* common portion */
+ struct xfs_dquot *qli_dquot; /* dquot ptr */
+ xfs_lsn_t qli_flush_lsn; /* lsn at last flush */
+};
-typedef struct xfs_qoff_logitem {
- struct xfs_log_item qql_item; /* common portion */
- struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */
+struct xfs_qoff_logitem {
+ struct xfs_log_item qql_item; /* common portion */
+ struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */
unsigned int qql_flags;
-} xfs_qoff_logitem_t;
+};
-extern void xfs_qm_dquot_logitem_init(struct xfs_dquot *);
-extern xfs_qoff_logitem_t *xfs_qm_qoff_logitem_init(struct xfs_mount *,
- struct xfs_qoff_logitem *, uint);
-extern xfs_qoff_logitem_t *xfs_trans_get_qoff_item(struct xfs_trans *,
- struct xfs_qoff_logitem *, uint);
-extern void xfs_trans_log_quotaoff_item(struct xfs_trans *,
- struct xfs_qoff_logitem *);
+void xfs_qm_dquot_logitem_init(struct xfs_dquot *dqp);
+struct xfs_qoff_logitem *xfs_qm_qoff_logitem_init(struct xfs_mount *mp,
+ struct xfs_qoff_logitem *start,
+ uint flags);
+struct xfs_qoff_logitem *xfs_trans_get_qoff_item(struct xfs_trans *tp,
+ struct xfs_qoff_logitem *startqoff,
+ uint flags);
+void xfs_trans_log_quotaoff_item(struct xfs_trans *tp,
+ struct xfs_qoff_logitem *qlp);
#endif /* __XFS_DQUOT_ITEM_H__ */
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 544c9482a0ef..331765afc53e 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -213,7 +213,7 @@ xfs_errortag_init(
struct xfs_mount *mp)
{
mp->m_errortag = kmem_zalloc(sizeof(unsigned int) * XFS_ERRTAG_MAX,
- KM_SLEEP | KM_MAYFAIL);
+ KM_MAYFAIL);
if (!mp->m_errortag)
return -ENOMEM;
@@ -257,7 +257,7 @@ xfs_errortag_test(
xfs_warn_ratelimited(mp,
"Injecting error (%s) at file %s, line %d, on filesystem \"%s\"",
- expression, file, line, mp->m_fsname);
+ expression, file, line, mp->m_super->s_id);
return true;
}
@@ -329,19 +329,40 @@ xfs_corruption_error(
const char *tag,
int level,
struct xfs_mount *mp,
- void *buf,
+ const void *buf,
size_t bufsize,
const char *filename,
int linenum,
xfs_failaddr_t failaddr)
{
- if (level <= xfs_error_level)
+ if (buf && level <= xfs_error_level)
xfs_hex_dump(buf, bufsize);
xfs_error_report(tag, level, mp, filename, linenum, failaddr);
xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair");
}
/*
+ * Complain about the kinds of metadata corruption that we can't detect from a
+ * verifier, such as incorrect inter-block relationship data. Does not set
+ * bp->b_error.
+ */
+void
+xfs_buf_corruption_error(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_mount;
+
+ xfs_alert_tag(mp, XFS_PTAG_VERIFIER_ERROR,
+ "Metadata corruption detected at %pS, %s block 0x%llx",
+ __return_address, bp->b_ops->name, bp->b_bn);
+
+ xfs_alert(mp, "Unmount and run xfs_repair");
+
+ if (xfs_error_level >= XFS_ERRLEVEL_HIGH)
+ xfs_stack_trace();
+}
+
+/*
* Warnings specifically for verifier errors. Differentiate CRC vs. invalid
* values, and omit the stack trace unless the error level is tuned high.
*/
@@ -350,7 +371,7 @@ xfs_buf_verifier_error(
struct xfs_buf *bp,
int error,
const char *name,
- void *buf,
+ const void *buf,
size_t bufsz,
xfs_failaddr_t failaddr)
{
@@ -402,7 +423,7 @@ xfs_inode_verifier_error(
struct xfs_inode *ip,
int error,
const char *name,
- void *buf,
+ const void *buf,
size_t bufsz,
xfs_failaddr_t failaddr)
{
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 602aa7d62b66..31a5d321ba9a 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -12,16 +12,17 @@ extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,
const char *filename, int linenum,
xfs_failaddr_t failaddr);
extern void xfs_corruption_error(const char *tag, int level,
- struct xfs_mount *mp, void *buf, size_t bufsize,
+ struct xfs_mount *mp, const void *buf, size_t bufsize,
const char *filename, int linenum,
xfs_failaddr_t failaddr);
+void xfs_buf_corruption_error(struct xfs_buf *bp);
extern void xfs_buf_verifier_error(struct xfs_buf *bp, int error,
- const char *name, void *buf, size_t bufsz,
+ const char *name, const void *buf, size_t bufsz,
xfs_failaddr_t failaddr);
extern void xfs_verifier_error(struct xfs_buf *bp, int error,
xfs_failaddr_t failaddr);
extern void xfs_inode_verifier_error(struct xfs_inode *ip, int error,
- const char *name, void *buf, size_t bufsz,
+ const char *name, const void *buf, size_t bufsz,
xfs_failaddr_t failaddr);
#define XFS_ERROR_REPORT(e, lvl, mp) \
@@ -37,32 +38,6 @@ extern void xfs_inode_verifier_error(struct xfs_inode *ip, int error,
/* Dump 128 bytes of any corrupt buffer */
#define XFS_CORRUPTION_DUMP_LEN (128)
-/*
- * Macros to set EFSCORRUPTED & return/branch.
- */
-#define XFS_WANT_CORRUPTED_GOTO(mp, x, l) \
- { \
- int fs_is_ok = (x); \
- ASSERT(fs_is_ok); \
- if (unlikely(!fs_is_ok)) { \
- XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_GOTO", \
- XFS_ERRLEVEL_LOW, mp); \
- error = -EFSCORRUPTED; \
- goto l; \
- } \
- }
-
-#define XFS_WANT_CORRUPTED_RETURN(mp, x) \
- { \
- int fs_is_ok = (x); \
- ASSERT(fs_is_ok); \
- if (unlikely(!fs_is_ok)) { \
- XFS_ERROR_REPORT("XFS_WANT_CORRUPTED_RETURN", \
- XFS_ERRLEVEL_LOW, mp); \
- return -EFSCORRUPTED; \
- } \
- }
-
#ifdef DEBUG
extern int xfs_errortag_init(struct xfs_mount *mp);
extern void xfs_errortag_del(struct xfs_mount *mp);
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c
index 0ed68379e551..3991e59cfd18 100644
--- a/fs/xfs/xfs_extent_busy.c
+++ b/fs/xfs/xfs_extent_busy.c
@@ -33,7 +33,7 @@ xfs_extent_busy_insert(
struct rb_node **rbp;
struct rb_node *parent = NULL;
- new = kmem_zalloc(sizeof(struct xfs_extent_busy), KM_SLEEP);
+ new = kmem_zalloc(sizeof(struct xfs_extent_busy), 0);
new->agno = agno;
new->bno = bno;
new->length = len;
@@ -367,7 +367,7 @@ restart:
* If this is a metadata allocation, try to reuse the busy
* extent instead of trimming the allocation.
*/
- if (!xfs_alloc_is_userdata(args->datatype) &&
+ if (!(args->datatype & XFS_ALLOC_USERDATA) &&
!(busyp->flags & XFS_EXTENT_BUSY_DISCARDED)) {
if (!xfs_extent_busy_update_extent(args->mp, args->pag,
busyp, fbno, flen,
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 86f6512d6864..6ea847f6e298 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -21,7 +21,7 @@
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_trace.h"
-
+#include "xfs_error.h"
kmem_zone_t *xfs_efi_zone;
kmem_zone_t *xfs_efd_zone;
@@ -39,7 +39,7 @@ xfs_efi_item_free(
if (efip->efi_format.efi_nextents > XFS_EFI_MAX_FAST_EXTENTS)
kmem_free(efip);
else
- kmem_zone_free(xfs_efi_zone, efip);
+ kmem_cache_free(xfs_efi_zone, efip);
}
/*
@@ -163,9 +163,9 @@ xfs_efi_init(
if (nextents > XFS_EFI_MAX_FAST_EXTENTS) {
size = (uint)(sizeof(xfs_efi_log_item_t) +
((nextents - 1) * sizeof(xfs_extent_t)));
- efip = kmem_zalloc(size, KM_SLEEP);
+ efip = kmem_zalloc(size, 0);
} else {
- efip = kmem_zone_zalloc(xfs_efi_zone, KM_SLEEP);
+ efip = kmem_zone_zalloc(xfs_efi_zone, 0);
}
xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops);
@@ -228,6 +228,7 @@ xfs_efi_copy_format(xfs_log_iovec_t *buf, xfs_efi_log_format_t *dst_efi_fmt)
}
return 0;
}
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
return -EFSCORRUPTED;
}
@@ -243,7 +244,7 @@ xfs_efd_item_free(struct xfs_efd_log_item *efdp)
if (efdp->efd_format.efd_nextents > XFS_EFD_MAX_FAST_EXTENTS)
kmem_free(efdp);
else
- kmem_zone_free(xfs_efd_zone, efdp);
+ kmem_cache_free(xfs_efd_zone, efdp);
}
/*
@@ -333,9 +334,9 @@ xfs_trans_get_efd(
if (nextents > XFS_EFD_MAX_FAST_EXTENTS) {
efdp = kmem_zalloc(sizeof(struct xfs_efd_log_item) +
(nextents - 1) * sizeof(struct xfs_extent),
- KM_SLEEP);
+ 0);
} else {
- efdp = kmem_zone_zalloc(xfs_efd_zone, KM_SLEEP);
+ efdp = kmem_zone_zalloc(xfs_efd_zone, 0);
}
xfs_log_item_init(tp->t_mountp, &efdp->efd_item, XFS_LI_EFD,
@@ -624,7 +625,7 @@ xfs_efi_recover(
*/
set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
xfs_efi_release(efip);
- return -EIO;
+ return -EFSCORRUPTED;
}
}
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 28101bbc0b78..b8a4a3f29b36 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -28,6 +28,7 @@
#include <linux/falloc.h>
#include <linux/backing-dev.h>
#include <linux/mman.h>
+#include <linux/fadvise.h>
static const struct vm_operations_struct xfs_file_vm_ops;
@@ -186,8 +187,14 @@ xfs_file_dio_aio_read(
file_accessed(iocb->ki_filp);
- xfs_ilock(ip, XFS_IOLOCK_SHARED);
- ret = iomap_dio_rw(iocb, to, &xfs_iomap_ops, NULL);
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
+ return -EAGAIN;
+ } else {
+ xfs_ilock(ip, XFS_IOLOCK_SHARED);
+ }
+ ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL,
+ is_sync_kiocb(iocb));
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
return ret;
@@ -214,7 +221,7 @@ xfs_file_dax_read(
xfs_ilock(ip, XFS_IOLOCK_SHARED);
}
- ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops);
+ ret = dax_iomap_rw(iocb, to, &xfs_read_iomap_ops);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
file_accessed(iocb->ki_filp);
@@ -350,7 +357,7 @@ restart:
trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
error = iomap_zero_range(inode, isize, iocb->ki_pos - isize,
- NULL, &xfs_iomap_ops);
+ NULL, &xfs_buffered_write_iomap_ops);
if (error)
return error;
} else
@@ -369,21 +376,23 @@ static int
xfs_dio_write_end_io(
struct kiocb *iocb,
ssize_t size,
+ int error,
unsigned flags)
{
struct inode *inode = file_inode(iocb->ki_filp);
struct xfs_inode *ip = XFS_I(inode);
loff_t offset = iocb->ki_pos;
unsigned int nofs_flag;
- int error = 0;
trace_xfs_end_io_direct_write(ip, offset, size);
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -EIO;
- if (size <= 0)
- return size;
+ if (error)
+ return error;
+ if (!size)
+ return 0;
/*
* Capture amount written on completion as we can't reliably account
@@ -440,6 +449,10 @@ out:
return error;
}
+static const struct iomap_dio_ops xfs_dio_write_ops = {
+ .end_io = xfs_dio_write_end_io,
+};
+
/*
* xfs_file_dio_aio_write - handle direct IO writes
*
@@ -479,8 +492,7 @@ xfs_file_dio_aio_write(
int unaligned_io = 0;
int iolock;
size_t count = iov_iter_count(from);
- struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
- mp->m_rtdev_targp : mp->m_ddev_targp;
+ struct xfs_buftarg *target = xfs_inode_buftarg(ip);
/* DIO must be aligned to device logical sector size */
if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
@@ -540,15 +552,13 @@ xfs_file_dio_aio_write(
}
trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
- ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io);
-
/*
- * If unaligned, this is the only IO in-flight. If it has not yet
- * completed, wait on it before we release the iolock to prevent
- * subsequent overlapping IO.
+ * If unaligned, this is the only IO in-flight. Wait on it before we
+ * release the iolock to prevent subsequent overlapping IO.
*/
- if (ret == -EIOCBQUEUED && unaligned_io)
- inode_dio_wait(inode);
+ ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
+ &xfs_dio_write_ops,
+ is_sync_kiocb(iocb) || unaligned_io);
out:
xfs_iunlock(ip, iolock);
@@ -587,7 +597,7 @@ xfs_file_dax_write(
count = iov_iter_count(from);
trace_xfs_file_dax_write(ip, count, pos);
- ret = dax_iomap_rw(iocb, from, &xfs_iomap_ops);
+ ret = dax_iomap_rw(iocb, from, &xfs_direct_write_iomap_ops);
if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
i_size_write(inode, iocb->ki_pos);
error = xfs_setfilesize(ip, pos, ret);
@@ -634,7 +644,8 @@ write_retry:
current->backing_dev_info = inode_to_bdi(inode);
trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos);
- ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops);
+ ret = iomap_file_buffered_write(iocb, from,
+ &xfs_buffered_write_iomap_ops);
if (likely(ret >= 0))
iocb->ki_pos += ret;
@@ -811,6 +822,36 @@ xfs_file_fallocate(
if (error)
goto out_unlock;
+ /*
+ * Must wait for all AIO to complete before we continue as AIO can
+ * change the file size on completion without holding any locks we
+ * currently hold. We must do this first because AIO can update both
+ * the on disk and in memory inode sizes, and the operations that follow
+ * require the in-memory size to be fully up-to-date.
+ */
+ inode_dio_wait(inode);
+
+ /*
+ * Now AIO and DIO has drained we flush and (if necessary) invalidate
+ * the cached range over the first operation we are about to run.
+ *
+ * We care about zero and collapse here because they both run a hole
+ * punch over the range first. Because that can zero data, and the range
+ * of invalidation for the shift operations is much larger, we still do
+ * the required flush for collapse in xfs_prepare_shift().
+ *
+ * Insert has the same range requirements as collapse, and we extend the
+ * file first which can zero data. Hence insert has the same
+ * flush/invalidate requirements as collapse and so they are both
+ * handled at the right time by xfs_prepare_shift().
+ */
+ if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE |
+ FALLOC_FL_COLLAPSE_RANGE)) {
+ error = xfs_flush_unmap_range(ip, offset, len);
+ if (error)
+ goto out_unlock;
+ }
+
if (mode & FALLOC_FL_PUNCH_HOLE) {
error = xfs_free_file_space(ip, offset, len);
if (error)
@@ -874,16 +915,30 @@ xfs_file_fallocate(
}
if (mode & FALLOC_FL_ZERO_RANGE) {
- error = xfs_zero_file_space(ip, offset, len);
+ /*
+ * Punch a hole and prealloc the range. We use a hole
+ * punch rather than unwritten extent conversion for two
+ * reasons:
+ *
+ * 1.) Hole punch handles partial block zeroing for us.
+ * 2.) If prealloc returns ENOSPC, the file range is
+ * still zero-valued by virtue of the hole punch.
+ */
+ unsigned int blksize = i_blocksize(inode);
+
+ trace_xfs_zero_file_space(ip);
+
+ error = xfs_free_file_space(ip, offset, len);
+ if (error)
+ goto out_unlock;
+
+ len = round_up(offset + len, blksize) -
+ round_down(offset, blksize);
+ offset = round_down(offset, blksize);
} else if (mode & FALLOC_FL_UNSHARE_RANGE) {
error = xfs_reflink_unshare(ip, offset, len);
if (error)
goto out_unlock;
-
- if (!xfs_is_always_cow_inode(ip)) {
- error = xfs_alloc_file_space(ip, offset, len,
- XFS_BMAPI_PREALLOC);
- }
} else {
/*
* If always_cow mode we can't use preallocations and
@@ -893,12 +948,14 @@ xfs_file_fallocate(
error = -EOPNOTSUPP;
goto out_unlock;
}
+ }
+ if (!xfs_is_always_cow_inode(ip)) {
error = xfs_alloc_file_space(ip, offset, len,
XFS_BMAPI_PREALLOC);
+ if (error)
+ goto out_unlock;
}
- if (error)
- goto out_unlock;
}
if (file->f_flags & O_DSYNC)
@@ -933,6 +990,30 @@ out_unlock:
return error;
}
+STATIC int
+xfs_file_fadvise(
+ struct file *file,
+ loff_t start,
+ loff_t end,
+ int advice)
+{
+ struct xfs_inode *ip = XFS_I(file_inode(file));
+ int ret;
+ int lockflags = 0;
+
+ /*
+ * Operations creating pages in page cache need protection from hole
+ * punching and similar ops
+ */
+ if (advice == POSIX_FADV_WILLNEED) {
+ lockflags = XFS_IOLOCK_SHARED;
+ xfs_ilock(ip, lockflags);
+ }
+ ret = generic_fadvise(file, start, end, advice);
+ if (lockflags)
+ xfs_iunlock(ip, lockflags);
+ return ret;
+}
STATIC loff_t
xfs_file_remap_range(
@@ -1028,7 +1109,7 @@ xfs_dir_open(
*/
mode = xfs_ilock_data_map_shared(ip);
if (ip->i_d.di_nextents > 0)
- error = xfs_dir3_data_readahead(ip, 0, -1);
+ error = xfs_dir3_data_readahead(ip, 0, 0);
xfs_iunlock(ip, mode);
return error;
}
@@ -1125,12 +1206,16 @@ __xfs_filemap_fault(
if (IS_DAX(inode)) {
pfn_t pfn;
- ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL, &xfs_iomap_ops);
+ ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL,
+ (write_fault && !vmf->cow_page) ?
+ &xfs_direct_write_iomap_ops :
+ &xfs_read_iomap_ops);
if (ret & VM_FAULT_NEEDDSYNC)
ret = dax_finish_sync_fault(vmf, pe_size, pfn);
} else {
if (write_fault)
- ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops);
+ ret = iomap_page_mkwrite(vmf,
+ &xfs_buffered_write_iomap_ops);
else
ret = filemap_fault(vmf);
}
@@ -1194,22 +1279,22 @@ static const struct vm_operations_struct xfs_file_vm_ops = {
STATIC int
xfs_file_mmap(
- struct file *filp,
- struct vm_area_struct *vma)
+ struct file *file,
+ struct vm_area_struct *vma)
{
- struct dax_device *dax_dev;
+ struct inode *inode = file_inode(file);
+ struct xfs_buftarg *target = xfs_inode_buftarg(XFS_I(inode));
- dax_dev = xfs_find_daxdev_for_inode(file_inode(filp));
/*
* We don't support synchronous mappings for non-DAX files and
* for DAX files if underneath dax_device is not synchronous.
*/
- if (!daxdev_mapping_supported(vma, dax_dev))
+ if (!daxdev_mapping_supported(vma, target->bt_daxdev))
return -EOPNOTSUPP;
- file_accessed(filp);
+ file_accessed(file);
vma->vm_ops = &xfs_file_vm_ops;
- if (IS_DAX(file_inode(filp)))
+ if (IS_DAX(inode))
vma->vm_flags |= VM_HUGEPAGE;
return 0;
}
@@ -1232,6 +1317,7 @@ const struct file_operations xfs_file_operations = {
.fsync = xfs_file_fsync,
.get_unmapped_area = thp_get_unmapped_area,
.fallocate = xfs_file_fallocate,
+ .fadvise = xfs_file_fadvise,
.remap_file_range = xfs_file_remap_range,
};
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 574a7a8b4736..1a88025e68a3 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -18,6 +18,7 @@
#include "xfs_trace.h"
#include "xfs_ag_resv.h"
#include "xfs_trans.h"
+#include "xfs_filestream.h"
struct xfs_fstrm_item {
struct xfs_mru_cache_elem mru;
@@ -158,16 +159,15 @@ xfs_filestream_pick_ag(
if (!pag->pagf_init) {
err = xfs_alloc_pagf_init(mp, NULL, ag, trylock);
- if (err && !trylock) {
+ if (err) {
xfs_perag_put(pag);
- return err;
+ if (err != -EAGAIN)
+ return err;
+ /* Couldn't lock the AGF, skip this AG. */
+ continue;
}
}
- /* Might fail sometimes during the 1st pass with trylock set. */
- if (!pag->pagf_init)
- goto next_ag;
-
/* Keep track of the AG with the most free blocks. */
if (pag->pagf_freeblks > maxfree) {
maxfree = pag->pagf_freeblks;
@@ -374,7 +374,7 @@ xfs_filestream_new_ag(
startag = (item->ag + 1) % mp->m_sb.sb_agcount;
}
- if (xfs_alloc_is_userdata(ap->datatype))
+ if (ap->datatype & XFS_ALLOC_USERDATA)
flags |= XFS_PICK_USERDATA;
if (ap->tp->t_flags & XFS_TRANS_LOWMODE)
flags |= XFS_PICK_LOWSPACE;
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index 5a8f9641562a..918456ca29e1 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -146,6 +146,7 @@ xfs_fsmap_owner_from_rmap(
dest->fmr_owner = XFS_FMR_OWN_FREE;
break;
default:
+ ASSERT(0);
return -EFSCORRUPTED;
}
return 0;
@@ -250,7 +251,7 @@ xfs_getfsmap_helper(
rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
if (info->next_daddr < rec_daddr)
info->next_daddr = rec_daddr;
- return XFS_BTREE_QUERY_RANGE_CONTINUE;
+ return 0;
}
/* Are we just counting mappings? */
@@ -259,14 +260,14 @@ xfs_getfsmap_helper(
info->head->fmh_entries++;
if (info->last)
- return XFS_BTREE_QUERY_RANGE_CONTINUE;
+ return 0;
info->head->fmh_entries++;
rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
if (info->next_daddr < rec_daddr)
info->next_daddr = rec_daddr;
- return XFS_BTREE_QUERY_RANGE_CONTINUE;
+ return 0;
}
/*
@@ -276,7 +277,7 @@ xfs_getfsmap_helper(
*/
if (rec_daddr > info->next_daddr) {
if (info->head->fmh_entries >= info->head->fmh_count)
- return XFS_BTREE_QUERY_RANGE_ABORT;
+ return -ECANCELED;
fmr.fmr_device = info->dev;
fmr.fmr_physical = info->next_daddr;
@@ -295,7 +296,7 @@ xfs_getfsmap_helper(
/* Fill out the extent we found */
if (info->head->fmh_entries >= info->head->fmh_count)
- return XFS_BTREE_QUERY_RANGE_ABORT;
+ return -ECANCELED;
trace_xfs_fsmap_mapping(mp, info->dev, info->agno, rec);
@@ -328,7 +329,7 @@ out:
rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
if (info->next_daddr < rec_daddr)
info->next_daddr = rec_daddr;
- return XFS_BTREE_QUERY_RANGE_CONTINUE;
+ return 0;
}
/* Transform a rmapbt irec into a fsmap */
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 0b0fd10a36d4..8dc2e5414276 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -40,11 +40,11 @@ xfs_inode_alloc(
* KM_MAYFAIL and return NULL here on ENOMEM. Set the
* code up to do this anyway.
*/
- ip = kmem_zone_alloc(xfs_inode_zone, KM_SLEEP);
+ ip = kmem_zone_alloc(xfs_inode_zone, 0);
if (!ip)
return NULL;
if (inode_init_always(mp->m_super, VFS_I(ip))) {
- kmem_zone_free(xfs_inode_zone, ip);
+ kmem_cache_free(xfs_inode_zone, ip);
return NULL;
}
@@ -104,7 +104,7 @@ xfs_inode_free_callback(
ip->i_itemp = NULL;
}
- kmem_zone_free(xfs_inode_zone, ip);
+ kmem_cache_free(xfs_inode_zone, ip);
}
static void
@@ -1419,7 +1419,7 @@ xfs_inode_match_id(
return 0;
if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) &&
- xfs_get_projid(ip) != eofb->eof_prid)
+ ip->i_d.di_projid != eofb->eof_prid)
return 0;
return 1;
@@ -1443,7 +1443,7 @@ xfs_inode_match_id_union(
return 1;
if ((eofb->eof_flags & XFS_EOF_FLAGS_PRID) &&
- xfs_get_projid(ip) == eofb->eof_prid)
+ ip->i_d.di_projid == eofb->eof_prid)
return 1;
return 0;
diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c
index d99a0a3e5f40..490fee22b878 100644
--- a/fs/xfs/xfs_icreate_item.c
+++ b/fs/xfs/xfs_icreate_item.c
@@ -55,7 +55,7 @@ STATIC void
xfs_icreate_item_release(
struct xfs_log_item *lip)
{
- kmem_zone_free(xfs_icreate_zone, ICR_ITEM(lip));
+ kmem_cache_free(xfs_icreate_zone, ICR_ITEM(lip));
}
static const struct xfs_item_ops xfs_icreate_item_ops = {
@@ -89,7 +89,7 @@ xfs_icreate_log(
{
struct xfs_icreate_item *icp;
- icp = kmem_zone_zalloc(xfs_icreate_zone, KM_SLEEP);
+ icp = kmem_zone_zalloc(xfs_icreate_zone, 0);
xfs_log_item_init(tp->t_mountp, &icp->ic_item, XFS_LI_ICREATE,
&xfs_icreate_item_ops);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 6467d5e1df2d..c5077e6326c7 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -55,6 +55,12 @@ xfs_extlen_t
xfs_get_extsz_hint(
struct xfs_inode *ip)
{
+ /*
+ * No point in aligning allocations if we need to COW to actually
+ * write to them.
+ */
+ if (xfs_is_always_cow_inode(ip))
+ return 0;
if ((ip->i_d.di_flags & XFS_DIFLAG_EXTSIZE) && ip->i_d.di_extsize)
return ip->i_d.di_extsize;
if (XFS_IS_REALTIME_INODE(ip))
@@ -809,7 +815,7 @@ xfs_ialloc(
ip->i_d.di_uid = xfs_kuid_to_uid(current_fsuid());
ip->i_d.di_gid = xfs_kgid_to_gid(current_fsgid());
inode->i_rdev = rdev;
- xfs_set_projid(ip, prid);
+ ip->i_d.di_projid = prid;
if (pip && XFS_INHERIT_GID(pip)) {
ip->i_d.di_gid = pip->i_d.di_gid;
@@ -845,8 +851,7 @@ xfs_ialloc(
inode_set_iversion(inode, 1);
ip->i_d.di_flags2 = 0;
ip->i_d.di_cowextsize = 0;
- ip->i_d.di_crtime.t_sec = (int32_t)tv.tv_sec;
- ip->i_d.di_crtime.t_nsec = (int32_t)tv.tv_nsec;
+ ip->i_d.di_crtime = tv;
}
@@ -1418,7 +1423,7 @@ xfs_link(
* the tree quota mechanism could be circumvented.
*/
if (unlikely((tdp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
- (xfs_get_projid(tdp) != xfs_get_projid(sip)))) {
+ tdp->i_d.di_projid != sip->i_d.di_projid)) {
error = -EXDEV;
goto error_return;
}
@@ -1513,10 +1518,8 @@ xfs_itruncate_extents_flags(
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp = *tpp;
xfs_fileoff_t first_unmap_block;
- xfs_fileoff_t last_block;
xfs_filblks_t unmap_len;
int error = 0;
- int done = 0;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
ASSERT(!atomic_read(&VFS_I(ip)->i_count) ||
@@ -1536,21 +1539,22 @@ xfs_itruncate_extents_flags(
* the end of the file (in a crash where the space is allocated
* but the inode size is not yet updated), simply remove any
* blocks which show up between the new EOF and the maximum
- * possible file size. If the first block to be removed is
- * beyond the maximum file size (ie it is the same as last_block),
- * then there is nothing to do.
+ * possible file size.
+ *
+ * We have to free all the blocks to the bmbt maximum offset, even if
+ * the page cache can't scale that far.
*/
first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
- last_block = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
- if (first_unmap_block == last_block)
+ if (first_unmap_block >= XFS_MAX_FILEOFF) {
+ WARN_ON_ONCE(first_unmap_block > XFS_MAX_FILEOFF);
return 0;
+ }
- ASSERT(first_unmap_block < last_block);
- unmap_len = last_block - first_unmap_block + 1;
- while (!done) {
+ unmap_len = XFS_MAX_FILEOFF - first_unmap_block + 1;
+ while (unmap_len > 0) {
ASSERT(tp->t_firstblock == NULLFSBLOCK);
- error = xfs_bunmapi(tp, ip, first_unmap_block, unmap_len, flags,
- XFS_ITRUNC_MAX_EXTENTS, &done);
+ error = __xfs_bunmapi(tp, ip, first_unmap_block, &unmap_len,
+ flags, XFS_ITRUNC_MAX_EXTENTS);
if (error)
goto out;
@@ -1570,7 +1574,7 @@ xfs_itruncate_extents_flags(
if (whichfork == XFS_DATA_FORK) {
/* Remove all pending CoW reservations. */
error = xfs_reflink_cancel_cow_blocks(ip, &tp,
- first_unmap_block, last_block, true);
+ first_unmap_block, XFS_MAX_FILEOFF, true);
if (error)
goto out;
@@ -2018,7 +2022,7 @@ xfs_iunlink_add_backref(
if (XFS_TEST_ERROR(false, pag->pag_mount, XFS_ERRTAG_IUNLINK_FALLBACK))
return 0;
- iu = kmem_zalloc(sizeof(*iu), KM_SLEEP | KM_NOFS);
+ iu = kmem_zalloc(sizeof(*iu), KM_NOFS);
iu->iu_agino = prev_agino;
iu->iu_next_unlinked = this_agino;
@@ -2130,8 +2134,10 @@ xfs_iunlink_update_bucket(
* passed in because either we're adding or removing ourselves from the
* head of the list.
*/
- if (old_value == new_agino)
+ if (old_value == new_agino) {
+ xfs_buf_corruption_error(agibp);
return -EFSCORRUPTED;
+ }
agi->agi_unlinked[bucket_index] = cpu_to_be32(new_agino);
offset = offsetof(struct xfs_agi, agi_unlinked) +
@@ -2194,6 +2200,8 @@ xfs_iunlink_update_inode(
/* Make sure the old pointer isn't garbage. */
old_value = be32_to_cpu(dip->di_next_unlinked);
if (!xfs_verify_agino_or_null(mp, agno, old_value)) {
+ xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip,
+ sizeof(*dip), __this_address);
error = -EFSCORRUPTED;
goto out;
}
@@ -2205,8 +2213,11 @@ xfs_iunlink_update_inode(
*/
*old_next_agino = old_value;
if (old_value == next_agino) {
- if (next_agino != NULLAGINO)
+ if (next_agino != NULLAGINO) {
+ xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__,
+ dip, sizeof(*dip), __this_address);
error = -EFSCORRUPTED;
+ }
goto out;
}
@@ -2257,8 +2268,10 @@ xfs_iunlink(
*/
next_agino = be32_to_cpu(agi->agi_unlinked[bucket_index]);
if (next_agino == agino ||
- !xfs_verify_agino_or_null(mp, agno, next_agino))
+ !xfs_verify_agino_or_null(mp, agno, next_agino)) {
+ xfs_buf_corruption_error(agibp);
return -EFSCORRUPTED;
+ }
if (next_agino != NULLAGINO) {
struct xfs_perag *pag;
@@ -2533,6 +2546,7 @@ xfs_ifree_cluster(
struct xfs_perag *pag;
struct xfs_ino_geometry *igeo = M_IGEO(mp);
xfs_ino_t inum;
+ int error;
inum = xic->first_ino;
pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum));
@@ -2561,12 +2575,11 @@ xfs_ifree_cluster(
* complete before we get a lock on it, and hence we may fail
* to mark all the active inodes on the buffer stale.
*/
- bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
- mp->m_bsize * igeo->blocks_per_cluster,
- XBF_UNMAPPED);
-
- if (!bp)
- return -ENOMEM;
+ error = xfs_trans_get_buf(tp, mp->m_ddev_targp, blkno,
+ mp->m_bsize * igeo->blocks_per_cluster,
+ XBF_UNMAPPED, &bp);
+ if (error)
+ return error;
/*
* This buffer may not have been correctly initialised as we
@@ -3196,6 +3209,7 @@ xfs_rename(
struct xfs_trans *tp;
struct xfs_inode *wip = NULL; /* whiteout inode */
struct xfs_inode *inodes[__XFS_SORT_INODES];
+ struct xfs_buf *agibp;
int num_inodes = __XFS_SORT_INODES;
bool new_parent = (src_dp != target_dp);
bool src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode);
@@ -3270,7 +3284,7 @@ xfs_rename(
* tree quota mechanism would be circumvented.
*/
if (unlikely((target_dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
- (xfs_get_projid(target_dp) != xfs_get_projid(src_ip)))) {
+ target_dp->i_d.di_projid != src_ip->i_d.di_projid)) {
error = -EXDEV;
goto out_trans_cancel;
}
@@ -3282,7 +3296,8 @@ xfs_rename(
spaceres);
/*
- * Set up the target.
+ * Check for expected errors before we dirty the transaction
+ * so we can return an error without a transaction abort.
*/
if (target_ip == NULL) {
/*
@@ -3294,6 +3309,45 @@ xfs_rename(
if (error)
goto out_trans_cancel;
}
+ } else {
+ /*
+ * If target exists and it's a directory, check that whether
+ * it can be destroyed.
+ */
+ if (S_ISDIR(VFS_I(target_ip)->i_mode) &&
+ (!xfs_dir_isempty(target_ip) ||
+ (VFS_I(target_ip)->i_nlink > 2))) {
+ error = -EEXIST;
+ goto out_trans_cancel;
+ }
+ }
+
+ /*
+ * Directory entry creation below may acquire the AGF. Remove
+ * the whiteout from the unlinked list first to preserve correct
+ * AGI/AGF locking order. This dirties the transaction so failures
+ * after this point will abort and log recovery will clean up the
+ * mess.
+ *
+ * For whiteouts, we need to bump the link count on the whiteout
+ * inode. After this point, we have a real link, clear the tmpfile
+ * state flag from the inode so it doesn't accidentally get misused
+ * in future.
+ */
+ if (wip) {
+ ASSERT(VFS_I(wip)->i_nlink == 0);
+ error = xfs_iunlink_remove(tp, wip);
+ if (error)
+ goto out_trans_cancel;
+
+ xfs_bumplink(tp, wip);
+ VFS_I(wip)->i_state &= ~I_LINKABLE;
+ }
+
+ /*
+ * Set up the target.
+ */
+ if (target_ip == NULL) {
/*
* If target does not exist and the rename crosses
* directories, adjust the target directory link count
@@ -3312,22 +3366,6 @@ xfs_rename(
}
} else { /* target_ip != NULL */
/*
- * If target exists and it's a directory, check that both
- * target and source are directories and that target can be
- * destroyed, or that neither is a directory.
- */
- if (S_ISDIR(VFS_I(target_ip)->i_mode)) {
- /*
- * Make sure target dir is empty.
- */
- if (!(xfs_dir_isempty(target_ip)) ||
- (VFS_I(target_ip)->i_nlink > 2)) {
- error = -EEXIST;
- goto out_trans_cancel;
- }
- }
-
- /*
* Link the source inode under the target name.
* If the source inode is a directory and we are moving
* it across directories, its ".." entry will be
@@ -3336,6 +3374,22 @@ xfs_rename(
* In case there is already an entry with the same
* name at the destination directory, remove it first.
*/
+
+ /*
+ * Check whether the replace operation will need to allocate
+ * blocks. This happens when the shortform directory lacks
+ * space and we have to convert it to a block format directory.
+ * When more blocks are necessary, we must lock the AGI first
+ * to preserve locking order (AGI -> AGF).
+ */
+ if (xfs_dir2_sf_replace_needblock(target_dp, src_ip->i_ino)) {
+ error = xfs_read_agi(mp, tp,
+ XFS_INO_TO_AGNO(mp, target_ip->i_ino),
+ &agibp);
+ if (error)
+ goto out_trans_cancel;
+ }
+
error = xfs_dir_replace(tp, target_dp, target_name,
src_ip->i_ino, spaceres);
if (error)
@@ -3417,30 +3471,6 @@ xfs_rename(
if (error)
goto out_trans_cancel;
- /*
- * For whiteouts, we need to bump the link count on the whiteout inode.
- * This means that failures all the way up to this point leave the inode
- * on the unlinked list and so cleanup is a simple matter of dropping
- * the remaining reference to it. If we fail here after bumping the link
- * count, we're shutting down the filesystem so we'll never see the
- * intermediate state on disk.
- */
- if (wip) {
- ASSERT(VFS_I(wip)->i_nlink == 0);
- xfs_bumplink(tp, wip);
- error = xfs_iunlink_remove(tp, wip);
- if (error)
- goto out_trans_cancel;
- xfs_trans_log_inode(tp, wip, XFS_ILOG_CORE);
-
- /*
- * Now we have a real link, clear the "I'm a tmpfile" state
- * flag from the inode so it doesn't accidentally get misused in
- * future.
- */
- VFS_I(wip)->i_state &= ~I_LINKABLE;
- }
-
xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
if (new_parent)
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 558173f95a03..492e53992fa9 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -37,9 +37,6 @@ typedef struct xfs_inode {
struct xfs_ifork *i_cowfp; /* copy on write extents */
struct xfs_ifork i_df; /* data fork */
- /* operations vectors */
- const struct xfs_dir_ops *d_ops; /* directory ops vector */
-
/* Transaction and locking information. */
struct xfs_inode_log_item *i_itemp; /* logging information */
mrlock_t i_lock; /* inode lock */
@@ -177,30 +174,11 @@ xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned short flags)
return ret;
}
-/*
- * Project quota id helpers (previously projid was 16bit only
- * and using two 16bit values to hold new 32bit projid was chosen
- * to retain compatibility with "old" filesystems).
- */
-static inline prid_t
-xfs_get_projid(struct xfs_inode *ip)
-{
- return (prid_t)ip->i_d.di_projid_hi << 16 | ip->i_d.di_projid_lo;
-}
-
-static inline void
-xfs_set_projid(struct xfs_inode *ip,
- prid_t projid)
-{
- ip->i_d.di_projid_hi = (uint16_t) (projid >> 16);
- ip->i_d.di_projid_lo = (uint16_t) (projid & 0xffff);
-}
-
static inline prid_t
xfs_get_initial_prid(struct xfs_inode *dp)
{
if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT)
- return xfs_get_projid(dp);
+ return dp->i_d.di_projid;
return XFS_PROJID_DEFAULT;
}
@@ -220,6 +198,13 @@ static inline bool xfs_inode_has_cow_data(struct xfs_inode *ip)
}
/*
+ * Return the buftarg used for data allocations on a given inode.
+ */
+#define xfs_inode_buftarg(ip) \
+ (XFS_IS_REALTIME_INODE(ip) ? \
+ (ip)->i_mount->m_rtdev_targp : (ip)->i_mount->m_ddev_targp)
+
+/*
* In-core inode flags.
*/
#define XFS_IRECLAIM (1 << 0) /* started reclaiming this inode */
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index c9a502eed204..8bd5d0de6321 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -17,6 +17,7 @@
#include "xfs_trans_priv.h"
#include "xfs_buf_item.h"
#include "xfs_log.h"
+#include "xfs_error.h"
#include <linux/iversion.h>
@@ -309,8 +310,8 @@ xfs_inode_to_log_dinode(
to->di_format = from->di_format;
to->di_uid = from->di_uid;
to->di_gid = from->di_gid;
- to->di_projid_lo = from->di_projid_lo;
- to->di_projid_hi = from->di_projid_hi;
+ to->di_projid_lo = from->di_projid & 0xffff;
+ to->di_projid_hi = from->di_projid >> 16;
memset(to->di_pad, 0, sizeof(to->di_pad));
memset(to->di_pad3, 0, sizeof(to->di_pad3));
@@ -340,8 +341,8 @@ xfs_inode_to_log_dinode(
if (from->di_version == 3) {
to->di_changecount = inode_peek_iversion(inode);
- to->di_crtime.t_sec = from->di_crtime.t_sec;
- to->di_crtime.t_nsec = from->di_crtime.t_nsec;
+ to->di_crtime.t_sec = from->di_crtime.tv_sec;
+ to->di_crtime.t_nsec = from->di_crtime.tv_nsec;
to->di_flags2 = from->di_flags2;
to->di_cowextsize = from->di_cowextsize;
to->di_ino = ip->i_ino;
@@ -651,7 +652,7 @@ xfs_inode_item_init(
struct xfs_inode_log_item *iip;
ASSERT(ip->i_itemp == NULL);
- iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, KM_SLEEP);
+ iip = ip->i_itemp = kmem_zone_zalloc(xfs_ili_zone, 0);
iip->ili_inode = ip;
xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE,
@@ -666,7 +667,7 @@ xfs_inode_item_destroy(
xfs_inode_t *ip)
{
kmem_free(ip->i_itemp->ili_item.li_lv_shadow);
- kmem_zone_free(xfs_ili_zone, ip->i_itemp);
+ kmem_cache_free(xfs_ili_zone, ip->i_itemp);
}
@@ -828,8 +829,10 @@ xfs_inode_item_format_convert(
{
struct xfs_inode_log_format_32 *in_f32 = buf->i_addr;
- if (buf->i_len != sizeof(*in_f32))
+ if (buf->i_len != sizeof(*in_f32)) {
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
return -EFSCORRUPTED;
+ }
in_f->ilf_type = in_f32->ilf_type;
in_f->ilf_size = in_f32->ilf_size;
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 6f7848cd5527..d42de92cb283 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -33,6 +33,8 @@
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_health.h"
+#include "xfs_reflink.h"
+#include "xfs_ioctl.h"
#include <linux/mount.h>
#include <linux/namei.h>
@@ -67,7 +69,7 @@ xfs_find_handle(
return -EBADF;
inode = file_inode(f.file);
} else {
- error = user_lpath((const char __user *)hreq->path, &path);
+ error = user_path_at(AT_FDCWD, hreq->path, 0, &path);
if (error)
return error;
inode = d_inode(path.dentry);
@@ -290,82 +292,6 @@ xfs_readlink_by_handle(
return error;
}
-int
-xfs_set_dmattrs(
- xfs_inode_t *ip,
- uint evmask,
- uint16_t state)
-{
- xfs_mount_t *mp = ip->i_mount;
- xfs_trans_t *tp;
- int error;
-
- if (!capable(CAP_SYS_ADMIN))
- return -EPERM;
-
- if (XFS_FORCED_SHUTDOWN(mp))
- return -EIO;
-
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
- if (error)
- return error;
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
-
- ip->i_d.di_dmevmask = evmask;
- ip->i_d.di_dmstate = state;
-
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- error = xfs_trans_commit(tp);
-
- return error;
-}
-
-STATIC int
-xfs_fssetdm_by_handle(
- struct file *parfilp,
- void __user *arg)
-{
- int error;
- struct fsdmidata fsd;
- xfs_fsop_setdm_handlereq_t dmhreq;
- struct dentry *dentry;
-
- if (!capable(CAP_MKNOD))
- return -EPERM;
- if (copy_from_user(&dmhreq, arg, sizeof(xfs_fsop_setdm_handlereq_t)))
- return -EFAULT;
-
- error = mnt_want_write_file(parfilp);
- if (error)
- return error;
-
- dentry = xfs_handlereq_to_dentry(parfilp, &dmhreq.hreq);
- if (IS_ERR(dentry)) {
- mnt_drop_write_file(parfilp);
- return PTR_ERR(dentry);
- }
-
- if (IS_IMMUTABLE(d_inode(dentry)) || IS_APPEND(d_inode(dentry))) {
- error = -EPERM;
- goto out;
- }
-
- if (copy_from_user(&fsd, dmhreq.data, sizeof(fsd))) {
- error = -EFAULT;
- goto out;
- }
-
- error = xfs_set_dmattrs(XFS_I(d_inode(dentry)), fsd.fsd_dmevmask,
- fsd.fsd_dmstate);
-
- out:
- mnt_drop_write_file(parfilp);
- dput(dentry);
- return error;
-}
-
STATIC int
xfs_attrlist_by_handle(
struct file *parfilp,
@@ -396,7 +322,7 @@ xfs_attrlist_by_handle(
if (IS_ERR(dentry))
return PTR_ERR(dentry);
- kbuf = kmem_zalloc_large(al_hreq.buflen, KM_SLEEP);
+ kbuf = kmem_zalloc_large(al_hreq.buflen, 0);
if (!kbuf)
goto out_dput;
@@ -431,14 +357,17 @@ xfs_attrmulti_attr_get(
{
unsigned char *kbuf;
int error = -EFAULT;
+ size_t namelen;
if (*len > XFS_XATTR_SIZE_MAX)
return -EINVAL;
- kbuf = kmem_zalloc_large(*len, KM_SLEEP);
+ kbuf = kmem_zalloc_large(*len, 0);
if (!kbuf)
return -ENOMEM;
- error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags);
+ namelen = strlen(name);
+ error = xfs_attr_get(XFS_I(inode), name, namelen, &kbuf, (int *)len,
+ flags);
if (error)
goto out_kfree;
@@ -460,6 +389,7 @@ xfs_attrmulti_attr_set(
{
unsigned char *kbuf;
int error;
+ size_t namelen;
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
return -EPERM;
@@ -470,7 +400,8 @@ xfs_attrmulti_attr_set(
if (IS_ERR(kbuf))
return PTR_ERR(kbuf);
- error = xfs_attr_set(XFS_I(inode), name, kbuf, len, flags);
+ namelen = strlen(name);
+ error = xfs_attr_set(XFS_I(inode), name, namelen, kbuf, len, flags);
if (!error)
xfs_forget_acl(inode, name, flags);
kfree(kbuf);
@@ -484,10 +415,12 @@ xfs_attrmulti_attr_remove(
uint32_t flags)
{
int error;
+ size_t namelen;
if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
return -EPERM;
- error = xfs_attr_remove(XFS_I(inode), name, flags);
+ namelen = strlen(name);
+ error = xfs_attr_remove(XFS_I(inode), name, namelen, flags);
if (!error)
xfs_forget_acl(inode, name, flags);
return error;
@@ -536,6 +469,13 @@ xfs_attrmulti_by_handle(
error = 0;
for (i = 0; i < am_hreq.opcount; i++) {
+ if ((ops[i].am_flags & ATTR_ROOT) &&
+ (ops[i].am_flags & ATTR_SECURE)) {
+ ops[i].am_error = -EINVAL;
+ continue;
+ }
+ ops[i].am_flags &= ~ATTR_KERNEL_FLAGS;
+
ops[i].am_error = strncpy_from_user((char *)attr_name,
ops[i].am_attrname, MAXNAMELEN);
if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN)
@@ -588,13 +528,12 @@ xfs_attrmulti_by_handle(
int
xfs_ioc_space(
struct file *filp,
- unsigned int cmd,
xfs_flock64_t *bf)
{
struct inode *inode = file_inode(filp);
struct xfs_inode *ip = XFS_I(inode);
struct iattr iattr;
- enum xfs_prealloc_flags flags = 0;
+ enum xfs_prealloc_flags flags = XFS_PREALLOC_CLEAR;
uint iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
int error;
@@ -607,6 +546,9 @@ xfs_ioc_space(
if (!S_ISREG(inode->i_mode))
return -EINVAL;
+ if (xfs_is_always_cow_inode(ip))
+ return -EOPNOTSUPP;
+
if (filp->f_flags & O_DSYNC)
flags |= XFS_PREALLOC_SYNC;
if (filp->f_mode & FMODE_NOCMTIME)
@@ -620,6 +562,7 @@ xfs_ioc_space(
error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
if (error)
goto out_unlock;
+ inode_dio_wait(inode);
switch (bf->l_whence) {
case 0: /*SEEK_SET*/
@@ -635,73 +578,21 @@ xfs_ioc_space(
goto out_unlock;
}
- /*
- * length of <= 0 for resv/unresv/zero is invalid. length for
- * alloc/free is ignored completely and we have no idea what userspace
- * might have set it to, so set it to zero to allow range
- * checks to pass.
- */
- switch (cmd) {
- case XFS_IOC_ZERO_RANGE:
- case XFS_IOC_RESVSP:
- case XFS_IOC_RESVSP64:
- case XFS_IOC_UNRESVSP:
- case XFS_IOC_UNRESVSP64:
- if (bf->l_len <= 0) {
- error = -EINVAL;
- goto out_unlock;
- }
- break;
- default:
- bf->l_len = 0;
- break;
- }
-
- if (bf->l_start < 0 ||
- bf->l_start > inode->i_sb->s_maxbytes ||
- bf->l_start + bf->l_len < 0 ||
- bf->l_start + bf->l_len >= inode->i_sb->s_maxbytes) {
+ if (bf->l_start < 0 || bf->l_start > inode->i_sb->s_maxbytes) {
error = -EINVAL;
goto out_unlock;
}
- switch (cmd) {
- case XFS_IOC_ZERO_RANGE:
- flags |= XFS_PREALLOC_SET;
- error = xfs_zero_file_space(ip, bf->l_start, bf->l_len);
- break;
- case XFS_IOC_RESVSP:
- case XFS_IOC_RESVSP64:
- flags |= XFS_PREALLOC_SET;
- error = xfs_alloc_file_space(ip, bf->l_start, bf->l_len,
- XFS_BMAPI_PREALLOC);
- break;
- case XFS_IOC_UNRESVSP:
- case XFS_IOC_UNRESVSP64:
- error = xfs_free_file_space(ip, bf->l_start, bf->l_len);
- break;
- case XFS_IOC_ALLOCSP:
- case XFS_IOC_ALLOCSP64:
- case XFS_IOC_FREESP:
- case XFS_IOC_FREESP64:
- flags |= XFS_PREALLOC_CLEAR;
- if (bf->l_start > XFS_ISIZE(ip)) {
- error = xfs_alloc_file_space(ip, XFS_ISIZE(ip),
- bf->l_start - XFS_ISIZE(ip), 0);
- if (error)
- goto out_unlock;
- }
-
- iattr.ia_valid = ATTR_SIZE;
- iattr.ia_size = bf->l_start;
-
- error = xfs_vn_setattr_size(file_dentry(filp), &iattr);
- break;
- default:
- ASSERT(0);
- error = -EINVAL;
+ if (bf->l_start > XFS_ISIZE(ip)) {
+ error = xfs_alloc_file_space(ip, XFS_ISIZE(ip),
+ bf->l_start - XFS_ISIZE(ip), 0);
+ if (error)
+ goto out_unlock;
}
+ iattr.ia_valid = ATTR_SIZE;
+ iattr.ia_size = bf->l_start;
+ error = xfs_vn_setattr_size(file_dentry(filp), &iattr);
if (error)
goto out_unlock;
@@ -831,7 +722,7 @@ xfs_bulkstat_fmt(
/*
* Check the incoming bulk request @hdr from userspace and initialize the
* internal @breq bulk request appropriately. Returns 0 if the bulk request
- * should proceed; XFS_ITER_ABORT if there's nothing to do; or the usual
+ * should proceed; -ECANCELED if there's nothing to do; or the usual
* negative error code.
*/
static int
@@ -889,13 +780,13 @@ xfs_bulk_ireq_setup(
/* Asking for an inode past the end of the AG? We're done! */
if (XFS_INO_TO_AGNO(mp, breq->startino) > hdr->agno)
- return XFS_ITER_ABORT;
+ return -ECANCELED;
} else if (hdr->agno)
return -EINVAL;
/* Asking for an inode past the end of the FS? We're done! */
if (XFS_INO_TO_AGNO(mp, breq->startino) >= mp->m_sb.sb_agcount)
- return XFS_ITER_ABORT;
+ return -ECANCELED;
return 0;
}
@@ -936,7 +827,7 @@ xfs_ioc_bulkstat(
return -EFAULT;
error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->bulkstat);
- if (error == XFS_ITER_ABORT)
+ if (error == -ECANCELED)
goto out_teardown;
if (error < 0)
return error;
@@ -986,7 +877,7 @@ xfs_ioc_inumbers(
return -EFAULT;
error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->inumbers);
- if (error == XFS_ITER_ABORT)
+ if (error == -ECANCELED)
goto out_teardown;
if (error < 0)
return error;
@@ -1038,6 +929,10 @@ xfs_ioc_ag_geometry(
if (copy_from_user(&ageo, arg, sizeof(ageo)))
return -EFAULT;
+ if (ageo.ag_flags)
+ return -EINVAL;
+ if (memchr_inv(&ageo.ag_reserved, 0, sizeof(ageo.ag_reserved)))
+ return -EINVAL;
error = xfs_ag_get_geometry(mp, ageo.ag_number, &ageo);
if (error)
@@ -1112,7 +1007,7 @@ xfs_fill_fsxattr(
fa->fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
fa->fsx_cowextsize = ip->i_d.di_cowextsize <<
ip->i_mount->m_sb.sb_blocklog;
- fa->fsx_projid = xfs_get_projid(ip);
+ fa->fsx_projid = ip->i_d.di_projid;
if (attr) {
if (ip->i_afp) {
@@ -1307,11 +1202,9 @@ xfs_ioctl_setattr_dax_invalidate(
* have to check the device for dax support or flush pagecache.
*/
if (fa->fsx_xflags & FS_XFLAG_DAX) {
- if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
- return -EINVAL;
- if (S_ISREG(inode->i_mode) &&
- !bdev_dax_supported(xfs_find_bdev_for_inode(VFS_I(ip)),
- sb->s_blocksize))
+ struct xfs_buftarg *target = xfs_inode_buftarg(ip);
+
+ if (!bdev_dax_supported(target->bt_bdev, sb->s_blocksize))
return -EINVAL;
}
@@ -1566,7 +1459,7 @@ xfs_ioctl_setattr(
}
if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp) &&
- xfs_get_projid(ip) != fa->fsx_projid) {
+ ip->i_d.di_projid != fa->fsx_projid) {
code = xfs_qm_vop_chown_reserve(tp, ip, udqp, NULL, pdqp,
capable(CAP_FOWNER) ? XFS_QMOPT_FORCE_RES : 0);
if (code) /* out of quota */
@@ -1603,13 +1496,13 @@ xfs_ioctl_setattr(
VFS_I(ip)->i_mode &= ~(S_ISUID|S_ISGID);
/* Change the ownerships and register project quota modifications */
- if (xfs_get_projid(ip) != fa->fsx_projid) {
+ if (ip->i_d.di_projid != fa->fsx_projid) {
if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {
olddquot = xfs_qm_vop_chown(tp, ip,
&ip->i_pdquot, pdqp);
}
ASSERT(ip->i_d.di_version > 1);
- xfs_set_projid(ip, fa->fsx_projid);
+ ip->i_d.di_projid = fa->fsx_projid;
}
/*
@@ -1881,7 +1774,7 @@ xfs_ioc_getfsmap(
info.mp = ip->i_mount;
info.data = arg;
error = xfs_getfsmap(ip->i_mount, &xhead, xfs_getfsmap_format, &info);
- if (error == XFS_BTREE_QUERY_RANGE_ABORT) {
+ if (error == -ECANCELED) {
error = 0;
aborted = true;
} else if (error)
@@ -2119,24 +2012,17 @@ xfs_file_ioctl(
return xfs_ioc_setlabel(filp, mp, arg);
case XFS_IOC_ALLOCSP:
case XFS_IOC_FREESP:
- case XFS_IOC_RESVSP:
- case XFS_IOC_UNRESVSP:
case XFS_IOC_ALLOCSP64:
- case XFS_IOC_FREESP64:
- case XFS_IOC_RESVSP64:
- case XFS_IOC_UNRESVSP64:
- case XFS_IOC_ZERO_RANGE: {
+ case XFS_IOC_FREESP64: {
xfs_flock64_t bf;
if (copy_from_user(&bf, arg, sizeof(bf)))
return -EFAULT;
- return xfs_ioc_space(filp, cmd, &bf);
+ return xfs_ioc_space(filp, &bf);
}
case XFS_IOC_DIOINFO: {
- struct dioattr da;
- xfs_buftarg_t *target =
- XFS_IS_REALTIME_INODE(ip) ?
- mp->m_rtdev_targp : mp->m_ddev_targp;
+ struct xfs_buftarg *target = xfs_inode_buftarg(ip);
+ struct dioattr da;
da.d_mem = da.d_miniosz = target->bt_logical_sectorsize;
da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);
@@ -2180,22 +2066,6 @@ xfs_file_ioctl(
case XFS_IOC_SETXFLAGS:
return xfs_ioc_setxflags(ip, filp, arg);
- case XFS_IOC_FSSETDM: {
- struct fsdmidata dmi;
-
- if (copy_from_user(&dmi, arg, sizeof(dmi)))
- return -EFAULT;
-
- error = mnt_want_write_file(filp);
- if (error)
- return error;
-
- error = xfs_set_dmattrs(ip, dmi.fsd_dmevmask,
- dmi.fsd_dmstate);
- mnt_drop_write_file(filp);
- return error;
- }
-
case XFS_IOC_GETBMAP:
case XFS_IOC_GETBMAPA:
case XFS_IOC_GETBMAPX:
@@ -2223,8 +2093,6 @@ xfs_file_ioctl(
return -EFAULT;
return xfs_open_by_handle(filp, &hreq);
}
- case XFS_IOC_FSSETDM_BY_HANDLE:
- return xfs_fssetdm_by_handle(filp, arg);
case XFS_IOC_READLINK_BY_HANDLE: {
xfs_fsop_handlereq_t hreq;
diff --git a/fs/xfs/xfs_ioctl.h b/fs/xfs/xfs_ioctl.h
index 654c0bb1bcf8..420bd95dc326 100644
--- a/fs/xfs/xfs_ioctl.h
+++ b/fs/xfs/xfs_ioctl.h
@@ -9,7 +9,6 @@
extern int
xfs_ioc_space(
struct file *filp,
- unsigned int cmd,
xfs_flock64_t *bf);
int
@@ -71,12 +70,6 @@ xfs_file_compat_ioctl(
unsigned int cmd,
unsigned long arg);
-extern int
-xfs_set_dmattrs(
- struct xfs_inode *ip,
- uint evmask,
- uint16_t state);
-
struct xfs_ibulk;
struct xfs_bstat;
struct xfs_inogrp;
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index 7fcf7569743f..769581a79c58 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -107,7 +107,7 @@ xfs_ioctl32_bstime_copyin(
xfs_bstime_t *bstime,
compat_xfs_bstime_t __user *bstime32)
{
- compat_time_t sec32; /* tv_sec differs on 64 vs. 32 */
+ old_time32_t sec32; /* tv_sec differs on 64 vs. 32 */
if (get_user(sec32, &bstime32->tv_sec) ||
get_user(bstime->tv_nsec, &bstime32->tv_nsec))
@@ -381,7 +381,7 @@ xfs_compat_attrlist_by_handle(
return PTR_ERR(dentry);
error = -ENOMEM;
- kbuf = kmem_zalloc_large(al_hreq.buflen, KM_SLEEP);
+ kbuf = kmem_zalloc_large(al_hreq.buflen, 0);
if (!kbuf)
goto out_dput;
@@ -450,6 +450,13 @@ xfs_compat_attrmulti_by_handle(
error = 0;
for (i = 0; i < am_hreq.opcount; i++) {
+ if ((ops[i].am_flags & ATTR_ROOT) &&
+ (ops[i].am_flags & ATTR_SECURE)) {
+ ops[i].am_error = -EINVAL;
+ continue;
+ }
+ ops[i].am_flags &= ~ATTR_KERNEL_FLAGS;
+
ops[i].am_error = strncpy_from_user((char *)attr_name,
compat_ptr(ops[i].am_attrname),
MAXNAMELEN);
@@ -500,44 +507,6 @@ xfs_compat_attrmulti_by_handle(
return error;
}
-STATIC int
-xfs_compat_fssetdm_by_handle(
- struct file *parfilp,
- void __user *arg)
-{
- int error;
- struct fsdmidata fsd;
- compat_xfs_fsop_setdm_handlereq_t dmhreq;
- struct dentry *dentry;
-
- if (!capable(CAP_MKNOD))
- return -EPERM;
- if (copy_from_user(&dmhreq, arg,
- sizeof(compat_xfs_fsop_setdm_handlereq_t)))
- return -EFAULT;
-
- dentry = xfs_compat_handlereq_to_dentry(parfilp, &dmhreq.hreq);
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
-
- if (IS_IMMUTABLE(d_inode(dentry)) || IS_APPEND(d_inode(dentry))) {
- error = -EPERM;
- goto out;
- }
-
- if (copy_from_user(&fsd, compat_ptr(dmhreq.data), sizeof(fsd))) {
- error = -EFAULT;
- goto out;
- }
-
- error = xfs_set_dmattrs(XFS_I(d_inode(dentry)), fsd.fsd_dmevmask,
- fsd.fsd_dmstate);
-
-out:
- dput(dentry);
- return error;
-}
-
long
xfs_file_compat_ioctl(
struct file *filp,
@@ -547,79 +516,23 @@ xfs_file_compat_ioctl(
struct inode *inode = file_inode(filp);
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
- void __user *arg = (void __user *)p;
+ void __user *arg = compat_ptr(p);
int error;
trace_xfs_file_compat_ioctl(ip);
switch (cmd) {
- /* No size or alignment issues on any arch */
- case XFS_IOC_DIOINFO:
- case XFS_IOC_FSGEOMETRY_V4:
- case XFS_IOC_FSGEOMETRY:
- case XFS_IOC_AG_GEOMETRY:
- case XFS_IOC_FSGETXATTR:
- case XFS_IOC_FSSETXATTR:
- case XFS_IOC_FSGETXATTRA:
- case XFS_IOC_FSSETDM:
- case XFS_IOC_GETBMAP:
- case XFS_IOC_GETBMAPA:
- case XFS_IOC_GETBMAPX:
- case XFS_IOC_FSCOUNTS:
- case XFS_IOC_SET_RESBLKS:
- case XFS_IOC_GET_RESBLKS:
- case XFS_IOC_FSGROWFSLOG:
- case XFS_IOC_GOINGDOWN:
- case XFS_IOC_ERROR_INJECTION:
- case XFS_IOC_ERROR_CLEARALL:
- case FS_IOC_GETFSMAP:
- case XFS_IOC_SCRUB_METADATA:
- case XFS_IOC_BULKSTAT:
- case XFS_IOC_INUMBERS:
- return xfs_file_ioctl(filp, cmd, p);
-#if !defined(BROKEN_X86_ALIGNMENT) || defined(CONFIG_X86_X32)
- /*
- * These are handled fine if no alignment issues. To support x32
- * which uses native 64-bit alignment we must emit these cases in
- * addition to the ia-32 compat set below.
- */
- case XFS_IOC_ALLOCSP:
- case XFS_IOC_FREESP:
- case XFS_IOC_RESVSP:
- case XFS_IOC_UNRESVSP:
- case XFS_IOC_ALLOCSP64:
- case XFS_IOC_FREESP64:
- case XFS_IOC_RESVSP64:
- case XFS_IOC_UNRESVSP64:
- case XFS_IOC_FSGEOMETRY_V1:
- case XFS_IOC_FSGROWFSDATA:
- case XFS_IOC_FSGROWFSRT:
- case XFS_IOC_ZERO_RANGE:
-#ifdef CONFIG_X86_X32
- /*
- * x32 special: this gets a different cmd number from the ia-32 compat
- * case below; the associated data will match native 64-bit alignment.
- */
- case XFS_IOC_SWAPEXT:
-#endif
- return xfs_file_ioctl(filp, cmd, p);
-#endif
#if defined(BROKEN_X86_ALIGNMENT)
case XFS_IOC_ALLOCSP_32:
case XFS_IOC_FREESP_32:
case XFS_IOC_ALLOCSP64_32:
- case XFS_IOC_FREESP64_32:
- case XFS_IOC_RESVSP_32:
- case XFS_IOC_UNRESVSP_32:
- case XFS_IOC_RESVSP64_32:
- case XFS_IOC_UNRESVSP64_32:
- case XFS_IOC_ZERO_RANGE_32: {
+ case XFS_IOC_FREESP64_32: {
struct xfs_flock64 bf;
if (xfs_compat_flock64_copyin(&bf, arg))
return -EFAULT;
cmd = _NATIVE_IOC(cmd, struct xfs_flock64);
- return xfs_ioc_space(filp, cmd, &bf);
+ return xfs_ioc_space(filp, &bf);
}
case XFS_IOC_FSGEOMETRY_V1_32:
return xfs_compat_ioc_fsgeometry_v1(mp, arg);
@@ -702,9 +615,8 @@ xfs_file_compat_ioctl(
return xfs_compat_attrlist_by_handle(filp, arg);
case XFS_IOC_ATTRMULTI_BY_HANDLE_32:
return xfs_compat_attrmulti_by_handle(filp, arg);
- case XFS_IOC_FSSETDM_BY_HANDLE_32:
- return xfs_compat_fssetdm_by_handle(filp, arg);
default:
- return -ENOIOCTLCMD;
+ /* try the native version */
+ return xfs_file_ioctl(filp, cmd, (unsigned long)arg);
}
}
diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h
index 7985344d3aa6..053de7d894cd 100644
--- a/fs/xfs/xfs_ioctl32.h
+++ b/fs/xfs/xfs_ioctl32.h
@@ -32,7 +32,7 @@
#endif
typedef struct compat_xfs_bstime {
- compat_time_t tv_sec; /* seconds */
+ old_time32_t tv_sec; /* seconds */
__s32 tv_nsec; /* and nanoseconds */
} compat_xfs_bstime_t;
@@ -99,7 +99,7 @@ typedef struct compat_xfs_fsop_handlereq {
_IOWR('X', 108, struct compat_xfs_fsop_handlereq)
/* The bstat field in the swapext struct needs translation */
-typedef struct compat_xfs_swapext {
+struct compat_xfs_swapext {
int64_t sx_version; /* version */
int64_t sx_fdtarget; /* fd of target file */
int64_t sx_fdtmp; /* fd of tmp file */
@@ -107,7 +107,7 @@ typedef struct compat_xfs_swapext {
xfs_off_t sx_length; /* leng from offset */
char sx_pad[16]; /* pad space, unused */
struct compat_xfs_bstat sx_stat; /* stat of target b4 copy */
-} __compat_packed compat_xfs_swapext_t;
+} __compat_packed;
#define XFS_IOC_SWAPEXT_32 _IOWR('X', 109, struct compat_xfs_swapext)
@@ -143,15 +143,6 @@ typedef struct compat_xfs_fsop_attrmulti_handlereq {
#define XFS_IOC_ATTRMULTI_BY_HANDLE_32 \
_IOW('X', 123, struct compat_xfs_fsop_attrmulti_handlereq)
-typedef struct compat_xfs_fsop_setdm_handlereq {
- struct compat_xfs_fsop_handlereq hreq; /* handle information */
- /* ptr to struct fsdmidata */
- compat_uptr_t data; /* DMAPI data */
-} compat_xfs_fsop_setdm_handlereq_t;
-
-#define XFS_IOC_FSSETDM_BY_HANDLE_32 \
- _IOW('X', 121, struct compat_xfs_fsop_setdm_handlereq)
-
#ifdef BROKEN_X86_ALIGNMENT
/* on ia32 l_start is on a 32-bit boundary */
typedef struct compat_xfs_flock64 {
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 3a4310d7cb59..bb590a267a7f 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -29,8 +29,8 @@
#include "xfs_reflink.h"
-#define XFS_WRITEIO_ALIGN(mp,off) (((off) >> mp->m_writeio_log) \
- << mp->m_writeio_log)
+#define XFS_ALLOC_ALIGN(mp, off) \
+ (((off) >> mp->m_allocsize_log) << mp->m_allocsize_log)
static int
xfs_alert_fsblock_zero(
@@ -54,11 +54,12 @@ xfs_bmbt_to_iomap(
struct xfs_inode *ip,
struct iomap *iomap,
struct xfs_bmbt_irec *imap,
- bool shared)
+ u16 flags)
{
struct xfs_mount *mp = ip->i_mount;
+ struct xfs_buftarg *target = xfs_inode_buftarg(ip);
- if (unlikely(!imap->br_startblock && !XFS_IS_REALTIME_INODE(ip)))
+ if (unlikely(!xfs_valid_startblock(ip, imap->br_startblock)))
return xfs_alert_fsblock_zero(ip, imap);
if (imap->br_startblock == HOLESTARTBLOCK) {
@@ -77,14 +78,13 @@ xfs_bmbt_to_iomap(
}
iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
- iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip));
- iomap->dax_dev = xfs_find_daxdev_for_inode(VFS_I(ip));
+ iomap->bdev = target->bt_bdev;
+ iomap->dax_dev = target->bt_daxdev;
+ iomap->flags = flags;
if (xfs_ipincount(ip) &&
(ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
iomap->flags |= IOMAP_F_DIRTY;
- if (shared)
- iomap->flags |= IOMAP_F_SHARED;
return 0;
}
@@ -95,18 +95,30 @@ xfs_hole_to_iomap(
xfs_fileoff_t offset_fsb,
xfs_fileoff_t end_fsb)
{
+ struct xfs_buftarg *target = xfs_inode_buftarg(ip);
+
iomap->addr = IOMAP_NULL_ADDR;
iomap->type = IOMAP_HOLE;
iomap->offset = XFS_FSB_TO_B(ip->i_mount, offset_fsb);
iomap->length = XFS_FSB_TO_B(ip->i_mount, end_fsb - offset_fsb);
- iomap->bdev = xfs_find_bdev_for_inode(VFS_I(ip));
- iomap->dax_dev = xfs_find_daxdev_for_inode(VFS_I(ip));
+ iomap->bdev = target->bt_bdev;
+ iomap->dax_dev = target->bt_daxdev;
+}
+
+static inline xfs_fileoff_t
+xfs_iomap_end_fsb(
+ struct xfs_mount *mp,
+ loff_t offset,
+ loff_t count)
+{
+ ASSERT(offset <= mp->m_super->s_maxbytes);
+ return min(XFS_B_TO_FSB(mp, offset + count),
+ XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes));
}
-xfs_extlen_t
+static xfs_extlen_t
xfs_eof_alignment(
- struct xfs_inode *ip,
- xfs_extlen_t extsize)
+ struct xfs_inode *ip)
{
struct xfs_mount *mp = ip->i_mount;
xfs_extlen_t align = 0;
@@ -129,111 +141,80 @@ xfs_eof_alignment(
align = 0;
}
- /*
- * Always round up the allocation request to an extent boundary
- * (when file on a real-time subvolume or has di_extsize hint).
- */
- if (extsize) {
- if (align)
- align = roundup_64(align, extsize);
- else
- align = extsize;
- }
-
return align;
}
-STATIC int
+/*
+ * Check if last_fsb is outside the last extent, and if so grow it to the next
+ * stripe unit boundary.
+ */
+xfs_fileoff_t
xfs_iomap_eof_align_last_fsb(
struct xfs_inode *ip,
- xfs_extlen_t extsize,
- xfs_fileoff_t *last_fsb)
+ xfs_fileoff_t end_fsb)
{
- xfs_extlen_t align = xfs_eof_alignment(ip, extsize);
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+ xfs_extlen_t extsz = xfs_get_extsz_hint(ip);
+ xfs_extlen_t align = xfs_eof_alignment(ip);
+ struct xfs_bmbt_irec irec;
+ struct xfs_iext_cursor icur;
+
+ ASSERT(ifp->if_flags & XFS_IFEXTENTS);
+
+ /*
+ * Always round up the allocation request to the extent hint boundary.
+ */
+ if (extsz) {
+ if (align)
+ align = roundup_64(align, extsz);
+ else
+ align = extsz;
+ }
if (align) {
- xfs_fileoff_t new_last_fsb = roundup_64(*last_fsb, align);
- int eof, error;
+ xfs_fileoff_t aligned_end_fsb = roundup_64(end_fsb, align);
- error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
- if (error)
- return error;
- if (eof)
- *last_fsb = new_last_fsb;
+ xfs_iext_last(ifp, &icur);
+ if (!xfs_iext_get_extent(ifp, &icur, &irec) ||
+ aligned_end_fsb >= irec.br_startoff + irec.br_blockcount)
+ return aligned_end_fsb;
}
- return 0;
+
+ return end_fsb;
}
int
xfs_iomap_write_direct(
- xfs_inode_t *ip,
- xfs_off_t offset,
- size_t count,
- xfs_bmbt_irec_t *imap,
- int nmaps)
+ struct xfs_inode *ip,
+ xfs_fileoff_t offset_fsb,
+ xfs_fileoff_t count_fsb,
+ struct xfs_bmbt_irec *imap)
{
- xfs_mount_t *mp = ip->i_mount;
- xfs_fileoff_t offset_fsb;
- xfs_fileoff_t last_fsb;
- xfs_filblks_t count_fsb, resaligned;
- xfs_extlen_t extsz;
- int nimaps;
- int quota_flag;
- int rt;
- xfs_trans_t *tp;
- uint qblocks, resblks, resrtextents;
- int error;
- int lockmode;
- int bmapi_flags = XFS_BMAPI_PREALLOC;
- uint tflags = 0;
-
- rt = XFS_IS_REALTIME_INODE(ip);
- extsz = xfs_get_extsz_hint(ip);
- lockmode = XFS_ILOCK_SHARED; /* locked by caller */
-
- ASSERT(xfs_isilocked(ip, lockmode));
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_trans *tp;
+ xfs_filblks_t resaligned;
+ int nimaps;
+ int quota_flag;
+ uint qblocks, resblks;
+ unsigned int resrtextents = 0;
+ int error;
+ int bmapi_flags = XFS_BMAPI_PREALLOC;
+ uint tflags = 0;
- offset_fsb = XFS_B_TO_FSBT(mp, offset);
- last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
- if ((offset + count) > XFS_ISIZE(ip)) {
- /*
- * Assert that the in-core extent list is present since this can
- * call xfs_iread_extents() and we only have the ilock shared.
- * This should be safe because the lock was held around a bmapi
- * call in the caller and we only need it to access the in-core
- * list.
- */
- ASSERT(XFS_IFORK_PTR(ip, XFS_DATA_FORK)->if_flags &
- XFS_IFEXTENTS);
- error = xfs_iomap_eof_align_last_fsb(ip, extsz, &last_fsb);
- if (error)
- goto out_unlock;
- } else {
- if (nmaps && (imap->br_startblock == HOLESTARTBLOCK))
- last_fsb = min(last_fsb, (xfs_fileoff_t)
- imap->br_blockcount +
- imap->br_startoff);
- }
- count_fsb = last_fsb - offset_fsb;
ASSERT(count_fsb > 0);
- resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb, extsz);
- if (unlikely(rt)) {
+ resaligned = xfs_aligned_fsb_count(offset_fsb, count_fsb,
+ xfs_get_extsz_hint(ip));
+ if (unlikely(XFS_IS_REALTIME_INODE(ip))) {
resrtextents = qblocks = resaligned;
resrtextents /= mp->m_sb.sb_rextsize;
resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
quota_flag = XFS_QMOPT_RES_RTBLKS;
} else {
- resrtextents = 0;
resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
quota_flag = XFS_QMOPT_RES_REGBLKS;
}
- /*
- * Drop the shared lock acquired by the caller, attach the dquot if
- * necessary and move on to transaction setup.
- */
- xfs_iunlock(ip, lockmode);
error = xfs_qm_dqattach(ip);
if (error)
return error;
@@ -263,8 +244,7 @@ xfs_iomap_write_direct(
if (error)
return error;
- lockmode = XFS_ILOCK_EXCL;
- xfs_ilock(ip, lockmode);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
if (error)
@@ -277,8 +257,8 @@ xfs_iomap_write_direct(
* caller gave to us.
*/
nimaps = 1;
- error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb,
- bmapi_flags, resblks, imap, &nimaps);
+ error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flags, 0,
+ imap, &nimaps);
if (error)
goto out_res_cancel;
@@ -297,11 +277,11 @@ xfs_iomap_write_direct(
goto out_unlock;
}
- if (!(imap->br_startblock || XFS_IS_REALTIME_INODE(ip)))
+ if (unlikely(!xfs_valid_startblock(ip, imap->br_startblock)))
error = xfs_alert_fsblock_zero(ip, imap);
out_unlock:
- xfs_iunlock(ip, lockmode);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
out_res_cancel:
@@ -410,19 +390,19 @@ xfs_iomap_prealloc_size(
if (offset + count <= XFS_ISIZE(ip))
return 0;
- if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) &&
- (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_writeio_blocks)))
+ if (!(mp->m_flags & XFS_MOUNT_ALLOCSIZE) &&
+ (XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_allocsize_blocks)))
return 0;
/*
* If an explicit allocsize is set, the file is small, or we
* are writing behind a hole, then use the minimum prealloc:
*/
- if ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) ||
+ if ((mp->m_flags & XFS_MOUNT_ALLOCSIZE) ||
XFS_ISIZE(ip) < XFS_FSB_TO_B(mp, mp->m_dalign) ||
!xfs_iext_peek_prev_extent(ifp, icur, &prev) ||
prev.br_startoff + prev.br_blockcount < offset_fsb)
- return mp->m_writeio_blocks;
+ return mp->m_allocsize_blocks;
/*
* Determine the initial size of the preallocation. We are beyond the
@@ -515,219 +495,13 @@ xfs_iomap_prealloc_size(
while (alloc_blocks && alloc_blocks >= freesp)
alloc_blocks >>= 4;
check_writeio:
- if (alloc_blocks < mp->m_writeio_blocks)
- alloc_blocks = mp->m_writeio_blocks;
+ if (alloc_blocks < mp->m_allocsize_blocks)
+ alloc_blocks = mp->m_allocsize_blocks;
trace_xfs_iomap_prealloc_size(ip, alloc_blocks, shift,
- mp->m_writeio_blocks);
+ mp->m_allocsize_blocks);
return alloc_blocks;
}
-static int
-xfs_file_iomap_begin_delay(
- struct inode *inode,
- loff_t offset,
- loff_t count,
- unsigned flags,
- struct iomap *iomap)
-{
- struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
- xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
- xfs_fileoff_t maxbytes_fsb =
- XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
- xfs_fileoff_t end_fsb;
- struct xfs_bmbt_irec imap, cmap;
- struct xfs_iext_cursor icur, ccur;
- xfs_fsblock_t prealloc_blocks = 0;
- bool eof = false, cow_eof = false, shared = false;
- int whichfork = XFS_DATA_FORK;
- int error = 0;
-
- ASSERT(!XFS_IS_REALTIME_INODE(ip));
- ASSERT(!xfs_get_extsz_hint(ip));
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
-
- if (unlikely(XFS_TEST_ERROR(
- (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS &&
- XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_BTREE),
- mp, XFS_ERRTAG_BMAPIFORMAT))) {
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
- error = -EFSCORRUPTED;
- goto out_unlock;
- }
-
- XFS_STATS_INC(mp, xs_blk_mapw);
-
- if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) {
- error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
- if (error)
- goto out_unlock;
- }
-
- end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb);
-
- /*
- * Search the data fork fork first to look up our source mapping. We
- * always need the data fork map, as we have to return it to the
- * iomap code so that the higher level write code can read data in to
- * perform read-modify-write cycles for unaligned writes.
- */
- eof = !xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap);
- if (eof)
- imap.br_startoff = end_fsb; /* fake hole until the end */
-
- /* We never need to allocate blocks for zeroing a hole. */
- if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) {
- xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff);
- goto out_unlock;
- }
-
- /*
- * Search the COW fork extent list even if we did not find a data fork
- * extent. This serves two purposes: first this implements the
- * speculative preallocation using cowextsize, so that we also unshare
- * block adjacent to shared blocks instead of just the shared blocks
- * themselves. Second the lookup in the extent list is generally faster
- * than going out to the shared extent tree.
- */
- if (xfs_is_cow_inode(ip)) {
- if (!ip->i_cowfp) {
- ASSERT(!xfs_is_reflink_inode(ip));
- xfs_ifork_init_cow(ip);
- }
- cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb,
- &ccur, &cmap);
- if (!cow_eof && cmap.br_startoff <= offset_fsb) {
- trace_xfs_reflink_cow_found(ip, &cmap);
- whichfork = XFS_COW_FORK;
- goto done;
- }
- }
-
- if (imap.br_startoff <= offset_fsb) {
- /*
- * For reflink files we may need a delalloc reservation when
- * overwriting shared extents. This includes zeroing of
- * existing extents that contain data.
- */
- if (!xfs_is_cow_inode(ip) ||
- ((flags & IOMAP_ZERO) && imap.br_state != XFS_EXT_NORM)) {
- trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK,
- &imap);
- goto done;
- }
-
- xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb);
-
- /* Trim the mapping to the nearest shared extent boundary. */
- error = xfs_inode_need_cow(ip, &imap, &shared);
- if (error)
- goto out_unlock;
-
- /* Not shared? Just report the (potentially capped) extent. */
- if (!shared) {
- trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK,
- &imap);
- goto done;
- }
-
- /*
- * Fork all the shared blocks from our write offset until the
- * end of the extent.
- */
- whichfork = XFS_COW_FORK;
- end_fsb = imap.br_startoff + imap.br_blockcount;
- } else {
- /*
- * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
- * pages to keep the chunks of work done where somewhat
- * symmetric with the work writeback does. This is a completely
- * arbitrary number pulled out of thin air.
- *
- * Note that the values needs to be less than 32-bits wide until
- * the lower level functions are updated.
- */
- count = min_t(loff_t, count, 1024 * PAGE_SIZE);
- end_fsb = min(XFS_B_TO_FSB(mp, offset + count), maxbytes_fsb);
-
- if (xfs_is_always_cow_inode(ip))
- whichfork = XFS_COW_FORK;
- }
-
- error = xfs_qm_dqattach_locked(ip, false);
- if (error)
- goto out_unlock;
-
- if (eof) {
- prealloc_blocks = xfs_iomap_prealloc_size(ip, whichfork, offset,
- count, &icur);
- if (prealloc_blocks) {
- xfs_extlen_t align;
- xfs_off_t end_offset;
- xfs_fileoff_t p_end_fsb;
-
- end_offset = XFS_WRITEIO_ALIGN(mp, offset + count - 1);
- p_end_fsb = XFS_B_TO_FSBT(mp, end_offset) +
- prealloc_blocks;
-
- align = xfs_eof_alignment(ip, 0);
- if (align)
- p_end_fsb = roundup_64(p_end_fsb, align);
-
- p_end_fsb = min(p_end_fsb, maxbytes_fsb);
- ASSERT(p_end_fsb > offset_fsb);
- prealloc_blocks = p_end_fsb - end_fsb;
- }
- }
-
-retry:
- error = xfs_bmapi_reserve_delalloc(ip, whichfork, offset_fsb,
- end_fsb - offset_fsb, prealloc_blocks,
- whichfork == XFS_DATA_FORK ? &imap : &cmap,
- whichfork == XFS_DATA_FORK ? &icur : &ccur,
- whichfork == XFS_DATA_FORK ? eof : cow_eof);
- switch (error) {
- case 0:
- break;
- case -ENOSPC:
- case -EDQUOT:
- /* retry without any preallocation */
- trace_xfs_delalloc_enospc(ip, offset, count);
- if (prealloc_blocks) {
- prealloc_blocks = 0;
- goto retry;
- }
- /*FALLTHRU*/
- default:
- goto out_unlock;
- }
-
- /*
- * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
- * them out if the write happens to fail.
- */
- iomap->flags |= IOMAP_F_NEW;
- trace_xfs_iomap_alloc(ip, offset, count, whichfork,
- whichfork == XFS_DATA_FORK ? &imap : &cmap);
-done:
- if (whichfork == XFS_COW_FORK) {
- if (imap.br_startoff > offset_fsb) {
- xfs_trim_extent(&cmap, offset_fsb,
- imap.br_startoff - offset_fsb);
- error = xfs_bmbt_to_iomap(ip, iomap, &cmap, true);
- goto out_unlock;
- }
- /* ensure we only report blocks we have a reservation for */
- xfs_trim_extent(&imap, cmap.br_startoff, cmap.br_blockcount);
- shared = true;
- }
- error = xfs_bmbt_to_iomap(ip, iomap, &imap, shared);
-out_unlock:
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- return error;
-}
-
int
xfs_iomap_write_unwritten(
xfs_inode_t *ip,
@@ -765,6 +539,11 @@ xfs_iomap_write_unwritten(
*/
resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
+ /* Attach dquots so that bmbt splits are accounted correctly. */
+ error = xfs_qm_dqattach(ip);
+ if (error)
+ return error;
+
do {
/*
* Set up a transaction to convert the range of extents
@@ -783,6 +562,11 @@ xfs_iomap_write_unwritten(
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
+ error = xfs_trans_reserve_quota_nblks(tp, ip, resblks, 0,
+ XFS_QMOPT_RES_REGBLKS);
+ if (error)
+ goto error_on_bmapi_transaction;
+
/*
* Modify the unwritten extent state of the buffer.
*/
@@ -814,7 +598,7 @@ xfs_iomap_write_unwritten(
if (error)
return error;
- if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
+ if (unlikely(!xfs_valid_startblock(ip, imap.br_startblock)))
return xfs_alert_fsblock_zero(ip, &imap);
if ((numblks_fsb = imap.br_blockcount) == 0) {
@@ -840,23 +624,42 @@ error_on_bmapi_transaction:
static inline bool
imap_needs_alloc(
struct inode *inode,
+ unsigned flags,
struct xfs_bmbt_irec *imap,
int nimaps)
{
- return !nimaps ||
- imap->br_startblock == HOLESTARTBLOCK ||
- imap->br_startblock == DELAYSTARTBLOCK ||
- (IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN);
+ /* don't allocate blocks when just zeroing */
+ if (flags & IOMAP_ZERO)
+ return false;
+ if (!nimaps ||
+ imap->br_startblock == HOLESTARTBLOCK ||
+ imap->br_startblock == DELAYSTARTBLOCK)
+ return true;
+ /* we convert unwritten extents before copying the data for DAX */
+ if (IS_DAX(inode) && imap->br_state == XFS_EXT_UNWRITTEN)
+ return true;
+ return false;
}
static inline bool
-needs_cow_for_zeroing(
+imap_needs_cow(
+ struct xfs_inode *ip,
+ unsigned int flags,
struct xfs_bmbt_irec *imap,
int nimaps)
{
- return nimaps &&
- imap->br_startblock != HOLESTARTBLOCK &&
- imap->br_state != XFS_EXT_UNWRITTEN;
+ if (!xfs_is_cow_inode(ip))
+ return false;
+
+ /* when zeroing we don't have to COW holes or unwritten extents */
+ if (flags & IOMAP_ZERO) {
+ if (!nimaps ||
+ imap->br_startblock == HOLESTARTBLOCK ||
+ imap->br_state == XFS_EXT_UNWRITTEN)
+ return false;
+ }
+
+ return true;
}
static int
@@ -872,15 +675,8 @@ xfs_ilock_for_iomap(
* COW writes may allocate delalloc space or convert unwritten COW
* extents, so we need to make sure to take the lock exclusively here.
*/
- if (xfs_is_cow_inode(ip) && is_write) {
- /*
- * FIXME: It could still overwrite on unshared extents and not
- * need allocation.
- */
- if (flags & IOMAP_NOWAIT)
- return -EAGAIN;
+ if (xfs_is_cow_inode(ip) && is_write)
mode = XFS_ILOCK_EXCL;
- }
/*
* Extents not yet cached requires exclusive access, don't block. This
@@ -917,111 +713,73 @@ relock:
}
static int
-xfs_file_iomap_begin(
+xfs_direct_write_iomap_begin(
struct inode *inode,
loff_t offset,
loff_t length,
unsigned flags,
- struct iomap *iomap)
+ struct iomap *iomap,
+ struct iomap *srcmap)
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
- struct xfs_bmbt_irec imap;
- xfs_fileoff_t offset_fsb, end_fsb;
+ struct xfs_bmbt_irec imap, cmap;
+ xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
+ xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length);
int nimaps = 1, error = 0;
bool shared = false;
+ u16 iomap_flags = 0;
unsigned lockmode;
+ ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO));
+
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
- if ((flags & (IOMAP_WRITE | IOMAP_ZERO)) && !(flags & IOMAP_DIRECT) &&
- !IS_DAX(inode) && !xfs_get_extsz_hint(ip)) {
- /* Reserve delalloc blocks for regular writeback. */
- return xfs_file_iomap_begin_delay(inode, offset, length, flags,
- iomap);
- }
-
/*
- * Lock the inode in the manner required for the specified operation and
- * check for as many conditions that would result in blocking as
- * possible. This removes most of the non-blocking checks from the
- * mapping code below.
+ * Writes that span EOF might trigger an IO size update on completion,
+ * so consider them to be dirty for the purposes of O_DSYNC even if
+ * there is no other metadata changes pending or have been made here.
*/
+ if (offset + length > i_size_read(inode))
+ iomap_flags |= IOMAP_F_DIRTY;
+
error = xfs_ilock_for_iomap(ip, flags, &lockmode);
if (error)
return error;
- ASSERT(offset <= mp->m_super->s_maxbytes);
- if (offset > mp->m_super->s_maxbytes - length)
- length = mp->m_super->s_maxbytes - offset;
- offset_fsb = XFS_B_TO_FSBT(mp, offset);
- end_fsb = XFS_B_TO_FSB(mp, offset + length);
-
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
&nimaps, 0);
if (error)
goto out_unlock;
- if (flags & IOMAP_REPORT) {
- /* Trim the mapping to the nearest shared extent boundary. */
- error = xfs_reflink_trim_around_shared(ip, &imap, &shared);
- if (error)
+ if (imap_needs_cow(ip, flags, &imap, nimaps)) {
+ error = -EAGAIN;
+ if (flags & IOMAP_NOWAIT)
goto out_unlock;
- }
-
- /* Non-modifying mapping requested, so we are done */
- if (!(flags & (IOMAP_WRITE | IOMAP_ZERO)))
- goto out_found;
-
- /*
- * Break shared extents if necessary. Checks for non-blocking IO have
- * been done up front, so we don't need to do them here.
- */
- if (xfs_is_cow_inode(ip)) {
- struct xfs_bmbt_irec cmap;
- bool directio = (flags & IOMAP_DIRECT);
-
- /* if zeroing doesn't need COW allocation, then we are done. */
- if ((flags & IOMAP_ZERO) &&
- !needs_cow_for_zeroing(&imap, nimaps))
- goto out_found;
/* may drop and re-acquire the ilock */
- cmap = imap;
- error = xfs_reflink_allocate_cow(ip, &cmap, &shared, &lockmode,
- directio);
+ error = xfs_reflink_allocate_cow(ip, &imap, &cmap, &shared,
+ &lockmode, flags & IOMAP_DIRECT);
if (error)
goto out_unlock;
-
- /*
- * For buffered writes we need to report the address of the
- * previous block (if there was any) so that the higher level
- * write code can perform read-modify-write operations; we
- * won't need the CoW fork mapping until writeback. For direct
- * I/O, which must be block aligned, we need to report the
- * newly allocated address. If the data fork has a hole, copy
- * the COW fork mapping to avoid allocating to the data fork.
- */
- if (directio || imap.br_startblock == HOLESTARTBLOCK)
- imap = cmap;
-
+ if (shared)
+ goto out_found_cow;
end_fsb = imap.br_startoff + imap.br_blockcount;
length = XFS_FSB_TO_B(mp, end_fsb) - offset;
}
- /* Don't need to allocate over holes when doing zeroing operations. */
- if (flags & IOMAP_ZERO)
- goto out_found;
+ if (imap_needs_alloc(inode, flags, &imap, nimaps))
+ goto allocate_blocks;
- if (!imap_needs_alloc(inode, &imap, nimaps))
- goto out_found;
+ xfs_iunlock(ip, lockmode);
+ trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags);
- /* If nowait is set bail since we are going to make allocations. */
- if (flags & IOMAP_NOWAIT) {
- error = -EAGAIN;
+allocate_blocks:
+ error = -EAGAIN;
+ if (flags & IOMAP_NOWAIT)
goto out_unlock;
- }
/*
* We cap the maximum length we map to a sane size to keep the chunks
@@ -1033,48 +791,273 @@ xfs_file_iomap_begin(
* lower level functions are updated.
*/
length = min_t(loff_t, length, 1024 * PAGE_SIZE);
+ end_fsb = xfs_iomap_end_fsb(mp, offset, length);
- /*
- * xfs_iomap_write_direct() expects the shared lock. It is unlocked on
- * return.
- */
- if (lockmode == XFS_ILOCK_EXCL)
- xfs_ilock_demote(ip, lockmode);
- error = xfs_iomap_write_direct(ip, offset, length, &imap,
- nimaps);
+ if (offset + length > XFS_ISIZE(ip))
+ end_fsb = xfs_iomap_eof_align_last_fsb(ip, end_fsb);
+ else if (nimaps && imap.br_startblock == HOLESTARTBLOCK)
+ end_fsb = min(end_fsb, imap.br_startoff + imap.br_blockcount);
+ xfs_iunlock(ip, lockmode);
+
+ error = xfs_iomap_write_direct(ip, offset_fsb, end_fsb - offset_fsb,
+ &imap);
if (error)
return error;
- iomap->flags |= IOMAP_F_NEW;
trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap);
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, iomap_flags | IOMAP_F_NEW);
-out_finish:
- return xfs_bmbt_to_iomap(ip, iomap, &imap, shared);
-
-out_found:
- ASSERT(nimaps);
+out_found_cow:
xfs_iunlock(ip, lockmode);
- trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
- goto out_finish;
+ length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
+ trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap);
+ if (imap.br_startblock != HOLESTARTBLOCK) {
+ error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0);
+ if (error)
+ return error;
+ }
+ return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
out_unlock:
xfs_iunlock(ip, lockmode);
return error;
}
+const struct iomap_ops xfs_direct_write_iomap_ops = {
+ .iomap_begin = xfs_direct_write_iomap_begin,
+};
+
static int
-xfs_file_iomap_end_delalloc(
- struct xfs_inode *ip,
+xfs_buffered_write_iomap_begin(
+ struct inode *inode,
+ loff_t offset,
+ loff_t count,
+ unsigned flags,
+ struct iomap *iomap,
+ struct iomap *srcmap)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
+ xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, count);
+ struct xfs_bmbt_irec imap, cmap;
+ struct xfs_iext_cursor icur, ccur;
+ xfs_fsblock_t prealloc_blocks = 0;
+ bool eof = false, cow_eof = false, shared = false;
+ int allocfork = XFS_DATA_FORK;
+ int error = 0;
+
+ /* we can't use delayed allocations when using extent size hints */
+ if (xfs_get_extsz_hint(ip))
+ return xfs_direct_write_iomap_begin(inode, offset, count,
+ flags, iomap, srcmap);
+
+ ASSERT(!XFS_IS_REALTIME_INODE(ip));
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+ if (XFS_IS_CORRUPT(mp, !xfs_ifork_has_extents(ip, XFS_DATA_FORK)) ||
+ XFS_TEST_ERROR(false, mp, XFS_ERRTAG_BMAPIFORMAT)) {
+ error = -EFSCORRUPTED;
+ goto out_unlock;
+ }
+
+ XFS_STATS_INC(mp, xs_blk_mapw);
+
+ if (!(ip->i_df.if_flags & XFS_IFEXTENTS)) {
+ error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK);
+ if (error)
+ goto out_unlock;
+ }
+
+ /*
+ * Search the data fork fork first to look up our source mapping. We
+ * always need the data fork map, as we have to return it to the
+ * iomap code so that the higher level write code can read data in to
+ * perform read-modify-write cycles for unaligned writes.
+ */
+ eof = !xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap);
+ if (eof)
+ imap.br_startoff = end_fsb; /* fake hole until the end */
+
+ /* We never need to allocate blocks for zeroing a hole. */
+ if ((flags & IOMAP_ZERO) && imap.br_startoff > offset_fsb) {
+ xfs_hole_to_iomap(ip, iomap, offset_fsb, imap.br_startoff);
+ goto out_unlock;
+ }
+
+ /*
+ * Search the COW fork extent list even if we did not find a data fork
+ * extent. This serves two purposes: first this implements the
+ * speculative preallocation using cowextsize, so that we also unshare
+ * block adjacent to shared blocks instead of just the shared blocks
+ * themselves. Second the lookup in the extent list is generally faster
+ * than going out to the shared extent tree.
+ */
+ if (xfs_is_cow_inode(ip)) {
+ if (!ip->i_cowfp) {
+ ASSERT(!xfs_is_reflink_inode(ip));
+ xfs_ifork_init_cow(ip);
+ }
+ cow_eof = !xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb,
+ &ccur, &cmap);
+ if (!cow_eof && cmap.br_startoff <= offset_fsb) {
+ trace_xfs_reflink_cow_found(ip, &cmap);
+ goto found_cow;
+ }
+ }
+
+ if (imap.br_startoff <= offset_fsb) {
+ /*
+ * For reflink files we may need a delalloc reservation when
+ * overwriting shared extents. This includes zeroing of
+ * existing extents that contain data.
+ */
+ if (!xfs_is_cow_inode(ip) ||
+ ((flags & IOMAP_ZERO) && imap.br_state != XFS_EXT_NORM)) {
+ trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK,
+ &imap);
+ goto found_imap;
+ }
+
+ xfs_trim_extent(&imap, offset_fsb, end_fsb - offset_fsb);
+
+ /* Trim the mapping to the nearest shared extent boundary. */
+ error = xfs_bmap_trim_cow(ip, &imap, &shared);
+ if (error)
+ goto out_unlock;
+
+ /* Not shared? Just report the (potentially capped) extent. */
+ if (!shared) {
+ trace_xfs_iomap_found(ip, offset, count, XFS_DATA_FORK,
+ &imap);
+ goto found_imap;
+ }
+
+ /*
+ * Fork all the shared blocks from our write offset until the
+ * end of the extent.
+ */
+ allocfork = XFS_COW_FORK;
+ end_fsb = imap.br_startoff + imap.br_blockcount;
+ } else {
+ /*
+ * We cap the maximum length we map here to MAX_WRITEBACK_PAGES
+ * pages to keep the chunks of work done where somewhat
+ * symmetric with the work writeback does. This is a completely
+ * arbitrary number pulled out of thin air.
+ *
+ * Note that the values needs to be less than 32-bits wide until
+ * the lower level functions are updated.
+ */
+ count = min_t(loff_t, count, 1024 * PAGE_SIZE);
+ end_fsb = xfs_iomap_end_fsb(mp, offset, count);
+
+ if (xfs_is_always_cow_inode(ip))
+ allocfork = XFS_COW_FORK;
+ }
+
+ error = xfs_qm_dqattach_locked(ip, false);
+ if (error)
+ goto out_unlock;
+
+ if (eof) {
+ prealloc_blocks = xfs_iomap_prealloc_size(ip, allocfork, offset,
+ count, &icur);
+ if (prealloc_blocks) {
+ xfs_extlen_t align;
+ xfs_off_t end_offset;
+ xfs_fileoff_t p_end_fsb;
+
+ end_offset = XFS_ALLOC_ALIGN(mp, offset + count - 1);
+ p_end_fsb = XFS_B_TO_FSBT(mp, end_offset) +
+ prealloc_blocks;
+
+ align = xfs_eof_alignment(ip);
+ if (align)
+ p_end_fsb = roundup_64(p_end_fsb, align);
+
+ p_end_fsb = min(p_end_fsb,
+ XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes));
+ ASSERT(p_end_fsb > offset_fsb);
+ prealloc_blocks = p_end_fsb - end_fsb;
+ }
+ }
+
+retry:
+ error = xfs_bmapi_reserve_delalloc(ip, allocfork, offset_fsb,
+ end_fsb - offset_fsb, prealloc_blocks,
+ allocfork == XFS_DATA_FORK ? &imap : &cmap,
+ allocfork == XFS_DATA_FORK ? &icur : &ccur,
+ allocfork == XFS_DATA_FORK ? eof : cow_eof);
+ switch (error) {
+ case 0:
+ break;
+ case -ENOSPC:
+ case -EDQUOT:
+ /* retry without any preallocation */
+ trace_xfs_delalloc_enospc(ip, offset, count);
+ if (prealloc_blocks) {
+ prealloc_blocks = 0;
+ goto retry;
+ }
+ /*FALLTHRU*/
+ default:
+ goto out_unlock;
+ }
+
+ if (allocfork == XFS_COW_FORK) {
+ trace_xfs_iomap_alloc(ip, offset, count, allocfork, &cmap);
+ goto found_cow;
+ }
+
+ /*
+ * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
+ * them out if the write happens to fail.
+ */
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap);
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, IOMAP_F_NEW);
+
+found_imap:
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
+
+found_cow:
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ if (imap.br_startoff <= offset_fsb) {
+ error = xfs_bmbt_to_iomap(ip, srcmap, &imap, 0);
+ if (error)
+ return error;
+ } else {
+ xfs_trim_extent(&cmap, offset_fsb,
+ imap.br_startoff - offset_fsb);
+ }
+ return xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
+
+out_unlock:
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ return error;
+}
+
+static int
+xfs_buffered_write_iomap_end(
+ struct inode *inode,
loff_t offset,
loff_t length,
ssize_t written,
+ unsigned flags,
struct iomap *iomap)
{
+ struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
xfs_fileoff_t start_fsb;
xfs_fileoff_t end_fsb;
int error = 0;
+ if (iomap->type != IOMAP_DELALLOC)
+ return 0;
+
/*
* Behave as if the write failed if drop writes is enabled. Set the NEW
* flag to force delalloc cleanup.
@@ -1119,24 +1102,51 @@ xfs_file_iomap_end_delalloc(
return 0;
}
+const struct iomap_ops xfs_buffered_write_iomap_ops = {
+ .iomap_begin = xfs_buffered_write_iomap_begin,
+ .iomap_end = xfs_buffered_write_iomap_end,
+};
+
static int
-xfs_file_iomap_end(
+xfs_read_iomap_begin(
struct inode *inode,
loff_t offset,
loff_t length,
- ssize_t written,
unsigned flags,
- struct iomap *iomap)
+ struct iomap *iomap,
+ struct iomap *srcmap)
{
- if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC)
- return xfs_file_iomap_end_delalloc(XFS_I(inode), offset,
- length, written, iomap);
- return 0;
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_bmbt_irec imap;
+ xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset);
+ xfs_fileoff_t end_fsb = xfs_iomap_end_fsb(mp, offset, length);
+ int nimaps = 1, error = 0;
+ bool shared = false;
+ unsigned lockmode;
+
+ ASSERT(!(flags & (IOMAP_WRITE | IOMAP_ZERO)));
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return -EIO;
+
+ error = xfs_ilock_for_iomap(ip, flags, &lockmode);
+ if (error)
+ return error;
+ error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
+ &nimaps, 0);
+ if (!error && (flags & IOMAP_REPORT))
+ error = xfs_reflink_trim_around_shared(ip, &imap, &shared);
+ xfs_iunlock(ip, lockmode);
+
+ if (error)
+ return error;
+ trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, shared ? IOMAP_F_SHARED : 0);
}
-const struct iomap_ops xfs_iomap_ops = {
- .iomap_begin = xfs_file_iomap_begin,
- .iomap_end = xfs_file_iomap_end,
+const struct iomap_ops xfs_read_iomap_ops = {
+ .iomap_begin = xfs_read_iomap_begin,
};
static int
@@ -1145,7 +1155,8 @@ xfs_seek_iomap_begin(
loff_t offset,
loff_t length,
unsigned flags,
- struct iomap *iomap)
+ struct iomap *iomap,
+ struct iomap *srcmap)
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
@@ -1178,8 +1189,7 @@ xfs_seek_iomap_begin(
/*
* Fake a hole until the end of the file.
*/
- data_fsb = min(XFS_B_TO_FSB(mp, offset + length),
- XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes));
+ data_fsb = xfs_iomap_end_fsb(mp, offset, length);
}
/*
@@ -1193,7 +1203,7 @@ xfs_seek_iomap_begin(
if (data_fsb < cow_fsb + cmap.br_blockcount)
end_fsb = min(end_fsb, data_fsb);
xfs_trim_extent(&cmap, offset_fsb, end_fsb);
- error = xfs_bmbt_to_iomap(ip, iomap, &cmap, true);
+ error = xfs_bmbt_to_iomap(ip, iomap, &cmap, IOMAP_F_SHARED);
/*
* This is a COW extent, so we must probe the page cache
* because there could be dirty page cache being backed
@@ -1215,7 +1225,7 @@ xfs_seek_iomap_begin(
imap.br_state = XFS_EXT_NORM;
done:
xfs_trim_extent(&imap, offset_fsb, end_fsb);
- error = xfs_bmbt_to_iomap(ip, iomap, &imap, false);
+ error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
out_unlock:
xfs_iunlock(ip, lockmode);
return error;
@@ -1231,7 +1241,8 @@ xfs_xattr_iomap_begin(
loff_t offset,
loff_t length,
unsigned flags,
- struct iomap *iomap)
+ struct iomap *iomap,
+ struct iomap *srcmap)
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
@@ -1261,7 +1272,7 @@ out_unlock:
if (error)
return error;
ASSERT(nimaps);
- return xfs_bmbt_to_iomap(ip, iomap, &imap, false);
+ return xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
}
const struct iomap_ops xfs_xattr_iomap_ops = {
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 5c2f6aa6d78f..7d3703556d0e 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -11,13 +11,14 @@
struct xfs_inode;
struct xfs_bmbt_irec;
-int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,
- struct xfs_bmbt_irec *, int);
+int xfs_iomap_write_direct(struct xfs_inode *ip, xfs_fileoff_t offset_fsb,
+ xfs_fileoff_t count_fsb, struct xfs_bmbt_irec *imap);
int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
+xfs_fileoff_t xfs_iomap_eof_align_last_fsb(struct xfs_inode *ip,
+ xfs_fileoff_t end_fsb);
int xfs_bmbt_to_iomap(struct xfs_inode *, struct iomap *,
- struct xfs_bmbt_irec *, bool shared);
-xfs_extlen_t xfs_eof_alignment(struct xfs_inode *ip, xfs_extlen_t extsize);
+ struct xfs_bmbt_irec *, u16);
static inline xfs_filblks_t
xfs_aligned_fsb_count(
@@ -39,7 +40,9 @@ xfs_aligned_fsb_count(
return count_fsb;
}
-extern const struct iomap_ops xfs_iomap_ops;
+extern const struct iomap_ops xfs_buffered_write_iomap_ops;
+extern const struct iomap_ops xfs_direct_write_iomap_ops;
+extern const struct iomap_ops xfs_read_iomap_ops;
extern const struct iomap_ops xfs_seek_iomap_ops;
extern const struct iomap_ops xfs_xattr_iomap_ops;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index ff3c1fae5357..81f2f93caec0 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -20,6 +20,7 @@
#include "xfs_symlink.h"
#include "xfs_dir2.h"
#include "xfs_iomap.h"
+#include "xfs_error.h"
#include <linux/xattr.h>
#include <linux/posix_acl.h>
@@ -49,8 +50,10 @@ xfs_initxattrs(
int error = 0;
for (xattr = xattr_array; xattr->name != NULL; xattr++) {
- error = xfs_attr_set(ip, xattr->name, xattr->value,
- xattr->value_len, ATTR_SECURE);
+ error = xfs_attr_set(ip, xattr->name,
+ strlen(xattr->name),
+ xattr->value, xattr->value_len,
+ ATTR_SECURE);
if (error < 0)
break;
}
@@ -470,20 +473,57 @@ xfs_vn_get_link_inline(
struct inode *inode,
struct delayed_call *done)
{
+ struct xfs_inode *ip = XFS_I(inode);
char *link;
- ASSERT(XFS_I(inode)->i_df.if_flags & XFS_IFINLINE);
+ ASSERT(ip->i_df.if_flags & XFS_IFINLINE);
/*
* The VFS crashes on a NULL pointer, so return -EFSCORRUPTED if
* if_data is junk.
*/
- link = XFS_I(inode)->i_df.if_u1.if_data;
- if (!link)
+ link = ip->i_df.if_u1.if_data;
+ if (XFS_IS_CORRUPT(ip->i_mount, !link))
return ERR_PTR(-EFSCORRUPTED);
return link;
}
+static uint32_t
+xfs_stat_blksize(
+ struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = ip->i_mount;
+
+ /*
+ * If the file blocks are being allocated from a realtime volume, then
+ * always return the realtime extent size.
+ */
+ if (XFS_IS_REALTIME_INODE(ip))
+ return xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
+
+ /*
+ * Allow large block sizes to be reported to userspace programs if the
+ * "largeio" mount option is used.
+ *
+ * If compatibility mode is specified, simply return the basic unit of
+ * caching so that we don't get inefficient read/modify/write I/O from
+ * user apps. Otherwise....
+ *
+ * If the underlying volume is a stripe, then return the stripe width in
+ * bytes as the recommended I/O size. It is not a stripe and we've set a
+ * default buffered I/O size, return that, otherwise return the compat
+ * default.
+ */
+ if (mp->m_flags & XFS_MOUNT_LARGEIO) {
+ if (mp->m_swidth)
+ return mp->m_swidth << mp->m_sb.sb_blocklog;
+ if (mp->m_flags & XFS_MOUNT_ALLOCSIZE)
+ return 1U << mp->m_allocsize_log;
+ }
+
+ return PAGE_SIZE;
+}
+
STATIC int
xfs_vn_getattr(
const struct path *path,
@@ -516,8 +556,7 @@ xfs_vn_getattr(
if (ip->i_d.di_version == 3) {
if (request_mask & STATX_BTIME) {
stat->result_mask |= STATX_BTIME;
- stat->btime.tv_sec = ip->i_d.di_crtime.t_sec;
- stat->btime.tv_nsec = ip->i_d.di_crtime.t_nsec;
+ stat->btime = ip->i_d.di_crtime;
}
}
@@ -543,16 +582,7 @@ xfs_vn_getattr(
stat->rdev = inode->i_rdev;
break;
default:
- if (XFS_IS_REALTIME_INODE(ip)) {
- /*
- * If the file blocks are being allocated from a
- * realtime volume, then return the inode's realtime
- * extent size or the realtime volume's extent size.
- */
- stat->blksize =
- xfs_get_extsz_hint(ip) << mp->m_sb.sb_blocklog;
- } else
- stat->blksize = xfs_preferred_iosize(mp);
+ stat->blksize = xfs_stat_blksize(ip);
stat->rdev = 0;
break;
}
@@ -664,7 +694,7 @@ xfs_setattr_nonsize(
ASSERT(gdqp == NULL);
error = xfs_qm_vop_dqalloc(ip, xfs_kuid_to_uid(uid),
xfs_kgid_to_gid(gid),
- xfs_get_projid(ip),
+ ip->i_d.di_projid,
qflags, &udqp, &gdqp, NULL);
if (error)
return error;
@@ -793,6 +823,7 @@ xfs_setattr_nonsize(
out_cancel:
xfs_trans_cancel(tp);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
out_dqrele:
xfs_qm_dqrele(udqp);
xfs_qm_dqrele(gdqp);
@@ -882,10 +913,10 @@ xfs_setattr_size(
if (newsize > oldsize) {
trace_xfs_zero_eof(ip, oldsize, newsize - oldsize);
error = iomap_zero_range(inode, oldsize, newsize - oldsize,
- &did_zeroing, &xfs_iomap_ops);
+ &did_zeroing, &xfs_buffered_write_iomap_ops);
} else {
error = iomap_truncate_page(inode, newsize, &did_zeroing,
- &xfs_iomap_ops);
+ &xfs_buffered_write_iomap_ops);
}
if (error)
@@ -1113,7 +1144,7 @@ xfs_vn_fiemap(
&xfs_xattr_iomap_ops);
} else {
error = iomap_fiemap(inode, fieinfo, start, length,
- &xfs_iomap_ops);
+ &xfs_read_iomap_ops);
}
xfs_iunlock(XFS_I(inode), XFS_IOLOCK_SHARED);
@@ -1226,7 +1257,7 @@ xfs_inode_supports_dax(
return false;
/* Device has to support DAX too. */
- return xfs_find_daxdev_for_inode(VFS_I(ip)) != NULL;
+ return xfs_inode_buftarg(ip)->bt_daxdev != NULL;
}
STATIC void
@@ -1289,9 +1320,7 @@ xfs_setup_inode(
lockdep_set_class(&inode->i_rwsem,
&inode->i_sb->s_type->i_mutex_dir_key);
lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class);
- ip->d_ops = ip->i_mount->m_dir_inode_ops;
} else {
- ip->d_ops = ip->i_mount->m_nondir_inode_ops;
lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class);
}
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index f5c955d35be4..4b31c29b7e6b 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -84,7 +84,7 @@ xfs_bulkstat_one_int(
/* xfs_iget returns the following without needing
* further change.
*/
- buf->bs_projectid = xfs_get_projid(ip);
+ buf->bs_projectid = ip->i_d.di_projid;
buf->bs_ino = ino;
buf->bs_uid = dic->di_uid;
buf->bs_gid = dic->di_gid;
@@ -97,8 +97,8 @@ xfs_bulkstat_one_int(
buf->bs_mtime_nsec = inode->i_mtime.tv_nsec;
buf->bs_ctime = inode->i_ctime.tv_sec;
buf->bs_ctime_nsec = inode->i_ctime.tv_nsec;
- buf->bs_btime = dic->di_crtime.t_sec;
- buf->bs_btime_nsec = dic->di_crtime.t_nsec;
+ buf->bs_btime = dic->di_crtime.tv_sec;
+ buf->bs_btime_nsec = dic->di_crtime.tv_nsec;
buf->bs_gen = inode->i_generation;
buf->bs_mode = inode->i_mode;
@@ -137,7 +137,7 @@ xfs_bulkstat_one_int(
xfs_irele(ip);
error = bc->formatter(bc->breq, buf);
- if (error == XFS_IBULK_ABORT)
+ if (error == -ECANCELED)
goto out_advance;
if (error)
goto out;
@@ -169,7 +169,7 @@ xfs_bulkstat_one(
ASSERT(breq->icount == 1);
bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat),
- KM_SLEEP | KM_MAYFAIL);
+ KM_MAYFAIL);
if (!bc.buf)
return -ENOMEM;
@@ -181,7 +181,7 @@ xfs_bulkstat_one(
* If we reported one inode to userspace then we abort because we hit
* the end of the buffer. Don't leak that back to userspace.
*/
- if (error == XFS_IWALK_ABORT)
+ if (error == -ECANCELED)
error = 0;
return error;
@@ -243,7 +243,7 @@ xfs_bulkstat(
return 0;
bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat),
- KM_SLEEP | KM_MAYFAIL);
+ KM_MAYFAIL);
if (!bc.buf)
return -ENOMEM;
@@ -342,7 +342,7 @@ xfs_inumbers_walk(
int error;
error = ic->formatter(ic->breq, &inogrp);
- if (error && error != XFS_IBULK_ABORT)
+ if (error && error != -ECANCELED)
return error;
ic->breq->startino = XFS_AGINO_TO_INO(mp, agno, irec->ir_startino) +
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index e90c1fc5b981..96a1e2a9be3f 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -18,9 +18,6 @@ struct xfs_ibulk {
/* Only iterate within the same AG as startino */
#define XFS_IBULK_SAME_AG (XFS_IWALK_SAME_AG)
-/* Return value that means we want to abort the walk. */
-#define XFS_IBULK_ABORT (XFS_IWALK_ABORT)
-
/*
* Advance the user buffer pointer by one record of the given size. If the
* buffer is now full, return the appropriate error code.
@@ -34,13 +31,21 @@ xfs_ibulk_advance(
breq->ubuffer = b + bytes;
breq->ocount++;
- return breq->ocount == breq->icount ? XFS_IBULK_ABORT : 0;
+ return breq->ocount == breq->icount ? -ECANCELED : 0;
}
/*
* Return stat information in bulk (by-inode) for the filesystem.
*/
+/*
+ * Return codes for the formatter function are 0 to continue iterating, and
+ * non-zero to stop iterating. Any non-zero value will be passed up to the
+ * bulkstat/inumbers caller. The special value -ECANCELED can be used to stop
+ * iteration, as neither bulkstat nor inumbers will ever generate that error
+ * code on their own.
+ */
+
typedef int (*bulkstat_one_fmt_pf)(struct xfs_ibulk *breq,
const struct xfs_bulkstat *bstat);
diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c
index 8c7d727149ea..233dcc8784db 100644
--- a/fs/xfs/xfs_iwalk.c
+++ b/fs/xfs/xfs_iwalk.c
@@ -31,7 +31,7 @@
* inode it finds, it calls a walk function with the relevant inode number and
* a pointer to caller-provided data. The walk function can return the usual
* negative error code to stop the iteration; 0 to continue the iteration; or
- * XFS_IWALK_ABORT to stop the iteration. This return value is returned to the
+ * -ECANCELED to stop the iteration. This return value is returned to the
* caller.
*
* Internally, we allow the walk function to do anything, which means that we
@@ -298,7 +298,8 @@ xfs_iwalk_ag_start(
error = xfs_inobt_get_rec(*curpp, irec, has_more);
if (error)
return error;
- XFS_WANT_CORRUPTED_RETURN(mp, *has_more == 1);
+ if (XFS_IS_CORRUPT(mp, *has_more != 1))
+ return -EFSCORRUPTED;
/*
* If the LE lookup yielded an inobt record before the cursor position,
@@ -616,7 +617,7 @@ xfs_iwalk_threaded(
if (xfs_pwork_ctl_want_abort(&pctl))
break;
- iwag = kmem_zalloc(sizeof(struct xfs_iwalk_ag), KM_SLEEP);
+ iwag = kmem_zalloc(sizeof(struct xfs_iwalk_ag), 0);
iwag->mp = mp;
iwag->iwalk_fn = iwalk_fn;
iwag->data = data;
diff --git a/fs/xfs/xfs_iwalk.h b/fs/xfs/xfs_iwalk.h
index 6c960e10ed4d..37a795f03267 100644
--- a/fs/xfs/xfs_iwalk.h
+++ b/fs/xfs/xfs_iwalk.h
@@ -6,12 +6,17 @@
#ifndef __XFS_IWALK_H__
#define __XFS_IWALK_H__
+/*
+ * Return codes for the inode/inobt walk function are 0 to continue iterating,
+ * and non-zero to stop iterating. Any non-zero value will be passed up to the
+ * iwalk or inobt_walk caller. The special value -ECANCELED can be used to
+ * stop iteration, as neither iwalk nor inobt_walk will ever generate that
+ * error code on their own.
+ */
+
/* Walk all inodes in the filesystem starting from @startino. */
typedef int (*xfs_iwalk_fn)(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_ino_t ino, void *data);
-/* Return values for xfs_iwalk_fn. */
-#define XFS_IWALK_CONTINUE (XFS_ITER_CONTINUE)
-#define XFS_IWALK_ABORT (XFS_ITER_ABORT)
int xfs_iwalk(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t startino,
unsigned int flags, xfs_iwalk_fn iwalk_fn,
@@ -30,8 +35,6 @@ typedef int (*xfs_inobt_walk_fn)(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agnumber_t agno,
const struct xfs_inobt_rec_incore *irec,
void *data);
-/* Return value (for xfs_inobt_walk_fn) that aborts the walk immediately. */
-#define XFS_INOBT_WALK_ABORT (XFS_IWALK_ABORT)
int xfs_inobt_walk(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_ino_t startino, unsigned int flags,
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index ca15105681ca..8738bb03f253 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -223,26 +223,32 @@ int xfs_rw_bdev(struct block_device *bdev, sector_t sector, unsigned int count,
char *data, unsigned int op);
#define ASSERT_ALWAYS(expr) \
- (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
+ (likely(expr) ? (void)0 : assfail(NULL, #expr, __FILE__, __LINE__))
#ifdef DEBUG
#define ASSERT(expr) \
- (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
+ (likely(expr) ? (void)0 : assfail(NULL, #expr, __FILE__, __LINE__))
#else /* !DEBUG */
#ifdef XFS_WARN
#define ASSERT(expr) \
- (likely(expr) ? (void)0 : asswarn(#expr, __FILE__, __LINE__))
+ (likely(expr) ? (void)0 : asswarn(NULL, #expr, __FILE__, __LINE__))
#else /* !DEBUG && !XFS_WARN */
-#define ASSERT(expr) ((void)0)
+#define ASSERT(expr) ((void)0)
#endif /* XFS_WARN */
#endif /* DEBUG */
+#define XFS_IS_CORRUPT(mp, expr) \
+ (unlikely(expr) ? xfs_corruption_error(#expr, XFS_ERRLEVEL_LOW, (mp), \
+ NULL, 0, __FILE__, __LINE__, \
+ __this_address), \
+ true : false)
+
#define STATIC static noinline
#ifdef CONFIG_XFS_RT
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 7fc3c1ad36bc..f6006d94a581 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -57,10 +57,6 @@ xlog_state_get_iclog_space(
struct xlog_ticket *ticket,
int *continued_write,
int *logoffsetp);
-STATIC int
-xlog_state_release_iclog(
- struct xlog *log,
- struct xlog_in_core *iclog);
STATIC void
xlog_state_switch_iclogs(
struct xlog *log,
@@ -83,7 +79,10 @@ STATIC void
xlog_ungrant_log_space(
struct xlog *log,
struct xlog_ticket *ticket);
-
+STATIC void
+xlog_sync(
+ struct xlog *log,
+ struct xlog_in_core *iclog);
#if defined(DEBUG)
STATIC void
xlog_verify_dest_ptr(
@@ -214,15 +213,42 @@ xlog_grant_head_wake(
{
struct xlog_ticket *tic;
int need_bytes;
+ bool woken_task = false;
list_for_each_entry(tic, &head->waiters, t_queue) {
+
+ /*
+ * There is a chance that the size of the CIL checkpoints in
+ * progress at the last AIL push target calculation resulted in
+ * limiting the target to the log head (l_last_sync_lsn) at the
+ * time. This may not reflect where the log head is now as the
+ * CIL checkpoints may have completed.
+ *
+ * Hence when we are woken here, it may be that the head of the
+ * log that has moved rather than the tail. As the tail didn't
+ * move, there still won't be space available for the
+ * reservation we require. However, if the AIL has already
+ * pushed to the target defined by the old log head location, we
+ * will hang here waiting for something else to update the AIL
+ * push target.
+ *
+ * Therefore, if there isn't space to wake the first waiter on
+ * the grant head, we need to push the AIL again to ensure the
+ * target reflects both the current log tail and log head
+ * position before we wait for the tail to move again.
+ */
+
need_bytes = xlog_ticket_reservation(log, head, tic);
- if (*free_bytes < need_bytes)
+ if (*free_bytes < need_bytes) {
+ if (!woken_task)
+ xlog_grant_push_ail(log, need_bytes);
return false;
+ }
*free_bytes -= need_bytes;
trace_xfs_log_grant_wake_up(log, tic);
wake_up_process(tic->t_task);
+ woken_task = true;
}
return true;
@@ -428,8 +454,7 @@ xfs_log_reserve(
XFS_STATS_INC(mp, xs_try_logspace);
ASSERT(*ticp == NULL);
- tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent,
- KM_SLEEP);
+ tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent, 0);
*ticp = tic;
xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt
@@ -526,16 +551,71 @@ xfs_log_done(
return lsn;
}
+static bool
+__xlog_state_release_iclog(
+ struct xlog *log,
+ struct xlog_in_core *iclog)
+{
+ lockdep_assert_held(&log->l_icloglock);
+
+ if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
+ /* update tail before writing to iclog */
+ xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp);
+
+ iclog->ic_state = XLOG_STATE_SYNCING;
+ iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
+ xlog_verify_tail_lsn(log, iclog, tail_lsn);
+ /* cycle incremented when incrementing curr_block */
+ return true;
+ }
+
+ ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
+ return false;
+}
+
+/*
+ * Flush iclog to disk if this is the last reference to the given iclog and the
+ * it is in the WANT_SYNC state.
+ */
+static int
+xlog_state_release_iclog(
+ struct xlog *log,
+ struct xlog_in_core *iclog)
+{
+ lockdep_assert_held(&log->l_icloglock);
+
+ if (iclog->ic_state == XLOG_STATE_IOERROR)
+ return -EIO;
+
+ if (atomic_dec_and_test(&iclog->ic_refcnt) &&
+ __xlog_state_release_iclog(log, iclog)) {
+ spin_unlock(&log->l_icloglock);
+ xlog_sync(log, iclog);
+ spin_lock(&log->l_icloglock);
+ }
+
+ return 0;
+}
+
int
xfs_log_release_iclog(
- struct xfs_mount *mp,
+ struct xfs_mount *mp,
struct xlog_in_core *iclog)
{
- if (xlog_state_release_iclog(mp->m_log, iclog)) {
+ struct xlog *log = mp->m_log;
+ bool sync;
+
+ if (iclog->ic_state == XLOG_STATE_IOERROR) {
xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
return -EIO;
}
+ if (atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock)) {
+ sync = __xlog_state_release_iclog(log, iclog);
+ spin_unlock(&log->l_icloglock);
+ if (sync)
+ xlog_sync(log, iclog);
+ }
return 0;
}
@@ -840,10 +920,7 @@ out_err:
iclog = log->l_iclog;
atomic_inc(&iclog->ic_refcnt);
xlog_state_want_sync(log, iclog);
- spin_unlock(&log->l_icloglock);
error = xlog_state_release_iclog(log, iclog);
-
- spin_lock(&log->l_icloglock);
switch (iclog->ic_state) {
default:
if (!XLOG_FORCED_SHUTDOWN(log)) {
@@ -898,8 +975,8 @@ xfs_log_unmount_write(xfs_mount_t *mp)
#ifdef DEBUG
first_iclog = iclog = log->l_iclog;
do {
- if (!(iclog->ic_state & XLOG_STATE_IOERROR)) {
- ASSERT(iclog->ic_state & XLOG_STATE_ACTIVE);
+ if (iclog->ic_state != XLOG_STATE_IOERROR) {
+ ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
ASSERT(iclog->ic_offset == 0);
}
iclog = iclog->ic_next;
@@ -924,21 +1001,17 @@ xfs_log_unmount_write(xfs_mount_t *mp)
spin_lock(&log->l_icloglock);
iclog = log->l_iclog;
atomic_inc(&iclog->ic_refcnt);
-
xlog_state_want_sync(log, iclog);
- spin_unlock(&log->l_icloglock);
error = xlog_state_release_iclog(log, iclog);
-
- spin_lock(&log->l_icloglock);
-
- if ( ! ( iclog->ic_state == XLOG_STATE_ACTIVE
- || iclog->ic_state == XLOG_STATE_DIRTY
- || iclog->ic_state == XLOG_STATE_IOERROR) ) {
-
- xlog_wait(&iclog->ic_force_wait,
- &log->l_icloglock);
- } else {
+ switch (iclog->ic_state) {
+ case XLOG_STATE_ACTIVE:
+ case XLOG_STATE_DIRTY:
+ case XLOG_STATE_IOERROR:
spin_unlock(&log->l_icloglock);
+ break;
+ default:
+ xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
+ break;
}
}
@@ -1228,7 +1301,7 @@ xlog_ioend_work(
* didn't succeed.
*/
aborted = true;
- } else if (iclog->ic_state & XLOG_STATE_IOERROR) {
+ } else if (iclog->ic_state == XLOG_STATE_IOERROR) {
aborted = true;
}
@@ -1404,6 +1477,7 @@ xlog_alloc_log(
*/
ASSERT(log->l_iclog_size >= 4096);
for (i = 0; i < log->l_iclog_bufs; i++) {
+ int align_mask = xfs_buftarg_dma_alignment(mp->m_logdev_targp);
size_t bvec_size = howmany(log->l_iclog_size, PAGE_SIZE) *
sizeof(struct bio_vec);
@@ -1415,8 +1489,8 @@ xlog_alloc_log(
iclog->ic_prev = prev_iclog;
prev_iclog = iclog;
- iclog->ic_data = kmem_alloc_large(log->l_iclog_size,
- KM_MAYFAIL);
+ iclog->ic_data = kmem_alloc_io(log->l_iclog_size, align_mask,
+ KM_MAYFAIL | KM_ZERO);
if (!iclog->ic_data)
goto out_free_iclog;
#ifdef DEBUG
@@ -1452,7 +1526,7 @@ xlog_alloc_log(
log->l_ioend_workqueue = alloc_workqueue("xfs-log/%s",
WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_HIGHPRI, 0,
- mp->m_fsname);
+ mp->m_super->s_id);
if (!log->l_ioend_workqueue)
goto out_free_iclog;
@@ -1468,6 +1542,8 @@ out_free_iclog:
prev_iclog = iclog->ic_next;
kmem_free(iclog->ic_data);
kmem_free(iclog);
+ if (prev_iclog == log->l_iclog)
+ break;
}
out_free_log:
kmem_free(log);
@@ -1700,7 +1776,7 @@ xlog_write_iclog(
* across the log IO to archieve that.
*/
down(&iclog->ic_sema);
- if (unlikely(iclog->ic_state & XLOG_STATE_IOERROR)) {
+ if (unlikely(iclog->ic_state == XLOG_STATE_IOERROR)) {
/*
* It would seem logical to return EIO here, but we rely on
* the log state machine to propagate I/O errors instead of
@@ -1708,13 +1784,11 @@ xlog_write_iclog(
* the buffer manually, the code needs to be kept in sync
* with the I/O completion path.
*/
- xlog_state_done_syncing(iclog, XFS_LI_ABORTED);
+ xlog_state_done_syncing(iclog, true);
up(&iclog->ic_sema);
return;
}
- iclog->ic_io_size = count;
-
bio_init(&iclog->ic_bio, iclog->ic_bvec, howmany(count, PAGE_SIZE));
bio_set_dev(&iclog->ic_bio, log->l_targ->bt_bdev);
iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno;
@@ -1724,9 +1798,9 @@ xlog_write_iclog(
if (need_flush)
iclog->ic_bio.bi_opf |= REQ_PREFLUSH;
- xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, iclog->ic_io_size);
+ xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count);
if (is_vmalloc_addr(iclog->ic_data))
- flush_kernel_vmap_range(iclog->ic_data, iclog->ic_io_size);
+ flush_kernel_vmap_range(iclog->ic_data, count);
/*
* If this log buffer would straddle the end of the log we will have
@@ -1942,7 +2016,6 @@ xlog_dealloc_log(
/*
* Update counters atomically now that memcpy is done.
*/
-/* ARGSUSED */
static inline void
xlog_state_finish_copy(
struct xlog *log,
@@ -1950,16 +2023,11 @@ xlog_state_finish_copy(
int record_cnt,
int copy_bytes)
{
- spin_lock(&log->l_icloglock);
+ lockdep_assert_held(&log->l_icloglock);
be32_add_cpu(&iclog->ic_header.h_num_logops, record_cnt);
iclog->ic_offset += copy_bytes;
-
- spin_unlock(&log->l_icloglock);
-} /* xlog_state_finish_copy */
-
-
-
+}
/*
* print out info relating to regions written which consume
@@ -2236,15 +2304,18 @@ xlog_write_copy_finish(
int log_offset,
struct xlog_in_core **commit_iclog)
{
+ int error;
+
if (*partial_copy) {
/*
* This iclog has already been marked WANT_SYNC by
* xlog_state_get_iclog_space.
*/
+ spin_lock(&log->l_icloglock);
xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
*record_cnt = 0;
*data_cnt = 0;
- return xlog_state_release_iclog(log, iclog);
+ goto release_iclog;
}
*partial_copy = 0;
@@ -2252,21 +2323,25 @@ xlog_write_copy_finish(
if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) {
/* no more space in this iclog - push it. */
+ spin_lock(&log->l_icloglock);
xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
*record_cnt = 0;
*data_cnt = 0;
- spin_lock(&log->l_icloglock);
xlog_state_want_sync(log, iclog);
- spin_unlock(&log->l_icloglock);
-
if (!commit_iclog)
- return xlog_state_release_iclog(log, iclog);
+ goto release_iclog;
+ spin_unlock(&log->l_icloglock);
ASSERT(flags & XLOG_COMMIT_TRANS);
*commit_iclog = iclog;
}
return 0;
+
+release_iclog:
+ error = xlog_state_release_iclog(log, iclog);
+ spin_unlock(&log->l_icloglock);
+ return error;
}
/*
@@ -2328,7 +2403,7 @@ xlog_write(
int contwr = 0;
int record_cnt = 0;
int data_cnt = 0;
- int error;
+ int error = 0;
*start_lsn = 0;
@@ -2479,13 +2554,17 @@ next_lv:
ASSERT(len == 0);
+ spin_lock(&log->l_icloglock);
xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
- if (!commit_iclog)
- return xlog_state_release_iclog(log, iclog);
+ if (commit_iclog) {
+ ASSERT(flags & XLOG_COMMIT_TRANS);
+ *commit_iclog = iclog;
+ } else {
+ error = xlog_state_release_iclog(log, iclog);
+ }
+ spin_unlock(&log->l_icloglock);
- ASSERT(flags & XLOG_COMMIT_TRANS);
- *commit_iclog = iclog;
- return 0;
+ return error;
}
@@ -2496,21 +2575,35 @@ next_lv:
*****************************************************************************
*/
-/* Clean iclogs starting from the head. This ordering must be
- * maintained, so an iclog doesn't become ACTIVE beyond one that
- * is SYNCING. This is also required to maintain the notion that we use
- * a ordered wait queue to hold off would be writers to the log when every
- * iclog is trying to sync to disk.
+/*
+ * An iclog has just finished IO completion processing, so we need to update
+ * the iclog state and propagate that up into the overall log state. Hence we
+ * prepare the iclog for cleaning, and then clean all the pending dirty iclogs
+ * starting from the head, and then wake up any threads that are waiting for the
+ * iclog to be marked clean.
+ *
+ * The ordering of marking iclogs ACTIVE must be maintained, so an iclog
+ * doesn't become ACTIVE beyond one that is SYNCING. This is also required to
+ * maintain the notion that we use a ordered wait queue to hold off would be
+ * writers to the log when every iclog is trying to sync to disk.
*
- * State Change: DIRTY -> ACTIVE
+ * Caller must hold the icloglock before calling us.
+ *
+ * State Change: !IOERROR -> DIRTY -> ACTIVE
*/
STATIC void
-xlog_state_clean_log(
- struct xlog *log)
+xlog_state_clean_iclog(
+ struct xlog *log,
+ struct xlog_in_core *dirty_iclog)
{
- xlog_in_core_t *iclog;
- int changed = 0;
+ struct xlog_in_core *iclog;
+ int changed = 0;
+
+ /* Prepare the completed iclog. */
+ if (dirty_iclog->ic_state != XLOG_STATE_IOERROR)
+ dirty_iclog->ic_state = XLOG_STATE_DIRTY;
+ /* Walk all the iclogs to update the ordered active state. */
iclog = log->l_iclog;
do {
if (iclog->ic_state == XLOG_STATE_DIRTY) {
@@ -2548,7 +2641,13 @@ xlog_state_clean_log(
iclog = iclog->ic_next;
} while (iclog != log->l_iclog);
- /* log is locked when we are called */
+
+ /*
+ * Wake up threads waiting in xfs_log_force() for the dirty iclog
+ * to be cleaned.
+ */
+ wake_up_all(&dirty_iclog->ic_force_wait);
+
/*
* Change state for the dummy log recording.
* We usually go to NEED. But we go to NEED2 if the changed indicates
@@ -2582,7 +2681,7 @@ xlog_state_clean_log(
ASSERT(0);
}
}
-} /* xlog_state_clean_log */
+}
STATIC xfs_lsn_t
xlog_get_lowest_lsn(
@@ -2592,7 +2691,8 @@ xlog_get_lowest_lsn(
xfs_lsn_t lowest_lsn = 0, lsn;
do {
- if (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))
+ if (iclog->ic_state == XLOG_STATE_ACTIVE ||
+ iclog->ic_state == XLOG_STATE_DIRTY)
continue;
lsn = be64_to_cpu(iclog->ic_header.h_lsn);
@@ -2603,30 +2703,150 @@ xlog_get_lowest_lsn(
return lowest_lsn;
}
+/*
+ * Completion of a iclog IO does not imply that a transaction has completed, as
+ * transactions can be large enough to span many iclogs. We cannot change the
+ * tail of the log half way through a transaction as this may be the only
+ * transaction in the log and moving the tail to point to the middle of it
+ * will prevent recovery from finding the start of the transaction. Hence we
+ * should only update the last_sync_lsn if this iclog contains transaction
+ * completion callbacks on it.
+ *
+ * We have to do this before we drop the icloglock to ensure we are the only one
+ * that can update it.
+ *
+ * If we are moving the last_sync_lsn forwards, we also need to ensure we kick
+ * the reservation grant head pushing. This is due to the fact that the push
+ * target is bound by the current last_sync_lsn value. Hence if we have a large
+ * amount of log space bound up in this committing transaction then the
+ * last_sync_lsn value may be the limiting factor preventing tail pushing from
+ * freeing space in the log. Hence once we've updated the last_sync_lsn we
+ * should push the AIL to ensure the push target (and hence the grant head) is
+ * no longer bound by the old log head location and can move forwards and make
+ * progress again.
+ */
+static void
+xlog_state_set_callback(
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ xfs_lsn_t header_lsn)
+{
+ iclog->ic_state = XLOG_STATE_CALLBACK;
+
+ ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
+ header_lsn) <= 0);
+
+ if (list_empty_careful(&iclog->ic_callbacks))
+ return;
+
+ atomic64_set(&log->l_last_sync_lsn, header_lsn);
+ xlog_grant_push_ail(log, 0);
+}
+
+/*
+ * Return true if we need to stop processing, false to continue to the next
+ * iclog. The caller will need to run callbacks if the iclog is returned in the
+ * XLOG_STATE_CALLBACK state.
+ */
+static bool
+xlog_state_iodone_process_iclog(
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ bool *ioerror)
+{
+ xfs_lsn_t lowest_lsn;
+ xfs_lsn_t header_lsn;
+
+ switch (iclog->ic_state) {
+ case XLOG_STATE_ACTIVE:
+ case XLOG_STATE_DIRTY:
+ /*
+ * Skip all iclogs in the ACTIVE & DIRTY states:
+ */
+ return false;
+ case XLOG_STATE_IOERROR:
+ /*
+ * Between marking a filesystem SHUTDOWN and stopping the log,
+ * we do flush all iclogs to disk (if there wasn't a log I/O
+ * error). So, we do want things to go smoothly in case of just
+ * a SHUTDOWN w/o a LOG_IO_ERROR.
+ */
+ *ioerror = true;
+ return false;
+ case XLOG_STATE_DONE_SYNC:
+ /*
+ * Now that we have an iclog that is in the DONE_SYNC state, do
+ * one more check here to see if we have chased our tail around.
+ * If this is not the lowest lsn iclog, then we will leave it
+ * for another completion to process.
+ */
+ header_lsn = be64_to_cpu(iclog->ic_header.h_lsn);
+ lowest_lsn = xlog_get_lowest_lsn(log);
+ if (lowest_lsn && XFS_LSN_CMP(lowest_lsn, header_lsn) < 0)
+ return false;
+ xlog_state_set_callback(log, iclog, header_lsn);
+ return false;
+ default:
+ /*
+ * Can only perform callbacks in order. Since this iclog is not
+ * in the DONE_SYNC state, we skip the rest and just try to
+ * clean up.
+ */
+ return true;
+ }
+}
+
+/*
+ * Keep processing entries in the iclog callback list until we come around and
+ * it is empty. We need to atomically see that the list is empty and change the
+ * state to DIRTY so that we don't miss any more callbacks being added.
+ *
+ * This function is called with the icloglock held and returns with it held. We
+ * drop it while running callbacks, however, as holding it over thousands of
+ * callbacks is unnecessary and causes excessive contention if we do.
+ */
+static void
+xlog_state_do_iclog_callbacks(
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ bool aborted)
+ __releases(&log->l_icloglock)
+ __acquires(&log->l_icloglock)
+{
+ spin_unlock(&log->l_icloglock);
+ spin_lock(&iclog->ic_callback_lock);
+ while (!list_empty(&iclog->ic_callbacks)) {
+ LIST_HEAD(tmp);
+
+ list_splice_init(&iclog->ic_callbacks, &tmp);
+
+ spin_unlock(&iclog->ic_callback_lock);
+ xlog_cil_process_committed(&tmp, aborted);
+ spin_lock(&iclog->ic_callback_lock);
+ }
+
+ /*
+ * Pick up the icloglock while still holding the callback lock so we
+ * serialise against anyone trying to add more callbacks to this iclog
+ * now we've finished processing.
+ */
+ spin_lock(&log->l_icloglock);
+ spin_unlock(&iclog->ic_callback_lock);
+}
+
STATIC void
xlog_state_do_callback(
struct xlog *log,
- bool aborted,
- struct xlog_in_core *ciclog)
-{
- xlog_in_core_t *iclog;
- xlog_in_core_t *first_iclog; /* used to know when we've
- * processed all iclogs once */
- int flushcnt = 0;
- xfs_lsn_t lowest_lsn;
- int ioerrors; /* counter: iclogs with errors */
- int loopdidcallbacks; /* flag: inner loop did callbacks*/
- int funcdidcallbacks; /* flag: function did callbacks */
- int repeats; /* for issuing console warnings if
- * looping too many times */
- int wake = 0;
+ bool aborted)
+{
+ struct xlog_in_core *iclog;
+ struct xlog_in_core *first_iclog;
+ bool cycled_icloglock;
+ bool ioerror;
+ int flushcnt = 0;
+ int repeats = 0;
spin_lock(&log->l_icloglock);
- first_iclog = iclog = log->l_iclog;
- ioerrors = 0;
- funcdidcallbacks = 0;
- repeats = 0;
-
do {
/*
* Scan all iclogs starting with the one pointed to by the
@@ -2638,134 +2858,29 @@ xlog_state_do_callback(
*/
first_iclog = log->l_iclog;
iclog = log->l_iclog;
- loopdidcallbacks = 0;
+ cycled_icloglock = false;
+ ioerror = false;
repeats++;
do {
+ if (xlog_state_iodone_process_iclog(log, iclog,
+ &ioerror))
+ break;
- /* skip all iclogs in the ACTIVE & DIRTY states */
- if (iclog->ic_state &
- (XLOG_STATE_ACTIVE|XLOG_STATE_DIRTY)) {
+ if (iclog->ic_state != XLOG_STATE_CALLBACK &&
+ iclog->ic_state != XLOG_STATE_IOERROR) {
iclog = iclog->ic_next;
continue;
}
/*
- * Between marking a filesystem SHUTDOWN and stopping
- * the log, we do flush all iclogs to disk (if there
- * wasn't a log I/O error). So, we do want things to
- * go smoothly in case of just a SHUTDOWN w/o a
- * LOG_IO_ERROR.
+ * Running callbacks will drop the icloglock which means
+ * we'll have to run at least one more complete loop.
*/
- if (!(iclog->ic_state & XLOG_STATE_IOERROR)) {
- /*
- * Can only perform callbacks in order. Since
- * this iclog is not in the DONE_SYNC/
- * DO_CALLBACK state, we skip the rest and
- * just try to clean up. If we set our iclog
- * to DO_CALLBACK, we will not process it when
- * we retry since a previous iclog is in the
- * CALLBACK and the state cannot change since
- * we are holding the l_icloglock.
- */
- if (!(iclog->ic_state &
- (XLOG_STATE_DONE_SYNC |
- XLOG_STATE_DO_CALLBACK))) {
- if (ciclog && (ciclog->ic_state ==
- XLOG_STATE_DONE_SYNC)) {
- ciclog->ic_state = XLOG_STATE_DO_CALLBACK;
- }
- break;
- }
- /*
- * We now have an iclog that is in either the
- * DO_CALLBACK or DONE_SYNC states. The other
- * states (WANT_SYNC, SYNCING, or CALLBACK were
- * caught by the above if and are going to
- * clean (i.e. we aren't doing their callbacks)
- * see the above if.
- */
-
- /*
- * We will do one more check here to see if we
- * have chased our tail around.
- */
-
- lowest_lsn = xlog_get_lowest_lsn(log);
- if (lowest_lsn &&
- XFS_LSN_CMP(lowest_lsn,
- be64_to_cpu(iclog->ic_header.h_lsn)) < 0) {
- iclog = iclog->ic_next;
- continue; /* Leave this iclog for
- * another thread */
- }
-
- iclog->ic_state = XLOG_STATE_CALLBACK;
-
-
- /*
- * Completion of a iclog IO does not imply that
- * a transaction has completed, as transactions
- * can be large enough to span many iclogs. We
- * cannot change the tail of the log half way
- * through a transaction as this may be the only
- * transaction in the log and moving th etail to
- * point to the middle of it will prevent
- * recovery from finding the start of the
- * transaction. Hence we should only update the
- * last_sync_lsn if this iclog contains
- * transaction completion callbacks on it.
- *
- * We have to do this before we drop the
- * icloglock to ensure we are the only one that
- * can update it.
- */
- ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
- be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
- if (!list_empty_careful(&iclog->ic_callbacks))
- atomic64_set(&log->l_last_sync_lsn,
- be64_to_cpu(iclog->ic_header.h_lsn));
-
- } else
- ioerrors++;
-
- spin_unlock(&log->l_icloglock);
-
- /*
- * Keep processing entries in the callback list until
- * we come around and it is empty. We need to
- * atomically see that the list is empty and change the
- * state to DIRTY so that we don't miss any more
- * callbacks being added.
- */
- spin_lock(&iclog->ic_callback_lock);
- while (!list_empty(&iclog->ic_callbacks)) {
- LIST_HEAD(tmp);
-
- list_splice_init(&iclog->ic_callbacks, &tmp);
-
- spin_unlock(&iclog->ic_callback_lock);
- xlog_cil_process_committed(&tmp, aborted);
- spin_lock(&iclog->ic_callback_lock);
- }
-
- loopdidcallbacks++;
- funcdidcallbacks++;
-
- spin_lock(&log->l_icloglock);
- spin_unlock(&iclog->ic_callback_lock);
- if (!(iclog->ic_state & XLOG_STATE_IOERROR))
- iclog->ic_state = XLOG_STATE_DIRTY;
-
- /*
- * Transition from DIRTY to ACTIVE if applicable.
- * NOP if STATE_IOERROR.
- */
- xlog_state_clean_log(log);
-
- /* wake up threads waiting in xfs_log_force() */
- wake_up_all(&iclog->ic_force_wait);
+ cycled_icloglock = true;
+ xlog_state_do_iclog_callbacks(log, iclog, aborted);
+ xlog_state_clean_iclog(log, iclog);
iclog = iclog->ic_next;
} while (first_iclog != iclog);
@@ -2776,50 +2891,13 @@ xlog_state_do_callback(
"%s: possible infinite loop (%d iterations)",
__func__, flushcnt);
}
- } while (!ioerrors && loopdidcallbacks);
+ } while (!ioerror && cycled_icloglock);
-#ifdef DEBUG
- /*
- * Make one last gasp attempt to see if iclogs are being left in limbo.
- * If the above loop finds an iclog earlier than the current iclog and
- * in one of the syncing states, the current iclog is put into
- * DO_CALLBACK and the callbacks are deferred to the completion of the
- * earlier iclog. Walk the iclogs in order and make sure that no iclog
- * is in DO_CALLBACK unless an earlier iclog is in one of the syncing
- * states.
- *
- * Note that SYNCING|IOABORT is a valid state so we cannot just check
- * for ic_state == SYNCING.
- */
- if (funcdidcallbacks) {
- first_iclog = iclog = log->l_iclog;
- do {
- ASSERT(iclog->ic_state != XLOG_STATE_DO_CALLBACK);
- /*
- * Terminate the loop if iclogs are found in states
- * which will cause other threads to clean up iclogs.
- *
- * SYNCING - i/o completion will go through logs
- * DONE_SYNC - interrupt thread should be waiting for
- * l_icloglock
- * IOERROR - give up hope all ye who enter here
- */
- if (iclog->ic_state == XLOG_STATE_WANT_SYNC ||
- iclog->ic_state & XLOG_STATE_SYNCING ||
- iclog->ic_state == XLOG_STATE_DONE_SYNC ||
- iclog->ic_state == XLOG_STATE_IOERROR )
- break;
- iclog = iclog->ic_next;
- } while (first_iclog != iclog);
- }
-#endif
+ if (log->l_iclog->ic_state == XLOG_STATE_ACTIVE ||
+ log->l_iclog->ic_state == XLOG_STATE_IOERROR)
+ wake_up_all(&log->l_flush_wait);
- if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR))
- wake = 1;
spin_unlock(&log->l_icloglock);
-
- if (wake)
- wake_up_all(&log->l_flush_wait);
}
@@ -2845,8 +2923,6 @@ xlog_state_done_syncing(
spin_lock(&log->l_icloglock);
- ASSERT(iclog->ic_state == XLOG_STATE_SYNCING ||
- iclog->ic_state == XLOG_STATE_IOERROR);
ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
/*
@@ -2855,8 +2931,10 @@ xlog_state_done_syncing(
* and none should ever be attempted to be written to disk
* again.
*/
- if (iclog->ic_state != XLOG_STATE_IOERROR)
+ if (iclog->ic_state == XLOG_STATE_SYNCING)
iclog->ic_state = XLOG_STATE_DONE_SYNC;
+ else
+ ASSERT(iclog->ic_state == XLOG_STATE_IOERROR);
/*
* Someone could be sleeping prior to writing out the next
@@ -2865,7 +2943,7 @@ xlog_state_done_syncing(
*/
wake_up_all(&iclog->ic_write_wait);
spin_unlock(&log->l_icloglock);
- xlog_state_do_callback(log, aborted, iclog); /* also cleans log */
+ xlog_state_do_callback(log, aborted); /* also cleans log */
} /* xlog_state_done_syncing */
@@ -2899,7 +2977,6 @@ xlog_state_get_iclog_space(
int log_offset;
xlog_rec_header_t *head;
xlog_in_core_t *iclog;
- int error;
restart:
spin_lock(&log->l_icloglock);
@@ -2948,24 +3025,22 @@ restart:
* can fit into remaining data section.
*/
if (iclog->ic_size - iclog->ic_offset < 2*sizeof(xlog_op_header_t)) {
+ int error = 0;
+
xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
/*
- * If I'm the only one writing to this iclog, sync it to disk.
- * We need to do an atomic compare and decrement here to avoid
- * racing with concurrent atomic_dec_and_lock() calls in
+ * If we are the only one writing to this iclog, sync it to
+ * disk. We need to do an atomic compare and decrement here to
+ * avoid racing with concurrent atomic_dec_and_lock() calls in
* xlog_state_release_iclog() when there is more than one
* reference to the iclog.
*/
- if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1)) {
- /* we are the only one */
- spin_unlock(&log->l_icloglock);
+ if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1))
error = xlog_state_release_iclog(log, iclog);
- if (error)
- return error;
- } else {
- spin_unlock(&log->l_icloglock);
- }
+ spin_unlock(&log->l_icloglock);
+ if (error)
+ return error;
goto restart;
}
@@ -3077,60 +3152,6 @@ xlog_ungrant_log_space(
}
/*
- * Flush iclog to disk if this is the last reference to the given iclog and
- * the WANT_SYNC bit is set.
- *
- * When this function is entered, the iclog is not necessarily in the
- * WANT_SYNC state. It may be sitting around waiting to get filled.
- *
- *
- */
-STATIC int
-xlog_state_release_iclog(
- struct xlog *log,
- struct xlog_in_core *iclog)
-{
- int sync = 0; /* do we sync? */
-
- if (iclog->ic_state & XLOG_STATE_IOERROR)
- return -EIO;
-
- ASSERT(atomic_read(&iclog->ic_refcnt) > 0);
- if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock))
- return 0;
-
- if (iclog->ic_state & XLOG_STATE_IOERROR) {
- spin_unlock(&log->l_icloglock);
- return -EIO;
- }
- ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE ||
- iclog->ic_state == XLOG_STATE_WANT_SYNC);
-
- if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
- /* update tail before writing to iclog */
- xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp);
- sync++;
- iclog->ic_state = XLOG_STATE_SYNCING;
- iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
- xlog_verify_tail_lsn(log, iclog, tail_lsn);
- /* cycle incremented when incrementing curr_block */
- }
- spin_unlock(&log->l_icloglock);
-
- /*
- * We let the log lock go, so it's possible that we hit a log I/O
- * error or some other SHUTDOWN condition that marks the iclog
- * as XLOG_STATE_IOERROR before the bwrite. However, we know that
- * this iclog has consistent data, so we ignore IOERROR
- * flags after this point.
- */
- if (sync)
- xlog_sync(log, iclog);
- return 0;
-} /* xlog_state_release_iclog */
-
-
-/*
* This routine will mark the current iclog in the ring as WANT_SYNC
* and move the current iclog pointer to the next iclog in the ring.
* When this routine is called from xlog_state_get_iclog_space(), the
@@ -3223,7 +3244,7 @@ xfs_log_force(
spin_lock(&log->l_icloglock);
iclog = log->l_iclog;
- if (iclog->ic_state & XLOG_STATE_IOERROR)
+ if (iclog->ic_state == XLOG_STATE_IOERROR)
goto out_error;
if (iclog->ic_state == XLOG_STATE_DIRTY ||
@@ -3253,12 +3274,9 @@ xfs_log_force(
atomic_inc(&iclog->ic_refcnt);
lsn = be64_to_cpu(iclog->ic_header.h_lsn);
xlog_state_switch_iclogs(log, iclog, 0);
- spin_unlock(&log->l_icloglock);
-
if (xlog_state_release_iclog(log, iclog))
- return -EIO;
+ goto out_error;
- spin_lock(&log->l_icloglock);
if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn ||
iclog->ic_state == XLOG_STATE_DIRTY)
goto out_unlock;
@@ -3283,11 +3301,11 @@ xfs_log_force(
if (!(flags & XFS_LOG_SYNC))
goto out_unlock;
- if (iclog->ic_state & XLOG_STATE_IOERROR)
+ if (iclog->ic_state == XLOG_STATE_IOERROR)
goto out_error;
XFS_STATS_INC(mp, xs_log_force_sleep);
xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
- if (iclog->ic_state & XLOG_STATE_IOERROR)
+ if (iclog->ic_state == XLOG_STATE_IOERROR)
return -EIO;
return 0;
@@ -3312,7 +3330,7 @@ __xfs_log_force_lsn(
spin_lock(&log->l_icloglock);
iclog = log->l_iclog;
- if (iclog->ic_state & XLOG_STATE_IOERROR)
+ if (iclog->ic_state == XLOG_STATE_IOERROR)
goto out_error;
while (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
@@ -3341,10 +3359,8 @@ __xfs_log_force_lsn(
* will go out then.
*/
if (!already_slept &&
- (iclog->ic_prev->ic_state &
- (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
- ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
-
+ (iclog->ic_prev->ic_state == XLOG_STATE_WANT_SYNC ||
+ iclog->ic_prev->ic_state == XLOG_STATE_SYNCING)) {
XFS_STATS_INC(mp, xs_log_force_sleep);
xlog_wait(&iclog->ic_prev->ic_write_wait,
@@ -3353,24 +3369,23 @@ __xfs_log_force_lsn(
}
atomic_inc(&iclog->ic_refcnt);
xlog_state_switch_iclogs(log, iclog, 0);
- spin_unlock(&log->l_icloglock);
if (xlog_state_release_iclog(log, iclog))
- return -EIO;
+ goto out_error;
if (log_flushed)
*log_flushed = 1;
- spin_lock(&log->l_icloglock);
}
if (!(flags & XFS_LOG_SYNC) ||
- (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY)))
+ (iclog->ic_state == XLOG_STATE_ACTIVE ||
+ iclog->ic_state == XLOG_STATE_DIRTY))
goto out_unlock;
- if (iclog->ic_state & XLOG_STATE_IOERROR)
+ if (iclog->ic_state == XLOG_STATE_IOERROR)
goto out_error;
XFS_STATS_INC(mp, xs_log_force_sleep);
xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
- if (iclog->ic_state & XLOG_STATE_IOERROR)
+ if (iclog->ic_state == XLOG_STATE_IOERROR)
return -EIO;
return 0;
@@ -3433,8 +3448,8 @@ xlog_state_want_sync(
if (iclog->ic_state == XLOG_STATE_ACTIVE) {
xlog_state_switch_iclogs(log, iclog, 0);
} else {
- ASSERT(iclog->ic_state &
- (XLOG_STATE_WANT_SYNC|XLOG_STATE_IOERROR));
+ ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC ||
+ iclog->ic_state == XLOG_STATE_IOERROR);
}
}
@@ -3455,7 +3470,7 @@ xfs_log_ticket_put(
{
ASSERT(atomic_read(&ticket->t_ref) > 0);
if (atomic_dec_and_test(&ticket->t_ref))
- kmem_zone_free(xfs_log_ticket_zone, ticket);
+ kmem_cache_free(xfs_log_ticket_zone, ticket);
}
xlog_ticket_t *
@@ -3811,7 +3826,7 @@ xlog_state_ioerror(
xlog_in_core_t *iclog, *ic;
iclog = log->l_iclog;
- if (! (iclog->ic_state & XLOG_STATE_IOERROR)) {
+ if (iclog->ic_state != XLOG_STATE_IOERROR) {
/*
* Mark all the incore logs IOERROR.
* From now on, no log flushes will result.
@@ -3871,7 +3886,7 @@ xfs_log_force_umount(
* Somebody could've already done the hard work for us.
* No need to get locks for this.
*/
- if (logerror && log->l_iclog->ic_state & XLOG_STATE_IOERROR) {
+ if (logerror && log->l_iclog->ic_state == XLOG_STATE_IOERROR) {
ASSERT(XLOG_FORCED_SHUTDOWN(log));
return 1;
}
@@ -3919,22 +3934,11 @@ xfs_log_force_umount(
* item committed callback functions will do this again under lock to
* avoid races.
*/
+ spin_lock(&log->l_cilp->xc_push_lock);
wake_up_all(&log->l_cilp->xc_commit_wait);
- xlog_state_do_callback(log, true, NULL);
+ spin_unlock(&log->l_cilp->xc_push_lock);
+ xlog_state_do_callback(log, true);
-#ifdef XFSERRORDEBUG
- {
- xlog_in_core_t *iclog;
-
- spin_lock(&log->l_icloglock);
- iclog = log->l_iclog;
- do {
- ASSERT(iclog->ic_callback == 0);
- iclog = iclog->ic_next;
- } while (iclog != log->l_iclog);
- spin_unlock(&log->l_icloglock);
- }
-#endif
/* return non-zero if log IOERROR transition had already happened */
return retval;
}
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index fa5602d0fd7f..48435cf2aa16 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -38,7 +38,7 @@ xlog_cil_ticket_alloc(
struct xlog_ticket *tic;
tic = xlog_ticket_alloc(log, 0, 1, XFS_TRANSACTION, 0,
- KM_SLEEP|KM_NOFS);
+ KM_NOFS);
/*
* set the current reservation to zero so we know to steal the basic
@@ -179,14 +179,14 @@ xlog_cil_alloc_shadow_bufs(
/*
* We free and allocate here as a realloc would copy
- * unecessary data. We don't use kmem_zalloc() for the
+ * unnecessary data. We don't use kmem_zalloc() for the
* same reason - we don't need to zero the data area in
* the buffer, only the log vector header and the iovec
* storage.
*/
kmem_free(lip->li_lv_shadow);
- lv = kmem_alloc_large(buf_size, KM_SLEEP | KM_NOFS);
+ lv = kmem_alloc_large(buf_size, KM_NOFS);
memset(lv, 0, xlog_cil_iovec_space(niovecs));
lv->lv_item = lip;
@@ -660,7 +660,7 @@ xlog_cil_push(
if (!cil)
return 0;
- new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
+ new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_NOFS);
new_ctx->ticket = xlog_cil_ticket_alloc(log);
down_write(&cil->xc_ctx_lock);
@@ -682,7 +682,7 @@ xlog_cil_push(
}
- /* check for a previously pushed seqeunce */
+ /* check for a previously pushed sequence */
if (push_seq < cil->xc_ctx->sequence) {
spin_unlock(&cil->xc_push_lock);
goto out_skip;
@@ -847,7 +847,7 @@ restart:
goto out_abort;
spin_lock(&commit_iclog->ic_callback_lock);
- if (commit_iclog->ic_state & XLOG_STATE_IOERROR) {
+ if (commit_iclog->ic_state == XLOG_STATE_IOERROR) {
spin_unlock(&commit_iclog->ic_callback_lock);
goto out_abort;
}
@@ -1179,11 +1179,11 @@ xlog_cil_init(
struct xfs_cil *cil;
struct xfs_cil_ctx *ctx;
- cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL);
+ cil = kmem_zalloc(sizeof(*cil), KM_MAYFAIL);
if (!cil)
return -ENOMEM;
- ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL);
+ ctx = kmem_zalloc(sizeof(*ctx), KM_MAYFAIL);
if (!ctx) {
kmem_free(cil);
return -ENOMEM;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index b880c23cb6e4..b192c5a9f9fd 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -40,17 +40,15 @@ static inline uint xlog_get_client_id(__be32 i)
/*
* In core log state
*/
-#define XLOG_STATE_ACTIVE 0x0001 /* Current IC log being written to */
-#define XLOG_STATE_WANT_SYNC 0x0002 /* Want to sync this iclog; no more writes */
-#define XLOG_STATE_SYNCING 0x0004 /* This IC log is syncing */
-#define XLOG_STATE_DONE_SYNC 0x0008 /* Done syncing to disk */
-#define XLOG_STATE_DO_CALLBACK \
- 0x0010 /* Process callback functions */
-#define XLOG_STATE_CALLBACK 0x0020 /* Callback functions now */
-#define XLOG_STATE_DIRTY 0x0040 /* Dirty IC log, not ready for ACTIVE status*/
-#define XLOG_STATE_IOERROR 0x0080 /* IO error happened in sync'ing log */
-#define XLOG_STATE_ALL 0x7FFF /* All possible valid flags */
-#define XLOG_STATE_NOTUSED 0x8000 /* This IC log not being used */
+enum xlog_iclog_state {
+ XLOG_STATE_ACTIVE, /* Current IC log being written to */
+ XLOG_STATE_WANT_SYNC, /* Want to sync this iclog; no more writes */
+ XLOG_STATE_SYNCING, /* This IC log is syncing */
+ XLOG_STATE_DONE_SYNC, /* Done syncing to disk */
+ XLOG_STATE_CALLBACK, /* Callback functions now */
+ XLOG_STATE_DIRTY, /* Dirty IC log, not ready for ACTIVE status */
+ XLOG_STATE_IOERROR, /* IO error happened in sync'ing log */
+};
/*
* Flags to log ticket
@@ -179,8 +177,6 @@ typedef struct xlog_ticket {
* - ic_next is the pointer to the next iclog in the ring.
* - ic_log is a pointer back to the global log structure.
* - ic_size is the full size of the log buffer, minus the cycle headers.
- * - ic_io_size is the size of the currently pending log buffer write, which
- * might be smaller than ic_size
* - ic_offset is the current number of bytes written to in this iclog.
* - ic_refcnt is bumped when someone is writing to the log.
* - ic_state is the state of the iclog.
@@ -205,9 +201,8 @@ typedef struct xlog_in_core {
struct xlog_in_core *ic_prev;
struct xlog *ic_log;
u32 ic_size;
- u32 ic_io_size;
u32 ic_offset;
- unsigned short ic_state;
+ enum xlog_iclog_state ic_state;
char *ic_datap; /* pointer to iclog data */
/* Callback structures need their own cacheline */
@@ -399,8 +394,6 @@ struct xlog {
/* The following field are used for debugging; need to hold icloglock */
#ifdef DEBUG
void *l_iclog_bak[XLOG_MAX_ICLOGS];
- /* log record crc error injection factor */
- uint32_t l_badcrc_factor;
#endif
/* log recovery lsn tracking (for buffer submission */
xfs_lsn_t l_recovery_lsn;
@@ -542,7 +535,11 @@ xlog_cil_force(struct xlog *log)
* by a spinlock. This matches the semantics of all the wait queues used in the
* log code.
*/
-static inline void xlog_wait(wait_queue_head_t *wq, spinlock_t *lock)
+static inline void
+xlog_wait(
+ struct wait_queue_head *wq,
+ struct spinlock *lock)
+ __releases(lock)
{
DECLARE_WAITQUEUE(wait, current);
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 13d1d3e95b88..25cfc85dbaa7 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -97,14 +97,15 @@ xlog_alloc_buffer(
struct xlog *log,
int nbblks)
{
+ int align_mask = xfs_buftarg_dma_alignment(log->l_targ);
+
/*
* Pass log block 0 since we don't have an addr yet, buffer will be
* verified on read.
*/
- if (!xlog_verify_bno(log, 0, nbblks)) {
+ if (XFS_IS_CORRUPT(log->l_mp, !xlog_verify_bno(log, 0, nbblks))) {
xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
nbblks);
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
return NULL;
}
@@ -125,7 +126,7 @@ xlog_alloc_buffer(
if (nbblks > 1 && log->l_sectBBsize > 1)
nbblks += log->l_sectBBsize;
nbblks = round_up(nbblks, log->l_sectBBsize);
- return kmem_alloc_large(BBTOB(nbblks), KM_MAYFAIL);
+ return kmem_alloc_io(BBTOB(nbblks), align_mask, KM_MAYFAIL | KM_ZERO);
}
/*
@@ -150,11 +151,10 @@ xlog_do_io(
{
int error;
- if (!xlog_verify_bno(log, blk_no, nbblks)) {
+ if (XFS_IS_CORRUPT(log->l_mp, !xlog_verify_bno(log, blk_no, nbblks))) {
xfs_warn(log->l_mp,
"Invalid log block/length (0x%llx, 0x%x) for buffer",
blk_no, nbblks);
- XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
return -EFSCORRUPTED;
}
@@ -242,19 +242,17 @@ xlog_header_check_recover(
* (XLOG_FMT_UNKNOWN). This stops us from trying to recover
* a dirty log created in IRIX.
*/
- if (unlikely(head->h_fmt != cpu_to_be32(XLOG_FMT))) {
+ if (XFS_IS_CORRUPT(mp, head->h_fmt != cpu_to_be32(XLOG_FMT))) {
xfs_warn(mp,
"dirty log written in incompatible format - can't recover");
xlog_header_check_dump(mp, head);
- XFS_ERROR_REPORT("xlog_header_check_recover(1)",
- XFS_ERRLEVEL_HIGH, mp);
return -EFSCORRUPTED;
- } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
+ }
+ if (XFS_IS_CORRUPT(mp, !uuid_equal(&mp->m_sb.sb_uuid,
+ &head->h_fs_uuid))) {
xfs_warn(mp,
"dirty log entry has mismatched uuid - can't recover");
xlog_header_check_dump(mp, head);
- XFS_ERROR_REPORT("xlog_header_check_recover(2)",
- XFS_ERRLEVEL_HIGH, mp);
return -EFSCORRUPTED;
}
return 0;
@@ -277,11 +275,10 @@ xlog_header_check_mount(
* by IRIX and continue.
*/
xfs_warn(mp, "null uuid in log - IRIX style log");
- } else if (unlikely(!uuid_equal(&mp->m_sb.sb_uuid, &head->h_fs_uuid))) {
+ } else if (XFS_IS_CORRUPT(mp, !uuid_equal(&mp->m_sb.sb_uuid,
+ &head->h_fs_uuid))) {
xfs_warn(mp, "log has mismatched uuid - can't recover");
xlog_header_check_dump(mp, head);
- XFS_ERROR_REPORT("xlog_header_check_mount",
- XFS_ERRLEVEL_HIGH, mp);
return -EFSCORRUPTED;
}
return 0;
@@ -297,7 +294,7 @@ xlog_recover_iodone(
* this during recovery. One strike!
*/
if (!XFS_FORCED_SHUTDOWN(bp->b_mount)) {
- xfs_buf_ioerror_alert(bp, __func__);
+ xfs_buf_ioerror_alert(bp, __this_address);
xfs_force_shutdown(bp->b_mount, SHUTDOWN_META_IO_ERROR);
}
}
@@ -469,7 +466,7 @@ xlog_find_verify_log_record(
xfs_warn(log->l_mp,
"Log inconsistent (didn't find previous header)");
ASSERT(0);
- error = -EIO;
+ error = -EFSCORRUPTED;
goto out;
}
@@ -1345,10 +1342,11 @@ xlog_find_tail(
error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, buffer,
&rhead_blk, &rhead, &wrapped);
if (error < 0)
- return error;
+ goto done;
if (!error) {
xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
- return -EIO;
+ error = -EFSCORRUPTED;
+ goto done;
}
*tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
@@ -1697,11 +1695,10 @@ xlog_clear_stale_blocks(
* the distance from the beginning of the log to the
* tail.
*/
- if (unlikely(head_block < tail_block || head_block >= log->l_logBBsize)) {
- XFS_ERROR_REPORT("xlog_clear_stale_blocks(1)",
- XFS_ERRLEVEL_LOW, log->l_mp);
+ if (XFS_IS_CORRUPT(log->l_mp,
+ head_block < tail_block ||
+ head_block >= log->l_logBBsize))
return -EFSCORRUPTED;
- }
tail_distance = tail_block + (log->l_logBBsize - head_block);
} else {
/*
@@ -1709,11 +1706,10 @@ xlog_clear_stale_blocks(
* so the distance from the head to the tail is just
* the tail block minus the head block.
*/
- if (unlikely(head_block >= tail_block || head_cycle != (tail_cycle + 1))){
- XFS_ERROR_REPORT("xlog_clear_stale_blocks(2)",
- XFS_ERRLEVEL_LOW, log->l_mp);
+ if (XFS_IS_CORRUPT(log->l_mp,
+ head_block >= tail_block ||
+ head_cycle != tail_cycle + 1))
return -EFSCORRUPTED;
- }
tail_distance = tail_block - head_block;
}
@@ -1938,6 +1934,12 @@ xlog_recover_buffer_pass1(
struct list_head *bucket;
struct xfs_buf_cancel *bcp;
+ if (!xfs_buf_log_check_iovec(&item->ri_buf[0])) {
+ xfs_err(log->l_mp, "bad buffer log item size (%d)",
+ item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
/*
* If this isn't a cancel buffer item, then just return.
*/
@@ -1960,7 +1962,7 @@ xlog_recover_buffer_pass1(
}
}
- bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), KM_SLEEP);
+ bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), 0);
bcp->bc_blkno = buf_f->blf_blkno;
bcp->bc_len = buf_f->blf_len;
bcp->bc_refcount = 1;
@@ -2133,13 +2135,11 @@ xlog_recover_do_inode_buffer(
*/
logged_nextp = item->ri_buf[item_index].i_addr +
next_unlinked_offset - reg_buf_offset;
- if (unlikely(*logged_nextp == 0)) {
+ if (XFS_IS_CORRUPT(mp, *logged_nextp == 0)) {
xfs_alert(mp,
"Bad inode buffer log record (ptr = "PTR_FMT", bp = "PTR_FMT"). "
"Trying to replay bad (0) inode di_next_unlinked field.",
item, bp);
- XFS_ERROR_REPORT("xlog_recover_do_inode_buf",
- XFS_ERRLEVEL_LOW, mp);
return -EFSCORRUPTED;
}
@@ -2574,6 +2574,7 @@ xlog_recover_do_reg_buffer(
int bit;
int nbits;
xfs_failaddr_t fa;
+ const size_t size_disk_dquot = sizeof(struct xfs_disk_dquot);
trace_xfs_log_recover_buf_reg_buf(mp->m_log, buf_f);
@@ -2616,7 +2617,7 @@ xlog_recover_do_reg_buffer(
"XFS: NULL dquot in %s.", __func__);
goto next;
}
- if (item->ri_buf[i].i_len < sizeof(xfs_disk_dquot_t)) {
+ if (item->ri_buf[i].i_len < size_disk_dquot) {
xfs_alert(mp,
"XFS: dquot too small (%d) in %s.",
item->ri_buf[i].i_len, __func__);
@@ -2744,15 +2745,10 @@ xlog_recover_buffer_pass2(
if (buf_f->blf_flags & XFS_BLF_INODE_BUF)
buf_flags |= XBF_UNMAPPED;
- bp = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
- buf_flags, NULL);
- if (!bp)
- return -ENOMEM;
- error = bp->b_error;
- if (error) {
- xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#1)");
- goto out_release;
- }
+ error = xfs_buf_read(mp->m_ddev_targp, buf_f->blf_blkno, buf_f->blf_len,
+ buf_flags, &bp, NULL);
+ if (error)
+ return error;
/*
* Recover the buffer only if we get an LSN from it and it's less than
@@ -2930,7 +2926,7 @@ xlog_recover_inode_pass2(
if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
in_f = item->ri_buf[0].i_addr;
} else {
- in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), KM_SLEEP);
+ in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), 0);
need_free = 1;
error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
if (error)
@@ -2949,17 +2945,10 @@ xlog_recover_inode_pass2(
}
trace_xfs_log_recover_inode_recover(log, in_f);
- bp = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len, 0,
- &xfs_inode_buf_ops);
- if (!bp) {
- error = -ENOMEM;
+ error = xfs_buf_read(mp->m_ddev_targp, in_f->ilf_blkno, in_f->ilf_len,
+ 0, &bp, &xfs_inode_buf_ops);
+ if (error)
goto error;
- }
- error = bp->b_error;
- if (error) {
- xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#2)");
- goto out_release;
- }
ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
dip = xfs_buf_offset(bp, in_f->ilf_boffset);
@@ -2967,22 +2956,18 @@ xlog_recover_inode_pass2(
* Make sure the place we're flushing out to really looks
* like an inode!
*/
- if (unlikely(!xfs_verify_magic16(bp, dip->di_magic))) {
+ if (XFS_IS_CORRUPT(mp, !xfs_verify_magic16(bp, dip->di_magic))) {
xfs_alert(mp,
"%s: Bad inode magic number, dip = "PTR_FMT", dino bp = "PTR_FMT", ino = %Ld",
__func__, dip, bp, in_f->ilf_ino);
- XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)",
- XFS_ERRLEVEL_LOW, mp);
error = -EFSCORRUPTED;
goto out_release;
}
ldip = item->ri_buf[1].i_addr;
- if (unlikely(ldip->di_magic != XFS_DINODE_MAGIC)) {
+ if (XFS_IS_CORRUPT(mp, ldip->di_magic != XFS_DINODE_MAGIC)) {
xfs_alert(mp,
"%s: Bad inode log record, rec ptr "PTR_FMT", ino %Ld",
__func__, item, in_f->ilf_ino);
- XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)",
- XFS_ERRLEVEL_LOW, mp);
error = -EFSCORRUPTED;
goto out_release;
}
@@ -3164,7 +3149,7 @@ xlog_recover_inode_pass2(
default:
xfs_warn(log->l_mp, "%s: Invalid flag", __func__);
ASSERT(0);
- error = -EIO;
+ error = -EFSCORRUPTED;
goto out_release;
}
}
@@ -3245,12 +3230,12 @@ xlog_recover_dquot_pass2(
recddq = item->ri_buf[1].i_addr;
if (recddq == NULL) {
xfs_alert(log->l_mp, "NULL dquot in %s.", __func__);
- return -EIO;
+ return -EFSCORRUPTED;
}
- if (item->ri_buf[1].i_len < sizeof(xfs_disk_dquot_t)) {
+ if (item->ri_buf[1].i_len < sizeof(struct xfs_disk_dquot)) {
xfs_alert(log->l_mp, "dquot too small (%d) in %s.",
item->ri_buf[1].i_len, __func__);
- return -EIO;
+ return -EFSCORRUPTED;
}
/*
@@ -3277,7 +3262,7 @@ xlog_recover_dquot_pass2(
if (fa) {
xfs_alert(mp, "corrupt dquot ID 0x%x in log at %pS",
dq_f->qlf_id, fa);
- return -EIO;
+ return -EFSCORRUPTED;
}
ASSERT(dq_f->qlf_len == 1);
@@ -3535,6 +3520,7 @@ xfs_cui_copy_format(
memcpy(dst_cui_fmt, src_cui_fmt, len);
return 0;
}
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
return -EFSCORRUPTED;
}
@@ -3599,8 +3585,10 @@ xlog_recover_cud_pass2(
struct xfs_ail *ailp = log->l_ailp;
cud_formatp = item->ri_buf[0].i_addr;
- if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format))
+ if (item->ri_buf[0].i_len != sizeof(struct xfs_cud_log_format)) {
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
return -EFSCORRUPTED;
+ }
cui_id = cud_formatp->cud_cui_id;
/*
@@ -3652,6 +3640,7 @@ xfs_bui_copy_format(
memcpy(dst_bui_fmt, src_bui_fmt, len);
return 0;
}
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
return -EFSCORRUPTED;
}
@@ -3675,8 +3664,10 @@ xlog_recover_bui_pass2(
bui_formatp = item->ri_buf[0].i_addr;
- if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS)
+ if (bui_formatp->bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) {
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
return -EFSCORRUPTED;
+ }
buip = xfs_bui_init(mp);
error = xfs_bui_copy_format(&item->ri_buf[0], &buip->bui_format);
if (error) {
@@ -3718,8 +3709,10 @@ xlog_recover_bud_pass2(
struct xfs_ail *ailp = log->l_ailp;
bud_formatp = item->ri_buf[0].i_addr;
- if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format))
+ if (item->ri_buf[0].i_len != sizeof(struct xfs_bud_log_format)) {
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
return -EFSCORRUPTED;
+ }
bui_id = bud_formatp->bud_bui_id;
/*
@@ -4016,7 +4009,7 @@ xlog_recover_commit_pass1(
xfs_warn(log->l_mp, "%s: invalid item type (%d)",
__func__, ITEM_TYPE(item));
ASSERT(0);
- return -EIO;
+ return -EFSCORRUPTED;
}
}
@@ -4064,7 +4057,7 @@ xlog_recover_commit_pass2(
xfs_warn(log->l_mp, "%s: invalid item type (%d)",
__func__, ITEM_TYPE(item));
ASSERT(0);
- return -EIO;
+ return -EFSCORRUPTED;
}
}
@@ -4161,7 +4154,7 @@ xlog_recover_add_item(
{
xlog_recover_item_t *item;
- item = kmem_zalloc(sizeof(xlog_recover_item_t), KM_SLEEP);
+ item = kmem_zalloc(sizeof(xlog_recover_item_t), 0);
INIT_LIST_HEAD(&item->ri_list);
list_add_tail(&item->ri_list, head);
}
@@ -4185,7 +4178,7 @@ xlog_recover_add_to_cont_trans(
ASSERT(len <= sizeof(struct xfs_trans_header));
if (len > sizeof(struct xfs_trans_header)) {
xfs_warn(log->l_mp, "%s: bad header length", __func__);
- return -EIO;
+ return -EFSCORRUPTED;
}
xlog_recover_add_item(&trans->r_itemq);
@@ -4201,7 +4194,7 @@ xlog_recover_add_to_cont_trans(
old_ptr = item->ri_buf[item->ri_cnt-1].i_addr;
old_len = item->ri_buf[item->ri_cnt-1].i_len;
- ptr = kmem_realloc(old_ptr, len + old_len, KM_SLEEP);
+ ptr = kmem_realloc(old_ptr, len + old_len, 0);
memcpy(&ptr[old_len], dp, len);
item->ri_buf[item->ri_cnt-1].i_len += len;
item->ri_buf[item->ri_cnt-1].i_addr = ptr;
@@ -4241,13 +4234,13 @@ xlog_recover_add_to_trans(
xfs_warn(log->l_mp, "%s: bad header magic number",
__func__);
ASSERT(0);
- return -EIO;
+ return -EFSCORRUPTED;
}
if (len > sizeof(struct xfs_trans_header)) {
xfs_warn(log->l_mp, "%s: bad header length", __func__);
ASSERT(0);
- return -EIO;
+ return -EFSCORRUPTED;
}
/*
@@ -4261,7 +4254,7 @@ xlog_recover_add_to_trans(
return 0;
}
- ptr = kmem_alloc(len, KM_SLEEP);
+ ptr = kmem_alloc(len, 0);
memcpy(ptr, dp, len);
in_f = (struct xfs_inode_log_format *)ptr;
@@ -4283,15 +4276,24 @@ xlog_recover_add_to_trans(
in_f->ilf_size);
ASSERT(0);
kmem_free(ptr);
- return -EIO;
+ return -EFSCORRUPTED;
}
item->ri_total = in_f->ilf_size;
item->ri_buf =
kmem_zalloc(item->ri_total * sizeof(xfs_log_iovec_t),
- KM_SLEEP);
+ 0);
+ }
+
+ if (item->ri_total <= item->ri_cnt) {
+ xfs_warn(log->l_mp,
+ "log item region count (%d) overflowed size (%d)",
+ item->ri_cnt, item->ri_total);
+ ASSERT(0);
+ kmem_free(ptr);
+ return -EFSCORRUPTED;
}
- ASSERT(item->ri_total > item->ri_cnt);
+
/* Description region is ri_buf[0] */
item->ri_buf[item->ri_cnt].i_addr = ptr;
item->ri_buf[item->ri_cnt].i_len = len;
@@ -4378,7 +4380,7 @@ xlog_recovery_process_trans(
default:
xfs_warn(log->l_mp, "%s: bad flag 0x%x", __func__, flags);
ASSERT(0);
- error = -EIO;
+ error = -EFSCORRUPTED;
break;
}
if (error || freeit)
@@ -4423,7 +4425,7 @@ xlog_recover_ophdr_to_trans(
* This is a new transaction so allocate a new recovery container to
* hold the recovery ops that will follow.
*/
- trans = kmem_zalloc(sizeof(struct xlog_recover), KM_SLEEP);
+ trans = kmem_zalloc(sizeof(struct xlog_recover), 0);
trans->r_log_tid = tid;
trans->r_lsn = be64_to_cpu(rhead->h_lsn);
INIT_LIST_HEAD(&trans->r_itemq);
@@ -4458,7 +4460,7 @@ xlog_recover_process_ophdr(
xfs_warn(log->l_mp, "%s: bad clientid 0x%x",
__func__, ohead->oh_clientid);
ASSERT(0);
- return -EIO;
+ return -EFSCORRUPTED;
}
/*
@@ -4468,7 +4470,7 @@ xlog_recover_process_ophdr(
if (dp + len > end) {
xfs_warn(log->l_mp, "%s: bad length 0x%x", __func__, len);
WARN_ON(1);
- return -EIO;
+ return -EFSCORRUPTED;
}
trans = xlog_recover_ophdr_to_trans(rhash, rhead, ohead);
@@ -5022,16 +5024,27 @@ xlog_recover_process_one_iunlink(
}
/*
- * xlog_iunlink_recover
+ * Recover AGI unlinked lists
+ *
+ * This is called during recovery to process any inodes which we unlinked but
+ * not freed when the system crashed. These inodes will be on the lists in the
+ * AGI blocks. What we do here is scan all the AGIs and fully truncate and free
+ * any inodes found on the lists. Each inode is removed from the lists when it
+ * has been fully truncated and is freed. The freeing of the inode and its
+ * removal from the list must be atomic.
+ *
+ * If everything we touch in the agi processing loop is already in memory, this
+ * loop can hold the cpu for a long time. It runs without lock contention,
+ * memory allocation contention, the need wait for IO, etc, and so will run
+ * until we either run out of inodes to process, run low on memory or we run out
+ * of log space.
*
- * This is called during recovery to process any inodes which
- * we unlinked but not freed when the system crashed. These
- * inodes will be on the lists in the AGI blocks. What we do
- * here is scan all the AGIs and fully truncate and free any
- * inodes found on the lists. Each inode is removed from the
- * lists when it has been fully truncated and is freed. The
- * freeing of the inode and its removal from the list must be
- * atomic.
+ * This behaviour is bad for latency on single CPU and non-preemptible kernels,
+ * and can prevent other filesytem work (such as CIL pushes) from running. This
+ * can lead to deadlocks if the recovery process runs out of log reservation
+ * space. Hence we need to yield the CPU when there is other kernel work
+ * scheduled on this CPU to ensure other scheduled work can run without undue
+ * latency.
*/
STATIC void
xlog_recover_process_iunlinks(
@@ -5078,6 +5091,7 @@ xlog_recover_process_iunlinks(
while (agino != NULLAGINO) {
agino = xlog_recover_process_one_iunlink(mp,
agno, agino, bucket);
+ cond_resched();
}
}
xfs_buf_rele(agibp);
@@ -5158,8 +5172,10 @@ xlog_recover_process(
* If the filesystem is CRC enabled, this mismatch becomes a
* fatal log corruption failure.
*/
- if (xfs_sb_version_hascrc(&log->l_mp->m_sb))
+ if (xfs_sb_version_hascrc(&log->l_mp->m_sb)) {
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, log->l_mp);
return -EFSCORRUPTED;
+ }
}
xlog_unpack_data(rhead, dp, log);
@@ -5176,31 +5192,25 @@ xlog_valid_rec_header(
{
int hlen;
- if (unlikely(rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM))) {
- XFS_ERROR_REPORT("xlog_valid_rec_header(1)",
- XFS_ERRLEVEL_LOW, log->l_mp);
+ if (XFS_IS_CORRUPT(log->l_mp,
+ rhead->h_magicno != cpu_to_be32(XLOG_HEADER_MAGIC_NUM)))
return -EFSCORRUPTED;
- }
- if (unlikely(
- (!rhead->h_version ||
- (be32_to_cpu(rhead->h_version) & (~XLOG_VERSION_OKBITS))))) {
+ if (XFS_IS_CORRUPT(log->l_mp,
+ (!rhead->h_version ||
+ (be32_to_cpu(rhead->h_version) &
+ (~XLOG_VERSION_OKBITS))))) {
xfs_warn(log->l_mp, "%s: unrecognised log version (%d).",
__func__, be32_to_cpu(rhead->h_version));
- return -EIO;
+ return -EFSCORRUPTED;
}
/* LR body must have data or it wouldn't have been written */
hlen = be32_to_cpu(rhead->h_len);
- if (unlikely( hlen <= 0 || hlen > INT_MAX )) {
- XFS_ERROR_REPORT("xlog_valid_rec_header(2)",
- XFS_ERRLEVEL_LOW, log->l_mp);
+ if (XFS_IS_CORRUPT(log->l_mp, hlen <= 0 || hlen > INT_MAX))
return -EFSCORRUPTED;
- }
- if (unlikely( blkno > log->l_logBBsize || blkno > INT_MAX )) {
- XFS_ERROR_REPORT("xlog_valid_rec_header(3)",
- XFS_ERRLEVEL_LOW, log->l_mp);
+ if (XFS_IS_CORRUPT(log->l_mp,
+ blkno > log->l_logBBsize || blkno > INT_MAX))
return -EFSCORRUPTED;
- }
return 0;
}
@@ -5282,8 +5292,12 @@ xlog_do_recovery_pass(
"invalid iclog size (%d bytes), using lsunit (%d bytes)",
h_size, log->l_mp->m_logbsize);
h_size = log->l_mp->m_logbsize;
- } else
- return -EFSCORRUPTED;
+ } else {
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW,
+ log->l_mp);
+ error = -EFSCORRUPTED;
+ goto bread_err1;
+ }
}
if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) &&
@@ -5527,7 +5541,7 @@ xlog_do_log_recovery(
*/
log->l_buf_cancel_table = kmem_zalloc(XLOG_BC_TABLE_SIZE *
sizeof(struct list_head),
- KM_SLEEP);
+ 0);
for (i = 0; i < XLOG_BC_TABLE_SIZE; i++)
INIT_LIST_HEAD(&log->l_buf_cancel_table[i]);
@@ -5613,7 +5627,7 @@ xlog_do_recover(
error = xfs_buf_submit(bp);
if (error) {
if (!XFS_FORCED_SHUTDOWN(mp)) {
- xfs_buf_ioerror_alert(bp, __func__);
+ xfs_buf_ioerror_alert(bp, __this_address);
ASSERT(0);
}
xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c
index 9804efe525a9..e0f9d3b6abe9 100644
--- a/fs/xfs/xfs_message.c
+++ b/fs/xfs/xfs_message.c
@@ -20,8 +20,8 @@ __xfs_printk(
const struct xfs_mount *mp,
struct va_format *vaf)
{
- if (mp && mp->m_fsname) {
- printk("%sXFS (%s): %pV\n", level, mp->m_fsname, vaf);
+ if (mp && mp->m_super) {
+ printk("%sXFS (%s): %pV\n", level, mp->m_super->s_id, vaf);
return;
}
printk("%sXFS: %pV\n", level, vaf);
@@ -86,17 +86,25 @@ xfs_alert_tag(
}
void
-asswarn(char *expr, char *file, int line)
+asswarn(
+ struct xfs_mount *mp,
+ char *expr,
+ char *file,
+ int line)
{
- xfs_warn(NULL, "Assertion failed: %s, file: %s, line: %d",
+ xfs_warn(mp, "Assertion failed: %s, file: %s, line: %d",
expr, file, line);
WARN_ON(1);
}
void
-assfail(char *expr, char *file, int line)
+assfail(
+ struct xfs_mount *mp,
+ char *expr,
+ char *file,
+ int line)
{
- xfs_emerg(NULL, "Assertion failed: %s, file: %s, line: %d",
+ xfs_emerg(mp, "Assertion failed: %s, file: %s, line: %d",
expr, file, line);
if (xfs_globals.bug_on_assert)
BUG();
@@ -105,7 +113,7 @@ assfail(char *expr, char *file, int line)
}
void
-xfs_hex_dump(void *p, int length)
+xfs_hex_dump(const void *p, int length)
{
print_hex_dump(KERN_ALERT, "", DUMP_PREFIX_OFFSET, 16, 1, p, length, 1);
}
diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h
index 34447dca97d1..0b05e10995a0 100644
--- a/fs/xfs/xfs_message.h
+++ b/fs/xfs/xfs_message.h
@@ -57,9 +57,9 @@ do { \
#define xfs_debug_ratelimited(dev, fmt, ...) \
xfs_printk_ratelimited(xfs_debug, dev, fmt, ##__VA_ARGS__)
-extern void assfail(char *expr, char *f, int l);
-extern void asswarn(char *expr, char *f, int l);
+void assfail(struct xfs_mount *mp, char *expr, char *f, int l);
+void asswarn(struct xfs_mount *mp, char *expr, char *f, int l);
-extern void xfs_hex_dump(void *p, int length);
+extern void xfs_hex_dump(const void *p, int length);
#endif /* __XFS_MESSAGE_H */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 322da6909290..56efe140c923 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -31,7 +31,7 @@
#include "xfs_reflink.h"
#include "xfs_extent_busy.h"
#include "xfs_health.h"
-
+#include "xfs_trace.h"
static DEFINE_MUTEX(xfs_uuid_table_mutex);
static int xfs_uuid_table_size;
@@ -82,7 +82,7 @@ xfs_uuid_mount(
if (hole < 0) {
xfs_uuid_table = kmem_realloc(xfs_uuid_table,
(xfs_uuid_table_size + 1) * sizeof(*xfs_uuid_table),
- KM_SLEEP);
+ 0);
hole = xfs_uuid_table_size++;
}
xfs_uuid_table[hole] = *uuid;
@@ -214,7 +214,7 @@ xfs_initialize_perag(
spin_lock(&mp->m_perag_lock);
if (radix_tree_insert(&mp->m_perag_tree, index, pag)) {
- BUG();
+ WARN_ON_ONCE(1);
spin_unlock(&mp->m_perag_lock);
radix_tree_preload_end();
error = -EEXIST;
@@ -360,108 +360,122 @@ release_buf:
}
/*
- * Update alignment values based on mount options and sb values
+ * If the sunit/swidth change would move the precomputed root inode value, we
+ * must reject the ondisk change because repair will stumble over that.
+ * However, we allow the mount to proceed because we never rejected this
+ * combination before. Returns true to update the sb, false otherwise.
+ */
+static inline int
+xfs_check_new_dalign(
+ struct xfs_mount *mp,
+ int new_dalign,
+ bool *update_sb)
+{
+ struct xfs_sb *sbp = &mp->m_sb;
+ xfs_ino_t calc_ino;
+
+ calc_ino = xfs_ialloc_calc_rootino(mp, new_dalign);
+ trace_xfs_check_new_dalign(mp, new_dalign, calc_ino);
+
+ if (sbp->sb_rootino == calc_ino) {
+ *update_sb = true;
+ return 0;
+ }
+
+ xfs_warn(mp,
+"Cannot change stripe alignment; would require moving root inode.");
+
+ /*
+ * XXX: Next time we add a new incompat feature, this should start
+ * returning -EINVAL to fail the mount. Until then, spit out a warning
+ * that we're ignoring the administrator's instructions.
+ */
+ xfs_warn(mp, "Skipping superblock stripe alignment update.");
+ *update_sb = false;
+ return 0;
+}
+
+/*
+ * If we were provided with new sunit/swidth values as mount options, make sure
+ * that they pass basic alignment and superblock feature checks, and convert
+ * them into the same units (FSB) that everything else expects. This step
+ * /must/ be done before computing the inode geometry.
*/
STATIC int
-xfs_update_alignment(xfs_mount_t *mp)
+xfs_validate_new_dalign(
+ struct xfs_mount *mp)
{
- xfs_sb_t *sbp = &(mp->m_sb);
+ if (mp->m_dalign == 0)
+ return 0;
- if (mp->m_dalign) {
+ /*
+ * If stripe unit and stripe width are not multiples
+ * of the fs blocksize turn off alignment.
+ */
+ if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
+ (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
+ xfs_warn(mp,
+ "alignment check failed: sunit/swidth vs. blocksize(%d)",
+ mp->m_sb.sb_blocksize);
+ return -EINVAL;
+ } else {
/*
- * If stripe unit and stripe width are not multiples
- * of the fs blocksize turn off alignment.
+ * Convert the stripe unit and width to FSBs.
*/
- if ((BBTOB(mp->m_dalign) & mp->m_blockmask) ||
- (BBTOB(mp->m_swidth) & mp->m_blockmask)) {
+ mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
+ if (mp->m_dalign && (mp->m_sb.sb_agblocks % mp->m_dalign)) {
xfs_warn(mp,
- "alignment check failed: sunit/swidth vs. blocksize(%d)",
- sbp->sb_blocksize);
+ "alignment check failed: sunit/swidth vs. agsize(%d)",
+ mp->m_sb.sb_agblocks);
return -EINVAL;
- } else {
- /*
- * Convert the stripe unit and width to FSBs.
- */
- mp->m_dalign = XFS_BB_TO_FSBT(mp, mp->m_dalign);
- if (mp->m_dalign && (sbp->sb_agblocks % mp->m_dalign)) {
- xfs_warn(mp,
- "alignment check failed: sunit/swidth vs. agsize(%d)",
- sbp->sb_agblocks);
- return -EINVAL;
- } else if (mp->m_dalign) {
- mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
- } else {
- xfs_warn(mp,
- "alignment check failed: sunit(%d) less than bsize(%d)",
- mp->m_dalign, sbp->sb_blocksize);
- return -EINVAL;
- }
- }
-
- /*
- * Update superblock with new values
- * and log changes
- */
- if (xfs_sb_version_hasdalign(sbp)) {
- if (sbp->sb_unit != mp->m_dalign) {
- sbp->sb_unit = mp->m_dalign;
- mp->m_update_sb = true;
- }
- if (sbp->sb_width != mp->m_swidth) {
- sbp->sb_width = mp->m_swidth;
- mp->m_update_sb = true;
- }
+ } else if (mp->m_dalign) {
+ mp->m_swidth = XFS_BB_TO_FSBT(mp, mp->m_swidth);
} else {
xfs_warn(mp,
- "cannot change alignment: superblock does not support data alignment");
+ "alignment check failed: sunit(%d) less than bsize(%d)",
+ mp->m_dalign, mp->m_sb.sb_blocksize);
return -EINVAL;
}
- } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
- xfs_sb_version_hasdalign(&mp->m_sb)) {
- mp->m_dalign = sbp->sb_unit;
- mp->m_swidth = sbp->sb_width;
+ }
+
+ if (!xfs_sb_version_hasdalign(&mp->m_sb)) {
+ xfs_warn(mp,
+"cannot change alignment: superblock does not support data alignment");
+ return -EINVAL;
}
return 0;
}
-/*
- * Set the default minimum read and write sizes unless
- * already specified in a mount option.
- * We use smaller I/O sizes when the file system
- * is being used for NFS service (wsync mount option).
- */
-STATIC void
-xfs_set_rw_sizes(xfs_mount_t *mp)
+/* Update alignment values based on mount options and sb values. */
+STATIC int
+xfs_update_alignment(
+ struct xfs_mount *mp)
{
- xfs_sb_t *sbp = &(mp->m_sb);
- int readio_log, writeio_log;
+ struct xfs_sb *sbp = &mp->m_sb;
- if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
- if (mp->m_flags & XFS_MOUNT_WSYNC) {
- readio_log = XFS_WSYNC_READIO_LOG;
- writeio_log = XFS_WSYNC_WRITEIO_LOG;
- } else {
- readio_log = XFS_READIO_LOG_LARGE;
- writeio_log = XFS_WRITEIO_LOG_LARGE;
- }
- } else {
- readio_log = mp->m_readio_log;
- writeio_log = mp->m_writeio_log;
- }
+ if (mp->m_dalign) {
+ bool update_sb;
+ int error;
- if (sbp->sb_blocklog > readio_log) {
- mp->m_readio_log = sbp->sb_blocklog;
- } else {
- mp->m_readio_log = readio_log;
- }
- mp->m_readio_blocks = 1 << (mp->m_readio_log - sbp->sb_blocklog);
- if (sbp->sb_blocklog > writeio_log) {
- mp->m_writeio_log = sbp->sb_blocklog;
- } else {
- mp->m_writeio_log = writeio_log;
+ if (sbp->sb_unit == mp->m_dalign &&
+ sbp->sb_width == mp->m_swidth)
+ return 0;
+
+ error = xfs_check_new_dalign(mp, mp->m_dalign, &update_sb);
+ if (error || !update_sb)
+ return error;
+
+ sbp->sb_unit = mp->m_dalign;
+ sbp->sb_width = mp->m_swidth;
+ mp->m_update_sb = true;
+ } else if ((mp->m_flags & XFS_MOUNT_NOALIGN) != XFS_MOUNT_NOALIGN &&
+ xfs_sb_version_hasdalign(&mp->m_sb)) {
+ mp->m_dalign = sbp->sb_unit;
+ mp->m_swidth = sbp->sb_width;
}
- mp->m_writeio_blocks = 1 << (mp->m_writeio_log - sbp->sb_blocklog);
+
+ return 0;
}
/*
@@ -687,12 +701,12 @@ xfs_mountfs(
}
/*
- * Check if sb_agblocks is aligned at stripe boundary
- * If sb_agblocks is NOT aligned turn off m_dalign since
- * allocator alignment is within an ag, therefore ag has
- * to be aligned at stripe boundary.
+ * If we were given new sunit/swidth options, do some basic validation
+ * checks and convert the incore dalign and swidth values to the
+ * same units (FSB) that everything else uses. This /must/ happen
+ * before computing the inode geometry.
*/
- error = xfs_update_alignment(mp);
+ error = xfs_validate_new_dalign(mp);
if (error)
goto out;
@@ -703,10 +717,22 @@ xfs_mountfs(
xfs_rmapbt_compute_maxlevels(mp);
xfs_refcountbt_compute_maxlevels(mp);
+ /*
+ * Check if sb_agblocks is aligned at stripe boundary. If sb_agblocks
+ * is NOT aligned turn off m_dalign since allocator alignment is within
+ * an ag, therefore ag has to be aligned at stripe boundary. Note that
+ * we must compute the free space and rmap btree geometry before doing
+ * this.
+ */
+ error = xfs_update_alignment(mp);
+ if (error)
+ goto out;
+
/* enable fail_at_unmount as default */
mp->m_fail_unmount = true;
- error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype, NULL, mp->m_fsname);
+ error = xfs_sysfs_init(&mp->m_kobj, &xfs_mp_ktype,
+ NULL, mp->m_super->s_id);
if (error)
goto out;
@@ -728,9 +754,12 @@ xfs_mountfs(
goto out_remove_errortag;
/*
- * Set the minimum read and write sizes
+ * Update the preferred write size based on the information from the
+ * on-disk superblock.
*/
- xfs_set_rw_sizes(mp);
+ mp->m_allocsize_log =
+ max_t(uint32_t, sbp->sb_blocklog, mp->m_allocsize_log);
+ mp->m_allocsize_blocks = 1U << (mp->m_allocsize_log - sbp->sb_blocklog);
/* set the low space thresholds for dynamic preallocation */
xfs_set_low_space_thresholds(mp);
@@ -796,9 +825,8 @@ xfs_mountfs(
goto out_free_dir;
}
- if (!sbp->sb_logblocks) {
+ if (XFS_IS_CORRUPT(mp, !sbp->sb_logblocks)) {
xfs_warn(mp, "no log defined");
- XFS_ERROR_REPORT("xfs_mountfs", XFS_ERRLEVEL_LOW, mp);
error = -EFSCORRUPTED;
goto out_free_perag;
}
@@ -836,12 +864,10 @@ xfs_mountfs(
ASSERT(rip != NULL);
- if (unlikely(!S_ISDIR(VFS_I(rip)->i_mode))) {
+ if (XFS_IS_CORRUPT(mp, !S_ISDIR(VFS_I(rip)->i_mode))) {
xfs_warn(mp, "corrupted root inode %llu: not a directory",
(unsigned long long)rip->i_ino);
xfs_iunlock(rip, XFS_ILOCK_EXCL);
- XFS_ERROR_REPORT("xfs_mountfs_int(2)", XFS_ERRLEVEL_LOW,
- mp);
error = -EFSCORRUPTED;
goto out_rele_rip;
}
@@ -1277,7 +1303,7 @@ xfs_mod_fdblocks(
printk_once(KERN_WARNING
"Filesystem \"%s\": reserve blocks depleted! "
"Consider increasing reserve pool size.",
- mp->m_fsname);
+ mp->m_super->s_id);
fdblocks_enospc:
spin_unlock(&mp->m_sb_lock);
return -ENOSPC;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 4adb6837439a..88ab09ed29e7 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -9,10 +9,8 @@
struct xlog;
struct xfs_inode;
struct xfs_mru_cache;
-struct xfs_nameops;
struct xfs_ail;
struct xfs_quotainfo;
-struct xfs_dir_ops;
struct xfs_da_geometry;
/* dynamic preallocation free space thresholds, 5% down to 1% */
@@ -59,7 +57,6 @@ struct xfs_error_cfg {
typedef struct xfs_mount {
struct super_block *m_super;
- xfs_tid_t m_tid; /* next unused tid for fs */
/*
* Bitsets of per-fs metadata that have been checked and/or are sick.
@@ -89,8 +86,6 @@ typedef struct xfs_mount {
struct percpu_counter m_delalloc_blks;
struct xfs_buf *m_sb_bp; /* buffer for superblock */
- char *m_fsname; /* filesystem name */
- int m_fsname_len; /* strlen of fs name */
char *m_rtname; /* realtime device name */
char *m_logname; /* external log device name */
int m_bsize; /* fs logical block size */
@@ -98,10 +93,8 @@ typedef struct xfs_mount {
xfs_agnumber_t m_agirotor; /* last ag dir inode alloced */
spinlock_t m_agirotor_lock;/* .. and lock protecting it */
xfs_agnumber_t m_maxagi; /* highest inode alloc group */
- uint m_readio_log; /* min read size log bytes */
- uint m_readio_blocks; /* min read size blocks */
- uint m_writeio_log; /* min write size log bytes */
- uint m_writeio_blocks; /* min write size blocks */
+ uint m_allocsize_log;/* min write size log bytes */
+ uint m_allocsize_blocks; /* min write size blocks */
struct xfs_da_geometry *m_dir_geo; /* directory block geometry */
struct xfs_da_geometry *m_attr_geo; /* attribute block geometry */
struct xlog *m_log; /* log specific stuff */
@@ -159,10 +152,6 @@ typedef struct xfs_mount {
int m_dalign; /* stripe unit */
int m_swidth; /* stripe width */
uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
- const struct xfs_nameops *m_dirnameops; /* vector of dir name ops */
- const struct xfs_dir_ops *m_dir_inode_ops; /* vector of dir inode ops */
- const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */
- uint m_chsize; /* size of next field */
atomic_t m_active_trans; /* number trans frozen */
struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
struct delayed_work m_reclaim_work; /* background inode reclaim */
@@ -229,7 +218,7 @@ typedef struct xfs_mount {
#define XFS_MOUNT_ATTR2 (1ULL << 8) /* allow use of attr2 format */
#define XFS_MOUNT_GRPID (1ULL << 9) /* group-ID assigned from directory */
#define XFS_MOUNT_NORECOVERY (1ULL << 10) /* no recovery - dirty fs */
-#define XFS_MOUNT_DFLT_IOSIZE (1ULL << 12) /* set default i/o size */
+#define XFS_MOUNT_ALLOCSIZE (1ULL << 12) /* specified allocation size */
#define XFS_MOUNT_SMALL_INUMS (1ULL << 14) /* user wants 32bit inodes */
#define XFS_MOUNT_32BITINODES (1ULL << 15) /* inode32 allocator active */
#define XFS_MOUNT_NOUUID (1ULL << 16) /* ignore uuid during mount */
@@ -238,7 +227,7 @@ typedef struct xfs_mount {
* allocation */
#define XFS_MOUNT_RDONLY (1ULL << 20) /* read-only fs */
#define XFS_MOUNT_DIRSYNC (1ULL << 21) /* synchronous directory ops */
-#define XFS_MOUNT_COMPAT_IOSIZE (1ULL << 22) /* don't report large preferred
+#define XFS_MOUNT_LARGEIO (1ULL << 22) /* report large preferred
* I/O size in stat() */
#define XFS_MOUNT_FILESTREAMS (1ULL << 24) /* enable the filestreams
allocator */
@@ -246,13 +235,6 @@ typedef struct xfs_mount {
#define XFS_MOUNT_DAX (1ULL << 62) /* TEST ONLY! */
-
-/*
- * Default minimum read and write sizes.
- */
-#define XFS_READIO_LOG_LARGE 16
-#define XFS_WRITEIO_LOG_LARGE 16
-
/*
* Max and min values for mount-option defined I/O
* preallocation sizes.
@@ -260,37 +242,6 @@ typedef struct xfs_mount {
#define XFS_MAX_IO_LOG 30 /* 1G */
#define XFS_MIN_IO_LOG PAGE_SHIFT
-/*
- * Synchronous read and write sizes. This should be
- * better for NFSv2 wsync filesystems.
- */
-#define XFS_WSYNC_READIO_LOG 15 /* 32k */
-#define XFS_WSYNC_WRITEIO_LOG 14 /* 16k */
-
-/*
- * Allow large block sizes to be reported to userspace programs if the
- * "largeio" mount option is used.
- *
- * If compatibility mode is specified, simply return the basic unit of caching
- * so that we don't get inefficient read/modify/write I/O from user apps.
- * Otherwise....
- *
- * If the underlying volume is a stripe, then return the stripe width in bytes
- * as the recommended I/O size. It is not a stripe and we've set a default
- * buffered I/O size, return that, otherwise return the compat default.
- */
-static inline unsigned long
-xfs_preferred_iosize(xfs_mount_t *mp)
-{
- if (mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE)
- return PAGE_SIZE;
- return (mp->m_swidth ?
- (mp->m_swidth << mp->m_sb.sb_blocklog) :
- ((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) ?
- (1 << (int)max(mp->m_readio_log, mp->m_writeio_log)) :
- PAGE_SIZE));
-}
-
#define XFS_LAST_UNMOUNT_WAS_CLEAN(mp) \
((mp)->m_flags & XFS_MOUNT_WAS_CLEAN)
#define XFS_FORCED_SHUTDOWN(mp) ((mp)->m_flags & XFS_MOUNT_FS_SHUTDOWN)
@@ -327,13 +278,6 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
}
/* per-AG block reservation data structures*/
-enum xfs_ag_resv_type {
- XFS_AG_RESV_NONE = 0,
- XFS_AG_RESV_AGFL,
- XFS_AG_RESV_METADATA,
- XFS_AG_RESV_RMAPBT,
-};
-
struct xfs_ag_resv {
/* number of blocks originally reserved here */
xfs_extlen_t ar_orig_reserved;
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index 74738813f60d..a06661dac5be 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -333,12 +333,12 @@ xfs_mru_cache_create(
if (!(grp_time = msecs_to_jiffies(lifetime_ms) / grp_count))
return -EINVAL;
- if (!(mru = kmem_zalloc(sizeof(*mru), KM_SLEEP)))
+ if (!(mru = kmem_zalloc(sizeof(*mru), 0)))
return -ENOMEM;
/* An extra list is needed to avoid reaping up to a grp_time early. */
mru->grp_count = grp_count + 1;
- mru->lists = kmem_zalloc(mru->grp_count * sizeof(*mru->lists), KM_SLEEP);
+ mru->lists = kmem_zalloc(mru->grp_count * sizeof(*mru->lists), 0);
if (!mru->lists) {
err = -ENOMEM;
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
index b6701b4f59a9..5f04d8a5ab2a 100644
--- a/fs/xfs/xfs_ondisk.h
+++ b/fs/xfs/xfs_ondisk.h
@@ -111,6 +111,7 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(xfs_dir2_sf_hdr_t, 10);
/* log structures */
+ XFS_CHECK_STRUCT_SIZE(struct xfs_buf_log_format, 88);
XFS_CHECK_STRUCT_SIZE(struct xfs_dq_logformat, 24);
XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_32, 28);
XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_64, 32);
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 0c954cad7449..bb3008d390aa 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -12,6 +12,7 @@
#include "xfs_trans.h"
#include "xfs_bmap.h"
#include "xfs_iomap.h"
+#include "xfs_pnfs.h"
/*
* Ensure that we do not have any outstanding pNFS layouts that can be used by
@@ -32,7 +33,7 @@ xfs_break_leased_layouts(
struct xfs_inode *ip = XFS_I(inode);
int error;
- while ((error = break_layout(inode, false) == -EWOULDBLOCK)) {
+ while ((error = break_layout(inode, false)) == -EWOULDBLOCK) {
xfs_iunlock(ip, *iolock);
*did_unlock = true;
error = break_layout(inode, true);
@@ -59,7 +60,7 @@ xfs_fs_get_uuid(
printk_once(KERN_NOTICE
"XFS (%s): using experimental pNFS feature, use at your own risk!\n",
- mp->m_fsname);
+ mp->m_super->s_id);
if (*len < sizeof(uuid_t))
return -EINVAL;
@@ -142,43 +143,38 @@ xfs_fs_map_blocks(
lock_flags = xfs_ilock_data_map_shared(ip);
error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
&imap, &nimaps, bmapi_flags);
- xfs_iunlock(ip, lock_flags);
- if (error)
- goto out_unlock;
+ ASSERT(!nimaps || imap.br_startblock != DELAYSTARTBLOCK);
+
+ if (!error && write &&
+ (!nimaps || imap.br_startblock == HOLESTARTBLOCK)) {
+ if (offset + length > XFS_ISIZE(ip))
+ end_fsb = xfs_iomap_eof_align_last_fsb(ip, end_fsb);
+ else if (nimaps && imap.br_startblock == HOLESTARTBLOCK)
+ end_fsb = min(end_fsb, imap.br_startoff +
+ imap.br_blockcount);
+ xfs_iunlock(ip, lock_flags);
+
+ error = xfs_iomap_write_direct(ip, offset_fsb,
+ end_fsb - offset_fsb, &imap);
+ if (error)
+ goto out_unlock;
- if (write) {
- enum xfs_prealloc_flags flags = 0;
-
- ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
-
- if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) {
- /*
- * xfs_iomap_write_direct() expects to take ownership of
- * the shared ilock.
- */
- xfs_ilock(ip, XFS_ILOCK_SHARED);
- error = xfs_iomap_write_direct(ip, offset, length,
- &imap, nimaps);
- if (error)
- goto out_unlock;
-
- /*
- * Ensure the next transaction is committed
- * synchronously so that the blocks allocated and
- * handed out to the client are guaranteed to be
- * present even after a server crash.
- */
- flags |= XFS_PREALLOC_SET | XFS_PREALLOC_SYNC;
- }
-
- error = xfs_update_prealloc_flags(ip, flags);
+ /*
+ * Ensure the next transaction is committed synchronously so
+ * that the blocks allocated and handed out to the client are
+ * guaranteed to be present even after a server crash.
+ */
+ error = xfs_update_prealloc_flags(ip,
+ XFS_PREALLOC_SET | XFS_PREALLOC_SYNC);
if (error)
goto out_unlock;
+ } else {
+ xfs_iunlock(ip, lock_flags);
}
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- error = xfs_bmbt_to_iomap(ip, iomap, &imap, false);
+ error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0);
*device_generation = mp->m_generation;
return error;
out_unlock:
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 5e7a37f0cf84..0b0909657bad 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -22,6 +22,7 @@
#include "xfs_qm.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
+#include "xfs_error.h"
/*
* The global quota manager. There is only one of these for the entire
@@ -29,10 +30,10 @@
* quota functionality, including maintaining the freelist and hash
* tables of dquots.
*/
-STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
-STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
+STATIC int xfs_qm_init_quotainos(struct xfs_mount *mp);
+STATIC int xfs_qm_init_quotainfo(struct xfs_mount *mp);
-STATIC void xfs_qm_destroy_quotainos(xfs_quotainfo_t *qi);
+STATIC void xfs_qm_destroy_quotainos(struct xfs_quotainfo *qi);
STATIC void xfs_qm_dqfree_one(struct xfs_dquot *dqp);
/*
* We use the batch lookup interface to iterate over the dquots as it
@@ -243,14 +244,14 @@ xfs_qm_unmount_quotas(
STATIC int
xfs_qm_dqattach_one(
- xfs_inode_t *ip,
- xfs_dqid_t id,
- uint type,
- bool doalloc,
- xfs_dquot_t **IO_idqpp)
+ struct xfs_inode *ip,
+ xfs_dqid_t id,
+ uint type,
+ bool doalloc,
+ struct xfs_dquot **IO_idqpp)
{
- xfs_dquot_t *dqp;
- int error;
+ struct xfs_dquot *dqp;
+ int error;
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
error = 0;
@@ -341,7 +342,7 @@ xfs_qm_dqattach_locked(
}
if (XFS_IS_PQUOTA_ON(mp) && !ip->i_pdquot) {
- error = xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ,
+ error = xfs_qm_dqattach_one(ip, ip->i_d.di_projid, XFS_DQ_PROJ,
doalloc, &ip->i_pdquot);
if (error)
goto done;
@@ -539,12 +540,12 @@ xfs_qm_shrink_count(
STATIC void
xfs_qm_set_defquota(
- xfs_mount_t *mp,
- uint type,
- xfs_quotainfo_t *qinf)
+ struct xfs_mount *mp,
+ uint type,
+ struct xfs_quotainfo *qinf)
{
- xfs_dquot_t *dqp;
- struct xfs_def_quota *defq;
+ struct xfs_dquot *dqp;
+ struct xfs_def_quota *defq;
struct xfs_disk_dquot *ddqp;
int error;
@@ -642,7 +643,7 @@ xfs_qm_init_quotainfo(
ASSERT(XFS_IS_QUOTA_RUNNING(mp));
- qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
+ qinf = mp->m_quotainfo = kmem_zalloc(sizeof(struct xfs_quotainfo), 0);
error = list_lru_init(&qinf->qi_lru);
if (error)
@@ -709,9 +710,9 @@ out_free_qinf:
*/
void
xfs_qm_destroy_quotainfo(
- xfs_mount_t *mp)
+ struct xfs_mount *mp)
{
- xfs_quotainfo_t *qi;
+ struct xfs_quotainfo *qi;
qi = mp->m_quotainfo;
ASSERT(qi != NULL);
@@ -754,11 +755,15 @@ xfs_qm_qino_alloc(
if ((flags & XFS_QMOPT_PQUOTA) &&
(mp->m_sb.sb_gquotino != NULLFSINO)) {
ino = mp->m_sb.sb_gquotino;
- ASSERT(mp->m_sb.sb_pquotino == NULLFSINO);
+ if (XFS_IS_CORRUPT(mp,
+ mp->m_sb.sb_pquotino != NULLFSINO))
+ return -EFSCORRUPTED;
} else if ((flags & XFS_QMOPT_GQUOTA) &&
(mp->m_sb.sb_pquotino != NULLFSINO)) {
ino = mp->m_sb.sb_pquotino;
- ASSERT(mp->m_sb.sb_gquotino == NULLFSINO);
+ if (XFS_IS_CORRUPT(mp,
+ mp->m_sb.sb_gquotino != NULLFSINO))
+ return -EFSCORRUPTED;
}
if (ino != NULLFSINO) {
error = xfs_iget(mp, NULL, ino, 0, 0, ip);
@@ -978,7 +983,7 @@ xfs_qm_reset_dqcounts_buf(
if (qip->i_d.di_nblocks == 0)
return 0;
- map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
+ map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), 0);
lblkno = 0;
maxlblkcnt = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
@@ -1559,7 +1564,7 @@ error_rele:
STATIC void
xfs_qm_destroy_quotainos(
- xfs_quotainfo_t *qi)
+ struct xfs_quotainfo *qi)
{
if (qi->qi_uquotaip) {
xfs_irele(qi->qi_uquotaip);
@@ -1693,7 +1698,7 @@ xfs_qm_vop_dqalloc(
}
}
if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
- if (xfs_get_projid(ip) != prid) {
+ if (ip->i_d.di_projid != prid) {
xfs_iunlock(ip, lockflags);
error = xfs_qm_dqget(mp, (xfs_dqid_t)prid, XFS_DQ_PROJ,
true, &pq);
@@ -1737,14 +1742,14 @@ error_rele:
* Actually transfer ownership, and do dquot modifications.
* These were already reserved.
*/
-xfs_dquot_t *
+struct xfs_dquot *
xfs_qm_vop_chown(
- xfs_trans_t *tp,
- xfs_inode_t *ip,
- xfs_dquot_t **IO_olddq,
- xfs_dquot_t *newdq)
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ struct xfs_dquot **IO_olddq,
+ struct xfs_dquot *newdq)
{
- xfs_dquot_t *prevdq;
+ struct xfs_dquot *prevdq;
uint bfield = XFS_IS_REALTIME_INODE(ip) ?
XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
@@ -1827,7 +1832,7 @@ xfs_qm_vop_chown_reserve(
}
if (XFS_IS_PQUOTA_ON(ip->i_mount) && pdqp &&
- xfs_get_projid(ip) != be32_to_cpu(pdqp->q_core.d_id)) {
+ ip->i_d.di_projid != be32_to_cpu(pdqp->q_core.d_id)) {
prjflags = XFS_QMOPT_ENOSPC;
pdq_delblks = pdqp;
if (delblks) {
@@ -1928,7 +1933,7 @@ xfs_qm_vop_create_dqattach(
}
if (pdqp && XFS_IS_PQUOTA_ON(mp)) {
ASSERT(ip->i_pdquot == NULL);
- ASSERT(xfs_get_projid(ip) == be32_to_cpu(pdqp->q_core.d_id));
+ ASSERT(ip->i_d.di_projid == be32_to_cpu(pdqp->q_core.d_id));
ip->i_pdquot = xfs_qm_dqhold(pdqp);
xfs_trans_mod_dquot(tp, pdqp, XFS_TRANS_DQ_ICOUNT, 1);
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index b41b75089548..4e57edca8bce 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -54,7 +54,7 @@ struct xfs_def_quota {
* Various quota information for individual filesystems.
* The mount structure keeps a pointer to this.
*/
-typedef struct xfs_quotainfo {
+struct xfs_quotainfo {
struct radix_tree_root qi_uquota_tree;
struct radix_tree_root qi_gquota_tree;
struct radix_tree_root qi_pquota_tree;
@@ -64,9 +64,9 @@ typedef struct xfs_quotainfo {
struct xfs_inode *qi_pquotaip; /* project quota inode */
struct list_lru qi_lru;
int qi_dquots;
- time_t qi_btimelimit; /* limit for blks timer */
- time_t qi_itimelimit; /* limit for inodes timer */
- time_t qi_rtbtimelimit;/* limit for rt blks timer */
+ time64_t qi_btimelimit; /* limit for blks timer */
+ time64_t qi_itimelimit; /* limit for inodes timer */
+ time64_t qi_rtbtimelimit;/* limit for rt blks timer */
xfs_qwarncnt_t qi_bwarnlimit; /* limit for blks warnings */
xfs_qwarncnt_t qi_iwarnlimit; /* limit for inodes warnings */
xfs_qwarncnt_t qi_rtbwarnlimit;/* limit for rt blks warnings */
@@ -76,8 +76,8 @@ typedef struct xfs_quotainfo {
struct xfs_def_quota qi_usr_default;
struct xfs_def_quota qi_grp_default;
struct xfs_def_quota qi_prj_default;
- struct shrinker qi_shrinker;
-} xfs_quotainfo_t;
+ struct shrinker qi_shrinker;
+};
static inline struct radix_tree_root *
xfs_dquot_tree(
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index 5d72e88598b4..fc2fa418919f 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -54,13 +54,13 @@ xfs_fill_statvfs_from_dquot(
*/
void
xfs_qm_statvfs(
- xfs_inode_t *ip,
+ struct xfs_inode *ip,
struct kstatfs *statp)
{
- xfs_mount_t *mp = ip->i_mount;
- xfs_dquot_t *dqp;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_dquot *dqp;
- if (!xfs_qm_dqget(mp, xfs_get_projid(ip), XFS_DQ_PROJ, false, &dqp)) {
+ if (!xfs_qm_dqget(mp, ip->i_d.di_projid, XFS_DQ_PROJ, false, &dqp)) {
xfs_fill_statvfs_from_dquot(statp, dqp);
xfs_qm_dqput(dqp);
}
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index da7ad0383037..1ea82764bf89 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -19,9 +19,72 @@
#include "xfs_qm.h"
#include "xfs_icache.h"
-STATIC int xfs_qm_log_quotaoff(xfs_mount_t *, xfs_qoff_logitem_t **, uint);
-STATIC int xfs_qm_log_quotaoff_end(xfs_mount_t *, xfs_qoff_logitem_t *,
- uint);
+STATIC int
+xfs_qm_log_quotaoff(
+ struct xfs_mount *mp,
+ struct xfs_qoff_logitem **qoffstartp,
+ uint flags)
+{
+ struct xfs_trans *tp;
+ int error;
+ struct xfs_qoff_logitem *qoffi;
+
+ *qoffstartp = NULL;
+
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_quotaoff, 0, 0, 0, &tp);
+ if (error)
+ goto out;
+
+ qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
+ xfs_trans_log_quotaoff_item(tp, qoffi);
+
+ spin_lock(&mp->m_sb_lock);
+ mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL;
+ spin_unlock(&mp->m_sb_lock);
+
+ xfs_log_sb(tp);
+
+ /*
+ * We have to make sure that the transaction is secure on disk before we
+ * return and actually stop quota accounting. So, make it synchronous.
+ * We don't care about quotoff's performance.
+ */
+ xfs_trans_set_sync(tp);
+ error = xfs_trans_commit(tp);
+ if (error)
+ goto out;
+
+ *qoffstartp = qoffi;
+out:
+ return error;
+}
+
+STATIC int
+xfs_qm_log_quotaoff_end(
+ struct xfs_mount *mp,
+ struct xfs_qoff_logitem *startqoff,
+ uint flags)
+{
+ struct xfs_trans *tp;
+ int error;
+ struct xfs_qoff_logitem *qoffi;
+
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_equotaoff, 0, 0, 0, &tp);
+ if (error)
+ return error;
+
+ qoffi = xfs_trans_get_qoff_item(tp, startqoff,
+ flags & XFS_ALL_QUOTA_ACCT);
+ xfs_trans_log_quotaoff_item(tp, qoffi);
+
+ /*
+ * We have to make sure that the transaction is secure on disk before we
+ * return and actually stop quota accounting. So, make it synchronous.
+ * We don't care about quotoff's performance.
+ */
+ xfs_trans_set_sync(tp);
+ return xfs_trans_commit(tp);
+}
/*
* Turn off quota accounting and/or enforcement for all udquots and/or
@@ -40,7 +103,7 @@ xfs_qm_scall_quotaoff(
uint dqtype;
int error;
uint inactivate_flags;
- xfs_qoff_logitem_t *qoffstart;
+ struct xfs_qoff_logitem *qoffstart;
/*
* No file system can have quotas enabled on disk but not in core.
@@ -538,74 +601,6 @@ out_unlock:
return error;
}
-STATIC int
-xfs_qm_log_quotaoff_end(
- xfs_mount_t *mp,
- xfs_qoff_logitem_t *startqoff,
- uint flags)
-{
- xfs_trans_t *tp;
- int error;
- xfs_qoff_logitem_t *qoffi;
-
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_equotaoff, 0, 0, 0, &tp);
- if (error)
- return error;
-
- qoffi = xfs_trans_get_qoff_item(tp, startqoff,
- flags & XFS_ALL_QUOTA_ACCT);
- xfs_trans_log_quotaoff_item(tp, qoffi);
-
- /*
- * We have to make sure that the transaction is secure on disk before we
- * return and actually stop quota accounting. So, make it synchronous.
- * We don't care about quotoff's performance.
- */
- xfs_trans_set_sync(tp);
- return xfs_trans_commit(tp);
-}
-
-
-STATIC int
-xfs_qm_log_quotaoff(
- xfs_mount_t *mp,
- xfs_qoff_logitem_t **qoffstartp,
- uint flags)
-{
- xfs_trans_t *tp;
- int error;
- xfs_qoff_logitem_t *qoffi;
-
- *qoffstartp = NULL;
-
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_quotaoff, 0, 0, 0, &tp);
- if (error)
- goto out;
-
- qoffi = xfs_trans_get_qoff_item(tp, NULL, flags & XFS_ALL_QUOTA_ACCT);
- xfs_trans_log_quotaoff_item(tp, qoffi);
-
- spin_lock(&mp->m_sb_lock);
- mp->m_sb.sb_qflags = (mp->m_qflags & ~(flags)) & XFS_MOUNT_QUOTA_ALL;
- spin_unlock(&mp->m_sb_lock);
-
- xfs_log_sb(tp);
-
- /*
- * We have to make sure that the transaction is secure on disk before we
- * return and actually stop quota accounting. So, make it synchronous.
- * We don't care about quotoff's performance.
- */
- xfs_trans_set_sync(tp);
- error = xfs_trans_commit(tp);
- if (error)
- goto out;
-
- *qoffstartp = qoffi;
-out:
- return error;
-}
-
/* Fill out the quota context. */
static void
xfs_qm_scall_getquota_fill_qc(
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index cd6c7210a373..38669e827206 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -37,9 +37,9 @@ xfs_qm_fill_state(
tstate->flags |= QCI_SYSFILE;
tstate->blocks = ip->i_d.di_nblocks;
tstate->nextents = ip->i_d.di_nextents;
- tstate->spc_timelimit = q->qi_btimelimit;
- tstate->ino_timelimit = q->qi_itimelimit;
- tstate->rt_spc_timelimit = q->qi_rtbtimelimit;
+ tstate->spc_timelimit = (u32)q->qi_btimelimit;
+ tstate->ino_timelimit = (u32)q->qi_itimelimit;
+ tstate->rt_spc_timelimit = (u32)q->qi_rtbtimelimit;
tstate->spc_warnlimit = q->qi_bwarnlimit;
tstate->ino_warnlimit = q->qi_iwarnlimit;
tstate->rt_spc_warnlimit = q->qi_rtbwarnlimit;
@@ -201,6 +201,9 @@ xfs_fs_rm_xquota(
if (XFS_IS_QUOTA_ON(mp))
return -EINVAL;
+ if (uflags & ~(FS_USER_QUOTA | FS_GROUP_QUOTA | FS_PROJ_QUOTA))
+ return -EINVAL;
+
if (uflags & FS_USER_QUOTA)
flags |= XFS_DQ_USER;
if (uflags & FS_GROUP_QUOTA)
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index d8288aa0670a..8eeed73928cd 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -17,7 +17,7 @@
#include "xfs_refcount_item.h"
#include "xfs_log.h"
#include "xfs_refcount.h"
-
+#include "xfs_error.h"
kmem_zone_t *xfs_cui_zone;
kmem_zone_t *xfs_cud_zone;
@@ -34,7 +34,7 @@ xfs_cui_item_free(
if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS)
kmem_free(cuip);
else
- kmem_zone_free(xfs_cui_zone, cuip);
+ kmem_cache_free(xfs_cui_zone, cuip);
}
/*
@@ -144,9 +144,9 @@ xfs_cui_init(
ASSERT(nextents > 0);
if (nextents > XFS_CUI_MAX_FAST_EXTENTS)
cuip = kmem_zalloc(xfs_cui_log_item_sizeof(nextents),
- KM_SLEEP);
+ 0);
else
- cuip = kmem_zone_zalloc(xfs_cui_zone, KM_SLEEP);
+ cuip = kmem_zone_zalloc(xfs_cui_zone, 0);
xfs_log_item_init(mp, &cuip->cui_item, XFS_LI_CUI, &xfs_cui_item_ops);
cuip->cui_format.cui_nextents = nextents;
@@ -206,7 +206,7 @@ xfs_cud_item_release(
struct xfs_cud_log_item *cudp = CUD_ITEM(lip);
xfs_cui_release(cudp->cud_cuip);
- kmem_zone_free(xfs_cud_zone, cudp);
+ kmem_cache_free(xfs_cud_zone, cudp);
}
static const struct xfs_item_ops xfs_cud_item_ops = {
@@ -223,7 +223,7 @@ xfs_trans_get_cud(
{
struct xfs_cud_log_item *cudp;
- cudp = kmem_zone_zalloc(xfs_cud_zone, KM_SLEEP);
+ cudp = kmem_zone_zalloc(xfs_cud_zone, 0);
xfs_log_item_init(tp->t_mountp, &cudp->cud_item, XFS_LI_CUD,
&xfs_cud_item_ops);
cudp->cud_cuip = cuip;
@@ -497,7 +497,7 @@ xfs_cui_recover(
*/
set_bit(XFS_CUI_RECOVERED, &cuip->cui_flags);
xfs_cui_release(cuip);
- return -EIO;
+ return -EFSCORRUPTED;
}
}
@@ -536,6 +536,7 @@ xfs_cui_recover(
type = refc_type;
break;
default:
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
error = -EFSCORRUPTED;
goto abort_error;
}
@@ -555,26 +556,24 @@ xfs_cui_recover(
irec.br_blockcount = new_len;
switch (type) {
case XFS_REFCOUNT_INCREASE:
- error = xfs_refcount_increase_extent(tp, &irec);
+ xfs_refcount_increase_extent(tp, &irec);
break;
case XFS_REFCOUNT_DECREASE:
- error = xfs_refcount_decrease_extent(tp, &irec);
+ xfs_refcount_decrease_extent(tp, &irec);
break;
case XFS_REFCOUNT_ALLOC_COW:
- error = xfs_refcount_alloc_cow_extent(tp,
+ xfs_refcount_alloc_cow_extent(tp,
irec.br_startblock,
irec.br_blockcount);
break;
case XFS_REFCOUNT_FREE_COW:
- error = xfs_refcount_free_cow_extent(tp,
+ xfs_refcount_free_cow_extent(tp,
irec.br_startblock,
irec.br_blockcount);
break;
default:
ASSERT(0);
}
- if (error)
- goto abort_error;
requeue_only = true;
}
}
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index c4ec7afd1170..b0ce04ffd3cd 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -143,8 +143,6 @@ xfs_reflink_find_shared(
error = xfs_alloc_read_agf(mp, tp, agno, 0, &agbp);
if (error)
return error;
- if (!agbp)
- return -ENOMEM;
cur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno);
@@ -223,8 +221,8 @@ xfs_reflink_trim_around_shared(
}
}
-bool
-xfs_inode_need_cow(
+int
+xfs_bmap_trim_cow(
struct xfs_inode *ip,
struct xfs_bmbt_irec *imap,
bool *shared)
@@ -308,13 +306,13 @@ static int
xfs_find_trim_cow_extent(
struct xfs_inode *ip,
struct xfs_bmbt_irec *imap,
+ struct xfs_bmbt_irec *cmap,
bool *shared,
bool *found)
{
xfs_fileoff_t offset_fsb = imap->br_startoff;
xfs_filblks_t count_fsb = imap->br_blockcount;
struct xfs_iext_cursor icur;
- struct xfs_bmbt_irec got;
*found = false;
@@ -322,23 +320,22 @@ xfs_find_trim_cow_extent(
* If we don't find an overlapping extent, trim the range we need to
* allocate to fit the hole we found.
*/
- if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &got))
- got.br_startoff = offset_fsb + count_fsb;
- if (got.br_startoff > offset_fsb) {
+ if (!xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, cmap))
+ cmap->br_startoff = offset_fsb + count_fsb;
+ if (cmap->br_startoff > offset_fsb) {
xfs_trim_extent(imap, imap->br_startoff,
- got.br_startoff - imap->br_startoff);
- return xfs_inode_need_cow(ip, imap, shared);
+ cmap->br_startoff - imap->br_startoff);
+ return xfs_bmap_trim_cow(ip, imap, shared);
}
*shared = true;
- if (isnullstartblock(got.br_startblock)) {
- xfs_trim_extent(imap, got.br_startoff, got.br_blockcount);
+ if (isnullstartblock(cmap->br_startblock)) {
+ xfs_trim_extent(imap, cmap->br_startoff, cmap->br_blockcount);
return 0;
}
/* real extent found - no need to allocate */
- xfs_trim_extent(&got, offset_fsb, count_fsb);
- *imap = got;
+ xfs_trim_extent(cmap, offset_fsb, count_fsb);
*found = true;
return 0;
}
@@ -348,6 +345,7 @@ int
xfs_reflink_allocate_cow(
struct xfs_inode *ip,
struct xfs_bmbt_irec *imap,
+ struct xfs_bmbt_irec *cmap,
bool *shared,
uint *lockmode,
bool convert_now)
@@ -367,7 +365,7 @@ xfs_reflink_allocate_cow(
xfs_ifork_init_cow(ip);
}
- error = xfs_find_trim_cow_extent(ip, imap, shared, &found);
+ error = xfs_find_trim_cow_extent(ip, imap, cmap, shared, &found);
if (error || !*shared)
return error;
if (found)
@@ -392,7 +390,7 @@ xfs_reflink_allocate_cow(
/*
* Check for an overlapping extent again now that we dropped the ilock.
*/
- error = xfs_find_trim_cow_extent(ip, imap, shared, &found);
+ error = xfs_find_trim_cow_extent(ip, imap, cmap, shared, &found);
if (error || !*shared)
goto out_trans_cancel;
if (found) {
@@ -410,8 +408,8 @@ xfs_reflink_allocate_cow(
/* Allocate the entire reservation as unwritten blocks. */
nimaps = 1;
error = xfs_bmapi_write(tp, ip, imap->br_startoff, imap->br_blockcount,
- XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC,
- resblks, imap, &nimaps);
+ XFS_BMAPI_COWFORK | XFS_BMAPI_PREALLOC, 0, cmap,
+ &nimaps);
if (error)
goto out_unreserve;
@@ -427,15 +425,15 @@ xfs_reflink_allocate_cow(
if (nimaps == 0)
return -ENOSPC;
convert:
- xfs_trim_extent(imap, offset_fsb, count_fsb);
+ xfs_trim_extent(cmap, offset_fsb, count_fsb);
/*
* COW fork extents are supposed to remain unwritten until we're ready
* to initiate a disk write. For direct I/O we are going to write the
* data and need the conversion, but for buffered writes we're done.
*/
- if (!convert_now || imap->br_state == XFS_EXT_NORM)
+ if (!convert_now || cmap->br_state == XFS_EXT_NORM)
return 0;
- trace_xfs_reflink_convert_cow(ip, imap);
+ trace_xfs_reflink_convert_cow(ip, cmap);
return xfs_reflink_convert_cow_locked(ip, offset_fsb, count_fsb);
out_unreserve:
@@ -495,10 +493,8 @@ xfs_reflink_cancel_cow_blocks(
ASSERT((*tpp)->t_firstblock == NULLFSBLOCK);
/* Free the CoW orphan record. */
- error = xfs_refcount_free_cow_extent(*tpp,
- del.br_startblock, del.br_blockcount);
- if (error)
- break;
+ xfs_refcount_free_cow_extent(*tpp, del.br_startblock,
+ del.br_blockcount);
xfs_bmap_add_free(*tpp, del.br_startblock,
del.br_blockcount, NULL);
@@ -675,15 +671,10 @@ xfs_reflink_end_cow_extent(
trace_xfs_reflink_cow_remap(ip, &del);
/* Free the CoW orphan record. */
- error = xfs_refcount_free_cow_extent(tp, del.br_startblock,
- del.br_blockcount);
- if (error)
- goto out_cancel;
+ xfs_refcount_free_cow_extent(tp, del.br_startblock, del.br_blockcount);
/* Map the new blocks into the data fork. */
- error = xfs_bmap_map_extent(tp, ip, &del);
- if (error)
- goto out_cancel;
+ xfs_bmap_map_extent(tp, ip, &del);
/* Charge this new data fork mapping to the on-disk quota. */
xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_DELBCOUNT,
@@ -1070,14 +1061,10 @@ xfs_reflink_remap_extent(
uirec.br_blockcount, uirec.br_startblock);
/* Update the refcount tree */
- error = xfs_refcount_increase_extent(tp, &uirec);
- if (error)
- goto out_cancel;
+ xfs_refcount_increase_extent(tp, &uirec);
/* Map the new blocks into the data fork. */
- error = xfs_bmap_map_extent(tp, ip, &uirec);
- if (error)
- goto out_cancel;
+ xfs_bmap_map_extent(tp, ip, &uirec);
/* Update quota accounting. */
xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT,
@@ -1190,11 +1177,11 @@ xfs_reflink_remap_blocks(
}
/*
- * Grab the exclusive iolock for a data copy from src to dest, making
- * sure to abide vfs locking order (lowest pointer value goes first) and
- * breaking the pnfs layout leases on dest before proceeding. The loop
- * is needed because we cannot call the blocking break_layout() with the
- * src iolock held, and therefore have to back out both locks.
+ * Grab the exclusive iolock for a data copy from src to dest, making sure to
+ * abide vfs locking order (lowest pointer value goes first) and breaking the
+ * layout leases before proceeding. The loop is needed because we cannot call
+ * the blocking break_layout() with the iolocks held, and therefore have to
+ * back out both locks.
*/
static int
xfs_iolock_two_inodes_and_break_layout(
@@ -1203,33 +1190,44 @@ xfs_iolock_two_inodes_and_break_layout(
{
int error;
-retry:
- if (src < dest) {
- inode_lock_shared(src);
- inode_lock_nested(dest, I_MUTEX_NONDIR2);
- } else {
- /* src >= dest */
- inode_lock(dest);
- }
+ if (src > dest)
+ swap(src, dest);
- error = break_layout(dest, false);
- if (error == -EWOULDBLOCK) {
- inode_unlock(dest);
- if (src < dest)
- inode_unlock_shared(src);
+retry:
+ /* Wait to break both inodes' layouts before we start locking. */
+ error = break_layout(src, true);
+ if (error)
+ return error;
+ if (src != dest) {
error = break_layout(dest, true);
if (error)
return error;
- goto retry;
}
+
+ /* Lock one inode and make sure nobody got in and leased it. */
+ inode_lock(src);
+ error = break_layout(src, false);
+ if (error) {
+ inode_unlock(src);
+ if (error == -EWOULDBLOCK)
+ goto retry;
+ return error;
+ }
+
+ if (src == dest)
+ return 0;
+
+ /* Lock the other inode and make sure nobody got in and leased it. */
+ inode_lock_nested(dest, I_MUTEX_NONDIR2);
+ error = break_layout(dest, false);
if (error) {
+ inode_unlock(src);
inode_unlock(dest);
- if (src < dest)
- inode_unlock_shared(src);
+ if (error == -EWOULDBLOCK)
+ goto retry;
return error;
}
- if (src > dest)
- inode_lock_shared_nested(src, I_MUTEX_NONDIR2);
+
return 0;
}
@@ -1247,10 +1245,10 @@ xfs_reflink_remap_unlock(
xfs_iunlock(dest, XFS_MMAPLOCK_EXCL);
if (!same_inode)
- xfs_iunlock(src, XFS_MMAPLOCK_SHARED);
+ xfs_iunlock(src, XFS_MMAPLOCK_EXCL);
inode_unlock(inode_out);
if (!same_inode)
- inode_unlock_shared(inode_in);
+ inode_unlock(inode_in);
}
/*
@@ -1270,7 +1268,7 @@ xfs_reflink_zero_posteof(
trace_xfs_zero_eof(ip, isize, pos - isize);
return iomap_zero_range(VFS_I(ip), isize, pos - isize, NULL,
- &xfs_iomap_ops);
+ &xfs_buffered_write_iomap_ops);
}
/*
@@ -1325,7 +1323,7 @@ xfs_reflink_remap_prep(
if (same_inode)
xfs_ilock(src, XFS_MMAPLOCK_EXCL);
else
- xfs_lock_two_inodes(src, XFS_MMAPLOCK_SHARED, dest,
+ xfs_lock_two_inodes(src, XFS_MMAPLOCK_EXCL, dest,
XFS_MMAPLOCK_EXCL);
/* Check file eligibility and prepare for block sharing. */
@@ -1381,85 +1379,6 @@ out_unlock:
return ret;
}
-/*
- * The user wants to preemptively CoW all shared blocks in this file,
- * which enables us to turn off the reflink flag. Iterate all
- * extents which are not prealloc/delalloc to see which ranges are
- * mentioned in the refcount tree, then read those blocks into the
- * pagecache, dirty them, fsync them back out, and then we can update
- * the inode flag. What happens if we run out of memory? :)
- */
-STATIC int
-xfs_reflink_dirty_extents(
- struct xfs_inode *ip,
- xfs_fileoff_t fbno,
- xfs_filblks_t end,
- xfs_off_t isize)
-{
- struct xfs_mount *mp = ip->i_mount;
- xfs_agnumber_t agno;
- xfs_agblock_t agbno;
- xfs_extlen_t aglen;
- xfs_agblock_t rbno;
- xfs_extlen_t rlen;
- xfs_off_t fpos;
- xfs_off_t flen;
- struct xfs_bmbt_irec map[2];
- int nmaps;
- int error = 0;
-
- while (end - fbno > 0) {
- nmaps = 1;
- /*
- * Look for extents in the file. Skip holes, delalloc, or
- * unwritten extents; they can't be reflinked.
- */
- error = xfs_bmapi_read(ip, fbno, end - fbno, map, &nmaps, 0);
- if (error)
- goto out;
- if (nmaps == 0)
- break;
- if (!xfs_bmap_is_real_extent(&map[0]))
- goto next;
-
- map[1] = map[0];
- while (map[1].br_blockcount) {
- agno = XFS_FSB_TO_AGNO(mp, map[1].br_startblock);
- agbno = XFS_FSB_TO_AGBNO(mp, map[1].br_startblock);
- aglen = map[1].br_blockcount;
-
- error = xfs_reflink_find_shared(mp, NULL, agno, agbno,
- aglen, &rbno, &rlen, true);
- if (error)
- goto out;
- if (rbno == NULLAGBLOCK)
- break;
-
- /* Dirty the pages */
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- fpos = XFS_FSB_TO_B(mp, map[1].br_startoff +
- (rbno - agbno));
- flen = XFS_FSB_TO_B(mp, rlen);
- if (fpos + flen > isize)
- flen = isize - fpos;
- error = iomap_file_dirty(VFS_I(ip), fpos, flen,
- &xfs_iomap_ops);
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- if (error)
- goto out;
-
- map[1].br_blockcount -= (rbno - agbno + rlen);
- map[1].br_startoff += (rbno - agbno + rlen);
- map[1].br_startblock += (rbno - agbno + rlen);
- }
-
-next:
- fbno = map[0].br_startoff + map[0].br_blockcount;
- }
-out:
- return error;
-}
-
/* Does this inode need the reflink flag? */
int
xfs_reflink_inode_has_shared_extents(
@@ -1536,7 +1455,8 @@ xfs_reflink_clear_inode_flag(
* We didn't find any shared blocks so turn off the reflink flag.
* First, get rid of any leftover CoW mappings.
*/
- error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true);
+ error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, XFS_MAX_FILEOFF,
+ true);
if (error)
return error;
@@ -1596,10 +1516,7 @@ xfs_reflink_unshare(
xfs_off_t offset,
xfs_off_t len)
{
- struct xfs_mount *mp = ip->i_mount;
- xfs_fileoff_t fbno;
- xfs_filblks_t end;
- xfs_off_t isize;
+ struct inode *inode = VFS_I(ip);
int error;
if (!xfs_is_reflink_inode(ip))
@@ -1607,20 +1524,13 @@ xfs_reflink_unshare(
trace_xfs_reflink_unshare(ip, offset, len);
- inode_dio_wait(VFS_I(ip));
+ inode_dio_wait(inode);
- /* Try to CoW the selected ranges */
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- fbno = XFS_B_TO_FSBT(mp, offset);
- isize = i_size_read(VFS_I(ip));
- end = XFS_B_TO_FSB(mp, offset + len);
- error = xfs_reflink_dirty_extents(ip, fbno, end, isize);
+ error = iomap_file_unshare(inode, offset, len,
+ &xfs_buffered_write_iomap_ops);
if (error)
- goto out_unlock;
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
- /* Wait for the IO to finish */
- error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
+ goto out;
+ error = filemap_write_and_wait(inode->i_mapping);
if (error)
goto out;
@@ -1628,11 +1538,8 @@ xfs_reflink_unshare(
error = xfs_reflink_try_clear_inode_flag(ip);
if (error)
goto out;
-
return 0;
-out_unlock:
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
out:
trace_xfs_reflink_unshare_error(ip, error, _RET_IP_);
return error;
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 28a43b7f581d..3e4fd46373ab 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -22,11 +22,11 @@ extern int xfs_reflink_find_shared(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agblock_t *fbno, xfs_extlen_t *flen, bool find_maximal);
extern int xfs_reflink_trim_around_shared(struct xfs_inode *ip,
struct xfs_bmbt_irec *irec, bool *shared);
-bool xfs_inode_need_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap,
+int xfs_bmap_trim_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap,
bool *shared);
-extern int xfs_reflink_allocate_cow(struct xfs_inode *ip,
- struct xfs_bmbt_irec *imap, bool *shared, uint *lockmode,
+int xfs_reflink_allocate_cow(struct xfs_inode *ip, struct xfs_bmbt_irec *imap,
+ struct xfs_bmbt_irec *cmap, bool *shared, uint *lockmode,
bool convert_now);
extern int xfs_reflink_convert_cow(struct xfs_inode *ip, xfs_off_t offset,
xfs_off_t count);
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index 77ed557b6127..4911b68f95dd 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -17,7 +17,7 @@
#include "xfs_rmap_item.h"
#include "xfs_log.h"
#include "xfs_rmap.h"
-
+#include "xfs_error.h"
kmem_zone_t *xfs_rui_zone;
kmem_zone_t *xfs_rud_zone;
@@ -34,7 +34,7 @@ xfs_rui_item_free(
if (ruip->rui_format.rui_nextents > XFS_RUI_MAX_FAST_EXTENTS)
kmem_free(ruip);
else
- kmem_zone_free(xfs_rui_zone, ruip);
+ kmem_cache_free(xfs_rui_zone, ruip);
}
/*
@@ -142,9 +142,9 @@ xfs_rui_init(
ASSERT(nextents > 0);
if (nextents > XFS_RUI_MAX_FAST_EXTENTS)
- ruip = kmem_zalloc(xfs_rui_log_item_sizeof(nextents), KM_SLEEP);
+ ruip = kmem_zalloc(xfs_rui_log_item_sizeof(nextents), 0);
else
- ruip = kmem_zone_zalloc(xfs_rui_zone, KM_SLEEP);
+ ruip = kmem_zone_zalloc(xfs_rui_zone, 0);
xfs_log_item_init(mp, &ruip->rui_item, XFS_LI_RUI, &xfs_rui_item_ops);
ruip->rui_format.rui_nextents = nextents;
@@ -171,8 +171,10 @@ xfs_rui_copy_format(
src_rui_fmt = buf->i_addr;
len = xfs_rui_log_format_sizeof(src_rui_fmt->rui_nextents);
- if (buf->i_len != len)
+ if (buf->i_len != len) {
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
return -EFSCORRUPTED;
+ }
memcpy(dst_rui_fmt, src_rui_fmt, len);
return 0;
@@ -227,7 +229,7 @@ xfs_rud_item_release(
struct xfs_rud_log_item *rudp = RUD_ITEM(lip);
xfs_rui_release(rudp->rud_ruip);
- kmem_zone_free(xfs_rud_zone, rudp);
+ kmem_cache_free(xfs_rud_zone, rudp);
}
static const struct xfs_item_ops xfs_rud_item_ops = {
@@ -244,7 +246,7 @@ xfs_trans_get_rud(
{
struct xfs_rud_log_item *rudp;
- rudp = kmem_zone_zalloc(xfs_rud_zone, KM_SLEEP);
+ rudp = kmem_zone_zalloc(xfs_rud_zone, 0);
xfs_log_item_init(tp->t_mountp, &rudp->rud_item, XFS_LI_RUD,
&xfs_rud_item_ops);
rudp->rud_ruip = ruip;
@@ -539,7 +541,7 @@ xfs_rui_recover(
*/
set_bit(XFS_RUI_RECOVERED, &ruip->rui_flags);
xfs_rui_release(ruip);
- return -EIO;
+ return -EFSCORRUPTED;
}
}
@@ -581,6 +583,7 @@ xfs_rui_recover(
type = XFS_RMAP_FREE;
break;
default:
+ XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, NULL);
error = -EFSCORRUPTED;
goto abort_error;
}
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 5fa4db3c3e32..6209e7b6b895 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -792,8 +792,7 @@ xfs_growfs_rt_alloc(
*/
nmap = 1;
error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks,
- XFS_BMAPI_METADATA, resblks, &map,
- &nmap);
+ XFS_BMAPI_METADATA, 0, &map, &nmap);
if (!error && nmap < 1)
error = -ENOSPC;
if (error)
@@ -827,12 +826,10 @@ xfs_growfs_rt_alloc(
* Get a buffer for the block.
*/
d = XFS_FSB_TO_DADDR(mp, fsbno);
- bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
- mp->m_bsize, 0);
- if (bp == NULL) {
- error = -EIO;
+ error = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
+ mp->m_bsize, 0, &bp);
+ if (error)
goto out_trans_cancel;
- }
memset(bp->b_addr, 0, mp->m_sb.sb_blocksize);
xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
/*
@@ -865,7 +862,7 @@ xfs_alloc_rsum_cache(
* lower bound on the minimum level with any free extents. We can
* continue without the cache if it couldn't be allocated.
*/
- mp->m_rsum_cache = kmem_zalloc_large(rbmblocks, KM_SLEEP);
+ mp->m_rsum_cache = kmem_zalloc_large(rbmblocks, 0);
if (!mp->m_rsum_cache)
xfs_warn(mp, "could not allocate realtime summary cache");
}
@@ -963,7 +960,7 @@ xfs_growfs_rt(
/*
* Allocate a new (fake) mount/sb.
*/
- nmp = kmem_alloc(sizeof(*nmp), KM_SLEEP);
+ nmp = kmem_alloc(sizeof(*nmp), 0);
/*
* Loop over the bitmap blocks.
* We will do everything one bitmap block at a time.
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index f9450235533c..2094386af8ac 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -37,10 +37,10 @@
#include "xfs_reflink.h"
#include <linux/magic.h>
-#include <linux/parser.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
static const struct super_operations xfs_super_operations;
-struct bio_set xfs_ioend_bioset;
static struct kset *xfs_kset; /* top-level xfs sysfs dir */
#ifdef DEBUG
@@ -51,7 +51,7 @@ static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */
* Table driven mount option parser.
*/
enum {
- Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, Opt_biosize,
+ Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev,
Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid,
Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32, Opt_ikeep,
@@ -59,382 +59,62 @@ enum {
Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota,
Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota,
Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
- Opt_discard, Opt_nodiscard, Opt_dax, Opt_err,
+ Opt_discard, Opt_nodiscard, Opt_dax,
};
-static const match_table_t tokens = {
- {Opt_logbufs, "logbufs=%u"}, /* number of XFS log buffers */
- {Opt_logbsize, "logbsize=%s"}, /* size of XFS log buffers */
- {Opt_logdev, "logdev=%s"}, /* log device */
- {Opt_rtdev, "rtdev=%s"}, /* realtime I/O device */
- {Opt_biosize, "biosize=%u"}, /* log2 of preferred buffered io size */
- {Opt_wsync, "wsync"}, /* safe-mode nfs compatible mount */
- {Opt_noalign, "noalign"}, /* turn off stripe alignment */
- {Opt_swalloc, "swalloc"}, /* turn on stripe width allocation */
- {Opt_sunit, "sunit=%u"}, /* data volume stripe unit */
- {Opt_swidth, "swidth=%u"}, /* data volume stripe width */
- {Opt_nouuid, "nouuid"}, /* ignore filesystem UUID */
- {Opt_grpid, "grpid"}, /* group-ID from parent directory */
- {Opt_nogrpid, "nogrpid"}, /* group-ID from current process */
- {Opt_bsdgroups, "bsdgroups"}, /* group-ID from parent directory */
- {Opt_sysvgroups,"sysvgroups"}, /* group-ID from current process */
- {Opt_allocsize, "allocsize=%s"},/* preferred allocation size */
- {Opt_norecovery,"norecovery"}, /* don't run XFS recovery */
- {Opt_inode64, "inode64"}, /* inodes can be allocated anywhere */
- {Opt_inode32, "inode32"}, /* inode allocation limited to
- * XFS_MAXINUMBER_32 */
- {Opt_ikeep, "ikeep"}, /* do not free empty inode clusters */
- {Opt_noikeep, "noikeep"}, /* free empty inode clusters */
- {Opt_largeio, "largeio"}, /* report large I/O sizes in stat() */
- {Opt_nolargeio, "nolargeio"}, /* do not report large I/O sizes
- * in stat(). */
- {Opt_attr2, "attr2"}, /* do use attr2 attribute format */
- {Opt_noattr2, "noattr2"}, /* do not use attr2 attribute format */
- {Opt_filestreams,"filestreams"},/* use filestreams allocator */
- {Opt_quota, "quota"}, /* disk quotas (user) */
- {Opt_noquota, "noquota"}, /* no quotas */
- {Opt_usrquota, "usrquota"}, /* user quota enabled */
- {Opt_grpquota, "grpquota"}, /* group quota enabled */
- {Opt_prjquota, "prjquota"}, /* project quota enabled */
- {Opt_uquota, "uquota"}, /* user quota (IRIX variant) */
- {Opt_gquota, "gquota"}, /* group quota (IRIX variant) */
- {Opt_pquota, "pquota"}, /* project quota (IRIX variant) */
- {Opt_uqnoenforce,"uqnoenforce"},/* user quota limit enforcement */
- {Opt_gqnoenforce,"gqnoenforce"},/* group quota limit enforcement */
- {Opt_pqnoenforce,"pqnoenforce"},/* project quota limit enforcement */
- {Opt_qnoenforce, "qnoenforce"}, /* same as uqnoenforce */
- {Opt_discard, "discard"}, /* Discard unused blocks */
- {Opt_nodiscard, "nodiscard"}, /* Do not discard unused blocks */
- {Opt_dax, "dax"}, /* Enable direct access to bdev pages */
- {Opt_err, NULL},
+static const struct fs_parameter_spec xfs_fs_parameters[] = {
+ fsparam_u32("logbufs", Opt_logbufs),
+ fsparam_string("logbsize", Opt_logbsize),
+ fsparam_string("logdev", Opt_logdev),
+ fsparam_string("rtdev", Opt_rtdev),
+ fsparam_flag("wsync", Opt_wsync),
+ fsparam_flag("noalign", Opt_noalign),
+ fsparam_flag("swalloc", Opt_swalloc),
+ fsparam_u32("sunit", Opt_sunit),
+ fsparam_u32("swidth", Opt_swidth),
+ fsparam_flag("nouuid", Opt_nouuid),
+ fsparam_flag("grpid", Opt_grpid),
+ fsparam_flag("nogrpid", Opt_nogrpid),
+ fsparam_flag("bsdgroups", Opt_bsdgroups),
+ fsparam_flag("sysvgroups", Opt_sysvgroups),
+ fsparam_string("allocsize", Opt_allocsize),
+ fsparam_flag("norecovery", Opt_norecovery),
+ fsparam_flag("inode64", Opt_inode64),
+ fsparam_flag("inode32", Opt_inode32),
+ fsparam_flag("ikeep", Opt_ikeep),
+ fsparam_flag("noikeep", Opt_noikeep),
+ fsparam_flag("largeio", Opt_largeio),
+ fsparam_flag("nolargeio", Opt_nolargeio),
+ fsparam_flag("attr2", Opt_attr2),
+ fsparam_flag("noattr2", Opt_noattr2),
+ fsparam_flag("filestreams", Opt_filestreams),
+ fsparam_flag("quota", Opt_quota),
+ fsparam_flag("noquota", Opt_noquota),
+ fsparam_flag("usrquota", Opt_usrquota),
+ fsparam_flag("grpquota", Opt_grpquota),
+ fsparam_flag("prjquota", Opt_prjquota),
+ fsparam_flag("uquota", Opt_uquota),
+ fsparam_flag("gquota", Opt_gquota),
+ fsparam_flag("pquota", Opt_pquota),
+ fsparam_flag("uqnoenforce", Opt_uqnoenforce),
+ fsparam_flag("gqnoenforce", Opt_gqnoenforce),
+ fsparam_flag("pqnoenforce", Opt_pqnoenforce),
+ fsparam_flag("qnoenforce", Opt_qnoenforce),
+ fsparam_flag("discard", Opt_discard),
+ fsparam_flag("nodiscard", Opt_nodiscard),
+ fsparam_flag("dax", Opt_dax),
+ {}
};
-
-STATIC int
-suffix_kstrtoint(const substring_t *s, unsigned int base, int *res)
-{
- int last, shift_left_factor = 0, _res;
- char *value;
- int ret = 0;
-
- value = match_strdup(s);
- if (!value)
- return -ENOMEM;
-
- last = strlen(value) - 1;
- if (value[last] == 'K' || value[last] == 'k') {
- shift_left_factor = 10;
- value[last] = '\0';
- }
- if (value[last] == 'M' || value[last] == 'm') {
- shift_left_factor = 20;
- value[last] = '\0';
- }
- if (value[last] == 'G' || value[last] == 'g') {
- shift_left_factor = 30;
- value[last] = '\0';
- }
-
- if (kstrtoint(value, base, &_res))
- ret = -EINVAL;
- kfree(value);
- *res = _res << shift_left_factor;
- return ret;
-}
-
-/*
- * This function fills in xfs_mount_t fields based on mount args.
- * Note: the superblock has _not_ yet been read in.
- *
- * Note that this function leaks the various device name allocations on
- * failure. The caller takes care of them.
- *
- * *sb is const because this is also used to test options on the remount
- * path, and we don't want this to have any side effects at remount time.
- * Today this function does not change *sb, but just to future-proof...
- */
-STATIC int
-xfs_parseargs(
- struct xfs_mount *mp,
- char *options)
-{
- const struct super_block *sb = mp->m_super;
- char *p;
- substring_t args[MAX_OPT_ARGS];
- int dsunit = 0;
- int dswidth = 0;
- int iosize = 0;
- uint8_t iosizelog = 0;
-
- /*
- * set up the mount name first so all the errors will refer to the
- * correct device.
- */
- mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
- if (!mp->m_fsname)
- return -ENOMEM;
- mp->m_fsname_len = strlen(mp->m_fsname) + 1;
-
- /*
- * Copy binary VFS mount flags we are interested in.
- */
- if (sb_rdonly(sb))
- mp->m_flags |= XFS_MOUNT_RDONLY;
- if (sb->s_flags & SB_DIRSYNC)
- mp->m_flags |= XFS_MOUNT_DIRSYNC;
- if (sb->s_flags & SB_SYNCHRONOUS)
- mp->m_flags |= XFS_MOUNT_WSYNC;
-
- /*
- * Set some default flags that could be cleared by the mount option
- * parsing.
- */
- mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
-
- /*
- * These can be overridden by the mount option parsing.
- */
- mp->m_logbufs = -1;
- mp->m_logbsize = -1;
-
- if (!options)
- goto done;
-
- while ((p = strsep(&options, ",")) != NULL) {
- int token;
-
- if (!*p)
- continue;
-
- token = match_token(p, tokens, args);
- switch (token) {
- case Opt_logbufs:
- if (match_int(args, &mp->m_logbufs))
- return -EINVAL;
- break;
- case Opt_logbsize:
- if (suffix_kstrtoint(args, 10, &mp->m_logbsize))
- return -EINVAL;
- break;
- case Opt_logdev:
- kfree(mp->m_logname);
- mp->m_logname = match_strdup(args);
- if (!mp->m_logname)
- return -ENOMEM;
- break;
- case Opt_rtdev:
- kfree(mp->m_rtname);
- mp->m_rtname = match_strdup(args);
- if (!mp->m_rtname)
- return -ENOMEM;
- break;
- case Opt_allocsize:
- case Opt_biosize:
- if (suffix_kstrtoint(args, 10, &iosize))
- return -EINVAL;
- iosizelog = ffs(iosize) - 1;
- break;
- case Opt_grpid:
- case Opt_bsdgroups:
- mp->m_flags |= XFS_MOUNT_GRPID;
- break;
- case Opt_nogrpid:
- case Opt_sysvgroups:
- mp->m_flags &= ~XFS_MOUNT_GRPID;
- break;
- case Opt_wsync:
- mp->m_flags |= XFS_MOUNT_WSYNC;
- break;
- case Opt_norecovery:
- mp->m_flags |= XFS_MOUNT_NORECOVERY;
- break;
- case Opt_noalign:
- mp->m_flags |= XFS_MOUNT_NOALIGN;
- break;
- case Opt_swalloc:
- mp->m_flags |= XFS_MOUNT_SWALLOC;
- break;
- case Opt_sunit:
- if (match_int(args, &dsunit))
- return -EINVAL;
- break;
- case Opt_swidth:
- if (match_int(args, &dswidth))
- return -EINVAL;
- break;
- case Opt_inode32:
- mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
- break;
- case Opt_inode64:
- mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
- break;
- case Opt_nouuid:
- mp->m_flags |= XFS_MOUNT_NOUUID;
- break;
- case Opt_ikeep:
- mp->m_flags |= XFS_MOUNT_IKEEP;
- break;
- case Opt_noikeep:
- mp->m_flags &= ~XFS_MOUNT_IKEEP;
- break;
- case Opt_largeio:
- mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
- break;
- case Opt_nolargeio:
- mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
- break;
- case Opt_attr2:
- mp->m_flags |= XFS_MOUNT_ATTR2;
- break;
- case Opt_noattr2:
- mp->m_flags &= ~XFS_MOUNT_ATTR2;
- mp->m_flags |= XFS_MOUNT_NOATTR2;
- break;
- case Opt_filestreams:
- mp->m_flags |= XFS_MOUNT_FILESTREAMS;
- break;
- case Opt_noquota:
- mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
- mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
- mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE;
- break;
- case Opt_quota:
- case Opt_uquota:
- case Opt_usrquota:
- mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
- XFS_UQUOTA_ENFD);
- break;
- case Opt_qnoenforce:
- case Opt_uqnoenforce:
- mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
- mp->m_qflags &= ~XFS_UQUOTA_ENFD;
- break;
- case Opt_pquota:
- case Opt_prjquota:
- mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
- XFS_PQUOTA_ENFD);
- break;
- case Opt_pqnoenforce:
- mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
- mp->m_qflags &= ~XFS_PQUOTA_ENFD;
- break;
- case Opt_gquota:
- case Opt_grpquota:
- mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
- XFS_GQUOTA_ENFD);
- break;
- case Opt_gqnoenforce:
- mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
- mp->m_qflags &= ~XFS_GQUOTA_ENFD;
- break;
- case Opt_discard:
- mp->m_flags |= XFS_MOUNT_DISCARD;
- break;
- case Opt_nodiscard:
- mp->m_flags &= ~XFS_MOUNT_DISCARD;
- break;
-#ifdef CONFIG_FS_DAX
- case Opt_dax:
- mp->m_flags |= XFS_MOUNT_DAX;
- break;
-#endif
- default:
- xfs_warn(mp, "unknown mount option [%s].", p);
- return -EINVAL;
- }
- }
-
- /*
- * no recovery flag requires a read-only mount
- */
- if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
- !(mp->m_flags & XFS_MOUNT_RDONLY)) {
- xfs_warn(mp, "no-recovery mounts must be read-only.");
- return -EINVAL;
- }
-
- if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
- xfs_warn(mp,
- "sunit and swidth options incompatible with the noalign option");
- return -EINVAL;
- }
-
-#ifndef CONFIG_XFS_QUOTA
- if (XFS_IS_QUOTA_RUNNING(mp)) {
- xfs_warn(mp, "quota support not available in this kernel.");
- return -EINVAL;
- }
-#endif
-
- if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
- xfs_warn(mp, "sunit and swidth must be specified together");
- return -EINVAL;
- }
-
- if (dsunit && (dswidth % dsunit != 0)) {
- xfs_warn(mp,
- "stripe width (%d) must be a multiple of the stripe unit (%d)",
- dswidth, dsunit);
- return -EINVAL;
- }
-
-done:
- if (dsunit && !(mp->m_flags & XFS_MOUNT_NOALIGN)) {
- /*
- * At this point the superblock has not been read
- * in, therefore we do not know the block size.
- * Before the mount call ends we will convert
- * these to FSBs.
- */
- mp->m_dalign = dsunit;
- mp->m_swidth = dswidth;
- }
-
- if (mp->m_logbufs != -1 &&
- mp->m_logbufs != 0 &&
- (mp->m_logbufs < XLOG_MIN_ICLOGS ||
- mp->m_logbufs > XLOG_MAX_ICLOGS)) {
- xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
- mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
- return -EINVAL;
- }
- if (mp->m_logbsize != -1 &&
- mp->m_logbsize != 0 &&
- (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
- mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
- !is_power_of_2(mp->m_logbsize))) {
- xfs_warn(mp,
- "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
- mp->m_logbsize);
- return -EINVAL;
- }
-
- if (iosizelog) {
- if (iosizelog > XFS_MAX_IO_LOG ||
- iosizelog < XFS_MIN_IO_LOG) {
- xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
- iosizelog, XFS_MIN_IO_LOG,
- XFS_MAX_IO_LOG);
- return -EINVAL;
- }
-
- mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
- mp->m_readio_log = iosizelog;
- mp->m_writeio_log = iosizelog;
- }
-
- return 0;
-}
-
struct proc_xfs_info {
uint64_t flag;
char *str;
};
-STATIC void
-xfs_showargs(
- struct xfs_mount *mp,
- struct seq_file *m)
+static int
+xfs_fs_show_options(
+ struct seq_file *m,
+ struct dentry *root)
{
static struct proc_xfs_info xfs_info_set[] = {
/* the few simple ones we can get from the mount struct */
@@ -448,30 +128,24 @@ xfs_showargs(
{ XFS_MOUNT_FILESTREAMS, ",filestreams" },
{ XFS_MOUNT_GRPID, ",grpid" },
{ XFS_MOUNT_DISCARD, ",discard" },
- { XFS_MOUNT_SMALL_INUMS, ",inode32" },
+ { XFS_MOUNT_LARGEIO, ",largeio" },
{ XFS_MOUNT_DAX, ",dax" },
{ 0, NULL }
};
- static struct proc_xfs_info xfs_info_unset[] = {
- /* the few simple ones we can get from the mount struct */
- { XFS_MOUNT_COMPAT_IOSIZE, ",largeio" },
- { XFS_MOUNT_SMALL_INUMS, ",inode64" },
- { 0, NULL }
- };
+ struct xfs_mount *mp = XFS_M(root->d_sb);
struct proc_xfs_info *xfs_infop;
for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
if (mp->m_flags & xfs_infop->flag)
seq_puts(m, xfs_infop->str);
}
- for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) {
- if (!(mp->m_flags & xfs_infop->flag))
- seq_puts(m, xfs_infop->str);
- }
- if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
+ seq_printf(m, ",inode%d",
+ (mp->m_flags & XFS_MOUNT_SMALL_INUMS) ? 32 : 64);
+
+ if (mp->m_flags & XFS_MOUNT_ALLOCSIZE)
seq_printf(m, ",allocsize=%dk",
- (int)(1 << mp->m_writeio_log) >> 10);
+ (1 << mp->m_allocsize_log) >> 10);
if (mp->m_logbufs > 0)
seq_printf(m, ",logbufs=%d", mp->m_logbufs);
@@ -510,32 +184,8 @@ xfs_showargs(
if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
seq_puts(m, ",noquota");
-}
-
-static uint64_t
-xfs_max_file_offset(
- unsigned int blockshift)
-{
- unsigned int pagefactor = 1;
- unsigned int bitshift = BITS_PER_LONG - 1;
-
- /* Figure out maximum filesize, on Linux this can depend on
- * the filesystem blocksize (on 32 bit platforms).
- * __block_write_begin does this in an [unsigned] long long...
- * page->index << (PAGE_SHIFT - bbits)
- * So, for page sized blocks (4K on 32 bit platforms),
- * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
- * (((u64)PAGE_SIZE << (BITS_PER_LONG-1))-1)
- * but for smaller blocksizes it is less (bbits = log2 bsize).
- */
-
-#if BITS_PER_LONG == 32
- ASSERT(sizeof(sector_t) == 8);
- pagefactor = PAGE_SIZE;
- bitshift = BITS_PER_LONG;
-#endif
- return (((uint64_t)pagefactor) << bitshift) - 1;
+ return 0;
}
/*
@@ -808,32 +458,33 @@ xfs_init_mount_workqueues(
struct xfs_mount *mp)
{
mp->m_buf_workqueue = alloc_workqueue("xfs-buf/%s",
- WQ_MEM_RECLAIM|WQ_FREEZABLE, 1, mp->m_fsname);
+ WQ_MEM_RECLAIM|WQ_FREEZABLE, 1, mp->m_super->s_id);
if (!mp->m_buf_workqueue)
goto out;
mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
- WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
+ WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_super->s_id);
if (!mp->m_unwritten_workqueue)
goto out_destroy_buf;
mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s",
- WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
+ WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_UNBOUND,
+ 0, mp->m_super->s_id);
if (!mp->m_cil_workqueue)
goto out_destroy_unwritten;
mp->m_reclaim_workqueue = alloc_workqueue("xfs-reclaim/%s",
- WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
+ WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_super->s_id);
if (!mp->m_reclaim_workqueue)
goto out_destroy_cil;
mp->m_eofblocks_workqueue = alloc_workqueue("xfs-eofblocks/%s",
- WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname);
+ WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_super->s_id);
if (!mp->m_eofblocks_workqueue)
goto out_destroy_reclaim;
mp->m_sync_workqueue = alloc_workqueue("xfs-sync/%s", WQ_FREEZABLE, 0,
- mp->m_fsname);
+ mp->m_super->s_id);
if (!mp->m_sync_workqueue)
goto out_destroy_eofb;
@@ -1037,13 +688,13 @@ xfs_fs_drop_inode(
return generic_drop_inode(inode) || (ip->i_flags & XFS_IDONTCACHE);
}
-STATIC void
-xfs_free_fsname(
+static void
+xfs_mount_free(
struct xfs_mount *mp)
{
- kfree(mp->m_fsname);
kfree(mp->m_rtname);
kfree(mp->m_logname);
+ kmem_free(mp);
}
STATIC int
@@ -1204,181 +855,6 @@ xfs_quiesce_attr(
xfs_log_quiesce(mp);
}
-STATIC int
-xfs_test_remount_options(
- struct super_block *sb,
- char *options)
-{
- int error = 0;
- struct xfs_mount *tmp_mp;
-
- tmp_mp = kmem_zalloc(sizeof(*tmp_mp), KM_MAYFAIL);
- if (!tmp_mp)
- return -ENOMEM;
-
- tmp_mp->m_super = sb;
- error = xfs_parseargs(tmp_mp, options);
- xfs_free_fsname(tmp_mp);
- kmem_free(tmp_mp);
-
- return error;
-}
-
-STATIC int
-xfs_fs_remount(
- struct super_block *sb,
- int *flags,
- char *options)
-{
- struct xfs_mount *mp = XFS_M(sb);
- xfs_sb_t *sbp = &mp->m_sb;
- substring_t args[MAX_OPT_ARGS];
- char *p;
- int error;
-
- /* First, check for complete junk; i.e. invalid options */
- error = xfs_test_remount_options(sb, options);
- if (error)
- return error;
-
- sync_filesystem(sb);
- while ((p = strsep(&options, ",")) != NULL) {
- int token;
-
- if (!*p)
- continue;
-
- token = match_token(p, tokens, args);
- switch (token) {
- case Opt_inode64:
- mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
- mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
- break;
- case Opt_inode32:
- mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
- mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
- break;
- default:
- /*
- * Logically we would return an error here to prevent
- * users from believing they might have changed
- * mount options using remount which can't be changed.
- *
- * But unfortunately mount(8) adds all options from
- * mtab and fstab to the mount arguments in some cases
- * so we can't blindly reject options, but have to
- * check for each specified option if it actually
- * differs from the currently set option and only
- * reject it if that's the case.
- *
- * Until that is implemented we return success for
- * every remount request, and silently ignore all
- * options that we can't actually change.
- */
-#if 0
- xfs_info(mp,
- "mount option \"%s\" not supported for remount", p);
- return -EINVAL;
-#else
- break;
-#endif
- }
- }
-
- /* ro -> rw */
- if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & SB_RDONLY)) {
- if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
- xfs_warn(mp,
- "ro->rw transition prohibited on norecovery mount");
- return -EINVAL;
- }
-
- if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
- xfs_sb_has_ro_compat_feature(sbp,
- XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
- xfs_warn(mp,
-"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
- (sbp->sb_features_ro_compat &
- XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
- return -EINVAL;
- }
-
- mp->m_flags &= ~XFS_MOUNT_RDONLY;
-
- /*
- * If this is the first remount to writeable state we
- * might have some superblock changes to update.
- */
- if (mp->m_update_sb) {
- error = xfs_sync_sb(mp, false);
- if (error) {
- xfs_warn(mp, "failed to write sb changes");
- return error;
- }
- mp->m_update_sb = false;
- }
-
- /*
- * Fill out the reserve pool if it is empty. Use the stashed
- * value if it is non-zero, otherwise go with the default.
- */
- xfs_restore_resvblks(mp);
- xfs_log_work_queue(mp);
-
- /* Recover any CoW blocks that never got remapped. */
- error = xfs_reflink_recover_cow(mp);
- if (error) {
- xfs_err(mp,
- "Error %d recovering leftover CoW allocations.", error);
- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
- return error;
- }
- xfs_start_block_reaping(mp);
-
- /* Create the per-AG metadata reservation pool .*/
- error = xfs_fs_reserve_ag_blocks(mp);
- if (error && error != -ENOSPC)
- return error;
- }
-
- /* rw -> ro */
- if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) {
- /*
- * Cancel background eofb scanning so it cannot race with the
- * final log force+buftarg wait and deadlock the remount.
- */
- xfs_stop_block_reaping(mp);
-
- /* Get rid of any leftover CoW reservations... */
- error = xfs_icache_free_cowblocks(mp, NULL);
- if (error) {
- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
- return error;
- }
-
- /* Free the per-AG metadata reservation pool. */
- error = xfs_fs_unreserve_ag_blocks(mp);
- if (error) {
- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
- return error;
- }
-
- /*
- * Before we sync the metadata, we need to free up the reserve
- * block pool so that the used block count in the superblock on
- * disk is correct at the end of the remount. Stash the current
- * reserve pool size so that if we get remounted rw, we can
- * return it to the same size.
- */
- xfs_save_resvblks(mp);
-
- xfs_quiesce_attr(mp);
- mp->m_flags |= XFS_MOUNT_RDONLY;
- }
-
- return 0;
-}
-
/*
* Second stage of a freeze. The data is already frozen so we only
* need to take care of the metadata. Once that's done sync the superblock
@@ -1409,15 +885,6 @@ xfs_fs_unfreeze(
return 0;
}
-STATIC int
-xfs_fs_show_options(
- struct seq_file *m,
- struct dentry *root)
-{
- xfs_showargs(XFS_M(root->d_sb), m);
- return 0;
-}
-
/*
* This function fills in xfs_mount_t fields based on mount args.
* Note: the superblock _has_ now been read in.
@@ -1540,60 +1007,337 @@ xfs_destroy_percpu_counters(
percpu_counter_destroy(&mp->m_delalloc_blks);
}
-static struct xfs_mount *
-xfs_mount_alloc(
+static void
+xfs_fs_put_super(
struct super_block *sb)
{
- struct xfs_mount *mp;
+ struct xfs_mount *mp = XFS_M(sb);
- mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
- if (!mp)
- return NULL;
+ /* if ->fill_super failed, we have no mount to tear down */
+ if (!sb->s_fs_info)
+ return;
- mp->m_super = sb;
- spin_lock_init(&mp->m_sb_lock);
- spin_lock_init(&mp->m_agirotor_lock);
- INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
- spin_lock_init(&mp->m_perag_lock);
- mutex_init(&mp->m_growlock);
- atomic_set(&mp->m_active_trans, 0);
- INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
- INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
- INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker);
- mp->m_kobj.kobject.kset = xfs_kset;
- /*
- * We don't create the finobt per-ag space reservation until after log
- * recovery, so we must set this to true so that an ifree transaction
- * started during log recovery will not depend on space reservations
- * for finobt expansion.
- */
- mp->m_finobt_nores = true;
- return mp;
+ xfs_notice(mp, "Unmounting Filesystem");
+ xfs_filestream_unmount(mp);
+ xfs_unmountfs(mp);
+
+ xfs_freesb(mp);
+ free_percpu(mp->m_stats.xs_stats);
+ xfs_destroy_percpu_counters(mp);
+ xfs_destroy_mount_workqueues(mp);
+ xfs_close_devices(mp);
+
+ sb->s_fs_info = NULL;
+ xfs_mount_free(mp);
}
+static long
+xfs_fs_nr_cached_objects(
+ struct super_block *sb,
+ struct shrink_control *sc)
+{
+ /* Paranoia: catch incorrect calls during mount setup or teardown */
+ if (WARN_ON_ONCE(!sb->s_fs_info))
+ return 0;
+ return xfs_reclaim_inodes_count(XFS_M(sb));
+}
-STATIC int
-xfs_fs_fill_super(
+static long
+xfs_fs_free_cached_objects(
struct super_block *sb,
- void *data,
- int silent)
+ struct shrink_control *sc)
{
- struct inode *root;
- struct xfs_mount *mp = NULL;
- int flags = 0, error = -ENOMEM;
+ return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan);
+}
+
+static const struct super_operations xfs_super_operations = {
+ .alloc_inode = xfs_fs_alloc_inode,
+ .destroy_inode = xfs_fs_destroy_inode,
+ .dirty_inode = xfs_fs_dirty_inode,
+ .drop_inode = xfs_fs_drop_inode,
+ .put_super = xfs_fs_put_super,
+ .sync_fs = xfs_fs_sync_fs,
+ .freeze_fs = xfs_fs_freeze,
+ .unfreeze_fs = xfs_fs_unfreeze,
+ .statfs = xfs_fs_statfs,
+ .show_options = xfs_fs_show_options,
+ .nr_cached_objects = xfs_fs_nr_cached_objects,
+ .free_cached_objects = xfs_fs_free_cached_objects,
+};
+
+static int
+suffix_kstrtoint(
+ const char *s,
+ unsigned int base,
+ int *res)
+{
+ int last, shift_left_factor = 0, _res;
+ char *value;
+ int ret = 0;
+ value = kstrdup(s, GFP_KERNEL);
+ if (!value)
+ return -ENOMEM;
+
+ last = strlen(value) - 1;
+ if (value[last] == 'K' || value[last] == 'k') {
+ shift_left_factor = 10;
+ value[last] = '\0';
+ }
+ if (value[last] == 'M' || value[last] == 'm') {
+ shift_left_factor = 20;
+ value[last] = '\0';
+ }
+ if (value[last] == 'G' || value[last] == 'g') {
+ shift_left_factor = 30;
+ value[last] = '\0';
+ }
+
+ if (kstrtoint(value, base, &_res))
+ ret = -EINVAL;
+ kfree(value);
+ *res = _res << shift_left_factor;
+ return ret;
+}
+
+/*
+ * Set mount state from a mount option.
+ *
+ * NOTE: mp->m_super is NULL here!
+ */
+static int
+xfs_fc_parse_param(
+ struct fs_context *fc,
+ struct fs_parameter *param)
+{
+ struct xfs_mount *mp = fc->s_fs_info;
+ struct fs_parse_result result;
+ int size = 0;
+ int opt;
+
+ opt = fs_parse(fc, xfs_fs_parameters, param, &result);
+ if (opt < 0)
+ return opt;
+
+ switch (opt) {
+ case Opt_logbufs:
+ mp->m_logbufs = result.uint_32;
+ return 0;
+ case Opt_logbsize:
+ if (suffix_kstrtoint(param->string, 10, &mp->m_logbsize))
+ return -EINVAL;
+ return 0;
+ case Opt_logdev:
+ kfree(mp->m_logname);
+ mp->m_logname = kstrdup(param->string, GFP_KERNEL);
+ if (!mp->m_logname)
+ return -ENOMEM;
+ return 0;
+ case Opt_rtdev:
+ kfree(mp->m_rtname);
+ mp->m_rtname = kstrdup(param->string, GFP_KERNEL);
+ if (!mp->m_rtname)
+ return -ENOMEM;
+ return 0;
+ case Opt_allocsize:
+ if (suffix_kstrtoint(param->string, 10, &size))
+ return -EINVAL;
+ mp->m_allocsize_log = ffs(size) - 1;
+ mp->m_flags |= XFS_MOUNT_ALLOCSIZE;
+ return 0;
+ case Opt_grpid:
+ case Opt_bsdgroups:
+ mp->m_flags |= XFS_MOUNT_GRPID;
+ return 0;
+ case Opt_nogrpid:
+ case Opt_sysvgroups:
+ mp->m_flags &= ~XFS_MOUNT_GRPID;
+ return 0;
+ case Opt_wsync:
+ mp->m_flags |= XFS_MOUNT_WSYNC;
+ return 0;
+ case Opt_norecovery:
+ mp->m_flags |= XFS_MOUNT_NORECOVERY;
+ return 0;
+ case Opt_noalign:
+ mp->m_flags |= XFS_MOUNT_NOALIGN;
+ return 0;
+ case Opt_swalloc:
+ mp->m_flags |= XFS_MOUNT_SWALLOC;
+ return 0;
+ case Opt_sunit:
+ mp->m_dalign = result.uint_32;
+ return 0;
+ case Opt_swidth:
+ mp->m_swidth = result.uint_32;
+ return 0;
+ case Opt_inode32:
+ mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
+ return 0;
+ case Opt_inode64:
+ mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
+ return 0;
+ case Opt_nouuid:
+ mp->m_flags |= XFS_MOUNT_NOUUID;
+ return 0;
+ case Opt_ikeep:
+ mp->m_flags |= XFS_MOUNT_IKEEP;
+ return 0;
+ case Opt_noikeep:
+ mp->m_flags &= ~XFS_MOUNT_IKEEP;
+ return 0;
+ case Opt_largeio:
+ mp->m_flags |= XFS_MOUNT_LARGEIO;
+ return 0;
+ case Opt_nolargeio:
+ mp->m_flags &= ~XFS_MOUNT_LARGEIO;
+ return 0;
+ case Opt_attr2:
+ mp->m_flags |= XFS_MOUNT_ATTR2;
+ return 0;
+ case Opt_noattr2:
+ mp->m_flags &= ~XFS_MOUNT_ATTR2;
+ mp->m_flags |= XFS_MOUNT_NOATTR2;
+ return 0;
+ case Opt_filestreams:
+ mp->m_flags |= XFS_MOUNT_FILESTREAMS;
+ return 0;
+ case Opt_noquota:
+ mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
+ mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
+ mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE;
+ return 0;
+ case Opt_quota:
+ case Opt_uquota:
+ case Opt_usrquota:
+ mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
+ XFS_UQUOTA_ENFD);
+ return 0;
+ case Opt_qnoenforce:
+ case Opt_uqnoenforce:
+ mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
+ mp->m_qflags &= ~XFS_UQUOTA_ENFD;
+ return 0;
+ case Opt_pquota:
+ case Opt_prjquota:
+ mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
+ XFS_PQUOTA_ENFD);
+ return 0;
+ case Opt_pqnoenforce:
+ mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
+ mp->m_qflags &= ~XFS_PQUOTA_ENFD;
+ return 0;
+ case Opt_gquota:
+ case Opt_grpquota:
+ mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
+ XFS_GQUOTA_ENFD);
+ return 0;
+ case Opt_gqnoenforce:
+ mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
+ mp->m_qflags &= ~XFS_GQUOTA_ENFD;
+ return 0;
+ case Opt_discard:
+ mp->m_flags |= XFS_MOUNT_DISCARD;
+ return 0;
+ case Opt_nodiscard:
+ mp->m_flags &= ~XFS_MOUNT_DISCARD;
+ return 0;
+#ifdef CONFIG_FS_DAX
+ case Opt_dax:
+ mp->m_flags |= XFS_MOUNT_DAX;
+ return 0;
+#endif
+ default:
+ xfs_warn(mp, "unknown mount option [%s].", param->key);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+xfs_fc_validate_params(
+ struct xfs_mount *mp)
+{
/*
- * allocate mp and do all low-level struct initializations before we
- * attach it to the super
+ * no recovery flag requires a read-only mount
*/
- mp = xfs_mount_alloc(sb);
- if (!mp)
- goto out;
- sb->s_fs_info = mp;
+ if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
+ !(mp->m_flags & XFS_MOUNT_RDONLY)) {
+ xfs_warn(mp, "no-recovery mounts must be read-only.");
+ return -EINVAL;
+ }
- error = xfs_parseargs(mp, (char *)data);
+ if ((mp->m_flags & XFS_MOUNT_NOALIGN) &&
+ (mp->m_dalign || mp->m_swidth)) {
+ xfs_warn(mp,
+ "sunit and swidth options incompatible with the noalign option");
+ return -EINVAL;
+ }
+
+ if (!IS_ENABLED(CONFIG_XFS_QUOTA) && mp->m_qflags != 0) {
+ xfs_warn(mp, "quota support not available in this kernel.");
+ return -EINVAL;
+ }
+
+ if ((mp->m_dalign && !mp->m_swidth) ||
+ (!mp->m_dalign && mp->m_swidth)) {
+ xfs_warn(mp, "sunit and swidth must be specified together");
+ return -EINVAL;
+ }
+
+ if (mp->m_dalign && (mp->m_swidth % mp->m_dalign != 0)) {
+ xfs_warn(mp,
+ "stripe width (%d) must be a multiple of the stripe unit (%d)",
+ mp->m_swidth, mp->m_dalign);
+ return -EINVAL;
+ }
+
+ if (mp->m_logbufs != -1 &&
+ mp->m_logbufs != 0 &&
+ (mp->m_logbufs < XLOG_MIN_ICLOGS ||
+ mp->m_logbufs > XLOG_MAX_ICLOGS)) {
+ xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
+ mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
+ return -EINVAL;
+ }
+
+ if (mp->m_logbsize != -1 &&
+ mp->m_logbsize != 0 &&
+ (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
+ mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
+ !is_power_of_2(mp->m_logbsize))) {
+ xfs_warn(mp,
+ "invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
+ mp->m_logbsize);
+ return -EINVAL;
+ }
+
+ if ((mp->m_flags & XFS_MOUNT_ALLOCSIZE) &&
+ (mp->m_allocsize_log > XFS_MAX_IO_LOG ||
+ mp->m_allocsize_log < XFS_MIN_IO_LOG)) {
+ xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
+ mp->m_allocsize_log, XFS_MIN_IO_LOG, XFS_MAX_IO_LOG);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int
+xfs_fc_fill_super(
+ struct super_block *sb,
+ struct fs_context *fc)
+{
+ struct xfs_mount *mp = sb->s_fs_info;
+ struct inode *root;
+ int flags = 0, error;
+
+ mp->m_super = sb;
+
+ error = xfs_fc_validate_params(mp);
if (error)
- goto out_free_fsname;
+ goto out_free_names;
sb_min_blocksize(sb, BBSIZE);
sb->s_xattr = xfs_xattr_handlers;
@@ -1615,12 +1359,12 @@ xfs_fs_fill_super(
msleep(xfs_globals.mount_delay * 1000);
}
- if (silent)
+ if (fc->sb_flags & SB_SILENT)
flags |= XFS_MFSI_QUIET;
error = xfs_open_devices(mp);
if (error)
- goto out_free_fsname;
+ goto out_free_names;
error = xfs_init_mount_workqueues(mp);
if (error)
@@ -1649,6 +1393,26 @@ xfs_fs_fill_super(
if (error)
goto out_free_sb;
+ /*
+ * XFS block mappings use 54 bits to store the logical block offset.
+ * This should suffice to handle the maximum file size that the VFS
+ * supports (currently 2^63 bytes on 64-bit and ULONG_MAX << PAGE_SHIFT
+ * bytes on 32-bit), but as XFS and VFS have gotten the s_maxbytes
+ * calculation wrong on 32-bit kernels in the past, we'll add a WARN_ON
+ * to check this assertion.
+ *
+ * Avoid integer overflow by comparing the maximum bmbt offset to the
+ * maximum pagecache offset in units of fs blocks.
+ */
+ if (XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE) > XFS_MAX_FILEOFF) {
+ xfs_warn(mp,
+"MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!",
+ XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE),
+ XFS_MAX_FILEOFF);
+ error = -EINVAL;
+ goto out_free_sb;
+ }
+
error = xfs_filestream_mount(mp);
if (error)
goto out_free_sb;
@@ -1660,9 +1424,11 @@ xfs_fs_fill_super(
sb->s_magic = XFS_SUPER_MAGIC;
sb->s_blocksize = mp->m_sb.sb_blocksize;
sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
- sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
sb->s_max_links = XFS_MAXLINK;
sb->s_time_gran = 1;
+ sb->s_time_min = S32_MIN;
+ sb->s_time_max = S32_MAX;
sb->s_iflags |= SB_I_CGROUPWB;
set_posix_acl_flag(sb);
@@ -1755,11 +1521,9 @@ xfs_fs_fill_super(
xfs_destroy_mount_workqueues(mp);
out_close_devices:
xfs_close_devices(mp);
- out_free_fsname:
+ out_free_names:
sb->s_fs_info = NULL;
- xfs_free_fsname(mp);
- kfree(mp);
- out:
+ xfs_mount_free(mp);
return error;
out_unmount:
@@ -1768,80 +1532,252 @@ xfs_fs_fill_super(
goto out_free_sb;
}
-STATIC void
-xfs_fs_put_super(
- struct super_block *sb)
+static int
+xfs_fc_get_tree(
+ struct fs_context *fc)
{
- struct xfs_mount *mp = XFS_M(sb);
+ return get_tree_bdev(fc, xfs_fc_fill_super);
+}
- /* if ->fill_super failed, we have no mount to tear down */
- if (!sb->s_fs_info)
- return;
+static int
+xfs_remount_rw(
+ struct xfs_mount *mp)
+{
+ struct xfs_sb *sbp = &mp->m_sb;
+ int error;
- xfs_notice(mp, "Unmounting Filesystem");
- xfs_filestream_unmount(mp);
- xfs_unmountfs(mp);
+ if (mp->m_flags & XFS_MOUNT_NORECOVERY) {
+ xfs_warn(mp,
+ "ro->rw transition prohibited on norecovery mount");
+ return -EINVAL;
+ }
- xfs_freesb(mp);
- free_percpu(mp->m_stats.xs_stats);
- xfs_destroy_percpu_counters(mp);
- xfs_destroy_mount_workqueues(mp);
- xfs_close_devices(mp);
+ if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
+ xfs_sb_has_ro_compat_feature(sbp, XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
+ xfs_warn(mp,
+ "ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
+ (sbp->sb_features_ro_compat &
+ XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
+ return -EINVAL;
+ }
- sb->s_fs_info = NULL;
- xfs_free_fsname(mp);
- kfree(mp);
+ mp->m_flags &= ~XFS_MOUNT_RDONLY;
+
+ /*
+ * If this is the first remount to writeable state we might have some
+ * superblock changes to update.
+ */
+ if (mp->m_update_sb) {
+ error = xfs_sync_sb(mp, false);
+ if (error) {
+ xfs_warn(mp, "failed to write sb changes");
+ return error;
+ }
+ mp->m_update_sb = false;
+ }
+
+ /*
+ * Fill out the reserve pool if it is empty. Use the stashed value if
+ * it is non-zero, otherwise go with the default.
+ */
+ xfs_restore_resvblks(mp);
+ xfs_log_work_queue(mp);
+
+ /* Recover any CoW blocks that never got remapped. */
+ error = xfs_reflink_recover_cow(mp);
+ if (error) {
+ xfs_err(mp,
+ "Error %d recovering leftover CoW allocations.", error);
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+ return error;
+ }
+ xfs_start_block_reaping(mp);
+
+ /* Create the per-AG metadata reservation pool .*/
+ error = xfs_fs_reserve_ag_blocks(mp);
+ if (error && error != -ENOSPC)
+ return error;
+
+ return 0;
}
-STATIC struct dentry *
-xfs_fs_mount(
- struct file_system_type *fs_type,
- int flags,
- const char *dev_name,
- void *data)
+static int
+xfs_remount_ro(
+ struct xfs_mount *mp)
{
- return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
+ int error;
+
+ /*
+ * Cancel background eofb scanning so it cannot race with the final
+ * log force+buftarg wait and deadlock the remount.
+ */
+ xfs_stop_block_reaping(mp);
+
+ /* Get rid of any leftover CoW reservations... */
+ error = xfs_icache_free_cowblocks(mp, NULL);
+ if (error) {
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+ return error;
+ }
+
+ /* Free the per-AG metadata reservation pool. */
+ error = xfs_fs_unreserve_ag_blocks(mp);
+ if (error) {
+ xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+ return error;
+ }
+
+ /*
+ * Before we sync the metadata, we need to free up the reserve block
+ * pool so that the used block count in the superblock on disk is
+ * correct at the end of the remount. Stash the current* reserve pool
+ * size so that if we get remounted rw, we can return it to the same
+ * size.
+ */
+ xfs_save_resvblks(mp);
+
+ xfs_quiesce_attr(mp);
+ mp->m_flags |= XFS_MOUNT_RDONLY;
+
+ return 0;
}
-static long
-xfs_fs_nr_cached_objects(
- struct super_block *sb,
- struct shrink_control *sc)
+/*
+ * Logically we would return an error here to prevent users from believing
+ * they might have changed mount options using remount which can't be changed.
+ *
+ * But unfortunately mount(8) adds all options from mtab and fstab to the mount
+ * arguments in some cases so we can't blindly reject options, but have to
+ * check for each specified option if it actually differs from the currently
+ * set option and only reject it if that's the case.
+ *
+ * Until that is implemented we return success for every remount request, and
+ * silently ignore all options that we can't actually change.
+ */
+static int
+xfs_fc_reconfigure(
+ struct fs_context *fc)
{
- /* Paranoia: catch incorrect calls during mount setup or teardown */
- if (WARN_ON_ONCE(!sb->s_fs_info))
- return 0;
- return xfs_reclaim_inodes_count(XFS_M(sb));
+ struct xfs_mount *mp = XFS_M(fc->root->d_sb);
+ struct xfs_mount *new_mp = fc->s_fs_info;
+ xfs_sb_t *sbp = &mp->m_sb;
+ int flags = fc->sb_flags;
+ int error;
+
+ error = xfs_fc_validate_params(new_mp);
+ if (error)
+ return error;
+
+ sync_filesystem(mp->m_super);
+
+ /* inode32 -> inode64 */
+ if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) &&
+ !(new_mp->m_flags & XFS_MOUNT_SMALL_INUMS)) {
+ mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
+ mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
+ }
+
+ /* inode64 -> inode32 */
+ if (!(mp->m_flags & XFS_MOUNT_SMALL_INUMS) &&
+ (new_mp->m_flags & XFS_MOUNT_SMALL_INUMS)) {
+ mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
+ mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
+ }
+
+ /* ro -> rw */
+ if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(flags & SB_RDONLY)) {
+ error = xfs_remount_rw(mp);
+ if (error)
+ return error;
+ }
+
+ /* rw -> ro */
+ if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (flags & SB_RDONLY)) {
+ error = xfs_remount_ro(mp);
+ if (error)
+ return error;
+ }
+
+ return 0;
}
-static long
-xfs_fs_free_cached_objects(
- struct super_block *sb,
- struct shrink_control *sc)
+static void xfs_fc_free(
+ struct fs_context *fc)
{
- return xfs_reclaim_inodes_nr(XFS_M(sb), sc->nr_to_scan);
+ struct xfs_mount *mp = fc->s_fs_info;
+
+ /*
+ * mp is stored in the fs_context when it is initialized.
+ * mp is transferred to the superblock on a successful mount,
+ * but if an error occurs before the transfer we have to free
+ * it here.
+ */
+ if (mp)
+ xfs_mount_free(mp);
}
-static const struct super_operations xfs_super_operations = {
- .alloc_inode = xfs_fs_alloc_inode,
- .destroy_inode = xfs_fs_destroy_inode,
- .dirty_inode = xfs_fs_dirty_inode,
- .drop_inode = xfs_fs_drop_inode,
- .put_super = xfs_fs_put_super,
- .sync_fs = xfs_fs_sync_fs,
- .freeze_fs = xfs_fs_freeze,
- .unfreeze_fs = xfs_fs_unfreeze,
- .statfs = xfs_fs_statfs,
- .remount_fs = xfs_fs_remount,
- .show_options = xfs_fs_show_options,
- .nr_cached_objects = xfs_fs_nr_cached_objects,
- .free_cached_objects = xfs_fs_free_cached_objects,
+static const struct fs_context_operations xfs_context_ops = {
+ .parse_param = xfs_fc_parse_param,
+ .get_tree = xfs_fc_get_tree,
+ .reconfigure = xfs_fc_reconfigure,
+ .free = xfs_fc_free,
};
+static int xfs_init_fs_context(
+ struct fs_context *fc)
+{
+ struct xfs_mount *mp;
+
+ mp = kmem_alloc(sizeof(struct xfs_mount), KM_ZERO);
+ if (!mp)
+ return -ENOMEM;
+
+ spin_lock_init(&mp->m_sb_lock);
+ spin_lock_init(&mp->m_agirotor_lock);
+ INIT_RADIX_TREE(&mp->m_perag_tree, GFP_ATOMIC);
+ spin_lock_init(&mp->m_perag_lock);
+ mutex_init(&mp->m_growlock);
+ atomic_set(&mp->m_active_trans, 0);
+ INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
+ INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker);
+ INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker);
+ mp->m_kobj.kobject.kset = xfs_kset;
+ /*
+ * We don't create the finobt per-ag space reservation until after log
+ * recovery, so we must set this to true so that an ifree transaction
+ * started during log recovery will not depend on space reservations
+ * for finobt expansion.
+ */
+ mp->m_finobt_nores = true;
+
+ /*
+ * These can be overridden by the mount option parsing.
+ */
+ mp->m_logbufs = -1;
+ mp->m_logbsize = -1;
+ mp->m_allocsize_log = 16; /* 64k */
+
+ /*
+ * Copy binary VFS mount flags we are interested in.
+ */
+ if (fc->sb_flags & SB_RDONLY)
+ mp->m_flags |= XFS_MOUNT_RDONLY;
+ if (fc->sb_flags & SB_DIRSYNC)
+ mp->m_flags |= XFS_MOUNT_DIRSYNC;
+ if (fc->sb_flags & SB_SYNCHRONOUS)
+ mp->m_flags |= XFS_MOUNT_WSYNC;
+
+ fc->s_fs_info = mp;
+ fc->ops = &xfs_context_ops;
+
+ return 0;
+}
+
static struct file_system_type xfs_fs_type = {
.owner = THIS_MODULE,
.name = "xfs",
- .mount = xfs_fs_mount,
+ .init_fs_context = xfs_init_fs_context,
+ .parameters = xfs_fs_parameters,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
@@ -1850,37 +1786,39 @@ MODULE_ALIAS_FS("xfs");
STATIC int __init
xfs_init_zones(void)
{
- if (bioset_init(&xfs_ioend_bioset, 4 * (PAGE_SIZE / SECTOR_SIZE),
- offsetof(struct xfs_ioend, io_inline_bio),
- BIOSET_NEED_BVECS))
- goto out;
-
- xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
- "xfs_log_ticket");
+ xfs_log_ticket_zone = kmem_cache_create("xfs_log_ticket",
+ sizeof(struct xlog_ticket),
+ 0, 0, NULL);
if (!xfs_log_ticket_zone)
- goto out_free_ioend_bioset;
+ goto out;
- xfs_bmap_free_item_zone = kmem_zone_init(
- sizeof(struct xfs_extent_free_item),
- "xfs_bmap_free_item");
+ xfs_bmap_free_item_zone = kmem_cache_create("xfs_bmap_free_item",
+ sizeof(struct xfs_extent_free_item),
+ 0, 0, NULL);
if (!xfs_bmap_free_item_zone)
goto out_destroy_log_ticket_zone;
- xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
- "xfs_btree_cur");
+ xfs_btree_cur_zone = kmem_cache_create("xfs_btree_cur",
+ sizeof(struct xfs_btree_cur),
+ 0, 0, NULL);
if (!xfs_btree_cur_zone)
goto out_destroy_bmap_free_item_zone;
- xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
- "xfs_da_state");
+ xfs_da_state_zone = kmem_cache_create("xfs_da_state",
+ sizeof(struct xfs_da_state),
+ 0, 0, NULL);
if (!xfs_da_state_zone)
goto out_destroy_btree_cur_zone;
- xfs_ifork_zone = kmem_zone_init(sizeof(struct xfs_ifork), "xfs_ifork");
+ xfs_ifork_zone = kmem_cache_create("xfs_ifork",
+ sizeof(struct xfs_ifork),
+ 0, 0, NULL);
if (!xfs_ifork_zone)
goto out_destroy_da_state_zone;
- xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
+ xfs_trans_zone = kmem_cache_create("xf_trans",
+ sizeof(struct xfs_trans),
+ 0, 0, NULL);
if (!xfs_trans_zone)
goto out_destroy_ifork_zone;
@@ -1890,111 +1828,121 @@ xfs_init_zones(void)
* size possible under XFS. This wastes a little bit of memory,
* but it is much faster.
*/
- xfs_buf_item_zone = kmem_zone_init(sizeof(struct xfs_buf_log_item),
- "xfs_buf_item");
+ xfs_buf_item_zone = kmem_cache_create("xfs_buf_item",
+ sizeof(struct xfs_buf_log_item),
+ 0, 0, NULL);
if (!xfs_buf_item_zone)
goto out_destroy_trans_zone;
- xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
- ((XFS_EFD_MAX_FAST_EXTENTS - 1) *
- sizeof(xfs_extent_t))), "xfs_efd_item");
+ xfs_efd_zone = kmem_cache_create("xfs_efd_item",
+ (sizeof(struct xfs_efd_log_item) +
+ (XFS_EFD_MAX_FAST_EXTENTS - 1) *
+ sizeof(struct xfs_extent)),
+ 0, 0, NULL);
if (!xfs_efd_zone)
goto out_destroy_buf_item_zone;
- xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
- ((XFS_EFI_MAX_FAST_EXTENTS - 1) *
- sizeof(xfs_extent_t))), "xfs_efi_item");
+ xfs_efi_zone = kmem_cache_create("xfs_efi_item",
+ (sizeof(struct xfs_efi_log_item) +
+ (XFS_EFI_MAX_FAST_EXTENTS - 1) *
+ sizeof(struct xfs_extent)),
+ 0, 0, NULL);
if (!xfs_efi_zone)
goto out_destroy_efd_zone;
- xfs_inode_zone =
- kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
- KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD |
- KM_ZONE_ACCOUNT, xfs_fs_inode_init_once);
+ xfs_inode_zone = kmem_cache_create("xfs_inode",
+ sizeof(struct xfs_inode), 0,
+ (SLAB_HWCACHE_ALIGN |
+ SLAB_RECLAIM_ACCOUNT |
+ SLAB_MEM_SPREAD | SLAB_ACCOUNT),
+ xfs_fs_inode_init_once);
if (!xfs_inode_zone)
goto out_destroy_efi_zone;
- xfs_ili_zone =
- kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
- KM_ZONE_SPREAD, NULL);
+ xfs_ili_zone = kmem_cache_create("xfs_ili",
+ sizeof(struct xfs_inode_log_item), 0,
+ SLAB_MEM_SPREAD, NULL);
if (!xfs_ili_zone)
goto out_destroy_inode_zone;
- xfs_icreate_zone = kmem_zone_init(sizeof(struct xfs_icreate_item),
- "xfs_icr");
+
+ xfs_icreate_zone = kmem_cache_create("xfs_icr",
+ sizeof(struct xfs_icreate_item),
+ 0, 0, NULL);
if (!xfs_icreate_zone)
goto out_destroy_ili_zone;
- xfs_rud_zone = kmem_zone_init(sizeof(struct xfs_rud_log_item),
- "xfs_rud_item");
+ xfs_rud_zone = kmem_cache_create("xfs_rud_item",
+ sizeof(struct xfs_rud_log_item),
+ 0, 0, NULL);
if (!xfs_rud_zone)
goto out_destroy_icreate_zone;
- xfs_rui_zone = kmem_zone_init(
+ xfs_rui_zone = kmem_cache_create("xfs_rui_item",
xfs_rui_log_item_sizeof(XFS_RUI_MAX_FAST_EXTENTS),
- "xfs_rui_item");
+ 0, 0, NULL);
if (!xfs_rui_zone)
goto out_destroy_rud_zone;
- xfs_cud_zone = kmem_zone_init(sizeof(struct xfs_cud_log_item),
- "xfs_cud_item");
+ xfs_cud_zone = kmem_cache_create("xfs_cud_item",
+ sizeof(struct xfs_cud_log_item),
+ 0, 0, NULL);
if (!xfs_cud_zone)
goto out_destroy_rui_zone;
- xfs_cui_zone = kmem_zone_init(
+ xfs_cui_zone = kmem_cache_create("xfs_cui_item",
xfs_cui_log_item_sizeof(XFS_CUI_MAX_FAST_EXTENTS),
- "xfs_cui_item");
+ 0, 0, NULL);
if (!xfs_cui_zone)
goto out_destroy_cud_zone;
- xfs_bud_zone = kmem_zone_init(sizeof(struct xfs_bud_log_item),
- "xfs_bud_item");
+ xfs_bud_zone = kmem_cache_create("xfs_bud_item",
+ sizeof(struct xfs_bud_log_item),
+ 0, 0, NULL);
if (!xfs_bud_zone)
goto out_destroy_cui_zone;
- xfs_bui_zone = kmem_zone_init(
+ xfs_bui_zone = kmem_cache_create("xfs_bui_item",
xfs_bui_log_item_sizeof(XFS_BUI_MAX_FAST_EXTENTS),
- "xfs_bui_item");
+ 0, 0, NULL);
if (!xfs_bui_zone)
goto out_destroy_bud_zone;
return 0;
out_destroy_bud_zone:
- kmem_zone_destroy(xfs_bud_zone);
+ kmem_cache_destroy(xfs_bud_zone);
out_destroy_cui_zone:
- kmem_zone_destroy(xfs_cui_zone);
+ kmem_cache_destroy(xfs_cui_zone);
out_destroy_cud_zone:
- kmem_zone_destroy(xfs_cud_zone);
+ kmem_cache_destroy(xfs_cud_zone);
out_destroy_rui_zone:
- kmem_zone_destroy(xfs_rui_zone);
+ kmem_cache_destroy(xfs_rui_zone);
out_destroy_rud_zone:
- kmem_zone_destroy(xfs_rud_zone);
+ kmem_cache_destroy(xfs_rud_zone);
out_destroy_icreate_zone:
- kmem_zone_destroy(xfs_icreate_zone);
+ kmem_cache_destroy(xfs_icreate_zone);
out_destroy_ili_zone:
- kmem_zone_destroy(xfs_ili_zone);
+ kmem_cache_destroy(xfs_ili_zone);
out_destroy_inode_zone:
- kmem_zone_destroy(xfs_inode_zone);
+ kmem_cache_destroy(xfs_inode_zone);
out_destroy_efi_zone:
- kmem_zone_destroy(xfs_efi_zone);
+ kmem_cache_destroy(xfs_efi_zone);
out_destroy_efd_zone:
- kmem_zone_destroy(xfs_efd_zone);
+ kmem_cache_destroy(xfs_efd_zone);
out_destroy_buf_item_zone:
- kmem_zone_destroy(xfs_buf_item_zone);
+ kmem_cache_destroy(xfs_buf_item_zone);
out_destroy_trans_zone:
- kmem_zone_destroy(xfs_trans_zone);
+ kmem_cache_destroy(xfs_trans_zone);
out_destroy_ifork_zone:
- kmem_zone_destroy(xfs_ifork_zone);
+ kmem_cache_destroy(xfs_ifork_zone);
out_destroy_da_state_zone:
- kmem_zone_destroy(xfs_da_state_zone);
+ kmem_cache_destroy(xfs_da_state_zone);
out_destroy_btree_cur_zone:
- kmem_zone_destroy(xfs_btree_cur_zone);
+ kmem_cache_destroy(xfs_btree_cur_zone);
out_destroy_bmap_free_item_zone:
- kmem_zone_destroy(xfs_bmap_free_item_zone);
+ kmem_cache_destroy(xfs_bmap_free_item_zone);
out_destroy_log_ticket_zone:
- kmem_zone_destroy(xfs_log_ticket_zone);
- out_free_ioend_bioset:
- bioset_exit(&xfs_ioend_bioset);
+ kmem_cache_destroy(xfs_log_ticket_zone);
out:
return -ENOMEM;
}
@@ -2007,25 +1955,24 @@ xfs_destroy_zones(void)
* destroy caches.
*/
rcu_barrier();
- kmem_zone_destroy(xfs_bui_zone);
- kmem_zone_destroy(xfs_bud_zone);
- kmem_zone_destroy(xfs_cui_zone);
- kmem_zone_destroy(xfs_cud_zone);
- kmem_zone_destroy(xfs_rui_zone);
- kmem_zone_destroy(xfs_rud_zone);
- kmem_zone_destroy(xfs_icreate_zone);
- kmem_zone_destroy(xfs_ili_zone);
- kmem_zone_destroy(xfs_inode_zone);
- kmem_zone_destroy(xfs_efi_zone);
- kmem_zone_destroy(xfs_efd_zone);
- kmem_zone_destroy(xfs_buf_item_zone);
- kmem_zone_destroy(xfs_trans_zone);
- kmem_zone_destroy(xfs_ifork_zone);
- kmem_zone_destroy(xfs_da_state_zone);
- kmem_zone_destroy(xfs_btree_cur_zone);
- kmem_zone_destroy(xfs_bmap_free_item_zone);
- kmem_zone_destroy(xfs_log_ticket_zone);
- bioset_exit(&xfs_ioend_bioset);
+ kmem_cache_destroy(xfs_bui_zone);
+ kmem_cache_destroy(xfs_bud_zone);
+ kmem_cache_destroy(xfs_cui_zone);
+ kmem_cache_destroy(xfs_cud_zone);
+ kmem_cache_destroy(xfs_rui_zone);
+ kmem_cache_destroy(xfs_rud_zone);
+ kmem_cache_destroy(xfs_icreate_zone);
+ kmem_cache_destroy(xfs_ili_zone);
+ kmem_cache_destroy(xfs_inode_zone);
+ kmem_cache_destroy(xfs_efi_zone);
+ kmem_cache_destroy(xfs_efd_zone);
+ kmem_cache_destroy(xfs_buf_item_zone);
+ kmem_cache_destroy(xfs_trans_zone);
+ kmem_cache_destroy(xfs_ifork_zone);
+ kmem_cache_destroy(xfs_da_state_zone);
+ kmem_cache_destroy(xfs_btree_cur_zone);
+ kmem_cache_destroy(xfs_bmap_free_item_zone);
+ kmem_cache_destroy(xfs_log_ticket_zone);
}
STATIC int __init
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index 763e43d22dee..b552cf6d3379 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -11,9 +11,11 @@
#ifdef CONFIG_XFS_QUOTA
extern int xfs_qm_init(void);
extern void xfs_qm_exit(void);
+# define XFS_QUOTA_STRING "quota, "
#else
# define xfs_qm_init() (0)
# define xfs_qm_exit() do { } while (0)
+# define XFS_QUOTA_STRING
#endif
#ifdef CONFIG_XFS_POSIX_ACL
@@ -50,6 +52,12 @@ extern void xfs_qm_exit(void);
# define XFS_WARN_STRING
#endif
+#ifdef CONFIG_XFS_ASSERT_FATAL
+# define XFS_ASSERT_FATAL_STRING "fatal assert, "
+#else
+# define XFS_ASSERT_FATAL_STRING
+#endif
+
#ifdef DEBUG
# define XFS_DBG_STRING "debug"
#else
@@ -63,6 +71,8 @@ extern void xfs_qm_exit(void);
XFS_SCRUB_STRING \
XFS_REPAIR_STRING \
XFS_WARN_STRING \
+ XFS_QUOTA_STRING \
+ XFS_ASSERT_FATAL_STRING \
XFS_DBG_STRING /* DBG must be last */
struct xfs_inode;
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index ed66fd2de327..d762d42ed0ff 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -17,6 +17,7 @@
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_quota.h"
+#include "xfs_symlink.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
#include "xfs_trans.h"
@@ -52,20 +53,10 @@ xfs_readlink_bmap_ilocked(
d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
- bp = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0,
- &xfs_symlink_buf_ops);
- if (!bp)
- return -ENOMEM;
- error = bp->b_error;
- if (error) {
- xfs_buf_ioerror_alert(bp, __func__);
- xfs_buf_relse(bp);
-
- /* bad CRC means corrupted metadata */
- if (error == -EFSBADCRC)
- error = -EFSCORRUPTED;
- goto out;
- }
+ error = xfs_buf_read(mp->m_ddev_targp, d, BTOBB(byte_cnt), 0,
+ &bp, &xfs_symlink_buf_ops);
+ if (error)
+ return error;
byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
if (pathlen < byte_cnt)
byte_cnt = pathlen;
@@ -289,12 +280,10 @@ xfs_symlink(
d = XFS_FSB_TO_DADDR(mp, mval[n].br_startblock);
byte_cnt = XFS_FSB_TO_B(mp, mval[n].br_blockcount);
- bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
- BTOBB(byte_cnt), 0);
- if (!bp) {
- error = -ENOMEM;
+ error = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
+ BTOBB(byte_cnt), 0, &bp);
+ if (error)
goto out_trans_cancel;
- }
bp->b_ops = &xfs_symlink_buf_ops;
byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt);
@@ -432,13 +421,12 @@ xfs_inactive_symlink_rmt(
* Invalidate the block(s). No validation is done.
*/
for (i = 0; i < nmaps; i++) {
- bp = xfs_trans_get_buf(tp, mp->m_ddev_targp,
- XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
- XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);
- if (!bp) {
- error = -ENOMEM;
+ error = xfs_trans_get_buf(tp, mp->m_ddev_targp,
+ XFS_FSB_TO_DADDR(mp, mval[i].br_startblock),
+ XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0,
+ &bp);
+ if (error)
goto error_trans_cancel;
- }
xfs_trans_binval(tp, bp);
}
/*
diff --git a/fs/xfs/xfs_symlink.h b/fs/xfs/xfs_symlink.h
index 9743d8c9394b..b1fa091427e6 100644
--- a/fs/xfs/xfs_symlink.h
+++ b/fs/xfs/xfs_symlink.h
@@ -5,7 +5,7 @@
#ifndef __XFS_SYMLINK_H
#define __XFS_SYMLINK_H 1
-/* Kernel only symlink defintions */
+/* Kernel only symlink definitions */
int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
const char *target_path, umode_t mode, struct xfs_inode **ipp);
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index ddd0bf7a4740..f1bc88f4367c 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -63,19 +63,6 @@ static const struct sysfs_ops xfs_sysfs_ops = {
.store = xfs_sysfs_object_store,
};
-/*
- * xfs_mount kobject. The mp kobject also serves as the per-mount parent object
- * that is identified by the fsname under sysfs.
- */
-
-static inline struct xfs_mount *
-to_mp(struct kobject *kobject)
-{
- struct xfs_kobj *kobj = to_kobj(kobject);
-
- return container_of(kobj, struct xfs_mount, m_kobj);
-}
-
static struct attribute *xfs_mp_attrs[] = {
NULL,
};
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 8094b1920eef..e242988f57fb 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -23,6 +23,7 @@ struct xlog;
struct xlog_ticket;
struct xlog_recover;
struct xlog_recover_item;
+struct xlog_rec_header;
struct xfs_buf_log_format;
struct xfs_inode_log_format;
struct xfs_bmbt_irec;
@@ -30,6 +31,10 @@ struct xfs_btree_cur;
struct xfs_refcount_irec;
struct xfs_fsmap;
struct xfs_rmap_irec;
+struct xfs_icreate_log;
+struct xfs_owner_info;
+struct xfs_trans_res;
+struct xfs_inobt_rec_incore;
DECLARE_EVENT_CLASS(xfs_attr_list_class,
TP_PROTO(struct xfs_attr_list_context *ctx),
@@ -213,8 +218,8 @@ DECLARE_EVENT_CLASS(xfs_bmap_class,
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
- __field(void *, leaf);
- __field(int, pos);
+ __field(void *, leaf)
+ __field(int, pos)
__field(xfs_fileoff_t, startoff)
__field(xfs_fsblock_t, startblock)
__field(xfs_filblks_t, blockcount)
@@ -720,7 +725,7 @@ TRACE_EVENT(xfs_iomap_prealloc_size,
__entry->writeio_blocks = writeio_blocks;
),
TP_printk("dev %d:%d ino 0x%llx prealloc blocks %llu shift %d "
- "m_writeio_blocks %u",
+ "m_allocsize_blocks %u",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino,
__entry->blocks, __entry->shift, __entry->writeio_blocks)
)
@@ -1153,71 +1158,6 @@ DEFINE_RW_EVENT(xfs_file_buffered_write);
DEFINE_RW_EVENT(xfs_file_direct_write);
DEFINE_RW_EVENT(xfs_file_dax_write);
-DECLARE_EVENT_CLASS(xfs_page_class,
- TP_PROTO(struct inode *inode, struct page *page, unsigned long off,
- unsigned int len),
- TP_ARGS(inode, page, off, len),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, ino)
- __field(pgoff_t, pgoff)
- __field(loff_t, size)
- __field(unsigned long, offset)
- __field(unsigned int, length)
- ),
- TP_fast_assign(
- __entry->dev = inode->i_sb->s_dev;
- __entry->ino = XFS_I(inode)->i_ino;
- __entry->pgoff = page_offset(page);
- __entry->size = i_size_read(inode);
- __entry->offset = off;
- __entry->length = len;
- ),
- TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx "
- "length %x",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __entry->pgoff,
- __entry->size,
- __entry->offset,
- __entry->length)
-)
-
-#define DEFINE_PAGE_EVENT(name) \
-DEFINE_EVENT(xfs_page_class, name, \
- TP_PROTO(struct inode *inode, struct page *page, unsigned long off, \
- unsigned int len), \
- TP_ARGS(inode, page, off, len))
-DEFINE_PAGE_EVENT(xfs_writepage);
-DEFINE_PAGE_EVENT(xfs_releasepage);
-DEFINE_PAGE_EVENT(xfs_invalidatepage);
-
-DECLARE_EVENT_CLASS(xfs_readpage_class,
- TP_PROTO(struct inode *inode, int nr_pages),
- TP_ARGS(inode, nr_pages),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_ino_t, ino)
- __field(int, nr_pages)
- ),
- TP_fast_assign(
- __entry->dev = inode->i_sb->s_dev;
- __entry->ino = inode->i_ino;
- __entry->nr_pages = nr_pages;
- ),
- TP_printk("dev %d:%d ino 0x%llx nr_pages %d",
- MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->ino,
- __entry->nr_pages)
-)
-
-#define DEFINE_READPAGE_EVENT(name) \
-DEFINE_EVENT(xfs_readpage_class, name, \
- TP_PROTO(struct inode *inode, int nr_pages), \
- TP_ARGS(inode, nr_pages))
-DEFINE_READPAGE_EVENT(xfs_vm_readpage);
-DEFINE_READPAGE_EVENT(xfs_vm_readpages);
-
DECLARE_EVENT_CLASS(xfs_imap_class,
TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count,
int whichfork, struct xfs_bmbt_irec *irec),
@@ -1637,8 +1577,11 @@ DEFINE_ALLOC_EVENT(xfs_alloc_exact_notfound);
DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
DEFINE_ALLOC_EVENT(xfs_alloc_near_first);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_greater);
-DEFINE_ALLOC_EVENT(xfs_alloc_near_lesser);
+DEFINE_ALLOC_EVENT(xfs_alloc_cur);
+DEFINE_ALLOC_EVENT(xfs_alloc_cur_right);
+DEFINE_ALLOC_EVENT(xfs_alloc_cur_left);
+DEFINE_ALLOC_EVENT(xfs_alloc_cur_lookup);
+DEFINE_ALLOC_EVENT(xfs_alloc_cur_lookup_done);
DEFINE_ALLOC_EVENT(xfs_alloc_near_error);
DEFINE_ALLOC_EVENT(xfs_alloc_near_noentry);
DEFINE_ALLOC_EVENT(xfs_alloc_near_busy);
@@ -1658,6 +1601,32 @@ DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp);
DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed);
DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed);
+TRACE_EVENT(xfs_alloc_cur_check,
+ TP_PROTO(struct xfs_mount *mp, xfs_btnum_t btnum, xfs_agblock_t bno,
+ xfs_extlen_t len, xfs_extlen_t diff, bool new),
+ TP_ARGS(mp, btnum, bno, len, diff, new),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_btnum_t, btnum)
+ __field(xfs_agblock_t, bno)
+ __field(xfs_extlen_t, len)
+ __field(xfs_extlen_t, diff)
+ __field(bool, new)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->btnum = btnum;
+ __entry->bno = bno;
+ __entry->len = len;
+ __entry->diff = diff;
+ __entry->new = new;
+ ),
+ TP_printk("dev %d:%d btree %s bno 0x%x len 0x%x diff 0x%x new %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->btnum, XFS_BTNUM_STRINGS),
+ __entry->bno, __entry->len, __entry->diff, __entry->new)
+)
+
DECLARE_EVENT_CLASS(xfs_da_class,
TP_PROTO(struct xfs_da_args *args),
TP_ARGS(args),
@@ -3575,6 +3544,56 @@ TRACE_EVENT(xfs_pwork_init,
__entry->nr_threads, __entry->pid)
)
+DECLARE_EVENT_CLASS(xfs_kmem_class,
+ TP_PROTO(ssize_t size, int flags, unsigned long caller_ip),
+ TP_ARGS(size, flags, caller_ip),
+ TP_STRUCT__entry(
+ __field(ssize_t, size)
+ __field(int, flags)
+ __field(unsigned long, caller_ip)
+ ),
+ TP_fast_assign(
+ __entry->size = size;
+ __entry->flags = flags;
+ __entry->caller_ip = caller_ip;
+ ),
+ TP_printk("size %zd flags 0x%x caller %pS",
+ __entry->size,
+ __entry->flags,
+ (char *)__entry->caller_ip)
+)
+
+#define DEFINE_KMEM_EVENT(name) \
+DEFINE_EVENT(xfs_kmem_class, name, \
+ TP_PROTO(ssize_t size, int flags, unsigned long caller_ip), \
+ TP_ARGS(size, flags, caller_ip))
+DEFINE_KMEM_EVENT(kmem_alloc);
+DEFINE_KMEM_EVENT(kmem_alloc_io);
+DEFINE_KMEM_EVENT(kmem_alloc_large);
+DEFINE_KMEM_EVENT(kmem_realloc);
+DEFINE_KMEM_EVENT(kmem_zone_alloc);
+
+TRACE_EVENT(xfs_check_new_dalign,
+ TP_PROTO(struct xfs_mount *mp, int new_dalign, xfs_ino_t calc_rootino),
+ TP_ARGS(mp, new_dalign, calc_rootino),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(int, new_dalign)
+ __field(xfs_ino_t, sb_rootino)
+ __field(xfs_ino_t, calc_rootino)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->new_dalign = new_dalign;
+ __entry->sb_rootino = mp->m_sb.sb_rootino;
+ __entry->calc_rootino = calc_rootino;
+ ),
+ TP_printk("dev %d:%d new_dalign %d sb_rootino %llu calc_rootino %llu",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->new_dalign, __entry->sb_rootino,
+ __entry->calc_rootino)
+)
+
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index d42a68d8313b..3b208f9a865c 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -71,7 +71,7 @@ xfs_trans_free(
if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
sb_end_intwrite(tp->t_mountp->m_super);
xfs_trans_free_dqinfo(tp);
- kmem_zone_free(xfs_trans_zone, tp);
+ kmem_cache_free(xfs_trans_zone, tp);
}
/*
@@ -90,7 +90,7 @@ xfs_trans_dup(
trace_xfs_trans_dup(tp, _RET_IP_);
- ntp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP);
+ ntp = kmem_zone_zalloc(xfs_trans_zone, 0);
/*
* Initialize the new transaction structure.
@@ -263,7 +263,7 @@ xfs_trans_alloc(
* GFP_NOFS allocation context so that we avoid lockdep false positives
* by doing GFP_KERNEL allocations inside sb_start_intwrite().
*/
- tp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP);
+ tp = kmem_zone_zalloc(xfs_trans_zone, 0);
if (!(flags & XFS_TRANS_NO_WRITECOUNT))
sb_start_intwrite(mp->m_super);
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 64d7f171ebd3..752c7fef9de7 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -169,21 +169,21 @@ int xfs_trans_alloc_empty(struct xfs_mount *mp,
struct xfs_trans **tpp);
void xfs_trans_mod_sb(xfs_trans_t *, uint, int64_t);
-struct xfs_buf *xfs_trans_get_buf_map(struct xfs_trans *tp,
- struct xfs_buftarg *target,
- struct xfs_buf_map *map, int nmaps,
- uint flags);
+int xfs_trans_get_buf_map(struct xfs_trans *tp, struct xfs_buftarg *target,
+ struct xfs_buf_map *map, int nmaps, xfs_buf_flags_t flags,
+ struct xfs_buf **bpp);
-static inline struct xfs_buf *
+static inline int
xfs_trans_get_buf(
struct xfs_trans *tp,
struct xfs_buftarg *target,
xfs_daddr_t blkno,
int numblks,
- uint flags)
+ uint flags,
+ struct xfs_buf **bpp)
{
DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
- return xfs_trans_get_buf_map(tp, target, &map, 1, flags);
+ return xfs_trans_get_buf_map(tp, target, &map, 1, flags, bpp);
}
int xfs_trans_read_buf_map(struct xfs_mount *mp,
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 6ccfd75d3c24..00cc5b8734be 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -427,15 +427,15 @@ xfsaild_push(
case XFS_ITEM_FLUSHING:
/*
- * The item or its backing buffer is already beeing
+ * The item or its backing buffer is already being
* flushed. The typical reason for that is that an
* inode buffer is locked because we already pushed the
* updates to it as part of inode clustering.
*
* We do not want to to stop flushing just because lots
- * of items are already beeing flushed, but we need to
+ * of items are already being flushed, but we need to
* re-try the flushing relatively soon if most of the
- * AIL is beeing flushed.
+ * AIL is being flushed.
*/
XFS_STATS_INC(mp, xs_push_ail_flushing);
trace_xfs_ail_flushing(lip);
@@ -612,7 +612,7 @@ xfsaild(
* The push is run asynchronously in a workqueue, which means the caller needs
* to handle waiting on the async flush for space to become available.
* We don't want to interrupt any push that is in progress, hence we only queue
- * work if we set the pushing bit approriately.
+ * work if we set the pushing bit appropriately.
*
* We do this unlocked - we only need to know whether there is anything in the
* AIL at the time we are called. We don't need to access the contents of
@@ -836,7 +836,7 @@ xfs_trans_ail_init(
init_waitqueue_head(&ailp->ail_empty);
ailp->ail_task = kthread_run(xfsaild, ailp, "xfsaild/%s",
- ailp->ail_mount->m_fsname);
+ ailp->ail_mount->m_super->s_id);
if (IS_ERR(ailp->ail_task))
goto out_free_ailp;
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index b5b3a78ef31c..08174ffa2118 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -112,19 +112,22 @@ xfs_trans_bjoin(
* If the transaction pointer is NULL, make this just a normal
* get_buf() call.
*/
-struct xfs_buf *
+int
xfs_trans_get_buf_map(
struct xfs_trans *tp,
struct xfs_buftarg *target,
struct xfs_buf_map *map,
int nmaps,
- xfs_buf_flags_t flags)
+ xfs_buf_flags_t flags,
+ struct xfs_buf **bpp)
{
xfs_buf_t *bp;
struct xfs_buf_log_item *bip;
+ int error;
+ *bpp = NULL;
if (!tp)
- return xfs_buf_get_map(target, map, nmaps, flags);
+ return xfs_buf_get_map(target, map, nmaps, flags, bpp);
/*
* If we find the buffer in the cache with this transaction
@@ -146,19 +149,20 @@ xfs_trans_get_buf_map(
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_recur++;
trace_xfs_trans_get_buf_recur(bip);
- return bp;
+ *bpp = bp;
+ return 0;
}
- bp = xfs_buf_get_map(target, map, nmaps, flags);
- if (bp == NULL) {
- return NULL;
- }
+ error = xfs_buf_get_map(target, map, nmaps, flags, &bp);
+ if (error)
+ return error;
ASSERT(!bp->b_error);
_xfs_trans_bjoin(tp, bp, 1);
trace_xfs_trans_get_buf(bp->b_log_item);
- return bp;
+ *bpp = bp;
+ return 0;
}
/*
@@ -276,7 +280,7 @@ xfs_trans_read_buf_map(
ASSERT(bp->b_ops != NULL);
error = xfs_buf_reverify(bp, ops);
if (error) {
- xfs_buf_ioerror_alert(bp, __func__);
+ xfs_buf_ioerror_alert(bp, __return_address);
if (tp->t_flags & XFS_TRANS_DIRTY)
xfs_force_shutdown(tp->t_mountp,
@@ -298,36 +302,17 @@ xfs_trans_read_buf_map(
return 0;
}
- bp = xfs_buf_read_map(target, map, nmaps, flags, ops);
- if (!bp) {
- if (!(flags & XBF_TRYLOCK))
- return -ENOMEM;
- return tp ? 0 : -EAGAIN;
- }
-
- /*
- * If we've had a read error, then the contents of the buffer are
- * invalid and should not be used. To ensure that a followup read tries
- * to pull the buffer from disk again, we clear the XBF_DONE flag and
- * mark the buffer stale. This ensures that anyone who has a current
- * reference to the buffer will interpret it's contents correctly and
- * future cache lookups will also treat it as an empty, uninitialised
- * buffer.
- */
- if (bp->b_error) {
- error = bp->b_error;
- if (!XFS_FORCED_SHUTDOWN(mp))
- xfs_buf_ioerror_alert(bp, __func__);
- bp->b_flags &= ~XBF_DONE;
- xfs_buf_stale(bp);
-
+ error = xfs_buf_read_map(target, map, nmaps, flags, &bp, ops,
+ __return_address);
+ switch (error) {
+ case 0:
+ break;
+ default:
if (tp && (tp->t_flags & XFS_TRANS_DIRTY))
xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR);
- xfs_buf_relse(bp);
-
- /* bad CRC means corrupted metadata */
- if (error == -EFSBADCRC)
- error = -EFSCORRUPTED;
+ /* fall through */
+ case -ENOMEM:
+ case -EAGAIN:
return error;
}
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 1027c9ca6eb8..d1b9869bc5fa 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -25,8 +25,8 @@ STATIC void xfs_trans_alloc_dqinfo(xfs_trans_t *);
*/
void
xfs_trans_dqjoin(
- xfs_trans_t *tp,
- xfs_dquot_t *dqp)
+ struct xfs_trans *tp,
+ struct xfs_dquot *dqp)
{
ASSERT(XFS_DQ_IS_LOCKED(dqp));
ASSERT(dqp->q_logitem.qli_dquot == dqp);
@@ -49,8 +49,8 @@ xfs_trans_dqjoin(
*/
void
xfs_trans_log_dquot(
- xfs_trans_t *tp,
- xfs_dquot_t *dqp)
+ struct xfs_trans *tp,
+ struct xfs_dquot *dqp)
{
ASSERT(XFS_DQ_IS_LOCKED(dqp));
@@ -486,12 +486,12 @@ xfs_trans_apply_dquot_deltas(
*/
void
xfs_trans_unreserve_and_mod_dquots(
- xfs_trans_t *tp)
+ struct xfs_trans *tp)
{
int i, j;
- xfs_dquot_t *dqp;
+ struct xfs_dquot *dqp;
struct xfs_dqtrx *qtrx, *qa;
- bool locked;
+ bool locked;
if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY))
return;
@@ -571,21 +571,21 @@ xfs_quota_warn(
*/
STATIC int
xfs_trans_dqresv(
- xfs_trans_t *tp,
- xfs_mount_t *mp,
- xfs_dquot_t *dqp,
- int64_t nblks,
- long ninos,
- uint flags)
+ struct xfs_trans *tp,
+ struct xfs_mount *mp,
+ struct xfs_dquot *dqp,
+ int64_t nblks,
+ long ninos,
+ uint flags)
{
- xfs_qcnt_t hardlimit;
- xfs_qcnt_t softlimit;
- time_t timer;
- xfs_qwarncnt_t warns;
- xfs_qwarncnt_t warnlimit;
- xfs_qcnt_t total_count;
- xfs_qcnt_t *resbcountp;
- xfs_quotainfo_t *q = mp->m_quotainfo;
+ xfs_qcnt_t hardlimit;
+ xfs_qcnt_t softlimit;
+ time64_t timer;
+ xfs_qwarncnt_t warns;
+ xfs_qwarncnt_t warnlimit;
+ xfs_qcnt_t total_count;
+ xfs_qcnt_t *resbcountp;
+ struct xfs_quotainfo *q = mp->m_quotainfo;
struct xfs_def_quota *defq;
@@ -635,7 +635,8 @@ xfs_trans_dqresv(
goto error_return;
}
if (softlimit && total_count > softlimit) {
- if ((timer != 0 && get_seconds() > timer) ||
+ if ((timer != 0 &&
+ ktime_get_real_seconds() > timer) ||
(warns != 0 && warns >= warnlimit)) {
xfs_quota_warn(mp, dqp,
QUOTA_NL_BSOFTLONGWARN);
@@ -662,7 +663,8 @@ xfs_trans_dqresv(
goto error_return;
}
if (softlimit && total_count > softlimit) {
- if ((timer != 0 && get_seconds() > timer) ||
+ if ((timer != 0 &&
+ ktime_get_real_seconds() > timer) ||
(warns != 0 && warns >= warnlimit)) {
xfs_quota_warn(mp, dqp,
QUOTA_NL_ISOFTLONGWARN);
@@ -824,13 +826,13 @@ xfs_trans_reserve_quota_nblks(
/*
* This routine is called to allocate a quotaoff log item.
*/
-xfs_qoff_logitem_t *
+struct xfs_qoff_logitem *
xfs_trans_get_qoff_item(
- xfs_trans_t *tp,
- xfs_qoff_logitem_t *startqoff,
+ struct xfs_trans *tp,
+ struct xfs_qoff_logitem *startqoff,
uint flags)
{
- xfs_qoff_logitem_t *q;
+ struct xfs_qoff_logitem *q;
ASSERT(tp != NULL);
@@ -852,8 +854,8 @@ xfs_trans_get_qoff_item(
*/
void
xfs_trans_log_quotaoff_item(
- xfs_trans_t *tp,
- xfs_qoff_logitem_t *qlp)
+ struct xfs_trans *tp,
+ struct xfs_qoff_logitem *qlp)
{
tp->t_flags |= XFS_TRANS_DIRTY;
set_bit(XFS_LI_DIRTY, &qlp->qql_item.li_flags);
@@ -863,7 +865,7 @@ STATIC void
xfs_trans_alloc_dqinfo(
xfs_trans_t *tp)
{
- tp->t_dqinfo = kmem_zone_zalloc(xfs_qm_dqtrxzone, KM_SLEEP);
+ tp->t_dqinfo = kmem_zone_zalloc(xfs_qm_dqtrxzone, 0);
}
void
@@ -872,6 +874,6 @@ xfs_trans_free_dqinfo(
{
if (!tp->t_dqinfo)
return;
- kmem_zone_free(xfs_qm_dqtrxzone, tp->t_dqinfo);
+ kmem_cache_free(xfs_qm_dqtrxzone, tp->t_dqinfo);
tp->t_dqinfo = NULL;
}
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index 3123b5aaad2a..b0fedb543f97 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -11,6 +11,7 @@
#include "xfs_da_format.h"
#include "xfs_inode.h"
#include "xfs_attr.h"
+#include "xfs_acl.h"
#include <linux/posix_acl_xattr.h>
#include <linux/xattr.h>
@@ -23,6 +24,7 @@ xfs_xattr_get(const struct xattr_handler *handler, struct dentry *unused,
int xflags = handler->flags;
struct xfs_inode *ip = XFS_I(inode);
int error, asize = size;
+ size_t namelen = strlen(name);
/* Convert Linux syscall to XFS internal ATTR flags */
if (!size) {
@@ -30,7 +32,8 @@ xfs_xattr_get(const struct xattr_handler *handler, struct dentry *unused,
value = NULL;
}
- error = xfs_attr_get(ip, (unsigned char *)name, value, &asize, xflags);
+ error = xfs_attr_get(ip, name, namelen, (unsigned char **)&value,
+ &asize, xflags);
if (error)
return error;
return asize;
@@ -66,6 +69,7 @@ xfs_xattr_set(const struct xattr_handler *handler, struct dentry *unused,
int xflags = handler->flags;
struct xfs_inode *ip = XFS_I(inode);
int error;
+ size_t namelen = strlen(name);
/* Convert Linux syscall to XFS internal ATTR flags */
if (flags & XATTR_CREATE)
@@ -73,10 +77,11 @@ xfs_xattr_set(const struct xattr_handler *handler, struct dentry *unused,
if (flags & XATTR_REPLACE)
xflags |= ATTR_REPLACE;
- if (!value)
- return xfs_attr_remove(ip, (unsigned char *)name, xflags);
- error = xfs_attr_set(ip, (unsigned char *)name,
- (void *)value, size, xflags);
+ if (value)
+ error = xfs_attr_set(ip, name, namelen, (void *)value, size,
+ xflags);
+ else
+ error = xfs_attr_remove(ip, name, namelen, xflags);
if (!error)
xfs_forget_acl(inode, name, xflags);
OpenPOWER on IntegriCloud