summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c4
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h1
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c35
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c1
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c1
-rw-r--r--fs/xfs/libxfs/xfs_bit.c6
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c27
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c1
-rw-r--r--fs/xfs/libxfs/xfs_btree.c58
-rw-r--r--fs/xfs/libxfs/xfs_btree.h3
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c1
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c1
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c1
-rw-r--r--fs/xfs/libxfs/xfs_dquot_buf.c1
-rw-r--r--fs/xfs/libxfs/xfs_format.h2
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c1
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c27
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c2
-rw-r--r--fs/xfs/libxfs/xfs_log_recover.h1
-rw-r--r--fs/xfs/libxfs/xfs_sb.c2
-rw-r--r--fs/xfs/libxfs/xfs_symlink_remote.c1
-rw-r--r--fs/xfs/xfs_buf.c2
-rw-r--r--fs/xfs/xfs_buf.h1
-rw-r--r--fs/xfs/xfs_error.c4
-rw-r--r--fs/xfs/xfs_log.c51
-rw-r--r--fs/xfs/xfs_log_priv.h3
-rw-r--r--fs/xfs/xfs_log_recover.c572
-rw-r--r--fs/xfs/xfs_super.c2
-rw-r--r--fs/xfs/xfs_sysfs.c36
-rw-r--r--fs/xfs/xfs_trans_dquot.c14
32 files changed, 669 insertions, 197 deletions
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 3479294c1d58..a708e38b494c 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -535,6 +535,7 @@ xfs_agfl_write_verify(
}
const struct xfs_buf_ops xfs_agfl_buf_ops = {
+ .name = "xfs_agfl",
.verify_read = xfs_agfl_read_verify,
.verify_write = xfs_agfl_write_verify,
};
@@ -1926,7 +1927,7 @@ xfs_alloc_space_available(
* Decide whether to use this allocation group for this allocation.
* If so, fix up the btree freelist's size.
*/
-STATIC int /* error */
+int /* error */
xfs_alloc_fix_freelist(
struct xfs_alloc_arg *args, /* allocation argument structure */
int flags) /* XFS_ALLOC_FLAG_... */
@@ -2339,6 +2340,7 @@ xfs_agf_write_verify(
}
const struct xfs_buf_ops xfs_agf_buf_ops = {
+ .name = "xfs_agf",
.verify_read = xfs_agf_read_verify,
.verify_write = xfs_agf_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 0ecde4d5cac8..135eb3d24db7 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -235,5 +235,6 @@ xfs_alloc_get_rec(
int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_agnumber_t agno, int flags, struct xfs_buf **bpp);
+int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags);
#endif /* __XFS_ALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 90de071dd4c2..444626ddbd1b 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -293,14 +293,7 @@ xfs_allocbt_verify(
level = be16_to_cpu(block->bb_level);
switch (block->bb_magic) {
case cpu_to_be32(XFS_ABTB_CRC_MAGIC):
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- return false;
- if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
- return false;
- if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
- return false;
- if (pag &&
- be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ if (!xfs_btree_sblock_v5hdr_verify(bp))
return false;
/* fall through */
case cpu_to_be32(XFS_ABTB_MAGIC):
@@ -311,14 +304,7 @@ xfs_allocbt_verify(
return false;
break;
case cpu_to_be32(XFS_ABTC_CRC_MAGIC):
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- return false;
- if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
- return false;
- if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
- return false;
- if (pag &&
- be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ if (!xfs_btree_sblock_v5hdr_verify(bp))
return false;
/* fall through */
case cpu_to_be32(XFS_ABTC_MAGIC):
@@ -332,21 +318,7 @@ xfs_allocbt_verify(
return false;
}
- /* numrecs verification */
- if (be16_to_cpu(block->bb_numrecs) > mp->m_alloc_mxr[level != 0])
- return false;
-
- /* sibling pointer verification */
- if (!block->bb_u.s.bb_leftsib ||
- (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
- block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
- return false;
- if (!block->bb_u.s.bb_rightsib ||
- (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
- block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
- return false;
-
- return true;
+ return xfs_btree_sblock_verify(bp, mp->m_alloc_mxr[level != 0]);
}
static void
@@ -379,6 +351,7 @@ xfs_allocbt_write_verify(
}
const struct xfs_buf_ops xfs_allocbt_buf_ops = {
+ .name = "xfs_allocbt",
.verify_read = xfs_allocbt_read_verify,
.verify_write = xfs_allocbt_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index aa187f7ba2dd..01a5ecfedfcf 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -328,6 +328,7 @@ xfs_attr3_leaf_read_verify(
}
const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = {
+ .name = "xfs_attr3_leaf",
.verify_read = xfs_attr3_leaf_read_verify,
.verify_write = xfs_attr3_leaf_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 5ab95ffa4ae9..f3ed9bf0b065 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -201,6 +201,7 @@ xfs_attr3_rmt_write_verify(
}
const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
+ .name = "xfs_attr3_rmt",
.verify_read = xfs_attr3_rmt_read_verify,
.verify_write = xfs_attr3_rmt_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_bit.c b/fs/xfs/libxfs/xfs_bit.c
index 0e8885a59646..0a94cce5ea35 100644
--- a/fs/xfs/libxfs/xfs_bit.c
+++ b/fs/xfs/libxfs/xfs_bit.c
@@ -32,13 +32,13 @@ int
xfs_bitmap_empty(uint *map, uint size)
{
uint i;
- uint ret = 0;
for (i = 0; i < size; i++) {
- ret |= map[i];
+ if (map[i] != 0)
+ return 0;
}
- return (ret == 0);
+ return 1;
}
/*
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 119c2422aac7..73884953b21c 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1723,10 +1723,11 @@ xfs_bmap_add_extent_delay_real(
xfs_filblks_t temp=0; /* value for da_new calculations */
xfs_filblks_t temp2=0;/* value for da_new calculations */
int tmp_rval; /* partial logging flags */
+ int whichfork = XFS_DATA_FORK;
struct xfs_mount *mp;
- mp = bma->tp ? bma->tp->t_mountp : NULL;
- ifp = XFS_IFORK_PTR(bma->ip, XFS_DATA_FORK);
+ mp = bma->ip->i_mount;
+ ifp = XFS_IFORK_PTR(bma->ip, whichfork);
ASSERT(bma->idx >= 0);
ASSERT(bma->idx <= ifp->if_bytes / sizeof(struct xfs_bmbt_rec));
@@ -1785,7 +1786,7 @@ xfs_bmap_add_extent_delay_real(
* Don't set contiguous if the combined extent would be too large.
* Also check for all-three-contiguous being too large.
*/
- if (bma->idx < bma->ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
+ if (bma->idx < ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t) - 1) {
state |= BMAP_RIGHT_VALID;
xfs_bmbt_get_all(xfs_iext_get_ext(ifp, bma->idx + 1), &RIGHT);
@@ -2016,10 +2017,10 @@ xfs_bmap_add_extent_delay_real(
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
- if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
+ if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
bma->firstblock, bma->flist,
- &bma->cur, 1, &tmp_rval, XFS_DATA_FORK);
+ &bma->cur, 1, &tmp_rval, whichfork);
rval |= tmp_rval;
if (error)
goto done;
@@ -2100,10 +2101,10 @@ xfs_bmap_add_extent_delay_real(
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
- if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
+ if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
bma->firstblock, bma->flist, &bma->cur, 1,
- &tmp_rval, XFS_DATA_FORK);
+ &tmp_rval, whichfork);
rval |= tmp_rval;
if (error)
goto done;
@@ -2169,10 +2170,10 @@ xfs_bmap_add_extent_delay_real(
XFS_WANT_CORRUPTED_GOTO(mp, i == 1, done);
}
- if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
+ if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
bma->firstblock, bma->flist, &bma->cur,
- 1, &tmp_rval, XFS_DATA_FORK);
+ 1, &tmp_rval, whichfork);
rval |= tmp_rval;
if (error)
goto done;
@@ -2215,13 +2216,13 @@ xfs_bmap_add_extent_delay_real(
}
/* convert to a btree if necessary */
- if (xfs_bmap_needs_btree(bma->ip, XFS_DATA_FORK)) {
+ if (xfs_bmap_needs_btree(bma->ip, whichfork)) {
int tmp_logflags; /* partial log flag return val */
ASSERT(bma->cur == NULL);
error = xfs_bmap_extents_to_btree(bma->tp, bma->ip,
bma->firstblock, bma->flist, &bma->cur,
- da_old > 0, &tmp_logflags, XFS_DATA_FORK);
+ da_old > 0, &tmp_logflags, whichfork);
bma->logflags |= tmp_logflags;
if (error)
goto done;
@@ -2242,7 +2243,7 @@ xfs_bmap_add_extent_delay_real(
if (bma->cur)
bma->cur->bc_private.b.allocated = 0;
- xfs_bmap_check_leaf_extents(bma->cur, bma->ip, XFS_DATA_FORK);
+ xfs_bmap_check_leaf_extents(bma->cur, bma->ip, whichfork);
done:
bma->logflags |= rval;
return error;
@@ -2939,7 +2940,7 @@ xfs_bmap_add_extent_hole_real(
int state; /* state bits, accessed thru macros */
struct xfs_mount *mp;
- mp = bma->tp ? bma->tp->t_mountp : NULL;
+ mp = bma->ip->i_mount;
ifp = XFS_IFORK_PTR(bma->ip, whichfork);
ASSERT(bma->idx >= 0);
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 6b0cf6546a82..1637c37bfbaa 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -720,6 +720,7 @@ xfs_bmbt_write_verify(
}
const struct xfs_buf_ops xfs_bmbt_buf_ops = {
+ .name = "xfs_bmbt",
.verify_read = xfs_bmbt_read_verify,
.verify_write = xfs_bmbt_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index af1bbee5586e..a0eb18ce3ad3 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4080,3 +4080,61 @@ xfs_btree_change_owner(
return 0;
}
+
+/**
+ * xfs_btree_sblock_v5hdr_verify() -- verify the v5 fields of a short-format
+ * btree block
+ *
+ * @bp: buffer containing the btree block
+ * @max_recs: pointer to the m_*_mxr max records field in the xfs mount
+ * @pag_max_level: pointer to the per-ag max level field
+ */
+bool
+xfs_btree_sblock_v5hdr_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+ struct xfs_perag *pag = bp->b_pag;
+
+ if (!xfs_sb_version_hascrc(&mp->m_sb))
+ return false;
+ if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
+ return false;
+ if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
+ return false;
+ if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ return false;
+ return true;
+}
+
+/**
+ * xfs_btree_sblock_verify() -- verify a short-format btree block
+ *
+ * @bp: buffer containing the btree block
+ * @max_recs: maximum records allowed in this btree node
+ */
+bool
+xfs_btree_sblock_verify(
+ struct xfs_buf *bp,
+ unsigned int max_recs)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+ struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
+
+ /* numrecs verification */
+ if (be16_to_cpu(block->bb_numrecs) > max_recs)
+ return false;
+
+ /* sibling pointer verification */
+ if (!block->bb_u.s.bb_leftsib ||
+ (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
+ block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
+ return false;
+ if (!block->bb_u.s.bb_rightsib ||
+ (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
+ block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
+ return false;
+
+ return true;
+}
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 992dec0638f3..2e874be70209 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -472,4 +472,7 @@ static inline int xfs_btree_get_level(struct xfs_btree_block *block)
#define XFS_BTREE_TRACE_ARGR(c, r)
#define XFS_BTREE_TRACE_CURSOR(c, t)
+bool xfs_btree_sblock_v5hdr_verify(struct xfs_buf *bp);
+bool xfs_btree_sblock_verify(struct xfs_buf *bp, unsigned int max_recs);
+
#endif /* __XFS_BTREE_H__ */
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index e89a0f8f827c..097bf7717d80 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -245,6 +245,7 @@ xfs_da3_node_read_verify(
}
const struct xfs_buf_ops xfs_da3_node_buf_ops = {
+ .name = "xfs_da3_node",
.verify_read = xfs_da3_node_read_verify,
.verify_write = xfs_da3_node_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index 9c10e2b8cfcb..aa17cb788946 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -123,6 +123,7 @@ xfs_dir3_block_write_verify(
}
const struct xfs_buf_ops xfs_dir3_block_buf_ops = {
+ .name = "xfs_dir3_block",
.verify_read = xfs_dir3_block_read_verify,
.verify_write = xfs_dir3_block_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index af71a84f343c..725fc7841fde 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -305,11 +305,13 @@ xfs_dir3_data_write_verify(
}
const struct xfs_buf_ops xfs_dir3_data_buf_ops = {
+ .name = "xfs_dir3_data",
.verify_read = xfs_dir3_data_read_verify,
.verify_write = xfs_dir3_data_write_verify,
};
static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = {
+ .name = "xfs_dir3_data_reada",
.verify_read = xfs_dir3_data_reada_verify,
.verify_write = xfs_dir3_data_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index 3923e1f94697..b887fb2a2bcf 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -245,11 +245,13 @@ xfs_dir3_leafn_write_verify(
}
const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops = {
+ .name = "xfs_dir3_leaf1",
.verify_read = xfs_dir3_leaf1_read_verify,
.verify_write = xfs_dir3_leaf1_write_verify,
};
const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = {
+ .name = "xfs_dir3_leafn",
.verify_read = xfs_dir3_leafn_read_verify,
.verify_write = xfs_dir3_leafn_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 70b0cb2fd556..63ee03db796c 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -150,6 +150,7 @@ xfs_dir3_free_write_verify(
}
const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
+ .name = "xfs_dir3_free",
.verify_read = xfs_dir3_free_read_verify,
.verify_write = xfs_dir3_free_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index 5331b7f0460c..11cefb2a372a 100644
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -282,6 +282,7 @@ xfs_dquot_buf_write_verify(
}
const struct xfs_buf_ops xfs_dquot_buf_ops = {
+ .name = "xfs_dquot",
.verify_read = xfs_dquot_buf_read_verify,
.verify_write = xfs_dquot_buf_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index 8774498ce0ff..e2536bb1c760 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -786,7 +786,7 @@ typedef struct xfs_agfl {
__be64 agfl_lsn;
__be32 agfl_crc;
__be32 agfl_bno[]; /* actually XFS_AGFL_SIZE(mp) */
-} xfs_agfl_t;
+} __attribute__((packed)) xfs_agfl_t;
#define XFS_AGFL_CRC_OFF offsetof(struct xfs_agfl, agfl_crc)
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 70c1db99f6a7..66d702e6b9ff 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -2572,6 +2572,7 @@ xfs_agi_write_verify(
}
const struct xfs_buf_ops xfs_agi_buf_ops = {
+ .name = "xfs_agi",
.verify_read = xfs_agi_read_verify,
.verify_write = xfs_agi_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index f39b285beb19..c679f3c05b63 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -221,7 +221,6 @@ xfs_inobt_verify(
{
struct xfs_mount *mp = bp->b_target->bt_mount;
struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp);
- struct xfs_perag *pag = bp->b_pag;
unsigned int level;
/*
@@ -237,14 +236,7 @@ xfs_inobt_verify(
switch (block->bb_magic) {
case cpu_to_be32(XFS_IBT_CRC_MAGIC):
case cpu_to_be32(XFS_FIBT_CRC_MAGIC):
- if (!xfs_sb_version_hascrc(&mp->m_sb))
- return false;
- if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_meta_uuid))
- return false;
- if (block->bb_u.s.bb_blkno != cpu_to_be64(bp->b_bn))
- return false;
- if (pag &&
- be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ if (!xfs_btree_sblock_v5hdr_verify(bp))
return false;
/* fall through */
case cpu_to_be32(XFS_IBT_MAGIC):
@@ -254,24 +246,12 @@ xfs_inobt_verify(
return 0;
}
- /* numrecs and level verification */
+ /* level verification */
level = be16_to_cpu(block->bb_level);
if (level >= mp->m_in_maxlevels)
return false;
- if (be16_to_cpu(block->bb_numrecs) > mp->m_inobt_mxr[level != 0])
- return false;
-
- /* sibling pointer verification */
- if (!block->bb_u.s.bb_leftsib ||
- (be32_to_cpu(block->bb_u.s.bb_leftsib) >= mp->m_sb.sb_agblocks &&
- block->bb_u.s.bb_leftsib != cpu_to_be32(NULLAGBLOCK)))
- return false;
- if (!block->bb_u.s.bb_rightsib ||
- (be32_to_cpu(block->bb_u.s.bb_rightsib) >= mp->m_sb.sb_agblocks &&
- block->bb_u.s.bb_rightsib != cpu_to_be32(NULLAGBLOCK)))
- return false;
- return true;
+ return xfs_btree_sblock_verify(bp, mp->m_inobt_mxr[level != 0]);
}
static void
@@ -304,6 +284,7 @@ xfs_inobt_write_verify(
}
const struct xfs_buf_ops xfs_inobt_buf_ops = {
+ .name = "xfs_inobt",
.verify_read = xfs_inobt_read_verify,
.verify_write = xfs_inobt_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 268c00f4f83a..1b8d98a915c4 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -132,11 +132,13 @@ xfs_inode_buf_write_verify(
}
const struct xfs_buf_ops xfs_inode_buf_ops = {
+ .name = "xfs_inode",
.verify_read = xfs_inode_buf_read_verify,
.verify_write = xfs_inode_buf_write_verify,
};
const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
+ .name = "xxfs_inode_ra",
.verify_read = xfs_inode_buf_readahead_verify,
.verify_write = xfs_inode_buf_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 1c55ccbb379d..8e385f91d660 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -60,6 +60,7 @@ typedef struct xlog_recover {
*/
#define XLOG_BC_TABLE_SIZE 64
+#define XLOG_RECOVER_CRCPASS 0
#define XLOG_RECOVER_PASS1 1
#define XLOG_RECOVER_PASS2 2
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index a0b071d881a0..8a53eaa349f4 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -679,11 +679,13 @@ xfs_sb_write_verify(
}
const struct xfs_buf_ops xfs_sb_buf_ops = {
+ .name = "xfs_sb",
.verify_read = xfs_sb_read_verify,
.verify_write = xfs_sb_write_verify,
};
const struct xfs_buf_ops xfs_sb_quiet_buf_ops = {
+ .name = "xfs_sb_quiet",
.verify_read = xfs_sb_quiet_read_verify,
.verify_write = xfs_sb_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index cb6fd20a4d3d..2e2c6716b623 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -168,6 +168,7 @@ xfs_symlink_write_verify(
}
const struct xfs_buf_ops xfs_symlink_buf_ops = {
+ .name = "xfs_symlink",
.verify_read = xfs_symlink_read_verify,
.verify_write = xfs_symlink_write_verify,
};
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 3243cdf97f33..45a8ea7cfdb2 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1045,7 +1045,7 @@ xfs_buf_ioend_work(
xfs_buf_ioend(bp);
}
-void
+static void
xfs_buf_ioend_async(
struct xfs_buf *bp)
{
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index c79b717d9b88..c75721acd867 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -132,6 +132,7 @@ struct xfs_buf_map {
struct xfs_buf_map (map) = { .bm_bn = (blkno), .bm_len = (numblk) };
struct xfs_buf_ops {
+ char *name;
void (*verify_read)(struct xfs_buf *);
void (*verify_write)(struct xfs_buf *);
};
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 74d0e5966ebc..88693a98fac5 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -164,9 +164,9 @@ xfs_verifier_error(
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- xfs_alert(mp, "Metadata %s detected at %pF, block 0x%llx",
+ xfs_alert(mp, "Metadata %s detected at %pF, %s block 0x%llx",
bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
- __return_address, bp->b_bn);
+ __return_address, bp->b_ops->name, bp->b_bn);
xfs_alert(mp, "Unmount and run xfs_repair");
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index f52c72a1a06f..9c9a1c9bcc7f 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1188,10 +1188,16 @@ xlog_iodone(xfs_buf_t *bp)
int aborted = 0;
/*
- * Race to shutdown the filesystem if we see an error.
+ * Race to shutdown the filesystem if we see an error or the iclog is in
+ * IOABORT state. The IOABORT state is only set in DEBUG mode to inject
+ * CRC errors into log recovery.
*/
- if (XFS_TEST_ERROR(bp->b_error, l->l_mp,
- XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) {
+ if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR,
+ XFS_RANDOM_IODONE_IOERR) ||
+ iclog->ic_state & XLOG_STATE_IOABORT) {
+ if (iclog->ic_state & XLOG_STATE_IOABORT)
+ iclog->ic_state &= ~XLOG_STATE_IOABORT;
+
xfs_buf_ioerror_alert(bp, __func__);
xfs_buf_stale(bp);
xfs_force_shutdown(l->l_mp, SHUTDOWN_LOG_IO_ERROR);
@@ -1838,6 +1844,23 @@ xlog_sync(
/* calculcate the checksum */
iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header,
iclog->ic_datap, size);
+#ifdef DEBUG
+ /*
+ * Intentionally corrupt the log record CRC based on the error injection
+ * frequency, if defined. This facilitates testing log recovery in the
+ * event of torn writes. Hence, set the IOABORT state to abort the log
+ * write on I/O completion and shutdown the fs. The subsequent mount
+ * detects the bad CRC and attempts to recover.
+ */
+ if (log->l_badcrc_factor &&
+ (prandom_u32() % log->l_badcrc_factor == 0)) {
+ iclog->ic_header.h_crc &= 0xAAAAAAAA;
+ iclog->ic_state |= XLOG_STATE_IOABORT;
+ xfs_warn(log->l_mp,
+ "Intentionally corrupted log record at LSN 0x%llx. Shutdown imminent.",
+ be64_to_cpu(iclog->ic_header.h_lsn));
+ }
+#endif
bp->b_io_length = BTOBB(count);
bp->b_fspriv = iclog;
@@ -2045,12 +2068,14 @@ xlog_print_tic_res(
"QM_DQCLUSTER",
"QM_QINOCREATE",
"QM_QUOTAOFF_END",
- "SB_UNIT",
"FSYNC_TS",
"GROWFSRT_ALLOC",
"GROWFSRT_ZERO",
"GROWFSRT_FREE",
- "SWAPEXT"
+ "SWAPEXT",
+ "CHECKPOINT",
+ "ICREATE",
+ "CREATE_TMPFILE"
};
xfs_warn(mp, "xlog_write: reservation summary:");
@@ -2791,11 +2816,19 @@ xlog_state_do_callback(
}
} while (!ioerrors && loopdidcallbacks);
+#ifdef DEBUG
/*
- * make one last gasp attempt to see if iclogs are being left in
- * limbo..
+ * Make one last gasp attempt to see if iclogs are being left in limbo.
+ * If the above loop finds an iclog earlier than the current iclog and
+ * in one of the syncing states, the current iclog is put into
+ * DO_CALLBACK and the callbacks are deferred to the completion of the
+ * earlier iclog. Walk the iclogs in order and make sure that no iclog
+ * is in DO_CALLBACK unless an earlier iclog is in one of the syncing
+ * states.
+ *
+ * Note that SYNCING|IOABORT is a valid state so we cannot just check
+ * for ic_state == SYNCING.
*/
-#ifdef DEBUG
if (funcdidcallbacks) {
first_iclog = iclog = log->l_iclog;
do {
@@ -2810,7 +2843,7 @@ xlog_state_do_callback(
* IOERROR - give up hope all ye who enter here
*/
if (iclog->ic_state == XLOG_STATE_WANT_SYNC ||
- iclog->ic_state == XLOG_STATE_SYNCING ||
+ iclog->ic_state & XLOG_STATE_SYNCING ||
iclog->ic_state == XLOG_STATE_DONE_SYNC ||
iclog->ic_state == XLOG_STATE_IOERROR )
break;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 8daba7491b13..ed8896310c00 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -62,6 +62,7 @@ static inline uint xlog_get_client_id(__be32 i)
#define XLOG_STATE_CALLBACK 0x0020 /* Callback functions now */
#define XLOG_STATE_DIRTY 0x0040 /* Dirty IC log, not ready for ACTIVE status*/
#define XLOG_STATE_IOERROR 0x0080 /* IO error happened in sync'ing log */
+#define XLOG_STATE_IOABORT 0x0100 /* force abort on I/O completion (debug) */
#define XLOG_STATE_ALL 0x7FFF /* All possible valid flags */
#define XLOG_STATE_NOTUSED 0x8000 /* This IC log not being used */
@@ -410,6 +411,8 @@ struct xlog {
/* The following field are used for debugging; need to hold icloglock */
#ifdef DEBUG
void *l_iclog_bak[XLOG_MAX_ICLOGS];
+ /* log record crc error injection factor */
+ uint32_t l_badcrc_factor;
#endif
};
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index c5ecaacdd218..26e67b4450cc 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -61,6 +61,9 @@ xlog_recover_check_summary(
#else
#define xlog_recover_check_summary(log)
#endif
+STATIC int
+xlog_do_recovery_pass(
+ struct xlog *, xfs_daddr_t, xfs_daddr_t, int, xfs_daddr_t *);
/*
* This structure is used during recovery to record the buf log items which
@@ -868,6 +871,351 @@ validate_head:
}
/*
+ * Seek backwards in the log for log record headers.
+ *
+ * Given a starting log block, walk backwards until we find the provided number
+ * of records or hit the provided tail block. The return value is the number of
+ * records encountered or a negative error code. The log block and buffer
+ * pointer of the last record seen are returned in rblk and rhead respectively.
+ */
+STATIC int
+xlog_rseek_logrec_hdr(
+ struct xlog *log,
+ xfs_daddr_t head_blk,
+ xfs_daddr_t tail_blk,
+ int count,
+ struct xfs_buf *bp,
+ xfs_daddr_t *rblk,
+ struct xlog_rec_header **rhead,
+ bool *wrapped)
+{
+ int i;
+ int error;
+ int found = 0;
+ char *offset = NULL;
+ xfs_daddr_t end_blk;
+
+ *wrapped = false;
+
+ /*
+ * Walk backwards from the head block until we hit the tail or the first
+ * block in the log.
+ */
+ end_blk = head_blk > tail_blk ? tail_blk : 0;
+ for (i = (int) head_blk - 1; i >= end_blk; i--) {
+ error = xlog_bread(log, i, 1, bp, &offset);
+ if (error)
+ goto out_error;
+
+ if (*(__be32 *) offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
+ *rblk = i;
+ *rhead = (struct xlog_rec_header *) offset;
+ if (++found == count)
+ break;
+ }
+ }
+
+ /*
+ * If we haven't hit the tail block or the log record header count,
+ * start looking again from the end of the physical log. Note that
+ * callers can pass head == tail if the tail is not yet known.
+ */
+ if (tail_blk >= head_blk && found != count) {
+ for (i = log->l_logBBsize - 1; i >= (int) tail_blk; i--) {
+ error = xlog_bread(log, i, 1, bp, &offset);
+ if (error)
+ goto out_error;
+
+ if (*(__be32 *)offset ==
+ cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
+ *wrapped = true;
+ *rblk = i;
+ *rhead = (struct xlog_rec_header *) offset;
+ if (++found == count)
+ break;
+ }
+ }
+ }
+
+ return found;
+
+out_error:
+ return error;
+}
+
+/*
+ * Seek forward in the log for log record headers.
+ *
+ * Given head and tail blocks, walk forward from the tail block until we find
+ * the provided number of records or hit the head block. The return value is the
+ * number of records encountered or a negative error code. The log block and
+ * buffer pointer of the last record seen are returned in rblk and rhead
+ * respectively.
+ */
+STATIC int
+xlog_seek_logrec_hdr(
+ struct xlog *log,
+ xfs_daddr_t head_blk,
+ xfs_daddr_t tail_blk,
+ int count,
+ struct xfs_buf *bp,
+ xfs_daddr_t *rblk,
+ struct xlog_rec_header **rhead,
+ bool *wrapped)
+{
+ int i;
+ int error;
+ int found = 0;
+ char *offset = NULL;
+ xfs_daddr_t end_blk;
+
+ *wrapped = false;
+
+ /*
+ * Walk forward from the tail block until we hit the head or the last
+ * block in the log.
+ */
+ end_blk = head_blk > tail_blk ? head_blk : log->l_logBBsize - 1;
+ for (i = (int) tail_blk; i <= end_blk; i++) {
+ error = xlog_bread(log, i, 1, bp, &offset);
+ if (error)
+ goto out_error;
+
+ if (*(__be32 *) offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
+ *rblk = i;
+ *rhead = (struct xlog_rec_header *) offset;
+ if (++found == count)
+ break;
+ }
+ }
+
+ /*
+ * If we haven't hit the head block or the log record header count,
+ * start looking again from the start of the physical log.
+ */
+ if (tail_blk > head_blk && found != count) {
+ for (i = 0; i < (int) head_blk; i++) {
+ error = xlog_bread(log, i, 1, bp, &offset);
+ if (error)
+ goto out_error;
+
+ if (*(__be32 *)offset ==
+ cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
+ *wrapped = true;
+ *rblk = i;
+ *rhead = (struct xlog_rec_header *) offset;
+ if (++found == count)
+ break;
+ }
+ }
+ }
+
+ return found;
+
+out_error:
+ return error;
+}
+
+/*
+ * Check the log tail for torn writes. This is required when torn writes are
+ * detected at the head and the head had to be walked back to a previous record.
+ * The tail of the previous record must now be verified to ensure the torn
+ * writes didn't corrupt the previous tail.
+ *
+ * Return an error if CRC verification fails as recovery cannot proceed.
+ */
+STATIC int
+xlog_verify_tail(
+ struct xlog *log,
+ xfs_daddr_t head_blk,
+ xfs_daddr_t tail_blk)
+{
+ struct xlog_rec_header *thead;
+ struct xfs_buf *bp;
+ xfs_daddr_t first_bad;
+ int count;
+ int error = 0;
+ bool wrapped;
+ xfs_daddr_t tmp_head;
+
+ bp = xlog_get_bp(log, 1);
+ if (!bp)
+ return -ENOMEM;
+
+ /*
+ * Seek XLOG_MAX_ICLOGS + 1 records past the current tail record to get
+ * a temporary head block that points after the last possible
+ * concurrently written record of the tail.
+ */
+ count = xlog_seek_logrec_hdr(log, head_blk, tail_blk,
+ XLOG_MAX_ICLOGS + 1, bp, &tmp_head, &thead,
+ &wrapped);
+ if (count < 0) {
+ error = count;
+ goto out;
+ }
+
+ /*
+ * If the call above didn't find XLOG_MAX_ICLOGS + 1 records, we ran
+ * into the actual log head. tmp_head points to the start of the record
+ * so update it to the actual head block.
+ */
+ if (count < XLOG_MAX_ICLOGS + 1)
+ tmp_head = head_blk;
+
+ /*
+ * We now have a tail and temporary head block that covers at least
+ * XLOG_MAX_ICLOGS records from the tail. We need to verify that these
+ * records were completely written. Run a CRC verification pass from
+ * tail to head and return the result.
+ */
+ error = xlog_do_recovery_pass(log, tmp_head, tail_blk,
+ XLOG_RECOVER_CRCPASS, &first_bad);
+
+out:
+ xlog_put_bp(bp);
+ return error;
+}
+
+/*
+ * Detect and trim torn writes from the head of the log.
+ *
+ * Storage without sector atomicity guarantees can result in torn writes in the
+ * log in the event of a crash. Our only means to detect this scenario is via
+ * CRC verification. While we can't always be certain that CRC verification
+ * failure is due to a torn write vs. an unrelated corruption, we do know that
+ * only a certain number (XLOG_MAX_ICLOGS) of log records can be written out at
+ * one time. Therefore, CRC verify up to XLOG_MAX_ICLOGS records at the head of
+ * the log and treat failures in this range as torn writes as a matter of
+ * policy. In the event of CRC failure, the head is walked back to the last good
+ * record in the log and the tail is updated from that record and verified.
+ */
+STATIC int
+xlog_verify_head(
+ struct xlog *log,
+ xfs_daddr_t *head_blk, /* in/out: unverified head */
+ xfs_daddr_t *tail_blk, /* out: tail block */
+ struct xfs_buf *bp,
+ xfs_daddr_t *rhead_blk, /* start blk of last record */
+ struct xlog_rec_header **rhead, /* ptr to last record */
+ bool *wrapped) /* last rec. wraps phys. log */
+{
+ struct xlog_rec_header *tmp_rhead;
+ struct xfs_buf *tmp_bp;
+ xfs_daddr_t first_bad;
+ xfs_daddr_t tmp_rhead_blk;
+ int found;
+ int error;
+ bool tmp_wrapped;
+
+ /*
+ * Search backwards through the log looking for the log record header
+ * block. This wraps all the way back around to the head so something is
+ * seriously wrong if we can't find it.
+ */
+ found = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, rhead_blk,
+ rhead, wrapped);
+ if (found < 0)
+ return found;
+ if (!found) {
+ xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
+ return -EIO;
+ }
+
+ *tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn));
+
+ /*
+ * Now that we have a tail block, check the head of the log for torn
+ * writes. Search again until we hit the tail or the maximum number of
+ * log record I/Os that could have been in flight at one time. Use a
+ * temporary buffer so we don't trash the rhead/bp pointer from the
+ * call above.
+ */
+ tmp_bp = xlog_get_bp(log, 1);
+ if (!tmp_bp)
+ return -ENOMEM;
+ error = xlog_rseek_logrec_hdr(log, *head_blk, *tail_blk,
+ XLOG_MAX_ICLOGS, tmp_bp, &tmp_rhead_blk,
+ &tmp_rhead, &tmp_wrapped);
+ xlog_put_bp(tmp_bp);
+ if (error < 0)
+ return error;
+
+ /*
+ * Now run a CRC verification pass over the records starting at the
+ * block found above to the current head. If a CRC failure occurs, the
+ * log block of the first bad record is saved in first_bad.
+ */
+ error = xlog_do_recovery_pass(log, *head_blk, tmp_rhead_blk,
+ XLOG_RECOVER_CRCPASS, &first_bad);
+ if (error == -EFSBADCRC) {
+ /*
+ * We've hit a potential torn write. Reset the error and warn
+ * about it.
+ */
+ error = 0;
+ xfs_warn(log->l_mp,
+"Torn write (CRC failure) detected at log block 0x%llx. Truncating head block from 0x%llx.",
+ first_bad, *head_blk);
+
+ /*
+ * Get the header block and buffer pointer for the last good
+ * record before the bad record.
+ *
+ * Note that xlog_find_tail() clears the blocks at the new head
+ * (i.e., the records with invalid CRC) if the cycle number
+ * matches the the current cycle.
+ */
+ found = xlog_rseek_logrec_hdr(log, first_bad, *tail_blk, 1, bp,
+ rhead_blk, rhead, wrapped);
+ if (found < 0)
+ return found;
+ if (found == 0) /* XXX: right thing to do here? */
+ return -EIO;
+
+ /*
+ * Reset the head block to the starting block of the first bad
+ * log record and set the tail block based on the last good
+ * record.
+ *
+ * Bail out if the updated head/tail match as this indicates
+ * possible corruption outside of the acceptable
+ * (XLOG_MAX_ICLOGS) range. This is a job for xfs_repair...
+ */
+ *head_blk = first_bad;
+ *tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn));
+ if (*head_blk == *tail_blk) {
+ ASSERT(0);
+ return 0;
+ }
+
+ /*
+ * Now verify the tail based on the updated head. This is
+ * required because the torn writes trimmed from the head could
+ * have been written over the tail of a previous record. Return
+ * any errors since recovery cannot proceed if the tail is
+ * corrupt.
+ *
+ * XXX: This leaves a gap in truly robust protection from torn
+ * writes in the log. If the head is behind the tail, the tail
+ * pushes forward to create some space and then a crash occurs
+ * causing the writes into the previous record's tail region to
+ * tear, log recovery isn't able to recover.
+ *
+ * How likely is this to occur? If possible, can we do something
+ * more intelligent here? Is it safe to push the tail forward if
+ * we can determine that the tail is within the range of the
+ * torn write (e.g., the kernel can only overwrite the tail if
+ * it has actually been pushed forward)? Alternatively, could we
+ * somehow prevent this condition at runtime?
+ */
+ error = xlog_verify_tail(log, *head_blk, *tail_blk);
+ }
+
+ return error;
+}
+
+/*
* Find the sync block number or the tail of the log.
*
* This will be the block number of the last record to have its
@@ -893,13 +1241,13 @@ xlog_find_tail(
xlog_op_header_t *op_head;
char *offset = NULL;
xfs_buf_t *bp;
- int error, i, found;
+ int error;
xfs_daddr_t umount_data_blk;
xfs_daddr_t after_umount_blk;
+ xfs_daddr_t rhead_blk;
xfs_lsn_t tail_lsn;
int hblks;
-
- found = 0;
+ bool wrapped = false;
/*
* Find previous log record
@@ -923,48 +1271,16 @@ xlog_find_tail(
}
/*
- * Search backwards looking for log record header block
+ * Trim the head block back to skip over torn records. We can have
+ * multiple log I/Os in flight at any time, so we assume CRC failures
+ * back through the previous several records are torn writes and skip
+ * them.
*/
ASSERT(*head_blk < INT_MAX);
- for (i = (int)(*head_blk) - 1; i >= 0; i--) {
- error = xlog_bread(log, i, 1, bp, &offset);
- if (error)
- goto done;
-
- if (*(__be32 *)offset == cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
- found = 1;
- break;
- }
- }
- /*
- * If we haven't found the log record header block, start looking
- * again from the end of the physical log. XXXmiken: There should be
- * a check here to make sure we didn't search more than N blocks in
- * the previous code.
- */
- if (!found) {
- for (i = log->l_logBBsize - 1; i >= (int)(*head_blk); i--) {
- error = xlog_bread(log, i, 1, bp, &offset);
- if (error)
- goto done;
-
- if (*(__be32 *)offset ==
- cpu_to_be32(XLOG_HEADER_MAGIC_NUM)) {
- found = 2;
- break;
- }
- }
- }
- if (!found) {
- xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__);
- xlog_put_bp(bp);
- ASSERT(0);
- return -EIO;
- }
-
- /* find blk_no of tail of log */
- rhead = (xlog_rec_header_t *)offset;
- *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn));
+ error = xlog_verify_head(log, head_blk, tail_blk, bp, &rhead_blk,
+ &rhead, &wrapped);
+ if (error)
+ goto done;
/*
* Reset log values according to the state of the log when we
@@ -976,10 +1292,10 @@ xlog_find_tail(
* written was complete and ended exactly on the end boundary
* of the physical log.
*/
- log->l_prev_block = i;
+ log->l_prev_block = rhead_blk;
log->l_curr_block = (int)*head_blk;
log->l_curr_cycle = be32_to_cpu(rhead->h_cycle);
- if (found == 2)
+ if (wrapped)
log->l_curr_cycle++;
atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn));
atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn));
@@ -1014,12 +1330,13 @@ xlog_find_tail(
} else {
hblks = 1;
}
- after_umount_blk = (i + hblks + (int)
- BTOBB(be32_to_cpu(rhead->h_len))) % log->l_logBBsize;
+ after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len));
+ after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize);
tail_lsn = atomic64_read(&log->l_tail_lsn);
if (*head_blk == after_umount_blk &&
be32_to_cpu(rhead->h_num_logops) == 1) {
- umount_data_blk = (i + hblks) % log->l_logBBsize;
+ umount_data_blk = rhead_blk + hblks;
+ umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize);
error = xlog_bread(log, umount_data_blk, 1, bp, &offset);
if (error)
goto done;
@@ -4118,26 +4435,69 @@ xlog_recover_process_iunlinks(
mp->m_dmevmask = mp_dmevmask;
}
+STATIC int
+xlog_unpack_data(
+ struct xlog_rec_header *rhead,
+ char *dp,
+ struct xlog *log)
+{
+ int i, j, k;
+
+ for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) &&
+ i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
+ *(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i];
+ dp += BBSIZE;
+ }
+
+ if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+ xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead;
+ for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) {
+ j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
+ k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
+ *(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
+ dp += BBSIZE;
+ }
+ }
+
+ return 0;
+}
+
/*
- * Upack the log buffer data and crc check it. If the check fails, issue a
- * warning if and only if the CRC in the header is non-zero. This makes the
- * check an advisory warning, and the zero CRC check will prevent failure
- * warnings from being emitted when upgrading the kernel from one that does not
- * add CRCs by default.
- *
- * When filesystems are CRC enabled, this CRC mismatch becomes a fatal log
- * corruption failure
+ * CRC check, unpack and process a log record.
*/
STATIC int
-xlog_unpack_data_crc(
+xlog_recover_process(
+ struct xlog *log,
+ struct hlist_head rhash[],
struct xlog_rec_header *rhead,
char *dp,
- struct xlog *log)
+ int pass)
{
+ int error;
__le32 crc;
crc = xlog_cksum(log, rhead, dp, be32_to_cpu(rhead->h_len));
- if (crc != rhead->h_crc) {
+
+ /*
+ * Nothing else to do if this is a CRC verification pass. Just return
+ * if this a record with a non-zero crc. Unfortunately, mkfs always
+ * sets h_crc to 0 so we must consider this valid even on v5 supers.
+ * Otherwise, return EFSBADCRC on failure so the callers up the stack
+ * know precisely what failed.
+ */
+ if (pass == XLOG_RECOVER_CRCPASS) {
+ if (rhead->h_crc && crc != le32_to_cpu(rhead->h_crc))
+ return -EFSBADCRC;
+ return 0;
+ }
+
+ /*
+ * We're in the normal recovery path. Issue a warning if and only if the
+ * CRC in the header is non-zero. This is an advisory warning and the
+ * zero CRC check prevents warnings from being emitted when upgrading
+ * the kernel from one that does not add CRCs by default.
+ */
+ if (crc != le32_to_cpu(rhead->h_crc)) {
if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) {
xfs_alert(log->l_mp,
"log record CRC mismatch: found 0x%x, expected 0x%x.",
@@ -4147,47 +4507,18 @@ xlog_unpack_data_crc(
}
/*
- * If we've detected a log record corruption, then we can't
- * recover past this point. Abort recovery if we are enforcing
- * CRC protection by punting an error back up the stack.
+ * If the filesystem is CRC enabled, this mismatch becomes a
+ * fatal log corruption failure.
*/
if (xfs_sb_version_hascrc(&log->l_mp->m_sb))
return -EFSCORRUPTED;
}
- return 0;
-}
-
-STATIC int
-xlog_unpack_data(
- struct xlog_rec_header *rhead,
- char *dp,
- struct xlog *log)
-{
- int i, j, k;
- int error;
-
- error = xlog_unpack_data_crc(rhead, dp, log);
+ error = xlog_unpack_data(rhead, dp, log);
if (error)
return error;
- for (i = 0; i < BTOBB(be32_to_cpu(rhead->h_len)) &&
- i < (XLOG_HEADER_CYCLE_SIZE / BBSIZE); i++) {
- *(__be32 *)dp = *(__be32 *)&rhead->h_cycle_data[i];
- dp += BBSIZE;
- }
-
- if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
- xlog_in_core_2_t *xhdr = (xlog_in_core_2_t *)rhead;
- for ( ; i < BTOBB(be32_to_cpu(rhead->h_len)); i++) {
- j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
- k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
- *(__be32 *)dp = xhdr[j].hic_xheader.xh_cycle_data[k];
- dp += BBSIZE;
- }
- }
-
- return 0;
+ return xlog_recover_process_data(log, rhash, rhead, dp, pass);
}
STATIC int
@@ -4239,18 +4570,21 @@ xlog_do_recovery_pass(
struct xlog *log,
xfs_daddr_t head_blk,
xfs_daddr_t tail_blk,
- int pass)
+ int pass,
+ xfs_daddr_t *first_bad) /* out: first bad log rec */
{
xlog_rec_header_t *rhead;
xfs_daddr_t blk_no;
+ xfs_daddr_t rhead_blk;
char *offset;
xfs_buf_t *hbp, *dbp;
- int error = 0, h_size;
+ int error = 0, h_size, h_len;
int bblks, split_bblks;
int hblks, split_hblks, wrapped_hblks;
struct hlist_head rhash[XLOG_RHASH_SIZE];
ASSERT(head_blk != tail_blk);
+ rhead_blk = 0;
/*
* Read the header of the tail block and get the iclog buffer size from
@@ -4274,7 +4608,31 @@ xlog_do_recovery_pass(
error = xlog_valid_rec_header(log, rhead, tail_blk);
if (error)
goto bread_err1;
+
+ /*
+ * xfsprogs has a bug where record length is based on lsunit but
+ * h_size (iclog size) is hardcoded to 32k. Now that we
+ * unconditionally CRC verify the unmount record, this means the
+ * log buffer can be too small for the record and cause an
+ * overrun.
+ *
+ * Detect this condition here. Use lsunit for the buffer size as
+ * long as this looks like the mkfs case. Otherwise, return an
+ * error to avoid a buffer overrun.
+ */
h_size = be32_to_cpu(rhead->h_size);
+ h_len = be32_to_cpu(rhead->h_len);
+ if (h_len > h_size) {
+ if (h_len <= log->l_mp->m_logbsize &&
+ be32_to_cpu(rhead->h_num_logops) == 1) {
+ xfs_warn(log->l_mp,
+ "invalid iclog size (%d bytes), using lsunit (%d bytes)",
+ h_size, log->l_mp->m_logbsize);
+ h_size = log->l_mp->m_logbsize;
+ } else
+ return -EFSCORRUPTED;
+ }
+
if ((be32_to_cpu(rhead->h_version) & XLOG_VERSION_2) &&
(h_size > XLOG_HEADER_CYCLE_SIZE)) {
hblks = h_size / XLOG_HEADER_CYCLE_SIZE;
@@ -4301,7 +4659,7 @@ xlog_do_recovery_pass(
}
memset(rhash, 0, sizeof(rhash));
- blk_no = tail_blk;
+ blk_no = rhead_blk = tail_blk;
if (tail_blk > head_blk) {
/*
* Perform recovery around the end of the physical log.
@@ -4408,19 +4766,18 @@ xlog_do_recovery_pass(
goto bread_err2;
}
- error = xlog_unpack_data(rhead, offset, log);
+ error = xlog_recover_process(log, rhash, rhead, offset,
+ pass);
if (error)
goto bread_err2;
- error = xlog_recover_process_data(log, rhash,
- rhead, offset, pass);
- if (error)
- goto bread_err2;
blk_no += bblks;
+ rhead_blk = blk_no;
}
ASSERT(blk_no >= log->l_logBBsize);
blk_no -= log->l_logBBsize;
+ rhead_blk = blk_no;
}
/* read first part of physical log */
@@ -4441,21 +4798,22 @@ xlog_do_recovery_pass(
if (error)
goto bread_err2;
- error = xlog_unpack_data(rhead, offset, log);
+ error = xlog_recover_process(log, rhash, rhead, offset, pass);
if (error)
goto bread_err2;
- error = xlog_recover_process_data(log, rhash,
- rhead, offset, pass);
- if (error)
- goto bread_err2;
blk_no += bblks + hblks;
+ rhead_blk = blk_no;
}
bread_err2:
xlog_put_bp(dbp);
bread_err1:
xlog_put_bp(hbp);
+
+ if (error && first_bad)
+ *first_bad = rhead_blk;
+
return error;
}
@@ -4493,7 +4851,7 @@ xlog_do_log_recovery(
INIT_LIST_HEAD(&log->l_buf_cancel_table[i]);
error = xlog_do_recovery_pass(log, head_blk, tail_blk,
- XLOG_RECOVER_PASS1);
+ XLOG_RECOVER_PASS1, NULL);
if (error != 0) {
kmem_free(log->l_buf_cancel_table);
log->l_buf_cancel_table = NULL;
@@ -4504,7 +4862,7 @@ xlog_do_log_recovery(
* When it is complete free the table of buf cancel items.
*/
error = xlog_do_recovery_pass(log, head_blk, tail_blk,
- XLOG_RECOVER_PASS2);
+ XLOG_RECOVER_PASS2, NULL);
#ifdef DEBUG
if (!error) {
int i;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 36bd8825bfb0..b35775752b74 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -137,7 +137,7 @@ static const match_table_t tokens = {
};
-STATIC unsigned long
+STATIC int
suffix_kstrtoint(char *s, unsigned int base, int *res)
{
int last, shift_left_factor = 0, _res;
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index ee70f5dec9dc..641d625eb334 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -255,11 +255,47 @@ write_grant_head_show(
}
XFS_SYSFS_ATTR_RO(write_grant_head);
+#ifdef DEBUG
+STATIC ssize_t
+log_badcrc_factor_store(
+ struct kobject *kobject,
+ const char *buf,
+ size_t count)
+{
+ struct xlog *log = to_xlog(kobject);
+ int ret;
+ uint32_t val;
+
+ ret = kstrtouint(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ log->l_badcrc_factor = val;
+
+ return count;
+}
+
+STATIC ssize_t
+log_badcrc_factor_show(
+ struct kobject *kobject,
+ char *buf)
+{
+ struct xlog *log = to_xlog(kobject);
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", log->l_badcrc_factor);
+}
+
+XFS_SYSFS_ATTR_RW(log_badcrc_factor);
+#endif /* DEBUG */
+
static struct attribute *xfs_log_attrs[] = {
ATTR_LIST(log_head_lsn),
ATTR_LIST(log_tail_lsn),
ATTR_LIST(reserve_grant_head),
ATTR_LIST(write_grant_head),
+#ifdef DEBUG
+ ATTR_LIST(log_badcrc_factor),
+#endif
NULL,
};
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index ce78534a047e..995170194df0 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -572,12 +572,16 @@ xfs_quota_warn(
struct xfs_dquot *dqp,
int type)
{
- /* no warnings for project quotas - we just return ENOSPC later */
+ enum quota_type qtype;
+
if (dqp->dq_flags & XFS_DQ_PROJ)
- return;
- quota_send_warning(make_kqid(&init_user_ns,
- (dqp->dq_flags & XFS_DQ_USER) ?
- USRQUOTA : GRPQUOTA,
+ qtype = PRJQUOTA;
+ else if (dqp->dq_flags & XFS_DQ_USER)
+ qtype = USRQUOTA;
+ else
+ qtype = GRPQUOTA;
+
+ quota_send_warning(make_kqid(&init_user_ns, qtype,
be32_to_cpu(dqp->q_core.d_id)),
mp->m_super->s_dev, type);
}
OpenPOWER on IntegriCloud