summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/xfs/xfs_aops.c49
-rw-r--r--fs/xfs/xfs_bmap_util.c2
-rw-r--r--fs/xfs/xfs_da_btree.c41
-rw-r--r--fs/xfs/xfs_ialloc.c10
-rw-r--r--fs/xfs/xfs_ialloc.h2
-rw-r--r--fs/xfs/xfs_log.h19
-rw-r--r--fs/xfs/xfs_log_cil.c7
7 files changed, 66 insertions, 64 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 0479c32c5eb1..d1b99b692ccb 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -982,7 +982,32 @@ xfs_vm_writepage(
offset = i_size_read(inode);
end_index = offset >> PAGE_CACHE_SHIFT;
last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
- if (page->index >= end_index) {
+
+ /*
+ * The page index is less than the end_index, adjust the end_offset
+ * to the highest offset that this page should represent.
+ * -----------------------------------------------------
+ * | file mapping | <EOF> |
+ * -----------------------------------------------------
+ * | Page ... | Page N-2 | Page N-1 | Page N | |
+ * ^--------------------------------^----------|--------
+ * | desired writeback range | see else |
+ * ---------------------------------^------------------|
+ */
+ if (page->index < end_index)
+ end_offset = (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT;
+ else {
+ /*
+ * Check whether the page to write out is beyond or straddles
+ * i_size or not.
+ * -------------------------------------------------------
+ * | file mapping | <EOF> |
+ * -------------------------------------------------------
+ * | Page ... | Page N-2 | Page N-1 | Page N | Beyond |
+ * ^--------------------------------^-----------|---------
+ * | | Straddles |
+ * ---------------------------------^-----------|--------|
+ */
unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1);
/*
@@ -990,24 +1015,36 @@ xfs_vm_writepage(
* truncate operation that is in progress. We must redirty the
* page so that reclaim stops reclaiming it. Otherwise
* xfs_vm_releasepage() is called on it and gets confused.
+ *
+ * Note that the end_index is unsigned long, it would overflow
+ * if the given offset is greater than 16TB on 32-bit system
+ * and if we do check the page is fully outside i_size or not
+ * via "if (page->index >= end_index + 1)" as "end_index + 1"
+ * will be evaluated to 0. Hence this page will be redirtied
+ * and be written out repeatedly which would result in an
+ * infinite loop, the user program that perform this operation
+ * will hang. Instead, we can verify this situation by checking
+ * if the page to write is totally beyond the i_size or if it's
+ * offset is just equal to the EOF.
*/
- if (page->index >= end_index + 1 || offset_into_page == 0)
+ if (page->index > end_index ||
+ (page->index == end_index && offset_into_page == 0))
goto redirty;
/*
* The page straddles i_size. It must be zeroed out on each
* and every writepage invocation because it may be mmapped.
* "A file is mapped in multiples of the page size. For a file
- * that is not a multiple of the page size, the remaining
+ * that is not a multiple of the page size, the remaining
* memory is zeroed when mapped, and writes to that region are
* not written out to the file."
*/
zero_user_segment(page, offset_into_page, PAGE_CACHE_SIZE);
+
+ /* Adjust the end_offset to the end of file */
+ end_offset = offset;
}
- end_offset = min_t(unsigned long long,
- (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
- offset);
len = 1 << inode->i_blkbits;
bh = head = page_buffers(page);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 296160b8e78c..057f671811d6 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -1519,7 +1519,6 @@ xfs_collapse_file_space(
while (!error && !done) {
tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
- tp->t_flags |= XFS_TRANS_RESERVE;
/*
* We would need to reserve permanent block for transaction.
* This will come into picture when after shifting extent into
@@ -1529,7 +1528,6 @@ xfs_collapse_file_space(
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
if (error) {
- ASSERT(error == ENOSPC || XFS_FORCED_SHUTDOWN(mp));
xfs_trans_cancel(tp, 0);
break;
}
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 9eec594cc25a..4db5102d403a 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -2624,47 +2624,6 @@ xfs_da_read_buf(
xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
else
xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF);
-
- /*
- * This verification code will be moved to a CRC verification callback
- * function so just leave it here unchanged until then.
- */
- {
- xfs_dir2_data_hdr_t *hdr = bp->b_addr;
- xfs_dir2_free_t *free = bp->b_addr;
- xfs_da_blkinfo_t *info = bp->b_addr;
- uint magic, magic1;
- struct xfs_mount *mp = dp->i_mount;
-
- magic = be16_to_cpu(info->magic);
- magic1 = be32_to_cpu(hdr->magic);
- if (unlikely(
- XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) &&
- (magic != XFS_DA3_NODE_MAGIC) &&
- (magic != XFS_ATTR_LEAF_MAGIC) &&
- (magic != XFS_ATTR3_LEAF_MAGIC) &&
- (magic != XFS_DIR2_LEAF1_MAGIC) &&
- (magic != XFS_DIR3_LEAF1_MAGIC) &&
- (magic != XFS_DIR2_LEAFN_MAGIC) &&
- (magic != XFS_DIR3_LEAFN_MAGIC) &&
- (magic1 != XFS_DIR2_BLOCK_MAGIC) &&
- (magic1 != XFS_DIR3_BLOCK_MAGIC) &&
- (magic1 != XFS_DIR2_DATA_MAGIC) &&
- (magic1 != XFS_DIR3_DATA_MAGIC) &&
- (free->hdr.magic !=
- cpu_to_be32(XFS_DIR2_FREE_MAGIC)) &&
- (free->hdr.magic !=
- cpu_to_be32(XFS_DIR3_FREE_MAGIC)),
- mp, XFS_ERRTAG_DA_READ_BUF,
- XFS_RANDOM_DA_READ_BUF))) {
- trace_xfs_da_btree_corrupt(bp, _RET_IP_);
- XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)",
- XFS_ERRLEVEL_LOW, mp, info);
- error = XFS_ERROR(EFSCORRUPTED);
- xfs_trans_brelse(trans, bp);
- goto out_free;
- }
- }
*bpp = bp;
out_free:
if (mapp != &map)
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index 6ac0c2986c32..1313df7ff0d8 100644
--- a/fs/xfs/xfs_ialloc.c
+++ b/fs/xfs/xfs_ialloc.c
@@ -1437,7 +1437,7 @@ xfs_difree_inobt(
struct xfs_buf *agbp,
xfs_agino_t agino,
struct xfs_bmap_free *flist,
- int *delete,
+ int *deleted,
xfs_ino_t *first_ino,
struct xfs_inobt_rec_incore *orec)
{
@@ -1497,7 +1497,7 @@ xfs_difree_inobt(
if (!(mp->m_flags & XFS_MOUNT_IKEEP) &&
(rec.ir_freecount == mp->m_ialloc_inos)) {
- *delete = 1;
+ *deleted = 1;
*first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);
/*
@@ -1525,7 +1525,7 @@ xfs_difree_inobt(
XFS_AGINO_TO_AGBNO(mp, rec.ir_startino)),
mp->m_ialloc_blks, flist, mp);
} else {
- *delete = 0;
+ *deleted = 0;
error = xfs_inobt_update(cur, &rec);
if (error) {
@@ -1662,7 +1662,7 @@ xfs_difree(
struct xfs_trans *tp, /* transaction pointer */
xfs_ino_t inode, /* inode to be freed */
struct xfs_bmap_free *flist, /* extents to free */
- int *delete,/* set if inode cluster was deleted */
+ int *deleted,/* set if inode cluster was deleted */
xfs_ino_t *first_ino)/* first inode in deleted cluster */
{
/* REFERENCED */
@@ -1714,7 +1714,7 @@ xfs_difree(
/*
* Fix up the inode allocation btree.
*/
- error = xfs_difree_inobt(mp, tp, agbp, agino, flist, delete, first_ino,
+ error = xfs_difree_inobt(mp, tp, agbp, agino, flist, deleted, first_ino,
&rec);
if (error)
goto error0;
diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h
index 812365d17e67..95ad1c002d60 100644
--- a/fs/xfs/xfs_ialloc.h
+++ b/fs/xfs/xfs_ialloc.h
@@ -90,7 +90,7 @@ xfs_difree(
struct xfs_trans *tp, /* transaction pointer */
xfs_ino_t inode, /* inode to be freed */
struct xfs_bmap_free *flist, /* extents to free */
- int *delete, /* set if inode cluster was deleted */
+ int *deleted, /* set if inode cluster was deleted */
xfs_ino_t *first_ino); /* first inode in deleted cluster */
/*
diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h
index 2c4004475e71..84e0deb95abd 100644
--- a/fs/xfs/xfs_log.h
+++ b/fs/xfs/xfs_log.h
@@ -24,7 +24,8 @@ struct xfs_log_vec {
struct xfs_log_iovec *lv_iovecp; /* iovec array */
struct xfs_log_item *lv_item; /* owner */
char *lv_buf; /* formatted buffer */
- int lv_buf_len; /* size of formatted buffer */
+ int lv_bytes; /* accounted space in buffer */
+ int lv_buf_len; /* aligned size of buffer */
int lv_size; /* size of allocated lv */
};
@@ -52,15 +53,21 @@ xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp,
return vec->i_addr;
}
+/*
+ * We need to make sure the next buffer is naturally aligned for the biggest
+ * basic data type we put into it. We already accounted for this padding when
+ * sizing the buffer.
+ *
+ * However, this padding does not get written into the log, and hence we have to
+ * track the space used by the log vectors separately to prevent log space hangs
+ * due to inaccurate accounting (i.e. a leak) of the used log space through the
+ * CIL context ticket.
+ */
static inline void
xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len)
{
- /*
- * We need to make sure the next buffer is naturally aligned for the
- * biggest basic data type we put into it. We already accounted for
- * this when sizing the buffer.
- */
lv->lv_buf_len += round_up(len, sizeof(uint64_t));
+ lv->lv_bytes += len;
vec->i_len = len;
}
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 039c873e6fb2..b3425b34e3d5 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -97,7 +97,7 @@ xfs_cil_prepare_item(
{
/* Account for the new LV being passed in */
if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) {
- *diff_len += lv->lv_buf_len;
+ *diff_len += lv->lv_bytes;
*diff_iovecs += lv->lv_niovecs;
}
@@ -111,7 +111,7 @@ xfs_cil_prepare_item(
else if (old_lv != lv) {
ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);
- *diff_len -= old_lv->lv_buf_len;
+ *diff_len -= old_lv->lv_bytes;
*diff_iovecs -= old_lv->lv_niovecs;
kmem_free(old_lv);
}
@@ -239,7 +239,7 @@ xlog_cil_insert_format_items(
* that the space reservation accounting is correct.
*/
*diff_iovecs -= lv->lv_niovecs;
- *diff_len -= lv->lv_buf_len;
+ *diff_len -= lv->lv_bytes;
} else {
/* allocate new data chunk */
lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS);
@@ -259,6 +259,7 @@ xlog_cil_insert_format_items(
/* The allocated data region lies beyond the iovec region */
lv->lv_buf_len = 0;
+ lv->lv_bytes = 0;
lv->lv_buf = (char *)lv + buf_size - nbytes;
ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t)));
OpenPOWER on IntegriCloud