summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/xfs/xfs_alloc.c29
-rw-r--r--fs/xfs/xfs_alloc.h2
-rw-r--r--fs/xfs/xfs_bmap.c5
-rw-r--r--fs/xfs/xfs_bmap_btree.c10
-rw-r--r--fs/xfs/xfs_mount.c24
5 files changed, 55 insertions, 15 deletions
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index 8558226281c4..22af489d3f34 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -1862,7 +1862,7 @@ xfs_alloc_fix_freelist(
(pag->pagf_longest - delta) :
(pag->pagf_flcount > 0 || pag->pagf_longest > 0);
if (args->minlen + args->alignment + args->minalignslop - 1 > longest ||
- (args->minleft &&
+ (!(flags & XFS_ALLOC_FLAG_FREEING) &&
(int)(pag->pagf_freeblks + pag->pagf_flcount -
need - args->total) <
(int)args->minleft)) {
@@ -1898,7 +1898,7 @@ xfs_alloc_fix_freelist(
longest = (longest > delta) ? (longest - delta) :
(be32_to_cpu(agf->agf_flcount) > 0 || longest > 0);
if (args->minlen + args->alignment + args->minalignslop - 1 > longest ||
- (args->minleft &&
+ (!(flags & XFS_ALLOC_FLAG_FREEING) &&
(int)(be32_to_cpu(agf->agf_freeblks) +
be32_to_cpu(agf->agf_flcount) - need - args->total) <
(int)args->minleft)) {
@@ -1951,8 +1951,14 @@ xfs_alloc_fix_freelist(
* the restrictions correctly. Can happen for free calls
* on a completely full ag.
*/
- if (targs.agbno == NULLAGBLOCK)
+ if (targs.agbno == NULLAGBLOCK) {
+ if (!(flags & XFS_ALLOC_FLAG_FREEING)) {
+ xfs_trans_brelse(tp, agflbp);
+ args->agbp = NULL;
+ return 0;
+ }
break;
+ }
/*
* Put each allocated block on the list.
*/
@@ -2360,8 +2366,19 @@ xfs_alloc_vextent(
if (args->agno == sagno &&
type == XFS_ALLOCTYPE_START_BNO)
args->type = XFS_ALLOCTYPE_THIS_AG;
- if (++(args->agno) == mp->m_sb.sb_agcount)
- args->agno = 0;
+ /*
+ * For the first allocation, we can try any AG to get
+ * space. However, if we already have allocated a
+ * block, we don't want to try AGs whose number is below
+ * sagno. Otherwise, we may end up with out-of-order
+ * locking of AGF, which might cause deadlock.
+ */
+ if (++(args->agno) == mp->m_sb.sb_agcount) {
+ if (args->firstblock != NULLFSBLOCK)
+ args->agno = sagno;
+ else
+ args->agno = 0;
+ }
/*
* Reached the starting a.g., must either be done
* or switch to non-trylock mode.
@@ -2443,7 +2460,7 @@ xfs_free_extent(
args.minlen = args.minleft = args.minalignslop = 0;
down_read(&args.mp->m_peraglock);
args.pag = &args.mp->m_perag[args.agno];
- if ((error = xfs_alloc_fix_freelist(&args, 0)))
+ if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING)))
goto error0;
#ifdef DEBUG
ASSERT(args.agbp != NULL);
diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h
index 2d1f8928b267..650591f999ae 100644
--- a/fs/xfs/xfs_alloc.h
+++ b/fs/xfs/xfs_alloc.h
@@ -41,6 +41,7 @@ typedef enum xfs_alloctype
* Flags for xfs_alloc_fix_freelist.
*/
#define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */
+#define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/
/*
* Argument structure for xfs_alloc routines.
@@ -70,6 +71,7 @@ typedef struct xfs_alloc_arg {
char wasfromfl; /* set if allocation is from freelist */
char isfl; /* set if is freelist blocks - !acctg */
char userdata; /* set if this is user data */
+ xfs_fsblock_t firstblock; /* io first block allocated */
} xfs_alloc_arg_t;
/*
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 890ad3528174..ad595dbefe16 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -2762,6 +2762,7 @@ xfs_bmap_btalloc(
args.mp = mp;
args.fsbno = ap->rval;
args.maxlen = MIN(ap->alen, mp->m_sb.sb_agblocks);
+ args.firstblock = ap->firstblock;
blen = 0;
if (nullfb) {
args.type = XFS_ALLOCTYPE_START_BNO;
@@ -2821,7 +2822,7 @@ xfs_bmap_btalloc(
else
args.minlen = ap->alen;
} else if (ap->low) {
- args.type = XFS_ALLOCTYPE_FIRST_AG;
+ args.type = XFS_ALLOCTYPE_START_BNO;
args.total = args.minlen = ap->minlen;
} else {
args.type = XFS_ALLOCTYPE_NEAR_BNO;
@@ -3452,6 +3453,7 @@ xfs_bmap_extents_to_btree(
XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_BTREE);
args.tp = tp;
args.mp = mp;
+ args.firstblock = *firstblock;
if (*firstblock == NULLFSBLOCK) {
args.type = XFS_ALLOCTYPE_START_BNO;
args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino);
@@ -3587,6 +3589,7 @@ xfs_bmap_local_to_extents(
args.tp = tp;
args.mp = ip->i_mount;
+ args.firstblock = *firstblock;
ASSERT((ifp->if_flags &
(XFS_IFINLINE|XFS_IFEXTENTS|XFS_IFEXTIREC)) == XFS_IFINLINE);
/*
diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c
index bea44709afbe..3b6dfc9b53af 100644
--- a/fs/xfs/xfs_bmap_btree.c
+++ b/fs/xfs/xfs_bmap_btree.c
@@ -1569,12 +1569,11 @@ xfs_bmbt_split(
lbno = XFS_DADDR_TO_FSB(args.mp, XFS_BUF_ADDR(lbp));
left = XFS_BUF_TO_BMBT_BLOCK(lbp);
args.fsbno = cur->bc_private.b.firstblock;
+ args.firstblock = args.fsbno;
if (args.fsbno == NULLFSBLOCK) {
args.fsbno = lbno;
args.type = XFS_ALLOCTYPE_START_BNO;
- } else if (cur->bc_private.b.flist->xbf_low)
- args.type = XFS_ALLOCTYPE_FIRST_AG;
- else
+ } else
args.type = XFS_ALLOCTYPE_NEAR_BNO;
args.mod = args.minleft = args.alignment = args.total = args.isfl =
args.userdata = args.minalignslop = 0;
@@ -2356,6 +2355,7 @@ xfs_bmbt_newroot(
args.userdata = args.minalignslop = 0;
args.minlen = args.maxlen = args.prod = 1;
args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
+ args.firstblock = args.fsbno;
if (args.fsbno == NULLFSBLOCK) {
#ifdef DEBUG
if ((error = xfs_btree_check_lptr(cur, INT_GET(*pp, ARCH_CONVERT), level))) {
@@ -2365,9 +2365,7 @@ xfs_bmbt_newroot(
#endif
args.fsbno = INT_GET(*pp, ARCH_CONVERT);
args.type = XFS_ALLOCTYPE_START_BNO;
- } else if (args.wasdel)
- args.type = XFS_ALLOCTYPE_FIRST_AG;
- else
+ } else
args.type = XFS_ALLOCTYPE_NEAR_BNO;
if ((error = xfs_alloc_vextent(&args))) {
XFS_BMBT_TRACE_CURSOR(cur, ERROR);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index c0b1c2906880..4b7be49cc4de 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -1254,6 +1254,26 @@ xfs_mod_sb(xfs_trans_t *tp, __int64_t fields)
xfs_trans_log_buf(tp, bp, first, last);
}
+
+/*
+ * In order to avoid ENOSPC-related deadlock caused by
+ * out-of-order locking of AGF buffer (PV 947395), we place
+ * constraints on the relationship among actual allocations for
+ * data blocks, freelist blocks, and potential file data bmap
+ * btree blocks. However, these restrictions may result in no
+ * actual space being allocated for a delayed extent; for example, a
+ * data block in a certain AG is allocated, but no further block is
+ * available for the extra bmap btree block needed when the file's
+ * bmap btree splits. This may lead to an
+ * infinite loop in xfssyncd when the file gets flushed to disk and
+ * all delayed extents need to be actually allocated. To get around
+ * this, we explicitly set aside a few blocks which will not be
+ * reserved in delayed allocation. Given that the minimum number of
+ * freelist blocks needed is 4 fsbs and that a potential split of the
+ * file's bmap btree requires 1 fsb, we set aside 8 blocks.
+ */
+#define SET_ASIDE_BLOCKS 8
+
/*
* xfs_mod_incore_sb_unlocked() is a utility routine common used to apply
* a delta to a specified field in the in-core superblock. Simply
@@ -1298,7 +1318,7 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
return 0;
case XFS_SBS_FDBLOCKS:
- lcounter = (long long)mp->m_sb.sb_fdblocks;
+ lcounter = (long long)mp->m_sb.sb_fdblocks - SET_ASIDE_BLOCKS;
res_used = (long long)(mp->m_resblks - mp->m_resblks_avail);
if (delta > 0) { /* Putting blocks back */
@@ -1332,7 +1352,7 @@ xfs_mod_incore_sb_unlocked(xfs_mount_t *mp, xfs_sb_field_t field,
}
}
- mp->m_sb.sb_fdblocks = lcounter;
+ mp->m_sb.sb_fdblocks = lcounter + SET_ASIDE_BLOCKS;
return 0;
case XFS_SBS_FREXTENTS:
lcounter = (long long)mp->m_sb.sb_frextents;
OpenPOWER on IntegriCloud