summaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_log.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_log.c')
-rw-r--r--fs/xfs/xfs_log.c710
1 files changed, 357 insertions, 353 deletions
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 7fc3c1ad36bc..f6006d94a581 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -57,10 +57,6 @@ xlog_state_get_iclog_space(
struct xlog_ticket *ticket,
int *continued_write,
int *logoffsetp);
-STATIC int
-xlog_state_release_iclog(
- struct xlog *log,
- struct xlog_in_core *iclog);
STATIC void
xlog_state_switch_iclogs(
struct xlog *log,
@@ -83,7 +79,10 @@ STATIC void
xlog_ungrant_log_space(
struct xlog *log,
struct xlog_ticket *ticket);
-
+STATIC void
+xlog_sync(
+ struct xlog *log,
+ struct xlog_in_core *iclog);
#if defined(DEBUG)
STATIC void
xlog_verify_dest_ptr(
@@ -214,15 +213,42 @@ xlog_grant_head_wake(
{
struct xlog_ticket *tic;
int need_bytes;
+ bool woken_task = false;
list_for_each_entry(tic, &head->waiters, t_queue) {
+
+ /*
+ * There is a chance that the size of the CIL checkpoints in
+ * progress at the last AIL push target calculation resulted in
+ * limiting the target to the log head (l_last_sync_lsn) at the
+ * time. This may not reflect where the log head is now as the
+ * CIL checkpoints may have completed.
+ *
+ * Hence when we are woken here, it may be that the head of the
+ * log that has moved rather than the tail. As the tail didn't
+ * move, there still won't be space available for the
+ * reservation we require. However, if the AIL has already
+ * pushed to the target defined by the old log head location, we
+ * will hang here waiting for something else to update the AIL
+ * push target.
+ *
+ * Therefore, if there isn't space to wake the first waiter on
+ * the grant head, we need to push the AIL again to ensure the
+ * target reflects both the current log tail and log head
+ * position before we wait for the tail to move again.
+ */
+
need_bytes = xlog_ticket_reservation(log, head, tic);
- if (*free_bytes < need_bytes)
+ if (*free_bytes < need_bytes) {
+ if (!woken_task)
+ xlog_grant_push_ail(log, need_bytes);
return false;
+ }
*free_bytes -= need_bytes;
trace_xfs_log_grant_wake_up(log, tic);
wake_up_process(tic->t_task);
+ woken_task = true;
}
return true;
@@ -428,8 +454,7 @@ xfs_log_reserve(
XFS_STATS_INC(mp, xs_try_logspace);
ASSERT(*ticp == NULL);
- tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent,
- KM_SLEEP);
+ tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent, 0);
*ticp = tic;
xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt
@@ -526,16 +551,71 @@ xfs_log_done(
return lsn;
}
+static bool
+__xlog_state_release_iclog(
+ struct xlog *log,
+ struct xlog_in_core *iclog)
+{
+ lockdep_assert_held(&log->l_icloglock);
+
+ if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
+ /* update tail before writing to iclog */
+ xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp);
+
+ iclog->ic_state = XLOG_STATE_SYNCING;
+ iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
+ xlog_verify_tail_lsn(log, iclog, tail_lsn);
+ /* cycle incremented when incrementing curr_block */
+ return true;
+ }
+
+ ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
+ return false;
+}
+
+/*
+ * Flush iclog to disk if this is the last reference to the given iclog and the
+ * it is in the WANT_SYNC state.
+ */
+static int
+xlog_state_release_iclog(
+ struct xlog *log,
+ struct xlog_in_core *iclog)
+{
+ lockdep_assert_held(&log->l_icloglock);
+
+ if (iclog->ic_state == XLOG_STATE_IOERROR)
+ return -EIO;
+
+ if (atomic_dec_and_test(&iclog->ic_refcnt) &&
+ __xlog_state_release_iclog(log, iclog)) {
+ spin_unlock(&log->l_icloglock);
+ xlog_sync(log, iclog);
+ spin_lock(&log->l_icloglock);
+ }
+
+ return 0;
+}
+
int
xfs_log_release_iclog(
- struct xfs_mount *mp,
+ struct xfs_mount *mp,
struct xlog_in_core *iclog)
{
- if (xlog_state_release_iclog(mp->m_log, iclog)) {
+ struct xlog *log = mp->m_log;
+ bool sync;
+
+ if (iclog->ic_state == XLOG_STATE_IOERROR) {
xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
return -EIO;
}
+ if (atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock)) {
+ sync = __xlog_state_release_iclog(log, iclog);
+ spin_unlock(&log->l_icloglock);
+ if (sync)
+ xlog_sync(log, iclog);
+ }
return 0;
}
@@ -840,10 +920,7 @@ out_err:
iclog = log->l_iclog;
atomic_inc(&iclog->ic_refcnt);
xlog_state_want_sync(log, iclog);
- spin_unlock(&log->l_icloglock);
error = xlog_state_release_iclog(log, iclog);
-
- spin_lock(&log->l_icloglock);
switch (iclog->ic_state) {
default:
if (!XLOG_FORCED_SHUTDOWN(log)) {
@@ -898,8 +975,8 @@ xfs_log_unmount_write(xfs_mount_t *mp)
#ifdef DEBUG
first_iclog = iclog = log->l_iclog;
do {
- if (!(iclog->ic_state & XLOG_STATE_IOERROR)) {
- ASSERT(iclog->ic_state & XLOG_STATE_ACTIVE);
+ if (iclog->ic_state != XLOG_STATE_IOERROR) {
+ ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
ASSERT(iclog->ic_offset == 0);
}
iclog = iclog->ic_next;
@@ -924,21 +1001,17 @@ xfs_log_unmount_write(xfs_mount_t *mp)
spin_lock(&log->l_icloglock);
iclog = log->l_iclog;
atomic_inc(&iclog->ic_refcnt);
-
xlog_state_want_sync(log, iclog);
- spin_unlock(&log->l_icloglock);
error = xlog_state_release_iclog(log, iclog);
-
- spin_lock(&log->l_icloglock);
-
- if ( ! ( iclog->ic_state == XLOG_STATE_ACTIVE
- || iclog->ic_state == XLOG_STATE_DIRTY
- || iclog->ic_state == XLOG_STATE_IOERROR) ) {
-
- xlog_wait(&iclog->ic_force_wait,
- &log->l_icloglock);
- } else {
+ switch (iclog->ic_state) {
+ case XLOG_STATE_ACTIVE:
+ case XLOG_STATE_DIRTY:
+ case XLOG_STATE_IOERROR:
spin_unlock(&log->l_icloglock);
+ break;
+ default:
+ xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
+ break;
}
}
@@ -1228,7 +1301,7 @@ xlog_ioend_work(
* didn't succeed.
*/
aborted = true;
- } else if (iclog->ic_state & XLOG_STATE_IOERROR) {
+ } else if (iclog->ic_state == XLOG_STATE_IOERROR) {
aborted = true;
}
@@ -1404,6 +1477,7 @@ xlog_alloc_log(
*/
ASSERT(log->l_iclog_size >= 4096);
for (i = 0; i < log->l_iclog_bufs; i++) {
+ int align_mask = xfs_buftarg_dma_alignment(mp->m_logdev_targp);
size_t bvec_size = howmany(log->l_iclog_size, PAGE_SIZE) *
sizeof(struct bio_vec);
@@ -1415,8 +1489,8 @@ xlog_alloc_log(
iclog->ic_prev = prev_iclog;
prev_iclog = iclog;
- iclog->ic_data = kmem_alloc_large(log->l_iclog_size,
- KM_MAYFAIL);
+ iclog->ic_data = kmem_alloc_io(log->l_iclog_size, align_mask,
+ KM_MAYFAIL | KM_ZERO);
if (!iclog->ic_data)
goto out_free_iclog;
#ifdef DEBUG
@@ -1452,7 +1526,7 @@ xlog_alloc_log(
log->l_ioend_workqueue = alloc_workqueue("xfs-log/%s",
WQ_MEM_RECLAIM | WQ_FREEZABLE | WQ_HIGHPRI, 0,
- mp->m_fsname);
+ mp->m_super->s_id);
if (!log->l_ioend_workqueue)
goto out_free_iclog;
@@ -1468,6 +1542,8 @@ out_free_iclog:
prev_iclog = iclog->ic_next;
kmem_free(iclog->ic_data);
kmem_free(iclog);
+ if (prev_iclog == log->l_iclog)
+ break;
}
out_free_log:
kmem_free(log);
@@ -1700,7 +1776,7 @@ xlog_write_iclog(
* across the log IO to archieve that.
*/
down(&iclog->ic_sema);
- if (unlikely(iclog->ic_state & XLOG_STATE_IOERROR)) {
+ if (unlikely(iclog->ic_state == XLOG_STATE_IOERROR)) {
/*
* It would seem logical to return EIO here, but we rely on
* the log state machine to propagate I/O errors instead of
@@ -1708,13 +1784,11 @@ xlog_write_iclog(
* the buffer manually, the code needs to be kept in sync
* with the I/O completion path.
*/
- xlog_state_done_syncing(iclog, XFS_LI_ABORTED);
+ xlog_state_done_syncing(iclog, true);
up(&iclog->ic_sema);
return;
}
- iclog->ic_io_size = count;
-
bio_init(&iclog->ic_bio, iclog->ic_bvec, howmany(count, PAGE_SIZE));
bio_set_dev(&iclog->ic_bio, log->l_targ->bt_bdev);
iclog->ic_bio.bi_iter.bi_sector = log->l_logBBstart + bno;
@@ -1724,9 +1798,9 @@ xlog_write_iclog(
if (need_flush)
iclog->ic_bio.bi_opf |= REQ_PREFLUSH;
- xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, iclog->ic_io_size);
+ xlog_map_iclog_data(&iclog->ic_bio, iclog->ic_data, count);
if (is_vmalloc_addr(iclog->ic_data))
- flush_kernel_vmap_range(iclog->ic_data, iclog->ic_io_size);
+ flush_kernel_vmap_range(iclog->ic_data, count);
/*
* If this log buffer would straddle the end of the log we will have
@@ -1942,7 +2016,6 @@ xlog_dealloc_log(
/*
* Update counters atomically now that memcpy is done.
*/
-/* ARGSUSED */
static inline void
xlog_state_finish_copy(
struct xlog *log,
@@ -1950,16 +2023,11 @@ xlog_state_finish_copy(
int record_cnt,
int copy_bytes)
{
- spin_lock(&log->l_icloglock);
+ lockdep_assert_held(&log->l_icloglock);
be32_add_cpu(&iclog->ic_header.h_num_logops, record_cnt);
iclog->ic_offset += copy_bytes;
-
- spin_unlock(&log->l_icloglock);
-} /* xlog_state_finish_copy */
-
-
-
+}
/*
* print out info relating to regions written which consume
@@ -2236,15 +2304,18 @@ xlog_write_copy_finish(
int log_offset,
struct xlog_in_core **commit_iclog)
{
+ int error;
+
if (*partial_copy) {
/*
* This iclog has already been marked WANT_SYNC by
* xlog_state_get_iclog_space.
*/
+ spin_lock(&log->l_icloglock);
xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
*record_cnt = 0;
*data_cnt = 0;
- return xlog_state_release_iclog(log, iclog);
+ goto release_iclog;
}
*partial_copy = 0;
@@ -2252,21 +2323,25 @@ xlog_write_copy_finish(
if (iclog->ic_size - log_offset <= sizeof(xlog_op_header_t)) {
/* no more space in this iclog - push it. */
+ spin_lock(&log->l_icloglock);
xlog_state_finish_copy(log, iclog, *record_cnt, *data_cnt);
*record_cnt = 0;
*data_cnt = 0;
- spin_lock(&log->l_icloglock);
xlog_state_want_sync(log, iclog);
- spin_unlock(&log->l_icloglock);
-
if (!commit_iclog)
- return xlog_state_release_iclog(log, iclog);
+ goto release_iclog;
+ spin_unlock(&log->l_icloglock);
ASSERT(flags & XLOG_COMMIT_TRANS);
*commit_iclog = iclog;
}
return 0;
+
+release_iclog:
+ error = xlog_state_release_iclog(log, iclog);
+ spin_unlock(&log->l_icloglock);
+ return error;
}
/*
@@ -2328,7 +2403,7 @@ xlog_write(
int contwr = 0;
int record_cnt = 0;
int data_cnt = 0;
- int error;
+ int error = 0;
*start_lsn = 0;
@@ -2479,13 +2554,17 @@ next_lv:
ASSERT(len == 0);
+ spin_lock(&log->l_icloglock);
xlog_state_finish_copy(log, iclog, record_cnt, data_cnt);
- if (!commit_iclog)
- return xlog_state_release_iclog(log, iclog);
+ if (commit_iclog) {
+ ASSERT(flags & XLOG_COMMIT_TRANS);
+ *commit_iclog = iclog;
+ } else {
+ error = xlog_state_release_iclog(log, iclog);
+ }
+ spin_unlock(&log->l_icloglock);
- ASSERT(flags & XLOG_COMMIT_TRANS);
- *commit_iclog = iclog;
- return 0;
+ return error;
}
@@ -2496,21 +2575,35 @@ next_lv:
*****************************************************************************
*/
-/* Clean iclogs starting from the head. This ordering must be
- * maintained, so an iclog doesn't become ACTIVE beyond one that
- * is SYNCING. This is also required to maintain the notion that we use
- * a ordered wait queue to hold off would be writers to the log when every
- * iclog is trying to sync to disk.
+/*
+ * An iclog has just finished IO completion processing, so we need to update
+ * the iclog state and propagate that up into the overall log state. Hence we
+ * prepare the iclog for cleaning, and then clean all the pending dirty iclogs
+ * starting from the head, and then wake up any threads that are waiting for the
+ * iclog to be marked clean.
+ *
+ * The ordering of marking iclogs ACTIVE must be maintained, so an iclog
+ * doesn't become ACTIVE beyond one that is SYNCING. This is also required to
+ * maintain the notion that we use a ordered wait queue to hold off would be
+ * writers to the log when every iclog is trying to sync to disk.
*
- * State Change: DIRTY -> ACTIVE
+ * Caller must hold the icloglock before calling us.
+ *
+ * State Change: !IOERROR -> DIRTY -> ACTIVE
*/
STATIC void
-xlog_state_clean_log(
- struct xlog *log)
+xlog_state_clean_iclog(
+ struct xlog *log,
+ struct xlog_in_core *dirty_iclog)
{
- xlog_in_core_t *iclog;
- int changed = 0;
+ struct xlog_in_core *iclog;
+ int changed = 0;
+
+ /* Prepare the completed iclog. */
+ if (dirty_iclog->ic_state != XLOG_STATE_IOERROR)
+ dirty_iclog->ic_state = XLOG_STATE_DIRTY;
+ /* Walk all the iclogs to update the ordered active state. */
iclog = log->l_iclog;
do {
if (iclog->ic_state == XLOG_STATE_DIRTY) {
@@ -2548,7 +2641,13 @@ xlog_state_clean_log(
iclog = iclog->ic_next;
} while (iclog != log->l_iclog);
- /* log is locked when we are called */
+
+ /*
+ * Wake up threads waiting in xfs_log_force() for the dirty iclog
+ * to be cleaned.
+ */
+ wake_up_all(&dirty_iclog->ic_force_wait);
+
/*
* Change state for the dummy log recording.
* We usually go to NEED. But we go to NEED2 if the changed indicates
@@ -2582,7 +2681,7 @@ xlog_state_clean_log(
ASSERT(0);
}
}
-} /* xlog_state_clean_log */
+}
STATIC xfs_lsn_t
xlog_get_lowest_lsn(
@@ -2592,7 +2691,8 @@ xlog_get_lowest_lsn(
xfs_lsn_t lowest_lsn = 0, lsn;
do {
- if (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY))
+ if (iclog->ic_state == XLOG_STATE_ACTIVE ||
+ iclog->ic_state == XLOG_STATE_DIRTY)
continue;
lsn = be64_to_cpu(iclog->ic_header.h_lsn);
@@ -2603,30 +2703,150 @@ xlog_get_lowest_lsn(
return lowest_lsn;
}
+/*
+ * Completion of a iclog IO does not imply that a transaction has completed, as
+ * transactions can be large enough to span many iclogs. We cannot change the
+ * tail of the log half way through a transaction as this may be the only
+ * transaction in the log and moving the tail to point to the middle of it
+ * will prevent recovery from finding the start of the transaction. Hence we
+ * should only update the last_sync_lsn if this iclog contains transaction
+ * completion callbacks on it.
+ *
+ * We have to do this before we drop the icloglock to ensure we are the only one
+ * that can update it.
+ *
+ * If we are moving the last_sync_lsn forwards, we also need to ensure we kick
+ * the reservation grant head pushing. This is due to the fact that the push
+ * target is bound by the current last_sync_lsn value. Hence if we have a large
+ * amount of log space bound up in this committing transaction then the
+ * last_sync_lsn value may be the limiting factor preventing tail pushing from
+ * freeing space in the log. Hence once we've updated the last_sync_lsn we
+ * should push the AIL to ensure the push target (and hence the grant head) is
+ * no longer bound by the old log head location and can move forwards and make
+ * progress again.
+ */
+static void
+xlog_state_set_callback(
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ xfs_lsn_t header_lsn)
+{
+ iclog->ic_state = XLOG_STATE_CALLBACK;
+
+ ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
+ header_lsn) <= 0);
+
+ if (list_empty_careful(&iclog->ic_callbacks))
+ return;
+
+ atomic64_set(&log->l_last_sync_lsn, header_lsn);
+ xlog_grant_push_ail(log, 0);
+}
+
+/*
+ * Return true if we need to stop processing, false to continue to the next
+ * iclog. The caller will need to run callbacks if the iclog is returned in the
+ * XLOG_STATE_CALLBACK state.
+ */
+static bool
+xlog_state_iodone_process_iclog(
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ bool *ioerror)
+{
+ xfs_lsn_t lowest_lsn;
+ xfs_lsn_t header_lsn;
+
+ switch (iclog->ic_state) {
+ case XLOG_STATE_ACTIVE:
+ case XLOG_STATE_DIRTY:
+ /*
+ * Skip all iclogs in the ACTIVE & DIRTY states:
+ */
+ return false;
+ case XLOG_STATE_IOERROR:
+ /*
+ * Between marking a filesystem SHUTDOWN and stopping the log,
+ * we do flush all iclogs to disk (if there wasn't a log I/O
+ * error). So, we do want things to go smoothly in case of just
+ * a SHUTDOWN w/o a LOG_IO_ERROR.
+ */
+ *ioerror = true;
+ return false;
+ case XLOG_STATE_DONE_SYNC:
+ /*
+ * Now that we have an iclog that is in the DONE_SYNC state, do
+ * one more check here to see if we have chased our tail around.
+ * If this is not the lowest lsn iclog, then we will leave it
+ * for another completion to process.
+ */
+ header_lsn = be64_to_cpu(iclog->ic_header.h_lsn);
+ lowest_lsn = xlog_get_lowest_lsn(log);
+ if (lowest_lsn && XFS_LSN_CMP(lowest_lsn, header_lsn) < 0)
+ return false;
+ xlog_state_set_callback(log, iclog, header_lsn);
+ return false;
+ default:
+ /*
+ * Can only perform callbacks in order. Since this iclog is not
+ * in the DONE_SYNC state, we skip the rest and just try to
+ * clean up.
+ */
+ return true;
+ }
+}
+
+/*
+ * Keep processing entries in the iclog callback list until we come around and
+ * it is empty. We need to atomically see that the list is empty and change the
+ * state to DIRTY so that we don't miss any more callbacks being added.
+ *
+ * This function is called with the icloglock held and returns with it held. We
+ * drop it while running callbacks, however, as holding it over thousands of
+ * callbacks is unnecessary and causes excessive contention if we do.
+ */
+static void
+xlog_state_do_iclog_callbacks(
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ bool aborted)
+ __releases(&log->l_icloglock)
+ __acquires(&log->l_icloglock)
+{
+ spin_unlock(&log->l_icloglock);
+ spin_lock(&iclog->ic_callback_lock);
+ while (!list_empty(&iclog->ic_callbacks)) {
+ LIST_HEAD(tmp);
+
+ list_splice_init(&iclog->ic_callbacks, &tmp);
+
+ spin_unlock(&iclog->ic_callback_lock);
+ xlog_cil_process_committed(&tmp, aborted);
+ spin_lock(&iclog->ic_callback_lock);
+ }
+
+ /*
+ * Pick up the icloglock while still holding the callback lock so we
+ * serialise against anyone trying to add more callbacks to this iclog
+ * now we've finished processing.
+ */
+ spin_lock(&log->l_icloglock);
+ spin_unlock(&iclog->ic_callback_lock);
+}
+
STATIC void
xlog_state_do_callback(
struct xlog *log,
- bool aborted,
- struct xlog_in_core *ciclog)
-{
- xlog_in_core_t *iclog;
- xlog_in_core_t *first_iclog; /* used to know when we've
- * processed all iclogs once */
- int flushcnt = 0;
- xfs_lsn_t lowest_lsn;
- int ioerrors; /* counter: iclogs with errors */
- int loopdidcallbacks; /* flag: inner loop did callbacks*/
- int funcdidcallbacks; /* flag: function did callbacks */
- int repeats; /* for issuing console warnings if
- * looping too many times */
- int wake = 0;
+ bool aborted)
+{
+ struct xlog_in_core *iclog;
+ struct xlog_in_core *first_iclog;
+ bool cycled_icloglock;
+ bool ioerror;
+ int flushcnt = 0;
+ int repeats = 0;
spin_lock(&log->l_icloglock);
- first_iclog = iclog = log->l_iclog;
- ioerrors = 0;
- funcdidcallbacks = 0;
- repeats = 0;
-
do {
/*
* Scan all iclogs starting with the one pointed to by the
@@ -2638,134 +2858,29 @@ xlog_state_do_callback(
*/
first_iclog = log->l_iclog;
iclog = log->l_iclog;
- loopdidcallbacks = 0;
+ cycled_icloglock = false;
+ ioerror = false;
repeats++;
do {
+ if (xlog_state_iodone_process_iclog(log, iclog,
+ &ioerror))
+ break;
- /* skip all iclogs in the ACTIVE & DIRTY states */
- if (iclog->ic_state &
- (XLOG_STATE_ACTIVE|XLOG_STATE_DIRTY)) {
+ if (iclog->ic_state != XLOG_STATE_CALLBACK &&
+ iclog->ic_state != XLOG_STATE_IOERROR) {
iclog = iclog->ic_next;
continue;
}
/*
- * Between marking a filesystem SHUTDOWN and stopping
- * the log, we do flush all iclogs to disk (if there
- * wasn't a log I/O error). So, we do want things to
- * go smoothly in case of just a SHUTDOWN w/o a
- * LOG_IO_ERROR.
+ * Running callbacks will drop the icloglock which means
+ * we'll have to run at least one more complete loop.
*/
- if (!(iclog->ic_state & XLOG_STATE_IOERROR)) {
- /*
- * Can only perform callbacks in order. Since
- * this iclog is not in the DONE_SYNC/
- * DO_CALLBACK state, we skip the rest and
- * just try to clean up. If we set our iclog
- * to DO_CALLBACK, we will not process it when
- * we retry since a previous iclog is in the
- * CALLBACK and the state cannot change since
- * we are holding the l_icloglock.
- */
- if (!(iclog->ic_state &
- (XLOG_STATE_DONE_SYNC |
- XLOG_STATE_DO_CALLBACK))) {
- if (ciclog && (ciclog->ic_state ==
- XLOG_STATE_DONE_SYNC)) {
- ciclog->ic_state = XLOG_STATE_DO_CALLBACK;
- }
- break;
- }
- /*
- * We now have an iclog that is in either the
- * DO_CALLBACK or DONE_SYNC states. The other
- * states (WANT_SYNC, SYNCING, or CALLBACK were
- * caught by the above if and are going to
- * clean (i.e. we aren't doing their callbacks)
- * see the above if.
- */
-
- /*
- * We will do one more check here to see if we
- * have chased our tail around.
- */
-
- lowest_lsn = xlog_get_lowest_lsn(log);
- if (lowest_lsn &&
- XFS_LSN_CMP(lowest_lsn,
- be64_to_cpu(iclog->ic_header.h_lsn)) < 0) {
- iclog = iclog->ic_next;
- continue; /* Leave this iclog for
- * another thread */
- }
-
- iclog->ic_state = XLOG_STATE_CALLBACK;
-
-
- /*
- * Completion of a iclog IO does not imply that
- * a transaction has completed, as transactions
- * can be large enough to span many iclogs. We
- * cannot change the tail of the log half way
- * through a transaction as this may be the only
- * transaction in the log and moving th etail to
- * point to the middle of it will prevent
- * recovery from finding the start of the
- * transaction. Hence we should only update the
- * last_sync_lsn if this iclog contains
- * transaction completion callbacks on it.
- *
- * We have to do this before we drop the
- * icloglock to ensure we are the only one that
- * can update it.
- */
- ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
- be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
- if (!list_empty_careful(&iclog->ic_callbacks))
- atomic64_set(&log->l_last_sync_lsn,
- be64_to_cpu(iclog->ic_header.h_lsn));
-
- } else
- ioerrors++;
-
- spin_unlock(&log->l_icloglock);
-
- /*
- * Keep processing entries in the callback list until
- * we come around and it is empty. We need to
- * atomically see that the list is empty and change the
- * state to DIRTY so that we don't miss any more
- * callbacks being added.
- */
- spin_lock(&iclog->ic_callback_lock);
- while (!list_empty(&iclog->ic_callbacks)) {
- LIST_HEAD(tmp);
-
- list_splice_init(&iclog->ic_callbacks, &tmp);
-
- spin_unlock(&iclog->ic_callback_lock);
- xlog_cil_process_committed(&tmp, aborted);
- spin_lock(&iclog->ic_callback_lock);
- }
-
- loopdidcallbacks++;
- funcdidcallbacks++;
-
- spin_lock(&log->l_icloglock);
- spin_unlock(&iclog->ic_callback_lock);
- if (!(iclog->ic_state & XLOG_STATE_IOERROR))
- iclog->ic_state = XLOG_STATE_DIRTY;
-
- /*
- * Transition from DIRTY to ACTIVE if applicable.
- * NOP if STATE_IOERROR.
- */
- xlog_state_clean_log(log);
-
- /* wake up threads waiting in xfs_log_force() */
- wake_up_all(&iclog->ic_force_wait);
+ cycled_icloglock = true;
+ xlog_state_do_iclog_callbacks(log, iclog, aborted);
+ xlog_state_clean_iclog(log, iclog);
iclog = iclog->ic_next;
} while (first_iclog != iclog);
@@ -2776,50 +2891,13 @@ xlog_state_do_callback(
"%s: possible infinite loop (%d iterations)",
__func__, flushcnt);
}
- } while (!ioerrors && loopdidcallbacks);
+ } while (!ioerror && cycled_icloglock);
-#ifdef DEBUG
- /*
- * Make one last gasp attempt to see if iclogs are being left in limbo.
- * If the above loop finds an iclog earlier than the current iclog and
- * in one of the syncing states, the current iclog is put into
- * DO_CALLBACK and the callbacks are deferred to the completion of the
- * earlier iclog. Walk the iclogs in order and make sure that no iclog
- * is in DO_CALLBACK unless an earlier iclog is in one of the syncing
- * states.
- *
- * Note that SYNCING|IOABORT is a valid state so we cannot just check
- * for ic_state == SYNCING.
- */
- if (funcdidcallbacks) {
- first_iclog = iclog = log->l_iclog;
- do {
- ASSERT(iclog->ic_state != XLOG_STATE_DO_CALLBACK);
- /*
- * Terminate the loop if iclogs are found in states
- * which will cause other threads to clean up iclogs.
- *
- * SYNCING - i/o completion will go through logs
- * DONE_SYNC - interrupt thread should be waiting for
- * l_icloglock
- * IOERROR - give up hope all ye who enter here
- */
- if (iclog->ic_state == XLOG_STATE_WANT_SYNC ||
- iclog->ic_state & XLOG_STATE_SYNCING ||
- iclog->ic_state == XLOG_STATE_DONE_SYNC ||
- iclog->ic_state == XLOG_STATE_IOERROR )
- break;
- iclog = iclog->ic_next;
- } while (first_iclog != iclog);
- }
-#endif
+ if (log->l_iclog->ic_state == XLOG_STATE_ACTIVE ||
+ log->l_iclog->ic_state == XLOG_STATE_IOERROR)
+ wake_up_all(&log->l_flush_wait);
- if (log->l_iclog->ic_state & (XLOG_STATE_ACTIVE|XLOG_STATE_IOERROR))
- wake = 1;
spin_unlock(&log->l_icloglock);
-
- if (wake)
- wake_up_all(&log->l_flush_wait);
}
@@ -2845,8 +2923,6 @@ xlog_state_done_syncing(
spin_lock(&log->l_icloglock);
- ASSERT(iclog->ic_state == XLOG_STATE_SYNCING ||
- iclog->ic_state == XLOG_STATE_IOERROR);
ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
/*
@@ -2855,8 +2931,10 @@ xlog_state_done_syncing(
* and none should ever be attempted to be written to disk
* again.
*/
- if (iclog->ic_state != XLOG_STATE_IOERROR)
+ if (iclog->ic_state == XLOG_STATE_SYNCING)
iclog->ic_state = XLOG_STATE_DONE_SYNC;
+ else
+ ASSERT(iclog->ic_state == XLOG_STATE_IOERROR);
/*
* Someone could be sleeping prior to writing out the next
@@ -2865,7 +2943,7 @@ xlog_state_done_syncing(
*/
wake_up_all(&iclog->ic_write_wait);
spin_unlock(&log->l_icloglock);
- xlog_state_do_callback(log, aborted, iclog); /* also cleans log */
+ xlog_state_do_callback(log, aborted); /* also cleans log */
} /* xlog_state_done_syncing */
@@ -2899,7 +2977,6 @@ xlog_state_get_iclog_space(
int log_offset;
xlog_rec_header_t *head;
xlog_in_core_t *iclog;
- int error;
restart:
spin_lock(&log->l_icloglock);
@@ -2948,24 +3025,22 @@ restart:
* can fit into remaining data section.
*/
if (iclog->ic_size - iclog->ic_offset < 2*sizeof(xlog_op_header_t)) {
+ int error = 0;
+
xlog_state_switch_iclogs(log, iclog, iclog->ic_size);
/*
- * If I'm the only one writing to this iclog, sync it to disk.
- * We need to do an atomic compare and decrement here to avoid
- * racing with concurrent atomic_dec_and_lock() calls in
+ * If we are the only one writing to this iclog, sync it to
+ * disk. We need to do an atomic compare and decrement here to
+ * avoid racing with concurrent atomic_dec_and_lock() calls in
* xlog_state_release_iclog() when there is more than one
* reference to the iclog.
*/
- if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1)) {
- /* we are the only one */
- spin_unlock(&log->l_icloglock);
+ if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1))
error = xlog_state_release_iclog(log, iclog);
- if (error)
- return error;
- } else {
- spin_unlock(&log->l_icloglock);
- }
+ spin_unlock(&log->l_icloglock);
+ if (error)
+ return error;
goto restart;
}
@@ -3077,60 +3152,6 @@ xlog_ungrant_log_space(
}
/*
- * Flush iclog to disk if this is the last reference to the given iclog and
- * the WANT_SYNC bit is set.
- *
- * When this function is entered, the iclog is not necessarily in the
- * WANT_SYNC state. It may be sitting around waiting to get filled.
- *
- *
- */
-STATIC int
-xlog_state_release_iclog(
- struct xlog *log,
- struct xlog_in_core *iclog)
-{
- int sync = 0; /* do we sync? */
-
- if (iclog->ic_state & XLOG_STATE_IOERROR)
- return -EIO;
-
- ASSERT(atomic_read(&iclog->ic_refcnt) > 0);
- if (!atomic_dec_and_lock(&iclog->ic_refcnt, &log->l_icloglock))
- return 0;
-
- if (iclog->ic_state & XLOG_STATE_IOERROR) {
- spin_unlock(&log->l_icloglock);
- return -EIO;
- }
- ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE ||
- iclog->ic_state == XLOG_STATE_WANT_SYNC);
-
- if (iclog->ic_state == XLOG_STATE_WANT_SYNC) {
- /* update tail before writing to iclog */
- xfs_lsn_t tail_lsn = xlog_assign_tail_lsn(log->l_mp);
- sync++;
- iclog->ic_state = XLOG_STATE_SYNCING;
- iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
- xlog_verify_tail_lsn(log, iclog, tail_lsn);
- /* cycle incremented when incrementing curr_block */
- }
- spin_unlock(&log->l_icloglock);
-
- /*
- * We let the log lock go, so it's possible that we hit a log I/O
- * error or some other SHUTDOWN condition that marks the iclog
- * as XLOG_STATE_IOERROR before the bwrite. However, we know that
- * this iclog has consistent data, so we ignore IOERROR
- * flags after this point.
- */
- if (sync)
- xlog_sync(log, iclog);
- return 0;
-} /* xlog_state_release_iclog */
-
-
-/*
* This routine will mark the current iclog in the ring as WANT_SYNC
* and move the current iclog pointer to the next iclog in the ring.
* When this routine is called from xlog_state_get_iclog_space(), the
@@ -3223,7 +3244,7 @@ xfs_log_force(
spin_lock(&log->l_icloglock);
iclog = log->l_iclog;
- if (iclog->ic_state & XLOG_STATE_IOERROR)
+ if (iclog->ic_state == XLOG_STATE_IOERROR)
goto out_error;
if (iclog->ic_state == XLOG_STATE_DIRTY ||
@@ -3253,12 +3274,9 @@ xfs_log_force(
atomic_inc(&iclog->ic_refcnt);
lsn = be64_to_cpu(iclog->ic_header.h_lsn);
xlog_state_switch_iclogs(log, iclog, 0);
- spin_unlock(&log->l_icloglock);
-
if (xlog_state_release_iclog(log, iclog))
- return -EIO;
+ goto out_error;
- spin_lock(&log->l_icloglock);
if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn ||
iclog->ic_state == XLOG_STATE_DIRTY)
goto out_unlock;
@@ -3283,11 +3301,11 @@ xfs_log_force(
if (!(flags & XFS_LOG_SYNC))
goto out_unlock;
- if (iclog->ic_state & XLOG_STATE_IOERROR)
+ if (iclog->ic_state == XLOG_STATE_IOERROR)
goto out_error;
XFS_STATS_INC(mp, xs_log_force_sleep);
xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
- if (iclog->ic_state & XLOG_STATE_IOERROR)
+ if (iclog->ic_state == XLOG_STATE_IOERROR)
return -EIO;
return 0;
@@ -3312,7 +3330,7 @@ __xfs_log_force_lsn(
spin_lock(&log->l_icloglock);
iclog = log->l_iclog;
- if (iclog->ic_state & XLOG_STATE_IOERROR)
+ if (iclog->ic_state == XLOG_STATE_IOERROR)
goto out_error;
while (be64_to_cpu(iclog->ic_header.h_lsn) != lsn) {
@@ -3341,10 +3359,8 @@ __xfs_log_force_lsn(
* will go out then.
*/
if (!already_slept &&
- (iclog->ic_prev->ic_state &
- (XLOG_STATE_WANT_SYNC | XLOG_STATE_SYNCING))) {
- ASSERT(!(iclog->ic_state & XLOG_STATE_IOERROR));
-
+ (iclog->ic_prev->ic_state == XLOG_STATE_WANT_SYNC ||
+ iclog->ic_prev->ic_state == XLOG_STATE_SYNCING)) {
XFS_STATS_INC(mp, xs_log_force_sleep);
xlog_wait(&iclog->ic_prev->ic_write_wait,
@@ -3353,24 +3369,23 @@ __xfs_log_force_lsn(
}
atomic_inc(&iclog->ic_refcnt);
xlog_state_switch_iclogs(log, iclog, 0);
- spin_unlock(&log->l_icloglock);
if (xlog_state_release_iclog(log, iclog))
- return -EIO;
+ goto out_error;
if (log_flushed)
*log_flushed = 1;
- spin_lock(&log->l_icloglock);
}
if (!(flags & XFS_LOG_SYNC) ||
- (iclog->ic_state & (XLOG_STATE_ACTIVE | XLOG_STATE_DIRTY)))
+ (iclog->ic_state == XLOG_STATE_ACTIVE ||
+ iclog->ic_state == XLOG_STATE_DIRTY))
goto out_unlock;
- if (iclog->ic_state & XLOG_STATE_IOERROR)
+ if (iclog->ic_state == XLOG_STATE_IOERROR)
goto out_error;
XFS_STATS_INC(mp, xs_log_force_sleep);
xlog_wait(&iclog->ic_force_wait, &log->l_icloglock);
- if (iclog->ic_state & XLOG_STATE_IOERROR)
+ if (iclog->ic_state == XLOG_STATE_IOERROR)
return -EIO;
return 0;
@@ -3433,8 +3448,8 @@ xlog_state_want_sync(
if (iclog->ic_state == XLOG_STATE_ACTIVE) {
xlog_state_switch_iclogs(log, iclog, 0);
} else {
- ASSERT(iclog->ic_state &
- (XLOG_STATE_WANT_SYNC|XLOG_STATE_IOERROR));
+ ASSERT(iclog->ic_state == XLOG_STATE_WANT_SYNC ||
+ iclog->ic_state == XLOG_STATE_IOERROR);
}
}
@@ -3455,7 +3470,7 @@ xfs_log_ticket_put(
{
ASSERT(atomic_read(&ticket->t_ref) > 0);
if (atomic_dec_and_test(&ticket->t_ref))
- kmem_zone_free(xfs_log_ticket_zone, ticket);
+ kmem_cache_free(xfs_log_ticket_zone, ticket);
}
xlog_ticket_t *
@@ -3811,7 +3826,7 @@ xlog_state_ioerror(
xlog_in_core_t *iclog, *ic;
iclog = log->l_iclog;
- if (! (iclog->ic_state & XLOG_STATE_IOERROR)) {
+ if (iclog->ic_state != XLOG_STATE_IOERROR) {
/*
* Mark all the incore logs IOERROR.
* From now on, no log flushes will result.
@@ -3871,7 +3886,7 @@ xfs_log_force_umount(
* Somebody could've already done the hard work for us.
* No need to get locks for this.
*/
- if (logerror && log->l_iclog->ic_state & XLOG_STATE_IOERROR) {
+ if (logerror && log->l_iclog->ic_state == XLOG_STATE_IOERROR) {
ASSERT(XLOG_FORCED_SHUTDOWN(log));
return 1;
}
@@ -3919,22 +3934,11 @@ xfs_log_force_umount(
* item committed callback functions will do this again under lock to
* avoid races.
*/
+ spin_lock(&log->l_cilp->xc_push_lock);
wake_up_all(&log->l_cilp->xc_commit_wait);
- xlog_state_do_callback(log, true, NULL);
+ spin_unlock(&log->l_cilp->xc_push_lock);
+ xlog_state_do_callback(log, true);
-#ifdef XFSERRORDEBUG
- {
- xlog_in_core_t *iclog;
-
- spin_lock(&log->l_icloglock);
- iclog = log->l_iclog;
- do {
- ASSERT(iclog->ic_callback == 0);
- iclog = iclog->ic_next;
- } while (iclog != log->l_iclog);
- spin_unlock(&log->l_icloglock);
- }
-#endif
/* return non-zero if log IOERROR transition had already happened */
return retval;
}
OpenPOWER on IntegriCloud