summaryrefslogtreecommitdiff
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c16
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h11
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c3
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c19
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h1
-rw-r--r--fs/xfs/xfs_log.c13
-rw-r--r--fs/xfs/xfs_log_cil.c12
-rw-r--r--fs/xfs/xfs_log_priv.h37
8 files changed, 48 insertions, 64 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 6838aefca71..f3ccaec5760 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -924,19 +924,7 @@ xfs_buf_iodone_work(
xfs_buf_t *bp =
container_of(work, xfs_buf_t, b_iodone_work);
- /*
- * We can get an EOPNOTSUPP to ordered writes. Here we clear the
- * ordered flag and reissue them. Because we can't tell the higher
- * layers directly that they should not issue ordered I/O anymore, they
- * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion.
- */
- if ((bp->b_error == EOPNOTSUPP) &&
- (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) {
- trace_xfs_buf_ordered_retry(bp, _RET_IP_);
- bp->b_flags &= ~XBF_ORDERED;
- bp->b_flags |= _XFS_BARRIER_FAILED;
- xfs_buf_iorequest(bp);
- } else if (bp->b_iodone)
+ if (bp->b_iodone)
(*(bp->b_iodone))(bp);
else if (bp->b_flags & XBF_ASYNC)
xfs_buf_relse(bp);
@@ -1195,7 +1183,7 @@ _xfs_buf_ioapply(
if (bp->b_flags & XBF_ORDERED) {
ASSERT(!(bp->b_flags & XBF_READ));
- rw = WRITE_BARRIER;
+ rw = WRITE_FLUSH_FUA;
} else if (bp->b_flags & XBF_LOG_BUFFER) {
ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
bp->b_flags &= ~_XBF_RUN_QUEUES;
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 2a05614f0b9..9d021c73ea5 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -86,14 +86,6 @@ typedef enum {
*/
#define _XBF_PAGE_LOCKED (1 << 22)
-/*
- * If we try a barrier write, but it fails we have to communicate
- * this to the upper layers. Unfortunately b_error gets overwritten
- * when the buffer is re-issued so we have to add another flag to
- * keep this information.
- */
-#define _XFS_BARRIER_FAILED (1 << 23)
-
typedef unsigned int xfs_buf_flags_t;
#define XFS_BUF_FLAGS \
@@ -114,8 +106,7 @@ typedef unsigned int xfs_buf_flags_t;
{ _XBF_PAGES, "PAGES" }, \
{ _XBF_RUN_QUEUES, "RUN_QUEUES" }, \
{ _XBF_DELWRI_Q, "DELWRI_Q" }, \
- { _XBF_PAGE_LOCKED, "PAGE_LOCKED" }, \
- { _XFS_BARRIER_FAILED, "BARRIER_FAILED" }
+ { _XBF_PAGE_LOCKED, "PAGE_LOCKED" }
typedef enum {
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index a4e07974955..08fd3102128 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -693,8 +693,7 @@ void
xfs_blkdev_issue_flush(
xfs_buftarg_t *buftarg)
{
- blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL,
- BLKDEV_IFL_WAIT);
+ blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL);
}
STATIC void
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index d59c4a65d49..81976ffed7d 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -668,14 +668,11 @@ xfs_inode_set_reclaim_tag(
xfs_perag_put(pag);
}
-void
-__xfs_inode_clear_reclaim_tag(
- xfs_mount_t *mp,
+STATIC void
+__xfs_inode_clear_reclaim(
xfs_perag_t *pag,
xfs_inode_t *ip)
{
- radix_tree_tag_clear(&pag->pag_ici_root,
- XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
pag->pag_ici_reclaimable--;
if (!pag->pag_ici_reclaimable) {
/* clear the reclaim tag from the perag radix tree */
@@ -689,6 +686,17 @@ __xfs_inode_clear_reclaim_tag(
}
}
+void
+__xfs_inode_clear_reclaim_tag(
+ xfs_mount_t *mp,
+ xfs_perag_t *pag,
+ xfs_inode_t *ip)
+{
+ radix_tree_tag_clear(&pag->pag_ici_root,
+ XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
+ __xfs_inode_clear_reclaim(pag, ip);
+}
+
/*
* Inodes in different states need to be treated differently, and the return
* value of xfs_iflush is not sufficient to get this right. The following table
@@ -838,6 +846,7 @@ reclaim:
if (!radix_tree_delete(&pag->pag_ici_root,
XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
ASSERT(0);
+ __xfs_inode_clear_reclaim(pag, ip);
write_unlock(&pag->pag_ici_lock);
/*
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index be5dffd282a..8fe311a456e 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -325,7 +325,6 @@ DEFINE_BUF_EVENT(xfs_buf_lock);
DEFINE_BUF_EVENT(xfs_buf_lock_done);
DEFINE_BUF_EVENT(xfs_buf_cond_lock);
DEFINE_BUF_EVENT(xfs_buf_unlock);
-DEFINE_BUF_EVENT(xfs_buf_ordered_retry);
DEFINE_BUF_EVENT(xfs_buf_iowait);
DEFINE_BUF_EVENT(xfs_buf_iowait_done);
DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 33f718f92a4..ba8e36e0b4e 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -917,19 +917,6 @@ xlog_iodone(xfs_buf_t *bp)
l = iclog->ic_log;
/*
- * If the _XFS_BARRIER_FAILED flag was set by a lower
- * layer, it means the underlying device no longer supports
- * barrier I/O. Warn loudly and turn off barriers.
- */
- if (bp->b_flags & _XFS_BARRIER_FAILED) {
- bp->b_flags &= ~_XFS_BARRIER_FAILED;
- l->l_mp->m_flags &= ~XFS_MOUNT_BARRIER;
- xfs_fs_cmn_err(CE_WARN, l->l_mp,
- "xlog_iodone: Barriers are no longer supported"
- " by device. Disabling barriers\n");
- }
-
- /*
* Race to shutdown the filesystem if we see an error.
*/
if (XFS_TEST_ERROR((XFS_BUF_GETERROR(bp)), l->l_mp,
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index ed575fb4b49..7e206fc1fa3 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -405,9 +405,15 @@ xlog_cil_push(
new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
new_ctx->ticket = xlog_cil_ticket_alloc(log);
- /* lock out transaction commit, but don't block on background push */
+ /*
+ * Lock out transaction commit, but don't block for background pushes
+ * unless we are well over the CIL space limit. See the definition of
+ * XLOG_CIL_HARD_SPACE_LIMIT() for the full explanation of the logic
+ * used here.
+ */
if (!down_write_trylock(&cil->xc_ctx_lock)) {
- if (!push_seq)
+ if (!push_seq &&
+ cil->xc_ctx->space_used < XLOG_CIL_HARD_SPACE_LIMIT(log))
goto out_free_ticket;
down_write(&cil->xc_ctx_lock);
}
@@ -422,7 +428,7 @@ xlog_cil_push(
goto out_skip;
/* check for a previously pushed seqeunce */
- if (push_seq < cil->xc_ctx->sequence)
+ if (push_seq && push_seq < cil->xc_ctx->sequence)
goto out_skip;
/*
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index ced52b98b32..edcdfe01617 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -426,13 +426,13 @@ struct xfs_cil {
};
/*
- * The amount of log space we should the CIL to aggregate is difficult to size.
- * Whatever we chose we have to make we can get a reservation for the log space
- * effectively, that it is large enough to capture sufficient relogging to
- * reduce log buffer IO significantly, but it is not too large for the log or
- * induces too much latency when writing out through the iclogs. We track both
- * space consumed and the number of vectors in the checkpoint context, so we
- * need to decide which to use for limiting.
+ * The amount of log space we allow the CIL to aggregate is difficult to size.
+ * Whatever we choose, we have to make sure we can get a reservation for the
+ * log space effectively, that it is large enough to capture sufficient
+ * relogging to reduce log buffer IO significantly, but it is not too large for
+ * the log or induces too much latency when writing out through the iclogs. We
+ * track both space consumed and the number of vectors in the checkpoint
+ * context, so we need to decide which to use for limiting.
*
* Every log buffer we write out during a push needs a header reserved, which
* is at least one sector and more for v2 logs. Hence we need a reservation of
@@ -459,16 +459,21 @@ struct xfs_cil {
* checkpoint transaction ticket is specific to the checkpoint context, rather
* than the CIL itself.
*
- * With dynamic reservations, we can basically make up arbitrary limits for the
- * checkpoint size so long as they don't violate any other size rules. Hence
- * the initial maximum size for the checkpoint transaction will be set to a
- * quarter of the log or 8MB, which ever is smaller. 8MB is an arbitrary limit
- * right now based on the latency of writing out a large amount of data through
- * the circular iclog buffers.
+ * With dynamic reservations, we can effectively make up arbitrary limits for
+ * the checkpoint size so long as they don't violate any other size rules.
+ * Recovery imposes a rule that no transaction exceed half the log, so we are
+ * limited by that. Furthermore, the log transaction reservation subsystem
+ * tries to keep 25% of the log free, so we need to keep below that limit or we
+ * risk running out of free log space to start any new transactions.
+ *
+ * In order to keep background CIL push efficient, we will set a lower
+ * threshold at which background pushing is attempted without blocking current
+ * transaction commits. A separate, higher bound defines when CIL pushes are
+ * enforced to ensure we stay within our maximum checkpoint size bounds.
+ * threshold, yet give us plenty of space for aggregation on large logs.
*/
-
-#define XLOG_CIL_SPACE_LIMIT(log) \
- (min((log->l_logsize >> 2), (8 * 1024 * 1024)))
+#define XLOG_CIL_SPACE_LIMIT(log) (log->l_logsize >> 3)
+#define XLOG_CIL_HARD_SPACE_LIMIT(log) (3 * (log->l_logsize >> 4))
/*
* The reservation head lsn is not made up of a cycle number and block number.