aboutsummaryrefslogtreecommitdiff
path: root/fs/xfs/linux-2.6
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/linux-2.6')
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c237
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h81
-rw-r--r--fs/xfs/linux-2.6/xfs_cred.h28
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.c31
-rw-r--r--fs/xfs/linux-2.6/xfs_globals.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_globals.h23
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c19
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c5
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.h6
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c39
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h5
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c27
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c432
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h4
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h5
-rw-r--r--fs/xfs/linux-2.6/xfs_version.h29
17 files changed, 461 insertions, 512 deletions
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 286e36e21da..ba5312802aa 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -188,8 +188,8 @@ _xfs_buf_initialize(
atomic_set(&bp->b_hold, 1);
init_completion(&bp->b_iowait);
INIT_LIST_HEAD(&bp->b_list);
- INIT_LIST_HEAD(&bp->b_hash_list);
- init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
+ RB_CLEAR_NODE(&bp->b_rbnode);
+ sema_init(&bp->b_sema, 0); /* held, no waiters */
XB_SET_OWNER(bp);
bp->b_target = target;
bp->b_file_offset = range_base;
@@ -262,8 +262,6 @@ xfs_buf_free(
{
trace_xfs_buf_free(bp, _RET_IP_);
- ASSERT(list_empty(&bp->b_hash_list));
-
if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
uint i;
@@ -422,8 +420,10 @@ _xfs_buf_find(
{
xfs_off_t range_base;
size_t range_length;
- xfs_bufhash_t *hash;
- xfs_buf_t *bp, *n;
+ struct xfs_perag *pag;
+ struct rb_node **rbp;
+ struct rb_node *parent;
+ xfs_buf_t *bp;
range_base = (ioff << BBSHIFT);
range_length = (isize << BBSHIFT);
@@ -432,14 +432,37 @@ _xfs_buf_find(
ASSERT(!(range_length < (1 << btp->bt_sshift)));
ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
- hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];
-
- spin_lock(&hash->bh_lock);
-
- list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
- ASSERT(btp == bp->b_target);
- if (bp->b_file_offset == range_base &&
- bp->b_buffer_length == range_length) {
+ /* get tree root */
+ pag = xfs_perag_get(btp->bt_mount,
+ xfs_daddr_to_agno(btp->bt_mount, ioff));
+
+ /* walk tree */
+ spin_lock(&pag->pag_buf_lock);
+ rbp = &pag->pag_buf_tree.rb_node;
+ parent = NULL;
+ bp = NULL;
+ while (*rbp) {
+ parent = *rbp;
+ bp = rb_entry(parent, struct xfs_buf, b_rbnode);
+
+ if (range_base < bp->b_file_offset)
+ rbp = &(*rbp)->rb_left;
+ else if (range_base > bp->b_file_offset)
+ rbp = &(*rbp)->rb_right;
+ else {
+ /*
+ * found a block offset match. If the range doesn't
+ * match, the only way this is allowed is if the buffer
+ * in the cache is stale and the transaction that made
+ * it stale has not yet committed. i.e. we are
+ * reallocating a busy extent. Skip this buffer and
+ * continue searching to the right for an exact match.
+ */
+ if (bp->b_buffer_length != range_length) {
+ ASSERT(bp->b_flags & XBF_STALE);
+ rbp = &(*rbp)->rb_right;
+ continue;
+ }
atomic_inc(&bp->b_hold);
goto found;
}
@@ -449,17 +472,21 @@ _xfs_buf_find(
if (new_bp) {
_xfs_buf_initialize(new_bp, btp, range_base,
range_length, flags);
- new_bp->b_hash = hash;
- list_add(&new_bp->b_hash_list, &hash->bh_list);
+ rb_link_node(&new_bp->b_rbnode, parent, rbp);
+ rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
+ /* the buffer keeps the perag reference until it is freed */
+ new_bp->b_pag = pag;
+ spin_unlock(&pag->pag_buf_lock);
} else {
XFS_STATS_INC(xb_miss_locked);
+ spin_unlock(&pag->pag_buf_lock);
+ xfs_perag_put(pag);
}
-
- spin_unlock(&hash->bh_lock);
return new_bp;
found:
- spin_unlock(&hash->bh_lock);
+ spin_unlock(&pag->pag_buf_lock);
+ xfs_perag_put(pag);
/* Attempt to get the semaphore without sleeping,
* if this does not work then we need to drop the
@@ -625,8 +652,7 @@ void
xfs_buf_readahead(
xfs_buftarg_t *target,
xfs_off_t ioff,
- size_t isize,
- xfs_buf_flags_t flags)
+ size_t isize)
{
struct backing_dev_info *bdi;
@@ -634,8 +660,42 @@ xfs_buf_readahead(
if (bdi_read_congested(bdi))
return;
- flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
- xfs_buf_read(target, ioff, isize, flags);
+ xfs_buf_read(target, ioff, isize,
+ XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK);
+}
+
+/*
+ * Read an uncached buffer from disk. Allocates and returns a locked
+ * buffer containing the disk contents or nothing.
+ */
+struct xfs_buf *
+xfs_buf_read_uncached(
+ struct xfs_mount *mp,
+ struct xfs_buftarg *target,
+ xfs_daddr_t daddr,
+ size_t length,
+ int flags)
+{
+ xfs_buf_t *bp;
+ int error;
+
+ bp = xfs_buf_get_uncached(target, length, flags);
+ if (!bp)
+ return NULL;
+
+ /* set up the buffer for a read IO */
+ xfs_buf_lock(bp);
+ XFS_BUF_SET_ADDR(bp, daddr);
+ XFS_BUF_READ(bp);
+ XFS_BUF_BUSY(bp);
+
+ xfsbdstrat(mp, bp);
+ error = xfs_buf_iowait(bp);
+ if (error || bp->b_error) {
+ xfs_buf_relse(bp);
+ return NULL;
+ }
+ return bp;
}
xfs_buf_t *
@@ -707,9 +767,10 @@ xfs_buf_associate_memory(
}
xfs_buf_t *
-xfs_buf_get_noaddr(
+xfs_buf_get_uncached(
+ struct xfs_buftarg *target,
size_t len,
- xfs_buftarg_t *target)
+ int flags)
{
unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
int error, i;
@@ -725,7 +786,7 @@ xfs_buf_get_noaddr(
goto fail_free_buf;
for (i = 0; i < page_count; i++) {
- bp->b_pages[i] = alloc_page(GFP_KERNEL);
+ bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
if (!bp->b_pages[i])
goto fail_free_mem;
}
@@ -740,7 +801,7 @@ xfs_buf_get_noaddr(
xfs_buf_unlock(bp);
- trace_xfs_buf_get_noaddr(bp, _RET_IP_);
+ trace_xfs_buf_get_uncached(bp, _RET_IP_);
return bp;
fail_free_mem:
@@ -774,29 +835,30 @@ void
xfs_buf_rele(
xfs_buf_t *bp)
{
- xfs_bufhash_t *hash = bp->b_hash;
+ struct xfs_perag *pag = bp->b_pag;
trace_xfs_buf_rele(bp, _RET_IP_);
- if (unlikely(!hash)) {
+ if (!pag) {
ASSERT(!bp->b_relse);
+ ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
if (atomic_dec_and_test(&bp->b_hold))
xfs_buf_free(bp);
return;
}
+ ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
ASSERT(atomic_read(&bp->b_hold) > 0);
- if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {
+ if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
if (bp->b_relse) {
atomic_inc(&bp->b_hold);
- spin_unlock(&hash->bh_lock);
- (*(bp->b_relse)) (bp);
- } else if (bp->b_flags & XBF_FS_MANAGED) {
- spin_unlock(&hash->bh_lock);
+ spin_unlock(&pag->pag_buf_lock);
+ bp->b_relse(bp);
} else {
ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
- list_del_init(&bp->b_hash_list);
- spin_unlock(&hash->bh_lock);
+ rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
+ spin_unlock(&pag->pag_buf_lock);
+ xfs_perag_put(pag);
xfs_buf_free(bp);
}
}
@@ -859,7 +921,7 @@ xfs_buf_lock(
trace_xfs_buf_lock(bp, _RET_IP_);
if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
- xfs_log_force(bp->b_mount, 0);
+ xfs_log_force(bp->b_target->bt_mount, 0);
if (atomic_read(&bp->b_io_remaining))
blk_run_address_space(bp->b_target->bt_mapping);
down(&bp->b_sema);
@@ -924,19 +986,7 @@ xfs_buf_iodone_work(
xfs_buf_t *bp =
container_of(work, xfs_buf_t, b_iodone_work);
- /*
- * We can get an EOPNOTSUPP to ordered writes. Here we clear the
- * ordered flag and reissue them. Because we can't tell the higher
- * layers directly that they should not issue ordered I/O anymore, they
- * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion.
- */
- if ((bp->b_error == EOPNOTSUPP) &&
- (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) {
- trace_xfs_buf_ordered_retry(bp, _RET_IP_);
- bp->b_flags &= ~XBF_ORDERED;
- bp->b_flags |= _XFS_BARRIER_FAILED;
- xfs_buf_iorequest(bp);
- } else if (bp->b_iodone)
+ if (bp->b_iodone)
(*(bp->b_iodone))(bp);
else if (bp->b_flags & XBF_ASYNC)
xfs_buf_relse(bp);
@@ -982,7 +1032,6 @@ xfs_bwrite(
{
int error;
- bp->b_mount = mp;
bp->b_flags |= XBF_WRITE;
bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
@@ -1003,8 +1052,6 @@ xfs_bdwrite(
{
trace_xfs_buf_bdwrite(bp, _RET_IP_);
- bp->b_mount = mp;
-
bp->b_flags &= ~XBF_READ;
bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);
@@ -1013,7 +1060,7 @@ xfs_bdwrite(
/*
* Called when we want to stop a buffer from getting written or read.
- * We attach the EIO error, muck with its flags, and call biodone
+ * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
* so that the proper iodone callbacks get called.
*/
STATIC int
@@ -1030,21 +1077,21 @@ xfs_bioerror(
XFS_BUF_ERROR(bp, EIO);
/*
- * We're calling biodone, so delete XBF_DONE flag.
+ * We're calling xfs_buf_ioend, so delete XBF_DONE flag.
*/
XFS_BUF_UNREAD(bp);
XFS_BUF_UNDELAYWRITE(bp);
XFS_BUF_UNDONE(bp);
XFS_BUF_STALE(bp);
- xfs_biodone(bp);
+ xfs_buf_ioend(bp, 0);
return EIO;
}
/*
* Same as xfs_bioerror, except that we are releasing the buffer
- * here ourselves, and avoiding the biodone call.
+ * here ourselves, and avoiding the xfs_buf_ioend call.
* This is meant for userdata errors; metadata bufs come with
* iodone functions attached, so that we can track down errors.
*/
@@ -1093,7 +1140,7 @@ int
xfs_bdstrat_cb(
struct xfs_buf *bp)
{
- if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
+ if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
trace_xfs_bdstrat_shut(bp, _RET_IP_);
/*
* Metadata write that didn't get logged but
@@ -1195,7 +1242,7 @@ _xfs_buf_ioapply(
if (bp->b_flags & XBF_ORDERED) {
ASSERT(!(bp->b_flags & XBF_READ));
- rw = WRITE_BARRIER;
+ rw = WRITE_FLUSH_FUA;
} else if (bp->b_flags & XBF_LOG_BUFFER) {
ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
bp->b_flags &= ~_XBF_RUN_QUEUES;
@@ -1399,62 +1446,24 @@ xfs_buf_iomove(
*/
void
xfs_wait_buftarg(
- xfs_buftarg_t *btp)
-{
- xfs_buf_t *bp, *n;
- xfs_bufhash_t *hash;
- uint i;
-
- for (i = 0; i < (1 << btp->bt_hashshift); i++) {
- hash = &btp->bt_hash[i];
-again:
- spin_lock(&hash->bh_lock);
- list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
- ASSERT(btp == bp->b_target);
- if (!(bp->b_flags & XBF_FS_MANAGED)) {
- spin_unlock(&hash->bh_lock);
- /*
- * Catch superblock reference count leaks
- * immediately
- */
- BUG_ON(bp->b_bn == 0);
- delay(100);
- goto again;
- }
- }
- spin_unlock(&hash->bh_lock);
- }
-}
-
-/*
- * Allocate buffer hash table for a given target.
- * For devices containing metadata (i.e. not the log/realtime devices)
- * we need to allocate a much larger hash table.
- */
-STATIC void
-xfs_alloc_bufhash(
- xfs_buftarg_t *btp,
- int external)
+ struct xfs_buftarg *btp)
{
- unsigned int i;
+ struct xfs_perag *pag;
+ uint i;
- btp->bt_hashshift = external ? 3 : 12; /* 8 or 4096 buckets */
- btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) *
- sizeof(xfs_bufhash_t));
- for (i = 0; i < (1 << btp->bt_hashshift); i++) {
- spin_lock_init(&btp->bt_hash[i].bh_lock);
- INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
+ for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) {
+ pag = xfs_perag_get(btp->bt_mount, i);
+ spin_lock(&pag->pag_buf_lock);
+ while (rb_first(&pag->pag_buf_tree)) {
+ spin_unlock(&pag->pag_buf_lock);
+ delay(100);
+ spin_lock(&pag->pag_buf_lock);
+ }
+ spin_unlock(&pag->pag_buf_lock);
+ xfs_perag_put(pag);
}
}
-STATIC void
-xfs_free_bufhash(
- xfs_buftarg_t *btp)
-{
- kmem_free_large(btp->bt_hash);
- btp->bt_hash = NULL;
-}
-
/*
* buftarg list for delwrite queue processing
*/
@@ -1487,7 +1496,6 @@ xfs_free_buftarg(
xfs_flush_buftarg(btp, 1);
if (mp->m_flags & XFS_MOUNT_BARRIER)
xfs_blkdev_issue_flush(btp);
- xfs_free_bufhash(btp);
iput(btp->bt_mapping->host);
/* Unregister the buftarg first so that we don't get a
@@ -1609,6 +1617,7 @@ out_error:
xfs_buftarg_t *
xfs_alloc_buftarg(
+ struct xfs_mount *mp,
struct block_device *bdev,
int external,
const char *fsname)
@@ -1617,6 +1626,7 @@ xfs_alloc_buftarg(
btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
+ btp->bt_mount = mp;
btp->bt_dev = bdev->bd_dev;
btp->bt_bdev = bdev;
if (xfs_setsize_buftarg_early(btp, bdev))
@@ -1625,7 +1635,6 @@ xfs_alloc_buftarg(
goto error;
if (xfs_alloc_delwrite_queue(btp, fsname))
goto error;
- xfs_alloc_bufhash(btp, external);
return btp;
error:
@@ -1916,7 +1925,7 @@ xfs_flush_buftarg(
bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
list_del_init(&bp->b_list);
- xfs_iowait(bp);
+ xfs_buf_iowait(bp);
xfs_buf_relse(bp);
}
}
@@ -1933,7 +1942,7 @@ xfs_buf_init(void)
goto out;
xfslogd_workqueue = alloc_workqueue("xfslogd",
- WQ_RESCUER | WQ_HIGHPRI, 1);
+ WQ_MEM_RECLAIM | WQ_HIGHPRI, 1);
if (!xfslogd_workqueue)
goto out_free_buf_zone;
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 2a05614f0b9..383a3f37cf9 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -51,7 +51,6 @@ typedef enum {
#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
#define XBF_DELWRI (1 << 6) /* buffer has dirty pages */
#define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */
-#define XBF_FS_MANAGED (1 << 8) /* filesystem controls freeing memory */
#define XBF_ORDERED (1 << 11)/* use ordered writes */
#define XBF_READ_AHEAD (1 << 12)/* asynchronous read-ahead */
#define XBF_LOG_BUFFER (1 << 13)/* this is a buffer used for the log */
@@ -86,14 +85,6 @@ typedef enum {
*/
#define _XBF_PAGE_LOCKED (1 << 22)
-/*
- * If we try a barrier write, but it fails we have to communicate
- * this to the upper layers. Unfortunately b_error gets overwritten
- * when the buffer is re-issued so we have to add another flag to
- * keep this information.
- */
-#define _XFS_BARRIER_FAILED (1 << 23)
-
typedef unsigned int xfs_buf_flags_t;
#define XFS_BUF_FLAGS \
@@ -104,7 +95,6 @@ typedef unsigned int xfs_buf_flags_t;
{ XBF_DONE, "DONE" }, \
{ XBF_DELWRI, "DELWRI" }, \
{ XBF_STALE, "STALE" }, \
- { XBF_FS_MANAGED, "FS_MANAGED" }, \
{ XBF_ORDERED, "ORDERED" }, \
{ XBF_READ_AHEAD, "READ_AHEAD" }, \
{ XBF_LOCK, "LOCK" }, /* should never be set */\
@@ -114,8 +104,7 @@ typedef unsigned int xfs_buf_flags_t;
{ _XBF_PAGES, "PAGES" }, \
{ _XBF_RUN_QUEUES, "RUN_QUEUES" }, \
{ _XBF_DELWRI_Q, "DELWRI_Q" }, \
- { _XBF_PAGE_LOCKED, "PAGE_LOCKED" }, \
- { _XFS_BARRIER_FAILED, "BARRIER_FAILED" }
+ { _XBF_PAGE_LOCKED, "PAGE_LOCKED" }
typedef enum {
@@ -132,14 +121,11 @@ typedef struct xfs_buftarg {
dev_t bt_dev;
struct block_device *bt_bdev;
struct address_space *bt_mapping;
+ struct xfs_mount *bt_mount;
unsigned int bt_bsize;
unsigned int bt_sshift;
size_t bt_smask;
- /* per device buffer hash table */
- uint bt_hashshift;
- xfs_bufhash_t *bt_hash;
-
/* per device delwri queue */
struct task_struct *bt_task;
struct list_head bt_list;
@@ -167,34 +153,41 @@ typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *);
#define XB_PAGES 2
typedef struct xfs_buf {
+ /*
+ * first cacheline holds all the fields needed for an uncontended cache
+ * hit to be fully processed. The semaphore straddles the cacheline
+ * boundary, but the counter and lock sits on the first cacheline,
+ * which is the only bit that is touched if we hit the semaphore
+ * fast-path on locking.
+ */
+ struct rb_node b_rbnode; /* rbtree node */
+ xfs_off_t b_file_offset; /* offset in file */
+ size_t b_buffer_length;/* size of buffer in bytes */
+ atomic_t b_hold; /* reference count */
+ xfs_buf_flags_t b_flags; /* status flags */
struct semaphore b_sema; /* semaphore for lockables */
- unsigned long b_queuetime; /* time buffer was queued */
- atomic_t b_pin_count; /* pin count */
+
wait_queue_head_t b_waiters; /* unpin waiters */
struct list_head b_list;
- xfs_buf_flags_t b_flags; /* status flags */
- struct list_head b_hash_list; /* hash table list */
- xfs_bufhash_t *b_hash; /* hash table list start */
+ struct xfs_perag *b_pag; /* contains rbtree root */
xfs_buftarg_t *b_target; /* buffer target (device) */
- atomic_t b_hold; /* reference count */
xfs_daddr_t b_bn; /* block number for I/O */
- xfs_off_t b_file_offset; /* offset in file */
- size_t b_buffer_length;/* size of buffer in bytes */
size_t b_count_desired;/* desired transfer size */
void *b_addr; /* virtual address of buffer */
struct work_struct b_iodone_work;
- atomic_t b_io_remaining; /* #outstanding I/O requests */
xfs_buf_iodone_t b_iodone; /* I/O completion function */
xfs_buf_relse_t b_relse; /* releasing function */
struct completion b_iowait; /* queue for I/O waiters */
void *b_fspriv;
void *b_fspriv2;
- struct xfs_mount *b_mount;
- unsigned short b_error; /* error code on I/O */
- unsigned int b_page_count; /* size of page array */
- unsigned int b_offset; /* page offset in first page */
struct page **b_pages; /* array of page pointers */
struct page *b_page_array[XB_PAGES]; /* inline pages */
+ unsigned long b_queuetime; /* time buffer was queued */
+ atomic_t b_pin_count; /* pin count */
+ atomic_t b_io_remaining; /* #outstanding I/O requests */
+ unsigned int b_page_count; /* size of page array */
+ unsigned int b_offset; /* page offset in first page */
+ unsigned short b_error; /* error code on I/O */
#ifdef XFS_BUF_LOCK_TRACKING
int b_last_holder;
#endif
@@ -213,11 +206,13 @@ extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
xfs_buf_flags_t);
extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
-extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *);
+extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
extern void xfs_buf_hold(xfs_buf_t *);
-extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t,
- xfs_buf_flags_t);
+extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t);
+struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp,
+ struct xfs_buftarg *target,
+ xfs_daddr_t daddr, size_t length, int flags);
/* Releasing Buffers */
extern void xfs_buf_free(xfs_buf_t *);
@@ -242,6 +237,8 @@ extern int xfs_buf_iorequest(xfs_buf_t *);
extern int xfs_buf_iowait(xfs_buf_t *);
extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
xfs_buf_rw_t);
+#define xfs_buf_zero(bp, off, len) \
+ xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
static inline int xfs_buf_geterror(xfs_buf_t *bp)
{
@@ -276,8 +273,6 @@ extern void xfs_buf_terminate(void);
XFS_BUF_DONE(bp); \
} while (0)
-#define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED)
-
#define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI)
#define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp)
#define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI)
@@ -356,25 +351,11 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
xfs_buf_rele(bp);
}
-#define xfs_biodone(bp) xfs_buf_ioend(bp, 0)
-
-#define xfs_biomove(bp, off, len, data, rw) \
- xfs_buf_iomove((bp), (off), (len), (data), \
- ((rw) == XBF_WRITE) ? XBRW_WRITE : XBRW_READ)
-
-#define xfs_biozero(bp, off, len) \
- xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
-
-#define xfs_iowait(bp) xfs_buf_iowait(bp)
-
-#define xfs_baread(target, rablkno, ralen) \
- xfs_buf_readahead((target), (rablkno), (ralen), XBF_DONT_BLOCK)
-
-
/*
* Handling of buftargs.
*/
-extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int, const char *);
+extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
+ struct block_device *, int, const char *);
extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
extern void xfs_wait_buftarg(xfs_buftarg_t *);
extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h
deleted file mode 100644
index 55bddf3b609..00000000000
--- a/fs/xfs/linux-2.6/xfs_cred.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_CRED_H__
-#define __XFS_CRED_H__
-
-#include <linux/capability.h>
-
-/*
- * Credentials
- */
-typedef const struct cred cred_t;
-
-#endif /* __XFS_CRED_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index 1f279b012f9..ed88ed16811 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -32,10 +32,9 @@ xfs_tosspages(
xfs_off_t last,
int fiopt)
{
- struct address_space *mapping = VFS_I(ip)->i_mapping;
-
- if (mapping->nrpages)
- truncate_inode_pages(mapping, first);
+ /* can't toss partial tail pages, so mask them out */
+ last &= ~(PAGE_SIZE - 1);
+ truncate_inode_pages_range(VFS_I(ip)->i_mapping, first, last - 1);
}
int
@@ -50,12 +49,11 @@ xfs_flushinval_pages(
trace_xfs_pagecache_inval(ip, first, last);
- if (mapping->nrpages) {
- xfs_iflags_clear(ip, XFS_ITRUNCATED);
- ret = filemap_write_and_wait(mapping);
- if (!ret)
- truncate_inode_pages(mapping, first);
- }
+ xfs_iflags_clear(ip, XFS_ITRUNCATED);
+ ret = filemap_write_and_wait_range(mapping, first,
+ last == -1 ? LLONG_MAX : last);
+ if (!ret)
+ truncate_inode_pages_range(mapping, first, last);
return -ret;
}
@@ -71,10 +69,9 @@ xfs_flush_pages(
int ret = 0;
int ret2;
- if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
- xfs_iflags_clear(ip, XFS_ITRUNCATED);
- ret = -filemap_fdatawrite(mapping);
- }
+ xfs_iflags_clear(ip, XFS_ITRUNCATED);
+ ret = -filemap_fdatawrite_range(mapping, first,
+ last == -1 ? LLONG_MAX : last);
if (flags & XBF_ASYNC)
return ret;
ret2 = xfs_wait_on_pages(ip, first, last);
@@ -91,7 +88,9 @@ xfs_wait_on_pages(
{
struct address_space *mapping = VFS_I(ip)->i_mapping;
- if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
- return -filemap_fdatawait(mapping);
+ if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
+ return -filemap_fdatawait_range(mapping, first,
+ last == -1 ? ip->i_size - 1 : last);
+ }
return 0;
}
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
index 2ae8b1ccb02..76e81cff70b 100644
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -16,7 +16,6 @@
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
-#include "xfs_cred.h"
#include "xfs_sysctl.h"
/*
diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h
deleted file mode 100644
index 69f71caf061..00000000000
--- a/fs/xfs/linux-2.6/xfs_globals.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_GLOBALS_H__
-#define __XFS_GLOBALS_H__
-
-extern uint64_t xfs_panic_mask; /* set to cause more panics */
-
-#endif /* __XFS_GLOBALS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 3b9e626f7cd..2ea238f6d38 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -790,7 +790,7 @@ xfs_ioc_fsgetxattr(
xfs_ilock(ip, XFS_ILOCK_SHARED);
fa.fsx_xflags = xfs_ip2xflags(ip);
fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
- fa.fsx_projid = ip->i_d.di_projid;
+ fa.fsx_projid = xfs_get_projid(ip);
if (attr) {
if (ip->i_afp) {
@@ -909,10 +909,10 @@ xfs_ioctl_setattr(
return XFS_ERROR(EIO);
/*
- * Disallow 32bit project ids because on-disk structure
- * is 16bit only.
+ * Disallow 32bit project ids when projid32bit feature is not enabled.
*/
- if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1))
+ if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) &&
+ !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
return XFS_ERROR(EINVAL);
/*
@@ -961,7 +961,7 @@ xfs_ioctl_setattr(
if (mask & FSX_PROJID) {
if (XFS_IS_QUOTA_RUNNING(mp) &&
XFS_IS_PQUOTA_ON(mp) &&
- ip->i_d.di_projid != fa->fsx_projid) {
+ xfs_get_projid(ip) != fa->fsx_projid) {
ASSERT(tp);
code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
capable(CAP_FOWNER) ?
@@ -1063,12 +1063,12 @@ xfs_ioctl_setattr(
* Change the ownerships and register quota modifications
* in the transaction.
*/
- if (ip->i_d.di_projid != fa->fsx_projid) {
+ if (xfs_get_projid(ip) != fa->fsx_projid) {
if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {
olddquot = xfs_qm_vop_chown(tp, ip,
&ip->i_gdquot, gdqp);
}
- ip->i_d.di_projid = fa->fsx_projid;
+ xfs_set_projid(ip, fa->fsx_projid);
/*
* We may have to rev the inode as well as
@@ -1088,8 +1088,8 @@ xfs_ioctl_setattr(
xfs_diflags_to_linux(ip);
}
+ xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
XFS_STATS_INC(xs_ig_attrchg);
@@ -1301,7 +1301,8 @@ xfs_file_ioctl(
case XFS_IOC_ALLOCSP64:
case XFS_IOC_FREESP64:
case XFS_IOC_RESVSP64:
- case XFS_IOC_UNRESVSP64: {
+ case XFS_IOC_UNRESVSP64:
+ case XFS_IOC_ZERO_RANGE: {
xfs_flock64_t bf;
if (copy_from_user(&bf, arg, sizeof(bf)))
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 6c83f7f62dc..b3486dfa552 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -164,7 +164,8 @@ xfs_ioctl32_bstat_copyin(
get_user(bstat->bs_extsize, &bstat32->bs_extsize) ||
get_user(bstat->bs_extents, &bstat32->bs_extents) ||
get_user(bstat->bs_gen, &bstat32->bs_gen) ||
- get_user(bstat->bs_projid, &bstat32->bs_projid) ||
+ get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) ||
+ get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) ||
get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) ||
get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) ||
get_user(bstat->bs_aextents, &bstat32->bs_aextents))
@@ -218,6 +219,7 @@ xfs_bulkstat_one_fmt_compat(
put_user(buffer->bs_extents, &p32->bs_extents) ||
put_user(buffer->bs_gen, &p32->bs_gen) ||
put_user(buffer->bs_projid, &p32->bs_projid) ||
+ put_user(buffer->bs_projid_hi, &p32->bs_projid_hi) ||
put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) ||
put_user(buffer->bs_dmstate, &p32->bs_dmstate) ||
put_user(buffer->bs_aextents, &p32->bs_aextents))
@@ -574,6 +576,7 @@ xfs_file_compat_ioctl(
case XFS_IOC_FSGEOMETRY_V1:
case XFS_IOC_FSGROWFSDATA:
case XFS_IOC_FSGROWFSRT:
+ case XFS_IOC_ZERO_RANGE:
return xfs_file_ioctl(filp, cmd, p);
#else
case XFS_IOC_ALLOCSP_32:
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h
index 1024c4f8ba0..08b605792a9 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.h
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.h
@@ -65,8 +65,10 @@ typedef struct compat_xfs_bstat {
__s32 bs_extsize; /* extent size */
__s32 bs_extents; /* number of extents */
__u32 bs_gen; /* generation count */
- __u16 bs_projid; /* project id */
- unsigned char bs_pad[14]; /* pad space, unused */
+ __u16 bs_projid_lo; /* lower part of project id */
+#define bs_projid bs_projid_lo /* (previously just bs_projid) */
+ __u16 bs_projid_hi; /* high part of project id */
+ unsigned char bs_pad[12]; /* pad space, unused */
__u32 bs_dmevmask; /* DMIG event mask */
__u16 bs_dmstate; /* DMIG state info */
__u16 bs_aextents; /* attribute number of extents */
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index b1fc2a6bfe8..ec858e09d54 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -95,41 +95,6 @@ xfs_mark_inode_dirty(
}
/*
- * Change the requested timestamp in the given inode.
- * We don't lock across timestamp updates, and we don't log them but
- * we do record the fact that there is dirty information in core.
- */
-void
-xfs_ichgtime(
- xfs_inode_t *ip,
- int flags)
-{
- struct inode *inode = VFS_I(ip);
- timespec_t tv;
- int sync_it = 0;
-
- tv = current_fs_time(inode->i_sb);
-
- if ((flags & XFS_ICHGTIME_MOD) &&
- !timespec_equal(&inode->i_mtime, &tv)) {
- inode->i_mtime = tv;
- sync_it = 1;
- }
- if ((flags & XFS_ICHGTIME_CHG) &&
- !timespec_equal(&inode->i_ctime, &tv)) {
- inode->i_ctime = tv;
- sync_it = 1;
- }
-
- /*
- * Update complete - now make sure everyone knows that the inode
- * is dirty.
- */
- if (sync_it)
- xfs_mark_inode_dirty_sync(ip);
-}
-
-/*
* Hook in SELinux. This is not quite correct yet, what we really need
* here (as we do for default ACLs) is a mechanism by which creation of
* these attrs can be journalled at inode creation time (along with the
@@ -224,7 +189,7 @@ xfs_vn_mknod(
}
xfs_dentry_to_name(&name, dentry);
- error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL);
+ error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
if (unlikely(error))
goto out_free_acl;
@@ -397,7 +362,7 @@ xfs_vn_symlink(
(irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
xfs_dentry_to_name(&name, dentry);
- error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip, NULL);
+ error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip);
if (unlikely(error))
goto out;
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index 2fa0bd9ebc7..214ddd71ff7 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -71,6 +71,7 @@
#include <linux/random.h>
#include <linux/ctype.h>
#include <linux/writeback.h>
+#include <linux/capability.h>
#include <asm/page.h>
#include <asm/div64.h>
@@ -79,14 +80,12 @@
#include <asm/byteorder.h>
#include <asm/unaligned.h>
-#include <xfs_cred.h>
#include <xfs_vnode.h>
#include <xfs_stats.h>
#include <xfs_sysctl.h>
#include <xfs_iops.h>
#include <xfs_aops.h>
#include <xfs_super.h>
-#include <xfs_globals.h>
#include <xfs_buf.h>
/*
@@ -144,7 +143,7 @@
#define SYNCHRONIZE() barrier()
#define __return_address __builtin_return_address(0)
-#define dfltprid 0
+#define XFS_PROJID_DEFAULT 0
#define MAXPATHLEN 1024
#define MIN(a,b) (min(a,b))
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index a4e07974955..ab31ce5aeaf 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -44,7 +44,6 @@
#include "xfs_buf_item.h"
#include "xfs_utils.h"
#include "xfs_vnodeops.h"
-#include "xfs_version.h"
#include "xfs_log_priv.h"
#include "xfs_trans_priv.h"
#include "xfs_filestream.h"
@@ -645,7 +644,7 @@ xfs_barrier_test(
XFS_BUF_ORDERED(sbp);
xfsbdstrat(mp, sbp);
- error = xfs_iowait(sbp);
+ error = xfs_buf_iowait(sbp);
/*
* Clear all the flags we set and possible error state in the
@@ -693,8 +692,7 @@ void
xfs_blkdev_issue_flush(
xfs_buftarg_t *buftarg)
{
- blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL,
- BLKDEV_IFL_WAIT);
+ blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL);
}
STATIC void
@@ -758,18 +756,20 @@ xfs_open_devices(
* Setup xfs_mount buffer target pointers
*/
error = ENOMEM;
- mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0, mp->m_fsname);
+ mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname);
if (!mp->m_ddev_targp)
goto out_close_rtdev;
if (rtdev) {
- mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1, mp->m_fsname);
+ mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1,
+ mp->m_fsname);
if (!mp->m_rtdev_targp)
goto out_free_ddev_targ;
}
if (logdev && logdev != ddev) {
- mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1, mp->m_fsname);
+ mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1,
+ mp->m_fsname);
if (!mp->m_logdev_targp)
goto out_free_rtdev_targ;
} else {
@@ -972,12 +972,7 @@ xfs_fs_inode_init_once(
/*
* Dirty the XFS inode when mark_inode_dirty_sync() is called so that
- * we catch unlogged VFS level updates to the inode. Care must be taken
- * here - the transaction code calls mark_inode_dirty_sync() to mark the
- * VFS inode dirty in a transaction and clears the i_update_core field;
- * it must clear the field after calling mark_inode_dirty_sync() to
- * correctly indicate that the dirty state has been propagated into the
- * inode log item.
+ * we catch unlogged VFS level updates to the inode.
*
* We need the barrier() to maintain correct ordering between unlogged
* updates and the transaction commit code that clears the i_update_core
@@ -1521,8 +1516,9 @@ xfs_fs_fill_super(
if (error)
goto out_free_fsname;
- if (xfs_icsb_init_counters(mp))
- mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
+ error = xfs_icsb_init_counters(mp);
+ if (error)
+ goto out_close_devices;
error = xfs_readsb(mp, flags);
if (error)
@@ -1583,6 +1579,7 @@ xfs_fs_fill_super(
xfs_freesb(mp);
out_destroy_counters:
xfs_icsb_destroy_counters(mp);
+ out_close_devices:
xfs_close_devices(mp);
out_free_fsname:
xfs_free_fsname(mp);
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 1ef4a4d2d99..50a3266c999 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -62,6 +62,7 @@ extern void xfs_qm_exit(void);
# define XFS_DBG_STRING "no debug"
#endif
+#define XFS_VERSION_STRING "SGI XFS"
#define XFS_BUILD_OPTIONS XFS_ACL_STRING \
XFS_SECURITY_STRING \
XFS_REALTIME_STRING \
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index d59c4a65d49..37d33254981 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -39,42 +39,39 @@
#include <linux/kthread.h>
#include <linux/freezer.h>
+/*
+ * The inode lookup is done in batches to keep the amount of lock traffic and
+ * radix tree lookups to a minimum. The batch size is a trade off between
+ * lookup reduction and stack usage. This is in the reclaim path, so we can't
+ * be too greedy.
+ */
+#define XFS_LOOKUP_BATCH 32
-STATIC xfs_inode_t *
-xfs_inode_ag_lookup(
- struct xfs_mount *mp,
- struct xfs_perag *pag,
- uint32_t *first_index,
- int tag)
+STATIC int
+xfs_inode_ag_walk_grab(
+ struct xfs_inode *ip)
{
- int nr_found;
- struct xfs_inode *ip;
+ struct inode *inode = VFS_I(ip);
- /*
- * use a gang lookup to find the next inode in the tree
- * as the tree is sparse and a gang lookup walks to find
- * the number of objects requested.
- */
- if (tag == XFS_ICI_NO_TAG) {
- nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
- (void **)&ip, *first_index, 1);
- } else {
- nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
- (void **)&ip, *first_index, 1, tag);
+ /* nothing to sync during shutdown */
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ return EFSCORRUPTED;
+
+ /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
+ if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
+ return ENOENT;
+
+ /* If we can't grab the inode, it must on it's way to reclaim. */
+ if (!igrab(inode))
+ return ENOENT;
+
+ if (is_bad_inode(inode)) {
+ IRELE(ip);
+ return ENOENT;
}
- if (!nr_found)
- return NULL;
- /*
- * Update the index for the next lookup. Catch overflows
- * into the next AG range which can occur if we have inodes
- * in the last block of the AG and we are currently
- * pointing to the last inode.
- */
- *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
- if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
- return NULL;
- return ip;
+ /* inode is valid */
+ return 0;
}
STATIC int
@@ -83,49 +80,75 @@ xfs_inode_ag_walk(
struct xfs_perag *pag,
int (*execute)(struct xfs_inode *ip,
struct xfs_perag *pag, int flags),
- int flags,
- int tag,
- int exclusive,
- int *nr_to_scan)
+ int flags)
{
uint32_t first_index;
int last_error = 0;
int skipped;
+ int done;
+ int nr_found;
restart:
+ done = 0;
skipped = 0;
first_index = 0;
+ nr_found = 0;
do {
+ struct xfs_inode *batch[XFS_LOOKUP_BATCH];
int error = 0;
- xfs_inode_t *ip;
+ int i;
- if (exclusive)
- write_lock(&pag->pag_ici_lock);
- else
- read_lock(&pag->pag_ici_lock);
- ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
- if (!ip) {
- if (exclusive)
- write_unlock(&pag->pag_ici_lock);
- else
- read_unlock(&pag->pag_ici_lock);
+ read_lock(&pag->pag_ici_lock);
+ nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
+ (void **)batch, first_index,
+ XFS_LOOKUP_BATCH);
+ if (!nr_found) {
+ read_unlock(&pag->pag_ici_lock);
break;
}
- /* execute releases pag->pag_ici_lock */
- error = execute(ip, pag, flags);
- if (error == EAGAIN) {
- skipped++;
- continue;
+ /*
+ * Grab the inodes before we drop the lock. if we found
+ * nothing, nr == 0 and the loop will be skipped.
+ */
+ for (i = 0; i < nr_found; i++) {
+ struct xfs_inode *ip = batch[i];
+
+ if (done || xfs_inode_ag_walk_grab(ip))
+ batch[i] = NULL;
+
+ /*
+ * Update the index for the next lookup. Catch overflows
+ * into the next AG range which can occur if we have inodes
+ * in the last block of the AG and we are currently
+ * pointing to the last inode.
+ */
+ first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+ if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+ done = 1;
+ }
+
+ /* unlock now we've grabbed the inodes. */
+ read_unlock(&pag->pag_ici_lock);
+
+ for (i = 0; i < nr_found; i++) {
+ if (!batch[i])
+ continue;
+ error = execute(batch[i], pag, flags);
+ IRELE(batch[i]);
+ if (error == EAGAIN) {
+ skipped++;
+ continue;
+ }
+ if (error && last_error != EFSCORRUPTED)
+ last_error = error;
}
- if (error)
- last_error = error;
/* bail out if the filesystem is corrupted. */
if (error == EFSCORRUPTED)
break;
- } while ((*nr_to_scan)--);
+ } while (nr_found && !done);
if (skipped) {
delay(1);
@@ -134,110 +157,32 @@ restart:
return last_error;
}
-/*
- * Select the next per-ag structure to iterate during the walk. The reclaim
- * walk is optimised only to walk AGs with reclaimable inodes in them.
- */
-static struct xfs_perag *
-xfs_inode_ag_iter_next_pag(
- struct xfs_mount *mp,
- xfs_agnumber_t *first,
- int tag)
-{
- struct xfs_perag *pag = NULL;
-
- if (tag == XFS_ICI_RECLAIM_TAG) {
- int found;
- int ref;
-
- spin_lock(&mp->m_perag_lock);
- found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
- (void **)&pag, *first, 1, tag);
- if (found <= 0) {
- spin_unlock(&mp->m_perag_lock);
- return NULL;
- }
- *first = pag->pag_agno + 1;
- /* open coded pag reference increment */
- ref = atomic_inc_return(&pag->pag_ref);
- spin_unlock(&mp->m_perag_lock);
- trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_);
- } else {
- pag = xfs_perag_get(mp, *first);
- (*first)++;
- }
- return pag;
-}
-
int
xfs_inode_ag_iterator(
struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip,
struct xfs_perag *pag, int flags),
- int flags,
- int tag,
- int exclusive,
- int *nr_to_scan)
+ int flags)
{
struct xfs_perag *pag;
int error = 0;
int last_error = 0;
xfs_agnumber_t ag;
- int nr;
- nr = nr_to_scan ? *nr_to_scan : INT_MAX;
ag = 0;
- while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) {
- error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
- exclusive, &nr);
+ while ((pag = xfs_perag_get(mp, ag))) {
+ ag = pag->pag_agno + 1;
+ error = xfs_inode_ag_walk(mp, pag, execute, flags);
xfs_perag_put(pag);
if (error) {
last_error = error;
if (error == EFSCORRUPTED)
break;
}
- if (nr <= 0)
- break;
}
- if (nr_to_scan)
- *nr_to_scan = nr;
return XFS_ERROR(last_error);
}
-/* must be called with pag_ici_lock held and releases it */
-int
-xfs_sync_inode_valid(
- struct xfs_inode *ip,
- struct xfs_perag *pag)
-{
- struct inode *inode = VFS_I(ip);
- int error = EFSCORRUPTED;
-
- /* nothing to sync during shutdown */
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
- goto out_unlock;
-
- /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
- error = ENOENT;
- if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
- goto out_unlock;
-
- /* If we can't grab the inode, it must on it's way to reclaim. */
- if (!igrab(inode))
- goto out_unlock;
-
- if (is_bad_inode(inode)) {
- IRELE(ip);
- goto out_unlock;
- }
-
- /* inode is valid */
- error = 0;
-out_unlock:
- read_unlock(&pag->pag_ici_lock);
- return error;
-}
-
STATIC int
xfs_sync_inode_data(
struct xfs_inode *ip,
@@ -248,10 +193,6 @@ xfs_sync_inode_data(
struct address_space *mapping = inode->i_mapping;
int error = 0;
- error = xfs_sync_inode_valid(ip, pag);
- if (error)
- return error;
-
if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
goto out_wait;
@@ -268,7 +209,6 @@ xfs_sync_inode_data(
out_wait:
if (flags & SYNC_WAIT)
xfs_ioend_wait(ip);
- IRELE(ip);
return error;
}
@@ -280,10 +220,6 @@ xfs_sync_inode_attr(
{
int error = 0;
- error = xfs_sync_inode_valid(ip, pag);
- if (error)
- return error;
-
xfs_ilock(ip, XFS_ILOCK_SHARED);
if (xfs_inode_clean(ip))
goto out_unlock;
@@ -302,7 +238,6 @@ xfs_sync_inode_attr(
out_unlock:
xfs_iunlock(ip, XFS_ILOCK_SHARED);
- IRELE(ip);
return error;
}
@@ -318,8 +253,7 @@ xfs_sync_data(
ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
- error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
- XFS_ICI_NO_TAG, 0, NULL);
+ error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags);
if (error)
return XFS_ERROR(error);
@@ -337,8 +271,7 @@ xfs_sync_attr(
{
ASSERT((flags & ~SYNC_WAIT) == 0);
- return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
- XFS_ICI_NO_TAG, 0, NULL);
+ return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags);
}
STATIC int
@@ -668,14 +601,11 @@ xfs_inode_set_reclaim_tag(
xfs_perag_put(pag);
}
-void
-__xfs_inode_clear_reclaim_tag(
- xfs_mount_t *mp,
+STATIC void
+__xfs_inode_clear_reclaim(
xfs_perag_t *pag,
xfs_inode_t *ip)
{
- radix_tree_tag_clear(&pag->pag_ici_root,
- XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
pag->pag_ici_reclaimable--;
if (!pag->pag_ici_reclaimable) {
/* clear the reclaim tag from the perag radix tree */
@@ -689,6 +619,54 @@ __xfs_inode_clear_reclaim_tag(
}
}
+void
+__xfs_inode_clear_reclaim_tag(
+ xfs_mount_t *mp,
+ xfs_perag_t *pag,
+ xfs_inode_t *ip)
+{
+ radix_tree_tag_clear(&pag->pag_ici_root,
+ XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
+ __xfs_inode_clear_reclaim(pag, ip);
+}
+
+/*
+ * Grab the inode for reclaim exclusively.
+ * Return 0 if we grabbed it, non-zero otherwise.
+ */
+STATIC int
+xfs_reclaim_inode_grab(
+ struct xfs_inode *ip,
+ int flags)
+{
+
+ /*
+ * do some unlocked checks first to avoid unnecceary lock traffic.
+ * The first is a flush lock check, the second is a already in reclaim
+ * check. Only do these checks if we are not going to block on locks.
+ */
+ if ((flags & SYNC_TRYLOCK) &&
+ (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) {
+ return 1;
+ }
+
+ /*
+ * The radix tree lock here protects a thread in xfs_iget from racing
+ * with us starting reclaim on the inode. Once we have the
+ * XFS_IRECLAIM flag set it will not touch us.
+ */
+ spin_lock(&ip->i_flags_lock);
+ ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+ if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
+ /* ignore as it is already under reclaim */
+ spin_unlock(&ip->i_flags_lock);
+ return 1;
+ }
+ __xfs_iflags_set(ip, XFS_IRECLAIM);
+ spin_unlock(&ip->i_flags_lock);
+ return 0;
+}
+
/*
* Inodes in different states need to be treated differently, and the return
* value of xfs_iflush is not sufficient to get this right. The following table
@@ -747,23 +725,6 @@ xfs_reclaim_inode(
{
int error = 0;
- /*
- * The radix tree lock here protects a thread in xfs_iget from racing
- * with us starting reclaim on the inode. Once we have the
- * XFS_IRECLAIM flag set it will not touch us.
- */
- spin_lock(&ip->i_flags_lock);
- ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
- if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
- /* ignore as it is already under reclaim */
- spin_unlock(&ip->i_flags_lock);
- write_unlock(&pag->pag_ici_lock);
- return 0;
- }
- __xfs_iflags_set(ip, XFS_IRECLAIM);
- spin_unlock(&ip->i_flags_lock);
- write_unlock(&pag->pag_ici_lock);
-
xfs_ilock(ip, XFS_ILOCK_EXCL);
if (!xfs_iflock_nowait(ip)) {
if (!(sync_mode & SYNC_WAIT))
@@ -838,6 +799,7 @@ reclaim:
if (!radix_tree_delete(&pag->pag_ici_root,
XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
ASSERT(0);
+ __xfs_inode_clear_reclaim(pag, ip);
write_unlock(&pag->pag_ici_lock);
/*
@@ -859,13 +821,126 @@ reclaim:
}
+/*
+ * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
+ * corrupted, we still want to try to reclaim all the inodes. If we don't,
+ * then a shut down during filesystem unmount reclaim walk leak all the
+ * unreclaimed inodes.
+ */
+int
+xfs_reclaim_inodes_ag(
+ struct xfs_mount *mp,
+ int flags,
+ int *nr_to_scan)
+{
+ struct xfs_perag *pag;
+ int error = 0;
+ int last_error = 0;
+ xfs_agnumber_t ag;
+ int trylock = flags & SYNC_TRYLOCK;
+ int skipped;
+
+restart:
+ ag = 0;
+ skipped = 0;
+ while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
+ unsigned long first_index = 0;
+ int done = 0;
+ int nr_found = 0;
+
+ ag = pag->pag_agno + 1;
+
+ if (trylock) {
+ if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
+ skipped++;
+ continue;
+ }
+ first_index = pag->pag_ici_reclaim_cursor;
+ } else
+ mutex_lock(&pag->pag_ici_reclaim_lock);
+
+ do {
+ struct xfs_inode *batch[XFS_LOOKUP_BATCH];
+ int i;
+
+ write_lock(&pag->pag_ici_lock);
+ nr_found = radix_tree_gang_lookup_tag(
+ &pag->pag_ici_root,
+ (void **)batch, first_index,
+ XFS_LOOKUP_BATCH,
+ XFS_ICI_RECLAIM_TAG);
+ if (!nr_found) {
+ write_unlock(&pag->pag_ici_lock);
+ break;
+ }
+
+ /*
+ * Grab the inodes before we drop the lock. if we found
+ * nothing, nr == 0 and the loop will be skipped.
+ */
+ for (i = 0; i < nr_found; i++) {
+ struct xfs_inode *ip = batch[i];
+
+ if (done || xfs_reclaim_inode_grab(ip, flags))
+ batch[i] = NULL;
+
+ /*
+ * Update the index for the next lookup. Catch
+ * overflows into the next AG range which can
+ * occur if we have inodes in the last block of
+ * the AG and we are currently pointing to the
+ * last inode.
+ */
+ first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+ if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+ done = 1;
+ }
+
+ /* unlock now we've grabbed the inodes. */
+ write_unlock(&pag->pag_ici_lock);
+
+ for (i = 0; i < nr_found; i++) {
+ if (!batch[i])
+ continue;
+ error = xfs_reclaim_inode(batch[i], pag, flags);
+ if (error && last_error != EFSCORRUPTED)
+ last_error = error;
+ }
+
+ *nr_to_scan -= XFS_LOOKUP_BATCH;
+
+ } while (nr_found && !done && *nr_to_scan > 0);
+
+ if (trylock && !done)
+ pag->pag_ici_reclaim_cursor = first_index;
+ else
+ pag->pag_ici_reclaim_cursor = 0;
+ mutex_unlock(&pag->pag_ici_reclaim_lock);
+ xfs_perag_put(pag);
+ }
+
+ /*
+ * if we skipped any AG, and we still have scan count remaining, do
+ * another pass this time using blocking reclaim semantics (i.e
+ * waiting on the reclaim locks and ignoring the reclaim cursors). This
+ * ensure that when we get more reclaimers than AGs we block rather
+ * than spin trying to execute reclaim.
+ */
+ if (trylock && skipped && *nr_to_scan > 0) {
+ trylock = 0;
+ goto restart;
+ }
+ return XFS_ERROR(last_error);
+}
+
int
xfs_reclaim_inodes(
xfs_mount_t *mp,
int mode)
{
- return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
- XFS_ICI_RECLAIM_TAG, 1, NULL);
+ int nr_to_scan = INT_MAX;
+
+ return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
}
/*
@@ -887,17 +962,16 @@ xfs_reclaim_inode_shrink(
if (!(gfp_mask & __GFP_FS))
return -1;
- xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
- XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
- /* if we don't exhaust the scan, don't bother coming back */
+ xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan);
+ /* terminate if we don't exhaust the scan */
if (nr_to_scan > 0)
return -1;
}
reclaimable = 0;
ag = 0;
- while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag,
- XFS_ICI_RECLAIM_TAG))) {
+ while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
+ ag = pag->pag_agno + 1;
reclaimable += pag->pag_ici_reclaimable;
xfs_perag_put(pag);
}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index fe78726196f..32ba6628290 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -47,10 +47,10 @@ void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
struct xfs_inode *ip);
-int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
+int xfs_sync_inode_grab(struct xfs_inode *ip);
int xfs_inode_ag_iterator(struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
- int flags, int tag, int write_lock, int *nr_to_scan);
+ int flags);
void xfs_inode_shrinker_register(struct xfs_mount *mp);
void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index be5dffd282a..acef2e98c59 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -124,7 +124,7 @@ DEFINE_EVENT(xfs_perag_class, name, \
unsigned long caller_ip), \
TP_ARGS(mp, agno, refcount, caller_ip))
DEFINE_PERAG_REF_EVENT(xfs_perag_get);
-DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim);
+DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
DEFINE_PERAG_REF_EVENT(xfs_perag_put);
DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
@@ -325,13 +325,12 @@ DEFINE_BUF_EVENT(xfs_buf_lock);
DEFINE_BUF_EVENT(xfs_buf_lock_done);
DEFINE_BUF_EVENT(xfs_buf_cond_lock);
DEFINE_BUF_EVENT(xfs_buf_unlock);
-DEFINE_BUF_EVENT(xfs_buf_ordered_retry);
DEFINE_BUF_EVENT(xfs_buf_iowait);
DEFINE_BUF_EVENT(xfs_buf_iowait_done);
DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue);
DEFINE_BUF_EVENT(xfs_buf_delwri_split);
-DEFINE_BUF_EVENT(xfs_buf_get_noaddr);
+DEFINE_BUF_EVENT(xfs_buf_get_uncached);
DEFINE_BUF_EVENT(xfs_bdstrat_shut);
DEFINE_BUF_EVENT(xfs_buf_item_relse);
DEFINE_BUF_EVENT(xfs_buf_item_iodone);
diff --git a/fs/xfs/linux-2.6/xfs_version.h b/fs/xfs/linux-2.6/xfs_version.h
deleted file mode 100644
index f8d279d7563..00000000000
--- a/fs/xfs/linux-2.6/xfs_version.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_VERSION_H__
-#define __XFS_VERSION_H__
-
-/*
- * Dummy file that can contain a timestamp to put into the
- * XFS init string, to help users keep track of what they're
- * running
- */
-
-#define XFS_VERSION_STRING "SGI XFS"
-
-#endif /* __XFS_VERSION_H__ */