aboutsummaryrefslogtreecommitdiff
path: root/fs/xfs/linux-2.6
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/linux-2.6')
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.c649
-rw-r--r--fs/xfs/linux-2.6/xfs_aops.h4
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c303
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h179
-rw-r--r--fs/xfs/linux-2.6/xfs_cred.h28
-rw-r--r--fs/xfs/linux-2.6/xfs_dmapi_priv.h28
-rw-r--r--fs/xfs/linux-2.6/xfs_export.c8
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c104
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.c35
-rw-r--r--fs/xfs/linux-2.6/xfs_fs_subr.h25
-rw-r--r--fs/xfs/linux-2.6/xfs_globals.c1
-rw-r--r--fs/xfs/linux-2.6/xfs_globals.h23
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl.c49
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_ioctl32.h6
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c77
-rw-r--r--fs/xfs/linux-2.6/xfs_linux.h8
-rw-r--r--fs/xfs/linux-2.6/xfs_quotaops.c11
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c227
-rw-r--r--fs/xfs/linux-2.6/xfs_super.h8
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c521
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h7
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.c4
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h133
-rw-r--r--fs/xfs/linux-2.6/xfs_version.h29
26 files changed, 1014 insertions, 1466 deletions
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 9f769b5b38f..b2771862fd3 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -225,7 +225,7 @@ xfs_check_acl(struct inode *inode, int mask)
struct posix_acl *acl;
int error = -EAGAIN;
- xfs_itrace_entry(ip);
+ trace_xfs_check_acl(ip);
/*
* If there is no attribute fork no ACL exists on this inode and
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 34640d6dbdc..c9af48fffcd 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -21,19 +21,12 @@
#include "xfs_inum.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_trans.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
-#include "xfs_btree.h"
#include "xfs_error.h"
#include "xfs_rw.h"
#include "xfs_iomap.h"
@@ -92,18 +85,15 @@ void
xfs_count_page_state(
struct page *page,
int *delalloc,
- int *unmapped,
int *unwritten)
{
struct buffer_head *bh, *head;
- *delalloc = *unmapped = *unwritten = 0;
+ *delalloc = *unwritten = 0;
bh = head = page_buffers(page);
do {
- if (buffer_uptodate(bh) && !buffer_mapped(bh))
- (*unmapped) = 1;
- else if (buffer_unwritten(bh))
+ if (buffer_unwritten(bh))
(*unwritten) = 1;
else if (buffer_delay(bh))
(*delalloc) = 1;
@@ -212,23 +202,17 @@ xfs_setfilesize(
}
/*
- * Schedule IO completion handling on a xfsdatad if this was
- * the final hold on this ioend. If we are asked to wait,
- * flush the workqueue.
+ * Schedule IO completion handling on the final put of an ioend.
*/
STATIC void
xfs_finish_ioend(
- xfs_ioend_t *ioend,
- int wait)
+ struct xfs_ioend *ioend)
{
if (atomic_dec_and_test(&ioend->io_remaining)) {
- struct workqueue_struct *wq;
-
- wq = (ioend->io_type == IO_UNWRITTEN) ?
- xfsconvertd_workqueue : xfsdatad_workqueue;
- queue_work(wq, &ioend->io_work);
- if (wait)
- flush_workqueue(wq);
+ if (ioend->io_type == IO_UNWRITTEN)
+ queue_work(xfsconvertd_workqueue, &ioend->io_work);
+ else
+ queue_work(xfsdatad_workqueue, &ioend->io_work);
}
}
@@ -272,11 +256,25 @@ xfs_end_io(
*/
if (error == EAGAIN) {
atomic_inc(&ioend->io_remaining);
- xfs_finish_ioend(ioend, 0);
+ xfs_finish_ioend(ioend);
/* ensure we don't spin on blocked ioends */
delay(1);
- } else
+ } else {
+ if (ioend->io_iocb)
+ aio_complete(ioend->io_iocb, ioend->io_result, 0);
xfs_destroy_ioend(ioend);
+ }
+}
+
+/*
+ * Call IO completion handling in caller context on the final put of an ioend.
+ */
+STATIC void
+xfs_finish_ioend_sync(
+ struct xfs_ioend *ioend)
+{
+ if (atomic_dec_and_test(&ioend->io_remaining))
+ xfs_end_io(&ioend->io_work);
}
/*
@@ -309,6 +307,8 @@ xfs_alloc_ioend(
atomic_inc(&XFS_I(ioend->io_inode)->i_iocount);
ioend->io_offset = 0;
ioend->io_size = 0;
+ ioend->io_iocb = NULL;
+ ioend->io_result = 0;
INIT_WORK(&ioend->io_work, xfs_end_io);
return ioend;
@@ -358,7 +358,7 @@ xfs_end_bio(
bio->bi_end_io = NULL;
bio_put(bio);
- xfs_finish_ioend(ioend, 0);
+ xfs_finish_ioend(ioend);
}
STATIC void
@@ -500,7 +500,7 @@ xfs_submit_ioend(
}
if (bio)
xfs_submit_ioend_bio(wbc, ioend, bio);
- xfs_finish_ioend(ioend, 0);
+ xfs_finish_ioend(ioend);
} while ((ioend = next) != NULL);
}
@@ -614,31 +614,30 @@ xfs_map_at_offset(
STATIC unsigned int
xfs_probe_page(
struct page *page,
- unsigned int pg_offset,
- int mapped)
+ unsigned int pg_offset)
{
+ struct buffer_head *bh, *head;
int ret = 0;
if (PageWriteback(page))
return 0;
+ if (!PageDirty(page))
+ return 0;
+ if (!page->mapping)
+ return 0;
+ if (!page_has_buffers(page))
+ return 0;
- if (page->mapping && PageDirty(page)) {
- if (page_has_buffers(page)) {
- struct buffer_head *bh, *head;
-
- bh = head = page_buffers(page);
- do {
- if (!buffer_uptodate(bh))
- break;
- if (mapped != buffer_mapped(bh))
- break;
- ret += bh->b_size;
- if (ret >= pg_offset)
- break;
- } while ((bh = bh->b_this_page) != head);
- } else
- ret = mapped ? 0 : PAGE_CACHE_SIZE;
- }
+ bh = head = page_buffers(page);
+ do {
+ if (!buffer_uptodate(bh))
+ break;
+ if (!buffer_mapped(bh))
+ break;
+ ret += bh->b_size;
+ if (ret >= pg_offset)
+ break;
+ } while ((bh = bh->b_this_page) != head);
return ret;
}
@@ -648,8 +647,7 @@ xfs_probe_cluster(
struct inode *inode,
struct page *startpage,
struct buffer_head *bh,
- struct buffer_head *head,
- int mapped)
+ struct buffer_head *head)
{
struct pagevec pvec;
pgoff_t tindex, tlast, tloff;
@@ -658,7 +656,7 @@ xfs_probe_cluster(
/* First sum forwards in this page */
do {
- if (!buffer_uptodate(bh) || (mapped != buffer_mapped(bh)))
+ if (!buffer_uptodate(bh) || !buffer_mapped(bh))
return total;
total += bh->b_size;
} while ((bh = bh->b_this_page) != head);
@@ -692,7 +690,7 @@ xfs_probe_cluster(
pg_offset = PAGE_CACHE_SIZE;
if (page->index == tindex && trylock_page(page)) {
- pg_len = xfs_probe_page(page, pg_offset, mapped);
+ pg_len = xfs_probe_page(page, pg_offset);
unlock_page(page);
}
@@ -761,7 +759,6 @@ xfs_convert_page(
struct xfs_bmbt_irec *imap,
xfs_ioend_t **ioendp,
struct writeback_control *wbc,
- int startio,
int all_bh)
{
struct buffer_head *bh, *head;
@@ -832,19 +829,14 @@ xfs_convert_page(
ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
xfs_map_at_offset(inode, bh, imap, offset);
- if (startio) {
- xfs_add_to_ioend(inode, bh, offset,
- type, ioendp, done);
- } else {
- set_buffer_dirty(bh);
- unlock_buffer(bh);
- mark_buffer_dirty(bh);
- }
+ xfs_add_to_ioend(inode, bh, offset, type,
+ ioendp, done);
+
page_dirty--;
count++;
} else {
type = IO_NEW;
- if (buffer_mapped(bh) && all_bh && startio) {
+ if (buffer_mapped(bh) && all_bh) {
lock_buffer(bh);
xfs_add_to_ioend(inode, bh, offset,
type, ioendp, done);
@@ -859,14 +851,12 @@ xfs_convert_page(
if (uptodate && bh == head)
SetPageUptodate(page);
- if (startio) {
- if (count) {
- wbc->nr_to_write--;
- if (wbc->nr_to_write <= 0)
- done = 1;
- }
- xfs_start_page_writeback(page, !page_dirty, count);
+ if (count) {
+ if (--wbc->nr_to_write <= 0 &&
+ wbc->sync_mode == WB_SYNC_NONE)
+ done = 1;
}
+ xfs_start_page_writeback(page, !page_dirty, count);
return done;
fail_unlock_page:
@@ -886,7 +876,6 @@ xfs_cluster_write(
struct xfs_bmbt_irec *imap,
xfs_ioend_t **ioendp,
struct writeback_control *wbc,
- int startio,
int all_bh,
pgoff_t tlast)
{
@@ -902,7 +891,7 @@ xfs_cluster_write(
for (i = 0; i < pagevec_count(&pvec); i++) {
done = xfs_convert_page(inode, pvec.pages[i], tindex++,
- imap, ioendp, wbc, startio, all_bh);
+ imap, ioendp, wbc, all_bh);
if (done)
break;
}
@@ -981,7 +970,7 @@ xfs_aops_discard_page(
*/
error = xfs_bmapi(NULL, ip, offset_fsb, 1,
XFS_BMAPI_ENTIRE, NULL, 0, &imap,
- &nimaps, NULL, NULL);
+ &nimaps, NULL);
if (error) {
/* something screwed, just bail */
@@ -1009,7 +998,7 @@ xfs_aops_discard_page(
*/
xfs_bmap_init(&flist, &firstblock);
error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock,
- &flist, NULL, &done);
+ &flist, &done);
ASSERT(!flist.xbf_count && !flist.xbf_first);
if (error) {
@@ -1032,50 +1021,66 @@ out_invalidate:
}
/*
- * Calling this without startio set means we are being asked to make a dirty
- * page ready for freeing it's buffers. When called with startio set then
- * we are coming from writepage.
+ * Write out a dirty page.
+ *
+ * For delalloc space on the page we need to allocate space and flush it.
+ * For unwritten space on the page we need to start the conversion to
+ * regular allocated space.
+ * For any other dirty buffer heads on the page we should flush them.
*
- * When called with startio set it is important that we write the WHOLE
- * page if possible.
- * The bh->b_state's cannot know if any of the blocks or which block for
- * that matter are dirty due to mmap writes, and therefore bh uptodate is
- * only valid if the page itself isn't completely uptodate. Some layers
- * may clear the page dirty flag prior to calling write page, under the
- * assumption the entire page will be written out; by not writing out the
- * whole page the page can be reused before all valid dirty data is
- * written out. Note: in the case of a page that has been dirty'd by
- * mapwrite and but partially setup by block_prepare_write the
- * bh->b_states's will not agree and only ones setup by BPW/BCW will have
- * valid state, thus the whole page must be written out thing.
+ * If we detect that a transaction would be required to flush the page, we
+ * have to check the process flags first, if we are already in a transaction
+ * or disk I/O during allocations is off, we need to fail the writepage and
+ * redirty the page.
*/
-
STATIC int
-xfs_page_state_convert(
- struct inode *inode,
- struct page *page,
- struct writeback_control *wbc,
- int startio,
- int unmapped) /* also implies page uptodate */
+xfs_vm_writepage(
+ struct page *page,
+ struct writeback_control *wbc)
{
+ struct inode *inode = page->mapping->host;
+ int delalloc, unwritten;
struct buffer_head *bh, *head;
struct xfs_bmbt_irec imap;
xfs_ioend_t *ioend = NULL, *iohead = NULL;
loff_t offset;
- unsigned long p_offset = 0;
unsigned int type;
__uint64_t end_offset;
pgoff_t end_index, last_index;
ssize_t size, len;
int flags, err, imap_valid = 0, uptodate = 1;
- int page_dirty, count = 0;
- int trylock = 0;
- int all_bh = unmapped;
+ int count = 0;
+ int all_bh = 0;
- if (startio) {
- if (wbc->sync_mode == WB_SYNC_NONE && wbc->nonblocking)
- trylock |= BMAPI_TRYLOCK;
- }
+ trace_xfs_writepage(inode, page, 0);
+
+ ASSERT(page_has_buffers(page));
+
+ /*
+ * Refuse to write the page out if we are called from reclaim context.
+ *
+ * This avoids stack overflows when called from deeply used stacks in
+ * random callers for direct reclaim or memcg reclaim. We explicitly
+ * allow reclaim from kswapd as the stack usage there is relatively low.
+ *
+ * This should really be done by the core VM, but until that happens
+ * filesystems like XFS, btrfs and ext4 have to take care of this
+ * by themselves.
+ */
+ if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC)
+ goto redirty;
+
+ /*
+ * We need a transaction if there are delalloc or unwritten buffers
+ * on the page.
+ *
+ * If we need a transaction and the process flags say we are already
+ * in a transaction, or no IO is allowed then mark the page dirty
+ * again and leave the page as is.
+ */
+ xfs_count_page_state(page, &delalloc, &unwritten);
+ if ((current->flags & PF_FSTRANS) && (delalloc || unwritten))
+ goto redirty;
/* Is this page beyond the end of the file? */
offset = i_size_read(inode);
@@ -1084,50 +1089,33 @@ xfs_page_state_convert(
if (page->index >= end_index) {
if ((page->index >= end_index + 1) ||
!(i_size_read(inode) & (PAGE_CACHE_SIZE - 1))) {
- if (startio)
- unlock_page(page);
+ unlock_page(page);
return 0;
}
}
- /*
- * page_dirty is initially a count of buffers on the page before
- * EOF and is decremented as we move each into a cleanable state.
- *
- * Derivation:
- *
- * End offset is the highest offset that this page should represent.
- * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
- * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
- * hence give us the correct page_dirty count. On any other page,
- * it will be zero and in that case we need page_dirty to be the
- * count of buffers on the page.
- */
end_offset = min_t(unsigned long long,
- (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, offset);
+ (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
+ offset);
len = 1 << inode->i_blkbits;
- p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
- PAGE_CACHE_SIZE);
- p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
- page_dirty = p_offset / len;
bh = head = page_buffers(page);
offset = page_offset(page);
flags = BMAPI_READ;
type = IO_NEW;
- /* TODO: cleanup count and page_dirty */
-
do {
if (offset >= end_offset)
break;
if (!buffer_uptodate(bh))
uptodate = 0;
- if (!(PageUptodate(page) || buffer_uptodate(bh)) && !startio) {
- /*
- * the iomap is actually still valid, but the ioend
- * isn't. shouldn't happen too often.
- */
+
+ /*
+ * A hole may still be marked uptodate because discard_buffer
+ * leaves the flag set.
+ */
+ if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
+ ASSERT(!buffer_dirty(bh));
imap_valid = 0;
continue;
}
@@ -1135,19 +1123,7 @@ xfs_page_state_convert(
if (imap_valid)
imap_valid = xfs_imap_valid(inode, &imap, offset);
- /*
- * First case, map an unwritten extent and prepare for
- * extent state conversion transaction on completion.
- *
- * Second case, allocate space for a delalloc buffer.
- * We can return EAGAIN here in the release page case.
- *
- * Third case, an unmapped buffer was found, and we are
- * in a path where we need to write the whole page out.
- */
- if (buffer_unwritten(bh) || buffer_delay(bh) ||
- ((buffer_uptodate(bh) || PageUptodate(page)) &&
- !buffer_mapped(bh) && (unmapped || startio))) {
+ if (buffer_unwritten(bh) || buffer_delay(bh)) {
int new_ioend = 0;
/*
@@ -1161,15 +1137,15 @@ xfs_page_state_convert(
flags = BMAPI_WRITE | BMAPI_IGNSTATE;
} else if (buffer_delay(bh)) {
type = IO_DELAY;
- flags = BMAPI_ALLOCATE | trylock;
- } else {
- type = IO_NEW;
- flags = BMAPI_WRITE | BMAPI_MMAP;
+ flags = BMAPI_ALLOCATE;
+
+ if (wbc->sync_mode == WB_SYNC_NONE)
+ flags |= BMAPI_TRYLOCK;
}
if (!imap_valid) {
/*
- * if we didn't have a valid mapping then we
+ * If we didn't have a valid mapping then we
* need to ensure that we put the new mapping
* in a new ioend structure. This needs to be
* done to ensure that the ioends correctly
@@ -1177,14 +1153,7 @@ xfs_page_state_convert(
* for unwritten extent conversion.
*/
new_ioend = 1;
- if (type == IO_NEW) {
- size = xfs_probe_cluster(inode,
- page, bh, head, 0);
- } else {
- size = len;
- }
-
- err = xfs_map_blocks(inode, offset, size,
+ err = xfs_map_blocks(inode, offset, len,
&imap, flags);
if (err)
goto error;
@@ -1193,19 +1162,11 @@ xfs_page_state_convert(
}
if (imap_valid) {
xfs_map_at_offset(inode, bh, &imap, offset);
- if (startio) {
- xfs_add_to_ioend(inode, bh, offset,
- type, &ioend,
- new_ioend);
- } else {
- set_buffer_dirty(bh);
- unlock_buffer(bh);
- mark_buffer_dirty(bh);
- }
- page_dirty--;
+ xfs_add_to_ioend(inode, bh, offset, type,
+ &ioend, new_ioend);
count++;
}
- } else if (buffer_uptodate(bh) && startio) {
+ } else if (buffer_uptodate(bh)) {
/*
* we got here because the buffer is already mapped.
* That means it must already have extents allocated
@@ -1213,8 +1174,7 @@ xfs_page_state_convert(
*/
if (!imap_valid || flags != BMAPI_READ) {
flags = BMAPI_READ;
- size = xfs_probe_cluster(inode, page, bh,
- head, 1);
+ size = xfs_probe_cluster(inode, page, bh, head);
err = xfs_map_blocks(inode, offset, size,
&imap, flags);
if (err)
@@ -1233,18 +1193,16 @@ xfs_page_state_convert(
*/
type = IO_NEW;
if (trylock_buffer(bh)) {
- ASSERT(buffer_mapped(bh));
if (imap_valid)
all_bh = 1;
xfs_add_to_ioend(inode, bh, offset, type,
&ioend, !imap_valid);
- page_dirty--;
count++;
} else {
imap_valid = 0;
}
- } else if ((buffer_uptodate(bh) || PageUptodate(page)) &&
- (unmapped || startio)) {
+ } else if (PageUptodate(page)) {
+ ASSERT(buffer_mapped(bh));
imap_valid = 0;
}
@@ -1256,8 +1214,7 @@ xfs_page_state_convert(
if (uptodate && bh == head)
SetPageUptodate(page);
- if (startio)
- xfs_start_page_writeback(page, 1, count);
+ xfs_start_page_writeback(page, 1, count);
if (ioend && imap_valid) {
xfs_off_t end_index;
@@ -1275,131 +1232,30 @@ xfs_page_state_convert(
end_index = last_index;
xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
- wbc, startio, all_bh, end_index);
+ wbc, all_bh, end_index);
}
if (iohead)
xfs_submit_ioend(wbc, iohead);
- return page_dirty;
+ return 0;
error:
if (iohead)
xfs_cancel_ioend(iohead);
- /*
- * If it's delalloc and we have nowhere to put it,
- * throw it away, unless the lower layers told
- * us to try again.
- */
- if (err != -EAGAIN) {
- if (!unmapped)
- xfs_aops_discard_page(page);
- ClearPageUptodate(page);
- }
- return err;
-}
+ if (err == -EAGAIN)
+ goto redirty;
-/*
- * writepage: Called from one of two places:
- *
- * 1. we are flushing a delalloc buffer head.
- *
- * 2. we are writing out a dirty page. Typically the page dirty
- * state is cleared before we get here. In this case is it
- * conceivable we have no buffer heads.
- *
- * For delalloc space on the page we need to allocate space and
- * flush it. For unmapped buffer heads on the page we should
- * allocate space if the page is uptodate. For any other dirty
- * buffer heads on the page we should flush them.
- *
- * If we detect that a transaction would be required to flush
- * the page, we have to check the process flags first, if we
- * are already in a transaction or disk I/O during allocations
- * is off, we need to fail the writepage and redirty the page.
- */
-
-STATIC int
-xfs_vm_writepage(
- struct page *page,
- struct writeback_control *wbc)
-{
- int error;
- int need_trans;
- int delalloc, unmapped, unwritten;
- struct inode *inode = page->mapping->host;
-
- trace_xfs_writepage(inode, page, 0);
-
- /*
- * Refuse to write the page out if we are called from reclaim context.
- *
- * This is primarily to avoid stack overflows when called from deep
- * used stacks in random callers for direct reclaim, but disabling
- * reclaim for kswap is a nice side-effect as kswapd causes rather
- * suboptimal I/O patters, too.
- *
- * This should really be done by the core VM, but until that happens
- * filesystems like XFS, btrfs and ext4 have to take care of this
- * by themselves.
- */
- if (current->flags & PF_MEMALLOC)
- goto out_fail;
-
- /*
- * We need a transaction if:
- * 1. There are delalloc buffers on the page
- * 2. The page is uptodate and we have unmapped buffers
- * 3. The page is uptodate and we have no buffers
- * 4. There are unwritten buffers on the page
- */
-
- if (!page_has_buffers(page)) {
- unmapped = 1;
- need_trans = 1;
- } else {
- xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
- if (!PageUptodate(page))
- unmapped = 0;
- need_trans = delalloc + unmapped + unwritten;
- }
-
- /*
- * If we need a transaction and the process flags say
- * we are already in a transaction, or no IO is allowed
- * then mark the page dirty again and leave the page
- * as is.
- */
- if (current_test_flags(PF_FSTRANS) && need_trans)
- goto out_fail;
-
- /*
- * Delay hooking up buffer heads until we have
- * made our go/no-go decision.
- */
- if (!page_has_buffers(page))
- create_empty_buffers(page, 1 << inode->i_blkbits, 0);
-
- /*
- * Convert delayed allocate, unwritten or unmapped space
- * to real space and flush out to disk.
- */
- error = xfs_page_state_convert(inode, page, wbc, 1, unmapped);
- if (error == -EAGAIN)
- goto out_fail;
- if (unlikely(error < 0))
- goto out_unlock;
-
- return 0;
+ xfs_aops_discard_page(page);
+ ClearPageUptodate(page);
+ unlock_page(page);
+ return err;
-out_fail:
+redirty:
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return 0;
-out_unlock:
- unlock_page(page);
- return error;
}
STATIC int
@@ -1413,65 +1269,27 @@ xfs_vm_writepages(
/*
* Called to move a page into cleanable state - and from there
- * to be released. Possibly the page is already clean. We always
+ * to be released. The page should already be clean. We always
* have buffer heads in this call.
*
- * Returns 0 if the page is ok to release, 1 otherwise.
- *
- * Possible scenarios are:
- *
- * 1. We are being called to release a page which has been written
- * to via regular I/O. buffer heads will be dirty and possibly
- * delalloc. If no delalloc buffer heads in this case then we
- * can just return zero.
- *
- * 2. We are called to release a page which has been written via
- * mmap, all we need to do is ensure there is no delalloc
- * state in the buffer heads, if not we can let the caller
- * free them and we should come back later via writepage.
+ * Returns 1 if the page is ok to release, 0 otherwise.
*/
STATIC int
xfs_vm_releasepage(
struct page *page,
gfp_t gfp_mask)
{
- struct inode *inode = page->mapping->host;
- int dirty, delalloc, unmapped, unwritten;
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_ALL,
- .nr_to_write = 1,
- };
+ int delalloc, unwritten;
- trace_xfs_releasepage(inode, page, 0);
+ trace_xfs_releasepage(page->mapping->host, page, 0);
- if (!page_has_buffers(page))
- return 0;
-
- xfs_count_page_state(page, &delalloc, &unmapped, &unwritten);
- if (!delalloc && !unwritten)
- goto free_buffers;
+ xfs_count_page_state(page, &delalloc, &unwritten);
- if (!(gfp_mask & __GFP_FS))
+ if (WARN_ON(delalloc))
return 0;
-
- /* If we are already inside a transaction or the thread cannot
- * do I/O, we cannot release this page.
- */
- if (current_test_flags(PF_FSTRANS))
+ if (WARN_ON(unwritten))
return 0;
- /*
- * Convert delalloc space to real space, do not flush the
- * data out to disk, that will be done by the caller.
- * Never need to allocate space here - we will always
- * come back to writepage in that case.
- */
- dirty = xfs_page_state_convert(inode, page, &wbc, 0, 0);
- if (dirty == 0 && !unwritten)
- goto free_buffers;
- return 0;
-
-free_buffers:
return try_to_free_buffers(page);
}
@@ -1481,9 +1299,9 @@ __xfs_get_blocks(
sector_t iblock,
struct buffer_head *bh_result,
int create,
- int direct,
- bmapi_flags_t flags)
+ int direct)
{
+ int flags = create ? BMAPI_WRITE : BMAPI_READ;
struct xfs_bmbt_irec imap;
xfs_off_t offset;
ssize_t size;
@@ -1498,8 +1316,11 @@ __xfs_get_blocks(
if (!create && direct && offset >= i_size_read(inode))
return 0;
- error = xfs_iomap(XFS_I(inode), offset, size,
- create ? flags : BMAPI_READ, &imap, &nimap, &new);
+ if (direct && create)
+ flags |= BMAPI_DIRECT;
+
+ error = xfs_iomap(XFS_I(inode), offset, size, flags, &imap, &nimap,
+ &new);
if (error)
return -error;
if (nimap == 0)
@@ -1579,8 +1400,7 @@ xfs_get_blocks(
struct buffer_head *bh_result,
int create)
{
- return __xfs_get_blocks(inode, iblock,
- bh_result, create, 0, BMAPI_WRITE);
+ return __xfs_get_blocks(inode, iblock, bh_result, create, 0);
}
STATIC int
@@ -1590,61 +1410,59 @@ xfs_get_blocks_direct(
struct buffer_head *bh_result,
int create)
{
- return __xfs_get_blocks(inode, iblock,
- bh_result, create, 1, BMAPI_WRITE|BMAPI_DIRECT);
+ return __xfs_get_blocks(inode, iblock, bh_result, create, 1);
}
+/*
+ * Complete a direct I/O write request.
+ *
+ * If the private argument is non-NULL __xfs_get_blocks signals us that we
+ * need to issue a transaction to convert the range from unwritten to written
+ * extents. In case this is regular synchronous I/O we just call xfs_end_io
+ * to do this and we are done. But in case this was a successfull AIO
+ * request this handler is called from interrupt context, from which we
+ * can't start transactions. In that case offload the I/O completion to
+ * the workqueues we also use for buffered I/O completion.
+ */
STATIC void
-xfs_end_io_direct(
- struct kiocb *iocb,
- loff_t offset,
- ssize_t size,
- void *private)
+xfs_end_io_direct_write(
+ struct kiocb *iocb,
+ loff_t offset,
+ ssize_t size,
+ void *private,
+ int ret,
+ bool is_async)
{
- xfs_ioend_t *ioend = iocb->private;
+ struct xfs_ioend *ioend = iocb->private;
/*
- * Non-NULL private data means we need to issue a transaction to
- * convert a range from unwritten to written extents. This needs
- * to happen from process context but aio+dio I/O completion
- * happens from irq context so we need to defer it to a workqueue.
- * This is not necessary for synchronous direct I/O, but we do
- * it anyway to keep the code uniform and simpler.
- *
- * Well, if only it were that simple. Because synchronous direct I/O
- * requires extent conversion to occur *before* we return to userspace,
- * we have to wait for extent conversion to complete. Look at the
- * iocb that has been passed to us to determine if this is AIO or
- * not. If it is synchronous, tell xfs_finish_ioend() to kick the
- * workqueue and wait for it to complete.
- *
- * The core direct I/O code might be changed to always call the
- * completion handler in the future, in which case all this can
- * go away.
+ * blockdev_direct_IO can return an error even after the I/O
+ * completion handler was called. Thus we need to protect
+ * against double-freeing.
*/
+ iocb->private = NULL;
+
ioend->io_offset = offset;
ioend->io_size = size;
- if (ioend->io_type == IO_READ) {
- xfs_finish_ioend(ioend, 0);
- } else if (private && size > 0) {
- xfs_finish_ioend(ioend, is_sync_kiocb(iocb));
- } else {
+ if (private && size > 0)
+ ioend->io_type = IO_UNWRITTEN;
+
+ if (is_async) {
/*
- * A direct I/O write ioend starts it's life in unwritten
- * state in case they map an unwritten extent. This write
- * didn't map an unwritten extent so switch it's completion
- * handler.
+ * If we are converting an unwritten extent we need to delay
+ * the AIO completion until after the unwrittent extent
+ * conversion has completed, otherwise do it ASAP.
*/
- ioend->io_type = IO_NEW;
- xfs_finish_ioend(ioend, 0);
+ if (ioend->io_type == IO_UNWRITTEN) {
+ ioend->io_iocb = iocb;
+ ioend->io_result = ret;
+ } else {
+ aio_complete(iocb, ret, 0);
+ }
+ xfs_finish_ioend(ioend);
+ } else {
+ xfs_finish_ioend_sync(ioend);
}
-
- /*
- * blockdev_direct_IO can return an error even after the I/O
- * completion handler was called. Thus we need to protect
- * against double-freeing.
- */
- iocb->private = NULL;
}
STATIC ssize_t
@@ -1655,26 +1473,45 @@ xfs_vm_direct_IO(
loff_t offset,
unsigned long nr_segs)
{
- struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
- struct block_device *bdev;
- ssize_t ret;
-
- bdev = xfs_find_bdev_for_inode(inode);
+ struct inode *inode = iocb->ki_filp->f_mapping->host;
+ struct block_device *bdev = xfs_find_bdev_for_inode(inode);
+ ssize_t ret;
- iocb->private = xfs_alloc_ioend(inode, rw == WRITE ?
- IO_UNWRITTEN : IO_READ);
+ if (rw & WRITE) {
+ iocb->private = xfs_alloc_ioend(inode, IO_NEW);
- ret = blockdev_direct_IO_no_locking(rw, iocb, inode, bdev, iov,
+ ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
offset, nr_segs,
xfs_get_blocks_direct,
- xfs_end_io_direct);
+ xfs_end_io_direct_write, NULL, 0);
+ if (ret != -EIOCBQUEUED && iocb->private)
+ xfs_destroy_ioend(iocb->private);
+ } else {
+ ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov,
+ offset, nr_segs,
+ xfs_get_blocks_direct,
+ NULL, NULL, 0);
+ }
- if (unlikely(ret != -EIOCBQUEUED && iocb->private))
- xfs_destroy_ioend(iocb->private);
return ret;
}
+STATIC void
+xfs_vm_write_failed(
+ struct address_space *mapping,
+ loff_t to)
+{
+ struct inode *inode = mapping->host;
+
+ if (to > inode->i_size) {
+ struct iattr ia = {
+ .ia_valid = ATTR_SIZE | ATTR_FORCE,
+ .ia_size = inode->i_size,
+ };
+ xfs_setattr(XFS_I(inode), &ia, XFS_ATTR_NOLOCK);
+ }
+}
+
STATIC int
xfs_vm_write_begin(
struct file *file,
@@ -1685,9 +1522,31 @@ xfs_vm_write_begin(
struct page **pagep,
void **fsdata)
{
- *pagep = NULL;
- return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
- xfs_get_blocks);
+ int ret;
+
+ ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS,
+ pagep, xfs_get_blocks);
+ if (unlikely(ret))
+ xfs_vm_write_failed(mapping, pos + len);
+ return ret;
+}
+
+STATIC int
+xfs_vm_write_end(
+ struct file *file,
+ struct address_space *mapping,
+ loff_t pos,
+ unsigned len,
+ unsigned copied,
+ struct page *page,
+ void *fsdata)
+{
+ int ret;
+
+ ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
+ if (unlikely(ret < len))
+ xfs_vm_write_failed(mapping, pos + len);
+ return ret;
}
STATIC sector_t
@@ -1698,7 +1557,7 @@ xfs_vm_bmap(
struct inode *inode = (struct inode *)mapping->host;
struct xfs_inode *ip = XFS_I(inode);
- xfs_itrace_entry(XFS_I(inode));
+ trace_xfs_vm_bmap(XFS_I(inode));
xfs_ilock(ip, XFS_IOLOCK_SHARED);
xfs_flush_pages(ip, (xfs_off_t)0, -1, 0, FI_REMAPF);
xfs_iunlock(ip, XFS_IOLOCK_SHARED);
@@ -1732,7 +1591,7 @@ const struct address_space_operations xfs_address_space_operations = {
.releasepage = xfs_vm_releasepage,
.invalidatepage = xfs_vm_invalidatepage,
.write_begin = xfs_vm_write_begin,
- .write_end = generic_write_end,
+ .write_end = xfs_vm_write_end,
.bmap = xfs_vm_bmap,
.direct_IO = xfs_vm_direct_IO,
.migratepage = buffer_migrate_page,
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 4cfc6ea87df..c5057fb6237 100644
--- a/fs/xfs/linux-2.6/xfs_aops.h
+++ b/fs/xfs/linux-2.6/xfs_aops.h
@@ -37,6 +37,8 @@ typedef struct xfs_ioend {
size_t io_size; /* size of the extent */
xfs_off_t io_offset; /* offset in the file */
struct work_struct io_work; /* xfsdatad work queue */
+ struct kiocb *io_iocb;
+ int io_result;
} xfs_ioend_t;
extern const struct address_space_operations xfs_address_space_operations;
@@ -45,6 +47,6 @@ extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
extern void xfs_ioend_init(void);
extern void xfs_ioend_wait(struct xfs_inode *);
-extern void xfs_count_page_state(struct page *, int *, int *, int *);
+extern void xfs_count_page_state(struct page *, int *, int *);
#endif /* __XFS_AOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index 2ee3f7a6016..63fd2c07cb5 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -39,7 +39,6 @@
#include "xfs_inum.h"
#include "xfs_log.h"
#include "xfs_ag.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_trace.h"
@@ -189,8 +188,8 @@ _xfs_buf_initialize(
atomic_set(&bp->b_hold, 1);
init_completion(&bp->b_iowait);
INIT_LIST_HEAD(&bp->b_list);
- INIT_LIST_HEAD(&bp->b_hash_list);
- init_MUTEX_LOCKED(&bp->b_sema); /* held, no waiters */
+ RB_CLEAR_NODE(&bp->b_rbnode);
+ sema_init(&bp->b_sema, 0); /* held, no waiters */
XB_SET_OWNER(bp);
bp->b_target = target;
bp->b_file_offset = range_base;
@@ -263,8 +262,6 @@ xfs_buf_free(
{
trace_xfs_buf_free(bp, _RET_IP_);
- ASSERT(list_empty(&bp->b_hash_list));
-
if (bp->b_flags & (_XBF_PAGE_CACHE|_XBF_PAGES)) {
uint i;
@@ -423,8 +420,10 @@ _xfs_buf_find(
{
xfs_off_t range_base;
size_t range_length;
- xfs_bufhash_t *hash;
- xfs_buf_t *bp, *n;
+ struct xfs_perag *pag;
+ struct rb_node **rbp;
+ struct rb_node *parent;
+ xfs_buf_t *bp;
range_base = (ioff << BBSHIFT);
range_length = (isize << BBSHIFT);
@@ -433,20 +432,38 @@ _xfs_buf_find(
ASSERT(!(range_length < (1 << btp->bt_sshift)));
ASSERT(!(range_base & (xfs_off_t)btp->bt_smask));
- hash = &btp->bt_hash[hash_long((unsigned long)ioff, btp->bt_hashshift)];
-
- spin_lock(&hash->bh_lock);
-
- list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
- ASSERT(btp == bp->b_target);
- if (bp->b_file_offset == range_base &&
- bp->b_buffer_length == range_length) {
+ /* get tree root */
+ pag = xfs_perag_get(btp->bt_mount,
+ xfs_daddr_to_agno(btp->bt_mount, ioff));
+
+ /* walk tree */
+ spin_lock(&pag->pag_buf_lock);
+ rbp = &pag->pag_buf_tree.rb_node;
+ parent = NULL;
+ bp = NULL;
+ while (*rbp) {
+ parent = *rbp;
+ bp = rb_entry(parent, struct xfs_buf, b_rbnode);
+
+ if (range_base < bp->b_file_offset)
+ rbp = &(*rbp)->rb_left;
+ else if (range_base > bp->b_file_offset)
+ rbp = &(*rbp)->rb_right;
+ else {
/*
- * If we look at something, bring it to the
- * front of the list for next time.
+ * found a block offset match. If the range doesn't
+ * match, the only way this is allowed is if the buffer
+ * in the cache is stale and the transaction that made
+ * it stale has not yet committed. i.e. we are
+ * reallocating a busy extent. Skip this buffer and
+ * continue searching to the right for an exact match.
*/
+ if (bp->b_buffer_length != range_length) {
+ ASSERT(bp->b_flags & XBF_STALE);
+ rbp = &(*rbp)->rb_right;
+ continue;
+ }
atomic_inc(&bp->b_hold);
- list_move(&bp->b_hash_list, &hash->bh_list);
goto found;
}
}
@@ -455,17 +472,21 @@ _xfs_buf_find(
if (new_bp) {
_xfs_buf_initialize(new_bp, btp, range_base,
range_length, flags);
- new_bp->b_hash = hash;
- list_add(&new_bp->b_hash_list, &hash->bh_list);
+ rb_link_node(&new_bp->b_rbnode, parent, rbp);
+ rb_insert_color(&new_bp->b_rbnode, &pag->pag_buf_tree);
+ /* the buffer keeps the perag reference until it is freed */
+ new_bp->b_pag = pag;
+ spin_unlock(&pag->pag_buf_lock);
} else {
XFS_STATS_INC(xb_miss_locked);
+ spin_unlock(&pag->pag_buf_lock);
+ xfs_perag_put(pag);
}
-
- spin_unlock(&hash->bh_lock);
return new_bp;
found:
- spin_unlock(&hash->bh_lock);
+ spin_unlock(&pag->pag_buf_lock);
+ xfs_perag_put(pag);
/* Attempt to get the semaphore without sleeping,
* if this does not work then we need to drop the
@@ -579,9 +600,9 @@ _xfs_buf_read(
XBF_READ_AHEAD | _XBF_RUN_QUEUES);
status = xfs_buf_iorequest(bp);
- if (!status && !(flags & XBF_ASYNC))
- status = xfs_buf_iowait(bp);
- return status;
+ if (status || XFS_BUF_ISERROR(bp) || (flags & XBF_ASYNC))
+ return status;
+ return xfs_buf_iowait(bp);
}
xfs_buf_t *
@@ -631,8 +652,7 @@ void
xfs_buf_readahead(
xfs_buftarg_t *target,
xfs_off_t ioff,
- size_t isize,
- xfs_buf_flags_t flags)
+ size_t isize)
{
struct backing_dev_info *bdi;
@@ -640,8 +660,42 @@ xfs_buf_readahead(
if (bdi_read_congested(bdi))
return;
- flags |= (XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD);
- xfs_buf_read(target, ioff, isize, flags);
+ xfs_buf_read(target, ioff, isize,
+ XBF_TRYLOCK|XBF_ASYNC|XBF_READ_AHEAD|XBF_DONT_BLOCK);
+}
+
+/*
+ * Read an uncached buffer from disk. Allocates and returns a locked
+ * buffer containing the disk contents or nothing.
+ */
+struct xfs_buf *
+xfs_buf_read_uncached(
+ struct xfs_mount *mp,
+ struct xfs_buftarg *target,
+ xfs_daddr_t daddr,
+ size_t length,
+ int flags)
+{
+ xfs_buf_t *bp;
+ int error;
+
+ bp = xfs_buf_get_uncached(target, length, flags);
+ if (!bp)
+ return NULL;
+
+ /* set up the buffer for a read IO */
+ xfs_buf_lock(bp);
+ XFS_BUF_SET_ADDR(bp, daddr);
+ XFS_BUF_READ(bp);
+ XFS_BUF_BUSY(bp);
+
+ xfsbdstrat(mp, bp);
+ error = xfs_buf_iowait(bp);
+ if (error || bp->b_error) {
+ xfs_buf_relse(bp);
+ return NULL;
+ }
+ return bp;
}
xfs_buf_t *
@@ -713,9 +767,10 @@ xfs_buf_associate_memory(
}
xfs_buf_t *
-xfs_buf_get_noaddr(
+xfs_buf_get_uncached(
+ struct xfs_buftarg *target,
size_t len,
- xfs_buftarg_t *target)
+ int flags)
{
unsigned long page_count = PAGE_ALIGN(len) >> PAGE_SHIFT;
int error, i;
@@ -731,7 +786,7 @@ xfs_buf_get_noaddr(
goto fail_free_buf;
for (i = 0; i < page_count; i++) {
- bp->b_pages[i] = alloc_page(GFP_KERNEL);
+ bp->b_pages[i] = alloc_page(xb_to_gfp(flags));
if (!bp->b_pages[i])
goto fail_free_mem;
}
@@ -746,7 +801,7 @@ xfs_buf_get_noaddr(
xfs_buf_unlock(bp);
- trace_xfs_buf_get_noaddr(bp, _RET_IP_);
+ trace_xfs_buf_get_uncached(bp, _RET_IP_);
return bp;
fail_free_mem:
@@ -780,29 +835,30 @@ void
xfs_buf_rele(
xfs_buf_t *bp)
{
- xfs_bufhash_t *hash = bp->b_hash;
+ struct xfs_perag *pag = bp->b_pag;
trace_xfs_buf_rele(bp, _RET_IP_);
- if (unlikely(!hash)) {
+ if (!pag) {
ASSERT(!bp->b_relse);
+ ASSERT(RB_EMPTY_NODE(&bp->b_rbnode));
if (atomic_dec_and_test(&bp->b_hold))
xfs_buf_free(bp);
return;
}
+ ASSERT(!RB_EMPTY_NODE(&bp->b_rbnode));
ASSERT(atomic_read(&bp->b_hold) > 0);
- if (atomic_dec_and_lock(&bp->b_hold, &hash->bh_lock)) {
+ if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
if (bp->b_relse) {
atomic_inc(&bp->b_hold);
- spin_unlock(&hash->bh_lock);
- (*(bp->b_relse)) (bp);
- } else if (bp->b_flags & XBF_FS_MANAGED) {
- spin_unlock(&hash->bh_lock);
+ spin_unlock(&pag->pag_buf_lock);
+ bp->b_relse(bp);
} else {
ASSERT(!(bp->b_flags & (XBF_DELWRI|_XBF_DELWRI_Q)));
- list_del_init(&bp->b_hash_list);
- spin_unlock(&hash->bh_lock);
+ rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
+ spin_unlock(&pag->pag_buf_lock);
+ xfs_perag_put(pag);
xfs_buf_free(bp);
}
}
@@ -865,7 +921,7 @@ xfs_buf_lock(
trace_xfs_buf_lock(bp, _RET_IP_);
if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
- xfs_log_force(bp->b_mount, 0);
+ xfs_log_force(bp->b_target->bt_mount, 0);
if (atomic_read(&bp->b_io_remaining))
blk_run_address_space(bp->b_target->bt_mapping);
down(&bp->b_sema);
@@ -897,36 +953,6 @@ xfs_buf_unlock(
trace_xfs_buf_unlock(bp, _RET_IP_);
}
-
-/*
- * Pinning Buffer Storage in Memory
- * Ensure that no attempt to force a buffer to disk will succeed.
- */
-void
-xfs_buf_pin(
- xfs_buf_t *bp)
-{
- trace_xfs_buf_pin(bp, _RET_IP_);
- atomic_inc(&bp->b_pin_count);
-}
-
-void
-xfs_buf_unpin(
- xfs_buf_t *bp)
-{
- trace_xfs_buf_unpin(bp, _RET_IP_);
-
- if (atomic_dec_and_test(&bp->b_pin_count))
- wake_up_all(&bp->b_waiters);
-}
-
-int
-xfs_buf_ispin(
- xfs_buf_t *bp)
-{
- return atomic_read(&bp->b_pin_count);
-}
-
STATIC void
xfs_buf_wait_unpin(
xfs_buf_t *bp)
@@ -960,19 +986,7 @@ xfs_buf_iodone_work(
xfs_buf_t *bp =
container_of(work, xfs_buf_t, b_iodone_work);
- /*
- * We can get an EOPNOTSUPP to ordered writes. Here we clear the
- * ordered flag and reissue them. Because we can't tell the higher
- * layers directly that they should not issue ordered I/O anymore, they
- * need to check if the _XFS_BARRIER_FAILED flag was set during I/O completion.
- */
- if ((bp->b_error == EOPNOTSUPP) &&
- (bp->b_flags & (XBF_ORDERED|XBF_ASYNC)) == (XBF_ORDERED|XBF_ASYNC)) {
- trace_xfs_buf_ordered_retry(bp, _RET_IP_);
- bp->b_flags &= ~XBF_ORDERED;
- bp->b_flags |= _XFS_BARRIER_FAILED;
- xfs_buf_iorequest(bp);
- } else if (bp->b_iodone)
+ if (bp->b_iodone)
(*(bp->b_iodone))(bp);
else if (bp->b_flags & XBF_ASYNC)
xfs_buf_relse(bp);
@@ -1018,13 +1032,11 @@ xfs_bwrite(
{
int error;
- bp->b_strat = xfs_bdstrat_cb;
- bp->b_mount = mp;
bp->b_flags |= XBF_WRITE;
bp->b_flags &= ~(XBF_ASYNC | XBF_READ);
xfs_buf_delwri_dequeue(bp);
- xfs_buf_iostrategy(bp);
+ xfs_bdstrat_cb(bp);
error = xfs_buf_iowait(bp);
if (error)
@@ -1040,9 +1052,6 @@ xfs_bdwrite(
{
trace_xfs_buf_bdwrite(bp, _RET_IP_);
- bp->b_strat = xfs_bdstrat_cb;
- bp->b_mount = mp;
-
bp->b_flags &= ~XBF_READ;
bp->b_flags |= (XBF_DELWRI | XBF_ASYNC);
@@ -1051,7 +1060,7 @@ xfs_bdwrite(
/*
* Called when we want to stop a buffer from getting written or read.
- * We attach the EIO error, muck with its flags, and call biodone
+ * We attach the EIO error, muck with its flags, and call xfs_buf_ioend
* so that the proper iodone callbacks get called.
*/
STATIC int
@@ -1068,22 +1077,21 @@ xfs_bioerror(
XFS_BUF_ERROR(bp, EIO);
/*
- * We're calling biodone, so delete XBF_DONE flag.
+ * We're calling xfs_buf_ioend, so delete XBF_DONE flag.
*/
XFS_BUF_UNREAD(bp);
XFS_BUF_UNDELAYWRITE(bp);
XFS_BUF_UNDONE(bp);
XFS_BUF_STALE(bp);
- XFS_BUF_CLR_BDSTRAT_FUNC(bp);
- xfs_biodone(bp);
+ xfs_buf_ioend(bp, 0);
return EIO;
}
/*
* Same as xfs_bioerror, except that we are releasing the buffer
- * here ourselves, and avoiding the biodone call.
+ * here ourselves, and avoiding the xfs_buf_ioend call.
* This is meant for userdata errors; metadata bufs come with
* iodone functions attached, so that we can track down errors.
*/
@@ -1105,7 +1113,6 @@ xfs_bioerror_relse(
XFS_BUF_DONE(bp);
XFS_BUF_STALE(bp);
XFS_BUF_CLR_IODONE_FUNC(bp);
- XFS_BUF_CLR_BDSTRAT_FUNC(bp);
if (!(fl & XBF_ASYNC)) {
/*
* Mark b_error and B_ERROR _both_.
@@ -1133,7 +1140,7 @@ int
xfs_bdstrat_cb(
struct xfs_buf *bp)
{
- if (XFS_FORCED_SHUTDOWN(bp->b_mount)) {
+ if (XFS_FORCED_SHUTDOWN(bp->b_target->bt_mount)) {
trace_xfs_bdstrat_shut(bp, _RET_IP_);
/*
* Metadata write that didn't get logged but
@@ -1235,7 +1242,7 @@ _xfs_buf_ioapply(
if (bp->b_flags & XBF_ORDERED) {
ASSERT(!(bp->b_flags & XBF_READ));
- rw = WRITE_BARRIER;
+ rw = WRITE_FLUSH_FUA;
} else if (bp->b_flags & XBF_LOG_BUFFER) {
ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
bp->b_flags &= ~_XBF_RUN_QUEUES;
@@ -1311,8 +1318,19 @@ submit_io:
if (size)
goto next_chunk;
} else {
- bio_put(bio);
+ /*
+ * if we get here, no pages were added to the bio. However,
+ * we can't just error out here - if the pages are locked then
+ * we have to unlock them otherwise we can hang on a later
+ * access to the page.
+ */
xfs_buf_ioerror(bp, EIO);
+ if (bp->b_flags & _XBF_PAGE_LOCKED) {
+ int i;
+ for (i = 0; i < bp->b_page_count; i++)
+ unlock_page(bp->b_pages[i]);
+ }
+ bio_put(bio);
}
}
@@ -1428,63 +1446,24 @@ xfs_buf_iomove(
*/
void
xfs_wait_buftarg(
- xfs_buftarg_t *btp)
-{
- xfs_buf_t *bp, *n;
- xfs_bufhash_t *hash;
- uint i;
-
- for (i = 0; i < (1 << btp->bt_hashshift); i++) {
- hash = &btp->bt_hash[i];
-again:
- spin_lock(&hash->bh_lock);
- list_for_each_entry_safe(bp, n, &hash->bh_list, b_hash_list) {
- ASSERT(btp == bp->b_target);
- if (!(bp->b_flags & XBF_FS_MANAGED)) {
- spin_unlock(&hash->bh_lock);
- /*
- * Catch superblock reference count leaks
- * immediately
- */
- BUG_ON(bp->b_bn == 0);
- delay(100);
- goto again;
- }
- }
- spin_unlock(&hash->bh_lock);
- }
-}
-
-/*
- * Allocate buffer hash table for a given target.
- * For devices containing metadata (i.e. not the log/realtime devices)
- * we need to allocate a much larger hash table.
- */
-STATIC void
-xfs_alloc_bufhash(
- xfs_buftarg_t *btp,
- int external)
+ struct xfs_buftarg *btp)
{
- unsigned int i;
+ struct xfs_perag *pag;
+ uint i;
- btp->bt_hashshift = external ? 3 : 8; /* 8 or 256 buckets */
- btp->bt_hashmask = (1 << btp->bt_hashshift) - 1;
- btp->bt_hash = kmem_zalloc_large((1 << btp->bt_hashshift) *
- sizeof(xfs_bufhash_t));
- for (i = 0; i < (1 << btp->bt_hashshift); i++) {
- spin_lock_init(&btp->bt_hash[i].bh_lock);
- INIT_LIST_HEAD(&btp->bt_hash[i].bh_list);
+ for (i = 0; i < btp->bt_mount->m_sb.sb_agcount; i++) {
+ pag = xfs_perag_get(btp->bt_mount, i);
+ spin_lock(&pag->pag_buf_lock);
+ while (rb_first(&pag->pag_buf_tree)) {
+ spin_unlock(&pag->pag_buf_lock);
+ delay(100);
+ spin_lock(&pag->pag_buf_lock);
+ }
+ spin_unlock(&pag->pag_buf_lock);
+ xfs_perag_put(pag);
}
}
-STATIC void
-xfs_free_bufhash(
- xfs_buftarg_t *btp)
-{
- kmem_free_large(btp->bt_hash);
- btp->bt_hash = NULL;
-}
-
/*
* buftarg list for delwrite queue processing
*/
@@ -1517,7 +1496,6 @@ xfs_free_buftarg(
xfs_flush_buftarg(btp, 1);
if (mp->m_flags & XFS_MOUNT_BARRIER)
xfs_blkdev_issue_flush(btp);
- xfs_free_bufhash(btp);
iput(btp->bt_mapping->host);
/* Unregister the buftarg first so that we don't get a
@@ -1602,6 +1580,7 @@ xfs_mapping_buftarg(
XFS_BUFTARG_NAME(btp));
return ENOMEM;
}
+ inode->i_ino = get_next_ino();
inode->i_mode = S_IFBLK;
inode->i_bdev = bdev;
inode->i_rdev = bdev->bd_dev;
@@ -1639,6 +1618,7 @@ out_error:
xfs_buftarg_t *
xfs_alloc_buftarg(
+ struct xfs_mount *mp,
struct block_device *bdev,
int external,
const char *fsname)
@@ -1647,6 +1627,7 @@ xfs_alloc_buftarg(
btp = kmem_zalloc(sizeof(*btp), KM_SLEEP);
+ btp->bt_mount = mp;
btp->bt_dev = bdev->bd_dev;
btp->bt_bdev = bdev;
if (xfs_setsize_buftarg_early(btp, bdev))
@@ -1655,7 +1636,6 @@ xfs_alloc_buftarg(
goto error;
if (xfs_alloc_delwrite_queue(btp, fsname))
goto error;
- xfs_alloc_bufhash(btp, external);
return btp;
error:
@@ -1804,7 +1784,7 @@ xfs_buf_delwri_split(
trace_xfs_buf_delwri_split(bp, _RET_IP_);
ASSERT(bp->b_flags & XBF_DELWRI);
- if (!xfs_buf_ispin(bp) && !xfs_buf_cond_lock(bp)) {
+ if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) {
if (!force &&
time_before(jiffies, bp->b_queuetime + age)) {
xfs_buf_unlock(bp);
@@ -1889,7 +1869,7 @@ xfsbufd(
struct xfs_buf *bp;
bp = list_first_entry(&tmp, struct xfs_buf, b_list);
list_del_init(&bp->b_list);
- xfs_buf_iostrategy(bp);
+ xfs_bdstrat_cb(bp);
count++;
}
if (count)
@@ -1936,7 +1916,7 @@ xfs_flush_buftarg(
bp->b_flags &= ~XBF_ASYNC;
list_add(&bp->b_list, &wait_list);
}
- xfs_buf_iostrategy(bp);
+ xfs_bdstrat_cb(bp);
}
if (wait) {
@@ -1946,7 +1926,7 @@ xfs_flush_buftarg(
bp = list_first_entry(&wait_list, struct xfs_buf, b_list);
list_del_init(&bp->b_list);
- xfs_iowait(bp);
+ xfs_buf_iowait(bp);
xfs_buf_relse(bp);
}
}
@@ -1962,7 +1942,8 @@ xfs_buf_init(void)
if (!xfs_buf_zone)
goto out;
- xfslogd_workqueue = create_workqueue("xfslogd");
+ xfslogd_workqueue = alloc_workqueue("xfslogd",
+ WQ_MEM_RECLAIM | WQ_HIGHPRI, 1);
if (!xfslogd_workqueue)
goto out_free_buf_zone;
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index 5fbecefa5df..383a3f37cf9 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -44,57 +44,48 @@ typedef enum {
XBRW_ZERO = 3, /* Zero target memory */
} xfs_buf_rw_t;
-typedef enum {
- XBF_READ = (1 << 0), /* buffer intended for reading from device */
- XBF_WRITE = (1 << 1), /* buffer intended for writing to device */
- XBF_MAPPED = (1 << 2), /* buffer mapped (b_addr valid) */
- XBF_ASYNC = (1 << 4), /* initiator will not wait for completion */
- XBF_DONE = (1 << 5), /* all pages in the buffer uptodate */
- XBF_DELWRI = (1 << 6), /* buffer has dirty pages */
- XBF_STALE = (1 << 7), /* buffer has been staled, do not find it */
- XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */
- XBF_ORDERED = (1 << 11), /* use ordered writes */
- XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */
- XBF_LOG_BUFFER = (1 << 13), /* this is a buffer used for the log */
-
- /* flags used only as arguments to access routines */
- XBF_LOCK = (1 << 14), /* lock requested */
- XBF_TRYLOCK = (1 << 15), /* lock requested, but do not wait */
- XBF_DONT_BLOCK = (1 << 16), /* do not block in current thread */
-
- /* flags used only internally */
- _XBF_PAGE_CACHE = (1 << 17),/* backed by pagecache */
- _XBF_PAGES = (1 << 18), /* backed by refcounted pages */
- _XBF_RUN_QUEUES = (1 << 19),/* run block device task queue */
- _XBF_DELWRI_Q = (1 << 21), /* buffer on delwri queue */
+#define XBF_READ (1 << 0) /* buffer intended for reading from device */
+#define XBF_WRITE (1 << 1) /* buffer intended for writing to device */
+#define XBF_MAPPED (1 << 2) /* buffer mapped (b_addr valid) */
+#define XBF_ASYNC (1 << 4) /* initiator will not wait for completion */
+#define XBF_DONE (1 << 5) /* all pages in the buffer uptodate */
+#define XBF_DELWRI (1 << 6) /* buffer has dirty pages */
+#define XBF_STALE (1 << 7) /* buffer has been staled, do not find it */
+#define XBF_ORDERED (1 << 11)/* use ordered writes */
+#define XBF_READ_AHEAD (1 << 12)/* asynchronous read-ahead */
+#define XBF_LOG_BUFFER (1 << 13)/* this is a buffer used for the log */
+
+/* flags used only as arguments to access routines */
+#define XBF_LOCK (1 << 14)/* lock requested */
+#define XBF_TRYLOCK (1 << 15)/* lock requested, but do not wait */
+#define XBF_DONT_BLOCK (1 << 16)/* do not block in current thread */
+
+/* flags used only internally */
+#define _XBF_PAGE_CACHE (1 << 17)/* backed by pagecache */
+#define _XBF_PAGES (1 << 18)/* backed by refcounted pages */
+#define _XBF_RUN_QUEUES (1 << 19)/* run block device task queue */
+#define _XBF_DELWRI_Q (1 << 21)/* buffer on delwri queue */
- /*
- * Special flag for supporting metadata blocks smaller than a FSB.
- *
- * In this case we can have multiple xfs_buf_t on a single page and
- * need to lock out concurrent xfs_buf_t readers as they only
- * serialise access to the buffer.
- *
- * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
- * between reads of the page. Hence we can have one thread read the
- * page and modify it, but then race with another thread that thinks
- * the page is not up-to-date and hence reads it again.
- *
- * The result is that the first modifcation to the page is lost.
- * This sort of AGF/AGI reading race can happen when unlinking inodes
- * that require truncation and results in the AGI unlinked list
- * modifications being lost.
- */
- _XBF_PAGE_LOCKED = (1 << 22),
+/*
+ * Special flag for supporting metadata blocks smaller than a FSB.
+ *
+ * In this case we can have multiple xfs_buf_t on a single page and
+ * need to lock out concurrent xfs_buf_t readers as they only
+ * serialise access to the buffer.
+ *
+ * If the FSB size >= PAGE_CACHE_SIZE case, we have no serialisation
+ * between reads of the page. Hence we can have one thread read the
+ * page and modify it, but then race with another thread that thinks
+ * the page is not up-to-date and hence reads it again.
+ *
+ * The result is that the first modifcation to the page is lost.
+ * This sort of AGF/AGI reading race can happen when unlinking inodes
+ * that require truncation and results in the AGI unlinked list
+ * modifications being lost.
+ */
+#define _XBF_PAGE_LOCKED (1 << 22)
- /*
- * If we try a barrier write, but it fails we have to communicate
- * this to the upper layers. Unfortunately b_error gets overwritten
- * when the buffer is re-issued so we have to add another flag to
- * keep this information.
- */
- _XFS_BARRIER_FAILED = (1 << 23),
-} xfs_buf_flags_t;
+typedef unsigned int xfs_buf_flags_t;
#define XFS_BUF_FLAGS \
{ XBF_READ, "READ" }, \
@@ -104,7 +95,6 @@ typedef enum {
{ XBF_DONE, "DONE" }, \
{ XBF_DELWRI, "DELWRI" }, \
{ XBF_STALE, "STALE" }, \
- { XBF_FS_MANAGED, "FS_MANAGED" }, \
{ XBF_ORDERED, "ORDERED" }, \
{ XBF_READ_AHEAD, "READ_AHEAD" }, \
{ XBF_LOCK, "LOCK" }, /* should never be set */\
@@ -114,8 +104,7 @@ typedef enum {
{ _XBF_PAGES, "PAGES" }, \
{ _XBF_RUN_QUEUES, "RUN_QUEUES" }, \
{ _XBF_DELWRI_Q, "DELWRI_Q" }, \
- { _XBF_PAGE_LOCKED, "PAGE_LOCKED" }, \
- { _XFS_BARRIER_FAILED, "BARRIER_FAILED" }
+ { _XBF_PAGE_LOCKED, "PAGE_LOCKED" }
typedef enum {
@@ -132,15 +121,11 @@ typedef struct xfs_buftarg {
dev_t bt_dev;
struct block_device *bt_bdev;
struct address_space *bt_mapping;
+ struct xfs_mount *bt_mount;
unsigned int bt_bsize;
unsigned int bt_sshift;
size_t bt_smask;
- /* per device buffer hash table */
- uint bt_hashmask;
- uint bt_hashshift;
- xfs_bufhash_t *bt_hash;
-
/* per device delwri queue */
struct task_struct *bt_task;
struct list_head bt_list;
@@ -168,35 +153,41 @@ typedef int (*xfs_buf_bdstrat_t)(struct xfs_buf *);
#define XB_PAGES 2
typedef struct xfs_buf {
+ /*
+ * first cacheline holds all the fields needed for an uncontended cache
+ * hit to be fully processed. The semaphore straddles the cacheline
+ * boundary, but the counter and lock sits on the first cacheline,
+ * which is the only bit that is touched if we hit the semaphore
+ * fast-path on locking.
+ */
+ struct rb_node b_rbnode; /* rbtree node */
+ xfs_off_t b_file_offset; /* offset in file */
+ size_t b_buffer_length;/* size of buffer in bytes */
+ atomic_t b_hold; /* reference count */
+ xfs_buf_flags_t b_flags; /* status flags */
struct semaphore b_sema; /* semaphore for lockables */
- unsigned long b_queuetime; /* time buffer was queued */
- atomic_t b_pin_count; /* pin count */
+
wait_queue_head_t b_waiters; /* unpin waiters */
struct list_head b_list;
- xfs_buf_flags_t b_flags; /* status flags */
- struct list_head b_hash_list; /* hash table list */
- xfs_bufhash_t *b_hash; /* hash table list start */
+ struct xfs_perag *b_pag; /* contains rbtree root */
xfs_buftarg_t *b_target; /* buffer target (device) */
- atomic_t b_hold; /* reference count */
xfs_daddr_t b_bn; /* block number for I/O */
- xfs_off_t b_file_offset; /* offset in file */
- size_t b_buffer_length;/* size of buffer in bytes */
size_t b_count_desired;/* desired transfer size */
void *b_addr; /* virtual address of buffer */
struct work_struct b_iodone_work;
- atomic_t b_io_remaining; /* #outstanding I/O requests */
xfs_buf_iodone_t b_iodone; /* I/O completion function */
xfs_buf_relse_t b_relse; /* releasing function */
- xfs_buf_bdstrat_t b_strat; /* pre-write function */
struct completion b_iowait; /* queue for I/O waiters */
void *b_fspriv;
void *b_fspriv2;
- struct xfs_mount *b_mount;
- unsigned short b_error; /* error code on I/O */
- unsigned int b_page_count; /* size of page array */
- unsigned int b_offset; /* page offset in first page */
struct page **b_pages; /* array of page pointers */
struct page *b_page_array[XB_PAGES]; /* inline pages */
+ unsigned long b_queuetime; /* time buffer was queued */
+ atomic_t b_pin_count; /* pin count */
+ atomic_t b_io_remaining; /* #outstanding I/O requests */
+ unsigned int b_page_count; /* size of page array */
+ unsigned int b_offset; /* page offset in first page */
+ unsigned short b_error; /* error code on I/O */
#ifdef XFS_BUF_LOCK_TRACKING
int b_last_holder;
#endif
@@ -215,11 +206,13 @@ extern xfs_buf_t *xfs_buf_read(xfs_buftarg_t *, xfs_off_t, size_t,
xfs_buf_flags_t);
extern xfs_buf_t *xfs_buf_get_empty(size_t, xfs_buftarg_t *);
-extern xfs_buf_t *xfs_buf_get_noaddr(size_t, xfs_buftarg_t *);
+extern xfs_buf_t *xfs_buf_get_uncached(struct xfs_buftarg *, size_t, int);
extern int xfs_buf_associate_memory(xfs_buf_t *, void *, size_t);
extern void xfs_buf_hold(xfs_buf_t *);
-extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t,
- xfs_buf_flags_t);
+extern void xfs_buf_readahead(xfs_buftarg_t *, xfs_off_t, size_t);
+struct xfs_buf *xfs_buf_read_uncached(struct xfs_mount *mp,
+ struct xfs_buftarg *target,
+ xfs_daddr_t daddr, size_t length, int flags);
/* Releasing Buffers */
extern void xfs_buf_free(xfs_buf_t *);
@@ -244,11 +237,8 @@ extern int xfs_buf_iorequest(xfs_buf_t *);
extern int xfs_buf_iowait(xfs_buf_t *);
extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
xfs_buf_rw_t);
-
-static inline int xfs_buf_iostrategy(xfs_buf_t *bp)
-{
- return bp->b_strat ? bp->b_strat(bp) : xfs_buf_iorequest(bp);
-}
+#define xfs_buf_zero(bp, off, len) \
+ xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
static inline int xfs_buf_geterror(xfs_buf_t *bp)
{
@@ -258,11 +248,6 @@ static inline int xfs_buf_geterror(xfs_buf_t *bp)
/* Buffer Utility Routines */
extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t);
-/* Pinning Buffer Storage in Memory */
-extern void xfs_buf_pin(xfs_buf_t *);
-extern void xfs_buf_unpin(xfs_buf_t *);
-extern int xfs_buf_ispin(xfs_buf_t *);
-
/* Delayed Write Buffer Routines */
extern void xfs_buf_delwri_dequeue(xfs_buf_t *);
extern void xfs_buf_delwri_promote(xfs_buf_t *);
@@ -288,8 +273,6 @@ extern void xfs_buf_terminate(void);
XFS_BUF_DONE(bp); \
} while (0)
-#define XFS_BUF_UNMANAGE(bp) ((bp)->b_flags &= ~XBF_FS_MANAGED)
-
#define XFS_BUF_DELAYWRITE(bp) ((bp)->b_flags |= XBF_DELWRI)
#define XFS_BUF_UNDELAYWRITE(bp) xfs_buf_delwri_dequeue(bp)
#define XFS_BUF_ISDELAYWRITE(bp) ((bp)->b_flags & XBF_DELWRI)
@@ -326,8 +309,6 @@ extern void xfs_buf_terminate(void);
#define XFS_BUF_IODONE_FUNC(bp) ((bp)->b_iodone)
#define XFS_BUF_SET_IODONE_FUNC(bp, func) ((bp)->b_iodone = (func))
#define XFS_BUF_CLR_IODONE_FUNC(bp) ((bp)->b_iodone = NULL)
-#define XFS_BUF_SET_BDSTRAT_FUNC(bp, func) ((bp)->b_strat = (func))
-#define XFS_BUF_CLR_BDSTRAT_FUNC(bp) ((bp)->b_strat = NULL)
#define XFS_BUF_FSPRIVATE(bp, type) ((type)(bp)->b_fspriv)
#define XFS_BUF_SET_FSPRIVATE(bp, val) ((bp)->b_fspriv = (void*)(val))
@@ -351,7 +332,7 @@ extern void xfs_buf_terminate(void);
#define XFS_BUF_SET_VTYPE(bp, type) do { } while (0)
#define XFS_BUF_SET_REF(bp, ref) do { } while (0)
-#define XFS_BUF_ISPINNED(bp) xfs_buf_ispin(bp)
+#define XFS_BUF_ISPINNED(bp) atomic_read(&((bp)->b_pin_count))
#define XFS_BUF_VALUSEMA(bp) xfs_buf_lock_value(bp)
#define XFS_BUF_CPSEMA(bp) (xfs_buf_cond_lock(bp) == 0)
@@ -370,27 +351,11 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)
xfs_buf_rele(bp);
}
-#define xfs_bpin(bp) xfs_buf_pin(bp)
-#define xfs_bunpin(bp) xfs_buf_unpin(bp)
-#define xfs_biodone(bp) xfs_buf_ioend(bp, 0)
-
-#define xfs_biomove(bp, off, len, data, rw) \
- xfs_buf_iomove((bp), (off), (len), (data), \
- ((rw) == XBF_WRITE) ? XBRW_WRITE : XBRW_READ)
-
-#define xfs_biozero(bp, off, len) \
- xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO)
-
-#define xfs_iowait(bp) xfs_buf_iowait(bp)
-
-#define xfs_baread(target, rablkno, ralen) \
- xfs_buf_readahead((target), (rablkno), (ralen), XBF_DONT_BLOCK)
-
-
/*
* Handling of buftargs.
*/
-extern xfs_buftarg_t *xfs_alloc_buftarg(struct block_device *, int, const char *);
+extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *,
+ struct block_device *, int, const char *);
extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);
extern void xfs_wait_buftarg(xfs_buftarg_t *);
extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int);
diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h
deleted file mode 100644
index 55bddf3b609..00000000000
--- a/fs/xfs/linux-2.6/xfs_cred.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_CRED_H__
-#define __XFS_CRED_H__
-
-#include <linux/capability.h>
-
-/*
- * Credentials
- */
-typedef const struct cred cred_t;
-
-#endif /* __XFS_CRED_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_dmapi_priv.h b/fs/xfs/linux-2.6/xfs_dmapi_priv.h
deleted file mode 100644
index a8b0b1685ee..00000000000
--- a/fs/xfs/linux-2.6/xfs_dmapi_priv.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright (c) 2000-2006 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_DMAPI_PRIV_H__
-#define __XFS_DMAPI_PRIV_H__
-
-/*
- * Based on IO_ISDIRECT, decide which i_ flag is set.
- */
-#define DM_SEM_FLAG_RD(ioflags) (((ioflags) & IO_ISDIRECT) ? \
- DM_FLAGS_IMUX : 0)
-#define DM_SEM_FLAG_WR (DM_FLAGS_IALLOCSEM_WR | DM_FLAGS_IMUX)
-
-#endif /*__XFS_DMAPI_PRIV_H__*/
diff --git a/fs/xfs/linux-2.6/xfs_export.c b/fs/xfs/linux-2.6/xfs_export.c
index e7839ee49e4..3764d74790e 100644
--- a/fs/xfs/linux-2.6/xfs_export.c
+++ b/fs/xfs/linux-2.6/xfs_export.c
@@ -23,13 +23,13 @@
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_export.h"
#include "xfs_vnodeops.h"
#include "xfs_bmap_btree.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
+#include "xfs_trace.h"
/*
* Note that we only accept fileids which are long enough rather than allow
@@ -132,8 +132,7 @@ xfs_nfs_get_inode(
* fine and not an indication of a corrupted filesystem as clients can
* send invalid file handles and we have to handle it gracefully..
*/
- error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED,
- XFS_ILOCK_SHARED, &ip);
+ error = xfs_iget(mp, NULL, ino, XFS_IGET_UNTRUSTED, 0, &ip);
if (error) {
/*
* EINVAL means the inode cluster doesn't exist anymore.
@@ -148,11 +147,10 @@ xfs_nfs_get_inode(
}
if (ip->i_d.di_gen != generation) {
- xfs_iput_new(ip, XFS_ILOCK_SHARED);
+ IRELE(ip);
return ERR_PTR(-ENOENT);
}
- xfs_iunlock(ip, XFS_ILOCK_SHARED);
return VFS_I(ip);
}
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 257a56b127c..ba8ad422a16 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -22,23 +22,15 @@
#include "xfs_inum.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_trans.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
#include "xfs_alloc.h"
-#include "xfs_btree.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dir2_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_error.h"
-#include "xfs_rw.h"
#include "xfs_vnodeops.h"
#include "xfs_da_btree.h"
#include "xfs_ioctl.h"
@@ -108,7 +100,7 @@ xfs_file_fsync(
int error = 0;
int log_flushed = 0;
- xfs_itrace_entry(ip);
+ trace_xfs_file_fsync(ip);
if (XFS_FORCED_SHUTDOWN(ip->i_mount))
return -XFS_ERROR(EIO);
@@ -166,8 +158,7 @@ xfs_file_fsync(
* transaction. So we play it safe and fire off the
* transaction anyway.
*/
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
- xfs_trans_ihold(tp, ip);
+ xfs_trans_ijoin(tp, ip);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
xfs_trans_set_sync(tp);
error = _xfs_trans_commit(tp, 0, &log_flushed);
@@ -275,20 +266,6 @@ xfs_file_aio_read(
mutex_lock(&inode->i_mutex);
xfs_ilock(ip, XFS_IOLOCK_SHARED);
- if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
- int dmflags = FILP_DELAY_FLAG(file) | DM_SEM_FLAG_RD(ioflags);
- int iolock = XFS_IOLOCK_SHARED;
-
- ret = -XFS_SEND_DATA(mp, DM_EVENT_READ, ip, iocb->ki_pos, size,
- dmflags, &iolock);
- if (ret) {
- xfs_iunlock(ip, XFS_IOLOCK_SHARED);
- if (unlikely(ioflags & IO_ISDIRECT))
- mutex_unlock(&inode->i_mutex);
- return ret;
- }
- }
-
if (unlikely(ioflags & IO_ISDIRECT)) {
if (inode->i_mapping->nrpages) {
ret = -xfs_flushinval_pages(ip,
@@ -321,7 +298,6 @@ xfs_file_splice_read(
unsigned int flags)
{
struct xfs_inode *ip = XFS_I(infilp->f_mapping->host);
- struct xfs_mount *mp = ip->i_mount;
int ioflags = 0;
ssize_t ret;
@@ -335,18 +311,6 @@ xfs_file_splice_read(
xfs_ilock(ip, XFS_IOLOCK_SHARED);
- if (DM_EVENT_ENABLED(ip, DM_EVENT_READ) && !(ioflags & IO_INVIS)) {
- int iolock = XFS_IOLOCK_SHARED;
- int error;
-
- error = XFS_SEND_DATA(mp, DM_EVENT_READ, ip, *ppos, count,
- FILP_DELAY_FLAG(infilp), &iolock);
- if (error) {
- xfs_iunlock(ip, XFS_IOLOCK_SHARED);
- return -error;
- }
- }
-
trace_xfs_file_splice_read(ip, count, *ppos, ioflags);
ret = generic_file_splice_read(infilp, ppos, pipe, count, flags);
@@ -367,7 +331,6 @@ xfs_file_splice_write(
{
struct inode *inode = outfilp->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode);
- struct xfs_mount *mp = ip->i_mount;
xfs_fsize_t isize, new_size;
int ioflags = 0;
ssize_t ret;
@@ -382,18 +345,6 @@ xfs_file_splice_write(
xfs_ilock(ip, XFS_IOLOCK_EXCL);
- if (DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) && !(ioflags & IO_INVIS)) {
- int iolock = XFS_IOLOCK_EXCL;
- int error;
-
- error = XFS_SEND_DATA(mp, DM_EVENT_WRITE, ip, *ppos, count,
- FILP_DELAY_FLAG(outfilp), &iolock);
- if (error) {
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- return -error;
- }
- }
-
new_size = *ppos + count;
xfs_ilock(ip, XFS_ILOCK_EXCL);
@@ -463,7 +414,7 @@ xfs_zero_last_block(
last_fsb = XFS_B_TO_FSBT(mp, isize);
nimaps = 1;
error = xfs_bmapi(NULL, ip, last_fsb, 1, 0, NULL, 0, &imap,
- &nimaps, NULL, NULL);
+ &nimaps, NULL);
if (error) {
return error;
}
@@ -558,7 +509,7 @@ xfs_zero_eof(
nimaps = 1;
zero_count_fsb = end_zero_fsb - start_zero_fsb + 1;
error = xfs_bmapi(NULL, ip, start_zero_fsb, zero_count_fsb,
- 0, NULL, 0, &imap, &nimaps, NULL, NULL);
+ 0, NULL, 0, &imap, &nimaps, NULL);
if (error) {
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_IOLOCK_EXCL));
return error;
@@ -627,7 +578,6 @@ xfs_file_aio_write(
int ioflags = 0;
xfs_fsize_t isize, new_size;
int iolock;
- int eventsent = 0;
size_t ocount = 0, count;
int need_i_mutex;
@@ -673,33 +623,6 @@ start:
goto out_unlock_mutex;
}
- if ((DM_EVENT_ENABLED(ip, DM_EVENT_WRITE) &&
- !(ioflags & IO_INVIS) && !eventsent)) {
- int dmflags = FILP_DELAY_FLAG(file);
-
- if (need_i_mutex)
- dmflags |= DM_FLAGS_IMUX;
-
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- error = XFS_SEND_DATA(ip->i_mount, DM_EVENT_WRITE, ip,
- pos, count, dmflags, &iolock);
- if (error) {
- goto out_unlock_internal;
- }
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- eventsent = 1;
-
- /*
- * The iolock was dropped and reacquired in XFS_SEND_DATA
- * so we have to recheck the size when appending.
- * We will only "goto start;" once, since having sent the
- * event prevents another call to XFS_SEND_DATA, which is
- * what allows the size to change in the first place.
- */
- if ((file->f_flags & O_APPEND) && pos != ip->i_size)
- goto start;
- }
-
if (ioflags & IO_ISDIRECT) {
xfs_buftarg_t *target =
XFS_IS_REALTIME_INODE(ip) ?
@@ -830,22 +753,6 @@ write_retry:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
- if (ret == -ENOSPC &&
- DM_EVENT_ENABLED(ip, DM_EVENT_NOSPACE) && !(ioflags & IO_INVIS)) {
- xfs_iunlock(ip, iolock);
- if (need_i_mutex)
- mutex_unlock(&inode->i_mutex);
- error = XFS_SEND_NAMESP(ip->i_mount, DM_EVENT_NOSPACE, ip,
- DM_RIGHT_NULL, ip, DM_RIGHT_NULL, NULL, NULL,
- 0, 0, 0); /* Delay flag intentionally unused */
- if (need_i_mutex)
- mutex_lock(&inode->i_mutex);
- xfs_ilock(ip, iolock);
- if (error)
- goto out_unlock_internal;
- goto start;
- }
-
error = -ret;
if (ret <= 0)
goto out_unlock_internal;
@@ -1014,9 +921,6 @@ const struct file_operations xfs_file_operations = {
.open = xfs_file_open,
.release = xfs_file_release,
.fsync = xfs_file_fsync,
-#ifdef HAVE_FOP_OPEN_EXEC
- .open_exec = xfs_file_open_exec,
-#endif
};
const struct file_operations xfs_dir_file_operations = {
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.c b/fs/xfs/linux-2.6/xfs_fs_subr.c
index b6918d76bc7..ed88ed16811 100644
--- a/fs/xfs/linux-2.6/xfs_fs_subr.c
+++ b/fs/xfs/linux-2.6/xfs_fs_subr.c
@@ -21,10 +21,6 @@
#include "xfs_inode.h"
#include "xfs_trace.h"
-int fs_noerr(void) { return 0; }
-int fs_nosys(void) { return ENOSYS; }
-void fs_noval(void) { return; }
-
/*
* note: all filemap functions return negative error codes. These
* need to be inverted before returning to the xfs core functions.
@@ -36,10 +32,9 @@ xfs_tosspages(
xfs_off_t last,
int fiopt)
{
- struct address_space *mapping = VFS_I(ip)->i_mapping;
-
- if (mapping->nrpages)
- truncate_inode_pages(mapping, first);
+ /* can't toss partial tail pages, so mask them out */
+ last &= ~(PAGE_SIZE - 1);
+ truncate_inode_pages_range(VFS_I(ip)->i_mapping, first, last - 1);
}
int
@@ -54,12 +49,11 @@ xfs_flushinval_pages(
trace_xfs_pagecache_inval(ip, first, last);
- if (mapping->nrpages) {
- xfs_iflags_clear(ip, XFS_ITRUNCATED);
- ret = filemap_write_and_wait(mapping);
- if (!ret)
- truncate_inode_pages(mapping, first);
- }
+ xfs_iflags_clear(ip, XFS_ITRUNCATED);
+ ret = filemap_write_and_wait_range(mapping, first,
+ last == -1 ? LLONG_MAX : last);
+ if (!ret)
+ truncate_inode_pages_range(mapping, first, last);
return -ret;
}
@@ -75,10 +69,9 @@ xfs_flush_pages(
int ret = 0;
int ret2;
- if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
- xfs_iflags_clear(ip, XFS_ITRUNCATED);
- ret = -filemap_fdatawrite(mapping);
- }
+ xfs_iflags_clear(ip, XFS_ITRUNCATED);
+ ret = -filemap_fdatawrite_range(mapping, first,
+ last == -1 ? LLONG_MAX : last);
if (flags & XBF_ASYNC)
return ret;
ret2 = xfs_wait_on_pages(ip, first, last);
@@ -95,7 +88,9 @@ xfs_wait_on_pages(
{
struct address_space *mapping = VFS_I(ip)->i_mapping;
- if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
- return -filemap_fdatawait(mapping);
+ if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
+ return -filemap_fdatawait_range(mapping, first,
+ last == -1 ? ip->i_size - 1 : last);
+ }
return 0;
}
diff --git a/fs/xfs/linux-2.6/xfs_fs_subr.h b/fs/xfs/linux-2.6/xfs_fs_subr.h
deleted file mode 100644
index 82bb19b2599..00000000000
--- a/fs/xfs/linux-2.6/xfs_fs_subr.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_FS_SUBR_H__
-#define __XFS_FS_SUBR_H__
-
-extern int fs_noerr(void);
-extern int fs_nosys(void);
-extern void fs_noval(void);
-
-#endif /* __XFS_FS_SUBR_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_globals.c b/fs/xfs/linux-2.6/xfs_globals.c
index 2ae8b1ccb02..76e81cff70b 100644
--- a/fs/xfs/linux-2.6/xfs_globals.c
+++ b/fs/xfs/linux-2.6/xfs_globals.c
@@ -16,7 +16,6 @@
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
-#include "xfs_cred.h"
#include "xfs_sysctl.h"
/*
diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h
deleted file mode 100644
index 69f71caf061..00000000000
--- a/fs/xfs/linux-2.6/xfs_globals.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/*
- * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_GLOBALS_H__
-#define __XFS_GLOBALS_H__
-
-extern uint64_t xfs_panic_mask; /* set to cause more panics */
-
-#endif /* __XFS_GLOBALS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index e59a8106283..2ea238f6d38 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -23,24 +23,15 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_alloc.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dir2_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_ioctl.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
#include "xfs_rtalloc.h"
#include "xfs_itable.h"
#include "xfs_error.h"
-#include "xfs_rw.h"
#include "xfs_attr.h"
#include "xfs_bmap.h"
#include "xfs_buf_item.h"
@@ -794,10 +785,12 @@ xfs_ioc_fsgetxattr(
{
struct fsxattr fa;
+ memset(&fa, 0, sizeof(struct fsxattr));
+
xfs_ilock(ip, XFS_ILOCK_SHARED);
fa.fsx_xflags = xfs_ip2xflags(ip);
fa.fsx_extsize = ip->i_d.di_extsize << ip->i_mount->m_sb.sb_blocklog;
- fa.fsx_projid = ip->i_d.di_projid;
+ fa.fsx_projid = xfs_get_projid(ip);
if (attr) {
if (ip->i_afp) {
@@ -908,7 +901,7 @@ xfs_ioctl_setattr(
struct xfs_dquot *olddquot = NULL;
int code;
- xfs_itrace_entry(ip);
+ trace_xfs_ioctl_setattr(ip);
if (mp->m_flags & XFS_MOUNT_RDONLY)
return XFS_ERROR(EROFS);
@@ -916,6 +909,13 @@ xfs_ioctl_setattr(
return XFS_ERROR(EIO);
/*
+ * Disallow 32bit project ids when projid32bit feature is not enabled.
+ */
+ if ((mask & FSX_PROJID) && (fa->fsx_projid > (__uint16_t)-1) &&
+ !xfs_sb_version_hasprojid32bit(&ip->i_mount->m_sb))
+ return XFS_ERROR(EINVAL);
+
+ /*
* If disk quotas is on, we make sure that the dquots do exist on disk,
* before we start any other transactions. Trying to do this later
* is messy. We don't care to take a readlock to look at the ids
@@ -961,7 +961,7 @@ xfs_ioctl_setattr(
if (mask & FSX_PROJID) {
if (XFS_IS_QUOTA_RUNNING(mp) &&
XFS_IS_PQUOTA_ON(mp) &&
- ip->i_d.di_projid != fa->fsx_projid) {
+ xfs_get_projid(ip) != fa->fsx_projid) {
ASSERT(tp);
code = xfs_qm_vop_chown_reserve(tp, ip, udqp, gdqp,
capable(CAP_FOWNER) ?
@@ -1043,8 +1043,7 @@ xfs_ioctl_setattr(
}
}
- xfs_trans_ijoin(tp, ip, lock_flags);
- xfs_trans_ihold(tp, ip);
+ xfs_trans_ijoin(tp, ip);
/*
* Change file ownership. Must be the owner or privileged.
@@ -1064,12 +1063,12 @@ xfs_ioctl_setattr(
* Change the ownerships and register quota modifications
* in the transaction.
*/
- if (ip->i_d.di_projid != fa->fsx_projid) {
+ if (xfs_get_projid(ip) != fa->fsx_projid) {
if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_PQUOTA_ON(mp)) {
olddquot = xfs_qm_vop_chown(tp, ip,
&ip->i_gdquot, gdqp);
}
- ip->i_d.di_projid = fa->fsx_projid;
+ xfs_set_projid(ip, fa->fsx_projid);
/*
* We may have to rev the inode as well as
@@ -1089,8 +1088,8 @@ xfs_ioctl_setattr(
xfs_diflags_to_linux(ip);
}
+ xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
XFS_STATS_INC(xs_ig_attrchg);
@@ -1116,16 +1115,7 @@ xfs_ioctl_setattr(
xfs_qm_dqrele(udqp);
xfs_qm_dqrele(gdqp);
- if (code)
- return code;
-
- if (DM_EVENT_ENABLED(ip, DM_EVENT_ATTRIBUTE)) {
- XFS_SEND_NAMESP(mp, DM_EVENT_ATTRIBUTE, ip, DM_RIGHT_NULL,
- NULL, DM_RIGHT_NULL, NULL, NULL, 0, 0,
- (mask & FSX_NONBLOCK) ? DM_FLAGS_NDELAY : 0);
- }
-
- return 0;
+ return code;
error_return:
xfs_qm_dqrele(udqp);
@@ -1301,7 +1291,7 @@ xfs_file_ioctl(
if (filp->f_mode & FMODE_NOCMTIME)
ioflags |= IO_INVIS;
- xfs_itrace_entry(ip);
+ trace_xfs_file_ioctl(ip);
switch (cmd) {
case XFS_IOC_ALLOCSP:
@@ -1311,7 +1301,8 @@ xfs_file_ioctl(
case XFS_IOC_ALLOCSP64:
case XFS_IOC_FREESP64:
case XFS_IOC_RESVSP64:
- case XFS_IOC_UNRESVSP64: {
+ case XFS_IOC_UNRESVSP64:
+ case XFS_IOC_ZERO_RANGE: {
xfs_flock64_t bf;
if (copy_from_user(&bf, arg, sizeof(bf)))
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index 52ed49e6465..b3486dfa552 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.c
@@ -28,12 +28,8 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
-#include "xfs_attr_sf.h"
-#include "xfs_dir2_sf.h"
#include "xfs_vnode.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
@@ -168,7 +164,8 @@ xfs_ioctl32_bstat_copyin(
get_user(bstat->bs_extsize, &bstat32->bs_extsize) ||
get_user(bstat->bs_extents, &bstat32->bs_extents) ||
get_user(bstat->bs_gen, &bstat32->bs_gen) ||
- get_user(bstat->bs_projid, &bstat32->bs_projid) ||
+ get_user(bstat->bs_projid_lo, &bstat32->bs_projid_lo) ||
+ get_user(bstat->bs_projid_hi, &bstat32->bs_projid_hi) ||
get_user(bstat->bs_dmevmask, &bstat32->bs_dmevmask) ||
get_user(bstat->bs_dmstate, &bstat32->bs_dmstate) ||
get_user(bstat->bs_aextents, &bstat32->bs_aextents))
@@ -222,6 +219,7 @@ xfs_bulkstat_one_fmt_compat(
put_user(buffer->bs_extents, &p32->bs_extents) ||
put_user(buffer->bs_gen, &p32->bs_gen) ||
put_user(buffer->bs_projid, &p32->bs_projid) ||
+ put_user(buffer->bs_projid_hi, &p32->bs_projid_hi) ||
put_user(buffer->bs_dmevmask, &p32->bs_dmevmask) ||
put_user(buffer->bs_dmstate, &p32->bs_dmstate) ||
put_user(buffer->bs_aextents, &p32->bs_aextents))
@@ -544,7 +542,7 @@ xfs_file_compat_ioctl(
if (filp->f_mode & FMODE_NOCMTIME)
ioflags |= IO_INVIS;
- xfs_itrace_entry(ip);
+ trace_xfs_file_compat_ioctl(ip);
switch (cmd) {
/* No size or alignment issues on any arch */
@@ -578,6 +576,7 @@ xfs_file_compat_ioctl(
case XFS_IOC_FSGEOMETRY_V1:
case XFS_IOC_FSGROWFSDATA:
case XFS_IOC_FSGROWFSRT:
+ case XFS_IOC_ZERO_RANGE:
return xfs_file_ioctl(filp, cmd, p);
#else
case XFS_IOC_ALLOCSP_32:
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.h b/fs/xfs/linux-2.6/xfs_ioctl32.h
index 1024c4f8ba0..08b605792a9 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl32.h
+++ b/fs/xfs/linux-2.6/xfs_ioctl32.h
@@ -65,8 +65,10 @@ typedef struct compat_xfs_bstat {
__s32 bs_extsize; /* extent size */
__s32 bs_extents; /* number of extents */
__u32 bs_gen; /* generation count */
- __u16 bs_projid; /* project id */
- unsigned char bs_pad[14]; /* pad space, unused */
+ __u16 bs_projid_lo; /* lower part of project id */
+#define bs_projid bs_projid_lo /* (previously just bs_projid) */
+ __u16 bs_projid_hi; /* high part of project id */
+ unsigned char bs_pad[12]; /* pad space, unused */
__u32 bs_dmevmask; /* DMIG event mask */
__u16 bs_dmstate; /* DMIG state info */
__u16 bs_aextents; /* attribute number of extents */
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 44f0b2de153..96107efc0c6 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -24,21 +24,13 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_alloc.h"
-#include "xfs_dmapi.h"
#include "xfs_quota.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_bmap.h"
-#include "xfs_btree.h"
-#include "xfs_ialloc.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_itable.h"
@@ -88,7 +80,7 @@ xfs_mark_inode_dirty_sync(
{
struct inode *inode = VFS_I(ip);
- if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR)))
+ if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
mark_inode_dirty_sync(inode);
}
@@ -98,46 +90,11 @@ xfs_mark_inode_dirty(
{
struct inode *inode = VFS_I(ip);
- if (!(inode->i_state & (I_WILL_FREE|I_FREEING|I_CLEAR)))
+ if (!(inode->i_state & (I_WILL_FREE|I_FREEING)))
mark_inode_dirty(inode);
}
/*
- * Change the requested timestamp in the given inode.
- * We don't lock across timestamp updates, and we don't log them but
- * we do record the fact that there is dirty information in core.
- */
-void
-xfs_ichgtime(
- xfs_inode_t *ip,
- int flags)
-{
- struct inode *inode = VFS_I(ip);
- timespec_t tv;
- int sync_it = 0;
-
- tv = current_fs_time(inode->i_sb);
-
- if ((flags & XFS_ICHGTIME_MOD) &&
- !timespec_equal(&inode->i_mtime, &tv)) {
- inode->i_mtime = tv;
- sync_it = 1;
- }
- if ((flags & XFS_ICHGTIME_CHG) &&
- !timespec_equal(&inode->i_ctime, &tv)) {
- inode->i_ctime = tv;
- sync_it = 1;
- }
-
- /*
- * Update complete - now make sure everyone knows that the inode
- * is dirty.
- */
- if (sync_it)
- xfs_mark_inode_dirty_sync(ip);
-}
-
-/*
* Hook in SELinux. This is not quite correct yet, what we really need
* here (as we do for default ACLs) is a mechanism by which creation of
* these attrs can be journalled at inode creation time (along with the
@@ -232,7 +189,7 @@ xfs_vn_mknod(
}
xfs_dentry_to_name(&name, dentry);
- error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip, NULL);
+ error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
if (unlikely(error))
goto out_free_acl;
@@ -360,7 +317,7 @@ xfs_vn_link(
if (unlikely(error))
return -error;
- atomic_inc(&inode->i_count);
+ ihold(inode);
d_instantiate(dentry, inode);
return 0;
}
@@ -405,7 +362,7 @@ xfs_vn_symlink(
(irix_symlink_mode ? 0777 & ~current_umask() : S_IRWXUGO);
xfs_dentry_to_name(&name, dentry);
- error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip, NULL);
+ error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip);
if (unlikely(error))
goto out;
@@ -496,7 +453,7 @@ xfs_vn_getattr(
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
- xfs_itrace_entry(ip);
+ trace_xfs_getattr(ip);
if (XFS_FORCED_SHUTDOWN(mp))
return XFS_ERROR(EIO);
@@ -548,21 +505,6 @@ xfs_vn_setattr(
return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0);
}
-/*
- * block_truncate_page can return an error, but we can't propagate it
- * at all here. Leave a complaint + stack trace in the syslog because
- * this could be bad. If it is bad, we need to propagate the error further.
- */
-STATIC void
-xfs_vn_truncate(
- struct inode *inode)
-{
- int error;
- error = block_truncate_page(inode->i_mapping, inode->i_size,
- xfs_get_blocks);
- WARN_ON(error);
-}
-
STATIC long
xfs_vn_fallocate(
struct inode *inode,
@@ -687,7 +629,7 @@ xfs_vn_fiemap(
fieinfo->fi_extents_max + 1;
bm.bmv_count = min_t(__s32, bm.bmv_count,
(PAGE_SIZE * 16 / sizeof(struct getbmapx)));
- bm.bmv_iflags = BMV_IF_PREALLOC;
+ bm.bmv_iflags = BMV_IF_PREALLOC | BMV_IF_NO_HOLES;
if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR)
bm.bmv_iflags |= BMV_IF_ATTRFORK;
if (!(fieinfo->fi_flags & FIEMAP_FLAG_SYNC))
@@ -702,7 +644,6 @@ xfs_vn_fiemap(
static const struct inode_operations xfs_inode_operations = {
.check_acl = xfs_check_acl,
- .truncate = xfs_vn_truncate,
.getattr = xfs_vn_getattr,
.setattr = xfs_vn_setattr,
.setxattr = generic_setxattr,
@@ -819,7 +760,9 @@ xfs_setup_inode(
inode->i_ino = ip->i_ino;
inode->i_state = I_NEW;
- inode_add_to_lists(ip->i_mount->m_super, inode);
+
+ inode_sb_list_add(inode);
+ insert_inode_hash(inode);
inode->i_mode = ip->i_d.di_mode;
inode->i_nlink = ip->i_d.di_nlink;
diff --git a/fs/xfs/linux-2.6/xfs_linux.h b/fs/xfs/linux-2.6/xfs_linux.h
index facfb323a70..214ddd71ff7 100644
--- a/fs/xfs/linux-2.6/xfs_linux.h
+++ b/fs/xfs/linux-2.6/xfs_linux.h
@@ -71,6 +71,7 @@
#include <linux/random.h>
#include <linux/ctype.h>
#include <linux/writeback.h>
+#include <linux/capability.h>
#include <asm/page.h>
#include <asm/div64.h>
@@ -79,15 +80,12 @@
#include <asm/byteorder.h>
#include <asm/unaligned.h>
-#include <xfs_cred.h>
#include <xfs_vnode.h>
#include <xfs_stats.h>
#include <xfs_sysctl.h>
#include <xfs_iops.h>
#include <xfs_aops.h>
#include <xfs_super.h>
-#include <xfs_globals.h>
-#include <xfs_fs_subr.h>
#include <xfs_buf.h>
/*
@@ -145,7 +143,7 @@
#define SYNCHRONIZE() barrier()
#define __return_address __builtin_return_address(0)
-#define dfltprid 0
+#define XFS_PROJID_DEFAULT 0
#define MAXPATHLEN 1024
#define MIN(a,b) (min(a,b))
@@ -157,8 +155,6 @@
*/
#define xfs_sort(a,n,s,fn) sort(a,n,s,fn,NULL)
#define xfs_stack_trace() dump_stack()
-#define xfs_itruncate_data(ip, off) \
- (-vmtruncate(VFS_I(ip), (off)))
/* Move the kernel do_div definition off to one side */
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 067cafbfc63..29b9d642e93 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -16,7 +16,6 @@
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "xfs.h"
-#include "xfs_dmapi.h"
#include "xfs_sb.h"
#include "xfs_inum.h"
#include "xfs_log.h"
@@ -69,15 +68,15 @@ xfs_fs_set_xstate(
if (op != Q_XQUOTARM && !XFS_IS_QUOTA_RUNNING(mp))
return -ENOSYS;
- if (uflags & XFS_QUOTA_UDQ_ACCT)
+ if (uflags & FS_QUOTA_UDQ_ACCT)
flags |= XFS_UQUOTA_ACCT;
- if (uflags & XFS_QUOTA_PDQ_ACCT)
+ if (uflags & FS_QUOTA_PDQ_ACCT)
flags |= XFS_PQUOTA_ACCT;
- if (uflags & XFS_QUOTA_GDQ_ACCT)
+ if (uflags & FS_QUOTA_GDQ_ACCT)
flags |= XFS_GQUOTA_ACCT;
- if (uflags & XFS_QUOTA_UDQ_ENFD)
+ if (uflags & FS_QUOTA_UDQ_ENFD)
flags |= XFS_UQUOTA_ENFD;
- if (uflags & (XFS_QUOTA_PDQ_ENFD|XFS_QUOTA_GDQ_ENFD))
+ if (uflags & (FS_QUOTA_PDQ_ENFD|FS_QUOTA_GDQ_ENFD))
flags |= XFS_OQUOTA_ENFD;
switch (op) {
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 80938c736c2..9f3a78fe6ae 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -25,14 +25,11 @@
#include "xfs_ag.h"
#include "xfs_dir2.h"
#include "xfs_alloc.h"
-#include "xfs_dmapi.h"
#include "xfs_quota.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
@@ -43,12 +40,10 @@
#include "xfs_error.h"
#include "xfs_itable.h"
#include "xfs_fsops.h"
-#include "xfs_rw.h"
#include "xfs_attr.h"
#include "xfs_buf_item.h"
#include "xfs_utils.h"
#include "xfs_vnodeops.h"
-#include "xfs_version.h"
#include "xfs_log_priv.h"
#include "xfs_trans_priv.h"
#include "xfs_filestream.h"
@@ -94,7 +89,6 @@ mempool_t *xfs_ioend_pool;
#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and
* unwritten extent conversion */
#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */
-#define MNTOPT_OSYNCISOSYNC "osyncisosync" /* o_sync is REALLY o_sync */
#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */
#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */
#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */
@@ -116,9 +110,6 @@ mempool_t *xfs_ioend_pool;
#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
#define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */
-#define MNTOPT_DMAPI "dmapi" /* DMI enabled (DMAPI / XDSM) */
-#define MNTOPT_XDSM "xdsm" /* DMI enabled (DMAPI / XDSM) */
-#define MNTOPT_DMI "dmi" /* DMI enabled (DMAPI / XDSM) */
#define MNTOPT_DELAYLOG "delaylog" /* Delayed loging enabled */
#define MNTOPT_NODELAYLOG "nodelaylog" /* Delayed loging disabled */
@@ -172,15 +163,13 @@ suffix_strtoul(char *s, char **endp, unsigned int base)
STATIC int
xfs_parseargs(
struct xfs_mount *mp,
- char *options,
- char **mtpt)
+ char *options)
{
struct super_block *sb = mp->m_super;
char *this_char, *value, *eov;
int dsunit = 0;
int dswidth = 0;
int iosize = 0;
- int dmapi_implies_ikeep = 1;
__uint8_t iosizelog = 0;
/*
@@ -243,15 +232,10 @@ xfs_parseargs(
if (!mp->m_logname)
return ENOMEM;
} else if (!strcmp(this_char, MNTOPT_MTPT)) {
- if (!value || !*value) {
- cmn_err(CE_WARN,
- "XFS: %s option requires an argument",
- this_char);
- return EINVAL;
- }
- *mtpt = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
- if (!*mtpt)
- return ENOMEM;
+ cmn_err(CE_WARN,
+ "XFS: %s option not allowed on this system",
+ this_char);
+ return EINVAL;
} else if (!strcmp(this_char, MNTOPT_RTDEV)) {
if (!value || !*value) {
cmn_err(CE_WARN,
@@ -288,8 +272,6 @@ xfs_parseargs(
mp->m_flags &= ~XFS_MOUNT_GRPID;
} else if (!strcmp(this_char, MNTOPT_WSYNC)) {
mp->m_flags |= XFS_MOUNT_WSYNC;
- } else if (!strcmp(this_char, MNTOPT_OSYNCISOSYNC)) {
- mp->m_flags |= XFS_MOUNT_OSYNCISOSYNC;
} else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
mp->m_flags |= XFS_MOUNT_NORECOVERY;
} else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
@@ -329,7 +311,6 @@ xfs_parseargs(
} else if (!strcmp(this_char, MNTOPT_IKEEP)) {
mp->m_flags |= XFS_MOUNT_IKEEP;
} else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
- dmapi_implies_ikeep = 0;
mp->m_flags &= ~XFS_MOUNT_IKEEP;
} else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
@@ -370,12 +351,6 @@ xfs_parseargs(
} else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
mp->m_qflags &= ~XFS_OQUOTA_ENFD;
- } else if (!strcmp(this_char, MNTOPT_DMAPI)) {
- mp->m_flags |= XFS_MOUNT_DMAPI;
- } else if (!strcmp(this_char, MNTOPT_XDSM)) {
- mp->m_flags |= XFS_MOUNT_DMAPI;
- } else if (!strcmp(this_char, MNTOPT_DMI)) {
- mp->m_flags |= XFS_MOUNT_DMAPI;
} else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
mp->m_flags |= XFS_MOUNT_DELAYLOG;
cmn_err(CE_WARN,
@@ -387,9 +362,11 @@ xfs_parseargs(
cmn_err(CE_WARN,
"XFS: ihashsize no longer used, option is deprecated.");
} else if (!strcmp(this_char, "osyncisdsync")) {
- /* no-op, this is now the default */
cmn_err(CE_WARN,
- "XFS: osyncisdsync is now the default, option is deprecated.");
+ "XFS: osyncisdsync has no effect, option is deprecated.");
+ } else if (!strcmp(this_char, "osyncisosync")) {
+ cmn_err(CE_WARN,
+ "XFS: osyncisosync has no effect, option is deprecated.");
} else if (!strcmp(this_char, "irixsgid")) {
cmn_err(CE_WARN,
"XFS: irixsgid is now a sysctl(2) variable, option is deprecated.");
@@ -430,12 +407,6 @@ xfs_parseargs(
return EINVAL;
}
- if ((mp->m_flags & XFS_MOUNT_DMAPI) && (!*mtpt || *mtpt[0] == '\0')) {
- printk("XFS: %s option needs the mount point option as well\n",
- MNTOPT_DMAPI);
- return EINVAL;
- }
-
if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
cmn_err(CE_WARN,
"XFS: sunit and swidth must be specified together");
@@ -449,18 +420,6 @@ xfs_parseargs(
return EINVAL;
}
- /*
- * Applications using DMI filesystems often expect the
- * inode generation number to be monotonically increasing.
- * If we delete inode chunks we break this assumption, so
- * keep unused inode chunks on disk for DMI filesystems
- * until we come up with a better solution.
- * Note that if "ikeep" or "noikeep" mount options are
- * supplied, then they are honored.
- */
- if ((mp->m_flags & XFS_MOUNT_DMAPI) && dmapi_implies_ikeep)
- mp->m_flags |= XFS_MOUNT_IKEEP;
-
done:
if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) {
/*
@@ -539,10 +498,8 @@ xfs_showargs(
{ XFS_MOUNT_SWALLOC, "," MNTOPT_SWALLOC },
{ XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID },
{ XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY },
- { XFS_MOUNT_OSYNCISOSYNC, "," MNTOPT_OSYNCISOSYNC },
{ XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 },
{ XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM },
- { XFS_MOUNT_DMAPI, "," MNTOPT_DMAPI },
{ XFS_MOUNT_GRPID, "," MNTOPT_GRPID },
{ XFS_MOUNT_DELAYLOG, "," MNTOPT_DELAYLOG },
{ 0, NULL }
@@ -619,7 +576,7 @@ xfs_max_file_offset(
/* Figure out maximum filesize, on Linux this can depend on
* the filesystem blocksize (on 32 bit platforms).
- * __block_prepare_write does this in an [unsigned] long...
+ * __block_write_begin does this in an [unsigned] long...
* page->index << (PAGE_CACHE_SHIFT - bbits)
* So, for page sized blocks (4K on 32 bit platforms),
* this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
@@ -687,7 +644,7 @@ xfs_barrier_test(
XFS_BUF_ORDERED(sbp);
xfsbdstrat(mp, sbp);
- error = xfs_iowait(sbp);
+ error = xfs_buf_iowait(sbp);
/*
* Clear all the flags we set and possible error state in the
@@ -735,8 +692,7 @@ void
xfs_blkdev_issue_flush(
xfs_buftarg_t *buftarg)
{
- blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL,
- BLKDEV_IFL_WAIT);
+ blkdev_issue_flush(buftarg->bt_bdev, GFP_KERNEL, NULL);
}
STATIC void
@@ -800,18 +756,20 @@ xfs_open_devices(
* Setup xfs_mount buffer target pointers
*/
error = ENOMEM;
- mp->m_ddev_targp = xfs_alloc_buftarg(ddev, 0, mp->m_fsname);
+ mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname);
if (!mp->m_ddev_targp)
goto out_close_rtdev;
if (rtdev) {
- mp->m_rtdev_targp = xfs_alloc_buftarg(rtdev, 1, mp->m_fsname);
+ mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1,
+ mp->m_fsname);
if (!mp->m_rtdev_targp)
goto out_free_ddev_targ;
}
if (logdev && logdev != ddev) {
- mp->m_logdev_targp = xfs_alloc_buftarg(logdev, 1, mp->m_fsname);
+ mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1,
+ mp->m_fsname);
if (!mp->m_logdev_targp)
goto out_free_rtdev_targ;
} else {
@@ -947,7 +905,7 @@ xfs_fs_destroy_inode(
{
struct xfs_inode *ip = XFS_I(inode);
- xfs_itrace_entry(ip);
+ trace_xfs_destroy_inode(ip);
XFS_STATS_INC(vn_reclaim);
@@ -1014,12 +972,7 @@ xfs_fs_inode_init_once(
/*
* Dirty the XFS inode when mark_inode_dirty_sync() is called so that
- * we catch unlogged VFS level updates to the inode. Care must be taken
- * here - the transaction code calls mark_inode_dirty_sync() to mark the
- * VFS inode dirty in a transaction and clears the i_update_core field;
- * it must clear the field after calling mark_inode_dirty_sync() to
- * correctly indicate that the dirty state has been propagated into the
- * inode log item.
+ * we catch unlogged VFS level updates to the inode.
*
* We need the barrier() to maintain correct ordering between unlogged
* updates and the transaction commit code that clears the i_update_core
@@ -1063,10 +1016,8 @@ xfs_log_inode(
* an inode in another recent transaction. So we play it safe and
* fire off the transaction anyway.
*/
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
- xfs_trans_ihold(tp, ip);
+ xfs_trans_ijoin(tp, ip);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- xfs_trans_set_sync(tp);
error = xfs_trans_commit(tp, 0);
xfs_ilock_demote(ip, XFS_ILOCK_EXCL);
@@ -1082,27 +1033,18 @@ xfs_fs_write_inode(
struct xfs_mount *mp = ip->i_mount;
int error = EAGAIN;
- xfs_itrace_entry(ip);
+ trace_xfs_write_inode(ip);
if (XFS_FORCED_SHUTDOWN(mp))
return XFS_ERROR(EIO);
if (wbc->sync_mode == WB_SYNC_ALL) {
/*
- * Make sure the inode has hit stable storage. By using the
- * log and the fsync transactions we reduce the IOs we have
- * to do here from two (log and inode) to just the log.
- *
- * Note: We still need to do a delwri write of the inode after
- * this to flush it to the backing buffer so that bulkstat
- * works properly if this is the first time the inode has been
- * written. Because we hold the ilock atomically over the
- * transaction commit and the inode flush we are guaranteed
- * that the inode is not pinned when it returns. If the flush
- * lock is already held, then the inode has already been
- * flushed once and we don't need to flush it again. Hence
- * the code will only flush the inode if it isn't already
- * being flushed.
+ * Make sure the inode has made it it into the log. Instead
+ * of forcing it all the way to stable storage using a
+ * synchronous transaction we let the log force inside the
+ * ->sync_fs call do that for thus, which reduces the number
+ * of synchronous log foces dramatically.
*/
xfs_ioend_wait(ip);
xfs_ilock(ip, XFS_ILOCK_SHARED);
@@ -1116,27 +1058,29 @@ xfs_fs_write_inode(
* We make this non-blocking if the inode is contended, return
* EAGAIN to indicate to the caller that they did not succeed.
* This prevents the flush path from blocking on inodes inside
- * another operation right now, they get caught later by xfs_sync.
+ * another operation right now, they get caught later by
+ * xfs_sync.
*/
if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED))
goto out;
- }
- if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
- goto out_unlock;
+ if (xfs_ipincount(ip) || !xfs_iflock_nowait(ip))
+ goto out_unlock;
- /*
- * Now we have the flush lock and the inode is not pinned, we can check
- * if the inode is really clean as we know that there are no pending
- * transaction completions, it is not waiting on the delayed write
- * queue and there is no IO in progress.
- */
- if (xfs_inode_clean(ip)) {
- xfs_ifunlock(ip);
- error = 0;
- goto out_unlock;
+ /*
+ * Now we have the flush lock and the inode is not pinned, we
+ * can check if the inode is really clean as we know that
+ * there are no pending transaction completions, it is not
+ * waiting on the delayed write queue and there is no IO in
+ * progress.
+ */
+ if (xfs_inode_clean(ip)) {
+ xfs_ifunlock(ip);
+ error = 0;
+ goto out_unlock;
+ }
+ error = xfs_iflush(ip, 0);
}
- error = xfs_iflush(ip, 0);
out_unlock:
xfs_iunlock(ip, XFS_ILOCK_SHARED);
@@ -1151,12 +1095,15 @@ xfs_fs_write_inode(
}
STATIC void
-xfs_fs_clear_inode(
+xfs_fs_evict_inode(
struct inode *inode)
{
xfs_inode_t *ip = XFS_I(inode);
- xfs_itrace_entry(ip);
+ trace_xfs_evict_inode(ip);
+
+ truncate_inode_pages(&inode->i_data, 0);
+ end_writeback(inode);
XFS_STATS_INC(vn_rele);
XFS_STATS_INC(vn_remove);
XFS_STATS_DEC(vn_active);
@@ -1193,22 +1140,13 @@ xfs_fs_put_super(
{
struct xfs_mount *mp = XFS_M(sb);
+ /*
+ * Unregister the memory shrinker before we tear down the mount
+ * structure so we don't have memory reclaim racing with us here.
+ */
+ xfs_inode_shrinker_unregister(mp);
xfs_syncd_stop(mp);
- if (!(sb->s_flags & MS_RDONLY)) {
- /*
- * XXX(hch): this should be SYNC_WAIT.
- *
- * Or more likely not needed at all because the VFS is already
- * calling ->sync_fs after shutting down all filestem
- * operations and just before calling ->put_super.
- */
- xfs_sync_data(mp, 0);
- xfs_sync_attr(mp, 0);
- }
-
- XFS_SEND_PREUNMOUNT(mp);
-
/*
* Blow away any referenced inode in the filestreams cache.
* This can and will cause log traffic as inodes go inactive
@@ -1218,14 +1156,10 @@ xfs_fs_put_super(
XFS_bflush(mp->m_ddev_targp);
- XFS_SEND_UNMOUNT(mp);
-
xfs_unmountfs(mp);
xfs_freesb(mp);
- xfs_inode_shrinker_unregister(mp);
xfs_icsb_destroy_counters(mp);
xfs_close_devices(mp);
- xfs_dmops_put(mp);
xfs_free_fsname(mp);
kfree(mp);
}
@@ -1287,6 +1221,7 @@ xfs_fs_statfs(
struct xfs_inode *ip = XFS_I(dentry->d_inode);
__uint64_t fakeinos, id;
xfs_extlen_t lsize;
+ __int64_t ffree;
statp->f_type = XFS_SB_MAGIC;
statp->f_namelen = MAXNAMELEN - 1;
@@ -1310,7 +1245,11 @@ xfs_fs_statfs(
statp->f_files = min_t(typeof(statp->f_files),
statp->f_files,
mp->m_maxicount);
- statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
+
+ /* make sure statp->f_ffree does not underflow */
+ ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
+ statp->f_ffree = max_t(__int64_t, ffree, 0);
+
spin_unlock(&mp->m_sb_lock);
if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
@@ -1463,7 +1402,7 @@ xfs_fs_freeze(
xfs_save_resvblks(mp);
xfs_quiesce_attr(mp);
- return -xfs_fs_log_dummy(mp);
+ return -xfs_fs_log_dummy(mp, SYNC_WAIT);
}
STATIC int
@@ -1543,7 +1482,6 @@ xfs_fs_fill_super(
struct inode *root;
struct xfs_mount *mp = NULL;
int flags = 0, error = ENOMEM;
- char *mtpt = NULL;
mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
if (!mp)
@@ -1559,7 +1497,7 @@ xfs_fs_fill_super(
mp->m_super = sb;
sb->s_fs_info = mp;
- error = xfs_parseargs(mp, (char *)data, &mtpt);
+ error = xfs_parseargs(mp, (char *)data);
if (error)
goto out_free_fsname;
@@ -1571,19 +1509,16 @@ xfs_fs_fill_super(
#endif
sb->s_op = &xfs_super_operations;
- error = xfs_dmops_get(mp);
- if (error)
- goto out_free_fsname;
-
if (silent)
flags |= XFS_MFSI_QUIET;
error = xfs_open_devices(mp);
if (error)
- goto out_put_dmops;
+ goto out_free_fsname;
- if (xfs_icsb_init_counters(mp))
- mp->m_flags |= XFS_MOUNT_NO_PERCPU_SB;
+ error = xfs_icsb_init_counters(mp);
+ if (error)
+ goto out_close_devices;
error = xfs_readsb(mp, flags);
if (error)
@@ -1608,8 +1543,6 @@ xfs_fs_fill_super(
if (error)
goto out_filestream_unmount;
- XFS_SEND_MOUNT(mp, DM_RIGHT_NULL, mtpt, mp->m_fsname);
-
sb->s_magic = XFS_SB_MAGIC;
sb->s_blocksize = mp->m_sb.sb_blocksize;
sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
@@ -1638,7 +1571,6 @@ xfs_fs_fill_super(
xfs_inode_shrinker_register(mp);
- kfree(mtpt);
return 0;
out_filestream_unmount:
@@ -1647,12 +1579,10 @@ xfs_fs_fill_super(
xfs_freesb(mp);
out_destroy_counters:
xfs_icsb_destroy_counters(mp);
+ out_close_devices:
xfs_close_devices(mp);
- out_put_dmops:
- xfs_dmops_put(mp);
out_free_fsname:
xfs_free_fsname(mp);
- kfree(mtpt);
kfree(mp);
out:
return -error;
@@ -1679,16 +1609,14 @@ xfs_fs_fill_super(
goto out_free_sb;
}
-STATIC int
-xfs_fs_get_sb(
+STATIC struct dentry *
+xfs_fs_mount(
struct file_system_type *fs_type,
int flags,
const char *dev_name,
- void *data,
- struct vfsmount *mnt)
+ void *data)
{
- return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super,
- mnt);
+ return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
}
static const struct super_operations xfs_super_operations = {
@@ -1696,7 +1624,7 @@ static const struct super_operations xfs_super_operations = {
.destroy_inode = xfs_fs_destroy_inode,
.dirty_inode = xfs_fs_dirty_inode,
.write_inode = xfs_fs_write_inode,
- .clear_inode = xfs_fs_clear_inode,
+ .evict_inode = xfs_fs_evict_inode,
.put_super = xfs_fs_put_super,
.sync_fs = xfs_fs_sync_fs,
.freeze_fs = xfs_fs_freeze,
@@ -1709,7 +1637,7 @@ static const struct super_operations xfs_super_operations = {
static struct file_system_type xfs_fs_type = {
.owner = THIS_MODULE,
.name = "xfs",
- .get_sb = xfs_fs_get_sb,
+ .mount = xfs_fs_mount,
.kill_sb = kill_block_super,
.fs_flags = FS_REQUIRES_DEV,
};
@@ -1759,6 +1687,12 @@ xfs_init_zones(void)
if (!xfs_trans_zone)
goto out_destroy_ifork_zone;
+ xfs_log_item_desc_zone =
+ kmem_zone_init(sizeof(struct xfs_log_item_desc),
+ "xfs_log_item_desc");
+ if (!xfs_log_item_desc_zone)
+ goto out_destroy_trans_zone;
+
/*
* The size of the zone allocated buf log item is the maximum
* size possible under XFS. This wastes a little bit of memory,
@@ -1768,7 +1702,7 @@ xfs_init_zones(void)
(((XFS_MAX_BLOCKSIZE / XFS_BLF_CHUNK) /
NBWORD) * sizeof(int))), "xfs_buf_item");
if (!xfs_buf_item_zone)
- goto out_destroy_trans_zone;
+ goto out_destroy_log_item_desc_zone;
xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
((XFS_EFD_MAX_FAST_EXTENTS - 1) *
@@ -1805,6 +1739,8 @@ xfs_init_zones(void)
kmem_zone_destroy(xfs_efd_zone);
out_destroy_buf_item_zone:
kmem_zone_destroy(xfs_buf_item_zone);
+ out_destroy_log_item_desc_zone:
+ kmem_zone_destroy(xfs_log_item_desc_zone);
out_destroy_trans_zone:
kmem_zone_destroy(xfs_trans_zone);
out_destroy_ifork_zone:
@@ -1835,6 +1771,7 @@ xfs_destroy_zones(void)
kmem_zone_destroy(xfs_efi_zone);
kmem_zone_destroy(xfs_efd_zone);
kmem_zone_destroy(xfs_buf_item_zone);
+ kmem_zone_destroy(xfs_log_item_desc_zone);
kmem_zone_destroy(xfs_trans_zone);
kmem_zone_destroy(xfs_ifork_zone);
kmem_zone_destroy(xfs_dabuf_zone);
diff --git a/fs/xfs/linux-2.6/xfs_super.h b/fs/xfs/linux-2.6/xfs_super.h
index 519618e9279..50a3266c999 100644
--- a/fs/xfs/linux-2.6/xfs_super.h
+++ b/fs/xfs/linux-2.6/xfs_super.h
@@ -56,23 +56,17 @@ extern void xfs_qm_exit(void);
# define XFS_BIGFS_STRING
#endif
-#ifdef CONFIG_XFS_DMAPI
-# define XFS_DMAPI_STRING "dmapi support, "
-#else
-# define XFS_DMAPI_STRING
-#endif
-
#ifdef DEBUG
# define XFS_DBG_STRING "debug"
#else
# define XFS_DBG_STRING "no debug"
#endif
+#define XFS_VERSION_STRING "SGI XFS"
#define XFS_BUILD_OPTIONS XFS_ACL_STRING \
XFS_SECURITY_STRING \
XFS_REALTIME_STRING \
XFS_BIGFS_STRING \
- XFS_DMAPI_STRING \
XFS_DBG_STRING /* DBG must be last */
struct xfs_inode;
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index a51a07c3a70..37d33254981 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -24,67 +24,54 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_inode.h"
#include "xfs_dinode.h"
#include "xfs_error.h"
-#include "xfs_mru_cache.h"
#include "xfs_filestream.h"
#include "xfs_vnodeops.h"
-#include "xfs_utils.h"
-#include "xfs_buf_item.h"
#include "xfs_inode_item.h"
-#include "xfs_rw.h"
#include "xfs_quota.h"
#include "xfs_trace.h"
+#include "xfs_fsops.h"
#include <linux/kthread.h>
#include <linux/freezer.h>
+/*
+ * The inode lookup is done in batches to keep the amount of lock traffic and
+ * radix tree lookups to a minimum. The batch size is a trade off between
+ * lookup reduction and stack usage. This is in the reclaim path, so we can't
+ * be too greedy.
+ */
+#define XFS_LOOKUP_BATCH 32
-STATIC xfs_inode_t *
-xfs_inode_ag_lookup(
- struct xfs_mount *mp,
- struct xfs_perag *pag,
- uint32_t *first_index,
- int tag)
+STATIC int
+xfs_inode_ag_walk_grab(
+ struct xfs_inode *ip)
{
- int nr_found;
- struct xfs_inode *ip;
+ struct inode *inode = VFS_I(ip);
- /*
- * use a gang lookup to find the next inode in the tree
- * as the tree is sparse and a gang lookup walks to find
- * the number of objects requested.
- */
- if (tag == XFS_ICI_NO_TAG) {
- nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
- (void **)&ip, *first_index, 1);
- } else {
- nr_found = radix_tree_gang_lookup_tag(&pag->pag_ici_root,
- (void **)&ip, *first_index, 1, tag);
+ /* nothing to sync during shutdown */
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ return EFSCORRUPTED;
+
+ /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
+ if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
+ return ENOENT;
+
+ /* If we can't grab the inode, it must on it's way to reclaim. */
+ if (!igrab(inode))
+ return ENOENT;
+
+ if (is_bad_inode(inode)) {
+ IRELE(ip);
+ return ENOENT;
}
- if (!nr_found)
- return NULL;
- /*
- * Update the index for the next lookup. Catch overflows
- * into the next AG range which can occur if we have inodes
- * in the last block of the AG and we are currently
- * pointing to the last inode.
- */
- *first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
- if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
- return NULL;
- return ip;
+ /* inode is valid */
+ return 0;
}
STATIC int
@@ -93,49 +80,75 @@ xfs_inode_ag_walk(
struct xfs_perag *pag,
int (*execute)(struct xfs_inode *ip,
struct xfs_perag *pag, int flags),
- int flags,
- int tag,
- int exclusive,
- int *nr_to_scan)
+ int flags)
{
uint32_t first_index;
int last_error = 0;
int skipped;
+ int done;
+ int nr_found;
restart:
+ done = 0;
skipped = 0;
first_index = 0;
+ nr_found = 0;
do {
+ struct xfs_inode *batch[XFS_LOOKUP_BATCH];
int error = 0;
- xfs_inode_t *ip;
+ int i;
- if (exclusive)
- write_lock(&pag->pag_ici_lock);
- else
- read_lock(&pag->pag_ici_lock);
- ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
- if (!ip) {
- if (exclusive)
- write_unlock(&pag->pag_ici_lock);
- else
- read_unlock(&pag->pag_ici_lock);
+ read_lock(&pag->pag_ici_lock);
+ nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
+ (void **)batch, first_index,
+ XFS_LOOKUP_BATCH);
+ if (!nr_found) {
+ read_unlock(&pag->pag_ici_lock);
break;
}
- /* execute releases pag->pag_ici_lock */
- error = execute(ip, pag, flags);
- if (error == EAGAIN) {
- skipped++;
- continue;
+ /*
+ * Grab the inodes before we drop the lock. if we found
+ * nothing, nr == 0 and the loop will be skipped.
+ */
+ for (i = 0; i < nr_found; i++) {
+ struct xfs_inode *ip = batch[i];
+
+ if (done || xfs_inode_ag_walk_grab(ip))
+ batch[i] = NULL;
+
+ /*
+ * Update the index for the next lookup. Catch overflows
+ * into the next AG range which can occur if we have inodes
+ * in the last block of the AG and we are currently
+ * pointing to the last inode.
+ */
+ first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+ if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+ done = 1;
+ }
+
+ /* unlock now we've grabbed the inodes. */
+ read_unlock(&pag->pag_ici_lock);
+
+ for (i = 0; i < nr_found; i++) {
+ if (!batch[i])
+ continue;
+ error = execute(batch[i], pag, flags);
+ IRELE(batch[i]);
+ if (error == EAGAIN) {
+ skipped++;
+ continue;
+ }
+ if (error && last_error != EFSCORRUPTED)
+ last_error = error;
}
- if (error)
- last_error = error;
/* bail out if the filesystem is corrupted. */
if (error == EFSCORRUPTED)
break;
- } while ((*nr_to_scan)--);
+ } while (nr_found && !done);
if (skipped) {
delay(1);
@@ -144,110 +157,32 @@ restart:
return last_error;
}
-/*
- * Select the next per-ag structure to iterate during the walk. The reclaim
- * walk is optimised only to walk AGs with reclaimable inodes in them.
- */
-static struct xfs_perag *
-xfs_inode_ag_iter_next_pag(
- struct xfs_mount *mp,
- xfs_agnumber_t *first,
- int tag)
-{
- struct xfs_perag *pag = NULL;
-
- if (tag == XFS_ICI_RECLAIM_TAG) {
- int found;
- int ref;
-
- spin_lock(&mp->m_perag_lock);
- found = radix_tree_gang_lookup_tag(&mp->m_perag_tree,
- (void **)&pag, *first, 1, tag);
- if (found <= 0) {
- spin_unlock(&mp->m_perag_lock);
- return NULL;
- }
- *first = pag->pag_agno + 1;
- /* open coded pag reference increment */
- ref = atomic_inc_return(&pag->pag_ref);
- spin_unlock(&mp->m_perag_lock);
- trace_xfs_perag_get_reclaim(mp, pag->pag_agno, ref, _RET_IP_);
- } else {
- pag = xfs_perag_get(mp, *first);
- (*first)++;
- }
- return pag;
-}
-
int
xfs_inode_ag_iterator(
struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip,
struct xfs_perag *pag, int flags),
- int flags,
- int tag,
- int exclusive,
- int *nr_to_scan)
+ int flags)
{
struct xfs_perag *pag;
int error = 0;
int last_error = 0;
xfs_agnumber_t ag;
- int nr;
- nr = nr_to_scan ? *nr_to_scan : INT_MAX;
ag = 0;
- while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag, tag))) {
- error = xfs_inode_ag_walk(mp, pag, execute, flags, tag,
- exclusive, &nr);
+ while ((pag = xfs_perag_get(mp, ag))) {
+ ag = pag->pag_agno + 1;
+ error = xfs_inode_ag_walk(mp, pag, execute, flags);
xfs_perag_put(pag);
if (error) {
last_error = error;
if (error == EFSCORRUPTED)
break;
}
- if (nr <= 0)
- break;
}
- if (nr_to_scan)
- *nr_to_scan = nr;
return XFS_ERROR(last_error);
}
-/* must be called with pag_ici_lock held and releases it */
-int
-xfs_sync_inode_valid(
- struct xfs_inode *ip,
- struct xfs_perag *pag)
-{
- struct inode *inode = VFS_I(ip);
- int error = EFSCORRUPTED;
-
- /* nothing to sync during shutdown */
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
- goto out_unlock;
-
- /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
- error = ENOENT;
- if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
- goto out_unlock;
-
- /* If we can't grab the inode, it must on it's way to reclaim. */
- if (!igrab(inode))
- goto out_unlock;
-
- if (is_bad_inode(inode)) {
- IRELE(ip);
- goto out_unlock;
- }
-
- /* inode is valid */
- error = 0;
-out_unlock:
- read_unlock(&pag->pag_ici_lock);
- return error;
-}
-
STATIC int
xfs_sync_inode_data(
struct xfs_inode *ip,
@@ -258,10 +193,6 @@ xfs_sync_inode_data(
struct address_space *mapping = inode->i_mapping;
int error = 0;
- error = xfs_sync_inode_valid(ip, pag);
- if (error)
- return error;
-
if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
goto out_wait;
@@ -278,7 +209,6 @@ xfs_sync_inode_data(
out_wait:
if (flags & SYNC_WAIT)
xfs_ioend_wait(ip);
- IRELE(ip);
return error;
}
@@ -290,10 +220,6 @@ xfs_sync_inode_attr(
{
int error = 0;
- error = xfs_sync_inode_valid(ip, pag);
- if (error)
- return error;
-
xfs_ilock(ip, XFS_ILOCK_SHARED);
if (xfs_inode_clean(ip))
goto out_unlock;
@@ -312,14 +238,13 @@ xfs_sync_inode_attr(
out_unlock:
xfs_iunlock(ip, XFS_ILOCK_SHARED);
- IRELE(ip);
return error;
}
/*
* Write out pagecache data for the whole filesystem.
*/
-int
+STATIC int
xfs_sync_data(
struct xfs_mount *mp,
int flags)
@@ -328,8 +253,7 @@ xfs_sync_data(
ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
- error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
- XFS_ICI_NO_TAG, 0, NULL);
+ error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags);
if (error)
return XFS_ERROR(error);
@@ -340,48 +264,14 @@ xfs_sync_data(
/*
* Write out inode metadata (attributes) for the whole filesystem.
*/
-int
+STATIC int
xfs_sync_attr(
struct xfs_mount *mp,
int flags)
{
ASSERT((flags & ~SYNC_WAIT) == 0);
- return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
- XFS_ICI_NO_TAG, 0, NULL);
-}
-
-STATIC int
-xfs_commit_dummy_trans(
- struct xfs_mount *mp,
- uint flags)
-{
- struct xfs_inode *ip = mp->m_rootip;
- struct xfs_trans *tp;
- int error;
-
- /*
- * Put a dummy transaction in the log to tell recovery
- * that all others are OK.
- */
- tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
- error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
- if (error) {
- xfs_trans_cancel(tp, 0);
- return error;
- }
-
- xfs_ilock(ip, XFS_ILOCK_EXCL);
-
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
- xfs_trans_ihold(tp, ip);
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- error = xfs_trans_commit(tp, 0);
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
- /* the log force ensures this transaction is pushed to disk */
- xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
- return error;
+ return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags);
}
STATIC int
@@ -444,7 +334,7 @@ xfs_quiesce_data(
/* mark the log as covered if needed */
if (xfs_log_need_covered(mp))
- error2 = xfs_commit_dummy_trans(mp, SYNC_WAIT);
+ error2 = xfs_fs_log_dummy(mp, SYNC_WAIT);
/* flush data-only devices */
if (mp->m_rtdev_targp)
@@ -575,7 +465,7 @@ xfs_flush_inodes(
/*
* Every sync period we need to unpin all items, reclaim inodes and sync
* disk quotas. We might need to cover the log to indicate that the
- * filesystem is idle.
+ * filesystem is idle and not frozen.
*/
STATIC void
xfs_sync_worker(
@@ -589,8 +479,9 @@ xfs_sync_worker(
xfs_reclaim_inodes(mp, 0);
/* dgc: errors ignored here */
error = xfs_qm_sync(mp, SYNC_TRYLOCK);
- if (xfs_log_need_covered(mp))
- error = xfs_commit_dummy_trans(mp, 0);
+ if (mp->m_super->s_frozen == SB_UNFROZEN &&
+ xfs_log_need_covered(mp))
+ error = xfs_fs_log_dummy(mp, 0);
}
mp->m_sync_seq++;
wake_up(&mp->m_wait_single_sync_task);
@@ -710,14 +601,11 @@ xfs_inode_set_reclaim_tag(
xfs_perag_put(pag);
}
-void
-__xfs_inode_clear_reclaim_tag(
- xfs_mount_t *mp,
+STATIC void
+__xfs_inode_clear_reclaim(
xfs_perag_t *pag,
xfs_inode_t *ip)
{
- radix_tree_tag_clear(&pag->pag_ici_root,
- XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
pag->pag_ici_reclaimable--;
if (!pag->pag_ici_reclaimable) {
/* clear the reclaim tag from the perag radix tree */
@@ -731,6 +619,54 @@ __xfs_inode_clear_reclaim_tag(
}
}
+void
+__xfs_inode_clear_reclaim_tag(
+ xfs_mount_t *mp,
+ xfs_perag_t *pag,
+ xfs_inode_t *ip)
+{
+ radix_tree_tag_clear(&pag->pag_ici_root,
+ XFS_INO_TO_AGINO(mp, ip->i_ino), XFS_ICI_RECLAIM_TAG);
+ __xfs_inode_clear_reclaim(pag, ip);
+}
+
+/*
+ * Grab the inode for reclaim exclusively.
+ * Return 0 if we grabbed it, non-zero otherwise.
+ */
+STATIC int
+xfs_reclaim_inode_grab(
+ struct xfs_inode *ip,
+ int flags)
+{
+
+ /*
+ * do some unlocked checks first to avoid unnecceary lock traffic.
+ * The first is a flush lock check, the second is a already in reclaim
+ * check. Only do these checks if we are not going to block on locks.
+ */
+ if ((flags & SYNC_TRYLOCK) &&
+ (!ip->i_flush.done || __xfs_iflags_test(ip, XFS_IRECLAIM))) {
+ return 1;
+ }
+
+ /*
+ * The radix tree lock here protects a thread in xfs_iget from racing
+ * with us starting reclaim on the inode. Once we have the
+ * XFS_IRECLAIM flag set it will not touch us.
+ */
+ spin_lock(&ip->i_flags_lock);
+ ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+ if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
+ /* ignore as it is already under reclaim */
+ spin_unlock(&ip->i_flags_lock);
+ return 1;
+ }
+ __xfs_iflags_set(ip, XFS_IRECLAIM);
+ spin_unlock(&ip->i_flags_lock);
+ return 0;
+}
+
/*
* Inodes in different states need to be treated differently, and the return
* value of xfs_iflush is not sufficient to get this right. The following table
@@ -789,23 +725,6 @@ xfs_reclaim_inode(
{
int error = 0;
- /*
- * The radix tree lock here protects a thread in xfs_iget from racing
- * with us starting reclaim on the inode. Once we have the
- * XFS_IRECLAIM flag set it will not touch us.
- */
- spin_lock(&ip->i_flags_lock);
- ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
- if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
- /* ignore as it is already under reclaim */
- spin_unlock(&ip->i_flags_lock);
- write_unlock(&pag->pag_ici_lock);
- return 0;
- }
- __xfs_iflags_set(ip, XFS_IRECLAIM);
- spin_unlock(&ip->i_flags_lock);
- write_unlock(&pag->pag_ici_lock);
-
xfs_ilock(ip, XFS_ILOCK_EXCL);
if (!xfs_iflock_nowait(ip)) {
if (!(sync_mode & SYNC_WAIT))
@@ -867,18 +786,161 @@ out:
reclaim:
xfs_ifunlock(ip);
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_ireclaim(ip);
+
+ XFS_STATS_INC(xs_ig_reclaims);
+ /*
+ * Remove the inode from the per-AG radix tree.
+ *
+ * Because radix_tree_delete won't complain even if the item was never
+ * added to the tree assert that it's been there before to catch
+ * problems with the inode life time early on.
+ */
+ write_lock(&pag->pag_ici_lock);
+ if (!radix_tree_delete(&pag->pag_ici_root,
+ XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino)))
+ ASSERT(0);
+ __xfs_inode_clear_reclaim(pag, ip);
+ write_unlock(&pag->pag_ici_lock);
+
+ /*
+ * Here we do an (almost) spurious inode lock in order to coordinate
+ * with inode cache radix tree lookups. This is because the lookup
+ * can reference the inodes in the cache without taking references.
+ *
+ * We make that OK here by ensuring that we wait until the inode is
+ * unlocked after the lookup before we go ahead and free it. We get
+ * both the ilock and the iolock because the code may need to drop the
+ * ilock one but will still hold the iolock.
+ */
+ xfs_ilock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+ xfs_qm_dqdetach(ip);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+
+ xfs_inode_free(ip);
return error;
}
+/*
+ * Walk the AGs and reclaim the inodes in them. Even if the filesystem is
+ * corrupted, we still want to try to reclaim all the inodes. If we don't,
+ * then a shut down during filesystem unmount reclaim walk leak all the
+ * unreclaimed inodes.
+ */
+int
+xfs_reclaim_inodes_ag(
+ struct xfs_mount *mp,
+ int flags,
+ int *nr_to_scan)
+{
+ struct xfs_perag *pag;
+ int error = 0;
+ int last_error = 0;
+ xfs_agnumber_t ag;
+ int trylock = flags & SYNC_TRYLOCK;
+ int skipped;
+
+restart:
+ ag = 0;
+ skipped = 0;
+ while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
+ unsigned long first_index = 0;
+ int done = 0;
+ int nr_found = 0;
+
+ ag = pag->pag_agno + 1;
+
+ if (trylock) {
+ if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
+ skipped++;
+ continue;
+ }
+ first_index = pag->pag_ici_reclaim_cursor;
+ } else
+ mutex_lock(&pag->pag_ici_reclaim_lock);
+
+ do {
+ struct xfs_inode *batch[XFS_LOOKUP_BATCH];
+ int i;
+
+ write_lock(&pag->pag_ici_lock);
+ nr_found = radix_tree_gang_lookup_tag(
+ &pag->pag_ici_root,
+ (void **)batch, first_index,
+ XFS_LOOKUP_BATCH,
+ XFS_ICI_RECLAIM_TAG);
+ if (!nr_found) {
+ write_unlock(&pag->pag_ici_lock);
+ break;
+ }
+
+ /*
+ * Grab the inodes before we drop the lock. if we found
+ * nothing, nr == 0 and the loop will be skipped.
+ */
+ for (i = 0; i < nr_found; i++) {
+ struct xfs_inode *ip = batch[i];
+
+ if (done || xfs_reclaim_inode_grab(ip, flags))
+ batch[i] = NULL;
+
+ /*
+ * Update the index for the next lookup. Catch
+ * overflows into the next AG range which can
+ * occur if we have inodes in the last block of
+ * the AG and we are currently pointing to the
+ * last inode.
+ */
+ first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
+ if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
+ done = 1;
+ }
+
+ /* unlock now we've grabbed the inodes. */
+ write_unlock(&pag->pag_ici_lock);
+
+ for (i = 0; i < nr_found; i++) {
+ if (!batch[i])
+ continue;
+ error = xfs_reclaim_inode(batch[i], pag, flags);
+ if (error && last_error != EFSCORRUPTED)
+ last_error = error;
+ }
+
+ *nr_to_scan -= XFS_LOOKUP_BATCH;
+
+ } while (nr_found && !done && *nr_to_scan > 0);
+
+ if (trylock && !done)
+ pag->pag_ici_reclaim_cursor = first_index;
+ else
+ pag->pag_ici_reclaim_cursor = 0;
+ mutex_unlock(&pag->pag_ici_reclaim_lock);
+ xfs_perag_put(pag);
+ }
+
+ /*
+ * if we skipped any AG, and we still have scan count remaining, do
+ * another pass this time using blocking reclaim semantics (i.e
+ * waiting on the reclaim locks and ignoring the reclaim cursors). This
+ * ensure that when we get more reclaimers than AGs we block rather
+ * than spin trying to execute reclaim.
+ */
+ if (trylock && skipped && *nr_to_scan > 0) {
+ trylock = 0;
+ goto restart;
+ }
+ return XFS_ERROR(last_error);
+}
+
int
xfs_reclaim_inodes(
xfs_mount_t *mp,
int mode)
{
- return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
- XFS_ICI_RECLAIM_TAG, 1, NULL);
+ int nr_to_scan = INT_MAX;
+
+ return xfs_reclaim_inodes_ag(mp, mode, &nr_to_scan);
}
/*
@@ -900,17 +962,16 @@ xfs_reclaim_inode_shrink(
if (!(gfp_mask & __GFP_FS))
return -1;
- xfs_inode_ag_iterator(mp, xfs_reclaim_inode, 0,
- XFS_ICI_RECLAIM_TAG, 1, &nr_to_scan);
- /* if we don't exhaust the scan, don't bother coming back */
+ xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK, &nr_to_scan);
+ /* terminate if we don't exhaust the scan */
if (nr_to_scan > 0)
return -1;
}
reclaimable = 0;
ag = 0;
- while ((pag = xfs_inode_ag_iter_next_pag(mp, &ag,
- XFS_ICI_RECLAIM_TAG))) {
+ while ((pag = xfs_perag_get_tag(mp, ag, XFS_ICI_RECLAIM_TAG))) {
+ ag = pag->pag_agno + 1;
reclaimable += pag->pag_ici_reclaimable;
xfs_perag_put(pag);
}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index e28139aaa4a..32ba6628290 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -35,9 +35,6 @@ typedef struct xfs_sync_work {
int xfs_syncd_init(struct xfs_mount *mp);
void xfs_syncd_stop(struct xfs_mount *mp);
-int xfs_sync_attr(struct xfs_mount *mp, int flags);
-int xfs_sync_data(struct xfs_mount *mp, int flags);
-
int xfs_quiesce_data(struct xfs_mount *mp);
void xfs_quiesce_attr(struct xfs_mount *mp);
@@ -50,10 +47,10 @@ void __xfs_inode_set_reclaim_tag(struct xfs_perag *pag, struct xfs_inode *ip);
void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
struct xfs_inode *ip);
-int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
+int xfs_sync_inode_grab(struct xfs_inode *ip);
int xfs_inode_ag_iterator(struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
- int flags, int tag, int write_lock, int *nr_to_scan);
+ int flags);
void xfs_inode_shrinker_register(struct xfs_mount *mp);
void xfs_inode_shrinker_unregister(struct xfs_mount *mp);
diff --git a/fs/xfs/linux-2.6/xfs_trace.c b/fs/xfs/linux-2.6/xfs_trace.c
index d12be8470cb..88d25d4aa56 100644
--- a/fs/xfs/linux-2.6/xfs_trace.c
+++ b/fs/xfs/linux-2.6/xfs_trace.c
@@ -24,17 +24,13 @@
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_dir2.h"
#include "xfs_da_btree.h"
#include "xfs_bmap_btree.h"
#include "xfs_alloc_btree.h"
#include "xfs_ialloc_btree.h"
-#include "xfs_dir2_sf.h"
-#include "xfs_attr_sf.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
-#include "xfs_dmapi.h"
#include "xfs_mount.h"
#include "xfs_ialloc.h"
#include "xfs_itable.h"
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index 30282069090..acef2e98c59 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -124,7 +124,7 @@ DEFINE_EVENT(xfs_perag_class, name, \
unsigned long caller_ip), \
TP_ARGS(mp, agno, refcount, caller_ip))
DEFINE_PERAG_REF_EVENT(xfs_perag_get);
-DEFINE_PERAG_REF_EVENT(xfs_perag_get_reclaim);
+DEFINE_PERAG_REF_EVENT(xfs_perag_get_tag);
DEFINE_PERAG_REF_EVENT(xfs_perag_put);
DEFINE_PERAG_REF_EVENT(xfs_perag_set_reclaim);
DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);
@@ -317,8 +317,6 @@ DEFINE_BUF_EVENT(xfs_buf_init);
DEFINE_BUF_EVENT(xfs_buf_free);
DEFINE_BUF_EVENT(xfs_buf_hold);
DEFINE_BUF_EVENT(xfs_buf_rele);
-DEFINE_BUF_EVENT(xfs_buf_pin);
-DEFINE_BUF_EVENT(xfs_buf_unpin);
DEFINE_BUF_EVENT(xfs_buf_iodone);
DEFINE_BUF_EVENT(xfs_buf_iorequest);
DEFINE_BUF_EVENT(xfs_buf_bawrite);
@@ -327,13 +325,12 @@ DEFINE_BUF_EVENT(xfs_buf_lock);
DEFINE_BUF_EVENT(xfs_buf_lock_done);
DEFINE_BUF_EVENT(xfs_buf_cond_lock);
DEFINE_BUF_EVENT(xfs_buf_unlock);
-DEFINE_BUF_EVENT(xfs_buf_ordered_retry);
DEFINE_BUF_EVENT(xfs_buf_iowait);
DEFINE_BUF_EVENT(xfs_buf_iowait_done);
DEFINE_BUF_EVENT(xfs_buf_delwri_queue);
DEFINE_BUF_EVENT(xfs_buf_delwri_dequeue);
DEFINE_BUF_EVENT(xfs_buf_delwri_split);
-DEFINE_BUF_EVENT(xfs_buf_get_noaddr);
+DEFINE_BUF_EVENT(xfs_buf_get_uncached);
DEFINE_BUF_EVENT(xfs_bdstrat_shut);
DEFINE_BUF_EVENT(xfs_buf_item_relse);
DEFINE_BUF_EVENT(xfs_buf_item_iodone);
@@ -541,7 +538,7 @@ DEFINE_LOCK_EVENT(xfs_ilock_nowait);
DEFINE_LOCK_EVENT(xfs_ilock_demote);
DEFINE_LOCK_EVENT(xfs_iunlock);
-DECLARE_EVENT_CLASS(xfs_iget_class,
+DECLARE_EVENT_CLASS(xfs_inode_class,
TP_PROTO(struct xfs_inode *ip),
TP_ARGS(ip),
TP_STRUCT__entry(
@@ -557,16 +554,38 @@ DECLARE_EVENT_CLASS(xfs_iget_class,
__entry->ino)
)
-#define DEFINE_IGET_EVENT(name) \
-DEFINE_EVENT(xfs_iget_class, name, \
+#define DEFINE_INODE_EVENT(name) \
+DEFINE_EVENT(xfs_inode_class, name, \
TP_PROTO(struct xfs_inode *ip), \
TP_ARGS(ip))
-DEFINE_IGET_EVENT(xfs_iget_skip);
-DEFINE_IGET_EVENT(xfs_iget_reclaim);
-DEFINE_IGET_EVENT(xfs_iget_found);
-DEFINE_IGET_EVENT(xfs_iget_alloc);
-
-DECLARE_EVENT_CLASS(xfs_inode_class,
+DEFINE_INODE_EVENT(xfs_iget_skip);
+DEFINE_INODE_EVENT(xfs_iget_reclaim);
+DEFINE_INODE_EVENT(xfs_iget_reclaim_fail);
+DEFINE_INODE_EVENT(xfs_iget_hit);
+DEFINE_INODE_EVENT(xfs_iget_miss);
+
+DEFINE_INODE_EVENT(xfs_getattr);
+DEFINE_INODE_EVENT(xfs_setattr);
+DEFINE_INODE_EVENT(xfs_readlink);
+DEFINE_INODE_EVENT(xfs_alloc_file_space);
+DEFINE_INODE_EVENT(xfs_free_file_space);
+DEFINE_INODE_EVENT(xfs_readdir);
+#ifdef CONFIG_XFS_POSIX_ACL
+DEFINE_INODE_EVENT(xfs_check_acl);
+#endif
+DEFINE_INODE_EVENT(xfs_vm_bmap);
+DEFINE_INODE_EVENT(xfs_file_ioctl);
+DEFINE_INODE_EVENT(xfs_file_compat_ioctl);
+DEFINE_INODE_EVENT(xfs_ioctl_setattr);
+DEFINE_INODE_EVENT(xfs_file_fsync);
+DEFINE_INODE_EVENT(xfs_destroy_inode);
+DEFINE_INODE_EVENT(xfs_write_inode);
+DEFINE_INODE_EVENT(xfs_evict_inode);
+
+DEFINE_INODE_EVENT(xfs_dquot_dqalloc);
+DEFINE_INODE_EVENT(xfs_dquot_dqdetach);
+
+DECLARE_EVENT_CLASS(xfs_iref_class,
TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
TP_ARGS(ip, caller_ip),
TP_STRUCT__entry(
@@ -591,20 +610,71 @@ DECLARE_EVENT_CLASS(xfs_inode_class,
(char *)__entry->caller_ip)
)
-#define DEFINE_INODE_EVENT(name) \
-DEFINE_EVENT(xfs_inode_class, name, \
+#define DEFINE_IREF_EVENT(name) \
+DEFINE_EVENT(xfs_iref_class, name, \
TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
TP_ARGS(ip, caller_ip))
-DEFINE_INODE_EVENT(xfs_ihold);
-DEFINE_INODE_EVENT(xfs_irele);
-DEFINE_INODE_EVENT(xfs_inode_pin);
-DEFINE_INODE_EVENT(xfs_inode_unpin);
-DEFINE_INODE_EVENT(xfs_inode_unpin_nowait);
+DEFINE_IREF_EVENT(xfs_ihold);
+DEFINE_IREF_EVENT(xfs_irele);
+DEFINE_IREF_EVENT(xfs_inode_pin);
+DEFINE_IREF_EVENT(xfs_inode_unpin);
+DEFINE_IREF_EVENT(xfs_inode_unpin_nowait);
+
+DECLARE_EVENT_CLASS(xfs_namespace_class,
+ TP_PROTO(struct xfs_inode *dp, struct xfs_name *name),
+ TP_ARGS(dp, name),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, dp_ino)
+ __dynamic_array(char, name, name->len)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(dp)->i_sb->s_dev;
+ __entry->dp_ino = dp->i_ino;
+ memcpy(__get_str(name), name->name, name->len);
+ ),
+ TP_printk("dev %d:%d dp ino 0x%llx name %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->dp_ino,
+ __get_str(name))
+)
-/* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */
-DEFINE_INODE_EVENT(xfs_inode);
-#define xfs_itrace_entry(ip) \
- trace_xfs_inode(ip, _THIS_IP_)
+#define DEFINE_NAMESPACE_EVENT(name) \
+DEFINE_EVENT(xfs_namespace_class, name, \
+ TP_PROTO(struct xfs_inode *dp, struct xfs_name *name), \
+ TP_ARGS(dp, name))
+DEFINE_NAMESPACE_EVENT(xfs_remove);
+DEFINE_NAMESPACE_EVENT(xfs_link);
+DEFINE_NAMESPACE_EVENT(xfs_lookup);
+DEFINE_NAMESPACE_EVENT(xfs_create);
+DEFINE_NAMESPACE_EVENT(xfs_symlink);
+
+TRACE_EVENT(xfs_rename,
+ TP_PROTO(struct xfs_inode *src_dp, struct xfs_inode *target_dp,
+ struct xfs_name *src_name, struct xfs_name *target_name),
+ TP_ARGS(src_dp, target_dp, src_name, target_name),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, src_dp_ino)
+ __field(xfs_ino_t, target_dp_ino)
+ __dynamic_array(char, src_name, src_name->len)
+ __dynamic_array(char, target_name, target_name->len)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(src_dp)->i_sb->s_dev;
+ __entry->src_dp_ino = src_dp->i_ino;
+ __entry->target_dp_ino = target_dp->i_ino;
+ memcpy(__get_str(src_name), src_name->name, src_name->len);
+ memcpy(__get_str(target_name), target_name->name, target_name->len);
+ ),
+ TP_printk("dev %d:%d src dp ino 0x%llx target dp ino 0x%llx"
+ " src name %s target name %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->src_dp_ino,
+ __entry->target_dp_ino,
+ __get_str(src_name),
+ __get_str(target_name))
+)
DECLARE_EVENT_CLASS(xfs_dquot_class,
TP_PROTO(struct xfs_dquot *dqp),
@@ -684,9 +754,6 @@ DEFINE_DQUOT_EVENT(xfs_dqrele);
DEFINE_DQUOT_EVENT(xfs_dqflush);
DEFINE_DQUOT_EVENT(xfs_dqflush_force);
DEFINE_DQUOT_EVENT(xfs_dqflush_done);
-/* not really iget events, but we re-use the format */
-DEFINE_IGET_EVENT(xfs_dquot_dqalloc);
-DEFINE_IGET_EVENT(xfs_dquot_dqdetach);
DECLARE_EVENT_CLASS(xfs_loggrant_class,
TP_PROTO(struct log *log, struct xlog_ticket *tic),
@@ -834,33 +901,29 @@ DECLARE_EVENT_CLASS(xfs_page_class,
__field(loff_t, size)
__field(unsigned long, offset)
__field(int, delalloc)
- __field(int, unmapped)
__field(int, unwritten)
),
TP_fast_assign(
- int delalloc = -1, unmapped = -1, unwritten = -1;
+ int delalloc = -1, unwritten = -1;
if (page_has_buffers(page))
- xfs_count_page_state(page, &delalloc,
- &unmapped, &unwritten);
+ xfs_count_page_state(page, &delalloc, &unwritten);
__entry->dev = inode->i_sb->s_dev;
__entry->ino = XFS_I(inode)->i_ino;
__entry->pgoff = page_offset(page);
__entry->size = i_size_read(inode);
__entry->offset = off;
__entry->delalloc = delalloc;
- __entry->unmapped = unmapped;
__entry->unwritten = unwritten;
),
TP_printk("dev %d:%d ino 0x%llx pgoff 0x%lx size 0x%llx offset %lx "
- "delalloc %d unmapped %d unwritten %d",
+ "delalloc %d unwritten %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->pgoff,
__entry->size,
__entry->offset,
__entry->delalloc,
- __entry->unmapped,
__entry->unwritten)
)
diff --git a/fs/xfs/linux-2.6/xfs_version.h b/fs/xfs/linux-2.6/xfs_version.h
deleted file mode 100644
index f8d279d7563..00000000000
--- a/fs/xfs/linux-2.6/xfs_version.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2001-2002,2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it would be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-#ifndef __XFS_VERSION_H__
-#define __XFS_VERSION_H__
-
-/*
- * Dummy file that can contain a timestamp to put into the
- * XFS init string, to help users keep track of what they're
- * running
- */
-
-#define XFS_VERSION_STRING "SGI XFS"
-
-#endif /* __XFS_VERSION_H__ */