aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/buffer.c47
-rw-r--r--fs/xfs/linux-2.6/xfs_file.c16
-rw-r--r--fs/xfs/xfs_vnodeops.c30
-rw-r--r--include/linux/buffer_head.h2
4 files changed, 93 insertions, 2 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 0f900671423..02ebb1f1d3b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2194,6 +2194,52 @@ int generic_commit_write(struct file *file, struct page *page,
return 0;
}
+/*
+ * block_page_mkwrite() is not allowed to change the file size as it gets
+ * called from a page fault handler when a page is first dirtied. Hence we must
+ * be careful to check for EOF conditions here. We set the page up correctly
+ * for a written page which means we get ENOSPC checking when writing into
+ * holes and correct delalloc and unwritten extent mapping on filesystems that
+ * support these features.
+ *
+ * We are not allowed to take the i_mutex here so we have to play games to
+ * protect against truncate races as the page could now be beyond EOF. Because
+ * vmtruncate() writes the inode size before removing pages, once we have the
+ * page lock we can determine safely if the page is beyond EOF. If it is not
+ * beyond EOF, then the page is guaranteed safe against truncation until we
+ * unlock the page.
+ */
+int
+block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
+ get_block_t get_block)
+{
+ struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+ unsigned long end;
+ loff_t size;
+ int ret = -EINVAL;
+
+ lock_page(page);
+ size = i_size_read(inode);
+ if ((page->mapping != inode->i_mapping) ||
+ ((page->index << PAGE_CACHE_SHIFT) > size)) {
+ /* page got truncated out from underneath us */
+ goto out_unlock;
+ }
+
+ /* page is wholly or partially inside EOF */
+ if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
+ end = size & ~PAGE_CACHE_MASK;
+ else
+ end = PAGE_CACHE_SIZE;
+
+ ret = block_prepare_write(page, 0, end, get_block);
+ if (!ret)
+ ret = block_commit_write(page, 0, end);
+
+out_unlock:
+ unlock_page(page);
+ return ret;
+}
/*
* nobh_prepare_write()'s prereads are special: the buffer_heads are freed
@@ -2977,6 +3023,7 @@ EXPORT_SYMBOL(__brelse);
EXPORT_SYMBOL(__wait_on_buffer);
EXPORT_SYMBOL(block_commit_write);
EXPORT_SYMBOL(block_prepare_write);
+EXPORT_SYMBOL(block_page_mkwrite);
EXPORT_SYMBOL(block_read_full_page);
EXPORT_SYMBOL(block_sync_page);
EXPORT_SYMBOL(block_truncate_page);
diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c
index 2d4be2f247b..0d4001eafd1 100644
--- a/fs/xfs/linux-2.6/xfs_file.c
+++ b/fs/xfs/linux-2.6/xfs_file.c
@@ -413,6 +413,20 @@ xfs_file_open_exec(
}
#endif /* HAVE_FOP_OPEN_EXEC */
+/*
+ * mmap()d file has taken write protection fault and is being made
+ * writable. We can set the page state up correctly for a writable
+ * page, which means we can do correct delalloc accounting (ENOSPC
+ * checking!) and unwritten extent mapping.
+ */
+STATIC int
+xfs_vm_page_mkwrite(
+ struct vm_area_struct *vma,
+ struct page *page)
+{
+ return block_page_mkwrite(vma, page, xfs_get_blocks);
+}
+
const struct file_operations xfs_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
@@ -465,11 +479,13 @@ const struct file_operations xfs_dir_file_operations = {
static struct vm_operations_struct xfs_file_vm_ops = {
.fault = filemap_fault,
+ .page_mkwrite = xfs_vm_page_mkwrite,
};
#ifdef CONFIG_XFS_DMAPI
static struct vm_operations_struct xfs_dmapi_file_vm_ops = {
.fault = xfs_vm_fault,
+ .page_mkwrite = xfs_vm_page_mkwrite,
#ifdef HAVE_VMOP_MPROTECT
.mprotect = xfs_vm_mprotect,
#endif
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 79b522779aa..1a5ad8cd97b 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -589,7 +589,30 @@ xfs_setattr(
code = xfs_igrow_start(ip, vap->va_size, credp);
}
xfs_iunlock(ip, XFS_ILOCK_EXCL);
- vn_iowait(vp); /* wait for the completion of any pending DIOs */
+
+ /*
+ * We are going to log the inode size change in this
+ * transaction so any previous writes that are beyond the on
+ * disk EOF and the new EOF that have not been written out need
+ * to be written here. If we do not write the data out, we
+ * expose ourselves to the null files problem.
+ *
+ * Only flush from the on disk size to the smaller of the in
+ * memory file size or the new size as that's the range we
+ * really care about here and prevents waiting for other data
+ * not within the range we care about here.
+ */
+ if (!code &&
+ (ip->i_size != ip->i_d.di_size) &&
+ (vap->va_size > ip->i_d.di_size)) {
+ code = bhv_vop_flush_pages(XFS_ITOV(ip),
+ ip->i_d.di_size, vap->va_size,
+ XFS_B_ASYNC, FI_NONE);
+ }
+
+ /* wait for all I/O to complete */
+ vn_iowait(vp);
+
if (!code)
code = xfs_itruncate_data(ip, vap->va_size);
if (code) {
@@ -4434,9 +4457,12 @@ xfs_free_file_space(
while (!error && !done) {
/*
- * allocate and setup the transaction
+ * allocate and setup the transaction. Allow this
+ * transaction to dip into the reserve blocks to ensure
+ * the freeing of the space succeeds at ENOSPC.
*/
tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
+ tp->t_flags |= XFS_TRANS_RESERVE;
error = xfs_trans_reserve(tp,
resblks,
XFS_WRITE_LOG_RES(mp),
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 5c6e12853a9..35cadad84b1 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -209,6 +209,8 @@ int cont_prepare_write(struct page*, unsigned, unsigned, get_block_t*,
int generic_cont_expand(struct inode *inode, loff_t size);
int generic_cont_expand_simple(struct inode *inode, loff_t size);
int block_commit_write(struct page *page, unsigned from, unsigned to);
+int block_page_mkwrite(struct vm_area_struct *vma, struct page *page,
+ get_block_t get_block);
void block_sync_page(struct page *);
sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *);
int generic_commit_write(struct file *, struct page *, unsigned, unsigned);