summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_dfrag.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_dfrag.c')
-rw-r--r--fs/xfs/xfs_dfrag.c149
1 files changed, 117 insertions, 32 deletions
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index d1483a4f71b..cd27c9d6c71 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -45,15 +45,21 @@
#include "xfs_vnodeops.h"
#include "xfs_trace.h"
+
+static int xfs_swap_extents(
+ xfs_inode_t *ip, /* target inode */
+ xfs_inode_t *tip, /* tmp inode */
+ xfs_swapext_t *sxp);
+
/*
- * Syssgi interface for swapext
+ * ioctl interface for swapext
*/
int
xfs_swapext(
xfs_swapext_t *sxp)
{
xfs_inode_t *ip, *tip;
- struct file *file, *target_file;
+ struct file *file, *tmp_file;
int error = 0;
/* Pull information for the target fd */
@@ -68,56 +74,128 @@ xfs_swapext(
goto out_put_file;
}
- target_file = fget((int)sxp->sx_fdtmp);
- if (!target_file) {
+ tmp_file = fget((int)sxp->sx_fdtmp);
+ if (!tmp_file) {
error = XFS_ERROR(EINVAL);
goto out_put_file;
}
- if (!(target_file->f_mode & FMODE_WRITE) ||
- (target_file->f_flags & O_APPEND)) {
+ if (!(tmp_file->f_mode & FMODE_WRITE) ||
+ (tmp_file->f_flags & O_APPEND)) {
error = XFS_ERROR(EBADF);
- goto out_put_target_file;
+ goto out_put_tmp_file;
}
if (IS_SWAPFILE(file->f_path.dentry->d_inode) ||
- IS_SWAPFILE(target_file->f_path.dentry->d_inode)) {
+ IS_SWAPFILE(tmp_file->f_path.dentry->d_inode)) {
error = XFS_ERROR(EINVAL);
- goto out_put_target_file;
+ goto out_put_tmp_file;
}
ip = XFS_I(file->f_path.dentry->d_inode);
- tip = XFS_I(target_file->f_path.dentry->d_inode);
+ tip = XFS_I(tmp_file->f_path.dentry->d_inode);
if (ip->i_mount != tip->i_mount) {
error = XFS_ERROR(EINVAL);
- goto out_put_target_file;
+ goto out_put_tmp_file;
}
if (ip->i_ino == tip->i_ino) {
error = XFS_ERROR(EINVAL);
- goto out_put_target_file;
+ goto out_put_tmp_file;
}
if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
error = XFS_ERROR(EIO);
- goto out_put_target_file;
+ goto out_put_tmp_file;
}
error = xfs_swap_extents(ip, tip, sxp);
- out_put_target_file:
- fput(target_file);
+ out_put_tmp_file:
+ fput(tmp_file);
out_put_file:
fput(file);
out:
return error;
}
-int
+/*
+ * We need to check that the format of the data fork in the temporary inode is
+ * valid for the target inode before doing the swap. This is not a problem with
+ * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
+ * data fork depending on the space the attribute fork is taking so we can get
+ * invalid formats on the target inode.
+ *
+ * E.g. target has space for 7 extents in extent format, temp inode only has
+ * space for 6. If we defragment down to 7 extents, then the tmp format is a
+ * btree, but when swapped it needs to be in extent format. Hence we can't just
+ * blindly swap data forks on attr2 filesystems.
+ *
+ * Note that we check the swap in both directions so that we don't end up with
+ * a corrupt temporary inode, either.
+ *
+ * Note that fixing the way xfs_fsr sets up the attribute fork in the source
+ * inode will prevent this situation from occurring, so all we do here is
+ * reject and log the attempt. basically we are putting the responsibility on
+ * userspace to get this right.
+ */
+static int
+xfs_swap_extents_check_format(
+ xfs_inode_t *ip, /* target inode */
+ xfs_inode_t *tip) /* tmp inode */
+{
+
+ /* Should never get a local format */
+ if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
+ tip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+ return EINVAL;
+
+ /*
+ * if the target inode has less extents that then temporary inode then
+ * why did userspace call us?
+ */
+ if (ip->i_d.di_nextents < tip->i_d.di_nextents)
+ return EINVAL;
+
+ /*
+ * if the target inode is in extent form and the temp inode is in btree
+ * form then we will end up with the target inode in the wrong format
+ * as we already know there are less extents in the temp inode.
+ */
+ if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
+ tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
+ return EINVAL;
+
+ /* Check temp in extent form to max in target */
+ if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
+ XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > ip->i_df.if_ext_max)
+ return EINVAL;
+
+ /* Check target in extent form to max in temp */
+ if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
+ XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max)
+ return EINVAL;
+
+ /* Check root block of temp in btree form to max in target */
+ if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
+ XFS_IFORK_BOFF(ip) &&
+ tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip))
+ return EINVAL;
+
+ /* Check root block of target in btree form to max in temp */
+ if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
+ XFS_IFORK_BOFF(tip) &&
+ ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip))
+ return EINVAL;
+
+ return 0;
+}
+
+static int
xfs_swap_extents(
- xfs_inode_t *ip,
- xfs_inode_t *tip,
+ xfs_inode_t *ip, /* target inode */
+ xfs_inode_t *tip, /* tmp inode */
xfs_swapext_t *sxp)
{
xfs_mount_t *mp;
@@ -161,13 +239,6 @@ xfs_swap_extents(
goto out_unlock;
}
- /* Should never get a local format */
- if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
- tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
- error = XFS_ERROR(EINVAL);
- goto out_unlock;
- }
-
if (VN_CACHED(VFS_I(tip)) != 0) {
error = xfs_flushinval_pages(tip, 0, -1,
FI_REMAPF_LOCKED);
@@ -189,13 +260,15 @@ xfs_swap_extents(
goto out_unlock;
}
- /*
- * If the target has extended attributes, the tmp file
- * must also in order to ensure the correct data fork
- * format.
- */
- if ( XFS_IFORK_Q(ip) != XFS_IFORK_Q(tip) ) {
- error = XFS_ERROR(EINVAL);
+ trace_xfs_swap_extent_before(ip, 0);
+ trace_xfs_swap_extent_before(tip, 1);
+
+ /* check inode formats now that data is flushed */
+ error = xfs_swap_extents_check_format(ip, tip);
+ if (error) {
+ xfs_fs_cmn_err(CE_NOTE, mp,
+ "%s: inode 0x%llx format is incompatible for exchanging.",
+ __FILE__, ip->i_ino);
goto out_unlock;
}
@@ -276,6 +349,16 @@ xfs_swap_extents(
*tifp = *tempifp; /* struct copy */
/*
+ * Fix the in-memory data fork values that are dependent on the fork
+ * offset in the inode. We can't assume they remain the same as attr2
+ * has dynamic fork offsets.
+ */
+ ifp->if_ext_max = XFS_IFORK_SIZE(ip, XFS_DATA_FORK) /
+ (uint)sizeof(xfs_bmbt_rec_t);
+ tifp->if_ext_max = XFS_IFORK_SIZE(tip, XFS_DATA_FORK) /
+ (uint)sizeof(xfs_bmbt_rec_t);
+
+ /*
* Fix the on-disk inode values
*/
tmp = (__uint64_t)ip->i_d.di_nblocks;
@@ -347,6 +430,8 @@ xfs_swap_extents(
error = xfs_trans_commit(tp, XFS_TRANS_SWAPEXT);
+ trace_xfs_swap_extent_after(ip, 0);
+ trace_xfs_swap_extent_after(tip, 1);
out:
kmem_free(tempifp);
return error;