aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/acl.c42
-rw-r--r--fs/9p/vfs_file.c6
-rw-r--r--fs/affs/super.c5
-rw-r--r--fs/aio.c7
-rw-r--r--fs/attr.c15
-rw-r--r--fs/autofs4/autofs_i.h8
-rw-r--r--fs/autofs4/dev-ioctl.c2
-rw-r--r--fs/autofs4/expire.c80
-rw-r--r--fs/autofs4/root.c2
-rw-r--r--fs/binfmt_elf.c60
-rw-r--r--fs/block_dev.c21
-rw-r--r--fs/btrfs/acl.c6
-rw-r--r--fs/btrfs/async-thread.c14
-rw-r--r--fs/btrfs/async-thread.h1
-rw-r--r--fs/btrfs/compression.c4
-rw-r--r--fs/btrfs/ctree.c5
-rw-r--r--fs/btrfs/ctree.h5
-rw-r--r--fs/btrfs/delayed-inode.c6
-rw-r--r--fs/btrfs/disk-io.c3
-rw-r--r--fs/btrfs/extent-tree.c59
-rw-r--r--fs/btrfs/extent_io.c15
-rw-r--r--fs/btrfs/file.c41
-rw-r--r--fs/btrfs/inode.c37
-rw-r--r--fs/btrfs/ioctl.c21
-rw-r--r--fs/btrfs/qgroup.c18
-rw-r--r--fs/btrfs/qgroup.h3
-rw-r--r--fs/btrfs/relocation.c27
-rw-r--r--fs/btrfs/super.c4
-rw-r--r--fs/btrfs/transaction.h2
-rw-r--r--fs/btrfs/tree-log.c161
-rw-r--r--fs/btrfs/volumes.c2
-rw-r--r--fs/buffer.c12
-rw-r--r--fs/ceph/acl.c10
-rw-r--r--fs/ceph/addr.c26
-rw-r--r--fs/ceph/cache.c12
-rw-r--r--fs/ceph/dir.c5
-rw-r--r--fs/ceph/file.c3
-rw-r--r--fs/ceph/inode.c36
-rw-r--r--fs/ceph/mds_client.c14
-rw-r--r--fs/ceph/super.h1
-rw-r--r--fs/ceph/xattr.c3
-rw-r--r--fs/cifs/cifs_debug.c57
-rw-r--r--fs/cifs/cifs_debug.h2
-rw-r--r--fs/cifs/cifs_fs_sb.h4
-rw-r--r--fs/cifs/cifs_unicode.c39
-rw-r--r--fs/cifs/cifs_unicode.h3
-rw-r--r--fs/cifs/cifsencrypt.c16
-rw-r--r--fs/cifs/cifsfs.c23
-rw-r--r--fs/cifs/cifsglob.h49
-rw-r--r--fs/cifs/cifsproto.h3
-rw-r--r--fs/cifs/cifssmb.c11
-rw-r--r--fs/cifs/connect.c126
-rw-r--r--fs/cifs/dir.c62
-rw-r--r--fs/cifs/file.c72
-rw-r--r--fs/cifs/inode.c22
-rw-r--r--fs/cifs/ioctl.c2
-rw-r--r--fs/cifs/misc.c15
-rw-r--r--fs/cifs/ntlmssp.h2
-rw-r--r--fs/cifs/readdir.c7
-rw-r--r--fs/cifs/sess.c203
-rw-r--r--fs/cifs/smb1ops.c19
-rw-r--r--fs/cifs/smb2file.c2
-rw-r--r--fs/cifs/smb2glob.h11
-rw-r--r--fs/cifs/smb2inode.c14
-rw-r--r--fs/cifs/smb2misc.c60
-rw-r--r--fs/cifs/smb2ops.c50
-rw-r--r--fs/cifs/smb2pdu.c134
-rw-r--r--fs/cifs/smb2pdu.h6
-rw-r--r--fs/cifs/smb2proto.h10
-rw-r--r--fs/cifs/smb2transport.c58
-rw-r--r--fs/cifs/transport.c2
-rw-r--r--fs/configfs/symlink.c3
-rw-r--r--fs/coredump.c51
-rw-r--r--fs/dcache.c55
-rw-r--r--fs/debugfs/inode.c12
-rw-r--r--fs/devpts/inode.c49
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/dlm/lowcomms.c8
-rw-r--r--fs/dlm/user.c4
-rw-r--r--fs/ecryptfs/file.c86
-rw-r--r--fs/eventpoll.c37
-rw-r--r--fs/exec.c67
-rw-r--r--fs/ext2/acl.c12
-rw-r--r--fs/ext4/acl.c12
-rw-r--r--fs/ext4/balloc.c3
-rw-r--r--fs/ext4/crypto.c96
-rw-r--r--fs/ext4/crypto_fname.c2
-rw-r--r--fs/ext4/crypto_key.c28
-rw-r--r--fs/ext4/crypto_policy.c73
-rw-r--r--fs/ext4/dir.c6
-rw-r--r--fs/ext4/ext4.h58
-rw-r--r--fs/ext4/ext4_crypto.h1
-rw-r--r--fs/ext4/ext4_jbd2.h14
-rw-r--r--fs/ext4/extents.c180
-rw-r--r--fs/ext4/file.c119
-rw-r--r--fs/ext4/ialloc.c59
-rw-r--r--fs/ext4/inline.c18
-rw-r--r--fs/ext4/inode.c226
-rw-r--r--fs/ext4/ioctl.c13
-rw-r--r--fs/ext4/mballoc.c85
-rw-r--r--fs/ext4/move_extent.c21
-rw-r--r--fs/ext4/namei.c47
-rw-r--r--fs/ext4/page-io.c14
-rw-r--r--fs/ext4/readpage.c2
-rw-r--r--fs/ext4/resize.c3
-rw-r--r--fs/ext4/super.c205
-rw-r--r--fs/ext4/sysfs.c6
-rw-r--r--fs/ext4/truncate.h2
-rw-r--r--fs/ext4/xattr.c98
-rw-r--r--fs/f2fs/acl.c8
-rw-r--r--fs/f2fs/crypto_fname.c2
-rw-r--r--fs/f2fs/crypto_key.c28
-rw-r--r--fs/f2fs/crypto_policy.c71
-rw-r--r--fs/f2fs/debug.c1
-rw-r--r--fs/f2fs/dir.c32
-rw-r--r--fs/f2fs/f2fs.h17
-rw-r--r--fs/f2fs/f2fs_crypto.h1
-rw-r--r--fs/f2fs/file.c9
-rw-r--r--fs/f2fs/hash.c7
-rw-r--r--fs/f2fs/inline.c4
-rw-r--r--fs/f2fs/super.c121
-rw-r--r--fs/fat/inode.c13
-rw-r--r--fs/fcntl.c14
-rw-r--r--fs/fhandle.c2
-rw-r--r--fs/fs-writeback.c37
-rw-r--r--fs/fscache/cookie.c5
-rw-r--r--fs/fscache/netfs.c1
-rw-r--r--fs/fscache/object.c32
-rw-r--r--fs/fuse/cuse.c4
-rw-r--r--fs/fuse/dev.c7
-rw-r--r--fs/fuse/dir.c59
-rw-r--r--fs/fuse/file.c73
-rw-r--r--fs/fuse/fuse_i.h9
-rw-r--r--fs/fuse/inode.c2
-rw-r--r--fs/gfs2/acl.c12
-rw-r--r--fs/gfs2/dir.c4
-rw-r--r--fs/gfs2/glock.c16
-rw-r--r--fs/gfs2/incore.h3
-rw-r--r--fs/hfsplus/posix_acl.c4
-rw-r--r--fs/hostfs/hostfs_kern.c7
-rw-r--r--fs/hpfs/super.c42
-rw-r--r--fs/hugetlbfs/inode.c2
-rw-r--r--fs/inode.c6
-rw-r--r--fs/isofs/inode.c8
-rw-r--r--fs/isofs/rock.c13
-rw-r--r--fs/jbd2/commit.c2
-rw-r--r--fs/jbd2/journal.c17
-rw-r--r--fs/jbd2/transaction.c7
-rw-r--r--fs/jffs2/acl.c9
-rw-r--r--fs/jfs/acl.c6
-rw-r--r--fs/jfs/super.c4
-rw-r--r--fs/kernfs/file.c28
-rw-r--r--fs/locks.c2
-rw-r--r--fs/mount.h4
-rw-r--r--fs/mpage.c2
-rw-r--r--fs/namei.c38
-rw-r--r--fs/namespace.c235
-rw-r--r--fs/nfs/Kconfig1
-rw-r--r--fs/nfs/callback.c3
-rw-r--r--fs/nfs/callback_xdr.c6
-rw-r--r--fs/nfs/delegation.c16
-rw-r--r--fs/nfs/dir.c56
-rw-r--r--fs/nfs/file.c2
-rw-r--r--fs/nfs/filelayout/filelayout.c3
-rw-r--r--fs/nfs/filelayout/filelayoutdev.c3
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c3
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c1
-rw-r--r--fs/nfs/inode.c6
-rw-r--r--fs/nfs/internal.h1
-rw-r--r--fs/nfs/nfs42proc.c1
-rw-r--r--fs/nfs/nfs4file.c4
-rw-r--r--fs/nfs/nfs4proc.c32
-rw-r--r--fs/nfs/nfs4state.c4
-rw-r--r--fs/nfs/nfs4xdr.c2
-rw-r--r--fs/nfs/pagelist.c26
-rw-r--r--fs/nfs/pnfs.c14
-rw-r--r--fs/nfs/write.c5
-rw-r--r--fs/nfsd/blocklayout.c4
-rw-r--r--fs/nfsd/nfs2acl.c20
-rw-r--r--fs/nfsd/nfs3acl.c16
-rw-r--r--fs/nfsd/nfs3xdr.c12
-rw-r--r--fs/nfsd/nfs4acl.c16
-rw-r--r--fs/nfsd/nfs4callback.c18
-rw-r--r--fs/nfsd/nfs4layouts.c5
-rw-r--r--fs/nfsd/nfs4proc.c14
-rw-r--r--fs/nfsd/nfs4state.c172
-rw-r--r--fs/nfsd/nfs4xdr.c40
-rw-r--r--fs/nfsd/nfssvc.c36
-rw-r--r--fs/nfsd/nfsxdr.c10
-rw-r--r--fs/nfsd/state.h6
-rw-r--r--fs/nfsd/vfs.c59
-rw-r--r--fs/nilfs2/btnode.c2
-rw-r--r--fs/nilfs2/inode.c4
-rw-r--r--fs/nilfs2/mdt.c4
-rw-r--r--fs/nilfs2/segment.c2
-rw-r--r--fs/nilfs2/the_nilfs.c2
-rw-r--r--fs/notify/fanotify/fanotify.c13
-rw-r--r--fs/notify/fanotify/fanotify_user.c36
-rw-r--r--fs/notify/fsnotify.c8
-rw-r--r--fs/notify/group.c19
-rw-r--r--fs/notify/notification.c23
-rw-r--r--fs/ocfs2/acl.c97
-rw-r--r--fs/ocfs2/acl.h5
-rw-r--r--fs/ocfs2/aops.c2
-rw-r--r--fs/ocfs2/cluster/heartbeat.c8
-rw-r--r--fs/ocfs2/dlm/dlmconvert.c26
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c1
-rw-r--r--fs/ocfs2/dlmglue.c10
-rw-r--r--fs/ocfs2/file.c40
-rw-r--r--fs/ocfs2/namei.c23
-rw-r--r--fs/ocfs2/refcounttree.c17
-rw-r--r--fs/ocfs2/stackglue.c6
-rw-r--r--fs/ocfs2/stackglue.h3
-rw-r--r--fs/ocfs2/xattr.c14
-rw-r--r--fs/ocfs2/xattr.h4
-rw-r--r--fs/open.c24
-rw-r--r--fs/overlayfs/copy_up.c16
-rw-r--r--fs/overlayfs/dir.c118
-rw-r--r--fs/overlayfs/inode.c25
-rw-r--r--fs/overlayfs/overlayfs.h2
-rw-r--r--fs/overlayfs/super.c55
-rw-r--r--fs/pipe.c47
-rw-r--r--fs/pnode.c281
-rw-r--r--fs/pnode.h3
-rw-r--r--fs/posix_acl.c81
-rw-r--r--fs/proc/base.c12
-rw-r--r--fs/proc/generic.c1
-rw-r--r--fs/proc/proc_sysctl.c3
-rw-r--r--fs/proc/root.c7
-rw-r--r--fs/proc/task_mmu.c112
-rw-r--r--fs/proc/task_nommu.c49
-rw-r--r--fs/proc_namespace.c2
-rw-r--r--fs/pstore/inode.c1
-rw-r--r--fs/pstore/ram.c22
-rw-r--r--fs/pstore/ram_core.c74
-rw-r--r--fs/quota/dquot.c3
-rw-r--r--fs/reiserfs/file.c8
-rw-r--r--fs/reiserfs/ibalance.c3
-rw-r--r--fs/reiserfs/inode.c2
-rw-r--r--fs/reiserfs/namei.c18
-rw-r--r--fs/reiserfs/super.c12
-rw-r--r--fs/reiserfs/xattr.c54
-rw-r--r--fs/reiserfs/xattr.h9
-rw-r--r--fs/reiserfs/xattr_acl.c8
-rw-r--r--fs/reiserfs/xattr_security.c14
-rw-r--r--fs/reiserfs/xattr_trusted.c14
-rw-r--r--fs/reiserfs/xattr_user.c14
-rw-r--r--fs/romfs/super.c23
-rw-r--r--fs/seq_file.c11
-rw-r--r--fs/splice.c4
-rw-r--r--fs/stat.c3
-rw-r--r--fs/super.c6
-rw-r--r--fs/sysfs/file.c14
-rw-r--r--fs/timerfd.c17
-rw-r--r--fs/ubifs/dir.c16
-rw-r--r--fs/ubifs/file.c24
-rw-r--r--fs/ubifs/tnc.c25
-rw-r--r--fs/ubifs/tnc_commit.c2
-rw-r--r--fs/ubifs/xattr.c2
-rw-r--r--fs/udf/inode.c6
-rw-r--r--fs/ufs/balloc.c26
-rw-r--r--fs/ufs/inode.c9
-rw-r--r--fs/ufs/super.c18
-rw-r--r--fs/ufs/util.h10
-rw-r--r--fs/utimes.c16
-rw-r--r--fs/xattr.c2
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c2
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c1
-rw-r--r--fs/xfs/libxfs/xfs_attr_leaf.c1
-rw-r--r--fs/xfs/libxfs/xfs_attr_remote.c1
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c7
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c1
-rw-r--r--fs/xfs/libxfs/xfs_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_da_btree.c1
-rw-r--r--fs/xfs/libxfs/xfs_dir2_block.c1
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_leaf.c2
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c1
-rw-r--r--fs/xfs/libxfs/xfs_dquot_buf.c40
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c1
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c1
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c12
-rw-r--r--fs/xfs/libxfs/xfs_quota_defs.h2
-rw-r--r--fs/xfs/libxfs/xfs_sb.c5
-rw-r--r--fs/xfs/libxfs/xfs_shared.h1
-rw-r--r--fs/xfs/libxfs/xfs_symlink_remote.c1
-rw-r--r--fs/xfs/xfs_acl.c13
-rw-r--r--fs/xfs/xfs_aops.c31
-rw-r--r--fs/xfs/xfs_attr.h1
-rw-r--r--fs/xfs/xfs_attr_list.c27
-rw-r--r--fs/xfs/xfs_bmap_util.c9
-rw-r--r--fs/xfs/xfs_buf.c27
-rw-r--r--fs/xfs/xfs_buf.h2
-rw-r--r--fs/xfs/xfs_dir2_readdir.c15
-rw-r--r--fs/xfs/xfs_error.c4
-rw-r--r--fs/xfs/xfs_file.c35
-rw-r--r--fs/xfs/xfs_fsops.c4
-rw-r--r--fs/xfs/xfs_icache.c58
-rw-r--r--fs/xfs/xfs_icache.h8
-rw-r--r--fs/xfs/xfs_inode.c26
-rw-r--r--fs/xfs/xfs_inode.h4
-rw-r--r--fs/xfs/xfs_ioctl.c11
-rw-r--r--fs/xfs/xfs_linux.h9
-rw-r--r--fs/xfs/xfs_log_recover.c10
-rw-r--r--fs/xfs/xfs_qm.c7
-rw-r--r--fs/xfs/xfs_qm_syscalls.c3
-rw-r--r--fs/xfs/xfs_super.c10
-rw-r--r--fs/xfs/xfs_xattr.c16
308 files changed, 5127 insertions, 2297 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
index a7e28890f5ef..c30c6ceac2c4 100644
--- a/fs/9p/acl.c
+++ b/fs/9p/acl.c
@@ -282,32 +282,28 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
switch (handler->flags) {
case ACL_TYPE_ACCESS:
if (acl) {
- umode_t mode = inode->i_mode;
- retval = posix_acl_equiv_mode(acl, &mode);
- if (retval < 0)
+ struct iattr iattr;
+ struct posix_acl *old_acl = acl;
+
+ retval = posix_acl_update_mode(inode, &iattr.ia_mode, &acl);
+ if (retval)
goto err_out;
- else {
- struct iattr iattr;
- if (retval == 0) {
- /*
- * ACL can be represented
- * by the mode bits. So don't
- * update ACL.
- */
- acl = NULL;
- value = NULL;
- size = 0;
- }
- /* Updte the mode bits */
- iattr.ia_mode = ((mode & S_IALLUGO) |
- (inode->i_mode & ~S_IALLUGO));
- iattr.ia_valid = ATTR_MODE;
- /* FIXME should we update ctime ?
- * What is the following setxattr update the
- * mode ?
+ if (!acl) {
+ /*
+ * ACL can be represented
+ * by the mode bits. So don't
+ * update ACL.
*/
- v9fs_vfs_setattr_dotl(dentry, &iattr);
+ posix_acl_release(old_acl);
+ value = NULL;
+ size = 0;
}
+ iattr.ia_valid = ATTR_MODE;
+ /* FIXME should we update ctime ?
+ * What is the following setxattr update the
+ * mode ?
+ */
+ v9fs_vfs_setattr_dotl(dentry, &iattr);
}
break;
case ACL_TYPE_DEFAULT:
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 7bf835f85bc8..12ceaf52dae6 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -74,7 +74,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
v9fs_proto_dotu(v9ses));
fid = file->private_data;
if (!fid) {
- fid = v9fs_fid_clone(file->f_path.dentry);
+ fid = v9fs_fid_clone(file_dentry(file));
if (IS_ERR(fid))
return PTR_ERR(fid);
@@ -100,7 +100,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
* because we want write after unlink usecase
* to work.
*/
- fid = v9fs_writeback_fid(file->f_path.dentry);
+ fid = v9fs_writeback_fid(file_dentry(file));
if (IS_ERR(fid)) {
err = PTR_ERR(fid);
mutex_unlock(&v9inode->v_mutex);
@@ -516,7 +516,7 @@ v9fs_mmap_file_mmap(struct file *filp, struct vm_area_struct *vma)
* because we want write after unlink usecase
* to work.
*/
- fid = v9fs_writeback_fid(filp->f_path.dentry);
+ fid = v9fs_writeback_fid(file_dentry(filp));
if (IS_ERR(fid)) {
retval = PTR_ERR(fid);
mutex_unlock(&v9inode->v_mutex);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 5b50c4ca43a7..f90c535703ce 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -528,7 +528,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
char *prefix = NULL;
new_opts = kstrdup(data, GFP_KERNEL);
- if (!new_opts)
+ if (data && !new_opts)
return -ENOMEM;
pr_debug("%s(flags=0x%x,opts=\"%s\")\n", __func__, *flags, data);
@@ -546,7 +546,8 @@ affs_remount(struct super_block *sb, int *flags, char *data)
}
flush_delayed_work(&sbi->sb_work);
- replace_mount_options(sb, new_opts);
+ if (new_opts)
+ replace_mount_options(sb, new_opts);
sbi->s_flags = mount_flags;
sbi->s_mode = mode;
diff --git a/fs/aio.c b/fs/aio.c
index 155f84253f33..fe4f49212b99 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -239,7 +239,12 @@ static struct dentry *aio_mount(struct file_system_type *fs_type,
static const struct dentry_operations ops = {
.d_dname = simple_dname,
};
- return mount_pseudo(fs_type, "aio:", NULL, &ops, AIO_RING_MAGIC);
+ struct dentry *root = mount_pseudo(fs_type, "aio:", NULL, &ops,
+ AIO_RING_MAGIC);
+
+ if (!IS_ERR(root))
+ root->d_sb->s_iflags |= SB_I_NOEXEC;
+ return root;
}
/* aio_setup
diff --git a/fs/attr.c b/fs/attr.c
index 6530ced19697..d62f674a605f 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -202,6 +202,21 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
return -EPERM;
}
+ /*
+ * If utimes(2) and friends are called with times == NULL (or both
+ * times are UTIME_NOW), then we need to check for write permission
+ */
+ if (ia_valid & ATTR_TOUCH) {
+ if (IS_IMMUTABLE(inode))
+ return -EPERM;
+
+ if (!inode_owner_or_capable(inode)) {
+ error = inode_permission(inode, MAY_WRITE);
+ if (error)
+ return error;
+ }
+ }
+
if ((ia_valid & ATTR_MODE)) {
umode_t amode = attr->ia_mode;
/* Flag setting protected by i_mutex */
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index c37149b929be..502d3892d8a4 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -79,9 +79,13 @@ struct autofs_info {
};
#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */
-#define AUTOFS_INF_NO_RCU (1<<1) /* the dentry is being considered
+#define AUTOFS_INF_WANT_EXPIRE (1<<1) /* the dentry is being considered
* for expiry, so RCU_walk is
- * not permitted
+ * not permitted. If it progresses to
+ * actual expiry attempt, the flag is
+ * not cleared when EXPIRING is set -
+ * in that case it gets cleared only
+ * when it comes to clearing EXPIRING.
*/
#define AUTOFS_INF_PENDING (1<<2) /* dentry pending mount */
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index ac7d921ed984..257425511d10 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -331,7 +331,7 @@ static int autofs_dev_ioctl_fail(struct file *fp,
int status;
token = (autofs_wqt_t) param->fail.token;
- status = param->fail.status ? param->fail.status : -ENOENT;
+ status = param->fail.status < 0 ? param->fail.status : -ENOENT;
return autofs4_wait_release(sbi, token, status);
}
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 1cebc3c52fa5..7a5a598a2d94 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -315,19 +315,17 @@ struct dentry *autofs4_expire_direct(struct super_block *sb,
if (ino->flags & AUTOFS_INF_PENDING)
goto out;
if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
- ino->flags |= AUTOFS_INF_NO_RCU;
+ ino->flags |= AUTOFS_INF_WANT_EXPIRE;
spin_unlock(&sbi->fs_lock);
synchronize_rcu();
spin_lock(&sbi->fs_lock);
if (!autofs4_direct_busy(mnt, root, timeout, do_now)) {
ino->flags |= AUTOFS_INF_EXPIRING;
- smp_mb();
- ino->flags &= ~AUTOFS_INF_NO_RCU;
init_completion(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
return root;
}
- ino->flags &= ~AUTOFS_INF_NO_RCU;
+ ino->flags &= ~AUTOFS_INF_WANT_EXPIRE;
}
out:
spin_unlock(&sbi->fs_lock);
@@ -417,6 +415,7 @@ static struct dentry *should_expire(struct dentry *dentry,
}
return NULL;
}
+
/*
* Find an eligible tree to time-out
* A tree is eligible if :-
@@ -432,6 +431,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
struct dentry *root = sb->s_root;
struct dentry *dentry;
struct dentry *expired;
+ struct dentry *found;
struct autofs_info *ino;
if (!root)
@@ -442,48 +442,54 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
dentry = NULL;
while ((dentry = get_next_positive_subdir(dentry, root))) {
+ int flags = how;
+
spin_lock(&sbi->fs_lock);
ino = autofs4_dentry_ino(dentry);
- if (ino->flags & AUTOFS_INF_NO_RCU)
- expired = NULL;
- else
- expired = should_expire(dentry, mnt, timeout, how);
- if (!expired) {
+ if (ino->flags & AUTOFS_INF_WANT_EXPIRE) {
spin_unlock(&sbi->fs_lock);
continue;
}
+ spin_unlock(&sbi->fs_lock);
+
+ expired = should_expire(dentry, mnt, timeout, flags);
+ if (!expired)
+ continue;
+
+ spin_lock(&sbi->fs_lock);
ino = autofs4_dentry_ino(expired);
- ino->flags |= AUTOFS_INF_NO_RCU;
+ ino->flags |= AUTOFS_INF_WANT_EXPIRE;
spin_unlock(&sbi->fs_lock);
synchronize_rcu();
- spin_lock(&sbi->fs_lock);
- if (should_expire(expired, mnt, timeout, how)) {
- if (expired != dentry)
- dput(dentry);
- goto found;
- }
- ino->flags &= ~AUTOFS_INF_NO_RCU;
+ /* Make sure a reference is not taken on found if
+ * things have changed.
+ */
+ flags &= ~AUTOFS_EXP_LEAVES;
+ found = should_expire(expired, mnt, timeout, how);
+ if (!found || found != expired)
+ /* Something has changed, continue */
+ goto next;
+
if (expired != dentry)
- dput(expired);
+ dput(dentry);
+
+ spin_lock(&sbi->fs_lock);
+ goto found;
+next:
+ spin_lock(&sbi->fs_lock);
+ ino->flags &= ~AUTOFS_INF_WANT_EXPIRE;
spin_unlock(&sbi->fs_lock);
+ if (expired != dentry)
+ dput(expired);
}
return NULL;
found:
DPRINTK("returning %p %pd", expired, expired);
ino->flags |= AUTOFS_INF_EXPIRING;
- smp_mb();
- ino->flags &= ~AUTOFS_INF_NO_RCU;
init_completion(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
- spin_lock(&sbi->lookup_lock);
- spin_lock(&expired->d_parent->d_lock);
- spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED);
- list_move(&expired->d_parent->d_subdirs, &expired->d_child);
- spin_unlock(&expired->d_lock);
- spin_unlock(&expired->d_parent->d_lock);
- spin_unlock(&sbi->lookup_lock);
return expired;
}
@@ -492,15 +498,27 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk)
struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
struct autofs_info *ino = autofs4_dentry_ino(dentry);
int status;
+ int state;
/* Block on any pending expire */
- if (!(ino->flags & (AUTOFS_INF_EXPIRING | AUTOFS_INF_NO_RCU)))
+ if (!(ino->flags & AUTOFS_INF_WANT_EXPIRE))
return 0;
if (rcu_walk)
return -ECHILD;
+retry:
spin_lock(&sbi->fs_lock);
- if (ino->flags & AUTOFS_INF_EXPIRING) {
+ state = ino->flags & (AUTOFS_INF_WANT_EXPIRE | AUTOFS_INF_EXPIRING);
+ if (state == AUTOFS_INF_WANT_EXPIRE) {
+ spin_unlock(&sbi->fs_lock);
+ /*
+ * Possibly being selected for expire, wait until
+ * it's selected or not.
+ */
+ schedule_timeout_uninterruptible(HZ/10);
+ goto retry;
+ }
+ if (state & AUTOFS_INF_EXPIRING) {
spin_unlock(&sbi->fs_lock);
DPRINTK("waiting for expire %p name=%pd", dentry, dentry);
@@ -551,7 +569,7 @@ int autofs4_expire_run(struct super_block *sb,
ino = autofs4_dentry_ino(dentry);
/* avoid rapid-fire expire attempts if expiry fails */
ino->last_used = now;
- ino->flags &= ~AUTOFS_INF_EXPIRING;
+ ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE);
complete_all(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
@@ -579,7 +597,7 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
spin_lock(&sbi->fs_lock);
/* avoid rapid-fire expire attempts if expiry fails */
ino->last_used = now;
- ino->flags &= ~AUTOFS_INF_EXPIRING;
+ ino->flags &= ~(AUTOFS_INF_EXPIRING|AUTOFS_INF_WANT_EXPIRE);
complete_all(&ino->expire_complete);
spin_unlock(&sbi->fs_lock);
dput(dentry);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index c6d7d3dbd52a..7a54c6a867c8 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -455,7 +455,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
* a mount-trap.
*/
struct inode *inode;
- if (ino->flags & (AUTOFS_INF_EXPIRING | AUTOFS_INF_NO_RCU))
+ if (ino->flags & AUTOFS_INF_WANT_EXPIRE)
return 0;
if (d_mountpoint(dentry))
return 0;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 3a93755e880f..f44e93d2650d 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -905,17 +905,60 @@ static int load_elf_binary(struct linux_binprm *bprm)
elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;
vaddr = elf_ppnt->p_vaddr;
+ /*
+ * If we are loading ET_EXEC or we have already performed
+ * the ET_DYN load_addr calculations, proceed normally.
+ */
if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
elf_flags |= MAP_FIXED;
} else if (loc->elf_ex.e_type == ET_DYN) {
- /* Try and get dynamic programs out of the way of the
- * default mmap base, as well as whatever program they
- * might try to exec. This is because the brk will
- * follow the loader, and is not movable. */
- load_bias = ELF_ET_DYN_BASE - vaddr;
- if (current->flags & PF_RANDOMIZE)
- load_bias += arch_mmap_rnd();
- load_bias = ELF_PAGESTART(load_bias);
+ /*
+ * This logic is run once for the first LOAD Program
+ * Header for ET_DYN binaries to calculate the
+ * randomization (load_bias) for all the LOAD
+ * Program Headers, and to calculate the entire
+ * size of the ELF mapping (total_size). (Note that
+ * load_addr_set is set to true later once the
+ * initial mapping is performed.)
+ *
+ * There are effectively two types of ET_DYN
+ * binaries: programs (i.e. PIE: ET_DYN with INTERP)
+ * and loaders (ET_DYN without INTERP, since they
+ * _are_ the ELF interpreter). The loaders must
+ * be loaded away from programs since the program
+ * may otherwise collide with the loader (especially
+ * for ET_EXEC which does not have a randomized
+ * position). For example to handle invocations of
+ * "./ld.so someprog" to test out a new version of
+ * the loader, the subsequent program that the
+ * loader loads must avoid the loader itself, so
+ * they cannot share the same load range. Sufficient
+ * room for the brk must be allocated with the
+ * loader as well, since brk must be available with
+ * the loader.
+ *
+ * Therefore, programs are loaded offset from
+ * ELF_ET_DYN_BASE and loaders are loaded into the
+ * independently randomized mmap region (0 load_bias
+ * without MAP_FIXED).
+ */
+ if (elf_interpreter) {
+ load_bias = ELF_ET_DYN_BASE;
+ if (current->flags & PF_RANDOMIZE)
+ load_bias += arch_mmap_rnd();
+ elf_flags |= MAP_FIXED;
+ } else
+ load_bias = 0;
+
+ /*
+ * Since load_bias is used for all subsequent loading
+ * calculations, we must lower it by the first vaddr
+ * so that the remaining calculations based on the
+ * ELF vaddrs will be correctly offset. The result
+ * is then page aligned.
+ */
+ load_bias = ELF_PAGESTART(load_bias - vaddr);
+
total_size = total_mapping_size(elf_phdata,
loc->elf_ex.e_phnum);
if (!total_size) {
@@ -2295,6 +2338,7 @@ static int elf_core_dump(struct coredump_params *cprm)
goto end_coredump;
}
}
+ dump_truncate(cprm);
if (!elf_core_write_extra_data(cprm))
goto end_coredump;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 44d4a1e9244e..26bbaaefdff4 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -88,12 +88,11 @@ void invalidate_bdev(struct block_device *bdev)
{
struct address_space *mapping = bdev->bd_inode->i_mapping;
- if (mapping->nrpages == 0)
- return;
-
- invalidate_bh_lrus();
- lru_add_drain_all(); /* make sure all lru add caches are flushed */
- invalidate_mapping_pages(mapping, 0, -1);
+ if (mapping->nrpages) {
+ invalidate_bh_lrus();
+ lru_add_drain_all(); /* make sure all lru add caches are flushed */
+ invalidate_mapping_pages(mapping, 0, -1);
+ }
/* 99% of the time, we don't need to flush the cleancache on the bdev.
* But, for the strange corners, lets be cautious
*/
@@ -759,7 +758,7 @@ static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
return true; /* already a holder */
else if (bdev->bd_holder != NULL)
return false; /* held by someone else */
- else if (bdev->bd_contains == bdev)
+ else if (whole == bdev)
return true; /* is a whole device which isn't held */
else if (whole->bd_holder == bd_may_claim)
@@ -1098,7 +1097,6 @@ int revalidate_disk(struct gendisk *disk)
if (disk->fops->revalidate_disk)
ret = disk->fops->revalidate_disk(disk);
- blk_integrity_revalidate(disk);
bdev = bdget_disk(disk, 0);
if (!bdev)
return ret;
@@ -1806,6 +1804,7 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
spin_lock(&blockdev_superblock->s_inode_list_lock);
list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
struct address_space *mapping = inode->i_mapping;
+ struct block_device *bdev;
spin_lock(&inode->i_lock);
if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
@@ -1826,8 +1825,12 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
*/
iput(old_inode);
old_inode = inode;
+ bdev = I_BDEV(inode);
- func(I_BDEV(inode), arg);
+ mutex_lock(&bdev->bd_mutex);
+ if (bdev->bd_openers)
+ func(bdev, arg);
+ mutex_unlock(&bdev->bd_mutex);
spin_lock(&blockdev_superblock->s_inode_list_lock);
}
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 9a0124a95851..fb3e64d37cb4 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -83,11 +83,9 @@ static int __btrfs_set_acl(struct btrfs_trans_handle *trans,
case ACL_TYPE_ACCESS:
name = POSIX_ACL_XATTR_ACCESS;
if (acl) {
- ret = posix_acl_equiv_mode(acl, &inode->i_mode);
- if (ret < 0)
+ ret = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+ if (ret)
return ret;
- if (ret == 0)
- acl = NULL;
}
ret = 0;
break;
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 9aba42b78253..a09264d8b853 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -70,6 +70,20 @@ void btrfs_##name(struct work_struct *arg) \
normal_work_helper(work); \
}
+bool btrfs_workqueue_normal_congested(struct btrfs_workqueue *wq)
+{
+ /*
+ * We could compare wq->normal->pending with num_online_cpus()
+ * to support "thresh == NO_THRESHOLD" case, but it requires
+ * moving up atomic_inc/dec in thresh_queue/exec_hook. Let's
+ * postpone it until someone needs the support of that case.
+ */
+ if (wq->normal->thresh == NO_THRESHOLD)
+ return false;
+
+ return atomic_read(&wq->normal->pending) > wq->normal->thresh * 2;
+}
+
BTRFS_WORK_HELPER(worker_helper);
BTRFS_WORK_HELPER(delalloc_helper);
BTRFS_WORK_HELPER(flush_delalloc_helper);
diff --git a/fs/btrfs/async-thread.h b/fs/btrfs/async-thread.h
index ad4d0647d1a6..8e1d6576d764 100644
--- a/fs/btrfs/async-thread.h
+++ b/fs/btrfs/async-thread.h
@@ -80,4 +80,5 @@ void btrfs_queue_work(struct btrfs_workqueue *wq,
void btrfs_destroy_workqueue(struct btrfs_workqueue *wq);
void btrfs_workqueue_set_max(struct btrfs_workqueue *wq, int max);
void btrfs_set_work_high_priority(struct btrfs_work *work);
+bool btrfs_workqueue_normal_congested(struct btrfs_workqueue *wq);
#endif
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index c473c42d7d6c..bae05c5c75ba 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -694,7 +694,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
ret = btrfs_map_bio(root, READ, comp_bio,
mirror_num, 0);
if (ret) {
- bio->bi_error = ret;
+ comp_bio->bi_error = ret;
bio_endio(comp_bio);
}
@@ -723,7 +723,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0);
if (ret) {
- bio->bi_error = ret;
+ comp_bio->bi_error = ret;
bio_endio(comp_bio);
}
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 5b8e235c4b6d..0f2b7c622ce3 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1551,6 +1551,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
trans->transid, root->fs_info->generation);
if (!should_cow_block(trans, root, buf)) {
+ trans->dirty = true;
*cow_ret = buf;
return 0;
}
@@ -2773,8 +2774,10 @@ again:
* then we don't want to set the path blocking,
* so we test it here
*/
- if (!should_cow_block(trans, root, b))
+ if (!should_cow_block(trans, root, b)) {
+ trans->dirty = true;
goto cow_done;
+ }
/*
* must have write locks on this node and the
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 385b449fd7ed..e847573c6db0 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1770,6 +1770,7 @@ struct btrfs_fs_info {
struct btrfs_workqueue *qgroup_rescan_workers;
struct completion qgroup_rescan_completion;
struct btrfs_work qgroup_rescan_work;
+ bool qgroup_rescan_running; /* protected by qgroup_rescan_lock */
/* filesystem state */
unsigned long fs_state;
@@ -3069,6 +3070,8 @@ btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu,
cpu->target = le64_to_cpu(disk->target);
cpu->flags = le64_to_cpu(disk->flags);
cpu->limit = le64_to_cpu(disk->limit);
+ cpu->stripes_min = le32_to_cpu(disk->stripes_min);
+ cpu->stripes_max = le32_to_cpu(disk->stripes_max);
}
static inline void
@@ -3087,6 +3090,8 @@ btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk,
disk->target = cpu_to_le64(cpu->target);
disk->flags = cpu_to_le64(cpu->flags);
disk->limit = cpu_to_le64(cpu->limit);
+ disk->stripes_min = cpu_to_le32(cpu->stripes_min);
+ disk->stripes_max = cpu_to_le32(cpu->stripes_max);
}
/* struct btrfs_super_block */
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 02b934d0ee65..09fa5af9782e 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1375,7 +1375,8 @@ release_path:
total_done++;
btrfs_release_prepared_delayed_node(delayed_node);
- if (async_work->nr == 0 || total_done < async_work->nr)
+ if ((async_work->nr == 0 && total_done < BTRFS_DELAYED_WRITEBACK) ||
+ total_done < async_work->nr)
goto again;
free_path:
@@ -1391,7 +1392,8 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
{
struct btrfs_async_delayed_work *async_work;
- if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
+ if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND ||
+ btrfs_workqueue_normal_congested(fs_info->delayed_workers))
return 0;
async_work = kmalloc(sizeof(*async_work), GFP_NOFS);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 41fb43183406..85b207d19aa5 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2276,6 +2276,7 @@ static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
fs_info->quota_enabled = 0;
fs_info->pending_quota_state = 0;
fs_info->qgroup_ulist = NULL;
+ fs_info->qgroup_rescan_running = false;
mutex_init(&fs_info->qgroup_rescan_lock);
}
@@ -3811,7 +3812,7 @@ void close_ctree(struct btrfs_root *root)
smp_mb();
/* wait for the qgroup rescan worker to stop */
- btrfs_qgroup_wait_for_completion(fs_info);
+ btrfs_qgroup_wait_for_completion(fs_info, false);
/* wait for the uuid_scan task to finish */
down(&fs_info->uuid_tree_rescan_sem);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 2368cac1115a..c36a03fa7678 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2520,11 +2520,11 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
if (ref && ref->seq &&
btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
spin_unlock(&locked_ref->lock);
- btrfs_delayed_ref_unlock(locked_ref);
spin_lock(&delayed_refs->lock);
locked_ref->processing = 0;
delayed_refs->num_heads_ready++;
spin_unlock(&delayed_refs->lock);
+ btrfs_delayed_ref_unlock(locked_ref);
locked_ref = NULL;
cond_resched();
count++;
@@ -2570,7 +2570,10 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
*/
if (must_insert_reserved)
locked_ref->must_insert_reserved = 1;
+ spin_lock(&delayed_refs->lock);
locked_ref->processing = 0;
+ delayed_refs->num_heads_ready++;
+ spin_unlock(&delayed_refs->lock);
btrfs_debug(fs_info, "run_delayed_extent_op returned %d", ret);
btrfs_delayed_ref_unlock(locked_ref);
return ret;
@@ -3851,6 +3854,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
info->space_info_kobj, "%s",
alloc_name(found->flags));
if (ret) {
+ percpu_counter_destroy(&found->total_bytes_pinned);
kfree(found);
return ret;
}
@@ -7856,7 +7860,7 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
buf->start + buf->len - 1, GFP_NOFS);
}
- trans->blocks_used++;
+ trans->dirty = true;
/* this returns a buffer locked for blocking */
return buf;
}
@@ -8486,14 +8490,13 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
ret = btrfs_lookup_extent_info(trans, root, bytenr, level - 1, 1,
&wc->refs[level - 1],
&wc->flags[level - 1]);
- if (ret < 0) {
- btrfs_tree_unlock(next);
- return ret;
- }
+ if (ret < 0)
+ goto out_unlock;
if (unlikely(wc->refs[level - 1] == 0)) {
btrfs_err(root->fs_info, "Missing references.");
- BUG();
+ ret = -EIO;
+ goto out_unlock;
}
*lookup_info = 0;
@@ -8545,7 +8548,12 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
}
level--;
- BUG_ON(level != btrfs_header_level(next));
+ ASSERT(level == btrfs_header_level(next));
+ if (level != btrfs_header_level(next)) {
+ btrfs_err(root->fs_info, "mismatched level");
+ ret = -EIO;
+ goto out_unlock;
+ }
path->nodes[level] = next;
path->slots[level] = 0;
path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
@@ -8560,8 +8568,15 @@ skip:
if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
parent = path->nodes[level]->start;
} else {
- BUG_ON(root->root_key.objectid !=
+ ASSERT(root->root_key.objectid ==
btrfs_header_owner(path->nodes[level]));
+ if (root->root_key.objectid !=
+ btrfs_header_owner(path->nodes[level])) {
+ btrfs_err(root->fs_info,
+ "mismatched block owner");
+ ret = -EIO;
+ goto out_unlock;
+ }
parent = 0;
}
@@ -8578,12 +8593,18 @@ skip:
}
ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
root->root_key.objectid, level - 1, 0);
- BUG_ON(ret); /* -ENOMEM */
+ if (ret)
+ goto out_unlock;
}
+
+ *lookup_info = 1;
+ ret = 1;
+
+out_unlock:
btrfs_tree_unlock(next);
free_extent_buffer(next);
- *lookup_info = 1;
- return 1;
+
+ return ret;
}
/*
@@ -9686,6 +9707,11 @@ int btrfs_read_block_groups(struct btrfs_root *root)
struct extent_buffer *leaf;
int need_clear = 0;
u64 cache_gen;
+ u64 feature;
+ int mixed;
+
+ feature = btrfs_super_incompat_flags(info->super_copy);
+ mixed = !!(feature & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS);
root = info->extent_root;
key.objectid = 0;
@@ -9739,6 +9765,15 @@ int btrfs_read_block_groups(struct btrfs_root *root)
btrfs_item_ptr_offset(leaf, path->slots[0]),
sizeof(cache->item));
cache->flags = btrfs_block_group_flags(&cache->item);
+ if (!mixed &&
+ ((cache->flags & BTRFS_BLOCK_GROUP_METADATA) &&
+ (cache->flags & BTRFS_BLOCK_GROUP_DATA))) {
+ btrfs_err(info,
+"bg %llu is a mixed block group but filesystem hasn't enabled mixed block groups",
+ cache->key.objectid);
+ ret = -EINVAL;
+ goto error;
+ }
key.objectid = found_key.objectid + found_key.offset;
btrfs_release_path(path);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 9abe18763a7f..e767f347f2b1 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2786,12 +2786,6 @@ struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
btrfs_bio->csum = NULL;
btrfs_bio->csum_allocated = NULL;
btrfs_bio->end_io = NULL;
-
-#ifdef CONFIG_BLK_CGROUP
- /* FIXME, put this into bio_clone_bioset */
- if (bio->bi_css)
- bio_associate_blkcg(new, bio->bi_css);
-#endif
}
return new;
}
@@ -5300,11 +5294,20 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
lock_page(page);
}
locked_pages++;
+ }
+ /*
+ * We need to firstly lock all pages to make sure that
+ * the uptodate bit of our pages won't be affected by
+ * clear_extent_buffer_uptodate().
+ */
+ for (i = start_i; i < num_pages; i++) {
+ page = eb->pages[i];
if (!PageUptodate(page)) {
num_reads++;
all_uptodate = 0;
}
}
+
if (all_uptodate) {
if (start_i == 0)
set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 0f09526aa7d9..d4a6eef31854 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1526,27 +1526,24 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
- if (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
- BTRFS_INODE_PREALLOC)) {
- ret = check_can_nocow(inode, pos, &write_bytes);
- if (ret < 0)
- break;
- if (ret > 0) {
- /*
- * For nodata cow case, no need to reserve
- * data space.
- */
- only_release_metadata = true;
- /*
- * our prealloc extent may be smaller than
- * write_bytes, so scale down.
- */
- num_pages = DIV_ROUND_UP(write_bytes + offset,
- PAGE_CACHE_SIZE);
- reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
- goto reserve_metadata;
- }
+ if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
+ BTRFS_INODE_PREALLOC)) &&
+ check_can_nocow(inode, pos, &write_bytes) > 0) {
+ /*
+ * For nodata cow case, no need to reserve
+ * data space.
+ */
+ only_release_metadata = true;
+ /*
+ * our prealloc extent may be smaller than
+ * write_bytes, so scale down.
+ */
+ num_pages = DIV_ROUND_UP(write_bytes + offset,
+ PAGE_CACHE_SIZE);
+ reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
+ goto reserve_metadata;
}
+
ret = btrfs_check_data_free_space(inode, pos, write_bytes);
if (ret < 0)
break;
@@ -1885,7 +1882,7 @@ static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
*/
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
- struct dentry *dentry = file->f_path.dentry;
+ struct dentry *dentry = file_dentry(file);
struct inode *inode = d_inode(dentry);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
@@ -2774,7 +2771,7 @@ static long btrfs_fallocate(struct file *file, int mode,
if (!ret)
ret = btrfs_prealloc_file_range(inode, mode,
range->start,
- range->len, 1 << inode->i_blkbits,
+ range->len, i_blocksize(inode),
offset + len, &alloc_hint);
list_del(&range->list);
kfree(range);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 4bc9dbf29a73..bebd6517355d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -4397,8 +4397,19 @@ search_again:
if (found_type > min_type) {
del_item = 1;
} else {
- if (item_end < new_size)
+ if (item_end < new_size) {
+ /*
+ * With NO_HOLES mode, for the following mapping
+ *
+ * [0-4k][hole][8k-12k]
+ *
+ * if truncating isize down to 6k, it ends up
+ * isize being 8k.
+ */
+ if (btrfs_fs_incompat(root->fs_info, NO_HOLES))
+ last_size = new_size;
break;
+ }
if (found_key.offset >= new_size)
del_item = 1;
else
@@ -7318,8 +7329,8 @@ bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end)
int found = false;
void **pagep = NULL;
struct page *page = NULL;
- int start_idx;
- int end_idx;
+ unsigned long start_idx;
+ unsigned long end_idx;
start_idx = start >> PAGE_CACHE_SHIFT;
@@ -7510,11 +7521,18 @@ static void adjust_dio_outstanding_extents(struct inode *inode,
* within our reservation, otherwise we need to adjust our inode
* counter appropriately.
*/
- if (dio_data->outstanding_extents) {
+ if (dio_data->outstanding_extents >= num_extents) {
dio_data->outstanding_extents -= num_extents;
} else {
+ /*
+ * If dio write length has been split due to no large enough
+ * contiguous space, we need to compensate our inode counter
+ * appropriately.
+ */
+ u64 num_needed = num_extents - dio_data->outstanding_extents;
+
spin_lock(&BTRFS_I(inode)->lock);
- BTRFS_I(inode)->outstanding_extents += num_extents;
+ BTRFS_I(inode)->outstanding_extents += num_needed;
spin_unlock(&BTRFS_I(inode)->lock);
}
}
@@ -8691,9 +8709,14 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
* So even we call qgroup_free_data(), it won't decrease reserved
* space.
* 2) Not written to disk
- * This means the reserved space should be freed here.
+ * This means the reserved space should be freed here. However,
+ * if a truncate invalidates the page (by clearing PageDirty)
+ * and the page is accounted for while allocating extent
+ * in btrfs_check_data_free_space() we let delayed_ref to
+ * free the entire extent.
*/
- btrfs_qgroup_free_data(inode, page_start, PAGE_CACHE_SIZE);
+ if (PageDirty(page))
+ btrfs_qgroup_free_data(inode, page_start, PAGE_SIZE);
if (!inode_evicting) {
clear_extent_bit(tree, page_start, page_end,
EXTENT_LOCKED | EXTENT_DIRTY |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index f07d01bc4875..317b99acdf4b 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1619,6 +1619,9 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
int namelen;
int ret = 0;
+ if (!S_ISDIR(file_inode(file)->i_mode))
+ return -ENOTDIR;
+
ret = mnt_want_write_file(file);
if (ret)
goto out;
@@ -1648,7 +1651,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
src_inode = file_inode(src.file);
if (src_inode->i_sb != file_inode(file)->i_sb) {
- btrfs_info(BTRFS_I(src_inode)->root->fs_info,
+ btrfs_info(BTRFS_I(file_inode(file))->root->fs_info,
"Snapshot src from another FS");
ret = -EXDEV;
} else if (!inode_owner_or_capable(src_inode)) {
@@ -1676,6 +1679,9 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
struct btrfs_ioctl_vol_args *vol_args;
int ret;
+ if (!S_ISDIR(file_inode(file)->i_mode))
+ return -ENOTDIR;
+
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
@@ -1699,6 +1705,9 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
bool readonly = false;
struct btrfs_qgroup_inherit *inherit = NULL;
+ if (!S_ISDIR(file_inode(file)->i_mode))
+ return -ENOTDIR;
+
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
@@ -2345,6 +2354,9 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
int ret;
int err = 0;
+ if (!S_ISDIR(dir->i_mode))
+ return -ENOTDIR;
+
vol_args = memdup_user(arg, sizeof(*vol_args));
if (IS_ERR(vol_args))
return PTR_ERR(vol_args);
@@ -3813,6 +3825,11 @@ process_slot:
}
btrfs_release_path(path);
key.offset = next_key_min_offset;
+
+ if (fatal_signal_pending(current)) {
+ ret = -EINTR;
+ goto out;
+ }
}
ret = 0;
@@ -5121,7 +5138,7 @@ static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg)
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- return btrfs_qgroup_wait_for_completion(root->fs_info);
+ return btrfs_qgroup_wait_for_completion(root->fs_info, true);
}
static long _btrfs_ioctl_set_received_subvol(struct file *file,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 5279fdae7142..88d9b66e2207 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -995,7 +995,7 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
goto out;
fs_info->quota_enabled = 0;
fs_info->pending_quota_state = 0;
- btrfs_qgroup_wait_for_completion(fs_info);
+ btrfs_qgroup_wait_for_completion(fs_info, false);
spin_lock(&fs_info->qgroup_lock);
quota_root = fs_info->quota_root;
fs_info->quota_root = NULL;
@@ -2349,6 +2349,9 @@ out:
}
done:
+ mutex_lock(&fs_info->qgroup_rescan_lock);
+ fs_info->qgroup_rescan_running = false;
+ mutex_unlock(&fs_info->qgroup_rescan_lock);
complete_all(&fs_info->qgroup_rescan_completion);
}
@@ -2390,6 +2393,7 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid,
sizeof(fs_info->qgroup_rescan_progress));
fs_info->qgroup_rescan_progress.objectid = progress_objectid;
init_completion(&fs_info->qgroup_rescan_completion);
+ fs_info->qgroup_rescan_running = true;
spin_unlock(&fs_info->qgroup_lock);
mutex_unlock(&fs_info->qgroup_rescan_lock);
@@ -2467,20 +2471,26 @@ btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info)
return 0;
}
-int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info)
+int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
+ bool interruptible)
{
int running;
int ret = 0;
mutex_lock(&fs_info->qgroup_rescan_lock);
spin_lock(&fs_info->qgroup_lock);
- running = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN;
+ running = fs_info->qgroup_rescan_running;
spin_unlock(&fs_info->qgroup_lock);
mutex_unlock(&fs_info->qgroup_rescan_lock);
- if (running)
+ if (!running)
+ return 0;
+
+ if (interruptible)
ret = wait_for_completion_interruptible(
&fs_info->qgroup_rescan_completion);
+ else
+ wait_for_completion(&fs_info->qgroup_rescan_completion);
return ret;
}
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index ecb2c143ef75..3d73e4c9c7df 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -46,7 +46,8 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info);
int btrfs_qgroup_rescan(struct btrfs_fs_info *fs_info);
void btrfs_qgroup_rescan_resume(struct btrfs_fs_info *fs_info);
-int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info);
+int btrfs_qgroup_wait_for_completion(struct btrfs_fs_info *fs_info,
+ bool interruptible);
int btrfs_add_qgroup_relation(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 src, u64 dst);
int btrfs_del_qgroup_relation(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b4ca5454ef1a..8ca9aa92972d 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -921,9 +921,16 @@ again:
path2->slots[level]--;
eb = path2->nodes[level];
- WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) !=
- cur->bytenr);
-
+ if (btrfs_node_blockptr(eb, path2->slots[level]) !=
+ cur->bytenr) {
+ btrfs_err(root->fs_info,
+ "couldn't find block (%llu) (level %d) in tree (%llu) with key (%llu %u %llu)",
+ cur->bytenr, level - 1, root->objectid,
+ node_key->objectid, node_key->type,
+ node_key->offset);
+ err = -ENOENT;
+ goto out;
+ }
lower = cur;
need_check = true;
for (; level < BTRFS_MAX_LEVEL; level++) {
@@ -2343,6 +2350,10 @@ void free_reloc_roots(struct list_head *list)
while (!list_empty(list)) {
reloc_root = list_entry(list->next, struct btrfs_root,
root_list);
+ free_extent_buffer(reloc_root->node);
+ free_extent_buffer(reloc_root->commit_root);
+ reloc_root->node = NULL;
+ reloc_root->commit_root = NULL;
__del_reloc_root(reloc_root);
}
}
@@ -2676,11 +2687,15 @@ static int do_relocation(struct btrfs_trans_handle *trans,
if (!upper->eb) {
ret = btrfs_search_slot(trans, root, key, path, 0, 1);
- if (ret < 0) {
- err = ret;
+ if (ret) {
+ if (ret < 0)
+ err = ret;
+ else
+ err = -ENOENT;
+
+ btrfs_release_path(path);
break;
}
- BUG_ON(ret > 0);
if (!upper->eb) {
upper->eb = path->nodes[upper->level];
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index fe609b81dd1b..3bd2233737ac 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -239,7 +239,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
trans->aborted = errno;
/* Nothing used. The other threads that have joined this
* transaction may be able to continue. */
- if (!trans->blocks_used && list_empty(&trans->new_bgs)) {
+ if (!trans->dirty && list_empty(&trans->new_bgs)) {
const char *errstr;
errstr = btrfs_decode_error(errno);
@@ -1727,6 +1727,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
goto restore;
}
+ btrfs_qgroup_rescan_resume(fs_info);
+
if (!fs_info->uuid_root) {
btrfs_info(fs_info, "creating UUID tree");
ret = btrfs_create_uuid_tree(fs_info);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 64c8221b6165..1e872923ec2c 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -110,7 +110,6 @@ struct btrfs_trans_handle {
u64 chunk_bytes_reserved;
unsigned long use_count;
unsigned long blocks_reserved;
- unsigned long blocks_used;
unsigned long delayed_ref_updates;
struct btrfs_transaction *transaction;
struct btrfs_block_rsv *block_rsv;
@@ -121,6 +120,7 @@ struct btrfs_trans_handle {
bool can_flush_pending_bgs;
bool reloc_reserved;
bool sync;
+ bool dirty;
unsigned int type;
/*
* this root is only needed to validate that the root passed to
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 323e12cc9d2f..ee7832e2d39d 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -1923,12 +1923,11 @@ static noinline int find_dir_range(struct btrfs_root *root,
next:
/* check the next slot in the tree to see if it is a valid item */
nritems = btrfs_header_nritems(path->nodes[0]);
+ path->slots[0]++;
if (path->slots[0] >= nritems) {
ret = btrfs_next_leaf(root, path);
if (ret)
goto out;
- } else {
- path->slots[0]++;
}
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
@@ -2696,14 +2695,12 @@ static inline void btrfs_remove_all_log_ctxs(struct btrfs_root *root,
int index, int error)
{
struct btrfs_log_ctx *ctx;
+ struct btrfs_log_ctx *safe;
- if (!error) {
- INIT_LIST_HEAD(&root->log_ctxs[index]);
- return;
- }
-
- list_for_each_entry(ctx, &root->log_ctxs[index], list)
+ list_for_each_entry_safe(ctx, safe, &root->log_ctxs[index], list) {
+ list_del_init(&ctx->list);
ctx->log_ret = error;
+ }
INIT_LIST_HEAD(&root->log_ctxs[index]);
}
@@ -2850,6 +2847,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
if (log_root_tree->log_transid_committed >= root_log_ctx.log_transid) {
blk_finish_plug(&plug);
+ list_del_init(&root_log_ctx.list);
mutex_unlock(&log_root_tree->log_mutex);
ret = root_log_ctx.log_ret;
goto out;
@@ -2943,13 +2941,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
mutex_unlock(&root->log_mutex);
out_wake_log_root:
- /*
- * We needn't get log_mutex here because we are sure all
- * the other tasks are blocked.
- */
+ mutex_lock(&log_root_tree->log_mutex);
btrfs_remove_all_log_ctxs(log_root_tree, index2, ret);
- mutex_lock(&log_root_tree->log_mutex);
log_root_tree->log_transid_committed++;
atomic_set(&log_root_tree->log_commit[index2], 0);
mutex_unlock(&log_root_tree->log_mutex);
@@ -2960,10 +2954,8 @@ out_wake_log_root:
if (waitqueue_active(&log_root_tree->log_commit_wait[index2]))
wake_up(&log_root_tree->log_commit_wait[index2]);
out:
- /* See above. */
- btrfs_remove_all_log_ctxs(root, index1, ret);
-
mutex_lock(&root->log_mutex);
+ btrfs_remove_all_log_ctxs(root, index1, ret);
root->log_transid_committed++;
atomic_set(&root->log_commit[index1], 0);
mutex_unlock(&root->log_mutex);
@@ -4406,6 +4398,127 @@ static int btrfs_log_trailing_hole(struct btrfs_trans_handle *trans,
return ret;
}
+/*
+ * When we are logging a new inode X, check if it doesn't have a reference that
+ * matches the reference from some other inode Y created in a past transaction
+ * and that was renamed in the current transaction. If we don't do this, then at
+ * log replay time we can lose inode Y (and all its files if it's a directory):
+ *
+ * mkdir /mnt/x
+ * echo "hello world" > /mnt/x/foobar
+ * sync
+ * mv /mnt/x /mnt/y
+ * mkdir /mnt/x # or touch /mnt/x
+ * xfs_io -c fsync /mnt/x
+ * <power fail>
+ * mount fs, trigger log replay
+ *
+ * After the log replay procedure, we would lose the first directory and all its
+ * files (file foobar).
+ * For the case where inode Y is not a directory we simply end up losing it:
+ *
+ * echo "123" > /mnt/foo
+ * sync
+ * mv /mnt/foo /mnt/bar
+ * echo "abc" > /mnt/foo
+ * xfs_io -c fsync /mnt/foo
+ * <power fail>
+ *
+ * We also need this for cases where a snapshot entry is replaced by some other
+ * entry (file or directory) otherwise we end up with an unreplayable log due to
+ * attempts to delete the snapshot entry (entry of type BTRFS_ROOT_ITEM_KEY) as
+ * if it were a regular entry:
+ *
+ * mkdir /mnt/x
+ * btrfs subvolume snapshot /mnt /mnt/x/snap
+ * btrfs subvolume delete /mnt/x/snap
+ * rmdir /mnt/x
+ * mkdir /mnt/x
+ * fsync /mnt/x or fsync some new file inside it
+ * <power fail>
+ *
+ * The snapshot delete, rmdir of x, mkdir of a new x and the fsync all happen in
+ * the same transaction.
+ */
+static int btrfs_check_ref_name_override(struct extent_buffer *eb,
+ const int slot,
+ const struct btrfs_key *key,
+ struct inode *inode)
+{
+ int ret;
+ struct btrfs_path *search_path;
+ char *name = NULL;
+ u32 name_len = 0;
+ u32 item_size = btrfs_item_size_nr(eb, slot);
+ u32 cur_offset = 0;
+ unsigned long ptr = btrfs_item_ptr_offset(eb, slot);
+
+ search_path = btrfs_alloc_path();
+ if (!search_path)
+ return -ENOMEM;
+ search_path->search_commit_root = 1;
+ search_path->skip_locking = 1;
+
+ while (cur_offset < item_size) {
+ u64 parent;
+ u32 this_name_len;
+ u32 this_len;
+ unsigned long name_ptr;
+ struct btrfs_dir_item *di;
+
+ if (key->type == BTRFS_INODE_REF_KEY) {
+ struct btrfs_inode_ref *iref;
+
+ iref = (struct btrfs_inode_ref *)(ptr + cur_offset);
+ parent = key->offset;
+ this_name_len = btrfs_inode_ref_name_len(eb, iref);
+ name_ptr = (unsigned long)(iref + 1);
+ this_len = sizeof(*iref) + this_name_len;
+ } else {
+ struct btrfs_inode_extref *extref;
+
+ extref = (struct btrfs_inode_extref *)(ptr +
+ cur_offset);
+ parent = btrfs_inode_extref_parent(eb, extref);
+ this_name_len = btrfs_inode_extref_name_len(eb, extref);
+ name_ptr = (unsigned long)&extref->name;
+ this_len = sizeof(*extref) + this_name_len;
+ }
+
+ if (this_name_len > name_len) {
+ char *new_name;
+
+ new_name = krealloc(name, this_name_len, GFP_NOFS);
+ if (!new_name) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ name_len = this_name_len;
+ name = new_name;
+ }
+
+ read_extent_buffer(eb, name, name_ptr, this_name_len);
+ di = btrfs_lookup_dir_item(NULL, BTRFS_I(inode)->root,
+ search_path, parent,
+ name, this_name_len, 0);
+ if (di && !IS_ERR(di)) {
+ ret = 1;
+ goto out;
+ } else if (IS_ERR(di)) {
+ ret = PTR_ERR(di);
+ goto out;
+ }
+ btrfs_release_path(search_path);
+
+ cur_offset += this_len;
+ }
+ ret = 0;
+out:
+ btrfs_free_path(search_path);
+ kfree(name);
+ return ret;
+}
+
/* log a single inode in the tree log.
* At least one parent directory for this inode must exist in the tree
* or be logged already.
@@ -4578,6 +4691,22 @@ again:
if (min_key.type == BTRFS_INODE_ITEM_KEY)
need_log_inode_item = false;
+ if ((min_key.type == BTRFS_INODE_REF_KEY ||
+ min_key.type == BTRFS_INODE_EXTREF_KEY) &&
+ BTRFS_I(inode)->generation == trans->transid) {
+ ret = btrfs_check_ref_name_override(path->nodes[0],
+ path->slots[0],
+ &min_key, inode);
+ if (ret < 0) {
+ err = ret;
+ goto out_unlock;
+ } else if (ret > 0) {
+ err = 1;
+ btrfs_set_log_full_commit(root->fs_info, trans);
+ goto out_unlock;
+ }
+ }
+
/* Skip xattrs, we log them later with btrfs_log_all_xattrs() */
if (min_key.type == BTRFS_XATTR_ITEM_KEY) {
if (ins_nr == 0)
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9c62a6f9757a..600c67ef8a03 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -108,7 +108,7 @@ const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
},
};
-const u64 const btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
+const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
[BTRFS_RAID_RAID10] = BTRFS_BLOCK_GROUP_RAID10,
[BTRFS_RAID_RAID1] = BTRFS_BLOCK_GROUP_RAID1,
[BTRFS_RAID_DUP] = BTRFS_BLOCK_GROUP_DUP,
diff --git a/fs/buffer.c b/fs/buffer.c
index 4f4cd959da7c..6f7d519a093b 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2298,7 +2298,7 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
loff_t pos, loff_t *bytes)
{
struct inode *inode = mapping->host;
- unsigned blocksize = 1 << inode->i_blkbits;
+ unsigned int blocksize = i_blocksize(inode);
struct page *page;
void *fsdata;
pgoff_t index, curidx;
@@ -2378,8 +2378,8 @@ int cont_write_begin(struct file *file, struct address_space *mapping,
get_block_t *get_block, loff_t *bytes)
{
struct inode *inode = mapping->host;
- unsigned blocksize = 1 << inode->i_blkbits;
- unsigned zerofrom;
+ unsigned int blocksize = i_blocksize(inode);
+ unsigned int zerofrom;
int err;
err = cont_expand_zero(file, mapping, pos, bytes);
@@ -2741,7 +2741,7 @@ int nobh_truncate_page(struct address_space *mapping,
struct buffer_head map_bh;
int err;
- blocksize = 1 << inode->i_blkbits;
+ blocksize = i_blocksize(inode);
length = offset & (blocksize - 1);
/* Block boundary? Nothing to do */
@@ -2819,7 +2819,7 @@ int block_truncate_page(struct address_space *mapping,
struct buffer_head *bh;
int err;
- blocksize = 1 << inode->i_blkbits;
+ blocksize = i_blocksize(inode);
length = offset & (blocksize - 1);
/* Block boundary? Nothing to do */
@@ -2931,7 +2931,7 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block,
struct inode *inode = mapping->host;
tmp.b_state = 0;
tmp.b_blocknr = 0;
- tmp.b_size = 1 << inode->i_blkbits;
+ tmp.b_size = i_blocksize(inode);
get_block(inode, block, &tmp, 0);
return tmp.b_blocknr;
}
diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c
index 8f84646f10e9..bdb9c94335f1 100644
--- a/fs/ceph/acl.c
+++ b/fs/ceph/acl.c
@@ -94,11 +94,9 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
case ACL_TYPE_ACCESS:
name = POSIX_ACL_XATTR_ACCESS;
if (acl) {
- ret = posix_acl_equiv_mode(acl, &new_mode);
- if (ret < 0)
+ ret = posix_acl_update_mode(inode, &new_mode, &acl);
+ if (ret)
goto out;
- if (ret == 0)
- acl = NULL;
}
break;
case ACL_TYPE_DEFAULT:
@@ -130,7 +128,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
if (new_mode != old_mode) {
newattrs.ia_mode = new_mode;
newattrs.ia_valid = ATTR_MODE;
- ret = ceph_setattr(dentry, &newattrs);
+ ret = __ceph_setattr(dentry, &newattrs);
if (ret)
goto out_dput;
}
@@ -140,7 +138,7 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
if (new_mode != old_mode) {
newattrs.ia_mode = old_mode;
newattrs.ia_valid = ATTR_MODE;
- ceph_setattr(dentry, &newattrs);
+ __ceph_setattr(dentry, &newattrs);
}
goto out_dput;
}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index b7d218a168fb..22bae2b434e2 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -189,7 +189,7 @@ static int ceph_releasepage(struct page *page, gfp_t g)
/*
* read a single page, without unlocking it.
*/
-static int readpage_nounlock(struct file *filp, struct page *page)
+static int ceph_do_readpage(struct file *filp, struct page *page)
{
struct inode *inode = file_inode(filp);
struct ceph_inode_info *ci = ceph_inode(inode);
@@ -219,7 +219,7 @@ static int readpage_nounlock(struct file *filp, struct page *page)
err = ceph_readpage_from_fscache(inode, page);
if (err == 0)
- goto out;
+ return -EINPROGRESS;
dout("readpage inode %p file %p page %p index %lu\n",
inode, filp, page, page->index);
@@ -249,8 +249,11 @@ out:
static int ceph_readpage(struct file *filp, struct page *page)
{
- int r = readpage_nounlock(filp, page);
- unlock_page(page);
+ int r = ceph_do_readpage(filp, page);
+ if (r != -EINPROGRESS)
+ unlock_page(page);
+ else
+ r = 0;
return r;
}
@@ -697,7 +700,7 @@ static int ceph_writepages_start(struct address_space *mapping,
struct pagevec pvec;
int done = 0;
int rc = 0;
- unsigned wsize = 1 << inode->i_blkbits;
+ unsigned int wsize = i_blocksize(inode);
struct ceph_osd_request *req = NULL;
int do_sync = 0;
loff_t snap_size, i_size;
@@ -1094,7 +1097,7 @@ retry_locked:
goto retry_locked;
r = writepage_nounlock(page, NULL);
if (r < 0)
- goto fail_nosnap;
+ goto fail_unlock;
goto retry_locked;
}
@@ -1122,11 +1125,14 @@ retry_locked:
}
/* we need to read it. */
- r = readpage_nounlock(file, page);
- if (r < 0)
- goto fail_nosnap;
+ r = ceph_do_readpage(file, page);
+ if (r < 0) {
+ if (r == -EINPROGRESS)
+ return -EAGAIN;
+ goto fail_unlock;
+ }
goto retry_locked;
-fail_nosnap:
+fail_unlock:
unlock_page(page);
return r;
}
diff --git a/fs/ceph/cache.c b/fs/ceph/cache.c
index a4766ded1ba7..ff1cfd7b1083 100644
--- a/fs/ceph/cache.c
+++ b/fs/ceph/cache.c
@@ -224,13 +224,7 @@ void ceph_fscache_unregister_inode_cookie(struct ceph_inode_info* ci)
fscache_relinquish_cookie(cookie, 0);
}
-static void ceph_vfs_readpage_complete(struct page *page, void *data, int error)
-{
- if (!error)
- SetPageUptodate(page);
-}
-
-static void ceph_vfs_readpage_complete_unlock(struct page *page, void *data, int error)
+static void ceph_readpage_from_fscache_complete(struct page *page, void *data, int error)
{
if (!error)
SetPageUptodate(page);
@@ -259,7 +253,7 @@ int ceph_readpage_from_fscache(struct inode *inode, struct page *page)
return -ENOBUFS;
ret = fscache_read_or_alloc_page(ci->fscache, page,
- ceph_vfs_readpage_complete, NULL,
+ ceph_readpage_from_fscache_complete, NULL,
GFP_KERNEL);
switch (ret) {
@@ -288,7 +282,7 @@ int ceph_readpages_from_fscache(struct inode *inode,
return -ENOBUFS;
ret = fscache_read_or_alloc_pages(ci->fscache, mapping, pages, nr_pages,
- ceph_vfs_readpage_complete_unlock,
+ ceph_readpage_from_fscache_complete,
NULL, mapping_gfp_mask(mapping));
switch (ret) {
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 9314b4ea2375..be7d187d53fd 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -247,6 +247,11 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx,
if (ret < 0)
err = ret;
dput(last);
+ /* last_name no longer match cache index */
+ if (fi->readdir_cache_idx >= 0) {
+ fi->readdir_cache_idx = -1;
+ fi->dir_release_count = 0;
+ }
}
return err;
}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 3c68e6aee2f0..c8222bfe1e56 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -929,7 +929,8 @@ again:
statret = __ceph_do_getattr(inode, page,
CEPH_STAT_CAP_INLINE_DATA, !!page);
if (statret < 0) {
- __free_page(page);
+ if (page)
+ __free_page(page);
if (statret == -ENODATA) {
BUG_ON(retry_op != READ_INLINE);
goto again;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 498dcfa2dcdb..9f0d99094cc1 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1358,15 +1358,20 @@ static int fill_readdir_cache(struct inode *dir, struct dentry *dn,
if (!ctl->page || pgoff != page_index(ctl->page)) {
ceph_readdir_cache_release(ctl);
- ctl->page = grab_cache_page(&dir->i_data, pgoff);
+ if (idx == 0)
+ ctl->page = grab_cache_page(&dir->i_data, pgoff);
+ else
+ ctl->page = find_lock_page(&dir->i_data, pgoff);
if (!ctl->page) {
ctl->index = -1;
- return -ENOMEM;
+ return idx == 0 ? -ENOMEM : 0;
}
/* reading/filling the cache are serialized by
* i_mutex, no need to use page lock */
unlock_page(ctl->page);
ctl->dentries = kmap(ctl->page);
+ if (idx == 0)
+ memset(ctl->dentries, 0, PAGE_CACHE_SIZE);
}
if (req->r_dir_release_cnt == atomic64_read(&ci->i_release_count) &&
@@ -1768,7 +1773,7 @@ static const struct inode_operations ceph_symlink_iops = {
/*
* setattr
*/
-int ceph_setattr(struct dentry *dentry, struct iattr *attr)
+int __ceph_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = d_inode(dentry);
struct ceph_inode_info *ci = ceph_inode(inode);
@@ -1970,11 +1975,6 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
if (inode_dirty_flags)
__mark_inode_dirty(inode, inode_dirty_flags);
- if (ia_valid & ATTR_MODE) {
- err = posix_acl_chmod(inode, attr->ia_mode);
- if (err)
- goto out_put;
- }
if (mask) {
req->r_inode = inode;
@@ -1988,13 +1988,23 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
ceph_cap_string(dirtied), mask);
ceph_mdsc_put_request(req);
- if (mask & CEPH_SETATTR_SIZE)
- __ceph_do_pending_vmtruncate(inode);
ceph_free_cap_flush(prealloc_cf);
+
+ if (err >= 0 && (mask & CEPH_SETATTR_SIZE))
+ __ceph_do_pending_vmtruncate(inode);
+
return err;
-out_put:
- ceph_mdsc_put_request(req);
- ceph_free_cap_flush(prealloc_cf);
+}
+
+int ceph_setattr(struct dentry *dentry, struct iattr *attr)
+{
+ int err;
+
+ err = __ceph_setattr(dentry, attr);
+
+ if (err >= 0 && (attr->ia_valid & ATTR_MODE))
+ err = posix_acl_chmod(d_inode(dentry), attr->ia_mode);
+
return err;
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index e7b130a637f9..f54f77037d22 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -274,12 +274,13 @@ static int parse_reply_info_extra(void **p, void *end,
struct ceph_mds_reply_info_parsed *info,
u64 features)
{
- if (info->head->op == CEPH_MDS_OP_GETFILELOCK)
+ u32 op = le32_to_cpu(info->head->op);
+
+ if (op == CEPH_MDS_OP_GETFILELOCK)
return parse_reply_info_filelock(p, end, info, features);
- else if (info->head->op == CEPH_MDS_OP_READDIR ||
- info->head->op == CEPH_MDS_OP_LSSNAP)
+ else if (op == CEPH_MDS_OP_READDIR || op == CEPH_MDS_OP_LSSNAP)
return parse_reply_info_dir(p, end, info, features);
- else if (info->head->op == CEPH_MDS_OP_CREATE)
+ else if (op == CEPH_MDS_OP_CREATE)
return parse_reply_info_create(p, end, info, features);
else
return -EIO;
@@ -643,6 +644,9 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
{
dout("__unregister_request %p tid %lld\n", req, req->r_tid);
+ /* Never leave an unregistered request on an unsafe list! */
+ list_del_init(&req->r_unsafe_item);
+
if (req->r_tid == mdsc->oldest_tid) {
struct rb_node *p = rb_next(&req->r_node);
mdsc->oldest_tid = 0;
@@ -1050,7 +1054,6 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
while (!list_empty(&session->s_unsafe)) {
req = list_first_entry(&session->s_unsafe,
struct ceph_mds_request, r_unsafe_item);
- list_del_init(&req->r_unsafe_item);
pr_warn_ratelimited(" dropping unsafe request %llu\n",
req->r_tid);
__unregister_request(mdsc, req);
@@ -2476,7 +2479,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
* useful we could do with a revised return value.
*/
dout("got safe reply %llu, mds%d\n", tid, mds);
- list_del_init(&req->r_unsafe_item);
/* last unsafe request during umount? */
if (mdsc->stopping && !__get_oldest_req(mdsc))
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 75b7d125ce66..8c8cb8fe3d32 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -788,6 +788,7 @@ static inline int ceph_do_getattr(struct inode *inode, int mask, bool force)
return __ceph_do_getattr(inode, NULL, mask, force);
}
extern int ceph_permission(struct inode *inode, int mask);
+extern int __ceph_setattr(struct dentry *dentry, struct iattr *attr);
extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 819163d8313b..b24275ef97f7 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -369,6 +369,7 @@ static int __set_xattr(struct ceph_inode_info *ci,
if (update_xattr) {
int err = 0;
+
if (xattr && (flags & XATTR_CREATE))
err = -EEXIST;
else if (!xattr && (flags & XATTR_REPLACE))
@@ -376,12 +377,14 @@ static int __set_xattr(struct ceph_inode_info *ci,
if (err) {
kfree(name);
kfree(val);
+ kfree(*newxattr);
return err;
}
if (update_xattr < 0) {
if (xattr)
__remove_xattr(ci, xattr);
kfree(name);
+ kfree(*newxattr);
return 0;
}
}
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 50b268483302..0a3544fb50f9 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -152,6 +152,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
list_for_each(tmp1, &cifs_tcp_ses_list) {
server = list_entry(tmp1, struct TCP_Server_Info,
tcp_ses_list);
+ seq_printf(m, "\nNumber of credits: %d", server->credits);
i++;
list_for_each(tmp2, &server->smb_ses_list) {
ses = list_entry(tmp2, struct cifs_ses,
@@ -255,7 +256,6 @@ static const struct file_operations cifs_debug_data_proc_fops = {
static ssize_t cifs_stats_proc_write(struct file *file,
const char __user *buffer, size_t count, loff_t *ppos)
{
- char c;
bool bv;
int rc;
struct list_head *tmp1, *tmp2, *tmp3;
@@ -263,11 +263,8 @@ static ssize_t cifs_stats_proc_write(struct file *file,
struct cifs_ses *ses;
struct cifs_tcon *tcon;
- rc = get_user(c, buffer);
- if (rc)
- return rc;
-
- if (strtobool(&c, &bv) == 0) {
+ rc = kstrtobool_from_user(buffer, count, &bv);
+ if (rc == 0) {
#ifdef CONFIG_CIFS_STATS2
atomic_set(&totBufAllocCount, 0);
atomic_set(&totSmBufAllocCount, 0);
@@ -290,6 +287,8 @@ static ssize_t cifs_stats_proc_write(struct file *file,
}
}
spin_unlock(&cifs_tcp_ses_lock);
+ } else {
+ return rc;
}
return count;
@@ -433,17 +432,17 @@ static int cifsFYI_proc_open(struct inode *inode, struct file *file)
static ssize_t cifsFYI_proc_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
{
- char c;
+ char c[2] = { '\0' };
bool bv;
int rc;
- rc = get_user(c, buffer);
+ rc = get_user(c[0], buffer);
if (rc)
return rc;
- if (strtobool(&c, &bv) == 0)
+ if (strtobool(c, &bv) == 0)
cifsFYI = bv;
- else if ((c > '1') && (c <= '9'))
- cifsFYI = (int) (c - '0'); /* see cifs_debug.h for meanings */
+ else if ((c[0] > '1') && (c[0] <= '9'))
+ cifsFYI = (int) (c[0] - '0'); /* see cifs_debug.h for meanings */
return count;
}
@@ -471,20 +470,12 @@ static int cifs_linux_ext_proc_open(struct inode *inode, struct file *file)
static ssize_t cifs_linux_ext_proc_write(struct file *file,
const char __user *buffer, size_t count, loff_t *ppos)
{
- char c;
- bool bv;
int rc;
- rc = get_user(c, buffer);
+ rc = kstrtobool_from_user(buffer, count, &linuxExtEnabled);
if (rc)
return rc;
- rc = strtobool(&c, &bv);
- if (rc)
- return rc;
-
- linuxExtEnabled = bv;
-
return count;
}
@@ -511,20 +502,12 @@ static int cifs_lookup_cache_proc_open(struct inode *inode, struct file *file)
static ssize_t cifs_lookup_cache_proc_write(struct file *file,
const char __user *buffer, size_t count, loff_t *ppos)
{
- char c;
- bool bv;
int rc;
- rc = get_user(c, buffer);
+ rc = kstrtobool_from_user(buffer, count, &lookupCacheEnabled);
if (rc)
return rc;
- rc = strtobool(&c, &bv);
- if (rc)
- return rc;
-
- lookupCacheEnabled = bv;
-
return count;
}
@@ -551,20 +534,12 @@ static int traceSMB_proc_open(struct inode *inode, struct file *file)
static ssize_t traceSMB_proc_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
{
- char c;
- bool bv;
int rc;
- rc = get_user(c, buffer);
+ rc = kstrtobool_from_user(buffer, count, &traceSMB);
if (rc)
return rc;
- rc = strtobool(&c, &bv);
- if (rc)
- return rc;
-
- traceSMB = bv;
-
return count;
}
@@ -622,7 +597,6 @@ static ssize_t cifs_security_flags_proc_write(struct file *file,
int rc;
unsigned int flags;
char flags_string[12];
- char c;
bool bv;
if ((count < 1) || (count > 11))
@@ -635,11 +609,10 @@ static ssize_t cifs_security_flags_proc_write(struct file *file,
if (count < 3) {
/* single char or single char followed by null */
- c = flags_string[0];
- if (strtobool(&c, &bv) == 0) {
+ if (strtobool(flags_string, &bv) == 0) {
global_secflags = bv ? CIFSSEC_MAX : CIFSSEC_DEF;
return count;
- } else if (!isdigit(c)) {
+ } else if (!isdigit(flags_string[0])) {
cifs_dbg(VFS, "Invalid SecurityFlags: %s\n",
flags_string);
return -EINVAL;
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h
index 66cf0f9fff89..c611ca2339d7 100644
--- a/fs/cifs/cifs_debug.h
+++ b/fs/cifs/cifs_debug.h
@@ -25,7 +25,7 @@
void cifs_dump_mem(char *label, void *data, int length);
void cifs_dump_detail(void *);
void cifs_dump_mids(struct TCP_Server_Info *);
-extern int traceSMB; /* flag which enables the function below */
+extern bool traceSMB; /* flag which enables the function below */
void dump_smb(void *, int);
#define CIFS_INFO 0x01
#define CIFS_RC 0x02
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 3182273a3407..1418daa03d95 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -46,6 +46,9 @@
#define CIFS_MOUNT_CIFS_BACKUPUID 0x200000 /* backup intent bit for a user */
#define CIFS_MOUNT_CIFS_BACKUPGID 0x400000 /* backup intent bit for a group */
#define CIFS_MOUNT_MAP_SFM_CHR 0x800000 /* SFM/MAC mapping for illegal chars */
+#define CIFS_MOUNT_USE_PREFIX_PATH 0x1000000 /* make subpath with unaccessible
+ * root mountable
+ */
struct cifs_sb_info {
struct rb_root tlink_tree;
@@ -67,5 +70,6 @@ struct cifs_sb_info {
struct backing_dev_info bdi;
struct delayed_work prune_tlinks;
struct rcu_head rcu;
+ char *prepath;
};
#endif /* _CIFS_FS_SB_H */
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 5a53ac6b1e02..a0b3e7d1be48 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -83,6 +83,9 @@ convert_sfm_char(const __u16 src_char, char *target)
case SFM_COLON:
*target = ':';
break;
+ case SFM_DOUBLEQUOTE:
+ *target = '"';
+ break;
case SFM_ASTERISK:
*target = '*';
break;
@@ -101,6 +104,12 @@ convert_sfm_char(const __u16 src_char, char *target)
case SFM_SLASH:
*target = '\\';
break;
+ case SFM_SPACE:
+ *target = ' ';
+ break;
+ case SFM_PERIOD:
+ *target = '.';
+ break;
default:
return false;
}
@@ -404,7 +413,7 @@ static __le16 convert_to_sfu_char(char src_char)
return dest_char;
}
-static __le16 convert_to_sfm_char(char src_char)
+static __le16 convert_to_sfm_char(char src_char, bool end_of_string)
{
__le16 dest_char;
@@ -412,6 +421,9 @@ static __le16 convert_to_sfm_char(char src_char)
case ':':
dest_char = cpu_to_le16(SFM_COLON);
break;
+ case '"':
+ dest_char = cpu_to_le16(SFM_DOUBLEQUOTE);
+ break;
case '*':
dest_char = cpu_to_le16(SFM_ASTERISK);
break;
@@ -427,6 +439,18 @@ static __le16 convert_to_sfm_char(char src_char)
case '|':
dest_char = cpu_to_le16(SFM_PIPE);
break;
+ case '.':
+ if (end_of_string)
+ dest_char = cpu_to_le16(SFM_PERIOD);
+ else
+ dest_char = 0;
+ break;
+ case ' ':
+ if (end_of_string)
+ dest_char = cpu_to_le16(SFM_SPACE);
+ else
+ dest_char = 0;
+ break;
default:
dest_char = 0;
}
@@ -469,9 +493,16 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
/* see if we must remap this char */
if (map_chars == SFU_MAP_UNI_RSVD)
dst_char = convert_to_sfu_char(src_char);
- else if (map_chars == SFM_MAP_UNI_RSVD)
- dst_char = convert_to_sfm_char(src_char);
- else
+ else if (map_chars == SFM_MAP_UNI_RSVD) {
+ bool end_of_string;
+
+ if (i == srclen - 1)
+ end_of_string = true;
+ else
+ end_of_string = false;
+
+ dst_char = convert_to_sfm_char(src_char, end_of_string);
+ } else
dst_char = 0;
/*
* FIXME: We can not handle remapping backslash (UNI_SLASH)
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index bdc52cb9a676..07ade707fa60 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -57,6 +57,7 @@
* not conflict (although almost does) with the mapping above.
*/
+#define SFM_DOUBLEQUOTE ((__u16) 0xF020)
#define SFM_ASTERISK ((__u16) 0xF021)
#define SFM_QUESTION ((__u16) 0xF025)
#define SFM_COLON ((__u16) 0xF022)
@@ -64,6 +65,8 @@
#define SFM_LESSTHAN ((__u16) 0xF023)
#define SFM_PIPE ((__u16) 0xF027)
#define SFM_SLASH ((__u16) 0xF026)
+#define SFM_SPACE ((__u16) 0xF028)
+#define SFM_PERIOD ((__u16) 0xF029)
/*
* Mapping mechanism to use when one of the seven reserved characters is
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index e682b36a210f..4acbc390a7d6 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -731,24 +731,26 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
memcpy(ses->auth_key.response + baselen, tiblob, tilen);
+ mutex_lock(&ses->server->srv_mutex);
+
rc = crypto_hmacmd5_alloc(ses->server);
if (rc) {
cifs_dbg(VFS, "could not crypto alloc hmacmd5 rc %d\n", rc);
- goto setup_ntlmv2_rsp_ret;
+ goto unlock;
}
/* calculate ntlmv2_hash */
rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp);
if (rc) {
cifs_dbg(VFS, "could not get v2 hash rc %d\n", rc);
- goto setup_ntlmv2_rsp_ret;
+ goto unlock;
}
/* calculate first part of the client response (CR1) */
rc = CalcNTLMv2_response(ses, ntlmv2_hash);
if (rc) {
cifs_dbg(VFS, "Could not calculate CR1 rc: %d\n", rc);
- goto setup_ntlmv2_rsp_ret;
+ goto unlock;
}
/* now calculate the session key for NTLMv2 */
@@ -757,13 +759,13 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
if (rc) {
cifs_dbg(VFS, "%s: Could not set NTLMV2 Hash as a key\n",
__func__);
- goto setup_ntlmv2_rsp_ret;
+ goto unlock;
}
rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
if (rc) {
cifs_dbg(VFS, "%s: Could not init hmacmd5\n", __func__);
- goto setup_ntlmv2_rsp_ret;
+ goto unlock;
}
rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
@@ -771,7 +773,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
CIFS_HMAC_MD5_HASH_SIZE);
if (rc) {
cifs_dbg(VFS, "%s: Could not update with response\n", __func__);
- goto setup_ntlmv2_rsp_ret;
+ goto unlock;
}
rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
@@ -779,6 +781,8 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
if (rc)
cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__);
+unlock:
+ mutex_unlock(&ses->server->srv_mutex);
setup_ntlmv2_rsp_ret:
kfree(tiblob);
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index cbc0f4bca0c0..4f4fc9ff3636 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -54,10 +54,10 @@
#endif
int cifsFYI = 0;
-int traceSMB = 0;
+bool traceSMB;
bool enable_oplocks = true;
-unsigned int linuxExtEnabled = 1;
-unsigned int lookupCacheEnabled = 1;
+bool linuxExtEnabled = true;
+bool lookupCacheEnabled = true;
unsigned int global_secflags = CIFSSEC_DEF;
/* unsigned int ntlmv2_support = 0; */
unsigned int sign_CIFS_PDUs = 1;
@@ -268,7 +268,7 @@ cifs_alloc_inode(struct super_block *sb)
cifs_inode->createtime = 0;
cifs_inode->epoch = 0;
#ifdef CONFIG_CIFS_SMB2
- get_random_bytes(cifs_inode->lease_key, SMB2_LEASE_KEY_SIZE);
+ generate_random_uuid(cifs_inode->lease_key);
#endif
/*
* Can not set i_flags here - they get immediately overwritten to zero
@@ -686,6 +686,14 @@ cifs_do_mount(struct file_system_type *fs_type,
goto out_cifs_sb;
}
+ if (volume_info->prepath) {
+ cifs_sb->prepath = kstrdup(volume_info->prepath, GFP_KERNEL);
+ if (cifs_sb->prepath == NULL) {
+ root = ERR_PTR(-ENOMEM);
+ goto out_cifs_sb;
+ }
+ }
+
cifs_setup_cifs_sb(volume_info, cifs_sb);
rc = cifs_mount(cifs_sb, volume_info);
@@ -724,7 +732,11 @@ cifs_do_mount(struct file_system_type *fs_type,
sb->s_flags |= MS_ACTIVE;
}
- root = cifs_get_root(volume_info, sb);
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH)
+ root = dget(sb->s_root);
+ else
+ root = cifs_get_root(volume_info, sb);
+
if (IS_ERR(root))
goto out_super;
@@ -1198,7 +1210,6 @@ init_cifs(void)
GlobalTotalActiveXid = 0;
GlobalMaxActiveXid = 0;
spin_lock_init(&cifs_tcp_ses_lock);
- spin_lock_init(&cifs_file_list_lock);
spin_lock_init(&GlobalMid_Lock);
if (cifs_max_pending < 2) {
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 2b510c537a0d..e2f6a79e9b01 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -227,6 +227,7 @@ struct smb_version_operations {
/* verify the message */
int (*check_message)(char *, unsigned int);
bool (*is_oplock_break)(char *, struct TCP_Server_Info *);
+ int (*handle_cancelled_mid)(char *, struct TCP_Server_Info *);
void (*downgrade_oplock)(struct TCP_Server_Info *,
struct cifsInodeInfo *, bool);
/* process transaction2 response */
@@ -627,6 +628,8 @@ struct TCP_Server_Info {
#ifdef CONFIG_CIFS_SMB2
unsigned int max_read;
unsigned int max_write;
+ struct delayed_work reconnect; /* reconnect workqueue job */
+ struct mutex reconnect_mutex; /* prevent simultaneous reconnects */
#endif /* CONFIG_CIFS_SMB2 */
};
@@ -826,7 +829,9 @@ cap_unix(struct cifs_ses *ses)
struct cifs_tcon {
struct list_head tcon_list;
int tc_count;
+ struct list_head rlist; /* reconnect list */
struct list_head openFileList;
+ spinlock_t open_file_lock; /* protects list above */
struct cifs_ses *ses; /* pointer to session associated with */
char treeName[MAX_TREE_SIZE + 1]; /* UNC name of resource in ASCII */
char *nativeFileSystem;
@@ -883,7 +888,7 @@ struct cifs_tcon {
#endif /* CONFIG_CIFS_STATS2 */
__u64 bytes_read;
__u64 bytes_written;
- spinlock_t stat_lock;
+ spinlock_t stat_lock; /* protects the two fields above */
#endif /* CONFIG_CIFS_STATS */
FILE_SYSTEM_DEVICE_INFO fsDevInfo;
FILE_SYSTEM_ATTRIBUTE_INFO fsAttrInfo; /* ok if fs name truncated */
@@ -902,7 +907,6 @@ struct cifs_tcon {
bool use_persistent:1; /* use persistent instead of durable handles */
#ifdef CONFIG_CIFS_SMB2
bool print:1; /* set if connection to printer share */
- bool bad_network_name:1; /* set if ret status STATUS_BAD_NETWORK_NAME */
__le32 capabilities;
__u32 share_flags;
__u32 maximal_access;
@@ -1034,8 +1038,10 @@ struct cifs_fid_locks {
};
struct cifsFileInfo {
+ /* following two lists are protected by tcon->open_file_lock */
struct list_head tlist; /* pointer to next fid owned by tcon */
struct list_head flist; /* next fid (file instance) for this inode */
+ /* lock list below protected by cifsi->lock_sem */
struct cifs_fid_locks *llist; /* brlocks held by this fid */
kuid_t uid; /* allows finding which FileInfo structure */
__u32 pid; /* process id who opened file */
@@ -1043,11 +1049,12 @@ struct cifsFileInfo {
/* BB add lock scope info here if needed */ ;
/* lock scope id (0 if none) */
struct dentry *dentry;
- unsigned int f_flags;
struct tcon_link *tlink;
+ unsigned int f_flags;
bool invalidHandle:1; /* file closed via session abend */
bool oplock_break_cancelled:1;
- int count; /* refcount protected by cifs_file_list_lock */
+ int count;
+ spinlock_t file_info_lock; /* protects four flag/count fields above */
struct mutex fh_mutex; /* prevents reopen race after dead ses*/
struct cifs_search_info srch_inf;
struct work_struct oplock_break; /* work for oplock breaks */
@@ -1114,7 +1121,7 @@ struct cifs_writedata {
/*
* Take a reference on the file private data. Must be called with
- * cifs_file_list_lock held.
+ * cfile->file_info_lock held.
*/
static inline void
cifsFileInfo_get_locked(struct cifsFileInfo *cifs_file)
@@ -1283,12 +1290,19 @@ struct mid_q_entry {
void *callback_data; /* general purpose pointer for callback */
void *resp_buf; /* pointer to received SMB header */
int mid_state; /* wish this were enum but can not pass to wait_event */
+ unsigned int mid_flags;
__le16 command; /* smb command code */
bool large_buf:1; /* if valid response, is pointer to large buf */
bool multiRsp:1; /* multiple trans2 responses for one request */
bool multiEnd:1; /* both received */
};
+struct close_cancelled_open {
+ struct cifs_fid fid;
+ struct cifs_tcon *tcon;
+ struct work_struct work;
+};
+
/* Make code in transport.c a little cleaner by moving
update of optional stats into function below */
#ifdef CONFIG_CIFS_STATS2
@@ -1420,6 +1434,9 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
#define MID_RESPONSE_MALFORMED 0x10
#define MID_SHUTDOWN 0x20
+/* Flags */
+#define MID_WAIT_CANCELLED 1 /* Cancelled while waiting for response */
+
/* Types of response buffer returned from SendReceive2 */
#define CIFS_NO_BUFFER 0 /* Response buffer not returned */
#define CIFS_SMALL_BUFFER 1
@@ -1508,8 +1525,10 @@ require use of the stronger protocol */
* GlobalMid_Lock protects:
* list operations on pending_mid_q and oplockQ
* updates to XID counters, multiplex id and SMB sequence numbers
- * cifs_file_list_lock protects:
- * list operations on tcp and SMB session lists and tCon lists
+ * tcp_ses_lock protects:
+ * list operations on tcp and SMB session lists
+ * tcon->open_file_lock protects the list of open files hanging off the tcon
+ * cfile->file_info_lock protects counters and fields in cifs file struct
* f_owner.lock protects certain per file struct operations
* mapping->page_lock protects certain per page operations
*
@@ -1541,18 +1560,12 @@ GLOBAL_EXTERN struct list_head cifs_tcp_ses_list;
* tcp session, and the list of tcon's per smb session. It also protects
* the reference counters for the server, smb session, and tcon. Finally,
* changes to the tcon->tidStatus should be done while holding this lock.
+ * generally the locks should be taken in order tcp_ses_lock before
+ * tcon->open_file_lock and that before file->file_info_lock since the
+ * structure order is cifs_socket-->cifs_ses-->cifs_tcon-->cifs_file
*/
GLOBAL_EXTERN spinlock_t cifs_tcp_ses_lock;
-/*
- * This lock protects the cifs_file->llist and cifs_file->flist
- * list operations, and updates to some flags (cifs_file->invalidHandle)
- * It will be moved to either use the tcon->stat_lock or equivalent later.
- * If cifs_tcp_ses_lock and the lock below are both needed to be held, then
- * the cifs_tcp_ses_lock must be grabbed first and released last.
- */
-GLOBAL_EXTERN spinlock_t cifs_file_list_lock;
-
#ifdef CONFIG_CIFS_DNOTIFY_EXPERIMENTAL /* unused temporarily */
/* Outstanding dir notify requests */
GLOBAL_EXTERN struct list_head GlobalDnotifyReqList;
@@ -1588,11 +1601,11 @@ GLOBAL_EXTERN atomic_t midCount;
/* Misc globals */
GLOBAL_EXTERN bool enable_oplocks; /* enable or disable oplocks */
-GLOBAL_EXTERN unsigned int lookupCacheEnabled;
+GLOBAL_EXTERN bool lookupCacheEnabled;
GLOBAL_EXTERN unsigned int global_secflags; /* if on, session setup sent
with more secure ntlmssp2 challenge/resp */
GLOBAL_EXTERN unsigned int sign_CIFS_PDUs; /* enable smb packet signing */
-GLOBAL_EXTERN unsigned int linuxExtEnabled;/*enable Linux/Unix CIFS extensions*/
+GLOBAL_EXTERN bool linuxExtEnabled;/*enable Linux/Unix CIFS extensions*/
GLOBAL_EXTERN unsigned int CIFSMaxBufSize; /* max size not including hdr */
GLOBAL_EXTERN unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */
GLOBAL_EXTERN unsigned int cifs_min_small; /* min size of small buf pool */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index c63fd1dde25b..54590fd33df1 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -205,6 +205,9 @@ extern void cifs_add_pending_open_locked(struct cifs_fid *fid,
struct tcon_link *tlink,
struct cifs_pending_open *open);
extern void cifs_del_pending_open(struct cifs_pending_open *open);
+extern void cifs_put_tcp_session(struct TCP_Server_Info *server,
+ int from_reconnect);
+extern void cifs_put_tcon(struct cifs_tcon *tcon);
#if IS_ENABLED(CONFIG_CIFS_DFS_UPCALL)
extern void cifs_dfs_release_automount_timer(void);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 76fcb50295a3..b60150e5b5ce 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -98,13 +98,13 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon)
struct list_head *tmp1;
/* list all files open on tree connection and mark them invalid */
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&tcon->open_file_lock);
list_for_each_safe(tmp, tmp1, &tcon->openFileList) {
open_file = list_entry(tmp, struct cifsFileInfo, tlist);
open_file->invalidHandle = true;
open_file->oplock_break_cancelled = true;
}
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tcon->open_file_lock);
/*
* BB Add call to invalidate_inodes(sb) for all superblocks mounted
* to this tcon.
@@ -717,6 +717,9 @@ CIFSSMBEcho(struct TCP_Server_Info *server)
if (rc)
return rc;
+ if (server->capabilities & CAP_UNICODE)
+ smb->hdr.Flags2 |= SMBFLG2_UNICODE;
+
/* set up echo request */
smb->hdr.Tid = 0xffff;
smb->hdr.WordCount = 1;
@@ -1424,6 +1427,8 @@ cifs_readv_discard(struct TCP_Server_Info *server, struct mid_q_entry *mid)
length = discard_remaining_data(server);
dequeue_mid(mid, rdata->result);
+ mid->resp_buf = server->smallbuf;
+ server->smallbuf = NULL;
return length;
}
@@ -1538,6 +1543,8 @@ cifs_readv_receive(struct TCP_Server_Info *server, struct mid_q_entry *mid)
return cifs_readv_discard(server, mid);
dequeue_mid(mid, false);
+ mid->resp_buf = server->smallbuf;
+ server->smallbuf = NULL;
return length;
}
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 3c194ff0d2f0..53a827c6d8b1 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -52,6 +52,9 @@
#include "nterr.h"
#include "rfc1002pdu.h"
#include "fscache.h"
+#ifdef CONFIG_CIFS_SMB2
+#include "smb2proto.h"
+#endif
#define CIFS_PORT 445
#define RFC1001_PORT 139
@@ -409,6 +412,9 @@ cifs_reconnect(struct TCP_Server_Info *server)
}
} while (server->tcpStatus == CifsNeedReconnect);
+ if (server->tcpStatus == CifsNeedNegotiate)
+ mod_delayed_work(cifsiod_wq, &server->echo, 0);
+
return rc;
}
@@ -418,16 +424,27 @@ cifs_echo_request(struct work_struct *work)
int rc;
struct TCP_Server_Info *server = container_of(work,
struct TCP_Server_Info, echo.work);
+ unsigned long echo_interval;
+
+ /*
+ * If we need to renegotiate, set echo interval to zero to
+ * immediately call echo service where we can renegotiate.
+ */
+ if (server->tcpStatus == CifsNeedNegotiate)
+ echo_interval = 0;
+ else
+ echo_interval = SMB_ECHO_INTERVAL;
/*
- * We cannot send an echo if it is disabled or until the
- * NEGOTIATE_PROTOCOL request is done, which is indicated by
- * server->ops->need_neg() == true. Also, no need to ping if
- * we got a response recently.
+ * We cannot send an echo if it is disabled.
+ * Also, no need to ping if we got a response recently.
*/
- if (!server->ops->need_neg || server->ops->need_neg(server) ||
+
+ if (server->tcpStatus == CifsNeedReconnect ||
+ server->tcpStatus == CifsExiting ||
+ server->tcpStatus == CifsNew ||
(server->ops->can_echo && !server->ops->can_echo(server)) ||
- time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ))
+ time_before(jiffies, server->lstrp + echo_interval - HZ))
goto requeue_echo;
rc = server->ops->echo ? server->ops->echo(server) : -ENOSYS;
@@ -919,10 +936,19 @@ cifs_demultiplex_thread(void *p)
server->lstrp = jiffies;
if (mid_entry != NULL) {
+ if ((mid_entry->mid_flags & MID_WAIT_CANCELLED) &&
+ mid_entry->mid_state == MID_RESPONSE_RECEIVED &&
+ server->ops->handle_cancelled_mid)
+ server->ops->handle_cancelled_mid(
+ mid_entry->resp_buf,
+ server);
+
if (!mid_entry->multiRsp || mid_entry->multiEnd)
mid_entry->callback(mid_entry);
- } else if (!server->ops->is_oplock_break ||
- !server->ops->is_oplock_break(buf, server)) {
+ } else if (server->ops->is_oplock_break &&
+ server->ops->is_oplock_break(buf, server)) {
+ cifs_dbg(FYI, "Received oplock break\n");
+ } else {
cifs_dbg(VFS, "No task to wake, unknown frame received! NumMids %d\n",
atomic_read(&midCount));
cifs_dump_mem("Received Data is: ", buf,
@@ -2111,8 +2137,8 @@ cifs_find_tcp_session(struct smb_vol *vol)
return NULL;
}
-static void
-cifs_put_tcp_session(struct TCP_Server_Info *server)
+void
+cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect)
{
struct task_struct *task;
@@ -2129,6 +2155,19 @@ cifs_put_tcp_session(struct TCP_Server_Info *server)
cancel_delayed_work_sync(&server->echo);
+#ifdef CONFIG_CIFS_SMB2
+ if (from_reconnect)
+ /*
+ * Avoid deadlock here: reconnect work calls
+ * cifs_put_tcp_session() at its end. Need to be sure
+ * that reconnect work does nothing with server pointer after
+ * that step.
+ */
+ cancel_delayed_work(&server->reconnect);
+ else
+ cancel_delayed_work_sync(&server->reconnect);
+#endif
+
spin_lock(&GlobalMid_Lock);
server->tcpStatus = CifsExiting;
spin_unlock(&GlobalMid_Lock);
@@ -2193,12 +2232,16 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
INIT_LIST_HEAD(&tcp_ses->tcp_ses_list);
INIT_LIST_HEAD(&tcp_ses->smb_ses_list);
INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request);
+#ifdef CONFIG_CIFS_SMB2
+ INIT_DELAYED_WORK(&tcp_ses->reconnect, smb2_reconnect_server);
+ mutex_init(&tcp_ses->reconnect_mutex);
+#endif
memcpy(&tcp_ses->srcaddr, &volume_info->srcaddr,
sizeof(tcp_ses->srcaddr));
memcpy(&tcp_ses->dstaddr, &volume_info->dstaddr,
sizeof(tcp_ses->dstaddr));
#ifdef CONFIG_CIFS_SMB2
- get_random_bytes(tcp_ses->client_guid, SMB2_CLIENT_GUID_SIZE);
+ generate_random_uuid(tcp_ses->client_guid);
#endif
/*
* at this point we are the only ones with the pointer
@@ -2345,7 +2388,7 @@ cifs_put_smb_ses(struct cifs_ses *ses)
spin_unlock(&cifs_tcp_ses_lock);
sesInfoFree(ses);
- cifs_put_tcp_session(server);
+ cifs_put_tcp_session(server, 0);
}
#ifdef CONFIG_KEYS
@@ -2519,7 +2562,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
mutex_unlock(&ses->session_mutex);
/* existing SMB ses has a server reference already */
- cifs_put_tcp_session(server);
+ cifs_put_tcp_session(server, 0);
free_xid(xid);
return ses;
}
@@ -2609,7 +2652,7 @@ cifs_find_tcon(struct cifs_ses *ses, const char *unc)
return NULL;
}
-static void
+void
cifs_put_tcon(struct cifs_tcon *tcon)
{
unsigned int xid;
@@ -3515,6 +3558,44 @@ cifs_get_volume_info(char *mount_data, const char *devname)
return volume_info;
}
+static int
+cifs_are_all_path_components_accessible(struct TCP_Server_Info *server,
+ unsigned int xid,
+ struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb,
+ char *full_path)
+{
+ int rc;
+ char *s;
+ char sep, tmp;
+
+ sep = CIFS_DIR_SEP(cifs_sb);
+ s = full_path;
+
+ rc = server->ops->is_path_accessible(xid, tcon, cifs_sb, "");
+ while (rc == 0) {
+ /* skip separators */
+ while (*s == sep)
+ s++;
+ if (!*s)
+ break;
+ /* next separator */
+ while (*s && *s != sep)
+ s++;
+
+ /*
+ * temporarily null-terminate the path at the end of
+ * the current component
+ */
+ tmp = *s;
+ *s = 0;
+ rc = server->ops->is_path_accessible(xid, tcon, cifs_sb,
+ full_path);
+ *s = tmp;
+ }
+ return rc;
+}
+
int
cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info)
{
@@ -3652,6 +3733,18 @@ remote_path_check:
kfree(full_path);
goto mount_fail_check;
}
+
+ if (rc != -EREMOTE) {
+ rc = cifs_are_all_path_components_accessible(server,
+ xid, tcon, cifs_sb,
+ full_path);
+ if (rc != 0) {
+ cifs_dbg(VFS, "cannot query dirs between root and final path, "
+ "enabling CIFS_MOUNT_USE_PREFIX_PATH\n");
+ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
+ rc = 0;
+ }
+ }
kfree(full_path);
}
@@ -3715,7 +3808,7 @@ mount_fail_check:
else if (ses)
cifs_put_smb_ses(ses);
else
- cifs_put_tcp_session(server);
+ cifs_put_tcp_session(server, 0);
bdi_destroy(&cifs_sb->bdi);
}
@@ -3921,6 +4014,7 @@ cifs_umount(struct cifs_sb_info *cifs_sb)
bdi_destroy(&cifs_sb->bdi);
kfree(cifs_sb->mountdata);
+ kfree(cifs_sb->prepath);
call_rcu(&cifs_sb->rcu, delayed_free);
}
@@ -4025,7 +4119,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
ses = cifs_get_smb_ses(master_tcon->ses->server, vol_info);
if (IS_ERR(ses)) {
tcon = (struct cifs_tcon *)ses;
- cifs_put_tcp_session(master_tcon->ses->server);
+ cifs_put_tcp_session(master_tcon->ses->server, 0);
goto out;
}
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index c3eb998a99bd..297e05c9e2b0 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -84,6 +84,7 @@ build_path_from_dentry(struct dentry *direntry)
struct dentry *temp;
int namelen;
int dfsplen;
+ int pplen = 0;
char *full_path;
char dirsep;
struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb);
@@ -95,8 +96,12 @@ build_path_from_dentry(struct dentry *direntry)
dfsplen = strnlen(tcon->treeName, MAX_TREE_SIZE + 1);
else
dfsplen = 0;
+
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH)
+ pplen = cifs_sb->prepath ? strlen(cifs_sb->prepath) + 1 : 0;
+
cifs_bp_rename_retry:
- namelen = dfsplen;
+ namelen = dfsplen + pplen;
seq = read_seqbegin(&rename_lock);
rcu_read_lock();
for (temp = direntry; !IS_ROOT(temp);) {
@@ -137,7 +142,7 @@ cifs_bp_rename_retry:
}
}
rcu_read_unlock();
- if (namelen != dfsplen || read_seqretry(&rename_lock, seq)) {
+ if (namelen != dfsplen + pplen || read_seqretry(&rename_lock, seq)) {
cifs_dbg(FYI, "did not end path lookup where expected. namelen=%ddfsplen=%d\n",
namelen, dfsplen);
/* presumably this is only possible if racing with a rename
@@ -153,6 +158,17 @@ cifs_bp_rename_retry:
those safely to '/' if any are found in the middle of the prepath */
/* BB test paths to Windows with '/' in the midst of prepath */
+ if (pplen) {
+ int i;
+
+ cifs_dbg(FYI, "using cifs_sb prepath <%s>\n", cifs_sb->prepath);
+ memcpy(full_path+dfsplen+1, cifs_sb->prepath, pplen-1);
+ full_path[dfsplen] = '\\';
+ for (i = 0; i < pplen-1; i++)
+ if (full_path[dfsplen+1+i] == '/')
+ full_path[dfsplen+1+i] = CIFS_DIR_SEP(cifs_sb);
+ }
+
if (dfsplen) {
strncpy(full_path, tcon->treeName, dfsplen);
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) {
@@ -167,15 +183,20 @@ cifs_bp_rename_retry:
}
/*
+ * Don't allow path components longer than the server max.
* Don't allow the separator character in a path component.
* The VFS will not allow "/", but "\" is allowed by posix.
*/
static int
-check_name(struct dentry *direntry)
+check_name(struct dentry *direntry, struct cifs_tcon *tcon)
{
struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb);
int i;
+ if (unlikely(direntry->d_name.len >
+ le32_to_cpu(tcon->fsAttrInfo.MaxPathNameComponentLength)))
+ return -ENAMETOOLONG;
+
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)) {
for (i = 0; i < direntry->d_name.len; i++) {
if (direntry->d_name.name[i] == '\\') {
@@ -229,6 +250,13 @@ cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned int xid,
goto cifs_create_get_file_info;
}
+ if (S_ISDIR(newinode->i_mode)) {
+ CIFSSMBClose(xid, tcon, fid->netfid);
+ iput(newinode);
+ rc = -EISDIR;
+ goto out;
+ }
+
if (!S_ISREG(newinode->i_mode)) {
/*
* The server may allow us to open things like
@@ -399,10 +427,14 @@ cifs_create_set_dentry:
if (rc != 0) {
cifs_dbg(FYI, "Create worked, get_inode_info failed rc = %d\n",
rc);
- if (server->ops->close)
- server->ops->close(xid, tcon, fid);
- goto out;
+ goto out_err;
}
+
+ if (S_ISDIR(newinode->i_mode)) {
+ rc = -EISDIR;
+ goto out_err;
+ }
+
d_drop(direntry);
d_add(direntry, newinode);
@@ -410,6 +442,13 @@ out:
kfree(buf);
kfree(full_path);
return rc;
+
+out_err:
+ if (server->ops->close)
+ server->ops->close(xid, tcon, fid);
+ if (newinode)
+ iput(newinode);
+ goto out;
}
int
@@ -455,10 +494,6 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
return finish_no_open(file, res);
}
- rc = check_name(direntry);
- if (rc)
- return rc;
-
xid = get_xid();
cifs_dbg(FYI, "parent inode = 0x%p name is: %pd and dentry = 0x%p\n",
@@ -471,6 +506,11 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
}
tcon = tlink_tcon(tlink);
+
+ rc = check_name(direntry, tcon);
+ if (rc)
+ goto out_free_xid;
+
server = tcon->ses->server;
if (server->ops->new_lease_key)
@@ -731,7 +771,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
}
pTcon = tlink_tcon(tlink);
- rc = check_name(direntry);
+ rc = check_name(direntry, pTcon);
if (rc)
goto lookup_out;
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 0068e82217c3..a0c0a49b6620 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -305,6 +305,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
cfile->tlink = cifs_get_tlink(tlink);
INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
mutex_init(&cfile->fh_mutex);
+ spin_lock_init(&cfile->file_info_lock);
cifs_sb_active(inode->i_sb);
@@ -317,7 +318,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
oplock = 0;
}
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&tcon->open_file_lock);
if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
oplock = fid->pending_open->oplock;
list_del(&fid->pending_open->olist);
@@ -326,12 +327,13 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
server->ops->set_fid(cfile, fid, oplock);
list_add(&cfile->tlist, &tcon->openFileList);
+
/* if readable file instance put first in list*/
if (file->f_mode & FMODE_READ)
list_add(&cfile->flist, &cinode->openFileList);
else
list_add_tail(&cfile->flist, &cinode->openFileList);
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tcon->open_file_lock);
if (fid->purge_cache)
cifs_zap_mapping(inode);
@@ -343,16 +345,16 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&cifs_file->file_info_lock);
cifsFileInfo_get_locked(cifs_file);
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&cifs_file->file_info_lock);
return cifs_file;
}
/*
* Release a reference on the file private data. This may involve closing
* the filehandle out on the server. Must be called without holding
- * cifs_file_list_lock.
+ * tcon->open_file_lock and cifs_file->file_info_lock.
*/
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
@@ -367,11 +369,15 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
struct cifs_pending_open open;
bool oplock_break_cancelled;
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&tcon->open_file_lock);
+
+ spin_lock(&cifs_file->file_info_lock);
if (--cifs_file->count > 0) {
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&cifs_file->file_info_lock);
+ spin_unlock(&tcon->open_file_lock);
return;
}
+ spin_unlock(&cifs_file->file_info_lock);
if (server->ops->get_lease_key)
server->ops->get_lease_key(inode, &fid);
@@ -395,7 +401,8 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
cifs_set_oplock_level(cifsi, 0);
}
- spin_unlock(&cifs_file_list_lock);
+
+ spin_unlock(&tcon->open_file_lock);
oplock_break_cancelled = cancel_work_sync(&cifs_file->oplock_break);
@@ -772,10 +779,10 @@ int cifs_closedir(struct inode *inode, struct file *file)
server = tcon->ses->server;
cifs_dbg(FYI, "Freeing private data in close dir\n");
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&cfile->file_info_lock);
if (server->ops->dir_needs_close(cfile)) {
cfile->invalidHandle = true;
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&cfile->file_info_lock);
if (server->ops->close_dir)
rc = server->ops->close_dir(xid, tcon, &cfile->fid);
else
@@ -784,7 +791,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
/* not much we can do if it fails anyway, ignore rc */
rc = 0;
} else
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&cfile->file_info_lock);
buf = cfile->srch_inf.ntwrk_buf_start;
if (buf) {
@@ -1720,12 +1727,13 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
{
struct cifsFileInfo *open_file = NULL;
struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
+ struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
/* only filter by fsuid on multiuser mounts */
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
fsuid_only = false;
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&tcon->open_file_lock);
/* we could simply get the first_list_entry since write-only entries
are always at the end of the list but since the first entry might
have a close pending, we go through the whole list */
@@ -1736,8 +1744,8 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
if (!open_file->invalidHandle) {
/* found a good file */
/* lock it so it will not be closed on us */
- cifsFileInfo_get_locked(open_file);
- spin_unlock(&cifs_file_list_lock);
+ cifsFileInfo_get(open_file);
+ spin_unlock(&tcon->open_file_lock);
return open_file;
} /* else might as well continue, and look for
another, or simply have the caller reopen it
@@ -1745,7 +1753,7 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
} else /* write only file */
break; /* write only files are last so must be done */
}
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tcon->open_file_lock);
return NULL;
}
@@ -1754,6 +1762,7 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
{
struct cifsFileInfo *open_file, *inv_file = NULL;
struct cifs_sb_info *cifs_sb;
+ struct cifs_tcon *tcon;
bool any_available = false;
int rc;
unsigned int refind = 0;
@@ -1769,15 +1778,16 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
}
cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
+ tcon = cifs_sb_master_tcon(cifs_sb);
/* only filter by fsuid on multiuser mounts */
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
fsuid_only = false;
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&tcon->open_file_lock);
refind_writable:
if (refind > MAX_REOPEN_ATT) {
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tcon->open_file_lock);
return NULL;
}
list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
@@ -1788,8 +1798,8 @@ refind_writable:
if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
if (!open_file->invalidHandle) {
/* found a good writable file */
- cifsFileInfo_get_locked(open_file);
- spin_unlock(&cifs_file_list_lock);
+ cifsFileInfo_get(open_file);
+ spin_unlock(&tcon->open_file_lock);
return open_file;
} else {
if (!inv_file)
@@ -1805,24 +1815,24 @@ refind_writable:
if (inv_file) {
any_available = false;
- cifsFileInfo_get_locked(inv_file);
+ cifsFileInfo_get(inv_file);
}
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tcon->open_file_lock);
if (inv_file) {
rc = cifs_reopen_file(inv_file, false);
if (!rc)
return inv_file;
else {
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&tcon->open_file_lock);
list_move_tail(&inv_file->flist,
&cifs_inode->openFileList);
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tcon->open_file_lock);
cifsFileInfo_put(inv_file);
- spin_lock(&cifs_file_list_lock);
++refind;
inv_file = NULL;
+ spin_lock(&tcon->open_file_lock);
goto refind_writable;
}
}
@@ -2535,7 +2545,7 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
wdata->credits = credits;
if (!wdata->cfile->invalidHandle ||
- !cifs_reopen_file(wdata->cfile, false))
+ !(rc = cifs_reopen_file(wdata->cfile, false)))
rc = server->ops->async_writev(wdata,
cifs_uncached_writedata_release);
if (rc) {
@@ -2948,7 +2958,7 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
rdata->credits = credits;
if (!rdata->cfile->invalidHandle ||
- !cifs_reopen_file(rdata->cfile, true))
+ !(rc = cifs_reopen_file(rdata->cfile, true)))
rc = server->ops->async_readv(rdata);
error:
if (rc) {
@@ -3534,7 +3544,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
}
if (!rdata->cfile->invalidHandle ||
- !cifs_reopen_file(rdata->cfile, true))
+ !(rc = cifs_reopen_file(rdata->cfile, true)))
rc = server->ops->async_readv(rdata);
if (rc) {
add_credits_and_wake_if(server, rdata->credits, 0);
@@ -3632,15 +3642,17 @@ static int cifs_readpage(struct file *file, struct page *page)
static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
{
struct cifsFileInfo *open_file;
+ struct cifs_tcon *tcon =
+ cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&tcon->open_file_lock);
list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tcon->open_file_lock);
return 1;
}
}
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tcon->open_file_lock);
return 0;
}
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index a329f5ba35aa..9cdeb0293267 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -982,10 +982,26 @@ struct inode *cifs_root_iget(struct super_block *sb)
struct inode *inode = NULL;
long rc;
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
+ char *path = NULL;
+ int len;
+
+ if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH)
+ && cifs_sb->prepath) {
+ len = strlen(cifs_sb->prepath);
+ path = kzalloc(len + 2 /* leading sep + null */, GFP_KERNEL);
+ if (path == NULL)
+ return ERR_PTR(-ENOMEM);
+ path[0] = '/';
+ memcpy(path+1, cifs_sb->prepath, len);
+ } else {
+ path = kstrdup("", GFP_KERNEL);
+ if (path == NULL)
+ return ERR_PTR(-ENOMEM);
+ }
xid = get_xid();
if (tcon->unix_ext) {
- rc = cifs_get_inode_info_unix(&inode, "", sb, xid);
+ rc = cifs_get_inode_info_unix(&inode, path, sb, xid);
/* some servers mistakenly claim POSIX support */
if (rc != -EOPNOTSUPP)
goto iget_no_retry;
@@ -993,7 +1009,8 @@ struct inode *cifs_root_iget(struct super_block *sb)
tcon->unix_ext = false;
}
- rc = cifs_get_inode_info(&inode, "", NULL, sb, xid, NULL);
+ convert_delimiter(path, CIFS_DIR_SEP(cifs_sb));
+ rc = cifs_get_inode_info(&inode, path, NULL, sb, xid, NULL);
iget_no_retry:
if (!inode) {
@@ -1022,6 +1039,7 @@ iget_no_retry:
}
out:
+ kfree(path);
/* can not call macro free_xid here since in a void func
* TODO: This is no longer true
*/
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 35cf990f87d3..a8f5b31636dc 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -272,6 +272,8 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
rc = -EOPNOTSUPP;
break;
case CIFS_IOC_GET_MNT_INFO:
+ if (pSMBFile == NULL)
+ break;
tcon = tlink_tcon(pSMBFile->tlink);
rc = smb_mnt_get_fsinfo(xid, tcon, (void __user *)arg);
break;
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 8442b8b8e0be..2396ab099849 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -120,6 +120,7 @@ tconInfoAlloc(void)
++ret_buf->tc_count;
INIT_LIST_HEAD(&ret_buf->openFileList);
INIT_LIST_HEAD(&ret_buf->tcon_list);
+ spin_lock_init(&ret_buf->open_file_lock);
#ifdef CONFIG_CIFS_STATS
spin_lock_init(&ret_buf->stat_lock);
#endif
@@ -465,7 +466,7 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
continue;
cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks);
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&tcon->open_file_lock);
list_for_each(tmp2, &tcon->openFileList) {
netfile = list_entry(tmp2, struct cifsFileInfo,
tlist);
@@ -495,11 +496,11 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
&netfile->oplock_break);
netfile->oplock_break_cancelled = false;
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tcon->open_file_lock);
spin_unlock(&cifs_tcp_ses_lock);
return true;
}
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tcon->open_file_lock);
spin_unlock(&cifs_tcp_ses_lock);
cifs_dbg(FYI, "No matching file for oplock break\n");
return true;
@@ -613,9 +614,9 @@ backup_cred(struct cifs_sb_info *cifs_sb)
void
cifs_del_pending_open(struct cifs_pending_open *open)
{
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&tlink_tcon(open->tlink)->open_file_lock);
list_del(&open->olist);
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tlink_tcon(open->tlink)->open_file_lock);
}
void
@@ -635,7 +636,7 @@ void
cifs_add_pending_open(struct cifs_fid *fid, struct tcon_link *tlink,
struct cifs_pending_open *open)
{
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&tlink_tcon(tlink)->open_file_lock);
cifs_add_pending_open_locked(fid, tlink, open);
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tlink_tcon(open->tlink)->open_file_lock);
}
diff --git a/fs/cifs/ntlmssp.h b/fs/cifs/ntlmssp.h
index 848249fa120f..3079b38f0afb 100644
--- a/fs/cifs/ntlmssp.h
+++ b/fs/cifs/ntlmssp.h
@@ -133,6 +133,6 @@ typedef struct _AUTHENTICATE_MESSAGE {
int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len, struct cifs_ses *ses);
void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, struct cifs_ses *ses);
-int build_ntlmssp_auth_blob(unsigned char *pbuffer, u16 *buflen,
+int build_ntlmssp_auth_blob(unsigned char **pbuffer, u16 *buflen,
struct cifs_ses *ses,
const struct nls_table *nls_cp);
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index b30a4a6d98a0..97d1a15873c5 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -282,6 +282,7 @@ initiate_cifs_search(const unsigned int xid, struct file *file)
rc = -ENOMEM;
goto error_exit;
}
+ spin_lock_init(&cifsFile->file_info_lock);
file->private_data = cifsFile;
cifsFile->tlink = cifs_get_tlink(tlink);
tcon = tlink_tcon(tlink);
@@ -594,14 +595,14 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon, loff_t pos,
is_dir_changed(file)) || (index_to_find < first_entry_in_buffer)) {
/* close and restart search */
cifs_dbg(FYI, "search backing up - close and restart search\n");
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&cfile->file_info_lock);
if (server->ops->dir_needs_close(cfile)) {
cfile->invalidHandle = true;
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&cfile->file_info_lock);
if (server->ops->close_dir)
server->ops->close_dir(xid, tcon, &cfile->fid);
} else
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&cfile->file_info_lock);
if (cfile->srch_inf.ntwrk_buf_start) {
cifs_dbg(FYI, "freeing SMB ff cache buf on search rewind\n");
if (cfile->srch_inf.smallBuf)
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 59727e32ed0f..e88ffe1da045 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -364,19 +364,43 @@ void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
sec_blob->DomainName.MaximumLength = 0;
}
-/* We do not malloc the blob, it is passed in pbuffer, because its
- maximum possible size is fixed and small, making this approach cleaner.
- This function returns the length of the data in the blob */
-int build_ntlmssp_auth_blob(unsigned char *pbuffer,
+static int size_of_ntlmssp_blob(struct cifs_ses *ses)
+{
+ int sz = sizeof(AUTHENTICATE_MESSAGE) + ses->auth_key.len
+ - CIFS_SESS_KEY_SIZE + CIFS_CPHTXT_SIZE + 2;
+
+ if (ses->domainName)
+ sz += 2 * strnlen(ses->domainName, CIFS_MAX_DOMAINNAME_LEN);
+ else
+ sz += 2;
+
+ if (ses->user_name)
+ sz += 2 * strnlen(ses->user_name, CIFS_MAX_USERNAME_LEN);
+ else
+ sz += 2;
+
+ return sz;
+}
+
+int build_ntlmssp_auth_blob(unsigned char **pbuffer,
u16 *buflen,
struct cifs_ses *ses,
const struct nls_table *nls_cp)
{
int rc;
- AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer;
+ AUTHENTICATE_MESSAGE *sec_blob;
__u32 flags;
unsigned char *tmp;
+ rc = setup_ntlmv2_rsp(ses, nls_cp);
+ if (rc) {
+ cifs_dbg(VFS, "Error %d during NTLMSSP authentication\n", rc);
+ *buflen = 0;
+ goto setup_ntlmv2_ret;
+ }
+ *pbuffer = kmalloc(size_of_ntlmssp_blob(ses), GFP_KERNEL);
+ sec_blob = (AUTHENTICATE_MESSAGE *)*pbuffer;
+
memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
sec_blob->MessageType = NtLmAuthenticate;
@@ -391,7 +415,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
}
- tmp = pbuffer + sizeof(AUTHENTICATE_MESSAGE);
+ tmp = *pbuffer + sizeof(AUTHENTICATE_MESSAGE);
sec_blob->NegotiateFlags = cpu_to_le32(flags);
sec_blob->LmChallengeResponse.BufferOffset =
@@ -399,23 +423,27 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
sec_blob->LmChallengeResponse.Length = 0;
sec_blob->LmChallengeResponse.MaximumLength = 0;
- sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer);
- rc = setup_ntlmv2_rsp(ses, nls_cp);
- if (rc) {
- cifs_dbg(VFS, "Error %d during NTLMSSP authentication\n", rc);
- goto setup_ntlmv2_ret;
+ sec_blob->NtChallengeResponse.BufferOffset =
+ cpu_to_le32(tmp - *pbuffer);
+ if (ses->user_name != NULL) {
+ memcpy(tmp, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+ ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+ tmp += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
+
+ sec_blob->NtChallengeResponse.Length =
+ cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+ sec_blob->NtChallengeResponse.MaximumLength =
+ cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+ } else {
+ /*
+ * don't send an NT Response for anonymous access
+ */
+ sec_blob->NtChallengeResponse.Length = 0;
+ sec_blob->NtChallengeResponse.MaximumLength = 0;
}
- memcpy(tmp, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
- ses->auth_key.len - CIFS_SESS_KEY_SIZE);
- tmp += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
-
- sec_blob->NtChallengeResponse.Length =
- cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
- sec_blob->NtChallengeResponse.MaximumLength =
- cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
if (ses->domainName == NULL) {
- sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
sec_blob->DomainName.Length = 0;
sec_blob->DomainName.MaximumLength = 0;
tmp += 2;
@@ -424,14 +452,14 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
len = cifs_strtoUTF16((__le16 *)tmp, ses->domainName,
CIFS_MAX_USERNAME_LEN, nls_cp);
len *= 2; /* unicode is 2 bytes each */
- sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
sec_blob->DomainName.Length = cpu_to_le16(len);
sec_blob->DomainName.MaximumLength = cpu_to_le16(len);
tmp += len;
}
if (ses->user_name == NULL) {
- sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
sec_blob->UserName.Length = 0;
sec_blob->UserName.MaximumLength = 0;
tmp += 2;
@@ -440,13 +468,13 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
len = cifs_strtoUTF16((__le16 *)tmp, ses->user_name,
CIFS_MAX_USERNAME_LEN, nls_cp);
len *= 2; /* unicode is 2 bytes each */
- sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
sec_blob->UserName.Length = cpu_to_le16(len);
sec_blob->UserName.MaximumLength = cpu_to_le16(len);
tmp += len;
}
- sec_blob->WorkstationName.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->WorkstationName.BufferOffset = cpu_to_le32(tmp - *pbuffer);
sec_blob->WorkstationName.Length = 0;
sec_blob->WorkstationName.MaximumLength = 0;
tmp += 2;
@@ -455,19 +483,19 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
(ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_EXTENDED_SEC))
&& !calc_seckey(ses)) {
memcpy(tmp, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
- sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - *pbuffer);
sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE);
sec_blob->SessionKey.MaximumLength =
cpu_to_le16(CIFS_CPHTXT_SIZE);
tmp += CIFS_CPHTXT_SIZE;
} else {
- sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
+ sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - *pbuffer);
sec_blob->SessionKey.Length = 0;
sec_blob->SessionKey.MaximumLength = 0;
}
+ *buflen = tmp - *pbuffer;
setup_ntlmv2_ret:
- *buflen = tmp - pbuffer;
return rc;
}
@@ -670,20 +698,24 @@ sess_auth_lanman(struct sess_data *sess_data)
pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE;
- /* no capabilities flags in old lanman negotiation */
- pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE);
-
- /* Calculate hash with password and copy into bcc_ptr.
- * Encryption Key (stored as in cryptkey) gets used if the
- * security mode bit in Negottiate Protocol response states
- * to use challenge/response method (i.e. Password bit is 1).
- */
- rc = calc_lanman_hash(ses->password, ses->server->cryptkey,
- ses->server->sec_mode & SECMODE_PW_ENCRYPT ?
- true : false, lnm_session_key);
-
- memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE);
- bcc_ptr += CIFS_AUTH_RESP_SIZE;
+ if (ses->user_name != NULL) {
+ /* no capabilities flags in old lanman negotiation */
+ pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE);
+
+ /* Calculate hash with password and copy into bcc_ptr.
+ * Encryption Key (stored as in cryptkey) gets used if the
+ * security mode bit in Negottiate Protocol response states
+ * to use challenge/response method (i.e. Password bit is 1).
+ */
+ rc = calc_lanman_hash(ses->password, ses->server->cryptkey,
+ ses->server->sec_mode & SECMODE_PW_ENCRYPT ?
+ true : false, lnm_session_key);
+
+ memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE);
+ bcc_ptr += CIFS_AUTH_RESP_SIZE;
+ } else {
+ pSMB->old_req.PasswordLength = 0;
+ }
/*
* can not sign if LANMAN negotiated so no need
@@ -769,27 +801,32 @@ sess_auth_ntlm(struct sess_data *sess_data)
capabilities = cifs_ssetup_hdr(ses, pSMB);
pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
- pSMB->req_no_secext.CaseInsensitivePasswordLength =
- cpu_to_le16(CIFS_AUTH_RESP_SIZE);
- pSMB->req_no_secext.CaseSensitivePasswordLength =
- cpu_to_le16(CIFS_AUTH_RESP_SIZE);
+ if (ses->user_name != NULL) {
+ pSMB->req_no_secext.CaseInsensitivePasswordLength =
+ cpu_to_le16(CIFS_AUTH_RESP_SIZE);
+ pSMB->req_no_secext.CaseSensitivePasswordLength =
+ cpu_to_le16(CIFS_AUTH_RESP_SIZE);
+
+ /* calculate ntlm response and session key */
+ rc = setup_ntlm_response(ses, sess_data->nls_cp);
+ if (rc) {
+ cifs_dbg(VFS, "Error %d during NTLM authentication\n",
+ rc);
+ goto out;
+ }
- /* calculate ntlm response and session key */
- rc = setup_ntlm_response(ses, sess_data->nls_cp);
- if (rc) {
- cifs_dbg(VFS, "Error %d during NTLM authentication\n",
- rc);
- goto out;
+ /* copy ntlm response */
+ memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+ CIFS_AUTH_RESP_SIZE);
+ bcc_ptr += CIFS_AUTH_RESP_SIZE;
+ memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+ CIFS_AUTH_RESP_SIZE);
+ bcc_ptr += CIFS_AUTH_RESP_SIZE;
+ } else {
+ pSMB->req_no_secext.CaseInsensitivePasswordLength = 0;
+ pSMB->req_no_secext.CaseSensitivePasswordLength = 0;
}
- /* copy ntlm response */
- memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
- CIFS_AUTH_RESP_SIZE);
- bcc_ptr += CIFS_AUTH_RESP_SIZE;
- memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
- CIFS_AUTH_RESP_SIZE);
- bcc_ptr += CIFS_AUTH_RESP_SIZE;
-
if (ses->capabilities & CAP_UNICODE) {
/* unicode strings must be word aligned */
if (sess_data->iov[0].iov_len % 2) {
@@ -878,22 +915,26 @@ sess_auth_ntlmv2(struct sess_data *sess_data)
/* LM2 password would be here if we supported it */
pSMB->req_no_secext.CaseInsensitivePasswordLength = 0;
- /* calculate nlmv2 response and session key */
- rc = setup_ntlmv2_rsp(ses, sess_data->nls_cp);
- if (rc) {
- cifs_dbg(VFS, "Error %d during NTLMv2 authentication\n", rc);
- goto out;
- }
+ if (ses->user_name != NULL) {
+ /* calculate nlmv2 response and session key */
+ rc = setup_ntlmv2_rsp(ses, sess_data->nls_cp);
+ if (rc) {
+ cifs_dbg(VFS, "Error %d during NTLMv2 authentication\n", rc);
+ goto out;
+ }
- memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
- ses->auth_key.len - CIFS_SESS_KEY_SIZE);
- bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
+ memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+ ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+ bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
- /* set case sensitive password length after tilen may get
- * assigned, tilen is 0 otherwise.
- */
- pSMB->req_no_secext.CaseSensitivePasswordLength =
- cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+ /* set case sensitive password length after tilen may get
+ * assigned, tilen is 0 otherwise.
+ */
+ pSMB->req_no_secext.CaseSensitivePasswordLength =
+ cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+ } else {
+ pSMB->req_no_secext.CaseSensitivePasswordLength = 0;
+ }
if (ses->capabilities & CAP_UNICODE) {
if (sess_data->iov[0].iov_len % 2) {
@@ -1245,7 +1286,7 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data)
struct cifs_ses *ses = sess_data->ses;
__u16 bytes_remaining;
char *bcc_ptr;
- char *ntlmsspblob = NULL;
+ unsigned char *ntlmsspblob = NULL;
u16 blob_len;
cifs_dbg(FYI, "rawntlmssp session setup authenticate phase\n");
@@ -1258,19 +1299,7 @@ sess_auth_rawntlmssp_authenticate(struct sess_data *sess_data)
/* Build security blob before we assemble the request */
pSMB = (SESSION_SETUP_ANDX *)sess_data->iov[0].iov_base;
smb_buf = (struct smb_hdr *)pSMB;
- /*
- * 5 is an empirical value, large enough to hold
- * authenticate message plus max 10 of av paris,
- * domain, user, workstation names, flags, etc.
- */
- ntlmsspblob = kzalloc(5*sizeof(struct _AUTHENTICATE_MESSAGE),
- GFP_KERNEL);
- if (!ntlmsspblob) {
- rc = -ENOMEM;
- goto out;
- }
-
- rc = build_ntlmssp_auth_blob(ntlmsspblob,
+ rc = build_ntlmssp_auth_blob(&ntlmsspblob,
&blob_len, ses, sess_data->nls_cp);
if (rc)
goto out_free_ntlmsspblob;
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index fc537c29044e..efd72e1fae74 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -849,8 +849,13 @@ cifs_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_fid *fid, __u16 search_flags,
struct cifs_search_info *srch_inf)
{
- return CIFSFindFirst(xid, tcon, path, cifs_sb,
- &fid->netfid, search_flags, srch_inf, true);
+ int rc;
+
+ rc = CIFSFindFirst(xid, tcon, path, cifs_sb,
+ &fid->netfid, search_flags, srch_inf, true);
+ if (rc)
+ cifs_dbg(FYI, "find first failed=%d\n", rc);
+ return rc;
}
static int
@@ -1015,6 +1020,15 @@ cifs_dir_needs_close(struct cifsFileInfo *cfile)
return !cfile->srch_inf.endOfSearch && !cfile->invalidHandle;
}
+static bool
+cifs_can_echo(struct TCP_Server_Info *server)
+{
+ if (server->tcpStatus == CifsGood)
+ return true;
+
+ return false;
+}
+
struct smb_version_operations smb1_operations = {
.send_cancel = send_nt_cancel,
.compare_fids = cifs_compare_fids,
@@ -1049,6 +1063,7 @@ struct smb_version_operations smb1_operations = {
.get_dfs_refer = CIFSGetDFSRefer,
.qfs_tcon = cifs_qfs_tcon,
.is_path_accessible = cifs_is_path_accessible,
+ .can_echo = cifs_can_echo,
.query_path_info = cifs_query_path_info,
.query_file_info = cifs_query_file_info,
.get_srv_inum = cifs_get_srv_inum,
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index f9e766f464be..b2aff0c6f22c 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -260,7 +260,7 @@ smb2_push_mandatory_locks(struct cifsFileInfo *cfile)
* and check it for zero before using.
*/
max_buf = tlink_tcon(cfile->tlink)->ses->server->maxBuf;
- if (!max_buf) {
+ if (max_buf < sizeof(struct smb2_lock_element)) {
free_xid(xid);
return -EINVAL;
}
diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h
index bc0bb9c34f72..238759c146ba 100644
--- a/fs/cifs/smb2glob.h
+++ b/fs/cifs/smb2glob.h
@@ -44,6 +44,7 @@
#define SMB2_OP_DELETE 7
#define SMB2_OP_HARDLINK 8
#define SMB2_OP_SET_EOF 9
+#define SMB2_OP_RMDIR 10
/* Used when constructing chained read requests. */
#define CHAINED_REQUEST 1
@@ -60,4 +61,14 @@
/* Maximum buffer size value we can send with 1 credit */
#define SMB2_MAX_BUFFER_SIZE 65536
+/*
+ * Maximum number of credits to keep available.
+ * This value is chosen somewhat arbitrarily. The Windows client
+ * defaults to 128 credits, the Windows server allows clients up to
+ * 512 credits, and the NetApp server does not limit clients at all.
+ * Choose a high enough value such that the client shouldn't limit
+ * performance.
+ */
+#define SMB2_MAX_CREDITS_AVAILABLE 32000
+
#endif /* _SMB2_GLOB_H */
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 899bbc86f73e..1238cd3552f9 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -80,6 +80,10 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
* SMB2_open() call.
*/
break;
+ case SMB2_OP_RMDIR:
+ tmprc = SMB2_rmdir(xid, tcon, fid.persistent_fid,
+ fid.volatile_fid);
+ break;
case SMB2_OP_RENAME:
tmprc = SMB2_rename(xid, tcon, fid.persistent_fid,
fid.volatile_fid, (__le16 *)data);
@@ -191,8 +195,8 @@ smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
struct cifs_sb_info *cifs_sb)
{
return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
- CREATE_NOT_FILE | CREATE_DELETE_ON_CLOSE,
- NULL, SMB2_OP_DELETE);
+ CREATE_NOT_FILE,
+ NULL, SMB2_OP_RMDIR);
}
int
@@ -262,9 +266,15 @@ smb2_set_file_info(struct inode *inode, const char *full_path,
struct tcon_link *tlink;
int rc;
+ if ((buf->CreationTime == 0) && (buf->LastAccessTime == 0) &&
+ (buf->LastWriteTime == 0) && (buf->ChangeTime) &&
+ (buf->Attributes == 0))
+ return 0; /* would be a no op, no sense sending this */
+
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink))
return PTR_ERR(tlink);
+
rc = smb2_open_op_close(xid, tlink_tcon(tlink), cifs_sb, full_path,
FILE_WRITE_ATTRIBUTES, FILE_OPEN, 0, buf,
SMB2_OP_SET_INFO);
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 1c5907019045..76ccf20fbfb7 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -525,19 +525,19 @@ smb2_is_valid_lease_break(char *buffer)
list_for_each(tmp1, &server->smb_ses_list) {
ses = list_entry(tmp1, struct cifs_ses, smb_ses_list);
- spin_lock(&cifs_file_list_lock);
list_for_each(tmp2, &ses->tcon_list) {
tcon = list_entry(tmp2, struct cifs_tcon,
tcon_list);
+ spin_lock(&tcon->open_file_lock);
cifs_stats_inc(
&tcon->stats.cifs_stats.num_oplock_brks);
if (smb2_tcon_has_lease(tcon, rsp, lw)) {
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tcon->open_file_lock);
spin_unlock(&cifs_tcp_ses_lock);
return true;
}
+ spin_unlock(&tcon->open_file_lock);
}
- spin_unlock(&cifs_file_list_lock);
}
}
spin_unlock(&cifs_tcp_ses_lock);
@@ -579,7 +579,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
tcon = list_entry(tmp1, struct cifs_tcon, tcon_list);
cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks);
- spin_lock(&cifs_file_list_lock);
+ spin_lock(&tcon->open_file_lock);
list_for_each(tmp2, &tcon->openFileList) {
cfile = list_entry(tmp2, struct cifsFileInfo,
tlist);
@@ -591,7 +591,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
cifs_dbg(FYI, "file id match, oplock break\n");
cinode = CIFS_I(d_inode(cfile->dentry));
-
+ spin_lock(&cfile->file_info_lock);
if (!CIFS_CACHE_WRITE(cinode) &&
rsp->OplockLevel == SMB2_OPLOCK_LEVEL_NONE)
cfile->oplock_break_cancelled = true;
@@ -613,14 +613,14 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
clear_bit(
CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2,
&cinode->flags);
-
+ spin_unlock(&cfile->file_info_lock);
queue_work(cifsiod_wq, &cfile->oplock_break);
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tcon->open_file_lock);
spin_unlock(&cifs_tcp_ses_lock);
return true;
}
- spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&tcon->open_file_lock);
spin_unlock(&cifs_tcp_ses_lock);
cifs_dbg(FYI, "No matching file for oplock break\n");
return true;
@@ -630,3 +630,47 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
cifs_dbg(FYI, "Can not process oplock break for non-existent connection\n");
return false;
}
+
+void
+smb2_cancelled_close_fid(struct work_struct *work)
+{
+ struct close_cancelled_open *cancelled = container_of(work,
+ struct close_cancelled_open, work);
+
+ cifs_dbg(VFS, "Close unmatched open\n");
+
+ SMB2_close(0, cancelled->tcon, cancelled->fid.persistent_fid,
+ cancelled->fid.volatile_fid);
+ cifs_put_tcon(cancelled->tcon);
+ kfree(cancelled);
+}
+
+int
+smb2_handle_cancelled_mid(char *buffer, struct TCP_Server_Info *server)
+{
+ struct smb2_hdr *hdr = (struct smb2_hdr *)buffer;
+ struct smb2_create_rsp *rsp = (struct smb2_create_rsp *)buffer;
+ struct cifs_tcon *tcon;
+ struct close_cancelled_open *cancelled;
+
+ if (hdr->Command != SMB2_CREATE || hdr->Status != STATUS_SUCCESS)
+ return 0;
+
+ cancelled = kzalloc(sizeof(*cancelled), GFP_KERNEL);
+ if (!cancelled)
+ return -ENOMEM;
+
+ tcon = smb2_find_smb_tcon(server, hdr->SessionId, hdr->TreeId);
+ if (!tcon) {
+ kfree(cancelled);
+ return -ENOENT;
+ }
+
+ cancelled->fid.persistent_fid = rsp->PersistentFileId;
+ cancelled->fid.volatile_fid = rsp->VolatileFileId;
+ cancelled->tcon = tcon;
+ INIT_WORK(&cancelled->work, smb2_cancelled_close_fid);
+ queue_work(cifsiod_wq, &cancelled->work);
+
+ return 0;
+}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 53ccdde6ff18..1d125d3d0d89 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -282,7 +282,7 @@ SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon)
cifs_dbg(FYI, "Link Speed %lld\n",
le64_to_cpu(out_buf->LinkSpeed));
}
-
+ kfree(out_buf);
return rc;
}
#endif /* STATS2 */
@@ -536,6 +536,7 @@ smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
server->ops->set_oplock_level(cinode, oplock, fid->epoch,
&fid->purge_cache);
cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode);
+ memcpy(cfile->fid.create_guid, fid->create_guid, 16);
}
static void
@@ -694,6 +695,7 @@ smb2_clone_range(const unsigned int xid,
cchunk_out:
kfree(pcchunk);
+ kfree(retbuf);
return rc;
}
@@ -818,7 +820,6 @@ smb2_duplicate_extents(const unsigned int xid,
{
int rc;
unsigned int ret_data_len;
- char *retbuf = NULL;
struct duplicate_extents_to_file dup_ext_buf;
struct cifs_tcon *tcon = tlink_tcon(trgtfile->tlink);
@@ -844,7 +845,7 @@ smb2_duplicate_extents(const unsigned int xid,
FSCTL_DUPLICATE_EXTENTS_TO_FILE,
true /* is_fsctl */, (char *)&dup_ext_buf,
sizeof(struct duplicate_extents_to_file),
- (char **)&retbuf,
+ NULL,
&ret_data_len);
if (ret_data_len > 0)
@@ -867,7 +868,6 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon,
struct cifsFileInfo *cfile)
{
struct fsctl_set_integrity_information_req integr_info;
- char *retbuf = NULL;
unsigned int ret_data_len;
integr_info.ChecksumAlgorithm = cpu_to_le16(CHECKSUM_TYPE_UNCHANGED);
@@ -879,7 +879,7 @@ smb3_set_integrity(const unsigned int xid, struct cifs_tcon *tcon,
FSCTL_SET_INTEGRITY_INFORMATION,
true /* is_fsctl */, (char *)&integr_info,
sizeof(struct fsctl_set_integrity_information_req),
- (char **)&retbuf,
+ NULL,
&ret_data_len);
}
@@ -909,7 +909,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
kfree(utf16_path);
if (rc) {
- cifs_dbg(VFS, "open dir failed\n");
+ cifs_dbg(FYI, "open dir failed rc=%d\n", rc);
return rc;
}
@@ -919,7 +919,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
rc = SMB2_query_directory(xid, tcon, fid->persistent_fid,
fid->volatile_fid, 0, srch_inf);
if (rc) {
- cifs_dbg(VFS, "query directory failed\n");
+ cifs_dbg(FYI, "query directory failed rc=%d\n", rc);
SMB2_close(xid, tcon, fid->persistent_fid, fid->volatile_fid);
}
return rc;
@@ -1036,9 +1036,12 @@ smb2_set_lease_key(struct inode *inode, struct cifs_fid *fid)
static void
smb2_new_lease_key(struct cifs_fid *fid)
{
- get_random_bytes(fid->lease_key, SMB2_LEASE_KEY_SIZE);
+ generate_random_uuid(fid->lease_key);
}
+#define SMB2_SYMLINK_STRUCT_SIZE \
+ (sizeof(struct smb2_err_rsp) - 1 + sizeof(struct smb2_symlink_err_rsp))
+
static int
smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
const char *full_path, char **target_path,
@@ -1051,7 +1054,10 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_fid fid;
struct smb2_err_rsp *err_buf = NULL;
struct smb2_symlink_err_rsp *symlink;
- unsigned int sub_len, sub_offset;
+ unsigned int sub_len;
+ unsigned int sub_offset;
+ unsigned int print_len;
+ unsigned int print_offset;
cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path);
@@ -1072,11 +1078,33 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
kfree(utf16_path);
return -ENOENT;
}
+
+ if (le32_to_cpu(err_buf->ByteCount) < sizeof(struct smb2_symlink_err_rsp) ||
+ get_rfc1002_length(err_buf) + 4 < SMB2_SYMLINK_STRUCT_SIZE) {
+ kfree(utf16_path);
+ return -ENOENT;
+ }
+
/* open must fail on symlink - reset rc */
rc = 0;
symlink = (struct smb2_symlink_err_rsp *)err_buf->ErrorData;
sub_len = le16_to_cpu(symlink->SubstituteNameLength);
sub_offset = le16_to_cpu(symlink->SubstituteNameOffset);
+ print_len = le16_to_cpu(symlink->PrintNameLength);
+ print_offset = le16_to_cpu(symlink->PrintNameOffset);
+
+ if (get_rfc1002_length(err_buf) + 4 <
+ SMB2_SYMLINK_STRUCT_SIZE + sub_offset + sub_len) {
+ kfree(utf16_path);
+ return -ENOENT;
+ }
+
+ if (get_rfc1002_length(err_buf) + 4 <
+ SMB2_SYMLINK_STRUCT_SIZE + print_offset + print_len) {
+ kfree(utf16_path);
+ return -ENOENT;
+ }
+
*target_path = cifs_strndup_from_utf16(
(char *)symlink->PathBuffer + sub_offset,
sub_len, true, cifs_sb->local_nls);
@@ -1483,6 +1511,7 @@ struct smb_version_operations smb20_operations = {
.clear_stats = smb2_clear_stats,
.print_stats = smb2_print_stats,
.is_oplock_break = smb2_is_valid_oplock_break,
+ .handle_cancelled_mid = smb2_handle_cancelled_mid,
.downgrade_oplock = smb2_downgrade_oplock,
.need_neg = smb2_need_neg,
.negotiate = smb2_negotiate,
@@ -1561,6 +1590,7 @@ struct smb_version_operations smb21_operations = {
.clear_stats = smb2_clear_stats,
.print_stats = smb2_print_stats,
.is_oplock_break = smb2_is_valid_oplock_break,
+ .handle_cancelled_mid = smb2_handle_cancelled_mid,
.downgrade_oplock = smb2_downgrade_oplock,
.need_neg = smb2_need_neg,
.negotiate = smb2_negotiate,
@@ -1642,6 +1672,7 @@ struct smb_version_operations smb30_operations = {
.print_stats = smb2_print_stats,
.dump_share_caps = smb2_dump_share_caps,
.is_oplock_break = smb2_is_valid_oplock_break,
+ .handle_cancelled_mid = smb2_handle_cancelled_mid,
.downgrade_oplock = smb2_downgrade_oplock,
.need_neg = smb2_need_neg,
.negotiate = smb2_negotiate,
@@ -1729,6 +1760,7 @@ struct smb_version_operations smb311_operations = {
.print_stats = smb2_print_stats,
.dump_share_caps = smb2_dump_share_caps,
.is_oplock_break = smb2_is_valid_oplock_break,
+ .handle_cancelled_mid = smb2_handle_cancelled_mid,
.downgrade_oplock = smb2_downgrade_oplock,
.need_neg = smb2_need_neg,
.negotiate = smb2_negotiate,
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 373b5cd1c913..6c484ddf26a9 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -103,7 +103,21 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ ,
hdr->ProtocolId[3] = 'B';
hdr->StructureSize = cpu_to_le16(64);
hdr->Command = smb2_cmd;
- hdr->CreditRequest = cpu_to_le16(2); /* BB make this dynamic */
+ if (tcon && tcon->ses && tcon->ses->server) {
+ struct TCP_Server_Info *server = tcon->ses->server;
+
+ spin_lock(&server->req_lock);
+ /* Request up to 2 credits but don't go over the limit. */
+ if (server->credits >= SMB2_MAX_CREDITS_AVAILABLE)
+ hdr->CreditRequest = cpu_to_le16(0);
+ else
+ hdr->CreditRequest = cpu_to_le16(
+ min_t(int, SMB2_MAX_CREDITS_AVAILABLE -
+ server->credits, 2));
+ spin_unlock(&server->req_lock);
+ } else {
+ hdr->CreditRequest = cpu_to_le16(2);
+ }
hdr->ProcessId = cpu_to_le32((__u16)current->tgid);
if (!tcon)
@@ -264,7 +278,7 @@ out:
case SMB2_CHANGE_NOTIFY:
case SMB2_QUERY_INFO:
case SMB2_SET_INFO:
- return -EAGAIN;
+ rc = -EAGAIN;
}
unload_nls(nls_codepage);
return rc;
@@ -550,8 +564,12 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
}
if (rsplen != sizeof(struct validate_negotiate_info_rsp)) {
- cifs_dbg(VFS, "invalid size of protocol negotiate response\n");
- return -EIO;
+ cifs_dbg(VFS, "invalid protocol negotiate response size: %d\n",
+ rsplen);
+
+ /* relax check since Mac returns max bufsize allowed on ioctl */
+ if (rsplen > CIFSMaxBufSize)
+ return -EIO;
}
/* check validate negotiate info response matches what we got earlier */
@@ -591,8 +609,9 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
u16 blob_length = 0;
struct key *spnego_key = NULL;
char *security_blob = NULL;
- char *ntlmssp_blob = NULL;
+ unsigned char *ntlmssp_blob = NULL;
bool use_spnego = false; /* else use raw ntlmssp */
+ u64 previous_session = ses->Suid;
cifs_dbg(FYI, "Session Setup\n");
@@ -630,6 +649,10 @@ ssetup_ntlmssp_authenticate:
return rc;
req->hdr.SessionId = 0; /* First session, not a reauthenticate */
+
+ /* if reconnect, we need to send previous sess id, otherwise it is 0 */
+ req->PreviousSessionId = previous_session;
+
req->Flags = 0; /* MBZ */
/* to enable echos and oplocks */
req->hdr.CreditRequest = cpu_to_le16(3);
@@ -716,13 +739,7 @@ ssetup_ntlmssp_authenticate:
iov[1].iov_len = blob_length;
} else if (phase == NtLmAuthenticate) {
req->hdr.SessionId = ses->Suid;
- ntlmssp_blob = kzalloc(sizeof(struct _NEGOTIATE_MESSAGE) + 500,
- GFP_KERNEL);
- if (ntlmssp_blob == NULL) {
- rc = -ENOMEM;
- goto ssetup_exit;
- }
- rc = build_ntlmssp_auth_blob(ntlmssp_blob, &blob_length, ses,
+ rc = build_ntlmssp_auth_blob(&ntlmssp_blob, &blob_length, ses,
nls_cp);
if (rc) {
cifs_dbg(FYI, "build_ntlmssp_auth_blob failed %d\n",
@@ -919,9 +936,6 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
else
return -EIO;
- if (tcon && tcon->bad_network_name)
- return -ENOENT;
-
if ((tcon && tcon->seal) &&
((ses->server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) == 0)) {
cifs_dbg(VFS, "encryption requested but no server support");
@@ -939,6 +953,10 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree,
return -EINVAL;
}
+ /* SMB2 TREE_CONNECT request must be called with TreeId == 0 */
+ if (tcon)
+ tcon->tid = 0;
+
rc = small_smb2_init(SMB2_TREE_CONNECT, tcon, (void **) &req);
if (rc) {
kfree(unc_path);
@@ -1019,8 +1037,6 @@ tcon_exit:
tcon_error_exit:
if (rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) {
cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree);
- if (tcon)
- tcon->bad_network_name = true;
}
goto tcon_exit;
}
@@ -1173,7 +1189,7 @@ create_durable_v2_buf(struct cifs_fid *pfid)
buf->dcontext.Timeout = 0; /* Should this be configurable by workload */
buf->dcontext.Flags = cpu_to_le32(SMB2_DHANDLE_FLAG_PERSISTENT);
- get_random_bytes(buf->dcontext.CreateGuid, 16);
+ generate_random_uuid(buf->dcontext.CreateGuid);
memcpy(pfid->create_guid, buf->dcontext.CreateGuid, 16);
/* SMB2_CREATE_DURABLE_HANDLE_REQUEST is "DH2Q" */
@@ -1506,8 +1522,12 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
* than one credit. Windows typically sets this smaller, but for some
* ioctls it may be useful to allow server to send more. No point
* limiting what the server can send as long as fits in one credit
+ * Unfortunately - we can not handle more than CIFS_MAX_MSG_SIZE
+ * (by default, note that it can be overridden to make max larger)
+ * in responses (except for read responses which can be bigger.
+ * We may want to bump this limit up
*/
- req->MaxOutputResponse = cpu_to_le32(0xFF00); /* < 64K uses 1 credit */
+ req->MaxOutputResponse = cpu_to_le32(CIFSMaxBufSize);
if (is_fsctl)
req->Flags = cpu_to_le32(SMB2_0_IOCTL_IS_FSCTL);
@@ -1809,6 +1829,54 @@ smb2_echo_callback(struct mid_q_entry *mid)
add_credits(server, credits_received, CIFS_ECHO_OP);
}
+void smb2_reconnect_server(struct work_struct *work)
+{
+ struct TCP_Server_Info *server = container_of(work,
+ struct TCP_Server_Info, reconnect.work);
+ struct cifs_ses *ses;
+ struct cifs_tcon *tcon, *tcon2;
+ struct list_head tmp_list;
+ int tcon_exist = false;
+
+ /* Prevent simultaneous reconnects that can corrupt tcon->rlist list */
+ mutex_lock(&server->reconnect_mutex);
+
+ INIT_LIST_HEAD(&tmp_list);
+ cifs_dbg(FYI, "Need negotiate, reconnecting tcons\n");
+
+ spin_lock(&cifs_tcp_ses_lock);
+ list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+ list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
+ if (tcon->need_reconnect) {
+ tcon->tc_count++;
+ list_add_tail(&tcon->rlist, &tmp_list);
+ tcon_exist = true;
+ }
+ }
+ }
+ /*
+ * Get the reference to server struct to be sure that the last call of
+ * cifs_put_tcon() in the loop below won't release the server pointer.
+ */
+ if (tcon_exist)
+ server->srv_count++;
+
+ spin_unlock(&cifs_tcp_ses_lock);
+
+ list_for_each_entry_safe(tcon, tcon2, &tmp_list, rlist) {
+ smb2_reconnect(SMB2_ECHO, tcon);
+ list_del_init(&tcon->rlist);
+ cifs_put_tcon(tcon);
+ }
+
+ cifs_dbg(FYI, "Reconnecting tcons finished\n");
+ mutex_unlock(&server->reconnect_mutex);
+
+ /* now we can safely release srv struct */
+ if (tcon_exist)
+ cifs_put_tcp_session(server, 1);
+}
+
int
SMB2_echo(struct TCP_Server_Info *server)
{
@@ -1820,6 +1888,12 @@ SMB2_echo(struct TCP_Server_Info *server)
cifs_dbg(FYI, "In echo request\n");
+ if (server->tcpStatus == CifsNeedNegotiate) {
+ /* No need to send echo on newly established connections */
+ queue_delayed_work(cifsiod_wq, &server->reconnect, 0);
+ return rc;
+ }
+
rc = small_smb2_init(SMB2_ECHO, NULL, (void **)&req);
if (rc)
return rc;
@@ -2038,6 +2112,7 @@ smb2_async_readv(struct cifs_readdata *rdata)
if (rdata->credits) {
buf->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes,
SMB2_MAX_BUFFER_SIZE));
+ buf->CreditRequest = buf->CreditCharge;
spin_lock(&server->req_lock);
server->credits += rdata->credits -
le16_to_cpu(buf->CreditCharge);
@@ -2224,6 +2299,7 @@ smb2_async_writev(struct cifs_writedata *wdata,
if (wdata->credits) {
req->hdr.CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes,
SMB2_MAX_BUFFER_SIZE));
+ req->hdr.CreditRequest = req->hdr.CreditCharge;
spin_lock(&server->req_lock);
server->credits += wdata->credits -
le16_to_cpu(req->hdr.CreditCharge);
@@ -2577,6 +2653,22 @@ SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon,
}
int
+SMB2_rmdir(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid)
+{
+ __u8 delete_pending = 1;
+ void *data;
+ unsigned int size;
+
+ data = &delete_pending;
+ size = 1; /* sizeof __u8 */
+
+ return send_set_info(xid, tcon, persistent_fid, volatile_fid,
+ current->tgid, FILE_DISPOSITION_INFORMATION, 1, &data,
+ &size);
+}
+
+int
SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, __le16 *target_file)
{
@@ -2676,8 +2768,8 @@ copy_fs_info_to_kstatfs(struct smb2_fs_full_size_info *pfs_inf,
kst->f_bsize = le32_to_cpu(pfs_inf->BytesPerSector) *
le32_to_cpu(pfs_inf->SectorsPerAllocationUnit);
kst->f_blocks = le64_to_cpu(pfs_inf->TotalAllocationUnits);
- kst->f_bfree = le64_to_cpu(pfs_inf->ActualAvailableAllocationUnits);
- kst->f_bavail = le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits);
+ kst->f_bfree = kst->f_bavail =
+ le64_to_cpu(pfs_inf->CallerAvailableAllocationUnits);
return;
}
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 4af52780ec35..aacb15bd56fe 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -82,8 +82,8 @@
#define NUMBER_OF_SMB2_COMMANDS 0x0013
-/* BB FIXME - analyze following length BB */
-#define MAX_SMB2_HDR_SIZE 0x78 /* 4 len + 64 hdr + (2*24 wct) + 2 bct + 2 pad */
+/* 4 len + 52 transform hdr + 64 hdr + 56 create rsp */
+#define MAX_SMB2_HDR_SIZE 0x00b0
#define SMB2_PROTO_NUMBER cpu_to_le32(0x424d53fe)
@@ -276,7 +276,7 @@ struct smb2_sess_setup_req {
__le32 Channel;
__le16 SecurityBufferOffset;
__le16 SecurityBufferLength;
- __le64 PreviousSessionId;
+ __u64 PreviousSessionId;
__u8 Buffer[1]; /* variable length GSS security buffer */
} __packed;
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 79dc650c18b2..adc5234486c3 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -47,6 +47,10 @@ extern struct mid_q_entry *smb2_setup_request(struct cifs_ses *ses,
struct smb_rqst *rqst);
extern struct mid_q_entry *smb2_setup_async_request(
struct TCP_Server_Info *server, struct smb_rqst *rqst);
+extern struct cifs_ses *smb2_find_smb_ses(struct TCP_Server_Info *server,
+ __u64 ses_id);
+extern struct cifs_tcon *smb2_find_smb_tcon(struct TCP_Server_Info *server,
+ __u64 ses_id, __u32 tid);
extern int smb2_calc_signature(struct smb_rqst *rqst,
struct TCP_Server_Info *server);
extern int smb3_calc_signature(struct smb_rqst *rqst,
@@ -95,6 +99,7 @@ extern int smb2_open_file(const unsigned int xid,
extern int smb2_unlock_range(struct cifsFileInfo *cfile,
struct file_lock *flock, const unsigned int xid);
extern int smb2_push_mandatory_locks(struct cifsFileInfo *cfile);
+extern void smb2_reconnect_server(struct work_struct *work);
/*
* SMB2 Worker functions - most of protocol specific implementation details
@@ -140,6 +145,8 @@ extern int SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
extern int SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid,
__le16 *target_file);
+extern int SMB2_rmdir(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid);
extern int SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid,
__le16 *target_file);
@@ -154,6 +161,9 @@ extern int SMB2_set_compression(const unsigned int xid, struct cifs_tcon *tcon,
extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
const u64 persistent_fid, const u64 volatile_fid,
const __u8 oplock_level);
+extern int smb2_handle_cancelled_mid(char *buffer,
+ struct TCP_Server_Info *server);
+void smb2_cancelled_close_fid(struct work_struct *work);
extern int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id,
struct kstatfs *FSData);
diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c
index d4c5b6f109a7..69e3b322bbfe 100644
--- a/fs/cifs/smb2transport.c
+++ b/fs/cifs/smb2transport.c
@@ -115,22 +115,68 @@ smb3_crypto_shash_allocate(struct TCP_Server_Info *server)
}
static struct cifs_ses *
-smb2_find_smb_ses(struct smb2_hdr *smb2hdr, struct TCP_Server_Info *server)
+smb2_find_smb_ses_unlocked(struct TCP_Server_Info *server, __u64 ses_id)
{
struct cifs_ses *ses;
- spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
- if (ses->Suid != smb2hdr->SessionId)
+ if (ses->Suid != ses_id)
continue;
- spin_unlock(&cifs_tcp_ses_lock);
return ses;
}
+
+ return NULL;
+}
+
+struct cifs_ses *
+smb2_find_smb_ses(struct TCP_Server_Info *server, __u64 ses_id)
+{
+ struct cifs_ses *ses;
+
+ spin_lock(&cifs_tcp_ses_lock);
+ ses = smb2_find_smb_ses_unlocked(server, ses_id);
spin_unlock(&cifs_tcp_ses_lock);
+ return ses;
+}
+
+static struct cifs_tcon *
+smb2_find_smb_sess_tcon_unlocked(struct cifs_ses *ses, __u32 tid)
+{
+ struct cifs_tcon *tcon;
+
+ list_for_each_entry(tcon, &ses->tcon_list, tcon_list) {
+ if (tcon->tid != tid)
+ continue;
+ ++tcon->tc_count;
+ return tcon;
+ }
+
return NULL;
}
+/*
+ * Obtain tcon corresponding to the tid in the given
+ * cifs_ses
+ */
+
+struct cifs_tcon *
+smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32 tid)
+{
+ struct cifs_ses *ses;
+ struct cifs_tcon *tcon;
+
+ spin_lock(&cifs_tcp_ses_lock);
+ ses = smb2_find_smb_ses_unlocked(server, ses_id);
+ if (!ses) {
+ spin_unlock(&cifs_tcp_ses_lock);
+ return NULL;
+ }
+ tcon = smb2_find_smb_sess_tcon_unlocked(ses, tid);
+ spin_unlock(&cifs_tcp_ses_lock);
+
+ return tcon;
+}
int
smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
@@ -143,7 +189,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base;
struct cifs_ses *ses;
- ses = smb2_find_smb_ses(smb2_pdu, server);
+ ses = smb2_find_smb_ses(server, smb2_pdu->SessionId);
if (!ses) {
cifs_dbg(VFS, "%s: Could not find session\n", __func__);
return 0;
@@ -314,7 +360,7 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base;
struct cifs_ses *ses;
- ses = smb2_find_smb_ses(smb2_pdu, server);
+ ses = smb2_find_smb_ses(server, smb2_pdu->SessionId);
if (!ses) {
cifs_dbg(VFS, "%s: Could not find session\n", __func__);
return 0;
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 87abe8ed074c..54af10204e83 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -786,9 +786,11 @@ SendReceive2(const unsigned int xid, struct cifs_ses *ses,
rc = wait_for_response(ses->server, midQ);
if (rc != 0) {
+ cifs_dbg(FYI, "Cancelling wait for mid %llu\n", midQ->mid);
send_cancel(ses->server, buf, midQ);
spin_lock(&GlobalMid_Lock);
if (midQ->mid_state == MID_REQUEST_SUBMITTED) {
+ midQ->mid_flags |= MID_WAIT_CANCELLED;
midQ->callback = DeleteMidQEntry;
spin_unlock(&GlobalMid_Lock);
cifs_small_buf_release(buf);
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index ec5c8325b503..0525ebc3aea2 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -83,14 +83,13 @@ static int create_link(struct config_item *parent_item,
ret = -ENOMEM;
sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL);
if (sl) {
- sl->sl_target = config_item_get(item);
spin_lock(&configfs_dirent_lock);
if (target_sd->s_type & CONFIGFS_USET_DROPPING) {
spin_unlock(&configfs_dirent_lock);
- config_item_put(item);
kfree(sl);
return -ENOENT;
}
+ sl->sl_target = config_item_get(item);
list_add(&sl->sl_list, &target_sd->s_links);
spin_unlock(&configfs_dirent_lock);
ret = configfs_create_link(sl, parent_item->ci_dentry,
diff --git a/fs/coredump.c b/fs/coredump.c
index 1777331eee76..a8852293038a 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -1,6 +1,7 @@
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/fdtable.h>
+#include <linux/freezer.h>
#include <linux/mm.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
@@ -32,6 +33,9 @@
#include <linux/pipe_fs_i.h>
#include <linux/oom.h>
#include <linux/compat.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/path.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -396,7 +400,9 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
if (core_waiters > 0) {
struct core_thread *ptr;
+ freezer_do_not_count();
wait_for_completion(&core_state->startup);
+ freezer_count();
/*
* Wait for all the threads to become inactive, so that
* all the thread context (extended register state, like
@@ -627,6 +633,8 @@ void do_coredump(const siginfo_t *siginfo)
}
} else {
struct inode *inode;
+ int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW |
+ O_LARGEFILE | O_EXCL;
if (cprm.limit < binfmt->min_coredump)
goto fail_unlock;
@@ -665,10 +673,27 @@ void do_coredump(const siginfo_t *siginfo)
* what matters is that at least one of the two processes
* writes its coredump successfully, not which one.
*/
- cprm.file = filp_open(cn.corename,
- O_CREAT | 2 | O_NOFOLLOW |
- O_LARGEFILE | O_EXCL,
- 0600);
+ if (need_suid_safe) {
+ /*
+ * Using user namespaces, normal user tasks can change
+ * their current->fs->root to point to arbitrary
+ * directories. Since the intention of the "only dump
+ * with a fully qualified path" rule is to control where
+ * coredumps may be placed using root privileges,
+ * current->fs->root must not be used. Instead, use the
+ * root directory of init_task.
+ */
+ struct path root;
+
+ task_lock(&init_task);
+ get_fs_root(init_task.fs, &root);
+ task_unlock(&init_task);
+ cprm.file = file_open_root(root.dentry, root.mnt,
+ cn.corename, open_flags, 0600);
+ path_put(&root);
+ } else {
+ cprm.file = filp_open(cn.corename, open_flags, 0600);
+ }
if (IS_ERR(cprm.file))
goto fail_unlock;
@@ -785,3 +810,21 @@ int dump_align(struct coredump_params *cprm, int align)
return mod ? dump_skip(cprm, align - mod) : 1;
}
EXPORT_SYMBOL(dump_align);
+
+/*
+ * Ensures that file size is big enough to contain the current file
+ * postion. This prevents gdb from complaining about a truncated file
+ * if the last "write" to the file was dump_skip.
+ */
+void dump_truncate(struct coredump_params *cprm)
+{
+ struct file *file = cprm->file;
+ loff_t offset;
+
+ if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
+ offset = file->f_op->llseek(file, 0, SEEK_CUR);
+ if (i_size_read(file->f_mapping->host) < offset)
+ do_truncate(file->f_path.dentry, offset, 0, file);
+ }
+}
+EXPORT_SYMBOL(dump_truncate);
diff --git a/fs/dcache.c b/fs/dcache.c
index 877bcbbd03ff..3ed642e0a0c2 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -269,6 +269,33 @@ static inline int dname_external(const struct dentry *dentry)
return dentry->d_name.name != dentry->d_iname;
}
+void take_dentry_name_snapshot(struct name_snapshot *name, struct dentry *dentry)
+{
+ spin_lock(&dentry->d_lock);
+ if (unlikely(dname_external(dentry))) {
+ struct external_name *p = external_name(dentry);
+ atomic_inc(&p->u.count);
+ spin_unlock(&dentry->d_lock);
+ name->name = p->name;
+ } else {
+ memcpy(name->inline_name, dentry->d_iname, DNAME_INLINE_LEN);
+ spin_unlock(&dentry->d_lock);
+ name->name = name->inline_name;
+ }
+}
+EXPORT_SYMBOL(take_dentry_name_snapshot);
+
+void release_dentry_name_snapshot(struct name_snapshot *name)
+{
+ if (unlikely(name->name != name->inline_name)) {
+ struct external_name *p;
+ p = container_of(name->name, struct external_name, name[0]);
+ if (unlikely(atomic_dec_and_test(&p->u.count)))
+ kfree_rcu(p, u.head);
+ }
+}
+EXPORT_SYMBOL(release_dentry_name_snapshot);
+
static inline void __d_set_inode_and_type(struct dentry *dentry,
struct inode *inode,
unsigned type_flags)
@@ -578,7 +605,6 @@ static struct dentry *dentry_kill(struct dentry *dentry)
failed:
spin_unlock(&dentry->d_lock);
- cpu_relax();
return dentry; /* try again with same dentry */
}
@@ -752,6 +778,8 @@ void dput(struct dentry *dentry)
return;
repeat:
+ might_sleep();
+
rcu_read_lock();
if (likely(fast_dput(dentry))) {
rcu_read_unlock();
@@ -783,8 +811,10 @@ repeat:
kill_it:
dentry = dentry_kill(dentry);
- if (dentry)
+ if (dentry) {
+ cond_resched();
goto repeat;
+ }
}
EXPORT_SYMBOL(dput);
@@ -1125,11 +1155,12 @@ void shrink_dcache_sb(struct super_block *sb)
LIST_HEAD(dispose);
freed = list_lru_walk(&sb->s_dentry_lru,
- dentry_lru_isolate_shrink, &dispose, UINT_MAX);
+ dentry_lru_isolate_shrink, &dispose, 1024);
this_cpu_sub(nr_dentry_unused, freed);
shrink_dentry_list(&dispose);
- } while (freed > 0);
+ cond_resched();
+ } while (list_lru_count(&sb->s_dentry_lru) > 0);
}
EXPORT_SYMBOL(shrink_dcache_sb);
@@ -1319,8 +1350,11 @@ int d_set_mounted(struct dentry *dentry)
}
spin_lock(&dentry->d_lock);
if (!d_unlinked(dentry)) {
- dentry->d_flags |= DCACHE_MOUNTED;
- ret = 0;
+ ret = -EBUSY;
+ if (!d_mountpoint(dentry)) {
+ dentry->d_flags |= DCACHE_MOUNTED;
+ ret = 0;
+ }
}
spin_unlock(&dentry->d_lock);
out:
@@ -1618,7 +1652,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
struct dentry *dentry = __d_alloc(parent->d_sb, name);
if (!dentry)
return NULL;
-
+ dentry->d_flags |= DCACHE_RCUACCESS;
spin_lock(&parent->d_lock);
/*
* don't need child lock because it is not subject
@@ -1666,7 +1700,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
DCACHE_OP_REVALIDATE |
DCACHE_OP_WEAK_REVALIDATE |
DCACHE_OP_DELETE |
- DCACHE_OP_SELECT_INODE));
+ DCACHE_OP_SELECT_INODE |
+ DCACHE_OP_REAL));
dentry->d_op = op;
if (!op)
return;
@@ -1684,6 +1719,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
dentry->d_flags |= DCACHE_OP_PRUNE;
if (op->d_select_inode)
dentry->d_flags |= DCACHE_OP_SELECT_INODE;
+ if (op->d_real)
+ dentry->d_flags |= DCACHE_OP_REAL;
}
EXPORT_SYMBOL(d_set_d_op);
@@ -2410,7 +2447,6 @@ static void __d_rehash(struct dentry * entry, struct hlist_bl_head *b)
{
BUG_ON(!d_unhashed(entry));
hlist_bl_lock(b);
- entry->d_flags |= DCACHE_RCUACCESS;
hlist_bl_add_head_rcu(&entry->d_hash, b);
hlist_bl_unlock(b);
}
@@ -2629,6 +2665,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
/* ... and switch them in the tree */
if (IS_ROOT(dentry)) {
/* splicing a tree */
+ dentry->d_flags |= DCACHE_RCUACCESS;
dentry->d_parent = target->d_parent;
target->d_parent = target;
list_del_init(&target->d_child);
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index b7fcc0de0b2f..e49ba072bd64 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -457,7 +457,7 @@ struct dentry *debugfs_create_automount(const char *name,
if (unlikely(!inode))
return failed_creating(dentry);
- inode->i_mode = S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO;
+ make_empty_dir_inode(inode);
inode->i_flags |= S_AUTOMOUNT;
inode->i_private = data;
dentry->d_fsdata = (void *)f;
@@ -669,7 +669,7 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
{
int error;
struct dentry *dentry = NULL, *trap;
- const char *old_name;
+ struct name_snapshot old_name;
trap = lock_rename(new_dir, old_dir);
/* Source or destination directories don't exist? */
@@ -684,19 +684,19 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
if (IS_ERR(dentry) || dentry == trap || d_really_is_positive(dentry))
goto exit;
- old_name = fsnotify_oldname_init(old_dentry->d_name.name);
+ take_dentry_name_snapshot(&old_name, old_dentry);
error = simple_rename(d_inode(old_dir), old_dentry, d_inode(new_dir),
dentry);
if (error) {
- fsnotify_oldname_free(old_name);
+ release_dentry_name_snapshot(&old_name);
goto exit;
}
d_move(old_dentry, dentry);
- fsnotify_move(d_inode(old_dir), d_inode(new_dir), old_name,
+ fsnotify_move(d_inode(old_dir), d_inode(new_dir), old_name.name,
d_is_dir(old_dentry),
NULL, old_dentry);
- fsnotify_oldname_free(old_name);
+ release_dentry_name_snapshot(&old_name);
unlock_rename(new_dir, old_dir);
dput(dentry);
return old_dentry;
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 706de324f2a6..c82edb049117 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -128,6 +128,7 @@ static const match_table_t tokens = {
struct pts_fs_info {
struct ida allocated_ptys;
struct pts_mount_opts mount_opts;
+ struct super_block *sb;
struct dentry *ptmx_dentry;
};
@@ -358,7 +359,7 @@ static const struct super_operations devpts_sops = {
.show_options = devpts_show_options,
};
-static void *new_pts_fs_info(void)
+static void *new_pts_fs_info(struct super_block *sb)
{
struct pts_fs_info *fsi;
@@ -369,6 +370,7 @@ static void *new_pts_fs_info(void)
ida_init(&fsi->allocated_ptys);
fsi->mount_opts.mode = DEVPTS_DEFAULT_MODE;
fsi->mount_opts.ptmxmode = DEVPTS_DEFAULT_PTMX_MODE;
+ fsi->sb = sb;
return fsi;
}
@@ -384,7 +386,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
s->s_op = &devpts_sops;
s->s_time_gran = 1;
- s->s_fs_info = new_pts_fs_info();
+ s->s_fs_info = new_pts_fs_info(s);
if (!s->s_fs_info)
goto fail;
@@ -524,17 +526,14 @@ static struct file_system_type devpts_fs_type = {
* to the System V naming convention
*/
-int devpts_new_index(struct inode *ptmx_inode)
+int devpts_new_index(struct pts_fs_info *fsi)
{
- struct super_block *sb = pts_sb_from_inode(ptmx_inode);
- struct pts_fs_info *fsi;
int index;
int ida_ret;
- if (!sb)
+ if (!fsi)
return -ENODEV;
- fsi = DEVPTS_SB(sb);
retry:
if (!ida_pre_get(&fsi->allocated_ptys, GFP_KERNEL))
return -ENOMEM;
@@ -564,11 +563,8 @@ retry:
return index;
}
-void devpts_kill_index(struct inode *ptmx_inode, int idx)
+void devpts_kill_index(struct pts_fs_info *fsi, int idx)
{
- struct super_block *sb = pts_sb_from_inode(ptmx_inode);
- struct pts_fs_info *fsi = DEVPTS_SB(sb);
-
mutex_lock(&allocated_ptys_lock);
ida_remove(&fsi->allocated_ptys, idx);
pty_count--;
@@ -578,21 +574,25 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx)
/*
* pty code needs to hold extra references in case of last /dev/tty close
*/
-
-void devpts_add_ref(struct inode *ptmx_inode)
+struct pts_fs_info *devpts_get_ref(struct inode *ptmx_inode, struct file *file)
{
- struct super_block *sb = pts_sb_from_inode(ptmx_inode);
+ struct super_block *sb;
+ struct pts_fs_info *fsi;
+
+ sb = pts_sb_from_inode(ptmx_inode);
+ if (!sb)
+ return NULL;
+ fsi = DEVPTS_SB(sb);
+ if (!fsi)
+ return NULL;
atomic_inc(&sb->s_active);
- ihold(ptmx_inode);
+ return fsi;
}
-void devpts_del_ref(struct inode *ptmx_inode)
+void devpts_put_ref(struct pts_fs_info *fsi)
{
- struct super_block *sb = pts_sb_from_inode(ptmx_inode);
-
- iput(ptmx_inode);
- deactivate_super(sb);
+ deactivate_super(fsi->sb);
}
/**
@@ -604,22 +604,21 @@ void devpts_del_ref(struct inode *ptmx_inode)
*
* The created inode is returned. Remove it from /dev/pts/ by devpts_pty_kill.
*/
-struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
+struct inode *devpts_pty_new(struct pts_fs_info *fsi, dev_t device, int index,
void *priv)
{
struct dentry *dentry;
- struct super_block *sb = pts_sb_from_inode(ptmx_inode);
+ struct super_block *sb;
struct inode *inode;
struct dentry *root;
- struct pts_fs_info *fsi;
struct pts_mount_opts *opts;
char s[12];
- if (!sb)
+ if (!fsi)
return ERR_PTR(-ENODEV);
+ sb = fsi->sb;
root = sb->s_root;
- fsi = DEVPTS_SB(sb);
opts = &fsi->mount_opts;
inode = new_inode(sb);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 01171d8a6ee9..c772fdf36cd9 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -575,7 +575,7 @@ static int dio_set_defer_completion(struct dio *dio)
/*
* Call into the fs to map some more disk blocks. We record the current number
* of available blocks at sdio->blocks_available. These are in units of the
- * fs blocksize, (1 << inode->i_blkbits).
+ * fs blocksize, i_blocksize(inode).
*
* The fs is allowed to map lots of blocks at once. If it wants to do that,
* it uses the passed inode-relative block number as the file offset, as usual.
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 3a37bd3f9637..9d7a4a714907 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1607,16 +1607,12 @@ void dlm_lowcomms_stop(void)
mutex_lock(&connections_lock);
dlm_allow_conn = 0;
foreach_conn(stop_conn);
+ clean_writequeues();
+ foreach_conn(free_conn);
mutex_unlock(&connections_lock);
work_stop();
- mutex_lock(&connections_lock);
- clean_writequeues();
-
- foreach_conn(free_conn);
-
- mutex_unlock(&connections_lock);
kmem_cache_destroy(con_cache);
}
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 173b3873a4f4..e40c440a4555 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -355,6 +355,10 @@ static int dlm_device_register(struct dlm_ls *ls, char *name)
error = misc_register(&ls->ls_device);
if (error) {
kfree(ls->ls_device.name);
+ /* this has to be set to NULL
+ * to avoid a double-free in dlm_device_deregister
+ */
+ ls->ls_device.name = NULL;
}
fail:
return error;
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index feef8a9c4de7..27794b137b24 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -112,7 +112,6 @@ static int ecryptfs_readdir(struct file *file, struct dir_context *ctx)
.sb = inode->i_sb,
};
lower_file = ecryptfs_file_to_lower(file);
- lower_file->f_pos = ctx->pos;
rc = iterate_dir(lower_file, &buf.ctx);
ctx->pos = buf.ctx.pos;
if (rc < 0)
@@ -170,6 +169,19 @@ out:
return rc;
}
+static int ecryptfs_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ struct file *lower_file = ecryptfs_file_to_lower(file);
+ /*
+ * Don't allow mmap on top of file systems that don't support it
+ * natively. If FILESYSTEM_MAX_STACK_DEPTH > 2 or ecryptfs
+ * allows recursive mounting, this will need to be extended.
+ */
+ if (!lower_file->f_op->mmap)
+ return -ENODEV;
+ return generic_file_mmap(file, vma);
+}
+
/**
* ecryptfs_open
* @inode: inode speciying file to open
@@ -223,14 +235,6 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
}
ecryptfs_set_file_lower(
file, ecryptfs_inode_to_private(inode)->lower_file);
- if (d_is_dir(ecryptfs_dentry)) {
- ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
- mutex_lock(&crypt_stat->cs_mutex);
- crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
- mutex_unlock(&crypt_stat->cs_mutex);
- rc = 0;
- goto out;
- }
rc = read_or_initialize_metadata(ecryptfs_dentry);
if (rc)
goto out_put;
@@ -247,6 +251,45 @@ out:
return rc;
}
+/**
+ * ecryptfs_dir_open
+ * @inode: inode speciying file to open
+ * @file: Structure to return filled in
+ *
+ * Opens the file specified by inode.
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+static int ecryptfs_dir_open(struct inode *inode, struct file *file)
+{
+ struct dentry *ecryptfs_dentry = file->f_path.dentry;
+ /* Private value of ecryptfs_dentry allocated in
+ * ecryptfs_lookup() */
+ struct ecryptfs_file_info *file_info;
+ struct file *lower_file;
+
+ /* Released in ecryptfs_release or end of function if failure */
+ file_info = kmem_cache_zalloc(ecryptfs_file_info_cache, GFP_KERNEL);
+ ecryptfs_set_file_private(file, file_info);
+ if (unlikely(!file_info)) {
+ ecryptfs_printk(KERN_ERR,
+ "Error attempting to allocate memory\n");
+ return -ENOMEM;
+ }
+ lower_file = dentry_open(ecryptfs_dentry_to_lower_path(ecryptfs_dentry),
+ file->f_flags, current_cred());
+ if (IS_ERR(lower_file)) {
+ printk(KERN_ERR "%s: Error attempting to initialize "
+ "the lower file for the dentry with name "
+ "[%pd]; rc = [%ld]\n", __func__,
+ ecryptfs_dentry, PTR_ERR(lower_file));
+ kmem_cache_free(ecryptfs_file_info_cache, file_info);
+ return PTR_ERR(lower_file);
+ }
+ ecryptfs_set_file_lower(file, lower_file);
+ return 0;
+}
+
static int ecryptfs_flush(struct file *file, fl_owner_t td)
{
struct file *lower_file = ecryptfs_file_to_lower(file);
@@ -267,6 +310,19 @@ static int ecryptfs_release(struct inode *inode, struct file *file)
return 0;
}
+static int ecryptfs_dir_release(struct inode *inode, struct file *file)
+{
+ fput(ecryptfs_file_to_lower(file));
+ kmem_cache_free(ecryptfs_file_info_cache,
+ ecryptfs_file_to_private(file));
+ return 0;
+}
+
+static loff_t ecryptfs_dir_llseek(struct file *file, loff_t offset, int whence)
+{
+ return vfs_llseek(ecryptfs_file_to_lower(file), offset, whence);
+}
+
static int
ecryptfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
@@ -346,25 +402,21 @@ const struct file_operations ecryptfs_dir_fops = {
#ifdef CONFIG_COMPAT
.compat_ioctl = ecryptfs_compat_ioctl,
#endif
- .open = ecryptfs_open,
- .flush = ecryptfs_flush,
- .release = ecryptfs_release,
+ .open = ecryptfs_dir_open,
+ .release = ecryptfs_dir_release,
.fsync = ecryptfs_fsync,
- .fasync = ecryptfs_fasync,
- .splice_read = generic_file_splice_read,
- .llseek = default_llseek,
+ .llseek = ecryptfs_dir_llseek,
};
const struct file_operations ecryptfs_main_fops = {
.llseek = generic_file_llseek,
.read_iter = ecryptfs_read_update_atime,
.write_iter = generic_file_write_iter,
- .iterate = ecryptfs_readdir,
.unlocked_ioctl = ecryptfs_unlocked_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ecryptfs_compat_ioctl,
#endif
- .mmap = generic_file_mmap,
+ .mmap = ecryptfs_mmap,
.open = ecryptfs_open,
.flush = ecryptfs_flush,
.release = ecryptfs_release,
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 1e009cad8d5c..1b08556776ce 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -518,8 +518,13 @@ static void ep_remove_wait_queue(struct eppoll_entry *pwq)
wait_queue_head_t *whead;
rcu_read_lock();
- /* If it is cleared by POLLFREE, it should be rcu-safe */
- whead = rcu_dereference(pwq->whead);
+ /*
+ * If it is cleared by POLLFREE, it should be rcu-safe.
+ * If we read NULL we need a barrier paired with
+ * smp_store_release() in ep_poll_callback(), otherwise
+ * we rely on whead->lock.
+ */
+ whead = smp_load_acquire(&pwq->whead);
if (whead)
remove_wait_queue(whead, &pwq->wait);
rcu_read_unlock();
@@ -1003,17 +1008,6 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
struct epitem *epi = ep_item_from_wait(wait);
struct eventpoll *ep = epi->ep;
- if ((unsigned long)key & POLLFREE) {
- ep_pwq_from_wait(wait)->whead = NULL;
- /*
- * whead = NULL above can race with ep_remove_wait_queue()
- * which can do another remove_wait_queue() after us, so we
- * can't use __remove_wait_queue(). whead->lock is held by
- * the caller.
- */
- list_del_init(&wait->task_list);
- }
-
spin_lock_irqsave(&ep->lock, flags);
/*
@@ -1078,6 +1072,23 @@ out_unlock:
if (pwake)
ep_poll_safewake(&ep->poll_wait);
+
+ if ((unsigned long)key & POLLFREE) {
+ /*
+ * If we race with ep_remove_wait_queue() it can miss
+ * ->whead = NULL and do another remove_wait_queue() after
+ * us, so we can't use __remove_wait_queue().
+ */
+ list_del_init(&wait->task_list);
+ /*
+ * ->whead != NULL protects us from the race with ep_free()
+ * or ep_remove(), ep_remove_wait_queue() takes whead->lock
+ * held by the caller. Once we nullify it, nothing protects
+ * ep/epi or even wait.
+ */
+ smp_store_release(&ep_pwq_from_wait(wait)->whead, NULL);
+ }
+
return 1;
}
diff --git a/fs/exec.c b/fs/exec.c
index b06623a9347f..9c5ee2a880aa 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -19,7 +19,7 @@
* current->executable is only used by the procfs. This allows a dispatch
* table to check for several different types of binary formats. We keep
* trying until we recognize the file or we run out of supported binary
- * formats.
+ * formats.
*/
#include <linux/slab.h>
@@ -56,6 +56,7 @@
#include <linux/pipe_fs_i.h>
#include <linux/oom.h>
#include <linux/compat.h>
+#include <linux/user_namespace.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
@@ -205,7 +206,24 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
if (write) {
unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
- struct rlimit *rlim;
+ unsigned long ptr_size, limit;
+
+ /*
+ * Since the stack will hold pointers to the strings, we
+ * must account for them as well.
+ *
+ * The size calculation is the entire vma while each arg page is
+ * built, so each time we get here it's calculating how far it
+ * is currently (rather than each call being just the newly
+ * added size from the arg page). As a result, we need to
+ * always add the entire size of the pointers, so that on the
+ * last call to get_arg_page() we'll actually have the entire
+ * correct size.
+ */
+ ptr_size = (bprm->argc + bprm->envc) * sizeof(void *);
+ if (ptr_size > ULONG_MAX - size)
+ goto fail;
+ size += ptr_size;
acct_arg_size(bprm, size / PAGE_SIZE);
@@ -217,20 +235,24 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
return page;
/*
- * Limit to 1/4-th the stack size for the argv+env strings.
+ * Limit to 1/4 of the max stack size or 3/4 of _STK_LIM
+ * (whichever is smaller) for the argv+env strings.
* This ensures that:
* - the remaining binfmt code will not run out of stack space,
* - the program will have a reasonable amount of stack left
* to work from.
*/
- rlim = current->signal->rlim;
- if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur) / 4) {
- put_page(page);
- return NULL;
- }
+ limit = _STK_LIM / 4 * 3;
+ limit = min(limit, rlimit(RLIMIT_STACK) / 4);
+ if (size > limit)
+ goto fail;
}
return page;
+
+fail:
+ put_page(page);
+ return NULL;
}
static void put_arg_page(struct page *page)
@@ -1114,6 +1136,13 @@ int flush_old_exec(struct linux_binprm * bprm)
flush_thread();
current->personality &= ~bprm->per_clear;
+ /*
+ * We have to apply CLOEXEC before we change whether the process is
+ * dumpable (in setup_new_exec) to avoid a race with a process in userspace
+ * trying to access the should-be-closed file descriptors of a process
+ * undergoing exec(2).
+ */
+ do_close_on_exec(current->files);
return 0;
out:
@@ -1123,8 +1152,22 @@ EXPORT_SYMBOL(flush_old_exec);
void would_dump(struct linux_binprm *bprm, struct file *file)
{
- if (inode_permission(file_inode(file), MAY_READ) < 0)
+ struct inode *inode = file_inode(file);
+ if (inode_permission(inode, MAY_READ) < 0) {
+ struct user_namespace *old, *user_ns;
bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
+
+ /* Ensure mm->user_ns contains the executable */
+ user_ns = old = bprm->mm->user_ns;
+ while ((user_ns != &init_user_ns) &&
+ !privileged_wrt_inode_uidgid(user_ns, inode))
+ user_ns = user_ns->parent;
+
+ if (old != user_ns) {
+ bprm->mm->user_ns = get_user_ns(user_ns);
+ put_user_ns(old);
+ }
+ }
}
EXPORT_SYMBOL(would_dump);
@@ -1154,7 +1197,6 @@ void setup_new_exec(struct linux_binprm * bprm)
!gid_eq(bprm->cred->gid, current_egid())) {
current->pdeath_signal = 0;
} else {
- would_dump(bprm, bprm->file);
if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP)
set_dumpable(current->mm, suid_dumpable);
}
@@ -1163,7 +1205,6 @@ void setup_new_exec(struct linux_binprm * bprm)
group */
current->self_exec_id++;
flush_signal_handlers(current, 0);
- do_close_on_exec(current->files);
}
EXPORT_SYMBOL(setup_new_exec);
@@ -1254,7 +1295,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
unsigned n_fs;
if (p->ptrace) {
- if (p->ptrace & PT_PTRACE_CAP)
+ if (ptracer_capable(p, current_user_ns()))
bprm->unsafe |= LSM_UNSAFE_PTRACE_CAP;
else
bprm->unsafe |= LSM_UNSAFE_PTRACE;
@@ -1587,6 +1628,8 @@ static int do_execveat_common(int fd, struct filename *filename,
if (retval < 0)
goto out;
+ would_dump(bprm, bprm->file);
+
retval = exec_binprm(bprm);
if (retval < 0)
goto out;
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index 27695e6f4e46..d6aeb84e90b6 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -193,15 +193,11 @@ ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
case ACL_TYPE_ACCESS:
name_index = EXT2_XATTR_INDEX_POSIX_ACL_ACCESS;
if (acl) {
- error = posix_acl_equiv_mode(acl, &inode->i_mode);
- if (error < 0)
+ error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+ if (error)
return error;
- else {
- inode->i_ctime = CURRENT_TIME_SEC;
- mark_inode_dirty(inode);
- if (error == 0)
- acl = NULL;
- }
+ inode->i_ctime = CURRENT_TIME_SEC;
+ mark_inode_dirty(inode);
}
break;
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 69b1e73026a5..c3fe1e323951 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -196,15 +196,11 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type,
case ACL_TYPE_ACCESS:
name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS;
if (acl) {
- error = posix_acl_equiv_mode(acl, &inode->i_mode);
- if (error < 0)
+ error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+ if (error)
return error;
- else {
- inode->i_ctime = ext4_current_time(inode);
- ext4_mark_inode_dirty(handle, inode);
- if (error == 0)
- acl = NULL;
- }
+ inode->i_ctime = ext4_current_time(inode);
+ ext4_mark_inode_dirty(handle, inode);
}
break;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index fe1f50fe764f..f97110461c19 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -208,6 +208,9 @@ static int ext4_init_block_bitmap(struct super_block *sb,
memset(bh->b_data, 0, sb->s_blocksize);
bit_max = ext4_num_base_meta_clusters(sb, block_group);
+ if ((bit_max >> 3) >= bh->b_size)
+ return -EFSCORRUPTED;
+
for (bit = 0; bit < bit_max; bit++)
ext4_set_bit(bit, bh->b_data);
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
index 1a0835073663..f240cef8b326 100644
--- a/fs/ext4/crypto.c
+++ b/fs/ext4/crypto.c
@@ -34,6 +34,7 @@
#include <linux/random.h>
#include <linux/scatterlist.h>
#include <linux/spinlock_types.h>
+#include <linux/namei.h>
#include "ext4_extents.h"
#include "xattr.h"
@@ -93,7 +94,8 @@ void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx)
* Return: An allocated and initialized encryption context on success; error
* value or NULL otherwise.
*/
-struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode)
+struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode,
+ gfp_t gfp_flags)
{
struct ext4_crypto_ctx *ctx = NULL;
int res = 0;
@@ -120,7 +122,7 @@ struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode)
list_del(&ctx->free_list);
spin_unlock_irqrestore(&ext4_crypto_ctx_lock, flags);
if (!ctx) {
- ctx = kmem_cache_zalloc(ext4_crypto_ctx_cachep, GFP_NOFS);
+ ctx = kmem_cache_zalloc(ext4_crypto_ctx_cachep, gfp_flags);
if (!ctx) {
res = -ENOMEM;
goto out;
@@ -257,7 +259,8 @@ static int ext4_page_crypto(struct inode *inode,
ext4_direction_t rw,
pgoff_t index,
struct page *src_page,
- struct page *dest_page)
+ struct page *dest_page,
+ gfp_t gfp_flags)
{
u8 xts_tweak[EXT4_XTS_TWEAK_SIZE];
@@ -268,7 +271,7 @@ static int ext4_page_crypto(struct inode *inode,
struct crypto_ablkcipher *tfm = ci->ci_ctfm;
int res = 0;
- req = ablkcipher_request_alloc(tfm, GFP_NOFS);
+ req = ablkcipher_request_alloc(tfm, gfp_flags);
if (!req) {
printk_ratelimited(KERN_ERR
"%s: crypto_request_alloc() failed\n",
@@ -309,9 +312,10 @@ static int ext4_page_crypto(struct inode *inode,
return 0;
}
-static struct page *alloc_bounce_page(struct ext4_crypto_ctx *ctx)
+static struct page *alloc_bounce_page(struct ext4_crypto_ctx *ctx,
+ gfp_t gfp_flags)
{
- ctx->w.bounce_page = mempool_alloc(ext4_bounce_page_pool, GFP_NOWAIT);
+ ctx->w.bounce_page = mempool_alloc(ext4_bounce_page_pool, gfp_flags);
if (ctx->w.bounce_page == NULL)
return ERR_PTR(-ENOMEM);
ctx->flags |= EXT4_WRITE_PATH_FL;
@@ -334,7 +338,8 @@ static struct page *alloc_bounce_page(struct ext4_crypto_ctx *ctx)
* error value or NULL.
*/
struct page *ext4_encrypt(struct inode *inode,
- struct page *plaintext_page)
+ struct page *plaintext_page,
+ gfp_t gfp_flags)
{
struct ext4_crypto_ctx *ctx;
struct page *ciphertext_page = NULL;
@@ -342,17 +347,17 @@ struct page *ext4_encrypt(struct inode *inode,
BUG_ON(!PageLocked(plaintext_page));
- ctx = ext4_get_crypto_ctx(inode);
+ ctx = ext4_get_crypto_ctx(inode, gfp_flags);
if (IS_ERR(ctx))
return (struct page *) ctx;
/* The encryption operation will require a bounce page. */
- ciphertext_page = alloc_bounce_page(ctx);
+ ciphertext_page = alloc_bounce_page(ctx, gfp_flags);
if (IS_ERR(ciphertext_page))
goto errout;
ctx->w.control_page = plaintext_page;
err = ext4_page_crypto(inode, EXT4_ENCRYPT, plaintext_page->index,
- plaintext_page, ciphertext_page);
+ plaintext_page, ciphertext_page, gfp_flags);
if (err) {
ciphertext_page = ERR_PTR(err);
errout:
@@ -380,8 +385,8 @@ int ext4_decrypt(struct page *page)
{
BUG_ON(!PageLocked(page));
- return ext4_page_crypto(page->mapping->host,
- EXT4_DECRYPT, page->index, page, page);
+ return ext4_page_crypto(page->mapping->host, EXT4_DECRYPT,
+ page->index, page, page, GFP_NOFS);
}
int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
@@ -402,11 +407,11 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
BUG_ON(inode->i_sb->s_blocksize != PAGE_CACHE_SIZE);
- ctx = ext4_get_crypto_ctx(inode);
+ ctx = ext4_get_crypto_ctx(inode, GFP_NOFS);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- ciphertext_page = alloc_bounce_page(ctx);
+ ciphertext_page = alloc_bounce_page(ctx, GFP_NOWAIT);
if (IS_ERR(ciphertext_page)) {
err = PTR_ERR(ciphertext_page);
goto errout;
@@ -414,11 +419,12 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex)
while (len--) {
err = ext4_page_crypto(inode, EXT4_ENCRYPT, lblk,
- ZERO_PAGE(0), ciphertext_page);
+ ZERO_PAGE(0), ciphertext_page,
+ GFP_NOFS);
if (err)
goto errout;
- bio = bio_alloc(GFP_KERNEL, 1);
+ bio = bio_alloc(GFP_NOWAIT, 1);
if (!bio) {
err = -ENOMEM;
goto errout;
@@ -469,3 +475,61 @@ uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size)
return size;
return 0;
}
+
+/*
+ * Validate dentries for encrypted directories to make sure we aren't
+ * potentially caching stale data after a key has been added or
+ * removed.
+ */
+static int ext4_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ struct dentry *dir;
+ struct ext4_crypt_info *ci;
+ int dir_has_key, cached_with_key;
+
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ dir = dget_parent(dentry);
+ if (!ext4_encrypted_inode(d_inode(dir))) {
+ dput(dir);
+ return 0;
+ }
+ ci = EXT4_I(d_inode(dir))->i_crypt_info;
+
+ /* this should eventually be an flag in d_flags */
+ cached_with_key = dentry->d_fsdata != NULL;
+ dir_has_key = (ci != NULL);
+ dput(dir);
+
+ /*
+ * If the dentry was cached without the key, and it is a
+ * negative dentry, it might be a valid name. We can't check
+ * if the key has since been made available due to locking
+ * reasons, so we fail the validation so ext4_lookup() can do
+ * this check.
+ *
+ * We also fail the validation if the dentry was created with
+ * the key present, but we no longer have the key, or vice versa.
+ */
+ if ((!cached_with_key && d_is_negative(dentry)) ||
+ (!cached_with_key && dir_has_key) ||
+ (cached_with_key && !dir_has_key)) {
+#if 0 /* Revalidation debug */
+ char buf[80];
+ char *cp = simple_dname(dentry, buf, sizeof(buf));
+
+ if (IS_ERR(cp))
+ cp = (char *) "???";
+ pr_err("revalidate: %s %p %d %d %d\n", cp, dentry->d_fsdata,
+ cached_with_key, d_is_negative(dentry),
+ dir_has_key);
+#endif
+ return 0;
+ }
+ return 1;
+}
+
+const struct dentry_operations ext4_encrypted_d_ops = {
+ .d_revalidate = ext4_d_revalidate,
+};
diff --git a/fs/ext4/crypto_fname.c b/fs/ext4/crypto_fname.c
index 2fbef8a14760..2cfe3ffc276f 100644
--- a/fs/ext4/crypto_fname.c
+++ b/fs/ext4/crypto_fname.c
@@ -343,7 +343,7 @@ int _ext4_fname_disk_to_usr(struct inode *inode,
memcpy(buf+4, &hinfo->minor_hash, 4);
} else
memset(buf, 0, 8);
- memcpy(buf + 8, iname->name + iname->len - 16, 16);
+ memcpy(buf + 8, iname->name + ((iname->len - 17) & ~15), 16);
oname->name[0] = '_';
ret = digest_encode(buf, 24, oname->name+1);
oname->len = ret + 1;
diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c
index 9a16d1e75a49..505f8afde57c 100644
--- a/fs/ext4/crypto_key.c
+++ b/fs/ext4/crypto_key.c
@@ -88,8 +88,6 @@ void ext4_free_crypt_info(struct ext4_crypt_info *ci)
if (!ci)
return;
- if (ci->ci_keyring_key)
- key_put(ci->ci_keyring_key);
crypto_free_ablkcipher(ci->ci_ctfm);
kmem_cache_free(ext4_crypt_info_cachep, ci);
}
@@ -111,7 +109,7 @@ void ext4_free_encryption_info(struct inode *inode,
ext4_free_crypt_info(ci);
}
-int _ext4_get_encryption_info(struct inode *inode)
+int ext4_get_encryption_info(struct inode *inode)
{
struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_crypt_info *crypt_info;
@@ -128,22 +126,15 @@ int _ext4_get_encryption_info(struct inode *inode)
char mode;
int res;
+ if (ei->i_crypt_info)
+ return 0;
+
if (!ext4_read_workqueue) {
res = ext4_init_crypto();
if (res)
return res;
}
-retry:
- crypt_info = ACCESS_ONCE(ei->i_crypt_info);
- if (crypt_info) {
- if (!crypt_info->ci_keyring_key ||
- key_validate(crypt_info->ci_keyring_key) == 0)
- return 0;
- ext4_free_encryption_info(inode, crypt_info);
- goto retry;
- }
-
res = ext4_xattr_get(inode, EXT4_XATTR_INDEX_ENCRYPTION,
EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
&ctx, sizeof(ctx));
@@ -166,7 +157,6 @@ retry:
crypt_info->ci_data_mode = ctx.contents_encryption_mode;
crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
crypt_info->ci_ctfm = NULL;
- crypt_info->ci_keyring_key = NULL;
memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
sizeof(crypt_info->ci_master_key));
if (S_ISREG(inode->i_mode))
@@ -206,7 +196,6 @@ retry:
keyring_key = NULL;
goto out;
}
- crypt_info->ci_keyring_key = keyring_key;
if (keyring_key->type != &key_type_logon) {
printk_once(KERN_WARNING
"ext4: key type must be logon\n");
@@ -253,16 +242,13 @@ got_key:
ext4_encryption_key_size(mode));
if (res)
goto out;
- memzero_explicit(raw_key, sizeof(raw_key));
- if (cmpxchg(&ei->i_crypt_info, NULL, crypt_info) != NULL) {
- ext4_free_crypt_info(crypt_info);
- goto retry;
- }
- return 0;
+ if (cmpxchg(&ei->i_crypt_info, NULL, crypt_info) == NULL)
+ crypt_info = NULL;
out:
if (res == -ENOKEY)
res = 0;
+ key_put(keyring_key);
ext4_free_crypt_info(crypt_info);
memzero_explicit(raw_key, sizeof(raw_key));
return res;
diff --git a/fs/ext4/crypto_policy.c b/fs/ext4/crypto_policy.c
index ad050698143f..e4f4fc4e56ab 100644
--- a/fs/ext4/crypto_policy.c
+++ b/fs/ext4/crypto_policy.c
@@ -102,6 +102,9 @@ static int ext4_create_encryption_context_from_policy(
int ext4_process_policy(const struct ext4_encryption_policy *policy,
struct inode *inode)
{
+ if (!inode_owner_or_capable(inode))
+ return -EACCES;
+
if (policy->version != 0)
return -EINVAL;
@@ -145,20 +148,38 @@ int ext4_get_policy(struct inode *inode, struct ext4_encryption_policy *policy)
int ext4_is_child_context_consistent_with_parent(struct inode *parent,
struct inode *child)
{
- struct ext4_crypt_info *parent_ci, *child_ci;
+ const struct ext4_crypt_info *parent_ci, *child_ci;
+ struct ext4_encryption_context parent_ctx, child_ctx;
int res;
- if ((parent == NULL) || (child == NULL)) {
- pr_err("parent %p child %p\n", parent, child);
- WARN_ON(1); /* Should never happen */
- return 0;
- }
- /* no restrictions if the parent directory is not encrypted */
+ /* No restrictions on file types which are never encrypted */
+ if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) &&
+ !S_ISLNK(child->i_mode))
+ return 1;
+
+ /* No restrictions if the parent directory is unencrypted */
if (!ext4_encrypted_inode(parent))
return 1;
- /* if the child directory is not encrypted, this is always a problem */
+
+ /* Encrypted directories must not contain unencrypted files */
if (!ext4_encrypted_inode(child))
return 0;
+
+ /*
+ * Both parent and child are encrypted, so verify they use the same
+ * encryption policy. Compare the fscrypt_info structs if the keys are
+ * available, otherwise retrieve and compare the fscrypt_contexts.
+ *
+ * Note that the fscrypt_context retrieval will be required frequently
+ * when accessing an encrypted directory tree without the key.
+ * Performance-wise this is not a big deal because we already don't
+ * really optimize for file access without the key (to the extent that
+ * such access is even possible), given that any attempted access
+ * already causes a fscrypt_context retrieval and keyring search.
+ *
+ * In any case, if an unexpected error occurs, fall back to "forbidden".
+ */
+
res = ext4_get_encryption_info(parent);
if (res)
return 0;
@@ -167,17 +188,35 @@ int ext4_is_child_context_consistent_with_parent(struct inode *parent,
return 0;
parent_ci = EXT4_I(parent)->i_crypt_info;
child_ci = EXT4_I(child)->i_crypt_info;
- if (!parent_ci && !child_ci)
- return 1;
- if (!parent_ci || !child_ci)
+ if (parent_ci && child_ci) {
+ return memcmp(parent_ci->ci_master_key, child_ci->ci_master_key,
+ EXT4_KEY_DESCRIPTOR_SIZE) == 0 &&
+ (parent_ci->ci_data_mode == child_ci->ci_data_mode) &&
+ (parent_ci->ci_filename_mode ==
+ child_ci->ci_filename_mode) &&
+ (parent_ci->ci_flags == child_ci->ci_flags);
+ }
+
+ res = ext4_xattr_get(parent, EXT4_XATTR_INDEX_ENCRYPTION,
+ EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
+ &parent_ctx, sizeof(parent_ctx));
+ if (res != sizeof(parent_ctx))
+ return 0;
+
+ res = ext4_xattr_get(child, EXT4_XATTR_INDEX_ENCRYPTION,
+ EXT4_XATTR_NAME_ENCRYPTION_CONTEXT,
+ &child_ctx, sizeof(child_ctx));
+ if (res != sizeof(child_ctx))
return 0;
- return (memcmp(parent_ci->ci_master_key,
- child_ci->ci_master_key,
- EXT4_KEY_DESCRIPTOR_SIZE) == 0 &&
- (parent_ci->ci_data_mode == child_ci->ci_data_mode) &&
- (parent_ci->ci_filename_mode == child_ci->ci_filename_mode) &&
- (parent_ci->ci_flags == child_ci->ci_flags));
+ return memcmp(parent_ctx.master_key_descriptor,
+ child_ctx.master_key_descriptor,
+ EXT4_KEY_DESCRIPTOR_SIZE) == 0 &&
+ (parent_ctx.contents_encryption_mode ==
+ child_ctx.contents_encryption_mode) &&
+ (parent_ctx.filenames_encryption_mode ==
+ child_ctx.filenames_encryption_mode) &&
+ (parent_ctx.flags == child_ctx.flags);
}
/**
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 1d1bca74f844..6d17f31a31d7 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -111,6 +111,12 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
int dir_has_error = 0;
struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
+ if (ext4_encrypted_inode(inode)) {
+ err = ext4_get_encryption_info(inode);
+ if (err && err != -ENOKEY)
+ return err;
+ }
+
if (is_dx_dir(inode)) {
err = ext4_dx_readdir(file, ctx);
if (err != ERR_BAD_DX_DIR) {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index cc7ca4e87144..c8ad14c697c4 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -221,6 +221,7 @@ struct ext4_io_submit {
#define EXT4_MAX_BLOCK_SIZE 65536
#define EXT4_MIN_BLOCK_LOG_SIZE 10
#define EXT4_MAX_BLOCK_LOG_SIZE 16
+#define EXT4_MAX_CLUSTER_LOG_SIZE 30
#ifdef __KERNEL__
# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize)
#else
@@ -850,6 +851,29 @@ do { \
#include "extents_status.h"
/*
+ * Lock subclasses for i_data_sem in the ext4_inode_info structure.
+ *
+ * These are needed to avoid lockdep false positives when we need to
+ * allocate blocks to the quota inode during ext4_map_blocks(), while
+ * holding i_data_sem for a normal (non-quota) inode. Since we don't
+ * do quota tracking for the quota inode, this avoids deadlock (as
+ * well as infinite recursion, since it isn't turtles all the way
+ * down...)
+ *
+ * I_DATA_SEM_NORMAL - Used for most inodes
+ * I_DATA_SEM_OTHER - Used by move_inode.c for the second normal inode
+ * where the second inode has larger inode number
+ * than the first
+ * I_DATA_SEM_QUOTA - Used for quota inodes only
+ */
+enum {
+ I_DATA_SEM_NORMAL = 0,
+ I_DATA_SEM_OTHER,
+ I_DATA_SEM_QUOTA,
+};
+
+
+/*
* fourth extended file system inode data in memory
*/
struct ext4_inode_info {
@@ -910,6 +934,15 @@ struct ext4_inode_info {
* by other means, so we have i_data_sem.
*/
struct rw_semaphore i_data_sem;
+ /*
+ * i_mmap_sem is for serializing page faults with truncate / punch hole
+ * operations. We have to make sure that new page cannot be faulted in
+ * a section of the inode that is being punched. We cannot easily use
+ * i_data_sem for this since we need protection for the whole punch
+ * operation and i_data_sem ranks below transaction start so we have
+ * to occasionally drop it.
+ */
+ struct rw_semaphore i_mmap_sem;
struct inode vfs_inode;
struct jbd2_inode *jinode;
@@ -2228,13 +2261,16 @@ extern struct kmem_cache *ext4_crypt_info_cachep;
bool ext4_valid_contents_enc_mode(uint32_t mode);
uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size);
extern struct workqueue_struct *ext4_read_workqueue;
-struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode);
+struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode,
+ gfp_t gfp_flags);
void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx);
void ext4_restore_control_page(struct page *data_page);
struct page *ext4_encrypt(struct inode *inode,
- struct page *plaintext_page);
+ struct page *plaintext_page,
+ gfp_t gfp_flags);
int ext4_decrypt(struct page *page);
int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex);
+extern const struct dentry_operations ext4_encrypted_d_ops;
#ifdef CONFIG_EXT4_FS_ENCRYPTION
int ext4_init_crypto(void);
@@ -2297,23 +2333,11 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname) { }
/* crypto_key.c */
void ext4_free_crypt_info(struct ext4_crypt_info *ci);
void ext4_free_encryption_info(struct inode *inode, struct ext4_crypt_info *ci);
-int _ext4_get_encryption_info(struct inode *inode);
#ifdef CONFIG_EXT4_FS_ENCRYPTION
int ext4_has_encryption_key(struct inode *inode);
-static inline int ext4_get_encryption_info(struct inode *inode)
-{
- struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
-
- if (!ci ||
- (ci->ci_keyring_key &&
- (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
- (1 << KEY_FLAG_REVOKED) |
- (1 << KEY_FLAG_DEAD)))))
- return _ext4_get_encryption_info(inode);
- return 0;
-}
+int ext4_get_encryption_info(struct inode *inode);
static inline struct ext4_crypt_info *ext4_encryption_info(struct inode *inode)
{
@@ -2484,6 +2508,7 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
loff_t lstart, loff_t lend);
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
+extern int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
extern qsize_t *ext4_get_reserved_space(struct inode *inode);
extern void ext4_da_update_reserve_space(struct inode *inode,
int used, int quota_claim);
@@ -2848,6 +2873,9 @@ static inline int ext4_update_inode_size(struct inode *inode, loff_t newsize)
return changed;
}
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+ loff_t len);
+
struct ext4_group_info {
unsigned long bb_state;
struct rb_root bb_free_root;
diff --git a/fs/ext4/ext4_crypto.h b/fs/ext4/ext4_crypto.h
index ac7d4e813796..1b17b05b9f4d 100644
--- a/fs/ext4/ext4_crypto.h
+++ b/fs/ext4/ext4_crypto.h
@@ -78,7 +78,6 @@ struct ext4_crypt_info {
char ci_filename_mode;
char ci_flags;
struct crypto_ablkcipher *ci_ctfm;
- struct key *ci_keyring_key;
char ci_master_key[EXT4_KEY_DESCRIPTOR_SIZE];
};
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 5f5846211095..f817ed58f5ad 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -395,17 +395,19 @@ static inline int ext4_inode_journal_mode(struct inode *inode)
return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */
/* We do not support data journalling with delayed allocation */
if (!S_ISREG(inode->i_mode) ||
- test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
- return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */
- if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) &&
- !test_opt(inode->i_sb, DELALLOC))
+ test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
+ (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) &&
+ !test_opt(inode->i_sb, DELALLOC))) {
+ /* We do not support data journalling for encrypted data */
+ if (S_ISREG(inode->i_mode) && ext4_encrypted_inode(inode))
+ return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */
return EXT4_INODE_JOURNAL_DATA_MODE; /* journal data */
+ }
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
return EXT4_INODE_ORDERED_DATA_MODE; /* ordered */
if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
return EXT4_INODE_WRITEBACK_DATA_MODE; /* writeback */
- else
- BUG();
+ BUG();
}
static inline int ext4_should_journal_data(struct inode *inode)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 551353b1b17a..61d5bfc7318c 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -376,9 +376,13 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
ext4_fsblk_t block = ext4_ext_pblock(ext);
int len = ext4_ext_get_actual_len(ext);
ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
- ext4_lblk_t last = lblock + len - 1;
- if (len == 0 || lblock > last)
+ /*
+ * We allow neither:
+ * - zero length
+ * - overflow/wrap-around
+ */
+ if (lblock + len <= lblock)
return 0;
return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
}
@@ -469,6 +473,10 @@ static int __ext4_ext_check(const char *function, unsigned int line,
error_msg = "invalid extent entries";
goto corrupted;
}
+ if (unlikely(depth > 32)) {
+ error_msg = "too large eh_depth";
+ goto corrupted;
+ }
/* Verify checksum on non-root extent tree nodes */
if (ext_depth(inode) != depth &&
!ext4_extent_block_csum_verify(inode, eh)) {
@@ -4685,10 +4693,6 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
if (len <= EXT_UNWRITTEN_MAX_LEN)
flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
- /* Wait all existing dio workers, newcomers will block on i_mutex */
- ext4_inode_block_unlocked_dio(inode);
- inode_dio_wait(inode);
-
/*
* credits to insert 1 extent into extent tree
*/
@@ -4752,8 +4756,6 @@ retry:
goto retry;
}
- ext4_inode_resume_unlocked_dio(inode);
-
return ret > 0 ? ret2 : ret;
}
@@ -4770,7 +4772,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
int partial_begin, partial_end;
loff_t start, end;
ext4_lblk_t lblk;
- struct address_space *mapping = inode->i_mapping;
unsigned int blkbits = inode->i_blkbits;
trace_ext4_zero_range(inode, offset, len, mode);
@@ -4786,17 +4787,6 @@ static long ext4_zero_range(struct file *file, loff_t offset,
}
/*
- * Write out all dirty pages to avoid race conditions
- * Then release them.
- */
- if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
- ret = filemap_write_and_wait_range(mapping, offset,
- offset + len - 1);
- if (ret)
- return ret;
- }
-
- /*
* Round up offset. This is not fallocate, we neet to zero out
* blocks, so convert interior block aligned part of the range to
* unwritten and possibly manually zero out unaligned parts of the
@@ -4839,6 +4829,10 @@ static long ext4_zero_range(struct file *file, loff_t offset,
if (mode & FALLOC_FL_KEEP_SIZE)
flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
+ /* Wait all existing dio workers, newcomers will block on i_mutex */
+ ext4_inode_block_unlocked_dio(inode);
+ inode_dio_wait(inode);
+
/* Preallocate the range including the unaligned edges */
if (partial_begin || partial_end) {
ret = ext4_alloc_file_blocks(file,
@@ -4847,7 +4841,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
round_down(offset, 1 << blkbits)) >> blkbits,
new_size, flags, mode);
if (ret)
- goto out_mutex;
+ goto out_dio;
}
@@ -4856,16 +4850,23 @@ static long ext4_zero_range(struct file *file, loff_t offset,
flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
EXT4_EX_NOCACHE);
- /* Now release the pages and zero block aligned part of pages*/
+ /*
+ * Prevent page faults from reinstantiating pages we have
+ * released from page cache.
+ */
+ down_write(&EXT4_I(inode)->i_mmap_sem);
+ ret = ext4_update_disksize_before_punch(inode, offset, len);
+ if (ret) {
+ up_write(&EXT4_I(inode)->i_mmap_sem);
+ goto out_dio;
+ }
+ /* Now release the pages and zero block aligned part of pages */
truncate_pagecache_range(inode, start, end - 1);
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
- /* Wait all existing dio workers, newcomers will block on i_mutex */
- ext4_inode_block_unlocked_dio(inode);
- inode_dio_wait(inode);
-
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
flags, mode);
+ up_write(&EXT4_I(inode)->i_mmap_sem);
if (ret)
goto out_dio;
}
@@ -4901,6 +4902,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
/* Zero out partial block at the edges of the range */
ret = ext4_zero_partial_blocks(handle, inode, offset, len);
+ if (ret >= 0)
+ ext4_update_inode_fsync_trans(handle, inode, 1);
if (file->f_flags & O_SYNC)
ext4_handle_sync(handle);
@@ -4998,8 +5001,13 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
goto out;
}
+ /* Wait all existing dio workers, newcomers will block on i_mutex */
+ ext4_inode_block_unlocked_dio(inode);
+ inode_dio_wait(inode);
+
ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
flags, mode);
+ ext4_inode_resume_unlocked_dio(inode);
if (ret)
goto out;
@@ -5356,7 +5364,8 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
ext4_lblk_t stop, *iterator, ex_start, ex_end;
/* Let path point to the last extent */
- path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
+ path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
+ EXT4_EX_NOCACHE);
if (IS_ERR(path))
return PTR_ERR(path);
@@ -5365,15 +5374,15 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
if (!extent)
goto out;
- stop = le32_to_cpu(extent->ee_block) +
- ext4_ext_get_actual_len(extent);
+ stop = le32_to_cpu(extent->ee_block);
/*
* In case of left shift, Don't start shifting extents until we make
* sure the hole is big enough to accommodate the shift.
*/
if (SHIFT == SHIFT_LEFT) {
- path = ext4_find_extent(inode, start - 1, &path, 0);
+ path = ext4_find_extent(inode, start - 1, &path,
+ EXT4_EX_NOCACHE);
if (IS_ERR(path))
return PTR_ERR(path);
depth = path->p_depth;
@@ -5405,9 +5414,14 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
else
iterator = &stop;
- /* Its safe to start updating extents */
- while (start < stop) {
- path = ext4_find_extent(inode, *iterator, &path, 0);
+ /*
+ * Its safe to start updating extents. Start and stop are unsigned, so
+ * in case of right shift if extent with 0 block is reached, iterator
+ * becomes NULL to indicate the end of the loop.
+ */
+ while (iterator && start <= stop) {
+ path = ext4_find_extent(inode, *iterator, &path,
+ EXT4_EX_NOCACHE);
if (IS_ERR(path))
return PTR_ERR(path);
depth = path->p_depth;
@@ -5434,8 +5448,11 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
ext4_ext_get_actual_len(extent);
} else {
extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
- *iterator = le32_to_cpu(extent->ee_block) > 0 ?
- le32_to_cpu(extent->ee_block) - 1 : 0;
+ if (le32_to_cpu(extent->ee_block) > 0)
+ *iterator = le32_to_cpu(extent->ee_block) - 1;
+ else
+ /* Beginning is reached, end of the loop */
+ iterator = NULL;
/* Update path extent in case we need to stop */
while (le32_to_cpu(extent->ee_block) < start)
extent++;
@@ -5494,21 +5511,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
return ret;
}
- /*
- * Need to round down offset to be aligned with page size boundary
- * for page size > block size.
- */
- ioffset = round_down(offset, PAGE_SIZE);
-
- /* Write out all dirty pages */
- ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
- LLONG_MAX);
- if (ret)
- return ret;
-
- /* Take mutex lock */
mutex_lock(&inode->i_mutex);
-
/*
* There is no need to overlap collapse range with EOF, in which case
* it is effectively a truncate operation
@@ -5524,17 +5527,43 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
goto out_mutex;
}
- truncate_pagecache(inode, ioffset);
-
/* Wait for existing dio to complete */
ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
+ /*
+ * Prevent page faults from reinstantiating pages we have released from
+ * page cache.
+ */
+ down_write(&EXT4_I(inode)->i_mmap_sem);
+ /*
+ * Need to round down offset to be aligned with page size boundary
+ * for page size > block size.
+ */
+ ioffset = round_down(offset, PAGE_SIZE);
+ /*
+ * Write tail of the last page before removed range since it will get
+ * removed from the page cache below.
+ */
+ ret = filemap_write_and_wait_range(inode->i_mapping, ioffset, offset);
+ if (ret)
+ goto out_mmap;
+ /*
+ * Write data that will be shifted to preserve them when discarding
+ * page cache below. We are also protected from pages becoming dirty
+ * by i_mmap_sem.
+ */
+ ret = filemap_write_and_wait_range(inode->i_mapping, offset + len,
+ LLONG_MAX);
+ if (ret)
+ goto out_mmap;
+ truncate_pagecache(inode, ioffset);
+
credits = ext4_writepage_trans_blocks(inode);
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
- goto out_dio;
+ goto out_mmap;
}
down_write(&EXT4_I(inode)->i_data_sem);
@@ -5570,10 +5599,12 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
ext4_handle_sync(handle);
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
+ ext4_update_inode_fsync_trans(handle, inode, 1);
out_stop:
ext4_journal_stop(handle);
-out_dio:
+out_mmap:
+ up_write(&EXT4_I(inode)->i_mmap_sem);
ext4_inode_resume_unlocked_dio(inode);
out_mutex:
mutex_unlock(&inode->i_mutex);
@@ -5627,21 +5658,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
return ret;
}
- /*
- * Need to round down to align start offset to page size boundary
- * for page size > block size.
- */
- ioffset = round_down(offset, PAGE_SIZE);
-
- /* Write out all dirty pages */
- ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
- LLONG_MAX);
- if (ret)
- return ret;
-
- /* Take mutex lock */
mutex_lock(&inode->i_mutex);
-
/* Currently just for extent based files */
if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
ret = -EOPNOTSUPP;
@@ -5660,17 +5677,32 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
goto out_mutex;
}
- truncate_pagecache(inode, ioffset);
-
/* Wait for existing dio to complete */
ext4_inode_block_unlocked_dio(inode);
inode_dio_wait(inode);
+ /*
+ * Prevent page faults from reinstantiating pages we have released from
+ * page cache.
+ */
+ down_write(&EXT4_I(inode)->i_mmap_sem);
+ /*
+ * Need to round down to align start offset to page size boundary
+ * for page size > block size.
+ */
+ ioffset = round_down(offset, PAGE_SIZE);
+ /* Write out all dirty pages */
+ ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
+ LLONG_MAX);
+ if (ret)
+ goto out_mmap;
+ truncate_pagecache(inode, ioffset);
+
credits = ext4_writepage_trans_blocks(inode);
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
- goto out_dio;
+ goto out_mmap;
}
/* Expand file to avoid data loss if there is error while shifting */
@@ -5718,6 +5750,9 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
up_write(&EXT4_I(inode)->i_data_sem);
goto out_stop;
}
+ } else {
+ ext4_ext_drop_refs(path);
+ kfree(path);
}
ret = ext4_es_remove_extent(inode, offset_lblk,
@@ -5738,10 +5773,13 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
up_write(&EXT4_I(inode)->i_data_sem);
if (IS_SYNC(inode))
ext4_handle_sync(handle);
+ if (ret >= 0)
+ ext4_update_inode_fsync_trans(handle, inode, 1);
out_stop:
ext4_journal_stop(handle);
-out_dio:
+out_mmap:
+ up_write(&EXT4_I(inode)->i_mmap_sem);
ext4_inode_resume_unlocked_dio(inode);
out_mutex:
mutex_unlock(&inode->i_mutex);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 113837e7ba98..45ef9975caec 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -209,15 +209,18 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
int result;
handle_t *handle = NULL;
- struct super_block *sb = file_inode(vma->vm_file)->i_sb;
+ struct inode *inode = file_inode(vma->vm_file);
+ struct super_block *sb = inode->i_sb;
bool write = vmf->flags & FAULT_FLAG_WRITE;
if (write) {
sb_start_pagefault(sb);
file_update_time(vma->vm_file);
+ down_read(&EXT4_I(inode)->i_mmap_sem);
handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
EXT4_DATA_TRANS_BLOCKS(sb));
- }
+ } else
+ down_read(&EXT4_I(inode)->i_mmap_sem);
if (IS_ERR(handle))
result = VM_FAULT_SIGBUS;
@@ -228,8 +231,10 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
if (write) {
if (!IS_ERR(handle))
ext4_journal_stop(handle);
+ up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(sb);
- }
+ } else
+ up_read(&EXT4_I(inode)->i_mmap_sem);
return result;
}
@@ -246,10 +251,12 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
if (write) {
sb_start_pagefault(sb);
file_update_time(vma->vm_file);
+ down_read(&EXT4_I(inode)->i_mmap_sem);
handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
ext4_chunk_trans_blocks(inode,
PMD_SIZE / PAGE_SIZE));
- }
+ } else
+ down_read(&EXT4_I(inode)->i_mmap_sem);
if (IS_ERR(handle))
result = VM_FAULT_SIGBUS;
@@ -260,30 +267,71 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
if (write) {
if (!IS_ERR(handle))
ext4_journal_stop(handle);
+ up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(sb);
- }
+ } else
+ up_read(&EXT4_I(inode)->i_mmap_sem);
return result;
}
static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
- return dax_mkwrite(vma, vmf, ext4_get_block_dax,
- ext4_end_io_unwritten);
+ int err;
+ struct inode *inode = file_inode(vma->vm_file);
+
+ sb_start_pagefault(inode->i_sb);
+ file_update_time(vma->vm_file);
+ down_read(&EXT4_I(inode)->i_mmap_sem);
+ err = __dax_mkwrite(vma, vmf, ext4_get_block_dax,
+ ext4_end_io_unwritten);
+ up_read(&EXT4_I(inode)->i_mmap_sem);
+ sb_end_pagefault(inode->i_sb);
+
+ return err;
+}
+
+/*
+ * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_mkwrite()
+ * handler we check for races agaist truncate. Note that since we cycle through
+ * i_mmap_sem, we are sure that also any hole punching that began before we
+ * were called is finished by now and so if it included part of the file we
+ * are working on, our pte will get unmapped and the check for pte_same() in
+ * wp_pfn_shared() fails. Thus fault gets retried and things work out as
+ * desired.
+ */
+static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
+ struct vm_fault *vmf)
+{
+ struct inode *inode = file_inode(vma->vm_file);
+ struct super_block *sb = inode->i_sb;
+ int ret = VM_FAULT_NOPAGE;
+ loff_t size;
+
+ sb_start_pagefault(sb);
+ file_update_time(vma->vm_file);
+ down_read(&EXT4_I(inode)->i_mmap_sem);
+ size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ if (vmf->pgoff >= size)
+ ret = VM_FAULT_SIGBUS;
+ up_read(&EXT4_I(inode)->i_mmap_sem);
+ sb_end_pagefault(sb);
+
+ return ret;
}
static const struct vm_operations_struct ext4_dax_vm_ops = {
.fault = ext4_dax_fault,
.pmd_fault = ext4_dax_pmd_fault,
.page_mkwrite = ext4_dax_mkwrite,
- .pfn_mkwrite = dax_pfn_mkwrite,
+ .pfn_mkwrite = ext4_dax_pfn_mkwrite,
};
#else
#define ext4_dax_vm_ops ext4_file_vm_ops
#endif
static const struct vm_operations_struct ext4_file_vm_ops = {
- .fault = filemap_fault,
+ .fault = ext4_filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = ext4_page_mkwrite,
};
@@ -415,47 +463,27 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
num = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
(pgoff_t)num);
- if (nr_pages == 0) {
- if (whence == SEEK_DATA)
- break;
-
- BUG_ON(whence != SEEK_HOLE);
- /*
- * If this is the first time to go into the loop and
- * offset is not beyond the end offset, it will be a
- * hole at this offset
- */
- if (lastoff == startoff || lastoff < endoff)
- found = 1;
- break;
- }
-
- /*
- * If this is the first time to go into the loop and
- * offset is smaller than the first page offset, it will be a
- * hole at this offset.
- */
- if (lastoff == startoff && whence == SEEK_HOLE &&
- lastoff < page_offset(pvec.pages[0])) {
- found = 1;
+ if (nr_pages == 0)
break;
- }
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
struct buffer_head *bh, *head;
/*
- * If the current offset is not beyond the end of given
- * range, it will be a hole.
+ * If current offset is smaller than the page offset,
+ * there is a hole at this offset.
*/
- if (lastoff < endoff && whence == SEEK_HOLE &&
- page->index > end) {
+ if (whence == SEEK_HOLE && lastoff < endoff &&
+ lastoff < page_offset(pvec.pages[i])) {
found = 1;
*offset = lastoff;
goto out;
}
+ if (page->index > end)
+ goto out;
+
lock_page(page);
if (unlikely(page->mapping != inode->i_mapping)) {
@@ -472,6 +500,8 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
lastoff = page_offset(page);
bh = head = page_buffers(page);
do {
+ if (lastoff + bh->b_size <= startoff)
+ goto next;
if (buffer_uptodate(bh) ||
buffer_unwritten(bh)) {
if (whence == SEEK_DATA)
@@ -486,6 +516,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
unlock_page(page);
goto out;
}
+next:
lastoff += bh->b_size;
bh = bh->b_this_page;
} while (bh != head);
@@ -495,20 +526,18 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
unlock_page(page);
}
- /*
- * The no. of pages is less than our desired, that would be a
- * hole in there.
- */
- if (nr_pages < num && whence == SEEK_HOLE) {
- found = 1;
- *offset = lastoff;
+ /* The no. of pages is less than our desired, we are done. */
+ if (nr_pages < num)
break;
- }
index = pvec.pages[i - 1]->index + 1;
pagevec_release(&pvec);
} while (index <= end);
+ if (whence == SEEK_HOLE && lastoff < endoff) {
+ found = 1;
+ *offset = lastoff;
+ }
out:
pagevec_release(&pvec);
return found;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 53f2b98a69f3..5388207d2832 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -1143,25 +1143,20 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count);
ext4_group_t block_group;
int bit;
- struct buffer_head *bitmap_bh;
+ struct buffer_head *bitmap_bh = NULL;
struct inode *inode = NULL;
- long err = -EIO;
+ int err = -EFSCORRUPTED;
- /* Error cases - e2fsck has already cleaned up for us */
- if (ino > max_ino) {
- ext4_warning(sb, "bad orphan ino %lu! e2fsck was run?", ino);
- err = -EFSCORRUPTED;
- goto error;
- }
+ if (ino < EXT4_FIRST_INO(sb) || ino > max_ino)
+ goto bad_orphan;
block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
if (IS_ERR(bitmap_bh)) {
- err = PTR_ERR(bitmap_bh);
- ext4_warning(sb, "inode bitmap error %ld for orphan %lu",
- ino, err);
- goto error;
+ ext4_error(sb, "inode bitmap error %ld for orphan %lu",
+ ino, PTR_ERR(bitmap_bh));
+ return (struct inode *) bitmap_bh;
}
/* Having the inode bit set should be a 100% indicator that this
@@ -1172,15 +1167,21 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
goto bad_orphan;
inode = ext4_iget(sb, ino);
- if (IS_ERR(inode))
- goto iget_failed;
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ ext4_error(sb, "couldn't read orphan inode %lu (err %d)",
+ ino, err);
+ return inode;
+ }
/*
- * If the orphans has i_nlinks > 0 then it should be able to be
- * truncated, otherwise it won't be removed from the orphan list
- * during processing and an infinite loop will result.
+ * If the orphans has i_nlinks > 0 then it should be able to
+ * be truncated, otherwise it won't be removed from the orphan
+ * list during processing and an infinite loop will result.
+ * Similarly, it must not be a bad inode.
*/
- if (inode->i_nlink && !ext4_can_truncate(inode))
+ if ((inode->i_nlink && !ext4_can_truncate(inode)) ||
+ is_bad_inode(inode))
goto bad_orphan;
if (NEXT_ORPHAN(inode) > max_ino)
@@ -1188,29 +1189,25 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
brelse(bitmap_bh);
return inode;
-iget_failed:
- err = PTR_ERR(inode);
- inode = NULL;
bad_orphan:
- ext4_warning(sb, "bad orphan inode %lu! e2fsck was run?", ino);
- printk(KERN_WARNING "ext4_test_bit(bit=%d, block=%llu) = %d\n",
- bit, (unsigned long long)bitmap_bh->b_blocknr,
- ext4_test_bit(bit, bitmap_bh->b_data));
- printk(KERN_WARNING "inode=%p\n", inode);
+ ext4_error(sb, "bad orphan inode %lu", ino);
+ if (bitmap_bh)
+ printk(KERN_ERR "ext4_test_bit(bit=%d, block=%llu) = %d\n",
+ bit, (unsigned long long)bitmap_bh->b_blocknr,
+ ext4_test_bit(bit, bitmap_bh->b_data));
if (inode) {
- printk(KERN_WARNING "is_bad_inode(inode)=%d\n",
+ printk(KERN_ERR "is_bad_inode(inode)=%d\n",
is_bad_inode(inode));
- printk(KERN_WARNING "NEXT_ORPHAN(inode)=%u\n",
+ printk(KERN_ERR "NEXT_ORPHAN(inode)=%u\n",
NEXT_ORPHAN(inode));
- printk(KERN_WARNING "max_ino=%lu\n", max_ino);
- printk(KERN_WARNING "i_nlink=%u\n", inode->i_nlink);
+ printk(KERN_ERR "max_ino=%lu\n", max_ino);
+ printk(KERN_ERR "i_nlink=%u\n", inode->i_nlink);
/* Avoid freeing blocks if we got a bad deleted inode */
if (inode->i_nlink == 0)
inode->i_blocks = 0;
iput(inode);
}
brelse(bitmap_bh);
-error:
return ERR_PTR(err);
}
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index d884989cc83d..dad8e7bdf0a6 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -336,8 +336,10 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
len -= EXT4_MIN_INLINE_DATA_SIZE;
value = kzalloc(len, GFP_NOFS);
- if (!value)
+ if (!value) {
+ error = -ENOMEM;
goto out;
+ }
error = ext4_xattr_ibody_get(inode, i.name_index, i.name,
value, len);
@@ -931,8 +933,15 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
struct page *page)
{
int i_size_changed = 0;
+ int ret;
- copied = ext4_write_inline_data_end(inode, pos, len, copied, page);
+ ret = ext4_write_inline_data_end(inode, pos, len, copied, page);
+ if (ret < 0) {
+ unlock_page(page);
+ put_page(page);
+ return ret;
+ }
+ copied = ret;
/*
* No need to use i_size_read() here, the i_size
@@ -1149,10 +1158,9 @@ static int ext4_finish_convert_inline_dir(handle_t *handle,
set_buffer_uptodate(dir_block);
err = ext4_handle_dirty_dirent_node(handle, inode, dir_block);
if (err)
- goto out;
+ return err;
set_buffer_verified(dir_block);
-out:
- return err;
+ return ext4_mark_inode_dirty(handle, inode);
}
static int ext4_convert_inline_data_nolock(handle_t *handle,
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 06bda0361e7c..1796d1bd9a1d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -51,25 +51,30 @@ static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,
struct ext4_inode_info *ei)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- __u16 csum_lo;
- __u16 csum_hi = 0;
__u32 csum;
+ __u16 dummy_csum = 0;
+ int offset = offsetof(struct ext4_inode, i_checksum_lo);
+ unsigned int csum_size = sizeof(dummy_csum);
- csum_lo = le16_to_cpu(raw->i_checksum_lo);
- raw->i_checksum_lo = 0;
- if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
- EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) {
- csum_hi = le16_to_cpu(raw->i_checksum_hi);
- raw->i_checksum_hi = 0;
- }
-
- csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw,
- EXT4_INODE_SIZE(inode->i_sb));
+ csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)raw, offset);
+ csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, csum_size);
+ offset += csum_size;
+ csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset,
+ EXT4_GOOD_OLD_INODE_SIZE - offset);
- raw->i_checksum_lo = cpu_to_le16(csum_lo);
- if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE &&
- EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi))
- raw->i_checksum_hi = cpu_to_le16(csum_hi);
+ if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
+ offset = offsetof(struct ext4_inode, i_checksum_hi);
+ csum = ext4_chksum(sbi, csum, (__u8 *)raw +
+ EXT4_GOOD_OLD_INODE_SIZE,
+ offset - EXT4_GOOD_OLD_INODE_SIZE);
+ if (EXT4_FITS_IN_INODE(raw, ei, i_checksum_hi)) {
+ csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum,
+ csum_size);
+ offset += csum_size;
+ }
+ csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset,
+ EXT4_INODE_SIZE(inode->i_sb) - offset);
+ }
return csum;
}
@@ -205,9 +210,9 @@ void ext4_evict_inode(struct inode *inode)
* Note that directories do not have this problem because they
* don't use page cache.
*/
- if (ext4_should_journal_data(inode) &&
- (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
- inode->i_ino != EXT4_JOURNAL_INO) {
+ if (inode->i_ino != EXT4_JOURNAL_INO &&
+ ext4_should_journal_data(inode) &&
+ (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) {
journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
@@ -1159,8 +1164,11 @@ static int ext4_write_end(struct file *file,
if (ext4_has_inline_data(inode)) {
ret = ext4_write_inline_data_end(inode, pos, len,
copied, page);
- if (ret < 0)
+ if (ret < 0) {
+ unlock_page(page);
+ put_page(page);
goto errout;
+ }
copied = ret;
} else
copied = block_write_end(file, mapping, pos,
@@ -1214,7 +1222,9 @@ errout:
* set the buffer to be dirty, since in data=journalled mode we need
* to call ext4_handle_dirty_metadata() instead.
*/
-static void zero_new_buffers(struct page *page, unsigned from, unsigned to)
+static void ext4_journalled_zero_new_buffers(handle_t *handle,
+ struct page *page,
+ unsigned from, unsigned to)
{
unsigned int block_start = 0, block_end;
struct buffer_head *head, *bh;
@@ -1231,7 +1241,7 @@ static void zero_new_buffers(struct page *page, unsigned from, unsigned to)
size = min(to, block_end) - start;
zero_user(page, start, size);
- set_buffer_uptodate(bh);
+ write_end_fn(handle, bh);
}
clear_buffer_new(bh);
}
@@ -1260,18 +1270,25 @@ static int ext4_journalled_write_end(struct file *file,
BUG_ON(!ext4_handle_valid(handle));
- if (ext4_has_inline_data(inode))
- copied = ext4_write_inline_data_end(inode, pos, len,
- copied, page);
- else {
- if (copied < len) {
- if (!PageUptodate(page))
- copied = 0;
- zero_new_buffers(page, from+copied, to);
+ if (ext4_has_inline_data(inode)) {
+ ret = ext4_write_inline_data_end(inode, pos, len,
+ copied, page);
+ if (ret < 0) {
+ unlock_page(page);
+ put_page(page);
+ goto errout;
}
-
+ copied = ret;
+ } else if (unlikely(copied < len) && !PageUptodate(page)) {
+ copied = 0;
+ ext4_journalled_zero_new_buffers(handle, page, from, to);
+ } else {
+ if (unlikely(copied < len))
+ ext4_journalled_zero_new_buffers(handle, page,
+ from + copied, to);
ret = ext4_walk_page_buffers(handle, page_buffers(page), from,
- to, &partial, write_end_fn);
+ from + copied, &partial,
+ write_end_fn);
if (!partial)
SetPageUptodate(page);
}
@@ -1297,6 +1314,7 @@ static int ext4_journalled_write_end(struct file *file,
*/
ext4_orphan_add(handle, inode);
+errout:
ret2 = ext4_journal_stop(handle);
if (!ret)
ret = ret2;
@@ -2026,7 +2044,7 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd,
{
struct inode *inode = mpd->inode;
int err;
- ext4_lblk_t blocks = (i_size_read(inode) + (1 << inode->i_blkbits) - 1)
+ ext4_lblk_t blocks = (i_size_read(inode) + i_blocksize(inode) - 1)
>> inode->i_blkbits;
do {
@@ -2589,13 +2607,36 @@ retry:
done = true;
}
}
- ext4_journal_stop(handle);
+ /*
+ * Caution: If the handle is synchronous,
+ * ext4_journal_stop() can wait for transaction commit
+ * to finish which may depend on writeback of pages to
+ * complete or on page lock to be released. In that
+ * case, we have to wait until after after we have
+ * submitted all the IO, released page locks we hold,
+ * and dropped io_end reference (for extent conversion
+ * to be able to complete) before stopping the handle.
+ */
+ if (!ext4_handle_valid(handle) || handle->h_sync == 0) {
+ ext4_journal_stop(handle);
+ handle = NULL;
+ }
/* Submit prepared bio */
ext4_io_submit(&mpd.io_submit);
/* Unlock pages we didn't use */
mpage_release_unused_pages(&mpd, give_up_on_write);
- /* Drop our io_end reference we got from init */
- ext4_put_io_end(mpd.io_submit.io_end);
+ /*
+ * Drop our io_end reference we got from init. We have
+ * to be careful and use deferred io_end finishing if
+ * we are still holding the transaction as we can
+ * release the last reference to io_end which may end
+ * up doing unwritten extent conversion.
+ */
+ if (handle) {
+ ext4_put_io_end_defer(mpd.io_submit.io_end);
+ ext4_journal_stop(handle);
+ } else
+ ext4_put_io_end(mpd.io_submit.io_end);
if (ret == -ENOSPC && sbi->s_journal) {
/*
@@ -3531,6 +3572,10 @@ static int ext4_block_truncate_page(handle_t *handle,
unsigned blocksize;
struct inode *inode = mapping->host;
+ /* If we are processing an encrypted inode during orphan list handling */
+ if (ext4_encrypted_inode(inode) && !ext4_has_encryption_key(inode))
+ return 0;
+
blocksize = inode->i_sb->s_blocksize;
length = blocksize - (offset & (blocksize - 1));
@@ -3587,7 +3632,36 @@ int ext4_can_truncate(struct inode *inode)
}
/*
- * ext4_punch_hole: punches a hole in a file by releaseing the blocks
+ * We have to make sure i_disksize gets properly updated before we truncate
+ * page cache due to hole punching or zero range. Otherwise i_disksize update
+ * can get lost as it may have been postponed to submission of writeback but
+ * that will never happen after we truncate page cache.
+ */
+int ext4_update_disksize_before_punch(struct inode *inode, loff_t offset,
+ loff_t len)
+{
+ handle_t *handle;
+ loff_t size = i_size_read(inode);
+
+ WARN_ON(!mutex_is_locked(&inode->i_mutex));
+ if (offset > size || offset + len < size)
+ return 0;
+
+ if (EXT4_I(inode)->i_disksize >= size)
+ return 0;
+
+ handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+ ext4_update_i_disksize(inode, size);
+ ext4_mark_inode_dirty(handle, inode);
+ ext4_journal_stop(handle);
+
+ return 0;
+}
+
+/*
+ * ext4_punch_hole: punches a hole in a file by releasing the blocks
* associated with the given offset and length
*
* @inode: File inode
@@ -3616,7 +3690,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
* Write out all dirty pages to avoid race conditions
* Then release them.
*/
- if (mapping->nrpages && mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
+ if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) {
ret = filemap_write_and_wait_range(mapping, offset,
offset + length - 1);
if (ret)
@@ -3651,17 +3725,26 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
}
+ /* Wait all existing dio workers, newcomers will block on i_mutex */
+ ext4_inode_block_unlocked_dio(inode);
+ inode_dio_wait(inode);
+
+ /*
+ * Prevent page faults from reinstantiating pages we have released from
+ * page cache.
+ */
+ down_write(&EXT4_I(inode)->i_mmap_sem);
first_block_offset = round_up(offset, sb->s_blocksize);
last_block_offset = round_down((offset + length), sb->s_blocksize) - 1;
/* Now release the pages and zero block aligned part of pages*/
- if (last_block_offset > first_block_offset)
+ if (last_block_offset > first_block_offset) {
+ ret = ext4_update_disksize_before_punch(inode, offset, length);
+ if (ret)
+ goto out_dio;
truncate_pagecache_range(inode, first_block_offset,
last_block_offset);
-
- /* Wait all existing dio workers, newcomers will block on i_mutex */
- ext4_inode_block_unlocked_dio(inode);
- inode_dio_wait(inode);
+ }
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
credits = ext4_writepage_trans_blocks(inode);
@@ -3708,16 +3791,14 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
if (IS_SYNC(inode))
ext4_handle_sync(handle);
- /* Now release the pages again to reduce race window */
- if (last_block_offset > first_block_offset)
- truncate_pagecache_range(inode, first_block_offset,
- last_block_offset);
-
inode->i_mtime = inode->i_ctime = ext4_current_time(inode);
ext4_mark_inode_dirty(handle, inode);
+ if (ret >= 0)
+ ext4_update_inode_fsync_trans(handle, inode, 1);
out_stop:
ext4_journal_stop(handle);
out_dio:
+ up_write(&EXT4_I(inode)->i_mmap_sem);
ext4_inode_resume_unlocked_dio(inode);
out_mutex:
mutex_unlock(&inode->i_mutex);
@@ -4112,6 +4193,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
struct inode *inode;
journal_t *journal = EXT4_SB(sb)->s_journal;
long ret;
+ loff_t size;
int block;
uid_t i_uid;
gid_t i_gid;
@@ -4203,6 +4285,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
ei->i_file_acl |=
((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
inode->i_size = ext4_isize(raw_inode);
+ if ((size = i_size_read(inode)) < 0) {
+ EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size);
+ ret = -EFSCORRUPTED;
+ goto bad_inode;
+ }
ei->i_disksize = inode->i_size;
#ifdef CONFIG_QUOTA
ei->i_reserved_quota = 0;
@@ -4486,14 +4573,14 @@ static int ext4_do_update_inode(handle_t *handle,
* Fix up interoperability with old kernels. Otherwise, old inodes get
* re-used with the upper 16 bits of the uid/gid intact
*/
- if (!ei->i_dtime) {
+ if (ei->i_dtime && list_empty(&ei->i_orphan)) {
+ raw_inode->i_uid_high = 0;
+ raw_inode->i_gid_high = 0;
+ } else {
raw_inode->i_uid_high =
cpu_to_le16(high_16_bits(i_uid));
raw_inode->i_gid_high =
cpu_to_le16(high_16_bits(i_gid));
- } else {
- raw_inode->i_uid_high = 0;
- raw_inode->i_gid_high = 0;
}
} else {
raw_inode->i_uid_low = cpu_to_le16(fs_high2lowuid(i_uid));
@@ -4851,6 +4938,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
} else
ext4_wait_for_tail_page_commit(inode);
}
+ down_write(&EXT4_I(inode)->i_mmap_sem);
/*
* Truncate pagecache after we've waited for commit
* in data=journal mode to make pages freeable.
@@ -4858,6 +4946,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
truncate_pagecache(inode, inode->i_size);
if (shrink)
ext4_truncate(inode);
+ up_write(&EXT4_I(inode)->i_mmap_sem);
}
if (!rc) {
@@ -5075,8 +5164,9 @@ static int ext4_expand_extra_isize(struct inode *inode,
/* No extended attributes present */
if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
- memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0,
- new_extra_isize);
+ memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE +
+ EXT4_I(inode)->i_extra_isize, 0,
+ new_extra_isize - EXT4_I(inode)->i_extra_isize);
EXT4_I(inode)->i_extra_isize = new_extra_isize;
return 0;
}
@@ -5109,6 +5199,8 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
might_sleep();
trace_ext4_mark_inode_dirty(inode, _RET_IP_);
err = ext4_reserve_inode_write(handle, inode, &iloc);
+ if (err)
+ return err;
if (ext4_handle_valid(handle) &&
EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
!ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {
@@ -5125,8 +5217,6 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
sbi->s_want_extra_isize,
iloc, handle);
if (ret) {
- ext4_set_inode_state(inode,
- EXT4_STATE_NO_EXPAND);
if (mnt_count !=
le16_to_cpu(sbi->s_es->s_mnt_count)) {
ext4_warning(inode->i_sb,
@@ -5139,9 +5229,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
}
}
}
- if (!err)
- err = ext4_mark_iloc_dirty(handle, inode, &iloc);
- return err;
+ return ext4_mark_iloc_dirty(handle, inode, &iloc);
}
/*
@@ -5306,6 +5394,13 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
sb_start_pagefault(inode->i_sb);
file_update_time(vma->vm_file);
+
+ down_read(&EXT4_I(inode)->i_mmap_sem);
+
+ ret = ext4_convert_inline_data(inode);
+ if (ret)
+ goto out_ret;
+
/* Delalloc case is easy... */
if (test_opt(inode->i_sb, DELALLOC) &&
!ext4_should_journal_data(inode) &&
@@ -5375,6 +5470,19 @@ retry_alloc:
out_ret:
ret = block_page_mkwrite_return(ret);
out:
+ up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(inode->i_sb);
return ret;
}
+
+int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct inode *inode = file_inode(vma->vm_file);
+ int err;
+
+ down_read(&EXT4_I(inode)->i_mmap_sem);
+ err = filemap_fault(vma, vmf);
+ up_read(&EXT4_I(inode)->i_mmap_sem);
+
+ return err;
+}
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 5e872fd40e5e..bcd7c4788903 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -622,6 +622,9 @@ resizefs_out:
struct ext4_encryption_policy policy;
int err = 0;
+ if (!ext4_has_feature_encrypt(sb))
+ return -EOPNOTSUPP;
+
if (copy_from_user(&policy,
(struct ext4_encryption_policy __user *)arg,
sizeof(policy))) {
@@ -629,7 +632,17 @@ resizefs_out:
goto encryption_policy_out;
}
+ err = mnt_want_write_file(filp);
+ if (err)
+ goto encryption_policy_out;
+
+ mutex_lock(&inode->i_mutex);
+
err = ext4_process_policy(&policy, inode);
+
+ mutex_unlock(&inode->i_mutex);
+
+ mnt_drop_write_file(filp);
encryption_policy_out:
return err;
#else
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 61eaf74dca37..84cd77663e1f 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -669,7 +669,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,
ext4_grpblk_t min;
ext4_grpblk_t max;
ext4_grpblk_t chunk;
- unsigned short border;
+ unsigned int border;
BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb));
@@ -815,7 +815,7 @@ static void mb_regenerate_buddy(struct ext4_buddy *e4b)
* for this page; do not hold this lock when calling this routine!
*/
-static int ext4_mb_init_cache(struct page *page, char *incore)
+static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
{
ext4_group_t ngroups;
int blocksize;
@@ -848,7 +848,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
/* allocate buffer_heads to read bitmaps */
if (groups_per_page > 1) {
i = sizeof(struct buffer_head *) * groups_per_page;
- bh = kzalloc(i, GFP_NOFS);
+ bh = kzalloc(i, gfp);
if (bh == NULL) {
err = -ENOMEM;
goto out;
@@ -983,7 +983,7 @@ out:
* are on the same page e4b->bd_buddy_page is NULL and return value is 0.
*/
static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
- ext4_group_t group, struct ext4_buddy *e4b)
+ ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp)
{
struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
int block, pnum, poff;
@@ -1002,7 +1002,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
block = group * 2;
pnum = block / blocks_per_page;
poff = block % blocks_per_page;
- page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+ page = find_or_create_page(inode->i_mapping, pnum, gfp);
if (!page)
return -ENOMEM;
BUG_ON(page->mapping != inode->i_mapping);
@@ -1016,7 +1016,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
block++;
pnum = block / blocks_per_page;
- page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+ page = find_or_create_page(inode->i_mapping, pnum, gfp);
if (!page)
return -ENOMEM;
BUG_ON(page->mapping != inode->i_mapping);
@@ -1042,7 +1042,7 @@ static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
* calling this routine!
*/
static noinline_for_stack
-int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
+int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
{
struct ext4_group_info *this_grp;
@@ -1062,7 +1062,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
* The call to ext4_mb_get_buddy_page_lock will mark the
* page accessed.
*/
- ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
+ ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b, gfp);
if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
/*
* somebody initialized the group
@@ -1072,7 +1072,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
}
page = e4b.bd_bitmap_page;
- ret = ext4_mb_init_cache(page, NULL);
+ ret = ext4_mb_init_cache(page, NULL, gfp);
if (ret)
goto err;
if (!PageUptodate(page)) {
@@ -1091,7 +1091,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
}
/* init buddy cache */
page = e4b.bd_buddy_page;
- ret = ext4_mb_init_cache(page, e4b.bd_bitmap);
+ ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp);
if (ret)
goto err;
if (!PageUptodate(page)) {
@@ -1109,8 +1109,8 @@ err:
* calling this routine!
*/
static noinline_for_stack int
-ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
- struct ext4_buddy *e4b)
+ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
+ struct ext4_buddy *e4b, gfp_t gfp)
{
int blocks_per_page;
int block;
@@ -1140,7 +1140,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
* we need full data about the group
* to make a good selection
*/
- ret = ext4_mb_init_group(sb, group);
+ ret = ext4_mb_init_group(sb, group, gfp);
if (ret)
return ret;
}
@@ -1168,11 +1168,11 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
* wait for it to initialize.
*/
page_cache_release(page);
- page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+ page = find_or_create_page(inode->i_mapping, pnum, gfp);
if (page) {
BUG_ON(page->mapping != inode->i_mapping);
if (!PageUptodate(page)) {
- ret = ext4_mb_init_cache(page, NULL);
+ ret = ext4_mb_init_cache(page, NULL, gfp);
if (ret) {
unlock_page(page);
goto err;
@@ -1204,11 +1204,12 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
if (page == NULL || !PageUptodate(page)) {
if (page)
page_cache_release(page);
- page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+ page = find_or_create_page(inode->i_mapping, pnum, gfp);
if (page) {
BUG_ON(page->mapping != inode->i_mapping);
if (!PageUptodate(page)) {
- ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
+ ret = ext4_mb_init_cache(page, e4b->bd_bitmap,
+ gfp);
if (ret) {
unlock_page(page);
goto err;
@@ -1247,6 +1248,12 @@ err:
return ret;
}
+static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
+ struct ext4_buddy *e4b)
+{
+ return ext4_mb_load_buddy_gfp(sb, group, e4b, GFP_NOFS);
+}
+
static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
{
if (e4b->bd_bitmap_page)
@@ -1259,6 +1266,7 @@ static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
{
int order = 1;
+ int bb_incr = 1 << (e4b->bd_blkbits - 1);
void *bb;
BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
@@ -1271,7 +1279,8 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
/* this block is part of buddy of order 'order' */
return order;
}
- bb += 1 << (e4b->bd_blkbits - order);
+ bb += bb_incr;
+ bb_incr >>= 1;
order++;
}
return 0;
@@ -2045,7 +2054,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
/* We only do this if the grp has never been initialized */
if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
- int ret = ext4_mb_init_group(ac->ac_sb, group);
+ int ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS);
if (ret)
return ret;
}
@@ -2278,7 +2287,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
struct ext4_group_info *grinfo;
struct sg {
struct ext4_group_info info;
- ext4_grpblk_t counters[16];
+ ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2];
} sg;
group--;
@@ -2576,7 +2585,7 @@ int ext4_mb_init(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
unsigned i, j;
- unsigned offset;
+ unsigned offset, offset_incr;
unsigned max;
int ret;
@@ -2605,11 +2614,13 @@ int ext4_mb_init(struct super_block *sb)
i = 1;
offset = 0;
+ offset_incr = 1 << (sb->s_blocksize_bits - 1);
max = sb->s_blocksize << 2;
do {
sbi->s_mb_offsets[i] = offset;
sbi->s_mb_maxs[i] = max;
- offset += 1 << (sb->s_blocksize_bits - i);
+ offset += offset_incr;
+ offset_incr = offset_incr >> 1;
max = max >> 1;
i++;
} while (i <= sb->s_blocksize_bits + 1);
@@ -2928,7 +2939,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
ext4_error(sb, "Allocating blocks %llu-%llu which overlap "
"fs metadata", block, block+len);
/* File system mounted not to panic on error
- * Fix the bitmap and repeat the block allocation
+ * Fix the bitmap and return EFSCORRUPTED
* We leak some of the blocks here.
*/
ext4_lock_group(sb, ac->ac_b_ex.fe_group);
@@ -2937,7 +2948,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
if (!err)
- err = -EAGAIN;
+ err = -EFSCORRUPTED;
goto out_err;
}
@@ -3109,6 +3120,13 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
if (ar->pright && start + size - 1 >= ar->lright)
size -= start + size - ar->lright;
+ /*
+ * Trim allocation request for filesystems with artificially small
+ * groups.
+ */
+ if (size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
+ size = EXT4_BLOCKS_PER_GROUP(ac->ac_sb);
+
end = start + size;
/* check we don't cross already preallocated blocks */
@@ -4502,18 +4520,7 @@ repeat:
}
if (likely(ac->ac_status == AC_STATUS_FOUND)) {
*errp = ext4_mb_mark_diskspace_used(ac, handle, reserv_clstrs);
- if (*errp == -EAGAIN) {
- /*
- * drop the reference that we took
- * in ext4_mb_use_best_found
- */
- ext4_mb_release_context(ac);
- ac->ac_b_ex.fe_group = 0;
- ac->ac_b_ex.fe_start = 0;
- ac->ac_b_ex.fe_len = 0;
- ac->ac_status = AC_STATUS_CONTINUE;
- goto repeat;
- } else if (*errp) {
+ if (*errp) {
ext4_discard_allocated_blocks(ac);
goto errout;
} else {
@@ -4815,7 +4822,9 @@ do_more:
#endif
trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
- err = ext4_mb_load_buddy(sb, block_group, &e4b);
+ /* __GFP_NOFAIL: retry infinitely, ignore TIF_MEMDIE and memcg limit. */
+ err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
+ GFP_NOFS|__GFP_NOFAIL);
if (err)
goto error_return;
@@ -5217,7 +5226,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
grp = ext4_get_group_info(sb, group);
/* We only do this if the grp has never been initialized */
if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
- ret = ext4_mb_init_group(sb, group);
+ ret = ext4_mb_init_group(sb, group, GFP_NOFS);
if (ret)
break;
}
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index e032a0423e35..05048fcfd602 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -60,10 +60,10 @@ ext4_double_down_write_data_sem(struct inode *first, struct inode *second)
{
if (first < second) {
down_write(&EXT4_I(first)->i_data_sem);
- down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
+ down_write_nested(&EXT4_I(second)->i_data_sem, I_DATA_SEM_OTHER);
} else {
down_write(&EXT4_I(second)->i_data_sem);
- down_write_nested(&EXT4_I(first)->i_data_sem, SINGLE_DEPTH_NESTING);
+ down_write_nested(&EXT4_I(first)->i_data_sem, I_DATA_SEM_OTHER);
}
}
@@ -187,7 +187,7 @@ mext_page_mkuptodate(struct page *page, unsigned from, unsigned to)
if (PageUptodate(page))
return 0;
- blocksize = 1 << inode->i_blkbits;
+ blocksize = i_blocksize(inode);
if (!page_has_buffers(page))
create_empty_buffers(page, blocksize, 0);
@@ -390,6 +390,7 @@ data_copy:
*err = ext4_get_block(orig_inode, orig_blk_offset + i, bh, 0);
if (*err < 0)
break;
+ bh = bh->b_this_page;
}
if (!*err)
*err = block_commit_write(pagep[0], from, from + replaced_size);
@@ -483,6 +484,13 @@ mext_check_arguments(struct inode *orig_inode,
return -EBUSY;
}
+ if (IS_NOQUOTA(orig_inode) || IS_NOQUOTA(donor_inode)) {
+ ext4_debug("ext4 move extent: The argument files should "
+ "not be quota files [ino:orig %lu, donor %lu]\n",
+ orig_inode->i_ino, donor_inode->i_ino);
+ return -EBUSY;
+ }
+
/* Ext4 move extent supports only extent based file */
if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) {
ext4_debug("ext4 move extent: orig file is not extents "
@@ -590,6 +598,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
return -EOPNOTSUPP;
}
+ if (ext4_encrypted_inode(orig_inode) ||
+ ext4_encrypted_inode(donor_inode)) {
+ ext4_msg(orig_inode->i_sb, KERN_ERR,
+ "Online defrag not supported for encrypted files");
+ return -EOPNOTSUPP;
+ }
+
/* Protect orig and donor inodes against a truncate */
lock_two_nondirectories(orig_inode, donor_inode);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a969ab39f302..1d007e853f5c 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -420,15 +420,14 @@ static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent,
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_inode_info *ei = EXT4_I(inode);
__u32 csum;
- __le32 save_csum;
int size;
+ __u32 dummy_csum = 0;
+ int offset = offsetof(struct dx_tail, dt_checksum);
size = count_offset + (count * sizeof(struct dx_entry));
- save_csum = t->dt_checksum;
- t->dt_checksum = 0;
csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
- csum = ext4_chksum(sbi, csum, (__u8 *)t, sizeof(struct dx_tail));
- t->dt_checksum = save_csum;
+ csum = ext4_chksum(sbi, csum, (__u8 *)t, offset);
+ csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, sizeof(dummy_csum));
return cpu_to_le32(csum);
}
@@ -1244,9 +1243,9 @@ static inline int ext4_match(struct ext4_filename *fname,
if (unlikely(!name)) {
if (fname->usr_fname->name[0] == '_') {
int ret;
- if (de->name_len < 16)
+ if (de->name_len <= 32)
return 0;
- ret = memcmp(de->name + de->name_len - 16,
+ ret = memcmp(de->name + ((de->name_len - 17) & ~15),
fname->crypto_buf.name + 8, 16);
return (ret == 0) ? 1 : 0;
}
@@ -1558,6 +1557,24 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
struct ext4_dir_entry_2 *de;
struct buffer_head *bh;
+ if (ext4_encrypted_inode(dir)) {
+ int res = ext4_get_encryption_info(dir);
+
+ /*
+ * This should be a properly defined flag for
+ * dentry->d_flags when we uplift this to the VFS.
+ * d_fsdata is set to (void *) 1 if if the dentry is
+ * created while the directory was encrypted and we
+ * don't have access to the key.
+ */
+ dentry->d_fsdata = NULL;
+ if (ext4_encryption_info(dir))
+ dentry->d_fsdata = (void *) 1;
+ d_set_d_op(dentry, &ext4_encrypted_d_ops);
+ if (res && res != -ENOKEY)
+ return ERR_PTR(res);
+ }
+
if (dentry->d_name.len > EXT4_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
@@ -2018,33 +2035,31 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
frame->entries = entries;
frame->at = entries;
frame->bh = bh;
- bh = bh2;
retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
if (retval)
goto out_frames;
- retval = ext4_handle_dirty_dirent_node(handle, dir, bh);
+ retval = ext4_handle_dirty_dirent_node(handle, dir, bh2);
if (retval)
goto out_frames;
- de = do_split(handle,dir, &bh, frame, &fname->hinfo);
+ de = do_split(handle,dir, &bh2, frame, &fname->hinfo);
if (IS_ERR(de)) {
retval = PTR_ERR(de);
goto out_frames;
}
- dx_release(frames);
- retval = add_dirent_to_buf(handle, fname, dir, inode, de, bh);
- brelse(bh);
- return retval;
+ retval = add_dirent_to_buf(handle, fname, dir, inode, de, bh2);
out_frames:
/*
* Even if the block split failed, we have to properly write
* out all the changes we did so far. Otherwise we can end up
* with corrupted filesystem.
*/
- ext4_mark_inode_dirty(handle, dir);
+ if (retval)
+ ext4_mark_inode_dirty(handle, dir);
dx_release(frames);
+ brelse(bh2);
return retval;
}
@@ -2809,7 +2824,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
* list entries can cause panics at unmount time.
*/
mutex_lock(&sbi->s_orphan_lock);
- list_del(&EXT4_I(inode)->i_orphan);
+ list_del_init(&EXT4_I(inode)->i_orphan);
mutex_unlock(&sbi->s_orphan_lock);
}
}
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 17fbe3882b8e..6ca56f5f72b5 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -23,6 +23,7 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
+#include <linux/backing-dev.h>
#include "ext4_jbd2.h"
#include "xattr.h"
@@ -485,9 +486,20 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode) &&
nr_to_submit) {
- data_page = ext4_encrypt(inode, page);
+ gfp_t gfp_flags = GFP_NOFS;
+
+ retry_encrypt:
+ data_page = ext4_encrypt(inode, page, gfp_flags);
if (IS_ERR(data_page)) {
ret = PTR_ERR(data_page);
+ if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) {
+ if (io->io_bio) {
+ ext4_io_submit(io);
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ }
+ gfp_flags |= __GFP_NOFAIL;
+ goto retry_encrypt;
+ }
data_page = NULL;
goto out;
}
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 5dc5e95063de..bc7642f57dc8 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -279,7 +279,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
if (ext4_encrypted_inode(inode) &&
S_ISREG(inode->i_mode)) {
- ctx = ext4_get_crypto_ctx(inode);
+ ctx = ext4_get_crypto_ctx(inode, GFP_NOFS);
if (IS_ERR(ctx))
goto set_error_page;
}
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 34038e3598d5..74516efd874c 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1926,7 +1926,8 @@ retry:
n_desc_blocks = o_desc_blocks +
le16_to_cpu(es->s_reserved_gdt_blocks);
n_group = n_desc_blocks * EXT4_DESC_PER_BLOCK(sb);
- n_blocks_count = n_group * EXT4_BLOCKS_PER_GROUP(sb);
+ n_blocks_count = (ext4_fsblk_t)n_group *
+ EXT4_BLOCKS_PER_GROUP(sb);
n_group--; /* set to last group number */
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index c9ab67da6e5a..68345a9e59b8 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -793,6 +793,7 @@ static void ext4_put_super(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
+ int aborted = 0;
int i, err;
ext4_unregister_li_request(sb);
@@ -802,9 +803,10 @@ static void ext4_put_super(struct super_block *sb)
destroy_workqueue(sbi->rsv_conversion_wq);
if (sbi->s_journal) {
+ aborted = is_journal_aborted(sbi->s_journal);
err = jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
- if (err < 0)
+ if ((err < 0) && !aborted)
ext4_abort(sb, "Couldn't clean up the journal");
}
@@ -816,7 +818,7 @@ static void ext4_put_super(struct super_block *sb)
ext4_ext_release(sb);
ext4_xattr_put_super(sb);
- if (!(sb->s_flags & MS_RDONLY)) {
+ if (!(sb->s_flags & MS_RDONLY) && !aborted) {
ext4_clear_feature_journal_needs_recovery(sb);
es->s_state = cpu_to_le16(sbi->s_mount_state);
}
@@ -958,6 +960,7 @@ static void init_once(void *foo)
INIT_LIST_HEAD(&ei->i_orphan);
init_rwsem(&ei->xattr_sem);
init_rwsem(&ei->i_data_sem);
+ init_rwsem(&ei->i_mmap_sem);
inode_init_once(&ei->vfs_inode);
}
@@ -1292,9 +1295,9 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args)
return -1;
}
if (ext4_has_feature_quota(sb)) {
- ext4_msg(sb, KERN_ERR, "Cannot set journaled quota options "
- "when QUOTA feature is enabled");
- return -1;
+ ext4_msg(sb, KERN_INFO, "Journaled quota options "
+ "ignored when QUOTA feature is enabled");
+ return 1;
}
qname = match_strdup(args);
if (!qname) {
@@ -1657,10 +1660,10 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
return -1;
}
if (ext4_has_feature_quota(sb)) {
- ext4_msg(sb, KERN_ERR,
- "Cannot set journaled quota options "
+ ext4_msg(sb, KERN_INFO,
+ "Quota format mount options ignored "
"when QUOTA feature is enabled");
- return -1;
+ return 1;
}
sbi->s_jquota_fmt = m->mount_opt;
#endif
@@ -1721,11 +1724,11 @@ static int parse_options(char *options, struct super_block *sb,
#ifdef CONFIG_QUOTA
if (ext4_has_feature_quota(sb) &&
(test_opt(sb, USRQUOTA) || test_opt(sb, GRPQUOTA))) {
- ext4_msg(sb, KERN_ERR, "Cannot set quota options when QUOTA "
- "feature is enabled");
- return 0;
- }
- if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
+ ext4_msg(sb, KERN_INFO, "Quota feature enabled, usrquota and grpquota "
+ "mount options ignored.");
+ clear_opt(sb, USRQUOTA);
+ clear_opt(sb, GRPQUOTA);
+ } else if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
clear_opt(sb, USRQUOTA);
@@ -2029,23 +2032,25 @@ failed:
static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
struct ext4_group_desc *gdp)
{
- int offset;
+ int offset = offsetof(struct ext4_group_desc, bg_checksum);
__u16 crc = 0;
__le32 le_group = cpu_to_le32(block_group);
struct ext4_sb_info *sbi = EXT4_SB(sb);
if (ext4_has_metadata_csum(sbi->s_sb)) {
/* Use new metadata_csum algorithm */
- __le16 save_csum;
__u32 csum32;
+ __u16 dummy_csum = 0;
- save_csum = gdp->bg_checksum;
- gdp->bg_checksum = 0;
csum32 = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_group,
sizeof(le_group));
- csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp,
- sbi->s_desc_size);
- gdp->bg_checksum = save_csum;
+ csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp, offset);
+ csum32 = ext4_chksum(sbi, csum32, (__u8 *)&dummy_csum,
+ sizeof(dummy_csum));
+ offset += sizeof(dummy_csum);
+ if (offset < sbi->s_desc_size)
+ csum32 = ext4_chksum(sbi, csum32, (__u8 *)gdp + offset,
+ sbi->s_desc_size - offset);
crc = csum32 & 0xFFFF;
goto out;
@@ -2055,8 +2060,6 @@ static __le16 ext4_group_desc_csum(struct super_block *sb, __u32 block_group,
if (!ext4_has_feature_gdt_csum(sb))
return 0;
- offset = offsetof(struct ext4_group_desc, bg_checksum);
-
crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
crc = crc16(crc, (__u8 *)gdp, offset);
@@ -2092,6 +2095,7 @@ void ext4_group_desc_csum_set(struct super_block *sb, __u32 block_group,
/* Called at mount-time, super-block is locked */
static int ext4_check_descriptors(struct super_block *sb,
+ ext4_fsblk_t sb_block,
ext4_group_t *first_not_zeroed)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2122,6 +2126,11 @@ static int ext4_check_descriptors(struct super_block *sb,
grp = i;
block_bitmap = ext4_block_bitmap(sb, gdp);
+ if (block_bitmap == sb_block) {
+ ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+ "Block bitmap for group %u overlaps "
+ "superblock", i);
+ }
if (block_bitmap < first_block || block_bitmap > last_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
"Block bitmap for group %u not in group "
@@ -2129,6 +2138,11 @@ static int ext4_check_descriptors(struct super_block *sb,
return 0;
}
inode_bitmap = ext4_inode_bitmap(sb, gdp);
+ if (inode_bitmap == sb_block) {
+ ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+ "Inode bitmap for group %u overlaps "
+ "superblock", i);
+ }
if (inode_bitmap < first_block || inode_bitmap > last_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
"Inode bitmap for group %u not in group "
@@ -2136,6 +2150,11 @@ static int ext4_check_descriptors(struct super_block *sb,
return 0;
}
inode_table = ext4_inode_table(sb, gdp);
+ if (inode_table == sb_block) {
+ ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
+ "Inode table for group %u overlaps "
+ "superblock", i);
+ }
if (inode_table < first_block ||
inode_table + sbi->s_itb_per_group - 1 > last_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
@@ -2239,6 +2258,16 @@ static void ext4_orphan_cleanup(struct super_block *sb,
while (es->s_last_orphan) {
struct inode *inode;
+ /*
+ * We may have encountered an error during cleanup; if
+ * so, skip the rest.
+ */
+ if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
+ jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
+ es->s_last_orphan = 0;
+ break;
+ }
+
inode = ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
if (IS_ERR(inode)) {
es->s_last_orphan = 0;
@@ -3010,10 +3039,15 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp,
ext4_set_bit(s++, buf);
count++;
}
- for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) {
- ext4_set_bit(EXT4_B2C(sbi, s++), buf);
- count++;
+ j = ext4_bg_num_gdb(sb, grp);
+ if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) {
+ ext4_error(sb, "Invalid number of block group "
+ "descriptor blocks: %d", j);
+ j = EXT4_BLOCKS_PER_GROUP(sb) - s;
}
+ count += j;
+ for (; j > 0; j--)
+ ext4_set_bit(EXT4_B2C(sbi, s++), buf);
}
if (!count)
return 0;
@@ -3103,7 +3137,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
char *orig_data = kstrdup(data, GFP_KERNEL);
struct buffer_head *bh;
struct ext4_super_block *es = NULL;
- struct ext4_sb_info *sbi;
+ struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
ext4_fsblk_t block;
ext4_fsblk_t sb_block = get_sb_block(&data);
ext4_fsblk_t logical_sb_block;
@@ -3122,16 +3156,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
ext4_group_t first_not_zeroed;
- sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
- if (!sbi)
- goto out_free_orig;
+ if ((data && !orig_data) || !sbi)
+ goto out_free_base;
sbi->s_blockgroup_lock =
kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
- if (!sbi->s_blockgroup_lock) {
- kfree(sbi);
- goto out_free_orig;
- }
+ if (!sbi->s_blockgroup_lock)
+ goto out_free_base;
+
sb->s_fs_info = sbi;
sbi->s_sb = sb;
sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS;
@@ -3277,11 +3309,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
*/
sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT;
- if (!parse_options((char *) sbi->s_es->s_mount_opts, sb,
- &journal_devnum, &journal_ioprio, 0)) {
- ext4_msg(sb, KERN_WARNING,
- "failed to parse options in superblock: %s",
- sbi->s_es->s_mount_opts);
+ if (sbi->s_es->s_mount_opts[0]) {
+ char *s_mount_opts = kstrndup(sbi->s_es->s_mount_opts,
+ sizeof(sbi->s_es->s_mount_opts),
+ GFP_KERNEL);
+ if (!s_mount_opts)
+ goto failed_mount;
+ if (!parse_options(s_mount_opts, sb, &journal_devnum,
+ &journal_ioprio, 0)) {
+ ext4_msg(sb, KERN_WARNING,
+ "failed to parse options in superblock: %s",
+ s_mount_opts);
+ }
+ kfree(s_mount_opts);
}
sbi->s_def_mount_opt = sbi->s_mount_opt;
if (!parse_options((char *) data, sb, &journal_devnum,
@@ -3307,6 +3347,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"both data=journal and dax");
goto failed_mount;
}
+ if (ext4_has_feature_encrypt(sb)) {
+ ext4_msg(sb, KERN_WARNING,
+ "encrypted files will use data=ordered "
+ "instead of data journaling mode");
+ }
if (test_opt(sb, DELALLOC))
clear_opt(sb, DELALLOC);
} else {
@@ -3367,7 +3412,22 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
if (blocksize < EXT4_MIN_BLOCK_SIZE ||
blocksize > EXT4_MAX_BLOCK_SIZE) {
ext4_msg(sb, KERN_ERR,
- "Unsupported filesystem blocksize %d", blocksize);
+ "Unsupported filesystem blocksize %d (%d log_block_size)",
+ blocksize, le32_to_cpu(es->s_log_block_size));
+ goto failed_mount;
+ }
+ if (le32_to_cpu(es->s_log_block_size) >
+ (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
+ ext4_msg(sb, KERN_ERR,
+ "Invalid log block size: %u",
+ le32_to_cpu(es->s_log_block_size));
+ goto failed_mount;
+ }
+
+ if (le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) > (blocksize / 4)) {
+ ext4_msg(sb, KERN_ERR,
+ "Number of reserved GDT blocks insanely large: %d",
+ le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks));
goto failed_mount;
}
@@ -3454,12 +3514,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
- if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0)
- goto cantfind_ext4;
sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);
if (sbi->s_inodes_per_block == 0)
goto cantfind_ext4;
+ if (sbi->s_inodes_per_group < sbi->s_inodes_per_block ||
+ sbi->s_inodes_per_group > blocksize * 8) {
+ ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n",
+ sbi->s_blocks_per_group);
+ goto failed_mount;
+ }
sbi->s_itb_per_group = sbi->s_inodes_per_group /
sbi->s_inodes_per_block;
sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb);
@@ -3499,6 +3563,13 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
"block size (%d)", clustersize, blocksize);
goto failed_mount;
}
+ if (le32_to_cpu(es->s_log_cluster_size) >
+ (EXT4_MAX_CLUSTER_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE)) {
+ ext4_msg(sb, KERN_ERR,
+ "Invalid log cluster size: %u",
+ le32_to_cpu(es->s_log_cluster_size));
+ goto failed_mount;
+ }
sbi->s_cluster_bits = le32_to_cpu(es->s_log_cluster_size) -
le32_to_cpu(es->s_log_block_size);
sbi->s_clusters_per_group =
@@ -3535,13 +3606,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
sbi->s_cluster_ratio = clustersize / blocksize;
- if (sbi->s_inodes_per_group > blocksize * 8) {
- ext4_msg(sb, KERN_ERR,
- "#inodes per group too big: %lu",
- sbi->s_inodes_per_group);
- goto failed_mount;
- }
-
/* Do we have standard group size of clustersize * 8 blocks ? */
if (sbi->s_blocks_per_group == clustersize << 3)
set_opt2(sb, STD_GROUP_SIZE);
@@ -3601,6 +3665,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
EXT4_DESC_PER_BLOCK(sb);
+ if (ext4_has_feature_meta_bg(sb)) {
+ if (le32_to_cpu(es->s_first_meta_bg) > db_count) {
+ ext4_msg(sb, KERN_WARNING,
+ "first meta block group too large: %u "
+ "(group descriptor block count %u)",
+ le32_to_cpu(es->s_first_meta_bg), db_count);
+ goto failed_mount;
+ }
+ }
sbi->s_group_desc = ext4_kvmalloc(db_count *
sizeof(struct buffer_head *),
GFP_KERNEL);
@@ -3622,7 +3695,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount2;
}
}
- if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
+ if (!ext4_check_descriptors(sb, logical_sb_block, &first_not_zeroed)) {
ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
ret = -EFSCORRUPTED;
goto failed_mount2;
@@ -3675,7 +3748,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
* root first: it may be modified in the journal!
*/
if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) {
- if (ext4_load_journal(sb, es, journal_devnum))
+ err = ext4_load_journal(sb, es, journal_devnum);
+ if (err)
goto failed_mount3a;
} else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
ext4_has_feature_journal_needs_recovery(sb)) {
@@ -3945,7 +4019,9 @@ no_journal:
if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))
ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
- "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts,
+ "Opts: %.*s%s%s", descr,
+ (int) sizeof(sbi->s_es->s_mount_opts),
+ sbi->s_es->s_mount_opts,
*sbi->s_es->s_mount_opts ? "; " : "", orig_data);
if (es->s_error_count)
@@ -4015,8 +4091,8 @@ failed_mount:
out_fail:
sb->s_fs_info = NULL;
kfree(sbi->s_blockgroup_lock);
+out_free_base:
kfree(sbi);
-out_free_orig:
kfree(orig_data);
return err ? err : ret;
}
@@ -4936,6 +5012,20 @@ static int ext4_quota_on_mount(struct super_block *sb, int type)
EXT4_SB(sb)->s_jquota_fmt, type);
}
+static void lockdep_set_quota_inode(struct inode *inode, int subclass)
+{
+ struct ext4_inode_info *ei = EXT4_I(inode);
+
+ /* The first argument of lockdep_set_subclass has to be
+ * *exactly* the same as the argument to init_rwsem() --- in
+ * this case, in init_once() --- or lockdep gets unhappy
+ * because the name of the lock is set using the
+ * stringification of the argument to init_rwsem().
+ */
+ (void) ei; /* shut up clang warning if !CONFIG_LOCKDEP */
+ lockdep_set_subclass(&ei->i_data_sem, subclass);
+}
+
/*
* Standard function to be called on quota_on
*/
@@ -4975,8 +5065,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
if (err)
return err;
}
-
- return dquot_quota_on(sb, type, format_id, path);
+ lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA);
+ err = dquot_quota_on(sb, type, format_id, path);
+ if (err)
+ lockdep_set_quota_inode(path->dentry->d_inode,
+ I_DATA_SEM_NORMAL);
+ return err;
}
static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
@@ -5002,8 +5096,11 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
/* Don't account quota for quota files to avoid recursion */
qf_inode->i_flags |= S_NOQUOTA;
+ lockdep_set_quota_inode(qf_inode, I_DATA_SEM_QUOTA);
err = dquot_enable(qf_inode, type, format_id, flags);
iput(qf_inode);
+ if (err)
+ lockdep_set_quota_inode(qf_inode, I_DATA_SEM_NORMAL);
return err;
}
diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c
index 1420a3c614af..c2ee23acf359 100644
--- a/fs/ext4/sysfs.c
+++ b/fs/ext4/sysfs.c
@@ -100,7 +100,7 @@ static ssize_t reserved_clusters_store(struct ext4_attr *a,
int ret;
ret = kstrtoull(skip_spaces(buf), 0, &val);
- if (!ret || val >= clusters)
+ if (ret || val >= clusters)
return -EINVAL;
atomic64_set(&sbi->s_resv_clusters, val);
@@ -223,14 +223,18 @@ static struct attribute *ext4_attrs[] = {
EXT4_ATTR_FEATURE(lazy_itable_init);
EXT4_ATTR_FEATURE(batched_discard);
EXT4_ATTR_FEATURE(meta_bg_resize);
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
EXT4_ATTR_FEATURE(encryption);
+#endif
EXT4_ATTR_FEATURE(metadata_csum_seed);
static struct attribute *ext4_feat_attrs[] = {
ATTR_LIST(lazy_itable_init),
ATTR_LIST(batched_discard),
ATTR_LIST(meta_bg_resize),
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
ATTR_LIST(encryption),
+#endif
ATTR_LIST(metadata_csum_seed),
NULL,
};
diff --git a/fs/ext4/truncate.h b/fs/ext4/truncate.h
index 011ba6670d99..c70d06a383e2 100644
--- a/fs/ext4/truncate.h
+++ b/fs/ext4/truncate.h
@@ -10,8 +10,10 @@
*/
static inline void ext4_truncate_failed_write(struct inode *inode)
{
+ down_write(&EXT4_I(inode)->i_mmap_sem);
truncate_inode_pages(inode->i_mapping, inode->i_size);
ext4_truncate(inode);
+ up_write(&EXT4_I(inode)->i_mmap_sem);
}
/*
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 6b6b3e751f8c..7c23363ecf19 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -123,17 +123,18 @@ static __le32 ext4_xattr_block_csum(struct inode *inode,
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
__u32 csum;
- __le32 save_csum;
__le64 dsk_block_nr = cpu_to_le64(block_nr);
+ __u32 dummy_csum = 0;
+ int offset = offsetof(struct ext4_xattr_header, h_checksum);
- save_csum = hdr->h_checksum;
- hdr->h_checksum = 0;
csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&dsk_block_nr,
sizeof(dsk_block_nr));
- csum = ext4_chksum(sbi, csum, (__u8 *)hdr,
- EXT4_BLOCK_SIZE(inode->i_sb));
+ csum = ext4_chksum(sbi, csum, (__u8 *)hdr, offset);
+ csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, sizeof(dummy_csum));
+ offset += sizeof(dummy_csum);
+ csum = ext4_chksum(sbi, csum, (__u8 *)hdr + offset,
+ EXT4_BLOCK_SIZE(inode->i_sb) - offset);
- hdr->h_checksum = save_csum;
return cpu_to_le32(csum);
}
@@ -232,6 +233,27 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
return error;
}
+static int
+__xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header,
+ void *end, const char *function, unsigned int line)
+{
+ struct ext4_xattr_entry *entry = IFIRST(header);
+ int error = -EFSCORRUPTED;
+
+ if (((void *) header >= end) ||
+ (header->h_magic != le32_to_cpu(EXT4_XATTR_MAGIC)))
+ goto errout;
+ error = ext4_xattr_check_names(entry, end, entry);
+errout:
+ if (error)
+ __ext4_error_inode(inode, function, line, 0,
+ "corrupted in-inode xattr");
+ return error;
+}
+
+#define xattr_check_inode(inode, header, end) \
+ __xattr_check_inode((inode), (header), (end), __func__, __LINE__)
+
static inline int
ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size)
{
@@ -343,7 +365,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
header = IHDR(inode, raw_inode);
entry = IFIRST(header);
end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
- error = ext4_xattr_check_names(entry, end, entry);
+ error = xattr_check_inode(inode, header, end);
if (error)
goto cleanup;
error = ext4_xattr_find_entry(&entry, name_index, name,
@@ -474,7 +496,7 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
raw_inode = ext4_raw_inode(&iloc);
header = IHDR(inode, raw_inode);
end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
- error = ext4_xattr_check_names(IFIRST(header), end, IFIRST(header));
+ error = xattr_check_inode(inode, header, end);
if (error)
goto cleanup;
error = ext4_xattr_list_entries(dentry, IFIRST(header),
@@ -990,8 +1012,7 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
is->s.here = is->s.first;
is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
- error = ext4_xattr_check_names(IFIRST(header), is->s.end,
- IFIRST(header));
+ error = xattr_check_inode(inode, header, is->s.end);
if (error)
return error;
/* Find the named attribute. */
@@ -1264,15 +1285,19 @@ int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
size_t min_offs, free;
int total_ino;
void *base, *start, *end;
- int extra_isize = 0, error = 0, tried_min_extra_isize = 0;
+ int error = 0, tried_min_extra_isize = 0;
int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
+ int isize_diff; /* How much do we need to grow i_extra_isize */
down_write(&EXT4_I(inode)->xattr_sem);
+ /*
+ * Set EXT4_STATE_NO_EXPAND to avoid recursion when marking inode dirty
+ */
+ ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
retry:
- if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) {
- up_write(&EXT4_I(inode)->xattr_sem);
- return 0;
- }
+ isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize;
+ if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
+ goto out;
header = IHDR(inode, raw_inode);
entry = IFIRST(header);
@@ -1288,8 +1313,12 @@ retry:
last = entry;
total_ino = sizeof(struct ext4_xattr_ibody_header);
+ error = xattr_check_inode(inode, header, end);
+ if (error)
+ goto cleanup;
+
free = ext4_xattr_free_space(last, &min_offs, base, &total_ino);
- if (free >= new_extra_isize) {
+ if (free >= isize_diff) {
entry = IFIRST(header);
ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize
- new_extra_isize, (void *)raw_inode +
@@ -1297,8 +1326,7 @@ retry:
(void *)header, total_ino,
inode->i_sb->s_blocksize);
EXT4_I(inode)->i_extra_isize = new_extra_isize;
- error = 0;
- goto cleanup;
+ goto out;
}
/*
@@ -1321,7 +1349,7 @@ retry:
end = bh->b_data + bh->b_size;
min_offs = end - base;
free = ext4_xattr_free_space(first, &min_offs, base, NULL);
- if (free < new_extra_isize) {
+ if (free < isize_diff) {
if (!tried_min_extra_isize && s_min_extra_isize) {
tried_min_extra_isize++;
new_extra_isize = s_min_extra_isize;
@@ -1335,7 +1363,7 @@ retry:
free = inode->i_sb->s_blocksize;
}
- while (new_extra_isize > 0) {
+ while (isize_diff > 0) {
size_t offs, size, entry_size;
struct ext4_xattr_entry *small_entry = NULL;
struct ext4_xattr_info i = {
@@ -1366,7 +1394,7 @@ retry:
EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
EXT4_XATTR_LEN(last->e_name_len);
if (total_size <= free && total_size < min_total_size) {
- if (total_size < new_extra_isize) {
+ if (total_size < isize_diff) {
small_entry = last;
} else {
entry = last;
@@ -1421,22 +1449,22 @@ retry:
error = ext4_xattr_ibody_set(handle, inode, &i, is);
if (error)
goto cleanup;
+ total_ino -= entry_size;
entry = IFIRST(header);
- if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize)
- shift_bytes = new_extra_isize;
+ if (entry_size + EXT4_XATTR_SIZE(size) >= isize_diff)
+ shift_bytes = isize_diff;
else
- shift_bytes = entry_size + size;
+ shift_bytes = entry_size + EXT4_XATTR_SIZE(size);
/* Adjust the offsets and shift the remaining entries ahead */
- ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize -
- shift_bytes, (void *)raw_inode +
- EXT4_GOOD_OLD_INODE_SIZE + extra_isize + shift_bytes,
- (void *)header, total_ino - entry_size,
- inode->i_sb->s_blocksize);
+ ext4_xattr_shift_entries(entry, -shift_bytes,
+ (void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE +
+ EXT4_I(inode)->i_extra_isize + shift_bytes,
+ (void *)header, total_ino, inode->i_sb->s_blocksize);
- extra_isize += shift_bytes;
- new_extra_isize -= shift_bytes;
- EXT4_I(inode)->i_extra_isize = extra_isize;
+ isize_diff -= shift_bytes;
+ EXT4_I(inode)->i_extra_isize += shift_bytes;
+ header = IHDR(inode, raw_inode);
i.name = b_entry_name;
i.value = buffer;
@@ -1458,6 +1486,8 @@ retry:
kfree(bs);
}
brelse(bh);
+out:
+ ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
up_write(&EXT4_I(inode)->xattr_sem);
return 0;
@@ -1469,6 +1499,10 @@ cleanup:
kfree(is);
kfree(bs);
brelse(bh);
+ /*
+ * We deliberately leave EXT4_STATE_NO_EXPAND set here since inode
+ * size expansion failed.
+ */
up_write(&EXT4_I(inode)->xattr_sem);
return error;
}
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index c8f25f7241f0..83dcf7bfd7b8 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -213,13 +213,11 @@ static int __f2fs_set_acl(struct inode *inode, int type,
switch (type) {
case ACL_TYPE_ACCESS:
name_index = F2FS_XATTR_INDEX_POSIX_ACL_ACCESS;
- if (acl) {
- error = posix_acl_equiv_mode(acl, &inode->i_mode);
- if (error < 0)
+ if (acl && !ipage) {
+ error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+ if (error)
return error;
set_acl_inode(fi, inode->i_mode);
- if (error == 0)
- acl = NULL;
}
break;
diff --git a/fs/f2fs/crypto_fname.c b/fs/f2fs/crypto_fname.c
index ab377d496a39..38349ed5ea51 100644
--- a/fs/f2fs/crypto_fname.c
+++ b/fs/f2fs/crypto_fname.c
@@ -333,7 +333,7 @@ int f2fs_fname_disk_to_usr(struct inode *inode,
memset(buf + 4, 0, 4);
} else
memset(buf, 0, 8);
- memcpy(buf + 8, iname->name + iname->len - 16, 16);
+ memcpy(buf + 8, iname->name + ((iname->len - 17) & ~15), 16);
oname->name[0] = '_';
ret = digest_encode(buf, 24, oname->name + 1);
oname->len = ret + 1;
diff --git a/fs/f2fs/crypto_key.c b/fs/f2fs/crypto_key.c
index 5de2d866a25c..18595d7a0efc 100644
--- a/fs/f2fs/crypto_key.c
+++ b/fs/f2fs/crypto_key.c
@@ -92,7 +92,6 @@ static void f2fs_free_crypt_info(struct f2fs_crypt_info *ci)
if (!ci)
return;
- key_put(ci->ci_keyring_key);
crypto_free_ablkcipher(ci->ci_ctfm);
kmem_cache_free(f2fs_crypt_info_cachep, ci);
}
@@ -113,7 +112,7 @@ void f2fs_free_encryption_info(struct inode *inode, struct f2fs_crypt_info *ci)
f2fs_free_crypt_info(ci);
}
-int _f2fs_get_encryption_info(struct inode *inode)
+int f2fs_get_encryption_info(struct inode *inode)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
struct f2fs_crypt_info *crypt_info;
@@ -129,18 +128,12 @@ int _f2fs_get_encryption_info(struct inode *inode)
char mode;
int res;
+ if (fi->i_crypt_info)
+ return 0;
+
res = f2fs_crypto_initialize();
if (res)
return res;
-retry:
- crypt_info = ACCESS_ONCE(fi->i_crypt_info);
- if (crypt_info) {
- if (!crypt_info->ci_keyring_key ||
- key_validate(crypt_info->ci_keyring_key) == 0)
- return 0;
- f2fs_free_encryption_info(inode, crypt_info);
- goto retry;
- }
res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
@@ -159,7 +152,6 @@ retry:
crypt_info->ci_data_mode = ctx.contents_encryption_mode;
crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
crypt_info->ci_ctfm = NULL;
- crypt_info->ci_keyring_key = NULL;
memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
sizeof(crypt_info->ci_master_key));
if (S_ISREG(inode->i_mode))
@@ -197,7 +189,6 @@ retry:
keyring_key = NULL;
goto out;
}
- crypt_info->ci_keyring_key = keyring_key;
BUG_ON(keyring_key->type != &key_type_logon);
ukp = user_key_payload(keyring_key);
if (ukp->datalen != sizeof(struct f2fs_encryption_key)) {
@@ -230,17 +221,12 @@ retry:
if (res)
goto out;
- memzero_explicit(raw_key, sizeof(raw_key));
- if (cmpxchg(&fi->i_crypt_info, NULL, crypt_info) != NULL) {
- f2fs_free_crypt_info(crypt_info);
- goto retry;
- }
- return 0;
-
+ if (cmpxchg(&fi->i_crypt_info, NULL, crypt_info) == NULL)
+ crypt_info = NULL;
out:
if (res == -ENOKEY && !S_ISREG(inode->i_mode))
res = 0;
-
+ key_put(keyring_key);
f2fs_free_crypt_info(crypt_info);
memzero_explicit(raw_key, sizeof(raw_key));
return res;
diff --git a/fs/f2fs/crypto_policy.c b/fs/f2fs/crypto_policy.c
index d4a96af513c2..884f3f0fe29d 100644
--- a/fs/f2fs/crypto_policy.c
+++ b/fs/f2fs/crypto_policy.c
@@ -89,6 +89,9 @@ static int f2fs_create_encryption_context_from_policy(
int f2fs_process_policy(const struct f2fs_encryption_policy *policy,
struct inode *inode)
{
+ if (!inode_owner_or_capable(inode))
+ return -EACCES;
+
if (policy->version != 0)
return -EINVAL;
@@ -138,20 +141,38 @@ int f2fs_get_policy(struct inode *inode, struct f2fs_encryption_policy *policy)
int f2fs_is_child_context_consistent_with_parent(struct inode *parent,
struct inode *child)
{
- struct f2fs_crypt_info *parent_ci, *child_ci;
+ const struct f2fs_crypt_info *parent_ci, *child_ci;
+ struct f2fs_encryption_context parent_ctx, child_ctx;
int res;
- if ((parent == NULL) || (child == NULL)) {
- pr_err("parent %p child %p\n", parent, child);
- BUG_ON(1);
- }
+ /* No restrictions on file types which are never encrypted */
+ if (!S_ISREG(child->i_mode) && !S_ISDIR(child->i_mode) &&
+ !S_ISLNK(child->i_mode))
+ return 1;
- /* no restrictions if the parent directory is not encrypted */
+ /* No restrictions if the parent directory is unencrypted */
if (!f2fs_encrypted_inode(parent))
return 1;
- /* if the child directory is not encrypted, this is always a problem */
+
+ /* Encrypted directories must not contain unencrypted files */
if (!f2fs_encrypted_inode(child))
return 0;
+
+ /*
+ * Both parent and child are encrypted, so verify they use the same
+ * encryption policy. Compare the fscrypt_info structs if the keys are
+ * available, otherwise retrieve and compare the fscrypt_contexts.
+ *
+ * Note that the fscrypt_context retrieval will be required frequently
+ * when accessing an encrypted directory tree without the key.
+ * Performance-wise this is not a big deal because we already don't
+ * really optimize for file access without the key (to the extent that
+ * such access is even possible), given that any attempted access
+ * already causes a fscrypt_context retrieval and keyring search.
+ *
+ * In any case, if an unexpected error occurs, fall back to "forbidden".
+ */
+
res = f2fs_get_encryption_info(parent);
if (res)
return 0;
@@ -160,17 +181,35 @@ int f2fs_is_child_context_consistent_with_parent(struct inode *parent,
return 0;
parent_ci = F2FS_I(parent)->i_crypt_info;
child_ci = F2FS_I(child)->i_crypt_info;
- if (!parent_ci && !child_ci)
- return 1;
- if (!parent_ci || !child_ci)
+ if (parent_ci && child_ci) {
+ return memcmp(parent_ci->ci_master_key, child_ci->ci_master_key,
+ F2FS_KEY_DESCRIPTOR_SIZE) == 0 &&
+ (parent_ci->ci_data_mode == child_ci->ci_data_mode) &&
+ (parent_ci->ci_filename_mode ==
+ child_ci->ci_filename_mode) &&
+ (parent_ci->ci_flags == child_ci->ci_flags);
+ }
+
+ res = f2fs_getxattr(parent, F2FS_XATTR_INDEX_ENCRYPTION,
+ F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
+ &parent_ctx, sizeof(parent_ctx), NULL);
+ if (res != sizeof(parent_ctx))
+ return 0;
+
+ res = f2fs_getxattr(child, F2FS_XATTR_INDEX_ENCRYPTION,
+ F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
+ &child_ctx, sizeof(child_ctx), NULL);
+ if (res != sizeof(child_ctx))
return 0;
- return (memcmp(parent_ci->ci_master_key,
- child_ci->ci_master_key,
- F2FS_KEY_DESCRIPTOR_SIZE) == 0 &&
- (parent_ci->ci_data_mode == child_ci->ci_data_mode) &&
- (parent_ci->ci_filename_mode == child_ci->ci_filename_mode) &&
- (parent_ci->ci_flags == child_ci->ci_flags));
+ return memcmp(parent_ctx.master_key_descriptor,
+ child_ctx.master_key_descriptor,
+ F2FS_KEY_DESCRIPTOR_SIZE) == 0 &&
+ (parent_ctx.contents_encryption_mode ==
+ child_ctx.contents_encryption_mode) &&
+ (parent_ctx.filenames_encryption_mode ==
+ child_ctx.filenames_encryption_mode) &&
+ (parent_ctx.flags == child_ctx.flags);
}
/**
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 478e5d54154f..24d6a51b48d1 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -352,6 +352,7 @@ static int stat_open(struct inode *inode, struct file *file)
}
static const struct file_operations stat_fops = {
+ .owner = THIS_MODULE,
.open = stat_open,
.read = seq_read,
.llseek = seq_lseek,
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 7c1678ba8f92..60972a559685 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -124,19 +124,29 @@ struct f2fs_dir_entry *find_target_dentry(struct f2fs_filename *fname,
de = &d->dentry[bit_pos];
- /* encrypted case */
+ if (de->hash_code != namehash)
+ goto not_match;
+
de_name.name = d->filename[bit_pos];
de_name.len = le16_to_cpu(de->name_len);
- /* show encrypted name */
- if (fname->hash) {
- if (de->hash_code == fname->hash)
- goto found;
- } else if (de_name.len == name->len &&
- de->hash_code == namehash &&
- !memcmp(de_name.name, name->name, name->len))
+#ifdef CONFIG_F2FS_FS_ENCRYPTION
+ if (unlikely(!name->name)) {
+ if (fname->usr_fname->name[0] == '_') {
+ if (de_name.len > 32 &&
+ !memcmp(de_name.name + ((de_name.len - 17) & ~15),
+ fname->crypto_buf.name + 8, 16))
+ goto found;
+ goto not_match;
+ }
+ name->name = fname->crypto_buf.name;
+ name->len = fname->crypto_buf.len;
+ }
+#endif
+ if (de_name.len == name->len &&
+ !memcmp(de_name.name, name->name, name->len))
goto found;
-
+not_match:
if (max_slots && max_len > *max_slots)
*max_slots = max_len;
max_len = 0;
@@ -170,7 +180,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
int max_slots;
f2fs_hash_t namehash;
- namehash = f2fs_dentry_hash(&name);
+ namehash = f2fs_dentry_hash(&name, fname);
f2fs_bug_on(F2FS_I_SB(dir), level > MAX_DIR_HASH_DEPTH);
@@ -547,7 +557,7 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
level = 0;
slots = GET_DENTRY_SLOTS(new_name.len);
- dentry_hash = f2fs_dentry_hash(&new_name);
+ dentry_hash = f2fs_dentry_hash(&new_name, NULL);
current_depth = F2FS_I(dir)->i_current_depth;
if (F2FS_I(dir)->chash == dentry_hash) {
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 9db5500d63d9..2871576fbca4 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1722,7 +1722,8 @@ void f2fs_msg(struct super_block *, const char *, const char *, ...);
/*
* hash.c
*/
-f2fs_hash_t f2fs_dentry_hash(const struct qstr *);
+f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info,
+ struct f2fs_filename *fname);
/*
* node.c
@@ -2149,7 +2150,6 @@ void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *, struct bio *);
/* crypto_key.c */
void f2fs_free_encryption_info(struct inode *, struct f2fs_crypt_info *);
-int _f2fs_get_encryption_info(struct inode *inode);
/* crypto_fname.c */
bool f2fs_valid_filenames_enc_mode(uint32_t);
@@ -2170,18 +2170,7 @@ void f2fs_exit_crypto(void);
int f2fs_has_encryption_key(struct inode *);
-static inline int f2fs_get_encryption_info(struct inode *inode)
-{
- struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
-
- if (!ci ||
- (ci->ci_keyring_key &&
- (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
- (1 << KEY_FLAG_REVOKED) |
- (1 << KEY_FLAG_DEAD)))))
- return _f2fs_get_encryption_info(inode);
- return 0;
-}
+int f2fs_get_encryption_info(struct inode *inode);
void f2fs_fname_crypto_free_buffer(struct f2fs_str *);
int f2fs_fname_setup_filename(struct inode *, const struct qstr *,
diff --git a/fs/f2fs/f2fs_crypto.h b/fs/f2fs/f2fs_crypto.h
index c2c1c2b63b25..f113f1a1e8c1 100644
--- a/fs/f2fs/f2fs_crypto.h
+++ b/fs/f2fs/f2fs_crypto.h
@@ -79,7 +79,6 @@ struct f2fs_crypt_info {
char ci_filename_mode;
char ci_flags;
struct crypto_ablkcipher *ci_ctfm;
- struct key *ci_keyring_key;
char ci_master_key[F2FS_KEY_DESCRIPTOR_SIZE];
};
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index a197215ad52b..4b449d263333 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1535,12 +1535,19 @@ static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
#ifdef CONFIG_F2FS_FS_ENCRYPTION
struct f2fs_encryption_policy policy;
struct inode *inode = file_inode(filp);
+ int err;
if (copy_from_user(&policy, (struct f2fs_encryption_policy __user *)arg,
sizeof(policy)))
return -EFAULT;
- return f2fs_process_policy(&policy, inode);
+ mutex_lock(&inode->i_mutex);
+
+ err = f2fs_process_policy(&policy, inode);
+
+ mutex_unlock(&inode->i_mutex);
+
+ return err;
#else
return -EOPNOTSUPP;
#endif
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index 71b7206c431e..b238d2fec3e5 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -70,7 +70,8 @@ static void str2hashbuf(const unsigned char *msg, size_t len,
*buf++ = pad;
}
-f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info)
+f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info,
+ struct f2fs_filename *fname)
{
__u32 hash;
f2fs_hash_t f2fs_hash;
@@ -79,6 +80,10 @@ f2fs_hash_t f2fs_dentry_hash(const struct qstr *name_info)
const unsigned char *name = name_info->name;
size_t len = name_info->len;
+ /* encrypted bigname case */
+ if (fname && !fname->disk_name.name)
+ return cpu_to_le32(fname->hash);
+
if (is_dot_dotdot(name_info))
return 0;
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index bda7126466c0..ad80f916b64d 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -303,7 +303,7 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
if (IS_ERR(ipage))
return NULL;
- namehash = f2fs_dentry_hash(&name);
+ namehash = f2fs_dentry_hash(&name, fname);
inline_dentry = inline_data_addr(ipage);
@@ -468,7 +468,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
f2fs_wait_on_page_writeback(ipage, NODE);
- name_hash = f2fs_dentry_hash(name);
+ name_hash = f2fs_dentry_hash(name, NULL);
make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2);
f2fs_update_dentry(ino, mode, &d, name, name_hash, bit_pos);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 3a65e0132352..4f666368aa85 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -918,6 +918,79 @@ static loff_t max_file_size(unsigned bits)
return result;
}
+static inline bool sanity_check_area_boundary(struct super_block *sb,
+ struct f2fs_super_block *raw_super)
+{
+ u32 segment0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
+ u32 cp_blkaddr = le32_to_cpu(raw_super->cp_blkaddr);
+ u32 sit_blkaddr = le32_to_cpu(raw_super->sit_blkaddr);
+ u32 nat_blkaddr = le32_to_cpu(raw_super->nat_blkaddr);
+ u32 ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
+ u32 main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
+ u32 segment_count_ckpt = le32_to_cpu(raw_super->segment_count_ckpt);
+ u32 segment_count_sit = le32_to_cpu(raw_super->segment_count_sit);
+ u32 segment_count_nat = le32_to_cpu(raw_super->segment_count_nat);
+ u32 segment_count_ssa = le32_to_cpu(raw_super->segment_count_ssa);
+ u32 segment_count_main = le32_to_cpu(raw_super->segment_count_main);
+ u32 segment_count = le32_to_cpu(raw_super->segment_count);
+ u32 log_blocks_per_seg = le32_to_cpu(raw_super->log_blocks_per_seg);
+
+ if (segment0_blkaddr != cp_blkaddr) {
+ f2fs_msg(sb, KERN_INFO,
+ "Mismatch start address, segment0(%u) cp_blkaddr(%u)",
+ segment0_blkaddr, cp_blkaddr);
+ return true;
+ }
+
+ if (cp_blkaddr + (segment_count_ckpt << log_blocks_per_seg) !=
+ sit_blkaddr) {
+ f2fs_msg(sb, KERN_INFO,
+ "Wrong CP boundary, start(%u) end(%u) blocks(%u)",
+ cp_blkaddr, sit_blkaddr,
+ segment_count_ckpt << log_blocks_per_seg);
+ return true;
+ }
+
+ if (sit_blkaddr + (segment_count_sit << log_blocks_per_seg) !=
+ nat_blkaddr) {
+ f2fs_msg(sb, KERN_INFO,
+ "Wrong SIT boundary, start(%u) end(%u) blocks(%u)",
+ sit_blkaddr, nat_blkaddr,
+ segment_count_sit << log_blocks_per_seg);
+ return true;
+ }
+
+ if (nat_blkaddr + (segment_count_nat << log_blocks_per_seg) !=
+ ssa_blkaddr) {
+ f2fs_msg(sb, KERN_INFO,
+ "Wrong NAT boundary, start(%u) end(%u) blocks(%u)",
+ nat_blkaddr, ssa_blkaddr,
+ segment_count_nat << log_blocks_per_seg);
+ return true;
+ }
+
+ if (ssa_blkaddr + (segment_count_ssa << log_blocks_per_seg) !=
+ main_blkaddr) {
+ f2fs_msg(sb, KERN_INFO,
+ "Wrong SSA boundary, start(%u) end(%u) blocks(%u)",
+ ssa_blkaddr, main_blkaddr,
+ segment_count_ssa << log_blocks_per_seg);
+ return true;
+ }
+
+ if (main_blkaddr + (segment_count_main << log_blocks_per_seg) !=
+ segment0_blkaddr + (segment_count << log_blocks_per_seg)) {
+ f2fs_msg(sb, KERN_INFO,
+ "Wrong MAIN_AREA boundary, start(%u) end(%u) blocks(%u)",
+ main_blkaddr,
+ segment0_blkaddr + (segment_count << log_blocks_per_seg),
+ segment_count_main << log_blocks_per_seg);
+ return true;
+ }
+
+ return false;
+}
+
static int sanity_check_raw_super(struct super_block *sb,
struct f2fs_super_block *raw_super)
{
@@ -947,6 +1020,14 @@ static int sanity_check_raw_super(struct super_block *sb,
return 1;
}
+ /* check log blocks per segment */
+ if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) {
+ f2fs_msg(sb, KERN_INFO,
+ "Invalid log blocks per segment (%u)\n",
+ le32_to_cpu(raw_super->log_blocks_per_seg));
+ return 1;
+ }
+
/* Currently, support 512/1024/2048/4096 bytes sector size */
if (le32_to_cpu(raw_super->log_sectorsize) >
F2FS_MAX_LOG_SECTOR_SIZE ||
@@ -965,6 +1046,30 @@ static int sanity_check_raw_super(struct super_block *sb,
le32_to_cpu(raw_super->log_sectorsize));
return 1;
}
+
+ /* check reserved ino info */
+ if (le32_to_cpu(raw_super->node_ino) != 1 ||
+ le32_to_cpu(raw_super->meta_ino) != 2 ||
+ le32_to_cpu(raw_super->root_ino) != 3) {
+ f2fs_msg(sb, KERN_INFO,
+ "Invalid Fs Meta Ino: node(%u) meta(%u) root(%u)",
+ le32_to_cpu(raw_super->node_ino),
+ le32_to_cpu(raw_super->meta_ino),
+ le32_to_cpu(raw_super->root_ino));
+ return 1;
+ }
+
+ if (le32_to_cpu(raw_super->segment_count) > F2FS_MAX_SEGMENT) {
+ f2fs_msg(sb, KERN_INFO,
+ "Invalid segment count (%u)",
+ le32_to_cpu(raw_super->segment_count));
+ return 1;
+ }
+
+ /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
+ if (sanity_check_area_boundary(sb, raw_super))
+ return 1;
+
return 0;
}
@@ -973,6 +1078,8 @@ static int sanity_check_ckpt(struct f2fs_sb_info *sbi)
unsigned int total, fsmeta;
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+ unsigned int main_segs, blocks_per_seg;
+ int i;
total = le32_to_cpu(raw_super->segment_count);
fsmeta = le32_to_cpu(raw_super->segment_count_ckpt);
@@ -984,6 +1091,20 @@ static int sanity_check_ckpt(struct f2fs_sb_info *sbi)
if (unlikely(fsmeta >= total))
return 1;
+ main_segs = le32_to_cpu(raw_super->segment_count_main);
+ blocks_per_seg = sbi->blocks_per_seg;
+
+ for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
+ if (le32_to_cpu(ckpt->cur_node_segno[i]) >= main_segs ||
+ le16_to_cpu(ckpt->cur_node_blkoff[i]) >= blocks_per_seg)
+ return 1;
+ }
+ for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
+ if (le32_to_cpu(ckpt->cur_data_segno[i]) >= main_segs ||
+ le16_to_cpu(ckpt->cur_data_blkoff[i]) >= blocks_per_seg)
+ return 1;
+ }
+
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_msg(sbi->sb, KERN_ERR, "A bug case: need to run fsck");
return 1;
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 509411dd3698..cf644d52c0cf 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1269,6 +1269,16 @@ out:
return 0;
}
+static void fat_dummy_inode_init(struct inode *inode)
+{
+ /* Initialize this dummy inode to work as no-op. */
+ MSDOS_I(inode)->mmu_private = 0;
+ MSDOS_I(inode)->i_start = 0;
+ MSDOS_I(inode)->i_logstart = 0;
+ MSDOS_I(inode)->i_attrs = 0;
+ MSDOS_I(inode)->i_pos = 0;
+}
+
static int fat_read_root(struct inode *inode)
{
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
@@ -1713,12 +1723,13 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
fat_inode = new_inode(sb);
if (!fat_inode)
goto out_fail;
- MSDOS_I(fat_inode)->i_pos = 0;
+ fat_dummy_inode_init(fat_inode);
sbi->fat_inode = fat_inode;
fsinfo_inode = new_inode(sb);
if (!fsinfo_inode)
goto out_fail;
+ fat_dummy_inode_init(fsinfo_inode);
fsinfo_inode->i_ino = MSDOS_FSINFO_INO;
sbi->fsinfo_inode = fsinfo_inode;
insert_inode_hash(fsinfo_inode);
diff --git a/fs/fcntl.c b/fs/fcntl.c
index ee85cd4e136a..62376451bbce 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -740,16 +740,10 @@ static int __init fcntl_init(void)
* Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
* is defined as O_NONBLOCK on some platforms and not on others.
*/
- BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ != HWEIGHT32(
- O_RDONLY | O_WRONLY | O_RDWR |
- O_CREAT | O_EXCL | O_NOCTTY |
- O_TRUNC | O_APPEND | /* O_NONBLOCK | */
- __O_SYNC | O_DSYNC | FASYNC |
- O_DIRECT | O_LARGEFILE | O_DIRECTORY |
- O_NOFOLLOW | O_NOATIME | O_CLOEXEC |
- __FMODE_EXEC | O_PATH | __O_TMPFILE |
- __FMODE_NONOTIFY
- ));
+ BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ !=
+ HWEIGHT32(
+ (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) |
+ __FMODE_EXEC | __FMODE_NONOTIFY));
fasync_cache = kmem_cache_create("fasync_cache",
sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL);
diff --git a/fs/fhandle.c b/fs/fhandle.c
index d59712dfa3e7..ca3c3dd01789 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -228,7 +228,7 @@ long do_handle_open(int mountdirfd,
path_put(&path);
return fd;
}
- file = file_open_root(path.dentry, path.mnt, "", open_flag);
+ file = file_open_root(path.dentry, path.mnt, "", open_flag, 0);
if (IS_ERR(file)) {
put_unused_fd(fd);
retval = PTR_ERR(file);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 7a8ea1351584..60d6fc2e0e4b 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -281,13 +281,15 @@ locked_inode_to_wb_and_lock_list(struct inode *inode)
wb_get(wb);
spin_unlock(&inode->i_lock);
spin_lock(&wb->list_lock);
- wb_put(wb); /* not gonna deref it anymore */
/* i_wb may have changed inbetween, can't use inode_to_wb() */
- if (likely(wb == inode->i_wb))
- return wb; /* @inode already has ref */
+ if (likely(wb == inode->i_wb)) {
+ wb_put(wb); /* @inode already has ref */
+ return wb;
+ }
spin_unlock(&wb->list_lock);
+ wb_put(wb);
cpu_relax();
spin_lock(&inode->i_lock);
}
@@ -1339,10 +1341,10 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
* we go e.g. from filesystem. Flusher thread uses __writeback_single_inode()
* and does more profound writeback list handling in writeback_sb_inodes().
*/
-static int
-writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
- struct writeback_control *wbc)
+static int writeback_single_inode(struct inode *inode,
+ struct writeback_control *wbc)
{
+ struct bdi_writeback *wb;
int ret = 0;
spin_lock(&inode->i_lock);
@@ -1380,7 +1382,8 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
ret = __writeback_single_inode(inode, wbc);
wbc_detach_inode(wbc);
- spin_lock(&wb->list_lock);
+
+ wb = inode_to_wb_and_lock_list(inode);
spin_lock(&inode->i_lock);
/*
* If inode is clean, remove it from writeback lists. Otherwise don't
@@ -1455,6 +1458,7 @@ static long writeback_sb_inodes(struct super_block *sb,
while (!list_empty(&wb->b_io)) {
struct inode *inode = wb_inode(wb->b_io.prev);
+ struct bdi_writeback *tmp_wb;
if (inode->i_sb != sb) {
if (work->sb) {
@@ -1545,15 +1549,23 @@ static long writeback_sb_inodes(struct super_block *sb,
cond_resched();
}
-
- spin_lock(&wb->list_lock);
+ /*
+ * Requeue @inode if still dirty. Be careful as @inode may
+ * have been switched to another wb in the meantime.
+ */
+ tmp_wb = inode_to_wb_and_lock_list(inode);
spin_lock(&inode->i_lock);
if (!(inode->i_state & I_DIRTY_ALL))
wrote++;
- requeue_inode(inode, wb, &wbc);
+ requeue_inode(inode, tmp_wb, &wbc);
inode_sync_complete(inode);
spin_unlock(&inode->i_lock);
+ if (unlikely(tmp_wb != wb)) {
+ spin_unlock(&tmp_wb->list_lock);
+ spin_lock(&wb->list_lock);
+ }
+
/*
* bail out to wb_writeback() often enough to check
* background threshold and other termination conditions.
@@ -2340,7 +2352,6 @@ EXPORT_SYMBOL(sync_inodes_sb);
*/
int write_inode_now(struct inode *inode, int sync)
{
- struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
struct writeback_control wbc = {
.nr_to_write = LONG_MAX,
.sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
@@ -2352,7 +2363,7 @@ int write_inode_now(struct inode *inode, int sync)
wbc.nr_to_write = 0;
might_sleep();
- return writeback_single_inode(inode, wb, &wbc);
+ return writeback_single_inode(inode, &wbc);
}
EXPORT_SYMBOL(write_inode_now);
@@ -2369,7 +2380,7 @@ EXPORT_SYMBOL(write_inode_now);
*/
int sync_inode(struct inode *inode, struct writeback_control *wbc)
{
- return writeback_single_inode(inode, &inode_to_bdi(inode)->wb, wbc);
+ return writeback_single_inode(inode, wbc);
}
EXPORT_SYMBOL(sync_inode);
diff --git a/fs/fscache/cookie.c b/fs/fscache/cookie.c
index 4304072161aa..40d61077bead 100644
--- a/fs/fscache/cookie.c
+++ b/fs/fscache/cookie.c
@@ -542,6 +542,7 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
hlist_for_each_entry(object, &cookie->backing_objects, cookie_link) {
if (invalidate)
set_bit(FSCACHE_OBJECT_RETIRED, &object->flags);
+ clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);
fscache_raise_event(object, FSCACHE_OBJECT_EV_KILL);
}
} else {
@@ -560,6 +561,10 @@ void __fscache_disable_cookie(struct fscache_cookie *cookie, bool invalidate)
wait_on_atomic_t(&cookie->n_active, fscache_wait_atomic_t,
TASK_UNINTERRUPTIBLE);
+ /* Make sure any pending writes are cancelled. */
+ if (cookie->def->type != FSCACHE_COOKIE_TYPE_INDEX)
+ fscache_invalidate_writes(cookie);
+
/* Reset the cookie state if it wasn't relinquished */
if (!test_bit(FSCACHE_COOKIE_RELINQUISHED, &cookie->flags)) {
atomic_inc(&cookie->n_active);
diff --git a/fs/fscache/netfs.c b/fs/fscache/netfs.c
index 9b28649df3a1..a8aa00be4444 100644
--- a/fs/fscache/netfs.c
+++ b/fs/fscache/netfs.c
@@ -48,6 +48,7 @@ int __fscache_register_netfs(struct fscache_netfs *netfs)
cookie->flags = 1 << FSCACHE_COOKIE_ENABLED;
spin_lock_init(&cookie->lock);
+ spin_lock_init(&cookie->stores_lock);
INIT_HLIST_HEAD(&cookie->backing_objects);
/* check the netfs type is not already present */
diff --git a/fs/fscache/object.c b/fs/fscache/object.c
index 9e792e30f4db..7a182c87f378 100644
--- a/fs/fscache/object.c
+++ b/fs/fscache/object.c
@@ -30,6 +30,7 @@ static const struct fscache_state *fscache_look_up_object(struct fscache_object
static const struct fscache_state *fscache_object_available(struct fscache_object *, int);
static const struct fscache_state *fscache_parent_ready(struct fscache_object *, int);
static const struct fscache_state *fscache_update_object(struct fscache_object *, int);
+static const struct fscache_state *fscache_object_dead(struct fscache_object *, int);
#define __STATE_NAME(n) fscache_osm_##n
#define STATE(n) (&__STATE_NAME(n))
@@ -91,7 +92,7 @@ static WORK_STATE(LOOKUP_FAILURE, "LCFL", fscache_lookup_failure);
static WORK_STATE(KILL_OBJECT, "KILL", fscache_kill_object);
static WORK_STATE(KILL_DEPENDENTS, "KDEP", fscache_kill_dependents);
static WORK_STATE(DROP_OBJECT, "DROP", fscache_drop_object);
-static WORK_STATE(OBJECT_DEAD, "DEAD", (void*)2UL);
+static WORK_STATE(OBJECT_DEAD, "DEAD", fscache_object_dead);
static WAIT_STATE(WAIT_FOR_INIT, "?INI",
TRANSIT_TO(INIT_OBJECT, 1 << FSCACHE_OBJECT_EV_NEW_CHILD));
@@ -229,6 +230,10 @@ execute_work_state:
event = -1;
if (new_state == NO_TRANSIT) {
_debug("{OBJ%x} %s notrans", object->debug_id, state->name);
+ if (unlikely(state == STATE(OBJECT_DEAD))) {
+ _leave(" [dead]");
+ return;
+ }
fscache_enqueue_object(object);
event_mask = object->oob_event_mask;
goto unmask_events;
@@ -239,7 +244,7 @@ execute_work_state:
object->state = state = new_state;
if (state->work) {
- if (unlikely(state->work == ((void *)2UL))) {
+ if (unlikely(state == STATE(OBJECT_DEAD))) {
_leave(" [dead]");
return;
}
@@ -645,6 +650,12 @@ static const struct fscache_state *fscache_kill_object(struct fscache_object *ob
fscache_mark_object_dead(object);
object->oob_event_mask = 0;
+ if (test_bit(FSCACHE_OBJECT_RETIRED, &object->flags)) {
+ /* Reject any new read/write ops and abort any that are pending. */
+ clear_bit(FSCACHE_OBJECT_PENDING_WRITE, &object->flags);
+ fscache_cancel_all_ops(object);
+ }
+
if (list_empty(&object->dependents) &&
object->n_ops == 0 &&
object->n_children == 0)
@@ -1077,3 +1088,20 @@ void fscache_object_mark_killed(struct fscache_object *object,
}
}
EXPORT_SYMBOL(fscache_object_mark_killed);
+
+/*
+ * The object is dead. We can get here if an object gets queued by an event
+ * that would lead to its death (such as EV_KILL) when the dispatcher is
+ * already running (and so can be requeued) but hasn't yet cleared the event
+ * mask.
+ */
+static const struct fscache_state *fscache_object_dead(struct fscache_object *object,
+ int event)
+{
+ if (!test_and_set_bit(FSCACHE_OBJECT_RUN_AFTER_DEAD,
+ &object->flags))
+ return NO_TRANSIT;
+
+ WARN(true, "FS-Cache object redispatched after death");
+ return NO_TRANSIT;
+}
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 8e3ee1936c7e..c5b6b7165489 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -90,7 +90,7 @@ static struct list_head *cuse_conntbl_head(dev_t devt)
static ssize_t cuse_read_iter(struct kiocb *kiocb, struct iov_iter *to)
{
- struct fuse_io_priv io = { .async = 0, .file = kiocb->ki_filp };
+ struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(kiocb->ki_filp);
loff_t pos = 0;
return fuse_direct_io(&io, to, &pos, FUSE_DIO_CUSE);
@@ -98,7 +98,7 @@ static ssize_t cuse_read_iter(struct kiocb *kiocb, struct iov_iter *to)
static ssize_t cuse_write_iter(struct kiocb *kiocb, struct iov_iter *from)
{
- struct fuse_io_priv io = { .async = 0, .file = kiocb->ki_filp };
+ struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(kiocb->ki_filp);
loff_t pos = 0;
/*
* No locking or generic_write_checks(), the server is
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ebb5e37455a0..d0cf1f010fbe 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -418,6 +418,10 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
static void queue_interrupt(struct fuse_iqueue *fiq, struct fuse_req *req)
{
spin_lock(&fiq->waitq.lock);
+ if (test_bit(FR_FINISHED, &req->flags)) {
+ spin_unlock(&fiq->waitq.lock);
+ return;
+ }
if (list_empty(&req->intr_entry)) {
list_add_tail(&req->intr_entry, &fiq->interrupts);
wake_up_locked(&fiq->waitq);
@@ -2083,7 +2087,6 @@ static void end_requests(struct fuse_conn *fc, struct list_head *head)
struct fuse_req *req;
req = list_entry(head->next, struct fuse_req, list);
req->out.h.error = -ECONNABORTED;
- clear_bit(FR_PENDING, &req->flags);
clear_bit(FR_SENT, &req->flags);
list_del_init(&req->list);
request_end(fc, req);
@@ -2161,6 +2164,8 @@ void fuse_abort_conn(struct fuse_conn *fc)
spin_lock(&fiq->waitq.lock);
fiq->connected = 0;
list_splice_init(&fiq->pending, &to_end2);
+ list_for_each_entry(req, &to_end2, list)
+ clear_bit(FR_PENDING, &req->flags);
while (forget_pending(fiq))
kfree(dequeue_forget(fiq, 1, NULL));
wake_up_all_locked(&fiq->waitq);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 5e2e08712d3b..4b5f2c4e69c8 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1697,14 +1697,46 @@ error:
static int fuse_setattr(struct dentry *entry, struct iattr *attr)
{
struct inode *inode = d_inode(entry);
+ struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
+ int ret;
if (!fuse_allow_current_process(get_fuse_conn(inode)))
return -EACCES;
- if (attr->ia_valid & ATTR_FILE)
- return fuse_do_setattr(inode, attr, attr->ia_file);
- else
- return fuse_do_setattr(inode, attr, NULL);
+ if (attr->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID)) {
+ int kill;
+
+ attr->ia_valid &= ~(ATTR_KILL_SUID | ATTR_KILL_SGID |
+ ATTR_MODE);
+ /*
+ * ia_mode calculation may have used stale i_mode. Refresh and
+ * recalculate.
+ */
+ ret = fuse_do_getattr(inode, NULL, file);
+ if (ret)
+ return ret;
+
+ attr->ia_mode = inode->i_mode;
+ kill = should_remove_suid(entry);
+ if (kill & ATTR_KILL_SUID) {
+ attr->ia_valid |= ATTR_MODE;
+ attr->ia_mode &= ~S_ISUID;
+ }
+ if (kill & ATTR_KILL_SGID) {
+ attr->ia_valid |= ATTR_MODE;
+ attr->ia_mode &= ~S_ISGID;
+ }
+ }
+ if (!attr->ia_valid)
+ return 0;
+
+ ret = fuse_do_setattr(inode, attr, file);
+ if (!ret) {
+ /* Directory mode changed, may need to revalidate access */
+ if (d_is_dir(entry) && (attr->ia_valid & ATTR_MODE))
+ fuse_invalidate_entry_cache(entry);
+ }
+ return ret;
}
static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry,
@@ -1797,6 +1829,23 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
return ret;
}
+static int fuse_verify_xattr_list(char *list, size_t size)
+{
+ size_t origsize = size;
+
+ while (size) {
+ size_t thislen = strnlen(list, size);
+
+ if (!thislen || thislen == size)
+ return -EIO;
+
+ size -= thislen + 1;
+ list += thislen + 1;
+ }
+
+ return origsize;
+}
+
static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
{
struct inode *inode = d_inode(entry);
@@ -1832,6 +1881,8 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
ret = fuse_simple_request(fc, &args);
if (!ret && !size)
ret = outarg.size;
+ if (ret > 0 && size)
+ ret = fuse_verify_xattr_list(list, ret);
if (ret == -ENOSYS) {
fc->no_listxattr = 1;
ret = -EOPNOTSUPP;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 570ca4053c80..1a063cbfe503 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -46,7 +46,7 @@ struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
{
struct fuse_file *ff;
- ff = kmalloc(sizeof(struct fuse_file), GFP_KERNEL);
+ ff = kzalloc(sizeof(struct fuse_file), GFP_KERNEL);
if (unlikely(!ff))
return NULL;
@@ -100,6 +100,7 @@ static void fuse_file_put(struct fuse_file *ff, bool sync)
iput(req->misc.release.inode);
fuse_put_request(ff->fc, req);
} else if (sync) {
+ __set_bit(FR_FORCE, &req->flags);
__clear_bit(FR_BACKGROUND, &req->flags);
fuse_request_send(ff->fc, req);
iput(req->misc.release.inode);
@@ -417,6 +418,15 @@ static int fuse_flush(struct file *file, fl_owner_t id)
fuse_sync_writes(inode);
mutex_unlock(&inode->i_mutex);
+ if (test_bit(AS_ENOSPC, &file->f_mapping->flags) &&
+ test_and_clear_bit(AS_ENOSPC, &file->f_mapping->flags))
+ err = -ENOSPC;
+ if (test_bit(AS_EIO, &file->f_mapping->flags) &&
+ test_and_clear_bit(AS_EIO, &file->f_mapping->flags))
+ err = -EIO;
+ if (err)
+ return err;
+
req = fuse_get_req_nofail_nopages(fc, file);
memset(&inarg, 0, sizeof(inarg));
inarg.fh = ff->fh;
@@ -462,6 +472,21 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
goto out;
fuse_sync_writes(inode);
+
+ /*
+ * Due to implementation of fuse writeback
+ * filemap_write_and_wait_range() does not catch errors.
+ * We have to do this directly after fuse_sync_writes()
+ */
+ if (test_bit(AS_ENOSPC, &file->f_mapping->flags) &&
+ test_and_clear_bit(AS_ENOSPC, &file->f_mapping->flags))
+ err = -ENOSPC;
+ if (test_bit(AS_EIO, &file->f_mapping->flags) &&
+ test_and_clear_bit(AS_EIO, &file->f_mapping->flags))
+ err = -EIO;
+ if (err)
+ goto out;
+
err = sync_inode_metadata(inode, 1);
if (err)
goto out;
@@ -516,18 +541,23 @@ void fuse_read_fill(struct fuse_req *req, struct file *file, loff_t pos,
req->out.args[0].size = count;
}
-static void fuse_release_user_pages(struct fuse_req *req, int write)
+static void fuse_release_user_pages(struct fuse_req *req, bool should_dirty)
{
unsigned i;
for (i = 0; i < req->num_pages; i++) {
struct page *page = req->pages[i];
- if (write)
+ if (should_dirty)
set_page_dirty_lock(page);
put_page(page);
}
}
+static void fuse_io_release(struct kref *kref)
+{
+ kfree(container_of(kref, struct fuse_io_priv, refcnt));
+}
+
static ssize_t fuse_get_res_by_io(struct fuse_io_priv *io)
{
if (io->err)
@@ -585,8 +615,9 @@ static void fuse_aio_complete(struct fuse_io_priv *io, int err, ssize_t pos)
}
io->iocb->ki_complete(io->iocb, res, 0);
- kfree(io);
}
+
+ kref_put(&io->refcnt, fuse_io_release);
}
static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req)
@@ -613,6 +644,7 @@ static size_t fuse_async_req_send(struct fuse_conn *fc, struct fuse_req *req,
size_t num_bytes, struct fuse_io_priv *io)
{
spin_lock(&io->lock);
+ kref_get(&io->refcnt);
io->size += num_bytes;
io->reqs++;
spin_unlock(&io->lock);
@@ -691,7 +723,7 @@ static void fuse_short_read(struct fuse_req *req, struct inode *inode,
static int fuse_do_readpage(struct file *file, struct page *page)
{
- struct fuse_io_priv io = { .async = 0, .file = file };
+ struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(file);
struct inode *inode = page->mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_req *req;
@@ -984,7 +1016,7 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
size_t res;
unsigned offset;
unsigned i;
- struct fuse_io_priv io = { .async = 0, .file = file };
+ struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(file);
for (i = 0; i < req->num_pages; i++)
fuse_wait_on_page_writeback(inode, req->pages[i]->index);
@@ -1300,6 +1332,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
loff_t *ppos, int flags)
{
int write = flags & FUSE_DIO_WRITE;
+ bool should_dirty = !write && iter_is_iovec(iter);
int cuse = flags & FUSE_DIO_CUSE;
struct file *file = io->file;
struct inode *inode = file->f_mapping->host;
@@ -1344,7 +1377,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
nres = fuse_send_read(req, io, pos, nbytes, owner);
if (!io->async)
- fuse_release_user_pages(req, !write);
+ fuse_release_user_pages(req, should_dirty);
if (req->out.h.error) {
if (!res)
res = req->out.h.error;
@@ -1398,7 +1431,7 @@ static ssize_t __fuse_direct_read(struct fuse_io_priv *io,
static ssize_t fuse_direct_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
- struct fuse_io_priv io = { .async = 0, .file = iocb->ki_filp };
+ struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(iocb->ki_filp);
return __fuse_direct_read(&io, to, &iocb->ki_pos);
}
@@ -1406,7 +1439,7 @@ static ssize_t fuse_direct_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
- struct fuse_io_priv io = { .async = 0, .file = file };
+ struct fuse_io_priv io = FUSE_IO_PRIV_SYNC(file);
ssize_t res;
if (is_bad_inode(inode))
@@ -1965,6 +1998,10 @@ static int fuse_write_end(struct file *file, struct address_space *mapping,
{
struct inode *inode = page->mapping->host;
+ /* Haven't copied anything? Skip zeroing, size extending, dirtying. */
+ if (!copied)
+ goto unlock;
+
if (!PageUptodate(page)) {
/* Zero any unwritten bytes at the end of the page */
size_t endoff = (pos + copied) & ~PAGE_CACHE_MASK;
@@ -1975,6 +2012,8 @@ static int fuse_write_end(struct file *file, struct address_space *mapping,
fuse_write_update_size(inode, pos + copied);
set_page_dirty(page);
+
+unlock:
unlock_page(page);
page_cache_release(page);
@@ -2786,6 +2825,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
loff_t i_size;
size_t count = iov_iter_count(iter);
struct fuse_io_priv *io;
+ bool is_sync = is_sync_kiocb(iocb);
pos = offset;
inode = file->f_mapping->host;
@@ -2806,6 +2846,7 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
if (!io)
return -ENOMEM;
spin_lock_init(&io->lock);
+ kref_init(&io->refcnt);
io->reqs = 1;
io->bytes = -1;
io->size = 0;
@@ -2825,12 +2866,18 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
* to wait on real async I/O requests, so we must submit this request
* synchronously.
*/
- if (!is_sync_kiocb(iocb) && (offset + count > i_size) &&
+ if (!is_sync && (offset + count > i_size) &&
iov_iter_rw(iter) == WRITE)
io->async = false;
- if (io->async && is_sync_kiocb(iocb))
+ if (io->async && is_sync) {
+ /*
+ * Additional reference to keep io around after
+ * calling fuse_aio_complete()
+ */
+ kref_get(&io->refcnt);
io->done = &wait;
+ }
if (iov_iter_rw(iter) == WRITE) {
ret = fuse_direct_io(io, iter, &pos, FUSE_DIO_WRITE);
@@ -2843,14 +2890,14 @@ fuse_direct_IO(struct kiocb *iocb, struct iov_iter *iter, loff_t offset)
fuse_aio_complete(io, ret < 0 ? ret : 0, -1);
/* we have a non-extending, async request, so return */
- if (!is_sync_kiocb(iocb))
+ if (!is_sync)
return -EIOCBQUEUED;
wait_for_completion(&wait);
ret = fuse_get_res_by_io(io);
}
- kfree(io);
+ kref_put(&io->refcnt, fuse_io_release);
if (iov_iter_rw(iter) == WRITE) {
if (ret > 0)
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 405113101db8..604cd42dafef 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -22,6 +22,7 @@
#include <linux/rbtree.h>
#include <linux/poll.h>
#include <linux/workqueue.h>
+#include <linux/kref.h>
/** Max number of pages that can be used in a single read request */
#define FUSE_MAX_PAGES_PER_REQ 32
@@ -243,6 +244,7 @@ struct fuse_args {
/** The request IO state (for asynchronous processing) */
struct fuse_io_priv {
+ struct kref refcnt;
int async;
spinlock_t lock;
unsigned reqs;
@@ -256,6 +258,13 @@ struct fuse_io_priv {
struct completion *done;
};
+#define FUSE_IO_PRIV_SYNC(f) \
+{ \
+ .refcnt = { ATOMIC_INIT(1) }, \
+ .async = 0, \
+ .file = f, \
+}
+
/**
* Request flags
*
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 2913db2a5b99..0d5e8e59b390 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -926,7 +926,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
- FUSE_FLOCK_LOCKS | FUSE_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
+ FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT;
req->in.h.opcode = FUSE_INIT;
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 1be3b061c05c..ff0ac96a8e7b 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -79,17 +79,11 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
if (type == ACL_TYPE_ACCESS) {
umode_t mode = inode->i_mode;
- error = posix_acl_equiv_mode(acl, &mode);
- if (error < 0)
+ error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+ if (error)
return error;
-
- if (error == 0)
- acl = NULL;
-
- if (mode != inode->i_mode) {
- inode->i_mode = mode;
+ if (mode != inode->i_mode)
mark_inode_dirty(inode);
- }
}
if (acl) {
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index ad8a5b757cc7..a443c6e54412 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -760,7 +760,7 @@ static int get_first_leaf(struct gfs2_inode *dip, u32 index,
int error;
error = get_leaf_nr(dip, index, &leaf_no);
- if (!error)
+ if (!IS_ERR_VALUE(error))
error = get_leaf(dip, leaf_no, bh_out);
return error;
@@ -976,7 +976,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
index = name->hash >> (32 - dip->i_depth);
error = get_leaf_nr(dip, index, &leaf_no);
- if (error)
+ if (IS_ERR_VALUE(error))
return error;
/* Get the old leaf block */
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 32e74710b1aa..070901e76653 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -80,9 +80,9 @@ static struct rhashtable_params ht_parms = {
static struct rhashtable gl_hash_table;
-void gfs2_glock_free(struct gfs2_glock *gl)
+static void gfs2_glock_dealloc(struct rcu_head *rcu)
{
- struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+ struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);
if (gl->gl_ops->go_flags & GLOF_ASPACE) {
kmem_cache_free(gfs2_glock_aspace_cachep, gl);
@@ -90,6 +90,13 @@ void gfs2_glock_free(struct gfs2_glock *gl)
kfree(gl->gl_lksb.sb_lvbptr);
kmem_cache_free(gfs2_glock_cachep, gl);
}
+}
+
+void gfs2_glock_free(struct gfs2_glock *gl)
+{
+ struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
+
+ call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
if (atomic_dec_and_test(&sdp->sd_glock_disposal))
wake_up(&sdp->sd_glock_wait);
}
@@ -651,9 +658,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
struct kmem_cache *cachep;
int ret, tries = 0;
+ rcu_read_lock();
gl = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms);
if (gl && !lockref_get_not_dead(&gl->gl_lockref))
gl = NULL;
+ rcu_read_unlock();
*glp = gl;
if (gl)
@@ -721,15 +730,18 @@ again:
if (ret == -EEXIST) {
ret = 0;
+ rcu_read_lock();
tmp = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms);
if (tmp == NULL || !lockref_get_not_dead(&tmp->gl_lockref)) {
if (++tries < 100) {
+ rcu_read_unlock();
cond_resched();
goto again;
}
tmp = NULL;
ret = -ENOMEM;
}
+ rcu_read_unlock();
} else {
WARN_ON_ONCE(ret);
}
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index de7b4f97ac75..4a9077ec9313 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -207,7 +207,7 @@ struct lm_lockname {
struct gfs2_sbd *ln_sbd;
u64 ln_number;
unsigned int ln_type;
-};
+} __packed __aligned(sizeof(int));
#define lm_name_equal(name1, name2) \
(((name1)->ln_number == (name2)->ln_number) && \
@@ -367,6 +367,7 @@ struct gfs2_glock {
loff_t end;
} gl_vm;
};
+ struct rcu_head gl_rcu;
struct rhash_head gl_node;
};
diff --git a/fs/hfsplus/posix_acl.c b/fs/hfsplus/posix_acl.c
index df0c9af68d05..71b3087b7e32 100644
--- a/fs/hfsplus/posix_acl.c
+++ b/fs/hfsplus/posix_acl.c
@@ -68,8 +68,8 @@ int hfsplus_set_posix_acl(struct inode *inode, struct posix_acl *acl,
case ACL_TYPE_ACCESS:
xattr_name = POSIX_ACL_XATTR_ACCESS;
if (acl) {
- err = posix_acl_equiv_mode(acl, &inode->i_mode);
- if (err < 0)
+ err = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+ if (err)
return err;
}
err = 0;
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 5a7b3229b956..f34d6f5a5aca 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -959,10 +959,11 @@ static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
if (S_ISLNK(root_inode->i_mode)) {
char *name = follow_link(host_root_path);
- if (IS_ERR(name))
+ if (IS_ERR(name)) {
err = PTR_ERR(name);
- else
- err = read_name(root_inode, name);
+ goto out_put;
+ }
+ err = read_name(root_inode, name);
kfree(name);
if (err)
goto out_put;
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index a561591896bd..3713fd52b44b 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -15,6 +15,7 @@
#include <linux/sched.h>
#include <linux/bitmap.h>
#include <linux/slab.h>
+#include <linux/seq_file.h>
/* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */
@@ -453,10 +454,6 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
int lowercase, eas, chk, errs, chkdsk, timeshift;
int o;
struct hpfs_sb_info *sbi = hpfs_sb(s);
- char *new_opts = kstrdup(data, GFP_KERNEL);
-
- if (!new_opts)
- return -ENOMEM;
sync_filesystem(s);
@@ -493,17 +490,44 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
if (!(*flags & MS_RDONLY)) mark_dirty(s, 1);
- replace_mount_options(s, new_opts);
-
hpfs_unlock(s);
return 0;
out_err:
hpfs_unlock(s);
- kfree(new_opts);
return -EINVAL;
}
+static int hpfs_show_options(struct seq_file *seq, struct dentry *root)
+{
+ struct hpfs_sb_info *sbi = hpfs_sb(root->d_sb);
+
+ seq_printf(seq, ",uid=%u", from_kuid_munged(&init_user_ns, sbi->sb_uid));
+ seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, sbi->sb_gid));
+ seq_printf(seq, ",umask=%03o", (~sbi->sb_mode & 0777));
+ if (sbi->sb_lowercase)
+ seq_printf(seq, ",case=lower");
+ if (!sbi->sb_chk)
+ seq_printf(seq, ",check=none");
+ if (sbi->sb_chk == 2)
+ seq_printf(seq, ",check=strict");
+ if (!sbi->sb_err)
+ seq_printf(seq, ",errors=continue");
+ if (sbi->sb_err == 2)
+ seq_printf(seq, ",errors=panic");
+ if (!sbi->sb_chkdsk)
+ seq_printf(seq, ",chkdsk=no");
+ if (sbi->sb_chkdsk == 2)
+ seq_printf(seq, ",chkdsk=always");
+ if (!sbi->sb_eas)
+ seq_printf(seq, ",eas=no");
+ if (sbi->sb_eas == 1)
+ seq_printf(seq, ",eas=ro");
+ if (sbi->sb_timeshift)
+ seq_printf(seq, ",timeshift=%d", sbi->sb_timeshift);
+ return 0;
+}
+
/* Super operations */
static const struct super_operations hpfs_sops =
@@ -514,7 +538,7 @@ static const struct super_operations hpfs_sops =
.put_super = hpfs_put_super,
.statfs = hpfs_statfs,
.remount_fs = hpfs_remount_fs,
- .show_options = generic_show_options,
+ .show_options = hpfs_show_options,
};
static int hpfs_fill_super(struct super_block *s, void *options, int silent)
@@ -537,8 +561,6 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
int o;
- save_mount_options(s, options);
-
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi) {
return -ENOMEM;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 595ebdb41846..a17da8b57fc6 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -191,7 +191,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
addr = ALIGN(addr, huge_page_size(h));
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
- (!vma || addr + len <= vma->vm_start))
+ (!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
diff --git a/fs/inode.c b/fs/inode.c
index 1be5f9003eb3..b0edef500590 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1733,8 +1733,8 @@ static int __remove_privs(struct dentry *dentry, int kill)
*/
int file_remove_privs(struct file *file)
{
- struct dentry *dentry = file->f_path.dentry;
- struct inode *inode = d_inode(dentry);
+ struct dentry *dentry = file_dentry(file);
+ struct inode *inode = file_inode(file);
int kill;
int error = 0;
@@ -1742,7 +1742,7 @@ int file_remove_privs(struct file *file)
if (IS_NOSEC(inode))
return 0;
- kill = file_needs_remove_privs(file);
+ kill = dentry_needs_remove_privs(dentry);
if (kill < 0)
return kill;
if (kill)
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index d67a16f2a45d..350f67fb5b9c 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -690,6 +690,11 @@ static int isofs_fill_super(struct super_block *s, void *data, int silent)
pri_bh = NULL;
root_found:
+ /* We don't support read-write mounts */
+ if (!(s->s_flags & MS_RDONLY)) {
+ error = -EACCES;
+ goto out_freebh;
+ }
if (joliet_level && (pri == NULL || !opt.rock)) {
/* This is the case of Joliet with the norock mount flag.
@@ -1503,9 +1508,6 @@ struct inode *__isofs_iget(struct super_block *sb,
static struct dentry *isofs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- /* We don't support read-write mounts */
- if (!(flags & MS_RDONLY))
- return ERR_PTR(-EACCES);
return mount_bdev(fs_type, flags, dev_name, data, isofs_fill_super);
}
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 735d7522a3a9..204659a5f6db 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -203,6 +203,8 @@ int get_rock_ridge_filename(struct iso_directory_record *de,
int retnamlen = 0;
int truncate = 0;
int ret = 0;
+ char *p;
+ int len;
if (!ISOFS_SB(inode->i_sb)->s_rock)
return 0;
@@ -267,12 +269,17 @@ repeat:
rr->u.NM.flags);
break;
}
- if ((strlen(retname) + rr->len - 5) >= 254) {
+ len = rr->len - 5;
+ if (retnamlen + len >= 254) {
truncate = 1;
break;
}
- strncat(retname, rr->u.NM.name, rr->len - 5);
- retnamlen += rr->len - 5;
+ p = memchr(rr->u.NM.name, '\0', len);
+ if (unlikely(p))
+ len = p - rr->u.NM.name;
+ memcpy(retname + retnamlen, rr->u.NM.name, len);
+ retnamlen += len;
+ retname[retnamlen] = '\0';
break;
case SIG('R', 'E'):
kfree(rs.buffer);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 36345fefa3ff..2d964ce45606 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -124,7 +124,7 @@ static int journal_submit_commit_record(journal_t *journal,
struct commit_header *tmp;
struct buffer_head *bh;
int ret;
- struct timespec now = current_kernel_time();
+ struct timespec64 now = current_kernel_time64();
*cbh = NULL;
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 81e622681c82..624a57a9c4aa 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1408,11 +1408,12 @@ out:
/**
* jbd2_mark_journal_empty() - Mark on disk journal as empty.
* @journal: The journal to update.
+ * @write_op: With which operation should we write the journal sb
*
* Update a journal's dynamic superblock fields to show that journal is empty.
* Write updated superblock to disk waiting for IO to complete.
*/
-static void jbd2_mark_journal_empty(journal_t *journal)
+static void jbd2_mark_journal_empty(journal_t *journal, int write_op)
{
journal_superblock_t *sb = journal->j_superblock;
@@ -1430,7 +1431,7 @@ static void jbd2_mark_journal_empty(journal_t *journal)
sb->s_start = cpu_to_be32(0);
read_unlock(&journal->j_state_lock);
- jbd2_write_superblock(journal, WRITE_FUA);
+ jbd2_write_superblock(journal, write_op);
/* Log is no longer empty */
write_lock(&journal->j_state_lock);
@@ -1716,7 +1717,13 @@ int jbd2_journal_destroy(journal_t *journal)
if (journal->j_sb_buffer) {
if (!is_journal_aborted(journal)) {
mutex_lock(&journal->j_checkpoint_mutex);
- jbd2_mark_journal_empty(journal);
+
+ write_lock(&journal->j_state_lock);
+ journal->j_tail_sequence =
+ ++journal->j_transaction_sequence;
+ write_unlock(&journal->j_state_lock);
+
+ jbd2_mark_journal_empty(journal, WRITE_FLUSH_FUA);
mutex_unlock(&journal->j_checkpoint_mutex);
} else
err = -EIO;
@@ -1975,7 +1982,7 @@ int jbd2_journal_flush(journal_t *journal)
* the magic code for a fully-recovered superblock. Any future
* commits of data to the journal will restore the current
* s_start value. */
- jbd2_mark_journal_empty(journal);
+ jbd2_mark_journal_empty(journal, WRITE_FUA);
mutex_unlock(&journal->j_checkpoint_mutex);
write_lock(&journal->j_state_lock);
J_ASSERT(!journal->j_running_transaction);
@@ -2021,7 +2028,7 @@ int jbd2_journal_wipe(journal_t *journal, int write)
if (write) {
/* Lock to make assertions happy... */
mutex_lock(&journal->j_checkpoint_mutex);
- jbd2_mark_journal_empty(journal);
+ jbd2_mark_journal_empty(journal, WRITE_FUA);
mutex_unlock(&journal->j_checkpoint_mutex);
}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index ca181e81c765..a2e724053919 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1156,6 +1156,7 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
JBUFFER_TRACE(jh, "file as BJ_Reserved");
spin_lock(&journal->j_list_lock);
__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
+ spin_unlock(&journal->j_list_lock);
} else if (jh->b_transaction == journal->j_committing_transaction) {
/* first access by this transaction */
jh->b_modified = 0;
@@ -1163,8 +1164,8 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
JBUFFER_TRACE(jh, "set next transaction");
spin_lock(&journal->j_list_lock);
jh->b_next_transaction = transaction;
+ spin_unlock(&journal->j_list_lock);
}
- spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
/*
@@ -1875,7 +1876,9 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
__blist_del_buffer(list, jh);
jh->b_jlist = BJ_None;
- if (test_clear_buffer_jbddirty(bh))
+ if (transaction && is_journal_aborted(transaction->t_journal))
+ clear_buffer_jbddirty(bh);
+ else if (test_clear_buffer_jbddirty(bh))
mark_buffer_dirty(bh); /* Expose it to the VM */
}
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 2f7a3c090489..f9f86f87d32b 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -235,9 +235,10 @@ int jffs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
case ACL_TYPE_ACCESS:
xprefix = JFFS2_XPREFIX_ACL_ACCESS;
if (acl) {
- umode_t mode = inode->i_mode;
- rc = posix_acl_equiv_mode(acl, &mode);
- if (rc < 0)
+ umode_t mode;
+
+ rc = posix_acl_update_mode(inode, &mode, &acl);
+ if (rc)
return rc;
if (inode->i_mode != mode) {
struct iattr attr;
@@ -249,8 +250,6 @@ int jffs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
if (rc < 0)
return rc;
}
- if (rc == 0)
- acl = NULL;
}
break;
case ACL_TYPE_DEFAULT:
diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c
index 0c8ca830b113..9fad9f4fe883 100644
--- a/fs/jfs/acl.c
+++ b/fs/jfs/acl.c
@@ -84,13 +84,11 @@ static int __jfs_set_acl(tid_t tid, struct inode *inode, int type,
case ACL_TYPE_ACCESS:
ea_name = POSIX_ACL_XATTR_ACCESS;
if (acl) {
- rc = posix_acl_equiv_mode(acl, &inode->i_mode);
- if (rc < 0)
+ rc = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+ if (rc)
return rc;
inode->i_ctime = CURRENT_TIME;
mark_inode_dirty(inode);
- if (rc == 0)
- acl = NULL;
}
break;
case ACL_TYPE_DEFAULT:
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 8f9176caf098..c8d58c5ac8ae 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -758,7 +758,7 @@ static ssize_t jfs_quota_read(struct super_block *sb, int type, char *data,
sb->s_blocksize - offset : toread;
tmp_bh.b_state = 0;
- tmp_bh.b_size = 1 << inode->i_blkbits;
+ tmp_bh.b_size = i_blocksize(inode);
err = jfs_get_block(inode, blk, &tmp_bh, 0);
if (err)
return err;
@@ -798,7 +798,7 @@ static ssize_t jfs_quota_write(struct super_block *sb, int type,
sb->s_blocksize - offset : towrite;
tmp_bh.b_state = 0;
- tmp_bh.b_size = 1 << inode->i_blkbits;
+ tmp_bh.b_size = i_blocksize(inode);
err = jfs_get_block(inode, blk, &tmp_bh, 1);
if (err)
goto out;
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 7247252ee9b1..6e9a912d394c 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -833,21 +833,35 @@ repeat:
mutex_lock(&kernfs_mutex);
list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
+ struct kernfs_node *parent;
struct inode *inode;
- struct dentry *dentry;
+ /*
+ * We want fsnotify_modify() on @kn but as the
+ * modifications aren't originating from userland don't
+ * have the matching @file available. Look up the inodes
+ * and generate the events manually.
+ */
inode = ilookup(info->sb, kn->ino);
if (!inode)
continue;
- dentry = d_find_any_alias(inode);
- if (dentry) {
- fsnotify_parent(NULL, dentry, FS_MODIFY);
- fsnotify(inode, FS_MODIFY, inode, FSNOTIFY_EVENT_INODE,
- NULL, 0);
- dput(dentry);
+ parent = kernfs_get_parent(kn);
+ if (parent) {
+ struct inode *p_inode;
+
+ p_inode = ilookup(info->sb, parent->ino);
+ if (p_inode) {
+ fsnotify(p_inode, FS_MODIFY | FS_EVENT_ON_CHILD,
+ inode, FSNOTIFY_EVENT_INODE, kn->name, 0);
+ iput(p_inode);
+ }
+
+ kernfs_put(parent);
}
+ fsnotify(inode, FS_MODIFY, inode, FSNOTIFY_EVENT_INODE,
+ kn->name, 0);
iput(inode);
}
diff --git a/fs/locks.c b/fs/locks.c
index 6333263b7bc8..8eddae23e10b 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1602,7 +1602,7 @@ generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **pr
{
struct file_lock *fl, *my_fl = NULL, *lease;
struct dentry *dentry = filp->f_path.dentry;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = file_inode(filp);
struct file_lock_context *ctx;
bool is_deleg = (*flp)->fl_flags & FL_DELEG;
int error;
diff --git a/fs/mount.h b/fs/mount.h
index 14db05d424f7..37c64bbe840c 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -13,6 +13,8 @@ struct mnt_namespace {
u64 seq; /* Sequence number to prevent loops */
wait_queue_head_t poll;
u64 event;
+ unsigned int mounts; /* # of mounts in the namespace */
+ unsigned int pending_mounts;
};
struct mnt_pcp {
@@ -55,6 +57,7 @@ struct mount {
struct mnt_namespace *mnt_ns; /* containing namespace */
struct mountpoint *mnt_mp; /* where is it mounted */
struct hlist_node mnt_mp_list; /* list mounts with the same mountpoint */
+ struct list_head mnt_umounting; /* list entry for umount propagation */
#ifdef CONFIG_FSNOTIFY
struct hlist_head mnt_fsnotify_marks;
__u32 mnt_fsnotify_mask;
@@ -86,7 +89,6 @@ static inline int is_mounted(struct vfsmount *mnt)
}
extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *);
-extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *);
extern int __legitimize_mnt(struct vfsmount *, unsigned);
extern bool legitimize_mnt(struct vfsmount *, unsigned);
diff --git a/fs/mpage.c b/fs/mpage.c
index 1480d3a18037..6ade29b19494 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -111,7 +111,7 @@ map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block)
SetPageUptodate(page);
return;
}
- create_empty_buffers(page, 1 << inode->i_blkbits, 0);
+ create_empty_buffers(page, i_blocksize(inode), 0);
}
head = page_buffers(page);
page_bh = head;
diff --git a/fs/namei.c b/fs/namei.c
index d8ee4da93650..3f96ae087488 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -887,6 +887,7 @@ static inline int may_follow_link(struct nameidata *nd)
{
const struct inode *inode;
const struct inode *parent;
+ kuid_t puid;
if (!sysctl_protected_symlinks)
return 0;
@@ -902,7 +903,8 @@ static inline int may_follow_link(struct nameidata *nd)
return 0;
/* Allowed if parent directory and link owner match. */
- if (uid_eq(parent->i_uid, inode->i_uid))
+ puid = parent->i_uid;
+ if (uid_valid(puid) && uid_eq(puid, inode->i_uid))
return 0;
if (nd->flags & LOOKUP_RCU)
@@ -2906,22 +2908,10 @@ no_open:
dentry = lookup_real(dir, dentry, nd->flags);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
-
- if (create_error) {
- int open_flag = op->open_flag;
-
- error = create_error;
- if ((open_flag & O_EXCL)) {
- if (!dentry->d_inode)
- goto out;
- } else if (!dentry->d_inode) {
- goto out;
- } else if ((open_flag & O_TRUNC) &&
- d_is_reg(dentry)) {
- goto out;
- }
- /* will fail later, go on to get the right error */
- }
+ }
+ if (create_error && !dentry->d_inode) {
+ error = create_error;
+ goto out;
}
looked_up:
path->dentry = dentry;
@@ -4189,13 +4179,17 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
{
int error;
bool is_dir = d_is_dir(old_dentry);
- const unsigned char *old_name;
struct inode *source = old_dentry->d_inode;
struct inode *target = new_dentry->d_inode;
bool new_is_dir = false;
unsigned max_links = new_dir->i_sb->s_max_links;
+ struct name_snapshot old_name;
- if (source == target)
+ /*
+ * Check source == target.
+ * On overlayfs need to look at underlying inodes.
+ */
+ if (vfs_select_inode(old_dentry, 0) == vfs_select_inode(new_dentry, 0))
return 0;
error = may_delete(old_dir, old_dentry, is_dir);
@@ -4243,7 +4237,7 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (error)
return error;
- old_name = fsnotify_oldname_init(old_dentry->d_name.name);
+ take_dentry_name_snapshot(&old_name, old_dentry);
dget(new_dentry);
if (!is_dir || (flags & RENAME_EXCHANGE))
lock_two_nondirectories(source, target);
@@ -4304,14 +4298,14 @@ out:
mutex_unlock(&target->i_mutex);
dput(new_dentry);
if (!error) {
- fsnotify_move(old_dir, new_dir, old_name, is_dir,
+ fsnotify_move(old_dir, new_dir, old_name.name, is_dir,
!(flags & RENAME_EXCHANGE) ? target : NULL, old_dentry);
if (flags & RENAME_EXCHANGE) {
fsnotify_move(new_dir, old_dir, old_dentry->d_name.name,
new_is_dir, NULL, new_dentry);
}
}
- fsnotify_oldname_free(old_name);
+ release_dentry_name_snapshot(&old_name);
return error;
}
diff --git a/fs/namespace.c b/fs/namespace.c
index 0570729c87fd..ec4078d16eb7 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -27,6 +27,9 @@
#include "pnode.h"
#include "internal.h"
+/* Maximum number of mounts in a mount namespace */
+unsigned int sysctl_mount_max __read_mostly = 100000;
+
static unsigned int m_hash_mask __read_mostly;
static unsigned int m_hash_shift __read_mostly;
static unsigned int mp_hash_mask __read_mostly;
@@ -234,6 +237,7 @@ static struct mount *alloc_vfsmnt(const char *name)
INIT_LIST_HEAD(&mnt->mnt_slave_list);
INIT_LIST_HEAD(&mnt->mnt_slave);
INIT_HLIST_NODE(&mnt->mnt_mp_list);
+ INIT_LIST_HEAD(&mnt->mnt_umounting);
#ifdef CONFIG_FSNOTIFY
INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
#endif
@@ -638,28 +642,6 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
}
/*
- * find the last mount at @dentry on vfsmount @mnt.
- * mount_lock must be held.
- */
-struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
-{
- struct mount *p, *res = NULL;
- p = __lookup_mnt(mnt, dentry);
- if (!p)
- goto out;
- if (!(p->mnt.mnt_flags & MNT_UMOUNT))
- res = p;
- hlist_for_each_entry_continue(p, mnt_hash) {
- if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
- break;
- if (!(p->mnt.mnt_flags & MNT_UMOUNT))
- res = p;
- }
-out:
- return res;
-}
-
-/*
* lookup_mnt - Return the first child mount mounted at path
*
* "First" means first mounted chronologically. If you create the
@@ -743,26 +725,50 @@ static struct mountpoint *lookup_mountpoint(struct dentry *dentry)
return NULL;
}
-static struct mountpoint *new_mountpoint(struct dentry *dentry)
+static struct mountpoint *get_mountpoint(struct dentry *dentry)
{
- struct hlist_head *chain = mp_hash(dentry);
- struct mountpoint *mp;
+ struct mountpoint *mp, *new = NULL;
int ret;
- mp = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
- if (!mp)
+ if (d_mountpoint(dentry)) {
+mountpoint:
+ read_seqlock_excl(&mount_lock);
+ mp = lookup_mountpoint(dentry);
+ read_sequnlock_excl(&mount_lock);
+ if (mp)
+ goto done;
+ }
+
+ if (!new)
+ new = kmalloc(sizeof(struct mountpoint), GFP_KERNEL);
+ if (!new)
return ERR_PTR(-ENOMEM);
+
+ /* Exactly one processes may set d_mounted */
ret = d_set_mounted(dentry);
- if (ret) {
- kfree(mp);
- return ERR_PTR(ret);
- }
- mp->m_dentry = dentry;
- mp->m_count = 1;
- hlist_add_head(&mp->m_hash, chain);
- INIT_HLIST_HEAD(&mp->m_list);
+ /* Someone else set d_mounted? */
+ if (ret == -EBUSY)
+ goto mountpoint;
+
+ /* The dentry is not available as a mountpoint? */
+ mp = ERR_PTR(ret);
+ if (ret)
+ goto done;
+
+ /* Add the new mountpoint to the hash table */
+ read_seqlock_excl(&mount_lock);
+ new->m_dentry = dentry;
+ new->m_count = 1;
+ hlist_add_head(&new->m_hash, mp_hash(dentry));
+ INIT_HLIST_HEAD(&new->m_list);
+ read_sequnlock_excl(&mount_lock);
+
+ mp = new;
+ new = NULL;
+done:
+ kfree(new);
return mp;
}
@@ -855,6 +861,13 @@ void mnt_set_mountpoint(struct mount *mnt,
hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
}
+static void __attach_mnt(struct mount *mnt, struct mount *parent)
+{
+ hlist_add_head_rcu(&mnt->mnt_hash,
+ m_hash(&parent->mnt, mnt->mnt_mountpoint));
+ list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+}
+
/*
* vfsmount lock must be held for write
*/
@@ -863,28 +876,45 @@ static void attach_mnt(struct mount *mnt,
struct mountpoint *mp)
{
mnt_set_mountpoint(parent, mp, mnt);
- hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry));
- list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+ __attach_mnt(mnt, parent);
}
-static void attach_shadowed(struct mount *mnt,
- struct mount *parent,
- struct mount *shadows)
+void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
{
- if (shadows) {
- hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
- list_add(&mnt->mnt_child, &shadows->mnt_child);
- } else {
- hlist_add_head_rcu(&mnt->mnt_hash,
- m_hash(&parent->mnt, mnt->mnt_mountpoint));
- list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
- }
+ struct mountpoint *old_mp = mnt->mnt_mp;
+ struct dentry *old_mountpoint = mnt->mnt_mountpoint;
+ struct mount *old_parent = mnt->mnt_parent;
+
+ list_del_init(&mnt->mnt_child);
+ hlist_del_init(&mnt->mnt_mp_list);
+ hlist_del_init_rcu(&mnt->mnt_hash);
+
+ attach_mnt(mnt, parent, mp);
+
+ put_mountpoint(old_mp);
+
+ /*
+ * Safely avoid even the suggestion this code might sleep or
+ * lock the mount hash by taking advantage of the knowledge that
+ * mnt_change_mountpoint will not release the final reference
+ * to a mountpoint.
+ *
+ * During mounting, the mount passed in as the parent mount will
+ * continue to use the old mountpoint and during unmounting, the
+ * old mountpoint will continue to exist until namespace_unlock,
+ * which happens well after mnt_change_mountpoint.
+ */
+ spin_lock(&old_mountpoint->d_lock);
+ old_mountpoint->d_lockref.count--;
+ spin_unlock(&old_mountpoint->d_lock);
+
+ mnt_add_count(old_parent, -1);
}
/*
* vfsmount lock must be held for write
*/
-static void commit_tree(struct mount *mnt, struct mount *shadows)
+static void commit_tree(struct mount *mnt)
{
struct mount *parent = mnt->mnt_parent;
struct mount *m;
@@ -899,7 +929,10 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
list_splice(&head, n->list.prev);
- attach_shadowed(mnt, parent, shadows);
+ n->mounts += n->pending_mounts;
+ n->pending_mounts = 0;
+
+ __attach_mnt(mnt, parent);
touch_mnt_namespace(n);
}
@@ -1419,11 +1452,16 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
propagate_umount(&tmp_list);
while (!list_empty(&tmp_list)) {
+ struct mnt_namespace *ns;
bool disconnect;
p = list_first_entry(&tmp_list, struct mount, mnt_list);
list_del_init(&p->mnt_expire);
list_del_init(&p->mnt_list);
- __touch_mnt_namespace(p->mnt_ns);
+ ns = p->mnt_ns;
+ if (ns) {
+ ns->mounts--;
+ __touch_mnt_namespace(ns);
+ }
p->mnt_ns = NULL;
if (how & UMOUNT_SYNC)
p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
@@ -1557,11 +1595,12 @@ void __detach_mounts(struct dentry *dentry)
struct mount *mnt;
namespace_lock();
+ lock_mount_hash();
mp = lookup_mountpoint(dentry);
if (IS_ERR_OR_NULL(mp))
goto out_unlock;
- lock_mount_hash();
+ event++;
while (!hlist_empty(&mp->m_list)) {
mnt = hlist_entry(mp->m_list.first, struct mount, mnt_mp_list);
if (mnt->mnt.mnt_flags & MNT_UMOUNT) {
@@ -1570,9 +1609,9 @@ void __detach_mounts(struct dentry *dentry)
}
else umount_tree(mnt, UMOUNT_CONNECTED);
}
- unlock_mount_hash();
put_mountpoint(mp);
out_unlock:
+ unlock_mount_hash();
namespace_unlock();
}
@@ -1693,7 +1732,6 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
continue;
for (s = r; s; s = next_mnt(s, r)) {
- struct mount *t = NULL;
if (!(flag & CL_COPY_UNBINDABLE) &&
IS_MNT_UNBINDABLE(s)) {
s = skip_mnt_tree(s);
@@ -1715,14 +1753,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
goto out;
lock_mount_hash();
list_add_tail(&q->mnt_list, &res->mnt_list);
- mnt_set_mountpoint(parent, p->mnt_mp, q);
- if (!list_empty(&parent->mnt_mounts)) {
- t = list_last_entry(&parent->mnt_mounts,
- struct mount, mnt_child);
- if (t->mnt_mp != p->mnt_mp)
- t = NULL;
- }
- attach_shadowed(q, parent, t);
+ attach_mnt(q, parent, p->mnt_mp);
unlock_mount_hash();
}
}
@@ -1831,6 +1862,28 @@ static int invent_group_ids(struct mount *mnt, bool recurse)
return 0;
}
+int count_mounts(struct mnt_namespace *ns, struct mount *mnt)
+{
+ unsigned int max = READ_ONCE(sysctl_mount_max);
+ unsigned int mounts = 0, old, pending, sum;
+ struct mount *p;
+
+ for (p = mnt; p; p = next_mnt(p, mnt))
+ mounts++;
+
+ old = ns->mounts;
+ pending = ns->pending_mounts;
+ sum = old + pending;
+ if ((old > sum) ||
+ (pending > sum) ||
+ (max < sum) ||
+ (mounts > (max - sum)))
+ return -ENOSPC;
+
+ ns->pending_mounts = pending + mounts;
+ return 0;
+}
+
/*
* @source_mnt : mount tree to be attached
* @nd : place the mount tree @source_mnt is attached
@@ -1900,10 +1953,26 @@ static int attach_recursive_mnt(struct mount *source_mnt,
struct path *parent_path)
{
HLIST_HEAD(tree_list);
+ struct mnt_namespace *ns = dest_mnt->mnt_ns;
+ struct mountpoint *smp;
struct mount *child, *p;
struct hlist_node *n;
int err;
+ /* Preallocate a mountpoint in case the new mounts need
+ * to be tucked under other mounts.
+ */
+ smp = get_mountpoint(source_mnt->mnt.mnt_root);
+ if (IS_ERR(smp))
+ return PTR_ERR(smp);
+
+ /* Is there space to add these mounts to the mount namespace? */
+ if (!parent_path) {
+ err = count_mounts(ns, source_mnt);
+ if (err)
+ goto out;
+ }
+
if (IS_MNT_SHARED(dest_mnt)) {
err = invent_group_ids(source_mnt, true);
if (err)
@@ -1923,16 +1992,19 @@ static int attach_recursive_mnt(struct mount *source_mnt,
touch_mnt_namespace(source_mnt->mnt_ns);
} else {
mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
- commit_tree(source_mnt, NULL);
+ commit_tree(source_mnt);
}
hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
struct mount *q;
hlist_del_init(&child->mnt_hash);
- q = __lookup_mnt_last(&child->mnt_parent->mnt,
- child->mnt_mountpoint);
- commit_tree(child, q);
+ q = __lookup_mnt(&child->mnt_parent->mnt,
+ child->mnt_mountpoint);
+ if (q)
+ mnt_change_mountpoint(child, smp, q);
+ commit_tree(child);
}
+ put_mountpoint(smp);
unlock_mount_hash();
return 0;
@@ -1940,11 +2012,18 @@ static int attach_recursive_mnt(struct mount *source_mnt,
out_cleanup_ids:
while (!hlist_empty(&tree_list)) {
child = hlist_entry(tree_list.first, struct mount, mnt_hash);
+ child->mnt_parent->mnt_ns->pending_mounts = 0;
umount_tree(child, UMOUNT_SYNC);
}
unlock_mount_hash();
cleanup_group_ids(source_mnt, NULL);
out:
+ ns->pending_mounts = 0;
+
+ read_seqlock_excl(&mount_lock);
+ put_mountpoint(smp);
+ read_sequnlock_excl(&mount_lock);
+
return err;
}
@@ -1961,9 +2040,7 @@ retry:
namespace_lock();
mnt = lookup_mnt(path);
if (likely(!mnt)) {
- struct mountpoint *mp = lookup_mountpoint(dentry);
- if (!mp)
- mp = new_mountpoint(dentry);
+ struct mountpoint *mp = get_mountpoint(dentry);
if (IS_ERR(mp)) {
namespace_unlock();
mutex_unlock(&dentry->d_inode->i_mutex);
@@ -1982,7 +2059,11 @@ retry:
static void unlock_mount(struct mountpoint *where)
{
struct dentry *dentry = where->m_dentry;
+
+ read_seqlock_excl(&mount_lock);
put_mountpoint(where);
+ read_sequnlock_excl(&mount_lock);
+
namespace_unlock();
mutex_unlock(&dentry->d_inode->i_mutex);
}
@@ -2401,8 +2482,10 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
}
if (type->fs_flags & FS_USERNS_VISIBLE) {
- if (!fs_fully_visible(type, &mnt_flags))
+ if (!fs_fully_visible(type, &mnt_flags)) {
+ put_filesystem(type);
return -EPERM;
+ }
}
}
@@ -2766,6 +2849,8 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns)
init_waitqueue_head(&new_ns->poll);
new_ns->event = 0;
new_ns->user_ns = get_user_ns(user_ns);
+ new_ns->mounts = 0;
+ new_ns->pending_mounts = 0;
return new_ns;
}
@@ -2815,6 +2900,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
q = new;
while (p) {
q->mnt_ns = new_ns;
+ new_ns->mounts++;
if (new_fs) {
if (&p->mnt == new_fs->root.mnt) {
new_fs->root.mnt = mntget(&q->mnt);
@@ -2853,6 +2939,7 @@ static struct mnt_namespace *create_mnt_ns(struct vfsmount *m)
struct mount *mnt = real_mount(m);
mnt->mnt_ns = new_ns;
new_ns->root = mnt;
+ new_ns->mounts++;
list_add(&mnt->mnt_list, &new_ns->list);
} else {
mntput(m);
@@ -3052,9 +3139,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
touch_mnt_namespace(current->nsproxy->mnt_ns);
/* A moved mount should not expire automatically */
list_del_init(&new_mnt->mnt_expire);
+ put_mountpoint(root_mp);
unlock_mount_hash();
chroot_fs_refs(&root, &new);
- put_mountpoint(root_mp);
error = 0;
out4:
unlock_mount(old_mp);
@@ -3236,6 +3323,10 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
if (mnt->mnt.mnt_sb->s_iflags & SB_I_NOEXEC)
mnt_flags &= ~(MNT_LOCK_NOSUID | MNT_LOCK_NOEXEC);
+ /* Don't miss readonly hidden in the superblock flags */
+ if (mnt->mnt.mnt_sb->s_flags & MS_RDONLY)
+ mnt_flags |= MNT_LOCK_READONLY;
+
/* Verify the mount flags are equal to or more permissive
* than the proposed new mount.
*/
@@ -3262,7 +3353,7 @@ static bool fs_fully_visible(struct file_system_type *type, int *new_mnt_flags)
list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
struct inode *inode = child->mnt_mountpoint->d_inode;
/* Only worry about locked mounts */
- if (!(mnt_flags & MNT_LOCKED))
+ if (!(child->mnt.mnt_flags & MNT_LOCKED))
continue;
/* Is the directory permanetly empty? */
if (!is_empty_dir_inode(inode))
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index f31fd0dd92c6..b1daeafbea92 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -121,6 +121,7 @@ config PNFS_FILE_LAYOUT
config PNFS_BLOCK
tristate
depends on NFS_V4_1 && BLK_DEV_DM
+ depends on 64BIT || LBDAF
default NFS_V4
config PNFS_OBJLAYOUT
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index a7f2e6e33305..48efe62e1302 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -261,7 +261,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
}
ret = -EPROTONOSUPPORT;
- if (minorversion == 0)
+ if (!IS_ENABLED(CONFIG_NFS_V4_1) || minorversion == 0)
ret = nfs4_callback_up_net(serv, net);
else if (xprt->ops->bc_up)
ret = xprt->ops->bc_up(serv, net);
@@ -275,6 +275,7 @@ static int nfs_callback_up_net(int minorversion, struct svc_serv *serv,
err_socks:
svc_rpcb_cleanup(serv, net);
err_bind:
+ nn->cb_users[minorversion]--;
dprintk("NFS: Couldn't create callback socket: err = %d; "
"net = %p\n", ret, net);
return ret;
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 646cdac73488..e2e857affbf2 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -912,7 +912,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
if (hdr_arg.minorversion == 0) {
cps.clp = nfs4_find_client_ident(SVC_NET(rqstp), hdr_arg.cb_ident);
if (!cps.clp || !check_gss_callback_principal(cps.clp, rqstp))
- return rpc_drop_reply;
+ goto out_invalidcred;
}
cps.minorversion = hdr_arg.minorversion;
@@ -940,6 +940,10 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
nfs_put_client(cps.clp);
dprintk("%s: done, status = %u\n", __func__, ntohl(status));
return rpc_success;
+
+out_invalidcred:
+ pr_warn_ratelimited("NFS: NFSv4 callback contains invalid cred\n");
+ return rpc_autherr_badcred;
}
/*
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 5166adcfc0fb..7af5eeabc80e 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -41,6 +41,17 @@ void nfs_mark_delegation_referenced(struct nfs_delegation *delegation)
set_bit(NFS_DELEGATION_REFERENCED, &delegation->flags);
}
+static bool
+nfs4_is_valid_delegation(const struct nfs_delegation *delegation,
+ fmode_t flags)
+{
+ if (delegation != NULL && (delegation->type & flags) == flags &&
+ !test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) &&
+ !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
+ return true;
+ return false;
+}
+
static int
nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark)
{
@@ -50,8 +61,7 @@ nfs4_do_check_delegation(struct inode *inode, fmode_t flags, bool mark)
flags &= FMODE_READ|FMODE_WRITE;
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
- if (delegation != NULL && (delegation->type & flags) == flags &&
- !test_bit(NFS_DELEGATION_RETURNING, &delegation->flags)) {
+ if (nfs4_is_valid_delegation(delegation, flags)) {
if (mark)
nfs_mark_delegation_referenced(delegation);
ret = 1;
@@ -892,7 +902,7 @@ bool nfs4_copy_delegation_stateid(nfs4_stateid *dst, struct inode *inode,
flags &= FMODE_READ|FMODE_WRITE;
rcu_read_lock();
delegation = rcu_dereference(nfsi->delegation);
- ret = (delegation != NULL && (delegation->type & flags) == flags);
+ ret = nfs4_is_valid_delegation(delegation, flags);
if (ret) {
nfs4_stateid_copy(dst, &delegation->stateid);
nfs_mark_delegation_referenced(delegation);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ce5a21861074..348e0a05bd18 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -377,7 +377,7 @@ int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
again:
timestamp = jiffies;
gencount = nfs_inc_attr_generation_counter();
- error = NFS_PROTO(inode)->readdir(file->f_path.dentry, cred, entry->cookie, pages,
+ error = NFS_PROTO(inode)->readdir(file_dentry(file), cred, entry->cookie, pages,
NFS_SERVER(inode)->dtsize, desc->plus);
if (error < 0) {
/* We requested READDIRPLUS, but the server doesn't grok it */
@@ -462,7 +462,7 @@ void nfs_force_use_readdirplus(struct inode *dir)
{
if (!list_empty(&NFS_I(dir)->open_files)) {
nfs_advise_use_readdirplus(dir);
- nfs_zap_mapping(dir, dir->i_mapping);
+ invalidate_mapping_pages(dir->i_mapping, 0, -1);
}
}
@@ -560,7 +560,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
count++;
if (desc->plus != 0)
- nfs_prime_dcache(desc->file->f_path.dentry, entry);
+ nfs_prime_dcache(file_dentry(desc->file), entry);
status = nfs_readdir_add_to_array(entry, page);
if (status != 0)
@@ -847,24 +847,13 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc)
goto out;
}
-static bool nfs_dir_mapping_need_revalidate(struct inode *dir)
-{
- struct nfs_inode *nfsi = NFS_I(dir);
-
- if (nfs_attribute_cache_expired(dir))
- return true;
- if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
- return true;
- return false;
-}
-
/* The file offset position represents the dirent entry number. A
last cookie cache takes care of the common case of reading the
whole directory.
*/
static int nfs_readdir(struct file *file, struct dir_context *ctx)
{
- struct dentry *dentry = file->f_path.dentry;
+ struct dentry *dentry = file_dentry(file);
struct inode *inode = d_inode(dentry);
nfs_readdir_descriptor_t my_desc,
*desc = &my_desc;
@@ -890,7 +879,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
desc->plus = nfs_use_readdirplus(inode, ctx) ? 1 : 0;
nfs_block_sillyrename(dentry);
- if (ctx->pos == 0 || nfs_dir_mapping_need_revalidate(inode))
+ if (ctx->pos == 0 || nfs_attribute_cache_expired(inode))
res = nfs_revalidate_mapping(inode, file->f_mapping);
if (res < 0)
goto out;
@@ -1146,11 +1135,13 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
/* Force a full look up iff the parent directory has changed */
if (!nfs_is_exclusive_create(dir, flags) &&
nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
-
- if (nfs_lookup_verify_inode(inode, flags)) {
+ error = nfs_lookup_verify_inode(inode, flags);
+ if (error) {
if (flags & LOOKUP_RCU)
return -ECHILD;
- goto out_zap_parent;
+ if (error == -ESTALE)
+ goto out_zap_parent;
+ goto out_error;
}
goto out_valid;
}
@@ -1174,8 +1165,10 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error);
- if (error)
+ if (error == -ESTALE || error == -ENOENT)
goto out_bad;
+ if (error)
+ goto out_error;
if (nfs_compare_fh(NFS_FH(inode), fhandle))
goto out_bad;
if ((error = nfs_refresh_inode(inode, fattr)) != 0)
@@ -1531,9 +1524,9 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
err = PTR_ERR(inode);
trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
put_nfs_open_context(ctx);
+ d_drop(dentry);
switch (err) {
case -ENOENT:
- d_drop(dentry);
d_add(dentry, NULL);
nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
break;
@@ -2432,6 +2425,20 @@ int nfs_may_open(struct inode *inode, struct rpc_cred *cred, int openflags)
}
EXPORT_SYMBOL_GPL(nfs_may_open);
+static int nfs_execute_ok(struct inode *inode, int mask)
+{
+ struct nfs_server *server = NFS_SERVER(inode);
+ int ret;
+
+ if (mask & MAY_NOT_BLOCK)
+ ret = nfs_revalidate_inode_rcu(server, inode);
+ else
+ ret = nfs_revalidate_inode(server, inode);
+ if (ret == 0 && !execute_ok(inode))
+ ret = -EACCES;
+ return ret;
+}
+
int nfs_permission(struct inode *inode, int mask)
{
struct rpc_cred *cred;
@@ -2449,6 +2456,9 @@ int nfs_permission(struct inode *inode, int mask)
case S_IFLNK:
goto out;
case S_IFREG:
+ if ((mask & MAY_OPEN) &&
+ nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN))
+ return 0;
break;
case S_IFDIR:
/*
@@ -2481,8 +2491,8 @@ force_lookup:
res = PTR_ERR(cred);
}
out:
- if (!res && (mask & MAY_EXEC) && !execute_ok(inode))
- res = -EACCES;
+ if (!res && (mask & MAY_EXEC))
+ res = nfs_execute_ok(inode, mask);
dfprintk(VFS, "NFS: permission(%s/%lu), mask=0x%x, res=%d\n",
inode->i_sb->s_id, inode->i_ino, mask, res);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 93e236429c5d..dc875cd0e11d 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -407,7 +407,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
*/
if (!PageUptodate(page)) {
unsigned pglen = nfs_page_length(page);
- unsigned end = offset + len;
+ unsigned end = offset + copied;
if (pglen == 0) {
zero_user_segments(page, 0, offset,
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 02ec07973bc4..fd8da630fd22 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -374,8 +374,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
return -EAGAIN;
}
- if (data->verf.committed == NFS_UNSTABLE)
- pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb);
+ pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb);
return 0;
}
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index 4946ef40ba87..85ef38f9765f 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -283,7 +283,8 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
s->nfs_client->cl_rpcclient->cl_auth->au_flavor);
out_test_devid:
- if (filelayout_test_devid_unavailable(devid))
+ if (ret->ds_clp == NULL ||
+ filelayout_test_devid_unavailable(devid))
ret = NULL;
out:
return ret;
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 2a2e2d8ddee5..54313322ee5b 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1414,8 +1414,7 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
return -EAGAIN;
}
- if (data->verf.committed == NFS_UNSTABLE
- && ff_layout_need_layoutcommit(data->lseg))
+ if (ff_layout_need_layoutcommit(data->lseg))
pnfs_set_layoutcommit(data->inode, data->lseg, data->lwb);
return 0;
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index e125e55de86d..2603d7589946 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -30,6 +30,7 @@ void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds)
{
nfs4_print_deviceid(&mirror_ds->id_node.deviceid);
nfs4_pnfs_ds_put(mirror_ds->ds);
+ kfree(mirror_ds->ds_versions);
kfree_rcu(mirror_ds, id_node.rcu);
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 3e2071a177fd..668ac19af58f 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -927,7 +927,7 @@ int nfs_open(struct inode *inode, struct file *filp)
{
struct nfs_open_context *ctx;
- ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode);
+ ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
nfs_file_set_open_context(filp, ctx);
@@ -1241,9 +1241,9 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
return 0;
/* Has the inode gone and changed behind our back? */
if ((fattr->valid & NFS_ATTR_FATTR_FILEID) && nfsi->fileid != fattr->fileid)
- return -EIO;
+ return -ESTALE;
if ((fattr->valid & NFS_ATTR_FATTR_TYPE) && (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
- return -EIO;
+ return -ESTALE;
if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 &&
inode->i_version != fattr->change_attr)
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 9dea85f7f918..578350fd96e1 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -243,7 +243,6 @@ int nfs_iocounter_wait(struct nfs_io_counter *c);
extern const struct nfs_pageio_ops nfs_pgio_rw_ops;
struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *);
void nfs_pgio_header_free(struct nfs_pgio_header *);
-void nfs_pgio_data_destroy(struct nfs_pgio_header *);
int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr,
struct rpc_cred *cred, const struct nfs_rpc_ops *rpc_ops,
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 6b1ce9825430..7f1a0fb8c493 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -269,6 +269,7 @@ int nfs42_proc_layoutstats_generic(struct nfs_server *server,
task = rpc_run_task(&task_setup);
if (IS_ERR(task))
return PTR_ERR(task);
+ rpc_put_task(task);
return 0;
}
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index db9b5fea5b3e..679e003818b1 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -26,7 +26,7 @@ static int
nfs4_file_open(struct inode *inode, struct file *filp)
{
struct nfs_open_context *ctx;
- struct dentry *dentry = filp->f_path.dentry;
+ struct dentry *dentry = file_dentry(filp);
struct dentry *parent = NULL;
struct inode *dir;
unsigned openflags = filp->f_flags;
@@ -57,7 +57,7 @@ nfs4_file_open(struct inode *inode, struct file *filp)
parent = dget_parent(dentry);
dir = d_inode(parent);
- ctx = alloc_nfs_open_context(filp->f_path.dentry, filp->f_mode);
+ ctx = alloc_nfs_open_context(file_dentry(filp), filp->f_mode);
err = PTR_ERR(ctx);
if (IS_ERR(ctx))
goto out;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 98a44157353a..8e425f2c5ddd 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2188,8 +2188,6 @@ static int nfs4_opendata_access(struct rpc_cred *cred,
if ((mask & ~cache.mask & (MAY_READ | MAY_EXEC)) == 0)
return 0;
- /* even though OPEN succeeded, access is denied. Close the file */
- nfs4_close_state(state, fmode);
return -EACCES;
}
@@ -2422,7 +2420,8 @@ static inline void nfs4_exclusive_attrset(struct nfs4_opendata *opendata,
sattr->ia_valid |= ATTR_MTIME;
/* Except MODE, it seems harmless of setting twice. */
- if ((attrset[1] & FATTR4_WORD1_MODE))
+ if (opendata->o_arg.createmode != NFS4_CREATE_EXCLUSIVE &&
+ attrset[1] & FATTR4_WORD1_MODE)
sattr->ia_valid &= ~ATTR_MODE;
if (attrset[2] & FATTR4_WORD2_SECURITY_LABEL)
@@ -2451,6 +2450,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
ret = PTR_ERR(state);
if (IS_ERR(state))
goto out;
+ ctx->state = state;
if (server->caps & NFS_CAP_POSIX_LOCK)
set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
@@ -2473,7 +2473,6 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
if (ret != 0)
goto out;
- ctx->state = state;
if (d_inode(dentry) == state->inode) {
nfs_inode_attach_open_context(ctx);
if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
@@ -2854,12 +2853,11 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
call_close |= is_wronly;
else if (is_wronly)
calldata->arg.fmode |= FMODE_WRITE;
+ if (calldata->arg.fmode != (FMODE_READ|FMODE_WRITE))
+ call_close |= is_rdwr;
} else if (is_rdwr)
calldata->arg.fmode |= FMODE_READ|FMODE_WRITE;
- if (calldata->arg.fmode == 0)
- call_close |= is_rdwr;
-
if (!nfs4_valid_open_stateid(state))
call_close = 0;
spin_unlock(&state->owner->so_lock);
@@ -4711,7 +4709,7 @@ out:
*/
static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
{
- struct page *pages[NFS4ACL_MAXPAGES] = {NULL, };
+ struct page *pages[NFS4ACL_MAXPAGES + 1] = {NULL, };
struct nfs_getaclargs args = {
.fh = NFS_FH(inode),
.acl_pages = pages,
@@ -4725,13 +4723,9 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
.rpc_argp = &args,
.rpc_resp = &res,
};
- unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE);
+ unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1;
int ret = -ENOMEM, i;
- /* As long as we're doing a round trip to the server anyway,
- * let's be prepared for a page of acl data. */
- if (npages == 0)
- npages = 1;
if (npages > ARRAY_SIZE(pages))
return -ERANGE;
@@ -7425,12 +7419,20 @@ static int _nfs4_proc_create_session(struct nfs_client *clp,
status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
trace_nfs4_create_session(clp, status);
+ switch (status) {
+ case -NFS4ERR_STALE_CLIENTID:
+ case -NFS4ERR_DELAY:
+ case -ETIMEDOUT:
+ case -EACCES:
+ case -EAGAIN:
+ goto out;
+ };
+
+ clp->cl_seqid++;
if (!status) {
/* Verify the session's negotiated channel_attrs values */
status = nfs4_verify_channel_attrs(&args, &res);
/* Increment the clientid slot sequence id */
- if (clp->cl_seqid == res.seqid)
- clp->cl_seqid++;
if (status)
goto out;
nfs4_update_session(session, &res);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index d854693a15b0..e8d1d6c5000c 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1072,6 +1072,7 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
case -NFS4ERR_BADXDR:
case -NFS4ERR_RESOURCE:
case -NFS4ERR_NOFILEHANDLE:
+ case -NFS4ERR_MOVED:
/* Non-seqid mutating errors */
return;
};
@@ -1493,6 +1494,9 @@ restart:
__func__, status);
case -ENOENT:
case -ENOMEM:
+ case -EACCES:
+ case -EROFS:
+ case -EIO:
case -ESTALE:
/* Open state on this file cannot be recovered */
nfs4_state_mark_recovery_failed(state, status);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4e4441216804..1cb50bb898b0 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -2487,7 +2487,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->fh, &hdr);
- replen = hdr.replen + op_decode_hdr_maxsz + 1;
+ replen = hdr.replen + op_decode_hdr_maxsz;
encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr);
xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 452a011ba0d8..8ebfdd00044b 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -528,16 +528,6 @@ struct nfs_pgio_header *nfs_pgio_header_alloc(const struct nfs_rw_ops *ops)
}
EXPORT_SYMBOL_GPL(nfs_pgio_header_alloc);
-/*
- * nfs_pgio_header_free - Free a read or write header
- * @hdr: The header to free
- */
-void nfs_pgio_header_free(struct nfs_pgio_header *hdr)
-{
- hdr->rw_ops->rw_free_header(hdr);
-}
-EXPORT_SYMBOL_GPL(nfs_pgio_header_free);
-
/**
* nfs_pgio_data_destroy - make @hdr suitable for reuse
*
@@ -546,14 +536,24 @@ EXPORT_SYMBOL_GPL(nfs_pgio_header_free);
*
* @hdr: A header that has had nfs_generic_pgio called
*/
-void nfs_pgio_data_destroy(struct nfs_pgio_header *hdr)
+static void nfs_pgio_data_destroy(struct nfs_pgio_header *hdr)
{
if (hdr->args.context)
put_nfs_open_context(hdr->args.context);
if (hdr->page_array.pagevec != hdr->page_array.page_array)
kfree(hdr->page_array.pagevec);
}
-EXPORT_SYMBOL_GPL(nfs_pgio_data_destroy);
+
+/*
+ * nfs_pgio_header_free - Free a read or write header
+ * @hdr: The header to free
+ */
+void nfs_pgio_header_free(struct nfs_pgio_header *hdr)
+{
+ nfs_pgio_data_destroy(hdr);
+ hdr->rw_ops->rw_free_header(hdr);
+}
+EXPORT_SYMBOL_GPL(nfs_pgio_header_free);
/**
* nfs_pgio_rpcsetup - Set up arguments for a pageio call
@@ -671,7 +671,6 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
u32 midx;
set_bit(NFS_IOHDR_REDO, &hdr->flags);
- nfs_pgio_data_destroy(hdr);
hdr->completion_ops->completion(hdr);
/* TODO: Make sure it's right to clean up all mirrors here
* and not just hdr->pgio_mirror_idx */
@@ -689,7 +688,6 @@ static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
static void nfs_pgio_release(void *calldata)
{
struct nfs_pgio_header *hdr = calldata;
- nfs_pgio_data_destroy(hdr);
hdr->completion_ops->completion(hdr);
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index bec0384499f7..7af7bedd7c02 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -365,6 +365,9 @@ pnfs_layout_need_return(struct pnfs_layout_hdr *lo,
static bool
pnfs_prepare_layoutreturn(struct pnfs_layout_hdr *lo)
{
+ /* Serialise LAYOUTGET/LAYOUTRETURN */
+ if (atomic_read(&lo->plh_outstanding) != 0)
+ return false;
if (test_and_set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
return false;
lo->plh_return_iomode = 0;
@@ -1182,13 +1185,11 @@ bool pnfs_wait_on_layoutreturn(struct inode *ino, struct rpc_task *task)
* i_lock */
spin_lock(&ino->i_lock);
lo = nfsi->layout;
- if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
+ if (lo && test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) {
+ rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
sleep = true;
+ }
spin_unlock(&ino->i_lock);
-
- if (sleep)
- rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
-
return sleep;
}
@@ -1530,6 +1531,7 @@ pnfs_update_layout(struct inode *ino,
goto out;
lookup_again:
+ nfs4_client_recover_expired_lease(clp);
first = false;
spin_lock(&ino->i_lock);
lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
@@ -1941,7 +1943,6 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
nfs_pageio_reset_write_mds(desc);
mirror->pg_recoalesce = 1;
}
- nfs_pgio_data_destroy(hdr);
hdr->release(hdr);
}
@@ -2057,7 +2058,6 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
nfs_pageio_reset_read_mds(desc);
mirror->pg_recoalesce = 1;
}
- nfs_pgio_data_destroy(hdr);
hdr->release(hdr);
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 7b9316406930..7a9b6e347249 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1261,6 +1261,9 @@ int nfs_updatepage(struct file *file, struct page *page,
dprintk("NFS: nfs_updatepage(%pD2 %d@%lld)\n",
file, count, (long long)(page_file_offset(page) + offset));
+ if (!count)
+ goto out;
+
if (nfs_can_extend_write(file, page, inode)) {
count = max(count + offset, nfs_page_length(page));
offset = 0;
@@ -1271,7 +1274,7 @@ int nfs_updatepage(struct file *file, struct page *page,
nfs_set_pageerror(page);
else
__set_page_dirty_nobuffers(page);
-
+out:
dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n",
status, (long long)i_size_read(inode));
return status;
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index c29d9421bd5e..0976f8dad4ce 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -50,7 +50,7 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
{
struct nfsd4_layout_seg *seg = &args->lg_seg;
struct super_block *sb = inode->i_sb;
- u32 block_size = (1 << inode->i_blkbits);
+ u32 block_size = i_blocksize(inode);
struct pnfs_block_extent *bex;
struct iomap iomap;
u32 device_generation = 0;
@@ -151,7 +151,7 @@ nfsd4_block_proc_layoutcommit(struct inode *inode,
int error;
nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
- lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits);
+ lcp->lc_up_len, &iomaps, i_blocksize(inode));
if (nr_iomaps < 0)
return nfserrno(nr_iomaps);
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 1580ea6fd64d..d08cd88155c7 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -104,22 +104,21 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp,
goto out;
inode = d_inode(fh->fh_dentry);
- if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) {
- error = -EOPNOTSUPP;
- goto out_errno;
- }
error = fh_want_write(fh);
if (error)
goto out_errno;
- error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS);
+ fh_lock(fh);
+
+ error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access);
if (error)
- goto out_drop_write;
- error = inode->i_op->set_acl(inode, argp->acl_default,
- ACL_TYPE_DEFAULT);
+ goto out_drop_lock;
+ error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default);
if (error)
- goto out_drop_write;
+ goto out_drop_lock;
+
+ fh_unlock(fh);
fh_drop_write(fh);
@@ -131,7 +130,8 @@ out:
posix_acl_release(argp->acl_access);
posix_acl_release(argp->acl_default);
return nfserr;
-out_drop_write:
+out_drop_lock:
+ fh_unlock(fh);
fh_drop_write(fh);
out_errno:
nfserr = nfserrno(error);
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 01df4cd7c753..0c890347cde3 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -95,22 +95,20 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp,
goto out;
inode = d_inode(fh->fh_dentry);
- if (!IS_POSIXACL(inode) || !inode->i_op->set_acl) {
- error = -EOPNOTSUPP;
- goto out_errno;
- }
error = fh_want_write(fh);
if (error)
goto out_errno;
- error = inode->i_op->set_acl(inode, argp->acl_access, ACL_TYPE_ACCESS);
+ fh_lock(fh);
+
+ error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access);
if (error)
- goto out_drop_write;
- error = inode->i_op->set_acl(inode, argp->acl_default,
- ACL_TYPE_DEFAULT);
+ goto out_drop_lock;
+ error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default);
-out_drop_write:
+out_drop_lock:
+ fh_unlock(fh);
fh_drop_write(fh);
out_errno:
nfserr = nfserrno(error);
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 00575d776d91..7162ab7bc093 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -358,6 +358,7 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
{
unsigned int len, v, hdr, dlen;
u32 max_blocksize = svc_max_payload(rqstp);
+ struct kvec *head = rqstp->rq_arg.head;
p = decode_fh(p, &args->fh);
if (!p)
@@ -367,6 +368,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
args->count = ntohl(*p++);
args->stable = ntohl(*p++);
len = args->len = ntohl(*p++);
+ if ((void *)p > head->iov_base + head->iov_len)
+ return 0;
/*
* The count must equal the amount of data passed.
*/
@@ -377,9 +380,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
* Check to make sure that we got the right number of
* bytes.
*/
- hdr = (void*)p - rqstp->rq_arg.head[0].iov_base;
- dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len
- - hdr;
+ hdr = (void*)p - head->iov_base;
+ dlen = head->iov_len + rqstp->rq_arg.page_len - hdr;
/*
* Round the length of the data which was specified up to
* the next multiple of XDR units and then compare that
@@ -396,7 +398,7 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
len = args->len = max_blocksize;
}
rqstp->rq_vec[0].iov_base = (void*)p;
- rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr;
+ rqstp->rq_vec[0].iov_len = head->iov_len - hdr;
v = 0;
while (len > rqstp->rq_vec[v].iov_len) {
len -= rqstp->rq_vec[v].iov_len;
@@ -471,6 +473,8 @@ nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p,
/* first copy and check from the first page */
old = (char*)p;
vec = &rqstp->rq_arg.head[0];
+ if ((void *)old > vec->iov_base + vec->iov_len)
+ return 0;
avail = vec->iov_len - (old - (char*)vec->iov_base);
while (len && avail && *old) {
*new++ = *old++;
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 6adabd6049b7..71292a0d6f09 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -770,9 +770,6 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
dentry = fhp->fh_dentry;
inode = d_inode(dentry);
- if (!inode->i_op->set_acl || !IS_POSIXACL(inode))
- return nfserr_attrnotsupp;
-
if (S_ISDIR(inode->i_mode))
flags = NFS4_ACL_DIR;
@@ -782,16 +779,19 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (host_error < 0)
goto out_nfserr;
- host_error = inode->i_op->set_acl(inode, pacl, ACL_TYPE_ACCESS);
+ fh_lock(fhp);
+
+ host_error = set_posix_acl(inode, ACL_TYPE_ACCESS, pacl);
if (host_error < 0)
- goto out_release;
+ goto out_drop_lock;
if (S_ISDIR(inode->i_mode)) {
- host_error = inode->i_op->set_acl(inode, dpacl,
- ACL_TYPE_DEFAULT);
+ host_error = set_posix_acl(inode, ACL_TYPE_DEFAULT, dpacl);
}
-out_release:
+out_drop_lock:
+ fh_unlock(fhp);
+
posix_acl_release(pacl);
posix_acl_release(dpacl);
out_nfserr:
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index e7f50c4081d6..15bdc2d48cfe 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -710,22 +710,6 @@ static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc
}
}
-static struct rpc_clnt *create_backchannel_client(struct rpc_create_args *args)
-{
- struct rpc_xprt *xprt;
-
- if (args->protocol != XPRT_TRANSPORT_BC_TCP)
- return rpc_create(args);
-
- xprt = args->bc_xprt->xpt_bc_xprt;
- if (xprt) {
- xprt_get(xprt);
- return rpc_create_xprt(args, xprt);
- }
-
- return rpc_create(args);
-}
-
static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses)
{
int maxtime = max_cb_time(clp->net);
@@ -768,7 +752,7 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
args.authflavor = ses->se_cb_sec.flavor;
}
/* Create RPC client */
- client = create_backchannel_client(&args);
+ client = rpc_create(&args);
if (IS_ERR(client)) {
dprintk("NFSD: couldn't create callback client: %ld\n",
PTR_ERR(client));
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index c9d6c715c0fb..9eed219f57a5 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -189,10 +189,11 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
struct nfs4_layout_stateid *ls;
struct nfs4_stid *stp;
- stp = nfs4_alloc_stid(cstate->clp, nfs4_layout_stateid_cache);
+ stp = nfs4_alloc_stid(cstate->clp, nfs4_layout_stateid_cache,
+ nfsd4_free_layout_stateid);
if (!stp)
return NULL;
- stp->sc_free = nfsd4_free_layout_stateid;
+
get_nfs4_file(fp);
stp->sc_file = fp;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index a9f096c7e99f..209dbfc50cd4 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -877,6 +877,7 @@ nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
&exp, &dentry);
if (err)
return err;
+ fh_unlock(&cstate->current_fh);
if (d_really_is_negative(dentry)) {
exp_put(exp);
err = nfserr_noent;
@@ -1689,6 +1690,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
opdesc->op_get_currentstateid(cstate, &op->u);
op->status = opdesc->op_func(rqstp, cstate, &op->u);
+ /* Only from SEQUENCE */
+ if (cstate->status == nfserr_replay_cache) {
+ dprintk("%s NFS4.1 replay from cache\n", __func__);
+ status = op->status;
+ goto out;
+ }
if (!op->status) {
if (opdesc->op_set_currentstateid)
opdesc->op_set_currentstateid(cstate, &op->u);
@@ -1699,14 +1706,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
if (need_wrongsec_check(rqstp))
op->status = check_nfsd_access(current_fh->fh_export, rqstp);
}
-
encode_op:
- /* Only from SEQUENCE */
- if (cstate->status == nfserr_replay_cache) {
- dprintk("%s NFS4.1 replay from cache\n", __func__);
- status = op->status;
- goto out;
- }
if (op->status == nfserr_replay_me) {
op->replay = &cstate->replay_owner->so_replay;
nfsd4_encode_replay(&resp->xdr, op);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6b800b5b8fed..c7f1ce41442a 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -553,8 +553,8 @@ out:
return co;
}
-struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
- struct kmem_cache *slab)
+struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
+ void (*sc_free)(struct nfs4_stid *))
{
struct nfs4_stid *stid;
int new_id;
@@ -570,6 +570,8 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
idr_preload_end();
if (new_id < 0)
goto out_free;
+
+ stid->sc_free = sc_free;
stid->sc_client = cl;
stid->sc_stateid.si_opaque.so_id = new_id;
stid->sc_stateid.si_opaque.so_clid = cl->cl_clientid;
@@ -595,15 +597,12 @@ out_free:
static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
{
struct nfs4_stid *stid;
- struct nfs4_ol_stateid *stp;
- stid = nfs4_alloc_stid(clp, stateid_slab);
+ stid = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_ol_stateid);
if (!stid)
return NULL;
- stp = openlockstateid(stid);
- stp->st_stid.sc_free = nfs4_free_ol_stateid;
- return stp;
+ return openlockstateid(stid);
}
static void nfs4_free_deleg(struct nfs4_stid *stid)
@@ -701,11 +700,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh,
goto out_dec;
if (delegation_blocked(&current_fh->fh_handle))
goto out_dec;
- dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab));
+ dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab, nfs4_free_deleg));
if (dp == NULL)
goto out_dec;
- dp->dl_stid.sc_free = nfs4_free_deleg;
/*
* delegation seqid's are never incremented. The 4.1 special
* meaning of seqid 0 isn't meaningful, really, but let's avoid
@@ -1200,27 +1198,6 @@ free_ol_stateid_reaplist(struct list_head *reaplist)
}
}
-static void release_lockowner(struct nfs4_lockowner *lo)
-{
- struct nfs4_client *clp = lo->lo_owner.so_client;
- struct nfs4_ol_stateid *stp;
- struct list_head reaplist;
-
- INIT_LIST_HEAD(&reaplist);
-
- spin_lock(&clp->cl_lock);
- unhash_lockowner_locked(lo);
- while (!list_empty(&lo->lo_owner.so_stateids)) {
- stp = list_first_entry(&lo->lo_owner.so_stateids,
- struct nfs4_ol_stateid, st_perstateowner);
- WARN_ON(!unhash_lock_stateid(stp));
- put_ol_stateid_locked(stp, &reaplist);
- }
- spin_unlock(&clp->cl_lock);
- free_ol_stateid_reaplist(&reaplist);
- nfs4_put_stateowner(&lo->lo_owner);
-}
-
static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp,
struct list_head *reaplist)
{
@@ -3452,6 +3429,10 @@ init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
struct nfs4_openowner *oo = open->op_openowner;
struct nfs4_ol_stateid *retstp = NULL;
+ /* We are moving these outside of the spinlocks to avoid the warnings */
+ mutex_init(&stp->st_mutex);
+ mutex_lock(&stp->st_mutex);
+
spin_lock(&oo->oo_owner.so_client->cl_lock);
spin_lock(&fp->fi_lock);
@@ -3467,13 +3448,17 @@ init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
stp->st_access_bmap = 0;
stp->st_deny_bmap = 0;
stp->st_openstp = NULL;
- init_rwsem(&stp->st_rwsem);
list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
list_add(&stp->st_perfile, &fp->fi_stateids);
out_unlock:
spin_unlock(&fp->fi_lock);
spin_unlock(&oo->oo_owner.so_client->cl_lock);
+ if (retstp) {
+ mutex_lock(&retstp->st_mutex);
+ /* Not that we need to, just for neatness */
+ mutex_unlock(&stp->st_mutex);
+ }
return retstp;
}
@@ -4300,32 +4285,34 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
*/
if (stp) {
/* Stateid was found, this is an OPEN upgrade */
- down_read(&stp->st_rwsem);
+ mutex_lock(&stp->st_mutex);
status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
if (status) {
- up_read(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
goto out;
}
} else {
stp = open->op_stp;
open->op_stp = NULL;
+ /*
+ * init_open_stateid() either returns a locked stateid
+ * it found, or initializes and locks the new one we passed in
+ */
swapstp = init_open_stateid(stp, fp, open);
if (swapstp) {
nfs4_put_stid(&stp->st_stid);
stp = swapstp;
- down_read(&stp->st_rwsem);
status = nfs4_upgrade_open(rqstp, fp, current_fh,
stp, open);
if (status) {
- up_read(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
goto out;
}
goto upgrade_out;
}
- down_read(&stp->st_rwsem);
status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
if (status) {
- up_read(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
release_open_stateid(stp);
goto out;
}
@@ -4337,7 +4324,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
}
upgrade_out:
nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid);
- up_read(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
if (nfsd4_has_session(&resp->cstate)) {
if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
@@ -4872,6 +4859,32 @@ nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return nfs_ok;
}
+static __be32
+nfsd4_free_lock_stateid(stateid_t *stateid, struct nfs4_stid *s)
+{
+ struct nfs4_ol_stateid *stp = openlockstateid(s);
+ __be32 ret;
+
+ mutex_lock(&stp->st_mutex);
+
+ ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
+ if (ret)
+ goto out;
+
+ ret = nfserr_locks_held;
+ if (check_for_locks(stp->st_stid.sc_file,
+ lockowner(stp->st_stateowner)))
+ goto out;
+
+ release_lock_stateid(stp);
+ ret = nfs_ok;
+
+out:
+ mutex_unlock(&stp->st_mutex);
+ nfs4_put_stid(s);
+ return ret;
+}
+
__be32
nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_free_stateid *free_stateid)
@@ -4879,7 +4892,6 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
stateid_t *stateid = &free_stateid->fr_stateid;
struct nfs4_stid *s;
struct nfs4_delegation *dp;
- struct nfs4_ol_stateid *stp;
struct nfs4_client *cl = cstate->session->se_client;
__be32 ret = nfserr_bad_stateid;
@@ -4898,18 +4910,9 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
ret = nfserr_locks_held;
break;
case NFS4_LOCK_STID:
- ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
- if (ret)
- break;
- stp = openlockstateid(s);
- ret = nfserr_locks_held;
- if (check_for_locks(stp->st_stid.sc_file,
- lockowner(stp->st_stateowner)))
- break;
- WARN_ON(!unhash_lock_stateid(stp));
+ atomic_inc(&s->sc_count);
spin_unlock(&cl->cl_lock);
- nfs4_put_stid(s);
- ret = nfs_ok;
+ ret = nfsd4_free_lock_stateid(stateid, s);
goto out;
case NFS4_REVOKED_DELEG_STID:
dp = delegstateid(s);
@@ -4950,12 +4953,12 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
* revoked delegations are kept only for free_stateid.
*/
return nfserr_bad_stateid;
- down_write(&stp->st_rwsem);
+ mutex_lock(&stp->st_mutex);
status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate));
if (status == nfs_ok)
status = nfs4_check_fh(current_fh, &stp->st_stid);
if (status != nfs_ok)
- up_write(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
return status;
}
@@ -5003,7 +5006,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs
return status;
oo = openowner(stp->st_stateowner);
if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
- up_write(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
nfs4_put_stid(&stp->st_stid);
return nfserr_bad_stateid;
}
@@ -5035,12 +5038,12 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
oo = openowner(stp->st_stateowner);
status = nfserr_bad_stateid;
if (oo->oo_flags & NFS4_OO_CONFIRMED) {
- up_write(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
goto put_stateid;
}
oo->oo_flags |= NFS4_OO_CONFIRMED;
nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid);
- up_write(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
__func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid));
@@ -5116,7 +5119,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
nfs4_inc_and_copy_stateid(&od->od_stateid, &stp->st_stid);
status = nfs_ok;
put_stateid:
- up_write(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
nfs4_put_stid(&stp->st_stid);
out:
nfsd4_bump_seqid(cstate, status);
@@ -5169,7 +5172,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;
nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
- up_write(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
nfsd4_close_open_stateid(stp);
@@ -5391,11 +5394,10 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner);
get_nfs4_file(fp);
stp->st_stid.sc_file = fp;
- stp->st_stid.sc_free = nfs4_free_lock_stateid;
stp->st_access_bmap = 0;
stp->st_deny_bmap = open_stp->st_deny_bmap;
stp->st_openstp = open_stp;
- init_rwsem(&stp->st_rwsem);
+ mutex_init(&stp->st_mutex);
list_add(&stp->st_locks, &open_stp->st_locks);
list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
spin_lock(&fp->fi_lock);
@@ -5434,7 +5436,7 @@ find_or_create_lock_stateid(struct nfs4_lockowner *lo, struct nfs4_file *fi,
lst = find_lock_stateid(lo, fi);
if (lst == NULL) {
spin_unlock(&clp->cl_lock);
- ns = nfs4_alloc_stid(clp, stateid_slab);
+ ns = nfs4_alloc_stid(clp, stateid_slab, nfs4_free_lock_stateid);
if (ns == NULL)
return NULL;
@@ -5476,7 +5478,7 @@ static __be32
lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
struct nfs4_ol_stateid *ost,
struct nfsd4_lock *lock,
- struct nfs4_ol_stateid **lst, bool *new)
+ struct nfs4_ol_stateid **plst, bool *new)
{
__be32 status;
struct nfs4_file *fi = ost->st_stid.sc_file;
@@ -5484,7 +5486,9 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
struct nfs4_client *cl = oo->oo_owner.so_client;
struct inode *inode = d_inode(cstate->current_fh.fh_dentry);
struct nfs4_lockowner *lo;
+ struct nfs4_ol_stateid *lst;
unsigned int strhashval;
+ bool hashed;
lo = find_lockowner_str(cl, &lock->lk_new_owner);
if (!lo) {
@@ -5500,12 +5504,27 @@ lookup_or_create_lock_state(struct nfsd4_compound_state *cstate,
goto out;
}
- *lst = find_or_create_lock_stateid(lo, fi, inode, ost, new);
- if (*lst == NULL) {
+retry:
+ lst = find_or_create_lock_stateid(lo, fi, inode, ost, new);
+ if (lst == NULL) {
status = nfserr_jukebox;
goto out;
}
+
+ mutex_lock(&lst->st_mutex);
+
+ /* See if it's still hashed to avoid race with FREE_STATEID */
+ spin_lock(&cl->cl_lock);
+ hashed = !list_empty(&lst->st_perfile);
+ spin_unlock(&cl->cl_lock);
+
+ if (!hashed) {
+ mutex_unlock(&lst->st_mutex);
+ nfs4_put_stid(&lst->st_stid);
+ goto retry;
+ }
status = nfs_ok;
+ *plst = lst;
out:
nfs4_put_stateowner(&lo->lo_owner);
return status;
@@ -5564,7 +5583,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
&open_stp, nn);
if (status)
goto out;
- up_write(&open_stp->st_rwsem);
+ mutex_unlock(&open_stp->st_mutex);
open_sop = openowner(open_stp->st_stateowner);
status = nfserr_bad_stateid;
if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid,
@@ -5572,8 +5591,6 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
status = lookup_or_create_lock_state(cstate, open_stp, lock,
&lock_stp, &new);
- if (status == nfs_ok)
- down_write(&lock_stp->st_rwsem);
} else {
status = nfs4_preprocess_seqid_op(cstate,
lock->lk_old_lock_seqid,
@@ -5677,7 +5694,7 @@ out:
seqid_mutating_err(ntohl(status)))
lock_sop->lo_owner.so_seqid++;
- up_write(&lock_stp->st_rwsem);
+ mutex_unlock(&lock_stp->st_mutex);
/*
* If this is a new, never-before-used stateid, and we are
@@ -5847,7 +5864,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
fput:
fput(filp);
put_stateid:
- up_write(&stp->st_rwsem);
+ mutex_unlock(&stp->st_mutex);
nfs4_put_stid(&stp->st_stid);
out:
nfsd4_bump_seqid(cstate, status);
@@ -5911,6 +5928,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
__be32 status;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct nfs4_client *clp;
+ LIST_HEAD (reaplist);
dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
clid->cl_boot, clid->cl_id);
@@ -5941,9 +5959,23 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
nfs4_get_stateowner(sop);
break;
}
+ if (!lo) {
+ spin_unlock(&clp->cl_lock);
+ return status;
+ }
+
+ unhash_lockowner_locked(lo);
+ while (!list_empty(&lo->lo_owner.so_stateids)) {
+ stp = list_first_entry(&lo->lo_owner.so_stateids,
+ struct nfs4_ol_stateid,
+ st_perstateowner);
+ WARN_ON(!unhash_lock_stateid(stp));
+ put_ol_stateid_locked(stp, &reaplist);
+ }
spin_unlock(&clp->cl_lock);
- if (lo)
- release_lockowner(lo);
+ free_ol_stateid_reaplist(&reaplist);
+ nfs4_put_stateowner(&lo->lo_owner);
+
return status;
}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 51c9e9ca39a4..544672b440de 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -129,7 +129,7 @@ static void next_decode_page(struct nfsd4_compoundargs *argp)
argp->p = page_address(argp->pagelist[0]);
argp->pagelist++;
if (argp->pagelen < PAGE_SIZE) {
- argp->end = argp->p + (argp->pagelen>>2);
+ argp->end = argp->p + XDR_QUADLEN(argp->pagelen);
argp->pagelen = 0;
} else {
argp->end = argp->p + (PAGE_SIZE>>2);
@@ -1072,8 +1072,9 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename
READ_BUF(4);
rename->rn_snamelen = be32_to_cpup(p++);
- READ_BUF(rename->rn_snamelen + 4);
+ READ_BUF(rename->rn_snamelen);
SAVEMEM(rename->rn_sname, rename->rn_snamelen);
+ READ_BUF(4);
rename->rn_tnamelen = be32_to_cpup(p++);
READ_BUF(rename->rn_tnamelen);
SAVEMEM(rename->rn_tname, rename->rn_tnamelen);
@@ -1155,13 +1156,14 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient
READ_BUF(8);
setclientid->se_callback_prog = be32_to_cpup(p++);
setclientid->se_callback_netid_len = be32_to_cpup(p++);
-
- READ_BUF(setclientid->se_callback_netid_len + 4);
+ READ_BUF(setclientid->se_callback_netid_len);
SAVEMEM(setclientid->se_callback_netid_val, setclientid->se_callback_netid_len);
+ READ_BUF(4);
setclientid->se_callback_addr_len = be32_to_cpup(p++);
- READ_BUF(setclientid->se_callback_addr_len + 4);
+ READ_BUF(setclientid->se_callback_addr_len);
SAVEMEM(setclientid->se_callback_addr_val, setclientid->se_callback_addr_len);
+ READ_BUF(4);
setclientid->se_callback_ident = be32_to_cpup(p++);
DECODE_TAIL;
@@ -1244,9 +1246,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
argp->pagelen -= pages * PAGE_SIZE;
len -= pages * PAGE_SIZE;
- argp->p = (__be32 *)page_address(argp->pagelist[0]);
- argp->pagelist++;
- argp->end = argp->p + XDR_QUADLEN(PAGE_SIZE);
+ next_decode_page(argp);
}
argp->p += XDR_QUADLEN(len);
@@ -1815,8 +1815,9 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
READ_BUF(4);
argp->taglen = be32_to_cpup(p++);
- READ_BUF(argp->taglen + 8);
+ READ_BUF(argp->taglen);
SAVEMEM(argp->tag, argp->taglen);
+ READ_BUF(8);
argp->minorversion = be32_to_cpup(p++);
argp->opcnt = be32_to_cpup(p++);
max_reply += 4 + (XDR_QUADLEN(argp->taglen) << 2);
@@ -2750,9 +2751,16 @@ out_acl:
}
#endif /* CONFIG_NFSD_PNFS */
if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
- status = nfsd4_encode_bitmap(xdr, NFSD_SUPPATTR_EXCLCREAT_WORD0,
- NFSD_SUPPATTR_EXCLCREAT_WORD1,
- NFSD_SUPPATTR_EXCLCREAT_WORD2);
+ u32 supp[3];
+
+ supp[0] = nfsd_suppattrs0(minorversion);
+ supp[1] = nfsd_suppattrs1(minorversion);
+ supp[2] = nfsd_suppattrs2(minorversion);
+ supp[0] &= NFSD_SUPPATTR_EXCLCREAT_WORD0;
+ supp[1] &= NFSD_SUPPATTR_EXCLCREAT_WORD1;
+ supp[2] &= NFSD_SUPPATTR_EXCLCREAT_WORD2;
+
+ status = nfsd4_encode_bitmap(xdr, supp[0], supp[1], supp[2]);
if (status)
goto out;
}
@@ -4038,8 +4046,7 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_getdeviceinfo *gdev)
{
struct xdr_stream *xdr = &resp->xdr;
- const struct nfsd4_layout_ops *ops =
- nfsd4_layout_ops[gdev->gd_layout_type];
+ const struct nfsd4_layout_ops *ops;
u32 starting_len = xdr->buf->len, needed_len;
__be32 *p;
@@ -4056,6 +4063,7 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
/* If maxcount is 0 then just update notifications */
if (gdev->gd_maxcount != 0) {
+ ops = nfsd4_layout_ops[gdev->gd_layout_type];
nfserr = ops->encode_getdeviceinfo(xdr, gdev);
if (nfserr) {
/*
@@ -4108,8 +4116,7 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_layoutget *lgp)
{
struct xdr_stream *xdr = &resp->xdr;
- const struct nfsd4_layout_ops *ops =
- nfsd4_layout_ops[lgp->lg_layout_type];
+ const struct nfsd4_layout_ops *ops;
__be32 *p;
dprintk("%s: err %d\n", __func__, nfserr);
@@ -4132,6 +4139,7 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr,
*p++ = cpu_to_be32(lgp->lg_seg.iomode);
*p++ = cpu_to_be32(lgp->lg_layout_type);
+ ops = nfsd4_layout_ops[lgp->lg_layout_type];
nfserr = ops->encode_layoutget(xdr, lgp);
out:
kfree(lgp->lg_content);
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index ad4e2377dd63..5be1fa6b676d 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -656,6 +656,37 @@ static __be32 map_new_errors(u32 vers, __be32 nfserr)
return nfserr;
}
+/*
+ * A write procedure can have a large argument, and a read procedure can
+ * have a large reply, but no NFSv2 or NFSv3 procedure has argument and
+ * reply that can both be larger than a page. The xdr code has taken
+ * advantage of this assumption to be a sloppy about bounds checking in
+ * some cases. Pending a rewrite of the NFSv2/v3 xdr code to fix that
+ * problem, we enforce these assumptions here:
+ */
+static bool nfs_request_too_big(struct svc_rqst *rqstp,
+ struct svc_procedure *proc)
+{
+ /*
+ * The ACL code has more careful bounds-checking and is not
+ * susceptible to this problem:
+ */
+ if (rqstp->rq_prog != NFS_PROGRAM)
+ return false;
+ /*
+ * Ditto NFSv4 (which can in theory have argument and reply both
+ * more than a page):
+ */
+ if (rqstp->rq_vers >= 4)
+ return false;
+ /* The reply will be small, we're OK: */
+ if (proc->pc_xdrressize > 0 &&
+ proc->pc_xdrressize < XDR_QUADLEN(PAGE_SIZE))
+ return false;
+
+ return rqstp->rq_arg.len > PAGE_SIZE;
+}
+
int
nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
{
@@ -668,6 +699,11 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
rqstp->rq_vers, rqstp->rq_proc);
proc = rqstp->rq_procinfo;
+ if (nfs_request_too_big(rqstp, proc)) {
+ dprintk("nfsd: NFSv%d argument too large\n", rqstp->rq_vers);
+ *statp = rpc_garbage_args;
+ return 1;
+ }
/*
* Give the xdr decoder a chance to change this if it wants
* (necessary in the NFSv4.0 compound case)
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 79d964aa8079..bf913201a6ad 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -280,6 +280,7 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd_writeargs *args)
{
unsigned int len, hdr, dlen;
+ struct kvec *head = rqstp->rq_arg.head;
int v;
p = decode_fh(p, &args->fh);
@@ -300,9 +301,10 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
* Check to make sure that we got the right number of
* bytes.
*/
- hdr = (void*)p - rqstp->rq_arg.head[0].iov_base;
- dlen = rqstp->rq_arg.head[0].iov_len + rqstp->rq_arg.page_len
- - hdr;
+ hdr = (void*)p - head->iov_base;
+ if (hdr > head->iov_len)
+ return 0;
+ dlen = head->iov_len + rqstp->rq_arg.page_len - hdr;
/*
* Round the length of the data which was specified up to
@@ -316,7 +318,7 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
return 0;
rqstp->rq_vec[0].iov_base = (void*)p;
- rqstp->rq_vec[0].iov_len = rqstp->rq_arg.head[0].iov_len - hdr;
+ rqstp->rq_vec[0].iov_len = head->iov_len - hdr;
v = 0;
while (len > rqstp->rq_vec[v].iov_len) {
len -= rqstp->rq_vec[v].iov_len;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 77fdf4de91ba..5134eedcb16c 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -535,7 +535,7 @@ struct nfs4_ol_stateid {
unsigned char st_access_bmap;
unsigned char st_deny_bmap;
struct nfs4_ol_stateid *st_openstp;
- struct rw_semaphore st_rwsem;
+ struct mutex st_mutex;
};
static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s)
@@ -583,8 +583,8 @@ extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
stateid_t *stateid, unsigned char typemask,
struct nfs4_stid **s, struct nfsd_net *nn);
-struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl,
- struct kmem_cache *slab);
+struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
+ void (*sc_free)(struct nfs4_stid *));
void nfs4_unhash_stid(struct nfs4_stid *s);
void nfs4_put_stid(struct nfs4_stid *s);
void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 994d66fbb446..91e0c5429b4d 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -369,7 +369,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
__be32 err;
int host_err;
bool get_write_count;
- int size_change = 0;
+ bool size_change = (iap->ia_valid & ATTR_SIZE);
if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
@@ -382,11 +382,11 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
/* Get inode */
err = fh_verify(rqstp, fhp, ftype, accmode);
if (err)
- goto out;
+ return err;
if (get_write_count) {
host_err = fh_want_write(fhp);
if (host_err)
- return nfserrno(host_err);
+ goto out;
}
dentry = fhp->fh_dentry;
@@ -397,20 +397,28 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
iap->ia_valid &= ~ATTR_MODE;
if (!iap->ia_valid)
- goto out;
+ return 0;
nfsd_sanitize_attrs(inode, iap);
+ if (check_guard && guardtime != inode->i_ctime.tv_sec)
+ return nfserr_notsync;
+
/*
* The size case is special, it changes the file in addition to the
- * attributes.
+ * attributes, and file systems don't expect it to be mixed with
+ * "random" attribute changes. We thus split out the size change
+ * into a separate call to ->setattr, and do the rest as a separate
+ * setattr call.
*/
- if (iap->ia_valid & ATTR_SIZE) {
+ if (size_change) {
err = nfsd_get_write_access(rqstp, fhp, iap);
if (err)
- goto out;
- size_change = 1;
+ return err;
+ }
+ fh_lock(fhp);
+ if (size_change) {
/*
* RFC5661, Section 18.30.4:
* Changing the size of a file with SETATTR indirectly
@@ -418,29 +426,36 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
*
* (and similar for the older RFCs)
*/
- if (iap->ia_size != i_size_read(inode))
- iap->ia_valid |= ATTR_MTIME;
- }
+ struct iattr size_attr = {
+ .ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME,
+ .ia_size = iap->ia_size,
+ };
- iap->ia_valid |= ATTR_CTIME;
+ host_err = notify_change(dentry, &size_attr, NULL);
+ if (host_err)
+ goto out_unlock;
+ iap->ia_valid &= ~ATTR_SIZE;
- if (check_guard && guardtime != inode->i_ctime.tv_sec) {
- err = nfserr_notsync;
- goto out_put_write_access;
+ /*
+ * Avoid the additional setattr call below if the only other
+ * attribute that the client sends is the mtime, as we update
+ * it as part of the size change above.
+ */
+ if ((iap->ia_valid & ~ATTR_MTIME) == 0)
+ goto out_unlock;
}
- fh_lock(fhp);
+ iap->ia_valid |= ATTR_CTIME;
host_err = notify_change(dentry, iap, NULL);
- fh_unlock(fhp);
- err = nfserrno(host_err);
-out_put_write_access:
+out_unlock:
+ fh_unlock(fhp);
if (size_change)
put_write_access(inode);
- if (!err)
- err = nfserrno(commit_metadata(fhp));
out:
- return err;
+ if (!host_err)
+ host_err = commit_metadata(fhp);
+ return nfserrno(host_err);
}
#if defined(CONFIG_NFSD_V4)
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index a35ae35e6932..cd39b57288c2 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -55,7 +55,7 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
brelse(bh);
BUG();
}
- memset(bh->b_data, 0, 1 << inode->i_blkbits);
+ memset(bh->b_data, 0, i_blocksize(inode));
bh->b_bdev = inode->i_sb->s_bdev;
bh->b_blocknr = blocknr;
set_buffer_mapped(bh);
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index ac2f64943ff4..00877ef0b120 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -55,7 +55,7 @@ void nilfs_inode_add_blocks(struct inode *inode, int n)
{
struct nilfs_root *root = NILFS_I(inode)->i_root;
- inode_add_bytes(inode, (1 << inode->i_blkbits) * n);
+ inode_add_bytes(inode, i_blocksize(inode) * n);
if (root)
atomic64_add(n, &root->blocks_count);
}
@@ -64,7 +64,7 @@ void nilfs_inode_sub_blocks(struct inode *inode, int n)
{
struct nilfs_root *root = NILFS_I(inode)->i_root;
- inode_sub_bytes(inode, (1 << inode->i_blkbits) * n);
+ inode_sub_bytes(inode, i_blocksize(inode) * n);
if (root)
atomic64_sub(n, &root->blocks_count);
}
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 1125f40233ff..612a2457243d 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -60,7 +60,7 @@ nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block,
set_buffer_mapped(bh);
kaddr = kmap_atomic(bh->b_page);
- memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits);
+ memset(kaddr + bh_offset(bh), 0, i_blocksize(inode));
if (init_block)
init_block(inode, bh, kaddr);
flush_dcache_page(bh->b_page);
@@ -503,7 +503,7 @@ void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size,
struct nilfs_mdt_info *mi = NILFS_MDT(inode);
mi->mi_entry_size = entry_size;
- mi->mi_entries_per_block = (1 << inode->i_blkbits) / entry_size;
+ mi->mi_entries_per_block = i_blocksize(inode) / entry_size;
mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size);
}
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 3b65adaae7e4..2f27c935bd57 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -719,7 +719,7 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
lock_page(page);
if (!page_has_buffers(page))
- create_empty_buffers(page, 1 << inode->i_blkbits, 0);
+ create_empty_buffers(page, i_blocksize(inode), 0);
unlock_page(page);
bh = head = page_buffers(page);
diff --git a/fs/nilfs2/the_nilfs.c b/fs/nilfs2/the_nilfs.c
index 69bd801afb53..37e49cb2ac4c 100644
--- a/fs/nilfs2/the_nilfs.c
+++ b/fs/nilfs2/the_nilfs.c
@@ -443,7 +443,7 @@ static int nilfs_valid_sb(struct nilfs_super_block *sbp)
if (!sbp || le16_to_cpu(sbp->s_magic) != NILFS_SUPER_MAGIC)
return 0;
bytes = le16_to_cpu(sbp->s_bytes);
- if (bytes > BLOCK_SIZE)
+ if (bytes < sumoff + 4 || bytes > BLOCK_SIZE)
return 0;
crc = crc32_le(le32_to_cpu(sbp->s_crc_seed), (unsigned char *)sbp,
sumoff);
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index d2f97ecca6a5..e0e5f7c3c99f 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -67,18 +67,7 @@ static int fanotify_get_response(struct fsnotify_group *group,
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
- wait_event(group->fanotify_data.access_waitq, event->response ||
- atomic_read(&group->fanotify_data.bypass_perm));
-
- if (!event->response) { /* bypass_perm set */
- /*
- * Event was canceled because group is being destroyed. Remove
- * it from group's event list because we are responsible for
- * freeing the permission event.
- */
- fsnotify_remove_event(group, &event->fae.fse);
- return 0;
- }
+ wait_event(group->fanotify_data.access_waitq, event->response);
/* userspace responded, convert to something usable */
switch (event->response) {
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 8e8e6bcd1d43..a64313868d3a 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -358,16 +358,20 @@ static int fanotify_release(struct inode *ignored, struct file *file)
#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
struct fanotify_perm_event_info *event, *next;
+ struct fsnotify_event *fsn_event;
/*
- * There may be still new events arriving in the notification queue
- * but since userspace cannot use fanotify fd anymore, no event can
- * enter or leave access_list by now.
+ * Stop new events from arriving in the notification queue. since
+ * userspace cannot use fanotify fd anymore, no event can enter or
+ * leave access_list by now either.
*/
- spin_lock(&group->fanotify_data.access_lock);
-
- atomic_inc(&group->fanotify_data.bypass_perm);
+ fsnotify_group_stop_queueing(group);
+ /*
+ * Process all permission events on access_list and notification queue
+ * and simulate reply from userspace.
+ */
+ spin_lock(&group->fanotify_data.access_lock);
list_for_each_entry_safe(event, next, &group->fanotify_data.access_list,
fae.fse.list) {
pr_debug("%s: found group=%p event=%p\n", __func__, group,
@@ -379,12 +383,21 @@ static int fanotify_release(struct inode *ignored, struct file *file)
spin_unlock(&group->fanotify_data.access_lock);
/*
- * Since bypass_perm is set, newly queued events will not wait for
- * access response. Wake up the already sleeping ones now.
- * synchronize_srcu() in fsnotify_destroy_group() will wait for all
- * processes sleeping in fanotify_handle_event() waiting for access
- * response and thus also for all permission events to be freed.
+ * Destroy all non-permission events. For permission events just
+ * dequeue them and set the response. They will be freed once the
+ * response is consumed and fanotify_get_response() returns.
*/
+ mutex_lock(&group->notification_mutex);
+ while (!fsnotify_notify_queue_is_empty(group)) {
+ fsn_event = fsnotify_remove_first_event(group);
+ if (!(fsn_event->mask & FAN_ALL_PERM_EVENTS))
+ fsnotify_destroy_event(group, fsn_event);
+ else
+ FANOTIFY_PE(fsn_event)->response = FAN_ALLOW;
+ }
+ mutex_unlock(&group->notification_mutex);
+
+ /* Response for all permission events it set, wakeup waiters */
wake_up(&group->fanotify_data.access_waitq);
#endif
@@ -755,7 +768,6 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
spin_lock_init(&group->fanotify_data.access_lock);
init_waitqueue_head(&group->fanotify_data.access_waitq);
INIT_LIST_HEAD(&group->fanotify_data.access_list);
- atomic_set(&group->fanotify_data.bypass_perm, 0);
#endif
switch (flags & FAN_ALL_CLASS_BITS) {
case FAN_CLASS_NOTIF:
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index db39de2dd4cb..a64adc2fced9 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -104,16 +104,20 @@ int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
if (unlikely(!fsnotify_inode_watches_children(p_inode)))
__fsnotify_update_child_dentry_flags(p_inode);
else if (p_inode->i_fsnotify_mask & mask) {
+ struct name_snapshot name;
+
/* we are notifying a parent so come up with the new mask which
* specifies these are events which came from a child. */
mask |= FS_EVENT_ON_CHILD;
+ take_dentry_name_snapshot(&name, dentry);
if (path)
ret = fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH,
- dentry->d_name.name, 0);
+ name.name, 0);
else
ret = fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
- dentry->d_name.name, 0);
+ name.name, 0);
+ release_dentry_name_snapshot(&name);
}
dput(parent);
diff --git a/fs/notify/group.c b/fs/notify/group.c
index d16b62cb2854..18eb30c6bd8f 100644
--- a/fs/notify/group.c
+++ b/fs/notify/group.c
@@ -40,6 +40,17 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group)
}
/*
+ * Stop queueing new events for this group. Once this function returns
+ * fsnotify_add_event() will not add any new events to the group's queue.
+ */
+void fsnotify_group_stop_queueing(struct fsnotify_group *group)
+{
+ mutex_lock(&group->notification_mutex);
+ group->shutdown = true;
+ mutex_unlock(&group->notification_mutex);
+}
+
+/*
* Trying to get rid of a group. Remove all marks, flush all events and release
* the group reference.
* Note that another thread calling fsnotify_clear_marks_by_group() may still
@@ -47,6 +58,14 @@ static void fsnotify_final_destroy_group(struct fsnotify_group *group)
*/
void fsnotify_destroy_group(struct fsnotify_group *group)
{
+ /*
+ * Stop queueing new events. The code below is careful enough to not
+ * require this but fanotify needs to stop queuing events even before
+ * fsnotify_destroy_group() is called and this makes the other callers
+ * of fsnotify_destroy_group() to see the same behavior.
+ */
+ fsnotify_group_stop_queueing(group);
+
/* clear all inode marks for this group */
fsnotify_clear_marks_by_group(group);
diff --git a/fs/notify/notification.c b/fs/notify/notification.c
index a95d8e037aeb..e455e83ceeeb 100644
--- a/fs/notify/notification.c
+++ b/fs/notify/notification.c
@@ -82,7 +82,8 @@ void fsnotify_destroy_event(struct fsnotify_group *group,
* Add an event to the group notification queue. The group can later pull this
* event off the queue to deal with. The function returns 0 if the event was
* added to the queue, 1 if the event was merged with some other queued event,
- * 2 if the queue of events has overflown.
+ * 2 if the event was not queued - either the queue of events has overflown
+ * or the group is shutting down.
*/
int fsnotify_add_event(struct fsnotify_group *group,
struct fsnotify_event *event,
@@ -96,6 +97,11 @@ int fsnotify_add_event(struct fsnotify_group *group,
mutex_lock(&group->notification_mutex);
+ if (group->shutdown) {
+ mutex_unlock(&group->notification_mutex);
+ return 2;
+ }
+
if (group->q_len >= group->max_events) {
ret = 2;
/* Queue overflow event only if it isn't already queued */
@@ -126,21 +132,6 @@ queue:
}
/*
- * Remove @event from group's notification queue. It is the responsibility of
- * the caller to destroy the event.
- */
-void fsnotify_remove_event(struct fsnotify_group *group,
- struct fsnotify_event *event)
-{
- mutex_lock(&group->notification_mutex);
- if (!list_empty(&event->list)) {
- list_del_init(&event->list);
- group->q_len--;
- }
- mutex_unlock(&group->notification_mutex);
-}
-
-/*
* Remove and return the first event from the notification list. It is the
* responsibility of the caller to destroy the obtained event
*/
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 0cdf497c91ef..164307b99405 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -241,13 +241,11 @@ int ocfs2_set_acl(handle_t *handle,
case ACL_TYPE_ACCESS:
name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS;
if (acl) {
- umode_t mode = inode->i_mode;
- ret = posix_acl_equiv_mode(acl, &mode);
- if (ret < 0)
- return ret;
+ umode_t mode;
- if (ret == 0)
- acl = NULL;
+ ret = posix_acl_update_mode(inode, &mode, &acl);
+ if (ret)
+ return ret;
ret = ocfs2_acl_set_mode(inode, di_bh,
handle, mode);
@@ -322,3 +320,90 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type)
brelse(di_bh);
return acl;
}
+
+int ocfs2_acl_chmod(struct inode *inode, struct buffer_head *bh)
+{
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct posix_acl *acl;
+ int ret;
+
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
+ return 0;
+
+ acl = ocfs2_get_acl_nolock(inode, ACL_TYPE_ACCESS, bh);
+ if (IS_ERR(acl) || !acl)
+ return PTR_ERR(acl);
+ ret = __posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
+ if (ret)
+ return ret;
+ ret = ocfs2_set_acl(NULL, inode, NULL, ACL_TYPE_ACCESS,
+ acl, NULL, NULL);
+ posix_acl_release(acl);
+ return ret;
+}
+
+/*
+ * Initialize the ACLs of a new inode. If parent directory has default ACL,
+ * then clone to new inode. Called from ocfs2_mknod.
+ */
+int ocfs2_init_acl(handle_t *handle,
+ struct inode *inode,
+ struct inode *dir,
+ struct buffer_head *di_bh,
+ struct buffer_head *dir_bh,
+ struct ocfs2_alloc_context *meta_ac,
+ struct ocfs2_alloc_context *data_ac)
+{
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct posix_acl *acl = NULL;
+ int ret = 0, ret2;
+ umode_t mode;
+
+ if (!S_ISLNK(inode->i_mode)) {
+ if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
+ acl = ocfs2_get_acl_nolock(dir, ACL_TYPE_DEFAULT,
+ dir_bh);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ }
+ if (!acl) {
+ mode = inode->i_mode & ~current_umask();
+ ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode);
+ if (ret) {
+ mlog_errno(ret);
+ goto cleanup;
+ }
+ }
+ }
+ if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) {
+ if (S_ISDIR(inode->i_mode)) {
+ ret = ocfs2_set_acl(handle, inode, di_bh,
+ ACL_TYPE_DEFAULT, acl,
+ meta_ac, data_ac);
+ if (ret)
+ goto cleanup;
+ }
+ mode = inode->i_mode;
+ ret = __posix_acl_create(&acl, GFP_NOFS, &mode);
+ if (ret < 0)
+ return ret;
+
+ ret2 = ocfs2_acl_set_mode(inode, di_bh, handle, mode);
+ if (ret2) {
+ mlog_errno(ret2);
+ ret = ret2;
+ goto cleanup;
+ }
+ if (ret > 0) {
+ ret = ocfs2_set_acl(handle, inode,
+ di_bh, ACL_TYPE_ACCESS,
+ acl, meta_ac, data_ac);
+ }
+ }
+cleanup:
+ posix_acl_release(acl);
+ return ret;
+}
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
index 3fce68d08625..2783a75b3999 100644
--- a/fs/ocfs2/acl.h
+++ b/fs/ocfs2/acl.h
@@ -35,5 +35,10 @@ int ocfs2_set_acl(handle_t *handle,
struct posix_acl *acl,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_alloc_context *data_ac);
+extern int ocfs2_acl_chmod(struct inode *, struct buffer_head *);
+extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
+ struct buffer_head *, struct buffer_head *,
+ struct ocfs2_alloc_context *,
+ struct ocfs2_alloc_context *);
#endif /* OCFS2_ACL_H */
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index e6795c7c76a8..e4184bd2a954 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -1103,7 +1103,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno,
int ret = 0;
struct buffer_head *head, *bh, *wait[2], **wait_bh = wait;
unsigned int block_end, block_start;
- unsigned int bsize = 1 << inode->i_blkbits;
+ unsigned int bsize = i_blocksize(inode);
if (!page_has_buffers(page))
create_empty_buffers(page, bsize, 0);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 709fbbd44c65..acebc350e98d 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -2070,13 +2070,13 @@ unlock:
spin_unlock(&o2hb_live_lock);
}
-static ssize_t o2hb_heartbeat_group_threshold_show(struct config_item *item,
+static ssize_t o2hb_heartbeat_group_dead_threshold_show(struct config_item *item,
char *page)
{
return sprintf(page, "%u\n", o2hb_dead_threshold);
}
-static ssize_t o2hb_heartbeat_group_threshold_store(struct config_item *item,
+static ssize_t o2hb_heartbeat_group_dead_threshold_store(struct config_item *item,
const char *page, size_t count)
{
unsigned long tmp;
@@ -2125,11 +2125,11 @@ static ssize_t o2hb_heartbeat_group_mode_store(struct config_item *item,
}
-CONFIGFS_ATTR(o2hb_heartbeat_group_, threshold);
+CONFIGFS_ATTR(o2hb_heartbeat_group_, dead_threshold);
CONFIGFS_ATTR(o2hb_heartbeat_group_, mode);
static struct configfs_attribute *o2hb_heartbeat_group_attrs[] = {
- &o2hb_heartbeat_group_attr_threshold,
+ &o2hb_heartbeat_group_attr_dead_threshold,
&o2hb_heartbeat_group_attr_mode,
NULL,
};
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index e36d63ff1783..2e11658676eb 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -287,6 +287,19 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
status = DLM_DENIED;
goto bail;
}
+
+ if (lock->ml.type == type && lock->ml.convert_type == LKM_IVMODE) {
+ mlog(0, "last convert request returned DLM_RECOVERING, but "
+ "owner has already queued and sent ast to me. res %.*s, "
+ "(cookie=%u:%llu, type=%d, conv=%d)\n",
+ res->lockname.len, res->lockname.name,
+ dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
+ lock->ml.type, lock->ml.convert_type);
+ status = DLM_NORMAL;
+ goto bail;
+ }
+
res->state |= DLM_LOCK_RES_IN_PROGRESS;
/* move lock to local convert queue */
/* do not alter lock refcount. switching lists. */
@@ -315,13 +328,22 @@ enum dlm_status dlmconvert_remote(struct dlm_ctxt *dlm,
spin_lock(&res->spinlock);
res->state &= ~DLM_LOCK_RES_IN_PROGRESS;
- lock->convert_pending = 0;
- /* if it failed, move it back to granted queue */
+ /* if it failed, move it back to granted queue.
+ * if master returns DLM_NORMAL and then down before sending ast,
+ * it may have already been moved to granted queue, reset to
+ * DLM_RECOVERING and retry convert */
if (status != DLM_NORMAL) {
if (status != DLM_NOTQUEUED)
dlm_error(status);
dlm_revert_pending_convert(res, lock);
+ } else if (!lock->convert_pending) {
+ mlog(0, "%s: res %.*s, owner died and lock has been moved back "
+ "to granted list, retry convert.\n",
+ dlm->name, res->lockname.len, res->lockname.name);
+ status = DLM_RECOVERING;
}
+
+ lock->convert_pending = 0;
bail:
spin_unlock(&res->spinlock);
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 42f0cae93a0a..4a338803e7e9 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -2064,7 +2064,6 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm,
dlm_lock_get(lock);
if (lock->convert_pending) {
/* move converting lock back to granted */
- BUG_ON(i != DLM_CONVERTING_LIST);
mlog(0, "node died with convert pending "
"on %.*s. move back to granted list.\n",
res->lockname.len, res->lockname.name);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index b002acf50203..60a5f1548cd9 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -3321,6 +3321,16 @@ static int ocfs2_downconvert_lock(struct ocfs2_super *osb,
mlog(ML_BASTS, "lockres %s, level %d => %d\n", lockres->l_name,
lockres->l_level, new_level);
+ /*
+ * On DLM_LKF_VALBLK, fsdlm behaves differently with o2cb. It always
+ * expects DLM_LKF_VALBLK being set if the LKB has LVB, so that
+ * we can recover correctly from node failure. Otherwise, we may get
+ * invalid LVB in LKB, but without DLM_SBF_VALNOTVALID being set.
+ */
+ if (!ocfs2_is_o2cb_active() &&
+ lockres->l_ops->flags & LOCK_TYPE_USES_LVB)
+ lvb = 1;
+
if (lvb)
dlm_flags |= DLM_LKF_VALBLK;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 0e5b4515f92e..1d738723a41a 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -808,7 +808,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
/* We know that zero_from is block aligned */
for (block_start = zero_from; block_start < zero_to;
block_start = block_end) {
- block_end = block_start + (1 << inode->i_blkbits);
+ block_end = block_start + i_blocksize(inode);
/*
* block_start is block-aligned. Bump it by one to force
@@ -1268,20 +1268,20 @@ bail_unlock_rw:
if (size_change)
ocfs2_rw_unlock(inode, 1);
bail:
- brelse(bh);
/* Release quota pointers in case we acquired them */
for (qtype = 0; qtype < OCFS2_MAXQUOTAS; qtype++)
dqput(transfer_to[qtype]);
if (!status && attr->ia_valid & ATTR_MODE) {
- status = posix_acl_chmod(inode, inode->i_mode);
+ status = ocfs2_acl_chmod(inode, bh);
if (status < 0)
mlog_errno(status);
}
if (inode_locked)
ocfs2_inode_unlock(inode, 1);
+ brelse(bh);
return status;
}
@@ -1536,7 +1536,8 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
u64 start, u64 len)
{
int ret = 0;
- u64 tmpend, end = start + len;
+ u64 tmpend = 0;
+ u64 end = start + len;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
unsigned int csize = osb->s_clustersize;
handle_t *handle;
@@ -1568,18 +1569,31 @@ static int ocfs2_zero_partial_clusters(struct inode *inode,
}
/*
- * We want to get the byte offset of the end of the 1st cluster.
+ * If start is on a cluster boundary and end is somewhere in another
+ * cluster, we have not COWed the cluster starting at start, unless
+ * end is also within the same cluster. So, in this case, we skip this
+ * first call to ocfs2_zero_range_for_truncate() truncate and move on
+ * to the next one.
*/
- tmpend = (u64)osb->s_clustersize + (start & ~(osb->s_clustersize - 1));
- if (tmpend > end)
- tmpend = end;
+ if ((start & (csize - 1)) != 0) {
+ /*
+ * We want to get the byte offset of the end of the 1st
+ * cluster.
+ */
+ tmpend = (u64)osb->s_clustersize +
+ (start & ~(osb->s_clustersize - 1));
+ if (tmpend > end)
+ tmpend = end;
- trace_ocfs2_zero_partial_clusters_range1((unsigned long long)start,
- (unsigned long long)tmpend);
+ trace_ocfs2_zero_partial_clusters_range1(
+ (unsigned long long)start,
+ (unsigned long long)tmpend);
- ret = ocfs2_zero_range_for_truncate(inode, handle, start, tmpend);
- if (ret)
- mlog_errno(ret);
+ ret = ocfs2_zero_range_for_truncate(inode, handle, start,
+ tmpend);
+ if (ret)
+ mlog_errno(ret);
+ }
if (tmpend < end) {
/*
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 3123408da935..62af9554541d 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -259,7 +259,6 @@ static int ocfs2_mknod(struct inode *dir,
struct ocfs2_dir_lookup_result lookup = { NULL, };
sigset_t oldset;
int did_block_signals = 0;
- struct posix_acl *default_acl = NULL, *acl = NULL;
struct ocfs2_dentry_lock *dl = NULL;
trace_ocfs2_mknod(dir, dentry, dentry->d_name.len, dentry->d_name.name,
@@ -367,12 +366,6 @@ static int ocfs2_mknod(struct inode *dir,
goto leave;
}
- status = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
- if (status) {
- mlog_errno(status);
- goto leave;
- }
-
handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb,
S_ISDIR(mode),
xattr_credits));
@@ -421,16 +414,8 @@ static int ocfs2_mknod(struct inode *dir,
inc_nlink(dir);
}
- if (default_acl) {
- status = ocfs2_set_acl(handle, inode, new_fe_bh,
- ACL_TYPE_DEFAULT, default_acl,
- meta_ac, data_ac);
- }
- if (!status && acl) {
- status = ocfs2_set_acl(handle, inode, new_fe_bh,
- ACL_TYPE_ACCESS, acl,
- meta_ac, data_ac);
- }
+ status = ocfs2_init_acl(handle, inode, dir, new_fe_bh, parent_fe_bh,
+ meta_ac, data_ac);
if (status < 0) {
mlog_errno(status);
@@ -472,10 +457,6 @@ static int ocfs2_mknod(struct inode *dir,
d_instantiate(dentry, inode);
status = 0;
leave:
- if (default_acl)
- posix_acl_release(default_acl);
- if (acl)
- posix_acl_release(acl);
if (status < 0 && did_quota_inode)
dquot_free_inode(inode);
if (handle)
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 252119860e6c..6a0c55d7dff0 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4248,20 +4248,12 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
struct inode *inode = d_inode(old_dentry);
struct buffer_head *old_bh = NULL;
struct inode *new_orphan_inode = NULL;
- struct posix_acl *default_acl, *acl;
- umode_t mode;
if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
return -EOPNOTSUPP;
- mode = inode->i_mode;
- error = posix_acl_create(dir, &mode, &default_acl, &acl);
- if (error) {
- mlog_errno(error);
- return error;
- }
- error = ocfs2_create_inode_in_orphan(dir, mode,
+ error = ocfs2_create_inode_in_orphan(dir, inode->i_mode,
&new_orphan_inode);
if (error) {
mlog_errno(error);
@@ -4300,16 +4292,11 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
/* If the security isn't preserved, we need to re-initialize them. */
if (!preserve) {
error = ocfs2_init_security_and_acl(dir, new_orphan_inode,
- &new_dentry->d_name,
- default_acl, acl);
+ &new_dentry->d_name);
if (error)
mlog_errno(error);
}
out:
- if (default_acl)
- posix_acl_release(default_acl);
- if (acl)
- posix_acl_release(acl);
if (!error) {
error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
new_dentry);
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 5d965e83bd43..783bcdce5666 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -48,6 +48,12 @@ static char ocfs2_hb_ctl_path[OCFS2_MAX_HB_CTL_PATH] = "/sbin/ocfs2_hb_ctl";
*/
static struct ocfs2_stack_plugin *active_stack;
+inline int ocfs2_is_o2cb_active(void)
+{
+ return !strcmp(active_stack->sp_name, OCFS2_STACK_PLUGIN_O2CB);
+}
+EXPORT_SYMBOL_GPL(ocfs2_is_o2cb_active);
+
static struct ocfs2_stack_plugin *ocfs2_stack_lookup(const char *name)
{
struct ocfs2_stack_plugin *p;
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index 66334a30cea8..e1b30931974d 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -298,4 +298,7 @@ void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_p
int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin);
void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin);
+/* In ocfs2_downconvert_lock(), we need to know which stack we are using */
+int ocfs2_is_o2cb_active(void);
+
#endif /* STACKGLUE_H */
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index e9164f09841b..877830b05e12 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -7197,12 +7197,10 @@ out:
*/
int ocfs2_init_security_and_acl(struct inode *dir,
struct inode *inode,
- const struct qstr *qstr,
- struct posix_acl *default_acl,
- struct posix_acl *acl)
+ const struct qstr *qstr)
{
- struct buffer_head *dir_bh = NULL;
int ret = 0;
+ struct buffer_head *dir_bh = NULL;
ret = ocfs2_init_security_get(inode, dir, qstr, NULL);
if (ret) {
@@ -7215,11 +7213,9 @@ int ocfs2_init_security_and_acl(struct inode *dir,
mlog_errno(ret);
goto leave;
}
-
- if (!ret && default_acl)
- ret = ocfs2_iop_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
- if (!ret && acl)
- ret = ocfs2_iop_set_acl(inode, acl, ACL_TYPE_ACCESS);
+ ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
+ if (ret)
+ mlog_errno(ret);
ocfs2_inode_unlock(dir, 0);
brelse(dir_bh);
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index f10d5b93c366..1633cc15ea1f 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -94,7 +94,5 @@ int ocfs2_reflink_xattrs(struct inode *old_inode,
bool preserve_security);
int ocfs2_init_security_and_acl(struct inode *dir,
struct inode *inode,
- const struct qstr *qstr,
- struct posix_acl *default_acl,
- struct posix_acl *acl);
+ const struct qstr *qstr);
#endif /* OCFS2_XATTR_H */
diff --git a/fs/open.c b/fs/open.c
index b6f1e96a7c0b..fbc5c7b230b3 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -840,16 +840,12 @@ EXPORT_SYMBOL(file_path);
int vfs_open(const struct path *path, struct file *file,
const struct cred *cred)
{
- struct dentry *dentry = path->dentry;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = vfs_select_inode(path->dentry, file->f_flags);
- file->f_path = *path;
- if (dentry->d_flags & DCACHE_OP_SELECT_INODE) {
- inode = dentry->d_op->d_select_inode(dentry, file->f_flags);
- if (IS_ERR(inode))
- return PTR_ERR(inode);
- }
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+ file->f_path = *path;
return do_dentry_open(file, inode, NULL, cred);
}
@@ -889,6 +885,12 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o
int lookup_flags = 0;
int acc_mode;
+ /*
+ * Clear out all open flags we don't know about so that we don't report
+ * them in fcntl(F_GETFD) or similar interfaces.
+ */
+ flags &= VALID_OPEN_FLAGS;
+
if (flags & (O_CREAT | __O_TMPFILE))
op->mode = (mode & S_IALLUGO) | S_IFREG;
else
@@ -995,14 +997,12 @@ struct file *filp_open(const char *filename, int flags, umode_t mode)
EXPORT_SYMBOL(filp_open);
struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
- const char *filename, int flags)
+ const char *filename, int flags, umode_t mode)
{
struct open_flags op;
- int err = build_open_flags(flags, 0, &op);
+ int err = build_open_flags(flags, mode, &op);
if (err)
return ERR_PTR(err);
- if (flags & O_CREAT)
- return ERR_PTR(-EINVAL);
return do_file_open_root(dentry, mnt, filename, &op);
}
EXPORT_SYMBOL(file_open_root);
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index eff6319d5037..63a0d0ba36de 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -25,6 +25,7 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
ssize_t list_size, size, value_size = 0;
char *buf, *name, *value = NULL;
int uninitialized_var(error);
+ size_t slen;
if (!old->d_inode->i_op->getxattr ||
!new->d_inode->i_op->getxattr)
@@ -47,7 +48,18 @@ int ovl_copy_xattr(struct dentry *old, struct dentry *new)
goto out;
}
- for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
+ for (name = buf; list_size; name += slen) {
+ slen = strnlen(name, list_size) + 1;
+
+ /* underlying fs providing us with an broken xattr list? */
+ if (WARN_ON(slen > list_size)) {
+ error = -EIO;
+ break;
+ }
+ list_size -= slen;
+
+ if (ovl_is_private_xattr(name))
+ continue;
retry:
size = vfs_getxattr(old, name, value, value_size);
if (size == -ERANGE)
@@ -127,6 +139,8 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
len -= bytes;
}
+ if (!error)
+ error = vfs_fsync(new_file, 0);
fput(new_file);
out_fput:
fput(old_file);
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index a2b1d7ce3e1a..327177df03a5 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -12,6 +12,7 @@
#include <linux/xattr.h>
#include <linux/security.h>
#include <linux/cred.h>
+#include <linux/atomic.h>
#include "overlayfs.h"
void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
@@ -35,8 +36,10 @@ struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry)
{
struct dentry *temp;
char name[20];
+ static atomic_t temp_id = ATOMIC_INIT(0);
- snprintf(name, sizeof(name), "#%lx", (unsigned long) dentry);
+ /* counter is allowed to wrap, since temp dentries are ephemeral */
+ snprintf(name, sizeof(name), "#%x", atomic_inc_return(&temp_id));
temp = lookup_one_len(name, workdir, strlen(name));
if (!IS_ERR(temp) && temp->d_inode) {
@@ -511,6 +514,7 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
struct dentry *upper;
struct dentry *opaquedir = NULL;
int err;
+ int flags = 0;
if (WARN_ON(!workdir))
return -EROFS;
@@ -540,46 +544,39 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
if (err)
goto out_dput;
- whiteout = ovl_whiteout(workdir, dentry);
- err = PTR_ERR(whiteout);
- if (IS_ERR(whiteout))
+ upper = lookup_one_len(dentry->d_name.name, upperdir,
+ dentry->d_name.len);
+ err = PTR_ERR(upper);
+ if (IS_ERR(upper))
goto out_unlock;
- upper = ovl_dentry_upper(dentry);
- if (!upper) {
- upper = lookup_one_len(dentry->d_name.name, upperdir,
- dentry->d_name.len);
- err = PTR_ERR(upper);
- if (IS_ERR(upper))
- goto kill_whiteout;
-
- err = ovl_do_rename(wdir, whiteout, udir, upper, 0);
- dput(upper);
- if (err)
- goto kill_whiteout;
- } else {
- int flags = 0;
+ err = -ESTALE;
+ if ((opaquedir && upper != opaquedir) ||
+ (!opaquedir && ovl_dentry_upper(dentry) &&
+ upper != ovl_dentry_upper(dentry))) {
+ goto out_dput_upper;
+ }
- if (opaquedir)
- upper = opaquedir;
- err = -ESTALE;
- if (upper->d_parent != upperdir)
- goto kill_whiteout;
+ whiteout = ovl_whiteout(workdir, dentry);
+ err = PTR_ERR(whiteout);
+ if (IS_ERR(whiteout))
+ goto out_dput_upper;
- if (is_dir)
- flags |= RENAME_EXCHANGE;
+ if (d_is_dir(upper))
+ flags = RENAME_EXCHANGE;
- err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
- if (err)
- goto kill_whiteout;
+ err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
+ if (err)
+ goto kill_whiteout;
+ if (flags)
+ ovl_cleanup(wdir, upper);
- if (is_dir)
- ovl_cleanup(wdir, upper);
- }
ovl_dentry_version_inc(dentry->d_parent);
out_d_drop:
d_drop(dentry);
dput(whiteout);
+out_dput_upper:
+ dput(upper);
out_unlock:
unlock_rename(workdir, upperdir);
out_dput:
@@ -596,21 +593,25 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
{
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
struct inode *dir = upperdir->d_inode;
- struct dentry *upper = ovl_dentry_upper(dentry);
+ struct dentry *upper;
int err;
mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
+ upper = lookup_one_len(dentry->d_name.name, upperdir,
+ dentry->d_name.len);
+ err = PTR_ERR(upper);
+ if (IS_ERR(upper))
+ goto out_unlock;
+
err = -ESTALE;
- if (upper->d_parent == upperdir) {
- /* Don't let d_delete() think it can reset d_inode */
- dget(upper);
+ if (upper == ovl_dentry_upper(dentry)) {
if (is_dir)
err = vfs_rmdir(dir, upper);
else
err = vfs_unlink(dir, upper, NULL);
- dput(upper);
ovl_dentry_version_inc(dentry->d_parent);
}
+ dput(upper);
/*
* Keeping this dentry hashed would mean having to release
@@ -620,6 +621,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
*/
if (!err)
d_drop(dentry);
+out_unlock:
mutex_unlock(&dir->i_mutex);
return err;
@@ -840,29 +842,39 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old,
trap = lock_rename(new_upperdir, old_upperdir);
- olddentry = ovl_dentry_upper(old);
- newdentry = ovl_dentry_upper(new);
- if (newdentry) {
+
+ olddentry = lookup_one_len(old->d_name.name, old_upperdir,
+ old->d_name.len);
+ err = PTR_ERR(olddentry);
+ if (IS_ERR(olddentry))
+ goto out_unlock;
+
+ err = -ESTALE;
+ if (olddentry != ovl_dentry_upper(old))
+ goto out_dput_old;
+
+ newdentry = lookup_one_len(new->d_name.name, new_upperdir,
+ new->d_name.len);
+ err = PTR_ERR(newdentry);
+ if (IS_ERR(newdentry))
+ goto out_dput_old;
+
+ err = -ESTALE;
+ if (ovl_dentry_upper(new)) {
if (opaquedir) {
- newdentry = opaquedir;
- opaquedir = NULL;
+ if (newdentry != opaquedir)
+ goto out_dput;
} else {
- dget(newdentry);
+ if (newdentry != ovl_dentry_upper(new))
+ goto out_dput;
}
} else {
new_create = true;
- newdentry = lookup_one_len(new->d_name.name, new_upperdir,
- new->d_name.len);
- err = PTR_ERR(newdentry);
- if (IS_ERR(newdentry))
- goto out_unlock;
+ if (!d_is_negative(newdentry) &&
+ (!new_opaque || !ovl_is_whiteout(newdentry)))
+ goto out_dput;
}
- err = -ESTALE;
- if (olddentry->d_parent != old_upperdir)
- goto out_dput;
- if (newdentry->d_parent != new_upperdir)
- goto out_dput;
if (olddentry == trap)
goto out_dput;
if (newdentry == trap)
@@ -925,6 +937,8 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old,
out_dput:
dput(newdentry);
+out_dput_old:
+ dput(olddentry);
out_unlock:
unlock_rename(new_upperdir, old_upperdir);
out_revert_creds:
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 05ac9a95e881..220b04f04523 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -63,6 +63,9 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
if (!err) {
upperdentry = ovl_dentry_upper(dentry);
+ if (attr->ia_valid & (ATTR_KILL_SUID|ATTR_KILL_SGID))
+ attr->ia_valid &= ~ATTR_MODE;
+
mutex_lock(&upperdentry->d_inode->i_mutex);
err = notify_change(upperdentry, attr, NULL);
if (!err)
@@ -216,7 +219,7 @@ static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
}
-static bool ovl_is_private_xattr(const char *name)
+bool ovl_is_private_xattr(const char *name)
{
return strncmp(name, OVL_XATTR_PRE_NAME, OVL_XATTR_PRE_LEN) == 0;
}
@@ -274,7 +277,8 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
struct path realpath;
enum ovl_path_type type = ovl_path_real(dentry, &realpath);
ssize_t res;
- int off;
+ size_t len;
+ char *s;
res = vfs_listxattr(realpath.dentry, list, size);
if (res <= 0 || size == 0)
@@ -284,17 +288,19 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
return res;
/* filter out private xattrs */
- for (off = 0; off < res;) {
- char *s = list + off;
- size_t slen = strlen(s) + 1;
+ for (s = list, len = res; len;) {
+ size_t slen = strnlen(s, len) + 1;
- BUG_ON(off + slen > res);
+ /* underlying fs providing us with an broken xattr list? */
+ if (WARN_ON(slen > len))
+ return -EIO;
+ len -= slen;
if (ovl_is_private_xattr(s)) {
res -= slen;
- memmove(s, s + slen, res - off);
+ memmove(s, s + slen, len);
} else {
- off += slen;
+ s += slen;
}
}
@@ -412,12 +418,11 @@ struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
if (!inode)
return NULL;
- mode &= S_IFMT;
-
inode->i_ino = get_next_ino();
inode->i_mode = mode;
inode->i_flags |= S_NOATIME | S_NOCMTIME;
+ mode &= S_IFMT;
switch (mode) {
case S_IFDIR:
inode->i_private = oe;
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index e17154aeaae4..c319d5eaabcf 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -174,6 +174,7 @@ ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
int ovl_removexattr(struct dentry *dentry, const char *name);
struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags);
+bool ovl_is_private_xattr(const char *name);
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
struct ovl_entry *oe);
@@ -181,6 +182,7 @@ static inline void ovl_copyattr(struct inode *from, struct inode *to)
{
to->i_uid = from->i_uid;
to->i_gid = from->i_gid;
+ to->i_mode = from->i_mode;
}
/* dir.c */
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 000b2ed05c29..d70208c0de84 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -276,6 +276,37 @@ static void ovl_dentry_release(struct dentry *dentry)
}
}
+static struct dentry *ovl_d_real(struct dentry *dentry, struct inode *inode)
+{
+ struct dentry *real;
+
+ if (d_is_dir(dentry)) {
+ if (!inode || inode == d_inode(dentry))
+ return dentry;
+ goto bug;
+ }
+
+ real = ovl_dentry_upper(dentry);
+ if (real && (!inode || inode == d_inode(real)))
+ return real;
+
+ real = ovl_dentry_lower(dentry);
+ if (!real)
+ goto bug;
+
+ if (!inode || inode == d_inode(real))
+ return real;
+
+ /* Handle recursion */
+ if (real->d_flags & DCACHE_OP_REAL)
+ return real->d_op->d_real(real, inode);
+
+bug:
+ WARN(1, "ovl_d_real(%pd4, %s:%lu\n): real dentry not found\n", dentry,
+ inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
+ return dentry;
+}
+
static int ovl_dentry_revalidate(struct dentry *dentry, unsigned int flags)
{
struct ovl_entry *oe = dentry->d_fsdata;
@@ -320,11 +351,13 @@ static int ovl_dentry_weak_revalidate(struct dentry *dentry, unsigned int flags)
static const struct dentry_operations ovl_dentry_operations = {
.d_release = ovl_dentry_release,
.d_select_inode = ovl_d_select_inode,
+ .d_real = ovl_d_real,
};
static const struct dentry_operations ovl_reval_dentry_operations = {
.d_release = ovl_dentry_release,
.d_select_inode = ovl_d_select_inode,
+ .d_real = ovl_d_real,
.d_revalidate = ovl_dentry_revalidate,
.d_weak_revalidate = ovl_dentry_weak_revalidate,
};
@@ -343,7 +376,8 @@ static struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
static bool ovl_dentry_remote(struct dentry *dentry)
{
return dentry->d_flags &
- (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE);
+ (DCACHE_OP_REVALIDATE | DCACHE_OP_WEAK_REVALIDATE |
+ DCACHE_OP_REAL);
}
static bool ovl_dentry_weird(struct dentry *dentry)
@@ -729,6 +763,10 @@ retry:
struct kstat stat = {
.mode = S_IFDIR | 0,
};
+ struct iattr attr = {
+ .ia_valid = ATTR_MODE,
+ .ia_mode = stat.mode,
+ };
if (work->d_inode) {
err = -EEXIST;
@@ -744,6 +782,21 @@ retry:
err = ovl_create_real(dir, work, &stat, NULL, NULL, true);
if (err)
goto out_dput;
+
+ err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
+ if (err && err != -ENODATA && err != -EOPNOTSUPP)
+ goto out_dput;
+
+ err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS);
+ if (err && err != -ENODATA && err != -EOPNOTSUPP)
+ goto out_dput;
+
+ /* Clear any inherited mode bits */
+ inode_lock(work->d_inode);
+ err = notify_change(work, &attr, NULL);
+ inode_unlock(work->d_inode);
+ if (err)
+ goto out_dput;
}
out_unlock:
mutex_unlock(&dir->i_mutex);
diff --git a/fs/pipe.c b/fs/pipe.c
index 42cf8ddf0e55..ab8dad3ccb6a 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -38,6 +38,12 @@ unsigned int pipe_max_size = 1048576;
*/
unsigned int pipe_min_size = PAGE_SIZE;
+/* Maximum allocatable pages per user. Hard limit is unset by default, soft
+ * matches default values.
+ */
+unsigned long pipe_user_pages_hard;
+unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
+
/*
* We use a start+len construction, which provides full use of the
* allocated memory.
@@ -583,20 +589,49 @@ pipe_fasync(int fd, struct file *filp, int on)
return retval;
}
+static void account_pipe_buffers(struct pipe_inode_info *pipe,
+ unsigned long old, unsigned long new)
+{
+ atomic_long_add(new - old, &pipe->user->pipe_bufs);
+}
+
+static bool too_many_pipe_buffers_soft(struct user_struct *user)
+{
+ return pipe_user_pages_soft &&
+ atomic_long_read(&user->pipe_bufs) >= pipe_user_pages_soft;
+}
+
+static bool too_many_pipe_buffers_hard(struct user_struct *user)
+{
+ return pipe_user_pages_hard &&
+ atomic_long_read(&user->pipe_bufs) >= pipe_user_pages_hard;
+}
+
struct pipe_inode_info *alloc_pipe_info(void)
{
struct pipe_inode_info *pipe;
pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
if (pipe) {
- pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * PIPE_DEF_BUFFERS, GFP_KERNEL);
+ unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
+ struct user_struct *user = get_current_user();
+
+ if (!too_many_pipe_buffers_hard(user)) {
+ if (too_many_pipe_buffers_soft(user))
+ pipe_bufs = 1;
+ pipe->bufs = kzalloc(sizeof(struct pipe_buffer) * pipe_bufs, GFP_KERNEL);
+ }
+
if (pipe->bufs) {
init_waitqueue_head(&pipe->wait);
pipe->r_counter = pipe->w_counter = 1;
- pipe->buffers = PIPE_DEF_BUFFERS;
+ pipe->buffers = pipe_bufs;
+ pipe->user = user;
+ account_pipe_buffers(pipe, 0, pipe_bufs);
mutex_init(&pipe->mutex);
return pipe;
}
+ free_uid(user);
kfree(pipe);
}
@@ -607,6 +642,8 @@ void free_pipe_info(struct pipe_inode_info *pipe)
{
int i;
+ account_pipe_buffers(pipe, pipe->buffers, 0);
+ free_uid(pipe->user);
for (i = 0; i < pipe->buffers; i++) {
struct pipe_buffer *buf = pipe->bufs + i;
if (buf->ops)
@@ -998,6 +1035,7 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)
memcpy(bufs + head, pipe->bufs, tail * sizeof(struct pipe_buffer));
}
+ account_pipe_buffers(pipe, pipe->buffers, nr_pages);
pipe->curbuf = 0;
kfree(pipe->bufs);
pipe->bufs = bufs;
@@ -1069,6 +1107,11 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
if (!capable(CAP_SYS_RESOURCE) && size > pipe_max_size) {
ret = -EPERM;
goto out;
+ } else if ((too_many_pipe_buffers_hard(pipe->user) ||
+ too_many_pipe_buffers_soft(pipe->user)) &&
+ !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN)) {
+ ret = -EPERM;
+ goto out;
}
ret = pipe_set_size(pipe, nr_pages);
break;
diff --git a/fs/pnode.c b/fs/pnode.c
index 6367e1e435c6..d15c63e97ef1 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -24,6 +24,11 @@ static inline struct mount *first_slave(struct mount *p)
return list_entry(p->mnt_slave_list.next, struct mount, mnt_slave);
}
+static inline struct mount *last_slave(struct mount *p)
+{
+ return list_entry(p->mnt_slave_list.prev, struct mount, mnt_slave);
+}
+
static inline struct mount *next_slave(struct mount *p)
{
return list_entry(p->mnt_slave.next, struct mount, mnt_slave);
@@ -164,6 +169,19 @@ static struct mount *propagation_next(struct mount *m,
}
}
+static struct mount *skip_propagation_subtree(struct mount *m,
+ struct mount *origin)
+{
+ /*
+ * Advance m such that propagation_next will not return
+ * the slaves of m.
+ */
+ if (!IS_MNT_NEW(m) && !list_empty(&m->mnt_slave_list))
+ m = last_slave(m);
+
+ return m;
+}
+
static struct mount *next_group(struct mount *m, struct mount *origin)
{
while (1) {
@@ -198,10 +216,15 @@ static struct mount *next_group(struct mount *m, struct mount *origin)
/* all accesses are serialized by namespace_sem */
static struct user_namespace *user_ns;
-static struct mount *last_dest, *last_source, *dest_master;
+static struct mount *last_dest, *first_source, *last_source, *dest_master;
static struct mountpoint *mp;
static struct hlist_head *list;
+static inline bool peers(struct mount *m1, struct mount *m2)
+{
+ return m1->mnt_group_id == m2->mnt_group_id && m1->mnt_group_id;
+}
+
static int propagate_one(struct mount *m)
{
struct mount *child;
@@ -212,24 +235,26 @@ static int propagate_one(struct mount *m)
/* skip if mountpoint isn't covered by it */
if (!is_subdir(mp->m_dentry, m->mnt.mnt_root))
return 0;
- if (m->mnt_group_id == last_dest->mnt_group_id) {
+ if (peers(m, last_dest)) {
type = CL_MAKE_SHARED;
} else {
struct mount *n, *p;
+ bool done;
for (n = m; ; n = p) {
p = n->mnt_master;
- if (p == dest_master || IS_MNT_MARKED(p)) {
- while (last_dest->mnt_master != p) {
- last_source = last_source->mnt_master;
- last_dest = last_source->mnt_parent;
- }
- if (n->mnt_group_id != last_dest->mnt_group_id) {
- last_source = last_source->mnt_master;
- last_dest = last_source->mnt_parent;
- }
+ if (p == dest_master || IS_MNT_MARKED(p))
break;
- }
}
+ do {
+ struct mount *parent = last_source->mnt_parent;
+ if (last_source == first_source)
+ break;
+ done = parent->mnt_master == p;
+ if (done && peers(n, parent))
+ break;
+ last_source = last_source->mnt_master;
+ } while (!done);
+
type = CL_SLAVE;
/* beginning of peer group among the slaves? */
if (IS_MNT_SHARED(m))
@@ -252,7 +277,7 @@ static int propagate_one(struct mount *m)
read_sequnlock_excl(&mount_lock);
}
hlist_add_head(&child->mnt_hash, list);
- return 0;
+ return count_mounts(m->mnt_ns, child);
}
/*
@@ -281,6 +306,7 @@ int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
*/
user_ns = current->nsproxy->mnt_ns->user_ns;
last_dest = dest_mnt;
+ first_source = source_mnt;
last_source = source_mnt;
mp = dest_mp;
list = tree_list;
@@ -316,6 +342,21 @@ out:
return ret;
}
+static struct mount *find_topper(struct mount *mnt)
+{
+ /* If there is exactly one mount covering mnt completely return it. */
+ struct mount *child;
+
+ if (!list_is_singular(&mnt->mnt_mounts))
+ return NULL;
+
+ child = list_first_entry(&mnt->mnt_mounts, struct mount, mnt_child);
+ if (child->mnt_mountpoint != mnt->mnt.mnt_root)
+ return NULL;
+
+ return child;
+}
+
/*
* return true if the refcount is greater than count
*/
@@ -336,9 +377,8 @@ static inline int do_refcount_check(struct mount *mnt, int count)
*/
int propagate_mount_busy(struct mount *mnt, int refcnt)
{
- struct mount *m, *child;
+ struct mount *m, *child, *topper;
struct mount *parent = mnt->mnt_parent;
- int ret = 0;
if (mnt == parent)
return do_refcount_check(mnt, refcnt);
@@ -353,12 +393,24 @@ int propagate_mount_busy(struct mount *mnt, int refcnt)
for (m = propagation_next(parent, parent); m;
m = propagation_next(m, parent)) {
- child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint);
- if (child && list_empty(&child->mnt_mounts) &&
- (ret = do_refcount_check(child, 1)))
- break;
+ int count = 1;
+ child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
+ if (!child)
+ continue;
+
+ /* Is there exactly one mount on the child that covers
+ * it completely whose reference should be ignored?
+ */
+ topper = find_topper(child);
+ if (topper)
+ count += 1;
+ else if (!list_empty(&child->mnt_mounts))
+ continue;
+
+ if (do_refcount_check(child, count))
+ return 1;
}
- return ret;
+ return 0;
}
/*
@@ -375,63 +427,113 @@ void propagate_mount_unlock(struct mount *mnt)
for (m = propagation_next(parent, parent); m;
m = propagation_next(m, parent)) {
- child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint);
+ child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
if (child)
child->mnt.mnt_flags &= ~MNT_LOCKED;
}
}
-/*
- * Mark all mounts that the MNT_LOCKED logic will allow to be unmounted.
- */
-static void mark_umount_candidates(struct mount *mnt)
+static void umount_one(struct mount *mnt, struct list_head *to_umount)
{
- struct mount *parent = mnt->mnt_parent;
- struct mount *m;
-
- BUG_ON(parent == mnt);
-
- for (m = propagation_next(parent, parent); m;
- m = propagation_next(m, parent)) {
- struct mount *child = __lookup_mnt_last(&m->mnt,
- mnt->mnt_mountpoint);
- if (child && (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m))) {
- SET_MNT_MARK(child);
- }
- }
+ CLEAR_MNT_MARK(mnt);
+ mnt->mnt.mnt_flags |= MNT_UMOUNT;
+ list_del_init(&mnt->mnt_child);
+ list_del_init(&mnt->mnt_umounting);
+ list_move_tail(&mnt->mnt_list, to_umount);
}
/*
* NOTE: unmounting 'mnt' naturally propagates to all other mounts its
* parent propagates to.
*/
-static void __propagate_umount(struct mount *mnt)
+static bool __propagate_umount(struct mount *mnt,
+ struct list_head *to_umount,
+ struct list_head *to_restore)
{
- struct mount *parent = mnt->mnt_parent;
- struct mount *m;
-
- BUG_ON(parent == mnt);
+ bool progress = false;
+ struct mount *child;
- for (m = propagation_next(parent, parent); m;
- m = propagation_next(m, parent)) {
+ /*
+ * The state of the parent won't change if this mount is
+ * already unmounted or marked as without children.
+ */
+ if (mnt->mnt.mnt_flags & (MNT_UMOUNT | MNT_MARKED))
+ goto out;
- struct mount *child = __lookup_mnt_last(&m->mnt,
- mnt->mnt_mountpoint);
- /*
- * umount the child only if the child has no children
- * and the child is marked safe to unmount.
- */
- if (!child || !IS_MNT_MARKED(child))
+ /* Verify topper is the only grandchild that has not been
+ * speculatively unmounted.
+ */
+ list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) {
+ if (child->mnt_mountpoint == mnt->mnt.mnt_root)
continue;
- CLEAR_MNT_MARK(child);
- if (list_empty(&child->mnt_mounts)) {
- list_del_init(&child->mnt_child);
- child->mnt.mnt_flags |= MNT_UMOUNT;
- list_move_tail(&child->mnt_list, &mnt->mnt_list);
+ if (!list_empty(&child->mnt_umounting) && IS_MNT_MARKED(child))
+ continue;
+ /* Found a mounted child */
+ goto children;
+ }
+
+ /* Mark mounts that can be unmounted if not locked */
+ SET_MNT_MARK(mnt);
+ progress = true;
+
+ /* If a mount is without children and not locked umount it. */
+ if (!IS_MNT_LOCKED(mnt)) {
+ umount_one(mnt, to_umount);
+ } else {
+children:
+ list_move_tail(&mnt->mnt_umounting, to_restore);
+ }
+out:
+ return progress;
+}
+
+static void umount_list(struct list_head *to_umount,
+ struct list_head *to_restore)
+{
+ struct mount *mnt, *child, *tmp;
+ list_for_each_entry(mnt, to_umount, mnt_list) {
+ list_for_each_entry_safe(child, tmp, &mnt->mnt_mounts, mnt_child) {
+ /* topper? */
+ if (child->mnt_mountpoint == mnt->mnt.mnt_root)
+ list_move_tail(&child->mnt_umounting, to_restore);
+ else
+ umount_one(child, to_umount);
}
}
}
+static void restore_mounts(struct list_head *to_restore)
+{
+ /* Restore mounts to a clean working state */
+ while (!list_empty(to_restore)) {
+ struct mount *mnt, *parent;
+ struct mountpoint *mp;
+
+ mnt = list_first_entry(to_restore, struct mount, mnt_umounting);
+ CLEAR_MNT_MARK(mnt);
+ list_del_init(&mnt->mnt_umounting);
+
+ /* Should this mount be reparented? */
+ mp = mnt->mnt_mp;
+ parent = mnt->mnt_parent;
+ while (parent->mnt.mnt_flags & MNT_UMOUNT) {
+ mp = parent->mnt_mp;
+ parent = parent->mnt_parent;
+ }
+ if (parent != mnt->mnt_parent)
+ mnt_change_mountpoint(parent, mp, mnt);
+ }
+}
+
+static void cleanup_umount_visitations(struct list_head *visited)
+{
+ while (!list_empty(visited)) {
+ struct mount *mnt =
+ list_first_entry(visited, struct mount, mnt_umounting);
+ list_del_init(&mnt->mnt_umounting);
+ }
+}
+
/*
* collect all mounts that receive propagation from the mount in @list,
* and return these additional mounts in the same list.
@@ -442,11 +544,68 @@ static void __propagate_umount(struct mount *mnt)
int propagate_umount(struct list_head *list)
{
struct mount *mnt;
+ LIST_HEAD(to_restore);
+ LIST_HEAD(to_umount);
+ LIST_HEAD(visited);
+
+ /* Find candidates for unmounting */
+ list_for_each_entry_reverse(mnt, list, mnt_list) {
+ struct mount *parent = mnt->mnt_parent;
+ struct mount *m;
+
+ /*
+ * If this mount has already been visited it is known that it's
+ * entire peer group and all of their slaves in the propagation
+ * tree for the mountpoint has already been visited and there is
+ * no need to visit them again.
+ */
+ if (!list_empty(&mnt->mnt_umounting))
+ continue;
+
+ list_add_tail(&mnt->mnt_umounting, &visited);
+ for (m = propagation_next(parent, parent); m;
+ m = propagation_next(m, parent)) {
+ struct mount *child = __lookup_mnt(&m->mnt,
+ mnt->mnt_mountpoint);
+ if (!child)
+ continue;
+
+ if (!list_empty(&child->mnt_umounting)) {
+ /*
+ * If the child has already been visited it is
+ * know that it's entire peer group and all of
+ * their slaves in the propgation tree for the
+ * mountpoint has already been visited and there
+ * is no need to visit this subtree again.
+ */
+ m = skip_propagation_subtree(m, parent);
+ continue;
+ } else if (child->mnt.mnt_flags & MNT_UMOUNT) {
+ /*
+ * We have come accross an partially unmounted
+ * mount in list that has not been visited yet.
+ * Remember it has been visited and continue
+ * about our merry way.
+ */
+ list_add_tail(&child->mnt_umounting, &visited);
+ continue;
+ }
+
+ /* Check the child and parents while progress is made */
+ while (__propagate_umount(child,
+ &to_umount, &to_restore)) {
+ /* Is the parent a umount candidate? */
+ child = child->mnt_parent;
+ if (list_empty(&child->mnt_umounting))
+ break;
+ }
+ }
+ }
- list_for_each_entry_reverse(mnt, list, mnt_list)
- mark_umount_candidates(mnt);
+ umount_list(&to_umount, &to_restore);
+ restore_mounts(&to_restore);
+ cleanup_umount_visitations(&visited);
+ list_splice_tail(&to_umount, list);
- list_for_each_entry(mnt, list, mnt_list)
- __propagate_umount(mnt);
return 0;
}
diff --git a/fs/pnode.h b/fs/pnode.h
index 0fcdbe7ca648..dc87e65becd2 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -49,7 +49,10 @@ int get_dominating_id(struct mount *mnt, const struct path *root);
unsigned int mnt_get_count(struct mount *mnt);
void mnt_set_mountpoint(struct mount *, struct mountpoint *,
struct mount *);
+void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp,
+ struct mount *mnt);
struct mount *copy_tree(struct mount *, struct dentry *, int);
bool is_path_reachable(struct mount *, struct dentry *,
const struct path *root);
+int count_mounts(struct mnt_namespace *ns, struct mount *mnt);
#endif /* _LINUX_PNODE_H */
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 4adde1e2cbec..993bb3b5f4d5 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -592,6 +592,37 @@ no_mem:
}
EXPORT_SYMBOL_GPL(posix_acl_create);
+/**
+ * posix_acl_update_mode - update mode in set_acl
+ *
+ * Update the file mode when setting an ACL: compute the new file permission
+ * bits based on the ACL. In addition, if the ACL is equivalent to the new
+ * file mode, set *acl to NULL to indicate that no ACL should be set.
+ *
+ * As with chmod, clear the setgit bit if the caller is not in the owning group
+ * or capable of CAP_FSETID (see inode_change_ok).
+ *
+ * Called from set_acl inode operations.
+ */
+int posix_acl_update_mode(struct inode *inode, umode_t *mode_p,
+ struct posix_acl **acl)
+{
+ umode_t mode = inode->i_mode;
+ int error;
+
+ error = posix_acl_equiv_mode(*acl, &mode);
+ if (error < 0)
+ return error;
+ if (error == 0)
+ *acl = NULL;
+ if (!in_group_p(inode->i_gid) &&
+ !capable_wrt_inode_uidgid(inode, CAP_FSETID))
+ mode &= ~S_ISGID;
+ *mode_p = mode;
+ return 0;
+}
+EXPORT_SYMBOL(posix_acl_update_mode);
+
/*
* Fix up the uids and gids in posix acl extended attributes in place.
*/
@@ -788,6 +819,28 @@ posix_acl_xattr_get(const struct xattr_handler *handler,
return error;
}
+int
+set_posix_acl(struct inode *inode, int type, struct posix_acl *acl)
+{
+ if (!IS_POSIXACL(inode))
+ return -EOPNOTSUPP;
+ if (!inode->i_op->set_acl)
+ return -EOPNOTSUPP;
+
+ if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
+ return acl ? -EACCES : 0;
+ if (!inode_owner_or_capable(inode))
+ return -EPERM;
+
+ if (acl) {
+ int ret = posix_acl_valid(acl);
+ if (ret)
+ return ret;
+ }
+ return inode->i_op->set_acl(inode, acl, type);
+}
+EXPORT_SYMBOL(set_posix_acl);
+
static int
posix_acl_xattr_set(const struct xattr_handler *handler,
struct dentry *dentry, const char *name,
@@ -799,30 +852,13 @@ posix_acl_xattr_set(const struct xattr_handler *handler,
if (strcmp(name, "") != 0)
return -EINVAL;
- if (!IS_POSIXACL(inode))
- return -EOPNOTSUPP;
- if (!inode->i_op->set_acl)
- return -EOPNOTSUPP;
-
- if (handler->flags == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
- return value ? -EACCES : 0;
- if (!inode_owner_or_capable(inode))
- return -EPERM;
if (value) {
acl = posix_acl_from_xattr(&init_user_ns, value, size);
if (IS_ERR(acl))
return PTR_ERR(acl);
-
- if (acl) {
- ret = posix_acl_valid(acl);
- if (ret)
- goto out;
- }
}
-
- ret = inode->i_op->set_acl(inode, acl, handler->flags);
-out:
+ ret = set_posix_acl(inode, handler->flags, acl);
posix_acl_release(acl);
return ret;
}
@@ -867,11 +903,10 @@ int simple_set_acl(struct inode *inode, struct posix_acl *acl, int type)
int error;
if (type == ACL_TYPE_ACCESS) {
- error = posix_acl_equiv_mode(acl, &inode->i_mode);
- if (error < 0)
- return 0;
- if (error == 0)
- acl = NULL;
+ error = posix_acl_update_mode(inode,
+ &inode->i_mode, &acl);
+ if (error)
+ return error;
}
inode->i_ctime = CURRENT_TIME;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index b7de324bec11..dd732400578e 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -954,7 +954,8 @@ static ssize_t environ_read(struct file *file, char __user *buf,
int ret = 0;
struct mm_struct *mm = file->private_data;
- if (!mm)
+ /* Ensure the process spawned far enough to have an environment. */
+ if (!mm || !mm->env_end)
return 0;
page = (char *)__get_free_page(GFP_TEMPORARY);
@@ -1544,18 +1545,13 @@ static const struct file_operations proc_pid_set_comm_operations = {
static int proc_exe_link(struct dentry *dentry, struct path *exe_path)
{
struct task_struct *task;
- struct mm_struct *mm;
struct file *exe_file;
task = get_proc_task(d_inode(dentry));
if (!task)
return -ENOENT;
- mm = get_task_mm(task);
+ exe_file = get_task_exe_file(task);
put_task_struct(task);
- if (!mm)
- return -ENOENT;
- exe_file = get_mm_exe_file(mm);
- mmput(mm);
if (exe_file) {
*exe_path = exe_file->f_path;
path_get(&exe_file->f_path);
@@ -3062,6 +3058,8 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
iter.tgid += 1, iter = next_tgid(ns, iter)) {
char name[PROC_NUMBUF];
int len;
+
+ cond_resched();
if (!has_pid_permissions(ns, iter.task, 2))
continue;
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index ff3ffc76a937..3773335791da 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -469,6 +469,7 @@ struct proc_dir_entry *proc_create_mount_point(const char *name)
ent->data = NULL;
ent->proc_fops = NULL;
ent->proc_iops = NULL;
+ parent->nlink++;
if (proc_register(parent, ent) < 0) {
kfree(ent);
parent->nlink--;
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index fe5b6e6c4671..4dbe1e2daeca 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -703,7 +703,7 @@ static int proc_sys_readdir(struct file *file, struct dir_context *ctx)
ctl_dir = container_of(head, struct ctl_dir, header);
if (!dir_emit_dots(file, ctx))
- return 0;
+ goto out;
pos = 2;
@@ -713,6 +713,7 @@ static int proc_sys_readdir(struct file *file, struct dir_context *ctx)
break;
}
}
+out:
sysctl_head_finish(head);
return 0;
}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 361ab4ee42fc..ec649c92d270 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -121,6 +121,13 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
if (IS_ERR(sb))
return ERR_CAST(sb);
+ /*
+ * procfs isn't actually a stacking filesystem; however, there is
+ * too much magic going on inside it to permit stacking things on
+ * top of it
+ */
+ sb->s_stack_depth = FILESYSTEM_MAX_STACK_DEPTH;
+
if (!proc_parse_options(options, ns)) {
deactivate_locked_super(sb);
return ERR_PTR(-EINVAL);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 09cd3edde08a..07ef85e19fbc 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -248,23 +248,29 @@ static int do_maps_open(struct inode *inode, struct file *file,
sizeof(struct proc_maps_private));
}
-static pid_t pid_of_stack(struct proc_maps_private *priv,
- struct vm_area_struct *vma, bool is_pid)
+/*
+ * Indicate if the VMA is a stack for the given task; for
+ * /proc/PID/maps that is the stack of the main task.
+ */
+static int is_stack(struct proc_maps_private *priv,
+ struct vm_area_struct *vma, int is_pid)
{
- struct inode *inode = priv->inode;
- struct task_struct *task;
- pid_t ret = 0;
+ int stack = 0;
- rcu_read_lock();
- task = pid_task(proc_pid(inode), PIDTYPE_PID);
- if (task) {
- task = task_of_stack(task, vma, is_pid);
+ if (is_pid) {
+ stack = vma->vm_start <= vma->vm_mm->start_stack &&
+ vma->vm_end >= vma->vm_mm->start_stack;
+ } else {
+ struct inode *inode = priv->inode;
+ struct task_struct *task;
+
+ rcu_read_lock();
+ task = pid_task(proc_pid(inode), PIDTYPE_PID);
if (task)
- ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info);
+ stack = vma_is_stack_for_task(vma, task);
+ rcu_read_unlock();
}
- rcu_read_unlock();
-
- return ret;
+ return stack;
}
static void
@@ -289,11 +295,7 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
/* We don't show the stack guard page in /proc/maps */
start = vma->vm_start;
- if (stack_guard_page_start(vma, start))
- start += PAGE_SIZE;
end = vma->vm_end;
- if (stack_guard_page_end(vma, end))
- end -= PAGE_SIZE;
seq_setwidth(m, 25 + sizeof(void *) * 6 - 1);
seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu ",
@@ -324,8 +326,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
name = arch_vma_name(vma);
if (!name) {
- pid_t tid;
-
if (!mm) {
name = "[vdso]";
goto done;
@@ -337,21 +337,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma, int is_pid)
goto done;
}
- tid = pid_of_stack(priv, vma, is_pid);
- if (tid != 0) {
- /*
- * Thread stack in /proc/PID/task/TID/maps or
- * the main process stack.
- */
- if (!is_pid || (vma->vm_start <= mm->start_stack &&
- vma->vm_end >= mm->start_stack)) {
- name = "[stack]";
- } else {
- /* Thread stack in /proc/PID/maps */
- seq_pad(m, ' ');
- seq_printf(m, "[stack:%d]", tid);
- }
- }
+ if (is_stack(priv, vma, is_pid))
+ name = "[stack]";
}
done:
@@ -812,7 +799,14 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
- pmd_t pmd = pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp);
+ pmd_t pmd = *pmdp;
+
+ /* See comment in change_huge_pmd() */
+ pmdp_invalidate(vma, addr, pmdp);
+ if (pmd_dirty(*pmdp))
+ pmd = pmd_mkdirty(pmd);
+ if (pmd_young(*pmdp))
+ pmd = pmd_mkyoung(pmd);
pmd = pmd_wrprotect(pmd);
pmd = pmd_clear_soft_dirty(pmd);
@@ -1435,6 +1429,32 @@ static struct page *can_gather_numa_stats(pte_t pte, struct vm_area_struct *vma,
return page;
}
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static struct page *can_gather_numa_stats_pmd(pmd_t pmd,
+ struct vm_area_struct *vma,
+ unsigned long addr)
+{
+ struct page *page;
+ int nid;
+
+ if (!pmd_present(pmd))
+ return NULL;
+
+ page = vm_normal_page_pmd(vma, addr, pmd);
+ if (!page)
+ return NULL;
+
+ if (PageReserved(page))
+ return NULL;
+
+ nid = page_to_nid(page);
+ if (!node_isset(nid, node_states[N_MEMORY]))
+ return NULL;
+
+ return page;
+}
+#endif
+
static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
@@ -1444,13 +1464,13 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
pte_t *orig_pte;
pte_t *pte;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
if (pmd_trans_huge_lock(pmd, vma, &ptl) == 1) {
- pte_t huge_pte = *(pte_t *)pmd;
struct page *page;
- page = can_gather_numa_stats(huge_pte, vma, addr);
+ page = can_gather_numa_stats_pmd(*pmd, vma, addr);
if (page)
- gather_stats(page, md, pte_dirty(huge_pte),
+ gather_stats(page, md, pmd_dirty(*pmd),
HPAGE_PMD_SIZE/PAGE_SIZE);
spin_unlock(ptl);
return 0;
@@ -1458,6 +1478,7 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr,
if (pmd_trans_unstable(pmd))
return 0;
+#endif
orig_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
do {
struct page *page = can_gather_numa_stats(*pte, vma, addr);
@@ -1539,19 +1560,8 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
seq_file_path(m, file, "\n\t= ");
} else if (vma->vm_start <= mm->brk && vma->vm_end >= mm->start_brk) {
seq_puts(m, " heap");
- } else {
- pid_t tid = pid_of_stack(proc_priv, vma, is_pid);
- if (tid != 0) {
- /*
- * Thread stack in /proc/PID/task/TID/maps or
- * the main process stack.
- */
- if (!is_pid || (vma->vm_start <= mm->start_stack &&
- vma->vm_end >= mm->start_stack))
- seq_puts(m, " stack");
- else
- seq_printf(m, " stack:%d", tid);
- }
+ } else if (is_stack(proc_priv, vma, is_pid)) {
+ seq_puts(m, " stack");
}
if (is_vm_hugetlb_page(vma))
diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
index e0d64c92e4f6..faacb0c0d857 100644
--- a/fs/proc/task_nommu.c
+++ b/fs/proc/task_nommu.c
@@ -123,23 +123,26 @@ unsigned long task_statm(struct mm_struct *mm,
return size;
}
-static pid_t pid_of_stack(struct proc_maps_private *priv,
- struct vm_area_struct *vma, bool is_pid)
+static int is_stack(struct proc_maps_private *priv,
+ struct vm_area_struct *vma, int is_pid)
{
- struct inode *inode = priv->inode;
- struct task_struct *task;
- pid_t ret = 0;
-
- rcu_read_lock();
- task = pid_task(proc_pid(inode), PIDTYPE_PID);
- if (task) {
- task = task_of_stack(task, vma, is_pid);
+ struct mm_struct *mm = vma->vm_mm;
+ int stack = 0;
+
+ if (is_pid) {
+ stack = vma->vm_start <= mm->start_stack &&
+ vma->vm_end >= mm->start_stack;
+ } else {
+ struct inode *inode = priv->inode;
+ struct task_struct *task;
+
+ rcu_read_lock();
+ task = pid_task(proc_pid(inode), PIDTYPE_PID);
if (task)
- ret = task_pid_nr_ns(task, inode->i_sb->s_fs_info);
+ stack = vma_is_stack_for_task(vma, task);
+ rcu_read_unlock();
}
- rcu_read_unlock();
-
- return ret;
+ return stack;
}
/*
@@ -181,21 +184,9 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma,
if (file) {
seq_pad(m, ' ');
seq_file_path(m, file, "");
- } else if (mm) {
- pid_t tid = pid_of_stack(priv, vma, is_pid);
-
- if (tid != 0) {
- seq_pad(m, ' ');
- /*
- * Thread stack in /proc/PID/task/TID/maps or
- * the main process stack.
- */
- if (!is_pid || (vma->vm_start <= mm->start_stack &&
- vma->vm_end >= mm->start_stack))
- seq_printf(m, "[stack]");
- else
- seq_printf(m, "[stack:%d]", tid);
- }
+ } else if (mm && is_stack(priv, vma, is_pid)) {
+ seq_pad(m, ' ');
+ seq_printf(m, "[stack]");
}
seq_putc(m, '\n');
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 8ebd9a334085..87645955990d 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -197,6 +197,8 @@ static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
if (sb->s_op->show_devname) {
seq_puts(m, "device ");
err = sb->s_op->show_devname(m, mnt_path.dentry);
+ if (err)
+ goto out;
} else {
if (r->mnt_devname) {
seq_puts(m, "device ");
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index d8c439d813ce..ac6c78fe19cf 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -178,7 +178,6 @@ static loff_t pstore_file_llseek(struct file *file, loff_t off, int whence)
}
static const struct file_operations pstore_file_operations = {
- .owner = THIS_MODULE,
.open = pstore_file_open,
.read = pstore_file_read,
.llseek = pstore_file_llseek,
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 319c3a60cfa5..59d93acc29c7 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -375,13 +375,14 @@ static void ramoops_free_przs(struct ramoops_context *cxt)
{
int i;
- cxt->max_dump_cnt = 0;
if (!cxt->przs)
return;
- for (i = 0; !IS_ERR_OR_NULL(cxt->przs[i]); i++)
+ for (i = 0; i < cxt->max_dump_cnt; i++)
persistent_ram_free(cxt->przs[i]);
+
kfree(cxt->przs);
+ cxt->max_dump_cnt = 0;
}
static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt,
@@ -406,17 +407,22 @@ static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt,
GFP_KERNEL);
if (!cxt->przs) {
dev_err(dev, "failed to initialize a prz array for dumps\n");
- goto fail_prz;
+ goto fail_mem;
}
for (i = 0; i < cxt->max_dump_cnt; i++) {
cxt->przs[i] = persistent_ram_new(*paddr, cxt->record_size, 0,
&cxt->ecc_info,
- cxt->memtype);
+ cxt->memtype, 0);
if (IS_ERR(cxt->przs[i])) {
err = PTR_ERR(cxt->przs[i]);
dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n",
cxt->record_size, (unsigned long long)*paddr, err);
+
+ while (i > 0) {
+ i--;
+ persistent_ram_free(cxt->przs[i]);
+ }
goto fail_prz;
}
*paddr += cxt->record_size;
@@ -424,7 +430,9 @@ static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt,
return 0;
fail_prz:
- ramoops_free_przs(cxt);
+ kfree(cxt->przs);
+fail_mem:
+ cxt->max_dump_cnt = 0;
return err;
}
@@ -442,7 +450,8 @@ static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt,
return -ENOMEM;
}
- *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info, cxt->memtype);
+ *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info,
+ cxt->memtype, 0);
if (IS_ERR(*prz)) {
int err = PTR_ERR(*prz);
@@ -583,7 +592,6 @@ static int ramoops_remove(struct platform_device *pdev)
struct ramoops_context *cxt = &oops_cxt;
pstore_unregister(&cxt->pstore);
- cxt->max_dump_cnt = 0;
kfree(cxt->pstore.buf);
cxt->pstore.bufsize = 0;
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 76c3f80efdfa..27300533c2dd 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -48,48 +48,14 @@ static inline size_t buffer_start(struct persistent_ram_zone *prz)
}
/* increase and wrap the start pointer, returning the old value */
-static size_t buffer_start_add_atomic(struct persistent_ram_zone *prz, size_t a)
+static size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
{
int old;
int new;
+ unsigned long flags = 0;
- do {
- old = atomic_read(&prz->buffer->start);
- new = old + a;
- while (unlikely(new >= prz->buffer_size))
- new -= prz->buffer_size;
- } while (atomic_cmpxchg(&prz->buffer->start, old, new) != old);
-
- return old;
-}
-
-/* increase the size counter until it hits the max size */
-static void buffer_size_add_atomic(struct persistent_ram_zone *prz, size_t a)
-{
- size_t old;
- size_t new;
-
- if (atomic_read(&prz->buffer->size) == prz->buffer_size)
- return;
-
- do {
- old = atomic_read(&prz->buffer->size);
- new = old + a;
- if (new > prz->buffer_size)
- new = prz->buffer_size;
- } while (atomic_cmpxchg(&prz->buffer->size, old, new) != old);
-}
-
-static DEFINE_RAW_SPINLOCK(buffer_lock);
-
-/* increase and wrap the start pointer, returning the old value */
-static size_t buffer_start_add_locked(struct persistent_ram_zone *prz, size_t a)
-{
- int old;
- int new;
- unsigned long flags;
-
- raw_spin_lock_irqsave(&buffer_lock, flags);
+ if (!(prz->flags & PRZ_FLAG_NO_LOCK))
+ raw_spin_lock_irqsave(&prz->buffer_lock, flags);
old = atomic_read(&prz->buffer->start);
new = old + a;
@@ -97,19 +63,21 @@ static size_t buffer_start_add_locked(struct persistent_ram_zone *prz, size_t a)
new -= prz->buffer_size;
atomic_set(&prz->buffer->start, new);
- raw_spin_unlock_irqrestore(&buffer_lock, flags);
+ if (!(prz->flags & PRZ_FLAG_NO_LOCK))
+ raw_spin_unlock_irqrestore(&prz->buffer_lock, flags);
return old;
}
/* increase the size counter until it hits the max size */
-static void buffer_size_add_locked(struct persistent_ram_zone *prz, size_t a)
+static void buffer_size_add(struct persistent_ram_zone *prz, size_t a)
{
size_t old;
size_t new;
- unsigned long flags;
+ unsigned long flags = 0;
- raw_spin_lock_irqsave(&buffer_lock, flags);
+ if (!(prz->flags & PRZ_FLAG_NO_LOCK))
+ raw_spin_lock_irqsave(&prz->buffer_lock, flags);
old = atomic_read(&prz->buffer->size);
if (old == prz->buffer_size)
@@ -121,12 +89,10 @@ static void buffer_size_add_locked(struct persistent_ram_zone *prz, size_t a)
atomic_set(&prz->buffer->size, new);
exit:
- raw_spin_unlock_irqrestore(&buffer_lock, flags);
+ if (!(prz->flags & PRZ_FLAG_NO_LOCK))
+ raw_spin_unlock_irqrestore(&prz->buffer_lock, flags);
}
-static size_t (*buffer_start_add)(struct persistent_ram_zone *, size_t) = buffer_start_add_atomic;
-static void (*buffer_size_add)(struct persistent_ram_zone *, size_t) = buffer_size_add_atomic;
-
static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz,
uint8_t *data, size_t len, uint8_t *ecc)
{
@@ -299,7 +265,7 @@ static void notrace persistent_ram_update(struct persistent_ram_zone *prz,
const void *s, unsigned int start, unsigned int count)
{
struct persistent_ram_buffer *buffer = prz->buffer;
- memcpy(buffer->data + start, s, count);
+ memcpy_toio(buffer->data + start, s, count);
persistent_ram_update_ecc(prz, start, count);
}
@@ -322,8 +288,8 @@ void persistent_ram_save_old(struct persistent_ram_zone *prz)
}
prz->old_log_size = size;
- memcpy(prz->old_log, &buffer->data[start], size - start);
- memcpy(prz->old_log + size - start, &buffer->data[0], start);
+ memcpy_fromio(prz->old_log, &buffer->data[start], size - start);
+ memcpy_fromio(prz->old_log + size - start, &buffer->data[0], start);
}
int notrace persistent_ram_write(struct persistent_ram_zone *prz,
@@ -426,9 +392,6 @@ static void *persistent_ram_iomap(phys_addr_t start, size_t size,
return NULL;
}
- buffer_start_add = buffer_start_add_locked;
- buffer_size_add = buffer_size_add_locked;
-
if (memtype)
va = ioremap(start, size);
else
@@ -487,6 +450,7 @@ static int persistent_ram_post_init(struct persistent_ram_zone *prz, u32 sig,
prz->buffer->sig);
}
+ /* Rewind missing or invalid memory area. */
prz->buffer->sig = sig;
persistent_ram_zap(prz);
@@ -513,7 +477,7 @@ void persistent_ram_free(struct persistent_ram_zone *prz)
struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
u32 sig, struct persistent_ram_ecc_info *ecc_info,
- unsigned int memtype)
+ unsigned int memtype, u32 flags)
{
struct persistent_ram_zone *prz;
int ret = -ENOMEM;
@@ -524,6 +488,10 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
goto err;
}
+ /* Initialize general buffer state. */
+ raw_spin_lock_init(&prz->buffer_lock);
+ prz->flags = flags;
+
ret = persistent_ram_buffer_map(start, size, prz, memtype);
if (ret)
goto err;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index ef0d64b2a6d9..353ff31dcee1 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1398,7 +1398,7 @@ static int dquot_active(const struct inode *inode)
static int __dquot_initialize(struct inode *inode, int type)
{
int cnt, init_needed = 0;
- struct dquot **dquots, *got[MAXQUOTAS];
+ struct dquot **dquots, *got[MAXQUOTAS] = {};
struct super_block *sb = inode->i_sb;
qsize_t rsv;
int ret = 0;
@@ -1415,7 +1415,6 @@ static int __dquot_initialize(struct inode *inode, int type)
int rc;
struct dquot *dquot;
- got[cnt] = NULL;
if (type != -1 && cnt != type)
continue;
/*
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 96a1bcf33db4..38187300a2b4 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -189,7 +189,7 @@ int reiserfs_commit_page(struct inode *inode, struct page *page,
int ret = 0;
th.t_trans_id = 0;
- blocksize = 1 << inode->i_blkbits;
+ blocksize = i_blocksize(inode);
if (logit) {
reiserfs_write_lock(s);
@@ -260,10 +260,10 @@ const struct file_operations reiserfs_file_operations = {
const struct inode_operations reiserfs_file_inode_operations = {
.setattr = reiserfs_setattr,
- .setxattr = reiserfs_setxattr,
- .getxattr = reiserfs_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
.listxattr = reiserfs_listxattr,
- .removexattr = reiserfs_removexattr,
+ .removexattr = generic_removexattr,
.permission = reiserfs_permission,
.get_acl = reiserfs_get_acl,
.set_acl = reiserfs_set_acl,
diff --git a/fs/reiserfs/ibalance.c b/fs/reiserfs/ibalance.c
index b751eea32e20..5db6f45b3fed 100644
--- a/fs/reiserfs/ibalance.c
+++ b/fs/reiserfs/ibalance.c
@@ -1153,8 +1153,9 @@ int balance_internal(struct tree_balance *tb,
insert_ptr);
}
- memcpy(new_insert_key_addr, &new_insert_key, KEY_SIZE);
insert_ptr[0] = new_insert_ptr;
+ if (new_insert_ptr)
+ memcpy(new_insert_key_addr, &new_insert_key, KEY_SIZE);
return order;
}
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 3d8e7e671d5b..60ba35087d12 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -524,7 +524,7 @@ static int reiserfs_get_blocks_direct_io(struct inode *inode,
* referenced in convert_tail_for_hole() that may be called from
* reiserfs_get_block()
*/
- bh_result->b_size = (1 << inode->i_blkbits);
+ bh_result->b_size = i_blocksize(inode);
ret = reiserfs_get_block(inode, iblock, bh_result,
create | GET_BLOCK_NO_DANGLE);
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 47f96988fdd4..3ebc70167e41 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -1649,10 +1649,10 @@ const struct inode_operations reiserfs_dir_inode_operations = {
.mknod = reiserfs_mknod,
.rename = reiserfs_rename,
.setattr = reiserfs_setattr,
- .setxattr = reiserfs_setxattr,
- .getxattr = reiserfs_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
.listxattr = reiserfs_listxattr,
- .removexattr = reiserfs_removexattr,
+ .removexattr = generic_removexattr,
.permission = reiserfs_permission,
.get_acl = reiserfs_get_acl,
.set_acl = reiserfs_set_acl,
@@ -1667,10 +1667,10 @@ const struct inode_operations reiserfs_symlink_inode_operations = {
.follow_link = page_follow_link_light,
.put_link = page_put_link,
.setattr = reiserfs_setattr,
- .setxattr = reiserfs_setxattr,
- .getxattr = reiserfs_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
.listxattr = reiserfs_listxattr,
- .removexattr = reiserfs_removexattr,
+ .removexattr = generic_removexattr,
.permission = reiserfs_permission,
};
@@ -1679,10 +1679,10 @@ const struct inode_operations reiserfs_symlink_inode_operations = {
*/
const struct inode_operations reiserfs_special_inode_operations = {
.setattr = reiserfs_setattr,
- .setxattr = reiserfs_setxattr,
- .getxattr = reiserfs_getxattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
.listxattr = reiserfs_listxattr,
- .removexattr = reiserfs_removexattr,
+ .removexattr = generic_removexattr,
.permission = reiserfs_permission,
.get_acl = reiserfs_get_acl,
.set_acl = reiserfs_set_acl,
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 4a62fe8cc3bf..f9f3be50081a 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -190,7 +190,15 @@ static int remove_save_link_only(struct super_block *s,
static int reiserfs_quota_on_mount(struct super_block *, int);
#endif
-/* look for uncompleted unlinks and truncates and complete them */
+/*
+ * Look for uncompleted unlinks and truncates and complete them
+ *
+ * Called with superblock write locked. If quotas are enabled, we have to
+ * release/retake lest we call dquot_quota_on_mount(), proceed to
+ * schedule_on_each_cpu() in invalidate_bdev() and deadlock waiting for the per
+ * cpu worklets to complete flush_async_commits() that in turn wait for the
+ * superblock write lock.
+ */
static int finish_unfinished(struct super_block *s)
{
INITIALIZE_PATH(path);
@@ -237,7 +245,9 @@ static int finish_unfinished(struct super_block *s)
quota_enabled[i] = 0;
continue;
}
+ reiserfs_write_unlock(s);
ret = reiserfs_quota_on_mount(s, i);
+ reiserfs_write_lock(s);
if (ret < 0)
reiserfs_warning(s, "reiserfs-2500",
"cannot turn on journaled "
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 66b26fdfff8d..a8dbc93e45eb 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -763,60 +763,6 @@ find_xattr_handler_prefix(const struct xattr_handler **handlers,
return xah;
}
-
-/*
- * Inode operation getxattr()
- */
-ssize_t
-reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer,
- size_t size)
-{
- const struct xattr_handler *handler;
-
- handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
-
- if (!handler || get_inode_sd_version(d_inode(dentry)) == STAT_DATA_V1)
- return -EOPNOTSUPP;
-
- return handler->get(handler, dentry, name, buffer, size);
-}
-
-/*
- * Inode operation setxattr()
- *
- * d_inode(dentry)->i_mutex down
- */
-int
-reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
- size_t size, int flags)
-{
- const struct xattr_handler *handler;
-
- handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
-
- if (!handler || get_inode_sd_version(d_inode(dentry)) == STAT_DATA_V1)
- return -EOPNOTSUPP;
-
- return handler->set(handler, dentry, name, value, size, flags);
-}
-
-/*
- * Inode operation removexattr()
- *
- * d_inode(dentry)->i_mutex down
- */
-int reiserfs_removexattr(struct dentry *dentry, const char *name)
-{
- const struct xattr_handler *handler;
-
- handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
-
- if (!handler || get_inode_sd_version(d_inode(dentry)) == STAT_DATA_V1)
- return -EOPNOTSUPP;
-
- return handler->set(handler, dentry, name, NULL, 0, XATTR_REPLACE);
-}
-
struct listxattr_buf {
struct dir_context ctx;
size_t size;
diff --git a/fs/reiserfs/xattr.h b/fs/reiserfs/xattr.h
index 15dde6262c00..613ff5aef94e 100644
--- a/fs/reiserfs/xattr.h
+++ b/fs/reiserfs/xattr.h
@@ -2,6 +2,7 @@
#include <linux/init.h>
#include <linux/list.h>
#include <linux/rwsem.h>
+#include <linux/xattr.h>
struct inode;
struct dentry;
@@ -18,12 +19,7 @@ int reiserfs_permission(struct inode *inode, int mask);
#ifdef CONFIG_REISERFS_FS_XATTR
#define has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir)
-ssize_t reiserfs_getxattr(struct dentry *dentry, const char *name,
- void *buffer, size_t size);
-int reiserfs_setxattr(struct dentry *dentry, const char *name,
- const void *value, size_t size, int flags);
ssize_t reiserfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
-int reiserfs_removexattr(struct dentry *dentry, const char *name);
int reiserfs_xattr_get(struct inode *, const char *, void *, size_t);
int reiserfs_xattr_set(struct inode *, const char *, const void *, size_t, int);
@@ -92,10 +88,7 @@ static inline void reiserfs_init_xattr_rwsem(struct inode *inode)
#else
-#define reiserfs_getxattr NULL
-#define reiserfs_setxattr NULL
#define reiserfs_listxattr NULL
-#define reiserfs_removexattr NULL
static inline void reiserfs_init_xattr_rwsem(struct inode *inode)
{
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 4b34b9dc03dd..9b1824f35501 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -246,13 +246,9 @@ __reiserfs_set_acl(struct reiserfs_transaction_handle *th, struct inode *inode,
case ACL_TYPE_ACCESS:
name = POSIX_ACL_XATTR_ACCESS;
if (acl) {
- error = posix_acl_equiv_mode(acl, &inode->i_mode);
- if (error < 0)
+ error = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+ if (error)
return error;
- else {
- if (error == 0)
- acl = NULL;
- }
}
break;
case ACL_TYPE_DEFAULT:
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index ac659af431ae..60de069225ba 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -12,26 +12,24 @@ static int
security_get(const struct xattr_handler *handler, struct dentry *dentry,
const char *name, void *buffer, size_t size)
{
- if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
- return -EINVAL;
-
if (IS_PRIVATE(d_inode(dentry)))
return -EPERM;
- return reiserfs_xattr_get(d_inode(dentry), name, buffer, size);
+ return reiserfs_xattr_get(d_inode(dentry),
+ xattr_full_name(handler, name),
+ buffer, size);
}
static int
security_set(const struct xattr_handler *handler, struct dentry *dentry,
const char *name, const void *buffer, size_t size, int flags)
{
- if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
- return -EINVAL;
-
if (IS_PRIVATE(d_inode(dentry)))
return -EPERM;
- return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags);
+ return reiserfs_xattr_set(d_inode(dentry),
+ xattr_full_name(handler, name),
+ buffer, size, flags);
}
static size_t security_list(const struct xattr_handler *handler,
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index a338adf1b8b4..ebba1ebf28ad 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -11,26 +11,24 @@ static int
trusted_get(const struct xattr_handler *handler, struct dentry *dentry,
const char *name, void *buffer, size_t size)
{
- if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
- return -EINVAL;
-
if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(d_inode(dentry)))
return -EPERM;
- return reiserfs_xattr_get(d_inode(dentry), name, buffer, size);
+ return reiserfs_xattr_get(d_inode(dentry),
+ xattr_full_name(handler, name),
+ buffer, size);
}
static int
trusted_set(const struct xattr_handler *handler, struct dentry *dentry,
const char *name, const void *buffer, size_t size, int flags)
{
- if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
- return -EINVAL;
-
if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(d_inode(dentry)))
return -EPERM;
- return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags);
+ return reiserfs_xattr_set(d_inode(dentry),
+ xattr_full_name(handler, name),
+ buffer, size, flags);
}
static size_t trusted_list(const struct xattr_handler *handler,
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index 39c9667191c5..6ac8a8c8bd9c 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -10,24 +10,22 @@ static int
user_get(const struct xattr_handler *handler, struct dentry *dentry,
const char *name, void *buffer, size_t size)
{
-
- if (strlen(name) < sizeof(XATTR_USER_PREFIX))
- return -EINVAL;
if (!reiserfs_xattrs_user(dentry->d_sb))
return -EOPNOTSUPP;
- return reiserfs_xattr_get(d_inode(dentry), name, buffer, size);
+ return reiserfs_xattr_get(d_inode(dentry),
+ xattr_full_name(handler, name),
+ buffer, size);
}
static int
user_set(const struct xattr_handler *handler, struct dentry *dentry,
const char *name, const void *buffer, size_t size, int flags)
{
- if (strlen(name) < sizeof(XATTR_USER_PREFIX))
- return -EINVAL;
-
if (!reiserfs_xattrs_user(dentry->d_sb))
return -EOPNOTSUPP;
- return reiserfs_xattr_set(d_inode(dentry), name, buffer, size, flags);
+ return reiserfs_xattr_set(d_inode(dentry),
+ xattr_full_name(handler, name),
+ buffer, size, flags);
}
static size_t user_list(const struct xattr_handler *handler,
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 268733cda397..5f4f1882dc7d 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -74,6 +74,7 @@
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/uaccess.h>
+#include <linux/major.h>
#include "internal.h"
static struct kmem_cache *romfs_inode_cachep;
@@ -415,7 +416,22 @@ static void romfs_destroy_inode(struct inode *inode)
static int romfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
- u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
+ u64 id = 0;
+
+ /* When calling huge_encode_dev(),
+ * use sb->s_bdev->bd_dev when,
+ * - CONFIG_ROMFS_ON_BLOCK defined
+ * use sb->s_dev when,
+ * - CONFIG_ROMFS_ON_BLOCK undefined and
+ * - CONFIG_ROMFS_ON_MTD defined
+ * leave id as 0 when,
+ * - CONFIG_ROMFS_ON_BLOCK undefined and
+ * - CONFIG_ROMFS_ON_MTD undefined
+ */
+ if (sb->s_bdev)
+ id = huge_encode_dev(sb->s_bdev->bd_dev);
+ else if (sb->s_dev)
+ id = huge_encode_dev(sb->s_dev);
buf->f_type = ROMFS_MAGIC;
buf->f_namelen = ROMFS_MAXFN;
@@ -488,6 +504,11 @@ static int romfs_fill_super(struct super_block *sb, void *data, int silent)
sb->s_flags |= MS_RDONLY | MS_NOATIME;
sb->s_op = &romfs_super_ops;
+#ifdef CONFIG_ROMFS_ON_MTD
+ /* Use same dev ID from the underlying mtdblock device */
+ if (sb->s_mtd)
+ sb->s_dev = MKDEV(MTD_BLOCK_MAJOR, sb->s_mtd->index);
+#endif
/* read the image superblock and check it */
rsb = kmalloc(512, GFP_KERNEL);
if (!rsb)
diff --git a/fs/seq_file.c b/fs/seq_file.c
index e85664b7c7d9..6dc4296eed62 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -72,9 +72,10 @@ int seq_open(struct file *file, const struct seq_operations *op)
mutex_init(&p->lock);
p->op = op;
-#ifdef CONFIG_USER_NS
- p->user_ns = file->f_cred->user_ns;
-#endif
+
+ // No refcounting: the lifetime of 'p' is constrained
+ // to the lifetime of the file.
+ p->file = file;
/*
* Wrappers around seq_open(e.g. swaps_open) need to be
@@ -222,8 +223,10 @@ ssize_t seq_read(struct file *file, char __user *buf, size_t size, loff_t *ppos)
size -= n;
buf += n;
copied += n;
- if (!m->count)
+ if (!m->count) {
+ m->from = 0;
m->index++;
+ }
if (!size)
goto Done;
}
diff --git a/fs/splice.c b/fs/splice.c
index 4cf700d50b40..8398974e1538 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -185,6 +185,9 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
unsigned int spd_pages = spd->nr_pages;
int ret, do_wakeup, page_nr;
+ if (!spd_pages)
+ return 0;
+
ret = 0;
do_wakeup = 0;
page_nr = 0;
@@ -208,6 +211,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
buf->len = spd->partial[page_nr].len;
buf->private = spd->partial[page_nr].private;
buf->ops = spd->ops;
+ buf->flags = 0;
if (spd->flags & SPLICE_F_GIFT)
buf->flags |= PIPE_BUF_FLAG_GIFT;
diff --git a/fs/stat.c b/fs/stat.c
index d4a61d8dc021..004dd77c3b93 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -31,7 +31,7 @@ void generic_fillattr(struct inode *inode, struct kstat *stat)
stat->atime = inode->i_atime;
stat->mtime = inode->i_mtime;
stat->ctime = inode->i_ctime;
- stat->blksize = (1 << inode->i_blkbits);
+ stat->blksize = i_blocksize(inode);
stat->blocks = inode->i_blocks;
}
@@ -454,6 +454,7 @@ void __inode_add_bytes(struct inode *inode, loff_t bytes)
inode->i_bytes -= 512;
}
}
+EXPORT_SYMBOL(__inode_add_bytes);
void inode_add_bytes(struct inode *inode, loff_t bytes)
{
diff --git a/fs/super.c b/fs/super.c
index f5f4b328f860..d4d2591b77c8 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1326,8 +1326,8 @@ int freeze_super(struct super_block *sb)
}
}
/*
- * This is just for debugging purposes so that fs can warn if it
- * sees write activity when frozen is set to SB_FREEZE_COMPLETE.
+ * For debugging purposes so that fs can warn if it sees write activity
+ * when frozen is set to SB_FREEZE_COMPLETE, and for thaw_super().
*/
sb->s_writers.frozen = SB_FREEZE_COMPLETE;
up_write(&sb->s_umount);
@@ -1346,7 +1346,7 @@ int thaw_super(struct super_block *sb)
int error;
down_write(&sb->s_umount);
- if (sb->s_writers.frozen == SB_UNFROZEN) {
+ if (sb->s_writers.frozen != SB_FREEZE_COMPLETE) {
up_write(&sb->s_umount);
return -EINVAL;
}
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index f35523d4fa3a..39c75a86c67f 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -108,16 +108,24 @@ static ssize_t sysfs_kf_read(struct kernfs_open_file *of, char *buf,
{
const struct sysfs_ops *ops = sysfs_file_ops(of->kn);
struct kobject *kobj = of->kn->parent->priv;
- size_t len;
+ ssize_t len;
/*
* If buf != of->prealloc_buf, we don't know how
* large it is, so cannot safely pass it to ->show
*/
- if (pos || WARN_ON_ONCE(buf != of->prealloc_buf))
+ if (WARN_ON_ONCE(buf != of->prealloc_buf))
return 0;
len = ops->show(kobj, of->kn->priv, buf);
- return min(count, len);
+ if (len < 0)
+ return len;
+ if (pos) {
+ if (len <= pos)
+ return 0;
+ len -= pos;
+ memmove(buf, buf + pos, len);
+ }
+ return min_t(ssize_t, count, len);
}
/* kernfs write callback for regular sysfs files */
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 053818dd6c18..1327a02ec778 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -40,6 +40,7 @@ struct timerfd_ctx {
short unsigned settime_flags; /* to show in fdinfo */
struct rcu_head rcu;
struct list_head clist;
+ spinlock_t cancel_lock;
bool might_cancel;
};
@@ -112,7 +113,7 @@ void timerfd_clock_was_set(void)
rcu_read_unlock();
}
-static void timerfd_remove_cancel(struct timerfd_ctx *ctx)
+static void __timerfd_remove_cancel(struct timerfd_ctx *ctx)
{
if (ctx->might_cancel) {
ctx->might_cancel = false;
@@ -122,6 +123,13 @@ static void timerfd_remove_cancel(struct timerfd_ctx *ctx)
}
}
+static void timerfd_remove_cancel(struct timerfd_ctx *ctx)
+{
+ spin_lock(&ctx->cancel_lock);
+ __timerfd_remove_cancel(ctx);
+ spin_unlock(&ctx->cancel_lock);
+}
+
static bool timerfd_canceled(struct timerfd_ctx *ctx)
{
if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX)
@@ -132,6 +140,7 @@ static bool timerfd_canceled(struct timerfd_ctx *ctx)
static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)
{
+ spin_lock(&ctx->cancel_lock);
if ((ctx->clockid == CLOCK_REALTIME ||
ctx->clockid == CLOCK_REALTIME_ALARM) &&
(flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) {
@@ -141,9 +150,10 @@ static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)
list_add_rcu(&ctx->clist, &cancel_list);
spin_unlock(&cancel_lock);
}
- } else if (ctx->might_cancel) {
- timerfd_remove_cancel(ctx);
+ } else {
+ __timerfd_remove_cancel(ctx);
}
+ spin_unlock(&ctx->cancel_lock);
}
static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
@@ -395,6 +405,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
return -ENOMEM;
init_waitqueue_head(&ctx->wqh);
+ spin_lock_init(&ctx->cancel_lock);
ctx->clockid = clockid;
if (isalarm(ctx))
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index e49bd2808bf3..f5d5ee43ae6e 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -350,7 +350,7 @@ static unsigned int vfs_dent_type(uint8_t type)
*/
static int ubifs_readdir(struct file *file, struct dir_context *ctx)
{
- int err;
+ int err = 0;
struct qstr nm;
union ubifs_key key;
struct ubifs_dent_node *dent;
@@ -452,14 +452,20 @@ out:
kfree(file->private_data);
file->private_data = NULL;
- if (err != -ENOENT) {
+ if (err != -ENOENT)
ubifs_err(c, "cannot find next direntry, error %d", err);
- return err;
- }
+ else
+ /*
+ * -ENOENT is a non-fatal error in this context, the TNC uses
+ * it to indicate that the cursor moved past the current directory
+ * and readdir() has to stop.
+ */
+ err = 0;
+
/* 2 is a special value indicating that there are no more direntries */
ctx->pos = 2;
- return 0;
+ return err;
}
/* Free saved readdir() state when the directory is closed */
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 0edc12856147..b895af7d8d80 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -52,6 +52,7 @@
#include "ubifs.h"
#include <linux/mount.h>
#include <linux/slab.h>
+#include <linux/migrate.h>
static int read_block(struct inode *inode, void *addr, unsigned int block,
struct ubifs_data_node *dn)
@@ -1452,6 +1453,26 @@ static int ubifs_set_page_dirty(struct page *page)
return ret;
}
+#ifdef CONFIG_MIGRATION
+static int ubifs_migrate_page(struct address_space *mapping,
+ struct page *newpage, struct page *page, enum migrate_mode mode)
+{
+ int rc;
+
+ rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0);
+ if (rc != MIGRATEPAGE_SUCCESS)
+ return rc;
+
+ if (PagePrivate(page)) {
+ ClearPagePrivate(page);
+ SetPagePrivate(newpage);
+ }
+
+ migrate_page_copy(newpage, page);
+ return MIGRATEPAGE_SUCCESS;
+}
+#endif
+
static int ubifs_releasepage(struct page *page, gfp_t unused_gfp_flags)
{
/*
@@ -1591,6 +1612,9 @@ const struct address_space_operations ubifs_file_address_operations = {
.write_end = ubifs_write_end,
.invalidatepage = ubifs_invalidatepage,
.set_page_dirty = ubifs_set_page_dirty,
+#ifdef CONFIG_MIGRATION
+ .migratepage = ubifs_migrate_page,
+#endif
.releasepage = ubifs_releasepage,
};
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index fa9a20cc60d6..fe5e8d4970ae 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -34,6 +34,11 @@
#include <linux/slab.h>
#include "ubifs.h"
+static int try_read_node(const struct ubifs_info *c, void *buf, int type,
+ int len, int lnum, int offs);
+static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key,
+ struct ubifs_zbranch *zbr, void *node);
+
/*
* Returned codes of 'matches_name()' and 'fallible_matches_name()' functions.
* @NAME_LESS: name corresponding to the first argument is less than second
@@ -402,7 +407,19 @@ static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr,
return 0;
}
- err = ubifs_tnc_read_node(c, zbr, node);
+ if (c->replaying) {
+ err = fallible_read_node(c, &zbr->key, zbr, node);
+ /*
+ * When the node was not found, return -ENOENT, 0 otherwise.
+ * Negative return codes stay as-is.
+ */
+ if (err == 0)
+ err = -ENOENT;
+ else if (err == 1)
+ err = 0;
+ } else {
+ err = ubifs_tnc_read_node(c, zbr, node);
+ }
if (err)
return err;
@@ -2766,7 +2783,11 @@ struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c,
if (nm->name) {
if (err) {
/* Handle collisions */
- err = resolve_collision(c, key, &znode, &n, nm);
+ if (c->replaying)
+ err = fallible_resolve_collision(c, key, &znode, &n,
+ nm, 0);
+ else
+ err = resolve_collision(c, key, &znode, &n, nm);
dbg_tnc("rc returned %d, znode %p, n %d",
err, znode, n);
if (unlikely(err < 0))
diff --git a/fs/ubifs/tnc_commit.c b/fs/ubifs/tnc_commit.c
index b45345d701e7..51157da3f76e 100644
--- a/fs/ubifs/tnc_commit.c
+++ b/fs/ubifs/tnc_commit.c
@@ -370,7 +370,7 @@ static int layout_in_gaps(struct ubifs_info *c, int cnt)
p = c->gap_lebs;
do {
- ubifs_assert(p < c->gap_lebs + sizeof(int) * c->lst.idx_lebs);
+ ubifs_assert(p < c->gap_lebs + c->lst.idx_lebs);
written = layout_leb_in_gaps(c, p);
if (written < 0) {
err = written;
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index e8b01b721e99..b5bf23b34241 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -173,6 +173,7 @@ out_cancel:
host_ui->xattr_cnt -= 1;
host_ui->xattr_size -= CALC_DENT_SIZE(nm->len);
host_ui->xattr_size -= CALC_XATTR_BYTES(size);
+ host_ui->xattr_names -= nm->len;
mutex_unlock(&host_ui->ui_mutex);
out_free:
make_bad_inode(inode);
@@ -533,6 +534,7 @@ out_cancel:
host_ui->xattr_cnt += 1;
host_ui->xattr_size += CALC_DENT_SIZE(nm->len);
host_ui->xattr_size += CALC_XATTR_BYTES(ui->data_len);
+ host_ui->xattr_names += nm->len;
mutex_unlock(&host_ui->ui_mutex);
ubifs_release_budget(c, &req);
make_bad_inode(inode);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 566df9b5a6cb..0e659d9c69a1 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1206,7 +1206,7 @@ int udf_setsize(struct inode *inode, loff_t newsize)
{
int err;
struct udf_inode_info *iinfo;
- int bsize = 1 << inode->i_blkbits;
+ int bsize = i_blocksize(inode);
if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)))
@@ -1235,8 +1235,8 @@ int udf_setsize(struct inode *inode, loff_t newsize)
return err;
}
set_size:
- truncate_setsize(inode, newsize);
up_write(&iinfo->i_data_sem);
+ truncate_setsize(inode, newsize);
} else {
if (iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB) {
down_write(&iinfo->i_data_sem);
@@ -1253,9 +1253,9 @@ set_size:
udf_get_block);
if (err)
return err;
+ truncate_setsize(inode, newsize);
down_write(&iinfo->i_data_sem);
udf_clear_extent_cache(inode);
- truncate_setsize(inode, newsize);
udf_truncate_extents(inode);
up_write(&iinfo->i_data_sem);
}
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index dc5fae601c24..637e17cb0edd 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -81,7 +81,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count)
ufs_error (sb, "ufs_free_fragments",
"bit already cleared for fragment %u", i);
}
-
+
+ inode_sub_bytes(inode, count << uspi->s_fshift);
fs32_add(sb, &ucg->cg_cs.cs_nffree, count);
uspi->cs_total.cs_nffree += count;
fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, count);
@@ -183,6 +184,7 @@ do_more:
ufs_error(sb, "ufs_free_blocks", "freeing free fragment");
}
ubh_setblock(UCPI_UBH(ucpi), ucpi->c_freeoff, blkno);
+ inode_sub_bytes(inode, uspi->s_fpb << uspi->s_fshift);
if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
ufs_clusteracct (sb, ucpi, blkno, 1);
@@ -494,6 +496,20 @@ u64 ufs_new_fragments(struct inode *inode, void *p, u64 fragment,
return 0;
}
+static bool try_add_frags(struct inode *inode, unsigned frags)
+{
+ unsigned size = frags * i_blocksize(inode);
+ spin_lock(&inode->i_lock);
+ __inode_add_bytes(inode, size);
+ if (unlikely((u32)inode->i_blocks != inode->i_blocks)) {
+ __inode_sub_bytes(inode, size);
+ spin_unlock(&inode->i_lock);
+ return false;
+ }
+ spin_unlock(&inode->i_lock);
+ return true;
+}
+
static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
unsigned oldcount, unsigned newcount)
{
@@ -530,6 +546,9 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment,
for (i = oldcount; i < newcount; i++)
if (ubh_isclr (UCPI_UBH(ucpi), ucpi->c_freeoff, fragno + i))
return 0;
+
+ if (!try_add_frags(inode, count))
+ return 0;
/*
* Block can be extended
*/
@@ -647,6 +666,7 @@ cg_found:
ubh_setbit (UCPI_UBH(ucpi), ucpi->c_freeoff, goal + i);
i = uspi->s_fpb - count;
+ inode_sub_bytes(inode, i << uspi->s_fshift);
fs32_add(sb, &ucg->cg_cs.cs_nffree, i);
uspi->cs_total.cs_nffree += i;
fs32_add(sb, &UFS_SB(sb)->fs_cs(cgno).cs_nffree, i);
@@ -657,6 +677,8 @@ cg_found:
result = ufs_bitmap_search (sb, ucpi, goal, allocsize);
if (result == INVBLOCK)
return 0;
+ if (!try_add_frags(inode, count))
+ return 0;
for (i = 0; i < count; i++)
ubh_clrbit (UCPI_UBH(ucpi), ucpi->c_freeoff, result + i);
@@ -716,6 +738,8 @@ norot:
return INVBLOCK;
ucpi->c_rotor = result;
gotit:
+ if (!try_add_frags(inode, uspi->s_fpb))
+ return 0;
blkno = ufs_fragstoblks(result);
ubh_clrblock (UCPI_UBH(ucpi), ucpi->c_freeoff, blkno);
if ((UFS_SB(sb)->s_flags & UFS_CG_MASK) == UFS_CG_44BSD)
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index a064cf44b143..1f69bb9b1e9d 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -235,7 +235,8 @@ ufs_extend_tail(struct inode *inode, u64 writes_to,
p = ufs_get_direct_data_ptr(uspi, ufsi, block);
tmp = ufs_new_fragments(inode, p, lastfrag, ufs_data_ptr_to_cpu(sb, p),
- new_size, err, locked_page);
+ new_size - (lastfrag & uspi->s_fpbmask), err,
+ locked_page);
return tmp != 0;
}
@@ -284,7 +285,7 @@ ufs_inode_getfrag(struct inode *inode, unsigned index,
goal += uspi->s_fpb;
}
tmp = ufs_new_fragments(inode, p, ufs_blknum(new_fragment),
- goal, uspi->s_fpb, err, locked_page);
+ goal, nfrags, err, locked_page);
if (!tmp) {
*err = -ENOSPC;
@@ -402,7 +403,9 @@ static int ufs_getfrag_block(struct inode *inode, sector_t fragment, struct buff
if (!create) {
phys64 = ufs_frag_map(inode, offsets, depth);
- goto out;
+ if (phys64)
+ map_bh(bh_result, sb, phys64 + frag);
+ return 0;
}
/* This code entered only while writing ....? */
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index f6390eec02ca..10f364490833 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -746,6 +746,23 @@ static void ufs_put_super(struct super_block *sb)
return;
}
+static u64 ufs_max_bytes(struct super_block *sb)
+{
+ struct ufs_sb_private_info *uspi = UFS_SB(sb)->s_uspi;
+ int bits = uspi->s_apbshift;
+ u64 res;
+
+ if (bits > 21)
+ res = ~0ULL;
+ else
+ res = UFS_NDADDR + (1LL << bits) + (1LL << (2*bits)) +
+ (1LL << (3*bits));
+
+ if (res >= (MAX_LFS_FILESIZE >> uspi->s_bshift))
+ return MAX_LFS_FILESIZE;
+ return res << uspi->s_bshift;
+}
+
static int ufs_fill_super(struct super_block *sb, void *data, int silent)
{
struct ufs_sb_info * sbi;
@@ -1212,6 +1229,7 @@ magic_found:
"fast symlink size (%u)\n", uspi->s_maxsymlinklen);
uspi->s_maxsymlinklen = maxsymlen;
}
+ sb->s_maxbytes = ufs_max_bytes(sb);
sb->s_max_links = UFS_LINK_MAX;
inode = ufs_iget(sb, UFS_ROOTINO);
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 954175928240..3f9463f8cf2f 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -473,15 +473,19 @@ static inline unsigned _ubh_find_last_zero_bit_(
static inline int _ubh_isblockset_(struct ufs_sb_private_info * uspi,
struct ufs_buffer_head * ubh, unsigned begin, unsigned block)
{
+ u8 mask;
switch (uspi->s_fpb) {
case 8:
return (*ubh_get_addr (ubh, begin + block) == 0xff);
case 4:
- return (*ubh_get_addr (ubh, begin + (block >> 1)) == (0x0f << ((block & 0x01) << 2)));
+ mask = 0x0f << ((block & 0x01) << 2);
+ return (*ubh_get_addr (ubh, begin + (block >> 1)) & mask) == mask;
case 2:
- return (*ubh_get_addr (ubh, begin + (block >> 2)) == (0x03 << ((block & 0x03) << 1)));
+ mask = 0x03 << ((block & 0x03) << 1);
+ return (*ubh_get_addr (ubh, begin + (block >> 2)) & mask) == mask;
case 1:
- return (*ubh_get_addr (ubh, begin + (block >> 3)) == (0x01 << (block & 0x07)));
+ mask = 0x01 << (block & 0x07);
+ return (*ubh_get_addr (ubh, begin + (block >> 3)) & mask) == mask;
}
return 0;
}
diff --git a/fs/utimes.c b/fs/utimes.c
index aa138d64560a..cb771c30d102 100644
--- a/fs/utimes.c
+++ b/fs/utimes.c
@@ -87,20 +87,7 @@ static int utimes_common(struct path *path, struct timespec *times)
*/
newattrs.ia_valid |= ATTR_TIMES_SET;
} else {
- /*
- * If times is NULL (or both times are UTIME_NOW),
- * then we need to check permissions, because
- * inode_change_ok() won't do it.
- */
- error = -EACCES;
- if (IS_IMMUTABLE(inode))
- goto mnt_drop_write_and_out;
-
- if (!inode_owner_or_capable(inode)) {
- error = inode_permission(inode, MAY_WRITE);
- if (error)
- goto mnt_drop_write_and_out;
- }
+ newattrs.ia_valid |= ATTR_TOUCH;
}
retry_deleg:
mutex_lock(&inode->i_mutex);
@@ -112,7 +99,6 @@ retry_deleg:
goto retry_deleg;
}
-mnt_drop_write_and_out:
mnt_drop_write(path->mnt);
out:
return error;
diff --git a/fs/xattr.c b/fs/xattr.c
index 9b932b95d74e..f0da9d24e9ca 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -442,7 +442,7 @@ getxattr(struct dentry *d, const char __user *name, void __user *value,
size = XATTR_SIZE_MAX;
kvalue = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
if (!kvalue) {
- vvalue = vmalloc(size);
+ vvalue = vzalloc(size);
if (!vvalue)
return -ENOMEM;
kvalue = vvalue;
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 3479294c1d58..e1e7fe3b5424 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -535,6 +535,7 @@ xfs_agfl_write_verify(
}
const struct xfs_buf_ops xfs_agfl_buf_ops = {
+ .name = "xfs_agfl",
.verify_read = xfs_agfl_read_verify,
.verify_write = xfs_agfl_write_verify,
};
@@ -2339,6 +2340,7 @@ xfs_agf_write_verify(
}
const struct xfs_buf_ops xfs_agf_buf_ops = {
+ .name = "xfs_agf",
.verify_read = xfs_agf_read_verify,
.verify_write = xfs_agf_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 90de071dd4c2..eb8bbfe85484 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -379,6 +379,7 @@ xfs_allocbt_write_verify(
}
const struct xfs_buf_ops xfs_allocbt_buf_ops = {
+ .name = "xfs_allocbt",
.verify_read = xfs_allocbt_read_verify,
.verify_write = xfs_allocbt_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index aa187f7ba2dd..01a5ecfedfcf 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -328,6 +328,7 @@ xfs_attr3_leaf_read_verify(
}
const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = {
+ .name = "xfs_attr3_leaf",
.verify_read = xfs_attr3_leaf_read_verify,
.verify_write = xfs_attr3_leaf_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c
index 5ab95ffa4ae9..f3ed9bf0b065 100644
--- a/fs/xfs/libxfs/xfs_attr_remote.c
+++ b/fs/xfs/libxfs/xfs_attr_remote.c
@@ -201,6 +201,7 @@ xfs_attr3_rmt_write_verify(
}
const struct xfs_buf_ops xfs_attr3_rmt_buf_ops = {
+ .name = "xfs_attr3_rmt",
.verify_read = xfs_attr3_rmt_read_verify,
.verify_write = xfs_attr3_rmt_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 119c2422aac7..75884aecf920 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -2179,8 +2179,10 @@ xfs_bmap_add_extent_delay_real(
}
temp = xfs_bmap_worst_indlen(bma->ip, temp);
temp2 = xfs_bmap_worst_indlen(bma->ip, temp2);
- diff = (int)(temp + temp2 - startblockval(PREV.br_startblock) -
- (bma->cur ? bma->cur->bc_private.b.allocated : 0));
+ diff = (int)(temp + temp2 -
+ (startblockval(PREV.br_startblock) -
+ (bma->cur ?
+ bma->cur->bc_private.b.allocated : 0)));
if (diff > 0) {
error = xfs_mod_fdblocks(bma->ip->i_mount,
-((int64_t)diff), false);
@@ -2232,7 +2234,6 @@ xfs_bmap_add_extent_delay_real(
temp = da_new;
if (bma->cur)
temp += bma->cur->bc_private.b.allocated;
- ASSERT(temp <= da_old);
if (temp < da_old)
xfs_mod_fdblocks(bma->ip->i_mount,
(int64_t)(da_old - temp), false);
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 6b0cf6546a82..1637c37bfbaa 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -720,6 +720,7 @@ xfs_bmbt_write_verify(
}
const struct xfs_buf_ops xfs_bmbt_buf_ops = {
+ .name = "xfs_bmbt",
.verify_read = xfs_bmbt_read_verify,
.verify_write = xfs_bmbt_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index af1bbee5586e..28bc5e78b110 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -4064,7 +4064,7 @@ xfs_btree_change_owner(
xfs_btree_readahead_ptr(cur, ptr, 1);
/* save for the next iteration of the loop */
- lptr = *ptr;
+ xfs_btree_copy_ptrs(cur, &lptr, ptr, 1);
}
/* for each buffer in the level */
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index e89a0f8f827c..097bf7717d80 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -245,6 +245,7 @@ xfs_da3_node_read_verify(
}
const struct xfs_buf_ops xfs_da3_node_buf_ops = {
+ .name = "xfs_da3_node",
.verify_read = xfs_da3_node_read_verify,
.verify_write = xfs_da3_node_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index 9c10e2b8cfcb..aa17cb788946 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -123,6 +123,7 @@ xfs_dir3_block_write_verify(
}
const struct xfs_buf_ops xfs_dir3_block_buf_ops = {
+ .name = "xfs_dir3_block",
.verify_read = xfs_dir3_block_read_verify,
.verify_write = xfs_dir3_block_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index af71a84f343c..725fc7841fde 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -305,11 +305,13 @@ xfs_dir3_data_write_verify(
}
const struct xfs_buf_ops xfs_dir3_data_buf_ops = {
+ .name = "xfs_dir3_data",
.verify_read = xfs_dir3_data_read_verify,
.verify_write = xfs_dir3_data_write_verify,
};
static const struct xfs_buf_ops xfs_dir3_data_reada_buf_ops = {
+ .name = "xfs_dir3_data_reada",
.verify_read = xfs_dir3_data_reada_verify,
.verify_write = xfs_dir3_data_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_dir2_leaf.c b/fs/xfs/libxfs/xfs_dir2_leaf.c
index 3923e1f94697..b887fb2a2bcf 100644
--- a/fs/xfs/libxfs/xfs_dir2_leaf.c
+++ b/fs/xfs/libxfs/xfs_dir2_leaf.c
@@ -245,11 +245,13 @@ xfs_dir3_leafn_write_verify(
}
const struct xfs_buf_ops xfs_dir3_leaf1_buf_ops = {
+ .name = "xfs_dir3_leaf1",
.verify_read = xfs_dir3_leaf1_read_verify,
.verify_write = xfs_dir3_leaf1_write_verify,
};
const struct xfs_buf_ops xfs_dir3_leafn_buf_ops = {
+ .name = "xfs_dir3_leafn",
.verify_read = xfs_dir3_leafn_read_verify,
.verify_write = xfs_dir3_leafn_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 70b0cb2fd556..63ee03db796c 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -150,6 +150,7 @@ xfs_dir3_free_write_verify(
}
const struct xfs_buf_ops xfs_dir3_free_buf_ops = {
+ .name = "xfs_dir3_free",
.verify_read = xfs_dir3_free_read_verify,
.verify_write = xfs_dir3_free_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index 5331b7f0460c..ac9a003dd29a 100644
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -54,7 +54,7 @@ xfs_dqcheck(
xfs_dqid_t id,
uint type, /* used only when IO_dorepair is true */
uint flags,
- char *str)
+ const char *str)
{
xfs_dqblk_t *d = (xfs_dqblk_t *)ddq;
int errs = 0;
@@ -191,8 +191,7 @@ xfs_dquot_buf_verify_crc(
if (mp->m_quotainfo)
ndquots = mp->m_quotainfo->qi_dqperchunk;
else
- ndquots = xfs_calc_dquots_per_chunk(
- XFS_BB_TO_FSB(mp, bp->b_length));
+ ndquots = xfs_calc_dquots_per_chunk(bp->b_length);
for (i = 0; i < ndquots; i++, d++) {
if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk),
@@ -207,7 +206,8 @@ xfs_dquot_buf_verify_crc(
STATIC bool
xfs_dquot_buf_verify(
struct xfs_mount *mp,
- struct xfs_buf *bp)
+ struct xfs_buf *bp,
+ int warn)
{
struct xfs_dqblk *d = (struct xfs_dqblk *)bp->b_addr;
xfs_dqid_t id = 0;
@@ -240,8 +240,7 @@ xfs_dquot_buf_verify(
if (i == 0)
id = be32_to_cpu(ddq->d_id);
- error = xfs_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN,
- "xfs_dquot_buf_verify");
+ error = xfs_dqcheck(mp, ddq, id + i, 0, warn, __func__);
if (error)
return false;
}
@@ -256,7 +255,7 @@ xfs_dquot_buf_read_verify(
if (!xfs_dquot_buf_verify_crc(mp, bp))
xfs_buf_ioerror(bp, -EFSBADCRC);
- else if (!xfs_dquot_buf_verify(mp, bp))
+ else if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN))
xfs_buf_ioerror(bp, -EFSCORRUPTED);
if (bp->b_error)
@@ -264,6 +263,25 @@ xfs_dquot_buf_read_verify(
}
/*
+ * readahead errors are silent and simply leave the buffer as !done so a real
+ * read will then be run with the xfs_dquot_buf_ops verifier. See
+ * xfs_inode_buf_verify() for why we use EIO and ~XBF_DONE here rather than
+ * reporting the failure.
+ */
+static void
+xfs_dquot_buf_readahead_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_target->bt_mount;
+
+ if (!xfs_dquot_buf_verify_crc(mp, bp) ||
+ !xfs_dquot_buf_verify(mp, bp, 0)) {
+ xfs_buf_ioerror(bp, -EIO);
+ bp->b_flags &= ~XBF_DONE;
+ }
+}
+
+/*
* we don't calculate the CRC here as that is done when the dquot is flushed to
* the buffer after the update is done. This ensures that the dquot in the
* buffer always has an up-to-date CRC value.
@@ -274,7 +292,7 @@ xfs_dquot_buf_write_verify(
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- if (!xfs_dquot_buf_verify(mp, bp)) {
+ if (!xfs_dquot_buf_verify(mp, bp, XFS_QMOPT_DOWARN)) {
xfs_buf_ioerror(bp, -EFSCORRUPTED);
xfs_verifier_error(bp);
return;
@@ -282,7 +300,13 @@ xfs_dquot_buf_write_verify(
}
const struct xfs_buf_ops xfs_dquot_buf_ops = {
+ .name = "xfs_dquot",
.verify_read = xfs_dquot_buf_read_verify,
.verify_write = xfs_dquot_buf_write_verify,
};
+const struct xfs_buf_ops xfs_dquot_buf_ra_ops = {
+ .name = "xfs_dquot_ra",
+ .verify_read = xfs_dquot_buf_readahead_verify,
+ .verify_write = xfs_dquot_buf_write_verify,
+};
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 70c1db99f6a7..66d702e6b9ff 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -2572,6 +2572,7 @@ xfs_agi_write_verify(
}
const struct xfs_buf_ops xfs_agi_buf_ops = {
+ .name = "xfs_agi",
.verify_read = xfs_agi_read_verify,
.verify_write = xfs_agi_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index f39b285beb19..6dd44f9ea727 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -304,6 +304,7 @@ xfs_inobt_write_verify(
}
const struct xfs_buf_ops xfs_inobt_buf_ops = {
+ .name = "xfs_inobt",
.verify_read = xfs_inobt_read_verify,
.verify_write = xfs_inobt_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 65485cfc4ade..7183b7ea065b 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -68,6 +68,8 @@ xfs_inobp_check(
* recovery and we don't get unnecssary panics on debug kernels. We use EIO here
* because all we want to do is say readahead failed; there is no-one to report
* the error to, so this will distinguish it from a non-ra verifier failure.
+ * Changes to this readahead error behavour also need to be reflected in
+ * xfs_dquot_buf_readahead_verify().
*/
static void
xfs_inode_buf_verify(
@@ -134,11 +136,13 @@ xfs_inode_buf_write_verify(
}
const struct xfs_buf_ops xfs_inode_buf_ops = {
+ .name = "xfs_inode",
.verify_read = xfs_inode_buf_read_verify,
.verify_write = xfs_inode_buf_write_verify,
};
const struct xfs_buf_ops xfs_inode_buf_ra_ops = {
+ .name = "xxfs_inode_ra",
.verify_read = xfs_inode_buf_readahead_verify,
.verify_write = xfs_inode_buf_write_verify,
};
@@ -295,6 +299,14 @@ xfs_dinode_verify(
if (dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))
return false;
+ /* don't allow invalid i_size */
+ if (be64_to_cpu(dip->di_size) & (1ULL << 63))
+ return false;
+
+ /* No zero-length symlinks. */
+ if (S_ISLNK(be16_to_cpu(dip->di_mode)) && dip->di_size == 0)
+ return false;
+
/* only version 3 or greater inodes are extensively verified here */
if (dip->di_version < 3)
return true;
diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index 1b0a08379759..f51078f1e92a 100644
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -153,7 +153,7 @@ typedef __uint16_t xfs_qwarncnt_t;
#define XFS_QMOPT_RESBLK_MASK (XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS)
extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq,
- xfs_dqid_t id, uint type, uint flags, char *str);
+ xfs_dqid_t id, uint type, uint flags, const char *str);
extern int xfs_calc_dquots_per_chunk(unsigned int nbblks);
#endif /* __XFS_QUOTA_H__ */
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index a0b071d881a0..7088be6afb3c 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -581,7 +581,8 @@ xfs_sb_verify(
* Only check the in progress field for the primary superblock as
* mkfs.xfs doesn't clear it from secondary superblocks.
*/
- return xfs_mount_validate_sb(mp, &sb, bp->b_bn == XFS_SB_DADDR,
+ return xfs_mount_validate_sb(mp, &sb,
+ bp->b_maps[0].bm_bn == XFS_SB_DADDR,
check_version);
}
@@ -679,11 +680,13 @@ xfs_sb_write_verify(
}
const struct xfs_buf_ops xfs_sb_buf_ops = {
+ .name = "xfs_sb",
.verify_read = xfs_sb_read_verify,
.verify_write = xfs_sb_write_verify,
};
const struct xfs_buf_ops xfs_sb_quiet_buf_ops = {
+ .name = "xfs_sb_quiet",
.verify_read = xfs_sb_quiet_read_verify,
.verify_write = xfs_sb_write_verify,
};
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 5be529707903..15c3ceb845b9 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -49,6 +49,7 @@ extern const struct xfs_buf_ops xfs_inobt_buf_ops;
extern const struct xfs_buf_ops xfs_inode_buf_ops;
extern const struct xfs_buf_ops xfs_inode_buf_ra_ops;
extern const struct xfs_buf_ops xfs_dquot_buf_ops;
+extern const struct xfs_buf_ops xfs_dquot_buf_ra_ops;
extern const struct xfs_buf_ops xfs_sb_buf_ops;
extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
extern const struct xfs_buf_ops xfs_symlink_buf_ops;
diff --git a/fs/xfs/libxfs/xfs_symlink_remote.c b/fs/xfs/libxfs/xfs_symlink_remote.c
index cb6fd20a4d3d..2e2c6716b623 100644
--- a/fs/xfs/libxfs/xfs_symlink_remote.c
+++ b/fs/xfs/libxfs/xfs_symlink_remote.c
@@ -168,6 +168,7 @@ xfs_symlink_write_verify(
}
const struct xfs_buf_ops xfs_symlink_buf_ops = {
+ .name = "xfs_symlink",
.verify_read = xfs_symlink_read_verify,
.verify_write = xfs_symlink_write_verify,
};
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index 6bb470fbb8e8..c5101a3295d8 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -288,16 +288,11 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
return error;
if (type == ACL_TYPE_ACCESS) {
- umode_t mode = inode->i_mode;
- error = posix_acl_equiv_mode(acl, &mode);
-
- if (error <= 0) {
- acl = NULL;
-
- if (error < 0)
- return error;
- }
+ umode_t mode;
+ error = posix_acl_update_mode(inode, &mode, &acl);
+ if (error)
+ return error;
error = xfs_set_mode(inode, mode);
if (error)
return error;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 29e7e5dd5178..a9063ac50c4e 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -288,7 +288,7 @@ xfs_map_blocks(
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
- ssize_t count = 1 << inode->i_blkbits;
+ ssize_t count = i_blocksize(inode);
xfs_fileoff_t offset_fsb, end_fsb;
int error = 0;
int bmapi_flags = XFS_BMAPI_ENTIRE;
@@ -921,7 +921,7 @@ xfs_aops_discard_page(
break;
}
next_buffer:
- offset += 1 << inode->i_blkbits;
+ offset += i_blocksize(inode);
} while ((bh = bh->b_this_page) != head);
@@ -1363,7 +1363,7 @@ xfs_map_trim_size(
offset + mapping_size >= i_size_read(inode)) {
/* limit mapping to block that spans EOF */
mapping_size = roundup_64(i_size_read(inode) - offset,
- 1 << inode->i_blkbits);
+ i_blocksize(inode));
}
if (mapping_size > LONG_MAX)
mapping_size = LONG_MAX;
@@ -1395,7 +1395,7 @@ __xfs_get_blocks(
return -EIO;
offset = (xfs_off_t)iblock << inode->i_blkbits;
- ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
+ ASSERT(bh_result->b_size >= i_blocksize(inode));
size = bh_result->b_size;
if (!create && direct && offset >= i_size_read(inode))
@@ -1426,6 +1426,26 @@ __xfs_get_blocks(
if (error)
goto out_unlock;
+ /*
+ * The only time we can ever safely find delalloc blocks on direct I/O
+ * is a dio write to post-eof speculative preallocation. All other
+ * scenarios are indicative of a problem or misuse (such as mixing
+ * direct and mapped I/O).
+ *
+ * The file may be unmapped by the time we get here so we cannot
+ * reliably fail the I/O based on mapping. Instead, fail the I/O if this
+ * is a read or a write within eof. Otherwise, carry on but warn as a
+ * precuation if the file happens to be mapped.
+ */
+ if (direct && imap.br_startblock == DELAYSTARTBLOCK) {
+ if (!create || offset < i_size_read(VFS_I(ip))) {
+ WARN_ON_ONCE(1);
+ error = -EIO;
+ goto out_unlock;
+ }
+ WARN_ON_ONCE(mapping_mapped(VFS_I(ip)->i_mapping));
+ }
+
/* for DAX, we convert unwritten extents directly */
if (create &&
(!nimaps ||
@@ -1525,7 +1545,6 @@ __xfs_get_blocks(
set_buffer_new(bh_result);
if (imap.br_startblock == DELAYSTARTBLOCK) {
- BUG_ON(direct);
if (create) {
set_buffer_uptodate(bh_result);
set_buffer_mapped(bh_result);
@@ -1968,7 +1987,7 @@ xfs_vm_set_page_dirty(
if (offset < end_offset)
set_buffer_dirty(bh);
bh = bh->b_this_page;
- offset += 1 << inode->i_blkbits;
+ offset += i_blocksize(inode);
} while (bh != head);
}
/*
diff --git a/fs/xfs/xfs_attr.h b/fs/xfs/xfs_attr.h
index dd4824589470..234331227c0c 100644
--- a/fs/xfs/xfs_attr.h
+++ b/fs/xfs/xfs_attr.h
@@ -112,6 +112,7 @@ typedef struct attrlist_cursor_kern {
*========================================================================*/
+/* Return 0 on success, or -errno; other state communicated via *context */
typedef int (*put_listent_func_t)(struct xfs_attr_list_context *, int,
unsigned char *, int, int, unsigned char *);
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 0ef7c2ed3f8a..c8be331a3196 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -108,16 +108,14 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
(int)sfe->namelen,
(int)sfe->valuelen,
&sfe->nameval[sfe->namelen]);
-
+ if (error)
+ return error;
/*
* Either search callback finished early or
* didn't fit it all in the buffer after all.
*/
if (context->seen_enough)
break;
-
- if (error)
- return error;
sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
}
trace_xfs_attr_list_sf_all(context);
@@ -202,8 +200,10 @@ xfs_attr_shortform_list(xfs_attr_list_context_t *context)
sbp->namelen,
sbp->valuelen,
&sbp->name[sbp->namelen]);
- if (error)
+ if (error) {
+ kmem_free(sbuf);
return error;
+ }
if (context->seen_enough)
break;
cursor->offset++;
@@ -454,14 +454,13 @@ xfs_attr3_leaf_list_int(
args.rmtblkcnt = xfs_attr3_rmt_blocks(
args.dp->i_mount, valuelen);
retval = xfs_attr_rmtval_get(&args);
- if (retval)
- return retval;
- retval = context->put_listent(context,
- entry->flags,
- name_rmt->name,
- (int)name_rmt->namelen,
- valuelen,
- args.value);
+ if (!retval)
+ retval = context->put_listent(context,
+ entry->flags,
+ name_rmt->name,
+ (int)name_rmt->namelen,
+ valuelen,
+ args.value);
kmem_free(args.value);
} else {
retval = context->put_listent(context,
@@ -580,7 +579,7 @@ xfs_attr_put_listent(
trace_xfs_attr_list_full(context);
alist->al_more = 1;
context->seen_enough = 1;
- return 1;
+ return 0;
}
aep = (attrlist_ent_t *)&context->alist[context->firstu];
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index dbae6490a79a..863e1bff403b 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -682,7 +682,7 @@ xfs_getbmap(
* extents.
*/
if (map[i].br_startblock == DELAYSTARTBLOCK &&
- map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
+ map[i].br_startoff < XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
ASSERT((iflags & BMV_IF_DELALLOC) != 0);
if (map[i].br_startblock == HOLESTARTBLOCK &&
@@ -1713,6 +1713,7 @@ xfs_swap_extents(
xfs_trans_t *tp;
xfs_bstat_t *sbp = &sxp->sx_stat;
xfs_ifork_t *tempifp, *ifp, *tifp;
+ xfs_extnum_t nextents;
int src_log_flags, target_log_flags;
int error = 0;
int aforkblks = 0;
@@ -1899,7 +1900,8 @@ xfs_swap_extents(
* pointer. Otherwise it's already NULL or
* pointing to the extent.
*/
- if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) {
+ nextents = ip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+ if (nextents <= XFS_INLINE_EXTS) {
ifp->if_u1.if_extents =
ifp->if_u2.if_inline_ext;
}
@@ -1918,7 +1920,8 @@ xfs_swap_extents(
* pointer. Otherwise it's already NULL or
* pointing to the extent.
*/
- if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) {
+ nextents = tip->i_df.if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
+ if (nextents <= XFS_INLINE_EXTS) {
tifp->if_u1.if_extents =
tifp->if_u2.if_inline_ext;
}
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 39090fc56f09..dcb70969ff1c 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -375,6 +375,7 @@ retry:
out_free_pages:
for (i = 0; i < bp->b_page_count; i++)
__free_page(bp->b_pages[i]);
+ bp->b_flags &= ~_XBF_PAGES;
return error;
}
@@ -978,6 +979,8 @@ void
xfs_buf_unlock(
struct xfs_buf *bp)
{
+ ASSERT(xfs_buf_islocked(bp));
+
XB_CLEAR_OWNER(bp);
up(&bp->b_sema);
@@ -1535,7 +1538,7 @@ xfs_wait_buftarg(
* ensure here that all reference counts have been dropped before we
* start walking the LRU list.
*/
- drain_workqueue(btp->bt_mount->m_buf_workqueue);
+ flush_workqueue(btp->bt_mount->m_buf_workqueue);
/* loop until there is nothing left on the lru list. */
while (list_lru_count(&btp->bt_lru)) {
@@ -1712,6 +1715,28 @@ error:
}
/*
+ * Cancel a delayed write list.
+ *
+ * Remove each buffer from the list, clear the delwri queue flag and drop the
+ * associated buffer reference.
+ */
+void
+xfs_buf_delwri_cancel(
+ struct list_head *list)
+{
+ struct xfs_buf *bp;
+
+ while (!list_empty(list)) {
+ bp = list_first_entry(list, struct xfs_buf, b_list);
+
+ xfs_buf_lock(bp);
+ bp->b_flags &= ~_XBF_DELWRI_Q;
+ list_del_init(&bp->b_list);
+ xfs_buf_relse(bp);
+ }
+}
+
+/*
* Add a buffer to the delayed write list.
*
* This queues a buffer for writeout if it hasn't already been. Note that
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index c79b717d9b88..149bbd451731 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -132,6 +132,7 @@ struct xfs_buf_map {
struct xfs_buf_map (map) = { .bm_bn = (blkno), .bm_len = (numblk) };
struct xfs_buf_ops {
+ char *name;
void (*verify_read)(struct xfs_buf *);
void (*verify_write)(struct xfs_buf *);
};
@@ -303,6 +304,7 @@ extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
extern void *xfs_buf_offset(struct xfs_buf *, size_t);
/* Delayed Write Buffer Routines */
+extern void xfs_buf_delwri_cancel(struct list_head *);
extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);
extern int xfs_buf_delwri_submit(struct list_head *);
extern int xfs_buf_delwri_submit_nowait(struct list_head *);
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 642d55d10075..2fbf643fa10a 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -406,6 +406,7 @@ xfs_dir2_leaf_readbuf(
/*
* Do we need more readahead?
+ * Each loop tries to process 1 full dir blk; last may be partial.
*/
blk_start_plug(&plug);
for (mip->ra_index = mip->ra_offset = i = 0;
@@ -416,7 +417,8 @@ xfs_dir2_leaf_readbuf(
* Read-ahead a contiguous directory block.
*/
if (i > mip->ra_current &&
- map[mip->ra_index].br_blockcount >= geo->fsbcount) {
+ (map[mip->ra_index].br_blockcount - mip->ra_offset) >=
+ geo->fsbcount) {
xfs_dir3_data_readahead(dp,
map[mip->ra_index].br_startoff + mip->ra_offset,
XFS_FSB_TO_DADDR(dp->i_mount,
@@ -437,14 +439,19 @@ xfs_dir2_leaf_readbuf(
}
/*
- * Advance offset through the mapping table.
+ * Advance offset through the mapping table, processing a full
+ * dir block even if it is fragmented into several extents.
+ * But stop if we have consumed all valid mappings, even if
+ * it's not yet a full directory block.
*/
- for (j = 0; j < geo->fsbcount; j += length ) {
+ for (j = 0;
+ j < geo->fsbcount && mip->ra_index < mip->map_valid;
+ j += length ) {
/*
* The rest of this extent but not more than a dir
* block.
*/
- length = min_t(int, geo->fsbcount,
+ length = min_t(int, geo->fsbcount - j,
map[mip->ra_index].br_blockcount -
mip->ra_offset);
mip->ra_offset += length;
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 74d0e5966ebc..88693a98fac5 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -164,9 +164,9 @@ xfs_verifier_error(
{
struct xfs_mount *mp = bp->b_target->bt_mount;
- xfs_alert(mp, "Metadata %s detected at %pF, block 0x%llx",
+ xfs_alert(mp, "Metadata %s detected at %pF, %s block 0x%llx",
bp->b_error == -EFSBADCRC ? "CRC error" : "corruption",
- __return_address, bp->b_bn);
+ __return_address, bp->b_ops->name, bp->b_bn);
xfs_alert(mp, "Unmount and run xfs_repair");
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f5392ab2def1..3dd47307363f 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -947,7 +947,7 @@ xfs_file_fallocate(
if (error)
goto out_unlock;
} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
- unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
+ unsigned int blksize_mask = i_blocksize(inode) - 1;
if (offset & blksize_mask || len & blksize_mask) {
error = -EINVAL;
@@ -969,7 +969,7 @@ xfs_file_fallocate(
if (error)
goto out_unlock;
} else if (mode & FALLOC_FL_INSERT_RANGE) {
- unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
+ unsigned int blksize_mask = i_blocksize(inode) - 1;
new_size = i_size_read(inode) + len;
if (offset & blksize_mask || len & blksize_mask) {
@@ -1208,7 +1208,7 @@ xfs_find_get_desired_pgoff(
unsigned nr_pages;
unsigned int i;
- want = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
+ want = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1;
nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
want);
/*
@@ -1235,17 +1235,6 @@ xfs_find_get_desired_pgoff(
break;
}
- /*
- * At lease we found one page. If this is the first time we
- * step into the loop, and if the first page index offset is
- * greater than the given search offset, a hole was found.
- */
- if (type == HOLE_OFF && lastoff == startoff &&
- lastoff < page_offset(pvec.pages[0])) {
- found = true;
- break;
- }
-
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
loff_t b_offset;
@@ -1257,18 +1246,18 @@ xfs_find_get_desired_pgoff(
* file mapping. However, page->index will not change
* because we have a reference on the page.
*
- * Searching done if the page index is out of range.
- * If the current offset is not reaches the end of
- * the specified search range, there should be a hole
- * between them.
+ * If current page offset is beyond where we've ended,
+ * we've found a hole.
*/
- if (page->index > end) {
- if (type == HOLE_OFF && lastoff < endoff) {
- *offset = lastoff;
- found = true;
- }
+ if (type == HOLE_OFF && lastoff < endoff &&
+ lastoff < page_offset(pvec.pages[i])) {
+ found = true;
+ *offset = lastoff;
goto out;
}
+ /* Searching done if the page index is out of range. */
+ if (page->index > end)
+ goto out;
lock_page(page);
/*
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index ee3aaa0a5317..ca0d3eb44925 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -243,8 +243,8 @@ xfs_growfs_data_private(
agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
- agf->agf_flfirst = 0;
- agf->agf_fllast = cpu_to_be32(XFS_AGFL_SIZE(mp) - 1);
+ agf->agf_flfirst = cpu_to_be32(1);
+ agf->agf_fllast = 0;
agf->agf_flcount = 0;
tmpsize = agsize - XFS_PREALLOC_BLOCKS(mp);
agf->agf_freeblks = cpu_to_be32(tmpsize);
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index d7a490f24ead..adbc1f59969a 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -210,14 +210,17 @@ xfs_iget_cache_hit(
error = inode_init_always(mp->m_super, inode);
if (error) {
+ bool wake;
/*
* Re-initializing the inode failed, and we are in deep
* trouble. Try to re-add it to the reclaim list.
*/
rcu_read_lock();
spin_lock(&ip->i_flags_lock);
-
+ wake = !!__xfs_iflags_test(ip, XFS_INEW);
ip->i_flags &= ~(XFS_INEW | XFS_IRECLAIM);
+ if (wake)
+ wake_up_bit(&ip->i_flags, __XFS_INEW_BIT);
ASSERT(ip->i_flags & XFS_IRECLAIMABLE);
trace_xfs_iget_reclaim_fail(ip);
goto out_error;
@@ -363,6 +366,22 @@ out_destroy:
return error;
}
+static void
+xfs_inew_wait(
+ struct xfs_inode *ip)
+{
+ wait_queue_head_t *wq = bit_waitqueue(&ip->i_flags, __XFS_INEW_BIT);
+ DEFINE_WAIT_BIT(wait, &ip->i_flags, __XFS_INEW_BIT);
+
+ do {
+ prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
+ if (!xfs_iflags_test(ip, XFS_INEW))
+ break;
+ schedule();
+ } while (true);
+ finish_wait(wq, &wait.wait);
+}
+
/*
* Look up an inode by number in the given file system.
* The inode is looked up in the cache held in each AG.
@@ -467,9 +486,11 @@ out_error_or_again:
STATIC int
xfs_inode_ag_walk_grab(
- struct xfs_inode *ip)
+ struct xfs_inode *ip,
+ int flags)
{
struct inode *inode = VFS_I(ip);
+ bool newinos = !!(flags & XFS_AGITER_INEW_WAIT);
ASSERT(rcu_read_lock_held());
@@ -487,7 +508,8 @@ xfs_inode_ag_walk_grab(
goto out_unlock_noent;
/* avoid new or reclaimable inodes. Leave for reclaim code to flush */
- if (__xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
+ if ((!newinos && __xfs_iflags_test(ip, XFS_INEW)) ||
+ __xfs_iflags_test(ip, XFS_IRECLAIMABLE | XFS_IRECLAIM))
goto out_unlock_noent;
spin_unlock(&ip->i_flags_lock);
@@ -515,7 +537,8 @@ xfs_inode_ag_walk(
void *args),
int flags,
void *args,
- int tag)
+ int tag,
+ int iter_flags)
{
uint32_t first_index;
int last_error = 0;
@@ -557,7 +580,7 @@ restart:
for (i = 0; i < nr_found; i++) {
struct xfs_inode *ip = batch[i];
- if (done || xfs_inode_ag_walk_grab(ip))
+ if (done || xfs_inode_ag_walk_grab(ip, iter_flags))
batch[i] = NULL;
/*
@@ -585,6 +608,9 @@ restart:
for (i = 0; i < nr_found; i++) {
if (!batch[i])
continue;
+ if ((iter_flags & XFS_AGITER_INEW_WAIT) &&
+ xfs_iflags_test(batch[i], XFS_INEW))
+ xfs_inew_wait(batch[i]);
error = execute(batch[i], flags, args);
IRELE(batch[i]);
if (error == -EAGAIN) {
@@ -637,12 +663,13 @@ xfs_eofblocks_worker(
}
int
-xfs_inode_ag_iterator(
+xfs_inode_ag_iterator_flags(
struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, int flags,
void *args),
int flags,
- void *args)
+ void *args,
+ int iter_flags)
{
struct xfs_perag *pag;
int error = 0;
@@ -652,7 +679,8 @@ xfs_inode_ag_iterator(
ag = 0;
while ((pag = xfs_perag_get(mp, ag))) {
ag = pag->pag_agno + 1;
- error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1);
+ error = xfs_inode_ag_walk(mp, pag, execute, flags, args, -1,
+ iter_flags);
xfs_perag_put(pag);
if (error) {
last_error = error;
@@ -664,6 +692,17 @@ xfs_inode_ag_iterator(
}
int
+xfs_inode_ag_iterator(
+ struct xfs_mount *mp,
+ int (*execute)(struct xfs_inode *ip, int flags,
+ void *args),
+ int flags,
+ void *args)
+{
+ return xfs_inode_ag_iterator_flags(mp, execute, flags, args, 0);
+}
+
+int
xfs_inode_ag_iterator_tag(
struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, int flags,
@@ -680,7 +719,8 @@ xfs_inode_ag_iterator_tag(
ag = 0;
while ((pag = xfs_perag_get_tag(mp, ag, tag))) {
ag = pag->pag_agno + 1;
- error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag);
+ error = xfs_inode_ag_walk(mp, pag, execute, flags, args, tag,
+ 0);
xfs_perag_put(pag);
if (error) {
last_error = error;
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index 62f1f91c32cb..147a79212e63 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -48,6 +48,11 @@ struct xfs_eofblocks {
#define XFS_IGET_UNTRUSTED 0x2
#define XFS_IGET_DONTCACHE 0x4
+/*
+ * flags for AG inode iterator
+ */
+#define XFS_AGITER_INEW_WAIT 0x1 /* wait on new inodes */
+
int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino,
uint flags, uint lock_flags, xfs_inode_t **ipp);
@@ -72,6 +77,9 @@ void xfs_eofblocks_worker(struct work_struct *);
int xfs_inode_ag_iterator(struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, int flags, void *args),
int flags, void *args);
+int xfs_inode_ag_iterator_flags(struct xfs_mount *mp,
+ int (*execute)(struct xfs_inode *ip, int flags, void *args),
+ int flags, void *args, int iter_flags);
int xfs_inode_ag_iterator_tag(struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, int flags, void *args),
int flags, void *args, int tag);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 8ee393996b7d..f0ce28cd311d 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3220,13 +3220,14 @@ xfs_iflush_cluster(
* We need to check under the i_flags_lock for a valid inode
* here. Skip it if it is not valid or the wrong inode.
*/
- spin_lock(&ip->i_flags_lock);
- if (!ip->i_ino ||
+ spin_lock(&iq->i_flags_lock);
+ if (!iq->i_ino ||
+ __xfs_iflags_test(iq, XFS_ISTALE) ||
(XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) {
- spin_unlock(&ip->i_flags_lock);
+ spin_unlock(&iq->i_flags_lock);
continue;
}
- spin_unlock(&ip->i_flags_lock);
+ spin_unlock(&iq->i_flags_lock);
/*
* Do an un-protected check to see if the inode is dirty and
@@ -3342,7 +3343,7 @@ xfs_iflush(
struct xfs_buf **bpp)
{
struct xfs_mount *mp = ip->i_mount;
- struct xfs_buf *bp;
+ struct xfs_buf *bp = NULL;
struct xfs_dinode *dip;
int error;
@@ -3384,14 +3385,22 @@ xfs_iflush(
}
/*
- * Get the buffer containing the on-disk inode.
+ * Get the buffer containing the on-disk inode. We are doing a try-lock
+ * operation here, so we may get an EAGAIN error. In that case, we
+ * simply want to return with the inode still dirty.
+ *
+ * If we get any other error, we effectively have a corruption situation
+ * and we cannot flush the inode, so we treat it the same as failing
+ * xfs_iflush_int().
*/
error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK,
0);
- if (error || !bp) {
+ if (error == -EAGAIN) {
xfs_ifunlock(ip);
return error;
}
+ if (error)
+ goto corrupt_out;
/*
* First flush out the inode that xfs_iflush was called with.
@@ -3419,7 +3428,8 @@ xfs_iflush(
return 0;
corrupt_out:
- xfs_buf_relse(bp);
+ if (bp)
+ xfs_buf_relse(bp);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
cluster_corrupt_out:
error = -EFSCORRUPTED;
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index ca9e11989cbd..ae1a49845744 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -208,7 +208,8 @@ xfs_get_initial_prid(struct xfs_inode *dp)
#define XFS_IRECLAIM (1 << 0) /* started reclaiming this inode */
#define XFS_ISTALE (1 << 1) /* inode has been staled */
#define XFS_IRECLAIMABLE (1 << 2) /* inode can be reclaimed */
-#define XFS_INEW (1 << 3) /* inode has just been allocated */
+#define __XFS_INEW_BIT 3 /* inode has just been allocated */
+#define XFS_INEW (1 << __XFS_INEW_BIT)
#define XFS_ITRUNCATED (1 << 5) /* truncated down so flush-on-close */
#define XFS_IDIRTY_RELEASE (1 << 6) /* dirty release already seen */
#define __XFS_IFLOCK_BIT 7 /* inode is being flushed right now */
@@ -453,6 +454,7 @@ static inline void xfs_finish_inode_setup(struct xfs_inode *ip)
xfs_iflags_clear(ip, XFS_INEW);
barrier();
unlock_new_inode(VFS_I(ip));
+ wake_up_bit(&ip->i_flags, __XFS_INEW_BIT);
}
static inline void xfs_setup_existing_inode(struct xfs_inode *ip)
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index d42738deec6d..e4a4f82ea13f 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -403,6 +403,7 @@ xfs_attrlist_by_handle(
{
int error = -ENOMEM;
attrlist_cursor_kern_t *cursor;
+ struct xfs_fsop_attrlist_handlereq __user *p = arg;
xfs_fsop_attrlist_handlereq_t al_hreq;
struct dentry *dentry;
char *kbuf;
@@ -435,6 +436,11 @@ xfs_attrlist_by_handle(
if (error)
goto out_kfree;
+ if (copy_to_user(&p->pos, cursor, sizeof(attrlist_cursor_kern_t))) {
+ error = -EFAULT;
+ goto out_kfree;
+ }
+
if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen))
error = -EFAULT;
@@ -1379,10 +1385,11 @@ xfs_ioc_getbmap(
unsigned int cmd,
void __user *arg)
{
- struct getbmapx bmx;
+ struct getbmapx bmx = { 0 };
int error;
- if (copy_from_user(&bmx, arg, sizeof(struct getbmapx)))
+ /* struct getbmap is a strict subset of struct getbmapx. */
+ if (copy_from_user(&bmx, arg, offsetof(struct getbmapx, bmv_iflags)))
return -EFAULT;
if (bmx.bmv_count < 2)
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index ec0e239a0fa9..201aae0b2662 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -369,7 +369,14 @@ static inline __uint64_t howmany_64(__uint64_t x, __uint32_t y)
#endif /* DEBUG */
#ifdef CONFIG_XFS_RT
-#define XFS_IS_REALTIME_INODE(ip) ((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME)
+
+/*
+ * make sure we ignore the inode flag if the filesystem doesn't have a
+ * configured realtime device.
+ */
+#define XFS_IS_REALTIME_INODE(ip) \
+ (((ip)->i_d.di_flags & XFS_DIFLAG_REALTIME) && \
+ (ip)->i_mount->m_rtdev_targp)
#else
#define XFS_IS_REALTIME_INODE(ip) (0)
#endif
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index c5ecaacdd218..8cab78eeb0c2 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -3204,6 +3204,7 @@ xlog_recover_dquot_ra_pass2(
struct xfs_disk_dquot *recddq;
struct xfs_dq_logformat *dq_f;
uint type;
+ int len;
if (mp->m_qflags == 0)
@@ -3224,8 +3225,12 @@ xlog_recover_dquot_ra_pass2(
ASSERT(dq_f);
ASSERT(dq_f->qlf_len == 1);
- xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno,
- XFS_FSB_TO_BB(mp, dq_f->qlf_len), NULL);
+ len = XFS_FSB_TO_BB(mp, dq_f->qlf_len);
+ if (xlog_peek_buffer_cancelled(log, dq_f->qlf_blkno, len, 0))
+ return;
+
+ xfs_buf_readahead(mp->m_ddev_targp, dq_f->qlf_blkno, len,
+ &xfs_dquot_buf_ra_ops);
}
STATIC void
@@ -3975,6 +3980,7 @@ xlog_recover_clear_agi_bucket(
agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
offset = offsetof(xfs_agi_t, agi_unlinked) +
(sizeof(xfs_agino_t) * bucket);
+ xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF);
xfs_trans_log_buf(tp, agibp, offset,
(offset + sizeof(xfs_agino_t) - 1));
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 532ab79d38fe..572b64a135b3 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -1355,12 +1355,7 @@ xfs_qm_quotacheck(
mp->m_qflags |= flags;
error_return:
- while (!list_empty(&buffer_list)) {
- struct xfs_buf *bp =
- list_first_entry(&buffer_list, struct xfs_buf, b_list);
- list_del_init(&bp->b_list);
- xfs_buf_relse(bp);
- }
+ xfs_buf_delwri_cancel(&buffer_list);
if (error) {
xfs_warn(mp,
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 3640c6e896af..4d334440bd94 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -764,5 +764,6 @@ xfs_qm_dqrele_all_inodes(
uint flags)
{
ASSERT(mp->m_quotainfo);
- xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, NULL);
+ xfs_inode_ag_iterator_flags(mp, xfs_dqrele_inode, flags, NULL,
+ XFS_AGITER_INEW_WAIT);
}
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 36bd8825bfb0..ef64a1e1a66a 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1233,6 +1233,16 @@ xfs_fs_remount(
return -EINVAL;
}
+ if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
+ xfs_sb_has_ro_compat_feature(sbp,
+ XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
+ xfs_warn(mp,
+"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
+ (sbp->sb_features_ro_compat &
+ XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
+ return -EINVAL;
+ }
+
mp->m_flags &= ~XFS_MOUNT_RDONLY;
/*
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index 839b35ca21c6..9beaf192b4bb 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -180,7 +180,8 @@ xfs_xattr_put_listent(
arraytop = context->count + prefix_len + namelen + 1;
if (arraytop > context->firstu) {
context->count = -1; /* insufficient space */
- return 1;
+ context->seen_enough = 1;
+ return 0;
}
offset = (char *)context->alist + context->count;
strncpy(offset, xfs_xattr_prefix(flags), prefix_len);
@@ -222,12 +223,15 @@ list_one_attr(const char *name, const size_t len, void *data,
}
ssize_t
-xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
+xfs_vn_listxattr(
+ struct dentry *dentry,
+ char *data,
+ size_t size)
{
struct xfs_attr_list_context context;
struct attrlist_cursor_kern cursor = { 0 };
- struct inode *inode = d_inode(dentry);
- int error;
+ struct inode *inode = d_inode(dentry);
+ int error;
/*
* First read the regular on-disk attributes.
@@ -245,7 +249,9 @@ xfs_vn_listxattr(struct dentry *dentry, char *data, size_t size)
else
context.put_listent = xfs_xattr_put_listent_sizes;
- xfs_attr_list_int(&context);
+ error = xfs_attr_list_int(&context);
+ if (error)
+ return error;
if (context.count < 0)
return -ERANGE;