about summary refs log tree commit diff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/affs/amigaffs.c2
-rw-r--r--fs/aio.c16
-rw-r--r--fs/attr.c13
-rw-r--r--fs/autofs4/dev-ioctl.c8
-rw-r--r--fs/autofs4/expire.c12
-rw-r--r--fs/autofs4/root.c2
-rw-r--r--fs/binfmt_elf.c44
-rw-r--r--fs/bio-integrity.c14
-rw-r--r--fs/bio.c24
-rw-r--r--fs/block_dev.c9
-rw-r--r--fs/btrfs/acl.c2
-rw-r--r--fs/btrfs/backref.c32
-rw-r--r--fs/btrfs/backref.h4
-rw-r--r--fs/btrfs/compression.c2
-rw-r--r--fs/btrfs/ctree.c13
-rw-r--r--fs/btrfs/delayed-inode.c8
-rw-r--r--fs/btrfs/disk-io.c15
-rw-r--r--fs/btrfs/extent-tree.c33
-rw-r--r--fs/btrfs/extent_io.c5
-rw-r--r--fs/btrfs/extent_map.c2
-rw-r--r--fs/btrfs/file-item.c4
-rw-r--r--fs/btrfs/file.c56
-rw-r--r--fs/btrfs/free-space-cache.c4
-rw-r--r--fs/btrfs/inode.c63
-rw-r--r--fs/btrfs/ioctl.c17
-rw-r--r--fs/btrfs/relocation.c19
-rw-r--r--fs/btrfs/scrub.c7
-rw-r--r--fs/btrfs/send.c46
-rw-r--r--fs/btrfs/transaction.c12
-rw-r--r--fs/btrfs/tree-log.c9
-rw-r--r--fs/btrfs/ulist.c15
-rw-r--r--fs/btrfs/volumes.c34
-rw-r--r--fs/buffer.c20
-rw-r--r--fs/ceph/addr.c8
-rw-r--r--fs/ceph/dir.c8
-rw-r--r--fs/ceph/file.c47
-rw-r--r--fs/ceph/inode.c6
-rw-r--r--fs/ceph/ioctl.c12
-rw-r--r--fs/ceph/mds_client.c15
-rw-r--r--fs/ceph/mdsmap.c4
-rw-r--r--fs/ceph/super.c2
-rw-r--r--fs/ceph/xattr.c9
-rw-r--r--fs/cifs/cifs_unicode.c7
-rw-r--r--fs/cifs/cifs_unicode.h8
-rw-r--r--fs/cifs/cifsacl.c28
-rw-r--r--fs/cifs/cifsencrypt.c8
-rw-r--r--fs/cifs/cifsglob.h18
-rw-r--r--fs/cifs/cifssmb.c8
-rw-r--r--fs/cifs/connect.c9
-rw-r--r--fs/cifs/dir.c1
-rw-r--r--fs/cifs/file.c53
-rw-r--r--fs/cifs/inode.c31
-rw-r--r--fs/cifs/readdir.c14
-rw-r--r--fs/cifs/sess.c6
-rw-r--r--fs/cifs/smb1ops.c15
-rw-r--r--fs/cifs/smb2file.c2
-rw-r--r--fs/cifs/smb2glob.h3
-rw-r--r--fs/cifs/smb2inode.c2
-rw-r--r--fs/cifs/smb2maperror.c4
-rw-r--r--fs/cifs/smb2misc.c148
-rw-r--r--fs/cifs/smb2ops.c24
-rw-r--r--fs/cifs/smb2pdu.c17
-rw-r--r--fs/cifs/xattr.c64
-rw-r--r--fs/coda/cache.c2
-rw-r--r--fs/compat_binfmt_elf.c5
-rw-r--r--fs/configfs/dir.c16
-rw-r--r--fs/coredump.c2
-rw-r--r--fs/dcache.c199
-rw-r--r--fs/dcookies.c2
-rw-r--r--fs/debugfs/inode.c103
-rw-r--r--fs/devpts/inode.c1
-rw-r--r--fs/ecryptfs/crypto.c1
-rw-r--r--fs/ecryptfs/file.c12
-rw-r--r--fs/ecryptfs/inode.c2
-rw-r--r--fs/ecryptfs/keystore.c3
-rw-r--r--fs/ecryptfs/main.c16
-rw-r--r--fs/exec.c16
-rw-r--r--fs/exofs/ore.c37
-rw-r--r--fs/exportfs/expfs.c2
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext2/super.c1
-rw-r--r--fs/ext2/xip.c1
-rw-r--r--fs/ext3/namei.c7
-rw-r--r--fs/ext3/super.c7
-rw-r--r--fs/ext4/balloc.c4
-rw-r--r--fs/ext4/ext4.h18
-rw-r--r--fs/ext4/ext4_jbd2.c17
-rw-r--r--fs/ext4/extents.c90
-rw-r--r--fs/ext4/file.c24
-rw-r--r--fs/ext4/ialloc.c28
-rw-r--r--fs/ext4/indirect.c20
-rw-r--r--fs/ext4/inline.c10
-rw-r--r--fs/ext4/inode.c175
-rw-r--r--fs/ext4/ioctl.c19
-rw-r--r--fs/ext4/mballoc.c81
-rw-r--r--fs/ext4/namei.c31
-rw-r--r--fs/ext4/page-io.c24
-rw-r--r--fs/ext4/resize.c40
-rw-r--r--fs/ext4/super.c93
-rw-r--r--fs/ext4/xattr.c58
-rw-r--r--fs/file.c2
-rw-r--r--fs/file_table.c4
-rw-r--r--fs/fs-writeback.c75
-rw-r--r--fs/fuse/dev.c29
-rw-r--r--fs/fuse/dir.c55
-rw-r--r--fs/fuse/file.c34
-rw-r--r--fs/fuse/fuse_i.h2
-rw-r--r--fs/fuse/inode.c23
-rw-r--r--fs/gfs2/aops.c30
-rw-r--r--fs/gfs2/inode.c16
-rw-r--r--fs/gfs2/ops_fstype.c12
-rw-r--r--fs/hfsplus/brec.c20
-rw-r--r--fs/hpfs/map.c3
-rw-r--r--fs/hpfs/super.c8
-rw-r--r--fs/hugetlbfs/inode.c8
-rw-r--r--fs/inode.c10
-rw-r--r--fs/ioprio.c14
-rw-r--r--fs/isofs/inode.c31
-rw-r--r--fs/isofs/isofs.h23
-rw-r--r--fs/isofs/rock.c48
-rw-r--r--fs/jbd2/journal.c3
-rw-r--r--fs/jbd2/recovery.c8
-rw-r--r--fs/jbd2/transaction.c13
-rw-r--r--fs/jffs2/compr_rtime.c4
-rw-r--r--fs/jffs2/jffs2_fs_sb.h2
-rw-r--r--fs/jffs2/nodelist.h2
-rw-r--r--fs/jffs2/nodemgmt.c14
-rw-r--r--fs/jffs2/scan.c5
-rw-r--r--fs/jffs2/wbuf.c17
-rw-r--r--fs/jfs/jfs_dtree.c35
-rw-r--r--fs/jfs/jfs_inode.c3
-rw-r--r--fs/libfs.c12
-rw-r--r--fs/lockd/clntlock.c13
-rw-r--r--fs/lockd/clntproc.c5
-rw-r--r--fs/lockd/mon.c6
-rw-r--r--fs/lockd/svc.c13
-rw-r--r--fs/lockd/svclock.c12
-rw-r--r--fs/locks.c7
-rw-r--r--fs/mount.h2
-rw-r--r--fs/namei.c23
-rw-r--r--fs/namespace.c75
-rw-r--r--fs/ncpfs/dir.c2
-rw-r--r--fs/ncpfs/ioctl.c1
-rw-r--r--fs/ncpfs/ncplib_kernel.h4
-rw-r--r--fs/nfs/blocklayout/extents.c2
-rw-r--r--fs/nfs/callback.c8
-rw-r--r--fs/nfs/callback_xdr.c4
-rw-r--r--fs/nfs/delegation.c36
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/direct.c7
-rw-r--r--fs/nfs/getroot.c2
-rw-r--r--fs/nfs/inode.c28
-rw-r--r--fs/nfs/nfs3acl.c5
-rw-r--r--fs/nfs/nfs4client.c75
-rw-r--r--fs/nfs/nfs4filelayout.c2
-rw-r--r--fs/nfs/nfs4filelayoutdev.c18
-rw-r--r--fs/nfs/nfs4proc.c143
-rw-r--r--fs/nfs/nfs4renewd.c12
-rw-r--r--fs/nfs/nfs4state.c40
-rw-r--r--fs/nfs/nfs4xdr.c47
-rw-r--r--fs/nfsd/export.c15
-rw-r--r--fs/nfsd/nfs4acl.c17
-rw-r--r--fs/nfsd/nfs4callback.c15
-rw-r--r--fs/nfsd/nfs4proc.c21
-rw-r--r--fs/nfsd/nfs4state.c99
-rw-r--r--fs/nfsd/nfs4xdr.c27
-rw-r--r--fs/nfsd/nfscache.c46
-rw-r--r--fs/nfsd/nfsctl.c5
-rw-r--r--fs/nfsd/nfssvc.c5
-rw-r--r--fs/nfsd/vfs.c188
-rw-r--r--fs/nilfs2/btree.c47
-rw-r--r--fs/nilfs2/inode.c39
-rw-r--r--fs/nilfs2/namei.c15
-rw-r--r--fs/nilfs2/nilfs.h2
-rw-r--r--fs/nilfs2/page.c2
-rw-r--r--fs/nilfs2/segbuf.c5
-rw-r--r--fs/nilfs2/segment.c66
-rw-r--r--fs/nilfs2/segment.h5
-rw-r--r--fs/notify/fanotify/fanotify_user.c7
-rw-r--r--fs/notify/fdinfo.c4
-rw-r--r--fs/notify/fsnotify.c4
-rw-r--r--fs/notify/inode_mark.c17
-rw-r--r--fs/ocfs2/aops.c16
-rw-r--r--fs/ocfs2/buffer_head_io.c2
-rw-r--r--fs/ocfs2/dcache.c2
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c18
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c29
-rw-r--r--fs/ocfs2/extent_map.c1
-rw-r--r--fs/ocfs2/file.c22
-rw-r--r--fs/ocfs2/quota_global.c27
-rw-r--r--fs/ocfs2/quota_local.c4
-rw-r--r--fs/ocfs2/xattr.c10
-rw-r--r--fs/open.c26
-rw-r--r--fs/pipe.c39
-rw-r--r--fs/posix_acl.c6
-rw-r--r--fs/proc/Makefile1
-rw-r--r--fs/proc/array.c11
-rw-r--r--fs/proc/base.c54
-rw-r--r--fs/proc/generic.c12
-rw-r--r--fs/proc/inode.c21
-rw-r--r--fs/proc/internal.h8
-rw-r--r--fs/proc/page.c2
-rw-r--r--fs/proc/proc_devtree.c243
-rw-r--r--fs/proc/root.c6
-rw-r--r--fs/proc/task_mmu.c18
-rw-r--r--fs/pstore/inode.c9
-rw-r--r--fs/pstore/ram.c28
-rw-r--r--fs/pstore/ram_core.c85
-rw-r--r--fs/quota/dquot.c62
-rw-r--r--fs/read_write.c16
-rw-r--r--fs/reiserfs/dir.c6
-rw-r--r--fs/reiserfs/inode.c8
-rw-r--r--fs/reiserfs/procfs.c99
-rw-r--r--fs/reiserfs/reiserfs.h2
-rw-r--r--fs/reiserfs/super.c3
-rw-r--r--fs/seq_file.c2
-rw-r--r--fs/splice.c26
-rw-r--r--fs/stat.c11
-rw-r--r--fs/statfs.c2
-rw-r--r--fs/super.c27
-rw-r--r--fs/sysv/super.c1
-rw-r--r--fs/ubifs/commit.c10
-rw-r--r--fs/ubifs/file.c3
-rw-r--r--fs/ubifs/log.c19
-rw-r--r--fs/ubifs/master.c7
-rw-r--r--fs/ubifs/shrinker.c1
-rw-r--r--fs/ubifs/super.c3
-rw-r--r--fs/ubifs/ubifs.h2
-rw-r--r--fs/udf/inode.c35
-rw-r--r--fs/udf/super.c342
-rw-r--r--fs/udf/symlink.c17
-rw-r--r--fs/xfs/xfs_aops.c61
-rw-r--r--fs/xfs/xfs_buf_item.c4
-rw-r--r--fs/xfs/xfs_da_btree.c7
-rw-r--r--fs/xfs/xfs_dquot.c3
-rw-r--r--fs/xfs/xfs_file.c21
-rw-r--r--fs/xfs/xfs_fsops.c6
-rw-r--r--fs/xfs/xfs_inode.c2
-rw-r--r--fs/xfs/xfs_ioctl.c9
-rw-r--r--fs/xfs/xfs_ioctl32.c3
-rw-r--r--fs/xfs/xfs_qm.c8
-rw-r--r--fs/xfs/xfs_trans.c1
242 files changed, 3523 insertions, 2131 deletions
diff --git a/fs/affs/amigaffs.c b/fs/affs/amigaffs.c
index d9a43674cb94..9cca0ea4e479 100644
--- a/fs/affs/amigaffs.c
+++ b/fs/affs/amigaffs.c
@@ -126,7 +126,7 @@ affs_fix_dcache(struct inode *inode, u32 entry_ino)
{
struct dentry *dentry;
spin_lock(&inode->i_lock);
- hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+ hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
if (entry_ino == (u32)(long)dentry->d_fsdata) {
dentry->d_fsdata = (void *)inode->i_ino;
break;
diff --git a/fs/aio.c b/fs/aio.c
index 2bbcacf74d0c..ded94c4fa30d 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -310,7 +310,6 @@ static void free_ioctx(struct kioctx *ctx)
avail = (head <= ctx->tail ? ctx->tail : ctx->nr_events) - head;
- atomic_sub(avail, &ctx->reqs_active);
head += avail;
head %= ctx->nr_events;
}
@@ -423,10 +422,12 @@ static void kill_ioctx_rcu(struct rcu_head *head)
* when the processes owning a context have all exited to encourage
* the rapid destruction of the kioctx.
*/
-static void kill_ioctx(struct kioctx *ctx)
+static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx)
{
if (!atomic_xchg(&ctx->dead, 1)) {
+ spin_lock(&mm->ioctx_lock);
hlist_del_rcu(&ctx->list);
+ spin_unlock(&mm->ioctx_lock);
/*
* It'd be more correct to do this in free_ioctx(), after all
@@ -494,7 +495,7 @@ void exit_aio(struct mm_struct *mm)
*/
ctx->mmap_size = 0;
- kill_ioctx(ctx);
+ kill_ioctx(mm, ctx);
}
}
@@ -676,6 +677,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
put_rq:
/* everything turned out well, dispose of the aiocb. */
aio_put_req(iocb);
+ atomic_dec(&ctx->reqs_active);
/*
* We have to order our ring_info tail store above and test
@@ -715,6 +717,8 @@ static long aio_read_events_ring(struct kioctx *ctx,
if (head == ctx->tail)
goto out;
+ head %= ctx->nr_events;
+
while (ret < nr) {
long avail;
struct io_event *ev;
@@ -753,8 +757,6 @@ static long aio_read_events_ring(struct kioctx *ctx,
flush_dcache_page(ctx->ring_pages[0]);
pr_debug("%li h%u t%u\n", ret, head, ctx->tail);
-
- atomic_sub(ret, &ctx->reqs_active);
out:
mutex_unlock(&ctx->ring_lock);
@@ -852,7 +854,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
if (!IS_ERR(ioctx)) {
ret = put_user(ioctx->user_id, ctxp);
if (ret)
- kill_ioctx(ioctx);
+ kill_ioctx(current->mm, ioctx);
put_ioctx(ioctx);
}
@@ -870,7 +872,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
{
struct kioctx *ioctx = lookup_ioctx(ctx);
if (likely(NULL != ioctx)) {
- kill_ioctx(ioctx);
+ kill_ioctx(current->mm, ioctx);
put_ioctx(ioctx);
return 0;
}
diff --git a/fs/attr.c b/fs/attr.c
index 1449adb14ef6..66fa6251c398 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -50,14 +50,14 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
if ((ia_valid & ATTR_UID) &&
(!uid_eq(current_fsuid(), inode->i_uid) ||
!uid_eq(attr->ia_uid, inode->i_uid)) &&
- !inode_capable(inode, CAP_CHOWN))
+ !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
return -EPERM;
/* Make sure caller can chgrp. */
if ((ia_valid & ATTR_GID) &&
(!uid_eq(current_fsuid(), inode->i_uid) ||
(!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) &&
- !inode_capable(inode, CAP_CHOWN))
+ !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
return -EPERM;
/* Make sure a caller can chmod. */
@@ -67,7 +67,7 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
/* Also check the setgid bit! */
if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
inode->i_gid) &&
- !inode_capable(inode, CAP_FSETID))
+ !capable_wrt_inode_uidgid(inode, CAP_FSETID))
attr->ia_mode &= ~S_ISGID;
}
@@ -160,7 +160,7 @@ void setattr_copy(struct inode *inode, const struct iattr *attr)
umode_t mode = attr->ia_mode;
if (!in_group_p(inode->i_gid) &&
- !inode_capable(inode, CAP_FSETID))
+ !capable_wrt_inode_uidgid(inode, CAP_FSETID))
mode &= ~S_ISGID;
inode->i_mode = mode;
}
@@ -182,11 +182,6 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
return -EPERM;
}
- if ((ia_valid & ATTR_SIZE) && IS_I_VERSION(inode)) {
- if (attr->ia_size != inode->i_size)
- inode_inc_iversion(inode);
- }
-
if ((ia_valid & ATTR_MODE)) {
umode_t amode = attr->ia_mode;
/* Flag setting protected by i_mutex */
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index 743c7c2c949d..6aa8312ad89f 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -95,7 +95,7 @@ static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param)
*/
static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *in)
{
- struct autofs_dev_ioctl tmp;
+ struct autofs_dev_ioctl tmp, *res;
if (copy_from_user(&tmp, in, sizeof(tmp)))
return ERR_PTR(-EFAULT);
@@ -103,7 +103,11 @@ static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *i
if (tmp.size < sizeof(tmp))
return ERR_PTR(-EINVAL);
- return memdup_user(in, tmp.size);
+ res = memdup_user(in, tmp.size);
+ if (!IS_ERR(res))
+ res->size = tmp.size;
+
+ return res;
}
static inline void free_dev_ioctl(struct autofs_dev_ioctl *param)
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 13ddec92341c..8ad277990eac 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -91,7 +91,7 @@ static struct dentry *get_next_positive_subdir(struct dentry *prev,
spin_lock(&root->d_lock);
if (prev)
- next = prev->d_u.d_child.next;
+ next = prev->d_child.next;
else {
prev = dget_dlock(root);
next = prev->d_subdirs.next;
@@ -105,13 +105,13 @@ cont:
return NULL;
}
- q = list_entry(next, struct dentry, d_u.d_child);
+ q = list_entry(next, struct dentry, d_child);
spin_lock_nested(&q->d_lock, DENTRY_D_LOCK_NESTED);
/* Already gone or negative dentry (under construction) - try next */
if (q->d_count == 0 || !simple_positive(q)) {
spin_unlock(&q->d_lock);
- next = q->d_u.d_child.next;
+ next = q->d_child.next;
goto cont;
}
dget_dlock(q);
@@ -161,13 +161,13 @@ again:
goto relock;
}
spin_unlock(&p->d_lock);
- next = p->d_u.d_child.next;
+ next = p->d_child.next;
p = parent;
if (next != &parent->d_subdirs)
break;
}
}
- ret = list_entry(next, struct dentry, d_u.d_child);
+ ret = list_entry(next, struct dentry, d_child);
spin_lock_nested(&ret->d_lock, DENTRY_D_LOCK_NESTED);
/* Negative dentry - try next */
@@ -447,7 +447,7 @@ found:
spin_lock(&sbi->lookup_lock);
spin_lock(&expired->d_parent->d_lock);
spin_lock_nested(&expired->d_lock, DENTRY_D_LOCK_NESTED);
- list_move(&expired->d_parent->d_subdirs, &expired->d_u.d_child);
+ list_move(&expired->d_parent->d_subdirs, &expired->d_child);
spin_unlock(&expired->d_lock);
spin_unlock(&expired->d_parent->d_lock);
spin_unlock(&sbi->lookup_lock);
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index 085da86e07c2..79ab4cb3590a 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -655,7 +655,7 @@ static void autofs_clear_leaf_automount_flags(struct dentry *dentry)
/* only consider parents below dentrys in the root */
if (IS_ROOT(parent->d_parent))
return;
- d_child = &dentry->d_u.d_child;
+ d_child = &dentry->d_child;
/* Set parent managed if it's becoming empty */
if (d_child->next == &parent->d_subdirs &&
d_child->prev == &parent->d_subdirs)
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index f8a0b0efda44..53f620a4350e 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -552,11 +552,12 @@ out:
static unsigned long randomize_stack_top(unsigned long stack_top)
{
- unsigned int random_variable = 0;
+ unsigned long random_variable = 0;
if ((current->flags & PF_RANDOMIZE) &&
!(current->personality & ADDR_NO_RANDOMIZE)) {
- random_variable = get_random_int() & STACK_RND_MASK;
+ random_variable = (unsigned long) get_random_int();
+ random_variable &= STACK_RND_MASK;
random_variable <<= PAGE_SHIFT;
}
#ifdef CONFIG_STACK_GROWSUP
@@ -755,6 +756,7 @@ static int load_elf_binary(struct linux_binprm *bprm)
i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
int elf_prot = 0, elf_flags;
unsigned long k, vaddr;
+ unsigned long total_size = 0;
if (elf_ppnt->p_type != PT_LOAD)
continue;
@@ -819,10 +821,16 @@ static int load_elf_binary(struct linux_binprm *bprm)
#else
load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
+ total_size = total_mapping_size(elf_phdata,
+ loc->elf_ex.e_phnum);
+ if (!total_size) {
+ error = -EINVAL;
+ goto out_free_dentry;
+ }
}
error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
- elf_prot, elf_flags, 0);
+ elf_prot, elf_flags, total_size);
if (BAD_ADDR(error)) {
send_sig(SIGKILL, current, 0);
retval = IS_ERR((void *)error) ?
@@ -1415,7 +1423,7 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
* long file_ofs
* followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
*/
-static void fill_files_note(struct memelfnote *note)
+static int fill_files_note(struct memelfnote *note)
{
struct vm_area_struct *vma;
unsigned count, size, names_ofs, remaining, n;
@@ -1430,11 +1438,11 @@ static void fill_files_note(struct memelfnote *note)
names_ofs = (2 + 3 * count) * sizeof(data[0]);
alloc:
if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
- goto err;
+ return -EINVAL;
size = round_up(size, PAGE_SIZE);
data = vmalloc(size);
if (!data)
- goto err;
+ return -ENOMEM;
start_end_ofs = data + 2;
name_base = name_curpos = ((char *)data) + names_ofs;
@@ -1487,7 +1495,7 @@ static void fill_files_note(struct memelfnote *note)
size = name_curpos - (char *)data;
fill_note(note, "CORE", NT_FILE, size, data);
- err: ;
+ return 0;
}
#ifdef CORE_DUMP_USE_REGSET
@@ -1688,8 +1696,8 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
fill_auxv_note(&info->auxv, current->mm);
info->size += notesize(&info->auxv);
- fill_files_note(&info->files);
- info->size += notesize(&info->files);
+ if (fill_files_note(&info->files) == 0)
+ info->size += notesize(&info->files);
return 1;
}
@@ -1721,7 +1729,8 @@ static int write_note_info(struct elf_note_info *info,
return 0;
if (first && !writenote(&info->auxv, file, foffset))
return 0;
- if (first && !writenote(&info->files, file, foffset))
+ if (first && info->files.data &&
+ !writenote(&info->files, file, foffset))
return 0;
for (i = 1; i < info->thread_notes; ++i)
@@ -1808,6 +1817,7 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
struct elf_note_info {
struct memelfnote *notes;
+ struct memelfnote *notes_files;
struct elf_prstatus *prstatus; /* NT_PRSTATUS */
struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */
struct list_head thread_list;
@@ -1898,9 +1908,12 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
fill_auxv_note(info->notes + 3, current->mm);
- fill_files_note(info->notes + 4);
+ info->numnote = 4;
- info->numnote = 5;
+ if (fill_files_note(info->notes + info->numnote) == 0) {
+ info->notes_files = info->notes + info->numnote;
+ info->numnote++;
+ }
/* Try to dump the FPU. */
info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
@@ -1962,8 +1975,9 @@ static void free_note_info(struct elf_note_info *info)
kfree(list_entry(tmp, struct elf_thread_status, list));
}
- /* Free data allocated by fill_files_note(): */
- vfree(info->notes[4].data);
+ /* Free data possibly allocated by fill_files_note(): */
+ if (info->notes_files)
+ vfree(info->notes_files->data);
kfree(info->prstatus);
kfree(info->psinfo);
@@ -2046,7 +2060,7 @@ static int elf_core_dump(struct coredump_params *cprm)
struct vm_area_struct *vma, *gate_vma;
struct elfhdr *elf = NULL;
loff_t offset = 0, dataoff, foffset;
- struct elf_note_info info;
+ struct elf_note_info info = { };
struct elf_phdr *phdr4note = NULL;
struct elf_shdr *shdr4extnum = NULL;
Elf_Half e_phnum;
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 8fb42916d8a2..433c3b828e1d 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -114,6 +114,14 @@ void bio_integrity_free(struct bio *bio)
}
EXPORT_SYMBOL(bio_integrity_free);
+static inline unsigned int bip_integrity_vecs(struct bio_integrity_payload *bip)
+{
+ if (bip->bip_slab == BIO_POOL_NONE)
+ return BIP_INLINE_VECS;
+
+ return bvec_nr_vecs(bip->bip_slab);
+}
+
/**
* bio_integrity_add_page - Attach integrity metadata
* @bio: bio to update
@@ -129,7 +137,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
struct bio_integrity_payload *bip = bio->bi_integrity;
struct bio_vec *iv;
- if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_slab)) {
+ if (bip->bip_vcnt >= bip_integrity_vecs(bip)) {
printk(KERN_ERR "%s: bip_vec full\n", __func__);
return 0;
}
@@ -450,7 +458,7 @@ static int bio_integrity_verify(struct bio *bio)
bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
bix.sector_size = bi->sector_size;
- bio_for_each_segment(bv, bio, i) {
+ bio_for_each_segment_all(bv, bio, i) {
void *kaddr = kmap_atomic(bv->bv_page);
bix.data_buf = kaddr + bv->bv_offset;
bix.data_size = bv->bv_len;
@@ -734,7 +742,7 @@ void bioset_integrity_free(struct bio_set *bs)
mempool_destroy(bs->bio_integrity_pool);
if (bs->bvec_integrity_pool)
- mempool_destroy(bs->bio_integrity_pool);
+ mempool_destroy(bs->bvec_integrity_pool);
}
EXPORT_SYMBOL(bioset_integrity_free);
diff --git a/fs/bio.c b/fs/bio.c
index 94bbc04dba77..5e7507d79297 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -917,8 +917,8 @@ void bio_copy_data(struct bio *dst, struct bio *src)
src_p = kmap_atomic(src_bv->bv_page);
dst_p = kmap_atomic(dst_bv->bv_page);
- memcpy(dst_p + dst_bv->bv_offset,
- src_p + src_bv->bv_offset,
+ memcpy(dst_p + dst_offset,
+ src_p + src_offset,
bytes);
kunmap_atomic(dst_p);
@@ -1045,12 +1045,22 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs,
int bio_uncopy_user(struct bio *bio)
{
struct bio_map_data *bmd = bio->bi_private;
- int ret = 0;
+ struct bio_vec *bvec;
+ int ret = 0, i;
- if (!bio_flagged(bio, BIO_NULL_MAPPED))
- ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
- bmd->nr_sgvecs, bio_data_dir(bio) == READ,
- 0, bmd->is_our_pages);
+ if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
+ /*
+ * if we're in a workqueue, the request is orphaned, so
+ * don't copy into a random user address space, just free.
+ */
+ if (current->mm)
+ ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs,
+ bmd->nr_sgvecs, bio_data_dir(bio) == READ,
+ 0, bmd->is_our_pages);
+ else if (bmd->is_our_pages)
+ bio_for_each_segment_all(bvec, bio, i)
+ __free_page(bvec->bv_page);
+ }
bio_free_map_data(bmd);
bio_put(bio);
return ret;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 2091db8cdd78..85f5c85ec91c 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -58,17 +58,24 @@ static void bdev_inode_switch_bdi(struct inode *inode,
struct backing_dev_info *dst)
{
struct backing_dev_info *old = inode->i_data.backing_dev_info;
+ bool wakeup_bdi = false;
if (unlikely(dst == old)) /* deadlock avoidance */
return;
bdi_lock_two(&old->wb, &dst->wb);
spin_lock(&inode->i_lock);
inode->i_data.backing_dev_info = dst;
- if (inode->i_state & I_DIRTY)
+ if (inode->i_state & I_DIRTY) {
+ if (bdi_cap_writeback_dirty(dst) && !wb_has_dirty_io(&dst->wb))
+ wakeup_bdi = true;
list_move(&inode->i_wb_list, &dst->wb.b_dirty);
+ }
spin_unlock(&inode->i_lock);
spin_unlock(&old->wb.list_lock);
spin_unlock(&dst->wb.list_lock);
+
+ if (wakeup_bdi)
+ bdi_wakeup_thread_delayed(dst);
}
/* Kill _all_ buffers and pagecache , dirty or not.. */
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index e15d2b0d8d3b..0890c83643e9 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -229,7 +229,7 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
if (ret > 0) {
/* we need an acl */
ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS);
- } else {
+ } else if (ret < 0) {
cache_no_acl(inode);
}
} else {
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 290e347b6db3..d85f90c92bb4 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1347,9 +1347,10 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
* returns <0 on error
*/
static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb,
- struct btrfs_extent_item *ei, u32 item_size,
- struct btrfs_extent_inline_ref **out_eiref,
- int *out_type)
+ struct btrfs_key *key,
+ struct btrfs_extent_item *ei, u32 item_size,
+ struct btrfs_extent_inline_ref **out_eiref,
+ int *out_type)
{
unsigned long end;
u64 flags;
@@ -1359,19 +1360,26 @@ static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb,
/* first call */
flags = btrfs_extent_flags(eb, ei);
if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
- info = (struct btrfs_tree_block_info *)(ei + 1);
- *out_eiref =
- (struct btrfs_extent_inline_ref *)(info + 1);
+ if (key->type == BTRFS_METADATA_ITEM_KEY) {
+ /* a skinny metadata extent */
+ *out_eiref =
+ (struct btrfs_extent_inline_ref *)(ei + 1);
+ } else {
+ WARN_ON(key->type != BTRFS_EXTENT_ITEM_KEY);
+ info = (struct btrfs_tree_block_info *)(ei + 1);
+ *out_eiref =
+ (struct btrfs_extent_inline_ref *)(info + 1);
+ }
} else {
*out_eiref = (struct btrfs_extent_inline_ref *)(ei + 1);
}
*ptr = (unsigned long)*out_eiref;
- if ((void *)*ptr >= (void *)ei + item_size)
+ if ((unsigned long)(*ptr) >= (unsigned long)ei + item_size)
return -ENOENT;
}
end = (unsigned long)ei + item_size;
- *out_eiref = (struct btrfs_extent_inline_ref *)*ptr;
+ *out_eiref = (struct btrfs_extent_inline_ref *)(*ptr);
*out_type = btrfs_extent_inline_ref_type(eb, *out_eiref);
*ptr += btrfs_extent_inline_ref_size(*out_type);
@@ -1390,8 +1398,8 @@ static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb,
* <0 on error.
*/
int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
- struct btrfs_extent_item *ei, u32 item_size,
- u64 *out_root, u8 *out_level)
+ struct btrfs_key *key, struct btrfs_extent_item *ei,
+ u32 item_size, u64 *out_root, u8 *out_level)
{
int ret;
int type;
@@ -1402,8 +1410,8 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
return 1;
while (1) {
- ret = __get_extent_inline_ref(ptr, eb, ei, item_size,
- &eiref, &type);
+ ret = __get_extent_inline_ref(ptr, eb, key, ei, item_size,
+ &eiref, &type);
if (ret < 0)
return ret;
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 0f446d7ca2c0..526d09e70c93 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -42,8 +42,8 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
u64 *flags);
int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
- struct btrfs_extent_item *ei, u32 item_size,
- u64 *out_root, u8 *out_level);
+ struct btrfs_key *key, struct btrfs_extent_item *ei,
+ u32 item_size, u64 *out_root, u8 *out_level);
int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
u64 extent_item_objectid,
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index b189bd1e7a3e..ce7067881d36 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -1009,6 +1009,8 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
bytes = min(bytes, working_bytes);
kaddr = kmap_atomic(page_out);
memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
+ if (*pg_index == (vcnt - 1) && *pg_offset == 0)
+ memset(kaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
kunmap_atomic(kaddr);
flush_dcache_page(page_out);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 02fae7f7e42c..7fb054ba1b60 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1089,7 +1089,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_set_node_ptr_generation(parent, parent_slot,
trans->transid);
btrfs_mark_buffer_dirty(parent);
- tree_mod_log_free_eb(root->fs_info, buf);
+ if (last_ref)
+ tree_mod_log_free_eb(root->fs_info, buf);
btrfs_free_tree_block(trans, root, buf, parent_start,
last_ref);
}
@@ -1161,8 +1162,8 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
* time_seq).
*/
static void
-__tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
- struct tree_mod_elem *first_tm)
+__tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
+ u64 time_seq, struct tree_mod_elem *first_tm)
{
u32 n;
struct rb_node *next;
@@ -1172,6 +1173,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
unsigned long p_size = sizeof(struct btrfs_key_ptr);
n = btrfs_header_nritems(eb);
+ tree_mod_log_read_lock(fs_info);
while (tm && tm->seq >= time_seq) {
/*
* all the operations are recorded with the operator used for
@@ -1226,6 +1228,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
if (tm->index != first_tm->index)
break;
}
+ tree_mod_log_read_unlock(fs_info);
btrfs_set_header_nritems(eb, n);
}
@@ -1274,7 +1277,7 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
extent_buffer_get(eb_rewin);
btrfs_tree_read_lock(eb_rewin);
- __tree_mod_log_rewind(eb_rewin, time_seq, tm);
+ __tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm);
WARN_ON(btrfs_header_nritems(eb_rewin) >
BTRFS_NODEPTRS_PER_BLOCK(fs_info->tree_root));
@@ -1350,7 +1353,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
btrfs_set_header_generation(eb, old_generation);
}
if (tm)
- __tree_mod_log_rewind(eb, time_seq, tm);
+ __tree_mod_log_rewind(root->fs_info, eb, time_seq, tm);
else
WARN_ON(btrfs_header_level(eb) != 0);
WARN_ON(btrfs_header_nritems(eb) > BTRFS_NODEPTRS_PER_BLOCK(root));
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index f26f38ccd194..019fc5a68a14 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1843,6 +1843,14 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_delayed_node *delayed_node;
int ret = 0;
+ /*
+ * we don't do delayed inode updates during log recovery because it
+ * leads to enospc problems. This means we also can't do
+ * delayed inode refs
+ */
+ if (BTRFS_I(inode)->root->fs_info->log_root_recovering)
+ return -EAGAIN;
+
delayed_node = btrfs_get_or_create_delayed_node(inode);
if (IS_ERR(delayed_node))
return PTR_ERR(delayed_node);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b8b60b660c8f..7360f03ddbe1 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3161,6 +3161,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
/* send down all the barriers */
head = &info->fs_devices->devices;
list_for_each_entry_rcu(dev, head, dev_list) {
+ if (dev->missing)
+ continue;
if (!dev->bdev) {
errors_send++;
continue;
@@ -3175,6 +3177,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
/* wait for all the barriers */
list_for_each_entry_rcu(dev, head, dev_list) {
+ if (dev->missing)
+ continue;
if (!dev->bdev) {
errors_wait++;
continue;
@@ -3514,6 +3518,11 @@ int close_ctree(struct btrfs_root *root)
btrfs_free_block_groups(fs_info);
+ /*
+ * we must make sure there are no read requests left to
+ * submit after we stop all workers.
+ */
+ invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
btrfs_stop_all_workers(fs_info);
del_fs_roots(fs_info);
@@ -3848,12 +3857,6 @@ again:
if (ret)
break;
- /* opt_discard */
- if (btrfs_test_opt(root, DISCARD))
- ret = btrfs_error_discard_extent(root, start,
- end + 1 - start,
- NULL);
-
clear_extent_dirty(unpin, start, end, GFP_NOFS);
btrfs_error_unpin_extent_range(root, start, end);
cond_resched();
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index df472ab1b5ac..07f167a1d271 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2402,6 +2402,8 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
default:
WARN_ON(1);
}
+ } else {
+ list_del_init(&locked_ref->cluster);
}
spin_unlock(&delayed_refs->lock);
@@ -2424,7 +2426,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
* list before we release it.
*/
if (btrfs_delayed_ref_is_head(ref)) {
- list_del_init(&locked_ref->cluster);
btrfs_delayed_ref_unlock(locked_ref);
locked_ref = NULL;
}
@@ -5276,7 +5277,8 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
update_global_block_rsv(fs_info);
}
-static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
+static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
+ const bool return_free_space)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_group_cache *cache = NULL;
@@ -5300,7 +5302,8 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
if (start < cache->last_byte_to_unpin) {
len = min(len, cache->last_byte_to_unpin - start);
- btrfs_add_free_space(cache, start, len);
+ if (return_free_space)
+ btrfs_add_free_space(cache, start, len);
}
start += len;
@@ -5363,7 +5366,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
end + 1 - start, NULL);
clear_extent_dirty(unpin, start, end, GFP_NOFS);
- unpin_extent_range(root, start, end);
+ unpin_extent_range(root, start, end, true);
cond_resched();
}
@@ -6360,12 +6363,11 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
return -ENOSPC;
}
- if (btrfs_test_opt(root, DISCARD))
- ret = btrfs_discard_extent(root, start, len, NULL);
-
if (pin)
pin_down_extent(root, cache, start, len, 1);
else {
+ if (btrfs_test_opt(root, DISCARD))
+ ret = btrfs_discard_extent(root, start, len, NULL);
btrfs_add_free_space(cache, start, len);
btrfs_update_reserved_bytes(cache, len, RESERVE_FREE);
}
@@ -7298,6 +7300,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
int err = 0;
int ret;
int level;
+ bool root_dropped = false;
path = btrfs_alloc_path();
if (!path) {
@@ -7355,6 +7358,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
while (1) {
btrfs_tree_lock(path->nodes[level]);
btrfs_set_lock_blocking(path->nodes[level]);
+ path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING;
ret = btrfs_lookup_extent_info(trans, root,
path->nodes[level]->start,
@@ -7370,6 +7374,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
break;
btrfs_tree_unlock(path->nodes[level]);
+ path->locks[level] = 0;
WARN_ON(wc->refs[level] != 1);
level--;
}
@@ -7471,13 +7476,23 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
free_extent_buffer(root->commit_root);
kfree(root);
}
+ root_dropped = true;
out_end_trans:
btrfs_end_transaction_throttle(trans, tree_root);
out_free:
kfree(wc);
btrfs_free_path(path);
out:
- if (err)
+ /*
+ * So if we need to stop dropping the snapshot for whatever reason we
+ * need to make sure to add it back to the dead root list so that we
+ * keep trying to do the work later. This also cleans up roots if we
+ * don't have it in the radix (like when we recover after a power fail
+ * or unmount) so we don't leak memory.
+ */
+ if (root_dropped == false)
+ btrfs_add_dead_root(root);
+ if (err && err != -EAGAIN)
btrfs_std_error(root->fs_info, err);
return err;
}
@@ -8550,7 +8565,7 @@ out:
int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
{
- return unpin_extent_range(root, start, end);
+ return unpin_extent_range(root, start, end, false);
}
int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e7e7afb4a872..84ceff6abbc1 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1624,6 +1624,7 @@ again:
* shortening the size of the delalloc range we're searching
*/
free_extent_state(cached_state);
+ cached_state = NULL;
if (!loops) {
unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
max_bytes = PAGE_CACHE_SIZE - offset;
@@ -2356,7 +2357,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
{
int uptodate = (err == 0);
struct extent_io_tree *tree;
- int ret;
+ int ret = 0;
tree = &BTRFS_I(page->mapping->host)->io_tree;
@@ -2370,6 +2371,8 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
if (!uptodate) {
ClearPageUptodate(page);
SetPageError(page);
+ ret = ret < 0 ? ret : -EIO;
+ mapping_set_error(page->mapping, ret);
}
return 0;
}
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index a4a7a1a8da95..0a3809500599 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -263,8 +263,6 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
if (!em)
goto out;
- if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
- list_move(&em->list, &tree->modified_extents);
em->generation = gen;
clear_bit(EXTENT_FLAG_PINNED, &em->flags);
em->mod_start = em->start;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index b193bf324a41..e4bcfec7787e 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -403,7 +403,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
ret = 0;
fail:
while (ret < 0 && !list_empty(&tmplist)) {
- sums = list_entry(&tmplist, struct btrfs_ordered_sum, list);
+ sums = list_entry(tmplist.next, struct btrfs_ordered_sum, list);
list_del(&sums->list);
kfree(sums);
}
@@ -754,7 +754,7 @@ again:
found_next = 1;
if (ret != 0)
goto insert;
- slot = 0;
+ slot = path->slots[0];
}
btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 4205ba752d40..caaf30f9f27f 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1593,22 +1593,10 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
mutex_unlock(&inode->i_mutex);
/*
- * we want to make sure fsync finds this change
- * but we haven't joined a transaction running right now.
- *
- * Later on, someone is sure to update the inode and get the
- * real transid recorded.
- *
- * We set last_trans now to the fs_info generation + 1,
- * this will either be one more than the running transaction
- * or the generation used for the next transaction if there isn't
- * one running right now.
- *
* We also have to set last_sub_trans to the current log transid,
* otherwise subsequent syncs to a file that's been synced in this
* transaction will appear to have already occured.
*/
- BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;
BTRFS_I(inode)->last_sub_trans = root->log_transid;
if (num_written > 0 || num_written == -EIOCBQUEUED) {
err = generic_write_sync(file, pos, num_written);
@@ -1706,25 +1694,37 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
atomic_inc(&root->log_batch);
/*
- * check the transaction that last modified this inode
- * and see if its already been committed
- */
- if (!BTRFS_I(inode)->last_trans) {
- mutex_unlock(&inode->i_mutex);
- goto out;
- }
-
- /*
- * if the last transaction that changed this file was before
- * the current transaction, we can bail out now without any
- * syncing
+ * If the last transaction that changed this file was before the current
+ * transaction and we have the full sync flag set in our inode, we can
+ * bail out now without any syncing.
+ *
+ * Note that we can't bail out if the full sync flag isn't set. This is
+ * because when the full sync flag is set we start all ordered extents
+ * and wait for them to fully complete - when they complete they update
+ * the inode's last_trans field through:
+ *
+ * btrfs_finish_ordered_io() ->
+ * btrfs_update_inode_fallback() ->
+ * btrfs_update_inode() ->
+ * btrfs_set_inode_last_trans()
+ *
+ * So we are sure that last_trans is up to date and can do this check to
+ * bail out safely. For the fast path, when the full sync flag is not
+ * set in our inode, we can not do it because we start only our ordered
+ * extents and don't wait for them to complete (that is when
+ * btrfs_finish_ordered_io runs), so here at this point their last_trans
+ * value might be less than or equal to fs_info->last_trans_committed,
+ * and setting a speculative last_trans for an inode when a buffered
+ * write is made (such as fs_info->generation + 1 for example) would not
+ * be reliable since after setting the value and before fsync is called
+ * any number of transactions can start and commit (transaction kthread
+ * commits the current transaction periodically), and a transaction
+ * commit neither starts nor waits for ordered extents to complete.
*/
smp_mb();
if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
- BTRFS_I(inode)->last_trans <=
- root->fs_info->last_trans_committed) {
- BTRFS_I(inode)->last_trans = 0;
-
+ (full_sync && BTRFS_I(inode)->last_trans <=
+ root->fs_info->last_trans_committed)) {
/*
* We'v had everything committed since the last time we were
* modified so clear this flag in case it was set for whatever
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index e53009657f0e..0cbe95dc8113 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -835,7 +835,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
if (!matched) {
__btrfs_remove_free_space_cache(ctl);
- btrfs_err(fs_info, "block group %llu has wrong amount of free space",
+ btrfs_warn(fs_info, "block group %llu has wrong amount of free space",
block_group->key.objectid);
ret = -1;
}
@@ -847,7 +847,7 @@ out:
spin_unlock(&block_group->lock);
ret = 0;
- btrfs_err(fs_info, "failed to load free space cache for block group %llu",
+ btrfs_warn(fs_info, "failed to load free space cache for block group %llu, rebuild it now",
block_group->key.objectid);
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 17f3064b4a3e..d20db6437723 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2419,10 +2419,23 @@ out_unlock:
return ret;
}
+/*
+ * Release a new_sa_defrag_extent together with every old_sa_defrag_extent
+ * still linked on its ->head list.
+ */
+static void free_sa_defrag_extent(struct new_sa_defrag_extent *new)
+{
+ struct old_sa_defrag_extent *old, *tmp;
+
+ /* A NULL argument is accepted and treated as a no-op. */
+ if (!new)
+ return;
+
+ /* _safe iteration: each entry is unlinked and freed while walking. */
+ list_for_each_entry_safe(old, tmp, &new->head, list) {
+ list_del(&old->list);
+ kfree(old);
+ }
+ kfree(new);
+}
+
static void relink_file_extents(struct new_sa_defrag_extent *new)
{
struct btrfs_path *path;
- struct old_sa_defrag_extent *old, *tmp;
struct sa_defrag_extent_backref *backref;
struct sa_defrag_extent_backref *prev = NULL;
struct inode *inode;
@@ -2465,16 +2478,11 @@ static void relink_file_extents(struct new_sa_defrag_extent *new)
kfree(prev);
btrfs_free_path(path);
-
- list_for_each_entry_safe(old, tmp, &new->head, list) {
- list_del(&old->list);
- kfree(old);
- }
out:
+ free_sa_defrag_extent(new);
+
atomic_dec(&root->fs_info->defrag_running);
wake_up(&root->fs_info->transaction_wait);
-
- kfree(new);
}
static struct new_sa_defrag_extent *
@@ -2484,7 +2492,7 @@ record_old_file_extents(struct inode *inode,
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_path *path;
struct btrfs_key key;
- struct old_sa_defrag_extent *old, *tmp;
+ struct old_sa_defrag_extent *old;
struct new_sa_defrag_extent *new;
int ret;
@@ -2532,7 +2540,7 @@ record_old_file_extents(struct inode *inode,
if (slot >= btrfs_header_nritems(l)) {
ret = btrfs_next_leaf(root, path);
if (ret < 0)
- goto out_free_list;
+ goto out_free_path;
else if (ret > 0)
break;
continue;
@@ -2561,7 +2569,7 @@ record_old_file_extents(struct inode *inode,
old = kmalloc(sizeof(*old), GFP_NOFS);
if (!old)
- goto out_free_list;
+ goto out_free_path;
offset = max(new->file_pos, key.offset);
end = min(new->file_pos + new->len, key.offset + num_bytes);
@@ -2583,15 +2591,10 @@ next:
return new;
-out_free_list:
- list_for_each_entry_safe(old, tmp, &new->head, list) {
- list_del(&old->list);
- kfree(old);
- }
out_free_path:
btrfs_free_path(path);
out_kfree:
- kfree(new);
+ free_sa_defrag_extent(new);
return NULL;
}
@@ -2652,7 +2655,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
EXTENT_DEFRAG, 1, cached_state);
if (ret) {
u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
- if (last_snapshot >= BTRFS_I(inode)->generation)
+ if (0 && last_snapshot >= BTRFS_I(inode)->generation)
/* the inode is shared */
new = record_old_file_extents(inode, ordered_extent);
@@ -2743,8 +2746,14 @@ out:
btrfs_remove_ordered_extent(inode, ordered_extent);
/* for snapshot-aware defrag */
- if (new)
- relink_file_extents(new);
+ if (new) {
+ if (ret) {
+ free_sa_defrag_extent(new);
+ atomic_dec(&root->fs_info->defrag_running);
+ } else {
+ relink_file_extents(new);
+ }
+ }
/* once for us */
btrfs_put_ordered_extent(ordered_extent);
@@ -3536,7 +3545,8 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
* without delay
*/
if (!btrfs_is_free_space_inode(inode)
- && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
+ && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
+ && !root->fs_info->log_root_recovering) {
btrfs_update_root_times(trans, root);
ret = btrfs_delayed_update_inode(trans, root, inode);
@@ -4518,8 +4528,12 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
* these flags set. For all other operations the VFS set these flags
* explicitly if it wants a timestamp update.
*/
- if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME))))
- inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb);
+ if (newsize != oldsize) {
+ inode_inc_iversion(inode);
+ if (!(mask & (ATTR_CTIME | ATTR_MTIME)))
+ inode->i_ctime = inode->i_mtime =
+ current_fs_time(inode->i_sb);
+ }
if (newsize > oldsize) {
truncate_pagecache(inode, oldsize, newsize);
@@ -6811,7 +6825,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
em->block_start != EXTENT_MAP_HOLE)) {
int type;
- int ret;
u64 block_start, orig_start, orig_block_len, ram_bytes;
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
@@ -8146,7 +8159,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
/* check for collisions, even if the name isn't there */
- ret = btrfs_check_dir_item_collision(root, new_dir->i_ino,
+ ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino,
new_dentry->d_name.name,
new_dentry->d_name.len);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 0f81d67cdc8d..dbefa6c609f4 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1528,6 +1528,12 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
printk(KERN_INFO "btrfs: Snapshot src from "
"another FS\n");
ret = -EINVAL;
+ } else if (!inode_owner_or_capable(src_inode)) {
+ /*
+ * Subvolume creation is not restricted, but snapshots
+ * are limited to own subvolumes only
+ */
+ ret = -EPERM;
} else {
ret = btrfs_mksubvol(&file->f_path, name, namelen,
BTRFS_I(src_inode)->root,
@@ -2093,7 +2099,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT);
if (err == -EINTR)
- goto out;
+ goto out_drop_write;
dentry = lookup_one_len(vol_args->name, parent, namelen);
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
@@ -2235,6 +2241,7 @@ out_dput:
dput(dentry);
out_unlock_dir:
mutex_unlock(&dir->i_mutex);
+out_drop_write:
mnt_drop_write_file(file);
out:
kfree(vol_args);
@@ -2565,6 +2572,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
if (off + len == src->i_size)
len = ALIGN(src->i_size, bs) - off;
+ if (len == 0) {
+ ret = 0;
+ goto out_unlock;
+ }
+
/* verify the end result is block aligned */
if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
!IS_ALIGNED(destoff, bs))
@@ -3299,6 +3311,9 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg)
switch (p->cmd) {
case BTRFS_IOCTL_DEV_REPLACE_CMD_START:
+ if (root->fs_info->sb->s_flags & MS_RDONLY)
+ return -EROFS;
+
if (atomic_xchg(
&root->fs_info->mutually_exclusive_operation_running,
1)) {
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 4febca4fc2de..0e7f7765b3bb 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -691,6 +691,7 @@ struct backref_node *build_backref_tree(struct reloc_control *rc,
int cowonly;
int ret;
int err = 0;
+ bool need_check = true;
path1 = btrfs_alloc_path();
path2 = btrfs_alloc_path();
@@ -914,6 +915,7 @@ again:
cur->bytenr);
lower = cur;
+ need_check = true;
for (; level < BTRFS_MAX_LEVEL; level++) {
if (!path2->nodes[level]) {
BUG_ON(btrfs_root_bytenr(&root->root_item) !=
@@ -957,18 +959,19 @@ again:
/*
* add the block to pending list if we
- * need check its backrefs. only block
- * at 'cur->level + 1' is added to the
- * tail of pending list. this guarantees
- * we check backrefs from lower level
- * blocks to upper level blocks.
+ * need to check its backrefs; we only do this once
+ * while walking up a tree as we will catch
+ * anything else later on.
*/
- if (!upper->checked &&
- level == cur->level + 1) {
+ if (!upper->checked && need_check) {
+ need_check = false;
list_add_tail(&edge->list[UPPER],
&list);
- } else
+ } else {
+ if (upper->checked)
+ need_check = true;
INIT_LIST_HEAD(&edge->list[UPPER]);
+ }
} else {
upper = rb_entry(rb_node, struct backref_node,
rb_node);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 79bd479317cb..e4f69e3b78b9 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -545,8 +545,9 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
do {
- ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
- &ref_root, &ref_level);
+ ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
+ item_size, &ref_root,
+ &ref_level);
printk_in_rcu(KERN_WARNING
"btrfs: %s at logical %llu on dev %s, "
"sector %llu: metadata %s (level %d) in tree "
@@ -2501,7 +2502,7 @@ again:
ret = scrub_extent(sctx, extent_logical, extent_len,
extent_physical, extent_dev, flags,
generation, extent_mirror_num,
- extent_physical);
+ extent_logical - logical + physical);
if (ret)
goto out;
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index ff40f1c00ce3..414c1b9eb896 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1550,6 +1550,10 @@ static int lookup_dir_item_inode(struct btrfs_root *root,
goto out;
}
btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
+ if (key.type == BTRFS_ROOT_ITEM_KEY) {
+ ret = -ENOENT;
+ goto out;
+ }
*found_inode = key.objectid;
*found_type = btrfs_dir_type(path->nodes[0], di);
@@ -2524,7 +2528,8 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir)
di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
btrfs_dir_item_key_to_cpu(eb, di, &di_key);
- if (di_key.objectid < sctx->send_progress) {
+ if (di_key.type != BTRFS_ROOT_ITEM_KEY &&
+ di_key.objectid < sctx->send_progress) {
ret = 1;
goto out;
}
@@ -4579,6 +4584,41 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
send_root = BTRFS_I(file_inode(mnt_file))->root;
fs_info = send_root->fs_info;
+ /*
+ * This is done when we lookup the root, it should already be complete
+ * by the time we get here.
+ */
+ WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE);
+
+ /*
+ * If we just created this root we need to make sure that the orphan
+ * cleanup has been done and committed since we search the commit root,
+ * so check its commit root transid with our otransid and if they match
+ * commit the transaction to make sure everything is updated.
+ */
+ down_read(&send_root->fs_info->extent_commit_sem);
+ if (btrfs_header_generation(send_root->commit_root) ==
+ btrfs_root_otransid(&send_root->root_item)) {
+ struct btrfs_trans_handle *trans;
+
+ up_read(&send_root->fs_info->extent_commit_sem);
+
+ trans = btrfs_attach_transaction_barrier(send_root);
+ if (IS_ERR(trans)) {
+ if (PTR_ERR(trans) != -ENOENT) {
+ ret = PTR_ERR(trans);
+ goto out;
+ }
+ /* ENOENT means there's no transaction */
+ } else {
+ ret = btrfs_commit_transaction(trans, send_root);
+ if (ret)
+ goto out;
+ }
+ } else {
+ up_read(&send_root->fs_info->extent_commit_sem);
+ }
+
arg = memdup_user(arg_, sizeof(*arg));
if (IS_ERR(arg)) {
ret = PTR_ERR(arg);
@@ -4587,8 +4627,8 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
}
if (!access_ok(VERIFY_READ, arg->clone_sources,
- sizeof(*arg->clone_sources *
- arg->clone_sources_count))) {
+ sizeof(*arg->clone_sources) *
+ arg->clone_sources_count)) {
ret = -EFAULT;
goto out;
}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 0544587d74f4..1f214689fa5e 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -524,7 +524,6 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
if (transid <= root->fs_info->last_trans_committed)
goto out;
- ret = -EINVAL;
/* find specified transaction */
spin_lock(&root->fs_info->trans_lock);
list_for_each_entry(t, &root->fs_info->trans_list, list) {
@@ -540,9 +539,16 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
}
}
spin_unlock(&root->fs_info->trans_lock);
- /* The specified transaction doesn't exist */
- if (!cur_trans)
+
+ /*
+ * The specified transaction doesn't exist, or we
+ * raced with btrfs_commit_transaction
+ */
+ if (!cur_trans) {
+ if (transid > root->fs_info->last_trans_committed)
+ ret = -EINVAL;
goto out;
+ }
} else {
/* find newest transaction that is committing | committed */
spin_lock(&root->fs_info->trans_lock);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index c276ac9a0ec3..7d3331cbccba 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -943,7 +943,7 @@ again:
base = btrfs_item_ptr_offset(leaf, path->slots[0]);
while (cur_offset < item_size) {
- extref = (struct btrfs_inode_extref *)base + cur_offset;
+ extref = (struct btrfs_inode_extref *)(base + cur_offset);
victim_name_len = btrfs_inode_extref_name_len(leaf, extref);
@@ -3314,7 +3314,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
btrfs_set_token_file_extent_type(leaf, fi,
BTRFS_FILE_EXTENT_REG,
&token);
- if (em->block_start == 0)
+ if (em->block_start == EXTENT_MAP_HOLE)
skip_csum = true;
}
@@ -3728,8 +3728,9 @@ next_slot:
}
log_extents:
+ btrfs_release_path(path);
+ btrfs_release_path(dst_path);
if (fast_search) {
- btrfs_release_path(dst_path);
ret = btrfs_log_changed_extents(trans, root, inode, dst_path);
if (ret) {
err = ret;
@@ -3746,8 +3747,6 @@ log_extents:
}
if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) {
- btrfs_release_path(path);
- btrfs_release_path(dst_path);
ret = log_directory_changes(trans, root, inode, path, dst_path);
if (ret) {
err = ret;
diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c
index 7b417e20efe2..b0a523b2c60e 100644
--- a/fs/btrfs/ulist.c
+++ b/fs/btrfs/ulist.c
@@ -205,6 +205,10 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
u64 new_alloced = ulist->nodes_alloced + 128;
struct ulist_node *new_nodes;
void *old = NULL;
+ int i;
+
+ for (i = 0; i < ulist->nnodes; i++)
+ rb_erase(&ulist->nodes[i].rb_node, &ulist->root);
/*
* if nodes_alloced == ULIST_SIZE no memory has been allocated
@@ -224,6 +228,17 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
ulist->nodes = new_nodes;
ulist->nodes_alloced = new_alloced;
+
+ /*
+ * krealloc actually uses memcpy, which does not copy rb_node
+ * pointers, so we have to do it ourselves. Otherwise we may
+ * be bitten by crashes.
+ */
+ for (i = 0; i < ulist->nnodes; i++) {
+ ret = ulist_rbtree_insert(ulist, &ulist->nodes[i]);
+ if (ret < 0)
+ return ret;
+ }
}
ulist->nodes[ulist->nnodes].val = val;
ulist->nodes[ulist->nnodes].aux = aux;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8bffb9174afb..7fc774639a78 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1384,6 +1384,22 @@ out:
return ret;
}
+/*
+ * Function to update ctime/mtime for a given device path.
+ * Mainly used for ctime/mtime based probe like libblkid.
+ *
+ * Best effort: if @path_name cannot be opened the timestamps are simply
+ * left untouched. filp_open() reports failure with an ERR_PTR()-encoded
+ * pointer, never NULL, so the result must be tested with IS_ERR().
+ */
+static void update_dev_time(char *path_name)
+{
+ struct file *filp;
+
+ filp = filp_open(path_name, O_RDWR, 0);
+ if (IS_ERR(filp))
+ return;
+ file_update_time(filp);
+ filp_close(filp, NULL);
+}
+
static int btrfs_rm_dev_item(struct btrfs_root *root,
struct btrfs_device *device)
{
@@ -1612,11 +1628,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
struct btrfs_fs_devices *fs_devices;
fs_devices = root->fs_info->fs_devices;
while (fs_devices) {
- if (fs_devices->seed == cur_devices)
+ if (fs_devices->seed == cur_devices) {
+ fs_devices->seed = cur_devices->seed;
break;
+ }
fs_devices = fs_devices->seed;
}
- fs_devices->seed = cur_devices->seed;
cur_devices->seed = NULL;
lock_chunks(root);
__btrfs_close_devices(cur_devices);
@@ -1642,10 +1659,14 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
ret = 0;
- /* Notify udev that device has changed */
- if (bdev)
+ if (bdev) {
+ /* Notify udev that device has changed */
btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
+ /* Update ctime/mtime for device path for libblkid */
+ update_dev_time(device_path);
+ }
+
error_brelse:
brelse(bh);
if (bdev)
@@ -1817,7 +1838,6 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
fs_devices->seeding = 0;
fs_devices->num_devices = 0;
fs_devices->open_devices = 0;
- fs_devices->total_devices = 0;
fs_devices->seed = seed_devices;
generate_random_uuid(fs_devices->fsid);
@@ -2089,6 +2109,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
ret = btrfs_commit_transaction(trans, root);
}
+ /* Update ctime/mtime for libblkid */
+ update_dev_time(device_path);
return ret;
error_trans:
@@ -4248,6 +4270,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
btrfs_emerg(fs_info, "Invalid mapping for %Lu-%Lu, got "
"%Lu-%Lu\n", logical, logical+len, em->start,
em->start + em->len);
+ free_extent_map(em);
return 1;
}
@@ -4429,6 +4452,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, "
"found %Lu-%Lu\n", logical, em->start,
em->start + em->len);
+ free_extent_map(em);
return -EINVAL;
}
diff --git a/fs/buffer.c b/fs/buffer.c
index d2a4d1bb2d57..83fedaa53b55 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -620,14 +620,16 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
static void __set_page_dirty(struct page *page,
struct address_space *mapping, int warn)
{
- spin_lock_irq(&mapping->tree_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&mapping->tree_lock, flags);
if (page->mapping) { /* Race with truncate? */
WARN_ON_ONCE(warn && !PageUptodate(page));
account_page_dirtied(page, mapping);
radix_tree_tag_set(&mapping->page_tree,
page_index(page), PAGECACHE_TAG_DIRTY);
}
- spin_unlock_irq(&mapping->tree_lock);
+ spin_unlock_irqrestore(&mapping->tree_lock, flags);
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
}
@@ -983,7 +985,8 @@ grow_dev_page(struct block_device *bdev, sector_t block,
bh = page_buffers(page);
if (bh->b_size == size) {
end_block = init_page_buffers(page, bdev,
- index << sizebits, size);
+ (sector_t)index << sizebits,
+ size);
goto done;
}
if (!try_to_free_buffers(page))
@@ -1004,7 +1007,8 @@ grow_dev_page(struct block_device *bdev, sector_t block,
*/
spin_lock(&inode->i_mapping->private_lock);
link_dev_buffers(page, bh);
- end_block = init_page_buffers(page, bdev, index << sizebits, size);
+ end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
+ size);
spin_unlock(&inode->i_mapping->private_lock);
done:
ret = (block < end_block) ? 1 : -ENXIO;
@@ -2014,6 +2018,7 @@ int generic_write_end(struct file *file, struct address_space *mapping,
struct page *page, void *fsdata)
{
struct inode *inode = mapping->host;
+ loff_t old_size = inode->i_size;
int i_size_changed = 0;
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
@@ -2033,6 +2038,8 @@ int generic_write_end(struct file *file, struct address_space *mapping,
unlock_page(page);
page_cache_release(page);
+ if (old_size < pos)
+ pagecache_isize_extended(inode, old_size, pos);
/*
* Don't mark the inode dirty under page lock. First, it unnecessarily
* makes the holding time of page lock longer. Second, it forces lock
@@ -2250,6 +2257,11 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
err = 0;
balance_dirty_pages_ratelimited(mapping);
+
+ if (unlikely(fatal_signal_pending(current))) {
+ err = -EINTR;
+ goto out;
+ }
}
/* page covers the boundary, find the boundary offset */
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 3e68ac101040..5da06f020986 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -213,9 +213,13 @@ static int readpage_nounlock(struct file *filp, struct page *page)
if (err < 0) {
SetPageError(page);
goto out;
- } else if (err < PAGE_CACHE_SIZE) {
+ } else {
+ if (err < PAGE_CACHE_SIZE) {
/* zero fill remainder of page */
- zero_user_segment(page, err, PAGE_CACHE_SIZE);
+ zero_user_segment(page, err, PAGE_CACHE_SIZE);
+ } else {
+ flush_dcache_page(page);
+ }
}
SetPageUptodate(page);
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index f02d82b7933e..ccb43298e272 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -103,7 +103,7 @@ static unsigned fpos_off(loff_t p)
/*
* When possible, we try to satisfy a readdir by peeking at the
* dcache. We make this work by carefully ordering dentries on
- * d_u.d_child when we initially get results back from the MDS, and
+ * d_child when we initially get results back from the MDS, and
* falling back to a "normal" sync readdir if any dentries in the dir
* are dropped.
*
@@ -139,11 +139,11 @@ static int __dcache_readdir(struct file *filp,
p = parent->d_subdirs.prev;
dout(" initial p %p/%p\n", p->prev, p->next);
} else {
- p = last->d_u.d_child.prev;
+ p = last->d_child.prev;
}
more:
- dentry = list_entry(p, struct dentry, d_u.d_child);
+ dentry = list_entry(p, struct dentry, d_child);
di = ceph_dentry(dentry);
while (1) {
dout(" p %p/%p %s d_subdirs %p/%p\n", p->prev, p->next,
@@ -165,7 +165,7 @@ more:
!dentry->d_inode ? " null" : "");
spin_unlock(&dentry->d_lock);
p = p->prev;
- dentry = list_entry(p, struct dentry, d_u.d_child);
+ dentry = list_entry(p, struct dentry, d_child);
di = ceph_dentry(dentry);
}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 656e16907430..5de16f5ac7e9 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -313,9 +313,9 @@ static int striped_read(struct inode *inode,
{
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_inode_info *ci = ceph_inode(inode);
- u64 pos, this_len;
+ u64 pos, this_len, left;
int io_align, page_align;
- int left, pages_left;
+ int pages_left;
int read;
struct page **page_pos;
int ret;
@@ -346,47 +346,40 @@ more:
ret = 0;
hit_stripe = this_len < left;
was_short = ret >= 0 && ret < this_len;
- dout("striped_read %llu~%u (read %u) got %d%s%s\n", pos, left, read,
+ dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read,
ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
- if (ret > 0) {
- int didpages = (page_align + ret) >> PAGE_CACHE_SHIFT;
-
- if (read < pos - off) {
- dout(" zero gap %llu to %llu\n", off + read, pos);
- ceph_zero_page_vector_range(page_align + read,
- pos - off - read, pages);
+ if (ret >= 0) {
+ int didpages;
+ if (was_short && (pos + ret < inode->i_size)) {
+ u64 tmp = min(this_len - ret,
+ inode->i_size - pos - ret);
+ dout(" zero gap %llu to %llu\n",
+ pos + ret, pos + ret + tmp);
+ ceph_zero_page_vector_range(page_align + read + ret,
+ tmp, pages);
+ ret += tmp;
}
+
+ didpages = (page_align + ret) >> PAGE_CACHE_SHIFT;
pos += ret;
read = pos - off;
left -= ret;
page_pos += didpages;
pages_left -= didpages;
- /* hit stripe? */
- if (left && hit_stripe)
+ /* hit stripe and need to continue */
+ if (left && hit_stripe && pos < inode->i_size)
goto more;
}
- if (was_short) {
+ if (read > 0) {
+ ret = read;
/* did we bounce off eof? */
if (pos + left > inode->i_size)
*checkeof = 1;
-
- /* zero trailing bytes (inside i_size) */
- if (left > 0 && pos < inode->i_size) {
- if (pos + left > inode->i_size)
- left = inode->i_size - pos;
-
- dout("zero tail %d\n", left);
- ceph_zero_page_vector_range(page_align + read, left,
- pages);
- read += left;
- }
}
- if (ret >= 0)
- ret = read;
dout("striped_read returns %d\n", ret);
return ret;
}
@@ -618,6 +611,8 @@ out:
if (check_caps)
ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY,
NULL);
+ } else if (ret != -EOLDSNAPC && written > 0) {
+ ret = written;
}
return ret;
}
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index be0f7e20d62e..0cf23a7b88c2 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -867,9 +867,9 @@ static void ceph_set_dentry_offset(struct dentry *dn)
spin_lock(&dir->d_lock);
spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
- list_move(&dn->d_u.d_child, &dir->d_subdirs);
+ list_move(&dn->d_child, &dir->d_subdirs);
dout("set_dentry_offset %p %lld (%p %p)\n", dn, di->offset,
- dn->d_u.d_child.prev, dn->d_u.d_child.next);
+ dn->d_child.prev, dn->d_child.next);
spin_unlock(&dn->d_lock);
spin_unlock(&dir->d_lock);
}
@@ -1296,7 +1296,7 @@ retry_lookup:
/* reorder parent's d_subdirs */
spin_lock(&parent->d_lock);
spin_lock_nested(&dn->d_lock, DENTRY_D_LOCK_NESTED);
- list_move(&dn->d_u.d_child, &parent->d_subdirs);
+ list_move(&dn->d_child, &parent->d_subdirs);
spin_unlock(&dn->d_lock);
spin_unlock(&parent->d_lock);
}
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index e0b4ef31d3c8..669622fd1ae3 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -196,8 +196,10 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len,
&dl.object_no, &dl.object_offset,
&olen);
- if (r < 0)
+ if (r < 0) {
+ up_read(&osdc->map_sem);
return -EIO;
+ }
dl.file_offset -= dl.object_offset;
dl.object_size = ceph_file_layout_object_size(ci->i_layout);
dl.block_size = ceph_file_layout_su(ci->i_layout);
@@ -209,8 +211,12 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx",
ceph_ino(inode), dl.object_no);
- ceph_calc_ceph_pg(&pgid, dl.object_name, osdc->osdmap,
- ceph_file_layout_pg_pool(ci->i_layout));
+ r = ceph_calc_ceph_pg(&pgid, dl.object_name, osdc->osdmap,
+ ceph_file_layout_pg_pool(ci->i_layout));
+ if (r < 0) {
+ up_read(&osdc->map_sem);
+ return r;
+ }
dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid);
if (dl.osd >= 0) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 4d2920304be8..d6a536886472 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -414,6 +414,9 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
{
struct ceph_mds_session *s;
+ if (mds >= mdsc->mdsmap->m_max_mds)
+ return ERR_PTR(-EINVAL);
+
s = kzalloc(sizeof(*s), GFP_NOFS);
if (!s)
return ERR_PTR(-ENOMEM);
@@ -639,6 +642,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
req->r_unsafe_dir = NULL;
}
+ complete_all(&req->r_safe_completion);
+
ceph_mdsc_put_request(req);
}
@@ -1840,8 +1845,11 @@ static int __do_request(struct ceph_mds_client *mdsc,
int mds = -1;
int err = -EAGAIN;
- if (req->r_err || req->r_got_result)
+ if (req->r_err || req->r_got_result) {
+ if (req->r_aborted)
+ __unregister_request(mdsc, req);
goto out;
+ }
if (req->r_timeout &&
time_after_eq(jiffies, req->r_started + req->r_timeout)) {
@@ -2151,7 +2159,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
if (head->safe) {
req->r_got_safe = true;
__unregister_request(mdsc, req);
- complete_all(&req->r_safe_completion);
if (req->r_got_unsafe) {
/*
@@ -3040,8 +3047,10 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
fsc->mdsc = mdsc;
mutex_init(&mdsc->mutex);
mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS);
- if (mdsc->mdsmap == NULL)
+ if (mdsc->mdsmap == NULL) {
+ kfree(mdsc);
return -ENOMEM;
+ }
init_completion(&mdsc->safe_umount_waiters);
init_waitqueue_head(&mdsc->session_close_wq);
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 9278dec9e940..d4d38977dcbb 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -138,6 +138,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
m->m_info[mds].export_targets =
kcalloc(num_export_targets, sizeof(u32),
GFP_NOFS);
+ if (m->m_info[mds].export_targets == NULL)
+ goto badmem;
for (j = 0; j < num_export_targets; j++)
m->m_info[mds].export_targets[j] =
ceph_decode_32(&pexport_targets);
@@ -170,7 +172,7 @@ bad:
DUMP_PREFIX_OFFSET, 16, 1,
start, end - start, true);
ceph_mdsmap_destroy(m);
- return ERR_PTR(-EINVAL);
+ return ERR_PTR(err);
}
void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 7d377c9a5e35..6627b26a800c 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -357,7 +357,7 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt,
}
err = -EINVAL;
dev_name_end--; /* back up to ':' separator */
- if (*dev_name_end != ':') {
+ if (dev_name_end < dev_name || *dev_name_end != ':') {
pr_err("device name is missing path (no : separator in %s)\n",
dev_name);
goto out;
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 9b6b2b6dd164..be661d8f532a 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -675,17 +675,18 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
if (!ceph_is_valid_xattr(name))
return -ENODATA;
- spin_lock(&ci->i_ceph_lock);
- dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
- ci->i_xattrs.version, ci->i_xattrs.index_version);
/* let's see if a virtual xattr was requested */
vxattr = ceph_match_vxattr(inode, name);
if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) {
err = vxattr->getxattr_cb(ci, value, size);
- goto out;
+ return err;
}
+ spin_lock(&ci->i_ceph_lock);
+ dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
+ ci->i_xattrs.version, ci->i_xattrs.index_version);
+
if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
(ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
goto get_xattr;
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 0227b45ef00a..15e9505aa35f 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -290,7 +290,8 @@ int
cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
const struct nls_table *cp, int mapChars)
{
- int i, j, charlen;
+ int i, charlen;
+ int j = 0;
char src_char;
__le16 dst_char;
wchar_t tmp;
@@ -298,12 +299,11 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
if (!mapChars)
return cifs_strtoUTF16(target, source, PATH_MAX, cp);
- for (i = 0, j = 0; i < srclen; j++) {
+ for (i = 0; i < srclen; j++) {
src_char = source[i];
charlen = 1;
switch (src_char) {
case 0:
- put_unaligned(0, &target[j]);
goto ctoUTF16_out;
case ':':
dst_char = cpu_to_le16(UNI_COLON);
@@ -350,6 +350,7 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
}
ctoUTF16_out:
+ put_unaligned(0, &target[j]); /* Null terminate target unicode string */
return j;
}
diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h
index 4fb097468e21..fe8d6276410a 100644
--- a/fs/cifs/cifs_unicode.h
+++ b/fs/cifs/cifs_unicode.h
@@ -327,14 +327,14 @@ UniToupper(register wchar_t uc)
/*
* UniStrupr: Upper case a unicode string
*/
-static inline wchar_t *
-UniStrupr(register wchar_t *upin)
+static inline __le16 *
+UniStrupr(register __le16 *upin)
{
- register wchar_t *up;
+ register __le16 *up;
up = upin;
while (*up) { /* For all characters */
- *up = UniToupper(*up);
+ *up = cpu_to_le16(UniToupper(le16_to_cpu(*up)));
up++;
}
return upin; /* Return input pointer */
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 51f5e0ee7237..494b68349667 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -1027,15 +1027,30 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode,
__u32 secdesclen = 0;
struct cifs_ntsd *pntsd = NULL; /* acl obtained from server */
struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
+ struct cifs_tcon *tcon;
+
+ if (IS_ERR(tlink))
+ return PTR_ERR(tlink);
+ tcon = tlink_tcon(tlink);
cifs_dbg(NOISY, "set ACL from mode for %s\n", path);
/* Get the security descriptor */
- pntsd = get_cifs_acl(CIFS_SB(inode->i_sb), inode, path, &secdesclen);
+
+ if (tcon->ses->server->ops->get_acl == NULL) {
+ cifs_put_tlink(tlink);
+ return -EOPNOTSUPP;
+ }
+
+ pntsd = tcon->ses->server->ops->get_acl(cifs_sb, inode, path,
+ &secdesclen);
if (IS_ERR(pntsd)) {
rc = PTR_ERR(pntsd);
cifs_dbg(VFS, "%s: error %d getting sec desc\n", __func__, rc);
- goto out;
+ cifs_put_tlink(tlink);
+ return rc;
}
/*
@@ -1048,6 +1063,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode,
pnntsd = kmalloc(secdesclen, GFP_KERNEL);
if (!pnntsd) {
kfree(pntsd);
+ cifs_put_tlink(tlink);
return -ENOMEM;
}
@@ -1056,14 +1072,18 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode,
cifs_dbg(NOISY, "build_sec_desc rc: %d\n", rc);
+ if (tcon->ses->server->ops->set_acl == NULL)
+ rc = -EOPNOTSUPP;
+
if (!rc) {
/* Set the security descriptor */
- rc = set_cifs_acl(pnntsd, secdesclen, inode, path, aclflag);
+ rc = tcon->ses->server->ops->set_acl(pnntsd, secdesclen, inode,
+ path, aclflag);
cifs_dbg(NOISY, "set_cifs_acl rc: %d\n", rc);
}
+ cifs_put_tlink(tlink);
kfree(pnntsd);
kfree(pntsd);
-out:
return rc;
}
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 71436d1fca13..5c807b23ca67 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -389,7 +389,7 @@ find_domain_name(struct cifs_ses *ses, const struct nls_table *nls_cp)
if (blobptr + attrsize > blobend)
break;
if (type == NTLMSSP_AV_NB_DOMAIN_NAME) {
- if (!attrsize)
+ if (!attrsize || attrsize >= CIFS_MAX_DOMAINNAME_LEN)
break;
if (!ses->domainName) {
ses->domainName =
@@ -414,7 +414,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
int rc = 0;
int len;
char nt_hash[CIFS_NTHASH_SIZE];
- wchar_t *user;
+ __le16 *user;
wchar_t *domain;
wchar_t *server;
@@ -439,7 +439,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
return rc;
}
- /* convert ses->user_name to unicode and uppercase */
+ /* convert ses->user_name to unicode */
len = ses->user_name ? strlen(ses->user_name) : 0;
user = kmalloc(2 + (len * 2), GFP_KERNEL);
if (user == NULL) {
@@ -448,7 +448,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
}
if (len) {
- len = cifs_strtoUTF16((__le16 *)user, ses->user_name, len, nls_cp);
+ len = cifs_strtoUTF16(user, ses->user_name, len, nls_cp);
UniStrupr(user);
} else {
memset(user, '\0', 2);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 4f07f6fbe494..f74dfa89c4c4 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -44,6 +44,7 @@
#define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1)
#define MAX_SERVER_SIZE 15
#define MAX_SHARE_SIZE 80
+#define CIFS_MAX_DOMAINNAME_LEN 256 /* max domain name length */
#define MAX_USERNAME_SIZE 256 /* reasonable maximum for current servers */
#define MAX_PASSWORD_SIZE 512 /* max for windows seems to be 256 wide chars */
@@ -73,11 +74,6 @@
#define SERVER_NAME_LENGTH 40
#define SERVER_NAME_LEN_WITH_NULL (SERVER_NAME_LENGTH + 1)
-/* used to define string lengths for reversing unicode strings */
-/* (256+1)*2 = 514 */
-/* (max path length + 1 for null) * 2 for unicode */
-#define MAX_NAME 514
-
/* SMB echo "timeout" -- FIXME: tunable? */
#define SMB_ECHO_INTERVAL (60 * HZ)
@@ -369,6 +365,18 @@ struct smb_version_operations {
void (*new_lease_key)(struct cifs_fid *fid);
int (*calc_signature)(struct smb_rqst *rqst,
struct TCP_Server_Info *server);
+ ssize_t (*query_all_EAs)(const unsigned int, struct cifs_tcon *,
+ const unsigned char *, const unsigned char *, char *,
+ size_t, const struct nls_table *, int);
+ int (*set_EA)(const unsigned int, struct cifs_tcon *, const char *,
+ const char *, const void *, const __u16,
+ const struct nls_table *, int);
+ struct cifs_ntsd * (*get_acl)(struct cifs_sb_info *, struct inode *,
+ const char *, u32 *);
+ int (*set_acl)(struct cifs_ntsd *, __u32, struct inode *, const char *,
+ int);
+ /* check if we need to issue closedir */
+ bool (*dir_needs_close)(struct cifsFileInfo *);
};
struct smb_version_values {
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index a58dc77cc443..d17c5d72cd29 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -3306,11 +3306,13 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL,
return 0;
}
cifs_acl->version = cpu_to_le16(1);
- if (acl_type == ACL_TYPE_ACCESS)
+ if (acl_type == ACL_TYPE_ACCESS) {
cifs_acl->access_entry_count = cpu_to_le16(count);
- else if (acl_type == ACL_TYPE_DEFAULT)
+ cifs_acl->default_entry_count = __constant_cpu_to_le16(0xFFFF);
+ } else if (acl_type == ACL_TYPE_DEFAULT) {
cifs_acl->default_entry_count = cpu_to_le16(count);
- else {
+ cifs_acl->access_entry_count = __constant_cpu_to_le16(0xFFFF);
+ } else {
cifs_dbg(FYI, "unknown ACL type %d\n", acl_type);
return 0;
}
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index e3bc39bb9d12..d05a30072023 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -377,6 +377,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
try_to_freeze();
/* we should try only the port we connected to before */
+ mutex_lock(&server->srv_mutex);
rc = generic_ip_connect(server);
if (rc) {
cifs_dbg(FYI, "reconnect error %d\n", rc);
@@ -388,6 +389,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
server->tcpStatus = CifsNeedNegotiate;
spin_unlock(&GlobalMid_Lock);
}
+ mutex_unlock(&server->srv_mutex);
} while (server->tcpStatus == CifsNeedReconnect);
return rc;
@@ -1662,7 +1664,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
if (string == NULL)
goto out_nomem;
- if (strnlen(string, 256) == 256) {
+ if (strnlen(string, CIFS_MAX_DOMAINNAME_LEN)
+ == CIFS_MAX_DOMAINNAME_LEN) {
printk(KERN_WARNING "CIFS: domain name too"
" long\n");
goto cifs_parse_mount_err;
@@ -2288,8 +2291,8 @@ cifs_put_smb_ses(struct cifs_ses *ses)
#ifdef CONFIG_KEYS
-/* strlen("cifs:a:") + INET6_ADDRSTRLEN + 1 */
-#define CIFSCREDS_DESC_SIZE (7 + INET6_ADDRSTRLEN + 1)
+/* strlen("cifs:a:") + CIFS_MAX_DOMAINNAME_LEN + 1 */
+#define CIFSCREDS_DESC_SIZE (7 + CIFS_MAX_DOMAINNAME_LEN + 1)
/* Populate username and pw fields from keyring if possible */
static int
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 5699b5036ed8..0c2425b21974 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -491,6 +491,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
if (server->ops->close)
server->ops->close(xid, tcon, &fid);
cifs_del_pending_open(&open);
+ fput(file);
rc = -ENOMEM;
}
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 48b29d24c9f4..f4a8577c3e91 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -553,11 +553,10 @@ cifs_relock_file(struct cifsFileInfo *cfile)
struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
int rc = 0;
- /* we are going to update can_cache_brlcks here - need a write access */
- down_write(&cinode->lock_sem);
+ down_read(&cinode->lock_sem);
if (cinode->can_cache_brlcks) {
- /* can cache locks - no need to push them */
- up_write(&cinode->lock_sem);
+ /* can cache locks - no need to relock */
+ up_read(&cinode->lock_sem);
return rc;
}
@@ -568,7 +567,7 @@ cifs_relock_file(struct cifsFileInfo *cfile)
else
rc = tcon->ses->server->ops->push_mand_locks(cfile);
- up_write(&cinode->lock_sem);
+ up_read(&cinode->lock_sem);
return rc;
}
@@ -736,7 +735,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
cifs_dbg(FYI, "Freeing private data in close dir\n");
spin_lock(&cifs_file_list_lock);
- if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
+ if (server->ops->dir_needs_close(cfile)) {
cfile->invalidHandle = true;
spin_unlock(&cifs_file_list_lock);
if (server->ops->close_dir)
@@ -1790,6 +1789,7 @@ refind_writable:
cifsFileInfo_put(inv_file);
spin_lock(&cifs_file_list_lock);
++refind;
+ inv_file = NULL;
goto refind_writable;
}
}
@@ -2354,7 +2354,7 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
unsigned long nr_segs, loff_t *poffset)
{
unsigned long nr_pages, i;
- size_t copied, len, cur_len;
+ size_t bytes, copied, len, cur_len;
ssize_t total_written = 0;
loff_t offset;
struct iov_iter it;
@@ -2409,14 +2409,45 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
save_len = cur_len;
for (i = 0; i < nr_pages; i++) {
- copied = min_t(const size_t, cur_len, PAGE_SIZE);
+ bytes = min_t(const size_t, cur_len, PAGE_SIZE);
copied = iov_iter_copy_from_user(wdata->pages[i], &it,
- 0, copied);
+ 0, bytes);
cur_len -= copied;
iov_iter_advance(&it, copied);
+ /*
+ * If we didn't copy as much as we expected, then that
+ * may mean we trod into an unmapped area. Stop copying
+ * at that point. On the next pass through the big
+ * loop, we'll likely end up getting a zero-length
+ * write and bailing out of it.
+ */
+ if (copied < bytes)
+ break;
}
cur_len = save_len - cur_len;
+ /*
+ * If we have no data to send, then that probably means that
+ * the copy above failed altogether. That's most likely because
+ * the address in the iovec was bogus. Set the rc to -EFAULT,
+ * free anything we allocated and bail out.
+ */
+ if (!cur_len) {
+ for (i = 0; i < nr_pages; i++)
+ put_page(wdata->pages[i]);
+ kfree(wdata);
+ rc = -EFAULT;
+ break;
+ }
+
+ /*
+ * i + 1 now represents the number of pages we actually used in
+ * the copy phase above. Bring nr_pages down to that, and free
+ * any pages that we didn't use.
+ */
+ for ( ; nr_pages > i + 1; nr_pages--)
+ put_page(wdata->pages[nr_pages - 1]);
+
wdata->sync_mode = WB_SYNC_ALL;
wdata->nr_pages = nr_pages;
wdata->offset = (__u64)offset;
@@ -2779,7 +2810,7 @@ cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
total_read += result;
}
- return total_read > 0 ? total_read : result;
+ return total_read > 0 && result != -EAGAIN ? total_read : result;
}
static ssize_t
@@ -3202,7 +3233,7 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
total_read += result;
}
- return total_read > 0 ? total_read : result;
+ return total_read > 0 && result != -EAGAIN ? total_read : result;
}
static int cifs_readpages(struct file *file, struct address_space *mapping,
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 20efd81266c6..54304ccae7e7 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -490,10 +490,15 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path,
return PTR_ERR(tlink);
tcon = tlink_tcon(tlink);
- rc = CIFSSMBQAllEAs(xid, tcon, path, "SETFILEBITS",
- ea_value, 4 /* size of buf */, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (tcon->ses->server->ops->query_all_EAs == NULL) {
+ cifs_put_tlink(tlink);
+ return -EOPNOTSUPP;
+ }
+
+ rc = tcon->ses->server->ops->query_all_EAs(xid, tcon, path,
+ "SETFILEBITS", ea_value, 4 /* size of buf */,
+ cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
cifs_put_tlink(tlink);
if (rc < 0)
return (int)rc;
@@ -558,6 +563,11 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
fattr->cf_mode &= ~(S_IWUGO);
fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
+ if (fattr->cf_nlink < 1) {
+ cifs_dbg(1, "replacing bogus file nlink value %u\n",
+ fattr->cf_nlink);
+ fattr->cf_nlink = 1;
+ }
}
fattr->cf_uid = cifs_sb->mnt_uid;
@@ -822,7 +832,7 @@ inode_has_hashed_dentries(struct inode *inode)
struct dentry *dentry;
spin_lock(&inode->i_lock);
- hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+ hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
if (!d_unhashed(dentry) || IS_ROOT(dentry)) {
spin_unlock(&inode->i_lock);
return true;
@@ -1630,13 +1640,22 @@ cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
unlink_target:
/* Try unlinking the target dentry if it's not negative */
if (target_dentry->d_inode && (rc == -EACCES || rc == -EEXIST)) {
- tmprc = cifs_unlink(target_dir, target_dentry);
+ if (S_ISDIR(target_dentry->d_inode->i_mode))
+ tmprc = cifs_rmdir(target_dir, target_dentry);
+ else
+ tmprc = cifs_unlink(target_dir, target_dentry);
if (tmprc)
goto cifs_rename_exit;
rc = cifs_do_rename(xid, source_dentry, from_name,
target_dentry, to_name);
}
+ /* force revalidate to go get info when needed */
+ CIFS_I(source_dir)->time = CIFS_I(target_dir)->time = 0;
+
+ source_dir->i_ctime = source_dir->i_mtime = target_dir->i_ctime =
+ target_dir->i_mtime = current_fs_time(source_dir->i_sb);
+
cifs_rename_exit:
kfree(info_buf_source);
kfree(from_name);
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 770d5a9781c1..85ebdaa21015 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -111,6 +111,14 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name,
return;
}
+ /*
+ * If we know that the inode will need to be revalidated immediately,
+ * then don't create a new dentry for it. We'll end up doing an on
+ * the wire call either way and this spares us an invalidation.
+ */
+ if (fattr->cf_flags & CIFS_FATTR_NEED_REVAL)
+ return;
+
dentry = d_alloc(parent, name);
if (!dentry)
return;
@@ -574,11 +582,11 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon,
/* close and restart search */
cifs_dbg(FYI, "search backing up - close and restart search\n");
spin_lock(&cifs_file_list_lock);
- if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
+ if (server->ops->dir_needs_close(cfile)) {
cfile->invalidHandle = true;
spin_unlock(&cifs_file_list_lock);
- if (server->ops->close)
- server->ops->close(xid, tcon, &cfile->fid);
+ if (server->ops->close_dir)
+ server->ops->close_dir(xid, tcon, &cfile->fid);
} else
spin_unlock(&cifs_file_list_lock);
if (cfile->srch_inf.ntwrk_buf_start) {
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index f230571a7ab3..8edc9eb1ef7b 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -198,7 +198,7 @@ static void unicode_domain_string(char **pbcc_area, struct cifs_ses *ses,
bytes_ret = 0;
} else
bytes_ret = cifs_strtoUTF16((__le16 *) bcc_ptr, ses->domainName,
- 256, nls_cp);
+ CIFS_MAX_DOMAINNAME_LEN, nls_cp);
bcc_ptr += 2 * bytes_ret;
bcc_ptr += 2; /* account for null terminator */
@@ -256,8 +256,8 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses,
/* copy domain */
if (ses->domainName != NULL) {
- strncpy(bcc_ptr, ses->domainName, 256);
- bcc_ptr += strnlen(ses->domainName, 256);
+ strncpy(bcc_ptr, ses->domainName, CIFS_MAX_DOMAINNAME_LEN);
+ bcc_ptr += strnlen(ses->domainName, CIFS_MAX_DOMAINNAME_LEN);
} /* else we will send a null domain name
so the server will default to its own domain */
*bcc_ptr = 0;
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 3efdb9d5c0b8..610c6c24d41d 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -885,6 +885,12 @@ cifs_mand_lock(const unsigned int xid, struct cifsFileInfo *cfile, __u64 offset,
(__u8)type, wait, 0);
}
+static bool
+cifs_dir_needs_close(struct cifsFileInfo *cfile)
+{
+ return !cfile->srch_inf.endOfSearch && !cfile->invalidHandle;
+}
+
struct smb_version_operations smb1_operations = {
.send_cancel = send_nt_cancel,
.compare_fids = cifs_compare_fids,
@@ -948,6 +954,15 @@ struct smb_version_operations smb1_operations = {
.mand_lock = cifs_mand_lock,
.mand_unlock_range = cifs_unlock_range,
.push_mand_locks = cifs_push_mandatory_locks,
+ .dir_needs_close = cifs_dir_needs_close,
+#ifdef CONFIG_CIFS_XATTR
+ .query_all_EAs = CIFSSMBQAllEAs,
+ .set_EA = CIFSSMBSetEA,
+#endif /* CIFS_XATTR */
+#ifdef CONFIG_CIFS_ACL
+ .get_acl = get_cifs_acl,
+ .set_acl = set_cifs_acl,
+#endif /* CIFS_ACL */
};
struct smb_version_values smb1_values = {
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 5da1b55a2258..d801f63cddd0 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -73,7 +73,7 @@ smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path,
goto out;
}
- smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2,
+ smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2,
GFP_KERNEL);
if (smb2_data == NULL) {
rc = -ENOMEM;
diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h
index 7c0e2143e775..cc592ef6584a 100644
--- a/fs/cifs/smb2glob.h
+++ b/fs/cifs/smb2glob.h
@@ -55,4 +55,7 @@
#define SMB2_NTLMV2_SESSKEY_SIZE (16)
#define SMB2_HMACSHA256_SIZE (32)
+/* Maximum buffer size value we can send with 1 credit */
+#define SMB2_MAX_BUFFER_SIZE 65536
+
#endif /* _SMB2_GLOB_H */
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index fff6dfba6204..6d535797ec76 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -123,7 +123,7 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
*adjust_tz = false;
- smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2,
+ smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2,
GFP_KERNEL);
if (smb2_data == NULL)
return -ENOMEM;
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index 7c2f45c06fc2..4768cf8be6e2 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -214,7 +214,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
{STATUS_BREAKPOINT, -EIO, "STATUS_BREAKPOINT"},
{STATUS_SINGLE_STEP, -EIO, "STATUS_SINGLE_STEP"},
{STATUS_BUFFER_OVERFLOW, -EIO, "STATUS_BUFFER_OVERFLOW"},
- {STATUS_NO_MORE_FILES, -EIO, "STATUS_NO_MORE_FILES"},
+ {STATUS_NO_MORE_FILES, -ENODATA, "STATUS_NO_MORE_FILES"},
{STATUS_WAKE_SYSTEM_DEBUGGER, -EIO, "STATUS_WAKE_SYSTEM_DEBUGGER"},
{STATUS_HANDLES_CLOSED, -EIO, "STATUS_HANDLES_CLOSED"},
{STATUS_NO_INHERITANCE, -EIO, "STATUS_NO_INHERITANCE"},
@@ -605,7 +605,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
{STATUS_MAPPED_FILE_SIZE_ZERO, -EIO, "STATUS_MAPPED_FILE_SIZE_ZERO"},
{STATUS_TOO_MANY_OPENED_FILES, -EMFILE, "STATUS_TOO_MANY_OPENED_FILES"},
{STATUS_CANCELLED, -EIO, "STATUS_CANCELLED"},
- {STATUS_CANNOT_DELETE, -EIO, "STATUS_CANNOT_DELETE"},
+ {STATUS_CANNOT_DELETE, -EACCES, "STATUS_CANNOT_DELETE"},
{STATUS_INVALID_COMPUTER_NAME, -EIO, "STATUS_INVALID_COMPUTER_NAME"},
{STATUS_FILE_DELETED, -EIO, "STATUS_FILE_DELETED"},
{STATUS_SPECIAL_ACCOUNT, -EIO, "STATUS_SPECIAL_ACCOUNT"},
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index 10383d8c015b..4f791e0e98d7 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -413,96 +413,108 @@ cifs_ses_oplock_break(struct work_struct *work)
}
static bool
-smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server)
+smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp,
+ struct smb2_lease_break_work *lw)
{
- struct smb2_lease_break *rsp = (struct smb2_lease_break *)buffer;
- struct list_head *tmp, *tmp1, *tmp2;
- struct cifs_ses *ses;
- struct cifs_tcon *tcon;
- struct cifsInodeInfo *cinode;
+ bool found;
+ __u8 lease_state;
+ struct list_head *tmp;
struct cifsFileInfo *cfile;
struct cifs_pending_open *open;
- struct smb2_lease_break_work *lw;
- bool found;
+ struct cifsInodeInfo *cinode;
int ack_req = le32_to_cpu(rsp->Flags &
SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED);
- lw = kmalloc(sizeof(struct smb2_lease_break_work), GFP_KERNEL);
- if (!lw)
- return false;
+ lease_state = smb2_map_lease_to_oplock(rsp->NewLeaseState);
- INIT_WORK(&lw->lease_break, cifs_ses_oplock_break);
- lw->lease_state = rsp->NewLeaseState;
+ list_for_each(tmp, &tcon->openFileList) {
+ cfile = list_entry(tmp, struct cifsFileInfo, tlist);
+ cinode = CIFS_I(cfile->dentry->d_inode);
- cifs_dbg(FYI, "Checking for lease break\n");
+ if (memcmp(cinode->lease_key, rsp->LeaseKey,
+ SMB2_LEASE_KEY_SIZE))
+ continue;
- /* look up tcon based on tid & uid */
- spin_lock(&cifs_tcp_ses_lock);
- list_for_each(tmp, &server->smb_ses_list) {
- ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+ cifs_dbg(FYI, "found in the open list\n");
+ cifs_dbg(FYI, "lease key match, lease break 0x%d\n",
+ le32_to_cpu(rsp->NewLeaseState));
- spin_lock(&cifs_file_list_lock);
- list_for_each(tmp1, &ses->tcon_list) {
- tcon = list_entry(tmp1, struct cifs_tcon, tcon_list);
+ smb2_set_oplock_level(cinode, lease_state);
- cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks);
- list_for_each(tmp2, &tcon->openFileList) {
- cfile = list_entry(tmp2, struct cifsFileInfo,
- tlist);
- cinode = CIFS_I(cfile->dentry->d_inode);
+ if (ack_req)
+ cfile->oplock_break_cancelled = false;
+ else
+ cfile->oplock_break_cancelled = true;
- if (memcmp(cinode->lease_key, rsp->LeaseKey,
- SMB2_LEASE_KEY_SIZE))
- continue;
+ queue_work(cifsiod_wq, &cfile->oplock_break);
+ kfree(lw);
+ return true;
+ }
- cifs_dbg(FYI, "found in the open list\n");
- cifs_dbg(FYI, "lease key match, lease break 0x%d\n",
- le32_to_cpu(rsp->NewLeaseState));
+ found = false;
+ list_for_each_entry(open, &tcon->pending_opens, olist) {
+ if (memcmp(open->lease_key, rsp->LeaseKey,
+ SMB2_LEASE_KEY_SIZE))
+ continue;
+
+ if (!found && ack_req) {
+ found = true;
+ memcpy(lw->lease_key, open->lease_key,
+ SMB2_LEASE_KEY_SIZE);
+ lw->tlink = cifs_get_tlink(open->tlink);
+ queue_work(cifsiod_wq, &lw->lease_break);
+ }
- smb2_set_oplock_level(cinode,
- smb2_map_lease_to_oplock(rsp->NewLeaseState));
+ cifs_dbg(FYI, "found in the pending open list\n");
+ cifs_dbg(FYI, "lease key match, lease break 0x%d\n",
+ le32_to_cpu(rsp->NewLeaseState));
- if (ack_req)
- cfile->oplock_break_cancelled = false;
- else
- cfile->oplock_break_cancelled = true;
+ open->oplock = lease_state;
+ }
+ return found;
+}
- queue_work(cifsiod_wq, &cfile->oplock_break);
+static bool
+smb2_is_valid_lease_break(char *buffer)
+{
+ struct smb2_lease_break *rsp = (struct smb2_lease_break *)buffer;
+ struct list_head *tmp, *tmp1, *tmp2;
+ struct TCP_Server_Info *server;
+ struct cifs_ses *ses;
+ struct cifs_tcon *tcon;
+ struct smb2_lease_break_work *lw;
- spin_unlock(&cifs_file_list_lock);
- spin_unlock(&cifs_tcp_ses_lock);
- return true;
- }
+ lw = kmalloc(sizeof(struct smb2_lease_break_work), GFP_KERNEL);
+ if (!lw)
+ return false;
- found = false;
- list_for_each_entry(open, &tcon->pending_opens, olist) {
- if (memcmp(open->lease_key, rsp->LeaseKey,
- SMB2_LEASE_KEY_SIZE))
- continue;
+ INIT_WORK(&lw->lease_break, cifs_ses_oplock_break);
+ lw->lease_state = rsp->NewLeaseState;
- if (!found && ack_req) {
- found = true;
- memcpy(lw->lease_key, open->lease_key,
- SMB2_LEASE_KEY_SIZE);
- lw->tlink = cifs_get_tlink(open->tlink);
- queue_work(cifsiod_wq,
- &lw->lease_break);
- }
+ cifs_dbg(FYI, "Checking for lease break\n");
+
+ /* look up tcon based on tid & uid */
+ spin_lock(&cifs_tcp_ses_lock);
+ list_for_each(tmp, &cifs_tcp_ses_list) {
+ server = list_entry(tmp, struct TCP_Server_Info, tcp_ses_list);
- cifs_dbg(FYI, "found in the pending open list\n");
- cifs_dbg(FYI, "lease key match, lease break 0x%d\n",
- le32_to_cpu(rsp->NewLeaseState));
+ list_for_each(tmp1, &server->smb_ses_list) {
+ ses = list_entry(tmp1, struct cifs_ses, smb_ses_list);
- open->oplock =
- smb2_map_lease_to_oplock(rsp->NewLeaseState);
- }
- if (found) {
- spin_unlock(&cifs_file_list_lock);
- spin_unlock(&cifs_tcp_ses_lock);
- return true;
+ spin_lock(&cifs_file_list_lock);
+ list_for_each(tmp2, &ses->tcon_list) {
+ tcon = list_entry(tmp2, struct cifs_tcon,
+ tcon_list);
+ cifs_stats_inc(
+ &tcon->stats.cifs_stats.num_oplock_brks);
+ if (smb2_tcon_has_lease(tcon, rsp, lw)) {
+ spin_unlock(&cifs_file_list_lock);
+ spin_unlock(&cifs_tcp_ses_lock);
+ return true;
+ }
}
+ spin_unlock(&cifs_file_list_lock);
}
- spin_unlock(&cifs_file_list_lock);
}
spin_unlock(&cifs_tcp_ses_lock);
kfree(lw);
@@ -528,7 +540,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
if (rsp->StructureSize !=
smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) {
if (le16_to_cpu(rsp->StructureSize) == 44)
- return smb2_is_valid_lease_break(buffer, server);
+ return smb2_is_valid_lease_break(buffer);
else
return false;
}
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index f2e76f3b0c61..e12f258a5ffa 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -181,11 +181,8 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
/* start with specified wsize, or default */
wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE;
wsize = min_t(unsigned int, wsize, server->max_write);
- /*
- * limit write size to 2 ** 16, because we don't support multicredit
- * requests now.
- */
- wsize = min_t(unsigned int, wsize, 2 << 15);
+ /* set it to the maximum buffer size value we can send with 1 credit */
+ wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE);
return wsize;
}
@@ -199,11 +196,8 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
/* start with specified rsize, or default */
rsize = volume_info->rsize ? volume_info->rsize : CIFS_DEFAULT_IOSIZE;
rsize = min_t(unsigned int, rsize, server->max_read);
- /*
- * limit write size to 2 ** 16, because we don't support multicredit
- * requests now.
- */
- rsize = min_t(unsigned int, rsize, 2 << 15);
+ /* set it to the maximum buffer size value we can send with 1 credit */
+ rsize = min_t(unsigned int, rsize, SMB2_MAX_BUFFER_SIZE);
return rsize;
}
@@ -249,7 +243,7 @@ smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon,
int rc;
struct smb2_file_all_info *smb2_data;
- smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2,
+ smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2,
GFP_KERNEL);
if (smb2_data == NULL)
return -ENOMEM;
@@ -560,6 +554,12 @@ smb2_new_lease_key(struct cifs_fid *fid)
get_random_bytes(fid->lease_key, SMB2_LEASE_KEY_SIZE);
}
+static bool
+smb2_dir_needs_close(struct cifsFileInfo *cfile)
+{
+ return !cfile->invalidHandle;
+}
+
struct smb_version_operations smb21_operations = {
.compare_fids = smb2_compare_fids,
.setup_request = smb2_setup_request,
@@ -624,6 +624,7 @@ struct smb_version_operations smb21_operations = {
.set_lease_key = smb2_set_lease_key,
.new_lease_key = smb2_new_lease_key,
.calc_signature = smb2_calc_signature,
+ .dir_needs_close = smb2_dir_needs_close,
};
@@ -691,6 +692,7 @@ struct smb_version_operations smb30_operations = {
.set_lease_key = smb2_set_lease_key,
.new_lease_key = smb2_new_lease_key,
.calc_signature = smb3_calc_signature,
+ .dir_needs_close = smb2_dir_needs_close,
};
struct smb_version_values smb20_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 2b95ce2b54e8..eb0de4c3ca76 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -408,6 +408,9 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
server->dialect = le16_to_cpu(rsp->DialectRevision);
server->maxBuf = le32_to_cpu(rsp->MaxTransactSize);
+ /* set it to the maximum buffer size value we can send with 1 credit */
+ server->maxBuf = min_t(unsigned int, le32_to_cpu(rsp->MaxTransactSize),
+ SMB2_MAX_BUFFER_SIZE);
server->max_read = le32_to_cpu(rsp->MaxReadSize);
server->max_write = le32_to_cpu(rsp->MaxWriteSize);
/* BB Do we need to validate the SecurityMode? */
@@ -806,7 +809,8 @@ tcon_exit:
tcon_error_exit:
if (rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) {
cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree);
- tcon->bad_network_name = true;
+ if (tcon)
+ tcon->bad_network_name = true;
}
goto tcon_exit;
}
@@ -1200,7 +1204,7 @@ SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
{
return query_info(xid, tcon, persistent_fid, volatile_fid,
FILE_ALL_INFORMATION,
- sizeof(struct smb2_file_all_info) + MAX_NAME * 2,
+ sizeof(struct smb2_file_all_info) + PATH_MAX * 2,
sizeof(struct smb2_file_all_info), data);
}
@@ -1796,6 +1800,10 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
rsp = (struct smb2_query_directory_rsp *)iov[0].iov_base;
if (rc) {
+ if (rc == -ENODATA && rsp->hdr.Status == STATUS_NO_MORE_FILES) {
+ srch_inf->endOfSearch = true;
+ rc = 0;
+ }
cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE);
goto qdir_exit;
}
@@ -1833,11 +1841,6 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
else
cifs_dbg(VFS, "illegal search buffer type\n");
- if (rsp->hdr.Status == STATUS_NO_MORE_FILES)
- srch_inf->endOfSearch = 1;
- else
- srch_inf->endOfSearch = 0;
-
return rc;
qdir_exit:
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 09afda4cc58e..5ac836a86b18 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -82,9 +82,11 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name)
goto remove_ea_exit;
ea_name += XATTR_USER_PREFIX_LEN; /* skip past user. prefix */
- rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, NULL,
- (__u16)0, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (pTcon->ses->server->ops->set_EA)
+ rc = pTcon->ses->server->ops->set_EA(xid, pTcon,
+ full_path, ea_name, NULL, (__u16)0,
+ cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
}
remove_ea_exit:
kfree(full_path);
@@ -149,18 +151,22 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
cifs_dbg(FYI, "attempt to set cifs inode metadata\n");
ea_name += XATTR_USER_PREFIX_LEN; /* skip past user. prefix */
- rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value,
- (__u16)value_size, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (pTcon->ses->server->ops->set_EA)
+ rc = pTcon->ses->server->ops->set_EA(xid, pTcon,
+ full_path, ea_name, ea_value, (__u16)value_size,
+ cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
} else if (strncmp(ea_name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN)
== 0) {
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
goto set_ea_exit;
ea_name += XATTR_OS2_PREFIX_LEN; /* skip past os2. prefix */
- rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value,
- (__u16)value_size, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (pTcon->ses->server->ops->set_EA)
+ rc = pTcon->ses->server->ops->set_EA(xid, pTcon,
+ full_path, ea_name, ea_value, (__u16)value_size,
+ cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
} else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL,
strlen(CIFS_XATTR_CIFS_ACL)) == 0) {
#ifdef CONFIG_CIFS_ACL
@@ -170,8 +176,12 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
rc = -ENOMEM;
} else {
memcpy(pacl, ea_value, value_size);
- rc = set_cifs_acl(pacl, value_size,
- direntry->d_inode, full_path, CIFS_ACL_DACL);
+ if (pTcon->ses->server->ops->set_acl)
+ rc = pTcon->ses->server->ops->set_acl(pacl,
+ value_size, direntry->d_inode,
+ full_path, CIFS_ACL_DACL);
+ else
+ rc = -EOPNOTSUPP;
if (rc == 0) /* force revalidate of the inode */
CIFS_I(direntry->d_inode)->time = 0;
kfree(pacl);
@@ -272,17 +282,21 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
/* revalidate/getattr then populate from inode */
} /* BB add else when above is implemented */
ea_name += XATTR_USER_PREFIX_LEN; /* skip past user. prefix */
- rc = CIFSSMBQAllEAs(xid, pTcon, full_path, ea_name, ea_value,
- buf_size, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (pTcon->ses->server->ops->query_all_EAs)
+ rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon,
+ full_path, ea_name, ea_value, buf_size,
+ cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
} else if (strncmp(ea_name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) {
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
goto get_ea_exit;
ea_name += XATTR_OS2_PREFIX_LEN; /* skip past os2. prefix */
- rc = CIFSSMBQAllEAs(xid, pTcon, full_path, ea_name, ea_value,
- buf_size, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (pTcon->ses->server->ops->query_all_EAs)
+ rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon,
+ full_path, ea_name, ea_value, buf_size,
+ cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
} else if (strncmp(ea_name, POSIX_ACL_XATTR_ACCESS,
strlen(POSIX_ACL_XATTR_ACCESS)) == 0) {
#ifdef CONFIG_CIFS_POSIX
@@ -313,8 +327,11 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
u32 acllen;
struct cifs_ntsd *pacl;
- pacl = get_cifs_acl(cifs_sb, direntry->d_inode,
- full_path, &acllen);
+ if (pTcon->ses->server->ops->get_acl == NULL)
+ goto get_ea_exit; /* rc already EOPNOTSUPP */
+
+ pacl = pTcon->ses->server->ops->get_acl(cifs_sb,
+ direntry->d_inode, full_path, &acllen);
if (IS_ERR(pacl)) {
rc = PTR_ERR(pacl);
cifs_dbg(VFS, "%s: error %zd getting sec desc\n",
@@ -400,11 +417,12 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size)
/* if proc/fs/cifs/streamstoxattr is set then
search server for EAs or streams to
returns as xattrs */
- rc = CIFSSMBQAllEAs(xid, pTcon, full_path, NULL, data,
- buf_size, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (pTcon->ses->server->ops->query_all_EAs)
+ rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon,
+ full_path, NULL, data, buf_size,
+ cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
list_ea_exit:
kfree(full_path);
free_xid(xid);
diff --git a/fs/coda/cache.c b/fs/coda/cache.c
index 1da168c61d35..9bc1147a6c5d 100644
--- a/fs/coda/cache.c
+++ b/fs/coda/cache.c
@@ -92,7 +92,7 @@ static void coda_flag_children(struct dentry *parent, int flag)
struct dentry *de;
spin_lock(&parent->d_lock);
- list_for_each_entry(de, &parent->d_subdirs, d_u.d_child) {
+ list_for_each_entry(de, &parent->d_subdirs, d_child) {
/* don't know what to do with negative dentries */
if (de->d_inode )
coda_flag_inode(de->d_inode, flag);
diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c
index a81147e2e4ef..4d24d17bcfc1 100644
--- a/fs/compat_binfmt_elf.c
+++ b/fs/compat_binfmt_elf.c
@@ -88,6 +88,11 @@ static void cputime_to_compat_timeval(const cputime_t cputime,
#define ELF_HWCAP COMPAT_ELF_HWCAP
#endif
+#ifdef COMPAT_ELF_HWCAP2
+#undef ELF_HWCAP2
+#define ELF_HWCAP2 COMPAT_ELF_HWCAP2
+#endif
+
#ifdef COMPAT_ARCH_DLINFO
#undef ARCH_DLINFO
#define ARCH_DLINFO COMPAT_ARCH_DLINFO
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 7aabc6ad4e9b..fa38d076697d 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -56,10 +56,19 @@ static void configfs_d_iput(struct dentry * dentry,
struct configfs_dirent *sd = dentry->d_fsdata;
if (sd) {
- BUG_ON(sd->s_dentry != dentry);
/* Coordinate with configfs_readdir */
spin_lock(&configfs_dirent_lock);
- sd->s_dentry = NULL;
+ /* Coordinate with configfs_attach_attr, which increases
+ * sd->s_count and points sd->s_dentry at the newly allocated
+ * dentry. Only set sd->s_dentry to NULL when this dentry is the
+ * only sd owner.
+ * Otherwise configfs_d_iput may run just after
+ * configfs_attach_attr and set sd->s_dentry to NULL
+ * even though it is still in use.
+ */
+ if (atomic_read(&sd->s_count) <= 2)
+ sd->s_dentry = NULL;
+
spin_unlock(&configfs_dirent_lock);
configfs_put(sd);
}
@@ -426,8 +435,11 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den
struct configfs_attribute * attr = sd->s_element;
int error;
+ spin_lock(&configfs_dirent_lock);
dentry->d_fsdata = configfs_get(sd);
sd->s_dentry = dentry;
+ spin_unlock(&configfs_dirent_lock);
+
error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG,
configfs_init_file);
if (error) {
diff --git a/fs/coredump.c b/fs/coredump.c
index dafafbafa731..1d402ce5b72f 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -299,7 +299,7 @@ static int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
if (unlikely(nr < 0))
return nr;
- tsk->flags = PF_DUMPCORE;
+ tsk->flags |= PF_DUMPCORE;
if (atomic_read(&mm->mm_users) == nr + 1)
goto done;
/*
diff --git a/fs/dcache.c b/fs/dcache.c
index f09b9085f7d8..efa4602e064f 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -43,7 +43,7 @@
/*
* Usage:
* dcache->d_inode->i_lock protects:
- * - i_dentry, d_alias, d_inode of aliases
+ * - i_dentry, d_u.d_alias, d_inode of aliases
* dcache_hash_bucket lock protects:
* - the dcache hash table
* s_anon bl list spinlock protects:
@@ -58,7 +58,7 @@
* - d_unhashed()
* - d_parent and d_subdirs
* - childrens' d_child and d_parent
- * - d_alias, d_inode
+ * - d_u.d_alias, d_inode
*
* Ordering:
* dentry->d_inode->i_lock
@@ -96,8 +96,6 @@ static struct kmem_cache *dentry_cache __read_mostly;
* This hash-function tries to avoid losing too many bits of hash
* information, yet avoid using a prime hash-size or similar.
*/
-#define D_HASHBITS d_hash_shift
-#define D_HASHMASK d_hash_mask
static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;
@@ -108,8 +106,7 @@ static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
unsigned int hash)
{
hash += (unsigned long) parent / L1_CACHE_BYTES;
- hash = hash + (hash >> D_HASHBITS);
- return dentry_hashtable + (hash & D_HASHMASK);
+ return dentry_hashtable + hash_32(hash, d_hash_shift);
}
/* Statistics gathering. */
@@ -218,7 +215,6 @@ static void __d_free(struct rcu_head *head)
{
struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
- WARN_ON(!hlist_unhashed(&dentry->d_alias));
if (dname_external(dentry))
kfree(dentry->d_name.name);
kmem_cache_free(dentry_cache, dentry);
@@ -229,6 +225,7 @@ static void __d_free(struct rcu_head *head)
*/
static void d_free(struct dentry *dentry)
{
+ WARN_ON(!hlist_unhashed(&dentry->d_u.d_alias));
BUG_ON(dentry->d_count);
this_cpu_dec(nr_dentry);
if (dentry->d_op && dentry->d_op->d_release)
@@ -267,7 +264,7 @@ static void dentry_iput(struct dentry * dentry)
struct inode *inode = dentry->d_inode;
if (inode) {
dentry->d_inode = NULL;
- hlist_del_init(&dentry->d_alias);
+ hlist_del_init(&dentry->d_u.d_alias);
spin_unlock(&dentry->d_lock);
spin_unlock(&inode->i_lock);
if (!inode->i_nlink)
@@ -291,7 +288,7 @@ static void dentry_unlink_inode(struct dentry * dentry)
{
struct inode *inode = dentry->d_inode;
dentry->d_inode = NULL;
- hlist_del_init(&dentry->d_alias);
+ hlist_del_init(&dentry->d_u.d_alias);
dentry_rcuwalk_barrier(dentry);
spin_unlock(&dentry->d_lock);
spin_unlock(&inode->i_lock);
@@ -367,9 +364,9 @@ static struct dentry *d_kill(struct dentry *dentry, struct dentry *parent)
__releases(parent->d_lock)
__releases(dentry->d_inode->i_lock)
{
- list_del(&dentry->d_u.d_child);
+ __list_del_entry(&dentry->d_child);
/*
- * Inform try_to_ascend() that we are no longer attached to the
+ * Inform ascending readers that we are no longer attached to the
* dentry tree
*/
dentry->d_flags |= DCACHE_DENTRY_KILLED;
@@ -663,7 +660,7 @@ static struct dentry *__d_find_alias(struct inode *inode, int want_discon)
again:
discon_alias = NULL;
- hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
+ hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
spin_lock(&alias->d_lock);
if (S_ISDIR(inode->i_mode) || !d_unhashed(alias)) {
if (IS_ROOT(alias) &&
@@ -716,7 +713,7 @@ void d_prune_aliases(struct inode *inode)
struct dentry *dentry;
restart:
spin_lock(&inode->i_lock);
- hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+ hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
spin_lock(&dentry->d_lock);
if (!dentry->d_count) {
__dget_dlock(dentry);
@@ -896,7 +893,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
/* descend to the first leaf in the current subtree */
while (!list_empty(&dentry->d_subdirs))
dentry = list_entry(dentry->d_subdirs.next,
- struct dentry, d_u.d_child);
+ struct dentry, d_child);
/* consume the dentries from this leaf up through its parents
* until we find one with children or run out altogether */
@@ -930,17 +927,17 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
if (IS_ROOT(dentry)) {
parent = NULL;
- list_del(&dentry->d_u.d_child);
+ list_del(&dentry->d_child);
} else {
parent = dentry->d_parent;
parent->d_count--;
- list_del(&dentry->d_u.d_child);
+ list_del(&dentry->d_child);
}
inode = dentry->d_inode;
if (inode) {
dentry->d_inode = NULL;
- hlist_del_init(&dentry->d_alias);
+ hlist_del_init(&dentry->d_u.d_alias);
if (dentry->d_op && dentry->d_op->d_iput)
dentry->d_op->d_iput(dentry, inode);
else
@@ -958,7 +955,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
} while (list_empty(&dentry->d_subdirs));
dentry = list_entry(dentry->d_subdirs.next,
- struct dentry, d_u.d_child);
+ struct dentry, d_child);
}
}
@@ -991,35 +988,6 @@ void shrink_dcache_for_umount(struct super_block *sb)
}
/*
- * This tries to ascend one level of parenthood, but
- * we can race with renaming, so we need to re-check
- * the parenthood after dropping the lock and check
- * that the sequence number still matches.
- */
-static struct dentry *try_to_ascend(struct dentry *old, int locked, unsigned seq)
-{
- struct dentry *new = old->d_parent;
-
- rcu_read_lock();
- spin_unlock(&old->d_lock);
- spin_lock(&new->d_lock);
-
- /*
- * might go back up the wrong parent if we have had a rename
- * or deletion
- */
- if (new != old->d_parent ||
- (old->d_flags & DCACHE_DENTRY_KILLED) ||
- (!locked && read_seqretry(&rename_lock, seq))) {
- spin_unlock(&new->d_lock);
- new = NULL;
- }
- rcu_read_unlock();
- return new;
-}
-
-
-/*
* Search for at least 1 mount point in the dentry's subdirs.
* We descend to the next level whenever the d_subdirs
* list is non-empty and continue searching.
@@ -1051,7 +1019,7 @@ repeat:
resume:
while (next != &this_parent->d_subdirs) {
struct list_head *tmp = next;
- struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
+ struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
next = tmp->next;
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
@@ -1073,30 +1041,48 @@ resume:
/*
* All done at this level ... ascend and resume the search.
*/
+ rcu_read_lock();
+ascend:
if (this_parent != parent) {
struct dentry *child = this_parent;
- this_parent = try_to_ascend(this_parent, locked, seq);
- if (!this_parent)
+ this_parent = child->d_parent;
+
+ spin_unlock(&child->d_lock);
+ spin_lock(&this_parent->d_lock);
+
+ /* might go back up the wrong parent if we have had a rename. */
+ if (!locked && read_seqretry(&rename_lock, seq))
goto rename_retry;
- next = child->d_u.d_child.next;
+ next = child->d_child.next;
+ while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)) {
+ if (next == &this_parent->d_subdirs)
+ goto ascend;
+ child = list_entry(next, struct dentry, d_child);
+ next = next->next;
+ }
+ rcu_read_unlock();
goto resume;
}
- spin_unlock(&this_parent->d_lock);
if (!locked && read_seqretry(&rename_lock, seq))
goto rename_retry;
+ spin_unlock(&this_parent->d_lock);
+ rcu_read_unlock();
if (locked)
write_sequnlock(&rename_lock);
return 0; /* No mount points found in tree */
positive:
if (!locked && read_seqretry(&rename_lock, seq))
- goto rename_retry;
+ goto rename_retry_unlocked;
if (locked)
write_sequnlock(&rename_lock);
return 1;
rename_retry:
+ spin_unlock(&this_parent->d_lock);
+ rcu_read_unlock();
if (locked)
goto again;
+rename_retry_unlocked:
locked = 1;
write_seqlock(&rename_lock);
goto again;
@@ -1134,7 +1120,7 @@ repeat:
resume:
while (next != &this_parent->d_subdirs) {
struct list_head *tmp = next;
- struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
+ struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
next = tmp->next;
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
@@ -1161,6 +1147,7 @@ resume:
*/
if (found && need_resched()) {
spin_unlock(&dentry->d_lock);
+ rcu_read_lock();
goto out;
}
@@ -1180,23 +1167,40 @@ resume:
/*
* All done at this level ... ascend and resume the search.
*/
+ rcu_read_lock();
+ascend:
if (this_parent != parent) {
struct dentry *child = this_parent;
- this_parent = try_to_ascend(this_parent, locked, seq);
- if (!this_parent)
+ this_parent = child->d_parent;
+
+ spin_unlock(&child->d_lock);
+ spin_lock(&this_parent->d_lock);
+
+ /* might go back up the wrong parent if we have had a rename. */
+ if (!locked && read_seqretry(&rename_lock, seq))
goto rename_retry;
- next = child->d_u.d_child.next;
+ next = child->d_child.next;
+ while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)) {
+ if (next == &this_parent->d_subdirs)
+ goto ascend;
+ child = list_entry(next, struct dentry, d_child);
+ next = next->next;
+ }
+ rcu_read_unlock();
goto resume;
}
out:
- spin_unlock(&this_parent->d_lock);
if (!locked && read_seqretry(&rename_lock, seq))
goto rename_retry;
+ spin_unlock(&this_parent->d_lock);
+ rcu_read_unlock();
if (locked)
write_sequnlock(&rename_lock);
return found;
rename_retry:
+ spin_unlock(&this_parent->d_lock);
+ rcu_read_unlock();
if (found)
return found;
if (locked)
@@ -1281,8 +1285,8 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
INIT_HLIST_BL_NODE(&dentry->d_hash);
INIT_LIST_HEAD(&dentry->d_lru);
INIT_LIST_HEAD(&dentry->d_subdirs);
- INIT_HLIST_NODE(&dentry->d_alias);
- INIT_LIST_HEAD(&dentry->d_u.d_child);
+ INIT_HLIST_NODE(&dentry->d_u.d_alias);
+ INIT_LIST_HEAD(&dentry->d_child);
d_set_d_op(dentry, dentry->d_sb->s_d_op);
this_cpu_inc(nr_dentry);
@@ -1312,7 +1316,7 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
*/
__dget_dlock(parent);
dentry->d_parent = parent;
- list_add(&dentry->d_u.d_child, &parent->d_subdirs);
+ list_add(&dentry->d_child, &parent->d_subdirs);
spin_unlock(&parent->d_lock);
return dentry;
@@ -1372,7 +1376,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
if (inode) {
if (unlikely(IS_AUTOMOUNT(inode)))
dentry->d_flags |= DCACHE_NEED_AUTOMOUNT;
- hlist_add_head(&dentry->d_alias, &inode->i_dentry);
+ hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
}
dentry->d_inode = inode;
dentry_rcuwalk_barrier(dentry);
@@ -1397,7 +1401,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
void d_instantiate(struct dentry *entry, struct inode * inode)
{
- BUG_ON(!hlist_unhashed(&entry->d_alias));
+ BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
if (inode)
spin_lock(&inode->i_lock);
__d_instantiate(entry, inode);
@@ -1436,7 +1440,7 @@ static struct dentry *__d_instantiate_unique(struct dentry *entry,
return NULL;
}
- hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
+ hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
/*
* Don't need alias->d_lock here, because aliases with
* d_parent == entry->d_parent are not subject to name or
@@ -1462,7 +1466,7 @@ struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
{
struct dentry *result;
- BUG_ON(!hlist_unhashed(&entry->d_alias));
+ BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
if (inode)
spin_lock(&inode->i_lock);
@@ -1505,7 +1509,7 @@ static struct dentry * __d_find_any_alias(struct inode *inode)
if (hlist_empty(&inode->i_dentry))
return NULL;
- alias = hlist_entry(inode->i_dentry.first, struct dentry, d_alias);
+ alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
__dget(alias);
return alias;
}
@@ -1579,7 +1583,7 @@ struct dentry *d_obtain_alias(struct inode *inode)
spin_lock(&tmp->d_lock);
tmp->d_inode = inode;
tmp->d_flags |= DCACHE_DISCONNECTED;
- hlist_add_head(&tmp->d_alias, &inode->i_dentry);
+ hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry);
hlist_bl_lock(&tmp->d_sb->s_anon);
hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
hlist_bl_unlock(&tmp->d_sb->s_anon);
@@ -2022,7 +2026,7 @@ int d_validate(struct dentry *dentry, struct dentry *dparent)
struct dentry *child;
spin_lock(&dparent->d_lock);
- list_for_each_entry(child, &dparent->d_subdirs, d_u.d_child) {
+ list_for_each_entry(child, &dparent->d_subdirs, d_child) {
if (dentry == child) {
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
__dget_dlock(dentry);
@@ -2269,8 +2273,8 @@ static void __d_move(struct dentry * dentry, struct dentry * target)
/* Unhash the target: dput() will then get rid of it */
__d_drop(target);
- list_del(&dentry->d_u.d_child);
- list_del(&target->d_u.d_child);
+ list_del(&dentry->d_child);
+ list_del(&target->d_child);
/* Switch the names.. */
switch_names(dentry, target);
@@ -2280,15 +2284,15 @@ static void __d_move(struct dentry * dentry, struct dentry * target)
if (IS_ROOT(dentry)) {
dentry->d_parent = target->d_parent;
target->d_parent = target;
- INIT_LIST_HEAD(&target->d_u.d_child);
+ INIT_LIST_HEAD(&target->d_child);
} else {
swap(dentry->d_parent, target->d_parent);
/* And add them back to the (new) parent lists */
- list_add(&target->d_u.d_child, &target->d_parent->d_subdirs);
+ list_add(&target->d_child, &target->d_parent->d_subdirs);
}
- list_add(&dentry->d_u.d_child, &dentry->d_parent->d_subdirs);
+ list_add(&dentry->d_child, &dentry->d_parent->d_subdirs);
write_seqcount_end(&target->d_seq);
write_seqcount_end(&dentry->d_seq);
@@ -2395,9 +2399,9 @@ static void __d_materialise_dentry(struct dentry *dentry, struct dentry *anon)
swap(dentry->d_name.hash, anon->d_name.hash);
dentry->d_parent = dentry;
- list_del_init(&dentry->d_u.d_child);
+ list_del_init(&dentry->d_child);
anon->d_parent = dparent;
- list_move(&anon->d_u.d_child, &dparent->d_subdirs);
+ list_move(&anon->d_child, &dparent->d_subdirs);
write_seqcount_end(&dentry->d_seq);
write_seqcount_end(&anon->d_seq);
@@ -2686,8 +2690,13 @@ char *d_path(const struct path *path, char *buf, int buflen)
* thus don't need to be hashed. They also don't need a name until a
* user wants to identify the object in /proc/pid/fd/. The little hack
* below allows us to generate a name for these objects on demand:
+ *
+ * Some pseudo inodes are mountable. When they are mounted
+ * path->dentry == path->mnt->mnt_root. In that case don't call d_dname
+ * and instead have d_path return the mounted path.
*/
- if (path->dentry->d_op && path->dentry->d_op->d_dname)
+ if (path->dentry->d_op && path->dentry->d_op->d_dname &&
+ (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root))
return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
get_fs_root(current->fs, &root);
@@ -2724,6 +2733,17 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
return memcpy(buffer, temp, sz);
}
+char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
+{
+ char *end = buffer + buflen;
+ /* these dentries are never renamed, so d_lock is not needed */
+ if (prepend(&end, &buflen, " (deleted)", 11) ||
+ prepend_name(&end, &buflen, &dentry->d_name) ||
+ prepend(&end, &buflen, "/", 1))
+ end = ERR_PTR(-ENAMETOOLONG);
+ return end;
+}
+
/*
* Write full pathname from the root of the filesystem into the buffer.
*/
@@ -2920,7 +2940,7 @@ repeat:
resume:
while (next != &this_parent->d_subdirs) {
struct list_head *tmp = next;
- struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
+ struct dentry *dentry = list_entry(tmp, struct dentry, d_child);
next = tmp->next;
spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
@@ -2941,26 +2961,43 @@ resume:
}
spin_unlock(&dentry->d_lock);
}
+ rcu_read_lock();
+ascend:
if (this_parent != root) {
struct dentry *child = this_parent;
if (!(this_parent->d_flags & DCACHE_GENOCIDE)) {
this_parent->d_flags |= DCACHE_GENOCIDE;
this_parent->d_count--;
}
- this_parent = try_to_ascend(this_parent, locked, seq);
- if (!this_parent)
+ this_parent = child->d_parent;
+
+ spin_unlock(&child->d_lock);
+ spin_lock(&this_parent->d_lock);
+
+ /* might go back up the wrong parent if we have had a rename. */
+ if (!locked && read_seqretry(&rename_lock, seq))
goto rename_retry;
- next = child->d_u.d_child.next;
+ next = child->d_child.next;
+ while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)) {
+ if (next == &this_parent->d_subdirs)
+ goto ascend;
+ child = list_entry(next, struct dentry, d_child);
+ next = next->next;
+ }
+ rcu_read_unlock();
goto resume;
}
- spin_unlock(&this_parent->d_lock);
if (!locked && read_seqretry(&rename_lock, seq))
goto rename_retry;
+ spin_unlock(&this_parent->d_lock);
+ rcu_read_unlock();
if (locked)
write_sequnlock(&rename_lock);
return;
rename_retry:
+ spin_unlock(&this_parent->d_lock);
+ rcu_read_unlock();
if (locked)
goto again;
locked = 1;
diff --git a/fs/dcookies.c b/fs/dcookies.c
index ab5954b50267..ac44a69fbea9 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -204,7 +204,7 @@ out:
}
#ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE4(lookup_dcookie, u32, w0, u32, w1, char __user *, buf, size_t, len)
+COMPAT_SYSCALL_DEFINE4(lookup_dcookie, u32, w0, u32, w1, char __user *, buf, compat_size_t, len)
{
#ifdef __BIG_ENDIAN
return sys_lookup_dcookie(((u64)w0 << 32) | w1, buf, len);
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 4888cb3fdef7..26d7fff8d78e 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -245,10 +245,19 @@ static int debugfs_show_options(struct seq_file *m, struct dentry *root)
return 0;
}
+static void debugfs_evict_inode(struct inode *inode)
+{
+ truncate_inode_pages(&inode->i_data, 0);
+ clear_inode(inode);
+ if (S_ISLNK(inode->i_mode))
+ kfree(inode->i_private);
+}
+
static const struct super_operations debugfs_super_operations = {
.statfs = simple_statfs,
.remount_fs = debugfs_remount,
.show_options = debugfs_show_options,
+ .evict_inode = debugfs_evict_inode,
};
static int debug_fill_super(struct super_block *sb, void *data, int silent)
@@ -465,23 +474,14 @@ static int __debugfs_remove(struct dentry *dentry, struct dentry *parent)
int ret = 0;
if (debugfs_positive(dentry)) {
- if (dentry->d_inode) {
- dget(dentry);
- switch (dentry->d_inode->i_mode & S_IFMT) {
- case S_IFDIR:
- ret = simple_rmdir(parent->d_inode, dentry);
- break;
- case S_IFLNK:
- kfree(dentry->d_inode->i_private);
- /* fall through */
- default:
- simple_unlink(parent->d_inode, dentry);
- break;
- }
- if (!ret)
- d_delete(dentry);
- dput(dentry);
- }
+ dget(dentry);
+ if (S_ISDIR(dentry->d_inode->i_mode))
+ ret = simple_rmdir(parent->d_inode, dentry);
+ else
+ simple_unlink(parent->d_inode, dentry);
+ if (!ret)
+ d_delete(dentry);
+ dput(dentry);
}
return ret;
}
@@ -533,8 +533,7 @@ EXPORT_SYMBOL_GPL(debugfs_remove);
*/
void debugfs_remove_recursive(struct dentry *dentry)
{
- struct dentry *child;
- struct dentry *parent;
+ struct dentry *child, *next, *parent;
if (IS_ERR_OR_NULL(dentry))
return;
@@ -544,61 +543,37 @@ void debugfs_remove_recursive(struct dentry *dentry)
return;
parent = dentry;
+ down:
mutex_lock(&parent->d_inode->i_mutex);
+ list_for_each_entry_safe(child, next, &parent->d_subdirs, d_child) {
+ if (!debugfs_positive(child))
+ continue;
- while (1) {
- /*
- * When all dentries under "parent" has been removed,
- * walk up the tree until we reach our starting point.
- */
- if (list_empty(&parent->d_subdirs)) {
- mutex_unlock(&parent->d_inode->i_mutex);
- if (parent == dentry)
- break;
- parent = parent->d_parent;
- mutex_lock(&parent->d_inode->i_mutex);
- }
- child = list_entry(parent->d_subdirs.next, struct dentry,
- d_u.d_child);
- next_sibling:
-
- /*
- * If "child" isn't empty, walk down the tree and
- * remove all its descendants first.
- */
+ /* perhaps simple_empty(child) makes more sense */
if (!list_empty(&child->d_subdirs)) {
mutex_unlock(&parent->d_inode->i_mutex);
parent = child;
- mutex_lock(&parent->d_inode->i_mutex);
- continue;
+ goto down;
}
- __debugfs_remove(child, parent);
- if (parent->d_subdirs.next == &child->d_u.d_child) {
- /*
- * Try the next sibling.
- */
- if (child->d_u.d_child.next != &parent->d_subdirs) {
- child = list_entry(child->d_u.d_child.next,
- struct dentry,
- d_u.d_child);
- goto next_sibling;
- }
-
- /*
- * Avoid infinite loop if we fail to remove
- * one dentry.
- */
- mutex_unlock(&parent->d_inode->i_mutex);
- break;
- }
- simple_release_fs(&debugfs_mount, &debugfs_mount_count);
+ up:
+ if (!__debugfs_remove(child, parent))
+ simple_release_fs(&debugfs_mount, &debugfs_mount_count);
}
- parent = dentry->d_parent;
+ mutex_unlock(&parent->d_inode->i_mutex);
+ child = parent;
+ parent = parent->d_parent;
mutex_lock(&parent->d_inode->i_mutex);
- __debugfs_remove(dentry, parent);
+
+ if (child != dentry) {
+ next = list_entry(child->d_child.next, struct dentry,
+ d_child);
+ goto up;
+ }
+
+ if (!__debugfs_remove(child, parent))
+ simple_release_fs(&debugfs_mount, &debugfs_mount_count);
mutex_unlock(&parent->d_inode->i_mutex);
- simple_release_fs(&debugfs_mount, &debugfs_mount_count);
}
EXPORT_SYMBOL_GPL(debugfs_remove_recursive);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 073d30b9d1ac..a726b9f29cb7 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -498,6 +498,7 @@ static void devpts_kill_sb(struct super_block *sb)
{
struct pts_fs_info *fsi = DEVPTS_SB(sb);
+ ida_destroy(&fsi->allocated_ptys);
kfree(fsi);
kill_litter_super(sb);
}
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index f71ec125290d..1da2446bf6b0 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -2102,7 +2102,6 @@ ecryptfs_decode_from_filename(unsigned char *dst, size_t *dst_size,
break;
case 2:
dst[dst_byte_offset++] |= (src_byte);
- dst[dst_byte_offset] = 0;
current_bit_offset = 0;
break;
}
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index a7abbea2c096..9ff3664bb3ea 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -196,23 +196,11 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
{
int rc = 0;
struct ecryptfs_crypt_stat *crypt_stat = NULL;
- struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
struct dentry *ecryptfs_dentry = file->f_path.dentry;
/* Private value of ecryptfs_dentry allocated in
* ecryptfs_lookup() */
struct ecryptfs_file_info *file_info;
- mount_crypt_stat = &ecryptfs_superblock_to_private(
- ecryptfs_dentry->d_sb)->mount_crypt_stat;
- if ((mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
- && ((file->f_flags & O_WRONLY) || (file->f_flags & O_RDWR)
- || (file->f_flags & O_CREAT) || (file->f_flags & O_TRUNC)
- || (file->f_flags & O_APPEND))) {
- printk(KERN_WARNING "Mount has encrypted view enabled; "
- "files may only be read\n");
- rc = -EPERM;
- goto out;
- }
/* Released in ecryptfs_release or end of function if failure */
file_info = kmem_cache_zalloc(ecryptfs_file_info_cache, GFP_KERNEL);
ecryptfs_set_file_private(file, file_info);
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 5eab400e2590..41baf8b5e0eb 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -1051,7 +1051,7 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
}
rc = vfs_setxattr(lower_dentry, name, value, size, flags);
- if (!rc)
+ if (!rc && dentry->d_inode)
fsstack_copy_attr_all(dentry->d_inode, lower_dentry->d_inode);
out:
return rc;
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 7d52806c2119..4725a07f003c 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -1149,7 +1149,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
struct ecryptfs_msg_ctx *msg_ctx;
struct ecryptfs_message *msg = NULL;
char *auth_tok_sig;
- char *payload;
+ char *payload = NULL;
size_t payload_len = 0;
int rc;
@@ -1203,6 +1203,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
}
out:
kfree(msg);
+ kfree(payload);
return rc;
}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index e924cf45aad9..329a9cc2b2eb 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -494,6 +494,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
{
struct super_block *s;
struct ecryptfs_sb_info *sbi;
+ struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
struct ecryptfs_dentry_info *root_info;
const char *err = "Getting sb failed";
struct inode *inode;
@@ -512,6 +513,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
err = "Error parsing options";
goto out;
}
+ mount_crypt_stat = &sbi->mount_crypt_stat;
s = sget(fs_type, NULL, set_anon_super, flags, NULL);
if (IS_ERR(s)) {
@@ -558,11 +560,19 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
/**
* Set the POSIX ACL flag based on whether they're enabled in the lower
- * mount. Force a read-only eCryptfs mount if the lower mount is ro.
- * Allow a ro eCryptfs mount even when the lower mount is rw.
+ * mount.
*/
s->s_flags = flags & ~MS_POSIXACL;
- s->s_flags |= path.dentry->d_sb->s_flags & (MS_RDONLY | MS_POSIXACL);
+ s->s_flags |= path.dentry->d_sb->s_flags & MS_POSIXACL;
+
+ /**
+ * Force a read-only eCryptfs mount when:
+ * 1) The lower mount is ro
+ * 2) The ecryptfs_encrypted_view mount option is specified
+ */
+ if (path.dentry->d_sb->s_flags & MS_RDONLY ||
+ mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
+ s->s_flags |= MS_RDONLY;
s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
s->s_blocksize = path.dentry->d_sb->s_blocksize;
diff --git a/fs/exec.c b/fs/exec.c
index ffd7a813ad3d..dd6aa61c8548 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -607,7 +607,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
return -ENOMEM;
lru_add_drain();
- tlb_gather_mmu(&tlb, mm, 0);
+ tlb_gather_mmu(&tlb, mm, old_start, old_end);
if (new_end > old_start) {
/*
* when the old and new regions overlap clear from new_end.
@@ -624,7 +624,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
free_pgd_range(&tlb, old_start, old_end, new_end,
vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
}
- tlb_finish_mmu(&tlb, new_end, old_end);
+ tlb_finish_mmu(&tlb, old_start, old_end);
/*
* Shrink the vma to just the new range. Always succeeds.
@@ -654,10 +654,10 @@ int setup_arg_pages(struct linux_binprm *bprm,
unsigned long rlim_stack;
#ifdef CONFIG_STACK_GROWSUP
- /* Limit stack size to 1GB */
+ /* Limit stack size */
stack_base = rlimit_max(RLIMIT_STACK);
- if (stack_base > (1 << 30))
- stack_base = 1 << 30;
+ if (stack_base > STACK_SIZE_MAX)
+ stack_base = STACK_SIZE_MAX;
/* Make sure we didn't let the argument array grow too large. */
if (vma->vm_end - vma->vm_start > stack_base)
@@ -1669,6 +1669,12 @@ int __get_dumpable(unsigned long mm_flags)
return (ret > SUID_DUMP_USER) ? SUID_DUMP_ROOT : ret;
}
+/*
+ * This returns the actual value of the suid_dumpable flag. For things
+ * that are using this for checking for privilege transitions, it must
+ * test against SUID_DUMP_USER rather than treating it as a boolean
+ * value.
+ */
int get_dumpable(struct mm_struct *mm)
{
return __get_dumpable(mm->flags);
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index b74422888604..85cde3e76290 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -103,7 +103,7 @@ int ore_verify_layout(unsigned total_comps, struct ore_layout *layout)
layout->max_io_length =
(BIO_MAX_PAGES_KMALLOC * PAGE_SIZE - layout->stripe_unit) *
- layout->group_width;
+ (layout->group_width - layout->parity);
if (layout->parity) {
unsigned stripe_length =
(layout->group_width - layout->parity) *
@@ -286,7 +286,8 @@ int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc,
if (length) {
ore_calc_stripe_info(layout, offset, length, &ios->si);
ios->length = ios->si.length;
- ios->nr_pages = (ios->length + PAGE_SIZE - 1) / PAGE_SIZE;
+ ios->nr_pages = ((ios->offset & (PAGE_SIZE - 1)) +
+ ios->length + PAGE_SIZE - 1) / PAGE_SIZE;
if (layout->parity)
_ore_post_alloc_raid_stuff(ios);
}
@@ -536,6 +537,7 @@ void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset,
u64 H = LmodS - G * T;
u32 N = div_u64(H, U);
+ u32 Nlast;
/* "H - (N * U)" is just "H % U" so it's bound to u32 */
u32 C = (u32)(H - (N * U)) / stripe_unit + G * group_width;
@@ -568,6 +570,10 @@ void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset,
si->length = T - H;
if (si->length > length)
si->length = length;
+
+ Nlast = div_u64(H + si->length + U - 1, U);
+ si->maxdevUnits = Nlast - N;
+
si->M = M;
}
EXPORT_SYMBOL(ore_calc_stripe_info);
@@ -583,13 +589,16 @@ int _ore_add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg,
int ret;
if (per_dev->bio == NULL) {
- unsigned pages_in_stripe = ios->layout->group_width *
- (ios->layout->stripe_unit / PAGE_SIZE);
- unsigned nr_pages = ios->nr_pages * ios->layout->group_width /
- (ios->layout->group_width -
- ios->layout->parity);
- unsigned bio_size = (nr_pages + pages_in_stripe) /
- ios->layout->group_width;
+ unsigned bio_size;
+
+ if (!ios->reading) {
+ bio_size = ios->si.maxdevUnits;
+ } else {
+ bio_size = (ios->si.maxdevUnits + 1) *
+ (ios->layout->group_width - ios->layout->parity) /
+ ios->layout->group_width;
+ }
+ bio_size *= (ios->layout->stripe_unit / PAGE_SIZE);
per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
if (unlikely(!per_dev->bio)) {
@@ -609,8 +618,12 @@ int _ore_add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg,
added_len = bio_add_pc_page(q, per_dev->bio, pages[pg],
pglen, pgbase);
if (unlikely(pglen != added_len)) {
- ORE_DBGMSG("Failed bio_add_pc_page bi_vcnt=%u\n",
- per_dev->bio->bi_vcnt);
+ /* If bi_vcnt == bi_max then this is a SW BUG */
+ ORE_DBGMSG("Failed bio_add_pc_page bi_vcnt=0x%x "
+ "bi_max=0x%x BIO_MAX=0x%x cur_len=0x%x\n",
+ per_dev->bio->bi_vcnt,
+ per_dev->bio->bi_max_vecs,
+ BIO_MAX_PAGES_KMALLOC, cur_len);
ret = -ENOMEM;
goto out;
}
@@ -1098,7 +1111,7 @@ int ore_truncate(struct ore_layout *layout, struct ore_components *oc,
size_attr->attr = g_attr_logical_length;
size_attr->attr.val_ptr = &size_attr->newsize;
- ORE_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n",
+ ORE_DBGMSG2("trunc(0x%llx) obj_offset=0x%llx dev=%d\n",
_LLU(oc->comps->obj.id), _LLU(obj_size), i);
ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1,
&size_attr->attr);
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 262fc9940982..b4eec4c9a790 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -50,7 +50,7 @@ find_acceptable_alias(struct dentry *result,
inode = result->d_inode;
spin_lock(&inode->i_lock);
- hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+ hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
dget(dentry);
spin_unlock(&inode->i_lock);
if (toput)
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 0a87bb10998d..99d84ce038b8 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -632,6 +632,8 @@ static int ext2_get_blocks(struct inode *inode,
int count = 0;
ext2_fsblk_t first_block = 0;
+ BUG_ON(maxblocks == 0);
+
depth = ext2_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
if (depth == 0)
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 288534920fe5..20d6697bd638 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1493,6 +1493,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
sb->s_blocksize - offset : towrite;
tmp_bh.b_state = 0;
+ tmp_bh.b_size = sb->s_blocksize;
err = ext2_get_block(inode, blk, &tmp_bh, 1);
if (err < 0)
goto out;
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
index 1c3312858fcf..e98171a11cfe 100644
--- a/fs/ext2/xip.c
+++ b/fs/ext2/xip.c
@@ -35,6 +35,7 @@ __ext2_get_block(struct inode *inode, pgoff_t pgoff, int create,
int rc;
memset(&tmp, 0, sizeof(struct buffer_head));
+ tmp.b_size = 1 << inode->i_blkbits;
rc = ext2_get_block(inode, pgoff, &tmp, create);
*result = tmp.b_blocknr;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 692de13e3596..cea8ecf3e76e 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -576,11 +576,8 @@ static int htree_dirblock_to_tree(struct file *dir_file,
if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh,
(block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb))
+((char *)de - bh->b_data))) {
- /* On error, skip the f_pos to the next block. */
- dir_file->f_pos = (dir_file->f_pos |
- (dir->i_sb->s_blocksize - 1)) + 1;
- brelse (bh);
- return count;
+ /* silently ignore the rest of the block */
+ break;
}
ext3fs_dirhash(de->name, de->name_len, hinfo);
if ((hinfo->hash < start_hash) ||
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 6356665a74bb..882d4bdfd428 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1300,13 +1300,6 @@ set_qf_format:
"not specified.");
return 0;
}
- } else {
- if (sbi->s_jquota_fmt) {
- ext3_msg(sb, KERN_ERR, "error: journaled quota format "
- "specified with no journaling "
- "enabled.");
- return 0;
- }
}
#endif
return 1;
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index d0f13eada0ed..3742e4c85723 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -38,8 +38,8 @@ ext4_group_t ext4_get_group_number(struct super_block *sb,
ext4_group_t group;
if (test_opt2(sb, STD_GROUP_SIZE))
- group = (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
- block) >>
+ group = (block -
+ le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) >>
(EXT4_BLOCK_SIZE_BITS(sb) + EXT4_CLUSTER_BITS(sb) + 3);
else
ext4_get_group_no_and_offset(sb, block, &group, NULL);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 5aae3d12d400..2a71466b0115 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -280,6 +280,16 @@ struct ext4_io_submit {
/* Translate # of blks to # of clusters */
#define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \
(sbi)->s_cluster_bits)
+/* Mask out the low bits to get the starting block of the cluster */
+#define EXT4_PBLK_CMASK(s, pblk) ((pblk) & \
+ ~((ext4_fsblk_t) (s)->s_cluster_ratio - 1))
+#define EXT4_LBLK_CMASK(s, lblk) ((lblk) & \
+ ~((ext4_lblk_t) (s)->s_cluster_ratio - 1))
+/* Get the cluster offset */
+#define EXT4_PBLK_COFF(s, pblk) ((pblk) & \
+ ((ext4_fsblk_t) (s)->s_cluster_ratio - 1))
+#define EXT4_LBLK_COFF(s, lblk) ((lblk) & \
+ ((ext4_lblk_t) (s)->s_cluster_ratio - 1))
/*
* Structure of a blocks group descriptor
@@ -579,6 +589,7 @@ enum {
#define EXT4_FREE_BLOCKS_NO_QUOT_UPDATE 0x0008
#define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010
#define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020
+#define EXT4_FREE_BLOCKS_RESERVE 0x0040
/*
* Flags used by ext4_discard_partial_page_buffers
@@ -764,6 +775,8 @@ do { \
if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
(einode)->xtime.tv_sec = \
(signed)le32_to_cpu((raw_inode)->xtime); \
+ else \
+ (einode)->xtime.tv_sec = 0; \
if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
ext4_decode_extra_time(&(einode)->xtime, \
raw_inode->xtime ## _extra); \
@@ -2076,6 +2089,7 @@ int do_journal_get_write_access(handle_t *handle,
#define CONVERT_INLINE_DATA 2
extern struct inode *ext4_iget(struct super_block *, unsigned long);
+extern struct inode *ext4_iget_normal(struct super_block *, unsigned long);
extern int ext4_write_inode(struct inode *, struct writeback_control *);
extern int ext4_setattr(struct dentry *, struct iattr *);
extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
@@ -2248,8 +2262,8 @@ extern int ext4_register_li_request(struct super_block *sb,
static inline int ext4_has_group_desc_csum(struct super_block *sb)
{
return EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_GDT_CSUM |
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM);
+ EXT4_FEATURE_RO_COMPAT_GDT_CSUM) ||
+ (EXT4_SB(sb)->s_chksum_driver != NULL);
}
static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 451eb4045330..1be3996b5942 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -219,10 +219,19 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
set_buffer_prio(bh);
if (ext4_handle_valid(handle)) {
err = jbd2_journal_dirty_metadata(handle, bh);
- if (err) {
- /* Errors can only happen if there is a bug */
- handle->h_err = err;
- __ext4_journal_stop(where, line, handle);
+ /* Errors can only happen if there is a bug */
+ if (WARN_ON_ONCE(err)) {
+ ext4_journal_abort_handle(where, line, __func__, bh,
+ handle, err);
+ ext4_error_inode(inode, where, line,
+ bh->b_blocknr,
+ "journal_dirty_metadata failed: "
+ "handle type %u started at line %u, "
+ "credits %u/%u, errcode %d",
+ handle->h_type,
+ handle->h_line_no,
+ handle->h_requested_credits,
+ handle->h_buffer_credits, err);
}
} else {
if (inode)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index bc0f1910b9cf..7fbd1c5b74af 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -360,8 +360,10 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
{
ext4_fsblk_t block = ext4_ext_pblock(ext);
int len = ext4_ext_get_actual_len(ext);
+ ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
+ ext4_lblk_t last = lblock + len - 1;
- if (len == 0)
+ if (lblock > last)
return 0;
return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
}
@@ -387,11 +389,26 @@ static int ext4_valid_extent_entries(struct inode *inode,
if (depth == 0) {
/* leaf entries */
struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);
+ struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
+ ext4_fsblk_t pblock = 0;
+ ext4_lblk_t lblock = 0;
+ ext4_lblk_t prev = 0;
+ int len = 0;
while (entries) {
if (!ext4_valid_extent(inode, ext))
return 0;
+
+ /* Check for overlapping extents */
+ lblock = le32_to_cpu(ext->ee_block);
+ len = ext4_ext_get_actual_len(ext);
+ if ((lblock <= prev) && prev) {
+ pblock = ext4_ext_pblock(ext);
+ es->s_last_error_block = cpu_to_le64(pblock);
+ return 0;
+ }
ext++;
entries--;
+ prev = lblock + len - 1;
}
} else {
struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);
@@ -1705,7 +1722,8 @@ static void ext4_ext_try_to_merge_up(handle_t *handle,
brelse(path[1].p_bh);
ext4_free_blocks(handle, inode, NULL, blk, 1,
- EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
+ EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET |
+ EXT4_FREE_BLOCKS_RESERVE);
}
/*
@@ -1755,8 +1773,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
depth = ext_depth(inode);
if (!path[depth].p_ext)
goto out;
- b2 = le32_to_cpu(path[depth].p_ext->ee_block);
- b2 &= ~(sbi->s_cluster_ratio - 1);
+ b2 = EXT4_LBLK_CMASK(sbi, le32_to_cpu(path[depth].p_ext->ee_block));
/*
* get the next allocated block if the extent in the path
@@ -1766,7 +1783,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
b2 = ext4_ext_next_allocated_block(path);
if (b2 == EXT_MAX_BLOCKS)
goto out;
- b2 &= ~(sbi->s_cluster_ratio - 1);
+ b2 = EXT4_LBLK_CMASK(sbi, b2);
}
/* check for wrap through zero on extent logical start block*/
@@ -2427,7 +2444,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
* truncate operation has removed all of the blocks in
* the cluster.
*/
- if (pblk & (sbi->s_cluster_ratio - 1) &&
+ if (EXT4_PBLK_COFF(sbi, pblk) &&
(ee_len == num))
*partial_cluster = EXT4_B2C(sbi, pblk);
else
@@ -2495,6 +2512,27 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
ex_ee_block = le32_to_cpu(ex->ee_block);
ex_ee_len = ext4_ext_get_actual_len(ex);
+ /*
+ * If we're starting with an extent other than the last one in the
+ * node, we need to see if it shares a cluster with the extent to
+ * the right (towards the end of the file). If its leftmost cluster
+ * is this extent's rightmost cluster and it is not cluster aligned,
+ * we'll mark it as a partial that is not to be deallocated.
+ */
+
+ if (ex != EXT_LAST_EXTENT(eh)) {
+ ext4_fsblk_t current_pblk, right_pblk;
+ long long current_cluster, right_cluster;
+
+ current_pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
+ current_cluster = (long long)EXT4_B2C(sbi, current_pblk);
+ right_pblk = ext4_ext_pblock(ex + 1);
+ right_cluster = (long long)EXT4_B2C(sbi, right_pblk);
+ if (current_cluster == right_cluster &&
+ EXT4_PBLK_COFF(sbi, right_pblk))
+ *partial_cluster = -right_cluster;
+ }
+
trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster);
while (ex >= EXT_FIRST_EXTENT(eh) &&
@@ -3658,7 +3696,7 @@ int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
ext4_lblk_t lblk_start, lblk_end;
- lblk_start = lblk & (~(sbi->s_cluster_ratio - 1));
+ lblk_start = EXT4_LBLK_CMASK(sbi, lblk);
lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
return ext4_find_delalloc_range(inode, lblk_start, lblk_end);
@@ -3717,9 +3755,9 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks);
/* Check towards left side */
- c_offset = lblk_start & (sbi->s_cluster_ratio - 1);
+ c_offset = EXT4_LBLK_COFF(sbi, lblk_start);
if (c_offset) {
- lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1));
+ lblk_from = EXT4_LBLK_CMASK(sbi, lblk_start);
lblk_to = lblk_from + c_offset - 1;
if (ext4_find_delalloc_range(inode, lblk_from, lblk_to))
@@ -3727,7 +3765,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
}
/* Now check towards right. */
- c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1);
+ c_offset = EXT4_LBLK_COFF(sbi, lblk_start + num_blks);
if (allocated_clusters && c_offset) {
lblk_from = lblk_start + num_blks;
lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
@@ -3935,7 +3973,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
struct ext4_ext_path *path)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
+ ext4_lblk_t c_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
ext4_lblk_t ex_cluster_start, ex_cluster_end;
ext4_lblk_t rr_cluster_start;
ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
@@ -3953,8 +3991,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
(rr_cluster_start == ex_cluster_start)) {
if (rr_cluster_start == ex_cluster_end)
ee_start += ee_len - 1;
- map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) +
- c_offset;
+ map->m_pblk = EXT4_PBLK_CMASK(sbi, ee_start) + c_offset;
map->m_len = min(map->m_len,
(unsigned) sbi->s_cluster_ratio - c_offset);
/*
@@ -4017,7 +4054,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_extent newex, *ex, *ex2;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
ext4_fsblk_t newblock = 0;
- int free_on_err = 0, err = 0, depth;
+ int free_on_err = 0, err = 0, depth, ret;
unsigned int allocated = 0, offset = 0;
unsigned int allocated_clusters = 0;
struct ext4_allocation_request ar;
@@ -4078,9 +4115,13 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
if (!ext4_ext_is_uninitialized(ex))
goto out;
- allocated = ext4_ext_handle_uninitialized_extents(
+ ret = ext4_ext_handle_uninitialized_extents(
handle, inode, map, path, flags,
allocated, newblock);
+ if (ret < 0)
+ err = ret;
+ else
+ allocated = ret;
goto out3;
}
}
@@ -4108,7 +4149,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
*/
map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
newex.ee_block = cpu_to_le32(map->m_lblk);
- cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
+ cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
/*
* If we are doing bigalloc, check to see if the extent returned
@@ -4176,7 +4217,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* needed so that future calls to get_implied_cluster_alloc()
* work correctly.
*/
- offset = map->m_lblk & (sbi->s_cluster_ratio - 1);
+ offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
ar.len = EXT4_NUM_B2C(sbi, offset+allocated);
ar.goal -= offset;
ar.logical -= offset;
@@ -4386,9 +4427,20 @@ void ext4_ext_truncate(handle_t *handle, struct inode *inode)
last_block = (inode->i_size + sb->s_blocksize - 1)
>> EXT4_BLOCK_SIZE_BITS(sb);
+retry:
err = ext4_es_remove_extent(inode, last_block,
EXT_MAX_BLOCKS - last_block);
+ if (err == -ENOMEM) {
+ cond_resched();
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ goto retry;
+ }
+ if (err) {
+ ext4_std_error(inode->i_sb, err);
+ return;
+ }
err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
+ ext4_std_error(inode->i_sb, err);
}
static void ext4_falloc_update_inode(struct inode *inode,
@@ -4659,7 +4711,7 @@ static int ext4_xattr_fiemap(struct inode *inode,
error = ext4_get_inode_loc(inode, &iloc);
if (error)
return error;
- physical = iloc.bh->b_blocknr << blockbits;
+ physical = (__u64)iloc.bh->b_blocknr << blockbits;
offset = EXT4_GOOD_OLD_INODE_SIZE +
EXT4_I(inode)->i_extra_isize;
physical += offset;
@@ -4667,7 +4719,7 @@ static int ext4_xattr_fiemap(struct inode *inode,
flags |= FIEMAP_EXTENT_DATA_INLINE;
brelse(iloc.bh);
} else { /* external block */
- physical = EXT4_I(inode)->i_file_acl << blockbits;
+ physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits;
length = inode->i_sb->s_blocksize;
}
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index b1b4d51b5d86..ec9770f42538 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -82,7 +82,7 @@ ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
size_t count = iov_length(iov, nr_segs);
loff_t final_size = pos + count;
- if (pos >= inode->i_size)
+ if (pos >= i_size_read(inode))
return 0;
if ((pos & blockmask) || (final_size & blockmask))
@@ -100,7 +100,7 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
struct blk_plug plug;
int unaligned_aio = 0;
ssize_t ret;
- int overwrite = 0;
+ int *overwrite = iocb->private;
size_t length = iov_length(iov, nr_segs);
if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
@@ -118,8 +118,6 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
mutex_lock(&inode->i_mutex);
blk_start_plug(&plug);
- iocb->private = &overwrite;
-
/* check whether we do a DIO overwrite or not */
if (ext4_should_dioread_nolock(inode) && !unaligned_aio &&
!file->f_mapping->nrpages && pos + length <= i_size_read(inode)) {
@@ -143,7 +141,7 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
* So we should check these two conditions.
*/
if (err == len && (map.m_flags & EXT4_MAP_MAPPED))
- overwrite = 1;
+ *overwrite = 1;
}
ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
@@ -170,6 +168,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
{
struct inode *inode = file_inode(iocb->ki_filp);
ssize_t ret;
+ int overwrite = 0;
/*
* If we have encountered a bitmap-format file, the size limit
@@ -190,6 +189,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
}
}
+ iocb->private = &overwrite;
if (unlikely(iocb->ki_filp->f_flags & O_DIRECT))
ret = ext4_file_dio_write(iocb, iov, nr_segs, pos);
else
@@ -312,7 +312,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
blkbits = inode->i_sb->s_blocksize_bits;
startoff = *offset;
lastoff = startoff;
- endoff = (map->m_lblk + map->m_len) << blkbits;
+ endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits;
index = startoff >> PAGE_CACHE_SHIFT;
end = endoff >> PAGE_CACHE_SHIFT;
@@ -457,7 +457,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
ret = ext4_map_blocks(NULL, inode, &map, 0);
if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
if (last != start)
- dataoff = last << blkbits;
+ dataoff = (loff_t)last << blkbits;
break;
}
@@ -468,7 +468,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
ext4_es_find_delayed_extent_range(inode, last, last, &es);
if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
if (last != start)
- dataoff = last << blkbits;
+ dataoff = (loff_t)last << blkbits;
break;
}
@@ -486,7 +486,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
}
last++;
- dataoff = last << blkbits;
+ dataoff = (loff_t)last << blkbits;
} while (last <= end);
mutex_unlock(&inode->i_mutex);
@@ -540,7 +540,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
ret = ext4_map_blocks(NULL, inode, &map, 0);
if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
last += ret;
- holeoff = last << blkbits;
+ holeoff = (loff_t)last << blkbits;
continue;
}
@@ -551,7 +551,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
ext4_es_find_delayed_extent_range(inode, last, last, &es);
if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
last = es.es_lblk + es.es_len;
- holeoff = last << blkbits;
+ holeoff = (loff_t)last << blkbits;
continue;
}
@@ -566,7 +566,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
&map, &holeoff);
if (!unwritten) {
last += ret;
- holeoff = last << blkbits;
+ holeoff = (loff_t)last << blkbits;
continue;
}
}
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 00a818d67b54..4d4718cf25ab 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -734,11 +734,8 @@ repeat_in_this_group:
ino = ext4_find_next_zero_bit((unsigned long *)
inode_bitmap_bh->b_data,
EXT4_INODES_PER_GROUP(sb), ino);
- if (ino >= EXT4_INODES_PER_GROUP(sb)) {
- if (++group == ngroups)
- group = 0;
- continue;
- }
+ if (ino >= EXT4_INODES_PER_GROUP(sb))
+ goto next_group;
if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) {
ext4_error(sb, "reserved inode found cleared - "
"inode=%lu", ino + 1);
@@ -768,6 +765,9 @@ repeat_in_this_group:
goto got; /* we grabbed the inode! */
if (ino < EXT4_INODES_PER_GROUP(sb))
goto repeat_in_this_group;
+next_group:
+ if (++group == ngroups)
+ group = 0;
}
err = -ENOSPC;
goto out;
@@ -780,12 +780,23 @@ got:
goto out;
}
+ BUFFER_TRACE(group_desc_bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, group_desc_bh);
+ if (err) {
+ ext4_std_error(sb, err);
+ goto out;
+ }
+
/* We may have to initialize the block bitmap if it isn't already */
if (ext4_has_group_desc_csum(sb) &&
gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
struct buffer_head *block_bitmap_bh;
block_bitmap_bh = ext4_read_block_bitmap(sb, group);
+ if (!block_bitmap_bh) {
+ err = -EIO;
+ goto out;
+ }
BUFFER_TRACE(block_bitmap_bh, "get block bitmap access");
err = ext4_journal_get_write_access(handle, block_bitmap_bh);
if (err) {
@@ -816,13 +827,6 @@ got:
}
}
- BUFFER_TRACE(group_desc_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, group_desc_bh);
- if (err) {
- ext4_std_error(sb, err);
- goto out;
- }
-
/* Update the relevant bg descriptor fields */
if (ext4_has_group_desc_csum(sb)) {
int free;
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index b8d5d351e24f..589061469687 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -390,7 +390,13 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
return 0;
failed:
for (; i >= 0; i--) {
- if (i != indirect_blks && branch[i].bh)
+ /*
+ * We want to ext4_forget() only freshly allocated indirect
+ * blocks. Buffer for new_blocks[i-1] is at branch[i].bh and
+ * buffer at branch[0].bh is indirect block / inode already
+ * existing before ext4_alloc_branch() was called.
+ */
+ if (i > 0 && i != indirect_blks && branch[i].bh)
ext4_forget(handle, 1, inode, branch[i].bh,
branch[i].bh->b_blocknr);
ext4_free_blocks(handle, inode, NULL, new_blocks[i],
@@ -1325,16 +1331,24 @@ static int free_hole_blocks(handle_t *handle, struct inode *inode,
blk = *i_data;
if (level > 0) {
ext4_lblk_t first2;
+ ext4_lblk_t count2;
+
bh = sb_bread(inode->i_sb, le32_to_cpu(blk));
if (!bh) {
EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk),
"Read failure");
return -EIO;
}
- first2 = (first > offset) ? first - offset : 0;
+ if (first > offset) {
+ first2 = first - offset;
+ count2 = count;
+ } else {
+ first2 = 0;
+ count2 = count - (offset - first);
+ }
ret = free_hole_blocks(handle, inode, bh,
(__le32 *)bh->b_data, level - 1,
- first2, count - offset,
+ first2, count2,
inode->i_sb->s_blocksize >> 2);
if (ret) {
brelse(bh);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 3e2bf873e8a8..e350be6c7ac6 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1842,7 +1842,7 @@ int ext4_inline_data_fiemap(struct inode *inode,
if (error)
goto out;
- physical = iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits;
+ physical = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits;
physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data;
physical += offsetof(struct ext4_inode, i_block);
length = i_size_read(inode);
@@ -1957,9 +1957,11 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
}
/* Clear the content within i_blocks. */
- if (i_size < EXT4_MIN_INLINE_DATA_SIZE)
- memset(ext4_raw_inode(&is.iloc)->i_block + i_size, 0,
- EXT4_MIN_INLINE_DATA_SIZE - i_size);
+ if (i_size < EXT4_MIN_INLINE_DATA_SIZE) {
+ void *p = (void *) ext4_raw_inode(&is.iloc)->i_block;
+ memset(p + i_size, 0,
+ EXT4_MIN_INLINE_DATA_SIZE - i_size);
+ }
EXT4_I(inode)->i_inline_size = i_size <
EXT4_MIN_INLINE_DATA_SIZE ?
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d6382b89ecbd..e48bd5a1814b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -38,6 +38,7 @@
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/aio.h>
+#include <linux/bitops.h>
#include "ext4_jbd2.h"
#include "xattr.h"
@@ -1118,10 +1119,13 @@ static int ext4_write_end(struct file *file,
}
}
- if (ext4_has_inline_data(inode))
- copied = ext4_write_inline_data_end(inode, pos, len,
- copied, page);
- else
+ if (ext4_has_inline_data(inode)) {
+ ret = ext4_write_inline_data_end(inode, pos, len,
+ copied, page);
+ if (ret < 0)
+ goto errout;
+ copied = ret;
+ } else
copied = block_write_end(file, mapping, pos,
len, copied, page, fsdata);
@@ -1260,7 +1264,6 @@ static int ext4_journalled_write_end(struct file *file,
*/
static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
{
- int retries = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned int md_needed;
@@ -1272,7 +1275,6 @@ static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
* in order to allocate nrblocks
* worse case is one extent per block
*/
-repeat:
spin_lock(&ei->i_block_reservation_lock);
/*
* ext4_calc_metadata_amount() has side effects, which we have
@@ -1292,10 +1294,6 @@ repeat:
ei->i_da_metadata_calc_len = save_len;
ei->i_da_metadata_calc_last_lblock = save_last_lblock;
spin_unlock(&ei->i_block_reservation_lock);
- if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
- cond_resched();
- goto repeat;
- }
return -ENOSPC;
}
ei->i_reserved_meta_blocks += md_needed;
@@ -1309,7 +1307,6 @@ repeat:
*/
static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
{
- int retries = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned int md_needed;
@@ -1331,7 +1328,6 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
* in order to allocate nrblocks
* worse case is one extent per block
*/
-repeat:
spin_lock(&ei->i_block_reservation_lock);
/*
* ext4_calc_metadata_amount() has side effects, which we have
@@ -1351,10 +1347,6 @@ repeat:
ei->i_da_metadata_calc_len = save_len;
ei->i_da_metadata_calc_last_lblock = save_last_lblock;
spin_unlock(&ei->i_block_reservation_lock);
- if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
- cond_resched();
- goto repeat;
- }
dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
return -ENOSPC;
}
@@ -2655,6 +2647,20 @@ static int ext4_nonda_switch(struct super_block *sb)
return 0;
}
+/* We always reserve for an inode update; the superblock could be there too */
+static int ext4_da_write_credits(struct inode *inode, loff_t pos, unsigned len)
+{
+ if (likely(EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+ EXT4_FEATURE_RO_COMPAT_LARGE_FILE)))
+ return 1;
+
+ if (pos + len <= 0x7fffffffULL)
+ return 1;
+
+ /* We might need to update the superblock to set LARGE_FILE */
+ return 2;
+}
+
static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
@@ -2705,7 +2711,8 @@ retry_grab:
* of file which has an already mapped buffer.
*/
retry_journal:
- handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 1);
+ handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
+ ext4_da_write_credits(inode, pos, len));
if (IS_ERR(handle)) {
page_cache_release(page);
return PTR_ERR(handle);
@@ -4053,18 +4060,20 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
void ext4_set_inode_flags(struct inode *inode)
{
unsigned int flags = EXT4_I(inode)->i_flags;
+ unsigned int new_fl = 0;
- inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
if (flags & EXT4_SYNC_FL)
- inode->i_flags |= S_SYNC;
+ new_fl |= S_SYNC;
if (flags & EXT4_APPEND_FL)
- inode->i_flags |= S_APPEND;
+ new_fl |= S_APPEND;
if (flags & EXT4_IMMUTABLE_FL)
- inode->i_flags |= S_IMMUTABLE;
+ new_fl |= S_IMMUTABLE;
if (flags & EXT4_NOATIME_FL)
- inode->i_flags |= S_NOATIME;
+ new_fl |= S_NOATIME;
if (flags & EXT4_DIRSYNC_FL)
- inode->i_flags |= S_DIRSYNC;
+ new_fl |= S_DIRSYNC;
+ set_mask_bits(&inode->i_flags,
+ S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl);
}
/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
@@ -4357,6 +4366,13 @@ bad_inode:
return ERR_PTR(ret);
}
+struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino)
+{
+ if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
+ return ERR_PTR(-EIO);
+ return ext4_iget(sb, ino);
+}
+
static int ext4_inode_blocks_set(handle_t *handle,
struct ext4_inode *raw_inode,
struct ext4_inode_info *ei)
@@ -4703,7 +4719,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
ext4_journal_stop(handle);
}
- if (attr->ia_valid & ATTR_SIZE) {
+ if (attr->ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
+ handle_t *handle;
+ loff_t oldsize = inode->i_size;
if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -4711,73 +4729,64 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (attr->ia_size > sbi->s_bitmap_maxbytes)
return -EFBIG;
}
- }
- if (S_ISREG(inode->i_mode) &&
- attr->ia_valid & ATTR_SIZE &&
- (attr->ia_size < inode->i_size)) {
- handle_t *handle;
+ if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size)
+ inode_inc_iversion(inode);
- handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
- if (IS_ERR(handle)) {
- error = PTR_ERR(handle);
- goto err_out;
- }
- if (ext4_handle_valid(handle)) {
- error = ext4_orphan_add(handle, inode);
- orphan = 1;
- }
- EXT4_I(inode)->i_disksize = attr->ia_size;
- rc = ext4_mark_inode_dirty(handle, inode);
- if (!error)
- error = rc;
- ext4_journal_stop(handle);
-
- if (ext4_should_order_data(inode)) {
- error = ext4_begin_ordered_truncate(inode,
+ if (S_ISREG(inode->i_mode) &&
+ (attr->ia_size < inode->i_size)) {
+ if (ext4_should_order_data(inode)) {
+ error = ext4_begin_ordered_truncate(inode,
attr->ia_size);
- if (error) {
- /* Do as much error cleanup as possible */
- handle = ext4_journal_start(inode,
- EXT4_HT_INODE, 3);
- if (IS_ERR(handle)) {
- ext4_orphan_del(NULL, inode);
+ if (error)
goto err_out;
- }
- ext4_orphan_del(handle, inode);
- orphan = 0;
- ext4_journal_stop(handle);
+ }
+ handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
+ if (IS_ERR(handle)) {
+ error = PTR_ERR(handle);
+ goto err_out;
+ }
+ if (ext4_handle_valid(handle)) {
+ error = ext4_orphan_add(handle, inode);
+ orphan = 1;
+ }
+ EXT4_I(inode)->i_disksize = attr->ia_size;
+ rc = ext4_mark_inode_dirty(handle, inode);
+ if (!error)
+ error = rc;
+ ext4_journal_stop(handle);
+ if (error) {
+ ext4_orphan_del(NULL, inode);
goto err_out;
}
}
- }
- if (attr->ia_valid & ATTR_SIZE) {
- if (attr->ia_size != inode->i_size) {
- loff_t oldsize = inode->i_size;
-
- i_size_write(inode, attr->ia_size);
- /*
- * Blocks are going to be removed from the inode. Wait
- * for dio in flight. Temporarily disable
- * dioread_nolock to prevent livelock.
- */
- if (orphan) {
- if (!ext4_should_journal_data(inode)) {
- ext4_inode_block_unlocked_dio(inode);
- inode_dio_wait(inode);
- ext4_inode_resume_unlocked_dio(inode);
- } else
- ext4_wait_for_tail_page_commit(inode);
- }
- /*
- * Truncate pagecache after we've waited for commit
- * in data=journal mode to make pages freeable.
- */
- truncate_pagecache(inode, oldsize, inode->i_size);
+ i_size_write(inode, attr->ia_size);
+ /*
+ * Blocks are going to be removed from the inode. Wait
+ * for dio in flight. Temporarily disable
+ * dioread_nolock to prevent livelock.
+ */
+ if (orphan) {
+ if (!ext4_should_journal_data(inode)) {
+ ext4_inode_block_unlocked_dio(inode);
+ inode_dio_wait(inode);
+ ext4_inode_resume_unlocked_dio(inode);
+ } else
+ ext4_wait_for_tail_page_commit(inode);
}
- ext4_truncate(inode);
+ /*
+ * Truncate pagecache after we've waited for commit
+ * in data=journal mode to make pages freeable.
+ */
+ truncate_pagecache(inode, oldsize, inode->i_size);
}
+ /*
+ * We want to call ext4_truncate() even if attr->ia_size ==
+ * inode->i_size for cases like truncation of fallocated space
+ */
+ if (attr->ia_valid & ATTR_SIZE)
+ ext4_truncate(inode);
if (!rc) {
setattr_copy(inode, attr);
@@ -4805,7 +4814,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat)
{
struct inode *inode;
- unsigned long delalloc_blocks;
+ unsigned long long delalloc_blocks;
inode = dentry->d_inode;
generic_fillattr(inode, stat);
@@ -4823,7 +4832,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb),
EXT4_I(inode)->i_reserved_data_blocks);
- stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9;
+ stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits-9);
return 0;
}
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 9491ac0590f7..d4fd81c44f55 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -77,8 +77,10 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2)
memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data));
memswap(&ei1->i_flags, &ei2->i_flags, sizeof(ei1->i_flags));
memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize));
- memswap(&ei1->i_es_tree, &ei2->i_es_tree, sizeof(ei1->i_es_tree));
- memswap(&ei1->i_es_lru_nr, &ei2->i_es_lru_nr, sizeof(ei1->i_es_lru_nr));
+ ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS);
+ ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS);
+ ext4_es_lru_del(inode1);
+ ext4_es_lru_del(inode2);
isize = i_size_read(inode1);
i_size_write(inode1, i_size_read(inode2));
@@ -143,7 +145,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
handle = ext4_journal_start(inode_bl, EXT4_HT_MOVE_EXTENTS, 2);
if (IS_ERR(handle)) {
err = -EINVAL;
- goto swap_boot_out;
+ goto journal_err_out;
}
/* Protect extent tree against block allocations via delalloc */
@@ -201,6 +203,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
ext4_double_up_write_data_sem(inode, inode_bl);
+journal_err_out:
ext4_inode_resume_unlocked_dio(inode);
ext4_inode_resume_unlocked_dio(inode_bl);
@@ -546,9 +549,17 @@ group_add_out:
}
case EXT4_IOC_SWAP_BOOT:
+ {
+ int err;
if (!(filp->f_mode & FMODE_WRITE))
return -EBADF;
- return swap_inode_boot_loader(sb, inode);
+ err = mnt_want_write_file(filp);
+ if (err)
+ return err;
+ err = swap_inode_boot_loader(sb, inode);
+ mnt_drop_write_file(filp);
+ return err;
+ }
case EXT4_IOC_RESIZE_FS: {
ext4_fsblk_t n_blocks_count;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index def84082a9a9..df5050f9080b 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1396,6 +1396,8 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
int last = first + count - 1;
struct super_block *sb = e4b->bd_sb;
+ if (WARN_ON(count == 0))
+ return;
BUG_ON(last >= (sb->s_blocksize << 3));
assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
mb_check_buddy(e4b);
@@ -3116,7 +3118,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
}
BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
start > ac->ac_o_ex.fe_logical);
- BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
+ BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
/* now prepare goal request */
@@ -3177,8 +3179,30 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
{
struct ext4_prealloc_space *pa = ac->ac_pa;
+ struct ext4_buddy e4b;
+ int err;
- if (pa && pa->pa_type == MB_INODE_PA)
+ if (pa == NULL) {
+ if (ac->ac_f_ex.fe_len == 0)
+ return;
+ err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
+ if (err) {
+ /*
+ * This should never happen since we pin the
+ * pages in the ext4_allocation_context so
+ * ext4_mb_load_buddy() should never fail.
+ */
+ WARN(1, "mb_load_buddy failed (%d)", err);
+ return;
+ }
+ ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
+ mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
+ ac->ac_f_ex.fe_len);
+ ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
+ ext4_mb_unload_buddy(&e4b);
+ return;
+ }
+ if (pa->pa_type == MB_INODE_PA)
pa->pa_free += ac->ac_b_ex.fe_len;
}
@@ -3423,6 +3447,9 @@ static void ext4_mb_pa_callback(struct rcu_head *head)
{
struct ext4_prealloc_space *pa;
pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
+
+ BUG_ON(atomic_read(&pa->pa_count));
+ BUG_ON(pa->pa_deleted == 0);
kmem_cache_free(ext4_pspace_cachep, pa);
}
@@ -3436,11 +3463,13 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
ext4_group_t grp;
ext4_fsblk_t grp_blk;
- if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
- return;
-
/* in this short window concurrent discard can set pa_deleted */
spin_lock(&pa->pa_lock);
+ if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) {
+ spin_unlock(&pa->pa_lock);
+ return;
+ }
+
if (pa->pa_deleted == 1) {
spin_unlock(&pa->pa_lock);
return;
@@ -4102,7 +4131,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
ext4_get_group_no_and_offset(sb, goal, &group, &block);
/* set up allocation goals */
- ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1);
+ ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical);
ac->ac_status = AC_STATUS_CONTINUE;
ac->ac_sb = sb;
ac->ac_inode = ar->inode;
@@ -4581,6 +4610,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
struct buffer_head *gd_bh;
ext4_group_t block_group;
struct ext4_sb_info *sbi;
+ struct ext4_inode_info *ei = EXT4_I(inode);
struct ext4_buddy e4b;
unsigned int count_clusters;
int err = 0;
@@ -4639,7 +4669,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
* blocks at the beginning or the end unless we are explicitly
* requested to avoid doing so.
*/
- overflow = block & (sbi->s_cluster_ratio - 1);
+ overflow = EXT4_PBLK_COFF(sbi, block);
if (overflow) {
if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
overflow = sbi->s_cluster_ratio - overflow;
@@ -4653,7 +4683,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
count += overflow;
}
}
- overflow = count & (sbi->s_cluster_ratio - 1);
+ overflow = EXT4_LBLK_COFF(sbi, count);
if (overflow) {
if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
if (count > overflow)
@@ -4735,11 +4765,16 @@ do_more:
* blocks being freed are metadata. these blocks shouldn't
* be used until this transaction is committed
*/
+ retry:
new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS);
if (!new_entry) {
- ext4_mb_unload_buddy(&e4b);
- err = -ENOMEM;
- goto error_return;
+ /*
+ * We use a retry loop because
+ * ext4_free_blocks() is not allowed to fail.
+ */
+ cond_resched();
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ goto retry;
}
new_entry->efd_start_cluster = bit;
new_entry->efd_group = block_group;
@@ -4761,8 +4796,8 @@ do_more:
" group:%d block:%d count:%lu failed"
" with %d", block_group, bit, count,
err);
- }
-
+ } else
+ EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);
ext4_lock_group(sb, block_group);
mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
@@ -4774,7 +4809,6 @@ do_more:
ext4_block_bitmap_csum_set(sb, block_group, gdp, bitmap_bh);
ext4_group_desc_csum_set(sb, block_group, gdp);
ext4_unlock_group(sb, block_group);
- percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
@@ -4782,10 +4816,23 @@ do_more:
&sbi->s_flex_groups[flex_group].free_clusters);
}
- ext4_mb_unload_buddy(&e4b);
-
- if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
+ if (flags & EXT4_FREE_BLOCKS_RESERVE && ei->i_reserved_data_blocks) {
+ percpu_counter_add(&sbi->s_dirtyclusters_counter,
+ count_clusters);
+ spin_lock(&ei->i_block_reservation_lock);
+ if (flags & EXT4_FREE_BLOCKS_METADATA)
+ ei->i_reserved_meta_blocks += count_clusters;
+ else
+ ei->i_reserved_data_blocks += count_clusters;
+ spin_unlock(&ei->i_block_reservation_lock);
+ if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
+ dquot_reclaim_block(inode,
+ EXT4_C2B(sbi, count_clusters));
+ } else if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
+ percpu_counter_add(&sbi->s_freeclusters_counter, count_clusters);
+
+ ext4_mb_unload_buddy(&e4b);
/* We dirtied the bitmap block */
BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 6653fc35ecb7..facf8590b714 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -918,11 +918,8 @@ static int htree_dirblock_to_tree(struct file *dir_file,
bh->b_data, bh->b_size,
(block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
+ ((char *)de - bh->b_data))) {
- /* On error, skip the f_pos to the next block. */
- dir_file->f_pos = (dir_file->f_pos |
- (dir->i_sb->s_blocksize - 1)) + 1;
- brelse(bh);
- return count;
+ /* silently ignore the rest of the block */
+ break;
}
ext4fs_dirhash(de->name, de->name_len, hinfo);
if ((hinfo->hash < start_hash) ||
@@ -1433,7 +1430,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
dentry->d_name.name);
return ERR_PTR(-EIO);
}
- inode = ext4_iget(dir->i_sb, ino);
+ inode = ext4_iget_normal(dir->i_sb, ino);
if (inode == ERR_PTR(-ESTALE)) {
EXT4_ERROR_INODE(dir,
"deleted inode referenced: %u",
@@ -1464,7 +1461,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
return ERR_PTR(-EIO);
}
- return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino));
+ return d_obtain_alias(ext4_iget_normal(child->d_inode->i_sb, ino));
}
/*
@@ -1883,7 +1880,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
struct inode *inode)
{
struct inode *dir = dentry->d_parent->d_inode;
- struct buffer_head *bh;
+ struct buffer_head *bh = NULL;
struct ext4_dir_entry_2 *de;
struct ext4_dir_entry_tail *t;
struct super_block *sb;
@@ -1908,14 +1905,14 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
return retval;
if (retval == 1) {
retval = 0;
- return retval;
+ goto out;
}
}
if (is_dx(dir)) {
retval = ext4_dx_add_entry(handle, dentry, inode);
if (!retval || (retval != ERR_BAD_DX_DIR))
- return retval;
+ goto out;
ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
dx_fallback++;
ext4_mark_inode_dirty(handle, dir);
@@ -1927,14 +1924,15 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
return PTR_ERR(bh);
retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
- if (retval != -ENOSPC) {
- brelse(bh);
- return retval;
- }
+ if (retval != -ENOSPC)
+ goto out;
if (blocks == 1 && !dx_fallback &&
- EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
- return make_indexed_dir(handle, dentry, inode, bh);
+ EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) {
+ retval = make_indexed_dir(handle, dentry, inode, bh);
+ bh = NULL; /* make_indexed_dir releases bh */
+ goto out;
+ }
brelse(bh);
}
bh = ext4_append(handle, dir, &block);
@@ -1950,6 +1948,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
}
retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
+out:
brelse(bh);
if (retval == 0)
ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY);
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 4acf1f78881b..b12a4427aedc 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -384,6 +384,17 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
ClearPageError(page);
/*
+ * Comments copied from block_write_full_page_endio:
+ *
+ * The page straddles i_size. It must be zeroed out on each and every
+ * writepage invocation because it may be mmapped. "A file is mapped
+ * in multiples of the page size. For a file that is not a multiple of
+ * the page size, the remaining memory is zeroed when mapped, and
+ * writes to that region are not written out to the file."
+ */
+ if (len < PAGE_CACHE_SIZE)
+ zero_user_segment(page, len, PAGE_CACHE_SIZE);
+ /*
* In the first loop we prepare and mark buffers to submit. We have to
* mark all buffers in the page before submitting so that
* end_page_writeback() cannot be called from ext4_bio_end_io() when IO
@@ -394,19 +405,6 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
do {
block_start = bh_offset(bh);
if (block_start >= len) {
- /*
- * Comments copied from block_write_full_page_endio:
- *
- * The page straddles i_size. It must be zeroed out on
- * each and every writepage invocation because it may
- * be mmapped. "A file is mapped in multiples of the
- * page size. For a file that is not a multiple of
- * the page size, the remaining memory is zeroed when
- * mapped, and writes to that region are not written
- * out to the file."
- */
- zero_user_segment(page, block_start,
- block_start + blocksize);
clear_buffer_dirty(bh);
set_buffer_uptodate(bh);
continue;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index b27c96d01965..a69bd74ed390 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -238,6 +238,7 @@ static int ext4_alloc_group_tables(struct super_block *sb,
ext4_group_t group;
ext4_group_t last_group;
unsigned overhead;
+ __u16 uninit_mask = (flexbg_size > 1) ? ~EXT4_BG_BLOCK_UNINIT : ~0;
BUG_ON(flex_gd->count == 0 || group_data == NULL);
@@ -261,7 +262,7 @@ next_group:
src_group++;
for (; src_group <= last_group; src_group++) {
overhead = ext4_group_overhead_blocks(sb, src_group);
- if (overhead != 0)
+ if (overhead == 0)
last_blk += group_data[src_group - group].blocks_count;
else
break;
@@ -275,8 +276,7 @@ next_group:
group = ext4_get_group_number(sb, start_blk - 1);
group -= group_data[0].group;
group_data[group].free_blocks_count--;
- if (flexbg_size > 1)
- flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
+ flex_gd->bg_flags[group] &= uninit_mask;
}
/* Allocate inode bitmaps */
@@ -287,22 +287,30 @@ next_group:
group = ext4_get_group_number(sb, start_blk - 1);
group -= group_data[0].group;
group_data[group].free_blocks_count--;
- if (flexbg_size > 1)
- flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
+ flex_gd->bg_flags[group] &= uninit_mask;
}
/* Allocate inode tables */
for (; it_index < flex_gd->count; it_index++) {
- if (start_blk + EXT4_SB(sb)->s_itb_per_group > last_blk)
+ unsigned int itb = EXT4_SB(sb)->s_itb_per_group;
+ ext4_fsblk_t next_group_start;
+
+ if (start_blk + itb > last_blk)
goto next_group;
group_data[it_index].inode_table = start_blk;
- group = ext4_get_group_number(sb, start_blk - 1);
+ group = ext4_get_group_number(sb, start_blk);
+ next_group_start = ext4_group_first_block_no(sb, group + 1);
group -= group_data[0].group;
- group_data[group].free_blocks_count -=
- EXT4_SB(sb)->s_itb_per_group;
- if (flexbg_size > 1)
- flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
+ if (start_blk + itb > next_group_start) {
+ flex_gd->bg_flags[group + 1] &= uninit_mask;
+ overhead = start_blk + itb - next_group_start;
+ group_data[group + 1].free_blocks_count -= overhead;
+ itb -= overhead;
+ }
+
+ group_data[group].free_blocks_count -= itb;
+ flex_gd->bg_flags[group] &= uninit_mask;
start_blk += EXT4_SB(sb)->s_itb_per_group;
}
@@ -396,7 +404,7 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
start = ext4_group_first_block_no(sb, group);
group -= flex_gd->groups[0].group;
- count2 = sb->s_blocksize * 8 - (block - start);
+ count2 = EXT4_BLOCKS_PER_GROUP(sb) - (block - start);
if (count2 > count)
count2 = count;
@@ -615,7 +623,7 @@ handle_ib:
if (err)
goto out;
count = group_table_count[j];
- start = group_data[i].block_bitmap;
+ start = (&group_data[i].block_bitmap)[j];
block = start;
}
@@ -1058,7 +1066,7 @@ static void update_backups(struct super_block *sb, int blk_off, char *data,
break;
if (meta_bg == 0)
- backup_block = group * bpg + blk_off;
+ backup_block = ((ext4_fsblk_t)group) * bpg + blk_off;
else
backup_block = (ext4_group_first_block_no(sb, group) +
ext4_bg_has_super(sb, group));
@@ -1656,12 +1664,10 @@ errout:
err = err2;
if (!err) {
- ext4_fsblk_t first_block;
- first_block = ext4_group_first_block_no(sb, 0);
if (test_opt(sb, DEBUG))
printk(KERN_DEBUG "EXT4-fs: extended group to %llu "
"blocks\n", ext4_blocks_count(es));
- update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr - first_block,
+ update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr,
(char *)es, sizeof(struct ext4_super_block), 0);
}
return err;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 94cc84db7c9a..21a0b43a7d31 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -964,7 +964,7 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb,
* Currently we don't know the generation for parent directory, so
* a generation of 0 means "accept any"
*/
- inode = ext4_iget(sb, ino);
+ inode = ext4_iget_normal(sb, ino);
if (IS_ERR(inode))
return ERR_CAST(inode);
if (generation && inode->i_generation != generation) {
@@ -1341,7 +1341,7 @@ static const struct mount_opts {
{Opt_delalloc, EXT4_MOUNT_DELALLOC,
MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT},
{Opt_nodelalloc, EXT4_MOUNT_DELALLOC,
- MOPT_EXT4_ONLY | MOPT_CLEAR | MOPT_EXPLICIT},
+ MOPT_EXT4_ONLY | MOPT_CLEAR},
{Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM,
MOPT_EXT4_ONLY | MOPT_SET},
{Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT |
@@ -1483,8 +1483,6 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
sbi->s_commit_interval = HZ * arg;
} else if (token == Opt_max_batch_time) {
- if (arg == 0)
- arg = EXT4_DEF_MAX_BATCH_TIME;
sbi->s_max_batch_time = arg;
} else if (token == Opt_min_batch_time) {
sbi->s_min_batch_time = arg;
@@ -1634,13 +1632,6 @@ static int parse_options(char *options, struct super_block *sb,
"not specified");
return 0;
}
- } else {
- if (sbi->s_jquota_fmt) {
- ext4_msg(sb, KERN_ERR, "journaled quota format "
- "specified with no journaling "
- "enabled");
- return 0;
- }
}
#endif
if (test_opt(sb, DIOREAD_NOLOCK)) {
@@ -1684,12 +1675,6 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
if (sbi->s_qf_names[GRPQUOTA])
seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
-
- if (test_opt(sb, USRQUOTA))
- seq_puts(seq, ",usrquota");
-
- if (test_opt(sb, GRPQUOTA))
- seq_puts(seq, ",grpquota");
#endif
}
@@ -1965,6 +1950,10 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
}
/* old crc16 code */
+ if (!(sbi->s_es->s_feature_ro_compat &
+ cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)))
+ return 0;
+
offset = offsetof(struct ext4_group_desc, bg_checksum);
crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
@@ -2693,10 +2682,11 @@ static void print_daily_error_info(unsigned long arg)
es = sbi->s_es;
if (es->s_error_count)
- ext4_msg(sb, KERN_NOTICE, "error count: %u",
+ /* fsck newer than v1.41.13 is needed to clean this condition. */
+ ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
le32_to_cpu(es->s_error_count));
if (es->s_first_error_time) {
- printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d",
+ printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d",
sb->s_id, le32_to_cpu(es->s_first_error_time),
(int) sizeof(es->s_first_error_func),
es->s_first_error_func,
@@ -2710,7 +2700,7 @@ static void print_daily_error_info(unsigned long arg)
printk("\n");
}
if (es->s_last_error_time) {
- printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d",
+ printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d",
sb->s_id, le32_to_cpu(es->s_last_error_time),
(int) sizeof(es->s_last_error_func),
es->s_last_error_func,
@@ -3219,11 +3209,19 @@ int ext4_calculate_overhead(struct super_block *sb)
}
-static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi)
+static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb)
{
ext4_fsblk_t resv_clusters;
/*
+ * There's no need to reserve anything when we aren't using extents.
+ * The space estimates are exact, there are no unwritten extents,
+ * hole punching doesn't need new metadata... This is needed especially
+ * to keep ext2/3 backward compatibility.
+ */
+ if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
+ return 0;
+ /*
* By default we reserve 2% or 4096 clusters, whichever is smaller.
* This should cover the situations where we can not afford to run
* out of space like for example punch hole, or converting
@@ -3231,7 +3229,8 @@ static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi)
* allocation would require 1, or 2 blocks, higher numbers are
* very rare.
*/
- resv_clusters = ext4_blocks_count(sbi->s_es) >> sbi->s_cluster_bits;
+ resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >>
+ EXT4_SB(sb)->s_cluster_bits;
do_div(resv_clusters, 50);
resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
@@ -3451,7 +3450,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
}
if (test_opt(sb, DIOREAD_NOLOCK)) {
ext4_msg(sb, KERN_ERR, "can't mount with "
- "both data=journal and delalloc");
+ "both data=journal and dioread_nolock");
goto failed_mount;
}
if (test_opt(sb, DELALLOC))
@@ -3586,23 +3585,25 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb));
sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb));
- /* Do we have standard group size of blocksize * 8 blocks ? */
- if (sbi->s_blocks_per_group == blocksize << 3)
- set_opt2(sb, STD_GROUP_SIZE);
-
for (i = 0; i < 4; i++)
sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
sbi->s_def_hash_version = es->s_def_hash_version;
- i = le32_to_cpu(es->s_flags);
- if (i & EXT2_FLAGS_UNSIGNED_HASH)
- sbi->s_hash_unsigned = 3;
- else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
+ if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) {
+ i = le32_to_cpu(es->s_flags);
+ if (i & EXT2_FLAGS_UNSIGNED_HASH)
+ sbi->s_hash_unsigned = 3;
+ else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
#ifdef __CHAR_UNSIGNED__
- es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
- sbi->s_hash_unsigned = 3;
+ if (!(sb->s_flags & MS_RDONLY))
+ es->s_flags |=
+ cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
+ sbi->s_hash_unsigned = 3;
#else
- es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
+ if (!(sb->s_flags & MS_RDONLY))
+ es->s_flags |=
+ cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
#endif
+ }
}
/* Handle clustersize */
@@ -3659,6 +3660,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
goto failed_mount;
}
+ /* Do we have standard group size of clustersize * 8 blocks ? */
+ if (sbi->s_blocks_per_group == clustersize << 3)
+ set_opt2(sb, STD_GROUP_SIZE);
+
/*
* Test whether we have more sectors than will fit in sector_t,
* and whether the max offset is addressable by the page cache.
@@ -3975,10 +3980,10 @@ no_journal:
"available");
}
- err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sbi));
+ err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb));
if (err) {
ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
- "reserved pool", ext4_calculate_resv_clusters(sbi));
+ "reserved pool", ext4_calculate_resv_clusters(sb));
goto failed_mount4a;
}
@@ -4652,6 +4657,21 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
goto restore_opts;
}
+ if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
+ if (test_opt2(sb, EXPLICIT_DELALLOC)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "both data=journal and delalloc");
+ err = -EINVAL;
+ goto restore_opts;
+ }
+ if (test_opt(sb, DIOREAD_NOLOCK)) {
+ ext4_msg(sb, KERN_ERR, "can't mount with "
+ "both data=journal and dioread_nolock");
+ err = -EINVAL;
+ goto restore_opts;
+ }
+ }
+
if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED)
ext4_abort(sb, "Abort forced by user");
@@ -5406,6 +5426,7 @@ static void __exit ext4_exit_fs(void)
kset_unregister(ext4_kset);
ext4_exit_system_zone();
ext4_exit_pageio();
+ ext4_exit_es();
}
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index c081e34f717f..a20816e7eb3a 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -189,14 +189,28 @@ ext4_listxattr(struct dentry *dentry, char *buffer, size_t size)
}
static int
-ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end)
+ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end,
+ void *value_start)
{
- while (!IS_LAST_ENTRY(entry)) {
- struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(entry);
+ struct ext4_xattr_entry *e = entry;
+
+ while (!IS_LAST_ENTRY(e)) {
+ struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e);
if ((void *)next >= end)
return -EIO;
- entry = next;
+ e = next;
+ }
+
+ while (!IS_LAST_ENTRY(entry)) {
+ if (entry->e_value_size != 0 &&
+ (value_start + le16_to_cpu(entry->e_value_offs) <
+ (void *)e + sizeof(__u32) ||
+ value_start + le16_to_cpu(entry->e_value_offs) +
+ le32_to_cpu(entry->e_value_size) > end))
+ return -EIO;
+ entry = EXT4_XATTR_NEXT(entry);
}
+
return 0;
}
@@ -213,7 +227,8 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
return -EIO;
if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh)))
return -EIO;
- error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
+ error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size,
+ bh->b_data);
if (!error)
set_buffer_verified(bh);
return error;
@@ -329,7 +344,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
header = IHDR(inode, raw_inode);
entry = IFIRST(header);
end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
- error = ext4_xattr_check_names(entry, end);
+ error = ext4_xattr_check_names(entry, end, entry);
if (error)
goto cleanup;
error = ext4_xattr_find_entry(&entry, name_index, name,
@@ -457,7 +472,7 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
raw_inode = ext4_raw_inode(&iloc);
header = IHDR(inode, raw_inode);
end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
- error = ext4_xattr_check_names(IFIRST(header), end);
+ error = ext4_xattr_check_names(IFIRST(header), end, IFIRST(header));
if (error)
goto cleanup;
error = ext4_xattr_list_entries(dentry, IFIRST(header),
@@ -517,8 +532,8 @@ static void ext4_xattr_update_super_block(handle_t *handle,
}
/*
- * Release the xattr block BH: If the reference count is > 1, decrement
- * it; otherwise free the block.
+ * Release the xattr block BH: If the reference count is > 1, decrement it;
+ * otherwise free the block.
*/
static void
ext4_xattr_release_block(handle_t *handle, struct inode *inode,
@@ -538,16 +553,31 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
if (ce)
mb_cache_entry_free(ce);
get_bh(bh);
+ unlock_buffer(bh);
ext4_free_blocks(handle, inode, bh, 0, 1,
EXT4_FREE_BLOCKS_METADATA |
EXT4_FREE_BLOCKS_FORGET);
- unlock_buffer(bh);
} else {
le32_add_cpu(&BHDR(bh)->h_refcount, -1);
if (ce)
mb_cache_entry_release(ce);
+ /*
+ * Beware of this ugliness: Releasing of xattr block references
+ * from different inodes can race and so we have to protect
+ * from a race where someone else frees the block (and releases
+ * its journal_head) before we are done dirtying the buffer. In
+ * nojournal mode this race is harmless and we actually cannot
+ * call ext4_handle_dirty_xattr_block() with locked buffer as
+ * that function can call sync_dirty_buffer() so for that case
+ * we handle the dirtying after unlocking the buffer.
+ */
+ if (ext4_handle_valid(handle))
+ error = ext4_handle_dirty_xattr_block(handle, inode,
+ bh);
unlock_buffer(bh);
- error = ext4_handle_dirty_xattr_block(handle, inode, bh);
+ if (!ext4_handle_valid(handle))
+ error = ext4_handle_dirty_xattr_block(handle, inode,
+ bh);
if (IS_SYNC(inode))
ext4_handle_sync(handle);
dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
@@ -957,7 +987,8 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
is->s.here = is->s.first;
is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
- error = ext4_xattr_check_names(IFIRST(header), is->s.end);
+ error = ext4_xattr_check_names(IFIRST(header), is->s.end,
+ IFIRST(header));
if (error)
return error;
/* Find the named attribute. */
@@ -1350,6 +1381,9 @@ retry:
s_min_extra_isize) {
tried_min_extra_isize++;
new_extra_isize = s_min_extra_isize;
+ kfree(is); is = NULL;
+ kfree(bs); bs = NULL;
+ brelse(bh);
goto retry;
}
error = -1;
diff --git a/fs/file.c b/fs/file.c
index 4a78f981557a..9de20265a78c 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -34,7 +34,7 @@ static void *alloc_fdmem(size_t size)
* vmalloc() if the allocation size will be considered "large" by the VM.
*/
if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
- void *data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN);
+ void *data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY);
if (data != NULL)
return data;
}
diff --git a/fs/file_table.c b/fs/file_table.c
index 485dc0eddd67..54a34be444f9 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -211,10 +211,10 @@ static void drop_file_write_access(struct file *file)
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = dentry->d_inode;
- put_write_access(inode);
-
if (special_file(inode->i_mode))
return;
+
+ put_write_access(inode);
if (file_check_writeable(file) != 0)
return;
__mnt_drop_write(mnt);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 3be57189efd5..b44306378193 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -87,16 +87,29 @@ static inline struct inode *wb_inode(struct list_head *head)
#define CREATE_TRACE_POINTS
#include <trace/events/writeback.h>
+static void bdi_wakeup_thread(struct backing_dev_info *bdi)
+{
+ spin_lock_bh(&bdi->wb_lock);
+ if (test_bit(BDI_registered, &bdi->state))
+ mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
+ spin_unlock_bh(&bdi->wb_lock);
+}
+
static void bdi_queue_work(struct backing_dev_info *bdi,
struct wb_writeback_work *work)
{
trace_writeback_queue(bdi, work);
spin_lock_bh(&bdi->wb_lock);
+ if (!test_bit(BDI_registered, &bdi->state)) {
+ if (work->done)
+ complete(work->done);
+ goto out_unlock;
+ }
list_add_tail(&work->list, &bdi->work_list);
- spin_unlock_bh(&bdi->wb_lock);
-
mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
+out_unlock:
+ spin_unlock_bh(&bdi->wb_lock);
}
static void
@@ -112,7 +125,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
work = kzalloc(sizeof(*work), GFP_ATOMIC);
if (!work) {
trace_writeback_nowork(bdi);
- mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
+ bdi_wakeup_thread(bdi);
return;
}
@@ -159,7 +172,7 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi)
* writeback as soon as there is no other work to do.
*/
trace_writeback_wake_background(bdi);
- mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
+ bdi_wakeup_thread(bdi);
}
/*
@@ -457,12 +470,28 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
* write_inode()
*/
spin_lock(&inode->i_lock);
- /* Clear I_DIRTY_PAGES if we've written out all dirty pages */
- if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
- inode->i_state &= ~I_DIRTY_PAGES;
+
dirty = inode->i_state & I_DIRTY;
- inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
+ inode->i_state &= ~I_DIRTY;
+
+ /*
+ * Paired with smp_mb() in __mark_inode_dirty(). This allows
+ * __mark_inode_dirty() to test i_state without grabbing i_lock -
+ * either they see the I_DIRTY bits cleared or we see the dirtied
+ * inode.
+ *
+ * I_DIRTY_PAGES is always cleared together above even if @mapping
+ * still has dirty pages. The flag is reinstated after smp_mb() if
+ * necessary. This guarantees that either __mark_inode_dirty()
+ * sees clear I_DIRTY_PAGES or we see PAGECACHE_TAG_DIRTY.
+ */
+ smp_mb();
+
+ if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
+ inode->i_state |= I_DIRTY_PAGES;
+
spin_unlock(&inode->i_lock);
+
/* Don't write the inode if only I_DIRTY_PAGES was set */
if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
int err = write_inode(inode, wbc);
@@ -505,13 +534,16 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
}
WARN_ON(inode->i_state & I_SYNC);
/*
- * Skip inode if it is clean. We don't want to mess with writeback
- * lists in this function since flusher thread may be doing for example
- * sync in parallel and if we move the inode, it could get skipped. So
- * here we make sure inode is on some writeback list and leave it there
- * unless we have completely cleaned the inode.
+ * Skip inode if it is clean and we have no outstanding writeback in
+ * WB_SYNC_ALL mode. We don't want to mess with writeback lists in this
+ * function since flusher thread may be doing for example sync in
+ * parallel and if we move the inode, it could get skipped. So here we
+ * make sure inode is on some writeback list and leave it there unless
+ * we have completely cleaned the inode.
*/
- if (!(inode->i_state & I_DIRTY))
+ if (!(inode->i_state & I_DIRTY) &&
+ (wbc->sync_mode != WB_SYNC_ALL ||
+ !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)))
goto out;
inode->i_state |= I_SYNC;
spin_unlock(&inode->i_lock);
@@ -1013,7 +1045,7 @@ void bdi_writeback_workfn(struct work_struct *work)
current->flags |= PF_SWAPWRITE;
if (likely(!current_is_workqueue_rescuer() ||
- list_empty(&bdi->bdi_list))) {
+ !test_bit(BDI_registered, &bdi->state))) {
/*
* The normal path. Keep writing back @bdi until its
* work_list is empty. Note that this path is also taken
@@ -1035,10 +1067,10 @@ void bdi_writeback_workfn(struct work_struct *work)
trace_writeback_pages_written(pages_written);
}
- if (!list_empty(&bdi->work_list) ||
- (wb_has_dirty_io(wb) && dirty_writeback_interval))
- queue_delayed_work(bdi_wq, &wb->dwork,
- msecs_to_jiffies(dirty_writeback_interval * 10));
+ if (!list_empty(&bdi->work_list))
+ mod_delayed_work(bdi_wq, &wb->dwork, 0);
+ else if (wb_has_dirty_io(wb) && dirty_writeback_interval)
+ bdi_wakeup_thread_delayed(bdi);
current->flags &= ~PF_SWAPWRITE;
}
@@ -1130,12 +1162,11 @@ void __mark_inode_dirty(struct inode *inode, int flags)
}
/*
- * make sure that changes are seen by all cpus before we test i_state
- * -- mikulas
+ * Paired with smp_mb() in __writeback_single_inode() for the
+ * following lockless i_state test. See there for details.
*/
smp_mb();
- /* avoid the locking if we can */
if ((inode->i_state & flags) == flags)
return;
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 1d55f9465400..b535008b6c4c 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -819,8 +819,8 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
newpage = buf->page;
- if (WARN_ON(!PageUptodate(newpage)))
- return -EIO;
+ if (!PageUptodate(newpage))
+ SetPageUptodate(newpage);
ClearPageMappedToDisk(newpage);
@@ -1296,22 +1296,6 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs));
}
-static int fuse_dev_pipe_buf_steal(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
-{
- return 1;
-}
-
-static const struct pipe_buf_operations fuse_dev_pipe_buf_ops = {
- .can_merge = 0,
- .map = generic_pipe_buf_map,
- .unmap = generic_pipe_buf_unmap,
- .confirm = generic_pipe_buf_confirm,
- .release = generic_pipe_buf_release,
- .steal = fuse_dev_pipe_buf_steal,
- .get = generic_pipe_buf_get,
-};
-
static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags)
@@ -1358,7 +1342,11 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
buf->page = bufs[page_nr].page;
buf->offset = bufs[page_nr].offset;
buf->len = bufs[page_nr].len;
- buf->ops = &fuse_dev_pipe_buf_ops;
+ /*
+ * Need to be careful about this. Having buf->ops in module
+ * code can Oops if the buffer persists after module unload.
+ */
+ buf->ops = &nosteal_pipe_buf_ops;
pipe->nrbufs++;
page_nr++;
@@ -1737,6 +1725,9 @@ copy_finish:
static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
unsigned int size, struct fuse_copy_state *cs)
{
+ /* Don't try to move pages (yet) */
+ cs->move_pages = 0;
+
switch (code) {
case FUSE_NOTIFY_POLL:
return fuse_notify_poll(fc, size, cs);
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index f3f783dc4f75..e67b13de2ebc 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1175,6 +1175,8 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
return -EIO;
if (reclen > nbytes)
break;
+ if (memchr(dirent->name, '/', dirent->namelen) != NULL)
+ return -EIO;
over = filldir(dstbuf, dirent->name, dirent->namelen,
file->f_pos, dirent->ino, dirent->type);
@@ -1225,13 +1227,29 @@ static int fuse_direntplus_link(struct file *file,
if (name.name[1] == '.' && name.len == 2)
return 0;
}
+
+ if (invalid_nodeid(o->nodeid))
+ return -EIO;
+ if (!fuse_valid_type(o->attr.mode))
+ return -EIO;
+
fc = get_fuse_conn(dir);
name.hash = full_name_hash(name.name, name.len);
dentry = d_lookup(parent, &name);
- if (dentry && dentry->d_inode) {
+ if (dentry) {
inode = dentry->d_inode;
- if (get_node_id(inode) == o->nodeid) {
+ if (!inode) {
+ d_drop(dentry);
+ } else if (get_node_id(inode) != o->nodeid ||
+ ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
+ err = d_invalidate(dentry);
+ if (err)
+ goto out;
+ } else if (is_bad_inode(inode)) {
+ err = -EIO;
+ goto out;
+ } else {
struct fuse_inode *fi;
fi = get_fuse_inode(inode);
spin_lock(&fc->lock);
@@ -1244,9 +1262,6 @@ static int fuse_direntplus_link(struct file *file,
*/
goto found;
}
- err = d_invalidate(dentry);
- if (err)
- goto out;
dput(dentry);
dentry = NULL;
}
@@ -1261,10 +1276,19 @@ static int fuse_direntplus_link(struct file *file,
if (!inode)
goto out;
- alias = d_materialise_unique(dentry, inode);
- err = PTR_ERR(alias);
- if (IS_ERR(alias))
- goto out;
+ if (S_ISDIR(inode->i_mode)) {
+ mutex_lock(&fc->inst_mutex);
+ alias = fuse_d_add_directory(dentry, inode);
+ mutex_unlock(&fc->inst_mutex);
+ err = PTR_ERR(alias);
+ if (IS_ERR(alias)) {
+ iput(inode);
+ goto out;
+ }
+ } else {
+ alias = d_splice_alias(inode, dentry);
+ }
+
if (alias) {
dput(dentry);
dentry = alias;
@@ -1301,6 +1325,8 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
return -EIO;
if (reclen > nbytes)
break;
+ if (memchr(dirent->name, '/', dirent->namelen) != NULL)
+ return -EIO;
if (!over) {
/* We fill entries into dstbuf only as much as
@@ -1572,6 +1598,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
struct file *file)
{
struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_req *req;
struct fuse_setattr_in inarg;
struct fuse_attr_out outarg;
@@ -1599,8 +1626,10 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
if (IS_ERR(req))
return PTR_ERR(req);
- if (is_truncate)
+ if (is_truncate) {
fuse_set_nowrite(inode);
+ set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
+ }
memset(&inarg, 0, sizeof(inarg));
memset(&outarg, 0, sizeof(outarg));
@@ -1662,12 +1691,14 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
invalidate_inode_pages2(inode->i_mapping);
}
+ clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
return 0;
error:
if (is_truncate)
fuse_release_nowrite(inode);
+ clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
return err;
}
@@ -1731,6 +1762,8 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
fc->no_setxattr = 1;
err = -EOPNOTSUPP;
}
+ if (!err)
+ fuse_invalidate_attr(inode);
return err;
}
@@ -1860,6 +1893,8 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
fc->no_removexattr = 1;
err = -EOPNOTSUPP;
}
+ if (!err)
+ fuse_invalidate_attr(inode);
return err;
}
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 35f281033142..4fafb8484bbc 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -630,7 +630,8 @@ static void fuse_read_update_size(struct inode *inode, loff_t size,
struct fuse_inode *fi = get_fuse_inode(inode);
spin_lock(&fc->lock);
- if (attr_ver == fi->attr_version && size < inode->i_size) {
+ if (attr_ver == fi->attr_version && size < inode->i_size &&
+ !test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
fi->attr_version = ++fc->attr_version;
i_size_write(inode, size);
}
@@ -1033,12 +1034,16 @@ static ssize_t fuse_perform_write(struct file *file,
{
struct inode *inode = mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
+ struct fuse_inode *fi = get_fuse_inode(inode);
int err = 0;
ssize_t res = 0;
if (is_bad_inode(inode))
return -EIO;
+ if (inode->i_size < pos + iov_iter_count(ii))
+ set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
+
do {
struct fuse_req *req;
ssize_t count;
@@ -1074,6 +1079,7 @@ static ssize_t fuse_perform_write(struct file *file,
if (res > 0)
fuse_write_update_size(inode, pos);
+ clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
fuse_invalidate_attr(inode);
return res > 0 ? res : err;
@@ -1530,7 +1536,6 @@ static int fuse_writepage_locked(struct page *page)
inc_bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK);
inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP);
- end_page_writeback(page);
spin_lock(&fc->lock);
list_add(&req->writepages_entry, &fi->writepages);
@@ -1538,6 +1543,8 @@ static int fuse_writepage_locked(struct page *page)
fuse_flush_writepages(inode);
spin_unlock(&fc->lock);
+ end_page_writeback(page);
+
return 0;
err_free:
@@ -2461,6 +2468,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
{
struct fuse_file *ff = file->private_data;
struct inode *inode = file->f_inode;
+ struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_conn *fc = ff->fc;
struct fuse_req *req;
struct fuse_fallocate_in inarg = {
@@ -2478,10 +2486,20 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
if (lock_inode) {
mutex_lock(&inode->i_mutex);
- if (mode & FALLOC_FL_PUNCH_HOLE)
- fuse_set_nowrite(inode);
+ if (mode & FALLOC_FL_PUNCH_HOLE) {
+ loff_t endbyte = offset + length - 1;
+ err = filemap_write_and_wait_range(inode->i_mapping,
+ offset, endbyte);
+ if (err)
+ goto out;
+
+ fuse_sync_writes(inode);
+ }
}
+ if (!(mode & FALLOC_FL_KEEP_SIZE))
+ set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
+
req = fuse_get_req_nopages(fc);
if (IS_ERR(req)) {
err = PTR_ERR(req);
@@ -2514,11 +2532,11 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
fuse_invalidate_attr(inode);
out:
- if (lock_inode) {
- if (mode & FALLOC_FL_PUNCH_HOLE)
- fuse_release_nowrite(inode);
+ if (!(mode & FALLOC_FL_KEEP_SIZE))
+ clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
+
+ if (lock_inode)
mutex_unlock(&inode->i_mutex);
- }
return err;
}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index fde7249a3a96..5ced199b50bb 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -115,6 +115,8 @@ struct fuse_inode {
enum {
/** Advise readdirplus */
FUSE_I_ADVISE_RDPLUS,
+ /** An operation changing file size is in progress */
+ FUSE_I_SIZE_UNSTABLE,
};
struct fuse_conn;
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 9a0cdde14a08..39a986e1da9e 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -201,7 +201,8 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
struct timespec old_mtime;
spin_lock(&fc->lock);
- if (attr_version != 0 && fi->attr_version > attr_version) {
+ if ((attr_version != 0 && fi->attr_version > attr_version) ||
+ test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) {
spin_unlock(&fc->lock);
return;
}
@@ -460,6 +461,17 @@ static const match_table_t tokens = {
{OPT_ERR, NULL}
};
+static int fuse_match_uint(substring_t *s, unsigned int *res)
+{
+ int err = -ENOMEM;
+ char *buf = match_strdup(s);
+ if (buf) {
+ err = kstrtouint(buf, 10, res);
+ kfree(buf);
+ }
+ return err;
+}
+
static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
{
char *p;
@@ -470,6 +482,7 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
while ((p = strsep(&opt, ",")) != NULL) {
int token;
int value;
+ unsigned uv;
substring_t args[MAX_OPT_ARGS];
if (!*p)
continue;
@@ -493,18 +506,18 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
break;
case OPT_USER_ID:
- if (match_int(&args[0], &value))
+ if (fuse_match_uint(&args[0], &uv))
return 0;
- d->user_id = make_kuid(current_user_ns(), value);
+ d->user_id = make_kuid(current_user_ns(), uv);
if (!uid_valid(d->user_id))
return 0;
d->user_id_present = 1;
break;
case OPT_GROUP_ID:
- if (match_int(&args[0], &value))
+ if (fuse_match_uint(&args[0], &uv))
return 0;
- d->group_id = make_kgid(current_user_ns(), value);
+ d->group_id = make_kgid(current_user_ns(), uv);
if (!gid_valid(d->group_id))
return 0;
d->group_id_present = 1;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 0bad69ed6336..76251600cbea 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -999,6 +999,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
+ struct address_space *mapping = inode->i_mapping;
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_holder gh;
int rv;
@@ -1019,6 +1020,35 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
if (rv != 1)
goto out; /* dio not valid, fall back to buffered i/o */
+ /*
+ * Now since we are holding a deferred (CW) lock at this point, you
+ * might be wondering why this is ever needed. There is a case however
+ * where we've granted a deferred local lock against a cached exclusive
+ * glock. That is ok provided all granted local locks are deferred, but
+ * it also means that it is possible to encounter pages which are
+ * cached and possibly also mapped. So here we check for that and sort
+ * them out ahead of the dio. The glock state machine will take care of
+ * everything else.
+ *
+ * If in fact the cached glock state (gl->gl_state) is deferred (CW) in
+ * the first place, mapping->nr_pages will always be zero.
+ */
+ if (mapping->nrpages) {
+ loff_t lstart = offset & (PAGE_CACHE_SIZE - 1);
+ loff_t len = iov_length(iov, nr_segs);
+ loff_t end = PAGE_ALIGN(offset + len) - 1;
+
+ rv = 0;
+ if (len == 0)
+ goto out;
+ if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
+ unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len);
+ rv = filemap_write_and_wait_range(mapping, lstart, end);
+ if (rv)
+ return rv;
+ truncate_inode_pages_range(mapping, lstart, end);
+ }
+
rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
offset, nr_segs, gfs2_get_block_direct,
NULL, NULL, 0);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 62b484e4a9e4..bc5dac400125 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1536,10 +1536,22 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
if (!(attr->ia_valid & ATTR_GID) || gid_eq(ogid, ngid))
ogid = ngid = NO_GID_QUOTA_CHANGE;
- error = gfs2_quota_lock(ip, nuid, ngid);
+ error = get_write_access(inode);
if (error)
return error;
+ error = gfs2_rs_alloc(ip);
+ if (error)
+ goto out;
+
+ error = gfs2_rindex_update(sdp);
+ if (error)
+ goto out;
+
+ error = gfs2_quota_lock(ip, nuid, ngid);
+ if (error)
+ goto out;
+
if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) ||
!gid_eq(ogid, NO_GID_QUOTA_CHANGE)) {
error = gfs2_quota_check(ip, nuid, ngid);
@@ -1566,6 +1578,8 @@ out_end_trans:
gfs2_trans_end(sdp);
out_gunlock_q:
gfs2_quota_unlock(ip);
+out:
+ put_write_access(inode);
return error;
}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 60ede2a0f43f..f7dd3b4f8ab0 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1317,8 +1317,18 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
if (IS_ERR(s))
goto error_bdev;
- if (s->s_root)
+ if (s->s_root) {
+ /*
+ * s_umount nests inside bd_mutex during
+ * __invalidate_device(). blkdev_put() acquires
+ * bd_mutex and can't be called under s_umount. Drop
+ * s_umount temporarily. This is safe as we're
+ * holding an active reference.
+ */
+ up_write(&s->s_umount);
blkdev_put(bdev, mode);
+ down_write(&s->s_umount);
+ }
memset(&args, 0, sizeof(args));
args.ar_quota = GFS2_QUOTA_DEFAULT;
diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c
index 6e560d56094b..754fdf8c6356 100644
--- a/fs/hfsplus/brec.c
+++ b/fs/hfsplus/brec.c
@@ -131,13 +131,16 @@ skip:
hfs_bnode_write(node, entry, data_off + key_len, entry_len);
hfs_bnode_dump(node);
- if (new_node) {
- /* update parent key if we inserted a key
- * at the start of the first node
- */
- if (!rec && new_node != node)
- hfs_brec_update_parent(fd);
+ /*
+ * update parent key if we inserted a key
+ * at the start of the node and it is not the new node
+ */
+ if (!rec && new_node != node) {
+ hfs_bnode_read_key(node, fd->search_key, data_off + size);
+ hfs_brec_update_parent(fd);
+ }
+ if (new_node) {
hfs_bnode_put(fd->bnode);
if (!new_node->parent) {
hfs_btree_inc_height(tree);
@@ -168,9 +171,6 @@ skip:
goto again;
}
- if (!rec)
- hfs_brec_update_parent(fd);
-
return 0;
}
@@ -370,6 +370,8 @@ again:
if (IS_ERR(parent))
return PTR_ERR(parent);
__hfs_brec_find(parent, fd, hfs_find_rec_by_key);
+ if (fd->record < 0)
+ return -ENOENT;
hfs_bnode_dump(parent);
rec = fd->record;
diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c
index 4acb19d78359..803d3da3a0fe 100644
--- a/fs/hpfs/map.c
+++ b/fs/hpfs/map.c
@@ -17,7 +17,8 @@ __le32 *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block,
struct quad_buffer_head *qbh, char *id)
{
secno sec;
- if (hpfs_sb(s)->sb_chk) if (bmp_block * 16384 > hpfs_sb(s)->sb_fs_size) {
+ unsigned n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14;
+ if (hpfs_sb(s)->sb_chk) if (bmp_block >= n_bands) {
hpfs_error(s, "hpfs_map_bitmap called with bad parameter: %08x at %s", bmp_block, id);
return NULL;
}
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index a0617e706957..962e90c37aec 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -558,7 +558,13 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
sbi->sb_cp_table = NULL;
sbi->sb_c_bitmap = -1;
sbi->sb_max_fwd_alloc = 0xffffff;
-
+
+ if (sbi->sb_fs_size >= 0x80000000) {
+ hpfs_error(s, "invalid size in superblock: %08x",
+ (unsigned)sbi->sb_fs_size);
+ goto bail4;
+ }
+
/* Load bitmap directory */
if (!(sbi->sb_bmp_dir = hpfs_load_bitmap_directory(s, le32_to_cpu(superblock->bitmaps))))
goto bail4;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a3f868ae3fd4..4e5f332f15d9 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -916,14 +916,8 @@ static int get_hstate_idx(int page_size_log)
return h - hstates;
}
-static char *hugetlb_dname(struct dentry *dentry, char *buffer, int buflen)
-{
- return dynamic_dname(dentry, buffer, buflen, "/%s (deleted)",
- dentry->d_name.name);
-}
-
static struct dentry_operations anon_ops = {
- .d_dname = hugetlb_dname
+ .d_dname = simple_dname
};
/*
diff --git a/fs/inode.c b/fs/inode.c
index 00d5fc3b86e1..1b300a06b8be 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1837,14 +1837,18 @@ EXPORT_SYMBOL(inode_init_owner);
* inode_owner_or_capable - check current task permissions to inode
* @inode: inode being checked
*
- * Return true if current either has CAP_FOWNER to the inode, or
- * owns the file.
+ * Return true if current either has CAP_FOWNER in a namespace with the
+ * inode owner uid mapped, or owns the file.
*/
bool inode_owner_or_capable(const struct inode *inode)
{
+ struct user_namespace *ns;
+
if (uid_eq(current_fsuid(), inode->i_uid))
return true;
- if (inode_capable(inode, CAP_FOWNER))
+
+ ns = current_user_ns();
+ if (ns_capable(ns, CAP_FOWNER) && kuid_has_mapping(ns, inode->i_uid))
return true;
return false;
}
diff --git a/fs/ioprio.c b/fs/ioprio.c
index e50170ca7c33..31666c92b46a 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -157,14 +157,16 @@ out:
int ioprio_best(unsigned short aprio, unsigned short bprio)
{
- unsigned short aclass = IOPRIO_PRIO_CLASS(aprio);
- unsigned short bclass = IOPRIO_PRIO_CLASS(bprio);
+ unsigned short aclass;
+ unsigned short bclass;
- if (aclass == IOPRIO_CLASS_NONE)
- aclass = IOPRIO_CLASS_BE;
- if (bclass == IOPRIO_CLASS_NONE)
- bclass = IOPRIO_CLASS_BE;
+ if (!ioprio_valid(aprio))
+ aprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
+ if (!ioprio_valid(bprio))
+ bprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
+ aclass = IOPRIO_PRIO_CLASS(aprio);
+ bclass = IOPRIO_PRIO_CLASS(bprio);
if (aclass == bclass)
return min(aprio, bprio);
if (aclass > bclass)
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index d9b8aebdeb22..10489bbd40fc 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -69,7 +69,7 @@ static void isofs_put_super(struct super_block *sb)
return;
}
-static int isofs_read_inode(struct inode *);
+static int isofs_read_inode(struct inode *, int relocated);
static int isofs_statfs (struct dentry *, struct kstatfs *);
static struct kmem_cache *isofs_inode_cachep;
@@ -125,8 +125,8 @@ static void destroy_inodecache(void)
static int isofs_remount(struct super_block *sb, int *flags, char *data)
{
- /* we probably want a lot more here */
- *flags |= MS_RDONLY;
+ if (!(*flags & MS_RDONLY))
+ return -EROFS;
return 0;
}
@@ -779,15 +779,6 @@ root_found:
*/
s->s_maxbytes = 0x80000000000LL;
- /*
- * The CDROM is read-only, has no nodes (devices) on it, and since
- * all of the files appear to be owned by root, we really do not want
- * to allow suid. (suid or devices will not show up unless we have
- * Rock Ridge extensions)
- */
-
- s->s_flags |= MS_RDONLY /* | MS_NODEV | MS_NOSUID */;
-
/* Set this for reference. Its not currently used except on write
which we don't have .. */
@@ -1283,7 +1274,7 @@ out_toomany:
goto out;
}
-static int isofs_read_inode(struct inode *inode)
+static int isofs_read_inode(struct inode *inode, int relocated)
{
struct super_block *sb = inode->i_sb;
struct isofs_sb_info *sbi = ISOFS_SB(sb);
@@ -1428,7 +1419,7 @@ static int isofs_read_inode(struct inode *inode)
*/
if (!high_sierra) {
- parse_rock_ridge_inode(de, inode);
+ parse_rock_ridge_inode(de, inode, relocated);
/* if we want uid/gid set, override the rock ridge setting */
if (sbi->s_uid_set)
inode->i_uid = sbi->s_uid;
@@ -1507,9 +1498,10 @@ static int isofs_iget5_set(struct inode *ino, void *data)
* offset that point to the underlying meta-data for the inode. The
* code below is otherwise similar to the iget() code in
* include/linux/fs.h */
-struct inode *isofs_iget(struct super_block *sb,
- unsigned long block,
- unsigned long offset)
+struct inode *__isofs_iget(struct super_block *sb,
+ unsigned long block,
+ unsigned long offset,
+ int relocated)
{
unsigned long hashval;
struct inode *inode;
@@ -1531,7 +1523,7 @@ struct inode *isofs_iget(struct super_block *sb,
return ERR_PTR(-ENOMEM);
if (inode->i_state & I_NEW) {
- ret = isofs_read_inode(inode);
+ ret = isofs_read_inode(inode, relocated);
if (ret < 0) {
iget_failed(inode);
inode = ERR_PTR(ret);
@@ -1546,6 +1538,9 @@ struct inode *isofs_iget(struct super_block *sb,
static struct dentry *isofs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
+ /* We don't support read-write mounts */
+ if (!(flags & MS_RDONLY))
+ return ERR_PTR(-EACCES);
return mount_bdev(fs_type, flags, dev_name, data, isofs_fill_super);
}
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 99167238518d..0ac4c1f73fbd 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -107,7 +107,7 @@ extern int iso_date(char *, int);
struct inode; /* To make gcc happy */
-extern int parse_rock_ridge_inode(struct iso_directory_record *, struct inode *);
+extern int parse_rock_ridge_inode(struct iso_directory_record *, struct inode *, int relocated);
extern int get_rock_ridge_filename(struct iso_directory_record *, char *, struct inode *);
extern int isofs_name_translate(struct iso_directory_record *, char *, struct inode *);
@@ -118,9 +118,24 @@ extern struct dentry *isofs_lookup(struct inode *, struct dentry *, unsigned int
extern struct buffer_head *isofs_bread(struct inode *, sector_t);
extern int isofs_get_blocks(struct inode *, sector_t, struct buffer_head **, unsigned long);
-extern struct inode *isofs_iget(struct super_block *sb,
- unsigned long block,
- unsigned long offset);
+struct inode *__isofs_iget(struct super_block *sb,
+ unsigned long block,
+ unsigned long offset,
+ int relocated);
+
+static inline struct inode *isofs_iget(struct super_block *sb,
+ unsigned long block,
+ unsigned long offset)
+{
+ return __isofs_iget(sb, block, offset, 0);
+}
+
+static inline struct inode *isofs_iget_reloc(struct super_block *sb,
+ unsigned long block,
+ unsigned long offset)
+{
+ return __isofs_iget(sb, block, offset, 1);
+}
/* Because the inode number is no longer relevant to finding the
* underlying meta-data for an inode, we are free to choose a more
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index c0bf42472e40..735d7522a3a9 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -30,6 +30,7 @@ struct rock_state {
int cont_size;
int cont_extent;
int cont_offset;
+ int cont_loops;
struct inode *inode;
};
@@ -73,6 +74,9 @@ static void init_rock_state(struct rock_state *rs, struct inode *inode)
rs->inode = inode;
}
+/* Maximum number of Rock Ridge continuation entries */
+#define RR_MAX_CE_ENTRIES 32
+
/*
* Returns 0 if the caller should continue scanning, 1 if the scan must end
* and -ve on error.
@@ -105,6 +109,8 @@ static int rock_continue(struct rock_state *rs)
goto out;
}
ret = -EIO;
+ if (++rs->cont_loops >= RR_MAX_CE_ENTRIES)
+ goto out;
bh = sb_bread(rs->inode->i_sb, rs->cont_extent);
if (bh) {
memcpy(rs->buffer, bh->b_data + rs->cont_offset,
@@ -288,12 +294,16 @@ eio:
goto out;
}
+#define RR_REGARD_XA 1
+#define RR_RELOC_DE 2
+
static int
parse_rock_ridge_inode_internal(struct iso_directory_record *de,
- struct inode *inode, int regard_xa)
+ struct inode *inode, int flags)
{
int symlink_len = 0;
int cnt, sig;
+ unsigned int reloc_block;
struct inode *reloc;
struct rock_ridge *rr;
int rootflag;
@@ -305,7 +315,7 @@ parse_rock_ridge_inode_internal(struct iso_directory_record *de,
init_rock_state(&rs, inode);
setup_rock_ridge(de, inode, &rs);
- if (regard_xa) {
+ if (flags & RR_REGARD_XA) {
rs.chr += 14;
rs.len -= 14;
if (rs.len < 0)
@@ -352,6 +362,9 @@ repeat:
rs.cont_size = isonum_733(rr->u.CE.size);
break;
case SIG('E', 'R'):
+ /* Invalid length of ER tag id? */
+ if (rr->u.ER.len_id + offsetof(struct rock_ridge, u.ER.data) > rr->len)
+ goto out;
ISOFS_SB(inode->i_sb)->s_rock = 1;
printk(KERN_DEBUG "ISO 9660 Extensions: ");
{
@@ -485,12 +498,22 @@ repeat:
"relocated directory\n");
goto out;
case SIG('C', 'L'):
- ISOFS_I(inode)->i_first_extent =
- isonum_733(rr->u.CL.location);
- reloc =
- isofs_iget(inode->i_sb,
- ISOFS_I(inode)->i_first_extent,
- 0);
+ if (flags & RR_RELOC_DE) {
+ printk(KERN_ERR
+ "ISOFS: Recursive directory relocation "
+ "is not supported\n");
+ goto eio;
+ }
+ reloc_block = isonum_733(rr->u.CL.location);
+ if (reloc_block == ISOFS_I(inode)->i_iget5_block &&
+ ISOFS_I(inode)->i_iget5_offset == 0) {
+ printk(KERN_ERR
+ "ISOFS: Directory relocation points to "
+ "itself\n");
+ goto eio;
+ }
+ ISOFS_I(inode)->i_first_extent = reloc_block;
+ reloc = isofs_iget_reloc(inode->i_sb, reloc_block, 0);
if (IS_ERR(reloc)) {
ret = PTR_ERR(reloc);
goto out;
@@ -637,9 +660,11 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit)
return rpnt;
}
-int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode)
+int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode,
+ int relocated)
{
- int result = parse_rock_ridge_inode_internal(de, inode, 0);
+ int flags = relocated ? RR_RELOC_DE : 0;
+ int result = parse_rock_ridge_inode_internal(de, inode, flags);
/*
* if rockridge flag was reset and we didn't look for attributes
@@ -647,7 +672,8 @@ int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode)
*/
if ((ISOFS_SB(inode->i_sb)->s_rock_offset == -1)
&& (ISOFS_SB(inode->i_sb)->s_rock == 2)) {
- result = parse_rock_ridge_inode_internal(de, inode, 14);
+ result = parse_rock_ridge_inode_internal(de, inode,
+ flags | RR_REGARD_XA);
}
return result;
}
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 95457576e434..aaa1a3f33b0e 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1318,6 +1318,7 @@ static int journal_reset(journal_t *journal)
static void jbd2_write_superblock(journal_t *journal, int write_op)
{
struct buffer_head *bh = journal->j_sb_buffer;
+ journal_superblock_t *sb = journal->j_superblock;
int ret;
trace_jbd2_write_superblock(journal, write_op);
@@ -1339,6 +1340,7 @@ static void jbd2_write_superblock(journal_t *journal, int write_op)
clear_buffer_write_io_error(bh);
set_buffer_uptodate(bh);
}
+ jbd2_superblock_csum_set(journal, sb);
get_bh(bh);
bh->b_end_io = end_buffer_write_sync;
ret = submit_bh(write_op, bh);
@@ -1435,7 +1437,6 @@ void jbd2_journal_update_sb_errno(journal_t *journal)
jbd_debug(1, "JBD2: updating superblock error (errno %d)\n",
journal->j_errno);
sb->s_errno = cpu_to_be32(journal->j_errno);
- jbd2_superblock_csum_set(journal, sb);
read_unlock(&journal->j_state_lock);
jbd2_write_superblock(journal, WRITE_SYNC);
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 626846bac32f..6e2fb5cbacde 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -427,6 +427,7 @@ static int do_one_pass(journal_t *journal,
int tag_bytes = journal_tag_bytes(journal);
__u32 crc32_sum = ~0; /* Transactional Checksums */
int descr_csum_size = 0;
+ int block_error = 0;
/*
* First thing is to establish what we expect to find in the log
@@ -521,6 +522,7 @@ static int do_one_pass(journal_t *journal,
!jbd2_descr_block_csum_verify(journal,
bh->b_data)) {
err = -EIO;
+ brelse(bh);
goto failed;
}
@@ -599,7 +601,8 @@ static int do_one_pass(journal_t *journal,
"checksum recovering "
"block %llu in log\n",
blocknr);
- continue;
+ block_error = 1;
+ goto skip_write;
}
/* Find a buffer for the new
@@ -798,7 +801,8 @@ static int do_one_pass(journal_t *journal,
success = -EIO;
}
}
-
+ if (block_error && success == 0)
+ success = -EIO;
return success;
failed:
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 10f524c59ea8..ec34e11d6854 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -517,10 +517,10 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask)
&transaction->t_outstanding_credits);
if (atomic_dec_and_test(&transaction->t_updates))
wake_up(&journal->j_wait_updates);
+ tid = transaction->t_tid;
spin_unlock(&transaction->t_handle_lock);
jbd_debug(2, "restarting handle %p\n", handle);
- tid = transaction->t_tid;
need_to_start = !tid_geq(journal->j_commit_request, tid);
read_unlock(&journal->j_state_lock);
if (need_to_start)
@@ -1151,7 +1151,10 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
* once a transaction -bzzz
*/
jh->b_modified = 1;
- J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
+ if (handle->h_buffer_credits <= 0) {
+ ret = -ENOSPC;
+ goto out_unlock_bh;
+ }
handle->h_buffer_credits--;
}
@@ -1234,7 +1237,6 @@ out_unlock_bh:
jbd2_journal_put_journal_head(jh);
out:
JBUFFER_TRACE(jh, "exit");
- WARN_ON(ret); /* All errors are bugs, so dump the stack */
return ret;
}
@@ -1440,9 +1442,12 @@ int jbd2_journal_stop(handle_t *handle)
* to perform a synchronous write. We do this to detect the
* case where a single process is doing a stream of sync
* writes. No point in waiting for joiners in that case.
+ *
+ * Setting max_batch_time to 0 disables this completely.
*/
pid = current->pid;
- if (handle->h_sync && journal->j_last_sync_writer != pid) {
+ if (handle->h_sync && journal->j_last_sync_writer != pid &&
+ journal->j_max_batch_time) {
u64 commit_time, trans_time;
journal->j_last_sync_writer = pid;
diff --git a/fs/jffs2/compr_rtime.c b/fs/jffs2/compr_rtime.c
index 16a5047903a6..406d9cc84ba8 100644
--- a/fs/jffs2/compr_rtime.c
+++ b/fs/jffs2/compr_rtime.c
@@ -33,7 +33,7 @@ static int jffs2_rtime_compress(unsigned char *data_in,
unsigned char *cpage_out,
uint32_t *sourcelen, uint32_t *dstlen)
{
- short positions[256];
+ unsigned short positions[256];
int outpos = 0;
int pos=0;
@@ -74,7 +74,7 @@ static int jffs2_rtime_decompress(unsigned char *data_in,
unsigned char *cpage_out,
uint32_t srclen, uint32_t destlen)
{
- short positions[256];
+ unsigned short positions[256];
int outpos = 0;
int pos=0;
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index 413ef89c2d1b..046fee8b6e9b 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -134,8 +134,6 @@ struct jffs2_sb_info {
struct rw_semaphore wbuf_sem; /* Protects the write buffer */
struct delayed_work wbuf_dwork; /* write-buffer write-out work */
- int wbuf_queued; /* non-zero delayed work is queued */
- spinlock_t wbuf_dwork_lock; /* protects wbuf_dwork and and wbuf_queued */
unsigned char *oobbuf;
int oobavail; /* How many bytes are available for JFFS2 in OOB */
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index e4619b00f7c5..fa35ff79ab35 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -231,7 +231,7 @@ struct jffs2_tmp_dnode_info
uint32_t version;
uint32_t data_crc;
uint32_t partial_crc;
- uint16_t csize;
+ uint32_t csize;
uint16_t overlapped;
};
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index 03310721712f..b6bd4affd9ad 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -179,6 +179,7 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
spin_unlock(&c->erase_completion_lock);
schedule();
+ remove_wait_queue(&c->erase_wait, &wait);
} else
spin_unlock(&c->erase_completion_lock);
} else if (ret)
@@ -211,20 +212,25 @@ out:
int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize,
uint32_t *len, uint32_t sumsize)
{
- int ret = -EAGAIN;
+ int ret;
minsize = PAD(minsize);
jffs2_dbg(1, "%s(): Requested 0x%x bytes\n", __func__, minsize);
- spin_lock(&c->erase_completion_lock);
- while(ret == -EAGAIN) {
+ while (true) {
+ spin_lock(&c->erase_completion_lock);
ret = jffs2_do_reserve_space(c, minsize, len, sumsize);
if (ret) {
jffs2_dbg(1, "%s(): looping, ret is %d\n",
__func__, ret);
}
+ spin_unlock(&c->erase_completion_lock);
+
+ if (ret == -EAGAIN)
+ cond_resched();
+ else
+ break;
}
- spin_unlock(&c->erase_completion_lock);
if (!ret)
ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, 1);
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 7654e87b0428..9ad5ba4b299b 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -510,6 +510,10 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
sumlen = c->sector_size - je32_to_cpu(sm->offset);
sumptr = buf + buf_size - sumlen;
+ /* sm->offset may be wrong even when the MAGIC is right */
+ if (sumlen > c->sector_size)
+ goto full_scan;
+
/* Now, make sure the summary itself is available */
if (sumlen > buf_size) {
/* Need to kmalloc for this. */
@@ -544,6 +548,7 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
}
}
+full_scan:
buf_ofs = jeb->offset;
if (!buf_size) {
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index a6597d60d76d..09ed55190ee2 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -1162,10 +1162,6 @@ static void delayed_wbuf_sync(struct work_struct *work)
struct jffs2_sb_info *c = work_to_sb(work);
struct super_block *sb = OFNI_BS_2SFFJ(c);
- spin_lock(&c->wbuf_dwork_lock);
- c->wbuf_queued = 0;
- spin_unlock(&c->wbuf_dwork_lock);
-
if (!(sb->s_flags & MS_RDONLY)) {
jffs2_dbg(1, "%s()\n", __func__);
jffs2_flush_wbuf_gc(c, 0);
@@ -1180,14 +1176,9 @@ void jffs2_dirty_trigger(struct jffs2_sb_info *c)
if (sb->s_flags & MS_RDONLY)
return;
- spin_lock(&c->wbuf_dwork_lock);
- if (!c->wbuf_queued) {
+ delay = msecs_to_jiffies(dirty_writeback_interval * 10);
+ if (queue_delayed_work(system_long_wq, &c->wbuf_dwork, delay))
jffs2_dbg(1, "%s()\n", __func__);
- delay = msecs_to_jiffies(dirty_writeback_interval * 10);
- queue_delayed_work(system_long_wq, &c->wbuf_dwork, delay);
- c->wbuf_queued = 1;
- }
- spin_unlock(&c->wbuf_dwork_lock);
}
int jffs2_nand_flash_setup(struct jffs2_sb_info *c)
@@ -1211,7 +1202,6 @@ int jffs2_nand_flash_setup(struct jffs2_sb_info *c)
/* Initialise write buffer */
init_rwsem(&c->wbuf_sem);
- spin_lock_init(&c->wbuf_dwork_lock);
INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync);
c->wbuf_pagesize = c->mtd->writesize;
c->wbuf_ofs = 0xFFFFFFFF;
@@ -1251,7 +1241,6 @@ int jffs2_dataflash_setup(struct jffs2_sb_info *c) {
/* Initialize write buffer */
init_rwsem(&c->wbuf_sem);
- spin_lock_init(&c->wbuf_dwork_lock);
INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync);
c->wbuf_pagesize = c->mtd->erasesize;
@@ -1311,7 +1300,6 @@ int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c) {
/* Initialize write buffer */
init_rwsem(&c->wbuf_sem);
- spin_lock_init(&c->wbuf_dwork_lock);
INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync);
c->wbuf_pagesize = c->mtd->writesize;
@@ -1346,7 +1334,6 @@ int jffs2_ubivol_setup(struct jffs2_sb_info *c) {
return 0;
init_rwsem(&c->wbuf_sem);
- spin_lock_init(&c->wbuf_dwork_lock);
INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync);
c->wbuf_pagesize = c->mtd->writesize;
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 0ddbeceafc62..5d876b1c9ea4 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -3047,6 +3047,14 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
dir_index = (u32) filp->f_pos;
+ /*
+ * NFSv4 reserves cookies 1 and 2 for . and .. so the value
+ * we return to the vfs is one greater than the one we use
+ * internally.
+ */
+ if (dir_index)
+ dir_index--;
+
if (dir_index > 1) {
struct dir_table_slot dirtab_slot;
@@ -3086,7 +3094,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
if (p->header.flag & BT_INTERNAL) {
jfs_err("jfs_readdir: bad index table");
DT_PUTPAGE(mp);
- filp->f_pos = -1;
+ filp->f_pos = DIREND;
return 0;
}
} else {
@@ -3094,16 +3102,16 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
/*
* self "."
*/
- filp->f_pos = 0;
- if (filldir(dirent, ".", 1, 0, ip->i_ino,
+ filp->f_pos = 1;
+ if (filldir(dirent, ".", 1, 1, ip->i_ino,
DT_DIR))
return 0;
}
/*
* parent ".."
*/
- filp->f_pos = 1;
- if (filldir(dirent, "..", 2, 1, PARENT(ip), DT_DIR))
+ filp->f_pos = 2;
+ if (filldir(dirent, "..", 2, 2, PARENT(ip), DT_DIR))
return 0;
/*
@@ -3123,24 +3131,25 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
/*
* Legacy filesystem - OS/2 & Linux JFS < 0.3.6
*
- * pn = index = 0: First entry "."
- * pn = 0; index = 1: Second entry ".."
+ * pn = 0; index = 1: First entry "."
+ * pn = 0; index = 2: Second entry ".."
* pn > 0: Real entries, pn=1 -> leftmost page
* pn = index = -1: No more entries
*/
dtpos = filp->f_pos;
- if (dtpos == 0) {
+ if (dtpos < 2) {
/* build "." entry */
+ filp->f_pos = 1;
if (filldir(dirent, ".", 1, filp->f_pos, ip->i_ino,
DT_DIR))
return 0;
- dtoffset->index = 1;
+ dtoffset->index = 2;
filp->f_pos = dtpos;
}
if (dtoffset->pn == 0) {
- if (dtoffset->index == 1) {
+ if (dtoffset->index == 2) {
/* build ".." entry */
if (filldir(dirent, "..", 2, filp->f_pos,
@@ -3233,6 +3242,12 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
}
jfs_dirent->position = unique_pos++;
}
+ /*
+ * We add 1 to the index because we may
+ * use a value of 2 internally, and NFSv4
+ * doesn't like that.
+ */
+ jfs_dirent->position++;
} else {
jfs_dirent->position = dtpos;
len = min(d_namleft, DTLHDRDATALEN_LEGACY);
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index c1a3e603279c..7f464c513ba0 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -95,7 +95,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
if (insert_inode_locked(inode) < 0) {
rc = -EINVAL;
- goto fail_unlock;
+ goto fail_put;
}
inode_init_owner(inode, parent, mode);
@@ -156,7 +156,6 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
fail_drop:
dquot_drop(inode);
inode->i_flags |= S_NOQUOTA;
-fail_unlock:
clear_nlink(inode);
unlock_new_inode(inode);
fail_put:
diff --git a/fs/libfs.c b/fs/libfs.c
index 916da8c4158b..1299bd5e07b7 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -104,18 +104,18 @@ loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
spin_lock(&dentry->d_lock);
/* d_lock not required for cursor */
- list_del(&cursor->d_u.d_child);
+ list_del(&cursor->d_child);
p = dentry->d_subdirs.next;
while (n && p != &dentry->d_subdirs) {
struct dentry *next;
- next = list_entry(p, struct dentry, d_u.d_child);
+ next = list_entry(p, struct dentry, d_child);
spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
if (simple_positive(next))
n--;
spin_unlock(&next->d_lock);
p = p->next;
}
- list_add_tail(&cursor->d_u.d_child, p);
+ list_add_tail(&cursor->d_child, p);
spin_unlock(&dentry->d_lock);
}
}
@@ -139,7 +139,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
struct dentry *dentry = filp->f_path.dentry;
struct dentry *cursor = filp->private_data;
- struct list_head *p, *q = &cursor->d_u.d_child;
+ struct list_head *p, *q = &cursor->d_child;
ino_t ino;
int i = filp->f_pos;
@@ -165,7 +165,7 @@ int dcache_readdir(struct file * filp, void * dirent, filldir_t filldir)
for (p=q->next; p != &dentry->d_subdirs; p=p->next) {
struct dentry *next;
- next = list_entry(p, struct dentry, d_u.d_child);
+ next = list_entry(p, struct dentry, d_child);
spin_lock_nested(&next->d_lock, DENTRY_D_LOCK_NESTED);
if (!simple_positive(next)) {
spin_unlock(&next->d_lock);
@@ -289,7 +289,7 @@ int simple_empty(struct dentry *dentry)
int ret = 0;
spin_lock(&dentry->d_lock);
- list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) {
+ list_for_each_entry(child, &dentry->d_subdirs, d_child) {
spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
if (simple_positive(child)) {
spin_unlock(&child->d_lock);
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 01bfe7662751..41e491b8e5d7 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -64,12 +64,17 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init)
nlm_init->protocol, nlm_version,
nlm_init->hostname, nlm_init->noresvport,
nlm_init->net);
- if (host == NULL) {
- lockd_down(nlm_init->net);
- return ERR_PTR(-ENOLCK);
- }
+ if (host == NULL)
+ goto out_nohost;
+ if (host->h_rpcclnt == NULL && nlm_bind_host(host) == NULL)
+ goto out_nobind;
return host;
+out_nobind:
+ nlmclnt_release_host(host);
+out_nohost:
+ lockd_down(nlm_init->net);
+ return ERR_PTR(-ENOLCK);
}
EXPORT_SYMBOL_GPL(nlmclnt_init);
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 9760ecb9b60f..acd394716349 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -125,14 +125,15 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl)
{
struct nlm_args *argp = &req->a_args;
struct nlm_lock *lock = &argp->lock;
+ char *nodename = req->a_host->h_rpcclnt->cl_nodename;
nlmclnt_next_cookie(&argp->cookie);
memcpy(&lock->fh, NFS_FH(file_inode(fl->fl_file)), sizeof(struct nfs_fh));
- lock->caller = utsname()->nodename;
+ lock->caller = nodename;
lock->oh.data = req->a_owner;
lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s",
(unsigned int)fl->fl_u.nfs_fl.owner->pid,
- utsname()->nodename);
+ nodename);
lock->svid = fl->fl_u.nfs_fl.owner->pid;
lock->fl.fl_start = fl->fl_start;
lock->fl.fl_end = fl->fl_end;
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 1812f026960c..6ae664b489af 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -159,6 +159,12 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res,
msg.rpc_proc = &clnt->cl_procinfo[proc];
status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN);
+ if (status == -ECONNREFUSED) {
+ dprintk("lockd: NSM upcall RPC failed, status=%d, forcing rebind\n",
+ status);
+ rpc_force_rebind(clnt);
+ status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN);
+ }
if (status < 0)
dprintk("lockd: NSM upcall RPC failed, status=%d\n",
status);
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index a2aa97d45670..7a318480ab7a 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -137,10 +137,6 @@ lockd(void *vrqstp)
dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n");
- if (!nlm_timeout)
- nlm_timeout = LOCKD_DFLT_TIMEO;
- nlmsvc_timeout = nlm_timeout * HZ;
-
/*
* The main request loop. We don't terminate until the last
* NFS mount or NFS daemon has gone away.
@@ -235,6 +231,7 @@ out_err:
if (warned++ == 0)
printk(KERN_WARNING
"lockd_up: makesock failed, error=%d\n", err);
+ svc_shutdown_net(serv, net);
return err;
}
@@ -252,13 +249,11 @@ static int lockd_up_net(struct svc_serv *serv, struct net *net)
error = make_socks(serv, net);
if (error < 0)
- goto err_socks;
+ goto err_bind;
set_grace_period(net);
dprintk("lockd_up_net: per-net data created; net=%p\n", net);
return 0;
-err_socks:
- svc_rpcb_cleanup(serv, net);
err_bind:
ln->nlmsvc_users--;
return error;
@@ -347,6 +342,10 @@ static struct svc_serv *lockd_create_svc(void)
printk(KERN_WARNING
"lockd_up: no pid, %d users??\n", nlmsvc_users);
+ if (!nlm_timeout)
+ nlm_timeout = LOCKD_DFLT_TIMEO;
+ nlmsvc_timeout = nlm_timeout * HZ;
+
serv = svc_create(&nlmsvc_program, LOCKD_BUFSIZE, NULL);
if (!serv) {
printk(KERN_WARNING "lockd_up: create service failed\n");
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index e703318c41df..ffc4045fc62e 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -767,6 +767,7 @@ nlmsvc_grant_blocked(struct nlm_block *block)
struct nlm_file *file = block->b_file;
struct nlm_lock *lock = &block->b_call->a_args.lock;
int error;
+ loff_t fl_start, fl_end;
dprintk("lockd: grant blocked lock %p\n", block);
@@ -784,9 +785,16 @@ nlmsvc_grant_blocked(struct nlm_block *block)
}
/* Try the lock operation again */
+ /* vfs_lock_file() can mangle fl_start and fl_end, but we need
+ * them unchanged for the GRANT_MSG
+ */
lock->fl.fl_flags |= FL_SLEEP;
+ fl_start = lock->fl.fl_start;
+ fl_end = lock->fl.fl_end;
error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
lock->fl.fl_flags &= ~FL_SLEEP;
+ lock->fl.fl_start = fl_start;
+ lock->fl.fl_end = fl_end;
switch (error) {
case 0:
@@ -939,6 +947,7 @@ nlmsvc_retry_blocked(void)
unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
struct nlm_block *block;
+ spin_lock(&nlm_blocked_lock);
while (!list_empty(&nlm_blocked) && !kthread_should_stop()) {
block = list_entry(nlm_blocked.next, struct nlm_block, b_list);
@@ -948,6 +957,7 @@ nlmsvc_retry_blocked(void)
timeout = block->b_when - jiffies;
break;
}
+ spin_unlock(&nlm_blocked_lock);
dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n",
block, block->b_when);
@@ -957,7 +967,9 @@ nlmsvc_retry_blocked(void)
retry_deferred_block(block);
} else
nlmsvc_grant_blocked(block);
+ spin_lock(&nlm_blocked_lock);
}
+ spin_unlock(&nlm_blocked_lock);
return timeout;
}
diff --git a/fs/locks.c b/fs/locks.c
index cb424a4fed71..0274c953b07d 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1243,11 +1243,10 @@ int __break_lease(struct inode *inode, unsigned int mode)
restart:
break_time = flock->fl_break_time;
- if (break_time != 0) {
+ if (break_time != 0)
break_time -= jiffies;
- if (break_time == 0)
- break_time++;
- }
+ if (break_time == 0)
+ break_time++;
locks_insert_block(flock, new_fl);
unlock_flocks();
error = wait_event_interruptible_timeout(new_fl->fl_wait,
diff --git a/fs/mount.h b/fs/mount.h
index 64a858143ff9..68d80bdcd081 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -73,7 +73,7 @@ static inline int mnt_has_parent(struct mount *mnt)
static inline int is_mounted(struct vfsmount *mnt)
{
/* neither detached nor internal? */
- return !IS_ERR_OR_NULL(real_mount(mnt));
+ return !IS_ERR_OR_NULL(real_mount(mnt)->mnt_ns);
}
extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
diff --git a/fs/namei.c b/fs/namei.c
index 9ed9361223c0..036c21246d6a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -34,6 +34,7 @@
#include <linux/device_cgroup.h>
#include <linux/fs_struct.h>
#include <linux/posix_acl.h>
+#include <linux/hash.h>
#include <asm/uaccess.h>
#include "internal.h"
@@ -321,10 +322,11 @@ int generic_permission(struct inode *inode, int mask)
if (S_ISDIR(inode->i_mode)) {
/* DACs are overridable for directories */
- if (inode_capable(inode, CAP_DAC_OVERRIDE))
+ if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE))
return 0;
if (!(mask & MAY_WRITE))
- if (inode_capable(inode, CAP_DAC_READ_SEARCH))
+ if (capable_wrt_inode_uidgid(inode,
+ CAP_DAC_READ_SEARCH))
return 0;
return -EACCES;
}
@@ -334,7 +336,7 @@ int generic_permission(struct inode *inode, int mask)
* at least one exec bit set.
*/
if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO))
- if (inode_capable(inode, CAP_DAC_OVERRIDE))
+ if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE))
return 0;
/*
@@ -342,7 +344,7 @@ int generic_permission(struct inode *inode, int mask)
*/
mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
if (mask == MAY_READ)
- if (inode_capable(inode, CAP_DAC_READ_SEARCH))
+ if (capable_wrt_inode_uidgid(inode, CAP_DAC_READ_SEARCH))
return 0;
return -EACCES;
@@ -1540,7 +1542,8 @@ static inline int walk_component(struct nameidata *nd, struct path *path,
if (should_follow_link(inode, follow)) {
if (nd->flags & LOOKUP_RCU) {
- if (unlikely(unlazy_walk(nd, path->dentry))) {
+ if (unlikely(nd->path.mnt != path->mnt ||
+ unlazy_walk(nd, path->dentry))) {
err = -ECHILD;
goto out_err;
}
@@ -1646,8 +1649,7 @@ static inline int can_lookup(struct inode *inode)
static inline unsigned int fold_hash(unsigned long hash)
{
- hash += hash >> (8*sizeof(int));
- return hash;
+ return hash_64(hash, 32);
}
#else /* 32-bit case */
@@ -2199,7 +2201,7 @@ static inline int check_sticky(struct inode *dir, struct inode *inode)
return 0;
if (uid_eq(dir->i_uid, fsuid))
return 0;
- return !inode_capable(inode, CAP_FOWNER);
+ return !capable_wrt_inode_uidgid(inode, CAP_FOWNER);
}
/*
@@ -2263,6 +2265,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
*/
static inline int may_create(struct inode *dir, struct dentry *child)
{
+ audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE);
if (child->d_inode)
return -EEXIST;
if (IS_DEADDIR(dir))
@@ -2822,7 +2825,8 @@ finish_lookup:
if (should_follow_link(inode, !symlink_ok)) {
if (nd->flags & LOOKUP_RCU) {
- if (unlikely(unlazy_walk(nd, path->dentry))) {
+ if (unlikely(nd->path.mnt != path->mnt ||
+ unlazy_walk(nd, path->dentry))) {
error = -ECHILD;
goto out;
}
@@ -3654,6 +3658,7 @@ retry:
out_dput:
done_path_create(&new_path, new_dentry);
if (retry_estale(error, how)) {
+ path_put(&old_path);
how |= LOOKUP_REVAL;
goto retry;
}
diff --git a/fs/namespace.c b/fs/namespace.c
index 7b1ca9ba0b0a..d0244c8ba09c 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -828,8 +828,21 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
/* Don't allow unprivileged users to change mount flags */
- if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
- mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
+ if (flag & CL_UNPRIVILEGED) {
+ mnt->mnt.mnt_flags |= MNT_LOCK_ATIME;
+
+ if (mnt->mnt.mnt_flags & MNT_READONLY)
+ mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
+
+ if (mnt->mnt.mnt_flags & MNT_NODEV)
+ mnt->mnt.mnt_flags |= MNT_LOCK_NODEV;
+
+ if (mnt->mnt.mnt_flags & MNT_NOSUID)
+ mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID;
+
+ if (mnt->mnt.mnt_flags & MNT_NOEXEC)
+ mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC;
+ }
atomic_inc(&sb->s_active);
mnt->mnt.mnt_sb = sb;
@@ -1261,6 +1274,8 @@ static int do_umount(struct mount *mnt, int flags)
* Special case for "unmounting" root ...
* we just try to remount it readonly.
*/
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
down_write(&sb->s_umount);
if (!(sb->s_flags & MS_RDONLY))
retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
@@ -1327,6 +1342,9 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
goto dput_and_out;
if (!check_mnt(mnt))
goto dput_and_out;
+ retval = -EPERM;
+ if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
+ goto dput_and_out;
retval = do_umount(mnt, flags);
dput_and_out:
@@ -1429,7 +1447,7 @@ struct vfsmount *collect_mounts(struct path *path)
CL_COPY_ALL | CL_PRIVATE);
namespace_unlock();
if (IS_ERR(tree))
- return NULL;
+ return ERR_CAST(tree);
return &tree->mnt;
}
@@ -1764,9 +1782,6 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
if (readonly_request == __mnt_is_readonly(mnt))
return 0;
- if (mnt->mnt_flags & MNT_LOCK_READONLY)
- return -EPERM;
-
if (readonly_request)
error = mnt_make_readonly(real_mount(mnt));
else
@@ -1792,6 +1807,39 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
if (path->dentry != path->mnt->mnt_root)
return -EINVAL;
+ /* Don't allow changing of locked mnt flags.
+ *
+ * No locks need to be held here while testing the various
+ * MNT_LOCK flags because those flags can never be cleared
+ * once they are set.
+ */
+ if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
+ !(mnt_flags & MNT_READONLY)) {
+ return -EPERM;
+ }
+ if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
+ !(mnt_flags & MNT_NODEV)) {
+ /* Was the nodev implicitly added in mount? */
+ if ((mnt->mnt_ns->user_ns != &init_user_ns) &&
+ !(sb->s_type->fs_flags & FS_USERNS_DEV_MOUNT)) {
+ mnt_flags |= MNT_NODEV;
+ } else {
+ return -EPERM;
+ }
+ }
+ if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
+ !(mnt_flags & MNT_NOSUID)) {
+ return -EPERM;
+ }
+ if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) &&
+ !(mnt_flags & MNT_NOEXEC)) {
+ return -EPERM;
+ }
+ if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
+ ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) {
+ return -EPERM;
+ }
+
err = security_sb_remount(sb, data);
if (err)
return err;
@@ -1805,7 +1853,7 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
err = do_remount_sb(sb, flags, data, 0);
if (!err) {
br_write_lock(&vfsmount_lock);
- mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK;
+ mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
mnt->mnt.mnt_flags = mnt_flags;
br_write_unlock(&vfsmount_lock);
}
@@ -1991,7 +2039,7 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
*/
if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
flags |= MS_NODEV;
- mnt_flags |= MNT_NODEV;
+ mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
}
}
@@ -2309,6 +2357,14 @@ long do_mount(const char *dev_name, const char *dir_name,
if (flags & MS_RDONLY)
mnt_flags |= MNT_READONLY;
+ /* The default atime for remount is preservation */
+ if ((flags & MS_REMOUNT) &&
+ ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
+ MS_STRICTATIME)) == 0)) {
+ mnt_flags &= ~MNT_ATIME_MASK;
+ mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
+ }
+
flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
MS_STRICTATIME);
@@ -2649,6 +2705,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
/* make sure we can reach put_old from new_root */
if (!is_path_reachable(old_mnt, old.dentry, &new))
goto out4;
+ /* make certain new is below the root */
+ if (!is_path_reachable(new_mnt, new.dentry, &root))
+ goto out4;
root_mp->m_count++; /* pin it so it won't go away */
br_write_lock(&vfsmount_lock);
detach_mnt(new_mnt, &parent_path);
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index 6792ce11f2bf..c578ba9949e6 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -391,7 +391,7 @@ ncp_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
spin_lock(&parent->d_lock);
next = parent->d_subdirs.next;
while (next != &parent->d_subdirs) {
- dent = list_entry(next, struct dentry, d_u.d_child);
+ dent = list_entry(next, struct dentry, d_child);
if ((unsigned long)dent->d_fsdata == fpos) {
if (dent->d_inode)
dget(dent);
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 60426ccb3b65..2f970de02b16 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -448,7 +448,6 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
result = -EIO;
}
}
- result = 0;
}
mutex_unlock(&server->root_setup_lock);
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 32c06587351a..6d5e7c56c79d 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -194,7 +194,7 @@ ncp_renew_dentries(struct dentry *parent)
spin_lock(&parent->d_lock);
next = parent->d_subdirs.next;
while (next != &parent->d_subdirs) {
- dentry = list_entry(next, struct dentry, d_u.d_child);
+ dentry = list_entry(next, struct dentry, d_child);
if (dentry->d_fsdata == NULL)
ncp_age_dentry(server, dentry);
@@ -216,7 +216,7 @@ ncp_invalidate_dircache_entries(struct dentry *parent)
spin_lock(&parent->d_lock);
next = parent->d_subdirs.next;
while (next != &parent->d_subdirs) {
- dentry = list_entry(next, struct dentry, d_u.d_child);
+ dentry = list_entry(next, struct dentry, d_child);
dentry->d_fsdata = NULL;
ncp_age_dentry(server, dentry);
next = next->next;
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index 9c3e117c3ed1..4d0161442565 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -44,7 +44,7 @@
static inline sector_t normalize(sector_t s, int base)
{
sector_t tmp = s; /* Since do_div modifies its argument */
- return s - do_div(tmp, base);
+ return s - sector_div(tmp, base);
}
static inline sector_t normalize_up(sector_t s, int base)
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index cff089a412c7..e05c96ebb27d 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -128,22 +128,24 @@ nfs41_callback_svc(void *vrqstp)
if (try_to_freeze())
continue;
- prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_INTERRUPTIBLE);
+ prepare_to_wait(&serv->sv_cb_waitq, &wq, TASK_UNINTERRUPTIBLE);
spin_lock_bh(&serv->sv_cb_lock);
if (!list_empty(&serv->sv_cb_list)) {
req = list_first_entry(&serv->sv_cb_list,
struct rpc_rqst, rq_bc_list);
list_del(&req->rq_bc_list);
spin_unlock_bh(&serv->sv_cb_lock);
+ finish_wait(&serv->sv_cb_waitq, &wq);
dprintk("Invoking bc_svc_process()\n");
error = bc_svc_process(serv, req, rqstp);
dprintk("bc_svc_process() returned w/ error code= %d\n",
error);
} else {
spin_unlock_bh(&serv->sv_cb_lock);
- schedule();
+ /* schedule_timeout to game the hung task watchdog */
+ schedule_timeout(60 * HZ);
+ finish_wait(&serv->sv_cb_waitq, &wq);
}
- finish_wait(&serv->sv_cb_waitq, &wq);
}
return 0;
}
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index a35582c9d444..e98ecf8d2588 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -464,8 +464,10 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
for (i = 0; i < args->csa_nrclists; i++) {
status = decode_rc_list(xdr, &args->csa_rclists[i]);
- if (status)
+ if (status) {
+ args->csa_nrclists = i;
goto out_free;
+ }
}
}
status = 0;
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 57db3244f4d9..ef0c394b7bf5 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -108,6 +108,8 @@ again:
continue;
if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
continue;
+ if (!nfs4_valid_open_stateid(state))
+ continue;
if (!nfs4_stateid_match(&state->stateid, stateid))
continue;
get_nfs_open_context(ctx);
@@ -175,7 +177,11 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
{
int res = 0;
- res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid, issync);
+ if (!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
+ res = nfs4_proc_delegreturn(inode,
+ delegation->cred,
+ &delegation->stateid,
+ issync);
nfs_free_delegation(delegation);
return res;
}
@@ -361,11 +367,13 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation
{
struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
struct nfs_inode *nfsi = NFS_I(inode);
- int err;
+ int err = 0;
if (delegation == NULL)
return 0;
do {
+ if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
+ break;
err = nfs_delegation_claim_opens(inode, &delegation->stateid);
if (!issync || err != -EAGAIN)
break;
@@ -586,10 +594,23 @@ static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *cl
rcu_read_unlock();
}
+static void nfs_revoke_delegation(struct inode *inode)
+{
+ struct nfs_delegation *delegation;
+ rcu_read_lock();
+ delegation = rcu_dereference(NFS_I(inode)->delegation);
+ if (delegation != NULL) {
+ set_bit(NFS_DELEGATION_REVOKED, &delegation->flags);
+ nfs_mark_return_delegation(NFS_SERVER(inode), delegation);
+ }
+ rcu_read_unlock();
+}
+
void nfs_remove_bad_delegation(struct inode *inode)
{
struct nfs_delegation *delegation;
+ nfs_revoke_delegation(inode);
delegation = nfs_inode_detach_delegation(inode);
if (delegation) {
nfs_inode_find_state_and_recover(inode, &delegation->stateid);
@@ -656,16 +677,19 @@ int nfs_async_inode_return_delegation(struct inode *inode,
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
+ if (delegation == NULL)
+ goto out_enoent;
- if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) {
- rcu_read_unlock();
- return -ENOENT;
- }
+ if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid))
+ goto out_enoent;
nfs_mark_return_delegation(server, delegation);
rcu_read_unlock();
nfs_delegation_run_state_manager(clp);
return 0;
+out_enoent:
+ rcu_read_unlock();
+ return -ENOENT;
}
static struct inode *
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 9a79c7a99d6d..e02b090ab9da 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -31,6 +31,7 @@ enum {
NFS_DELEGATION_RETURN_IF_CLOSED,
NFS_DELEGATION_REFERENCED,
NFS_DELEGATION_RETURNING,
+ NFS_DELEGATION_REVOKED,
};
int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 0bd7a55a5f07..615c5079db7c 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -123,6 +123,12 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
*/
ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
{
+ struct inode *inode = iocb->ki_filp->f_mapping->host;
+
+ /* we only support swap file calling nfs_direct_IO */
+ if (!IS_SWAPFILE(inode))
+ return 0;
+
#ifndef CONFIG_NFS_SWAP
dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
iocb->ki_filp->f_path.dentry->d_name.name,
@@ -180,6 +186,7 @@ static void nfs_direct_req_free(struct kref *kref)
{
struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
+ nfs_free_pnfs_ds_cinfo(&dreq->ds_cinfo);
if (dreq->l_ctx != NULL)
nfs_put_lock_context(dreq->l_ctx);
if (dreq->ctx != NULL)
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 44efaa8c5f78..0fe3ced6438c 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -58,7 +58,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
*/
spin_lock(&sb->s_root->d_inode->i_lock);
spin_lock(&sb->s_root->d_lock);
- hlist_del_init(&sb->s_root->d_alias);
+ hlist_del_init(&sb->s_root->d_u.d_alias);
spin_unlock(&sb->s_root->d_lock);
spin_unlock(&sb->s_root->d_inode->i_lock);
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c1c7a9d78722..e9be01b2cc5a 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -519,7 +519,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
{
struct inode *inode = dentry->d_inode;
int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
- int err;
+ int err = 0;
/* Flush out writes to the server in order to update c/mtime. */
if (S_ISREG(inode->i_mode)) {
@@ -1382,18 +1382,20 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_version = fattr->change_attr;
}
} else if (server->caps & NFS_CAP_CHANGE_ATTR)
- invalid |= save_cache_validity;
+ nfsi->cache_validity |= save_cache_validity;
if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
} else if (server->caps & NFS_CAP_MTIME)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_REVAL_FORCED);
if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
} else if (server->caps & NFS_CAP_CTIME)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_REVAL_FORCED);
/* Check if our cached file size is stale */
@@ -1416,7 +1418,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
(long long)new_isize);
}
} else
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_REVAL_PAGECACHE
| NFS_INO_REVAL_FORCED);
@@ -1424,7 +1427,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
else if (server->caps & NFS_CAP_ATIME)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATIME
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATIME
| NFS_INO_REVAL_FORCED);
if (fattr->valid & NFS_ATTR_FATTR_MODE) {
@@ -1435,7 +1439,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
}
} else if (server->caps & NFS_CAP_MODE)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL
| NFS_INO_REVAL_FORCED);
@@ -1446,7 +1451,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_uid = fattr->uid;
}
} else if (server->caps & NFS_CAP_OWNER)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL
| NFS_INO_REVAL_FORCED);
@@ -1457,7 +1463,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_gid = fattr->gid;
}
} else if (server->caps & NFS_CAP_OWNER_GROUP)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL
| NFS_INO_REVAL_FORCED);
@@ -1470,7 +1477,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
set_nlink(inode, fattr->nlink);
}
} else if (server->caps & NFS_CAP_NLINK)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_REVAL_FORCED);
if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 4a1aafba6a20..8c34f57a9aef 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -305,7 +305,10 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
.rpc_argp = &args,
.rpc_resp = &fattr,
};
- int status;
+ int status = 0;
+
+ if (acl == NULL && (!S_ISDIR(inode->i_mode) || dfacl == NULL))
+ goto out;
status = -EOPNOTSUPP;
if (!nfs_server_capable(inode, NFS_CAP_ACLS))
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 4cbad5d6b276..5f8d5ffdad8f 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -240,13 +240,11 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
error = nfs4_discover_server_trunking(clp, &old);
if (error < 0)
goto error;
- nfs_put_client(clp);
- if (clp != old) {
- clp->cl_preserve_clid = true;
- clp = old;
- }
- return clp;
+ if (clp != old)
+ clp->cl_preserve_clid = true;
+ nfs_put_client(clp);
+ return old;
error:
nfs_mark_client_ready(clp, error);
@@ -313,6 +311,16 @@ int nfs40_walk_client_list(struct nfs_client *new,
spin_lock(&nn->nfs_client_lock);
list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
+
+ if (pos->rpc_ops != new->rpc_ops)
+ continue;
+
+ if (pos->cl_proto != new->cl_proto)
+ continue;
+
+ if (pos->cl_minorversion != new->cl_minorversion)
+ continue;
+
/* If "pos" isn't marked ready, we can't trust the
* remaining fields in "pos" */
if (pos->cl_cons_state > NFS_CS_READY) {
@@ -324,22 +332,14 @@ int nfs40_walk_client_list(struct nfs_client *new,
prev = pos;
status = nfs_wait_client_init_complete(pos);
- spin_lock(&nn->nfs_client_lock);
if (status < 0)
- continue;
+ goto out;
+ status = -NFS4ERR_STALE_CLIENTID;
+ spin_lock(&nn->nfs_client_lock);
}
if (pos->cl_cons_state != NFS_CS_READY)
continue;
- if (pos->rpc_ops != new->rpc_ops)
- continue;
-
- if (pos->cl_proto != new->cl_proto)
- continue;
-
- if (pos->cl_minorversion != new->cl_minorversion)
- continue;
-
if (pos->cl_clientid != new->cl_clientid)
continue;
@@ -394,20 +394,14 @@ static bool nfs4_match_clientids(struct nfs_client *a, struct nfs_client *b)
}
/*
- * Returns true if the server owners match
+ * Returns true if the server major ids match
*/
static bool
-nfs4_match_serverowners(struct nfs_client *a, struct nfs_client *b)
+nfs4_check_clientid_trunking(struct nfs_client *a, struct nfs_client *b)
{
struct nfs41_server_owner *o1 = a->cl_serverowner;
struct nfs41_server_owner *o2 = b->cl_serverowner;
- if (o1->minor_id != o2->minor_id) {
- dprintk("NFS: --> %s server owner minor IDs do not match\n",
- __func__);
- return false;
- }
-
if (o1->major_id_sz != o2->major_id_sz)
goto out_major_mismatch;
if (memcmp(o1->major_id, o2->major_id, o1->major_id_sz) != 0)
@@ -445,6 +439,16 @@ int nfs41_walk_client_list(struct nfs_client *new,
spin_lock(&nn->nfs_client_lock);
list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
+
+ if (pos->rpc_ops != new->rpc_ops)
+ continue;
+
+ if (pos->cl_proto != new->cl_proto)
+ continue;
+
+ if (pos->cl_minorversion != new->cl_minorversion)
+ continue;
+
/* If "pos" isn't marked ready, we can't trust the
* remaining fields in "pos", especially the client
* ID and serverowner fields. Wait for CREATE_SESSION
@@ -458,30 +462,27 @@ int nfs41_walk_client_list(struct nfs_client *new,
prev = pos;
status = nfs_wait_client_init_complete(pos);
- if (status == 0) {
+ if (pos->cl_cons_state == NFS_CS_SESSION_INITING) {
nfs4_schedule_lease_recovery(pos);
status = nfs4_wait_clnt_recover(pos);
}
spin_lock(&nn->nfs_client_lock);
if (status < 0)
- continue;
+ break;
+ status = -NFS4ERR_STALE_CLIENTID;
}
if (pos->cl_cons_state != NFS_CS_READY)
continue;
- if (pos->rpc_ops != new->rpc_ops)
- continue;
-
- if (pos->cl_proto != new->cl_proto)
- continue;
-
- if (pos->cl_minorversion != new->cl_minorversion)
- continue;
-
if (!nfs4_match_clientids(pos, new))
continue;
- if (!nfs4_match_serverowners(pos, new))
+ /*
+ * Note that session trunking is just a special subcase of
+ * client id trunking. In either case, we want to fall back
+ * to using the existing nfs_client.
+ */
+ if (!nfs4_check_clientid_trunking(pos, new))
continue;
atomic_inc(&pos->cl_count);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 22d10623f5ee..b039f7f26d95 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -1300,7 +1300,7 @@ filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
struct nfs4_filelayout *flo;
flo = kzalloc(sizeof(*flo), gfp_flags);
- return &flo->generic_hdr;
+ return flo != NULL ? &flo->generic_hdr : NULL;
}
static void
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 661a0f611215..678cb8964532 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -797,34 +797,34 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
-
- if (filelayout_test_devid_unavailable(devid))
- return NULL;
+ struct nfs4_pnfs_ds *ret = ds;
if (ds == NULL) {
printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
__func__, ds_idx);
filelayout_mark_devid_invalid(devid);
- return NULL;
+ goto out;
}
if (ds->ds_clp)
- return ds;
+ goto out_test_devid;
if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
int err;
err = nfs4_ds_connect(s, ds);
- if (err) {
+ if (err)
nfs4_mark_deviceid_unavailable(devid);
- ds = NULL;
- }
nfs4_clear_ds_conn_bit(ds);
} else {
/* Either ds is connected, or ds is NULL */
nfs4_wait_ds_connect(ds);
}
- return ds;
+out_test_devid:
+ if (filelayout_test_devid_unavailable(devid))
+ ret = NULL;
+out:
+ return ret;
}
module_param(dataserver_retrans, uint, 0644);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d7ba5616989c..20ebcfa3c92e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1160,29 +1160,24 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
int ret;
if (!data->rpc_done) {
- ret = data->rpc_status;
- goto err;
+ if (data->rpc_status) {
+ ret = data->rpc_status;
+ goto err;
+ }
+ /* cached opens have already been processed */
+ goto update;
}
- ret = -ESTALE;
- if (!(data->f_attr.valid & NFS_ATTR_FATTR_TYPE) ||
- !(data->f_attr.valid & NFS_ATTR_FATTR_FILEID) ||
- !(data->f_attr.valid & NFS_ATTR_FATTR_CHANGE))
- goto err;
-
- ret = -ENOMEM;
- state = nfs4_get_open_state(inode, data->owner);
- if (state == NULL)
- goto err;
-
ret = nfs_refresh_inode(inode, &data->f_attr);
if (ret)
goto err;
if (data->o_res.delegation_type != 0)
nfs4_opendata_check_deleg(data, state);
+update:
update_open_stateid(state, &data->o_res.stateid, NULL,
data->o_arg.fmode);
+ atomic_inc(&state->count);
return state;
err:
@@ -1421,7 +1416,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
nfs_inode_find_state_and_recover(state->inode,
stateid);
nfs4_schedule_stateid_recovery(server, state);
- return 0;
+ return -EAGAIN;
case -NFS4ERR_DELAY:
case -NFS4ERR_GRACE:
set_bit(NFS_DELEGATED_STATE, &state->flags);
@@ -1850,6 +1845,28 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
return ret;
}
+static void nfs_finish_clear_delegation_stateid(struct nfs4_state *state)
+{
+ nfs_remove_bad_delegation(state->inode);
+ write_seqlock(&state->seqlock);
+ nfs4_stateid_copy(&state->stateid, &state->open_stateid);
+ write_sequnlock(&state->seqlock);
+ clear_bit(NFS_DELEGATED_STATE, &state->flags);
+}
+
+static void nfs40_clear_delegation_stateid(struct nfs4_state *state)
+{
+ if (rcu_access_pointer(NFS_I(state->inode)->delegation) != NULL)
+ nfs_finish_clear_delegation_stateid(state);
+}
+
+static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
+{
+ /* NFSv4.0 doesn't allow for delegation recovery on open expire */
+ nfs40_clear_delegation_stateid(state);
+ return nfs4_open_expired(sp, state);
+}
+
#if defined(CONFIG_NFS_V4_1)
static void nfs41_clear_delegation_stateid(struct nfs4_state *state)
{
@@ -2292,6 +2309,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
struct nfs4_closedata *calldata = data;
struct nfs4_state *state = calldata->state;
struct inode *inode = calldata->inode;
+ bool is_rdonly, is_wronly, is_rdwr;
int call_close = 0;
dprintk("%s: begin!\n", __func__);
@@ -2299,21 +2317,27 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
goto out_wait;
task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
- calldata->arg.fmode = FMODE_READ|FMODE_WRITE;
spin_lock(&state->owner->so_lock);
+ is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags);
+ is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags);
+ is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags);
/* Calculate the change in open mode */
+ calldata->arg.fmode = 0;
if (state->n_rdwr == 0) {
- if (state->n_rdonly == 0) {
- call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags);
- call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
- calldata->arg.fmode &= ~FMODE_READ;
- }
- if (state->n_wronly == 0) {
- call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags);
- call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
- calldata->arg.fmode &= ~FMODE_WRITE;
- }
- }
+ if (state->n_rdonly == 0)
+ call_close |= is_rdonly;
+ else if (is_rdonly)
+ calldata->arg.fmode |= FMODE_READ;
+ if (state->n_wronly == 0)
+ call_close |= is_wronly;
+ else if (is_wronly)
+ calldata->arg.fmode |= FMODE_WRITE;
+ } else if (is_rdwr)
+ calldata->arg.fmode |= FMODE_READ|FMODE_WRITE;
+
+ if (calldata->arg.fmode == 0)
+ call_close |= is_rdwr;
+
if (!nfs4_valid_open_stateid(state))
call_close = 0;
spin_unlock(&state->owner->so_lock);
@@ -3612,8 +3636,9 @@ static bool nfs4_stateid_is_current(nfs4_stateid *stateid,
{
nfs4_stateid current_stateid;
- if (nfs4_set_rw_stateid(&current_stateid, ctx, l_ctx, fmode))
- return false;
+ /* If the current stateid represents a lost lock, then exit */
+ if (nfs4_set_rw_stateid(&current_stateid, ctx, l_ctx, fmode) == -EIO)
+ return true;
return nfs4_stateid_match(stateid, &current_stateid);
}
@@ -4227,8 +4252,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
dprintk("%s ERROR %d, Reset session\n", __func__,
task->tk_status);
nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
- task->tk_status = 0;
- return -EAGAIN;
+ goto wait_on_recovery;
#endif /* CONFIG_NFS_V4_1 */
case -NFS4ERR_DELAY:
nfs_inc_server_stats(server, NFSIOS_DELAY);
@@ -4406,11 +4430,17 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
return;
switch (task->tk_status) {
- case -NFS4ERR_STALE_STATEID:
- case -NFS4ERR_EXPIRED:
case 0:
renew_lease(data->res.server, data->timestamp);
break;
+ case -NFS4ERR_ADMIN_REVOKED:
+ case -NFS4ERR_DELEG_REVOKED:
+ case -NFS4ERR_BAD_STATEID:
+ case -NFS4ERR_OLD_STATEID:
+ case -NFS4ERR_STALE_STATEID:
+ case -NFS4ERR_EXPIRED:
+ task->tk_status = 0;
+ break;
default:
if (nfs4_async_handle_error(task, data->res.server, NULL) ==
-EAGAIN) {
@@ -4572,6 +4602,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
status = 0;
}
request->fl_ops->fl_release_private(request);
+ request->fl_ops = NULL;
out:
return status;
}
@@ -6058,7 +6089,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
int ret = 0;
if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0)
- return 0;
+ return -EAGAIN;
task = _nfs41_proc_sequence(clp, cred, false);
if (IS_ERR(task))
ret = PTR_ERR(task);
@@ -6231,9 +6262,9 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
struct nfs_server *server = NFS_SERVER(inode);
struct pnfs_layout_hdr *lo;
struct nfs4_state *state = NULL;
- unsigned long timeo, giveup;
+ unsigned long timeo, now, giveup;
- dprintk("--> %s\n", __func__);
+ dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status);
if (!nfs41_sequence_done(task, &lgp->res.seq_res))
goto out;
@@ -6241,12 +6272,38 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
switch (task->tk_status) {
case 0:
goto out;
+ /*
+ * NFS4ERR_LAYOUTTRYLATER is a conflict with another client
+ * (or clients) writing to the same RAID stripe
+ */
case -NFS4ERR_LAYOUTTRYLATER:
+ /*
+ * NFS4ERR_RECALLCONFLICT is when conflict with self (must recall
+ * existing layout before getting a new one).
+ */
case -NFS4ERR_RECALLCONFLICT:
timeo = rpc_get_timeout(task->tk_client);
giveup = lgp->args.timestamp + timeo;
- if (time_after(giveup, jiffies))
- task->tk_status = -NFS4ERR_DELAY;
+ now = jiffies;
+ if (time_after(giveup, now)) {
+ unsigned long delay;
+
+ /* Delay for:
+ * - Not less than NFS4_POLL_RETRY_MIN.
+ * - One last time a jiffie before we give up
+ * - exponential backoff (time_now minus start_attempt)
+ */
+ delay = max_t(unsigned long, NFS4_POLL_RETRY_MIN,
+ min((giveup - now - 1),
+ now - lgp->args.timestamp));
+
+ dprintk("%s: NFS4ERR_RECALLCONFLICT waiting %lu\n",
+ __func__, delay);
+ rpc_delay(task, delay);
+ task->tk_status = 0;
+ rpc_restart_call_prepare(task);
+ goto out; /* Do not call nfs4_async_handle_error() */
+ }
break;
case -NFS4ERR_EXPIRED:
case -NFS4ERR_BAD_STATEID:
@@ -6361,6 +6418,9 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
dprintk("--> %s\n", __func__);
+ /* nfs4_layoutget_release calls pnfs_put_layout_hdr */
+ pnfs_get_layout_hdr(NFS_I(inode)->layout);
+
lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags);
if (!lgp->args.layout.pages) {
nfs4_layoutget_release(lgp);
@@ -6373,9 +6433,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
lgp->res.seq_res.sr_slot = NULL;
nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0);
- /* nfs4_layoutget_release calls pnfs_put_layout_hdr */
- pnfs_get_layout_hdr(NFS_I(inode)->layout);
-
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return ERR_CAST(task);
@@ -6682,7 +6739,7 @@ nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
switch (err) {
case 0:
case -NFS4ERR_WRONGSEC:
- case -NFS4ERR_NOTSUPP:
+ case -ENOTSUPP:
goto out;
default:
err = nfs4_handle_exception(server, err, &exception);
@@ -6714,7 +6771,7 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
* Fall back on "guess and check" method if
* the server doesn't support SECINFO_NO_NAME
*/
- if (err == -NFS4ERR_WRONGSEC || err == -NFS4ERR_NOTSUPP) {
+ if (err == -NFS4ERR_WRONGSEC || err == -ENOTSUPP) {
err = nfs4_find_root_sec(server, fhandle, info);
goto out_freepage;
}
@@ -6939,7 +6996,7 @@ static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = {
static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = {
.owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
.state_flag_bit = NFS_STATE_RECLAIM_NOGRACE,
- .recover_open = nfs4_open_expired,
+ .recover_open = nfs40_open_expired,
.recover_lock = nfs4_lock_expired,
.establish_clid = nfs4_init_clientid,
.get_clid_cred = nfs4_get_setclientid_cred,
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 1720d32ffa54..e1ba58c3d1ad 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -88,10 +88,18 @@ nfs4_renew_state(struct work_struct *work)
}
nfs_expire_all_delegations(clp);
} else {
+ int ret;
+
/* Queue an asynchronous RENEW. */
- ops->sched_state_renewal(clp, cred, renew_flags);
+ ret = ops->sched_state_renewal(clp, cred, renew_flags);
put_rpccred(cred);
- goto out_exp;
+ switch (ret) {
+ default:
+ goto out_exp;
+ case -EAGAIN:
+ case -ENOMEM:
+ break;
+ }
}
} else {
dprintk("%s: failed to call renewd. Reason: lease not expired \n",
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 1fab140764c4..d482b86d0e0b 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -228,19 +228,8 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp)
return status;
}
-/*
- * Back channel returns NFS4ERR_DELAY for new requests when
- * NFS4_SESSION_DRAINING is set so there is no work to be done when draining
- * is ended.
- */
-static void nfs4_end_drain_session(struct nfs_client *clp)
+static void nfs4_end_drain_slot_table(struct nfs4_slot_table *tbl)
{
- struct nfs4_session *ses = clp->cl_session;
- struct nfs4_slot_table *tbl;
-
- if (ses == NULL)
- return;
- tbl = &ses->fc_slot_table;
if (test_and_clear_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) {
spin_lock(&tbl->slot_tbl_lock);
nfs41_wake_slot_table(tbl);
@@ -248,6 +237,16 @@ static void nfs4_end_drain_session(struct nfs_client *clp)
}
}
+static void nfs4_end_drain_session(struct nfs_client *clp)
+{
+ struct nfs4_session *ses = clp->cl_session;
+
+ if (ses != NULL) {
+ nfs4_end_drain_slot_table(&ses->bc_slot_table);
+ nfs4_end_drain_slot_table(&ses->fc_slot_table);
+ }
+}
+
/*
* Signal state manager thread if session fore channel is drained
*/
@@ -1700,7 +1699,8 @@ restart:
if (status < 0) {
set_bit(ops->owner_flag_bit, &sp->so_flags);
nfs4_put_state_owner(sp);
- return nfs4_recovery_handle_error(clp, status);
+ status = nfs4_recovery_handle_error(clp, status);
+ return (status != 0) ? status : -EAGAIN;
}
nfs4_put_state_owner(sp);
@@ -1709,7 +1709,7 @@ restart:
spin_unlock(&clp->cl_lock);
}
rcu_read_unlock();
- return status;
+ return 0;
}
static int nfs4_check_lease(struct nfs_client *clp)
@@ -1756,7 +1756,6 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status)
break;
case -NFS4ERR_STALE_CLIENTID:
clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
- nfs4_state_clear_reclaim_reboot(clp);
nfs4_state_start_reclaim_reboot(clp);
break;
case -NFS4ERR_CLID_INUSE:
@@ -2175,14 +2174,11 @@ static void nfs4_state_manager(struct nfs_client *clp)
section = "reclaim reboot";
status = nfs4_do_reclaim(clp,
clp->cl_mvops->reboot_recovery_ops);
- if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
- test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
- continue;
- nfs4_state_end_reclaim_reboot(clp);
- if (test_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state))
+ if (status == -EAGAIN)
continue;
if (status < 0)
goto out_error;
+ nfs4_state_end_reclaim_reboot(clp);
}
/* Now recover expired state... */
@@ -2190,9 +2186,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
section = "reclaim nograce";
status = nfs4_do_reclaim(clp,
clp->cl_mvops->nograce_recovery_ops);
- if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
- test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) ||
- test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
+ if (status == -EAGAIN)
continue;
if (status < 0)
goto out_error;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4be8d135ed61..988efb4caac0 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3002,7 +3002,8 @@ out_overflow:
return -EIO;
}
-static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
+static bool __decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected,
+ int *nfs_retval)
{
__be32 *p;
uint32_t opnum;
@@ -3012,19 +3013,32 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
if (unlikely(!p))
goto out_overflow;
opnum = be32_to_cpup(p++);
- if (opnum != expected) {
- dprintk("nfs: Server returned operation"
- " %d but we issued a request for %d\n",
- opnum, expected);
- return -EIO;
- }
+ if (unlikely(opnum != expected))
+ goto out_bad_operation;
nfserr = be32_to_cpup(p);
- if (nfserr != NFS_OK)
- return nfs4_stat_to_errno(nfserr);
- return 0;
+ if (nfserr == NFS_OK)
+ *nfs_retval = 0;
+ else
+ *nfs_retval = nfs4_stat_to_errno(nfserr);
+ return true;
+out_bad_operation:
+ dprintk("nfs: Server returned operation"
+ " %d but we issued a request for %d\n",
+ opnum, expected);
+ *nfs_retval = -EREMOTEIO;
+ return false;
out_overflow:
print_overflow_msg(__func__, xdr);
- return -EIO;
+ *nfs_retval = -EIO;
+ return false;
+}
+
+static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
+{
+ int retval;
+
+ __decode_op_hdr(xdr, expected, &retval);
+ return retval;
}
/* Dummy routine */
@@ -4842,11 +4856,12 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
uint32_t savewords, bmlen, i;
int status;
- status = decode_op_hdr(xdr, OP_OPEN);
- if (status != -EIO)
- nfs_increment_open_seqid(status, res->seqid);
- if (!status)
- status = decode_stateid(xdr, &res->stateid);
+ if (!__decode_op_hdr(xdr, OP_OPEN, &status))
+ return status;
+ nfs_increment_open_seqid(status, res->seqid);
+ if (status)
+ return status;
+ status = decode_stateid(xdr, &res->stateid);
if (unlikely(status))
return status;
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 5f38ea36e266..af51cf9bf2e3 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -536,16 +536,12 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
if (err)
goto out3;
exp.ex_anon_uid= make_kuid(&init_user_ns, an_int);
- if (!uid_valid(exp.ex_anon_uid))
- goto out3;
/* anon gid */
err = get_int(&mesg, &an_int);
if (err)
goto out3;
exp.ex_anon_gid= make_kgid(&init_user_ns, an_int);
- if (!gid_valid(exp.ex_anon_gid))
- goto out3;
/* fsid */
err = get_int(&mesg, &an_int);
@@ -583,6 +579,17 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
exp.ex_uuid);
if (err)
goto out4;
+ /*
+ * For some reason exportfs has been passing down an
+ * invalid (-1) uid & gid on the "dummy" export which it
+ * uses to test export support. To make sure exportfs
+ * sees errors from check_export we therefore need to
+ * delay these checks till after check_export:
+ */
+ if (!uid_valid(exp.ex_anon_uid))
+ goto out4;
+ if (!gid_valid(exp.ex_anon_gid))
+ goto out4;
}
expp = svc_export_lookup(&exp);
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 8a50b3c18093..e15bcbd5043c 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -385,8 +385,10 @@ sort_pacl(struct posix_acl *pacl)
* by uid/gid. */
int i, j;
- if (pacl->a_count <= 4)
- return; /* no users or groups */
+ /* no users or groups */
+ if (!pacl || pacl->a_count <= 4)
+ return;
+
i = 1;
while (pacl->a_entries[i].e_tag == ACL_USER)
i++;
@@ -513,13 +515,12 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
/*
* ACLs with no ACEs are treated differently in the inheritable
- * and effective cases: when there are no inheritable ACEs, we
- * set a zero-length default posix acl:
+ * and effective cases: when there are no inheritable ACEs, we
+ * return NULL so that ->set_acl is called with a NULL ACL structure.
*/
- if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT)) {
- pacl = posix_acl_alloc(0, GFP_KERNEL);
- return pacl ? pacl : ERR_PTR(-ENOMEM);
- }
+ if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT))
+ return NULL;
+
/*
* When there are no effective ACEs, the following will end
* up setting a 3-element effective posix ACL with all
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 7f05cd140de3..f42bbe5fbc0a 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -637,9 +637,11 @@ static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc
static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses)
{
+ int maxtime = max_cb_time(clp->net);
struct rpc_timeout timeparms = {
- .to_initval = max_cb_time(clp->net),
+ .to_initval = maxtime,
.to_retries = 0,
+ .to_maxval = maxtime,
};
struct rpc_create_args args = {
.net = clp->net,
@@ -670,7 +672,8 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
clp->cl_cb_session = ses;
args.bc_xprt = conn->cb_xprt;
args.prognumber = clp->cl_cb_session->se_cb_prog;
- args.protocol = XPRT_TRANSPORT_BC_TCP;
+ args.protocol = conn->cb_xprt->xpt_class->xcl_ident |
+ XPRT_TRANSPORT_BC;
args.authflavor = ses->se_cb_sec.flavor;
}
/* Create RPC client */
@@ -781,8 +784,12 @@ static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task)
{
if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
- dprintk("%s slot is busy\n", __func__);
- return false;
+ /* Race breaker */
+ if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
+ dprintk("%s slot is busy\n", __func__);
+ return false;
+ }
+ rpc_wake_up_queued_task(&clp->cl_cb_waitq, task);
}
return true;
}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 27d74a294515..9240dd1678da 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -576,15 +576,6 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
switch (create->cr_type) {
case NF4LNK:
- /* ugh! we have to null-terminate the linktext, or
- * vfs_symlink() will choke. it is always safe to
- * null-terminate by brute force, since at worst we
- * will overwrite the first byte of the create namelen
- * in the XDR buffer, which has already been extracted
- * during XDR decode.
- */
- create->cr_linkname[create->cr_linklen] = 0;
-
status = nfsd_symlink(rqstp, &cstate->current_fh,
create->cr_name, create->cr_namelen,
create->cr_linkname, create->cr_linklen,
@@ -1200,7 +1191,8 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp)
*/
if (argp->opcnt == resp->opcnt)
return false;
-
+ if (next->opnum == OP_ILLEGAL)
+ return false;
nextd = OPDESC(next);
/*
* Rest of 2.6.3.1.1: certain operations will return WRONGSEC
@@ -1307,6 +1299,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
/* If op is non-idempotent */
if (opdesc->op_flags & OP_MODIFIES_SOMETHING) {
plen = opdesc->op_rsize_bop(rqstp, op);
+ /*
+ * If there's still another operation, make sure
+ * we'll have space to at least encode an error:
+ */
+ if (resp->opcnt < args->opcnt)
+ plen += COMPOUND_ERR_SLACK_SPACE;
op->status = nfsd4_check_resp_size(resp, plen);
}
@@ -1471,7 +1469,8 @@ static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *o
static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
- return (op_encode_hdr_size + 2 + 1024) * sizeof(__be32);
+ return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) *
+ sizeof(__be32);
}
static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 316ec843dec2..4a58afa99654 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -367,7 +367,6 @@ static struct nfs4_delegation *
alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh, u32 type)
{
struct nfs4_delegation *dp;
- struct nfs4_file *fp = stp->st_file;
dprintk("NFSD alloc_init_deleg\n");
/*
@@ -377,8 +376,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv
*/
if (type != NFS4_OPEN_DELEGATE_READ)
return NULL;
- if (fp->fi_had_conflict)
- return NULL;
if (num_delegations > max_delegations)
return NULL;
dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab));
@@ -395,8 +392,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv
INIT_LIST_HEAD(&dp->dl_perfile);
INIT_LIST_HEAD(&dp->dl_perclnt);
INIT_LIST_HEAD(&dp->dl_recall_lru);
- get_nfs4_file(fp);
- dp->dl_file = fp;
+ dp->dl_file = NULL;
dp->dl_type = type;
fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle);
dp->dl_time = 0;
@@ -1081,6 +1077,18 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
return NULL;
}
clp->cl_name.len = name.len;
+ INIT_LIST_HEAD(&clp->cl_sessions);
+ idr_init(&clp->cl_stateids);
+ atomic_set(&clp->cl_refcount, 0);
+ clp->cl_cb_state = NFSD4_CB_UNKNOWN;
+ INIT_LIST_HEAD(&clp->cl_idhash);
+ INIT_LIST_HEAD(&clp->cl_openowners);
+ INIT_LIST_HEAD(&clp->cl_delegations);
+ INIT_LIST_HEAD(&clp->cl_lru);
+ INIT_LIST_HEAD(&clp->cl_callbacks);
+ INIT_LIST_HEAD(&clp->cl_revoked);
+ spin_lock_init(&clp->cl_lock);
+ rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
return clp;
}
@@ -1098,6 +1106,7 @@ free_client(struct nfs4_client *clp)
WARN_ON_ONCE(atomic_read(&ses->se_ref));
free_session(ses);
}
+ rpc_destroy_wait_queue(&clp->cl_cb_waitq);
free_svc_cred(&clp->cl_cred);
kfree(clp->cl_name.data);
idr_destroy(&clp->cl_stateids);
@@ -1191,15 +1200,14 @@ static int copy_cred(struct svc_cred *target, struct svc_cred *source)
return 0;
}
-static long long
+static int
compare_blob(const struct xdr_netobj *o1, const struct xdr_netobj *o2)
{
- long long res;
-
- res = o1->len - o2->len;
- if (res)
- return res;
- return (long long)memcmp(o1->data, o2->data, o1->len);
+ if (o1->len < o2->len)
+ return -1;
+ if (o1->len > o2->len)
+ return 1;
+ return memcmp(o1->data, o2->data, o1->len);
}
static int same_name(const char *n1, const char *n2)
@@ -1315,7 +1323,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
if (clp == NULL)
return NULL;
- INIT_LIST_HEAD(&clp->cl_sessions);
ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred);
if (ret) {
spin_lock(&nn->client_lock);
@@ -1323,20 +1330,9 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
spin_unlock(&nn->client_lock);
return NULL;
}
- idr_init(&clp->cl_stateids);
- atomic_set(&clp->cl_refcount, 0);
- clp->cl_cb_state = NFSD4_CB_UNKNOWN;
- INIT_LIST_HEAD(&clp->cl_idhash);
- INIT_LIST_HEAD(&clp->cl_openowners);
- INIT_LIST_HEAD(&clp->cl_delegations);
- INIT_LIST_HEAD(&clp->cl_lru);
- INIT_LIST_HEAD(&clp->cl_callbacks);
- INIT_LIST_HEAD(&clp->cl_revoked);
- spin_lock_init(&clp->cl_lock);
nfsd4_init_callback(&clp->cl_cb_null);
clp->cl_time = get_seconds();
clear_bit(0, &clp->cl_cb_slot_busy);
- rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
copy_verf(clp, verf);
rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
gen_confirm(clp);
@@ -1368,7 +1364,7 @@ add_clp_to_name_tree(struct nfs4_client *new_clp, struct rb_root *root)
static struct nfs4_client *
find_clp_in_name_tree(struct xdr_netobj *name, struct rb_root *root)
{
- long long cmp;
+ int cmp;
struct rb_node *node = root->rb_node;
struct nfs4_client *clp;
@@ -2964,22 +2960,35 @@ static int nfs4_setlease(struct nfs4_delegation *dp, int flag)
return 0;
}
-static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag)
+static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag, struct nfs4_file *fp)
{
- struct nfs4_file *fp = dp->dl_file;
+ int status;
- if (!fp->fi_lease)
- return nfs4_setlease(dp, flag);
+ if (fp->fi_had_conflict)
+ return -EAGAIN;
+ get_nfs4_file(fp);
+ dp->dl_file = fp;
+ if (!fp->fi_lease) {
+ status = nfs4_setlease(dp, flag);
+ if (status)
+ goto out_free;
+ return 0;
+ }
spin_lock(&recall_lock);
if (fp->fi_had_conflict) {
spin_unlock(&recall_lock);
- return -EAGAIN;
+ status = -EAGAIN;
+ goto out_free;
}
atomic_inc(&fp->fi_delegees);
list_add(&dp->dl_perfile, &fp->fi_delegations);
spin_unlock(&recall_lock);
list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
return 0;
+out_free:
+ put_nfs4_file(fp);
+ dp->dl_file = fp;
+ return status;
}
static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
@@ -3045,7 +3054,7 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh,
dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh, flag);
if (dp == NULL)
goto out_no_deleg;
- status = nfs4_set_delegation(dp, flag);
+ status = nfs4_set_delegation(dp, flag, stp->st_file);
if (status)
goto out_free;
@@ -3598,9 +3607,16 @@ out:
static __be32
nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp)
{
- if (check_for_locks(stp->st_file, lockowner(stp->st_stateowner)))
+ struct nfs4_lockowner *lo = lockowner(stp->st_stateowner);
+
+ if (check_for_locks(stp->st_file, lo))
return nfserr_locks_held;
- release_lock_stateid(stp);
+ /*
+ * Currently there's a 1-1 lock stateid<->lockowner
+ * correspondence, and we have to delete the lockowner when we
+ * delete the lock stateid:
+ */
+ release_lockowner(lo);
return nfs_ok;
}
@@ -4044,6 +4060,10 @@ static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, c
if (!same_owner_str(&lo->lo_owner, owner, clid))
return false;
+ if (list_empty(&lo->lo_owner.so_stateids)) {
+ WARN_ON_ONCE(1);
+ return false;
+ }
lst = list_first_entry(&lo->lo_owner.so_stateids,
struct nfs4_ol_stateid, st_perstateowner);
return lst->st_file->fi_inode == inode;
@@ -4958,7 +4978,6 @@ nfs4_state_destroy_net(struct net *net)
int i;
struct nfs4_client *clp = NULL;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- struct rb_node *node, *tmp;
for (i = 0; i < CLIENT_HASH_SIZE; i++) {
while (!list_empty(&nn->conf_id_hashtbl[i])) {
@@ -4967,13 +4986,11 @@ nfs4_state_destroy_net(struct net *net)
}
}
- node = rb_first(&nn->unconf_name_tree);
- while (node != NULL) {
- tmp = node;
- node = rb_next(tmp);
- clp = rb_entry(tmp, struct nfs4_client, cl_namenode);
- rb_erase(tmp, &nn->unconf_name_tree);
- destroy_client(clp);
+ for (i = 0; i < CLIENT_HASH_SIZE; i++) {
+ while (!list_empty(&nn->unconf_id_hashtbl[i])) {
+ clp = list_entry(nn->unconf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
+ destroy_client(clp);
+ }
}
kfree(nn->sessionid_hashtbl);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 6cd86e0fe450..acf179d7615f 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -162,8 +162,8 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
*/
memcpy(p, argp->p, avail);
/* step to next page */
- argp->p = page_address(argp->pagelist[0]);
argp->pagelist++;
+ argp->p = page_address(argp->pagelist[0]);
if (argp->pagelen < PAGE_SIZE) {
argp->end = argp->p + (argp->pagelen>>2);
argp->pagelen = 0;
@@ -553,7 +553,18 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
READ_BUF(4);
READ32(create->cr_linklen);
READ_BUF(create->cr_linklen);
- SAVEMEM(create->cr_linkname, create->cr_linklen);
+ /*
+ * The VFS will want a null-terminated string, and
+ * null-terminating in place isn't safe since this might
+ * end on a page boundary:
+ */
+ create->cr_linkname =
+ kmalloc(create->cr_linklen + 1, GFP_KERNEL);
+ if (!create->cr_linkname)
+ return nfserr_jukebox;
+ memcpy(create->cr_linkname, p, create->cr_linklen);
+ create->cr_linkname[create->cr_linklen] = '\0';
+ defer_free(argp, kfree, create->cr_linkname);
break;
case NF4BLK:
case NF4CHR:
@@ -1732,6 +1743,9 @@ static __be32 nfsd4_encode_components_esc(char sep, char *components,
}
else
end++;
+ if (found_esc)
+ end = next;
+
str = end;
}
*pp = p;
@@ -2035,8 +2049,8 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
err = vfs_getattr(&path, &stat);
if (err)
goto out_nfserr;
- if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL |
- FATTR4_WORD0_MAXNAME)) ||
+ if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE |
+ FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) ||
(bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
FATTR4_WORD1_SPACE_TOTAL))) {
err = vfs_statfs(&path, &statfs);
@@ -2401,6 +2415,8 @@ out_acl:
WRITE64(stat.ino);
}
if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
+ if ((buflen -= 16) < 0)
+ goto out_resource;
WRITE32(3);
WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0);
WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD1);
@@ -3382,6 +3398,9 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_test_stateid_id *stateid, *next;
__be32 *p;
+ if (nfserr)
+ return nfserr;
+
RESERVE_SPACE(4 + (4 * test_stateid->ts_num_ids));
*p++ = htonl(test_stateid->ts_num_ids);
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index e76244edd748..e5e4675b7e75 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -221,13 +221,6 @@ hash_refile(struct svc_cacherep *rp)
hlist_add_head(&rp->c_hash, cache_hash + hash_32(rp->c_xid, maskbits));
}
-static inline bool
-nfsd_cache_entry_expired(struct svc_cacherep *rp)
-{
- return rp->c_state != RC_INPROG &&
- time_after(jiffies, rp->c_timestamp + RC_EXPIRE);
-}
-
/*
* Walk the LRU list and prune off entries that are older than RC_EXPIRE.
* Also prune the oldest ones when the total exceeds the max number of entries.
@@ -238,8 +231,14 @@ prune_cache_entries(void)
struct svc_cacherep *rp, *tmp;
list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) {
- if (!nfsd_cache_entry_expired(rp) &&
- num_drc_entries <= max_drc_entries)
+ /*
+ * Don't free entries attached to calls that are still
+ * in-progress, but do keep scanning the list.
+ */
+ if (rp->c_state == RC_INPROG)
+ continue;
+ if (num_drc_entries <= max_drc_entries &&
+ time_before(jiffies, rp->c_timestamp + RC_EXPIRE))
break;
nfsd_reply_cache_free_locked(rp);
}
@@ -395,22 +394,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
/*
* Since the common case is a cache miss followed by an insert,
- * preallocate an entry. First, try to reuse the first entry on the LRU
- * if it works, then go ahead and prune the LRU list.
+ * preallocate an entry.
*/
- spin_lock(&cache_lock);
- if (!list_empty(&lru_head)) {
- rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru);
- if (nfsd_cache_entry_expired(rp) ||
- num_drc_entries >= max_drc_entries) {
- lru_put_end(rp);
- prune_cache_entries();
- goto search_cache;
- }
- }
-
- /* No expired ones available, allocate a new one. */
- spin_unlock(&cache_lock);
rp = nfsd_reply_cache_alloc();
spin_lock(&cache_lock);
if (likely(rp)) {
@@ -418,7 +403,9 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
drc_mem_usage += sizeof(*rp);
}
-search_cache:
+ /* go ahead and prune the cache */
+ prune_cache_entries();
+
found = nfsd_cache_search(rqstp, csum);
if (found) {
if (likely(rp))
@@ -432,15 +419,6 @@ search_cache:
goto out;
}
- /*
- * We're keeping the one we just allocated. Are we now over the
- * limit? Prune one off the tip of the LRU in trade for the one we
- * just allocated if so.
- */
- if (num_drc_entries >= max_drc_entries)
- nfsd_reply_cache_free_locked(list_first_entry(&lru_head,
- struct svc_cacherep, c_lru));
-
nfsdstats.rcmisses++;
rqstp->rq_cacherep = rp;
rp->c_state = RC_INPROG;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 7f555179bf81..f34d9de802ab 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -699,6 +699,11 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net)
if (err != 0 || fd < 0)
return -EINVAL;
+ if (svc_alien_sock(net, fd)) {
+ printk(KERN_ERR "%s: socket net is different to NFSd's one\n", __func__);
+ return -EINVAL;
+ }
+
err = nfsd_create_serv(net);
if (err != 0)
return err;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 262df5ccbf59..8016892f3f05 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -220,7 +220,8 @@ static int nfsd_startup_generic(int nrservs)
*/
ret = nfsd_racache_init(2*nrservs);
if (ret)
- return ret;
+ goto dec_users;
+
ret = nfs4_state_start();
if (ret)
goto out_racache;
@@ -228,6 +229,8 @@ static int nfsd_startup_generic(int nrservs)
out_racache:
nfsd_racache_shutdown();
+dec_users:
+ nfsd_users--;
return ret;
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 84ce601d8063..81325ba8660a 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -297,41 +297,12 @@ commit_metadata(struct svc_fh *fhp)
}
/*
- * Set various file attributes.
- * N.B. After this call fhp needs an fh_put
+ * Go over the attributes and take care of the small differences between
+ * NFS semantics and what Linux expects.
*/
-__be32
-nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
- int check_guard, time_t guardtime)
+static void
+nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
{
- struct dentry *dentry;
- struct inode *inode;
- int accmode = NFSD_MAY_SATTR;
- umode_t ftype = 0;
- __be32 err;
- int host_err;
- int size_change = 0;
-
- if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
- accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
- if (iap->ia_valid & ATTR_SIZE)
- ftype = S_IFREG;
-
- /* Get inode */
- err = fh_verify(rqstp, fhp, ftype, accmode);
- if (err)
- goto out;
-
- dentry = fhp->fh_dentry;
- inode = dentry->d_inode;
-
- /* Ignore any mode updates on symlinks */
- if (S_ISLNK(inode->i_mode))
- iap->ia_valid &= ~ATTR_MODE;
-
- if (!iap->ia_valid)
- goto out;
-
/*
* NFSv2 does not differentiate between "set-[ac]time-to-now"
* which only requires access, and "set-[ac]time-to-X" which
@@ -341,8 +312,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
* convert to "set to now" instead of "set to explicit time"
*
* We only call inode_change_ok as the last test as technically
- * it is not an interface that we should be using. It is only
- * valid if the filesystem does not define it's own i_op->setattr.
+ * it is not an interface that we should be using.
*/
#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
#define MAX_TOUCH_TIME_ERROR (30*60)
@@ -368,30 +338,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
iap->ia_valid &= ~BOTH_TIME_SET;
}
}
-
- /*
- * The size case is special.
- * It changes the file as well as the attributes.
- */
- if (iap->ia_valid & ATTR_SIZE) {
- if (iap->ia_size < inode->i_size) {
- err = nfsd_permission(rqstp, fhp->fh_export, dentry,
- NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE);
- if (err)
- goto out;
- }
-
- host_err = get_write_access(inode);
- if (host_err)
- goto out_nfserr;
-
- size_change = 1;
- host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
- if (host_err) {
- put_write_access(inode);
- goto out_nfserr;
- }
- }
/* sanitize the mode change */
if (iap->ia_valid & ATTR_MODE) {
@@ -414,32 +360,120 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID);
}
}
+}
+
+static __be32
+nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct iattr *iap)
+{
+ struct inode *inode = fhp->fh_dentry->d_inode;
+ int host_err;
+
+ if (iap->ia_size < inode->i_size) {
+ __be32 err;
+
+ err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
+ NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE);
+ if (err)
+ return err;
+ }
- /* Change the attributes. */
+ host_err = get_write_access(inode);
+ if (host_err)
+ goto out_nfserrno;
- iap->ia_valid |= ATTR_CTIME;
+ host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
+ if (host_err)
+ goto out_put_write_access;
+ return 0;
+
+out_put_write_access:
+ put_write_access(inode);
+out_nfserrno:
+ return nfserrno(host_err);
+}
- err = nfserr_notsync;
- if (!check_guard || guardtime == inode->i_ctime.tv_sec) {
- host_err = nfsd_break_lease(inode);
+/*
+ * Set various file attributes. After this call fhp needs an fh_put.
+ */
+__be32
+nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
+ int check_guard, time_t guardtime)
+{
+ struct dentry *dentry;
+ struct inode *inode;
+ int accmode = NFSD_MAY_SATTR;
+ umode_t ftype = 0;
+ __be32 err;
+ int host_err;
+ bool get_write_count;
+ int size_change = 0;
+
+ if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
+ accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
+ if (iap->ia_valid & ATTR_SIZE)
+ ftype = S_IFREG;
+
+ /* Callers that do fh_verify should do the fh_want_write: */
+ get_write_count = !fhp->fh_dentry;
+
+ /* Get inode */
+ err = fh_verify(rqstp, fhp, ftype, accmode);
+ if (err)
+ goto out;
+ if (get_write_count) {
+ host_err = fh_want_write(fhp);
if (host_err)
- goto out_nfserr;
- fh_lock(fhp);
+ return nfserrno(host_err);
+ }
- host_err = notify_change(dentry, iap);
- err = nfserrno(host_err);
- fh_unlock(fhp);
+ dentry = fhp->fh_dentry;
+ inode = dentry->d_inode;
+
+ /* Ignore any mode updates on symlinks */
+ if (S_ISLNK(inode->i_mode))
+ iap->ia_valid &= ~ATTR_MODE;
+
+ if (!iap->ia_valid)
+ goto out;
+
+ nfsd_sanitize_attrs(inode, iap);
+
+ /*
+ * The size case is special, it changes the file in addition to the
+ * attributes.
+ */
+ if (iap->ia_valid & ATTR_SIZE) {
+ err = nfsd_get_write_access(rqstp, fhp, iap);
+ if (err)
+ goto out;
+ size_change = 1;
+ }
+
+ iap->ia_valid |= ATTR_CTIME;
+
+ if (check_guard && guardtime != inode->i_ctime.tv_sec) {
+ err = nfserr_notsync;
+ goto out_put_write_access;
}
+
+ host_err = nfsd_break_lease(inode);
+ if (host_err)
+ goto out_put_write_access_nfserror;
+
+ fh_lock(fhp);
+ host_err = notify_change(dentry, iap);
+ fh_unlock(fhp);
+
+out_put_write_access_nfserror:
+ err = nfserrno(host_err);
+out_put_write_access:
if (size_change)
put_write_access(inode);
if (!err)
commit_metadata(fhp);
out:
return err;
-
-out_nfserr:
- err = nfserrno(host_err);
- goto out;
}
#if defined(CONFIG_NFSD_V2_ACL) || \
@@ -474,6 +508,9 @@ set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key)
char *buf = NULL;
int error = 0;
+ if (!pacl)
+ return vfs_setxattr(dentry, key, NULL, 0, 0);
+
buflen = posix_acl_xattr_size(pacl->a_count);
buf = kmalloc(buflen, GFP_KERNEL);
error = -ENOMEM;
@@ -802,9 +839,10 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
flags = O_WRONLY|O_LARGEFILE;
}
*filp = dentry_open(&path, flags, current_cred());
- if (IS_ERR(*filp))
+ if (IS_ERR(*filp)) {
host_err = PTR_ERR(*filp);
- else {
+ *filp = NULL;
+ } else {
host_err = ima_file_check(*filp, may_flags);
if (may_flags & NFSD_MAY_64BIT_COOKIE)
diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c
index b2e3ff347620..ecdbae19a766 100644
--- a/fs/nilfs2/btree.c
+++ b/fs/nilfs2/btree.c
@@ -31,6 +31,8 @@
#include "alloc.h"
#include "dat.h"
+static void __nilfs_btree_init(struct nilfs_bmap *bmap);
+
static struct nilfs_btree_path *nilfs_btree_alloc_path(void)
{
struct nilfs_btree_path *path;
@@ -368,6 +370,34 @@ static int nilfs_btree_node_broken(const struct nilfs_btree_node *node,
return ret;
}
+/**
+ * nilfs_btree_root_broken - verify consistency of btree root node
+ * @node: btree root node to be examined
+ * @ino: inode number
+ *
+ * Return Value: If node is broken, 1 is returned. Otherwise, 0 is returned.
+ */
+static int nilfs_btree_root_broken(const struct nilfs_btree_node *node,
+ unsigned long ino)
+{
+ int level, flags, nchildren;
+ int ret = 0;
+
+ level = nilfs_btree_node_get_level(node);
+ flags = nilfs_btree_node_get_flags(node);
+ nchildren = nilfs_btree_node_get_nchildren(node);
+
+ if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN ||
+ level > NILFS_BTREE_LEVEL_MAX ||
+ nchildren < 0 ||
+ nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) {
+ pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n",
+ ino, level, flags, nchildren);
+ ret = 1;
+ }
+ return ret;
+}
+
int nilfs_btree_broken_node_block(struct buffer_head *bh)
{
int ret;
@@ -1713,7 +1743,7 @@ nilfs_btree_commit_convert_and_insert(struct nilfs_bmap *btree,
/* convert and insert */
dat = NILFS_BMAP_USE_VBN(btree) ? nilfs_bmap_get_dat(btree) : NULL;
- nilfs_btree_init(btree);
+ __nilfs_btree_init(btree);
if (nreq != NULL) {
nilfs_bmap_commit_alloc_ptr(btree, dreq, dat);
nilfs_bmap_commit_alloc_ptr(btree, nreq, dat);
@@ -2294,12 +2324,23 @@ static const struct nilfs_bmap_operations nilfs_btree_ops_gc = {
.bop_gather_data = NULL,
};
-int nilfs_btree_init(struct nilfs_bmap *bmap)
+static void __nilfs_btree_init(struct nilfs_bmap *bmap)
{
bmap->b_ops = &nilfs_btree_ops;
bmap->b_nchildren_per_block =
NILFS_BTREE_NODE_NCHILDREN_MAX(nilfs_btree_node_size(bmap));
- return 0;
+}
+
+int nilfs_btree_init(struct nilfs_bmap *bmap)
+{
+ int ret = 0;
+
+ __nilfs_btree_init(bmap);
+
+ if (nilfs_btree_root_broken(nilfs_btree_get_root(bmap),
+ bmap->b_inode->i_ino))
+ ret = -EIO;
+ return ret;
}
void nilfs_btree_init_gc(struct nilfs_bmap *bmap)
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index bccfec8343c5..587d699bdc2c 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -24,6 +24,7 @@
#include <linux/buffer_head.h>
#include <linux/gfp.h>
#include <linux/mpage.h>
+#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/aio.h>
#include "nilfs.h"
@@ -48,6 +49,8 @@ struct nilfs_iget_args {
int for_gc;
};
+static int nilfs_iget_test(struct inode *inode, void *opaque);
+
void nilfs_inode_add_blocks(struct inode *inode, int n)
{
struct nilfs_root *root = NILFS_I(inode)->i_root;
@@ -219,10 +222,10 @@ static int nilfs_writepage(struct page *page, struct writeback_control *wbc)
static int nilfs_set_page_dirty(struct page *page)
{
+ struct inode *inode = page->mapping->host;
int ret = __set_page_dirty_nobuffers(page);
if (page_has_buffers(page)) {
- struct inode *inode = page->mapping->host;
unsigned nr_dirty = 0;
struct buffer_head *bh, *head;
@@ -245,6 +248,10 @@ static int nilfs_set_page_dirty(struct page *page)
if (nr_dirty)
nilfs_set_file_dirty(inode, nr_dirty);
+ } else if (ret) {
+ unsigned nr_dirty = 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+ nilfs_set_file_dirty(inode, nr_dirty);
}
return ret;
}
@@ -342,6 +349,17 @@ const struct address_space_operations nilfs_aops = {
.is_partially_uptodate = block_is_partially_uptodate,
};
+static int nilfs_insert_inode_locked(struct inode *inode,
+ struct nilfs_root *root,
+ unsigned long ino)
+{
+ struct nilfs_iget_args args = {
+ .ino = ino, .root = root, .cno = 0, .for_gc = 0
+ };
+
+ return insert_inode_locked4(inode, ino, nilfs_iget_test, &args);
+}
+
struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
{
struct super_block *sb = dir->i_sb;
@@ -377,7 +395,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
err = nilfs_bmap_read(ii->i_bmap, NULL);
if (err < 0)
- goto failed_bmap;
+ goto failed_after_creation;
set_bit(NILFS_I_BMAP, &ii->i_state);
/* No lock is needed; iget() ensures it. */
@@ -393,21 +411,24 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
spin_lock(&nilfs->ns_next_gen_lock);
inode->i_generation = nilfs->ns_next_generation++;
spin_unlock(&nilfs->ns_next_gen_lock);
- insert_inode_hash(inode);
+ if (nilfs_insert_inode_locked(inode, root, ino) < 0) {
+ err = -EIO;
+ goto failed_after_creation;
+ }
err = nilfs_init_acl(inode, dir);
if (unlikely(err))
- goto failed_acl; /* never occur. When supporting
+ goto failed_after_creation; /* never occur. When supporting
nilfs_init_acl(), proper cancellation of
above jobs should be considered */
return inode;
- failed_acl:
- failed_bmap:
+ failed_after_creation:
clear_nlink(inode);
+ unlock_new_inode(inode);
iput(inode); /* raw_inode will be deleted through
- generic_delete_inode() */
+ nilfs_evict_inode() */
goto failed;
failed_ifile_create_inode:
@@ -455,8 +476,8 @@ int nilfs_read_inode_common(struct inode *inode,
inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
- if (inode->i_nlink == 0 && inode->i_mode == 0)
- return -EINVAL; /* this inode is deleted */
+ if (inode->i_nlink == 0)
+ return -ESTALE; /* this inode is deleted */
inode->i_blocks = le64_to_cpu(raw_inode->i_blocks);
ii->i_flags = le32_to_cpu(raw_inode->i_flags);
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 9de78f08989e..0f84b257932c 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -51,9 +51,11 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode)
int err = nilfs_add_link(dentry, inode);
if (!err) {
d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
return 0;
}
inode_dec_link_count(inode);
+ unlock_new_inode(inode);
iput(inode);
return err;
}
@@ -182,6 +184,7 @@ out:
out_fail:
drop_nlink(inode);
nilfs_mark_inode_dirty(inode);
+ unlock_new_inode(inode);
iput(inode);
goto out;
}
@@ -201,11 +204,15 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
inode_inc_link_count(inode);
ihold(inode);
- err = nilfs_add_nondir(dentry, inode);
- if (!err)
+ err = nilfs_add_link(dentry, inode);
+ if (!err) {
+ d_instantiate(dentry, inode);
err = nilfs_transaction_commit(dir->i_sb);
- else
+ } else {
+ inode_dec_link_count(inode);
+ iput(inode);
nilfs_transaction_abort(dir->i_sb);
+ }
return err;
}
@@ -243,6 +250,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
nilfs_mark_inode_dirty(inode);
d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
out:
if (!err)
err = nilfs_transaction_commit(dir->i_sb);
@@ -255,6 +263,7 @@ out_fail:
drop_nlink(inode);
drop_nlink(inode);
nilfs_mark_inode_dirty(inode);
+ unlock_new_inode(inode);
iput(inode);
out_dir:
drop_nlink(dir);
diff --git a/fs/nilfs2/nilfs.h b/fs/nilfs2/nilfs.h
index 9bc72dec3fa6..b02c202223a6 100644
--- a/fs/nilfs2/nilfs.h
+++ b/fs/nilfs2/nilfs.h
@@ -141,7 +141,6 @@ enum {
* @ti_save: Backup of journal_info field of task_struct
* @ti_flags: Flags
* @ti_count: Nest level
- * @ti_garbage: List of inode to be put when releasing semaphore
*/
struct nilfs_transaction_info {
u32 ti_magic;
@@ -150,7 +149,6 @@ struct nilfs_transaction_info {
one of other filesystems has a bug. */
unsigned short ti_flags;
unsigned short ti_count;
- struct list_head ti_garbage;
};
/* ti_magic */
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 0ba679866e50..da276640f776 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -94,6 +94,7 @@ void nilfs_forget_buffer(struct buffer_head *bh)
clear_buffer_nilfs_volatile(bh);
clear_buffer_nilfs_checked(bh);
clear_buffer_nilfs_redirected(bh);
+ clear_buffer_async_write(bh);
clear_buffer_dirty(bh);
if (nilfs_page_buffers_clean(page))
__nilfs_clear_page_dirty(page);
@@ -429,6 +430,7 @@ void nilfs_clear_dirty_page(struct page *page, bool silent)
"discard block %llu, size %zu",
(u64)bh->b_blocknr, bh->b_size);
}
+ clear_buffer_async_write(bh);
clear_buffer_dirty(bh);
clear_buffer_nilfs_volatile(bh);
clear_buffer_nilfs_checked(bh);
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index dc9a913784ab..2d8be51f90dc 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -345,8 +345,7 @@ static void nilfs_end_bio_write(struct bio *bio, int err)
if (err == -EOPNOTSUPP) {
set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
- bio_put(bio);
- /* to be detected by submit_seg_bio() */
+ /* to be detected by nilfs_segbuf_submit_bio() */
}
if (!uptodate)
@@ -377,12 +376,12 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf,
bio->bi_private = segbuf;
bio_get(bio);
submit_bio(mode, bio);
+ segbuf->sb_nbio++;
if (bio_flagged(bio, BIO_EOPNOTSUPP)) {
bio_put(bio);
err = -EOPNOTSUPP;
goto failed;
}
- segbuf->sb_nbio++;
bio_put(bio);
wi->bio = NULL;
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index a5752a589932..99294a286e66 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -305,7 +305,6 @@ static void nilfs_transaction_lock(struct super_block *sb,
ti->ti_count = 0;
ti->ti_save = cur_ti;
ti->ti_magic = NILFS_TI_MAGIC;
- INIT_LIST_HEAD(&ti->ti_garbage);
current->journal_info = ti;
for (;;) {
@@ -332,8 +331,6 @@ static void nilfs_transaction_unlock(struct super_block *sb)
up_write(&nilfs->ns_segctor_sem);
current->journal_info = ti->ti_save;
- if (!list_empty(&ti->ti_garbage))
- nilfs_dispose_list(nilfs, &ti->ti_garbage, 0);
}
static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci,
@@ -665,7 +662,7 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
bh = head = page_buffers(page);
do {
- if (!buffer_dirty(bh))
+ if (!buffer_dirty(bh) || buffer_async_write(bh))
continue;
get_bh(bh);
list_add_tail(&bh->b_assoc_buffers, listp);
@@ -699,7 +696,8 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
for (i = 0; i < pagevec_count(&pvec); i++) {
bh = head = page_buffers(pvec.pages[i]);
do {
- if (buffer_dirty(bh)) {
+ if (buffer_dirty(bh) &&
+ !buffer_async_write(bh)) {
get_bh(bh);
list_add_tail(&bh->b_assoc_buffers,
listp);
@@ -745,6 +743,15 @@ static void nilfs_dispose_list(struct the_nilfs *nilfs,
}
}
+static void nilfs_iput_work_func(struct work_struct *work)
+{
+ struct nilfs_sc_info *sci = container_of(work, struct nilfs_sc_info,
+ sc_iput_work);
+ struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
+
+ nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 0);
+}
+
static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs,
struct nilfs_root *root)
{
@@ -1439,17 +1446,19 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
nilfs_clear_logs(&sci->sc_segbufs);
- err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
- if (unlikely(err))
- return err;
-
if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
err = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
sci->sc_freesegs,
sci->sc_nfreesegs,
NULL);
WARN_ON(err); /* do not happen */
+ sci->sc_stage.flags &= ~NILFS_CF_SUFREED;
}
+
+ err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
+ if (unlikely(err))
+ return err;
+
nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
sci->sc_stage = prev_stage;
}
@@ -1579,6 +1588,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
b_assoc_buffers) {
+ set_buffer_async_write(bh);
if (bh->b_page != bd_page) {
if (bd_page) {
lock_page(bd_page);
@@ -1592,6 +1602,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
+ set_buffer_async_write(bh);
if (bh == segbuf->sb_super_root) {
if (bh->b_page != bd_page) {
lock_page(bd_page);
@@ -1677,6 +1688,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
list_for_each_entry(segbuf, logs, sb_list) {
list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
b_assoc_buffers) {
+ clear_buffer_async_write(bh);
if (bh->b_page != bd_page) {
if (bd_page)
end_page_writeback(bd_page);
@@ -1686,6 +1698,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
+ clear_buffer_async_write(bh);
if (bh == segbuf->sb_super_root) {
if (bh->b_page != bd_page) {
end_page_writeback(bd_page);
@@ -1755,6 +1768,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
b_assoc_buffers) {
set_buffer_uptodate(bh);
clear_buffer_dirty(bh);
+ clear_buffer_async_write(bh);
if (bh->b_page != bd_page) {
if (bd_page)
end_page_writeback(bd_page);
@@ -1776,6 +1790,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
b_assoc_buffers) {
set_buffer_uptodate(bh);
clear_buffer_dirty(bh);
+ clear_buffer_async_write(bh);
clear_buffer_delay(bh);
clear_buffer_nilfs_volatile(bh);
clear_buffer_nilfs_redirected(bh);
@@ -1890,8 +1905,9 @@ static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
struct the_nilfs *nilfs)
{
- struct nilfs_transaction_info *ti = current->journal_info;
struct nilfs_inode_info *ii, *n;
+ int during_mount = !(sci->sc_super->s_flags & MS_ACTIVE);
+ int defer_iput = false;
spin_lock(&nilfs->ns_inode_lock);
list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) {
@@ -1902,9 +1918,24 @@ static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
clear_bit(NILFS_I_BUSY, &ii->i_state);
brelse(ii->i_bh);
ii->i_bh = NULL;
- list_move_tail(&ii->i_dirty, &ti->ti_garbage);
+ list_del_init(&ii->i_dirty);
+ if (!ii->vfs_inode.i_nlink || during_mount) {
+ /*
+ * Defer calling iput() to avoid deadlocks if
+ * i_nlink == 0 or mount is not yet finished.
+ */
+ list_add_tail(&ii->i_dirty, &sci->sc_iput_queue);
+ defer_iput = true;
+ } else {
+ spin_unlock(&nilfs->ns_inode_lock);
+ iput(&ii->vfs_inode);
+ spin_lock(&nilfs->ns_inode_lock);
+ }
}
spin_unlock(&nilfs->ns_inode_lock);
+
+ if (defer_iput)
+ schedule_work(&sci->sc_iput_work);
}
/*
@@ -2571,6 +2602,8 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
INIT_LIST_HEAD(&sci->sc_segbufs);
INIT_LIST_HEAD(&sci->sc_write_logs);
INIT_LIST_HEAD(&sci->sc_gc_inodes);
+ INIT_LIST_HEAD(&sci->sc_iput_queue);
+ INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func);
init_timer(&sci->sc_timer);
sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
@@ -2597,6 +2630,8 @@ static void nilfs_segctor_write_out(struct nilfs_sc_info *sci)
ret = nilfs_segctor_construct(sci, SC_LSEG_SR);
nilfs_transaction_unlock(sci->sc_super);
+ flush_work(&sci->sc_iput_work);
+
} while (ret && retrycount-- > 0);
}
@@ -2621,6 +2656,9 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
|| sci->sc_seq_request != sci->sc_seq_done);
spin_unlock(&sci->sc_state_lock);
+ if (flush_work(&sci->sc_iput_work))
+ flag = true;
+
if (flag || !nilfs_segctor_confirm(sci))
nilfs_segctor_write_out(sci);
@@ -2630,6 +2668,12 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
nilfs_dispose_list(nilfs, &sci->sc_dirty_files, 1);
}
+ if (!list_empty(&sci->sc_iput_queue)) {
+ nilfs_warning(sci->sc_super, __func__,
+ "iput queue is not empty\n");
+ nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 1);
+ }
+
WARN_ON(!list_empty(&sci->sc_segbufs));
WARN_ON(!list_empty(&sci->sc_write_logs));
diff --git a/fs/nilfs2/segment.h b/fs/nilfs2/segment.h
index 38a1d0013314..a48d6de1e02c 100644
--- a/fs/nilfs2/segment.h
+++ b/fs/nilfs2/segment.h
@@ -26,6 +26,7 @@
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/buffer_head.h>
+#include <linux/workqueue.h>
#include <linux/nilfs2_fs.h>
#include "nilfs.h"
@@ -92,6 +93,8 @@ struct nilfs_segsum_pointer {
* @sc_nblk_inc: Block count of current generation
* @sc_dirty_files: List of files to be written
* @sc_gc_inodes: List of GC inodes having blocks to be written
+ * @sc_iput_queue: list of inodes for which iput should be done
+ * @sc_iput_work: work struct to defer iput call
* @sc_freesegs: array of segment numbers to be freed
* @sc_nfreesegs: number of segments on @sc_freesegs
* @sc_dsync_inode: inode whose data pages are written for a sync operation
@@ -135,6 +138,8 @@ struct nilfs_sc_info {
struct list_head sc_dirty_files;
struct list_head sc_gc_inodes;
+ struct list_head sc_iput_queue;
+ struct work_struct sc_iput_work;
__u64 *sc_freesegs;
size_t sc_nfreesegs;
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 6c80083a984f..9be6b4163406 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -69,7 +69,7 @@ static int create_fd(struct fsnotify_group *group,
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
- client_fd = get_unused_fd();
+ client_fd = get_unused_fd_flags(group->fanotify_data.f_flags);
if (client_fd < 0)
return client_fd;
@@ -122,6 +122,7 @@ static int fill_event_metadata(struct fsnotify_group *group,
metadata->event_len = FAN_EVENT_METADATA_LEN;
metadata->metadata_len = FAN_EVENT_METADATA_LEN;
metadata->vers = FANOTIFY_METADATA_VERSION;
+ metadata->reserved = 0;
metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS;
metadata->pid = pid_vnr(event->tgid);
if (unlikely(event->mask & FAN_Q_OVERFLOW))
@@ -866,9 +867,9 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark,
{
return sys_fanotify_mark(fanotify_fd, flags,
#ifdef __BIG_ENDIAN
- ((__u64)mask1 << 32) | mask0,
-#else
((__u64)mask0 << 32) | mask1,
+#else
+ ((__u64)mask1 << 32) | mask0,
#endif
dfd, pathname);
}
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index 238a5930cb3c..9d7e2b9659cb 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -42,7 +42,7 @@ static int show_mark_fhandle(struct seq_file *m, struct inode *inode)
{
struct {
struct file_handle handle;
- u8 pad[64];
+ u8 pad[MAX_HANDLE_SZ];
} f;
int size, ret, i;
@@ -50,7 +50,7 @@ static int show_mark_fhandle(struct seq_file *m, struct inode *inode)
size = f.handle.handle_bytes >> 2;
ret = exportfs_encode_inode_fh(inode, (struct fid *)f.handle.f_handle, &size, 0);
- if ((ret == 255) || (ret == -ENOSPC)) {
+ if ((ret == FILEID_INVALID) || (ret < 0)) {
WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret);
return 0;
}
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 4bb21d67d9b1..a3153e2d0f1f 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -63,14 +63,14 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
spin_lock(&inode->i_lock);
/* run all of the dentries associated with this inode. Since this is a
* directory, there damn well better only be one item on this list */
- hlist_for_each_entry(alias, &inode->i_dentry, d_alias) {
+ hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
struct dentry *child;
/* run all of the children of the original inode and fix their
* d_flags to indicate parental interest (their parent is the
* original inode) */
spin_lock(&alias->d_lock);
- list_for_each_entry(child, &alias->d_subdirs, d_u.d_child) {
+ list_for_each_entry(child, &alias->d_subdirs, d_child) {
if (!child->d_inode)
continue;
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 74825be65b7b..fbb9dfb7b1d2 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -288,20 +288,25 @@ void fsnotify_unmount_inodes(struct list_head *list)
spin_unlock(&inode->i_lock);
/* In case the dropping of a reference would nuke next_i. */
- if ((&next_i->i_sb_list != list) &&
- atomic_read(&next_i->i_count)) {
+ while (&next_i->i_sb_list != list) {
spin_lock(&next_i->i_lock);
- if (!(next_i->i_state & (I_FREEING | I_WILL_FREE))) {
+ if (!(next_i->i_state & (I_FREEING | I_WILL_FREE)) &&
+ atomic_read(&next_i->i_count)) {
__iget(next_i);
need_iput = next_i;
+ spin_unlock(&next_i->i_lock);
+ break;
}
spin_unlock(&next_i->i_lock);
+ next_i = list_entry(next_i->i_sb_list.next,
+ struct inode, i_sb_list);
}
/*
- * We can safely drop inode_sb_list_lock here because we hold
- * references on both inode and next_i. Also no new inodes
- * will be added since the umount has begun.
+ * We can safely drop inode_sb_list_lock here because we hold
+ * references on both inode and next_i, or next_i has reached
+ * the end of the list. Also no new inodes will be added since
+ * the umount has begun.
*/
spin_unlock(&inode_sb_list_lock);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 20dfec72e903..f998c6009ad4 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -917,7 +917,7 @@ void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
}
}
-static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
+static void ocfs2_unlock_pages(struct ocfs2_write_ctxt *wc)
{
int i;
@@ -938,7 +938,11 @@ static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
page_cache_release(wc->w_target_page);
}
ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);
+}
+static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
+{
+ ocfs2_unlock_pages(wc);
brelse(wc->w_di_bh);
kfree(wc);
}
@@ -2060,11 +2064,19 @@ out_write_size:
di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
ocfs2_journal_dirty(handle, wc->w_di_bh);
+ /* Unlock pages before dealloc, since dealloc needs to acquire the
+ * j_trans_barrier lock; otherwise it will deadlock, because the journal
+ * commit thread holds this lock and will ask for the page lock when
+ * flushing the data. Put it here to preserve the unlock order.
+ */
+ ocfs2_unlock_pages(wc);
+
ocfs2_commit_trans(osb, handle);
ocfs2_run_deallocs(osb, &wc->w_dealloc);
- ocfs2_free_write_ctxt(wc);
+ brelse(wc->w_di_bh);
+ kfree(wc);
return copied;
}
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 5d18ad10c27f..4f66e007dae1 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -90,7 +90,6 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
* information for this bh as it's not marked locally
* uptodate. */
ret = -EIO;
- put_bh(bh);
mlog_errno(ret);
}
@@ -420,7 +419,6 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
if (!buffer_uptodate(bh)) {
ret = -EIO;
- put_bh(bh);
mlog_errno(ret);
}
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index ef999729e274..ce37013b4a59 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -172,7 +172,7 @@ struct dentry *ocfs2_find_local_alias(struct inode *inode,
struct dentry *dentry;
spin_lock(&inode->i_lock);
- hlist_for_each_entry(dentry, &inode->i_dentry, d_alias) {
+ hlist_for_each_entry(dentry, &inode->i_dentry, d_u.d_alias) {
spin_lock(&dentry->d_lock);
if (ocfs2_match_dentry(dentry, parent_blkno, skip_unhashed)) {
trace_ocfs2_find_local_alias(dentry->d_name.len,
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 33ecbe0e6734..2b941113e423 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -653,12 +653,9 @@ void dlm_lockres_clear_refmap_bit(struct dlm_ctxt *dlm,
clear_bit(bit, res->refmap);
}
-
-void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
+static void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res)
{
- assert_spin_locked(&res->spinlock);
-
res->inflight_locks++;
mlog(0, "%s: res %.*s, inflight++: now %u, %ps()\n", dlm->name,
@@ -666,6 +663,13 @@ void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
__builtin_return_address(0));
}
+void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
+ struct dlm_lock_resource *res)
+{
+ assert_spin_locked(&res->spinlock);
+ __dlm_lockres_grab_inflight_ref(dlm, res);
+}
+
void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res)
{
@@ -855,10 +859,8 @@ lookup:
/* finally add the lockres to its hash bucket */
__dlm_insert_lockres(dlm, res);
- /* Grab inflight ref to pin the resource */
- spin_lock(&res->spinlock);
- dlm_lockres_grab_inflight_ref(dlm, res);
- spin_unlock(&res->spinlock);
+ /* since this lockres is new it does not require the spinlock */
+ __dlm_lockres_grab_inflight_ref(dlm, res);
/* get an extra ref on the mle in case this is a BLOCK
* if so, the creator of the BLOCK may try to put the last
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index e68588e6b1e8..9bd981cd3142 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -540,7 +540,10 @@ master_here:
/* success! see if any other nodes need recovery */
mlog(0, "DONE mastering recovery of %s:%u here(this=%u)!\n",
dlm->name, dlm->reco.dead_node, dlm->node_num);
- dlm_reset_recovery(dlm);
+ spin_lock(&dlm->spinlock);
+ __dlm_reset_recovery(dlm);
+ dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
+ spin_unlock(&dlm->spinlock);
}
dlm_end_recovery(dlm);
@@ -698,6 +701,14 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
if (all_nodes_done) {
int ret;
+ /* Set this flag on the recovery master to keep
+ * a new recovery for another dead node from
+ * starting before this recovery is done. That
+ * could cause recovery to hang. */
+ spin_lock(&dlm->spinlock);
+ dlm->reco.state |= DLM_RECO_STATE_FINALIZE;
+ spin_unlock(&dlm->spinlock);
+
/* all nodes are now in DLM_RECO_NODE_DATA_DONE state
* just send a finalize message to everyone and
* clean up */
@@ -1751,13 +1762,13 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
struct dlm_migratable_lockres *mres)
{
struct dlm_migratable_lock *ml;
- struct list_head *queue;
+ struct list_head *queue, *iter;
struct list_head *tmpq = NULL;
struct dlm_lock *newlock = NULL;
struct dlm_lockstatus *lksb = NULL;
int ret = 0;
int i, j, bad;
- struct dlm_lock *lock = NULL;
+ struct dlm_lock *lock;
u8 from = O2NM_MAX_NODES;
unsigned int added = 0;
__be64 c;
@@ -1792,14 +1803,16 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
/* MIGRATION ONLY! */
BUG_ON(!(mres->flags & DLM_MRES_MIGRATION));
+ lock = NULL;
spin_lock(&res->spinlock);
for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) {
tmpq = dlm_list_idx_to_ptr(res, j);
- list_for_each_entry(lock, tmpq, list) {
- if (lock->ml.cookie != ml->cookie)
- lock = NULL;
- else
+ list_for_each(iter, tmpq) {
+ lock = list_entry(iter,
+ struct dlm_lock, list);
+ if (lock->ml.cookie == ml->cookie)
break;
+ lock = NULL;
}
if (lock)
break;
@@ -2867,8 +2880,8 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data,
BUG();
}
dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
+ __dlm_reset_recovery(dlm);
spin_unlock(&dlm->spinlock);
- dlm_reset_recovery(dlm);
dlm_kick_recovery_thread(dlm);
break;
default:
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 2487116d0d33..846064726682 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -781,7 +781,6 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
cpos = map_start >> osb->s_clustersize_bits;
mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
map_start + map_len);
- mapping_end -= cpos;
is_last = 0;
while (cpos < mapping_end && !is_last) {
u32 fe_flags;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index ff54014a24ec..d0e8c0b1767f 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2372,10 +2372,14 @@ out_dio:
/* buffered aio wouldn't have proper lock coverage today */
BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
+ if (unlikely(written <= 0))
+ goto no_sync;
+
if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
((file->f_flags & O_DIRECT) && !direct_io)) {
- ret = filemap_fdatawrite_range(file->f_mapping, pos,
- pos + count - 1);
+ ret = filemap_fdatawrite_range(file->f_mapping,
+ iocb->ki_pos - written,
+ iocb->ki_pos - 1);
if (ret < 0)
written = ret;
@@ -2388,10 +2392,12 @@ out_dio:
}
if (!ret)
- ret = filemap_fdatawait_range(file->f_mapping, pos,
- pos + count - 1);
+ ret = filemap_fdatawait_range(file->f_mapping,
+ iocb->ki_pos - written,
+ iocb->ki_pos - 1);
}
+no_sync:
/*
* deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
* function pointer which is called when o_direct io completes so that
@@ -2453,12 +2459,14 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
struct address_space *mapping = out->f_mapping;
struct inode *inode = mapping->host;
struct splice_desc sd = {
- .total_len = len,
.flags = flags,
- .pos = *ppos,
.u.file = out,
};
-
+ ret = generic_write_checks(out, ppos, &len, 0);
+ if(ret)
+ return ret;
+ sd.total_len = len;
+ sd.pos = *ppos;
trace_ocfs2_file_splice_write(inode, out, out->f_path.dentry,
(unsigned long long)OCFS2_I(inode)->ip_blkno,
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 332a281f217e..e49b4f1cb26b 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -717,6 +717,12 @@ static int ocfs2_release_dquot(struct dquot *dquot)
*/
if (status < 0)
mlog_errno(status);
+ /*
+ * Clear dq_off so that we search for the structure in quota file next
+ * time we acquire it. The structure might be deleted and reallocated
+ * elsewhere by another node while our dquot structure is on freelist.
+ */
+ dquot->dq_off = 0;
clear_bit(DQ_ACTIVE_B, &dquot->dq_flags);
out_trans:
ocfs2_commit_trans(osb, handle);
@@ -756,16 +762,17 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
status = ocfs2_lock_global_qf(info, 1);
if (status < 0)
goto out;
- if (!test_bit(DQ_READ_B, &dquot->dq_flags)) {
- status = ocfs2_qinfo_lock(info, 0);
- if (status < 0)
- goto out_dq;
- status = qtree_read_dquot(&info->dqi_gi, dquot);
- ocfs2_qinfo_unlock(info, 0);
- if (status < 0)
- goto out_dq;
- }
- set_bit(DQ_READ_B, &dquot->dq_flags);
+ status = ocfs2_qinfo_lock(info, 0);
+ if (status < 0)
+ goto out_dq;
+ /*
+ * We always want to read dquot structure from disk because we don't
+ * know what happened with it while it was on freelist.
+ */
+ status = qtree_read_dquot(&info->dqi_gi, dquot);
+ ocfs2_qinfo_unlock(info, 0);
+ if (status < 0)
+ goto out_dq;
OCFS2_DQUOT(dquot)->dq_use_count++;
OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 27fe7ee4874c..d0f323da0b5c 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -1303,10 +1303,6 @@ int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot)
ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh);
out:
- /* Clear the read bit so that next time someone uses this
- * dquot he reads fresh info from disk and allocates local
- * dquot structure */
- clear_bit(DQ_READ_B, &dquot->dq_flags);
return status;
}
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 2e3ea308c144..5b8d94436105 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -6499,6 +6499,16 @@ static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
}
new_oi = OCFS2_I(args->new_inode);
+ /*
+ * Adjust extent record count to reserve space for extended attribute.
+ * Inline data count had been adjusted in ocfs2_duplicate_inline_data().
+ */
+ if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) &&
+ !(ocfs2_inode_is_fast_symlink(args->new_inode))) {
+ struct ocfs2_extent_list *el = &new_di->id2.i_list;
+ le16_add_cpu(&el->l_count, -(inline_size /
+ sizeof(struct ocfs2_extent_rec)));
+ }
spin_lock(&new_oi->ip_lock);
new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
diff --git a/fs/open.c b/fs/open.c
index 8c741002f947..86092bde31f4 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -628,23 +628,12 @@ out:
static inline int __get_file_write_access(struct inode *inode,
struct vfsmount *mnt)
{
- int error;
- error = get_write_access(inode);
+ int error = get_write_access(inode);
if (error)
return error;
- /*
- * Do not take mount writer counts on
- * special files since no writes to
- * the mount itself will occur.
- */
- if (!special_file(inode->i_mode)) {
- /*
- * Balanced in __fput()
- */
- error = __mnt_want_write(mnt);
- if (error)
- put_write_access(inode);
- }
+ error = __mnt_want_write(mnt);
+ if (error)
+ put_write_access(inode);
return error;
}
@@ -677,12 +666,11 @@ static int do_dentry_open(struct file *f,
path_get(&f->f_path);
inode = f->f_inode = f->f_path.dentry->d_inode;
- if (f->f_mode & FMODE_WRITE) {
+ if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
error = __get_file_write_access(inode, f->f_path.mnt);
if (error)
goto cleanup_file;
- if (!special_file(inode->i_mode))
- file_take_write(f);
+ file_take_write(f);
}
f->f_mapping = inode->i_mapping;
@@ -723,7 +711,6 @@ cleanup_all:
fops_put(f->f_op);
file_sb_list_del(f);
if (f->f_mode & FMODE_WRITE) {
- put_write_access(inode);
if (!special_file(inode->i_mode)) {
/*
* We don't consider this a real
@@ -731,6 +718,7 @@ cleanup_all:
* because it all happenend right
* here, so just reset the state.
*/
+ put_write_access(inode);
file_reset_write(f);
__mnt_drop_write(f->f_path.mnt);
}
diff --git a/fs/pipe.c b/fs/pipe.c
index d2c45e14e6d8..0e0752ef2715 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -726,11 +726,25 @@ pipe_poll(struct file *filp, poll_table *wait)
return mask;
}
+static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
+{
+ int kill = 0;
+
+ spin_lock(&inode->i_lock);
+ if (!--pipe->files) {
+ inode->i_pipe = NULL;
+ kill = 1;
+ }
+ spin_unlock(&inode->i_lock);
+
+ if (kill)
+ free_pipe_info(pipe);
+}
+
static int
pipe_release(struct inode *inode, struct file *file)
{
- struct pipe_inode_info *pipe = inode->i_pipe;
- int kill = 0;
+ struct pipe_inode_info *pipe = file->private_data;
__pipe_lock(pipe);
if (file->f_mode & FMODE_READ)
@@ -743,17 +757,9 @@ pipe_release(struct inode *inode, struct file *file)
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
- spin_lock(&inode->i_lock);
- if (!--pipe->files) {
- inode->i_pipe = NULL;
- kill = 1;
- }
- spin_unlock(&inode->i_lock);
__pipe_unlock(pipe);
- if (kill)
- free_pipe_info(pipe);
-
+ put_pipe_info(inode, pipe);
return 0;
}
@@ -1014,7 +1020,6 @@ static int fifo_open(struct inode *inode, struct file *filp)
{
struct pipe_inode_info *pipe;
bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
- int kill = 0;
int ret;
filp->f_version = 0;
@@ -1130,15 +1135,9 @@ err_wr:
goto err;
err:
- spin_lock(&inode->i_lock);
- if (!--pipe->files) {
- inode->i_pipe = NULL;
- kill = 1;
- }
- spin_unlock(&inode->i_lock);
__pipe_unlock(pipe);
- if (kill)
- free_pipe_info(pipe);
+
+ put_pipe_info(inode, pipe);
return ret;
}
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 8bd2135b7f82..3542f1f814e2 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -158,6 +158,12 @@ posix_acl_equiv_mode(const struct posix_acl *acl, umode_t *mode_p)
umode_t mode = 0;
int not_equiv = 0;
+ /*
+ * A null ACL can always be presented as mode bits.
+ */
+ if (!acl)
+ return 0;
+
FOREACH_ACL_ENTRY(pa, acl, pe) {
switch (pa->e_tag) {
case ACL_USER_OBJ:
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index ab30716584f5..239493ec718e 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -27,6 +27,5 @@ proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o
proc-$(CONFIG_NET) += proc_net.o
proc-$(CONFIG_PROC_KCORE) += kcore.o
proc-$(CONFIG_PROC_VMCORE) += vmcore.o
-proc-$(CONFIG_PROC_DEVICETREE) += proc_devtree.o
proc-$(CONFIG_PRINTK) += kmsg.o
proc-$(CONFIG_PROC_PAGE_MONITOR) += page.o
diff --git a/fs/proc/array.c b/fs/proc/array.c
index cbd0f1b324b9..09f0d9c374a3 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -304,15 +304,11 @@ static void render_cap_t(struct seq_file *m, const char *header,
seq_puts(m, header);
CAP_FOR_EACH_U32(__capi) {
seq_printf(m, "%08x",
- a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]);
+ a->cap[CAP_LAST_U32 - __capi]);
}
seq_putc(m, '\n');
}
-/* Remove non-existent capabilities */
-#define NORM_CAPS(v) (v.cap[CAP_TO_INDEX(CAP_LAST_CAP)] &= \
- CAP_TO_MASK(CAP_LAST_CAP + 1) - 1)
-
static inline void task_cap(struct seq_file *m, struct task_struct *p)
{
const struct cred *cred;
@@ -326,11 +322,6 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p)
cap_bset = cred->cap_bset;
rcu_read_unlock();
- NORM_CAPS(cap_inheritable);
- NORM_CAPS(cap_permitted);
- NORM_CAPS(cap_effective);
- NORM_CAPS(cap_bset);
-
render_cap_t(m, "CapInh:\t", &cap_inheritable);
render_cap_t(m, "CapPrm:\t", &cap_permitted);
render_cap_t(m, "CapEff:\t", &cap_effective);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c3834dad09b3..8fc784aef0b8 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1825,6 +1825,7 @@ static int proc_map_files_get_link(struct dentry *dentry, struct path *path)
if (rc)
goto out_mmput;
+ rc = -ENOENT;
down_read(&mm->mmap_sem);
vma = find_exact_vma(mm, vm_start, vm_end);
if (vma && vma->vm_file) {
@@ -2611,6 +2612,57 @@ static const struct file_operations proc_projid_map_operations = {
.llseek = seq_lseek,
.release = proc_id_map_release,
};
+
+static int proc_setgroups_open(struct inode *inode, struct file *file)
+{
+ struct user_namespace *ns = NULL;
+ struct task_struct *task;
+ int ret;
+
+ ret = -ESRCH;
+ task = get_proc_task(inode);
+ if (task) {
+ rcu_read_lock();
+ ns = get_user_ns(task_cred_xxx(task, user_ns));
+ rcu_read_unlock();
+ put_task_struct(task);
+ }
+ if (!ns)
+ goto err;
+
+ if (file->f_mode & FMODE_WRITE) {
+ ret = -EACCES;
+ if (!ns_capable(ns, CAP_SYS_ADMIN))
+ goto err_put_ns;
+ }
+
+ ret = single_open(file, &proc_setgroups_show, ns);
+ if (ret)
+ goto err_put_ns;
+
+ return 0;
+err_put_ns:
+ put_user_ns(ns);
+err:
+ return ret;
+}
+
+static int proc_setgroups_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq = file->private_data;
+ struct user_namespace *ns = seq->private;
+ int ret = single_release(inode, file);
+ put_user_ns(ns);
+ return ret;
+}
+
+static const struct file_operations proc_setgroups_operations = {
+ .open = proc_setgroups_open,
+ .write = proc_setgroups_write,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = proc_setgroups_release,
+};
#endif /* CONFIG_USER_NS */
static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
@@ -2719,6 +2771,7 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
+ REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
#endif
#ifdef CONFIG_CHECKPOINT_RESTORE
REG("timers", S_IRUGO, proc_timers_operations),
@@ -3072,6 +3125,7 @@ static const struct pid_entry tid_base_stuff[] = {
REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
+ REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
#endif
};
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index a2596afffae6..846b1d7852ed 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -19,7 +19,6 @@
#include <linux/mount.h>
#include <linux/init.h>
#include <linux/idr.h>
-#include <linux/namei.h>
#include <linux/bitops.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
@@ -163,17 +162,6 @@ void proc_free_inum(unsigned int inum)
spin_unlock_irqrestore(&proc_inum_lock, flags);
}
-static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
- nd_set_link(nd, __PDE_DATA(dentry->d_inode));
- return NULL;
-}
-
-static const struct inode_operations proc_link_inode_operations = {
- .readlink = generic_readlink,
- .follow_link = proc_follow_link,
-};
-
/*
* As some entries in /proc are volatile, we want to
* get rid of unused dentries. This could be made
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 073aea60cf8f..843b8ef04e84 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -23,6 +23,7 @@
#include <linux/slab.h>
#include <linux/mount.h>
#include <linux/magic.h>
+#include <linux/namei.h>
#include <asm/uaccess.h>
@@ -373,6 +374,26 @@ static const struct file_operations proc_reg_file_ops_no_compat = {
};
#endif
+static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+ struct proc_dir_entry *pde = PDE(dentry->d_inode);
+ if (unlikely(!use_pde(pde)))
+ return ERR_PTR(-EINVAL);
+ nd_set_link(nd, pde->data);
+ return pde;
+}
+
+static void proc_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
+{
+ unuse_pde(p);
+}
+
+const struct inode_operations proc_link_inode_operations = {
+ .readlink = generic_readlink,
+ .follow_link = proc_follow_link,
+ .put_link = proc_put_link,
+};
+
struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
{
struct inode *inode = new_inode_pseudo(sb);
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index d600fb098b6a..e833df48eeb4 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -202,6 +202,7 @@ struct pde_opener {
int closing;
struct completion *c;
};
+extern const struct inode_operations proc_link_inode_operations;
extern const struct inode_operations proc_pid_link_inode_operations;
@@ -211,13 +212,6 @@ extern int proc_fill_super(struct super_block *);
extern void proc_entry_rundown(struct proc_dir_entry *);
/*
- * proc_devtree.c
- */
-#ifdef CONFIG_PROC_DEVICETREE
-extern void proc_device_tree_init(void);
-#endif
-
-/*
* proc_namespaces.c
*/
extern const struct inode_operations proc_ns_dir_inode_operations;
diff --git a/fs/proc/page.c b/fs/proc/page.c
index b8730d9ebaee..2a8cc94bb641 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -121,7 +121,7 @@ u64 stable_page_flags(struct page *page)
* just checks PG_head/PG_tail, so we need to check PageLRU to make
* sure a given page is a thp, not a non-huge compound page.
*/
- else if (PageTransCompound(page) && PageLRU(compound_trans_head(page)))
+ else if (PageTransCompound(page) && PageLRU(compound_head(page)))
u |= 1 << KPF_THP;
/*
diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c
deleted file mode 100644
index 106a83570630..000000000000
--- a/fs/proc/proc_devtree.c
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
- * proc_devtree.c - handles /proc/device-tree
- *
- * Copyright 1997 Paul Mackerras
- */
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/time.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/printk.h>
-#include <linux/stat.h>
-#include <linux/string.h>
-#include <linux/of.h>
-#include <linux/export.h>
-#include <linux/slab.h>
-#include <asm/prom.h>
-#include <asm/uaccess.h>
-#include "internal.h"
-
-static inline void set_node_proc_entry(struct device_node *np,
- struct proc_dir_entry *de)
-{
-#ifdef HAVE_ARCH_DEVTREE_FIXUPS
- np->pde = de;
-#endif
-}
-
-static struct proc_dir_entry *proc_device_tree;
-
-/*
- * Supply data on a read from /proc/device-tree/node/property.
- */
-static int property_proc_show(struct seq_file *m, void *v)
-{
- struct property *pp = m->private;
-
- seq_write(m, pp->value, pp->length);
- return 0;
-}
-
-static int property_proc_open(struct inode *inode, struct file *file)
-{
- return single_open(file, property_proc_show, __PDE_DATA(inode));
-}
-
-static const struct file_operations property_proc_fops = {
- .owner = THIS_MODULE,
- .open = property_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-/*
- * For a node with a name like "gc@10", we make symlinks called "gc"
- * and "@10" to it.
- */
-
-/*
- * Add a property to a node
- */
-static struct proc_dir_entry *
-__proc_device_tree_add_prop(struct proc_dir_entry *de, struct property *pp,
- const char *name)
-{
- struct proc_dir_entry *ent;
-
- /*
- * Unfortunately proc_register puts each new entry
- * at the beginning of the list. So we rearrange them.
- */
- ent = proc_create_data(name,
- strncmp(name, "security-", 9) ? S_IRUGO : S_IRUSR,
- de, &property_proc_fops, pp);
- if (ent == NULL)
- return NULL;
-
- if (!strncmp(name, "security-", 9))
- ent->size = 0; /* don't leak number of password chars */
- else
- ent->size = pp->length;
-
- return ent;
-}
-
-
-void proc_device_tree_add_prop(struct proc_dir_entry *pde, struct property *prop)
-{
- __proc_device_tree_add_prop(pde, prop, prop->name);
-}
-
-void proc_device_tree_remove_prop(struct proc_dir_entry *pde,
- struct property *prop)
-{
- remove_proc_entry(prop->name, pde);
-}
-
-void proc_device_tree_update_prop(struct proc_dir_entry *pde,
- struct property *newprop,
- struct property *oldprop)
-{
- struct proc_dir_entry *ent;
-
- if (!oldprop) {
- proc_device_tree_add_prop(pde, newprop);
- return;
- }
-
- for (ent = pde->subdir; ent != NULL; ent = ent->next)
- if (ent->data == oldprop)
- break;
- if (ent == NULL) {
- pr_warn("device-tree: property \"%s\" does not exist\n",
- oldprop->name);
- } else {
- ent->data = newprop;
- ent->size = newprop->length;
- }
-}
-
-/*
- * Various dodgy firmware might give us nodes and/or properties with
- * conflicting names. That's generally ok, except for exporting via /proc,
- * so munge names here to ensure they're unique.
- */
-
-static int duplicate_name(struct proc_dir_entry *de, const char *name)
-{
- struct proc_dir_entry *ent;
- int found = 0;
-
- spin_lock(&proc_subdir_lock);
-
- for (ent = de->subdir; ent != NULL; ent = ent->next) {
- if (strcmp(ent->name, name) == 0) {
- found = 1;
- break;
- }
- }
-
- spin_unlock(&proc_subdir_lock);
-
- return found;
-}
-
-static const char *fixup_name(struct device_node *np, struct proc_dir_entry *de,
- const char *name)
-{
- char *fixed_name;
- int fixup_len = strlen(name) + 2 + 1; /* name + #x + \0 */
- int i = 1, size;
-
-realloc:
- fixed_name = kmalloc(fixup_len, GFP_KERNEL);
- if (fixed_name == NULL) {
- pr_err("device-tree: Out of memory trying to fixup "
- "name \"%s\"\n", name);
- return name;
- }
-
-retry:
- size = snprintf(fixed_name, fixup_len, "%s#%d", name, i);
- size++; /* account for NULL */
-
- if (size > fixup_len) {
- /* We ran out of space, free and reallocate. */
- kfree(fixed_name);
- fixup_len = size;
- goto realloc;
- }
-
- if (duplicate_name(de, fixed_name)) {
- /* Multiple duplicates. Retry with a different offset. */
- i++;
- goto retry;
- }
-
- pr_warn("device-tree: Duplicate name in %s, renamed to \"%s\"\n",
- np->full_name, fixed_name);
-
- return fixed_name;
-}
-
-/*
- * Process a node, adding entries for its children and its properties.
- */
-void proc_device_tree_add_node(struct device_node *np,
- struct proc_dir_entry *de)
-{
- struct property *pp;
- struct proc_dir_entry *ent;
- struct device_node *child;
- const char *p;
-
- set_node_proc_entry(np, de);
- for (child = NULL; (child = of_get_next_child(np, child));) {
- /* Use everything after the last slash, or the full name */
- p = kbasename(child->full_name);
-
- if (duplicate_name(de, p))
- p = fixup_name(np, de, p);
-
- ent = proc_mkdir(p, de);
- if (ent == NULL)
- break;
- proc_device_tree_add_node(child, ent);
- }
- of_node_put(child);
-
- for (pp = np->properties; pp != NULL; pp = pp->next) {
- p = pp->name;
-
- if (strchr(p, '/'))
- continue;
-
- if (duplicate_name(de, p))
- p = fixup_name(np, de, p);
-
- ent = __proc_device_tree_add_prop(de, pp, p);
- if (ent == NULL)
- break;
- }
-}
-
-/*
- * Called on initialization to set up the /proc/device-tree subtree
- */
-void __init proc_device_tree_init(void)
-{
- struct device_node *root;
-
- proc_device_tree = proc_mkdir("device-tree", NULL);
- if (proc_device_tree == NULL)
- return;
- root = of_find_node_by_path("/");
- if (root == NULL) {
- pr_debug("/proc/device-tree: can't find root\n");
- return;
- }
- proc_device_tree_add_node(root, proc_device_tree);
- of_node_put(root);
-}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 41a6ea93f486..9459710c55ae 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -110,7 +110,8 @@ static struct dentry *proc_mount(struct file_system_type *fs_type,
ns = task_active_pid_ns(current);
options = data;
- if (!current_user_ns()->may_mount_proc)
+ if (!current_user_ns()->may_mount_proc ||
+ !ns_capable(ns->user_ns, CAP_SYS_ADMIN))
return ERR_PTR(-EPERM);
}
@@ -179,9 +180,6 @@ void __init proc_root_init(void)
proc_mkdir("openprom", NULL);
#endif
proc_tty_init();
-#ifdef CONFIG_PROC_DEVICETREE
- proc_device_tree_init();
-#endif
proc_mkdir("bus", NULL);
proc_sys_init();
}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 3e636d864d56..9f285fb9bab3 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -792,14 +792,14 @@ typedef struct {
} pagemap_entry_t;
struct pagemapread {
- int pos, len;
+ int pos, len; /* units: PM_ENTRY_BYTES, not bytes */
pagemap_entry_t *buffer;
};
#define PAGEMAP_WALK_SIZE (PMD_SIZE)
#define PAGEMAP_WALK_MASK (PMD_MASK)
-#define PM_ENTRY_BYTES sizeof(u64)
+#define PM_ENTRY_BYTES sizeof(pagemap_entry_t)
#define PM_STATUS_BITS 3
#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
@@ -1038,8 +1038,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
if (!count)
goto out_task;
- pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
- pm.buffer = kmalloc(pm.len, GFP_TEMPORARY);
+ pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
+ pm.buffer = kmalloc(pm.len * PM_ENTRY_BYTES, GFP_TEMPORARY);
ret = -ENOMEM;
if (!pm.buffer)
goto out_task;
@@ -1110,9 +1110,19 @@ out:
return ret;
}
+/*
+ * ->open for the pagemap file: succeeds only for callers holding
+ * CAP_SYS_ADMIN, everyone else gets -EPERM. @inode and @file are unused.
+ */
+static int pagemap_open(struct inode *inode, struct file *file)
+{
+	/*
+	 * Physical addresses must not be disclosed to unprivileged
+	 * userspace (this closes a rowhammer attack vector).
+	 */
+	return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;
+}
+
const struct file_operations proc_pagemap_operations = {
.llseek = mem_lseek, /* borrow this */
.read = pagemap_read,
+ .open = pagemap_open,
};
#endif /* CONFIG_PROC_PAGE_MONITOR */
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index e4bcb2cf055a..66c8c2fe86b7 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -178,6 +178,8 @@ static int pstore_unlink(struct inode *dir, struct dentry *dentry)
if (p->psi->erase)
p->psi->erase(p->type, p->id, p->count,
dentry->d_inode->i_ctime, p->psi);
+ else
+ return -EPERM;
return simple_unlink(dir, dentry);
}
@@ -316,10 +318,10 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
sprintf(name, "dmesg-%s-%lld", psname, id);
break;
case PSTORE_TYPE_CONSOLE:
- sprintf(name, "console-%s", psname);
+ sprintf(name, "console-%s-%lld", psname, id);
break;
case PSTORE_TYPE_FTRACE:
- sprintf(name, "ftrace-%s", psname);
+ sprintf(name, "ftrace-%s-%lld", psname, id);
break;
case PSTORE_TYPE_MCE:
sprintf(name, "mce-%s-%lld", psname, id);
@@ -334,9 +336,8 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
mutex_lock(&root->d_inode->i_mutex);
- rc = -ENOSPC;
dentry = d_alloc_name(root, name);
- if (IS_ERR(dentry))
+ if (!dentry)
goto fail_lockedalloc;
memcpy(private->data, data, size);
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 1376e5a8f0d6..d3d37142bd93 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -61,6 +61,11 @@ module_param(mem_size, ulong, 0400);
MODULE_PARM_DESC(mem_size,
"size of reserved RAM used to store oops/panic logs");
+static unsigned int mem_type;
+module_param(mem_type, uint, 0600);
+MODULE_PARM_DESC(mem_type,
+ "set to 1 to try to use unbuffered memory (default 0)");
+
static int dump_oops = 1;
module_param(dump_oops, int, 0600);
MODULE_PARM_DESC(dump_oops,
@@ -79,6 +84,7 @@ struct ramoops_context {
struct persistent_ram_zone *fprz;
phys_addr_t phys_addr;
unsigned long size;
+ unsigned int memtype;
size_t record_size;
size_t console_size;
size_t ftrace_size;
@@ -86,6 +92,7 @@ struct ramoops_context {
struct persistent_ram_ecc_info ecc_info;
unsigned int max_dump_cnt;
unsigned int dump_write_cnt;
+ /* the _read_cnt fields below must be reset in ramoops_pstore_open() */
unsigned int dump_read_cnt;
unsigned int console_read_cnt;
unsigned int ftrace_read_cnt;
@@ -101,6 +108,7 @@ static int ramoops_pstore_open(struct pstore_info *psi)
cxt->dump_read_cnt = 0;
cxt->console_read_cnt = 0;
+ cxt->ftrace_read_cnt = 0;
return 0;
}
@@ -117,13 +125,15 @@ ramoops_get_next_prz(struct persistent_ram_zone *przs[], uint *c, uint max,
return NULL;
prz = przs[i];
+ if (!prz)
+ return NULL;
- if (update) {
- /* Update old/shadowed buffer. */
+ /* Update old/shadowed buffer. */
+ if (update)
persistent_ram_save_old(prz);
- if (!persistent_ram_old_size(prz))
- return NULL;
- }
+
+ if (!persistent_ram_old_size(prz))
+ return NULL;
*typep = type;
*id = i;
@@ -331,7 +341,8 @@ static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt,
size_t sz = cxt->record_size;
cxt->przs[i] = persistent_ram_new(*paddr, sz, 0,
- &cxt->ecc_info);
+ &cxt->ecc_info,
+ cxt->memtype);
if (IS_ERR(cxt->przs[i])) {
err = PTR_ERR(cxt->przs[i]);
dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n",
@@ -361,7 +372,7 @@ static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt,
return -ENOMEM;
}
- *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info);
+ *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info, cxt->memtype);
if (IS_ERR(*prz)) {
int err = PTR_ERR(*prz);
@@ -408,9 +419,9 @@ static int ramoops_probe(struct platform_device *pdev)
if (!is_power_of_2(pdata->ftrace_size))
pdata->ftrace_size = rounddown_pow_of_two(pdata->ftrace_size);
- cxt->dump_read_cnt = 0;
cxt->size = pdata->mem_size;
cxt->phys_addr = pdata->mem_address;
+ cxt->memtype = pdata->mem_type;
cxt->record_size = pdata->record_size;
cxt->console_size = pdata->console_size;
cxt->ftrace_size = pdata->ftrace_size;
@@ -541,6 +552,7 @@ static void ramoops_register_dummy(void)
dummy_data->mem_size = mem_size;
dummy_data->mem_address = mem_address;
+ dummy_data->mem_type = 0;
dummy_data->record_size = record_size;
dummy_data->console_size = ramoops_console_size;
dummy_data->ftrace_size = ramoops_ftrace_size;
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 59337326e288..bda61a759b68 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -46,7 +46,7 @@ static inline size_t buffer_start(struct persistent_ram_zone *prz)
}
/* increase and wrap the start pointer, returning the old value */
-static inline size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
+static size_t buffer_start_add_atomic(struct persistent_ram_zone *prz, size_t a)
{
int old;
int new;
@@ -62,7 +62,7 @@ static inline size_t buffer_start_add(struct persistent_ram_zone *prz, size_t a)
}
/* increase the size counter until it hits the max size */
-static inline void buffer_size_add(struct persistent_ram_zone *prz, size_t a)
+static void buffer_size_add_atomic(struct persistent_ram_zone *prz, size_t a)
{
size_t old;
size_t new;
@@ -78,6 +78,53 @@ static inline void buffer_size_add(struct persistent_ram_zone *prz, size_t a)
} while (atomic_cmpxchg(&prz->buffer->size, old, new) != old);
}
+static DEFINE_RAW_SPINLOCK(buffer_lock);
+
+/*
+ * Advance the zone's start offset by @a, wrapping at prz->buffer_size, and
+ * return the offset that was in effect before the update. The whole
+ * read-modify-write runs under buffer_lock with interrupts disabled.
+ */
+static size_t buffer_start_add_locked(struct persistent_ram_zone *prz, size_t a)
+{
+	int old;
+	int new;
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&buffer_lock, flags);
+
+	old = atomic_read(&prz->buffer->start);
+	new = old + a;
+	/*
+	 * Wrap on equality as well: a start equal to buffer_size would be
+	 * stored as-is and handed to the next caller as an offset that is
+	 * already past the end of the buffer.
+	 */
+	while (unlikely(new >= prz->buffer_size))
+		new -= prz->buffer_size;
+	atomic_set(&prz->buffer->start, new);
+
+	raw_spin_unlock_irqrestore(&buffer_lock, flags);
+
+	return old;
+}
+
+/*
+ * Grow the zone's recorded size by @a, saturating at prz->buffer_size.
+ * Nothing is written once the size has already reached buffer_size. The
+ * update runs under buffer_lock with interrupts disabled.
+ */
+static void buffer_size_add_locked(struct persistent_ram_zone *prz, size_t a)
+{
+	unsigned long flags;
+	size_t cur;
+	size_t next;
+
+	raw_spin_lock_irqsave(&buffer_lock, flags);
+
+	cur = atomic_read(&prz->buffer->size);
+	if (cur != prz->buffer_size) {
+		next = cur + a;
+		if (next > prz->buffer_size)
+			next = prz->buffer_size;
+		atomic_set(&prz->buffer->size, next);
+	}
+
+	raw_spin_unlock_irqrestore(&buffer_lock, flags);
+}
+
+/*
+ * Indirection for start/size updates. Both pointers default to the
+ * *_atomic variants; persistent_ram_iomap() repoints them at the
+ * *_locked variants, and nothing visible here switches them back.
+ */
+static size_t (*buffer_start_add)(struct persistent_ram_zone *, size_t) = buffer_start_add_atomic;
+static void (*buffer_size_add)(struct persistent_ram_zone *, size_t) = buffer_size_add_atomic;
+
static void notrace persistent_ram_encode_rs8(struct persistent_ram_zone *prz,
uint8_t *data, size_t len, uint8_t *ecc)
{
@@ -333,7 +380,8 @@ void persistent_ram_zap(struct persistent_ram_zone *prz)
persistent_ram_update_header_ecc(prz);
}
-static void *persistent_ram_vmap(phys_addr_t start, size_t size)
+static void *persistent_ram_vmap(phys_addr_t start, size_t size,
+ unsigned int memtype)
{
struct page **pages;
phys_addr_t page_start;
@@ -345,7 +393,10 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size)
page_start = start - offset_in_page(start);
page_count = DIV_ROUND_UP(size + offset_in_page(start), PAGE_SIZE);
- prot = pgprot_noncached(PAGE_KERNEL);
+ if (memtype)
+ prot = pgprot_noncached(PAGE_KERNEL);
+ else
+ prot = pgprot_writecombine(PAGE_KERNEL);
pages = kmalloc(sizeof(struct page *) * page_count, GFP_KERNEL);
if (!pages) {
@@ -364,27 +415,38 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size)
return vaddr;
}
-static void *persistent_ram_iomap(phys_addr_t start, size_t size)
+static void *persistent_ram_iomap(phys_addr_t start, size_t size,
+ unsigned int memtype)
{
+ void *va;
+
if (!request_mem_region(start, size, "persistent_ram")) {
pr_err("request mem region (0x%llx@0x%llx) failed\n",
(unsigned long long)size, (unsigned long long)start);
return NULL;
}
- return ioremap(start, size);
+ buffer_start_add = buffer_start_add_locked;
+ buffer_size_add = buffer_size_add_locked;
+
+ if (memtype)
+ va = ioremap(start, size);
+ else
+ va = ioremap_wc(start, size);
+
+ return va;
}
static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size,
- struct persistent_ram_zone *prz)
+ struct persistent_ram_zone *prz, int memtype)
{
prz->paddr = start;
prz->size = size;
if (pfn_valid(start >> PAGE_SHIFT))
- prz->vaddr = persistent_ram_vmap(start, size);
+ prz->vaddr = persistent_ram_vmap(start, size, memtype);
else
- prz->vaddr = persistent_ram_iomap(start, size);
+ prz->vaddr = persistent_ram_iomap(start, size, memtype);
if (!prz->vaddr) {
pr_err("%s: Failed to map 0x%llx pages at 0x%llx\n", __func__,
@@ -452,7 +514,8 @@ void persistent_ram_free(struct persistent_ram_zone *prz)
}
struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
- u32 sig, struct persistent_ram_ecc_info *ecc_info)
+ u32 sig, struct persistent_ram_ecc_info *ecc_info,
+ unsigned int memtype)
{
struct persistent_ram_zone *prz;
int ret = -ENOMEM;
@@ -463,7 +526,7 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
goto err;
}
- ret = persistent_ram_buffer_map(start, size, prz);
+ ret = persistent_ram_buffer_map(start, size, prz, memtype);
if (ret)
goto err;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 3e64169ef527..4f7f451ca70d 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -581,9 +581,17 @@ int dquot_scan_active(struct super_block *sb,
dqstats_inc(DQST_LOOKUPS);
dqput(old_dquot);
old_dquot = dquot;
- ret = fn(dquot, priv);
- if (ret < 0)
- goto out;
+ /*
+ * ->release_dquot() can be racing with us. Our reference
+ * protects us from new calls to it so just wait for any
+ * outstanding call and recheck the DQ_ACTIVE_B after that.
+ */
+ wait_on_dquot(dquot);
+ if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
+ ret = fn(dquot, priv);
+ if (ret < 0)
+ goto out;
+ }
spin_lock(&dq_list_lock);
/* We are safe to continue now because our dquot could not
* be moved out of the inuse list while we hold the reference */
@@ -629,7 +637,7 @@ int dquot_writeback_dquots(struct super_block *sb, int type)
dqstats_inc(DQST_LOOKUPS);
err = sb->dq_op->write_dquot(dquot);
if (!ret && err)
- err = ret;
+ ret = err;
dqput(dquot);
spin_lock(&dq_list_lock);
}
@@ -1094,6 +1102,14 @@ static void dquot_claim_reserved_space(struct dquot *dquot, qsize_t number)
dquot->dq_dqb.dqb_rsvspace -= number;
}
+/*
+ * Move @number bytes of @dquot's usage from the current-space counter back
+ * to the reserved-space counter. If @number exceeds the current space, warn
+ * once and move only what is there.
+ */
+static void dquot_reclaim_reserved_space(struct dquot *dquot, qsize_t number)
+{
+	qsize_t moved = number;
+
+	/* Never take more out of curspace than it currently holds. */
+	if (WARN_ON_ONCE(dquot->dq_dqb.dqb_curspace < number))
+		moved = dquot->dq_dqb.dqb_curspace;
+
+	dquot->dq_dqb.dqb_curspace -= moved;
+	dquot->dq_dqb.dqb_rsvspace += moved;
+}
+
static inline
void dquot_free_reserved_space(struct dquot *dquot, qsize_t number)
{
@@ -1528,6 +1544,15 @@ void inode_claim_rsv_space(struct inode *inode, qsize_t number)
}
EXPORT_SYMBOL(inode_claim_rsv_space);
+/*
+ * Move @number bytes from the inode's byte count back into its reserved
+ * space. Under i_lock, the counter returned by inode_reserved_space() grows
+ * by @number and __inode_sub_bytes() subtracts the same amount from the
+ * inode.
+ */
+void inode_reclaim_rsv_space(struct inode *inode, qsize_t number)
+{
+ spin_lock(&inode->i_lock);
+ *inode_reserved_space(inode) += number;
+ __inode_sub_bytes(inode, number);
+ spin_unlock(&inode->i_lock);
+}
+EXPORT_SYMBOL(inode_reclaim_rsv_space);
+
void inode_sub_rsv_space(struct inode *inode, qsize_t number)
{
spin_lock(&inode->i_lock);
@@ -1702,6 +1727,35 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
EXPORT_SYMBOL(dquot_claim_space_nodirty);
/*
+ * Convert allocated space back to in-memory reserved quotas
+ */
+void dquot_reclaim_space_nodirty(struct inode *inode, qsize_t number)
+{
+	int type;
+
+	/* No active quota: only the inode's own counters need adjusting. */
+	if (!dquot_active(inode)) {
+		inode_reclaim_rsv_space(inode, number);
+		return;
+	}
+
+	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+	spin_lock(&dq_data_lock);
+	/* Move the space from allocated back to reserved in every dquot */
+	for (type = 0; type < MAXQUOTAS; type++) {
+		if (!inode->i_dquot[type])
+			continue;
+		dquot_reclaim_reserved_space(inode->i_dquot[type], number);
+	}
+	/* Update inode bytes */
+	inode_reclaim_rsv_space(inode, number);
+	spin_unlock(&dq_data_lock);
+	mark_all_dquot_dirty(inode->i_dquot);
+	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+}
+EXPORT_SYMBOL(dquot_reclaim_space_nodirty);
+
+/*
* This operation can block, but only after everything is updated
*/
void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
diff --git a/fs/read_write.c b/fs/read_write.c
index 2cefa417be34..f6b7c600eb7f 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -947,9 +947,9 @@ out:
return ret;
}
-COMPAT_SYSCALL_DEFINE3(readv, unsigned long, fd,
+COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
const struct compat_iovec __user *,vec,
- unsigned long, vlen)
+ compat_ulong_t, vlen)
{
struct fd f = fdget(fd);
ssize_t ret;
@@ -983,9 +983,9 @@ COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
return ret;
}
-COMPAT_SYSCALL_DEFINE5(preadv, unsigned long, fd,
+COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
const struct compat_iovec __user *,vec,
- unsigned long, vlen, u32, pos_low, u32, pos_high)
+ compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
{
loff_t pos = ((loff_t)pos_high << 32) | pos_low;
return compat_sys_preadv64(fd, vec, vlen, pos);
@@ -1013,9 +1013,9 @@ out:
return ret;
}
-COMPAT_SYSCALL_DEFINE3(writev, unsigned long, fd,
+COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
const struct compat_iovec __user *, vec,
- unsigned long, vlen)
+ compat_ulong_t, vlen)
{
struct fd f = fdget(fd);
ssize_t ret;
@@ -1049,9 +1049,9 @@ COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
return ret;
}
-COMPAT_SYSCALL_DEFINE5(pwritev, unsigned long, fd,
+COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
const struct compat_iovec __user *,vec,
- unsigned long, vlen, u32, pos_low, u32, pos_high)
+ compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
{
loff_t pos = ((loff_t)pos_high << 32) | pos_low;
return compat_sys_pwritev64(fd, vec, vlen, pos);
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 6c2d136561cb..2b96b59f75da 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -128,6 +128,7 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent,
char *d_name;
off_t d_off;
ino_t d_ino;
+ loff_t cur_pos = deh_offset(deh);
if (!de_visible(deh))
/* it is hidden entry */
@@ -200,8 +201,9 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent,
if (local_buf != small_buf) {
kfree(local_buf);
}
- // next entry should be looked for with such offset
- next_pos = deh_offset(deh) + 1;
+
+ /* deh_offset(deh) may be invalid now. */
+ next_pos = cur_pos + 1;
if (item_moved(&tmp_ih, &path_to_entry)) {
set_cpu_key_k_offset(&pos_key,
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index f844533792ee..36166443bc45 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3211,8 +3211,14 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
attr->ia_size != i_size_read(inode)) {
error = inode_newsize_ok(inode, attr->ia_size);
if (!error) {
+ /*
+ * Could race against reiserfs_file_release
+ * if called from NFS, so take tailpack mutex.
+ */
+ mutex_lock(&REISERFS_I(inode)->tailpack);
truncate_setsize(inode, attr->ia_size);
- reiserfs_vfs_truncate_file(inode);
+ reiserfs_truncate_file(inode, 1);
+ mutex_unlock(&REISERFS_I(inode)->tailpack);
}
}
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index 33532f79b4f7..1d48974c25dd 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -19,12 +19,13 @@
/*
* LOCKING:
*
- * We rely on new Alexander Viro's super-block locking.
+ * These guys are evicted from procfs as the very first step in ->kill_sb().
*
*/
-static int show_version(struct seq_file *m, struct super_block *sb)
+static int show_version(struct seq_file *m, void *unused)
{
+ struct super_block *sb = m->private;
char *format;
if (REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_6)) {
@@ -66,8 +67,9 @@ static int show_version(struct seq_file *m, struct super_block *sb)
#define DJP( x ) le32_to_cpu( jp -> x )
#define JF( x ) ( r -> s_journal -> x )
-static int show_super(struct seq_file *m, struct super_block *sb)
+static int show_super(struct seq_file *m, void *unused)
{
+ struct super_block *sb = m->private;
struct reiserfs_sb_info *r = REISERFS_SB(sb);
seq_printf(m, "state: \t%s\n"
@@ -128,8 +130,9 @@ static int show_super(struct seq_file *m, struct super_block *sb)
return 0;
}
-static int show_per_level(struct seq_file *m, struct super_block *sb)
+static int show_per_level(struct seq_file *m, void *unused)
{
+ struct super_block *sb = m->private;
struct reiserfs_sb_info *r = REISERFS_SB(sb);
int level;
@@ -186,8 +189,9 @@ static int show_per_level(struct seq_file *m, struct super_block *sb)
return 0;
}
-static int show_bitmap(struct seq_file *m, struct super_block *sb)
+static int show_bitmap(struct seq_file *m, void *unused)
{
+ struct super_block *sb = m->private;
struct reiserfs_sb_info *r = REISERFS_SB(sb);
seq_printf(m, "free_block: %lu\n"
@@ -218,8 +222,9 @@ static int show_bitmap(struct seq_file *m, struct super_block *sb)
return 0;
}
-static int show_on_disk_super(struct seq_file *m, struct super_block *sb)
+static int show_on_disk_super(struct seq_file *m, void *unused)
{
+ struct super_block *sb = m->private;
struct reiserfs_sb_info *sb_info = REISERFS_SB(sb);
struct reiserfs_super_block *rs = sb_info->s_rs;
int hash_code = DFL(s_hash_function_code);
@@ -261,8 +266,9 @@ static int show_on_disk_super(struct seq_file *m, struct super_block *sb)
return 0;
}
-static int show_oidmap(struct seq_file *m, struct super_block *sb)
+static int show_oidmap(struct seq_file *m, void *unused)
{
+ struct super_block *sb = m->private;
struct reiserfs_sb_info *sb_info = REISERFS_SB(sb);
struct reiserfs_super_block *rs = sb_info->s_rs;
unsigned int mapsize = le16_to_cpu(rs->s_v1.s_oid_cursize);
@@ -291,8 +297,9 @@ static int show_oidmap(struct seq_file *m, struct super_block *sb)
return 0;
}
-static int show_journal(struct seq_file *m, struct super_block *sb)
+static int show_journal(struct seq_file *m, void *unused)
{
+ struct super_block *sb = m->private;
struct reiserfs_sb_info *r = REISERFS_SB(sb);
struct reiserfs_super_block *rs = r->s_rs;
struct journal_params *jp = &rs->s_v1.s_journal;
@@ -383,92 +390,24 @@ static int show_journal(struct seq_file *m, struct super_block *sb)
return 0;
}
-/* iterator */
-static int test_sb(struct super_block *sb, void *data)
-{
- return data == sb;
-}
-
-static int set_sb(struct super_block *sb, void *data)
-{
- return -ENOENT;
-}
-
-struct reiserfs_seq_private {
- struct super_block *sb;
- int (*show) (struct seq_file *, struct super_block *);
-};
-
-static void *r_start(struct seq_file *m, loff_t * pos)
-{
- struct reiserfs_seq_private *priv = m->private;
- loff_t l = *pos;
-
- if (l)
- return NULL;
-
- if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, 0, priv->sb)))
- return NULL;
-
- up_write(&priv->sb->s_umount);
- return priv->sb;
-}
-
-static void *r_next(struct seq_file *m, void *v, loff_t * pos)
-{
- ++*pos;
- if (v)
- deactivate_super(v);
- return NULL;
-}
-
-static void r_stop(struct seq_file *m, void *v)
-{
- if (v)
- deactivate_super(v);
-}
-
-static int r_show(struct seq_file *m, void *v)
-{
- struct reiserfs_seq_private *priv = m->private;
- return priv->show(m, v);
-}
-
-static const struct seq_operations r_ops = {
- .start = r_start,
- .next = r_next,
- .stop = r_stop,
- .show = r_show,
-};
-
static int r_open(struct inode *inode, struct file *file)
{
- struct reiserfs_seq_private *priv;
- int ret = seq_open_private(file, &r_ops,
- sizeof(struct reiserfs_seq_private));
-
- if (!ret) {
- struct seq_file *m = file->private_data;
- priv = m->private;
- priv->sb = proc_get_parent_data(inode);
- priv->show = PDE_DATA(inode);
- }
- return ret;
+ return single_open(file, PDE_DATA(inode),
+ proc_get_parent_data(inode));
}
static const struct file_operations r_file_operations = {
.open = r_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = seq_release_private,
- .owner = THIS_MODULE,
+ .release = single_release,
};
static struct proc_dir_entry *proc_info_root = NULL;
static const char proc_info_root_name[] = "fs/reiserfs";
static void add_file(struct super_block *sb, char *name,
- int (*func) (struct seq_file *, struct super_block *))
+ int (*func) (struct seq_file *, void *))
{
proc_create_data(name, 0, REISERFS_SB(sb)->procdir,
&r_file_operations, func);
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
index 157e474ab303..635a1425d370 100644
--- a/fs/reiserfs/reiserfs.h
+++ b/fs/reiserfs/reiserfs.h
@@ -1954,8 +1954,6 @@ struct treepath var = {.path_length = ILLEGAL_PATH_ELEMENT_OFFSET, .reada = 0,}
#define MAX_US_INT 0xffff
// reiserfs version 2 has max offset 60 bits. Version 1 - 32 bit offset
-#define U32_MAX (~(__u32)0)
-
static inline loff_t max_reiserfs_offset(struct inode *inode)
{
if (get_inode_item_key_version(inode) == KEY_FORMAT_3_5)
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index f8a23c3078f8..e2e202a07b31 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -499,6 +499,7 @@ int remove_save_link(struct inode *inode, int truncate)
static void reiserfs_kill_sb(struct super_block *s)
{
if (REISERFS_SB(s)) {
+ reiserfs_proc_info_done(s);
/*
* Force any pending inode evictions to occur now. Any
* inodes to be removed that have extended attributes
@@ -554,8 +555,6 @@ static void reiserfs_put_super(struct super_block *s)
REISERFS_SB(s)->reserved_blocks);
}
- reiserfs_proc_info_done(s);
-
reiserfs_write_unlock(s);
mutex_destroy(&REISERFS_SB(s)->lock);
kfree(s->s_fs_info);
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 774c1eb7f1c9..3dd44db1465e 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -328,6 +328,8 @@ loff_t seq_lseek(struct file *file, loff_t offset, int whence)
m->read_pos = offset;
retval = file->f_pos = offset;
}
+ } else {
+ file->f_pos = offset;
}
}
file->f_version = m->version;
diff --git a/fs/splice.c b/fs/splice.c
index d37431dd60a1..f183f1342c01 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -555,6 +555,24 @@ static const struct pipe_buf_operations default_pipe_buf_ops = {
.get = generic_pipe_buf_get,
};
+/*
+ * ->steal callback that ignores both arguments and unconditionally
+ * returns 1.
+ * NOTE(review): presumably a non-zero return tells the caller that the
+ * buffer's page may not be stolen - confirm against the ->steal contract.
+ */
+static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+{
+ return 1;
+}
+
+/*
+ * Pipe buffer operations for a socket and similar.
+ *
+ * Every callback is the generic_pipe_buf_* helper except ->steal, which is
+ * generic_pipe_buf_nosteal() and always returns 1. Merging is disabled
+ * (can_merge = 0). Exported for use outside this file.
+ */
+const struct pipe_buf_operations nosteal_pipe_buf_ops = {
+ .can_merge = 0,
+ .map = generic_pipe_buf_map,
+ .unmap = generic_pipe_buf_unmap,
+ .confirm = generic_pipe_buf_confirm,
+ .release = generic_pipe_buf_release,
+ .steal = generic_pipe_buf_nosteal,
+ .get = generic_pipe_buf_get,
+};
+EXPORT_SYMBOL(nosteal_pipe_buf_ops);
+
static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
unsigned long vlen, loff_t offset)
{
@@ -994,13 +1012,17 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
struct address_space *mapping = out->f_mapping;
struct inode *inode = mapping->host;
struct splice_desc sd = {
- .total_len = len,
.flags = flags,
- .pos = *ppos,
.u.file = out,
};
ssize_t ret;
+ ret = generic_write_checks(out, ppos, &len, S_ISBLK(inode->i_mode));
+ if (ret)
+ return ret;
+ sd.total_len = len;
+ sd.pos = *ppos;
+
pipe_lock(pipe);
splice_from_pipe_begin(&sd);
diff --git a/fs/stat.c b/fs/stat.c
index 04ce1ac20d20..d0ea7ef75e26 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -447,9 +447,8 @@ void inode_add_bytes(struct inode *inode, loff_t bytes)
EXPORT_SYMBOL(inode_add_bytes);
-void inode_sub_bytes(struct inode *inode, loff_t bytes)
+void __inode_sub_bytes(struct inode *inode, loff_t bytes)
{
- spin_lock(&inode->i_lock);
inode->i_blocks -= bytes >> 9;
bytes &= 511;
if (inode->i_bytes < bytes) {
@@ -457,6 +456,14 @@ void inode_sub_bytes(struct inode *inode, loff_t bytes)
inode->i_bytes += 512;
}
inode->i_bytes -= bytes;
+}
+
+EXPORT_SYMBOL(__inode_sub_bytes);
+
+void inode_sub_bytes(struct inode *inode, loff_t bytes)
+{
+ spin_lock(&inode->i_lock);
+ __inode_sub_bytes(inode, bytes);
spin_unlock(&inode->i_lock);
}
diff --git a/fs/statfs.c b/fs/statfs.c
index c219e733f553..083dc0ac9140 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -94,7 +94,7 @@ retry:
int fd_statfs(int fd, struct kstatfs *st)
{
- struct fd f = fdget(fd);
+ struct fd f = fdget_raw(fd);
int error = -EBADF;
if (f.file) {
error = vfs_statfs(&f.file->f_path, st);
diff --git a/fs/super.c b/fs/super.c
index 7465d4364208..e028b508db25 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -76,6 +76,8 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
total_objects = sb->s_nr_dentry_unused +
sb->s_nr_inodes_unused + fs_objects + 1;
+ if (!total_objects)
+ total_objects = 1;
if (sc->nr_to_scan) {
int dentries;
@@ -336,19 +338,19 @@ EXPORT_SYMBOL(deactivate_super);
* and want to turn it into a full-blown active reference. grab_super()
* is called with sb_lock held and drops it. Returns 1 in case of
* success, 0 if we had failed (superblock contents was already dead or
- * dying when grab_super() had been called).
+ * dying when grab_super() had been called). Note that this is only
+ * called for superblocks not in rundown mode (== ones still on ->fs_supers
+ * of their type), so increment of ->s_count is OK here.
*/
static int grab_super(struct super_block *s) __releases(sb_lock)
{
- if (atomic_inc_not_zero(&s->s_active)) {
- spin_unlock(&sb_lock);
- return 1;
- }
- /* it's going away */
s->s_count++;
spin_unlock(&sb_lock);
- /* wait for it to die */
down_write(&s->s_umount);
+ if ((s->s_flags & MS_BORN) && atomic_inc_not_zero(&s->s_active)) {
+ put_super(s);
+ return 1;
+ }
up_write(&s->s_umount);
put_super(s);
return 0;
@@ -463,11 +465,6 @@ retry:
destroy_super(s);
s = NULL;
}
- down_write(&old->s_umount);
- if (unlikely(!(old->s_flags & MS_BORN))) {
- deactivate_locked_super(old);
- goto retry;
- }
return old;
}
}
@@ -660,10 +657,10 @@ restart:
if (hlist_unhashed(&sb->s_instances))
continue;
if (sb->s_bdev == bdev) {
- if (grab_super(sb)) /* drops sb_lock */
- return sb;
- else
+ if (!grab_super(sb))
goto restart;
+ up_write(&sb->s_umount);
+ return sb;
}
}
spin_unlock(&sb_lock);
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index d0c6a007ce83..eda10959714f 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -487,6 +487,7 @@ static int v7_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_sb = sb;
sbi->s_block_base = 0;
sbi->s_type = FSTYPE_V7;
+ mutex_init(&sbi->s_lock);
sb->s_fs_info = sbi;
sb_set_blocksize(sb, 512);
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index ff8229340cd5..26b69b2d4a45 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -166,15 +166,10 @@ static int do_commit(struct ubifs_info *c)
err = ubifs_orphan_end_commit(c);
if (err)
goto out;
- old_ltail_lnum = c->ltail_lnum;
- err = ubifs_log_end_commit(c, new_ltail_lnum);
- if (err)
- goto out;
err = dbg_check_old_index(c, &zroot);
if (err)
goto out;
- mutex_lock(&c->mst_mutex);
c->mst_node->cmt_no = cpu_to_le64(c->cmt_no);
c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum);
c->mst_node->root_lnum = cpu_to_le32(zroot.lnum);
@@ -203,8 +198,9 @@ static int do_commit(struct ubifs_info *c)
c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
else
c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_NO_ORPHS);
- err = ubifs_write_master(c);
- mutex_unlock(&c->mst_mutex);
+
+ old_ltail_lnum = c->ltail_lnum;
+ err = ubifs_log_end_commit(c, new_ltail_lnum);
if (err)
goto out;
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 14374530784c..881324c08430 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1524,8 +1524,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
}
wait_for_stable_page(page);
- unlock_page(page);
- return 0;
+ return VM_FAULT_LOCKED;
out_unlock:
unlock_page(page);
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index 36bd4efd0819..06649d21b056 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -106,10 +106,14 @@ static inline long long empty_log_bytes(const struct ubifs_info *c)
h = (long long)c->lhead_lnum * c->leb_size + c->lhead_offs;
t = (long long)c->ltail_lnum * c->leb_size;
- if (h >= t)
+ if (h > t)
return c->log_bytes - h + t;
- else
+ else if (h != t)
return t - h;
+ else if (c->lhead_lnum != c->ltail_lnum)
+ return 0;
+ else
+ return c->log_bytes;
}
/**
@@ -447,9 +451,9 @@ out:
* @ltail_lnum: new log tail LEB number
*
* This function is called on when the commit operation was finished. It
- * moves log tail to new position and unmaps LEBs which contain obsolete data.
- * Returns zero in case of success and a negative error code in case of
- * failure.
+ * moves log tail to new position and updates the master node so that it stores
+ * the new log tail LEB number. Returns zero in case of success and a negative
+ * error code in case of failure.
*/
int ubifs_log_end_commit(struct ubifs_info *c, int ltail_lnum)
{
@@ -477,7 +481,12 @@ int ubifs_log_end_commit(struct ubifs_info *c, int ltail_lnum)
spin_unlock(&c->buds_lock);
err = dbg_check_bud_bytes(c);
+ if (err)
+ goto out;
+ err = ubifs_write_master(c);
+
+out:
mutex_unlock(&c->log_mutex);
return err;
}
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
index ab83ace9910a..1a4bb9e8b3b8 100644
--- a/fs/ubifs/master.c
+++ b/fs/ubifs/master.c
@@ -352,10 +352,9 @@ int ubifs_read_master(struct ubifs_info *c)
* ubifs_write_master - write master node.
* @c: UBIFS file-system description object
*
- * This function writes the master node. The caller has to take the
- * @c->mst_mutex lock before calling this function. Returns zero in case of
- * success and a negative error code in case of failure. The master node is
- * written twice to enable recovery.
+ * This function writes the master node. Returns zero in case of success and a
+ * negative error code in case of failure. The master node is written twice to
+ * enable recovery.
*/
int ubifs_write_master(struct ubifs_info *c)
{
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
index 9e1d05666fed..e0a7a764a903 100644
--- a/fs/ubifs/shrinker.c
+++ b/fs/ubifs/shrinker.c
@@ -128,7 +128,6 @@ static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
freed = ubifs_destroy_tnc_subtree(znode);
atomic_long_sub(freed, &ubifs_clean_zn_cnt);
atomic_long_sub(freed, &c->clean_zn_cnt);
- ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0);
total_freed += freed;
znode = zprev;
}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index f21acf0ef01f..05115d719408 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1412,7 +1412,7 @@ static int mount_ubifs(struct ubifs_info *c)
ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"%s",
c->vi.ubi_num, c->vi.vol_id, c->vi.name,
- c->ro_mount ? ", R/O mode" : NULL);
+ c->ro_mount ? ", R/O mode" : "");
x = (long long)c->main_lebs * c->leb_size;
y = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes;
ubifs_msg("LEB size: %d bytes (%d KiB), min./max. I/O unit sizes: %d bytes/%d bytes",
@@ -1970,7 +1970,6 @@ static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi)
mutex_init(&c->lp_mutex);
mutex_init(&c->tnc_mutex);
mutex_init(&c->log_mutex);
- mutex_init(&c->mst_mutex);
mutex_init(&c->umount_mutex);
mutex_init(&c->bu_mutex);
mutex_init(&c->write_reserve_mutex);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index b2babce4d70f..bd51277f6fe1 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1042,7 +1042,6 @@ struct ubifs_debug_info;
*
* @mst_node: master node
* @mst_offs: offset of valid master node
- * @mst_mutex: protects the master node area, @mst_node, and @mst_offs
*
* @max_bu_buf_len: maximum bulk-read buffer length
* @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu
@@ -1282,7 +1281,6 @@ struct ubifs_info {
struct ubifs_mst_node *mst_node;
int mst_offs;
- struct mutex mst_mutex;
int max_bu_buf_len;
struct mutex bu_mutex;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index b6d15d349810..aa023283cc8a 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1270,13 +1270,22 @@ update_time:
return 0;
}
+/*
+ * Maximum length of linked list formed by ICB hierarchy. The chosen number is
+ * arbitrary - just that we hopefully don't limit any real use of rewritten
+ * inode on write-once media but avoid looping for too long on corrupted media.
+ */
+#define UDF_MAX_ICB_NESTING 1024
+
static void __udf_read_inode(struct inode *inode)
{
struct buffer_head *bh = NULL;
struct fileEntry *fe;
uint16_t ident;
struct udf_inode_info *iinfo = UDF_I(inode);
+ unsigned int indirections = 0;
+reread:
/*
* Set defaults, but the inode is still incomplete!
* Note: get_new_inode() sets the following on a new inode:
@@ -1313,28 +1322,26 @@ static void __udf_read_inode(struct inode *inode)
ibh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 1,
&ident);
if (ident == TAG_IDENT_IE && ibh) {
- struct buffer_head *nbh = NULL;
struct kernel_lb_addr loc;
struct indirectEntry *ie;
ie = (struct indirectEntry *)ibh->b_data;
loc = lelb_to_cpu(ie->indirectICB.extLocation);
- if (ie->indirectICB.extLength &&
- (nbh = udf_read_ptagged(inode->i_sb, &loc, 0,
- &ident))) {
- if (ident == TAG_IDENT_FE ||
- ident == TAG_IDENT_EFE) {
- memcpy(&iinfo->i_location,
- &loc,
- sizeof(struct kernel_lb_addr));
- brelse(bh);
- brelse(ibh);
- brelse(nbh);
- __udf_read_inode(inode);
+ if (ie->indirectICB.extLength) {
+ brelse(bh);
+ brelse(ibh);
+ memcpy(&iinfo->i_location, &loc,
+ sizeof(struct kernel_lb_addr));
+ if (++indirections > UDF_MAX_ICB_NESTING) {
+ udf_err(inode->i_sb,
+ "too many ICBs in ICB hierarchy"
+ " (max %d supported)\n",
+ UDF_MAX_ICB_NESTING);
+ make_bad_inode(inode);
return;
}
- brelse(nbh);
+ goto reread;
}
}
brelse(ibh);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 9ac4057a86c9..839a2bad7f45 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -630,6 +630,12 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
struct udf_sb_info *sbi = UDF_SB(sb);
int error = 0;
+ if (sbi->s_lvid_bh) {
+ int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev);
+ if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & MS_RDONLY))
+ return -EACCES;
+ }
+
uopt.flags = sbi->s_flags;
uopt.uid = sbi->s_uid;
uopt.gid = sbi->s_gid;
@@ -649,12 +655,6 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
sbi->s_dmode = uopt.dmode;
write_unlock(&sbi->s_cred_lock);
- if (sbi->s_lvid_bh) {
- int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev);
- if (write_rev > UDF_MAX_WRITE_VERSION)
- *flags |= MS_RDONLY;
- }
-
if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
goto out_unlock;
@@ -843,27 +843,38 @@ static int udf_find_fileset(struct super_block *sb,
return 1;
}
+/*
+ * Load primary Volume Descriptor Sequence
+ *
+ * Return <0 on error, 0 on success. -EAGAIN is special meaning next sequence
+ * should be tried.
+ */
static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
{
struct primaryVolDesc *pvoldesc;
struct ustr *instr, *outstr;
struct buffer_head *bh;
uint16_t ident;
- int ret = 1;
+ int ret = -ENOMEM;
instr = kmalloc(sizeof(struct ustr), GFP_NOFS);
if (!instr)
- return 1;
+ return -ENOMEM;
outstr = kmalloc(sizeof(struct ustr), GFP_NOFS);
if (!outstr)
goto out1;
bh = udf_read_tagged(sb, block, block, &ident);
- if (!bh)
+ if (!bh) {
+ ret = -EAGAIN;
goto out2;
+ }
- BUG_ON(ident != TAG_IDENT_PVD);
+ if (ident != TAG_IDENT_PVD) {
+ ret = -EIO;
+ goto out_bh;
+ }
pvoldesc = (struct primaryVolDesc *)bh->b_data;
@@ -889,8 +900,9 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
if (udf_CS0toUTF8(outstr, instr))
udf_debug("volSetIdent[] = '%s'\n", outstr->u_name);
- brelse(bh);
ret = 0;
+out_bh:
+ brelse(bh);
out2:
kfree(outstr);
out1:
@@ -947,7 +959,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
if (mdata->s_mirror_fe == NULL) {
udf_err(sb, "Both metadata and mirror metadata inode efe can not found\n");
- goto error_exit;
+ return -EIO;
}
}
@@ -964,23 +976,18 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
addr.logicalBlockNum, addr.partitionReferenceNum);
mdata->s_bitmap_fe = udf_iget(sb, &addr);
-
if (mdata->s_bitmap_fe == NULL) {
if (sb->s_flags & MS_RDONLY)
udf_warn(sb, "bitmap inode efe not found but it's ok since the disc is mounted read-only\n");
else {
udf_err(sb, "bitmap inode efe not found and attempted read-write mount\n");
- goto error_exit;
+ return -EIO;
}
}
}
udf_debug("udf_load_metadata_files Ok\n");
-
return 0;
-
-error_exit:
- return 1;
}
static void udf_load_fileset(struct super_block *sb, struct buffer_head *bh,
@@ -1069,7 +1076,7 @@ static int udf_fill_partdesc_info(struct super_block *sb,
if (!map->s_uspace.s_table) {
udf_debug("cannot load unallocSpaceTable (part %d)\n",
p_index);
- return 1;
+ return -EIO;
}
map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_TABLE;
udf_debug("unallocSpaceTable (part %d) @ %ld\n",
@@ -1079,7 +1086,7 @@ static int udf_fill_partdesc_info(struct super_block *sb,
if (phd->unallocSpaceBitmap.extLength) {
struct udf_bitmap *bitmap = udf_sb_alloc_bitmap(sb, p_index);
if (!bitmap)
- return 1;
+ return -ENOMEM;
map->s_uspace.s_bitmap = bitmap;
bitmap->s_extPosition = le32_to_cpu(
phd->unallocSpaceBitmap.extPosition);
@@ -1102,7 +1109,7 @@ static int udf_fill_partdesc_info(struct super_block *sb,
if (!map->s_fspace.s_table) {
udf_debug("cannot load freedSpaceTable (part %d)\n",
p_index);
- return 1;
+ return -EIO;
}
map->s_partition_flags |= UDF_PART_FLAG_FREED_TABLE;
@@ -1113,7 +1120,7 @@ static int udf_fill_partdesc_info(struct super_block *sb,
if (phd->freedSpaceBitmap.extLength) {
struct udf_bitmap *bitmap = udf_sb_alloc_bitmap(sb, p_index);
if (!bitmap)
- return 1;
+ return -ENOMEM;
map->s_fspace.s_bitmap = bitmap;
bitmap->s_extPosition = le32_to_cpu(
phd->freedSpaceBitmap.extPosition);
@@ -1165,7 +1172,7 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index)
udf_find_vat_block(sb, p_index, type1_index, blocks - 1);
}
if (!sbi->s_vat_inode)
- return 1;
+ return -EIO;
if (map->s_partition_type == UDF_VIRTUAL_MAP15) {
map->s_type_specific.s_virtual.s_start_offset = 0;
@@ -1177,7 +1184,7 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index)
pos = udf_block_map(sbi->s_vat_inode, 0);
bh = sb_bread(sb, pos);
if (!bh)
- return 1;
+ return -EIO;
vat20 = (struct virtualAllocationTable20 *)bh->b_data;
} else {
vat20 = (struct virtualAllocationTable20 *)
@@ -1195,6 +1202,12 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index)
return 0;
}
+/*
+ * Load partition descriptor block
+ *
+ * Returns <0 on error, 0 on success, -EAGAIN is special - try next descriptor
+ * sequence.
+ */
static int udf_load_partdesc(struct super_block *sb, sector_t block)
{
struct buffer_head *bh;
@@ -1204,13 +1217,15 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
int i, type1_idx;
uint16_t partitionNumber;
uint16_t ident;
- int ret = 0;
+ int ret;
bh = udf_read_tagged(sb, block, block, &ident);
if (!bh)
- return 1;
- if (ident != TAG_IDENT_PD)
+ return -EAGAIN;
+ if (ident != TAG_IDENT_PD) {
+ ret = 0;
goto out_bh;
+ }
p = (struct partitionDesc *)bh->b_data;
partitionNumber = le16_to_cpu(p->partitionNumber);
@@ -1229,10 +1244,13 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
if (i >= sbi->s_partitions) {
udf_debug("Partition (%d) not found in partition map\n",
partitionNumber);
+ ret = 0;
goto out_bh;
}
ret = udf_fill_partdesc_info(sb, p, i);
+ if (ret < 0)
+ goto out_bh;
/*
* Now rescan for VIRTUAL or METADATA partitions when SPARABLE and
@@ -1249,32 +1267,37 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
break;
}
- if (i >= sbi->s_partitions)
+ if (i >= sbi->s_partitions) {
+ ret = 0;
goto out_bh;
+ }
ret = udf_fill_partdesc_info(sb, p, i);
- if (ret)
+ if (ret < 0)
goto out_bh;
if (map->s_partition_type == UDF_METADATA_MAP25) {
ret = udf_load_metadata_files(sb, i);
- if (ret) {
+ if (ret < 0) {
udf_err(sb, "error loading MetaData partition map %d\n",
i);
goto out_bh;
}
} else {
- ret = udf_load_vat(sb, i, type1_idx);
- if (ret)
- goto out_bh;
/*
- * Mark filesystem read-only if we have a partition with
- * virtual map since we don't handle writing to it (we
- * overwrite blocks instead of relocating them).
+ * If we have a partition with virtual map, we don't handle
+ * writing to it (we overwrite blocks instead of relocating
+ * them).
*/
- sb->s_flags |= MS_RDONLY;
- pr_notice("Filesystem marked read-only because writing to pseudooverwrite partition is not implemented\n");
+ if (!(sb->s_flags & MS_RDONLY)) {
+ ret = -EACCES;
+ goto out_bh;
+ }
+ ret = udf_load_vat(sb, i, type1_idx);
+ if (ret < 0)
+ goto out_bh;
}
+ ret = 0;
out_bh:
/* In case loading failed, we handle cleanup in udf_fill_super */
brelse(bh);
@@ -1340,11 +1363,11 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
uint16_t ident;
struct buffer_head *bh;
unsigned int table_len;
- int ret = 0;
+ int ret;
bh = udf_read_tagged(sb, block, block, &ident);
if (!bh)
- return 1;
+ return -EAGAIN;
BUG_ON(ident != TAG_IDENT_LVD);
lvd = (struct logicalVolDesc *)bh->b_data;
table_len = le32_to_cpu(lvd->mapTableLength);
@@ -1352,7 +1375,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
udf_err(sb, "error loading logical volume descriptor: "
"Partition table too long (%u > %lu)\n", table_len,
sb->s_blocksize - sizeof(*lvd));
- ret = 1;
+ ret = -EIO;
goto out_bh;
}
@@ -1396,11 +1419,10 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
} else if (!strncmp(upm2->partIdent.ident,
UDF_ID_SPARABLE,
strlen(UDF_ID_SPARABLE))) {
- if (udf_load_sparable_map(sb, map,
- (struct sparablePartitionMap *)gpm) < 0) {
- ret = 1;
+ ret = udf_load_sparable_map(sb, map,
+ (struct sparablePartitionMap *)gpm);
+ if (ret < 0)
goto out_bh;
- }
} else if (!strncmp(upm2->partIdent.ident,
UDF_ID_METADATA,
strlen(UDF_ID_METADATA))) {
@@ -1465,7 +1487,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
}
if (lvd->integritySeqExt.extLength)
udf_load_logicalvolint(sb, leea_to_cpu(lvd->integritySeqExt));
-
+ ret = 0;
out_bh:
brelse(bh);
return ret;
@@ -1503,22 +1525,18 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_
}
/*
- * udf_process_sequence
- *
- * PURPOSE
- * Process a main/reserve volume descriptor sequence.
- *
- * PRE-CONDITIONS
- * sb Pointer to _locked_ superblock.
- * block First block of first extent of the sequence.
- * lastblock Lastblock of first extent of the sequence.
+ * Process a main/reserve volume descriptor sequence.
+ * @block First block of first extent of the sequence.
+ * @lastblock Lastblock of first extent of the sequence.
+ * @fileset There we store extent containing root fileset
*
- * HISTORY
- * July 1, 1997 - Andrew E. Mileski
- * Written, tested, and released.
+ * Returns <0 on error, 0 on success. -EAGAIN is special - try next descriptor
+ * sequence
*/
-static noinline int udf_process_sequence(struct super_block *sb, long block,
- long lastblock, struct kernel_lb_addr *fileset)
+static noinline int udf_process_sequence(
+ struct super_block *sb,
+ sector_t block, sector_t lastblock,
+ struct kernel_lb_addr *fileset)
{
struct buffer_head *bh = NULL;
struct udf_vds_record vds[VDS_POS_LENGTH];
@@ -1529,6 +1547,7 @@ static noinline int udf_process_sequence(struct super_block *sb, long block,
uint32_t vdsn;
uint16_t ident;
long next_s = 0, next_e = 0;
+ int ret;
memset(vds, 0, sizeof(struct udf_vds_record) * VDS_POS_LENGTH);
@@ -1543,7 +1562,7 @@ static noinline int udf_process_sequence(struct super_block *sb, long block,
udf_err(sb,
"Block %llu of volume descriptor sequence is corrupted or we could not read it\n",
(unsigned long long)block);
- return 1;
+ return -EAGAIN;
}
/* Process each descriptor (ISO 13346 3/8.3-8.4) */
@@ -1616,14 +1635,19 @@ static noinline int udf_process_sequence(struct super_block *sb, long block,
*/
if (!vds[VDS_POS_PRIMARY_VOL_DESC].block) {
udf_err(sb, "Primary Volume Descriptor not found!\n");
- return 1;
+ return -EAGAIN;
+ }
+ ret = udf_load_pvoldesc(sb, vds[VDS_POS_PRIMARY_VOL_DESC].block);
+ if (ret < 0)
+ return ret;
+
+ if (vds[VDS_POS_LOGICAL_VOL_DESC].block) {
+ ret = udf_load_logicalvol(sb,
+ vds[VDS_POS_LOGICAL_VOL_DESC].block,
+ fileset);
+ if (ret < 0)
+ return ret;
}
- if (udf_load_pvoldesc(sb, vds[VDS_POS_PRIMARY_VOL_DESC].block))
- return 1;
-
- if (vds[VDS_POS_LOGICAL_VOL_DESC].block && udf_load_logicalvol(sb,
- vds[VDS_POS_LOGICAL_VOL_DESC].block, fileset))
- return 1;
if (vds[VDS_POS_PARTITION_DESC].block) {
/*
@@ -1632,19 +1656,27 @@ static noinline int udf_process_sequence(struct super_block *sb, long block,
*/
for (block = vds[VDS_POS_PARTITION_DESC].block;
block < vds[VDS_POS_TERMINATING_DESC].block;
- block++)
- if (udf_load_partdesc(sb, block))
- return 1;
+ block++) {
+ ret = udf_load_partdesc(sb, block);
+ if (ret < 0)
+ return ret;
+ }
}
return 0;
}
+/*
+ * Load Volume Descriptor Sequence described by anchor in bh
+ *
+ * Returns <0 on error, 0 on success
+ */
static int udf_load_sequence(struct super_block *sb, struct buffer_head *bh,
struct kernel_lb_addr *fileset)
{
struct anchorVolDescPtr *anchor;
- long main_s, main_e, reserve_s, reserve_e;
+ sector_t main_s, main_e, reserve_s, reserve_e;
+ int ret;
anchor = (struct anchorVolDescPtr *)bh->b_data;
@@ -1662,18 +1694,26 @@ static int udf_load_sequence(struct super_block *sb, struct buffer_head *bh,
/* Process the main & reserve sequences */
/* responsible for finding the PartitionDesc(s) */
- if (!udf_process_sequence(sb, main_s, main_e, fileset))
- return 1;
- udf_sb_free_partitions(sb);
- if (!udf_process_sequence(sb, reserve_s, reserve_e, fileset))
- return 1;
+ ret = udf_process_sequence(sb, main_s, main_e, fileset);
+ if (ret != -EAGAIN)
+ return ret;
udf_sb_free_partitions(sb);
- return 0;
+ ret = udf_process_sequence(sb, reserve_s, reserve_e, fileset);
+ if (ret < 0) {
+ udf_sb_free_partitions(sb);
+ /* No sequence was OK, return -EIO */
+ if (ret == -EAGAIN)
+ ret = -EIO;
+ }
+ return ret;
}
/*
* Check whether there is an anchor block in the given block and
* load Volume Descriptor Sequence if so.
+ *
+ * Returns <0 on error, 0 on success, -EAGAIN is special - try next anchor
+ * block
*/
static int udf_check_anchor_block(struct super_block *sb, sector_t block,
struct kernel_lb_addr *fileset)
@@ -1685,33 +1725,40 @@ static int udf_check_anchor_block(struct super_block *sb, sector_t block,
if (UDF_QUERY_FLAG(sb, UDF_FLAG_VARCONV) &&
udf_fixed_to_variable(block) >=
sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits)
- return 0;
+ return -EAGAIN;
bh = udf_read_tagged(sb, block, block, &ident);
if (!bh)
- return 0;
+ return -EAGAIN;
if (ident != TAG_IDENT_AVDP) {
brelse(bh);
- return 0;
+ return -EAGAIN;
}
ret = udf_load_sequence(sb, bh, fileset);
brelse(bh);
return ret;
}
-/* Search for an anchor volume descriptor pointer */
-static sector_t udf_scan_anchors(struct super_block *sb, sector_t lastblock,
- struct kernel_lb_addr *fileset)
+/*
+ * Search for an anchor volume descriptor pointer.
+ *
+ * Returns < 0 on error, 0 on success. -EAGAIN is special - try next set
+ * of anchors.
+ */
+static int udf_scan_anchors(struct super_block *sb, sector_t *lastblock,
+ struct kernel_lb_addr *fileset)
{
sector_t last[6];
int i;
struct udf_sb_info *sbi = UDF_SB(sb);
int last_count = 0;
+ int ret;
/* First try user provided anchor */
if (sbi->s_anchor) {
- if (udf_check_anchor_block(sb, sbi->s_anchor, fileset))
- return lastblock;
+ ret = udf_check_anchor_block(sb, sbi->s_anchor, fileset);
+ if (ret != -EAGAIN)
+ return ret;
}
/*
* according to spec, anchor is in either:
@@ -1720,39 +1767,46 @@ static sector_t udf_scan_anchors(struct super_block *sb, sector_t lastblock,
* lastblock
* however, if the disc isn't closed, it could be 512.
*/
- if (udf_check_anchor_block(sb, sbi->s_session + 256, fileset))
- return lastblock;
+ ret = udf_check_anchor_block(sb, sbi->s_session + 256, fileset);
+ if (ret != -EAGAIN)
+ return ret;
/*
* The trouble is which block is the last one. Drives often misreport
* this so we try various possibilities.
*/
- last[last_count++] = lastblock;
- if (lastblock >= 1)
- last[last_count++] = lastblock - 1;
- last[last_count++] = lastblock + 1;
- if (lastblock >= 2)
- last[last_count++] = lastblock - 2;
- if (lastblock >= 150)
- last[last_count++] = lastblock - 150;
- if (lastblock >= 152)
- last[last_count++] = lastblock - 152;
+ last[last_count++] = *lastblock;
+ if (*lastblock >= 1)
+ last[last_count++] = *lastblock - 1;
+ last[last_count++] = *lastblock + 1;
+ if (*lastblock >= 2)
+ last[last_count++] = *lastblock - 2;
+ if (*lastblock >= 150)
+ last[last_count++] = *lastblock - 150;
+ if (*lastblock >= 152)
+ last[last_count++] = *lastblock - 152;
for (i = 0; i < last_count; i++) {
if (last[i] >= sb->s_bdev->bd_inode->i_size >>
sb->s_blocksize_bits)
continue;
- if (udf_check_anchor_block(sb, last[i], fileset))
- return last[i];
+ ret = udf_check_anchor_block(sb, last[i], fileset);
+ if (ret != -EAGAIN) {
+ if (!ret)
+ *lastblock = last[i];
+ return ret;
+ }
if (last[i] < 256)
continue;
- if (udf_check_anchor_block(sb, last[i] - 256, fileset))
- return last[i];
+ ret = udf_check_anchor_block(sb, last[i] - 256, fileset);
+ if (ret != -EAGAIN) {
+ if (!ret)
+ *lastblock = last[i];
+ return ret;
+ }
}
/* Finally try block 512 in case media is open */
- if (udf_check_anchor_block(sb, sbi->s_session + 512, fileset))
- return last[0];
- return 0;
+ return udf_check_anchor_block(sb, sbi->s_session + 512, fileset);
}
/*
@@ -1760,54 +1814,59 @@ static sector_t udf_scan_anchors(struct super_block *sb, sector_t lastblock,
* area specified by it. The function expects sbi->s_lastblock to be the last
* block on the media.
*
- * Return 1 if ok, 0 if not found.
- *
+ * Return <0 on error, 0 if anchor found. -EAGAIN is special meaning anchor
+ * was not found.
*/
static int udf_find_anchor(struct super_block *sb,
struct kernel_lb_addr *fileset)
{
- sector_t lastblock;
struct udf_sb_info *sbi = UDF_SB(sb);
+ sector_t lastblock = sbi->s_last_block;
+ int ret;
- lastblock = udf_scan_anchors(sb, sbi->s_last_block, fileset);
- if (lastblock)
+ ret = udf_scan_anchors(sb, &lastblock, fileset);
+ if (ret != -EAGAIN)
goto out;
/* No anchor found? Try VARCONV conversion of block numbers */
UDF_SET_FLAG(sb, UDF_FLAG_VARCONV);
+ lastblock = udf_variable_to_fixed(sbi->s_last_block);
/* Firstly, we try to not convert number of the last block */
- lastblock = udf_scan_anchors(sb,
- udf_variable_to_fixed(sbi->s_last_block),
- fileset);
- if (lastblock)
+ ret = udf_scan_anchors(sb, &lastblock, fileset);
+ if (ret != -EAGAIN)
goto out;
+ lastblock = sbi->s_last_block;
/* Secondly, we try with converted number of the last block */
- lastblock = udf_scan_anchors(sb, sbi->s_last_block, fileset);
- if (!lastblock) {
+ ret = udf_scan_anchors(sb, &lastblock, fileset);
+ if (ret < 0) {
/* VARCONV didn't help. Clear it. */
UDF_CLEAR_FLAG(sb, UDF_FLAG_VARCONV);
- return 0;
}
out:
- sbi->s_last_block = lastblock;
- return 1;
+ if (ret == 0)
+ sbi->s_last_block = lastblock;
+ return ret;
}
/*
* Check Volume Structure Descriptor, find Anchor block and load Volume
- * Descriptor Sequence
+ * Descriptor Sequence.
+ *
+ * Returns < 0 on error, 0 on success. -EAGAIN is special meaning anchor
+ * block was not found.
*/
static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
int silent, struct kernel_lb_addr *fileset)
{
struct udf_sb_info *sbi = UDF_SB(sb);
loff_t nsr_off;
+ int ret;
if (!sb_set_blocksize(sb, uopt->blocksize)) {
if (!silent)
udf_warn(sb, "Bad block size\n");
- return 0;
+ return -EINVAL;
}
sbi->s_last_block = uopt->lastblock;
if (!uopt->novrs) {
@@ -1828,12 +1887,13 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
/* Look for anchor block and load Volume Descriptor Sequence */
sbi->s_anchor = uopt->anchor;
- if (!udf_find_anchor(sb, fileset)) {
- if (!silent)
+ ret = udf_find_anchor(sb, fileset);
+ if (ret < 0) {
+ if (!silent && ret == -EAGAIN)
udf_warn(sb, "No anchor found\n");
- return 0;
+ return ret;
}
- return 1;
+ return 0;
}
static void udf_open_lvid(struct super_block *sb)
@@ -1939,7 +1999,7 @@ u64 lvid_get_unique_id(struct super_block *sb)
static int udf_fill_super(struct super_block *sb, void *options, int silent)
{
- int ret;
+ int ret = -EINVAL;
struct inode *inode = NULL;
struct udf_options uopt;
struct kernel_lb_addr rootdir, fileset;
@@ -2011,7 +2071,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
} else {
uopt.blocksize = bdev_logical_block_size(sb->s_bdev);
ret = udf_load_vrs(sb, &uopt, silent, &fileset);
- if (!ret && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) {
+ if (ret == -EAGAIN && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) {
if (!silent)
pr_notice("Rescanning with blocksize %d\n",
UDF_DEFAULT_BLOCKSIZE);
@@ -2021,8 +2081,11 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
ret = udf_load_vrs(sb, &uopt, silent, &fileset);
}
}
- if (!ret) {
- udf_warn(sb, "No partition found (1)\n");
+ if (ret < 0) {
+ if (ret == -EAGAIN) {
+ udf_warn(sb, "No partition found (1)\n");
+ ret = -EINVAL;
+ }
goto error_out;
}
@@ -2040,9 +2103,13 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
udf_err(sb, "minUDFReadRev=%x (max is %x)\n",
le16_to_cpu(lvidiu->minUDFReadRev),
UDF_MAX_READ_VERSION);
+ ret = -EINVAL;
+ goto error_out;
+ } else if (minUDFWriteRev > UDF_MAX_WRITE_VERSION &&
+ !(sb->s_flags & MS_RDONLY)) {
+ ret = -EACCES;
goto error_out;
- } else if (minUDFWriteRev > UDF_MAX_WRITE_VERSION)
- sb->s_flags |= MS_RDONLY;
+ }
sbi->s_udfrev = minUDFWriteRev;
@@ -2054,17 +2121,20 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
if (!sbi->s_partitions) {
udf_warn(sb, "No partition found (2)\n");
+ ret = -EINVAL;
goto error_out;
}
if (sbi->s_partmaps[sbi->s_partition].s_partition_flags &
- UDF_PART_FLAG_READ_ONLY) {
- pr_notice("Partition marked readonly; forcing readonly mount\n");
- sb->s_flags |= MS_RDONLY;
+ UDF_PART_FLAG_READ_ONLY &&
+ !(sb->s_flags & MS_RDONLY)) {
+ ret = -EACCES;
+ goto error_out;
}
if (udf_find_fileset(sb, &fileset, &rootdir)) {
udf_warn(sb, "No fileset found\n");
+ ret = -EINVAL;
goto error_out;
}
@@ -2086,6 +2156,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
if (!inode) {
udf_err(sb, "Error in udf_iget, block=%d, partition=%d\n",
rootdir.logicalBlockNum, rootdir.partitionReferenceNum);
+ ret = -EIO;
goto error_out;
}
@@ -2093,6 +2164,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
sb->s_root = d_make_root(inode);
if (!sb->s_root) {
udf_err(sb, "Couldn't allocate root dentry\n");
+ ret = -ENOMEM;
goto error_out;
}
sb->s_maxbytes = MAX_LFS_FILESIZE;
@@ -2113,7 +2185,7 @@ error_out:
kfree(sbi);
sb->s_fs_info = NULL;
- return -EINVAL;
+ return ret;
}
void _udf_err(struct super_block *sb, const char *function,
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index d7c6dbe4194b..d89f324bc387 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -80,11 +80,17 @@ static int udf_symlink_filler(struct file *file, struct page *page)
struct inode *inode = page->mapping->host;
struct buffer_head *bh = NULL;
unsigned char *symlink;
- int err = -EIO;
+ int err;
unsigned char *p = kmap(page);
struct udf_inode_info *iinfo;
uint32_t pos;
+ /* We don't support symlinks longer than one block */
+ if (inode->i_size > inode->i_sb->s_blocksize) {
+ err = -ENAMETOOLONG;
+ goto out_unmap;
+ }
+
iinfo = UDF_I(inode);
pos = udf_block_map(inode, 0);
@@ -94,8 +100,10 @@ static int udf_symlink_filler(struct file *file, struct page *page)
} else {
bh = sb_bread(inode->i_sb, pos);
- if (!bh)
- goto out;
+ if (!bh) {
+ err = -EIO;
+ goto out_unlock_inode;
+ }
symlink = bh->b_data;
}
@@ -109,9 +117,10 @@ static int udf_symlink_filler(struct file *file, struct page *page)
unlock_page(page);
return 0;
-out:
+out_unlock_inode:
up_read(&iinfo->i_data_sem);
SetPageError(page);
+out_unmap:
kunmap(page);
unlock_page(page);
return err;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 41a695048be7..cfbb4c1b2f17 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1661,11 +1661,72 @@ xfs_vm_readpages(
return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
}
+/*
+ * This is basically a copy of __set_page_dirty_buffers() with one
+ * small tweak: buffers beyond EOF do not get marked dirty. If we mark them
+ * dirty, we'll never be able to clean them because we don't write buffers
+ * beyond EOF, and that means we can't invalidate pages that span EOF
+ * that have been marked dirty. Further, the dirty state can leak into
+ * the file interior if the file is extended, resulting in all sorts of
+ * bad things happening as the state does not match the underlying data.
+ *
+ * Returns the negated result of TestSetPageDirty(): non-zero when this call
+ * is the one that set the page dirty flag, zero when it was already set.
+ *
+ * XXX: this really indicates that bufferheads in XFS need to die. Warts like
+ * this only exist because of bufferheads and how the generic code manages them.
+ */
+STATIC int
+xfs_vm_set_page_dirty(
+	struct page		*page)
+{
+	struct address_space	*mapping = page->mapping;
+	struct inode		*inode;
+	loff_t			end_offset;
+	loff_t			offset;
+	int			newly_dirty;
+
+	/*
+	 * A page without a mapping only gets its page flag set. This test
+	 * has to run before anything dereferences mapping, which is why
+	 * mapping->host is not read in the declarations above.
+	 */
+	if (unlikely(!mapping))
+		return !TestSetPageDirty(page);
+
+	inode = mapping->host;
+	end_offset = i_size_read(inode);
+	offset = page_offset(page);
+
+	spin_lock(&mapping->private_lock);
+	if (page_has_buffers(page)) {
+		struct buffer_head *head = page_buffers(page);
+		struct buffer_head *bh = head;
+
+		/*
+		 * Walk the page's circular buffer list, advancing the file
+		 * offset by one block per buffer; only buffers that start
+		 * below i_size are dirtied.
+		 */
+		do {
+			if (offset < end_offset)
+				set_buffer_dirty(bh);
+			bh = bh->b_this_page;
+			offset += 1 << inode->i_blkbits;
+		} while (bh != head);
+	}
+	newly_dirty = !TestSetPageDirty(page);
+	spin_unlock(&mapping->private_lock);
+
+	if (newly_dirty) {
+		/* sigh - __set_page_dirty() is static, so copy it here, too */
+		unsigned long flags;
+
+		spin_lock_irqsave(&mapping->tree_lock, flags);
+		if (page->mapping) {	/* Race with truncate? */
+			WARN_ON_ONCE(!PageUptodate(page));
+			account_page_dirtied(page, mapping);
+			radix_tree_tag_set(&mapping->page_tree,
+					page_index(page), PAGECACHE_TAG_DIRTY);
+		}
+		spin_unlock_irqrestore(&mapping->tree_lock, flags);
+		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+	}
+	return newly_dirty;
+}
+
const struct address_space_operations xfs_address_space_operations = {
.readpage = xfs_vm_readpage,
.readpages = xfs_vm_readpages,
.writepage = xfs_vm_writepage,
.writepages = xfs_vm_writepages,
+ .set_page_dirty = xfs_vm_set_page_dirty,
.releasepage = xfs_vm_releasepage,
.invalidatepage = xfs_vm_invalidatepage,
.write_begin = xfs_vm_write_begin,
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 4ec431777048..e0451f4201cf 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -296,6 +296,10 @@ xfs_buf_item_format(
ASSERT(atomic_read(&bip->bli_refcount) > 0);
ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
(bip->bli_flags & XFS_BLI_STALE));
+ ASSERT((bip->bli_flags & XFS_BLI_STALE) ||
+ (xfs_blft_from_flags(&bip->__bli_format) > XFS_BLFT_UNKNOWN_BUF
+ && xfs_blft_from_flags(&bip->__bli_format) < XFS_BLFT_MAX_BUF));
+
/*
* If it is an inode buffer, transfer the in-memory state to the
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 0b8b2a13cd24..79ddbaf93206 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1223,6 +1223,7 @@ xfs_da3_node_toosmall(
/* start with smaller blk num */
forward = nodehdr.forw < nodehdr.back;
for (i = 0; i < 2; forward = !forward, i++) {
+ struct xfs_da3_icnode_hdr thdr;
if (forward)
blkno = nodehdr.forw;
else
@@ -1235,10 +1236,10 @@ xfs_da3_node_toosmall(
return(error);
node = bp->b_addr;
- xfs_da3_node_hdr_from_disk(&nodehdr, node);
+ xfs_da3_node_hdr_from_disk(&thdr, node);
xfs_trans_brelse(state->args->trans, bp);
- if (count - nodehdr.count >= 0)
+ if (count - thdr.count >= 0)
break; /* fits with at least 25% to spare */
}
if (i >= 2) {
@@ -1333,7 +1334,7 @@ xfs_da3_fixhashpath(
node = blk->bp->b_addr;
xfs_da3_node_hdr_from_disk(&nodehdr, node);
btree = xfs_da3_node_tree_p(node);
- if (be32_to_cpu(btree->hashval) == lasthash)
+ if (be32_to_cpu(btree[blk->index].hashval) == lasthash)
break;
blk->hashval = lasthash;
btree[blk->index].hashval = cpu_to_be32(lasthash);
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 044e97a33c8d..bac3e1635b7d 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -1104,7 +1104,8 @@ xfs_qm_dqflush(
* Get the buffer containing the on-disk dquot
*/
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
- mp->m_quotainfo->qi_dqchunklen, 0, &bp, NULL);
+ mp->m_quotainfo->qi_dqchunklen, 0, &bp,
+ &xfs_dquot_buf_ops);
if (error)
goto out_unlock;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index a5f2042aec8b..9f457fedbcfc 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -298,7 +298,16 @@ xfs_file_aio_read(
xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
return ret;
}
- truncate_pagecache_range(VFS_I(ip), pos, -1);
+
+ /*
+ * Invalidate whole pages. This can return an error if
+ * we fail to invalidate a page, but this should never
+ * happen on XFS. Warn if it does fail.
+ */
+ ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
+ pos >> PAGE_CACHE_SHIFT, -1);
+ WARN_ON_ONCE(ret);
+ ret = 0;
}
xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
}
@@ -677,7 +686,15 @@ xfs_file_dio_aio_write(
pos, -1);
if (ret)
goto out;
- truncate_pagecache_range(VFS_I(ip), pos, -1);
+ /*
+ * Invalidate whole pages. This can return an error if
+ * we fail to invalidate a page, but this should never
+ * happen on XFS. Warn if it does fail.
+ */
+ ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
+ pos >> PAGE_CACHE_SHIFT, -1);
+ WARN_ON_ONCE(ret);
+ ret = 0;
}
/*
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 3c3644ea825b..2288db4e1784 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -216,6 +216,8 @@ xfs_growfs_data_private(
*/
nfree = 0;
for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) {
+ __be32 *agfl_bno;
+
/*
* AG freespace header block
*/
@@ -275,8 +277,10 @@ xfs_growfs_data_private(
agfl->agfl_seqno = cpu_to_be32(agno);
uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_uuid);
}
+
+ agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp);
for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++)
- agfl->agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
+ agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
error = xfs_bwrite(bp);
xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 7f7be5f98f52..f010ab4594f1 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -1655,6 +1655,7 @@ xfs_iunlink(
agi->agi_unlinked[bucket_index] = cpu_to_be32(agino);
offset = offsetof(xfs_agi_t, agi_unlinked) +
(sizeof(xfs_agino_t) * bucket_index);
+ xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF);
xfs_trans_log_buf(tp, agibp, offset,
(offset + sizeof(xfs_agino_t) - 1));
return 0;
@@ -1746,6 +1747,7 @@ xfs_iunlink_remove(
agi->agi_unlinked[bucket_index] = cpu_to_be32(next_agino);
offset = offsetof(xfs_agi_t, agi_unlinked) +
(sizeof(xfs_agino_t) * bucket_index);
+ xfs_trans_buf_set_type(tp, agibp, XFS_BLFT_AGI_BUF);
xfs_trans_log_buf(tp, agibp, offset,
(offset + sizeof(xfs_agino_t) - 1));
} else {
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 5e999680094a..83dfe6e73235 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -409,7 +409,8 @@ xfs_attrlist_by_handle(
return -XFS_ERROR(EPERM);
if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))
return -XFS_ERROR(EFAULT);
- if (al_hreq.buflen > XATTR_LIST_MAX)
+ if (al_hreq.buflen < sizeof(struct attrlist) ||
+ al_hreq.buflen > XATTR_LIST_MAX)
return -XFS_ERROR(EINVAL);
/*
@@ -1612,6 +1613,12 @@ xfs_file_ioctl(
case XFS_IOC_FREE_EOFBLOCKS: {
struct xfs_eofblocks eofb;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (mp->m_flags & XFS_MOUNT_RDONLY)
+ return -XFS_ERROR(EROFS);
+
if (copy_from_user(&eofb, arg, sizeof(eofb)))
return -XFS_ERROR(EFAULT);
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index c0c66259cc91..68799d7f02cc 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -359,7 +359,8 @@ xfs_compat_attrlist_by_handle(
if (copy_from_user(&al_hreq, arg,
sizeof(compat_xfs_fsop_attrlist_handlereq_t)))
return -XFS_ERROR(EFAULT);
- if (al_hreq.buflen > XATTR_LIST_MAX)
+ if (al_hreq.buflen < sizeof(struct attrlist) ||
+ al_hreq.buflen > XATTR_LIST_MAX)
return -XFS_ERROR(EINVAL);
/*
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index b75c9bb6e71e..29d1ca567ed3 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -935,6 +935,12 @@ xfs_qm_dqiter_bufs(
if (error)
break;
+ /*
+ * A corrupt buffer might not have a verifier attached, so
+ * make sure we have the correct one attached before writeback
+ * occurs.
+ */
+ bp->b_ops = &xfs_dquot_buf_ops;
xfs_qm_reset_dqcounts(mp, bp, firstid, type);
xfs_buf_delwri_queue(bp, buffer_list);
xfs_buf_relse(bp);
@@ -1018,7 +1024,7 @@ xfs_qm_dqiterate(
xfs_buf_readahead(mp->m_ddev_targp,
XFS_FSB_TO_DADDR(mp, rablkno),
mp->m_quotainfo->qi_dqchunklen,
- NULL);
+ &xfs_dquot_buf_ops);
rablkno++;
}
}
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 2fd7c1ff1d21..b5d5beb7df3a 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1100,6 +1100,7 @@ xfs_trans_apply_sb_deltas(
whole = 1;
}
+ xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
if (whole)
/*
* Log the whole thing, the fields are noncontiguous.