aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorAlex Shi <alex.shi@linaro.org>2017-03-20 12:03:07 +0800
committerAlex Shi <alex.shi@linaro.org>2017-03-20 12:03:07 +0800
commit1c563c0006661025d7a6c9bc85fc889a4e8a1c06 (patch)
tree93d455583031d0cacebfd436d5b2118a536de5f7 /fs
parent71205f3b80da389c52cc2611f59e183a77c56f4f (diff)
parent28ec98bc2e4a175b60f45d505e715a33b93dd077 (diff)
Merge tag 'v4.4.55' into linux-linaro-lsk-v4.4lsk-v4.4-17.03
This is the 4.4.55 stable release
Diffstat (limited to 'fs')
-rw-r--r--fs/ceph/mds_client.c5
-rw-r--r--fs/ext4/extents.c27
-rw-r--r--fs/ext4/inline.c9
-rw-r--r--fs/ext4/inode.c43
-rw-r--r--fs/ext4/mballoc.c7
-rw-r--r--fs/ext4/super.c9
-rw-r--r--fs/fat/inode.c13
-rw-r--r--fs/fuse/file.c1
-rw-r--r--fs/gfs2/glock.c5
-rw-r--r--fs/jbd2/transaction.c4
-rw-r--r--fs/mount.h1
-rw-r--r--fs/namespace.c109
-rw-r--r--fs/nfs/nfs4proc.c10
-rw-r--r--fs/nfs/nfs4xdr.c2
-rw-r--r--fs/nfsd/vfs.c59
-rw-r--r--fs/pnode.c61
-rw-r--r--fs/pnode.h2
17 files changed, 244 insertions, 123 deletions
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 239bc9cba28c..f54f77037d22 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -644,6 +644,9 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
{
dout("__unregister_request %p tid %lld\n", req, req->r_tid);
+ /* Never leave an unregistered request on an unsafe list! */
+ list_del_init(&req->r_unsafe_item);
+
if (req->r_tid == mdsc->oldest_tid) {
struct rb_node *p = rb_next(&req->r_node);
mdsc->oldest_tid = 0;
@@ -1051,7 +1054,6 @@ static void cleanup_session_requests(struct ceph_mds_client *mdsc,
while (!list_empty(&session->s_unsafe)) {
req = list_first_entry(&session->s_unsafe,
struct ceph_mds_request, r_unsafe_item);
- list_del_init(&req->r_unsafe_item);
pr_warn_ratelimited(" dropping unsafe request %llu\n",
req->r_tid);
__unregister_request(mdsc, req);
@@ -2477,7 +2479,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
* useful we could do with a revised return value.
*/
dout("got safe reply %llu, mds%d\n", tid, mds);
- list_del_init(&req->r_unsafe_item);
/* last unsafe request during umount? */
if (mdsc->stopping && !__get_oldest_req(mdsc))
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 9da42ace762a..8a456f9b8a44 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -5362,7 +5362,8 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
ext4_lblk_t stop, *iterator, ex_start, ex_end;
/* Let path point to the last extent */
- path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
+ path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL,
+ EXT4_EX_NOCACHE);
if (IS_ERR(path))
return PTR_ERR(path);
@@ -5371,15 +5372,15 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
if (!extent)
goto out;
- stop = le32_to_cpu(extent->ee_block) +
- ext4_ext_get_actual_len(extent);
+ stop = le32_to_cpu(extent->ee_block);
/*
* In case of left shift, Don't start shifting extents until we make
* sure the hole is big enough to accommodate the shift.
*/
if (SHIFT == SHIFT_LEFT) {
- path = ext4_find_extent(inode, start - 1, &path, 0);
+ path = ext4_find_extent(inode, start - 1, &path,
+ EXT4_EX_NOCACHE);
if (IS_ERR(path))
return PTR_ERR(path);
depth = path->p_depth;
@@ -5411,9 +5412,14 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
else
iterator = &stop;
- /* Its safe to start updating extents */
- while (start < stop) {
- path = ext4_find_extent(inode, *iterator, &path, 0);
+ /*
+ * Its safe to start updating extents. Start and stop are unsigned, so
+ * in case of right shift if extent with 0 block is reached, iterator
+ * becomes NULL to indicate the end of the loop.
+ */
+ while (iterator && start <= stop) {
+ path = ext4_find_extent(inode, *iterator, &path,
+ EXT4_EX_NOCACHE);
if (IS_ERR(path))
return PTR_ERR(path);
depth = path->p_depth;
@@ -5440,8 +5446,11 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
ext4_ext_get_actual_len(extent);
} else {
extent = EXT_FIRST_EXTENT(path[depth].p_hdr);
- *iterator = le32_to_cpu(extent->ee_block) > 0 ?
- le32_to_cpu(extent->ee_block) - 1 : 0;
+ if (le32_to_cpu(extent->ee_block) > 0)
+ *iterator = le32_to_cpu(extent->ee_block) - 1;
+ else
+ /* Beginning is reached, end of the loop */
+ iterator = NULL;
/* Update path extent in case we need to stop */
while (le32_to_cpu(extent->ee_block) < start)
extent++;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 8968a93e2150..d4be4e23bc21 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -933,8 +933,15 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
struct page *page)
{
int i_size_changed = 0;
+ int ret;
- copied = ext4_write_inline_data_end(inode, pos, len, copied, page);
+ ret = ext4_write_inline_data_end(inode, pos, len, copied, page);
+ if (ret < 0) {
+ unlock_page(page);
+ put_page(page);
+ return ret;
+ }
+ copied = ret;
/*
* No need to use i_size_read() here, the i_size
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 10690e5ba2eb..7dcc97eadb12 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1165,8 +1165,11 @@ static int ext4_write_end(struct file *file,
if (ext4_has_inline_data(inode)) {
ret = ext4_write_inline_data_end(inode, pos, len,
copied, page);
- if (ret < 0)
+ if (ret < 0) {
+ unlock_page(page);
+ put_page(page);
goto errout;
+ }
copied = ret;
} else
copied = block_write_end(file, mapping, pos,
@@ -1220,7 +1223,9 @@ errout:
* set the buffer to be dirty, since in data=journalled mode we need
* to call ext4_handle_dirty_metadata() instead.
*/
-static void zero_new_buffers(struct page *page, unsigned from, unsigned to)
+static void ext4_journalled_zero_new_buffers(handle_t *handle,
+ struct page *page,
+ unsigned from, unsigned to)
{
unsigned int block_start = 0, block_end;
struct buffer_head *head, *bh;
@@ -1237,7 +1242,7 @@ static void zero_new_buffers(struct page *page, unsigned from, unsigned to)
size = min(to, block_end) - start;
zero_user(page, start, size);
- set_buffer_uptodate(bh);
+ write_end_fn(handle, bh);
}
clear_buffer_new(bh);
}
@@ -1266,18 +1271,25 @@ static int ext4_journalled_write_end(struct file *file,
BUG_ON(!ext4_handle_valid(handle));
- if (ext4_has_inline_data(inode))
- copied = ext4_write_inline_data_end(inode, pos, len,
- copied, page);
- else {
- if (copied < len) {
- if (!PageUptodate(page))
- copied = 0;
- zero_new_buffers(page, from+copied, to);
+ if (ext4_has_inline_data(inode)) {
+ ret = ext4_write_inline_data_end(inode, pos, len,
+ copied, page);
+ if (ret < 0) {
+ unlock_page(page);
+ put_page(page);
+ goto errout;
}
-
+ copied = ret;
+ } else if (unlikely(copied < len) && !PageUptodate(page)) {
+ copied = 0;
+ ext4_journalled_zero_new_buffers(handle, page, from, to);
+ } else {
+ if (unlikely(copied < len))
+ ext4_journalled_zero_new_buffers(handle, page,
+ from + copied, to);
ret = ext4_walk_page_buffers(handle, page_buffers(page), from,
- to, &partial, write_end_fn);
+ from + copied, &partial,
+ write_end_fn);
if (!partial)
SetPageUptodate(page);
}
@@ -1303,6 +1315,7 @@ static int ext4_journalled_write_end(struct file *file,
*/
ext4_orphan_add(handle, inode);
+errout:
ret2 = ext4_journal_stop(handle);
if (!ret)
ret = ret2;
@@ -3560,6 +3573,10 @@ static int ext4_block_truncate_page(handle_t *handle,
unsigned blocksize;
struct inode *inode = mapping->host;
+ /* If we are processing an encrypted inode during orphan list handling */
+ if (ext4_encrypted_inode(inode) && !ext4_has_encryption_key(inode))
+ return 0;
+
blocksize = inode->i_sb->s_blocksize;
length = blocksize - (offset & (blocksize - 1));
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b7a3957a9dca..84cd77663e1f 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3120,6 +3120,13 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
if (ar->pright && start + size - 1 >= ar->lright)
size -= start + size - ar->lright;
+ /*
+ * Trim allocation request for filesystems with artificially small
+ * groups.
+ */
+ if (size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb))
+ size = EXT4_BLOCKS_PER_GROUP(ac->ac_sb);
+
end = start + size;
/* check we don't cross already preallocated blocks */
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b405a7b74ce0..6fe8e30eeb99 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -793,6 +793,7 @@ static void ext4_put_super(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_super_block *es = sbi->s_es;
+ int aborted = 0;
int i, err;
ext4_unregister_li_request(sb);
@@ -802,9 +803,10 @@ static void ext4_put_super(struct super_block *sb)
destroy_workqueue(sbi->rsv_conversion_wq);
if (sbi->s_journal) {
+ aborted = is_journal_aborted(sbi->s_journal);
err = jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
- if (err < 0)
+ if ((err < 0) && !aborted)
ext4_abort(sb, "Couldn't clean up the journal");
}
@@ -816,7 +818,7 @@ static void ext4_put_super(struct super_block *sb)
ext4_ext_release(sb);
ext4_xattr_put_super(sb);
- if (!(sb->s_flags & MS_RDONLY)) {
+ if (!(sb->s_flags & MS_RDONLY) && !aborted) {
ext4_clear_feature_journal_needs_recovery(sb);
es->s_state = cpu_to_le16(sbi->s_mount_state);
}
@@ -3746,7 +3748,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
* root first: it may be modified in the journal!
*/
if (!test_opt(sb, NOLOAD) && ext4_has_feature_journal(sb)) {
- if (ext4_load_journal(sb, es, journal_devnum))
+ err = ext4_load_journal(sb, es, journal_devnum);
+ if (err)
goto failed_mount3a;
} else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
ext4_has_feature_journal_needs_recovery(sb)) {
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 509411dd3698..cf644d52c0cf 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1269,6 +1269,16 @@ out:
return 0;
}
+static void fat_dummy_inode_init(struct inode *inode)
+{
+ /* Initialize this dummy inode to work as no-op. */
+ MSDOS_I(inode)->mmu_private = 0;
+ MSDOS_I(inode)->i_start = 0;
+ MSDOS_I(inode)->i_logstart = 0;
+ MSDOS_I(inode)->i_attrs = 0;
+ MSDOS_I(inode)->i_pos = 0;
+}
+
static int fat_read_root(struct inode *inode)
{
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
@@ -1713,12 +1723,13 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
fat_inode = new_inode(sb);
if (!fat_inode)
goto out_fail;
- MSDOS_I(fat_inode)->i_pos = 0;
+ fat_dummy_inode_init(fat_inode);
sbi->fat_inode = fat_inode;
fsinfo_inode = new_inode(sb);
if (!fsinfo_inode)
goto out_fail;
+ fat_dummy_inode_init(fsinfo_inode);
fsinfo_inode->i_ino = MSDOS_FSINFO_INO;
sbi->fsinfo_inode = fsinfo_inode;
insert_inode_hash(fsinfo_inode);
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 8821c380a71a..11538a8be9f0 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -100,6 +100,7 @@ static void fuse_file_put(struct fuse_file *ff, bool sync)
iput(req->misc.release.inode);
fuse_put_request(ff->fc, req);
} else if (sync) {
+ __set_bit(FR_FORCE, &req->flags);
__clear_bit(FR_BACKGROUND, &req->flags);
fuse_request_send(ff->fc, req);
iput(req->misc.release.inode);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 32e74710b1aa..9cd8c92b953d 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -651,9 +651,11 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
struct kmem_cache *cachep;
int ret, tries = 0;
+ rcu_read_lock();
gl = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms);
if (gl && !lockref_get_not_dead(&gl->gl_lockref))
gl = NULL;
+ rcu_read_unlock();
*glp = gl;
if (gl)
@@ -721,15 +723,18 @@ again:
if (ret == -EEXIST) {
ret = 0;
+ rcu_read_lock();
tmp = rhashtable_lookup_fast(&gl_hash_table, &name, ht_parms);
if (tmp == NULL || !lockref_get_not_dead(&tmp->gl_lockref)) {
if (++tries < 100) {
+ rcu_read_unlock();
cond_resched();
goto again;
}
tmp = NULL;
ret = -ENOMEM;
}
+ rcu_read_unlock();
} else {
WARN_ON_ONCE(ret);
}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index fa1b8e0dcacf..a2e724053919 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1876,7 +1876,9 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
__blist_del_buffer(list, jh);
jh->b_jlist = BJ_None;
- if (test_clear_buffer_jbddirty(bh))
+ if (transaction && is_journal_aborted(transaction->t_journal))
+ clear_buffer_jbddirty(bh);
+ else if (test_clear_buffer_jbddirty(bh))
mark_buffer_dirty(bh); /* Expose it to the VM */
}
diff --git a/fs/mount.h b/fs/mount.h
index 14db05d424f7..3dc7dea5a357 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -86,7 +86,6 @@ static inline int is_mounted(struct vfsmount *mnt)
}
extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *);
-extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *);
extern int __legitimize_mnt(struct vfsmount *, unsigned);
extern bool legitimize_mnt(struct vfsmount *, unsigned);
diff --git a/fs/namespace.c b/fs/namespace.c
index da98a1bbd8b5..7df3d406d3e0 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -638,28 +638,6 @@ struct mount *__lookup_mnt(struct vfsmount *mnt, struct dentry *dentry)
}
/*
- * find the last mount at @dentry on vfsmount @mnt.
- * mount_lock must be held.
- */
-struct mount *__lookup_mnt_last(struct vfsmount *mnt, struct dentry *dentry)
-{
- struct mount *p, *res = NULL;
- p = __lookup_mnt(mnt, dentry);
- if (!p)
- goto out;
- if (!(p->mnt.mnt_flags & MNT_UMOUNT))
- res = p;
- hlist_for_each_entry_continue(p, mnt_hash) {
- if (&p->mnt_parent->mnt != mnt || p->mnt_mountpoint != dentry)
- break;
- if (!(p->mnt.mnt_flags & MNT_UMOUNT))
- res = p;
- }
-out:
- return res;
-}
-
-/*
* lookup_mnt - Return the first child mount mounted at path
*
* "First" means first mounted chronologically. If you create the
@@ -879,6 +857,13 @@ void mnt_set_mountpoint(struct mount *mnt,
hlist_add_head(&child_mnt->mnt_mp_list, &mp->m_list);
}
+static void __attach_mnt(struct mount *mnt, struct mount *parent)
+{
+ hlist_add_head_rcu(&mnt->mnt_hash,
+ m_hash(&parent->mnt, mnt->mnt_mountpoint));
+ list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+}
+
/*
* vfsmount lock must be held for write
*/
@@ -887,28 +872,45 @@ static void attach_mnt(struct mount *mnt,
struct mountpoint *mp)
{
mnt_set_mountpoint(parent, mp, mnt);
- hlist_add_head_rcu(&mnt->mnt_hash, m_hash(&parent->mnt, mp->m_dentry));
- list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
+ __attach_mnt(mnt, parent);
}
-static void attach_shadowed(struct mount *mnt,
- struct mount *parent,
- struct mount *shadows)
+void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp, struct mount *mnt)
{
- if (shadows) {
- hlist_add_behind_rcu(&mnt->mnt_hash, &shadows->mnt_hash);
- list_add(&mnt->mnt_child, &shadows->mnt_child);
- } else {
- hlist_add_head_rcu(&mnt->mnt_hash,
- m_hash(&parent->mnt, mnt->mnt_mountpoint));
- list_add_tail(&mnt->mnt_child, &parent->mnt_mounts);
- }
+ struct mountpoint *old_mp = mnt->mnt_mp;
+ struct dentry *old_mountpoint = mnt->mnt_mountpoint;
+ struct mount *old_parent = mnt->mnt_parent;
+
+ list_del_init(&mnt->mnt_child);
+ hlist_del_init(&mnt->mnt_mp_list);
+ hlist_del_init_rcu(&mnt->mnt_hash);
+
+ attach_mnt(mnt, parent, mp);
+
+ put_mountpoint(old_mp);
+
+ /*
+ * Safely avoid even the suggestion this code might sleep or
+ * lock the mount hash by taking advantage of the knowledge that
+ * mnt_change_mountpoint will not release the final reference
+ * to a mountpoint.
+ *
+ * During mounting, the mount passed in as the parent mount will
+ * continue to use the old mountpoint and during unmounting, the
+ * old mountpoint will continue to exist until namespace_unlock,
+ * which happens well after mnt_change_mountpoint.
+ */
+ spin_lock(&old_mountpoint->d_lock);
+ old_mountpoint->d_lockref.count--;
+ spin_unlock(&old_mountpoint->d_lock);
+
+ mnt_add_count(old_parent, -1);
}
/*
* vfsmount lock must be held for write
*/
-static void commit_tree(struct mount *mnt, struct mount *shadows)
+static void commit_tree(struct mount *mnt)
{
struct mount *parent = mnt->mnt_parent;
struct mount *m;
@@ -923,7 +925,7 @@ static void commit_tree(struct mount *mnt, struct mount *shadows)
list_splice(&head, n->list.prev);
- attach_shadowed(mnt, parent, shadows);
+ __attach_mnt(mnt, parent);
touch_mnt_namespace(n);
}
@@ -1718,7 +1720,6 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
continue;
for (s = r; s; s = next_mnt(s, r)) {
- struct mount *t = NULL;
if (!(flag & CL_COPY_UNBINDABLE) &&
IS_MNT_UNBINDABLE(s)) {
s = skip_mnt_tree(s);
@@ -1740,14 +1741,7 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
goto out;
lock_mount_hash();
list_add_tail(&q->mnt_list, &res->mnt_list);
- mnt_set_mountpoint(parent, p->mnt_mp, q);
- if (!list_empty(&parent->mnt_mounts)) {
- t = list_last_entry(&parent->mnt_mounts,
- struct mount, mnt_child);
- if (t->mnt_mp != p->mnt_mp)
- t = NULL;
- }
- attach_shadowed(q, parent, t);
+ attach_mnt(q, parent, p->mnt_mp);
unlock_mount_hash();
}
}
@@ -1925,10 +1919,18 @@ static int attach_recursive_mnt(struct mount *source_mnt,
struct path *parent_path)
{
HLIST_HEAD(tree_list);
+ struct mountpoint *smp;
struct mount *child, *p;
struct hlist_node *n;
int err;
+ /* Preallocate a mountpoint in case the new mounts need
+ * to be tucked under other mounts.
+ */
+ smp = get_mountpoint(source_mnt->mnt.mnt_root);
+ if (IS_ERR(smp))
+ return PTR_ERR(smp);
+
if (IS_MNT_SHARED(dest_mnt)) {
err = invent_group_ids(source_mnt, true);
if (err)
@@ -1948,16 +1950,19 @@ static int attach_recursive_mnt(struct mount *source_mnt,
touch_mnt_namespace(source_mnt->mnt_ns);
} else {
mnt_set_mountpoint(dest_mnt, dest_mp, source_mnt);
- commit_tree(source_mnt, NULL);
+ commit_tree(source_mnt);
}
hlist_for_each_entry_safe(child, n, &tree_list, mnt_hash) {
struct mount *q;
hlist_del_init(&child->mnt_hash);
- q = __lookup_mnt_last(&child->mnt_parent->mnt,
- child->mnt_mountpoint);
- commit_tree(child, q);
+ q = __lookup_mnt(&child->mnt_parent->mnt,
+ child->mnt_mountpoint);
+ if (q)
+ mnt_change_mountpoint(child, smp, q);
+ commit_tree(child);
}
+ put_mountpoint(smp);
unlock_mount_hash();
return 0;
@@ -1970,6 +1975,10 @@ static int attach_recursive_mnt(struct mount *source_mnt,
unlock_mount_hash();
cleanup_group_ids(source_mnt, NULL);
out:
+ read_seqlock_excl(&mount_lock);
+ put_mountpoint(smp);
+ read_sequnlock_excl(&mount_lock);
+
return err;
}
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9a524e763c3e..4e3679b25b9b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2452,6 +2452,7 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
ret = PTR_ERR(state);
if (IS_ERR(state))
goto out;
+ ctx->state = state;
if (server->caps & NFS_CAP_POSIX_LOCK)
set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
@@ -2474,7 +2475,6 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
if (ret != 0)
goto out;
- ctx->state = state;
if (d_inode(dentry) == state->inode) {
nfs_inode_attach_open_context(ctx);
if (read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
@@ -4711,7 +4711,7 @@ out:
*/
static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t buflen)
{
- struct page *pages[NFS4ACL_MAXPAGES] = {NULL, };
+ struct page *pages[NFS4ACL_MAXPAGES + 1] = {NULL, };
struct nfs_getaclargs args = {
.fh = NFS_FH(inode),
.acl_pages = pages,
@@ -4725,13 +4725,9 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
.rpc_argp = &args,
.rpc_resp = &res,
};
- unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE);
+ unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE) + 1;
int ret = -ENOMEM, i;
- /* As long as we're doing a round trip to the server anyway,
- * let's be prepared for a page of acl data. */
- if (npages == 0)
- npages = 1;
if (npages > ARRAY_SIZE(pages))
return -ERANGE;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4e4441216804..1cb50bb898b0 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -2487,7 +2487,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_compound_hdr(xdr, req, &hdr);
encode_sequence(xdr, &args->seq_args, &hdr);
encode_putfh(xdr, args->fh, &hdr);
- replen = hdr.replen + op_decode_hdr_maxsz + 1;
+ replen = hdr.replen + op_decode_hdr_maxsz;
encode_getattr_two(xdr, FATTR4_WORD0_ACL, 0, &hdr);
xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 994d66fbb446..91e0c5429b4d 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -369,7 +369,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
__be32 err;
int host_err;
bool get_write_count;
- int size_change = 0;
+ bool size_change = (iap->ia_valid & ATTR_SIZE);
if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
@@ -382,11 +382,11 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
/* Get inode */
err = fh_verify(rqstp, fhp, ftype, accmode);
if (err)
- goto out;
+ return err;
if (get_write_count) {
host_err = fh_want_write(fhp);
if (host_err)
- return nfserrno(host_err);
+ goto out;
}
dentry = fhp->fh_dentry;
@@ -397,20 +397,28 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
iap->ia_valid &= ~ATTR_MODE;
if (!iap->ia_valid)
- goto out;
+ return 0;
nfsd_sanitize_attrs(inode, iap);
+ if (check_guard && guardtime != inode->i_ctime.tv_sec)
+ return nfserr_notsync;
+
/*
* The size case is special, it changes the file in addition to the
- * attributes.
+ * attributes, and file systems don't expect it to be mixed with
+ * "random" attribute changes. We thus split out the size change
+ * into a separate call to ->setattr, and do the rest as a separate
+ * setattr call.
*/
- if (iap->ia_valid & ATTR_SIZE) {
+ if (size_change) {
err = nfsd_get_write_access(rqstp, fhp, iap);
if (err)
- goto out;
- size_change = 1;
+ return err;
+ }
+ fh_lock(fhp);
+ if (size_change) {
/*
* RFC5661, Section 18.30.4:
* Changing the size of a file with SETATTR indirectly
@@ -418,29 +426,36 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
*
* (and similar for the older RFCs)
*/
- if (iap->ia_size != i_size_read(inode))
- iap->ia_valid |= ATTR_MTIME;
- }
+ struct iattr size_attr = {
+ .ia_valid = ATTR_SIZE | ATTR_CTIME | ATTR_MTIME,
+ .ia_size = iap->ia_size,
+ };
- iap->ia_valid |= ATTR_CTIME;
+ host_err = notify_change(dentry, &size_attr, NULL);
+ if (host_err)
+ goto out_unlock;
+ iap->ia_valid &= ~ATTR_SIZE;
- if (check_guard && guardtime != inode->i_ctime.tv_sec) {
- err = nfserr_notsync;
- goto out_put_write_access;
+ /*
+ * Avoid the additional setattr call below if the only other
+ * attribute that the client sends is the mtime, as we update
+ * it as part of the size change above.
+ */
+ if ((iap->ia_valid & ~ATTR_MTIME) == 0)
+ goto out_unlock;
}
- fh_lock(fhp);
+ iap->ia_valid |= ATTR_CTIME;
host_err = notify_change(dentry, iap, NULL);
- fh_unlock(fhp);
- err = nfserrno(host_err);
-out_put_write_access:
+out_unlock:
+ fh_unlock(fhp);
if (size_change)
put_write_access(inode);
- if (!err)
- err = nfserrno(commit_metadata(fhp));
out:
- return err;
+ if (!host_err)
+ host_err = commit_metadata(fhp);
+ return nfserrno(host_err);
}
#if defined(CONFIG_NFSD_V4)
diff --git a/fs/pnode.c b/fs/pnode.c
index 99899705b105..b9f2af59b9a6 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -324,6 +324,21 @@ out:
return ret;
}
+static struct mount *find_topper(struct mount *mnt)
+{
+ /* If there is exactly one mount covering mnt completely return it. */
+ struct mount *child;
+
+ if (!list_is_singular(&mnt->mnt_mounts))
+ return NULL;
+
+ child = list_first_entry(&mnt->mnt_mounts, struct mount, mnt_child);
+ if (child->mnt_mountpoint != mnt->mnt.mnt_root)
+ return NULL;
+
+ return child;
+}
+
/*
* return true if the refcount is greater than count
*/
@@ -344,9 +359,8 @@ static inline int do_refcount_check(struct mount *mnt, int count)
*/
int propagate_mount_busy(struct mount *mnt, int refcnt)
{
- struct mount *m, *child;
+ struct mount *m, *child, *topper;
struct mount *parent = mnt->mnt_parent;
- int ret = 0;
if (mnt == parent)
return do_refcount_check(mnt, refcnt);
@@ -361,12 +375,24 @@ int propagate_mount_busy(struct mount *mnt, int refcnt)
for (m = propagation_next(parent, parent); m;
m = propagation_next(m, parent)) {
- child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint);
- if (child && list_empty(&child->mnt_mounts) &&
- (ret = do_refcount_check(child, 1)))
- break;
+ int count = 1;
+ child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
+ if (!child)
+ continue;
+
+ /* Is there exactly one mount on the child that covers
+ * it completely whose reference should be ignored?
+ */
+ topper = find_topper(child);
+ if (topper)
+ count += 1;
+ else if (!list_empty(&child->mnt_mounts))
+ continue;
+
+ if (do_refcount_check(child, count))
+ return 1;
}
- return ret;
+ return 0;
}
/*
@@ -383,7 +409,7 @@ void propagate_mount_unlock(struct mount *mnt)
for (m = propagation_next(parent, parent); m;
m = propagation_next(m, parent)) {
- child = __lookup_mnt_last(&m->mnt, mnt->mnt_mountpoint);
+ child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
if (child)
child->mnt.mnt_flags &= ~MNT_LOCKED;
}
@@ -401,9 +427,11 @@ static void mark_umount_candidates(struct mount *mnt)
for (m = propagation_next(parent, parent); m;
m = propagation_next(m, parent)) {
- struct mount *child = __lookup_mnt_last(&m->mnt,
+ struct mount *child = __lookup_mnt(&m->mnt,
mnt->mnt_mountpoint);
- if (child && (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m))) {
+ if (!child || (child->mnt.mnt_flags & MNT_UMOUNT))
+ continue;
+ if (!IS_MNT_LOCKED(child) || IS_MNT_MARKED(m)) {
SET_MNT_MARK(child);
}
}
@@ -422,8 +450,8 @@ static void __propagate_umount(struct mount *mnt)
for (m = propagation_next(parent, parent); m;
m = propagation_next(m, parent)) {
-
- struct mount *child = __lookup_mnt_last(&m->mnt,
+ struct mount *topper;
+ struct mount *child = __lookup_mnt(&m->mnt,
mnt->mnt_mountpoint);
/*
* umount the child only if the child has no children
@@ -432,6 +460,15 @@ static void __propagate_umount(struct mount *mnt)
if (!child || !IS_MNT_MARKED(child))
continue;
CLEAR_MNT_MARK(child);
+
+ /* If there is exactly one mount covering all of child
+ * replace child with that mount.
+ */
+ topper = find_topper(child);
+ if (topper)
+ mnt_change_mountpoint(child->mnt_parent, child->mnt_mp,
+ topper);
+
if (list_empty(&child->mnt_mounts)) {
list_del_init(&child->mnt_child);
child->mnt.mnt_flags |= MNT_UMOUNT;
diff --git a/fs/pnode.h b/fs/pnode.h
index 0fcdbe7ca648..623f01772bec 100644
--- a/fs/pnode.h
+++ b/fs/pnode.h
@@ -49,6 +49,8 @@ int get_dominating_id(struct mount *mnt, const struct path *root);
unsigned int mnt_get_count(struct mount *mnt);
void mnt_set_mountpoint(struct mount *, struct mountpoint *,
struct mount *);
+void mnt_change_mountpoint(struct mount *parent, struct mountpoint *mp,
+ struct mount *mnt);
struct mount *copy_tree(struct mount *, struct dentry *, int);
bool is_path_reachable(struct mount *, struct dentry *,
const struct path *root);