aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c16
-rw-r--r--fs/attr.c13
-rw-r--r--fs/binfmt_elf.c30
-rw-r--r--fs/bio-integrity.c14
-rw-r--r--fs/bio.c4
-rw-r--r--fs/btrfs/acl.c2
-rw-r--r--fs/btrfs/backref.c32
-rw-r--r--fs/btrfs/backref.h4
-rw-r--r--fs/btrfs/compression.c2
-rw-r--r--fs/btrfs/delayed-inode.c8
-rw-r--r--fs/btrfs/disk-io.c15
-rw-r--r--fs/btrfs/extent-tree.c15
-rw-r--r--fs/btrfs/extent_io.c5
-rw-r--r--fs/btrfs/extent_map.c2
-rw-r--r--fs/btrfs/file-item.c4
-rw-r--r--fs/btrfs/free-space-cache.c4
-rw-r--r--fs/btrfs/inode.c62
-rw-r--r--fs/btrfs/ioctl.c9
-rw-r--r--fs/btrfs/relocation.c19
-rw-r--r--fs/btrfs/scrub.c5
-rw-r--r--fs/btrfs/send.c11
-rw-r--r--fs/btrfs/transaction.c12
-rw-r--r--fs/btrfs/tree-log.c2
-rw-r--r--fs/btrfs/volumes.c34
-rw-r--r--fs/buffer.c20
-rw-r--r--fs/ceph/addr.c8
-rw-r--r--fs/ceph/file.c47
-rw-r--r--fs/ceph/ioctl.c8
-rw-r--r--fs/ceph/mds_client.c15
-rw-r--r--fs/ceph/mdsmap.c4
-rw-r--r--fs/ceph/super.c2
-rw-r--r--fs/cifs/cifs_unicode.c7
-rw-r--r--fs/cifs/cifsacl.c28
-rw-r--r--fs/cifs/cifsglob.h17
-rw-r--r--fs/cifs/cifssmb.c8
-rw-r--r--fs/cifs/dir.c1
-rw-r--r--fs/cifs/file.c43
-rw-r--r--fs/cifs/inode.c24
-rw-r--r--fs/cifs/readdir.c6
-rw-r--r--fs/cifs/smb1ops.c15
-rw-r--r--fs/cifs/smb2file.c2
-rw-r--r--fs/cifs/smb2glob.h3
-rw-r--r--fs/cifs/smb2inode.c2
-rw-r--r--fs/cifs/smb2maperror.c4
-rw-r--r--fs/cifs/smb2ops.c24
-rw-r--r--fs/cifs/smb2pdu.c17
-rw-r--r--fs/cifs/xattr.c64
-rw-r--r--fs/compat_binfmt_elf.c5
-rw-r--r--fs/configfs/dir.c16
-rw-r--r--fs/coredump.c2
-rw-r--r--fs/dcache.c23
-rw-r--r--fs/dcookies.c2
-rw-r--r--fs/devpts/inode.c1
-rw-r--r--fs/ecryptfs/crypto.c1
-rw-r--r--fs/ecryptfs/file.c12
-rw-r--r--fs/ecryptfs/inode.c2
-rw-r--r--fs/ecryptfs/keystore.c3
-rw-r--r--fs/ecryptfs/main.c16
-rw-r--r--fs/exec.c12
-rw-r--r--fs/exofs/ore.c37
-rw-r--r--fs/ext2/inode.c2
-rw-r--r--fs/ext2/super.c1
-rw-r--r--fs/ext2/xip.c1
-rw-r--r--fs/ext3/super.c7
-rw-r--r--fs/ext4/ext4.h17
-rw-r--r--fs/ext4/ext4_jbd2.c9
-rw-r--r--fs/ext4/extents.c72
-rw-r--r--fs/ext4/file.c2
-rw-r--r--fs/ext4/ialloc.c18
-rw-r--r--fs/ext4/indirect.c20
-rw-r--r--fs/ext4/inline.c8
-rw-r--r--fs/ext4/inode.c55
-rw-r--r--fs/ext4/ioctl.c13
-rw-r--r--fs/ext4/mballoc.c49
-rw-r--r--fs/ext4/namei.c4
-rw-r--r--fs/ext4/page-io.c24
-rw-r--r--fs/ext4/resize.c36
-rw-r--r--fs/ext4/super.c59
-rw-r--r--fs/ext4/xattr.c58
-rw-r--r--fs/file.c2
-rw-r--r--fs/file_table.c4
-rw-r--r--fs/fs-writeback.c75
-rw-r--r--fs/fuse/dev.c22
-rw-r--r--fs/fuse/file.c23
-rw-r--r--fs/fuse/inode.c20
-rw-r--r--fs/gfs2/aops.c30
-rw-r--r--fs/gfs2/inode.c16
-rw-r--r--fs/gfs2/ops_fstype.c12
-rw-r--r--fs/hugetlbfs/inode.c8
-rw-r--r--fs/inode.c10
-rw-r--r--fs/ioprio.c14
-rw-r--r--fs/isofs/inode.c15
-rw-r--r--fs/isofs/isofs.h23
-rw-r--r--fs/isofs/rock.c48
-rw-r--r--fs/jbd2/recovery.c8
-rw-r--r--fs/jbd2/transaction.c11
-rw-r--r--fs/jffs2/compr_rtime.c4
-rw-r--r--fs/jffs2/jffs2_fs_sb.h2
-rw-r--r--fs/jffs2/nodelist.h2
-rw-r--r--fs/jffs2/nodemgmt.c14
-rw-r--r--fs/jffs2/wbuf.c17
-rw-r--r--fs/jfs/jfs_inode.c3
-rw-r--r--fs/lockd/mon.c6
-rw-r--r--fs/lockd/svc.c5
-rw-r--r--fs/lockd/svclock.c8
-rw-r--r--fs/locks.c7
-rw-r--r--fs/mount.h2
-rw-r--r--fs/namei.c17
-rw-r--r--fs/namespace.c73
-rw-r--r--fs/ncpfs/ioctl.c1
-rw-r--r--fs/nfs/blocklayout/extents.c2
-rw-r--r--fs/nfs/delegation.c36
-rw-r--r--fs/nfs/delegation.h1
-rw-r--r--fs/nfs/direct.c1
-rw-r--r--fs/nfs/inode.c28
-rw-r--r--fs/nfs/nfs3acl.c5
-rw-r--r--fs/nfs/nfs4client.c56
-rw-r--r--fs/nfs/nfs4filelayout.c2
-rw-r--r--fs/nfs/nfs4filelayoutdev.c18
-rw-r--r--fs/nfs/nfs4proc.c143
-rw-r--r--fs/nfs/nfs4renewd.c12
-rw-r--r--fs/nfs/nfs4state.c17
-rw-r--r--fs/nfs/nfs4xdr.c47
-rw-r--r--fs/nfsd/export.c15
-rw-r--r--fs/nfsd/nfs4acl.c17
-rw-r--r--fs/nfsd/nfs4callback.c15
-rw-r--r--fs/nfsd/nfs4proc.c21
-rw-r--r--fs/nfsd/nfs4state.c99
-rw-r--r--fs/nfsd/nfs4xdr.c25
-rw-r--r--fs/nfsd/nfscache.c46
-rw-r--r--fs/nfsd/nfsctl.c5
-rw-r--r--fs/nfsd/nfssvc.c5
-rw-r--r--fs/nfsd/vfs.c183
-rw-r--r--fs/nilfs2/inode.c39
-rw-r--r--fs/nilfs2/namei.c15
-rw-r--r--fs/nilfs2/page.c2
-rw-r--r--fs/nilfs2/segment.c21
-rw-r--r--fs/notify/fanotify/fanotify_user.c6
-rw-r--r--fs/notify/fdinfo.c4
-rw-r--r--fs/ocfs2/aops.c16
-rw-r--r--fs/ocfs2/buffer_head_io.c2
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c18
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c29
-rw-r--r--fs/ocfs2/file.c8
-rw-r--r--fs/ocfs2/quota_global.c27
-rw-r--r--fs/ocfs2/quota_local.c4
-rw-r--r--fs/open.c26
-rw-r--r--fs/pipe.c39
-rw-r--r--fs/posix_acl.c6
-rw-r--r--fs/proc/array.c11
-rw-r--r--fs/proc/base.c54
-rw-r--r--fs/proc/page.c2
-rw-r--r--fs/pstore/inode.c4
-rw-r--r--fs/pstore/ram.c13
-rw-r--r--fs/pstore/ram_core.c31
-rw-r--r--fs/quota/dquot.c16
-rw-r--r--fs/read_write.c16
-rw-r--r--fs/reiserfs/dir.c6
-rw-r--r--fs/reiserfs/inode.c8
-rw-r--r--fs/seq_file.c2
-rw-r--r--fs/splice.c18
-rw-r--r--fs/statfs.c2
-rw-r--r--fs/super.c2
-rw-r--r--fs/sysv/super.c1
-rw-r--r--fs/ubifs/commit.c10
-rw-r--r--fs/ubifs/file.c3
-rw-r--r--fs/ubifs/log.c19
-rw-r--r--fs/ubifs/master.c7
-rw-r--r--fs/ubifs/shrinker.c1
-rw-r--r--fs/ubifs/super.c1
-rw-r--r--fs/ubifs/ubifs.h2
-rw-r--r--fs/udf/inode.c35
-rw-r--r--fs/udf/super.c342
-rw-r--r--fs/udf/symlink.c17
-rw-r--r--fs/xfs/xfs_aops.c61
-rw-r--r--fs/xfs/xfs_da_btree.c7
-rw-r--r--fs/xfs/xfs_dquot.c3
-rw-r--r--fs/xfs/xfs_file.c21
-rw-r--r--fs/xfs/xfs_fsops.c6
-rw-r--r--fs/xfs/xfs_ioctl.c9
-rw-r--r--fs/xfs/xfs_ioctl32.c3
-rw-r--r--fs/xfs/xfs_qm.c8
182 files changed, 2403 insertions, 1181 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 2bbcacf74d0c..ded94c4fa30d 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -310,7 +310,6 @@ static void free_ioctx(struct kioctx *ctx)
avail = (head <= ctx->tail ? ctx->tail : ctx->nr_events) - head;
- atomic_sub(avail, &ctx->reqs_active);
head += avail;
head %= ctx->nr_events;
}
@@ -423,10 +422,12 @@ static void kill_ioctx_rcu(struct rcu_head *head)
* when the processes owning a context have all exited to encourage
* the rapid destruction of the kioctx.
*/
-static void kill_ioctx(struct kioctx *ctx)
+static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx)
{
if (!atomic_xchg(&ctx->dead, 1)) {
+ spin_lock(&mm->ioctx_lock);
hlist_del_rcu(&ctx->list);
+ spin_unlock(&mm->ioctx_lock);
/*
* It'd be more correct to do this in free_ioctx(), after all
@@ -494,7 +495,7 @@ void exit_aio(struct mm_struct *mm)
*/
ctx->mmap_size = 0;
- kill_ioctx(ctx);
+ kill_ioctx(mm, ctx);
}
}
@@ -676,6 +677,7 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
put_rq:
/* everything turned out well, dispose of the aiocb. */
aio_put_req(iocb);
+ atomic_dec(&ctx->reqs_active);
/*
* We have to order our ring_info tail store above and test
@@ -715,6 +717,8 @@ static long aio_read_events_ring(struct kioctx *ctx,
if (head == ctx->tail)
goto out;
+ head %= ctx->nr_events;
+
while (ret < nr) {
long avail;
struct io_event *ev;
@@ -753,8 +757,6 @@ static long aio_read_events_ring(struct kioctx *ctx,
flush_dcache_page(ctx->ring_pages[0]);
pr_debug("%li h%u t%u\n", ret, head, ctx->tail);
-
- atomic_sub(ret, &ctx->reqs_active);
out:
mutex_unlock(&ctx->ring_lock);
@@ -852,7 +854,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
if (!IS_ERR(ioctx)) {
ret = put_user(ioctx->user_id, ctxp);
if (ret)
- kill_ioctx(ioctx);
+ kill_ioctx(current->mm, ioctx);
put_ioctx(ioctx);
}
@@ -870,7 +872,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
{
struct kioctx *ioctx = lookup_ioctx(ctx);
if (likely(NULL != ioctx)) {
- kill_ioctx(ioctx);
+ kill_ioctx(current->mm, ioctx);
put_ioctx(ioctx);
return 0;
}
diff --git a/fs/attr.c b/fs/attr.c
index 1449adb14ef6..66fa6251c398 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -50,14 +50,14 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
if ((ia_valid & ATTR_UID) &&
(!uid_eq(current_fsuid(), inode->i_uid) ||
!uid_eq(attr->ia_uid, inode->i_uid)) &&
- !inode_capable(inode, CAP_CHOWN))
+ !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
return -EPERM;
/* Make sure caller can chgrp. */
if ((ia_valid & ATTR_GID) &&
(!uid_eq(current_fsuid(), inode->i_uid) ||
(!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) &&
- !inode_capable(inode, CAP_CHOWN))
+ !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
return -EPERM;
/* Make sure a caller can chmod. */
@@ -67,7 +67,7 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
/* Also check the setgid bit! */
if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
inode->i_gid) &&
- !inode_capable(inode, CAP_FSETID))
+ !capable_wrt_inode_uidgid(inode, CAP_FSETID))
attr->ia_mode &= ~S_ISGID;
}
@@ -160,7 +160,7 @@ void setattr_copy(struct inode *inode, const struct iattr *attr)
umode_t mode = attr->ia_mode;
if (!in_group_p(inode->i_gid) &&
- !inode_capable(inode, CAP_FSETID))
+ !capable_wrt_inode_uidgid(inode, CAP_FSETID))
mode &= ~S_ISGID;
inode->i_mode = mode;
}
@@ -182,11 +182,6 @@ int notify_change(struct dentry * dentry, struct iattr * attr)
return -EPERM;
}
- if ((ia_valid & ATTR_SIZE) && IS_I_VERSION(inode)) {
- if (attr->ia_size != inode->i_size)
- inode_inc_iversion(inode);
- }
-
if ((ia_valid & ATTR_MODE)) {
umode_t amode = attr->ia_mode;
/* Flag setting protected by i_mutex */
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index f8a0b0efda44..3aac8e9edac3 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1415,7 +1415,7 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata,
* long file_ofs
* followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL...
*/
-static void fill_files_note(struct memelfnote *note)
+static int fill_files_note(struct memelfnote *note)
{
struct vm_area_struct *vma;
unsigned count, size, names_ofs, remaining, n;
@@ -1430,11 +1430,11 @@ static void fill_files_note(struct memelfnote *note)
names_ofs = (2 + 3 * count) * sizeof(data[0]);
alloc:
if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */
- goto err;
+ return -EINVAL;
size = round_up(size, PAGE_SIZE);
data = vmalloc(size);
if (!data)
- goto err;
+ return -ENOMEM;
start_end_ofs = data + 2;
name_base = name_curpos = ((char *)data) + names_ofs;
@@ -1487,7 +1487,7 @@ static void fill_files_note(struct memelfnote *note)
size = name_curpos - (char *)data;
fill_note(note, "CORE", NT_FILE, size, data);
- err: ;
+ return 0;
}
#ifdef CORE_DUMP_USE_REGSET
@@ -1688,8 +1688,8 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
fill_auxv_note(&info->auxv, current->mm);
info->size += notesize(&info->auxv);
- fill_files_note(&info->files);
- info->size += notesize(&info->files);
+ if (fill_files_note(&info->files) == 0)
+ info->size += notesize(&info->files);
return 1;
}
@@ -1721,7 +1721,8 @@ static int write_note_info(struct elf_note_info *info,
return 0;
if (first && !writenote(&info->auxv, file, foffset))
return 0;
- if (first && !writenote(&info->files, file, foffset))
+ if (first && info->files.data &&
+ !writenote(&info->files, file, foffset))
return 0;
for (i = 1; i < info->thread_notes; ++i)
@@ -1808,6 +1809,7 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
struct elf_note_info {
struct memelfnote *notes;
+ struct memelfnote *notes_files;
struct elf_prstatus *prstatus; /* NT_PRSTATUS */
struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */
struct list_head thread_list;
@@ -1898,9 +1900,12 @@ static int fill_note_info(struct elfhdr *elf, int phdrs,
fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo);
fill_auxv_note(info->notes + 3, current->mm);
- fill_files_note(info->notes + 4);
+ info->numnote = 4;
- info->numnote = 5;
+ if (fill_files_note(info->notes + info->numnote) == 0) {
+ info->notes_files = info->notes + info->numnote;
+ info->numnote++;
+ }
/* Try to dump the FPU. */
info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
@@ -1962,8 +1967,9 @@ static void free_note_info(struct elf_note_info *info)
kfree(list_entry(tmp, struct elf_thread_status, list));
}
- /* Free data allocated by fill_files_note(): */
- vfree(info->notes[4].data);
+ /* Free data possibly allocated by fill_files_note(): */
+ if (info->notes_files)
+ vfree(info->notes_files->data);
kfree(info->prstatus);
kfree(info->psinfo);
@@ -2046,7 +2052,7 @@ static int elf_core_dump(struct coredump_params *cprm)
struct vm_area_struct *vma, *gate_vma;
struct elfhdr *elf = NULL;
loff_t offset = 0, dataoff, foffset;
- struct elf_note_info info;
+ struct elf_note_info info = { };
struct elf_phdr *phdr4note = NULL;
struct elf_shdr *shdr4extnum = NULL;
Elf_Half e_phnum;
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 8fb42916d8a2..433c3b828e1d 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -114,6 +114,14 @@ void bio_integrity_free(struct bio *bio)
}
EXPORT_SYMBOL(bio_integrity_free);
+static inline unsigned int bip_integrity_vecs(struct bio_integrity_payload *bip)
+{
+ if (bip->bip_slab == BIO_POOL_NONE)
+ return BIP_INLINE_VECS;
+
+ return bvec_nr_vecs(bip->bip_slab);
+}
+
/**
* bio_integrity_add_page - Attach integrity metadata
* @bio: bio to update
@@ -129,7 +137,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page,
struct bio_integrity_payload *bip = bio->bi_integrity;
struct bio_vec *iv;
- if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_slab)) {
+ if (bip->bip_vcnt >= bip_integrity_vecs(bip)) {
printk(KERN_ERR "%s: bip_vec full\n", __func__);
return 0;
}
@@ -450,7 +458,7 @@ static int bio_integrity_verify(struct bio *bio)
bix.disk_name = bio->bi_bdev->bd_disk->disk_name;
bix.sector_size = bi->sector_size;
- bio_for_each_segment(bv, bio, i) {
+ bio_for_each_segment_all(bv, bio, i) {
void *kaddr = kmap_atomic(bv->bv_page);
bix.data_buf = kaddr + bv->bv_offset;
bix.data_size = bv->bv_len;
@@ -734,7 +742,7 @@ void bioset_integrity_free(struct bio_set *bs)
mempool_destroy(bs->bio_integrity_pool);
if (bs->bvec_integrity_pool)
- mempool_destroy(bs->bio_integrity_pool);
+ mempool_destroy(bs->bvec_integrity_pool);
}
EXPORT_SYMBOL(bioset_integrity_free);
diff --git a/fs/bio.c b/fs/bio.c
index c5eae7251490..5e7507d79297 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -917,8 +917,8 @@ void bio_copy_data(struct bio *dst, struct bio *src)
src_p = kmap_atomic(src_bv->bv_page);
dst_p = kmap_atomic(dst_bv->bv_page);
- memcpy(dst_p + dst_bv->bv_offset,
- src_p + src_bv->bv_offset,
+ memcpy(dst_p + dst_offset,
+ src_p + src_offset,
bytes);
kunmap_atomic(dst_p);
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index e15d2b0d8d3b..0890c83643e9 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -229,7 +229,7 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans,
if (ret > 0) {
/* we need an acl */
ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS);
- } else {
+ } else if (ret < 0) {
cache_no_acl(inode);
}
} else {
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 290e347b6db3..d85f90c92bb4 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1347,9 +1347,10 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
* returns <0 on error
*/
static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb,
- struct btrfs_extent_item *ei, u32 item_size,
- struct btrfs_extent_inline_ref **out_eiref,
- int *out_type)
+ struct btrfs_key *key,
+ struct btrfs_extent_item *ei, u32 item_size,
+ struct btrfs_extent_inline_ref **out_eiref,
+ int *out_type)
{
unsigned long end;
u64 flags;
@@ -1359,19 +1360,26 @@ static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb,
/* first call */
flags = btrfs_extent_flags(eb, ei);
if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
- info = (struct btrfs_tree_block_info *)(ei + 1);
- *out_eiref =
- (struct btrfs_extent_inline_ref *)(info + 1);
+ if (key->type == BTRFS_METADATA_ITEM_KEY) {
+ /* a skinny metadata extent */
+ *out_eiref =
+ (struct btrfs_extent_inline_ref *)(ei + 1);
+ } else {
+ WARN_ON(key->type != BTRFS_EXTENT_ITEM_KEY);
+ info = (struct btrfs_tree_block_info *)(ei + 1);
+ *out_eiref =
+ (struct btrfs_extent_inline_ref *)(info + 1);
+ }
} else {
*out_eiref = (struct btrfs_extent_inline_ref *)(ei + 1);
}
*ptr = (unsigned long)*out_eiref;
- if ((void *)*ptr >= (void *)ei + item_size)
+ if ((unsigned long)(*ptr) >= (unsigned long)ei + item_size)
return -ENOENT;
}
end = (unsigned long)ei + item_size;
- *out_eiref = (struct btrfs_extent_inline_ref *)*ptr;
+ *out_eiref = (struct btrfs_extent_inline_ref *)(*ptr);
*out_type = btrfs_extent_inline_ref_type(eb, *out_eiref);
*ptr += btrfs_extent_inline_ref_size(*out_type);
@@ -1390,8 +1398,8 @@ static int __get_extent_inline_ref(unsigned long *ptr, struct extent_buffer *eb,
* <0 on error.
*/
int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
- struct btrfs_extent_item *ei, u32 item_size,
- u64 *out_root, u8 *out_level)
+ struct btrfs_key *key, struct btrfs_extent_item *ei,
+ u32 item_size, u64 *out_root, u8 *out_level)
{
int ret;
int type;
@@ -1402,8 +1410,8 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
return 1;
while (1) {
- ret = __get_extent_inline_ref(ptr, eb, ei, item_size,
- &eiref, &type);
+ ret = __get_extent_inline_ref(ptr, eb, key, ei, item_size,
+ &eiref, &type);
if (ret < 0)
return ret;
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 0f446d7ca2c0..526d09e70c93 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -42,8 +42,8 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
u64 *flags);
int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb,
- struct btrfs_extent_item *ei, u32 item_size,
- u64 *out_root, u8 *out_level);
+ struct btrfs_key *key, struct btrfs_extent_item *ei,
+ u32 item_size, u64 *out_root, u8 *out_level);
int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
u64 extent_item_objectid,
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index b189bd1e7a3e..ce7067881d36 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -1009,6 +1009,8 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
bytes = min(bytes, working_bytes);
kaddr = kmap_atomic(page_out);
memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
+ if (*pg_index == (vcnt - 1) && *pg_offset == 0)
+ memset(kaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
kunmap_atomic(kaddr);
flush_dcache_page(page_out);
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index f26f38ccd194..019fc5a68a14 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1843,6 +1843,14 @@ int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
struct btrfs_delayed_node *delayed_node;
int ret = 0;
+ /*
+ * we don't do delayed inode updates during log recovery because it
+ * leads to enospc problems. This means we also can't do
+ * delayed inode refs
+ */
+ if (BTRFS_I(inode)->root->fs_info->log_root_recovering)
+ return -EAGAIN;
+
delayed_node = btrfs_get_or_create_delayed_node(inode);
if (IS_ERR(delayed_node))
return PTR_ERR(delayed_node);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index b8b60b660c8f..7360f03ddbe1 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3161,6 +3161,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
/* send down all the barriers */
head = &info->fs_devices->devices;
list_for_each_entry_rcu(dev, head, dev_list) {
+ if (dev->missing)
+ continue;
if (!dev->bdev) {
errors_send++;
continue;
@@ -3175,6 +3177,8 @@ static int barrier_all_devices(struct btrfs_fs_info *info)
/* wait for all the barriers */
list_for_each_entry_rcu(dev, head, dev_list) {
+ if (dev->missing)
+ continue;
if (!dev->bdev) {
errors_wait++;
continue;
@@ -3514,6 +3518,11 @@ int close_ctree(struct btrfs_root *root)
btrfs_free_block_groups(fs_info);
+ /*
+ * we must make sure there is not any read request to
+ * submit after we stopping all workers.
+ */
+ invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
btrfs_stop_all_workers(fs_info);
del_fs_roots(fs_info);
@@ -3848,12 +3857,6 @@ again:
if (ret)
break;
- /* opt_discard */
- if (btrfs_test_opt(root, DISCARD))
- ret = btrfs_error_discard_extent(root, start,
- end + 1 - start,
- NULL);
-
clear_extent_dirty(unpin, start, end, GFP_NOFS);
btrfs_error_unpin_extent_range(root, start, end);
cond_resched();
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0b272d068337..f99c71e40f8b 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2402,6 +2402,8 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
default:
WARN_ON(1);
}
+ } else {
+ list_del_init(&locked_ref->cluster);
}
spin_unlock(&delayed_refs->lock);
@@ -2424,7 +2426,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
* list before we release it.
*/
if (btrfs_delayed_ref_is_head(ref)) {
- list_del_init(&locked_ref->cluster);
btrfs_delayed_ref_unlock(locked_ref);
locked_ref = NULL;
}
@@ -5276,7 +5277,8 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
update_global_block_rsv(fs_info);
}
-static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
+static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
+ const bool return_free_space)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_group_cache *cache = NULL;
@@ -5300,7 +5302,8 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
if (start < cache->last_byte_to_unpin) {
len = min(len, cache->last_byte_to_unpin - start);
- btrfs_add_free_space(cache, start, len);
+ if (return_free_space)
+ btrfs_add_free_space(cache, start, len);
}
start += len;
@@ -5363,7 +5366,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
end + 1 - start, NULL);
clear_extent_dirty(unpin, start, end, GFP_NOFS);
- unpin_extent_range(root, start, end);
+ unpin_extent_range(root, start, end, true);
cond_resched();
}
@@ -7490,7 +7493,7 @@ out:
*/
if (root_dropped == false)
btrfs_add_dead_root(root);
- if (err)
+ if (err && err != -EAGAIN)
btrfs_std_error(root->fs_info, err);
return err;
}
@@ -8563,7 +8566,7 @@ out:
int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
{
- return unpin_extent_range(root, start, end);
+ return unpin_extent_range(root, start, end, false);
}
int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e7e7afb4a872..84ceff6abbc1 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1624,6 +1624,7 @@ again:
* shortening the size of the delalloc range we're searching
*/
free_extent_state(cached_state);
+ cached_state = NULL;
if (!loops) {
unsigned long offset = (*start) & (PAGE_CACHE_SIZE - 1);
max_bytes = PAGE_CACHE_SIZE - offset;
@@ -2356,7 +2357,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
{
int uptodate = (err == 0);
struct extent_io_tree *tree;
- int ret;
+ int ret = 0;
tree = &BTRFS_I(page->mapping->host)->io_tree;
@@ -2370,6 +2371,8 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end)
if (!uptodate) {
ClearPageUptodate(page);
SetPageError(page);
+ ret = ret < 0 ? ret : -EIO;
+ mapping_set_error(page->mapping, ret);
}
return 0;
}
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index a4a7a1a8da95..0a3809500599 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -263,8 +263,6 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
if (!em)
goto out;
- if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
- list_move(&em->list, &tree->modified_extents);
em->generation = gen;
clear_bit(EXTENT_FLAG_PINNED, &em->flags);
em->mod_start = em->start;
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index b193bf324a41..e4bcfec7787e 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -403,7 +403,7 @@ int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
ret = 0;
fail:
while (ret < 0 && !list_empty(&tmplist)) {
- sums = list_entry(&tmplist, struct btrfs_ordered_sum, list);
+ sums = list_entry(tmplist.next, struct btrfs_ordered_sum, list);
list_del(&sums->list);
kfree(sums);
}
@@ -754,7 +754,7 @@ again:
found_next = 1;
if (ret != 0)
goto insert;
- slot = 0;
+ slot = path->slots[0];
}
btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index e53009657f0e..0cbe95dc8113 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -835,7 +835,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
if (!matched) {
__btrfs_remove_free_space_cache(ctl);
- btrfs_err(fs_info, "block group %llu has wrong amount of free space",
+ btrfs_warn(fs_info, "block group %llu has wrong amount of free space",
block_group->key.objectid);
ret = -1;
}
@@ -847,7 +847,7 @@ out:
spin_unlock(&block_group->lock);
ret = 0;
- btrfs_err(fs_info, "failed to load free space cache for block group %llu",
+ btrfs_warn(fs_info, "failed to load free space cache for block group %llu, rebuild it now",
block_group->key.objectid);
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 17f3064b4a3e..187911fbabce 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2419,10 +2419,23 @@ out_unlock:
return ret;
}
+static void free_sa_defrag_extent(struct new_sa_defrag_extent *new)
+{
+ struct old_sa_defrag_extent *old, *tmp;
+
+ if (!new)
+ return;
+
+ list_for_each_entry_safe(old, tmp, &new->head, list) {
+ list_del(&old->list);
+ kfree(old);
+ }
+ kfree(new);
+}
+
static void relink_file_extents(struct new_sa_defrag_extent *new)
{
struct btrfs_path *path;
- struct old_sa_defrag_extent *old, *tmp;
struct sa_defrag_extent_backref *backref;
struct sa_defrag_extent_backref *prev = NULL;
struct inode *inode;
@@ -2465,16 +2478,11 @@ static void relink_file_extents(struct new_sa_defrag_extent *new)
kfree(prev);
btrfs_free_path(path);
-
- list_for_each_entry_safe(old, tmp, &new->head, list) {
- list_del(&old->list);
- kfree(old);
- }
out:
+ free_sa_defrag_extent(new);
+
atomic_dec(&root->fs_info->defrag_running);
wake_up(&root->fs_info->transaction_wait);
-
- kfree(new);
}
static struct new_sa_defrag_extent *
@@ -2484,7 +2492,7 @@ record_old_file_extents(struct inode *inode,
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_path *path;
struct btrfs_key key;
- struct old_sa_defrag_extent *old, *tmp;
+ struct old_sa_defrag_extent *old;
struct new_sa_defrag_extent *new;
int ret;
@@ -2532,7 +2540,7 @@ record_old_file_extents(struct inode *inode,
if (slot >= btrfs_header_nritems(l)) {
ret = btrfs_next_leaf(root, path);
if (ret < 0)
- goto out_free_list;
+ goto out_free_path;
else if (ret > 0)
break;
continue;
@@ -2561,7 +2569,7 @@ record_old_file_extents(struct inode *inode,
old = kmalloc(sizeof(*old), GFP_NOFS);
if (!old)
- goto out_free_list;
+ goto out_free_path;
offset = max(new->file_pos, key.offset);
end = min(new->file_pos + new->len, key.offset + num_bytes);
@@ -2583,15 +2591,10 @@ next:
return new;
-out_free_list:
- list_for_each_entry_safe(old, tmp, &new->head, list) {
- list_del(&old->list);
- kfree(old);
- }
out_free_path:
btrfs_free_path(path);
out_kfree:
- kfree(new);
+ free_sa_defrag_extent(new);
return NULL;
}
@@ -2652,7 +2655,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
EXTENT_DEFRAG, 1, cached_state);
if (ret) {
u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item);
- if (last_snapshot >= BTRFS_I(inode)->generation)
+ if (0 && last_snapshot >= BTRFS_I(inode)->generation)
/* the inode is shared */
new = record_old_file_extents(inode, ordered_extent);
@@ -2743,8 +2746,14 @@ out:
btrfs_remove_ordered_extent(inode, ordered_extent);
/* for snapshot-aware defrag */
- if (new)
- relink_file_extents(new);
+ if (new) {
+ if (ret) {
+ free_sa_defrag_extent(new);
+ atomic_dec(&root->fs_info->defrag_running);
+ } else {
+ relink_file_extents(new);
+ }
+ }
/* once for us */
btrfs_put_ordered_extent(ordered_extent);
@@ -3536,7 +3545,8 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans,
* without delay
*/
if (!btrfs_is_free_space_inode(inode)
- && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) {
+ && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
+ && !root->fs_info->log_root_recovering) {
btrfs_update_root_times(trans, root);
ret = btrfs_delayed_update_inode(trans, root, inode);
@@ -4518,8 +4528,12 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
* these flags set. For all other operations the VFS set these flags
* explicitly if it wants a timestamp update.
*/
- if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME))))
- inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb);
+ if (newsize != oldsize) {
+ inode_inc_iversion(inode);
+ if (!(mask & (ATTR_CTIME | ATTR_MTIME)))
+ inode->i_ctime = inode->i_mtime =
+ current_fs_time(inode->i_sb);
+ }
if (newsize > oldsize) {
truncate_pagecache(inode, oldsize, newsize);
@@ -8146,7 +8160,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
/* check for collisions, even if the name isn't there */
- ret = btrfs_check_dir_item_collision(root, new_dir->i_ino,
+ ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino,
new_dentry->d_name.name,
new_dentry->d_name.len);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 8dedf4019672..783906c687b5 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -1528,6 +1528,12 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
printk(KERN_INFO "btrfs: Snapshot src from "
"another FS\n");
ret = -EINVAL;
+ } else if (!inode_owner_or_capable(src_inode)) {
+ /*
+ * Subvolume creation is not restricted, but snapshots
+ * are limited to own subvolumes only
+ */
+ ret = -EPERM;
} else {
ret = btrfs_mksubvol(&file->f_path, name, namelen,
BTRFS_I(src_inode)->root,
@@ -2093,7 +2099,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT);
if (err == -EINTR)
- goto out;
+ goto out_drop_write;
dentry = lookup_one_len(vol_args->name, parent, namelen);
if (IS_ERR(dentry)) {
err = PTR_ERR(dentry);
@@ -2235,6 +2241,7 @@ out_dput:
dput(dentry);
out_unlock_dir:
mutex_unlock(&dir->i_mutex);
+out_drop_write:
mnt_drop_write_file(file);
out:
kfree(vol_args);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 4febca4fc2de..0e7f7765b3bb 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -691,6 +691,7 @@ struct backref_node *build_backref_tree(struct reloc_control *rc,
int cowonly;
int ret;
int err = 0;
+ bool need_check = true;
path1 = btrfs_alloc_path();
path2 = btrfs_alloc_path();
@@ -914,6 +915,7 @@ again:
cur->bytenr);
lower = cur;
+ need_check = true;
for (; level < BTRFS_MAX_LEVEL; level++) {
if (!path2->nodes[level]) {
BUG_ON(btrfs_root_bytenr(&root->root_item) !=
@@ -957,18 +959,19 @@ again:
/*
* add the block to pending list if we
- * need check its backrefs. only block
- * at 'cur->level + 1' is added to the
- * tail of pending list. this guarantees
- * we check backrefs from lower level
- * blocks to upper level blocks.
+ * need check its backrefs, we only do this once
+ * while walking up a tree as we will catch
+ * anything else later on.
*/
- if (!upper->checked &&
- level == cur->level + 1) {
+ if (!upper->checked && need_check) {
+ need_check = false;
list_add_tail(&edge->list[UPPER],
&list);
- } else
+ } else {
+ if (upper->checked)
+ need_check = true;
INIT_LIST_HEAD(&edge->list[UPPER]);
+ }
} else {
upper = rb_entry(rb_node, struct backref_node,
rb_node);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index eb84c2db1aca..e4f69e3b78b9 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -545,8 +545,9 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
do {
- ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
- &ref_root, &ref_level);
+ ret = tree_backref_for_extent(&ptr, eb, &found_key, ei,
+ item_size, &ref_root,
+ &ref_level);
printk_in_rcu(KERN_WARNING
"btrfs: %s at logical %llu on dev %s, "
"sector %llu: metadata %s (level %d) in tree "
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index ba9690b9ae24..414c1b9eb896 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1550,6 +1550,10 @@ static int lookup_dir_item_inode(struct btrfs_root *root,
goto out;
}
btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
+ if (key.type == BTRFS_ROOT_ITEM_KEY) {
+ ret = -ENOENT;
+ goto out;
+ }
*found_inode = key.objectid;
*found_type = btrfs_dir_type(path->nodes[0], di);
@@ -2524,7 +2528,8 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir)
di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
btrfs_dir_item_key_to_cpu(eb, di, &di_key);
- if (di_key.objectid < sctx->send_progress) {
+ if (di_key.type != BTRFS_ROOT_ITEM_KEY &&
+ di_key.objectid < sctx->send_progress) {
ret = 1;
goto out;
}
@@ -4622,8 +4627,8 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
}
if (!access_ok(VERIFY_READ, arg->clone_sources,
- sizeof(*arg->clone_sources *
- arg->clone_sources_count))) {
+ sizeof(*arg->clone_sources) *
+ arg->clone_sources_count)) {
ret = -EFAULT;
goto out;
}
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 0544587d74f4..1f214689fa5e 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -524,7 +524,6 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
if (transid <= root->fs_info->last_trans_committed)
goto out;
- ret = -EINVAL;
/* find specified transaction */
spin_lock(&root->fs_info->trans_lock);
list_for_each_entry(t, &root->fs_info->trans_list, list) {
@@ -540,9 +539,16 @@ int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
}
}
spin_unlock(&root->fs_info->trans_lock);
- /* The specified transaction doesn't exist */
- if (!cur_trans)
+
+ /*
+ * The specified transaction doesn't exist, or we
+ * raced with btrfs_commit_transaction
+ */
+ if (!cur_trans) {
+ if (transid > root->fs_info->last_trans_committed)
+ ret = -EINVAL;
goto out;
+ }
} else {
/* find newest transaction that is committing | committed */
spin_lock(&root->fs_info->trans_lock);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index cf68596b51fb..bca436330681 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3314,7 +3314,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
btrfs_set_token_file_extent_type(leaf, fi,
BTRFS_FILE_EXTENT_REG,
&token);
- if (em->block_start == 0)
+ if (em->block_start == EXTENT_MAP_HOLE)
skip_csum = true;
}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 8bffb9174afb..7fc774639a78 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1384,6 +1384,22 @@ out:
return ret;
}
+/*
+ * Function to update ctime/mtime for a given device path.
+ * Mainly used for ctime/mtime based probe like libblkid.
+ *
+ * @path_name: path of the device node to open and touch.
+ *
+ * Best effort: if the node cannot be opened, the timestamp update is
+ * skipped and nothing is reported to the caller.
+ */
+static void update_dev_time(char *path_name)
+{
+	struct file *filp;
+
+	filp = filp_open(path_name, O_RDWR, 0);
+	/* filp_open() reports failure as an ERR_PTR() value, never as NULL */
+	if (IS_ERR(filp))
+		return;
+	file_update_time(filp);
+	filp_close(filp, NULL);
+}
+
static int btrfs_rm_dev_item(struct btrfs_root *root,
struct btrfs_device *device)
{
@@ -1612,11 +1628,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
struct btrfs_fs_devices *fs_devices;
fs_devices = root->fs_info->fs_devices;
while (fs_devices) {
- if (fs_devices->seed == cur_devices)
+ if (fs_devices->seed == cur_devices) {
+ fs_devices->seed = cur_devices->seed;
break;
+ }
fs_devices = fs_devices->seed;
}
- fs_devices->seed = cur_devices->seed;
cur_devices->seed = NULL;
lock_chunks(root);
__btrfs_close_devices(cur_devices);
@@ -1642,10 +1659,14 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
ret = 0;
- /* Notify udev that device has changed */
- if (bdev)
+ if (bdev) {
+ /* Notify udev that device has changed */
btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
+ /* Update ctime/mtime for device path for libblkid */
+ update_dev_time(device_path);
+ }
+
error_brelse:
brelse(bh);
if (bdev)
@@ -1817,7 +1838,6 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
fs_devices->seeding = 0;
fs_devices->num_devices = 0;
fs_devices->open_devices = 0;
- fs_devices->total_devices = 0;
fs_devices->seed = seed_devices;
generate_random_uuid(fs_devices->fsid);
@@ -2089,6 +2109,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
ret = btrfs_commit_transaction(trans, root);
}
+ /* Update ctime/mtime for libblkid */
+ update_dev_time(device_path);
return ret;
error_trans:
@@ -4248,6 +4270,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
btrfs_emerg(fs_info, "Invalid mapping for %Lu-%Lu, got "
"%Lu-%Lu\n", logical, logical+len, em->start,
em->start + em->len);
+ free_extent_map(em);
return 1;
}
@@ -4429,6 +4452,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, "
"found %Lu-%Lu\n", logical, em->start,
em->start + em->len);
+ free_extent_map(em);
return -EINVAL;
}
diff --git a/fs/buffer.c b/fs/buffer.c
index d2a4d1bb2d57..83fedaa53b55 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -620,14 +620,16 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
static void __set_page_dirty(struct page *page,
struct address_space *mapping, int warn)
{
- spin_lock_irq(&mapping->tree_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&mapping->tree_lock, flags);
if (page->mapping) { /* Race with truncate? */
WARN_ON_ONCE(warn && !PageUptodate(page));
account_page_dirtied(page, mapping);
radix_tree_tag_set(&mapping->page_tree,
page_index(page), PAGECACHE_TAG_DIRTY);
}
- spin_unlock_irq(&mapping->tree_lock);
+ spin_unlock_irqrestore(&mapping->tree_lock, flags);
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
}
@@ -983,7 +985,8 @@ grow_dev_page(struct block_device *bdev, sector_t block,
bh = page_buffers(page);
if (bh->b_size == size) {
end_block = init_page_buffers(page, bdev,
- index << sizebits, size);
+ (sector_t)index << sizebits,
+ size);
goto done;
}
if (!try_to_free_buffers(page))
@@ -1004,7 +1007,8 @@ grow_dev_page(struct block_device *bdev, sector_t block,
*/
spin_lock(&inode->i_mapping->private_lock);
link_dev_buffers(page, bh);
- end_block = init_page_buffers(page, bdev, index << sizebits, size);
+ end_block = init_page_buffers(page, bdev, (sector_t)index << sizebits,
+ size);
spin_unlock(&inode->i_mapping->private_lock);
done:
ret = (block < end_block) ? 1 : -ENXIO;
@@ -2014,6 +2018,7 @@ int generic_write_end(struct file *file, struct address_space *mapping,
struct page *page, void *fsdata)
{
struct inode *inode = mapping->host;
+ loff_t old_size = inode->i_size;
int i_size_changed = 0;
copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);
@@ -2033,6 +2038,8 @@ int generic_write_end(struct file *file, struct address_space *mapping,
unlock_page(page);
page_cache_release(page);
+ if (old_size < pos)
+ pagecache_isize_extended(inode, old_size, pos);
/*
* Don't mark the inode dirty under page lock. First, it unnecessarily
* makes the holding time of page lock longer. Second, it forces lock
@@ -2250,6 +2257,11 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
err = 0;
balance_dirty_pages_ratelimited(mapping);
+
+ if (unlikely(fatal_signal_pending(current))) {
+ err = -EINTR;
+ goto out;
+ }
}
/* page covers the boundary, find the boundary offset */
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 3e68ac101040..5da06f020986 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -213,9 +213,13 @@ static int readpage_nounlock(struct file *filp, struct page *page)
if (err < 0) {
SetPageError(page);
goto out;
- } else if (err < PAGE_CACHE_SIZE) {
+ } else {
+ if (err < PAGE_CACHE_SIZE) {
/* zero fill remainder of page */
- zero_user_segment(page, err, PAGE_CACHE_SIZE);
+ zero_user_segment(page, err, PAGE_CACHE_SIZE);
+ } else {
+ flush_dcache_page(page);
+ }
}
SetPageUptodate(page);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 656e16907430..5de16f5ac7e9 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -313,9 +313,9 @@ static int striped_read(struct inode *inode,
{
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_inode_info *ci = ceph_inode(inode);
- u64 pos, this_len;
+ u64 pos, this_len, left;
int io_align, page_align;
- int left, pages_left;
+ int pages_left;
int read;
struct page **page_pos;
int ret;
@@ -346,47 +346,40 @@ more:
ret = 0;
hit_stripe = this_len < left;
was_short = ret >= 0 && ret < this_len;
- dout("striped_read %llu~%u (read %u) got %d%s%s\n", pos, left, read,
+ dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read,
ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : "");
- if (ret > 0) {
- int didpages = (page_align + ret) >> PAGE_CACHE_SHIFT;
-
- if (read < pos - off) {
- dout(" zero gap %llu to %llu\n", off + read, pos);
- ceph_zero_page_vector_range(page_align + read,
- pos - off - read, pages);
+ if (ret >= 0) {
+ int didpages;
+ if (was_short && (pos + ret < inode->i_size)) {
+ u64 tmp = min(this_len - ret,
+ inode->i_size - pos - ret);
+ dout(" zero gap %llu to %llu\n",
+ pos + ret, pos + ret + tmp);
+ ceph_zero_page_vector_range(page_align + read + ret,
+ tmp, pages);
+ ret += tmp;
}
+
+ didpages = (page_align + ret) >> PAGE_CACHE_SHIFT;
pos += ret;
read = pos - off;
left -= ret;
page_pos += didpages;
pages_left -= didpages;
- /* hit stripe? */
- if (left && hit_stripe)
+ /* hit stripe and need continue*/
+ if (left && hit_stripe && pos < inode->i_size)
goto more;
}
- if (was_short) {
+ if (read > 0) {
+ ret = read;
/* did we bounce off eof? */
if (pos + left > inode->i_size)
*checkeof = 1;
-
- /* zero trailing bytes (inside i_size) */
- if (left > 0 && pos < inode->i_size) {
- if (pos + left > inode->i_size)
- left = inode->i_size - pos;
-
- dout("zero tail %d\n", left);
- ceph_zero_page_vector_range(page_align + read, left,
- pages);
- read += left;
- }
}
- if (ret >= 0)
- ret = read;
dout("striped_read returns %d\n", ret);
return ret;
}
@@ -618,6 +611,8 @@ out:
if (check_caps)
ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY,
NULL);
+ } else if (ret != -EOLDSNAPC && written > 0) {
+ ret = written;
}
return ret;
}
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index a5ce62eb7806..669622fd1ae3 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -211,8 +211,12 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg)
snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx",
ceph_ino(inode), dl.object_no);
- ceph_calc_ceph_pg(&pgid, dl.object_name, osdc->osdmap,
- ceph_file_layout_pg_pool(ci->i_layout));
+ r = ceph_calc_ceph_pg(&pgid, dl.object_name, osdc->osdmap,
+ ceph_file_layout_pg_pool(ci->i_layout));
+ if (r < 0) {
+ up_read(&osdc->map_sem);
+ return r;
+ }
dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid);
if (dl.osd >= 0) {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 4d2920304be8..d6a536886472 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -414,6 +414,9 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
{
struct ceph_mds_session *s;
+ if (mds >= mdsc->mdsmap->m_max_mds)
+ return ERR_PTR(-EINVAL);
+
s = kzalloc(sizeof(*s), GFP_NOFS);
if (!s)
return ERR_PTR(-ENOMEM);
@@ -639,6 +642,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc,
req->r_unsafe_dir = NULL;
}
+ complete_all(&req->r_safe_completion);
+
ceph_mdsc_put_request(req);
}
@@ -1840,8 +1845,11 @@ static int __do_request(struct ceph_mds_client *mdsc,
int mds = -1;
int err = -EAGAIN;
- if (req->r_err || req->r_got_result)
+ if (req->r_err || req->r_got_result) {
+ if (req->r_aborted)
+ __unregister_request(mdsc, req);
goto out;
+ }
if (req->r_timeout &&
time_after_eq(jiffies, req->r_started + req->r_timeout)) {
@@ -2151,7 +2159,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
if (head->safe) {
req->r_got_safe = true;
__unregister_request(mdsc, req);
- complete_all(&req->r_safe_completion);
if (req->r_got_unsafe) {
/*
@@ -3040,8 +3047,10 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
fsc->mdsc = mdsc;
mutex_init(&mdsc->mutex);
mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS);
- if (mdsc->mdsmap == NULL)
+ if (mdsc->mdsmap == NULL) {
+ kfree(mdsc);
return -ENOMEM;
+ }
init_completion(&mdsc->safe_umount_waiters);
init_waitqueue_head(&mdsc->session_close_wq);
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index 9278dec9e940..d4d38977dcbb 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -138,6 +138,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
m->m_info[mds].export_targets =
kcalloc(num_export_targets, sizeof(u32),
GFP_NOFS);
+ if (m->m_info[mds].export_targets == NULL)
+ goto badmem;
for (j = 0; j < num_export_targets; j++)
m->m_info[mds].export_targets[j] =
ceph_decode_32(&pexport_targets);
@@ -170,7 +172,7 @@ bad:
DUMP_PREFIX_OFFSET, 16, 1,
start, end - start, true);
ceph_mdsmap_destroy(m);
- return ERR_PTR(-EINVAL);
+ return ERR_PTR(err);
}
void ceph_mdsmap_destroy(struct ceph_mdsmap *m)
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 7d377c9a5e35..6627b26a800c 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -357,7 +357,7 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt,
}
err = -EINVAL;
dev_name_end--; /* back up to ':' separator */
- if (*dev_name_end != ':') {
+ if (dev_name_end < dev_name || *dev_name_end != ':') {
pr_err("device name is missing path (no : separator in %s)\n",
dev_name);
goto out;
diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index 0227b45ef00a..15e9505aa35f 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -290,7 +290,8 @@ int
cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
const struct nls_table *cp, int mapChars)
{
- int i, j, charlen;
+ int i, charlen;
+ int j = 0;
char src_char;
__le16 dst_char;
wchar_t tmp;
@@ -298,12 +299,11 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
if (!mapChars)
return cifs_strtoUTF16(target, source, PATH_MAX, cp);
- for (i = 0, j = 0; i < srclen; j++) {
+ for (i = 0; i < srclen; j++) {
src_char = source[i];
charlen = 1;
switch (src_char) {
case 0:
- put_unaligned(0, &target[j]);
goto ctoUTF16_out;
case ':':
dst_char = cpu_to_le16(UNI_COLON);
@@ -350,6 +350,7 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen,
}
ctoUTF16_out:
+ put_unaligned(0, &target[j]); /* Null terminate target unicode string */
return j;
}
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index 51f5e0ee7237..494b68349667 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -1027,15 +1027,30 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode,
__u32 secdesclen = 0;
struct cifs_ntsd *pntsd = NULL; /* acl obtained from server */
struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
+ struct cifs_tcon *tcon;
+
+ if (IS_ERR(tlink))
+ return PTR_ERR(tlink);
+ tcon = tlink_tcon(tlink);
cifs_dbg(NOISY, "set ACL from mode for %s\n", path);
/* Get the security descriptor */
- pntsd = get_cifs_acl(CIFS_SB(inode->i_sb), inode, path, &secdesclen);
+
+ if (tcon->ses->server->ops->get_acl == NULL) {
+ cifs_put_tlink(tlink);
+ return -EOPNOTSUPP;
+ }
+
+ pntsd = tcon->ses->server->ops->get_acl(cifs_sb, inode, path,
+ &secdesclen);
if (IS_ERR(pntsd)) {
rc = PTR_ERR(pntsd);
cifs_dbg(VFS, "%s: error %d getting sec desc\n", __func__, rc);
- goto out;
+ cifs_put_tlink(tlink);
+ return rc;
}
/*
@@ -1048,6 +1063,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode,
pnntsd = kmalloc(secdesclen, GFP_KERNEL);
if (!pnntsd) {
kfree(pntsd);
+ cifs_put_tlink(tlink);
return -ENOMEM;
}
@@ -1056,14 +1072,18 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode,
cifs_dbg(NOISY, "build_sec_desc rc: %d\n", rc);
+ if (tcon->ses->server->ops->set_acl == NULL)
+ rc = -EOPNOTSUPP;
+
if (!rc) {
/* Set the security descriptor */
- rc = set_cifs_acl(pnntsd, secdesclen, inode, path, aclflag);
+ rc = tcon->ses->server->ops->set_acl(pnntsd, secdesclen, inode,
+ path, aclflag);
cifs_dbg(NOISY, "set_cifs_acl rc: %d\n", rc);
}
+ cifs_put_tlink(tlink);
kfree(pnntsd);
kfree(pntsd);
-out:
return rc;
}
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index ea3a0b3018a5..f74dfa89c4c4 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -74,11 +74,6 @@
#define SERVER_NAME_LENGTH 40
#define SERVER_NAME_LEN_WITH_NULL (SERVER_NAME_LENGTH + 1)
-/* used to define string lengths for reversing unicode strings */
-/* (256+1)*2 = 514 */
-/* (max path length + 1 for null) * 2 for unicode */
-#define MAX_NAME 514
-
/* SMB echo "timeout" -- FIXME: tunable? */
#define SMB_ECHO_INTERVAL (60 * HZ)
@@ -370,6 +365,18 @@ struct smb_version_operations {
void (*new_lease_key)(struct cifs_fid *fid);
int (*calc_signature)(struct smb_rqst *rqst,
struct TCP_Server_Info *server);
+ ssize_t (*query_all_EAs)(const unsigned int, struct cifs_tcon *,
+ const unsigned char *, const unsigned char *, char *,
+ size_t, const struct nls_table *, int);
+ int (*set_EA)(const unsigned int, struct cifs_tcon *, const char *,
+ const char *, const void *, const __u16,
+ const struct nls_table *, int);
+ struct cifs_ntsd * (*get_acl)(struct cifs_sb_info *, struct inode *,
+ const char *, u32 *);
+ int (*set_acl)(struct cifs_ntsd *, __u32, struct inode *, const char *,
+ int);
+ /* check if we need to issue closedir */
+ bool (*dir_needs_close)(struct cifsFileInfo *);
};
struct smb_version_values {
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index a58dc77cc443..d17c5d72cd29 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -3306,11 +3306,13 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL,
return 0;
}
cifs_acl->version = cpu_to_le16(1);
- if (acl_type == ACL_TYPE_ACCESS)
+ if (acl_type == ACL_TYPE_ACCESS) {
cifs_acl->access_entry_count = cpu_to_le16(count);
- else if (acl_type == ACL_TYPE_DEFAULT)
+ cifs_acl->default_entry_count = __constant_cpu_to_le16(0xFFFF);
+ } else if (acl_type == ACL_TYPE_DEFAULT) {
cifs_acl->default_entry_count = cpu_to_le16(count);
- else {
+ cifs_acl->access_entry_count = __constant_cpu_to_le16(0xFFFF);
+ } else {
cifs_dbg(FYI, "unknown ACL type %d\n", acl_type);
return 0;
}
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 5699b5036ed8..0c2425b21974 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -491,6 +491,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry,
if (server->ops->close)
server->ops->close(xid, tcon, &fid);
cifs_del_pending_open(&open);
+ fput(file);
rc = -ENOMEM;
}
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index c2934f8701da..5fcc10fa62bd 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -735,7 +735,7 @@ int cifs_closedir(struct inode *inode, struct file *file)
cifs_dbg(FYI, "Freeing private data in close dir\n");
spin_lock(&cifs_file_list_lock);
- if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
+ if (server->ops->dir_needs_close(cfile)) {
cfile->invalidHandle = true;
spin_unlock(&cifs_file_list_lock);
if (server->ops->close_dir)
@@ -2353,7 +2353,7 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
unsigned long nr_segs, loff_t *poffset)
{
unsigned long nr_pages, i;
- size_t copied, len, cur_len;
+ size_t bytes, copied, len, cur_len;
ssize_t total_written = 0;
loff_t offset;
struct iov_iter it;
@@ -2408,14 +2408,45 @@ cifs_iovec_write(struct file *file, const struct iovec *iov,
save_len = cur_len;
for (i = 0; i < nr_pages; i++) {
- copied = min_t(const size_t, cur_len, PAGE_SIZE);
+ bytes = min_t(const size_t, cur_len, PAGE_SIZE);
copied = iov_iter_copy_from_user(wdata->pages[i], &it,
- 0, copied);
+ 0, bytes);
cur_len -= copied;
iov_iter_advance(&it, copied);
+ /*
+ * If we didn't copy as much as we expected, then that
+ * may mean we trod into an unmapped area. Stop copying
+ * at that point. On the next pass through the big
+ * loop, we'll likely end up getting a zero-length
+ * write and bailing out of it.
+ */
+ if (copied < bytes)
+ break;
}
cur_len = save_len - cur_len;
+ /*
+ * If we have no data to send, then that probably means that
+ * the copy above failed altogether. That's most likely because
+ * the address in the iovec was bogus. Set the rc to -EFAULT,
+ * free anything we allocated and bail out.
+ */
+ if (!cur_len) {
+ for (i = 0; i < nr_pages; i++)
+ put_page(wdata->pages[i]);
+ kfree(wdata);
+ rc = -EFAULT;
+ break;
+ }
+
+ /*
+ * i + 1 now represents the number of pages we actually used in
+ * the copy phase above. Bring nr_pages down to that, and free
+ * any pages that we didn't use.
+ */
+ for ( ; nr_pages > i + 1; nr_pages--)
+ put_page(wdata->pages[nr_pages - 1]);
+
wdata->sync_mode = WB_SYNC_ALL;
wdata->nr_pages = nr_pages;
wdata->offset = (__u64)offset;
@@ -2778,7 +2809,7 @@ cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
total_read += result;
}
- return total_read > 0 ? total_read : result;
+ return total_read > 0 && result != -EAGAIN ? total_read : result;
}
static ssize_t
@@ -3201,7 +3232,7 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
total_read += result;
}
- return total_read > 0 ? total_read : result;
+ return total_read > 0 && result != -EAGAIN ? total_read : result;
}
static int cifs_readpages(struct file *file, struct address_space *mapping,
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 449b6cf09b09..0dee93706c98 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -490,10 +490,15 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path,
return PTR_ERR(tlink);
tcon = tlink_tcon(tlink);
- rc = CIFSSMBQAllEAs(xid, tcon, path, "SETFILEBITS",
- ea_value, 4 /* size of buf */, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (tcon->ses->server->ops->query_all_EAs == NULL) {
+ cifs_put_tlink(tlink);
+ return -EOPNOTSUPP;
+ }
+
+ rc = tcon->ses->server->ops->query_all_EAs(xid, tcon, path,
+ "SETFILEBITS", ea_value, 4 /* size of buf */,
+ cifs_sb->local_nls,
+ cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
cifs_put_tlink(tlink);
if (rc < 0)
return (int)rc;
@@ -1635,13 +1640,22 @@ cifs_rename(struct inode *source_dir, struct dentry *source_dentry,
unlink_target:
/* Try unlinking the target dentry if it's not negative */
if (target_dentry->d_inode && (rc == -EACCES || rc == -EEXIST)) {
- tmprc = cifs_unlink(target_dir, target_dentry);
+ if (S_ISDIR(target_dentry->d_inode->i_mode))
+ tmprc = cifs_rmdir(target_dir, target_dentry);
+ else
+ tmprc = cifs_unlink(target_dir, target_dentry);
if (tmprc)
goto cifs_rename_exit;
rc = cifs_do_rename(xid, source_dentry, from_name,
target_dentry, to_name);
}
+ /* force revalidate to go get info when needed */
+ CIFS_I(source_dir)->time = CIFS_I(target_dir)->time = 0;
+
+ source_dir->i_ctime = source_dir->i_mtime = target_dir->i_ctime =
+ target_dir->i_mtime = current_fs_time(source_dir->i_sb);
+
cifs_rename_exit:
kfree(info_buf_source);
kfree(from_name);
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 036279c064ff..85ebdaa21015 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -582,11 +582,11 @@ find_cifs_entry(const unsigned int xid, struct cifs_tcon *tcon,
/* close and restart search */
cifs_dbg(FYI, "search backing up - close and restart search\n");
spin_lock(&cifs_file_list_lock);
- if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
+ if (server->ops->dir_needs_close(cfile)) {
cfile->invalidHandle = true;
spin_unlock(&cifs_file_list_lock);
- if (server->ops->close)
- server->ops->close(xid, tcon, &cfile->fid);
+ if (server->ops->close_dir)
+ server->ops->close_dir(xid, tcon, &cfile->fid);
} else
spin_unlock(&cifs_file_list_lock);
if (cfile->srch_inf.ntwrk_buf_start) {
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 3efdb9d5c0b8..610c6c24d41d 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -885,6 +885,12 @@ cifs_mand_lock(const unsigned int xid, struct cifsFileInfo *cfile, __u64 offset,
(__u8)type, wait, 0);
}
+/*
+ * SMB1 answer to "do we need to issue closedir": true only while the
+ * handle in @cfile is not marked invalid and endOfSearch is not set.
+ */
+static bool
+cifs_dir_needs_close(struct cifsFileInfo *cfile)
+{
+	if (cfile->invalidHandle)
+		return false;
+
+	return !cfile->srch_inf.endOfSearch;
+}
+
struct smb_version_operations smb1_operations = {
.send_cancel = send_nt_cancel,
.compare_fids = cifs_compare_fids,
@@ -948,6 +954,15 @@ struct smb_version_operations smb1_operations = {
.mand_lock = cifs_mand_lock,
.mand_unlock_range = cifs_unlock_range,
.push_mand_locks = cifs_push_mandatory_locks,
+ .dir_needs_close = cifs_dir_needs_close,
+#ifdef CONFIG_CIFS_XATTR
+ .query_all_EAs = CIFSSMBQAllEAs,
+ .set_EA = CIFSSMBSetEA,
+#endif /* CIFS_XATTR */
+#ifdef CONFIG_CIFS_ACL
+ .get_acl = get_cifs_acl,
+ .set_acl = set_cifs_acl,
+#endif /* CIFS_ACL */
};
struct smb_version_values smb1_values = {
diff --git a/fs/cifs/smb2file.c b/fs/cifs/smb2file.c
index 5da1b55a2258..d801f63cddd0 100644
--- a/fs/cifs/smb2file.c
+++ b/fs/cifs/smb2file.c
@@ -73,7 +73,7 @@ smb2_open_file(const unsigned int xid, struct cifs_tcon *tcon, const char *path,
goto out;
}
- smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2,
+ smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2,
GFP_KERNEL);
if (smb2_data == NULL) {
rc = -ENOMEM;
diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h
index 7c0e2143e775..cc592ef6584a 100644
--- a/fs/cifs/smb2glob.h
+++ b/fs/cifs/smb2glob.h
@@ -55,4 +55,7 @@
#define SMB2_NTLMV2_SESSKEY_SIZE (16)
#define SMB2_HMACSHA256_SIZE (32)
+/* Maximum buffer size value we can send with 1 credit */
+#define SMB2_MAX_BUFFER_SIZE 65536
+
#endif /* _SMB2_GLOB_H */
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index fff6dfba6204..6d535797ec76 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -123,7 +123,7 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
*adjust_tz = false;
- smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2,
+ smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2,
GFP_KERNEL);
if (smb2_data == NULL)
return -ENOMEM;
diff --git a/fs/cifs/smb2maperror.c b/fs/cifs/smb2maperror.c
index 7c2f45c06fc2..4768cf8be6e2 100644
--- a/fs/cifs/smb2maperror.c
+++ b/fs/cifs/smb2maperror.c
@@ -214,7 +214,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
{STATUS_BREAKPOINT, -EIO, "STATUS_BREAKPOINT"},
{STATUS_SINGLE_STEP, -EIO, "STATUS_SINGLE_STEP"},
{STATUS_BUFFER_OVERFLOW, -EIO, "STATUS_BUFFER_OVERFLOW"},
- {STATUS_NO_MORE_FILES, -EIO, "STATUS_NO_MORE_FILES"},
+ {STATUS_NO_MORE_FILES, -ENODATA, "STATUS_NO_MORE_FILES"},
{STATUS_WAKE_SYSTEM_DEBUGGER, -EIO, "STATUS_WAKE_SYSTEM_DEBUGGER"},
{STATUS_HANDLES_CLOSED, -EIO, "STATUS_HANDLES_CLOSED"},
{STATUS_NO_INHERITANCE, -EIO, "STATUS_NO_INHERITANCE"},
@@ -605,7 +605,7 @@ static const struct status_to_posix_error smb2_error_map_table[] = {
{STATUS_MAPPED_FILE_SIZE_ZERO, -EIO, "STATUS_MAPPED_FILE_SIZE_ZERO"},
{STATUS_TOO_MANY_OPENED_FILES, -EMFILE, "STATUS_TOO_MANY_OPENED_FILES"},
{STATUS_CANCELLED, -EIO, "STATUS_CANCELLED"},
- {STATUS_CANNOT_DELETE, -EIO, "STATUS_CANNOT_DELETE"},
+ {STATUS_CANNOT_DELETE, -EACCES, "STATUS_CANNOT_DELETE"},
{STATUS_INVALID_COMPUTER_NAME, -EIO, "STATUS_INVALID_COMPUTER_NAME"},
{STATUS_FILE_DELETED, -EIO, "STATUS_FILE_DELETED"},
{STATUS_SPECIAL_ACCOUNT, -EIO, "STATUS_SPECIAL_ACCOUNT"},
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index f2e76f3b0c61..e12f258a5ffa 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -181,11 +181,8 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
/* start with specified wsize, or default */
wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE;
wsize = min_t(unsigned int, wsize, server->max_write);
- /*
- * limit write size to 2 ** 16, because we don't support multicredit
- * requests now.
- */
- wsize = min_t(unsigned int, wsize, 2 << 15);
+ /* set it to the maximum buffer size value we can send with 1 credit */
+ wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE);
return wsize;
}
@@ -199,11 +196,8 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
/* start with specified rsize, or default */
rsize = volume_info->rsize ? volume_info->rsize : CIFS_DEFAULT_IOSIZE;
rsize = min_t(unsigned int, rsize, server->max_read);
- /*
- * limit write size to 2 ** 16, because we don't support multicredit
- * requests now.
- */
- rsize = min_t(unsigned int, rsize, 2 << 15);
+ /* set it to the maximum buffer size value we can send with 1 credit */
+ rsize = min_t(unsigned int, rsize, SMB2_MAX_BUFFER_SIZE);
return rsize;
}
@@ -249,7 +243,7 @@ smb2_query_file_info(const unsigned int xid, struct cifs_tcon *tcon,
int rc;
struct smb2_file_all_info *smb2_data;
- smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + MAX_NAME * 2,
+ smb2_data = kzalloc(sizeof(struct smb2_file_all_info) + PATH_MAX * 2,
GFP_KERNEL);
if (smb2_data == NULL)
return -ENOMEM;
@@ -560,6 +554,12 @@ smb2_new_lease_key(struct cifs_fid *fid)
get_random_bytes(fid->lease_key, SMB2_LEASE_KEY_SIZE);
}
+/*
+ * SMB2 answer to "do we need to issue closedir": only the invalidHandle
+ * flag of @cfile is consulted; endOfSearch is not looked at here.
+ */
+static bool
+smb2_dir_needs_close(struct cifsFileInfo *cfile)
+{
+	const bool handle_valid = !cfile->invalidHandle;
+
+	return handle_valid;
+}
+
struct smb_version_operations smb21_operations = {
.compare_fids = smb2_compare_fids,
.setup_request = smb2_setup_request,
@@ -624,6 +624,7 @@ struct smb_version_operations smb21_operations = {
.set_lease_key = smb2_set_lease_key,
.new_lease_key = smb2_new_lease_key,
.calc_signature = smb2_calc_signature,
+ .dir_needs_close = smb2_dir_needs_close,
};
@@ -691,6 +692,7 @@ struct smb_version_operations smb30_operations = {
.set_lease_key = smb2_set_lease_key,
.new_lease_key = smb2_new_lease_key,
.calc_signature = smb3_calc_signature,
+ .dir_needs_close = smb2_dir_needs_close,
};
struct smb_version_values smb20_values = {
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 2b95ce2b54e8..eb0de4c3ca76 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -408,6 +408,9 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
server->dialect = le16_to_cpu(rsp->DialectRevision);
server->maxBuf = le32_to_cpu(rsp->MaxTransactSize);
+ /* set it to the maximum buffer size value we can send with 1 credit */
+ server->maxBuf = min_t(unsigned int, le32_to_cpu(rsp->MaxTransactSize),
+ SMB2_MAX_BUFFER_SIZE);
server->max_read = le32_to_cpu(rsp->MaxReadSize);
server->max_write = le32_to_cpu(rsp->MaxWriteSize);
/* BB Do we need to validate the SecurityMode? */
@@ -806,7 +809,8 @@ tcon_exit:
tcon_error_exit:
if (rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) {
cifs_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree);
- tcon->bad_network_name = true;
+ if (tcon)
+ tcon->bad_network_name = true;
}
goto tcon_exit;
}
@@ -1200,7 +1204,7 @@ SMB2_query_info(const unsigned int xid, struct cifs_tcon *tcon,
{
return query_info(xid, tcon, persistent_fid, volatile_fid,
FILE_ALL_INFORMATION,
- sizeof(struct smb2_file_all_info) + MAX_NAME * 2,
+ sizeof(struct smb2_file_all_info) + PATH_MAX * 2,
sizeof(struct smb2_file_all_info), data);
}
@@ -1796,6 +1800,10 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
rsp = (struct smb2_query_directory_rsp *)iov[0].iov_base;
if (rc) {
+ if (rc == -ENODATA && rsp->hdr.Status == STATUS_NO_MORE_FILES) {
+ srch_inf->endOfSearch = true;
+ rc = 0;
+ }
cifs_stats_fail_inc(tcon, SMB2_QUERY_DIRECTORY_HE);
goto qdir_exit;
}
@@ -1833,11 +1841,6 @@ SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
else
cifs_dbg(VFS, "illegal search buffer type\n");
- if (rsp->hdr.Status == STATUS_NO_MORE_FILES)
- srch_inf->endOfSearch = 1;
- else
- srch_inf->endOfSearch = 0;
-
return rc;
qdir_exit:
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index 09afda4cc58e..5ac836a86b18 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -82,9 +82,11 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name)
goto remove_ea_exit;
ea_name += XATTR_USER_PREFIX_LEN; /* skip past user. prefix */
- rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, NULL,
- (__u16)0, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (pTcon->ses->server->ops->set_EA)
+ rc = pTcon->ses->server->ops->set_EA(xid, pTcon,
+ full_path, ea_name, NULL, (__u16)0,
+ cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
}
remove_ea_exit:
kfree(full_path);
@@ -149,18 +151,22 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
cifs_dbg(FYI, "attempt to set cifs inode metadata\n");
ea_name += XATTR_USER_PREFIX_LEN; /* skip past user. prefix */
- rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value,
- (__u16)value_size, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (pTcon->ses->server->ops->set_EA)
+ rc = pTcon->ses->server->ops->set_EA(xid, pTcon,
+ full_path, ea_name, ea_value, (__u16)value_size,
+ cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
} else if (strncmp(ea_name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN)
== 0) {
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
goto set_ea_exit;
ea_name += XATTR_OS2_PREFIX_LEN; /* skip past os2. prefix */
- rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value,
- (__u16)value_size, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (pTcon->ses->server->ops->set_EA)
+ rc = pTcon->ses->server->ops->set_EA(xid, pTcon,
+ full_path, ea_name, ea_value, (__u16)value_size,
+ cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
} else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL,
strlen(CIFS_XATTR_CIFS_ACL)) == 0) {
#ifdef CONFIG_CIFS_ACL
@@ -170,8 +176,12 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name,
rc = -ENOMEM;
} else {
memcpy(pacl, ea_value, value_size);
- rc = set_cifs_acl(pacl, value_size,
- direntry->d_inode, full_path, CIFS_ACL_DACL);
+ if (pTcon->ses->server->ops->set_acl)
+ rc = pTcon->ses->server->ops->set_acl(pacl,
+ value_size, direntry->d_inode,
+ full_path, CIFS_ACL_DACL);
+ else
+ rc = -EOPNOTSUPP;
if (rc == 0) /* force revalidate of the inode */
CIFS_I(direntry->d_inode)->time = 0;
kfree(pacl);
@@ -272,17 +282,21 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
/* revalidate/getattr then populate from inode */
} /* BB add else when above is implemented */
ea_name += XATTR_USER_PREFIX_LEN; /* skip past user. prefix */
- rc = CIFSSMBQAllEAs(xid, pTcon, full_path, ea_name, ea_value,
- buf_size, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (pTcon->ses->server->ops->query_all_EAs)
+ rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon,
+ full_path, ea_name, ea_value, buf_size,
+ cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
} else if (strncmp(ea_name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) {
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
goto get_ea_exit;
ea_name += XATTR_OS2_PREFIX_LEN; /* skip past os2. prefix */
- rc = CIFSSMBQAllEAs(xid, pTcon, full_path, ea_name, ea_value,
- buf_size, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (pTcon->ses->server->ops->query_all_EAs)
+ rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon,
+ full_path, ea_name, ea_value, buf_size,
+ cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
} else if (strncmp(ea_name, POSIX_ACL_XATTR_ACCESS,
strlen(POSIX_ACL_XATTR_ACCESS)) == 0) {
#ifdef CONFIG_CIFS_POSIX
@@ -313,8 +327,11 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
u32 acllen;
struct cifs_ntsd *pacl;
- pacl = get_cifs_acl(cifs_sb, direntry->d_inode,
- full_path, &acllen);
+ if (pTcon->ses->server->ops->get_acl == NULL)
+ goto get_ea_exit; /* rc already EOPNOTSUPP */
+
+ pacl = pTcon->ses->server->ops->get_acl(cifs_sb,
+ direntry->d_inode, full_path, &acllen);
if (IS_ERR(pacl)) {
rc = PTR_ERR(pacl);
cifs_dbg(VFS, "%s: error %zd getting sec desc\n",
@@ -400,11 +417,12 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size)
/* if proc/fs/cifs/streamstoxattr is set then
search server for EAs or streams to
returns as xattrs */
- rc = CIFSSMBQAllEAs(xid, pTcon, full_path, NULL, data,
- buf_size, cifs_sb->local_nls,
- cifs_sb->mnt_cifs_flags &
- CIFS_MOUNT_MAP_SPECIAL_CHR);
+ if (pTcon->ses->server->ops->query_all_EAs)
+ rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon,
+ full_path, NULL, data, buf_size,
+ cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
+ CIFS_MOUNT_MAP_SPECIAL_CHR);
list_ea_exit:
kfree(full_path);
free_xid(xid);
diff --git a/fs/compat_binfmt_elf.c b/fs/compat_binfmt_elf.c
index a81147e2e4ef..4d24d17bcfc1 100644
--- a/fs/compat_binfmt_elf.c
+++ b/fs/compat_binfmt_elf.c
@@ -88,6 +88,11 @@ static void cputime_to_compat_timeval(const cputime_t cputime,
#define ELF_HWCAP COMPAT_ELF_HWCAP
#endif
+#ifdef COMPAT_ELF_HWCAP2
+#undef ELF_HWCAP2
+#define ELF_HWCAP2 COMPAT_ELF_HWCAP2
+#endif
+
#ifdef COMPAT_ARCH_DLINFO
#undef ARCH_DLINFO
#define ARCH_DLINFO COMPAT_ARCH_DLINFO
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 7aabc6ad4e9b..fa38d076697d 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -56,10 +56,19 @@ static void configfs_d_iput(struct dentry * dentry,
struct configfs_dirent *sd = dentry->d_fsdata;
if (sd) {
- BUG_ON(sd->s_dentry != dentry);
/* Coordinate with configfs_readdir */
spin_lock(&configfs_dirent_lock);
- sd->s_dentry = NULL;
+ /* Coordinate with configfs_attach_attr, which increases
+ * sd->s_count and updates sd->s_dentry to the newly allocated one.
+ * Only set sd->s_dentry to NULL when this dentry is the only
+ * sd owner.
+ * Otherwise, configfs_d_iput may run just after
+ * configfs_attach_attr and set sd->s_dentry to NULL
+ * even though it is still in use.
+ */
+ if (atomic_read(&sd->s_count) <= 2)
+ sd->s_dentry = NULL;
+
spin_unlock(&configfs_dirent_lock);
configfs_put(sd);
}
@@ -426,8 +435,11 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den
struct configfs_attribute * attr = sd->s_element;
int error;
+ spin_lock(&configfs_dirent_lock);
dentry->d_fsdata = configfs_get(sd);
sd->s_dentry = dentry;
+ spin_unlock(&configfs_dirent_lock);
+
error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG,
configfs_init_file);
if (error) {
diff --git a/fs/coredump.c b/fs/coredump.c
index dafafbafa731..1d402ce5b72f 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -299,7 +299,7 @@ static int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
if (unlikely(nr < 0))
return nr;
- tsk->flags = PF_DUMPCORE;
+ tsk->flags |= PF_DUMPCORE;
if (atomic_read(&mm->mm_users) == nr + 1)
goto done;
/*
diff --git a/fs/dcache.c b/fs/dcache.c
index f09b9085f7d8..25c0a1b5f6c0 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -96,8 +96,6 @@ static struct kmem_cache *dentry_cache __read_mostly;
* This hash-function tries to avoid losing too many bits of hash
* information, yet avoid using a prime hash-size or similar.
*/
-#define D_HASHBITS d_hash_shift
-#define D_HASHMASK d_hash_mask
static unsigned int d_hash_mask __read_mostly;
static unsigned int d_hash_shift __read_mostly;
@@ -108,8 +106,7 @@ static inline struct hlist_bl_head *d_hash(const struct dentry *parent,
unsigned int hash)
{
hash += (unsigned long) parent / L1_CACHE_BYTES;
- hash = hash + (hash >> D_HASHBITS);
- return dentry_hashtable + (hash & D_HASHMASK);
+ return dentry_hashtable + hash_32(hash, d_hash_shift);
}
/* Statistics gathering. */
@@ -2686,8 +2683,13 @@ char *d_path(const struct path *path, char *buf, int buflen)
* thus don't need to be hashed. They also don't need a name until a
* user wants to identify the object in /proc/pid/fd/. The little hack
* below allows us to generate a name for these objects on demand:
+ *
+ * Some pseudo inodes are mountable. When they are mounted
+ * path->dentry == path->mnt->mnt_root. In that case don't call d_dname
+ * and instead have d_path return the mounted path.
*/
- if (path->dentry->d_op && path->dentry->d_op->d_dname)
+ if (path->dentry->d_op && path->dentry->d_op->d_dname &&
+ (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root))
return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
get_fs_root(current->fs, &root);
@@ -2724,6 +2726,17 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen,
return memcpy(buffer, temp, sz);
}
+char *simple_dname(struct dentry *dentry, char *buffer, int buflen)
+{
+ char *end = buffer + buflen;
+ /* these dentries are never renamed, so d_lock is not needed */
+ if (prepend(&end, &buflen, " (deleted)", 11) ||
+ prepend_name(&end, &buflen, &dentry->d_name) ||
+ prepend(&end, &buflen, "/", 1))
+ end = ERR_PTR(-ENAMETOOLONG);
+ return end;
+}
+
/*
* Write full pathname from the root of the filesystem into the buffer.
*/
diff --git a/fs/dcookies.c b/fs/dcookies.c
index ab5954b50267..ac44a69fbea9 100644
--- a/fs/dcookies.c
+++ b/fs/dcookies.c
@@ -204,7 +204,7 @@ out:
}
#ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE4(lookup_dcookie, u32, w0, u32, w1, char __user *, buf, size_t, len)
+COMPAT_SYSCALL_DEFINE4(lookup_dcookie, u32, w0, u32, w1, char __user *, buf, compat_size_t, len)
{
#ifdef __BIG_ENDIAN
return sys_lookup_dcookie(((u64)w0 << 32) | w1, buf, len);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 073d30b9d1ac..a726b9f29cb7 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -498,6 +498,7 @@ static void devpts_kill_sb(struct super_block *sb)
{
struct pts_fs_info *fsi = DEVPTS_SB(sb);
+ ida_destroy(&fsi->allocated_ptys);
kfree(fsi);
kill_litter_super(sb);
}
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index f71ec125290d..1da2446bf6b0 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -2102,7 +2102,6 @@ ecryptfs_decode_from_filename(unsigned char *dst, size_t *dst_size,
break;
case 2:
dst[dst_byte_offset++] |= (src_byte);
- dst[dst_byte_offset] = 0;
current_bit_offset = 0;
break;
}
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index a7abbea2c096..9ff3664bb3ea 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -196,23 +196,11 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
{
int rc = 0;
struct ecryptfs_crypt_stat *crypt_stat = NULL;
- struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
struct dentry *ecryptfs_dentry = file->f_path.dentry;
/* Private value of ecryptfs_dentry allocated in
* ecryptfs_lookup() */
struct ecryptfs_file_info *file_info;
- mount_crypt_stat = &ecryptfs_superblock_to_private(
- ecryptfs_dentry->d_sb)->mount_crypt_stat;
- if ((mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
- && ((file->f_flags & O_WRONLY) || (file->f_flags & O_RDWR)
- || (file->f_flags & O_CREAT) || (file->f_flags & O_TRUNC)
- || (file->f_flags & O_APPEND))) {
- printk(KERN_WARNING "Mount has encrypted view enabled; "
- "files may only be read\n");
- rc = -EPERM;
- goto out;
- }
/* Released in ecryptfs_release or end of function if failure */
file_info = kmem_cache_zalloc(ecryptfs_file_info_cache, GFP_KERNEL);
ecryptfs_set_file_private(file, file_info);
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 5eab400e2590..41baf8b5e0eb 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -1051,7 +1051,7 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
}
rc = vfs_setxattr(lower_dentry, name, value, size, flags);
- if (!rc)
+ if (!rc && dentry->d_inode)
fsstack_copy_attr_all(dentry->d_inode, lower_dentry->d_inode);
out:
return rc;
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 7d52806c2119..4725a07f003c 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -1149,7 +1149,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
struct ecryptfs_msg_ctx *msg_ctx;
struct ecryptfs_message *msg = NULL;
char *auth_tok_sig;
- char *payload;
+ char *payload = NULL;
size_t payload_len = 0;
int rc;
@@ -1203,6 +1203,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
}
out:
kfree(msg);
+ kfree(payload);
return rc;
}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index e924cf45aad9..329a9cc2b2eb 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -494,6 +494,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
{
struct super_block *s;
struct ecryptfs_sb_info *sbi;
+ struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
struct ecryptfs_dentry_info *root_info;
const char *err = "Getting sb failed";
struct inode *inode;
@@ -512,6 +513,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
err = "Error parsing options";
goto out;
}
+ mount_crypt_stat = &sbi->mount_crypt_stat;
s = sget(fs_type, NULL, set_anon_super, flags, NULL);
if (IS_ERR(s)) {
@@ -558,11 +560,19 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
/**
* Set the POSIX ACL flag based on whether they're enabled in the lower
- * mount. Force a read-only eCryptfs mount if the lower mount is ro.
- * Allow a ro eCryptfs mount even when the lower mount is rw.
+ * mount.
*/
s->s_flags = flags & ~MS_POSIXACL;
- s->s_flags |= path.dentry->d_sb->s_flags & (MS_RDONLY | MS_POSIXACL);
+ s->s_flags |= path.dentry->d_sb->s_flags & MS_POSIXACL;
+
+ /**
+ * Force a read-only eCryptfs mount when:
+ * 1) The lower mount is ro
+ * 2) The ecryptfs_encrypted_view mount option is specified
+ */
+ if (path.dentry->d_sb->s_flags & MS_RDONLY ||
+ mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
+ s->s_flags |= MS_RDONLY;
s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
s->s_blocksize = path.dentry->d_sb->s_blocksize;
diff --git a/fs/exec.c b/fs/exec.c
index 1f446705636b..dd6aa61c8548 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -654,10 +654,10 @@ int setup_arg_pages(struct linux_binprm *bprm,
unsigned long rlim_stack;
#ifdef CONFIG_STACK_GROWSUP
- /* Limit stack size to 1GB */
+ /* Limit stack size */
stack_base = rlimit_max(RLIMIT_STACK);
- if (stack_base > (1 << 30))
- stack_base = 1 << 30;
+ if (stack_base > STACK_SIZE_MAX)
+ stack_base = STACK_SIZE_MAX;
/* Make sure we didn't let the argument array grow too large. */
if (vma->vm_end - vma->vm_start > stack_base)
@@ -1669,6 +1669,12 @@ int __get_dumpable(unsigned long mm_flags)
return (ret > SUID_DUMP_USER) ? SUID_DUMP_ROOT : ret;
}
+/*
+ * This returns the actual value of the suid_dumpable flag. Callers
+ * that use this to check for privilege transitions must test
+ * against SUID_DUMP_USER rather than treating the result as a
+ * boolean value.
+ */
int get_dumpable(struct mm_struct *mm)
{
return __get_dumpable(mm->flags);
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index b74422888604..85cde3e76290 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -103,7 +103,7 @@ int ore_verify_layout(unsigned total_comps, struct ore_layout *layout)
layout->max_io_length =
(BIO_MAX_PAGES_KMALLOC * PAGE_SIZE - layout->stripe_unit) *
- layout->group_width;
+ (layout->group_width - layout->parity);
if (layout->parity) {
unsigned stripe_length =
(layout->group_width - layout->parity) *
@@ -286,7 +286,8 @@ int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc,
if (length) {
ore_calc_stripe_info(layout, offset, length, &ios->si);
ios->length = ios->si.length;
- ios->nr_pages = (ios->length + PAGE_SIZE - 1) / PAGE_SIZE;
+ ios->nr_pages = ((ios->offset & (PAGE_SIZE - 1)) +
+ ios->length + PAGE_SIZE - 1) / PAGE_SIZE;
if (layout->parity)
_ore_post_alloc_raid_stuff(ios);
}
@@ -536,6 +537,7 @@ void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset,
u64 H = LmodS - G * T;
u32 N = div_u64(H, U);
+ u32 Nlast;
/* "H - (N * U)" is just "H % U" so it's bound to u32 */
u32 C = (u32)(H - (N * U)) / stripe_unit + G * group_width;
@@ -568,6 +570,10 @@ void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset,
si->length = T - H;
if (si->length > length)
si->length = length;
+
+ Nlast = div_u64(H + si->length + U - 1, U);
+ si->maxdevUnits = Nlast - N;
+
si->M = M;
}
EXPORT_SYMBOL(ore_calc_stripe_info);
@@ -583,13 +589,16 @@ int _ore_add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg,
int ret;
if (per_dev->bio == NULL) {
- unsigned pages_in_stripe = ios->layout->group_width *
- (ios->layout->stripe_unit / PAGE_SIZE);
- unsigned nr_pages = ios->nr_pages * ios->layout->group_width /
- (ios->layout->group_width -
- ios->layout->parity);
- unsigned bio_size = (nr_pages + pages_in_stripe) /
- ios->layout->group_width;
+ unsigned bio_size;
+
+ if (!ios->reading) {
+ bio_size = ios->si.maxdevUnits;
+ } else {
+ bio_size = (ios->si.maxdevUnits + 1) *
+ (ios->layout->group_width - ios->layout->parity) /
+ ios->layout->group_width;
+ }
+ bio_size *= (ios->layout->stripe_unit / PAGE_SIZE);
per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size);
if (unlikely(!per_dev->bio)) {
@@ -609,8 +618,12 @@ int _ore_add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg,
added_len = bio_add_pc_page(q, per_dev->bio, pages[pg],
pglen, pgbase);
if (unlikely(pglen != added_len)) {
- ORE_DBGMSG("Failed bio_add_pc_page bi_vcnt=%u\n",
- per_dev->bio->bi_vcnt);
+ /* If bi_vcnt == bi_max then this is a SW BUG */
+ ORE_DBGMSG("Failed bio_add_pc_page bi_vcnt=0x%x "
+ "bi_max=0x%x BIO_MAX=0x%x cur_len=0x%x\n",
+ per_dev->bio->bi_vcnt,
+ per_dev->bio->bi_max_vecs,
+ BIO_MAX_PAGES_KMALLOC, cur_len);
ret = -ENOMEM;
goto out;
}
@@ -1098,7 +1111,7 @@ int ore_truncate(struct ore_layout *layout, struct ore_components *oc,
size_attr->attr = g_attr_logical_length;
size_attr->attr.val_ptr = &size_attr->newsize;
- ORE_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n",
+ ORE_DBGMSG2("trunc(0x%llx) obj_offset=0x%llx dev=%d\n",
_LLU(oc->comps->obj.id), _LLU(obj_size), i);
ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1,
&size_attr->attr);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 0a87bb10998d..99d84ce038b8 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -632,6 +632,8 @@ static int ext2_get_blocks(struct inode *inode,
int count = 0;
ext2_fsblk_t first_block = 0;
+ BUG_ON(maxblocks == 0);
+
depth = ext2_block_to_path(inode,iblock,offsets,&blocks_to_boundary);
if (depth == 0)
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 288534920fe5..20d6697bd638 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1493,6 +1493,7 @@ static ssize_t ext2_quota_write(struct super_block *sb, int type,
sb->s_blocksize - offset : towrite;
tmp_bh.b_state = 0;
+ tmp_bh.b_size = sb->s_blocksize;
err = ext2_get_block(inode, blk, &tmp_bh, 1);
if (err < 0)
goto out;
diff --git a/fs/ext2/xip.c b/fs/ext2/xip.c
index 1c3312858fcf..e98171a11cfe 100644
--- a/fs/ext2/xip.c
+++ b/fs/ext2/xip.c
@@ -35,6 +35,7 @@ __ext2_get_block(struct inode *inode, pgoff_t pgoff, int create,
int rc;
memset(&tmp, 0, sizeof(struct buffer_head));
+ tmp.b_size = 1 << inode->i_blkbits;
rc = ext2_get_block(inode, pgoff, &tmp, create);
*result = tmp.b_blocknr;
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 6356665a74bb..882d4bdfd428 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1300,13 +1300,6 @@ set_qf_format:
"not specified.");
return 0;
}
- } else {
- if (sbi->s_jquota_fmt) {
- ext3_msg(sb, KERN_ERR, "error: journaled quota format "
- "specified with no journaling "
- "enabled.");
- return 0;
- }
}
#endif
return 1;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 5aae3d12d400..e4c4ac07cc32 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -280,6 +280,16 @@ struct ext4_io_submit {
/* Translate # of blks to # of clusters */
#define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \
(sbi)->s_cluster_bits)
+/* Mask out the low bits to get the starting block of the cluster */
+#define EXT4_PBLK_CMASK(s, pblk) ((pblk) & \
+ ~((ext4_fsblk_t) (s)->s_cluster_ratio - 1))
+#define EXT4_LBLK_CMASK(s, lblk) ((lblk) & \
+ ~((ext4_lblk_t) (s)->s_cluster_ratio - 1))
+/* Get the cluster offset */
+#define EXT4_PBLK_COFF(s, pblk) ((pblk) & \
+ ((ext4_fsblk_t) (s)->s_cluster_ratio - 1))
+#define EXT4_LBLK_COFF(s, lblk) ((lblk) & \
+ ((ext4_lblk_t) (s)->s_cluster_ratio - 1))
/*
* Structure of a blocks group descriptor
@@ -764,6 +774,8 @@ do { \
if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime)) \
(einode)->xtime.tv_sec = \
(signed)le32_to_cpu((raw_inode)->xtime); \
+ else \
+ (einode)->xtime.tv_sec = 0; \
if (EXT4_FITS_IN_INODE(raw_inode, einode, xtime ## _extra)) \
ext4_decode_extra_time(&(einode)->xtime, \
raw_inode->xtime ## _extra); \
@@ -2076,6 +2088,7 @@ int do_journal_get_write_access(handle_t *handle,
#define CONVERT_INLINE_DATA 2
extern struct inode *ext4_iget(struct super_block *, unsigned long);
+extern struct inode *ext4_iget_normal(struct super_block *, unsigned long);
extern int ext4_write_inode(struct inode *, struct writeback_control *);
extern int ext4_setattr(struct dentry *, struct iattr *);
extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
@@ -2248,8 +2261,8 @@ extern int ext4_register_li_request(struct super_block *sb,
static inline int ext4_has_group_desc_csum(struct super_block *sb)
{
return EXT4_HAS_RO_COMPAT_FEATURE(sb,
- EXT4_FEATURE_RO_COMPAT_GDT_CSUM |
- EXT4_FEATURE_RO_COMPAT_METADATA_CSUM);
+ EXT4_FEATURE_RO_COMPAT_GDT_CSUM) ||
+ (EXT4_SB(sb)->s_chksum_driver != NULL);
}
static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 1c88061da526..1be3996b5942 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -223,6 +223,15 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
if (WARN_ON_ONCE(err)) {
ext4_journal_abort_handle(where, line, __func__, bh,
handle, err);
+ ext4_error_inode(inode, where, line,
+ bh->b_blocknr,
+ "journal_dirty_metadata failed: "
+ "handle type %u started at line %u, "
+ "credits %u/%u, errcode %d",
+ handle->h_type,
+ handle->h_line_no,
+ handle->h_requested_credits,
+ handle->h_buffer_credits, err);
}
} else {
if (inode)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index dc1e03047226..84d817b842a8 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -360,8 +360,10 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
{
ext4_fsblk_t block = ext4_ext_pblock(ext);
int len = ext4_ext_get_actual_len(ext);
+ ext4_lblk_t lblock = le32_to_cpu(ext->ee_block);
+ ext4_lblk_t last = lblock + len - 1;
- if (len == 0)
+ if (lblock > last)
return 0;
return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
}
@@ -387,11 +389,26 @@ static int ext4_valid_extent_entries(struct inode *inode,
if (depth == 0) {
/* leaf entries */
struct ext4_extent *ext = EXT_FIRST_EXTENT(eh);
+ struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
+ ext4_fsblk_t pblock = 0;
+ ext4_lblk_t lblock = 0;
+ ext4_lblk_t prev = 0;
+ int len = 0;
while (entries) {
if (!ext4_valid_extent(inode, ext))
return 0;
+
+ /* Check for overlapping extents */
+ lblock = le32_to_cpu(ext->ee_block);
+ len = ext4_ext_get_actual_len(ext);
+ if ((lblock <= prev) && prev) {
+ pblock = ext4_ext_pblock(ext);
+ es->s_last_error_block = cpu_to_le64(pblock);
+ return 0;
+ }
ext++;
entries--;
+ prev = lblock + len - 1;
}
} else {
struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh);
@@ -1755,8 +1772,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
depth = ext_depth(inode);
if (!path[depth].p_ext)
goto out;
- b2 = le32_to_cpu(path[depth].p_ext->ee_block);
- b2 &= ~(sbi->s_cluster_ratio - 1);
+ b2 = EXT4_LBLK_CMASK(sbi, le32_to_cpu(path[depth].p_ext->ee_block));
/*
* get the next allocated block if the extent in the path
@@ -1766,7 +1782,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi,
b2 = ext4_ext_next_allocated_block(path);
if (b2 == EXT_MAX_BLOCKS)
goto out;
- b2 &= ~(sbi->s_cluster_ratio - 1);
+ b2 = EXT4_LBLK_CMASK(sbi, b2);
}
/* check for wrap through zero on extent logical start block*/
@@ -2427,7 +2443,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
* truncate operation has removed all of the blocks in
* the cluster.
*/
- if (pblk & (sbi->s_cluster_ratio - 1) &&
+ if (EXT4_PBLK_COFF(sbi, pblk) &&
(ee_len == num))
*partial_cluster = EXT4_B2C(sbi, pblk);
else
@@ -2495,6 +2511,27 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
ex_ee_block = le32_to_cpu(ex->ee_block);
ex_ee_len = ext4_ext_get_actual_len(ex);
+ /*
+ * If we're starting with an extent other than the last one in the
+ * node, check whether it shares a cluster with the extent to its
+ * right (towards the end of the file). If the right extent starts
+ * in this extent's last cluster and is not cluster aligned, mark
+ * that cluster as a partial that is not to be deallocated.
+ */
+
+ if (ex != EXT_LAST_EXTENT(eh)) {
+ ext4_fsblk_t current_pblk, right_pblk;
+ long long current_cluster, right_cluster;
+
+ current_pblk = ext4_ext_pblock(ex) + ex_ee_len - 1;
+ current_cluster = (long long)EXT4_B2C(sbi, current_pblk);
+ right_pblk = ext4_ext_pblock(ex + 1);
+ right_cluster = (long long)EXT4_B2C(sbi, right_pblk);
+ if (current_cluster == right_cluster &&
+ EXT4_PBLK_COFF(sbi, right_pblk))
+ *partial_cluster = -right_cluster;
+ }
+
trace_ext4_ext_rm_leaf(inode, start, ex, *partial_cluster);
while (ex >= EXT_FIRST_EXTENT(eh) &&
@@ -3658,7 +3695,7 @@ int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
ext4_lblk_t lblk_start, lblk_end;
- lblk_start = lblk & (~(sbi->s_cluster_ratio - 1));
+ lblk_start = EXT4_LBLK_CMASK(sbi, lblk);
lblk_end = lblk_start + sbi->s_cluster_ratio - 1;
return ext4_find_delalloc_range(inode, lblk_start, lblk_end);
@@ -3717,9 +3754,9 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks);
/* Check towards left side */
- c_offset = lblk_start & (sbi->s_cluster_ratio - 1);
+ c_offset = EXT4_LBLK_COFF(sbi, lblk_start);
if (c_offset) {
- lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1));
+ lblk_from = EXT4_LBLK_CMASK(sbi, lblk_start);
lblk_to = lblk_from + c_offset - 1;
if (ext4_find_delalloc_range(inode, lblk_from, lblk_to))
@@ -3727,7 +3764,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
}
/* Now check towards right. */
- c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1);
+ c_offset = EXT4_LBLK_COFF(sbi, lblk_start + num_blks);
if (allocated_clusters && c_offset) {
lblk_from = lblk_start + num_blks;
lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1;
@@ -3935,7 +3972,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
struct ext4_ext_path *path)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
- ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
+ ext4_lblk_t c_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
ext4_lblk_t ex_cluster_start, ex_cluster_end;
ext4_lblk_t rr_cluster_start;
ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
@@ -3953,8 +3990,7 @@ static int get_implied_cluster_alloc(struct super_block *sb,
(rr_cluster_start == ex_cluster_start)) {
if (rr_cluster_start == ex_cluster_end)
ee_start += ee_len - 1;
- map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) +
- c_offset;
+ map->m_pblk = EXT4_PBLK_CMASK(sbi, ee_start) + c_offset;
map->m_len = min(map->m_len,
(unsigned) sbi->s_cluster_ratio - c_offset);
/*
@@ -4017,7 +4053,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
struct ext4_extent newex, *ex, *ex2;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
ext4_fsblk_t newblock = 0;
- int free_on_err = 0, err = 0, depth;
+ int free_on_err = 0, err = 0, depth, ret;
unsigned int allocated = 0, offset = 0;
unsigned int allocated_clusters = 0;
struct ext4_allocation_request ar;
@@ -4078,9 +4114,13 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
if (!ext4_ext_is_uninitialized(ex))
goto out;
- allocated = ext4_ext_handle_uninitialized_extents(
+ ret = ext4_ext_handle_uninitialized_extents(
handle, inode, map, path, flags,
allocated, newblock);
+ if (ret < 0)
+ err = ret;
+ else
+ allocated = ret;
goto out3;
}
}
@@ -4108,7 +4148,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
*/
map->m_flags &= ~EXT4_MAP_FROM_CLUSTER;
newex.ee_block = cpu_to_le32(map->m_lblk);
- cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1);
+ cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
/*
* If we are doing bigalloc, check to see if the extent returned
@@ -4176,7 +4216,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* needed so that future calls to get_implied_cluster_alloc()
* work correctly.
*/
- offset = map->m_lblk & (sbi->s_cluster_ratio - 1);
+ offset = EXT4_LBLK_COFF(sbi, map->m_lblk);
ar.len = EXT4_NUM_B2C(sbi, offset+allocated);
ar.goal -= offset;
ar.logical -= offset;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index b19f0a457f32..4635788e14bf 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -82,7 +82,7 @@ ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
size_t count = iov_length(iov, nr_segs);
loff_t final_size = pos + count;
- if (pos >= inode->i_size)
+ if (pos >= i_size_read(inode))
return 0;
if ((pos & blockmask) || (final_size & blockmask))
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 3da3bf1b2cd0..4d4718cf25ab 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -780,12 +780,23 @@ got:
goto out;
}
+ BUFFER_TRACE(group_desc_bh, "get_write_access");
+ err = ext4_journal_get_write_access(handle, group_desc_bh);
+ if (err) {
+ ext4_std_error(sb, err);
+ goto out;
+ }
+
/* We may have to initialize the block bitmap if it isn't already */
if (ext4_has_group_desc_csum(sb) &&
gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
struct buffer_head *block_bitmap_bh;
block_bitmap_bh = ext4_read_block_bitmap(sb, group);
+ if (!block_bitmap_bh) {
+ err = -EIO;
+ goto out;
+ }
BUFFER_TRACE(block_bitmap_bh, "get block bitmap access");
err = ext4_journal_get_write_access(handle, block_bitmap_bh);
if (err) {
@@ -816,13 +827,6 @@ got:
}
}
- BUFFER_TRACE(group_desc_bh, "get_write_access");
- err = ext4_journal_get_write_access(handle, group_desc_bh);
- if (err) {
- ext4_std_error(sb, err);
- goto out;
- }
-
/* Update the relevant bg descriptor fields */
if (ext4_has_group_desc_csum(sb)) {
int free;
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index b8d5d351e24f..589061469687 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -390,7 +390,13 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
return 0;
failed:
for (; i >= 0; i--) {
- if (i != indirect_blks && branch[i].bh)
+ /*
+ * We want to ext4_forget() only freshly allocated indirect
+ * blocks. Buffer for new_blocks[i-1] is at branch[i].bh and
+ * buffer at branch[0].bh is indirect block / inode already
+ * existing before ext4_alloc_branch() was called.
+ */
+ if (i > 0 && i != indirect_blks && branch[i].bh)
ext4_forget(handle, 1, inode, branch[i].bh,
branch[i].bh->b_blocknr);
ext4_free_blocks(handle, inode, NULL, new_blocks[i],
@@ -1325,16 +1331,24 @@ static int free_hole_blocks(handle_t *handle, struct inode *inode,
blk = *i_data;
if (level > 0) {
ext4_lblk_t first2;
+ ext4_lblk_t count2;
+
bh = sb_bread(inode->i_sb, le32_to_cpu(blk));
if (!bh) {
EXT4_ERROR_INODE_BLOCK(inode, le32_to_cpu(blk),
"Read failure");
return -EIO;
}
- first2 = (first > offset) ? first - offset : 0;
+ if (first > offset) {
+ first2 = first - offset;
+ count2 = count;
+ } else {
+ first2 = 0;
+ count2 = count - (offset - first);
+ }
ret = free_hole_blocks(handle, inode, bh,
(__le32 *)bh->b_data, level - 1,
- first2, count - offset,
+ first2, count2,
inode->i_sb->s_blocksize >> 2);
if (ret) {
brelse(bh);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 33331b4c2178..e350be6c7ac6 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -1957,9 +1957,11 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
}
/* Clear the content within i_blocks. */
- if (i_size < EXT4_MIN_INLINE_DATA_SIZE)
- memset(ext4_raw_inode(&is.iloc)->i_block + i_size, 0,
- EXT4_MIN_INLINE_DATA_SIZE - i_size);
+ if (i_size < EXT4_MIN_INLINE_DATA_SIZE) {
+ void *p = (void *) ext4_raw_inode(&is.iloc)->i_block;
+ memset(p + i_size, 0,
+ EXT4_MIN_INLINE_DATA_SIZE - i_size);
+ }
EXT4_I(inode)->i_inline_size = i_size <
EXT4_MIN_INLINE_DATA_SIZE ?
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 904ca1a21dce..e48bd5a1814b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -38,6 +38,7 @@
#include <linux/slab.h>
#include <linux/ratelimit.h>
#include <linux/aio.h>
+#include <linux/bitops.h>
#include "ext4_jbd2.h"
#include "xattr.h"
@@ -1263,7 +1264,6 @@ static int ext4_journalled_write_end(struct file *file,
*/
static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
{
- int retries = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned int md_needed;
@@ -1275,7 +1275,6 @@ static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
* in order to allocate nrblocks
* worse case is one extent per block
*/
-repeat:
spin_lock(&ei->i_block_reservation_lock);
/*
* ext4_calc_metadata_amount() has side effects, which we have
@@ -1295,10 +1294,6 @@ repeat:
ei->i_da_metadata_calc_len = save_len;
ei->i_da_metadata_calc_last_lblock = save_last_lblock;
spin_unlock(&ei->i_block_reservation_lock);
- if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
- cond_resched();
- goto repeat;
- }
return -ENOSPC;
}
ei->i_reserved_meta_blocks += md_needed;
@@ -1312,7 +1307,6 @@ repeat:
*/
static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
{
- int retries = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct ext4_inode_info *ei = EXT4_I(inode);
unsigned int md_needed;
@@ -1334,7 +1328,6 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
* in order to allocate nrblocks
* worse case is one extent per block
*/
-repeat:
spin_lock(&ei->i_block_reservation_lock);
/*
* ext4_calc_metadata_amount() has side effects, which we have
@@ -1354,10 +1347,6 @@ repeat:
ei->i_da_metadata_calc_len = save_len;
ei->i_da_metadata_calc_last_lblock = save_last_lblock;
spin_unlock(&ei->i_block_reservation_lock);
- if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
- cond_resched();
- goto repeat;
- }
dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
return -ENOSPC;
}
@@ -2658,6 +2647,20 @@ static int ext4_nonda_switch(struct super_block *sb)
return 0;
}
+/* We always reserve for an inode update; the superblock could be there too */
+static int ext4_da_write_credits(struct inode *inode, loff_t pos, unsigned len)
+{
+ if (likely(EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
+ EXT4_FEATURE_RO_COMPAT_LARGE_FILE)))
+ return 1;
+
+ if (pos + len <= 0x7fffffffULL)
+ return 1;
+
+ /* We might need to update the superblock to set LARGE_FILE */
+ return 2;
+}
+
static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
@@ -2708,7 +2711,8 @@ retry_grab:
* of file which has an already mapped buffer.
*/
retry_journal:
- handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 1);
+ handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
+ ext4_da_write_credits(inode, pos, len));
if (IS_ERR(handle)) {
page_cache_release(page);
return PTR_ERR(handle);
@@ -4056,18 +4060,20 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc)
void ext4_set_inode_flags(struct inode *inode)
{
unsigned int flags = EXT4_I(inode)->i_flags;
+ unsigned int new_fl = 0;
- inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC);
if (flags & EXT4_SYNC_FL)
- inode->i_flags |= S_SYNC;
+ new_fl |= S_SYNC;
if (flags & EXT4_APPEND_FL)
- inode->i_flags |= S_APPEND;
+ new_fl |= S_APPEND;
if (flags & EXT4_IMMUTABLE_FL)
- inode->i_flags |= S_IMMUTABLE;
+ new_fl |= S_IMMUTABLE;
if (flags & EXT4_NOATIME_FL)
- inode->i_flags |= S_NOATIME;
+ new_fl |= S_NOATIME;
if (flags & EXT4_DIRSYNC_FL)
- inode->i_flags |= S_DIRSYNC;
+ new_fl |= S_DIRSYNC;
+ set_mask_bits(&inode->i_flags,
+ S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC, new_fl);
}
/* Propagate flags from i_flags to EXT4_I(inode)->i_flags */
@@ -4360,6 +4366,13 @@ bad_inode:
return ERR_PTR(ret);
}
+struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino)
+{
+ if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
+ return ERR_PTR(-EIO);
+ return ext4_iget(sb, ino);
+}
+
static int ext4_inode_blocks_set(handle_t *handle,
struct ext4_inode *raw_inode,
struct ext4_inode_info *ei)
@@ -4716,6 +4729,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
if (attr->ia_size > sbi->s_bitmap_maxbytes)
return -EFBIG;
}
+
+ if (IS_I_VERSION(inode) && attr->ia_size != inode->i_size)
+ inode_inc_iversion(inode);
+
if (S_ISREG(inode->i_mode) &&
(attr->ia_size < inode->i_size)) {
if (ext4_should_order_data(inode)) {
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index c0427e2f6648..d4fd81c44f55 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -145,7 +145,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
handle = ext4_journal_start(inode_bl, EXT4_HT_MOVE_EXTENTS, 2);
if (IS_ERR(handle)) {
err = -EINVAL;
- goto swap_boot_out;
+ goto journal_err_out;
}
/* Protect extent tree against block allocations via delalloc */
@@ -203,6 +203,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
ext4_double_up_write_data_sem(inode, inode_bl);
+journal_err_out:
ext4_inode_resume_unlocked_dio(inode);
ext4_inode_resume_unlocked_dio(inode_bl);
@@ -548,9 +549,17 @@ group_add_out:
}
case EXT4_IOC_SWAP_BOOT:
+ {
+ int err;
if (!(filp->f_mode & FMODE_WRITE))
return -EBADF;
- return swap_inode_boot_loader(sb, inode);
+ err = mnt_want_write_file(filp);
+ if (err)
+ return err;
+ err = swap_inode_boot_loader(sb, inode);
+ mnt_drop_write_file(filp);
+ return err;
+ }
case EXT4_IOC_RESIZE_FS: {
ext4_fsblk_t n_blocks_count;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 59c6750b894f..162b80d527a0 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -1396,6 +1396,8 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
int last = first + count - 1;
struct super_block *sb = e4b->bd_sb;
+ if (WARN_ON(count == 0))
+ return;
BUG_ON(last >= (sb->s_blocksize << 3));
assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
mb_check_buddy(e4b);
@@ -3116,7 +3118,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
}
BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
start > ac->ac_o_ex.fe_logical);
- BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb));
+ BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
/* now prepare goal request */
@@ -3177,8 +3179,30 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
{
struct ext4_prealloc_space *pa = ac->ac_pa;
+ struct ext4_buddy e4b;
+ int err;
- if (pa && pa->pa_type == MB_INODE_PA)
+ if (pa == NULL) {
+ if (ac->ac_f_ex.fe_len == 0)
+ return;
+ err = ext4_mb_load_buddy(ac->ac_sb, ac->ac_f_ex.fe_group, &e4b);
+ if (err) {
+ /*
+ * This should never happen since we pin the
+ * pages in the ext4_allocation_context so
+ * ext4_mb_load_buddy() should never fail.
+ */
+ WARN(1, "mb_load_buddy failed (%d)", err);
+ return;
+ }
+ ext4_lock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
+ mb_free_blocks(ac->ac_inode, &e4b, ac->ac_f_ex.fe_start,
+ ac->ac_f_ex.fe_len);
+ ext4_unlock_group(ac->ac_sb, ac->ac_f_ex.fe_group);
+ ext4_mb_unload_buddy(&e4b);
+ return;
+ }
+ if (pa->pa_type == MB_INODE_PA)
pa->pa_free += ac->ac_b_ex.fe_len;
}
@@ -3423,6 +3447,9 @@ static void ext4_mb_pa_callback(struct rcu_head *head)
{
struct ext4_prealloc_space *pa;
pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu);
+
+ BUG_ON(atomic_read(&pa->pa_count));
+ BUG_ON(pa->pa_deleted == 0);
kmem_cache_free(ext4_pspace_cachep, pa);
}
@@ -3436,11 +3463,13 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac,
ext4_group_t grp;
ext4_fsblk_t grp_blk;
- if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0)
- return;
-
/* in this short window concurrent discard can set pa_deleted */
spin_lock(&pa->pa_lock);
+ if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) {
+ spin_unlock(&pa->pa_lock);
+ return;
+ }
+
if (pa->pa_deleted == 1) {
spin_unlock(&pa->pa_lock);
return;
@@ -4102,7 +4131,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac,
ext4_get_group_no_and_offset(sb, goal, &group, &block);
/* set up allocation goals */
- ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1);
+ ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical);
ac->ac_status = AC_STATUS_CONTINUE;
ac->ac_sb = sb;
ac->ac_inode = ar->inode;
@@ -4639,7 +4668,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
* blocks at the beginning or the end unless we are explicitly
* requested to avoid doing so.
*/
- overflow = block & (sbi->s_cluster_ratio - 1);
+ overflow = EXT4_PBLK_COFF(sbi, block);
if (overflow) {
if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) {
overflow = sbi->s_cluster_ratio - overflow;
@@ -4653,7 +4682,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
count += overflow;
}
}
- overflow = count & (sbi->s_cluster_ratio - 1);
+ overflow = EXT4_LBLK_COFF(sbi, count);
if (overflow) {
if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) {
if (count > overflow)
@@ -4766,8 +4795,8 @@ do_more:
" group:%d block:%d count:%lu failed"
" with %d", block_group, bit, count,
err);
- }
-
+ } else
+ EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info);
ext4_lock_group(sb, block_group);
mb_clear_bits(bitmap_bh->b_data, bit, count_clusters);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index ab2f6dc44b3a..f1312173fa90 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1430,7 +1430,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
dentry->d_name.name);
return ERR_PTR(-EIO);
}
- inode = ext4_iget(dir->i_sb, ino);
+ inode = ext4_iget_normal(dir->i_sb, ino);
if (inode == ERR_PTR(-ESTALE)) {
EXT4_ERROR_INODE(dir,
"deleted inode referenced: %u",
@@ -1461,7 +1461,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
return ERR_PTR(-EIO);
}
- return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino));
+ return d_obtain_alias(ext4_iget_normal(child->d_inode->i_sb, ino));
}
/*
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 4acf1f78881b..b12a4427aedc 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -384,6 +384,17 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
ClearPageError(page);
/*
+ * Comments copied from block_write_full_page_endio:
+ *
+ * The page straddles i_size. It must be zeroed out on each and every
+ * writepage invocation because it may be mmapped. "A file is mapped
+ * in multiples of the page size. For a file that is not a multiple of
+ * the page size, the remaining memory is zeroed when mapped, and
+ * writes to that region are not written out to the file."
+ */
+ if (len < PAGE_CACHE_SIZE)
+ zero_user_segment(page, len, PAGE_CACHE_SIZE);
+ /*
* In the first loop we prepare and mark buffers to submit. We have to
* mark all buffers in the page before submitting so that
* end_page_writeback() cannot be called from ext4_bio_end_io() when IO
@@ -394,19 +405,6 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
do {
block_start = bh_offset(bh);
if (block_start >= len) {
- /*
- * Comments copied from block_write_full_page_endio:
- *
- * The page straddles i_size. It must be zeroed out on
- * each and every writepage invocation because it may
- * be mmapped. "A file is mapped in multiples of the
- * page size. For a file that is not a multiple of
- * the page size, the remaining memory is zeroed when
- * mapped, and writes to that region are not written
- * out to the file."
- */
- zero_user_segment(page, block_start,
- block_start + blocksize);
clear_buffer_dirty(bh);
set_buffer_uptodate(bh);
continue;
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 49d3c01eabf8..a69bd74ed390 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -238,6 +238,7 @@ static int ext4_alloc_group_tables(struct super_block *sb,
ext4_group_t group;
ext4_group_t last_group;
unsigned overhead;
+ __u16 uninit_mask = (flexbg_size > 1) ? ~EXT4_BG_BLOCK_UNINIT : ~0;
BUG_ON(flex_gd->count == 0 || group_data == NULL);
@@ -261,7 +262,7 @@ next_group:
src_group++;
for (; src_group <= last_group; src_group++) {
overhead = ext4_group_overhead_blocks(sb, src_group);
- if (overhead != 0)
+ if (overhead == 0)
last_blk += group_data[src_group - group].blocks_count;
else
break;
@@ -275,8 +276,7 @@ next_group:
group = ext4_get_group_number(sb, start_blk - 1);
group -= group_data[0].group;
group_data[group].free_blocks_count--;
- if (flexbg_size > 1)
- flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
+ flex_gd->bg_flags[group] &= uninit_mask;
}
/* Allocate inode bitmaps */
@@ -287,22 +287,30 @@ next_group:
group = ext4_get_group_number(sb, start_blk - 1);
group -= group_data[0].group;
group_data[group].free_blocks_count--;
- if (flexbg_size > 1)
- flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
+ flex_gd->bg_flags[group] &= uninit_mask;
}
/* Allocate inode tables */
for (; it_index < flex_gd->count; it_index++) {
- if (start_blk + EXT4_SB(sb)->s_itb_per_group > last_blk)
+ unsigned int itb = EXT4_SB(sb)->s_itb_per_group;
+ ext4_fsblk_t next_group_start;
+
+ if (start_blk + itb > last_blk)
goto next_group;
group_data[it_index].inode_table = start_blk;
- group = ext4_get_group_number(sb, start_blk - 1);
+ group = ext4_get_group_number(sb, start_blk);
+ next_group_start = ext4_group_first_block_no(sb, group + 1);
group -= group_data[0].group;
- group_data[group].free_blocks_count -=
- EXT4_SB(sb)->s_itb_per_group;
- if (flexbg_size > 1)
- flex_gd->bg_flags[group] &= ~EXT4_BG_BLOCK_UNINIT;
+ if (start_blk + itb > next_group_start) {
+ flex_gd->bg_flags[group + 1] &= uninit_mask;
+ overhead = start_blk + itb - next_group_start;
+ group_data[group + 1].free_blocks_count -= overhead;
+ itb -= overhead;
+ }
+
+ group_data[group].free_blocks_count -= itb;
+ flex_gd->bg_flags[group] &= uninit_mask;
start_blk += EXT4_SB(sb)->s_itb_per_group;
}
@@ -396,7 +404,7 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
start = ext4_group_first_block_no(sb, group);
group -= flex_gd->groups[0].group;
- count2 = sb->s_blocksize * 8 - (block - start);
+ count2 = EXT4_BLOCKS_PER_GROUP(sb) - (block - start);
if (count2 > count)
count2 = count;
@@ -615,7 +623,7 @@ handle_ib:
if (err)
goto out;
count = group_table_count[j];
- start = group_data[i].block_bitmap;
+ start = (&group_data[i].block_bitmap)[j];
block = start;
}
@@ -1058,7 +1066,7 @@ static void update_backups(struct super_block *sb, int blk_off, char *data,
break;
if (meta_bg == 0)
- backup_block = group * bpg + blk_off;
+ backup_block = ((ext4_fsblk_t)group) * bpg + blk_off;
else
backup_block = (ext4_group_first_block_no(sb, group) +
ext4_bg_has_super(sb, group));
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 3f7c39e6d097..21a0b43a7d31 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -964,7 +964,7 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb,
* Currently we don't know the generation for parent directory, so
* a generation of 0 means "accept any"
*/
- inode = ext4_iget(sb, ino);
+ inode = ext4_iget_normal(sb, ino);
if (IS_ERR(inode))
return ERR_CAST(inode);
if (generation && inode->i_generation != generation) {
@@ -1483,8 +1483,6 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
arg = JBD2_DEFAULT_MAX_COMMIT_AGE;
sbi->s_commit_interval = HZ * arg;
} else if (token == Opt_max_batch_time) {
- if (arg == 0)
- arg = EXT4_DEF_MAX_BATCH_TIME;
sbi->s_max_batch_time = arg;
} else if (token == Opt_min_batch_time) {
sbi->s_min_batch_time = arg;
@@ -1634,13 +1632,6 @@ static int parse_options(char *options, struct super_block *sb,
"not specified");
return 0;
}
- } else {
- if (sbi->s_jquota_fmt) {
- ext4_msg(sb, KERN_ERR, "journaled quota format "
- "specified with no journaling "
- "enabled");
- return 0;
- }
}
#endif
if (test_opt(sb, DIOREAD_NOLOCK)) {
@@ -1959,6 +1950,10 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
}
/* old crc16 code */
+ if (!(sbi->s_es->s_feature_ro_compat &
+ cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)))
+ return 0;
+
offset = offsetof(struct ext4_group_desc, bg_checksum);
crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
@@ -2687,10 +2682,11 @@ static void print_daily_error_info(unsigned long arg)
es = sbi->s_es;
if (es->s_error_count)
- ext4_msg(sb, KERN_NOTICE, "error count: %u",
+ /* fsck newer than v1.41.13 is needed to clean this condition. */
+ ext4_msg(sb, KERN_NOTICE, "error count since last fsck: %u",
le32_to_cpu(es->s_error_count));
if (es->s_first_error_time) {
- printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d",
+ printk(KERN_NOTICE "EXT4-fs (%s): initial error at time %u: %.*s:%d",
sb->s_id, le32_to_cpu(es->s_first_error_time),
(int) sizeof(es->s_first_error_func),
es->s_first_error_func,
@@ -2704,7 +2700,7 @@ static void print_daily_error_info(unsigned long arg)
printk("\n");
}
if (es->s_last_error_time) {
- printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d",
+ printk(KERN_NOTICE "EXT4-fs (%s): last error at time %u: %.*s:%d",
sb->s_id, le32_to_cpu(es->s_last_error_time),
(int) sizeof(es->s_last_error_func),
es->s_last_error_func,
@@ -3213,11 +3209,19 @@ int ext4_calculate_overhead(struct super_block *sb)
}
-static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi)
+static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb)
{
ext4_fsblk_t resv_clusters;
/*
+ * There's no need to reserve anything when we aren't using extents.
+ * The space estimates are exact, there are no unwritten extents,
+ * hole punching doesn't need new metadata... This is needed especially
+ * to keep ext2/3 backward compatibility.
+ */
+ if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS))
+ return 0;
+ /*
* By default we reserve 2% or 4096 clusters, whichever is smaller.
* This should cover the situations where we can not afford to run
* out of space like for example punch hole, or converting
@@ -3225,7 +3229,8 @@ static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi)
* allocation would require 1, or 2 blocks, higher numbers are
* very rare.
*/
- resv_clusters = ext4_blocks_count(sbi->s_es) >> sbi->s_cluster_bits;
+ resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >>
+ EXT4_SB(sb)->s_cluster_bits;
do_div(resv_clusters, 50);
resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096);
@@ -3583,16 +3588,22 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
for (i = 0; i < 4; i++)
sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
sbi->s_def_hash_version = es->s_def_hash_version;
- i = le32_to_cpu(es->s_flags);
- if (i & EXT2_FLAGS_UNSIGNED_HASH)
- sbi->s_hash_unsigned = 3;
- else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
+ if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) {
+ i = le32_to_cpu(es->s_flags);
+ if (i & EXT2_FLAGS_UNSIGNED_HASH)
+ sbi->s_hash_unsigned = 3;
+ else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
#ifdef __CHAR_UNSIGNED__
- es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
- sbi->s_hash_unsigned = 3;
+ if (!(sb->s_flags & MS_RDONLY))
+ es->s_flags |=
+ cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
+ sbi->s_hash_unsigned = 3;
#else
- es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
+ if (!(sb->s_flags & MS_RDONLY))
+ es->s_flags |=
+ cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
#endif
+ }
}
/* Handle clustersize */
@@ -3969,10 +3980,10 @@ no_journal:
"available");
}
- err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sbi));
+ err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb));
if (err) {
ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
- "reserved pool", ext4_calculate_resv_clusters(sbi));
+ "reserved pool", ext4_calculate_resv_clusters(sb));
goto failed_mount4a;
}
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index c081e34f717f..a20816e7eb3a 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -189,14 +189,28 @@ ext4_listxattr(struct dentry *dentry, char *buffer, size_t size)
}
static int
-ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end)
+ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end,
+ void *value_start)
{
- while (!IS_LAST_ENTRY(entry)) {
- struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(entry);
+ struct ext4_xattr_entry *e = entry;
+
+ while (!IS_LAST_ENTRY(e)) {
+ struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e);
if ((void *)next >= end)
return -EIO;
- entry = next;
+ e = next;
+ }
+
+ while (!IS_LAST_ENTRY(entry)) {
+ if (entry->e_value_size != 0 &&
+ (value_start + le16_to_cpu(entry->e_value_offs) <
+ (void *)e + sizeof(__u32) ||
+ value_start + le16_to_cpu(entry->e_value_offs) +
+ le32_to_cpu(entry->e_value_size) > end))
+ return -EIO;
+ entry = EXT4_XATTR_NEXT(entry);
}
+
return 0;
}
@@ -213,7 +227,8 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
return -EIO;
if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh)))
return -EIO;
- error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
+ error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size,
+ bh->b_data);
if (!error)
set_buffer_verified(bh);
return error;
@@ -329,7 +344,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
header = IHDR(inode, raw_inode);
entry = IFIRST(header);
end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
- error = ext4_xattr_check_names(entry, end);
+ error = ext4_xattr_check_names(entry, end, entry);
if (error)
goto cleanup;
error = ext4_xattr_find_entry(&entry, name_index, name,
@@ -457,7 +472,7 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
raw_inode = ext4_raw_inode(&iloc);
header = IHDR(inode, raw_inode);
end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
- error = ext4_xattr_check_names(IFIRST(header), end);
+ error = ext4_xattr_check_names(IFIRST(header), end, IFIRST(header));
if (error)
goto cleanup;
error = ext4_xattr_list_entries(dentry, IFIRST(header),
@@ -517,8 +532,8 @@ static void ext4_xattr_update_super_block(handle_t *handle,
}
/*
- * Release the xattr block BH: If the reference count is > 1, decrement
- * it; otherwise free the block.
+ * Release the xattr block BH: If the reference count is > 1, decrement it;
+ * otherwise free the block.
*/
static void
ext4_xattr_release_block(handle_t *handle, struct inode *inode,
@@ -538,16 +553,31 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
if (ce)
mb_cache_entry_free(ce);
get_bh(bh);
+ unlock_buffer(bh);
ext4_free_blocks(handle, inode, bh, 0, 1,
EXT4_FREE_BLOCKS_METADATA |
EXT4_FREE_BLOCKS_FORGET);
- unlock_buffer(bh);
} else {
le32_add_cpu(&BHDR(bh)->h_refcount, -1);
if (ce)
mb_cache_entry_release(ce);
+ /*
+ * Beware of this ugliness: Releasing of xattr block references
+ * from different inodes can race and so we have to protect
+ * from a race where someone else frees the block (and releases
+ * its journal_head) before we are done dirtying the buffer. In
+ * nojournal mode this race is harmless and we actually cannot
+ * call ext4_handle_dirty_xattr_block() with locked buffer as
+ * that function can call sync_dirty_buffer() so for that case
+ * we handle the dirtying after unlocking the buffer.
+ */
+ if (ext4_handle_valid(handle))
+ error = ext4_handle_dirty_xattr_block(handle, inode,
+ bh);
unlock_buffer(bh);
- error = ext4_handle_dirty_xattr_block(handle, inode, bh);
+ if (!ext4_handle_valid(handle))
+ error = ext4_handle_dirty_xattr_block(handle, inode,
+ bh);
if (IS_SYNC(inode))
ext4_handle_sync(handle);
dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
@@ -957,7 +987,8 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
is->s.here = is->s.first;
is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
- error = ext4_xattr_check_names(IFIRST(header), is->s.end);
+ error = ext4_xattr_check_names(IFIRST(header), is->s.end,
+ IFIRST(header));
if (error)
return error;
/* Find the named attribute. */
@@ -1350,6 +1381,9 @@ retry:
s_min_extra_isize) {
tried_min_extra_isize++;
new_extra_isize = s_min_extra_isize;
+ kfree(is); is = NULL;
+ kfree(bs); bs = NULL;
+ brelse(bh);
goto retry;
}
error = -1;
diff --git a/fs/file.c b/fs/file.c
index 4a78f981557a..9de20265a78c 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -34,7 +34,7 @@ static void *alloc_fdmem(size_t size)
* vmalloc() if the allocation size will be considered "large" by the VM.
*/
if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
- void *data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN);
+ void *data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY);
if (data != NULL)
return data;
}
diff --git a/fs/file_table.c b/fs/file_table.c
index 485dc0eddd67..54a34be444f9 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -211,10 +211,10 @@ static void drop_file_write_access(struct file *file)
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = dentry->d_inode;
- put_write_access(inode);
-
if (special_file(inode->i_mode))
return;
+
+ put_write_access(inode);
if (file_check_writeable(file) != 0)
return;
__mnt_drop_write(mnt);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 3be57189efd5..b44306378193 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -87,16 +87,29 @@ static inline struct inode *wb_inode(struct list_head *head)
#define CREATE_TRACE_POINTS
#include <trace/events/writeback.h>
+static void bdi_wakeup_thread(struct backing_dev_info *bdi)
+{
+ spin_lock_bh(&bdi->wb_lock);
+ if (test_bit(BDI_registered, &bdi->state))
+ mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
+ spin_unlock_bh(&bdi->wb_lock);
+}
+
static void bdi_queue_work(struct backing_dev_info *bdi,
struct wb_writeback_work *work)
{
trace_writeback_queue(bdi, work);
spin_lock_bh(&bdi->wb_lock);
+ if (!test_bit(BDI_registered, &bdi->state)) {
+ if (work->done)
+ complete(work->done);
+ goto out_unlock;
+ }
list_add_tail(&work->list, &bdi->work_list);
- spin_unlock_bh(&bdi->wb_lock);
-
mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
+out_unlock:
+ spin_unlock_bh(&bdi->wb_lock);
}
static void
@@ -112,7 +125,7 @@ __bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
work = kzalloc(sizeof(*work), GFP_ATOMIC);
if (!work) {
trace_writeback_nowork(bdi);
- mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
+ bdi_wakeup_thread(bdi);
return;
}
@@ -159,7 +172,7 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi)
* writeback as soon as there is no other work to do.
*/
trace_writeback_wake_background(bdi);
- mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
+ bdi_wakeup_thread(bdi);
}
/*
@@ -457,12 +470,28 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
* write_inode()
*/
spin_lock(&inode->i_lock);
- /* Clear I_DIRTY_PAGES if we've written out all dirty pages */
- if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
- inode->i_state &= ~I_DIRTY_PAGES;
+
dirty = inode->i_state & I_DIRTY;
- inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
+ inode->i_state &= ~I_DIRTY;
+
+ /*
+ * Paired with smp_mb() in __mark_inode_dirty(). This allows
+ * __mark_inode_dirty() to test i_state without grabbing i_lock -
+ * either they see the I_DIRTY bits cleared or we see the dirtied
+ * inode.
+ *
+ * I_DIRTY_PAGES is always cleared together above even if @mapping
+ * still has dirty pages. The flag is reinstated after smp_mb() if
+ * necessary. This guarantees that either __mark_inode_dirty()
+ * sees clear I_DIRTY_PAGES or we see PAGECACHE_TAG_DIRTY.
+ */
+ smp_mb();
+
+ if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
+ inode->i_state |= I_DIRTY_PAGES;
+
spin_unlock(&inode->i_lock);
+
/* Don't write the inode if only I_DIRTY_PAGES was set */
if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
int err = write_inode(inode, wbc);
@@ -505,13 +534,16 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
}
WARN_ON(inode->i_state & I_SYNC);
/*
- * Skip inode if it is clean. We don't want to mess with writeback
- * lists in this function since flusher thread may be doing for example
- * sync in parallel and if we move the inode, it could get skipped. So
- * here we make sure inode is on some writeback list and leave it there
- * unless we have completely cleaned the inode.
+ * Skip inode if it is clean and we have no outstanding writeback in
+ * WB_SYNC_ALL mode. We don't want to mess with writeback lists in this
+ * function since flusher thread may be doing for example sync in
+ * parallel and if we move the inode, it could get skipped. So here we
+ * make sure inode is on some writeback list and leave it there unless
+ * we have completely cleaned the inode.
*/
- if (!(inode->i_state & I_DIRTY))
+ if (!(inode->i_state & I_DIRTY) &&
+ (wbc->sync_mode != WB_SYNC_ALL ||
+ !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)))
goto out;
inode->i_state |= I_SYNC;
spin_unlock(&inode->i_lock);
@@ -1013,7 +1045,7 @@ void bdi_writeback_workfn(struct work_struct *work)
current->flags |= PF_SWAPWRITE;
if (likely(!current_is_workqueue_rescuer() ||
- list_empty(&bdi->bdi_list))) {
+ !test_bit(BDI_registered, &bdi->state))) {
/*
* The normal path. Keep writing back @bdi until its
* work_list is empty. Note that this path is also taken
@@ -1035,10 +1067,10 @@ void bdi_writeback_workfn(struct work_struct *work)
trace_writeback_pages_written(pages_written);
}
- if (!list_empty(&bdi->work_list) ||
- (wb_has_dirty_io(wb) && dirty_writeback_interval))
- queue_delayed_work(bdi_wq, &wb->dwork,
- msecs_to_jiffies(dirty_writeback_interval * 10));
+ if (!list_empty(&bdi->work_list))
+ mod_delayed_work(bdi_wq, &wb->dwork, 0);
+ else if (wb_has_dirty_io(wb) && dirty_writeback_interval)
+ bdi_wakeup_thread_delayed(bdi);
current->flags &= ~PF_SWAPWRITE;
}
@@ -1130,12 +1162,11 @@ void __mark_inode_dirty(struct inode *inode, int flags)
}
/*
- * make sure that changes are seen by all cpus before we test i_state
- * -- mikulas
+ * Paired with smp_mb() in __writeback_single_inode() for the
+ * following lockless i_state test. See there for details.
*/
smp_mb();
- /* avoid the locking if we can */
if ((inode->i_state & flags) == flags)
return;
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 1d55f9465400..23bf1a52a5da 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1296,22 +1296,6 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs));
}
-static int fuse_dev_pipe_buf_steal(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
-{
- return 1;
-}
-
-static const struct pipe_buf_operations fuse_dev_pipe_buf_ops = {
- .can_merge = 0,
- .map = generic_pipe_buf_map,
- .unmap = generic_pipe_buf_unmap,
- .confirm = generic_pipe_buf_confirm,
- .release = generic_pipe_buf_release,
- .steal = fuse_dev_pipe_buf_steal,
- .get = generic_pipe_buf_get,
-};
-
static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe,
size_t len, unsigned int flags)
@@ -1358,7 +1342,11 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos,
buf->page = bufs[page_nr].page;
buf->offset = bufs[page_nr].offset;
buf->len = bufs[page_nr].len;
- buf->ops = &fuse_dev_pipe_buf_ops;
+ /*
+ * Need to be careful about this. Having buf->ops in module
+ * code can Oops if the buffer persists after module unload.
+ */
+ buf->ops = &nosteal_pipe_buf_ops;
pipe->nrbufs++;
page_nr++;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 473e8453a7df..4fafb8484bbc 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2468,6 +2468,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
{
struct fuse_file *ff = file->private_data;
struct inode *inode = file->f_inode;
+ struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_conn *fc = ff->fc;
struct fuse_req *req;
struct fuse_fallocate_in inarg = {
@@ -2485,10 +2486,20 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
if (lock_inode) {
mutex_lock(&inode->i_mutex);
- if (mode & FALLOC_FL_PUNCH_HOLE)
- fuse_set_nowrite(inode);
+ if (mode & FALLOC_FL_PUNCH_HOLE) {
+ loff_t endbyte = offset + length - 1;
+ err = filemap_write_and_wait_range(inode->i_mapping,
+ offset, endbyte);
+ if (err)
+ goto out;
+
+ fuse_sync_writes(inode);
+ }
}
+ if (!(mode & FALLOC_FL_KEEP_SIZE))
+ set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
+
req = fuse_get_req_nopages(fc);
if (IS_ERR(req)) {
err = PTR_ERR(req);
@@ -2521,11 +2532,11 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
fuse_invalidate_attr(inode);
out:
- if (lock_inode) {
- if (mode & FALLOC_FL_PUNCH_HOLE)
- fuse_release_nowrite(inode);
+ if (!(mode & FALLOC_FL_KEEP_SIZE))
+ clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
+
+ if (lock_inode)
mutex_unlock(&inode->i_mutex);
- }
return err;
}
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index b5718516825b..39a986e1da9e 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -461,6 +461,17 @@ static const match_table_t tokens = {
{OPT_ERR, NULL}
};
+static int fuse_match_uint(substring_t *s, unsigned int *res)
+{
+ int err = -ENOMEM;
+ char *buf = match_strdup(s);
+ if (buf) {
+ err = kstrtouint(buf, 10, res);
+ kfree(buf);
+ }
+ return err;
+}
+
static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
{
char *p;
@@ -471,6 +482,7 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
while ((p = strsep(&opt, ",")) != NULL) {
int token;
int value;
+ unsigned uv;
substring_t args[MAX_OPT_ARGS];
if (!*p)
continue;
@@ -494,18 +506,18 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev)
break;
case OPT_USER_ID:
- if (match_int(&args[0], &value))
+ if (fuse_match_uint(&args[0], &uv))
return 0;
- d->user_id = make_kuid(current_user_ns(), value);
+ d->user_id = make_kuid(current_user_ns(), uv);
if (!uid_valid(d->user_id))
return 0;
d->user_id_present = 1;
break;
case OPT_GROUP_ID:
- if (match_int(&args[0], &value))
+ if (fuse_match_uint(&args[0], &uv))
return 0;
- d->group_id = make_kgid(current_user_ns(), value);
+ d->group_id = make_kgid(current_user_ns(), uv);
if (!gid_valid(d->group_id))
return 0;
d->group_id_present = 1;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 0bad69ed6336..76251600cbea 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -999,6 +999,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
+ struct address_space *mapping = inode->i_mapping;
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_holder gh;
int rv;
@@ -1019,6 +1020,35 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
if (rv != 1)
goto out; /* dio not valid, fall back to buffered i/o */
+ /*
+ * Now since we are holding a deferred (CW) lock at this point, you
+ * might be wondering why this is ever needed. There is a case however
+ * where we've granted a deferred local lock against a cached exclusive
+ * glock. That is ok provided all granted local locks are deferred, but
+ * it also means that it is possible to encounter pages which are
+ * cached and possibly also mapped. So here we check for that and sort
+ * them out ahead of the dio. The glock state machine will take care of
+ * everything else.
+ *
+ * If in fact the cached glock state (gl->gl_state) is deferred (CW) in
+ * the first place, mapping->nrpages will always be zero.
+ */
+ if (mapping->nrpages) {
+ loff_t lstart = offset & (PAGE_CACHE_SIZE - 1);
+ loff_t len = iov_length(iov, nr_segs);
+ loff_t end = PAGE_ALIGN(offset + len) - 1;
+
+ rv = 0;
+ if (len == 0)
+ goto out;
+ if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
+ unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len);
+ rv = filemap_write_and_wait_range(mapping, lstart, end);
+ if (rv)
+ return rv;
+ truncate_inode_pages_range(mapping, lstart, end);
+ }
+
rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
offset, nr_segs, gfs2_get_block_direct,
NULL, NULL, 0);
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 62b484e4a9e4..bc5dac400125 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -1536,10 +1536,22 @@ static int setattr_chown(struct inode *inode, struct iattr *attr)
if (!(attr->ia_valid & ATTR_GID) || gid_eq(ogid, ngid))
ogid = ngid = NO_GID_QUOTA_CHANGE;
- error = gfs2_quota_lock(ip, nuid, ngid);
+ error = get_write_access(inode);
if (error)
return error;
+ error = gfs2_rs_alloc(ip);
+ if (error)
+ goto out;
+
+ error = gfs2_rindex_update(sdp);
+ if (error)
+ goto out;
+
+ error = gfs2_quota_lock(ip, nuid, ngid);
+ if (error)
+ goto out;
+
if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) ||
!gid_eq(ogid, NO_GID_QUOTA_CHANGE)) {
error = gfs2_quota_check(ip, nuid, ngid);
@@ -1566,6 +1578,8 @@ out_end_trans:
gfs2_trans_end(sdp);
out_gunlock_q:
gfs2_quota_unlock(ip);
+out:
+ put_write_access(inode);
return error;
}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index 60ede2a0f43f..f7dd3b4f8ab0 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1317,8 +1317,18 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
if (IS_ERR(s))
goto error_bdev;
- if (s->s_root)
+ if (s->s_root) {
+ /*
+ * s_umount nests inside bd_mutex during
+ * __invalidate_device(). blkdev_put() acquires
+ * bd_mutex and can't be called under s_umount. Drop
+ * s_umount temporarily. This is safe as we're
+ * holding an active reference.
+ */
+ up_write(&s->s_umount);
blkdev_put(bdev, mode);
+ down_write(&s->s_umount);
+ }
memset(&args, 0, sizeof(args));
args.ar_quota = GFS2_QUOTA_DEFAULT;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a3f868ae3fd4..4e5f332f15d9 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -916,14 +916,8 @@ static int get_hstate_idx(int page_size_log)
return h - hstates;
}
-static char *hugetlb_dname(struct dentry *dentry, char *buffer, int buflen)
-{
- return dynamic_dname(dentry, buffer, buflen, "/%s (deleted)",
- dentry->d_name.name);
-}
-
static struct dentry_operations anon_ops = {
- .d_dname = hugetlb_dname
+ .d_dname = simple_dname
};
/*
diff --git a/fs/inode.c b/fs/inode.c
index 00d5fc3b86e1..1b300a06b8be 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1837,14 +1837,18 @@ EXPORT_SYMBOL(inode_init_owner);
* inode_owner_or_capable - check current task permissions to inode
* @inode: inode being checked
*
- * Return true if current either has CAP_FOWNER to the inode, or
- * owns the file.
+ * Return true if current either has CAP_FOWNER in a namespace with the
+ * inode owner uid mapped, or owns the file.
*/
bool inode_owner_or_capable(const struct inode *inode)
{
+ struct user_namespace *ns;
+
if (uid_eq(current_fsuid(), inode->i_uid))
return true;
- if (inode_capable(inode, CAP_FOWNER))
+
+ ns = current_user_ns();
+ if (ns_capable(ns, CAP_FOWNER) && kuid_has_mapping(ns, inode->i_uid))
return true;
return false;
}
diff --git a/fs/ioprio.c b/fs/ioprio.c
index e50170ca7c33..31666c92b46a 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -157,14 +157,16 @@ out:
int ioprio_best(unsigned short aprio, unsigned short bprio)
{
- unsigned short aclass = IOPRIO_PRIO_CLASS(aprio);
- unsigned short bclass = IOPRIO_PRIO_CLASS(bprio);
+ unsigned short aclass;
+ unsigned short bclass;
- if (aclass == IOPRIO_CLASS_NONE)
- aclass = IOPRIO_CLASS_BE;
- if (bclass == IOPRIO_CLASS_NONE)
- bclass = IOPRIO_CLASS_BE;
+ if (!ioprio_valid(aprio))
+ aprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
+ if (!ioprio_valid(bprio))
+ bprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM);
+ aclass = IOPRIO_PRIO_CLASS(aprio);
+ bclass = IOPRIO_PRIO_CLASS(bprio);
if (aclass == bclass)
return min(aprio, bprio);
if (aclass > bclass)
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index d3705490ff9c..10489bbd40fc 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -69,7 +69,7 @@ static void isofs_put_super(struct super_block *sb)
return;
}
-static int isofs_read_inode(struct inode *);
+static int isofs_read_inode(struct inode *, int relocated);
static int isofs_statfs (struct dentry *, struct kstatfs *);
static struct kmem_cache *isofs_inode_cachep;
@@ -1274,7 +1274,7 @@ out_toomany:
goto out;
}
-static int isofs_read_inode(struct inode *inode)
+static int isofs_read_inode(struct inode *inode, int relocated)
{
struct super_block *sb = inode->i_sb;
struct isofs_sb_info *sbi = ISOFS_SB(sb);
@@ -1419,7 +1419,7 @@ static int isofs_read_inode(struct inode *inode)
*/
if (!high_sierra) {
- parse_rock_ridge_inode(de, inode);
+ parse_rock_ridge_inode(de, inode, relocated);
/* if we want uid/gid set, override the rock ridge setting */
if (sbi->s_uid_set)
inode->i_uid = sbi->s_uid;
@@ -1498,9 +1498,10 @@ static int isofs_iget5_set(struct inode *ino, void *data)
* offset that point to the underlying meta-data for the inode. The
* code below is otherwise similar to the iget() code in
* include/linux/fs.h */
-struct inode *isofs_iget(struct super_block *sb,
- unsigned long block,
- unsigned long offset)
+struct inode *__isofs_iget(struct super_block *sb,
+ unsigned long block,
+ unsigned long offset,
+ int relocated)
{
unsigned long hashval;
struct inode *inode;
@@ -1522,7 +1523,7 @@ struct inode *isofs_iget(struct super_block *sb,
return ERR_PTR(-ENOMEM);
if (inode->i_state & I_NEW) {
- ret = isofs_read_inode(inode);
+ ret = isofs_read_inode(inode, relocated);
if (ret < 0) {
iget_failed(inode);
inode = ERR_PTR(ret);
diff --git a/fs/isofs/isofs.h b/fs/isofs/isofs.h
index 99167238518d..0ac4c1f73fbd 100644
--- a/fs/isofs/isofs.h
+++ b/fs/isofs/isofs.h
@@ -107,7 +107,7 @@ extern int iso_date(char *, int);
struct inode; /* To make gcc happy */
-extern int parse_rock_ridge_inode(struct iso_directory_record *, struct inode *);
+extern int parse_rock_ridge_inode(struct iso_directory_record *, struct inode *, int relocated);
extern int get_rock_ridge_filename(struct iso_directory_record *, char *, struct inode *);
extern int isofs_name_translate(struct iso_directory_record *, char *, struct inode *);
@@ -118,9 +118,24 @@ extern struct dentry *isofs_lookup(struct inode *, struct dentry *, unsigned int
extern struct buffer_head *isofs_bread(struct inode *, sector_t);
extern int isofs_get_blocks(struct inode *, sector_t, struct buffer_head **, unsigned long);
-extern struct inode *isofs_iget(struct super_block *sb,
- unsigned long block,
- unsigned long offset);
+struct inode *__isofs_iget(struct super_block *sb,
+ unsigned long block,
+ unsigned long offset,
+ int relocated);
+
+static inline struct inode *isofs_iget(struct super_block *sb,
+ unsigned long block,
+ unsigned long offset)
+{
+ return __isofs_iget(sb, block, offset, 0);
+}
+
+static inline struct inode *isofs_iget_reloc(struct super_block *sb,
+ unsigned long block,
+ unsigned long offset)
+{
+ return __isofs_iget(sb, block, offset, 1);
+}
/* Because the inode number is no longer relevant to finding the
* underlying meta-data for an inode, we are free to choose a more
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index c0bf42472e40..735d7522a3a9 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -30,6 +30,7 @@ struct rock_state {
int cont_size;
int cont_extent;
int cont_offset;
+ int cont_loops;
struct inode *inode;
};
@@ -73,6 +74,9 @@ static void init_rock_state(struct rock_state *rs, struct inode *inode)
rs->inode = inode;
}
+/* Maximum number of Rock Ridge continuation entries */
+#define RR_MAX_CE_ENTRIES 32
+
/*
* Returns 0 if the caller should continue scanning, 1 if the scan must end
* and -ve on error.
@@ -105,6 +109,8 @@ static int rock_continue(struct rock_state *rs)
goto out;
}
ret = -EIO;
+ if (++rs->cont_loops >= RR_MAX_CE_ENTRIES)
+ goto out;
bh = sb_bread(rs->inode->i_sb, rs->cont_extent);
if (bh) {
memcpy(rs->buffer, bh->b_data + rs->cont_offset,
@@ -288,12 +294,16 @@ eio:
goto out;
}
+#define RR_REGARD_XA 1
+#define RR_RELOC_DE 2
+
static int
parse_rock_ridge_inode_internal(struct iso_directory_record *de,
- struct inode *inode, int regard_xa)
+ struct inode *inode, int flags)
{
int symlink_len = 0;
int cnt, sig;
+ unsigned int reloc_block;
struct inode *reloc;
struct rock_ridge *rr;
int rootflag;
@@ -305,7 +315,7 @@ parse_rock_ridge_inode_internal(struct iso_directory_record *de,
init_rock_state(&rs, inode);
setup_rock_ridge(de, inode, &rs);
- if (regard_xa) {
+ if (flags & RR_REGARD_XA) {
rs.chr += 14;
rs.len -= 14;
if (rs.len < 0)
@@ -352,6 +362,9 @@ repeat:
rs.cont_size = isonum_733(rr->u.CE.size);
break;
case SIG('E', 'R'):
+ /* Invalid length of ER tag id? */
+ if (rr->u.ER.len_id + offsetof(struct rock_ridge, u.ER.data) > rr->len)
+ goto out;
ISOFS_SB(inode->i_sb)->s_rock = 1;
printk(KERN_DEBUG "ISO 9660 Extensions: ");
{
@@ -485,12 +498,22 @@ repeat:
"relocated directory\n");
goto out;
case SIG('C', 'L'):
- ISOFS_I(inode)->i_first_extent =
- isonum_733(rr->u.CL.location);
- reloc =
- isofs_iget(inode->i_sb,
- ISOFS_I(inode)->i_first_extent,
- 0);
+ if (flags & RR_RELOC_DE) {
+ printk(KERN_ERR
+ "ISOFS: Recursive directory relocation "
+ "is not supported\n");
+ goto eio;
+ }
+ reloc_block = isonum_733(rr->u.CL.location);
+ if (reloc_block == ISOFS_I(inode)->i_iget5_block &&
+ ISOFS_I(inode)->i_iget5_offset == 0) {
+ printk(KERN_ERR
+ "ISOFS: Directory relocation points to "
+ "itself\n");
+ goto eio;
+ }
+ ISOFS_I(inode)->i_first_extent = reloc_block;
+ reloc = isofs_iget_reloc(inode->i_sb, reloc_block, 0);
if (IS_ERR(reloc)) {
ret = PTR_ERR(reloc);
goto out;
@@ -637,9 +660,11 @@ static char *get_symlink_chunk(char *rpnt, struct rock_ridge *rr, char *plimit)
return rpnt;
}
-int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode)
+int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode,
+ int relocated)
{
- int result = parse_rock_ridge_inode_internal(de, inode, 0);
+ int flags = relocated ? RR_RELOC_DE : 0;
+ int result = parse_rock_ridge_inode_internal(de, inode, flags);
/*
* if rockridge flag was reset and we didn't look for attributes
@@ -647,7 +672,8 @@ int parse_rock_ridge_inode(struct iso_directory_record *de, struct inode *inode)
*/
if ((ISOFS_SB(inode->i_sb)->s_rock_offset == -1)
&& (ISOFS_SB(inode->i_sb)->s_rock == 2)) {
- result = parse_rock_ridge_inode_internal(de, inode, 14);
+ result = parse_rock_ridge_inode_internal(de, inode,
+ flags | RR_REGARD_XA);
}
return result;
}
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 626846bac32f..6e2fb5cbacde 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -427,6 +427,7 @@ static int do_one_pass(journal_t *journal,
int tag_bytes = journal_tag_bytes(journal);
__u32 crc32_sum = ~0; /* Transactional Checksums */
int descr_csum_size = 0;
+ int block_error = 0;
/*
* First thing is to establish what we expect to find in the log
@@ -521,6 +522,7 @@ static int do_one_pass(journal_t *journal,
!jbd2_descr_block_csum_verify(journal,
bh->b_data)) {
err = -EIO;
+ brelse(bh);
goto failed;
}
@@ -599,7 +601,8 @@ static int do_one_pass(journal_t *journal,
"checksum recovering "
"block %llu in log\n",
blocknr);
- continue;
+ block_error = 1;
+ goto skip_write;
}
/* Find a buffer for the new
@@ -798,7 +801,8 @@ static int do_one_pass(journal_t *journal,
success = -EIO;
}
}
-
+ if (block_error && success == 0)
+ success = -EIO;
return success;
failed:
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index e0c0bc275924..ec34e11d6854 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1151,7 +1151,10 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
* once a transaction -bzzz
*/
jh->b_modified = 1;
- J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
+ if (handle->h_buffer_credits <= 0) {
+ ret = -ENOSPC;
+ goto out_unlock_bh;
+ }
handle->h_buffer_credits--;
}
@@ -1234,7 +1237,6 @@ out_unlock_bh:
jbd2_journal_put_journal_head(jh);
out:
JBUFFER_TRACE(jh, "exit");
- WARN_ON(ret); /* All errors are bugs, so dump the stack */
return ret;
}
@@ -1440,9 +1442,12 @@ int jbd2_journal_stop(handle_t *handle)
* to perform a synchronous write. We do this to detect the
* case where a single process is doing a stream of sync
* writes. No point in waiting for joiners in that case.
+ *
+ * Setting max_batch_time to 0 disables this completely.
*/
pid = current->pid;
- if (handle->h_sync && journal->j_last_sync_writer != pid) {
+ if (handle->h_sync && journal->j_last_sync_writer != pid &&
+ journal->j_max_batch_time) {
u64 commit_time, trans_time;
journal->j_last_sync_writer = pid;
diff --git a/fs/jffs2/compr_rtime.c b/fs/jffs2/compr_rtime.c
index 16a5047903a6..406d9cc84ba8 100644
--- a/fs/jffs2/compr_rtime.c
+++ b/fs/jffs2/compr_rtime.c
@@ -33,7 +33,7 @@ static int jffs2_rtime_compress(unsigned char *data_in,
unsigned char *cpage_out,
uint32_t *sourcelen, uint32_t *dstlen)
{
- short positions[256];
+ unsigned short positions[256];
int outpos = 0;
int pos=0;
@@ -74,7 +74,7 @@ static int jffs2_rtime_decompress(unsigned char *data_in,
unsigned char *cpage_out,
uint32_t srclen, uint32_t destlen)
{
- short positions[256];
+ unsigned short positions[256];
int outpos = 0;
int pos=0;
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index 413ef89c2d1b..046fee8b6e9b 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -134,8 +134,6 @@ struct jffs2_sb_info {
struct rw_semaphore wbuf_sem; /* Protects the write buffer */
struct delayed_work wbuf_dwork; /* write-buffer write-out work */
- int wbuf_queued; /* non-zero delayed work is queued */
- spinlock_t wbuf_dwork_lock; /* protects wbuf_dwork and and wbuf_queued */
unsigned char *oobbuf;
int oobavail; /* How many bytes are available for JFFS2 in OOB */
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index e4619b00f7c5..fa35ff79ab35 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -231,7 +231,7 @@ struct jffs2_tmp_dnode_info
uint32_t version;
uint32_t data_crc;
uint32_t partial_crc;
- uint16_t csize;
+ uint32_t csize;
uint16_t overlapped;
};
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index 03310721712f..b6bd4affd9ad 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -179,6 +179,7 @@ int jffs2_reserve_space(struct jffs2_sb_info *c, uint32_t minsize,
spin_unlock(&c->erase_completion_lock);
schedule();
+ remove_wait_queue(&c->erase_wait, &wait);
} else
spin_unlock(&c->erase_completion_lock);
} else if (ret)
@@ -211,20 +212,25 @@ out:
int jffs2_reserve_space_gc(struct jffs2_sb_info *c, uint32_t minsize,
uint32_t *len, uint32_t sumsize)
{
- int ret = -EAGAIN;
+ int ret;
minsize = PAD(minsize);
jffs2_dbg(1, "%s(): Requested 0x%x bytes\n", __func__, minsize);
- spin_lock(&c->erase_completion_lock);
- while(ret == -EAGAIN) {
+ while (true) {
+ spin_lock(&c->erase_completion_lock);
ret = jffs2_do_reserve_space(c, minsize, len, sumsize);
if (ret) {
jffs2_dbg(1, "%s(): looping, ret is %d\n",
__func__, ret);
}
+ spin_unlock(&c->erase_completion_lock);
+
+ if (ret == -EAGAIN)
+ cond_resched();
+ else
+ break;
}
- spin_unlock(&c->erase_completion_lock);
if (!ret)
ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, 1);
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index a6597d60d76d..09ed55190ee2 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -1162,10 +1162,6 @@ static void delayed_wbuf_sync(struct work_struct *work)
struct jffs2_sb_info *c = work_to_sb(work);
struct super_block *sb = OFNI_BS_2SFFJ(c);
- spin_lock(&c->wbuf_dwork_lock);
- c->wbuf_queued = 0;
- spin_unlock(&c->wbuf_dwork_lock);
-
if (!(sb->s_flags & MS_RDONLY)) {
jffs2_dbg(1, "%s()\n", __func__);
jffs2_flush_wbuf_gc(c, 0);
@@ -1180,14 +1176,9 @@ void jffs2_dirty_trigger(struct jffs2_sb_info *c)
if (sb->s_flags & MS_RDONLY)
return;
- spin_lock(&c->wbuf_dwork_lock);
- if (!c->wbuf_queued) {
+ delay = msecs_to_jiffies(dirty_writeback_interval * 10);
+ if (queue_delayed_work(system_long_wq, &c->wbuf_dwork, delay))
jffs2_dbg(1, "%s()\n", __func__);
- delay = msecs_to_jiffies(dirty_writeback_interval * 10);
- queue_delayed_work(system_long_wq, &c->wbuf_dwork, delay);
- c->wbuf_queued = 1;
- }
- spin_unlock(&c->wbuf_dwork_lock);
}
int jffs2_nand_flash_setup(struct jffs2_sb_info *c)
@@ -1211,7 +1202,6 @@ int jffs2_nand_flash_setup(struct jffs2_sb_info *c)
/* Initialise write buffer */
init_rwsem(&c->wbuf_sem);
- spin_lock_init(&c->wbuf_dwork_lock);
INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync);
c->wbuf_pagesize = c->mtd->writesize;
c->wbuf_ofs = 0xFFFFFFFF;
@@ -1251,7 +1241,6 @@ int jffs2_dataflash_setup(struct jffs2_sb_info *c) {
/* Initialize write buffer */
init_rwsem(&c->wbuf_sem);
- spin_lock_init(&c->wbuf_dwork_lock);
INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync);
c->wbuf_pagesize = c->mtd->erasesize;
@@ -1311,7 +1300,6 @@ int jffs2_nor_wbuf_flash_setup(struct jffs2_sb_info *c) {
/* Initialize write buffer */
init_rwsem(&c->wbuf_sem);
- spin_lock_init(&c->wbuf_dwork_lock);
INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync);
c->wbuf_pagesize = c->mtd->writesize;
@@ -1346,7 +1334,6 @@ int jffs2_ubivol_setup(struct jffs2_sb_info *c) {
return 0;
init_rwsem(&c->wbuf_sem);
- spin_lock_init(&c->wbuf_dwork_lock);
INIT_DELAYED_WORK(&c->wbuf_dwork, delayed_wbuf_sync);
c->wbuf_pagesize = c->mtd->writesize;
diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c
index c1a3e603279c..7f464c513ba0 100644
--- a/fs/jfs/jfs_inode.c
+++ b/fs/jfs/jfs_inode.c
@@ -95,7 +95,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
if (insert_inode_locked(inode) < 0) {
rc = -EINVAL;
- goto fail_unlock;
+ goto fail_put;
}
inode_init_owner(inode, parent, mode);
@@ -156,7 +156,6 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
fail_drop:
dquot_drop(inode);
inode->i_flags |= S_NOQUOTA;
-fail_unlock:
clear_nlink(inode);
unlock_new_inode(inode);
fail_put:
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 1812f026960c..6ae664b489af 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -159,6 +159,12 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res,
msg.rpc_proc = &clnt->cl_procinfo[proc];
status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN);
+ if (status == -ECONNREFUSED) {
+ dprintk("lockd: NSM upcall RPC failed, status=%d, forcing rebind\n",
+ status);
+ rpc_force_rebind(clnt);
+ status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN);
+ }
if (status < 0)
dprintk("lockd: NSM upcall RPC failed, status=%d\n",
status);
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index a2aa97d45670..9c8a5a6d33df 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -235,6 +235,7 @@ out_err:
if (warned++ == 0)
printk(KERN_WARNING
"lockd_up: makesock failed, error=%d\n", err);
+ svc_shutdown_net(serv, net);
return err;
}
@@ -252,13 +253,11 @@ static int lockd_up_net(struct svc_serv *serv, struct net *net)
error = make_socks(serv, net);
if (error < 0)
- goto err_socks;
+ goto err_bind;
set_grace_period(net);
dprintk("lockd_up_net: per-net data created; net=%p\n", net);
return 0;
-err_socks:
- svc_rpcb_cleanup(serv, net);
err_bind:
ln->nlmsvc_users--;
return error;
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 8ebd3f551e0c..ffc4045fc62e 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -767,6 +767,7 @@ nlmsvc_grant_blocked(struct nlm_block *block)
struct nlm_file *file = block->b_file;
struct nlm_lock *lock = &block->b_call->a_args.lock;
int error;
+ loff_t fl_start, fl_end;
dprintk("lockd: grant blocked lock %p\n", block);
@@ -784,9 +785,16 @@ nlmsvc_grant_blocked(struct nlm_block *block)
}
/* Try the lock operation again */
+ /* vfs_lock_file() can mangle fl_start and fl_end, but we need
+ * them unchanged for the GRANT_MSG
+ */
lock->fl.fl_flags |= FL_SLEEP;
+ fl_start = lock->fl.fl_start;
+ fl_end = lock->fl.fl_end;
error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL);
lock->fl.fl_flags &= ~FL_SLEEP;
+ lock->fl.fl_start = fl_start;
+ lock->fl.fl_end = fl_end;
switch (error) {
case 0:
diff --git a/fs/locks.c b/fs/locks.c
index cb424a4fed71..0274c953b07d 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1243,11 +1243,10 @@ int __break_lease(struct inode *inode, unsigned int mode)
restart:
break_time = flock->fl_break_time;
- if (break_time != 0) {
+ if (break_time != 0)
break_time -= jiffies;
- if (break_time == 0)
- break_time++;
- }
+ if (break_time == 0)
+ break_time++;
locks_insert_block(flock, new_fl);
unlock_flocks();
error = wait_event_interruptible_timeout(new_fl->fl_wait,
diff --git a/fs/mount.h b/fs/mount.h
index 64a858143ff9..68d80bdcd081 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -73,7 +73,7 @@ static inline int mnt_has_parent(struct mount *mnt)
static inline int is_mounted(struct vfsmount *mnt)
{
/* neither detached nor internal? */
- return !IS_ERR_OR_NULL(real_mount(mnt));
+ return !IS_ERR_OR_NULL(real_mount(mnt)->mnt_ns);
}
extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int);
diff --git a/fs/namei.c b/fs/namei.c
index 9ed9361223c0..f7c4393f8535 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -34,6 +34,7 @@
#include <linux/device_cgroup.h>
#include <linux/fs_struct.h>
#include <linux/posix_acl.h>
+#include <linux/hash.h>
#include <asm/uaccess.h>
#include "internal.h"
@@ -321,10 +322,11 @@ int generic_permission(struct inode *inode, int mask)
if (S_ISDIR(inode->i_mode)) {
/* DACs are overridable for directories */
- if (inode_capable(inode, CAP_DAC_OVERRIDE))
+ if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE))
return 0;
if (!(mask & MAY_WRITE))
- if (inode_capable(inode, CAP_DAC_READ_SEARCH))
+ if (capable_wrt_inode_uidgid(inode,
+ CAP_DAC_READ_SEARCH))
return 0;
return -EACCES;
}
@@ -334,7 +336,7 @@ int generic_permission(struct inode *inode, int mask)
* at least one exec bit set.
*/
if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO))
- if (inode_capable(inode, CAP_DAC_OVERRIDE))
+ if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE))
return 0;
/*
@@ -342,7 +344,7 @@ int generic_permission(struct inode *inode, int mask)
*/
mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
if (mask == MAY_READ)
- if (inode_capable(inode, CAP_DAC_READ_SEARCH))
+ if (capable_wrt_inode_uidgid(inode, CAP_DAC_READ_SEARCH))
return 0;
return -EACCES;
@@ -1646,8 +1648,7 @@ static inline int can_lookup(struct inode *inode)
static inline unsigned int fold_hash(unsigned long hash)
{
- hash += hash >> (8*sizeof(int));
- return hash;
+ return hash_64(hash, 32);
}
#else /* 32-bit case */
@@ -2199,7 +2200,7 @@ static inline int check_sticky(struct inode *dir, struct inode *inode)
return 0;
if (uid_eq(dir->i_uid, fsuid))
return 0;
- return !inode_capable(inode, CAP_FOWNER);
+ return !capable_wrt_inode_uidgid(inode, CAP_FOWNER);
}
/*
@@ -2263,6 +2264,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir)
*/
static inline int may_create(struct inode *dir, struct dentry *child)
{
+ audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE);
if (child->d_inode)
return -EEXIST;
if (IS_DEADDIR(dir))
@@ -3654,6 +3656,7 @@ retry:
out_dput:
done_path_create(&new_path, new_dentry);
if (retry_estale(error, how)) {
+ path_put(&old_path);
how |= LOOKUP_REVAL;
goto retry;
}
diff --git a/fs/namespace.c b/fs/namespace.c
index a45ba4f267fe..d0244c8ba09c 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -828,8 +828,21 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~MNT_WRITE_HOLD;
/* Don't allow unprivileged users to change mount flags */
- if ((flag & CL_UNPRIVILEGED) && (mnt->mnt.mnt_flags & MNT_READONLY))
- mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
+ if (flag & CL_UNPRIVILEGED) {
+ mnt->mnt.mnt_flags |= MNT_LOCK_ATIME;
+
+ if (mnt->mnt.mnt_flags & MNT_READONLY)
+ mnt->mnt.mnt_flags |= MNT_LOCK_READONLY;
+
+ if (mnt->mnt.mnt_flags & MNT_NODEV)
+ mnt->mnt.mnt_flags |= MNT_LOCK_NODEV;
+
+ if (mnt->mnt.mnt_flags & MNT_NOSUID)
+ mnt->mnt.mnt_flags |= MNT_LOCK_NOSUID;
+
+ if (mnt->mnt.mnt_flags & MNT_NOEXEC)
+ mnt->mnt.mnt_flags |= MNT_LOCK_NOEXEC;
+ }
atomic_inc(&sb->s_active);
mnt->mnt.mnt_sb = sb;
@@ -1261,6 +1274,8 @@ static int do_umount(struct mount *mnt, int flags)
* Special case for "unmounting" root ...
* we just try to remount it readonly.
*/
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
down_write(&sb->s_umount);
if (!(sb->s_flags & MS_RDONLY))
retval = do_remount_sb(sb, MS_RDONLY, NULL, 0);
@@ -1327,6 +1342,9 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
goto dput_and_out;
if (!check_mnt(mnt))
goto dput_and_out;
+ retval = -EPERM;
+ if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
+ goto dput_and_out;
retval = do_umount(mnt, flags);
dput_and_out:
@@ -1764,9 +1782,6 @@ static int change_mount_flags(struct vfsmount *mnt, int ms_flags)
if (readonly_request == __mnt_is_readonly(mnt))
return 0;
- if (mnt->mnt_flags & MNT_LOCK_READONLY)
- return -EPERM;
-
if (readonly_request)
error = mnt_make_readonly(real_mount(mnt));
else
@@ -1792,6 +1807,39 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
if (path->dentry != path->mnt->mnt_root)
return -EINVAL;
+ /* Don't allow changing of locked mnt flags.
+ *
+ * No locks need to be held here while testing the various
+ * MNT_LOCK flags because those flags can never be cleared
+ * once they are set.
+ */
+ if ((mnt->mnt.mnt_flags & MNT_LOCK_READONLY) &&
+ !(mnt_flags & MNT_READONLY)) {
+ return -EPERM;
+ }
+ if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
+ !(mnt_flags & MNT_NODEV)) {
+ /* Was the nodev implicitly added in mount? */
+ if ((mnt->mnt_ns->user_ns != &init_user_ns) &&
+ !(sb->s_type->fs_flags & FS_USERNS_DEV_MOUNT)) {
+ mnt_flags |= MNT_NODEV;
+ } else {
+ return -EPERM;
+ }
+ }
+ if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
+ !(mnt_flags & MNT_NOSUID)) {
+ return -EPERM;
+ }
+ if ((mnt->mnt.mnt_flags & MNT_LOCK_NOEXEC) &&
+ !(mnt_flags & MNT_NOEXEC)) {
+ return -EPERM;
+ }
+ if ((mnt->mnt.mnt_flags & MNT_LOCK_ATIME) &&
+ ((mnt->mnt.mnt_flags & MNT_ATIME_MASK) != (mnt_flags & MNT_ATIME_MASK))) {
+ return -EPERM;
+ }
+
err = security_sb_remount(sb, data);
if (err)
return err;
@@ -1805,7 +1853,7 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
err = do_remount_sb(sb, flags, data, 0);
if (!err) {
br_write_lock(&vfsmount_lock);
- mnt_flags |= mnt->mnt.mnt_flags & MNT_PROPAGATION_MASK;
+ mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
mnt->mnt.mnt_flags = mnt_flags;
br_write_unlock(&vfsmount_lock);
}
@@ -1991,7 +2039,7 @@ static int do_new_mount(struct path *path, const char *fstype, int flags,
*/
if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) {
flags |= MS_NODEV;
- mnt_flags |= MNT_NODEV;
+ mnt_flags |= MNT_NODEV | MNT_LOCK_NODEV;
}
}
@@ -2309,6 +2357,14 @@ long do_mount(const char *dev_name, const char *dir_name,
if (flags & MS_RDONLY)
mnt_flags |= MNT_READONLY;
+ /* The default atime for remount is preservation */
+ if ((flags & MS_REMOUNT) &&
+ ((flags & (MS_NOATIME | MS_NODIRATIME | MS_RELATIME |
+ MS_STRICTATIME)) == 0)) {
+ mnt_flags &= ~MNT_ATIME_MASK;
+ mnt_flags |= path.mnt->mnt_flags & MNT_ATIME_MASK;
+ }
+
flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | MS_BORN |
MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT |
MS_STRICTATIME);
@@ -2649,6 +2705,9 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
/* make sure we can reach put_old from new_root */
if (!is_path_reachable(old_mnt, old.dentry, &new))
goto out4;
+ /* make certain new is below the root */
+ if (!is_path_reachable(new_mnt, new.dentry, &root))
+ goto out4;
root_mp->m_count++; /* pin it so it won't go away */
br_write_lock(&vfsmount_lock);
detach_mnt(new_mnt, &parent_path);
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index 60426ccb3b65..2f970de02b16 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -448,7 +448,6 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg
result = -EIO;
}
}
- result = 0;
}
mutex_unlock(&server->root_setup_lock);
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index 9c3e117c3ed1..4d0161442565 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -44,7 +44,7 @@
static inline sector_t normalize(sector_t s, int base)
{
sector_t tmp = s; /* Since do_div modifies its argument */
- return s - do_div(tmp, base);
+ return s - sector_div(tmp, base);
}
static inline sector_t normalize_up(sector_t s, int base)
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 57db3244f4d9..ef0c394b7bf5 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -108,6 +108,8 @@ again:
continue;
if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
continue;
+ if (!nfs4_valid_open_stateid(state))
+ continue;
if (!nfs4_stateid_match(&state->stateid, stateid))
continue;
get_nfs_open_context(ctx);
@@ -175,7 +177,11 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
{
int res = 0;
- res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid, issync);
+ if (!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
+ res = nfs4_proc_delegreturn(inode,
+ delegation->cred,
+ &delegation->stateid,
+ issync);
nfs_free_delegation(delegation);
return res;
}
@@ -361,11 +367,13 @@ static int nfs_end_delegation_return(struct inode *inode, struct nfs_delegation
{
struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
struct nfs_inode *nfsi = NFS_I(inode);
- int err;
+ int err = 0;
if (delegation == NULL)
return 0;
do {
+ if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
+ break;
err = nfs_delegation_claim_opens(inode, &delegation->stateid);
if (!issync || err != -EAGAIN)
break;
@@ -586,10 +594,23 @@ static void nfs_client_mark_return_unused_delegation_types(struct nfs_client *cl
rcu_read_unlock();
}
+static void nfs_revoke_delegation(struct inode *inode)
+{
+ struct nfs_delegation *delegation;
+ rcu_read_lock();
+ delegation = rcu_dereference(NFS_I(inode)->delegation);
+ if (delegation != NULL) {
+ set_bit(NFS_DELEGATION_REVOKED, &delegation->flags);
+ nfs_mark_return_delegation(NFS_SERVER(inode), delegation);
+ }
+ rcu_read_unlock();
+}
+
void nfs_remove_bad_delegation(struct inode *inode)
{
struct nfs_delegation *delegation;
+ nfs_revoke_delegation(inode);
delegation = nfs_inode_detach_delegation(inode);
if (delegation) {
nfs_inode_find_state_and_recover(inode, &delegation->stateid);
@@ -656,16 +677,19 @@ int nfs_async_inode_return_delegation(struct inode *inode,
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
+ if (delegation == NULL)
+ goto out_enoent;
- if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid)) {
- rcu_read_unlock();
- return -ENOENT;
- }
+ if (!clp->cl_mvops->match_stateid(&delegation->stateid, stateid))
+ goto out_enoent;
nfs_mark_return_delegation(server, delegation);
rcu_read_unlock();
nfs_delegation_run_state_manager(clp);
return 0;
+out_enoent:
+ rcu_read_unlock();
+ return -ENOENT;
}
static struct inode *
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 9a79c7a99d6d..e02b090ab9da 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -31,6 +31,7 @@ enum {
NFS_DELEGATION_RETURN_IF_CLOSED,
NFS_DELEGATION_REFERENCED,
NFS_DELEGATION_RETURNING,
+ NFS_DELEGATION_REVOKED,
};
int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 0bd7a55a5f07..725e87538c98 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -180,6 +180,7 @@ static void nfs_direct_req_free(struct kref *kref)
{
struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
+ nfs_free_pnfs_ds_cinfo(&dreq->ds_cinfo);
if (dreq->l_ctx != NULL)
nfs_put_lock_context(dreq->l_ctx);
if (dreq->ctx != NULL)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c1c7a9d78722..e9be01b2cc5a 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -519,7 +519,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
{
struct inode *inode = dentry->d_inode;
int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
- int err;
+ int err = 0;
/* Flush out writes to the server in order to update c/mtime. */
if (S_ISREG(inode->i_mode)) {
@@ -1382,18 +1382,20 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_version = fattr->change_attr;
}
} else if (server->caps & NFS_CAP_CHANGE_ATTR)
- invalid |= save_cache_validity;
+ nfsi->cache_validity |= save_cache_validity;
if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
} else if (server->caps & NFS_CAP_MTIME)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_REVAL_FORCED);
if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
} else if (server->caps & NFS_CAP_CTIME)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_REVAL_FORCED);
/* Check if our cached file size is stale */
@@ -1416,7 +1418,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
(long long)new_isize);
}
} else
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_REVAL_PAGECACHE
| NFS_INO_REVAL_FORCED);
@@ -1424,7 +1427,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
else if (server->caps & NFS_CAP_ATIME)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATIME
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATIME
| NFS_INO_REVAL_FORCED);
if (fattr->valid & NFS_ATTR_FATTR_MODE) {
@@ -1435,7 +1439,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
}
} else if (server->caps & NFS_CAP_MODE)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL
| NFS_INO_REVAL_FORCED);
@@ -1446,7 +1451,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_uid = fattr->uid;
}
} else if (server->caps & NFS_CAP_OWNER)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL
| NFS_INO_REVAL_FORCED);
@@ -1457,7 +1463,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_gid = fattr->gid;
}
} else if (server->caps & NFS_CAP_OWNER_GROUP)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL
| NFS_INO_REVAL_FORCED);
@@ -1470,7 +1477,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
set_nlink(inode, fattr->nlink);
}
} else if (server->caps & NFS_CAP_NLINK)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_REVAL_FORCED);
if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c
index 4a1aafba6a20..8c34f57a9aef 100644
--- a/fs/nfs/nfs3acl.c
+++ b/fs/nfs/nfs3acl.c
@@ -305,7 +305,10 @@ static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
.rpc_argp = &args,
.rpc_resp = &fattr,
};
- int status;
+ int status = 0;
+
+ if (acl == NULL && (!S_ISDIR(inode->i_mode) || dfacl == NULL))
+ goto out;
status = -EOPNOTSUPP;
if (!nfs_server_capable(inode, NFS_CAP_ACLS))
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 4cbad5d6b276..cc143ee7a56e 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -240,13 +240,11 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
error = nfs4_discover_server_trunking(clp, &old);
if (error < 0)
goto error;
- nfs_put_client(clp);
- if (clp != old) {
- clp->cl_preserve_clid = true;
- clp = old;
- }
- return clp;
+ if (clp != old)
+ clp->cl_preserve_clid = true;
+ nfs_put_client(clp);
+ return old;
error:
nfs_mark_client_ready(clp, error);
@@ -313,6 +311,16 @@ int nfs40_walk_client_list(struct nfs_client *new,
spin_lock(&nn->nfs_client_lock);
list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
+
+ if (pos->rpc_ops != new->rpc_ops)
+ continue;
+
+ if (pos->cl_proto != new->cl_proto)
+ continue;
+
+ if (pos->cl_minorversion != new->cl_minorversion)
+ continue;
+
/* If "pos" isn't marked ready, we can't trust the
* remaining fields in "pos" */
if (pos->cl_cons_state > NFS_CS_READY) {
@@ -324,22 +332,14 @@ int nfs40_walk_client_list(struct nfs_client *new,
prev = pos;
status = nfs_wait_client_init_complete(pos);
- spin_lock(&nn->nfs_client_lock);
if (status < 0)
- continue;
+ goto out;
+ status = -NFS4ERR_STALE_CLIENTID;
+ spin_lock(&nn->nfs_client_lock);
}
if (pos->cl_cons_state != NFS_CS_READY)
continue;
- if (pos->rpc_ops != new->rpc_ops)
- continue;
-
- if (pos->cl_proto != new->cl_proto)
- continue;
-
- if (pos->cl_minorversion != new->cl_minorversion)
- continue;
-
if (pos->cl_clientid != new->cl_clientid)
continue;
@@ -445,6 +445,16 @@ int nfs41_walk_client_list(struct nfs_client *new,
spin_lock(&nn->nfs_client_lock);
list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
+
+ if (pos->rpc_ops != new->rpc_ops)
+ continue;
+
+ if (pos->cl_proto != new->cl_proto)
+ continue;
+
+ if (pos->cl_minorversion != new->cl_minorversion)
+ continue;
+
/* If "pos" isn't marked ready, we can't trust the
* remaining fields in "pos", especially the client
* ID and serverowner fields. Wait for CREATE_SESSION
@@ -464,20 +474,12 @@ int nfs41_walk_client_list(struct nfs_client *new,
}
spin_lock(&nn->nfs_client_lock);
if (status < 0)
- continue;
+ break;
+ status = -NFS4ERR_STALE_CLIENTID;
}
if (pos->cl_cons_state != NFS_CS_READY)
continue;
- if (pos->rpc_ops != new->rpc_ops)
- continue;
-
- if (pos->cl_proto != new->cl_proto)
- continue;
-
- if (pos->cl_minorversion != new->cl_minorversion)
- continue;
-
if (!nfs4_match_clientids(pos, new))
continue;
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 22d10623f5ee..b039f7f26d95 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -1300,7 +1300,7 @@ filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
struct nfs4_filelayout *flo;
flo = kzalloc(sizeof(*flo), gfp_flags);
- return &flo->generic_hdr;
+ return flo != NULL ? &flo->generic_hdr : NULL;
}
static void
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 661a0f611215..678cb8964532 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -797,34 +797,34 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
-
- if (filelayout_test_devid_unavailable(devid))
- return NULL;
+ struct nfs4_pnfs_ds *ret = ds;
if (ds == NULL) {
printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
__func__, ds_idx);
filelayout_mark_devid_invalid(devid);
- return NULL;
+ goto out;
}
if (ds->ds_clp)
- return ds;
+ goto out_test_devid;
if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) {
struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
int err;
err = nfs4_ds_connect(s, ds);
- if (err) {
+ if (err)
nfs4_mark_deviceid_unavailable(devid);
- ds = NULL;
- }
nfs4_clear_ds_conn_bit(ds);
} else {
/* Either ds is connected, or ds is NULL */
nfs4_wait_ds_connect(ds);
}
- return ds;
+out_test_devid:
+ if (filelayout_test_devid_unavailable(devid))
+ ret = NULL;
+out:
+ return ret;
}
module_param(dataserver_retrans, uint, 0644);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d7ba5616989c..20ebcfa3c92e 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1160,29 +1160,24 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
int ret;
if (!data->rpc_done) {
- ret = data->rpc_status;
- goto err;
+ if (data->rpc_status) {
+ ret = data->rpc_status;
+ goto err;
+ }
+ /* cached opens have already been processed */
+ goto update;
}
- ret = -ESTALE;
- if (!(data->f_attr.valid & NFS_ATTR_FATTR_TYPE) ||
- !(data->f_attr.valid & NFS_ATTR_FATTR_FILEID) ||
- !(data->f_attr.valid & NFS_ATTR_FATTR_CHANGE))
- goto err;
-
- ret = -ENOMEM;
- state = nfs4_get_open_state(inode, data->owner);
- if (state == NULL)
- goto err;
-
ret = nfs_refresh_inode(inode, &data->f_attr);
if (ret)
goto err;
if (data->o_res.delegation_type != 0)
nfs4_opendata_check_deleg(data, state);
+update:
update_open_stateid(state, &data->o_res.stateid, NULL,
data->o_arg.fmode);
+ atomic_inc(&state->count);
return state;
err:
@@ -1421,7 +1416,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
nfs_inode_find_state_and_recover(state->inode,
stateid);
nfs4_schedule_stateid_recovery(server, state);
- return 0;
+ return -EAGAIN;
case -NFS4ERR_DELAY:
case -NFS4ERR_GRACE:
set_bit(NFS_DELEGATED_STATE, &state->flags);
@@ -1850,6 +1845,28 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
return ret;
}
+static void nfs_finish_clear_delegation_stateid(struct nfs4_state *state)
+{
+ nfs_remove_bad_delegation(state->inode);
+ write_seqlock(&state->seqlock);
+ nfs4_stateid_copy(&state->stateid, &state->open_stateid);
+ write_sequnlock(&state->seqlock);
+ clear_bit(NFS_DELEGATED_STATE, &state->flags);
+}
+
+static void nfs40_clear_delegation_stateid(struct nfs4_state *state)
+{
+ if (rcu_access_pointer(NFS_I(state->inode)->delegation) != NULL)
+ nfs_finish_clear_delegation_stateid(state);
+}
+
+static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
+{
+ /* NFSv4.0 doesn't allow for delegation recovery on open expire */
+ nfs40_clear_delegation_stateid(state);
+ return nfs4_open_expired(sp, state);
+}
+
#if defined(CONFIG_NFS_V4_1)
static void nfs41_clear_delegation_stateid(struct nfs4_state *state)
{
@@ -2292,6 +2309,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
struct nfs4_closedata *calldata = data;
struct nfs4_state *state = calldata->state;
struct inode *inode = calldata->inode;
+ bool is_rdonly, is_wronly, is_rdwr;
int call_close = 0;
dprintk("%s: begin!\n", __func__);
@@ -2299,21 +2317,27 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
goto out_wait;
task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE];
- calldata->arg.fmode = FMODE_READ|FMODE_WRITE;
spin_lock(&state->owner->so_lock);
+ is_rdwr = test_bit(NFS_O_RDWR_STATE, &state->flags);
+ is_rdonly = test_bit(NFS_O_RDONLY_STATE, &state->flags);
+ is_wronly = test_bit(NFS_O_WRONLY_STATE, &state->flags);
/* Calculate the change in open mode */
+ calldata->arg.fmode = 0;
if (state->n_rdwr == 0) {
- if (state->n_rdonly == 0) {
- call_close |= test_bit(NFS_O_RDONLY_STATE, &state->flags);
- call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
- calldata->arg.fmode &= ~FMODE_READ;
- }
- if (state->n_wronly == 0) {
- call_close |= test_bit(NFS_O_WRONLY_STATE, &state->flags);
- call_close |= test_bit(NFS_O_RDWR_STATE, &state->flags);
- calldata->arg.fmode &= ~FMODE_WRITE;
- }
- }
+ if (state->n_rdonly == 0)
+ call_close |= is_rdonly;
+ else if (is_rdonly)
+ calldata->arg.fmode |= FMODE_READ;
+ if (state->n_wronly == 0)
+ call_close |= is_wronly;
+ else if (is_wronly)
+ calldata->arg.fmode |= FMODE_WRITE;
+ } else if (is_rdwr)
+ calldata->arg.fmode |= FMODE_READ|FMODE_WRITE;
+
+ if (calldata->arg.fmode == 0)
+ call_close |= is_rdwr;
+
if (!nfs4_valid_open_stateid(state))
call_close = 0;
spin_unlock(&state->owner->so_lock);
@@ -3612,8 +3636,9 @@ static bool nfs4_stateid_is_current(nfs4_stateid *stateid,
{
nfs4_stateid current_stateid;
- if (nfs4_set_rw_stateid(&current_stateid, ctx, l_ctx, fmode))
- return false;
+ /* If the current stateid represents a lost lock, then exit */
+ if (nfs4_set_rw_stateid(&current_stateid, ctx, l_ctx, fmode) == -EIO)
+ return true;
return nfs4_stateid_match(stateid, &current_stateid);
}
@@ -4227,8 +4252,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
dprintk("%s ERROR %d, Reset session\n", __func__,
task->tk_status);
nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
- task->tk_status = 0;
- return -EAGAIN;
+ goto wait_on_recovery;
#endif /* CONFIG_NFS_V4_1 */
case -NFS4ERR_DELAY:
nfs_inc_server_stats(server, NFSIOS_DELAY);
@@ -4406,11 +4430,17 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
return;
switch (task->tk_status) {
- case -NFS4ERR_STALE_STATEID:
- case -NFS4ERR_EXPIRED:
case 0:
renew_lease(data->res.server, data->timestamp);
break;
+ case -NFS4ERR_ADMIN_REVOKED:
+ case -NFS4ERR_DELEG_REVOKED:
+ case -NFS4ERR_BAD_STATEID:
+ case -NFS4ERR_OLD_STATEID:
+ case -NFS4ERR_STALE_STATEID:
+ case -NFS4ERR_EXPIRED:
+ task->tk_status = 0;
+ break;
default:
if (nfs4_async_handle_error(task, data->res.server, NULL) ==
-EAGAIN) {
@@ -4572,6 +4602,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
status = 0;
}
request->fl_ops->fl_release_private(request);
+ request->fl_ops = NULL;
out:
return status;
}
@@ -6058,7 +6089,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
int ret = 0;
if ((renew_flags & NFS4_RENEW_TIMEOUT) == 0)
- return 0;
+ return -EAGAIN;
task = _nfs41_proc_sequence(clp, cred, false);
if (IS_ERR(task))
ret = PTR_ERR(task);
@@ -6231,9 +6262,9 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
struct nfs_server *server = NFS_SERVER(inode);
struct pnfs_layout_hdr *lo;
struct nfs4_state *state = NULL;
- unsigned long timeo, giveup;
+ unsigned long timeo, now, giveup;
- dprintk("--> %s\n", __func__);
+ dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status);
if (!nfs41_sequence_done(task, &lgp->res.seq_res))
goto out;
@@ -6241,12 +6272,38 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
switch (task->tk_status) {
case 0:
goto out;
+ /*
+ * NFS4ERR_LAYOUTTRYLATER is a conflict with another client
+ * (or clients) writing to the same RAID stripe
+ */
case -NFS4ERR_LAYOUTTRYLATER:
+ /*
+ * NFS4ERR_RECALLCONFLICT is when conflict with self (must recall
+ * existing layout before getting a new one).
+ */
case -NFS4ERR_RECALLCONFLICT:
timeo = rpc_get_timeout(task->tk_client);
giveup = lgp->args.timestamp + timeo;
- if (time_after(giveup, jiffies))
- task->tk_status = -NFS4ERR_DELAY;
+ now = jiffies;
+ if (time_after(giveup, now)) {
+ unsigned long delay;
+
+ /* Delay for:
+ * - Not less than NFS4_POLL_RETRY_MIN.
+ * - One last time a jiffie before we give up
+ * - exponential backoff (time_now minus start_attempt)
+ */
+ delay = max_t(unsigned long, NFS4_POLL_RETRY_MIN,
+ min((giveup - now - 1),
+ now - lgp->args.timestamp));
+
+ dprintk("%s: NFS4ERR_RECALLCONFLICT waiting %lu\n",
+ __func__, delay);
+ rpc_delay(task, delay);
+ task->tk_status = 0;
+ rpc_restart_call_prepare(task);
+ goto out; /* Do not call nfs4_async_handle_error() */
+ }
break;
case -NFS4ERR_EXPIRED:
case -NFS4ERR_BAD_STATEID:
@@ -6361,6 +6418,9 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
dprintk("--> %s\n", __func__);
+ /* nfs4_layoutget_release calls pnfs_put_layout_hdr */
+ pnfs_get_layout_hdr(NFS_I(inode)->layout);
+
lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags);
if (!lgp->args.layout.pages) {
nfs4_layoutget_release(lgp);
@@ -6373,9 +6433,6 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
lgp->res.seq_res.sr_slot = NULL;
nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0);
- /* nfs4_layoutget_release calls pnfs_put_layout_hdr */
- pnfs_get_layout_hdr(NFS_I(inode)->layout);
-
task = rpc_run_task(&task_setup_data);
if (IS_ERR(task))
return ERR_CAST(task);
@@ -6682,7 +6739,7 @@ nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
switch (err) {
case 0:
case -NFS4ERR_WRONGSEC:
- case -NFS4ERR_NOTSUPP:
+ case -ENOTSUPP:
goto out;
default:
err = nfs4_handle_exception(server, err, &exception);
@@ -6714,7 +6771,7 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
* Fall back on "guess and check" method if
* the server doesn't support SECINFO_NO_NAME
*/
- if (err == -NFS4ERR_WRONGSEC || err == -NFS4ERR_NOTSUPP) {
+ if (err == -NFS4ERR_WRONGSEC || err == -ENOTSUPP) {
err = nfs4_find_root_sec(server, fhandle, info);
goto out_freepage;
}
@@ -6939,7 +6996,7 @@ static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = {
static const struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = {
.owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
.state_flag_bit = NFS_STATE_RECLAIM_NOGRACE,
- .recover_open = nfs4_open_expired,
+ .recover_open = nfs40_open_expired,
.recover_lock = nfs4_lock_expired,
.establish_clid = nfs4_init_clientid,
.get_clid_cred = nfs4_get_setclientid_cred,
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 1720d32ffa54..e1ba58c3d1ad 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -88,10 +88,18 @@ nfs4_renew_state(struct work_struct *work)
}
nfs_expire_all_delegations(clp);
} else {
+ int ret;
+
/* Queue an asynchronous RENEW. */
- ops->sched_state_renewal(clp, cred, renew_flags);
+ ret = ops->sched_state_renewal(clp, cred, renew_flags);
put_rpccred(cred);
- goto out_exp;
+ switch (ret) {
+ default:
+ goto out_exp;
+ case -EAGAIN:
+ case -ENOMEM:
+ break;
+ }
}
} else {
dprintk("%s: failed to call renewd. Reason: lease not expired \n",
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 2c37442ed936..d482b86d0e0b 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1699,7 +1699,8 @@ restart:
if (status < 0) {
set_bit(ops->owner_flag_bit, &sp->so_flags);
nfs4_put_state_owner(sp);
- return nfs4_recovery_handle_error(clp, status);
+ status = nfs4_recovery_handle_error(clp, status);
+ return (status != 0) ? status : -EAGAIN;
}
nfs4_put_state_owner(sp);
@@ -1708,7 +1709,7 @@ restart:
spin_unlock(&clp->cl_lock);
}
rcu_read_unlock();
- return status;
+ return 0;
}
static int nfs4_check_lease(struct nfs_client *clp)
@@ -1755,7 +1756,6 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status)
break;
case -NFS4ERR_STALE_CLIENTID:
clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
- nfs4_state_clear_reclaim_reboot(clp);
nfs4_state_start_reclaim_reboot(clp);
break;
case -NFS4ERR_CLID_INUSE:
@@ -2174,14 +2174,11 @@ static void nfs4_state_manager(struct nfs_client *clp)
section = "reclaim reboot";
status = nfs4_do_reclaim(clp,
clp->cl_mvops->reboot_recovery_ops);
- if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
- test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state))
- continue;
- nfs4_state_end_reclaim_reboot(clp);
- if (test_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state))
+ if (status == -EAGAIN)
continue;
if (status < 0)
goto out_error;
+ nfs4_state_end_reclaim_reboot(clp);
}
/* Now recover expired state... */
@@ -2189,9 +2186,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
section = "reclaim nograce";
status = nfs4_do_reclaim(clp,
clp->cl_mvops->nograce_recovery_ops);
- if (test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) ||
- test_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) ||
- test_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state))
+ if (status == -EAGAIN)
continue;
if (status < 0)
goto out_error;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4be8d135ed61..988efb4caac0 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3002,7 +3002,8 @@ out_overflow:
return -EIO;
}
-static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
+static bool __decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected,
+ int *nfs_retval)
{
__be32 *p;
uint32_t opnum;
@@ -3012,19 +3013,32 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
if (unlikely(!p))
goto out_overflow;
opnum = be32_to_cpup(p++);
- if (opnum != expected) {
- dprintk("nfs: Server returned operation"
- " %d but we issued a request for %d\n",
- opnum, expected);
- return -EIO;
- }
+ if (unlikely(opnum != expected))
+ goto out_bad_operation;
nfserr = be32_to_cpup(p);
- if (nfserr != NFS_OK)
- return nfs4_stat_to_errno(nfserr);
- return 0;
+ if (nfserr == NFS_OK)
+ *nfs_retval = 0;
+ else
+ *nfs_retval = nfs4_stat_to_errno(nfserr);
+ return true;
+out_bad_operation:
+ dprintk("nfs: Server returned operation"
+ " %d but we issued a request for %d\n",
+ opnum, expected);
+ *nfs_retval = -EREMOTEIO;
+ return false;
out_overflow:
print_overflow_msg(__func__, xdr);
- return -EIO;
+ *nfs_retval = -EIO;
+ return false;
+}
+
+static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
+{
+ int retval;
+
+ __decode_op_hdr(xdr, expected, &retval);
+ return retval;
}
/* Dummy routine */
@@ -4842,11 +4856,12 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
uint32_t savewords, bmlen, i;
int status;
- status = decode_op_hdr(xdr, OP_OPEN);
- if (status != -EIO)
- nfs_increment_open_seqid(status, res->seqid);
- if (!status)
- status = decode_stateid(xdr, &res->stateid);
+ if (!__decode_op_hdr(xdr, OP_OPEN, &status))
+ return status;
+ nfs_increment_open_seqid(status, res->seqid);
+ if (status)
+ return status;
+ status = decode_stateid(xdr, &res->stateid);
if (unlikely(status))
return status;
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 5f38ea36e266..af51cf9bf2e3 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -536,16 +536,12 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
if (err)
goto out3;
exp.ex_anon_uid= make_kuid(&init_user_ns, an_int);
- if (!uid_valid(exp.ex_anon_uid))
- goto out3;
/* anon gid */
err = get_int(&mesg, &an_int);
if (err)
goto out3;
exp.ex_anon_gid= make_kgid(&init_user_ns, an_int);
- if (!gid_valid(exp.ex_anon_gid))
- goto out3;
/* fsid */
err = get_int(&mesg, &an_int);
@@ -583,6 +579,17 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
exp.ex_uuid);
if (err)
goto out4;
+ /*
+ * For some reason exportfs has been passing down an
+ * invalid (-1) uid & gid on the "dummy" export which it
+ * uses to test export support. To make sure exportfs
+ * sees errors from check_export we therefore need to
+ * delay these checks till after check_export:
+ */
+ if (!uid_valid(exp.ex_anon_uid))
+ goto out4;
+ if (!gid_valid(exp.ex_anon_gid))
+ goto out4;
}
expp = svc_export_lookup(&exp);
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 8a50b3c18093..e15bcbd5043c 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -385,8 +385,10 @@ sort_pacl(struct posix_acl *pacl)
* by uid/gid. */
int i, j;
- if (pacl->a_count <= 4)
- return; /* no users or groups */
+ /* no users or groups */
+ if (!pacl || pacl->a_count <= 4)
+ return;
+
i = 1;
while (pacl->a_entries[i].e_tag == ACL_USER)
i++;
@@ -513,13 +515,12 @@ posix_state_to_acl(struct posix_acl_state *state, unsigned int flags)
/*
* ACLs with no ACEs are treated differently in the inheritable
- * and effective cases: when there are no inheritable ACEs, we
- * set a zero-length default posix acl:
+ * and effective cases: when there are no inheritable ACEs,
+ * calls ->set_acl with a NULL ACL structure.
*/
- if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT)) {
- pacl = posix_acl_alloc(0, GFP_KERNEL);
- return pacl ? pacl : ERR_PTR(-ENOMEM);
- }
+ if (state->empty && (flags & NFS4_ACL_TYPE_DEFAULT))
+ return NULL;
+
/*
* When there are no effective ACEs, the following will end
* up setting a 3-element effective posix ACL with all
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 7f05cd140de3..f42bbe5fbc0a 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -637,9 +637,11 @@ static struct rpc_cred *get_backchannel_cred(struct nfs4_client *clp, struct rpc
static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *conn, struct nfsd4_session *ses)
{
+ int maxtime = max_cb_time(clp->net);
struct rpc_timeout timeparms = {
- .to_initval = max_cb_time(clp->net),
+ .to_initval = maxtime,
.to_retries = 0,
+ .to_maxval = maxtime,
};
struct rpc_create_args args = {
.net = clp->net,
@@ -670,7 +672,8 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
clp->cl_cb_session = ses;
args.bc_xprt = conn->cb_xprt;
args.prognumber = clp->cl_cb_session->se_cb_prog;
- args.protocol = XPRT_TRANSPORT_BC_TCP;
+ args.protocol = conn->cb_xprt->xpt_class->xcl_ident |
+ XPRT_TRANSPORT_BC;
args.authflavor = ses->se_cb_sec.flavor;
}
/* Create RPC client */
@@ -781,8 +784,12 @@ static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task)
{
if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
rpc_sleep_on(&clp->cl_cb_waitq, task, NULL);
- dprintk("%s slot is busy\n", __func__);
- return false;
+ /* Race breaker */
+ if (test_and_set_bit(0, &clp->cl_cb_slot_busy) != 0) {
+ dprintk("%s slot is busy\n", __func__);
+ return false;
+ }
+ rpc_wake_up_queued_task(&clp->cl_cb_waitq, task);
}
return true;
}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 27d74a294515..9240dd1678da 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -576,15 +576,6 @@ nfsd4_create(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
switch (create->cr_type) {
case NF4LNK:
- /* ugh! we have to null-terminate the linktext, or
- * vfs_symlink() will choke. it is always safe to
- * null-terminate by brute force, since at worst we
- * will overwrite the first byte of the create namelen
- * in the XDR buffer, which has already been extracted
- * during XDR decode.
- */
- create->cr_linkname[create->cr_linklen] = 0;
-
status = nfsd_symlink(rqstp, &cstate->current_fh,
create->cr_name, create->cr_namelen,
create->cr_linkname, create->cr_linklen,
@@ -1200,7 +1191,8 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp)
*/
if (argp->opcnt == resp->opcnt)
return false;
-
+ if (next->opnum == OP_ILLEGAL)
+ return false;
nextd = OPDESC(next);
/*
* Rest of 2.6.3.1.1: certain operations will return WRONGSEC
@@ -1307,6 +1299,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
/* If op is non-idempotent */
if (opdesc->op_flags & OP_MODIFIES_SOMETHING) {
plen = opdesc->op_rsize_bop(rqstp, op);
+ /*
+ * If there's still another operation, make sure
+ * we'll have space to at least encode an error:
+ */
+ if (resp->opcnt < args->opcnt)
+ plen += COMPOUND_ERR_SLACK_SPACE;
op->status = nfsd4_check_resp_size(resp, plen);
}
@@ -1471,7 +1469,8 @@ static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *o
static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
- return (op_encode_hdr_size + 2 + 1024) * sizeof(__be32);
+ return (op_encode_hdr_size + 2 + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) *
+ sizeof(__be32);
}
static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 316ec843dec2..4a58afa99654 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -367,7 +367,6 @@ static struct nfs4_delegation *
alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh, u32 type)
{
struct nfs4_delegation *dp;
- struct nfs4_file *fp = stp->st_file;
dprintk("NFSD alloc_init_deleg\n");
/*
@@ -377,8 +376,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv
*/
if (type != NFS4_OPEN_DELEGATE_READ)
return NULL;
- if (fp->fi_had_conflict)
- return NULL;
if (num_delegations > max_delegations)
return NULL;
dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab));
@@ -395,8 +392,7 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv
INIT_LIST_HEAD(&dp->dl_perfile);
INIT_LIST_HEAD(&dp->dl_perclnt);
INIT_LIST_HEAD(&dp->dl_recall_lru);
- get_nfs4_file(fp);
- dp->dl_file = fp;
+ dp->dl_file = NULL;
dp->dl_type = type;
fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle);
dp->dl_time = 0;
@@ -1081,6 +1077,18 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
return NULL;
}
clp->cl_name.len = name.len;
+ INIT_LIST_HEAD(&clp->cl_sessions);
+ idr_init(&clp->cl_stateids);
+ atomic_set(&clp->cl_refcount, 0);
+ clp->cl_cb_state = NFSD4_CB_UNKNOWN;
+ INIT_LIST_HEAD(&clp->cl_idhash);
+ INIT_LIST_HEAD(&clp->cl_openowners);
+ INIT_LIST_HEAD(&clp->cl_delegations);
+ INIT_LIST_HEAD(&clp->cl_lru);
+ INIT_LIST_HEAD(&clp->cl_callbacks);
+ INIT_LIST_HEAD(&clp->cl_revoked);
+ spin_lock_init(&clp->cl_lock);
+ rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
return clp;
}
@@ -1098,6 +1106,7 @@ free_client(struct nfs4_client *clp)
WARN_ON_ONCE(atomic_read(&ses->se_ref));
free_session(ses);
}
+ rpc_destroy_wait_queue(&clp->cl_cb_waitq);
free_svc_cred(&clp->cl_cred);
kfree(clp->cl_name.data);
idr_destroy(&clp->cl_stateids);
@@ -1191,15 +1200,14 @@ static int copy_cred(struct svc_cred *target, struct svc_cred *source)
return 0;
}
-static long long
+static int
compare_blob(const struct xdr_netobj *o1, const struct xdr_netobj *o2)
{
- long long res;
-
- res = o1->len - o2->len;
- if (res)
- return res;
- return (long long)memcmp(o1->data, o2->data, o1->len);
+ if (o1->len < o2->len)
+ return -1;
+ if (o1->len > o2->len)
+ return 1;
+ return memcmp(o1->data, o2->data, o1->len);
}
static int same_name(const char *n1, const char *n2)
@@ -1315,7 +1323,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
if (clp == NULL)
return NULL;
- INIT_LIST_HEAD(&clp->cl_sessions);
ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred);
if (ret) {
spin_lock(&nn->client_lock);
@@ -1323,20 +1330,9 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
spin_unlock(&nn->client_lock);
return NULL;
}
- idr_init(&clp->cl_stateids);
- atomic_set(&clp->cl_refcount, 0);
- clp->cl_cb_state = NFSD4_CB_UNKNOWN;
- INIT_LIST_HEAD(&clp->cl_idhash);
- INIT_LIST_HEAD(&clp->cl_openowners);
- INIT_LIST_HEAD(&clp->cl_delegations);
- INIT_LIST_HEAD(&clp->cl_lru);
- INIT_LIST_HEAD(&clp->cl_callbacks);
- INIT_LIST_HEAD(&clp->cl_revoked);
- spin_lock_init(&clp->cl_lock);
nfsd4_init_callback(&clp->cl_cb_null);
clp->cl_time = get_seconds();
clear_bit(0, &clp->cl_cb_slot_busy);
- rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
copy_verf(clp, verf);
rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
gen_confirm(clp);
@@ -1368,7 +1364,7 @@ add_clp_to_name_tree(struct nfs4_client *new_clp, struct rb_root *root)
static struct nfs4_client *
find_clp_in_name_tree(struct xdr_netobj *name, struct rb_root *root)
{
- long long cmp;
+ int cmp;
struct rb_node *node = root->rb_node;
struct nfs4_client *clp;
@@ -2964,22 +2960,35 @@ static int nfs4_setlease(struct nfs4_delegation *dp, int flag)
return 0;
}
-static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag)
+static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag, struct nfs4_file *fp)
{
- struct nfs4_file *fp = dp->dl_file;
+ int status;
- if (!fp->fi_lease)
- return nfs4_setlease(dp, flag);
+ if (fp->fi_had_conflict)
+ return -EAGAIN;
+ get_nfs4_file(fp);
+ dp->dl_file = fp;
+ if (!fp->fi_lease) {
+ status = nfs4_setlease(dp, flag);
+ if (status)
+ goto out_free;
+ return 0;
+ }
spin_lock(&recall_lock);
if (fp->fi_had_conflict) {
spin_unlock(&recall_lock);
- return -EAGAIN;
+ status = -EAGAIN;
+ goto out_free;
}
atomic_inc(&fp->fi_delegees);
list_add(&dp->dl_perfile, &fp->fi_delegations);
spin_unlock(&recall_lock);
list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
return 0;
+out_free:
+ put_nfs4_file(fp);
+ dp->dl_file = fp;
+ return status;
}
static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
@@ -3045,7 +3054,7 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh,
dp = alloc_init_deleg(oo->oo_owner.so_client, stp, fh, flag);
if (dp == NULL)
goto out_no_deleg;
- status = nfs4_set_delegation(dp, flag);
+ status = nfs4_set_delegation(dp, flag, stp->st_file);
if (status)
goto out_free;
@@ -3598,9 +3607,16 @@ out:
static __be32
nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp)
{
- if (check_for_locks(stp->st_file, lockowner(stp->st_stateowner)))
+ struct nfs4_lockowner *lo = lockowner(stp->st_stateowner);
+
+ if (check_for_locks(stp->st_file, lo))
return nfserr_locks_held;
- release_lock_stateid(stp);
+ /*
+ * Currently there's a 1-1 lock stateid<->lockowner
+ * correspondence, and we have to delete the lockowner when we
+ * delete the lock stateid:
+ */
+ release_lockowner(lo);
return nfs_ok;
}
@@ -4044,6 +4060,10 @@ static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, c
if (!same_owner_str(&lo->lo_owner, owner, clid))
return false;
+ if (list_empty(&lo->lo_owner.so_stateids)) {
+ WARN_ON_ONCE(1);
+ return false;
+ }
lst = list_first_entry(&lo->lo_owner.so_stateids,
struct nfs4_ol_stateid, st_perstateowner);
return lst->st_file->fi_inode == inode;
@@ -4958,7 +4978,6 @@ nfs4_state_destroy_net(struct net *net)
int i;
struct nfs4_client *clp = NULL;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- struct rb_node *node, *tmp;
for (i = 0; i < CLIENT_HASH_SIZE; i++) {
while (!list_empty(&nn->conf_id_hashtbl[i])) {
@@ -4967,13 +4986,11 @@ nfs4_state_destroy_net(struct net *net)
}
}
- node = rb_first(&nn->unconf_name_tree);
- while (node != NULL) {
- tmp = node;
- node = rb_next(tmp);
- clp = rb_entry(tmp, struct nfs4_client, cl_namenode);
- rb_erase(tmp, &nn->unconf_name_tree);
- destroy_client(clp);
+ for (i = 0; i < CLIENT_HASH_SIZE; i++) {
+ while (!list_empty(&nn->unconf_id_hashtbl[i])) {
+ clp = list_entry(nn->unconf_id_hashtbl[i].next, struct nfs4_client, cl_idhash);
+ destroy_client(clp);
+ }
}
kfree(nn->sessionid_hashtbl);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 582321a978b0..acf179d7615f 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -553,7 +553,18 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
READ_BUF(4);
READ32(create->cr_linklen);
READ_BUF(create->cr_linklen);
- SAVEMEM(create->cr_linkname, create->cr_linklen);
+ /*
+ * The VFS will want a null-terminated string, and
+ * null-terminating in place isn't safe since this might
+ * end on a page boundary:
+ */
+ create->cr_linkname =
+ kmalloc(create->cr_linklen + 1, GFP_KERNEL);
+ if (!create->cr_linkname)
+ return nfserr_jukebox;
+ memcpy(create->cr_linkname, p, create->cr_linklen);
+ create->cr_linkname[create->cr_linklen] = '\0';
+ defer_free(argp, kfree, create->cr_linkname);
break;
case NF4BLK:
case NF4CHR:
@@ -1732,6 +1743,9 @@ static __be32 nfsd4_encode_components_esc(char sep, char *components,
}
else
end++;
+ if (found_esc)
+ end = next;
+
str = end;
}
*pp = p;
@@ -2035,8 +2049,8 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
err = vfs_getattr(&path, &stat);
if (err)
goto out_nfserr;
- if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL |
- FATTR4_WORD0_MAXNAME)) ||
+ if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE |
+ FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) ||
(bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
FATTR4_WORD1_SPACE_TOTAL))) {
err = vfs_statfs(&path, &statfs);
@@ -2401,6 +2415,8 @@ out_acl:
WRITE64(stat.ino);
}
if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
+ if ((buflen -= 16) < 0)
+ goto out_resource;
WRITE32(3);
WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0);
WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD1);
@@ -3382,6 +3398,9 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_test_stateid_id *stateid, *next;
__be32 *p;
+ if (nfserr)
+ return nfserr;
+
RESERVE_SPACE(4 + (4 * test_stateid->ts_num_ids));
*p++ = htonl(test_stateid->ts_num_ids);
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index e76244edd748..e5e4675b7e75 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -221,13 +221,6 @@ hash_refile(struct svc_cacherep *rp)
hlist_add_head(&rp->c_hash, cache_hash + hash_32(rp->c_xid, maskbits));
}
-static inline bool
-nfsd_cache_entry_expired(struct svc_cacherep *rp)
-{
- return rp->c_state != RC_INPROG &&
- time_after(jiffies, rp->c_timestamp + RC_EXPIRE);
-}
-
/*
* Walk the LRU list and prune off entries that are older than RC_EXPIRE.
* Also prune the oldest ones when the total exceeds the max number of entries.
@@ -238,8 +231,14 @@ prune_cache_entries(void)
struct svc_cacherep *rp, *tmp;
list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) {
- if (!nfsd_cache_entry_expired(rp) &&
- num_drc_entries <= max_drc_entries)
+ /*
+ * Don't free entries attached to calls that are still
+ * in-progress, but do keep scanning the list.
+ */
+ if (rp->c_state == RC_INPROG)
+ continue;
+ if (num_drc_entries <= max_drc_entries &&
+ time_before(jiffies, rp->c_timestamp + RC_EXPIRE))
break;
nfsd_reply_cache_free_locked(rp);
}
@@ -395,22 +394,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
/*
* Since the common case is a cache miss followed by an insert,
- * preallocate an entry. First, try to reuse the first entry on the LRU
- * if it works, then go ahead and prune the LRU list.
+ * preallocate an entry.
*/
- spin_lock(&cache_lock);
- if (!list_empty(&lru_head)) {
- rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru);
- if (nfsd_cache_entry_expired(rp) ||
- num_drc_entries >= max_drc_entries) {
- lru_put_end(rp);
- prune_cache_entries();
- goto search_cache;
- }
- }
-
- /* No expired ones available, allocate a new one. */
- spin_unlock(&cache_lock);
rp = nfsd_reply_cache_alloc();
spin_lock(&cache_lock);
if (likely(rp)) {
@@ -418,7 +403,9 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
drc_mem_usage += sizeof(*rp);
}
-search_cache:
+ /* go ahead and prune the cache */
+ prune_cache_entries();
+
found = nfsd_cache_search(rqstp, csum);
if (found) {
if (likely(rp))
@@ -432,15 +419,6 @@ search_cache:
goto out;
}
- /*
- * We're keeping the one we just allocated. Are we now over the
- * limit? Prune one off the tip of the LRU in trade for the one we
- * just allocated if so.
- */
- if (num_drc_entries >= max_drc_entries)
- nfsd_reply_cache_free_locked(list_first_entry(&lru_head,
- struct svc_cacherep, c_lru));
-
nfsdstats.rcmisses++;
rqstp->rq_cacherep = rp;
rp->c_state = RC_INPROG;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 7f555179bf81..f34d9de802ab 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -699,6 +699,11 @@ static ssize_t __write_ports_addfd(char *buf, struct net *net)
if (err != 0 || fd < 0)
return -EINVAL;
+ if (svc_alien_sock(net, fd)) {
+ printk(KERN_ERR "%s: socket net is different to NFSd's one\n", __func__);
+ return -EINVAL;
+ }
+
err = nfsd_create_serv(net);
if (err != 0)
return err;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 262df5ccbf59..8016892f3f05 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -220,7 +220,8 @@ static int nfsd_startup_generic(int nrservs)
*/
ret = nfsd_racache_init(2*nrservs);
if (ret)
- return ret;
+ goto dec_users;
+
ret = nfs4_state_start();
if (ret)
goto out_racache;
@@ -228,6 +229,8 @@ static int nfsd_startup_generic(int nrservs)
out_racache:
nfsd_racache_shutdown();
+dec_users:
+ nfsd_users--;
return ret;
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index baf149a85263..81325ba8660a 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -297,41 +297,12 @@ commit_metadata(struct svc_fh *fhp)
}
/*
- * Set various file attributes.
- * N.B. After this call fhp needs an fh_put
+ * Go over the attributes and take care of the small differences between
+ * NFS semantics and what Linux expects.
*/
-__be32
-nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
- int check_guard, time_t guardtime)
+static void
+nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap)
{
- struct dentry *dentry;
- struct inode *inode;
- int accmode = NFSD_MAY_SATTR;
- umode_t ftype = 0;
- __be32 err;
- int host_err;
- int size_change = 0;
-
- if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
- accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
- if (iap->ia_valid & ATTR_SIZE)
- ftype = S_IFREG;
-
- /* Get inode */
- err = fh_verify(rqstp, fhp, ftype, accmode);
- if (err)
- goto out;
-
- dentry = fhp->fh_dentry;
- inode = dentry->d_inode;
-
- /* Ignore any mode updates on symlinks */
- if (S_ISLNK(inode->i_mode))
- iap->ia_valid &= ~ATTR_MODE;
-
- if (!iap->ia_valid)
- goto out;
-
/*
* NFSv2 does not differentiate between "set-[ac]time-to-now"
* which only requires access, and "set-[ac]time-to-X" which
@@ -341,8 +312,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
* convert to "set to now" instead of "set to explicit time"
*
* We only call inode_change_ok as the last test as technically
- * it is not an interface that we should be using. It is only
- * valid if the filesystem does not define it's own i_op->setattr.
+ * it is not an interface that we should be using.
*/
#define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET)
#define MAX_TOUCH_TIME_ERROR (30*60)
@@ -368,30 +338,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
iap->ia_valid &= ~BOTH_TIME_SET;
}
}
-
- /*
- * The size case is special.
- * It changes the file as well as the attributes.
- */
- if (iap->ia_valid & ATTR_SIZE) {
- if (iap->ia_size < inode->i_size) {
- err = nfsd_permission(rqstp, fhp->fh_export, dentry,
- NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE);
- if (err)
- goto out;
- }
-
- host_err = get_write_access(inode);
- if (host_err)
- goto out_nfserr;
-
- size_change = 1;
- host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
- if (host_err) {
- put_write_access(inode);
- goto out_nfserr;
- }
- }
/* sanitize the mode change */
if (iap->ia_valid & ATTR_MODE) {
@@ -414,32 +360,120 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID);
}
}
+}
- /* Change the attributes. */
+static __be32
+nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct iattr *iap)
+{
+ struct inode *inode = fhp->fh_dentry->d_inode;
+ int host_err;
- iap->ia_valid |= ATTR_CTIME;
+ if (iap->ia_size < inode->i_size) {
+ __be32 err;
+
+ err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
+ NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE);
+ if (err)
+ return err;
+ }
+
+ host_err = get_write_access(inode);
+ if (host_err)
+ goto out_nfserrno;
+
+ host_err = locks_verify_truncate(inode, NULL, iap->ia_size);
+ if (host_err)
+ goto out_put_write_access;
+ return 0;
+
+out_put_write_access:
+ put_write_access(inode);
+out_nfserrno:
+ return nfserrno(host_err);
+}
+
+/*
+ * Set various file attributes. After this call fhp needs an fh_put.
+ */
+__be32
+nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
+ int check_guard, time_t guardtime)
+{
+ struct dentry *dentry;
+ struct inode *inode;
+ int accmode = NFSD_MAY_SATTR;
+ umode_t ftype = 0;
+ __be32 err;
+ int host_err;
+ bool get_write_count;
+ int size_change = 0;
+
+ if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE))
+ accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE;
+ if (iap->ia_valid & ATTR_SIZE)
+ ftype = S_IFREG;
- err = nfserr_notsync;
- if (!check_guard || guardtime == inode->i_ctime.tv_sec) {
- host_err = nfsd_break_lease(inode);
+ /* Callers that do fh_verify should do the fh_want_write: */
+ get_write_count = !fhp->fh_dentry;
+
+ /* Get inode */
+ err = fh_verify(rqstp, fhp, ftype, accmode);
+ if (err)
+ goto out;
+ if (get_write_count) {
+ host_err = fh_want_write(fhp);
if (host_err)
- goto out_nfserr;
- fh_lock(fhp);
+ return nfserrno(host_err);
+ }
- host_err = notify_change(dentry, iap);
- err = nfserrno(host_err);
- fh_unlock(fhp);
+ dentry = fhp->fh_dentry;
+ inode = dentry->d_inode;
+
+ /* Ignore any mode updates on symlinks */
+ if (S_ISLNK(inode->i_mode))
+ iap->ia_valid &= ~ATTR_MODE;
+
+ if (!iap->ia_valid)
+ goto out;
+
+ nfsd_sanitize_attrs(inode, iap);
+
+ /*
+ * The size case is special, it changes the file in addition to the
+ * attributes.
+ */
+ if (iap->ia_valid & ATTR_SIZE) {
+ err = nfsd_get_write_access(rqstp, fhp, iap);
+ if (err)
+ goto out;
+ size_change = 1;
}
+
+ iap->ia_valid |= ATTR_CTIME;
+
+ if (check_guard && guardtime != inode->i_ctime.tv_sec) {
+ err = nfserr_notsync;
+ goto out_put_write_access;
+ }
+
+ host_err = nfsd_break_lease(inode);
+ if (host_err)
+ goto out_put_write_access_nfserror;
+
+ fh_lock(fhp);
+ host_err = notify_change(dentry, iap);
+ fh_unlock(fhp);
+
+out_put_write_access_nfserror:
+ err = nfserrno(host_err);
+out_put_write_access:
if (size_change)
put_write_access(inode);
if (!err)
commit_metadata(fhp);
out:
return err;
-
-out_nfserr:
- err = nfserrno(host_err);
- goto out;
}
#if defined(CONFIG_NFSD_V2_ACL) || \
@@ -474,6 +508,9 @@ set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key)
char *buf = NULL;
int error = 0;
+ if (!pacl)
+ return vfs_setxattr(dentry, key, NULL, 0, 0);
+
buflen = posix_acl_xattr_size(pacl->a_count);
buf = kmalloc(buflen, GFP_KERNEL);
error = -ENOMEM;
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index bccfec8343c5..587d699bdc2c 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -24,6 +24,7 @@
#include <linux/buffer_head.h>
#include <linux/gfp.h>
#include <linux/mpage.h>
+#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/aio.h>
#include "nilfs.h"
@@ -48,6 +49,8 @@ struct nilfs_iget_args {
int for_gc;
};
+static int nilfs_iget_test(struct inode *inode, void *opaque);
+
void nilfs_inode_add_blocks(struct inode *inode, int n)
{
struct nilfs_root *root = NILFS_I(inode)->i_root;
@@ -219,10 +222,10 @@ static int nilfs_writepage(struct page *page, struct writeback_control *wbc)
static int nilfs_set_page_dirty(struct page *page)
{
+ struct inode *inode = page->mapping->host;
int ret = __set_page_dirty_nobuffers(page);
if (page_has_buffers(page)) {
- struct inode *inode = page->mapping->host;
unsigned nr_dirty = 0;
struct buffer_head *bh, *head;
@@ -245,6 +248,10 @@ static int nilfs_set_page_dirty(struct page *page)
if (nr_dirty)
nilfs_set_file_dirty(inode, nr_dirty);
+ } else if (ret) {
+ unsigned nr_dirty = 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+ nilfs_set_file_dirty(inode, nr_dirty);
}
return ret;
}
@@ -342,6 +349,17 @@ const struct address_space_operations nilfs_aops = {
.is_partially_uptodate = block_is_partially_uptodate,
};
+static int nilfs_insert_inode_locked(struct inode *inode,
+ struct nilfs_root *root,
+ unsigned long ino)
+{
+ struct nilfs_iget_args args = {
+ .ino = ino, .root = root, .cno = 0, .for_gc = 0
+ };
+
+ return insert_inode_locked4(inode, ino, nilfs_iget_test, &args);
+}
+
struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
{
struct super_block *sb = dir->i_sb;
@@ -377,7 +395,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
err = nilfs_bmap_read(ii->i_bmap, NULL);
if (err < 0)
- goto failed_bmap;
+ goto failed_after_creation;
set_bit(NILFS_I_BMAP, &ii->i_state);
/* No lock is needed; iget() ensures it. */
@@ -393,21 +411,24 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
spin_lock(&nilfs->ns_next_gen_lock);
inode->i_generation = nilfs->ns_next_generation++;
spin_unlock(&nilfs->ns_next_gen_lock);
- insert_inode_hash(inode);
+ if (nilfs_insert_inode_locked(inode, root, ino) < 0) {
+ err = -EIO;
+ goto failed_after_creation;
+ }
err = nilfs_init_acl(inode, dir);
if (unlikely(err))
- goto failed_acl; /* never occur. When supporting
+ goto failed_after_creation; /* never occur. When supporting
nilfs_init_acl(), proper cancellation of
above jobs should be considered */
return inode;
- failed_acl:
- failed_bmap:
+ failed_after_creation:
clear_nlink(inode);
+ unlock_new_inode(inode);
iput(inode); /* raw_inode will be deleted through
- generic_delete_inode() */
+ nilfs_evict_inode() */
goto failed;
failed_ifile_create_inode:
@@ -455,8 +476,8 @@ int nilfs_read_inode_common(struct inode *inode,
inode->i_atime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
inode->i_ctime.tv_nsec = le32_to_cpu(raw_inode->i_ctime_nsec);
inode->i_mtime.tv_nsec = le32_to_cpu(raw_inode->i_mtime_nsec);
- if (inode->i_nlink == 0 && inode->i_mode == 0)
- return -EINVAL; /* this inode is deleted */
+ if (inode->i_nlink == 0)
+ return -ESTALE; /* this inode is deleted */
inode->i_blocks = le64_to_cpu(raw_inode->i_blocks);
ii->i_flags = le32_to_cpu(raw_inode->i_flags);
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 9de78f08989e..0f84b257932c 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -51,9 +51,11 @@ static inline int nilfs_add_nondir(struct dentry *dentry, struct inode *inode)
int err = nilfs_add_link(dentry, inode);
if (!err) {
d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
return 0;
}
inode_dec_link_count(inode);
+ unlock_new_inode(inode);
iput(inode);
return err;
}
@@ -182,6 +184,7 @@ out:
out_fail:
drop_nlink(inode);
nilfs_mark_inode_dirty(inode);
+ unlock_new_inode(inode);
iput(inode);
goto out;
}
@@ -201,11 +204,15 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
inode_inc_link_count(inode);
ihold(inode);
- err = nilfs_add_nondir(dentry, inode);
- if (!err)
+ err = nilfs_add_link(dentry, inode);
+ if (!err) {
+ d_instantiate(dentry, inode);
err = nilfs_transaction_commit(dir->i_sb);
- else
+ } else {
+ inode_dec_link_count(inode);
+ iput(inode);
nilfs_transaction_abort(dir->i_sb);
+ }
return err;
}
@@ -243,6 +250,7 @@ static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
nilfs_mark_inode_dirty(inode);
d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
out:
if (!err)
err = nilfs_transaction_commit(dir->i_sb);
@@ -255,6 +263,7 @@ out_fail:
drop_nlink(inode);
drop_nlink(inode);
nilfs_mark_inode_dirty(inode);
+ unlock_new_inode(inode);
iput(inode);
out_dir:
drop_nlink(dir);
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 0ba679866e50..da276640f776 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -94,6 +94,7 @@ void nilfs_forget_buffer(struct buffer_head *bh)
clear_buffer_nilfs_volatile(bh);
clear_buffer_nilfs_checked(bh);
clear_buffer_nilfs_redirected(bh);
+ clear_buffer_async_write(bh);
clear_buffer_dirty(bh);
if (nilfs_page_buffers_clean(page))
__nilfs_clear_page_dirty(page);
@@ -429,6 +430,7 @@ void nilfs_clear_dirty_page(struct page *page, bool silent)
"discard block %llu, size %zu",
(u64)bh->b_blocknr, bh->b_size);
}
+ clear_buffer_async_write(bh);
clear_buffer_dirty(bh);
clear_buffer_nilfs_volatile(bh);
clear_buffer_nilfs_checked(bh);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index a5752a589932..958a5b57ed4a 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -665,7 +665,7 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
bh = head = page_buffers(page);
do {
- if (!buffer_dirty(bh))
+ if (!buffer_dirty(bh) || buffer_async_write(bh))
continue;
get_bh(bh);
list_add_tail(&bh->b_assoc_buffers, listp);
@@ -699,7 +699,8 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
for (i = 0; i < pagevec_count(&pvec); i++) {
bh = head = page_buffers(pvec.pages[i]);
do {
- if (buffer_dirty(bh)) {
+ if (buffer_dirty(bh) &&
+ !buffer_async_write(bh)) {
get_bh(bh);
list_add_tail(&bh->b_assoc_buffers,
listp);
@@ -1439,17 +1440,19 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
nilfs_clear_logs(&sci->sc_segbufs);
- err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
- if (unlikely(err))
- return err;
-
if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
err = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
sci->sc_freesegs,
sci->sc_nfreesegs,
NULL);
WARN_ON(err); /* do not happen */
+ sci->sc_stage.flags &= ~NILFS_CF_SUFREED;
}
+
+ err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
+ if (unlikely(err))
+ return err;
+
nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
sci->sc_stage = prev_stage;
}
@@ -1579,6 +1582,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
b_assoc_buffers) {
+ set_buffer_async_write(bh);
if (bh->b_page != bd_page) {
if (bd_page) {
lock_page(bd_page);
@@ -1592,6 +1596,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
+ set_buffer_async_write(bh);
if (bh == segbuf->sb_super_root) {
if (bh->b_page != bd_page) {
lock_page(bd_page);
@@ -1677,6 +1682,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
list_for_each_entry(segbuf, logs, sb_list) {
list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
b_assoc_buffers) {
+ clear_buffer_async_write(bh);
if (bh->b_page != bd_page) {
if (bd_page)
end_page_writeback(bd_page);
@@ -1686,6 +1692,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err)
list_for_each_entry(bh, &segbuf->sb_payload_buffers,
b_assoc_buffers) {
+ clear_buffer_async_write(bh);
if (bh == segbuf->sb_super_root) {
if (bh->b_page != bd_page) {
end_page_writeback(bd_page);
@@ -1755,6 +1762,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
b_assoc_buffers) {
set_buffer_uptodate(bh);
clear_buffer_dirty(bh);
+ clear_buffer_async_write(bh);
if (bh->b_page != bd_page) {
if (bd_page)
end_page_writeback(bd_page);
@@ -1776,6 +1784,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
b_assoc_buffers) {
set_buffer_uptodate(bh);
clear_buffer_dirty(bh);
+ clear_buffer_async_write(bh);
clear_buffer_delay(bh);
clear_buffer_nilfs_volatile(bh);
clear_buffer_nilfs_redirected(bh);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 77cc85dd0db0..9be6b4163406 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -69,7 +69,7 @@ static int create_fd(struct fsnotify_group *group,
pr_debug("%s: group=%p event=%p\n", __func__, group, event);
- client_fd = get_unused_fd();
+ client_fd = get_unused_fd_flags(group->fanotify_data.f_flags);
if (client_fd < 0)
return client_fd;
@@ -867,9 +867,9 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark,
{
return sys_fanotify_mark(fanotify_fd, flags,
#ifdef __BIG_ENDIAN
- ((__u64)mask1 << 32) | mask0,
-#else
((__u64)mask0 << 32) | mask1,
+#else
+ ((__u64)mask1 << 32) | mask0,
#endif
dfd, pathname);
}
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index 238a5930cb3c..9d7e2b9659cb 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -42,7 +42,7 @@ static int show_mark_fhandle(struct seq_file *m, struct inode *inode)
{
struct {
struct file_handle handle;
- u8 pad[64];
+ u8 pad[MAX_HANDLE_SZ];
} f;
int size, ret, i;
@@ -50,7 +50,7 @@ static int show_mark_fhandle(struct seq_file *m, struct inode *inode)
size = f.handle.handle_bytes >> 2;
ret = exportfs_encode_inode_fh(inode, (struct fid *)f.handle.f_handle, &size, 0);
- if ((ret == 255) || (ret == -ENOSPC)) {
+ if ((ret == FILEID_INVALID) || (ret < 0)) {
WARN_ONCE(1, "Can't encode file handler for inotify: %d\n", ret);
return 0;
}
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 20dfec72e903..f998c6009ad4 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -917,7 +917,7 @@ void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
}
}
-static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
+static void ocfs2_unlock_pages(struct ocfs2_write_ctxt *wc)
{
int i;
@@ -938,7 +938,11 @@ static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
page_cache_release(wc->w_target_page);
}
ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);
+}
+static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
+{
+ ocfs2_unlock_pages(wc);
brelse(wc->w_di_bh);
kfree(wc);
}
@@ -2060,11 +2064,19 @@ out_write_size:
di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
ocfs2_journal_dirty(handle, wc->w_di_bh);
+ /* unlock pages before dealloc since dealloc needs to acquire the
+ * j_trans_barrier lock; otherwise we can deadlock, since the journal
+ * commit thread holds this lock and will ask for the page lock when
+ * flushing the data. put it here to preserve the unlock order.
+ */
+ ocfs2_unlock_pages(wc);
+
ocfs2_commit_trans(osb, handle);
ocfs2_run_deallocs(osb, &wc->w_dealloc);
- ocfs2_free_write_ctxt(wc);
+ brelse(wc->w_di_bh);
+ kfree(wc);
return copied;
}
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index 5d18ad10c27f..4f66e007dae1 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -90,7 +90,6 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
* information for this bh as it's not marked locally
* uptodate. */
ret = -EIO;
- put_bh(bh);
mlog_errno(ret);
}
@@ -420,7 +419,6 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
if (!buffer_uptodate(bh)) {
ret = -EIO;
- put_bh(bh);
mlog_errno(ret);
}
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 33ecbe0e6734..2b941113e423 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -653,12 +653,9 @@ void dlm_lockres_clear_refmap_bit(struct dlm_ctxt *dlm,
clear_bit(bit, res->refmap);
}
-
-void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
+static void __dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res)
{
- assert_spin_locked(&res->spinlock);
-
res->inflight_locks++;
mlog(0, "%s: res %.*s, inflight++: now %u, %ps()\n", dlm->name,
@@ -666,6 +663,13 @@ void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
__builtin_return_address(0));
}
+void dlm_lockres_grab_inflight_ref(struct dlm_ctxt *dlm,
+ struct dlm_lock_resource *res)
+{
+ assert_spin_locked(&res->spinlock);
+ __dlm_lockres_grab_inflight_ref(dlm, res);
+}
+
void dlm_lockres_drop_inflight_ref(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res)
{
@@ -855,10 +859,8 @@ lookup:
/* finally add the lockres to its hash bucket */
__dlm_insert_lockres(dlm, res);
- /* Grab inflight ref to pin the resource */
- spin_lock(&res->spinlock);
- dlm_lockres_grab_inflight_ref(dlm, res);
- spin_unlock(&res->spinlock);
+ /* since this lockres is new it does not require the spinlock */
+ __dlm_lockres_grab_inflight_ref(dlm, res);
/* get an extra ref on the mle in case this is a BLOCK
* if so, the creator of the BLOCK may try to put the last
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index e68588e6b1e8..9bd981cd3142 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -540,7 +540,10 @@ master_here:
/* success! see if any other nodes need recovery */
mlog(0, "DONE mastering recovery of %s:%u here(this=%u)!\n",
dlm->name, dlm->reco.dead_node, dlm->node_num);
- dlm_reset_recovery(dlm);
+ spin_lock(&dlm->spinlock);
+ __dlm_reset_recovery(dlm);
+ dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
+ spin_unlock(&dlm->spinlock);
}
dlm_end_recovery(dlm);
@@ -698,6 +701,14 @@ static int dlm_remaster_locks(struct dlm_ctxt *dlm, u8 dead_node)
if (all_nodes_done) {
int ret;
+ /* Set this flag on the recovery master so that a
+ * new recovery for another dead node cannot start
+ * before this recovery is done. Otherwise recovery
+ * may hang. */
+ spin_lock(&dlm->spinlock);
+ dlm->reco.state |= DLM_RECO_STATE_FINALIZE;
+ spin_unlock(&dlm->spinlock);
+
/* all nodes are now in DLM_RECO_NODE_DATA_DONE state
* just send a finalize message to everyone and
* clean up */
@@ -1751,13 +1762,13 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
struct dlm_migratable_lockres *mres)
{
struct dlm_migratable_lock *ml;
- struct list_head *queue;
+ struct list_head *queue, *iter;
struct list_head *tmpq = NULL;
struct dlm_lock *newlock = NULL;
struct dlm_lockstatus *lksb = NULL;
int ret = 0;
int i, j, bad;
- struct dlm_lock *lock = NULL;
+ struct dlm_lock *lock;
u8 from = O2NM_MAX_NODES;
unsigned int added = 0;
__be64 c;
@@ -1792,14 +1803,16 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
/* MIGRATION ONLY! */
BUG_ON(!(mres->flags & DLM_MRES_MIGRATION));
+ lock = NULL;
spin_lock(&res->spinlock);
for (j = DLM_GRANTED_LIST; j <= DLM_BLOCKED_LIST; j++) {
tmpq = dlm_list_idx_to_ptr(res, j);
- list_for_each_entry(lock, tmpq, list) {
- if (lock->ml.cookie != ml->cookie)
- lock = NULL;
- else
+ list_for_each(iter, tmpq) {
+ lock = list_entry(iter,
+ struct dlm_lock, list);
+ if (lock->ml.cookie == ml->cookie)
break;
+ lock = NULL;
}
if (lock)
break;
@@ -2867,8 +2880,8 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data,
BUG();
}
dlm->reco.state &= ~DLM_RECO_STATE_FINALIZE;
+ __dlm_reset_recovery(dlm);
spin_unlock(&dlm->spinlock);
- dlm_reset_recovery(dlm);
dlm_kick_recovery_thread(dlm);
break;
default:
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index ff54014a24ec..46387e49aa46 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2374,8 +2374,8 @@ out_dio:
if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
((file->f_flags & O_DIRECT) && !direct_io)) {
- ret = filemap_fdatawrite_range(file->f_mapping, pos,
- pos + count - 1);
+ ret = filemap_fdatawrite_range(file->f_mapping, *ppos,
+ *ppos + count - 1);
if (ret < 0)
written = ret;
@@ -2388,8 +2388,8 @@ out_dio:
}
if (!ret)
- ret = filemap_fdatawait_range(file->f_mapping, pos,
- pos + count - 1);
+ ret = filemap_fdatawait_range(file->f_mapping, *ppos,
+ *ppos + count - 1);
}
/*
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 332a281f217e..e49b4f1cb26b 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -717,6 +717,12 @@ static int ocfs2_release_dquot(struct dquot *dquot)
*/
if (status < 0)
mlog_errno(status);
+ /*
+ * Clear dq_off so that we search for the structure in quota file next
+ * time we acquire it. The structure might be deleted and reallocated
+ * elsewhere by another node while our dquot structure is on freelist.
+ */
+ dquot->dq_off = 0;
clear_bit(DQ_ACTIVE_B, &dquot->dq_flags);
out_trans:
ocfs2_commit_trans(osb, handle);
@@ -756,16 +762,17 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
status = ocfs2_lock_global_qf(info, 1);
if (status < 0)
goto out;
- if (!test_bit(DQ_READ_B, &dquot->dq_flags)) {
- status = ocfs2_qinfo_lock(info, 0);
- if (status < 0)
- goto out_dq;
- status = qtree_read_dquot(&info->dqi_gi, dquot);
- ocfs2_qinfo_unlock(info, 0);
- if (status < 0)
- goto out_dq;
- }
- set_bit(DQ_READ_B, &dquot->dq_flags);
+ status = ocfs2_qinfo_lock(info, 0);
+ if (status < 0)
+ goto out_dq;
+ /*
+ * We always want to read dquot structure from disk because we don't
+ * know what happened with it while it was on freelist.
+ */
+ status = qtree_read_dquot(&info->dqi_gi, dquot);
+ ocfs2_qinfo_unlock(info, 0);
+ if (status < 0)
+ goto out_dq;
OCFS2_DQUOT(dquot)->dq_use_count++;
OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
index 27fe7ee4874c..d0f323da0b5c 100644
--- a/fs/ocfs2/quota_local.c
+++ b/fs/ocfs2/quota_local.c
@@ -1303,10 +1303,6 @@ int ocfs2_local_release_dquot(handle_t *handle, struct dquot *dquot)
ocfs2_journal_dirty(handle, od->dq_chunk->qc_headerbh);
out:
- /* Clear the read bit so that next time someone uses this
- * dquot he reads fresh info from disk and allocates local
- * dquot structure */
- clear_bit(DQ_READ_B, &dquot->dq_flags);
return status;
}
diff --git a/fs/open.c b/fs/open.c
index 8c741002f947..86092bde31f4 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -628,23 +628,12 @@ out:
static inline int __get_file_write_access(struct inode *inode,
struct vfsmount *mnt)
{
- int error;
- error = get_write_access(inode);
+ int error = get_write_access(inode);
if (error)
return error;
- /*
- * Do not take mount writer counts on
- * special files since no writes to
- * the mount itself will occur.
- */
- if (!special_file(inode->i_mode)) {
- /*
- * Balanced in __fput()
- */
- error = __mnt_want_write(mnt);
- if (error)
- put_write_access(inode);
- }
+ error = __mnt_want_write(mnt);
+ if (error)
+ put_write_access(inode);
return error;
}
@@ -677,12 +666,11 @@ static int do_dentry_open(struct file *f,
path_get(&f->f_path);
inode = f->f_inode = f->f_path.dentry->d_inode;
- if (f->f_mode & FMODE_WRITE) {
+ if (f->f_mode & FMODE_WRITE && !special_file(inode->i_mode)) {
error = __get_file_write_access(inode, f->f_path.mnt);
if (error)
goto cleanup_file;
- if (!special_file(inode->i_mode))
- file_take_write(f);
+ file_take_write(f);
}
f->f_mapping = inode->i_mapping;
@@ -723,7 +711,6 @@ cleanup_all:
fops_put(f->f_op);
file_sb_list_del(f);
if (f->f_mode & FMODE_WRITE) {
- put_write_access(inode);
if (!special_file(inode->i_mode)) {
/*
* We don't consider this a real
@@ -731,6 +718,7 @@ cleanup_all:
* because it all happenend right
* here, so just reset the state.
*/
+ put_write_access(inode);
file_reset_write(f);
__mnt_drop_write(f->f_path.mnt);
}
diff --git a/fs/pipe.c b/fs/pipe.c
index d2c45e14e6d8..0e0752ef2715 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -726,11 +726,25 @@ pipe_poll(struct file *filp, poll_table *wait)
return mask;
}
+static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
+{
+ int kill = 0;
+
+ spin_lock(&inode->i_lock);
+ if (!--pipe->files) {
+ inode->i_pipe = NULL;
+ kill = 1;
+ }
+ spin_unlock(&inode->i_lock);
+
+ if (kill)
+ free_pipe_info(pipe);
+}
+
static int
pipe_release(struct inode *inode, struct file *file)
{
- struct pipe_inode_info *pipe = inode->i_pipe;
- int kill = 0;
+ struct pipe_inode_info *pipe = file->private_data;
__pipe_lock(pipe);
if (file->f_mode & FMODE_READ)
@@ -743,17 +757,9 @@ pipe_release(struct inode *inode, struct file *file)
kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
}
- spin_lock(&inode->i_lock);
- if (!--pipe->files) {
- inode->i_pipe = NULL;
- kill = 1;
- }
- spin_unlock(&inode->i_lock);
__pipe_unlock(pipe);
- if (kill)
- free_pipe_info(pipe);
-
+ put_pipe_info(inode, pipe);
return 0;
}
@@ -1014,7 +1020,6 @@ static int fifo_open(struct inode *inode, struct file *filp)
{
struct pipe_inode_info *pipe;
bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
- int kill = 0;
int ret;
filp->f_version = 0;
@@ -1130,15 +1135,9 @@ err_wr:
goto err;
err:
- spin_lock(&inode->i_lock);
- if (!--pipe->files) {
- inode->i_pipe = NULL;
- kill = 1;
- }
- spin_unlock(&inode->i_lock);
__pipe_unlock(pipe);
- if (kill)
- free_pipe_info(pipe);
+
+ put_pipe_info(inode, pipe);
return ret;
}
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 8bd2135b7f82..3542f1f814e2 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -158,6 +158,12 @@ posix_acl_equiv_mode(const struct posix_acl *acl, umode_t *mode_p)
umode_t mode = 0;
int not_equiv = 0;
+ /*
+ * A null ACL can always be presented as mode bits.
+ */
+ if (!acl)
+ return 0;
+
FOREACH_ACL_ENTRY(pa, acl, pe) {
switch (pa->e_tag) {
case ACL_USER_OBJ:
diff --git a/fs/proc/array.c b/fs/proc/array.c
index cbd0f1b324b9..09f0d9c374a3 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -304,15 +304,11 @@ static void render_cap_t(struct seq_file *m, const char *header,
seq_puts(m, header);
CAP_FOR_EACH_U32(__capi) {
seq_printf(m, "%08x",
- a->cap[(_KERNEL_CAPABILITY_U32S-1) - __capi]);
+ a->cap[CAP_LAST_U32 - __capi]);
}
seq_putc(m, '\n');
}
-/* Remove non-existent capabilities */
-#define NORM_CAPS(v) (v.cap[CAP_TO_INDEX(CAP_LAST_CAP)] &= \
- CAP_TO_MASK(CAP_LAST_CAP + 1) - 1)
-
static inline void task_cap(struct seq_file *m, struct task_struct *p)
{
const struct cred *cred;
@@ -326,11 +322,6 @@ static inline void task_cap(struct seq_file *m, struct task_struct *p)
cap_bset = cred->cap_bset;
rcu_read_unlock();
- NORM_CAPS(cap_inheritable);
- NORM_CAPS(cap_permitted);
- NORM_CAPS(cap_effective);
- NORM_CAPS(cap_bset);
-
render_cap_t(m, "CapInh:\t", &cap_inheritable);
render_cap_t(m, "CapPrm:\t", &cap_permitted);
render_cap_t(m, "CapEff:\t", &cap_effective);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c3834dad09b3..8fc784aef0b8 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1825,6 +1825,7 @@ static int proc_map_files_get_link(struct dentry *dentry, struct path *path)
if (rc)
goto out_mmput;
+ rc = -ENOENT;
down_read(&mm->mmap_sem);
vma = find_exact_vma(mm, vm_start, vm_end);
if (vma && vma->vm_file) {
@@ -2611,6 +2612,57 @@ static const struct file_operations proc_projid_map_operations = {
.llseek = seq_lseek,
.release = proc_id_map_release,
};
+
+static int proc_setgroups_open(struct inode *inode, struct file *file)
+{
+ struct user_namespace *ns = NULL;
+ struct task_struct *task;
+ int ret;
+
+ ret = -ESRCH;
+ task = get_proc_task(inode);
+ if (task) {
+ rcu_read_lock();
+ ns = get_user_ns(task_cred_xxx(task, user_ns));
+ rcu_read_unlock();
+ put_task_struct(task);
+ }
+ if (!ns)
+ goto err;
+
+ if (file->f_mode & FMODE_WRITE) {
+ ret = -EACCES;
+ if (!ns_capable(ns, CAP_SYS_ADMIN))
+ goto err_put_ns;
+ }
+
+ ret = single_open(file, &proc_setgroups_show, ns);
+ if (ret)
+ goto err_put_ns;
+
+ return 0;
+err_put_ns:
+ put_user_ns(ns);
+err:
+ return ret;
+}
+
+static int proc_setgroups_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq = file->private_data;
+ struct user_namespace *ns = seq->private;
+ int ret = single_release(inode, file);
+ put_user_ns(ns);
+ return ret;
+}
+
+static const struct file_operations proc_setgroups_operations = {
+ .open = proc_setgroups_open,
+ .write = proc_setgroups_write,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = proc_setgroups_release,
+};
#endif /* CONFIG_USER_NS */
static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
@@ -2719,6 +2771,7 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
+ REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
#endif
#ifdef CONFIG_CHECKPOINT_RESTORE
REG("timers", S_IRUGO, proc_timers_operations),
@@ -3072,6 +3125,7 @@ static const struct pid_entry tid_base_stuff[] = {
REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
+ REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
#endif
};
diff --git a/fs/proc/page.c b/fs/proc/page.c
index b8730d9ebaee..2a8cc94bb641 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -121,7 +121,7 @@ u64 stable_page_flags(struct page *page)
* just checks PG_head/PG_tail, so we need to check PageLRU to make
* sure a given page is a thp, not a non-huge compound page.
*/
- else if (PageTransCompound(page) && PageLRU(compound_trans_head(page)))
+ else if (PageTransCompound(page) && PageLRU(compound_head(page)))
u |= 1 << KPF_THP;
/*
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index e4bcb2cf055a..3ba30825f387 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -316,10 +316,10 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count,
sprintf(name, "dmesg-%s-%lld", psname, id);
break;
case PSTORE_TYPE_CONSOLE:
- sprintf(name, "console-%s", psname);
+ sprintf(name, "console-%s-%lld", psname, id);
break;
case PSTORE_TYPE_FTRACE:
- sprintf(name, "ftrace-%s", psname);
+ sprintf(name, "ftrace-%s-%lld", psname, id);
break;
case PSTORE_TYPE_MCE:
sprintf(name, "mce-%s-%lld", psname, id);
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 1376e5a8f0d6..42d5911c7e29 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -61,6 +61,11 @@ module_param(mem_size, ulong, 0400);
MODULE_PARM_DESC(mem_size,
"size of reserved RAM used to store oops/panic logs");
+static unsigned int mem_type;
+module_param(mem_type, uint, 0600);
+MODULE_PARM_DESC(mem_type,
+ "set to 1 to try to use unbuffered memory (default 0)");
+
static int dump_oops = 1;
module_param(dump_oops, int, 0600);
MODULE_PARM_DESC(dump_oops,
@@ -79,6 +84,7 @@ struct ramoops_context {
struct persistent_ram_zone *fprz;
phys_addr_t phys_addr;
unsigned long size;
+ unsigned int memtype;
size_t record_size;
size_t console_size;
size_t ftrace_size;
@@ -331,7 +337,8 @@ static int ramoops_init_przs(struct device *dev, struct ramoops_context *cxt,
size_t sz = cxt->record_size;
cxt->przs[i] = persistent_ram_new(*paddr, sz, 0,
- &cxt->ecc_info);
+ &cxt->ecc_info,
+ cxt->memtype);
if (IS_ERR(cxt->przs[i])) {
err = PTR_ERR(cxt->przs[i]);
dev_err(dev, "failed to request mem region (0x%zx@0x%llx): %d\n",
@@ -361,7 +368,7 @@ static int ramoops_init_prz(struct device *dev, struct ramoops_context *cxt,
return -ENOMEM;
}
- *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info);
+ *prz = persistent_ram_new(*paddr, sz, sig, &cxt->ecc_info, cxt->memtype);
if (IS_ERR(*prz)) {
int err = PTR_ERR(*prz);
@@ -411,6 +418,7 @@ static int ramoops_probe(struct platform_device *pdev)
cxt->dump_read_cnt = 0;
cxt->size = pdata->mem_size;
cxt->phys_addr = pdata->mem_address;
+ cxt->memtype = pdata->mem_type;
cxt->record_size = pdata->record_size;
cxt->console_size = pdata->console_size;
cxt->ftrace_size = pdata->ftrace_size;
@@ -541,6 +549,8 @@ static void ramoops_register_dummy(void)
dummy_data->mem_size = mem_size;
dummy_data->mem_address = mem_address;
+ /* mem_type module parameter: non-zero requests an unbuffered mapping */
+ dummy_data->mem_type = mem_type;
dummy_data->record_size = record_size;
dummy_data->console_size = ramoops_console_size;
dummy_data->ftrace_size = ramoops_ftrace_size;
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 59337326e288..6ff97553331b 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -333,7 +333,8 @@ void persistent_ram_zap(struct persistent_ram_zone *prz)
persistent_ram_update_header_ecc(prz);
}
-static void *persistent_ram_vmap(phys_addr_t start, size_t size)
+static void *persistent_ram_vmap(phys_addr_t start, size_t size,
+ unsigned int memtype)
{
struct page **pages;
phys_addr_t page_start;
@@ -345,7 +346,10 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size)
page_start = start - offset_in_page(start);
page_count = DIV_ROUND_UP(size + offset_in_page(start), PAGE_SIZE);
- prot = pgprot_noncached(PAGE_KERNEL);
+ if (memtype)
+ prot = pgprot_noncached(PAGE_KERNEL);
+ else
+ prot = pgprot_writecombine(PAGE_KERNEL);
pages = kmalloc(sizeof(struct page *) * page_count, GFP_KERNEL);
if (!pages) {
@@ -364,27 +368,35 @@ static void *persistent_ram_vmap(phys_addr_t start, size_t size)
return vaddr;
}
-static void *persistent_ram_iomap(phys_addr_t start, size_t size)
+static void *persistent_ram_iomap(phys_addr_t start, size_t size,
+ unsigned int memtype)
{
+ void *va;
+
if (!request_mem_region(start, size, "persistent_ram")) {
pr_err("request mem region (0x%llx@0x%llx) failed\n",
(unsigned long long)size, (unsigned long long)start);
return NULL;
}
- return ioremap(start, size);
+ if (memtype)
+ va = ioremap(start, size);
+ else
+ va = ioremap_wc(start, size);
+
+ return va;
}
static int persistent_ram_buffer_map(phys_addr_t start, phys_addr_t size,
- struct persistent_ram_zone *prz)
+ struct persistent_ram_zone *prz, int memtype)
{
prz->paddr = start;
prz->size = size;
if (pfn_valid(start >> PAGE_SHIFT))
- prz->vaddr = persistent_ram_vmap(start, size);
+ prz->vaddr = persistent_ram_vmap(start, size, memtype);
else
- prz->vaddr = persistent_ram_iomap(start, size);
+ prz->vaddr = persistent_ram_iomap(start, size, memtype);
if (!prz->vaddr) {
pr_err("%s: Failed to map 0x%llx pages at 0x%llx\n", __func__,
@@ -452,7 +464,8 @@ void persistent_ram_free(struct persistent_ram_zone *prz)
}
struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
- u32 sig, struct persistent_ram_ecc_info *ecc_info)
+ u32 sig, struct persistent_ram_ecc_info *ecc_info,
+ unsigned int memtype)
{
struct persistent_ram_zone *prz;
int ret = -ENOMEM;
@@ -463,7 +476,7 @@ struct persistent_ram_zone *persistent_ram_new(phys_addr_t start, size_t size,
goto err;
}
- ret = persistent_ram_buffer_map(start, size, prz);
+ ret = persistent_ram_buffer_map(start, size, prz, memtype);
if (ret)
goto err;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 3e64169ef527..7a10e047bc33 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -581,9 +581,17 @@ int dquot_scan_active(struct super_block *sb,
dqstats_inc(DQST_LOOKUPS);
dqput(old_dquot);
old_dquot = dquot;
- ret = fn(dquot, priv);
- if (ret < 0)
- goto out;
+ /*
+ * ->release_dquot() can be racing with us. Our reference
+ * protects us from new calls to it so just wait for any
+ * outstanding call and recheck the DQ_ACTIVE_B after that.
+ */
+ wait_on_dquot(dquot);
+ if (test_bit(DQ_ACTIVE_B, &dquot->dq_flags)) {
+ ret = fn(dquot, priv);
+ if (ret < 0)
+ goto out;
+ }
spin_lock(&dq_list_lock);
/* We are safe to continue now because our dquot could not
* be moved out of the inuse list while we hold the reference */
@@ -629,7 +637,7 @@ int dquot_writeback_dquots(struct super_block *sb, int type)
dqstats_inc(DQST_LOOKUPS);
err = sb->dq_op->write_dquot(dquot);
if (!ret && err)
- err = ret;
+ ret = err;
dqput(dquot);
spin_lock(&dq_list_lock);
}
diff --git a/fs/read_write.c b/fs/read_write.c
index 2cefa417be34..f6b7c600eb7f 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -947,9 +947,9 @@ out:
return ret;
}
-COMPAT_SYSCALL_DEFINE3(readv, unsigned long, fd,
+COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
const struct compat_iovec __user *,vec,
- unsigned long, vlen)
+ compat_ulong_t, vlen)
{
struct fd f = fdget(fd);
ssize_t ret;
@@ -983,9 +983,9 @@ COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
return ret;
}
-COMPAT_SYSCALL_DEFINE5(preadv, unsigned long, fd,
+COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
const struct compat_iovec __user *,vec,
- unsigned long, vlen, u32, pos_low, u32, pos_high)
+ compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
{
loff_t pos = ((loff_t)pos_high << 32) | pos_low;
return compat_sys_preadv64(fd, vec, vlen, pos);
@@ -1013,9 +1013,9 @@ out:
return ret;
}
-COMPAT_SYSCALL_DEFINE3(writev, unsigned long, fd,
+COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
const struct compat_iovec __user *, vec,
- unsigned long, vlen)
+ compat_ulong_t, vlen)
{
struct fd f = fdget(fd);
ssize_t ret;
@@ -1049,9 +1049,9 @@ COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
return ret;
}
-COMPAT_SYSCALL_DEFINE5(pwritev, unsigned long, fd,
+COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
const struct compat_iovec __user *,vec,
- unsigned long, vlen, u32, pos_low, u32, pos_high)
+ compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
{
loff_t pos = ((loff_t)pos_high << 32) | pos_low;
return compat_sys_pwritev64(fd, vec, vlen, pos);
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index 6c2d136561cb..2b96b59f75da 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -128,6 +128,7 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent,
char *d_name;
off_t d_off;
ino_t d_ino;
+ loff_t cur_pos = deh_offset(deh);
if (!de_visible(deh))
/* it is hidden entry */
@@ -200,8 +201,9 @@ int reiserfs_readdir_dentry(struct dentry *dentry, void *dirent,
if (local_buf != small_buf) {
kfree(local_buf);
}
- // next entry should be looked for with such offset
- next_pos = deh_offset(deh) + 1;
+
+ /* deh_offset(deh) may be invalid now. */
+ next_pos = cur_pos + 1;
if (item_moved(&tmp_ih, &path_to_entry)) {
set_cpu_key_k_offset(&pos_key,
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index f844533792ee..36166443bc45 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3211,8 +3211,14 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
attr->ia_size != i_size_read(inode)) {
error = inode_newsize_ok(inode, attr->ia_size);
if (!error) {
+ /*
+ * Could race against reiserfs_file_release
+ * if called from NFS, so take tailpack mutex.
+ */
+ mutex_lock(&REISERFS_I(inode)->tailpack);
truncate_setsize(inode, attr->ia_size);
- reiserfs_vfs_truncate_file(inode);
+ reiserfs_truncate_file(inode, 1);
+ mutex_unlock(&REISERFS_I(inode)->tailpack);
}
}
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 774c1eb7f1c9..3dd44db1465e 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -328,6 +328,8 @@ loff_t seq_lseek(struct file *file, loff_t offset, int whence)
m->read_pos = offset;
retval = file->f_pos = offset;
}
+ } else {
+ file->f_pos = offset;
}
}
file->f_version = m->version;
diff --git a/fs/splice.c b/fs/splice.c
index d37431dd60a1..4b5a5fac3383 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -555,6 +555,24 @@ static const struct pipe_buf_operations default_pipe_buf_ops = {
.get = generic_pipe_buf_get,
};
+static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+{
+ return 1;
+}
+
+/* Pipe buffer operations for a socket and similar. */
+const struct pipe_buf_operations nosteal_pipe_buf_ops = {
+ .can_merge = 0,
+ .map = generic_pipe_buf_map,
+ .unmap = generic_pipe_buf_unmap,
+ .confirm = generic_pipe_buf_confirm,
+ .release = generic_pipe_buf_release,
+ .steal = generic_pipe_buf_nosteal,
+ .get = generic_pipe_buf_get,
+};
+EXPORT_SYMBOL(nosteal_pipe_buf_ops);
+
static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
unsigned long vlen, loff_t offset)
{
diff --git a/fs/statfs.c b/fs/statfs.c
index c219e733f553..083dc0ac9140 100644
--- a/fs/statfs.c
+++ b/fs/statfs.c
@@ -94,7 +94,7 @@ retry:
int fd_statfs(int fd, struct kstatfs *st)
{
- struct fd f = fdget(fd);
+ struct fd f = fdget_raw(fd);
int error = -EBADF;
if (f.file) {
error = vfs_statfs(&f.file->f_path, st);
diff --git a/fs/super.c b/fs/super.c
index 68307c029228..e028b508db25 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -76,6 +76,8 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
total_objects = sb->s_nr_dentry_unused +
sb->s_nr_inodes_unused + fs_objects + 1;
+ if (!total_objects)
+ total_objects = 1;
if (sc->nr_to_scan) {
int dentries;
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index d0c6a007ce83..eda10959714f 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -487,6 +487,7 @@ static int v7_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_sb = sb;
sbi->s_block_base = 0;
sbi->s_type = FSTYPE_V7;
+ mutex_init(&sbi->s_lock);
sb->s_fs_info = sbi;
sb_set_blocksize(sb, 512);
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index ff8229340cd5..26b69b2d4a45 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -166,15 +166,10 @@ static int do_commit(struct ubifs_info *c)
err = ubifs_orphan_end_commit(c);
if (err)
goto out;
- old_ltail_lnum = c->ltail_lnum;
- err = ubifs_log_end_commit(c, new_ltail_lnum);
- if (err)
- goto out;
err = dbg_check_old_index(c, &zroot);
if (err)
goto out;
- mutex_lock(&c->mst_mutex);
c->mst_node->cmt_no = cpu_to_le64(c->cmt_no);
c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum);
c->mst_node->root_lnum = cpu_to_le32(zroot.lnum);
@@ -203,8 +198,9 @@ static int do_commit(struct ubifs_info *c)
c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS);
else
c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_NO_ORPHS);
- err = ubifs_write_master(c);
- mutex_unlock(&c->mst_mutex);
+
+ old_ltail_lnum = c->ltail_lnum;
+ err = ubifs_log_end_commit(c, new_ltail_lnum);
if (err)
goto out;
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 14374530784c..881324c08430 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -1524,8 +1524,7 @@ static int ubifs_vm_page_mkwrite(struct vm_area_struct *vma,
}
wait_for_stable_page(page);
- unlock_page(page);
- return 0;
+ return VM_FAULT_LOCKED;
out_unlock:
unlock_page(page);
diff --git a/fs/ubifs/log.c b/fs/ubifs/log.c
index 36bd4efd0819..06649d21b056 100644
--- a/fs/ubifs/log.c
+++ b/fs/ubifs/log.c
@@ -106,10 +106,14 @@ static inline long long empty_log_bytes(const struct ubifs_info *c)
h = (long long)c->lhead_lnum * c->leb_size + c->lhead_offs;
t = (long long)c->ltail_lnum * c->leb_size;
- if (h >= t)
+ if (h > t)
return c->log_bytes - h + t;
- else
+ else if (h != t)
return t - h;
+ else if (c->lhead_lnum != c->ltail_lnum)
+ return 0;
+ else
+ return c->log_bytes;
}
/**
@@ -447,9 +451,9 @@ out:
* @ltail_lnum: new log tail LEB number
*
* This function is called on when the commit operation was finished. It
- * moves log tail to new position and unmaps LEBs which contain obsolete data.
- * Returns zero in case of success and a negative error code in case of
- * failure.
+ * moves log tail to new position and updates the master node so that it stores
+ * the new log tail LEB number. Returns zero in case of success and a negative
+ * error code in case of failure.
*/
int ubifs_log_end_commit(struct ubifs_info *c, int ltail_lnum)
{
@@ -477,7 +481,12 @@ int ubifs_log_end_commit(struct ubifs_info *c, int ltail_lnum)
spin_unlock(&c->buds_lock);
err = dbg_check_bud_bytes(c);
+ if (err)
+ goto out;
+ err = ubifs_write_master(c);
+
+out:
mutex_unlock(&c->log_mutex);
return err;
}
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
index ab83ace9910a..1a4bb9e8b3b8 100644
--- a/fs/ubifs/master.c
+++ b/fs/ubifs/master.c
@@ -352,10 +352,9 @@ int ubifs_read_master(struct ubifs_info *c)
* ubifs_write_master - write master node.
* @c: UBIFS file-system description object
*
- * This function writes the master node. The caller has to take the
- * @c->mst_mutex lock before calling this function. Returns zero in case of
- * success and a negative error code in case of failure. The master node is
- * written twice to enable recovery.
+ * This function writes the master node. Returns zero in case of success and a
+ * negative error code in case of failure. The master node is written twice to
+ * enable recovery.
*/
int ubifs_write_master(struct ubifs_info *c)
{
diff --git a/fs/ubifs/shrinker.c b/fs/ubifs/shrinker.c
index 9e1d05666fed..e0a7a764a903 100644
--- a/fs/ubifs/shrinker.c
+++ b/fs/ubifs/shrinker.c
@@ -128,7 +128,6 @@ static int shrink_tnc(struct ubifs_info *c, int nr, int age, int *contention)
freed = ubifs_destroy_tnc_subtree(znode);
atomic_long_sub(freed, &ubifs_clean_zn_cnt);
atomic_long_sub(freed, &c->clean_zn_cnt);
- ubifs_assert(atomic_long_read(&c->clean_zn_cnt) >= 0);
total_freed += freed;
znode = zprev;
}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 879b9976c12b..05115d719408 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1970,7 +1970,6 @@ static struct ubifs_info *alloc_ubifs_info(struct ubi_volume_desc *ubi)
mutex_init(&c->lp_mutex);
mutex_init(&c->tnc_mutex);
mutex_init(&c->log_mutex);
- mutex_init(&c->mst_mutex);
mutex_init(&c->umount_mutex);
mutex_init(&c->bu_mutex);
mutex_init(&c->write_reserve_mutex);
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index b2babce4d70f..bd51277f6fe1 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1042,7 +1042,6 @@ struct ubifs_debug_info;
*
* @mst_node: master node
* @mst_offs: offset of valid master node
- * @mst_mutex: protects the master node area, @mst_node, and @mst_offs
*
* @max_bu_buf_len: maximum bulk-read buffer length
* @bu_mutex: protects the pre-allocated bulk-read buffer and @c->bu
@@ -1282,7 +1281,6 @@ struct ubifs_info {
struct ubifs_mst_node *mst_node;
int mst_offs;
- struct mutex mst_mutex;
int max_bu_buf_len;
struct mutex bu_mutex;
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index b6d15d349810..aa023283cc8a 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -1270,13 +1270,22 @@ update_time:
return 0;
}
+/*
+ * Maximum length of linked list formed by ICB hierarchy. The chosen number is
+ * arbitrary - just that we hopefully don't limit any real use of rewritten
+ * inode on write-once media but avoid looping for too long on corrupted media.
+ */
+#define UDF_MAX_ICB_NESTING 1024
+
static void __udf_read_inode(struct inode *inode)
{
struct buffer_head *bh = NULL;
struct fileEntry *fe;
uint16_t ident;
struct udf_inode_info *iinfo = UDF_I(inode);
+ unsigned int indirections = 0;
+reread:
/*
* Set defaults, but the inode is still incomplete!
* Note: get_new_inode() sets the following on a new inode:
@@ -1313,28 +1322,26 @@ static void __udf_read_inode(struct inode *inode)
ibh = udf_read_ptagged(inode->i_sb, &iinfo->i_location, 1,
&ident);
if (ident == TAG_IDENT_IE && ibh) {
- struct buffer_head *nbh = NULL;
struct kernel_lb_addr loc;
struct indirectEntry *ie;
ie = (struct indirectEntry *)ibh->b_data;
loc = lelb_to_cpu(ie->indirectICB.extLocation);
- if (ie->indirectICB.extLength &&
- (nbh = udf_read_ptagged(inode->i_sb, &loc, 0,
- &ident))) {
- if (ident == TAG_IDENT_FE ||
- ident == TAG_IDENT_EFE) {
- memcpy(&iinfo->i_location,
- &loc,
- sizeof(struct kernel_lb_addr));
- brelse(bh);
- brelse(ibh);
- brelse(nbh);
- __udf_read_inode(inode);
+ if (ie->indirectICB.extLength) {
+ brelse(bh);
+ brelse(ibh);
+ memcpy(&iinfo->i_location, &loc,
+ sizeof(struct kernel_lb_addr));
+ if (++indirections > UDF_MAX_ICB_NESTING) {
+ udf_err(inode->i_sb,
+ "too many ICBs in ICB hierarchy"
+ " (max %d supported)\n",
+ UDF_MAX_ICB_NESTING);
+ make_bad_inode(inode);
return;
}
- brelse(nbh);
+ goto reread;
}
}
brelse(ibh);
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 9ac4057a86c9..839a2bad7f45 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -630,6 +630,12 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
struct udf_sb_info *sbi = UDF_SB(sb);
int error = 0;
+ if (sbi->s_lvid_bh) {
+ int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev);
+ if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & MS_RDONLY))
+ return -EACCES;
+ }
+
uopt.flags = sbi->s_flags;
uopt.uid = sbi->s_uid;
uopt.gid = sbi->s_gid;
@@ -649,12 +655,6 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options)
sbi->s_dmode = uopt.dmode;
write_unlock(&sbi->s_cred_lock);
- if (sbi->s_lvid_bh) {
- int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev);
- if (write_rev > UDF_MAX_WRITE_VERSION)
- *flags |= MS_RDONLY;
- }
-
if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY))
goto out_unlock;
@@ -843,27 +843,38 @@ static int udf_find_fileset(struct super_block *sb,
return 1;
}
+/*
+ * Load primary Volume Descriptor Sequence
+ *
+ * Returns <0 on error, 0 on success. -EAGAIN is special: it means the next
+ * sequence should be tried.
+ */
static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
{
struct primaryVolDesc *pvoldesc;
struct ustr *instr, *outstr;
struct buffer_head *bh;
uint16_t ident;
- int ret = 1;
+ int ret = -ENOMEM;
instr = kmalloc(sizeof(struct ustr), GFP_NOFS);
if (!instr)
- return 1;
+ return -ENOMEM;
outstr = kmalloc(sizeof(struct ustr), GFP_NOFS);
if (!outstr)
goto out1;
bh = udf_read_tagged(sb, block, block, &ident);
- if (!bh)
+ if (!bh) {
+ ret = -EAGAIN;
goto out2;
+ }
- BUG_ON(ident != TAG_IDENT_PVD);
+ if (ident != TAG_IDENT_PVD) {
+ ret = -EIO;
+ goto out_bh;
+ }
pvoldesc = (struct primaryVolDesc *)bh->b_data;
@@ -889,8 +900,9 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
if (udf_CS0toUTF8(outstr, instr))
udf_debug("volSetIdent[] = '%s'\n", outstr->u_name);
- brelse(bh);
ret = 0;
+out_bh:
+ brelse(bh);
out2:
kfree(outstr);
out1:
@@ -947,7 +959,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
if (mdata->s_mirror_fe == NULL) {
udf_err(sb, "Both metadata and mirror metadata inode efe can not found\n");
- goto error_exit;
+ return -EIO;
}
}
@@ -964,23 +976,18 @@ static int udf_load_metadata_files(struct super_block *sb, int partition)
addr.logicalBlockNum, addr.partitionReferenceNum);
mdata->s_bitmap_fe = udf_iget(sb, &addr);
-
if (mdata->s_bitmap_fe == NULL) {
if (sb->s_flags & MS_RDONLY)
udf_warn(sb, "bitmap inode efe not found but it's ok since the disc is mounted read-only\n");
else {
udf_err(sb, "bitmap inode efe not found and attempted read-write mount\n");
- goto error_exit;
+ return -EIO;
}
}
}
udf_debug("udf_load_metadata_files Ok\n");
-
return 0;
-
-error_exit:
- return 1;
}
static void udf_load_fileset(struct super_block *sb, struct buffer_head *bh,
@@ -1069,7 +1076,7 @@ static int udf_fill_partdesc_info(struct super_block *sb,
if (!map->s_uspace.s_table) {
udf_debug("cannot load unallocSpaceTable (part %d)\n",
p_index);
- return 1;
+ return -EIO;
}
map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_TABLE;
udf_debug("unallocSpaceTable (part %d) @ %ld\n",
@@ -1079,7 +1086,7 @@ static int udf_fill_partdesc_info(struct super_block *sb,
if (phd->unallocSpaceBitmap.extLength) {
struct udf_bitmap *bitmap = udf_sb_alloc_bitmap(sb, p_index);
if (!bitmap)
- return 1;
+ return -ENOMEM;
map->s_uspace.s_bitmap = bitmap;
bitmap->s_extPosition = le32_to_cpu(
phd->unallocSpaceBitmap.extPosition);
@@ -1102,7 +1109,7 @@ static int udf_fill_partdesc_info(struct super_block *sb,
if (!map->s_fspace.s_table) {
udf_debug("cannot load freedSpaceTable (part %d)\n",
p_index);
- return 1;
+ return -EIO;
}
map->s_partition_flags |= UDF_PART_FLAG_FREED_TABLE;
@@ -1113,7 +1120,7 @@ static int udf_fill_partdesc_info(struct super_block *sb,
if (phd->freedSpaceBitmap.extLength) {
struct udf_bitmap *bitmap = udf_sb_alloc_bitmap(sb, p_index);
if (!bitmap)
- return 1;
+ return -ENOMEM;
map->s_fspace.s_bitmap = bitmap;
bitmap->s_extPosition = le32_to_cpu(
phd->freedSpaceBitmap.extPosition);
@@ -1165,7 +1172,7 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index)
udf_find_vat_block(sb, p_index, type1_index, blocks - 1);
}
if (!sbi->s_vat_inode)
- return 1;
+ return -EIO;
if (map->s_partition_type == UDF_VIRTUAL_MAP15) {
map->s_type_specific.s_virtual.s_start_offset = 0;
@@ -1177,7 +1184,7 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index)
pos = udf_block_map(sbi->s_vat_inode, 0);
bh = sb_bread(sb, pos);
if (!bh)
- return 1;
+ return -EIO;
vat20 = (struct virtualAllocationTable20 *)bh->b_data;
} else {
vat20 = (struct virtualAllocationTable20 *)
@@ -1195,6 +1202,12 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index)
return 0;
}
+/*
+ * Load partition descriptor block
+ *
+ * Returns <0 on error, 0 on success, -EAGAIN is special - try next descriptor
+ * sequence.
+ */
static int udf_load_partdesc(struct super_block *sb, sector_t block)
{
struct buffer_head *bh;
@@ -1204,13 +1217,15 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
int i, type1_idx;
uint16_t partitionNumber;
uint16_t ident;
- int ret = 0;
+ int ret;
bh = udf_read_tagged(sb, block, block, &ident);
if (!bh)
- return 1;
- if (ident != TAG_IDENT_PD)
+ return -EAGAIN;
+ if (ident != TAG_IDENT_PD) {
+ ret = 0;
goto out_bh;
+ }
p = (struct partitionDesc *)bh->b_data;
partitionNumber = le16_to_cpu(p->partitionNumber);
@@ -1229,10 +1244,13 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
if (i >= sbi->s_partitions) {
udf_debug("Partition (%d) not found in partition map\n",
partitionNumber);
+ ret = 0;
goto out_bh;
}
ret = udf_fill_partdesc_info(sb, p, i);
+ if (ret < 0)
+ goto out_bh;
/*
* Now rescan for VIRTUAL or METADATA partitions when SPARABLE and
@@ -1249,32 +1267,37 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block)
break;
}
- if (i >= sbi->s_partitions)
+ if (i >= sbi->s_partitions) {
+ ret = 0;
goto out_bh;
+ }
ret = udf_fill_partdesc_info(sb, p, i);
- if (ret)
+ if (ret < 0)
goto out_bh;
if (map->s_partition_type == UDF_METADATA_MAP25) {
ret = udf_load_metadata_files(sb, i);
- if (ret) {
+ if (ret < 0) {
udf_err(sb, "error loading MetaData partition map %d\n",
i);
goto out_bh;
}
} else {
- ret = udf_load_vat(sb, i, type1_idx);
- if (ret)
- goto out_bh;
/*
- * Mark filesystem read-only if we have a partition with
- * virtual map since we don't handle writing to it (we
- * overwrite blocks instead of relocating them).
+ * If we have a partition with virtual map, we don't handle
+ * writing to it (we overwrite blocks instead of relocating
+ * them).
*/
- sb->s_flags |= MS_RDONLY;
- pr_notice("Filesystem marked read-only because writing to pseudooverwrite partition is not implemented\n");
+ if (!(sb->s_flags & MS_RDONLY)) {
+ ret = -EACCES;
+ goto out_bh;
+ }
+ ret = udf_load_vat(sb, i, type1_idx);
+ if (ret < 0)
+ goto out_bh;
}
+ ret = 0;
out_bh:
/* In case loading failed, we handle cleanup in udf_fill_super */
brelse(bh);
@@ -1340,11 +1363,11 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
uint16_t ident;
struct buffer_head *bh;
unsigned int table_len;
- int ret = 0;
+ int ret;
bh = udf_read_tagged(sb, block, block, &ident);
if (!bh)
- return 1;
+ return -EAGAIN;
BUG_ON(ident != TAG_IDENT_LVD);
lvd = (struct logicalVolDesc *)bh->b_data;
table_len = le32_to_cpu(lvd->mapTableLength);
@@ -1352,7 +1375,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
udf_err(sb, "error loading logical volume descriptor: "
"Partition table too long (%u > %lu)\n", table_len,
sb->s_blocksize - sizeof(*lvd));
- ret = 1;
+ ret = -EIO;
goto out_bh;
}
@@ -1396,11 +1419,10 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
} else if (!strncmp(upm2->partIdent.ident,
UDF_ID_SPARABLE,
strlen(UDF_ID_SPARABLE))) {
- if (udf_load_sparable_map(sb, map,
- (struct sparablePartitionMap *)gpm) < 0) {
- ret = 1;
+ ret = udf_load_sparable_map(sb, map,
+ (struct sparablePartitionMap *)gpm);
+ if (ret < 0)
goto out_bh;
- }
} else if (!strncmp(upm2->partIdent.ident,
UDF_ID_METADATA,
strlen(UDF_ID_METADATA))) {
@@ -1465,7 +1487,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block,
}
if (lvd->integritySeqExt.extLength)
udf_load_logicalvolint(sb, leea_to_cpu(lvd->integritySeqExt));
-
+ ret = 0;
out_bh:
brelse(bh);
return ret;
@@ -1503,22 +1525,18 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_
}
/*
- * udf_process_sequence
- *
- * PURPOSE
- * Process a main/reserve volume descriptor sequence.
- *
- * PRE-CONDITIONS
- * sb Pointer to _locked_ superblock.
- * block First block of first extent of the sequence.
- * lastblock Lastblock of first extent of the sequence.
+ * Process a main/reserve volume descriptor sequence.
+ * @block First block of first extent of the sequence.
+ * @lastblock Lastblock of first extent of the sequence.
+ * @fileset There we store extent containing root fileset
*
- * HISTORY
- * July 1, 1997 - Andrew E. Mileski
- * Written, tested, and released.
+ * Returns <0 on error, 0 on success. -EAGAIN is special - try next descriptor
+ * sequence
*/
-static noinline int udf_process_sequence(struct super_block *sb, long block,
- long lastblock, struct kernel_lb_addr *fileset)
+static noinline int udf_process_sequence(
+ struct super_block *sb,
+ sector_t block, sector_t lastblock,
+ struct kernel_lb_addr *fileset)
{
struct buffer_head *bh = NULL;
struct udf_vds_record vds[VDS_POS_LENGTH];
@@ -1529,6 +1547,7 @@ static noinline int udf_process_sequence(struct super_block *sb, long block,
uint32_t vdsn;
uint16_t ident;
long next_s = 0, next_e = 0;
+ int ret;
memset(vds, 0, sizeof(struct udf_vds_record) * VDS_POS_LENGTH);
@@ -1543,7 +1562,7 @@ static noinline int udf_process_sequence(struct super_block *sb, long block,
udf_err(sb,
"Block %llu of volume descriptor sequence is corrupted or we could not read it\n",
(unsigned long long)block);
- return 1;
+ return -EAGAIN;
}
/* Process each descriptor (ISO 13346 3/8.3-8.4) */
@@ -1616,14 +1635,19 @@ static noinline int udf_process_sequence(struct super_block *sb, long block,
*/
if (!vds[VDS_POS_PRIMARY_VOL_DESC].block) {
udf_err(sb, "Primary Volume Descriptor not found!\n");
- return 1;
+ return -EAGAIN;
+ }
+ ret = udf_load_pvoldesc(sb, vds[VDS_POS_PRIMARY_VOL_DESC].block);
+ if (ret < 0)
+ return ret;
+
+ if (vds[VDS_POS_LOGICAL_VOL_DESC].block) {
+ ret = udf_load_logicalvol(sb,
+ vds[VDS_POS_LOGICAL_VOL_DESC].block,
+ fileset);
+ if (ret < 0)
+ return ret;
}
- if (udf_load_pvoldesc(sb, vds[VDS_POS_PRIMARY_VOL_DESC].block))
- return 1;
-
- if (vds[VDS_POS_LOGICAL_VOL_DESC].block && udf_load_logicalvol(sb,
- vds[VDS_POS_LOGICAL_VOL_DESC].block, fileset))
- return 1;
if (vds[VDS_POS_PARTITION_DESC].block) {
/*
@@ -1632,19 +1656,27 @@ static noinline int udf_process_sequence(struct super_block *sb, long block,
*/
for (block = vds[VDS_POS_PARTITION_DESC].block;
block < vds[VDS_POS_TERMINATING_DESC].block;
- block++)
- if (udf_load_partdesc(sb, block))
- return 1;
+ block++) {
+ ret = udf_load_partdesc(sb, block);
+ if (ret < 0)
+ return ret;
+ }
}
return 0;
}
+/*
+ * Load Volume Descriptor Sequence described by anchor in bh
+ *
+ * Returns <0 on error, 0 on success
+ */
static int udf_load_sequence(struct super_block *sb, struct buffer_head *bh,
struct kernel_lb_addr *fileset)
{
struct anchorVolDescPtr *anchor;
- long main_s, main_e, reserve_s, reserve_e;
+ sector_t main_s, main_e, reserve_s, reserve_e;
+ int ret;
anchor = (struct anchorVolDescPtr *)bh->b_data;
@@ -1662,18 +1694,26 @@ static int udf_load_sequence(struct super_block *sb, struct buffer_head *bh,
/* Process the main & reserve sequences */
/* responsible for finding the PartitionDesc(s) */
- if (!udf_process_sequence(sb, main_s, main_e, fileset))
- return 1;
- udf_sb_free_partitions(sb);
- if (!udf_process_sequence(sb, reserve_s, reserve_e, fileset))
- return 1;
+ ret = udf_process_sequence(sb, main_s, main_e, fileset);
+ if (ret != -EAGAIN)
+ return ret;
udf_sb_free_partitions(sb);
- return 0;
+ ret = udf_process_sequence(sb, reserve_s, reserve_e, fileset);
+ if (ret < 0) {
+ udf_sb_free_partitions(sb);
+ /* No sequence was OK, return -EIO */
+ if (ret == -EAGAIN)
+ ret = -EIO;
+ }
+ return ret;
}
/*
* Check whether there is an anchor block in the given block and
* load Volume Descriptor Sequence if so.
+ *
+ * Returns <0 on error, 0 on success, -EAGAIN is special - try next anchor
+ * block
*/
static int udf_check_anchor_block(struct super_block *sb, sector_t block,
struct kernel_lb_addr *fileset)
@@ -1685,33 +1725,40 @@ static int udf_check_anchor_block(struct super_block *sb, sector_t block,
if (UDF_QUERY_FLAG(sb, UDF_FLAG_VARCONV) &&
udf_fixed_to_variable(block) >=
sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits)
- return 0;
+ return -EAGAIN;
bh = udf_read_tagged(sb, block, block, &ident);
if (!bh)
- return 0;
+ return -EAGAIN;
if (ident != TAG_IDENT_AVDP) {
brelse(bh);
- return 0;
+ return -EAGAIN;
}
ret = udf_load_sequence(sb, bh, fileset);
brelse(bh);
return ret;
}
-/* Search for an anchor volume descriptor pointer */
-static sector_t udf_scan_anchors(struct super_block *sb, sector_t lastblock,
- struct kernel_lb_addr *fileset)
+/*
+ * Search for an anchor volume descriptor pointer.
+ *
+ * Returns < 0 on error, 0 on success. -EAGAIN is special - try next set
+ * of anchors.
+ */
+static int udf_scan_anchors(struct super_block *sb, sector_t *lastblock,
+ struct kernel_lb_addr *fileset)
{
sector_t last[6];
int i;
struct udf_sb_info *sbi = UDF_SB(sb);
int last_count = 0;
+ int ret;
/* First try user provided anchor */
if (sbi->s_anchor) {
- if (udf_check_anchor_block(sb, sbi->s_anchor, fileset))
- return lastblock;
+ ret = udf_check_anchor_block(sb, sbi->s_anchor, fileset);
+ if (ret != -EAGAIN)
+ return ret;
}
/*
* according to spec, anchor is in either:
@@ -1720,39 +1767,46 @@ static sector_t udf_scan_anchors(struct super_block *sb, sector_t lastblock,
* lastblock
* however, if the disc isn't closed, it could be 512.
*/
- if (udf_check_anchor_block(sb, sbi->s_session + 256, fileset))
- return lastblock;
+ ret = udf_check_anchor_block(sb, sbi->s_session + 256, fileset);
+ if (ret != -EAGAIN)
+ return ret;
/*
* The trouble is which block is the last one. Drives often misreport
* this so we try various possibilities.
*/
- last[last_count++] = lastblock;
- if (lastblock >= 1)
- last[last_count++] = lastblock - 1;
- last[last_count++] = lastblock + 1;
- if (lastblock >= 2)
- last[last_count++] = lastblock - 2;
- if (lastblock >= 150)
- last[last_count++] = lastblock - 150;
- if (lastblock >= 152)
- last[last_count++] = lastblock - 152;
+ last[last_count++] = *lastblock;
+ if (*lastblock >= 1)
+ last[last_count++] = *lastblock - 1;
+ last[last_count++] = *lastblock + 1;
+ if (*lastblock >= 2)
+ last[last_count++] = *lastblock - 2;
+ if (*lastblock >= 150)
+ last[last_count++] = *lastblock - 150;
+ if (*lastblock >= 152)
+ last[last_count++] = *lastblock - 152;
for (i = 0; i < last_count; i++) {
if (last[i] >= sb->s_bdev->bd_inode->i_size >>
sb->s_blocksize_bits)
continue;
- if (udf_check_anchor_block(sb, last[i], fileset))
- return last[i];
+ ret = udf_check_anchor_block(sb, last[i], fileset);
+ if (ret != -EAGAIN) {
+ if (!ret)
+ *lastblock = last[i];
+ return ret;
+ }
if (last[i] < 256)
continue;
- if (udf_check_anchor_block(sb, last[i] - 256, fileset))
- return last[i];
+ ret = udf_check_anchor_block(sb, last[i] - 256, fileset);
+ if (ret != -EAGAIN) {
+ if (!ret)
+ *lastblock = last[i];
+ return ret;
+ }
}
/* Finally try block 512 in case media is open */
- if (udf_check_anchor_block(sb, sbi->s_session + 512, fileset))
- return last[0];
- return 0;
+ return udf_check_anchor_block(sb, sbi->s_session + 512, fileset);
}
/*
@@ -1760,54 +1814,59 @@ static sector_t udf_scan_anchors(struct super_block *sb, sector_t lastblock,
* area specified by it. The function expects sbi->s_lastblock to be the last
* block on the media.
*
- * Return 1 if ok, 0 if not found.
- *
+ * Return <0 on error, 0 if anchor found. -EAGAIN is special meaning anchor
+ * was not found.
*/
static int udf_find_anchor(struct super_block *sb,
struct kernel_lb_addr *fileset)
{
- sector_t lastblock;
struct udf_sb_info *sbi = UDF_SB(sb);
+ sector_t lastblock = sbi->s_last_block;
+ int ret;
- lastblock = udf_scan_anchors(sb, sbi->s_last_block, fileset);
- if (lastblock)
+ ret = udf_scan_anchors(sb, &lastblock, fileset);
+ if (ret != -EAGAIN)
goto out;
/* No anchor found? Try VARCONV conversion of block numbers */
UDF_SET_FLAG(sb, UDF_FLAG_VARCONV);
+ lastblock = udf_variable_to_fixed(sbi->s_last_block);
/* Firstly, we try to not convert number of the last block */
- lastblock = udf_scan_anchors(sb,
- udf_variable_to_fixed(sbi->s_last_block),
- fileset);
- if (lastblock)
+ ret = udf_scan_anchors(sb, &lastblock, fileset);
+ if (ret != -EAGAIN)
goto out;
+ lastblock = sbi->s_last_block;
/* Secondly, we try with converted number of the last block */
- lastblock = udf_scan_anchors(sb, sbi->s_last_block, fileset);
- if (!lastblock) {
+ ret = udf_scan_anchors(sb, &lastblock, fileset);
+ if (ret < 0) {
/* VARCONV didn't help. Clear it. */
UDF_CLEAR_FLAG(sb, UDF_FLAG_VARCONV);
- return 0;
}
out:
- sbi->s_last_block = lastblock;
- return 1;
+ if (ret == 0)
+ sbi->s_last_block = lastblock;
+ return ret;
}
/*
* Check Volume Structure Descriptor, find Anchor block and load Volume
- * Descriptor Sequence
+ * Descriptor Sequence.
+ *
+ * Returns < 0 on error, 0 on success. -EAGAIN is special meaning anchor
+ * block was not found.
*/
static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
int silent, struct kernel_lb_addr *fileset)
{
struct udf_sb_info *sbi = UDF_SB(sb);
loff_t nsr_off;
+ int ret;
if (!sb_set_blocksize(sb, uopt->blocksize)) {
if (!silent)
udf_warn(sb, "Bad block size\n");
- return 0;
+ return -EINVAL;
}
sbi->s_last_block = uopt->lastblock;
if (!uopt->novrs) {
@@ -1828,12 +1887,13 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt,
/* Look for anchor block and load Volume Descriptor Sequence */
sbi->s_anchor = uopt->anchor;
- if (!udf_find_anchor(sb, fileset)) {
- if (!silent)
+ ret = udf_find_anchor(sb, fileset);
+ if (ret < 0) {
+ if (!silent && ret == -EAGAIN)
udf_warn(sb, "No anchor found\n");
- return 0;
+ return ret;
}
- return 1;
+ return 0;
}
static void udf_open_lvid(struct super_block *sb)
@@ -1939,7 +1999,7 @@ u64 lvid_get_unique_id(struct super_block *sb)
static int udf_fill_super(struct super_block *sb, void *options, int silent)
{
- int ret;
+ int ret = -EINVAL;
struct inode *inode = NULL;
struct udf_options uopt;
struct kernel_lb_addr rootdir, fileset;
@@ -2011,7 +2071,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
} else {
uopt.blocksize = bdev_logical_block_size(sb->s_bdev);
ret = udf_load_vrs(sb, &uopt, silent, &fileset);
- if (!ret && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) {
+ if (ret == -EAGAIN && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) {
if (!silent)
pr_notice("Rescanning with blocksize %d\n",
UDF_DEFAULT_BLOCKSIZE);
@@ -2021,8 +2081,11 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
ret = udf_load_vrs(sb, &uopt, silent, &fileset);
}
}
- if (!ret) {
- udf_warn(sb, "No partition found (1)\n");
+ if (ret < 0) {
+ if (ret == -EAGAIN) {
+ udf_warn(sb, "No partition found (1)\n");
+ ret = -EINVAL;
+ }
goto error_out;
}
@@ -2040,9 +2103,13 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
udf_err(sb, "minUDFReadRev=%x (max is %x)\n",
le16_to_cpu(lvidiu->minUDFReadRev),
UDF_MAX_READ_VERSION);
+ ret = -EINVAL;
+ goto error_out;
+ } else if (minUDFWriteRev > UDF_MAX_WRITE_VERSION &&
+ !(sb->s_flags & MS_RDONLY)) {
+ ret = -EACCES;
goto error_out;
- } else if (minUDFWriteRev > UDF_MAX_WRITE_VERSION)
- sb->s_flags |= MS_RDONLY;
+ }
sbi->s_udfrev = minUDFWriteRev;
@@ -2054,17 +2121,20 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
if (!sbi->s_partitions) {
udf_warn(sb, "No partition found (2)\n");
+ ret = -EINVAL;
goto error_out;
}
if (sbi->s_partmaps[sbi->s_partition].s_partition_flags &
- UDF_PART_FLAG_READ_ONLY) {
- pr_notice("Partition marked readonly; forcing readonly mount\n");
- sb->s_flags |= MS_RDONLY;
+ UDF_PART_FLAG_READ_ONLY &&
+ !(sb->s_flags & MS_RDONLY)) {
+ ret = -EACCES;
+ goto error_out;
}
if (udf_find_fileset(sb, &fileset, &rootdir)) {
udf_warn(sb, "No fileset found\n");
+ ret = -EINVAL;
goto error_out;
}
@@ -2086,6 +2156,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
if (!inode) {
udf_err(sb, "Error in udf_iget, block=%d, partition=%d\n",
rootdir.logicalBlockNum, rootdir.partitionReferenceNum);
+ ret = -EIO;
goto error_out;
}
@@ -2093,6 +2164,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
sb->s_root = d_make_root(inode);
if (!sb->s_root) {
udf_err(sb, "Couldn't allocate root dentry\n");
+ ret = -ENOMEM;
goto error_out;
}
sb->s_maxbytes = MAX_LFS_FILESIZE;
@@ -2113,7 +2185,7 @@ error_out:
kfree(sbi);
sb->s_fs_info = NULL;
- return -EINVAL;
+ return ret;
}
void _udf_err(struct super_block *sb, const char *function,
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index d7c6dbe4194b..d89f324bc387 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -80,11 +80,17 @@ static int udf_symlink_filler(struct file *file, struct page *page)
struct inode *inode = page->mapping->host;
struct buffer_head *bh = NULL;
unsigned char *symlink;
- int err = -EIO;
+ int err;
unsigned char *p = kmap(page);
struct udf_inode_info *iinfo;
uint32_t pos;
+ /* We don't support symlinks longer than one block */
+ if (inode->i_size > inode->i_sb->s_blocksize) {
+ err = -ENAMETOOLONG;
+ goto out_unmap;
+ }
+
iinfo = UDF_I(inode);
pos = udf_block_map(inode, 0);
@@ -94,8 +100,10 @@ static int udf_symlink_filler(struct file *file, struct page *page)
} else {
bh = sb_bread(inode->i_sb, pos);
- if (!bh)
- goto out;
+ if (!bh) {
+ err = -EIO;
+ goto out_unlock_inode;
+ }
symlink = bh->b_data;
}
@@ -109,9 +117,10 @@ static int udf_symlink_filler(struct file *file, struct page *page)
unlock_page(page);
return 0;
-out:
+out_unlock_inode:
up_read(&iinfo->i_data_sem);
SetPageError(page);
+out_unmap:
kunmap(page);
unlock_page(page);
return err;
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 41a695048be7..cfbb4c1b2f17 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1661,11 +1661,72 @@ xfs_vm_readpages(
return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
}
+/*
+ * This is basically a copy of __set_page_dirty_buffers() with one
+ * small tweak: buffers beyond EOF do not get marked dirty. If we mark them
+ * dirty, we'll never be able to clean them because we don't write buffers
+ * beyond EOF, and that means we can't invalidate pages that span EOF
+ * that have been marked dirty. Further, the dirty state can leak into
+ * the file interior if the file is extended, resulting in all sorts of
+ * bad things happening as the state does not match the underlying data.
+ *
+ * XXX: this really indicates that bufferheads in XFS need to die. Warts like
+ * this only exist because of bufferheads and how the generic code manages them.
+ */
+STATIC int
+xfs_vm_set_page_dirty(
+ struct page *page)
+{
+ struct address_space *mapping = page->mapping;
+ struct inode *inode = mapping->host;
+ loff_t end_offset;
+ loff_t offset;
+ int newly_dirty;
+
+ if (unlikely(!mapping))
+ return !TestSetPageDirty(page);
+
+ end_offset = i_size_read(inode);
+ offset = page_offset(page);
+
+ spin_lock(&mapping->private_lock);
+ if (page_has_buffers(page)) {
+ struct buffer_head *head = page_buffers(page);
+ struct buffer_head *bh = head;
+
+ do {
+ if (offset < end_offset)
+ set_buffer_dirty(bh);
+ bh = bh->b_this_page;
+ offset += 1 << inode->i_blkbits;
+ } while (bh != head);
+ }
+ newly_dirty = !TestSetPageDirty(page);
+ spin_unlock(&mapping->private_lock);
+
+ if (newly_dirty) {
+ /* sigh - __set_page_dirty() is static, so copy it here, too */
+ unsigned long flags;
+
+ spin_lock_irqsave(&mapping->tree_lock, flags);
+ if (page->mapping) { /* Race with truncate? */
+ WARN_ON_ONCE(!PageUptodate(page));
+ account_page_dirtied(page, mapping);
+ radix_tree_tag_set(&mapping->page_tree,
+ page_index(page), PAGECACHE_TAG_DIRTY);
+ }
+ spin_unlock_irqrestore(&mapping->tree_lock, flags);
+ __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
+ }
+ return newly_dirty;
+}
+
const struct address_space_operations xfs_address_space_operations = {
.readpage = xfs_vm_readpage,
.readpages = xfs_vm_readpages,
.writepage = xfs_vm_writepage,
.writepages = xfs_vm_writepages,
+ .set_page_dirty = xfs_vm_set_page_dirty,
.releasepage = xfs_vm_releasepage,
.invalidatepage = xfs_vm_invalidatepage,
.write_begin = xfs_vm_write_begin,
diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c
index 0b8b2a13cd24..79ddbaf93206 100644
--- a/fs/xfs/xfs_da_btree.c
+++ b/fs/xfs/xfs_da_btree.c
@@ -1223,6 +1223,7 @@ xfs_da3_node_toosmall(
/* start with smaller blk num */
forward = nodehdr.forw < nodehdr.back;
for (i = 0; i < 2; forward = !forward, i++) {
+ struct xfs_da3_icnode_hdr thdr;
if (forward)
blkno = nodehdr.forw;
else
@@ -1235,10 +1236,10 @@ xfs_da3_node_toosmall(
return(error);
node = bp->b_addr;
- xfs_da3_node_hdr_from_disk(&nodehdr, node);
+ xfs_da3_node_hdr_from_disk(&thdr, node);
xfs_trans_brelse(state->args->trans, bp);
- if (count - nodehdr.count >= 0)
+ if (count - thdr.count >= 0)
break; /* fits with at least 25% to spare */
}
if (i >= 2) {
@@ -1333,7 +1334,7 @@ xfs_da3_fixhashpath(
node = blk->bp->b_addr;
xfs_da3_node_hdr_from_disk(&nodehdr, node);
btree = xfs_da3_node_tree_p(node);
- if (be32_to_cpu(btree->hashval) == lasthash)
+ if (be32_to_cpu(btree[blk->index].hashval) == lasthash)
break;
blk->hashval = lasthash;
btree[blk->index].hashval = cpu_to_be32(lasthash);
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 044e97a33c8d..bac3e1635b7d 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -1104,7 +1104,8 @@ xfs_qm_dqflush(
* Get the buffer containing the on-disk dquot
*/
error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno,
- mp->m_quotainfo->qi_dqchunklen, 0, &bp, NULL);
+ mp->m_quotainfo->qi_dqchunklen, 0, &bp,
+ &xfs_dquot_buf_ops);
if (error)
goto out_unlock;
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index a5f2042aec8b..9f457fedbcfc 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -298,7 +298,16 @@ xfs_file_aio_read(
xfs_rw_iunlock(ip, XFS_IOLOCK_EXCL);
return ret;
}
- truncate_pagecache_range(VFS_I(ip), pos, -1);
+
+ /*
+ * Invalidate whole pages. This can return an error if
+ * we fail to invalidate a page, but this should never
+ * happen on XFS. Warn if it does fail.
+ */
+ ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
+ pos >> PAGE_CACHE_SHIFT, -1);
+ WARN_ON_ONCE(ret);
+ ret = 0;
}
xfs_rw_ilock_demote(ip, XFS_IOLOCK_EXCL);
}
@@ -677,7 +686,15 @@ xfs_file_dio_aio_write(
pos, -1);
if (ret)
goto out;
- truncate_pagecache_range(VFS_I(ip), pos, -1);
+ /*
+ * Invalidate whole pages. This can return an error if
+ * we fail to invalidate a page, but this should never
+ * happen on XFS. Warn if it does fail.
+ */
+ ret = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
+ pos >> PAGE_CACHE_SHIFT, -1);
+ WARN_ON_ONCE(ret);
+ ret = 0;
}
/*
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 3c3644ea825b..2288db4e1784 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -216,6 +216,8 @@ xfs_growfs_data_private(
*/
nfree = 0;
for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) {
+ __be32 *agfl_bno;
+
/*
* AG freespace header block
*/
@@ -275,8 +277,10 @@ xfs_growfs_data_private(
agfl->agfl_seqno = cpu_to_be32(agno);
uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_uuid);
}
+
+ agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp);
for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++)
- agfl->agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
+ agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
error = xfs_bwrite(bp);
xfs_buf_relse(bp);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 5e999680094a..83dfe6e73235 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -409,7 +409,8 @@ xfs_attrlist_by_handle(
return -XFS_ERROR(EPERM);
if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))
return -XFS_ERROR(EFAULT);
- if (al_hreq.buflen > XATTR_LIST_MAX)
+ if (al_hreq.buflen < sizeof(struct attrlist) ||
+ al_hreq.buflen > XATTR_LIST_MAX)
return -XFS_ERROR(EINVAL);
/*
@@ -1612,6 +1613,12 @@ xfs_file_ioctl(
case XFS_IOC_FREE_EOFBLOCKS: {
struct xfs_eofblocks eofb;
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (mp->m_flags & XFS_MOUNT_RDONLY)
+ return -XFS_ERROR(EROFS);
+
if (copy_from_user(&eofb, arg, sizeof(eofb)))
return -XFS_ERROR(EFAULT);
diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c
index c0c66259cc91..68799d7f02cc 100644
--- a/fs/xfs/xfs_ioctl32.c
+++ b/fs/xfs/xfs_ioctl32.c
@@ -359,7 +359,8 @@ xfs_compat_attrlist_by_handle(
if (copy_from_user(&al_hreq, arg,
sizeof(compat_xfs_fsop_attrlist_handlereq_t)))
return -XFS_ERROR(EFAULT);
- if (al_hreq.buflen > XATTR_LIST_MAX)
+ if (al_hreq.buflen < sizeof(struct attrlist) ||
+ al_hreq.buflen > XATTR_LIST_MAX)
return -XFS_ERROR(EINVAL);
/*
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index b75c9bb6e71e..29d1ca567ed3 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -935,6 +935,12 @@ xfs_qm_dqiter_bufs(
if (error)
break;
+ /*
+ * A corrupt buffer might not have a verifier attached, so
+ * make sure we have the correct one attached before writeback
+ * occurs.
+ */
+ bp->b_ops = &xfs_dquot_buf_ops;
xfs_qm_reset_dqcounts(mp, bp, firstid, type);
xfs_buf_delwri_queue(bp, buffer_list);
xfs_buf_relse(bp);
@@ -1018,7 +1024,7 @@ xfs_qm_dqiterate(
xfs_buf_readahead(mp->m_ddev_targp,
XFS_FSB_TO_DADDR(mp, rablkno),
mp->m_quotainfo->qi_dqchunklen,
- NULL);
+ &xfs_dquot_buf_ops);
rablkno++;
}
}