diff options
Diffstat (limited to 'fs')
118 files changed, 1562 insertions, 981 deletions
@@ -423,10 +423,12 @@ static void kill_ioctx_rcu(struct rcu_head *head) * when the processes owning a context have all exited to encourage * the rapid destruction of the kioctx. */ -static void kill_ioctx(struct kioctx *ctx) +static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx) { if (!atomic_xchg(&ctx->dead, 1)) { + spin_lock(&mm->ioctx_lock); hlist_del_rcu(&ctx->list); + spin_unlock(&mm->ioctx_lock); /* * It'd be more correct to do this in free_ioctx(), after all @@ -494,7 +496,7 @@ void exit_aio(struct mm_struct *mm) */ ctx->mmap_size = 0; - kill_ioctx(ctx); + kill_ioctx(mm, ctx); } } @@ -852,7 +854,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) if (!IS_ERR(ioctx)) { ret = put_user(ioctx->user_id, ctxp); if (ret) - kill_ioctx(ioctx); + kill_ioctx(current->mm, ioctx); put_ioctx(ioctx); } @@ -870,7 +872,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) { struct kioctx *ioctx = lookup_ioctx(ctx); if (likely(NULL != ioctx)) { - kill_ioctx(ioctx); + kill_ioctx(current->mm, ioctx); put_ioctx(ioctx); return 0; } diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index f8a0b0efda44..3aac8e9edac3 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1415,7 +1415,7 @@ static void fill_siginfo_note(struct memelfnote *note, user_siginfo_t *csigdata, * long file_ofs * followed by COUNT filenames in ASCII: "FILE1" NUL "FILE2" NUL... */ -static void fill_files_note(struct memelfnote *note) +static int fill_files_note(struct memelfnote *note) { struct vm_area_struct *vma; unsigned count, size, names_ofs, remaining, n; @@ -1430,11 +1430,11 @@ static void fill_files_note(struct memelfnote *note) names_ofs = (2 + 3 * count) * sizeof(data[0]); alloc: if (size >= MAX_FILE_NOTE_SIZE) /* paranoia check */ - goto err; + return -EINVAL; size = round_up(size, PAGE_SIZE); data = vmalloc(size); if (!data) - goto err; + return -ENOMEM; start_end_ofs = data + 2; name_base = name_curpos = ((char *)data) + names_ofs; @@ -1487,7 +1487,7 @@ static void fill_files_note(struct memelfnote *note) size = name_curpos - (char *)data; fill_note(note, "CORE", NT_FILE, size, data); - err: ; + return 0; } #ifdef CORE_DUMP_USE_REGSET @@ -1688,8 +1688,8 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, fill_auxv_note(&info->auxv, current->mm); info->size += notesize(&info->auxv); - fill_files_note(&info->files); - info->size += notesize(&info->files); + if (fill_files_note(&info->files) == 0) + info->size += notesize(&info->files); return 1; } @@ -1721,7 +1721,8 @@ static int write_note_info(struct elf_note_info *info, return 0; if (first && !writenote(&info->auxv, file, foffset)) return 0; - if (first && !writenote(&info->files, file, foffset)) + if (first && info->files.data && + !writenote(&info->files, file, foffset)) return 0; for (i = 1; i < info->thread_notes; ++i) @@ -1808,6 +1809,7 @@ static int elf_dump_thread_status(long signr, struct elf_thread_status *t) struct elf_note_info { struct memelfnote *notes; + struct memelfnote *notes_files; struct elf_prstatus *prstatus; /* NT_PRSTATUS */ struct elf_prpsinfo *psinfo; /* NT_PRPSINFO */ struct list_head thread_list; @@ -1898,9 +1900,12 @@ static int fill_note_info(struct elfhdr *elf, int phdrs, fill_siginfo_note(info->notes + 2, &info->csigdata, siginfo); fill_auxv_note(info->notes + 3, current->mm); - fill_files_note(info->notes + 4); + info->numnote = 4; - info->numnote = 5; + if (fill_files_note(info->notes + info->numnote) == 0) { + info->notes_files = info->notes + info->numnote; + info->numnote++; + } /* Try to dump the FPU. */ info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs, @@ -1962,8 +1967,9 @@ static void free_note_info(struct elf_note_info *info) kfree(list_entry(tmp, struct elf_thread_status, list)); } - /* Free data allocated by fill_files_note(): */ - vfree(info->notes[4].data); + /* Free data possibly allocated by fill_files_note(): */ + if (info->notes_files) + vfree(info->notes_files->data); kfree(info->prstatus); kfree(info->psinfo); @@ -2046,7 +2052,7 @@ static int elf_core_dump(struct coredump_params *cprm) struct vm_area_struct *vma, *gate_vma; struct elfhdr *elf = NULL; loff_t offset = 0, dataoff, foffset; - struct elf_note_info info; + struct elf_note_info info = { }; struct elf_phdr *phdr4note = NULL; struct elf_shdr *shdr4extnum = NULL; Elf_Half e_phnum; diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index 8fb42916d8a2..8dccf73025b3 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -114,6 +114,14 @@ void bio_integrity_free(struct bio *bio) } EXPORT_SYMBOL(bio_integrity_free); +static inline unsigned int bip_integrity_vecs(struct bio_integrity_payload *bip) +{ + if (bip->bip_slab == BIO_POOL_NONE) + return BIP_INLINE_VECS; + + return bvec_nr_vecs(bip->bip_slab); +} + /** * bio_integrity_add_page - Attach integrity metadata * @bio: bio to update @@ -129,7 +137,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, struct bio_integrity_payload *bip = bio->bi_integrity; struct bio_vec *iv; - if (bip->bip_vcnt >= bvec_nr_vecs(bip->bip_slab)) { + if (bip->bip_vcnt >= bip_integrity_vecs(bip)) { printk(KERN_ERR "%s: bip_vec full\n", __func__); return 0; } @@ -734,7 +742,7 @@ void bioset_integrity_free(struct bio_set *bs) mempool_destroy(bs->bio_integrity_pool); if (bs->bvec_integrity_pool) - mempool_destroy(bs->bio_integrity_pool); + mempool_destroy(bs->bvec_integrity_pool); } EXPORT_SYMBOL(bioset_integrity_free); @@ -917,8 +917,8 @@ void bio_copy_data(struct bio *dst, struct bio *src) src_p = kmap_atomic(src_bv->bv_page); dst_p = kmap_atomic(dst_bv->bv_page); - memcpy(dst_p + dst_bv->bv_offset, - src_p + src_bv->bv_offset, + memcpy(dst_p + dst_offset, + src_p + src_offset, bytes); kunmap_atomic(dst_p); @@ -1045,12 +1045,22 @@ static int __bio_copy_iov(struct bio *bio, struct bio_vec *iovecs, int bio_uncopy_user(struct bio *bio) { struct bio_map_data *bmd = bio->bi_private; - int ret = 0; + struct bio_vec *bvec; + int ret = 0, i; - if (!bio_flagged(bio, BIO_NULL_MAPPED)) - ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, - bmd->nr_sgvecs, bio_data_dir(bio) == READ, - 0, bmd->is_our_pages); + if (!bio_flagged(bio, BIO_NULL_MAPPED)) { + /* + * if we're in a workqueue, the request is orphaned, so + * don't copy into a random user address space, just free. + */ + if (current->mm) + ret = __bio_copy_iov(bio, bmd->iovecs, bmd->sgvecs, + bmd->nr_sgvecs, bio_data_dir(bio) == READ, + 0, bmd->is_our_pages); + else if (bmd->is_our_pages) + bio_for_each_segment_all(bvec, bio, i) + __free_page(bvec->bv_page); + } bio_free_map_data(bmd); bio_put(bio); return ret; diff --git a/fs/block_dev.c b/fs/block_dev.c index 2091db8cdd78..85f5c85ec91c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -58,17 +58,24 @@ static void bdev_inode_switch_bdi(struct inode *inode, struct backing_dev_info *dst) { struct backing_dev_info *old = inode->i_data.backing_dev_info; + bool wakeup_bdi = false; if (unlikely(dst == old)) /* deadlock avoidance */ return; bdi_lock_two(&old->wb, &dst->wb); spin_lock(&inode->i_lock); inode->i_data.backing_dev_info = dst; - if (inode->i_state & I_DIRTY) + if (inode->i_state & I_DIRTY) { + if (bdi_cap_writeback_dirty(dst) && !wb_has_dirty_io(&dst->wb)) + wakeup_bdi = true; list_move(&inode->i_wb_list, &dst->wb.b_dirty); + } spin_unlock(&inode->i_lock); spin_unlock(&old->wb.list_lock); spin_unlock(&dst->wb.list_lock); + + if (wakeup_bdi) + bdi_wakeup_thread_delayed(dst); } /* Kill _all_ buffers and pagecache , dirty or not.. */ diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index e15d2b0d8d3b..0890c83643e9 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -229,7 +229,7 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans, if (ret > 0) { /* we need an acl */ ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS); - } else { + } else if (ret < 0) { cache_no_acl(inode); } } else { diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 02fae7f7e42c..7fb054ba1b60 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1089,7 +1089,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, btrfs_set_node_ptr_generation(parent, parent_slot, trans->transid); btrfs_mark_buffer_dirty(parent); - tree_mod_log_free_eb(root->fs_info, buf); + if (last_ref) + tree_mod_log_free_eb(root->fs_info, buf); btrfs_free_tree_block(trans, root, buf, parent_start, last_ref); } @@ -1161,8 +1162,8 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, * time_seq). */ static void -__tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, - struct tree_mod_elem *first_tm) +__tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, + u64 time_seq, struct tree_mod_elem *first_tm) { u32 n; struct rb_node *next; @@ -1172,6 +1173,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, unsigned long p_size = sizeof(struct btrfs_key_ptr); n = btrfs_header_nritems(eb); + tree_mod_log_read_lock(fs_info); while (tm && tm->seq >= time_seq) { /* * all the operations are recorded with the operator used for @@ -1226,6 +1228,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, if (tm->index != first_tm->index) break; } + tree_mod_log_read_unlock(fs_info); btrfs_set_header_nritems(eb, n); } @@ -1274,7 +1277,7 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, extent_buffer_get(eb_rewin); btrfs_tree_read_lock(eb_rewin); - __tree_mod_log_rewind(eb_rewin, time_seq, tm); + __tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm); WARN_ON(btrfs_header_nritems(eb_rewin) > BTRFS_NODEPTRS_PER_BLOCK(fs_info->tree_root)); @@ -1350,7 +1353,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq) btrfs_set_header_generation(eb, old_generation); } if (tm) - __tree_mod_log_rewind(eb, time_seq, tm); + __tree_mod_log_rewind(root->fs_info, eb, time_seq, tm); else WARN_ON(btrfs_header_level(eb) != 0); WARN_ON(btrfs_header_nritems(eb) > BTRFS_NODEPTRS_PER_BLOCK(root)); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index df472ab1b5ac..bbafa05519da 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2402,6 +2402,8 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, default: WARN_ON(1); } + } else { + list_del_init(&locked_ref->cluster); } spin_unlock(&delayed_refs->lock); @@ -2424,7 +2426,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, * list before we release it. */ if (btrfs_delayed_ref_is_head(ref)) { - list_del_init(&locked_ref->cluster); btrfs_delayed_ref_unlock(locked_ref); locked_ref = NULL; } @@ -7298,6 +7299,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int err = 0; int ret; int level; + bool root_dropped = false; path = btrfs_alloc_path(); if (!path) { @@ -7355,6 +7357,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, while (1) { btrfs_tree_lock(path->nodes[level]); btrfs_set_lock_blocking(path->nodes[level]); + path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; ret = btrfs_lookup_extent_info(trans, root, path->nodes[level]->start, @@ -7370,6 +7373,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, break; btrfs_tree_unlock(path->nodes[level]); + path->locks[level] = 0; WARN_ON(wc->refs[level] != 1); level--; } @@ -7471,13 +7475,23 @@ int btrfs_drop_snapshot(struct btrfs_root *root, free_extent_buffer(root->commit_root); kfree(root); } + root_dropped = true; out_end_trans: btrfs_end_transaction_throttle(trans, tree_root); out_free: kfree(wc); btrfs_free_path(path); out: - if (err) + /* + * So if we need to stop dropping the snapshot for whatever reason we + * need to make sure to add it back to the dead root list so that we + * keep trying to do the work later. This also cleans up roots if we + * don't have it in the radix (like when we recover after a power fail + * or unmount) so we don't leak memory. + */ + if (root_dropped == false) + btrfs_add_dead_root(root); + if (err && err != -EAGAIN) btrfs_std_error(root->fs_info, err); return err; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 17f3064b4a3e..25e6a8e1014e 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2419,10 +2419,23 @@ out_unlock: return ret; } +static void free_sa_defrag_extent(struct new_sa_defrag_extent *new) +{ + struct old_sa_defrag_extent *old, *tmp; + + if (!new) + return; + + list_for_each_entry_safe(old, tmp, &new->head, list) { + list_del(&old->list); + kfree(old); + } + kfree(new); +} + static void relink_file_extents(struct new_sa_defrag_extent *new) { struct btrfs_path *path; - struct old_sa_defrag_extent *old, *tmp; struct sa_defrag_extent_backref *backref; struct sa_defrag_extent_backref *prev = NULL; struct inode *inode; @@ -2465,16 +2478,11 @@ static void relink_file_extents(struct new_sa_defrag_extent *new) kfree(prev); btrfs_free_path(path); - - list_for_each_entry_safe(old, tmp, &new->head, list) { - list_del(&old->list); - kfree(old); - } out: + free_sa_defrag_extent(new); + atomic_dec(&root->fs_info->defrag_running); wake_up(&root->fs_info->transaction_wait); - - kfree(new); } static struct new_sa_defrag_extent * @@ -2484,7 +2492,7 @@ record_old_file_extents(struct inode *inode, struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_path *path; struct btrfs_key key; - struct old_sa_defrag_extent *old, *tmp; + struct old_sa_defrag_extent *old; struct new_sa_defrag_extent *new; int ret; @@ -2532,7 +2540,7 @@ record_old_file_extents(struct inode *inode, if (slot >= btrfs_header_nritems(l)) { ret = btrfs_next_leaf(root, path); if (ret < 0) - goto out_free_list; + goto out_free_path; else if (ret > 0) break; continue; @@ -2561,7 +2569,7 @@ record_old_file_extents(struct inode *inode, old = kmalloc(sizeof(*old), GFP_NOFS); if (!old) - goto out_free_list; + goto out_free_path; offset = max(new->file_pos, key.offset); end = min(new->file_pos + new->len, key.offset + num_bytes); @@ -2583,15 +2591,10 @@ next: return new; -out_free_list: - list_for_each_entry_safe(old, tmp, &new->head, list) { - list_del(&old->list); - kfree(old); - } out_free_path: btrfs_free_path(path); out_kfree: - kfree(new); + free_sa_defrag_extent(new); return NULL; } @@ -2652,7 +2655,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent) EXTENT_DEFRAG, 1, cached_state); if (ret) { u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item); - if (last_snapshot >= BTRFS_I(inode)->generation) + if (0 && last_snapshot >= BTRFS_I(inode)->generation) /* the inode is shared */ new = record_old_file_extents(inode, ordered_extent); @@ -2743,8 +2746,14 @@ out: btrfs_remove_ordered_extent(inode, ordered_extent); /* for snapshot-aware defrag */ - if (new) - relink_file_extents(new); + if (new) { + if (ret) { + free_sa_defrag_extent(new); + atomic_dec(&root->fs_info->defrag_running); + } else { + relink_file_extents(new); + } + } /* once for us */ btrfs_put_ordered_extent(ordered_extent); @@ -8146,7 +8155,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, /* check for collisions, even if the name isn't there */ - ret = btrfs_check_dir_item_collision(root, new_dir->i_ino, + ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino, new_dentry->d_name.name, new_dentry->d_name.len); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 0f81d67cdc8d..783906c687b5 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1528,6 +1528,12 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file, printk(KERN_INFO "btrfs: Snapshot src from " "another FS\n"); ret = -EINVAL; + } else if (!inode_owner_or_capable(src_inode)) { + /* + * Subvolume creation is not restricted, but snapshots + * are limited to own subvolumes only + */ + ret = -EPERM; } else { ret = btrfs_mksubvol(&file->f_path, name, namelen, BTRFS_I(src_inode)->root, @@ -2093,7 +2099,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT); if (err == -EINTR) - goto out; + goto out_drop_write; dentry = lookup_one_len(vol_args->name, parent, namelen); if (IS_ERR(dentry)) { err = PTR_ERR(dentry); @@ -2235,6 +2241,7 @@ out_dput: dput(dentry); out_unlock_dir: mutex_unlock(&dir->i_mutex); +out_drop_write: mnt_drop_write_file(file); out: kfree(vol_args); @@ -3299,6 +3306,9 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg) switch (p->cmd) { case BTRFS_IOCTL_DEV_REPLACE_CMD_START: + if (root->fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + if (atomic_xchg( &root->fs_info->mutually_exclusive_operation_running, 1)) { diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 4febca4fc2de..b3896d5f233a 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -691,6 +691,7 @@ struct backref_node *build_backref_tree(struct reloc_control *rc, int cowonly; int ret; int err = 0; + bool need_check = true; path1 = btrfs_alloc_path(); path2 = btrfs_alloc_path(); @@ -914,6 +915,7 @@ again: cur->bytenr); lower = cur; + need_check = true; for (; level < BTRFS_MAX_LEVEL; level++) { if (!path2->nodes[level]) { BUG_ON(btrfs_root_bytenr(&root->root_item) != @@ -957,14 +959,12 @@ again: /* * add the block to pending list if we - * need check its backrefs. only block - * at 'cur->level + 1' is added to the - * tail of pending list. this guarantees - * we check backrefs from lower level - * blocks to upper level blocks. + * need check its backrefs, we only do this once + * while walking up a tree as we will catch + * anything else later on. */ - if (!upper->checked && - level == cur->level + 1) { + if (!upper->checked && need_check) { + need_check = false; list_add_tail(&edge->list[UPPER], &list); } else diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 79bd479317cb..eb84c2db1aca 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -2501,7 +2501,7 @@ again: ret = scrub_extent(sctx, extent_logical, extent_len, extent_physical, extent_dev, flags, generation, extent_mirror_num, - extent_physical); + extent_logical - logical + physical); if (ret) goto out; diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index ff40f1c00ce3..256a9a46d544 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -2524,7 +2524,8 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir) di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item); btrfs_dir_item_key_to_cpu(eb, di, &di_key); - if (di_key.objectid < sctx->send_progress) { + if (di_key.type != BTRFS_ROOT_ITEM_KEY && + di_key.objectid < sctx->send_progress) { ret = 1; goto out; } @@ -4579,6 +4580,41 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) send_root = BTRFS_I(file_inode(mnt_file))->root; fs_info = send_root->fs_info; + /* + * This is done when we lookup the root, it should already be complete + * by the time we get here. + */ + WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE); + + /* + * If we just created this root we need to make sure that the orphan + * cleanup has been done and committed since we search the commit root, + * so check its commit root transid with our otransid and if they match + * commit the transaction to make sure everything is updated. + */ + down_read(&send_root->fs_info->extent_commit_sem); + if (btrfs_header_generation(send_root->commit_root) == + btrfs_root_otransid(&send_root->root_item)) { + struct btrfs_trans_handle *trans; + + up_read(&send_root->fs_info->extent_commit_sem); + + trans = btrfs_attach_transaction_barrier(send_root); + if (IS_ERR(trans)) { + if (PTR_ERR(trans) != -ENOENT) { + ret = PTR_ERR(trans); + goto out; + } + /* ENOENT means theres no transaction */ + } else { + ret = btrfs_commit_transaction(trans, send_root); + if (ret) + goto out; + } + } else { + up_read(&send_root->fs_info->extent_commit_sem); + } + arg = memdup_user(arg_, sizeof(*arg)); if (IS_ERR(arg)) { ret = PTR_ERR(arg); @@ -4587,8 +4623,8 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_) } if (!access_ok(VERIFY_READ, arg->clone_sources, - sizeof(*arg->clone_sources * - arg->clone_sources_count))) { + sizeof(*arg->clone_sources) * + arg->clone_sources_count)) { ret = -EFAULT; goto out; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c276ac9a0ec3..bca436330681 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3314,7 +3314,7 @@ static int log_one_extent(struct btrfs_trans_handle *trans, btrfs_set_token_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG, &token); - if (em->block_start == 0) + if (em->block_start == EXTENT_MAP_HOLE) skip_csum = true; } @@ -3728,8 +3728,9 @@ next_slot: } log_extents: + btrfs_release_path(path); + btrfs_release_path(dst_path); if (fast_search) { - btrfs_release_path(dst_path); ret = btrfs_log_changed_extents(trans, root, inode, dst_path); if (ret) { err = ret; @@ -3746,8 +3747,6 @@ log_extents: } if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { - btrfs_release_path(path); - btrfs_release_path(dst_path); ret = log_directory_changes(trans, root, inode, path, dst_path); if (ret) { err = ret; diff --git a/fs/btrfs/ulist.c b/fs/btrfs/ulist.c index 7b417e20efe2..b0a523b2c60e 100644 --- a/fs/btrfs/ulist.c +++ b/fs/btrfs/ulist.c @@ -205,6 +205,10 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, u64 new_alloced = ulist->nodes_alloced + 128; struct ulist_node *new_nodes; void *old = NULL; + int i; + + for (i = 0; i < ulist->nnodes; i++) + rb_erase(&ulist->nodes[i].rb_node, &ulist->root); /* * if nodes_alloced == ULIST_SIZE no memory has been allocated @@ -224,6 +228,17 @@ int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux, ulist->nodes = new_nodes; ulist->nodes_alloced = new_alloced; + + /* + * krealloc actually uses memcpy, which does not copy rb_node + * pointers, so we have to do it ourselves. Otherwise we may + * be bitten by crashes. + */ + for (i = 0; i < ulist->nnodes; i++) { + ret = ulist_rbtree_insert(ulist, &ulist->nodes[i]); + if (ret < 0) + return ret; + } } ulist->nodes[ulist->nnodes].val = val; ulist->nodes[ulist->nnodes].aux = aux; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8bffb9174afb..b6c23c4abae2 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4248,6 +4248,7 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) btrfs_emerg(fs_info, "Invalid mapping for %Lu-%Lu, got " "%Lu-%Lu\n", logical, logical+len, em->start, em->start + em->len); + free_extent_map(em); return 1; } @@ -4429,6 +4430,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, " "found %Lu-%Lu\n", logical, em->start, em->start + em->len); + free_extent_map(em); return -EINVAL; } diff --git a/fs/buffer.c b/fs/buffer.c index d2a4d1bb2d57..75964d734444 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -620,14 +620,16 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode); static void __set_page_dirty(struct page *page, struct address_space *mapping, int warn) { - spin_lock_irq(&mapping->tree_lock); + unsigned long flags; + + spin_lock_irqsave(&mapping->tree_lock, flags); if (page->mapping) { /* Race with truncate? */ WARN_ON_ONCE(warn && !PageUptodate(page)); account_page_dirtied(page, mapping); radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } - spin_unlock_irq(&mapping->tree_lock); + spin_unlock_irqrestore(&mapping->tree_lock, flags); __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); } diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 3e68ac101040..5da06f020986 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -213,9 +213,13 @@ static int readpage_nounlock(struct file *filp, struct page *page) if (err < 0) { SetPageError(page); goto out; - } else if (err < PAGE_CACHE_SIZE) { + } else { + if (err < PAGE_CACHE_SIZE) { /* zero fill remainder of page */ - zero_user_segment(page, err, PAGE_CACHE_SIZE); + zero_user_segment(page, err, PAGE_CACHE_SIZE); + } else { + flush_dcache_page(page); + } } SetPageUptodate(page); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 656e16907430..5de16f5ac7e9 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -313,9 +313,9 @@ static int striped_read(struct inode *inode, { struct ceph_fs_client *fsc = ceph_inode_to_client(inode); struct ceph_inode_info *ci = ceph_inode(inode); - u64 pos, this_len; + u64 pos, this_len, left; int io_align, page_align; - int left, pages_left; + int pages_left; int read; struct page **page_pos; int ret; @@ -346,47 +346,40 @@ more: ret = 0; hit_stripe = this_len < left; was_short = ret >= 0 && ret < this_len; - dout("striped_read %llu~%u (read %u) got %d%s%s\n", pos, left, read, + dout("striped_read %llu~%llu (read %u) got %d%s%s\n", pos, left, read, ret, hit_stripe ? " HITSTRIPE" : "", was_short ? " SHORT" : ""); - if (ret > 0) { - int didpages = (page_align + ret) >> PAGE_CACHE_SHIFT; - - if (read < pos - off) { - dout(" zero gap %llu to %llu\n", off + read, pos); - ceph_zero_page_vector_range(page_align + read, - pos - off - read, pages); + if (ret >= 0) { + int didpages; + if (was_short && (pos + ret < inode->i_size)) { + u64 tmp = min(this_len - ret, + inode->i_size - pos - ret); + dout(" zero gap %llu to %llu\n", + pos + ret, pos + ret + tmp); + ceph_zero_page_vector_range(page_align + read + ret, + tmp, pages); + ret += tmp; } + + didpages = (page_align + ret) >> PAGE_CACHE_SHIFT; pos += ret; read = pos - off; left -= ret; page_pos += didpages; pages_left -= didpages; - /* hit stripe? */ - if (left && hit_stripe) + /* hit stripe and need continue*/ + if (left && hit_stripe && pos < inode->i_size) goto more; } - if (was_short) { + if (read > 0) { + ret = read; /* did we bounce off eof? */ if (pos + left > inode->i_size) *checkeof = 1; - - /* zero trailing bytes (inside i_size) */ - if (left > 0 && pos < inode->i_size) { - if (pos + left > inode->i_size) - left = inode->i_size - pos; - - dout("zero tail %d\n", left); - ceph_zero_page_vector_range(page_align + read, left, - pages); - read += left; - } } - if (ret >= 0) - ret = read; dout("striped_read returns %d\n", ret); return ret; } @@ -618,6 +611,8 @@ out: if (check_caps) ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL); + } else if (ret != -EOLDSNAPC && written > 0) { + ret = written; } return ret; } diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index e0b4ef31d3c8..669622fd1ae3 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -196,8 +196,10 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) r = ceph_calc_file_object_mapping(&ci->i_layout, dl.file_offset, len, &dl.object_no, &dl.object_offset, &olen); - if (r < 0) + if (r < 0) { + up_read(&osdc->map_sem); return -EIO; + } dl.file_offset -= dl.object_offset; dl.object_size = ceph_file_layout_object_size(ci->i_layout); dl.block_size = ceph_file_layout_su(ci->i_layout); @@ -209,8 +211,12 @@ static long ceph_ioctl_get_dataloc(struct file *file, void __user *arg) snprintf(dl.object_name, sizeof(dl.object_name), "%llx.%08llx", ceph_ino(inode), dl.object_no); - ceph_calc_ceph_pg(&pgid, dl.object_name, osdc->osdmap, - ceph_file_layout_pg_pool(ci->i_layout)); + r = ceph_calc_ceph_pg(&pgid, dl.object_name, osdc->osdmap, + ceph_file_layout_pg_pool(ci->i_layout)); + if (r < 0) { + up_read(&osdc->map_sem); + return r; + } dl.osd = ceph_calc_pg_primary(osdc->osdmap, pgid); if (dl.osd >= 0) { diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 4d2920304be8..d6a536886472 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -414,6 +414,9 @@ static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc, { struct ceph_mds_session *s; + if (mds >= mdsc->mdsmap->m_max_mds) + return ERR_PTR(-EINVAL); + s = kzalloc(sizeof(*s), GFP_NOFS); if (!s) return ERR_PTR(-ENOMEM); @@ -639,6 +642,8 @@ static void __unregister_request(struct ceph_mds_client *mdsc, req->r_unsafe_dir = NULL; } + complete_all(&req->r_safe_completion); + ceph_mdsc_put_request(req); } @@ -1840,8 +1845,11 @@ static int __do_request(struct ceph_mds_client *mdsc, int mds = -1; int err = -EAGAIN; - if (req->r_err || req->r_got_result) + if (req->r_err || req->r_got_result) { + if (req->r_aborted) + __unregister_request(mdsc, req); goto out; + } if (req->r_timeout && time_after_eq(jiffies, req->r_started + req->r_timeout)) { @@ -2151,7 +2159,6 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg) if (head->safe) { req->r_got_safe = true; __unregister_request(mdsc, req); - complete_all(&req->r_safe_completion); if (req->r_got_unsafe) { /* @@ -3040,8 +3047,10 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) fsc->mdsc = mdsc; mutex_init(&mdsc->mutex); mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS); - if (mdsc->mdsmap == NULL) + if (mdsc->mdsmap == NULL) { + kfree(mdsc); return -ENOMEM; + } init_completion(&mdsc->safe_umount_waiters); init_waitqueue_head(&mdsc->session_close_wq); diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c index 9278dec9e940..d4d38977dcbb 100644 --- a/fs/ceph/mdsmap.c +++ b/fs/ceph/mdsmap.c @@ -138,6 +138,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end) m->m_info[mds].export_targets = kcalloc(num_export_targets, sizeof(u32), GFP_NOFS); + if (m->m_info[mds].export_targets == NULL) + goto badmem; for (j = 0; j < num_export_targets; j++) m->m_info[mds].export_targets[j] = ceph_decode_32(&pexport_targets); @@ -170,7 +172,7 @@ bad: DUMP_PREFIX_OFFSET, 16, 1, start, end - start, true); ceph_mdsmap_destroy(m); - return ERR_PTR(-EINVAL); + return ERR_PTR(err); } void ceph_mdsmap_destroy(struct ceph_mdsmap *m) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 7d377c9a5e35..6627b26a800c 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -357,7 +357,7 @@ static int parse_mount_options(struct ceph_mount_options **pfsopt, } err = -EINVAL; dev_name_end--; /* back up to ':' separator */ - if (*dev_name_end != ':') { + if (dev_name_end < dev_name || *dev_name_end != ':') { pr_err("device name is missing path (no : separator in %s)\n", dev_name); goto out; diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index 9b6b2b6dd164..be661d8f532a 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -675,17 +675,18 @@ ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value, if (!ceph_is_valid_xattr(name)) return -ENODATA; - spin_lock(&ci->i_ceph_lock); - dout("getxattr %p ver=%lld index_ver=%lld\n", inode, - ci->i_xattrs.version, ci->i_xattrs.index_version); /* let's see if a virtual xattr was requested */ vxattr = ceph_match_vxattr(inode, name); if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) { err = vxattr->getxattr_cb(ci, value, size); - goto out; + return err; } + spin_lock(&ci->i_ceph_lock); + dout("getxattr %p ver=%lld index_ver=%lld\n", inode, + ci->i_xattrs.version, ci->i_xattrs.index_version); + if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) && (ci->i_xattrs.index_version >= ci->i_xattrs.version)) { goto get_xattr; diff --git a/fs/cifs/cifs_unicode.h b/fs/cifs/cifs_unicode.h index 4fb097468e21..fe8d6276410a 100644 --- a/fs/cifs/cifs_unicode.h +++ b/fs/cifs/cifs_unicode.h @@ -327,14 +327,14 @@ UniToupper(register wchar_t uc) /* * UniStrupr: Upper case a unicode string */ -static inline wchar_t * -UniStrupr(register wchar_t *upin) +static inline __le16 * +UniStrupr(register __le16 *upin) { - register wchar_t *up; + register __le16 *up; up = upin; while (*up) { /* For all characters */ - *up = UniToupper(*up); + *up = cpu_to_le16(UniToupper(le16_to_cpu(*up))); up++; } return upin; /* Return input pointer */ diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 51f5e0ee7237..494b68349667 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -1027,15 +1027,30 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode, __u32 secdesclen = 0; struct cifs_ntsd *pntsd = NULL; /* acl obtained from server */ struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */ + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct tcon_link *tlink = cifs_sb_tlink(cifs_sb); + struct cifs_tcon *tcon; + + if (IS_ERR(tlink)) + return PTR_ERR(tlink); + tcon = tlink_tcon(tlink); cifs_dbg(NOISY, "set ACL from mode for %s\n", path); /* Get the security descriptor */ - pntsd = get_cifs_acl(CIFS_SB(inode->i_sb), inode, path, &secdesclen); + + if (tcon->ses->server->ops->get_acl == NULL) { + cifs_put_tlink(tlink); + return -EOPNOTSUPP; + } + + pntsd = tcon->ses->server->ops->get_acl(cifs_sb, inode, path, + &secdesclen); if (IS_ERR(pntsd)) { rc = PTR_ERR(pntsd); cifs_dbg(VFS, "%s: error %d getting sec desc\n", __func__, rc); - goto out; + cifs_put_tlink(tlink); + return rc; } /* @@ -1048,6 +1063,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode, pnntsd = kmalloc(secdesclen, GFP_KERNEL); if (!pnntsd) { kfree(pntsd); + cifs_put_tlink(tlink); return -ENOMEM; } @@ -1056,14 +1072,18 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode, cifs_dbg(NOISY, "build_sec_desc rc: %d\n", rc); + if (tcon->ses->server->ops->set_acl == NULL) + rc = -EOPNOTSUPP; + if (!rc) { /* Set the security descriptor */ - rc = set_cifs_acl(pnntsd, secdesclen, inode, path, aclflag); + rc = tcon->ses->server->ops->set_acl(pnntsd, secdesclen, inode, + path, aclflag); cifs_dbg(NOISY, "set_cifs_acl rc: %d\n", rc); } + cifs_put_tlink(tlink); kfree(pnntsd); kfree(pntsd); -out: return rc; } diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 71436d1fca13..5c807b23ca67 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -389,7 +389,7 @@ find_domain_name(struct cifs_ses *ses, const struct nls_table *nls_cp) if (blobptr + attrsize > blobend) break; if (type == NTLMSSP_AV_NB_DOMAIN_NAME) { - if (!attrsize) + if (!attrsize || attrsize >= CIFS_MAX_DOMAINNAME_LEN) break; if (!ses->domainName) { ses->domainName = @@ -414,7 +414,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, int rc = 0; int len; char nt_hash[CIFS_NTHASH_SIZE]; - wchar_t *user; + __le16 *user; wchar_t *domain; wchar_t *server; @@ -439,7 +439,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, return rc; } - /* convert ses->user_name to unicode and uppercase */ + /* convert ses->user_name to unicode */ len = ses->user_name ? strlen(ses->user_name) : 0; user = kmalloc(2 + (len * 2), GFP_KERNEL); if (user == NULL) { @@ -448,7 +448,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, } if (len) { - len = cifs_strtoUTF16((__le16 *)user, ses->user_name, len, nls_cp); + len = cifs_strtoUTF16(user, ses->user_name, len, nls_cp); UniStrupr(user); } else { memset(user, '\0', 2); diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 4f07f6fbe494..e2c2d96491fa 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -44,6 +44,7 @@ #define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1) #define MAX_SERVER_SIZE 15 #define MAX_SHARE_SIZE 80 +#define CIFS_MAX_DOMAINNAME_LEN 256 /* max domain name length */ #define MAX_USERNAME_SIZE 256 /* reasonable maximum for current servers */ #define MAX_PASSWORD_SIZE 512 /* max for windows seems to be 256 wide chars */ @@ -369,6 +370,16 @@ struct smb_version_operations { void (*new_lease_key)(struct cifs_fid *fid); int (*calc_signature)(struct smb_rqst *rqst, struct TCP_Server_Info *server); + ssize_t (*query_all_EAs)(const unsigned int, struct cifs_tcon *, + const unsigned char *, const unsigned char *, char *, + size_t, const struct nls_table *, int); + int (*set_EA)(const unsigned int, struct cifs_tcon *, const char *, + const char *, const void *, const __u16, + const struct nls_table *, int); + struct cifs_ntsd * (*get_acl)(struct cifs_sb_info *, struct inode *, + const char *, u32 *); + int (*set_acl)(struct cifs_ntsd *, __u32, struct inode *, const char *, + int); }; struct smb_version_values { diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index a58dc77cc443..d17c5d72cd29 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -3306,11 +3306,13 @@ static __u16 ACL_to_cifs_posix(char *parm_data, const char *pACL, return 0; } cifs_acl->version = cpu_to_le16(1); - if (acl_type == ACL_TYPE_ACCESS) + if (acl_type == ACL_TYPE_ACCESS) { cifs_acl->access_entry_count = cpu_to_le16(count); - else if (acl_type == ACL_TYPE_DEFAULT) + cifs_acl->default_entry_count = __constant_cpu_to_le16(0xFFFF); + } else if (acl_type == ACL_TYPE_DEFAULT) { cifs_acl->default_entry_count = cpu_to_le16(count); - else { + cifs_acl->access_entry_count = __constant_cpu_to_le16(0xFFFF); + } else { cifs_dbg(FYI, "unknown ACL type %d\n", acl_type); return 0; } diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index e3bc39bb9d12..d05a30072023 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -377,6 +377,7 @@ cifs_reconnect(struct TCP_Server_Info *server) try_to_freeze(); /* we should try only the port we connected to before */ + mutex_lock(&server->srv_mutex); rc = generic_ip_connect(server); if (rc) { cifs_dbg(FYI, "reconnect error %d\n", rc); @@ -388,6 +389,7 @@ cifs_reconnect(struct TCP_Server_Info *server) server->tcpStatus = CifsNeedNegotiate; spin_unlock(&GlobalMid_Lock); } + mutex_unlock(&server->srv_mutex); } while (server->tcpStatus == CifsNeedReconnect); return rc; @@ -1662,7 +1664,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (string == NULL) goto out_nomem; - if (strnlen(string, 256) == 256) { + if (strnlen(string, CIFS_MAX_DOMAINNAME_LEN) + == CIFS_MAX_DOMAINNAME_LEN) { printk(KERN_WARNING "CIFS: domain name too" " long\n"); goto cifs_parse_mount_err; @@ -2288,8 +2291,8 @@ cifs_put_smb_ses(struct cifs_ses *ses) #ifdef CONFIG_KEYS -/* strlen("cifs:a:") + INET6_ADDRSTRLEN + 1 */ -#define CIFSCREDS_DESC_SIZE (7 + INET6_ADDRSTRLEN + 1) +/* strlen("cifs:a:") + CIFS_MAX_DOMAINNAME_LEN + 1 */ +#define CIFSCREDS_DESC_SIZE (7 + CIFS_MAX_DOMAINNAME_LEN + 1) /* Populate username and pw fields from keyring if possible */ static int diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 5699b5036ed8..0c2425b21974 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -491,6 +491,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, if (server->ops->close) server->ops->close(xid, tcon, &fid); cifs_del_pending_open(&open); + fput(file); rc = -ENOMEM; } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 48b29d24c9f4..c2934f8701da 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -553,11 +553,10 @@ cifs_relock_file(struct cifsFileInfo *cfile) struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); int rc = 0; - /* we are going to update can_cache_brlcks here - need a write access */ - down_write(&cinode->lock_sem); + down_read(&cinode->lock_sem); if (cinode->can_cache_brlcks) { - /* can cache locks - no need to push them */ - up_write(&cinode->lock_sem); + /* can cache locks - no need to relock */ + up_read(&cinode->lock_sem); return rc; } @@ -568,7 +567,7 @@ cifs_relock_file(struct cifsFileInfo *cfile) else rc = tcon->ses->server->ops->push_mand_locks(cfile); - up_write(&cinode->lock_sem); + up_read(&cinode->lock_sem); return rc; } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 20efd81266c6..9d463501348f 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -490,10 +490,15 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path, return PTR_ERR(tlink); tcon = tlink_tcon(tlink); - rc = CIFSSMBQAllEAs(xid, tcon, path, "SETFILEBITS", - ea_value, 4 /* size of buf */, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + if (tcon->ses->server->ops->query_all_EAs == NULL) { + cifs_put_tlink(tlink); + return -EOPNOTSUPP; + } + + rc = tcon->ses->server->ops->query_all_EAs(xid, tcon, path, + "SETFILEBITS", ea_value, 4 /* size of buf */, + cifs_sb->local_nls, + cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); cifs_put_tlink(tlink); if (rc < 0) return (int)rc; @@ -558,6 +563,11 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info, fattr->cf_mode &= ~(S_IWUGO); fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks); + if (fattr->cf_nlink < 1) { + cifs_dbg(1, "replacing bogus file nlink value %u\n", + fattr->cf_nlink); + fattr->cf_nlink = 1; + } } fattr->cf_uid = cifs_sb->mnt_uid; diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 770d5a9781c1..036279c064ff 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -111,6 +111,14 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, return; } + /* + * If we know that the inode will need to be revalidated immediately, + * then don't create a new dentry for it. We'll end up doing an on + * the wire call either way and this spares us an invalidation. + */ + if (fattr->cf_flags & CIFS_FATTR_NEED_REVAL) + return; + dentry = d_alloc(parent, name); if (!dentry) return; diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index f230571a7ab3..8edc9eb1ef7b 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -198,7 +198,7 @@ static void unicode_domain_string(char **pbcc_area, struct cifs_ses *ses, bytes_ret = 0; } else bytes_ret = cifs_strtoUTF16((__le16 *) bcc_ptr, ses->domainName, - 256, nls_cp); + CIFS_MAX_DOMAINNAME_LEN, nls_cp); bcc_ptr += 2 * bytes_ret; bcc_ptr += 2; /* account for null terminator */ @@ -256,8 +256,8 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses, /* copy domain */ if (ses->domainName != NULL) { - strncpy(bcc_ptr, ses->domainName, 256); - bcc_ptr += strnlen(ses->domainName, 256); + strncpy(bcc_ptr, ses->domainName, CIFS_MAX_DOMAINNAME_LEN); + bcc_ptr += strnlen(ses->domainName, CIFS_MAX_DOMAINNAME_LEN); } /* else we will send a null domain name so the server will default to its own domain */ *bcc_ptr = 0; diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 3efdb9d5c0b8..4885a40f3210 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -948,6 +948,14 @@ struct smb_version_operations smb1_operations = { .mand_lock = cifs_mand_lock, .mand_unlock_range = cifs_unlock_range, .push_mand_locks = cifs_push_mandatory_locks, +#ifdef CONFIG_CIFS_XATTR + .query_all_EAs = CIFSSMBQAllEAs, + .set_EA = CIFSSMBSetEA, +#endif /* CIFS_XATTR */ +#ifdef CONFIG_CIFS_ACL + .get_acl = get_cifs_acl, + .set_acl = set_cifs_acl, +#endif /* CIFS_ACL */ }; struct smb_version_values smb1_values = { diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 10383d8c015b..4f791e0e98d7 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -413,96 +413,108 @@ cifs_ses_oplock_break(struct work_struct *work) } static bool -smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server) +smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp, + struct smb2_lease_break_work *lw) { - struct smb2_lease_break *rsp = (struct smb2_lease_break *)buffer; - struct list_head *tmp, *tmp1, *tmp2; - struct cifs_ses *ses; - struct cifs_tcon *tcon; - struct cifsInodeInfo *cinode; + bool found; + __u8 lease_state; + struct list_head *tmp; struct cifsFileInfo *cfile; struct cifs_pending_open *open; - struct smb2_lease_break_work *lw; - bool found; + struct cifsInodeInfo *cinode; int ack_req = le32_to_cpu(rsp->Flags & SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED); - lw = kmalloc(sizeof(struct smb2_lease_break_work), GFP_KERNEL); - if (!lw) - return false; + lease_state = smb2_map_lease_to_oplock(rsp->NewLeaseState); - INIT_WORK(&lw->lease_break, cifs_ses_oplock_break); - lw->lease_state = rsp->NewLeaseState; + list_for_each(tmp, &tcon->openFileList) { + cfile = list_entry(tmp, struct cifsFileInfo, tlist); + cinode = CIFS_I(cfile->dentry->d_inode); - cifs_dbg(FYI, "Checking for lease break\n"); + if (memcmp(cinode->lease_key, rsp->LeaseKey, + SMB2_LEASE_KEY_SIZE)) + continue; - /* look up tcon based on tid & uid */ - spin_lock(&cifs_tcp_ses_lock); - list_for_each(tmp, &server->smb_ses_list) { - ses = list_entry(tmp, struct cifs_ses, smb_ses_list); + cifs_dbg(FYI, "found in the open list\n"); + cifs_dbg(FYI, "lease key match, lease break 0x%d\n", + le32_to_cpu(rsp->NewLeaseState)); - spin_lock(&cifs_file_list_lock); - list_for_each(tmp1, &ses->tcon_list) { - tcon = list_entry(tmp1, struct cifs_tcon, tcon_list); + smb2_set_oplock_level(cinode, lease_state); - cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks); - list_for_each(tmp2, &tcon->openFileList) { - cfile = list_entry(tmp2, struct cifsFileInfo, - tlist); - cinode = CIFS_I(cfile->dentry->d_inode); + if (ack_req) + cfile->oplock_break_cancelled = false; + else + cfile->oplock_break_cancelled = true; - if (memcmp(cinode->lease_key, rsp->LeaseKey, - SMB2_LEASE_KEY_SIZE)) - continue; + queue_work(cifsiod_wq, &cfile->oplock_break); + kfree(lw); + return true; + } - cifs_dbg(FYI, "found in the open list\n"); - cifs_dbg(FYI, "lease key match, lease break 0x%d\n", - le32_to_cpu(rsp->NewLeaseState)); + found = false; + list_for_each_entry(open, &tcon->pending_opens, olist) { + if (memcmp(open->lease_key, rsp->LeaseKey, + SMB2_LEASE_KEY_SIZE)) + continue; + + if (!found && ack_req) { + found = true; + memcpy(lw->lease_key, open->lease_key, + SMB2_LEASE_KEY_SIZE); + lw->tlink = cifs_get_tlink(open->tlink); + queue_work(cifsiod_wq, &lw->lease_break); + } - smb2_set_oplock_level(cinode, - smb2_map_lease_to_oplock(rsp->NewLeaseState)); + cifs_dbg(FYI, "found in the pending open list\n"); + cifs_dbg(FYI, "lease key match, lease break 0x%d\n", + le32_to_cpu(rsp->NewLeaseState)); - if (ack_req) - cfile->oplock_break_cancelled = false; - else - cfile->oplock_break_cancelled = true; + open->oplock = lease_state; + } + return found; +} - queue_work(cifsiod_wq, &cfile->oplock_break); +static bool +smb2_is_valid_lease_break(char *buffer) +{ + struct smb2_lease_break *rsp = (struct smb2_lease_break *)buffer; + struct list_head *tmp, *tmp1, *tmp2; + struct TCP_Server_Info *server; + struct cifs_ses *ses; + struct cifs_tcon *tcon; + struct smb2_lease_break_work *lw; - spin_unlock(&cifs_file_list_lock); - spin_unlock(&cifs_tcp_ses_lock); - return true; - } + lw = kmalloc(sizeof(struct smb2_lease_break_work), GFP_KERNEL); + if (!lw) + return false; - found = false; - list_for_each_entry(open, &tcon->pending_opens, olist) { - if (memcmp(open->lease_key, rsp->LeaseKey, - SMB2_LEASE_KEY_SIZE)) - continue; + INIT_WORK(&lw->lease_break, cifs_ses_oplock_break); + lw->lease_state = rsp->NewLeaseState; - if (!found && ack_req) { - found = true; - memcpy(lw->lease_key, open->lease_key, - SMB2_LEASE_KEY_SIZE); - lw->tlink = cifs_get_tlink(open->tlink); - queue_work(cifsiod_wq, - &lw->lease_break); - } + cifs_dbg(FYI, "Checking for lease break\n"); + + /* look up tcon based on tid & uid */ + spin_lock(&cifs_tcp_ses_lock); + list_for_each(tmp, &cifs_tcp_ses_list) { + server = list_entry(tmp, struct TCP_Server_Info, tcp_ses_list); - cifs_dbg(FYI, "found in the pending open list\n"); - cifs_dbg(FYI, "lease key match, lease break 0x%d\n", - le32_to_cpu(rsp->NewLeaseState)); + list_for_each(tmp1, &server->smb_ses_list) { + ses = list_entry(tmp1, struct cifs_ses, smb_ses_list); - open->oplock = - smb2_map_lease_to_oplock(rsp->NewLeaseState); - } - if (found) { - spin_unlock(&cifs_file_list_lock); - spin_unlock(&cifs_tcp_ses_lock); - return true; + spin_lock(&cifs_file_list_lock); + list_for_each(tmp2, &ses->tcon_list) { + tcon = list_entry(tmp2, struct cifs_tcon, + tcon_list); + cifs_stats_inc( + &tcon->stats.cifs_stats.num_oplock_brks); + if (smb2_tcon_has_lease(tcon, rsp, lw)) { + spin_unlock(&cifs_file_list_lock); + spin_unlock(&cifs_tcp_ses_lock); + return true; + } } + spin_unlock(&cifs_file_list_lock); } - spin_unlock(&cifs_file_list_lock); } spin_unlock(&cifs_tcp_ses_lock); kfree(lw); @@ -528,7 +540,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) if (rsp->StructureSize != smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) { if (le16_to_cpu(rsp->StructureSize) == 44) - return smb2_is_valid_lease_break(buffer, server); + return smb2_is_valid_lease_break(buffer); else return false; } diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c index 09afda4cc58e..5ac836a86b18 100644 --- a/fs/cifs/xattr.c +++ b/fs/cifs/xattr.c @@ -82,9 +82,11 @@ int cifs_removexattr(struct dentry *direntry, const char *ea_name) goto remove_ea_exit; ea_name += XATTR_USER_PREFIX_LEN; /* skip past user. prefix */ - rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, NULL, - (__u16)0, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + if (pTcon->ses->server->ops->set_EA) + rc = pTcon->ses->server->ops->set_EA(xid, pTcon, + full_path, ea_name, NULL, (__u16)0, + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); } remove_ea_exit: kfree(full_path); @@ -149,18 +151,22 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, cifs_dbg(FYI, "attempt to set cifs inode metadata\n"); ea_name += XATTR_USER_PREFIX_LEN; /* skip past user. prefix */ - rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value, - (__u16)value_size, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + if (pTcon->ses->server->ops->set_EA) + rc = pTcon->ses->server->ops->set_EA(xid, pTcon, + full_path, ea_name, ea_value, (__u16)value_size, + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); } else if (strncmp(ea_name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) { if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) goto set_ea_exit; ea_name += XATTR_OS2_PREFIX_LEN; /* skip past os2. prefix */ - rc = CIFSSMBSetEA(xid, pTcon, full_path, ea_name, ea_value, - (__u16)value_size, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + if (pTcon->ses->server->ops->set_EA) + rc = pTcon->ses->server->ops->set_EA(xid, pTcon, + full_path, ea_name, ea_value, (__u16)value_size, + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); } else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL, strlen(CIFS_XATTR_CIFS_ACL)) == 0) { #ifdef CONFIG_CIFS_ACL @@ -170,8 +176,12 @@ int cifs_setxattr(struct dentry *direntry, const char *ea_name, rc = -ENOMEM; } else { memcpy(pacl, ea_value, value_size); - rc = set_cifs_acl(pacl, value_size, - direntry->d_inode, full_path, CIFS_ACL_DACL); + if (pTcon->ses->server->ops->set_acl) + rc = pTcon->ses->server->ops->set_acl(pacl, + value_size, direntry->d_inode, + full_path, CIFS_ACL_DACL); + else + rc = -EOPNOTSUPP; if (rc == 0) /* force revalidate of the inode */ CIFS_I(direntry->d_inode)->time = 0; kfree(pacl); @@ -272,17 +282,21 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, /* revalidate/getattr then populate from inode */ } /* BB add else when above is implemented */ ea_name += XATTR_USER_PREFIX_LEN; /* skip past user. prefix */ - rc = CIFSSMBQAllEAs(xid, pTcon, full_path, ea_name, ea_value, - buf_size, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + if (pTcon->ses->server->ops->query_all_EAs) + rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon, + full_path, ea_name, ea_value, buf_size, + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); } else if (strncmp(ea_name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) { if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR) goto get_ea_exit; ea_name += XATTR_OS2_PREFIX_LEN; /* skip past os2. prefix */ - rc = CIFSSMBQAllEAs(xid, pTcon, full_path, ea_name, ea_value, - buf_size, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + if (pTcon->ses->server->ops->query_all_EAs) + rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon, + full_path, ea_name, ea_value, buf_size, + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); } else if (strncmp(ea_name, POSIX_ACL_XATTR_ACCESS, strlen(POSIX_ACL_XATTR_ACCESS)) == 0) { #ifdef CONFIG_CIFS_POSIX @@ -313,8 +327,11 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name, u32 acllen; struct cifs_ntsd *pacl; - pacl = get_cifs_acl(cifs_sb, direntry->d_inode, - full_path, &acllen); + if (pTcon->ses->server->ops->get_acl == NULL) + goto get_ea_exit; /* rc already EOPNOTSUPP */ + + pacl = pTcon->ses->server->ops->get_acl(cifs_sb, + direntry->d_inode, full_path, &acllen); if (IS_ERR(pacl)) { rc = PTR_ERR(pacl); cifs_dbg(VFS, "%s: error %zd getting sec desc\n", @@ -400,11 +417,12 @@ ssize_t cifs_listxattr(struct dentry *direntry, char *data, size_t buf_size) /* if proc/fs/cifs/streamstoxattr is set then search server for EAs or streams to returns as xattrs */ - rc = CIFSSMBQAllEAs(xid, pTcon, full_path, NULL, data, - buf_size, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + if (pTcon->ses->server->ops->query_all_EAs) + rc = pTcon->ses->server->ops->query_all_EAs(xid, pTcon, + full_path, NULL, data, buf_size, + cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); list_ea_exit: kfree(full_path); free_xid(xid); diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index 7aabc6ad4e9b..fa38d076697d 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -56,10 +56,19 @@ static void configfs_d_iput(struct dentry * dentry, struct configfs_dirent *sd = dentry->d_fsdata; if (sd) { - BUG_ON(sd->s_dentry != dentry); /* Coordinate with configfs_readdir */ spin_lock(&configfs_dirent_lock); - sd->s_dentry = NULL; + /* Coordinate with configfs_attach_attr where will increase + * sd->s_count and update sd->s_dentry to new allocated one. + * Only set sd->dentry to null when this dentry is the only + * sd owner. + * If not do so, configfs_d_iput may run just after + * configfs_attach_attr and set sd->s_dentry to null + * even it's still in use. + */ + if (atomic_read(&sd->s_count) <= 2) + sd->s_dentry = NULL; + spin_unlock(&configfs_dirent_lock); configfs_put(sd); } @@ -426,8 +435,11 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den struct configfs_attribute * attr = sd->s_element; int error; + spin_lock(&configfs_dirent_lock); dentry->d_fsdata = configfs_get(sd); sd->s_dentry = dentry; + spin_unlock(&configfs_dirent_lock); + error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG, configfs_init_file); if (error) { diff --git a/fs/dcache.c b/fs/dcache.c index f09b9085f7d8..9a59653d3449 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2686,8 +2686,13 @@ char *d_path(const struct path *path, char *buf, int buflen) * thus don't need to be hashed. They also don't need a name until a * user wants to identify the object in /proc/pid/fd/. The little hack * below allows us to generate a name for these objects on demand: + * + * Some pseudo inodes are mountable. When they are mounted + * path->dentry == path->mnt->mnt_root. In that case don't call d_dname + * and instead have d_path return the mounted path. */ - if (path->dentry->d_op && path->dentry->d_op->d_dname) + if (path->dentry->d_op && path->dentry->d_op->d_dname && + (!IS_ROOT(path->dentry) || path->dentry != path->mnt->mnt_root)) return path->dentry->d_op->d_dname(path->dentry, buf, buflen); get_fs_root(current->fs, &root); @@ -2724,6 +2729,17 @@ char *dynamic_dname(struct dentry *dentry, char *buffer, int buflen, return memcpy(buffer, temp, sz); } +char *simple_dname(struct dentry *dentry, char *buffer, int buflen) +{ + char *end = buffer + buflen; + /* these dentries are never renamed, so d_lock is not needed */ + if (prepend(&end, &buflen, " (deleted)", 11) || + prepend_name(&end, &buflen, &dentry->d_name) || + prepend(&end, &buflen, "/", 1)) + end = ERR_PTR(-ENAMETOOLONG); + return end; +} + /* * Write full pathname from the root of the filesystem into the buffer. */ diff --git a/fs/dcookies.c b/fs/dcookies.c index ab5954b50267..ac44a69fbea9 100644 --- a/fs/dcookies.c +++ b/fs/dcookies.c @@ -204,7 +204,7 @@ out: } #ifdef CONFIG_COMPAT -COMPAT_SYSCALL_DEFINE4(lookup_dcookie, u32, w0, u32, w1, char __user *, buf, size_t, len) +COMPAT_SYSCALL_DEFINE4(lookup_dcookie, u32, w0, u32, w1, char __user *, buf, compat_size_t, len) { #ifdef __BIG_ENDIAN return sys_lookup_dcookie(((u64)w0 << 32) | w1, buf, len); diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 4888cb3fdef7..c7c83ff0f752 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -533,8 +533,7 @@ EXPORT_SYMBOL_GPL(debugfs_remove); */ void debugfs_remove_recursive(struct dentry *dentry) { - struct dentry *child; - struct dentry *parent; + struct dentry *child, *next, *parent; if (IS_ERR_OR_NULL(dentry)) return; @@ -544,61 +543,37 @@ void debugfs_remove_recursive(struct dentry *dentry) return; parent = dentry; + down: mutex_lock(&parent->d_inode->i_mutex); + list_for_each_entry_safe(child, next, &parent->d_subdirs, d_u.d_child) { + if (!debugfs_positive(child)) + continue; - while (1) { - /* - * When all dentries under "parent" has been removed, - * walk up the tree until we reach our starting point. - */ - if (list_empty(&parent->d_subdirs)) { - mutex_unlock(&parent->d_inode->i_mutex); - if (parent == dentry) - break; - parent = parent->d_parent; - mutex_lock(&parent->d_inode->i_mutex); - } - child = list_entry(parent->d_subdirs.next, struct dentry, - d_u.d_child); - next_sibling: - - /* - * If "child" isn't empty, walk down the tree and - * remove all its descendants first. - */ + /* perhaps simple_empty(child) makes more sense */ if (!list_empty(&child->d_subdirs)) { mutex_unlock(&parent->d_inode->i_mutex); parent = child; - mutex_lock(&parent->d_inode->i_mutex); - continue; + goto down; } - __debugfs_remove(child, parent); - if (parent->d_subdirs.next == &child->d_u.d_child) { - /* - * Try the next sibling. - */ - if (child->d_u.d_child.next != &parent->d_subdirs) { - child = list_entry(child->d_u.d_child.next, - struct dentry, - d_u.d_child); - goto next_sibling; - } - - /* - * Avoid infinite loop if we fail to remove - * one dentry. - */ - mutex_unlock(&parent->d_inode->i_mutex); - break; - } - simple_release_fs(&debugfs_mount, &debugfs_mount_count); + up: + if (!__debugfs_remove(child, parent)) + simple_release_fs(&debugfs_mount, &debugfs_mount_count); } - parent = dentry->d_parent; + mutex_unlock(&parent->d_inode->i_mutex); + child = parent; + parent = parent->d_parent; mutex_lock(&parent->d_inode->i_mutex); - __debugfs_remove(dentry, parent); + + if (child != dentry) { + next = list_entry(child->d_u.d_child.next, struct dentry, + d_u.d_child); + goto up; + } + + if (!__debugfs_remove(child, parent)) + simple_release_fs(&debugfs_mount, &debugfs_mount_count); mutex_unlock(&parent->d_inode->i_mutex); - simple_release_fs(&debugfs_mount, &debugfs_mount_count); } EXPORT_SYMBOL_GPL(debugfs_remove_recursive); diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 073d30b9d1ac..a726b9f29cb7 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -498,6 +498,7 @@ static void devpts_kill_sb(struct super_block *sb) { struct pts_fs_info *fsi = DEVPTS_SB(sb); + ida_destroy(&fsi->allocated_ptys); kfree(fsi); kill_litter_super(sb); } diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 7d52806c2119..4725a07f003c 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1149,7 +1149,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, struct ecryptfs_msg_ctx *msg_ctx; struct ecryptfs_message *msg = NULL; char *auth_tok_sig; - char *payload; + char *payload = NULL; size_t payload_len = 0; int rc; @@ -1203,6 +1203,7 @@ decrypt_pki_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok, } out: kfree(msg); + kfree(payload); return rc; } diff --git a/fs/exec.c b/fs/exec.c index ffd7a813ad3d..bb60cda5ee30 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -607,7 +607,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) return -ENOMEM; lru_add_drain(); - tlb_gather_mmu(&tlb, mm, 0); + tlb_gather_mmu(&tlb, mm, old_start, old_end); if (new_end > old_start) { /* * when the old and new regions overlap clear from new_end. @@ -624,7 +624,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift) free_pgd_range(&tlb, old_start, old_end, new_end, vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING); } - tlb_finish_mmu(&tlb, new_end, old_end); + tlb_finish_mmu(&tlb, old_start, old_end); /* * Shrink the vma to just the new range. Always succeeds. @@ -1669,6 +1669,12 @@ int __get_dumpable(unsigned long mm_flags) return (ret > SUID_DUMP_USER) ? SUID_DUMP_ROOT : ret; } +/* + * This returns the actual value of the suid_dumpable flag. For things + * that are using this for checking for privilege transitions, it must + * test against SUID_DUMP_USER rather than treating it as a boolean + * value. + */ int get_dumpable(struct mm_struct *mm) { return __get_dumpable(mm->flags); diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index b74422888604..85cde3e76290 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c @@ -103,7 +103,7 @@ int ore_verify_layout(unsigned total_comps, struct ore_layout *layout) layout->max_io_length = (BIO_MAX_PAGES_KMALLOC * PAGE_SIZE - layout->stripe_unit) * - layout->group_width; + (layout->group_width - layout->parity); if (layout->parity) { unsigned stripe_length = (layout->group_width - layout->parity) * @@ -286,7 +286,8 @@ int ore_get_rw_state(struct ore_layout *layout, struct ore_components *oc, if (length) { ore_calc_stripe_info(layout, offset, length, &ios->si); ios->length = ios->si.length; - ios->nr_pages = (ios->length + PAGE_SIZE - 1) / PAGE_SIZE; + ios->nr_pages = ((ios->offset & (PAGE_SIZE - 1)) + + ios->length + PAGE_SIZE - 1) / PAGE_SIZE; if (layout->parity) _ore_post_alloc_raid_stuff(ios); } @@ -536,6 +537,7 @@ void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset, u64 H = LmodS - G * T; u32 N = div_u64(H, U); + u32 Nlast; /* "H - (N * U)" is just "H % U" so it's bound to u32 */ u32 C = (u32)(H - (N * U)) / stripe_unit + G * group_width; @@ -568,6 +570,10 @@ void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset, si->length = T - H; if (si->length > length) si->length = length; + + Nlast = div_u64(H + si->length + U - 1, U); + si->maxdevUnits = Nlast - N; + si->M = M; } EXPORT_SYMBOL(ore_calc_stripe_info); @@ -583,13 +589,16 @@ int _ore_add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg, int ret; if (per_dev->bio == NULL) { - unsigned pages_in_stripe = ios->layout->group_width * - (ios->layout->stripe_unit / PAGE_SIZE); - unsigned nr_pages = ios->nr_pages * ios->layout->group_width / - (ios->layout->group_width - - ios->layout->parity); - unsigned bio_size = (nr_pages + pages_in_stripe) / - ios->layout->group_width; + unsigned bio_size; + + if (!ios->reading) { + bio_size = ios->si.maxdevUnits; + } else { + bio_size = (ios->si.maxdevUnits + 1) * + (ios->layout->group_width - ios->layout->parity) / + ios->layout->group_width; + } + bio_size *= (ios->layout->stripe_unit / PAGE_SIZE); per_dev->bio = bio_kmalloc(GFP_KERNEL, bio_size); if (unlikely(!per_dev->bio)) { @@ -609,8 +618,12 @@ int _ore_add_stripe_unit(struct ore_io_state *ios, unsigned *cur_pg, added_len = bio_add_pc_page(q, per_dev->bio, pages[pg], pglen, pgbase); if (unlikely(pglen != added_len)) { - ORE_DBGMSG("Failed bio_add_pc_page bi_vcnt=%u\n", - per_dev->bio->bi_vcnt); + /* If bi_vcnt == bi_max then this is a SW BUG */ + ORE_DBGMSG("Failed bio_add_pc_page bi_vcnt=0x%x " + "bi_max=0x%x BIO_MAX=0x%x cur_len=0x%x\n", + per_dev->bio->bi_vcnt, + per_dev->bio->bi_max_vecs, + BIO_MAX_PAGES_KMALLOC, cur_len); ret = -ENOMEM; goto out; } @@ -1098,7 +1111,7 @@ int ore_truncate(struct ore_layout *layout, struct ore_components *oc, size_attr->attr = g_attr_logical_length; size_attr->attr.val_ptr = &size_attr->newsize; - ORE_DBGMSG("trunc(0x%llx) obj_offset=0x%llx dev=%d\n", + ORE_DBGMSG2("trunc(0x%llx) obj_offset=0x%llx dev=%d\n", _LLU(oc->comps->obj.id), _LLU(obj_size), i); ret = _truncate_mirrors(ios, i * ios->layout->mirrors_p1, &size_attr->attr); diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 692de13e3596..cea8ecf3e76e 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -576,11 +576,8 @@ static int htree_dirblock_to_tree(struct file *dir_file, if (!ext3_check_dir_entry("htree_dirblock_to_tree", dir, de, bh, (block<<EXT3_BLOCK_SIZE_BITS(dir->i_sb)) +((char *)de - bh->b_data))) { - /* On error, skip the f_pos to the next block. */ - dir_file->f_pos = (dir_file->f_pos | - (dir->i_sb->s_blocksize - 1)) + 1; - brelse (bh); - return count; + /* silently ignore the rest of the block */ + break; } ext3fs_dirhash(de->name, de->name_len, hinfo); if ((hinfo->hash < start_hash) || diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index d0f13eada0ed..3742e4c85723 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -38,8 +38,8 @@ ext4_group_t ext4_get_group_number(struct super_block *sb, ext4_group_t group; if (test_opt2(sb, STD_GROUP_SIZE)) - group = (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) + - block) >> + group = (block - + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) >> (EXT4_BLOCK_SIZE_BITS(sb) + EXT4_CLUSTER_BITS(sb) + 3); else ext4_get_group_no_and_offset(sb, block, &group, NULL); diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 5aae3d12d400..7bb2e2e55123 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -280,6 +280,16 @@ struct ext4_io_submit { /* Translate # of blks to # of clusters */ #define EXT4_NUM_B2C(sbi, blks) (((blks) + (sbi)->s_cluster_ratio - 1) >> \ (sbi)->s_cluster_bits) +/* Mask out the low bits to get the starting block of the cluster */ +#define EXT4_PBLK_CMASK(s, pblk) ((pblk) & \ + ~((ext4_fsblk_t) (s)->s_cluster_ratio - 1)) +#define EXT4_LBLK_CMASK(s, lblk) ((lblk) & \ + ~((ext4_lblk_t) (s)->s_cluster_ratio - 1)) +/* Get the cluster offset */ +#define EXT4_PBLK_COFF(s, pblk) ((pblk) & \ + ((ext4_fsblk_t) (s)->s_cluster_ratio - 1)) +#define EXT4_LBLK_COFF(s, lblk) ((lblk) & \ + ((ext4_lblk_t) (s)->s_cluster_ratio - 1)) /* * Structure of a blocks group descriptor diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 451eb4045330..1be3996b5942 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -219,10 +219,19 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line, set_buffer_prio(bh); if (ext4_handle_valid(handle)) { err = jbd2_journal_dirty_metadata(handle, bh); - if (err) { - /* Errors can only happen if there is a bug */ - handle->h_err = err; - __ext4_journal_stop(where, line, handle); + /* Errors can only happen if there is a bug */ + if (WARN_ON_ONCE(err)) { + ext4_journal_abort_handle(where, line, __func__, bh, + handle, err); + ext4_error_inode(inode, where, line, + bh->b_blocknr, + "journal_dirty_metadata failed: " + "handle type %u started at line %u, " + "credits %u/%u, errcode %d", + handle->h_type, + handle->h_line_no, + handle->h_requested_credits, + handle->h_buffer_credits, err); } } else { if (inode) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index bc0f1910b9cf..a2b625e279db 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -360,8 +360,10 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) { ext4_fsblk_t block = ext4_ext_pblock(ext); int len = ext4_ext_get_actual_len(ext); + ext4_lblk_t lblock = le32_to_cpu(ext->ee_block); + ext4_lblk_t last = lblock + len - 1; - if (len == 0) + if (lblock > last) return 0; return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); } @@ -387,11 +389,26 @@ static int ext4_valid_extent_entries(struct inode *inode, if (depth == 0) { /* leaf entries */ struct ext4_extent *ext = EXT_FIRST_EXTENT(eh); + struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es; + ext4_fsblk_t pblock = 0; + ext4_lblk_t lblock = 0; + ext4_lblk_t prev = 0; + int len = 0; while (entries) { if (!ext4_valid_extent(inode, ext)) return 0; + + /* Check for overlapping extents */ + lblock = le32_to_cpu(ext->ee_block); + len = ext4_ext_get_actual_len(ext); + if ((lblock <= prev) && prev) { + pblock = ext4_ext_pblock(ext); + es->s_last_error_block = cpu_to_le64(pblock); + return 0; + } ext++; entries--; + prev = lblock + len - 1; } } else { struct ext4_extent_idx *ext_idx = EXT_FIRST_INDEX(eh); @@ -1755,8 +1772,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi, depth = ext_depth(inode); if (!path[depth].p_ext) goto out; - b2 = le32_to_cpu(path[depth].p_ext->ee_block); - b2 &= ~(sbi->s_cluster_ratio - 1); + b2 = EXT4_LBLK_CMASK(sbi, le32_to_cpu(path[depth].p_ext->ee_block)); /* * get the next allocated block if the extent in the path @@ -1766,7 +1782,7 @@ static unsigned int ext4_ext_check_overlap(struct ext4_sb_info *sbi, b2 = ext4_ext_next_allocated_block(path); if (b2 == EXT_MAX_BLOCKS) goto out; - b2 &= ~(sbi->s_cluster_ratio - 1); + b2 = EXT4_LBLK_CMASK(sbi, b2); } /* check for wrap through zero on extent logical start block*/ @@ -2427,7 +2443,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, * truncate operation has removed all of the blocks in * the cluster. */ - if (pblk & (sbi->s_cluster_ratio - 1) && + if (EXT4_PBLK_COFF(sbi, pblk) && (ee_len == num)) *partial_cluster = EXT4_B2C(sbi, pblk); else @@ -3658,7 +3674,7 @@ int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); ext4_lblk_t lblk_start, lblk_end; - lblk_start = lblk & (~(sbi->s_cluster_ratio - 1)); + lblk_start = EXT4_LBLK_CMASK(sbi, lblk); lblk_end = lblk_start + sbi->s_cluster_ratio - 1; return ext4_find_delalloc_range(inode, lblk_start, lblk_end); @@ -3717,9 +3733,9 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start, trace_ext4_get_reserved_cluster_alloc(inode, lblk_start, num_blks); /* Check towards left side */ - c_offset = lblk_start & (sbi->s_cluster_ratio - 1); + c_offset = EXT4_LBLK_COFF(sbi, lblk_start); if (c_offset) { - lblk_from = lblk_start & (~(sbi->s_cluster_ratio - 1)); + lblk_from = EXT4_LBLK_CMASK(sbi, lblk_start); lblk_to = lblk_from + c_offset - 1; if (ext4_find_delalloc_range(inode, lblk_from, lblk_to)) @@ -3727,7 +3743,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start, } /* Now check towards right. */ - c_offset = (lblk_start + num_blks) & (sbi->s_cluster_ratio - 1); + c_offset = EXT4_LBLK_COFF(sbi, lblk_start + num_blks); if (allocated_clusters && c_offset) { lblk_from = lblk_start + num_blks; lblk_to = lblk_from + (sbi->s_cluster_ratio - c_offset) - 1; @@ -3935,7 +3951,7 @@ static int get_implied_cluster_alloc(struct super_block *sb, struct ext4_ext_path *path) { struct ext4_sb_info *sbi = EXT4_SB(sb); - ext4_lblk_t c_offset = map->m_lblk & (sbi->s_cluster_ratio-1); + ext4_lblk_t c_offset = EXT4_LBLK_COFF(sbi, map->m_lblk); ext4_lblk_t ex_cluster_start, ex_cluster_end; ext4_lblk_t rr_cluster_start; ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block); @@ -3953,8 +3969,7 @@ static int get_implied_cluster_alloc(struct super_block *sb, (rr_cluster_start == ex_cluster_start)) { if (rr_cluster_start == ex_cluster_end) ee_start += ee_len - 1; - map->m_pblk = (ee_start & ~(sbi->s_cluster_ratio - 1)) + - c_offset; + map->m_pblk = EXT4_PBLK_CMASK(sbi, ee_start) + c_offset; map->m_len = min(map->m_len, (unsigned) sbi->s_cluster_ratio - c_offset); /* @@ -4108,7 +4123,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, */ map->m_flags &= ~EXT4_MAP_FROM_CLUSTER; newex.ee_block = cpu_to_le32(map->m_lblk); - cluster_offset = map->m_lblk & (sbi->s_cluster_ratio-1); + cluster_offset = EXT4_LBLK_COFF(sbi, map->m_lblk); /* * If we are doing bigalloc, check to see if the extent returned @@ -4176,7 +4191,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, * needed so that future calls to get_implied_cluster_alloc() * work correctly. */ - offset = map->m_lblk & (sbi->s_cluster_ratio - 1); + offset = EXT4_LBLK_COFF(sbi, map->m_lblk); ar.len = EXT4_NUM_B2C(sbi, offset+allocated); ar.goal -= offset; ar.logical -= offset; @@ -4386,9 +4401,20 @@ void ext4_ext_truncate(handle_t *handle, struct inode *inode) last_block = (inode->i_size + sb->s_blocksize - 1) >> EXT4_BLOCK_SIZE_BITS(sb); +retry: err = ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block); + if (err == -ENOMEM) { + cond_resched(); + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry; + } + if (err) { + ext4_std_error(inode->i_sb, err); + return; + } err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); + ext4_std_error(inode->i_sb, err); } static void ext4_falloc_update_inode(struct inode *inode, @@ -4659,7 +4685,7 @@ static int ext4_xattr_fiemap(struct inode *inode, error = ext4_get_inode_loc(inode, &iloc); if (error) return error; - physical = iloc.bh->b_blocknr << blockbits; + physical = (__u64)iloc.bh->b_blocknr << blockbits; offset = EXT4_GOOD_OLD_INODE_SIZE + EXT4_I(inode)->i_extra_isize; physical += offset; @@ -4667,7 +4693,7 @@ static int ext4_xattr_fiemap(struct inode *inode, flags |= FIEMAP_EXTENT_DATA_INLINE; brelse(iloc.bh); } else { /* external block */ - physical = EXT4_I(inode)->i_file_acl << blockbits; + physical = (__u64)EXT4_I(inode)->i_file_acl << blockbits; length = inode->i_sb->s_blocksize; } diff --git a/fs/ext4/file.c b/fs/ext4/file.c index b1b4d51b5d86..b19f0a457f32 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -312,7 +312,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, blkbits = inode->i_sb->s_blocksize_bits; startoff = *offset; lastoff = startoff; - endoff = (map->m_lblk + map->m_len) << blkbits; + endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits; index = startoff >> PAGE_CACHE_SHIFT; end = endoff >> PAGE_CACHE_SHIFT; @@ -457,7 +457,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) ret = ext4_map_blocks(NULL, inode, &map, 0); if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { if (last != start) - dataoff = last << blkbits; + dataoff = (loff_t)last << blkbits; break; } @@ -468,7 +468,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) ext4_es_find_delayed_extent_range(inode, last, last, &es); if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { if (last != start) - dataoff = last << blkbits; + dataoff = (loff_t)last << blkbits; break; } @@ -486,7 +486,7 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize) } last++; - dataoff = last << blkbits; + dataoff = (loff_t)last << blkbits; } while (last <= end); mutex_unlock(&inode->i_mutex); @@ -540,7 +540,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) ret = ext4_map_blocks(NULL, inode, &map, 0); if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) { last += ret; - holeoff = last << blkbits; + holeoff = (loff_t)last << blkbits; continue; } @@ -551,7 +551,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) ext4_es_find_delayed_extent_range(inode, last, last, &es); if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) { last = es.es_lblk + es.es_len; - holeoff = last << blkbits; + holeoff = (loff_t)last << blkbits; continue; } @@ -566,7 +566,7 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize) &map, &holeoff); if (!unwritten) { last += ret; - holeoff = last << blkbits; + holeoff = (loff_t)last << blkbits; continue; } } diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 00a818d67b54..3da3bf1b2cd0 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -734,11 +734,8 @@ repeat_in_this_group: ino = ext4_find_next_zero_bit((unsigned long *) inode_bitmap_bh->b_data, EXT4_INODES_PER_GROUP(sb), ino); - if (ino >= EXT4_INODES_PER_GROUP(sb)) { - if (++group == ngroups) - group = 0; - continue; - } + if (ino >= EXT4_INODES_PER_GROUP(sb)) + goto next_group; if (group == 0 && (ino+1) < EXT4_FIRST_INO(sb)) { ext4_error(sb, "reserved inode found cleared - " "inode=%lu", ino + 1); @@ -768,6 +765,9 @@ repeat_in_this_group: goto got; /* we grabbed the inode! */ if (ino < EXT4_INODES_PER_GROUP(sb)) goto repeat_in_this_group; +next_group: + if (++group == ngroups) + group = 0; } err = -ENOSPC; goto out; diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index 3e2bf873e8a8..e350be6c7ac6 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1842,7 +1842,7 @@ int ext4_inline_data_fiemap(struct inode *inode, if (error) goto out; - physical = iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; + physical = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; physical += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; physical += offsetof(struct ext4_inode, i_block); length = i_size_read(inode); @@ -1957,9 +1957,11 @@ void ext4_inline_data_truncate(struct inode *inode, int *has_inline) } /* Clear the content within i_blocks. */ - if (i_size < EXT4_MIN_INLINE_DATA_SIZE) - memset(ext4_raw_inode(&is.iloc)->i_block + i_size, 0, - EXT4_MIN_INLINE_DATA_SIZE - i_size); + if (i_size < EXT4_MIN_INLINE_DATA_SIZE) { + void *p = (void *) ext4_raw_inode(&is.iloc)->i_block; + memset(p + i_size, 0, + EXT4_MIN_INLINE_DATA_SIZE - i_size); + } EXT4_I(inode)->i_inline_size = i_size < EXT4_MIN_INLINE_DATA_SIZE ? diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d6382b89ecbd..cb2bdc7ccb05 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1118,10 +1118,13 @@ static int ext4_write_end(struct file *file, } } - if (ext4_has_inline_data(inode)) - copied = ext4_write_inline_data_end(inode, pos, len, - copied, page); - else + if (ext4_has_inline_data(inode)) { + ret = ext4_write_inline_data_end(inode, pos, len, + copied, page); + if (ret < 0) + goto errout; + copied = ret; + } else copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); @@ -1260,7 +1263,6 @@ static int ext4_journalled_write_end(struct file *file, */ static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock) { - int retries = 0; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); unsigned int md_needed; @@ -1272,7 +1274,6 @@ static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock) * in order to allocate nrblocks * worse case is one extent per block */ -repeat: spin_lock(&ei->i_block_reservation_lock); /* * ext4_calc_metadata_amount() has side effects, which we have @@ -1292,10 +1293,6 @@ repeat: ei->i_da_metadata_calc_len = save_len; ei->i_da_metadata_calc_last_lblock = save_last_lblock; spin_unlock(&ei->i_block_reservation_lock); - if (ext4_should_retry_alloc(inode->i_sb, &retries)) { - cond_resched(); - goto repeat; - } return -ENOSPC; } ei->i_reserved_meta_blocks += md_needed; @@ -1309,7 +1306,6 @@ repeat: */ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) { - int retries = 0; struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); struct ext4_inode_info *ei = EXT4_I(inode); unsigned int md_needed; @@ -1331,7 +1327,6 @@ static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) * in order to allocate nrblocks * worse case is one extent per block */ -repeat: spin_lock(&ei->i_block_reservation_lock); /* * ext4_calc_metadata_amount() has side effects, which we have @@ -1351,10 +1346,6 @@ repeat: ei->i_da_metadata_calc_len = save_len; ei->i_da_metadata_calc_last_lblock = save_last_lblock; spin_unlock(&ei->i_block_reservation_lock); - if (ext4_should_retry_alloc(inode->i_sb, &retries)) { - cond_resched(); - goto repeat; - } dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); return -ENOSPC; } @@ -4703,7 +4694,9 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) ext4_journal_stop(handle); } - if (attr->ia_valid & ATTR_SIZE) { + if (attr->ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) { + handle_t *handle; + loff_t oldsize = inode->i_size; if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); @@ -4711,73 +4704,60 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr) if (attr->ia_size > sbi->s_bitmap_maxbytes) return -EFBIG; } - } - - if (S_ISREG(inode->i_mode) && - attr->ia_valid & ATTR_SIZE && - (attr->ia_size < inode->i_size)) { - handle_t *handle; - - handle = ext4_journal_start(inode, EXT4_HT_INODE, 3); - if (IS_ERR(handle)) { - error = PTR_ERR(handle); - goto err_out; - } - if (ext4_handle_valid(handle)) { - error = ext4_orphan_add(handle, inode); - orphan = 1; - } - EXT4_I(inode)->i_disksize = attr->ia_size; - rc = ext4_mark_inode_dirty(handle, inode); - if (!error) - error = rc; - ext4_journal_stop(handle); - - if (ext4_should_order_data(inode)) { - error = ext4_begin_ordered_truncate(inode, + if (S_ISREG(inode->i_mode) && + (attr->ia_size < inode->i_size)) { + if (ext4_should_order_data(inode)) { + error = ext4_begin_ordered_truncate(inode, attr->ia_size); - if (error) { - /* Do as much error cleanup as possible */ - handle = ext4_journal_start(inode, - EXT4_HT_INODE, 3); - if (IS_ERR(handle)) { - ext4_orphan_del(NULL, inode); + if (error) goto err_out; - } - ext4_orphan_del(handle, inode); - orphan = 0; - ext4_journal_stop(handle); + } + handle = ext4_journal_start(inode, EXT4_HT_INODE, 3); + if (IS_ERR(handle)) { + error = PTR_ERR(handle); + goto err_out; + } + if (ext4_handle_valid(handle)) { + error = ext4_orphan_add(handle, inode); + orphan = 1; + } + EXT4_I(inode)->i_disksize = attr->ia_size; + rc = ext4_mark_inode_dirty(handle, inode); + if (!error) + error = rc; + ext4_journal_stop(handle); + if (error) { + ext4_orphan_del(NULL, inode); goto err_out; } } - } - - if (attr->ia_valid & ATTR_SIZE) { - if (attr->ia_size != inode->i_size) { - loff_t oldsize = inode->i_size; - i_size_write(inode, attr->ia_size); - /* - * Blocks are going to be removed from the inode. Wait - * for dio in flight. Temporarily disable - * dioread_nolock to prevent livelock. - */ - if (orphan) { - if (!ext4_should_journal_data(inode)) { - ext4_inode_block_unlocked_dio(inode); - inode_dio_wait(inode); - ext4_inode_resume_unlocked_dio(inode); - } else - ext4_wait_for_tail_page_commit(inode); - } - /* - * Truncate pagecache after we've waited for commit - * in data=journal mode to make pages freeable. - */ - truncate_pagecache(inode, oldsize, inode->i_size); + i_size_write(inode, attr->ia_size); + /* + * Blocks are going to be removed from the inode. Wait + * for dio in flight. Temporarily disable + * dioread_nolock to prevent livelock. + */ + if (orphan) { + if (!ext4_should_journal_data(inode)) { + ext4_inode_block_unlocked_dio(inode); + inode_dio_wait(inode); + ext4_inode_resume_unlocked_dio(inode); + } else + ext4_wait_for_tail_page_commit(inode); } - ext4_truncate(inode); + /* + * Truncate pagecache after we've waited for commit + * in data=journal mode to make pages freeable. + */ + truncate_pagecache(inode, oldsize, inode->i_size); } + /* + * We want to call ext4_truncate() even if attr->ia_size == + * inode->i_size for cases like truncation of fallocated space + */ + if (attr->ia_valid & ATTR_SIZE) + ext4_truncate(inode); if (!rc) { setattr_copy(inode, attr); @@ -4805,7 +4785,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode; - unsigned long delalloc_blocks; + unsigned long long delalloc_blocks; inode = dentry->d_inode; generic_fillattr(inode, stat); @@ -4823,7 +4803,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, delalloc_blocks = EXT4_C2B(EXT4_SB(inode->i_sb), EXT4_I(inode)->i_reserved_data_blocks); - stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9; + stat->blocks += delalloc_blocks << (inode->i_sb->s_blocksize_bits-9); return 0; } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 9491ac0590f7..c0427e2f6648 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -77,8 +77,10 @@ static void swap_inode_data(struct inode *inode1, struct inode *inode2) memswap(ei1->i_data, ei2->i_data, sizeof(ei1->i_data)); memswap(&ei1->i_flags, &ei2->i_flags, sizeof(ei1->i_flags)); memswap(&ei1->i_disksize, &ei2->i_disksize, sizeof(ei1->i_disksize)); - memswap(&ei1->i_es_tree, &ei2->i_es_tree, sizeof(ei1->i_es_tree)); - memswap(&ei1->i_es_lru_nr, &ei2->i_es_lru_nr, sizeof(ei1->i_es_lru_nr)); + ext4_es_remove_extent(inode1, 0, EXT_MAX_BLOCKS); + ext4_es_remove_extent(inode2, 0, EXT_MAX_BLOCKS); + ext4_es_lru_del(inode1); + ext4_es_lru_del(inode2); isize = i_size_read(inode1); i_size_write(inode1, i_size_read(inode2)); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index def84082a9a9..fba960ee26de 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3423,6 +3423,9 @@ static void ext4_mb_pa_callback(struct rcu_head *head) { struct ext4_prealloc_space *pa; pa = container_of(head, struct ext4_prealloc_space, u.pa_rcu); + + BUG_ON(atomic_read(&pa->pa_count)); + BUG_ON(pa->pa_deleted == 0); kmem_cache_free(ext4_pspace_cachep, pa); } @@ -3436,11 +3439,13 @@ static void ext4_mb_put_pa(struct ext4_allocation_context *ac, ext4_group_t grp; ext4_fsblk_t grp_blk; - if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) - return; - /* in this short window concurrent discard can set pa_deleted */ spin_lock(&pa->pa_lock); + if (!atomic_dec_and_test(&pa->pa_count) || pa->pa_free != 0) { + spin_unlock(&pa->pa_lock); + return; + } + if (pa->pa_deleted == 1) { spin_unlock(&pa->pa_lock); return; @@ -4102,7 +4107,7 @@ ext4_mb_initialize_context(struct ext4_allocation_context *ac, ext4_get_group_no_and_offset(sb, goal, &group, &block); /* set up allocation goals */ - ac->ac_b_ex.fe_logical = ar->logical & ~(sbi->s_cluster_ratio - 1); + ac->ac_b_ex.fe_logical = EXT4_LBLK_CMASK(sbi, ar->logical); ac->ac_status = AC_STATUS_CONTINUE; ac->ac_sb = sb; ac->ac_inode = ar->inode; @@ -4639,7 +4644,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, * blocks at the beginning or the end unless we are explicitly * requested to avoid doing so. */ - overflow = block & (sbi->s_cluster_ratio - 1); + overflow = EXT4_PBLK_COFF(sbi, block); if (overflow) { if (flags & EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER) { overflow = sbi->s_cluster_ratio - overflow; @@ -4653,7 +4658,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, count += overflow; } } - overflow = count & (sbi->s_cluster_ratio - 1); + overflow = EXT4_LBLK_COFF(sbi, count); if (overflow) { if (flags & EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER) { if (count > overflow) @@ -4735,11 +4740,16 @@ do_more: * blocks being freed are metadata. these blocks shouldn't * be used until this transaction is committed */ + retry: new_entry = kmem_cache_alloc(ext4_free_data_cachep, GFP_NOFS); if (!new_entry) { - ext4_mb_unload_buddy(&e4b); - err = -ENOMEM; - goto error_return; + /* + * We use a retry loop because + * ext4_free_blocks() is not allowed to fail. + */ + cond_resched(); + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto retry; } new_entry->efd_start_cluster = bit; new_entry->efd_group = block_group; @@ -4761,8 +4771,8 @@ do_more: " group:%d block:%d count:%lu failed" " with %d", block_group, bit, count, err); - } - + } else + EXT4_MB_GRP_CLEAR_TRIMMED(e4b.bd_info); ext4_lock_group(sb, block_group); mb_clear_bits(bitmap_bh->b_data, bit, count_clusters); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 6653fc35ecb7..ab2f6dc44b3a 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -918,11 +918,8 @@ static int htree_dirblock_to_tree(struct file *dir_file, bh->b_data, bh->b_size, (block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb)) + ((char *)de - bh->b_data))) { - /* On error, skip the f_pos to the next block. */ - dir_file->f_pos = (dir_file->f_pos | - (dir->i_sb->s_blocksize - 1)) + 1; - brelse(bh); - return count; + /* silently ignore the rest of the block */ + break; } ext4fs_dirhash(de->name, de->name_len, hinfo); if ((hinfo->hash < start_hash) || diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index b27c96d01965..49d3c01eabf8 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1656,12 +1656,10 @@ errout: err = err2; if (!err) { - ext4_fsblk_t first_block; - first_block = ext4_group_first_block_no(sb, 0); if (test_opt(sb, DEBUG)) printk(KERN_DEBUG "EXT4-fs: extended group to %llu " "blocks\n", ext4_blocks_count(es)); - update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr - first_block, + update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es, sizeof(struct ext4_super_block), 0); } return err; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 94cc84db7c9a..e4923b6a9e39 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1341,7 +1341,7 @@ static const struct mount_opts { {Opt_delalloc, EXT4_MOUNT_DELALLOC, MOPT_EXT4_ONLY | MOPT_SET | MOPT_EXPLICIT}, {Opt_nodelalloc, EXT4_MOUNT_DELALLOC, - MOPT_EXT4_ONLY | MOPT_CLEAR | MOPT_EXPLICIT}, + MOPT_EXT4_ONLY | MOPT_CLEAR}, {Opt_journal_checksum, EXT4_MOUNT_JOURNAL_CHECKSUM, MOPT_EXT4_ONLY | MOPT_SET}, {Opt_journal_async_commit, (EXT4_MOUNT_JOURNAL_ASYNC_COMMIT | @@ -1684,12 +1684,6 @@ static inline void ext4_show_quota_options(struct seq_file *seq, if (sbi->s_qf_names[GRPQUOTA]) seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); - - if (test_opt(sb, USRQUOTA)) - seq_puts(seq, ",usrquota"); - - if (test_opt(sb, GRPQUOTA)) - seq_puts(seq, ",grpquota"); #endif } @@ -3219,11 +3213,19 @@ int ext4_calculate_overhead(struct super_block *sb) } -static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi) +static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb) { ext4_fsblk_t resv_clusters; /* + * There's no need to reserve anything when we aren't using extents. + * The space estimates are exact, there are no unwritten extents, + * hole punching doesn't need new metadata... This is needed especially + * to keep ext2/3 backward compatibility. + */ + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) + return 0; + /* * By default we reserve 2% or 4096 clusters, whichever is smaller. * This should cover the situations where we can not afford to run * out of space like for example punch hole, or converting @@ -3231,7 +3233,8 @@ static ext4_fsblk_t ext4_calculate_resv_clusters(struct ext4_sb_info *sbi) * allocation would require 1, or 2 blocks, higher numbers are * very rare. */ - resv_clusters = ext4_blocks_count(sbi->s_es) >> sbi->s_cluster_bits; + resv_clusters = ext4_blocks_count(EXT4_SB(sb)->s_es) >> + EXT4_SB(sb)->s_cluster_bits; do_div(resv_clusters, 50); resv_clusters = min_t(ext4_fsblk_t, resv_clusters, 4096); @@ -3451,7 +3454,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) } if (test_opt(sb, DIOREAD_NOLOCK)) { ext4_msg(sb, KERN_ERR, "can't mount with " - "both data=journal and delalloc"); + "both data=journal and dioread_nolock"); goto failed_mount; } if (test_opt(sb, DELALLOC)) @@ -3586,10 +3589,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) sbi->s_addr_per_block_bits = ilog2(EXT4_ADDR_PER_BLOCK(sb)); sbi->s_desc_per_block_bits = ilog2(EXT4_DESC_PER_BLOCK(sb)); - /* Do we have standard group size of blocksize * 8 blocks ? */ - if (sbi->s_blocks_per_group == blocksize << 3) - set_opt2(sb, STD_GROUP_SIZE); - for (i = 0; i < 4; i++) sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); sbi->s_def_hash_version = es->s_def_hash_version; @@ -3659,6 +3658,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) goto failed_mount; } + /* Do we have standard group size of clustersize * 8 blocks ? */ + if (sbi->s_blocks_per_group == clustersize << 3) + set_opt2(sb, STD_GROUP_SIZE); + /* * Test whether we have more sectors than will fit in sector_t, * and whether the max offset is addressable by the page cache. @@ -3975,10 +3978,10 @@ no_journal: "available"); } - err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sbi)); + err = ext4_reserve_clusters(sbi, ext4_calculate_resv_clusters(sb)); if (err) { ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for " - "reserved pool", ext4_calculate_resv_clusters(sbi)); + "reserved pool", ext4_calculate_resv_clusters(sb)); goto failed_mount4a; } @@ -4652,6 +4655,21 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) goto restore_opts; } + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { + if (test_opt2(sb, EXPLICIT_DELALLOC)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "both data=journal and delalloc"); + err = -EINVAL; + goto restore_opts; + } + if (test_opt(sb, DIOREAD_NOLOCK)) { + ext4_msg(sb, KERN_ERR, "can't mount with " + "both data=journal and dioread_nolock"); + err = -EINVAL; + goto restore_opts; + } + } + if (sbi->s_mount_flags & EXT4_MF_FS_ABORTED) ext4_abort(sb, "Abort forced by user"); @@ -5406,6 +5424,7 @@ static void __exit ext4_exit_fs(void) kset_unregister(ext4_kset); ext4_exit_system_zone(); ext4_exit_pageio(); + ext4_exit_es(); } MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index c081e34f717f..1423c4816a47 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1350,6 +1350,9 @@ retry: s_min_extra_isize) { tried_min_extra_isize++; new_extra_isize = s_min_extra_isize; + kfree(is); is = NULL; + kfree(bs); bs = NULL; + brelse(bh); goto retry; } error = -1; diff --git a/fs/file.c b/fs/file.c index 4a78f981557a..9de20265a78c 100644 --- a/fs/file.c +++ b/fs/file.c @@ -34,7 +34,7 @@ static void *alloc_fdmem(size_t size) * vmalloc() if the allocation size will be considered "large" by the VM. */ if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { - void *data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN); + void *data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY); if (data != NULL) return data; } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 3be57189efd5..e3ab1e4dc442 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -505,13 +505,16 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, } WARN_ON(inode->i_state & I_SYNC); /* - * Skip inode if it is clean. We don't want to mess with writeback - * lists in this function since flusher thread may be doing for example - * sync in parallel and if we move the inode, it could get skipped. So - * here we make sure inode is on some writeback list and leave it there - * unless we have completely cleaned the inode. + * Skip inode if it is clean and we have no outstanding writeback in + * WB_SYNC_ALL mode. We don't want to mess with writeback lists in this + * function since flusher thread may be doing for example sync in + * parallel and if we move the inode, it could get skipped. So here we + * make sure inode is on some writeback list and leave it there unless + * we have completely cleaned the inode. */ - if (!(inode->i_state & I_DIRTY)) + if (!(inode->i_state & I_DIRTY) && + (wbc->sync_mode != WB_SYNC_ALL || + !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))) goto out; inode->i_state |= I_SYNC; spin_unlock(&inode->i_lock); diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 1d55f9465400..23bf1a52a5da 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1296,22 +1296,6 @@ static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov, return fuse_dev_do_read(fc, file, &cs, iov_length(iov, nr_segs)); } -static int fuse_dev_pipe_buf_steal(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) -{ - return 1; -} - -static const struct pipe_buf_operations fuse_dev_pipe_buf_ops = { - .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, - .confirm = generic_pipe_buf_confirm, - .release = generic_pipe_buf_release, - .steal = fuse_dev_pipe_buf_steal, - .get = generic_pipe_buf_get, -}; - static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags) @@ -1358,7 +1342,11 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, buf->page = bufs[page_nr].page; buf->offset = bufs[page_nr].offset; buf->len = bufs[page_nr].len; - buf->ops = &fuse_dev_pipe_buf_ops; + /* + * Need to be careful about this. Having buf->ops in module + * code can Oops if the buffer persists after module unload. + */ + buf->ops = &nosteal_pipe_buf_ops; pipe->nrbufs++; page_nr++; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index f3f783dc4f75..e67b13de2ebc 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1175,6 +1175,8 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file, return -EIO; if (reclen > nbytes) break; + if (memchr(dirent->name, '/', dirent->namelen) != NULL) + return -EIO; over = filldir(dstbuf, dirent->name, dirent->namelen, file->f_pos, dirent->ino, dirent->type); @@ -1225,13 +1227,29 @@ static int fuse_direntplus_link(struct file *file, if (name.name[1] == '.' && name.len == 2) return 0; } + + if (invalid_nodeid(o->nodeid)) + return -EIO; + if (!fuse_valid_type(o->attr.mode)) + return -EIO; + fc = get_fuse_conn(dir); name.hash = full_name_hash(name.name, name.len); dentry = d_lookup(parent, &name); - if (dentry && dentry->d_inode) { + if (dentry) { inode = dentry->d_inode; - if (get_node_id(inode) == o->nodeid) { + if (!inode) { + d_drop(dentry); + } else if (get_node_id(inode) != o->nodeid || + ((o->attr.mode ^ inode->i_mode) & S_IFMT)) { + err = d_invalidate(dentry); + if (err) + goto out; + } else if (is_bad_inode(inode)) { + err = -EIO; + goto out; + } else { struct fuse_inode *fi; fi = get_fuse_inode(inode); spin_lock(&fc->lock); @@ -1244,9 +1262,6 @@ static int fuse_direntplus_link(struct file *file, */ goto found; } - err = d_invalidate(dentry); - if (err) - goto out; dput(dentry); dentry = NULL; } @@ -1261,10 +1276,19 @@ static int fuse_direntplus_link(struct file *file, if (!inode) goto out; - alias = d_materialise_unique(dentry, inode); - err = PTR_ERR(alias); - if (IS_ERR(alias)) - goto out; + if (S_ISDIR(inode->i_mode)) { + mutex_lock(&fc->inst_mutex); + alias = fuse_d_add_directory(dentry, inode); + mutex_unlock(&fc->inst_mutex); + err = PTR_ERR(alias); + if (IS_ERR(alias)) { + iput(inode); + goto out; + } + } else { + alias = d_splice_alias(inode, dentry); + } + if (alias) { dput(dentry); dentry = alias; @@ -1301,6 +1325,8 @@ static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, return -EIO; if (reclen > nbytes) break; + if (memchr(dirent->name, '/', dirent->namelen) != NULL) + return -EIO; if (!over) { /* We fill entries into dstbuf only as much as @@ -1572,6 +1598,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, struct file *file) { struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_req *req; struct fuse_setattr_in inarg; struct fuse_attr_out outarg; @@ -1599,8 +1626,10 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, if (IS_ERR(req)) return PTR_ERR(req); - if (is_truncate) + if (is_truncate) { fuse_set_nowrite(inode); + set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); + } memset(&inarg, 0, sizeof(inarg)); memset(&outarg, 0, sizeof(outarg)); @@ -1662,12 +1691,14 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr, invalidate_inode_pages2(inode->i_mapping); } + clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); return 0; error: if (is_truncate) fuse_release_nowrite(inode); + clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); return err; } @@ -1731,6 +1762,8 @@ static int fuse_setxattr(struct dentry *entry, const char *name, fc->no_setxattr = 1; err = -EOPNOTSUPP; } + if (!err) + fuse_invalidate_attr(inode); return err; } @@ -1860,6 +1893,8 @@ static int fuse_removexattr(struct dentry *entry, const char *name) fc->no_removexattr = 1; err = -EOPNOTSUPP; } + if (!err) + fuse_invalidate_attr(inode); return err; } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 35f281033142..4fafb8484bbc 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -630,7 +630,8 @@ static void fuse_read_update_size(struct inode *inode, loff_t size, struct fuse_inode *fi = get_fuse_inode(inode); spin_lock(&fc->lock); - if (attr_ver == fi->attr_version && size < inode->i_size) { + if (attr_ver == fi->attr_version && size < inode->i_size && + !test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) { fi->attr_version = ++fc->attr_version; i_size_write(inode, size); } @@ -1033,12 +1034,16 @@ static ssize_t fuse_perform_write(struct file *file, { struct inode *inode = mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); + struct fuse_inode *fi = get_fuse_inode(inode); int err = 0; ssize_t res = 0; if (is_bad_inode(inode)) return -EIO; + if (inode->i_size < pos + iov_iter_count(ii)) + set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); + do { struct fuse_req *req; ssize_t count; @@ -1074,6 +1079,7 @@ static ssize_t fuse_perform_write(struct file *file, if (res > 0) fuse_write_update_size(inode, pos); + clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); fuse_invalidate_attr(inode); return res > 0 ? res : err; @@ -1530,7 +1536,6 @@ static int fuse_writepage_locked(struct page *page) inc_bdi_stat(mapping->backing_dev_info, BDI_WRITEBACK); inc_zone_page_state(tmp_page, NR_WRITEBACK_TEMP); - end_page_writeback(page); spin_lock(&fc->lock); list_add(&req->writepages_entry, &fi->writepages); @@ -1538,6 +1543,8 @@ static int fuse_writepage_locked(struct page *page) fuse_flush_writepages(inode); spin_unlock(&fc->lock); + end_page_writeback(page); + return 0; err_free: @@ -2461,6 +2468,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, { struct fuse_file *ff = file->private_data; struct inode *inode = file->f_inode; + struct fuse_inode *fi = get_fuse_inode(inode); struct fuse_conn *fc = ff->fc; struct fuse_req *req; struct fuse_fallocate_in inarg = { @@ -2478,10 +2486,20 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, if (lock_inode) { mutex_lock(&inode->i_mutex); - if (mode & FALLOC_FL_PUNCH_HOLE) - fuse_set_nowrite(inode); + if (mode & FALLOC_FL_PUNCH_HOLE) { + loff_t endbyte = offset + length - 1; + err = filemap_write_and_wait_range(inode->i_mapping, + offset, endbyte); + if (err) + goto out; + + fuse_sync_writes(inode); + } } + if (!(mode & FALLOC_FL_KEEP_SIZE)) + set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); + req = fuse_get_req_nopages(fc); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -2514,11 +2532,11 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset, fuse_invalidate_attr(inode); out: - if (lock_inode) { - if (mode & FALLOC_FL_PUNCH_HOLE) - fuse_release_nowrite(inode); + if (!(mode & FALLOC_FL_KEEP_SIZE)) + clear_bit(FUSE_I_SIZE_UNSTABLE, &fi->state); + + if (lock_inode) mutex_unlock(&inode->i_mutex); - } return err; } diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index fde7249a3a96..5ced199b50bb 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -115,6 +115,8 @@ struct fuse_inode { enum { /** Advise readdirplus */ FUSE_I_ADVISE_RDPLUS, + /** An operation changing file size is in progress */ + FUSE_I_SIZE_UNSTABLE, }; struct fuse_conn; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 9a0cdde14a08..b5718516825b 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -201,7 +201,8 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr, struct timespec old_mtime; spin_lock(&fc->lock); - if (attr_version != 0 && fi->attr_version > attr_version) { + if ((attr_version != 0 && fi->attr_version > attr_version) || + test_bit(FUSE_I_SIZE_UNSTABLE, &fi->state)) { spin_unlock(&fc->lock); return; } diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 0bad69ed6336..76251600cbea 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -999,6 +999,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; + struct address_space *mapping = inode->i_mapping; struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; int rv; @@ -1019,6 +1020,35 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, if (rv != 1) goto out; /* dio not valid, fall back to buffered i/o */ + /* + * Now since we are holding a deferred (CW) lock at this point, you + * might be wondering why this is ever needed. There is a case however + * where we've granted a deferred local lock against a cached exclusive + * glock. That is ok provided all granted local locks are deferred, but + * it also means that it is possible to encounter pages which are + * cached and possibly also mapped. So here we check for that and sort + * them out ahead of the dio. The glock state machine will take care of + * everything else. + * + * If in fact the cached glock state (gl->gl_state) is deferred (CW) in + * the first place, mapping->nr_pages will always be zero. + */ + if (mapping->nrpages) { + loff_t lstart = offset & (PAGE_CACHE_SIZE - 1); + loff_t len = iov_length(iov, nr_segs); + loff_t end = PAGE_ALIGN(offset + len) - 1; + + rv = 0; + if (len == 0) + goto out; + if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags)) + unmap_shared_mapping_range(ip->i_inode.i_mapping, offset, len); + rv = filemap_write_and_wait_range(mapping, lstart, end); + if (rv) + return rv; + truncate_inode_pages_range(mapping, lstart, end); + } + rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, gfs2_get_block_direct, NULL, NULL, 0); diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 62b484e4a9e4..bc5dac400125 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1536,10 +1536,22 @@ static int setattr_chown(struct inode *inode, struct iattr *attr) if (!(attr->ia_valid & ATTR_GID) || gid_eq(ogid, ngid)) ogid = ngid = NO_GID_QUOTA_CHANGE; - error = gfs2_quota_lock(ip, nuid, ngid); + error = get_write_access(inode); if (error) return error; + error = gfs2_rs_alloc(ip); + if (error) + goto out; + + error = gfs2_rindex_update(sdp); + if (error) + goto out; + + error = gfs2_quota_lock(ip, nuid, ngid); + if (error) + goto out; + if (!uid_eq(ouid, NO_UID_QUOTA_CHANGE) || !gid_eq(ogid, NO_GID_QUOTA_CHANGE)) { error = gfs2_quota_check(ip, nuid, ngid); @@ -1566,6 +1578,8 @@ out_end_trans: gfs2_trans_end(sdp); out_gunlock_q: gfs2_quota_unlock(ip); +out: + put_write_access(inode); return error; } diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 60ede2a0f43f..f7dd3b4f8ab0 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1317,8 +1317,18 @@ static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags, if (IS_ERR(s)) goto error_bdev; - if (s->s_root) + if (s->s_root) { + /* + * s_umount nests inside bd_mutex during + * __invalidate_device(). blkdev_put() acquires + * bd_mutex and can't be called under s_umount. Drop + * s_umount temporarily. This is safe as we're + * holding an active reference. + */ + up_write(&s->s_umount); blkdev_put(bdev, mode); + down_write(&s->s_umount); + } memset(&args, 0, sizeof(args)); args.ar_quota = GFS2_QUOTA_DEFAULT; diff --git a/fs/hpfs/map.c b/fs/hpfs/map.c index 4acb19d78359..803d3da3a0fe 100644 --- a/fs/hpfs/map.c +++ b/fs/hpfs/map.c @@ -17,7 +17,8 @@ __le32 *hpfs_map_bitmap(struct super_block *s, unsigned bmp_block, struct quad_buffer_head *qbh, char *id) { secno sec; - if (hpfs_sb(s)->sb_chk) if (bmp_block * 16384 > hpfs_sb(s)->sb_fs_size) { + unsigned n_bands = (hpfs_sb(s)->sb_fs_size + 0x3fff) >> 14; + if (hpfs_sb(s)->sb_chk) if (bmp_block >= n_bands) { hpfs_error(s, "hpfs_map_bitmap called with bad parameter: %08x at %s", bmp_block, id); return NULL; } diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c index a0617e706957..962e90c37aec 100644 --- a/fs/hpfs/super.c +++ b/fs/hpfs/super.c @@ -558,7 +558,13 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent) sbi->sb_cp_table = NULL; sbi->sb_c_bitmap = -1; sbi->sb_max_fwd_alloc = 0xffffff; - + + if (sbi->sb_fs_size >= 0x80000000) { + hpfs_error(s, "invalid size in superblock: %08x", + (unsigned)sbi->sb_fs_size); + goto bail4; + } + /* Load bitmap directory */ if (!(sbi->sb_bmp_dir = hpfs_load_bitmap_directory(s, le32_to_cpu(superblock->bitmaps)))) goto bail4; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index a3f868ae3fd4..4e5f332f15d9 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -916,14 +916,8 @@ static int get_hstate_idx(int page_size_log) return h - hstates; } -static char *hugetlb_dname(struct dentry *dentry, char *buffer, int buflen) -{ - return dynamic_dname(dentry, buffer, buflen, "/%s (deleted)", - dentry->d_name.name); -} - static struct dentry_operations anon_ops = { - .d_dname = hugetlb_dname + .d_dname = simple_dname }; /* diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index d9b8aebdeb22..d3705490ff9c 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -125,8 +125,8 @@ static void destroy_inodecache(void) static int isofs_remount(struct super_block *sb, int *flags, char *data) { - /* we probably want a lot more here */ - *flags |= MS_RDONLY; + if (!(*flags & MS_RDONLY)) + return -EROFS; return 0; } @@ -779,15 +779,6 @@ root_found: */ s->s_maxbytes = 0x80000000000LL; - /* - * The CDROM is read-only, has no nodes (devices) on it, and since - * all of the files appear to be owned by root, we really do not want - * to allow suid. (suid or devices will not show up unless we have - * Rock Ridge extensions) - */ - - s->s_flags |= MS_RDONLY /* | MS_NODEV | MS_NOSUID */; - /* Set this for reference. Its not currently used except on write which we don't have .. */ @@ -1546,6 +1537,9 @@ struct inode *isofs_iget(struct super_block *sb, static struct dentry *isofs_mount(struct file_system_type *fs_type, int flags, const char *dev_name, void *data) { + /* We don't support read-write mounts */ + if (!(flags & MS_RDONLY)) + return ERR_PTR(-EACCES); return mount_bdev(fs_type, flags, dev_name, data, isofs_fill_super); } diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 95457576e434..aaa1a3f33b0e 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1318,6 +1318,7 @@ static int journal_reset(journal_t *journal) static void jbd2_write_superblock(journal_t *journal, int write_op) { struct buffer_head *bh = journal->j_sb_buffer; + journal_superblock_t *sb = journal->j_superblock; int ret; trace_jbd2_write_superblock(journal, write_op); @@ -1339,6 +1340,7 @@ static void jbd2_write_superblock(journal_t *journal, int write_op) clear_buffer_write_io_error(bh); set_buffer_uptodate(bh); } + jbd2_superblock_csum_set(journal, sb); get_bh(bh); bh->b_end_io = end_buffer_write_sync; ret = submit_bh(write_op, bh); @@ -1435,7 +1437,6 @@ void jbd2_journal_update_sb_errno(journal_t *journal) jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", journal->j_errno); sb->s_errno = cpu_to_be32(journal->j_errno); - jbd2_superblock_csum_set(journal, sb); read_unlock(&journal->j_state_lock); jbd2_write_superblock(journal, WRITE_SYNC); diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 10f524c59ea8..a6917125f215 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -517,10 +517,10 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) &transaction->t_outstanding_credits); if (atomic_dec_and_test(&transaction->t_updates)) wake_up(&journal->j_wait_updates); + tid = transaction->t_tid; spin_unlock(&transaction->t_handle_lock); jbd_debug(2, "restarting handle %p\n", handle); - tid = transaction->t_tid; need_to_start = !tid_geq(journal->j_commit_request, tid); read_unlock(&journal->j_state_lock); if (need_to_start) @@ -1151,7 +1151,10 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) * once a transaction -bzzz */ jh->b_modified = 1; - J_ASSERT_JH(jh, handle->h_buffer_credits > 0); + if (handle->h_buffer_credits <= 0) { + ret = -ENOSPC; + goto out_unlock_bh; + } handle->h_buffer_credits--; } @@ -1234,7 +1237,6 @@ out_unlock_bh: jbd2_journal_put_journal_head(jh); out: JBUFFER_TRACE(jh, "exit"); - WARN_ON(ret); /* All errors are bugs, so dump the stack */ return ret; } diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c index 0ddbeceafc62..c450fdb3d78d 100644 --- a/fs/jfs/jfs_dtree.c +++ b/fs/jfs/jfs_dtree.c @@ -3047,6 +3047,14 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) dir_index = (u32) filp->f_pos; + /* + * NFSv4 reserves cookies 1 and 2 for . and .. so we add + * the value we return to the vfs is one greater than the + * one we use internally. + */ + if (dir_index) + dir_index--; + if (dir_index > 1) { struct dir_table_slot dirtab_slot; @@ -3086,7 +3094,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) if (p->header.flag & BT_INTERNAL) { jfs_err("jfs_readdir: bad index table"); DT_PUTPAGE(mp); - filp->f_pos = -1; + filp->f_pos = DIREND; return 0; } } else { @@ -3094,7 +3102,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) /* * self "." */ - filp->f_pos = 0; + filp->f_pos = 1; if (filldir(dirent, ".", 1, 0, ip->i_ino, DT_DIR)) return 0; @@ -3102,7 +3110,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) /* * parent ".." */ - filp->f_pos = 1; + filp->f_pos = 2; if (filldir(dirent, "..", 2, 1, PARENT(ip), DT_DIR)) return 0; @@ -3123,24 +3131,25 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) /* * Legacy filesystem - OS/2 & Linux JFS < 0.3.6 * - * pn = index = 0: First entry "." - * pn = 0; index = 1: Second entry ".." + * pn = 0; index = 1: First entry "." + * pn = 0; index = 2: Second entry ".." * pn > 0: Real entries, pn=1 -> leftmost page * pn = index = -1: No more entries */ dtpos = filp->f_pos; - if (dtpos == 0) { + if (dtpos < 2) { /* build "." entry */ + filp->f_pos = 1; if (filldir(dirent, ".", 1, filp->f_pos, ip->i_ino, DT_DIR)) return 0; - dtoffset->index = 1; + dtoffset->index = 2; filp->f_pos = dtpos; } if (dtoffset->pn == 0) { - if (dtoffset->index == 1) { + if (dtoffset->index == 2) { /* build ".." entry */ if (filldir(dirent, "..", 2, filp->f_pos, @@ -3233,6 +3242,12 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir) } jfs_dirent->position = unique_pos++; } + /* + * We add 1 to the index because we may + * use a value of 2 internally, and NFSv4 + * doesn't like that. + */ + jfs_dirent->position++; } else { jfs_dirent->position = dtpos; len = min(d_namleft, DTLHDRDATALEN_LEGACY); diff --git a/fs/jfs/jfs_inode.c b/fs/jfs/jfs_inode.c index c1a3e603279c..7f464c513ba0 100644 --- a/fs/jfs/jfs_inode.c +++ b/fs/jfs/jfs_inode.c @@ -95,7 +95,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode) if (insert_inode_locked(inode) < 0) { rc = -EINVAL; - goto fail_unlock; + goto fail_put; } inode_init_owner(inode, parent, mode); @@ -156,7 +156,6 @@ struct inode *ialloc(struct inode *parent, umode_t mode) fail_drop: dquot_drop(inode); inode->i_flags |= S_NOQUOTA; -fail_unlock: clear_nlink(inode); unlock_new_inode(inode); fail_put: diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index 01bfe7662751..41e491b8e5d7 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -64,12 +64,17 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init) nlm_init->protocol, nlm_version, nlm_init->hostname, nlm_init->noresvport, nlm_init->net); - if (host == NULL) { - lockd_down(nlm_init->net); - return ERR_PTR(-ENOLCK); - } + if (host == NULL) + goto out_nohost; + if (host->h_rpcclnt == NULL && nlm_bind_host(host) == NULL) + goto out_nobind; return host; +out_nobind: + nlmclnt_release_host(host); +out_nohost: + lockd_down(nlm_init->net); + return ERR_PTR(-ENOLCK); } EXPORT_SYMBOL_GPL(nlmclnt_init); diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index 9760ecb9b60f..acd394716349 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -125,14 +125,15 @@ static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl) { struct nlm_args *argp = &req->a_args; struct nlm_lock *lock = &argp->lock; + char *nodename = req->a_host->h_rpcclnt->cl_nodename; nlmclnt_next_cookie(&argp->cookie); memcpy(&lock->fh, NFS_FH(file_inode(fl->fl_file)), sizeof(struct nfs_fh)); - lock->caller = utsname()->nodename; + lock->caller = nodename; lock->oh.data = req->a_owner; lock->oh.len = snprintf(req->a_owner, sizeof(req->a_owner), "%u@%s", (unsigned int)fl->fl_u.nfs_fl.owner->pid, - utsname()->nodename); + nodename); lock->svid = fl->fl_u.nfs_fl.owner->pid; lock->fl.fl_start = fl->fl_start; lock->fl.fl_end = fl->fl_end; diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index e703318c41df..ffc4045fc62e 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -767,6 +767,7 @@ nlmsvc_grant_blocked(struct nlm_block *block) struct nlm_file *file = block->b_file; struct nlm_lock *lock = &block->b_call->a_args.lock; int error; + loff_t fl_start, fl_end; dprintk("lockd: grant blocked lock %p\n", block); @@ -784,9 +785,16 @@ nlmsvc_grant_blocked(struct nlm_block *block) } /* Try the lock operation again */ + /* vfs_lock_file() can mangle fl_start and fl_end, but we need + * them unchanged for the GRANT_MSG + */ lock->fl.fl_flags |= FL_SLEEP; + fl_start = lock->fl.fl_start; + fl_end = lock->fl.fl_end; error = vfs_lock_file(file->f_file, F_SETLK, &lock->fl, NULL); lock->fl.fl_flags &= ~FL_SLEEP; + lock->fl.fl_start = fl_start; + lock->fl.fl_end = fl_end; switch (error) { case 0: @@ -939,6 +947,7 @@ nlmsvc_retry_blocked(void) unsigned long timeout = MAX_SCHEDULE_TIMEOUT; struct nlm_block *block; + spin_lock(&nlm_blocked_lock); while (!list_empty(&nlm_blocked) && !kthread_should_stop()) { block = list_entry(nlm_blocked.next, struct nlm_block, b_list); @@ -948,6 +957,7 @@ nlmsvc_retry_blocked(void) timeout = block->b_when - jiffies; break; } + spin_unlock(&nlm_blocked_lock); dprintk("nlmsvc_retry_blocked(%p, when=%ld)\n", block, block->b_when); @@ -957,7 +967,9 @@ nlmsvc_retry_blocked(void) retry_deferred_block(block); } else nlmsvc_grant_blocked(block); + spin_lock(&nlm_blocked_lock); } + spin_unlock(&nlm_blocked_lock); return timeout; } diff --git a/fs/mount.h b/fs/mount.h index 64a858143ff9..68d80bdcd081 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -73,7 +73,7 @@ static inline int mnt_has_parent(struct mount *mnt) static inline int is_mounted(struct vfsmount *mnt) { /* neither detached nor internal? */ - return !IS_ERR_OR_NULL(real_mount(mnt)); + return !IS_ERR_OR_NULL(real_mount(mnt)->mnt_ns); } extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *, int); diff --git a/fs/namei.c b/fs/namei.c index 9ed9361223c0..cccaf77e76c5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2263,6 +2263,7 @@ static int may_delete(struct inode *dir,struct dentry *victim,int isdir) */ static inline int may_create(struct inode *dir, struct dentry *child) { + audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE); if (child->d_inode) return -EEXIST; if (IS_DEADDIR(dir)) diff --git a/fs/namespace.c b/fs/namespace.c index 7b1ca9ba0b0a..a45ba4f267fe 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1429,7 +1429,7 @@ struct vfsmount *collect_mounts(struct path *path) CL_COPY_ALL | CL_PRIVATE); namespace_unlock(); if (IS_ERR(tree)) - return NULL; + return ERR_CAST(tree); return &tree->mnt; } diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c index 9c3e117c3ed1..4d0161442565 100644 --- a/fs/nfs/blocklayout/extents.c +++ b/fs/nfs/blocklayout/extents.c @@ -44,7 +44,7 @@ static inline sector_t normalize(sector_t s, int base) { sector_t tmp = s; /* Since do_div modifies its argument */ - return s - do_div(tmp, base); + return s - sector_div(tmp, base); } static inline sector_t normalize_up(sector_t s, int base) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 4cbad5d6b276..02773aab43c5 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -240,13 +240,11 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp, error = nfs4_discover_server_trunking(clp, &old); if (error < 0) goto error; - nfs_put_client(clp); - if (clp != old) { - clp->cl_preserve_clid = true; - clp = old; - } - return clp; + if (clp != old) + clp->cl_preserve_clid = true; + nfs_put_client(clp); + return old; error: nfs_mark_client_ready(clp, error); @@ -324,9 +322,10 @@ int nfs40_walk_client_list(struct nfs_client *new, prev = pos; status = nfs_wait_client_init_complete(pos); - spin_lock(&nn->nfs_client_lock); if (status < 0) - continue; + goto out; + status = -NFS4ERR_STALE_CLIENTID; + spin_lock(&nn->nfs_client_lock); } if (pos->cl_cons_state != NFS_CS_READY) continue; @@ -464,7 +463,8 @@ int nfs41_walk_client_list(struct nfs_client *new, } spin_lock(&nn->nfs_client_lock); if (status < 0) - continue; + break; + status = -NFS4ERR_STALE_CLIENTID; } if (pos->cl_cons_state != NFS_CS_READY) continue; diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 661a0f611215..678cb8964532 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -797,34 +797,34 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr; struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); - - if (filelayout_test_devid_unavailable(devid)) - return NULL; + struct nfs4_pnfs_ds *ret = ds; if (ds == NULL) { printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", __func__, ds_idx); filelayout_mark_devid_invalid(devid); - return NULL; + goto out; } if (ds->ds_clp) - return ds; + goto out_test_devid; if (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) == 0) { struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode); int err; err = nfs4_ds_connect(s, ds); - if (err) { + if (err) nfs4_mark_deviceid_unavailable(devid); - ds = NULL; - } nfs4_clear_ds_conn_bit(ds); } else { /* Either ds is connected, or ds is NULL */ nfs4_wait_ds_connect(ds); } - return ds; +out_test_devid: + if (filelayout_test_devid_unavailable(devid)) + ret = NULL; +out: + return ret; } module_param(dataserver_retrans, uint, 0644); diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index d7ba5616989c..26e71bdb5b33 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1160,29 +1160,24 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data) int ret; if (!data->rpc_done) { - ret = data->rpc_status; - goto err; + if (data->rpc_status) { + ret = data->rpc_status; + goto err; + } + /* cached opens have already been processed */ + goto update; } - ret = -ESTALE; - if (!(data->f_attr.valid & NFS_ATTR_FATTR_TYPE) || - !(data->f_attr.valid & NFS_ATTR_FATTR_FILEID) || - !(data->f_attr.valid & NFS_ATTR_FATTR_CHANGE)) - goto err; - - ret = -ENOMEM; - state = nfs4_get_open_state(inode, data->owner); - if (state == NULL) - goto err; - ret = nfs_refresh_inode(inode, &data->f_attr); if (ret) goto err; if (data->o_res.delegation_type != 0) nfs4_opendata_check_deleg(data, state); +update: update_open_stateid(state, &data->o_res.stateid, NULL, data->o_arg.fmode); + atomic_inc(&state->count); return state; err: @@ -4227,8 +4222,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server, dprintk("%s ERROR %d, Reset session\n", __func__, task->tk_status); nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); - task->tk_status = 0; - return -EAGAIN; + goto wait_on_recovery; #endif /* CONFIG_NFS_V4_1 */ case -NFS4ERR_DELAY: nfs_inc_server_stats(server, NFSIOS_DELAY); @@ -4406,11 +4400,17 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata) return; switch (task->tk_status) { - case -NFS4ERR_STALE_STATEID: - case -NFS4ERR_EXPIRED: case 0: renew_lease(data->res.server, data->timestamp); break; + case -NFS4ERR_ADMIN_REVOKED: + case -NFS4ERR_DELEG_REVOKED: + case -NFS4ERR_BAD_STATEID: + case -NFS4ERR_OLD_STATEID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + task->tk_status = 0; + break; default: if (nfs4_async_handle_error(task, data->res.server, NULL) == -EAGAIN) { @@ -4572,6 +4572,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock status = 0; } request->fl_ops->fl_release_private(request); + request->fl_ops = NULL; out: return status; } @@ -6231,9 +6232,9 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) struct nfs_server *server = NFS_SERVER(inode); struct pnfs_layout_hdr *lo; struct nfs4_state *state = NULL; - unsigned long timeo, giveup; + unsigned long timeo, now, giveup; - dprintk("--> %s\n", __func__); + dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status); if (!nfs41_sequence_done(task, &lgp->res.seq_res)) goto out; @@ -6241,12 +6242,38 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) switch (task->tk_status) { case 0: goto out; + /* + * NFS4ERR_LAYOUTTRYLATER is a conflict with another client + * (or clients) writing to the same RAID stripe + */ case -NFS4ERR_LAYOUTTRYLATER: + /* + * NFS4ERR_RECALLCONFLICT is when conflict with self (must recall + * existing layout before getting a new one). + */ case -NFS4ERR_RECALLCONFLICT: timeo = rpc_get_timeout(task->tk_client); giveup = lgp->args.timestamp + timeo; - if (time_after(giveup, jiffies)) - task->tk_status = -NFS4ERR_DELAY; + now = jiffies; + if (time_after(giveup, now)) { + unsigned long delay; + + /* Delay for: + * - Not less then NFS4_POLL_RETRY_MIN. + * - One last time a jiffie before we give up + * - exponential backoff (time_now minus start_attempt) + */ + delay = max_t(unsigned long, NFS4_POLL_RETRY_MIN, + min((giveup - now - 1), + now - lgp->args.timestamp)); + + dprintk("%s: NFS4ERR_RECALLCONFLICT waiting %lu\n", + __func__, delay); + rpc_delay(task, delay); + task->tk_status = 0; + rpc_restart_call_prepare(task); + goto out; /* Do not call nfs4_async_handle_error() */ + } break; case -NFS4ERR_EXPIRED: case -NFS4ERR_BAD_STATEID: @@ -6682,7 +6709,7 @@ nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, switch (err) { case 0: case -NFS4ERR_WRONGSEC: - case -NFS4ERR_NOTSUPP: + case -ENOTSUPP: goto out; default: err = nfs4_handle_exception(server, err, &exception); @@ -6714,7 +6741,7 @@ nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, * Fall back on "guess and check" method if * the server doesn't support SECINFO_NO_NAME */ - if (err == -NFS4ERR_WRONGSEC || err == -NFS4ERR_NOTSUPP) { + if (err == -NFS4ERR_WRONGSEC || err == -ENOTSUPP) { err = nfs4_find_root_sec(server, fhandle, info); goto out_freepage; } diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 1fab140764c4..2c37442ed936 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -228,19 +228,8 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp) return status; } -/* - * Back channel returns NFS4ERR_DELAY for new requests when - * NFS4_SESSION_DRAINING is set so there is no work to be done when draining - * is ended. - */ -static void nfs4_end_drain_session(struct nfs_client *clp) +static void nfs4_end_drain_slot_table(struct nfs4_slot_table *tbl) { - struct nfs4_session *ses = clp->cl_session; - struct nfs4_slot_table *tbl; - - if (ses == NULL) - return; - tbl = &ses->fc_slot_table; if (test_and_clear_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) { spin_lock(&tbl->slot_tbl_lock); nfs41_wake_slot_table(tbl); @@ -248,6 +237,16 @@ static void nfs4_end_drain_session(struct nfs_client *clp) } } +static void nfs4_end_drain_session(struct nfs_client *clp) +{ + struct nfs4_session *ses = clp->cl_session; + + if (ses != NULL) { + nfs4_end_drain_slot_table(&ses->bc_slot_table); + nfs4_end_drain_slot_table(&ses->fc_slot_table); + } +} + /* * Signal state manager thread if session fore channel is drained */ diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 4be8d135ed61..988efb4caac0 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3002,7 +3002,8 @@ out_overflow: return -EIO; } -static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) +static bool __decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected, + int *nfs_retval) { __be32 *p; uint32_t opnum; @@ -3012,19 +3013,32 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) if (unlikely(!p)) goto out_overflow; opnum = be32_to_cpup(p++); - if (opnum != expected) { - dprintk("nfs: Server returned operation" - " %d but we issued a request for %d\n", - opnum, expected); - return -EIO; - } + if (unlikely(opnum != expected)) + goto out_bad_operation; nfserr = be32_to_cpup(p); - if (nfserr != NFS_OK) - return nfs4_stat_to_errno(nfserr); - return 0; + if (nfserr == NFS_OK) + *nfs_retval = 0; + else + *nfs_retval = nfs4_stat_to_errno(nfserr); + return true; +out_bad_operation: + dprintk("nfs: Server returned operation" + " %d but we issued a request for %d\n", + opnum, expected); + *nfs_retval = -EREMOTEIO; + return false; out_overflow: print_overflow_msg(__func__, xdr); - return -EIO; + *nfs_retval = -EIO; + return false; +} + +static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected) +{ + int retval; + + __decode_op_hdr(xdr, expected, &retval); + return retval; } /* Dummy routine */ @@ -4842,11 +4856,12 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res) uint32_t savewords, bmlen, i; int status; - status = decode_op_hdr(xdr, OP_OPEN); - if (status != -EIO) - nfs_increment_open_seqid(status, res->seqid); - if (!status) - status = decode_stateid(xdr, &res->stateid); + if (!__decode_op_hdr(xdr, OP_OPEN, &status)) + return status; + nfs_increment_open_seqid(status, res->seqid); + if (status) + return status; + status = decode_stateid(xdr, &res->stateid); if (unlikely(status)) return status; diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index 5f38ea36e266..af51cf9bf2e3 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -536,16 +536,12 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) if (err) goto out3; exp.ex_anon_uid= make_kuid(&init_user_ns, an_int); - if (!uid_valid(exp.ex_anon_uid)) - goto out3; /* anon gid */ err = get_int(&mesg, &an_int); if (err) goto out3; exp.ex_anon_gid= make_kgid(&init_user_ns, an_int); - if (!gid_valid(exp.ex_anon_gid)) - goto out3; /* fsid */ err = get_int(&mesg, &an_int); @@ -583,6 +579,17 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen) exp.ex_uuid); if (err) goto out4; + /* + * For some reason exportfs has been passing down an + * invalid (-1) uid & gid on the "dummy" export which it + * uses to test export support. To make sure exportfs + * sees errors from check_export we therefore need to + * delay these checks till after check_export: + */ + if (!uid_valid(exp.ex_anon_uid)) + goto out4; + if (!gid_valid(exp.ex_anon_gid)) + goto out4; } expp = svc_export_lookup(&exp); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 6cd86e0fe450..582321a978b0 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -162,8 +162,8 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) */ memcpy(p, argp->p, avail); /* step to next page */ - argp->p = page_address(argp->pagelist[0]); argp->pagelist++; + argp->p = page_address(argp->pagelist[0]); if (argp->pagelen < PAGE_SIZE) { argp->end = argp->p + (argp->pagelen>>2); argp->pagelen = 0; diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index e76244edd748..ec8d97ddc635 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -129,6 +129,13 @@ nfsd_reply_cache_alloc(void) } static void +nfsd_reply_cache_unhash(struct svc_cacherep *rp) +{ + hlist_del_init(&rp->c_hash); + list_del_init(&rp->c_lru); +} + +static void nfsd_reply_cache_free_locked(struct svc_cacherep *rp) { if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) { @@ -403,7 +410,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru); if (nfsd_cache_entry_expired(rp) || num_drc_entries >= max_drc_entries) { - lru_put_end(rp); + nfsd_reply_cache_unhash(rp); prune_cache_entries(); goto search_cache; } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 84ce601d8063..62fd6616801d 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -297,41 +297,12 @@ commit_metadata(struct svc_fh *fhp) } /* - * Set various file attributes. - * N.B. After this call fhp needs an fh_put + * Go over the attributes and take care of the small differences between + * NFS semantics and what Linux expects. */ -__be32 -nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, - int check_guard, time_t guardtime) +static void +nfsd_sanitize_attrs(struct inode *inode, struct iattr *iap) { - struct dentry *dentry; - struct inode *inode; - int accmode = NFSD_MAY_SATTR; - umode_t ftype = 0; - __be32 err; - int host_err; - int size_change = 0; - - if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) - accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; - if (iap->ia_valid & ATTR_SIZE) - ftype = S_IFREG; - - /* Get inode */ - err = fh_verify(rqstp, fhp, ftype, accmode); - if (err) - goto out; - - dentry = fhp->fh_dentry; - inode = dentry->d_inode; - - /* Ignore any mode updates on symlinks */ - if (S_ISLNK(inode->i_mode)) - iap->ia_valid &= ~ATTR_MODE; - - if (!iap->ia_valid) - goto out; - /* * NFSv2 does not differentiate between "set-[ac]time-to-now" * which only requires access, and "set-[ac]time-to-X" which @@ -341,8 +312,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, * convert to "set to now" instead of "set to explicit time" * * We only call inode_change_ok as the last test as technically - * it is not an interface that we should be using. It is only - * valid if the filesystem does not define it's own i_op->setattr. + * it is not an interface that we should be using. */ #define BOTH_TIME_SET (ATTR_ATIME_SET | ATTR_MTIME_SET) #define MAX_TOUCH_TIME_ERROR (30*60) @@ -368,30 +338,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, iap->ia_valid &= ~BOTH_TIME_SET; } } - - /* - * The size case is special. - * It changes the file as well as the attributes. - */ - if (iap->ia_valid & ATTR_SIZE) { - if (iap->ia_size < inode->i_size) { - err = nfsd_permission(rqstp, fhp->fh_export, dentry, - NFSD_MAY_TRUNC|NFSD_MAY_OWNER_OVERRIDE); - if (err) - goto out; - } - - host_err = get_write_access(inode); - if (host_err) - goto out_nfserr; - - size_change = 1; - host_err = locks_verify_truncate(inode, NULL, iap->ia_size); - if (host_err) { - put_write_access(inode); - goto out_nfserr; - } - } /* sanitize the mode change */ if (iap->ia_valid & ATTR_MODE) { @@ -414,32 +360,111 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, iap->ia_valid |= (ATTR_KILL_SUID | ATTR_KILL_SGID); } } +} - /* Change the attributes. */ +static __be32 +nfsd_get_write_access(struct svc_rqst *rqstp, struct svc_fh *fhp, + struct iattr *iap) +{ + struct inode *inode = fhp->fh_dentry->d_inode; + int host_err; - iap->ia_valid |= ATTR_CTIME; + if (iap->ia_size < inode->i_size) { + __be32 err; - err = nfserr_notsync; - if (!check_guard || guardtime == inode->i_ctime.tv_sec) { - host_err = nfsd_break_lease(inode); - if (host_err) - goto out_nfserr; - fh_lock(fhp); + err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry, + NFSD_MAY_TRUNC | NFSD_MAY_OWNER_OVERRIDE); + if (err) + return err; + } - host_err = notify_change(dentry, iap); - err = nfserrno(host_err); - fh_unlock(fhp); + host_err = get_write_access(inode); + if (host_err) + goto out_nfserrno; + + host_err = locks_verify_truncate(inode, NULL, iap->ia_size); + if (host_err) + goto out_put_write_access; + return 0; + +out_put_write_access: + put_write_access(inode); +out_nfserrno: + return nfserrno(host_err); +} + +/* + * Set various file attributes. After this call fhp needs an fh_put. + */ +__be32 +nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, + int check_guard, time_t guardtime) +{ + struct dentry *dentry; + struct inode *inode; + int accmode = NFSD_MAY_SATTR; + umode_t ftype = 0; + __be32 err; + int host_err; + int size_change = 0; + + if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) + accmode |= NFSD_MAY_WRITE|NFSD_MAY_OWNER_OVERRIDE; + if (iap->ia_valid & ATTR_SIZE) + ftype = S_IFREG; + + /* Get inode */ + err = fh_verify(rqstp, fhp, ftype, accmode); + if (err) + goto out; + + dentry = fhp->fh_dentry; + inode = dentry->d_inode; + + /* Ignore any mode updates on symlinks */ + if (S_ISLNK(inode->i_mode)) + iap->ia_valid &= ~ATTR_MODE; + + if (!iap->ia_valid) + goto out; + + nfsd_sanitize_attrs(inode, iap); + + /* + * The size case is special, it changes the file in addition to the + * attributes. + */ + if (iap->ia_valid & ATTR_SIZE) { + err = nfsd_get_write_access(rqstp, fhp, iap); + if (err) + goto out; + size_change = 1; } + + iap->ia_valid |= ATTR_CTIME; + + if (check_guard && guardtime != inode->i_ctime.tv_sec) { + err = nfserr_notsync; + goto out_put_write_access; + } + + host_err = nfsd_break_lease(inode); + if (host_err) + goto out_put_write_access_nfserror; + + fh_lock(fhp); + host_err = notify_change(dentry, iap); + fh_unlock(fhp); + +out_put_write_access_nfserror: + err = nfserrno(host_err); +out_put_write_access: if (size_change) put_write_access(inode); if (!err) commit_metadata(fhp); out: return err; - -out_nfserr: - err = nfserrno(host_err); - goto out; } #if defined(CONFIG_NFSD_V2_ACL) || \ @@ -802,9 +827,10 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, flags = O_WRONLY|O_LARGEFILE; } *filp = dentry_open(&path, flags, current_cred()); - if (IS_ERR(*filp)) + if (IS_ERR(*filp)) { host_err = PTR_ERR(*filp); - else { + *filp = NULL; + } else { host_err = ima_file_check(*filp, may_flags); if (may_flags & NFSD_MAY_64BIT_COOKIE) diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c index 0ba679866e50..da276640f776 100644 --- a/fs/nilfs2/page.c +++ b/fs/nilfs2/page.c @@ -94,6 +94,7 @@ void nilfs_forget_buffer(struct buffer_head *bh) clear_buffer_nilfs_volatile(bh); clear_buffer_nilfs_checked(bh); clear_buffer_nilfs_redirected(bh); + clear_buffer_async_write(bh); clear_buffer_dirty(bh); if (nilfs_page_buffers_clean(page)) __nilfs_clear_page_dirty(page); @@ -429,6 +430,7 @@ void nilfs_clear_dirty_page(struct page *page, bool silent) "discard block %llu, size %zu", (u64)bh->b_blocknr, bh->b_size); } + clear_buffer_async_write(bh); clear_buffer_dirty(bh); clear_buffer_nilfs_volatile(bh); clear_buffer_nilfs_checked(bh); diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c index dc9a913784ab..2d8be51f90dc 100644 --- a/fs/nilfs2/segbuf.c +++ b/fs/nilfs2/segbuf.c @@ -345,8 +345,7 @@ static void nilfs_end_bio_write(struct bio *bio, int err) if (err == -EOPNOTSUPP) { set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); - bio_put(bio); - /* to be detected by submit_seg_bio() */ + /* to be detected by nilfs_segbuf_submit_bio() */ } if (!uptodate) @@ -377,12 +376,12 @@ static int nilfs_segbuf_submit_bio(struct nilfs_segment_buffer *segbuf, bio->bi_private = segbuf; bio_get(bio); submit_bio(mode, bio); + segbuf->sb_nbio++; if (bio_flagged(bio, BIO_EOPNOTSUPP)) { bio_put(bio); err = -EOPNOTSUPP; goto failed; } - segbuf->sb_nbio++; bio_put(bio); wi->bio = NULL; diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index a5752a589932..958a5b57ed4a 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -665,7 +665,7 @@ static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode, bh = head = page_buffers(page); do { - if (!buffer_dirty(bh)) + if (!buffer_dirty(bh) || buffer_async_write(bh)) continue; get_bh(bh); list_add_tail(&bh->b_assoc_buffers, listp); @@ -699,7 +699,8 @@ static void nilfs_lookup_dirty_node_buffers(struct inode *inode, for (i = 0; i < pagevec_count(&pvec); i++) { bh = head = page_buffers(pvec.pages[i]); do { - if (buffer_dirty(bh)) { + if (buffer_dirty(bh) && + !buffer_async_write(bh)) { get_bh(bh); list_add_tail(&bh->b_assoc_buffers, listp); @@ -1439,17 +1440,19 @@ static int nilfs_segctor_collect(struct nilfs_sc_info *sci, nilfs_clear_logs(&sci->sc_segbufs); - err = nilfs_segctor_extend_segments(sci, nilfs, nadd); - if (unlikely(err)) - return err; - if (sci->sc_stage.flags & NILFS_CF_SUFREED) { err = nilfs_sufile_cancel_freev(nilfs->ns_sufile, sci->sc_freesegs, sci->sc_nfreesegs, NULL); WARN_ON(err); /* do not happen */ + sci->sc_stage.flags &= ~NILFS_CF_SUFREED; } + + err = nilfs_segctor_extend_segments(sci, nilfs, nadd); + if (unlikely(err)) + return err; + nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA); sci->sc_stage = prev_stage; } @@ -1579,6 +1582,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { + set_buffer_async_write(bh); if (bh->b_page != bd_page) { if (bd_page) { lock_page(bd_page); @@ -1592,6 +1596,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { + set_buffer_async_write(bh); if (bh == segbuf->sb_super_root) { if (bh->b_page != bd_page) { lock_page(bd_page); @@ -1677,6 +1682,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err) list_for_each_entry(segbuf, logs, sb_list) { list_for_each_entry(bh, &segbuf->sb_segsum_buffers, b_assoc_buffers) { + clear_buffer_async_write(bh); if (bh->b_page != bd_page) { if (bd_page) end_page_writeback(bd_page); @@ -1686,6 +1692,7 @@ static void nilfs_abort_logs(struct list_head *logs, int err) list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) { + clear_buffer_async_write(bh); if (bh == segbuf->sb_super_root) { if (bh->b_page != bd_page) { end_page_writeback(bd_page); @@ -1755,6 +1762,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) b_assoc_buffers) { set_buffer_uptodate(bh); clear_buffer_dirty(bh); + clear_buffer_async_write(bh); if (bh->b_page != bd_page) { if (bd_page) end_page_writeback(bd_page); @@ -1776,6 +1784,7 @@ static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci) b_assoc_buffers) { set_buffer_uptodate(bh); clear_buffer_dirty(bh); + clear_buffer_async_write(bh); clear_buffer_delay(bh); clear_buffer_nilfs_volatile(bh); clear_buffer_nilfs_redirected(bh); diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 6c80083a984f..f1680cdbd88b 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -122,6 +122,7 @@ static int fill_event_metadata(struct fsnotify_group *group, metadata->event_len = FAN_EVENT_METADATA_LEN; metadata->metadata_len = FAN_EVENT_METADATA_LEN; metadata->vers = FANOTIFY_METADATA_VERSION; + metadata->reserved = 0; metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS; metadata->pid = pid_vnr(event->tgid); if (unlikely(event->mask & FAN_Q_OVERFLOW)) @@ -866,9 +867,9 @@ COMPAT_SYSCALL_DEFINE6(fanotify_mark, { return sys_fanotify_mark(fanotify_fd, flags, #ifdef __BIG_ENDIAN - ((__u64)mask1 << 32) | mask0, -#else ((__u64)mask0 << 32) | mask1, +#else + ((__u64)mask1 << 32) | mask0, #endif dfd, pathname); } diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 2487116d0d33..846064726682 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -781,7 +781,6 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, cpos = map_start >> osb->s_clustersize_bits; mapping_end = ocfs2_clusters_for_bytes(inode->i_sb, map_start + map_len); - mapping_end -= cpos; is_last = 0; while (cpos < mapping_end && !is_last) { u32 fe_flags; diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c index 2e3ea308c144..5b8d94436105 100644 --- a/fs/ocfs2/xattr.c +++ b/fs/ocfs2/xattr.c @@ -6499,6 +6499,16 @@ static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args) } new_oi = OCFS2_I(args->new_inode); + /* + * Adjust extent record count to reserve space for extended attribute. + * Inline data count had been adjusted in ocfs2_duplicate_inline_data(). + */ + if (!(new_oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) && + !(ocfs2_inode_is_fast_symlink(args->new_inode))) { + struct ocfs2_extent_list *el = &new_di->id2.i_list; + le16_add_cpu(&el->l_count, -(inline_size / + sizeof(struct ocfs2_extent_rec))); + } spin_lock(&new_oi->ip_lock); new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL; new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features); diff --git a/fs/pipe.c b/fs/pipe.c index d2c45e14e6d8..0e0752ef2715 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -726,11 +726,25 @@ pipe_poll(struct file *filp, poll_table *wait) return mask; } +static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe) +{ + int kill = 0; + + spin_lock(&inode->i_lock); + if (!--pipe->files) { + inode->i_pipe = NULL; + kill = 1; + } + spin_unlock(&inode->i_lock); + + if (kill) + free_pipe_info(pipe); +} + static int pipe_release(struct inode *inode, struct file *file) { - struct pipe_inode_info *pipe = inode->i_pipe; - int kill = 0; + struct pipe_inode_info *pipe = file->private_data; __pipe_lock(pipe); if (file->f_mode & FMODE_READ) @@ -743,17 +757,9 @@ pipe_release(struct inode *inode, struct file *file) kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); } - spin_lock(&inode->i_lock); - if (!--pipe->files) { - inode->i_pipe = NULL; - kill = 1; - } - spin_unlock(&inode->i_lock); __pipe_unlock(pipe); - if (kill) - free_pipe_info(pipe); - + put_pipe_info(inode, pipe); return 0; } @@ -1014,7 +1020,6 @@ static int fifo_open(struct inode *inode, struct file *filp) { struct pipe_inode_info *pipe; bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC; - int kill = 0; int ret; filp->f_version = 0; @@ -1130,15 +1135,9 @@ err_wr: goto err; err: - spin_lock(&inode->i_lock); - if (!--pipe->files) { - inode->i_pipe = NULL; - kill = 1; - } - spin_unlock(&inode->i_lock); __pipe_unlock(pipe); - if (kill) - free_pipe_info(pipe); + + put_pipe_info(inode, pipe); return ret; } diff --git a/fs/proc/root.c b/fs/proc/root.c index 41a6ea93f486..04ec276c7bab 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -110,7 +110,8 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, ns = task_active_pid_ns(current); options = data; - if (!current_user_ns()->may_mount_proc) + if (!current_user_ns()->may_mount_proc || + !ns_capable(ns->user_ns, CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 3e636d864d56..65fc60a07c47 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -792,14 +792,14 @@ typedef struct { } pagemap_entry_t; struct pagemapread { - int pos, len; + int pos, len; /* units: PM_ENTRY_BYTES, not bytes */ pagemap_entry_t *buffer; }; #define PAGEMAP_WALK_SIZE (PMD_SIZE) #define PAGEMAP_WALK_MASK (PMD_MASK) -#define PM_ENTRY_BYTES sizeof(u64) +#define PM_ENTRY_BYTES sizeof(pagemap_entry_t) #define PM_STATUS_BITS 3 #define PM_STATUS_OFFSET (64 - PM_STATUS_BITS) #define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET) @@ -1038,8 +1038,8 @@ static ssize_t pagemap_read(struct file *file, char __user *buf, if (!count) goto out_task; - pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); - pm.buffer = kmalloc(pm.len, GFP_TEMPORARY); + pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT); + pm.buffer = kmalloc(pm.len * PM_ENTRY_BYTES, GFP_TEMPORARY); ret = -ENOMEM; if (!pm.buffer) goto out_task; diff --git a/fs/read_write.c b/fs/read_write.c index 2cefa417be34..f6b7c600eb7f 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -947,9 +947,9 @@ out: return ret; } -COMPAT_SYSCALL_DEFINE3(readv, unsigned long, fd, +COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, const struct compat_iovec __user *,vec, - unsigned long, vlen) + compat_ulong_t, vlen) { struct fd f = fdget(fd); ssize_t ret; @@ -983,9 +983,9 @@ COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, return ret; } -COMPAT_SYSCALL_DEFINE5(preadv, unsigned long, fd, +COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, const struct compat_iovec __user *,vec, - unsigned long, vlen, u32, pos_low, u32, pos_high) + compat_ulong_t, vlen, u32, pos_low, u32, pos_high) { loff_t pos = ((loff_t)pos_high << 32) | pos_low; return compat_sys_preadv64(fd, vec, vlen, pos); @@ -1013,9 +1013,9 @@ out: return ret; } -COMPAT_SYSCALL_DEFINE3(writev, unsigned long, fd, +COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, const struct compat_iovec __user *, vec, - unsigned long, vlen) + compat_ulong_t, vlen) { struct fd f = fdget(fd); ssize_t ret; @@ -1049,9 +1049,9 @@ COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, return ret; } -COMPAT_SYSCALL_DEFINE5(pwritev, unsigned long, fd, +COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, const struct compat_iovec __user *,vec, - unsigned long, vlen, u32, pos_low, u32, pos_high) + compat_ulong_t, vlen, u32, pos_low, u32, pos_high) { loff_t pos = ((loff_t)pos_high << 32) | pos_low; return compat_sys_pwritev64(fd, vec, vlen, pos); diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c index 33532f79b4f7..1d48974c25dd 100644 --- a/fs/reiserfs/procfs.c +++ b/fs/reiserfs/procfs.c @@ -19,12 +19,13 @@ /* * LOCKING: * - * We rely on new Alexander Viro's super-block locking. + * These guys are evicted from procfs as the very first step in ->kill_sb(). * */ -static int show_version(struct seq_file *m, struct super_block *sb) +static int show_version(struct seq_file *m, void *unused) { + struct super_block *sb = m->private; char *format; if (REISERFS_SB(sb)->s_properties & (1 << REISERFS_3_6)) { @@ -66,8 +67,9 @@ static int show_version(struct seq_file *m, struct super_block *sb) #define DJP( x ) le32_to_cpu( jp -> x ) #define JF( x ) ( r -> s_journal -> x ) -static int show_super(struct seq_file *m, struct super_block *sb) +static int show_super(struct seq_file *m, void *unused) { + struct super_block *sb = m->private; struct reiserfs_sb_info *r = REISERFS_SB(sb); seq_printf(m, "state: \t%s\n" @@ -128,8 +130,9 @@ static int show_super(struct seq_file *m, struct super_block *sb) return 0; } -static int show_per_level(struct seq_file *m, struct super_block *sb) +static int show_per_level(struct seq_file *m, void *unused) { + struct super_block *sb = m->private; struct reiserfs_sb_info *r = REISERFS_SB(sb); int level; @@ -186,8 +189,9 @@ static int show_per_level(struct seq_file *m, struct super_block *sb) return 0; } -static int show_bitmap(struct seq_file *m, struct super_block *sb) +static int show_bitmap(struct seq_file *m, void *unused) { + struct super_block *sb = m->private; struct reiserfs_sb_info *r = REISERFS_SB(sb); seq_printf(m, "free_block: %lu\n" @@ -218,8 +222,9 @@ static int show_bitmap(struct seq_file *m, struct super_block *sb) return 0; } -static int show_on_disk_super(struct seq_file *m, struct super_block *sb) +static int show_on_disk_super(struct seq_file *m, void *unused) { + struct super_block *sb = m->private; struct reiserfs_sb_info *sb_info = REISERFS_SB(sb); struct reiserfs_super_block *rs = sb_info->s_rs; int hash_code = DFL(s_hash_function_code); @@ -261,8 +266,9 @@ static int show_on_disk_super(struct seq_file *m, struct super_block *sb) return 0; } -static int show_oidmap(struct seq_file *m, struct super_block *sb) +static int show_oidmap(struct seq_file *m, void *unused) { + struct super_block *sb = m->private; struct reiserfs_sb_info *sb_info = REISERFS_SB(sb); struct reiserfs_super_block *rs = sb_info->s_rs; unsigned int mapsize = le16_to_cpu(rs->s_v1.s_oid_cursize); @@ -291,8 +297,9 @@ static int show_oidmap(struct seq_file *m, struct super_block *sb) return 0; } -static int show_journal(struct seq_file *m, struct super_block *sb) +static int show_journal(struct seq_file *m, void *unused) { + struct super_block *sb = m->private; struct reiserfs_sb_info *r = REISERFS_SB(sb); struct reiserfs_super_block *rs = r->s_rs; struct journal_params *jp = &rs->s_v1.s_journal; @@ -383,92 +390,24 @@ static int show_journal(struct seq_file *m, struct super_block *sb) return 0; } -/* iterator */ -static int test_sb(struct super_block *sb, void *data) -{ - return data == sb; -} - -static int set_sb(struct super_block *sb, void *data) -{ - return -ENOENT; -} - -struct reiserfs_seq_private { - struct super_block *sb; - int (*show) (struct seq_file *, struct super_block *); -}; - -static void *r_start(struct seq_file *m, loff_t * pos) -{ - struct reiserfs_seq_private *priv = m->private; - loff_t l = *pos; - - if (l) - return NULL; - - if (IS_ERR(sget(&reiserfs_fs_type, test_sb, set_sb, 0, priv->sb))) - return NULL; - - up_write(&priv->sb->s_umount); - return priv->sb; -} - -static void *r_next(struct seq_file *m, void *v, loff_t * pos) -{ - ++*pos; - if (v) - deactivate_super(v); - return NULL; -} - -static void r_stop(struct seq_file *m, void *v) -{ - if (v) - deactivate_super(v); -} - -static int r_show(struct seq_file *m, void *v) -{ - struct reiserfs_seq_private *priv = m->private; - return priv->show(m, v); -} - -static const struct seq_operations r_ops = { - .start = r_start, - .next = r_next, - .stop = r_stop, - .show = r_show, -}; - static int r_open(struct inode *inode, struct file *file) { - struct reiserfs_seq_private *priv; - int ret = seq_open_private(file, &r_ops, - sizeof(struct reiserfs_seq_private)); - - if (!ret) { - struct seq_file *m = file->private_data; - priv = m->private; - priv->sb = proc_get_parent_data(inode); - priv->show = PDE_DATA(inode); - } - return ret; + return single_open(file, PDE_DATA(inode), + proc_get_parent_data(inode)); } static const struct file_operations r_file_operations = { .open = r_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_private, - .owner = THIS_MODULE, + .release = single_release, }; static struct proc_dir_entry *proc_info_root = NULL; static const char proc_info_root_name[] = "fs/reiserfs"; static void add_file(struct super_block *sb, char *name, - int (*func) (struct seq_file *, struct super_block *)) + int (*func) (struct seq_file *, void *)) { proc_create_data(name, 0, REISERFS_SB(sb)->procdir, &r_file_operations, func); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index f8a23c3078f8..e2e202a07b31 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -499,6 +499,7 @@ int remove_save_link(struct inode *inode, int truncate) static void reiserfs_kill_sb(struct super_block *s) { if (REISERFS_SB(s)) { + reiserfs_proc_info_done(s); /* * Force any pending inode evictions to occur now. Any * inodes to be removed that have extended attributes @@ -554,8 +555,6 @@ static void reiserfs_put_super(struct super_block *s) REISERFS_SB(s)->reserved_blocks); } - reiserfs_proc_info_done(s); - reiserfs_write_unlock(s); mutex_destroy(&REISERFS_SB(s)->lock); kfree(s->s_fs_info); diff --git a/fs/seq_file.c b/fs/seq_file.c index 774c1eb7f1c9..3dd44db1465e 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -328,6 +328,8 @@ loff_t seq_lseek(struct file *file, loff_t offset, int whence) m->read_pos = offset; retval = file->f_pos = offset; } + } else { + file->f_pos = offset; } } file->f_version = m->version; diff --git a/fs/splice.c b/fs/splice.c index d37431dd60a1..4b5a5fac3383 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -555,6 +555,24 @@ static const struct pipe_buf_operations default_pipe_buf_ops = { .get = generic_pipe_buf_get, }; +static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + return 1; +} + +/* Pipe buffer operations for a socket and similar. */ +const struct pipe_buf_operations nosteal_pipe_buf_ops = { + .can_merge = 0, + .map = generic_pipe_buf_map, + .unmap = generic_pipe_buf_unmap, + .confirm = generic_pipe_buf_confirm, + .release = generic_pipe_buf_release, + .steal = generic_pipe_buf_nosteal, + .get = generic_pipe_buf_get, +}; +EXPORT_SYMBOL(nosteal_pipe_buf_ops); + static ssize_t kernel_readv(struct file *file, const struct iovec *vec, unsigned long vlen, loff_t offset) { diff --git a/fs/statfs.c b/fs/statfs.c index c219e733f553..083dc0ac9140 100644 --- a/fs/statfs.c +++ b/fs/statfs.c @@ -94,7 +94,7 @@ retry: int fd_statfs(int fd, struct kstatfs *st) { - struct fd f = fdget(fd); + struct fd f = fdget_raw(fd); int error = -EBADF; if (f.file) { error = vfs_statfs(&f.file->f_path, st); diff --git a/fs/super.c b/fs/super.c index 7465d4364208..68307c029228 100644 --- a/fs/super.c +++ b/fs/super.c @@ -336,19 +336,19 @@ EXPORT_SYMBOL(deactivate_super); * and want to turn it into a full-blown active reference. grab_super() * is called with sb_lock held and drops it. Returns 1 in case of * success, 0 if we had failed (superblock contents was already dead or - * dying when grab_super() had been called). + * dying when grab_super() had been called). Note that this is only + * called for superblocks not in rundown mode (== ones still on ->fs_supers + * of their type), so increment of ->s_count is OK here. */ static int grab_super(struct super_block *s) __releases(sb_lock) { - if (atomic_inc_not_zero(&s->s_active)) { - spin_unlock(&sb_lock); - return 1; - } - /* it's going away */ s->s_count++; spin_unlock(&sb_lock); - /* wait for it to die */ down_write(&s->s_umount); + if ((s->s_flags & MS_BORN) && atomic_inc_not_zero(&s->s_active)) { + put_super(s); + return 1; + } up_write(&s->s_umount); put_super(s); return 0; @@ -463,11 +463,6 @@ retry: destroy_super(s); s = NULL; } - down_write(&old->s_umount); - if (unlikely(!(old->s_flags & MS_BORN))) { - deactivate_locked_super(old); - goto retry; - } return old; } } @@ -660,10 +655,10 @@ restart: if (hlist_unhashed(&sb->s_instances)) continue; if (sb->s_bdev == bdev) { - if (grab_super(sb)) /* drops sb_lock */ - return sb; - else + if (!grab_super(sb)) goto restart; + up_write(&sb->s_umount); + return sb; } } spin_unlock(&sb_lock); diff --git a/fs/sysv/super.c b/fs/sysv/super.c index d0c6a007ce83..eda10959714f 100644 --- a/fs/sysv/super.c +++ b/fs/sysv/super.c @@ -487,6 +487,7 @@ static int v7_fill_super(struct super_block *sb, void *data, int silent) sbi->s_sb = sb; sbi->s_block_base = 0; sbi->s_type = FSTYPE_V7; + mutex_init(&sbi->s_lock); sb->s_fs_info = sbi; sb_set_blocksize(sb, 512); diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index f21acf0ef01f..879b9976c12b 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1412,7 +1412,7 @@ static int mount_ubifs(struct ubifs_info *c) ubifs_msg("mounted UBI device %d, volume %d, name \"%s\"%s", c->vi.ubi_num, c->vi.vol_id, c->vi.name, - c->ro_mount ? ", R/O mode" : NULL); + c->ro_mount ? ", R/O mode" : ""); x = (long long)c->main_lebs * c->leb_size; y = (long long)c->log_lebs * c->leb_size + c->max_bud_bytes; ubifs_msg("LEB size: %d bytes (%d KiB), min./max. I/O unit sizes: %d bytes/%d bytes", diff --git a/fs/udf/super.c b/fs/udf/super.c index 9ac4057a86c9..839a2bad7f45 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -630,6 +630,12 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) struct udf_sb_info *sbi = UDF_SB(sb); int error = 0; + if (sbi->s_lvid_bh) { + int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev); + if (write_rev > UDF_MAX_WRITE_VERSION && !(*flags & MS_RDONLY)) + return -EACCES; + } + uopt.flags = sbi->s_flags; uopt.uid = sbi->s_uid; uopt.gid = sbi->s_gid; @@ -649,12 +655,6 @@ static int udf_remount_fs(struct super_block *sb, int *flags, char *options) sbi->s_dmode = uopt.dmode; write_unlock(&sbi->s_cred_lock); - if (sbi->s_lvid_bh) { - int write_rev = le16_to_cpu(udf_sb_lvidiu(sbi)->minUDFWriteRev); - if (write_rev > UDF_MAX_WRITE_VERSION) - *flags |= MS_RDONLY; - } - if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) goto out_unlock; @@ -843,27 +843,38 @@ static int udf_find_fileset(struct super_block *sb, return 1; } +/* + * Load primary Volume Descriptor Sequence + * + * Return <0 on error, 0 on success. -EAGAIN is special meaning next sequence + * should be tried. + */ static int udf_load_pvoldesc(struct super_block *sb, sector_t block) { struct primaryVolDesc *pvoldesc; struct ustr *instr, *outstr; struct buffer_head *bh; uint16_t ident; - int ret = 1; + int ret = -ENOMEM; instr = kmalloc(sizeof(struct ustr), GFP_NOFS); if (!instr) - return 1; + return -ENOMEM; outstr = kmalloc(sizeof(struct ustr), GFP_NOFS); if (!outstr) goto out1; bh = udf_read_tagged(sb, block, block, &ident); - if (!bh) + if (!bh) { + ret = -EAGAIN; goto out2; + } - BUG_ON(ident != TAG_IDENT_PVD); + if (ident != TAG_IDENT_PVD) { + ret = -EIO; + goto out_bh; + } pvoldesc = (struct primaryVolDesc *)bh->b_data; @@ -889,8 +900,9 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block) if (udf_CS0toUTF8(outstr, instr)) udf_debug("volSetIdent[] = '%s'\n", outstr->u_name); - brelse(bh); ret = 0; +out_bh: + brelse(bh); out2: kfree(outstr); out1: @@ -947,7 +959,7 @@ static int udf_load_metadata_files(struct super_block *sb, int partition) if (mdata->s_mirror_fe == NULL) { udf_err(sb, "Both metadata and mirror metadata inode efe can not found\n"); - goto error_exit; + return -EIO; } } @@ -964,23 +976,18 @@ static int udf_load_metadata_files(struct super_block *sb, int partition) addr.logicalBlockNum, addr.partitionReferenceNum); mdata->s_bitmap_fe = udf_iget(sb, &addr); - if (mdata->s_bitmap_fe == NULL) { if (sb->s_flags & MS_RDONLY) udf_warn(sb, "bitmap inode efe not found but it's ok since the disc is mounted read-only\n"); else { udf_err(sb, "bitmap inode efe not found and attempted read-write mount\n"); - goto error_exit; + return -EIO; } } } udf_debug("udf_load_metadata_files Ok\n"); - return 0; - -error_exit: - return 1; } static void udf_load_fileset(struct super_block *sb, struct buffer_head *bh, @@ -1069,7 +1076,7 @@ static int udf_fill_partdesc_info(struct super_block *sb, if (!map->s_uspace.s_table) { udf_debug("cannot load unallocSpaceTable (part %d)\n", p_index); - return 1; + return -EIO; } map->s_partition_flags |= UDF_PART_FLAG_UNALLOC_TABLE; udf_debug("unallocSpaceTable (part %d) @ %ld\n", @@ -1079,7 +1086,7 @@ static int udf_fill_partdesc_info(struct super_block *sb, if (phd->unallocSpaceBitmap.extLength) { struct udf_bitmap *bitmap = udf_sb_alloc_bitmap(sb, p_index); if (!bitmap) - return 1; + return -ENOMEM; map->s_uspace.s_bitmap = bitmap; bitmap->s_extPosition = le32_to_cpu( phd->unallocSpaceBitmap.extPosition); @@ -1102,7 +1109,7 @@ static int udf_fill_partdesc_info(struct super_block *sb, if (!map->s_fspace.s_table) { udf_debug("cannot load freedSpaceTable (part %d)\n", p_index); - return 1; + return -EIO; } map->s_partition_flags |= UDF_PART_FLAG_FREED_TABLE; @@ -1113,7 +1120,7 @@ static int udf_fill_partdesc_info(struct super_block *sb, if (phd->freedSpaceBitmap.extLength) { struct udf_bitmap *bitmap = udf_sb_alloc_bitmap(sb, p_index); if (!bitmap) - return 1; + return -ENOMEM; map->s_fspace.s_bitmap = bitmap; bitmap->s_extPosition = le32_to_cpu( phd->freedSpaceBitmap.extPosition); @@ -1165,7 +1172,7 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index) udf_find_vat_block(sb, p_index, type1_index, blocks - 1); } if (!sbi->s_vat_inode) - return 1; + return -EIO; if (map->s_partition_type == UDF_VIRTUAL_MAP15) { map->s_type_specific.s_virtual.s_start_offset = 0; @@ -1177,7 +1184,7 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index) pos = udf_block_map(sbi->s_vat_inode, 0); bh = sb_bread(sb, pos); if (!bh) - return 1; + return -EIO; vat20 = (struct virtualAllocationTable20 *)bh->b_data; } else { vat20 = (struct virtualAllocationTable20 *) @@ -1195,6 +1202,12 @@ static int udf_load_vat(struct super_block *sb, int p_index, int type1_index) return 0; } +/* + * Load partition descriptor block + * + * Returns <0 on error, 0 on success, -EAGAIN is special - try next descriptor + * sequence. + */ static int udf_load_partdesc(struct super_block *sb, sector_t block) { struct buffer_head *bh; @@ -1204,13 +1217,15 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block) int i, type1_idx; uint16_t partitionNumber; uint16_t ident; - int ret = 0; + int ret; bh = udf_read_tagged(sb, block, block, &ident); if (!bh) - return 1; - if (ident != TAG_IDENT_PD) + return -EAGAIN; + if (ident != TAG_IDENT_PD) { + ret = 0; goto out_bh; + } p = (struct partitionDesc *)bh->b_data; partitionNumber = le16_to_cpu(p->partitionNumber); @@ -1229,10 +1244,13 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block) if (i >= sbi->s_partitions) { udf_debug("Partition (%d) not found in partition map\n", partitionNumber); + ret = 0; goto out_bh; } ret = udf_fill_partdesc_info(sb, p, i); + if (ret < 0) + goto out_bh; /* * Now rescan for VIRTUAL or METADATA partitions when SPARABLE and @@ -1249,32 +1267,37 @@ static int udf_load_partdesc(struct super_block *sb, sector_t block) break; } - if (i >= sbi->s_partitions) + if (i >= sbi->s_partitions) { + ret = 0; goto out_bh; + } ret = udf_fill_partdesc_info(sb, p, i); - if (ret) + if (ret < 0) goto out_bh; if (map->s_partition_type == UDF_METADATA_MAP25) { ret = udf_load_metadata_files(sb, i); - if (ret) { + if (ret < 0) { udf_err(sb, "error loading MetaData partition map %d\n", i); goto out_bh; } } else { - ret = udf_load_vat(sb, i, type1_idx); - if (ret) - goto out_bh; /* - * Mark filesystem read-only if we have a partition with - * virtual map since we don't handle writing to it (we - * overwrite blocks instead of relocating them). + * If we have a partition with virtual map, we don't handle + * writing to it (we overwrite blocks instead of relocating + * them). */ - sb->s_flags |= MS_RDONLY; - pr_notice("Filesystem marked read-only because writing to pseudooverwrite partition is not implemented\n"); + if (!(sb->s_flags & MS_RDONLY)) { + ret = -EACCES; + goto out_bh; + } + ret = udf_load_vat(sb, i, type1_idx); + if (ret < 0) + goto out_bh; } + ret = 0; out_bh: /* In case loading failed, we handle cleanup in udf_fill_super */ brelse(bh); @@ -1340,11 +1363,11 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, uint16_t ident; struct buffer_head *bh; unsigned int table_len; - int ret = 0; + int ret; bh = udf_read_tagged(sb, block, block, &ident); if (!bh) - return 1; + return -EAGAIN; BUG_ON(ident != TAG_IDENT_LVD); lvd = (struct logicalVolDesc *)bh->b_data; table_len = le32_to_cpu(lvd->mapTableLength); @@ -1352,7 +1375,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, udf_err(sb, "error loading logical volume descriptor: " "Partition table too long (%u > %lu)\n", table_len, sb->s_blocksize - sizeof(*lvd)); - ret = 1; + ret = -EIO; goto out_bh; } @@ -1396,11 +1419,10 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, } else if (!strncmp(upm2->partIdent.ident, UDF_ID_SPARABLE, strlen(UDF_ID_SPARABLE))) { - if (udf_load_sparable_map(sb, map, - (struct sparablePartitionMap *)gpm) < 0) { - ret = 1; + ret = udf_load_sparable_map(sb, map, + (struct sparablePartitionMap *)gpm); + if (ret < 0) goto out_bh; - } } else if (!strncmp(upm2->partIdent.ident, UDF_ID_METADATA, strlen(UDF_ID_METADATA))) { @@ -1465,7 +1487,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, } if (lvd->integritySeqExt.extLength) udf_load_logicalvolint(sb, leea_to_cpu(lvd->integritySeqExt)); - + ret = 0; out_bh: brelse(bh); return ret; @@ -1503,22 +1525,18 @@ static void udf_load_logicalvolint(struct super_block *sb, struct kernel_extent_ } /* - * udf_process_sequence - * - * PURPOSE - * Process a main/reserve volume descriptor sequence. - * - * PRE-CONDITIONS - * sb Pointer to _locked_ superblock. - * block First block of first extent of the sequence. - * lastblock Lastblock of first extent of the sequence. + * Process a main/reserve volume descriptor sequence. + * @block First block of first extent of the sequence. + * @lastblock Lastblock of first extent of the sequence. + * @fileset There we store extent containing root fileset * - * HISTORY - * July 1, 1997 - Andrew E. Mileski - * Written, tested, and released. + * Returns <0 on error, 0 on success. -EAGAIN is special - try next descriptor + * sequence */ -static noinline int udf_process_sequence(struct super_block *sb, long block, - long lastblock, struct kernel_lb_addr *fileset) +static noinline int udf_process_sequence( + struct super_block *sb, + sector_t block, sector_t lastblock, + struct kernel_lb_addr *fileset) { struct buffer_head *bh = NULL; struct udf_vds_record vds[VDS_POS_LENGTH]; @@ -1529,6 +1547,7 @@ static noinline int udf_process_sequence(struct super_block *sb, long block, uint32_t vdsn; uint16_t ident; long next_s = 0, next_e = 0; + int ret; memset(vds, 0, sizeof(struct udf_vds_record) * VDS_POS_LENGTH); @@ -1543,7 +1562,7 @@ static noinline int udf_process_sequence(struct super_block *sb, long block, udf_err(sb, "Block %llu of volume descriptor sequence is corrupted or we could not read it\n", (unsigned long long)block); - return 1; + return -EAGAIN; } /* Process each descriptor (ISO 13346 3/8.3-8.4) */ @@ -1616,14 +1635,19 @@ static noinline int udf_process_sequence(struct super_block *sb, long block, */ if (!vds[VDS_POS_PRIMARY_VOL_DESC].block) { udf_err(sb, "Primary Volume Descriptor not found!\n"); - return 1; + return -EAGAIN; + } + ret = udf_load_pvoldesc(sb, vds[VDS_POS_PRIMARY_VOL_DESC].block); + if (ret < 0) + return ret; + + if (vds[VDS_POS_LOGICAL_VOL_DESC].block) { + ret = udf_load_logicalvol(sb, + vds[VDS_POS_LOGICAL_VOL_DESC].block, + fileset); + if (ret < 0) + return ret; } - if (udf_load_pvoldesc(sb, vds[VDS_POS_PRIMARY_VOL_DESC].block)) - return 1; - - if (vds[VDS_POS_LOGICAL_VOL_DESC].block && udf_load_logicalvol(sb, - vds[VDS_POS_LOGICAL_VOL_DESC].block, fileset)) - return 1; if (vds[VDS_POS_PARTITION_DESC].block) { /* @@ -1632,19 +1656,27 @@ static noinline int udf_process_sequence(struct super_block *sb, long block, */ for (block = vds[VDS_POS_PARTITION_DESC].block; block < vds[VDS_POS_TERMINATING_DESC].block; - block++) - if (udf_load_partdesc(sb, block)) - return 1; + block++) { + ret = udf_load_partdesc(sb, block); + if (ret < 0) + return ret; + } } return 0; } +/* + * Load Volume Descriptor Sequence described by anchor in bh + * + * Returns <0 on error, 0 on success + */ static int udf_load_sequence(struct super_block *sb, struct buffer_head *bh, struct kernel_lb_addr *fileset) { struct anchorVolDescPtr *anchor; - long main_s, main_e, reserve_s, reserve_e; + sector_t main_s, main_e, reserve_s, reserve_e; + int ret; anchor = (struct anchorVolDescPtr *)bh->b_data; @@ -1662,18 +1694,26 @@ static int udf_load_sequence(struct super_block *sb, struct buffer_head *bh, /* Process the main & reserve sequences */ /* responsible for finding the PartitionDesc(s) */ - if (!udf_process_sequence(sb, main_s, main_e, fileset)) - return 1; - udf_sb_free_partitions(sb); - if (!udf_process_sequence(sb, reserve_s, reserve_e, fileset)) - return 1; + ret = udf_process_sequence(sb, main_s, main_e, fileset); + if (ret != -EAGAIN) + return ret; udf_sb_free_partitions(sb); - return 0; + ret = udf_process_sequence(sb, reserve_s, reserve_e, fileset); + if (ret < 0) { + udf_sb_free_partitions(sb); + /* No sequence was OK, return -EIO */ + if (ret == -EAGAIN) + ret = -EIO; + } + return ret; } /* * Check whether there is an anchor block in the given block and * load Volume Descriptor Sequence if so. + * + * Returns <0 on error, 0 on success, -EAGAIN is special - try next anchor + * block */ static int udf_check_anchor_block(struct super_block *sb, sector_t block, struct kernel_lb_addr *fileset) @@ -1685,33 +1725,40 @@ static int udf_check_anchor_block(struct super_block *sb, sector_t block, if (UDF_QUERY_FLAG(sb, UDF_FLAG_VARCONV) && udf_fixed_to_variable(block) >= sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits) - return 0; + return -EAGAIN; bh = udf_read_tagged(sb, block, block, &ident); if (!bh) - return 0; + return -EAGAIN; if (ident != TAG_IDENT_AVDP) { brelse(bh); - return 0; + return -EAGAIN; } ret = udf_load_sequence(sb, bh, fileset); brelse(bh); return ret; } -/* Search for an anchor volume descriptor pointer */ -static sector_t udf_scan_anchors(struct super_block *sb, sector_t lastblock, - struct kernel_lb_addr *fileset) +/* + * Search for an anchor volume descriptor pointer. + * + * Returns < 0 on error, 0 on success. -EAGAIN is special - try next set + * of anchors. + */ +static int udf_scan_anchors(struct super_block *sb, sector_t *lastblock, + struct kernel_lb_addr *fileset) { sector_t last[6]; int i; struct udf_sb_info *sbi = UDF_SB(sb); int last_count = 0; + int ret; /* First try user provided anchor */ if (sbi->s_anchor) { - if (udf_check_anchor_block(sb, sbi->s_anchor, fileset)) - return lastblock; + ret = udf_check_anchor_block(sb, sbi->s_anchor, fileset); + if (ret != -EAGAIN) + return ret; } /* * according to spec, anchor is in either: @@ -1720,39 +1767,46 @@ static sector_t udf_scan_anchors(struct super_block *sb, sector_t lastblock, * lastblock * however, if the disc isn't closed, it could be 512. */ - if (udf_check_anchor_block(sb, sbi->s_session + 256, fileset)) - return lastblock; + ret = udf_check_anchor_block(sb, sbi->s_session + 256, fileset); + if (ret != -EAGAIN) + return ret; /* * The trouble is which block is the last one. Drives often misreport * this so we try various possibilities. */ - last[last_count++] = lastblock; - if (lastblock >= 1) - last[last_count++] = lastblock - 1; - last[last_count++] = lastblock + 1; - if (lastblock >= 2) - last[last_count++] = lastblock - 2; - if (lastblock >= 150) - last[last_count++] = lastblock - 150; - if (lastblock >= 152) - last[last_count++] = lastblock - 152; + last[last_count++] = *lastblock; + if (*lastblock >= 1) + last[last_count++] = *lastblock - 1; + last[last_count++] = *lastblock + 1; + if (*lastblock >= 2) + last[last_count++] = *lastblock - 2; + if (*lastblock >= 150) + last[last_count++] = *lastblock - 150; + if (*lastblock >= 152) + last[last_count++] = *lastblock - 152; for (i = 0; i < last_count; i++) { if (last[i] >= sb->s_bdev->bd_inode->i_size >> sb->s_blocksize_bits) continue; - if (udf_check_anchor_block(sb, last[i], fileset)) - return last[i]; + ret = udf_check_anchor_block(sb, last[i], fileset); + if (ret != -EAGAIN) { + if (!ret) + *lastblock = last[i]; + return ret; + } if (last[i] < 256) continue; - if (udf_check_anchor_block(sb, last[i] - 256, fileset)) - return last[i]; + ret = udf_check_anchor_block(sb, last[i] - 256, fileset); + if (ret != -EAGAIN) { + if (!ret) + *lastblock = last[i]; + return ret; + } } /* Finally try block 512 in case media is open */ - if (udf_check_anchor_block(sb, sbi->s_session + 512, fileset)) - return last[0]; - return 0; + return udf_check_anchor_block(sb, sbi->s_session + 512, fileset); } /* @@ -1760,54 +1814,59 @@ static sector_t udf_scan_anchors(struct super_block *sb, sector_t lastblock, * area specified by it. The function expects sbi->s_lastblock to be the last * block on the media. * - * Return 1 if ok, 0 if not found. - * + * Return <0 on error, 0 if anchor found. -EAGAIN is special meaning anchor + * was not found. */ static int udf_find_anchor(struct super_block *sb, struct kernel_lb_addr *fileset) { - sector_t lastblock; struct udf_sb_info *sbi = UDF_SB(sb); + sector_t lastblock = sbi->s_last_block; + int ret; - lastblock = udf_scan_anchors(sb, sbi->s_last_block, fileset); - if (lastblock) + ret = udf_scan_anchors(sb, &lastblock, fileset); + if (ret != -EAGAIN) goto out; /* No anchor found? Try VARCONV conversion of block numbers */ UDF_SET_FLAG(sb, UDF_FLAG_VARCONV); + lastblock = udf_variable_to_fixed(sbi->s_last_block); /* Firstly, we try to not convert number of the last block */ - lastblock = udf_scan_anchors(sb, - udf_variable_to_fixed(sbi->s_last_block), - fileset); - if (lastblock) + ret = udf_scan_anchors(sb, &lastblock, fileset); + if (ret != -EAGAIN) goto out; + lastblock = sbi->s_last_block; /* Secondly, we try with converted number of the last block */ - lastblock = udf_scan_anchors(sb, sbi->s_last_block, fileset); - if (!lastblock) { + ret = udf_scan_anchors(sb, &lastblock, fileset); + if (ret < 0) { /* VARCONV didn't help. Clear it. */ UDF_CLEAR_FLAG(sb, UDF_FLAG_VARCONV); - return 0; } out: - sbi->s_last_block = lastblock; - return 1; + if (ret == 0) + sbi->s_last_block = lastblock; + return ret; } /* * Check Volume Structure Descriptor, find Anchor block and load Volume - * Descriptor Sequence + * Descriptor Sequence. + * + * Returns < 0 on error, 0 on success. -EAGAIN is special meaning anchor + * block was not found. */ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt, int silent, struct kernel_lb_addr *fileset) { struct udf_sb_info *sbi = UDF_SB(sb); loff_t nsr_off; + int ret; if (!sb_set_blocksize(sb, uopt->blocksize)) { if (!silent) udf_warn(sb, "Bad block size\n"); - return 0; + return -EINVAL; } sbi->s_last_block = uopt->lastblock; if (!uopt->novrs) { @@ -1828,12 +1887,13 @@ static int udf_load_vrs(struct super_block *sb, struct udf_options *uopt, /* Look for anchor block and load Volume Descriptor Sequence */ sbi->s_anchor = uopt->anchor; - if (!udf_find_anchor(sb, fileset)) { - if (!silent) + ret = udf_find_anchor(sb, fileset); + if (ret < 0) { + if (!silent && ret == -EAGAIN) udf_warn(sb, "No anchor found\n"); - return 0; + return ret; } - return 1; + return 0; } static void udf_open_lvid(struct super_block *sb) @@ -1939,7 +1999,7 @@ u64 lvid_get_unique_id(struct super_block *sb) static int udf_fill_super(struct super_block *sb, void *options, int silent) { - int ret; + int ret = -EINVAL; struct inode *inode = NULL; struct udf_options uopt; struct kernel_lb_addr rootdir, fileset; @@ -2011,7 +2071,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) } else { uopt.blocksize = bdev_logical_block_size(sb->s_bdev); ret = udf_load_vrs(sb, &uopt, silent, &fileset); - if (!ret && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) { + if (ret == -EAGAIN && uopt.blocksize != UDF_DEFAULT_BLOCKSIZE) { if (!silent) pr_notice("Rescanning with blocksize %d\n", UDF_DEFAULT_BLOCKSIZE); @@ -2021,8 +2081,11 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) ret = udf_load_vrs(sb, &uopt, silent, &fileset); } } - if (!ret) { - udf_warn(sb, "No partition found (1)\n"); + if (ret < 0) { + if (ret == -EAGAIN) { + udf_warn(sb, "No partition found (1)\n"); + ret = -EINVAL; + } goto error_out; } @@ -2040,9 +2103,13 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) udf_err(sb, "minUDFReadRev=%x (max is %x)\n", le16_to_cpu(lvidiu->minUDFReadRev), UDF_MAX_READ_VERSION); + ret = -EINVAL; + goto error_out; + } else if (minUDFWriteRev > UDF_MAX_WRITE_VERSION && + !(sb->s_flags & MS_RDONLY)) { + ret = -EACCES; goto error_out; - } else if (minUDFWriteRev > UDF_MAX_WRITE_VERSION) - sb->s_flags |= MS_RDONLY; + } sbi->s_udfrev = minUDFWriteRev; @@ -2054,17 +2121,20 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) if (!sbi->s_partitions) { udf_warn(sb, "No partition found (2)\n"); + ret = -EINVAL; goto error_out; } if (sbi->s_partmaps[sbi->s_partition].s_partition_flags & - UDF_PART_FLAG_READ_ONLY) { - pr_notice("Partition marked readonly; forcing readonly mount\n"); - sb->s_flags |= MS_RDONLY; + UDF_PART_FLAG_READ_ONLY && + !(sb->s_flags & MS_RDONLY)) { + ret = -EACCES; + goto error_out; } if (udf_find_fileset(sb, &fileset, &rootdir)) { udf_warn(sb, "No fileset found\n"); + ret = -EINVAL; goto error_out; } @@ -2086,6 +2156,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) if (!inode) { udf_err(sb, "Error in udf_iget, block=%d, partition=%d\n", rootdir.logicalBlockNum, rootdir.partitionReferenceNum); + ret = -EIO; goto error_out; } @@ -2093,6 +2164,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent) sb->s_root = d_make_root(inode); if (!sb->s_root) { udf_err(sb, "Couldn't allocate root dentry\n"); + ret = -ENOMEM; goto error_out; } sb->s_maxbytes = MAX_LFS_FILESIZE; @@ -2113,7 +2185,7 @@ error_out: kfree(sbi); sb->s_fs_info = NULL; - return -EINVAL; + return ret; } void _udf_err(struct super_block *sb, const char *function, diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 0b8b2a13cd24..eca6f9d8a263 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -1223,6 +1223,7 @@ xfs_da3_node_toosmall( /* start with smaller blk num */ forward = nodehdr.forw < nodehdr.back; for (i = 0; i < 2; forward = !forward, i++) { + struct xfs_da3_icnode_hdr thdr; if (forward) blkno = nodehdr.forw; else @@ -1235,10 +1236,10 @@ xfs_da3_node_toosmall( return(error); node = bp->b_addr; - xfs_da3_node_hdr_from_disk(&nodehdr, node); + xfs_da3_node_hdr_from_disk(&thdr, node); xfs_trans_brelse(state->args->trans, bp); - if (count - nodehdr.count >= 0) + if (count - thdr.count >= 0) break; /* fits with at least 25% to spare */ } if (i >= 2) { diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 3c3644ea825b..2288db4e1784 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -216,6 +216,8 @@ xfs_growfs_data_private( */ nfree = 0; for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) { + __be32 *agfl_bno; + /* * AG freespace header block */ @@ -275,8 +277,10 @@ xfs_growfs_data_private( agfl->agfl_seqno = cpu_to_be32(agno); uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_uuid); } + + agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp); for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++) - agfl->agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK); + agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK); error = xfs_bwrite(bp); xfs_buf_relse(bp); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 5e999680094a..83dfe6e73235 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -409,7 +409,8 @@ xfs_attrlist_by_handle( return -XFS_ERROR(EPERM); if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t))) return -XFS_ERROR(EFAULT); - if (al_hreq.buflen > XATTR_LIST_MAX) + if (al_hreq.buflen < sizeof(struct attrlist) || + al_hreq.buflen > XATTR_LIST_MAX) return -XFS_ERROR(EINVAL); /* @@ -1612,6 +1613,12 @@ xfs_file_ioctl( case XFS_IOC_FREE_EOFBLOCKS: { struct xfs_eofblocks eofb; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (mp->m_flags & XFS_MOUNT_RDONLY) + return -XFS_ERROR(EROFS); + if (copy_from_user(&eofb, arg, sizeof(eofb))) return -XFS_ERROR(EFAULT); diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index c0c66259cc91..68799d7f02cc 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -359,7 +359,8 @@ xfs_compat_attrlist_by_handle( if (copy_from_user(&al_hreq, arg, sizeof(compat_xfs_fsop_attrlist_handlereq_t))) return -XFS_ERROR(EFAULT); - if (al_hreq.buflen > XATTR_LIST_MAX) + if (al_hreq.buflen < sizeof(struct attrlist) || + al_hreq.buflen > XATTR_LIST_MAX) return -XFS_ERROR(EINVAL); /* |