diff options
Diffstat (limited to 'fs')
64 files changed, 585 insertions, 310 deletions
@@ -69,9 +69,9 @@ struct aio_ring { #define AIO_RING_PAGES 8 struct kioctx_table { - struct rcu_head rcu; - unsigned nr; - struct kioctx *table[]; + struct rcu_head rcu; + unsigned nr; + struct kioctx __rcu *table[]; }; struct kioctx_cpu { @@ -116,7 +116,9 @@ struct kioctx { struct page **ring_pages; long nr_pages; - struct swork_event free_work; + struct rcu_head free_rcu; + struct work_struct free_work; /* see free_ioctx() */ + struct swork_event free_swork; /* see free_ioctx_users() */ /* * signals when all in-flight requests are done @@ -331,7 +333,7 @@ static int aio_ring_mremap(struct vm_area_struct *vma) for (i = 0; i < table->nr; i++) { struct kioctx *ctx; - ctx = table->table[i]; + ctx = rcu_dereference(table->table[i]); if (ctx && ctx->aio_ring_file == file) { if (!atomic_read(&ctx->dead)) { ctx->user_id = ctx->mmap_base = vma->vm_start; @@ -583,9 +585,15 @@ static int kiocb_cancel(struct aio_kiocb *kiocb) return cancel(&kiocb->common); } -static void free_ioctx(struct swork_event *sev) +/* + * free_ioctx() should be RCU delayed to synchronize against the RCU + * protected lookup_ioctx() and also needs process context to call + * aio_free_ring(), so the double bouncing through kioctx->free_rcu and + * ->free_work. + */ +static void free_ioctx(struct work_struct *work) { - struct kioctx *ctx = container_of(sev, struct kioctx, free_work); + struct kioctx *ctx = container_of(work, struct kioctx, free_work); pr_debug("freeing %p\n", ctx); @@ -596,6 +604,14 @@ static void free_ioctx(struct swork_event *sev) kmem_cache_free(kioctx_cachep, ctx); } +static void free_ioctx_rcufn(struct rcu_head *head) +{ + struct kioctx *ctx = container_of(head, struct kioctx, free_rcu); + + INIT_WORK(&ctx->free_work, free_ioctx); + schedule_work(&ctx->free_work); +} + static void free_ioctx_reqs(struct percpu_ref *ref) { struct kioctx *ctx = container_of(ref, struct kioctx, reqs); @@ -604,8 +620,8 @@ static void free_ioctx_reqs(struct percpu_ref *ref) if (ctx->rq_wait && atomic_dec_and_test(&ctx->rq_wait->count)) complete(&ctx->rq_wait->comp); - INIT_SWORK(&ctx->free_work, free_ioctx); - swork_queue(&ctx->free_work); + /* Synchronize against RCU protected table->table[] dereferences */ + call_rcu(&ctx->free_rcu, free_ioctx_rcufn); } /* @@ -615,7 +631,7 @@ static void free_ioctx_reqs(struct percpu_ref *ref) */ static void free_ioctx_users_work(struct swork_event *sev) { - struct kioctx *ctx = container_of(sev, struct kioctx, free_work); + struct kioctx *ctx = container_of(sev, struct kioctx, free_swork); struct aio_kiocb *req; spin_lock_irq(&ctx->ctx_lock); @@ -638,8 +654,8 @@ static void free_ioctx_users(struct percpu_ref *ref) { struct kioctx *ctx = container_of(ref, struct kioctx, users); - INIT_SWORK(&ctx->free_work, free_ioctx_users_work); - swork_queue(&ctx->free_work); + INIT_SWORK(&ctx->free_swork, free_ioctx_users_work); + swork_queue(&ctx->free_swork); } static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) @@ -654,9 +670,9 @@ static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm) while (1) { if (table) for (i = 0; i < table->nr; i++) - if (!table->table[i]) { + if (!rcu_access_pointer(table->table[i])) { ctx->id = i; - table->table[i] = ctx; + rcu_assign_pointer(table->table[i], ctx); spin_unlock(&mm->ioctx_lock); /* While kioctx setup is in progress, @@ -831,11 +847,11 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, } table = rcu_dereference_raw(mm->ioctx_table); - WARN_ON(ctx != table->table[ctx->id]); - table->table[ctx->id] = NULL; + WARN_ON(ctx != rcu_access_pointer(table->table[ctx->id])); + RCU_INIT_POINTER(table->table[ctx->id], NULL); spin_unlock(&mm->ioctx_lock); - /* percpu_ref_kill() will do the necessary call_rcu() */ + /* free_ioctx_reqs() will do the necessary RCU synchronization */ wake_up_all(&ctx->wait); /* @@ -877,7 +893,8 @@ void exit_aio(struct mm_struct *mm) skipped = 0; for (i = 0; i < table->nr; ++i) { - struct kioctx *ctx = table->table[i]; + struct kioctx *ctx = + rcu_dereference_protected(table->table[i], true); if (!ctx) { skipped++; @@ -1066,7 +1083,7 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id) if (!table || id >= table->nr) goto out; - ctx = table->table[id]; + ctx = rcu_dereference(table->table[id]); if (ctx && ctx->user_id == ctx_id) { percpu_ref_get(&ctx->users); ret = ctx; diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index a11f73174877..6182d693cf43 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -746,7 +746,7 @@ static int autofs4_dir_mkdir(struct inode *dir, autofs4_del_active(dentry); - inode = autofs4_get_inode(dir->i_sb, S_IFDIR | 0555); + inode = autofs4_get_inode(dir->i_sb, S_IFDIR | mode); if (!inode) return -ENOMEM; d_add(dentry, inode); diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 8d8370ddb6b2..1ba49ebe67da 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -114,13 +114,17 @@ out: int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) { int ret; + umode_t old_mode = inode->i_mode; if (type == ACL_TYPE_ACCESS && acl) { ret = posix_acl_update_mode(inode, &inode->i_mode, &acl); if (ret) return ret; } - return __btrfs_set_acl(NULL, inode, acl, type); + ret = __btrfs_set_acl(NULL, inode, acl, type); + if (ret) + inode->i_mode = old_mode; + return ret; } /* diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8ed05d95584a..03ac3ab4b3b4 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2453,7 +2453,7 @@ void end_extent_writepage(struct page *page, int err, u64 start, u64 end) if (!uptodate) { ClearPageUptodate(page); SetPageError(page); - ret = ret < 0 ? ret : -EIO; + ret = err < 0 ? err : -EIO; mapping_set_error(page->mapping, ret); } } diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 3286a6e47ff0..c95ff096cd24 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -2817,8 +2817,10 @@ static long btrfs_fallocate(struct file *file, int mode, } ret = btrfs_qgroup_reserve_data(inode, cur_offset, last_byte - cur_offset); - if (ret < 0) + if (ret < 0) { + free_extent_map(em); break; + } } else { /* * Do not need to reserve unwritten extent for this diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d196ce4be31c..ffd5831ca15c 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -567,8 +567,10 @@ cont: PAGE_SET_WRITEBACK | page_error_op | PAGE_END_WRITEBACK); - btrfs_free_reserved_data_space_noquota(inode, start, - end - start + 1); + if (ret == 0) + btrfs_free_reserved_data_space_noquota(inode, + start, + end - start + 1); goto free_pages_out; } } diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 9a47b5598df7..d040afc966fe 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -5156,13 +5156,19 @@ static int is_extent_unchanged(struct send_ctx *sctx, while (key.offset < ekey->offset + left_len) { ei = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); right_type = btrfs_file_extent_type(eb, ei); - if (right_type != BTRFS_FILE_EXTENT_REG) { + if (right_type != BTRFS_FILE_EXTENT_REG && + right_type != BTRFS_FILE_EXTENT_INLINE) { ret = 0; goto out; } right_disknr = btrfs_file_extent_disk_bytenr(eb, ei); - right_len = btrfs_file_extent_num_bytes(eb, ei); + if (right_type == BTRFS_FILE_EXTENT_INLINE) { + right_len = btrfs_file_extent_inline_len(eb, slot, ei); + right_len = PAGE_ALIGN(right_len); + } else { + right_len = btrfs_file_extent_num_bytes(eb, ei); + } right_offset = btrfs_file_extent_offset(eb, ei); right_gen = btrfs_file_extent_generation(eb, ei); @@ -5176,6 +5182,19 @@ static int is_extent_unchanged(struct send_ctx *sctx, goto out; } + /* + * We just wanted to see if when we have an inline extent, what + * follows it is a regular extent (wanted to check the above + * condition for inline extents too). This should normally not + * happen but it's possible for example when we have an inline + * compressed extent representing data with a size matching + * the page size (currently the same as sector size). + */ + if (right_type == BTRFS_FILE_EXTENT_INLINE) { + ret = 0; + goto out; + } + left_offset_fixed = left_offset; if (key.offset < ekey->offset) { /* Fix the right offset for 2a and 7. */ diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 5539f0b95efa..52401732cddc 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -3664,7 +3664,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, src_offset = btrfs_item_ptr_offset(src, start_slot + i); - if ((i == (nr - 1))) + if (i == nr - 1) last_key = ins_keys[i]; if (ins_keys[i].type == BTRFS_INODE_ITEM_KEY) { diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 06a77e47957d..4730ba2cc049 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -583,6 +583,7 @@ void btrfs_free_stale_device(struct btrfs_device *cur_dev) btrfs_sysfs_remove_fsid(fs_devs); list_del(&fs_devs->list); free_fs_devices(fs_devs); + break; } else { fs_devs->num_devices--; list_del(&dev->dev_list); @@ -3764,6 +3765,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl, struct btrfs_ioctl_balance_args *bargs) { struct btrfs_fs_info *fs_info = bctl->fs_info; + u64 meta_target, data_target; u64 allowed; int mixed = 0; int ret; @@ -3860,11 +3862,16 @@ int btrfs_balance(struct btrfs_balance_control *bctl, } } while (read_seqretry(&fs_info->profiles_lock, seq)); - if (btrfs_get_num_tolerated_disk_barrier_failures(bctl->meta.target) < - btrfs_get_num_tolerated_disk_barrier_failures(bctl->data.target)) { + /* if we're not converting, the target field is uninitialized */ + meta_target = (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) ? + bctl->meta.target : fs_info->avail_metadata_alloc_bits; + data_target = (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) ? + bctl->data.target : fs_info->avail_data_alloc_bits; + if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) < + btrfs_get_num_tolerated_disk_barrier_failures(data_target)) { btrfs_warn(fs_info, "metadata profile 0x%llx has lower redundancy than data profile 0x%llx", - bctl->meta.target, bctl->data.target); + meta_target, data_target); } if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) { @@ -4748,10 +4755,13 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (devs_max && ndevs > devs_max) ndevs = devs_max; /* - * the primary goal is to maximize the number of stripes, so use as many - * devices as possible, even if the stripes are not maximum sized. + * The primary goal is to maximize the number of stripes, so use as + * many devices as possible, even if the stripes are not maximum sized. + * + * The DUP profile stores more than one stripe per device, the + * max_avail is the total size so we have to adjust. */ - stripe_size = devices_info[ndevs-1].max_avail; + stripe_size = div_u64(devices_info[ndevs - 1].max_avail, dev_stripes); num_stripes = ndevs * dev_stripes; /* @@ -4791,8 +4801,6 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, stripe_size = devices_info[ndevs-1].max_avail; } - stripe_size = div_u64(stripe_size, dev_stripes); - /* align to BTRFS_STRIPE_LEN */ stripe_size = div_u64(stripe_size, raid_stripe_len); stripe_size *= raid_stripe_len; diff --git a/fs/ceph/file.c b/fs/ceph/file.c index ca3f630db90f..e7ddb23d9bb7 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -598,7 +598,8 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, struct ceph_aio_request { struct kiocb *iocb; size_t total_len; - int write; + bool write; + bool should_dirty; int error; struct list_head osd_reqs; unsigned num_reqs; @@ -708,7 +709,7 @@ static void ceph_aio_complete_req(struct ceph_osd_request *req) } } - ceph_put_page_vector(osd_data->pages, num_pages, !aio_req->write); + ceph_put_page_vector(osd_data->pages, num_pages, aio_req->should_dirty); ceph_osdc_put_request(req); if (rc < 0) @@ -890,6 +891,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, size_t count = iov_iter_count(iter); loff_t pos = iocb->ki_pos; bool write = iov_iter_rw(iter) == WRITE; + bool should_dirty = !write && iter_is_iovec(iter); if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP) return -EROFS; @@ -954,6 +956,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, if (aio_req) { aio_req->iocb = iocb; aio_req->write = write; + aio_req->should_dirty = should_dirty; INIT_LIST_HEAD(&aio_req->osd_reqs); if (write) { aio_req->mtime = mtime; @@ -1012,7 +1015,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, len = ret; } - ceph_put_page_vector(pages, num_pages, !write); + ceph_put_page_vector(pages, num_pages, should_dirty); ceph_osdc_put_request(req); if (ret < 0) diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 7b496a4e650e..4ed4736b5bc6 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -1412,6 +1412,7 @@ struct dfs_info3_param { #define CIFS_FATTR_NEED_REVAL 0x4 #define CIFS_FATTR_INO_COLLISION 0x8 #define CIFS_FATTR_UNKNOWN_NLINK 0x10 +#define CIFS_FATTR_FAKE_ROOT_INO 0x20 struct cifs_fattr { u32 cf_flags; diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index d9cbda269462..331ddd07e505 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -673,6 +673,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode, goto mknod_out; } + if (!S_ISCHR(mode) && !S_ISBLK(mode)) + goto mknod_out; + if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)) goto mknod_out; @@ -681,10 +684,8 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode, buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL); if (buf == NULL) { - kfree(full_path); rc = -ENOMEM; - free_xid(xid); - return rc; + goto mknod_out; } if (backup_cred(cifs_sb)) @@ -731,7 +732,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode, pdev->minor = cpu_to_le64(MINOR(device_number)); rc = tcon->ses->server->ops->sync_write(xid, &fid, &io_parms, &bytes_written, iov, 1); - } /* else if (S_ISFIFO) */ + } tcon->ses->server->ops->close(xid, tcon, &fid); d_drop(direntry); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 02e403af9518..49eeed25f200 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -589,7 +589,7 @@ cifs_relock_file(struct cifsFileInfo *cfile) struct cifs_tcon *tcon = tlink_tcon(cfile->tlink); int rc = 0; - down_read(&cinode->lock_sem); + down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING); if (cinode->can_cache_brlcks) { /* can cache locks - no need to relock */ up_read(&cinode->lock_sem); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 7ab5be7944aa..24c19eb94fa3 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -701,6 +701,18 @@ cgfi_exit: return rc; } +/* Simple function to return a 64 bit hash of string. Rarely called */ +static __u64 simple_hashstr(const char *str) +{ + const __u64 hash_mult = 1125899906842597L; /* a big enough prime */ + __u64 hash = 0; + + while (*str) + hash = (hash + (__u64) *str++) * hash_mult; + + return hash; +} + int cifs_get_inode_info(struct inode **inode, const char *full_path, FILE_ALL_INFO *data, struct super_block *sb, int xid, @@ -810,6 +822,14 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, tmprc); fattr.cf_uniqueid = iunique(sb, ROOT_I); cifs_autodisable_serverino(cifs_sb); + } else if ((fattr.cf_uniqueid == 0) && + strlen(full_path) == 0) { + /* some servers ret bad root ino ie 0 */ + cifs_dbg(FYI, "Invalid (0) inodenum\n"); + fattr.cf_flags |= + CIFS_FATTR_FAKE_ROOT_INO; + fattr.cf_uniqueid = + simple_hashstr(tcon->treeName); } } } else @@ -826,6 +846,16 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, &fattr.cf_uniqueid, data); if (tmprc) fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid; + else if ((fattr.cf_uniqueid == 0) && + strlen(full_path) == 0) { + /* + * Reuse existing root inode num since + * inum zero for root causes ls of . and .. to + * not be returned + */ + cifs_dbg(FYI, "Srv ret 0 inode num for root\n"); + fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid; + } } else fattr.cf_uniqueid = CIFS_I(*inode)->uniqueid; } @@ -887,6 +917,9 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, } cgii_exit: + if ((*inode) && ((*inode)->i_ino == 0)) + cifs_dbg(FYI, "inode number of zero returned\n"); + kfree(buf); cifs_put_tlink(tlink); return rc; diff --git a/fs/cifs/netmisc.c b/fs/cifs/netmisc.c index abae6dd2c6b9..cc88f4f0325e 100644 --- a/fs/cifs/netmisc.c +++ b/fs/cifs/netmisc.c @@ -980,10 +980,10 @@ struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time, int offset) cifs_dbg(VFS, "illegal hours %d\n", st->Hours); days = sd->Day; month = sd->Month; - if ((days > 31) || (month > 12)) { + if (days < 1 || days > 31 || month < 1 || month > 12) { cifs_dbg(VFS, "illegal date, month %d day: %d\n", month, days); - if (month > 12) - month = 12; + days = clamp(days, 1, 31); + month = clamp(month, 1, 12); } month -= 1; days += total_days_of_prev_months[month]; diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 538d9b55699a..c3db2a882aee 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -344,13 +344,12 @@ void build_ntlmssp_negotiate_blob(unsigned char *pbuffer, /* BB is NTLMV2 session security format easier to use here? */ flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | - NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC; - if (ses->server->sign) { + NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC | + NTLMSSP_NEGOTIATE_SEAL; + if (ses->server->sign) flags |= NTLMSSP_NEGOTIATE_SIGN; - if (!ses->server->session_estab || - ses->ntlmssp->sesskey_per_smbsess) - flags |= NTLMSSP_NEGOTIATE_KEY_XCH; - } + if (!ses->server->session_estab || ses->ntlmssp->sesskey_per_smbsess) + flags |= NTLMSSP_NEGOTIATE_KEY_XCH; sec_blob->NegotiateFlags = cpu_to_le32(flags); @@ -407,13 +406,12 @@ int build_ntlmssp_auth_blob(unsigned char **pbuffer, flags = NTLMSSP_NEGOTIATE_56 | NTLMSSP_REQUEST_TARGET | NTLMSSP_NEGOTIATE_TARGET_INFO | NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE | - NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC; - if (ses->server->sign) { + NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC | + NTLMSSP_NEGOTIATE_SEAL; + if (ses->server->sign) flags |= NTLMSSP_NEGOTIATE_SIGN; - if (!ses->server->session_estab || - ses->ntlmssp->sesskey_per_smbsess) - flags |= NTLMSSP_NEGOTIATE_KEY_XCH; - } + if (!ses->server->session_estab || ses->ntlmssp->sesskey_per_smbsess) + flags |= NTLMSSP_NEGOTIATE_KEY_XCH; tmp = *pbuffer + sizeof(AUTHENTICATE_MESSAGE); sec_blob->NegotiateFlags = cpu_to_le32(flags); diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 94c4c1901222..44b7ccbe4b08 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -707,15 +707,13 @@ SMB2_sess_establish_session(struct SMB2_sess_data *sess_data) struct cifs_ses *ses = sess_data->ses; mutex_lock(&ses->server->srv_mutex); - if (ses->server->sign && ses->server->ops->generate_signingkey) { + if (ses->server->ops->generate_signingkey) { rc = ses->server->ops->generate_signingkey(ses); - kfree(ses->auth_key.response); - ses->auth_key.response = NULL; if (rc) { cifs_dbg(FYI, "SMB3 session key generation failed\n"); mutex_unlock(&ses->server->srv_mutex); - goto keygen_exit; + return rc; } } if (!ses->server->session_estab) { @@ -729,12 +727,6 @@ SMB2_sess_establish_session(struct SMB2_sess_data *sess_data) ses->status = CifsGood; ses->need_reconnect = false; spin_unlock(&GlobalMid_Lock); - -keygen_exit: - if (!ses->server->sign) { - kfree(ses->auth_key.response); - ses->auth_key.response = NULL; - } return rc; } @@ -1159,15 +1151,19 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, goto tcon_exit; } - if (rsp->ShareType & SMB2_SHARE_TYPE_DISK) + switch (rsp->ShareType) { + case SMB2_SHARE_TYPE_DISK: cifs_dbg(FYI, "connection to disk share\n"); - else if (rsp->ShareType & SMB2_SHARE_TYPE_PIPE) { + break; + case SMB2_SHARE_TYPE_PIPE: tcon->ipc = true; cifs_dbg(FYI, "connection to pipe share\n"); - } else if (rsp->ShareType & SMB2_SHARE_TYPE_PRINT) { - tcon->print = true; + break; + case SMB2_SHARE_TYPE_PRINT: + tcon->ipc = true; cifs_dbg(FYI, "connection to printer\n"); - } else { + break; + default: cifs_dbg(VFS, "unknown share type %d\n", rsp->ShareType); rc = -EOPNOTSUPP; goto tcon_error_exit; @@ -1712,6 +1708,9 @@ SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid, } else iov[0].iov_len = get_rfc1002_length(req) + 4; + /* validate negotiate request must be signed - see MS-SMB2 3.2.5.5 */ + if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO) + req->hdr.Flags |= SMB2_FLAGS_SIGNED; rc = SendReceive2(xid, ses, iov, num_iovecs, &resp_buftype, 0); rsp = (struct smb2_ioctl_rsp *)iov[0].iov_base; diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index f2d7402abe02..93c8e4a4bbd3 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -833,7 +833,7 @@ static int compat_ioctl_preallocate(struct file *file, */ #define XFORM(i) (((i) ^ ((i) << 27) ^ ((i) << 17)) & 0xffffffff) -#define COMPATIBLE_IOCTL(cmd) XFORM(cmd), +#define COMPATIBLE_IOCTL(cmd) XFORM((u32)cmd), /* ioctl should not be warned about even if it's not implemented. Valid reasons to use this: - It is implemented with ->compat_ioctl on some device, but programs @@ -785,6 +785,7 @@ int dax_writeback_mapping_range(struct address_space *mapping, if (ret < 0) return ret; } + start_index = indices[pvec.nr - 1] + 1; } return 0; } diff --git a/fs/dcache.c b/fs/dcache.c index f0719b2f1be5..ff77ec32b67b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -462,9 +462,11 @@ static void dentry_lru_add(struct dentry *dentry) * d_drop() is used mainly for stuff that wants to invalidate a dentry for some * reason (NFS timeouts or autofs deletes). * - * __d_drop requires dentry->d_lock. + * __d_drop requires dentry->d_lock + * ___d_drop doesn't mark dentry as "unhashed" + * (dentry->d_hash.pprev will be LIST_POISON2, not NULL). */ -void __d_drop(struct dentry *dentry) +static void ___d_drop(struct dentry *dentry) { if (!d_unhashed(dentry)) { struct hlist_bl_head *b; @@ -480,12 +482,17 @@ void __d_drop(struct dentry *dentry) hlist_bl_lock(b); __hlist_bl_del(&dentry->d_hash); - dentry->d_hash.pprev = NULL; hlist_bl_unlock(b); /* After this call, in-progress rcu-walk path lookup will fail. */ write_seqcount_invalidate(&dentry->d_seq); } } + +void __d_drop(struct dentry *dentry) +{ + ___d_drop(dentry); + dentry->d_hash.pprev = NULL; +} EXPORT_SYMBOL(__d_drop); void d_drop(struct dentry *dentry) @@ -638,11 +645,16 @@ again: spin_unlock(&parent->d_lock); goto again; } - rcu_read_unlock(); - if (parent != dentry) + if (parent != dentry) { spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - else + if (unlikely(dentry->d_lockref.count < 0)) { + spin_unlock(&parent->d_lock); + parent = NULL; + } + } else { parent = NULL; + } + rcu_read_unlock(); return parent; } @@ -2385,7 +2397,7 @@ EXPORT_SYMBOL(d_delete); static void __d_rehash(struct dentry *entry) { struct hlist_bl_head *b = d_hash(entry->d_name.hash); - BUG_ON(!d_unhashed(entry)); + hlist_bl_lock(b); hlist_bl_add_head_rcu(&entry->d_hash, b); hlist_bl_unlock(b); @@ -2827,9 +2839,9 @@ static void __d_move(struct dentry *dentry, struct dentry *target, write_seqcount_begin_nested(&target->d_seq, DENTRY_D_LOCK_NESTED); /* unhash both */ - /* __d_drop does write_seqcount_barrier, but they're OK to nest. */ - __d_drop(dentry); - __d_drop(target); + /* ___d_drop does write_seqcount_barrier, but they're OK to nest. */ + ___d_drop(dentry); + ___d_drop(target); /* Switch the names.. */ if (exchange) @@ -2841,6 +2853,8 @@ static void __d_move(struct dentry *dentry, struct dentry *target, __d_rehash(dentry); if (exchange) __d_rehash(target); + else + target->d_hash.pprev = NULL; /* ... and switch them in the tree */ if (IS_ROOT(dentry)) { diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index e04ec868e37e..6776f4aa3d12 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -242,8 +242,6 @@ static int ext4_init_block_bitmap(struct super_block *sb, */ ext4_mark_bitmap_end(num_clusters_in_group(sb, block_group), sb->s_blocksize * 8, bh->b_data); - ext4_block_bitmap_csum_set(sb, block_group, gdp, bh); - ext4_group_desc_csum_set(sb, block_group, gdp); return 0; } @@ -322,6 +320,7 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_grpblk_t offset; ext4_grpblk_t next_zero_bit; + ext4_grpblk_t max_bit = EXT4_CLUSTERS_PER_GROUP(sb); ext4_fsblk_t blk; ext4_fsblk_t group_first_block; @@ -339,20 +338,25 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb, /* check whether block bitmap block number is set */ blk = ext4_block_bitmap(sb, desc); offset = blk - group_first_block; - if (!ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data)) + if (offset < 0 || EXT4_B2C(sbi, offset) >= max_bit || + !ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data)) /* bad block bitmap */ return blk; /* check whether the inode bitmap block number is set */ blk = ext4_inode_bitmap(sb, desc); offset = blk - group_first_block; - if (!ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data)) + if (offset < 0 || EXT4_B2C(sbi, offset) >= max_bit || + !ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data)) /* bad block bitmap */ return blk; /* check whether the inode table block number is set */ blk = ext4_inode_table(sb, desc); offset = blk - group_first_block; + if (offset < 0 || EXT4_B2C(sbi, offset) >= max_bit || + EXT4_B2C(sbi, offset + sbi->s_itb_per_group) >= max_bit) + return blk; next_zero_bit = ext4_find_next_zero_bit(bh->b_data, EXT4_B2C(sbi, offset + EXT4_SB(sb)->s_itb_per_group), EXT4_B2C(sbi, offset)); @@ -418,6 +422,7 @@ struct buffer_head * ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) { struct ext4_group_desc *desc; + struct ext4_sb_info *sbi = EXT4_SB(sb); struct buffer_head *bh; ext4_fsblk_t bitmap_blk; int err; @@ -426,6 +431,12 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) if (!desc) return ERR_PTR(-EFSCORRUPTED); bitmap_blk = ext4_block_bitmap(sb, desc); + if ((bitmap_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || + (bitmap_blk >= ext4_blocks_count(sbi->s_es))) { + ext4_error(sb, "Invalid block bitmap block %llu in " + "block_group %u", bitmap_blk, block_group); + return ERR_PTR(-EFSCORRUPTED); + } bh = sb_getblk(sb, bitmap_blk); if (unlikely(!bh)) { ext4_error(sb, "Cannot get buffer for block bitmap - " @@ -447,6 +458,7 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group) err = ext4_init_block_bitmap(sb, bh, block_group, desc); set_bitmap_uptodate(bh); set_buffer_uptodate(bh); + set_buffer_verified(bh); ext4_unlock_group(sb, block_group); unlock_buffer(bh); if (err) { diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 1a0c57100f28..63c702b4b24c 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5356,8 +5356,9 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, stop = le32_to_cpu(extent->ee_block); /* - * In case of left shift, Don't start shifting extents until we make - * sure the hole is big enough to accommodate the shift. + * For left shifts, make sure the hole on the left is big enough to + * accommodate the shift. For right shifts, make sure the last extent + * won't be shifted beyond EXT_MAX_BLOCKS. */ if (SHIFT == SHIFT_LEFT) { path = ext4_find_extent(inode, start - 1, &path, @@ -5377,9 +5378,14 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle, if ((start == ex_start && shift > ex_start) || (shift > start - ex_end)) { - ext4_ext_drop_refs(path); - kfree(path); - return -EINVAL; + ret = -EINVAL; + goto out; + } + } else { + if (shift > EXT_MAX_BLOCKS - + (stop + ext4_ext_get_actual_len(extent))) { + ret = -EINVAL; + goto out; } } diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 510e66422f04..08fca4add1e2 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -429,7 +429,7 @@ static int ext4_find_unwritten_pgoff(struct inode *inode, int i, num; unsigned long nr_pages; - num = min_t(pgoff_t, end - index, PAGEVEC_SIZE); + num = min_t(pgoff_t, end - index, PAGEVEC_SIZE - 1) + 1; nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, (pgoff_t)num); if (nr_pages == 0) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 2d94e8524839..dcf63daefee0 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -63,44 +63,6 @@ void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap) memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); } -/* Initializes an uninitialized inode bitmap */ -static int ext4_init_inode_bitmap(struct super_block *sb, - struct buffer_head *bh, - ext4_group_t block_group, - struct ext4_group_desc *gdp) -{ - struct ext4_group_info *grp; - struct ext4_sb_info *sbi = EXT4_SB(sb); - J_ASSERT_BH(bh, buffer_locked(bh)); - - /* If checksum is bad mark all blocks and inodes use to prevent - * allocation, essentially implementing a per-group read-only flag. */ - if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { - grp = ext4_get_group_info(sb, block_group); - if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) - percpu_counter_sub(&sbi->s_freeclusters_counter, - grp->bb_free); - set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); - if (!EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) { - int count; - count = ext4_free_inodes_count(sb, gdp); - percpu_counter_sub(&sbi->s_freeinodes_counter, - count); - } - set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); - return -EFSBADCRC; - } - - memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); - ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8, - bh->b_data); - ext4_inode_bitmap_csum_set(sb, block_group, gdp, bh, - EXT4_INODES_PER_GROUP(sb) / 8); - ext4_group_desc_csum_set(sb, block_group, gdp); - - return 0; -} - void ext4_end_bitmap_read(struct buffer_head *bh, int uptodate) { if (uptodate) { @@ -157,6 +119,7 @@ static struct buffer_head * ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) { struct ext4_group_desc *desc; + struct ext4_sb_info *sbi = EXT4_SB(sb); struct buffer_head *bh = NULL; ext4_fsblk_t bitmap_blk; int err; @@ -166,6 +129,12 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) return ERR_PTR(-EFSCORRUPTED); bitmap_blk = ext4_inode_bitmap(sb, desc); + if ((bitmap_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || + (bitmap_blk >= ext4_blocks_count(sbi->s_es))) { + ext4_error(sb, "Invalid inode bitmap blk %llu in " + "block_group %u", bitmap_blk, block_group); + return ERR_PTR(-EFSCORRUPTED); + } bh = sb_getblk(sb, bitmap_blk); if (unlikely(!bh)) { ext4_error(sb, "Cannot read inode bitmap - " @@ -184,17 +153,14 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group) ext4_lock_group(sb, block_group); if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { - err = ext4_init_inode_bitmap(sb, bh, block_group, desc); + memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8); + ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), + sb->s_blocksize * 8, bh->b_data); set_bitmap_uptodate(bh); set_buffer_uptodate(bh); set_buffer_verified(bh); ext4_unlock_group(sb, block_group); unlock_buffer(bh); - if (err) { - ext4_error(sb, "Failed to init inode bitmap for group " - "%u: %d", block_group, err); - goto out; - } return bh; } ext4_unlock_group(sb, block_group); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5cccec68a0a5..340428274532 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3396,7 +3396,6 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - struct ext4_inode_info *ei = EXT4_I(inode); ssize_t ret; loff_t offset = iocb->ki_pos; size_t count = iov_iter_count(iter); @@ -3420,7 +3419,7 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter) goto out; } orphan = 1; - ei->i_disksize = inode->i_size; + ext4_update_i_disksize(inode, inode->i_size); ext4_journal_stop(handle); } @@ -3548,7 +3547,7 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter) if (ret > 0) { loff_t end = offset + ret; if (end > inode->i_size) { - ei->i_disksize = end; + ext4_update_i_disksize(inode, end); i_size_write(inode, end); /* * We're going to return a positive `ret' @@ -4494,6 +4493,12 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) goto bad_inode; raw_inode = ext4_raw_inode(&iloc); + if ((ino == EXT4_ROOT_INO) && (raw_inode->i_links_count == 0)) { + EXT4_ERROR_INODE(inode, "root inode unallocated"); + ret = -EFSCORRUPTED; + goto bad_inode; + } + if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 64056c6eb857..14bd37041e1a 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -3877,7 +3877,8 @@ ext4_mb_discard_group_preallocations(struct super_block *sb, err = ext4_mb_load_buddy(sb, group, &e4b); if (err) { - ext4_error(sb, "Error loading buddy information for %u", group); + ext4_warning(sb, "Error %d loading buddy information for %u", + err, group); put_bh(bitmap_bh); return 0; } @@ -4034,10 +4035,11 @@ repeat: BUG_ON(pa->pa_type != MB_INODE_PA); group = ext4_get_group_number(sb, pa->pa_pstart); - err = ext4_mb_load_buddy(sb, group, &e4b); + err = ext4_mb_load_buddy_gfp(sb, group, &e4b, + GFP_NOFS|__GFP_NOFAIL); if (err) { - ext4_error(sb, "Error loading buddy information for %u", - group); + ext4_error(sb, "Error %d loading buddy information for %u", + err, group); continue; } @@ -4293,11 +4295,14 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb, spin_unlock(&lg->lg_prealloc_lock); list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) { + int err; group = ext4_get_group_number(sb, pa->pa_pstart); - if (ext4_mb_load_buddy(sb, group, &e4b)) { - ext4_error(sb, "Error loading buddy information for %u", - group); + err = ext4_mb_load_buddy_gfp(sb, group, &e4b, + GFP_NOFS|__GFP_NOFAIL); + if (err) { + ext4_error(sb, "Error %d loading buddy information for %u", + err, group); continue; } ext4_lock_group(sb, group); @@ -5117,8 +5122,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, ret = ext4_mb_load_buddy(sb, group, &e4b); if (ret) { - ext4_error(sb, "Error in loading buddy " - "information for %u", group); + ext4_warning(sb, "Error %d loading buddy information for %u", + ret, group); return ret; } bitmap = e4b.bd_bitmap; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 1ec4b6e34747..bfb83d76d128 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2260,6 +2260,8 @@ static int ext4_check_descriptors(struct super_block *sb, ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Block bitmap for group %u overlaps " "superblock", i); + if (!(sb->s_flags & MS_RDONLY)) + return 0; } if (block_bitmap < first_block || block_bitmap > last_block) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " @@ -2272,6 +2274,8 @@ static int ext4_check_descriptors(struct super_block *sb, ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Inode bitmap for group %u overlaps " "superblock", i); + if (!(sb->s_flags & MS_RDONLY)) + return 0; } if (inode_bitmap < first_block || inode_bitmap > last_block) { ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " @@ -2284,6 +2288,8 @@ static int ext4_check_descriptors(struct super_block *sb, ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: " "Inode table for group %u overlaps " "superblock", i); + if (!(sb->s_flags & MS_RDONLY)) + return 0; } if (inode_table < first_block || inode_table + sbi->s_itb_per_group - 1 > last_block) { diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 3eeed8f0aa06..3fadfabcac39 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -837,8 +837,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, if (!IS_LAST_ENTRY(s->first)) ext4_xattr_rehash(header(s->base), s->here); - ext4_xattr_cache_insert(ext4_mb_cache, - bs->bh); } ext4_xattr_block_csum_set(inode, bs->bh); unlock_buffer(bs->bh); @@ -959,6 +957,7 @@ inserted: } else if (bs->bh && s->base == bs->bh->b_data) { /* We were modifying this block in-place. */ ea_bdebug(bs->bh, "keeping this block"); + ext4_xattr_cache_insert(ext4_mb_cache, bs->bh); new_bh = bs->bh; get_bh(new_bh); } else { diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 99432b59c5cb..ae354ac67da1 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -844,7 +844,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock, if (!ret) { map_bh(bh, inode->i_sb, map.m_pblk); bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags; - bh->b_size = map.m_len << inode->i_blkbits; + bh->b_size = (u64)map.m_len << inode->i_blkbits; } return ret; } diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 7b32ce979fe1..63e519658d73 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -177,7 +177,7 @@ static void __drop_largest_extent(struct inode *inode, } /* return true, if inode page is changed */ -bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) +static bool __f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct extent_tree *et; @@ -215,6 +215,16 @@ out: return false; } +bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext) +{ + bool ret = __f2fs_init_extent_tree(inode, i_ext); + + if (!F2FS_I(inode)->extent_tree) + set_inode_flag(inode, FI_NO_EXTENT); + + return ret; +} + static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, struct extent_info *ei) { diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 34a69e7ed90b..17ab23f64bba 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -538,8 +538,10 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, get_node_info(sbi, nid, dni); if (sum->version != dni->version) { - f2fs_put_page(node_page, 1); - return false; + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: valid data with mismatched node version.", + __func__); + set_sbi_flag(sbi, SBI_NEED_FSCK); } *nofs = ofs_of_node(node_page); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 0703a1179847..f3aea1b8702c 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -745,11 +745,12 @@ int inode_congested(struct inode *inode, int cong_bits) */ if (inode && inode_to_wb_is_valid(inode)) { struct bdi_writeback *wb; - bool locked, congested; + struct wb_lock_cookie lock_cookie = {}; + bool congested; - wb = unlocked_inode_to_wb_begin(inode, &locked); + wb = unlocked_inode_to_wb_begin(inode, &lock_cookie); congested = wb_congested(wb, cong_bits); - unlocked_inode_to_wb_end(inode, locked); + unlocked_inode_to_wb_end(inode, &lock_cookie); return congested; } @@ -1941,7 +1942,7 @@ void wb_workfn(struct work_struct *work) } if (!list_empty(&wb->work_list)) - mod_delayed_work(bdi_wq, &wb->dwork, 0); + wb_wakeup(wb); else if (wb_has_dirty_io(wb) && dirty_writeback_interval) wb_wakeup_delayed(wb); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 7d4b557f1962..d10bb2c30bf8 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -276,11 +276,11 @@ loop: goto loop; end_loop: - write_unlock(&journal->j_state_lock); del_timer_sync(&journal->j_commit_timer); journal->j_task = NULL; wake_up(&journal->j_wait_done_commit); jbd_debug(1, "Journal thread exiting.\n"); + write_unlock(&journal->j_state_lock); return 0; } @@ -691,8 +691,21 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) { int err = 0; - jbd2_might_wait_for_commit(journal); read_lock(&journal->j_state_lock); +#ifdef CONFIG_PROVE_LOCKING + /* + * Some callers make sure transaction is already committing and in that + * case we cannot block on open handles anymore. So don't warn in that + * case. + */ + if (tid_gt(tid, journal->j_commit_sequence) && + (!journal->j_committing_transaction || + journal->j_committing_transaction->t_tid != tid)) { + read_unlock(&journal->j_state_lock); + jbd2_might_wait_for_commit(journal); + read_lock(&journal->j_state_lock); + } +#endif #ifdef CONFIG_JBD2_DEBUG if (!tid_geq(journal->j_commit_request, tid)) { printk(KERN_ERR @@ -938,7 +951,7 @@ out: } /* - * This is a variaon of __jbd2_update_log_tail which checks for validity of + * This is a variation of __jbd2_update_log_tail which checks for validity of * provided log tail and locks j_checkpoint_mutex. So it is safe against races * with other threads updating log tail. */ @@ -1381,6 +1394,9 @@ int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, journal_superblock_t *sb = journal->j_superblock; int ret; + if (is_journal_aborted(journal)) + return -EIO; + BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); jbd_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n", tail_block, tail_tid); diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 4e5c6103b76c..9e9e0936138b 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -528,6 +528,7 @@ int jbd2_journal_start_reserved(handle_t *handle, unsigned int type, */ ret = start_this_handle(journal, handle, GFP_NOFS); if (ret < 0) { + handle->h_journal = journal; jbd2_journal_free_reserved(handle); return ret; } diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 5ef21f4c4c77..59c019a148f6 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -342,7 +342,7 @@ static void jffs2_put_super (struct super_block *sb) static void jffs2_kill_sb(struct super_block *sb) { struct jffs2_sb_info *c = JFFS2_SB_INFO(sb); - if (!(sb->s_flags & MS_RDONLY)) + if (c && !(sb->s_flags & MS_RDONLY)) jffs2_stop_garbage_collect_thread(c); kill_mtd_super(sb); kfree(c); diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 9d373247222c..c19123dcd1a4 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -132,6 +132,8 @@ lockd(void *vrqstp) { int err = 0; struct svc_rqst *rqstp = vrqstp; + struct net *net = &init_net; + struct lockd_net *ln = net_generic(net, lockd_net_id); /* try_to_freeze() is called from svc_recv() */ set_freezable(); @@ -176,6 +178,8 @@ lockd(void *vrqstp) if (nlmsvc_ops) nlmsvc_invalidate_all(); nlm_shutdown_hosts(); + cancel_delayed_work_sync(&ln->grace_period_end); + locks_end_grace(&ln->lockd_manager); return 0; } diff --git a/fs/namei.c b/fs/namei.c index 968c574d4aba..7996c8c2123b 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -221,9 +221,10 @@ getname_kernel(const char * filename) if (len <= EMBEDDED_NAME_MAX) { result->name = (char *)result->iname; } else if (len <= PATH_MAX) { + const size_t size = offsetof(struct filename, iname[1]); struct filename *tmp; - tmp = kmalloc(sizeof(*tmp), GFP_KERNEL); + tmp = kmalloc(size, GFP_KERNEL); if (unlikely(!tmp)) { __putname(result); return ERR_PTR(-ENOMEM); @@ -578,9 +579,10 @@ static int __nd_alloc_stack(struct nameidata *nd) static bool path_connected(const struct path *path) { struct vfsmount *mnt = path->mnt; + struct super_block *sb = mnt->mnt_sb; - /* Only bind mounts can have disconnected paths */ - if (mnt->mnt_root == mnt->mnt_sb->s_root) + /* Bind mounts and multi-root filesystems can have disconnected paths */ + if (!(sb->s_iflags & SB_I_MULTIROOT) && (mnt->mnt_root == sb->s_root)) return true; return is_subdir(path->dentry, mnt->mnt_root); @@ -1121,9 +1123,6 @@ static int follow_automount(struct path *path, struct nameidata *nd, path->dentry->d_inode) return -EISDIR; - if (path->dentry->d_sb->s_user_ns != &init_user_ns) - return -EACCES; - nd->total_link_count++; if (nd->total_link_count >= 40) return -ELOOP; diff --git a/fs/namespace.c b/fs/namespace.c index da188c6966a3..ffa9923ff4f7 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1037,7 +1037,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, goto out_free; } - mnt->mnt.mnt_flags = old->mnt.mnt_flags & ~(MNT_WRITE_HOLD|MNT_MARKED); + mnt->mnt.mnt_flags = old->mnt.mnt_flags; + mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL); /* Don't allow unprivileged users to change mount flags */ if (flag & CL_UNPRIVILEGED) { mnt->mnt.mnt_flags |= MNT_LOCK_ATIME; diff --git a/fs/ncpfs/ncplib_kernel.c b/fs/ncpfs/ncplib_kernel.c index 88dbbc9fcf4d..f571570a2e72 100644 --- a/fs/ncpfs/ncplib_kernel.c +++ b/fs/ncpfs/ncplib_kernel.c @@ -980,6 +980,10 @@ ncp_read_kernel(struct ncp_server *server, const char *file_id, goto out; } *bytes_read = ncp_reply_be16(server, 0); + if (*bytes_read > to_read) { + result = -EINVAL; + goto out; + } source = ncp_reply_data(server, 2 + (offset & 1)); memcpy(target, source, *bytes_read); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 1ac1593aded3..1ab91124a93e 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -86,10 +86,10 @@ struct nfs_direct_req { struct nfs_direct_mirror mirrors[NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX]; int mirror_count; + loff_t io_start; /* Start offset for I/O */ ssize_t count, /* bytes actually processed */ max_count, /* max expected count */ bytes_left, /* bytes left to be sent */ - io_start, /* start of IO */ error; /* any reported error */ struct completion completion; /* wait for i/o completion */ diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 13abd608af0f..4539008502ce 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -475,6 +475,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, goto out_err_free; /* fh */ + rc = -EIO; p = xdr_inline_decode(&stream, 4); if (!p) goto out_err_free; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 5dd6fd555c72..c5c984519e32 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -3300,6 +3300,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f .rpc_resp = &res, }; int status; + int i; bitmask[0] = FATTR4_WORD0_SUPPORTED_ATTRS | FATTR4_WORD0_FH_EXPIRE_TYPE | @@ -3365,8 +3366,13 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; server->cache_consistency_bitmask[2] = 0; + + /* Avoid a regression due to buggy server */ + for (i = 0; i < ARRAY_SIZE(res.exclcreat_bitmask); i++) + res.exclcreat_bitmask[i] &= res.attr_bitmask[i]; memcpy(server->exclcreat_bitmask, res.exclcreat_bitmask, sizeof(server->exclcreat_bitmask)); + server->acl_bitmask = res.acl_bitmask; server->fh_expire_type = res.fh_expire_type; } @@ -8173,6 +8179,12 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf /* fall through */ case -NFS4ERR_RETRY_UNCACHED_REP: return -EAGAIN; + case -NFS4ERR_BADSESSION: + case -NFS4ERR_DEADSESSION: + case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: + nfs4_schedule_session_recovery(clp->cl_session, + task->tk_status); + break; default: nfs4_schedule_lease_recovery(clp); } @@ -8251,7 +8263,6 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp, if (status == 0) status = task->tk_status; rpc_put_task(task); - return 0; out: dprintk("<-- %s status=%d\n", __func__, status); return status; diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 4be6999299dc..ae00fc5edc5c 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1647,13 +1647,14 @@ static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp) nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot); } -static void nfs4_reclaim_complete(struct nfs_client *clp, +static int nfs4_reclaim_complete(struct nfs_client *clp, const struct nfs4_state_recovery_ops *ops, struct rpc_cred *cred) { /* Notify the server we're done reclaiming our state */ if (ops->reclaim_complete) - (void)ops->reclaim_complete(clp, cred); + return ops->reclaim_complete(clp, cred); + return 0; } static void nfs4_clear_reclaim_server(struct nfs_server *server) @@ -1700,13 +1701,16 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp) { const struct nfs4_state_recovery_ops *ops; struct rpc_cred *cred; + int err; if (!nfs4_state_clear_reclaim_reboot(clp)) return; ops = clp->cl_mvops->reboot_recovery_ops; cred = nfs4_get_clid_cred(clp); - nfs4_reclaim_complete(clp, ops, cred); + err = nfs4_reclaim_complete(clp, ops, cred); put_rpccred(cred); + if (err == -NFS4ERR_CONN_NOT_BOUND_TO_SESSION) + set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); } static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp) diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 3d17fc82b9fe..892c88542ebd 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -1262,8 +1262,10 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) mirror = &desc->pg_mirrors[midx]; if (!list_empty(&mirror->pg_list)) { prev = nfs_list_entry(mirror->pg_list.prev); - if (index != prev->wb_index + 1) - nfs_pageio_complete_mirror(desc, midx); + if (index != prev->wb_index + 1) { + nfs_pageio_complete(desc); + break; + } } } } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b8e44746f761..0e008db16b16 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1953,8 +1953,6 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, spin_lock(&inode->i_lock); pnfs_set_plh_return_info(lo, range.iomode, 0); - /* Block LAYOUTGET */ - set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags); /* * mark all matching lsegs so that we are sure to have no live * segments at hand when sending layoutreturn. See pnfs_put_lseg() @@ -2308,10 +2306,20 @@ pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr) enum pnfs_try_status trypnfs; trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg); - if (trypnfs == PNFS_TRY_AGAIN) - pnfs_read_resend_pnfs(hdr); - if (trypnfs == PNFS_NOT_ATTEMPTED || hdr->task.tk_status) + switch (trypnfs) { + case PNFS_NOT_ATTEMPTED: pnfs_read_through_mds(desc, hdr); + case PNFS_ATTEMPTED: + break; + case PNFS_TRY_AGAIN: + /* cleanup hdr and prepare to redo pnfs */ + if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); + list_splice_init(&hdr->pages, &mirror->pg_list); + mirror->pg_recoalesce = 1; + } + hdr->mds_ops->rpc_release(hdr); + } } static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 51bf1f9ab287..2fdb8f5a7b69 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2613,6 +2613,8 @@ struct dentry *nfs_fs_mount_common(struct nfs_server *server, /* initial superblock/root creation */ mount_info->fill_super(s, mount_info); nfs_get_cache_cookie(s, mount_info->parsed, mount_info->cloned); + if (!(server->flags & NFS_MOUNT_UNSHARED)) + s->s_iflags |= SB_I_MULTIROOT; } mntroot = nfs_get_root(s, mount_info->mntfh, dev_name); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9a3b3820306d..a8b786a648cd 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1847,40 +1847,43 @@ int nfs_generic_commit_list(struct inode *inode, struct list_head *head, return status; } -int nfs_commit_inode(struct inode *inode, int how) +static int __nfs_commit_inode(struct inode *inode, int how, + struct writeback_control *wbc) { LIST_HEAD(head); struct nfs_commit_info cinfo; int may_wait = how & FLUSH_SYNC; - int error = 0; - int res; + int ret, nscan; nfs_init_cinfo_from_inode(&cinfo, inode); nfs_commit_begin(cinfo.mds); - res = nfs_scan_commit(inode, &head, &cinfo); - if (res) - error = nfs_generic_commit_list(inode, &head, how, &cinfo); + for (;;) { + ret = nscan = nfs_scan_commit(inode, &head, &cinfo); + if (ret <= 0) + break; + ret = nfs_generic_commit_list(inode, &head, how, &cinfo); + if (ret < 0) + break; + ret = 0; + if (wbc && wbc->sync_mode == WB_SYNC_NONE) { + if (nscan < wbc->nr_to_write) + wbc->nr_to_write -= nscan; + else + wbc->nr_to_write = 0; + } + if (nscan < INT_MAX) + break; + cond_resched(); + } nfs_commit_end(cinfo.mds); - if (res == 0) - return res; - if (error < 0) - goto out_error; - if (!may_wait) - goto out_mark_dirty; - error = wait_on_commit(cinfo.mds); - if (error < 0) - return error; - return res; -out_error: - res = error; - /* Note: If we exit without ensuring that the commit is complete, - * we must mark the inode as dirty. Otherwise, future calls to - * sync_inode() with the WB_SYNC_ALL flag set will fail to ensure - * that the data is on the disk. - */ -out_mark_dirty: - __mark_inode_dirty(inode, I_DIRTY_DATASYNC); - return res; + if (ret || !may_wait) + return ret; + return wait_on_commit(cinfo.mds); +} + +int nfs_commit_inode(struct inode *inode, int how) +{ + return __nfs_commit_inode(inode, how, NULL); } EXPORT_SYMBOL_GPL(nfs_commit_inode); @@ -1890,11 +1893,11 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) int flags = FLUSH_SYNC; int ret = 0; - /* no commits means nothing needs to be done */ - if (!nfsi->commit_info.ncommit) - return ret; - if (wbc->sync_mode == WB_SYNC_NONE) { + /* no commits means nothing needs to be done */ + if (!nfsi->commit_info.ncommit) + goto check_requests_outstanding; + /* Don't commit yet if this is a non-blocking flush and there * are a lot of outstanding writes for this mapping. */ @@ -1905,16 +1908,16 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) flags = 0; } - ret = nfs_commit_inode(inode, flags); - if (ret >= 0) { - if (wbc->sync_mode == WB_SYNC_NONE) { - if (ret < wbc->nr_to_write) - wbc->nr_to_write -= ret; - else - wbc->nr_to_write = 0; - } - return 0; - } + ret = __nfs_commit_inode(inode, flags, wbc); + if (!ret) { + if (flags & FLUSH_SYNC) + return 0; + } else if (nfsi->commit_info.ncommit) + goto out_mark_dirty; + +check_requests_outstanding: + if (!atomic_read(&nfsi->commit_info.rpcs_out)) + return ret; out_mark_dirty: __mark_inode_dirty(inode, I_DIRTY_DATASYNC); return ret; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 022d95886d66..eef0caf6e67d 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1338,14 +1338,14 @@ nfsd4_layoutget(struct svc_rqst *rqstp, const struct nfsd4_layout_ops *ops; struct nfs4_layout_stateid *ls; __be32 nfserr; - int accmode; + int accmode = NFSD_MAY_READ_IF_EXEC; switch (lgp->lg_seg.iomode) { case IOMODE_READ: - accmode = NFSD_MAY_READ; + accmode |= NFSD_MAY_READ; break; case IOMODE_RW: - accmode = NFSD_MAY_READ | NFSD_MAY_WRITE; + accmode |= NFSD_MAY_READ | NFSD_MAY_WRITE; break; default: dprintk("%s: invalid iomode %d\n", diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index f463c4e0b2ea..12d780718b48 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -263,6 +263,35 @@ free_blocked_lock(struct nfsd4_blocked_lock *nbl) kfree(nbl); } +static void +remove_blocked_locks(struct nfs4_lockowner *lo) +{ + struct nfs4_client *clp = lo->lo_owner.so_client; + struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id); + struct nfsd4_blocked_lock *nbl; + LIST_HEAD(reaplist); + + /* Dequeue all blocked locks */ + spin_lock(&nn->blocked_locks_lock); + while (!list_empty(&lo->lo_blocked)) { + nbl = list_first_entry(&lo->lo_blocked, + struct nfsd4_blocked_lock, + nbl_list); + list_del_init(&nbl->nbl_list); + list_move(&nbl->nbl_lru, &reaplist); + } + spin_unlock(&nn->blocked_locks_lock); + + /* Now free them */ + while (!list_empty(&reaplist)) { + nbl = list_first_entry(&reaplist, struct nfsd4_blocked_lock, + nbl_lru); + list_del_init(&nbl->nbl_lru); + posix_unblock_lock(&nbl->nbl_lock); + free_blocked_lock(nbl); + } +} + static int nfsd4_cb_notify_lock_done(struct nfsd4_callback *cb, struct rpc_task *task) { @@ -1854,6 +1883,7 @@ static __be32 mark_client_expired_locked(struct nfs4_client *clp) static void __destroy_client(struct nfs4_client *clp) { + int i; struct nfs4_openowner *oo; struct nfs4_delegation *dp; struct list_head reaplist; @@ -1883,6 +1913,16 @@ __destroy_client(struct nfs4_client *clp) nfs4_get_stateowner(&oo->oo_owner); release_openowner(oo); } + for (i = 0; i < OWNER_HASH_SIZE; i++) { + struct nfs4_stateowner *so, *tmp; + + list_for_each_entry_safe(so, tmp, &clp->cl_ownerstr_hashtbl[i], + so_strhash) { + /* Should be no openowners at this point */ + WARN_ON_ONCE(so->so_is_open_owner); + remove_blocked_locks(lockowner(so)); + } + } nfsd4_return_all_client_layouts(clp); nfsd4_shutdown_callback(clp); if (clp->cl_cb_conn.cb_xprt) @@ -6266,6 +6306,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, } spin_unlock(&clp->cl_lock); free_ol_stateid_reaplist(&reaplist); + remove_blocked_locks(lo); nfs4_put_stateowner(&lo->lo_owner); return status; @@ -7051,6 +7092,8 @@ nfs4_state_destroy_net(struct net *net) } } + WARN_ON(!list_empty(&nn->blocked_locks_lru)); + for (i = 0; i < CLIENT_HASH_SIZE; i++) { while (!list_empty(&nn->unconf_id_hashtbl[i])) { clp = list_entry(nn->unconf_id_hashtbl[i].next, struct nfs4_client, cl_idhash); @@ -7117,7 +7160,6 @@ nfs4_state_shutdown_net(struct net *net) struct nfs4_delegation *dp = NULL; struct list_head *pos, *next, reaplist; struct nfsd_net *nn = net_generic(net, nfsd_net_id); - struct nfsd4_blocked_lock *nbl; cancel_delayed_work_sync(&nn->laundromat_work); locks_end_grace(&nn->nfsd4_manager); @@ -7138,24 +7180,6 @@ nfs4_state_shutdown_net(struct net *net) nfs4_put_stid(&dp->dl_stid); } - BUG_ON(!list_empty(&reaplist)); - spin_lock(&nn->blocked_locks_lock); - while (!list_empty(&nn->blocked_locks_lru)) { - nbl = list_first_entry(&nn->blocked_locks_lru, - struct nfsd4_blocked_lock, nbl_lru); - list_move(&nbl->nbl_lru, &reaplist); - list_del_init(&nbl->nbl_list); - } - spin_unlock(&nn->blocked_locks_lock); - - while (!list_empty(&reaplist)) { - nbl = list_first_entry(&reaplist, - struct nfsd4_blocked_lock, nbl_lru); - list_del_init(&nbl->nbl_lru); - posix_unblock_lock(&nbl->nbl_lock); - free_blocked_lock(nbl); - } - nfsd4_client_tracking_exit(net); nfs4_state_destroy_net(net); } diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index b829cc9a9b39..8f0b19a3ca81 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -94,6 +94,12 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, err = follow_down(&path); if (err < 0) goto out; + if (path.mnt == exp->ex_path.mnt && path.dentry == dentry && + nfsd_mountpoint(dentry, exp) == 2) { + /* This is only a mountpoint in some other namespace */ + path_put(&path); + goto out; + } exp2 = rqst_exp_get_by_name(rqstp, &path); if (IS_ERR(exp2)) { @@ -167,16 +173,26 @@ static int nfsd_lookup_parent(struct svc_rqst *rqstp, struct dentry *dparent, st /* * For nfsd purposes, we treat V4ROOT exports as though there was an * export at *every* directory. + * We return: + * '1' if this dentry *must* be an export point, + * '2' if it might be, if there is really a mount here, and + * '0' if there is no chance of an export point here. */ int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp) { - if (d_mountpoint(dentry)) + if (!d_inode(dentry)) + return 0; + if (exp->ex_flags & NFSEXP_V4ROOT) return 1; if (nfsd4_is_junction(dentry)) return 1; - if (!(exp->ex_flags & NFSEXP_V4ROOT)) - return 0; - return d_inode(dentry) != NULL; + if (d_mountpoint(dentry)) + /* + * Might only be a mountpoint in a different namespace, + * but we need to check. + */ + return 2; + return 0; } __be32 diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index e0e5f7c3c99f..8a459b179183 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -92,7 +92,7 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, u32 event_mask, void *data, int data_type) { - __u32 marks_mask, marks_ignored_mask; + __u32 marks_mask = 0, marks_ignored_mask = 0; struct path *path = data; pr_debug("%s: inode_mark=%p vfsmnt_mark=%p mask=%x data=%p" @@ -108,24 +108,20 @@ static bool fanotify_should_send_event(struct fsnotify_mark *inode_mark, !d_can_lookup(path->dentry)) return false; - if (inode_mark && vfsmnt_mark) { - marks_mask = (vfsmnt_mark->mask | inode_mark->mask); - marks_ignored_mask = (vfsmnt_mark->ignored_mask | inode_mark->ignored_mask); - } else if (inode_mark) { - /* - * if the event is for a child and this inode doesn't care about - * events on the child, don't send it! - */ - if ((event_mask & FS_EVENT_ON_CHILD) && - !(inode_mark->mask & FS_EVENT_ON_CHILD)) - return false; - marks_mask = inode_mark->mask; - marks_ignored_mask = inode_mark->ignored_mask; - } else if (vfsmnt_mark) { - marks_mask = vfsmnt_mark->mask; - marks_ignored_mask = vfsmnt_mark->ignored_mask; - } else { - BUG(); + /* + * if the event is for a child and this inode doesn't care about + * events on the child, don't send it! + */ + if (inode_mark && + (!(event_mask & FS_EVENT_ON_CHILD) || + (inode_mark->mask & FS_EVENT_ON_CHILD))) { + marks_mask |= inode_mark->mask; + marks_ignored_mask |= inode_mark->ignored_mask; + } + + if (vfsmnt_mark) { + marks_mask |= vfsmnt_mark->mask; + marks_ignored_mask |= vfsmnt_mark->ignored_mask; } if (d_is_dir(path->dentry) && diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c index 629d8c917fa6..6e35ef6521b4 100644 --- a/fs/orangefs/super.c +++ b/fs/orangefs/super.c @@ -559,6 +559,11 @@ void orangefs_kill_sb(struct super_block *sb) /* provided sb cleanup */ kill_anon_super(sb); + if (!ORANGEFS_SB(sb)) { + mutex_lock(&orangefs_request_mutex); + mutex_unlock(&orangefs_request_mutex); + return; + } /* * issue the unmount to userspace to tell it to remove the * dynamic mount info it has for this superblock diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c index f61b00887481..cbca58ba008a 100644 --- a/fs/orangefs/waitqueue.c +++ b/fs/orangefs/waitqueue.c @@ -124,7 +124,14 @@ retry_servicing: gossip_debug(GOSSIP_WAIT_DEBUG, "%s:client core is NOT in service.\n", __func__); - timeout = op_timeout_secs * HZ; + /* + * Don't wait for the userspace component to return if + * the filesystem is being umounted anyway. + */ + if (op->upcall.type == ORANGEFS_VFS_OP_FS_UMOUNT) + timeout = 0; + else + timeout = op_timeout_secs * HZ; } spin_unlock(&orangefs_request_list_lock); diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 306b6c161840..8546384a5fdf 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -180,6 +180,9 @@ static void ovl_instantiate(struct dentry *dentry, struct inode *inode, inc_nlink(inode); } d_instantiate(dentry, inode); + /* Force lookup of new upper hardlink to find its lower */ + if (hardlink) + d_drop(dentry); } static int ovl_create_upper(struct dentry *dentry, struct inode *inode, diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 7fb53d055537..16f6db88c8e5 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -227,6 +227,16 @@ int ovl_xattr_get(struct dentry *dentry, const char *name, return res; } +static bool ovl_can_list(const char *s) +{ + /* List all non-trusted xatts */ + if (strncmp(s, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) != 0) + return true; + + /* Never list trusted.overlay, list other trusted for superuser only */ + return !ovl_is_private_xattr(s) && capable(CAP_SYS_ADMIN); +} + ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) { struct dentry *realdentry = ovl_dentry_real(dentry); @@ -250,7 +260,7 @@ ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size) return -EIO; len -= slen; - if (ovl_is_private_xattr(s)) { + if (!ovl_can_list(s)) { res -= slen; memmove(s, s + slen, len); } else { diff --git a/fs/proc/base.c b/fs/proc/base.c index 0edc16f95596..39d3326ff6d8 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -252,7 +252,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, * Inherently racy -- command line shares address space * with code and data. */ - rv = access_remote_vm(mm, arg_end - 1, &c, 1, 0); + rv = access_remote_vm(mm, arg_end - 1, &c, 1, FOLL_ANON); if (rv <= 0) goto out_free_page; @@ -270,7 +270,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, int nr_read; _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); + nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON); if (nr_read < 0) rv = nr_read; if (nr_read <= 0) @@ -305,7 +305,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, bool final; _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); + nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON); if (nr_read < 0) rv = nr_read; if (nr_read <= 0) @@ -354,7 +354,7 @@ skip_argv: bool final; _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); + nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON); if (nr_read < 0) rv = nr_read; if (nr_read <= 0) @@ -970,7 +970,7 @@ static ssize_t environ_read(struct file *file, char __user *buf, max_len = min_t(size_t, PAGE_SIZE, count); this_len = min(max_len, this_len); - retval = access_remote_vm(mm, (env_start + src), page, this_len, 0); + retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON); if (retval <= 0) { ret = retval; diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index bc2dde2423c2..2a5c4813c47d 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -1959,7 +1959,7 @@ static int do_journal_release(struct reiserfs_transaction_handle *th, * will be requeued because superblock is being shutdown and doesn't * have MS_ACTIVE set. */ - cancel_delayed_work_sync(&REISERFS_SB(sb)->old_work); + reiserfs_cancel_old_flush(sb); /* wait for all commits to finish */ cancel_delayed_work_sync(&SB_JOURNAL(sb)->j_work); @@ -2640,7 +2640,7 @@ static int journal_init_dev(struct super_block *super, if (IS_ERR(journal->j_dev_bd)) { result = PTR_ERR(journal->j_dev_bd); journal->j_dev_bd = NULL; - reiserfs_warning(super, + reiserfs_warning(super, "sh-457", "journal_init_dev: Cannot open '%s': %i", jdev_name, result); return result; diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h index 5dcf3ab83886..6ca00471afbf 100644 --- a/fs/reiserfs/reiserfs.h +++ b/fs/reiserfs/reiserfs.h @@ -2948,6 +2948,7 @@ int reiserfs_allocate_list_bitmaps(struct super_block *s, struct reiserfs_list_bitmap *, unsigned int); void reiserfs_schedule_old_flush(struct super_block *s); +void reiserfs_cancel_old_flush(struct super_block *s); void add_save_link(struct reiserfs_transaction_handle *th, struct inode *inode, int truncate); int remove_save_link(struct inode *inode, int truncate); diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index e101d70d2327..dec6c93044fa 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -90,7 +90,9 @@ static void flush_old_commits(struct work_struct *work) s = sbi->s_journal->j_work_sb; spin_lock(&sbi->old_work_lock); - sbi->work_queued = 0; + /* Avoid clobbering the cancel state... */ + if (sbi->work_queued == 1) + sbi->work_queued = 0; spin_unlock(&sbi->old_work_lock); reiserfs_sync_fs(s, 1); @@ -117,21 +119,22 @@ void reiserfs_schedule_old_flush(struct super_block *s) spin_unlock(&sbi->old_work_lock); } -static void cancel_old_flush(struct super_block *s) +void reiserfs_cancel_old_flush(struct super_block *s) { struct reiserfs_sb_info *sbi = REISERFS_SB(s); - cancel_delayed_work_sync(&REISERFS_SB(s)->old_work); spin_lock(&sbi->old_work_lock); - sbi->work_queued = 0; + /* Make sure no new flushes will be queued */ + sbi->work_queued = 2; spin_unlock(&sbi->old_work_lock); + cancel_delayed_work_sync(&REISERFS_SB(s)->old_work); } static int reiserfs_freeze(struct super_block *s) { struct reiserfs_transaction_handle th; - cancel_old_flush(s); + reiserfs_cancel_old_flush(s); reiserfs_write_lock(s); if (!(s->s_flags & MS_RDONLY)) { @@ -152,7 +155,13 @@ static int reiserfs_freeze(struct super_block *s) static int reiserfs_unfreeze(struct super_block *s) { + struct reiserfs_sb_info *sbi = REISERFS_SB(s); + reiserfs_allow_writes(s); + spin_lock(&sbi->old_work_lock); + /* Allow old_work to run again */ + sbi->work_queued = 0; + spin_unlock(&sbi->old_work_lock); return 0; } @@ -2194,7 +2203,7 @@ error_unlocked: if (sbi->commit_wq) destroy_workqueue(sbi->commit_wq); - cancel_delayed_work_sync(&REISERFS_SB(s)->old_work); + reiserfs_cancel_old_flush(s); reiserfs_free_bitmap_cache(s); if (SB_BUFFER_WITH_SB(s)) diff --git a/fs/super.c b/fs/super.c index 1058bf3e8724..7e9beab77259 100644 --- a/fs/super.c +++ b/fs/super.c @@ -519,7 +519,11 @@ retry: hlist_add_head(&s->s_instances, &type->fs_supers); spin_unlock(&sb_lock); get_filesystem(type); - register_shrinker(&s->s_shrink); + err = register_shrinker(&s->s_shrink); + if (err) { + deactivate_locked_super(s); + s = ERR_PTR(err); + } return s; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 4ec051089186..03dda1cbe485 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -1728,8 +1728,11 @@ static void ubifs_remount_ro(struct ubifs_info *c) dbg_save_space_info(c); - for (i = 0; i < c->jhead_cnt; i++) - ubifs_wbuf_sync(&c->jheads[i].wbuf); + for (i = 0; i < c->jhead_cnt; i++) { + err = ubifs_wbuf_sync(&c->jheads[i].wbuf); + if (err) + ubifs_ro_mode(c, err); + } c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_DIRTY); c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); @@ -1795,8 +1798,11 @@ static void ubifs_put_super(struct super_block *sb) int err; /* Synchronize write-buffers */ - for (i = 0; i < c->jhead_cnt; i++) - ubifs_wbuf_sync(&c->jheads[i].wbuf); + for (i = 0; i < c->jhead_cnt; i++) { + err = ubifs_wbuf_sync(&c->jheads[i].wbuf); + if (err) + ubifs_ro_mode(c, err); + } /* * We are being cleanly unmounted which means the diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c index 695389a4fc23..3a3be23689b3 100644 --- a/fs/udf/unicode.c +++ b/fs/udf/unicode.c @@ -28,6 +28,9 @@ #include "udf_sb.h" +#define SURROGATE_MASK 0xfffff800 +#define SURROGATE_PAIR 0x0000d800 + static int udf_uni2char_utf8(wchar_t uni, unsigned char *out, int boundlen) @@ -37,6 +40,9 @@ static int udf_uni2char_utf8(wchar_t uni, if (boundlen <= 0) return -ENAMETOOLONG; + if ((uni & SURROGATE_MASK) == SURROGATE_PAIR) + return -EINVAL; + if (uni < 0x80) { out[u_len++] = (unsigned char)uni; } else if (uni < 0x800) { diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 362c6b4c1186..1410835eebe2 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -846,22 +846,26 @@ xfs_file_fallocate( if (error) goto out_unlock; } else if (mode & FALLOC_FL_INSERT_RANGE) { - unsigned int blksize_mask = i_blocksize(inode) - 1; + unsigned int blksize_mask = i_blocksize(inode) - 1; + loff_t isize = i_size_read(inode); - new_size = i_size_read(inode) + len; if (offset & blksize_mask || len & blksize_mask) { error = -EINVAL; goto out_unlock; } - /* check the new inode size does not wrap through zero */ - if (new_size > inode->i_sb->s_maxbytes) { + /* + * New inode size must not exceed ->s_maxbytes, accounting for + * possible signed overflow. + */ + if (inode->i_sb->s_maxbytes - isize < len) { error = -EFBIG; goto out_unlock; } + new_size = isize + len; /* Offset should be less than i_size */ - if (offset >= i_size_read(inode)) { + if (offset >= isize) { error = -EINVAL; goto out_unlock; } diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 1fdd3face2d9..e3edaa547216 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -47,7 +47,7 @@ STATIC int xfs_qm_init_quotainos(xfs_mount_t *); STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); - +STATIC void xfs_qm_destroy_quotainos(xfs_quotainfo_t *qi); STATIC void xfs_qm_dqfree_one(struct xfs_dquot *dqp); /* * We use the batch lookup interface to iterate over the dquots as it @@ -694,9 +694,17 @@ xfs_qm_init_quotainfo( qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan; qinf->qi_shrinker.seeks = DEFAULT_SEEKS; qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE; - register_shrinker(&qinf->qi_shrinker); + + error = register_shrinker(&qinf->qi_shrinker); + if (error) + goto out_free_inos; + return 0; +out_free_inos: + mutex_destroy(&qinf->qi_quotaofflock); + mutex_destroy(&qinf->qi_tree_lock); + xfs_qm_destroy_quotainos(qinf); out_free_lru: list_lru_destroy(&qinf->qi_lru); out_free_qinf: @@ -705,7 +713,6 @@ out_free_qinf: return error; } - /* * Gets called when unmounting a filesystem or when all quotas get * turned off. @@ -722,19 +729,8 @@ xfs_qm_destroy_quotainfo( unregister_shrinker(&qi->qi_shrinker); list_lru_destroy(&qi->qi_lru); - - if (qi->qi_uquotaip) { - IRELE(qi->qi_uquotaip); - qi->qi_uquotaip = NULL; /* paranoia */ - } - if (qi->qi_gquotaip) { - IRELE(qi->qi_gquotaip); - qi->qi_gquotaip = NULL; - } - if (qi->qi_pquotaip) { - IRELE(qi->qi_pquotaip); - qi->qi_pquotaip = NULL; - } + xfs_qm_destroy_quotainos(qi); + mutex_destroy(&qi->qi_tree_lock); mutex_destroy(&qi->qi_quotaofflock); kmem_free(qi); mp->m_quotainfo = NULL; @@ -1620,6 +1616,24 @@ error_rele: } STATIC void +xfs_qm_destroy_quotainos( + xfs_quotainfo_t *qi) +{ + if (qi->qi_uquotaip) { + IRELE(qi->qi_uquotaip); + qi->qi_uquotaip = NULL; /* paranoia */ + } + if (qi->qi_gquotaip) { + IRELE(qi->qi_gquotaip); + qi->qi_gquotaip = NULL; + } + if (qi->qi_pquotaip) { + IRELE(qi->qi_pquotaip); + qi->qi_pquotaip = NULL; + } +} + +STATIC void xfs_qm_dqfree_one( struct xfs_dquot *dqp) { |