aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/block_dev.c7
-rw-r--r--fs/btrfs/backref.c8
-rw-r--r--fs/btrfs/extent_io.c80
-rw-r--r--fs/btrfs/inode.c3
-rw-r--r--fs/btrfs/ioctl.c8
-rw-r--r--fs/btrfs/transaction.c3
-rw-r--r--fs/btrfs/volumes.h8
-rw-r--r--fs/ceph/super.c2
-rw-r--r--fs/cifs/cifsencrypt.c53
-rw-r--r--fs/cifs/cifsfs.c6
-rw-r--r--fs/cifs/inode.c34
-rw-r--r--fs/cifs/ioctl.c6
-rw-r--r--fs/cifs/smb2ops.c8
-rw-r--r--fs/cifs/smb2pdu.c76
-rw-r--r--fs/coredump.c46
-rw-r--r--fs/dcache.c12
-rw-r--r--fs/ecryptfs/dentry.c16
-rw-r--r--fs/ext4/super.c34
-rw-r--r--fs/gfs2/super.c6
-rw-r--r--fs/hfs/bnode.c9
-rw-r--r--fs/hfs/brec.c20
-rw-r--r--fs/hfs/super.c4
-rw-r--r--fs/hfsplus/bnode.c3
-rw-r--r--fs/hfsplus/options.c4
-rw-r--r--fs/hostfs/hostfs_kern.c2
-rw-r--r--fs/hpfs/namei.c25
-rw-r--r--fs/internal.h1
-rw-r--r--fs/jbd2/checkpoint.c39
-rw-r--r--fs/jbd2/commit.c2
-rw-r--r--fs/jbd2/journal.c11
-rw-r--r--fs/libfs.c2
-rw-r--r--fs/locks.c38
-rw-r--r--fs/namei.c39
-rw-r--r--fs/nfs/filelayout/filelayout.c31
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c5
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c7
-rw-r--r--fs/nfs/inode.c15
-rw-r--r--fs/nfs/nfs4proc.c36
-rw-r--r--fs/nfs/pagelist.c6
-rw-r--r--fs/nfs/pnfs_nfs.c33
-rw-r--r--fs/nfs/read.c3
-rw-r--r--fs/nfs/write.c20
-rw-r--r--fs/nfsd/blocklayout.c8
-rw-r--r--fs/nfsd/nfs4state.c84
-rw-r--r--fs/nfsd/nfs4xdr.c45
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c9
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c8
-rw-r--r--fs/ocfs2/super.c4
-rw-r--r--fs/open.c49
-rw-r--r--fs/overlayfs/copy_up.c6
-rw-r--r--fs/overlayfs/inode.c25
-rw-r--r--fs/overlayfs/overlayfs.h1
-rw-r--r--fs/overlayfs/super.c9
-rw-r--r--fs/reiserfs/super.c8
-rw-r--r--fs/ubifs/xattr.c3
-rw-r--r--fs/xfs/libxfs/xfs_da_format.h11
-rw-r--r--fs/xfs/libxfs/xfs_dir2_data.c3
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c10
-rw-r--r--fs/xfs/xfs_inode.c68
-rw-r--r--fs/xfs/xfs_inode.h85
-rw-r--r--fs/xfs/xfs_super.c4
61 files changed, 824 insertions, 387 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index c7e4163ede87..ccfd31f1df3a 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1234,6 +1234,13 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
goto out_clear;
}
bd_set_size(bdev, (loff_t)bdev->bd_part->nr_sects << 9);
+ /*
+ * If the partition is not aligned on a page
+ * boundary, we can't do dax I/O to it.
+ */
+ if ((bdev->bd_part->start_sect % (PAGE_SIZE / 512)) ||
+ (bdev->bd_part->nr_sects % (PAGE_SIZE / 512)))
+ bdev->bd_inode->i_flags &= ~S_DAX;
}
} else {
if (bdev->bd_contains == bdev) {
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 614aaa1969bd..723470850b94 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1786,7 +1786,6 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
int found = 0;
struct extent_buffer *eb;
struct btrfs_inode_extref *extref;
- struct extent_buffer *leaf;
u32 item_size;
u32 cur_offset;
unsigned long ptr;
@@ -1814,9 +1813,8 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
btrfs_release_path(path);
- leaf = path->nodes[0];
- item_size = btrfs_item_size_nr(leaf, slot);
- ptr = btrfs_item_ptr_offset(leaf, slot);
+ item_size = btrfs_item_size_nr(eb, slot);
+ ptr = btrfs_item_ptr_offset(eb, slot);
cur_offset = 0;
while (cur_offset < item_size) {
@@ -1830,7 +1828,7 @@ static int iterate_inode_extrefs(u64 inum, struct btrfs_root *fs_root,
if (ret)
break;
- cur_offset += btrfs_inode_extref_name_len(leaf, extref);
+ cur_offset += btrfs_inode_extref_name_len(eb, extref);
cur_offset += sizeof(*extref);
}
btrfs_tree_read_unlock_blocking(eb);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c32d226bfecc..885f533a34d9 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2795,7 +2795,8 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
bio_end_io_t end_io_func,
int mirror_num,
unsigned long prev_bio_flags,
- unsigned long bio_flags)
+ unsigned long bio_flags,
+ bool force_bio_submit)
{
int ret = 0;
struct bio *bio;
@@ -2813,6 +2814,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
contig = bio_end_sector(bio) == sector;
if (prev_bio_flags != bio_flags || !contig ||
+ force_bio_submit ||
merge_bio(rw, tree, page, offset, page_size, bio, bio_flags) ||
bio_add_page(bio, page, page_size, offset) < page_size) {
ret = submit_one_bio(rw, bio, mirror_num,
@@ -2906,7 +2908,8 @@ static int __do_readpage(struct extent_io_tree *tree,
get_extent_t *get_extent,
struct extent_map **em_cached,
struct bio **bio, int mirror_num,
- unsigned long *bio_flags, int rw)
+ unsigned long *bio_flags, int rw,
+ u64 *prev_em_start)
{
struct inode *inode = page->mapping->host;
u64 start = page_offset(page);
@@ -2954,6 +2957,7 @@ static int __do_readpage(struct extent_io_tree *tree,
}
while (cur <= end) {
unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
+ bool force_bio_submit = false;
if (cur >= last_byte) {
char *userpage;
@@ -3004,6 +3008,49 @@ static int __do_readpage(struct extent_io_tree *tree,
block_start = em->block_start;
if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
block_start = EXTENT_MAP_HOLE;
+
+ /*
+ * If we have a file range that points to a compressed extent
+ * and it's followed by a consecutive file range that points to
+ * to the same compressed extent (possibly with a different
+ * offset and/or length, so it either points to the whole extent
+ * or only part of it), we must make sure we do not submit a
+ * single bio to populate the pages for the 2 ranges because
+ * this makes the compressed extent read zero out the pages
+ * belonging to the 2nd range. Imagine the following scenario:
+ *
+ * File layout
+ * [0 - 8K] [8K - 24K]
+ * | |
+ * | |
+ * points to extent X, points to extent X,
+ * offset 4K, length of 8K offset 0, length 16K
+ *
+ * [extent X, compressed length = 4K uncompressed length = 16K]
+ *
+ * If the bio to read the compressed extent covers both ranges,
+ * it will decompress extent X into the pages belonging to the
+ * first range and then it will stop, zeroing out the remaining
+ * pages that belong to the other range that points to extent X.
+ * So here we make sure we submit 2 bios, one for the first
+ * range and another one for the third range. Both will target
+ * the same physical extent from disk, but we can't currently
+ * make the compressed bio endio callback populate the pages
+ * for both ranges because each compressed bio is tightly
+ * coupled with a single extent map, and each range can have
+ * an extent map with a different offset value relative to the
+ * uncompressed data of our extent and different lengths. This
+ * is a corner case so we prioritize correctness over
+ * non-optimal behavior (submitting 2 bios for the same extent).
+ */
+ if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) &&
+ prev_em_start && *prev_em_start != (u64)-1 &&
+ *prev_em_start != em->orig_start)
+ force_bio_submit = true;
+
+ if (prev_em_start)
+ *prev_em_start = em->orig_start;
+
free_extent_map(em);
em = NULL;
@@ -3053,7 +3100,8 @@ static int __do_readpage(struct extent_io_tree *tree,
bdev, bio, pnr,
end_bio_extent_readpage, mirror_num,
*bio_flags,
- this_bio_flag);
+ this_bio_flag,
+ force_bio_submit);
if (!ret) {
nr++;
*bio_flags = this_bio_flag;
@@ -3080,7 +3128,8 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
get_extent_t *get_extent,
struct extent_map **em_cached,
struct bio **bio, int mirror_num,
- unsigned long *bio_flags, int rw)
+ unsigned long *bio_flags, int rw,
+ u64 *prev_em_start)
{
struct inode *inode;
struct btrfs_ordered_extent *ordered;
@@ -3100,7 +3149,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
for (index = 0; index < nr_pages; index++) {
__do_readpage(tree, pages[index], get_extent, em_cached, bio,
- mirror_num, bio_flags, rw);
+ mirror_num, bio_flags, rw, prev_em_start);
page_cache_release(pages[index]);
}
}
@@ -3110,7 +3159,8 @@ static void __extent_readpages(struct extent_io_tree *tree,
int nr_pages, get_extent_t *get_extent,
struct extent_map **em_cached,
struct bio **bio, int mirror_num,
- unsigned long *bio_flags, int rw)
+ unsigned long *bio_flags, int rw,
+ u64 *prev_em_start)
{
u64 start = 0;
u64 end = 0;
@@ -3131,7 +3181,7 @@ static void __extent_readpages(struct extent_io_tree *tree,
index - first_index, start,
end, get_extent, em_cached,
bio, mirror_num, bio_flags,
- rw);
+ rw, prev_em_start);
start = page_start;
end = start + PAGE_CACHE_SIZE - 1;
first_index = index;
@@ -3142,7 +3192,8 @@ static void __extent_readpages(struct extent_io_tree *tree,
__do_contiguous_readpages(tree, &pages[first_index],
index - first_index, start,
end, get_extent, em_cached, bio,
- mirror_num, bio_flags, rw);
+ mirror_num, bio_flags, rw,
+ prev_em_start);
}
static int __extent_read_full_page(struct extent_io_tree *tree,
@@ -3168,7 +3219,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
}
ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
- bio_flags, rw);
+ bio_flags, rw, NULL);
return ret;
}
@@ -3194,7 +3245,7 @@ int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
int ret;
ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
- &bio_flags, READ);
+ &bio_flags, READ, NULL);
if (bio)
ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
return ret;
@@ -3447,7 +3498,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
sector, iosize, pg_offset,
bdev, &epd->bio, max_nr,
end_bio_extent_writepage,
- 0, 0, 0);
+ 0, 0, 0, false);
if (ret)
SetPageError(page);
}
@@ -3749,7 +3800,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
ret = submit_extent_page(rw, tree, p, offset >> 9,
PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
-1, end_bio_extent_buffer_writepage,
- 0, epd->bio_flags, bio_flags);
+ 0, epd->bio_flags, bio_flags, false);
epd->bio_flags = bio_flags;
if (ret) {
set_btree_ioerr(p);
@@ -4153,6 +4204,7 @@ int extent_readpages(struct extent_io_tree *tree,
struct page *page;
struct extent_map *em_cached = NULL;
int nr = 0;
+ u64 prev_em_start = (u64)-1;
for (page_idx = 0; page_idx < nr_pages; page_idx++) {
page = list_entry(pages->prev, struct page, lru);
@@ -4169,12 +4221,12 @@ int extent_readpages(struct extent_io_tree *tree,
if (nr < ARRAY_SIZE(pagepool))
continue;
__extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
- &bio, 0, &bio_flags, READ);
+ &bio, 0, &bio_flags, READ, &prev_em_start);
nr = 0;
}
if (nr)
__extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
- &bio, 0, &bio_flags, READ);
+ &bio, 0, &bio_flags, READ, &prev_em_start);
if (em_cached)
free_extent_map(em_cached);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8bb013672aee..e3b39f0c4666 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5035,7 +5035,8 @@ void btrfs_evict_inode(struct inode *inode)
goto no_delete;
}
/* do we really want it for ->i_nlink > 0 and zero btrfs_root_refs? */
- btrfs_wait_ordered_range(inode, 0, (u64)-1);
+ if (!special_file(inode->i_mode))
+ btrfs_wait_ordered_range(inode, 0, (u64)-1);
btrfs_free_io_failure_record(inode, 0, (u64)-1);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 37d456a9a3b8..8b2c82ce36b3 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -4492,6 +4492,11 @@ locked:
bctl->flags |= BTRFS_BALANCE_TYPE_MASK;
}
+ if (bctl->flags & ~(BTRFS_BALANCE_ARGS_MASK | BTRFS_BALANCE_TYPE_MASK)) {
+ ret = -EINVAL;
+ goto out_bctl;
+ }
+
do_balance:
/*
* Ownership of bctl and mutually_exclusive_operation_running
@@ -4503,12 +4508,15 @@ do_balance:
need_unlock = false;
ret = btrfs_balance(bctl, bargs);
+ bctl = NULL;
if (arg) {
if (copy_to_user(arg, bargs, sizeof(*bargs)))
ret = -EFAULT;
}
+out_bctl:
+ kfree(bctl);
out_bargs:
kfree(bargs);
out_unlock:
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 94e909c5a503..00d18c2bdb0f 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1875,8 +1875,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
spin_unlock(&root->fs_info->trans_lock);
wait_for_commit(root, prev_trans);
+ ret = prev_trans->aborted;
btrfs_put_transaction(prev_trans);
+ if (ret)
+ goto cleanup_transaction;
} else {
spin_unlock(&root->fs_info->trans_lock);
}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index ebc31331a837..e1cc5b45069a 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -372,6 +372,14 @@ struct map_lookup {
#define BTRFS_BALANCE_ARGS_VRANGE (1ULL << 4)
#define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5)
+#define BTRFS_BALANCE_ARGS_MASK \
+ (BTRFS_BALANCE_ARGS_PROFILES | \
+ BTRFS_BALANCE_ARGS_USAGE | \
+ BTRFS_BALANCE_ARGS_DEVID | \
+ BTRFS_BALANCE_ARGS_DRANGE | \
+ BTRFS_BALANCE_ARGS_VRANGE | \
+ BTRFS_BALANCE_ARGS_LIMIT)
+
/*
* Profile changing flags. When SOFT is set we won't relocate chunk if
* it already has the target profile (even though it may be
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 4e9905374078..0d47422e3548 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -466,7 +466,7 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
if (fsopt->max_readdir_bytes != CEPH_MAX_READDIR_BYTES_DEFAULT)
seq_printf(m, ",readdir_max_bytes=%d", fsopt->max_readdir_bytes);
if (strcmp(fsopt->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
- seq_printf(m, ",snapdirname=%s", fsopt->snapdir_name);
+ seq_show_option(m, "snapdirname", fsopt->snapdir_name);
return 0;
}
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index aa0dc2573374..afa09fce8151 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -444,6 +444,48 @@ find_domain_name(struct cifs_ses *ses, const struct nls_table *nls_cp)
return 0;
}
+/* Server has provided av pairs/target info in the type 2 challenge
+ * packet and we have plucked it and stored within smb session.
+ * We parse that blob here to find the server given timestamp
+ * as part of ntlmv2 authentication (or local current time as
+ * default in case of failure)
+ */
+static __le64
+find_timestamp(struct cifs_ses *ses)
+{
+ unsigned int attrsize;
+ unsigned int type;
+ unsigned int onesize = sizeof(struct ntlmssp2_name);
+ unsigned char *blobptr;
+ unsigned char *blobend;
+ struct ntlmssp2_name *attrptr;
+
+ if (!ses->auth_key.len || !ses->auth_key.response)
+ return 0;
+
+ blobptr = ses->auth_key.response;
+ blobend = blobptr + ses->auth_key.len;
+
+ while (blobptr + onesize < blobend) {
+ attrptr = (struct ntlmssp2_name *) blobptr;
+ type = le16_to_cpu(attrptr->type);
+ if (type == NTLMSSP_AV_EOL)
+ break;
+ blobptr += 2; /* advance attr type */
+ attrsize = le16_to_cpu(attrptr->length);
+ blobptr += 2; /* advance attr size */
+ if (blobptr + attrsize > blobend)
+ break;
+ if (type == NTLMSSP_AV_TIMESTAMP) {
+ if (attrsize == sizeof(u64))
+ return *((__le64 *)blobptr);
+ }
+ blobptr += attrsize; /* advance attr value */
+ }
+
+ return cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
+}
+
static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
const struct nls_table *nls_cp)
{
@@ -641,6 +683,7 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
struct ntlmv2_resp *ntlmv2;
char ntlmv2_hash[16];
unsigned char *tiblob = NULL; /* target info blob */
+ __le64 rsp_timestamp;
if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED) {
if (!ses->domainName) {
@@ -659,6 +702,12 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
}
}
+ /* Must be within 5 minutes of the server (or in range +/-2h
+ * in case of Mac OS X), so simply carry over server timestamp
+ * (as Windows 7 does)
+ */
+ rsp_timestamp = find_timestamp(ses);
+
baselen = CIFS_SESS_KEY_SIZE + sizeof(struct ntlmv2_resp);
tilen = ses->auth_key.len;
tiblob = ses->auth_key.response;
@@ -675,8 +724,8 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
(ses->auth_key.response + CIFS_SESS_KEY_SIZE);
ntlmv2->blob_signature = cpu_to_le32(0x00000101);
ntlmv2->reserved = 0;
- /* Must be within 5 minutes of the server */
- ntlmv2->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
+ ntlmv2->time = rsp_timestamp;
+
get_random_bytes(&ntlmv2->client_chal, sizeof(ntlmv2->client_chal));
ntlmv2->reserved2 = 0;
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 0a9fb6b53126..6a1119e87fbb 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -394,17 +394,17 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
struct sockaddr *srcaddr;
srcaddr = (struct sockaddr *)&tcon->ses->server->srcaddr;
- seq_printf(s, ",vers=%s", tcon->ses->server->vals->version_string);
+ seq_show_option(s, "vers", tcon->ses->server->vals->version_string);
cifs_show_security(s, tcon->ses);
cifs_show_cache_flavor(s, cifs_sb);
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)
seq_puts(s, ",multiuser");
else if (tcon->ses->user_name)
- seq_printf(s, ",username=%s", tcon->ses->user_name);
+ seq_show_option(s, "username", tcon->ses->user_name);
if (tcon->ses->domainName)
- seq_printf(s, ",domain=%s", tcon->ses->domainName);
+ seq_show_option(s, "domain", tcon->ses->domainName);
if (srcaddr->sa_family != AF_UNSPEC) {
struct sockaddr_in *saddr4;
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index f621b44cb800..6b66dd5d1540 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -2034,7 +2034,6 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
struct tcon_link *tlink = NULL;
struct cifs_tcon *tcon = NULL;
struct TCP_Server_Info *server;
- struct cifs_io_parms io_parms;
/*
* To avoid spurious oplock breaks from server, in the case of
@@ -2056,18 +2055,6 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
rc = -ENOSYS;
cifsFileInfo_put(open_file);
cifs_dbg(FYI, "SetFSize for attrs rc = %d\n", rc);
- if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
- unsigned int bytes_written;
-
- io_parms.netfid = open_file->fid.netfid;
- io_parms.pid = open_file->pid;
- io_parms.tcon = tcon;
- io_parms.offset = 0;
- io_parms.length = attrs->ia_size;
- rc = CIFSSMBWrite(xid, &io_parms, &bytes_written,
- NULL, NULL, 1);
- cifs_dbg(FYI, "Wrt seteof rc %d\n", rc);
- }
} else
rc = -EINVAL;
@@ -2093,28 +2080,7 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs,
else
rc = -ENOSYS;
cifs_dbg(FYI, "SetEOF by path (setattrs) rc = %d\n", rc);
- if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
- __u16 netfid;
- int oplock = 0;
- rc = SMBLegacyOpen(xid, tcon, full_path, FILE_OPEN,
- GENERIC_WRITE, CREATE_NOT_DIR, &netfid,
- &oplock, NULL, cifs_sb->local_nls,
- cifs_remap(cifs_sb));
- if (rc == 0) {
- unsigned int bytes_written;
-
- io_parms.netfid = netfid;
- io_parms.pid = current->tgid;
- io_parms.tcon = tcon;
- io_parms.offset = 0;
- io_parms.length = attrs->ia_size;
- rc = CIFSSMBWrite(xid, &io_parms, &bytes_written, NULL,
- NULL, 1);
- cifs_dbg(FYI, "wrt seteof rc %d\n", rc);
- CIFSSMBClose(xid, tcon, netfid);
- }
- }
if (tlink)
cifs_put_tlink(tlink);
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 8b7898b7670f..64a9bca976d0 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -67,6 +67,12 @@ static long cifs_ioctl_clone(unsigned int xid, struct file *dst_file,
goto out_drop_write;
}
+ if (src_file.file->f_op->unlocked_ioctl != cifs_ioctl) {
+ rc = -EBADF;
+ cifs_dbg(VFS, "src file seems to be from a different filesystem type\n");
+ goto out_fput;
+ }
+
if ((!src_file.file->private_data) || (!dst_file->private_data)) {
rc = -EBADF;
cifs_dbg(VFS, "missing cifsFileInfo on copy range src file\n");
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 54daee5ad4c1..1678b9cb94c7 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -50,9 +50,13 @@ change_conf(struct TCP_Server_Info *server)
break;
default:
server->echoes = true;
- server->oplocks = true;
+ if (enable_oplocks) {
+ server->oplocks = true;
+ server->oplock_credits = 1;
+ } else
+ server->oplocks = false;
+
server->echo_credits = 1;
- server->oplock_credits = 1;
}
server->credits -= server->echo_credits + server->oplock_credits;
return 0;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 54cbe19d9c08..894f259d3989 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -46,6 +46,7 @@
#include "smb2status.h"
#include "smb2glob.h"
#include "cifspdu.h"
+#include "cifs_spnego.h"
/*
* The following table defines the expected "StructureSize" of SMB2 requests
@@ -427,19 +428,15 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
cifs_dbg(FYI, "missing security blob on negprot\n");
rc = cifs_enable_signing(server, ses->sign);
-#ifdef CONFIG_SMB2_ASN1 /* BB REMOVEME when updated asn1.c ready */
if (rc)
goto neg_exit;
- if (blob_length)
+ if (blob_length) {
rc = decode_negTokenInit(security_blob, blob_length, server);
- if (rc == 1)
- rc = 0;
- else if (rc == 0) {
- rc = -EIO;
- goto neg_exit;
+ if (rc == 1)
+ rc = 0;
+ else if (rc == 0)
+ rc = -EIO;
}
-#endif
-
neg_exit:
free_rsp_buf(resp_buftype, rsp);
return rc;
@@ -533,7 +530,8 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
__le32 phase = NtLmNegotiate; /* NTLMSSP, if needed, is multistage */
struct TCP_Server_Info *server = ses->server;
u16 blob_length = 0;
- char *security_blob;
+ struct key *spnego_key = NULL;
+ char *security_blob = NULL;
char *ntlmssp_blob = NULL;
bool use_spnego = false; /* else use raw ntlmssp */
@@ -561,7 +559,8 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
ses->ntlmssp->sesskey_per_smbsess = true;
/* FIXME: allow for other auth types besides NTLMSSP (e.g. krb5) */
- ses->sectype = RawNTLMSSP;
+ if (ses->sectype != Kerberos && ses->sectype != RawNTLMSSP)
+ ses->sectype = RawNTLMSSP;
ssetup_ntlmssp_authenticate:
if (phase == NtLmChallenge)
@@ -590,7 +589,48 @@ ssetup_ntlmssp_authenticate:
iov[0].iov_base = (char *)req;
/* 4 for rfc1002 length field and 1 for pad */
iov[0].iov_len = get_rfc1002_length(req) + 4 - 1;
- if (phase == NtLmNegotiate) {
+
+ if (ses->sectype == Kerberos) {
+#ifdef CONFIG_CIFS_UPCALL
+ struct cifs_spnego_msg *msg;
+
+ spnego_key = cifs_get_spnego_key(ses);
+ if (IS_ERR(spnego_key)) {
+ rc = PTR_ERR(spnego_key);
+ spnego_key = NULL;
+ goto ssetup_exit;
+ }
+
+ msg = spnego_key->payload.data;
+ /*
+ * check version field to make sure that cifs.upcall is
+ * sending us a response in an expected form
+ */
+ if (msg->version != CIFS_SPNEGO_UPCALL_VERSION) {
+ cifs_dbg(VFS,
+ "bad cifs.upcall version. Expected %d got %d",
+ CIFS_SPNEGO_UPCALL_VERSION, msg->version);
+ rc = -EKEYREJECTED;
+ goto ssetup_exit;
+ }
+ ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len,
+ GFP_KERNEL);
+ if (!ses->auth_key.response) {
+ cifs_dbg(VFS,
+ "Kerberos can't allocate (%u bytes) memory",
+ msg->sesskey_len);
+ rc = -ENOMEM;
+ goto ssetup_exit;
+ }
+ ses->auth_key.len = msg->sesskey_len;
+ blob_length = msg->secblob_len;
+ iov[1].iov_base = msg->data + msg->sesskey_len;
+ iov[1].iov_len = blob_length;
+#else
+ rc = -EOPNOTSUPP;
+ goto ssetup_exit;
+#endif /* CONFIG_CIFS_UPCALL */
+ } else if (phase == NtLmNegotiate) { /* if not krb5 must be ntlmssp */
ntlmssp_blob = kmalloc(sizeof(struct _NEGOTIATE_MESSAGE),
GFP_KERNEL);
if (ntlmssp_blob == NULL) {
@@ -613,6 +653,8 @@ ssetup_ntlmssp_authenticate:
/* with raw NTLMSSP we don't encapsulate in SPNEGO */
security_blob = ntlmssp_blob;
}
+ iov[1].iov_base = security_blob;
+ iov[1].iov_len = blob_length;
} else if (phase == NtLmAuthenticate) {
req->hdr.SessionId = ses->Suid;
ntlmssp_blob = kzalloc(sizeof(struct _NEGOTIATE_MESSAGE) + 500,
@@ -640,6 +682,8 @@ ssetup_ntlmssp_authenticate:
} else {
security_blob = ntlmssp_blob;
}
+ iov[1].iov_base = security_blob;
+ iov[1].iov_len = blob_length;
} else {
cifs_dbg(VFS, "illegal ntlmssp phase\n");
rc = -EIO;
@@ -651,8 +695,6 @@ ssetup_ntlmssp_authenticate:
cpu_to_le16(sizeof(struct smb2_sess_setup_req) -
1 /* pad */ - 4 /* rfc1001 len */);
req->SecurityBufferLength = cpu_to_le16(blob_length);
- iov[1].iov_base = security_blob;
- iov[1].iov_len = blob_length;
inc_rfc1001_len(req, blob_length - 1 /* pad */);
@@ -663,6 +705,7 @@ ssetup_ntlmssp_authenticate:
kfree(security_blob);
rsp = (struct smb2_sess_setup_rsp *)iov[0].iov_base;
+ ses->Suid = rsp->hdr.SessionId;
if (resp_buftype != CIFS_NO_BUFFER &&
rsp->hdr.Status == STATUS_MORE_PROCESSING_REQUIRED) {
if (phase != NtLmNegotiate) {
@@ -680,7 +723,6 @@ ssetup_ntlmssp_authenticate:
/* NTLMSSP Negotiate sent now processing challenge (response) */
phase = NtLmChallenge; /* process ntlmssp challenge */
rc = 0; /* MORE_PROCESSING is not an error here but expected */
- ses->Suid = rsp->hdr.SessionId;
rc = decode_ntlmssp_challenge(rsp->Buffer,
le16_to_cpu(rsp->SecurityBufferLength), ses);
}
@@ -737,6 +779,10 @@ keygen_exit:
kfree(ses->auth_key.response);
ses->auth_key.response = NULL;
}
+ if (spnego_key) {
+ key_invalidate(spnego_key);
+ key_put(spnego_key);
+ }
kfree(ses->ntlmssp);
return rc;
diff --git a/fs/coredump.c b/fs/coredump.c
index bbbe139ab280..8dd099dc5f9b 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -506,10 +506,10 @@ void do_coredump(const siginfo_t *siginfo)
const struct cred *old_cred;
struct cred *cred;
int retval = 0;
- int flag = 0;
int ispipe;
struct files_struct *displaced;
- bool need_nonrelative = false;
+ /* require nonrelative corefile path and be extra careful */
+ bool need_suid_safe = false;
bool core_dumped = false;
static atomic_t core_dump_count = ATOMIC_INIT(0);
struct coredump_params cprm = {
@@ -543,9 +543,8 @@ void do_coredump(const siginfo_t *siginfo)
*/
if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) {
/* Setuid core dump mode */
- flag = O_EXCL; /* Stop rewrite attacks */
cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
- need_nonrelative = true;
+ need_suid_safe = true;
}
retval = coredump_wait(siginfo->si_signo, &core_state);
@@ -626,7 +625,7 @@ void do_coredump(const siginfo_t *siginfo)
if (cprm.limit < binfmt->min_coredump)
goto fail_unlock;
- if (need_nonrelative && cn.corename[0] != '/') {
+ if (need_suid_safe && cn.corename[0] != '/') {
printk(KERN_WARNING "Pid %d(%s) can only dump core "\
"to fully qualified path!\n",
task_tgid_vnr(current), current->comm);
@@ -634,8 +633,35 @@ void do_coredump(const siginfo_t *siginfo)
goto fail_unlock;
}
+ /*
+ * Unlink the file if it exists unless this is a SUID
+ * binary - in that case, we're running around with root
+ * privs and don't want to unlink another user's coredump.
+ */
+ if (!need_suid_safe) {
+ mm_segment_t old_fs;
+
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
+ /*
+ * If it doesn't exist, that's fine. If there's some
+ * other problem, we'll catch it at the filp_open().
+ */
+ (void) sys_unlink((const char __user *)cn.corename);
+ set_fs(old_fs);
+ }
+
+ /*
+ * There is a race between unlinking and creating the
+ * file, but if that causes an EEXIST here, that's
+ * fine - another process raced with us while creating
+ * the corefile, and the other process won. To userspace,
+ * what matters is that at least one of the two processes
+ * writes its coredump successfully, not which one.
+ */
cprm.file = filp_open(cn.corename,
- O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
+ O_CREAT | 2 | O_NOFOLLOW |
+ O_LARGEFILE | O_EXCL,
0600);
if (IS_ERR(cprm.file))
goto fail_unlock;
@@ -652,11 +678,15 @@ void do_coredump(const siginfo_t *siginfo)
if (!S_ISREG(inode->i_mode))
goto close_fail;
/*
- * Dont allow local users get cute and trick others to coredump
- * into their pre-created files.
+ * Don't dump core if the filesystem changed owner or mode
+ * of the file during file creation. This is an issue when
+ * a process dumps core while its cwd is e.g. on a vfat
+ * filesystem.
*/
if (!uid_eq(inode->i_uid, current_fsuid()))
goto close_fail;
+ if ((inode->i_mode & 0677) != 0600)
+ goto close_fail;
if (!(cprm.file->f_mode & FMODE_CAN_WRITE))
goto close_fail;
if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
diff --git a/fs/dcache.c b/fs/dcache.c
index c1dad92434d5..c235bd892098 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1677,7 +1677,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
DCACHE_OP_COMPARE |
DCACHE_OP_REVALIDATE |
DCACHE_OP_WEAK_REVALIDATE |
- DCACHE_OP_DELETE ));
+ DCACHE_OP_DELETE |
+ DCACHE_OP_SELECT_INODE));
dentry->d_op = op;
if (!op)
return;
@@ -1693,6 +1694,8 @@ void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op)
dentry->d_flags |= DCACHE_OP_DELETE;
if (op->d_prune)
dentry->d_flags |= DCACHE_OP_PRUNE;
+ if (op->d_select_inode)
+ dentry->d_flags |= DCACHE_OP_SELECT_INODE;
}
EXPORT_SYMBOL(d_set_d_op);
@@ -2924,6 +2927,13 @@ restart:
if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
struct mount *parent = ACCESS_ONCE(mnt->mnt_parent);
+ /* Escaped? */
+ if (dentry != vfsmnt->mnt_root) {
+ bptr = *buffer;
+ blen = *buflen;
+ error = 3;
+ break;
+ }
/* Global root? */
if (mnt != parent) {
dentry = ACCESS_ONCE(mnt->mnt_mountpoint);
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 8db0b464483f..63cd2c147221 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -45,20 +45,20 @@
static int ecryptfs_d_revalidate(struct dentry *dentry, unsigned int flags)
{
struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
- int rc;
-
- if (!(lower_dentry->d_flags & DCACHE_OP_REVALIDATE))
- return 1;
+ int rc = 1;
if (flags & LOOKUP_RCU)
return -ECHILD;
- rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags);
+ if (lower_dentry->d_flags & DCACHE_OP_REVALIDATE)
+ rc = lower_dentry->d_op->d_revalidate(lower_dentry, flags);
+
if (d_really_is_positive(dentry)) {
- struct inode *lower_inode =
- ecryptfs_inode_to_lower(d_inode(dentry));
+ struct inode *inode = d_inode(dentry);
- fsstack_copy_attr_all(d_inode(dentry), lower_inode);
+ fsstack_copy_attr_all(inode, ecryptfs_inode_to_lower(inode));
+ if (!inode->i_nlink)
+ return 0;
}
return rc;
}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index ca12affdba96..ff89971e3ee0 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -324,6 +324,22 @@ static void save_error_info(struct super_block *sb, const char *func,
ext4_commit_super(sb, 1);
}
+/*
+ * The del_gendisk() function uninitializes the disk-specific data
+ * structures, including the bdi structure, without telling anyone
+ * else. Once this happens, any attempt to call mark_buffer_dirty()
+ * (for example, by ext4_commit_super), will cause a kernel OOPS.
+ * This is a kludge to prevent these oops until we can put in a proper
+ * hook in del_gendisk() to inform the VFS and file system layers.
+ */
+static int block_device_ejected(struct super_block *sb)
+{
+ struct inode *bd_inode = sb->s_bdev->bd_inode;
+ struct backing_dev_info *bdi = inode_to_bdi(bd_inode);
+
+ return bdi->dev == NULL;
+}
+
static void ext4_journal_commit_callback(journal_t *journal, transaction_t *txn)
{
struct super_block *sb = journal->j_private;
@@ -1738,10 +1754,10 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
}
if (sbi->s_qf_names[USRQUOTA])
- seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
+ seq_show_option(seq, "usrjquota", sbi->s_qf_names[USRQUOTA]);
if (sbi->s_qf_names[GRPQUOTA])
- seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);
+ seq_show_option(seq, "grpjquota", sbi->s_qf_names[GRPQUOTA]);
#endif
}
@@ -4591,7 +4607,7 @@ static int ext4_commit_super(struct super_block *sb, int sync)
struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
int error = 0;
- if (!sbh)
+ if (!sbh || block_device_ejected(sb))
return error;
if (buffer_write_io_error(sbh)) {
/*
@@ -4807,10 +4823,11 @@ static int ext4_freeze(struct super_block *sb)
error = jbd2_journal_flush(journal);
if (error < 0)
goto out;
+
+ /* Journal blocked and flushed, clear needs_recovery flag. */
+ EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
}
- /* Journal blocked and flushed, clear needs_recovery flag. */
- EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
error = ext4_commit_super(sb, 1);
out:
if (journal)
@@ -4828,8 +4845,11 @@ static int ext4_unfreeze(struct super_block *sb)
if (sb->s_flags & MS_RDONLY)
return 0;
- /* Reset the needs_recovery flag before the fs is unlocked. */
- EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+ if (EXT4_SB(sb)->s_journal) {
+ /* Reset the needs_recovery flag before the fs is unlocked. */
+ EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
+ }
+
ext4_commit_super(sb, 1);
return 0;
}
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 859c6edbf81a..c18b49dc5d4f 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1334,11 +1334,11 @@ static int gfs2_show_options(struct seq_file *s, struct dentry *root)
if (is_ancestor(root, sdp->sd_master_dir))
seq_puts(s, ",meta");
if (args->ar_lockproto[0])
- seq_printf(s, ",lockproto=%s", args->ar_lockproto);
+ seq_show_option(s, "lockproto", args->ar_lockproto);
if (args->ar_locktable[0])
- seq_printf(s, ",locktable=%s", args->ar_locktable);
+ seq_show_option(s, "locktable", args->ar_locktable);
if (args->ar_hostdata[0])
- seq_printf(s, ",hostdata=%s", args->ar_hostdata);
+ seq_show_option(s, "hostdata", args->ar_hostdata);
if (args->ar_spectator)
seq_puts(s, ",spectator");
if (args->ar_localflocks)
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index d3fa6bd9503e..221719eac5de 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -288,7 +288,6 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
page_cache_release(page);
goto fail;
}
- page_cache_release(page);
node->page[i] = page;
}
@@ -398,11 +397,11 @@ node_error:
void hfs_bnode_free(struct hfs_bnode *node)
{
- //int i;
+ int i;
- //for (i = 0; i < node->tree->pages_per_bnode; i++)
- // if (node->page[i])
- // page_cache_release(node->page[i]);
+ for (i = 0; i < node->tree->pages_per_bnode; i++)
+ if (node->page[i])
+ page_cache_release(node->page[i]);
kfree(node);
}
diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c
index 9f4ee7f52026..6fc766df0461 100644
--- a/fs/hfs/brec.c
+++ b/fs/hfs/brec.c
@@ -131,13 +131,16 @@ skip:
hfs_bnode_write(node, entry, data_off + key_len, entry_len);
hfs_bnode_dump(node);
- if (new_node) {
- /* update parent key if we inserted a key
- * at the start of the first node
- */
- if (!rec && new_node != node)
- hfs_brec_update_parent(fd);
+ /*
+ * update parent key if we inserted a key
+ * at the start of the node and it is not the new node
+ */
+ if (!rec && new_node != node) {
+ hfs_bnode_read_key(node, fd->search_key, data_off + size);
+ hfs_brec_update_parent(fd);
+ }
+ if (new_node) {
hfs_bnode_put(fd->bnode);
if (!new_node->parent) {
hfs_btree_inc_height(tree);
@@ -166,9 +169,6 @@ skip:
goto again;
}
- if (!rec)
- hfs_brec_update_parent(fd);
-
return 0;
}
@@ -366,6 +366,8 @@ again:
if (IS_ERR(parent))
return PTR_ERR(parent);
__hfs_brec_find(parent, fd);
+ if (fd->record < 0)
+ return -ENOENT;
hfs_bnode_dump(parent);
rec = fd->record;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index eee7206c38d1..410b65eea683 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -135,9 +135,9 @@ static int hfs_show_options(struct seq_file *seq, struct dentry *root)
struct hfs_sb_info *sbi = HFS_SB(root->d_sb);
if (sbi->s_creator != cpu_to_be32(0x3f3f3f3f))
- seq_printf(seq, ",creator=%.4s", (char *)&sbi->s_creator);
+ seq_show_option_n(seq, "creator", (char *)&sbi->s_creator, 4);
if (sbi->s_type != cpu_to_be32(0x3f3f3f3f))
- seq_printf(seq, ",type=%.4s", (char *)&sbi->s_type);
+ seq_show_option_n(seq, "type", (char *)&sbi->s_type, 4);
seq_printf(seq, ",uid=%u,gid=%u",
from_kuid_munged(&init_user_ns, sbi->s_uid),
from_kgid_munged(&init_user_ns, sbi->s_gid));
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index 759708fd9331..63924662aaf3 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -454,7 +454,6 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
page_cache_release(page);
goto fail;
}
- page_cache_release(page);
node->page[i] = page;
}
@@ -566,13 +565,11 @@ node_error:
void hfs_bnode_free(struct hfs_bnode *node)
{
-#if 0
int i;
for (i = 0; i < node->tree->pages_per_bnode; i++)
if (node->page[i])
page_cache_release(node->page[i]);
-#endif
kfree(node);
}
diff --git a/fs/hfsplus/options.c b/fs/hfsplus/options.c
index c90b72ee676d..bb806e58c977 100644
--- a/fs/hfsplus/options.c
+++ b/fs/hfsplus/options.c
@@ -218,9 +218,9 @@ int hfsplus_show_options(struct seq_file *seq, struct dentry *root)
struct hfsplus_sb_info *sbi = HFSPLUS_SB(root->d_sb);
if (sbi->creator != HFSPLUS_DEF_CR_TYPE)
- seq_printf(seq, ",creator=%.4s", (char *)&sbi->creator);
+ seq_show_option_n(seq, "creator", (char *)&sbi->creator, 4);
if (sbi->type != HFSPLUS_DEF_CR_TYPE)
- seq_printf(seq, ",type=%.4s", (char *)&sbi->type);
+ seq_show_option_n(seq, "type", (char *)&sbi->type, 4);
seq_printf(seq, ",umask=%o,uid=%u,gid=%u", sbi->umask,
from_kuid_munged(&init_user_ns, sbi->uid),
from_kgid_munged(&init_user_ns, sbi->gid));
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index 07d8d8f52faf..de2d6245e9fa 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -260,7 +260,7 @@ static int hostfs_show_options(struct seq_file *seq, struct dentry *root)
size_t offset = strlen(root_ino) + 1;
if (strlen(root_path) > offset)
- seq_printf(seq, ",%s", root_path + offset);
+ seq_show_option(seq, root_path + offset, NULL);
if (append)
seq_puts(seq, ",append");
diff --git a/fs/hpfs/namei.c b/fs/hpfs/namei.c
index a0872f239f04..9e92c9c2d319 100644
--- a/fs/hpfs/namei.c
+++ b/fs/hpfs/namei.c
@@ -8,6 +8,17 @@
#include <linux/sched.h>
#include "hpfs_fn.h"
+static void hpfs_update_directory_times(struct inode *dir)
+{
+ time_t t = get_seconds();
+ if (t == dir->i_mtime.tv_sec &&
+ t == dir->i_ctime.tv_sec)
+ return;
+ dir->i_mtime.tv_sec = dir->i_ctime.tv_sec = t;
+ dir->i_mtime.tv_nsec = dir->i_ctime.tv_nsec = 0;
+ hpfs_write_inode_nolock(dir);
+}
+
static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
const unsigned char *name = dentry->d_name.name;
@@ -99,6 +110,7 @@ static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
result->i_mode = mode | S_IFDIR;
hpfs_write_inode_nolock(result);
}
+ hpfs_update_directory_times(dir);
d_instantiate(dentry, result);
hpfs_unlock(dir->i_sb);
return 0;
@@ -187,6 +199,7 @@ static int hpfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, b
result->i_mode = mode | S_IFREG;
hpfs_write_inode_nolock(result);
}
+ hpfs_update_directory_times(dir);
d_instantiate(dentry, result);
hpfs_unlock(dir->i_sb);
return 0;
@@ -262,6 +275,7 @@ static int hpfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, de
insert_inode_hash(result);
hpfs_write_inode_nolock(result);
+ hpfs_update_directory_times(dir);
d_instantiate(dentry, result);
brelse(bh);
hpfs_unlock(dir->i_sb);
@@ -340,6 +354,7 @@ static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *sy
insert_inode_hash(result);
hpfs_write_inode_nolock(result);
+ hpfs_update_directory_times(dir);
d_instantiate(dentry, result);
hpfs_unlock(dir->i_sb);
return 0;
@@ -423,6 +438,8 @@ again:
out1:
hpfs_brelse4(&qbh);
out:
+ if (!err)
+ hpfs_update_directory_times(dir);
hpfs_unlock(dir->i_sb);
return err;
}
@@ -477,6 +494,8 @@ static int hpfs_rmdir(struct inode *dir, struct dentry *dentry)
out1:
hpfs_brelse4(&qbh);
out:
+ if (!err)
+ hpfs_update_directory_times(dir);
hpfs_unlock(dir->i_sb);
return err;
}
@@ -595,7 +614,7 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto end1;
}
- end:
+end:
hpfs_i(i)->i_parent_dir = new_dir->i_ino;
if (S_ISDIR(i->i_mode)) {
inc_nlink(new_dir);
@@ -610,6 +629,10 @@ static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
brelse(bh);
}
end1:
+ if (!err) {
+ hpfs_update_directory_times(old_dir);
+ hpfs_update_directory_times(new_dir);
+ }
hpfs_unlock(i->i_sb);
return err;
}
diff --git a/fs/internal.h b/fs/internal.h
index 01dce1d1476b..4d5af583ab03 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -107,6 +107,7 @@ extern struct file *do_file_open_root(struct dentry *, struct vfsmount *,
extern long do_handle_open(int mountdirfd,
struct file_handle __user *ufh, int open_flag);
extern int open_check_o_direct(struct file *f);
+extern int vfs_open(const struct path *, struct file *, const struct cred *);
/*
* inode.c
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 9c00e2e5a145..78c1545a3fab 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -419,12 +419,12 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
* journal_clean_one_cp_list
*
* Find all the written-back checkpoint buffers in the given list and
- * release them.
+ * release them. If 'destroy' is set, clean all buffers unconditionally.
*
* Called with j_list_lock held.
* Returns 1 if we freed the transaction, 0 otherwise.
*/
-static int journal_clean_one_cp_list(struct journal_head *jh)
+static int journal_clean_one_cp_list(struct journal_head *jh, bool destroy)
{
struct journal_head *last_jh;
struct journal_head *next_jh = jh;
@@ -438,7 +438,10 @@ static int journal_clean_one_cp_list(struct journal_head *jh)
do {
jh = next_jh;
next_jh = jh->b_cpnext;
- ret = __try_to_free_cp_buf(jh);
+ if (!destroy)
+ ret = __try_to_free_cp_buf(jh);
+ else
+ ret = __jbd2_journal_remove_checkpoint(jh) + 1;
if (!ret)
return freed;
if (ret == 2)
@@ -461,10 +464,11 @@ static int journal_clean_one_cp_list(struct journal_head *jh)
* journal_clean_checkpoint_list
*
* Find all the written-back checkpoint buffers in the journal and release them.
+ * If 'destroy' is set, release all buffers unconditionally.
*
* Called with j_list_lock held.
*/
-void __jbd2_journal_clean_checkpoint_list(journal_t *journal)
+void __jbd2_journal_clean_checkpoint_list(journal_t *journal, bool destroy)
{
transaction_t *transaction, *last_transaction, *next_transaction;
int ret;
@@ -478,7 +482,8 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal)
do {
transaction = next_transaction;
next_transaction = transaction->t_cpnext;
- ret = journal_clean_one_cp_list(transaction->t_checkpoint_list);
+ ret = journal_clean_one_cp_list(transaction->t_checkpoint_list,
+ destroy);
/*
* This function only frees up some memory if possible so we
* dont have an obligation to finish processing. Bail out if
@@ -494,7 +499,7 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal)
* we can possibly see not yet submitted buffers on io_list
*/
ret = journal_clean_one_cp_list(transaction->
- t_checkpoint_io_list);
+ t_checkpoint_io_list, destroy);
if (need_resched())
return;
/*
@@ -508,6 +513,28 @@ void __jbd2_journal_clean_checkpoint_list(journal_t *journal)
}
/*
+ * Remove buffers from all checkpoint lists as journal is aborted and we just
+ * need to free memory
+ */
+void jbd2_journal_destroy_checkpoint(journal_t *journal)
+{
+ /*
+ * We loop because __jbd2_journal_clean_checkpoint_list() may abort
+ * early due to a need of rescheduling.
+ */
+ while (1) {
+ spin_lock(&journal->j_list_lock);
+ if (!journal->j_checkpoint_transactions) {
+ spin_unlock(&journal->j_list_lock);
+ break;
+ }
+ __jbd2_journal_clean_checkpoint_list(journal, true);
+ spin_unlock(&journal->j_list_lock);
+ cond_resched();
+ }
+}
+
+/*
* journal_remove_checkpoint: called after a buffer has been committed
* to disk (either by being write-back flushed to disk, or being
* committed to the log).
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index b73e0215baa7..362e5f614450 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -510,7 +510,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
* frees some memory
*/
spin_lock(&journal->j_list_lock);
- __jbd2_journal_clean_checkpoint_list(journal);
+ __jbd2_journal_clean_checkpoint_list(journal, false);
spin_unlock(&journal->j_list_lock);
jbd_debug(3, "JBD2: commit phase 1\n");
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 112fad9e1e20..7003c0925760 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1708,8 +1708,17 @@ int jbd2_journal_destroy(journal_t *journal)
while (journal->j_checkpoint_transactions != NULL) {
spin_unlock(&journal->j_list_lock);
mutex_lock(&journal->j_checkpoint_mutex);
- jbd2_log_do_checkpoint(journal);
+ err = jbd2_log_do_checkpoint(journal);
mutex_unlock(&journal->j_checkpoint_mutex);
+ /*
+ * If checkpointing failed, just free the buffers to avoid
+ * looping forever
+ */
+ if (err) {
+ jbd2_journal_destroy_checkpoint(journal);
+ spin_lock(&journal->j_list_lock);
+ break;
+ }
spin_lock(&journal->j_list_lock);
}
diff --git a/fs/libfs.c b/fs/libfs.c
index 02813592e121..f4641fd27bda 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1176,7 +1176,7 @@ void make_empty_dir_inode(struct inode *inode)
inode->i_uid = GLOBAL_ROOT_UID;
inode->i_gid = GLOBAL_ROOT_GID;
inode->i_rdev = 0;
- inode->i_size = 2;
+ inode->i_size = 0;
inode->i_blkbits = PAGE_SHIFT;
inode->i_blocks = 0;
diff --git a/fs/locks.c b/fs/locks.c
index 653faabb07f4..d3d558ba4da7 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -862,12 +862,11 @@ static int posix_locks_deadlock(struct file_lock *caller_fl,
* whether or not a lock was successfully freed by testing the return
* value for -ENOENT.
*/
-static int flock_lock_file(struct file *filp, struct file_lock *request)
+static int flock_lock_inode(struct inode *inode, struct file_lock *request)
{
struct file_lock *new_fl = NULL;
struct file_lock *fl;
struct file_lock_context *ctx;
- struct inode *inode = file_inode(filp);
int error = 0;
bool found = false;
LIST_HEAD(dispose);
@@ -890,7 +889,7 @@ static int flock_lock_file(struct file *filp, struct file_lock *request)
goto find_conflict;
list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
- if (filp != fl->fl_file)
+ if (request->fl_file != fl->fl_file)
continue;
if (request->fl_type == fl->fl_type)
goto out;
@@ -1164,20 +1163,19 @@ int posix_lock_file(struct file *filp, struct file_lock *fl,
EXPORT_SYMBOL(posix_lock_file);
/**
- * posix_lock_file_wait - Apply a POSIX-style lock to a file
- * @filp: The file to apply the lock to
+ * posix_lock_inode_wait - Apply a POSIX-style lock to a file
+ * @inode: inode of file to which lock request should be applied
* @fl: The lock to be applied
*
- * Add a POSIX style lock to a file.
- * We merge adjacent & overlapping locks whenever possible.
- * POSIX locks are sorted by owner task, then by starting address
+ * Variant of posix_lock_file_wait that does not take a filp, and so can be
+ * used after the filp has already been torn down.
*/
-int posix_lock_file_wait(struct file *filp, struct file_lock *fl)
+int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
{
int error;
might_sleep ();
for (;;) {
- error = posix_lock_file(filp, fl, NULL);
+ error = __posix_lock_file(inode, fl, NULL);
if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
@@ -1189,7 +1187,7 @@ int posix_lock_file_wait(struct file *filp, struct file_lock *fl)
}
return error;
}
-EXPORT_SYMBOL(posix_lock_file_wait);
+EXPORT_SYMBOL(posix_lock_inode_wait);
/**
* locks_mandatory_locked - Check for an active lock
@@ -1851,18 +1849,18 @@ int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
}
/**
- * flock_lock_file_wait - Apply a FLOCK-style lock to a file
- * @filp: The file to apply the lock to
+ * flock_lock_inode_wait - Apply a FLOCK-style lock to a file
+ * @inode: inode of the file to apply to
* @fl: The lock to be applied
*
- * Add a FLOCK style lock to a file.
+ * Apply a FLOCK style lock request to an inode.
*/
-int flock_lock_file_wait(struct file *filp, struct file_lock *fl)
+int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl)
{
int error;
might_sleep();
for (;;) {
- error = flock_lock_file(filp, fl);
+ error = flock_lock_inode(inode, fl);
if (error != FILE_LOCK_DEFERRED)
break;
error = wait_event_interruptible(fl->fl_wait, !fl->fl_next);
@@ -1874,8 +1872,7 @@ int flock_lock_file_wait(struct file *filp, struct file_lock *fl)
}
return error;
}
-
-EXPORT_SYMBOL(flock_lock_file_wait);
+EXPORT_SYMBOL(flock_lock_inode_wait);
/**
* sys_flock: - flock() system call.
@@ -2401,7 +2398,8 @@ locks_remove_flock(struct file *filp)
.fl_type = F_UNLCK,
.fl_end = OFFSET_MAX,
};
- struct file_lock_context *flctx = file_inode(filp)->i_flctx;
+ struct inode *inode = file_inode(filp);
+ struct file_lock_context *flctx = inode->i_flctx;
if (list_empty(&flctx->flc_flock))
return;
@@ -2409,7 +2407,7 @@ locks_remove_flock(struct file *filp)
if (filp->f_op->flock)
filp->f_op->flock(filp, F_SETLKW, &fl);
else
- flock_lock_file(filp, &fl);
+ flock_lock_inode(inode, &fl);
if (fl.fl_ops && fl.fl_ops->fl_release_private)
fl.fl_ops->fl_release_private(&fl);
diff --git a/fs/namei.c b/fs/namei.c
index fe30d3be43a8..ccd7f98d85b9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -505,6 +505,24 @@ struct nameidata {
char *saved_names[MAX_NESTED_LINKS + 1];
};
+/**
+ * path_connected - Verify that a path->dentry is below path->mnt.mnt_root
+ * @path: nameidate to verify
+ *
+ * Rename can sometimes move a file or directory outside of a bind
+ * mount, path_connected allows those cases to be detected.
+ */
+static bool path_connected(const struct path *path)
+{
+ struct vfsmount *mnt = path->mnt;
+
+ /* Only bind mounts can have disconnected paths */
+ if (mnt->mnt_root == mnt->mnt_sb->s_root)
+ return true;
+
+ return is_subdir(path->dentry, mnt->mnt_root);
+}
+
/*
* Path walking has 2 modes, rcu-walk and ref-walk (see
* Documentation/filesystems/path-lookup.txt). In situations when we can't
@@ -1194,6 +1212,8 @@ static int follow_dotdot_rcu(struct nameidata *nd)
goto failed;
nd->path.dentry = parent;
nd->seq = seq;
+ if (unlikely(!path_connected(&nd->path)))
+ goto failed;
break;
}
if (!follow_up_rcu(&nd->path))
@@ -1290,7 +1310,7 @@ static void follow_mount(struct path *path)
}
}
-static void follow_dotdot(struct nameidata *nd)
+static int follow_dotdot(struct nameidata *nd)
{
if (!nd->root.mnt)
set_root(nd);
@@ -1306,6 +1326,10 @@ static void follow_dotdot(struct nameidata *nd)
/* rare case of legitimate dget_parent()... */
nd->path.dentry = dget_parent(nd->path.dentry);
dput(old);
+ if (unlikely(!path_connected(&nd->path))) {
+ path_put(&nd->path);
+ return -ENOENT;
+ }
break;
}
if (!follow_up(&nd->path))
@@ -1313,6 +1337,7 @@ static void follow_dotdot(struct nameidata *nd)
}
follow_mount(&nd->path);
nd->inode = nd->path.dentry->d_inode;
+ return 0;
}
/*
@@ -1428,8 +1453,6 @@ static int lookup_fast(struct nameidata *nd,
negative = d_is_negative(dentry);
if (read_seqcount_retry(&dentry->d_seq, seq))
return -ECHILD;
- if (negative)
- return -ENOENT;
/*
* This sequence count validates that the parent had no
@@ -1450,6 +1473,12 @@ static int lookup_fast(struct nameidata *nd,
goto unlazy;
}
}
+ /*
+ * Note: do negative dentry check after revalidation in
+ * case that drops it.
+ */
+ if (negative)
+ return -ENOENT;
path->mnt = mnt;
path->dentry = dentry;
if (likely(__follow_mount_rcu(nd, path, inode)))
@@ -1541,7 +1570,7 @@ static inline int handle_dots(struct nameidata *nd, int type)
if (follow_dotdot_rcu(nd))
return -ECHILD;
} else
- follow_dotdot(nd);
+ return follow_dotdot(nd);
}
return 0;
}
@@ -2290,7 +2319,7 @@ mountpoint_last(struct nameidata *nd, struct path *path)
if (unlikely(nd->last_type != LAST_NORM)) {
error = handle_dots(nd, nd->last_type);
if (error)
- goto out;
+ return error;
dentry = dget(nd->path.dentry);
goto done;
}
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index a46bf6de9ce4..fb1fb2774d34 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -628,23 +628,18 @@ out_put:
goto out;
}
-static void filelayout_free_fh_array(struct nfs4_filelayout_segment *fl)
+static void _filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
{
int i;
- for (i = 0; i < fl->num_fh; i++) {
- if (!fl->fh_array[i])
- break;
- kfree(fl->fh_array[i]);
+ if (fl->fh_array) {
+ for (i = 0; i < fl->num_fh; i++) {
+ if (!fl->fh_array[i])
+ break;
+ kfree(fl->fh_array[i]);
+ }
+ kfree(fl->fh_array);
}
- kfree(fl->fh_array);
- fl->fh_array = NULL;
-}
-
-static void
-_filelayout_free_lseg(struct nfs4_filelayout_segment *fl)
-{
- filelayout_free_fh_array(fl);
kfree(fl);
}
@@ -715,21 +710,21 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
/* Do we want to use a mempool here? */
fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), gfp_flags);
if (!fl->fh_array[i])
- goto out_err_free;
+ goto out_err;
p = xdr_inline_decode(&stream, 4);
if (unlikely(!p))
- goto out_err_free;
+ goto out_err;
fl->fh_array[i]->size = be32_to_cpup(p++);
if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) {
printk(KERN_ERR "NFS: Too big fh %d received %d\n",
i, fl->fh_array[i]->size);
- goto out_err_free;
+ goto out_err;
}
p = xdr_inline_decode(&stream, fl->fh_array[i]->size);
if (unlikely(!p))
- goto out_err_free;
+ goto out_err;
memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size);
dprintk("DEBUG: %s: fh len %d\n", __func__,
fl->fh_array[i]->size);
@@ -738,8 +733,6 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
__free_page(scratch);
return 0;
-out_err_free:
- filelayout_free_fh_array(fl);
out_err:
__free_page(scratch);
return -EIO;
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 6f5f0f425e86..fecd9201dbad 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -1039,6 +1039,11 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
hdr->res.verf->committed == NFS_DATA_SYNC)
ff_layout_set_layoutcommit(hdr);
+ /* zero out fattr since we don't care DS attr at all */
+ hdr->fattr.valid = 0;
+ if (task->tk_status >= 0)
+ nfs_writeback_update_inode(hdr);
+
return 0;
}
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index f13e1969eedd..b28fa4cbea52 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -500,16 +500,19 @@ int ff_layout_encode_ds_ioerr(struct nfs4_flexfile_layout *flo,
range->offset, range->length))
continue;
/* offset(8) + length(8) + stateid(NFS4_STATEID_SIZE)
- * + deviceid(NFS4_DEVICEID4_SIZE) + status(4) + opnum(4)
+ * + array length + deviceid(NFS4_DEVICEID4_SIZE)
+ * + status(4) + opnum(4)
*/
p = xdr_reserve_space(xdr,
- 24 + NFS4_STATEID_SIZE + NFS4_DEVICEID4_SIZE);
+ 28 + NFS4_STATEID_SIZE + NFS4_DEVICEID4_SIZE);
if (unlikely(!p))
return -ENOBUFS;
p = xdr_encode_hyper(p, err->offset);
p = xdr_encode_hyper(p, err->length);
p = xdr_encode_opaque_fixed(p, &err->stateid,
NFS4_STATEID_SIZE);
+ /* Encode 1 error */
+ *p++ = cpu_to_be32(1);
p = xdr_encode_opaque_fixed(p, &err->deviceid,
NFS4_DEVICEID4_SIZE);
*p++ = cpu_to_be32(err->status);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 5d25b9d97c29..976ba792fbc6 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1270,13 +1270,6 @@ static int nfs_check_inode_attributes(struct inode *inode, struct nfs_fattr *fat
return 0;
}
-static int nfs_ctime_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
-{
- if (!(fattr->valid & NFS_ATTR_FATTR_CTIME))
- return 0;
- return timespec_compare(&fattr->ctime, &inode->i_ctime) > 0;
-}
-
static atomic_long_t nfs_attr_generation_counter;
static unsigned long nfs_read_attr_generation_counter(void)
@@ -1425,7 +1418,6 @@ static int nfs_inode_attrs_need_update(const struct inode *inode, const struct n
const struct nfs_inode *nfsi = NFS_I(inode);
return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 ||
- nfs_ctime_need_update(inode, fattr) ||
((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0);
}
@@ -1488,6 +1480,13 @@ static int nfs_post_op_update_inode_locked(struct inode *inode, struct nfs_fattr
{
unsigned long invalid = NFS_INO_INVALID_ATTR|NFS_INO_REVAL_PAGECACHE;
+ /*
+ * Don't revalidate the pagecache if we hold a delegation, but do
+ * force an attribute update
+ */
+ if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
+ invalid = NFS_INO_INVALID_ATTR|NFS_INO_REVAL_FORCED;
+
if (S_ISDIR(inode->i_mode))
invalid |= NFS_INO_INVALID_DATA;
nfs_set_cache_invalid(inode, invalid);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d3f205126609..8f393fcc313b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1152,6 +1152,8 @@ static int can_open_delegated(struct nfs_delegation *delegation, fmode_t fmode)
return 0;
if ((delegation->type & fmode) != fmode)
return 0;
+ if (test_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags))
+ return 0;
if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
return 0;
nfs_mark_delegation_referenced(delegation);
@@ -1216,6 +1218,7 @@ static void nfs_resync_open_stateid_locked(struct nfs4_state *state)
}
static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
+ nfs4_stateid *arg_stateid,
nfs4_stateid *stateid, fmode_t fmode)
{
clear_bit(NFS_O_RDWR_STATE, &state->flags);
@@ -1234,8 +1237,9 @@ static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
if (stateid == NULL)
return;
/* Handle races with OPEN */
- if (!nfs4_stateid_match_other(stateid, &state->open_stateid) ||
- !nfs4_stateid_is_newer(stateid, &state->open_stateid)) {
+ if (!nfs4_stateid_match_other(arg_stateid, &state->open_stateid) ||
+ (nfs4_stateid_match_other(stateid, &state->open_stateid) &&
+ !nfs4_stateid_is_newer(stateid, &state->open_stateid))) {
nfs_resync_open_stateid_locked(state);
return;
}
@@ -1244,10 +1248,12 @@ static void nfs_clear_open_stateid_locked(struct nfs4_state *state,
nfs4_stateid_copy(&state->open_stateid, stateid);
}
-static void nfs_clear_open_stateid(struct nfs4_state *state, nfs4_stateid *stateid, fmode_t fmode)
+static void nfs_clear_open_stateid(struct nfs4_state *state,
+ nfs4_stateid *arg_stateid,
+ nfs4_stateid *stateid, fmode_t fmode)
{
write_seqlock(&state->seqlock);
- nfs_clear_open_stateid_locked(state, stateid, fmode);
+ nfs_clear_open_stateid_locked(state, arg_stateid, stateid, fmode);
write_sequnlock(&state->seqlock);
if (test_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags))
nfs4_schedule_state_manager(state->owner->so_server->nfs_client);
@@ -2413,7 +2419,7 @@ static int _nfs4_do_open(struct inode *dir,
goto err_free_label;
state = ctx->state;
- if ((opendata->o_arg.open_flags & O_EXCL) &&
+ if ((opendata->o_arg.open_flags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL) &&
(opendata->o_arg.createmode != NFS4_CREATE_GUARDED)) {
nfs4_exclusive_attrset(opendata, sattr);
@@ -2672,7 +2678,8 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
goto out_release;
}
}
- nfs_clear_open_stateid(state, res_stateid, calldata->arg.fmode);
+ nfs_clear_open_stateid(state, &calldata->arg.stateid,
+ res_stateid, calldata->arg.fmode);
out_release:
nfs_release_seqid(calldata->arg.seqid);
nfs_refresh_inode(calldata->inode, calldata->res.fattr);
@@ -5360,15 +5367,15 @@ static int nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *
return err;
}
-static int do_vfs_lock(struct file *file, struct file_lock *fl)
+static int do_vfs_lock(struct inode *inode, struct file_lock *fl)
{
int res = 0;
switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) {
case FL_POSIX:
- res = posix_lock_file_wait(file, fl);
+ res = posix_lock_inode_wait(inode, fl);
break;
case FL_FLOCK:
- res = flock_lock_file_wait(file, fl);
+ res = flock_lock_inode_wait(inode, fl);
break;
default:
BUG();
@@ -5428,7 +5435,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
switch (task->tk_status) {
case 0:
renew_lease(calldata->server, calldata->timestamp);
- do_vfs_lock(calldata->fl.fl_file, &calldata->fl);
+ do_vfs_lock(calldata->lsp->ls_state->inode, &calldata->fl);
if (nfs4_update_lock_stateid(calldata->lsp,
&calldata->res.stateid))
break;
@@ -5536,7 +5543,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
mutex_lock(&sp->so_delegreturn_mutex);
/* Exclude nfs4_reclaim_open_stateid() - note nesting! */
down_read(&nfsi->rwsem);
- if (do_vfs_lock(request->fl_file, request) == -ENOENT) {
+ if (do_vfs_lock(inode, request) == -ENOENT) {
up_read(&nfsi->rwsem);
mutex_unlock(&sp->so_delegreturn_mutex);
goto out;
@@ -5677,7 +5684,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
data->timestamp);
if (data->arg.new_lock) {
data->fl.fl_flags &= ~(FL_SLEEP | FL_ACCESS);
- if (do_vfs_lock(data->fl.fl_file, &data->fl) < 0) {
+ if (do_vfs_lock(lsp->ls_state->inode, &data->fl) < 0) {
rpc_restart_call_prepare(task);
break;
}
@@ -5919,7 +5926,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
if (status != 0)
goto out;
request->fl_flags |= FL_ACCESS;
- status = do_vfs_lock(request->fl_file, request);
+ status = do_vfs_lock(state->inode, request);
if (status < 0)
goto out;
down_read(&nfsi->rwsem);
@@ -5927,7 +5934,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
/* Yes: cache locks! */
/* ...but avoid races with delegation recall... */
request->fl_flags = fl_flags & ~FL_SLEEP;
- status = do_vfs_lock(request->fl_file, request);
+ status = do_vfs_lock(state->inode, request);
up_read(&nfsi->rwsem);
goto out;
}
@@ -8571,6 +8578,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
.reboot_recovery_ops = &nfs41_reboot_recovery_ops,
.nograce_recovery_ops = &nfs41_nograce_recovery_ops,
.state_renewal_ops = &nfs41_state_renewal_ops,
+ .mig_recovery_ops = &nfs41_mig_recovery_ops,
};
#endif
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 7b4552678536..93d355c8b467 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -77,8 +77,8 @@ EXPORT_SYMBOL_GPL(nfs_pgheader_init);
void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
{
spin_lock(&hdr->lock);
- if (pos < hdr->io_start + hdr->good_bytes) {
- set_bit(NFS_IOHDR_ERROR, &hdr->flags);
+ if (!test_and_set_bit(NFS_IOHDR_ERROR, &hdr->flags)
+ || pos < hdr->io_start + hdr->good_bytes) {
clear_bit(NFS_IOHDR_EOF, &hdr->flags);
hdr->good_bytes = pos - hdr->io_start;
hdr->error = error;
@@ -508,7 +508,7 @@ size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
* for it without upsetting the slab allocator.
*/
if (((mirror->pg_count + req->wb_bytes) >> PAGE_SHIFT) *
- sizeof(struct page) > PAGE_SIZE)
+ sizeof(struct page *) > PAGE_SIZE)
return 0;
return min(mirror->pg_bsize - mirror->pg_count, (size_t)req->wb_bytes);
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index f37e25b6311c..1705c78ee2d8 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -359,26 +359,31 @@ same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
return false;
}
+/*
+ * Checks if 'dsaddrs1' contains a subset of 'dsaddrs2'. If it does,
+ * declare a match.
+ */
static bool
_same_data_server_addrs_locked(const struct list_head *dsaddrs1,
const struct list_head *dsaddrs2)
{
struct nfs4_pnfs_ds_addr *da1, *da2;
-
- /* step through both lists, comparing as we go */
- for (da1 = list_first_entry(dsaddrs1, typeof(*da1), da_node),
- da2 = list_first_entry(dsaddrs2, typeof(*da2), da_node);
- da1 != NULL && da2 != NULL;
- da1 = list_entry(da1->da_node.next, typeof(*da1), da_node),
- da2 = list_entry(da2->da_node.next, typeof(*da2), da_node)) {
- if (!same_sockaddr((struct sockaddr *)&da1->da_addr,
- (struct sockaddr *)&da2->da_addr))
- return false;
+ struct sockaddr *sa1, *sa2;
+ bool match = false;
+
+ list_for_each_entry(da1, dsaddrs1, da_node) {
+ sa1 = (struct sockaddr *)&da1->da_addr;
+ match = false;
+ list_for_each_entry(da2, dsaddrs2, da_node) {
+ sa2 = (struct sockaddr *)&da2->da_addr;
+ match = same_sockaddr(sa1, sa2);
+ if (match)
+ break;
+ }
+ if (!match)
+ break;
}
- if (da1 == NULL && da2 == NULL)
- return true;
-
- return false;
+ return match;
}
/*
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index ae0ff7a11b40..01b8cc8e8cfc 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -72,6 +72,9 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
{
struct nfs_pgio_mirror *mirror;
+ if (pgio->pg_ops && pgio->pg_ops->pg_cleanup)
+ pgio->pg_ops->pg_cleanup(pgio);
+
pgio->pg_ops = &nfs_pgio_rw_ops;
/* read path should never have more than one mirror */
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index daf355642845..d9851a6a2813 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1203,7 +1203,7 @@ static int nfs_can_extend_write(struct file *file, struct page *page, struct ino
return 1;
if (!flctx || (list_empty_careful(&flctx->flc_flock) &&
list_empty_careful(&flctx->flc_posix)))
- return 0;
+ return 1;
/* Check to see if there are whole file write locks */
ret = 0;
@@ -1331,6 +1331,9 @@ void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
{
struct nfs_pgio_mirror *mirror;
+ if (pgio->pg_ops && pgio->pg_ops->pg_cleanup)
+ pgio->pg_ops->pg_cleanup(pgio);
+
pgio->pg_ops = &nfs_pgio_rw_ops;
nfs_pageio_stop_mirroring(pgio);
@@ -1383,24 +1386,27 @@ static void nfs_writeback_check_extend(struct nfs_pgio_header *hdr,
{
struct nfs_pgio_args *argp = &hdr->args;
struct nfs_pgio_res *resp = &hdr->res;
+ u64 size = argp->offset + resp->count;
if (!(fattr->valid & NFS_ATTR_FATTR_SIZE))
+ fattr->size = size;
+ if (nfs_size_to_loff_t(fattr->size) < i_size_read(hdr->inode)) {
+ fattr->valid &= ~NFS_ATTR_FATTR_SIZE;
return;
- if (argp->offset + resp->count != fattr->size)
- return;
- if (nfs_size_to_loff_t(fattr->size) < i_size_read(hdr->inode))
+ }
+ if (size != fattr->size)
return;
/* Set attribute barrier */
nfs_fattr_set_barrier(fattr);
+ /* ...and update size */
+ fattr->valid |= NFS_ATTR_FATTR_SIZE;
}
void nfs_writeback_update_inode(struct nfs_pgio_header *hdr)
{
- struct nfs_fattr *fattr = hdr->res.fattr;
+ struct nfs_fattr *fattr = &hdr->fattr;
struct inode *inode = hdr->inode;
- if (fattr == NULL)
- return;
spin_lock(&inode->i_lock);
nfs_writeback_check_extend(hdr, fattr);
nfs_post_op_update_inode_force_wcc_locked(inode, fattr);
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index cdefaa331a07..c29d9421bd5e 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -56,14 +56,6 @@ nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
u32 device_generation = 0;
int error;
- /*
- * We do not attempt to support I/O smaller than the fs block size,
- * or not aligned to it.
- */
- if (args->lg_minlength < block_size) {
- dprintk("pnfsd: I/O too small\n");
- goto out_layoutunavailable;
- }
if (seg->offset & (block_size - 1)) {
dprintk("pnfsd: I/O misaligned\n");
goto out_layoutunavailable;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6e13504f736e..397798368b1a 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -777,13 +777,16 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
}
-static void
+static bool
unhash_delegation_locked(struct nfs4_delegation *dp)
{
struct nfs4_file *fp = dp->dl_stid.sc_file;
lockdep_assert_held(&state_lock);
+ if (list_empty(&dp->dl_perfile))
+ return false;
+
dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID;
/* Ensure that deleg break won't try to requeue it */
++dp->dl_time;
@@ -792,16 +795,21 @@ unhash_delegation_locked(struct nfs4_delegation *dp)
list_del_init(&dp->dl_recall_lru);
list_del_init(&dp->dl_perfile);
spin_unlock(&fp->fi_lock);
+ return true;
}
static void destroy_delegation(struct nfs4_delegation *dp)
{
+ bool unhashed;
+
spin_lock(&state_lock);
- unhash_delegation_locked(dp);
+ unhashed = unhash_delegation_locked(dp);
spin_unlock(&state_lock);
- put_clnt_odstate(dp->dl_clnt_odstate);
- nfs4_put_deleg_lease(dp->dl_stid.sc_file);
- nfs4_put_stid(&dp->dl_stid);
+ if (unhashed) {
+ put_clnt_odstate(dp->dl_clnt_odstate);
+ nfs4_put_deleg_lease(dp->dl_stid.sc_file);
+ nfs4_put_stid(&dp->dl_stid);
+ }
}
static void revoke_delegation(struct nfs4_delegation *dp)
@@ -1004,16 +1012,20 @@ static void nfs4_put_stateowner(struct nfs4_stateowner *sop)
sop->so_ops->so_free(sop);
}
-static void unhash_ol_stateid(struct nfs4_ol_stateid *stp)
+static bool unhash_ol_stateid(struct nfs4_ol_stateid *stp)
{
struct nfs4_file *fp = stp->st_stid.sc_file;
lockdep_assert_held(&stp->st_stateowner->so_client->cl_lock);
+ if (list_empty(&stp->st_perfile))
+ return false;
+
spin_lock(&fp->fi_lock);
- list_del(&stp->st_perfile);
+ list_del_init(&stp->st_perfile);
spin_unlock(&fp->fi_lock);
list_del(&stp->st_perstateowner);
+ return true;
}
static void nfs4_free_ol_stateid(struct nfs4_stid *stid)
@@ -1063,25 +1075,27 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
list_add(&stp->st_locks, reaplist);
}
-static void unhash_lock_stateid(struct nfs4_ol_stateid *stp)
+static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp)
{
struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner);
lockdep_assert_held(&oo->oo_owner.so_client->cl_lock);
list_del_init(&stp->st_locks);
- unhash_ol_stateid(stp);
nfs4_unhash_stid(&stp->st_stid);
+ return unhash_ol_stateid(stp);
}
static void release_lock_stateid(struct nfs4_ol_stateid *stp)
{
struct nfs4_openowner *oo = openowner(stp->st_openstp->st_stateowner);
+ bool unhashed;
spin_lock(&oo->oo_owner.so_client->cl_lock);
- unhash_lock_stateid(stp);
+ unhashed = unhash_lock_stateid(stp);
spin_unlock(&oo->oo_owner.so_client->cl_lock);
- nfs4_put_stid(&stp->st_stid);
+ if (unhashed)
+ nfs4_put_stid(&stp->st_stid);
}
static void unhash_lockowner_locked(struct nfs4_lockowner *lo)
@@ -1129,7 +1143,7 @@ static void release_lockowner(struct nfs4_lockowner *lo)
while (!list_empty(&lo->lo_owner.so_stateids)) {
stp = list_first_entry(&lo->lo_owner.so_stateids,
struct nfs4_ol_stateid, st_perstateowner);
- unhash_lock_stateid(stp);
+ WARN_ON(!unhash_lock_stateid(stp));
put_ol_stateid_locked(stp, &reaplist);
}
spin_unlock(&clp->cl_lock);
@@ -1142,21 +1156,26 @@ static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp,
{
struct nfs4_ol_stateid *stp;
+ lockdep_assert_held(&open_stp->st_stid.sc_client->cl_lock);
+
while (!list_empty(&open_stp->st_locks)) {
stp = list_entry(open_stp->st_locks.next,
struct nfs4_ol_stateid, st_locks);
- unhash_lock_stateid(stp);
+ WARN_ON(!unhash_lock_stateid(stp));
put_ol_stateid_locked(stp, reaplist);
}
}
-static void unhash_open_stateid(struct nfs4_ol_stateid *stp,
+static bool unhash_open_stateid(struct nfs4_ol_stateid *stp,
struct list_head *reaplist)
{
+ bool unhashed;
+
lockdep_assert_held(&stp->st_stid.sc_client->cl_lock);
- unhash_ol_stateid(stp);
+ unhashed = unhash_ol_stateid(stp);
release_open_stateid_locks(stp, reaplist);
+ return unhashed;
}
static void release_open_stateid(struct nfs4_ol_stateid *stp)
@@ -1164,8 +1183,8 @@ static void release_open_stateid(struct nfs4_ol_stateid *stp)
LIST_HEAD(reaplist);
spin_lock(&stp->st_stid.sc_client->cl_lock);
- unhash_open_stateid(stp, &reaplist);
- put_ol_stateid_locked(stp, &reaplist);
+ if (unhash_open_stateid(stp, &reaplist))
+ put_ol_stateid_locked(stp, &reaplist);
spin_unlock(&stp->st_stid.sc_client->cl_lock);
free_ol_stateid_reaplist(&reaplist);
}
@@ -1210,8 +1229,8 @@ static void release_openowner(struct nfs4_openowner *oo)
while (!list_empty(&oo->oo_owner.so_stateids)) {
stp = list_first_entry(&oo->oo_owner.so_stateids,
struct nfs4_ol_stateid, st_perstateowner);
- unhash_open_stateid(stp, &reaplist);
- put_ol_stateid_locked(stp, &reaplist);
+ if (unhash_open_stateid(stp, &reaplist))
+ put_ol_stateid_locked(stp, &reaplist);
}
spin_unlock(&clp->cl_lock);
free_ol_stateid_reaplist(&reaplist);
@@ -1714,7 +1733,7 @@ __destroy_client(struct nfs4_client *clp)
spin_lock(&state_lock);
while (!list_empty(&clp->cl_delegations)) {
dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
- unhash_delegation_locked(dp);
+ WARN_ON(!unhash_delegation_locked(dp));
list_add(&dp->dl_recall_lru, &reaplist);
}
spin_unlock(&state_lock);
@@ -4346,7 +4365,7 @@ nfs4_laundromat(struct nfsd_net *nn)
new_timeo = min(new_timeo, t);
break;
}
- unhash_delegation_locked(dp);
+ WARN_ON(!unhash_delegation_locked(dp));
list_add(&dp->dl_recall_lru, &reaplist);
}
spin_unlock(&state_lock);
@@ -4714,7 +4733,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (check_for_locks(stp->st_stid.sc_file,
lockowner(stp->st_stateowner)))
break;
- unhash_lock_stateid(stp);
+ WARN_ON(!unhash_lock_stateid(stp));
spin_unlock(&cl->cl_lock);
nfs4_put_stid(s);
ret = nfs_ok;
@@ -4930,20 +4949,23 @@ out:
static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
{
struct nfs4_client *clp = s->st_stid.sc_client;
+ bool unhashed;
LIST_HEAD(reaplist);
s->st_stid.sc_type = NFS4_CLOSED_STID;
spin_lock(&clp->cl_lock);
- unhash_open_stateid(s, &reaplist);
+ unhashed = unhash_open_stateid(s, &reaplist);
if (clp->cl_minorversion) {
- put_ol_stateid_locked(s, &reaplist);
+ if (unhashed)
+ put_ol_stateid_locked(s, &reaplist);
spin_unlock(&clp->cl_lock);
free_ol_stateid_reaplist(&reaplist);
} else {
spin_unlock(&clp->cl_lock);
free_ol_stateid_reaplist(&reaplist);
- move_to_close_lru(s, clp->net);
+ if (unhashed)
+ move_to_close_lru(s, clp->net);
}
}
@@ -5982,7 +6004,7 @@ nfsd_inject_add_lock_to_list(struct nfs4_ol_stateid *lst,
static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max,
struct list_head *collect,
- void (*func)(struct nfs4_ol_stateid *))
+ bool (*func)(struct nfs4_ol_stateid *))
{
struct nfs4_openowner *oop;
struct nfs4_ol_stateid *stp, *st_next;
@@ -5996,9 +6018,9 @@ static u64 nfsd_foreach_client_lock(struct nfs4_client *clp, u64 max,
list_for_each_entry_safe(lst, lst_next,
&stp->st_locks, st_locks) {
if (func) {
- func(lst);
- nfsd_inject_add_lock_to_list(lst,
- collect);
+ if (func(lst))
+ nfsd_inject_add_lock_to_list(lst,
+ collect);
}
++count;
/*
@@ -6268,7 +6290,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max,
continue;
atomic_inc(&clp->cl_refcount);
- unhash_delegation_locked(dp);
+ WARN_ON(!unhash_delegation_locked(dp));
list_add(&dp->dl_recall_lru, victims);
}
++count;
@@ -6598,7 +6620,7 @@ nfs4_state_shutdown_net(struct net *net)
spin_lock(&state_lock);
list_for_each_safe(pos, next, &nn->del_recall_lru) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
- unhash_delegation_locked(dp);
+ WARN_ON(!unhash_delegation_locked(dp));
list_add(&dp->dl_recall_lru, &reaplist);
}
spin_unlock(&state_lock);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index d4d84451e0e6..3dd1b616b92b 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2139,6 +2139,27 @@ nfsd4_encode_aclname(struct xdr_stream *xdr, struct svc_rqst *rqstp,
return nfsd4_encode_user(xdr, rqstp, ace->who_uid);
}
+static inline __be32
+nfsd4_encode_layout_type(struct xdr_stream *xdr, enum pnfs_layouttype layout_type)
+{
+ __be32 *p;
+
+ if (layout_type) {
+ p = xdr_reserve_space(xdr, 8);
+ if (!p)
+ return nfserr_resource;
+ *p++ = cpu_to_be32(1);
+ *p++ = cpu_to_be32(layout_type);
+ } else {
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
+ return nfserr_resource;
+ *p++ = cpu_to_be32(0);
+ }
+
+ return 0;
+}
+
#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \
FATTR4_WORD0_RDATTR_ERROR)
#define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID
@@ -2692,20 +2713,16 @@ out_acl:
p = xdr_encode_hyper(p, stat.ino);
}
#ifdef CONFIG_NFSD_PNFS
- if ((bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) ||
- (bmval2 & FATTR4_WORD2_LAYOUT_TYPES)) {
- if (exp->ex_layout_type) {
- p = xdr_reserve_space(xdr, 8);
- if (!p)
- goto out_resource;
- *p++ = cpu_to_be32(1);
- *p++ = cpu_to_be32(exp->ex_layout_type);
- } else {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- goto out_resource;
- *p++ = cpu_to_be32(0);
- }
+ if (bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) {
+ status = nfsd4_encode_layout_type(xdr, exp->ex_layout_type);
+ if (status)
+ goto out;
+ }
+
+ if (bmval2 & FATTR4_WORD2_LAYOUT_TYPES) {
+ status = nfsd4_encode_layout_type(xdr, exp->ex_layout_type);
+ if (status)
+ goto out;
}
if (bmval2 & FATTR4_WORD2_LAYOUT_BLKSIZE) {
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index fdf4b41d0609..482cfd34472d 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -1439,6 +1439,7 @@ int dlm_master_request_handler(struct o2net_msg *msg, u32 len, void *data,
int found, ret;
int set_maybe;
int dispatch_assert = 0;
+ int dispatched = 0;
if (!dlm_grab(dlm))
return DLM_MASTER_RESP_NO;
@@ -1658,15 +1659,18 @@ send_response:
mlog(ML_ERROR, "failed to dispatch assert master work\n");
response = DLM_MASTER_RESP_ERROR;
dlm_lockres_put(res);
- } else
+ } else {
+ dispatched = 1;
__dlm_lockres_grab_inflight_worker(dlm, res);
+ }
spin_unlock(&res->spinlock);
} else {
if (res)
dlm_lockres_put(res);
}
- dlm_put(dlm);
+ if (!dispatched)
+ dlm_put(dlm);
return response;
}
@@ -2090,7 +2094,6 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
/* queue up work for dlm_assert_master_worker */
- dlm_grab(dlm); /* get an extra ref for the work item */
dlm_init_work_item(dlm, item, dlm_assert_master_worker, NULL);
item->u.am.lockres = res; /* already have a ref */
/* can optionally ignore node numbers higher than this node */
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index ce12e0b1a31f..3d90ad7ff91f 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1694,6 +1694,7 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
unsigned int hash;
int master = DLM_LOCK_RES_OWNER_UNKNOWN;
u32 flags = DLM_ASSERT_MASTER_REQUERY;
+ int dispatched = 0;
if (!dlm_grab(dlm)) {
/* since the domain has gone away on this
@@ -1719,8 +1720,10 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
dlm_put(dlm);
/* sender will take care of this and retry */
return ret;
- } else
+ } else {
+ dispatched = 1;
__dlm_lockres_grab_inflight_worker(dlm, res);
+ }
spin_unlock(&res->spinlock);
} else {
/* put.. incase we are not the master */
@@ -1730,7 +1733,8 @@ int dlm_master_requery_handler(struct o2net_msg *msg, u32 len, void *data,
}
spin_unlock(&dlm->spinlock);
- dlm_put(dlm);
+ if (!dispatched)
+ dlm_put(dlm);
return master;
}
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 403c5660b306..a482e312c7b2 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -1550,8 +1550,8 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root)
seq_printf(s, ",localflocks,");
if (osb->osb_cluster_stack[0])
- seq_printf(s, ",cluster_stack=%.*s", OCFS2_STACK_LABEL_LEN,
- osb->osb_cluster_stack);
+ seq_show_option_n(s, "cluster_stack", osb->osb_cluster_stack,
+ OCFS2_STACK_LABEL_LEN);
if (opts & OCFS2_MOUNT_USRQUOTA)
seq_printf(s, ",usrquota");
if (opts & OCFS2_MOUNT_GRPQUOTA)
diff --git a/fs/open.c b/fs/open.c
index 98e5a52dc68c..f9d2bf935099 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -678,18 +678,18 @@ int open_check_o_direct(struct file *f)
}
static int do_dentry_open(struct file *f,
+ struct inode *inode,
int (*open)(struct inode *, struct file *),
const struct cred *cred)
{
static const struct file_operations empty_fops = {};
- struct inode *inode;
int error;
f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
FMODE_PREAD | FMODE_PWRITE;
path_get(&f->f_path);
- inode = f->f_inode = f->f_path.dentry->d_inode;
+ f->f_inode = inode;
f->f_mapping = inode->i_mapping;
if (unlikely(f->f_flags & O_PATH)) {
@@ -793,7 +793,8 @@ int finish_open(struct file *file, struct dentry *dentry,
BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
file->f_path.dentry = dentry;
- error = do_dentry_open(file, open, current_cred());
+ error = do_dentry_open(file, d_backing_inode(dentry), open,
+ current_cred());
if (!error)
*opened |= FILE_OPENED;
@@ -822,6 +823,28 @@ int finish_no_open(struct file *file, struct dentry *dentry)
}
EXPORT_SYMBOL(finish_no_open);
+/**
+ * vfs_open - open the file at the given path
+ * @path: path to open
+ * @file: newly allocated file with f_flag initialized
+ * @cred: credentials to use
+ */
+int vfs_open(const struct path *path, struct file *file,
+ const struct cred *cred)
+{
+ struct dentry *dentry = path->dentry;
+ struct inode *inode = dentry->d_inode;
+
+ file->f_path = *path;
+ if (dentry->d_flags & DCACHE_OP_SELECT_INODE) {
+ inode = dentry->d_op->d_select_inode(dentry, file->f_flags);
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+ }
+
+ return do_dentry_open(file, inode, NULL, cred);
+}
+
struct file *dentry_open(const struct path *path, int flags,
const struct cred *cred)
{
@@ -853,26 +876,6 @@ struct file *dentry_open(const struct path *path, int flags,
}
EXPORT_SYMBOL(dentry_open);
-/**
- * vfs_open - open the file at the given path
- * @path: path to open
- * @filp: newly allocated file with f_flag initialized
- * @cred: credentials to use
- */
-int vfs_open(const struct path *path, struct file *filp,
- const struct cred *cred)
-{
- struct inode *inode = path->dentry->d_inode;
-
- if (inode->i_op->dentry_open)
- return inode->i_op->dentry_open(path->dentry, filp, cred);
- else {
- filp->f_path = *path;
- return do_dentry_open(filp, NULL, cred);
- }
-}
-EXPORT_SYMBOL(vfs_open);
-
static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
{
int lookup_flags = 0;
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 84d693d37428..871fcb67be97 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -81,11 +81,11 @@ static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
if (len == 0)
return 0;
- old_file = ovl_path_open(old, O_RDONLY);
+ old_file = ovl_path_open(old, O_LARGEFILE | O_RDONLY);
if (IS_ERR(old_file))
return PTR_ERR(old_file);
- new_file = ovl_path_open(new, O_WRONLY);
+ new_file = ovl_path_open(new, O_LARGEFILE | O_WRONLY);
if (IS_ERR(new_file)) {
error = PTR_ERR(new_file);
goto out_fput;
@@ -267,7 +267,7 @@ out:
out_cleanup:
ovl_cleanup(wdir, newdentry);
- goto out;
+ goto out2;
}
/*
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index 04f124884687..ba0db2638946 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -336,37 +336,33 @@ static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
return true;
}
-static int ovl_dentry_open(struct dentry *dentry, struct file *file,
- const struct cred *cred)
+struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags)
{
int err;
struct path realpath;
enum ovl_path_type type;
- bool want_write = false;
+
+ if (d_is_dir(dentry))
+ return d_backing_inode(dentry);
type = ovl_path_real(dentry, &realpath);
- if (ovl_open_need_copy_up(file->f_flags, type, realpath.dentry)) {
- want_write = true;
+ if (ovl_open_need_copy_up(file_flags, type, realpath.dentry)) {
err = ovl_want_write(dentry);
if (err)
- goto out;
+ return ERR_PTR(err);
- if (file->f_flags & O_TRUNC)
+ if (file_flags & O_TRUNC)
err = ovl_copy_up_last(dentry, NULL, true);
else
err = ovl_copy_up(dentry);
+ ovl_drop_write(dentry);
if (err)
- goto out_drop_write;
+ return ERR_PTR(err);
ovl_path_upper(dentry, &realpath);
}
- err = vfs_open(&realpath, file, cred);
-out_drop_write:
- if (want_write)
- ovl_drop_write(dentry);
-out:
- return err;
+ return d_backing_inode(realpath.dentry);
}
static const struct inode_operations ovl_file_inode_operations = {
@@ -377,7 +373,6 @@ static const struct inode_operations ovl_file_inode_operations = {
.getxattr = ovl_getxattr,
.listxattr = ovl_listxattr,
.removexattr = ovl_removexattr,
- .dentry_open = ovl_dentry_open,
};
static const struct inode_operations ovl_symlink_inode_operations = {
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 17ac5afc9ffb..ea5a40b06e3a 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -173,6 +173,7 @@ ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
void *value, size_t size);
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
int ovl_removexattr(struct dentry *dentry, const char *name);
+struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags);
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
struct ovl_entry *oe);
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index bf8537c7f455..d74af7f78fec 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -275,6 +275,7 @@ static void ovl_dentry_release(struct dentry *dentry)
static const struct dentry_operations ovl_dentry_operations = {
.d_release = ovl_dentry_release,
+ .d_select_inode = ovl_d_select_inode,
};
static struct ovl_entry *ovl_alloc_entry(unsigned int numlower)
@@ -473,6 +474,7 @@ static void ovl_put_super(struct super_block *sb)
mntput(ufs->upper_mnt);
for (i = 0; i < ufs->numlower; i++)
mntput(ufs->lower_mnt[i]);
+ kfree(ufs->lower_mnt);
kfree(ufs->config.lowerdir);
kfree(ufs->config.upperdir);
@@ -517,10 +519,10 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
struct super_block *sb = dentry->d_sb;
struct ovl_fs *ufs = sb->s_fs_info;
- seq_printf(m, ",lowerdir=%s", ufs->config.lowerdir);
+ seq_show_option(m, "lowerdir", ufs->config.lowerdir);
if (ufs->config.upperdir) {
- seq_printf(m, ",upperdir=%s", ufs->config.upperdir);
- seq_printf(m, ",workdir=%s", ufs->config.workdir);
+ seq_show_option(m, "upperdir", ufs->config.upperdir);
+ seq_show_option(m, "workdir", ufs->config.workdir);
}
return 0;
}
@@ -980,6 +982,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
oe->lowerstack[i].dentry = stack[i].dentry;
oe->lowerstack[i].mnt = ufs->lower_mnt[i];
}
+ kfree(stack);
root_dentry->d_fsdata = oe;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 0111ad0466ed..cf6fa25f884b 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -714,18 +714,20 @@ static int reiserfs_show_options(struct seq_file *seq, struct dentry *root)
seq_puts(seq, ",acl");
if (REISERFS_SB(s)->s_jdev)
- seq_printf(seq, ",jdev=%s", REISERFS_SB(s)->s_jdev);
+ seq_show_option(seq, "jdev", REISERFS_SB(s)->s_jdev);
if (journal->j_max_commit_age != journal->j_default_max_commit_age)
seq_printf(seq, ",commit=%d", journal->j_max_commit_age);
#ifdef CONFIG_QUOTA
if (REISERFS_SB(s)->s_qf_names[USRQUOTA])
- seq_printf(seq, ",usrjquota=%s", REISERFS_SB(s)->s_qf_names[USRQUOTA]);
+ seq_show_option(seq, "usrjquota",
+ REISERFS_SB(s)->s_qf_names[USRQUOTA]);
else if (opts & (1 << REISERFS_USRQUOTA))
seq_puts(seq, ",usrquota");
if (REISERFS_SB(s)->s_qf_names[GRPQUOTA])
- seq_printf(seq, ",grpjquota=%s", REISERFS_SB(s)->s_qf_names[GRPQUOTA]);
+ seq_show_option(seq, "grpjquota",
+ REISERFS_SB(s)->s_qf_names[GRPQUOTA]);
else if (opts & (1 << REISERFS_GRPQUOTA))
seq_puts(seq, ",grpquota");
if (REISERFS_SB(s)->s_jquota_fmt) {
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index 96f3448b6eb4..fd65b3f1923c 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -652,11 +652,8 @@ int ubifs_init_security(struct inode *dentry, struct inode *inode,
{
int err;
- mutex_lock(&inode->i_mutex);
err = security_inode_init_security(inode, dentry, qstr,
&init_xattrs, 0);
- mutex_unlock(&inode->i_mutex);
-
if (err) {
struct ubifs_info *c = dentry->i_sb->s_fs_info;
ubifs_err(c, "cannot initialize security for inode %lu, error %d",
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index 74bcbabfa523..b14bbd6bb05f 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -680,8 +680,15 @@ typedef struct xfs_attr_leaf_name_remote {
typedef struct xfs_attr_leafblock {
xfs_attr_leaf_hdr_t hdr; /* constant-structure header block */
xfs_attr_leaf_entry_t entries[1]; /* sorted on key, not name */
- xfs_attr_leaf_name_local_t namelist; /* grows from bottom of buf */
- xfs_attr_leaf_name_remote_t valuelist; /* grows from bottom of buf */
+ /*
+ * The rest of the block contains the following structures after the
+ * leaf entries, growing from the bottom up. The variables are never
+ * referenced and definining them can actually make gcc optimize away
+ * accesses to the 'entries' array above index 0 so don't do that.
+ *
+ * xfs_attr_leaf_name_local_t namelist;
+ * xfs_attr_leaf_name_remote_t valuelist;
+ */
} xfs_attr_leafblock_t;
/*
diff --git a/fs/xfs/libxfs/xfs_dir2_data.c b/fs/xfs/libxfs/xfs_dir2_data.c
index de1ea16f5748..534bbf283d6b 100644
--- a/fs/xfs/libxfs/xfs_dir2_data.c
+++ b/fs/xfs/libxfs/xfs_dir2_data.c
@@ -252,7 +252,8 @@ xfs_dir3_data_reada_verify(
return;
case cpu_to_be32(XFS_DIR2_DATA_MAGIC):
case cpu_to_be32(XFS_DIR3_DATA_MAGIC):
- xfs_dir3_data_verify(bp);
+ bp->b_ops = &xfs_dir3_data_buf_ops;
+ bp->b_ops->verify_read(bp);
return;
default:
xfs_buf_ioerror(bp, -EFSCORRUPTED);
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 41b80d3d3877..06bb4218b362 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -2132,6 +2132,7 @@ xfs_dir2_node_replace(
int error; /* error return value */
int i; /* btree level */
xfs_ino_t inum; /* new inode number */
+ int ftype; /* new file type */
xfs_dir2_leaf_t *leaf; /* leaf structure */
xfs_dir2_leaf_entry_t *lep; /* leaf entry being changed */
int rval; /* internal return value */
@@ -2145,7 +2146,14 @@ xfs_dir2_node_replace(
state = xfs_da_state_alloc();
state->args = args;
state->mp = args->dp->i_mount;
+
+ /*
+ * We have to save new inode number and ftype since
+ * xfs_da3_node_lookup_int() is going to overwrite them
+ */
inum = args->inumber;
+ ftype = args->filetype;
+
/*
* Lookup the entry to change in the btree.
*/
@@ -2183,7 +2191,7 @@ xfs_dir2_node_replace(
* Fill in the new inode number and log the entry.
*/
dep->inumber = cpu_to_be64(inum);
- args->dp->d_ops->data_put_ftype(dep, args->filetype);
+ args->dp->d_ops->data_put_ftype(dep, ftype);
xfs_dir2_data_log_entry(args, state->extrablk.bp, dep);
rval = 0;
}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 539a85fddbc2..fec4bfba0839 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -164,7 +164,7 @@ xfs_ilock(
(XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
- ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
+ ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
if (lock_flags & XFS_IOLOCK_EXCL)
mrupdate_nested(&ip->i_iolock, XFS_IOLOCK_DEP(lock_flags));
@@ -212,7 +212,7 @@ xfs_ilock_nowait(
(XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
- ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
+ ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
if (lock_flags & XFS_IOLOCK_EXCL) {
if (!mrtryupdate(&ip->i_iolock))
@@ -281,7 +281,7 @@ xfs_iunlock(
(XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
(XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
- ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_DEP_MASK)) == 0);
+ ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
ASSERT(lock_flags != 0);
if (lock_flags & XFS_IOLOCK_EXCL)
@@ -364,30 +364,38 @@ int xfs_lock_delays;
/*
* Bump the subclass so xfs_lock_inodes() acquires each lock with a different
- * value. This shouldn't be called for page fault locking, but we also need to
- * ensure we don't overrun the number of lockdep subclasses for the iolock or
- * mmaplock as that is limited to 12 by the mmap lock lockdep annotations.
+ * value. This can be called for any type of inode lock combination, including
+ * parent locking. Care must be taken to ensure we don't overrun the subclass
+ * storage fields in the class mask we build.
*/
static inline int
xfs_lock_inumorder(int lock_mode, int subclass)
{
+ int class = 0;
+
+ ASSERT(!(lock_mode & (XFS_ILOCK_PARENT | XFS_ILOCK_RTBITMAP |
+ XFS_ILOCK_RTSUM)));
+
if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
- ASSERT(subclass + XFS_LOCK_INUMORDER <
- (1 << (XFS_MMAPLOCK_SHIFT - XFS_IOLOCK_SHIFT)));
- lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_IOLOCK_SHIFT;
+ ASSERT(subclass <= XFS_IOLOCK_MAX_SUBCLASS);
+ ASSERT(subclass + XFS_IOLOCK_PARENT_VAL <
+ MAX_LOCKDEP_SUBCLASSES);
+ class += subclass << XFS_IOLOCK_SHIFT;
+ if (lock_mode & XFS_IOLOCK_PARENT)
+ class += XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT;
}
if (lock_mode & (XFS_MMAPLOCK_SHARED|XFS_MMAPLOCK_EXCL)) {
- ASSERT(subclass + XFS_LOCK_INUMORDER <
- (1 << (XFS_ILOCK_SHIFT - XFS_MMAPLOCK_SHIFT)));
- lock_mode |= (subclass + XFS_LOCK_INUMORDER) <<
- XFS_MMAPLOCK_SHIFT;
+ ASSERT(subclass <= XFS_MMAPLOCK_MAX_SUBCLASS);
+ class += subclass << XFS_MMAPLOCK_SHIFT;
}
- if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL))
- lock_mode |= (subclass + XFS_LOCK_INUMORDER) << XFS_ILOCK_SHIFT;
+ if (lock_mode & (XFS_ILOCK_SHARED|XFS_ILOCK_EXCL)) {
+ ASSERT(subclass <= XFS_ILOCK_MAX_SUBCLASS);
+ class += subclass << XFS_ILOCK_SHIFT;
+ }
- return lock_mode;
+ return (lock_mode & ~XFS_LOCK_SUBCLASS_MASK) | class;
}
/*
@@ -399,6 +407,11 @@ xfs_lock_inumorder(int lock_mode, int subclass)
* transaction (such as truncate). This can result in deadlock since the long
* running trans might need to wait for the inode we just locked in order to
* push the tail and free space in the log.
+ *
+ * xfs_lock_inodes() can only be used to lock one type of lock at a time -
+ * the iolock, the mmaplock or the ilock, but not more than one at a time. If we
+ * lock more than one at a time, lockdep will report false positives saying we
+ * have violated locking orders.
*/
void
xfs_lock_inodes(
@@ -409,8 +422,29 @@ xfs_lock_inodes(
int attempts = 0, i, j, try_lock;
xfs_log_item_t *lp;
- /* currently supports between 2 and 5 inodes */
+ /*
+ * Currently supports between 2 and 5 inodes with exclusive locking. We
+ * support an arbitrary depth of locking here, but absolute limits on
+ * inodes depend on the the type of locking and the limits placed by
+ * lockdep annotations in xfs_lock_inumorder. These are all checked by
+ * the asserts.
+ */
ASSERT(ips && inodes >= 2 && inodes <= 5);
+ ASSERT(lock_mode & (XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL |
+ XFS_ILOCK_EXCL));
+ ASSERT(!(lock_mode & (XFS_IOLOCK_SHARED | XFS_MMAPLOCK_SHARED |
+ XFS_ILOCK_SHARED)));
+ ASSERT(!(lock_mode & XFS_IOLOCK_EXCL) ||
+ inodes <= XFS_IOLOCK_MAX_SUBCLASS + 1);
+ ASSERT(!(lock_mode & XFS_MMAPLOCK_EXCL) ||
+ inodes <= XFS_MMAPLOCK_MAX_SUBCLASS + 1);
+ ASSERT(!(lock_mode & XFS_ILOCK_EXCL) ||
+ inodes <= XFS_ILOCK_MAX_SUBCLASS + 1);
+
+ if (lock_mode & XFS_IOLOCK_EXCL) {
+ ASSERT(!(lock_mode & (XFS_MMAPLOCK_EXCL | XFS_ILOCK_EXCL)));
+ } else if (lock_mode & XFS_MMAPLOCK_EXCL)
+ ASSERT(!(lock_mode & XFS_ILOCK_EXCL));
try_lock = 0;
i = 0;
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 8f22d20368d8..ee26a603c131 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -284,9 +284,9 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
* Flags for lockdep annotations.
*
* XFS_LOCK_PARENT - for directory operations that require locking a
- * parent directory inode and a child entry inode. The parent gets locked
- * with this flag so it gets a lockdep subclass of 1 and the child entry
- * lock will have a lockdep subclass of 0.
+ * parent directory inode and a child entry inode. IOLOCK requires nesting,
+ * MMAPLOCK does not support this class, ILOCK requires a single subclass
+ * to differentiate parent from child.
*
* XFS_LOCK_RTBITMAP/XFS_LOCK_RTSUM - the realtime device bitmap and summary
* inodes do not participate in the normal lock order, and thus have their
@@ -295,30 +295,63 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
* XFS_LOCK_INUMORDER - for locking several inodes at the some time
* with xfs_lock_inodes(). This flag is used as the starting subclass
* and each subsequent lock acquired will increment the subclass by one.
- * So the first lock acquired will have a lockdep subclass of 4, the
- * second lock will have a lockdep subclass of 5, and so on. It is
- * the responsibility of the class builder to shift this to the correct
- * portion of the lock_mode lockdep mask.
+ * However, MAX_LOCKDEP_SUBCLASSES == 8, which means we are greatly
+ * limited to the subclasses we can represent via nesting. We need at least
+ * 5 inodes nest depth for the ILOCK through rename, and we also have to support
+ * XFS_ILOCK_PARENT, which gives 6 subclasses. Then we have XFS_ILOCK_RTBITMAP
+ * and XFS_ILOCK_RTSUM, which are another 2 unique subclasses, so that's all
+ * 8 subclasses supported by lockdep.
+ *
+ * This also means we have to number the sub-classes in the lowest bits of
+ * the mask we keep, and we have to ensure we never exceed 3 bits of lockdep
+ * mask and we can't use bit-masking to build the subclasses. What a mess.
+ *
+ * Bit layout:
+ *
+ * Bit Lock Region
+ * 16-19 XFS_IOLOCK_SHIFT dependencies
+ * 20-23 XFS_MMAPLOCK_SHIFT dependencies
+ * 24-31 XFS_ILOCK_SHIFT dependencies
+ *
+ * IOLOCK values
+ *
+ * 0-3 subclass value
+ * 4-7 PARENT subclass values
+ *
+ * MMAPLOCK values
+ *
+ * 0-3 subclass value
+ * 4-7 unused
+ *
+ * ILOCK values
+ * 0-4 subclass values
+ * 5 PARENT subclass (not nestable)
+ * 6 RTBITMAP subclass (not nestable)
+ * 7 RTSUM subclass (not nestable)
+ *
*/
-#define XFS_LOCK_PARENT 1
-#define XFS_LOCK_RTBITMAP 2
-#define XFS_LOCK_RTSUM 3
-#define XFS_LOCK_INUMORDER 4
-
-#define XFS_IOLOCK_SHIFT 16
-#define XFS_IOLOCK_PARENT (XFS_LOCK_PARENT << XFS_IOLOCK_SHIFT)
-
-#define XFS_MMAPLOCK_SHIFT 20
-
-#define XFS_ILOCK_SHIFT 24
-#define XFS_ILOCK_PARENT (XFS_LOCK_PARENT << XFS_ILOCK_SHIFT)
-#define XFS_ILOCK_RTBITMAP (XFS_LOCK_RTBITMAP << XFS_ILOCK_SHIFT)
-#define XFS_ILOCK_RTSUM (XFS_LOCK_RTSUM << XFS_ILOCK_SHIFT)
-
-#define XFS_IOLOCK_DEP_MASK 0x000f0000
-#define XFS_MMAPLOCK_DEP_MASK 0x00f00000
-#define XFS_ILOCK_DEP_MASK 0xff000000
-#define XFS_LOCK_DEP_MASK (XFS_IOLOCK_DEP_MASK | \
+#define XFS_IOLOCK_SHIFT 16
+#define XFS_IOLOCK_PARENT_VAL 4
+#define XFS_IOLOCK_MAX_SUBCLASS (XFS_IOLOCK_PARENT_VAL - 1)
+#define XFS_IOLOCK_DEP_MASK 0x000f0000
+#define XFS_IOLOCK_PARENT (XFS_IOLOCK_PARENT_VAL << XFS_IOLOCK_SHIFT)
+
+#define XFS_MMAPLOCK_SHIFT 20
+#define XFS_MMAPLOCK_NUMORDER 0
+#define XFS_MMAPLOCK_MAX_SUBCLASS 3
+#define XFS_MMAPLOCK_DEP_MASK 0x00f00000
+
+#define XFS_ILOCK_SHIFT 24
+#define XFS_ILOCK_PARENT_VAL 5
+#define XFS_ILOCK_MAX_SUBCLASS (XFS_ILOCK_PARENT_VAL - 1)
+#define XFS_ILOCK_RTBITMAP_VAL 6
+#define XFS_ILOCK_RTSUM_VAL 7
+#define XFS_ILOCK_DEP_MASK 0xff000000
+#define XFS_ILOCK_PARENT (XFS_ILOCK_PARENT_VAL << XFS_ILOCK_SHIFT)
+#define XFS_ILOCK_RTBITMAP (XFS_ILOCK_RTBITMAP_VAL << XFS_ILOCK_SHIFT)
+#define XFS_ILOCK_RTSUM (XFS_ILOCK_RTSUM_VAL << XFS_ILOCK_SHIFT)
+
+#define XFS_LOCK_SUBCLASS_MASK (XFS_IOLOCK_DEP_MASK | \
XFS_MMAPLOCK_DEP_MASK | \
XFS_ILOCK_DEP_MASK)
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 858e1e62bbaa..65a45372fb1f 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -504,9 +504,9 @@ xfs_showargs(
seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);
if (mp->m_logname)
- seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname);
+ seq_show_option(m, MNTOPT_LOGDEV, mp->m_logname);
if (mp->m_rtname)
- seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname);
+ seq_show_option(m, MNTOPT_RTDEV, mp->m_rtname);
if (mp->m_dalign > 0)
seq_printf(m, "," MNTOPT_SUNIT "=%d",