aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/binfmt_misc.c7
-rw-r--r--fs/btrfs/ctree.h4
-rw-r--r--fs/btrfs/disk-io.c6
-rw-r--r--fs/btrfs/extent-tree.c23
-rw-r--r--fs/btrfs/free-space-cache.c12
-rw-r--r--fs/btrfs/volumes.c2
-rw-r--r--fs/ceph/addr.c273
-rw-r--r--fs/ceph/caps.c132
-rw-r--r--fs/ceph/dir.c27
-rw-r--r--fs/ceph/file.c97
-rw-r--r--fs/ceph/inode.c59
-rw-r--r--fs/ceph/locks.c64
-rw-r--r--fs/ceph/mds_client.c41
-rw-r--r--fs/ceph/mds_client.h10
-rw-r--r--fs/ceph/snap.c37
-rw-r--r--fs/ceph/super.c16
-rw-r--r--fs/ceph/super.h55
-rw-r--r--fs/ceph/xattr.c7
-rw-r--r--fs/ecryptfs/crypto.c1
-rw-r--r--fs/ecryptfs/file.c12
-rw-r--r--fs/ecryptfs/keystore.c6
-rw-r--r--fs/ecryptfs/main.c16
-rw-r--r--fs/ext4/move_extent.c4
-rw-r--r--fs/fuse/cuse.c2
-rw-r--r--fs/fuse/dev.c29
-rw-r--r--fs/fuse/dir.c538
-rw-r--r--fs/fuse/file.c230
-rw-r--r--fs/fuse/fuse_i.h45
-rw-r--r--fs/fuse/inode.c39
-rw-r--r--fs/hfsplus/catalog.c89
-rw-r--r--fs/hfsplus/dir.c11
-rw-r--r--fs/hfsplus/hfsplus_fs.h4
-rw-r--r--fs/hfsplus/super.c4
-rw-r--r--fs/jffs2/readinode.c2
-rw-r--r--fs/jffs2/summary.c1
-rw-r--r--fs/kernfs/file.c22
-rw-r--r--fs/namespace.c20
-rw-r--r--fs/ocfs2/alloc.c28
-rw-r--r--fs/ocfs2/alloc.h2
-rw-r--r--fs/ocfs2/aops.c16
-rw-r--r--fs/ocfs2/dir.c2
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c12
-rw-r--r--fs/ocfs2/file.c2
-rw-r--r--fs/pnode.c1
-rw-r--r--fs/proc/base.c53
-rw-r--r--fs/proc/meminfo.c15
-rw-r--r--fs/proc/stat.c2
-rw-r--r--fs/proc_namespace.c16
48 files changed, 1320 insertions, 776 deletions
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index c04ef1d4f18a..97aff2879cda 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -254,6 +254,7 @@ static char *scanarg(char *s, char del)
return NULL;
}
}
+ s[-1] ='\0';
return s;
}
@@ -378,8 +379,7 @@ static Node *create_entry(const char __user *buffer, size_t count)
p = scanarg(p, del);
if (!p)
goto einval;
- p[-1] = '\0';
- if (p == e->magic)
+ if (!e->magic[0])
goto einval;
if (USE_DEBUG)
print_hex_dump_bytes(
@@ -391,8 +391,7 @@ static Node *create_entry(const char __user *buffer, size_t count)
p = scanarg(p, del);
if (!p)
goto einval;
- p[-1] = '\0';
- if (p == e->mask) {
+ if (!e->mask[0]) {
e->mask = NULL;
pr_debug("register: mask[raw]: none\n");
} else if (USE_DEBUG)
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index e6fbbd74b716..7e607416755a 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3481,8 +3481,8 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo);
int btrfs_error_unpin_extent_range(struct btrfs_root *root,
u64 start, u64 end);
-int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
- u64 num_bytes, u64 *actual_bytes);
+int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
+ u64 num_bytes, u64 *actual_bytes);
int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans,
struct btrfs_root *root, u64 type);
int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 30965120772b..8c63419a7f70 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4121,12 +4121,6 @@ again:
if (ret)
break;
- /* opt_discard */
- if (btrfs_test_opt(root, DISCARD))
- ret = btrfs_error_discard_extent(root, start,
- end + 1 - start,
- NULL);
-
clear_extent_dirty(unpin, start, end, GFP_NOFS);
btrfs_error_unpin_extent_range(root, start, end);
cond_resched();
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 222d6aea4a8a..a80b97100d90 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1889,8 +1889,8 @@ static int btrfs_issue_discard(struct block_device *bdev,
return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0);
}
-static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
- u64 num_bytes, u64 *actual_bytes)
+int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
+ u64 num_bytes, u64 *actual_bytes)
{
int ret;
u64 discarded_bytes = 0;
@@ -5727,7 +5727,8 @@ void btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
update_global_block_rsv(fs_info);
}
-static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
+static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end,
+ const bool return_free_space)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_block_group_cache *cache = NULL;
@@ -5751,7 +5752,8 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
if (start < cache->last_byte_to_unpin) {
len = min(len, cache->last_byte_to_unpin - start);
- btrfs_add_free_space(cache, start, len);
+ if (return_free_space)
+ btrfs_add_free_space(cache, start, len);
}
start += len;
@@ -5815,7 +5817,7 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
end + 1 - start, NULL);
clear_extent_dirty(unpin, start, end, GFP_NOFS);
- unpin_extent_range(root, start, end);
+ unpin_extent_range(root, start, end, true);
cond_resched();
}
@@ -8872,6 +8874,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
cache_node);
rb_erase(&block_group->cache_node,
&info->block_group_cache_tree);
+ RB_CLEAR_NODE(&block_group->cache_node);
spin_unlock(&info->block_group_cache_lock);
down_write(&block_group->space_info->groups_sem);
@@ -9130,6 +9133,7 @@ int btrfs_read_block_groups(struct btrfs_root *root)
spin_lock(&info->block_group_cache_lock);
rb_erase(&cache->cache_node,
&info->block_group_cache_tree);
+ RB_CLEAR_NODE(&cache->cache_node);
spin_unlock(&info->block_group_cache_lock);
btrfs_put_block_group(cache);
goto error;
@@ -9271,6 +9275,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
spin_lock(&root->fs_info->block_group_cache_lock);
rb_erase(&cache->cache_node,
&root->fs_info->block_group_cache_tree);
+ RB_CLEAR_NODE(&cache->cache_node);
spin_unlock(&root->fs_info->block_group_cache_lock);
btrfs_put_block_group(cache);
return ret;
@@ -9690,13 +9695,7 @@ out:
int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end)
{
- return unpin_extent_range(root, start, end);
-}
-
-int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr,
- u64 num_bytes, u64 *actual_bytes)
-{
- return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes);
+ return unpin_extent_range(root, start, end, false);
}
int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 030847bf7cec..d6c03f7f136b 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2966,8 +2966,8 @@ static int do_trimming(struct btrfs_block_group_cache *block_group,
spin_unlock(&block_group->lock);
spin_unlock(&space_info->lock);
- ret = btrfs_error_discard_extent(fs_info->extent_root,
- start, bytes, &trimmed);
+ ret = btrfs_discard_extent(fs_info->extent_root,
+ start, bytes, &trimmed);
if (!ret)
*total_trimmed += trimmed;
@@ -3185,16 +3185,18 @@ out:
spin_unlock(&block_group->lock);
+ lock_chunks(block_group->fs_info->chunk_root);
em_tree = &block_group->fs_info->mapping_tree.map_tree;
write_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, block_group->key.objectid,
1);
BUG_ON(!em); /* logic error, can't happen */
+ /*
+ * remove_extent_mapping() will delete us from the pinned_chunks
+ * list, which is protected by the chunk mutex.
+ */
remove_extent_mapping(em_tree, em);
write_unlock(&em_tree->lock);
-
- lock_chunks(block_group->fs_info->chunk_root);
- list_del_init(&em->list);
unlock_chunks(block_group->fs_info->chunk_root);
/* once for us and once for the tree */
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 0144790e296e..50c5a8762aed 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1485,7 +1485,7 @@ static void update_dev_time(char *path_name)
struct file *filp;
filp = filp_open(path_name, O_RDWR, 0);
- if (!filp)
+ if (IS_ERR(filp))
return;
file_update_time(filp);
filp_close(filp, NULL);
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 18c06bbaf136..f5013d92a7e6 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -192,17 +192,30 @@ static int readpage_nounlock(struct file *filp, struct page *page)
struct ceph_osd_client *osdc =
&ceph_inode_to_client(inode)->client->osdc;
int err = 0;
+ u64 off = page_offset(page);
u64 len = PAGE_CACHE_SIZE;
- err = ceph_readpage_from_fscache(inode, page);
+ if (off >= i_size_read(inode)) {
+ zero_user_segment(page, err, PAGE_CACHE_SIZE);
+ SetPageUptodate(page);
+ return 0;
+ }
+ /*
+ * Uptodate inline data should have been added into page cache
+ * while getting Fcr caps.
+ */
+ if (ci->i_inline_version != CEPH_INLINE_NONE)
+ return -EINVAL;
+
+ err = ceph_readpage_from_fscache(inode, page);
if (err == 0)
goto out;
dout("readpage inode %p file %p page %p index %lu\n",
inode, filp, page, page->index);
err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
- (u64) page_offset(page), &len,
+ off, &len,
ci->i_truncate_seq, ci->i_truncate_size,
&page, 1, 0);
if (err == -ENOENT)
@@ -319,7 +332,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
off, len);
vino = ceph_vino(inode);
req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len,
- 1, CEPH_OSD_OP_READ,
+ 0, 1, CEPH_OSD_OP_READ,
CEPH_OSD_FLAG_READ, NULL,
ci->i_truncate_seq, ci->i_truncate_size,
false);
@@ -384,6 +397,9 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
int rc = 0;
int max = 0;
+ if (ceph_inode(inode)->i_inline_version != CEPH_INLINE_NONE)
+ return -EINVAL;
+
rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
&nr_pages);
@@ -673,7 +689,7 @@ static int ceph_writepages_start(struct address_space *mapping,
int rc = 0;
unsigned wsize = 1 << inode->i_blkbits;
struct ceph_osd_request *req = NULL;
- int do_sync;
+ int do_sync = 0;
u64 truncate_size, snap_size;
u32 truncate_seq;
@@ -750,7 +766,6 @@ retry:
last_snapc = snapc;
while (!done && index <= end) {
- int num_ops = do_sync ? 2 : 1;
unsigned i;
int first;
pgoff_t next;
@@ -850,7 +865,8 @@ get_more_pages:
len = wsize;
req = ceph_osdc_new_request(&fsc->client->osdc,
&ci->i_layout, vino,
- offset, &len, num_ops,
+ offset, &len, 0,
+ do_sync ? 2 : 1,
CEPH_OSD_OP_WRITE,
CEPH_OSD_FLAG_WRITE |
CEPH_OSD_FLAG_ONDISK,
@@ -862,6 +878,9 @@ get_more_pages:
break;
}
+ if (do_sync)
+ osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
+
req->r_callback = writepages_finish;
req->r_inode = inode;
@@ -1204,6 +1223,7 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
struct inode *inode = file_inode(vma->vm_file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_info *fi = vma->vm_file->private_data;
+ struct page *pinned_page = NULL;
loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT;
int want, got, ret;
@@ -1215,7 +1235,8 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
want = CEPH_CAP_FILE_CACHE;
while (1) {
got = 0;
- ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1);
+ ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want,
+ -1, &got, &pinned_page);
if (ret == 0)
break;
if (ret != -ERESTARTSYS) {
@@ -1226,12 +1247,54 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
dout("filemap_fault %p %llu~%zd got cap refs on %s\n",
inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got));
- ret = filemap_fault(vma, vmf);
+ if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
+ ci->i_inline_version == CEPH_INLINE_NONE)
+ ret = filemap_fault(vma, vmf);
+ else
+ ret = -EAGAIN;
dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n",
inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got), ret);
+ if (pinned_page)
+ page_cache_release(pinned_page);
ceph_put_cap_refs(ci, got);
+ if (ret != -EAGAIN)
+ return ret;
+
+ /* read inline data */
+ if (off >= PAGE_CACHE_SIZE) {
+ /* does not support inline data > PAGE_SIZE */
+ ret = VM_FAULT_SIGBUS;
+ } else {
+ int ret1;
+ struct address_space *mapping = inode->i_mapping;
+ struct page *page = find_or_create_page(mapping, 0,
+ mapping_gfp_mask(mapping) &
+ ~__GFP_FS);
+ if (!page) {
+ ret = VM_FAULT_OOM;
+ goto out;
+ }
+ ret1 = __ceph_do_getattr(inode, page,
+ CEPH_STAT_CAP_INLINE_DATA, true);
+ if (ret1 < 0 || off >= i_size_read(inode)) {
+ unlock_page(page);
+ page_cache_release(page);
+ ret = VM_FAULT_SIGBUS;
+ goto out;
+ }
+ if (ret1 < PAGE_CACHE_SIZE)
+ zero_user_segment(page, ret1, PAGE_CACHE_SIZE);
+ else
+ flush_dcache_page(page);
+ SetPageUptodate(page);
+ vmf->page = page;
+ ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED;
+ }
+out:
+ dout("filemap_fault %p %llu~%zd read inline data ret %d\n",
+ inode, off, (size_t)PAGE_CACHE_SIZE, ret);
return ret;
}
@@ -1250,6 +1313,19 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
size_t len;
int want, got, ret;
+ if (ci->i_inline_version != CEPH_INLINE_NONE) {
+ struct page *locked_page = NULL;
+ if (off == 0) {
+ lock_page(page);
+ locked_page = page;
+ }
+ ret = ceph_uninline_data(vma->vm_file, locked_page);
+ if (locked_page)
+ unlock_page(locked_page);
+ if (ret < 0)
+ return VM_FAULT_SIGBUS;
+ }
+
if (off + PAGE_CACHE_SIZE <= size)
len = PAGE_CACHE_SIZE;
else
@@ -1263,7 +1339,8 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
want = CEPH_CAP_FILE_BUFFER;
while (1) {
got = 0;
- ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, off + len);
+ ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, off + len,
+ &got, NULL);
if (ret == 0)
break;
if (ret != -ERESTARTSYS) {
@@ -1297,11 +1374,13 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
ret = VM_FAULT_SIGBUS;
}
out:
- if (ret != VM_FAULT_LOCKED) {
+ if (ret != VM_FAULT_LOCKED)
unlock_page(page);
- } else {
+ if (ret == VM_FAULT_LOCKED ||
+ ci->i_inline_version != CEPH_INLINE_NONE) {
int dirty;
spin_lock(&ci->i_ceph_lock);
+ ci->i_inline_version = CEPH_INLINE_NONE;
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
spin_unlock(&ci->i_ceph_lock);
if (dirty)
@@ -1315,6 +1394,178 @@ out:
return ret;
}
+void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
+ char *data, size_t len)
+{
+ struct address_space *mapping = inode->i_mapping;
+ struct page *page;
+
+ if (locked_page) {
+ page = locked_page;
+ } else {
+ if (i_size_read(inode) == 0)
+ return;
+ page = find_or_create_page(mapping, 0,
+ mapping_gfp_mask(mapping) & ~__GFP_FS);
+ if (!page)
+ return;
+ if (PageUptodate(page)) {
+ unlock_page(page);
+ page_cache_release(page);
+ return;
+ }
+ }
+
+ dout("fill_inline_data %p %llx.%llx len %lu locked_page %p\n",
+ inode, ceph_vinop(inode), len, locked_page);
+
+ if (len > 0) {
+ void *kaddr = kmap_atomic(page);
+ memcpy(kaddr, data, len);
+ kunmap_atomic(kaddr);
+ }
+
+ if (page != locked_page) {
+ if (len < PAGE_CACHE_SIZE)
+ zero_user_segment(page, len, PAGE_CACHE_SIZE);
+ else
+ flush_dcache_page(page);
+
+ SetPageUptodate(page);
+ unlock_page(page);
+ page_cache_release(page);
+ }
+}
+
+int ceph_uninline_data(struct file *filp, struct page *locked_page)
+{
+ struct inode *inode = file_inode(filp);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_osd_request *req;
+ struct page *page = NULL;
+ u64 len, inline_version;
+ int err = 0;
+ bool from_pagecache = false;
+
+ spin_lock(&ci->i_ceph_lock);
+ inline_version = ci->i_inline_version;
+ spin_unlock(&ci->i_ceph_lock);
+
+ dout("uninline_data %p %llx.%llx inline_version %llu\n",
+ inode, ceph_vinop(inode), inline_version);
+
+ if (inline_version == 1 || /* initial version, no data */
+ inline_version == CEPH_INLINE_NONE)
+ goto out;
+
+ if (locked_page) {
+ page = locked_page;
+ WARN_ON(!PageUptodate(page));
+ } else if (ceph_caps_issued(ci) &
+ (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) {
+ page = find_get_page(inode->i_mapping, 0);
+ if (page) {
+ if (PageUptodate(page)) {
+ from_pagecache = true;
+ lock_page(page);
+ } else {
+ page_cache_release(page);
+ page = NULL;
+ }
+ }
+ }
+
+ if (page) {
+ len = i_size_read(inode);
+ if (len > PAGE_CACHE_SIZE)
+ len = PAGE_CACHE_SIZE;
+ } else {
+ page = __page_cache_alloc(GFP_NOFS);
+ if (!page) {
+ err = -ENOMEM;
+ goto out;
+ }
+ err = __ceph_do_getattr(inode, page,
+ CEPH_STAT_CAP_INLINE_DATA, true);
+ if (err < 0) {
+ /* no inline data */
+ if (err == -ENODATA)
+ err = 0;
+ goto out;
+ }
+ len = err;
+ }
+
+ req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
+ ceph_vino(inode), 0, &len, 0, 1,
+ CEPH_OSD_OP_CREATE,
+ CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
+ ci->i_snap_realm->cached_context,
+ 0, 0, false);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto out;
+ }
+
+ ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime);
+ err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
+ if (!err)
+ err = ceph_osdc_wait_request(&fsc->client->osdc, req);
+ ceph_osdc_put_request(req);
+ if (err < 0)
+ goto out;
+
+ req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
+ ceph_vino(inode), 0, &len, 1, 3,
+ CEPH_OSD_OP_WRITE,
+ CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
+ ci->i_snap_realm->cached_context,
+ ci->i_truncate_seq, ci->i_truncate_size,
+ false);
+ if (IS_ERR(req)) {
+ err = PTR_ERR(req);
+ goto out;
+ }
+
+ osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false);
+
+ err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR,
+ "inline_version", &inline_version,
+ sizeof(inline_version),
+ CEPH_OSD_CMPXATTR_OP_GT,
+ CEPH_OSD_CMPXATTR_MODE_U64);
+ if (err)
+ goto out_put;
+
+ err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR,
+ "inline_version", &inline_version,
+ sizeof(inline_version), 0, 0);
+ if (err)
+ goto out_put;
+
+ ceph_osdc_build_request(req, 0, NULL, CEPH_NOSNAP, &inode->i_mtime);
+ err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
+ if (!err)
+ err = ceph_osdc_wait_request(&fsc->client->osdc, req);
+out_put:
+ ceph_osdc_put_request(req);
+ if (err == -ECANCELED)
+ err = 0;
+out:
+ if (page && page != locked_page) {
+ if (from_pagecache) {
+ unlock_page(page);
+ page_cache_release(page);
+ } else
+ __free_pages(page, 0);
+ }
+
+ dout("uninline_data %p %llx.%llx inline_version %llu = %d\n",
+ inode, ceph_vinop(inode), inline_version, err);
+ return err;
+}
+
static struct vm_operations_struct ceph_vmops = {
.fault = ceph_filemap_fault,
.page_mkwrite = ceph_page_mkwrite,
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index cefca661464b..b93c631c6c87 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -975,10 +975,12 @@ static int send_cap_msg(struct ceph_mds_session *session,
kuid_t uid, kgid_t gid, umode_t mode,
u64 xattr_version,
struct ceph_buffer *xattrs_buf,
- u64 follows)
+ u64 follows, bool inline_data)
{
struct ceph_mds_caps *fc;
struct ceph_msg *msg;
+ void *p;
+ size_t extra_len;
dout("send_cap_msg %s %llx %llx caps %s wanted %s dirty %s"
" seq %u/%u mseq %u follows %lld size %llu/%llu"
@@ -988,7 +990,10 @@ static int send_cap_msg(struct ceph_mds_session *session,
seq, issue_seq, mseq, follows, size, max_size,
xattr_version, xattrs_buf ? (int)xattrs_buf->vec.iov_len : 0);
- msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc), GFP_NOFS, false);
+ /* flock buffer size + inline version + inline data size */
+ extra_len = 4 + 8 + 4;
+ msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, sizeof(*fc) + extra_len,
+ GFP_NOFS, false);
if (!msg)
return -ENOMEM;
@@ -1020,6 +1025,14 @@ static int send_cap_msg(struct ceph_mds_session *session,
fc->gid = cpu_to_le32(from_kgid(&init_user_ns, gid));
fc->mode = cpu_to_le32(mode);
+ p = fc + 1;
+ /* flock buffer size */
+ ceph_encode_32(&p, 0);
+ /* inline version */
+ ceph_encode_64(&p, inline_data ? 0 : CEPH_INLINE_NONE);
+ /* inline data size */
+ ceph_encode_32(&p, 0);
+
fc->xattr_version = cpu_to_le64(xattr_version);
if (xattrs_buf) {
msg->middle = ceph_buffer_get(xattrs_buf);
@@ -1126,6 +1139,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
u64 flush_tid = 0;
int i;
int ret;
+ bool inline_data;
held = cap->issued | cap->implemented;
revoking = cap->implemented & ~cap->issued;
@@ -1209,13 +1223,15 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
xattr_version = ci->i_xattrs.version;
}
+ inline_data = ci->i_inline_version != CEPH_INLINE_NONE;
+
spin_unlock(&ci->i_ceph_lock);
ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
op, keep, want, flushing, seq, flush_tid, issue_seq, mseq,
size, max_size, &mtime, &atime, time_warp_seq,
uid, gid, mode, xattr_version, xattr_blob,
- follows);
+ follows, inline_data);
if (ret < 0) {
dout("error sending cap msg, must requeue %p\n", inode);
delayed = 1;
@@ -1336,7 +1352,7 @@ retry:
capsnap->time_warp_seq,
capsnap->uid, capsnap->gid, capsnap->mode,
capsnap->xattr_version, capsnap->xattr_blob,
- capsnap->follows);
+ capsnap->follows, capsnap->inline_data);
next_follows = capsnap->follows + 1;
ceph_put_cap_snap(capsnap);
@@ -2057,15 +2073,17 @@ static void __take_cap_refs(struct ceph_inode_info *ci, int got)
* requested from the MDS.
*/
static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
- int *got, loff_t endoff, int *check_max, int *err)
+ loff_t endoff, int *got, struct page **pinned_page,
+ int *check_max, int *err)
{
struct inode *inode = &ci->vfs_inode;
int ret = 0;
- int have, implemented;
+ int have, implemented, _got = 0;
int file_wanted;
dout("get_cap_refs %p need %s want %s\n", inode,
ceph_cap_string(need), ceph_cap_string(want));
+again:
spin_lock(&ci->i_ceph_lock);
/* make sure file is actually open */
@@ -2075,7 +2093,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
ceph_cap_string(need), ceph_cap_string(file_wanted));
*err = -EBADF;
ret = 1;
- goto out;
+ goto out_unlock;
}
/* finish pending truncate */
@@ -2095,7 +2113,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
*check_max = 1;
ret = 1;
}
- goto out;
+ goto out_unlock;
}
/*
* If a sync write is in progress, we must wait, so that we
@@ -2103,7 +2121,7 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
*/
if (__ceph_have_pending_cap_snap(ci)) {
dout("get_cap_refs %p cap_snap_pending\n", inode);
- goto out;
+ goto out_unlock;
}
}
@@ -2120,18 +2138,50 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
inode, ceph_cap_string(have), ceph_cap_string(not),
ceph_cap_string(revoking));
if ((revoking & not) == 0) {
- *got = need | (have & want);
- __take_cap_refs(ci, *got);
+ _got = need | (have & want);
+ __take_cap_refs(ci, _got);
ret = 1;
}
} else {
dout("get_cap_refs %p have %s needed %s\n", inode,
ceph_cap_string(have), ceph_cap_string(need));
}
-out:
+out_unlock:
spin_unlock(&ci->i_ceph_lock);
+
+ if (ci->i_inline_version != CEPH_INLINE_NONE &&
+ (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
+ i_size_read(inode) > 0) {
+ int ret1;
+ struct page *page = find_get_page(inode->i_mapping, 0);
+ if (page) {
+ if (PageUptodate(page)) {
+ *pinned_page = page;
+ goto out;
+ }
+ page_cache_release(page);
+ }
+ /*
+ * drop cap refs first because getattr while holding
+ * caps refs can cause deadlock.
+ */
+ ceph_put_cap_refs(ci, _got);
+ _got = 0;
+
+ /* getattr request will bring inline data into page cache */
+ ret1 = __ceph_do_getattr(inode, NULL,
+ CEPH_STAT_CAP_INLINE_DATA, true);
+ if (ret1 >= 0) {
+ ret = 0;
+ goto again;
+ }
+ *err = ret1;
+ ret = 1;
+ }
+out:
dout("get_cap_refs %p ret %d got %s\n", inode,
- ret, ceph_cap_string(*got));
+ ret, ceph_cap_string(_got));
+ *got = _got;
return ret;
}
@@ -2168,8 +2218,8 @@ static void check_max_size(struct inode *inode, loff_t endoff)
* due to a small max_size, make sure we check_max_size (and possibly
* ask the mds) so we don't get hung up indefinitely.
*/
-int ceph_get_caps(struct ceph_inode_info *ci, int need, int want, int *got,
- loff_t endoff)
+int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
+ loff_t endoff, int *got, struct page **pinned_page)
{
int check_max, ret, err;
@@ -2179,8 +2229,8 @@ retry:
check_max = 0;
err = 0;
ret = wait_event_interruptible(ci->i_cap_wq,
- try_get_cap_refs(ci, need, want,
- got, endoff,
+ try_get_cap_refs(ci, need, want, endoff,
+ got, pinned_page,
&check_max, &err));
if (err)
ret = err;
@@ -2383,6 +2433,8 @@ static void invalidate_aliases(struct inode *inode)
static void handle_cap_grant(struct ceph_mds_client *mdsc,
struct inode *inode, struct ceph_mds_caps *grant,
void *snaptrace, int snaptrace_len,
+ u64 inline_version,
+ void *inline_data, int inline_len,
struct ceph_buffer *xattr_buf,
struct ceph_mds_session *session,
struct ceph_cap *cap, int issued)
@@ -2403,6 +2455,7 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
bool queue_invalidate = false;
bool queue_revalidate = false;
bool deleted_inode = false;
+ bool fill_inline = false;
dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
inode, cap, mds, seq, ceph_cap_string(newcaps));
@@ -2576,6 +2629,13 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
}
BUG_ON(cap->issued & ~cap->implemented);
+ if (inline_version > 0 && inline_version >= ci->i_inline_version) {
+ ci->i_inline_version = inline_version;
+ if (ci->i_inline_version != CEPH_INLINE_NONE &&
+ (newcaps & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)))
+ fill_inline = true;
+ }
+
spin_unlock(&ci->i_ceph_lock);
if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) {
@@ -2589,6 +2649,9 @@ static void handle_cap_grant(struct ceph_mds_client *mdsc,
wake = true;
}
+ if (fill_inline)
+ ceph_fill_inline_data(inode, NULL, inline_data, inline_len);
+
if (queue_trunc) {
ceph_queue_vmtruncate(inode);
ceph_queue_revalidate(inode);
@@ -2996,11 +3059,12 @@ void ceph_handle_caps(struct ceph_mds_session *session,
u64 cap_id;
u64 size, max_size;
u64 tid;
+ u64 inline_version = 0;
+ void *inline_data = NULL;
+ u32 inline_len = 0;
void *snaptrace;
size_t snaptrace_len;
- void *flock;
- void *end;
- u32 flock_len;
+ void *p, *end;
dout("handle_caps from mds%d\n", mds);
@@ -3021,30 +3085,37 @@ void ceph_handle_caps(struct ceph_mds_session *session,
snaptrace = h + 1;
snaptrace_len = le32_to_cpu(h->snap_trace_len);
+ p = snaptrace + snaptrace_len;
if (le16_to_cpu(msg->hdr.version) >= 2) {
- void *p = snaptrace + snaptrace_len;
+ u32 flock_len;
ceph_decode_32_safe(&p, end, flock_len, bad);
if (p + flock_len > end)
goto bad;
- flock = p;
- } else {
- flock = NULL;
- flock_len = 0;
+ p += flock_len;
}
if (le16_to_cpu(msg->hdr.version) >= 3) {
if (op == CEPH_CAP_OP_IMPORT) {
- void *p = flock + flock_len;
if (p + sizeof(*peer) > end)
goto bad;
peer = p;
+ p += sizeof(*peer);
} else if (op == CEPH_CAP_OP_EXPORT) {
/* recorded in unused fields */
peer = (void *)&h->size;
}
}
+ if (le16_to_cpu(msg->hdr.version) >= 4) {
+ ceph_decode_64_safe(&p, end, inline_version, bad);
+ ceph_decode_32_safe(&p, end, inline_len, bad);
+ if (p + inline_len > end)
+ goto bad;
+ inline_data = p;
+ p += inline_len;
+ }
+
/* lookup ino */
inode = ceph_find_inode(sb, vino);
ci = ceph_inode(inode);
@@ -3085,6 +3156,7 @@ void ceph_handle_caps(struct ceph_mds_session *session,
handle_cap_import(mdsc, inode, h, peer, session,
&cap, &issued);
handle_cap_grant(mdsc, inode, h, snaptrace, snaptrace_len,
+ inline_version, inline_data, inline_len,
msg->middle, session, cap, issued);
goto done_unlocked;
}
@@ -3105,8 +3177,9 @@ void ceph_handle_caps(struct ceph_mds_session *session,
case CEPH_CAP_OP_GRANT:
__ceph_caps_issued(ci, &issued);
issued |= __ceph_caps_dirty(ci);
- handle_cap_grant(mdsc, inode, h, NULL, 0, msg->middle,
- session, cap, issued);
+ handle_cap_grant(mdsc, inode, h, NULL, 0,
+ inline_version, inline_data, inline_len,
+ msg->middle, session, cap, issued);
goto done_unlocked;
case CEPH_CAP_OP_FLUSH_ACK:
@@ -3137,8 +3210,7 @@ flush_cap_releases:
done:
mutex_unlock(&session->s_mutex);
done_unlocked:
- if (inode)
- iput(inode);
+ iput(inode);
return;
bad:
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 681a8537b64f..c241603764fd 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -183,7 +183,7 @@ more:
spin_unlock(&parent->d_lock);
/* make sure a dentry wasn't dropped while we didn't have parent lock */
- if (!ceph_dir_is_complete(dir)) {
+ if (!ceph_dir_is_complete_ordered(dir)) {
dout(" lost dir complete on %p; falling back to mds\n", dir);
dput(dentry);
err = -EAGAIN;
@@ -261,10 +261,6 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
/* always start with . and .. */
if (ctx->pos == 0) {
- /* note dir version at start of readdir so we can tell
- * if any dentries get dropped */
- fi->dir_release_count = atomic_read(&ci->i_release_count);
-
dout("readdir off 0 -> '.'\n");
if (!dir_emit(ctx, ".", 1,
ceph_translate_ino(inode->i_sb, inode->i_ino),
@@ -289,7 +285,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
if ((ctx->pos == 2 || fi->dentry) &&
!ceph_test_mount_opt(fsc, NOASYNCREADDIR) &&
ceph_snap(inode) != CEPH_SNAPDIR &&
- __ceph_dir_is_complete(ci) &&
+ __ceph_dir_is_complete_ordered(ci) &&
__ceph_caps_issued_mask(ci, CEPH_CAP_FILE_SHARED, 1)) {
u32 shared_gen = ci->i_shared_gen;
spin_unlock(&ci->i_ceph_lock);
@@ -312,6 +308,13 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
/* proceed with a normal readdir */
+ if (ctx->pos == 2) {
+ /* note dir version at start of readdir so we can tell
+ * if any dentries get dropped */
+ fi->dir_release_count = atomic_read(&ci->i_release_count);
+ fi->dir_ordered_count = ci->i_ordered_count;
+ }
+
more:
/* do we have the correct frag content buffered? */
if (fi->frag != frag || fi->last_readdir == NULL) {
@@ -446,8 +449,12 @@ more:
*/
spin_lock(&ci->i_ceph_lock);
if (atomic_read(&ci->i_release_count) == fi->dir_release_count) {
- dout(" marking %p complete\n", inode);
- __ceph_dir_set_complete(ci, fi->dir_release_count);
+ if (ci->i_ordered_count == fi->dir_ordered_count)
+ dout(" marking %p complete and ordered\n", inode);
+ else
+ dout(" marking %p complete\n", inode);
+ __ceph_dir_set_complete(ci, fi->dir_release_count,
+ fi->dir_ordered_count);
}
spin_unlock(&ci->i_ceph_lock);
@@ -805,7 +812,9 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
acls.pagelist = NULL;
}
err = ceph_mdsc_do_request(mdsc, dir, req);
- if (!err && !req->r_reply_info.head->is_dentry)
+ if (!err &&
+ !req->r_reply_info.head->is_target &&
+ !req->r_reply_info.head->is_dentry)
err = ceph_handle_notrace_create(dir, dentry);
ceph_mdsc_put_request(req);
out:
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 9f8e3572040e..ce74b394b49d 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -333,6 +333,11 @@ int ceph_release(struct inode *inode, struct file *file)
return 0;
}
+enum {
+ CHECK_EOF = 1,
+ READ_INLINE = 2,
+};
+
/*
* Read a range of bytes striped over one or more objects. Iterate over
* objects we stripe over. (That's not atomic, but good enough for now.)
@@ -412,7 +417,7 @@ more:
ret = read;
/* did we bounce off eof? */
if (pos + left > inode->i_size)
- *checkeof = 1;
+ *checkeof = CHECK_EOF;
}
dout("striped_read returns %d\n", ret);
@@ -598,7 +603,7 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
snapc = ci->i_snap_realm->cached_context;
vino = ceph_vino(inode);
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
- vino, pos, &len,
+ vino, pos, &len, 0,
2,/*include a 'startsync' command*/
CEPH_OSD_OP_WRITE, flags, snapc,
ci->i_truncate_seq,
@@ -609,6 +614,8 @@ ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
break;
}
+ osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC);
+
n = iov_iter_get_pages_alloc(from, &pages, len, &start);
if (unlikely(n < 0)) {
ret = n;
@@ -713,7 +720,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos)
snapc = ci->i_snap_realm->cached_context;
vino = ceph_vino(inode);
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
- vino, pos, &len, 1,
+ vino, pos, &len, 0, 1,
CEPH_OSD_OP_WRITE, flags, snapc,
ci->i_truncate_seq,
ci->i_truncate_size,
@@ -803,9 +810,10 @@ static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
size_t len = iocb->ki_nbytes;
struct inode *inode = file_inode(filp);
struct ceph_inode_info *ci = ceph_inode(inode);
+ struct page *pinned_page = NULL;
ssize_t ret;
int want, got = 0;
- int checkeof = 0, read = 0;
+ int retry_op = 0, read = 0;
again:
dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
@@ -815,7 +823,7 @@ again:
want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
else
want = CEPH_CAP_FILE_CACHE;
- ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, &got, -1);
+ ret = ceph_get_caps(ci, CEPH_CAP_FILE_RD, want, -1, &got, &pinned_page);
if (ret < 0)
return ret;
@@ -827,8 +835,12 @@ again:
inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
ceph_cap_string(got));
- /* hmm, this isn't really async... */
- ret = ceph_sync_read(iocb, to, &checkeof);
+ if (ci->i_inline_version == CEPH_INLINE_NONE) {
+ /* hmm, this isn't really async... */
+ ret = ceph_sync_read(iocb, to, &retry_op);
+ } else {
+ retry_op = READ_INLINE;
+ }
} else {
dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
@@ -838,13 +850,55 @@ again:
}
dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
+ if (pinned_page) {
+ page_cache_release(pinned_page);
+ pinned_page = NULL;
+ }
ceph_put_cap_refs(ci, got);
+ if (retry_op && ret >= 0) {
+ int statret;
+ struct page *page = NULL;
+ loff_t i_size;
+ if (retry_op == READ_INLINE) {
+ page = __page_cache_alloc(GFP_NOFS);
+ if (!page)
+ return -ENOMEM;
+ }
- if (checkeof && ret >= 0) {
- int statret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
+ statret = __ceph_do_getattr(inode, page,
+ CEPH_STAT_CAP_INLINE_DATA, !!page);
+ if (statret < 0) {
+ __free_page(page);
+ if (statret == -ENODATA) {
+ BUG_ON(retry_op != READ_INLINE);
+ goto again;
+ }
+ return statret;
+ }
+
+ i_size = i_size_read(inode);
+ if (retry_op == READ_INLINE) {
+ /* does not support inline data > PAGE_SIZE */
+ if (i_size > PAGE_CACHE_SIZE) {
+ ret = -EIO;
+ } else if (iocb->ki_pos < i_size) {
+ loff_t end = min_t(loff_t, i_size,
+ iocb->ki_pos + len);
+ if (statret < end)
+ zero_user_segment(page, statret, end);
+ ret = copy_page_to_iter(page,
+ iocb->ki_pos & ~PAGE_MASK,
+ end - iocb->ki_pos, to);
+ iocb->ki_pos += ret;
+ } else {
+ ret = 0;
+ }
+ __free_pages(page, 0);
+ return ret;
+ }
/* hit EOF or hole? */
- if (statret == 0 && iocb->ki_pos < inode->i_size &&
+ if (retry_op == CHECK_EOF && iocb->ki_pos < i_size &&
ret < len) {
dout("sync_read hit hole, ppos %lld < size %lld"
", reading more\n", iocb->ki_pos,
@@ -852,7 +906,7 @@ again:
read += ret;
len -= ret;
- checkeof = 0;
+ retry_op = 0;
goto again;
}
}
@@ -909,6 +963,12 @@ static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (err)
goto out;
+ if (ci->i_inline_version != CEPH_INLINE_NONE) {
+ err = ceph_uninline_data(file, NULL);
+ if (err < 0)
+ goto out;
+ }
+
retry_snap:
if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL)) {
err = -ENOSPC;
@@ -922,7 +982,8 @@ retry_snap:
else
want = CEPH_CAP_FILE_BUFFER;
got = 0;
- err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, pos + count);
+ err = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, pos + count,
+ &got, NULL);
if (err < 0)
goto out;
@@ -969,6 +1030,7 @@ retry_snap:
if (written >= 0) {
int dirty;
spin_lock(&ci->i_ceph_lock);
+ ci->i_inline_version = CEPH_INLINE_NONE;
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
spin_unlock(&ci->i_ceph_lock);
if (dirty)
@@ -1111,7 +1173,7 @@ static int ceph_zero_partial_object(struct inode *inode,
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
ceph_vino(inode),
offset, length,
- 1, op,
+ 0, 1, op,
CEPH_OSD_FLAG_WRITE |
CEPH_OSD_FLAG_ONDISK,
NULL, 0, 0, false);
@@ -1214,6 +1276,12 @@ static long ceph_fallocate(struct file *file, int mode,
goto unlock;
}
+ if (ci->i_inline_version != CEPH_INLINE_NONE) {
+ ret = ceph_uninline_data(file, NULL);
+ if (ret < 0)
+ goto unlock;
+ }
+
size = i_size_read(inode);
if (!(mode & FALLOC_FL_KEEP_SIZE))
endoff = offset + length;
@@ -1223,7 +1291,7 @@ static long ceph_fallocate(struct file *file, int mode,
else
want = CEPH_CAP_FILE_BUFFER;
- ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff);
+ ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, endoff, &got, NULL);
if (ret < 0)
goto unlock;
@@ -1240,6 +1308,7 @@ static long ceph_fallocate(struct file *file, int mode,
if (!ret) {
spin_lock(&ci->i_ceph_lock);
+ ci->i_inline_version = CEPH_INLINE_NONE;
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR);
spin_unlock(&ci->i_ceph_lock);
if (dirty)
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index a5593d51d035..f61a74115beb 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -387,8 +387,10 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
spin_lock_init(&ci->i_ceph_lock);
ci->i_version = 0;
+ ci->i_inline_version = 0;
ci->i_time_warp_seq = 0;
ci->i_ceph_flags = 0;
+ ci->i_ordered_count = 0;
atomic_set(&ci->i_release_count, 1);
atomic_set(&ci->i_complete_count, 0);
ci->i_symlink = NULL;
@@ -657,7 +659,7 @@ void ceph_fill_file_time(struct inode *inode, int issued,
* Populate an inode based on info from mds. May be called on new or
* existing inodes.
*/
-static int fill_inode(struct inode *inode,
+static int fill_inode(struct inode *inode, struct page *locked_page,
struct ceph_mds_reply_info_in *iinfo,
struct ceph_mds_reply_dirfrag *dirinfo,
struct ceph_mds_session *session,
@@ -675,6 +677,7 @@ static int fill_inode(struct inode *inode,
bool wake = false;
bool queue_trunc = false;
bool new_version = false;
+ bool fill_inline = false;
dout("fill_inode %p ino %llx.%llx v %llu had %llu\n",
inode, ceph_vinop(inode), le64_to_cpu(info->version),
@@ -845,7 +848,8 @@ static int fill_inode(struct inode *inode,
(issued & CEPH_CAP_FILE_EXCL) == 0 &&
!__ceph_dir_is_complete(ci)) {
dout(" marking %p complete (empty)\n", inode);
- __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count));
+ __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count),
+ ci->i_ordered_count);
}
/* were we issued a capability? */
@@ -873,8 +877,23 @@ static int fill_inode(struct inode *inode,
ceph_vinop(inode));
__ceph_get_fmode(ci, cap_fmode);
}
+
+ if (iinfo->inline_version > 0 &&
+ iinfo->inline_version >= ci->i_inline_version) {
+ int cache_caps = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
+ ci->i_inline_version = iinfo->inline_version;
+ if (ci->i_inline_version != CEPH_INLINE_NONE &&
+ (locked_page ||
+ (le32_to_cpu(info->cap.caps) & cache_caps)))
+ fill_inline = true;
+ }
+
spin_unlock(&ci->i_ceph_lock);
+ if (fill_inline)
+ ceph_fill_inline_data(inode, locked_page,
+ iinfo->inline_data, iinfo->inline_len);
+
if (wake)
wake_up_all(&ci->i_cap_wq);
@@ -1062,7 +1081,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
struct inode *dir = req->r_locked_dir;
if (dir) {
- err = fill_inode(dir, &rinfo->diri, rinfo->dirfrag,
+ err = fill_inode(dir, NULL,
+ &rinfo->diri, rinfo->dirfrag,
session, req->r_request_started, -1,
&req->r_caps_reservation);
if (err < 0)
@@ -1132,7 +1152,7 @@ retry_lookup:
}
req->r_target_inode = in;
- err = fill_inode(in, &rinfo->targeti, NULL,
+ err = fill_inode(in, req->r_locked_page, &rinfo->targeti, NULL,
session, req->r_request_started,
(!req->r_aborted && rinfo->head->result == 0) ?
req->r_fmode : -1,
@@ -1204,8 +1224,8 @@ retry_lookup:
ceph_invalidate_dentry_lease(dn);
/* d_move screws up sibling dentries' offsets */
- ceph_dir_clear_complete(dir);
- ceph_dir_clear_complete(olddir);
+ ceph_dir_clear_ordered(dir);
+ ceph_dir_clear_ordered(olddir);
dout("dn %p gets new offset %lld\n", req->r_old_dentry,
ceph_dentry(req->r_old_dentry)->offset);
@@ -1217,6 +1237,7 @@ retry_lookup:
if (!rinfo->head->is_target) {
dout("fill_trace null dentry\n");
if (dn->d_inode) {
+ ceph_dir_clear_ordered(dir);
dout("d_delete %p\n", dn);
d_delete(dn);
} else {
@@ -1233,7 +1254,7 @@ retry_lookup:
/* attach proper inode */
if (!dn->d_inode) {
- ceph_dir_clear_complete(dir);
+ ceph_dir_clear_ordered(dir);
ihold(in);
dn = splice_dentry(dn, in, &have_lease);
if (IS_ERR(dn)) {
@@ -1263,7 +1284,7 @@ retry_lookup:
BUG_ON(!dir);
BUG_ON(ceph_snap(dir) != CEPH_SNAPDIR);
dout(" linking snapped dir %p to dn %p\n", in, dn);
- ceph_dir_clear_complete(dir);
+ ceph_dir_clear_ordered(dir);
ihold(in);
dn = splice_dentry(dn, in, NULL);
if (IS_ERR(dn)) {
@@ -1300,7 +1321,7 @@ static int readdir_prepopulate_inodes_only(struct ceph_mds_request *req,
dout("new_inode badness got %d\n", err);
continue;
}
- rc = fill_inode(in, &rinfo->dir_in[i], NULL, session,
+ rc = fill_inode(in, NULL, &rinfo->dir_in[i], NULL, session,
req->r_request_started, -1,
&req->r_caps_reservation);
if (rc < 0) {
@@ -1416,7 +1437,7 @@ retry_lookup:
}
}
- if (fill_inode(in, &rinfo->dir_in[i], NULL, session,
+ if (fill_inode(in, NULL, &rinfo->dir_in[i], NULL, session,
req->r_request_started, -1,
&req->r_caps_reservation) < 0) {
pr_err("fill_inode badness on %p\n", in);
@@ -1899,7 +1920,8 @@ out_put:
* Verify that we have a lease on the given mask. If not,
* do a getattr against an mds.
*/
-int ceph_do_getattr(struct inode *inode, int mask, bool force)
+int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
+ int mask, bool force)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
struct ceph_mds_client *mdsc = fsc->mdsc;
@@ -1911,7 +1933,8 @@ int ceph_do_getattr(struct inode *inode, int mask, bool force)
return 0;
}
- dout("do_getattr inode %p mask %s mode 0%o\n", inode, ceph_cap_string(mask), inode->i_mode);
+ dout("do_getattr inode %p mask %s mode 0%o\n",
+ inode, ceph_cap_string(mask), inode->i_mode);
if (!force && ceph_caps_issued_mask(ceph_inode(inode), mask, 1))
return 0;
@@ -1922,7 +1945,19 @@ int ceph_do_getattr(struct inode *inode, int mask, bool force)
ihold(inode);
req->r_num_caps = 1;
req->r_args.getattr.mask = cpu_to_le32(mask);
+ req->r_locked_page = locked_page;
err = ceph_mdsc_do_request(mdsc, NULL, req);
+ if (locked_page && err == 0) {
+ u64 inline_version = req->r_reply_info.targeti.inline_version;
+ if (inline_version == 0) {
+ /* the reply is supposed to contain inline data */
+ err = -EINVAL;
+ } else if (inline_version == CEPH_INLINE_NONE) {
+ err = -ENODATA;
+ } else {
+ err = req->r_reply_info.targeti.inline_len;
+ }
+ }
ceph_mdsc_put_request(req);
dout("do_getattr result=%d\n", err);
return err;
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index fbc39c47bacd..c35c5c614e38 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -9,6 +9,8 @@
#include <linux/ceph/pagelist.h>
static u64 lock_secret;
+static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
+ struct ceph_mds_request *req);
static inline u64 secure_addr(void *addr)
{
@@ -40,6 +42,9 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
u64 length = 0;
u64 owner;
+ if (operation != CEPH_MDS_OP_SETFILELOCK || cmd == CEPH_LOCK_UNLOCK)
+ wait = 0;
+
req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -68,6 +73,9 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
req->r_args.filelock_change.length = cpu_to_le64(length);
req->r_args.filelock_change.wait = wait;
+ if (wait)
+ req->r_wait_for_completion = ceph_lock_wait_for_completion;
+
err = ceph_mdsc_do_request(mdsc, inode, req);
if (operation == CEPH_MDS_OP_GETFILELOCK) {
@@ -96,6 +104,52 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
return err;
}
+static int ceph_lock_wait_for_completion(struct ceph_mds_client *mdsc,
+ struct ceph_mds_request *req)
+{
+ struct ceph_mds_request *intr_req;
+ struct inode *inode = req->r_inode;
+ int err, lock_type;
+
+ BUG_ON(req->r_op != CEPH_MDS_OP_SETFILELOCK);
+ if (req->r_args.filelock_change.rule == CEPH_LOCK_FCNTL)
+ lock_type = CEPH_LOCK_FCNTL_INTR;
+ else if (req->r_args.filelock_change.rule == CEPH_LOCK_FLOCK)
+ lock_type = CEPH_LOCK_FLOCK_INTR;
+ else
+ BUG_ON(1);
+ BUG_ON(req->r_args.filelock_change.type == CEPH_LOCK_UNLOCK);
+
+ err = wait_for_completion_interruptible(&req->r_completion);
+ if (!err)
+ return 0;
+
+ dout("ceph_lock_wait_for_completion: request %llu was interrupted\n",
+ req->r_tid);
+
+ intr_req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETFILELOCK,
+ USE_AUTH_MDS);
+ if (IS_ERR(intr_req))
+ return PTR_ERR(intr_req);
+
+ intr_req->r_inode = inode;
+ ihold(inode);
+ intr_req->r_num_caps = 1;
+
+ intr_req->r_args.filelock_change = req->r_args.filelock_change;
+ intr_req->r_args.filelock_change.rule = lock_type;
+ intr_req->r_args.filelock_change.type = CEPH_LOCK_UNLOCK;
+
+ err = ceph_mdsc_do_request(mdsc, inode, intr_req);
+ ceph_mdsc_put_request(intr_req);
+
+ if (err && err != -ERESTARTSYS)
+ return err;
+
+ wait_for_completion(&req->r_completion);
+ return 0;
+}
+
/**
* Attempt to set an fcntl lock.
* For now, this just goes away to the server. Later it may be more awesome.
@@ -143,11 +197,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
err);
}
}
-
- } else if (err == -ERESTARTSYS) {
- dout("undoing lock\n");
- ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
- CEPH_LOCK_UNLOCK, 0, fl);
}
return err;
}
@@ -186,11 +235,6 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
file, CEPH_LOCK_UNLOCK, 0, fl);
dout("got %d on flock_lock_file_wait, undid lock", err);
}
- } else if (err == -ERESTARTSYS) {
- dout("undoing lock\n");
- ceph_lock_message(CEPH_LOCK_FLOCK,
- CEPH_MDS_OP_SETFILELOCK,
- file, CEPH_LOCK_UNLOCK, 0, fl);
}
return err;
}
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index a92d3f5c6c12..d2171f4a6980 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -89,6 +89,16 @@ static int parse_reply_info_in(void **p, void *end,
ceph_decode_need(p, end, info->xattr_len, bad);
info->xattr_data = *p;
*p += info->xattr_len;
+
+ if (features & CEPH_FEATURE_MDS_INLINE_DATA) {
+ ceph_decode_64_safe(p, end, info->inline_version, bad);
+ ceph_decode_32_safe(p, end, info->inline_len, bad);
+ ceph_decode_need(p, end, info->inline_len, bad);
+ info->inline_data = *p;
+ *p += info->inline_len;
+ } else
+ info->inline_version = CEPH_INLINE_NONE;
+
return 0;
bad:
return err;
@@ -524,8 +534,7 @@ void ceph_mdsc_release_request(struct kref *kref)
}
if (req->r_locked_dir)
ceph_put_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN);
- if (req->r_target_inode)
- iput(req->r_target_inode);
+ iput(req->r_target_inode);
if (req->r_dentry)
dput(req->r_dentry);
if (req->r_old_dentry)
@@ -861,8 +870,11 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
/*
* Serialize client metadata into waiting buffer space, using
* the format that userspace expects for map<string, string>
+ *
+ * ClientSession messages with metadata are v2
*/
- msg->hdr.version = 2; /* ClientSession messages with metadata are v2 */
+ msg->hdr.version = cpu_to_le16(2);
+ msg->hdr.compat_version = cpu_to_le16(1);
/* The write pointer, following the session_head structure */
p = msg->front.iov_base + sizeof(*h);
@@ -1066,8 +1078,7 @@ out:
session->s_cap_iterator = NULL;
spin_unlock(&session->s_cap_lock);
- if (last_inode)
- iput(last_inode);
+ iput(last_inode);
if (old_cap)
ceph_put_cap(session->s_mdsc, old_cap);
@@ -1874,7 +1885,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
goto out_free2;
}
- msg->hdr.version = 2;
+ msg->hdr.version = cpu_to_le16(2);
msg->hdr.tid = cpu_to_le64(req->r_tid);
head = msg->front.iov_base;
@@ -2208,6 +2219,8 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
&req->r_completion, req->r_timeout);
if (err == 0)
err = -EIO;
+ } else if (req->r_wait_for_completion) {
+ err = req->r_wait_for_completion(mdsc, req);
} else {
err = wait_for_completion_killable(&req->r_completion);
}
@@ -3744,6 +3757,20 @@ static struct ceph_msg *mds_alloc_msg(struct ceph_connection *con,
return msg;
}
+static int sign_message(struct ceph_connection *con, struct ceph_msg *msg)
+{
+ struct ceph_mds_session *s = con->private;
+ struct ceph_auth_handshake *auth = &s->s_auth;
+ return ceph_auth_sign_message(auth, msg);
+}
+
+static int check_message_signature(struct ceph_connection *con, struct ceph_msg *msg)
+{
+ struct ceph_mds_session *s = con->private;
+ struct ceph_auth_handshake *auth = &s->s_auth;
+ return ceph_auth_check_message_signature(auth, msg);
+}
+
static const struct ceph_connection_operations mds_con_ops = {
.get = con_get,
.put = con_put,
@@ -3753,6 +3780,8 @@ static const struct ceph_connection_operations mds_con_ops = {
.invalidate_authorizer = invalidate_authorizer,
.peer_reset = peer_reset,
.alloc_msg = mds_alloc_msg,
+ .sign_message = sign_message,
+ .check_message_signature = check_message_signature,
};
/* eof */
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 3288359353e9..e2817d00f7d9 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -41,6 +41,9 @@ struct ceph_mds_reply_info_in {
char *symlink;
u32 xattr_len;
char *xattr_data;
+ u64 inline_version;
+ u32 inline_len;
+ char *inline_data;
};
/*
@@ -166,6 +169,11 @@ struct ceph_mds_client;
*/
typedef void (*ceph_mds_request_callback_t) (struct ceph_mds_client *mdsc,
struct ceph_mds_request *req);
+/*
+ * wait for request completion callback
+ */
+typedef int (*ceph_mds_request_wait_callback_t) (struct ceph_mds_client *mdsc,
+ struct ceph_mds_request *req);
/*
* an in-flight mds request
@@ -215,6 +223,7 @@ struct ceph_mds_request {
int r_request_release_offset;
struct ceph_msg *r_reply;
struct ceph_mds_reply_info_parsed r_reply_info;
+ struct page *r_locked_page;
int r_err;
bool r_aborted;
@@ -239,6 +248,7 @@ struct ceph_mds_request {
struct completion r_completion;
struct completion r_safe_completion;
ceph_mds_request_callback_t r_callback;
+ ceph_mds_request_wait_callback_t r_wait_for_completion;
struct list_head r_unsafe_item; /* per-session unsafe list item */
bool r_got_unsafe, r_got_safe, r_got_result;
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index f01645a27752..ce35fbd4ba5d 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -288,6 +288,9 @@ static int cmpu64_rev(const void *a, const void *b)
return 0;
}
+
+static struct ceph_snap_context *empty_snapc;
+
/*
* build the snap context for a given realm.
*/
@@ -328,6 +331,12 @@ static int build_snap_context(struct ceph_snap_realm *realm)
return 0;
}
+ if (num == 0 && realm->seq == empty_snapc->seq) {
+ ceph_get_snap_context(empty_snapc);
+ snapc = empty_snapc;
+ goto done;
+ }
+
/* alloc new snap context */
err = -ENOMEM;
if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64))
@@ -365,8 +374,8 @@ static int build_snap_context(struct ceph_snap_realm *realm)
realm->ino, realm, snapc, snapc->seq,
(unsigned int) snapc->num_snaps);
- if (realm->cached_context)
- ceph_put_snap_context(realm->cached_context);
+done:
+ ceph_put_snap_context(realm->cached_context);
realm->cached_context = snapc;
return 0;
@@ -466,6 +475,9 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
cap_snap. lucky us. */
dout("queue_cap_snap %p already pending\n", inode);
kfree(capsnap);
+ } else if (ci->i_snap_realm->cached_context == empty_snapc) {
+ dout("queue_cap_snap %p empty snapc\n", inode);
+ kfree(capsnap);
} else if (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL|
CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) {
struct ceph_snap_context *snapc = ci->i_head_snapc;
@@ -504,6 +516,8 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci)
capsnap->xattr_version = 0;
}
+ capsnap->inline_data = ci->i_inline_version != CEPH_INLINE_NONE;
+
/* dirty page count moved from _head to this cap_snap;
all subsequent writes page dirties occur _after_ this
snapshot. */
@@ -590,15 +604,13 @@ static void queue_realm_cap_snaps(struct ceph_snap_realm *realm)
if (!inode)
continue;
spin_unlock(&realm->inodes_with_caps_lock);
- if (lastinode)
- iput(lastinode);
+ iput(lastinode);
lastinode = inode;
ceph_queue_cap_snap(ci);
spin_lock(&realm->inodes_with_caps_lock);
}
spin_unlock(&realm->inodes_with_caps_lock);
- if (lastinode)
- iput(lastinode);
+ iput(lastinode);
list_for_each_entry(child, &realm->children, child_item) {
dout("queue_realm_cap_snaps %p %llx queue child %p %llx\n",
@@ -928,5 +940,16 @@ out:
return;
}
+int __init ceph_snap_init(void)
+{
+ empty_snapc = ceph_create_snap_context(0, GFP_NOFS);
+ if (!empty_snapc)
+ return -ENOMEM;
+ empty_snapc->seq = 1;
+ return 0;
+}
-
+void ceph_snap_exit(void)
+{
+ ceph_put_snap_context(empty_snapc);
+}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index f6e12377335c..50f06cddc94b 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -515,7 +515,8 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
struct ceph_fs_client *fsc;
const u64 supported_features =
CEPH_FEATURE_FLOCK |
- CEPH_FEATURE_DIRLAYOUTHASH;
+ CEPH_FEATURE_DIRLAYOUTHASH |
+ CEPH_FEATURE_MDS_INLINE_DATA;
const u64 required_features = 0;
int page_count;
size_t size;
@@ -1017,9 +1018,6 @@ static struct file_system_type ceph_fs_type = {
};
MODULE_ALIAS_FS("ceph");
-#define _STRINGIFY(x) #x
-#define STRINGIFY(x) _STRINGIFY(x)
-
static int __init init_ceph(void)
{
int ret = init_caches();
@@ -1028,15 +1026,20 @@ static int __init init_ceph(void)
ceph_flock_init();
ceph_xattr_init();
+ ret = ceph_snap_init();
+ if (ret)
+ goto out_xattr;
ret = register_filesystem(&ceph_fs_type);
if (ret)
- goto out_icache;
+ goto out_snap;
pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
return 0;
-out_icache:
+out_snap:
+ ceph_snap_exit();
+out_xattr:
ceph_xattr_exit();
destroy_caches();
out:
@@ -1047,6 +1050,7 @@ static void __exit exit_ceph(void)
{
dout("exit_ceph\n");
unregister_filesystem(&ceph_fs_type);
+ ceph_snap_exit();
ceph_xattr_exit();
destroy_caches();
}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index b82f507979b8..e1aa32d0759d 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -161,6 +161,7 @@ struct ceph_cap_snap {
u64 time_warp_seq;
int writing; /* a sync write is still in progress */
int dirty_pages; /* dirty pages awaiting writeback */
+ bool inline_data;
};
static inline void ceph_put_cap_snap(struct ceph_cap_snap *capsnap)
@@ -253,9 +254,11 @@ struct ceph_inode_info {
spinlock_t i_ceph_lock;
u64 i_version;
+ u64 i_inline_version;
u32 i_time_warp_seq;
unsigned i_ceph_flags;
+ int i_ordered_count;
atomic_t i_release_count;
atomic_t i_complete_count;
@@ -434,14 +437,19 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
/*
* Ceph inode.
*/
-#define CEPH_I_NODELAY 4 /* do not delay cap release */
-#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */
-#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */
+#define CEPH_I_DIR_ORDERED 1 /* dentries in dir are ordered */
+#define CEPH_I_NODELAY 4 /* do not delay cap release */
+#define CEPH_I_FLUSH 8 /* do not delay flush of dirty metadata */
+#define CEPH_I_NOFLUSH 16 /* do not flush dirty caps */
static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci,
- int release_count)
+ int release_count, int ordered_count)
{
atomic_set(&ci->i_complete_count, release_count);
+ if (ci->i_ordered_count == ordered_count)
+ ci->i_ceph_flags |= CEPH_I_DIR_ORDERED;
+ else
+ ci->i_ceph_flags &= ~CEPH_I_DIR_ORDERED;
}
static inline void __ceph_dir_clear_complete(struct ceph_inode_info *ci)
@@ -455,16 +463,35 @@ static inline bool __ceph_dir_is_complete(struct ceph_inode_info *ci)
atomic_read(&ci->i_release_count);
}
+static inline bool __ceph_dir_is_complete_ordered(struct ceph_inode_info *ci)
+{
+ return __ceph_dir_is_complete(ci) &&
+ (ci->i_ceph_flags & CEPH_I_DIR_ORDERED);
+}
+
static inline void ceph_dir_clear_complete(struct inode *inode)
{
__ceph_dir_clear_complete(ceph_inode(inode));
}
-static inline bool ceph_dir_is_complete(struct inode *inode)
+static inline void ceph_dir_clear_ordered(struct inode *inode)
{
- return __ceph_dir_is_complete(ceph_inode(inode));
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ spin_lock(&ci->i_ceph_lock);
+ ci->i_ordered_count++;
+ ci->i_ceph_flags &= ~CEPH_I_DIR_ORDERED;
+ spin_unlock(&ci->i_ceph_lock);
}
+static inline bool ceph_dir_is_complete_ordered(struct inode *inode)
+{
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ bool ret;
+ spin_lock(&ci->i_ceph_lock);
+ ret = __ceph_dir_is_complete_ordered(ci);
+ spin_unlock(&ci->i_ceph_lock);
+ return ret;
+}
/* find a specific frag @f */
extern struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci,
@@ -580,6 +607,7 @@ struct ceph_file_info {
char *last_name; /* last entry in previous chunk */
struct dentry *dentry; /* next dentry (for dcache readdir) */
int dir_release_count;
+ int dir_ordered_count;
/* used for -o dirstat read() on directory thing */
char *dir_info;
@@ -673,6 +701,8 @@ extern void ceph_queue_cap_snap(struct ceph_inode_info *ci);
extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
struct ceph_cap_snap *capsnap);
extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc);
+extern int ceph_snap_init(void);
+extern void ceph_snap_exit(void);
/*
* a cap_snap is "pending" if it is still awaiting an in-progress
@@ -715,7 +745,12 @@ extern void ceph_queue_vmtruncate(struct inode *inode);
extern void ceph_queue_invalidate(struct inode *inode);
extern void ceph_queue_writeback(struct inode *inode);
-extern int ceph_do_getattr(struct inode *inode, int mask, bool force);
+extern int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
+ int mask, bool force);
+static inline int ceph_do_getattr(struct inode *inode, int mask, bool force)
+{
+ return __ceph_do_getattr(inode, NULL, mask, force);
+}
extern int ceph_permission(struct inode *inode, int mask);
extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry,
@@ -830,7 +865,7 @@ extern int ceph_encode_dentry_release(void **p, struct dentry *dn,
int mds, int drop, int unless);
extern int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
- int *got, loff_t endoff);
+ loff_t endoff, int *got, struct page **pinned_page);
/* for counting open files by mode */
static inline void __ceph_get_fmode(struct ceph_inode_info *ci, int mode)
@@ -852,7 +887,9 @@ extern int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
struct file *file, unsigned flags, umode_t mode,
int *opened);
extern int ceph_release(struct inode *inode, struct file *filp);
-
+extern void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
+ char *data, size_t len);
+int ceph_uninline_data(struct file *filp, struct page *locked_page);
/* dir.c */
extern const struct file_operations ceph_dir_fops;
extern const struct inode_operations ceph_dir_iops;
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 678b0d2bbbc4..5a492caf34cb 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -854,7 +854,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
struct ceph_pagelist *pagelist = NULL;
int err;
- if (value) {
+ if (size > 0) {
/* copy value into pagelist */
pagelist = kmalloc(sizeof(*pagelist), GFP_NOFS);
if (!pagelist)
@@ -864,7 +864,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
err = ceph_pagelist_append(pagelist, value, size);
if (err)
goto out;
- } else {
+ } else if (!value) {
flags |= CEPH_XATTR_REMOVE;
}
@@ -1001,6 +1001,9 @@ int ceph_setxattr(struct dentry *dentry, const char *name,
if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
return generic_setxattr(dentry, name, value, size, flags);
+ if (size == 0)
+ value = ""; /* empty EA, do not remove */
+
return __ceph_setxattr(dentry, name, value, size, flags);
}
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index c2d6604667b0..719e1ce1c609 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1917,7 +1917,6 @@ ecryptfs_decode_from_filename(unsigned char *dst, size_t *dst_size,
break;
case 2:
dst[dst_byte_offset++] |= (src_byte);
- dst[dst_byte_offset] = 0;
current_bit_offset = 0;
break;
}
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 80154ec4f8c2..6f4e659f508f 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -190,23 +190,11 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
{
int rc = 0;
struct ecryptfs_crypt_stat *crypt_stat = NULL;
- struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
struct dentry *ecryptfs_dentry = file->f_path.dentry;
/* Private value of ecryptfs_dentry allocated in
* ecryptfs_lookup() */
struct ecryptfs_file_info *file_info;
- mount_crypt_stat = &ecryptfs_superblock_to_private(
- ecryptfs_dentry->d_sb)->mount_crypt_stat;
- if ((mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
- && ((file->f_flags & O_WRONLY) || (file->f_flags & O_RDWR)
- || (file->f_flags & O_CREAT) || (file->f_flags & O_TRUNC)
- || (file->f_flags & O_APPEND))) {
- printk(KERN_WARNING "Mount has encrypted view enabled; "
- "files may only be read\n");
- rc = -EPERM;
- goto out;
- }
/* Released in ecryptfs_release or end of function if failure */
file_info = kmem_cache_zalloc(ecryptfs_file_info_cache, GFP_KERNEL);
ecryptfs_set_file_private(file, file_info);
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 635e8e16a5b7..917bd5c9776a 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -100,12 +100,12 @@ int ecryptfs_parse_packet_length(unsigned char *data, size_t *size,
(*size) = 0;
if (data[0] < 192) {
/* One-byte length */
- (*size) = (unsigned char)data[0];
+ (*size) = data[0];
(*length_size) = 1;
} else if (data[0] < 224) {
/* Two-byte length */
- (*size) = (((unsigned char)(data[0]) - 192) * 256);
- (*size) += ((unsigned char)(data[1]) + 192);
+ (*size) = (data[0] - 192) * 256;
+ (*size) += data[1] + 192;
(*length_size) = 2;
} else if (data[0] == 255) {
/* If support is added, adjust ECRYPTFS_MAX_PKT_LEN_SIZE */
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index c4cd1fd86cc2..d9eb84bda559 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -493,6 +493,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
{
struct super_block *s;
struct ecryptfs_sb_info *sbi;
+ struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
struct ecryptfs_dentry_info *root_info;
const char *err = "Getting sb failed";
struct inode *inode;
@@ -511,6 +512,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
err = "Error parsing options";
goto out;
}
+ mount_crypt_stat = &sbi->mount_crypt_stat;
s = sget(fs_type, NULL, set_anon_super, flags, NULL);
if (IS_ERR(s)) {
@@ -557,11 +559,19 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
/**
* Set the POSIX ACL flag based on whether they're enabled in the lower
- * mount. Force a read-only eCryptfs mount if the lower mount is ro.
- * Allow a ro eCryptfs mount even when the lower mount is rw.
+ * mount.
*/
s->s_flags = flags & ~MS_POSIXACL;
- s->s_flags |= path.dentry->d_sb->s_flags & (MS_RDONLY | MS_POSIXACL);
+ s->s_flags |= path.dentry->d_sb->s_flags & MS_POSIXACL;
+
+ /**
+ * Force a read-only eCryptfs mount when:
+ * 1) The lower mount is ro
+ * 2) The ecryptfs_encrypted_view mount option is specified
+ */
+ if (path.dentry->d_sb->s_flags & MS_RDONLY ||
+ mount_crypt_stat->flags & ECRYPTFS_ENCRYPTED_VIEW_ENABLED)
+ s->s_flags |= MS_RDONLY;
s->s_maxbytes = path.dentry->d_sb->s_maxbytes;
s->s_blocksize = path.dentry->d_sb->s_blocksize;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 503ea15dc5db..370420bfae8d 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -267,7 +267,6 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
handle_t *handle;
ext4_lblk_t orig_blk_offset, donor_blk_offset;
unsigned long blocksize = orig_inode->i_sb->s_blocksize;
- unsigned int w_flags = 0;
unsigned int tmp_data_size, data_size, replaced_size;
int err2, jblocks, retries = 0;
int replaced_count = 0;
@@ -288,9 +287,6 @@ again:
return 0;
}
- if (segment_eq(get_fs(), KERNEL_DS))
- w_flags |= AOP_FLAG_UNINTERRUPTIBLE;
-
orig_blk_offset = orig_page_offset * blocks_per_page +
data_offset_in_page;
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 966ace8b243f..28d0c7abba1c 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -415,7 +415,7 @@ err_unlock:
err_region:
unregister_chrdev_region(devt, 1);
err:
- fuse_conn_kill(fc);
+ fuse_abort_conn(fc);
goto out;
}
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ca887314aba9..ba1107977f2e 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -511,6 +511,35 @@ void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
}
EXPORT_SYMBOL_GPL(fuse_request_send);
+ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args)
+{
+ struct fuse_req *req;
+ ssize_t ret;
+
+ req = fuse_get_req(fc, 0);
+ if (IS_ERR(req))
+ return PTR_ERR(req);
+
+ req->in.h.opcode = args->in.h.opcode;
+ req->in.h.nodeid = args->in.h.nodeid;
+ req->in.numargs = args->in.numargs;
+ memcpy(req->in.args, args->in.args,
+ args->in.numargs * sizeof(struct fuse_in_arg));
+ req->out.argvar = args->out.argvar;
+ req->out.numargs = args->out.numargs;
+ memcpy(req->out.args, args->out.args,
+ args->out.numargs * sizeof(struct fuse_arg));
+ fuse_request_send(fc, req);
+ ret = req->out.h.error;
+ if (!ret && args->out.argvar) {
+ BUG_ON(args->out.numargs != 1);
+ ret = req->out.args[0].size;
+ }
+ fuse_put_request(fc, req);
+
+ return ret;
+}
+
static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
struct fuse_req *req)
{
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index df562cc87763..252b8a5de8b5 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -145,22 +145,22 @@ static void fuse_invalidate_entry(struct dentry *entry)
fuse_invalidate_entry_cache(entry);
}
-static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_req *req,
+static void fuse_lookup_init(struct fuse_conn *fc, struct fuse_args *args,
u64 nodeid, struct qstr *name,
struct fuse_entry_out *outarg)
{
memset(outarg, 0, sizeof(struct fuse_entry_out));
- req->in.h.opcode = FUSE_LOOKUP;
- req->in.h.nodeid = nodeid;
- req->in.numargs = 1;
- req->in.args[0].size = name->len + 1;
- req->in.args[0].value = name->name;
- req->out.numargs = 1;
+ args->in.h.opcode = FUSE_LOOKUP;
+ args->in.h.nodeid = nodeid;
+ args->in.numargs = 1;
+ args->in.args[0].size = name->len + 1;
+ args->in.args[0].value = name->name;
+ args->out.numargs = 1;
if (fc->minor < 9)
- req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
+ args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
else
- req->out.args[0].size = sizeof(struct fuse_entry_out);
- req->out.args[0].value = outarg;
+ args->out.args[0].size = sizeof(struct fuse_entry_out);
+ args->out.args[0].value = outarg;
}
u64 fuse_get_attr_version(struct fuse_conn *fc)
@@ -200,9 +200,8 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
goto invalid;
else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
(flags & LOOKUP_REVAL)) {
- int err;
struct fuse_entry_out outarg;
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_forget_link *forget;
u64 attr_version;
@@ -215,31 +214,23 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
goto out;
fc = get_fuse_conn(inode);
- req = fuse_get_req_nopages(fc);
- ret = PTR_ERR(req);
- if (IS_ERR(req))
- goto out;
forget = fuse_alloc_forget();
- if (!forget) {
- fuse_put_request(fc, req);
- ret = -ENOMEM;
+ ret = -ENOMEM;
+ if (!forget)
goto out;
- }
attr_version = fuse_get_attr_version(fc);
parent = dget_parent(entry);
- fuse_lookup_init(fc, req, get_node_id(parent->d_inode),
+ fuse_lookup_init(fc, &args, get_node_id(parent->d_inode),
&entry->d_name, &outarg);
- fuse_request_send(fc, req);
+ ret = fuse_simple_request(fc, &args);
dput(parent);
- err = req->out.h.error;
- fuse_put_request(fc, req);
/* Zero nodeid is same as -ENOENT */
- if (!err && !outarg.nodeid)
- err = -ENOENT;
- if (!err) {
+ if (!ret && !outarg.nodeid)
+ ret = -ENOENT;
+ if (!ret) {
fi = get_fuse_inode(inode);
if (outarg.nodeid != get_node_id(inode)) {
fuse_queue_forget(fc, forget, outarg.nodeid, 1);
@@ -250,7 +241,9 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
spin_unlock(&fc->lock);
}
kfree(forget);
- if (err || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
+ if (ret == -ENOMEM)
+ goto out;
+ if (ret || (outarg.attr.mode ^ inode->i_mode) & S_IFMT)
goto invalid;
fuse_change_attributes(inode, &outarg.attr,
@@ -296,7 +289,7 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
struct fuse_entry_out *outarg, struct inode **inode)
{
struct fuse_conn *fc = get_fuse_conn_super(sb);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_forget_link *forget;
u64 attr_version;
int err;
@@ -306,24 +299,16 @@ int fuse_lookup_name(struct super_block *sb, u64 nodeid, struct qstr *name,
if (name->len > FUSE_NAME_MAX)
goto out;
- req = fuse_get_req_nopages(fc);
- err = PTR_ERR(req);
- if (IS_ERR(req))
- goto out;
forget = fuse_alloc_forget();
err = -ENOMEM;
- if (!forget) {
- fuse_put_request(fc, req);
+ if (!forget)
goto out;
- }
attr_version = fuse_get_attr_version(fc);
- fuse_lookup_init(fc, req, nodeid, name, outarg);
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ fuse_lookup_init(fc, &args, nodeid, name, outarg);
+ err = fuse_simple_request(fc, &args);
/* Zero nodeid is same as -ENOENT, but with valid timeout */
if (err || !outarg->nodeid)
goto out_put_forget;
@@ -405,7 +390,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
int err;
struct inode *inode;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_forget_link *forget;
struct fuse_create_in inarg;
struct fuse_open_out outopen;
@@ -420,15 +405,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
if (!forget)
goto out_err;
- req = fuse_get_req_nopages(fc);
- err = PTR_ERR(req);
- if (IS_ERR(req))
- goto out_put_forget_req;
-
err = -ENOMEM;
ff = fuse_file_alloc(fc);
if (!ff)
- goto out_put_request;
+ goto out_put_forget_req;
if (!fc->dont_mask)
mode &= ~current_umask();
@@ -439,24 +419,23 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
inarg.flags = flags;
inarg.mode = mode;
inarg.umask = current_umask();
- req->in.h.opcode = FUSE_CREATE;
- req->in.h.nodeid = get_node_id(dir);
- req->in.numargs = 2;
- req->in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) :
+ args.in.h.opcode = FUSE_CREATE;
+ args.in.h.nodeid = get_node_id(dir);
+ args.in.numargs = 2;
+ args.in.args[0].size = fc->minor < 12 ? sizeof(struct fuse_open_in) :
sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->in.args[1].size = entry->d_name.len + 1;
- req->in.args[1].value = entry->d_name.name;
- req->out.numargs = 2;
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = entry->d_name.len + 1;
+ args.in.args[1].value = entry->d_name.name;
+ args.out.numargs = 2;
if (fc->minor < 9)
- req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
+ args.out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
else
- req->out.args[0].size = sizeof(outentry);
- req->out.args[0].value = &outentry;
- req->out.args[1].size = sizeof(outopen);
- req->out.args[1].value = &outopen;
- fuse_request_send(fc, req);
- err = req->out.h.error;
+ args.out.args[0].size = sizeof(outentry);
+ args.out.args[0].value = &outentry;
+ args.out.args[1].size = sizeof(outopen);
+ args.out.args[1].value = &outopen;
+ err = fuse_simple_request(fc, &args);
if (err)
goto out_free_ff;
@@ -464,7 +443,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
if (!S_ISREG(outentry.attr.mode) || invalid_nodeid(outentry.nodeid))
goto out_free_ff;
- fuse_put_request(fc, req);
ff->fh = outopen.fh;
ff->nodeid = outentry.nodeid;
ff->open_flags = outopen.open_flags;
@@ -492,8 +470,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
out_free_ff:
fuse_file_free(ff);
-out_put_request:
- fuse_put_request(fc, req);
out_put_forget_req:
kfree(forget);
out_err:
@@ -547,7 +523,7 @@ no_open:
/*
* Code shared between mknod, mkdir, symlink and link
*/
-static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
+static int create_new_entry(struct fuse_conn *fc, struct fuse_args *args,
struct inode *dir, struct dentry *entry,
umode_t mode)
{
@@ -557,22 +533,18 @@ static int create_new_entry(struct fuse_conn *fc, struct fuse_req *req,
struct fuse_forget_link *forget;
forget = fuse_alloc_forget();
- if (!forget) {
- fuse_put_request(fc, req);
+ if (!forget)
return -ENOMEM;
- }
memset(&outarg, 0, sizeof(outarg));
- req->in.h.nodeid = get_node_id(dir);
- req->out.numargs = 1;
+ args->in.h.nodeid = get_node_id(dir);
+ args->out.numargs = 1;
if (fc->minor < 9)
- req->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
+ args->out.args[0].size = FUSE_COMPAT_ENTRY_OUT_SIZE;
else
- req->out.args[0].size = sizeof(outarg);
- req->out.args[0].value = &outarg;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ args->out.args[0].size = sizeof(outarg);
+ args->out.args[0].value = &outarg;
+ err = fuse_simple_request(fc, args);
if (err)
goto out_put_forget_req;
@@ -609,9 +581,7 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
{
struct fuse_mknod_in inarg;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ FUSE_ARGS(args);
if (!fc->dont_mask)
mode &= ~current_umask();
@@ -620,14 +590,14 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
inarg.mode = mode;
inarg.rdev = new_encode_dev(rdev);
inarg.umask = current_umask();
- req->in.h.opcode = FUSE_MKNOD;
- req->in.numargs = 2;
- req->in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE :
+ args.in.h.opcode = FUSE_MKNOD;
+ args.in.numargs = 2;
+ args.in.args[0].size = fc->minor < 12 ? FUSE_COMPAT_MKNOD_IN_SIZE :
sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->in.args[1].size = entry->d_name.len + 1;
- req->in.args[1].value = entry->d_name.name;
- return create_new_entry(fc, req, dir, entry, mode);
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = entry->d_name.len + 1;
+ args.in.args[1].value = entry->d_name.name;
+ return create_new_entry(fc, &args, dir, entry, mode);
}
static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
@@ -640,9 +610,7 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
{
struct fuse_mkdir_in inarg;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ FUSE_ARGS(args);
if (!fc->dont_mask)
mode &= ~current_umask();
@@ -650,13 +618,13 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
memset(&inarg, 0, sizeof(inarg));
inarg.mode = mode;
inarg.umask = current_umask();
- req->in.h.opcode = FUSE_MKDIR;
- req->in.numargs = 2;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->in.args[1].size = entry->d_name.len + 1;
- req->in.args[1].value = entry->d_name.name;
- return create_new_entry(fc, req, dir, entry, S_IFDIR);
+ args.in.h.opcode = FUSE_MKDIR;
+ args.in.numargs = 2;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = entry->d_name.len + 1;
+ args.in.args[1].value = entry->d_name.name;
+ return create_new_entry(fc, &args, dir, entry, S_IFDIR);
}
static int fuse_symlink(struct inode *dir, struct dentry *entry,
@@ -664,17 +632,15 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
{
struct fuse_conn *fc = get_fuse_conn(dir);
unsigned len = strlen(link) + 1;
- struct fuse_req *req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ FUSE_ARGS(args);
- req->in.h.opcode = FUSE_SYMLINK;
- req->in.numargs = 2;
- req->in.args[0].size = entry->d_name.len + 1;
- req->in.args[0].value = entry->d_name.name;
- req->in.args[1].size = len;
- req->in.args[1].value = link;
- return create_new_entry(fc, req, dir, entry, S_IFLNK);
+ args.in.h.opcode = FUSE_SYMLINK;
+ args.in.numargs = 2;
+ args.in.args[0].size = entry->d_name.len + 1;
+ args.in.args[0].value = entry->d_name.name;
+ args.in.args[1].size = len;
+ args.in.args[1].value = link;
+ return create_new_entry(fc, &args, dir, entry, S_IFLNK);
}
static inline void fuse_update_ctime(struct inode *inode)
@@ -689,18 +655,14 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
{
int err;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- req->in.h.opcode = FUSE_UNLINK;
- req->in.h.nodeid = get_node_id(dir);
- req->in.numargs = 1;
- req->in.args[0].size = entry->d_name.len + 1;
- req->in.args[0].value = entry->d_name.name;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ FUSE_ARGS(args);
+
+ args.in.h.opcode = FUSE_UNLINK;
+ args.in.h.nodeid = get_node_id(dir);
+ args.in.numargs = 1;
+ args.in.args[0].size = entry->d_name.len + 1;
+ args.in.args[0].value = entry->d_name.name;
+ err = fuse_simple_request(fc, &args);
if (!err) {
struct inode *inode = entry->d_inode;
struct fuse_inode *fi = get_fuse_inode(inode);
@@ -729,18 +691,14 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
{
int err;
struct fuse_conn *fc = get_fuse_conn(dir);
- struct fuse_req *req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- req->in.h.opcode = FUSE_RMDIR;
- req->in.h.nodeid = get_node_id(dir);
- req->in.numargs = 1;
- req->in.args[0].size = entry->d_name.len + 1;
- req->in.args[0].value = entry->d_name.name;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ FUSE_ARGS(args);
+
+ args.in.h.opcode = FUSE_RMDIR;
+ args.in.h.nodeid = get_node_id(dir);
+ args.in.numargs = 1;
+ args.in.args[0].size = entry->d_name.len + 1;
+ args.in.args[0].value = entry->d_name.name;
+ err = fuse_simple_request(fc, &args);
if (!err) {
clear_nlink(entry->d_inode);
fuse_invalidate_attr(dir);
@@ -757,27 +715,21 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
int err;
struct fuse_rename2_in inarg;
struct fuse_conn *fc = get_fuse_conn(olddir);
- struct fuse_req *req;
-
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ FUSE_ARGS(args);
memset(&inarg, 0, argsize);
inarg.newdir = get_node_id(newdir);
inarg.flags = flags;
- req->in.h.opcode = opcode;
- req->in.h.nodeid = get_node_id(olddir);
- req->in.numargs = 3;
- req->in.args[0].size = argsize;
- req->in.args[0].value = &inarg;
- req->in.args[1].size = oldent->d_name.len + 1;
- req->in.args[1].value = oldent->d_name.name;
- req->in.args[2].size = newent->d_name.len + 1;
- req->in.args[2].value = newent->d_name.name;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ args.in.h.opcode = opcode;
+ args.in.h.nodeid = get_node_id(olddir);
+ args.in.numargs = 3;
+ args.in.args[0].size = argsize;
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = oldent->d_name.len + 1;
+ args.in.args[1].value = oldent->d_name.name;
+ args.in.args[2].size = newent->d_name.len + 1;
+ args.in.args[2].value = newent->d_name.name;
+ err = fuse_simple_request(fc, &args);
if (!err) {
/* ctime changes */
fuse_invalidate_attr(oldent->d_inode);
@@ -849,19 +801,17 @@ static int fuse_link(struct dentry *entry, struct inode *newdir,
struct fuse_link_in inarg;
struct inode *inode = entry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ FUSE_ARGS(args);
memset(&inarg, 0, sizeof(inarg));
inarg.oldnodeid = get_node_id(inode);
- req->in.h.opcode = FUSE_LINK;
- req->in.numargs = 2;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->in.args[1].size = newent->d_name.len + 1;
- req->in.args[1].value = newent->d_name.name;
- err = create_new_entry(fc, req, newdir, newent, inode->i_mode);
+ args.in.h.opcode = FUSE_LINK;
+ args.in.numargs = 2;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = newent->d_name.len + 1;
+ args.in.args[1].value = newent->d_name.name;
+ err = create_new_entry(fc, &args, newdir, newent, inode->i_mode);
/* Contrary to "normal" filesystems it can happen that link
makes two "logical" inodes point to the same "physical"
inode. We invalidate the attributes of the old one, so it
@@ -929,13 +879,9 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
struct fuse_getattr_in inarg;
struct fuse_attr_out outarg;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
u64 attr_version;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
attr_version = fuse_get_attr_version(fc);
memset(&inarg, 0, sizeof(inarg));
@@ -947,20 +893,18 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
inarg.getattr_flags |= FUSE_GETATTR_FH;
inarg.fh = ff->fh;
}
- req->in.h.opcode = FUSE_GETATTR;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->out.numargs = 1;
+ args.in.h.opcode = FUSE_GETATTR;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.out.numargs = 1;
if (fc->minor < 9)
- req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
+ args.out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
else
- req->out.args[0].size = sizeof(outarg);
- req->out.args[0].value = &outarg;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ args.out.args[0].size = sizeof(outarg);
+ args.out.args[0].value = &outarg;
+ err = fuse_simple_request(fc, &args);
if (!err) {
if ((inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
make_bad_inode(inode);
@@ -1102,7 +1046,7 @@ int fuse_allow_current_process(struct fuse_conn *fc)
static int fuse_access(struct inode *inode, int mask)
{
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_access_in inarg;
int err;
@@ -1111,20 +1055,14 @@ static int fuse_access(struct inode *inode, int mask)
if (fc->no_access)
return 0;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
memset(&inarg, 0, sizeof(inarg));
inarg.mask = mask & (MAY_READ | MAY_WRITE | MAY_EXEC);
- req->in.h.opcode = FUSE_ACCESS;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ args.in.h.opcode = FUSE_ACCESS;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ err = fuse_simple_request(fc, &args);
if (err == -ENOSYS) {
fc->no_access = 1;
err = 0;
@@ -1445,31 +1383,27 @@ static char *read_link(struct dentry *dentry)
{
struct inode *inode = dentry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req = fuse_get_req_nopages(fc);
+ FUSE_ARGS(args);
char *link;
-
- if (IS_ERR(req))
- return ERR_CAST(req);
+ ssize_t ret;
link = (char *) __get_free_page(GFP_KERNEL);
- if (!link) {
- link = ERR_PTR(-ENOMEM);
- goto out;
- }
- req->in.h.opcode = FUSE_READLINK;
- req->in.h.nodeid = get_node_id(inode);
- req->out.argvar = 1;
- req->out.numargs = 1;
- req->out.args[0].size = PAGE_SIZE - 1;
- req->out.args[0].value = link;
- fuse_request_send(fc, req);
- if (req->out.h.error) {
+ if (!link)
+ return ERR_PTR(-ENOMEM);
+
+ args.in.h.opcode = FUSE_READLINK;
+ args.in.h.nodeid = get_node_id(inode);
+ args.out.argvar = 1;
+ args.out.numargs = 1;
+ args.out.args[0].size = PAGE_SIZE - 1;
+ args.out.args[0].value = link;
+ ret = fuse_simple_request(fc, &args);
+ if (ret < 0) {
free_page((unsigned long) link);
- link = ERR_PTR(req->out.h.error);
- } else
- link[req->out.args[0].size] = '\0';
- out:
- fuse_put_request(fc, req);
+ link = ERR_PTR(ret);
+ } else {
+ link[ret] = '\0';
+ }
fuse_invalidate_atime(inode);
return link;
}
@@ -1629,22 +1563,22 @@ void fuse_release_nowrite(struct inode *inode)
spin_unlock(&fc->lock);
}
-static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req,
+static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_args *args,
struct inode *inode,
struct fuse_setattr_in *inarg_p,
struct fuse_attr_out *outarg_p)
{
- req->in.h.opcode = FUSE_SETATTR;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(*inarg_p);
- req->in.args[0].value = inarg_p;
- req->out.numargs = 1;
+ args->in.h.opcode = FUSE_SETATTR;
+ args->in.h.nodeid = get_node_id(inode);
+ args->in.numargs = 1;
+ args->in.args[0].size = sizeof(*inarg_p);
+ args->in.args[0].value = inarg_p;
+ args->out.numargs = 1;
if (fc->minor < 9)
- req->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
+ args->out.args[0].size = FUSE_COMPAT_ATTR_OUT_SIZE;
else
- req->out.args[0].size = sizeof(*outarg_p);
- req->out.args[0].value = outarg_p;
+ args->out.args[0].size = sizeof(*outarg_p);
+ args->out.args[0].value = outarg_p;
}
/*
@@ -1653,14 +1587,9 @@ static void fuse_setattr_fill(struct fuse_conn *fc, struct fuse_req *req,
int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
{
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_setattr_in inarg;
struct fuse_attr_out outarg;
- int err;
-
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
memset(&inarg, 0, sizeof(inarg));
memset(&outarg, 0, sizeof(outarg));
@@ -1677,12 +1606,9 @@ int fuse_flush_times(struct inode *inode, struct fuse_file *ff)
inarg.valid |= FATTR_FH;
inarg.fh = ff->fh;
}
- fuse_setattr_fill(fc, req, inode, &inarg, &outarg);
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
- return err;
+ return fuse_simple_request(fc, &args);
}
/*
@@ -1698,7 +1624,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
{
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_inode *fi = get_fuse_inode(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_setattr_in inarg;
struct fuse_attr_out outarg;
bool is_truncate = false;
@@ -1723,10 +1649,6 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
if (attr->ia_valid & ATTR_SIZE)
is_truncate = true;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
if (is_truncate) {
fuse_set_nowrite(inode);
set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
@@ -1747,10 +1669,8 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
inarg.valid |= FATTR_LOCKOWNER;
inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
}
- fuse_setattr_fill(fc, req, inode, &inarg, &outarg);
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
+ err = fuse_simple_request(fc, &args);
if (err) {
if (err == -EINTR)
fuse_invalidate_attr(inode);
@@ -1837,32 +1757,26 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
{
struct inode *inode = entry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_setxattr_in inarg;
int err;
if (fc->no_setxattr)
return -EOPNOTSUPP;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
memset(&inarg, 0, sizeof(inarg));
inarg.size = size;
inarg.flags = flags;
- req->in.h.opcode = FUSE_SETXATTR;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 3;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->in.args[1].size = strlen(name) + 1;
- req->in.args[1].value = name;
- req->in.args[2].size = size;
- req->in.args[2].value = value;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ args.in.h.opcode = FUSE_SETXATTR;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 3;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = strlen(name) + 1;
+ args.in.args[1].value = name;
+ args.in.args[2].size = size;
+ args.in.args[2].value = value;
+ err = fuse_simple_request(fc, &args);
if (err == -ENOSYS) {
fc->no_setxattr = 1;
err = -EOPNOTSUPP;
@@ -1879,7 +1793,7 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
{
struct inode *inode = entry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_getxattr_in inarg;
struct fuse_getxattr_out outarg;
ssize_t ret;
@@ -1887,40 +1801,32 @@ static ssize_t fuse_getxattr(struct dentry *entry, const char *name,
if (fc->no_getxattr)
return -EOPNOTSUPP;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
memset(&inarg, 0, sizeof(inarg));
inarg.size = size;
- req->in.h.opcode = FUSE_GETXATTR;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 2;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->in.args[1].size = strlen(name) + 1;
- req->in.args[1].value = name;
+ args.in.h.opcode = FUSE_GETXATTR;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 2;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.in.args[1].size = strlen(name) + 1;
+ args.in.args[1].value = name;
/* This is really two different operations rolled into one */
- req->out.numargs = 1;
+ args.out.numargs = 1;
if (size) {
- req->out.argvar = 1;
- req->out.args[0].size = size;
- req->out.args[0].value = value;
+ args.out.argvar = 1;
+ args.out.args[0].size = size;
+ args.out.args[0].value = value;
} else {
- req->out.args[0].size = sizeof(outarg);
- req->out.args[0].value = &outarg;
+ args.out.args[0].size = sizeof(outarg);
+ args.out.args[0].value = &outarg;
}
- fuse_request_send(fc, req);
- ret = req->out.h.error;
- if (!ret)
- ret = size ? req->out.args[0].size : outarg.size;
- else {
- if (ret == -ENOSYS) {
- fc->no_getxattr = 1;
- ret = -EOPNOTSUPP;
- }
+ ret = fuse_simple_request(fc, &args);
+ if (!ret && !size)
+ ret = outarg.size;
+ if (ret == -ENOSYS) {
+ fc->no_getxattr = 1;
+ ret = -EOPNOTSUPP;
}
- fuse_put_request(fc, req);
return ret;
}
@@ -1928,7 +1834,7 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
{
struct inode *inode = entry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_getxattr_in inarg;
struct fuse_getxattr_out outarg;
ssize_t ret;
@@ -1939,38 +1845,30 @@ static ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
if (fc->no_listxattr)
return -EOPNOTSUPP;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
memset(&inarg, 0, sizeof(inarg));
inarg.size = size;
- req->in.h.opcode = FUSE_LISTXATTR;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
+ args.in.h.opcode = FUSE_LISTXATTR;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
/* This is really two different operations rolled into one */
- req->out.numargs = 1;
+ args.out.numargs = 1;
if (size) {
- req->out.argvar = 1;
- req->out.args[0].size = size;
- req->out.args[0].value = list;
+ args.out.argvar = 1;
+ args.out.args[0].size = size;
+ args.out.args[0].value = list;
} else {
- req->out.args[0].size = sizeof(outarg);
- req->out.args[0].value = &outarg;
+ args.out.args[0].size = sizeof(outarg);
+ args.out.args[0].value = &outarg;
}
- fuse_request_send(fc, req);
- ret = req->out.h.error;
- if (!ret)
- ret = size ? req->out.args[0].size : outarg.size;
- else {
- if (ret == -ENOSYS) {
- fc->no_listxattr = 1;
- ret = -EOPNOTSUPP;
- }
+ ret = fuse_simple_request(fc, &args);
+ if (!ret && !size)
+ ret = outarg.size;
+ if (ret == -ENOSYS) {
+ fc->no_listxattr = 1;
+ ret = -EOPNOTSUPP;
}
- fuse_put_request(fc, req);
return ret;
}
@@ -1978,24 +1876,18 @@ static int fuse_removexattr(struct dentry *entry, const char *name)
{
struct inode *inode = entry->d_inode;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
int err;
if (fc->no_removexattr)
return -EOPNOTSUPP;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- req->in.h.opcode = FUSE_REMOVEXATTR;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 1;
- req->in.args[0].size = strlen(name) + 1;
- req->in.args[0].value = name;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ args.in.h.opcode = FUSE_REMOVEXATTR;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 1;
+ args.in.args[0].size = strlen(name) + 1;
+ args.in.args[0].value = name;
+ err = fuse_simple_request(fc, &args);
if (err == -ENOSYS) {
fc->no_removexattr = 1;
err = -EOPNOTSUPP;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index bf50259012ab..760b2c552197 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -24,30 +24,22 @@ static int fuse_send_open(struct fuse_conn *fc, u64 nodeid, struct file *file,
int opcode, struct fuse_open_out *outargp)
{
struct fuse_open_in inarg;
- struct fuse_req *req;
- int err;
-
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ FUSE_ARGS(args);
memset(&inarg, 0, sizeof(inarg));
inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
if (!fc->atomic_o_trunc)
inarg.flags &= ~O_TRUNC;
- req->in.h.opcode = opcode;
- req->in.h.nodeid = nodeid;
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->out.numargs = 1;
- req->out.args[0].size = sizeof(*outargp);
- req->out.args[0].value = outargp;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ args.in.h.opcode = opcode;
+ args.in.h.nodeid = nodeid;
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.out.numargs = 1;
+ args.out.args[0].size = sizeof(*outargp);
+ args.out.args[0].value = outargp;
- return err;
+ return fuse_simple_request(fc, &args);
}
struct fuse_file *fuse_file_alloc(struct fuse_conn *fc)
@@ -89,37 +81,9 @@ struct fuse_file *fuse_file_get(struct fuse_file *ff)
return ff;
}
-static void fuse_release_async(struct work_struct *work)
-{
- struct fuse_req *req;
- struct fuse_conn *fc;
- struct path path;
-
- req = container_of(work, struct fuse_req, misc.release.work);
- path = req->misc.release.path;
- fc = get_fuse_conn(path.dentry->d_inode);
-
- fuse_put_request(fc, req);
- path_put(&path);
-}
-
static void fuse_release_end(struct fuse_conn *fc, struct fuse_req *req)
{
- if (fc->destroy_req) {
- /*
- * If this is a fuseblk mount, then it's possible that
- * releasing the path will result in releasing the
- * super block and sending the DESTROY request. If
- * the server is single threaded, this would hang.
- * For this reason do the path_put() in a separate
- * thread.
- */
- atomic_inc(&req->count);
- INIT_WORK(&req->misc.release.work, fuse_release_async);
- schedule_work(&req->misc.release.work);
- } else {
- path_put(&req->misc.release.path);
- }
+ iput(req->misc.release.inode);
}
static void fuse_file_put(struct fuse_file *ff, bool sync)
@@ -133,12 +97,12 @@ static void fuse_file_put(struct fuse_file *ff, bool sync)
* implement 'open'
*/
req->background = 0;
- path_put(&req->misc.release.path);
+ iput(req->misc.release.inode);
fuse_put_request(ff->fc, req);
} else if (sync) {
req->background = 0;
fuse_request_send(ff->fc, req);
- path_put(&req->misc.release.path);
+ iput(req->misc.release.inode);
fuse_put_request(ff->fc, req);
} else {
req->end = fuse_release_end;
@@ -297,9 +261,8 @@ void fuse_release_common(struct file *file, int opcode)
inarg->lock_owner = fuse_lock_owner_id(ff->fc,
(fl_owner_t) file);
}
- /* Hold vfsmount and dentry until release is finished */
- path_get(&file->f_path);
- req->misc.release.path = file->f_path;
+ /* Hold inode until release is finished */
+ req->misc.release.inode = igrab(file_inode(file));
/*
* Normally this will send the RELEASE request, however if
@@ -480,7 +443,7 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
struct inode *inode = file->f_mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_file *ff = file->private_data;
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_fsync_in inarg;
int err;
@@ -506,23 +469,15 @@ int fuse_fsync_common(struct file *file, loff_t start, loff_t end,
if ((!isdir && fc->no_fsync) || (isdir && fc->no_fsyncdir))
goto out;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req)) {
- err = PTR_ERR(req);
- goto out;
- }
-
memset(&inarg, 0, sizeof(inarg));
inarg.fh = ff->fh;
inarg.fsync_flags = datasync ? 1 : 0;
- req->in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ args.in.h.opcode = isdir ? FUSE_FSYNCDIR : FUSE_FSYNC;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ err = fuse_simple_request(fc, &args);
if (err == -ENOSYS) {
if (isdir)
fc->no_fsyncdir = 1;
@@ -2156,49 +2111,44 @@ static int convert_fuse_file_lock(const struct fuse_file_lock *ffl,
return 0;
}
-static void fuse_lk_fill(struct fuse_req *req, struct file *file,
+static void fuse_lk_fill(struct fuse_args *args, struct file *file,
const struct file_lock *fl, int opcode, pid_t pid,
- int flock)
+ int flock, struct fuse_lk_in *inarg)
{
struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_file *ff = file->private_data;
- struct fuse_lk_in *arg = &req->misc.lk_in;
-
- arg->fh = ff->fh;
- arg->owner = fuse_lock_owner_id(fc, fl->fl_owner);
- arg->lk.start = fl->fl_start;
- arg->lk.end = fl->fl_end;
- arg->lk.type = fl->fl_type;
- arg->lk.pid = pid;
+
+ memset(inarg, 0, sizeof(*inarg));
+ inarg->fh = ff->fh;
+ inarg->owner = fuse_lock_owner_id(fc, fl->fl_owner);
+ inarg->lk.start = fl->fl_start;
+ inarg->lk.end = fl->fl_end;
+ inarg->lk.type = fl->fl_type;
+ inarg->lk.pid = pid;
if (flock)
- arg->lk_flags |= FUSE_LK_FLOCK;
- req->in.h.opcode = opcode;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(*arg);
- req->in.args[0].value = arg;
+ inarg->lk_flags |= FUSE_LK_FLOCK;
+ args->in.h.opcode = opcode;
+ args->in.h.nodeid = get_node_id(inode);
+ args->in.numargs = 1;
+ args->in.args[0].size = sizeof(*inarg);
+ args->in.args[0].value = inarg;
}
static int fuse_getlk(struct file *file, struct file_lock *fl)
{
struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
+ struct fuse_lk_in inarg;
struct fuse_lk_out outarg;
int err;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
- fuse_lk_fill(req, file, fl, FUSE_GETLK, 0, 0);
- req->out.numargs = 1;
- req->out.args[0].size = sizeof(outarg);
- req->out.args[0].value = &outarg;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ fuse_lk_fill(&args, file, fl, FUSE_GETLK, 0, 0, &inarg);
+ args.out.numargs = 1;
+ args.out.args[0].size = sizeof(outarg);
+ args.out.args[0].value = &outarg;
+ err = fuse_simple_request(fc, &args);
if (!err)
err = convert_fuse_file_lock(&outarg.lk, fl);
@@ -2209,7 +2159,8 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
{
struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
+ struct fuse_lk_in inarg;
int opcode = (fl->fl_flags & FL_SLEEP) ? FUSE_SETLKW : FUSE_SETLK;
pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0;
int err;
@@ -2223,17 +2174,13 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
if (fl->fl_flags & FL_CLOSE)
return 0;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ fuse_lk_fill(&args, file, fl, opcode, pid, flock, &inarg);
+ err = fuse_simple_request(fc, &args);
- fuse_lk_fill(req, file, fl, opcode, pid, flock);
- fuse_request_send(fc, req);
- err = req->out.h.error;
/* locking is restartable */
if (err == -EINTR)
err = -ERESTARTSYS;
- fuse_put_request(fc, req);
+
return err;
}
@@ -2283,7 +2230,7 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
{
struct inode *inode = mapping->host;
struct fuse_conn *fc = get_fuse_conn(inode);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_bmap_in inarg;
struct fuse_bmap_out outarg;
int err;
@@ -2291,24 +2238,18 @@ static sector_t fuse_bmap(struct address_space *mapping, sector_t block)
if (!inode->i_sb->s_bdev || fc->no_bmap)
return 0;
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return 0;
-
memset(&inarg, 0, sizeof(inarg));
inarg.block = block;
inarg.blocksize = inode->i_sb->s_blocksize;
- req->in.h.opcode = FUSE_BMAP;
- req->in.h.nodeid = get_node_id(inode);
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->out.numargs = 1;
- req->out.args[0].size = sizeof(outarg);
- req->out.args[0].value = &outarg;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ args.in.h.opcode = FUSE_BMAP;
+ args.in.h.nodeid = get_node_id(inode);
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.out.numargs = 1;
+ args.out.args[0].size = sizeof(outarg);
+ args.out.args[0].value = &outarg;
+ err = fuse_simple_request(fc, &args);
if (err == -ENOSYS)
fc->no_bmap = 1;
@@ -2776,7 +2717,7 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
struct fuse_conn *fc = ff->fc;
struct fuse_poll_in inarg = { .fh = ff->fh, .kh = ff->kh };
struct fuse_poll_out outarg;
- struct fuse_req *req;
+ FUSE_ARGS(args);
int err;
if (fc->no_poll)
@@ -2794,21 +2735,15 @@ unsigned fuse_file_poll(struct file *file, poll_table *wait)
fuse_register_polled_file(fc, ff);
}
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return POLLERR;
-
- req->in.h.opcode = FUSE_POLL;
- req->in.h.nodeid = ff->nodeid;
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- req->out.numargs = 1;
- req->out.args[0].size = sizeof(outarg);
- req->out.args[0].value = &outarg;
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
+ args.in.h.opcode = FUSE_POLL;
+ args.in.h.nodeid = ff->nodeid;
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ args.out.numargs = 1;
+ args.out.args[0].size = sizeof(outarg);
+ args.out.args[0].value = &outarg;
+ err = fuse_simple_request(fc, &args);
if (!err)
return outarg.revents;
@@ -2949,10 +2884,10 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
loff_t length)
{
struct fuse_file *ff = file->private_data;
- struct inode *inode = file->f_inode;
+ struct inode *inode = file_inode(file);
struct fuse_inode *fi = get_fuse_inode(inode);
struct fuse_conn *fc = ff->fc;
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_fallocate_in inarg = {
.fh = ff->fh,
.offset = offset,
@@ -2985,25 +2920,16 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
if (!(mode & FALLOC_FL_KEEP_SIZE))
set_bit(FUSE_I_SIZE_UNSTABLE, &fi->state);
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req)) {
- err = PTR_ERR(req);
- goto out;
- }
-
- req->in.h.opcode = FUSE_FALLOCATE;
- req->in.h.nodeid = ff->nodeid;
- req->in.numargs = 1;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- fuse_request_send(fc, req);
- err = req->out.h.error;
+ args.in.h.opcode = FUSE_FALLOCATE;
+ args.in.h.nodeid = ff->nodeid;
+ args.in.numargs = 1;
+ args.in.args[0].size = sizeof(inarg);
+ args.in.args[0].value = &inarg;
+ err = fuse_simple_request(fc, &args);
if (err == -ENOSYS) {
fc->no_fallocate = 1;
err = -EOPNOTSUPP;
}
- fuse_put_request(fc, req);
-
if (err)
goto out;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index e8e47a6ab518..e0fc6725d1d0 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -213,7 +213,7 @@ struct fuse_out {
unsigned numargs;
/** Array of arguments */
- struct fuse_arg args[3];
+ struct fuse_arg args[2];
};
/** FUSE page descriptor */
@@ -222,6 +222,25 @@ struct fuse_page_desc {
unsigned int offset;
};
+struct fuse_args {
+ struct {
+ struct {
+ uint32_t opcode;
+ uint64_t nodeid;
+ } h;
+ unsigned numargs;
+ struct fuse_in_arg args[3];
+
+ } in;
+ struct {
+ unsigned argvar:1;
+ unsigned numargs;
+ struct fuse_arg args[2];
+ } out;
+};
+
+#define FUSE_ARGS(args) struct fuse_args args = {}
+
/** The request state */
enum fuse_req_state {
FUSE_REQ_INIT = 0,
@@ -305,11 +324,8 @@ struct fuse_req {
/** Data for asynchronous requests */
union {
struct {
- union {
- struct fuse_release_in in;
- struct work_struct work;
- };
- struct path path;
+ struct fuse_release_in in;
+ struct inode *inode;
} release;
struct fuse_init_in init_in;
struct fuse_init_out init_out;
@@ -324,7 +340,6 @@ struct fuse_req {
struct fuse_req *next;
} write;
struct fuse_notify_retrieve_in retrieve_in;
- struct fuse_lk_in lk_in;
} misc;
/** page vector */
@@ -754,15 +769,6 @@ struct fuse_req *fuse_get_req_for_background(struct fuse_conn *fc,
void __fuse_get_request(struct fuse_req *req);
/**
- * Get a request, may fail with -ENOMEM,
- * useful for callers who doesn't use req->pages[]
- */
-static inline struct fuse_req *fuse_get_req_nopages(struct fuse_conn *fc)
-{
- return fuse_get_req(fc, 0);
-}
-
-/**
* Gets a requests for a file operation, always succeeds
*/
struct fuse_req *fuse_get_req_nofail_nopages(struct fuse_conn *fc,
@@ -780,6 +786,11 @@ void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req);
void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req);
/**
+ * Simple request sending that does request allocation and freeing
+ */
+ssize_t fuse_simple_request(struct fuse_conn *fc, struct fuse_args *args);
+
+/**
* Send a request in the background
*/
void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req);
@@ -804,8 +815,6 @@ void fuse_invalidate_atime(struct inode *inode);
*/
struct fuse_conn *fuse_conn_get(struct fuse_conn *fc);
-void fuse_conn_kill(struct fuse_conn *fc);
-
/**
* Initialize fuse_conn
*/
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 03246cd9d47a..6749109f255d 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -376,28 +376,13 @@ static void fuse_bdi_destroy(struct fuse_conn *fc)
bdi_destroy(&fc->bdi);
}
-void fuse_conn_kill(struct fuse_conn *fc)
-{
- spin_lock(&fc->lock);
- fc->connected = 0;
- fc->blocked = 0;
- fc->initialized = 1;
- spin_unlock(&fc->lock);
- /* Flush all readers on this fs */
- kill_fasync(&fc->fasync, SIGIO, POLL_IN);
- wake_up_all(&fc->waitq);
- wake_up_all(&fc->blocked_waitq);
- wake_up_all(&fc->reserved_req_waitq);
-}
-EXPORT_SYMBOL_GPL(fuse_conn_kill);
-
static void fuse_put_super(struct super_block *sb)
{
struct fuse_conn *fc = get_fuse_conn_super(sb);
fuse_send_destroy(fc);
- fuse_conn_kill(fc);
+ fuse_abort_conn(fc);
mutex_lock(&fuse_mutex);
list_del(&fc->entry);
fuse_ctl_remove_conn(fc);
@@ -425,7 +410,7 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
{
struct super_block *sb = dentry->d_sb;
struct fuse_conn *fc = get_fuse_conn_super(sb);
- struct fuse_req *req;
+ FUSE_ARGS(args);
struct fuse_statfs_out outarg;
int err;
@@ -434,23 +419,17 @@ static int fuse_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
- req = fuse_get_req_nopages(fc);
- if (IS_ERR(req))
- return PTR_ERR(req);
-
memset(&outarg, 0, sizeof(outarg));
- req->in.numargs = 0;
- req->in.h.opcode = FUSE_STATFS;
- req->in.h.nodeid = get_node_id(dentry->d_inode);
- req->out.numargs = 1;
- req->out.args[0].size =
+ args.in.numargs = 0;
+ args.in.h.opcode = FUSE_STATFS;
+ args.in.h.nodeid = get_node_id(dentry->d_inode);
+ args.out.numargs = 1;
+ args.out.args[0].size =
fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg);
- req->out.args[0].value = &outarg;
- fuse_request_send(fc, req);
- err = req->out.h.error;
+ args.out.args[0].value = &outarg;
+ err = fuse_simple_request(fc, &args);
if (!err)
convert_fuse_statfs(buf, &outarg.st);
- fuse_put_request(fc, req);
return err;
}
diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c
index 32602c667b4a..7892e6fddb66 100644
--- a/fs/hfsplus/catalog.c
+++ b/fs/hfsplus/catalog.c
@@ -38,21 +38,30 @@ int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1,
return hfsplus_strcmp(&k1->cat.name, &k2->cat.name);
}
-void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
- u32 parent, struct qstr *str)
+/* Generates key for catalog file/folders record. */
+int hfsplus_cat_build_key(struct super_block *sb,
+ hfsplus_btree_key *key, u32 parent, struct qstr *str)
{
- int len;
+ int len, err;
key->cat.parent = cpu_to_be32(parent);
- if (str) {
- hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN,
- str->name, str->len);
- len = be16_to_cpu(key->cat.name.length);
- } else {
- key->cat.name.length = 0;
- len = 0;
- }
+ err = hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN,
+ str->name, str->len);
+ if (unlikely(err < 0))
+ return err;
+
+ len = be16_to_cpu(key->cat.name.length);
key->key_len = cpu_to_be16(6 + 2 * len);
+ return 0;
+}
+
+/* Generates key for catalog thread record. */
+void hfsplus_cat_build_key_with_cnid(struct super_block *sb,
+ hfsplus_btree_key *key, u32 parent)
+{
+ key->cat.parent = cpu_to_be32(parent);
+ key->cat.name.length = 0;
+ key->key_len = cpu_to_be16(6);
}
static void hfsplus_cat_build_key_uni(hfsplus_btree_key *key, u32 parent,
@@ -167,11 +176,16 @@ static int hfsplus_fill_cat_thread(struct super_block *sb,
hfsplus_cat_entry *entry, int type,
u32 parentid, struct qstr *str)
{
+ int err;
+
entry->type = cpu_to_be16(type);
entry->thread.reserved = 0;
entry->thread.parentID = cpu_to_be32(parentid);
- hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN,
+ err = hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN,
str->name, str->len);
+ if (unlikely(err < 0))
+ return err;
+
return 10 + be16_to_cpu(entry->thread.nodeName.length) * 2;
}
@@ -183,7 +197,7 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid,
int err;
u16 type;
- hfsplus_cat_build_key(sb, fd->search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd->search_key, cnid);
err = hfs_brec_read(fd, &tmp, sizeof(hfsplus_cat_entry));
if (err)
return err;
@@ -250,11 +264,16 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
if (err)
return err;
- hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
entry_size = hfsplus_fill_cat_thread(sb, &entry,
S_ISDIR(inode->i_mode) ?
HFSPLUS_FOLDER_THREAD : HFSPLUS_FILE_THREAD,
dir->i_ino, str);
+ if (unlikely(entry_size < 0)) {
+ err = entry_size;
+ goto err2;
+ }
+
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
if (!err)
@@ -265,7 +284,10 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
if (err)
goto err2;
- hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+ err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+ if (unlikely(err))
+ goto err1;
+
entry_size = hfsplus_cat_build_record(&entry, cnid, inode);
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
@@ -288,7 +310,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir,
return 0;
err1:
- hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
if (!hfs_brec_find(&fd, hfs_find_rec_by_key))
hfs_brec_remove(&fd);
err2:
@@ -313,7 +335,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
if (!str) {
int len;
- hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -329,7 +351,9 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
off + 2, len);
fd.search_key->key_len = cpu_to_be16(6 + len);
} else
- hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+ err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str);
+ if (unlikely(err))
+ goto out;
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
@@ -360,7 +384,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str)
if (err)
goto out;
- hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid);
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -405,7 +429,11 @@ int hfsplus_rename_cat(u32 cnid,
dst_fd = src_fd;
/* find the old dir entry and read the data */
- hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
+ err = hfsplus_cat_build_key(sb, src_fd.search_key,
+ src_dir->i_ino, src_name);
+ if (unlikely(err))
+ goto out;
+
err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -419,7 +447,11 @@ int hfsplus_rename_cat(u32 cnid,
type = be16_to_cpu(entry.type);
/* create new dir entry with the data from the old entry */
- hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name);
+ err = hfsplus_cat_build_key(sb, dst_fd.search_key,
+ dst_dir->i_ino, dst_name);
+ if (unlikely(err))
+ goto out;
+
err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
if (!err)
@@ -436,7 +468,11 @@ int hfsplus_rename_cat(u32 cnid,
dst_dir->i_mtime = dst_dir->i_ctime = CURRENT_TIME_SEC;
/* finally remove the old entry */
- hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name);
+ err = hfsplus_cat_build_key(sb, src_fd.search_key,
+ src_dir->i_ino, src_name);
+ if (unlikely(err))
+ goto out;
+
err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -449,7 +485,7 @@ int hfsplus_rename_cat(u32 cnid,
src_dir->i_mtime = src_dir->i_ctime = CURRENT_TIME_SEC;
/* remove old thread entry */
- hfsplus_cat_build_key(sb, src_fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, src_fd.search_key, cnid);
err = hfs_brec_find(&src_fd, hfs_find_rec_by_key);
if (err)
goto out;
@@ -459,9 +495,14 @@ int hfsplus_rename_cat(u32 cnid,
goto out;
/* create new thread entry */
- hfsplus_cat_build_key(sb, dst_fd.search_key, cnid, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, dst_fd.search_key, cnid);
entry_size = hfsplus_fill_cat_thread(sb, &entry, type,
dst_dir->i_ino, dst_name);
+ if (unlikely(entry_size < 0)) {
+ err = entry_size;
+ goto out;
+ }
+
err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key);
if (err != -ENOENT) {
if (!err)
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index 610a3260bef1..435bea231cc6 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -44,7 +44,10 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry,
err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd);
if (err)
return ERR_PTR(err);
- hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name);
+ err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino,
+ &dentry->d_name);
+ if (unlikely(err < 0))
+ goto fail;
again:
err = hfs_brec_read(&fd, &entry, sizeof(entry));
if (err) {
@@ -97,9 +100,11 @@ again:
be32_to_cpu(entry.file.permissions.dev);
str.len = sprintf(name, "iNode%d", linkid);
str.name = name;
- hfsplus_cat_build_key(sb, fd.search_key,
+ err = hfsplus_cat_build_key(sb, fd.search_key,
HFSPLUS_SB(sb)->hidden_dir->i_ino,
&str);
+ if (unlikely(err < 0))
+ goto fail;
goto again;
}
} else if (!dentry->d_fsdata)
@@ -145,7 +150,7 @@ static int hfsplus_readdir(struct file *file, struct dir_context *ctx)
err = -ENOMEM;
goto out;
}
- hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL);
+ hfsplus_cat_build_key_with_cnid(sb, fd.search_key, inode->i_ino);
err = hfs_brec_find(&fd, hfs_find_rec_by_key);
if (err)
goto out;
diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h
index eb5e059f481a..b0441d65fa54 100644
--- a/fs/hfsplus/hfsplus_fs.h
+++ b/fs/hfsplus/hfsplus_fs.h
@@ -443,8 +443,10 @@ int hfsplus_cat_case_cmp_key(const hfsplus_btree_key *k1,
const hfsplus_btree_key *k2);
int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1,
const hfsplus_btree_key *k2);
-void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
+int hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key,
u32 parent, struct qstr *str);
+void hfsplus_cat_build_key_with_cnid(struct super_block *sb,
+ hfsplus_btree_key *key, u32 parent);
void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms);
int hfsplus_find_cat(struct super_block *sb, u32 cnid,
struct hfs_find_data *fd);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 4cf2024b87da..593af2fdcc2d 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -515,7 +515,9 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
err = hfs_find_init(sbi->cat_tree, &fd);
if (err)
goto out_put_root;
- hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str);
+ err = hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str);
+ if (unlikely(err < 0))
+ goto out_put_root;
if (!hfs_brec_read(&fd, &entry, sizeof(entry))) {
hfs_find_exit(&fd);
if (entry.type != cpu_to_be16(HFSPLUS_FOLDER))
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 386303dca382..dddbde4f56f4 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -224,7 +224,7 @@ static int jffs2_add_tn_to_tree(struct jffs2_sb_info *c,
dbg_readinode("insert fragment %#04x-%#04x, ver %u at %08x\n", tn->fn->ofs, fn_end, tn->version, ref_offset(tn->fn->raw));
- /* If a node has zero dsize, we only have to keep if it if it might be the
+ /* If a node has zero dsize, we only have to keep it if it might be the
node with highest version -- i.e. the one which will end up as f->metadata.
Note that such nodes won't be REF_UNCHECKED since there are no data to
check anyway. */
diff --git a/fs/jffs2/summary.c b/fs/jffs2/summary.c
index c522d098bb4f..bc5385471a6e 100644
--- a/fs/jffs2/summary.c
+++ b/fs/jffs2/summary.c
@@ -844,6 +844,7 @@ static int jffs2_sum_write_data(struct jffs2_sb_info *c, struct jffs2_eraseblock
/* Write out summary information - called from jffs2_do_reserve_space */
int jffs2_sum_write_sumnode(struct jffs2_sb_info *c)
+ __must_hold(&c->erase_completion_block)
{
int datasize, infosize, padsize;
struct jffs2_eraseblock *jeb;
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 697390ea47b8..ddc9f9612f16 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -448,27 +448,6 @@ static struct mempolicy *kernfs_vma_get_policy(struct vm_area_struct *vma,
return pol;
}
-static int kernfs_vma_migrate(struct vm_area_struct *vma,
- const nodemask_t *from, const nodemask_t *to,
- unsigned long flags)
-{
- struct file *file = vma->vm_file;
- struct kernfs_open_file *of = kernfs_of(file);
- int ret;
-
- if (!of->vm_ops)
- return 0;
-
- if (!kernfs_get_active(of->kn))
- return 0;
-
- ret = 0;
- if (of->vm_ops->migrate)
- ret = of->vm_ops->migrate(vma, from, to, flags);
-
- kernfs_put_active(of->kn);
- return ret;
-}
#endif
static const struct vm_operations_struct kernfs_vm_ops = {
@@ -479,7 +458,6 @@ static const struct vm_operations_struct kernfs_vm_ops = {
#ifdef CONFIG_NUMA
.set_policy = kernfs_vma_set_policy,
.get_policy = kernfs_vma_get_policy,
- .migrate = kernfs_vma_migrate,
#endif
};
diff --git a/fs/namespace.c b/fs/namespace.c
index 30df6e7dd807..cd1e9681a0cf 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -963,7 +963,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
}
/* Don't allow unprivileged users to reveal what is under a mount */
- if ((flag & CL_UNPRIVILEGED) && list_empty(&old->mnt_expire))
+ if ((flag & CL_UNPRIVILEGED) &&
+ (!(flag & CL_EXPIRE) || list_empty(&old->mnt_expire)))
mnt->mnt.mnt_flags |= MNT_LOCKED;
atomic_inc(&sb->s_active);
@@ -1369,6 +1370,8 @@ void umount_tree(struct mount *mnt, int how)
}
if (last) {
last->mnt_hash.next = unmounted.first;
+ if (unmounted.first)
+ unmounted.first->pprev = &last->mnt_hash.next;
unmounted.first = tmp_list.first;
unmounted.first->pprev = &unmounted.first;
}
@@ -1544,6 +1547,9 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
goto dput_and_out;
if (mnt->mnt.mnt_flags & MNT_LOCKED)
goto dput_and_out;
+ retval = -EPERM;
+ if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
+ goto dput_and_out;
retval = do_umount(mnt, flags);
dput_and_out:
@@ -1606,7 +1612,6 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
if (IS_ERR(q))
return q;
- q->mnt.mnt_flags &= ~MNT_LOCKED;
q->mnt_mountpoint = mnt->mnt_mountpoint;
p = mnt;
@@ -2097,7 +2102,13 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
}
if ((mnt->mnt.mnt_flags & MNT_LOCK_NODEV) &&
!(mnt_flags & MNT_NODEV)) {
- return -EPERM;
+ /* Was the nodev implicitly added in mount? */
+ if ((mnt->mnt_ns->user_ns != &init_user_ns) &&
+ !(sb->s_type->fs_flags & FS_USERNS_DEV_MOUNT)) {
+ mnt_flags |= MNT_NODEV;
+ } else {
+ return -EPERM;
+ }
}
if ((mnt->mnt.mnt_flags & MNT_LOCK_NOSUID) &&
!(mnt_flags & MNT_NOSUID)) {
@@ -2958,6 +2969,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
/* mount new_root on / */
attach_mnt(new_mnt, real_mount(root_parent.mnt), root_mp);
touch_mnt_namespace(current->nsproxy->mnt_ns);
+ /* A moved mount should not expire automatically */
+ list_del_init(&new_mnt->mnt_expire);
unlock_mount_hash();
chroot_fs_refs(&root, &new);
put_mountpoint(root_mp);
@@ -3002,6 +3015,7 @@ static void __init init_mount_tree(void)
root.mnt = mnt;
root.dentry = mnt->mnt_root;
+ mnt->mnt_flags |= MNT_LOCKED;
set_fs_pwd(current->fs, &root);
set_fs_root(current->fs, &root);
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index a93bf9892256..fcae9ef1a328 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -5662,7 +5662,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 cpos, u32 phys_cpos, u32 len, int flags,
struct ocfs2_cached_dealloc_ctxt *dealloc,
- u64 refcount_loc)
+ u64 refcount_loc, bool refcount_tree_locked)
{
int ret, credits = 0, extra_blocks = 0;
u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
@@ -5676,11 +5676,13 @@ int ocfs2_remove_btree_range(struct inode *inode,
BUG_ON(!(OCFS2_I(inode)->ip_dyn_features &
OCFS2_HAS_REFCOUNT_FL));
- ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
- &ref_tree, NULL);
- if (ret) {
- mlog_errno(ret);
- goto bail;
+ if (!refcount_tree_locked) {
+ ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
+ &ref_tree, NULL);
+ if (ret) {
+ mlog_errno(ret);
+ goto bail;
+ }
}
ret = ocfs2_prepare_refcount_change_for_del(inode,
@@ -7021,6 +7023,7 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb,
u64 refcount_loc = le64_to_cpu(di->i_refcount_loc);
struct ocfs2_extent_tree et;
struct ocfs2_cached_dealloc_ctxt dealloc;
+ struct ocfs2_refcount_tree *ref_tree = NULL;
ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
ocfs2_init_dealloc_ctxt(&dealloc);
@@ -7130,9 +7133,18 @@ start:
phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
+ if ((flags & OCFS2_EXT_REFCOUNTED) && trunc_len && !ref_tree) {
+ status = ocfs2_lock_refcount_tree(osb, refcount_loc, 1,
+ &ref_tree, NULL);
+ if (status) {
+ mlog_errno(status);
+ goto bail;
+ }
+ }
+
status = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
phys_cpos, trunc_len, flags, &dealloc,
- refcount_loc);
+ refcount_loc, true);
if (status < 0) {
mlog_errno(status);
goto bail;
@@ -7147,6 +7159,8 @@ start:
goto start;
bail:
+ if (ref_tree)
+ ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
ocfs2_schedule_truncate_log_flush(osb, 1);
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index ca381c584127..fb09b97db162 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -142,7 +142,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
struct ocfs2_extent_tree *et,
u32 cpos, u32 phys_cpos, u32 len, int flags,
struct ocfs2_cached_dealloc_ctxt *dealloc,
- u64 refcount_loc);
+ u64 refcount_loc, bool refcount_tree_locked);
int ocfs2_num_free_extents(struct ocfs2_super *osb,
struct ocfs2_extent_tree *et);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index d9f222987f24..46d93e941f3d 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -894,7 +894,7 @@ void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
}
}
-static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
+static void ocfs2_unlock_pages(struct ocfs2_write_ctxt *wc)
{
int i;
@@ -915,7 +915,11 @@ static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
page_cache_release(wc->w_target_page);
}
ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);
+}
+static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
+{
+ ocfs2_unlock_pages(wc);
brelse(wc->w_di_bh);
kfree(wc);
}
@@ -2042,11 +2046,19 @@ out_write_size:
ocfs2_update_inode_fsync_trans(handle, inode, 1);
ocfs2_journal_dirty(handle, wc->w_di_bh);
+ /* unlock pages before dealloc since it needs acquiring j_trans_barrier
+ * lock, or it will cause a deadlock since journal commit threads holds
+ * this lock and will ask for the page lock when flushing the data.
+ * put it here to preserve the unlock order.
+ */
+ ocfs2_unlock_pages(wc);
+
ocfs2_commit_trans(osb, handle);
ocfs2_run_deallocs(osb, &wc->w_dealloc);
- ocfs2_free_write_ctxt(wc);
+ brelse(wc->w_di_bh);
+ kfree(wc);
return copied;
}
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index 79d56dc981bc..319e786175af 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -4479,7 +4479,7 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno);
ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, 0,
- &dealloc, 0);
+ &dealloc, 0, false);
if (ret) {
mlog_errno(ret);
goto out;
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 3689b3592042..a6944b25fd5b 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -695,14 +695,6 @@ void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
res->inflight_assert_workers);
}
-static void dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
- struct dlm_lock_resource *res)
-{
- spin_lock(&res->spinlock);
- __dlm_lockres_grab_inflight_worker(dlm, res);
- spin_unlock(&res->spinlock);
-}
-
static void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm,
struct dlm_lock_resource *res)
{
@@ -1646,6 +1638,7 @@ send_response:
}
mlog(0, "%u is the owner of %.*s, cleaning everyone else\n",
dlm->node_num, res->lockname.len, res->lockname.name);
+ spin_lock(&res->spinlock);
ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx,
DLM_ASSERT_MASTER_MLE_CLEANUP);
if (ret < 0) {
@@ -1653,7 +1646,8 @@ send_response:
response = DLM_MASTER_RESP_ERROR;
dlm_lockres_put(res);
} else
- dlm_lockres_grab_inflight_worker(dlm, res);
+ __dlm_lockres_grab_inflight_worker(dlm, res);
+ spin_unlock(&res->spinlock);
} else {
if (res)
dlm_lockres_put(res);
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 69fb9f75b082..3950693dd0f6 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -1803,7 +1803,7 @@ static int ocfs2_remove_inode_range(struct inode *inode,
ret = ocfs2_remove_btree_range(inode, &et, trunc_cpos,
phys_cpos, trunc_len, flags,
- &dealloc, refcount_loc);
+ &dealloc, refcount_loc, false);
if (ret < 0) {
mlog_errno(ret);
goto out;
diff --git a/fs/pnode.c b/fs/pnode.c
index aae331a5d03b..260ac8f898a4 100644
--- a/fs/pnode.c
+++ b/fs/pnode.c
@@ -242,6 +242,7 @@ static int propagate_one(struct mount *m)
child = copy_tree(last_source, last_source->mnt.mnt_root, type);
if (IS_ERR(child))
return PTR_ERR(child);
+ child->mnt.mnt_flags &= ~MNT_LOCKED;
mnt_set_mountpoint(m, mp, child);
last_dest = m;
last_source = child;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 590aeda5af12..3f3d7aeb0712 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2464,6 +2464,57 @@ static const struct file_operations proc_projid_map_operations = {
.llseek = seq_lseek,
.release = proc_id_map_release,
};
+
+static int proc_setgroups_open(struct inode *inode, struct file *file)
+{
+ struct user_namespace *ns = NULL;
+ struct task_struct *task;
+ int ret;
+
+ ret = -ESRCH;
+ task = get_proc_task(inode);
+ if (task) {
+ rcu_read_lock();
+ ns = get_user_ns(task_cred_xxx(task, user_ns));
+ rcu_read_unlock();
+ put_task_struct(task);
+ }
+ if (!ns)
+ goto err;
+
+ if (file->f_mode & FMODE_WRITE) {
+ ret = -EACCES;
+ if (!ns_capable(ns, CAP_SYS_ADMIN))
+ goto err_put_ns;
+ }
+
+ ret = single_open(file, &proc_setgroups_show, ns);
+ if (ret)
+ goto err_put_ns;
+
+ return 0;
+err_put_ns:
+ put_user_ns(ns);
+err:
+ return ret;
+}
+
+static int proc_setgroups_release(struct inode *inode, struct file *file)
+{
+ struct seq_file *seq = file->private_data;
+ struct user_namespace *ns = seq->private;
+ int ret = single_release(inode, file);
+ put_user_ns(ns);
+ return ret;
+}
+
+static const struct file_operations proc_setgroups_operations = {
+ .open = proc_setgroups_open,
+ .write = proc_setgroups_write,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = proc_setgroups_release,
+};
#endif /* CONFIG_USER_NS */
static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
@@ -2572,6 +2623,7 @@ static const struct pid_entry tgid_base_stuff[] = {
REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
+ REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
#endif
#ifdef CONFIG_CHECKPOINT_RESTORE
REG("timers", S_IRUGO, proc_timers_operations),
@@ -2916,6 +2968,7 @@ static const struct pid_entry tid_base_stuff[] = {
REG("uid_map", S_IRUGO|S_IWUSR, proc_uid_map_operations),
REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
+ REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations),
#endif
};
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index aa1eee06420f..d3ebf2e61853 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -12,6 +12,9 @@
#include <linux/vmstat.h>
#include <linux/atomic.h>
#include <linux/vmalloc.h>
+#ifdef CONFIG_CMA
+#include <linux/cma.h>
+#endif
#include <asm/page.h>
#include <asm/pgtable.h>
#include "internal.h"
@@ -138,6 +141,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
"AnonHugePages: %8lu kB\n"
#endif
+#ifdef CONFIG_CMA
+ "CmaTotal: %8lu kB\n"
+ "CmaFree: %8lu kB\n"
+#endif
,
K(i.totalram),
K(i.freeram),
@@ -187,12 +194,16 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
vmi.used >> 10,
vmi.largest_chunk >> 10
#ifdef CONFIG_MEMORY_FAILURE
- ,atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10)
+ , atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10)
#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- ,K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
+ , K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
HPAGE_PMD_NR)
#endif
+#ifdef CONFIG_CMA
+ , K(totalcma_pages)
+ , K(global_page_state(NR_FREE_CMA_PAGES))
+#endif
);
hugetlb_report_meminfo(m);
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index bf2d03f8fd3e..510413eb25b8 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -159,7 +159,7 @@ static int show_stat(struct seq_file *p, void *v)
/* sum again ? it could be updated? */
for_each_irq_nr(j)
- seq_put_decimal_ull(p, ' ', kstat_irqs(j));
+ seq_put_decimal_ull(p, ' ', kstat_irqs_usr(j));
seq_printf(p,
"\nctxt %llu\n"
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 73ca1740d839..0f96f71ab32b 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -91,6 +91,7 @@ static void show_type(struct seq_file *m, struct super_block *sb)
static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
{
+ struct proc_mounts *p = proc_mounts(m);
struct mount *r = real_mount(mnt);
int err = 0;
struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
@@ -104,7 +105,10 @@ static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
mangle(m, r->mnt_devname ? r->mnt_devname : "none");
}
seq_putc(m, ' ');
- seq_path(m, &mnt_path, " \t\n\\");
+ /* mountpoints outside of chroot jail will give SEQ_SKIP on this */
+ err = seq_path_root(m, &mnt_path, &p->root, " \t\n\\");
+ if (err)
+ goto out;
seq_putc(m, ' ');
show_type(m, sb);
seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
@@ -125,7 +129,6 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
struct mount *r = real_mount(mnt);
struct super_block *sb = mnt->mnt_sb;
struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
- struct path root = p->root;
int err = 0;
seq_printf(m, "%i %i %u:%u ", r->mnt_id, r->mnt_parent->mnt_id,
@@ -139,7 +142,7 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
seq_putc(m, ' ');
/* mountpoints outside of chroot jail will give SEQ_SKIP on this */
- err = seq_path_root(m, &mnt_path, &root, " \t\n\\");
+ err = seq_path_root(m, &mnt_path, &p->root, " \t\n\\");
if (err)
goto out;
@@ -182,6 +185,7 @@ out:
static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
{
+ struct proc_mounts *p = proc_mounts(m);
struct mount *r = real_mount(mnt);
struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
struct super_block *sb = mnt_path.dentry->d_sb;
@@ -201,7 +205,10 @@ static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
/* mount point */
seq_puts(m, " mounted on ");
- seq_path(m, &mnt_path, " \t\n\\");
+ /* mountpoints outside of chroot jail will give SEQ_SKIP on this */
+ err = seq_path_root(m, &mnt_path, &p->root, " \t\n\\");
+ if (err)
+ goto out;
seq_putc(m, ' ');
/* file system type */
@@ -216,6 +223,7 @@ static int show_vfsstat(struct seq_file *m, struct vfsmount *mnt)
}
seq_putc(m, '\n');
+out:
return err;
}