summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/disk-io.c8
-rw-r--r--fs/btrfs/extent-tree.c2
-rw-r--r--fs/btrfs/extent_io.c48
-rw-r--r--fs/btrfs/extent_map.c4
-rw-r--r--fs/btrfs/file.c1
-rw-r--r--fs/btrfs/inode.c3
-rw-r--r--fs/btrfs/ioctl.c10
-rw-r--r--fs/btrfs/relocation.c1
-rw-r--r--fs/btrfs/volumes.c2
-rw-r--r--fs/cifs/cifsglob.h4
-rw-r--r--fs/cifs/connect.c37
-rw-r--r--fs/cifs/transport.c3
-rw-r--r--fs/dlm/lowcomms.c6
-rw-r--r--fs/ext4/ext4.h10
-rw-r--r--fs/ext4/extents.c10
-rw-r--r--fs/ext4/file.c60
-rw-r--r--fs/ext4/mballoc.c100
-rw-r--r--fs/ext4/page-io.c36
-rw-r--r--fs/ext4/super.c66
-rw-r--r--fs/jbd2/journal.c9
-rw-r--r--fs/jbd2/transaction.c21
-rw-r--r--fs/namei.c142
-rw-r--r--fs/nfsd/nfs4callback.c6
-rw-r--r--fs/nfsd/nfs4state.c186
-rw-r--r--fs/nfsd/state.h5
-rw-r--r--fs/nfsd/vfs.c21
-rw-r--r--fs/open.c2
-rw-r--r--fs/proc/array.c3
-rw-r--r--fs/super.c5
29 files changed, 524 insertions, 287 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index fdce8799b98..e1aa8d607bc 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -359,10 +359,14 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
tree = &BTRFS_I(page->mapping->host)->io_tree;
- if (page->private == EXTENT_PAGE_PRIVATE)
+ if (page->private == EXTENT_PAGE_PRIVATE) {
+ WARN_ON(1);
goto out;
- if (!page->private)
+ }
+ if (!page->private) {
+ WARN_ON(1);
goto out;
+ }
len = page->private >> 2;
WARN_ON(len == 0);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4e7e012ad66..f3c96fc0143 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -6583,7 +6583,7 @@ static noinline int relocate_data_extent(struct inode *reloc_inode,
u64 end = start + extent_key->offset - 1;
em = alloc_extent_map(GFP_NOFS);
- BUG_ON(!em || IS_ERR(em));
+ BUG_ON(!em);
em->start = start;
em->len = extent_key->offset;
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 5e76a474cb7..92ac5192c51 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1946,6 +1946,7 @@ void set_page_extent_mapped(struct page *page)
static void set_page_extent_head(struct page *page, unsigned long len)
{
+ WARN_ON(!PagePrivate(page));
set_page_private(page, EXTENT_PAGE_PRIVATE_FIRST_PAGE | len << 2);
}
@@ -2821,9 +2822,17 @@ int try_release_extent_state(struct extent_map_tree *map,
* at this point we can safely clear everything except the
* locked bit and the nodatasum bit
*/
- clear_extent_bit(tree, start, end,
+ ret = clear_extent_bit(tree, start, end,
~(EXTENT_LOCKED | EXTENT_NODATASUM),
0, 0, NULL, mask);
+
+ /* if clear_extent_bit failed for enomem reasons,
+ * we can't allow the release to continue.
+ */
+ if (ret < 0)
+ ret = 0;
+ else
+ ret = 1;
}
return ret;
}
@@ -3194,7 +3203,13 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
}
if (!PageUptodate(p))
uptodate = 0;
- unlock_page(p);
+
+ /*
+ * see below about how we avoid a nasty race with release page
+ * and why we unlock later
+ */
+ if (i != 0)
+ unlock_page(p);
}
if (uptodate)
set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
@@ -3218,9 +3233,26 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
atomic_inc(&eb->refs);
spin_unlock(&tree->buffer_lock);
radix_tree_preload_end();
+
+ /*
+ * there is a race where release page may have
+ * tried to find this extent buffer in the radix
+ * but failed. It will tell the VM it is safe to
+ * reclaim the, and it will clear the page private bit.
+ * We must make sure to set the page private bit properly
+ * after the extent buffer is in the radix tree so
+ * it doesn't get lost
+ */
+ set_page_extent_mapped(eb->first_page);
+ set_page_extent_head(eb->first_page, eb->len);
+ if (!page0)
+ unlock_page(eb->first_page);
return eb;
free_eb:
+ if (eb->first_page && !page0)
+ unlock_page(eb->first_page);
+
if (!atomic_dec_and_test(&eb->refs))
return exists;
btrfs_release_extent_buffer(eb);
@@ -3271,10 +3303,11 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree,
continue;
lock_page(page);
+ WARN_ON(!PagePrivate(page));
+
+ set_page_extent_mapped(page);
if (i == 0)
set_page_extent_head(page, eb->len);
- else
- set_page_private(page, EXTENT_PAGE_PRIVATE);
clear_page_dirty_for_io(page);
spin_lock_irq(&page->mapping->tree_lock);
@@ -3464,6 +3497,13 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
for (i = start_i; i < num_pages; i++) {
page = extent_buffer_page(eb, i);
+
+ WARN_ON(!PagePrivate(page));
+
+ set_page_extent_mapped(page);
+ if (i == 0)
+ set_page_extent_head(page, eb->len);
+
if (inc_all_pages)
page_cache_get(page);
if (!PageUptodate(page)) {
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index b0e1fce1253..2b6c12e983b 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -51,8 +51,8 @@ struct extent_map *alloc_extent_map(gfp_t mask)
{
struct extent_map *em;
em = kmem_cache_alloc(extent_map_cache, mask);
- if (!em || IS_ERR(em))
- return em;
+ if (!em)
+ return NULL;
em->in_tree = 0;
em->flags = 0;
em->compress_type = BTRFS_COMPRESS_NONE;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c1d3a818731..7084140d594 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -186,6 +186,7 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
split = alloc_extent_map(GFP_NOFS);
if (!split2)
split2 = alloc_extent_map(GFP_NOFS);
+ BUG_ON(!split || !split2);
write_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, start, len);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index bcc461a9695..fb9bd7832b6 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -644,6 +644,7 @@ retry:
async_extent->ram_size - 1, 0);
em = alloc_extent_map(GFP_NOFS);
+ BUG_ON(!em);
em->start = async_extent->start;
em->len = async_extent->ram_size;
em->orig_start = em->start;
@@ -820,6 +821,7 @@ static noinline int cow_file_range(struct inode *inode,
BUG_ON(ret);
em = alloc_extent_map(GFP_NOFS);
+ BUG_ON(!em);
em->start = start;
em->orig_start = em->start;
ram_size = ins.offset;
@@ -1169,6 +1171,7 @@ out_check:
struct extent_map_tree *em_tree;
em_tree = &BTRFS_I(inode)->extent_tree;
em = alloc_extent_map(GFP_NOFS);
+ BUG_ON(!em);
em->start = cur_offset;
em->orig_start = em->start;
em->len = num_bytes;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 02d224e8c83..be2d4f6aaa5 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2208,7 +2208,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
int num_types = 4;
int alloc_size;
int ret = 0;
- int slot_count = 0;
+ u64 slot_count = 0;
int i, c;
if (copy_from_user(&space_args,
@@ -2247,7 +2247,7 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
goto out;
}
- slot_count = min_t(int, space_args.space_slots, slot_count);
+ slot_count = min_t(u64, space_args.space_slots, slot_count);
alloc_size = sizeof(*dest) * slot_count;
@@ -2267,6 +2267,9 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
for (i = 0; i < num_types; i++) {
struct btrfs_space_info *tmp;
+ if (!slot_count)
+ break;
+
info = NULL;
rcu_read_lock();
list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
@@ -2288,7 +2291,10 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
memcpy(dest, &space, sizeof(space));
dest++;
space_args.total_spaces++;
+ slot_count--;
}
+ if (!slot_count)
+ break;
}
up_read(&info->groups_sem);
}
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 1f5556acb53..0825e4ed944 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1157,6 +1157,7 @@ static int clone_backref_node(struct btrfs_trans_handle *trans,
new_node->bytenr = dest->node->start;
new_node->level = node->level;
new_node->lowest = node->lowest;
+ new_node->checked = 1;
new_node->root = dest;
if (!node->lowest) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 2636a051e4b..af7dbca1527 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1605,12 +1605,14 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
ret = find_next_devid(root, &device->devid);
if (ret) {
+ kfree(device->name);
kfree(device);
goto error;
}
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
+ kfree(device->name);
kfree(device);
ret = PTR_ERR(trans);
goto error;
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index edd5b29b53c..17afb0fbcae 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -188,6 +188,8 @@ struct TCP_Server_Info {
/* multiplexed reads or writes */
unsigned int maxBuf; /* maxBuf specifies the maximum */
/* message size the server can send or receive for non-raw SMBs */
+ /* maxBuf is returned by SMB NegotiateProtocol so maxBuf is only 0 */
+ /* when socket is setup (and during reconnect) before NegProt sent */
unsigned int max_rw; /* maxRw specifies the maximum */
/* message size the server can send or receive for */
/* SMB_COM_WRITE_RAW or SMB_COM_READ_RAW. */
@@ -652,7 +654,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
#define MID_REQUEST_SUBMITTED 2
#define MID_RESPONSE_RECEIVED 4
#define MID_RETRY_NEEDED 8 /* session closed while this request out */
-#define MID_NO_RESP_NEEDED 0x10
+#define MID_RESPONSE_MALFORMED 0x10
/* Types of response buffer returned from SendReceive2 */
#define CIFS_NO_BUFFER 0 /* Response buffer not returned */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 257b6d895e2..8d6c17ab593 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -338,10 +338,11 @@ cifs_echo_request(struct work_struct *work)
struct TCP_Server_Info, echo.work);
/*
- * We cannot send an echo until the NEGOTIATE_PROTOCOL request is done.
- * Also, no need to ping if we got a response recently
+ * We cannot send an echo until the NEGOTIATE_PROTOCOL request is
+ * done, which is indicated by maxBuf != 0. Also, no need to ping if
+ * we got a response recently
*/
- if (server->tcpStatus != CifsGood ||
+ if (server->maxBuf == 0 ||
time_before(jiffies, server->lstrp + SMB_ECHO_INTERVAL - HZ))
goto requeue_echo;
@@ -585,11 +586,20 @@ incomplete_rcv:
total_read += 4; /* account for rfc1002 hdr */
dump_smb(smb_buffer, total_read);
- if (checkSMB(smb_buffer, smb_buffer->Mid, total_read)) {
+
+ /*
+ * We know that we received enough to get to the MID as we
+ * checked the pdu_length earlier. Now check to see
+ * if the rest of the header is OK. We borrow the length
+ * var for the rest of the loop to avoid a new stack var.
+ *
+ * 48 bytes is enough to display the header and a little bit
+ * into the payload for debugging purposes.
+ */
+ length = checkSMB(smb_buffer, smb_buffer->Mid, total_read);
+ if (length != 0)
cifs_dump_mem("Bad SMB: ", smb_buffer,
- total_read < 48 ? total_read : 48);
- continue;
- }
+ min_t(unsigned int, total_read, 48));
mid_entry = NULL;
server->lstrp = jiffies;
@@ -601,7 +611,8 @@ incomplete_rcv:
if ((mid_entry->mid == smb_buffer->Mid) &&
(mid_entry->midState == MID_REQUEST_SUBMITTED) &&
(mid_entry->command == smb_buffer->Command)) {
- if (check2ndT2(smb_buffer,server->maxBuf) > 0) {
+ if (length == 0 &&
+ check2ndT2(smb_buffer, server->maxBuf) > 0) {
/* We have a multipart transact2 resp */
isMultiRsp = true;
if (mid_entry->resp_buf) {
@@ -636,7 +647,12 @@ incomplete_rcv:
mid_entry->resp_buf = smb_buffer;
mid_entry->largeBuf = isLargeBuf;
multi_t2_fnd:
- mid_entry->midState = MID_RESPONSE_RECEIVED;
+ if (length == 0)
+ mid_entry->midState =
+ MID_RESPONSE_RECEIVED;
+ else
+ mid_entry->midState =
+ MID_RESPONSE_MALFORMED;
#ifdef CONFIG_CIFS_STATS2
mid_entry->when_received = jiffies;
#endif
@@ -657,6 +673,9 @@ multi_t2_fnd:
else
smallbuf = NULL;
}
+ } else if (length != 0) {
+ /* response sanity checks failed */
+ continue;
} else if (!is_valid_oplock_break(smb_buffer, server) &&
!isMultiRsp) {
cERROR(1, "No task to wake, unknown frame received! "
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index fbc5aace54b..46d8756f2b2 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -457,6 +457,9 @@ sync_mid_result(struct mid_q_entry *mid, struct TCP_Server_Info *server)
case MID_RETRY_NEEDED:
rc = -EAGAIN;
break;
+ case MID_RESPONSE_MALFORMED:
+ rc = -EIO;
+ break;
default:
cERROR(1, "%s: invalid mid state mid=%d state=%d", __func__,
mid->mid, mid->midState);
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 9c64ae9e4c1..2d8c87b951c 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1468,15 +1468,13 @@ static void work_stop(void)
static int work_start(void)
{
- recv_workqueue = alloc_workqueue("dlm_recv", WQ_MEM_RECLAIM |
- WQ_HIGHPRI | WQ_FREEZEABLE, 0);
+ recv_workqueue = create_singlethread_workqueue("dlm_recv");
if (!recv_workqueue) {
log_print("can't start dlm_recv");
return -ENOMEM;
}
- send_workqueue = alloc_workqueue("dlm_send", WQ_MEM_RECLAIM |
- WQ_HIGHPRI | WQ_FREEZEABLE, 0);
+ send_workqueue = create_singlethread_workqueue("dlm_send");
if (!send_workqueue) {
log_print("can't start dlm_send");
destroy_workqueue(recv_workqueue);
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0c8d97b56f3..3aa0b72b3b9 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -848,6 +848,7 @@ struct ext4_inode_info {
atomic_t i_ioend_count; /* Number of outstanding io_end structs */
/* current io_end structure for async DIO write*/
ext4_io_end_t *cur_aio_dio;
+ atomic_t i_aiodio_unwritten; /* Nr. of inflight conversions pending */
spinlock_t i_block_reservation_lock;
@@ -2119,6 +2120,15 @@ static inline void set_bitmap_uptodate(struct buffer_head *bh)
#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1)
+/* For ioend & aio unwritten conversion wait queues */
+#define EXT4_WQ_HASH_SZ 37
+#define ext4_ioend_wq(v) (&ext4__ioend_wq[((unsigned long)(v)) %\
+ EXT4_WQ_HASH_SZ])
+#define ext4_aio_mutex(v) (&ext4__aio_mutex[((unsigned long)(v)) %\
+ EXT4_WQ_HASH_SZ])
+extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
+extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
+
#endif /* __KERNEL__ */
#endif /* _EXT4_H */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 63a75810b7c..ccce8a7e94e 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3174,9 +3174,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
* that this IO needs to convertion to written when IO is
* completed
*/
- if (io)
+ if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
io->flag = EXT4_IO_END_UNWRITTEN;
- else
+ atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
+ } else
ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
if (ext4_should_dioread_nolock(inode))
map->m_flags |= EXT4_MAP_UNINIT;
@@ -3463,9 +3464,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* that we need to perform convertion when IO is done.
*/
if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
- if (io)
+ if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
io->flag = EXT4_IO_END_UNWRITTEN;
- else
+ atomic_inc(&EXT4_I(inode)->i_aiodio_unwritten);
+ } else
ext4_set_inode_state(inode,
EXT4_STATE_DIO_UNWRITTEN);
}
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 2e8322c8aa8..7b80d543b89 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -55,11 +55,47 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
return 0;
}
+static void ext4_aiodio_wait(struct inode *inode)
+{
+ wait_queue_head_t *wq = ext4_ioend_wq(inode);
+
+ wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_aiodio_unwritten) == 0));
+}
+
+/*
+ * This tests whether the IO in question is block-aligned or not.
+ * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
+ * are converted to written only after the IO is complete. Until they are
+ * mapped, these blocks appear as holes, so dio_zero_block() will assume that
+ * it needs to zero out portions of the start and/or end block. If 2 AIO
+ * threads are at work on the same unwritten block, they must be synchronized
+ * or one thread will zero the other's data, causing corruption.
+ */
+static int
+ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ struct super_block *sb = inode->i_sb;
+ int blockmask = sb->s_blocksize - 1;
+ size_t count = iov_length(iov, nr_segs);
+ loff_t final_size = pos + count;
+
+ if (pos >= inode->i_size)
+ return 0;
+
+ if ((pos & blockmask) || (final_size & blockmask))
+ return 1;
+
+ return 0;
+}
+
static ssize_t
ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
+ int unaligned_aio = 0;
+ int ret;
/*
* If we have encountered a bitmap-format file, the size limit
@@ -78,9 +114,31 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
sbi->s_bitmap_maxbytes - pos);
}
+ } else if (unlikely((iocb->ki_filp->f_flags & O_DIRECT) &&
+ !is_sync_kiocb(iocb))) {
+ unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos);
}
- return generic_file_aio_write(iocb, iov, nr_segs, pos);
+ /* Unaligned direct AIO must be serialized; see comment above */
+ if (unaligned_aio) {
+ static unsigned long unaligned_warn_time;
+
+ /* Warn about this once per day */
+ if (printk_timed_ratelimit(&unaligned_warn_time, 60*60*24*HZ))
+ ext4_msg(inode->i_sb, KERN_WARNING,
+ "Unaligned AIO/DIO on inode %ld by %s; "
+ "performance will be poor.",
+ inode->i_ino, current->comm);
+ mutex_lock(ext4_aio_mutex(inode));
+ ext4_aiodio_wait(inode);
+ }
+
+ ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
+
+ if (unaligned_aio)
+ mutex_unlock(ext4_aio_mutex(inode));
+
+ return ret;
}
static const struct vm_operations_struct ext4_file_vm_ops = {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 851f49b2f9d..d1fe09aea73 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -342,10 +342,15 @@ static struct kmem_cache *ext4_free_ext_cachep;
/* We create slab caches for groupinfo data structures based on the
* superblock block size. There will be one per mounted filesystem for
* each unique s_blocksize_bits */
-#define NR_GRPINFO_CACHES \
- (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1)
+#define NR_GRPINFO_CACHES 8
static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
+static const char *ext4_groupinfo_slab_names[NR_GRPINFO_CACHES] = {
+ "ext4_groupinfo_1k", "ext4_groupinfo_2k", "ext4_groupinfo_4k",
+ "ext4_groupinfo_8k", "ext4_groupinfo_16k", "ext4_groupinfo_32k",
+ "ext4_groupinfo_64k", "ext4_groupinfo_128k"
+};
+
static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
@@ -2414,6 +2419,55 @@ err_freesgi:
return -ENOMEM;
}
+static void ext4_groupinfo_destroy_slabs(void)
+{
+ int i;
+
+ for (i = 0; i < NR_GRPINFO_CACHES; i++) {
+ if (ext4_groupinfo_caches[i])
+ kmem_cache_destroy(ext4_groupinfo_caches[i]);
+ ext4_groupinfo_caches[i] = NULL;
+ }
+}
+
+static int ext4_groupinfo_create_slab(size_t size)
+{
+ static DEFINE_MUTEX(ext4_grpinfo_slab_create_mutex);
+ int slab_size;
+ int blocksize_bits = order_base_2(size);
+ int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
+ struct kmem_cache *cachep;
+
+ if (cache_index >= NR_GRPINFO_CACHES)
+ return -EINVAL;
+
+ if (unlikely(cache_index < 0))
+ cache_index = 0;
+
+ mutex_lock(&ext4_grpinfo_slab_create_mutex);
+ if (ext4_groupinfo_caches[cache_index]) {
+ mutex_unlock(&ext4_grpinfo_slab_create_mutex);
+ return 0; /* Already created */
+ }
+
+ slab_size = offsetof(struct ext4_group_info,
+ bb_counters[blocksize_bits + 2]);
+
+ cachep = kmem_cache_create(ext4_groupinfo_slab_names[cache_index],
+ slab_size, 0, SLAB_RECLAIM_ACCOUNT,
+ NULL);
+
+ mutex_unlock(&ext4_grpinfo_slab_create_mutex);
+ if (!cachep) {
+ printk(KERN_EMERG "EXT4: no memory for groupinfo slab cache\n");
+ return -ENOMEM;
+ }
+
+ ext4_groupinfo_caches[cache_index] = cachep;
+
+ return 0;
+}
+
int ext4_mb_init(struct super_block *sb, int needs_recovery)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -2421,9 +2475,6 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
unsigned offset;
unsigned max;
int ret;
- int cache_index;
- struct kmem_cache *cachep;
- char *namep = NULL;
i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
@@ -2440,30 +2491,9 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
goto out;
}
- cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
- cachep = ext4_groupinfo_caches[cache_index];
- if (!cachep) {
- char name[32];
- int len = offsetof(struct ext4_group_info,
- bb_counters[sb->s_blocksize_bits + 2]);
-
- sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits);
- namep = kstrdup(name, GFP_KERNEL);
- if (!namep) {
- ret = -ENOMEM;
- goto out;
- }
-
- /* Need to free the kmem_cache_name() when we
- * destroy the slab */
- cachep = kmem_cache_create(namep, len, 0,
- SLAB_RECLAIM_ACCOUNT, NULL);
- if (!cachep) {
- ret = -ENOMEM;
- goto out;
- }
- ext4_groupinfo_caches[cache_index] = cachep;
- }
+ ret = ext4_groupinfo_create_slab(sb->s_blocksize);
+ if (ret < 0)
+ goto out;
/* order 0 is regular bitmap */
sbi->s_mb_maxs[0] = sb->s_blocksize << 3;
@@ -2520,7 +2550,6 @@ out:
if (ret) {
kfree(sbi->s_mb_offsets);
kfree(sbi->s_mb_maxs);
- kfree(namep);
}
return ret;
}
@@ -2734,7 +2763,6 @@ int __init ext4_init_mballoc(void)
void ext4_exit_mballoc(void)
{
- int i;
/*
* Wait for completion of call_rcu()'s on ext4_pspace_cachep
* before destroying the slab cache.
@@ -2743,15 +2771,7 @@ void ext4_exit_mballoc(void)
kmem_cache_destroy(ext4_pspace_cachep);
kmem_cache_destroy(ext4_ac_cachep);
kmem_cache_destroy(ext4_free_ext_cachep);
-
- for (i = 0; i < NR_GRPINFO_CACHES; i++) {
- struct kmem_cache *cachep = ext4_groupinfo_caches[i];
- if (cachep) {
- char *name = (char *)kmem_cache_name(cachep);
- kmem_cache_destroy(cachep);
- kfree(name);
- }
- }
+ ext4_groupinfo_destroy_slabs();
ext4_remove_debugfs_entry();
}
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 7270dcfca92..955cc309142 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -32,14 +32,8 @@
static struct kmem_cache *io_page_cachep, *io_end_cachep;
-#define WQ_HASH_SZ 37
-#define to_ioend_wq(v) (&ioend_wq[((unsigned long)v) % WQ_HASH_SZ])
-static wait_queue_head_t ioend_wq[WQ_HASH_SZ];
-
int __init ext4_init_pageio(void)
{
- int i;
-
io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
if (io_page_cachep == NULL)
return -ENOMEM;
@@ -48,9 +42,6 @@ int __init ext4_init_pageio(void)
kmem_cache_destroy(io_page_cachep);
return -ENOMEM;
}
- for (i = 0; i < WQ_HASH_SZ; i++)
- init_waitqueue_head(&ioend_wq[i]);
-
return 0;
}
@@ -62,7 +53,7 @@ void ext4_exit_pageio(void)
void ext4_ioend_wait(struct inode *inode)
{
- wait_queue_head_t *wq = to_ioend_wq(inode);
+ wait_queue_head_t *wq = ext4_ioend_wq(inode);
wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
}
@@ -87,7 +78,7 @@ void ext4_free_io_end(ext4_io_end_t *io)
for (i = 0; i < io->num_io_pages; i++)
put_io_page(io->pages[i]);
io->num_io_pages = 0;
- wq = to_ioend_wq(io->inode);
+ wq = ext4_ioend_wq(io->inode);
if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) &&
waitqueue_active(wq))
wake_up_all(wq);
@@ -102,6 +93,7 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
struct inode *inode = io->inode;
loff_t offset = io->offset;
ssize_t size = io->size;
+ wait_queue_head_t *wq;
int ret = 0;
ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
@@ -126,7 +118,16 @@ int ext4_end_io_nolock(ext4_io_end_t *io)
if (io->iocb)
aio_complete(io->iocb, io->result, 0);
/* clear the DIO AIO unwritten flag */
- io->flag &= ~EXT4_IO_END_UNWRITTEN;
+ if (io->flag & EXT4_IO_END_UNWRITTEN) {
+ io->flag &= ~EXT4_IO_END_UNWRITTEN;
+ /* Wake up anyone waiting on unwritten extent conversion */
+ wq = ext4_ioend_wq(io->inode);
+ if (atomic_dec_and_test(&EXT4_I(inode)->i_aiodio_unwritten) &&
+ waitqueue_active(wq)) {
+ wake_up_all(wq);
+ }
+ }
+
return ret;
}
@@ -190,6 +191,7 @@ static void ext4_end_bio(struct bio *bio, int error)
struct inode *inode;
unsigned long flags;
int i;
+ sector_t bi_sector = bio->bi_sector;
BUG_ON(!io_end);
bio->bi_private = NULL;
@@ -207,9 +209,7 @@ static void ext4_end_bio(struct bio *bio, int error)
if (error)
SetPageError(page);
BUG_ON(!head);
- if (head->b_size == PAGE_CACHE_SIZE)
- clear_buffer_dirty(head);
- else {
+ if (head->b_size != PAGE_CACHE_SIZE) {
loff_t offset;
loff_t io_end_offset = io_end->offset + io_end->size;
@@ -221,7 +221,6 @@ static void ext4_end_bio(struct bio *bio, int error)
if (error)
buffer_io_error(bh);
- clear_buffer_dirty(bh);
}
if (buffer_delay(bh))
partial_write = 1;
@@ -257,7 +256,7 @@ static void ext4_end_bio(struct bio *bio, int error)
(unsigned long long) io_end->offset,
(long) io_end->size,
(unsigned long long)
- bio->bi_sector >> (inode->i_blkbits - 9));
+ bi_sector >> (inode->i_blkbits - 9));
}
/* Add the io_end to per-inode completed io list*/
@@ -380,6 +379,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
blocksize = 1 << inode->i_blkbits;
+ BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
set_page_writeback(page);
ClearPageError(page);
@@ -397,12 +397,14 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
for (bh = head = page_buffers(page), block_start = 0;
bh != head || !block_start;
block_start = block_end, bh = bh->b_this_page) {
+
block_end = block_start + blocksize;
if (block_start >= len) {
clear_buffer_dirty(bh);
set_buffer_uptodate(bh);
continue;
}
+ clear_buffer_dirty(bh);
ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
if (ret) {
/*
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 48ce561fafa..f6a318f836b 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -77,6 +77,7 @@ static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
const char *dev_name, void *data);
static void ext4_destroy_lazyinit_thread(void);
static void ext4_unregister_li_request(struct super_block *sb);
+static void ext4_clear_request_list(void);
#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static struct file_system_type ext3_fs_type = {
@@ -832,6 +833,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
ei->i_sync_tid = 0;
ei->i_datasync_tid = 0;
atomic_set(&ei->i_ioend_count, 0);
+ atomic_set(&ei->i_aiodio_unwritten, 0);
return &ei->vfs_inode;
}
@@ -2716,6 +2718,8 @@ static void ext4_unregister_li_request(struct super_block *sb)
mutex_unlock(&ext4_li_info->li_list_mtx);
}
+static struct task_struct *ext4_lazyinit_task;
+
/*
* This is the function where ext4lazyinit thread lives. It walks
* through the request list searching for next scheduled filesystem.
@@ -2784,6 +2788,10 @@ cont_thread:
if (time_before(jiffies, next_wakeup))
schedule();
finish_wait(&eli->li_wait_daemon, &wait);
+ if (kthread_should_stop()) {
+ ext4_clear_request_list();
+ goto exit_thread;
+ }
}
exit_thread:
@@ -2808,6 +2816,7 @@ exit_thread:
wake_up(&eli->li_wait_task);
kfree(ext4_li_info);
+ ext4_lazyinit_task = NULL;
ext4_li_info = NULL;
mutex_unlock(&ext4_li_mtx);
@@ -2830,11 +2839,10 @@ static void ext4_clear_request_list(void)
static int ext4_run_lazyinit_thread(void)
{
- struct task_struct *t;
-
- t = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit");
- if (IS_ERR(t)) {
- int err = PTR_ERR(t);
+ ext4_lazyinit_task = kthread_run(ext4_lazyinit_thread,
+ ext4_li_info, "ext4lazyinit");
+ if (IS_ERR(ext4_lazyinit_task)) {
+ int err = PTR_ERR(ext4_lazyinit_task);
ext4_clear_request_list();
del_timer_sync(&ext4_li_info->li_timer);
kfree(ext4_li_info);
@@ -2985,16 +2993,10 @@ static void ext4_destroy_lazyinit_thread(void)
* If thread exited earlier
* there's nothing to be done.
*/
- if (!ext4_li_info)
+ if (!ext4_li_info || !ext4_lazyinit_task)
return;
- ext4_clear_request_list();
-
- while (ext4_li_info->li_task) {
- wake_up(&ext4_li_info->li_wait_daemon);
- wait_event(ext4_li_info->li_wait_task,
- ext4_li_info->li_task == NULL);
- }
+ kthread_stop(ext4_lazyinit_task);
}
static int ext4_fill_super(struct super_block *sb, void *data, int silent)
@@ -4768,7 +4770,7 @@ static struct file_system_type ext4_fs_type = {
.fs_flags = FS_REQUIRES_DEV,
};
-int __init ext4_init_feat_adverts(void)
+static int __init ext4_init_feat_adverts(void)
{
struct ext4_features *ef;
int ret = -ENOMEM;
@@ -4792,23 +4794,44 @@ out:
return ret;
}
+static void ext4_exit_feat_adverts(void)
+{
+ kobject_put(&ext4_feat->f_kobj);
+ wait_for_completion(&ext4_feat->f_kobj_unregister);
+ kfree(ext4_feat);
+}
+
+/* Shared across all ext4 file systems */
+wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
+struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
+
static int __init ext4_init_fs(void)
{
- int err;
+ int i, err;
ext4_check_flag_values();
+
+ for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
+ mutex_init(&ext4__aio_mutex[i]);
+ init_waitqueue_head(&ext4__ioend_wq[i]);
+ }
+
err = ext4_init_pageio();
if (err)
return err;
err = ext4_init_system_zone();
if (err)
- goto out5;
+ goto out7;
ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
if (!ext4_kset)
- goto out4;
+ goto out6;
ext4_proc_root = proc_mkdir("fs/ext4", NULL);
+ if (!ext4_proc_root)
+ goto out5;
err = ext4_init_feat_adverts();
+ if (err)
+ goto out4;
err = ext4_init_mballoc();
if (err)
@@ -4838,12 +4861,14 @@ out1:
out2:
ext4_exit_mballoc();
out3:
- kfree(ext4_feat);
+ ext4_exit_feat_adverts();
+out4:
remove_proc_entry("fs/ext4", NULL);
+out5:
kset_unregister(ext4_kset);
-out4:
+out6:
ext4_exit_system_zone();
-out5:
+out7:
ext4_exit_pageio();
return err;
}
@@ -4857,6 +4882,7 @@ static void __exit ext4_exit_fs(void)
destroy_inodecache();
ext4_exit_xattr();
ext4_exit_mballoc();
+ ext4_exit_feat_adverts();
remove_proc_entry("fs/ext4", NULL);
kset_unregister(ext4_kset);
ext4_exit_system_zone();
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 9e4686900f1..97e73469b2c 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -473,7 +473,8 @@ int __jbd2_log_space_left(journal_t *journal)
}
/*
- * Called under j_state_lock. Returns true if a transaction commit was started.
+ * Called with j_state_lock locked for writing.
+ * Returns true if a transaction commit was started.
*/
int __jbd2_log_start_commit(journal_t *journal, tid_t target)
{
@@ -520,11 +521,13 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
{
transaction_t *transaction = NULL;
tid_t tid;
+ int need_to_start = 0;
read_lock(&journal->j_state_lock);
if (journal->j_running_transaction && !current->journal_info) {
transaction = journal->j_running_transaction;
- __jbd2_log_start_commit(journal, transaction->t_tid);
+ if (!tid_geq(journal->j_commit_request, transaction->t_tid))
+ need_to_start = 1;
} else if (journal->j_committing_transaction)
transaction = journal->j_committing_transaction;
@@ -535,6 +538,8 @@ int jbd2_journal_force_commit_nested(journal_t *journal)
tid = transaction->t_tid;
read_unlock(&journal->j_state_lock);
+ if (need_to_start)
+ jbd2_log_start_commit(journal, tid);
jbd2_log_wait_commit(journal, tid);
return 1;
}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index faad2bd787c..1d1191050f9 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -117,10 +117,10 @@ static inline void update_t_max_wait(transaction_t *transaction)
static int start_this_handle(journal_t *journal, handle_t *handle,
int gfp_mask)
{
- transaction_t *transaction;
- int needed;
- int nblocks = handle->h_buffer_credits;
- transaction_t *new_transaction = NULL;
+ transaction_t *transaction, *new_transaction = NULL;
+ tid_t tid;
+ int needed, need_to_start;
+ int nblocks = handle->h_buffer_credits;
if (nblocks > journal->j_max_transaction_buffers) {
printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
@@ -222,8 +222,11 @@ repeat:
atomic_sub(nblocks, &transaction->t_outstanding_credits);
prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
TASK_UNINTERRUPTIBLE);
- __jbd2_log_start_commit(journal, transaction->t_tid);
+ tid = transaction->t_tid;
+ need_to_start = !tid_geq(journal->j_commit_request, tid);
read_unlock(&journal->j_state_lock);
+ if (need_to_start)
+ jbd2_log_start_commit(journal, tid);
schedule();
finish_wait(&journal->j_wait_transaction_locked, &wait);
goto repeat;
@@ -442,7 +445,8 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
{
transaction_t *transaction = handle->h_transaction;
journal_t *journal = transaction->t_journal;
- int ret;
+ tid_t tid;
+ int need_to_start, ret;
/* If we've had an abort of any type, don't even think about
* actually doing the restart! */
@@ -465,8 +469,11 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
spin_unlock(&transaction->t_handle_lock);
jbd_debug(2, "restarting handle %p\n", handle);
- __jbd2_log_start_commit(journal, transaction->t_tid);
+ tid = transaction->t_tid;
+ need_to_start = !tid_geq(journal->j_commit_request, tid);
read_unlock(&journal->j_state_lock);
+ if (need_to_start)
+ jbd2_log_start_commit(journal, tid);
lock_map_release(&handle->h_lockdep_map);
handle->h_buffer_credits = nblocks;
diff --git a/fs/namei.c b/fs/namei.c
index 7d77f24d32a..9e701e28a32 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -455,14 +455,6 @@ static int nameidata_dentry_drop_rcu(struct nameidata *nd, struct dentry *dentry
struct fs_struct *fs = current->fs;
struct dentry *parent = nd->path.dentry;
- /*
- * It can be possible to revalidate the dentry that we started
- * the path walk with. force_reval_path may also revalidate the
- * dentry already committed to the nameidata.
- */
- if (unlikely(parent == dentry))
- return nameidata_drop_rcu(nd);
-
BUG_ON(!(nd->flags & LOOKUP_RCU));
if (nd->root.mnt) {
spin_lock(&fs->lock);
@@ -561,39 +553,25 @@ static inline int nameidata_drop_rcu_last_maybe(struct nameidata *nd)
*/
void release_open_intent(struct nameidata *nd)
{
- if (nd->intent.open.file->f_path.dentry == NULL)
- put_filp(nd->intent.open.file);
- else
- fput(nd->intent.open.file);
-}
-
-/*
- * Call d_revalidate and handle filesystems that request rcu-walk
- * to be dropped. This may be called and return in rcu-walk mode,
- * regardless of success or error. If -ECHILD is returned, the caller
- * must return -ECHILD back up the path walk stack so path walk may
- * be restarted in ref-walk mode.
- */
-static int d_revalidate(struct dentry *dentry, struct nameidata *nd)
-{
- int status;
+ struct file *file = nd->intent.open.file;
- status = dentry->d_op->d_revalidate(dentry, nd);
- if (status == -ECHILD) {
- if (nameidata_dentry_drop_rcu(nd, dentry))
- return status;
- status = dentry->d_op->d_revalidate(dentry, nd);
+ if (file && !IS_ERR(file)) {
+ if (file->f_path.dentry == NULL)
+ put_filp(file);
+ else
+ fput(file);
}
+}
- return status;
+static inline int d_revalidate(struct dentry *dentry, struct nameidata *nd)
+{
+ return dentry->d_op->d_revalidate(dentry, nd);
}
-static inline struct dentry *
+static struct dentry *
do_revalidate(struct dentry *dentry, struct nameidata *nd)
{
- int status;
-
- status = d_revalidate(dentry, nd);
+ int status = d_revalidate(dentry, nd);
if (unlikely(status <= 0)) {
/*
* The dentry failed validation.
@@ -602,24 +580,39 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
* to return a fail status.
*/
if (status < 0) {
- /* If we're in rcu-walk, we don't have a ref */
- if (!(nd->flags & LOOKUP_RCU))
- dput(dentry);
+ dput(dentry);
dentry = ERR_PTR(status);
-
- } else {
- /* Don't d_invalidate in rcu-walk mode */
- if (nameidata_dentry_drop_rcu_maybe(nd, dentry))
- return ERR_PTR(-ECHILD);
- if (!d_invalidate(dentry)) {
- dput(dentry);
- dentry = NULL;
- }
+ } else if (!d_invalidate(dentry)) {
+ dput(dentry);
+ dentry = NULL;
}
}
return dentry;
}
+static inline struct dentry *
+do_revalidate_rcu(struct dentry *dentry, struct nameidata *nd)
+{
+ int status = d_revalidate(dentry, nd);
+ if (likely(status > 0))
+ return dentry;
+ if (status == -ECHILD) {
+ if (nameidata_dentry_drop_rcu(nd, dentry))
+ return ERR_PTR(-ECHILD);
+ return do_revalidate(dentry, nd);
+ }
+ if (status < 0)
+ return ERR_PTR(status);
+ /* Don't d_invalidate in rcu-walk mode */
+ if (nameidata_dentry_drop_rcu(nd, dentry))
+ return ERR_PTR(-ECHILD);
+ if (!d_invalidate(dentry)) {
+ dput(dentry);
+ dentry = NULL;
+ }
+ return dentry;
+}
+
static inline int need_reval_dot(struct dentry *dentry)
{
if (likely(!(dentry->d_flags & DCACHE_OP_REVALIDATE)))
@@ -664,9 +657,6 @@ force_reval_path(struct path *path, struct nameidata *nd)
return 0;
if (!status) {
- /* Don't d_invalidate in rcu-walk mode */
- if (nameidata_drop_rcu(nd))
- return -ECHILD;
d_invalidate(dentry);
status = -ESTALE;
}
@@ -773,6 +763,8 @@ __do_follow_link(const struct path *link, struct nameidata *nd, void **p)
int error;
struct dentry *dentry = link->dentry;
+ BUG_ON(nd->flags & LOOKUP_RCU);
+
touch_atime(link->mnt, dentry);
nd_set_link(nd, NULL);
@@ -807,6 +799,11 @@ static inline int do_follow_link(struct path *path, struct nameidata *nd)
{
void *cookie;
int err = -ELOOP;
+
+ /* We drop rcu-walk here */
+ if (nameidata_dentry_drop_rcu_maybe(nd, path->dentry))
+ return -ECHILD;
+
if (current->link_count >= MAX_NESTED_LINKS)
goto loop;
if (current->total_link_count >= 40)
@@ -1251,9 +1248,15 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
return -ECHILD;
nd->seq = seq;
- if (dentry->d_flags & DCACHE_OP_REVALIDATE)
- goto need_revalidate;
-done2:
+ if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
+ dentry = do_revalidate_rcu(dentry, nd);
+ if (!dentry)
+ goto need_lookup;
+ if (IS_ERR(dentry))
+ goto fail;
+ if (!(nd->flags & LOOKUP_RCU))
+ goto done;
+ }
path->mnt = mnt;
path->dentry = dentry;
if (likely(__follow_mount_rcu(nd, path, inode, false)))
@@ -1266,8 +1269,13 @@ done2:
if (!dentry)
goto need_lookup;
found:
- if (dentry->d_flags & DCACHE_OP_REVALIDATE)
- goto need_revalidate;
+ if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
+ dentry = do_revalidate(dentry, nd);
+ if (!dentry)
+ goto need_lookup;
+ if (IS_ERR(dentry))
+ goto fail;
+ }
done:
path->mnt = mnt;
path->dentry = dentry;
@@ -1309,16 +1317,6 @@ need_lookup:
mutex_unlock(&dir->i_mutex);
goto found;
-need_revalidate:
- dentry = do_revalidate(dentry, nd);
- if (!dentry)
- goto need_lookup;
- if (IS_ERR(dentry))
- goto fail;
- if (nd->flags & LOOKUP_RCU)
- goto done2;
- goto done;
-
fail:
return PTR_ERR(dentry);
}
@@ -1415,9 +1413,6 @@ exec_again:
goto out_dput;
if (inode->i_op->follow_link) {
- /* We commonly drop rcu-walk here */
- if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
- return -ECHILD;
BUG_ON(inode != next.dentry->d_inode);
err = do_follow_link(&next, nd);
if (err)
@@ -1463,8 +1458,6 @@ last_component:
break;
if (inode && unlikely(inode->i_op->follow_link) &&
(lookup_flags & LOOKUP_FOLLOW)) {
- if (nameidata_dentry_drop_rcu_maybe(nd, next.dentry))
- return -ECHILD;
BUG_ON(inode != next.dentry->d_inode);
err = do_follow_link(&next, nd);
if (err)
@@ -1500,12 +1493,15 @@ return_reval:
* We may need to check the cached dentry for staleness.
*/
if (need_reval_dot(nd->path.dentry)) {
+ if (nameidata_drop_rcu_last_maybe(nd))
+ return -ECHILD;
/* Note: we do not d_invalidate() */
err = d_revalidate(nd->path.dentry, nd);
if (!err)
err = -ESTALE;
if (err < 0)
break;
+ return 0;
}
return_base:
if (nameidata_drop_rcu_last_maybe(nd))
@@ -2265,8 +2261,6 @@ static struct file *finish_open(struct nameidata *nd,
return filp;
exit:
- if (!IS_ERR(nd->intent.open.file))
- release_open_intent(nd);
path_put(&nd->path);
return ERR_PTR(error);
}
@@ -2389,8 +2383,6 @@ exit_mutex_unlock:
exit_dput:
path_put_conditional(path, nd);
exit:
- if (!IS_ERR(nd->intent.open.file))
- release_open_intent(nd);
path_put(&nd->path);
return ERR_PTR(error);
}
@@ -2477,6 +2469,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
}
audit_inode(pathname, nd.path.dentry);
filp = finish_open(&nd, open_flag, acc_mode);
+ release_open_intent(&nd);
return filp;
creat:
@@ -2553,6 +2546,7 @@ out:
path_put(&nd.root);
if (filp == ERR_PTR(-ESTALE) && !(flags & LOOKUP_REVAL))
goto reval;
+ release_open_intent(&nd);
return filp;
exit_dput:
@@ -2560,8 +2554,6 @@ exit_dput:
out_path:
path_put(&nd.path);
out_filp:
- if (!IS_ERR(nd.intent.open.file))
- release_open_intent(&nd);
filp = ERR_PTR(error);
goto out;
}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 3be975e1891..cde36cb0f34 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -484,7 +484,7 @@ static int decode_cb_sequence4res(struct xdr_stream *xdr,
out:
return status;
out_default:
- return nfs_cb_stat_to_errno(status);
+ return nfs_cb_stat_to_errno(nfserr);
}
/*
@@ -564,11 +564,9 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
if (unlikely(status))
goto out;
if (unlikely(nfserr != NFS4_OK))
- goto out_default;
+ status = nfs_cb_stat_to_errno(nfserr);
out:
return status;
-out_default:
- return nfs_cb_stat_to_errno(status);
}
/*
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d98d0213285..54b60bfceb8 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -230,9 +230,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
dp->dl_client = clp;
get_nfs4_file(fp);
dp->dl_file = fp;
- dp->dl_vfs_file = find_readable_file(fp);
- get_file(dp->dl_vfs_file);
- dp->dl_flock = NULL;
dp->dl_type = type;
dp->dl_stateid.si_boot = boot_time;
dp->dl_stateid.si_stateownerid = current_delegid++;
@@ -241,8 +238,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_stateid *stp, struct svc_f
fh_copy_shallow(&dp->dl_fh, &current_fh->fh_handle);
dp->dl_time = 0;
atomic_set(&dp->dl_count, 1);
- list_add(&dp->dl_perfile, &fp->fi_delegations);
- list_add(&dp->dl_perclnt, &clp->cl_delegations);
INIT_WORK(&dp->dl_recall.cb_work, nfsd4_do_callback_rpc);
return dp;
}
@@ -253,36 +248,30 @@ nfs4_put_delegation(struct nfs4_delegation *dp)
if (atomic_dec_and_test(&dp->dl_count)) {
dprintk("NFSD: freeing dp %p\n",dp);
put_nfs4_file(dp->dl_file);
- fput(dp->dl_vfs_file);
kmem_cache_free(deleg_slab, dp);
num_delegations--;
}
}
-/* Remove the associated file_lock first, then remove the delegation.
- * lease_modify() is called to remove the FS_LEASE file_lock from
- * the i_flock list, eventually calling nfsd's lock_manager
- * fl_release_callback.
- */
-static void
-nfs4_close_delegation(struct nfs4_delegation *dp)
+static void nfs4_put_deleg_lease(struct nfs4_file *fp)
{
- dprintk("NFSD: close_delegation dp %p\n",dp);
- /* XXX: do we even need this check?: */
- if (dp->dl_flock)
- vfs_setlease(dp->dl_vfs_file, F_UNLCK, &dp->dl_flock);
+ if (atomic_dec_and_test(&fp->fi_delegees)) {
+ vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease);
+ fp->fi_lease = NULL;
+ fp->fi_deleg_file = NULL;
+ }
}
/* Called under the state lock. */
static void
unhash_delegation(struct nfs4_delegation *dp)
{
- list_del_init(&dp->dl_perfile);
list_del_init(&dp->dl_perclnt);
spin_lock(&recall_lock);
+ list_del_init(&dp->dl_perfile);
list_del_init(&dp->dl_recall_lru);
spin_unlock(&recall_lock);
- nfs4_close_delegation(dp);
+ nfs4_put_deleg_lease(dp->dl_file);
nfs4_put_delegation(dp);
}
@@ -958,8 +947,6 @@ expire_client(struct nfs4_client *clp)
spin_lock(&recall_lock);
while (!list_empty(&clp->cl_delegations)) {
dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
- dprintk("NFSD: expire client. dp %p, fp %p\n", dp,
- dp->dl_flock);
list_del_init(&dp->dl_perclnt);
list_move(&dp->dl_recall_lru, &reaplist);
}
@@ -2078,6 +2065,7 @@ alloc_init_file(struct inode *ino)
fp->fi_inode = igrab(ino);
fp->fi_id = current_fileid++;
fp->fi_had_conflict = false;
+ fp->fi_lease = NULL;
memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
memset(fp->fi_access, 0, sizeof(fp->fi_access));
spin_lock(&recall_lock);
@@ -2329,23 +2317,8 @@ nfs4_file_downgrade(struct nfs4_file *fp, unsigned int share_access)
nfs4_file_put_access(fp, O_RDONLY);
}
-/*
- * Spawn a thread to perform a recall on the delegation represented
- * by the lease (file_lock)
- *
- * Called from break_lease() with lock_flocks() held.
- * Note: we assume break_lease will only call this *once* for any given
- * lease.
- */
-static
-void nfsd_break_deleg_cb(struct file_lock *fl)
+static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
{
- struct nfs4_delegation *dp = (struct nfs4_delegation *)fl->fl_owner;
-
- dprintk("NFSD nfsd_break_deleg_cb: dp %p fl %p\n",dp,fl);
- if (!dp)
- return;
-
/* We're assuming the state code never drops its reference
* without first removing the lease. Since we're in this lease
* callback (and since the lease code is serialized by the kernel
@@ -2353,22 +2326,35 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
* it's safe to take a reference: */
atomic_inc(&dp->dl_count);
- spin_lock(&recall_lock);
list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
- spin_unlock(&recall_lock);
/* only place dl_time is set. protected by lock_flocks*/
dp->dl_time = get_seconds();
+ nfsd4_cb_recall(dp);
+}
+
+/* Called from break_lease() with lock_flocks() held. */
+static void nfsd_break_deleg_cb(struct file_lock *fl)
+{
+ struct nfs4_file *fp = (struct nfs4_file *)fl->fl_owner;
+ struct nfs4_delegation *dp;
+
+ BUG_ON(!fp);
+ /* We assume break_lease is only called once per lease: */
+ BUG_ON(fp->fi_had_conflict);
/*
* We don't want the locks code to timeout the lease for us;
- * we'll remove it ourself if the delegation isn't returned
- * in time.
+ * we'll remove it ourself if a delegation isn't returned
+ * in time:
*/
fl->fl_break_time = 0;
- dp->dl_file->fi_had_conflict = true;
- nfsd4_cb_recall(dp);
+ spin_lock(&recall_lock);
+ fp->fi_had_conflict = true;
+ list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
+ nfsd_break_one_deleg(dp);
+ spin_unlock(&recall_lock);
}
static
@@ -2459,13 +2445,15 @@ nfs4_check_delegmode(struct nfs4_delegation *dp, int flags)
static struct nfs4_delegation *
find_delegation_file(struct nfs4_file *fp, stateid_t *stid)
{
- struct nfs4_delegation *dp;
+ struct nfs4_delegation *dp = NULL;
+ spin_lock(&recall_lock);
list_for_each_entry(dp, &fp->fi_delegations, dl_perfile) {
if (dp->dl_stateid.si_stateownerid == stid->si_stateownerid)
- return dp;
+ break;
}
- return NULL;
+ spin_unlock(&recall_lock);
+ return dp;
}
int share_access_to_flags(u32 share_access)
@@ -2641,6 +2629,66 @@ static bool nfsd4_cb_channel_good(struct nfs4_client *clp)
return clp->cl_minorversion && clp->cl_cb_state == NFSD4_CB_UNKNOWN;
}
+static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp, int flag)
+{
+ struct file_lock *fl;
+
+ fl = locks_alloc_lock();
+ if (!fl)
+ return NULL;
+ locks_init_lock(fl);
+ fl->fl_lmops = &nfsd_lease_mng_ops;
+ fl->fl_flags = FL_LEASE;
+ fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
+ fl->fl_end = OFFSET_MAX;
+ fl->fl_owner = (fl_owner_t)(dp->dl_file);
+ fl->fl_pid = current->tgid;
+ return fl;
+}
+
+static int nfs4_setlease(struct nfs4_delegation *dp, int flag)
+{
+ struct nfs4_file *fp = dp->dl_file;
+ struct file_lock *fl;
+ int status;
+
+ fl = nfs4_alloc_init_lease(dp, flag);
+ if (!fl)
+ return -ENOMEM;
+ fl->fl_file = find_readable_file(fp);
+ list_add(&dp->dl_perclnt, &dp->dl_client->cl_delegations);
+ status = vfs_setlease(fl->fl_file, fl->fl_type, &fl);
+ if (status) {
+ list_del_init(&dp->dl_perclnt);
+ locks_free_lock(fl);
+ return -ENOMEM;
+ }
+ fp->fi_lease = fl;
+ fp->fi_deleg_file = fl->fl_file;
+ get_file(fp->fi_deleg_file);
+ atomic_set(&fp->fi_delegees, 1);
+ list_add(&dp->dl_perfile, &fp->fi_delegations);
+ return 0;
+}
+
+static int nfs4_set_delegation(struct nfs4_delegation *dp, int flag)
+{
+ struct nfs4_file *fp = dp->dl_file;
+
+ if (!fp->fi_lease)
+ return nfs4_setlease(dp, flag);
+ spin_lock(&recall_lock);
+ if (fp->fi_had_conflict) {
+ spin_unlock(&recall_lock);
+ return -EAGAIN;
+ }
+ atomic_inc(&fp->fi_delegees);
+ list_add(&dp->dl_perfile, &fp->fi_delegations);
+ spin_unlock(&recall_lock);
+ list_add(&dp->dl_perclnt, &dp->dl_client->cl_delegations);
+ return 0;
+}
+
/*
* Attempt to hand out a delegation.
*/
@@ -2650,7 +2698,6 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
struct nfs4_delegation *dp;
struct nfs4_stateowner *sop = stp->st_stateowner;
int cb_up;
- struct file_lock *fl;
int status, flag = 0;
cb_up = nfsd4_cb_channel_good(sop->so_client);
@@ -2681,36 +2728,11 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
}
dp = alloc_init_deleg(sop->so_client, stp, fh, flag);
- if (dp == NULL) {
- flag = NFS4_OPEN_DELEGATE_NONE;
- goto out;
- }
- status = -ENOMEM;
- fl = locks_alloc_lock();
- if (!fl)
- goto out;
- locks_init_lock(fl);
- fl->fl_lmops = &nfsd_lease_mng_ops;
- fl->fl_flags = FL_LEASE;
- fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
- fl->fl_end = OFFSET_MAX;
- fl->fl_owner = (fl_owner_t)dp;
- fl->fl_file = find_readable_file(stp->st_file);
- BUG_ON(!fl->fl_file);
- fl->fl_pid = current->tgid;
- dp->dl_flock = fl;
-
- /* vfs_setlease checks to see if delegation should be handed out.
- * the lock_manager callback fl_change is used
- */
- if ((status = vfs_setlease(fl->fl_file, fl->fl_type, &fl))) {
- dprintk("NFSD: setlease failed [%d], no delegation\n", status);
- dp->dl_flock = NULL;
- locks_free_lock(fl);
- unhash_delegation(dp);
- flag = NFS4_OPEN_DELEGATE_NONE;
- goto out;
- }
+ if (dp == NULL)
+ goto out_no_deleg;
+ status = nfs4_set_delegation(dp, flag);
+ if (status)
+ goto out_free;
memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid));
@@ -2722,6 +2744,12 @@ out:
&& open->op_delegate_type != NFS4_OPEN_DELEGATE_NONE)
dprintk("NFSD: WARNING: refusing delegation reclaim\n");
open->op_delegate_type = flag;
+ return;
+out_free:
+ nfs4_put_delegation(dp);
+out_no_deleg:
+ flag = NFS4_OPEN_DELEGATE_NONE;
+ goto out;
}
/*
@@ -2916,8 +2944,6 @@ nfs4_laundromat(void)
test_val = u;
break;
}
- dprintk("NFSD: purging unused delegation dp %p, fp %p\n",
- dp, dp->dl_flock);
list_move(&dp->dl_recall_lru, &reaplist);
}
spin_unlock(&recall_lock);
@@ -3128,7 +3154,7 @@ nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
goto out;
renew_client(dp->dl_client);
if (filpp) {
- *filpp = find_readable_file(dp->dl_file);
+ *filpp = dp->dl_file->fi_deleg_file;
BUG_ON(!*filpp);
}
} else { /* open or lock stateid */
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 3074656ba7b..2d31224b07b 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -83,8 +83,6 @@ struct nfs4_delegation {
atomic_t dl_count; /* ref count */
struct nfs4_client *dl_client;
struct nfs4_file *dl_file;
- struct file *dl_vfs_file;
- struct file_lock *dl_flock;
u32 dl_type;
time_t dl_time;
/* For recall: */
@@ -379,6 +377,9 @@ struct nfs4_file {
*/
atomic_t fi_readers;
atomic_t fi_writers;
+ struct file *fi_deleg_file;
+ struct file_lock *fi_lease;
+ atomic_t fi_delegees;
struct inode *fi_inode;
u32 fi_id; /* used with stateowner->so_id
* for stateid_hashtbl hash */
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 641117f2188..da1d9701f8e 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -808,7 +808,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
if (ra->p_count == 0)
frap = rap;
}
- depth = nfsdstats.ra_size*11/10;
+ depth = nfsdstats.ra_size;
if (!frap) {
spin_unlock(&rab->pb_lock);
return NULL;
@@ -1744,6 +1744,13 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
host_err = nfsd_break_lease(odentry->d_inode);
if (host_err)
goto out_drop_write;
+ if (ndentry->d_inode) {
+ host_err = nfsd_break_lease(ndentry->d_inode);
+ if (host_err)
+ goto out_drop_write;
+ }
+ if (host_err)
+ goto out_drop_write;
host_err = vfs_rename(fdir, odentry, tdir, ndentry);
if (!host_err) {
host_err = commit_metadata(tfhp);
@@ -1812,22 +1819,22 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
if (host_err)
- goto out_nfserr;
+ goto out_put;
host_err = nfsd_break_lease(rdentry->d_inode);
if (host_err)
- goto out_put;
+ goto out_drop_write;
if (type != S_IFDIR)
host_err = vfs_unlink(dirp, rdentry);
else
host_err = vfs_rmdir(dirp, rdentry);
-out_put:
- dput(rdentry);
-
if (!host_err)
host_err = commit_metadata(fhp);
-
+out_drop_write:
mnt_drop_write(fhp->fh_export->ex_path.mnt);
+out_put:
+ dput(rdentry);
+
out_nfserr:
err = nfserrno(host_err);
out:
diff --git a/fs/open.c b/fs/open.c
index e52389e1f05..5a2c6ebc22b 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -790,6 +790,8 @@ struct file *nameidata_to_filp(struct nameidata *nd)
/* Pick up the filp from the open intent */
filp = nd->intent.open.file;
+ nd->intent.open.file = NULL;
+
/* Has the filesystem initialised the file for us? */
if (filp->f_path.dentry == NULL) {
path_get(&nd->path);
diff --git a/fs/proc/array.c b/fs/proc/array.c
index df2b703b9d0..7c99c1cf7e5 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -353,9 +353,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
task_cap(m, task);
task_cpus_allowed(m, task);
cpuset_task_status_allowed(m, task);
-#if defined(CONFIG_S390)
- task_show_regs(m, task);
-#endif
task_context_switch_counts(m, task);
return 0;
}
diff --git a/fs/super.c b/fs/super.c
index 74e149efed8..7e9dd4cc2c0 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -177,6 +177,11 @@ void deactivate_locked_super(struct super_block *s)
struct file_system_type *fs = s->s_type;
if (atomic_dec_and_test(&s->s_active)) {
fs->kill_sb(s);
+ /*
+ * We need to call rcu_barrier so all the delayed rcu free
+ * inodes are flushed before we release the fs module.
+ */
+ rcu_barrier();
put_filesystem(fs);
put_super(s);
} else {