11 files changed, 211 insertions, 288 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 7fb054ba1b60..82f14a1da542 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -2769,7 +2769,7 @@ done:
 	 */
 	if (!p->leave_spinning)
 		btrfs_set_path_blocking(p);
-	if (ret < 0)
+	if (ret < 0 && !p->skip_release_on_error)
 		btrfs_release_path(p);
 	return ret;
 }
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index d6dd49b51ba8..c19444e412be 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -586,6 +586,7 @@ struct btrfs_path {
 	unsigned int skip_locking:1;
 	unsigned int leave_spinning:1;
 	unsigned int search_commit_root:1;
+	unsigned int skip_release_on_error:1;
 };
 
 /*
@@ -3406,6 +3407,10 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
 int verify_dir_item(struct btrfs_root *root,
 		    struct extent_buffer *leaf,
 		    struct btrfs_dir_item *dir_item);
+struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+						 struct btrfs_path *path,
+						 const char *name,
+						 int name_len);
 
 /* orphan.c */
 int btrfs_insert_orphan_item(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index 79e594e341c7..6f61b9b1526f 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -21,10 +21,6 @@
 #include "hash.h"
 #include "transaction.h"
 
-static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
-			      struct btrfs_path *path,
-			      const char *name, int name_len);
-
 /*
  * insert a name into a directory, doing overflow properly if there is a hash
  * collision.  data_size indicates how big the item inserted should be.  On
@@ -383,9 +379,9 @@ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
  * this walks through all the entries in a dir item and finds one
  * for a specific name.
  */
-static struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
-			      struct btrfs_path *path,
-			      const char *name, int name_len)
+struct btrfs_dir_item *btrfs_match_dir_item_name(struct btrfs_root *root,
+						 struct btrfs_path *path,
+						 const char *name, int name_len)
 {
 	struct btrfs_dir_item *dir_item;
 	unsigned long name_ptr;
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 05740b9789e4..9cf20d63cc99 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -27,6 +27,7 @@
 #include "transaction.h"
 #include "xattr.h"
 #include "disk-io.h"
+#include "locking.h"
 
 
 ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
@@ -89,7 +90,7 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
 		       struct inode *inode, const char *name,
 		       const void *value, size_t size, int flags)
 {
-	struct btrfs_dir_item *di;
+	struct btrfs_dir_item *di = NULL;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_path *path;
 	size_t name_len = strlen(name);
@@ -101,84 +102,128 @@ static int do_setxattr(struct btrfs_trans_handle *trans,
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
+	path->skip_release_on_error = 1;
+
+	if (!value) {
+		di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
+					name, name_len, -1);
+		if (!di && (flags & XATTR_REPLACE))
+			ret = -ENODATA;
+		else if (di)
+			ret = btrfs_delete_one_dir_name(trans, root, path, di);
+		goto out;
+	}
 
+	/*
+	 * For a replace we can't just do the insert blindly.
+	 * Do a lookup first (read-only btrfs_search_slot), and return if xattr
+	 * doesn't exist. If it exists, fall down below to the insert/replace
+	 * path - we can't race with a concurrent xattr delete, because the VFS
+	 * locks the inode's i_mutex before calling setxattr or removexattr.
+	 */
 	if (flags & XATTR_REPLACE) {
-		di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name,
-					name_len, -1);
-		if (IS_ERR(di)) {
-			ret = PTR_ERR(di);
-			goto out;
-		} else if (!di) {
+		if(!mutex_is_locked(&inode->i_mutex)) {
+			pr_err("BTRFS: assertion failed: %s, file: %s, line: %d",
+			       "mutex_is_locked(&inode->i_mutex)", __FILE__,
+			       __LINE__);
+			BUG();
+		}
+		di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
+					name, name_len, 0);
+		if (!di) {
 			ret = -ENODATA;
 			goto out;
 		}
-		ret = btrfs_delete_one_dir_name(trans, root, path, di);
-		if (ret)
-			goto out;
 		btrfs_release_path(path);
+		di = NULL;
+	}
 
+	ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
+				      name, name_len, value, size);
+	if (ret == -EOVERFLOW) {
 		/*
-		 * remove the attribute
+		 * We have an existing item in a leaf, split_leaf couldn't
+		 * expand it. That item might have or not a dir_item that
+		 * matches our target xattr, so lets check.
 		 */
-		if (!value)
-			goto out;
-	} else {
-		di = btrfs_lookup_xattr(NULL, root, path, btrfs_ino(inode),
-					name, name_len, 0);
-		if (IS_ERR(di)) {
-			ret = PTR_ERR(di);
+		ret = 0;
+		btrfs_assert_tree_locked(path->nodes[0]);
+		di = btrfs_match_dir_item_name(root, path, name, name_len);
+		if (!di && !(flags & XATTR_REPLACE)) {
+			ret = -ENOSPC;
 			goto out;
 		}
-		if (!di && !value)
-			goto out;
-		btrfs_release_path(path);
+	} else if (ret == -EEXIST) {
+		ret = 0;
+		di = btrfs_match_dir_item_name(root, path, name, name_len);
+		if(!di) { /* logic error */
+			pr_err("BTRFS: assertion failed: %s, file: %s, line: %d",
+			       "di", __FILE__, __LINE__);
+			BUG();
+		}
+	} else if (ret) {
+		goto out;
 	}
 
-again:
-	ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode),
-				      name, name_len, value, size);
-	/*
-	 * If we're setting an xattr to a new value but the new value is say
-	 * exactly BTRFS_MAX_XATTR_SIZE, we could end up with EOVERFLOW getting
-	 * back from split_leaf.  This is because it thinks we'll be extending
-	 * the existing item size, but we're asking for enough space to add the
-	 * item itself.  So if we get EOVERFLOW just set ret to EEXIST and let
-	 * the rest of the function figure it out.
-	 */
-	if (ret == -EOVERFLOW)
+	if (di && (flags & XATTR_CREATE)) {
 		ret = -EEXIST;
+		goto out;
+	}
 
-	if (ret == -EEXIST) {
-		if (flags & XATTR_CREATE)
-			goto out;
+	if (di) {
 		/*
-		 * We can't use the path we already have since we won't have the
-		 * proper locking for a delete, so release the path and
-		 * re-lookup to delete the thing.
+		 * We're doing a replace, and it must be atomic, that is, at
+		 * any point in time we have either the old or the new xattr
+		 * value in the tree. We don't want readers (getxattr and
+		 * listxattrs) to miss a value, this is specially important
+		 * for ACLs.
 		 */
-		btrfs_release_path(path);
-		di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode),
-					name, name_len, -1);
-		if (IS_ERR(di)) {
-			ret = PTR_ERR(di);
-			goto out;
-		} else if (!di) {
-			/* Shouldn't happen but just in case... */
-			btrfs_release_path(path);
-			goto again;
+		const int slot = path->slots[0];
+		struct extent_buffer *leaf = path->nodes[0];
+		const u16 old_data_len = btrfs_dir_data_len(leaf, di);
+		const u32 item_size = btrfs_item_size_nr(leaf, slot);
+		const u32 data_size = sizeof(*di) + name_len + size;
+		struct btrfs_item *item;
+		unsigned long data_ptr;
+		char *ptr;
+
+		if (size > old_data_len) {
+			if (btrfs_leaf_free_space(root, leaf) <
+			    (size - old_data_len)) {
+				ret = -ENOSPC;
+				goto out;
+			}
 		}
 
-		ret = btrfs_delete_one_dir_name(trans, root, path, di);
-		if (ret)
-			goto out;
+		if (old_data_len + name_len + sizeof(*di) == item_size) {
+			/* No other xattrs packed in the same leaf item. */
+			if (size > old_data_len)
+				btrfs_extend_item(root, path,
+						  size - old_data_len);
+			else if (size < old_data_len)
+				btrfs_truncate_item(root, path, data_size, 1);
+		} else {
+			/* There are other xattrs packed in the same item. */
+			ret = btrfs_delete_one_dir_name(trans, root, path, di);
+			if (ret)
+				goto out;
+			btrfs_extend_item(root, path, data_size);
+		}
 
+		item = btrfs_item_nr(NULL, slot);
+		ptr = btrfs_item_ptr(leaf, slot, char);
+		ptr += btrfs_item_size(leaf, item) - data_size;
+		di = (struct btrfs_dir_item *)ptr;
+		btrfs_set_dir_data_len(leaf, di, size);
+		data_ptr = ((unsigned long)(di + 1)) + name_len;
+		write_extent_buffer(leaf, value, data_ptr, size);
+		btrfs_mark_buffer_dirty(leaf);
+	} else {
 		/*
-		 * We have a value to set, so go back and try to insert it now.
+		 * Insert, and we had space for the xattr, so path->slots[0] is
+		 * where our xattr dir_item is and btrfs_insert_xattr_item()
+		 * filled it.
 		 */
-		if (value) {
-			btrfs_release_path(path);
-			goto again;
-		}
 	}
 out:
 	btrfs_free_path(path);
diff --git a/fs/dcache.c b/fs/dcache.c
index e2800926ae05..38c4a302fab4 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1053,13 +1053,13 @@ ascend:
 		/* might go back up the wrong parent if we have had a rename. */
 		if (!locked && read_seqretry(&rename_lock, seq))
 			goto rename_retry;
-		next = child->d_child.next;
-		while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)) {
+		/* go into the first sibling still alive */
+		do {
+			next = child->d_child.next;
 			if (next == &this_parent->d_subdirs)
 				goto ascend;
 			child = list_entry(next, struct dentry, d_child);
-			next = next->next;
-		}
+		} while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED));
 		rcu_read_unlock();
 		goto resume;
 	}
@@ -2977,13 +2977,13 @@ ascend:
 		/* might go back up the wrong parent if we have had a rename. */
 		if (!locked && read_seqretry(&rename_lock, seq))
 			goto rename_retry;
-		next = child->d_child.next;
-		while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)) {
+		/* go into the first sibling still alive */
+		do {
+			next = child->d_child.next;
 			if (next == &this_parent->d_subdirs)
 				goto ascend;
 			child = list_entry(next, struct dentry, d_child);
-			next = next->next;
-		}
+		} while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED));
 		rcu_read_unlock();
 		goto resume;
 	}
diff --git a/fs/exec.c b/fs/exec.c
index dd6aa61c8548..acbd7ac2deda 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1265,6 +1265,53 @@ static int check_unsafe_exec(struct linux_binprm *bprm)
 	return res;
 }
 
+static void bprm_fill_uid(struct linux_binprm *bprm)
+{
+	struct inode *inode;
+	unsigned int mode;
+	kuid_t uid;
+	kgid_t gid;
+
+	/* clear any previous set[ug]id data from a previous binary */
+	bprm->cred->euid = current_euid();
+	bprm->cred->egid = current_egid();
+
+	if (bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID)
+		return;
+
+	if (current->no_new_privs)
+		return;
+
+	inode = file_inode(bprm->file);
+	mode = ACCESS_ONCE(inode->i_mode);
+	if (!(mode & (S_ISUID|S_ISGID)))
+		return;
+
+	/* Be careful if suid/sgid is set */
+	mutex_lock(&inode->i_mutex);
+
+	/* reload atomically mode/uid/gid now that lock held */
+	mode = inode->i_mode;
+	uid = inode->i_uid;
+	gid = inode->i_gid;
+	mutex_unlock(&inode->i_mutex);
+
+	/* We ignore suid/sgid if there are no mappings for them in the ns */
+	if (!kuid_has_mapping(bprm->cred->user_ns, uid) ||
+		 !kgid_has_mapping(bprm->cred->user_ns, gid))
+		return;
+
+	if (mode & S_ISUID) {
+		bprm->per_clear |= PER_CLEAR_ON_SETID;
+		bprm->cred->euid = uid;
+	}
+
+	if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
+		bprm->per_clear |= PER_CLEAR_ON_SETID;
+		bprm->cred->egid = gid;
+	}
+}
+
 /* 
  * Fill the binprm structure from the inode. 
  * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
@@ -1273,39 +1320,12 @@ static int check_unsafe_exec(struct linux_binprm *bprm)
  */
 int prepare_binprm(struct linux_binprm *bprm)
 {
-	umode_t mode;
-	struct inode * inode = file_inode(bprm->file);
 	int retval;
 
-	mode = inode->i_mode;
 	if (bprm->file->f_op == NULL)
 		return -EACCES;
 
-	/* clear any previous set[ug]id data from a previous binary */
-	bprm->cred->euid = current_euid();
-	bprm->cred->egid = current_egid();
-
-	if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) &&
-	    !current->no_new_privs &&
-	    kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) &&
-	    kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) {
-		/* Set-uid? */
-		if (mode & S_ISUID) {
-			bprm->per_clear |= PER_CLEAR_ON_SETID;
-			bprm->cred->euid = inode->i_uid;
-		}
-
-		/* Set-gid? */
-		/*
-		 * If setgid is set but no group execute bit then this
-		 * is a candidate for mandatory locking, not a setgid
-		 * executable.
-		 */
-		if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) {
-			bprm->per_clear |= PER_CLEAR_ON_SETID;
-			bprm->cred->egid = inode->i_gid;
-		}
-	}
+	bprm_fill_uid(bprm);
 
 	/* fill in binprm security blob */
 	retval = security_bprm_set_creds(bprm);
diff --git a/fs/file_table.c b/fs/file_table.c
index 54a34be444f9..28f02a7cbba1 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -36,8 +36,6 @@ struct files_stat_struct files_stat = {
 	.max_files = NR_FILE
 };
 
-DEFINE_STATIC_LGLOCK(files_lglock);
-
 /* SLAB cache for file structures */
 static struct kmem_cache *filp_cachep __read_mostly;
 
@@ -134,7 +132,6 @@ struct file *get_empty_filp(void)
 		return ERR_PTR(error);
 	}
 
-	INIT_LIST_HEAD(&f->f_u.fu_list);
 	atomic_long_set(&f->f_count, 1);
 	rwlock_init(&f->f_owner.lock);
 	spin_lock_init(&f->f_lock);
@@ -265,18 +262,15 @@ static void __fput(struct file *file)
 	mntput(mnt);
 }
 
-static DEFINE_SPINLOCK(delayed_fput_lock);
-static LIST_HEAD(delayed_fput_list);
+static LLIST_HEAD(delayed_fput_list);
 static void delayed_fput(struct work_struct *unused)
 {
-	LIST_HEAD(head);
-	spin_lock_irq(&delayed_fput_lock);
-	list_splice_init(&delayed_fput_list, &head);
-	spin_unlock_irq(&delayed_fput_lock);
-	while (!list_empty(&head)) {
-		struct file *f = list_first_entry(&head, struct file, f_u.fu_list);
-		list_del_init(&f->f_u.fu_list);
-		__fput(f);
+	struct llist_node *node = llist_del_all(&delayed_fput_list);
+	struct llist_node *next;
+
+	for (; node; node = next) {
+		next = llist_next(node);
+		__fput(llist_entry(node, struct file, f_u.fu_llist));
 	}
 }
 
@@ -306,18 +300,15 @@ void fput(struct file *file)
 {
 	if (atomic_long_dec_and_test(&file->f_count)) {
 		struct task_struct *task = current;
-		unsigned long flags;
 
-		file_sb_list_del(file);
 		if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) {
 			init_task_work(&file->f_u.fu_rcuhead, ____fput);
 			if (!task_work_add(task, &file->f_u.fu_rcuhead, true))
 				return;
 		}
-		spin_lock_irqsave(&delayed_fput_lock, flags);
-		list_add(&file->f_u.fu_list, &delayed_fput_list);
-		schedule_work(&delayed_fput_work);
-		spin_unlock_irqrestore(&delayed_fput_lock, flags);
+
+		if (llist_add(&file->f_u.fu_llist, &delayed_fput_list))
+			schedule_work(&delayed_fput_work);
 	}
 }
 
@@ -333,7 +324,6 @@ void __fput_sync(struct file *file)
 {
 	if (atomic_long_dec_and_test(&file->f_count)) {
 		struct task_struct *task = current;
-		file_sb_list_del(file);
 		BUG_ON(!(task->flags & PF_KTHREAD));
 		__fput(file);
 	}
@@ -345,127 +335,10 @@ void put_filp(struct file *file)
 {
 	if (atomic_long_dec_and_test(&file->f_count)) {
 		security_file_free(file);
-		file_sb_list_del(file);
 		file_free(file);
 	}
 }
 
-static inline int file_list_cpu(struct file *file)
-{
-#ifdef CONFIG_SMP
-	return file->f_sb_list_cpu;
-#else
-	return smp_processor_id();
-#endif
-}
-
-/* helper for file_sb_list_add to reduce ifdefs */
-static inline void __file_sb_list_add(struct file *file, struct super_block *sb)
-{
-	struct list_head *list;
-#ifdef CONFIG_SMP
-	int cpu;
-	cpu = smp_processor_id();
-	file->f_sb_list_cpu = cpu;
-	list = per_cpu_ptr(sb->s_files, cpu);
-#else
-	list = &sb->s_files;
-#endif
-	list_add(&file->f_u.fu_list, list);
-}
-
-/**
- * file_sb_list_add - add a file to the sb's file list
- * @file: file to add
- * @sb: sb to add it to
- *
- * Use this function to associate a file with the superblock of the inode it
- * refers to.
- */
-void file_sb_list_add(struct file *file, struct super_block *sb)
-{
-	lg_local_lock(&files_lglock);
-	__file_sb_list_add(file, sb);
-	lg_local_unlock(&files_lglock);
-}
-
-/**
- * file_sb_list_del - remove a file from the sb's file list
- * @file: file to remove
- * @sb: sb to remove it from
- *
- * Use this function to remove a file from its superblock.
- */
-void file_sb_list_del(struct file *file)
-{
-	if (!list_empty(&file->f_u.fu_list)) {
-		lg_local_lock_cpu(&files_lglock, file_list_cpu(file));
-		list_del_init(&file->f_u.fu_list);
-		lg_local_unlock_cpu(&files_lglock, file_list_cpu(file));
-	}
-}
-
-#ifdef CONFIG_SMP
-
-/*
- * These macros iterate all files on all CPUs for a given superblock.
- * files_lglock must be held globally.
- */
-#define do_file_list_for_each_entry(__sb, __file)		\
-{								\
-	int i;							\
-	for_each_possible_cpu(i) {				\
-		struct list_head *list;				\
-		list = per_cpu_ptr((__sb)->s_files, i);		\
-		list_for_each_entry((__file), list, f_u.fu_list)
-
-#define while_file_list_for_each_entry				\
-	}							\
-}
-
-#else
-
-#define do_file_list_for_each_entry(__sb, __file)		\
-{								\
-	struct list_head *list;					\
-	list = &(sb)->s_files;					\
-	list_for_each_entry((__file), list, f_u.fu_list)
-
-#define while_file_list_for_each_entry				\
-}
-
-#endif
-
-/**
- *	mark_files_ro - mark all files read-only
- *	@sb: superblock in question
- *
- *	All files are marked read-only.  We don't care about pending
- *	delete files so this should be used in 'force' mode only.
- */
-void mark_files_ro(struct super_block *sb)
-{
-	struct file *f;
-
-	lg_global_lock(&files_lglock);
-	do_file_list_for_each_entry(sb, f) {
-		if (!S_ISREG(file_inode(f)->i_mode))
-		       continue;
-		if (!file_count(f))
-			continue;
-		if (!(f->f_mode & FMODE_WRITE))
-			continue;
-		spin_lock(&f->f_lock);
-		f->f_mode &= ~FMODE_WRITE;
-		spin_unlock(&f->f_lock);
-		if (file_check_writeable(f) != 0)
-			continue;
-		__mnt_drop_write(f->f_path.mnt);
-		file_release_write(f);
-	} while_file_list_for_each_entry;
-	lg_global_unlock(&files_lglock);
-}
-
 void __init files_init(unsigned long mempages)
 { 
 	unsigned long n;
@@ -481,6 +354,5 @@ void __init files_init(unsigned long mempages)
 	n = (mempages * (PAGE_SIZE / 1024)) / 10;
 	files_stat.max_files = max_t(unsigned long, n, NR_FILE);
 	files_defer_init();
-	lg_lock_init(&files_lglock, "files_lglock");
 	percpu_counter_init(&nr_files, 0);
 } 
diff --git a/fs/internal.h b/fs/internal.h
index 68121584ae37..2ffa65a36ca0 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -74,9 +74,6 @@ extern void chroot_fs_refs(const struct path *, const struct path *);
 /*
  * file_table.c
  */
-extern void file_sb_list_add(struct file *f, struct super_block *sb);
-extern void file_sb_list_del(struct file *f);
-extern void mark_files_ro(struct super_block *);
 extern struct file *get_empty_filp(void);
 
 /*
diff --git a/fs/open.c b/fs/open.c
index 86092bde31f4..5f129683b7d7 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -674,7 +674,6 @@ static int do_dentry_open(struct file *f,
 	}
 
 	f->f_mapping = inode->i_mapping;
-	file_sb_list_add(f, inode->i_sb);
 
 	if (unlikely(f->f_mode & FMODE_PATH)) {
 		f->f_op = &empty_fops;
@@ -709,7 +708,6 @@ static int do_dentry_open(struct file *f,
 
 cleanup_all:
 	fops_put(f->f_op);
-	file_sb_list_del(f);
 	if (f->f_mode & FMODE_WRITE) {
 		if (!special_file(inode->i_mode)) {
 			/*
diff --git a/fs/pipe.c b/fs/pipe.c
index 0e0752ef2715..3e7ab278bb0c 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -117,25 +117,27 @@ void pipe_wait(struct pipe_inode_info *pipe)
 }
 
 static int
-pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
-			int atomic)
+pipe_iov_copy_from_user(void *addr, int *offset, struct iovec *iov,
+			size_t *remaining, int atomic)
 {
 	unsigned long copy;
 
-	while (len > 0) {
+	while (*remaining > 0) {
 		while (!iov->iov_len)
 			iov++;
-		copy = min_t(unsigned long, len, iov->iov_len);
+		copy = min_t(unsigned long, *remaining, iov->iov_len);
 
 		if (atomic) {
-			if (__copy_from_user_inatomic(to, iov->iov_base, copy))
+			if (__copy_from_user_inatomic(addr + *offset,
+						      iov->iov_base, copy))
 				return -EFAULT;
 		} else {
-			if (copy_from_user(to, iov->iov_base, copy))
+			if (copy_from_user(addr + *offset,
+					   iov->iov_base, copy))
 				return -EFAULT;
 		}
-		to += copy;
-		len -= copy;
+		*offset += copy;
+		*remaining -= copy;
 		iov->iov_base += copy;
 		iov->iov_len -= copy;
 	}
@@ -143,25 +145,27 @@ pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
 }
 
 static int
-pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len,
-		      int atomic)
+pipe_iov_copy_to_user(struct iovec *iov, void *addr, int *offset,
+		      size_t *remaining, int atomic)
 {
 	unsigned long copy;
 
-	while (len > 0) {
+	while (*remaining > 0) {
 		while (!iov->iov_len)
 			iov++;
-		copy = min_t(unsigned long, len, iov->iov_len);
+		copy = min_t(unsigned long, *remaining, iov->iov_len);
 
 		if (atomic) {
-			if (__copy_to_user_inatomic(iov->iov_base, from, copy))
+			if (__copy_to_user_inatomic(iov->iov_base,
+						    addr + *offset, copy))
 				return -EFAULT;
 		} else {
-			if (copy_to_user(iov->iov_base, from, copy))
+			if (copy_to_user(iov->iov_base,
+					 addr + *offset, copy))
 				return -EFAULT;
 		}
-		from += copy;
-		len -= copy;
+		*offset += copy;
+		*remaining -= copy;
 		iov->iov_base += copy;
 		iov->iov_len -= copy;
 	}
@@ -395,7 +399,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
 			struct pipe_buffer *buf = pipe->bufs + curbuf;
 			const struct pipe_buf_operations *ops = buf->ops;
 			void *addr;
-			size_t chars = buf->len;
+			size_t chars = buf->len, remaining;
 			int error, atomic;
 
 			if (chars > total_len)
@@ -409,9 +413,11 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,
 			}
 
 			atomic = !iov_fault_in_pages_write(iov, chars);
+			remaining = chars;
 redo:
 			addr = ops->map(pipe, buf, atomic);
-			error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
+			error = pipe_iov_copy_to_user(iov, addr, &buf->offset,
+						      &remaining, atomic);
 			ops->unmap(pipe, buf, addr);
 			if (unlikely(error)) {
 				/*
@@ -426,7 +432,6 @@ redo:
 				break;
 			}
 			ret += chars;
-			buf->offset += chars;
 			buf->len -= chars;
 
 			/* Was it a packet buffer? Clean up and exit */
@@ -531,6 +536,7 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
 		if (ops->can_merge && offset + chars <= PAGE_SIZE) {
 			int error, atomic = 1;
 			void *addr;
+			size_t remaining = chars;
 
 			error = ops->confirm(pipe, buf);
 			if (error)
@@ -539,8 +545,8 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,
 			iov_fault_in_pages_read(iov, chars);
 redo1:
 			addr = ops->map(pipe, buf, atomic);
-			error = pipe_iov_copy_from_user(offset + addr, iov,
-							chars, atomic);
+			error = pipe_iov_copy_from_user(addr, &offset, iov,
+							&remaining, atomic);
 			ops->unmap(pipe, buf, addr);
 			ret = error;
 			do_wakeup = 1;
@@ -575,6 +581,8 @@ redo1:
 			struct page *page = pipe->tmp_page;
 			char *src;
 			int error, atomic = 1;
+			int offset = 0;
+			size_t remaining;
 
 			if (!page) {
 				page = alloc_page(GFP_HIGHUSER);
@@ -595,14 +603,15 @@ redo1:
 				chars = total_len;
 
 			iov_fault_in_pages_read(iov, chars);
+			remaining = chars;
 redo2:
 			if (atomic)
 				src = kmap_atomic(page);
 			else
 				src = kmap(page);
 
-			error = pipe_iov_copy_from_user(src, iov, chars,
-							atomic);
+			error = pipe_iov_copy_from_user(src, &offset, iov,
+							&remaining, atomic);
 			if (atomic)
 				kunmap_atomic(src);
 			else
diff --git a/fs/super.c b/fs/super.c
index e028b508db25..97280e76179c 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -163,19 +163,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
 			s = NULL;
 			goto out;
 		}
-#ifdef CONFIG_SMP
-		s->s_files = alloc_percpu(struct list_head);
-		if (!s->s_files)
-			goto err_out;
-		else {
-			int i;
-
-			for_each_possible_cpu(i)
-				INIT_LIST_HEAD(per_cpu_ptr(s->s_files, i));
-		}
-#else
-		INIT_LIST_HEAD(&s->s_files);
-#endif
 		if (init_sb_writers(s, type))
 			goto err_out;
 		s->s_flags = flags;
@@ -225,10 +212,6 @@ out:
 	return s;
 err_out:
 	security_sb_free(s);
-#ifdef CONFIG_SMP
-	if (s->s_files)
-		free_percpu(s->s_files);
-#endif
 	destroy_sb_writers(s);
 	kfree(s);
 	s = NULL;
@@ -243,9 +226,6 @@ err_out:
  */
 static inline void destroy_super(struct super_block *s)
 {
-#ifdef CONFIG_SMP
-	free_percpu(s->s_files);
-#endif
 	destroy_sb_writers(s);
 	security_sb_free(s);
 	WARN_ON(!list_empty(&s->s_mounts));
@@ -727,7 +707,8 @@ int do_remount_sb(struct super_block *sb, int flags, void *data, int force)
 	   make sure there are no rw files opened */
 	if (remount_ro) {
 		if (force) {
-			mark_files_ro(sb);
+			sb->s_readonly_remount = 1;
+			smp_wmb();
 		} else {
 			retval = sb_prepare_remount_readonly(sb);
 			if (retval)