aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-09-10 18:19:42 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-09-10 18:19:42 -0700
commit33e247c7e58d335d70ecb84fd869091e2e4b8dcb (patch)
treee8561e1993dff03f8e56d10a5795fe9d379a3390 /fs
parentd71fc239b6915a8b750e9a447311029ff45b6580 (diff)
parent452e06af1f0149b01201f94264d452cd7a95db7a (diff)
Merge branch 'akpm' (patches from Andrew)
Merge third patch-bomb from Andrew Morton: - even more of the rest of MM - lib/ updates - checkpatch updates - small changes to a few scruffy filesystems - kmod fixes/cleanups - kexec updates - a dma-mapping cleanup series from hch * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (81 commits) dma-mapping: consolidate dma_set_mask dma-mapping: consolidate dma_supported dma-mapping: cosolidate dma_mapping_error dma-mapping: consolidate dma_{alloc,free}_noncoherent dma-mapping: consolidate dma_{alloc,free}_{attrs,coherent} mm: use vma_is_anonymous() in create_huge_pmd() and wp_huge_pmd() mm: make sure all file VMAs have ->vm_ops set mm, mpx: add "vm_flags_t vm_flags" arg to do_mmap_pgoff() mm: mark most vm_operations_struct const namei: fix warning while make xmldocs caused by namei.c ipc: convert invalid scenarios to use WARN_ON zlib_deflate/deftree: remove bi_reverse() lib/decompress_unlzma: Do a NULL check for pointer lib/decompressors: use real out buf size for gunzip with kernel fs/affs: make root lookup from blkdev logical size sysctl: fix int -> unsigned long assignments in INT_MIN case kexec: export KERNEL_IMAGE_SIZE to vmcoreinfo kexec: align crash_notes allocation to make it be inside one physical page kexec: remove unnecessary test in kimage_alloc_crash_control_pages() kexec: split kexec_load syscall from kexec core code ...
Diffstat (limited to 'fs')
-rw-r--r--fs/affs/super.c8
-rw-r--r--fs/ceph/addr.c2
-rw-r--r--fs/cifs/file.c2
-rw-r--r--fs/coda/upcall.c6
-rw-r--r--fs/coredump.c46
-rw-r--r--fs/hfs/bnode.c9
-rw-r--r--fs/hfs/brec.c20
-rw-r--r--fs/hfsplus/bnode.c3
-rw-r--r--fs/namei.c2
-rw-r--r--fs/proc/base.c113
-rw-r--r--fs/proc/generic.c44
-rw-r--r--fs/proc/page.c65
-rw-r--r--fs/proc/task_mmu.c5
-rw-r--r--fs/seq_file.c42
14 files changed, 242 insertions, 125 deletions
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 3f89c9e05b40..5b50c4ca43a7 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -18,6 +18,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/writeback.h>
+#include <linux/blkdev.h>
#include "affs.h"
static int affs_statfs(struct dentry *dentry, struct kstatfs *buf);
@@ -352,18 +353,19 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
* blocks, we will have to change it.
*/
- size = sb->s_bdev->bd_inode->i_size >> 9;
+ size = i_size_read(sb->s_bdev->bd_inode) >> 9;
pr_debug("initial blocksize=%d, #blocks=%d\n", 512, size);
affs_set_blocksize(sb, PAGE_SIZE);
/* Try to find root block. Its location depends on the block size. */
- i = 512;
- j = 4096;
+ i = bdev_logical_block_size(sb->s_bdev);
+ j = PAGE_SIZE;
if (blocksize > 0) {
i = j = blocksize;
size = size / (blocksize / 512);
}
+
for (blocksize = i; blocksize <= j; blocksize <<= 1, size >>= 1) {
sbi->s_root_block = root_block;
if (root_block < 0)
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 890c50971a69..a268abfe60ac 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1593,7 +1593,7 @@ out:
return err;
}
-static struct vm_operations_struct ceph_vmops = {
+static const struct vm_operations_struct ceph_vmops = {
.fault = ceph_filemap_fault,
.page_mkwrite = ceph_page_mkwrite,
};
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 3f50cee79df9..e2a6af1508af 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -3216,7 +3216,7 @@ cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
return VM_FAULT_LOCKED;
}
-static struct vm_operations_struct cifs_file_vm_ops = {
+static const struct vm_operations_struct cifs_file_vm_ops = {
.fault = filemap_fault,
.map_pages = filemap_map_pages,
.page_mkwrite = cifs_page_mkwrite,
diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c
index 9b1ffaa0572e..f6c6c8adbc01 100644
--- a/fs/coda/upcall.c
+++ b/fs/coda/upcall.c
@@ -353,7 +353,7 @@ int venus_readlink(struct super_block *sb, struct CodaFid *fid,
char *result;
insize = max_t(unsigned int,
- INSIZE(readlink), OUTSIZE(readlink)+ *length + 1);
+ INSIZE(readlink), OUTSIZE(readlink)+ *length);
UPARG(CODA_READLINK);
inp->coda_readlink.VFid = *fid;
@@ -361,8 +361,8 @@ int venus_readlink(struct super_block *sb, struct CodaFid *fid,
error = coda_upcall(coda_vcp(sb), insize, &outsize, inp);
if (!error) {
retlen = outp->coda_readlink.count;
- if ( retlen > *length )
- retlen = *length;
+ if (retlen >= *length)
+ retlen = *length - 1;
*length = retlen;
result = (char *)outp + (long)outp->coda_readlink.data;
memcpy(buffer, result, retlen);
diff --git a/fs/coredump.c b/fs/coredump.c
index c5ecde6f3eed..a8f75640ac86 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -513,10 +513,10 @@ void do_coredump(const siginfo_t *siginfo)
const struct cred *old_cred;
struct cred *cred;
int retval = 0;
- int flag = 0;
int ispipe;
struct files_struct *displaced;
- bool need_nonrelative = false;
+ /* require nonrelative corefile path and be extra careful */
+ bool need_suid_safe = false;
bool core_dumped = false;
static atomic_t core_dump_count = ATOMIC_INIT(0);
struct coredump_params cprm = {
@@ -550,9 +550,8 @@ void do_coredump(const siginfo_t *siginfo)
*/
if (__get_dumpable(cprm.mm_flags) == SUID_DUMP_ROOT) {
/* Setuid core dump mode */
- flag = O_EXCL; /* Stop rewrite attacks */
cred->fsuid = GLOBAL_ROOT_UID; /* Dump root private */
- need_nonrelative = true;
+ need_suid_safe = true;
}
retval = coredump_wait(siginfo->si_signo, &core_state);
@@ -633,7 +632,7 @@ void do_coredump(const siginfo_t *siginfo)
if (cprm.limit < binfmt->min_coredump)
goto fail_unlock;
- if (need_nonrelative && cn.corename[0] != '/') {
+ if (need_suid_safe && cn.corename[0] != '/') {
printk(KERN_WARNING "Pid %d(%s) can only dump core "\
"to fully qualified path!\n",
task_tgid_vnr(current), current->comm);
@@ -641,8 +640,35 @@ void do_coredump(const siginfo_t *siginfo)
goto fail_unlock;
}
+ /*
+ * Unlink the file if it exists unless this is a SUID
+ * binary - in that case, we're running around with root
+ * privs and don't want to unlink another user's coredump.
+ */
+ if (!need_suid_safe) {
+ mm_segment_t old_fs;
+
+ old_fs = get_fs();
+ set_fs(KERNEL_DS);
+ /*
+ * If it doesn't exist, that's fine. If there's some
+ * other problem, we'll catch it at the filp_open().
+ */
+ (void) sys_unlink((const char __user *)cn.corename);
+ set_fs(old_fs);
+ }
+
+ /*
+ * There is a race between unlinking and creating the
+ * file, but if that causes an EEXIST here, that's
+ * fine - another process raced with us while creating
+ * the corefile, and the other process won. To userspace,
+ * what matters is that at least one of the two processes
+ * writes its coredump successfully, not which one.
+ */
cprm.file = filp_open(cn.corename,
- O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
+ O_CREAT | 2 | O_NOFOLLOW |
+ O_LARGEFILE | O_EXCL,
0600);
if (IS_ERR(cprm.file))
goto fail_unlock;
@@ -659,11 +685,15 @@ void do_coredump(const siginfo_t *siginfo)
if (!S_ISREG(inode->i_mode))
goto close_fail;
/*
- * Dont allow local users get cute and trick others to coredump
- * into their pre-created files.
+ * Don't dump core if the filesystem changed owner or mode
+ * of the file during file creation. This is an issue when
+ * a process dumps core while its cwd is e.g. on a vfat
+ * filesystem.
*/
if (!uid_eq(inode->i_uid, current_fsuid()))
goto close_fail;
+ if ((inode->i_mode & 0677) != 0600)
+ goto close_fail;
if (!(cprm.file->f_mode & FMODE_CAN_WRITE))
goto close_fail;
if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index d3fa6bd9503e..221719eac5de 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -288,7 +288,6 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
page_cache_release(page);
goto fail;
}
- page_cache_release(page);
node->page[i] = page;
}
@@ -398,11 +397,11 @@ node_error:
void hfs_bnode_free(struct hfs_bnode *node)
{
- //int i;
+ int i;
- //for (i = 0; i < node->tree->pages_per_bnode; i++)
- // if (node->page[i])
- // page_cache_release(node->page[i]);
+ for (i = 0; i < node->tree->pages_per_bnode; i++)
+ if (node->page[i])
+ page_cache_release(node->page[i]);
kfree(node);
}
diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c
index 9f4ee7f52026..6fc766df0461 100644
--- a/fs/hfs/brec.c
+++ b/fs/hfs/brec.c
@@ -131,13 +131,16 @@ skip:
hfs_bnode_write(node, entry, data_off + key_len, entry_len);
hfs_bnode_dump(node);
- if (new_node) {
- /* update parent key if we inserted a key
- * at the start of the first node
- */
- if (!rec && new_node != node)
- hfs_brec_update_parent(fd);
+ /*
+ * update parent key if we inserted a key
+ * at the start of the node and it is not the new node
+ */
+ if (!rec && new_node != node) {
+ hfs_bnode_read_key(node, fd->search_key, data_off + size);
+ hfs_brec_update_parent(fd);
+ }
+ if (new_node) {
hfs_bnode_put(fd->bnode);
if (!new_node->parent) {
hfs_btree_inc_height(tree);
@@ -166,9 +169,6 @@ skip:
goto again;
}
- if (!rec)
- hfs_brec_update_parent(fd);
-
return 0;
}
@@ -366,6 +366,8 @@ again:
if (IS_ERR(parent))
return PTR_ERR(parent);
__hfs_brec_find(parent, fd);
+ if (fd->record < 0)
+ return -ENOENT;
hfs_bnode_dump(parent);
rec = fd->record;
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index 759708fd9331..63924662aaf3 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -454,7 +454,6 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
page_cache_release(page);
goto fail;
}
- page_cache_release(page);
node->page[i] = page;
}
@@ -566,13 +565,11 @@ node_error:
void hfs_bnode_free(struct hfs_bnode *node)
{
-#if 0
int i;
for (i = 0; i < node->tree->pages_per_bnode; i++)
if (node->page[i])
page_cache_release(node->page[i]);
-#endif
kfree(node);
}
diff --git a/fs/namei.c b/fs/namei.c
index 29b927938b8c..726d211db484 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2438,7 +2438,7 @@ done:
/**
* path_mountpoint - look up a path to be umounted
- * @nameidata: lookup context
+ * @nd: lookup context
* @flags: lookup flags
* @path: pointer to container for result
*
diff --git a/fs/proc/base.c b/fs/proc/base.c
index aa50d1ac28fc..b25eee4cead5 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1230,10 +1230,9 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
size_t count, loff_t *ppos)
{
struct inode * inode = file_inode(file);
- char *page, *tmp;
- ssize_t length;
uid_t loginuid;
kuid_t kloginuid;
+ int rv;
rcu_read_lock();
if (current != pid_task(proc_pid(inode), PIDTYPE_PID)) {
@@ -1242,46 +1241,28 @@ static ssize_t proc_loginuid_write(struct file * file, const char __user * buf,
}
rcu_read_unlock();
- if (count >= PAGE_SIZE)
- count = PAGE_SIZE - 1;
-
if (*ppos != 0) {
/* No partial writes. */
return -EINVAL;
}
- page = (char*)__get_free_page(GFP_TEMPORARY);
- if (!page)
- return -ENOMEM;
- length = -EFAULT;
- if (copy_from_user(page, buf, count))
- goto out_free_page;
-
- page[count] = '\0';
- loginuid = simple_strtoul(page, &tmp, 10);
- if (tmp == page) {
- length = -EINVAL;
- goto out_free_page;
- }
+ rv = kstrtou32_from_user(buf, count, 10, &loginuid);
+ if (rv < 0)
+ return rv;
/* is userspace tring to explicitly UNSET the loginuid? */
if (loginuid == AUDIT_UID_UNSET) {
kloginuid = INVALID_UID;
} else {
kloginuid = make_kuid(file->f_cred->user_ns, loginuid);
- if (!uid_valid(kloginuid)) {
- length = -EINVAL;
- goto out_free_page;
- }
+ if (!uid_valid(kloginuid))
+ return -EINVAL;
}
- length = audit_set_loginuid(kloginuid);
- if (likely(length == 0))
- length = count;
-
-out_free_page:
- free_page((unsigned long) page);
- return length;
+ rv = audit_set_loginuid(kloginuid);
+ if (rv < 0)
+ return rv;
+ return count;
}
static const struct file_operations proc_loginuid_operations = {
@@ -1335,8 +1316,9 @@ static ssize_t proc_fault_inject_write(struct file * file,
const char __user * buf, size_t count, loff_t *ppos)
{
struct task_struct *task;
- char buffer[PROC_NUMBUF], *end;
+ char buffer[PROC_NUMBUF];
int make_it_fail;
+ int rv;
if (!capable(CAP_SYS_RESOURCE))
return -EPERM;
@@ -1345,9 +1327,9 @@ static ssize_t proc_fault_inject_write(struct file * file,
count = sizeof(buffer) - 1;
if (copy_from_user(buffer, buf, count))
return -EFAULT;
- make_it_fail = simple_strtol(strstrip(buffer), &end, 0);
- if (*end)
- return -EINVAL;
+ rv = kstrtoint(strstrip(buffer), 0, &make_it_fail);
+ if (rv < 0)
+ return rv;
if (make_it_fail < 0 || make_it_fail > 1)
return -EINVAL;
@@ -1836,8 +1818,6 @@ end_instantiate:
return dir_emit(ctx, name, len, 1, DT_UNKNOWN);
}
-#ifdef CONFIG_CHECKPOINT_RESTORE
-
/*
* dname_to_vma_addr - maps a dentry name into two unsigned longs
* which represent vma start and end addresses.
@@ -1864,11 +1844,6 @@ static int map_files_d_revalidate(struct dentry *dentry, unsigned int flags)
if (flags & LOOKUP_RCU)
return -ECHILD;
- if (!capable(CAP_SYS_ADMIN)) {
- status = -EPERM;
- goto out_notask;
- }
-
inode = d_inode(dentry);
task = get_proc_task(inode);
if (!task)
@@ -1957,6 +1932,29 @@ struct map_files_info {
unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
};
+/*
+ * Only allow CAP_SYS_ADMIN to follow the links, due to concerns about how the
+ * symlinks may be used to bypass permissions on ancestor directories in the
+ * path to the file in question.
+ */
+static const char *
+proc_map_files_follow_link(struct dentry *dentry, void **cookie)
+{
+ if (!capable(CAP_SYS_ADMIN))
+ return ERR_PTR(-EPERM);
+
+ return proc_pid_follow_link(dentry, NULL);
+}
+
+/*
+ * Identical to proc_pid_link_inode_operations except for follow_link()
+ */
+static const struct inode_operations proc_map_files_link_inode_operations = {
+ .readlink = proc_pid_readlink,
+ .follow_link = proc_map_files_follow_link,
+ .setattr = proc_setattr,
+};
+
static int
proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
struct task_struct *task, const void *ptr)
@@ -1972,7 +1970,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
ei = PROC_I(inode);
ei->op.proc_get_link = proc_map_files_get_link;
- inode->i_op = &proc_pid_link_inode_operations;
+ inode->i_op = &proc_map_files_link_inode_operations;
inode->i_size = 64;
inode->i_mode = S_IFLNK;
@@ -1996,10 +1994,6 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
int result;
struct mm_struct *mm;
- result = -EPERM;
- if (!capable(CAP_SYS_ADMIN))
- goto out;
-
result = -ENOENT;
task = get_proc_task(dir);
if (!task)
@@ -2053,10 +2047,6 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx)
struct map_files_info *p;
int ret;
- ret = -EPERM;
- if (!capable(CAP_SYS_ADMIN))
- goto out;
-
ret = -ENOENT;
task = get_proc_task(file_inode(file));
if (!task)
@@ -2245,7 +2235,6 @@ static const struct file_operations proc_timers_operations = {
.llseek = seq_lseek,
.release = seq_release_private,
};
-#endif /* CONFIG_CHECKPOINT_RESTORE */
static int proc_pident_instantiate(struct inode *dir,
struct dentry *dentry, struct task_struct *task, const void *ptr)
@@ -2481,32 +2470,20 @@ static ssize_t proc_coredump_filter_write(struct file *file,
{
struct task_struct *task;
struct mm_struct *mm;
- char buffer[PROC_NUMBUF], *end;
unsigned int val;
int ret;
int i;
unsigned long mask;
- ret = -EFAULT;
- memset(buffer, 0, sizeof(buffer));
- if (count > sizeof(buffer) - 1)
- count = sizeof(buffer) - 1;
- if (copy_from_user(buffer, buf, count))
- goto out_no_task;
-
- ret = -EINVAL;
- val = (unsigned int)simple_strtoul(buffer, &end, 0);
- if (*end == '\n')
- end++;
- if (end - buffer == 0)
- goto out_no_task;
+ ret = kstrtouint_from_user(buf, count, 0, &val);
+ if (ret < 0)
+ return ret;
ret = -ESRCH;
task = get_proc_task(file_inode(file));
if (!task)
goto out_no_task;
- ret = end - buffer;
mm = get_task_mm(task);
if (!mm)
goto out_no_mm;
@@ -2522,7 +2499,9 @@ static ssize_t proc_coredump_filter_write(struct file *file,
out_no_mm:
put_task_struct(task);
out_no_task:
- return ret;
+ if (ret < 0)
+ return ret;
+ return count;
}
static const struct file_operations proc_coredump_filter_operations = {
@@ -2744,9 +2723,7 @@ static const struct inode_operations proc_task_inode_operations;
static const struct pid_entry tgid_base_stuff[] = {
DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
-#ifdef CONFIG_CHECKPOINT_RESTORE
DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations),
-#endif
DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
#ifdef CONFIG_NET
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index e5dee5c3188e..ff3ffc76a937 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -26,7 +26,7 @@
#include "internal.h"
-static DEFINE_SPINLOCK(proc_subdir_lock);
+static DEFINE_RWLOCK(proc_subdir_lock);
static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de)
{
@@ -172,9 +172,9 @@ static int xlate_proc_name(const char *name, struct proc_dir_entry **ret,
{
int rv;
- spin_lock(&proc_subdir_lock);
+ read_lock(&proc_subdir_lock);
rv = __xlate_proc_name(name, ret, residual);
- spin_unlock(&proc_subdir_lock);
+ read_unlock(&proc_subdir_lock);
return rv;
}
@@ -231,11 +231,11 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
{
struct inode *inode;
- spin_lock(&proc_subdir_lock);
+ read_lock(&proc_subdir_lock);
de = pde_subdir_find(de, dentry->d_name.name, dentry->d_name.len);
if (de) {
pde_get(de);
- spin_unlock(&proc_subdir_lock);
+ read_unlock(&proc_subdir_lock);
inode = proc_get_inode(dir->i_sb, de);
if (!inode)
return ERR_PTR(-ENOMEM);
@@ -243,7 +243,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir,
d_add(dentry, inode);
return NULL;
}
- spin_unlock(&proc_subdir_lock);
+ read_unlock(&proc_subdir_lock);
return ERR_PTR(-ENOENT);
}
@@ -270,12 +270,12 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file,
if (!dir_emit_dots(file, ctx))
return 0;
- spin_lock(&proc_subdir_lock);
+ read_lock(&proc_subdir_lock);
de = pde_subdir_first(de);
i = ctx->pos - 2;
for (;;) {
if (!de) {
- spin_unlock(&proc_subdir_lock);
+ read_unlock(&proc_subdir_lock);
return 0;
}
if (!i)
@@ -287,19 +287,19 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *file,
do {
struct proc_dir_entry *next;
pde_get(de);
- spin_unlock(&proc_subdir_lock);
+ read_unlock(&proc_subdir_lock);
if (!dir_emit(ctx, de->name, de->namelen,
de->low_ino, de->mode >> 12)) {
pde_put(de);
return 0;
}
- spin_lock(&proc_subdir_lock);
+ read_lock(&proc_subdir_lock);
ctx->pos++;
next = pde_subdir_next(de);
pde_put(de);
de = next;
} while (de);
- spin_unlock(&proc_subdir_lock);
+ read_unlock(&proc_subdir_lock);
return 1;
}
@@ -338,16 +338,16 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
if (ret)
return ret;
- spin_lock(&proc_subdir_lock);
+ write_lock(&proc_subdir_lock);
dp->parent = dir;
if (pde_subdir_insert(dir, dp) == false) {
WARN(1, "proc_dir_entry '%s/%s' already registered\n",
dir->name, dp->name);
- spin_unlock(&proc_subdir_lock);
+ write_unlock(&proc_subdir_lock);
proc_free_inum(dp->low_ino);
return -EEXIST;
}
- spin_unlock(&proc_subdir_lock);
+ write_unlock(&proc_subdir_lock);
return 0;
}
@@ -549,9 +549,9 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
const char *fn = name;
unsigned int len;
- spin_lock(&proc_subdir_lock);
+ write_lock(&proc_subdir_lock);
if (__xlate_proc_name(name, &parent, &fn) != 0) {
- spin_unlock(&proc_subdir_lock);
+ write_unlock(&proc_subdir_lock);
return;
}
len = strlen(fn);
@@ -559,7 +559,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
de = pde_subdir_find(parent, fn, len);
if (de)
rb_erase(&de->subdir_node, &parent->subdir);
- spin_unlock(&proc_subdir_lock);
+ write_unlock(&proc_subdir_lock);
if (!de) {
WARN(1, "name '%s'\n", name);
return;
@@ -583,16 +583,16 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
const char *fn = name;
unsigned int len;
- spin_lock(&proc_subdir_lock);
+ write_lock(&proc_subdir_lock);
if (__xlate_proc_name(name, &parent, &fn) != 0) {
- spin_unlock(&proc_subdir_lock);
+ write_unlock(&proc_subdir_lock);
return -ENOENT;
}
len = strlen(fn);
root = pde_subdir_find(parent, fn, len);
if (!root) {
- spin_unlock(&proc_subdir_lock);
+ write_unlock(&proc_subdir_lock);
return -ENOENT;
}
rb_erase(&root->subdir_node, &parent->subdir);
@@ -605,7 +605,7 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
de = next;
continue;
}
- spin_unlock(&proc_subdir_lock);
+ write_unlock(&proc_subdir_lock);
proc_entry_rundown(de);
next = de->parent;
@@ -616,7 +616,7 @@ int remove_proc_subtree(const char *name, struct proc_dir_entry *parent)
break;
pde_put(de);
- spin_lock(&proc_subdir_lock);
+ write_lock(&proc_subdir_lock);
de = next;
}
pde_put(root);
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 7eee2d8b97d9..93484034a03d 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -9,12 +9,16 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/hugetlb.h>
+#include <linux/memcontrol.h>
+#include <linux/mmu_notifier.h>
+#include <linux/page_idle.h>
#include <linux/kernel-page-flags.h>
#include <asm/uaccess.h>
#include "internal.h"
#define KPMSIZE sizeof(u64)
#define KPMMASK (KPMSIZE - 1)
+#define KPMBITS (KPMSIZE * BITS_PER_BYTE)
/* /proc/kpagecount - an array exposing page counts
*
@@ -54,6 +58,8 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
pfn++;
out++;
count -= KPMSIZE;
+
+ cond_resched();
}
*ppos += (char __user *)out - buf;
@@ -146,6 +152,9 @@ u64 stable_page_flags(struct page *page)
if (PageBalloon(page))
u |= 1 << KPF_BALLOON;
+ if (page_is_idle(page))
+ u |= 1 << KPF_IDLE;
+
u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked);
u |= kpf_copy_bit(k, KPF_SLAB, PG_slab);
@@ -212,6 +221,8 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
pfn++;
out++;
count -= KPMSIZE;
+
+ cond_resched();
}
*ppos += (char __user *)out - buf;
@@ -225,10 +236,64 @@ static const struct file_operations proc_kpageflags_operations = {
.read = kpageflags_read,
};
+#ifdef CONFIG_MEMCG
+static ssize_t kpagecgroup_read(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ u64 __user *out = (u64 __user *)buf;
+ struct page *ppage;
+ unsigned long src = *ppos;
+ unsigned long pfn;
+ ssize_t ret = 0;
+ u64 ino;
+
+ pfn = src / KPMSIZE;
+ count = min_t(unsigned long, count, (max_pfn * KPMSIZE) - src);
+ if (src & KPMMASK || count & KPMMASK)
+ return -EINVAL;
+
+ while (count > 0) {
+ if (pfn_valid(pfn))
+ ppage = pfn_to_page(pfn);
+ else
+ ppage = NULL;
+
+ if (ppage)
+ ino = page_cgroup_ino(ppage);
+ else
+ ino = 0;
+
+ if (put_user(ino, out)) {
+ ret = -EFAULT;
+ break;
+ }
+
+ pfn++;
+ out++;
+ count -= KPMSIZE;
+
+ cond_resched();
+ }
+
+ *ppos += (char __user *)out - buf;
+ if (!ret)
+ ret = (char __user *)out - buf;
+ return ret;
+}
+
+static const struct file_operations proc_kpagecgroup_operations = {
+ .llseek = mem_lseek,
+ .read = kpagecgroup_read,
+};
+#endif /* CONFIG_MEMCG */
+
static int __init proc_page_init(void)
{
proc_create("kpagecount", S_IRUSR, NULL, &proc_kpagecount_operations);
proc_create("kpageflags", S_IRUSR, NULL, &proc_kpageflags_operations);
+#ifdef CONFIG_MEMCG
+ proc_create("kpagecgroup", S_IRUSR, NULL, &proc_kpagecgroup_operations);
+#endif
return 0;
}
fs_initcall(proc_page_init);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 41f1a50c10c9..e2d46adb54b4 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -13,6 +13,7 @@
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
+#include <linux/page_idle.h>
#include <asm/elf.h>
#include <asm/uaccess.h>
@@ -459,7 +460,7 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
mss->resident += size;
/* Accumulate the size in pages that have been accessed. */
- if (young || PageReferenced(page))
+ if (young || page_is_young(page) || PageReferenced(page))
mss->referenced += size;
mapcount = page_mapcount(page);
if (mapcount >= 2) {
@@ -807,6 +808,7 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
/* Clear accessed and referenced bits. */
pmdp_test_and_clear_young(vma, addr, pmd);
+ test_and_clear_page_young(page);
ClearPageReferenced(page);
out:
spin_unlock(ptl);
@@ -834,6 +836,7 @@ out:
/* Clear accessed and referenced bits. */
ptep_test_and_clear_young(vma, addr, pte);
+ test_and_clear_page_young(page);
ClearPageReferenced(page);
}
pte_unmap_unlock(pte - 1, ptl);
diff --git a/fs/seq_file.c b/fs/seq_file.c
index ce9e39fd5daf..263b125dbcf4 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -12,6 +12,7 @@
#include <linux/slab.h>
#include <linux/cred.h>
#include <linux/mm.h>
+#include <linux/printk.h>
#include <asm/uaccess.h>
#include <asm/page.h>
@@ -773,6 +774,47 @@ void seq_pad(struct seq_file *m, char c)
}
EXPORT_SYMBOL(seq_pad);
+/* A complete analogue of print_hex_dump() */
+void seq_hex_dump(struct seq_file *m, const char *prefix_str, int prefix_type,
+ int rowsize, int groupsize, const void *buf, size_t len,
+ bool ascii)
+{
+ const u8 *ptr = buf;
+ int i, linelen, remaining = len;
+ int ret;
+
+ if (rowsize != 16 && rowsize != 32)
+ rowsize = 16;
+
+ for (i = 0; i < len && !seq_has_overflowed(m); i += rowsize) {
+ linelen = min(remaining, rowsize);
+ remaining -= rowsize;
+
+ switch (prefix_type) {
+ case DUMP_PREFIX_ADDRESS:
+ seq_printf(m, "%s%p: ", prefix_str, ptr + i);
+ break;
+ case DUMP_PREFIX_OFFSET:
+ seq_printf(m, "%s%.8x: ", prefix_str, i);
+ break;
+ default:
+ seq_printf(m, "%s", prefix_str);
+ break;
+ }
+
+ ret = hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize,
+ m->buf + m->count, m->size - m->count,
+ ascii);
+ if (ret >= m->size - m->count) {
+ seq_set_overflow(m);
+ } else {
+ m->count += ret;
+ seq_putc(m, '\n');
+ }
+ }
+}
+EXPORT_SYMBOL(seq_hex_dump);
+
struct list_head *seq_list_start(struct list_head *head, loff_t pos)
{
struct list_head *lh;