summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/ksm.c12
-rw-r--r--mm/memory-failure.c5
-rw-r--r--mm/memory.c4
-rw-r--r--mm/mmap.c138
-rw-r--r--mm/mremap.c7
-rw-r--r--mm/nommu.c2
-rw-r--r--mm/rmap.c156
7 files changed, 248 insertions, 76 deletions
diff --git a/mm/ksm.c b/mm/ksm.c
index 56a0da1f997..a93f1b7f508 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1563,10 +1563,12 @@ int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg,
again:
hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
struct anon_vma *anon_vma = rmap_item->anon_vma;
+ struct anon_vma_chain *vmac;
struct vm_area_struct *vma;
spin_lock(&anon_vma->lock);
- list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+ list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
+ vma = vmac->vma;
if (rmap_item->address < vma->vm_start ||
rmap_item->address >= vma->vm_end)
continue;
@@ -1614,10 +1616,12 @@ int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
again:
hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
struct anon_vma *anon_vma = rmap_item->anon_vma;
+ struct anon_vma_chain *vmac;
struct vm_area_struct *vma;
spin_lock(&anon_vma->lock);
- list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+ list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
+ vma = vmac->vma;
if (rmap_item->address < vma->vm_start ||
rmap_item->address >= vma->vm_end)
continue;
@@ -1664,10 +1668,12 @@ int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
again:
hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
struct anon_vma *anon_vma = rmap_item->anon_vma;
+ struct anon_vma_chain *vmac;
struct vm_area_struct *vma;
spin_lock(&anon_vma->lock);
- list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+ list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
+ vma = vmac->vma;
if (rmap_item->address < vma->vm_start ||
rmap_item->address >= vma->vm_end)
continue;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 17299fd4577..d1f33516297 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -383,9 +383,12 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
if (av == NULL) /* Not actually mapped anymore */
goto out;
for_each_process (tsk) {
+ struct anon_vma_chain *vmac;
+
if (!task_early_kill(tsk))
continue;
- list_for_each_entry (vma, &av->head, anon_vma_node) {
+ list_for_each_entry(vmac, &av->head, same_anon_vma) {
+ vma = vmac->vma;
if (!page_mapped_in_vma(page, vma))
continue;
if (vma->vm_mm == tsk->mm)
diff --git a/mm/memory.c b/mm/memory.c
index 77d9f840936..dc785b438d7 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -374,7 +374,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
* Hide vma from rmap and truncate_pagecache before freeing
* pgtables
*/
- anon_vma_unlink(vma);
+ unlink_anon_vmas(vma);
unlink_file_vma(vma);
if (is_vm_hugetlb_page(vma)) {
@@ -388,7 +388,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
&& !is_vm_hugetlb_page(next)) {
vma = next;
next = vma->vm_next;
- anon_vma_unlink(vma);
+ unlink_anon_vmas(vma);
unlink_file_vma(vma);
}
free_pgd_range(tlb, addr, vma->vm_end,
diff --git a/mm/mmap.c b/mm/mmap.c
index 31656147128..6a0c15db7f6 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -437,7 +437,6 @@ __vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
{
__vma_link_list(mm, vma, prev, rb_parent);
__vma_link_rb(mm, vma, rb_link, rb_parent);
- __anon_vma_link(vma);
}
static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -499,7 +498,7 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
* are necessary. The "insert" vma (if any) is to be inserted
* before we drop the necessary locks.
*/
-void vma_adjust(struct vm_area_struct *vma, unsigned long start,
+int vma_adjust(struct vm_area_struct *vma, unsigned long start,
unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
{
struct mm_struct *mm = vma->vm_mm;
@@ -542,6 +541,28 @@ again: remove_next = 1 + (end > next->vm_end);
}
}
+ /*
+ * When changing only vma->vm_end, we don't really need anon_vma lock.
+ */
+ if (vma->anon_vma && (insert || importer || start != vma->vm_start))
+ anon_vma = vma->anon_vma;
+ if (anon_vma) {
+ /*
+ * Easily overlooked: when mprotect shifts the boundary,
+ * make sure the expanding vma has anon_vma set if the
+ * shrinking vma had, to cover any anon pages imported.
+ */
+ if (importer && !importer->anon_vma) {
+ /* Block reverse map lookups until things are set up. */
+ importer->vm_flags |= VM_LOCK_RMAP;
+ if (anon_vma_clone(importer, vma)) {
+ importer->vm_flags &= ~VM_LOCK_RMAP;
+ return -ENOMEM;
+ }
+ importer->anon_vma = anon_vma;
+ }
+ }
+
if (file) {
mapping = file->f_mapping;
if (!(vma->vm_flags & VM_NONLINEAR))
@@ -567,25 +588,6 @@ again: remove_next = 1 + (end > next->vm_end);
}
}
- /*
- * When changing only vma->vm_end, we don't really need
- * anon_vma lock.
- */
- if (vma->anon_vma && (insert || importer || start != vma->vm_start))
- anon_vma = vma->anon_vma;
- if (anon_vma) {
- spin_lock(&anon_vma->lock);
- /*
- * Easily overlooked: when mprotect shifts the boundary,
- * make sure the expanding vma has anon_vma set if the
- * shrinking vma had, to cover any anon pages imported.
- */
- if (importer && !importer->anon_vma) {
- importer->anon_vma = anon_vma;
- __anon_vma_link(importer);
- }
- }
-
if (root) {
flush_dcache_mmap_lock(mapping);
vma_prio_tree_remove(vma, root);
@@ -616,8 +618,11 @@ again: remove_next = 1 + (end > next->vm_end);
__vma_unlink(mm, next, vma);
if (file)
__remove_shared_vm_struct(next, file, mapping);
- if (next->anon_vma)
- __anon_vma_merge(vma, next);
+ /*
+ * This VMA is now dead, no need for rmap to follow it.
+ * Call anon_vma_merge below, outside of i_mmap_lock.
+ */
+ next->vm_flags |= VM_LOCK_RMAP;
} else if (insert) {
/*
* split_vma has split insert from vma, and needs
@@ -627,17 +632,25 @@ again: remove_next = 1 + (end > next->vm_end);
__insert_vm_struct(mm, insert);
}
- if (anon_vma)
- spin_unlock(&anon_vma->lock);
if (mapping)
spin_unlock(&mapping->i_mmap_lock);
+ /*
+ * The current VMA has been set up. It is now safe for the
+ * rmap code to get from the pages to the ptes.
+ */
+ if (anon_vma && importer)
+ importer->vm_flags &= ~VM_LOCK_RMAP;
+
if (remove_next) {
if (file) {
fput(file);
if (next->vm_flags & VM_EXECUTABLE)
removed_exe_file_vma(mm);
}
+ /* Protected by mmap_sem and VM_LOCK_RMAP. */
+ if (next->anon_vma)
+ anon_vma_merge(vma, next);
mm->map_count--;
mpol_put(vma_policy(next));
kmem_cache_free(vm_area_cachep, next);
@@ -653,6 +666,8 @@ again: remove_next = 1 + (end > next->vm_end);
}
validate_mm(mm);
+
+ return 0;
}
/*
@@ -759,6 +774,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
{
pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
struct vm_area_struct *area, *next;
+ int err;
/*
* We later require that vma->vm_flags == vm_flags,
@@ -792,11 +808,13 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
is_mergeable_anon_vma(prev->anon_vma,
next->anon_vma)) {
/* cases 1, 6 */
- vma_adjust(prev, prev->vm_start,
+ err = vma_adjust(prev, prev->vm_start,
next->vm_end, prev->vm_pgoff, NULL);
} else /* cases 2, 5, 7 */
- vma_adjust(prev, prev->vm_start,
+ err = vma_adjust(prev, prev->vm_start,
end, prev->vm_pgoff, NULL);
+ if (err)
+ return NULL;
return prev;
}
@@ -808,11 +826,13 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
can_vma_merge_before(next, vm_flags,
anon_vma, file, pgoff+pglen)) {
if (prev && addr < prev->vm_end) /* case 4 */
- vma_adjust(prev, prev->vm_start,
+ err = vma_adjust(prev, prev->vm_start,
addr, prev->vm_pgoff, NULL);
else /* cases 3, 8 */
- vma_adjust(area, addr, next->vm_end,
+ err = vma_adjust(area, addr, next->vm_end,
next->vm_pgoff - pglen, NULL);
+ if (err)
+ return NULL;
return area;
}
@@ -1205,6 +1225,7 @@ munmap_back:
vma->vm_flags = vm_flags;
vma->vm_page_prot = vm_get_page_prot(vm_flags);
vma->vm_pgoff = pgoff;
+ INIT_LIST_HEAD(&vma->anon_vma_chain);
if (file) {
error = -EINVAL;
@@ -1865,6 +1886,7 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
{
struct mempolicy *pol;
struct vm_area_struct *new;
+ int err = -ENOMEM;
if (is_vm_hugetlb_page(vma) && (addr &
~(huge_page_mask(hstate_vma(vma)))))
@@ -1872,11 +1894,13 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
if (!new)
- return -ENOMEM;
+ goto out_err;
/* most fields are the same, copy all, and then fixup */
*new = *vma;
+ INIT_LIST_HEAD(&new->anon_vma_chain);
+
if (new_below)
new->vm_end = addr;
else {
@@ -1886,11 +1910,14 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
pol = mpol_dup(vma_policy(vma));
if (IS_ERR(pol)) {
- kmem_cache_free(vm_area_cachep, new);
- return PTR_ERR(pol);
+ err = PTR_ERR(pol);
+ goto out_free_vma;
}
vma_set_policy(new, pol);
+ if (anon_vma_clone(new, vma))
+ goto out_free_mpol;
+
if (new->vm_file) {
get_file(new->vm_file);
if (vma->vm_flags & VM_EXECUTABLE)
@@ -1901,12 +1928,28 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
new->vm_ops->open(new);
if (new_below)
- vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
+ err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
((addr - new->vm_start) >> PAGE_SHIFT), new);
else
- vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
+ err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
- return 0;
+ /* Success. */
+ if (!err)
+ return 0;
+
+ /* Clean everything up if vma_adjust failed. */
+ new->vm_ops->close(new);
+ if (new->vm_file) {
+ if (vma->vm_flags & VM_EXECUTABLE)
+ removed_exe_file_vma(mm);
+ fput(new->vm_file);
+ }
+ out_free_mpol:
+ mpol_put(pol);
+ out_free_vma:
+ kmem_cache_free(vm_area_cachep, new);
+ out_err:
+ return err;
}
/*
@@ -2116,6 +2159,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
return -ENOMEM;
}
+ INIT_LIST_HEAD(&vma->anon_vma_chain);
vma->vm_mm = mm;
vma->vm_start = addr;
vma->vm_end = addr + len;
@@ -2252,10 +2296,11 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
if (new_vma) {
*new_vma = *vma;
pol = mpol_dup(vma_policy(vma));
- if (IS_ERR(pol)) {
- kmem_cache_free(vm_area_cachep, new_vma);
- return NULL;
- }
+ if (IS_ERR(pol))
+ goto out_free_vma;
+ INIT_LIST_HEAD(&new_vma->anon_vma_chain);
+ if (anon_vma_clone(new_vma, vma))
+ goto out_free_mempol;
vma_set_policy(new_vma, pol);
new_vma->vm_start = addr;
new_vma->vm_end = addr + len;
@@ -2271,6 +2316,12 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
}
}
return new_vma;
+
+ out_free_mempol:
+ mpol_put(pol);
+ out_free_vma:
+ kmem_cache_free(vm_area_cachep, new_vma);
+ return NULL;
}
/*
@@ -2348,6 +2399,7 @@ int install_special_mapping(struct mm_struct *mm,
if (unlikely(vma == NULL))
return -ENOMEM;
+ INIT_LIST_HEAD(&vma->anon_vma_chain);
vma->vm_mm = mm;
vma->vm_start = addr;
vma->vm_end = addr + len;
@@ -2448,6 +2500,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
int mm_take_all_locks(struct mm_struct *mm)
{
struct vm_area_struct *vma;
+ struct anon_vma_chain *avc;
int ret = -EINTR;
BUG_ON(down_read_trylock(&mm->mmap_sem));
@@ -2465,7 +2518,8 @@ int mm_take_all_locks(struct mm_struct *mm)
if (signal_pending(current))
goto out_unlock;
if (vma->anon_vma)
- vm_lock_anon_vma(mm, vma->anon_vma);
+ list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+ vm_lock_anon_vma(mm, avc->anon_vma);
}
ret = 0;
@@ -2520,13 +2574,15 @@ static void vm_unlock_mapping(struct address_space *mapping)
void mm_drop_all_locks(struct mm_struct *mm)
{
struct vm_area_struct *vma;
+ struct anon_vma_chain *avc;
BUG_ON(down_read_trylock(&mm->mmap_sem));
BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
for (vma = mm->mmap; vma; vma = vma->vm_next) {
if (vma->anon_vma)
- vm_unlock_anon_vma(vma->anon_vma);
+ list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+ vm_unlock_anon_vma(avc->anon_vma);
if (vma->vm_file && vma->vm_file->f_mapping)
vm_unlock_mapping(vma->vm_file->f_mapping);
}
diff --git a/mm/mremap.c b/mm/mremap.c
index 4c4c803453f..e9c75efce60 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -460,8 +460,11 @@ unsigned long do_mremap(unsigned long addr,
if (vma_expandable(vma, new_len - old_len)) {
int pages = (new_len - old_len) >> PAGE_SHIFT;
- vma_adjust(vma, vma->vm_start,
- addr + new_len, vma->vm_pgoff, NULL);
+ if (vma_adjust(vma, vma->vm_start, addr + new_len,
+ vma->vm_pgoff, NULL)) {
+ ret = -ENOMEM;
+ goto out;
+ }
mm->total_vm += pages;
vm_stat_account(mm, vma->vm_flags, vma->vm_file, pages);
diff --git a/mm/nommu.c b/mm/nommu.c
index 48a2ecfaf05..55727a74af9 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1209,7 +1209,7 @@ unsigned long do_mmap_pgoff(struct file *file,
region->vm_flags = vm_flags;
region->vm_pgoff = pgoff;
- INIT_LIST_HEAD(&vma->anon_vma_node);
+ INIT_LIST_HEAD(&vma->anon_vma_chain);
vma->vm_flags = vm_flags;
vma->vm_pgoff = pgoff;
diff --git a/mm/rmap.c b/mm/rmap.c
index 5cb47111f79..be34094e459 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -62,6 +62,7 @@
#include "internal.h"
static struct kmem_cache *anon_vma_cachep;
+static struct kmem_cache *anon_vma_chain_cachep;
static inline struct anon_vma *anon_vma_alloc(void)
{
@@ -73,6 +74,16 @@ void anon_vma_free(struct anon_vma *anon_vma)
kmem_cache_free(anon_vma_cachep, anon_vma);
}
+static inline struct anon_vma_chain *anon_vma_chain_alloc(void)
+{
+ return kmem_cache_alloc(anon_vma_chain_cachep, GFP_KERNEL);
+}
+
+void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
+{
+ kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
+}
+
/**
* anon_vma_prepare - attach an anon_vma to a memory region
* @vma: the memory region in question
@@ -103,18 +114,23 @@ void anon_vma_free(struct anon_vma *anon_vma)
int anon_vma_prepare(struct vm_area_struct *vma)
{
struct anon_vma *anon_vma = vma->anon_vma;
+ struct anon_vma_chain *avc;
might_sleep();
if (unlikely(!anon_vma)) {
struct mm_struct *mm = vma->vm_mm;
struct anon_vma *allocated;
+ avc = anon_vma_chain_alloc();
+ if (!avc)
+ goto out_enomem;
+
anon_vma = find_mergeable_anon_vma(vma);
allocated = NULL;
if (!anon_vma) {
anon_vma = anon_vma_alloc();
if (unlikely(!anon_vma))
- return -ENOMEM;
+ goto out_enomem_free_avc;
allocated = anon_vma;
}
spin_lock(&anon_vma->lock);
@@ -123,53 +139,113 @@ int anon_vma_prepare(struct vm_area_struct *vma)
spin_lock(&mm->page_table_lock);
if (likely(!vma->anon_vma)) {
vma->anon_vma = anon_vma;
- list_add_tail(&vma->anon_vma_node, &anon_vma->head);
+ avc->anon_vma = anon_vma;
+ avc->vma = vma;
+ list_add(&avc->same_vma, &vma->anon_vma_chain);
+ list_add(&avc->same_anon_vma, &anon_vma->head);
allocated = NULL;
}
spin_unlock(&mm->page_table_lock);
spin_unlock(&anon_vma->lock);
- if (unlikely(allocated))
+ if (unlikely(allocated)) {
anon_vma_free(allocated);
+ anon_vma_chain_free(avc);
+ }
}
return 0;
+
+ out_enomem_free_avc:
+ anon_vma_chain_free(avc);
+ out_enomem:
+ return -ENOMEM;
}
-void __anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next)
+static void anon_vma_chain_link(struct vm_area_struct *vma,
+ struct anon_vma_chain *avc,
+ struct anon_vma *anon_vma)
{
- BUG_ON(vma->anon_vma != next->anon_vma);
- list_del(&next->anon_vma_node);
+ avc->vma = vma;
+ avc->anon_vma = anon_vma;
+ list_add(&avc->same_vma, &vma->anon_vma_chain);
+
+ spin_lock(&anon_vma->lock);
+ list_add_tail(&avc->same_anon_vma, &anon_vma->head);
+ spin_unlock(&anon_vma->lock);
}
-void __anon_vma_link(struct vm_area_struct *vma)
+/*
+ * Attach the anon_vmas from src to dst.
+ * Returns 0 on success, -ENOMEM on failure.
+ */
+int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
{
- struct anon_vma *anon_vma = vma->anon_vma;
+ struct anon_vma_chain *avc, *pavc;
+
+ list_for_each_entry(pavc, &src->anon_vma_chain, same_vma) {
+ avc = anon_vma_chain_alloc();
+ if (!avc)
+ goto enomem_failure;
+ anon_vma_chain_link(dst, avc, pavc->anon_vma);
+ }
+ return 0;
- if (anon_vma)
- list_add_tail(&vma->anon_vma_node, &anon_vma->head);
+ enomem_failure:
+ unlink_anon_vmas(dst);
+ return -ENOMEM;
}
-void anon_vma_link(struct vm_area_struct *vma)
+/*
+ * Attach vma to its own anon_vma, as well as to the anon_vmas that
+ * the corresponding VMA in the parent process is attached to.
+ * Returns 0 on success, non-zero on failure.
+ */
+int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
{
- struct anon_vma *anon_vma = vma->anon_vma;
+ struct anon_vma_chain *avc;
+ struct anon_vma *anon_vma;
- if (anon_vma) {
- spin_lock(&anon_vma->lock);
- list_add_tail(&vma->anon_vma_node, &anon_vma->head);
- spin_unlock(&anon_vma->lock);
- }
+ /* Don't bother if the parent process has no anon_vma here. */
+ if (!pvma->anon_vma)
+ return 0;
+
+ /*
+ * First, attach the new VMA to the parent VMA's anon_vmas,
+ * so rmap can find non-COWed pages in child processes.
+ */
+ if (anon_vma_clone(vma, pvma))
+ return -ENOMEM;
+
+ /* Then add our own anon_vma. */
+ anon_vma = anon_vma_alloc();
+ if (!anon_vma)
+ goto out_error;
+ avc = anon_vma_chain_alloc();
+ if (!avc)
+ goto out_error_free_anon_vma;
+ anon_vma_chain_link(vma, avc, anon_vma);
+ /* Mark this anon_vma as the one where our new (COWed) pages go. */
+ vma->anon_vma = anon_vma;
+
+ return 0;
+
+ out_error_free_anon_vma:
+ anon_vma_free(anon_vma);
+ out_error:
+ return -ENOMEM;
}
-void anon_vma_unlink(struct vm_area_struct *vma)
+static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain)
{
- struct anon_vma *anon_vma = vma->anon_vma;
+ struct anon_vma *anon_vma = anon_vma_chain->anon_vma;
int empty;
+ /* If anon_vma_fork fails, we can get an empty anon_vma_chain. */
if (!anon_vma)
return;
spin_lock(&anon_vma->lock);
- list_del(&vma->anon_vma_node);
+ list_del(&anon_vma_chain->same_anon_vma);
/* We must garbage collect the anon_vma if it's empty */
empty = list_empty(&anon_vma->head) && !ksm_refcount(anon_vma);
@@ -179,6 +255,18 @@ void anon_vma_unlink(struct vm_area_struct *vma)
anon_vma_free(anon_vma);
}
+void unlink_anon_vmas(struct vm_area_struct *vma)
+{
+ struct anon_vma_chain *avc, *next;
+
+ /* Unlink each anon_vma chained to the VMA. */
+ list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
+ anon_vma_unlink(avc);
+ list_del(&avc->same_vma);
+ anon_vma_chain_free(avc);
+ }
+}
+
static void anon_vma_ctor(void *data)
{
struct anon_vma *anon_vma = data;
@@ -192,6 +280,7 @@ void __init anon_vma_init(void)
{
anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
+ anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
}
/*
@@ -240,6 +329,18 @@ vma_address(struct page *page, struct vm_area_struct *vma)
/* page should be within @vma mapping range */
return -EFAULT;
}
+ if (unlikely(vma->vm_flags & VM_LOCK_RMAP)) {
+ /*
+ * This VMA is being unlinked or is not yet linked into the
+ * VMA tree. Do not try to follow this rmap. This race
+ * condition can result in page_referenced() ignoring a
+ * reference or in try_to_unmap() failing to unmap a page.
+ * The VMA cannot be freed under us because we hold the
+ * anon_vma->lock, which the munmap code takes while
+ * unlinking the anon_vmas from the VMA.
+ */
+ return -EFAULT;
+ }
return address;
}
@@ -396,7 +497,7 @@ static int page_referenced_anon(struct page *page,
{
unsigned int mapcount;
struct anon_vma *anon_vma;
- struct vm_area_struct *vma;
+ struct anon_vma_chain *avc;
int referenced = 0;
anon_vma = page_lock_anon_vma(page);
@@ -404,7 +505,8 @@ static int page_referenced_anon(struct page *page,
return referenced;
mapcount = page_mapcount(page);
- list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+ list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+ struct vm_area_struct *vma = avc->vma;
unsigned long address = vma_address(page, vma);
if (address == -EFAULT)
continue;
@@ -1025,14 +1127,15 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
{
struct anon_vma *anon_vma;
- struct vm_area_struct *vma;
+ struct anon_vma_chain *avc;
int ret = SWAP_AGAIN;
anon_vma = page_lock_anon_vma(page);
if (!anon_vma)
return ret;
- list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+ list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+ struct vm_area_struct *vma = avc->vma;
unsigned long address = vma_address(page, vma);
if (address == -EFAULT)
continue;
@@ -1223,7 +1326,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
struct vm_area_struct *, unsigned long, void *), void *arg)
{
struct anon_vma *anon_vma;
- struct vm_area_struct *vma;
+ struct anon_vma_chain *avc;
int ret = SWAP_AGAIN;
/*
@@ -1238,7 +1341,8 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
if (!anon_vma)
return ret;
spin_lock(&anon_vma->lock);
- list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+ list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+ struct vm_area_struct *vma = avc->vma;
unsigned long address = vma_address(page, vma);
if (address == -EFAULT)
continue;