aboutsummaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig4
-rw-r--r--mm/backing-dev.c19
-rw-r--r--mm/kmemleak.c5
-rw-r--r--mm/ksm.c1
-rw-r--r--mm/memory-failure.c59
-rw-r--r--mm/memory.c14
-rw-r--r--mm/mempolicy.c13
-rw-r--r--mm/nommu.c6
-rw-r--r--mm/page-writeback.c3
-rw-r--r--mm/page_alloc.c3
-rw-r--r--mm/percpu.c35
-rw-r--r--mm/swapfile.c3
-rw-r--r--mm/vmalloc.c1
-rw-r--r--mm/vmscan.c14
14 files changed, 113 insertions, 67 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index 57963c6063d..fd3386242cf 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -67,7 +67,7 @@ config DISCONTIGMEM
config SPARSEMEM
def_bool y
- depends on SPARSEMEM_MANUAL
+ depends on (!SELECT_MEMORY_MODEL && ARCH_SPARSEMEM_ENABLE) || SPARSEMEM_MANUAL
config FLATMEM
def_bool y
@@ -129,7 +129,7 @@ config MEMORY_HOTPLUG
bool "Allow for memory hot-add"
depends on SPARSEMEM || X86_64_ACPI_NUMA
depends on HOTPLUG && !(HIBERNATION && !S390) && ARCH_ENABLE_MEMORY_HOTPLUG
- depends on (IA64 || X86 || PPC64 || SUPERH || S390)
+ depends on (IA64 || X86 || PPC_BOOK3S_64 || SUPERH || S390)
comment "Memory hotplug is currently incompatible with Software Suspend"
depends on SPARSEMEM && HOTPLUG && HIBERNATION && !S390
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 3d3accb1f80..11aee09dd2a 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -92,7 +92,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
"BdiDirtyThresh: %8lu kB\n"
"DirtyThresh: %8lu kB\n"
"BackgroundThresh: %8lu kB\n"
- "WriteBack threads:%8lu\n"
+ "WritebackThreads: %8lu\n"
"b_dirty: %8lu\n"
"b_io: %8lu\n"
"b_more_io: %8lu\n"
@@ -610,9 +610,26 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
kthread_stop(wb->task);
}
+/*
+ * This bdi is going away now, make sure that no super_blocks point to it
+ */
+static void bdi_prune_sb(struct backing_dev_info *bdi)
+{
+ struct super_block *sb;
+
+ spin_lock(&sb_lock);
+ list_for_each_entry(sb, &super_blocks, s_list) {
+ if (sb->s_bdi == bdi)
+ sb->s_bdi = NULL;
+ }
+ spin_unlock(&sb_lock);
+}
+
void bdi_unregister(struct backing_dev_info *bdi)
{
if (bdi->dev) {
+ bdi_prune_sb(bdi);
+
if (!bdi_cap_flush_forker(bdi))
bdi_wb_shutdown(bdi);
bdi_debug_unregister(bdi);
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 4ea4510e299..8bf765c4f58 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -833,12 +833,15 @@ static void early_alloc(struct early_log *log)
*/
rcu_read_lock();
object = create_object((unsigned long)log->ptr, log->size,
- log->min_count, GFP_KERNEL);
+ log->min_count, GFP_ATOMIC);
+ if (!object)
+ goto out;
spin_lock_irqsave(&object->lock, flags);
for (i = 0; i < log->trace_len; i++)
object->trace[i] = log->trace[i];
object->trace_len = log->trace_len;
spin_unlock_irqrestore(&object->lock, flags);
+out:
rcu_read_unlock();
}
diff --git a/mm/ksm.c b/mm/ksm.c
index bef1af4f77e..5575f8628fe 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1012,6 +1012,7 @@ static struct rmap_item *unstable_tree_search_insert(struct page *page,
struct rmap_item *tree_rmap_item;
int ret;
+ cond_resched();
tree_rmap_item = rb_entry(*new, struct rmap_item, node);
page2[0] = get_mergeable_page(tree_rmap_item);
if (!page2[0])
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 729d4b15b64..dacc6418387 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -35,6 +35,7 @@
#include <linux/mm.h>
#include <linux/page-flags.h>
#include <linux/sched.h>
+#include <linux/ksm.h>
#include <linux/rmap.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
@@ -370,9 +371,6 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
int ret = FAILED;
struct address_space *mapping;
- if (!isolate_lru_page(p))
- page_cache_release(p);
-
/*
* For anonymous pages we're done the only reference left
* should be the one m_f() holds.
@@ -498,30 +496,18 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn)
*/
static int me_swapcache_dirty(struct page *p, unsigned long pfn)
{
- int ret = FAILED;
-
ClearPageDirty(p);
/* Trigger EIO in shmem: */
ClearPageUptodate(p);
- if (!isolate_lru_page(p)) {
- page_cache_release(p);
- ret = DELAYED;
- }
-
- return ret;
+ return DELAYED;
}
static int me_swapcache_clean(struct page *p, unsigned long pfn)
{
- int ret = FAILED;
-
- if (!isolate_lru_page(p)) {
- page_cache_release(p);
- ret = RECOVERED;
- }
delete_from_swap_cache(p);
- return ret;
+
+ return RECOVERED;
}
/*
@@ -611,8 +597,6 @@ static struct page_state {
{ 0, 0, "unknown page state", me_unknown },
};
-#undef lru
-
static void action_result(unsigned long pfn, char *msg, int result)
{
struct page *page = NULL;
@@ -629,13 +613,16 @@ static int page_action(struct page_state *ps, struct page *p,
unsigned long pfn, int ref)
{
int result;
+ int count;
result = ps->action(p, pfn);
action_result(pfn, ps->msg, result);
- if (page_count(p) != 1 + ref)
+
+ count = page_count(p) - 1 - ref;
+ if (count != 0)
printk(KERN_ERR
"MCE %#lx: %s page still referenced by %d users\n",
- pfn, ps->msg, page_count(p) - 1);
+ pfn, ps->msg, count);
/* Could do more checks here if page looks ok */
/*
@@ -661,12 +648,9 @@ static void hwpoison_user_mappings(struct page *p, unsigned long pfn,
int i;
int kill = 1;
- if (PageReserved(p) || PageCompound(p) || PageSlab(p))
+ if (PageReserved(p) || PageCompound(p) || PageSlab(p) || PageKsm(p))
return;
- if (!PageLRU(p))
- lru_add_drain_all();
-
/*
* This check implies we don't kill processes if their pages
* are in the swap cache early. Those are always late kills.
@@ -738,6 +722,7 @@ static void hwpoison_user_mappings(struct page *p, unsigned long pfn,
int __memory_failure(unsigned long pfn, int trapno, int ref)
{
+ unsigned long lru_flag;
struct page_state *ps;
struct page *p;
int res;
@@ -775,6 +760,24 @@ int __memory_failure(unsigned long pfn, int trapno, int ref)
}
/*
+ * We ignore non-LRU pages for good reasons.
+ * - PG_locked is only well defined for LRU pages and a few others
+ * - to avoid races with __set_page_locked()
+ * - to avoid races with __SetPageSlab*() (and more non-atomic ops)
+ * The check (unnecessarily) ignores LRU pages being isolated and
+ * walked by the page reclaim code, however that's not a big loss.
+ */
+ if (!PageLRU(p))
+ lru_add_drain_all();
+ lru_flag = p->flags & lru;
+ if (isolate_lru_page(p)) {
+ action_result(pfn, "non LRU", IGNORED);
+ put_page(p);
+ return -EBUSY;
+ }
+ page_cache_release(p);
+
+ /*
* Lock the page and wait for writeback to finish.
* It's very difficult to mess with pages currently under IO
* and in many cases impossible, so we just avoid it here.
@@ -790,7 +793,7 @@ int __memory_failure(unsigned long pfn, int trapno, int ref)
/*
* Torn down by someone else?
*/
- if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
+ if ((lru_flag & lru) && !PageSwapCache(p) && p->mapping == NULL) {
action_result(pfn, "already truncated LRU", IGNORED);
res = 0;
goto out;
@@ -798,7 +801,7 @@ int __memory_failure(unsigned long pfn, int trapno, int ref)
res = -EBUSY;
for (ps = error_states;; ps++) {
- if ((p->flags & ps->mask) == ps->res) {
+ if (((p->flags | lru_flag)& ps->mask) == ps->res) {
res = page_action(ps, p, pfn, ref);
break;
}
diff --git a/mm/memory.c b/mm/memory.c
index 7e91b5f9f69..6ab19dd4a19 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -641,6 +641,7 @@ static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
unsigned long addr, unsigned long end)
{
+ pte_t *orig_src_pte, *orig_dst_pte;
pte_t *src_pte, *dst_pte;
spinlock_t *src_ptl, *dst_ptl;
int progress = 0;
@@ -654,6 +655,8 @@ again:
src_pte = pte_offset_map_nested(src_pmd, addr);
src_ptl = pte_lockptr(src_mm, src_pmd);
spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
+ orig_src_pte = src_pte;
+ orig_dst_pte = dst_pte;
arch_enter_lazy_mmu_mode();
do {
@@ -677,9 +680,9 @@ again:
arch_leave_lazy_mmu_mode();
spin_unlock(src_ptl);
- pte_unmap_nested(src_pte - 1);
+ pte_unmap_nested(orig_src_pte);
add_mm_rss(dst_mm, rss[0], rss[1]);
- pte_unmap_unlock(dst_pte - 1, dst_ptl);
+ pte_unmap_unlock(orig_dst_pte, dst_ptl);
cond_resched();
if (addr != end)
goto again;
@@ -1820,10 +1823,10 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
token = pmd_pgtable(*pmd);
do {
- err = fn(pte, token, addr, data);
+ err = fn(pte++, token, addr, data);
if (err)
break;
- } while (pte++, addr += PAGE_SIZE, addr != end);
+ } while (addr += PAGE_SIZE, addr != end);
arch_leave_lazy_mmu_mode();
@@ -2539,7 +2542,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
} else if (PageHWPoison(page)) {
ret = VM_FAULT_HWPOISON;
delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
- goto out;
+ goto out_release;
}
lock_page(page);
@@ -2611,6 +2614,7 @@ out_nomap:
pte_unmap_unlock(page_table, ptl);
out_page:
unlock_page(page);
+out_release:
page_cache_release(page);
return ret;
}
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 7dd9d9f8069..4545d594424 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1024,7 +1024,7 @@ static long do_mbind(unsigned long start, unsigned long len,
err = migrate_prep();
if (err)
- return err;
+ goto mpol_out;
}
{
NODEMASK_SCRATCH(scratch);
@@ -1039,10 +1039,9 @@ static long do_mbind(unsigned long start, unsigned long len,
err = -ENOMEM;
NODEMASK_SCRATCH_FREE(scratch);
}
- if (err) {
- mpol_put(new);
- return err;
- }
+ if (err)
+ goto mpol_out;
+
vma = check_range(mm, start, end, nmask,
flags | MPOL_MF_INVERT, &pagelist);
@@ -1058,9 +1057,11 @@ static long do_mbind(unsigned long start, unsigned long len,
if (!err && nr_failed && (flags & MPOL_MF_STRICT))
err = -EIO;
- }
+ } else
+ putback_lru_pages(&pagelist);
up_write(&mm->mmap_sem);
+ mpol_out:
mpol_put(new);
return err;
}
diff --git a/mm/nommu.c b/mm/nommu.c
index 5189b5aed8c..9876fa0c3ad 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1362,9 +1362,11 @@ share:
error_just_free:
up_write(&nommu_region_sem);
error:
- fput(region->vm_file);
+ if (region->vm_file)
+ fput(region->vm_file);
kmem_cache_free(vm_region_jar, region);
- fput(vma->vm_file);
+ if (vma->vm_file)
+ fput(vma->vm_file);
if (vma->vm_flags & VM_EXECUTABLE)
removed_exe_file_vma(vma->vm_mm);
kmem_cache_free(vm_area_cachep, vma);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index a3b14090b1f..2c5d79236ea 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -566,7 +566,8 @@ static void balance_dirty_pages(struct address_space *mapping,
if (pages_written >= write_chunk)
break; /* We've done our duty */
- schedule_timeout_interruptible(pause);
+ __set_current_state(TASK_INTERRUPTIBLE);
+ io_schedule_timeout(pause);
/*
* Increase the delay for each loop, up to our previous
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index bf720550b44..cdcedf66161 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2183,7 +2183,7 @@ void show_free_areas(void)
printk("active_anon:%lu inactive_anon:%lu isolated_anon:%lu\n"
" active_file:%lu inactive_file:%lu isolated_file:%lu\n"
" unevictable:%lu"
- " dirty:%lu writeback:%lu unstable:%lu buffer:%lu\n"
+ " dirty:%lu writeback:%lu unstable:%lu\n"
" free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n"
" mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n",
global_page_state(NR_ACTIVE_ANON),
@@ -2196,7 +2196,6 @@ void show_free_areas(void)
global_page_state(NR_FILE_DIRTY),
global_page_state(NR_WRITEBACK),
global_page_state(NR_UNSTABLE_NFS),
- nr_blockdev_pages(),
global_page_state(NR_FREE_PAGES),
global_page_state(NR_SLAB_RECLAIMABLE),
global_page_state(NR_SLAB_UNRECLAIMABLE),
diff --git a/mm/percpu.c b/mm/percpu.c
index 4a048abad04..d90797160c2 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -153,7 +153,10 @@ static int pcpu_reserved_chunk_limit;
*
* During allocation, pcpu_alloc_mutex is kept locked all the time and
* pcpu_lock is grabbed and released as necessary. All actual memory
- * allocations are done using GFP_KERNEL with pcpu_lock released.
+ * allocations are done using GFP_KERNEL with pcpu_lock released. In
+ * general, percpu memory can't be allocated with irq off but
+ * irqsave/restore are still used in alloc path so that it can be used
+ * from early init path - sched_init() specifically.
*
* Free path accesses and alters only the index data structures, so it
* can be safely called from atomic context. When memory needs to be
@@ -366,7 +369,7 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
* RETURNS:
* 0 if noop, 1 if successfully extended, -errno on failure.
*/
-static int pcpu_extend_area_map(struct pcpu_chunk *chunk)
+static int pcpu_extend_area_map(struct pcpu_chunk *chunk, unsigned long *flags)
{
int new_alloc;
int *new;
@@ -376,7 +379,7 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk)
if (chunk->map_alloc >= chunk->map_used + 2)
return 0;
- spin_unlock_irq(&pcpu_lock);
+ spin_unlock_irqrestore(&pcpu_lock, *flags);
new_alloc = PCPU_DFL_MAP_ALLOC;
while (new_alloc < chunk->map_used + 2)
@@ -384,7 +387,7 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk)
new = pcpu_mem_alloc(new_alloc * sizeof(new[0]));
if (!new) {
- spin_lock_irq(&pcpu_lock);
+ spin_lock_irqsave(&pcpu_lock, *flags);
return -ENOMEM;
}
@@ -393,7 +396,7 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk)
* could have happened inbetween, so map_used couldn't have
* grown.
*/
- spin_lock_irq(&pcpu_lock);
+ spin_lock_irqsave(&pcpu_lock, *flags);
BUG_ON(new_alloc < chunk->map_used + 2);
size = chunk->map_alloc * sizeof(chunk->map[0]);
@@ -1047,6 +1050,7 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved)
struct pcpu_chunk *chunk;
const char *err;
int slot, off;
+ unsigned long flags;
if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
WARN(true, "illegal size (%zu) or align (%zu) for "
@@ -1055,13 +1059,13 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved)
}
mutex_lock(&pcpu_alloc_mutex);
- spin_lock_irq(&pcpu_lock);
+ spin_lock_irqsave(&pcpu_lock, flags);
/* serve reserved allocations from the reserved chunk if available */
if (reserved && pcpu_reserved_chunk) {
chunk = pcpu_reserved_chunk;
if (size > chunk->contig_hint ||
- pcpu_extend_area_map(chunk) < 0) {
+ pcpu_extend_area_map(chunk, &flags) < 0) {
err = "failed to extend area map of reserved chunk";
goto fail_unlock;
}
@@ -1079,7 +1083,7 @@ restart:
if (size > chunk->contig_hint)
continue;
- switch (pcpu_extend_area_map(chunk)) {
+ switch (pcpu_extend_area_map(chunk, &flags)) {
case 0:
break;
case 1:
@@ -1096,7 +1100,7 @@ restart:
}
/* hmmm... no space left, create a new chunk */
- spin_unlock_irq(&pcpu_lock);
+ spin_unlock_irqrestore(&pcpu_lock, flags);
chunk = alloc_pcpu_chunk();
if (!chunk) {
@@ -1104,16 +1108,16 @@ restart:
goto fail_unlock_mutex;
}
- spin_lock_irq(&pcpu_lock);
+ spin_lock_irqsave(&pcpu_lock, flags);
pcpu_chunk_relocate(chunk, -1);
goto restart;
area_found:
- spin_unlock_irq(&pcpu_lock);
+ spin_unlock_irqrestore(&pcpu_lock, flags);
/* populate, map and clear the area */
if (pcpu_populate_chunk(chunk, off, size)) {
- spin_lock_irq(&pcpu_lock);
+ spin_lock_irqsave(&pcpu_lock, flags);
pcpu_free_area(chunk, off);
err = "failed to populate";
goto fail_unlock;
@@ -1125,7 +1129,7 @@ area_found:
return __addr_to_pcpu_ptr(chunk->base_addr + off);
fail_unlock:
- spin_unlock_irq(&pcpu_lock);
+ spin_unlock_irqrestore(&pcpu_lock, flags);
fail_unlock_mutex:
mutex_unlock(&pcpu_alloc_mutex);
if (warn_limit) {
@@ -1870,13 +1874,14 @@ int __init pcpu_embed_first_chunk(size_t reserved_size, ssize_t dyn_size,
max_distance = 0;
for (group = 0; group < ai->nr_groups; group++) {
ai->groups[group].base_offset = areas[group] - base;
- max_distance = max(max_distance, ai->groups[group].base_offset);
+ max_distance = max_t(size_t, max_distance,
+ ai->groups[group].base_offset);
}
max_distance += ai->unit_size;
/* warn if maximum distance is further than 75% of vmalloc space */
if (max_distance > (VMALLOC_END - VMALLOC_START) * 3 / 4) {
- pr_warning("PERCPU: max_distance=0x%lx too large for vmalloc "
+ pr_warning("PERCPU: max_distance=0x%zx too large for vmalloc "
"space 0x%lx\n",
max_distance, VMALLOC_END - VMALLOC_START);
#ifdef CONFIG_NEED_PER_CPU_PAGE_FIRST_CHUNK
diff --git a/mm/swapfile.c b/mm/swapfile.c
index a1bc6b9af9a..9c590eef791 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1151,8 +1151,7 @@ static int try_to_unuse(unsigned int type)
} else
retval = unuse_mm(mm, entry, page);
- if (set_start_mm &&
- swap_count(*swap_map) < swcount) {
+ if (set_start_mm && *swap_map < swcount) {
mmput(new_start_mm);
atomic_inc(&mm->mm_users);
new_start_mm = mm;
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 5e7aed0802b..0f551a4a44c 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -12,6 +12,7 @@
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/highmem.h>
+#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/interrupt.h>
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 64e43889883..777af57fd8c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -544,6 +544,16 @@ redo:
*/
lru = LRU_UNEVICTABLE;
add_page_to_unevictable_list(page);
+ /*
+ * When racing with an mlock clearing (page is
+ * unlocked), make sure that if the other thread does
+ * not observe our setting of PG_lru and fails
+ * isolation, we see PG_mlocked cleared below and move
+ * the page back to the evictable list.
+ *
+ * The other side is TestClearPageMlocked().
+ */
+ smp_mb();
}
/*
@@ -1088,7 +1098,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan,
int lumpy_reclaim = 0;
while (unlikely(too_many_isolated(zone, file, sc))) {
- congestion_wait(WRITE, HZ/10);
+ congestion_wait(BLK_RW_ASYNC, HZ/10);
/* We are about to die and free our memory. Return now. */
if (fatal_signal_pending(current))
@@ -1356,7 +1366,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone,
* IO, plus JVM can create lots of anon VM_EXEC pages,
* so we ignore them here.
*/
- if ((vm_flags & VM_EXEC) && !PageAnon(page)) {
+ if ((vm_flags & VM_EXEC) && page_is_file_cache(page)) {
list_add(&page->lru, &l_active);
continue;
}