aboutsummaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/fadvise.c11
-rw-r--r--mm/hugetlb.c42
-rw-r--r--mm/kasan/kasan.c4
-rw-r--r--mm/memcontrol.c5
-rw-r--r--mm/oom_kill.c7
-rw-r--r--mm/page_alloc.c39
-rw-r--r--mm/page_owner.c26
-rw-r--r--mm/page_poison.c8
-rw-r--r--mm/swap.c20
-rw-r--r--mm/swap_state.c5
-rw-r--r--mm/vmalloc.c9
-rw-r--r--mm/vmstat.c2
-rw-r--r--mm/z3fold.c24
13 files changed, 163 insertions, 39 deletions
diff --git a/mm/fadvise.c b/mm/fadvise.c
index b8024fa7101d..6c707bfe02fd 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -126,6 +126,17 @@ SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
*/
start_index = (offset+(PAGE_SIZE-1)) >> PAGE_SHIFT;
end_index = (endbyte >> PAGE_SHIFT);
+ if ((endbyte & ~PAGE_MASK) != ~PAGE_MASK) {
+ /* First page is tricky as 0 - 1 = -1, but pgoff_t
+ * is unsigned, so the end_index >= start_index
+ * check below would be true and we'll discard the whole
+ * file cache which is not what was asked.
+ */
+ if (end_index == 0)
+ break;
+
+ end_index--;
+ }
if (end_index >= start_index) {
unsigned long count = invalidate_mapping_pages(mapping,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d26162e81fea..388c2bb9b55c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -832,8 +832,27 @@ static bool vma_has_reserves(struct vm_area_struct *vma, long chg)
* Only the process that called mmap() has reserves for
* private mappings.
*/
- if (is_vma_resv_set(vma, HPAGE_RESV_OWNER))
- return true;
+ if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
+ /*
+ * Like the shared case above, a hole punch or truncate
+ * could have been performed on the private mapping.
+ * Examine the value of chg to determine if reserves
+ * actually exist or were previously consumed.
+ * Very Subtle - The value of chg comes from a previous
+ * call to vma_needs_reserves(). The reserve map for
+ * private mappings has different (opposite) semantics
+ * than that of shared mappings. vma_needs_reserves()
+ * has already taken this difference in semantics into
+ * account. Therefore, the meaning of chg is the same
+ * as in the shared case above. Code could easily be
+ * combined, but keeping it separate draws attention to
+ * subtle differences.
+ */
+ if (chg)
+ return false;
+ else
+ return true;
+ }
return false;
}
@@ -1816,6 +1835,25 @@ static long __vma_reservation_common(struct hstate *h,
if (vma->vm_flags & VM_MAYSHARE)
return ret;
+ else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER) && ret >= 0) {
+ /*
+ * In most cases, reserves always exist for private mappings.
+ * However, a file associated with mapping could have been
+ * hole punched or truncated after reserves were consumed.
+ * As subsequent fault on such a range will not use reserves.
+ * Subtle - The reserve map for private mappings has the
+ * opposite meaning than that of shared mappings. If NO
+ * entry is in the reserve map, it means a reservation exists.
+ * If an entry exists in the reserve map, it means the
+ * reservation has already been consumed. As a result, the
+ * return value of this routine is the opposite of the
+ * value returned from reserve map manipulation routines above.
+ */
+ if (ret)
+ return 0;
+ else
+ return 1;
+ }
else
return ret < 0 ? ret : 0;
}
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
index 18b6a2b8d183..28439acda6ec 100644
--- a/mm/kasan/kasan.c
+++ b/mm/kasan/kasan.c
@@ -763,8 +763,8 @@ static int kasan_mem_notifier(struct notifier_block *nb,
static int __init kasan_memhotplug_init(void)
{
- pr_err("WARNING: KASAN doesn't support memory hot-add\n");
- pr_err("Memory hot-add will be disabled\n");
+ pr_info("WARNING: KASAN doesn't support memory hot-add\n");
+ pr_info("Memory hot-add will be disabled\n");
hotplug_memory_notifier(kasan_mem_notifier, 0);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 925b431f3f03..75e74408cc8f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1608,7 +1608,7 @@ static void memcg_oom_recover(struct mem_cgroup *memcg)
static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
{
- if (!current->memcg_may_oom || current->memcg_in_oom)
+ if (!current->memcg_may_oom)
return;
/*
* We are in the middle of the charge context here, so we
@@ -2896,6 +2896,7 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
* ordering is imposed by list_lru_node->lock taken by
* memcg_drain_all_list_lrus().
*/
+ rcu_read_lock(); /* can be called from css_free w/o cgroup_mutex */
css_for_each_descendant_pre(css, &memcg->css) {
child = mem_cgroup_from_css(css);
BUG_ON(child->kmemcg_id != kmemcg_id);
@@ -2903,6 +2904,8 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
if (!memcg->use_hierarchy)
break;
}
+ rcu_read_unlock();
+
memcg_drain_all_list_lrus(kmemcg_id, parent->kmemcg_id);
memcg_free_cache_id(kmemcg_id);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index dfb1ab61fb23..acbc432d1a52 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -625,8 +625,6 @@ void try_oom_reaper(struct task_struct *tsk)
if (atomic_read(&mm->mm_users) > 1) {
rcu_read_lock();
for_each_process(p) {
- bool exiting;
-
if (!process_shares_mm(p, mm))
continue;
if (fatal_signal_pending(p))
@@ -636,10 +634,7 @@ void try_oom_reaper(struct task_struct *tsk)
* If the task is exiting make sure the whole thread group
* is exiting and cannot acces mm anymore.
*/
- spin_lock_irq(&p->sighand->siglock);
- exiting = signal_group_exit(p->signal);
- spin_unlock_irq(&p->sighand->siglock);
- if (exiting)
+ if (signal_group_exit(p->signal))
continue;
/* Give up */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index f8f3bfc435ee..6903b695ebae 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -656,6 +656,9 @@ static inline void set_page_guard(struct zone *zone, struct page *page,
return;
page_ext = lookup_page_ext(page);
+ if (unlikely(!page_ext))
+ return;
+
__set_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
INIT_LIST_HEAD(&page->lru);
@@ -673,6 +676,9 @@ static inline void clear_page_guard(struct zone *zone, struct page *page,
return;
page_ext = lookup_page_ext(page);
+ if (unlikely(!page_ext))
+ return;
+
__clear_bit(PAGE_EXT_DEBUG_GUARD, &page_ext->flags);
set_page_private(page, 0);
@@ -2609,11 +2615,12 @@ struct page *buffered_rmqueue(struct zone *preferred_zone,
page = list_last_entry(list, struct page, lru);
else
page = list_first_entry(list, struct page, lru);
- } while (page && check_new_pcp(page));
- __dec_zone_state(zone, NR_ALLOC_BATCH);
- list_del(&page->lru);
- pcp->count--;
+ __dec_zone_state(zone, NR_ALLOC_BATCH);
+ list_del(&page->lru);
+ pcp->count--;
+
+ } while (check_new_pcp(page));
} else {
/*
* We most definitely don't want callers attempting to
@@ -3023,6 +3030,7 @@ reset_fair:
apply_fair = false;
fair_skipped = false;
reset_alloc_batches(ac->preferred_zoneref->zone);
+ z = ac->preferred_zoneref;
goto zonelist_scan;
}
@@ -3596,6 +3604,17 @@ retry:
*/
alloc_flags = gfp_to_alloc_flags(gfp_mask);
+ /*
+ * Reset the zonelist iterators if memory policies can be ignored.
+ * These allocations are high priority and system rather than user
+ * orientated.
+ */
+ if ((alloc_flags & ALLOC_NO_WATERMARKS) || !(alloc_flags & ALLOC_CPUSET)) {
+ ac->zonelist = node_zonelist(numa_node_id(), gfp_mask);
+ ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
+ ac->high_zoneidx, ac->nodemask);
+ }
+
/* This is the last chance, in general, before the goto nopage. */
page = get_page_from_freelist(gfp_mask, order,
alloc_flags & ~ALLOC_NO_WATERMARKS, ac);
@@ -3604,12 +3623,6 @@ retry:
/* Allocate without watermarks if the context allows */
if (alloc_flags & ALLOC_NO_WATERMARKS) {
- /*
- * Ignore mempolicies if ALLOC_NO_WATERMARKS on the grounds
- * the allocation is high priority and these type of
- * allocations are system rather than user orientated
- */
- ac->zonelist = node_zonelist(numa_node_id(), gfp_mask);
page = get_page_from_freelist(gfp_mask, order,
ALLOC_NO_WATERMARKS, ac);
if (page)
@@ -3808,7 +3821,11 @@ retry_cpuset:
/* Dirty zone balancing only done in the fast path */
ac.spread_dirty_pages = (gfp_mask & __GFP_WRITE);
- /* The preferred zone is used for statistics later */
+ /*
+ * The preferred zone is used for statistics but crucially it is
+ * also used as the starting point for the zonelist iterator. It
+ * may get reset for allocations that ignore memory policies.
+ */
ac.preferred_zoneref = first_zones_zonelist(ac.zonelist,
ac.high_zoneidx, ac.nodemask);
if (!ac.preferred_zoneref) {
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 792b56da13d8..c6cda3e36212 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -55,6 +55,8 @@ void __reset_page_owner(struct page *page, unsigned int order)
for (i = 0; i < (1 << order); i++) {
page_ext = lookup_page_ext(page + i);
+ if (unlikely(!page_ext))
+ continue;
__clear_bit(PAGE_EXT_OWNER, &page_ext->flags);
}
}
@@ -62,6 +64,7 @@ void __reset_page_owner(struct page *page, unsigned int order)
void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask)
{
struct page_ext *page_ext = lookup_page_ext(page);
+
struct stack_trace trace = {
.nr_entries = 0,
.max_entries = ARRAY_SIZE(page_ext->trace_entries),
@@ -69,6 +72,9 @@ void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask)
.skip = 3,
};
+ if (unlikely(!page_ext))
+ return;
+
save_stack_trace(&trace);
page_ext->order = order;
@@ -82,6 +88,8 @@ void __set_page_owner(struct page *page, unsigned int order, gfp_t gfp_mask)
void __set_page_owner_migrate_reason(struct page *page, int reason)
{
struct page_ext *page_ext = lookup_page_ext(page);
+ if (unlikely(!page_ext))
+ return;
page_ext->last_migrate_reason = reason;
}
@@ -89,6 +97,12 @@ void __set_page_owner_migrate_reason(struct page *page, int reason)
gfp_t __get_page_owner_gfp(struct page *page)
{
struct page_ext *page_ext = lookup_page_ext(page);
+ if (unlikely(!page_ext))
+ /*
+ * The caller just returns 0 if no valid gfp
+ * So return 0 here too.
+ */
+ return 0;
return page_ext->gfp_mask;
}
@@ -99,6 +113,9 @@ void __copy_page_owner(struct page *oldpage, struct page *newpage)
struct page_ext *new_ext = lookup_page_ext(newpage);
int i;
+ if (unlikely(!old_ext || !new_ext))
+ return;
+
new_ext->order = old_ext->order;
new_ext->gfp_mask = old_ext->gfp_mask;
new_ext->nr_entries = old_ext->nr_entries;
@@ -193,6 +210,11 @@ void __dump_page_owner(struct page *page)
gfp_t gfp_mask = page_ext->gfp_mask;
int mt = gfpflags_to_migratetype(gfp_mask);
+ if (unlikely(!page_ext)) {
+ pr_alert("There is not page extension available.\n");
+ return;
+ }
+
if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags)) {
pr_alert("page_owner info is not active (free page?)\n");
return;
@@ -251,6 +273,8 @@ read_page_owner(struct file *file, char __user *buf, size_t count, loff_t *ppos)
}
page_ext = lookup_page_ext(page);
+ if (unlikely(!page_ext))
+ continue;
/*
* Some pages could be missed by concurrent allocation or free,
@@ -317,6 +341,8 @@ static void init_pages_in_zone(pg_data_t *pgdat, struct zone *zone)
continue;
page_ext = lookup_page_ext(page);
+ if (unlikely(!page_ext))
+ continue;
/* Maybe overraping zone */
if (test_bit(PAGE_EXT_OWNER, &page_ext->flags))
diff --git a/mm/page_poison.c b/mm/page_poison.c
index 1eae5fad2446..2e647c65916b 100644
--- a/mm/page_poison.c
+++ b/mm/page_poison.c
@@ -54,6 +54,9 @@ static inline void set_page_poison(struct page *page)
struct page_ext *page_ext;
page_ext = lookup_page_ext(page);
+ if (unlikely(!page_ext))
+ return;
+
__set_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags);
}
@@ -62,6 +65,9 @@ static inline void clear_page_poison(struct page *page)
struct page_ext *page_ext;
page_ext = lookup_page_ext(page);
+ if (unlikely(!page_ext))
+ return;
+
__clear_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags);
}
@@ -70,7 +76,7 @@ bool page_is_poisoned(struct page *page)
struct page_ext *page_ext;
page_ext = lookup_page_ext(page);
- if (!page_ext)
+ if (unlikely(!page_ext))
return false;
return test_bit(PAGE_EXT_DEBUG_POISON, &page_ext->flags);
diff --git a/mm/swap.c b/mm/swap.c
index 95916142fc46..59f5fafa6e1f 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -667,6 +667,24 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)
static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
+/*
+ * lru_add_drain_wq is used to do lru_add_drain_all() from a WQ_MEM_RECLAIM
+ * workqueue, aiding in getting memory freed.
+ */
+static struct workqueue_struct *lru_add_drain_wq;
+
+static int __init lru_init(void)
+{
+ lru_add_drain_wq = alloc_workqueue("lru-add-drain", WQ_MEM_RECLAIM, 0);
+
+ if (WARN(!lru_add_drain_wq,
+ "Failed to create workqueue lru_add_drain_wq"))
+ return -ENOMEM;
+
+ return 0;
+}
+early_initcall(lru_init);
+
void lru_add_drain_all(void)
{
static DEFINE_MUTEX(lock);
@@ -686,7 +704,7 @@ void lru_add_drain_all(void)
pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
need_activate_page_drain(cpu)) {
INIT_WORK(work, lru_add_drain_per_cpu);
- schedule_work_on(cpu, work);
+ queue_work_on(cpu, lru_add_drain_wq, work);
cpumask_set_cpu(cpu, &has_work);
}
}
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 0d457e7db8d6..c99463ac02fb 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -252,7 +252,10 @@ static inline void free_swap_cache(struct page *page)
void free_page_and_swap_cache(struct page *page)
{
free_swap_cache(page);
- put_page(page);
+ if (is_huge_zero_page(page))
+ put_huge_zero_page();
+ else
+ put_page(page);
}
/*
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index cf7ad1a53be0..e11475cdeb7a 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1105,7 +1105,7 @@ EXPORT_SYMBOL_GPL(vm_unmap_aliases);
*/
void vm_unmap_ram(const void *mem, unsigned int count)
{
- unsigned long size = count << PAGE_SHIFT;
+ unsigned long size = (unsigned long)count << PAGE_SHIFT;
unsigned long addr = (unsigned long)mem;
BUG_ON(!addr);
@@ -1140,7 +1140,7 @@ EXPORT_SYMBOL(vm_unmap_ram);
*/
void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t prot)
{
- unsigned long size = count << PAGE_SHIFT;
+ unsigned long size = (unsigned long)count << PAGE_SHIFT;
unsigned long addr;
void *mem;
@@ -1574,14 +1574,15 @@ void *vmap(struct page **pages, unsigned int count,
unsigned long flags, pgprot_t prot)
{
struct vm_struct *area;
+ unsigned long size; /* In bytes */
might_sleep();
if (count > totalram_pages)
return NULL;
- area = get_vm_area_caller((count << PAGE_SHIFT), flags,
- __builtin_return_address(0));
+ size = (unsigned long)count << PAGE_SHIFT;
+ area = get_vm_area_caller(size, flags, __builtin_return_address(0));
if (!area)
return NULL;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 77e42ef388c2..cb2a67bb4158 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1061,6 +1061,8 @@ static void pagetypeinfo_showmixedcount_print(struct seq_file *m,
continue;
page_ext = lookup_page_ext(page);
+ if (unlikely(!page_ext))
+ continue;
if (!test_bit(PAGE_EXT_OWNER, &page_ext->flags))
continue;
diff --git a/mm/z3fold.c b/mm/z3fold.c
index 34917d55d311..8f9e89ca1d31 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -412,7 +412,7 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
/* HEADLESS page stored */
bud = HEADLESS;
} else {
- bud = (handle - zhdr->first_num) & BUDDY_MASK;
+ bud = handle_to_buddy(handle);
switch (bud) {
case FIRST:
@@ -572,15 +572,19 @@ next:
pool->pages_nr--;
spin_unlock(&pool->lock);
return 0;
- } else if (zhdr->first_chunks != 0 &&
- zhdr->last_chunks != 0 && zhdr->middle_chunks != 0) {
- /* Full, add to buddied list */
- list_add(&zhdr->buddy, &pool->buddied);
- } else if (!test_bit(PAGE_HEADLESS, &page->private)) {
- z3fold_compact_page(zhdr);
- /* add to unbuddied list */
- freechunks = num_free_chunks(zhdr);
- list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
+ } else if (!test_bit(PAGE_HEADLESS, &page->private)) {
+ if (zhdr->first_chunks != 0 &&
+ zhdr->last_chunks != 0 &&
+ zhdr->middle_chunks != 0) {
+ /* Full, add to buddied list */
+ list_add(&zhdr->buddy, &pool->buddied);
+ } else {
+ z3fold_compact_page(zhdr);
+ /* add to unbuddied list */
+ freechunks = num_free_chunks(zhdr);
+ list_add(&zhdr->buddy,
+ &pool->unbuddied[freechunks]);
+ }
}
/* add to beginning of LRU */