aboutsummaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Makefile4
-rw-r--r--mm/balloon_compaction.c4
-rw-r--r--mm/bootmem.c13
-rw-r--r--mm/compaction.c67
-rw-r--r--mm/early_ioremap.c23
-rw-r--r--mm/filemap.c9
-rw-r--r--mm/huge_memory.c6
-rw-r--r--mm/hugetlb.c68
-rw-r--r--mm/internal.h7
-rw-r--r--mm/kasan/Makefile8
-rw-r--r--mm/kasan/kasan.c572
-rw-r--r--mm/kasan/kasan.h71
-rw-r--r--mm/kasan/kasan_init.c152
-rw-r--r--mm/kasan/report.c298
-rw-r--r--mm/kmemleak.c34
-rw-r--r--mm/memblock.c2
-rw-r--r--mm/memcontrol.c11
-rw-r--r--mm/memory-failure.c17
-rw-r--r--mm/memory.c14
-rw-r--r--mm/memory_hotplug.c4
-rw-r--r--mm/mempool.c131
-rw-r--r--mm/migrate.c2
-rw-r--r--mm/mmap.c55
-rw-r--r--mm/nobootmem.c4
-rw-r--r--mm/page_alloc.c77
-rw-r--r--mm/page_isolation.c8
-rw-r--r--mm/percpu.c75
-rw-r--r--mm/process_vm_access.c2
-rw-r--r--mm/shmem.c8
-rw-r--r--mm/slab.c13
-rw-r--r--mm/slab_common.c5
-rw-r--r--mm/slub.c64
-rw-r--r--mm/swap.c33
-rw-r--r--mm/truncate.c2
-rw-r--r--mm/vmalloc.c22
-rw-r--r--mm/vmscan.c16
36 files changed, 1682 insertions, 219 deletions
diff --git a/mm/Makefile b/mm/Makefile
index 8405eb0023a9..3614adbf6d32 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -2,6 +2,9 @@
# Makefile for the linux memory manager.
#
+KASAN_SANITIZE_slab_common.o := n
+KASAN_SANITIZE_slub.o := n
+
mmu-y := nommu.o
mmu-$(CONFIG_MMU) := fremap.o gup.o highmem.o memory.o mincore.o \
mlock.o mmap.o mprotect.o mremap.o msync.o rmap.o \
@@ -49,6 +52,7 @@ obj-$(CONFIG_PAGE_POISONING) += debug-pagealloc.o
obj-$(CONFIG_SLAB) += slab.o
obj-$(CONFIG_SLUB) += slub.o
obj-$(CONFIG_KMEMCHECK) += kmemcheck.o
+obj-$(CONFIG_KASAN) += kasan/
obj-$(CONFIG_FAILSLAB) += failslab.o
obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
obj-$(CONFIG_FS_XIP) += filemap_xip.o
diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c
index fcad8322ef36..b640609bcd17 100644
--- a/mm/balloon_compaction.c
+++ b/mm/balloon_compaction.c
@@ -61,6 +61,7 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info)
bool dequeued_page;
dequeued_page = false;
+ spin_lock_irqsave(&b_dev_info->pages_lock, flags);
list_for_each_entry_safe(page, tmp, &b_dev_info->pages, lru) {
/*
* Block others from accessing the 'page' while we get around
@@ -75,15 +76,14 @@ struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info)
continue;
}
#endif
- spin_lock_irqsave(&b_dev_info->pages_lock, flags);
balloon_page_delete(page);
__count_vm_event(BALLOON_DEFLATE);
- spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
unlock_page(page);
dequeued_page = true;
break;
}
}
+ spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
if (!dequeued_page) {
/*
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 477be696511d..a23dd1934654 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -164,7 +164,7 @@ void __init free_bootmem_late(unsigned long physaddr, unsigned long size)
end = PFN_DOWN(physaddr + size);
for (; cursor < end; cursor++) {
- __free_pages_bootmem(pfn_to_page(cursor), 0);
+ __free_pages_bootmem(pfn_to_page(cursor), cursor, 0);
totalram_pages++;
}
}
@@ -172,7 +172,7 @@ void __init free_bootmem_late(unsigned long physaddr, unsigned long size)
static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
{
struct page *page;
- unsigned long *map, start, end, pages, count = 0;
+ unsigned long *map, start, end, pages, cur, count = 0;
if (!bdata->node_bootmem_map)
return 0;
@@ -210,17 +210,17 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
if (IS_ALIGNED(start, BITS_PER_LONG) && vec == ~0UL) {
int order = ilog2(BITS_PER_LONG);
- __free_pages_bootmem(pfn_to_page(start), order);
+ __free_pages_bootmem(pfn_to_page(start), start, order);
count += BITS_PER_LONG;
start += BITS_PER_LONG;
} else {
- unsigned long cur = start;
+ cur = start;
start = ALIGN(start + 1, BITS_PER_LONG);
while (vec && cur != start) {
if (vec & 1) {
page = pfn_to_page(cur);
- __free_pages_bootmem(page, 0);
+ __free_pages_bootmem(page, cur, 0);
count++;
}
vec >>= 1;
@@ -229,12 +229,13 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
}
}
+ cur = bdata->node_min_pfn;
page = virt_to_page(bdata->node_bootmem_map);
pages = bdata->node_low_pfn - bdata->node_min_pfn;
pages = bootmem_bootmap_pages(pages);
count += pages;
while (pages--)
- __free_pages_bootmem(page++, 0);
+ __free_pages_bootmem(page++, cur++, 0);
bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count);
diff --git a/mm/compaction.c b/mm/compaction.c
index b47f08e159d4..be7729b706af 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -16,6 +16,7 @@
#include <linux/sysfs.h>
#include <linux/balloon_compaction.h>
#include <linux/page-isolation.h>
+#include <linux/kasan.h>
#include "internal.h"
#ifdef CONFIG_COMPACTION
@@ -59,6 +60,7 @@ static void map_pages(struct list_head *list)
list_for_each_entry(page, list, lru) {
arch_alloc_page(page, 0);
kernel_map_pages(page, 1, 1);
+ kasan_alloc_pages(page, 0);
}
}
@@ -371,6 +373,24 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
if (!valid_page)
valid_page = page;
+
+ /*
+ * For compound pages such as THP and hugetlbfs, we can save
+ * potentially a lot of iterations if we skip them at once.
+ * The check is racy, but we can consider only valid values
+ * and the only danger is skipping too much.
+ */
+ if (PageCompound(page)) {
+ unsigned int comp_order = compound_order(page);
+
+ if (likely(comp_order < MAX_ORDER)) {
+ blockpfn += (1UL << comp_order) - 1;
+ cursor += (1UL << comp_order) - 1;
+ }
+
+ goto isolate_fail;
+ }
+
if (!PageBuddy(page))
goto isolate_fail;
@@ -402,18 +422,23 @@ static unsigned long isolate_freepages_block(struct compact_control *cc,
/* Found a free page, break it into order-0 pages */
isolated = split_free_page(page);
+ if (!isolated)
+ break;
+
total_isolated += isolated;
+ cc->nr_freepages += isolated;
for (i = 0; i < isolated; i++) {
list_add(&page->lru, freelist);
page++;
}
-
- /* If a page was split, advance to the end of it */
- if (isolated) {
- blockpfn += isolated - 1;
- cursor += isolated - 1;
- continue;
+ if (!strict && cc->nr_migratepages <= cc->nr_freepages) {
+ blockpfn += isolated;
+ break;
}
+ /* Advance to the end of split page */
+ blockpfn += isolated - 1;
+ cursor += isolated - 1;
+ continue;
isolate_fail:
if (strict)
@@ -423,6 +448,16 @@ isolate_fail:
}
+ if (locked)
+ spin_unlock_irqrestore(&cc->zone->lock, flags);
+
+ /*
+ * There is a tiny chance that we have read bogus compound_order(),
+ * so be careful to not go outside of the pageblock.
+ */
+ if (unlikely(blockpfn > end_pfn))
+ blockpfn = end_pfn;
+
/* Record how far we have got within the block */
*start_pfn = blockpfn;
@@ -436,9 +471,6 @@ isolate_fail:
if (strict && blockpfn < end_pfn)
total_isolated = 0;
- if (locked)
- spin_unlock_irqrestore(&cc->zone->lock, flags);
-
/* Update the pageblock-skip if the whole pageblock was scanned */
if (blockpfn == end_pfn)
update_pageblock_skip(cc, valid_page, total_isolated, false);
@@ -784,16 +816,8 @@ isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
pfn = isolate_migratepages_block(cc, pfn, block_end_pfn,
ISOLATE_UNEVICTABLE);
- /*
- * In case of fatal failure, release everything that might
- * have been isolated in the previous iteration, and signal
- * the failure back to caller.
- */
- if (!pfn) {
- putback_movable_pages(&cc->migratepages);
- cc->nr_migratepages = 0;
+ if (!pfn)
break;
- }
if (cc->nr_migratepages == COMPACT_CLUSTER_MAX)
break;
@@ -872,7 +896,12 @@ static void isolate_freepages(struct compact_control *cc)
/* Found a block suitable for isolating free pages from. */
isolated = isolate_freepages_block(cc, &isolate_start_pfn,
- block_end_pfn, freelist, false);
+ block_end_pfn, freelist, false);
+ /* If isolation failed early, do not continue needlessly */
+ if (!isolated && isolate_start_pfn < block_end_pfn &&
+ cc->nr_migratepages > cc->nr_freepages)
+ break;
+
nr_freepages += isolated;
/*
diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c
index e10ccd299d66..d0d7cdbfb69f 100644
--- a/mm/early_ioremap.c
+++ b/mm/early_ioremap.c
@@ -15,6 +15,7 @@
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <asm/fixmap.h>
+#include <asm/early_ioremap.h>
#ifdef CONFIG_MMU
static int early_ioremap_debug __initdata;
@@ -217,6 +218,28 @@ early_memremap(resource_size_t phys_addr, unsigned long size)
return (__force void *)__early_ioremap(phys_addr, size,
FIXMAP_PAGE_NORMAL);
}
+
+#define MAX_MAP_CHUNK (NR_FIX_BTMAPS << PAGE_SHIFT)
+
+void __init copy_from_early_mem(void *dest, phys_addr_t src, unsigned long size)
+{
+ unsigned long slop, clen;
+ char *p;
+
+ while (size) {
+ slop = src & ~PAGE_MASK;
+ clen = size;
+ if (clen > MAX_MAP_CHUNK - slop)
+ clen = MAX_MAP_CHUNK - slop;
+ p = early_memremap(src & PAGE_MASK, clen + slop);
+ memcpy(dest, p + slop, clen);
+ early_memunmap(p, clen + slop);
+ dest += clen;
+ src += clen;
+ size -= clen;
+ }
+}
+
#else /* CONFIG_MMU */
void __init __iomem *
diff --git a/mm/filemap.c b/mm/filemap.c
index 9159e0454fba..98ed28f9f9f2 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2494,6 +2494,11 @@ again:
break;
}
+ if (fatal_signal_pending(current)) {
+ status = -EINTR;
+ break;
+ }
+
status = a_ops->write_begin(file, mapping, pos, bytes, flags,
&page, &fsdata);
if (unlikely(status < 0))
@@ -2531,10 +2536,6 @@ again:
written += copied;
balance_dirty_pages_ratelimited(mapping);
- if (fatal_signal_pending(current)) {
- status = -EINTR;
- break;
- }
} while (iov_iter_count(i));
return written ? written : status;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index de984159cf0b..2e39d4e0ff09 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2081,10 +2081,9 @@ int khugepaged_enter_vma_merge(struct vm_area_struct *vma,
* page fault if needed.
*/
return 0;
- if (vma->vm_ops)
+ if (vma->vm_ops || (vm_flags & VM_NO_THP))
/* khugepaged not yet working on file or special mappings */
return 0;
- VM_BUG_ON_VMA(vm_flags & VM_NO_THP, vma);
hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
hend = vma->vm_end & HPAGE_PMD_MASK;
if (hstart < hend)
@@ -2407,8 +2406,7 @@ static bool hugepage_vma_check(struct vm_area_struct *vma)
return false;
if (is_vma_temporary_stack(vma))
return false;
- VM_BUG_ON_VMA(vma->vm_flags & VM_NO_THP, vma);
- return true;
+ return !(vma->vm_flags & VM_NO_THP);
}
static void collapse_huge_page(struct mm_struct *mm,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index da8fa4e4237c..549bf5ac3d6e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -681,7 +681,7 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
#if defined(CONFIG_CMA) && defined(CONFIG_X86_64)
static void destroy_compound_gigantic_page(struct page *page,
- unsigned long order)
+ unsigned int order)
{
int i;
int nr_pages = 1 << order;
@@ -697,7 +697,7 @@ static void destroy_compound_gigantic_page(struct page *page,
__ClearPageHead(page);
}
-static void free_gigantic_page(struct page *page, unsigned order)
+static void free_gigantic_page(struct page *page, unsigned int order)
{
free_contig_range(page_to_pfn(page), 1 << order);
}
@@ -741,7 +741,7 @@ static bool zone_spans_last_pfn(const struct zone *zone,
return zone_spans_pfn(zone, last_pfn);
}
-static struct page *alloc_gigantic_page(int nid, unsigned order)
+static struct page *alloc_gigantic_page(int nid, unsigned int order)
{
unsigned long nr_pages = 1 << order;
unsigned long ret, pfn, flags;
@@ -777,7 +777,7 @@ static struct page *alloc_gigantic_page(int nid, unsigned order)
}
static void prep_new_huge_page(struct hstate *h, struct page *page, int nid);
-static void prep_compound_gigantic_page(struct page *page, unsigned long order);
+static void prep_compound_gigantic_page(struct page *page, unsigned int order);
static struct page *alloc_fresh_gigantic_page_node(struct hstate *h, int nid)
{
@@ -810,9 +810,9 @@ static int alloc_fresh_gigantic_page(struct hstate *h,
static inline bool gigantic_page_supported(void) { return true; }
#else
static inline bool gigantic_page_supported(void) { return false; }
-static inline void free_gigantic_page(struct page *page, unsigned order) { }
+static inline void free_gigantic_page(struct page *page, unsigned int order) { }
static inline void destroy_compound_gigantic_page(struct page *page,
- unsigned long order) { }
+ unsigned int order) { }
static inline int alloc_fresh_gigantic_page(struct hstate *h,
nodemask_t *nodes_allowed) { return 0; }
#endif
@@ -855,6 +855,31 @@ struct hstate *size_to_hstate(unsigned long size)
return NULL;
}
+/*
+ * Test to determine whether the hugepage is "active/in-use" (i.e. being linked
+ * to hstate->hugepage_activelist.)
+ *
+ * This function can be called for tail pages, but never returns true for them.
+ */
+bool page_huge_active(struct page *page)
+{
+ VM_BUG_ON_PAGE(!PageHuge(page), page);
+ return PageHead(page) && PagePrivate(&page[1]);
+}
+
+/* never called for tail page */
+static void set_page_huge_active(struct page *page)
+{
+ VM_BUG_ON_PAGE(!PageHeadHuge(page), page);
+ SetPagePrivate(&page[1]);
+}
+
+static void clear_page_huge_active(struct page *page)
+{
+ VM_BUG_ON_PAGE(!PageHeadHuge(page), page);
+ ClearPagePrivate(&page[1]);
+}
+
void free_huge_page(struct page *page)
{
/*
@@ -875,6 +900,7 @@ void free_huge_page(struct page *page)
ClearPagePrivate(page);
spin_lock(&hugetlb_lock);
+ clear_page_huge_active(page);
hugetlb_cgroup_uncharge_page(hstate_index(h),
pages_per_huge_page(h), page);
if (restore_reserve)
@@ -906,7 +932,7 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
put_page(page); /* free it into the hugepage allocator */
}
-static void prep_compound_gigantic_page(struct page *page, unsigned long order)
+static void prep_compound_gigantic_page(struct page *page, unsigned int order)
{
int i;
int nr_pages = 1 << order;
@@ -1464,7 +1490,8 @@ found:
return 1;
}
-static void __init prep_compound_huge_page(struct page *page, int order)
+static void __init prep_compound_huge_page(struct page *page,
+ unsigned int order)
{
if (unlikely(order > (MAX_ORDER - 1)))
prep_compound_gigantic_page(page, order);
@@ -2171,7 +2198,7 @@ static int __init hugetlb_init(void)
module_init(hugetlb_init);
/* Should be called on processing a hugepagesz=... option */
-void __init hugetlb_add_hstate(unsigned order)
+void __init hugetlb_add_hstate(unsigned int order)
{
struct hstate *h;
unsigned long i;
@@ -2780,6 +2807,14 @@ static void unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
continue;
/*
+ * Shared VMAs have their own reserves and do not affect
+ * MAP_PRIVATE accounting but it is possible that a shared
+ * VMA is using the same page so check and skip such VMAs.
+ */
+ if (iter_vma->vm_flags & VM_MAYSHARE)
+ continue;
+
+ /*
* Unmap the page from other VMAs without their own reserves.
* They get marked to be SIGKILLed if they fault in these
* areas. This is because a future no-page fault on this VMA
@@ -2884,6 +2919,7 @@ retry_avoidcopy:
copy_user_huge_page(new_page, old_page, address, vma,
pages_per_huge_page(h));
__SetPageUptodate(new_page);
+ set_page_huge_active(new_page);
mmun_start = address & huge_page_mask(h);
mmun_end = mmun_start + huge_page_size(h);
@@ -2995,6 +3031,7 @@ retry:
}
clear_huge_page(page, address, pages_per_huge_page(h));
__SetPageUptodate(page);
+ set_page_huge_active(page);
if (vma->vm_flags & VM_MAYSHARE) {
int err;
@@ -3799,19 +3836,26 @@ int dequeue_hwpoisoned_huge_page(struct page *hpage)
bool isolate_huge_page(struct page *page, struct list_head *list)
{
+ bool ret = true;
+
VM_BUG_ON_PAGE(!PageHead(page), page);
- if (!get_page_unless_zero(page))
- return false;
spin_lock(&hugetlb_lock);
+ if (!page_huge_active(page) || !get_page_unless_zero(page)) {
+ ret = false;
+ goto unlock;
+ }
+ clear_page_huge_active(page);
list_move_tail(&page->lru, list);
+unlock:
spin_unlock(&hugetlb_lock);
- return true;
+ return ret;
}
void putback_active_hugepage(struct page *page)
{
VM_BUG_ON_PAGE(!PageHead(page), page);
spin_lock(&hugetlb_lock);
+ set_page_huge_active(page);
list_move_tail(&page->lru, &(page_hstate(page))->hugepage_activelist);
spin_unlock(&hugetlb_lock);
put_page(page);
diff --git a/mm/internal.h b/mm/internal.h
index a4f90ba7068e..858c8bf8aaa4 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -133,8 +133,9 @@ __find_buddy_index(unsigned long page_idx, unsigned int order)
}
extern int __isolate_free_page(struct page *page, unsigned int order);
-extern void __free_pages_bootmem(struct page *page, unsigned int order);
-extern void prep_compound_page(struct page *page, unsigned long order);
+extern void __free_pages_bootmem(struct page *page, unsigned long pfn,
+ unsigned int order);
+extern void prep_compound_page(struct page *page, unsigned int order);
#ifdef CONFIG_MEMORY_FAILURE
extern bool is_free_buddy_page(struct page *page);
#endif
@@ -192,7 +193,7 @@ isolate_migratepages_range(struct compact_control *cc,
* page cannot be allocated or merged in parallel. Alternatively, it must
* handle invalid values gracefully, and use page_order_unsafe() below.
*/
-static inline unsigned long page_order(struct page *page)
+static inline unsigned int page_order(struct page *page)
{
/* PageBuddy() must be checked by the caller */
return page_private(page);
diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile
new file mode 100644
index 000000000000..64710148941e
--- /dev/null
+++ b/mm/kasan/Makefile
@@ -0,0 +1,8 @@
+KASAN_SANITIZE := n
+
+CFLAGS_REMOVE_kasan.o = -pg
+# Function splitter causes unnecessary splits in __asan_load1/__asan_store1
+# see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63533
+CFLAGS_kasan.o := $(call cc-option, -fno-conserve-stack -fno-stack-protector)
+
+obj-y := kasan.o report.o kasan_init.o
diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c
new file mode 100644
index 000000000000..8f6ff54e7440
--- /dev/null
+++ b/mm/kasan/kasan.c
@@ -0,0 +1,572 @@
+/*
+ * This file contains shadow memory manipulation code.
+ *
+ * Copyright (c) 2014 Samsung Electronics Co., Ltd.
+ * Author: Andrey Ryabinin <a.ryabinin@samsung.com>
+ *
+ * Some code borrowed from https://github.com/xairy/kasan-prototype by
+ * Andrey Konovalov <adech.fo@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#define DISABLE_BRANCH_PROFILING
+
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/kmemleak.h>
+#include <linux/linkage.h>
+#include <linux/memblock.h>
+#include <linux/memory.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/stacktrace.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/vmalloc.h>
+#include <linux/kasan.h>
+
+#include "kasan.h"
+#include "../slab.h"
+
+/*
+ * Poisons the shadow memory for 'size' bytes starting from 'addr'.
+ * Memory addresses should be aligned to KASAN_SHADOW_SCALE_SIZE.
+ */
+static void kasan_poison_shadow(const void *address, size_t size, u8 value)
+{
+ void *shadow_start, *shadow_end;
+
+ shadow_start = kasan_mem_to_shadow(address);
+ shadow_end = kasan_mem_to_shadow(address + size);
+
+ memset(shadow_start, value, shadow_end - shadow_start);
+}
+
+void kasan_unpoison_shadow(const void *address, size_t size)
+{
+ kasan_poison_shadow(address, size, 0);
+
+ if (size & KASAN_SHADOW_MASK) {
+ u8 *shadow = (u8 *)kasan_mem_to_shadow(address + size);
+ *shadow = size & KASAN_SHADOW_MASK;
+ }
+}
+
+static void __kasan_unpoison_stack(struct task_struct *task, void *sp)
+{
+ void *base = task_stack_page(task);
+ size_t size = sp - base;
+
+ kasan_unpoison_shadow(base, size);
+}
+
+/* Unpoison the entire stack for a task. */
+void kasan_unpoison_task_stack(struct task_struct *task)
+{
+ __kasan_unpoison_stack(task, task_stack_page(task) + THREAD_SIZE);
+}
+
+/* Unpoison the stack for the current task beyond a watermark sp value. */
+asmlinkage void kasan_unpoison_remaining_stack(void *sp)
+{
+ __kasan_unpoison_stack(current, sp);
+}
+
+/*
+ * All functions below always inlined so compiler could
+ * perform better optimizations in each of __asan_loadX/__assn_storeX
+ * depending on memory access size X.
+ */
+
+static __always_inline bool memory_is_poisoned_1(unsigned long addr)
+{
+ s8 shadow_value = *(s8 *)kasan_mem_to_shadow((void *)addr);
+
+ if (unlikely(shadow_value)) {
+ s8 last_accessible_byte = addr & KASAN_SHADOW_MASK;
+ return unlikely(last_accessible_byte >= shadow_value);
+ }
+
+ return false;
+}
+
+static __always_inline bool memory_is_poisoned_2(unsigned long addr)
+{
+ u16 *shadow_addr = (u16 *)kasan_mem_to_shadow((void *)addr);
+
+ if (unlikely(*shadow_addr)) {
+ if (memory_is_poisoned_1(addr + 1))
+ return true;
+
+ /*
+ * If single shadow byte covers 2-byte access, we don't
+ * need to do anything more. Otherwise, test the first
+ * shadow byte.
+ */
+ if (likely(((addr + 1) & KASAN_SHADOW_MASK) != 0))
+ return false;
+
+ return unlikely(*(u8 *)shadow_addr);
+ }
+
+ return false;
+}
+
+static __always_inline bool memory_is_poisoned_4(unsigned long addr)
+{
+ u16 *shadow_addr = (u16 *)kasan_mem_to_shadow((void *)addr);
+
+ if (unlikely(*shadow_addr)) {
+ if (memory_is_poisoned_1(addr + 3))
+ return true;
+
+ /*
+ * If single shadow byte covers 4-byte access, we don't
+ * need to do anything more. Otherwise, test the first
+ * shadow byte.
+ */
+ if (likely(((addr + 3) & KASAN_SHADOW_MASK) >= 3))
+ return false;
+
+ return unlikely(*(u8 *)shadow_addr);
+ }
+
+ return false;
+}
+
+static __always_inline bool memory_is_poisoned_8(unsigned long addr)
+{
+ u16 *shadow_addr = (u16 *)kasan_mem_to_shadow((void *)addr);
+
+ if (unlikely(*shadow_addr)) {
+ if (memory_is_poisoned_1(addr + 7))
+ return true;
+
+ /*
+ * If single shadow byte covers 8-byte access, we don't
+ * need to do anything more. Otherwise, test the first
+ * shadow byte.
+ */
+ if (likely(IS_ALIGNED(addr, KASAN_SHADOW_SCALE_SIZE)))
+ return false;
+
+ return unlikely(*(u8 *)shadow_addr);
+ }
+
+ return false;
+}
+
+static __always_inline bool memory_is_poisoned_16(unsigned long addr)
+{
+ u32 *shadow_addr = (u32 *)kasan_mem_to_shadow((void *)addr);
+
+ if (unlikely(*shadow_addr)) {
+ u16 shadow_first_bytes = *(u16 *)shadow_addr;
+
+ if (unlikely(shadow_first_bytes))
+ return true;
+
+ /*
+ * If two shadow bytes covers 16-byte access, we don't
+ * need to do anything more. Otherwise, test the last
+ * shadow byte.
+ */
+ if (likely(IS_ALIGNED(addr, KASAN_SHADOW_SCALE_SIZE)))
+ return false;
+
+ return memory_is_poisoned_1(addr + 15);
+ }
+
+ return false;
+}
+
+static __always_inline unsigned long bytes_is_zero(const u8 *start,
+ size_t size)
+{
+ while (size) {
+ if (unlikely(*start))
+ return (unsigned long)start;
+ start++;
+ size--;
+ }
+
+ return 0;
+}
+
+static __always_inline unsigned long memory_is_zero(const void *start,
+ const void *end)
+{
+ unsigned int words;
+ unsigned long ret;
+ unsigned int prefix = (unsigned long)start % 8;
+
+ if (end - start <= 16)
+ return bytes_is_zero(start, end - start);
+
+ if (prefix) {
+ prefix = 8 - prefix;
+ ret = bytes_is_zero(start, prefix);
+ if (unlikely(ret))
+ return ret;
+ start += prefix;
+ }
+
+ words = (end - start) / 8;
+ while (words) {
+ if (unlikely(*(u64 *)start))
+ return bytes_is_zero(start, 8);
+ start += 8;
+ words--;
+ }
+
+ return bytes_is_zero(start, (end - start) % 8);
+}
+
+static __always_inline bool memory_is_poisoned_n(unsigned long addr,
+ size_t size)
+{
+ unsigned long ret;
+
+ ret = memory_is_zero(kasan_mem_to_shadow((void *)addr),
+ kasan_mem_to_shadow((void *)addr + size - 1) + 1);
+
+ if (unlikely(ret)) {
+ unsigned long last_byte = addr + size - 1;
+ s8 *last_shadow = (s8 *)kasan_mem_to_shadow((void *)last_byte);
+
+ if (unlikely(ret != (unsigned long)last_shadow ||
+ ((long)(last_byte & KASAN_SHADOW_MASK) >= *last_shadow)))
+ return true;
+ }
+ return false;
+}
+
+static __always_inline bool memory_is_poisoned(unsigned long addr, size_t size)
+{
+ if (__builtin_constant_p(size)) {
+ switch (size) {
+ case 1:
+ return memory_is_poisoned_1(addr);
+ case 2:
+ return memory_is_poisoned_2(addr);
+ case 4:
+ return memory_is_poisoned_4(addr);
+ case 8:
+ return memory_is_poisoned_8(addr);
+ case 16:
+ return memory_is_poisoned_16(addr);
+ default:
+ BUILD_BUG();
+ }
+ }
+
+ return memory_is_poisoned_n(addr, size);
+}
+
+
+static __always_inline void check_memory_region(unsigned long addr,
+ size_t size, bool write)
+{
+ if (unlikely(size == 0))
+ return;
+
+ if (unlikely((void *)addr <
+ kasan_shadow_to_mem((void *)KASAN_SHADOW_START))) {
+ kasan_report(addr, size, write, _RET_IP_);
+ return;
+ }
+
+ if (likely(!memory_is_poisoned(addr, size)))
+ return;
+
+ kasan_report(addr, size, write, _RET_IP_);
+}
+
+void __asan_loadN(unsigned long addr, size_t size);
+void __asan_storeN(unsigned long addr, size_t size);
+
+#undef memset
+void *memset(void *addr, int c, size_t len)
+{
+ __asan_storeN((unsigned long)addr, len);
+
+ return __memset(addr, c, len);
+}
+
+#undef memmove
+void *memmove(void *dest, const void *src, size_t len)
+{
+ __asan_loadN((unsigned long)src, len);
+ __asan_storeN((unsigned long)dest, len);
+
+ return __memmove(dest, src, len);
+}
+
+#undef memcpy
+void *memcpy(void *dest, const void *src, size_t len)
+{
+ __asan_loadN((unsigned long)src, len);
+ __asan_storeN((unsigned long)dest, len);
+
+ return __memcpy(dest, src, len);
+}
+
+void kasan_alloc_pages(struct page *page, unsigned int order)
+{
+ if (likely(!PageHighMem(page)))
+ kasan_unpoison_shadow(page_address(page), PAGE_SIZE << order);
+}
+
+void kasan_free_pages(struct page *page, unsigned int order)
+{
+ if (likely(!PageHighMem(page)))
+ kasan_poison_shadow(page_address(page),
+ PAGE_SIZE << order,
+ KASAN_FREE_PAGE);
+}
+
+void kasan_poison_slab(struct page *page)
+{
+ kasan_poison_shadow(page_address(page),
+ PAGE_SIZE << compound_order(page),
+ KASAN_KMALLOC_REDZONE);
+}
+
+void kasan_unpoison_object_data(struct kmem_cache *cache, void *object)
+{
+ kasan_unpoison_shadow(object, cache->object_size);
+}
+
+void kasan_poison_object_data(struct kmem_cache *cache, void *object)
+{
+ kasan_poison_shadow(object,
+ round_up(cache->object_size, KASAN_SHADOW_SCALE_SIZE),
+ KASAN_KMALLOC_REDZONE);
+}
+
+void kasan_slab_alloc(struct kmem_cache *cache, void *object)
+{
+ kasan_kmalloc(cache, object, cache->object_size);
+}
+
+void kasan_slab_free(struct kmem_cache *cache, void *object)
+{
+ unsigned long size = cache->object_size;
+ unsigned long rounded_up_size = round_up(size, KASAN_SHADOW_SCALE_SIZE);
+
+ /* RCU slabs could be legally used after free within the RCU period */
+ if (unlikely(cache->flags & SLAB_DESTROY_BY_RCU))
+ return;
+
+ kasan_poison_shadow(object, rounded_up_size, KASAN_KMALLOC_FREE);
+}
+
+void kasan_kmalloc(struct kmem_cache *cache, const void *object, size_t size)
+{
+ unsigned long redzone_start;
+ unsigned long redzone_end;
+
+ if (unlikely(object == NULL))
+ return;
+
+ redzone_start = round_up((unsigned long)(object + size),
+ KASAN_SHADOW_SCALE_SIZE);
+ redzone_end = round_up((unsigned long)object + cache->object_size,
+ KASAN_SHADOW_SCALE_SIZE);
+
+ kasan_unpoison_shadow(object, size);
+ kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start,
+ KASAN_KMALLOC_REDZONE);
+}
+EXPORT_SYMBOL(kasan_kmalloc);
+
+void kasan_kmalloc_large(const void *ptr, size_t size)
+{
+ struct page *page;
+ unsigned long redzone_start;
+ unsigned long redzone_end;
+
+ if (unlikely(ptr == NULL))
+ return;
+
+ page = virt_to_page(ptr);
+ redzone_start = round_up((unsigned long)(ptr + size),
+ KASAN_SHADOW_SCALE_SIZE);
+ redzone_end = (unsigned long)ptr + (PAGE_SIZE << compound_order(page));
+
+ kasan_unpoison_shadow(ptr, size);
+ kasan_poison_shadow((void *)redzone_start, redzone_end - redzone_start,
+ KASAN_PAGE_REDZONE);
+}
+
+void kasan_krealloc(const void *object, size_t size)
+{
+ struct page *page;
+
+ if (unlikely(object == ZERO_SIZE_PTR))
+ return;
+
+ page = virt_to_head_page(object);
+
+ if (unlikely(!PageSlab(page)))
+ kasan_kmalloc_large(object, size);
+ else
+ kasan_kmalloc(page->slab_cache, object, size);
+}
+
+void kasan_kfree(void *ptr)
+{
+ struct page *page;
+
+ page = virt_to_head_page(ptr);
+
+ if (unlikely(!PageSlab(page)))
+ kasan_poison_shadow(ptr, PAGE_SIZE << compound_order(page),
+ KASAN_FREE_PAGE);
+ else
+ kasan_slab_free(page->slab_cache, ptr);
+}
+
+void kasan_kfree_large(const void *ptr)
+{
+ struct page *page = virt_to_page(ptr);
+
+ kasan_poison_shadow(ptr, PAGE_SIZE << compound_order(page),
+ KASAN_FREE_PAGE);
+}
+
+int kasan_module_alloc(void *addr, size_t size)
+{
+ void *ret;
+ size_t shadow_size;
+ unsigned long shadow_start;
+
+ shadow_start = (unsigned long)kasan_mem_to_shadow(addr);
+ shadow_size = round_up(size >> KASAN_SHADOW_SCALE_SHIFT,
+ PAGE_SIZE);
+
+ if (WARN_ON(!PAGE_ALIGNED(shadow_start)))
+ return -EINVAL;
+
+ ret = __vmalloc_node_range(shadow_size, 1, shadow_start,
+ shadow_start + shadow_size,
+ GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
+ PAGE_KERNEL, VM_NO_GUARD, NUMA_NO_NODE,
+ __builtin_return_address(0));
+
+ if (ret) {
+ find_vm_area(addr)->flags |= VM_KASAN;
+ kmemleak_ignore(ret);
+ return 0;
+ }
+
+ return -ENOMEM;
+}
+
+void kasan_free_shadow(const struct vm_struct *vm)
+{
+ if (vm->flags & VM_KASAN)
+ vfree(kasan_mem_to_shadow(vm->addr));
+}
+
+static void register_global(struct kasan_global *global)
+{
+ size_t aligned_size = round_up(global->size, KASAN_SHADOW_SCALE_SIZE);
+
+ kasan_unpoison_shadow(global->beg, global->size);
+
+ kasan_poison_shadow(global->beg + aligned_size,
+ global->size_with_redzone - aligned_size,
+ KASAN_GLOBAL_REDZONE);
+}
+
+void __asan_register_globals(struct kasan_global *globals, size_t size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ register_global(&globals[i]);
+}
+EXPORT_SYMBOL(__asan_register_globals);
+
+void __asan_unregister_globals(struct kasan_global *globals, size_t size)
+{
+}
+EXPORT_SYMBOL(__asan_unregister_globals);
+
+#define DEFINE_ASAN_LOAD_STORE(size) \
+ void __asan_load##size(unsigned long addr) \
+ { \
+ check_memory_region(addr, size, false); \
+ } \
+ EXPORT_SYMBOL(__asan_load##size); \
+ __alias(__asan_load##size) \
+ void __asan_load##size##_noabort(unsigned long); \
+ EXPORT_SYMBOL(__asan_load##size##_noabort); \
+ void __asan_store##size(unsigned long addr) \
+ { \
+ check_memory_region(addr, size, true); \
+ } \
+ EXPORT_SYMBOL(__asan_store##size); \
+ __alias(__asan_store##size) \
+ void __asan_store##size##_noabort(unsigned long); \
+ EXPORT_SYMBOL(__asan_store##size##_noabort)
+
+DEFINE_ASAN_LOAD_STORE(1);
+DEFINE_ASAN_LOAD_STORE(2);
+DEFINE_ASAN_LOAD_STORE(4);
+DEFINE_ASAN_LOAD_STORE(8);
+DEFINE_ASAN_LOAD_STORE(16);
+
+void __asan_loadN(unsigned long addr, size_t size)
+{
+ check_memory_region(addr, size, false);
+}
+EXPORT_SYMBOL(__asan_loadN);
+
+__alias(__asan_loadN)
+void __asan_loadN_noabort(unsigned long, size_t);
+EXPORT_SYMBOL(__asan_loadN_noabort);
+
+void __asan_storeN(unsigned long addr, size_t size)
+{
+ check_memory_region(addr, size, true);
+}
+EXPORT_SYMBOL(__asan_storeN);
+
+__alias(__asan_storeN)
+void __asan_storeN_noabort(unsigned long, size_t);
+EXPORT_SYMBOL(__asan_storeN_noabort);
+
+/* to shut up compiler complaints */
+void __asan_handle_no_return(void) {}
+EXPORT_SYMBOL(__asan_handle_no_return);
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+static int kasan_mem_notifier(struct notifier_block *nb,
+ unsigned long action, void *data)
+{
+ return (action == MEM_GOING_ONLINE) ? NOTIFY_BAD : NOTIFY_OK;
+}
+
+static int __init kasan_memhotplug_init(void)
+{
+ pr_err("WARNING: KASAN doesn't support memory hot-add\n");
+ pr_err("Memory hot-add will be disabled\n");
+
+ hotplug_memory_notifier(kasan_mem_notifier, 0);
+
+ return 0;
+}
+
+module_init(kasan_memhotplug_init);
+#endif
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
new file mode 100644
index 000000000000..4f6c62e5c21e
--- /dev/null
+++ b/mm/kasan/kasan.h
@@ -0,0 +1,71 @@
+#ifndef __MM_KASAN_KASAN_H
+#define __MM_KASAN_KASAN_H
+
+#include <linux/kasan.h>
+
+#define KASAN_SHADOW_SCALE_SIZE (1UL << KASAN_SHADOW_SCALE_SHIFT)
+#define KASAN_SHADOW_MASK (KASAN_SHADOW_SCALE_SIZE - 1)
+
+#define KASAN_FREE_PAGE 0xFF /* page was freed */
+#define KASAN_PAGE_REDZONE 0xFE /* redzone for kmalloc_large allocations */
+#define KASAN_KMALLOC_REDZONE 0xFC /* redzone inside slub object */
+#define KASAN_KMALLOC_FREE 0xFB /* object was freed (kmem_cache_free/kfree) */
+#define KASAN_GLOBAL_REDZONE 0xFA /* redzone for global variable */
+
+/*
+ * Stack redzone shadow values
+ * (Those are compiler's ABI, don't change them)
+ */
+#define KASAN_STACK_LEFT 0xF1
+#define KASAN_STACK_MID 0xF2
+#define KASAN_STACK_RIGHT 0xF3
+#define KASAN_STACK_PARTIAL 0xF4
+
+/* Don't break randconfig/all*config builds */
+#ifndef KASAN_ABI_VERSION
+#define KASAN_ABI_VERSION 1
+#endif
+
+struct kasan_access_info {
+ const void *access_addr;
+ const void *first_bad_addr;
+ size_t access_size;
+ bool is_write;
+ unsigned long ip;
+};
+
+/* The layout of struct dictated by compiler */
+struct kasan_source_location {
+ const char *filename;
+ int line_no;
+ int column_no;
+};
+
+/* The layout of struct dictated by compiler */
+struct kasan_global {
+ const void *beg; /* Address of the beginning of the global variable. */
+ size_t size; /* Size of the global variable. */
+ size_t size_with_redzone; /* Size of the variable + size of the red zone. 32 bytes aligned */
+ const void *name;
+ const void *module_name; /* Name of the module where the global variable is declared. */
+ unsigned long has_dynamic_init; /* This needed for C++ */
+#if KASAN_ABI_VERSION >= 4
+ struct kasan_source_location *location;
+#endif
+};
+
+static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
+{
+ return (void *)(((unsigned long)shadow_addr - KASAN_SHADOW_OFFSET)
+ << KASAN_SHADOW_SCALE_SHIFT);
+}
+
+static inline bool kasan_report_enabled(void)
+{
+ return !current->kasan_depth;
+}
+
+void kasan_report(unsigned long addr, size_t size,
+ bool is_write, unsigned long ip);
+
+#endif
diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
new file mode 100644
index 000000000000..f41ac9a551cc
--- /dev/null
+++ b/mm/kasan/kasan_init.c
@@ -0,0 +1,152 @@
+/*
+ * This file contains some kasan initialization code.
+ *
+ * Copyright (c) 2015 Samsung Electronics Co., Ltd.
+ * Author: Andrey Ryabinin <ryabinin.a.a@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/bootmem.h>
+#include <linux/init.h>
+#include <linux/kasan.h>
+#include <linux/kernel.h>
+#include <linux/memblock.h>
+#include <linux/pfn.h>
+
+#include <asm/page.h>
+#include <asm/pgalloc.h>
+
+/*
+ * This page serves two purposes:
+ * - It used as early shadow memory. The entire shadow region populated
+ * with this page, before we will be able to setup normal shadow memory.
+ * - Latter it reused it as zero shadow to cover large ranges of memory
+ * that allowed to access, but not handled by kasan (vmalloc/vmemmap ...).
+ */
+unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss;
+
+#if CONFIG_ARM64_PGTABLE_LEVELS > 3
+pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss;
+#endif
+#if CONFIG_ARM64_PGTABLE_LEVELS > 2
+pmd_t kasan_zero_pmd[PTRS_PER_PMD] __page_aligned_bss;
+#endif
+pte_t kasan_zero_pte[PTRS_PER_PTE] __page_aligned_bss;
+
+static __init void *early_alloc(size_t size, int node)
+{
+ return memblock_virt_alloc_try_nid(size, size, __pa(MAX_DMA_ADDRESS),
+ BOOTMEM_ALLOC_ACCESSIBLE, node);
+}
+
+static void __init zero_pte_populate(pmd_t *pmd, unsigned long addr,
+ unsigned long end)
+{
+ pte_t *pte = pte_offset_kernel(pmd, addr);
+ pte_t zero_pte;
+
+ zero_pte = pfn_pte(PFN_DOWN(__pa(kasan_zero_page)), PAGE_KERNEL);
+ zero_pte = pte_wrprotect(zero_pte);
+
+ while (addr + PAGE_SIZE <= end) {
+ set_pte_at(&init_mm, addr, pte, zero_pte);
+ addr += PAGE_SIZE;
+ pte = pte_offset_kernel(pmd, addr);
+ }
+}
+
+static void __init zero_pmd_populate(pud_t *pud, unsigned long addr,
+ unsigned long end)
+{
+ pmd_t *pmd = pmd_offset(pud, addr);
+ unsigned long next;
+
+ do {
+ next = pmd_addr_end(addr, end);
+
+ if (IS_ALIGNED(addr, PMD_SIZE) && end - addr >= PMD_SIZE) {
+ pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);
+ continue;
+ }
+
+ if (pmd_none(*pmd)) {
+ pmd_populate_kernel(&init_mm, pmd,
+ early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+ }
+ zero_pte_populate(pmd, addr, next);
+ } while (pmd++, addr = next, addr != end);
+}
+
+static void __init zero_pud_populate(pgd_t *pgd, unsigned long addr,
+ unsigned long end)
+{
+ pud_t *pud = pud_offset(pgd, addr);
+ unsigned long next;
+
+ do {
+ next = pud_addr_end(addr, end);
+ if (IS_ALIGNED(addr, PUD_SIZE) && end - addr >= PUD_SIZE) {
+ pmd_t *pmd;
+
+ pud_populate(&init_mm, pud, kasan_zero_pmd);
+ pmd = pmd_offset(pud, addr);
+ pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);
+ continue;
+ }
+
+ if (pud_none(*pud)) {
+ pud_populate(&init_mm, pud,
+ early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+ }
+ zero_pmd_populate(pud, addr, next);
+ } while (pud++, addr = next, addr != end);
+}
+
+/**
+ * kasan_populate_zero_shadow - populate shadow memory region with
+ * kasan_zero_page
+ * @shadow_start - start of the memory range to populate
+ * @shadow_end - end of the memory range to populate
+ */
+void __init kasan_populate_zero_shadow(const void *shadow_start,
+ const void *shadow_end)
+{
+ unsigned long addr = (unsigned long)shadow_start;
+ unsigned long end = (unsigned long)shadow_end;
+ pgd_t *pgd = pgd_offset_k(addr);
+ unsigned long next;
+
+ do {
+ next = pgd_addr_end(addr, end);
+
+ if (IS_ALIGNED(addr, PGDIR_SIZE) && end - addr >= PGDIR_SIZE) {
+ pud_t *pud;
+ pmd_t *pmd;
+
+ /*
+ * kasan_zero_pud should be populated with pmds
+ * at this moment.
+ * [pud,pmd]_populate*() below needed only for
+ * 3,2 - level page tables where we don't have
+ * puds,pmds, so pgd_populate(), pud_populate()
+ * is noops.
+ */
+ pgd_populate(&init_mm, pgd, kasan_zero_pud);
+ pud = pud_offset(pgd, addr);
+ pud_populate(&init_mm, pud, kasan_zero_pmd);
+ pmd = pmd_offset(pud, addr);
+ pmd_populate_kernel(&init_mm, pmd, kasan_zero_pte);
+ continue;
+ }
+
+ if (pgd_none(*pgd)) {
+ pgd_populate(&init_mm, pgd,
+ early_alloc(PAGE_SIZE, NUMA_NO_NODE));
+ }
+ zero_pud_populate(pgd, addr, next);
+ } while (pgd++, addr = next, addr != end);
+}
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
new file mode 100644
index 000000000000..e0b3e94ecdb5
--- /dev/null
+++ b/mm/kasan/report.c
@@ -0,0 +1,298 @@
+/*
+ * This file contains error reporting code.
+ *
+ * Copyright (c) 2014 Samsung Electronics Co., Ltd.
+ * Author: Andrey Ryabinin <a.ryabinin@samsung.com>
+ *
+ * Some code borrowed from https://github.com/xairy/kasan-prototype by
+ * Andrey Konovalov <adech.fo@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/printk.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/stacktrace.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/kasan.h>
+#include <linux/module.h>
+
+#include <asm/sections.h>
+
+#include "kasan.h"
+#include "../slab.h"
+
+/* Shadow layout customization. */
+#define SHADOW_BYTES_PER_BLOCK 1
+#define SHADOW_BLOCKS_PER_ROW 16
+#define SHADOW_BYTES_PER_ROW (SHADOW_BLOCKS_PER_ROW * SHADOW_BYTES_PER_BLOCK)
+#define SHADOW_ROWS_AROUND_ADDR 2
+
+static const void *find_first_bad_addr(const void *addr, size_t size)
+{
+ u8 shadow_val = *(u8 *)kasan_mem_to_shadow(addr);
+ const void *first_bad_addr = addr;
+
+ while (!shadow_val && first_bad_addr < addr + size) {
+ first_bad_addr += KASAN_SHADOW_SCALE_SIZE;
+ shadow_val = *(u8 *)kasan_mem_to_shadow(first_bad_addr);
+ }
+ return first_bad_addr;
+}
+
+static void print_error_description(struct kasan_access_info *info)
+{
+ const char *bug_type = "unknown-crash";
+ u8 *shadow_addr;
+
+ info->first_bad_addr = find_first_bad_addr(info->access_addr,
+ info->access_size);
+
+ shadow_addr = (u8 *)kasan_mem_to_shadow(info->first_bad_addr);
+
+ /*
+ * If shadow byte value is in [0, KASAN_SHADOW_SCALE_SIZE) we can look
+ * at the next shadow byte to determine the type of the bad access.
+ */
+ if (*shadow_addr > 0 && *shadow_addr <= KASAN_SHADOW_SCALE_SIZE - 1)
+ shadow_addr++;
+
+ switch (*shadow_addr) {
+ case 0 ... KASAN_SHADOW_SCALE_SIZE - 1:
+ /*
+ * In theory it's still possible to see these shadow values
+ * due to a data race in the kernel code.
+ */
+ bug_type = "out-of-bounds";
+ break;
+ case KASAN_PAGE_REDZONE:
+ case KASAN_KMALLOC_REDZONE:
+ bug_type = "slab-out-of-bounds";
+ break;
+ case KASAN_GLOBAL_REDZONE:
+ bug_type = "global-out-of-bounds";
+ break;
+ case KASAN_STACK_LEFT:
+ case KASAN_STACK_MID:
+ case KASAN_STACK_RIGHT:
+ case KASAN_STACK_PARTIAL:
+ bug_type = "stack-out-of-bounds";
+ break;
+ case KASAN_FREE_PAGE:
+ case KASAN_KMALLOC_FREE:
+ bug_type = "use-after-free";
+ break;
+ }
+
+ pr_err("BUG: KASAN: %s in %pS at addr %p\n",
+ bug_type, (void *)info->ip,
+ info->access_addr);
+ pr_err("%s of size %zu by task %s/%d\n",
+ info->is_write ? "Write" : "Read",
+ info->access_size, current->comm, task_pid_nr(current));
+}
+
+static inline bool kernel_or_module_addr(const void *addr)
+{
+ if (addr >= (void *)_stext && addr < (void *)_end)
+ return true;
+ if (is_module_address((unsigned long)addr))
+ return true;
+ return false;
+}
+
+static inline bool init_task_stack_addr(const void *addr)
+{
+ return addr >= (void *)&init_thread_union.stack &&
+ (addr <= (void *)&init_thread_union.stack +
+ sizeof(init_thread_union.stack));
+}
+
+static void print_address_description(struct kasan_access_info *info)
+{
+ const void *addr = info->access_addr;
+
+ if ((addr >= (void *)PAGE_OFFSET) &&
+ (addr < high_memory)) {
+ struct page *page = virt_to_head_page(addr);
+
+ if (PageSlab(page)) {
+ void *object;
+ struct kmem_cache *cache = page->slab_cache;
+ void *last_object;
+
+ object = virt_to_obj(cache, page_address(page), addr);
+ last_object = page_address(page) +
+ page->objects * cache->size;
+
+ if (unlikely(object > last_object))
+ object = last_object; /* we hit into padding */
+
+ object_err(cache, page, object,
+ "kasan: bad access detected");
+ return;
+ }
+ dump_page(page, "kasan: bad access detected");
+ }
+
+ if (kernel_or_module_addr(addr)) {
+ if (!init_task_stack_addr(addr))
+ pr_err("Address belongs to variable %pS\n", addr);
+ }
+
+ dump_stack();
+}
+
+static bool row_is_guilty(const void *row, const void *guilty)
+{
+ return (row <= guilty) && (guilty < row + SHADOW_BYTES_PER_ROW);
+}
+
+static int shadow_pointer_offset(const void *row, const void *shadow)
+{
+ /* The length of ">ff00ff00ff00ff00: " is
+ * 3 + (BITS_PER_LONG/8)*2 chars.
+ */
+ return 3 + (BITS_PER_LONG/8)*2 + (shadow - row)*2 +
+ (shadow - row) / SHADOW_BYTES_PER_BLOCK + 1;
+}
+
+static void print_shadow_for_address(const void *addr)
+{
+ int i;
+ const void *shadow = kasan_mem_to_shadow(addr);
+ const void *shadow_row;
+
+ shadow_row = (void *)round_down((unsigned long)shadow,
+ SHADOW_BYTES_PER_ROW)
+ - SHADOW_ROWS_AROUND_ADDR * SHADOW_BYTES_PER_ROW;
+
+ pr_err("Memory state around the buggy address:\n");
+
+ for (i = -SHADOW_ROWS_AROUND_ADDR; i <= SHADOW_ROWS_AROUND_ADDR; i++) {
+ const void *kaddr = kasan_shadow_to_mem(shadow_row);
+ char buffer[4 + (BITS_PER_LONG/8)*2];
+ char shadow_buf[SHADOW_BYTES_PER_ROW];
+
+ snprintf(buffer, sizeof(buffer),
+ (i == 0) ? ">%p: " : " %p: ", kaddr);
+ /*
+ * We should not pass a shadow pointer to generic
+ * function, because generic functions may try to
+ * access kasan mapping for the passed address.
+ */
+ memcpy(shadow_buf, shadow_row, SHADOW_BYTES_PER_ROW);
+ print_hex_dump(KERN_ERR, buffer,
+ DUMP_PREFIX_NONE, SHADOW_BYTES_PER_ROW, 1,
+ shadow_buf, SHADOW_BYTES_PER_ROW, 0);
+
+ if (row_is_guilty(shadow_row, shadow))
+ pr_err("%*c\n",
+ shadow_pointer_offset(shadow_row, shadow),
+ '^');
+
+ shadow_row += SHADOW_BYTES_PER_ROW;
+ }
+}
+
+static DEFINE_SPINLOCK(report_lock);
+
+static void kasan_report_error(struct kasan_access_info *info)
+{
+ unsigned long flags;
+ const char *bug_type;
+
+ /*
+ * Make sure we don't end up in loop.
+ */
+ kasan_disable_current();
+ spin_lock_irqsave(&report_lock, flags);
+ pr_err("================================="
+ "=================================\n");
+ if (info->access_addr <
+ kasan_shadow_to_mem((void *)KASAN_SHADOW_START)) {
+ if ((unsigned long)info->access_addr < PAGE_SIZE)
+ bug_type = "null-ptr-deref";
+ else if ((unsigned long)info->access_addr < TASK_SIZE)
+ bug_type = "user-memory-access";
+ else
+ bug_type = "wild-memory-access";
+ pr_err("BUG: KASAN: %s on address %p\n",
+ bug_type, info->access_addr);
+ pr_err("%s of size %zu by task %s/%d\n",
+ info->is_write ? "Write" : "Read",
+ info->access_size, current->comm,
+ task_pid_nr(current));
+ dump_stack();
+ } else {
+ print_error_description(info);
+ print_address_description(info);
+ print_shadow_for_address(info->first_bad_addr);
+ }
+ pr_err("================================="
+ "=================================\n");
+ add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
+ spin_unlock_irqrestore(&report_lock, flags);
+ kasan_enable_current();
+}
+
+void kasan_report(unsigned long addr, size_t size,
+ bool is_write, unsigned long ip)
+{
+ struct kasan_access_info info;
+
+ if (likely(!kasan_report_enabled()))
+ return;
+
+ info.access_addr = (void *)addr;
+ info.access_size = size;
+ info.is_write = is_write;
+ info.ip = ip;
+
+ kasan_report_error(&info);
+}
+
+
+#define DEFINE_ASAN_REPORT_LOAD(size) \
+void __asan_report_load##size##_noabort(unsigned long addr) \
+{ \
+ kasan_report(addr, size, false, _RET_IP_); \
+} \
+EXPORT_SYMBOL(__asan_report_load##size##_noabort)
+
+#define DEFINE_ASAN_REPORT_STORE(size) \
+void __asan_report_store##size##_noabort(unsigned long addr) \
+{ \
+ kasan_report(addr, size, true, _RET_IP_); \
+} \
+EXPORT_SYMBOL(__asan_report_store##size##_noabort)
+
+DEFINE_ASAN_REPORT_LOAD(1);
+DEFINE_ASAN_REPORT_LOAD(2);
+DEFINE_ASAN_REPORT_LOAD(4);
+DEFINE_ASAN_REPORT_LOAD(8);
+DEFINE_ASAN_REPORT_LOAD(16);
+DEFINE_ASAN_REPORT_STORE(1);
+DEFINE_ASAN_REPORT_STORE(2);
+DEFINE_ASAN_REPORT_STORE(4);
+DEFINE_ASAN_REPORT_STORE(8);
+DEFINE_ASAN_REPORT_STORE(16);
+
+void __asan_report_load_n_noabort(unsigned long addr, size_t size)
+{
+ kasan_report(addr, size, false, _RET_IP_);
+}
+EXPORT_SYMBOL(__asan_report_load_n_noabort);
+
+void __asan_report_store_n_noabort(unsigned long addr, size_t size)
+{
+ kasan_report(addr, size, true, _RET_IP_);
+}
+EXPORT_SYMBOL(__asan_report_store_n_noabort);
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 3cda50c1e394..c607bc059664 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -98,6 +98,7 @@
#include <asm/processor.h>
#include <linux/atomic.h>
+#include <linux/kasan.h>
#include <linux/kmemcheck.h>
#include <linux/kmemleak.h>
#include <linux/memory_hotplug.h>
@@ -193,6 +194,8 @@ static struct kmem_cache *scan_area_cache;
/* set if tracing memory operations is enabled */
static int kmemleak_enabled;
+/* same as above but only for the kmemleak_free() callback */
+static int kmemleak_free_enabled;
/* set in the late_initcall if there were no errors */
static int kmemleak_initialized;
/* enables or disables early logging of the memory operations */
@@ -905,12 +908,13 @@ EXPORT_SYMBOL_GPL(kmemleak_alloc);
* kmemleak_alloc_percpu - register a newly allocated __percpu object
* @ptr: __percpu pointer to beginning of the object
* @size: size of the object
+ * @gfp: flags used for kmemleak internal memory allocations
*
* This function is called from the kernel percpu allocator when a new object
- * (memory block) is allocated (alloc_percpu). It assumes GFP_KERNEL
- * allocation.
+ * (memory block) is allocated (alloc_percpu).
*/
-void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size)
+void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size,
+ gfp_t gfp)
{
unsigned int cpu;
@@ -923,7 +927,7 @@ void __ref kmemleak_alloc_percpu(const void __percpu *ptr, size_t size)
if (kmemleak_enabled && ptr && !IS_ERR(ptr))
for_each_possible_cpu(cpu)
create_object((unsigned long)per_cpu_ptr(ptr, cpu),
- size, 0, GFP_KERNEL);
+ size, 0, gfp);
else if (kmemleak_early_log)
log_early(KMEMLEAK_ALLOC_PERCPU, ptr, size, 0);
}
@@ -940,7 +944,7 @@ void __ref kmemleak_free(const void *ptr)
{
pr_debug("%s(0x%p)\n", __func__, ptr);
- if (kmemleak_enabled && ptr && !IS_ERR(ptr))
+ if (kmemleak_free_enabled && ptr && !IS_ERR(ptr))
delete_object_full((unsigned long)ptr);
else if (kmemleak_early_log)
log_early(KMEMLEAK_FREE, ptr, 0, 0);
@@ -980,7 +984,7 @@ void __ref kmemleak_free_percpu(const void __percpu *ptr)
pr_debug("%s(0x%p)\n", __func__, ptr);
- if (kmemleak_enabled && ptr && !IS_ERR(ptr))
+ if (kmemleak_free_enabled && ptr && !IS_ERR(ptr))
for_each_possible_cpu(cpu)
delete_object_full((unsigned long)per_cpu_ptr(ptr,
cpu));
@@ -1113,7 +1117,10 @@ static bool update_checksum(struct kmemleak_object *object)
if (!kmemcheck_is_obj_initialized(object->pointer, object->size))
return false;
+ kasan_disable_current();
object->checksum = crc32(0, (void *)object->pointer, object->size);
+ kasan_enable_current();
+
return object->checksum != old_csum;
}
@@ -1164,7 +1171,9 @@ static void scan_block(void *_start, void *_end,
BYTES_PER_POINTER))
continue;
+ kasan_disable_current();
pointer = *ptr;
+ kasan_enable_current();
object = find_and_get_object(pointer, 1);
if (!object)
@@ -1743,6 +1752,13 @@ static void kmemleak_do_cleanup(struct work_struct *work)
mutex_lock(&scan_mutex);
stop_scan_thread();
+ /*
+ * Once the scan thread has stopped, it is safe to no longer track
+ * object freeing. Ordering of the scan thread stopping and the memory
+ * accesses below is guaranteed by the kthread_stop() function.
+ */
+ kmemleak_free_enabled = 0;
+
if (!kmemleak_found_leaks)
__kmemleak_do_cleanup();
else
@@ -1769,6 +1785,8 @@ static void kmemleak_disable(void)
/* check whether it is too early for a kernel thread */
if (kmemleak_initialized)
schedule_work(&cleanup_work);
+ else
+ kmemleak_free_enabled = 0;
pr_info("Kernel memory leak detector disabled\n");
}
@@ -1833,8 +1851,10 @@ void __init kmemleak_init(void)
if (kmemleak_error) {
local_irq_restore(flags);
return;
- } else
+ } else {
kmemleak_enabled = 1;
+ kmemleak_free_enabled = 1;
+ }
local_irq_restore(flags);
/*
diff --git a/mm/memblock.c b/mm/memblock.c
index 6ecb0d937fb5..eda16393e4f6 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1305,7 +1305,7 @@ void __init __memblock_free_late(phys_addr_t base, phys_addr_t size)
end = PFN_DOWN(base + size);
for (; cursor < end; cursor++) {
- __free_pages_bootmem(pfn_to_page(cursor), 0);
+ __free_pages_bootmem(pfn_to_page(cursor), cursor, 0);
totalram_pages++;
}
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0c4538811188..30037f330bf2 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4754,16 +4754,17 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
swap_buffers:
/* Swap primary and spare array */
thresholds->spare = thresholds->primary;
- /* If all events are unregistered, free the spare array */
- if (!new) {
- kfree(thresholds->spare);
- thresholds->spare = NULL;
- }
rcu_assign_pointer(thresholds->primary, new);
/* To be sure that nobody uses thresholds */
synchronize_rcu();
+
+ /* If all events are unregistered, free the spare array */
+ if (!new) {
+ kfree(thresholds->spare);
+ thresholds->spare = NULL;
+ }
unlock:
mutex_unlock(&memcg->thresholds_lock);
}
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 22f047fbaa33..f7d6522beeaf 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1523,7 +1523,9 @@ static int get_any_page(struct page *page, unsigned long pfn, int flags)
* Did it turn free?
*/
ret = __get_any_page(page, pfn, 0);
- if (!PageLRU(page)) {
+ if (ret == 1 && !PageLRU(page)) {
+ /* Drop page reference which is from __get_any_page() */
+ put_page(page);
pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n",
pfn, page->flags);
return -EIO;
@@ -1552,8 +1554,17 @@ static int soft_offline_huge_page(struct page *page, int flags)
}
unlock_page(hpage);
- /* Keep page count to indicate a given hugepage is isolated. */
- list_move(&hpage->lru, &pagelist);
+ ret = isolate_huge_page(hpage, &pagelist);
+ /*
+ * get_any_page() and isolate_huge_page() takes a refcount each,
+ * so need to drop one here.
+ */
+ put_page(hpage);
+ if (!ret) {
+ pr_info("soft offline: %#lx hugepage failed to isolate\n", pfn);
+ return -EBUSY;
+ }
+
ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
MIGRATE_SYNC, MR_MEMORY_FAILURE);
if (ret) {
diff --git a/mm/memory.c b/mm/memory.c
index 8b6028fc342e..902ec0c0f914 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3342,8 +3342,18 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (unlikely(pmd_none(*pmd)) &&
unlikely(__pte_alloc(mm, vma, pmd, address)))
return VM_FAULT_OOM;
- /* if an huge pmd materialized from under us just retry later */
- if (unlikely(pmd_trans_huge(*pmd)))
+ /*
+ * If a huge pmd materialized under us just retry later. Use
+ * pmd_trans_unstable() instead of pmd_trans_huge() to ensure the pmd
+ * didn't become pmd_trans_huge under us and then back to pmd_none, as
+ * a result of MADV_DONTNEED running immediately after a huge pmd fault
+ * in a different thread of this mm, in turn leading to a misleading
+ * pmd_trans_huge() retval. All we have to ensure is that it is a
+ * regular pmd that we can walk with pte_offset_map() and we can do that
+ * through an atomic read in C, which is what pmd_trans_unstable()
+ * provides.
+ */
+ if (unlikely(pmd_trans_unstable(pmd)))
return 0;
/*
* A regular pmd is established and it can't morph into a huge pmd
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 8c71654e261f..05014e89efae 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1978,8 +1978,10 @@ void try_offline_node(int nid)
* wait_table may be allocated from boot memory,
* here only free if it's allocated by vmalloc.
*/
- if (is_vmalloc_addr(zone->wait_table))
+ if (is_vmalloc_addr(zone->wait_table)) {
vfree(zone->wait_table);
+ zone->wait_table = NULL;
+ }
}
}
EXPORT_SYMBOL(try_offline_node);
diff --git a/mm/mempool.c b/mm/mempool.c
index e209c98c7203..d4c4e4c1bc56 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -6,26 +6,139 @@
* extreme VM load.
*
* started by Ingo Molnar, Copyright (C) 2001
+ * debugging by David Rientjes, Copyright (C) 2015
*/
#include <linux/mm.h>
#include <linux/slab.h>
+#include <linux/highmem.h>
+#include <linux/kasan.h>
#include <linux/kmemleak.h>
#include <linux/export.h>
#include <linux/mempool.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>
+#include "slab.h"
+
+#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB_DEBUG_ON)
+static void poison_error(mempool_t *pool, void *element, size_t size,
+ size_t byte)
+{
+ const int nr = pool->curr_nr;
+ const int start = max_t(int, byte - (BITS_PER_LONG / 8), 0);
+ const int end = min_t(int, byte + (BITS_PER_LONG / 8), size);
+ int i;
+
+ pr_err("BUG: mempool element poison mismatch\n");
+ pr_err("Mempool %p size %zu\n", pool, size);
+ pr_err(" nr=%d @ %p: %s0x", nr, element, start > 0 ? "... " : "");
+ for (i = start; i < end; i++)
+ pr_cont("%x ", *(u8 *)(element + i));
+ pr_cont("%s\n", end < size ? "..." : "");
+ dump_stack();
+}
+
+static void __check_element(mempool_t *pool, void *element, size_t size)
+{
+ u8 *obj = element;
+ size_t i;
+
+ for (i = 0; i < size; i++) {
+ u8 exp = (i < size - 1) ? POISON_FREE : POISON_END;
+
+ if (obj[i] != exp) {
+ poison_error(pool, element, size, i);
+ return;
+ }
+ }
+ memset(obj, POISON_INUSE, size);
+}
+
+static void check_element(mempool_t *pool, void *element)
+{
+ /* Mempools backed by slab allocator */
+ if (pool->free == mempool_free_slab || pool->free == mempool_kfree)
+ __check_element(pool, element, ksize(element));
+
+ /* Mempools backed by page allocator */
+ if (pool->free == mempool_free_pages) {
+ int order = (int)(long)pool->pool_data;
+ void *addr = kmap_atomic((struct page *)element);
+
+ __check_element(pool, addr, 1UL << (PAGE_SHIFT + order));
+ kunmap_atomic(addr);
+ }
+}
+
+static void __poison_element(void *element, size_t size)
+{
+ u8 *obj = element;
+
+ memset(obj, POISON_FREE, size - 1);
+ obj[size - 1] = POISON_END;
+}
+
+static void poison_element(mempool_t *pool, void *element)
+{
+ /* Mempools backed by slab allocator */
+ if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc)
+ __poison_element(element, ksize(element));
+
+ /* Mempools backed by page allocator */
+ if (pool->alloc == mempool_alloc_pages) {
+ int order = (int)(long)pool->pool_data;
+ void *addr = kmap_atomic((struct page *)element);
+
+ __poison_element(addr, 1UL << (PAGE_SHIFT + order));
+ kunmap_atomic(addr);
+ }
+}
+#else /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */
+static inline void check_element(mempool_t *pool, void *element)
+{
+}
+static inline void poison_element(mempool_t *pool, void *element)
+{
+}
+#endif /* CONFIG_DEBUG_SLAB || CONFIG_SLUB_DEBUG_ON */
+
+static void kasan_poison_element(mempool_t *pool, void *element)
+{
+ if (pool->alloc == mempool_alloc_slab)
+ kasan_slab_free(pool->pool_data, element);
+ if (pool->alloc == mempool_kmalloc)
+ kasan_kfree(element);
+ if (pool->alloc == mempool_alloc_pages)
+ kasan_free_pages(element, (unsigned long)pool->pool_data);
+}
+
+static void kasan_unpoison_element(mempool_t *pool, void *element)
+{
+ if (pool->alloc == mempool_alloc_slab)
+ kasan_slab_alloc(pool->pool_data, element);
+ if (pool->alloc == mempool_kmalloc)
+ kasan_krealloc(element, (size_t)pool->pool_data);
+ if (pool->alloc == mempool_alloc_pages)
+ kasan_alloc_pages(element, (unsigned long)pool->pool_data);
+}
static void add_element(mempool_t *pool, void *element)
{
BUG_ON(pool->curr_nr >= pool->min_nr);
+ poison_element(pool, element);
+ kasan_poison_element(pool, element);
pool->elements[pool->curr_nr++] = element;
}
static void *remove_element(mempool_t *pool)
{
- BUG_ON(pool->curr_nr <= 0);
- return pool->elements[--pool->curr_nr];
+ void *element = pool->elements[--pool->curr_nr];
+
+ BUG_ON(pool->curr_nr < 0);
+ kasan_unpoison_element(pool, element);
+ check_element(pool, element);
+ kasan_unpoison_element(pool, element);
+ return element;
}
/**
@@ -38,6 +151,9 @@ static void *remove_element(mempool_t *pool)
*/
void mempool_destroy(mempool_t *pool)
{
+ if (unlikely(!pool))
+ return;
+
while (pool->curr_nr) {
void *element = remove_element(pool);
pool->free(element, pool->pool_data);
@@ -113,23 +229,24 @@ EXPORT_SYMBOL(mempool_create_node);
* mempool_create().
* @new_min_nr: the new minimum number of elements guaranteed to be
* allocated for this pool.
- * @gfp_mask: the usual allocation bitmask.
*
* This function shrinks/grows the pool. In the case of growing,
* it cannot be guaranteed that the pool will be grown to the new
* size immediately, but new mempool_free() calls will refill it.
+ * This function may sleep.
*
* Note, the caller must guarantee that no mempool_destroy is called
* while this function is running. mempool_alloc() & mempool_free()
* might be called (eg. from IRQ contexts) while this function executes.
*/
-int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask)
+int mempool_resize(mempool_t *pool, int new_min_nr)
{
void *element;
void **new_elements;
unsigned long flags;
BUG_ON(new_min_nr <= 0);
+ might_sleep();
spin_lock_irqsave(&pool->lock, flags);
if (new_min_nr <= pool->min_nr) {
@@ -145,7 +262,8 @@ int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask)
spin_unlock_irqrestore(&pool->lock, flags);
/* Grow the pool */
- new_elements = kmalloc(new_min_nr * sizeof(*new_elements), gfp_mask);
+ new_elements = kmalloc_array(new_min_nr, sizeof(*new_elements),
+ GFP_KERNEL);
if (!new_elements)
return -ENOMEM;
@@ -164,7 +282,7 @@ int mempool_resize(mempool_t *pool, int new_min_nr, gfp_t gfp_mask)
while (pool->curr_nr < pool->min_nr) {
spin_unlock_irqrestore(&pool->lock, flags);
- element = pool->alloc(gfp_mask, pool->pool_data);
+ element = pool->alloc(GFP_KERNEL, pool->pool_data);
if (!element)
goto out;
spin_lock_irqsave(&pool->lock, flags);
@@ -332,6 +450,7 @@ EXPORT_SYMBOL(mempool_free);
void *mempool_alloc_slab(gfp_t gfp_mask, void *pool_data)
{
struct kmem_cache *mem = pool_data;
+ VM_BUG_ON(mem->ctor);
return kmem_cache_alloc(mem, gfp_mask);
}
EXPORT_SYMBOL(mempool_alloc_slab);
diff --git a/mm/migrate.c b/mm/migrate.c
index cd4fd10c4ec3..3594defceff9 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -421,6 +421,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
return MIGRATEPAGE_SUCCESS;
}
+EXPORT_SYMBOL(migrate_page_move_mapping);
/*
* The expected number of remaining references is the same as that
@@ -580,6 +581,7 @@ void migrate_page_copy(struct page *newpage, struct page *page)
if (PageWriteback(newpage))
end_page_writeback(newpage);
}
+EXPORT_SYMBOL(migrate_page_copy);
/************************************************************
* Migration functions
diff --git a/mm/mmap.c b/mm/mmap.c
index f88b4f940327..f03267136fb7 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -443,12 +443,16 @@ static void validate_mm(struct mm_struct *mm)
struct vm_area_struct *vma = mm->mmap;
while (vma) {
+ struct anon_vma *anon_vma = vma->anon_vma;
struct anon_vma_chain *avc;
- vma_lock_anon_vma(vma);
- list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
- anon_vma_interval_tree_verify(avc);
- vma_unlock_anon_vma(vma);
+ if (anon_vma) {
+ anon_vma_lock_read(anon_vma);
+ list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+ anon_vma_interval_tree_verify(avc);
+ anon_vma_unlock_read(anon_vma);
+ }
+
highest_address = vma->vm_end;
vma = vma->vm_next;
i++;
@@ -2150,32 +2154,27 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
*/
int expand_upwards(struct vm_area_struct *vma, unsigned long address)
{
- int error;
+ int error = 0;
if (!(vma->vm_flags & VM_GROWSUP))
return -EFAULT;
- /*
- * We must make sure the anon_vma is allocated
- * so that the anon_vma locking is not a noop.
- */
+ /* Guard against wrapping around to address 0. */
+ if (address < PAGE_ALIGN(address+4))
+ address = PAGE_ALIGN(address+4);
+ else
+ return -ENOMEM;
+
+ /* We must make sure the anon_vma is allocated. */
if (unlikely(anon_vma_prepare(vma)))
return -ENOMEM;
- vma_lock_anon_vma(vma);
/*
* vma->vm_start/vm_end cannot change under us because the caller
* is required to hold the mmap_sem in read mode. We need the
* anon_vma lock to serialize against concurrent expand_stacks.
- * Also guard against wrapping around to address 0.
*/
- if (address < PAGE_ALIGN(address+4))
- address = PAGE_ALIGN(address+4);
- else {
- vma_unlock_anon_vma(vma);
- return -ENOMEM;
- }
- error = 0;
+ anon_vma_lock_write(vma->anon_vma);
/* Somebody else might have raced and expanded it already */
if (address > vma->vm_end) {
@@ -2193,7 +2192,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
* updates, but we only hold a shared mmap_sem
* lock here, so we need to protect against
* concurrent vma expansions.
- * vma_lock_anon_vma() doesn't help here, as
+ * anon_vma_lock_write() doesn't help here, as
* we don't guarantee that all growable vmas
* in a mm share the same root anon vma.
* So, we reuse mm->page_table_lock to guard
@@ -2213,7 +2212,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
}
}
}
- vma_unlock_anon_vma(vma);
+ anon_vma_unlock_write(vma->anon_vma);
khugepaged_enter_vma_merge(vma, vma->vm_flags);
validate_mm(vma->vm_mm);
return error;
@@ -2228,25 +2227,21 @@ int expand_downwards(struct vm_area_struct *vma,
{
int error;
- /*
- * We must make sure the anon_vma is allocated
- * so that the anon_vma locking is not a noop.
- */
- if (unlikely(anon_vma_prepare(vma)))
- return -ENOMEM;
-
address &= PAGE_MASK;
error = security_mmap_addr(address);
if (error)
return error;
- vma_lock_anon_vma(vma);
+ /* We must make sure the anon_vma is allocated. */
+ if (unlikely(anon_vma_prepare(vma)))
+ return -ENOMEM;
/*
* vma->vm_start/vm_end cannot change under us because the caller
* is required to hold the mmap_sem in read mode. We need the
* anon_vma lock to serialize against concurrent expand_stacks.
*/
+ anon_vma_lock_write(vma->anon_vma);
/* Somebody else might have raced and expanded it already */
if (address < vma->vm_start) {
@@ -2264,7 +2259,7 @@ int expand_downwards(struct vm_area_struct *vma,
* updates, but we only hold a shared mmap_sem
* lock here, so we need to protect against
* concurrent vma expansions.
- * vma_lock_anon_vma() doesn't help here, as
+ * anon_vma_lock_write() doesn't help here, as
* we don't guarantee that all growable vmas
* in a mm share the same root anon vma.
* So, we reuse mm->page_table_lock to guard
@@ -2282,7 +2277,7 @@ int expand_downwards(struct vm_area_struct *vma,
}
}
}
- vma_unlock_anon_vma(vma);
+ anon_vma_unlock_write(vma->anon_vma);
khugepaged_enter_vma_merge(vma, vma->vm_flags);
validate_mm(vma->vm_mm);
return error;
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 90b50468333e..4bea539921df 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -77,7 +77,7 @@ void __init free_bootmem_late(unsigned long addr, unsigned long size)
end = PFN_DOWN(addr + size);
for (; cursor < end; cursor++) {
- __free_pages_bootmem(pfn_to_page(cursor), 0);
+ __free_pages_bootmem(pfn_to_page(cursor), cursor, 0);
totalram_pages++;
}
}
@@ -92,7 +92,7 @@ static void __init __free_pages_memory(unsigned long start, unsigned long end)
while (start + (1UL << order) > end)
order--;
- __free_pages_bootmem(pfn_to_page(start), order);
+ __free_pages_bootmem(pfn_to_page(start), start, order);
start += (1UL << order);
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6c18699a36e0..11a06f842cfd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -25,6 +25,7 @@
#include <linux/compiler.h>
#include <linux/kernel.h>
#include <linux/kmemcheck.h>
+#include <linux/kasan.h>
#include <linux/module.h>
#include <linux/suspend.h>
#include <linux/pagevec.h>
@@ -160,7 +161,7 @@ bool pm_suspended_storage(void)
#endif /* CONFIG_PM_SLEEP */
#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
-int pageblock_order __read_mostly;
+unsigned int pageblock_order __read_mostly;
#endif
static void __free_pages_ok(struct page *page, unsigned int order);
@@ -374,7 +375,7 @@ static void free_compound_page(struct page *page)
__free_pages_ok(page, compound_order(page));
}
-void prep_compound_page(struct page *page, unsigned long order)
+void prep_compound_page(struct page *page, unsigned int order)
{
int i;
int nr_pages = 1 << order;
@@ -559,7 +560,9 @@ static inline void __free_one_page(struct page *page,
unsigned long combined_idx;
unsigned long uninitialized_var(buddy_idx);
struct page *buddy;
- int max_order = MAX_ORDER;
+ unsigned int max_order;
+
+ max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
VM_BUG_ON(!zone_is_initialized(zone));
@@ -568,28 +571,20 @@ static inline void __free_one_page(struct page *page,
return;
VM_BUG_ON(migratetype == -1);
- if (is_migrate_isolate(migratetype)) {
- /*
- * We restrict max order of merging to prevent merge
- * between freepages on isolate pageblock and normal
- * pageblock. Without this, pageblock isolation
- * could cause incorrect freepage accounting.
- */
- max_order = min(MAX_ORDER, pageblock_order + 1);
- } else {
+ if (likely(!is_migrate_isolate(migratetype)))
__mod_zone_freepage_state(zone, 1 << order, migratetype);
- }
- page_idx = pfn & ((1 << max_order) - 1);
+ page_idx = pfn & ((1 << MAX_ORDER) - 1);
VM_BUG_ON_PAGE(page_idx & ((1 << order) - 1), page);
VM_BUG_ON_PAGE(bad_range(zone, page), page);
+continue_merging:
while (order < max_order - 1) {
buddy_idx = __find_buddy_index(page_idx, order);
buddy = page + (buddy_idx - page_idx);
if (!page_is_buddy(page, buddy, order))
- break;
+ goto done_merging;
/*
* Our buddy is free or it is CONFIG_DEBUG_PAGEALLOC guard page,
* merge with it and move up one order.
@@ -611,6 +606,32 @@ static inline void __free_one_page(struct page *page,
page_idx = combined_idx;
order++;
}
+ if (max_order < MAX_ORDER) {
+ /* If we are here, it means order is >= pageblock_order.
+ * We want to prevent merge between freepages on isolate
+ * pageblock and normal pageblock. Without this, pageblock
+ * isolation could cause incorrect freepage or CMA accounting.
+ *
+ * We don't want to hit this code for the more frequent
+ * low-order merging.
+ */
+ if (unlikely(has_isolate_pageblock(zone))) {
+ int buddy_mt;
+
+ buddy_idx = __find_buddy_index(page_idx, order);
+ buddy = page + (buddy_idx - page_idx);
+ buddy_mt = get_pageblock_migratetype(buddy);
+
+ if (migratetype != buddy_mt
+ && (is_migrate_isolate(migratetype) ||
+ is_migrate_isolate(buddy_mt)))
+ goto done_merging;
+ }
+ max_order++;
+ goto continue_merging;
+ }
+
+done_merging:
set_page_order(page, order);
/*
@@ -781,6 +802,7 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
trace_mm_page_free(page, order);
kmemcheck_free_shadow(page, order);
+ kasan_free_pages(page, order);
if (PageAnon(page))
page->mapping = NULL;
@@ -818,7 +840,8 @@ static void __free_pages_ok(struct page *page, unsigned int order)
local_unlock_irqrestore(pa_lock, flags);
}
-void __init __free_pages_bootmem(struct page *page, unsigned int order)
+void __init __free_pages_bootmem(struct page *page, unsigned long pfn,
+ unsigned int order)
{
unsigned int nr_pages = 1 << order;
struct page *p = page;
@@ -960,6 +983,7 @@ static int prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags)
arch_alloc_page(page, order);
kernel_map_pages(page, 1 << order, 1);
+ kasan_alloc_pages(page, order);
if (gfp_flags & __GFP_ZERO)
prep_zero_page(page, order, gfp_flags);
@@ -1031,7 +1055,7 @@ int move_freepages(struct zone *zone,
int migratetype)
{
struct page *page;
- unsigned long order;
+ unsigned int order;
int pages_moved = 0;
#ifndef CONFIG_HOLES_IN_ZONE
@@ -1117,7 +1141,7 @@ static void change_pageblock_range(struct page *pageblock_page,
static int try_to_steal_freepages(struct zone *zone, struct page *page,
int start_type, int fallback_type)
{
- int current_order = page_order(page);
+ unsigned int current_order = page_order(page);
/*
* When borrowing from MIGRATE_CMA, we need to release the excess
@@ -2199,7 +2223,7 @@ static DEFINE_RATELIMIT_STATE(nopage_rs,
DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
-void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
+void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, const char *fmt, ...)
{
unsigned int filter = SHOW_MEM_FILTER_NODES;
@@ -2233,7 +2257,7 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
va_end(args);
}
- pr_warn("%s: page allocation failure: order:%d, mode:0x%x\n",
+ pr_warn("%s: page allocation failure: order:%u, mode:0x%x\n",
current->comm, order, gfp_mask);
dump_stack();
@@ -3006,7 +3030,8 @@ void free_kmem_pages(unsigned long addr, unsigned int order)
}
}
-static void *make_alloc_exact(unsigned long addr, unsigned order, size_t size)
+static void *make_alloc_exact(unsigned long addr, unsigned int order,
+ size_t size)
{
if (addr) {
unsigned long alloc_end = addr + (PAGE_SIZE << order);
@@ -3058,7 +3083,7 @@ EXPORT_SYMBOL(alloc_pages_exact);
*/
void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask)
{
- unsigned order = get_order(size);
+ unsigned int order = get_order(size);
struct page *p = alloc_pages_node(nid, gfp_mask, order);
if (!p)
return NULL;
@@ -3357,7 +3382,8 @@ void show_free_areas(unsigned int filter)
}
for_each_populated_zone(zone) {
- unsigned long nr[MAX_ORDER], flags, order, total = 0;
+ unsigned int order;
+ unsigned long nr[MAX_ORDER], flags, total = 0;
unsigned char types[MAX_ORDER];
if (skip_free_areas_node(filter, zone_to_nid(zone)))
@@ -3706,7 +3732,7 @@ static void build_zonelists(pg_data_t *pgdat)
nodemask_t used_mask;
int local_node, prev_node;
struct zonelist *zonelist;
- int order = current_zonelist_order;
+ unsigned int order = current_zonelist_order;
/* initialize zonelists */
for (i = 0; i < MAX_ZONELISTS; i++) {
@@ -6378,7 +6404,8 @@ int alloc_contig_range(unsigned long start, unsigned long end,
unsigned migratetype)
{
unsigned long outer_start, outer_end;
- int ret = 0, order;
+ unsigned int order;
+ int ret = 0;
struct compact_control cc = {
.nr_migratepages = 0,
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index ec66134fb2a5..03648ced21c2 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -299,11 +299,11 @@ struct page *alloc_migrate_target(struct page *page, unsigned long private,
* now as a simple work-around, we use the next node for destination.
*/
if (PageHuge(page)) {
- nodemask_t src = nodemask_of_node(page_to_nid(page));
- nodemask_t dst;
- nodes_complement(dst, src);
+ int node = next_online_node(page_to_nid(page));
+ if (node == MAX_NUMNODES)
+ node = first_online_node;
return alloc_huge_page_node(page_hstate(compound_head(page)),
- next_node(page_to_nid(page), dst));
+ node);
}
if (PageHighMem(page))
diff --git a/mm/percpu.c b/mm/percpu.c
index 014bab65e0ff..5ae6e0284967 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -110,7 +110,7 @@ struct pcpu_chunk {
int map_used; /* # of map entries used before the sentry */
int map_alloc; /* # of map entries allocated */
int *map; /* allocation map */
- struct work_struct map_extend_work;/* async ->map[] extension */
+ struct list_head map_extend_list;/* on pcpu_map_extend_chunks */
void *data; /* chunk data */
int first_free; /* no free below this */
@@ -160,10 +160,13 @@ static struct pcpu_chunk *pcpu_reserved_chunk;
static int pcpu_reserved_chunk_limit;
static DEFINE_SPINLOCK(pcpu_lock); /* all internal data structures */
-static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop */
+static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop, map ext */
static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */
+/* chunks which need their map areas extended, protected by pcpu_lock */
+static LIST_HEAD(pcpu_map_extend_chunks);
+
/*
* The number of empty populated pages, protected by pcpu_lock. The
* reserved chunk doesn't contribute to the count.
@@ -397,13 +400,19 @@ static int pcpu_need_to_extend(struct pcpu_chunk *chunk, bool is_atomic)
{
int margin, new_alloc;
+ lockdep_assert_held(&pcpu_lock);
+
if (is_atomic) {
margin = 3;
if (chunk->map_alloc <
- chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW &&
- pcpu_async_enabled)
- schedule_work(&chunk->map_extend_work);
+ chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW) {
+ if (list_empty(&chunk->map_extend_list)) {
+ list_add_tail(&chunk->map_extend_list,
+ &pcpu_map_extend_chunks);
+ pcpu_schedule_balance_work();
+ }
+ }
} else {
margin = PCPU_ATOMIC_MAP_MARGIN_HIGH;
}
@@ -437,6 +446,8 @@ static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
unsigned long flags;
+ lockdep_assert_held(&pcpu_alloc_mutex);
+
new = pcpu_mem_zalloc(new_size);
if (!new)
return -ENOMEM;
@@ -469,20 +480,6 @@ out_unlock:
return 0;
}
-static void pcpu_map_extend_workfn(struct work_struct *work)
-{
- struct pcpu_chunk *chunk = container_of(work, struct pcpu_chunk,
- map_extend_work);
- int new_alloc;
-
- spin_lock_irq(&pcpu_lock);
- new_alloc = pcpu_need_to_extend(chunk, false);
- spin_unlock_irq(&pcpu_lock);
-
- if (new_alloc)
- pcpu_extend_area_map(chunk, new_alloc);
-}
-
/**
* pcpu_fit_in_area - try to fit the requested allocation in a candidate area
* @chunk: chunk the candidate area belongs to
@@ -742,7 +739,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
chunk->map_used = 1;
INIT_LIST_HEAD(&chunk->list);
- INIT_WORK(&chunk->map_extend_work, pcpu_map_extend_workfn);
+ INIT_LIST_HEAD(&chunk->map_extend_list);
chunk->free_size = pcpu_unit_size;
chunk->contig_hint = pcpu_unit_size;
@@ -897,6 +894,9 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
return NULL;
}
+ if (!is_atomic)
+ mutex_lock(&pcpu_alloc_mutex);
+
spin_lock_irqsave(&pcpu_lock, flags);
/* serve reserved allocations from the reserved chunk if available */
@@ -969,12 +969,9 @@ restart:
if (is_atomic)
goto fail;
- mutex_lock(&pcpu_alloc_mutex);
-
if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
chunk = pcpu_create_chunk();
if (!chunk) {
- mutex_unlock(&pcpu_alloc_mutex);
err = "failed to allocate new chunk";
goto fail;
}
@@ -985,7 +982,6 @@ restart:
spin_lock_irqsave(&pcpu_lock, flags);
}
- mutex_unlock(&pcpu_alloc_mutex);
goto restart;
area_found:
@@ -995,8 +991,6 @@ area_found:
if (!is_atomic) {
int page_start, page_end, rs, re;
- mutex_lock(&pcpu_alloc_mutex);
-
page_start = PFN_DOWN(off);
page_end = PFN_UP(off + size);
@@ -1007,7 +1001,6 @@ area_found:
spin_lock_irqsave(&pcpu_lock, flags);
if (ret) {
- mutex_unlock(&pcpu_alloc_mutex);
pcpu_free_area(chunk, off, &occ_pages);
err = "failed to populate";
goto fail_unlock;
@@ -1030,7 +1023,7 @@ area_found:
memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size);
ptr = __addr_to_pcpu_ptr(chunk->base_addr + off);
- kmemleak_alloc_percpu(ptr, size);
+ kmemleak_alloc_percpu(ptr, size, gfp);
return ptr;
fail_unlock:
@@ -1047,6 +1040,8 @@ fail:
/* see the flag handling in pcpu_blance_workfn() */
pcpu_atomic_alloc_failed = true;
pcpu_schedule_balance_work();
+ } else {
+ mutex_unlock(&pcpu_alloc_mutex);
}
return NULL;
}
@@ -1131,6 +1126,7 @@ static void pcpu_balance_workfn(struct work_struct *work)
if (chunk == list_first_entry(free_head, struct pcpu_chunk, list))
continue;
+ list_del_init(&chunk->map_extend_list);
list_move(&chunk->list, &to_free);
}
@@ -1148,6 +1144,25 @@ static void pcpu_balance_workfn(struct work_struct *work)
pcpu_destroy_chunk(chunk);
}
+ /* service chunks which requested async area map extension */
+ do {
+ int new_alloc = 0;
+
+ spin_lock_irq(&pcpu_lock);
+
+ chunk = list_first_entry_or_null(&pcpu_map_extend_chunks,
+ struct pcpu_chunk, map_extend_list);
+ if (chunk) {
+ list_del_init(&chunk->map_extend_list);
+ new_alloc = pcpu_need_to_extend(chunk, false);
+ }
+
+ spin_unlock_irq(&pcpu_lock);
+
+ if (new_alloc)
+ pcpu_extend_area_map(chunk, new_alloc);
+ } while (chunk);
+
/*
* Ensure there are certain number of free populated pages for
* atomic allocs. Fill up from the most packed so that atomic
@@ -1648,7 +1663,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
*/
schunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0);
INIT_LIST_HEAD(&schunk->list);
- INIT_WORK(&schunk->map_extend_work, pcpu_map_extend_workfn);
+ INIT_LIST_HEAD(&schunk->map_extend_list);
schunk->base_addr = base_addr;
schunk->map = smap;
schunk->map_alloc = ARRAY_SIZE(smap);
@@ -1678,7 +1693,7 @@ int __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai,
if (dyn_size) {
dchunk = memblock_virt_alloc(pcpu_chunk_struct_size, 0);
INIT_LIST_HEAD(&dchunk->list);
- INIT_WORK(&dchunk->map_extend_work, pcpu_map_extend_workfn);
+ INIT_LIST_HEAD(&dchunk->map_extend_list);
dchunk->base_addr = base_addr;
dchunk->map = dmap;
dchunk->map_alloc = ARRAY_SIZE(dmap);
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index 5077afcd9e11..b2dfa8cf6e4c 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -197,7 +197,7 @@ static ssize_t process_vm_rw_core(pid_t pid, struct iov_iter *iter,
goto free_proc_pages;
}
- mm = mm_access(task, PTRACE_MODE_ATTACH);
+ mm = mm_access(task, PTRACE_MODE_ATTACH_REALCREDS);
if (!mm || IS_ERR(mm)) {
rc = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
/*
diff --git a/mm/shmem.c b/mm/shmem.c
index 0b4ba556703a..fac22b578eb9 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2140,9 +2140,11 @@ static long shmem_fallocate(struct file *file, int mode, loff_t offset,
NULL);
if (error) {
/* Remove the !PageUptodate pages we added */
- shmem_undo_range(inode,
- (loff_t)start << PAGE_CACHE_SHIFT,
- (loff_t)index << PAGE_CACHE_SHIFT, true);
+ if (index > start) {
+ shmem_undo_range(inode,
+ (loff_t)start << PAGE_CACHE_SHIFT,
+ ((loff_t)index << PAGE_CACHE_SHIFT) - 1, true);
+ }
goto undone;
}
diff --git a/mm/slab.c b/mm/slab.c
index f34e053ec46e..b7f9f6456a61 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2175,9 +2175,16 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
size += BYTES_PER_WORD;
}
#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
- if (size >= kmalloc_size(INDEX_NODE + 1)
- && cachep->object_size > cache_line_size()
- && ALIGN(size, cachep->align) < PAGE_SIZE) {
+ /*
+ * To activate debug pagealloc, off-slab management is necessary
+ * requirement. In early phase of initialization, small sized slab
+ * doesn't get initialized so it would not be possible. So, we need
+ * to check size >= 256. It guarantees that all necessary small
+ * sized slab is initialized in current slab initialization sequence.
+ */
+ if (!slab_early_init && size >= kmalloc_size(INDEX_NODE) &&
+ size >= 256 && cachep->object_size > cache_line_size() &&
+ ALIGN(size, cachep->align) < PAGE_SIZE) {
cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align);
size = PAGE_SIZE;
}
diff --git a/mm/slab_common.c b/mm/slab_common.c
index dcdab81bd240..9a18f19360d3 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -789,6 +789,7 @@ void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
page = alloc_kmem_pages(flags, order);
ret = page ? page_address(page) : NULL;
kmemleak_alloc(ret, size, 1, flags);
+ kasan_kmalloc_large(ret, size);
return ret;
}
EXPORT_SYMBOL(kmalloc_order);
@@ -963,8 +964,10 @@ static __always_inline void *__do_krealloc(const void *p, size_t new_size,
if (p)
ks = ksize(p);
- if (ks >= new_size)
+ if (ks >= new_size) {
+ kasan_krealloc((void *)p, new_size);
return (void *)p;
+ }
ret = kmalloc_track_caller(new_size, flags);
if (ret && p)
diff --git a/mm/slub.c b/mm/slub.c
index 72bb06beaabc..21b2582a3b76 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -20,6 +20,7 @@
#include <linux/proc_fs.h>
#include <linux/notifier.h>
#include <linux/seq_file.h>
+#include <linux/kasan.h>
#include <linux/kmemcheck.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
@@ -458,8 +459,10 @@ static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
/*
* Debug settings:
*/
-#ifdef CONFIG_SLUB_DEBUG_ON
+#if defined(CONFIG_SLUB_DEBUG_ON)
static int slub_debug = DEBUG_DEFAULT_FLAGS;
+#elif defined(CONFIG_KASAN)
+static int slub_debug = SLAB_STORE_USER;
#else
static int slub_debug;
#endif
@@ -468,12 +471,30 @@ static char *slub_debug_slabs;
static int disable_higher_order_debug;
/*
+ * slub is about to manipulate internal object metadata. This memory lies
+ * outside the range of the allocated object, so accessing it would normally
+ * be reported by kasan as a bounds error. metadata_access_enable() is used
+ * to tell kasan that these accesses are OK.
+ */
+static inline void metadata_access_enable(void)
+{
+ kasan_disable_current();
+}
+
+static inline void metadata_access_disable(void)
+{
+ kasan_enable_current();
+}
+
+/*
* Object debugging
*/
static void print_section(char *text, u8 *addr, unsigned int length)
{
+ metadata_access_enable();
print_hex_dump(KERN_ERR, text, DUMP_PREFIX_ADDRESS, 16, 1, addr,
length, 1);
+ metadata_access_disable();
}
static struct track *get_track(struct kmem_cache *s, void *object,
@@ -503,7 +524,9 @@ static void set_track(struct kmem_cache *s, void *object,
trace.max_entries = TRACK_ADDRS_COUNT;
trace.entries = p->addrs;
trace.skip = 3;
+ metadata_access_enable();
save_stack_trace(&trace);
+ metadata_access_disable();
/* See rant in lockdep.c */
if (trace.nr_entries != 0 &&
@@ -629,7 +652,7 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
dump_stack();
}
-static void object_err(struct kmem_cache *s, struct page *page,
+void object_err(struct kmem_cache *s, struct page *page,
u8 *object, char *reason)
{
slab_bug(s, "%s", reason);
@@ -677,7 +700,9 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
u8 *fault;
u8 *end;
+ metadata_access_enable();
fault = memchr_inv(start, value, bytes);
+ metadata_access_disable();
if (!fault)
return 1;
@@ -770,7 +795,9 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
if (!remainder)
return 1;
+ metadata_access_enable();
fault = memchr_inv(end - remainder, POISON_INUSE, remainder);
+ metadata_access_disable();
if (!fault)
return 1;
while (end > fault && end[-1] == POISON_INUSE)
@@ -1232,11 +1259,13 @@ static DEFINE_PER_CPU(struct slub_free_list, slub_free_list);
static inline void kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
{
kmemleak_alloc(ptr, size, 1, flags);
+ kasan_kmalloc_large(ptr, size);
}
static inline void kfree_hook(const void *x)
{
kmemleak_free(x);
+ kasan_kfree_large(x);
}
static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
@@ -1254,6 +1283,7 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s,
flags &= gfp_allowed_mask;
kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
+ kasan_slab_alloc(s, object);
}
static inline void slab_free_hook(struct kmem_cache *s, void *x)
@@ -1277,6 +1307,8 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x)
#endif
if (!(s->flags & SLAB_DEBUG_OBJECTS))
debug_check_no_obj_freed(x, s->object_size);
+
+ kasan_slab_free(s, x);
}
/*
@@ -1376,10 +1408,11 @@ static void setup_object(struct kmem_cache *s, struct page *page,
void *object)
{
setup_object_debug(s, page, object);
-#ifndef CONFIG_PREEMPT_RT_FULL
- if (unlikely(s->ctor))
+ if (unlikely(s->ctor)) {
+ kasan_unpoison_object_data(s, object);
s->ctor(object);
-#endif
+ kasan_poison_object_data(s, object);
+ }
}
static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
@@ -1409,6 +1442,8 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
if (unlikely(s->flags & SLAB_POISON))
memset(start, POISON_INUSE, PAGE_SIZE << order);
+ kasan_poison_slab(page);
+
for_each_object_idx(p, idx, s, start, page->objects) {
setup_object(s, page, p);
if (likely(idx < page->objects))
@@ -2533,6 +2568,7 @@ void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
{
void *ret = slab_alloc(s, gfpflags, _RET_IP_);
trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
+ kasan_kmalloc(s, ret, size);
return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc_trace);
@@ -2559,6 +2595,8 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
trace_kmalloc_node(_RET_IP_, ret,
size, s->size, gfpflags, node);
+
+ kasan_kmalloc(s, ret, size);
return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
@@ -2942,6 +2980,7 @@ static void early_kmem_cache_node_alloc(int node)
init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
init_tracking(kmem_cache_node, n);
#endif
+ kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node));
init_kmem_cache_node(n);
inc_slabs_node(kmem_cache_node, node, page->objects);
@@ -3314,6 +3353,8 @@ void *__kmalloc(size_t size, gfp_t flags)
trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
+ kasan_kmalloc(s, ret, size);
+
return ret;
}
EXPORT_SYMBOL(__kmalloc);
@@ -3357,12 +3398,14 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
+ kasan_kmalloc(s, ret, size);
+
return ret;
}
EXPORT_SYMBOL(__kmalloc_node);
#endif
-size_t ksize(const void *object)
+static size_t __ksize(const void *object)
{
struct page *page;
@@ -3378,6 +3421,15 @@ size_t ksize(const void *object)
return slab_ksize(page->slab_cache);
}
+
+size_t ksize(const void *object)
+{
+ size_t size = __ksize(object);
+ /* We assume that ksize callers could use whole allocated area,
+ so we need unpoison this area. */
+ kasan_krealloc(object, size);
+ return size;
+}
EXPORT_SYMBOL(ksize);
void kfree(const void *x)
diff --git a/mm/swap.c b/mm/swap.c
index acb833351464..2dbb2f373762 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -43,7 +43,7 @@ int page_cluster;
static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);
static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
-static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
+static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);
static DEFINE_LOCAL_IRQ_LOCK(rotate_lock);
static DEFINE_LOCAL_IRQ_LOCK(swapvec_lock);
@@ -479,7 +479,7 @@ void rotate_reclaimable_page(struct page *page)
page_cache_get(page);
local_lock_irqsave(rotate_lock, flags);
pvec = this_cpu_ptr(&lru_rotate_pvecs);
- if (!pagevec_add(pvec, page))
+ if (!pagevec_add(pvec, page) || PageCompound(page))
pagevec_move_tail(pvec);
local_unlock_irqrestore(rotate_lock, flags);
}
@@ -536,7 +536,7 @@ void activate_page(struct page *page)
activate_page_pvecs);
page_cache_get(page);
- if (!pagevec_add(pvec, page))
+ if (!pagevec_add(pvec, page) || PageCompound(page))
pagevec_lru_move_fn(pvec, __activate_page, NULL);
put_locked_var(swapvec_lock, activate_page_pvecs);
}
@@ -628,7 +628,7 @@ static void __lru_cache_add(struct page *page)
struct pagevec *pvec = &get_locked_var(swapvec_lock, lru_add_pvec);
page_cache_get(page);
- if (!pagevec_space(pvec))
+ if (!pagevec_add(pvec, page) || PageCompound(page))
__pagevec_lru_add(pvec);
pagevec_add(pvec, page);
put_locked_var(swapvec_lock, lru_add_pvec);
@@ -748,7 +748,7 @@ void lru_cache_add_active_or_unevictable(struct page *page,
* be write it out by flusher threads as this is much more effective
* than the single-page writeout from reclaim.
*/
-static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,
+static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
void *arg)
{
int lru, file;
@@ -816,37 +816,36 @@ void lru_add_drain_cpu(int cpu)
local_unlock_irqrestore(rotate_lock, flags);
}
- pvec = &per_cpu(lru_deactivate_pvecs, cpu);
+ pvec = &per_cpu(lru_deactivate_file_pvecs, cpu);
if (pagevec_count(pvec))
- pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
+ pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
activate_page_drain(cpu);
}
/**
- * deactivate_page - forcefully deactivate a page
+ * deactivate_file_page - forcefully deactivate a file page
* @page: page to deactivate
*
* This function hints the VM that @page is a good reclaim candidate,
* for example if its invalidation fails due to the page being dirty
* or under writeback.
*/
-void deactivate_page(struct page *page)
+void deactivate_file_page(struct page *page)
{
/*
- * In a workload with many unevictable page such as mprotect, unevictable
- * page deactivation for accelerating reclaim is pointless.
+ * In a workload with many unevictable page such as mprotect,
+ * unevictable page deactivation for accelerating reclaim is pointless.
*/
if (PageUnevictable(page))
return;
if (likely(get_page_unless_zero(page))) {
- struct pagevec *pvec = &get_locked_var(swapvec_lock,
- lru_deactivate_pvecs);
+ struct pagevec *pvec = &get_cpu_var(lru_deactivate_file_pvecs);
- if (!pagevec_add(pvec, page))
- pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
- put_locked_var(swapvec_lock, lru_deactivate_pvecs);
+ if (!pagevec_add(pvec, page) || PageCompound(page))
+ pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
+ put_cpu_var(lru_deactivate_file_pvecs);
}
}
@@ -878,7 +877,7 @@ void lru_add_drain_all(void)
if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
- pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
+ pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
need_activate_page_drain(cpu)) {
INIT_WORK(work, lru_add_drain_per_cpu);
schedule_work_on(cpu, work);
diff --git a/mm/truncate.c b/mm/truncate.c
index a06369d67fa4..d83247ec256d 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -516,7 +516,7 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
* of interest and try to speed up its reclaim.
*/
if (!ret)
- deactivate_page(page);
+ deactivate_file_page(page);
count += ret;
}
pagevec_remove_exceptionals(&pvec);
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 43122b6c46ed..e32d5ffc3e46 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1328,10 +1328,8 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
if (unlikely(!area))
return NULL;
- /*
- * We always allocate a guard page.
- */
- size += PAGE_SIZE;
+ if (!(flags & VM_NO_GUARD))
+ size += PAGE_SIZE;
va = alloc_vmap_area(size, align, start, end, node, gfp_mask);
if (IS_ERR(va)) {
@@ -1424,8 +1422,8 @@ struct vm_struct *remove_vm_area(const void *addr)
spin_unlock(&vmap_area_lock);
vmap_debug_free_range(va->va_start, va->va_end);
+ kasan_free_shadow(vm);
free_unmap_vmap_area(va);
- vm->size -= PAGE_SIZE;
return vm;
}
@@ -1450,8 +1448,8 @@ static void __vunmap(const void *addr, int deallocate_pages)
return;
}
- debug_check_no_locks_freed(addr, area->size);
- debug_check_no_obj_freed(addr, area->size);
+ debug_check_no_locks_freed(addr, get_vm_area_size(area));
+ debug_check_no_obj_freed(addr, get_vm_area_size(area));
if (deallocate_pages) {
int i;
@@ -1625,6 +1623,7 @@ fail:
* @end: vm area range end
* @gfp_mask: flags for the page level allocator
* @prot: protection mask for the allocated pages
+ * @vm_flags: additional vm area flags (e.g. %VM_NO_GUARD)
* @node: node to use for allocation or NUMA_NO_NODE
* @caller: caller's return address
*
@@ -1634,7 +1633,8 @@ fail:
*/
void *__vmalloc_node_range(unsigned long size, unsigned long align,
unsigned long start, unsigned long end, gfp_t gfp_mask,
- pgprot_t prot, int node, const void *caller)
+ pgprot_t prot, unsigned long vm_flags, int node,
+ const void *caller)
{
struct vm_struct *area;
void *addr;
@@ -1644,8 +1644,8 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
if (!size || (size >> PAGE_SHIFT) > totalram_pages)
goto fail;
- area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED,
- start, end, node, gfp_mask, caller);
+ area = __get_vm_area_node(size, align, VM_ALLOC | VM_UNINITIALIZED |
+ vm_flags, start, end, node, gfp_mask, caller);
if (!area)
goto fail;
@@ -1694,7 +1694,7 @@ static void *__vmalloc_node(unsigned long size, unsigned long align,
int node, const void *caller)
{
return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
- gfp_mask, prot, node, caller);
+ gfp_mask, prot, 0, node, caller);
}
void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e3b0a54a44aa..d48b28219edf 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -895,21 +895,17 @@ static unsigned long shrink_page_list(struct list_head *page_list,
*
* 2) Global reclaim encounters a page, memcg encounters a
* page that is not marked for immediate reclaim or
- * the caller does not have __GFP_IO. In this case mark
+ * the caller does not have __GFP_FS (or __GFP_IO if it's
+ * simply going to swap, not to fs). In this case mark
* the page for immediate reclaim and continue scanning.
*
- * __GFP_IO is checked because a loop driver thread might
+ * Require may_enter_fs because we would wait on fs, which
+ * may not have submitted IO yet. And the loop driver might
* enter reclaim, and deadlock if it waits on a page for
* which it is needed to do the write (loop masks off
* __GFP_IO|__GFP_FS for this reason); but more thought
* would probably show more reasons.
*
- * Don't require __GFP_FS, since we're not going into the
- * FS, just waiting on its writeback completion. Worryingly,
- * ext4 gfs2 and xfs allocate pages with
- * grab_cache_page_write_begin(,,AOP_FLAG_NOFS), so testing
- * may_enter_fs here is liable to OOM on them.
- *
* 3) memcg encounters a page that is not already marked
* PageReclaim. memcg does not have any dirty pages
* throttling so we could easily OOM just because too many
@@ -926,7 +922,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
/* Case 2 above */
} else if (global_reclaim(sc) ||
- !PageReclaim(page) || !(sc->gfp_mask & __GFP_IO)) {
+ !PageReclaim(page) || !may_enter_fs) {
/*
* This is slightly racy - end_page_writeback()
* might have just cleared PageReclaim, then
@@ -1115,7 +1111,7 @@ cull_mlocked:
if (PageSwapCache(page))
try_to_free_swap(page);
unlock_page(page);
- putback_lru_page(page);
+ list_add(&page->lru, &ret_pages);
continue;
activate_locked: