author     Alex Shi <alex.shi@linaro.org>    2018-02-01 12:03:41 +0800
committer  Alex Shi <alex.shi@linaro.org>    2018-02-01 12:03:41 +0800
commit     9cebd248bd83dea5ead3f1ab5f3cbaf241588e37 (patch)
tree       5f2d8fd7122c345045c476d57cb7c5ad17a4f08b /mm
parent     4f0ca2b2718d297c88b1c303f2d414327c537636 (diff)
parent     6c6f924f9c6294944ee6efb1bbd8cdb853582e50 (diff)
Merge tag 'v4.9.79' into linux-linaro-lsk-v4.9 (tag: lsk-v4.9-18.02)
This is the 4.9.79 stable release
Diffstat (limited to 'mm')
-rw-r--r--  mm/cma.c             15
-rw-r--r--  mm/internal.h         6
-rw-r--r--  mm/memcontrol.c       2
-rw-r--r--  mm/memory-failure.c   7
-rw-r--r--  mm/mmap.c             6
-rw-r--r--  mm/page_alloc.c      15
-rw-r--r--  mm/vmscan.c          47
-rw-r--r--  mm/vmstat.c           2
8 files changed, 62 insertions(+), 38 deletions(-)
diff --git a/mm/cma.c b/mm/cma.c
index c960459eda7e..397687fc51f9 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -54,7 +54,7 @@ unsigned long cma_get_size(const struct cma *cma)
}
static unsigned long cma_bitmap_aligned_mask(const struct cma *cma,
- int align_order)
+ unsigned int align_order)
{
if (align_order <= cma->order_per_bit)
return 0;
@@ -62,17 +62,14 @@ static unsigned long cma_bitmap_aligned_mask(const struct cma *cma,
}
/*
- * Find a PFN aligned to the specified order and return an offset represented in
- * order_per_bits.
+ * Find the offset of the base PFN from the specified align_order.
+ * The value returned is represented in order_per_bits.
*/
static unsigned long cma_bitmap_aligned_offset(const struct cma *cma,
- int align_order)
+ unsigned int align_order)
{
- if (align_order <= cma->order_per_bit)
- return 0;
-
- return (ALIGN(cma->base_pfn, (1UL << align_order))
- - cma->base_pfn) >> cma->order_per_bit;
+ return (cma->base_pfn & ((1UL << align_order) - 1))
+ >> cma->order_per_bit;
}
static unsigned long cma_bitmap_pages_to_bits(const struct cma *cma,
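
The cma.c hunk above replaces the ALIGN()-based round-up in cma_bitmap_aligned_offset() with a mask of the low bits of base_pfn: the offset now measures how far base_pfn sits past the previous align_order boundary instead of how far it is from the next one. A standalone toy comparison of the two formulas (user-space sketch with made-up values, not part of the patch) makes the difference concrete:

/*
 * Toy comparison of the old and new cma_bitmap_aligned_offset() arithmetic.
 * The PFN and orders below are invented example values.
 */
#include <stdio.h>

#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned long base_pfn = 0x2f3;	/* hypothetical, not 512-page aligned */
	unsigned int align_order = 9;	/* 512-page alignment */
	unsigned int order_per_bit = 0;

	/* old formula: distance up to the next aligned PFN */
	unsigned long old_off = (ALIGN(base_pfn, 1UL << align_order) - base_pfn)
				>> order_per_bit;
	/* new formula: distance down from the previous aligned PFN */
	unsigned long new_off = (base_pfn & ((1UL << align_order) - 1))
				>> order_per_bit;

	printf("old=%lu new=%lu\n", old_off, new_off);	/* old=269 new=243 */
	return 0;
}

Note that when align_order <= cma->order_per_bit the masked value is always smaller than 1UL << order_per_bit, so the new expression still evaluates to 0 and the old early return can be dropped.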
diff --git a/mm/internal.h b/mm/internal.h
index 34a5459e5989..3e2d01694747 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -74,6 +74,12 @@ static inline void set_page_refcounted(struct page *page)
extern unsigned long highest_memmap_pfn;
/*
+ * Maximum number of reclaim retries without progress before the OOM
+ * killer is considered the only way forward.
+ */
+#define MAX_RECLAIM_RETRIES 16
+
+/*
* in mm/vmscan.c:
*/
extern int isolate_lru_page(struct page *page);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2a800c4a39bd..50088150fc17 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5531,7 +5531,7 @@ static void uncharge_list(struct list_head *page_list)
next = page->lru.next;
VM_BUG_ON_PAGE(PageLRU(page), page);
- VM_BUG_ON_PAGE(page_count(page), page);
+ VM_BUG_ON_PAGE(!PageHWPoison(page) && page_count(page), page);
if (!page->mem_cgroup)
continue;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index ce7d416edab7..5aa71a82ca73 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -535,6 +535,13 @@ static int delete_from_lru_cache(struct page *p)
*/
ClearPageActive(p);
ClearPageUnevictable(p);
+
+ /*
+ * Poisoned page might never drop its ref count to 0 so we have
+ * to uncharge it manually from its memcg.
+ */
+ mem_cgroup_uncharge(p);
+
/*
* drop the page count elevated by isolate_lru_page()
*/
diff --git a/mm/mmap.c b/mm/mmap.c
index 5b48adb4aa56..45ac5b973459 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2240,7 +2240,8 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
gap_addr = TASK_SIZE;
next = vma->vm_next;
- if (next && next->vm_start < gap_addr) {
+ if (next && next->vm_start < gap_addr &&
+ (next->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
if (!(next->vm_flags & VM_GROWSUP))
return -ENOMEM;
/* Check that both stack segments have the same anon_vma? */
@@ -2324,7 +2325,8 @@ int expand_downwards(struct vm_area_struct *vma,
if (gap_addr > address)
return -ENOMEM;
prev = vma->vm_prev;
- if (prev && prev->vm_end > gap_addr) {
+ if (prev && prev->vm_end > gap_addr &&
+ (prev->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) {
if (!(prev->vm_flags & VM_GROWSDOWN))
return -ENOMEM;
/* Check that both stack segments have the same anon_vma? */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index fbc38888252b..94018ea5f935 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2821,9 +2821,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
if (!area->nr_free)
continue;
- if (alloc_harder)
- return true;
-
for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) {
if (!list_empty(&area->free_list[mt]))
return true;
@@ -2835,6 +2832,9 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark,
return true;
}
#endif
+ if (alloc_harder &&
+ !list_empty(&area->free_list[MIGRATE_HIGHATOMIC]))
+ return true;
}
return false;
}
@@ -3422,12 +3422,6 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
}
/*
- * Maximum number of reclaim retries without any progress before OOM killer
- * is consider as the only way to move forward.
- */
-#define MAX_RECLAIM_RETRIES 16
-
-/*
* Checks whether it makes sense to retry the reclaim to make a forward progress
* for the given allocation request.
* The reclaim feedback represented by did_some_progress (any progress during
@@ -4385,7 +4379,8 @@ void show_free_areas(unsigned int filter)
K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
node_page_state(pgdat, NR_PAGES_SCANNED),
- !pgdat_reclaimable(pgdat) ? "yes" : "no");
+ pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ?
+ "yes" : "no");
}
for_each_populated_zone(zone) {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 30a88b945a44..f118dc23f662 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2606,6 +2606,15 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
} while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed,
sc->nr_scanned - nr_scanned, sc));
+ /*
+ * Kswapd gives up on balancing particular nodes after too
+ * many failures to reclaim anything from them and goes to
+ * sleep. On reclaim progress, reset the failure counter. A
+ * successful direct reclaim run will revive a dormant kswapd.
+ */
+ if (reclaimable)
+ pgdat->kswapd_failures = 0;
+
return reclaimable;
}
@@ -2680,10 +2689,6 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
GFP_KERNEL | __GFP_HARDWALL))
continue;
- if (sc->priority != DEF_PRIORITY &&
- !pgdat_reclaimable(zone->zone_pgdat))
- continue; /* Let kswapd poll it */
-
/*
* If we already have plenty of memory free for
* compaction in this zone, don't free any more.
@@ -2820,7 +2825,7 @@ retry:
return 0;
}
-static bool pfmemalloc_watermark_ok(pg_data_t *pgdat)
+static bool allow_direct_reclaim(pg_data_t *pgdat)
{
struct zone *zone;
unsigned long pfmemalloc_reserve = 0;
@@ -2828,6 +2833,9 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat)
int i;
bool wmark_ok;
+ if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
+ return true;
+
for (i = 0; i <= ZONE_NORMAL; i++) {
zone = &pgdat->node_zones[i];
if (!managed_zone(zone) ||
@@ -2908,7 +2916,7 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
/* Throttle based on the first usable node */
pgdat = zone->zone_pgdat;
- if (pfmemalloc_watermark_ok(pgdat))
+ if (allow_direct_reclaim(pgdat))
goto out;
break;
}
@@ -2930,14 +2938,14 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist,
*/
if (!(gfp_mask & __GFP_FS)) {
wait_event_interruptible_timeout(pgdat->pfmemalloc_wait,
- pfmemalloc_watermark_ok(pgdat), HZ);
+ allow_direct_reclaim(pgdat), HZ);
goto check_pending;
}
/* Throttle until kswapd wakes the process */
wait_event_killable(zone->zone_pgdat->pfmemalloc_wait,
- pfmemalloc_watermark_ok(pgdat));
+ allow_direct_reclaim(pgdat));
check_pending:
if (fatal_signal_pending(current))
@@ -3116,7 +3124,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx)
/*
* The throttled processes are normally woken up in balance_pgdat() as
- * soon as pfmemalloc_watermark_ok() is true. But there is a potential
+ * soon as allow_direct_reclaim() is true. But there is a potential
* race between when kswapd checks the watermarks and a process gets
* throttled. There is also a potential race if processes get
* throttled, kswapd wakes, a large process exits thereby balancing the
@@ -3130,6 +3138,10 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx)
if (waitqueue_active(&pgdat->pfmemalloc_wait))
wake_up_all(&pgdat->pfmemalloc_wait);
+ /* Hopeless node, leave it to direct reclaim */
+ if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
+ return true;
+
for (i = 0; i <= classzone_idx; i++) {
struct zone *zone = pgdat->node_zones + i;
@@ -3216,9 +3228,9 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
count_vm_event(PAGEOUTRUN);
do {
+ unsigned long nr_reclaimed = sc.nr_reclaimed;
bool raise_priority = true;
- sc.nr_reclaimed = 0;
sc.reclaim_idx = classzone_idx;
/*
@@ -3297,7 +3309,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
* able to safely make forward progress. Wake them
*/
if (waitqueue_active(&pgdat->pfmemalloc_wait) &&
- pfmemalloc_watermark_ok(pgdat))
+ allow_direct_reclaim(pgdat))
wake_up_all(&pgdat->pfmemalloc_wait);
/* Check if kswapd should be suspending */
@@ -3308,10 +3320,14 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx)
* Raise priority if scanning rate is too low or there was no
* progress in reclaiming pages
*/
- if (raise_priority || !sc.nr_reclaimed)
+ nr_reclaimed = sc.nr_reclaimed - nr_reclaimed;
+ if (raise_priority || !nr_reclaimed)
sc.priority--;
} while (sc.priority >= 1);
+ if (!sc.nr_reclaimed)
+ pgdat->kswapd_failures++;
+
out:
/*
* Return the order kswapd stopped reclaiming at as
@@ -3511,6 +3527,10 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
if (!waitqueue_active(&pgdat->kswapd_wait))
return;
+ /* Hopeless node, leave it to direct reclaim */
+ if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES)
+ return;
+
/* Only wake kswapd if all zones are unbalanced */
for (z = 0; z <= classzone_idx; z++) {
zone = pgdat->node_zones + z;
@@ -3781,9 +3801,6 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order)
sum_zone_node_page_state(pgdat->node_id, NR_SLAB_RECLAIMABLE) <= pgdat->min_slab_pages)
return NODE_RECLAIM_FULL;
- if (!pgdat_reclaimable(pgdat))
- return NODE_RECLAIM_FULL;
-
/*
* Do not scan if the allocation should not be delayed.
*/
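
Taken together, the vmscan.c hunks above gate kswapd on a per-node failure counter: a balance_pgdat() run that reclaims nothing increments pgdat->kswapd_failures, any reclaim progress resets it, and once the counter reaches MAX_RECLAIM_RETRIES the node is treated as hopeless, kswapd is no longer woken for it, and direct reclaim is allowed to proceed unthrottled. A simplified standalone sketch of that accounting (toy struct and function names invented for illustration, not the kernel code):

/*
 * Toy illustration of the kswapd_failures accounting: consecutive
 * no-progress runs are counted, progress resets the counter, and a
 * node that failed MAX_RECLAIM_RETRIES times is considered hopeless.
 */
#include <stdbool.h>
#include <stdio.h>

#define MAX_RECLAIM_RETRIES 16

struct toy_node {
	unsigned int kswapd_failures;
};

static bool node_is_hopeless(const struct toy_node *node)
{
	return node->kswapd_failures >= MAX_RECLAIM_RETRIES;
}

/* One balancing attempt; nr_reclaimed would come from shrinking the node. */
static void toy_balance(struct toy_node *node, unsigned long nr_reclaimed)
{
	if (nr_reclaimed)
		node->kswapd_failures = 0;	/* progress revives the node */
	else
		node->kswapd_failures++;
}

int main(void)
{
	struct toy_node node = { 0 };

	for (int i = 0; i < MAX_RECLAIM_RETRIES; i++)
		toy_balance(&node, 0);		/* simulate repeated failure */

	printf("hopeless: %s\n", node_is_hopeless(&node) ? "yes" : "no");
	return 0;
}

Resetting on any progress is what lets a later successful direct-reclaim run revive a dormant kswapd, as the comment added in shrink_node() notes.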
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 6a088df04b29..3863b5d6d598 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1421,7 +1421,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
"\n node_unreclaimable: %u"
"\n start_pfn: %lu"
"\n node_inactive_ratio: %u",
- !pgdat_reclaimable(zone->zone_pgdat),
+ pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES,
zone->zone_start_pfn,
zone->zone_pgdat->inactive_ratio);
seq_putc(m, '\n');