diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/cma.c | 15 | ||||
-rw-r--r-- | mm/early_ioremap.c | 2 | ||||
-rw-r--r-- | mm/internal.h | 6 | ||||
-rw-r--r-- | mm/kmemleak.c | 2 | ||||
-rw-r--r-- | mm/memcontrol.c | 2 | ||||
-rw-r--r-- | mm/memory-failure.c | 7 | ||||
-rw-r--r-- | mm/memory.c | 2 | ||||
-rw-r--r-- | mm/mmap.c | 6 | ||||
-rw-r--r-- | mm/page_alloc.c | 15 | ||||
-rw-r--r-- | mm/shmem.c | 6 | ||||
-rw-r--r-- | mm/util.c | 24 | ||||
-rw-r--r-- | mm/vmscan.c | 50 | ||||
-rw-r--r-- | mm/vmstat.c | 2 | ||||
-rw-r--r-- | mm/zswap.c | 12 |
14 files changed, 104 insertions, 47 deletions
@@ -54,7 +54,7 @@ unsigned long cma_get_size(const struct cma *cma) } static unsigned long cma_bitmap_aligned_mask(const struct cma *cma, - int align_order) + unsigned int align_order) { if (align_order <= cma->order_per_bit) return 0; @@ -62,17 +62,14 @@ static unsigned long cma_bitmap_aligned_mask(const struct cma *cma, } /* - * Find a PFN aligned to the specified order and return an offset represented in - * order_per_bits. + * Find the offset of the base PFN from the specified align_order. + * The value returned is represented in order_per_bits. */ static unsigned long cma_bitmap_aligned_offset(const struct cma *cma, - int align_order) + unsigned int align_order) { - if (align_order <= cma->order_per_bit) - return 0; - - return (ALIGN(cma->base_pfn, (1UL << align_order)) - - cma->base_pfn) >> cma->order_per_bit; + return (cma->base_pfn & ((1UL << align_order) - 1)) + >> cma->order_per_bit; } static unsigned long cma_bitmap_pages_to_bits(const struct cma *cma, diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c index 6d5717bd7197..57540de2b44c 100644 --- a/mm/early_ioremap.c +++ b/mm/early_ioremap.c @@ -103,7 +103,7 @@ __early_ioremap(resource_size_t phys_addr, unsigned long size, pgprot_t prot) enum fixed_addresses idx; int i, slot; - WARN_ON(system_state != SYSTEM_BOOTING); + WARN_ON(system_state >= SYSTEM_RUNNING); slot = -1; for (i = 0; i < FIX_BTMAPS_SLOTS; i++) { diff --git a/mm/internal.h b/mm/internal.h index 34a5459e5989..3e2d01694747 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -74,6 +74,12 @@ static inline void set_page_refcounted(struct page *page) extern unsigned long highest_memmap_pfn; /* + * Maximum number of reclaim retries without progress before the OOM + * killer is consider the only way forward. + */ +#define MAX_RECLAIM_RETRIES 16 + +/* * in mm/vmscan.c: */ extern int isolate_lru_page(struct page *page); diff --git a/mm/kmemleak.c b/mm/kmemleak.c index d1380ed93fdf..20cf3be9a5e8 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1442,6 +1442,8 @@ static void kmemleak_scan(void) if (page_count(page) == 0) continue; scan_block(page, page + 1, NULL); + if (!(pfn % (MAX_SCAN_SIZE / sizeof(*page)))) + cond_resched(); } } put_online_mems(); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index c04403033aec..cbcd52b76df3 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -5534,7 +5534,7 @@ static void uncharge_list(struct list_head *page_list) next = page->lru.next; VM_BUG_ON_PAGE(PageLRU(page), page); - VM_BUG_ON_PAGE(page_count(page), page); + VM_BUG_ON_PAGE(!PageHWPoison(page) && page_count(page), page); if (!page->mem_cgroup) continue; diff --git a/mm/memory-failure.c b/mm/memory-failure.c index ce7d416edab7..5aa71a82ca73 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -535,6 +535,13 @@ static int delete_from_lru_cache(struct page *p) */ ClearPageActive(p); ClearPageUnevictable(p); + + /* + * Poisoned page might never drop its ref count to 0 so we have + * to uncharge it manually from its memcg. + */ + mem_cgroup_uncharge(p); + /* * drop the page count elevated by isolate_lru_page() */ diff --git a/mm/memory.c b/mm/memory.c index 1aa63e7dd790..e2e68767a373 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -75,7 +75,7 @@ #include "internal.h" -#ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS +#if defined(LAST_CPUPID_NOT_IN_PAGE_FLAGS) && !defined(CONFIG_COMPILE_TEST) #warning Unfortunate NUMA and NUMA Balancing config, growing page-frame for last_cpupid. #endif diff --git a/mm/mmap.c b/mm/mmap.c index 5b48adb4aa56..45ac5b973459 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2240,7 +2240,8 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address) gap_addr = TASK_SIZE; next = vma->vm_next; - if (next && next->vm_start < gap_addr) { + if (next && next->vm_start < gap_addr && + (next->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) { if (!(next->vm_flags & VM_GROWSUP)) return -ENOMEM; /* Check that both stack segments have the same anon_vma? */ @@ -2324,7 +2325,8 @@ int expand_downwards(struct vm_area_struct *vma, if (gap_addr > address) return -ENOMEM; prev = vma->vm_prev; - if (prev && prev->vm_end > gap_addr) { + if (prev && prev->vm_end > gap_addr && + (prev->vm_flags & (VM_WRITE|VM_READ|VM_EXEC))) { if (!(prev->vm_flags & VM_GROWSDOWN)) return -ENOMEM; /* Check that both stack segments have the same anon_vma? */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1cb08e1406ea..66131fa1a8e5 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2883,9 +2883,6 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, if (!area->nr_free) continue; - if (alloc_harder) - return true; - for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) { if (!list_empty(&area->free_list[mt])) return true; @@ -2897,6 +2894,9 @@ bool __zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, return true; } #endif + if (alloc_harder && + !list_empty(&area->free_list[MIGRATE_HIGHATOMIC])) + return true; } return false; } @@ -3484,12 +3484,6 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask) } /* - * Maximum number of reclaim retries without any progress before OOM killer - * is consider as the only way to move forward. - */ -#define MAX_RECLAIM_RETRIES 16 - -/* * Checks whether it makes sense to retry the reclaim to make a forward progress * for the given allocation request. * The reclaim feedback represented by did_some_progress (any progress during @@ -4447,7 +4441,8 @@ void show_free_areas(unsigned int filter) K(node_page_state(pgdat, NR_WRITEBACK_TEMP)), K(node_page_state(pgdat, NR_UNSTABLE_NFS)), node_page_state(pgdat, NR_PAGES_SCANNED), - !pgdat_reclaimable(pgdat) ? "yes" : "no"); + pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES ? + "yes" : "no"); } for_each_populated_zone(zone) { diff --git a/mm/shmem.c b/mm/shmem.c index 004e0f87e8a8..2123bfc39ef2 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -370,6 +370,7 @@ static bool shmem_confirm_swap(struct address_space *mapping, int shmem_huge __read_mostly; +#if defined(CONFIG_SYSFS) || defined(CONFIG_TMPFS) static int shmem_parse_huge(const char *str) { if (!strcmp(str, "never")) @@ -407,6 +408,7 @@ static const char *shmem_format_huge(int huge) return "bad_val"; } } +#endif static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, struct shrink_control *sc, unsigned long nr_to_split) @@ -1550,7 +1552,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, struct mm_struct *fault_mm, int *fault_type) { struct address_space *mapping = inode->i_mapping; - struct shmem_inode_info *info; + struct shmem_inode_info *info = SHMEM_I(inode); struct shmem_sb_info *sbinfo; struct mm_struct *charge_mm; struct mem_cgroup *memcg; @@ -1600,7 +1602,6 @@ repeat: * Fast cache lookup did not find it: * bring it back from swap or allocate. */ - info = SHMEM_I(inode); sbinfo = SHMEM_SB(inode->i_sb); charge_mm = fault_mm ? : current->mm; @@ -1852,7 +1853,6 @@ unlock: put_page(page); } if (error == -ENOSPC && !once++) { - info = SHMEM_I(inode); spin_lock_irq(&info->lock); shmem_recalc_inode(inode); spin_unlock_irq(&info->lock); diff --git a/mm/util.c b/mm/util.c index 1a41553db866..8c755d05d4e6 100644 --- a/mm/util.c +++ b/mm/util.c @@ -80,6 +80,8 @@ EXPORT_SYMBOL(kstrdup_const); * @s: the string to duplicate * @max: read at most @max chars from @s * @gfp: the GFP mask used in the kmalloc() call when allocating memory + * + * Note: Use kmemdup_nul() instead if the size is known exactly. */ char *kstrndup(const char *s, size_t max, gfp_t gfp) { @@ -118,6 +120,28 @@ void *kmemdup(const void *src, size_t len, gfp_t gfp) EXPORT_SYMBOL(kmemdup); /** + * kmemdup_nul - Create a NUL-terminated string from unterminated data + * @s: The data to stringify + * @len: The size of the data + * @gfp: the GFP mask used in the kmalloc() call when allocating memory + */ +char *kmemdup_nul(const char *s, size_t len, gfp_t gfp) +{ + char *buf; + + if (!s) + return NULL; + + buf = kmalloc_track_caller(len + 1, gfp); + if (buf) { + memcpy(buf, s, len); + buf[len] = '\0'; + } + return buf; +} +EXPORT_SYMBOL(kmemdup_nul); + +/** * memdup_user - duplicate memory region from user space * * @src: source address in user space diff --git a/mm/vmscan.c b/mm/vmscan.c index 30a88b945a44..4365de74bdb0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -295,10 +295,13 @@ EXPORT_SYMBOL(register_shrinker); */ void unregister_shrinker(struct shrinker *shrinker) { + if (!shrinker->nr_deferred) + return; down_write(&shrinker_rwsem); list_del(&shrinker->list); up_write(&shrinker_rwsem); kfree(shrinker->nr_deferred); + shrinker->nr_deferred = NULL; } EXPORT_SYMBOL(unregister_shrinker); @@ -2606,6 +2609,15 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc) } while (should_continue_reclaim(pgdat, sc->nr_reclaimed - nr_reclaimed, sc->nr_scanned - nr_scanned, sc)); + /* + * Kswapd gives up on balancing particular nodes after too + * many failures to reclaim anything from them and goes to + * sleep. On reclaim progress, reset the failure counter. A + * successful direct reclaim run will revive a dormant kswapd. + */ + if (reclaimable) + pgdat->kswapd_failures = 0; + return reclaimable; } @@ -2680,10 +2692,6 @@ static void shrink_zones(struct zonelist *zonelist, struct scan_control *sc) GFP_KERNEL | __GFP_HARDWALL)) continue; - if (sc->priority != DEF_PRIORITY && - !pgdat_reclaimable(zone->zone_pgdat)) - continue; /* Let kswapd poll it */ - /* * If we already have plenty of memory free for * compaction in this zone, don't free any more. @@ -2820,7 +2828,7 @@ retry: return 0; } -static bool pfmemalloc_watermark_ok(pg_data_t *pgdat) +static bool allow_direct_reclaim(pg_data_t *pgdat) { struct zone *zone; unsigned long pfmemalloc_reserve = 0; @@ -2828,6 +2836,9 @@ static bool pfmemalloc_watermark_ok(pg_data_t *pgdat) int i; bool wmark_ok; + if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) + return true; + for (i = 0; i <= ZONE_NORMAL; i++) { zone = &pgdat->node_zones[i]; if (!managed_zone(zone) || @@ -2908,7 +2919,7 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist, /* Throttle based on the first usable node */ pgdat = zone->zone_pgdat; - if (pfmemalloc_watermark_ok(pgdat)) + if (allow_direct_reclaim(pgdat)) goto out; break; } @@ -2930,14 +2941,14 @@ static bool throttle_direct_reclaim(gfp_t gfp_mask, struct zonelist *zonelist, */ if (!(gfp_mask & __GFP_FS)) { wait_event_interruptible_timeout(pgdat->pfmemalloc_wait, - pfmemalloc_watermark_ok(pgdat), HZ); + allow_direct_reclaim(pgdat), HZ); goto check_pending; } /* Throttle until kswapd wakes the process */ wait_event_killable(zone->zone_pgdat->pfmemalloc_wait, - pfmemalloc_watermark_ok(pgdat)); + allow_direct_reclaim(pgdat)); check_pending: if (fatal_signal_pending(current)) @@ -3116,7 +3127,7 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx) /* * The throttled processes are normally woken up in balance_pgdat() as - * soon as pfmemalloc_watermark_ok() is true. But there is a potential + * soon as allow_direct_reclaim() is true. But there is a potential * race between when kswapd checks the watermarks and a process gets * throttled. There is also a potential race if processes get * throttled, kswapd wakes, a large process exits thereby balancing the @@ -3130,6 +3141,10 @@ static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx) if (waitqueue_active(&pgdat->pfmemalloc_wait)) wake_up_all(&pgdat->pfmemalloc_wait); + /* Hopeless node, leave it to direct reclaim */ + if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) + return true; + for (i = 0; i <= classzone_idx; i++) { struct zone *zone = pgdat->node_zones + i; @@ -3216,9 +3231,9 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) count_vm_event(PAGEOUTRUN); do { + unsigned long nr_reclaimed = sc.nr_reclaimed; bool raise_priority = true; - sc.nr_reclaimed = 0; sc.reclaim_idx = classzone_idx; /* @@ -3297,7 +3312,7 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) * able to safely make forward progress. Wake them */ if (waitqueue_active(&pgdat->pfmemalloc_wait) && - pfmemalloc_watermark_ok(pgdat)) + allow_direct_reclaim(pgdat)) wake_up_all(&pgdat->pfmemalloc_wait); /* Check if kswapd should be suspending */ @@ -3308,10 +3323,14 @@ static int balance_pgdat(pg_data_t *pgdat, int order, int classzone_idx) * Raise priority if scanning rate is too low or there was no * progress in reclaiming pages */ - if (raise_priority || !sc.nr_reclaimed) + nr_reclaimed = sc.nr_reclaimed - nr_reclaimed; + if (raise_priority || !nr_reclaimed) sc.priority--; } while (sc.priority >= 1); + if (!sc.nr_reclaimed) + pgdat->kswapd_failures++; + out: /* * Return the order kswapd stopped reclaiming at as @@ -3511,6 +3530,10 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx) if (!waitqueue_active(&pgdat->kswapd_wait)) return; + /* Hopeless node, leave it to direct reclaim */ + if (pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES) + return; + /* Only wake kswapd if all zones are unbalanced */ for (z = 0; z <= classzone_idx; z++) { zone = pgdat->node_zones + z; @@ -3781,9 +3804,6 @@ int node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned int order) sum_zone_node_page_state(pgdat->node_id, NR_SLAB_RECLAIMABLE) <= pgdat->min_slab_pages) return NODE_RECLAIM_FULL; - if (!pgdat_reclaimable(pgdat)) - return NODE_RECLAIM_FULL; - /* * Do not scan if the allocation should not be delayed. */ diff --git a/mm/vmstat.c b/mm/vmstat.c index abda95be88b4..c3585f345c22 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1433,7 +1433,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, "\n node_unreclaimable: %u" "\n start_pfn: %lu" "\n node_inactive_ratio: %u", - !pgdat_reclaimable(zone->zone_pgdat), + pgdat->kswapd_failures >= MAX_RECLAIM_RETRIES, zone->zone_start_pfn, zone->zone_pgdat->inactive_ratio); seq_putc(m, '\n'); diff --git a/mm/zswap.c b/mm/zswap.c index dbef27822a98..ded051e3433d 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -752,18 +752,22 @@ static int __zswap_param_set(const char *val, const struct kernel_param *kp, pool = zswap_pool_find_get(type, compressor); if (pool) { zswap_pool_debug("using existing", pool); + WARN_ON(pool == zswap_pool_current()); list_del_rcu(&pool->list); - } else { - spin_unlock(&zswap_pools_lock); - pool = zswap_pool_create(type, compressor); - spin_lock(&zswap_pools_lock); } + spin_unlock(&zswap_pools_lock); + + if (!pool) + pool = zswap_pool_create(type, compressor); + if (pool) ret = param_set_charp(s, kp); else ret = -EINVAL; + spin_lock(&zswap_pools_lock); + if (!ret) { put_pool = zswap_pool_current(); list_add_rcu(&pool->list, &zswap_pools); |