author	Johannes Weiner <hannes@cmpxchg.org>	2014-01-29 14:05:41 -0800
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>	2014-02-13 13:48:00 -0800
commit	48526149964e69fc54a06c409e13d36990386464 (patch)
tree	844e34d12c42f34711531573af92243fbf273ef8
parent	03381bd28963f97a976d4742468359f12474ea39 (diff)
mm/page-writeback.c: do not count anon pages as dirtyable memory
commit a1c3bfb2f67ef766de03f1f56bdfff9c8595ab14 upstream.

The VM is currently heavily tuned to avoid swapping.  Whether that is
good or bad is a separate discussion, but as long as the VM won't swap
to make room for dirty cache, we cannot consider anonymous pages when
calculating the amount of dirtyable memory, the baseline to which
dirty_background_ratio and dirty_ratio are applied.

A simple workload that occupies a significant share (40+%, depending on
memory layout, storage speeds, etc.) of memory with anon/tmpfs pages and
uses the remainder for a streaming writer demonstrates this problem.  In
that case, the actual cache pages are a small fraction of what is
considered dirtyable overall, which results in a relatively large
portion of the cache pages being dirtied.  As kswapd starts rotating
these, random tasks enter direct reclaim and stall on IO.

Only consider free pages and file pages dirtyable.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reported-by: Tejun Heo <tj@kernel.org>
Tested-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Wu Fengguang <fengguang.wu@intel.com>
Reviewed-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
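[Editor's illustration, not part of the patch] To make the commit's reasoning concrete, here is a small standalone calculation with made-up numbers for an 8 GiB machine (values in MiB), ignoring dirty_balance_reserve and highmem.  vm.dirty_ratio uses the kernel default of 20; the memory split is hypothetical.

    #include <stdio.h>

    /* Made-up figures for an 8 GiB machine, values in MiB. */
    int main(void)
    {
            unsigned long free_mem = 1536;    /* NR_FREE_PAGES */
            unsigned long file_mem = 2048;    /* NR_ACTIVE_FILE + NR_INACTIVE_FILE */
            unsigned long anon_mem = 4096;    /* anon/tmpfs pages */
            unsigned long dirty_ratio = 20;   /* vm.dirty_ratio default */

            unsigned long before = free_mem + file_mem + anon_mem; /* anon counted */
            unsigned long after  = free_mem + file_mem;            /* this patch */

            printf("dirty limit before: %lu MiB against %lu MiB of file cache\n",
                   before * dirty_ratio / 100, file_mem);
            printf("dirty limit after:  %lu MiB against %lu MiB of file cache\n",
                   after * dirty_ratio / 100, file_mem);
            return 0;
    }

With these numbers, roughly 1536 of 2048 MiB of file cache (75%) may be dirtied before throttling kicks in when anon pages are counted, versus about 716 MiB (~35%) once only free and file pages are considered dirtyable.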
-rw-r--r--  include/linux/vmstat.h  |  3
-rw-r--r--  mm/page-writeback.c     |  6
-rw-r--r--  mm/vmscan.c             | 49
3 files changed, 18 insertions(+), 40 deletions(-)
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index c586679b6fe..9044769f229 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -142,9 +142,6 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone,
return x;
}
-extern unsigned long global_reclaimable_pages(void);
-extern unsigned long zone_reclaimable_pages(struct zone *zone);
-
#ifdef CONFIG_NUMA
/*
* Determine the per node value of a stat item. This function
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index bcd929093e6..5a06d4cb9a3 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -202,7 +202,8 @@ static unsigned long zone_dirtyable_memory(struct zone *zone)
nr_pages = zone_page_state(zone, NR_FREE_PAGES);
nr_pages -= min(nr_pages, zone->dirty_balance_reserve);
- nr_pages += zone_reclaimable_pages(zone);
+ nr_pages += zone_page_state(zone, NR_INACTIVE_FILE);
+ nr_pages += zone_page_state(zone, NR_ACTIVE_FILE);
return nr_pages;
}
@@ -255,7 +256,8 @@ static unsigned long global_dirtyable_memory(void)
x = global_page_state(NR_FREE_PAGES);
x -= min(x, dirty_balance_reserve);
- x += global_reclaimable_pages();
+ x += global_page_state(NR_INACTIVE_FILE);
+ x += global_page_state(NR_ACTIVE_FILE);
if (!vm_highmem_is_dirtyable)
x -= highmem_dirtyable_memory(x);
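[Editor's note, not part of the patch] For context, a simplified sketch of how the total computed here feeds the writeback thresholds.  dirty_limits_sketch() is a hypothetical helper written as if it lived next to global_dirtyable_memory() in mm/page-writeback.c; the in-tree global_dirty_limits() additionally honours vm.dirty_bytes/vm.dirty_background_bytes and per-task adjustments, which are omitted.

    /*
     * Simplified sketch: turn the dirtyable total into the background
     * and hard dirty thresholds.  The real global_dirty_limits() also
     * handles the *_bytes sysctls and per-task adjustments.
     */
    static void dirty_limits_sketch(unsigned long *pbackground,
                                    unsigned long *pdirty)
    {
            unsigned long available_memory = global_dirtyable_memory();

            *pbackground = (dirty_background_ratio * available_memory) / 100;
            *pdirty = (vm_dirty_ratio * available_memory) / 100;
    }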
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7dbdb6afd10..43ddef3cf44 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2117,6 +2117,20 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
return aborted_reclaim;
}
+static unsigned long zone_reclaimable_pages(struct zone *zone)
+{
+ int nr;
+
+ nr = zone_page_state(zone, NR_ACTIVE_FILE) +
+ zone_page_state(zone, NR_INACTIVE_FILE);
+
+ if (get_nr_swap_pages() > 0)
+ nr += zone_page_state(zone, NR_ACTIVE_ANON) +
+ zone_page_state(zone, NR_INACTIVE_ANON);
+
+ return nr;
+}
+
static bool zone_reclaimable(struct zone *zone)
{
return zone->pages_scanned < zone_reclaimable_pages(zone) * 6;
@@ -3075,41 +3089,6 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
wake_up_interruptible(&pgdat->kswapd_wait);
}
-/*
- * The reclaimable count would be mostly accurate.
- * The less reclaimable pages may be
- * - mlocked pages, which will be moved to unevictable list when encountered
- * - mapped pages, which may require several travels to be reclaimed
- * - dirty pages, which is not "instantly" reclaimable
- */
-unsigned long global_reclaimable_pages(void)
-{
- int nr;
-
- nr = global_page_state(NR_ACTIVE_FILE) +
- global_page_state(NR_INACTIVE_FILE);
-
- if (get_nr_swap_pages() > 0)
- nr += global_page_state(NR_ACTIVE_ANON) +
- global_page_state(NR_INACTIVE_ANON);
-
- return nr;
-}
-
-unsigned long zone_reclaimable_pages(struct zone *zone)
-{
- int nr;
-
- nr = zone_page_state(zone, NR_ACTIVE_FILE) +
- zone_page_state(zone, NR_INACTIVE_FILE);
-
- if (get_nr_swap_pages() > 0)
- nr += zone_page_state(zone, NR_ACTIVE_ANON) +
- zone_page_state(zone, NR_INACTIVE_ANON);
-
- return nr;
-}
-
#ifdef CONFIG_HIBERNATION
/*
* Try to free `nr_to_reclaim' of memory, system-wide, and return the number of