From e43c3afb367112a5b357f9adfac7817255129c88 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Tue, 29 Sep 2009 13:16:20 +0800 Subject: HWPOISON: return early on non-LRU pages Right now we have some trouble with non atomic access to page flags when locking the page. To plug this hole for now, limit error recovery to LRU pages for now. This could be better fixed by defining a suitable protocol, but let's go this simple way for now This avoids unnecessary races with __set_page_locked() and __SetPageSlab*() and maybe more non-atomic page flag operations. This loses isolated pages which are currently in page reclaim, but these are relatively limited compared to the total memory. Signed-off-by: Wu Fengguang Signed-off-by: Andi Kleen [AK: new description, bug fixes, cleanups] --- mm/memory-failure.c | 49 ++++++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 25 deletions(-) (limited to 'mm') diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 729d4b15b64..e17ec3f1c63 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -370,9 +370,6 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn) int ret = FAILED; struct address_space *mapping; - if (!isolate_lru_page(p)) - page_cache_release(p); - /* * For anonymous pages we're done the only reference left * should be the one m_f() holds. @@ -498,30 +495,18 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn) */ static int me_swapcache_dirty(struct page *p, unsigned long pfn) { - int ret = FAILED; - ClearPageDirty(p); /* Trigger EIO in shmem: */ ClearPageUptodate(p); - if (!isolate_lru_page(p)) { - page_cache_release(p); - ret = DELAYED; - } - - return ret; + return DELAYED; } static int me_swapcache_clean(struct page *p, unsigned long pfn) { - int ret = FAILED; - - if (!isolate_lru_page(p)) { - page_cache_release(p); - ret = RECOVERED; - } delete_from_swap_cache(p); - return ret; + + return RECOVERED; } /* @@ -611,8 +596,6 @@ static struct page_state { { 0, 0, "unknown page state", me_unknown }, }; -#undef lru - static void action_result(unsigned long pfn, char *msg, int result) { struct page *page = NULL; @@ -664,9 +647,6 @@ static void hwpoison_user_mappings(struct page *p, unsigned long pfn, if (PageReserved(p) || PageCompound(p) || PageSlab(p)) return; - if (!PageLRU(p)) - lru_add_drain_all(); - /* * This check implies we don't kill processes if their pages * are in the swap cache early. Those are always late kills. @@ -738,6 +718,7 @@ static void hwpoison_user_mappings(struct page *p, unsigned long pfn, int __memory_failure(unsigned long pfn, int trapno, int ref) { + unsigned long lru_flag; struct page_state *ps; struct page *p; int res; @@ -774,6 +755,24 @@ int __memory_failure(unsigned long pfn, int trapno, int ref) return PageBuddy(compound_head(p)) ? 0 : -EBUSY; } + /* + * We ignore non-LRU pages for good reasons. + * - PG_locked is only well defined for LRU pages and a few others + * - to avoid races with __set_page_locked() + * - to avoid races with __SetPageSlab*() (and more non-atomic ops) + * The check (unnecessarily) ignores LRU pages being isolated and + * walked by the page reclaim code, however that's not a big loss. + */ + if (!PageLRU(p)) + lru_add_drain_all(); + lru_flag = p->flags & lru; + if (isolate_lru_page(p)) { + action_result(pfn, "non LRU", IGNORED); + put_page(p); + return -EBUSY; + } + page_cache_release(p); + /* * Lock the page and wait for writeback to finish. * It's very difficult to mess with pages currently under IO @@ -790,7 +789,7 @@ int __memory_failure(unsigned long pfn, int trapno, int ref) /* * Torn down by someone else? */ - if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) { + if ((lru_flag & lru) && !PageSwapCache(p) && p->mapping == NULL) { action_result(pfn, "already truncated LRU", IGNORED); res = 0; goto out; @@ -798,7 +797,7 @@ int __memory_failure(unsigned long pfn, int trapno, int ref) res = -EBUSY; for (ps = error_states;; ps++) { - if ((p->flags & ps->mask) == ps->res) { + if (((p->flags | lru_flag)& ps->mask) == ps->res) { res = page_action(ps, p, pfn, ref); break; } -- cgit v1.2.3 From 4779cb31c0ee3b355116745edca3f3e5fe865553 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 14 Oct 2009 01:51:41 +0200 Subject: HWPOISON: Fix page count leak in hwpoison late kill in do_swap_page When returning due to a poisoned page drop the page count. It wasn't a fatal problem because noone cares about the page count on a poisoned page (except when it wraps), but it's cleaner to fix it. Pointed out by Linus. Signed-off-by: Andi Kleen --- mm/memory.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/memory.c b/mm/memory.c index 7e91b5f9f69..7a3b0ad5594 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2539,7 +2539,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, } else if (PageHWPoison(page)) { ret = VM_FAULT_HWPOISON; delayacct_clear_flag(DELAYACCT_PF_SWAPIN); - goto out; + goto out_release; } lock_page(page); @@ -2611,6 +2611,7 @@ out_nomap: pte_unmap_unlock(page_table, ptl); out_page: unlock_page(page); +out_release: page_cache_release(page); return ret; } -- cgit v1.2.3 From 01e00f880ca700376e1845cf7a2524ebe68e47d6 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Tue, 13 Oct 2009 15:02:11 +0100 Subject: HWPOISON: fix oops on ksm pages Memory failure on a KSM page currently oopses on its NULL anon_vma in page_lock_anon_vma(): that may not be much worse than the consequence of ignoring it, but it is better to be consistent with how ZERO_PAGE and hugetlb pages and other awkward cases are treated. Just skip it. Signed-off-by: Hugh Dickins Signed-off-by: Andi Kleen --- mm/memory-failure.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/memory-failure.c b/mm/memory-failure.c index e17ec3f1c63..e354b9f2f38 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -644,7 +645,7 @@ static void hwpoison_user_mappings(struct page *p, unsigned long pfn, int i; int kill = 1; - if (PageReserved(p) || PageCompound(p) || PageSlab(p)) + if (PageReserved(p) || PageCompound(p) || PageSlab(p) || PageKsm(p)) return; /* -- cgit v1.2.3 From 7456b0405d8fc063c49628f969cdb23be060fc80 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Mon, 19 Oct 2009 08:15:01 +0200 Subject: HWPOISON: fix invalid page count in printk output The madvise injector already holds a reference when passing in a page to the memory-failure code. The code corrects for this additional reference for its checks, but the final printk output didn't. Fix that. Signed-off-by: Wu Fengguang Signed-off-by: Andi Kleen --- mm/memory-failure.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'mm') diff --git a/mm/memory-failure.c b/mm/memory-failure.c index e354b9f2f38..dacc6418387 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -613,13 +613,16 @@ static int page_action(struct page_state *ps, struct page *p, unsigned long pfn, int ref) { int result; + int count; result = ps->action(p, pfn); action_result(pfn, ps->msg, result); - if (page_count(p) != 1 + ref) + + count = page_count(p) - 1 - ref; + if (count != 0) printk(KERN_ERR "MCE %#lx: %s page still referenced by %d users\n", - pfn, ps->msg, page_count(p) - 1); + pfn, ps->msg, count); /* Could do more checks here if page looks ok */ /* -- cgit v1.2.3