From d833049bd20570cbbadeb5228c579f9f3aaa4e03 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 12 Apr 2012 12:49:11 -0700 Subject: memcg: fix broken boolen expression action != CPU_DEAD || action != CPU_DEAD_FROZEN is always true. Signed-off-by: Kirill A. Shutemov Acked-by: KAMEZAWA Hiroyuki Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7d698df4a06..ea1e879b2db 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -2165,7 +2165,7 @@ static int __cpuinit memcg_cpu_hotplug_callback(struct notifier_block *nb, if (action == CPU_ONLINE) return NOTIFY_OK; - if ((action != CPU_DEAD) || action != CPU_DEAD_FROZEN) + if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) return NOTIFY_OK; for_each_mem_cgroup(iter) -- cgit v1.2.3 From 569530fb1b40ab2d2ca147ee79898ac807ebdf90 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Thu, 12 Apr 2012 12:49:13 -0700 Subject: memcg: do not open code accesses to res_counter members We should use the accessor res_counter_read_u64 for that. Although a purely cosmetic change is sometimes better delayed, to avoid conflicting with other people's work, we are starting to have people touching this code as well, and reproducing the open code behavior because that's the standard =) Time to fix it, then. Signed-off-by: Glauber Costa Cc: Johannes Weiner Acked-by: Michal Hocko Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ea1e879b2db..a7165a60d0a 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3763,7 +3763,7 @@ move_account: goto try_to_free; cond_resched(); /* "ret" should also be checked to ensure all lists are empty. */ - } while (memcg->res.usage > 0 || ret); + } while (res_counter_read_u64(&memcg->res, RES_USAGE) > 0 || ret); out: css_put(&memcg->css); return ret; @@ -3778,7 +3778,7 @@ try_to_free: lru_add_drain_all(); /* try to free all pages in this cgroup */ shrink = 1; - while (nr_retries && memcg->res.usage > 0) { + while (nr_retries && res_counter_read_u64(&memcg->res, RES_USAGE) > 0) { int progress; if (signal_pending(current)) { -- cgit v1.2.3 From 66aebce747eaf9bc456bf1f1b217d8db843031d0 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Thu, 12 Apr 2012 12:49:15 -0700 Subject: hugetlb: fix race condition in hugetlb_fault() The race is as follows: Suppose a multi-threaded task forks a new process (on cpu A), thus bumping up the ref count on all the pages. While the fork is occurring (and thus we have marked all the PTEs as read-only), another thread in the original process (on cpu B) tries to write to a huge page, taking an access violation from the write-protect and calling hugetlb_cow(). Now, suppose the fork() fails. It will undo the COW and decrement the ref count on the pages, so the ref count on the huge page drops back to 1. Meanwhile hugetlb_cow() also decrements the ref count by one on the original page, since the original address space doesn't need it any more, having copied a new page to replace the original page. This leaves the ref count at zero, and when we call unlock_page(), we panic. fork on CPU A fault on CPU B ============= ============== ... down_write(&parent->mmap_sem); down_write_nested(&child->mmap_sem); ... while duplicating vmas if error break; ... up_write(&child->mmap_sem); up_write(&parent->mmap_sem); ... down_read(&parent->mmap_sem); ... lock_page(page); handle COW page_mapcount(old_page) == 2 alloc and prepare new_page ... handle error page_remove_rmap(page); put_page(page); ... fold new_page into pte page_remove_rmap(page); put_page(page); ... oops ==> unlock_page(page); up_read(&parent->mmap_sem); The solution is to take an extra reference to the page while we are holding the lock on it. Signed-off-by: Chris Metcalf Cc: Hillf Danton Cc: Michal Hocko Cc: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'mm') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index b8ce6f45095..cd65cb19c94 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2791,6 +2791,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, * so no worry about deadlock. */ page = pte_page(entry); + get_page(page); if (page != pagecache_page) lock_page(page); @@ -2822,6 +2823,7 @@ out_page_table_lock: } if (page != pagecache_page) unlock_page(page); + put_page(page); out_mutex: mutex_unlock(&hugetlb_instantiation_mutex); -- cgit v1.2.3 From 41c93088127df2579e8ca64010929ec9e41d5543 Mon Sep 17 00:00:00 2001 From: Ying Han Date: Thu, 12 Apr 2012 12:49:16 -0700 Subject: Revert "mm: vmscan: fix misused nr_reclaimed in shrink_mem_cgroup_zone()" This reverts commit c38446cc65e1f2b3eb8630c53943b94c4f65f670. Before the commit, the code makes senses to me but not after the commit. The "nr_reclaimed" is the number of pages reclaimed by scanning through the memcg's lru lists. The "nr_to_reclaim" is the target value for the whole function. For example, we like to early break the reclaim if reclaimed 32 pages under direct reclaim (not DEF_PRIORITY). After the reverted commit, the target "nr_to_reclaim" is decremented each time by "nr_reclaimed" but we still use it to compare the "nr_reclaimed". It just doesn't make sense to me... Signed-off-by: Ying Han Acked-by: Hugh Dickins Cc: Rik van Riel Cc: Hillf Danton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/vmscan.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'mm') diff --git a/mm/vmscan.c b/mm/vmscan.c index 33c332bbab7..1a518684a32 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -2107,12 +2107,7 @@ restart: * with multiple processes reclaiming pages, the total * freeing target can get unreasonably large. */ - if (nr_reclaimed >= nr_to_reclaim) - nr_to_reclaim = 0; - else - nr_to_reclaim -= nr_reclaimed; - - if (!nr_to_reclaim && priority < DEF_PRIORITY) + if (nr_reclaimed >= nr_to_reclaim && priority < DEF_PRIORITY) break; } blk_finish_plug(&plug); -- cgit v1.2.3