From 98f32602d42951e61a059685f842aa7d778ffab0 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 21 May 2009 20:33:58 +0100 Subject: hugh: update email address My old address will shut down in a few days time: remove it from the tree, and add a tmpfs (shmem filesystem) maintainer entry with the new address. Signed-off-by: Hugh Dickins Signed-off-by: Hugh Dickins Signed-off-by: Linus Torvalds --- mm/rmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/rmap.c b/mm/rmap.c index 16521664010d..23122af32611 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -14,7 +14,7 @@ * Original design by Rik van Riel 2001 * File methods by Dave McCracken 2003, 2004 * Anonymous methods by Andrea Arcangeli 2004 - * Contributions by Hugh Dickins 2003, 2004 + * Contributions by Hugh Dickins 2003, 2004 */ /* -- cgit v1.2.3 From 6d2661ede5f20f968422e790af3334908c3bc857 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 28 May 2009 14:34:19 -0700 Subject: oom: fix possible oom_dump_tasks NULL pointer When /proc/sys/vm/oom_dump_tasks is enabled, it is possible to get a NULL pointer for tasks that have detached mm's since task_lock() is not held during the tasklist scan. Add the task_lock(). Acked-by: Nick Piggin Acked-by: Mel Gorman Cc: Rik van Riel Signed-off-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/oom_kill.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'mm') diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 92bcf1db16b2..a7b2460e922b 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -284,22 +284,28 @@ static void dump_tasks(const struct mem_cgroup *mem) printk(KERN_INFO "[ pid ] uid tgid total_vm rss cpu oom_adj " "name\n"); do_each_thread(g, p) { - /* - * total_vm and rss sizes do not exist for tasks with a - * detached mm so there's no need to report them. - */ - if (!p->mm) - continue; + struct mm_struct *mm; + if (mem && !task_in_mem_cgroup(p, mem)) continue; if (!thread_group_leader(p)) continue; task_lock(p); + mm = p->mm; + if (!mm) { + /* + * total_vm and rss sizes do not exist for tasks with no + * mm so there's no need to report them; they can't be + * oom killed anyway. + */ + task_unlock(p); + continue; + } printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n", - p->pid, __task_cred(p)->uid, p->tgid, - p->mm->total_vm, get_mm_rss(p->mm), (int)task_cpu(p), - p->oomkilladj, p->comm); + p->pid, __task_cred(p)->uid, p->tgid, mm->total_vm, + get_mm_rss(mm), (int)task_cpu(p), p->oomkilladj, + p->comm); task_unlock(p); } while_each_thread(g, p); } -- cgit v1.2.3 From e767e0561d7fd2333df1921f1ab4176211f9036b Mon Sep 17 00:00:00 2001 From: Daisuke Nishimura Date: Thu, 28 May 2009 14:34:28 -0700 Subject: memcg: fix deadlock between lock_page_cgroup and mapping tree_lock mapping->tree_lock can be acquired from interrupt context. Then, following dead lock can occur. Assume "A" as a page. CPU0: lock_page_cgroup(A) interrupted -> take mapping->tree_lock. CPU1: take mapping->tree_lock -> lock_page_cgroup(A) This patch tries to fix above deadlock by moving memcg's hook to out of mapping->tree_lock. charge/uncharge of pagecache/swapcache is protected by page lock, not tree_lock. After this patch, lock_page_cgroup() is not called under mapping->tree_lock. Signed-off-by: KAMEZAWA Hiroyuki Signed-off-by: Daisuke Nishimura Cc: Balbir Singh Cc: Daisuke Nishimura Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/filemap.c | 6 +++--- mm/memcontrol.c | 4 +++- mm/swap_state.c | 4 +--- mm/truncate.c | 1 + mm/vmscan.c | 2 ++ 5 files changed, 10 insertions(+), 7 deletions(-) (limited to 'mm') diff --git a/mm/filemap.c b/mm/filemap.c index 379ff0bcbf6e..1b60f30cebfa 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -121,7 +121,6 @@ void __remove_from_page_cache(struct page *page) mapping->nrpages--; __dec_zone_page_state(page, NR_FILE_PAGES); BUG_ON(page_mapped(page)); - mem_cgroup_uncharge_cache_page(page); /* * Some filesystems seem to re-dirty the page even after @@ -145,6 +144,7 @@ void remove_from_page_cache(struct page *page) spin_lock_irq(&mapping->tree_lock); __remove_from_page_cache(page); spin_unlock_irq(&mapping->tree_lock); + mem_cgroup_uncharge_cache_page(page); } static int sync_page(void *word) @@ -476,13 +476,13 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping, if (likely(!error)) { mapping->nrpages++; __inc_zone_page_state(page, NR_FILE_PAGES); + spin_unlock_irq(&mapping->tree_lock); } else { page->mapping = NULL; + spin_unlock_irq(&mapping->tree_lock); mem_cgroup_uncharge_cache_page(page); page_cache_release(page); } - - spin_unlock_irq(&mapping->tree_lock); radix_tree_preload_end(); } else mem_cgroup_uncharge_cache_page(page); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 01c2d8f14685..4a747a27a22f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1488,8 +1488,9 @@ void mem_cgroup_uncharge_cache_page(struct page *page) __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE); } +#ifdef CONFIG_SWAP /* - * called from __delete_from_swap_cache() and drop "page" account. + * called after __delete_from_swap_cache() and drop "page" account. * memcg information is recorded to swap_cgroup of "ent" */ void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) @@ -1506,6 +1507,7 @@ void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent) if (memcg) css_put(&memcg->css); } +#endif #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP /* diff --git a/mm/swap_state.c b/mm/swap_state.c index 3ecea98ecb45..1416e7e9e02d 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -109,8 +109,6 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask) */ void __delete_from_swap_cache(struct page *page) { - swp_entry_t ent = {.val = page_private(page)}; - VM_BUG_ON(!PageLocked(page)); VM_BUG_ON(!PageSwapCache(page)); VM_BUG_ON(PageWriteback(page)); @@ -121,7 +119,6 @@ void __delete_from_swap_cache(struct page *page) total_swapcache_pages--; __dec_zone_page_state(page, NR_FILE_PAGES); INC_CACHE_INFO(del_total); - mem_cgroup_uncharge_swapcache(page, ent); } /** @@ -191,6 +188,7 @@ void delete_from_swap_cache(struct page *page) __delete_from_swap_cache(page); spin_unlock_irq(&swapper_space.tree_lock); + mem_cgroup_uncharge_swapcache(page, entry); swap_free(entry); page_cache_release(page); } diff --git a/mm/truncate.c b/mm/truncate.c index 55206fab7b99..12e1579f9165 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -359,6 +359,7 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) BUG_ON(page_has_private(page)); __remove_from_page_cache(page); spin_unlock_irq(&mapping->tree_lock); + mem_cgroup_uncharge_cache_page(page); page_cache_release(page); /* pagecache ref */ return 1; failed: diff --git a/mm/vmscan.c b/mm/vmscan.c index 5fa3eda1f03f..d254306562cd 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -470,10 +470,12 @@ static int __remove_mapping(struct address_space *mapping, struct page *page) swp_entry_t swap = { .val = page_private(page) }; __delete_from_swap_cache(page); spin_unlock_irq(&mapping->tree_lock); + mem_cgroup_uncharge_swapcache(page, swap); swap_free(swap); } else { __remove_from_page_cache(page); spin_unlock_irq(&mapping->tree_lock); + mem_cgroup_uncharge_cache_page(page); } return 1; -- cgit v1.2.3 From f83a275dbc5ca1721143698e844243fcadfabf6a Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Thu, 28 May 2009 14:34:40 -0700 Subject: mm: account for MAP_SHARED mappings using VM_MAYSHARE and not VM_SHARED in hugetlbfs Addresses http://bugzilla.kernel.org/show_bug.cgi?id=13302 hugetlbfs reserves huge pages but does not fault them at mmap() time to ensure that future faults succeed. The reservation behaviour differs depending on whether the mapping was mapped MAP_SHARED or MAP_PRIVATE. For MAP_SHARED mappings, hugepages are reserved when mmap() is first called and are tracked based on information associated with the inode. Other processes mapping MAP_SHARED use the same reservation. MAP_PRIVATE track the reservations based on the VMA created as part of the mmap() operation. Each process mapping MAP_PRIVATE must make its own reservation. hugetlbfs currently checks if a VMA is MAP_SHARED with the VM_SHARED flag and not VM_MAYSHARE. For file-backed mappings, such as hugetlbfs, VM_SHARED is set only if the mapping is MAP_SHARED and the file was opened read-write. If a shared memory mapping was mapped shared-read-write for populating of data and mapped shared-read-only by other processes, then hugetlbfs would account for the mapping as if it was MAP_PRIVATE. This causes processes to fail to map the file MAP_SHARED even though it should succeed as the reservation is there. This patch alters mm/hugetlb.c and replaces VM_SHARED with VM_MAYSHARE when the intent of the code was to check whether the VMA was mapped MAP_SHARED or MAP_PRIVATE. Signed-off-by: Mel Gorman Cc: Hugh Dickins Cc: Ingo Molnar Cc: Cc: Lee Schermerhorn Cc: KOSAKI Motohiro Cc: Cc: Eric B Munson Cc: Adam Litke Cc: Andy Whitcroft Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/hugetlb.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'mm') diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 28c655ba9353..e83ad2c9228c 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -316,7 +316,7 @@ static void resv_map_release(struct kref *ref) static struct resv_map *vma_resv_map(struct vm_area_struct *vma) { VM_BUG_ON(!is_vm_hugetlb_page(vma)); - if (!(vma->vm_flags & VM_SHARED)) + if (!(vma->vm_flags & VM_MAYSHARE)) return (struct resv_map *)(get_vma_private_data(vma) & ~HPAGE_RESV_MASK); return NULL; @@ -325,7 +325,7 @@ static struct resv_map *vma_resv_map(struct vm_area_struct *vma) static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map) { VM_BUG_ON(!is_vm_hugetlb_page(vma)); - VM_BUG_ON(vma->vm_flags & VM_SHARED); + VM_BUG_ON(vma->vm_flags & VM_MAYSHARE); set_vma_private_data(vma, (get_vma_private_data(vma) & HPAGE_RESV_MASK) | (unsigned long)map); @@ -334,7 +334,7 @@ static void set_vma_resv_map(struct vm_area_struct *vma, struct resv_map *map) static void set_vma_resv_flags(struct vm_area_struct *vma, unsigned long flags) { VM_BUG_ON(!is_vm_hugetlb_page(vma)); - VM_BUG_ON(vma->vm_flags & VM_SHARED); + VM_BUG_ON(vma->vm_flags & VM_MAYSHARE); set_vma_private_data(vma, get_vma_private_data(vma) | flags); } @@ -353,7 +353,7 @@ static void decrement_hugepage_resv_vma(struct hstate *h, if (vma->vm_flags & VM_NORESERVE) return; - if (vma->vm_flags & VM_SHARED) { + if (vma->vm_flags & VM_MAYSHARE) { /* Shared mappings always use reserves */ h->resv_huge_pages--; } else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) { @@ -369,14 +369,14 @@ static void decrement_hugepage_resv_vma(struct hstate *h, void reset_vma_resv_huge_pages(struct vm_area_struct *vma) { VM_BUG_ON(!is_vm_hugetlb_page(vma)); - if (!(vma->vm_flags & VM_SHARED)) + if (!(vma->vm_flags & VM_MAYSHARE)) vma->vm_private_data = (void *)0; } /* Returns true if the VMA has associated reserve pages */ static int vma_has_reserves(struct vm_area_struct *vma) { - if (vma->vm_flags & VM_SHARED) + if (vma->vm_flags & VM_MAYSHARE) return 1; if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) return 1; @@ -924,7 +924,7 @@ static long vma_needs_reservation(struct hstate *h, struct address_space *mapping = vma->vm_file->f_mapping; struct inode *inode = mapping->host; - if (vma->vm_flags & VM_SHARED) { + if (vma->vm_flags & VM_MAYSHARE) { pgoff_t idx = vma_hugecache_offset(h, vma, addr); return region_chg(&inode->i_mapping->private_list, idx, idx + 1); @@ -949,7 +949,7 @@ static void vma_commit_reservation(struct hstate *h, struct address_space *mapping = vma->vm_file->f_mapping; struct inode *inode = mapping->host; - if (vma->vm_flags & VM_SHARED) { + if (vma->vm_flags & VM_MAYSHARE) { pgoff_t idx = vma_hugecache_offset(h, vma, addr); region_add(&inode->i_mapping->private_list, idx, idx + 1); @@ -1893,7 +1893,7 @@ retry_avoidcopy: * at the time of fork() could consume its reserves on COW instead * of the full address range. */ - if (!(vma->vm_flags & VM_SHARED) && + if (!(vma->vm_flags & VM_MAYSHARE) && is_vma_resv_set(vma, HPAGE_RESV_OWNER) && old_page != pagecache_page) outside_reserve = 1; @@ -2000,7 +2000,7 @@ retry: clear_huge_page(page, address, huge_page_size(h)); __SetPageUptodate(page); - if (vma->vm_flags & VM_SHARED) { + if (vma->vm_flags & VM_MAYSHARE) { int err; struct inode *inode = mapping->host; @@ -2104,7 +2104,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, goto out_mutex; } - if (!(vma->vm_flags & VM_SHARED)) + if (!(vma->vm_flags & VM_MAYSHARE)) pagecache_page = hugetlbfs_pagecache_page(h, vma, address); } @@ -2289,7 +2289,7 @@ int hugetlb_reserve_pages(struct inode *inode, * to reserve the full area even if read-only as mprotect() may be * called to make the mapping read-write. Assume !vma is a shm mapping */ - if (!vma || vma->vm_flags & VM_SHARED) + if (!vma || vma->vm_flags & VM_MAYSHARE) chg = region_chg(&inode->i_mapping->private_list, from, to); else { struct resv_map *resv_map = resv_map_alloc(); @@ -2330,7 +2330,7 @@ int hugetlb_reserve_pages(struct inode *inode, * consumed reservations are stored in the map. Hence, nothing * else has to be done for private mappings here */ - if (!vma || vma->vm_flags & VM_SHARED) + if (!vma || vma->vm_flags & VM_MAYSHARE) region_add(&inode->i_mapping->private_list, from, to); return 0; } -- cgit v1.2.3 From 46f7e602fb32e02145ef14f8c0ca6d399f0a96b9 Mon Sep 17 00:00:00 2001 From: Nikanth Karthikesan Date: Thu, 28 May 2009 14:34:41 -0700 Subject: memcg: fix build warning and avoid checking for mem != null again and again Fix build warning, "mem_cgroup_is_obsolete defined but not used" when CONFIG_DEBUG_VM is not set. Also avoid checking for !mem again and again. Signed-off-by: Nikanth Karthikesan Acked-by: Pekka Enberg Acked-by: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memcontrol.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'mm') diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 4a747a27a22f..78eb8552818b 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -314,14 +314,6 @@ static struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) return mem; } -static bool mem_cgroup_is_obsolete(struct mem_cgroup *mem) -{ - if (!mem) - return true; - return css_is_removed(&mem->css); -} - - /* * Call callback function against all cgroup under hierarchy tree. */ @@ -932,7 +924,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, if (unlikely(!mem)) return 0; - VM_BUG_ON(!mem || mem_cgroup_is_obsolete(mem)); + VM_BUG_ON(css_is_removed(&mem->css)); while (1) { int ret; -- cgit v1.2.3