diff options
authorMichal Hocko <mhocko@suse.cz>2014-05-22 11:54:19 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2014-05-23 09:37:29 -0700
commit6f6acb00514c10be35529402f36ad7a288f08c2e (patch)
parent55231e5c898c5c03c14194001e349f40f59bd300 (diff)
memcg: fix swapcache charge from kernel thread context
Commit 284f39afeaa4 ("mm: memcg: push !mm handling out to page cache charge function") explicitly checks for page cache charges without any mm context (from kernel thread context[1]). This seemed to be the only possible case where memory could be charged without mm context so commit 03583f1a631c ("memcg: remove unnecessary !mm check from try_get_mem_cgroup_from_mm()") removed the mm check from get_mem_cgroup_from_mm(). This however caused another NULL ptr dereference during early boot when loopback kernel thread splices to tmpfs as reported by Stephan Kulow: BUG: unable to handle kernel NULL pointer dereference at 0000000000000360 IP: get_mem_cgroup_from_mm.isra.42+0x2b/0x60 Oops: 0000 [#1] SMP Modules linked in: btrfs dm_multipath dm_mod scsi_dh multipath raid10 raid456 async_raid6_recov async_memcpy async_pq raid6_pq async_xor xor async_tx raid1 raid0 md_mod parport_pc parport nls_utf8 isofs usb_storage iscsi_ibft iscsi_boot_sysfs arc4 ecb fan thermal nfs lockd fscache nls_iso8859_1 nls_cp437 sg st hid_generic usbhid af_packet sunrpc sr_mod cdrom ata_generic uhci_hcd virtio_net virtio_blk ehci_hcd usbcore ata_piix floppy processor button usb_common virtio_pci virtio_ring virtio edd squashfs loop ppa] CPU: 0 PID: 97 Comm: loop1 Not tainted 3.15.0-rc5-5-default #1 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 Call Trace: __mem_cgroup_try_charge_swapin+0x40/0xe0 mem_cgroup_charge_file+0x8b/0xd0 shmem_getpage_gfp+0x66b/0x7b0 shmem_file_splice_read+0x18f/0x430 splice_direct_to_actor+0xa2/0x1c0 do_lo_receive+0x5a/0x60 [loop] loop_thread+0x298/0x720 [loop] kthread+0xc6/0xe0 ret_from_fork+0x7c/0xb0 Also Branimir Maksimovic reported the following oops which is tiggered for the swapcache charge path from the accounting code for kernel threads: CPU: 1 PID: 160 Comm: kworker/u8:5 Tainted: P OE 3.15.0-rc5-core2-custom #159 Hardware name: System manufacturer System Product Name/MAXIMUSV GENE, BIOS 1903 08/19/2013 task: ffff880404e349b0 ti: ffff88040486a000 task.ti: ffff88040486a000 RIP: get_mem_cgroup_from_mm.isra.42+0x2b/0x60 Call Trace: __mem_cgroup_try_charge_swapin+0x45/0xf0 mem_cgroup_charge_file+0x9c/0xe0 shmem_getpage_gfp+0x62c/0x770 shmem_write_begin+0x38/0x40 generic_perform_write+0xc5/0x1c0 __generic_file_aio_write+0x1d1/0x3f0 generic_file_aio_write+0x4f/0xc0 do_sync_write+0x5a/0x90 do_acct_process+0x4b1/0x550 acct_process+0x6d/0xa0 do_exit+0x827/0xa70 kthread+0xc3/0xf0 This patch fixes the issue by reintroducing mm check into get_mem_cgroup_from_mm. We could do the same trick in __mem_cgroup_try_charge_swapin as we do for the regular page cache path but it is not worth troubles. The check is not that expensive and it is better to have get_mem_cgroup_from_mm more robust. [1] - http://marc.info/?l=linux-mm&m=139463617808941&w=2 Fixes: 03583f1a631c ("memcg: remove unnecessary !mm check from try_get_mem_cgroup_from_mm()") Reported-and-tested-by: Stephan Kulow <coolo@suse.com> Reported-by: Branimir Maksimovic <branimir.maksimovic@gmail.com> Signed-off-by: Michal Hocko <mhocko@suse.cz> Acked-by: Johannes Weiner <hannes@cmpxchg.org> Cc: Hugh Dickins <hughd@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
1 files changed, 14 insertions, 13 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c47dffdcb246..5177c6d4a2dd 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1077,9 +1077,18 @@ static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
do {
- memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
- if (unlikely(!memcg))
+ /*
+ * Page cache insertions can happen withou an
+ * actual mm context, e.g. during disk probing
+ * on boot, loopback IO, acct() writes etc.
+ */
+ if (unlikely(!mm))
memcg = root_mem_cgroup;
+ else {
+ memcg = mem_cgroup_from_task(rcu_dereference(mm->owner));
+ if (unlikely(!memcg))
+ memcg = root_mem_cgroup;
+ }
} while (!css_tryget(&memcg->css));
return memcg;
@@ -3958,17 +3967,9 @@ int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
return 0;
- /*
- * Page cache insertions can happen without an actual mm
- * context, e.g. during disk probing on boot.
- */
- if (unlikely(!mm))
- memcg = root_mem_cgroup;
- else {
- memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
- if (!memcg)
- return -ENOMEM;
- }
+ memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
+ if (!memcg)
+ return -ENOMEM;
__mem_cgroup_commit_charge(memcg, page, 1, type, false);
return 0;