#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/bootmem.h>
#include <linux/bit_spinlock.h>
#include <linux/page_cgroup.h>
#include <linux/hash.h>
#include <linux/slab.h>
#include <linux/memory.h>
#include <linux/vmalloc.h>
#include <linux/cgroup.h>
#include <linux/swapops.h>
#include <linux/kmemleak.h>

static unsigned long total_usage;

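/*
 * On PREEMPT_RT each page_cgroup carries a real spinlock (pcg_lock) rather
 * than a bit spinlock in its flags, so every entry of a freshly allocated
 * page_cgroup array must be initialized explicitly. Without PREEMPT_RT this
 * is a no-op.
 */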
static void page_cgroup_lock_init(struct page_cgroup *pc, int nr_pages)
{
#ifdef CONFIG_PREEMPT_RT_BASE
	for (; nr_pages; nr_pages--, pc++)
		spin_lock_init(&pc->pcg_lock);
#endif
}

#if !defined(CONFIG_SPARSEMEM)


void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
{
	pgdat->node_page_cgroup = NULL;
}

struct page_cgroup *lookup_page_cgroup(struct page *page)
{
	unsigned long pfn = page_to_pfn(page);
	unsigned long offset;
	struct page_cgroup *base;

	base = NODE_DATA(page_to_nid(page))->node_page_cgroup;
#ifdef CONFIG_DEBUG_VM
	/*
	 * The sanity checks the page allocator does upon freeing a
	 * page can reach here before the page_cgroup arrays are
	 * allocated when feeding a range of pages to the allocator
	 * for the first time during bootup or memory hotplug.
	 */
	if (unlikely(!base))
		return NULL;
#endif
	offset = pfn - NODE_DATA(page_to_nid(page))->node_start_pfn;
	return base + offset;
}

static int __init alloc_node_page_cgroup(int nid)
{
	struct page_cgroup *base;
	unsigned long table_size;
	unsigned long nr_pages;

	nr_pages = NODE_DATA(nid)->node_spanned_pages;
	if (!nr_pages)
		return 0;

	table_size = sizeof(struct page_cgroup) * nr_pages;

	base = __alloc_bootmem_node_nopanic(NODE_DATA(nid),
			table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
	if (!base)
		return -ENOMEM;
	NODE_DATA(nid)->node_page_cgroup = base;
	total_usage += table_size;
	page_cgroup_lock_init(base, nr_pages);
	return 0;
}

void __init page_cgroup_init_flatmem(void)
{
	int nid, fail;

	if (mem_cgroup_disabled())
		return;

	for_each_online_node(nid) {
		fail = alloc_node_page_cgroup(nid);
		if (fail)
			goto fail;
	}
	printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
	printk(KERN_INFO "please try 'cgroup_disable=memory' option if you"
	" don't want memory cgroups\n");
	return;
fail:
	printk(KERN_CRIT "allocation of page_cgroup failed.\n");
	printk(KERN_CRIT "please try 'cgroup_disable=memory' boot option\n");
	panic("Out of memory");
}

#else /* CONFIG_SPARSEMEM */

struct page_cgroup *lookup_page_cgroup(struct page *page)
{
	unsigned long pfn = page_to_pfn(page);
	struct mem_section *section = __pfn_to_section(pfn);
#ifdef CONFIG_DEBUG_VM
	/*
	 * The sanity checks the page allocator does upon freeing a
	 * page can reach here before the page_cgroup arrays are
	 * allocated when feeding a range of pages to the allocator
	 * for the first time during bootup or memory hotplug.
	 */
	if (!section->page_cgroup)
		return NULL;
#endif
	return section->page_cgroup + pfn;
}

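/*
 * Allocate a per-section page_cgroup table. Physically contiguous pages on
 * the requested node are preferred (alloc_pages_exact_nid); if that fails,
 * for example due to fragmentation, fall back to a virtually contiguous
 * vzalloc area. The successful page-allocator allocation is reported to
 * kmemleak explicitly because such memory is not tracked by it by default.
 */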
static void *__meminit alloc_page_cgroup(size_t size, int nid)
{
	gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN;
	void *addr = NULL;

	addr = alloc_pages_exact_nid(nid, size, flags);
	if (addr) {
		kmemleak_alloc(addr, size, 1, flags);
		return addr;
	}

	if (node_state(nid, N_HIGH_MEMORY))
		addr = vzalloc_node(size, nid);
	else
		addr = vzalloc(size);

	return addr;
}

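/*
 * Set up the page_cgroup table for the section containing @pfn, unless it
 * has already been initialized. The pointer stored in the section is offset
 * by the section's first pfn, so lookup_page_cgroup() can simply add the
 * page's pfn to it.
 */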
static int __meminit init_section_page_cgroup(unsigned long pfn, int nid)
{
	struct mem_section *section;
	struct page_cgroup *base;
	unsigned long table_size;

	section = __pfn_to_section(pfn);

	if (section->page_cgroup)
		return 0;

	table_size = sizeof(struct page_cgroup) * PAGES_PER_SECTION;
	base = alloc_page_cgroup(table_size, nid);

	/*
	 * The value stored in section->page_cgroup is (base - pfn)
	 * and it does not point to the memory block allocated above,
	 * causing kmemleak false positives.
	 */
	kmemleak_not_leak(base);

	if (!base) {
		printk(KERN_ERR "page cgroup allocation failure\n");
		return -ENOMEM;
	}

	page_cgroup_lock_init(base, PAGES_PER_SECTION);

	/*
	 * The passed "pfn" may not be aligned to SECTION. For the calculation
	 * we need to apply a mask.
	 */
	pfn &= PAGE_SECTION_MASK;
	section->page_cgroup = base - pfn;
	total_usage += table_size;
	return 0;
}
#ifdef CONFIG_MEMORY_HOTPLUG
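/*
 * Release a table obtained from alloc_page_cgroup(). The memory may have
 * come from either the page allocator or vmalloc, so pick the matching free
 * path; the kmemleak registration made at allocation time is dropped along
 * with the pages.
 */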
static void free_page_cgroup(void *addr)
{
	if (is_vmalloc_addr(addr)) {
		vfree(addr);
	} else {
		struct page *page = virt_to_page(addr);
		size_t table_size =
			sizeof(struct page_cgroup) * PAGES_PER_SECTION;

		BUG_ON(PageReserved(page));
		kmemleak_free(addr);
		free_pages_exact(addr, table_size);
	}
}

void __free_page_cgroup(unsigned long pfn)
{
	struct mem_section *ms;
	struct page_cgroup *base;

	ms = __pfn_to_section(pfn);
	if (!ms || !ms->page_cgroup)
		return;
	base = ms->page_cgroup + pfn;
	free_page_cgroup(base);
	ms->page_cgroup = NULL;
}

int __meminit online_page_cgroup(unsigned long start_pfn,
			unsigned long nr_pages,
			int nid)
{
	unsigned long start, end, pfn;
	int fail = 0;

	start = SECTION_ALIGN_DOWN(start_pfn);
	end = SECTION_ALIGN_UP(start_pfn + nr_pages);

	if (nid == -1) {
		/*
		 * In this case, the node already exists and contains valid
		 * memory. The "start_pfn" passed to us is a pfn which is an
		 * argument for online_pages(), so start_pfn should exist.
		 */
		nid = pfn_to_nid(start_pfn);
		VM_BUG_ON(!node_state(nid, N_ONLINE));
	}

	for (pfn = start; !fail && pfn < end; pfn += PAGES_PER_SECTION) {
		if (!pfn_present(pfn))
			continue;
		fail = init_section_page_cgroup(pfn, nid);
	}
	if (!fail)
		return 0;

	/* rollback */
	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
		__free_page_cgroup(pfn);

	return -ENOMEM;
}

int __meminit offline_page_cgroup(unsigned long start_pfn,
		unsigned long nr_pages, int nid)
{
	unsigned long start, end, pfn;

	start = SECTION_ALIGN_DOWN(start_pfn);
	end = SECTION_ALIGN_UP(start_pfn + nr_pages);

	for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION)
		__free_page_cgroup(pfn);
	return 0;
}

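/*
 * Memory hotplug notifier: allocate page_cgroup tables when a memory block
 * is about to come online, and tear them down again when it goes offline or
 * when onlining is cancelled.
 */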
static int __meminit page_cgroup_callback(struct notifier_block *self,
			       unsigned long action, void *arg)
{
	struct memory_notify *mn = arg;
	int ret = 0;

	switch (action) {
	case MEM_GOING_ONLINE:
		ret = online_page_cgroup(mn->start_pfn,
				   mn->nr_pages, mn->status_change_nid);
		break;
	case MEM_OFFLINE:
		offline_page_cgroup(mn->start_pfn,
				mn->nr_pages, mn->status_change_nid);
		break;
	case MEM_CANCEL_ONLINE:
		offline_page_cgroup(mn->start_pfn,
				mn->nr_pages, mn->status_change_nid);
		break;
	case MEM_GOING_OFFLINE:
		break;
	case MEM_ONLINE:
	case MEM_CANCEL_OFFLINE:
		break;
	}

	return notifier_from_errno(ret);
}

#endif

void __init page_cgroup_init(void)
{
	unsigned long pfn;
	int nid;

	if (mem_cgroup_disabled())
		return;

	for_each_node_state(nid, N_MEMORY) {
		unsigned long start_pfn, end_pfn;

		start_pfn = node_start_pfn(nid);
		end_pfn = node_end_pfn(nid);
		/*
		 * start_pfn and end_pfn may not be aligned to SECTION, and the
		 * page->flags of out-of-node pages are not initialized. So we
		 * scan [start_pfn, the biggest section's pfn < end_pfn) here.
		 */
		for (pfn = start_pfn;
		     pfn < end_pfn;
		     pfn = ALIGN(pfn + 1, PAGES_PER_SECTION)) {

			if (!pfn_valid(pfn))
				continue;
			/*
			 * Nodes' pfns can be overlapping.
			 * We know some archs can have a node layout such as
			 * -------------pfn-------------->
			 * N0 | N1 | N2 | N0 | N1 | N2 | ....
			 */
			if (pfn_to_nid(pfn) != nid)
				continue;
			if (init_section_page_cgroup(pfn, nid))
				goto oom;
		}
	}
	hotplug_memory_notifier(page_cgroup_callback, 0);
	printk(KERN_INFO "allocated %ld bytes of page_cgroup\n", total_usage);
	printk(KERN_INFO "please try 'cgroup_disable=memory' option if you "
			 "don't want memory cgroups\n");
	return;
oom:
	printk(KERN_CRIT "try 'cgroup_disable=memory' boot option\n");
	panic("Out of memory");
}

void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat)
{
	return;
}

#endif


#ifdef CONFIG_MEMCG_SWAP

static DEFINE_MUTEX(swap_cgroup_mutex);
struct swap_cgroup_ctrl {
	struct page **map;
	unsigned long length;
	spinlock_t	lock;
};

static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];

struct swap_cgroup {
	unsigned short	id;
};
#define SC_PER_PAGE	(PAGE_SIZE/sizeof(struct swap_cgroup))

/*
 * SwapCgroup implements "lookup" and "exchange" operations.
 * In typical usage, this swap_cgroup is accessed via memcg's charge/uncharge
 * against SwapCache. At swap_free(), this is accessed directly from swap.
 *
 * This means:
 *  - there is no race in "exchange" when we are accessed via SwapCache,
 *    because SwapCache (and its swp_entry) is under lock.
 *  - when called via swap_free(), there is no user of this entry and no race.
 * Therefore, no lock is needed around "exchange".
 *
 * TODO: we can push these buffers out to HIGHMEM.
 */
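
/*
 * Illustrative only (not code from this file): a rough sketch of how the
 * accessors below are typically used by the memcg swap accounting code.
 * The call sites and the way "id" (a css id) is obtained are assumptions
 * made for the example.
 *
 *	unsigned short id = ...;	// css id of the owning memcg
 *	unsigned short old;
 *
 *	old = swap_cgroup_record(ent, id);	// at swapout: record owner
 *	...
 *	id = lookup_swap_cgroup_id(ent);	// at swapin: find the owner
 *	...
 *	old = swap_cgroup_record(ent, 0);	// at swap_free(): clear record
 *
 *	// when moving charges between memcgs, replace the owner only if it
 *	// is still "old" (a nonzero return value means success):
 *	if (swap_cgroup_cmpxchg(ent, old, new))
 *		...	// moved
 */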

/*
 * allocate buffer for swap_cgroup.
 */
static int swap_cgroup_prepare(int type)
{
	struct page *page;
	struct swap_cgroup_ctrl *ctrl;
	unsigned long idx, max;

	ctrl = &swap_cgroup_ctrl[type];

	for (idx = 0; idx < ctrl->length; idx++) {
		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
		if (!page)
			goto not_enough_page;
		ctrl->map[idx] = page;
	}
	return 0;
not_enough_page:
	max = idx;
	for (idx = 0; idx < max; idx++)
		__free_page(ctrl->map[idx]);

	return -ENOMEM;
}

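/*
 * Map a swap entry to its swap_cgroup record: pick the per-swapfile control
 * structure by swap type, then index the array of record pages by offset
 * (SC_PER_PAGE records fit in one page). Optionally hand the control
 * structure back to the caller so it can take ctrl->lock.
 */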
static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent,
					struct swap_cgroup_ctrl **ctrlp)
{
	pgoff_t offset = swp_offset(ent);
	struct swap_cgroup_ctrl *ctrl;
	struct page *mappage;
	struct swap_cgroup *sc;

	ctrl = &swap_cgroup_ctrl[swp_type(ent)];
	if (ctrlp)
		*ctrlp = ctrl;

	mappage = ctrl->map[offset / SC_PER_PAGE];
	sc = page_address(mappage);
	return sc + offset % SC_PER_PAGE;
}

/**
 * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry.
 * @ent: swap entry to be cmpxchged
 * @old: old id
 * @new: new id
 *
 * Returns the old id on success, 0 on failure.
 * (No mem_cgroup uses 0 as its id.)
 */
unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
					unsigned short old, unsigned short new)
{
	struct swap_cgroup_ctrl *ctrl;
	struct swap_cgroup *sc;
	unsigned long flags;
	unsigned short retval;

	sc = lookup_swap_cgroup(ent, &ctrl);

	spin_lock_irqsave(&ctrl->lock, flags);
	retval = sc->id;
	if (retval == old)
		sc->id = new;
	else
		retval = 0;
	spin_unlock_irqrestore(&ctrl->lock, flags);
	return retval;
}

/**
 * swap_cgroup_record - record mem_cgroup for this swp_entry.
 * @ent: swap entry to be recorded into
 * @id: mem_cgroup id to be recorded
 *
 * Returns the old value on success, 0 on failure.
 * (The old value can, of course, be 0.)
 */
unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
{
	struct swap_cgroup_ctrl *ctrl;
	struct swap_cgroup *sc;
	unsigned short old;
	unsigned long flags;

	sc = lookup_swap_cgroup(ent, &ctrl);

	spin_lock_irqsave(&ctrl->lock, flags);
	old = sc->id;
	sc->id = id;
	spin_unlock_irqrestore(&ctrl->lock, flags);

	return old;
}

/**
 * lookup_swap_cgroup_id - lookup mem_cgroup id tied to a swap entry
 * @ent: swap entry to be looked up.
 *
 * Returns the CSS ID of the mem_cgroup on success, 0 on failure
 * (0 is an invalid ID).
 */
unsigned short lookup_swap_cgroup_id(swp_entry_t ent)
{
	return lookup_swap_cgroup(ent, NULL)->id;
}

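/*
 * Called at swapon: size and allocate the record buffers for a swap file
 * (one swap_cgroup per swap slot, packed SC_PER_PAGE to a page), serialized
 * by swap_cgroup_mutex. swap_cgroup_swapoff() below undoes this at swapoff.
 */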
int swap_cgroup_swapon(int type, unsigned long max_pages)
{
	void *array;
	unsigned long array_size;
	unsigned long length;
	struct swap_cgroup_ctrl *ctrl;

	if (!do_swap_account)
		return 0;

	length = DIV_ROUND_UP(max_pages, SC_PER_PAGE);
	array_size = length * sizeof(void *);

	array = vzalloc(array_size);
	if (!array)
		goto nomem;

	ctrl = &swap_cgroup_ctrl[type];
	mutex_lock(&swap_cgroup_mutex);
	ctrl->length = length;
	ctrl->map = array;
	spin_lock_init(&ctrl->lock);
	if (swap_cgroup_prepare(type)) {
		/* memory shortage */
		ctrl->map = NULL;
		ctrl->length = 0;
		mutex_unlock(&swap_cgroup_mutex);
		vfree(array);
		goto nomem;
	}
	mutex_unlock(&swap_cgroup_mutex);

	return 0;
nomem:
	printk(KERN_INFO "couldn't allocate enough memory for swap_cgroup.\n");
	printk(KERN_INFO
		"swap_cgroup can be disabled by swapaccount=0 boot option\n");
	return -ENOMEM;
}

void swap_cgroup_swapoff(int type)
{
	struct page **map;
	unsigned long i, length;
	struct swap_cgroup_ctrl *ctrl;

	if (!do_swap_account)
		return;

	mutex_lock(&swap_cgroup_mutex);
	ctrl = &swap_cgroup_ctrl[type];
	map = ctrl->map;
	length = ctrl->length;
	ctrl->map = NULL;
	ctrl->length = 0;
	mutex_unlock(&swap_cgroup_mutex);

	if (map) {
		for (i = 0; i < length; i++) {
			struct page *page = map[i];
			if (page)
				__free_page(page);
		}
		vfree(map);
	}
}

#endif