aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/cgroups/memory.txt56
-rw-r--r--mm/memcontrol.c97
2 files changed, 147 insertions, 6 deletions
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index b871f2552b4..e726fb0df71 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -262,10 +262,12 @@ some of the pages cached in the cgroup (page cache pages).
4.2 Task migration
When a task migrates from one cgroup to another, it's charge is not
-carried forward. The pages allocated from the original cgroup still
+carried forward by default. The pages allocated from the original cgroup still
remain charged to it, the charge is dropped when the page is freed or
reclaimed.
+Note: You can move charges of a task along with task migration. See 8.
+
4.3 Removing a cgroup
A cgroup can be removed by rmdir, but as discussed in sections 4.1 and 4.2, a
@@ -414,7 +416,57 @@ NOTE1: Soft limits take effect over a long period of time, since they involve
NOTE2: It is recommended to set the soft limit always below the hard limit,
otherwise the hard limit will take precedence.
-8. TODO
+8. Move charges at task migration
+
+Users can move charges associated with a task along with task migration, that
+is, uncharge task's pages from the old cgroup and charge them to the new cgroup.
+
+8.1 Interface
+
+This feature is disabled by default. It can be enabled(and disabled again) by
+writing to memory.move_charge_at_immigrate of the destination cgroup.
+
+If you want to enable it:
+
+# echo (some positive value) > memory.move_charge_at_immigrate
+
+Note: Each bits of move_charge_at_immigrate has its own meaning about what type
+ of charges should be moved. See 8.2 for details.
+Note: Charges are moved only when you move mm->owner, IOW, a leader of a thread
+ group.
+Note: If we cannot find enough space for the task in the destination cgroup, we
+ try to make space by reclaiming memory. Task migration may fail if we
+ cannot make enough space.
+Note: It can take several seconds if you move charges in giga bytes order.
+
+And if you want disable it again:
+
+# echo 0 > memory.move_charge_at_immigrate
+
+8.2 Type of charges which can be move
+
+Each bits of move_charge_at_immigrate has its own meaning about what type of
+charges should be moved.
+
+ bit | what type of charges would be moved ?
+ -----+------------------------------------------------------------------------
+ 0 | A charge of an anonymous page(or swap of it) used by the target task.
+ | Those pages and swaps must be used only by the target task. You must
+ | enable Swap Extension(see 2.4) to enable move of swap charges.
+
+Note: Those pages and swaps must be charged to the old cgroup.
+Note: More type of pages(e.g. file cache, shmem,) will be supported by other
+ bits in future.
+
+8.3 TODO
+
+- Add support for other types of pages(e.g. file cache, shmem, etc.).
+- Implement madvise(2) to let users decide the vma to be moved or not to be
+ moved.
+- All of moving charge operations are done under cgroup_mutex. It's not good
+ behavior to hold the mutex too long, so we may need some trick.
+
+9. TODO
1. Add support for accounting huge pages (as a separate controller)
2. Make per-cgroup scanner reclaim not-shared pages first
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d813823ab08..59ffaf511d7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -226,11 +226,26 @@ struct mem_cgroup {
bool memsw_is_minimum;
/*
+ * Should we move charges of a task when a task is moved into this
+ * mem_cgroup ? And what type of charges should we move ?
+ */
+ unsigned long move_charge_at_immigrate;
+
+ /*
* statistics. This must be placed at the end of memcg.
*/
struct mem_cgroup_stat stat;
};
+/* Stuffs for move charges at task migration. */
+/*
+ * Types of charges to be moved. "move_charge_at_immitgrate" is treated as a
+ * left-shifted bitmap of these types.
+ */
+enum move_type {
+ NR_MOVE_TYPE,
+};
+
/*
* Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft
* limit reclaim to prevent infinite loops, if they ever occur.
@@ -2865,6 +2880,31 @@ static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
return 0;
}
+static u64 mem_cgroup_move_charge_read(struct cgroup *cgrp,
+ struct cftype *cft)
+{
+ return mem_cgroup_from_cont(cgrp)->move_charge_at_immigrate;
+}
+
+static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
+ struct cftype *cft, u64 val)
+{
+ struct mem_cgroup *mem = mem_cgroup_from_cont(cgrp);
+
+ if (val >= (1 << NR_MOVE_TYPE))
+ return -EINVAL;
+ /*
+ * We check this value several times in both in can_attach() and
+ * attach(), so we need cgroup lock to prevent this value from being
+ * inconsistent.
+ */
+ cgroup_lock();
+ mem->move_charge_at_immigrate = val;
+ cgroup_unlock();
+
+ return 0;
+}
+
/* For read statistics */
enum {
@@ -3098,6 +3138,11 @@ static struct cftype mem_cgroup_files[] = {
.read_u64 = mem_cgroup_swappiness_read,
.write_u64 = mem_cgroup_swappiness_write,
},
+ {
+ .name = "move_charge_at_immigrate",
+ .read_u64 = mem_cgroup_move_charge_read,
+ .write_u64 = mem_cgroup_move_charge_write,
+ },
};
#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
@@ -3345,6 +3390,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
if (parent)
mem->swappiness = get_swappiness(parent);
atomic_set(&mem->refcnt, 1);
+ mem->move_charge_at_immigrate = 0;
return &mem->css;
free_out:
__mem_cgroup_free(mem);
@@ -3381,16 +3427,57 @@ static int mem_cgroup_populate(struct cgroup_subsys *ss,
return ret;
}
+/* Handlers for move charge at task migration. */
+static int mem_cgroup_can_move_charge(void)
+{
+ return 0;
+}
+
+static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
+ struct cgroup *cgroup,
+ struct task_struct *p,
+ bool threadgroup)
+{
+ int ret = 0;
+ struct mem_cgroup *mem = mem_cgroup_from_cont(cgroup);
+
+ if (mem->move_charge_at_immigrate) {
+ struct mm_struct *mm;
+ struct mem_cgroup *from = mem_cgroup_from_task(p);
+
+ VM_BUG_ON(from == mem);
+
+ mm = get_task_mm(p);
+ if (!mm)
+ return 0;
+
+ /* We move charges only when we move a owner of the mm */
+ if (mm->owner == p)
+ ret = mem_cgroup_can_move_charge();
+
+ mmput(mm);
+ }
+ return ret;
+}
+
+static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
+ struct cgroup *cgroup,
+ struct task_struct *p,
+ bool threadgroup)
+{
+}
+
+static void mem_cgroup_move_charge(void)
+{
+}
+
static void mem_cgroup_move_task(struct cgroup_subsys *ss,
struct cgroup *cont,
struct cgroup *old_cont,
struct task_struct *p,
bool threadgroup)
{
- /*
- * FIXME: It's better to move charges of this process from old
- * memcg to new memcg. But it's just on TODO-List now.
- */
+ mem_cgroup_move_charge();
}
struct cgroup_subsys mem_cgroup_subsys = {
@@ -3400,6 +3487,8 @@ struct cgroup_subsys mem_cgroup_subsys = {
.pre_destroy = mem_cgroup_pre_destroy,
.destroy = mem_cgroup_destroy,
.populate = mem_cgroup_populate,
+ .can_attach = mem_cgroup_can_attach,
+ .cancel_attach = mem_cgroup_cancel_attach,
.attach = mem_cgroup_move_task,
.early_init = 0,
.use_id = 1,