aboutsummaryrefslogtreecommitdiff
path: root/mm/slub.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/slub.c')
-rw-r--r--mm/slub.c150
1 files changed, 128 insertions, 22 deletions
diff --git a/mm/slub.c b/mm/slub.c
index 87f9f32bf0c..ba2ca53f6c3 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -31,6 +31,7 @@
#include <linux/fault-inject.h>
#include <linux/stacktrace.h>
#include <linux/prefetch.h>
+#include <linux/memcontrol.h>
#include <trace/events/kmem.h>
@@ -200,13 +201,14 @@ enum track_item { TRACK_ALLOC, TRACK_FREE };
static int sysfs_slab_add(struct kmem_cache *);
static int sysfs_slab_alias(struct kmem_cache *, const char *);
static void sysfs_slab_remove(struct kmem_cache *);
-
+static void memcg_propagate_slab_attrs(struct kmem_cache *s);
#else
static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
{ return 0; }
static inline void sysfs_slab_remove(struct kmem_cache *s) { }
+static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { }
#endif
static inline void stat(const struct kmem_cache *s, enum stat_item si)
@@ -1343,6 +1345,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
void *start;
void *last;
void *p;
+ int order;
BUG_ON(flags & GFP_SLAB_BUG_MASK);
@@ -1351,7 +1354,9 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
if (!page)
goto out;
+ order = compound_order(page);
inc_slabs_node(s, page_to_nid(page), page->objects);
+ memcg_bind_pages(s, order);
page->slab_cache = s;
__SetPageSlab(page);
if (page->pfmemalloc)
@@ -1360,7 +1365,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
start = page_address(page);
if (unlikely(s->flags & SLAB_POISON))
- memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page));
+ memset(start, POISON_INUSE, PAGE_SIZE << order);
last = start;
for_each_object(p, s, start, page->objects) {
@@ -1401,10 +1406,12 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
__ClearPageSlabPfmemalloc(page);
__ClearPageSlab(page);
+
+ memcg_release_pages(s, order);
reset_page_mapcount(page);
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += pages;
- __free_pages(page, order);
+ __free_memcg_kmem_pages(page, order);
}
#define need_reserve_slab_rcu \
@@ -2322,6 +2329,7 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
if (slab_pre_alloc_hook(s, gfpflags))
return NULL;
+ s = memcg_kmem_get_cache(s, gfpflags);
redo:
/*
@@ -2610,19 +2618,10 @@ redo:
void kmem_cache_free(struct kmem_cache *s, void *x)
{
- struct page *page;
-
- page = virt_to_head_page(x);
-
- if (kmem_cache_debug(s) && page->slab_cache != s) {
- pr_err("kmem_cache_free: Wrong slab cache. %s but object"
- " is from %s\n", page->slab_cache->name, s->name);
- WARN_ON_ONCE(1);
+ s = cache_from_obj(s, x);
+ if (!s)
return;
- }
-
- slab_free(s, page, x, _RET_IP_);
-
+ slab_free(s, virt_to_head_page(x), x, _RET_IP_);
trace_kmem_cache_free(_RET_IP_, x);
}
EXPORT_SYMBOL(kmem_cache_free);
@@ -3154,8 +3153,19 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
{
int rc = kmem_cache_close(s);
- if (!rc)
+ if (!rc) {
+ /*
+ * We do the same lock strategy around sysfs_slab_add, see
+ * __kmem_cache_create. Because this is pretty much the last
+ * operation we do and the lock will be released shortly after
+ * that in slab_common.c, we could just move sysfs_slab_remove
+ * to a later point in common code. We should do that when we
+ * have a common sysfs framework for all allocators.
+ */
+ mutex_unlock(&slab_mutex);
sysfs_slab_remove(s);
+ mutex_lock(&slab_mutex);
+ }
return rc;
}
@@ -3292,7 +3302,7 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
struct page *page;
void *ptr = NULL;
- flags |= __GFP_COMP | __GFP_NOTRACK;
+ flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_KMEMCG;
page = alloc_pages_node(node, flags, get_order(size));
if (page)
ptr = page_address(page);
@@ -3398,7 +3408,7 @@ void kfree(const void *x)
if (unlikely(!PageSlab(page))) {
BUG_ON(!PageCompound(page));
kmemleak_free(x);
- __free_pages(page, compound_order(page));
+ __free_memcg_kmem_pages(page, compound_order(page));
return;
}
slab_free(page->slab_cache, page, object, _RET_IP_);
@@ -3786,7 +3796,7 @@ static int slab_unmergeable(struct kmem_cache *s)
return 0;
}
-static struct kmem_cache *find_mergeable(size_t size,
+static struct kmem_cache *find_mergeable(struct mem_cgroup *memcg, size_t size,
size_t align, unsigned long flags, const char *name,
void (*ctor)(void *))
{
@@ -3822,17 +3832,21 @@ static struct kmem_cache *find_mergeable(size_t size,
if (s->size - size >= sizeof(void *))
continue;
+ if (!cache_match_memcg(s, memcg))
+ continue;
+
return s;
}
return NULL;
}
-struct kmem_cache *__kmem_cache_alias(const char *name, size_t size,
- size_t align, unsigned long flags, void (*ctor)(void *))
+struct kmem_cache *
+__kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size,
+ size_t align, unsigned long flags, void (*ctor)(void *))
{
struct kmem_cache *s;
- s = find_mergeable(size, align, flags, name, ctor);
+ s = find_mergeable(memcg, size, align, flags, name, ctor);
if (s) {
s->refcount++;
/*
@@ -3863,6 +3877,7 @@ int __kmem_cache_create(struct kmem_cache *s, unsigned long flags)
if (slab_state <= UP)
return 0;
+ memcg_propagate_slab_attrs(s);
mutex_unlock(&slab_mutex);
err = sysfs_slab_add(s);
mutex_lock(&slab_mutex);
@@ -5096,10 +5111,95 @@ static ssize_t slab_attr_store(struct kobject *kobj,
return -EIO;
err = attribute->store(s, buf, len);
+#ifdef CONFIG_MEMCG_KMEM
+ if (slab_state >= FULL && err >= 0 && is_root_cache(s)) {
+ int i;
+ mutex_lock(&slab_mutex);
+ if (s->max_attr_size < len)
+ s->max_attr_size = len;
+
+ /*
+ * This is a best effort propagation, so this function's return
+ * value will be determined by the parent cache only. This is
+ * basically because not all attributes will have a well
+ * defined semantics for rollbacks - most of the actions will
+ * have permanent effects.
+ *
+ * Returning the error value of any of the children that fail
+ * is not 100 % defined, in the sense that users seeing the
+ * error code won't be able to know anything about the state of
+ * the cache.
+ *
+ * Only returning the error code for the parent cache at least
+ * has well defined semantics. The cache being written to
+ * directly either failed or succeeded, in which case we loop
+ * through the descendants with best-effort propagation.
+ */
+ for_each_memcg_cache_index(i) {
+ struct kmem_cache *c = cache_from_memcg(s, i);
+ if (c)
+ attribute->store(c, buf, len);
+ }
+ mutex_unlock(&slab_mutex);
+ }
+#endif
return err;
}
+static void memcg_propagate_slab_attrs(struct kmem_cache *s)
+{
+#ifdef CONFIG_MEMCG_KMEM
+ int i;
+ char *buffer = NULL;
+
+ if (!is_root_cache(s))
+ return;
+
+ /*
+ * This mean this cache had no attribute written. Therefore, no point
+ * in copying default values around
+ */
+ if (!s->max_attr_size)
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) {
+ char mbuf[64];
+ char *buf;
+ struct slab_attribute *attr = to_slab_attr(slab_attrs[i]);
+
+ if (!attr || !attr->store || !attr->show)
+ continue;
+
+ /*
+ * It is really bad that we have to allocate here, so we will
+ * do it only as a fallback. If we actually allocate, though,
+ * we can just use the allocated buffer until the end.
+ *
+ * Most of the slub attributes will tend to be very small in
+ * size, but sysfs allows buffers up to a page, so they can
+ * theoretically happen.
+ */
+ if (buffer)
+ buf = buffer;
+ else if (s->max_attr_size < ARRAY_SIZE(mbuf))
+ buf = mbuf;
+ else {
+ buffer = (char *) get_zeroed_page(GFP_KERNEL);
+ if (WARN_ON(!buffer))
+ continue;
+ buf = buffer;
+ }
+
+ attr->show(s->memcg_params->root_cache, buf);
+ attr->store(s, buf, strlen(buf));
+ }
+
+ if (buffer)
+ free_page((unsigned long)buffer);
+#endif
+}
+
static const struct sysfs_ops slab_sysfs_ops = {
.show = slab_attr_show,
.store = slab_attr_store,
@@ -5156,6 +5256,12 @@ static char *create_unique_id(struct kmem_cache *s)
if (p != name + 1)
*p++ = '-';
p += sprintf(p, "%07d", s->size);
+
+#ifdef CONFIG_MEMCG_KMEM
+ if (!is_root_cache(s))
+ p += sprintf(p, "-%08d", memcg_cache_id(s->memcg_params->memcg));
+#endif
+
BUG_ON(p > name + ID_STR_LENGTH - 1);
return name;
}