From 441c7e0a2ed38827b48b907bd1fa29faba2017a3 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 10 Jun 2009 20:05:53 +0300 Subject: bootmem: use slab if bootmem is no longer available As a preparation for initializing the slab allocator early, make sure the bootmem allocator does not crash and burn if someone calls it after slab is up; otherwise we'd need a flag day for switching to early slab. Acked-by: Johannes Weiner Acked-by: Linus Torvalds Cc: Christoph Lameter Cc: Ingo Molnar Cc: Matt Mackall Cc: Nick Piggin Cc: Yinghai Lu Signed-off-by: Pekka Enberg --- mm/bootmem.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'mm') diff --git a/mm/bootmem.c b/mm/bootmem.c index daf92713f7d..457269c73e8 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -532,6 +532,9 @@ static void * __init alloc_arch_preferred_bootmem(bootmem_data_t *bdata, unsigned long size, unsigned long align, unsigned long goal, unsigned long limit) { + if (WARN_ON_ONCE(slab_is_available())) + return kzalloc(size, GFP_NOWAIT); + #ifdef CONFIG_HAVE_ARCH_BOOTMEM bootmem_data_t *p_bdata; -- cgit v1.2.3 From c91c4773b334d4d3a6d44626dc2a558ad97b86f3 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Thu, 11 Jun 2009 08:10:28 +0300 Subject: bootmem: fix slab fallback on numa If the user requested bootmem allocation on a specific node, we should use kzalloc_node() for the fallback allocation. Cc: Ingo Molnar Cc: Johannes Weiner Cc: Linus Torvalds Cc: Yinghai Lu Signed-off-by: Pekka Enberg --- mm/bootmem.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'mm') diff --git a/mm/bootmem.c b/mm/bootmem.c index 457269c73e8..282df0a09e6 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -665,6 +665,9 @@ static void * __init ___alloc_bootmem_node(bootmem_data_t *bdata, void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal) { + if (WARN_ON_ONCE(slab_is_available())) + return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); + return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, 0); } @@ -696,6 +699,9 @@ void * __init __alloc_bootmem_node_nopanic(pg_data_t *pgdat, unsigned long size, { void *ptr; + if (WARN_ON_ONCE(slab_is_available())) + return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); + ptr = alloc_arch_preferred_bootmem(pgdat->bdata, size, align, goal, 0); if (ptr) return ptr; @@ -748,6 +754,9 @@ void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal) { + if (WARN_ON_ONCE(slab_is_available())) + return kzalloc_node(size, GFP_NOWAIT, pgdat->node_id); + return ___alloc_bootmem_node(pgdat->bdata, size, align, goal, ARCH_LOW_ADDRESS_LIMIT); } -- cgit v1.2.3 From 83b519e8b9572c319c8e0c615ee5dd7272856090 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Wed, 10 Jun 2009 19:40:04 +0300 Subject: slab: setup allocators earlier in the boot sequence This patch makes kmalloc() available earlier in the boot sequence so we can get rid of some bootmem allocations. The bulk of the changes are due to kmem_cache_init() being called with interrupts disabled which requires some changes to allocator boostrap code. Note: 32-bit x86 does WP protect test in mem_init() so we must setup traps before we call mem_init() during boot as reported by Ingo Molnar: We have a hard crash in the WP-protect code: [ 0.000000] Checking if this processor honours the WP bit even in supervisor mode...BUG: Int 14: CR2 ffcff000 [ 0.000000] EDI 00000188 ESI 00000ac7 EBP c17eaf9c ESP c17eaf8c [ 0.000000] EBX 000014e0 EDX 0000000e ECX 01856067 EAX 00000001 [ 0.000000] err 00000003 EIP c10135b1 CS 00000060 flg 00010002 [ 0.000000] Stack: c17eafa8 c17fd410 c16747bc c17eafc4 c17fd7e5 000011fd f8616000 c18237cc [ 0.000000] 00099800 c17bb000 c17eafec c17f1668 000001c5 c17f1322 c166e039 c1822bf0 [ 0.000000] c166e033 c153a014 c18237cc 00020800 c17eaff8 c17f106a 00020800 01ba5003 [ 0.000000] Pid: 0, comm: swapper Not tainted 2.6.30-tip-02161-g7a74539-dirty #52203 [ 0.000000] Call Trace: [ 0.000000] [] ? printk+0x14/0x16 [ 0.000000] [] ? do_test_wp_bit+0x19/0x23 [ 0.000000] [] ? test_wp_bit+0x26/0x64 [ 0.000000] [] ? mem_init+0x1ba/0x1d8 [ 0.000000] [] ? start_kernel+0x164/0x2f7 [ 0.000000] [] ? unknown_bootoption+0x0/0x19c [ 0.000000] [] ? __init_begin+0x6a/0x6f Acked-by: Johannes Weiner Acked-by Linus Torvalds Cc: Christoph Lameter Cc: Ingo Molnar Cc: Matt Mackall Cc: Nick Piggin Cc: Yinghai Lu Signed-off-by: Pekka Enberg --- mm/slab.c | 85 +++++++++++++++++++++++++++++++++------------------------------ mm/slub.c | 17 +++++++------ 2 files changed, 55 insertions(+), 47 deletions(-) (limited to 'mm') diff --git a/mm/slab.c b/mm/slab.c index f85831da908..2bd611fa87b 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -315,7 +315,7 @@ static int drain_freelist(struct kmem_cache *cache, struct kmem_list3 *l3, int tofree); static void free_block(struct kmem_cache *cachep, void **objpp, int len, int node); -static int enable_cpucache(struct kmem_cache *cachep); +static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp); static void cache_reap(struct work_struct *unused); /* @@ -958,12 +958,12 @@ static void __cpuinit start_cpu_timer(int cpu) } static struct array_cache *alloc_arraycache(int node, int entries, - int batchcount) + int batchcount, gfp_t gfp) { int memsize = sizeof(void *) * entries + sizeof(struct array_cache); struct array_cache *nc = NULL; - nc = kmalloc_node(memsize, GFP_KERNEL, node); + nc = kmalloc_node(memsize, gfp, node); if (nc) { nc->avail = 0; nc->limit = entries; @@ -1003,7 +1003,7 @@ static int transfer_objects(struct array_cache *to, #define drain_alien_cache(cachep, alien) do { } while (0) #define reap_alien(cachep, l3) do { } while (0) -static inline struct array_cache **alloc_alien_cache(int node, int limit) +static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) { return (struct array_cache **)BAD_ALIEN_MAGIC; } @@ -1034,7 +1034,7 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep, static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); static void *alternate_node_alloc(struct kmem_cache *, gfp_t); -static struct array_cache **alloc_alien_cache(int node, int limit) +static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) { struct array_cache **ac_ptr; int memsize = sizeof(void *) * nr_node_ids; @@ -1042,14 +1042,14 @@ static struct array_cache **alloc_alien_cache(int node, int limit) if (limit > 1) limit = 12; - ac_ptr = kmalloc_node(memsize, GFP_KERNEL, node); + ac_ptr = kmalloc_node(memsize, gfp, node); if (ac_ptr) { for_each_node(i) { if (i == node || !node_online(i)) { ac_ptr[i] = NULL; continue; } - ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d); + ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp); if (!ac_ptr[i]) { for (i--; i >= 0; i--) kfree(ac_ptr[i]); @@ -1282,20 +1282,20 @@ static int __cpuinit cpuup_prepare(long cpu) struct array_cache **alien = NULL; nc = alloc_arraycache(node, cachep->limit, - cachep->batchcount); + cachep->batchcount, GFP_KERNEL); if (!nc) goto bad; if (cachep->shared) { shared = alloc_arraycache(node, cachep->shared * cachep->batchcount, - 0xbaadf00d); + 0xbaadf00d, GFP_KERNEL); if (!shared) { kfree(nc); goto bad; } } if (use_alien_caches) { - alien = alloc_alien_cache(node, cachep->limit); + alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL); if (!alien) { kfree(shared); kfree(nc); @@ -1399,10 +1399,9 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, { struct kmem_list3 *ptr; - ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, nodeid); + ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid); BUG_ON(!ptr); - local_irq_disable(); memcpy(ptr, list, sizeof(struct kmem_list3)); /* * Do not assume that spinlocks can be initialized via memcpy: @@ -1411,7 +1410,6 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, MAKE_ALL_LISTS(cachep, ptr, nodeid); cachep->nodelists[nodeid] = ptr; - local_irq_enable(); } /* @@ -1575,9 +1573,8 @@ void __init kmem_cache_init(void) { struct array_cache *ptr; - ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); + ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); - local_irq_disable(); BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); memcpy(ptr, cpu_cache_get(&cache_cache), sizeof(struct arraycache_init)); @@ -1587,11 +1584,9 @@ void __init kmem_cache_init(void) spin_lock_init(&ptr->lock); cache_cache.array[smp_processor_id()] = ptr; - local_irq_enable(); - ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); + ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); - local_irq_disable(); BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep) != &initarray_generic.cache); memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), @@ -1603,7 +1598,6 @@ void __init kmem_cache_init(void) malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = ptr; - local_irq_enable(); } /* 5) Replace the bootstrap kmem_list3's */ { @@ -1627,7 +1621,7 @@ void __init kmem_cache_init(void) struct kmem_cache *cachep; mutex_lock(&cache_chain_mutex); list_for_each_entry(cachep, &cache_chain, next) - if (enable_cpucache(cachep)) + if (enable_cpucache(cachep, GFP_NOWAIT)) BUG(); mutex_unlock(&cache_chain_mutex); } @@ -2064,10 +2058,10 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, return left_over; } -static int __init_refok setup_cpu_cache(struct kmem_cache *cachep) +static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) { if (g_cpucache_up == FULL) - return enable_cpucache(cachep); + return enable_cpucache(cachep, gfp); if (g_cpucache_up == NONE) { /* @@ -2089,7 +2083,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep) g_cpucache_up = PARTIAL_AC; } else { cachep->array[smp_processor_id()] = - kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); + kmalloc(sizeof(struct arraycache_init), gfp); if (g_cpucache_up == PARTIAL_AC) { set_up_list3s(cachep, SIZE_L3); @@ -2153,6 +2147,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, { size_t left_over, slab_size, ralign; struct kmem_cache *cachep = NULL, *pc; + gfp_t gfp; /* * Sanity checks... these are all serious usage bugs. @@ -2168,8 +2163,10 @@ kmem_cache_create (const char *name, size_t size, size_t align, * We use cache_chain_mutex to ensure a consistent view of * cpu_online_mask as well. Please see cpuup_callback */ - get_online_cpus(); - mutex_lock(&cache_chain_mutex); + if (slab_is_available()) { + get_online_cpus(); + mutex_lock(&cache_chain_mutex); + } list_for_each_entry(pc, &cache_chain, next) { char tmp; @@ -2278,8 +2275,13 @@ kmem_cache_create (const char *name, size_t size, size_t align, */ align = ralign; + if (slab_is_available()) + gfp = GFP_KERNEL; + else + gfp = GFP_NOWAIT; + /* Get cache's description obj. */ - cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL); + cachep = kmem_cache_zalloc(&cache_cache, gfp); if (!cachep) goto oops; @@ -2382,7 +2384,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, cachep->ctor = ctor; cachep->name = name; - if (setup_cpu_cache(cachep)) { + if (setup_cpu_cache(cachep, gfp)) { __kmem_cache_destroy(cachep); cachep = NULL; goto oops; @@ -2394,8 +2396,10 @@ oops: if (!cachep && (flags & SLAB_PANIC)) panic("kmem_cache_create(): failed to create slab `%s'\n", name); - mutex_unlock(&cache_chain_mutex); - put_online_cpus(); + if (slab_is_available()) { + mutex_unlock(&cache_chain_mutex); + put_online_cpus(); + } return cachep; } EXPORT_SYMBOL(kmem_cache_create); @@ -3802,7 +3806,7 @@ EXPORT_SYMBOL_GPL(kmem_cache_name); /* * This initializes kmem_list3 or resizes various caches for all nodes. */ -static int alloc_kmemlist(struct kmem_cache *cachep) +static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) { int node; struct kmem_list3 *l3; @@ -3812,7 +3816,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep) for_each_online_node(node) { if (use_alien_caches) { - new_alien = alloc_alien_cache(node, cachep->limit); + new_alien = alloc_alien_cache(node, cachep->limit, gfp); if (!new_alien) goto fail; } @@ -3821,7 +3825,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep) if (cachep->shared) { new_shared = alloc_arraycache(node, cachep->shared*cachep->batchcount, - 0xbaadf00d); + 0xbaadf00d, gfp); if (!new_shared) { free_alien_cache(new_alien); goto fail; @@ -3850,7 +3854,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep) free_alien_cache(new_alien); continue; } - l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node); + l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node); if (!l3) { free_alien_cache(new_alien); kfree(new_shared); @@ -3906,18 +3910,18 @@ static void do_ccupdate_local(void *info) /* Always called with the cache_chain_mutex held */ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, - int batchcount, int shared) + int batchcount, int shared, gfp_t gfp) { struct ccupdate_struct *new; int i; - new = kzalloc(sizeof(*new), GFP_KERNEL); + new = kzalloc(sizeof(*new), gfp); if (!new) return -ENOMEM; for_each_online_cpu(i) { new->new[i] = alloc_arraycache(cpu_to_node(i), limit, - batchcount); + batchcount, gfp); if (!new->new[i]) { for (i--; i >= 0; i--) kfree(new->new[i]); @@ -3944,11 +3948,11 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, kfree(ccold); } kfree(new); - return alloc_kmemlist(cachep); + return alloc_kmemlist(cachep, gfp); } /* Called with cache_chain_mutex held always */ -static int enable_cpucache(struct kmem_cache *cachep) +static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp) { int err; int limit, shared; @@ -3994,7 +3998,7 @@ static int enable_cpucache(struct kmem_cache *cachep) if (limit > 32) limit = 32; #endif - err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared); + err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp); if (err) printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n", cachep->name, -err); @@ -4300,7 +4304,8 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer, res = 0; } else { res = do_tune_cpucache(cachep, limit, - batchcount, shared); + batchcount, shared, + GFP_KERNEL); } break; } diff --git a/mm/slub.c b/mm/slub.c index 5e805a6fe36..c1815a63807 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -2557,13 +2557,16 @@ static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s, if (gfp_flags & SLUB_DMA) flags = SLAB_CACHE_DMA; - down_write(&slub_lock); + /* + * This function is called with IRQs disabled during early-boot on + * single CPU so there's no need to take slub_lock here. + */ if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN, flags, NULL)) goto panic; list_add(&s->list, &slab_caches); - up_write(&slub_lock); + if (sysfs_slab_add(s)) goto panic; return s; @@ -3021,7 +3024,7 @@ void __init kmem_cache_init(void) * kmem_cache_open for slab_state == DOWN. */ create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node", - sizeof(struct kmem_cache_node), GFP_KERNEL); + sizeof(struct kmem_cache_node), GFP_NOWAIT); kmalloc_caches[0].refcount = -1; caches++; @@ -3034,16 +3037,16 @@ void __init kmem_cache_init(void) /* Caches that are not of the two-to-the-power-of size */ if (KMALLOC_MIN_SIZE <= 64) { create_kmalloc_cache(&kmalloc_caches[1], - "kmalloc-96", 96, GFP_KERNEL); + "kmalloc-96", 96, GFP_NOWAIT); caches++; create_kmalloc_cache(&kmalloc_caches[2], - "kmalloc-192", 192, GFP_KERNEL); + "kmalloc-192", 192, GFP_NOWAIT); caches++; } for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { create_kmalloc_cache(&kmalloc_caches[i], - "kmalloc", 1 << i, GFP_KERNEL); + "kmalloc", 1 << i, GFP_NOWAIT); caches++; } @@ -3080,7 +3083,7 @@ void __init kmem_cache_init(void) /* Provide the correct kmalloc names now that the caches are up */ for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) kmalloc_caches[i]. name = - kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i); + kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i); #ifdef CONFIG_SMP register_cpu_notifier(&slab_notifier); -- cgit v1.2.3 From 43ebdac42f16037263b52a5aeedcd1bfa4a9bb29 Mon Sep 17 00:00:00 2001 From: Pekka Enberg Date: Mon, 25 May 2009 15:01:35 +0300 Subject: vmalloc: use kzalloc() instead of alloc_bootmem() We can call vmalloc_init() after kmem_cache_init() and use kzalloc() instead of the bootmem allocator when initializing vmalloc data structures. Acked-by: Johannes Weiner Acked-by: Linus Torvalds Acked-by: Nick Piggin Cc: Ingo Molnar Cc: Yinghai Lu Signed-off-by: Pekka Enberg --- mm/vmalloc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'mm') diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 083716ea38c..323513858c2 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include @@ -1032,7 +1031,7 @@ void __init vmalloc_init(void) /* Import existing vmlist entries. */ for (tmp = vmlist; tmp; tmp = tmp->next) { - va = alloc_bootmem(sizeof(struct vmap_area)); + va = kzalloc(sizeof(struct vmap_area), GFP_NOWAIT); va->flags = tmp->flags | VM_VM_AREA; va->va_start = (unsigned long)tmp->addr; va->va_end = va->va_start + tmp->size; -- cgit v1.2.3 From 959982fee4e635c61780e989c3e34267143fcc02 Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Thu, 28 May 2009 18:15:16 -0700 Subject: memcg: don't use bootmem allocator in setup code The bootmem allocator is no longer available for page_cgroup_init() because we set up the kernel slab allocator much earlier now. Cc: Ingo Molnar Cc: Johannes Weiner Cc: Linus Torvalds Signed-off-by: Yinghai Lu Signed-off-by: Pekka Enberg --- mm/page_cgroup.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'mm') diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index 791905c991d..3dd4a909a1d 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -47,6 +47,8 @@ static int __init alloc_node_page_cgroup(int nid) struct page_cgroup *base, *pc; unsigned long table_size; unsigned long start_pfn, nr_pages, index; + struct page *page; + unsigned int order; start_pfn = NODE_DATA(nid)->node_start_pfn; nr_pages = NODE_DATA(nid)->node_spanned_pages; @@ -55,11 +57,13 @@ static int __init alloc_node_page_cgroup(int nid) return 0; table_size = sizeof(struct page_cgroup) * nr_pages; - - base = __alloc_bootmem_node_nopanic(NODE_DATA(nid), - table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); - if (!base) + order = get_order(table_size); + page = alloc_pages_node(nid, GFP_NOWAIT | __GFP_ZERO, order); + if (!page) + page = alloc_pages_node(-1, GFP_NOWAIT | __GFP_ZERO, order); + if (!page) return -ENOMEM; + base = page_address(page); for (index = 0; index < nr_pages; index++) { pc = base + index; __init_page_cgroup(pc, start_pfn + index); -- cgit v1.2.3