From 3947be1969a9ce455ec30f60ef51efb10e4323d1 Mon Sep 17 00:00:00 2001
From: Dave Hansen
Date: Sat, 29 Oct 2005 18:16:54 -0700
Subject: [PATCH] memory hotplug: sysfs and add/remove functions

This adds generic memory add/remove and supporting functions for memory
hotplug into a new file as well as a memory hotplug kernel config option.

Individual architecture patches will follow.

For now, disable memory hotplug when swsusp is enabled. There's a lot of
churn there right now. We'll fix it up properly once it calms down.

Signed-off-by: Matt Tolentino
Signed-off-by: Dave Hansen
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/Kconfig          |   8 +++
 mm/Makefile         |   2 +-
 mm/memory_hotplug.c | 178 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 mm/page_alloc.c     |   4 +-
 4 files changed, 189 insertions(+), 3 deletions(-)
 create mode 100644 mm/memory_hotplug.c

(limited to 'mm')

diff --git a/mm/Kconfig b/mm/Kconfig
index f35a550ba4b..1a4473fcb2c 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -112,6 +112,14 @@ config SPARSEMEM_EXTREME
 	def_bool y
 	depends on SPARSEMEM && !SPARSEMEM_STATIC
 
+# eventually, we can have this option just 'select SPARSEMEM'
+config MEMORY_HOTPLUG
+	bool "Allow for memory hot-add"
+	depends on SPARSEMEM && HOTPLUG && !SOFTWARE_SUSPEND
+
+comment "Memory hotplug is currently incompatible with Software Suspend"
+	depends on SPARSEMEM && HOTPLUG && SOFTWARE_SUSPEND
+
 # Heavily threaded applications may benefit from splitting the mm-wide
 # page_table_lock, so that faults on different parts of the user address
 # space can be handled with less contention: split it at this NR_CPUS.
diff --git a/mm/Makefile b/mm/Makefile
index 4cd69e3ce42..2fa6d2ca9f2 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -18,5 +18,5 @@ obj-$(CONFIG_NUMA) += mempolicy.o
 obj-$(CONFIG_SPARSEMEM) += sparse.o
 obj-$(CONFIG_SHMEM) += shmem.o
 obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
-
+obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
 obj-$(CONFIG_FS_XIP) += filemap_xip.o
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
new file mode 100644
index 00000000000..855e0fc928b
--- /dev/null
+++ b/mm/memory_hotplug.c
@@ -0,0 +1,238 @@
+/*
+ *  linux/mm/memory_hotplug.c
+ *
+ *  Copyright (C)
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+/*
+ * Allocate and zero a mem_map of nr_pages struct page entries.  The
+ * page allocator is tried first and vmalloc() is the fallback.
+ * Returns NULL if both allocations fail.
+ */
+static struct page *__kmalloc_section_memmap(unsigned long nr_pages)
+{
+	struct page *page, *ret;
+	unsigned long memmap_size = sizeof(struct page) * nr_pages;
+
+	page = alloc_pages(GFP_KERNEL, get_order(memmap_size));
+	if (page)
+		goto got_map_page;
+
+	ret = vmalloc(memmap_size);
+	if (ret)
+		goto got_map_ptr;
+
+	return NULL;
+got_map_page:
+	ret = (struct page *)pfn_to_kaddr(page_to_pfn(page));
+got_map_ptr:
+	memset(ret, 0, memmap_size);
+
+	return ret;
+}
+
+/*
+ * Free a mem_map obtained from __kmalloc_section_memmap().  Whether it
+ * lies inside the vmalloc range tells us which allocator it came from.
+ */
+static void __kfree_section_memmap(struct page *memmap,
+				   unsigned long nr_pages)
+{
+	if (memmap >= (struct page *)VMALLOC_START &&
+	    memmap < (struct page *)VMALLOC_END)
+		vfree(memmap);
+	else
+		free_pages((unsigned long)memmap,
+			   get_order(sizeof(struct page) * nr_pages));
+}
+
+extern void zonetable_add(struct zone *zone, int nid, int zid, unsigned long pfn,
+			  unsigned long size);
+/*
+ * Call memmap_init_zone() and zonetable_add() for the one section that
+ * starts at phys_start_pfn, using the node id and zone index of 'zone'.
+ */
+static void __add_zone(struct zone *zone, unsigned long phys_start_pfn)
+{
+	struct pglist_data *pgdat = zone->zone_pgdat;
+	int nr_pages = PAGES_PER_SECTION;
+	int nid = pgdat->node_id;
+	int zone_type;
+
+	zone_type = zone - pgdat->node_zones;
+	memmap_init_zone(nr_pages, nid, zone_type, phys_start_pfn);
+	zonetable_add(zone, nid, zone_type, phys_start_pfn, nr_pages);
+}
+
+extern int sparse_add_one_section(struct zone *, unsigned long,
+				  struct page *mem_map);
+/*
+ * Add the single section starting at phys_start_pfn to 'zone'.
+ *
+ * Returns -ENOMEM if no mem_map could be allocated, a negative result
+ * of sparse_add_one_section() if that fails, and otherwise whatever
+ * register_new_memory() returns.
+ */
+static int __add_section(struct zone *zone, unsigned long phys_start_pfn)
+{
+	struct pglist_data *pgdat = zone->zone_pgdat;
+	int nr_pages = PAGES_PER_SECTION;
+	struct page *memmap;
+	unsigned long flags;
+	int ret;
+
+	/*
+	 * This can potentially allocate memory, and does its own
+	 * internal locking.
+	 */
+	sparse_index_init(pfn_to_section_nr(phys_start_pfn), pgdat->node_id);
+
+	/*
+	 * GFP_KERNEL and vmalloc() allocations may sleep.  The resize
+	 * lock takes a flags word, so it is presumably an irq-saving
+	 * spinlock: allocate before taking it, not while holding it.
+	 */
+	memmap = __kmalloc_section_memmap(nr_pages);
+	if (!memmap)
+		return -ENOMEM;
+
+	pgdat_resize_lock(pgdat, &flags);
+	ret = sparse_add_one_section(zone, phys_start_pfn, memmap);
+	pgdat_resize_unlock(pgdat, &flags);
+
+	/* the mem_map didn't get used */
+	if (ret <= 0)
+		__kfree_section_memmap(memmap, nr_pages);
+
+	if (ret < 0)
+		return ret;
+
+	__add_zone(zone, phys_start_pfn);
+	return register_new_memory(__pfn_to_section(phys_start_pfn));
+}
+
+/*
+ * Reasonably generic function for adding memory.  It is
+ * expected that archs that support memory hotplug will
+ * call this function after deciding the zone to which to
+ * add the new pages.
+ *
+ * Sections are added one at a time starting at phys_start_pfn.  The
+ * first non-zero result from __add_section() stops the loop and is
+ * returned; 0 means every section was added.
+ */
+int __add_pages(struct zone *zone, unsigned long phys_start_pfn,
+		 unsigned long nr_pages)
+{
+	unsigned long i;
+	int err = 0;
+
+	for (i = 0; i < nr_pages; i += PAGES_PER_SECTION) {
+		err = __add_section(zone, phys_start_pfn + i);
+
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
+/*
+ * Widen the zone's span, under the zone span write lock, so that it
+ * also covers [start_pfn, end_pfn).  The span never shrinks.
+ */
+static void grow_zone_span(struct zone *zone,
+		unsigned long start_pfn, unsigned long end_pfn)
+{
+	unsigned long old_zone_end_pfn;
+
+	zone_span_writelock(zone);
+
+	old_zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
+	if (start_pfn < zone->zone_start_pfn)
+		zone->zone_start_pfn = start_pfn;
+
+	/*
+	 * Recompute the length even if only the start moved down;
+	 * otherwise the old end would drop out of the span.
+	 */
+	if (end_pfn < old_zone_end_pfn)
+		end_pfn = old_zone_end_pfn;
+	zone->spanned_pages = end_pfn - zone->zone_start_pfn;
+
+	zone_span_writeunlock(zone);
+}
+
+/*
+ * Widen the node's span so that it also covers [start_pfn, end_pfn).
+ * The span never shrinks.  online_pages() calls this with the pgdat
+ * resize lock held.
+ */
+static void grow_pgdat_span(struct pglist_data *pgdat,
+		unsigned long start_pfn, unsigned long end_pfn)
+{
+	unsigned long old_pgdat_end_pfn =
+		pgdat->node_start_pfn + pgdat->node_spanned_pages;
+
+	if (start_pfn < pgdat->node_start_pfn)
+		pgdat->node_start_pfn = start_pfn;
+
+	/* length = end - start, keeping the old end if it lies further out */
+	if (end_pfn < old_pgdat_end_pfn)
+		end_pfn = old_pgdat_end_pfn;
+	pgdat->node_spanned_pages = end_pfn - pgdat->node_start_pfn;
+}
+
+/*
+ * Bring the nr_pages pages starting at pfn online: grow the spans of
+ * the first page's zone and its node to cover them, pass each page to
+ * online_page() and add the page count to that zone's present_pages.
+ * Always returns 0.
+ */
+int online_pages(unsigned long pfn, unsigned long nr_pages)
+{
+	unsigned long i;
+	unsigned long flags;
+	unsigned long onlined_pages = 0;
+	struct zone *zone;
+
+	/*
+	 * This doesn't need a lock to do pfn_to_page().
+	 * The section can't be removed here because of the
+	 * memory_block->state_sem.
+	 */
+	zone = page_zone(pfn_to_page(pfn));
+	pgdat_resize_lock(zone->zone_pgdat, &flags);
+	grow_zone_span(zone, pfn, pfn + nr_pages);
+	grow_pgdat_span(zone->zone_pgdat, pfn, pfn + nr_pages);
+	pgdat_resize_unlock(zone->zone_pgdat, &flags);
+
+	for (i = 0; i < nr_pages; i++) {
+		struct page *page = pfn_to_page(pfn + i);
+		online_page(page);
+		onlined_pages++;
+	}
+	zone->present_pages += onlined_pages;
+
+	return 0;
+}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 817635f2ab6..183abf39b44 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1686,7 +1686,7 @@ static void __init calculate_zone_totalpages(struct pglist_data *pgdat,
  * up by free_all_bootmem() once the early boot process is
  * done. Non-atomic initialization, single-pass.
  */
-void __init memmap_init_zone(unsigned long size, int nid, unsigned long zone,
+void __devinit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 		unsigned long start_pfn)
 {
 	struct page *page;
@@ -2407,7 +2407,7 @@ static void setup_per_zone_lowmem_reserve(void)
  * that the pages_{min,low,high} values for each zone are set correctly
  * with respect to min_free_kbytes.
  */
-static void setup_per_zone_pages_min(void)
+void setup_per_zone_pages_min(void)
 {
 	unsigned long pages_min = min_free_kbytes >> (PAGE_SHIFT - 10);
 	unsigned long lowmem_pages = 0;
-- 
cgit v1.2.3