summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- drivers/base/memory.c 19
-rw-r--r-- include/linux/memory.h 27
-rw-r--r-- mm/page_alloc.c 57
3 files changed, 96 insertions, 7 deletions
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index c4c8f2e1dd1..d7d77d4a402 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -63,6 +63,20 @@ void unregister_memory_notifier(struct notifier_block *nb)
}
EXPORT_SYMBOL(unregister_memory_notifier);
+static ATOMIC_NOTIFIER_HEAD(memory_isolate_chain);
+/*
+ * Add @nb to memory_isolate_chain, the atomic notifier chain that
+ * memory_isolate_notify() runs. Returns whatever
+ * atomic_notifier_chain_register() returns.
+ */
+int register_memory_isolate_notifier(struct notifier_block *nb)
+{
+ return atomic_notifier_chain_register(&memory_isolate_chain, nb);
+}
+EXPORT_SYMBOL(register_memory_isolate_notifier);
+/*
+ * Remove @nb from memory_isolate_chain. Nothing is reported back to
+ * the caller.
+ */
+void unregister_memory_isolate_notifier(struct notifier_block *nb)
+{
+ atomic_notifier_chain_unregister(&memory_isolate_chain, nb);
+}
+EXPORT_SYMBOL(unregister_memory_isolate_notifier);
+
/*
* register_memory - Setup a sysfs device for a memory block
*/
@@ -157,6 +171,11 @@ int memory_notify(unsigned long val, void *v)
return blocking_notifier_call_chain(&memory_chain, val, v);
}
+/*
+ * Invoke the callbacks on memory_isolate_chain with event @val and
+ * payload @v, returning the result of atomic_notifier_call_chain().
+ */
+int memory_isolate_notify(unsigned long val, void *v)
+{
+ return atomic_notifier_call_chain(&memory_isolate_chain, val, v);
+}
+
/*
* MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
* OK to have direct references to sparsemem variables in here.
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 37fa19b34ef..1adfe779eb9 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -50,6 +50,19 @@ struct memory_notify {
int status_change_nid;
};
+/*
+ * During pageblock isolation, count the number of pages within the
+ * range [start_pfn, start_pfn + nr_pages) which are owned by code
+ * in the notifier chain.
+ */
+#define MEM_ISOLATE_COUNT (1<<0)
+
+struct memory_isolate_notify {
+ unsigned long start_pfn; /* First pfn of the range to check */
+ unsigned int nr_pages; /* Number of pages in the range to check */
+ unsigned int pages_found; /* Number of pages in the range owned by notifier callbacks */
+};
+
struct notifier_block;
struct mem_section;
@@ -76,14 +89,28 @@ static inline int memory_notify(unsigned long val, void *v)
{
return 0;
}
+/*
+ * Stub versions of the memory isolate notifier API for this branch of
+ * the preprocessor conditional (its condition lies outside this hunk):
+ * registration and notification return 0, unregistration does nothing.
+ */
+static inline int register_memory_isolate_notifier(struct notifier_block *nb)
+{
+ return 0;
+}
+static inline void unregister_memory_isolate_notifier(struct notifier_block *nb)
+{
+}
+static inline int memory_isolate_notify(unsigned long val, void *v)
+{
+ return 0;
+}
#else
extern int register_memory_notifier(struct notifier_block *nb);
extern void unregister_memory_notifier(struct notifier_block *nb);
+extern int register_memory_isolate_notifier(struct notifier_block *nb);
+extern void unregister_memory_isolate_notifier(struct notifier_block *nb);
extern int register_new_memory(int, struct mem_section *);
extern int unregister_memory_section(struct mem_section *);
extern int memory_dev_init(void);
extern int remove_memory_block(unsigned long, struct mem_section *, int);
extern int memory_notify(unsigned long val, void *v);
+extern int memory_isolate_notify(unsigned long val, void *v);
extern struct memory_block *find_memory_block(struct mem_section *);
#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT)
enum mem_add_context { BOOT, HOTPLUG };
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 74af449b1f1..998eacc1e4c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -48,6 +48,7 @@
#include <linux/page_cgroup.h>
#include <linux/debugobjects.h>
#include <linux/kmemleak.h>
+#include <linux/memory.h>
#include <trace/events/kmem.h>
#include <asm/tlbflush.h>
@@ -5008,23 +5009,65 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
int set_migratetype_isolate(struct page *page)
{
struct zone *zone;
- unsigned long flags;
+ struct page *curr_page;
+ unsigned long flags, pfn, iter;
+ unsigned long immobile = 0;
+ struct memory_isolate_notify arg;
+ int notifier_ret;
int ret = -EBUSY;
int zone_idx;
zone = page_zone(page);
zone_idx = zone_idx(zone);
+
spin_lock_irqsave(&zone->lock, flags);
+ if (get_pageblock_migratetype(page) == MIGRATE_MOVABLE ||
+ zone_idx == ZONE_MOVABLE) {
+ ret = 0;
+ goto out;
+ }
+
+ pfn = page_to_pfn(page);
+ arg.start_pfn = pfn;
+ arg.nr_pages = pageblock_nr_pages;
+ arg.pages_found = 0;
+
/*
- * In future, more migrate types will be able to be isolation target.
+ * It may be possible to isolate a pageblock even if the
+ * migratetype is not MIGRATE_MOVABLE. The memory isolation
+ * notifier chain is used by balloon drivers to return the
+ * number of pages in a range that are held by the balloon
+ * driver to shrink memory. If all the pages are accounted for
+ * by balloons, are free, or on the LRU, isolation can continue.
+ * Later, for example, when the memory hotplug notifier runs, the
+ * pages reported here as "can be isolated" should be isolated (freed)
+ * by the balloon driver through the memory notifier chain.
*/
- if (get_pageblock_migratetype(page) != MIGRATE_MOVABLE &&
- zone_idx != ZONE_MOVABLE)
+ notifier_ret = memory_isolate_notify(MEM_ISOLATE_COUNT, &arg);
+ notifier_ret = notifier_to_errno(notifier_ret);
+ if (notifier_ret || !arg.pages_found)
goto out;
- set_pageblock_migratetype(page, MIGRATE_ISOLATE);
- move_freepages_block(zone, page, MIGRATE_ISOLATE);
- ret = 0;
+
+	/*
+	 * Count the pages in this pageblock that are neither free nor on
+	 * the LRU. The result is compared with arg.pages_found: isolation
+	 * proceeds only if the notifier callbacks account for all of them.
+	 */
+	for (iter = pfn; iter < (pfn + pageblock_nr_pages); iter++) {
+		/*
+		 * Validate the pfn being visited, not the block's first
+		 * pfn, so that pfn_to_page() is never applied to a hole.
+		 */
+		if (!pfn_valid_within(iter))
+			continue;
+
+		curr_page = pfn_to_page(iter);
+		if (!page_count(curr_page) || PageLRU(curr_page))
+			continue;
+
+		immobile++;
+	}
+
+ if (arg.pages_found == immobile)
+ ret = 0;
+
out:
+ if (!ret) {
+ set_pageblock_migratetype(page, MIGRATE_ISOLATE);
+ move_freepages_block(zone, page, MIGRATE_ISOLATE);
+ }
+
spin_unlock_irqrestore(&zone->lock, flags);
if (!ret)
drain_all_pages();