Diffstat (limited to 'drivers/staging/zsmalloc/zsmalloc-main.c')
-rw-r--r--  drivers/staging/zsmalloc/zsmalloc-main.c  322
1 file changed, 273 insertions(+), 49 deletions(-)
diff --git a/drivers/staging/zsmalloc/zsmalloc-main.c b/drivers/staging/zsmalloc/zsmalloc-main.c
index 8b0bcb626a7..09a9d35d436 100644
--- a/drivers/staging/zsmalloc/zsmalloc-main.c
+++ b/drivers/staging/zsmalloc/zsmalloc-main.c
@@ -75,9 +75,140 @@
#include <linux/cpumask.h>
#include <linux/cpu.h>
#include <linux/vmalloc.h>
+#include <linux/hardirq.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
#include "zsmalloc.h"
-#include "zsmalloc_int.h"
+
+/*
+ * This must be a power of 2 and greater than or equal to sizeof(link_free).
+ * These two conditions ensure that any 'struct link_free' itself doesn't
+ * span more than 1 page, which avoids the complex case of mapping 2 pages
+ * simply to restore link_free pointer values.
+ */
+#define ZS_ALIGN 8
+
+/*
+ * A single 'zspage' is composed of up to 2^N discontiguous 0-order (single)
+ * pages. ZS_MAX_ZSPAGE_ORDER defines upper limit on N.
+ */
+#define ZS_MAX_ZSPAGE_ORDER 2
+#define ZS_MAX_PAGES_PER_ZSPAGE (_AC(1, UL) << ZS_MAX_ZSPAGE_ORDER)
+
+/*
+ * Object location (<PFN>, <obj_idx>) is encoded as
+ * a single (void *) handle value.
+ *
+ * Note that object index <obj_idx> is relative to system
+ * page <PFN> it is stored in, so for each sub-page belonging
+ * to a zspage, obj_idx starts with 0.
+ *
+ * This is made more complicated by various memory models and PAE.
+ */
+
+#ifndef MAX_PHYSMEM_BITS
+#ifdef CONFIG_HIGHMEM64G
+#define MAX_PHYSMEM_BITS 36
+#else /* !CONFIG_HIGHMEM64G */
+/*
+ * If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just
+ * be PAGE_SHIFT
+ */
+#define MAX_PHYSMEM_BITS BITS_PER_LONG
+#endif
+#endif
+#define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT)
+#define OBJ_INDEX_BITS (BITS_PER_LONG - _PFN_BITS)
+#define OBJ_INDEX_MASK ((_AC(1, UL) << OBJ_INDEX_BITS) - 1)
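For illustration, the defines above combine as in the following sketch; the file's real helpers are obj_location_to_handle() and obj_handle_to_location(), and the standalone names below are hypothetical:

/* Sketch: pack (PFN, obj_idx) into a single unsigned long handle.
 * The PFN occupies the upper _PFN_BITS; obj_idx the lower OBJ_INDEX_BITS. */
static unsigned long location_to_handle(unsigned long pfn, unsigned long obj_idx)
{
        return (pfn << OBJ_INDEX_BITS) | (obj_idx & OBJ_INDEX_MASK);
}

static void handle_to_location(unsigned long handle,
                               unsigned long *pfn, unsigned long *obj_idx)
{
        *pfn = handle >> OBJ_INDEX_BITS;
        *obj_idx = handle & OBJ_INDEX_MASK;
}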
+
+#define MAX(a, b) ((a) >= (b) ? (a) : (b))
+/* ZS_MIN_ALLOC_SIZE must be multiple of ZS_ALIGN */
+#define ZS_MIN_ALLOC_SIZE \
+ MAX(32, (ZS_MAX_PAGES_PER_ZSPAGE << PAGE_SHIFT >> OBJ_INDEX_BITS))
+#define ZS_MAX_ALLOC_SIZE PAGE_SIZE
+
+/*
+ * On systems with 4K page size, this gives 255 size classes! There is a
+ * trade-off here:
+ * - A large number of size classes is potentially wasteful as free pages are
+ *   spread across these classes
+ * - A small number of size classes causes large internal fragmentation
+ * - It is probably better to use specific size classes (empirically
+ * determined). NOTE: all those class sizes must be set as multiple of
+ * ZS_ALIGN to make sure link_free itself never has to span 2 pages.
+ *
+ * ZS_MIN_ALLOC_SIZE and ZS_SIZE_CLASS_DELTA must be multiple of ZS_ALIGN
+ * (reason above)
+ */
+#define ZS_SIZE_CLASS_DELTA 16
+#define ZS_SIZE_CLASSES ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / \
+ ZS_SIZE_CLASS_DELTA + 1)
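As a worked check of the class count under the common case (4K pages, BITS_PER_LONG = 64, so OBJ_INDEX_BITS = 12 and ZS_MIN_ALLOC_SIZE = max(32, 4) = 32):

        ZS_SIZE_CLASSES = (ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / ZS_SIZE_CLASS_DELTA + 1
                        = (4096 - 32) / 16 + 1
                        = 255

Configurations with fewer object-index bits (e.g. CONFIG_HIGHMEM64G on 32-bit) raise ZS_MIN_ALLOC_SIZE and end up with fewer classes.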
+
+/*
+ * We do not maintain any list for completely empty or full pages
+ */
+enum fullness_group {
+ ZS_ALMOST_FULL,
+ ZS_ALMOST_EMPTY,
+ _ZS_NR_FULLNESS_GROUPS,
+
+ ZS_EMPTY,
+ ZS_FULL
+};
+
+/*
+ * We assign a page to ZS_ALMOST_EMPTY fullness group when:
+ * n <= N / f, where
+ * n = number of allocated objects
+ * N = total number of objects zspage can store
+ * f = fullness_threshold_frac
+ *
+ * Similarly, we assign zspage to:
+ * ZS_ALMOST_FULL when n > N / f
+ * ZS_EMPTY when n == 0
+ * ZS_FULL when n == N
+ *
+ * (see: fix_fullness_group())
+ */
+static const int fullness_threshold_frac = 4;
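A minimal sketch of the classification those rules describe; the file's get_fullness_group() is the real implementation, and this standalone form with hypothetical names is illustrative only:

static enum fullness_group fullness_of(int inuse, int max_objects)
{
        if (inuse == 0)
                return ZS_EMPTY;
        if (inuse == max_objects)
                return ZS_FULL;
        if (inuse <= max_objects / fullness_threshold_frac)
                return ZS_ALMOST_EMPTY;
        return ZS_ALMOST_FULL;
}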
+
+struct size_class {
+ /*
+ * Size of objects stored in this class. Must be multiple
+ * of ZS_ALIGN.
+ */
+ int size;
+ unsigned int index;
+
+ /* Number of PAGE_SIZE sized pages to combine to form a 'zspage' */
+ int pages_per_zspage;
+
+ spinlock_t lock;
+
+ /* stats */
+ u64 pages_allocated;
+
+ struct page *fullness_list[_ZS_NR_FULLNESS_GROUPS];
+};
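For context, pages_per_zspage is chosen per class to minimize wasted space at the end of a zspage; this sketch assumes the selection logic of the file's get_pages_per_zspage(), and the standalone form here is illustrative:

/* Sketch: pick the zspage size (in 0-order pages) that wastes the least. */
static int pages_per_zspage(int class_size)
{
        int i, max_usedpc = 0, best = 1;

        for (i = 1; i <= ZS_MAX_PAGES_PER_ZSPAGE; i++) {
                int zspage_size = i * PAGE_SIZE;
                int waste = zspage_size % class_size;
                int usedpc = (zspage_size - waste) * 100 / zspage_size;

                if (usedpc > max_usedpc) {
                        max_usedpc = usedpc;
                        best = i;
                }
        }
        return best;
}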
+
+/*
+ * Placed within free objects to form a singly linked list.
+ * For every zspage, first_page->freelist gives head of this list.
+ *
+ * This must be a power of 2 and less than or equal to ZS_ALIGN
+ */
+struct link_free {
+ /* Handle of next free chunk (encodes <PFN, obj_idx>) */
+ void *next;
+};
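To see how this embedded list is built, here is a simplified single-page sketch of freelist initialization using the file's obj_location_to_handle() helper; the real init_zspage() also links objects that straddle sub-pages, which this sketch omits:

/* Sketch: thread a free list through one page's object slots. Each
 * slot's link_free.next stores the encoded handle of the next slot. */
static void init_page_freelist(struct page *page, int class_size)
{
        struct link_free *link;
        unsigned long off = 0, idx = 0;
        void *vaddr = kmap_atomic(page);

        while (off + class_size <= PAGE_SIZE) {
                link = (struct link_free *)(vaddr + off);
                if (off + 2 * class_size <= PAGE_SIZE)
                        link->next = obj_location_to_handle(page, idx + 1);
                else
                        link->next = NULL; /* last slot ends the list */
                off += class_size;
                idx++;
        }
        kunmap_atomic(vaddr);
}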
+
+struct zs_pool {
+ struct size_class size_class[ZS_SIZE_CLASSES];
+
+ gfp_t flags; /* allocation flags used when growing pool */
+ const char *name;
+};
/*
* A zspage's class index and fullness group
@@ -88,6 +219,30 @@
#define CLASS_IDX_MASK ((1 << CLASS_IDX_BITS) - 1)
#define FULLNESS_MASK ((1 << FULLNESS_BITS) - 1)
+/*
+ * By default, zsmalloc uses a copy-based object mapping method to access
+ * allocations that span two pages. However, if a particular architecture
+ * 1) implements local_flush_tlb_kernel_range() and 2) performs VM mapping
+ * faster than copying, then it should be added here so that
+ * USE_PGTABLE_MAPPING is defined. This causes zsmalloc to use page table
+ * mapping rather than copying for object mapping.
+ */
+#if defined(CONFIG_ARM)
+#define USE_PGTABLE_MAPPING
+#endif
+
+struct mapping_area {
+#ifdef USE_PGTABLE_MAPPING
+ struct vm_struct *vm; /* vm area for mapping objects that span pages */
+#else
+ char *vm_buf; /* copy buffer for objects that span pages */
+#endif
+ char *vm_addr; /* address of kmap_atomic()'ed pages */
+ enum zs_mapmode vm_mm; /* mapping mode */
+};
+
/* per-cpu VM mapping areas for zspage accesses that cross page boundaries */
static DEFINE_PER_CPU(struct mapping_area, zs_map_area);
@@ -470,16 +625,83 @@ static struct page *find_get_zspage(struct size_class *class)
return page;
}
-static void zs_copy_map_object(char *buf, struct page *firstpage,
- int off, int size)
+#ifdef USE_PGTABLE_MAPPING
+static inline int __zs_cpu_up(struct mapping_area *area)
+{
+ /*
+ * Make sure we don't leak memory if a cpu UP notification
+ * and zs_init() race and both call zs_cpu_up() on the same cpu
+ */
+ if (area->vm)
+ return 0;
+ area->vm = alloc_vm_area(PAGE_SIZE * 2, NULL);
+ if (!area->vm)
+ return -ENOMEM;
+ return 0;
+}
+
+static inline void __zs_cpu_down(struct mapping_area *area)
+{
+ if (area->vm)
+ free_vm_area(area->vm);
+ area->vm = NULL;
+}
+
+static inline void *__zs_map_object(struct mapping_area *area,
+ struct page *pages[2], int off, int size)
+{
+ BUG_ON(map_vm_area(area->vm, PAGE_KERNEL, &pages));
+ area->vm_addr = area->vm->addr;
+ return area->vm_addr + off;
+}
+
+static inline void __zs_unmap_object(struct mapping_area *area,
+ struct page *pages[2], int off, int size)
+{
+ unsigned long addr = (unsigned long)area->vm_addr;
+ unsigned long end = addr + (PAGE_SIZE * 2);
+
+ flush_cache_vunmap(addr, end);
+ unmap_kernel_range_noflush(addr, PAGE_SIZE * 2);
+ local_flush_tlb_kernel_range(addr, end);
+}
+
+#else /* USE_PGTABLE_MAPPING */
+
+static inline int __zs_cpu_up(struct mapping_area *area)
+{
+ /*
+ * Make sure we don't leak memory if a cpu UP notification
+ * and zs_init() race and both call zs_cpu_up() on the same cpu
+ */
+ if (area->vm_buf)
+ return 0;
+ area->vm_buf = (char *)__get_free_page(GFP_KERNEL);
+ if (!area->vm_buf)
+ return -ENOMEM;
+ return 0;
+}
+
+static inline void __zs_cpu_down(struct mapping_area *area)
+{
+ if (area->vm_buf)
+ free_page((unsigned long)area->vm_buf);
+ area->vm_buf = NULL;
+}
+
+static void *__zs_map_object(struct mapping_area *area,
+ struct page *pages[2], int off, int size)
{
- struct page *pages[2];
int sizes[2];
void *addr;
+ char *buf = area->vm_buf;
- pages[0] = firstpage;
- pages[1] = get_next_page(firstpage);
- BUG_ON(!pages[1]);
+ /* disable page faults to match kmap_atomic() return conditions */
+ pagefault_disable();
+
+ /* no read fastpath */
+ if (area->vm_mm == ZS_MM_WO)
+ goto out;
sizes[0] = PAGE_SIZE - off;
sizes[1] = size - sizes[0];
@@ -491,18 +713,20 @@ static void zs_copy_map_object(char *buf, struct page *firstpage,
addr = kmap_atomic(pages[1]);
memcpy(buf + sizes[0], addr, sizes[1]);
kunmap_atomic(addr);
+out:
+ return area->vm_buf;
}
-static void zs_copy_unmap_object(char *buf, struct page *firstpage,
- int off, int size)
+static void __zs_unmap_object(struct mapping_area *area,
+ struct page *pages[2], int off, int size)
{
- struct page *pages[2];
int sizes[2];
void *addr;
+ char *buf = area->vm_buf;
- pages[0] = firstpage;
- pages[1] = get_next_page(firstpage);
- BUG_ON(!pages[1]);
+ /* no write fastpath */
+ if (area->vm_mm == ZS_MM_RO)
+ goto out;
sizes[0] = PAGE_SIZE - off;
sizes[1] = size - sizes[0];
@@ -514,34 +738,31 @@ static void zs_copy_unmap_object(char *buf, struct page *firstpage,
addr = kmap_atomic(pages[1]);
memcpy(addr, buf + sizes[0], sizes[1]);
kunmap_atomic(addr);
+
+out:
+ /* enable page faults to match kunmap_atomic() return conditions */
+ pagefault_enable();
}
+#endif /* USE_PGTABLE_MAPPING */
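The sizes[] split used by both copy helpers is easiest to see with concrete numbers (assuming PAGE_SIZE = 4096):

/* Example: a 200-byte object starting at offset 4000 in pages[0]:
 *   sizes[0] = PAGE_SIZE - off = 4096 - 4000 =  96 bytes in pages[0]
 *   sizes[1] = size - sizes[0] =  200 -   96 = 104 bytes in pages[1]
 * __zs_map_object() gathers both pieces into the contiguous vm_buf on
 * map; __zs_unmap_object() scatters them back on unmap. */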
+
static int zs_cpu_notifier(struct notifier_block *nb, unsigned long action,
void *pcpu)
{
- int cpu = (long)pcpu;
+ int ret, cpu = (long)pcpu;
struct mapping_area *area;
switch (action) {
case CPU_UP_PREPARE:
area = &per_cpu(zs_map_area, cpu);
- /*
- * Make sure we don't leak memory if a cpu UP notification
- * and zs_init() race and both call zs_cpu_up() on the same cpu
- */
- if (area->vm_buf)
- return 0;
- area->vm_buf = (char *)__get_free_page(GFP_KERNEL);
- if (!area->vm_buf)
- return -ENOMEM;
- return 0;
+ ret = __zs_cpu_up(area);
+ if (ret)
+ return notifier_from_errno(ret);
break;
case CPU_DEAD:
case CPU_UP_CANCELED:
area = &per_cpu(zs_map_area, cpu);
- if (area->vm_buf)
- free_page((unsigned long)area->vm_buf);
- area->vm_buf = NULL;
+ __zs_cpu_down(area);
break;
}
@@ -758,28 +979,36 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
enum fullness_group fg;
struct size_class *class;
struct mapping_area *area;
+ struct page *pages[2];
BUG_ON(!handle);
+ /*
+ * Because we use per-cpu mapping areas shared among the
+ * pools/users, we can't allow mapping in interrupt context
+ * because it can corrupt another user's mappings.
+ */
+ BUG_ON(in_interrupt());
+
obj_handle_to_location(handle, &page, &obj_idx);
get_zspage_mapping(get_first_page(page), &class_idx, &fg);
class = &pool->size_class[class_idx];
off = obj_idx_to_offset(page, obj_idx, class->size);
area = &get_cpu_var(zs_map_area);
+ area->vm_mm = mm;
if (off + class->size <= PAGE_SIZE) {
/* this object is contained entirely within a page */
area->vm_addr = kmap_atomic(page);
return area->vm_addr + off;
}
- /* disable page faults to match kmap_atomic() return conditions */
- pagefault_disable();
+ /* this object spans two pages */
+ pages[0] = page;
+ pages[1] = get_next_page(page);
+ BUG_ON(!pages[1]);
- if (mm != ZS_MM_WO)
- zs_copy_map_object(area->vm_buf, page, off, class->size);
- area->vm_addr = NULL;
- return area->vm_buf;
+ return __zs_map_object(area, pages, off, class->size);
}
EXPORT_SYMBOL_GPL(zs_map_object);
@@ -793,17 +1022,6 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
struct size_class *class;
struct mapping_area *area;
- area = &__get_cpu_var(zs_map_area);
- /* single-page object fastpath */
- if (area->vm_addr) {
- kunmap_atomic(area->vm_addr);
- goto out;
- }
-
- /* no write fastpath */
- if (area->vm_mm == ZS_MM_RO)
- goto pfenable;
-
BUG_ON(!handle);
obj_handle_to_location(handle, &page, &obj_idx);
@@ -811,12 +1029,18 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
class = &pool->size_class[class_idx];
off = obj_idx_to_offset(page, obj_idx, class->size);
- zs_copy_unmap_object(area->vm_buf, page, off, class->size);
+ area = &__get_cpu_var(zs_map_area);
+ if (off + class->size <= PAGE_SIZE)
+ kunmap_atomic(area->vm_addr);
+ else {
+ struct page *pages[2];
-pfenable:
- /* enable page faults to match kunmap_atomic() return conditions */
- pagefault_enable();
-out:
+ pages[0] = page;
+ pages[1] = get_next_page(page);
+ BUG_ON(!pages[1]);
+
+ __zs_unmap_object(area, pages, off, class->size);
+ }
put_cpu_var(zs_map_area);
}
EXPORT_SYMBOL_GPL(zs_unmap_object);
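Taken together, a minimal usage sketch of the mapping API as it stands after this patch; the pool name, flags, and 200-byte size are illustrative, and error handling is elided:

/* Sketch: store 200 bytes in a zsmalloc pool (process context only;
 * the region between map and unmap is atomic and must not sleep). */
struct zs_pool *pool = zs_create_pool("example", GFP_KERNEL);
unsigned long handle = zs_malloc(pool, 200);
char src[200] = "payload";
void *dst;

dst = zs_map_object(pool, handle, ZS_MM_WO); /* write-only: no copy-in */
memcpy(dst, src, sizeof(src));               /* may span two pages */
zs_unmap_object(pool, handle);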