From 66ee620f06f99d72475db6eb638559ba608c7dee Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 25 Jun 2018 06:56:50 -0400 Subject: idr: Permit any valid kernel pointer to be stored An upcoming change to the encoding of internal entries will set the bottom two bits to 0b10. Unfortunately, m68k only aligns some data structures to 2 bytes, so the IDR will interpret them as internal entries and things will go badly wrong. Change the radix tree so that it stops either when the node indicates that it's the bottom of the tree (shift == 0) or when the entry is not an internal entry. This means we cannot insert an arbitrary kernel pointer as a multiorder entry, but the IDR does not permit multiorder entries. Annoyingly, this means the IDR can no longer take advantage of the radix tree's ability to store a single entry at offset 0 without allocating memory. A pointer which is 2-byte aligned cannot be stored directly in the root as it would be indistinguishable from a node, so we must allocate a node in order to store a 2-byte pointer at index 0. The idr_replace() function does not take a GFP flags argument, so cannot allocate memory. If a user inserts a 4-byte aligned pointer at index 0 and then replaces it with a 2-byte aligned pointer, we must be able to store it. Arbitrary pointer values are still not permitted; pointers of the form 2 + (i * 4) for values of i between 0 and 1023 are reserved for the implementation. These are not valid kernel pointers as they would point into the zero page. This change does cause a runtime memory consumption regression for the IDA. I will recover that later. Signed-off-by: Matthew Wilcox Tested-by: Guenter Roeck --- lib/idr.c | 4 ---- lib/radix-tree.c | 21 +++++++++++++++------ 2 files changed, 15 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/idr.c b/lib/idr.c index fab2fd5bc326..729e381e23b4 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -39,8 +39,6 @@ int idr_alloc_u32(struct idr *idr, void *ptr, u32 *nextid, unsigned int base = idr->idr_base; unsigned int id = *nextid; - if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr))) - return -EINVAL; if (WARN_ON_ONCE(!(idr->idr_rt.gfp_mask & ROOT_IS_IDR))) idr->idr_rt.gfp_mask |= IDR_RT_MARKER; @@ -295,8 +293,6 @@ void *idr_replace(struct idr *idr, void *ptr, unsigned long id) void __rcu **slot = NULL; void *entry; - if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr))) - return ERR_PTR(-EINVAL); id -= idr->idr_base; entry = __radix_tree_lookup(&idr->idr_rt, id, &node, &slot); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index bc03ecc4dfd2..a904a8ddd174 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -703,6 +703,14 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root, if (!radix_tree_is_internal_node(child) && node->shift) break; + /* + * For an IDR, we must not shrink entry 0 into the root in + * case somebody calls idr_replace() with a pointer that + * appears to be an internal entry + */ + if (!node->shift && is_idr(root)) + break; + if (radix_tree_is_internal_node(child)) entry_to_node(child)->parent = NULL; @@ -875,8 +883,8 @@ static void radix_tree_free_nodes(struct radix_tree_node *node) for (;;) { void *entry = rcu_dereference_raw(child->slots[offset]); - if (radix_tree_is_internal_node(entry) && - !is_sibling_entry(child, entry)) { + if (radix_tree_is_internal_node(entry) && child->shift && + !is_sibling_entry(child, entry)) { child = entry_to_node(entry); offset = 0; continue; @@ -1049,6 +1057,8 @@ void *__radix_tree_lookup(const struct radix_tree_root *root, parent = entry_to_node(node); offset = radix_tree_descend(parent, &node, index); slot = parent->slots + offset; + if (parent->shift == 0) + break; } if (nodep) @@ -1123,9 +1133,6 @@ static inline void replace_sibling_entries(struct radix_tree_node *node, static void replace_slot(void __rcu **slot, void *item, struct radix_tree_node *node, int count, int exceptional) { - if (WARN_ON_ONCE(radix_tree_is_internal_node(item))) - return; - if (node && (count || exceptional)) { node->count += count; node->exceptional += exceptional; @@ -1784,7 +1791,7 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root, goto restart; if (child == RADIX_TREE_RETRY) break; - } while (radix_tree_is_internal_node(child)); + } while (node->shift && radix_tree_is_internal_node(child)); /* Update the iterator state */ iter->index = (index &~ node_maxindex(node)) | (offset << node->shift); @@ -2150,6 +2157,8 @@ void __rcu **idr_get_free(struct radix_tree_root *root, shift = error; child = rcu_dereference_raw(root->rnode); } + if (start == 0 && shift == 0) + shift = RADIX_TREE_MAP_SHIFT; while (shift) { shift -= RADIX_TREE_MAP_SHIFT; -- cgit v1.2.3 From 3159f943aafdbacb2f94c38fdaadabf2bbde2a14 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 3 Nov 2017 13:30:42 -0400 Subject: xarray: Replace exceptional entries Introduce xarray value entries and tagged pointers to replace radix tree exceptional entries. This is a slight change in encoding to allow the use of an extra bit (we can now store BITS_PER_LONG - 1 bits in a value entry). It is also a change in emphasis; exceptional entries are intimidating and different. As the comment explains, you can choose to store values or pointers in the xarray and they are both first-class citizens. Signed-off-by: Matthew Wilcox Reviewed-by: Josef Bacik --- lib/idr.c | 60 +++++++++++++++++++++++--------------------------------- lib/radix-tree.c | 21 +++++++++----------- 2 files changed, 34 insertions(+), 47 deletions(-) (limited to 'lib') diff --git a/lib/idr.c b/lib/idr.c index 729e381e23b4..88419fbc5737 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -338,11 +338,8 @@ EXPORT_SYMBOL(idr_replace); * by the number of bits in the leaf bitmap before doing a radix tree lookup. * * As an optimisation, if there are only a few low bits set in any given - * leaf, instead of allocating a 128-byte bitmap, we use the 'exceptional - * entry' functionality of the radix tree to store BITS_PER_LONG - 2 bits - * directly in the entry. By being really tricksy, we could store - * BITS_PER_LONG - 1 bits, but there're diminishing returns after optimising - * for 0-3 allocated IDs. + * leaf, instead of allocating a 128-byte bitmap, we store the bits + * directly in the entry. * * We allow the radix tree 'exceptional' count to get out of date. Nothing * in the IDA nor the radix tree code checks it. If it becomes important @@ -366,12 +363,11 @@ static int ida_get_new_above(struct ida *ida, int start) struct radix_tree_iter iter; struct ida_bitmap *bitmap; unsigned long index; - unsigned bit, ebit; + unsigned bit; int new; index = start / IDA_BITMAP_BITS; bit = start % IDA_BITMAP_BITS; - ebit = bit + RADIX_TREE_EXCEPTIONAL_SHIFT; slot = radix_tree_iter_init(&iter, index); for (;;) { @@ -386,25 +382,23 @@ static int ida_get_new_above(struct ida *ida, int start) return PTR_ERR(slot); } } - if (iter.index > index) { + if (iter.index > index) bit = 0; - ebit = RADIX_TREE_EXCEPTIONAL_SHIFT; - } new = iter.index * IDA_BITMAP_BITS; bitmap = rcu_dereference_raw(*slot); - if (radix_tree_exception(bitmap)) { - unsigned long tmp = (unsigned long)bitmap; - ebit = find_next_zero_bit(&tmp, BITS_PER_LONG, ebit); - if (ebit < BITS_PER_LONG) { - tmp |= 1UL << ebit; - rcu_assign_pointer(*slot, (void *)tmp); - return new + ebit - - RADIX_TREE_EXCEPTIONAL_SHIFT; + if (xa_is_value(bitmap)) { + unsigned long tmp = xa_to_value(bitmap); + int vbit = find_next_zero_bit(&tmp, BITS_PER_XA_VALUE, + bit); + if (vbit < BITS_PER_XA_VALUE) { + tmp |= 1UL << vbit; + rcu_assign_pointer(*slot, xa_mk_value(tmp)); + return new + vbit; } bitmap = this_cpu_xchg(ida_bitmap, NULL); if (!bitmap) return -EAGAIN; - bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT; + bitmap->bitmap[0] = tmp; rcu_assign_pointer(*slot, bitmap); } @@ -425,17 +419,14 @@ static int ida_get_new_above(struct ida *ida, int start) new += bit; if (new < 0) return -ENOSPC; - if (ebit < BITS_PER_LONG) { - bitmap = (void *)((1UL << ebit) | - RADIX_TREE_EXCEPTIONAL_ENTRY); - radix_tree_iter_replace(root, &iter, slot, - bitmap); - return new; + if (bit < BITS_PER_XA_VALUE) { + bitmap = xa_mk_value(1UL << bit); + } else { + bitmap = this_cpu_xchg(ida_bitmap, NULL); + if (!bitmap) + return -EAGAIN; + __set_bit(bit, bitmap->bitmap); } - bitmap = this_cpu_xchg(ida_bitmap, NULL); - if (!bitmap) - return -EAGAIN; - __set_bit(bit, bitmap->bitmap); radix_tree_iter_replace(root, &iter, slot, bitmap); } @@ -457,9 +448,9 @@ static void ida_remove(struct ida *ida, int id) goto err; bitmap = rcu_dereference_raw(*slot); - if (radix_tree_exception(bitmap)) { + if (xa_is_value(bitmap)) { btmp = (unsigned long *)slot; - offset += RADIX_TREE_EXCEPTIONAL_SHIFT; + offset += 1; /* Intimate knowledge of the value encoding */ if (offset >= BITS_PER_LONG) goto err; } else { @@ -470,9 +461,8 @@ static void ida_remove(struct ida *ida, int id) __clear_bit(offset, btmp); radix_tree_iter_tag_set(&ida->ida_rt, &iter, IDR_FREE); - if (radix_tree_exception(bitmap)) { - if (rcu_dereference_raw(*slot) == - (void *)RADIX_TREE_EXCEPTIONAL_ENTRY) + if (xa_is_value(bitmap)) { + if (xa_to_value(rcu_dereference_raw(*slot)) == 0) radix_tree_iter_delete(&ida->ida_rt, &iter, slot); } else if (bitmap_empty(btmp, IDA_BITMAP_BITS)) { kfree(bitmap); @@ -503,7 +493,7 @@ void ida_destroy(struct ida *ida) xa_lock_irqsave(&ida->ida_rt, flags); radix_tree_for_each_slot(slot, &ida->ida_rt, &iter, 0) { struct ida_bitmap *bitmap = rcu_dereference_raw(*slot); - if (!radix_tree_exception(bitmap)) + if (!xa_is_value(bitmap)) kfree(bitmap); radix_tree_iter_delete(&ida->ida_rt, &iter, slot); } diff --git a/lib/radix-tree.c b/lib/radix-tree.c index a904a8ddd174..b6c0e7f3a894 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -340,14 +340,12 @@ static void dump_ida_node(void *entry, unsigned long index) for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) dump_ida_node(node->slots[i], index | (i << node->shift)); - } else if (radix_tree_exceptional_entry(entry)) { + } else if (xa_is_value(entry)) { pr_debug("ida excp: %p offset %d indices %lu-%lu data %lx\n", entry, (int)(index & RADIX_TREE_MAP_MASK), index * IDA_BITMAP_BITS, - index * IDA_BITMAP_BITS + BITS_PER_LONG - - RADIX_TREE_EXCEPTIONAL_SHIFT, - (unsigned long)entry >> - RADIX_TREE_EXCEPTIONAL_SHIFT); + index * IDA_BITMAP_BITS + BITS_PER_XA_VALUE, + xa_to_value(entry)); } else { struct ida_bitmap *bitmap = entry; @@ -656,7 +654,7 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, BUG_ON(shift > BITS_PER_LONG); if (radix_tree_is_internal_node(entry)) { entry_to_node(entry)->parent = node; - } else if (radix_tree_exceptional_entry(entry)) { + } else if (xa_is_value(entry)) { /* Moving an exceptional root->rnode to a node */ node->exceptional = 1; } @@ -955,12 +953,12 @@ static inline int insert_entries(struct radix_tree_node *node, !is_sibling_entry(node, old) && (old != RADIX_TREE_RETRY)) radix_tree_free_nodes(old); - if (radix_tree_exceptional_entry(old)) + if (xa_is_value(old)) node->exceptional--; } if (node) { node->count += n; - if (radix_tree_exceptional_entry(item)) + if (xa_is_value(item)) node->exceptional += n; } return n; @@ -974,7 +972,7 @@ static inline int insert_entries(struct radix_tree_node *node, rcu_assign_pointer(*slot, item); if (node) { node->count++; - if (radix_tree_exceptional_entry(item)) + if (xa_is_value(item)) node->exceptional++; } return 1; @@ -1190,8 +1188,7 @@ void __radix_tree_replace(struct radix_tree_root *root, radix_tree_update_node_t update_node) { void *old = rcu_dereference_raw(*slot); - int exceptional = !!radix_tree_exceptional_entry(item) - - !!radix_tree_exceptional_entry(old); + int exceptional = !!xa_is_value(item) - !!xa_is_value(old); int count = calculate_count(root, node, slot, item, old); /* @@ -1992,7 +1989,7 @@ static bool __radix_tree_delete(struct radix_tree_root *root, struct radix_tree_node *node, void __rcu **slot) { void *old = rcu_dereference_raw(*slot); - int exceptional = radix_tree_exceptional_entry(old) ? -1 : 0; + int exceptional = xa_is_value(old) ? -1 : 0; unsigned offset = get_slot_offset(node, slot); int tag; -- cgit v1.2.3 From 02c02bf12c5d838603eed44195d3e91f094e2ab2 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 3 Nov 2017 23:09:45 -0400 Subject: xarray: Change definition of sibling entries Instead of storing a pointer to the slot containing the canonical entry, store the offset of the slot. Produces slightly more efficient code (~300 bytes) and simplifies the implementation. Signed-off-by: Matthew Wilcox Reviewed-by: Josef Bacik --- lib/Kconfig | 7 +++++++ lib/radix-tree.c | 64 +++++++++++++++++--------------------------------------- 2 files changed, 26 insertions(+), 45 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig b/lib/Kconfig index a3928d4438b5..40bfa6ccd294 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -399,8 +399,15 @@ config INTERVAL_TREE for more information. +config XARRAY_MULTI + bool + help + Support entries which occupy multiple consecutive indices in the + XArray. + config RADIX_TREE_MULTIORDER bool + select XARRAY_MULTI config ASSOCIATIVE_ARRAY bool diff --git a/lib/radix-tree.c b/lib/radix-tree.c index b6c0e7f3a894..59b28111eabc 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -38,6 +38,7 @@ #include #include #include +#include /* Number of nodes in fully populated tree of given height */ @@ -98,24 +99,7 @@ static inline void *node_to_entry(void *ptr) return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE); } -#define RADIX_TREE_RETRY node_to_entry(NULL) - -#ifdef CONFIG_RADIX_TREE_MULTIORDER -/* Sibling slots point directly to another slot in the same node */ -static inline -bool is_sibling_entry(const struct radix_tree_node *parent, void *node) -{ - void __rcu **ptr = node; - return (parent->slots <= ptr) && - (ptr < parent->slots + RADIX_TREE_MAP_SIZE); -} -#else -static inline -bool is_sibling_entry(const struct radix_tree_node *parent, void *node) -{ - return false; -} -#endif +#define RADIX_TREE_RETRY XA_RETRY_ENTRY static inline unsigned long get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot) @@ -129,16 +113,10 @@ static unsigned int radix_tree_descend(const struct radix_tree_node *parent, unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK; void __rcu **entry = rcu_dereference_raw(parent->slots[offset]); -#ifdef CONFIG_RADIX_TREE_MULTIORDER - if (radix_tree_is_internal_node(entry)) { - if (is_sibling_entry(parent, entry)) { - void __rcu **sibentry; - sibentry = (void __rcu **) entry_to_node(entry); - offset = get_slot_offset(parent, sibentry); - entry = rcu_dereference_raw(*sibentry); - } + if (xa_is_sibling(entry)) { + offset = xa_to_sibling(entry); + entry = rcu_dereference_raw(parent->slots[offset]); } -#endif *nodep = (void *)entry; return offset; @@ -300,10 +278,10 @@ static void dump_node(struct radix_tree_node *node, unsigned long index) } else if (!radix_tree_is_internal_node(entry)) { pr_debug("radix entry %p offset %ld indices %lu-%lu parent %p\n", entry, i, first, last, node); - } else if (is_sibling_entry(node, entry)) { + } else if (xa_is_sibling(entry)) { pr_debug("radix sblng %p offset %ld indices %lu-%lu parent %p val %p\n", entry, i, first, last, node, - *(void **)entry_to_node(entry)); + node->slots[xa_to_sibling(entry)]); } else { dump_node(entry_to_node(entry), first); } @@ -881,8 +859,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node) for (;;) { void *entry = rcu_dereference_raw(child->slots[offset]); - if (radix_tree_is_internal_node(entry) && child->shift && - !is_sibling_entry(child, entry)) { + if (xa_is_node(entry) && child->shift) { child = entry_to_node(entry); offset = 0; continue; @@ -904,7 +881,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node) static inline int insert_entries(struct radix_tree_node *node, void __rcu **slot, void *item, unsigned order, bool replace) { - struct radix_tree_node *child; + void *sibling; unsigned i, n, tag, offset, tags = 0; if (node) { @@ -922,7 +899,7 @@ static inline int insert_entries(struct radix_tree_node *node, offset = offset & ~(n - 1); slot = &node->slots[offset]; } - child = node_to_entry(slot); + sibling = xa_mk_sibling(offset); for (i = 0; i < n; i++) { if (slot[i]) { @@ -939,7 +916,7 @@ static inline int insert_entries(struct radix_tree_node *node, for (i = 0; i < n; i++) { struct radix_tree_node *old = rcu_dereference_raw(slot[i]); if (i) { - rcu_assign_pointer(slot[i], child); + rcu_assign_pointer(slot[i], sibling); for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) if (tags & (1 << tag)) tag_clear(node, tag, offset + i); @@ -949,9 +926,7 @@ static inline int insert_entries(struct radix_tree_node *node, if (tags & (1 << tag)) tag_set(node, tag, offset); } - if (radix_tree_is_internal_node(old) && - !is_sibling_entry(node, old) && - (old != RADIX_TREE_RETRY)) + if (xa_is_node(old)) radix_tree_free_nodes(old); if (xa_is_value(old)) node->exceptional--; @@ -1112,10 +1087,10 @@ static inline void replace_sibling_entries(struct radix_tree_node *node, void __rcu **slot, int count, int exceptional) { #ifdef CONFIG_RADIX_TREE_MULTIORDER - void *ptr = node_to_entry(slot); - unsigned offset = get_slot_offset(node, slot) + 1; + unsigned offset = get_slot_offset(node, slot); + void *ptr = xa_mk_sibling(offset); - while (offset < RADIX_TREE_MAP_SIZE) { + while (++offset < RADIX_TREE_MAP_SIZE) { if (rcu_dereference_raw(node->slots[offset]) != ptr) break; if (count < 0) { @@ -1123,7 +1098,6 @@ static inline void replace_sibling_entries(struct radix_tree_node *node, node->count--; } node->exceptional += exceptional; - offset++; } #endif } @@ -1319,8 +1293,7 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index, tags |= 1 << tag; for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) { - if (!is_sibling_entry(parent, - rcu_dereference_raw(parent->slots[end]))) + if (!xa_is_sibling(rcu_dereference_raw(parent->slots[end]))) break; for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) if (tags & (1 << tag)) @@ -1618,7 +1591,7 @@ static void __rcu **skip_siblings(struct radix_tree_node **nodep, { while (iter->index < iter->next_index) { *nodep = rcu_dereference_raw(*slot); - if (*nodep && !is_sibling_entry(iter->node, *nodep)) + if (*nodep && !xa_is_sibling(*nodep)) return slot; slot++; iter->index = __radix_tree_iter_add(iter, 1); @@ -1769,7 +1742,7 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root, while (++offset < RADIX_TREE_MAP_SIZE) { void *slot = rcu_dereference_raw( node->slots[offset]); - if (is_sibling_entry(node, slot)) + if (xa_is_sibling(slot)) continue; if (slot) break; @@ -2283,6 +2256,7 @@ void __init radix_tree_init(void) BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32); BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK); + BUILD_BUG_ON(XA_CHUNK_SIZE > 255); radix_tree_node_cachep = kmem_cache_create("radix_tree_node", sizeof(struct radix_tree_node), 0, SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, -- cgit v1.2.3 From f8d5d0cc145cc21bfc56ef807dc28102aebbf228 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 7 Nov 2017 16:30:10 -0500 Subject: xarray: Add definition of struct xarray This is a direct replacement for struct radix_tree_root. Some of the struct members have changed name; convert those, and use a #define so that radix_tree users continue to work without change. Signed-off-by: Matthew Wilcox Reviewed-by: Josef Bacik --- lib/Makefile | 2 +- lib/idr.c | 4 +-- lib/radix-tree.c | 75 ++++++++++++++++++++++++++++---------------------------- lib/xarray.c | 44 +++++++++++++++++++++++++++++++++ 4 files changed, 84 insertions(+), 41 deletions(-) create mode 100644 lib/xarray.c (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index ca3f7ebb900d..057385f1f145 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -18,7 +18,7 @@ KCOV_INSTRUMENT_debugobjects.o := n KCOV_INSTRUMENT_dynamic_debug.o := n lib-y := ctype.o string.o vsprintf.o cmdline.o \ - rbtree.o radix-tree.o timerqueue.o\ + rbtree.o radix-tree.o timerqueue.o xarray.o \ idr.o int_sqrt.o extable.o \ sha1.o chacha20.o irq_regs.o argv_split.o \ flex_proportions.o ratelimit.o show_mem.o \ diff --git a/lib/idr.c b/lib/idr.c index 88419fbc5737..9a366b5be2c2 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -39,8 +39,8 @@ int idr_alloc_u32(struct idr *idr, void *ptr, u32 *nextid, unsigned int base = idr->idr_base; unsigned int id = *nextid; - if (WARN_ON_ONCE(!(idr->idr_rt.gfp_mask & ROOT_IS_IDR))) - idr->idr_rt.gfp_mask |= IDR_RT_MARKER; + if (WARN_ON_ONCE(!(idr->idr_rt.xa_flags & ROOT_IS_IDR))) + idr->idr_rt.xa_flags |= IDR_RT_MARKER; id = (id < base) ? 0 : id - base; radix_tree_iter_init(&iter, id); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 59b28111eabc..299d4bdba109 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -124,7 +124,7 @@ static unsigned int radix_tree_descend(const struct radix_tree_node *parent, static inline gfp_t root_gfp_mask(const struct radix_tree_root *root) { - return root->gfp_mask & (__GFP_BITS_MASK & ~GFP_ZONEMASK); + return root->xa_flags & (__GFP_BITS_MASK & ~GFP_ZONEMASK); } static inline void tag_set(struct radix_tree_node *node, unsigned int tag, @@ -147,32 +147,32 @@ static inline int tag_get(const struct radix_tree_node *node, unsigned int tag, static inline void root_tag_set(struct radix_tree_root *root, unsigned tag) { - root->gfp_mask |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT)); + root->xa_flags |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT)); } static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag) { - root->gfp_mask &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT)); + root->xa_flags &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT)); } static inline void root_tag_clear_all(struct radix_tree_root *root) { - root->gfp_mask &= (1 << ROOT_TAG_SHIFT) - 1; + root->xa_flags &= (__force gfp_t)((1 << ROOT_TAG_SHIFT) - 1); } static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag) { - return (__force int)root->gfp_mask & (1 << (tag + ROOT_TAG_SHIFT)); + return (__force int)root->xa_flags & (1 << (tag + ROOT_TAG_SHIFT)); } static inline unsigned root_tags_get(const struct radix_tree_root *root) { - return (__force unsigned)root->gfp_mask >> ROOT_TAG_SHIFT; + return (__force unsigned)root->xa_flags >> ROOT_TAG_SHIFT; } static inline bool is_idr(const struct radix_tree_root *root) { - return !!(root->gfp_mask & ROOT_IS_IDR); + return !!(root->xa_flags & ROOT_IS_IDR); } /* @@ -291,12 +291,12 @@ static void dump_node(struct radix_tree_node *node, unsigned long index) /* For debug */ static void radix_tree_dump(struct radix_tree_root *root) { - pr_debug("radix root: %p rnode %p tags %x\n", - root, root->rnode, - root->gfp_mask >> ROOT_TAG_SHIFT); - if (!radix_tree_is_internal_node(root->rnode)) + pr_debug("radix root: %p xa_head %p tags %x\n", + root, root->xa_head, + root->xa_flags >> ROOT_TAG_SHIFT); + if (!radix_tree_is_internal_node(root->xa_head)) return; - dump_node(entry_to_node(root->rnode), 0); + dump_node(entry_to_node(root->xa_head), 0); } static void dump_ida_node(void *entry, unsigned long index) @@ -340,9 +340,9 @@ static void dump_ida_node(void *entry, unsigned long index) static void ida_dump(struct ida *ida) { struct radix_tree_root *root = &ida->ida_rt; - pr_debug("ida: %p node %p free %d\n", ida, root->rnode, - root->gfp_mask >> ROOT_TAG_SHIFT); - dump_ida_node(root->rnode, 0); + pr_debug("ida: %p node %p free %d\n", ida, root->xa_head, + root->xa_flags >> ROOT_TAG_SHIFT); + dump_ida_node(root->xa_head, 0); } #endif @@ -576,7 +576,7 @@ int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order) static unsigned radix_tree_load_root(const struct radix_tree_root *root, struct radix_tree_node **nodep, unsigned long *maxindex) { - struct radix_tree_node *node = rcu_dereference_raw(root->rnode); + struct radix_tree_node *node = rcu_dereference_raw(root->xa_head); *nodep = node; @@ -605,7 +605,7 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, while (index > shift_maxindex(maxshift)) maxshift += RADIX_TREE_MAP_SHIFT; - entry = rcu_dereference_raw(root->rnode); + entry = rcu_dereference_raw(root->xa_head); if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE))) goto out; @@ -633,7 +633,7 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, if (radix_tree_is_internal_node(entry)) { entry_to_node(entry)->parent = node; } else if (xa_is_value(entry)) { - /* Moving an exceptional root->rnode to a node */ + /* Moving an exceptional root->xa_head to a node */ node->exceptional = 1; } /* @@ -642,7 +642,7 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, */ node->slots[0] = (void __rcu *)entry; entry = node_to_entry(node); - rcu_assign_pointer(root->rnode, entry); + rcu_assign_pointer(root->xa_head, entry); shift += RADIX_TREE_MAP_SHIFT; } while (shift <= maxshift); out: @@ -659,7 +659,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root, bool shrunk = false; for (;;) { - struct radix_tree_node *node = rcu_dereference_raw(root->rnode); + struct radix_tree_node *node = rcu_dereference_raw(root->xa_head); struct radix_tree_node *child; if (!radix_tree_is_internal_node(node)) @@ -695,9 +695,9 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root, * moving the node from one part of the tree to another: if it * was safe to dereference the old pointer to it * (node->slots[0]), it will be safe to dereference the new - * one (root->rnode) as far as dependent read barriers go. + * one (root->xa_head) as far as dependent read barriers go. */ - root->rnode = (void __rcu *)child; + root->xa_head = (void __rcu *)child; if (is_idr(root) && !tag_get(node, IDR_FREE, 0)) root_tag_clear(root, IDR_FREE); @@ -745,9 +745,8 @@ static bool delete_node(struct radix_tree_root *root, if (node->count) { if (node_to_entry(node) == - rcu_dereference_raw(root->rnode)) - deleted |= radix_tree_shrink(root, - update_node); + rcu_dereference_raw(root->xa_head)) + deleted |= radix_tree_shrink(root, update_node); return deleted; } @@ -762,7 +761,7 @@ static bool delete_node(struct radix_tree_root *root, */ if (!is_idr(root)) root_tag_clear_all(root); - root->rnode = NULL; + root->xa_head = NULL; } WARN_ON_ONCE(!list_empty(&node->private_list)); @@ -787,7 +786,7 @@ static bool delete_node(struct radix_tree_root *root, * at position @index in the radix tree @root. * * Until there is more than one item in the tree, no nodes are - * allocated and @root->rnode is used as a direct slot instead of + * allocated and @root->xa_head is used as a direct slot instead of * pointing to a node, in which case *@nodep will be NULL. * * Returns -ENOMEM, or 0 for success. @@ -797,7 +796,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, void __rcu ***slotp) { struct radix_tree_node *node = NULL, *child; - void __rcu **slot = (void __rcu **)&root->rnode; + void __rcu **slot = (void __rcu **)&root->xa_head; unsigned long maxindex; unsigned int shift, offset = 0; unsigned long max = index | ((1UL << order) - 1); @@ -813,7 +812,7 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index, if (error < 0) return error; shift = error; - child = rcu_dereference_raw(root->rnode); + child = rcu_dereference_raw(root->xa_head); } while (shift > order) { @@ -1004,7 +1003,7 @@ EXPORT_SYMBOL(__radix_tree_insert); * tree @root. * * Until there is more than one item in the tree, no nodes are - * allocated and @root->rnode is used as a direct slot instead of + * allocated and @root->xa_head is used as a direct slot instead of * pointing to a node, in which case *@nodep will be NULL. */ void *__radix_tree_lookup(const struct radix_tree_root *root, @@ -1017,7 +1016,7 @@ void *__radix_tree_lookup(const struct radix_tree_root *root, restart: parent = NULL; - slot = (void __rcu **)&root->rnode; + slot = (void __rcu **)&root->xa_head; radix_tree_load_root(root, &node, &maxindex); if (index > maxindex) return NULL; @@ -1168,9 +1167,9 @@ void __radix_tree_replace(struct radix_tree_root *root, /* * This function supports replacing exceptional entries and * deleting entries, but that needs accounting against the - * node unless the slot is root->rnode. + * node unless the slot is root->xa_head. */ - WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->rnode) && + WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) && (count || exceptional)); replace_slot(slot, item, node, count, exceptional); @@ -1722,7 +1721,7 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root, iter->tags = 1; iter->node = NULL; __set_iter_shift(iter, 0); - return (void __rcu **)&root->rnode; + return (void __rcu **)&root->xa_head; } do { @@ -2109,7 +2108,7 @@ void __rcu **idr_get_free(struct radix_tree_root *root, unsigned long max) { struct radix_tree_node *node = NULL, *child; - void __rcu **slot = (void __rcu **)&root->rnode; + void __rcu **slot = (void __rcu **)&root->xa_head; unsigned long maxindex, start = iter->next_index; unsigned int shift, offset = 0; @@ -2125,7 +2124,7 @@ void __rcu **idr_get_free(struct radix_tree_root *root, if (error < 0) return ERR_PTR(error); shift = error; - child = rcu_dereference_raw(root->rnode); + child = rcu_dereference_raw(root->xa_head); } if (start == 0 && shift == 0) shift = RADIX_TREE_MAP_SHIFT; @@ -2190,10 +2189,10 @@ void __rcu **idr_get_free(struct radix_tree_root *root, */ void idr_destroy(struct idr *idr) { - struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.rnode); + struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.xa_head); if (radix_tree_is_internal_node(node)) radix_tree_free_nodes(node); - idr->idr_rt.rnode = NULL; + idr->idr_rt.xa_head = NULL; root_tag_set(&idr->idr_rt, IDR_FREE); } EXPORT_SYMBOL(idr_destroy); diff --git a/lib/xarray.c b/lib/xarray.c new file mode 100644 index 000000000000..862f4c64c754 --- /dev/null +++ b/lib/xarray.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * XArray implementation + * Copyright (c) 2017 Microsoft Corporation + * Author: Matthew Wilcox + */ + +#include +#include + +/* + * Coding conventions in this file: + * + * @xa is used to refer to the entire xarray. + * @xas is the 'xarray operation state'. It may be either a pointer to + * an xa_state, or an xa_state stored on the stack. This is an unfortunate + * ambiguity. + * @index is the index of the entry being operated on + * @mark is an xa_mark_t; a small number indicating one of the mark bits. + * @node refers to an xa_node; usually the primary one being operated on by + * this function. + * @offset is the index into the slots array inside an xa_node. + * @parent refers to the @xa_node closer to the head than @node. + * @entry refers to something stored in a slot in the xarray + */ + +/** + * xa_init_flags() - Initialise an empty XArray with flags. + * @xa: XArray. + * @flags: XA_FLAG values. + * + * If you need to initialise an XArray with special flags (eg you need + * to take the lock from interrupt context), use this function instead + * of xa_init(). + * + * Context: Any context. + */ +void xa_init_flags(struct xarray *xa, gfp_t flags) +{ + spin_lock_init(&xa->xa_lock); + xa->xa_flags = flags; + xa->xa_head = NULL; +} +EXPORT_SYMBOL(xa_init_flags); -- cgit v1.2.3 From 01959dfe771c6893365482ec78dc1d9cbbbe6de8 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 9 Nov 2017 09:23:56 -0500 Subject: xarray: Define struct xa_node This is a direct replacement for struct radix_tree_node. A couple of struct members have changed name, so convert those. Use a #define so that radix tree users continue to work without change. Signed-off-by: Matthew Wilcox Reviewed-by: Josef Bacik --- lib/radix-tree.c | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 299d4bdba109..8a568cea1237 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -260,11 +260,11 @@ static void dump_node(struct radix_tree_node *node, unsigned long index) { unsigned long i; - pr_debug("radix node: %p offset %d indices %lu-%lu parent %p tags %lx %lx %lx shift %d count %d exceptional %d\n", + pr_debug("radix node: %p offset %d indices %lu-%lu parent %p tags %lx %lx %lx shift %d count %d nr_values %d\n", node, node->offset, index, index | node_maxindex(node), node->parent, node->tags[0][0], node->tags[1][0], node->tags[2][0], - node->shift, node->count, node->exceptional); + node->shift, node->count, node->nr_values); for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { unsigned long first = index | (i << node->shift); @@ -354,7 +354,7 @@ static struct radix_tree_node * radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent, struct radix_tree_root *root, unsigned int shift, unsigned int offset, - unsigned int count, unsigned int exceptional) + unsigned int count, unsigned int nr_values) { struct radix_tree_node *ret = NULL; @@ -401,9 +401,9 @@ out: ret->shift = shift; ret->offset = offset; ret->count = count; - ret->exceptional = exceptional; + ret->nr_values = nr_values; ret->parent = parent; - ret->root = root; + ret->array = root; } return ret; } @@ -633,8 +633,8 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp, if (radix_tree_is_internal_node(entry)) { entry_to_node(entry)->parent = node; } else if (xa_is_value(entry)) { - /* Moving an exceptional root->xa_head to a node */ - node->exceptional = 1; + /* Moving a value entry root->xa_head to a node */ + node->nr_values = 1; } /* * entry was already in the radix tree, so we do not need @@ -928,12 +928,12 @@ static inline int insert_entries(struct radix_tree_node *node, if (xa_is_node(old)) radix_tree_free_nodes(old); if (xa_is_value(old)) - node->exceptional--; + node->nr_values--; } if (node) { node->count += n; if (xa_is_value(item)) - node->exceptional += n; + node->nr_values += n; } return n; } @@ -947,7 +947,7 @@ static inline int insert_entries(struct radix_tree_node *node, if (node) { node->count++; if (xa_is_value(item)) - node->exceptional++; + node->nr_values++; } return 1; } @@ -1083,7 +1083,7 @@ void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index) EXPORT_SYMBOL(radix_tree_lookup); static inline void replace_sibling_entries(struct radix_tree_node *node, - void __rcu **slot, int count, int exceptional) + void __rcu **slot, int count, int values) { #ifdef CONFIG_RADIX_TREE_MULTIORDER unsigned offset = get_slot_offset(node, slot); @@ -1096,18 +1096,18 @@ static inline void replace_sibling_entries(struct radix_tree_node *node, node->slots[offset] = NULL; node->count--; } - node->exceptional += exceptional; + node->nr_values += values; } #endif } static void replace_slot(void __rcu **slot, void *item, - struct radix_tree_node *node, int count, int exceptional) + struct radix_tree_node *node, int count, int values) { - if (node && (count || exceptional)) { + if (node && (count || values)) { node->count += count; - node->exceptional += exceptional; - replace_sibling_entries(node, slot, count, exceptional); + node->nr_values += values; + replace_sibling_entries(node, slot, count, values); } rcu_assign_pointer(*slot, item); @@ -1161,17 +1161,17 @@ void __radix_tree_replace(struct radix_tree_root *root, radix_tree_update_node_t update_node) { void *old = rcu_dereference_raw(*slot); - int exceptional = !!xa_is_value(item) - !!xa_is_value(old); + int values = !!xa_is_value(item) - !!xa_is_value(old); int count = calculate_count(root, node, slot, item, old); /* - * This function supports replacing exceptional entries and + * This function supports replacing value entries and * deleting entries, but that needs accounting against the * node unless the slot is root->xa_head. */ WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) && - (count || exceptional)); - replace_slot(slot, item, node, count, exceptional); + (count || values)); + replace_slot(slot, item, node, count, values); if (!node) return; @@ -1193,7 +1193,7 @@ void __radix_tree_replace(struct radix_tree_root *root, * across slot lookup and replacement. * * NOTE: This cannot be used to switch between non-entries (empty slots), - * regular entries, and exceptional entries, as that requires accounting + * regular entries, and value entries, as that requires accounting * inside the radix tree node. When switching from one type of entry or * deleting, use __radix_tree_lookup() and __radix_tree_replace() or * radix_tree_iter_replace(). @@ -1301,7 +1301,7 @@ int radix_tree_split(struct radix_tree_root *root, unsigned long index, rcu_assign_pointer(parent->slots[end], RADIX_TREE_RETRY); } rcu_assign_pointer(parent->slots[offset], RADIX_TREE_RETRY); - parent->exceptional -= (end - offset); + parent->nr_values -= (end - offset); if (order == parent->shift) return 0; @@ -1961,7 +1961,7 @@ static bool __radix_tree_delete(struct radix_tree_root *root, struct radix_tree_node *node, void __rcu **slot) { void *old = rcu_dereference_raw(*slot); - int exceptional = xa_is_value(old) ? -1 : 0; + int values = xa_is_value(old) ? -1 : 0; unsigned offset = get_slot_offset(node, slot); int tag; @@ -1971,7 +1971,7 @@ static bool __radix_tree_delete(struct radix_tree_root *root, for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) node_tag_clear(root, node, tag, offset); - replace_slot(slot, NULL, node, -1, exceptional); + replace_slot(slot, NULL, node, -1, values); return node && delete_node(root, node, NULL); } -- cgit v1.2.3 From ad3d6c7263e368abdc151e1cc13dc78aa39cc7a7 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 7 Nov 2017 14:57:46 -0500 Subject: xarray: Add XArray load operation The xa_load function brings with it a lot of infrastructure; xa_empty(), xa_is_err(), and large chunks of the XArray advanced API that are used to implement xa_load. As the test-suite demonstrates, it is possible to use the XArray functions on a radix tree. The radix tree functions depend on the GFP flags being stored in the root of the tree, so it's not possible to use the radix tree functions on an XArray. Signed-off-by: Matthew Wilcox --- lib/Kconfig.debug | 3 + lib/Makefile | 1 + lib/radix-tree.c | 43 ------------ lib/test_xarray.c | 87 ++++++++++++++++++++++++ lib/xarray.c | 195 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 286 insertions(+), 43 deletions(-) create mode 100644 lib/test_xarray.c (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 4966c4fbe7f7..091155e12422 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1813,6 +1813,9 @@ config TEST_BITFIELD config TEST_UUID tristate "Test functions located in the uuid module at runtime" +config TEST_XARRAY + tristate "Test the XArray code at runtime" + config TEST_OVERFLOW tristate "Test check_*_overflow() functions at runtime" diff --git a/lib/Makefile b/lib/Makefile index 057385f1f145..e6f809556af5 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -68,6 +68,7 @@ obj-$(CONFIG_TEST_PRINTF) += test_printf.o obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o obj-$(CONFIG_TEST_BITFIELD) += test_bitfield.o obj-$(CONFIG_TEST_UUID) += test_uuid.o +obj-$(CONFIG_TEST_XARRAY) += test_xarray.o obj-$(CONFIG_TEST_PARMAN) += test_parman.o obj-$(CONFIG_TEST_KMOD) += test_kmod.o obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 8a568cea1237..b8e961428484 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -256,49 +256,6 @@ static unsigned long next_index(unsigned long index, } #ifndef __KERNEL__ -static void dump_node(struct radix_tree_node *node, unsigned long index) -{ - unsigned long i; - - pr_debug("radix node: %p offset %d indices %lu-%lu parent %p tags %lx %lx %lx shift %d count %d nr_values %d\n", - node, node->offset, index, index | node_maxindex(node), - node->parent, - node->tags[0][0], node->tags[1][0], node->tags[2][0], - node->shift, node->count, node->nr_values); - - for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) { - unsigned long first = index | (i << node->shift); - unsigned long last = first | ((1UL << node->shift) - 1); - void *entry = node->slots[i]; - if (!entry) - continue; - if (entry == RADIX_TREE_RETRY) { - pr_debug("radix retry offset %ld indices %lu-%lu parent %p\n", - i, first, last, node); - } else if (!radix_tree_is_internal_node(entry)) { - pr_debug("radix entry %p offset %ld indices %lu-%lu parent %p\n", - entry, i, first, last, node); - } else if (xa_is_sibling(entry)) { - pr_debug("radix sblng %p offset %ld indices %lu-%lu parent %p val %p\n", - entry, i, first, last, node, - node->slots[xa_to_sibling(entry)]); - } else { - dump_node(entry_to_node(entry), first); - } - } -} - -/* For debug */ -static void radix_tree_dump(struct radix_tree_root *root) -{ - pr_debug("radix root: %p xa_head %p tags %x\n", - root, root->xa_head, - root->xa_flags >> ROOT_TAG_SHIFT); - if (!radix_tree_is_internal_node(root->xa_head)) - return; - dump_node(entry_to_node(root->xa_head), 0); -} - static void dump_ida_node(void *entry, unsigned long index) { unsigned long i; diff --git a/lib/test_xarray.c b/lib/test_xarray.c new file mode 100644 index 000000000000..a7248b87617f --- /dev/null +++ b/lib/test_xarray.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * test_xarray.c: Test the XArray API + * Copyright (c) 2017-2018 Microsoft Corporation + * Author: Matthew Wilcox + */ + +#include +#include + +static unsigned int tests_run; +static unsigned int tests_passed; + +#ifndef XA_DEBUG +# ifdef __KERNEL__ +void xa_dump(const struct xarray *xa) { } +# endif +#undef XA_BUG_ON +#define XA_BUG_ON(xa, x) do { \ + tests_run++; \ + if (x) { \ + printk("BUG at %s:%d\n", __func__, __LINE__); \ + xa_dump(xa); \ + dump_stack(); \ + } else { \ + tests_passed++; \ + } \ +} while (0) +#endif + +static void *xa_store_index(struct xarray *xa, unsigned long index, gfp_t gfp) +{ + radix_tree_insert(xa, index, xa_mk_value(index)); + return NULL; +} + +static void xa_erase_index(struct xarray *xa, unsigned long index) +{ + radix_tree_delete(xa, index); +} + +static noinline void check_xa_load(struct xarray *xa) +{ + unsigned long i, j; + + for (i = 0; i < 1024; i++) { + for (j = 0; j < 1024; j++) { + void *entry = xa_load(xa, j); + if (j < i) + XA_BUG_ON(xa, xa_to_value(entry) != j); + else + XA_BUG_ON(xa, entry); + } + XA_BUG_ON(xa, xa_store_index(xa, i, GFP_KERNEL) != NULL); + } + + for (i = 0; i < 1024; i++) { + for (j = 0; j < 1024; j++) { + void *entry = xa_load(xa, j); + if (j >= i) + XA_BUG_ON(xa, xa_to_value(entry) != j); + else + XA_BUG_ON(xa, entry); + } + xa_erase_index(xa, i); + } + XA_BUG_ON(xa, !xa_empty(xa)); +} + +static RADIX_TREE(array, GFP_KERNEL); + +static int xarray_checks(void) +{ + check_xa_load(&array); + + printk("XArray: %u of %u tests passed\n", tests_passed, tests_run); + return (tests_run == tests_passed) ? 0 : -EINVAL; +} + +static void xarray_exit(void) +{ +} + +module_init(xarray_checks); +module_exit(xarray_exit); +MODULE_AUTHOR("Matthew Wilcox "); +MODULE_LICENSE("GPL"); diff --git a/lib/xarray.c b/lib/xarray.c index 862f4c64c754..19cfcbc69a68 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -24,6 +24,100 @@ * @entry refers to something stored in a slot in the xarray */ +/* extracts the offset within this node from the index */ +static unsigned int get_offset(unsigned long index, struct xa_node *node) +{ + return (index >> node->shift) & XA_CHUNK_MASK; +} + +/* move the index either forwards (find) or backwards (sibling slot) */ +static void xas_move_index(struct xa_state *xas, unsigned long offset) +{ + unsigned int shift = xas->xa_node->shift; + xas->xa_index &= ~XA_CHUNK_MASK << shift; + xas->xa_index += offset << shift; +} + +static void *set_bounds(struct xa_state *xas) +{ + xas->xa_node = XAS_BOUNDS; + return NULL; +} + +/* + * Starts a walk. If the @xas is already valid, we assume that it's on + * the right path and just return where we've got to. If we're in an + * error state, return NULL. If the index is outside the current scope + * of the xarray, return NULL without changing @xas->xa_node. Otherwise + * set @xas->xa_node to NULL and return the current head of the array. + */ +static void *xas_start(struct xa_state *xas) +{ + void *entry; + + if (xas_valid(xas)) + return xas_reload(xas); + if (xas_error(xas)) + return NULL; + + entry = xa_head(xas->xa); + if (!xa_is_node(entry)) { + if (xas->xa_index) + return set_bounds(xas); + } else { + if ((xas->xa_index >> xa_to_node(entry)->shift) > XA_CHUNK_MASK) + return set_bounds(xas); + } + + xas->xa_node = NULL; + return entry; +} + +static void *xas_descend(struct xa_state *xas, struct xa_node *node) +{ + unsigned int offset = get_offset(xas->xa_index, node); + void *entry = xa_entry(xas->xa, node, offset); + + xas->xa_node = node; + if (xa_is_sibling(entry)) { + offset = xa_to_sibling(entry); + entry = xa_entry(xas->xa, node, offset); + } + + xas->xa_offset = offset; + return entry; +} + +/** + * xas_load() - Load an entry from the XArray (advanced). + * @xas: XArray operation state. + * + * Usually walks the @xas to the appropriate state to load the entry + * stored at xa_index. However, it will do nothing and return %NULL if + * @xas is in an error state. xas_load() will never expand the tree. + * + * If the xa_state is set up to operate on a multi-index entry, xas_load() + * may return %NULL or an internal entry, even if there are entries + * present within the range specified by @xas. + * + * Context: Any context. The caller should hold the xa_lock or the RCU lock. + * Return: Usually an entry in the XArray, but see description for exceptions. + */ +void *xas_load(struct xa_state *xas) +{ + void *entry = xas_start(xas); + + while (xa_is_node(entry)) { + struct xa_node *node = xa_to_node(entry); + + if (xas->xa_shift > node->shift) + break; + entry = xas_descend(xas, node); + } + return entry; +} +EXPORT_SYMBOL_GPL(xas_load); + /** * xa_init_flags() - Initialise an empty XArray with flags. * @xa: XArray. @@ -42,3 +136,104 @@ void xa_init_flags(struct xarray *xa, gfp_t flags) xa->xa_head = NULL; } EXPORT_SYMBOL(xa_init_flags); + +/** + * xa_load() - Load an entry from an XArray. + * @xa: XArray. + * @index: index into array. + * + * Context: Any context. Takes and releases the RCU lock. + * Return: The entry at @index in @xa. + */ +void *xa_load(struct xarray *xa, unsigned long index) +{ + XA_STATE(xas, xa, index); + void *entry; + + rcu_read_lock(); + do { + entry = xas_load(&xas); + } while (xas_retry(&xas, entry)); + rcu_read_unlock(); + + return entry; +} +EXPORT_SYMBOL(xa_load); + +#ifdef XA_DEBUG +void xa_dump_node(const struct xa_node *node) +{ + unsigned i, j; + + if (!node) + return; + if ((unsigned long)node & 3) { + pr_cont("node %px\n", node); + return; + } + + pr_cont("node %px %s %d parent %px shift %d count %d values %d " + "array %px list %px %px marks", + node, node->parent ? "offset" : "max", node->offset, + node->parent, node->shift, node->count, node->nr_values, + node->array, node->private_list.prev, node->private_list.next); + for (i = 0; i < XA_MAX_MARKS; i++) + for (j = 0; j < XA_MARK_LONGS; j++) + pr_cont(" %lx", node->marks[i][j]); + pr_cont("\n"); +} + +void xa_dump_index(unsigned long index, unsigned int shift) +{ + if (!shift) + pr_info("%lu: ", index); + else if (shift >= BITS_PER_LONG) + pr_info("0-%lu: ", ~0UL); + else + pr_info("%lu-%lu: ", index, index | ((1UL << shift) - 1)); +} + +void xa_dump_entry(const void *entry, unsigned long index, unsigned long shift) +{ + if (!entry) + return; + + xa_dump_index(index, shift); + + if (xa_is_node(entry)) { + if (shift == 0) { + pr_cont("%px\n", entry); + } else { + unsigned long i; + struct xa_node *node = xa_to_node(entry); + xa_dump_node(node); + for (i = 0; i < XA_CHUNK_SIZE; i++) + xa_dump_entry(node->slots[i], + index + (i << node->shift), node->shift); + } + } else if (xa_is_value(entry)) + pr_cont("value %ld (0x%lx) [%px]\n", xa_to_value(entry), + xa_to_value(entry), entry); + else if (!xa_is_internal(entry)) + pr_cont("%px\n", entry); + else if (xa_is_retry(entry)) + pr_cont("retry (%ld)\n", xa_to_internal(entry)); + else if (xa_is_sibling(entry)) + pr_cont("sibling (slot %ld)\n", xa_to_sibling(entry)); + else + pr_cont("UNKNOWN ENTRY (%px)\n", entry); +} + +void xa_dump(const struct xarray *xa) +{ + void *entry = xa->xa_head; + unsigned int shift = 0; + + pr_info("xarray: %px head %px flags %x marks %d %d %d\n", xa, entry, + xa->xa_flags, radix_tree_tagged(xa, 0), + radix_tree_tagged(xa, 1), radix_tree_tagged(xa, 2)); + if (xa_is_node(entry)) + shift = xa_to_node(entry)->shift + XA_CHUNK_SHIFT; + xa_dump_entry(entry, 0, shift); +} +#endif -- cgit v1.2.3 From 9b89a0355144685a787b0dc5bcf7bdd6f2d02968 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 10 Nov 2017 09:34:31 -0500 Subject: xarray: Add XArray marks XArray marks are like the radix tree tags, only slightly more strongly typed. They are renamed in order to distinguish them from tagged pointers. This commit adds the basic get/set/clear operations. Signed-off-by: Matthew Wilcox --- lib/test_xarray.c | 34 ++++++++ lib/xarray.c | 232 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 264 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/test_xarray.c b/lib/test_xarray.c index a7248b87617f..f8b0e9ba80a4 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -67,11 +67,45 @@ static noinline void check_xa_load(struct xarray *xa) XA_BUG_ON(xa, !xa_empty(xa)); } +static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index) +{ + /* NULL elements have no marks set */ + XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0)); + xa_set_mark(xa, index, XA_MARK_0); + XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0)); + + /* Storing a pointer will not make a mark appear */ + XA_BUG_ON(xa, xa_store_index(xa, index, GFP_KERNEL) != NULL); + XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0)); + xa_set_mark(xa, index, XA_MARK_0); + XA_BUG_ON(xa, !xa_get_mark(xa, index, XA_MARK_0)); + + /* Setting one mark will not set another mark */ + XA_BUG_ON(xa, xa_get_mark(xa, index + 1, XA_MARK_0)); + XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_1)); + + /* Storing NULL clears marks, and they can't be set again */ + xa_erase_index(xa, index); + XA_BUG_ON(xa, !xa_empty(xa)); + XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0)); + xa_set_mark(xa, index, XA_MARK_0); + XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0)); +} + +static noinline void check_xa_mark(struct xarray *xa) +{ + unsigned long index; + + for (index = 0; index < 16384; index += 4) + check_xa_mark_1(xa, index); +} + static RADIX_TREE(array, GFP_KERNEL); static int xarray_checks(void) { check_xa_load(&array); + check_xa_mark(&array); printk("XArray: %u of %u tests passed\n", tests_passed, tests_run); return (tests_run == tests_passed) ? 0 : -EINVAL; diff --git a/lib/xarray.c b/lib/xarray.c index 19cfcbc69a68..aa86c47e532f 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -5,6 +5,7 @@ * Author: Matthew Wilcox */ +#include #include #include @@ -24,6 +25,48 @@ * @entry refers to something stored in a slot in the xarray */ +static inline void xa_mark_set(struct xarray *xa, xa_mark_t mark) +{ + if (!(xa->xa_flags & XA_FLAGS_MARK(mark))) + xa->xa_flags |= XA_FLAGS_MARK(mark); +} + +static inline void xa_mark_clear(struct xarray *xa, xa_mark_t mark) +{ + if (xa->xa_flags & XA_FLAGS_MARK(mark)) + xa->xa_flags &= ~(XA_FLAGS_MARK(mark)); +} + +static inline unsigned long *node_marks(struct xa_node *node, xa_mark_t mark) +{ + return node->marks[(__force unsigned)mark]; +} + +static inline bool node_get_mark(struct xa_node *node, + unsigned int offset, xa_mark_t mark) +{ + return test_bit(offset, node_marks(node, mark)); +} + +/* returns true if the bit was set */ +static inline bool node_set_mark(struct xa_node *node, unsigned int offset, + xa_mark_t mark) +{ + return __test_and_set_bit(offset, node_marks(node, mark)); +} + +/* returns true if the bit was set */ +static inline bool node_clear_mark(struct xa_node *node, unsigned int offset, + xa_mark_t mark) +{ + return __test_and_clear_bit(offset, node_marks(node, mark)); +} + +static inline bool node_any_mark(struct xa_node *node, xa_mark_t mark) +{ + return !bitmap_empty(node_marks(node, mark), XA_CHUNK_SIZE); +} + /* extracts the offset within this node from the index */ static unsigned int get_offset(unsigned long index, struct xa_node *node) { @@ -118,6 +161,85 @@ void *xas_load(struct xa_state *xas) } EXPORT_SYMBOL_GPL(xas_load); +/** + * xas_get_mark() - Returns the state of this mark. + * @xas: XArray operation state. + * @mark: Mark number. + * + * Return: true if the mark is set, false if the mark is clear or @xas + * is in an error state. + */ +bool xas_get_mark(const struct xa_state *xas, xa_mark_t mark) +{ + if (xas_invalid(xas)) + return false; + if (!xas->xa_node) + return xa_marked(xas->xa, mark); + return node_get_mark(xas->xa_node, xas->xa_offset, mark); +} +EXPORT_SYMBOL_GPL(xas_get_mark); + +/** + * xas_set_mark() - Sets the mark on this entry and its parents. + * @xas: XArray operation state. + * @mark: Mark number. + * + * Sets the specified mark on this entry, and walks up the tree setting it + * on all the ancestor entries. Does nothing if @xas has not been walked to + * an entry, or is in an error state. + */ +void xas_set_mark(const struct xa_state *xas, xa_mark_t mark) +{ + struct xa_node *node = xas->xa_node; + unsigned int offset = xas->xa_offset; + + if (xas_invalid(xas)) + return; + + while (node) { + if (node_set_mark(node, offset, mark)) + return; + offset = node->offset; + node = xa_parent_locked(xas->xa, node); + } + + if (!xa_marked(xas->xa, mark)) + xa_mark_set(xas->xa, mark); +} +EXPORT_SYMBOL_GPL(xas_set_mark); + +/** + * xas_clear_mark() - Clears the mark on this entry and its parents. + * @xas: XArray operation state. + * @mark: Mark number. + * + * Clears the specified mark on this entry, and walks back to the head + * attempting to clear it on all the ancestor entries. Does nothing if + * @xas has not been walked to an entry, or is in an error state. + */ +void xas_clear_mark(const struct xa_state *xas, xa_mark_t mark) +{ + struct xa_node *node = xas->xa_node; + unsigned int offset = xas->xa_offset; + + if (xas_invalid(xas)) + return; + + while (node) { + if (!node_clear_mark(node, offset, mark)) + return; + if (node_any_mark(node, mark)) + return; + + offset = node->offset; + node = xa_parent_locked(xas->xa, node); + } + + if (xa_marked(xas->xa, mark)) + xa_mark_clear(xas->xa, mark); +} +EXPORT_SYMBOL_GPL(xas_clear_mark); + /** * xa_init_flags() - Initialise an empty XArray with flags. * @xa: XArray. @@ -160,6 +282,112 @@ void *xa_load(struct xarray *xa, unsigned long index) } EXPORT_SYMBOL(xa_load); +/** + * __xa_set_mark() - Set this mark on this entry while locked. + * @xa: XArray. + * @index: Index of entry. + * @mark: Mark number. + * + * Attempting to set a mark on a NULL entry does not succeed. + * + * Context: Any context. Expects xa_lock to be held on entry. + */ +void __xa_set_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) +{ + XA_STATE(xas, xa, index); + void *entry = xas_load(&xas); + + if (entry) + xas_set_mark(&xas, mark); +} +EXPORT_SYMBOL_GPL(__xa_set_mark); + +/** + * __xa_clear_mark() - Clear this mark on this entry while locked. + * @xa: XArray. + * @index: Index of entry. + * @mark: Mark number. + * + * Context: Any context. Expects xa_lock to be held on entry. + */ +void __xa_clear_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) +{ + XA_STATE(xas, xa, index); + void *entry = xas_load(&xas); + + if (entry) + xas_clear_mark(&xas, mark); +} +EXPORT_SYMBOL_GPL(__xa_clear_mark); + +/** + * xa_get_mark() - Inquire whether this mark is set on this entry. + * @xa: XArray. + * @index: Index of entry. + * @mark: Mark number. + * + * This function uses the RCU read lock, so the result may be out of date + * by the time it returns. If you need the result to be stable, use a lock. + * + * Context: Any context. Takes and releases the RCU lock. + * Return: True if the entry at @index has this mark set, false if it doesn't. + */ +bool xa_get_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) +{ + XA_STATE(xas, xa, index); + void *entry; + + rcu_read_lock(); + entry = xas_start(&xas); + while (xas_get_mark(&xas, mark)) { + if (!xa_is_node(entry)) + goto found; + entry = xas_descend(&xas, xa_to_node(entry)); + } + rcu_read_unlock(); + return false; + found: + rcu_read_unlock(); + return true; +} +EXPORT_SYMBOL(xa_get_mark); + +/** + * xa_set_mark() - Set this mark on this entry. + * @xa: XArray. + * @index: Index of entry. + * @mark: Mark number. + * + * Attempting to set a mark on a NULL entry does not succeed. + * + * Context: Process context. Takes and releases the xa_lock. + */ +void xa_set_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) +{ + xa_lock(xa); + __xa_set_mark(xa, index, mark); + xa_unlock(xa); +} +EXPORT_SYMBOL(xa_set_mark); + +/** + * xa_clear_mark() - Clear this mark on this entry. + * @xa: XArray. + * @index: Index of entry. + * @mark: Mark number. + * + * Clearing a mark always succeeds. + * + * Context: Process context. Takes and releases the xa_lock. + */ +void xa_clear_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) +{ + xa_lock(xa); + __xa_clear_mark(xa, index, mark); + xa_unlock(xa); +} +EXPORT_SYMBOL(xa_clear_mark); + #ifdef XA_DEBUG void xa_dump_node(const struct xa_node *node) { @@ -230,8 +458,8 @@ void xa_dump(const struct xarray *xa) unsigned int shift = 0; pr_info("xarray: %px head %px flags %x marks %d %d %d\n", xa, entry, - xa->xa_flags, radix_tree_tagged(xa, 0), - radix_tree_tagged(xa, 1), radix_tree_tagged(xa, 2)); + xa->xa_flags, xa_marked(xa, XA_MARK_0), + xa_marked(xa, XA_MARK_1), xa_marked(xa, XA_MARK_2)); if (xa_is_node(entry)) shift = xa_to_node(entry)->shift + XA_CHUNK_SHIFT; xa_dump_entry(entry, 0, shift); -- cgit v1.2.3 From 58d6ea3085f2e53714810a513c61629f6d2be0a6 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 10 Nov 2017 15:15:08 -0500 Subject: xarray: Add XArray unconditional store operations xa_store() differs from radix_tree_insert() in that it will overwrite an existing element in the array rather than returning an error. This is the behaviour which most users want, and those that want more complex behaviour generally want to use the xas family of routines anyway. For memory allocation, xa_store() will first attempt to request memory from the slab allocator; if memory is not immediately available, it will drop the xa_lock and allocate memory, keeping a pointer in the xa_state. It does not use the per-CPU cache, although those will continue to exist until all radix tree users are converted to the xarray. This patch also includes xa_erase() and __xa_erase() for a streamlined way to store NULL. Since there is no need to allocate memory in order to store a NULL in the XArray, we do not need to trouble the user with deciding what memory allocation flags to use. Signed-off-by: Matthew Wilcox --- lib/radix-tree.c | 4 +- lib/test_xarray.c | 177 +++++++++++++- lib/xarray.c | 693 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 868 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index b8e961428484..3479d93c32b9 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -47,7 +47,7 @@ static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly; /* * Radix tree node cache. */ -static struct kmem_cache *radix_tree_node_cachep; +struct kmem_cache *radix_tree_node_cachep; /* * The radix tree is variable-height, so an insert operation not only has @@ -365,7 +365,7 @@ out: return ret; } -static void radix_tree_node_rcu_free(struct rcu_head *head) +void radix_tree_node_rcu_free(struct rcu_head *head) { struct radix_tree_node *node = container_of(head, struct radix_tree_node, rcu_head); diff --git a/lib/test_xarray.c b/lib/test_xarray.c index f8b0e9ba80a4..b711030fbc32 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -30,13 +30,49 @@ void xa_dump(const struct xarray *xa) { } static void *xa_store_index(struct xarray *xa, unsigned long index, gfp_t gfp) { - radix_tree_insert(xa, index, xa_mk_value(index)); - return NULL; + return xa_store(xa, index, xa_mk_value(index & LONG_MAX), gfp); } static void xa_erase_index(struct xarray *xa, unsigned long index) { - radix_tree_delete(xa, index); + XA_BUG_ON(xa, xa_erase(xa, index) != xa_mk_value(index & LONG_MAX)); + XA_BUG_ON(xa, xa_load(xa, index) != NULL); +} + +/* + * If anyone needs this, please move it to xarray.c. We have no current + * users outside the test suite because all current multislot users want + * to use the advanced API. + */ +static void *xa_store_order(struct xarray *xa, unsigned long index, + unsigned order, void *entry, gfp_t gfp) +{ + XA_STATE_ORDER(xas, xa, index, order); + void *curr; + + do { + xas_lock(&xas); + curr = xas_store(&xas, entry); + xas_unlock(&xas); + } while (xas_nomem(&xas, gfp)); + + return curr; +} + +static noinline void check_xa_err(struct xarray *xa) +{ + XA_BUG_ON(xa, xa_err(xa_store_index(xa, 0, GFP_NOWAIT)) != 0); + XA_BUG_ON(xa, xa_err(xa_erase(xa, 0)) != 0); +#ifndef __KERNEL__ + /* The kernel does not fail GFP_NOWAIT allocations */ + XA_BUG_ON(xa, xa_err(xa_store_index(xa, 1, GFP_NOWAIT)) != -ENOMEM); + XA_BUG_ON(xa, xa_err(xa_store_index(xa, 1, GFP_NOWAIT)) != -ENOMEM); +#endif + XA_BUG_ON(xa, xa_err(xa_store_index(xa, 1, GFP_KERNEL)) != 0); + XA_BUG_ON(xa, xa_err(xa_store(xa, 1, xa_mk_value(0), GFP_KERNEL)) != 0); + XA_BUG_ON(xa, xa_err(xa_erase(xa, 1)) != 0); +// kills the test-suite :-( +// XA_BUG_ON(xa, xa_err(xa_store(xa, 0, xa_mk_internal(0), 0)) != -EINVAL); } static noinline void check_xa_load(struct xarray *xa) @@ -69,6 +105,9 @@ static noinline void check_xa_load(struct xarray *xa) static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index) { + unsigned int order; + unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 8 : 1; + /* NULL elements have no marks set */ XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0)); xa_set_mark(xa, index, XA_MARK_0); @@ -90,6 +129,37 @@ static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index) XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0)); xa_set_mark(xa, index, XA_MARK_0); XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0)); + + /* + * Storing a multi-index entry over entries with marks gives the + * entire entry the union of the marks + */ + BUG_ON((index % 4) != 0); + for (order = 2; order < max_order; order++) { + unsigned long base = round_down(index, 1UL << order); + unsigned long next = base + (1UL << order); + unsigned long i; + + XA_BUG_ON(xa, xa_store_index(xa, index + 1, GFP_KERNEL)); + xa_set_mark(xa, index + 1, XA_MARK_0); + XA_BUG_ON(xa, xa_store_index(xa, index + 2, GFP_KERNEL)); + xa_set_mark(xa, index + 2, XA_MARK_1); + XA_BUG_ON(xa, xa_store_index(xa, next, GFP_KERNEL)); + xa_store_order(xa, index, order, xa_mk_value(index), + GFP_KERNEL); + for (i = base; i < next; i++) { + XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_0)); + XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_1)); + XA_BUG_ON(xa, xa_get_mark(xa, i, XA_MARK_2)); + } + XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_0)); + XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_1)); + XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_2)); + xa_erase_index(xa, index); + xa_erase_index(xa, next); + XA_BUG_ON(xa, !xa_empty(xa)); + } + XA_BUG_ON(xa, !xa_empty(xa)); } static noinline void check_xa_mark(struct xarray *xa) @@ -100,12 +170,111 @@ static noinline void check_xa_mark(struct xarray *xa) check_xa_mark_1(xa, index); } -static RADIX_TREE(array, GFP_KERNEL); +static noinline void check_xa_shrink(struct xarray *xa) +{ + XA_STATE(xas, xa, 1); + struct xa_node *node; + + XA_BUG_ON(xa, !xa_empty(xa)); + XA_BUG_ON(xa, xa_store_index(xa, 0, GFP_KERNEL) != NULL); + XA_BUG_ON(xa, xa_store_index(xa, 1, GFP_KERNEL) != NULL); + + /* + * Check that erasing the entry at 1 shrinks the tree and properly + * marks the node as being deleted. + */ + xas_lock(&xas); + XA_BUG_ON(xa, xas_load(&xas) != xa_mk_value(1)); + node = xas.xa_node; + XA_BUG_ON(xa, xa_entry_locked(xa, node, 0) != xa_mk_value(0)); + XA_BUG_ON(xa, xas_store(&xas, NULL) != xa_mk_value(1)); + XA_BUG_ON(xa, xa_load(xa, 1) != NULL); + XA_BUG_ON(xa, xas.xa_node != XAS_BOUNDS); + XA_BUG_ON(xa, xa_entry_locked(xa, node, 0) != XA_RETRY_ENTRY); + XA_BUG_ON(xa, xas_load(&xas) != NULL); + xas_unlock(&xas); + XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(0)); + xa_erase_index(xa, 0); + XA_BUG_ON(xa, !xa_empty(xa)); +} + +static noinline void check_multi_store(struct xarray *xa) +{ +#ifdef CONFIG_XARRAY_MULTI + unsigned long i, j, k; + unsigned int max_order = (sizeof(long) == 4) ? 30 : 60; + + /* Loading from any position returns the same value */ + xa_store_order(xa, 0, 1, xa_mk_value(0), GFP_KERNEL); + XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(0)); + XA_BUG_ON(xa, xa_load(xa, 1) != xa_mk_value(0)); + XA_BUG_ON(xa, xa_load(xa, 2) != NULL); + rcu_read_lock(); + XA_BUG_ON(xa, xa_to_node(xa_head(xa))->count != 2); + XA_BUG_ON(xa, xa_to_node(xa_head(xa))->nr_values != 2); + rcu_read_unlock(); + + /* Storing adjacent to the value does not alter the value */ + xa_store(xa, 3, xa, GFP_KERNEL); + XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(0)); + XA_BUG_ON(xa, xa_load(xa, 1) != xa_mk_value(0)); + XA_BUG_ON(xa, xa_load(xa, 2) != NULL); + rcu_read_lock(); + XA_BUG_ON(xa, xa_to_node(xa_head(xa))->count != 3); + XA_BUG_ON(xa, xa_to_node(xa_head(xa))->nr_values != 2); + rcu_read_unlock(); + + /* Overwriting multiple indexes works */ + xa_store_order(xa, 0, 2, xa_mk_value(1), GFP_KERNEL); + XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(1)); + XA_BUG_ON(xa, xa_load(xa, 1) != xa_mk_value(1)); + XA_BUG_ON(xa, xa_load(xa, 2) != xa_mk_value(1)); + XA_BUG_ON(xa, xa_load(xa, 3) != xa_mk_value(1)); + XA_BUG_ON(xa, xa_load(xa, 4) != NULL); + rcu_read_lock(); + XA_BUG_ON(xa, xa_to_node(xa_head(xa))->count != 4); + XA_BUG_ON(xa, xa_to_node(xa_head(xa))->nr_values != 4); + rcu_read_unlock(); + + /* We can erase multiple values with a single store */ + xa_store_order(xa, 0, 63, NULL, GFP_KERNEL); + XA_BUG_ON(xa, !xa_empty(xa)); + + /* Even when the first slot is empty but the others aren't */ + xa_store_index(xa, 1, GFP_KERNEL); + xa_store_index(xa, 2, GFP_KERNEL); + xa_store_order(xa, 0, 2, NULL, GFP_KERNEL); + XA_BUG_ON(xa, !xa_empty(xa)); + + for (i = 0; i < max_order; i++) { + for (j = 0; j < max_order; j++) { + xa_store_order(xa, 0, i, xa_mk_value(i), GFP_KERNEL); + xa_store_order(xa, 0, j, xa_mk_value(j), GFP_KERNEL); + + for (k = 0; k < max_order; k++) { + void *entry = xa_load(xa, (1UL << k) - 1); + if ((i < k) && (j < k)) + XA_BUG_ON(xa, entry != NULL); + else + XA_BUG_ON(xa, entry != xa_mk_value(j)); + } + + xa_erase(xa, 0); + XA_BUG_ON(xa, !xa_empty(xa)); + } + } +#endif +} + +static DEFINE_XARRAY(array); static int xarray_checks(void) { + check_xa_err(&array); check_xa_load(&array); check_xa_mark(&array); + check_xa_shrink(&array); + check_multi_store(&array); printk("XArray: %u of %u tests passed\n", tests_passed, tests_run); return (tests_run == tests_passed) ? 0 : -EINVAL; diff --git a/lib/xarray.c b/lib/xarray.c index aa86c47e532f..4596a95ed9cd 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -7,6 +7,8 @@ #include #include +#include +#include #include /* @@ -25,6 +27,31 @@ * @entry refers to something stored in a slot in the xarray */ +static inline unsigned int xa_lock_type(const struct xarray *xa) +{ + return (__force unsigned int)xa->xa_flags & 3; +} + +static inline void xas_lock_type(struct xa_state *xas, unsigned int lock_type) +{ + if (lock_type == XA_LOCK_IRQ) + xas_lock_irq(xas); + else if (lock_type == XA_LOCK_BH) + xas_lock_bh(xas); + else + xas_lock(xas); +} + +static inline void xas_unlock_type(struct xa_state *xas, unsigned int lock_type) +{ + if (lock_type == XA_LOCK_IRQ) + xas_unlock_irq(xas); + else if (lock_type == XA_LOCK_BH) + xas_unlock_bh(xas); + else + xas_unlock(xas); +} + static inline void xa_mark_set(struct xarray *xa, xa_mark_t mark) { if (!(xa->xa_flags & XA_FLAGS_MARK(mark))) @@ -67,6 +94,34 @@ static inline bool node_any_mark(struct xa_node *node, xa_mark_t mark) return !bitmap_empty(node_marks(node, mark), XA_CHUNK_SIZE); } +#define mark_inc(mark) do { \ + mark = (__force xa_mark_t)((__force unsigned)(mark) + 1); \ +} while (0) + +/* + * xas_squash_marks() - Merge all marks to the first entry + * @xas: Array operation state. + * + * Set a mark on the first entry if any entry has it set. Clear marks on + * all sibling entries. + */ +static void xas_squash_marks(const struct xa_state *xas) +{ + unsigned int mark = 0; + unsigned int limit = xas->xa_offset + xas->xa_sibs + 1; + + if (!xas->xa_sibs) + return; + + do { + unsigned long *marks = xas->xa_node->marks[mark]; + if (find_next_bit(marks, limit, xas->xa_offset + 1) == limit) + continue; + __set_bit(xas->xa_offset, marks); + bitmap_clear(marks, xas->xa_offset + 1, xas->xa_sibs); + } while (mark++ != (__force unsigned)XA_MARK_MAX); +} + /* extracts the offset within this node from the index */ static unsigned int get_offset(unsigned long index, struct xa_node *node) { @@ -161,6 +216,516 @@ void *xas_load(struct xa_state *xas) } EXPORT_SYMBOL_GPL(xas_load); +/* Move the radix tree node cache here */ +extern struct kmem_cache *radix_tree_node_cachep; +extern void radix_tree_node_rcu_free(struct rcu_head *head); + +#define XA_RCU_FREE ((struct xarray *)1) + +static void xa_node_free(struct xa_node *node) +{ + XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); + node->array = XA_RCU_FREE; + call_rcu(&node->rcu_head, radix_tree_node_rcu_free); +} + +/* + * xas_destroy() - Free any resources allocated during the XArray operation. + * @xas: XArray operation state. + * + * This function is now internal-only. + */ +static void xas_destroy(struct xa_state *xas) +{ + struct xa_node *node = xas->xa_alloc; + + if (!node) + return; + XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); + kmem_cache_free(radix_tree_node_cachep, node); + xas->xa_alloc = NULL; +} + +/** + * xas_nomem() - Allocate memory if needed. + * @xas: XArray operation state. + * @gfp: Memory allocation flags. + * + * If we need to add new nodes to the XArray, we try to allocate memory + * with GFP_NOWAIT while holding the lock, which will usually succeed. + * If it fails, @xas is flagged as needing memory to continue. The caller + * should drop the lock and call xas_nomem(). If xas_nomem() succeeds, + * the caller should retry the operation. + * + * Forward progress is guaranteed as one node is allocated here and + * stored in the xa_state where it will be found by xas_alloc(). More + * nodes will likely be found in the slab allocator, but we do not tie + * them up here. + * + * Return: true if memory was needed, and was successfully allocated. + */ +bool xas_nomem(struct xa_state *xas, gfp_t gfp) +{ + if (xas->xa_node != XA_ERROR(-ENOMEM)) { + xas_destroy(xas); + return false; + } + xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); + if (!xas->xa_alloc) + return false; + XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list)); + xas->xa_node = XAS_RESTART; + return true; +} +EXPORT_SYMBOL_GPL(xas_nomem); + +/* + * __xas_nomem() - Drop locks and allocate memory if needed. + * @xas: XArray operation state. + * @gfp: Memory allocation flags. + * + * Internal variant of xas_nomem(). + * + * Return: true if memory was needed, and was successfully allocated. + */ +static bool __xas_nomem(struct xa_state *xas, gfp_t gfp) + __must_hold(xas->xa->xa_lock) +{ + unsigned int lock_type = xa_lock_type(xas->xa); + + if (xas->xa_node != XA_ERROR(-ENOMEM)) { + xas_destroy(xas); + return false; + } + if (gfpflags_allow_blocking(gfp)) { + xas_unlock_type(xas, lock_type); + xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); + xas_lock_type(xas, lock_type); + } else { + xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); + } + if (!xas->xa_alloc) + return false; + XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list)); + xas->xa_node = XAS_RESTART; + return true; +} + +static void xas_update(struct xa_state *xas, struct xa_node *node) +{ + if (xas->xa_update) + xas->xa_update(node); + else + XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); +} + +static void *xas_alloc(struct xa_state *xas, unsigned int shift) +{ + struct xa_node *parent = xas->xa_node; + struct xa_node *node = xas->xa_alloc; + + if (xas_invalid(xas)) + return NULL; + + if (node) { + xas->xa_alloc = NULL; + } else { + node = kmem_cache_alloc(radix_tree_node_cachep, + GFP_NOWAIT | __GFP_NOWARN); + if (!node) { + xas_set_err(xas, -ENOMEM); + return NULL; + } + } + + if (parent) { + node->offset = xas->xa_offset; + parent->count++; + XA_NODE_BUG_ON(node, parent->count > XA_CHUNK_SIZE); + xas_update(xas, parent); + } + XA_NODE_BUG_ON(node, shift > BITS_PER_LONG); + XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); + node->shift = shift; + node->count = 0; + node->nr_values = 0; + RCU_INIT_POINTER(node->parent, xas->xa_node); + node->array = xas->xa; + + return node; +} + +/* + * Use this to calculate the maximum index that will need to be created + * in order to add the entry described by @xas. Because we cannot store a + * multiple-index entry at index 0, the calculation is a little more complex + * than you might expect. + */ +static unsigned long xas_max(struct xa_state *xas) +{ + unsigned long max = xas->xa_index; + +#ifdef CONFIG_XARRAY_MULTI + if (xas->xa_shift || xas->xa_sibs) { + unsigned long mask; + mask = (((xas->xa_sibs + 1UL) << xas->xa_shift) - 1); + max |= mask; + if (mask == max) + max++; + } +#endif + + return max; +} + +/* The maximum index that can be contained in the array without expanding it */ +static unsigned long max_index(void *entry) +{ + if (!xa_is_node(entry)) + return 0; + return (XA_CHUNK_SIZE << xa_to_node(entry)->shift) - 1; +} + +static void xas_shrink(struct xa_state *xas) +{ + struct xarray *xa = xas->xa; + struct xa_node *node = xas->xa_node; + + for (;;) { + void *entry; + + XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE); + if (node->count != 1) + break; + entry = xa_entry_locked(xa, node, 0); + if (!entry) + break; + if (!xa_is_node(entry) && node->shift) + break; + xas->xa_node = XAS_BOUNDS; + + RCU_INIT_POINTER(xa->xa_head, entry); + + node->count = 0; + node->nr_values = 0; + if (!xa_is_node(entry)) + RCU_INIT_POINTER(node->slots[0], XA_RETRY_ENTRY); + xas_update(xas, node); + xa_node_free(node); + if (!xa_is_node(entry)) + break; + node = xa_to_node(entry); + node->parent = NULL; + } +} + +/* + * xas_delete_node() - Attempt to delete an xa_node + * @xas: Array operation state. + * + * Attempts to delete the @xas->xa_node. This will fail if xa->node has + * a non-zero reference count. + */ +static void xas_delete_node(struct xa_state *xas) +{ + struct xa_node *node = xas->xa_node; + + for (;;) { + struct xa_node *parent; + + XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE); + if (node->count) + break; + + parent = xa_parent_locked(xas->xa, node); + xas->xa_node = parent; + xas->xa_offset = node->offset; + xa_node_free(node); + + if (!parent) { + xas->xa->xa_head = NULL; + xas->xa_node = XAS_BOUNDS; + return; + } + + parent->slots[xas->xa_offset] = NULL; + parent->count--; + XA_NODE_BUG_ON(parent, parent->count > XA_CHUNK_SIZE); + node = parent; + xas_update(xas, node); + } + + if (!node->parent) + xas_shrink(xas); +} + +/** + * xas_free_nodes() - Free this node and all nodes that it references + * @xas: Array operation state. + * @top: Node to free + * + * This node has been removed from the tree. We must now free it and all + * of its subnodes. There may be RCU walkers with references into the tree, + * so we must replace all entries with retry markers. + */ +static void xas_free_nodes(struct xa_state *xas, struct xa_node *top) +{ + unsigned int offset = 0; + struct xa_node *node = top; + + for (;;) { + void *entry = xa_entry_locked(xas->xa, node, offset); + + if (xa_is_node(entry)) { + node = xa_to_node(entry); + offset = 0; + continue; + } + if (entry) + RCU_INIT_POINTER(node->slots[offset], XA_RETRY_ENTRY); + offset++; + while (offset == XA_CHUNK_SIZE) { + struct xa_node *parent; + + parent = xa_parent_locked(xas->xa, node); + offset = node->offset + 1; + node->count = 0; + node->nr_values = 0; + xas_update(xas, node); + xa_node_free(node); + if (node == top) + return; + node = parent; + } + } +} + +/* + * xas_expand adds nodes to the head of the tree until it has reached + * sufficient height to be able to contain @xas->xa_index + */ +static int xas_expand(struct xa_state *xas, void *head) +{ + struct xarray *xa = xas->xa; + struct xa_node *node = NULL; + unsigned int shift = 0; + unsigned long max = xas_max(xas); + + if (!head) { + if (max == 0) + return 0; + while ((max >> shift) >= XA_CHUNK_SIZE) + shift += XA_CHUNK_SHIFT; + return shift + XA_CHUNK_SHIFT; + } else if (xa_is_node(head)) { + node = xa_to_node(head); + shift = node->shift + XA_CHUNK_SHIFT; + } + xas->xa_node = NULL; + + while (max > max_index(head)) { + xa_mark_t mark = 0; + + XA_NODE_BUG_ON(node, shift > BITS_PER_LONG); + node = xas_alloc(xas, shift); + if (!node) + return -ENOMEM; + + node->count = 1; + if (xa_is_value(head)) + node->nr_values = 1; + RCU_INIT_POINTER(node->slots[0], head); + + /* Propagate the aggregated mark info to the new child */ + for (;;) { + if (xa_marked(xa, mark)) + node_set_mark(node, 0, mark); + if (mark == XA_MARK_MAX) + break; + mark_inc(mark); + } + + /* + * Now that the new node is fully initialised, we can add + * it to the tree + */ + if (xa_is_node(head)) { + xa_to_node(head)->offset = 0; + rcu_assign_pointer(xa_to_node(head)->parent, node); + } + head = xa_mk_node(node); + rcu_assign_pointer(xa->xa_head, head); + xas_update(xas, node); + + shift += XA_CHUNK_SHIFT; + } + + xas->xa_node = node; + return shift; +} + +/* + * xas_create() - Create a slot to store an entry in. + * @xas: XArray operation state. + * + * Most users will not need to call this function directly, as it is called + * by xas_store(). It is useful for doing conditional store operations + * (see the xa_cmpxchg() implementation for an example). + * + * Return: If the slot already existed, returns the contents of this slot. + * If the slot was newly created, returns NULL. If it failed to create the + * slot, returns NULL and indicates the error in @xas. + */ +static void *xas_create(struct xa_state *xas) +{ + struct xarray *xa = xas->xa; + void *entry; + void __rcu **slot; + struct xa_node *node = xas->xa_node; + int shift; + unsigned int order = xas->xa_shift; + + if (xas_top(node)) { + entry = xa_head_locked(xa); + xas->xa_node = NULL; + shift = xas_expand(xas, entry); + if (shift < 0) + return NULL; + entry = xa_head_locked(xa); + slot = &xa->xa_head; + } else if (xas_error(xas)) { + return NULL; + } else if (node) { + unsigned int offset = xas->xa_offset; + + shift = node->shift; + entry = xa_entry_locked(xa, node, offset); + slot = &node->slots[offset]; + } else { + shift = 0; + entry = xa_head_locked(xa); + slot = &xa->xa_head; + } + + while (shift > order) { + shift -= XA_CHUNK_SHIFT; + if (!entry) { + node = xas_alloc(xas, shift); + if (!node) + break; + rcu_assign_pointer(*slot, xa_mk_node(node)); + } else if (xa_is_node(entry)) { + node = xa_to_node(entry); + } else { + break; + } + entry = xas_descend(xas, node); + slot = &node->slots[xas->xa_offset]; + } + + return entry; +} + +static void update_node(struct xa_state *xas, struct xa_node *node, + int count, int values) +{ + if (!node || (!count && !values)) + return; + + node->count += count; + node->nr_values += values; + XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE); + XA_NODE_BUG_ON(node, node->nr_values > XA_CHUNK_SIZE); + xas_update(xas, node); + if (count < 0) + xas_delete_node(xas); +} + +/** + * xas_store() - Store this entry in the XArray. + * @xas: XArray operation state. + * @entry: New entry. + * + * If @xas is operating on a multi-index entry, the entry returned by this + * function is essentially meaningless (it may be an internal entry or it + * may be %NULL, even if there are non-NULL entries at some of the indices + * covered by the range). This is not a problem for any current users, + * and can be changed if needed. + * + * Return: The old entry at this index. + */ +void *xas_store(struct xa_state *xas, void *entry) +{ + struct xa_node *node; + void __rcu **slot = &xas->xa->xa_head; + unsigned int offset, max; + int count = 0; + int values = 0; + void *first, *next; + bool value = xa_is_value(entry); + + if (entry) + first = xas_create(xas); + else + first = xas_load(xas); + + if (xas_invalid(xas)) + return first; + node = xas->xa_node; + if (node && (xas->xa_shift < node->shift)) + xas->xa_sibs = 0; + if ((first == entry) && !xas->xa_sibs) + return first; + + next = first; + offset = xas->xa_offset; + max = xas->xa_offset + xas->xa_sibs; + if (node) { + slot = &node->slots[offset]; + if (xas->xa_sibs) + xas_squash_marks(xas); + } + if (!entry) + xas_init_marks(xas); + + for (;;) { + /* + * Must clear the marks before setting the entry to NULL, + * otherwise xas_for_each_marked may find a NULL entry and + * stop early. rcu_assign_pointer contains a release barrier + * so the mark clearing will appear to happen before the + * entry is set to NULL. + */ + rcu_assign_pointer(*slot, entry); + if (xa_is_node(next)) + xas_free_nodes(xas, xa_to_node(next)); + if (!node) + break; + count += !next - !entry; + values += !xa_is_value(first) - !value; + if (entry) { + if (offset == max) + break; + if (!xa_is_sibling(entry)) + entry = xa_mk_sibling(xas->xa_offset); + } else { + if (offset == XA_CHUNK_MASK) + break; + } + next = xa_entry_locked(xas->xa, node, ++offset); + if (!xa_is_sibling(next)) { + if (!entry && (offset > max)) + break; + first = next; + } + slot++; + } + + update_node(xas, node, count, values); + return first; +} +EXPORT_SYMBOL_GPL(xas_store); + /** * xas_get_mark() - Returns the state of this mark. * @xas: XArray operation state. @@ -240,6 +805,30 @@ void xas_clear_mark(const struct xa_state *xas, xa_mark_t mark) } EXPORT_SYMBOL_GPL(xas_clear_mark); +/** + * xas_init_marks() - Initialise all marks for the entry + * @xas: Array operations state. + * + * Initialise all marks for the entry specified by @xas. If we're tracking + * free entries with a mark, we need to set it on all entries. All other + * marks are cleared. + * + * This implementation is not as efficient as it could be; we may walk + * up the tree multiple times. + */ +void xas_init_marks(const struct xa_state *xas) +{ + xa_mark_t mark = 0; + + for (;;) { + xas_clear_mark(xas, mark); + if (mark == XA_MARK_MAX) + break; + mark_inc(mark); + } +} +EXPORT_SYMBOL_GPL(xas_init_marks); + /** * xa_init_flags() - Initialise an empty XArray with flags. * @xa: XArray. @@ -253,9 +842,19 @@ EXPORT_SYMBOL_GPL(xas_clear_mark); */ void xa_init_flags(struct xarray *xa, gfp_t flags) { + unsigned int lock_type; + static struct lock_class_key xa_lock_irq; + static struct lock_class_key xa_lock_bh; + spin_lock_init(&xa->xa_lock); xa->xa_flags = flags; xa->xa_head = NULL; + + lock_type = xa_lock_type(xa); + if (lock_type == XA_LOCK_IRQ) + lockdep_set_class(&xa->xa_lock, &xa_lock_irq); + else if (lock_type == XA_LOCK_BH) + lockdep_set_class(&xa->xa_lock, &xa_lock_bh); } EXPORT_SYMBOL(xa_init_flags); @@ -282,6 +881,100 @@ void *xa_load(struct xarray *xa, unsigned long index) } EXPORT_SYMBOL(xa_load); +static void *xas_result(struct xa_state *xas, void *curr) +{ + XA_NODE_BUG_ON(xas->xa_node, xa_is_internal(curr)); + if (xas_error(xas)) + curr = xas->xa_node; + return curr; +} + +/** + * __xa_erase() - Erase this entry from the XArray while locked. + * @xa: XArray. + * @index: Index into array. + * + * If the entry at this index is a multi-index entry then all indices will + * be erased, and the entry will no longer be a multi-index entry. + * This function expects the xa_lock to be held on entry. + * + * Context: Any context. Expects xa_lock to be held on entry. May + * release and reacquire xa_lock if @gfp flags permit. + * Return: The old entry at this index. + */ +void *__xa_erase(struct xarray *xa, unsigned long index) +{ + XA_STATE(xas, xa, index); + return xas_result(&xas, xas_store(&xas, NULL)); +} +EXPORT_SYMBOL_GPL(__xa_erase); + +/** + * xa_store() - Store this entry in the XArray. + * @xa: XArray. + * @index: Index into array. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * After this function returns, loads from this index will return @entry. + * Storing into an existing multislot entry updates the entry of every index. + * The marks associated with @index are unaffected unless @entry is %NULL. + * + * Context: Process context. Takes and releases the xa_lock. May sleep + * if the @gfp flags permit. + * Return: The old entry at this index on success, xa_err(-EINVAL) if @entry + * cannot be stored in an XArray, or xa_err(-ENOMEM) if memory allocation + * failed. + */ +void *xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) +{ + XA_STATE(xas, xa, index); + void *curr; + + if (WARN_ON_ONCE(xa_is_internal(entry))) + return XA_ERROR(-EINVAL); + + do { + xas_lock(&xas); + curr = xas_store(&xas, entry); + xas_unlock(&xas); + } while (xas_nomem(&xas, gfp)); + + return xas_result(&xas, curr); +} +EXPORT_SYMBOL(xa_store); + +/** + * __xa_store() - Store this entry in the XArray. + * @xa: XArray. + * @index: Index into array. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * You must already be holding the xa_lock when calling this function. + * It will drop the lock if needed to allocate memory, and then reacquire + * it afterwards. + * + * Context: Any context. Expects xa_lock to be held on entry. May + * release and reacquire xa_lock if @gfp flags permit. + * Return: The old entry at this index or xa_err() if an error happened. + */ +void *__xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) +{ + XA_STATE(xas, xa, index); + void *curr; + + if (WARN_ON_ONCE(xa_is_internal(entry))) + return XA_ERROR(-EINVAL); + + do { + curr = xas_store(&xas, entry); + } while (__xas_nomem(&xas, gfp)); + + return xas_result(&xas, curr); +} +EXPORT_SYMBOL(__xa_store); + /** * __xa_set_mark() - Set this mark on this entry while locked. * @xa: XArray. -- cgit v1.2.3 From 41aec91f55985e7f14ee75fe2f6e7bcfff0d0fae Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 10 Nov 2017 15:34:55 -0500 Subject: xarray: Add XArray conditional store operations Like cmpxchg(), xa_cmpxchg will only store to the index if the current entry matches the old entry. It returns the current entry, which is usually more useful than the errno returned by radix_tree_insert(). For the users who really only want the errno, the xa_insert() wrapper provides a more convenient calling convention. Signed-off-by: Matthew Wilcox --- lib/test_xarray.c | 20 ++++++++++++++++ lib/xarray.c | 71 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) (limited to 'lib') diff --git a/lib/test_xarray.c b/lib/test_xarray.c index b711030fbc32..fb472258b639 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -198,6 +198,25 @@ static noinline void check_xa_shrink(struct xarray *xa) XA_BUG_ON(xa, !xa_empty(xa)); } +static noinline void check_cmpxchg(struct xarray *xa) +{ + void *FIVE = xa_mk_value(5); + void *SIX = xa_mk_value(6); + void *LOTS = xa_mk_value(12345678); + + XA_BUG_ON(xa, !xa_empty(xa)); + XA_BUG_ON(xa, xa_store_index(xa, 12345678, GFP_KERNEL) != NULL); + XA_BUG_ON(xa, xa_insert(xa, 12345678, xa, GFP_KERNEL) != -EEXIST); + XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, SIX, FIVE, GFP_KERNEL) != LOTS); + XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, LOTS, FIVE, GFP_KERNEL) != LOTS); + XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, FIVE, LOTS, GFP_KERNEL) != FIVE); + XA_BUG_ON(xa, xa_cmpxchg(xa, 5, FIVE, NULL, GFP_KERNEL) != NULL); + XA_BUG_ON(xa, xa_cmpxchg(xa, 5, NULL, FIVE, GFP_KERNEL) != NULL); + xa_erase_index(xa, 12345678); + xa_erase_index(xa, 5); + XA_BUG_ON(xa, !xa_empty(xa)); +} + static noinline void check_multi_store(struct xarray *xa) { #ifdef CONFIG_XARRAY_MULTI @@ -274,6 +293,7 @@ static int xarray_checks(void) check_xa_load(&array); check_xa_mark(&array); check_xa_shrink(&array); + check_cmpxchg(&array); check_multi_store(&array); printk("XArray: %u of %u tests passed\n", tests_passed, tests_run); diff --git a/lib/xarray.c b/lib/xarray.c index 4596a95ed9cd..2ba5a98ec618 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -975,6 +975,77 @@ void *__xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) } EXPORT_SYMBOL(__xa_store); +/** + * xa_cmpxchg() - Conditionally replace an entry in the XArray. + * @xa: XArray. + * @index: Index into array. + * @old: Old value to test against. + * @entry: New value to place in array. + * @gfp: Memory allocation flags. + * + * If the entry at @index is the same as @old, replace it with @entry. + * If the return value is equal to @old, then the exchange was successful. + * + * Context: Process context. Takes and releases the xa_lock. May sleep + * if the @gfp flags permit. + * Return: The old value at this index or xa_err() if an error happened. + */ +void *xa_cmpxchg(struct xarray *xa, unsigned long index, + void *old, void *entry, gfp_t gfp) +{ + XA_STATE(xas, xa, index); + void *curr; + + if (WARN_ON_ONCE(xa_is_internal(entry))) + return XA_ERROR(-EINVAL); + + do { + xas_lock(&xas); + curr = xas_load(&xas); + if (curr == old) + xas_store(&xas, entry); + xas_unlock(&xas); + } while (xas_nomem(&xas, gfp)); + + return xas_result(&xas, curr); +} +EXPORT_SYMBOL(xa_cmpxchg); + +/** + * __xa_cmpxchg() - Store this entry in the XArray. + * @xa: XArray. + * @index: Index into array. + * @old: Old value to test against. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * You must already be holding the xa_lock when calling this function. + * It will drop the lock if needed to allocate memory, and then reacquire + * it afterwards. + * + * Context: Any context. Expects xa_lock to be held on entry. May + * release and reacquire xa_lock if @gfp flags permit. + * Return: The old entry at this index or xa_err() if an error happened. + */ +void *__xa_cmpxchg(struct xarray *xa, unsigned long index, + void *old, void *entry, gfp_t gfp) +{ + XA_STATE(xas, xa, index); + void *curr; + + if (WARN_ON_ONCE(xa_is_internal(entry))) + return XA_ERROR(-EINVAL); + + do { + curr = xas_load(&xas); + if (curr == old) + xas_store(&xas, entry); + } while (__xas_nomem(&xas, gfp)); + + return xas_result(&xas, curr); +} +EXPORT_SYMBOL(__xa_cmpxchg); + /** * __xa_set_mark() - Set this mark on this entry while locked. * @xa: XArray. -- cgit v1.2.3 From b803b42823d0d9e8b6deccf01ffc2aba5d0738df Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 14 Nov 2017 08:30:11 -0500 Subject: xarray: Add XArray iterators The xa_for_each iterator allows the user to efficiently walk a range of the array, executing the loop body once for each entry in that range that matches the filter. This commit also includes xa_find() and xa_find_after() which are helper functions for xa_for_each() but may also be useful in their own right. In the xas family of functions, we have xas_for_each(), xas_find(), xas_next_entry(), xas_for_each_tagged(), xas_find_tagged(), xas_next_tagged() and xas_pause(). Signed-off-by: Matthew Wilcox --- lib/test_xarray.c | 183 ++++++++++++++++++++++++++++++++++ lib/xarray.c | 292 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 475 insertions(+) (limited to 'lib') diff --git a/lib/test_xarray.c b/lib/test_xarray.c index fb472258b639..e3c2d4d00b15 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -75,6 +75,48 @@ static noinline void check_xa_err(struct xarray *xa) // XA_BUG_ON(xa, xa_err(xa_store(xa, 0, xa_mk_internal(0), 0)) != -EINVAL); } +static noinline void check_xas_retry(struct xarray *xa) +{ + XA_STATE(xas, xa, 0); + void *entry; + + xa_store_index(xa, 0, GFP_KERNEL); + xa_store_index(xa, 1, GFP_KERNEL); + + rcu_read_lock(); + XA_BUG_ON(xa, xas_find(&xas, ULONG_MAX) != xa_mk_value(0)); + xa_erase_index(xa, 1); + XA_BUG_ON(xa, !xa_is_retry(xas_reload(&xas))); + XA_BUG_ON(xa, xas_retry(&xas, NULL)); + XA_BUG_ON(xa, xas_retry(&xas, xa_mk_value(0))); + xas_reset(&xas); + XA_BUG_ON(xa, xas.xa_node != XAS_RESTART); + XA_BUG_ON(xa, xas_next_entry(&xas, ULONG_MAX) != xa_mk_value(0)); + XA_BUG_ON(xa, xas.xa_node != NULL); + + XA_BUG_ON(xa, xa_store_index(xa, 1, GFP_KERNEL) != NULL); + XA_BUG_ON(xa, !xa_is_internal(xas_reload(&xas))); + xas.xa_node = XAS_RESTART; + XA_BUG_ON(xa, xas_next_entry(&xas, ULONG_MAX) != xa_mk_value(0)); + rcu_read_unlock(); + + /* Make sure we can iterate through retry entries */ + xas_lock(&xas); + xas_set(&xas, 0); + xas_store(&xas, XA_RETRY_ENTRY); + xas_set(&xas, 1); + xas_store(&xas, XA_RETRY_ENTRY); + + xas_set(&xas, 0); + xas_for_each(&xas, entry, ULONG_MAX) { + xas_store(&xas, xa_mk_value(xas.xa_index)); + } + xas_unlock(&xas); + + xa_erase_index(xa, 0); + xa_erase_index(xa, 1); +} + static noinline void check_xa_load(struct xarray *xa) { unsigned long i, j; @@ -217,6 +259,44 @@ static noinline void check_cmpxchg(struct xarray *xa) XA_BUG_ON(xa, !xa_empty(xa)); } +static noinline void check_xas_erase(struct xarray *xa) +{ + XA_STATE(xas, xa, 0); + void *entry; + unsigned long i, j; + + for (i = 0; i < 200; i++) { + for (j = i; j < 2 * i + 17; j++) { + xas_set(&xas, j); + do { + xas_lock(&xas); + xas_store(&xas, xa_mk_value(j)); + xas_unlock(&xas); + } while (xas_nomem(&xas, GFP_KERNEL)); + } + + xas_set(&xas, ULONG_MAX); + do { + xas_lock(&xas); + xas_store(&xas, xa_mk_value(0)); + xas_unlock(&xas); + } while (xas_nomem(&xas, GFP_KERNEL)); + + xas_lock(&xas); + xas_store(&xas, NULL); + + xas_set(&xas, 0); + j = i; + xas_for_each(&xas, entry, ULONG_MAX) { + XA_BUG_ON(xa, entry != xa_mk_value(j)); + xas_store(&xas, NULL); + j++; + } + xas_unlock(&xas); + XA_BUG_ON(xa, !xa_empty(xa)); + } +} + static noinline void check_multi_store(struct xarray *xa) { #ifdef CONFIG_XARRAY_MULTI @@ -285,16 +365,119 @@ static noinline void check_multi_store(struct xarray *xa) #endif } +static noinline void check_multi_find(struct xarray *xa) +{ +#ifdef CONFIG_XARRAY_MULTI + unsigned long index; + + xa_store_order(xa, 12, 2, xa_mk_value(12), GFP_KERNEL); + XA_BUG_ON(xa, xa_store_index(xa, 16, GFP_KERNEL) != NULL); + + index = 0; + XA_BUG_ON(xa, xa_find(xa, &index, ULONG_MAX, XA_PRESENT) != + xa_mk_value(12)); + XA_BUG_ON(xa, index != 12); + index = 13; + XA_BUG_ON(xa, xa_find(xa, &index, ULONG_MAX, XA_PRESENT) != + xa_mk_value(12)); + XA_BUG_ON(xa, (index < 12) || (index >= 16)); + XA_BUG_ON(xa, xa_find_after(xa, &index, ULONG_MAX, XA_PRESENT) != + xa_mk_value(16)); + XA_BUG_ON(xa, index != 16); + + xa_erase_index(xa, 12); + xa_erase_index(xa, 16); + XA_BUG_ON(xa, !xa_empty(xa)); +#endif +} + +static noinline void check_multi_find_2(struct xarray *xa) +{ + unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 10 : 1; + unsigned int i, j; + void *entry; + + for (i = 0; i < max_order; i++) { + unsigned long index = 1UL << i; + for (j = 0; j < index; j++) { + XA_STATE(xas, xa, j + index); + xa_store_index(xa, index - 1, GFP_KERNEL); + xa_store_order(xa, index, i, xa_mk_value(index), + GFP_KERNEL); + rcu_read_lock(); + xas_for_each(&xas, entry, ULONG_MAX) { + xa_erase_index(xa, index); + } + rcu_read_unlock(); + xa_erase_index(xa, index - 1); + XA_BUG_ON(xa, !xa_empty(xa)); + } + } +} + +static noinline void check_find(struct xarray *xa) +{ + unsigned long i, j, k; + + XA_BUG_ON(xa, !xa_empty(xa)); + + /* + * Check xa_find with all pairs between 0 and 99 inclusive, + * starting at every index between 0 and 99 + */ + for (i = 0; i < 100; i++) { + XA_BUG_ON(xa, xa_store_index(xa, i, GFP_KERNEL) != NULL); + xa_set_mark(xa, i, XA_MARK_0); + for (j = 0; j < i; j++) { + XA_BUG_ON(xa, xa_store_index(xa, j, GFP_KERNEL) != + NULL); + xa_set_mark(xa, j, XA_MARK_0); + for (k = 0; k < 100; k++) { + unsigned long index = k; + void *entry = xa_find(xa, &index, ULONG_MAX, + XA_PRESENT); + if (k <= j) + XA_BUG_ON(xa, index != j); + else if (k <= i) + XA_BUG_ON(xa, index != i); + else + XA_BUG_ON(xa, entry != NULL); + + index = k; + entry = xa_find(xa, &index, ULONG_MAX, + XA_MARK_0); + if (k <= j) + XA_BUG_ON(xa, index != j); + else if (k <= i) + XA_BUG_ON(xa, index != i); + else + XA_BUG_ON(xa, entry != NULL); + } + xa_erase_index(xa, j); + XA_BUG_ON(xa, xa_get_mark(xa, j, XA_MARK_0)); + XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_0)); + } + xa_erase_index(xa, i); + XA_BUG_ON(xa, xa_get_mark(xa, i, XA_MARK_0)); + } + XA_BUG_ON(xa, !xa_empty(xa)); + check_multi_find(xa); + check_multi_find_2(xa); +} + static DEFINE_XARRAY(array); static int xarray_checks(void) { check_xa_err(&array); + check_xas_retry(&array); check_xa_load(&array); check_xa_mark(&array); check_xa_shrink(&array); + check_xas_erase(&array); check_cmpxchg(&array); check_multi_store(&array); + check_find(&array); printk("XArray: %u of %u tests passed\n", tests_passed, tests_run); return (tests_run == tests_passed) ? 0 : -EINVAL; diff --git a/lib/xarray.c b/lib/xarray.c index 2ba5a98ec618..24494f42daa6 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -128,6 +128,11 @@ static unsigned int get_offset(unsigned long index, struct xa_node *node) return (index >> node->shift) & XA_CHUNK_MASK; } +static void xas_set_offset(struct xa_state *xas) +{ + xas->xa_offset = get_offset(xas->xa_index, xas->xa_node); +} + /* move the index either forwards (find) or backwards (sibling slot) */ static void xas_move_index(struct xa_state *xas, unsigned long offset) { @@ -136,6 +141,12 @@ static void xas_move_index(struct xa_state *xas, unsigned long offset) xas->xa_index += offset << shift; } +static void xas_advance(struct xa_state *xas) +{ + xas->xa_offset++; + xas_move_index(xas, xas->xa_offset); +} + static void *set_bounds(struct xa_state *xas) { xas->xa_node = XAS_BOUNDS; @@ -829,6 +840,202 @@ void xas_init_marks(const struct xa_state *xas) } EXPORT_SYMBOL_GPL(xas_init_marks); +/** + * xas_pause() - Pause a walk to drop a lock. + * @xas: XArray operation state. + * + * Some users need to pause a walk and drop the lock they're holding in + * order to yield to a higher priority thread or carry out an operation + * on an entry. Those users should call this function before they drop + * the lock. It resets the @xas to be suitable for the next iteration + * of the loop after the user has reacquired the lock. If most entries + * found during a walk require you to call xas_pause(), the xa_for_each() + * iterator may be more appropriate. + * + * Note that xas_pause() only works for forward iteration. If a user needs + * to pause a reverse iteration, we will need a xas_pause_rev(). + */ +void xas_pause(struct xa_state *xas) +{ + struct xa_node *node = xas->xa_node; + + if (xas_invalid(xas)) + return; + + if (node) { + unsigned int offset = xas->xa_offset; + while (++offset < XA_CHUNK_SIZE) { + if (!xa_is_sibling(xa_entry(xas->xa, node, offset))) + break; + } + xas->xa_index += (offset - xas->xa_offset) << node->shift; + } else { + xas->xa_index++; + } + xas->xa_node = XAS_RESTART; +} +EXPORT_SYMBOL_GPL(xas_pause); + +/** + * xas_find() - Find the next present entry in the XArray. + * @xas: XArray operation state. + * @max: Highest index to return. + * + * If the @xas has not yet been walked to an entry, return the entry + * which has an index >= xas.xa_index. If it has been walked, the entry + * currently being pointed at has been processed, and so we move to the + * next entry. + * + * If no entry is found and the array is smaller than @max, the iterator + * is set to the smallest index not yet in the array. This allows @xas + * to be immediately passed to xas_store(). + * + * Return: The entry, if found, otherwise %NULL. + */ +void *xas_find(struct xa_state *xas, unsigned long max) +{ + void *entry; + + if (xas_error(xas)) + return NULL; + + if (!xas->xa_node) { + xas->xa_index = 1; + return set_bounds(xas); + } else if (xas_top(xas->xa_node)) { + entry = xas_load(xas); + if (entry || xas_not_node(xas->xa_node)) + return entry; + } else if (!xas->xa_node->shift && + xas->xa_offset != (xas->xa_index & XA_CHUNK_MASK)) { + xas->xa_offset = ((xas->xa_index - 1) & XA_CHUNK_MASK) + 1; + } + + xas_advance(xas); + + while (xas->xa_node && (xas->xa_index <= max)) { + if (unlikely(xas->xa_offset == XA_CHUNK_SIZE)) { + xas->xa_offset = xas->xa_node->offset + 1; + xas->xa_node = xa_parent(xas->xa, xas->xa_node); + continue; + } + + entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); + if (xa_is_node(entry)) { + xas->xa_node = xa_to_node(entry); + xas->xa_offset = 0; + continue; + } + if (entry && !xa_is_sibling(entry)) + return entry; + + xas_advance(xas); + } + + if (!xas->xa_node) + xas->xa_node = XAS_BOUNDS; + return NULL; +} +EXPORT_SYMBOL_GPL(xas_find); + +/** + * xas_find_marked() - Find the next marked entry in the XArray. + * @xas: XArray operation state. + * @max: Highest index to return. + * @mark: Mark number to search for. + * + * If the @xas has not yet been walked to an entry, return the marked entry + * which has an index >= xas.xa_index. If it has been walked, the entry + * currently being pointed at has been processed, and so we return the + * first marked entry with an index > xas.xa_index. + * + * If no marked entry is found and the array is smaller than @max, @xas is + * set to the bounds state and xas->xa_index is set to the smallest index + * not yet in the array. This allows @xas to be immediately passed to + * xas_store(). + * + * If no entry is found before @max is reached, @xas is set to the restart + * state. + * + * Return: The entry, if found, otherwise %NULL. + */ +void *xas_find_marked(struct xa_state *xas, unsigned long max, xa_mark_t mark) +{ + bool advance = true; + unsigned int offset; + void *entry; + + if (xas_error(xas)) + return NULL; + + if (!xas->xa_node) { + xas->xa_index = 1; + goto out; + } else if (xas_top(xas->xa_node)) { + advance = false; + entry = xa_head(xas->xa); + xas->xa_node = NULL; + if (xas->xa_index > max_index(entry)) + goto bounds; + if (!xa_is_node(entry)) { + if (xa_marked(xas->xa, mark)) + return entry; + xas->xa_index = 1; + goto out; + } + xas->xa_node = xa_to_node(entry); + xas->xa_offset = xas->xa_index >> xas->xa_node->shift; + } + + while (xas->xa_index <= max) { + if (unlikely(xas->xa_offset == XA_CHUNK_SIZE)) { + xas->xa_offset = xas->xa_node->offset + 1; + xas->xa_node = xa_parent(xas->xa, xas->xa_node); + if (!xas->xa_node) + break; + advance = false; + continue; + } + + if (!advance) { + entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); + if (xa_is_sibling(entry)) { + xas->xa_offset = xa_to_sibling(entry); + xas_move_index(xas, xas->xa_offset); + } + } + + offset = xas_find_chunk(xas, advance, mark); + if (offset > xas->xa_offset) { + advance = false; + xas_move_index(xas, offset); + /* Mind the wrap */ + if ((xas->xa_index - 1) >= max) + goto max; + xas->xa_offset = offset; + if (offset == XA_CHUNK_SIZE) + continue; + } + + entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); + if (!xa_is_node(entry)) + return entry; + xas->xa_node = xa_to_node(entry); + xas_set_offset(xas); + } + +out: + if (!max) + goto max; +bounds: + xas->xa_node = XAS_BOUNDS; + return NULL; +max: + xas->xa_node = XAS_RESTART; + return NULL; +} +EXPORT_SYMBOL_GPL(xas_find_marked); + /** * xa_init_flags() - Initialise an empty XArray with flags. * @xa: XArray. @@ -1152,6 +1359,91 @@ void xa_clear_mark(struct xarray *xa, unsigned long index, xa_mark_t mark) } EXPORT_SYMBOL(xa_clear_mark); +/** + * xa_find() - Search the XArray for an entry. + * @xa: XArray. + * @indexp: Pointer to an index. + * @max: Maximum index to search to. + * @filter: Selection criterion. + * + * Finds the entry in @xa which matches the @filter, and has the lowest + * index that is at least @indexp and no more than @max. + * If an entry is found, @indexp is updated to be the index of the entry. + * This function is protected by the RCU read lock, so it may not find + * entries which are being simultaneously added. It will not return an + * %XA_RETRY_ENTRY; if you need to see retry entries, use xas_find(). + * + * Context: Any context. Takes and releases the RCU lock. + * Return: The entry, if found, otherwise %NULL. + */ +void *xa_find(struct xarray *xa, unsigned long *indexp, + unsigned long max, xa_mark_t filter) +{ + XA_STATE(xas, xa, *indexp); + void *entry; + + rcu_read_lock(); + do { + if ((__force unsigned int)filter < XA_MAX_MARKS) + entry = xas_find_marked(&xas, max, filter); + else + entry = xas_find(&xas, max); + } while (xas_retry(&xas, entry)); + rcu_read_unlock(); + + if (entry) + *indexp = xas.xa_index; + return entry; +} +EXPORT_SYMBOL(xa_find); + +/** + * xa_find_after() - Search the XArray for a present entry. + * @xa: XArray. + * @indexp: Pointer to an index. + * @max: Maximum index to search to. + * @filter: Selection criterion. + * + * Finds the entry in @xa which matches the @filter and has the lowest + * index that is above @indexp and no more than @max. + * If an entry is found, @indexp is updated to be the index of the entry. + * This function is protected by the RCU read lock, so it may miss entries + * which are being simultaneously added. It will not return an + * %XA_RETRY_ENTRY; if you need to see retry entries, use xas_find(). + * + * Context: Any context. Takes and releases the RCU lock. + * Return: The pointer, if found, otherwise %NULL. + */ +void *xa_find_after(struct xarray *xa, unsigned long *indexp, + unsigned long max, xa_mark_t filter) +{ + XA_STATE(xas, xa, *indexp + 1); + void *entry; + + rcu_read_lock(); + for (;;) { + if ((__force unsigned int)filter < XA_MAX_MARKS) + entry = xas_find_marked(&xas, max, filter); + else + entry = xas_find(&xas, max); + if (xas.xa_shift) { + if (xas.xa_index & ((1UL << xas.xa_shift) - 1)) + continue; + } else { + if (xas.xa_offset < (xas.xa_index & XA_CHUNK_MASK)) + continue; + } + if (!xas_retry(&xas, entry)) + break; + } + rcu_read_unlock(); + + if (entry) + *indexp = xas.xa_index; + return entry; +} +EXPORT_SYMBOL(xa_find_after); + #ifdef XA_DEBUG void xa_dump_node(const struct xa_node *node) { -- cgit v1.2.3 From 80a0a1a9a3cde9b23851e8eb7160e2786549306a Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 14 Nov 2017 16:42:22 -0500 Subject: xarray: Extract entries from an XArray The xa_extract function combines the functionality of radix_tree_gang_lookup() and radix_tree_gang_lookup_tagged(). It extracts entries matching the specified filter into a normal array. Signed-off-by: Matthew Wilcox --- lib/xarray.c | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) (limited to 'lib') diff --git a/lib/xarray.c b/lib/xarray.c index 24494f42daa6..70e04810c8c2 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1444,6 +1444,86 @@ void *xa_find_after(struct xarray *xa, unsigned long *indexp, } EXPORT_SYMBOL(xa_find_after); +static unsigned int xas_extract_present(struct xa_state *xas, void **dst, + unsigned long max, unsigned int n) +{ + void *entry; + unsigned int i = 0; + + rcu_read_lock(); + xas_for_each(xas, entry, max) { + if (xas_retry(xas, entry)) + continue; + dst[i++] = entry; + if (i == n) + break; + } + rcu_read_unlock(); + + return i; +} + +static unsigned int xas_extract_marked(struct xa_state *xas, void **dst, + unsigned long max, unsigned int n, xa_mark_t mark) +{ + void *entry; + unsigned int i = 0; + + rcu_read_lock(); + xas_for_each_marked(xas, entry, max, mark) { + if (xas_retry(xas, entry)) + continue; + dst[i++] = entry; + if (i == n) + break; + } + rcu_read_unlock(); + + return i; +} + +/** + * xa_extract() - Copy selected entries from the XArray into a normal array. + * @xa: The source XArray to copy from. + * @dst: The buffer to copy entries into. + * @start: The first index in the XArray eligible to be selected. + * @max: The last index in the XArray eligible to be selected. + * @n: The maximum number of entries to copy. + * @filter: Selection criterion. + * + * Copies up to @n entries that match @filter from the XArray. The + * copied entries will have indices between @start and @max, inclusive. + * + * The @filter may be an XArray mark value, in which case entries which are + * marked with that mark will be copied. It may also be %XA_PRESENT, in + * which case all entries which are not NULL will be copied. + * + * The entries returned may not represent a snapshot of the XArray at a + * moment in time. For example, if another thread stores to index 5, then + * index 10, calling xa_extract() may return the old contents of index 5 + * and the new contents of index 10. Indices not modified while this + * function is running will not be skipped. + * + * If you need stronger guarantees, holding the xa_lock across calls to this + * function will prevent concurrent modification. + * + * Context: Any context. Takes and releases the RCU lock. + * Return: The number of entries copied. + */ +unsigned int xa_extract(struct xarray *xa, void **dst, unsigned long start, + unsigned long max, unsigned int n, xa_mark_t filter) +{ + XA_STATE(xas, xa, start); + + if (!n) + return 0; + + if ((__force unsigned int)filter < XA_MAX_MARKS) + return xas_extract_marked(&xas, dst, max, n, filter); + return xas_extract_present(&xas, dst, max, n); +} +EXPORT_SYMBOL(xa_extract); + #ifdef XA_DEBUG void xa_dump_node(const struct xa_node *node) { -- cgit v1.2.3 From 687149fca1f37c447e5d161e0a4a04cb2c880cb6 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 17 Nov 2017 08:16:34 -0500 Subject: xarray: Destroy an XArray This function frees all the internal memory allocated to the xarray and reinitialises it to be empty. Signed-off-by: Matthew Wilcox --- lib/test_xarray.c | 34 ++++++++++++++++++++++++++++++++++ lib/xarray.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) (limited to 'lib') diff --git a/lib/test_xarray.c b/lib/test_xarray.c index e3c2d4d00b15..a96f67caa1c2 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -465,6 +465,39 @@ static noinline void check_find(struct xarray *xa) check_multi_find_2(xa); } +static noinline void check_destroy(struct xarray *xa) +{ + unsigned long index; + + XA_BUG_ON(xa, !xa_empty(xa)); + + /* Destroying an empty array is a no-op */ + xa_destroy(xa); + XA_BUG_ON(xa, !xa_empty(xa)); + + /* Destroying an array with a single entry */ + for (index = 0; index < 1000; index++) { + xa_store_index(xa, index, GFP_KERNEL); + XA_BUG_ON(xa, xa_empty(xa)); + xa_destroy(xa); + XA_BUG_ON(xa, !xa_empty(xa)); + } + + /* Destroying an array with a single entry at ULONG_MAX */ + xa_store(xa, ULONG_MAX, xa, GFP_KERNEL); + XA_BUG_ON(xa, xa_empty(xa)); + xa_destroy(xa); + XA_BUG_ON(xa, !xa_empty(xa)); + +#ifdef CONFIG_XARRAY_MULTI + /* Destroying an array with a multi-index entry */ + xa_store_order(xa, 1 << 11, 11, xa, GFP_KERNEL); + XA_BUG_ON(xa, xa_empty(xa)); + xa_destroy(xa); + XA_BUG_ON(xa, !xa_empty(xa)); +#endif +} + static DEFINE_XARRAY(array); static int xarray_checks(void) @@ -478,6 +511,7 @@ static int xarray_checks(void) check_cmpxchg(&array); check_multi_store(&array); check_find(&array); + check_destroy(&array); printk("XArray: %u of %u tests passed\n", tests_passed, tests_run); return (tests_run == tests_passed) ? 0 : -EINVAL; diff --git a/lib/xarray.c b/lib/xarray.c index 70e04810c8c2..057dbe0d3bf6 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1524,6 +1524,34 @@ unsigned int xa_extract(struct xarray *xa, void **dst, unsigned long start, } EXPORT_SYMBOL(xa_extract); +/** + * xa_destroy() - Free all internal data structures. + * @xa: XArray. + * + * After calling this function, the XArray is empty and has freed all memory + * allocated for its internal data structures. You are responsible for + * freeing the objects referenced by the XArray. + * + * Context: Any context. Takes and releases the xa_lock, interrupt-safe. + */ +void xa_destroy(struct xarray *xa) +{ + XA_STATE(xas, xa, 0); + unsigned long flags; + void *entry; + + xas.xa_node = NULL; + xas_lock_irqsave(&xas, flags); + entry = xa_head_locked(xa); + RCU_INIT_POINTER(xa->xa_head, NULL); + xas_init_marks(&xas); + /* lockdep checks we're still holding the lock in xas_free_nodes() */ + if (xa_is_node(entry)) + xas_free_nodes(&xas, xa_to_node(entry)); + xas_unlock_irqrestore(&xas, flags); +} +EXPORT_SYMBOL(xa_destroy); + #ifdef XA_DEBUG void xa_dump_node(const struct xa_node *node) { -- cgit v1.2.3 From 64d3e9a9e0cc51957d243dd2b0adc5d74ff5e128 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 1 Dec 2017 00:06:52 -0500 Subject: xarray: Step through an XArray The xas_next and xas_prev functions move the xas index by one position, and adjust the rest of the iterator state to match it. This is more efficient than calling xas_set() as it keeps the iterator at the leaves of the tree instead of walking the iterator from the root each time. Signed-off-by: Matthew Wilcox --- lib/test_xarray.c | 115 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/xarray.c | 74 +++++++++++++++++++++++++++++++++++ 2 files changed, 189 insertions(+) (limited to 'lib') diff --git a/lib/test_xarray.c b/lib/test_xarray.c index a96f67caa1c2..85ef1882dd3c 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -465,6 +465,120 @@ static noinline void check_find(struct xarray *xa) check_multi_find_2(xa); } +static noinline void check_move_small(struct xarray *xa, unsigned long idx) +{ + XA_STATE(xas, xa, 0); + unsigned long i; + + xa_store_index(xa, 0, GFP_KERNEL); + xa_store_index(xa, idx, GFP_KERNEL); + + rcu_read_lock(); + for (i = 0; i < idx * 4; i++) { + void *entry = xas_next(&xas); + if (i <= idx) + XA_BUG_ON(xa, xas.xa_node == XAS_RESTART); + XA_BUG_ON(xa, xas.xa_index != i); + if (i == 0 || i == idx) + XA_BUG_ON(xa, entry != xa_mk_value(i)); + else + XA_BUG_ON(xa, entry != NULL); + } + xas_next(&xas); + XA_BUG_ON(xa, xas.xa_index != i); + + do { + void *entry = xas_prev(&xas); + i--; + if (i <= idx) + XA_BUG_ON(xa, xas.xa_node == XAS_RESTART); + XA_BUG_ON(xa, xas.xa_index != i); + if (i == 0 || i == idx) + XA_BUG_ON(xa, entry != xa_mk_value(i)); + else + XA_BUG_ON(xa, entry != NULL); + } while (i > 0); + + xas_set(&xas, ULONG_MAX); + XA_BUG_ON(xa, xas_next(&xas) != NULL); + XA_BUG_ON(xa, xas.xa_index != ULONG_MAX); + XA_BUG_ON(xa, xas_next(&xas) != xa_mk_value(0)); + XA_BUG_ON(xa, xas.xa_index != 0); + XA_BUG_ON(xa, xas_prev(&xas) != NULL); + XA_BUG_ON(xa, xas.xa_index != ULONG_MAX); + rcu_read_unlock(); + + xa_erase_index(xa, 0); + xa_erase_index(xa, idx); + XA_BUG_ON(xa, !xa_empty(xa)); +} + +static noinline void check_move(struct xarray *xa) +{ + XA_STATE(xas, xa, (1 << 16) - 1); + unsigned long i; + + for (i = 0; i < (1 << 16); i++) + XA_BUG_ON(xa, xa_store_index(xa, i, GFP_KERNEL) != NULL); + + rcu_read_lock(); + do { + void *entry = xas_prev(&xas); + i--; + XA_BUG_ON(xa, entry != xa_mk_value(i)); + XA_BUG_ON(xa, i != xas.xa_index); + } while (i != 0); + + XA_BUG_ON(xa, xas_prev(&xas) != NULL); + XA_BUG_ON(xa, xas.xa_index != ULONG_MAX); + + do { + void *entry = xas_next(&xas); + XA_BUG_ON(xa, entry != xa_mk_value(i)); + XA_BUG_ON(xa, i != xas.xa_index); + i++; + } while (i < (1 << 16)); + rcu_read_unlock(); + + for (i = (1 << 8); i < (1 << 15); i++) + xa_erase_index(xa, i); + + i = xas.xa_index; + + rcu_read_lock(); + do { + void *entry = xas_prev(&xas); + i--; + if ((i < (1 << 8)) || (i >= (1 << 15))) + XA_BUG_ON(xa, entry != xa_mk_value(i)); + else + XA_BUG_ON(xa, entry != NULL); + XA_BUG_ON(xa, i != xas.xa_index); + } while (i != 0); + + XA_BUG_ON(xa, xas_prev(&xas) != NULL); + XA_BUG_ON(xa, xas.xa_index != ULONG_MAX); + + do { + void *entry = xas_next(&xas); + if ((i < (1 << 8)) || (i >= (1 << 15))) + XA_BUG_ON(xa, entry != xa_mk_value(i)); + else + XA_BUG_ON(xa, entry != NULL); + XA_BUG_ON(xa, i != xas.xa_index); + i++; + } while (i < (1 << 16)); + rcu_read_unlock(); + + xa_destroy(xa); + + for (i = 0; i < 16; i++) + check_move_small(xa, 1UL << i); + + for (i = 2; i < 16; i++) + check_move_small(xa, (1UL << i) - 1); +} + static noinline void check_destroy(struct xarray *xa) { unsigned long index; @@ -512,6 +626,7 @@ static int xarray_checks(void) check_multi_store(&array); check_find(&array); check_destroy(&array); + check_move(&array); printk("XArray: %u of %u tests passed\n", tests_passed, tests_run); return (tests_run == tests_passed) ? 0 : -EINVAL; diff --git a/lib/xarray.c b/lib/xarray.c index 057dbe0d3bf6..303c46579598 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -876,6 +876,80 @@ void xas_pause(struct xa_state *xas) } EXPORT_SYMBOL_GPL(xas_pause); +/* + * __xas_prev() - Find the previous entry in the XArray. + * @xas: XArray operation state. + * + * Helper function for xas_prev() which handles all the complex cases + * out of line. + */ +void *__xas_prev(struct xa_state *xas) +{ + void *entry; + + if (!xas_frozen(xas->xa_node)) + xas->xa_index--; + if (xas_not_node(xas->xa_node)) + return xas_load(xas); + + if (xas->xa_offset != get_offset(xas->xa_index, xas->xa_node)) + xas->xa_offset--; + + while (xas->xa_offset == 255) { + xas->xa_offset = xas->xa_node->offset - 1; + xas->xa_node = xa_parent(xas->xa, xas->xa_node); + if (!xas->xa_node) + return set_bounds(xas); + } + + for (;;) { + entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); + if (!xa_is_node(entry)) + return entry; + + xas->xa_node = xa_to_node(entry); + xas_set_offset(xas); + } +} +EXPORT_SYMBOL_GPL(__xas_prev); + +/* + * __xas_next() - Find the next entry in the XArray. + * @xas: XArray operation state. + * + * Helper function for xas_next() which handles all the complex cases + * out of line. + */ +void *__xas_next(struct xa_state *xas) +{ + void *entry; + + if (!xas_frozen(xas->xa_node)) + xas->xa_index++; + if (xas_not_node(xas->xa_node)) + return xas_load(xas); + + if (xas->xa_offset != get_offset(xas->xa_index, xas->xa_node)) + xas->xa_offset++; + + while (xas->xa_offset == XA_CHUNK_SIZE) { + xas->xa_offset = xas->xa_node->offset + 1; + xas->xa_node = xa_parent(xas->xa, xas->xa_node); + if (!xas->xa_node) + return set_bounds(xas); + } + + for (;;) { + entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset); + if (!xa_is_node(entry)) + return entry; + + xas->xa_node = xa_to_node(entry); + xas_set_offset(xas); + } +} +EXPORT_SYMBOL_GPL(__xas_next); + /** * xas_find() - Find the next present entry in the XArray. * @xas: XArray operation state. -- cgit v1.2.3 From 4e99d4e9579d3b950bf4b38d0d64eb1b9be78761 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 1 Jun 2018 22:46:02 -0400 Subject: xarray: Add xas_for_each_conflict This iterator iterates over each entry that is stored in the index or indices specified by the xa_state. This is intended for use for a conditional store of a multiindex entry, or to allow entries which are about to be removed from the xarray to be disposed of properly. Signed-off-by: Matthew Wilcox --- lib/test_xarray.c | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/xarray.c | 61 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+) (limited to 'lib') diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 85ef1882dd3c..8eba3de1baea 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -365,6 +365,73 @@ static noinline void check_multi_store(struct xarray *xa) #endif } +static noinline void __check_store_iter(struct xarray *xa, unsigned long start, + unsigned int order, unsigned int present) +{ + XA_STATE_ORDER(xas, xa, start, order); + void *entry; + unsigned int count = 0; + +retry: + xas_lock(&xas); + xas_for_each_conflict(&xas, entry) { + XA_BUG_ON(xa, !xa_is_value(entry)); + XA_BUG_ON(xa, entry < xa_mk_value(start)); + XA_BUG_ON(xa, entry > xa_mk_value(start + (1UL << order) - 1)); + count++; + } + xas_store(&xas, xa_mk_value(start)); + xas_unlock(&xas); + if (xas_nomem(&xas, GFP_KERNEL)) { + count = 0; + goto retry; + } + XA_BUG_ON(xa, xas_error(&xas)); + XA_BUG_ON(xa, count != present); + XA_BUG_ON(xa, xa_load(xa, start) != xa_mk_value(start)); + XA_BUG_ON(xa, xa_load(xa, start + (1UL << order) - 1) != + xa_mk_value(start)); + xa_erase_index(xa, start); +} + +static noinline void check_store_iter(struct xarray *xa) +{ + unsigned int i, j; + unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1; + + for (i = 0; i < max_order; i++) { + unsigned int min = 1 << i; + unsigned int max = (2 << i) - 1; + __check_store_iter(xa, 0, i, 0); + XA_BUG_ON(xa, !xa_empty(xa)); + __check_store_iter(xa, min, i, 0); + XA_BUG_ON(xa, !xa_empty(xa)); + + xa_store_index(xa, min, GFP_KERNEL); + __check_store_iter(xa, min, i, 1); + XA_BUG_ON(xa, !xa_empty(xa)); + xa_store_index(xa, max, GFP_KERNEL); + __check_store_iter(xa, min, i, 1); + XA_BUG_ON(xa, !xa_empty(xa)); + + for (j = 0; j < min; j++) + xa_store_index(xa, j, GFP_KERNEL); + __check_store_iter(xa, 0, i, min); + XA_BUG_ON(xa, !xa_empty(xa)); + for (j = 0; j < min; j++) + xa_store_index(xa, min + j, GFP_KERNEL); + __check_store_iter(xa, min, i, min); + XA_BUG_ON(xa, !xa_empty(xa)); + } +#ifdef CONFIG_XARRAY_MULTI + xa_store_index(xa, 63, GFP_KERNEL); + xa_store_index(xa, 65, GFP_KERNEL); + __check_store_iter(xa, 64, 2, 1); + xa_erase_index(xa, 63); +#endif + XA_BUG_ON(xa, !xa_empty(xa)); +} + static noinline void check_multi_find(struct xarray *xa) { #ifdef CONFIG_XARRAY_MULTI @@ -627,6 +694,7 @@ static int xarray_checks(void) check_find(&array); check_destroy(&array); check_move(&array); + check_store_iter(&array); printk("XArray: %u of %u tests passed\n", tests_passed, tests_run); return (tests_run == tests_passed) ? 0 : -EINVAL; diff --git a/lib/xarray.c b/lib/xarray.c index 303c46579598..41f8ebc651f5 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1110,6 +1110,67 @@ max: } EXPORT_SYMBOL_GPL(xas_find_marked); +/** + * xas_find_conflict() - Find the next present entry in a range. + * @xas: XArray operation state. + * + * The @xas describes both a range and a position within that range. + * + * Context: Any context. Expects xa_lock to be held. + * Return: The next entry in the range covered by @xas or %NULL. + */ +void *xas_find_conflict(struct xa_state *xas) +{ + void *curr; + + if (xas_error(xas)) + return NULL; + + if (!xas->xa_node) + return NULL; + + if (xas_top(xas->xa_node)) { + curr = xas_start(xas); + if (!curr) + return NULL; + while (xa_is_node(curr)) { + struct xa_node *node = xa_to_node(curr); + curr = xas_descend(xas, node); + } + if (curr) + return curr; + } + + if (xas->xa_node->shift > xas->xa_shift) + return NULL; + + for (;;) { + if (xas->xa_node->shift == xas->xa_shift) { + if ((xas->xa_offset & xas->xa_sibs) == xas->xa_sibs) + break; + } else if (xas->xa_offset == XA_CHUNK_MASK) { + xas->xa_offset = xas->xa_node->offset; + xas->xa_node = xa_parent_locked(xas->xa, xas->xa_node); + if (!xas->xa_node) + break; + continue; + } + curr = xa_entry_locked(xas->xa, xas->xa_node, ++xas->xa_offset); + if (xa_is_sibling(curr)) + continue; + while (xa_is_node(curr)) { + xas->xa_node = xa_to_node(curr); + xas->xa_offset = 0; + curr = xa_entry_locked(xas->xa, xas->xa_node, 0); + } + if (curr) + return curr; + } + xas->xa_offset -= xas->xa_sibs; + return NULL; +} +EXPORT_SYMBOL_GPL(xas_find_conflict); + /** * xa_init_flags() - Initialise an empty XArray with flags. * @xa: XArray. -- cgit v1.2.3 From 2264f5132fe45571139727ebdeb78696b35d1506 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 4 Dec 2017 00:11:48 -0500 Subject: xarray: Add xas_create_range This hopefully temporary function is useful for users who have not yet been converted to multi-index entries. Signed-off-by: Matthew Wilcox --- lib/test_xarray.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/xarray.c | 50 +++++++++++++++++++++++ 2 files changed, 169 insertions(+) (limited to 'lib') diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 8eba3de1baea..703370015d10 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -646,6 +646,124 @@ static noinline void check_move(struct xarray *xa) check_move_small(xa, (1UL << i) - 1); } +static noinline void xa_store_many_order(struct xarray *xa, + unsigned long index, unsigned order) +{ + XA_STATE_ORDER(xas, xa, index, order); + unsigned int i = 0; + + do { + xas_lock(&xas); + XA_BUG_ON(xa, xas_find_conflict(&xas)); + xas_create_range(&xas); + if (xas_error(&xas)) + goto unlock; + for (i = 0; i < (1U << order); i++) { + XA_BUG_ON(xa, xas_store(&xas, xa_mk_value(index + i))); + xas_next(&xas); + } +unlock: + xas_unlock(&xas); + } while (xas_nomem(&xas, GFP_KERNEL)); + + XA_BUG_ON(xa, xas_error(&xas)); +} + +static noinline void check_create_range_1(struct xarray *xa, + unsigned long index, unsigned order) +{ + unsigned long i; + + xa_store_many_order(xa, index, order); + for (i = index; i < index + (1UL << order); i++) + xa_erase_index(xa, i); + XA_BUG_ON(xa, !xa_empty(xa)); +} + +static noinline void check_create_range_2(struct xarray *xa, unsigned order) +{ + unsigned long i; + unsigned long nr = 1UL << order; + + for (i = 0; i < nr * nr; i += nr) + xa_store_many_order(xa, i, order); + for (i = 0; i < nr * nr; i++) + xa_erase_index(xa, i); + XA_BUG_ON(xa, !xa_empty(xa)); +} + +static noinline void check_create_range_3(void) +{ + XA_STATE(xas, NULL, 0); + xas_set_err(&xas, -EEXIST); + xas_create_range(&xas); + XA_BUG_ON(NULL, xas_error(&xas) != -EEXIST); +} + +static noinline void check_create_range_4(struct xarray *xa, + unsigned long index, unsigned order) +{ + XA_STATE_ORDER(xas, xa, index, order); + unsigned long base = xas.xa_index; + unsigned long i = 0; + + xa_store_index(xa, index, GFP_KERNEL); + do { + xas_lock(&xas); + xas_create_range(&xas); + if (xas_error(&xas)) + goto unlock; + for (i = 0; i < (1UL << order); i++) { + void *old = xas_store(&xas, xa_mk_value(base + i)); + if (xas.xa_index == index) + XA_BUG_ON(xa, old != xa_mk_value(base + i)); + else + XA_BUG_ON(xa, old != NULL); + xas_next(&xas); + } +unlock: + xas_unlock(&xas); + } while (xas_nomem(&xas, GFP_KERNEL)); + + XA_BUG_ON(xa, xas_error(&xas)); + + for (i = base; i < base + (1UL << order); i++) + xa_erase_index(xa, i); + XA_BUG_ON(xa, !xa_empty(xa)); +} + +static noinline void check_create_range(struct xarray *xa) +{ + unsigned int order; + unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 12 : 1; + + for (order = 0; order < max_order; order++) { + check_create_range_1(xa, 0, order); + check_create_range_1(xa, 1U << order, order); + check_create_range_1(xa, 2U << order, order); + check_create_range_1(xa, 3U << order, order); + check_create_range_1(xa, 1U << 24, order); + if (order < 10) + check_create_range_2(xa, order); + + check_create_range_4(xa, 0, order); + check_create_range_4(xa, 1U << order, order); + check_create_range_4(xa, 2U << order, order); + check_create_range_4(xa, 3U << order, order); + check_create_range_4(xa, 1U << 24, order); + + check_create_range_4(xa, 1, order); + check_create_range_4(xa, (1U << order) + 1, order); + check_create_range_4(xa, (2U << order) + 1, order); + check_create_range_4(xa, (2U << order) - 1, order); + check_create_range_4(xa, (3U << order) + 1, order); + check_create_range_4(xa, (3U << order) - 1, order); + check_create_range_4(xa, (1U << 24) + 1, order); + } + + check_create_range_3(); +} + static noinline void check_destroy(struct xarray *xa) { unsigned long index; @@ -694,6 +812,7 @@ static int xarray_checks(void) check_find(&array); check_destroy(&array); check_move(&array); + check_create_range(&array); check_store_iter(&array); printk("XArray: %u of %u tests passed\n", tests_passed, tests_run); diff --git a/lib/xarray.c b/lib/xarray.c index 41f8ebc651f5..ff37516fe832 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -637,6 +637,56 @@ static void *xas_create(struct xa_state *xas) return entry; } +/** + * xas_create_range() - Ensure that stores to this range will succeed + * @xas: XArray operation state. + * + * Creates all of the slots in the range covered by @xas. Sets @xas to + * create single-index entries and positions it at the beginning of the + * range. This is for the benefit of users which have not yet been + * converted to use multi-index entries. + */ +void xas_create_range(struct xa_state *xas) +{ + unsigned long index = xas->xa_index; + unsigned char shift = xas->xa_shift; + unsigned char sibs = xas->xa_sibs; + + xas->xa_index |= ((sibs + 1) << shift) - 1; + if (xas_is_node(xas) && xas->xa_node->shift == xas->xa_shift) + xas->xa_offset |= sibs; + xas->xa_shift = 0; + xas->xa_sibs = 0; + + for (;;) { + xas_create(xas); + if (xas_error(xas)) + goto restore; + if (xas->xa_index <= (index | XA_CHUNK_MASK)) + goto success; + xas->xa_index -= XA_CHUNK_SIZE; + + for (;;) { + struct xa_node *node = xas->xa_node; + xas->xa_node = xa_parent_locked(xas->xa, node); + xas->xa_offset = node->offset - 1; + if (node->offset != 0) + break; + } + } + +restore: + xas->xa_shift = shift; + xas->xa_sibs = sibs; + xas->xa_index = index; + return; +success: + xas->xa_index = index; + if (xas->xa_node) + xas_set_offset(xas); +} +EXPORT_SYMBOL_GPL(xas_create_range); + static void update_node(struct xa_state *xas, struct xa_node *node, int count, int values) { -- cgit v1.2.3 From 9f14d4f1f1045f161fd4db8a8e194b7825c2874a Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 1 Oct 2018 14:54:59 -0400 Subject: xarray: Add xa_reserve and xa_release This function reserves a slot in the XArray for users which need to acquire multiple locks before storing their entry in the tree and so cannot use a plain xa_store(). Signed-off-by: Matthew Wilcox --- lib/test_xarray.c | 40 ++++++++++++++++++++++++++++++++++++++++ lib/xarray.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) (limited to 'lib') diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 703370015d10..6aafd411a5c3 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -259,6 +259,45 @@ static noinline void check_cmpxchg(struct xarray *xa) XA_BUG_ON(xa, !xa_empty(xa)); } +static noinline void check_reserve(struct xarray *xa) +{ + void *entry; + unsigned long index = 0; + + /* An array with a reserved entry is not empty */ + XA_BUG_ON(xa, !xa_empty(xa)); + xa_reserve(xa, 12345678, GFP_KERNEL); + XA_BUG_ON(xa, xa_empty(xa)); + XA_BUG_ON(xa, xa_load(xa, 12345678)); + xa_release(xa, 12345678); + XA_BUG_ON(xa, !xa_empty(xa)); + + /* Releasing a used entry does nothing */ + xa_reserve(xa, 12345678, GFP_KERNEL); + XA_BUG_ON(xa, xa_store_index(xa, 12345678, GFP_NOWAIT) != NULL); + xa_release(xa, 12345678); + xa_erase_index(xa, 12345678); + XA_BUG_ON(xa, !xa_empty(xa)); + + /* cmpxchg sees a reserved entry as NULL */ + xa_reserve(xa, 12345678, GFP_KERNEL); + XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, NULL, xa_mk_value(12345678), + GFP_NOWAIT) != NULL); + xa_release(xa, 12345678); + xa_erase_index(xa, 12345678); + XA_BUG_ON(xa, !xa_empty(xa)); + + /* Can iterate through a reserved entry */ + xa_store_index(xa, 5, GFP_KERNEL); + xa_reserve(xa, 6, GFP_KERNEL); + xa_store_index(xa, 7, GFP_KERNEL); + + xa_for_each(xa, entry, index, ULONG_MAX, XA_PRESENT) { + XA_BUG_ON(xa, index != 5 && index != 7); + } + xa_destroy(xa); +} + static noinline void check_xas_erase(struct xarray *xa) { XA_STATE(xas, xa, 0); @@ -808,6 +847,7 @@ static int xarray_checks(void) check_xa_shrink(&array); check_xas_erase(&array); check_cmpxchg(&array); + check_reserve(&array); check_multi_store(&array); check_find(&array); check_destroy(&array); diff --git a/lib/xarray.c b/lib/xarray.c index ff37516fe832..546461914282 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1266,6 +1266,8 @@ void *xa_load(struct xarray *xa, unsigned long index) rcu_read_lock(); do { entry = xas_load(&xas); + if (xa_is_zero(entry)) + entry = NULL; } while (xas_retry(&xas, entry)); rcu_read_unlock(); @@ -1275,6 +1277,8 @@ EXPORT_SYMBOL(xa_load); static void *xas_result(struct xa_state *xas, void *curr) { + if (xa_is_zero(curr)) + return NULL; XA_NODE_BUG_ON(xas->xa_node, xa_is_internal(curr)); if (xas_error(xas)) curr = xas->xa_node; @@ -1394,6 +1398,8 @@ void *xa_cmpxchg(struct xarray *xa, unsigned long index, do { xas_lock(&xas); curr = xas_load(&xas); + if (curr == XA_ZERO_ENTRY) + curr = NULL; if (curr == old) xas_store(&xas, entry); xas_unlock(&xas); @@ -1430,6 +1436,8 @@ void *__xa_cmpxchg(struct xarray *xa, unsigned long index, do { curr = xas_load(&xas); + if (curr == XA_ZERO_ENTRY) + curr = NULL; if (curr == old) xas_store(&xas, entry); } while (__xas_nomem(&xas, gfp)); @@ -1438,6 +1446,43 @@ void *__xa_cmpxchg(struct xarray *xa, unsigned long index, } EXPORT_SYMBOL(__xa_cmpxchg); +/** + * xa_reserve() - Reserve this index in the XArray. + * @xa: XArray. + * @index: Index into array. + * @gfp: Memory allocation flags. + * + * Ensures there is somewhere to store an entry at @index in the array. + * If there is already something stored at @index, this function does + * nothing. If there was nothing there, the entry is marked as reserved. + * Loads from @index will continue to see a %NULL pointer until a + * subsequent store to @index. + * + * If you do not use the entry that you have reserved, call xa_release() + * or xa_erase() to free any unnecessary memory. + * + * Context: Process context. Takes and releases the xa_lock, IRQ or BH safe + * if specified in XArray flags. May sleep if the @gfp flags permit. + * Return: 0 if the reservation succeeded or -ENOMEM if it failed. + */ +int xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp) +{ + XA_STATE(xas, xa, index); + unsigned int lock_type = xa_lock_type(xa); + void *curr; + + do { + xas_lock_type(&xas, lock_type); + curr = xas_load(&xas); + if (!curr) + xas_store(&xas, XA_ZERO_ENTRY); + xas_unlock_type(&xas, lock_type); + } while (xas_nomem(&xas, gfp)); + + return xas_error(&xas); +} +EXPORT_SYMBOL(xa_reserve); + /** * __xa_set_mark() - Set this mark on this entry while locked. * @xa: XArray. @@ -1797,6 +1842,8 @@ void xa_dump_entry(const void *entry, unsigned long index, unsigned long shift) pr_cont("retry (%ld)\n", xa_to_internal(entry)); else if (xa_is_sibling(entry)) pr_cont("sibling (slot %ld)\n", xa_to_sibling(entry)); + else if (xa_is_zero(entry)) + pr_cont("zero (%ld)\n", xa_to_internal(entry)); else pr_cont("UNKNOWN ENTRY (%px)\n", entry); } -- cgit v1.2.3 From 371c752dc66948714ee3b66c3306f3ff1ff71d2e Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 4 Jul 2018 10:50:12 -0400 Subject: xarray: Track free entries in an XArray Add the optional ability to track which entries in an XArray are free and provide xa_alloc() to replace most of the functionality of the IDR. Signed-off-by: Matthew Wilcox --- lib/test_xarray.c | 61 ++++++++++++++++++++++++++++++++++++++ lib/xarray.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 145 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 6aafd411a5c3..a752e6a37e6f 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -33,6 +33,15 @@ static void *xa_store_index(struct xarray *xa, unsigned long index, gfp_t gfp) return xa_store(xa, index, xa_mk_value(index & LONG_MAX), gfp); } +static void xa_alloc_index(struct xarray *xa, unsigned long index, gfp_t gfp) +{ + u32 id = 0; + + XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_value(index & LONG_MAX), + gfp) != 0); + XA_BUG_ON(xa, id != index); +} + static void xa_erase_index(struct xarray *xa, unsigned long index) { XA_BUG_ON(xa, xa_erase(xa, index) != xa_mk_value(index & LONG_MAX)); @@ -404,6 +413,57 @@ static noinline void check_multi_store(struct xarray *xa) #endif } +static DEFINE_XARRAY_ALLOC(xa0); + +static noinline void check_xa_alloc(void) +{ + int i; + u32 id; + + /* An empty array should assign 0 to the first alloc */ + xa_alloc_index(&xa0, 0, GFP_KERNEL); + + /* Erasing it should make the array empty again */ + xa_erase_index(&xa0, 0); + XA_BUG_ON(&xa0, !xa_empty(&xa0)); + + /* And it should assign 0 again */ + xa_alloc_index(&xa0, 0, GFP_KERNEL); + + /* The next assigned ID should be 1 */ + xa_alloc_index(&xa0, 1, GFP_KERNEL); + xa_erase_index(&xa0, 1); + + /* Storing a value should mark it used */ + xa_store_index(&xa0, 1, GFP_KERNEL); + xa_alloc_index(&xa0, 2, GFP_KERNEL); + + /* If we then erase 0, it should be free */ + xa_erase_index(&xa0, 0); + xa_alloc_index(&xa0, 0, GFP_KERNEL); + + xa_erase_index(&xa0, 1); + xa_erase_index(&xa0, 2); + + for (i = 1; i < 5000; i++) { + xa_alloc_index(&xa0, i, GFP_KERNEL); + } + + xa_destroy(&xa0); + + id = 0xfffffffeU; + XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_value(0), + GFP_KERNEL) != 0); + XA_BUG_ON(&xa0, id != 0xfffffffeU); + XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_value(0), + GFP_KERNEL) != 0); + XA_BUG_ON(&xa0, id != 0xffffffffU); + XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_value(0), + GFP_KERNEL) != -ENOSPC); + XA_BUG_ON(&xa0, id != 0xffffffffU); + xa_destroy(&xa0); +} + static noinline void __check_store_iter(struct xarray *xa, unsigned long start, unsigned int order, unsigned int present) { @@ -849,6 +909,7 @@ static int xarray_checks(void) check_cmpxchg(&array); check_reserve(&array); check_multi_store(&array); + check_xa_alloc(); check_find(&array); check_destroy(&array); check_move(&array); diff --git a/lib/xarray.c b/lib/xarray.c index 546461914282..9a0d49d4b5f0 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -52,6 +52,11 @@ static inline void xas_unlock_type(struct xa_state *xas, unsigned int lock_type) xas_unlock(xas); } +static inline bool xa_track_free(const struct xarray *xa) +{ + return xa->xa_flags & XA_FLAGS_TRACK_FREE; +} + static inline void xa_mark_set(struct xarray *xa, xa_mark_t mark) { if (!(xa->xa_flags & XA_FLAGS_MARK(mark))) @@ -94,6 +99,11 @@ static inline bool node_any_mark(struct xa_node *node, xa_mark_t mark) return !bitmap_empty(node_marks(node, mark), XA_CHUNK_SIZE); } +static inline void node_mark_all(struct xa_node *node, xa_mark_t mark) +{ + bitmap_fill(node_marks(node, mark), XA_CHUNK_SIZE); +} + #define mark_inc(mark) do { \ mark = (__force xa_mark_t)((__force unsigned)(mark) + 1); \ } while (0) @@ -416,6 +426,8 @@ static void xas_shrink(struct xa_state *xas) xas->xa_node = XAS_BOUNDS; RCU_INIT_POINTER(xa->xa_head, entry); + if (xa_track_free(xa) && !node_get_mark(node, 0, XA_FREE_MARK)) + xa_mark_clear(xa, XA_FREE_MARK); node->count = 0; node->nr_values = 0; @@ -549,8 +561,15 @@ static int xas_expand(struct xa_state *xas, void *head) /* Propagate the aggregated mark info to the new child */ for (;;) { - if (xa_marked(xa, mark)) + if (xa_track_free(xa) && mark == XA_FREE_MARK) { + node_mark_all(node, XA_FREE_MARK); + if (!xa_marked(xa, XA_FREE_MARK)) { + node_clear_mark(node, 0, XA_FREE_MARK); + xa_mark_set(xa, XA_FREE_MARK); + } + } else if (xa_marked(xa, mark)) { node_set_mark(node, 0, mark); + } if (mark == XA_MARK_MAX) break; mark_inc(mark); @@ -624,6 +643,8 @@ static void *xas_create(struct xa_state *xas) node = xas_alloc(xas, shift); if (!node) break; + if (xa_track_free(xa)) + node_mark_all(node, XA_FREE_MARK); rcu_assign_pointer(*slot, xa_mk_node(node)); } else if (xa_is_node(entry)) { node = xa_to_node(entry); @@ -882,7 +903,10 @@ void xas_init_marks(const struct xa_state *xas) xa_mark_t mark = 0; for (;;) { - xas_clear_mark(xas, mark); + if (xa_track_free(xas->xa) && mark == XA_FREE_MARK) + xas_set_mark(xas, mark); + else + xas_clear_mark(xas, mark); if (mark == XA_MARK_MAX) break; mark_inc(mark); @@ -1333,6 +1357,8 @@ void *xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) do { xas_lock(&xas); curr = xas_store(&xas, entry); + if (xa_track_free(xa) && entry) + xas_clear_mark(&xas, XA_FREE_MARK); xas_unlock(&xas); } while (xas_nomem(&xas, gfp)); @@ -1365,6 +1391,8 @@ void *__xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp) do { curr = xas_store(&xas, entry); + if (xa_track_free(xa) && entry) + xas_clear_mark(&xas, XA_FREE_MARK); } while (__xas_nomem(&xas, gfp)); return xas_result(&xas, curr); @@ -1400,8 +1428,11 @@ void *xa_cmpxchg(struct xarray *xa, unsigned long index, curr = xas_load(&xas); if (curr == XA_ZERO_ENTRY) curr = NULL; - if (curr == old) + if (curr == old) { xas_store(&xas, entry); + if (xa_track_free(xa) && entry) + xas_clear_mark(&xas, XA_FREE_MARK); + } xas_unlock(&xas); } while (xas_nomem(&xas, gfp)); @@ -1438,8 +1469,11 @@ void *__xa_cmpxchg(struct xarray *xa, unsigned long index, curr = xas_load(&xas); if (curr == XA_ZERO_ENTRY) curr = NULL; - if (curr == old) + if (curr == old) { xas_store(&xas, entry); + if (xa_track_free(xa) && entry) + xas_clear_mark(&xas, XA_FREE_MARK); + } } while (__xas_nomem(&xas, gfp)); return xas_result(&xas, curr); @@ -1483,6 +1517,52 @@ int xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp) } EXPORT_SYMBOL(xa_reserve); +/** + * __xa_alloc() - Find somewhere to store this entry in the XArray. + * @xa: XArray. + * @id: Pointer to ID. + * @max: Maximum ID to allocate (inclusive). + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * Allocates an unused ID in the range specified by @id and @max. + * Updates the @id pointer with the index, then stores the entry at that + * index. A concurrent lookup will not see an uninitialised @id. + * + * Context: Any context. Expects xa_lock to be held on entry. May + * release and reacquire xa_lock if @gfp flags permit. + * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if + * there is no more space in the XArray. + */ +int __xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry, gfp_t gfp) +{ + XA_STATE(xas, xa, 0); + int err; + + if (WARN_ON_ONCE(xa_is_internal(entry))) + return -EINVAL; + if (WARN_ON_ONCE(!xa_track_free(xa))) + return -EINVAL; + + if (!entry) + entry = XA_ZERO_ENTRY; + + do { + xas.xa_index = *id; + xas_find_marked(&xas, max, XA_FREE_MARK); + if (xas.xa_node == XAS_RESTART) + xas_set_err(&xas, -ENOSPC); + xas_store(&xas, entry); + xas_clear_mark(&xas, XA_FREE_MARK); + } while (__xas_nomem(&xas, gfp)); + + err = xas_error(&xas); + if (!err) + *id = xas.xa_index; + return err; +} +EXPORT_SYMBOL(__xa_alloc); + /** * __xa_set_mark() - Set this mark on this entry while locked. * @xa: XArray. -- cgit v1.2.3 From f32f004cddf86d63a9c42994bbce9f4e2f07c9fa Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 4 Jul 2018 15:42:46 -0400 Subject: ida: Convert to XArray Use the XA_TRACK_FREE ability to track which entries have a free bit, similarly to how it uses the radix tree's IDR_FREE tag. This eliminates the per-cpu ida_bitmap preload, and fixes the memory consumption regression I introduced when making the IDR able to store any pointer. Signed-off-by: Matthew Wilcox --- lib/idr.c | 379 +++++++++++++++++++++++++++++-------------------------- lib/radix-tree.c | 71 ----------- 2 files changed, 203 insertions(+), 247 deletions(-) (limited to 'lib') diff --git a/lib/idr.c b/lib/idr.c index 9a366b5be2c2..3c20ad9b0463 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -6,8 +6,6 @@ #include #include -DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap); - /** * idr_alloc_u32() - Allocate an ID. * @idr: IDR handle. @@ -320,6 +318,9 @@ EXPORT_SYMBOL(idr_replace); * free the individual IDs in it. You can use ida_is_empty() to find * out whether the IDA has any IDs currently allocated. * + * The IDA handles its own locking. It is safe to call any of the IDA + * functions without synchronisation in your code. + * * IDs are currently limited to the range [0-INT_MAX]. If this is an awkward * limitation, it should be quite straightforward to raise the maximum. */ @@ -327,151 +328,197 @@ EXPORT_SYMBOL(idr_replace); /* * Developer's notes: * - * The IDA uses the functionality provided by the IDR & radix tree to store - * bitmaps in each entry. The IDR_FREE tag means there is at least one bit - * free, unlike the IDR where it means at least one entry is free. + * The IDA uses the functionality provided by the XArray to store bitmaps in + * each entry. The XA_FREE_MARK is only cleared when all bits in the bitmap + * have been set. * - * I considered telling the radix tree that each slot is an order-10 node - * and storing the bit numbers in the radix tree, but the radix tree can't - * allow a single multiorder entry at index 0, which would significantly - * increase memory consumption for the IDA. So instead we divide the index - * by the number of bits in the leaf bitmap before doing a radix tree lookup. + * I considered telling the XArray that each slot is an order-10 node + * and indexing by bit number, but the XArray can't allow a single multi-index + * entry in the head, which would significantly increase memory consumption + * for the IDA. So instead we divide the index by the number of bits in the + * leaf bitmap before doing a radix tree lookup. * * As an optimisation, if there are only a few low bits set in any given * leaf, instead of allocating a 128-byte bitmap, we store the bits - * directly in the entry. - * - * We allow the radix tree 'exceptional' count to get out of date. Nothing - * in the IDA nor the radix tree code checks it. If it becomes important - * to maintain an accurate exceptional count, switch the rcu_assign_pointer() - * calls to radix_tree_iter_replace() which will correct the exceptional - * count. - * - * The IDA always requires a lock to alloc/free. If we add a 'test_bit' + * as a value entry. Value entries never have the XA_FREE_MARK cleared + * because we can always convert them into a bitmap entry. + * + * It would be possible to optimise further; once we've run out of a + * single 128-byte bitmap, we currently switch to a 576-byte node, put + * the 128-byte bitmap in the first entry and then start allocating extra + * 128-byte entries. We could instead use the 512 bytes of the node's + * data as a bitmap before moving to that scheme. I do not believe this + * is a worthwhile optimisation; Rasmus Villemoes surveyed the current + * users of the IDA and almost none of them use more than 1024 entries. + * Those that do use more than the 8192 IDs that the 512 bytes would + * provide. + * + * The IDA always uses a lock to alloc/free. If we add a 'test_bit' * equivalent, it will still need locking. Going to RCU lookup would require * using RCU to free bitmaps, and that's not trivial without embedding an * RCU head in the bitmap, which adds a 2-pointer overhead to each 128-byte * bitmap, which is excessive. */ -#define IDA_MAX (0x80000000U / IDA_BITMAP_BITS - 1) - -static int ida_get_new_above(struct ida *ida, int start) +/** + * ida_alloc_range() - Allocate an unused ID. + * @ida: IDA handle. + * @min: Lowest ID to allocate. + * @max: Highest ID to allocate. + * @gfp: Memory allocation flags. + * + * Allocate an ID between @min and @max, inclusive. The allocated ID will + * not exceed %INT_MAX, even if @max is larger. + * + * Context: Any context. + * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, + * or %-ENOSPC if there are no free IDs. + */ +int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max, + gfp_t gfp) { - struct radix_tree_root *root = &ida->ida_rt; - void __rcu **slot; - struct radix_tree_iter iter; - struct ida_bitmap *bitmap; - unsigned long index; - unsigned bit; - int new; - - index = start / IDA_BITMAP_BITS; - bit = start % IDA_BITMAP_BITS; - - slot = radix_tree_iter_init(&iter, index); - for (;;) { - if (slot) - slot = radix_tree_next_slot(slot, &iter, - RADIX_TREE_ITER_TAGGED); - if (!slot) { - slot = idr_get_free(root, &iter, GFP_NOWAIT, IDA_MAX); - if (IS_ERR(slot)) { - if (slot == ERR_PTR(-ENOMEM)) - return -EAGAIN; - return PTR_ERR(slot); + XA_STATE(xas, &ida->xa, min / IDA_BITMAP_BITS); + unsigned bit = min % IDA_BITMAP_BITS; + unsigned long flags; + struct ida_bitmap *bitmap, *alloc = NULL; + + if ((int)min < 0) + return -ENOSPC; + + if ((int)max < 0) + max = INT_MAX; + +retry: + xas_lock_irqsave(&xas, flags); +next: + bitmap = xas_find_marked(&xas, max / IDA_BITMAP_BITS, XA_FREE_MARK); + if (xas.xa_index > min / IDA_BITMAP_BITS) + bit = 0; + if (xas.xa_index * IDA_BITMAP_BITS + bit > max) + goto nospc; + + if (xa_is_value(bitmap)) { + unsigned long tmp = xa_to_value(bitmap); + + if (bit < BITS_PER_XA_VALUE) { + bit = find_next_zero_bit(&tmp, BITS_PER_XA_VALUE, bit); + if (xas.xa_index * IDA_BITMAP_BITS + bit > max) + goto nospc; + if (bit < BITS_PER_XA_VALUE) { + tmp |= 1UL << bit; + xas_store(&xas, xa_mk_value(tmp)); + goto out; } } - if (iter.index > index) - bit = 0; - new = iter.index * IDA_BITMAP_BITS; - bitmap = rcu_dereference_raw(*slot); - if (xa_is_value(bitmap)) { - unsigned long tmp = xa_to_value(bitmap); - int vbit = find_next_zero_bit(&tmp, BITS_PER_XA_VALUE, - bit); - if (vbit < BITS_PER_XA_VALUE) { - tmp |= 1UL << vbit; - rcu_assign_pointer(*slot, xa_mk_value(tmp)); - return new + vbit; - } - bitmap = this_cpu_xchg(ida_bitmap, NULL); - if (!bitmap) - return -EAGAIN; - bitmap->bitmap[0] = tmp; - rcu_assign_pointer(*slot, bitmap); + bitmap = alloc; + if (!bitmap) + bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT); + if (!bitmap) + goto alloc; + bitmap->bitmap[0] = tmp; + xas_store(&xas, bitmap); + if (xas_error(&xas)) { + bitmap->bitmap[0] = 0; + goto out; } + } - if (bitmap) { - bit = find_next_zero_bit(bitmap->bitmap, - IDA_BITMAP_BITS, bit); - new += bit; - if (new < 0) - return -ENOSPC; - if (bit == IDA_BITMAP_BITS) - continue; + if (bitmap) { + bit = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, bit); + if (xas.xa_index * IDA_BITMAP_BITS + bit > max) + goto nospc; + if (bit == IDA_BITMAP_BITS) + goto next; - __set_bit(bit, bitmap->bitmap); - if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS)) - radix_tree_iter_tag_clear(root, &iter, - IDR_FREE); + __set_bit(bit, bitmap->bitmap); + if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS)) + xas_clear_mark(&xas, XA_FREE_MARK); + } else { + if (bit < BITS_PER_XA_VALUE) { + bitmap = xa_mk_value(1UL << bit); } else { - new += bit; - if (new < 0) - return -ENOSPC; - if (bit < BITS_PER_XA_VALUE) { - bitmap = xa_mk_value(1UL << bit); - } else { - bitmap = this_cpu_xchg(ida_bitmap, NULL); - if (!bitmap) - return -EAGAIN; - __set_bit(bit, bitmap->bitmap); - } - radix_tree_iter_replace(root, &iter, slot, bitmap); + bitmap = alloc; + if (!bitmap) + bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT); + if (!bitmap) + goto alloc; + __set_bit(bit, bitmap->bitmap); } - - return new; + xas_store(&xas, bitmap); + } +out: + xas_unlock_irqrestore(&xas, flags); + if (xas_nomem(&xas, gfp)) { + xas.xa_index = min / IDA_BITMAP_BITS; + bit = min % IDA_BITMAP_BITS; + goto retry; } + if (bitmap != alloc) + kfree(alloc); + if (xas_error(&xas)) + return xas_error(&xas); + return xas.xa_index * IDA_BITMAP_BITS + bit; +alloc: + xas_unlock_irqrestore(&xas, flags); + alloc = kzalloc(sizeof(*bitmap), gfp); + if (!alloc) + return -ENOMEM; + xas_set(&xas, min / IDA_BITMAP_BITS); + bit = min % IDA_BITMAP_BITS; + goto retry; +nospc: + xas_unlock_irqrestore(&xas, flags); + return -ENOSPC; } +EXPORT_SYMBOL(ida_alloc_range); -static void ida_remove(struct ida *ida, int id) +/** + * ida_free() - Release an allocated ID. + * @ida: IDA handle. + * @id: Previously allocated ID. + * + * Context: Any context. + */ +void ida_free(struct ida *ida, unsigned int id) { - unsigned long index = id / IDA_BITMAP_BITS; - unsigned offset = id % IDA_BITMAP_BITS; + XA_STATE(xas, &ida->xa, id / IDA_BITMAP_BITS); + unsigned bit = id % IDA_BITMAP_BITS; struct ida_bitmap *bitmap; - unsigned long *btmp; - struct radix_tree_iter iter; - void __rcu **slot; + unsigned long flags; - slot = radix_tree_iter_lookup(&ida->ida_rt, &iter, index); - if (!slot) - goto err; + BUG_ON((int)id < 0); + + xas_lock_irqsave(&xas, flags); + bitmap = xas_load(&xas); - bitmap = rcu_dereference_raw(*slot); if (xa_is_value(bitmap)) { - btmp = (unsigned long *)slot; - offset += 1; /* Intimate knowledge of the value encoding */ - if (offset >= BITS_PER_LONG) + unsigned long v = xa_to_value(bitmap); + if (bit >= BITS_PER_XA_VALUE) + goto err; + if (!(v & (1UL << bit))) goto err; + v &= ~(1UL << bit); + if (!v) + goto delete; + xas_store(&xas, xa_mk_value(v)); } else { - btmp = bitmap->bitmap; - } - if (!test_bit(offset, btmp)) - goto err; - - __clear_bit(offset, btmp); - radix_tree_iter_tag_set(&ida->ida_rt, &iter, IDR_FREE); - if (xa_is_value(bitmap)) { - if (xa_to_value(rcu_dereference_raw(*slot)) == 0) - radix_tree_iter_delete(&ida->ida_rt, &iter, slot); - } else if (bitmap_empty(btmp, IDA_BITMAP_BITS)) { - kfree(bitmap); - radix_tree_iter_delete(&ida->ida_rt, &iter, slot); + if (!test_bit(bit, bitmap->bitmap)) + goto err; + __clear_bit(bit, bitmap->bitmap); + xas_set_mark(&xas, XA_FREE_MARK); + if (bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) { + kfree(bitmap); +delete: + xas_store(&xas, NULL); + } } + xas_unlock_irqrestore(&xas, flags); return; err: + xas_unlock_irqrestore(&xas, flags); WARN(1, "ida_free called for id=%d which is not allocated.\n", id); } +EXPORT_SYMBOL(ida_free); /** * ida_destroy() - Free all IDs. @@ -486,80 +533,60 @@ static void ida_remove(struct ida *ida, int id) */ void ida_destroy(struct ida *ida) { + XA_STATE(xas, &ida->xa, 0); + struct ida_bitmap *bitmap; unsigned long flags; - struct radix_tree_iter iter; - void __rcu **slot; - xa_lock_irqsave(&ida->ida_rt, flags); - radix_tree_for_each_slot(slot, &ida->ida_rt, &iter, 0) { - struct ida_bitmap *bitmap = rcu_dereference_raw(*slot); + xas_lock_irqsave(&xas, flags); + xas_for_each(&xas, bitmap, ULONG_MAX) { if (!xa_is_value(bitmap)) kfree(bitmap); - radix_tree_iter_delete(&ida->ida_rt, &iter, slot); + xas_store(&xas, NULL); } - xa_unlock_irqrestore(&ida->ida_rt, flags); + xas_unlock_irqrestore(&xas, flags); } EXPORT_SYMBOL(ida_destroy); -/** - * ida_alloc_range() - Allocate an unused ID. - * @ida: IDA handle. - * @min: Lowest ID to allocate. - * @max: Highest ID to allocate. - * @gfp: Memory allocation flags. - * - * Allocate an ID between @min and @max, inclusive. The allocated ID will - * not exceed %INT_MAX, even if @max is larger. - * - * Context: Any context. - * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, - * or %-ENOSPC if there are no free IDs. - */ -int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max, - gfp_t gfp) -{ - int id = 0; - unsigned long flags; - - if ((int)min < 0) - return -ENOSPC; +#ifndef __KERNEL__ +extern void xa_dump_index(unsigned long index, unsigned int shift); +#define IDA_CHUNK_SHIFT ilog2(IDA_BITMAP_BITS) - if ((int)max < 0) - max = INT_MAX; - -again: - xa_lock_irqsave(&ida->ida_rt, flags); - id = ida_get_new_above(ida, min); - if (id > (int)max) { - ida_remove(ida, id); - id = -ENOSPC; - } - xa_unlock_irqrestore(&ida->ida_rt, flags); +static void ida_dump_entry(void *entry, unsigned long index) +{ + unsigned long i; + + if (!entry) + return; + + if (xa_is_node(entry)) { + struct xa_node *node = xa_to_node(entry); + unsigned int shift = node->shift + IDA_CHUNK_SHIFT + + XA_CHUNK_SHIFT; + + xa_dump_index(index * IDA_BITMAP_BITS, shift); + xa_dump_node(node); + for (i = 0; i < XA_CHUNK_SIZE; i++) + ida_dump_entry(node->slots[i], + index | (i << node->shift)); + } else if (xa_is_value(entry)) { + xa_dump_index(index * IDA_BITMAP_BITS, ilog2(BITS_PER_LONG)); + pr_cont("value: data %lx [%px]\n", xa_to_value(entry), entry); + } else { + struct ida_bitmap *bitmap = entry; - if (unlikely(id == -EAGAIN)) { - if (!ida_pre_get(ida, gfp)) - return -ENOMEM; - goto again; + xa_dump_index(index * IDA_BITMAP_BITS, IDA_CHUNK_SHIFT); + pr_cont("bitmap: %p data", bitmap); + for (i = 0; i < IDA_BITMAP_LONGS; i++) + pr_cont(" %lx", bitmap->bitmap[i]); + pr_cont("\n"); } - - return id; } -EXPORT_SYMBOL(ida_alloc_range); -/** - * ida_free() - Release an allocated ID. - * @ida: IDA handle. - * @id: Previously allocated ID. - * - * Context: Any context. - */ -void ida_free(struct ida *ida, unsigned int id) +static void ida_dump(struct ida *ida) { - unsigned long flags; - - BUG_ON((int)id < 0); - xa_lock_irqsave(&ida->ida_rt, flags); - ida_remove(ida, id); - xa_unlock_irqrestore(&ida->ida_rt, flags); + struct xarray *xa = &ida->xa; + pr_debug("ida: %p node %p free %d\n", ida, xa->xa_head, + xa->xa_flags >> ROOT_TAG_SHIFT); + ida_dump_entry(xa->xa_head, 0); } -EXPORT_SYMBOL(ida_free); +#endif diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 3479d93c32b9..68702061464f 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -255,54 +255,6 @@ static unsigned long next_index(unsigned long index, return (index & ~node_maxindex(node)) + (offset << node->shift); } -#ifndef __KERNEL__ -static void dump_ida_node(void *entry, unsigned long index) -{ - unsigned long i; - - if (!entry) - return; - - if (radix_tree_is_internal_node(entry)) { - struct radix_tree_node *node = entry_to_node(entry); - - pr_debug("ida node: %p offset %d indices %lu-%lu parent %p free %lx shift %d count %d\n", - node, node->offset, index * IDA_BITMAP_BITS, - ((index | node_maxindex(node)) + 1) * - IDA_BITMAP_BITS - 1, - node->parent, node->tags[0][0], node->shift, - node->count); - for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) - dump_ida_node(node->slots[i], - index | (i << node->shift)); - } else if (xa_is_value(entry)) { - pr_debug("ida excp: %p offset %d indices %lu-%lu data %lx\n", - entry, (int)(index & RADIX_TREE_MAP_MASK), - index * IDA_BITMAP_BITS, - index * IDA_BITMAP_BITS + BITS_PER_XA_VALUE, - xa_to_value(entry)); - } else { - struct ida_bitmap *bitmap = entry; - - pr_debug("ida btmp: %p offset %d indices %lu-%lu data", bitmap, - (int)(index & RADIX_TREE_MAP_MASK), - index * IDA_BITMAP_BITS, - (index + 1) * IDA_BITMAP_BITS - 1); - for (i = 0; i < IDA_BITMAP_LONGS; i++) - pr_cont(" %lx", bitmap->bitmap[i]); - pr_cont("\n"); - } -} - -static void ida_dump(struct ida *ida) -{ - struct radix_tree_root *root = &ida->ida_rt; - pr_debug("ida: %p node %p free %d\n", ida, root->xa_head, - root->xa_flags >> ROOT_TAG_SHIFT); - dump_ida_node(root->xa_head, 0); -} -#endif - /* * This assumes that the caller has performed appropriate preallocation, and * that the caller has pinned this thread of control to the current CPU. @@ -2039,27 +1991,6 @@ void idr_preload(gfp_t gfp_mask) } EXPORT_SYMBOL(idr_preload); -int ida_pre_get(struct ida *ida, gfp_t gfp) -{ - /* - * The IDA API has no preload_end() equivalent. Instead, - * ida_get_new() can return -EAGAIN, prompting the caller - * to return to the ida_pre_get() step. - */ - if (!__radix_tree_preload(gfp, IDA_PRELOAD_SIZE)) - preempt_enable(); - - if (!this_cpu_read(ida_bitmap)) { - struct ida_bitmap *bitmap = kzalloc(sizeof(*bitmap), gfp); - if (!bitmap) - return 0; - if (this_cpu_cmpxchg(ida_bitmap, NULL, bitmap)) - kfree(bitmap); - } - - return 1; -} - void __rcu **idr_get_free(struct radix_tree_root *root, struct radix_tree_iter *iter, gfp_t gfp, unsigned long max) @@ -2201,8 +2132,6 @@ static int radix_tree_cpu_dead(unsigned int cpu) kmem_cache_free(radix_tree_node_cachep, node); rtp->nr--; } - kfree(per_cpu(ida_bitmap, cpu)); - per_cpu(ida_bitmap, cpu) = NULL; return 0; } -- cgit v1.2.3 From 74d609585d8bd6083bd9d75bc1fd2c0d3851bcc5 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 17 Nov 2017 10:01:45 -0500 Subject: page cache: Add and replace pages using the XArray Use the XArray APIs to add and replace pages in the page cache. This removes two uses of the radix tree preload API and is significantly shorter code. It also removes the last user of __radix_tree_create() outside radix-tree.c itself, so make it static. Signed-off-by: Matthew Wilcox --- lib/radix-tree.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 68702061464f..8a58051eb5b3 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -700,9 +700,9 @@ static bool delete_node(struct radix_tree_root *root, * * Returns -ENOMEM, or 0 for success. */ -int __radix_tree_create(struct radix_tree_root *root, unsigned long index, - unsigned order, struct radix_tree_node **nodep, - void __rcu ***slotp) +static int __radix_tree_create(struct radix_tree_root *root, + unsigned long index, unsigned order, + struct radix_tree_node **nodep, void __rcu ***slotp) { struct radix_tree_node *node = NULL, *child; void __rcu **slot = (void __rcu **)&root->xa_head; -- cgit v1.2.3 From a97e7904c0806309fd77103005bb7820c3f1c5e4 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 24 Nov 2017 14:24:59 -0500 Subject: mm: Convert workingset to XArray We construct an XA_STATE and use it to delete the node with xas_store() rather than adding a special function for this unique use case. Includes a test that simulates this usage for the test suite. Signed-off-by: Matthew Wilcox --- lib/test_xarray.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'lib') diff --git a/lib/test_xarray.c b/lib/test_xarray.c index a752e6a37e6f..128c6489082f 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -863,6 +863,67 @@ static noinline void check_create_range(struct xarray *xa) check_create_range_3(); } +static LIST_HEAD(shadow_nodes); + +static void test_update_node(struct xa_node *node) +{ + if (node->count && node->count == node->nr_values) { + if (list_empty(&node->private_list)) + list_add(&shadow_nodes, &node->private_list); + } else { + if (!list_empty(&node->private_list)) + list_del_init(&node->private_list); + } +} + +static noinline void shadow_remove(struct xarray *xa) +{ + struct xa_node *node; + + xa_lock(xa); + while ((node = list_first_entry_or_null(&shadow_nodes, + struct xa_node, private_list))) { + XA_STATE(xas, node->array, 0); + XA_BUG_ON(xa, node->array != xa); + list_del_init(&node->private_list); + xas.xa_node = xa_parent_locked(node->array, node); + xas.xa_offset = node->offset; + xas.xa_shift = node->shift + XA_CHUNK_SHIFT; + xas_set_update(&xas, test_update_node); + xas_store(&xas, NULL); + } + xa_unlock(xa); +} + +static noinline void check_workingset(struct xarray *xa, unsigned long index) +{ + XA_STATE(xas, xa, index); + xas_set_update(&xas, test_update_node); + + do { + xas_lock(&xas); + xas_store(&xas, xa_mk_value(0)); + xas_next(&xas); + xas_store(&xas, xa_mk_value(1)); + xas_unlock(&xas); + } while (xas_nomem(&xas, GFP_KERNEL)); + + XA_BUG_ON(xa, list_empty(&shadow_nodes)); + + xas_lock(&xas); + xas_next(&xas); + xas_store(&xas, &xas); + XA_BUG_ON(xa, !list_empty(&shadow_nodes)); + + xas_store(&xas, xa_mk_value(2)); + xas_unlock(&xas); + XA_BUG_ON(xa, list_empty(&shadow_nodes)); + + shadow_remove(xa); + XA_BUG_ON(xa, !list_empty(&shadow_nodes)); + XA_BUG_ON(xa, !xa_empty(xa)); +} + static noinline void check_destroy(struct xarray *xa) { unsigned long index; @@ -916,6 +977,10 @@ static int xarray_checks(void) check_create_range(&array); check_store_iter(&array); + check_workingset(&array, 0); + check_workingset(&array, 64); + check_workingset(&array, 4096); + printk("XArray: %u of %u tests passed\n", tests_passed, tests_run); return (tests_run == tests_passed) ? 0 : -EINVAL; } -- cgit v1.2.3 From e21a29552fa3f44ea41c53488875015ae70fd7f8 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 22 Nov 2017 08:36:00 -0500 Subject: shmem: Convert find_swap_entry to XArray This is a 1:1 conversion. The major part of this patch is converting the test framework from userspace to kernel space and mirroring the algorithm now used in find_swap_entry(). Signed-off-by: Matthew Wilcox --- lib/test_xarray.c | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'lib') diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 128c6489082f..815daffdd8c9 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -631,6 +631,61 @@ static noinline void check_find(struct xarray *xa) check_multi_find_2(xa); } +/* See find_swap_entry() in mm/shmem.c */ +static noinline unsigned long xa_find_entry(struct xarray *xa, void *item) +{ + XA_STATE(xas, xa, 0); + unsigned int checked = 0; + void *entry; + + rcu_read_lock(); + xas_for_each(&xas, entry, ULONG_MAX) { + if (xas_retry(&xas, entry)) + continue; + if (entry == item) + break; + checked++; + if ((checked % 4) != 0) + continue; + xas_pause(&xas); + } + rcu_read_unlock(); + + return entry ? xas.xa_index : -1; +} + +static noinline void check_find_entry(struct xarray *xa) +{ +#ifdef CONFIG_XARRAY_MULTI + unsigned int order; + unsigned long offset, index; + + for (order = 0; order < 20; order++) { + for (offset = 0; offset < (1UL << (order + 3)); + offset += (1UL << order)) { + for (index = 0; index < (1UL << (order + 5)); + index += (1UL << order)) { + xa_store_order(xa, index, order, + xa_mk_value(index), GFP_KERNEL); + XA_BUG_ON(xa, xa_load(xa, index) != + xa_mk_value(index)); + XA_BUG_ON(xa, xa_find_entry(xa, + xa_mk_value(index)) != index); + } + XA_BUG_ON(xa, xa_find_entry(xa, xa) != -1); + xa_destroy(xa); + } + } +#endif + + XA_BUG_ON(xa, xa_find_entry(xa, xa) != -1); + xa_store_index(xa, ULONG_MAX, GFP_KERNEL); + XA_BUG_ON(xa, xa_find_entry(xa, xa) != -1); + XA_BUG_ON(xa, xa_find_entry(xa, xa_mk_value(LONG_MAX)) != -1); + xa_erase_index(xa, ULONG_MAX); + XA_BUG_ON(xa, !xa_empty(xa)); +} + static noinline void check_move_small(struct xarray *xa, unsigned long idx) { XA_STATE(xas, xa, 0); @@ -972,6 +1027,7 @@ static int xarray_checks(void) check_multi_store(&array); check_xa_alloc(); check_find(&array); + check_find_entry(&array); check_destroy(&array); check_move(&array); check_create_range(&array); -- cgit v1.2.3 From 7b8d046fba91d62beb8a8f78244aaa3c23a60cdd Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 1 Dec 2017 22:13:06 -0500 Subject: shmem: Convert shmem_alloc_hugepage to XArray xa_find() is a slightly easier API to use than radix_tree_gang_lookup_slot() because it contains its own RCU locking. This commit removes the last user of radix_tree_gang_lookup_slot() so remove the function too. Signed-off-by: Matthew Wilcox --- lib/radix-tree.c | 44 +------------------------------------------- 1 file changed, 1 insertion(+), 43 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 8a58051eb5b3..2f9c0e45eeb7 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1097,7 +1097,7 @@ void __radix_tree_replace(struct radix_tree_root *root, * @slot: pointer to slot * @item: new item to store in the slot. * - * For use with radix_tree_lookup_slot(), radix_tree_gang_lookup_slot(), + * For use with radix_tree_lookup_slot() and * radix_tree_gang_lookup_tag_slot(). Caller must hold tree write locked * across slot lookup and replacement. * @@ -1731,48 +1731,6 @@ radix_tree_gang_lookup(const struct radix_tree_root *root, void **results, } EXPORT_SYMBOL(radix_tree_gang_lookup); -/** - * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree - * @root: radix tree root - * @results: where the results of the lookup are placed - * @indices: where their indices should be placed (but usually NULL) - * @first_index: start the lookup from this key - * @max_items: place up to this many items at *results - * - * Performs an index-ascending scan of the tree for present items. Places - * their slots at *@results and returns the number of items which were - * placed at *@results. - * - * The implementation is naive. - * - * Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must - * be dereferenced with radix_tree_deref_slot, and if using only RCU - * protection, radix_tree_deref_slot may fail requiring a retry. - */ -unsigned int -radix_tree_gang_lookup_slot(const struct radix_tree_root *root, - void __rcu ***results, unsigned long *indices, - unsigned long first_index, unsigned int max_items) -{ - struct radix_tree_iter iter; - void __rcu **slot; - unsigned int ret = 0; - - if (unlikely(!max_items)) - return 0; - - radix_tree_for_each_slot(slot, root, &iter, first_index) { - results[ret] = slot; - if (indices) - indices[ret] = iter.index; - if (++ret == max_items) - break; - } - - return ret; -} -EXPORT_SYMBOL(radix_tree_gang_lookup_slot); - /** * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree * based on a tag -- cgit v1.2.3 From 1cf56f9d670b88b2e947a7ccdb8ba32e6477915d Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 9 Apr 2018 16:24:45 -0400 Subject: radix tree: Remove radix_tree_update_node_t The only user of this functionality was the workingset code, and it's now been converted to the XArray. Remove __radix_tree_delete_node() entirely as it was also only used by the workingset code. Signed-off-by: Matthew Wilcox --- lib/idr.c | 2 +- lib/radix-tree.c | 42 ++++++++---------------------------------- 2 files changed, 9 insertions(+), 35 deletions(-) (limited to 'lib') diff --git a/lib/idr.c b/lib/idr.c index 3c20ad9b0463..cb1db9b8d3f6 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -297,7 +297,7 @@ void *idr_replace(struct idr *idr, void *ptr, unsigned long id) if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE)) return ERR_PTR(-ENOENT); - __radix_tree_replace(&idr->idr_rt, node, slot, ptr, NULL); + __radix_tree_replace(&idr->idr_rt, node, slot, ptr); return entry; } diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 2f9c0e45eeb7..c4c252185734 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -562,8 +562,7 @@ out: * radix_tree_shrink - shrink radix tree to minimum height * @root radix tree root */ -static inline bool radix_tree_shrink(struct radix_tree_root *root, - radix_tree_update_node_t update_node) +static inline bool radix_tree_shrink(struct radix_tree_root *root) { bool shrunk = false; @@ -631,8 +630,6 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root, node->count = 0; if (!radix_tree_is_internal_node(child)) { node->slots[0] = (void __rcu *)RADIX_TREE_RETRY; - if (update_node) - update_node(node); } WARN_ON_ONCE(!list_empty(&node->private_list)); @@ -644,8 +641,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root, } static bool delete_node(struct radix_tree_root *root, - struct radix_tree_node *node, - radix_tree_update_node_t update_node) + struct radix_tree_node *node) { bool deleted = false; @@ -655,7 +651,7 @@ static bool delete_node(struct radix_tree_root *root, if (node->count) { if (node_to_entry(node) == rcu_dereference_raw(root->xa_head)) - deleted |= radix_tree_shrink(root, update_node); + deleted |= radix_tree_shrink(root); return deleted; } @@ -1059,15 +1055,13 @@ static int calculate_count(struct radix_tree_root *root, * @node: pointer to tree node * @slot: pointer to slot in @node * @item: new item to store in the slot. - * @update_node: callback for changing leaf nodes * * For use with __radix_tree_lookup(). Caller must hold tree write locked * across slot lookup and replacement. */ void __radix_tree_replace(struct radix_tree_root *root, struct radix_tree_node *node, - void __rcu **slot, void *item, - radix_tree_update_node_t update_node) + void __rcu **slot, void *item) { void *old = rcu_dereference_raw(*slot); int values = !!xa_is_value(item) - !!xa_is_value(old); @@ -1085,10 +1079,7 @@ void __radix_tree_replace(struct radix_tree_root *root, if (!node) return; - if (update_node) - update_node(node); - - delete_node(root, node, update_node); + delete_node(root, node); } /** @@ -1110,7 +1101,7 @@ void __radix_tree_replace(struct radix_tree_root *root, void radix_tree_replace_slot(struct radix_tree_root *root, void __rcu **slot, void *item) { - __radix_tree_replace(root, NULL, slot, item, NULL); + __radix_tree_replace(root, NULL, slot, item); } EXPORT_SYMBOL(radix_tree_replace_slot); @@ -1127,7 +1118,7 @@ void radix_tree_iter_replace(struct radix_tree_root *root, const struct radix_tree_iter *iter, void __rcu **slot, void *item) { - __radix_tree_replace(root, iter->node, slot, item, NULL); + __radix_tree_replace(root, iter->node, slot, item); } #ifdef CONFIG_RADIX_TREE_MULTIORDER @@ -1807,23 +1798,6 @@ radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root, } EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); -/** - * __radix_tree_delete_node - try to free node after clearing a slot - * @root: radix tree root - * @node: node containing @index - * @update_node: callback for changing leaf nodes - * - * After clearing the slot at @index in @node from radix tree - * rooted at @root, call this function to attempt freeing the - * node and shrinking the tree. - */ -void __radix_tree_delete_node(struct radix_tree_root *root, - struct radix_tree_node *node, - radix_tree_update_node_t update_node) -{ - delete_node(root, node, update_node); -} - static bool __radix_tree_delete(struct radix_tree_root *root, struct radix_tree_node *node, void __rcu **slot) { @@ -1839,7 +1813,7 @@ static bool __radix_tree_delete(struct radix_tree_root *root, node_tag_clear(root, node, tag, offset); replace_slot(slot, NULL, node, -1, values); - return node && delete_node(root, node, NULL); + return node && delete_node(root, node); } /** -- cgit v1.2.3 From 2956c6644bfd9aab9f6b21a12e1bd75876d9dd73 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sat, 19 May 2018 16:47:47 -0400 Subject: radix tree: Remove split/join code radix_tree_split and radix_tree_join were never used upstream. Remove them; if they're needed in future they will be replaced by XArray equivalents. Signed-off-by: Matthew Wilcox --- lib/radix-tree.c | 171 +------------------------------------------------------ 1 file changed, 2 insertions(+), 169 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index c4c252185734..c52e0bef5264 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -415,28 +415,6 @@ int radix_tree_maybe_preload(gfp_t gfp_mask) } EXPORT_SYMBOL(radix_tree_maybe_preload); -#ifdef CONFIG_RADIX_TREE_MULTIORDER -/* - * Preload with enough objects to ensure that we can split a single entry - * of order @old_order into many entries of size @new_order - */ -int radix_tree_split_preload(unsigned int old_order, unsigned int new_order, - gfp_t gfp_mask) -{ - unsigned top = 1 << (old_order % RADIX_TREE_MAP_SHIFT); - unsigned layers = (old_order / RADIX_TREE_MAP_SHIFT) - - (new_order / RADIX_TREE_MAP_SHIFT); - unsigned nr = 0; - - WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask)); - BUG_ON(new_order >= old_order); - - while (layers--) - nr = nr * RADIX_TREE_MAP_SIZE + 1; - return __radix_tree_preload(gfp_mask, top * nr); -} -#endif - /* * The same as function above, but preload number of nodes required to insert * (1 << order) continuous naturally-aligned elements. @@ -1111,8 +1089,8 @@ EXPORT_SYMBOL(radix_tree_replace_slot); * @slot: pointer to slot * @item: new item to store in the slot. * - * For use with radix_tree_split() and radix_tree_for_each_slot(). - * Caller must hold tree write locked across split and replacement. + * For use with radix_tree_for_each_slot(). + * Caller must hold tree write locked. */ void radix_tree_iter_replace(struct radix_tree_root *root, const struct radix_tree_iter *iter, @@ -1121,151 +1099,6 @@ void radix_tree_iter_replace(struct radix_tree_root *root, __radix_tree_replace(root, iter->node, slot, item); } -#ifdef CONFIG_RADIX_TREE_MULTIORDER -/** - * radix_tree_join - replace multiple entries with one multiorder entry - * @root: radix tree root - * @index: an index inside the new entry - * @order: order of the new entry - * @item: new entry - * - * Call this function to replace several entries with one larger entry. - * The existing entries are presumed to not need freeing as a result of - * this call. - * - * The replacement entry will have all the tags set on it that were set - * on any of the entries it is replacing. - */ -int radix_tree_join(struct radix_tree_root *root, unsigned long index, - unsigned order, void *item) -{ - struct radix_tree_node *node; - void __rcu **slot; - int error; - - BUG_ON(radix_tree_is_internal_node(item)); - - error = __radix_tree_create(root, index, order, &node, &slot); - if (!error) - error = insert_entries(node, slot, item, order, true); - if (error > 0) - error = 0; - - return error; -} - -/** - * radix_tree_split - Split an entry into smaller entries - * @root: radix tree root - * @index: An index within the large entry - * @order: Order of new entries - * - * Call this function as the first step in replacing a multiorder entry - * with several entries of lower order. After this function returns, - * loop over the relevant portion of the tree using radix_tree_for_each_slot() - * and call radix_tree_iter_replace() to set up each new entry. - * - * The tags from this entry are replicated to all the new entries. - * - * The radix tree should be locked against modification during the entire - * replacement operation. Lock-free lookups will see RADIX_TREE_RETRY which - * should prompt RCU walkers to restart the lookup from the root. - */ -int radix_tree_split(struct radix_tree_root *root, unsigned long index, - unsigned order) -{ - struct radix_tree_node *parent, *node, *child; - void __rcu **slot; - unsigned int offset, end; - unsigned n, tag, tags = 0; - gfp_t gfp = root_gfp_mask(root); - - if (!__radix_tree_lookup(root, index, &parent, &slot)) - return -ENOENT; - if (!parent) - return -ENOENT; - - offset = get_slot_offset(parent, slot); - - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) - if (tag_get(parent, tag, offset)) - tags |= 1 << tag; - - for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) { - if (!xa_is_sibling(rcu_dereference_raw(parent->slots[end]))) - break; - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) - if (tags & (1 << tag)) - tag_set(parent, tag, end); - /* rcu_assign_pointer ensures tags are set before RETRY */ - rcu_assign_pointer(parent->slots[end], RADIX_TREE_RETRY); - } - rcu_assign_pointer(parent->slots[offset], RADIX_TREE_RETRY); - parent->nr_values -= (end - offset); - - if (order == parent->shift) - return 0; - if (order > parent->shift) { - while (offset < end) - offset += insert_entries(parent, &parent->slots[offset], - RADIX_TREE_RETRY, order, true); - return 0; - } - - node = parent; - - for (;;) { - if (node->shift > order) { - child = radix_tree_node_alloc(gfp, node, root, - node->shift - RADIX_TREE_MAP_SHIFT, - offset, 0, 0); - if (!child) - goto nomem; - if (node != parent) { - node->count++; - rcu_assign_pointer(node->slots[offset], - node_to_entry(child)); - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) - if (tags & (1 << tag)) - tag_set(node, tag, offset); - } - - node = child; - offset = 0; - continue; - } - - n = insert_entries(node, &node->slots[offset], - RADIX_TREE_RETRY, order, false); - BUG_ON(n > RADIX_TREE_MAP_SIZE); - - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) - if (tags & (1 << tag)) - tag_set(node, tag, offset); - offset += n; - - while (offset == RADIX_TREE_MAP_SIZE) { - if (node == parent) - break; - offset = node->offset; - child = node; - node = node->parent; - rcu_assign_pointer(node->slots[offset], - node_to_entry(child)); - offset++; - } - if ((node == parent) && (offset == end)) - return 0; - } - - nomem: - /* Shouldn't happen; did user forget to preload? */ - /* TODO: free all the allocated nodes */ - WARN_ON(1); - return -ENOMEM; -} -#endif - static void node_tag_set(struct radix_tree_root *root, struct radix_tree_node *node, unsigned int tag, unsigned int offset) -- cgit v1.2.3 From 8cf2f98411e3a0865026a1061af637161b16d32b Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sat, 19 May 2018 16:47:47 -0400 Subject: radix tree: Remove radix_tree_maybe_preload_order This function was only used by the page cache which is now converted to the XArray. Signed-off-by: Matthew Wilcox --- lib/radix-tree.c | 74 -------------------------------------------------------- 1 file changed, 74 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index c52e0bef5264..b57ddc3dbbbd 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -41,9 +41,6 @@ #include -/* Number of nodes in fully populated tree of given height */ -static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly; - /* * Radix tree node cache. */ @@ -415,51 +412,6 @@ int radix_tree_maybe_preload(gfp_t gfp_mask) } EXPORT_SYMBOL(radix_tree_maybe_preload); -/* - * The same as function above, but preload number of nodes required to insert - * (1 << order) continuous naturally-aligned elements. - */ -int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order) -{ - unsigned long nr_subtrees; - int nr_nodes, subtree_height; - - /* Preloading doesn't help anything with this gfp mask, skip it */ - if (!gfpflags_allow_blocking(gfp_mask)) { - preempt_disable(); - return 0; - } - - /* - * Calculate number and height of fully populated subtrees it takes to - * store (1 << order) elements. - */ - nr_subtrees = 1 << order; - for (subtree_height = 0; nr_subtrees > RADIX_TREE_MAP_SIZE; - subtree_height++) - nr_subtrees >>= RADIX_TREE_MAP_SHIFT; - - /* - * The worst case is zero height tree with a single item at index 0 and - * then inserting items starting at ULONG_MAX - (1 << order). - * - * This requires RADIX_TREE_MAX_PATH nodes to build branch from root to - * 0-index item. - */ - nr_nodes = RADIX_TREE_MAX_PATH; - - /* Plus branch to fully populated subtrees. */ - nr_nodes += RADIX_TREE_MAX_PATH - subtree_height; - - /* Root node is shared. */ - nr_nodes--; - - /* Plus nodes required to build subtrees. */ - nr_nodes += nr_subtrees * height_to_maxnodes[subtree_height]; - - return __radix_tree_preload(gfp_mask, nr_nodes); -} - static unsigned radix_tree_load_root(const struct radix_tree_root *root, struct radix_tree_node **nodep, unsigned long *maxindex) { @@ -1859,31 +1811,6 @@ radix_tree_node_ctor(void *arg) INIT_LIST_HEAD(&node->private_list); } -static __init unsigned long __maxindex(unsigned int height) -{ - unsigned int width = height * RADIX_TREE_MAP_SHIFT; - int shift = RADIX_TREE_INDEX_BITS - width; - - if (shift < 0) - return ~0UL; - if (shift >= BITS_PER_LONG) - return 0UL; - return ~0UL >> shift; -} - -static __init void radix_tree_init_maxnodes(void) -{ - unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1]; - unsigned int i, j; - - for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++) - height_to_maxindex[i] = __maxindex(i); - for (i = 0; i < ARRAY_SIZE(height_to_maxnodes); i++) { - for (j = i; j > 0; j--) - height_to_maxnodes[i] += height_to_maxindex[j - 1] + 1; - } -} - static int radix_tree_cpu_dead(unsigned int cpu) { struct radix_tree_preload *rtp; @@ -1911,7 +1838,6 @@ void __init radix_tree_init(void) sizeof(struct radix_tree_node), 0, SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, radix_tree_node_ctor); - radix_tree_init_maxnodes(); ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead", NULL, radix_tree_cpu_dead); WARN_ON(ret < 0); -- cgit v1.2.3 From adb9d9c4ccb1ff0bf1d376e65f36aa5573c75c1a Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Mon, 9 Apr 2018 16:52:21 -0400 Subject: radix tree: Remove radix_tree_clear_tags The page cache was the only user of this interface and it has now been converted to the XArray. Transform the test into a test of xas_init_marks(). Signed-off-by: Matthew Wilcox --- lib/radix-tree.c | 13 ------------- lib/test_xarray.c | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index b57ddc3dbbbd..e92f2b6ae9c5 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1670,19 +1670,6 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) } EXPORT_SYMBOL(radix_tree_delete); -void radix_tree_clear_tags(struct radix_tree_root *root, - struct radix_tree_node *node, - void __rcu **slot) -{ - if (node) { - unsigned int tag, offset = get_slot_offset(node, slot); - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) - node_tag_clear(root, node, tag, offset); - } else { - root_tag_clear_all(root); - } -} - /** * radix_tree_tagged - test whether any items in the tree are tagged * @root: radix tree root diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 815daffdd8c9..ca86141641cb 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -213,12 +213,52 @@ static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index) XA_BUG_ON(xa, !xa_empty(xa)); } +static noinline void check_xa_mark_2(struct xarray *xa) +{ + XA_STATE(xas, xa, 0); + unsigned long index; + unsigned int count = 0; + void *entry; + + xa_store_index(xa, 0, GFP_KERNEL); + xa_set_mark(xa, 0, XA_MARK_0); + xas_lock(&xas); + xas_load(&xas); + xas_init_marks(&xas); + xas_unlock(&xas); + XA_BUG_ON(xa, !xa_get_mark(xa, 0, XA_MARK_0) == 0); + + for (index = 3500; index < 4500; index++) { + xa_store_index(xa, index, GFP_KERNEL); + xa_set_mark(xa, index, XA_MARK_0); + } + + xas_reset(&xas); + rcu_read_lock(); + xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_0) + count++; + rcu_read_unlock(); + XA_BUG_ON(xa, count != 1000); + + xas_lock(&xas); + xas_for_each(&xas, entry, ULONG_MAX) { + xas_init_marks(&xas); + XA_BUG_ON(xa, !xa_get_mark(xa, xas.xa_index, XA_MARK_0)); + XA_BUG_ON(xa, !xas_get_mark(&xas, XA_MARK_0)); + } + xas_unlock(&xas); + + xa_destroy(xa); +} + static noinline void check_xa_mark(struct xarray *xa) { unsigned long index; for (index = 0; index < 16384; index += 4) check_xa_mark_1(xa, index); + + check_xa_mark_2(xa); } static noinline void check_xa_shrink(struct xarray *xa) -- cgit v1.2.3 From 372266ba0267803564824b1c09f1bb7f3f3fc761 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sat, 18 Aug 2018 07:09:22 -0400 Subject: radix tree test suite: Convert tag_tagged_items to XArray The tag_tagged_items() function is supposed to test the page-writeback tagging code. Since that has been converted to the XArray, there's not much point in testing the radix tree's tagging code. This requires using the pthread mutex embedded in the xarray instead of an external lock, so remove the pthread mutexes which protect xarrays/radix trees. Also remove radix_tree_iter_tag_set() as this was the last user. Signed-off-by: Matthew Wilcox --- lib/radix-tree.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index e92f2b6ae9c5..f107dd2698e3 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1108,18 +1108,6 @@ void *radix_tree_tag_set(struct radix_tree_root *root, } EXPORT_SYMBOL(radix_tree_tag_set); -/** - * radix_tree_iter_tag_set - set a tag on the current iterator entry - * @root: radix tree root - * @iter: iterator state - * @tag: tag to set - */ -void radix_tree_iter_tag_set(struct radix_tree_root *root, - const struct radix_tree_iter *iter, unsigned int tag) -{ - node_tag_set(root, iter->node, tag, iter_offset(iter)); -} - static void node_tag_clear(struct radix_tree_root *root, struct radix_tree_node *node, unsigned int tag, unsigned int offset) -- cgit v1.2.3 From d6427f8179b5dd65eb468c61fc8cc24657c336c9 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 28 Aug 2018 16:13:16 -0400 Subject: xarray: Move multiorder account test in-kernel Move this test to the in-kernel test suite, and enhance it to test several different orders. Signed-off-by: Matthew Wilcox --- lib/test_xarray.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'lib') diff --git a/lib/test_xarray.c b/lib/test_xarray.c index ca86141641cb..38cab4ccb24e 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -1019,6 +1019,37 @@ static noinline void check_workingset(struct xarray *xa, unsigned long index) XA_BUG_ON(xa, !xa_empty(xa)); } +/* + * Check that the pointer / value / sibling entries are accounted the + * way we expect them to be. + */ +static noinline void check_account(struct xarray *xa) +{ +#ifdef CONFIG_XARRAY_MULTI + unsigned int order; + + for (order = 1; order < 12; order++) { + XA_STATE(xas, xa, 1 << order); + + xa_store_order(xa, 0, order, xa, GFP_KERNEL); + xas_load(&xas); + XA_BUG_ON(xa, xas.xa_node->count == 0); + XA_BUG_ON(xa, xas.xa_node->count > (1 << order)); + XA_BUG_ON(xa, xas.xa_node->nr_values != 0); + + xa_store_order(xa, 1 << order, order, xa_mk_value(1 << order), + GFP_KERNEL); + XA_BUG_ON(xa, xas.xa_node->count != xas.xa_node->nr_values * 2); + + xa_erase(xa, 1 << order); + XA_BUG_ON(xa, xas.xa_node->nr_values != 0); + + xa_erase(xa, 0); + XA_BUG_ON(xa, !xa_empty(xa)); + } +#endif +} + static noinline void check_destroy(struct xarray *xa) { unsigned long index; @@ -1068,6 +1099,7 @@ static int xarray_checks(void) check_xa_alloc(); check_find(&array); check_find_entry(&array); + check_account(&array); check_destroy(&array); check_move(&array); check_create_range(&array); -- cgit v1.2.3 From 93eb07f72c8d86f8fe5e90907df1cc037f6ffbb7 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sat, 8 Sep 2018 12:09:52 -0400 Subject: xarray: Move multiorder_shrink to kernel tests Test this functionality inside the kernel as well as in userspace. Also remove insert_bug() as there's no comparable thing to test in the XArray code. Signed-off-by: Matthew Wilcox --- lib/test_xarray.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'lib') diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 38cab4ccb24e..ff94b54a926d 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -199,9 +199,25 @@ static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index) xa_store_order(xa, index, order, xa_mk_value(index), GFP_KERNEL); for (i = base; i < next; i++) { + XA_STATE(xas, xa, i); + unsigned int seen = 0; + void *entry; + XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_0)); XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_1)); XA_BUG_ON(xa, xa_get_mark(xa, i, XA_MARK_2)); + + /* We should see two elements in the array */ + xas_for_each(&xas, entry, ULONG_MAX) + seen++; + XA_BUG_ON(xa, seen != 2); + + /* One of which is marked */ + xas_set(&xas, 0); + seen = 0; + xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_0) + seen++; + XA_BUG_ON(xa, seen != 1); } XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_0)); XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_1)); @@ -265,6 +281,8 @@ static noinline void check_xa_shrink(struct xarray *xa) { XA_STATE(xas, xa, 1); struct xa_node *node; + unsigned int order; + unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 15 : 1; XA_BUG_ON(xa, !xa_empty(xa)); XA_BUG_ON(xa, xa_store_index(xa, 0, GFP_KERNEL) != NULL); @@ -287,6 +305,25 @@ static noinline void check_xa_shrink(struct xarray *xa) XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(0)); xa_erase_index(xa, 0); XA_BUG_ON(xa, !xa_empty(xa)); + + for (order = 0; order < max_order; order++) { + unsigned long max = (1UL << order) - 1; + xa_store_order(xa, 0, order, xa_mk_value(0), GFP_KERNEL); + XA_BUG_ON(xa, xa_load(xa, max) != xa_mk_value(0)); + XA_BUG_ON(xa, xa_load(xa, max + 1) != NULL); + rcu_read_lock(); + node = xa_head(xa); + rcu_read_unlock(); + XA_BUG_ON(xa, xa_store_index(xa, ULONG_MAX, GFP_KERNEL) != + NULL); + rcu_read_lock(); + XA_BUG_ON(xa, xa_head(xa) == node); + rcu_read_unlock(); + XA_BUG_ON(xa, xa_load(xa, max + 1) != NULL); + xa_erase_index(xa, ULONG_MAX); + XA_BUG_ON(xa, xa->xa_head != node); + xa_erase_index(xa, 0); + } } static noinline void check_cmpxchg(struct xarray *xa) -- cgit v1.2.3 From 4f06d6302da682157890f72c0573e12a73536814 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sun, 9 Sep 2018 01:52:17 -0400 Subject: xarray: Move multiorder_check to in-kernel tests This version is a little less thorough in order to be a little quicker, but tests the important edge cases. Also test adding a multiorder entry at a non-canonical index, and erasing it. Signed-off-by: Matthew Wilcox --- lib/test_xarray.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'lib') diff --git a/lib/test_xarray.c b/lib/test_xarray.c index ff94b54a926d..0f06a93b4d0e 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -422,6 +422,43 @@ static noinline void check_xas_erase(struct xarray *xa) } } +#ifdef CONFIG_XARRAY_MULTI +static noinline void check_multi_store_1(struct xarray *xa, unsigned long index, + unsigned int order) +{ + XA_STATE(xas, xa, index); + unsigned long min = index & ~((1UL << order) - 1); + unsigned long max = min + (1UL << order); + + xa_store_order(xa, index, order, xa_mk_value(index), GFP_KERNEL); + XA_BUG_ON(xa, xa_load(xa, min) != xa_mk_value(index)); + XA_BUG_ON(xa, xa_load(xa, max - 1) != xa_mk_value(index)); + XA_BUG_ON(xa, xa_load(xa, max) != NULL); + XA_BUG_ON(xa, xa_load(xa, min - 1) != NULL); + + XA_BUG_ON(xa, xas_store(&xas, xa_mk_value(min)) != xa_mk_value(index)); + XA_BUG_ON(xa, xa_load(xa, min) != xa_mk_value(min)); + XA_BUG_ON(xa, xa_load(xa, max - 1) != xa_mk_value(min)); + XA_BUG_ON(xa, xa_load(xa, max) != NULL); + XA_BUG_ON(xa, xa_load(xa, min - 1) != NULL); + + xa_erase_index(xa, min); + XA_BUG_ON(xa, !xa_empty(xa)); +} + +static noinline void check_multi_store_2(struct xarray *xa, unsigned long index, + unsigned int order) +{ + XA_STATE(xas, xa, index); + xa_store_order(xa, index, order, xa_mk_value(0), GFP_KERNEL); + + XA_BUG_ON(xa, xas_store(&xas, xa_mk_value(1)) != xa_mk_value(0)); + XA_BUG_ON(xa, xas.xa_index != index); + XA_BUG_ON(xa, xas_store(&xas, NULL) != xa_mk_value(1)); + XA_BUG_ON(xa, !xa_empty(xa)); +} +#endif + static noinline void check_multi_store(struct xarray *xa) { #ifdef CONFIG_XARRAY_MULTI @@ -487,6 +524,13 @@ static noinline void check_multi_store(struct xarray *xa) XA_BUG_ON(xa, !xa_empty(xa)); } } + + for (i = 0; i < 20; i++) { + check_multi_store_1(xa, 200, i); + check_multi_store_1(xa, 0, i); + check_multi_store_1(xa, (1UL << i) + 1, i); + } + check_multi_store_2(xa, 4095, 9); #endif } -- cgit v1.2.3 From 0e9446c35a80931044b6d8d2d74a9cabd248539f Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Wed, 15 Aug 2018 14:13:29 -0400 Subject: xarray: Add range store functionality This version of xa_store_range() really only supports load and store. Our only user only needs basic load and store functionality, so there's no need to do the extra work to support marking and overlapping stores correctly yet. Signed-off-by: Matthew Wilcox --- lib/test_xarray.c | 34 +++++++++++++++++++ lib/xarray.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 129 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 0f06a93b4d0e..aa47754150ce 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -1039,6 +1039,39 @@ static noinline void check_create_range(struct xarray *xa) check_create_range_3(); } +static noinline void __check_store_range(struct xarray *xa, unsigned long first, + unsigned long last) +{ +#ifdef CONFIG_XARRAY_MULTI + xa_store_range(xa, first, last, xa_mk_value(first), GFP_KERNEL); + + XA_BUG_ON(xa, xa_load(xa, first) != xa_mk_value(first)); + XA_BUG_ON(xa, xa_load(xa, last) != xa_mk_value(first)); + XA_BUG_ON(xa, xa_load(xa, first - 1) != NULL); + XA_BUG_ON(xa, xa_load(xa, last + 1) != NULL); + + xa_store_range(xa, first, last, NULL, GFP_KERNEL); +#endif + + XA_BUG_ON(xa, !xa_empty(xa)); +} + +static noinline void check_store_range(struct xarray *xa) +{ + unsigned long i, j; + + for (i = 0; i < 128; i++) { + for (j = i; j < 128; j++) { + __check_store_range(xa, i, j); + __check_store_range(xa, 128 + i, 128 + j); + __check_store_range(xa, 4095 + i, 4095 + j); + __check_store_range(xa, 4096 + i, 4096 + j); + __check_store_range(xa, 123456 + i, 123456 + j); + __check_store_range(xa, UINT_MAX + i, UINT_MAX + j); + } + } +} + static LIST_HEAD(shadow_nodes); static void test_update_node(struct xa_node *node) @@ -1184,6 +1217,7 @@ static int xarray_checks(void) check_destroy(&array); check_move(&array); check_create_range(&array); + check_store_range(&array); check_store_iter(&array); check_workingset(&array, 0); diff --git a/lib/xarray.c b/lib/xarray.c index 9a0d49d4b5f0..8b176f009c08 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -376,6 +376,14 @@ static void *xas_alloc(struct xa_state *xas, unsigned int shift) return node; } +#ifdef CONFIG_XARRAY_MULTI +/* Returns the number of indices covered by a given xa_state */ +static unsigned long xas_size(const struct xa_state *xas) +{ + return (xas->xa_sibs + 1UL) << xas->xa_shift; +} +#endif + /* * Use this to calculate the maximum index that will need to be created * in order to add the entry described by @xas. Because we cannot store a @@ -388,8 +396,7 @@ static unsigned long xas_max(struct xa_state *xas) #ifdef CONFIG_XARRAY_MULTI if (xas->xa_shift || xas->xa_sibs) { - unsigned long mask; - mask = (((xas->xa_sibs + 1UL) << xas->xa_shift) - 1); + unsigned long mask = xas_size(xas) - 1; max |= mask; if (mask == max) max++; @@ -1517,6 +1524,92 @@ int xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp) } EXPORT_SYMBOL(xa_reserve); +#ifdef CONFIG_XARRAY_MULTI +static void xas_set_range(struct xa_state *xas, unsigned long first, + unsigned long last) +{ + unsigned int shift = 0; + unsigned long sibs = last - first; + unsigned int offset = XA_CHUNK_MASK; + + xas_set(xas, first); + + while ((first & XA_CHUNK_MASK) == 0) { + if (sibs < XA_CHUNK_MASK) + break; + if ((sibs == XA_CHUNK_MASK) && (offset < XA_CHUNK_MASK)) + break; + shift += XA_CHUNK_SHIFT; + if (offset == XA_CHUNK_MASK) + offset = sibs & XA_CHUNK_MASK; + sibs >>= XA_CHUNK_SHIFT; + first >>= XA_CHUNK_SHIFT; + } + + offset = first & XA_CHUNK_MASK; + if (offset + sibs > XA_CHUNK_MASK) + sibs = XA_CHUNK_MASK - offset; + if ((((first + sibs + 1) << shift) - 1) > last) + sibs -= 1; + + xas->xa_shift = shift; + xas->xa_sibs = sibs; +} + +/** + * xa_store_range() - Store this entry at a range of indices in the XArray. + * @xa: XArray. + * @first: First index to affect. + * @last: Last index to affect. + * @entry: New entry. + * @gfp: Memory allocation flags. + * + * After this function returns, loads from any index between @first and @last, + * inclusive will return @entry. + * Storing into an existing multislot entry updates the entry of every index. + * The marks associated with @index are unaffected unless @entry is %NULL. + * + * Context: Process context. Takes and releases the xa_lock. May sleep + * if the @gfp flags permit. + * Return: %NULL on success, xa_err(-EINVAL) if @entry cannot be stored in + * an XArray, or xa_err(-ENOMEM) if memory allocation failed. + */ +void *xa_store_range(struct xarray *xa, unsigned long first, + unsigned long last, void *entry, gfp_t gfp) +{ + XA_STATE(xas, xa, 0); + + if (WARN_ON_ONCE(xa_is_internal(entry))) + return XA_ERROR(-EINVAL); + if (last < first) + return XA_ERROR(-EINVAL); + + do { + xas_lock(&xas); + if (entry) { + unsigned int order = (last == ~0UL) ? 64 : + ilog2(last + 1); + xas_set_order(&xas, last, order); + xas_create(&xas); + if (xas_error(&xas)) + goto unlock; + } + do { + xas_set_range(&xas, first, last); + xas_store(&xas, entry); + if (xas_error(&xas)) + goto unlock; + first += xas_size(&xas); + } while (first <= last); +unlock: + xas_unlock(&xas); + } while (xas_nomem(&xas, gfp)); + + return xas_result(&xas, NULL); +} +EXPORT_SYMBOL(xa_store_range); +#endif /* CONFIG_XARRAY_MULTI */ + /** * __xa_alloc() - Find somewhere to store this entry in the XArray. * @xa: XArray. -- cgit v1.2.3 From 3a08cd52c37c793ffc199f6fc2ecfc368e284b2d Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Sat, 22 Sep 2018 16:14:30 -0400 Subject: radix tree: Remove multiorder support All users have now been converted to the XArray. Removing the support reduces code size and ensures new users will use the XArray instead. Signed-off-by: Matthew Wilcox --- lib/Kconfig | 4 -- lib/radix-tree.c | 215 ++++--------------------------------------------------- 2 files changed, 13 insertions(+), 206 deletions(-) (limited to 'lib') diff --git a/lib/Kconfig b/lib/Kconfig index 40bfa6ccd294..a9965f4af4dd 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -405,10 +405,6 @@ config XARRAY_MULTI Support entries which occupy multiple consecutive indices in the XArray. -config RADIX_TREE_MULTIORDER - bool - select XARRAY_MULTI - config ASSOCIATIVE_ARRAY bool help diff --git a/lib/radix-tree.c b/lib/radix-tree.c index f107dd2698e3..1106bb6aa01e 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -110,11 +110,6 @@ static unsigned int radix_tree_descend(const struct radix_tree_node *parent, unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK; void __rcu **entry = rcu_dereference_raw(parent->slots[offset]); - if (xa_is_sibling(entry)) { - offset = xa_to_sibling(entry); - entry = rcu_dereference_raw(parent->slots[offset]); - } - *nodep = (void *)entry; return offset; } @@ -229,7 +224,7 @@ radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag, static unsigned int iter_offset(const struct radix_tree_iter *iter) { - return (iter->index >> iter_shift(iter)) & RADIX_TREE_MAP_MASK; + return iter->index & RADIX_TREE_MAP_MASK; } /* @@ -506,16 +501,13 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root) /* * The candidate node has more than one child, or its child - * is not at the leftmost slot, or the child is a multiorder - * entry, we cannot shrink. + * is not at the leftmost slot, we cannot shrink. */ if (node->count != 1) break; child = rcu_dereference_raw(node->slots[0]); if (!child) break; - if (!radix_tree_is_internal_node(child) && node->shift) - break; /* * For an IDR, we must not shrink entry 0 into the root in @@ -613,7 +605,6 @@ static bool delete_node(struct radix_tree_root *root, * __radix_tree_create - create a slot in a radix tree * @root: radix tree root * @index: index key - * @order: index occupies 2^order aligned slots * @nodep: returns node * @slotp: returns slot * @@ -627,21 +618,19 @@ static bool delete_node(struct radix_tree_root *root, * Returns -ENOMEM, or 0 for success. */ static int __radix_tree_create(struct radix_tree_root *root, - unsigned long index, unsigned order, - struct radix_tree_node **nodep, void __rcu ***slotp) + unsigned long index, struct radix_tree_node **nodep, + void __rcu ***slotp) { struct radix_tree_node *node = NULL, *child; void __rcu **slot = (void __rcu **)&root->xa_head; unsigned long maxindex; unsigned int shift, offset = 0; - unsigned long max = index | ((1UL << order) - 1); + unsigned long max = index; gfp_t gfp = root_gfp_mask(root); shift = radix_tree_load_root(root, &child, &maxindex); /* Make sure the tree is high enough. */ - if (order > 0 && max == ((1UL << order) - 1)) - max++; if (max > maxindex) { int error = radix_tree_extend(root, gfp, max, shift); if (error < 0) @@ -650,7 +639,7 @@ static int __radix_tree_create(struct radix_tree_root *root, child = rcu_dereference_raw(root->xa_head); } - while (shift > order) { + while (shift > 0) { shift -= RADIX_TREE_MAP_SHIFT; if (child == NULL) { /* Have to add a child node. */ @@ -711,70 +700,8 @@ static void radix_tree_free_nodes(struct radix_tree_node *node) } } -#ifdef CONFIG_RADIX_TREE_MULTIORDER -static inline int insert_entries(struct radix_tree_node *node, - void __rcu **slot, void *item, unsigned order, bool replace) -{ - void *sibling; - unsigned i, n, tag, offset, tags = 0; - - if (node) { - if (order > node->shift) - n = 1 << (order - node->shift); - else - n = 1; - offset = get_slot_offset(node, slot); - } else { - n = 1; - offset = 0; - } - - if (n > 1) { - offset = offset & ~(n - 1); - slot = &node->slots[offset]; - } - sibling = xa_mk_sibling(offset); - - for (i = 0; i < n; i++) { - if (slot[i]) { - if (replace) { - node->count--; - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) - if (tag_get(node, tag, offset + i)) - tags |= 1 << tag; - } else - return -EEXIST; - } - } - - for (i = 0; i < n; i++) { - struct radix_tree_node *old = rcu_dereference_raw(slot[i]); - if (i) { - rcu_assign_pointer(slot[i], sibling); - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) - if (tags & (1 << tag)) - tag_clear(node, tag, offset + i); - } else { - rcu_assign_pointer(slot[i], item); - for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) - if (tags & (1 << tag)) - tag_set(node, tag, offset); - } - if (xa_is_node(old)) - radix_tree_free_nodes(old); - if (xa_is_value(old)) - node->nr_values--; - } - if (node) { - node->count += n; - if (xa_is_value(item)) - node->nr_values += n; - } - return n; -} -#else static inline int insert_entries(struct radix_tree_node *node, - void __rcu **slot, void *item, unsigned order, bool replace) + void __rcu **slot, void *item, bool replace) { if (*slot) return -EEXIST; @@ -786,19 +713,17 @@ static inline int insert_entries(struct radix_tree_node *node, } return 1; } -#endif /** * __radix_tree_insert - insert into a radix tree * @root: radix tree root * @index: index key - * @order: key covers the 2^order indices around index * @item: item to insert * * Insert an item into the radix tree at position @index. */ -int __radix_tree_insert(struct radix_tree_root *root, unsigned long index, - unsigned order, void *item) +int radix_tree_insert(struct radix_tree_root *root, unsigned long index, + void *item) { struct radix_tree_node *node; void __rcu **slot; @@ -806,11 +731,11 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index, BUG_ON(radix_tree_is_internal_node(item)); - error = __radix_tree_create(root, index, order, &node, &slot); + error = __radix_tree_create(root, index, &node, &slot); if (error) return error; - error = insert_entries(node, slot, item, order, false); + error = insert_entries(node, slot, item, false); if (error < 0) return error; @@ -825,7 +750,7 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index, return 0; } -EXPORT_SYMBOL(__radix_tree_insert); +EXPORT_SYMBOL(radix_tree_insert); /** * __radix_tree_lookup - lookup an item in a radix tree @@ -917,32 +842,12 @@ void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index) } EXPORT_SYMBOL(radix_tree_lookup); -static inline void replace_sibling_entries(struct radix_tree_node *node, - void __rcu **slot, int count, int values) -{ -#ifdef CONFIG_RADIX_TREE_MULTIORDER - unsigned offset = get_slot_offset(node, slot); - void *ptr = xa_mk_sibling(offset); - - while (++offset < RADIX_TREE_MAP_SIZE) { - if (rcu_dereference_raw(node->slots[offset]) != ptr) - break; - if (count < 0) { - node->slots[offset] = NULL; - node->count--; - } - node->nr_values += values; - } -#endif -} - static void replace_slot(void __rcu **slot, void *item, struct radix_tree_node *node, int count, int values) { if (node && (count || values)) { node->count += count; node->nr_values += values; - replace_sibling_entries(node, slot, count, values); } rcu_assign_pointer(*slot, item); @@ -1223,14 +1128,6 @@ int radix_tree_tag_get(const struct radix_tree_root *root, } EXPORT_SYMBOL(radix_tree_tag_get); -static inline void __set_iter_shift(struct radix_tree_iter *iter, - unsigned int shift) -{ -#ifdef CONFIG_RADIX_TREE_MULTIORDER - iter->shift = shift; -#endif -} - /* Construct iter->tags bit-mask from node->tags[tag] array */ static void set_iter_tags(struct radix_tree_iter *iter, struct radix_tree_node *node, unsigned offset, @@ -1257,92 +1154,11 @@ static void set_iter_tags(struct radix_tree_iter *iter, } } -#ifdef CONFIG_RADIX_TREE_MULTIORDER -static void __rcu **skip_siblings(struct radix_tree_node **nodep, - void __rcu **slot, struct radix_tree_iter *iter) -{ - while (iter->index < iter->next_index) { - *nodep = rcu_dereference_raw(*slot); - if (*nodep && !xa_is_sibling(*nodep)) - return slot; - slot++; - iter->index = __radix_tree_iter_add(iter, 1); - iter->tags >>= 1; - } - - *nodep = NULL; - return NULL; -} - -void __rcu **__radix_tree_next_slot(void __rcu **slot, - struct radix_tree_iter *iter, unsigned flags) -{ - unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK; - struct radix_tree_node *node; - - slot = skip_siblings(&node, slot, iter); - - while (radix_tree_is_internal_node(node)) { - unsigned offset; - unsigned long next_index; - - if (node == RADIX_TREE_RETRY) - return slot; - node = entry_to_node(node); - iter->node = node; - iter->shift = node->shift; - - if (flags & RADIX_TREE_ITER_TAGGED) { - offset = radix_tree_find_next_bit(node, tag, 0); - if (offset == RADIX_TREE_MAP_SIZE) - return NULL; - slot = &node->slots[offset]; - iter->index = __radix_tree_iter_add(iter, offset); - set_iter_tags(iter, node, offset, tag); - node = rcu_dereference_raw(*slot); - } else { - offset = 0; - slot = &node->slots[0]; - for (;;) { - node = rcu_dereference_raw(*slot); - if (node) - break; - slot++; - offset++; - if (offset == RADIX_TREE_MAP_SIZE) - return NULL; - } - iter->index = __radix_tree_iter_add(iter, offset); - } - if ((flags & RADIX_TREE_ITER_CONTIG) && (offset > 0)) - goto none; - next_index = (iter->index | shift_maxindex(iter->shift)) + 1; - if (next_index < iter->next_index) - iter->next_index = next_index; - } - - return slot; - none: - iter->next_index = 0; - return NULL; -} -EXPORT_SYMBOL(__radix_tree_next_slot); -#else -static void __rcu **skip_siblings(struct radix_tree_node **nodep, - void __rcu **slot, struct radix_tree_iter *iter) -{ - return slot; -} -#endif - void __rcu **radix_tree_iter_resume(void __rcu **slot, struct radix_tree_iter *iter) { - struct radix_tree_node *node; - slot++; iter->index = __radix_tree_iter_add(iter, 1); - skip_siblings(&node, slot, iter); iter->next_index = iter->index; iter->tags = 0; return NULL; @@ -1393,7 +1209,6 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root, iter->next_index = maxindex + 1; iter->tags = 1; iter->node = NULL; - __set_iter_shift(iter, 0); return (void __rcu **)&root->xa_head; } @@ -1414,8 +1229,6 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root, while (++offset < RADIX_TREE_MAP_SIZE) { void *slot = rcu_dereference_raw( node->slots[offset]); - if (xa_is_sibling(slot)) - continue; if (slot) break; } @@ -1436,10 +1249,9 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root, } while (node->shift && radix_tree_is_internal_node(child)); /* Update the iterator state */ - iter->index = (index &~ node_maxindex(node)) | (offset << node->shift); + iter->index = (index &~ node_maxindex(node)) | offset; iter->next_index = (index | node_maxindex(node)) + 1; iter->node = node; - __set_iter_shift(iter, node->shift); if (flags & RADIX_TREE_ITER_TAGGED) set_iter_tags(iter, node, offset, tag); @@ -1750,7 +1562,6 @@ void __rcu **idr_get_free(struct radix_tree_root *root, else iter->next_index = 1; iter->node = node; - __set_iter_shift(iter, shift); set_iter_tags(iter, node, offset, IDR_FREE); return slot; -- cgit v1.2.3