aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Kconfig5
-rw-r--r--lib/Kconfig.debug3
-rw-r--r--lib/Makefile3
-rw-r--r--lib/idr.c401
-rw-r--r--lib/radix-tree.c834
-rw-r--r--lib/test_xarray.c1238
-rw-r--r--lib/xarray.c2036
7 files changed, 3578 insertions, 942 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index d82f20609939..d1573a16aa92 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -399,8 +399,11 @@ config INTERVAL_TREE
for more information.
-config RADIX_TREE_MULTIORDER
+config XARRAY_MULTI
bool
+ help
+ Support entries which occupy multiple consecutive indices in the
+ XArray.
config ASSOCIATIVE_ARRAY
bool
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 04adfc3b185e..e0ba05e6f6bd 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1813,6 +1813,9 @@ config TEST_BITFIELD
config TEST_UUID
tristate "Test functions located in the uuid module at runtime"
+config TEST_XARRAY
+ tristate "Test the XArray code at runtime"
+
config TEST_OVERFLOW
tristate "Test check_*_overflow() functions at runtime"
diff --git a/lib/Makefile b/lib/Makefile
index fa3eb1b4c0e3..3d341f59f756 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -18,7 +18,7 @@ KCOV_INSTRUMENT_debugobjects.o := n
KCOV_INSTRUMENT_dynamic_debug.o := n
lib-y := ctype.o string.o vsprintf.o cmdline.o \
- rbtree.o radix-tree.o timerqueue.o\
+ rbtree.o radix-tree.o timerqueue.o xarray.o \
idr.o int_sqrt.o extable.o \
sha1.o chacha20.o irq_regs.o argv_split.o \
flex_proportions.o ratelimit.o show_mem.o \
@@ -68,6 +68,7 @@ obj-$(CONFIG_TEST_PRINTF) += test_printf.o
obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
obj-$(CONFIG_TEST_BITFIELD) += test_bitfield.o
obj-$(CONFIG_TEST_UUID) += test_uuid.o
+obj-$(CONFIG_TEST_XARRAY) += test_xarray.o
obj-$(CONFIG_TEST_PARMAN) += test_parman.o
obj-$(CONFIG_TEST_KMOD) += test_kmod.o
obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o
diff --git a/lib/idr.c b/lib/idr.c
index fab2fd5bc326..cb1db9b8d3f6 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -6,8 +6,6 @@
#include <linux/spinlock.h>
#include <linux/xarray.h>
-DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap);
-
/**
* idr_alloc_u32() - Allocate an ID.
* @idr: IDR handle.
@@ -39,10 +37,8 @@ int idr_alloc_u32(struct idr *idr, void *ptr, u32 *nextid,
unsigned int base = idr->idr_base;
unsigned int id = *nextid;
- if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
- return -EINVAL;
- if (WARN_ON_ONCE(!(idr->idr_rt.gfp_mask & ROOT_IS_IDR)))
- idr->idr_rt.gfp_mask |= IDR_RT_MARKER;
+ if (WARN_ON_ONCE(!(idr->idr_rt.xa_flags & ROOT_IS_IDR)))
+ idr->idr_rt.xa_flags |= IDR_RT_MARKER;
id = (id < base) ? 0 : id - base;
radix_tree_iter_init(&iter, id);
@@ -295,15 +291,13 @@ void *idr_replace(struct idr *idr, void *ptr, unsigned long id)
void __rcu **slot = NULL;
void *entry;
- if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
- return ERR_PTR(-EINVAL);
id -= idr->idr_base;
entry = __radix_tree_lookup(&idr->idr_rt, id, &node, &slot);
if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE))
return ERR_PTR(-ENOENT);
- __radix_tree_replace(&idr->idr_rt, node, slot, ptr, NULL);
+ __radix_tree_replace(&idr->idr_rt, node, slot, ptr);
return entry;
}
@@ -324,6 +318,9 @@ EXPORT_SYMBOL(idr_replace);
* free the individual IDs in it. You can use ida_is_empty() to find
* out whether the IDA has any IDs currently allocated.
*
+ * The IDA handles its own locking. It is safe to call any of the IDA
+ * functions without synchronisation in your code.
+ *
* IDs are currently limited to the range [0-INT_MAX]. If this is an awkward
* limitation, it should be quite straightforward to raise the maximum.
*/
@@ -331,161 +328,197 @@ EXPORT_SYMBOL(idr_replace);
/*
* Developer's notes:
*
- * The IDA uses the functionality provided by the IDR & radix tree to store
- * bitmaps in each entry. The IDR_FREE tag means there is at least one bit
- * free, unlike the IDR where it means at least one entry is free.
+ * The IDA uses the functionality provided by the XArray to store bitmaps in
+ * each entry. The XA_FREE_MARK is only cleared when all bits in the bitmap
+ * have been set.
*
- * I considered telling the radix tree that each slot is an order-10 node
- * and storing the bit numbers in the radix tree, but the radix tree can't
- * allow a single multiorder entry at index 0, which would significantly
- * increase memory consumption for the IDA. So instead we divide the index
- * by the number of bits in the leaf bitmap before doing a radix tree lookup.
+ * I considered telling the XArray that each slot is an order-10 node
+ * and indexing by bit number, but the XArray can't allow a single multi-index
+ * entry in the head, which would significantly increase memory consumption
+ * for the IDA. So instead we divide the index by the number of bits in the
+ * leaf bitmap before doing a radix tree lookup.
*
* As an optimisation, if there are only a few low bits set in any given
- * leaf, instead of allocating a 128-byte bitmap, we use the 'exceptional
- * entry' functionality of the radix tree to store BITS_PER_LONG - 2 bits
- * directly in the entry. By being really tricksy, we could store
- * BITS_PER_LONG - 1 bits, but there're diminishing returns after optimising
- * for 0-3 allocated IDs.
- *
- * We allow the radix tree 'exceptional' count to get out of date. Nothing
- * in the IDA nor the radix tree code checks it. If it becomes important
- * to maintain an accurate exceptional count, switch the rcu_assign_pointer()
- * calls to radix_tree_iter_replace() which will correct the exceptional
- * count.
- *
- * The IDA always requires a lock to alloc/free. If we add a 'test_bit'
+ * leaf, instead of allocating a 128-byte bitmap, we store the bits
+ * as a value entry. Value entries never have the XA_FREE_MARK cleared
+ * because we can always convert them into a bitmap entry.
+ *
+ * It would be possible to optimise further; once we've run out of a
+ * single 128-byte bitmap, we currently switch to a 576-byte node, put
+ * the 128-byte bitmap in the first entry and then start allocating extra
+ * 128-byte entries. We could instead use the 512 bytes of the node's
+ * data as a bitmap before moving to that scheme. I do not believe this
+ * is a worthwhile optimisation; Rasmus Villemoes surveyed the current
+ * users of the IDA and almost none of them use more than 1024 entries.
+ * Those that do use more than the 8192 IDs that the 512 bytes would
+ * provide.
+ *
+ * The IDA always uses a lock to alloc/free. If we add a 'test_bit'
* equivalent, it will still need locking. Going to RCU lookup would require
* using RCU to free bitmaps, and that's not trivial without embedding an
* RCU head in the bitmap, which adds a 2-pointer overhead to each 128-byte
* bitmap, which is excessive.
*/
-#define IDA_MAX (0x80000000U / IDA_BITMAP_BITS - 1)
-
-static int ida_get_new_above(struct ida *ida, int start)
+/**
+ * ida_alloc_range() - Allocate an unused ID.
+ * @ida: IDA handle.
+ * @min: Lowest ID to allocate.
+ * @max: Highest ID to allocate.
+ * @gfp: Memory allocation flags.
+ *
+ * Allocate an ID between @min and @max, inclusive. The allocated ID will
+ * not exceed %INT_MAX, even if @max is larger.
+ *
+ * Context: Any context.
+ * Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
+ * or %-ENOSPC if there are no free IDs.
+ */
+int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max,
+ gfp_t gfp)
{
- struct radix_tree_root *root = &ida->ida_rt;
- void __rcu **slot;
- struct radix_tree_iter iter;
- struct ida_bitmap *bitmap;
- unsigned long index;
- unsigned bit, ebit;
- int new;
-
- index = start / IDA_BITMAP_BITS;
- bit = start % IDA_BITMAP_BITS;
- ebit = bit + RADIX_TREE_EXCEPTIONAL_SHIFT;
-
- slot = radix_tree_iter_init(&iter, index);
- for (;;) {
- if (slot)
- slot = radix_tree_next_slot(slot, &iter,
- RADIX_TREE_ITER_TAGGED);
- if (!slot) {
- slot = idr_get_free(root, &iter, GFP_NOWAIT, IDA_MAX);
- if (IS_ERR(slot)) {
- if (slot == ERR_PTR(-ENOMEM))
- return -EAGAIN;
- return PTR_ERR(slot);
+ XA_STATE(xas, &ida->xa, min / IDA_BITMAP_BITS);
+ unsigned bit = min % IDA_BITMAP_BITS;
+ unsigned long flags;
+ struct ida_bitmap *bitmap, *alloc = NULL;
+
+ if ((int)min < 0)
+ return -ENOSPC;
+
+ if ((int)max < 0)
+ max = INT_MAX;
+
+retry:
+ xas_lock_irqsave(&xas, flags);
+next:
+ bitmap = xas_find_marked(&xas, max / IDA_BITMAP_BITS, XA_FREE_MARK);
+ if (xas.xa_index > min / IDA_BITMAP_BITS)
+ bit = 0;
+ if (xas.xa_index * IDA_BITMAP_BITS + bit > max)
+ goto nospc;
+
+ if (xa_is_value(bitmap)) {
+ unsigned long tmp = xa_to_value(bitmap);
+
+ if (bit < BITS_PER_XA_VALUE) {
+ bit = find_next_zero_bit(&tmp, BITS_PER_XA_VALUE, bit);
+ if (xas.xa_index * IDA_BITMAP_BITS + bit > max)
+ goto nospc;
+ if (bit < BITS_PER_XA_VALUE) {
+ tmp |= 1UL << bit;
+ xas_store(&xas, xa_mk_value(tmp));
+ goto out;
}
}
- if (iter.index > index) {
- bit = 0;
- ebit = RADIX_TREE_EXCEPTIONAL_SHIFT;
- }
- new = iter.index * IDA_BITMAP_BITS;
- bitmap = rcu_dereference_raw(*slot);
- if (radix_tree_exception(bitmap)) {
- unsigned long tmp = (unsigned long)bitmap;
- ebit = find_next_zero_bit(&tmp, BITS_PER_LONG, ebit);
- if (ebit < BITS_PER_LONG) {
- tmp |= 1UL << ebit;
- rcu_assign_pointer(*slot, (void *)tmp);
- return new + ebit -
- RADIX_TREE_EXCEPTIONAL_SHIFT;
- }
- bitmap = this_cpu_xchg(ida_bitmap, NULL);
- if (!bitmap)
- return -EAGAIN;
- bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT;
- rcu_assign_pointer(*slot, bitmap);
+ bitmap = alloc;
+ if (!bitmap)
+ bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT);
+ if (!bitmap)
+ goto alloc;
+ bitmap->bitmap[0] = tmp;
+ xas_store(&xas, bitmap);
+ if (xas_error(&xas)) {
+ bitmap->bitmap[0] = 0;
+ goto out;
}
+ }
- if (bitmap) {
- bit = find_next_zero_bit(bitmap->bitmap,
- IDA_BITMAP_BITS, bit);
- new += bit;
- if (new < 0)
- return -ENOSPC;
- if (bit == IDA_BITMAP_BITS)
- continue;
+ if (bitmap) {
+ bit = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, bit);
+ if (xas.xa_index * IDA_BITMAP_BITS + bit > max)
+ goto nospc;
+ if (bit == IDA_BITMAP_BITS)
+ goto next;
- __set_bit(bit, bitmap->bitmap);
- if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))
- radix_tree_iter_tag_clear(root, &iter,
- IDR_FREE);
+ __set_bit(bit, bitmap->bitmap);
+ if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))
+ xas_clear_mark(&xas, XA_FREE_MARK);
+ } else {
+ if (bit < BITS_PER_XA_VALUE) {
+ bitmap = xa_mk_value(1UL << bit);
} else {
- new += bit;
- if (new < 0)
- return -ENOSPC;
- if (ebit < BITS_PER_LONG) {
- bitmap = (void *)((1UL << ebit) |
- RADIX_TREE_EXCEPTIONAL_ENTRY);
- radix_tree_iter_replace(root, &iter, slot,
- bitmap);
- return new;
- }
- bitmap = this_cpu_xchg(ida_bitmap, NULL);
+ bitmap = alloc;
if (!bitmap)
- return -EAGAIN;
+ bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT);
+ if (!bitmap)
+ goto alloc;
__set_bit(bit, bitmap->bitmap);
- radix_tree_iter_replace(root, &iter, slot, bitmap);
}
-
- return new;
+ xas_store(&xas, bitmap);
+ }
+out:
+ xas_unlock_irqrestore(&xas, flags);
+ if (xas_nomem(&xas, gfp)) {
+ xas.xa_index = min / IDA_BITMAP_BITS;
+ bit = min % IDA_BITMAP_BITS;
+ goto retry;
}
+ if (bitmap != alloc)
+ kfree(alloc);
+ if (xas_error(&xas))
+ return xas_error(&xas);
+ return xas.xa_index * IDA_BITMAP_BITS + bit;
+alloc:
+ xas_unlock_irqrestore(&xas, flags);
+ alloc = kzalloc(sizeof(*bitmap), gfp);
+ if (!alloc)
+ return -ENOMEM;
+ xas_set(&xas, min / IDA_BITMAP_BITS);
+ bit = min % IDA_BITMAP_BITS;
+ goto retry;
+nospc:
+ xas_unlock_irqrestore(&xas, flags);
+ return -ENOSPC;
}
+EXPORT_SYMBOL(ida_alloc_range);
-static void ida_remove(struct ida *ida, int id)
+/**
+ * ida_free() - Release an allocated ID.
+ * @ida: IDA handle.
+ * @id: Previously allocated ID.
+ *
+ * Context: Any context.
+ */
+void ida_free(struct ida *ida, unsigned int id)
{
- unsigned long index = id / IDA_BITMAP_BITS;
- unsigned offset = id % IDA_BITMAP_BITS;
+ XA_STATE(xas, &ida->xa, id / IDA_BITMAP_BITS);
+ unsigned bit = id % IDA_BITMAP_BITS;
struct ida_bitmap *bitmap;
- unsigned long *btmp;
- struct radix_tree_iter iter;
- void __rcu **slot;
+ unsigned long flags;
- slot = radix_tree_iter_lookup(&ida->ida_rt, &iter, index);
- if (!slot)
- goto err;
+ BUG_ON((int)id < 0);
+
+ xas_lock_irqsave(&xas, flags);
+ bitmap = xas_load(&xas);
- bitmap = rcu_dereference_raw(*slot);
- if (radix_tree_exception(bitmap)) {
- btmp = (unsigned long *)slot;
- offset += RADIX_TREE_EXCEPTIONAL_SHIFT;
- if (offset >= BITS_PER_LONG)
+ if (xa_is_value(bitmap)) {
+ unsigned long v = xa_to_value(bitmap);
+ if (bit >= BITS_PER_XA_VALUE)
goto err;
+ if (!(v & (1UL << bit)))
+ goto err;
+ v &= ~(1UL << bit);
+ if (!v)
+ goto delete;
+ xas_store(&xas, xa_mk_value(v));
} else {
- btmp = bitmap->bitmap;
- }
- if (!test_bit(offset, btmp))
- goto err;
-
- __clear_bit(offset, btmp);
- radix_tree_iter_tag_set(&ida->ida_rt, &iter, IDR_FREE);
- if (radix_tree_exception(bitmap)) {
- if (rcu_dereference_raw(*slot) ==
- (void *)RADIX_TREE_EXCEPTIONAL_ENTRY)
- radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
- } else if (bitmap_empty(btmp, IDA_BITMAP_BITS)) {
- kfree(bitmap);
- radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
+ if (!test_bit(bit, bitmap->bitmap))
+ goto err;
+ __clear_bit(bit, bitmap->bitmap);
+ xas_set_mark(&xas, XA_FREE_MARK);
+ if (bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) {
+ kfree(bitmap);
+delete:
+ xas_store(&xas, NULL);
+ }
}
+ xas_unlock_irqrestore(&xas, flags);
return;
err:
+ xas_unlock_irqrestore(&xas, flags);
WARN(1, "ida_free called for id=%d which is not allocated.\n", id);
}
+EXPORT_SYMBOL(ida_free);
/**
* ida_destroy() - Free all IDs.
@@ -500,80 +533,60 @@ static void ida_remove(struct ida *ida, int id)
*/
void ida_destroy(struct ida *ida)
{
+ XA_STATE(xas, &ida->xa, 0);
+ struct ida_bitmap *bitmap;
unsigned long flags;
- struct radix_tree_iter iter;
- void __rcu **slot;
- xa_lock_irqsave(&ida->ida_rt, flags);
- radix_tree_for_each_slot(slot, &ida->ida_rt, &iter, 0) {
- struct ida_bitmap *bitmap = rcu_dereference_raw(*slot);
- if (!radix_tree_exception(bitmap))
+ xas_lock_irqsave(&xas, flags);
+ xas_for_each(&xas, bitmap, ULONG_MAX) {
+ if (!xa_is_value(bitmap))
kfree(bitmap);
- radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
+ xas_store(&xas, NULL);
}
- xa_unlock_irqrestore(&ida->ida_rt, flags);
+ xas_unlock_irqrestore(&xas, flags);
}
EXPORT_SYMBOL(ida_destroy);
-/**
- * ida_alloc_range() - Allocate an unused ID.
- * @ida: IDA handle.
- * @min: Lowest ID to allocate.
- * @max: Highest ID to allocate.
- * @gfp: Memory allocation flags.
- *
- * Allocate an ID between @min and @max, inclusive. The allocated ID will
- * not exceed %INT_MAX, even if @max is larger.
- *
- * Context: Any context.
- * Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
- * or %-ENOSPC if there are no free IDs.
- */
-int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max,
- gfp_t gfp)
-{
- int id = 0;
- unsigned long flags;
+#ifndef __KERNEL__
+extern void xa_dump_index(unsigned long index, unsigned int shift);
+#define IDA_CHUNK_SHIFT ilog2(IDA_BITMAP_BITS)
- if ((int)min < 0)
- return -ENOSPC;
-
- if ((int)max < 0)
- max = INT_MAX;
-
-again:
- xa_lock_irqsave(&ida->ida_rt, flags);
- id = ida_get_new_above(ida, min);
- if (id > (int)max) {
- ida_remove(ida, id);
- id = -ENOSPC;
- }
- xa_unlock_irqrestore(&ida->ida_rt, flags);
+static void ida_dump_entry(void *entry, unsigned long index)
+{
+ unsigned long i;
+
+ if (!entry)
+ return;
+
+ if (xa_is_node(entry)) {
+ struct xa_node *node = xa_to_node(entry);
+ unsigned int shift = node->shift + IDA_CHUNK_SHIFT +
+ XA_CHUNK_SHIFT;
+
+ xa_dump_index(index * IDA_BITMAP_BITS, shift);
+ xa_dump_node(node);
+ for (i = 0; i < XA_CHUNK_SIZE; i++)
+ ida_dump_entry(node->slots[i],
+ index | (i << node->shift));
+ } else if (xa_is_value(entry)) {
+ xa_dump_index(index * IDA_BITMAP_BITS, ilog2(BITS_PER_LONG));
+ pr_cont("value: data %lx [%px]\n", xa_to_value(entry), entry);
+ } else {
+ struct ida_bitmap *bitmap = entry;
- if (unlikely(id == -EAGAIN)) {
- if (!ida_pre_get(ida, gfp))
- return -ENOMEM;
- goto again;
+ xa_dump_index(index * IDA_BITMAP_BITS, IDA_CHUNK_SHIFT);
+ pr_cont("bitmap: %p data", bitmap);
+ for (i = 0; i < IDA_BITMAP_LONGS; i++)
+ pr_cont(" %lx", bitmap->bitmap[i]);
+ pr_cont("\n");
}
-
- return id;
}
-EXPORT_SYMBOL(ida_alloc_range);
-/**
- * ida_free() - Release an allocated ID.
- * @ida: IDA handle.
- * @id: Previously allocated ID.
- *
- * Context: Any context.
- */
-void ida_free(struct ida *ida, unsigned int id)
+static void ida_dump(struct ida *ida)
{
- unsigned long flags;
-
- BUG_ON((int)id < 0);
- xa_lock_irqsave(&ida->ida_rt, flags);
- ida_remove(ida, id);
- xa_unlock_irqrestore(&ida->ida_rt, flags);
+ struct xarray *xa = &ida->xa;
+ pr_debug("ida: %p node %p free %d\n", ida, xa->xa_head,
+ xa->xa_flags >> ROOT_TAG_SHIFT);
+ ida_dump_entry(xa->xa_head, 0);
}
-EXPORT_SYMBOL(ida_free);
+#endif
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index bc03ecc4dfd2..1106bb6aa01e 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -38,15 +38,13 @@
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/string.h>
+#include <linux/xarray.h>
-/* Number of nodes in fully populated tree of given height */
-static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly;
-
/*
* Radix tree node cache.
*/
-static struct kmem_cache *radix_tree_node_cachep;
+struct kmem_cache *radix_tree_node_cachep;
/*
* The radix tree is variable-height, so an insert operation not only has
@@ -98,24 +96,7 @@ static inline void *node_to_entry(void *ptr)
return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
}
-#define RADIX_TREE_RETRY node_to_entry(NULL)
-
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-/* Sibling slots point directly to another slot in the same node */
-static inline
-bool is_sibling_entry(const struct radix_tree_node *parent, void *node)
-{
- void __rcu **ptr = node;
- return (parent->slots <= ptr) &&
- (ptr < parent->slots + RADIX_TREE_MAP_SIZE);
-}
-#else
-static inline
-bool is_sibling_entry(const struct radix_tree_node *parent, void *node)
-{
- return false;
-}
-#endif
+#define RADIX_TREE_RETRY XA_RETRY_ENTRY
static inline unsigned long
get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot)
@@ -129,24 +110,13 @@ static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;
void __rcu **entry = rcu_dereference_raw(parent->slots[offset]);
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
- if (radix_tree_is_internal_node(entry)) {
- if (is_sibling_entry(parent, entry)) {
- void __rcu **sibentry;
- sibentry = (void __rcu **) entry_to_node(entry);
- offset = get_slot_offset(parent, sibentry);
- entry = rcu_dereference_raw(*sibentry);
- }
- }
-#endif
-
*nodep = (void *)entry;
return offset;
}
static inline gfp_t root_gfp_mask(const struct radix_tree_root *root)
{
- return root->gfp_mask & (__GFP_BITS_MASK & ~GFP_ZONEMASK);
+ return root->xa_flags & (__GFP_BITS_MASK & ~GFP_ZONEMASK);
}
static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
@@ -169,32 +139,32 @@ static inline int tag_get(const struct radix_tree_node *node, unsigned int tag,
static inline void root_tag_set(struct radix_tree_root *root, unsigned tag)
{
- root->gfp_mask |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT));
+ root->xa_flags |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT));
}
static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag)
{
- root->gfp_mask &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT));
+ root->xa_flags &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT));
}
static inline void root_tag_clear_all(struct radix_tree_root *root)
{
- root->gfp_mask &= (1 << ROOT_TAG_SHIFT) - 1;
+ root->xa_flags &= (__force gfp_t)((1 << ROOT_TAG_SHIFT) - 1);
}
static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag)
{
- return (__force int)root->gfp_mask & (1 << (tag + ROOT_TAG_SHIFT));
+ return (__force int)root->xa_flags & (1 << (tag + ROOT_TAG_SHIFT));
}
static inline unsigned root_tags_get(const struct radix_tree_root *root)
{
- return (__force unsigned)root->gfp_mask >> ROOT_TAG_SHIFT;
+ return (__force unsigned)root->xa_flags >> ROOT_TAG_SHIFT;
}
static inline bool is_idr(const struct radix_tree_root *root)
{
- return !!(root->gfp_mask & ROOT_IS_IDR);
+ return !!(root->xa_flags & ROOT_IS_IDR);
}
/*
@@ -254,7 +224,7 @@ radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag,
static unsigned int iter_offset(const struct radix_tree_iter *iter)
{
- return (iter->index >> iter_shift(iter)) & RADIX_TREE_MAP_MASK;
+ return iter->index & RADIX_TREE_MAP_MASK;
}
/*
@@ -277,99 +247,6 @@ static unsigned long next_index(unsigned long index,
return (index & ~node_maxindex(node)) + (offset << node->shift);
}
-#ifndef __KERNEL__
-static void dump_node(struct radix_tree_node *node, unsigned long index)
-{
- unsigned long i;
-
- pr_debug("radix node: %p offset %d indices %lu-%lu parent %p tags %lx %lx %lx shift %d count %d exceptional %d\n",
- node, node->offset, index, index | node_maxindex(node),
- node->parent,
- node->tags[0][0], node->tags[1][0], node->tags[2][0],
- node->shift, node->count, node->exceptional);
-
- for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
- unsigned long first = index | (i << node->shift);
- unsigned long last = first | ((1UL << node->shift) - 1);
- void *entry = node->slots[i];
- if (!entry)
- continue;
- if (entry == RADIX_TREE_RETRY) {
- pr_debug("radix retry offset %ld indices %lu-%lu parent %p\n",
- i, first, last, node);
- } else if (!radix_tree_is_internal_node(entry)) {
- pr_debug("radix entry %p offset %ld indices %lu-%lu parent %p\n",
- entry, i, first, last, node);
- } else if (is_sibling_entry(node, entry)) {
- pr_debug("radix sblng %p offset %ld indices %lu-%lu parent %p val %p\n",
- entry, i, first, last, node,
- *(void **)entry_to_node(entry));
- } else {
- dump_node(entry_to_node(entry), first);
- }
- }
-}
-
-/* For debug */
-static void radix_tree_dump(struct radix_tree_root *root)
-{
- pr_debug("radix root: %p rnode %p tags %x\n",
- root, root->rnode,
- root->gfp_mask >> ROOT_TAG_SHIFT);
- if (!radix_tree_is_internal_node(root->rnode))
- return;
- dump_node(entry_to_node(root->rnode), 0);
-}
-
-static void dump_ida_node(void *entry, unsigned long index)
-{
- unsigned long i;
-
- if (!entry)
- return;
-
- if (radix_tree_is_internal_node(entry)) {
- struct radix_tree_node *node = entry_to_node(entry);
-
- pr_debug("ida node: %p offset %d indices %lu-%lu parent %p free %lx shift %d count %d\n",
- node, node->offset, index * IDA_BITMAP_BITS,
- ((index | node_maxindex(node)) + 1) *
- IDA_BITMAP_BITS - 1,
- node->parent, node->tags[0][0], node->shift,
- node->count);
- for (i = 0; i < RADIX_TREE_MAP_SIZE; i++)
- dump_ida_node(node->slots[i],
- index | (i << node->shift));
- } else if (radix_tree_exceptional_entry(entry)) {
- pr_debug("ida excp: %p offset %d indices %lu-%lu data %lx\n",
- entry, (int)(index & RADIX_TREE_MAP_MASK),
- index * IDA_BITMAP_BITS,
- index * IDA_BITMAP_BITS + BITS_PER_LONG -
- RADIX_TREE_EXCEPTIONAL_SHIFT,
- (unsigned long)entry >>
- RADIX_TREE_EXCEPTIONAL_SHIFT);
- } else {
- struct ida_bitmap *bitmap = entry;
-
- pr_debug("ida btmp: %p offset %d indices %lu-%lu data", bitmap,
- (int)(index & RADIX_TREE_MAP_MASK),
- index * IDA_BITMAP_BITS,
- (index + 1) * IDA_BITMAP_BITS - 1);
- for (i = 0; i < IDA_BITMAP_LONGS; i++)
- pr_cont(" %lx", bitmap->bitmap[i]);
- pr_cont("\n");
- }
-}
-
-static void ida_dump(struct ida *ida)
-{
- struct radix_tree_root *root = &ida->ida_rt;
- pr_debug("ida: %p node %p free %d\n", ida, root->rnode,
- root->gfp_mask >> ROOT_TAG_SHIFT);
- dump_ida_node(root->rnode, 0);
-}
-#endif
-
/*
* This assumes that the caller has performed appropriate preallocation, and
* that the caller has pinned this thread of control to the current CPU.
@@ -378,7 +255,7 @@ static struct radix_tree_node *
radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent,
struct radix_tree_root *root,
unsigned int shift, unsigned int offset,
- unsigned int count, unsigned int exceptional)
+ unsigned int count, unsigned int nr_values)
{
struct radix_tree_node *ret = NULL;
@@ -425,14 +302,14 @@ out:
ret->shift = shift;
ret->offset = offset;
ret->count = count;
- ret->exceptional = exceptional;
+ ret->nr_values = nr_values;
ret->parent = parent;
- ret->root = root;
+ ret->array = root;
}
return ret;
}
-static void radix_tree_node_rcu_free(struct rcu_head *head)
+void radix_tree_node_rcu_free(struct rcu_head *head)
{
struct radix_tree_node *node =
container_of(head, struct radix_tree_node, rcu_head);
@@ -530,77 +407,10 @@ int radix_tree_maybe_preload(gfp_t gfp_mask)
}
EXPORT_SYMBOL(radix_tree_maybe_preload);
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-/*
- * Preload with enough objects to ensure that we can split a single entry
- * of order @old_order into many entries of size @new_order
- */
-int radix_tree_split_preload(unsigned int old_order, unsigned int new_order,
- gfp_t gfp_mask)
-{
- unsigned top = 1 << (old_order % RADIX_TREE_MAP_SHIFT);
- unsigned layers = (old_order / RADIX_TREE_MAP_SHIFT) -
- (new_order / RADIX_TREE_MAP_SHIFT);
- unsigned nr = 0;
-
- WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
- BUG_ON(new_order >= old_order);
-
- while (layers--)
- nr = nr * RADIX_TREE_MAP_SIZE + 1;
- return __radix_tree_preload(gfp_mask, top * nr);
-}
-#endif
-
-/*
- * The same as function above, but preload number of nodes required to insert
- * (1 << order) continuous naturally-aligned elements.
- */
-int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order)
-{
- unsigned long nr_subtrees;
- int nr_nodes, subtree_height;
-
- /* Preloading doesn't help anything with this gfp mask, skip it */
- if (!gfpflags_allow_blocking(gfp_mask)) {
- preempt_disable();
- return 0;
- }
-
- /*
- * Calculate number and height of fully populated subtrees it takes to
- * store (1 << order) elements.
- */
- nr_subtrees = 1 << order;
- for (subtree_height = 0; nr_subtrees > RADIX_TREE_MAP_SIZE;
- subtree_height++)
- nr_subtrees >>= RADIX_TREE_MAP_SHIFT;
-
- /*
- * The worst case is zero height tree with a single item at index 0 and
- * then inserting items starting at ULONG_MAX - (1 << order).
- *
- * This requires RADIX_TREE_MAX_PATH nodes to build branch from root to
- * 0-index item.
- */
- nr_nodes = RADIX_TREE_MAX_PATH;
-
- /* Plus branch to fully populated subtrees. */
- nr_nodes += RADIX_TREE_MAX_PATH - subtree_height;
-
- /* Root node is shared. */
- nr_nodes--;
-
- /* Plus nodes required to build subtrees. */
- nr_nodes += nr_subtrees * height_to_maxnodes[subtree_height];
-
- return __radix_tree_preload(gfp_mask, nr_nodes);
-}
-
static unsigned radix_tree_load_root(const struct radix_tree_root *root,
struct radix_tree_node **nodep, unsigned long *maxindex)
{
- struct radix_tree_node *node = rcu_dereference_raw(root->rnode);
+ struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);
*nodep = node;
@@ -629,7 +439,7 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
while (index > shift_maxindex(maxshift))
maxshift += RADIX_TREE_MAP_SHIFT;
- entry = rcu_dereference_raw(root->rnode);
+ entry = rcu_dereference_raw(root->xa_head);
if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE)))
goto out;
@@ -656,9 +466,9 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
BUG_ON(shift > BITS_PER_LONG);
if (radix_tree_is_internal_node(entry)) {
entry_to_node(entry)->parent = node;
- } else if (radix_tree_exceptional_entry(entry)) {
- /* Moving an exceptional root->rnode to a node */
- node->exceptional = 1;
+ } else if (xa_is_value(entry)) {
+ /* Moving a value entry root->xa_head to a node */
+ node->nr_values = 1;
}
/*
* entry was already in the radix tree, so we do not need
@@ -666,7 +476,7 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
*/
node->slots[0] = (void __rcu *)entry;
entry = node_to_entry(node);
- rcu_assign_pointer(root->rnode, entry);
+ rcu_assign_pointer(root->xa_head, entry);
shift += RADIX_TREE_MAP_SHIFT;
} while (shift <= maxshift);
out:
@@ -677,13 +487,12 @@ out:
* radix_tree_shrink - shrink radix tree to minimum height
* @root radix tree root
*/
-static inline bool radix_tree_shrink(struct radix_tree_root *root,
- radix_tree_update_node_t update_node)
+static inline bool radix_tree_shrink(struct radix_tree_root *root)
{
bool shrunk = false;
for (;;) {
- struct radix_tree_node *node = rcu_dereference_raw(root->rnode);
+ struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);
struct radix_tree_node *child;
if (!radix_tree_is_internal_node(node))
@@ -692,15 +501,20 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root,
/*
* The candidate node has more than one child, or its child
- * is not at the leftmost slot, or the child is a multiorder
- * entry, we cannot shrink.
+ * is not at the leftmost slot, we cannot shrink.
*/
if (node->count != 1)
break;
child = rcu_dereference_raw(node->slots[0]);
if (!child)
break;
- if (!radix_tree_is_internal_node(child) && node->shift)
+
+ /*
+ * For an IDR, we must not shrink entry 0 into the root in
+ * case somebody calls idr_replace() with a pointer that
+ * appears to be an internal entry
+ */
+ if (!node->shift && is_idr(root))
break;
if (radix_tree_is_internal_node(child))
@@ -711,9 +525,9 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root,
* moving the node from one part of the tree to another: if it
* was safe to dereference the old pointer to it
* (node->slots[0]), it will be safe to dereference the new
- * one (root->rnode) as far as dependent read barriers go.
+ * one (root->xa_head) as far as dependent read barriers go.
*/
- root->rnode = (void __rcu *)child;
+ root->xa_head = (void __rcu *)child;
if (is_idr(root) && !tag_get(node, IDR_FREE, 0))
root_tag_clear(root, IDR_FREE);
@@ -738,8 +552,6 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root,
node->count = 0;
if (!radix_tree_is_internal_node(child)) {
node->slots[0] = (void __rcu *)RADIX_TREE_RETRY;
- if (update_node)
- update_node(node);
}
WARN_ON_ONCE(!list_empty(&node->private_list));
@@ -751,8 +563,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root,
}
static bool delete_node(struct radix_tree_root *root,
- struct radix_tree_node *node,
- radix_tree_update_node_t update_node)
+ struct radix_tree_node *node)
{
bool deleted = false;
@@ -761,9 +572,8 @@ static bool delete_node(struct radix_tree_root *root,
if (node->count) {
if (node_to_entry(node) ==
- rcu_dereference_raw(root->rnode))
- deleted |= radix_tree_shrink(root,
- update_node);
+ rcu_dereference_raw(root->xa_head))
+ deleted |= radix_tree_shrink(root);
return deleted;
}
@@ -778,7 +588,7 @@ static bool delete_node(struct radix_tree_root *root,
*/
if (!is_idr(root))
root_tag_clear_all(root);
- root->rnode = NULL;
+ root->xa_head = NULL;
}
WARN_ON_ONCE(!list_empty(&node->private_list));
@@ -795,7 +605,6 @@ static bool delete_node(struct radix_tree_root *root,
* __radix_tree_create - create a slot in a radix tree
* @root: radix tree root
* @index: index key
- * @order: index occupies 2^order aligned slots
* @nodep: returns node
* @slotp: returns slot
*
@@ -803,36 +612,34 @@ static bool delete_node(struct radix_tree_root *root,
* at position @index in the radix tree @root.
*
* Until there is more than one item in the tree, no nodes are
- * allocated and @root->rnode is used as a direct slot instead of
+ * allocated and @root->xa_head is used as a direct slot instead of
* pointing to a node, in which case *@nodep will be NULL.
*
* Returns -ENOMEM, or 0 for success.
*/
-int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
- unsigned order, struct radix_tree_node **nodep,
- void __rcu ***slotp)
+static int __radix_tree_create(struct radix_tree_root *root,
+ unsigned long index, struct radix_tree_node **nodep,
+ void __rcu ***slotp)
{
struct radix_tree_node *node = NULL, *child;
- void __rcu **slot = (void __rcu **)&root->rnode;
+ void __rcu **slot = (void __rcu **)&root->xa_head;
unsigned long maxindex;
unsigned int shift, offset = 0;
- unsigned long max = index | ((1UL << order) - 1);
+ unsigned long max = index;
gfp_t gfp = root_gfp_mask(root);
shift = radix_tree_load_root(root, &child, &maxindex);
/* Make sure the tree is high enough. */
- if (order > 0 && max == ((1UL << order) - 1))
- max++;
if (max > maxindex) {
int error = radix_tree_extend(root, gfp, max, shift);
if (error < 0)
return error;
shift = error;
- child = rcu_dereference_raw(root->rnode);
+ child = rcu_dereference_raw(root->xa_head);
}
- while (shift > order) {
+ while (shift > 0) {
shift -= RADIX_TREE_MAP_SHIFT;
if (child == NULL) {
/* Have to add a child node. */
@@ -875,8 +682,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node)
for (;;) {
void *entry = rcu_dereference_raw(child->slots[offset]);
- if (radix_tree_is_internal_node(entry) &&
- !is_sibling_entry(child, entry)) {
+ if (xa_is_node(entry) && child->shift) {
child = entry_to_node(entry);
offset = 0;
continue;
@@ -894,96 +700,30 @@ static void radix_tree_free_nodes(struct radix_tree_node *node)
}
}
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
static inline int insert_entries(struct radix_tree_node *node,
- void __rcu **slot, void *item, unsigned order, bool replace)
-{
- struct radix_tree_node *child;
- unsigned i, n, tag, offset, tags = 0;
-
- if (node) {
- if (order > node->shift)
- n = 1 << (order - node->shift);
- else
- n = 1;
- offset = get_slot_offset(node, slot);
- } else {
- n = 1;
- offset = 0;
- }
-
- if (n > 1) {
- offset = offset & ~(n - 1);
- slot = &node->slots[offset];
- }
- child = node_to_entry(slot);
-
- for (i = 0; i < n; i++) {
- if (slot[i]) {
- if (replace) {
- node->count--;
- for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
- if (tag_get(node, tag, offset + i))
- tags |= 1 << tag;
- } else
- return -EEXIST;
- }
- }
-
- for (i = 0; i < n; i++) {
- struct radix_tree_node *old = rcu_dereference_raw(slot[i]);
- if (i) {
- rcu_assign_pointer(slot[i], child);
- for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
- if (tags & (1 << tag))
- tag_clear(node, tag, offset + i);
- } else {
- rcu_assign_pointer(slot[i], item);
- for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
- if (tags & (1 << tag))
- tag_set(node, tag, offset);
- }
- if (radix_tree_is_internal_node(old) &&
- !is_sibling_entry(node, old) &&
- (old != RADIX_TREE_RETRY))
- radix_tree_free_nodes(old);
- if (radix_tree_exceptional_entry(old))
- node->exceptional--;
- }
- if (node) {
- node->count += n;
- if (radix_tree_exceptional_entry(item))
- node->exceptional += n;
- }
- return n;
-}
-#else
-static inline int insert_entries(struct radix_tree_node *node,
- void __rcu **slot, void *item, unsigned order, bool replace)
+ void __rcu **slot, void *item, bool replace)
{
if (*slot)
return -EEXIST;
rcu_assign_pointer(*slot, item);
if (node) {
node->count++;
- if (radix_tree_exceptional_entry(item))
- node->exceptional++;
+ if (xa_is_value(item))
+ node->nr_values++;
}
return 1;
}
-#endif
/**
* __radix_tree_insert - insert into a radix tree
* @root: radix tree root
* @index: index key
- * @order: key covers the 2^order indices around index
* @item: item to insert
*
* Insert an item into the radix tree at position @index.
*/
-int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
- unsigned order, void *item)
+int radix_tree_insert(struct radix_tree_root *root, unsigned long index,
+ void *item)
{
struct radix_tree_node *node;
void __rcu **slot;
@@ -991,11 +731,11 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
BUG_ON(radix_tree_is_internal_node(item));
- error = __radix_tree_create(root, index, order, &node, &slot);
+ error = __radix_tree_create(root, index, &node, &slot);
if (error)
return error;
- error = insert_entries(node, slot, item, order, false);
+ error = insert_entries(node, slot, item, false);
if (error < 0)
return error;
@@ -1010,7 +750,7 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
return 0;
}
-EXPORT_SYMBOL(__radix_tree_insert);
+EXPORT_SYMBOL(radix_tree_insert);
/**
* __radix_tree_lookup - lookup an item in a radix tree
@@ -1023,7 +763,7 @@ EXPORT_SYMBOL(__radix_tree_insert);
* tree @root.
*
* Until there is more than one item in the tree, no nodes are
- * allocated and @root->rnode is used as a direct slot instead of
+ * allocated and @root->xa_head is used as a direct slot instead of
* pointing to a node, in which case *@nodep will be NULL.
*/
void *__radix_tree_lookup(const struct radix_tree_root *root,
@@ -1036,7 +776,7 @@ void *__radix_tree_lookup(const struct radix_tree_root *root,
restart:
parent = NULL;
- slot = (void __rcu **)&root->rnode;
+ slot = (void __rcu **)&root->xa_head;
radix_tree_load_root(root, &node, &maxindex);
if (index > maxindex)
return NULL;
@@ -1049,6 +789,8 @@ void *__radix_tree_lookup(const struct radix_tree_root *root,
parent = entry_to_node(node);
offset = radix_tree_descend(parent, &node, index);
slot = parent->slots + offset;
+ if (parent->shift == 0)
+ break;
}
if (nodep)
@@ -1100,36 +842,12 @@ void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index)
}
EXPORT_SYMBOL(radix_tree_lookup);
-static inline void replace_sibling_entries(struct radix_tree_node *node,
- void __rcu **slot, int count, int exceptional)
-{
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
- void *ptr = node_to_entry(slot);
- unsigned offset = get_slot_offset(node, slot) + 1;
-
- while (offset < RADIX_TREE_MAP_SIZE) {
- if (rcu_dereference_raw(node->slots[offset]) != ptr)
- break;
- if (count < 0) {
- node->slots[offset] = NULL;
- node->count--;
- }
- node->exceptional += exceptional;
- offset++;
- }
-#endif
-}
-
static void replace_slot(void __rcu **slot, void *item,
- struct radix_tree_node *node, int count, int exceptional)
+ struct radix_tree_node *node, int count, int values)
{
- if (WARN_ON_ONCE(radix_tree_is_internal_node(item)))
- return;
-
- if (node && (count || exceptional)) {
+ if (node && (count || values)) {
node->count += count;
- node->exceptional += exceptional;
- replace_sibling_entries(node, slot, count, exceptional);
+ node->nr_values += values;
}
rcu_assign_pointer(*slot, item);
@@ -1172,37 +890,31 @@ static int calculate_count(struct radix_tree_root *root,
* @node: pointer to tree node
* @slot: pointer to slot in @node
* @item: new item to store in the slot.
- * @update_node: callback for changing leaf nodes
*
* For use with __radix_tree_lookup(). Caller must hold tree write locked
* across slot lookup and replacement.
*/
void __radix_tree_replace(struct radix_tree_root *root,
struct radix_tree_node *node,
- void __rcu **slot, void *item,
- radix_tree_update_node_t update_node)
+ void __rcu **slot, void *item)
{
void *old = rcu_dereference_raw(*slot);
- int exceptional = !!radix_tree_exceptional_entry(item) -
- !!radix_tree_exceptional_entry(old);
+ int values = !!xa_is_value(item) - !!xa_is_value(old);
int count = calculate_count(root, node, slot, item, old);
/*
- * This function supports replacing exceptional entries and
+ * This function supports replacing value entries and
* deleting entries, but that needs accounting against the
- * node unless the slot is root->rnode.
+ * node unless the slot is root->xa_head.
*/
- WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->rnode) &&
- (count || exceptional));
- replace_slot(slot, item, node, count, exceptional);
+ WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) &&
+ (count || values));
+ replace_slot(slot, item, node, count, values);
if (!node)
return;
- if (update_node)
- update_node(node);
-
- delete_node(root, node, update_node);
+ delete_node(root, node);
}
/**
@@ -1211,12 +923,12 @@ void __radix_tree_replace(struct radix_tree_root *root,
* @slot: pointer to slot
* @item: new item to store in the slot.
*
- * For use with radix_tree_lookup_slot(), radix_tree_gang_lookup_slot(),
+ * For use with radix_tree_lookup_slot() and
* radix_tree_gang_lookup_tag_slot(). Caller must hold tree write locked
* across slot lookup and replacement.
*
* NOTE: This cannot be used to switch between non-entries (empty slots),
- * regular entries, and exceptional entries, as that requires accounting
+ * regular entries, and value entries, as that requires accounting
* inside the radix tree node. When switching from one type of entry or
* deleting, use __radix_tree_lookup() and __radix_tree_replace() or
* radix_tree_iter_replace().
@@ -1224,7 +936,7 @@ void __radix_tree_replace(struct radix_tree_root *root,
void radix_tree_replace_slot(struct radix_tree_root *root,
void __rcu **slot, void *item)
{
- __radix_tree_replace(root, NULL, slot, item, NULL);
+ __radix_tree_replace(root, NULL, slot, item);
}
EXPORT_SYMBOL(radix_tree_replace_slot);
@@ -1234,162 +946,16 @@ EXPORT_SYMBOL(radix_tree_replace_slot);
* @slot: pointer to slot
* @item: new item to store in the slot.
*
- * For use with radix_tree_split() and radix_tree_for_each_slot().
- * Caller must hold tree write locked across split and replacement.
+ * For use with radix_tree_for_each_slot().
+ * Caller must hold tree write locked.
*/
void radix_tree_iter_replace(struct radix_tree_root *root,
const struct radix_tree_iter *iter,
void __rcu **slot, void *item)
{
- __radix_tree_replace(root, iter->node, slot, item, NULL);
+ __radix_tree_replace(root, iter->node, slot, item);
}
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-/**
- * radix_tree_join - replace multiple entries with one multiorder entry
- * @root: radix tree root
- * @index: an index inside the new entry
- * @order: order of the new entry
- * @item: new entry
- *
- * Call this function to replace several entries with one larger entry.
- * The existing entries are presumed to not need freeing as a result of
- * this call.
- *
- * The replacement entry will have all the tags set on it that were set
- * on any of the entries it is replacing.
- */
-int radix_tree_join(struct radix_tree_root *root, unsigned long index,
- unsigned order, void *item)
-{
- struct radix_tree_node *node;
- void __rcu **slot;
- int error;
-
- BUG_ON(radix_tree_is_internal_node(item));
-
- error = __radix_tree_create(root, index, order, &node, &slot);
- if (!error)
- error = insert_entries(node, slot, item, order, true);
- if (error > 0)
- error = 0;
-
- return error;
-}
-
-/**
- * radix_tree_split - Split an entry into smaller entries
- * @root: radix tree root
- * @index: An index within the large entry
- * @order: Order of new entries
- *
- * Call this function as the first step in replacing a multiorder entry
- * with several entries of lower order. After this function returns,
- * loop over the relevant portion of the tree using radix_tree_for_each_slot()
- * and call radix_tree_iter_replace() to set up each new entry.
- *
- * The tags from this entry are replicated to all the new entries.
- *
- * The radix tree should be locked against modification during the entire
- * replacement operation. Lock-free lookups will see RADIX_TREE_RETRY which
- * should prompt RCU walkers to restart the lookup from the root.
- */
-int radix_tree_split(struct radix_tree_root *root, unsigned long index,
- unsigned order)
-{
- struct radix_tree_node *parent, *node, *child;
- void __rcu **slot;
- unsigned int offset, end;
- unsigned n, tag, tags = 0;
- gfp_t gfp = root_gfp_mask(root);
-
- if (!__radix_tree_lookup(root, index, &parent, &slot))
- return -ENOENT;
- if (!parent)
- return -ENOENT;
-
- offset = get_slot_offset(parent, slot);
-
- for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
- if (tag_get(parent, tag, offset))
- tags |= 1 << tag;
-
- for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) {
- if (!is_sibling_entry(parent,
- rcu_dereference_raw(parent->slots[end])))
- break;
- for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
- if (tags & (1 << tag))
- tag_set(parent, tag, end);
- /* rcu_assign_pointer ensures tags are set before RETRY */
- rcu_assign_pointer(parent->slots[end], RADIX_TREE_RETRY);
- }
- rcu_assign_pointer(parent->slots[offset], RADIX_TREE_RETRY);
- parent->exceptional -= (end - offset);
-
- if (order == parent->shift)
- return 0;
- if (order > parent->shift) {
- while (offset < end)
- offset += insert_entries(parent, &parent->slots[offset],
- RADIX_TREE_RETRY, order, true);
- return 0;
- }
-
- node = parent;
-
- for (;;) {
- if (node->shift > order) {
- child = radix_tree_node_alloc(gfp, node, root,
- node->shift - RADIX_TREE_MAP_SHIFT,
- offset, 0, 0);
- if (!child)
- goto nomem;
- if (node != parent) {
- node->count++;
- rcu_assign_pointer(node->slots[offset],
- node_to_entry(child));
- for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
- if (tags & (1 << tag))
- tag_set(node, tag, offset);
- }
-
- node = child;
- offset = 0;
- continue;
- }
-
- n = insert_entries(node, &node->slots[offset],
- RADIX_TREE_RETRY, order, false);
- BUG_ON(n > RADIX_TREE_MAP_SIZE);
-
- for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
- if (tags & (1 << tag))
- tag_set(node, tag, offset);
- offset += n;
-
- while (offset == RADIX_TREE_MAP_SIZE) {
- if (node == parent)
- break;
- offset = node->offset;
- child = node;
- node = node->parent;
- rcu_assign_pointer(node->slots[offset],
- node_to_entry(child));
- offset++;
- }
- if ((node == parent) && (offset == end))
- return 0;
- }
-
- nomem:
- /* Shouldn't happen; did user forget to preload? */
- /* TODO: free all the allocated nodes */
- WARN_ON(1);
- return -ENOMEM;
-}
-#endif
-
static void node_tag_set(struct radix_tree_root *root,
struct radix_tree_node *node,
unsigned int tag, unsigned int offset)
@@ -1447,18 +1013,6 @@ void *radix_tree_tag_set(struct radix_tree_root *root,
}
EXPORT_SYMBOL(radix_tree_tag_set);
-/**
- * radix_tree_iter_tag_set - set a tag on the current iterator entry
- * @root: radix tree root
- * @iter: iterator state
- * @tag: tag to set
- */
-void radix_tree_iter_tag_set(struct radix_tree_root *root,
- const struct radix_tree_iter *iter, unsigned int tag)
-{
- node_tag_set(root, iter->node, tag, iter_offset(iter));
-}
-
static void node_tag_clear(struct radix_tree_root *root,
struct radix_tree_node *node,
unsigned int tag, unsigned int offset)
@@ -1574,14 +1128,6 @@ int radix_tree_tag_get(const struct radix_tree_root *root,
}
EXPORT_SYMBOL(radix_tree_tag_get);
-static inline void __set_iter_shift(struct radix_tree_iter *iter,
- unsigned int shift)
-{
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
- iter->shift = shift;
-#endif
-}
-
/* Construct iter->tags bit-mask from node->tags[tag] array */
static void set_iter_tags(struct radix_tree_iter *iter,
struct radix_tree_node *node, unsigned offset,
@@ -1608,92 +1154,11 @@ static void set_iter_tags(struct radix_tree_iter *iter,
}
}
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-static void __rcu **skip_siblings(struct radix_tree_node **nodep,
- void __rcu **slot, struct radix_tree_iter *iter)
-{
- while (iter->index < iter->next_index) {
- *nodep = rcu_dereference_raw(*slot);
- if (*nodep && !is_sibling_entry(iter->node, *nodep))
- return slot;
- slot++;
- iter->index = __radix_tree_iter_add(iter, 1);
- iter->tags >>= 1;
- }
-
- *nodep = NULL;
- return NULL;
-}
-
-void __rcu **__radix_tree_next_slot(void __rcu **slot,
- struct radix_tree_iter *iter, unsigned flags)
-{
- unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
- struct radix_tree_node *node;
-
- slot = skip_siblings(&node, slot, iter);
-
- while (radix_tree_is_internal_node(node)) {
- unsigned offset;
- unsigned long next_index;
-
- if (node == RADIX_TREE_RETRY)
- return slot;
- node = entry_to_node(node);
- iter->node = node;
- iter->shift = node->shift;
-
- if (flags & RADIX_TREE_ITER_TAGGED) {
- offset = radix_tree_find_next_bit(node, tag, 0);
- if (offset == RADIX_TREE_MAP_SIZE)
- return NULL;
- slot = &node->slots[offset];
- iter->index = __radix_tree_iter_add(iter, offset);
- set_iter_tags(iter, node, offset, tag);
- node = rcu_dereference_raw(*slot);
- } else {
- offset = 0;
- slot = &node->slots[0];
- for (;;) {
- node = rcu_dereference_raw(*slot);
- if (node)
- break;
- slot++;
- offset++;
- if (offset == RADIX_TREE_MAP_SIZE)
- return NULL;
- }
- iter->index = __radix_tree_iter_add(iter, offset);
- }
- if ((flags & RADIX_TREE_ITER_CONTIG) && (offset > 0))
- goto none;
- next_index = (iter->index | shift_maxindex(iter->shift)) + 1;
- if (next_index < iter->next_index)
- iter->next_index = next_index;
- }
-
- return slot;
- none:
- iter->next_index = 0;
- return NULL;
-}
-EXPORT_SYMBOL(__radix_tree_next_slot);
-#else
-static void __rcu **skip_siblings(struct radix_tree_node **nodep,
- void __rcu **slot, struct radix_tree_iter *iter)
-{
- return slot;
-}
-#endif
-
void __rcu **radix_tree_iter_resume(void __rcu **slot,
struct radix_tree_iter *iter)
{
- struct radix_tree_node *node;
-
slot++;
iter->index = __radix_tree_iter_add(iter, 1);
- skip_siblings(&node, slot, iter);
iter->next_index = iter->index;
iter->tags = 0;
return NULL;
@@ -1744,8 +1209,7 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
iter->next_index = maxindex + 1;
iter->tags = 1;
iter->node = NULL;
- __set_iter_shift(iter, 0);
- return (void __rcu **)&root->rnode;
+ return (void __rcu **)&root->xa_head;
}
do {
@@ -1765,8 +1229,6 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
while (++offset < RADIX_TREE_MAP_SIZE) {
void *slot = rcu_dereference_raw(
node->slots[offset]);
- if (is_sibling_entry(node, slot))
- continue;
if (slot)
break;
}
@@ -1784,13 +1246,12 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
goto restart;
if (child == RADIX_TREE_RETRY)
break;
- } while (radix_tree_is_internal_node(child));
+ } while (node->shift && radix_tree_is_internal_node(child));
/* Update the iterator state */
- iter->index = (index &~ node_maxindex(node)) | (offset << node->shift);
+ iter->index = (index &~ node_maxindex(node)) | offset;
iter->next_index = (index | node_maxindex(node)) + 1;
iter->node = node;
- __set_iter_shift(iter, node->shift);
if (flags & RADIX_TREE_ITER_TAGGED)
set_iter_tags(iter, node, offset, tag);
@@ -1847,48 +1308,6 @@ radix_tree_gang_lookup(const struct radix_tree_root *root, void **results,
EXPORT_SYMBOL(radix_tree_gang_lookup);
/**
- * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree
- * @root: radix tree root
- * @results: where the results of the lookup are placed
- * @indices: where their indices should be placed (but usually NULL)
- * @first_index: start the lookup from this key
- * @max_items: place up to this many items at *results
- *
- * Performs an index-ascending scan of the tree for present items. Places
- * their slots at *@results and returns the number of items which were
- * placed at *@results.
- *
- * The implementation is naive.
- *
- * Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must
- * be dereferenced with radix_tree_deref_slot, and if using only RCU
- * protection, radix_tree_deref_slot may fail requiring a retry.
- */
-unsigned int
-radix_tree_gang_lookup_slot(const struct radix_tree_root *root,
- void __rcu ***results, unsigned long *indices,
- unsigned long first_index, unsigned int max_items)
-{
- struct radix_tree_iter iter;
- void __rcu **slot;
- unsigned int ret = 0;
-
- if (unlikely(!max_items))
- return 0;
-
- radix_tree_for_each_slot(slot, root, &iter, first_index) {
- results[ret] = slot;
- if (indices)
- indices[ret] = iter.index;
- if (++ret == max_items)
- break;
- }
-
- return ret;
-}
-EXPORT_SYMBOL(radix_tree_gang_lookup_slot);
-
-/**
* radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
* based on a tag
* @root: radix tree root
@@ -1964,28 +1383,11 @@ radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root,
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
-/**
- * __radix_tree_delete_node - try to free node after clearing a slot
- * @root: radix tree root
- * @node: node containing @index
- * @update_node: callback for changing leaf nodes
- *
- * After clearing the slot at @index in @node from radix tree
- * rooted at @root, call this function to attempt freeing the
- * node and shrinking the tree.
- */
-void __radix_tree_delete_node(struct radix_tree_root *root,
- struct radix_tree_node *node,
- radix_tree_update_node_t update_node)
-{
- delete_node(root, node, update_node);
-}
-
static bool __radix_tree_delete(struct radix_tree_root *root,
struct radix_tree_node *node, void __rcu **slot)
{
void *old = rcu_dereference_raw(*slot);
- int exceptional = radix_tree_exceptional_entry(old) ? -1 : 0;
+ int values = xa_is_value(old) ? -1 : 0;
unsigned offset = get_slot_offset(node, slot);
int tag;
@@ -1995,8 +1397,8 @@ static bool __radix_tree_delete(struct radix_tree_root *root,
for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
node_tag_clear(root, node, tag, offset);
- replace_slot(slot, NULL, node, -1, exceptional);
- return node && delete_node(root, node, NULL);
+ replace_slot(slot, NULL, node, -1, values);
+ return node && delete_node(root, node);
}
/**
@@ -2068,19 +1470,6 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
}
EXPORT_SYMBOL(radix_tree_delete);
-void radix_tree_clear_tags(struct radix_tree_root *root,
- struct radix_tree_node *node,
- void __rcu **slot)
-{
- if (node) {
- unsigned int tag, offset = get_slot_offset(node, slot);
- for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
- node_tag_clear(root, node, tag, offset);
- } else {
- root_tag_clear_all(root);
- }
-}
-
/**
* radix_tree_tagged - test whether any items in the tree are tagged
* @root: radix tree root
@@ -2106,33 +1495,12 @@ void idr_preload(gfp_t gfp_mask)
}
EXPORT_SYMBOL(idr_preload);
-int ida_pre_get(struct ida *ida, gfp_t gfp)
-{
- /*
- * The IDA API has no preload_end() equivalent. Instead,
- * ida_get_new() can return -EAGAIN, prompting the caller
- * to return to the ida_pre_get() step.
- */
- if (!__radix_tree_preload(gfp, IDA_PRELOAD_SIZE))
- preempt_enable();
-
- if (!this_cpu_read(ida_bitmap)) {
- struct ida_bitmap *bitmap = kzalloc(sizeof(*bitmap), gfp);
- if (!bitmap)
- return 0;
- if (this_cpu_cmpxchg(ida_bitmap, NULL, bitmap))
- kfree(bitmap);
- }
-
- return 1;
-}
-
void __rcu **idr_get_free(struct radix_tree_root *root,
struct radix_tree_iter *iter, gfp_t gfp,
unsigned long max)
{
struct radix_tree_node *node = NULL, *child;
- void __rcu **slot = (void __rcu **)&root->rnode;
+ void __rcu **slot = (void __rcu **)&root->xa_head;
unsigned long maxindex, start = iter->next_index;
unsigned int shift, offset = 0;
@@ -2148,8 +1516,10 @@ void __rcu **idr_get_free(struct radix_tree_root *root,
if (error < 0)
return ERR_PTR(error);
shift = error;
- child = rcu_dereference_raw(root->rnode);
+ child = rcu_dereference_raw(root->xa_head);
}
+ if (start == 0 && shift == 0)
+ shift = RADIX_TREE_MAP_SHIFT;
while (shift) {
shift -= RADIX_TREE_MAP_SHIFT;
@@ -2192,7 +1562,6 @@ void __rcu **idr_get_free(struct radix_tree_root *root,
else
iter->next_index = 1;
iter->node = node;
- __set_iter_shift(iter, shift);
set_iter_tags(iter, node, offset, IDR_FREE);
return slot;
@@ -2211,10 +1580,10 @@ void __rcu **idr_get_free(struct radix_tree_root *root,
*/
void idr_destroy(struct idr *idr)
{
- struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.rnode);
+ struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.xa_head);
if (radix_tree_is_internal_node(node))
radix_tree_free_nodes(node);
- idr->idr_rt.rnode = NULL;
+ idr->idr_rt.xa_head = NULL;
root_tag_set(&idr->idr_rt, IDR_FREE);
}
EXPORT_SYMBOL(idr_destroy);
@@ -2228,31 +1597,6 @@ radix_tree_node_ctor(void *arg)
INIT_LIST_HEAD(&node->private_list);
}
-static __init unsigned long __maxindex(unsigned int height)
-{
- unsigned int width = height * RADIX_TREE_MAP_SHIFT;
- int shift = RADIX_TREE_INDEX_BITS - width;
-
- if (shift < 0)
- return ~0UL;
- if (shift >= BITS_PER_LONG)
- return 0UL;
- return ~0UL >> shift;
-}
-
-static __init void radix_tree_init_maxnodes(void)
-{
- unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1];
- unsigned int i, j;
-
- for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
- height_to_maxindex[i] = __maxindex(i);
- for (i = 0; i < ARRAY_SIZE(height_to_maxnodes); i++) {
- for (j = i; j > 0; j--)
- height_to_maxnodes[i] += height_to_maxindex[j - 1] + 1;
- }
-}
-
static int radix_tree_cpu_dead(unsigned int cpu)
{
struct radix_tree_preload *rtp;
@@ -2266,8 +1610,6 @@ static int radix_tree_cpu_dead(unsigned int cpu)
kmem_cache_free(radix_tree_node_cachep, node);
rtp->nr--;
}
- kfree(per_cpu(ida_bitmap, cpu));
- per_cpu(ida_bitmap, cpu) = NULL;
return 0;
}
@@ -2277,11 +1619,11 @@ void __init radix_tree_init(void)
BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32);
BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK);
+ BUILD_BUG_ON(XA_CHUNK_SIZE > 255);
radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
sizeof(struct radix_tree_node), 0,
SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
radix_tree_node_ctor);
- radix_tree_init_maxnodes();
ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead",
NULL, radix_tree_cpu_dead);
WARN_ON(ret < 0);
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
new file mode 100644
index 000000000000..aa47754150ce
--- /dev/null
+++ b/lib/test_xarray.c
@@ -0,0 +1,1238 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * test_xarray.c: Test the XArray API
+ * Copyright (c) 2017-2018 Microsoft Corporation
+ * Author: Matthew Wilcox <willy@infradead.org>
+ */
+
+#include <linux/xarray.h>
+#include <linux/module.h>
+
+static unsigned int tests_run;
+static unsigned int tests_passed;
+
+#ifndef XA_DEBUG
+# ifdef __KERNEL__
+void xa_dump(const struct xarray *xa) { }
+# endif
+#undef XA_BUG_ON
+#define XA_BUG_ON(xa, x) do { \
+ tests_run++; \
+ if (x) { \
+ printk("BUG at %s:%d\n", __func__, __LINE__); \
+ xa_dump(xa); \
+ dump_stack(); \
+ } else { \
+ tests_passed++; \
+ } \
+} while (0)
+#endif
+
+static void *xa_store_index(struct xarray *xa, unsigned long index, gfp_t gfp)
+{
+ return xa_store(xa, index, xa_mk_value(index & LONG_MAX), gfp);
+}
+
+static void xa_alloc_index(struct xarray *xa, unsigned long index, gfp_t gfp)
+{
+ u32 id = 0;
+
+ XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_value(index & LONG_MAX),
+ gfp) != 0);
+ XA_BUG_ON(xa, id != index);
+}
+
+static void xa_erase_index(struct xarray *xa, unsigned long index)
+{
+ XA_BUG_ON(xa, xa_erase(xa, index) != xa_mk_value(index & LONG_MAX));
+ XA_BUG_ON(xa, xa_load(xa, index) != NULL);
+}
+
+/*
+ * If anyone needs this, please move it to xarray.c. We have no current
+ * users outside the test suite because all current multislot users want
+ * to use the advanced API.
+ */
+static void *xa_store_order(struct xarray *xa, unsigned long index,
+ unsigned order, void *entry, gfp_t gfp)
+{
+ XA_STATE_ORDER(xas, xa, index, order);
+ void *curr;
+
+ do {
+ xas_lock(&xas);
+ curr = xas_store(&xas, entry);
+ xas_unlock(&xas);
+ } while (xas_nomem(&xas, gfp));
+
+ return curr;
+}
+
+static noinline void check_xa_err(struct xarray *xa)
+{
+ XA_BUG_ON(xa, xa_err(xa_store_index(xa, 0, GFP_NOWAIT)) != 0);
+ XA_BUG_ON(xa, xa_err(xa_erase(xa, 0)) != 0);
+#ifndef __KERNEL__
+ /* The kernel does not fail GFP_NOWAIT allocations */
+ XA_BUG_ON(xa, xa_err(xa_store_index(xa, 1, GFP_NOWAIT)) != -ENOMEM);
+ XA_BUG_ON(xa, xa_err(xa_store_index(xa, 1, GFP_NOWAIT)) != -ENOMEM);
+#endif
+ XA_BUG_ON(xa, xa_err(xa_store_index(xa, 1, GFP_KERNEL)) != 0);
+ XA_BUG_ON(xa, xa_err(xa_store(xa, 1, xa_mk_value(0), GFP_KERNEL)) != 0);
+ XA_BUG_ON(xa, xa_err(xa_erase(xa, 1)) != 0);
+// kills the test-suite :-(
+// XA_BUG_ON(xa, xa_err(xa_store(xa, 0, xa_mk_internal(0), 0)) != -EINVAL);
+}
+
+static noinline void check_xas_retry(struct xarray *xa)
+{
+ XA_STATE(xas, xa, 0);
+ void *entry;
+
+ xa_store_index(xa, 0, GFP_KERNEL);
+ xa_store_index(xa, 1, GFP_KERNEL);
+
+ rcu_read_lock();
+ XA_BUG_ON(xa, xas_find(&xas, ULONG_MAX) != xa_mk_value(0));
+ xa_erase_index(xa, 1);
+ XA_BUG_ON(xa, !xa_is_retry(xas_reload(&xas)));
+ XA_BUG_ON(xa, xas_retry(&xas, NULL));
+ XA_BUG_ON(xa, xas_retry(&xas, xa_mk_value(0)));
+ xas_reset(&xas);
+ XA_BUG_ON(xa, xas.xa_node != XAS_RESTART);
+ XA_BUG_ON(xa, xas_next_entry(&xas, ULONG_MAX) != xa_mk_value(0));
+ XA_BUG_ON(xa, xas.xa_node != NULL);
+
+ XA_BUG_ON(xa, xa_store_index(xa, 1, GFP_KERNEL) != NULL);
+ XA_BUG_ON(xa, !xa_is_internal(xas_reload(&xas)));
+ xas.xa_node = XAS_RESTART;
+ XA_BUG_ON(xa, xas_next_entry(&xas, ULONG_MAX) != xa_mk_value(0));
+ rcu_read_unlock();
+
+ /* Make sure we can iterate through retry entries */
+ xas_lock(&xas);
+ xas_set(&xas, 0);
+ xas_store(&xas, XA_RETRY_ENTRY);
+ xas_set(&xas, 1);
+ xas_store(&xas, XA_RETRY_ENTRY);
+
+ xas_set(&xas, 0);
+ xas_for_each(&xas, entry, ULONG_MAX) {
+ xas_store(&xas, xa_mk_value(xas.xa_index));
+ }
+ xas_unlock(&xas);
+
+ xa_erase_index(xa, 0);
+ xa_erase_index(xa, 1);
+}
+
+static noinline void check_xa_load(struct xarray *xa)
+{
+ unsigned long i, j;
+
+ for (i = 0; i < 1024; i++) {
+ for (j = 0; j < 1024; j++) {
+ void *entry = xa_load(xa, j);
+ if (j < i)
+ XA_BUG_ON(xa, xa_to_value(entry) != j);
+ else
+ XA_BUG_ON(xa, entry);
+ }
+ XA_BUG_ON(xa, xa_store_index(xa, i, GFP_KERNEL) != NULL);
+ }
+
+ for (i = 0; i < 1024; i++) {
+ for (j = 0; j < 1024; j++) {
+ void *entry = xa_load(xa, j);
+ if (j >= i)
+ XA_BUG_ON(xa, xa_to_value(entry) != j);
+ else
+ XA_BUG_ON(xa, entry);
+ }
+ xa_erase_index(xa, i);
+ }
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index)
+{
+ unsigned int order;
+ unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 8 : 1;
+
+ /* NULL elements have no marks set */
+ XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0));
+ xa_set_mark(xa, index, XA_MARK_0);
+ XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0));
+
+ /* Storing a pointer will not make a mark appear */
+ XA_BUG_ON(xa, xa_store_index(xa, index, GFP_KERNEL) != NULL);
+ XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0));
+ xa_set_mark(xa, index, XA_MARK_0);
+ XA_BUG_ON(xa, !xa_get_mark(xa, index, XA_MARK_0));
+
+ /* Setting one mark will not set another mark */
+ XA_BUG_ON(xa, xa_get_mark(xa, index + 1, XA_MARK_0));
+ XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_1));
+
+ /* Storing NULL clears marks, and they can't be set again */
+ xa_erase_index(xa, index);
+ XA_BUG_ON(xa, !xa_empty(xa));
+ XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0));
+ xa_set_mark(xa, index, XA_MARK_0);
+ XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0));
+
+ /*
+ * Storing a multi-index entry over entries with marks gives the
+ * entire entry the union of the marks
+ */
+ BUG_ON((index % 4) != 0);
+ for (order = 2; order < max_order; order++) {
+ unsigned long base = round_down(index, 1UL << order);
+ unsigned long next = base + (1UL << order);
+ unsigned long i;
+
+ XA_BUG_ON(xa, xa_store_index(xa, index + 1, GFP_KERNEL));
+ xa_set_mark(xa, index + 1, XA_MARK_0);
+ XA_BUG_ON(xa, xa_store_index(xa, index + 2, GFP_KERNEL));
+ xa_set_mark(xa, index + 2, XA_MARK_1);
+ XA_BUG_ON(xa, xa_store_index(xa, next, GFP_KERNEL));
+ xa_store_order(xa, index, order, xa_mk_value(index),
+ GFP_KERNEL);
+ for (i = base; i < next; i++) {
+ XA_STATE(xas, xa, i);
+ unsigned int seen = 0;
+ void *entry;
+
+ XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_0));
+ XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_1));
+ XA_BUG_ON(xa, xa_get_mark(xa, i, XA_MARK_2));
+
+ /* We should see two elements in the array */
+ xas_for_each(&xas, entry, ULONG_MAX)
+ seen++;
+ XA_BUG_ON(xa, seen != 2);
+
+ /* One of which is marked */
+ xas_set(&xas, 0);
+ seen = 0;
+ xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_0)
+ seen++;
+ XA_BUG_ON(xa, seen != 1);
+ }
+ XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_0));
+ XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_1));
+ XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_2));
+ xa_erase_index(xa, index);
+ xa_erase_index(xa, next);
+ XA_BUG_ON(xa, !xa_empty(xa));
+ }
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_xa_mark_2(struct xarray *xa)
+{
+ XA_STATE(xas, xa, 0);
+ unsigned long index;
+ unsigned int count = 0;
+ void *entry;
+
+ xa_store_index(xa, 0, GFP_KERNEL);
+ xa_set_mark(xa, 0, XA_MARK_0);
+ xas_lock(&xas);
+ xas_load(&xas);
+ xas_init_marks(&xas);
+ xas_unlock(&xas);
+ XA_BUG_ON(xa, !xa_get_mark(xa, 0, XA_MARK_0) == 0);
+
+ for (index = 3500; index < 4500; index++) {
+ xa_store_index(xa, index, GFP_KERNEL);
+ xa_set_mark(xa, index, XA_MARK_0);
+ }
+
+ xas_reset(&xas);
+ rcu_read_lock();
+ xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_0)
+ count++;
+ rcu_read_unlock();
+ XA_BUG_ON(xa, count != 1000);
+
+ xas_lock(&xas);
+ xas_for_each(&xas, entry, ULONG_MAX) {
+ xas_init_marks(&xas);
+ XA_BUG_ON(xa, !xa_get_mark(xa, xas.xa_index, XA_MARK_0));
+ XA_BUG_ON(xa, !xas_get_mark(&xas, XA_MARK_0));
+ }
+ xas_unlock(&xas);
+
+ xa_destroy(xa);
+}
+
+static noinline void check_xa_mark(struct xarray *xa)
+{
+ unsigned long index;
+
+ for (index = 0; index < 16384; index += 4)
+ check_xa_mark_1(xa, index);
+
+ check_xa_mark_2(xa);
+}
+
+static noinline void check_xa_shrink(struct xarray *xa)
+{
+ XA_STATE(xas, xa, 1);
+ struct xa_node *node;
+ unsigned int order;
+ unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 15 : 1;
+
+ XA_BUG_ON(xa, !xa_empty(xa));
+ XA_BUG_ON(xa, xa_store_index(xa, 0, GFP_KERNEL) != NULL);
+ XA_BUG_ON(xa, xa_store_index(xa, 1, GFP_KERNEL) != NULL);
+
+ /*
+ * Check that erasing the entry at 1 shrinks the tree and properly
+ * marks the node as being deleted.
+ */
+ xas_lock(&xas);
+ XA_BUG_ON(xa, xas_load(&xas) != xa_mk_value(1));
+ node = xas.xa_node;
+ XA_BUG_ON(xa, xa_entry_locked(xa, node, 0) != xa_mk_value(0));
+ XA_BUG_ON(xa, xas_store(&xas, NULL) != xa_mk_value(1));
+ XA_BUG_ON(xa, xa_load(xa, 1) != NULL);
+ XA_BUG_ON(xa, xas.xa_node != XAS_BOUNDS);
+ XA_BUG_ON(xa, xa_entry_locked(xa, node, 0) != XA_RETRY_ENTRY);
+ XA_BUG_ON(xa, xas_load(&xas) != NULL);
+ xas_unlock(&xas);
+ XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(0));
+ xa_erase_index(xa, 0);
+ XA_BUG_ON(xa, !xa_empty(xa));
+
+ for (order = 0; order < max_order; order++) {
+ unsigned long max = (1UL << order) - 1;
+ xa_store_order(xa, 0, order, xa_mk_value(0), GFP_KERNEL);
+ XA_BUG_ON(xa, xa_load(xa, max) != xa_mk_value(0));
+ XA_BUG_ON(xa, xa_load(xa, max + 1) != NULL);
+ rcu_read_lock();
+ node = xa_head(xa);
+ rcu_read_unlock();
+ XA_BUG_ON(xa, xa_store_index(xa, ULONG_MAX, GFP_KERNEL) !=
+ NULL);
+ rcu_read_lock();
+ XA_BUG_ON(xa, xa_head(xa) == node);
+ rcu_read_unlock();
+ XA_BUG_ON(xa, xa_load(xa, max + 1) != NULL);
+ xa_erase_index(xa, ULONG_MAX);
+ XA_BUG_ON(xa, xa->xa_head != node);
+ xa_erase_index(xa, 0);
+ }
+}
+
+static noinline void check_cmpxchg(struct xarray *xa)
+{
+ void *FIVE = xa_mk_value(5);
+ void *SIX = xa_mk_value(6);
+ void *LOTS = xa_mk_value(12345678);
+
+ XA_BUG_ON(xa, !xa_empty(xa));
+ XA_BUG_ON(xa, xa_store_index(xa, 12345678, GFP_KERNEL) != NULL);
+ XA_BUG_ON(xa, xa_insert(xa, 12345678, xa, GFP_KERNEL) != -EEXIST);
+ XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, SIX, FIVE, GFP_KERNEL) != LOTS);
+ XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, LOTS, FIVE, GFP_KERNEL) != LOTS);
+ XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, FIVE, LOTS, GFP_KERNEL) != FIVE);
+ XA_BUG_ON(xa, xa_cmpxchg(xa, 5, FIVE, NULL, GFP_KERNEL) != NULL);
+ XA_BUG_ON(xa, xa_cmpxchg(xa, 5, NULL, FIVE, GFP_KERNEL) != NULL);
+ xa_erase_index(xa, 12345678);
+ xa_erase_index(xa, 5);
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_reserve(struct xarray *xa)
+{
+ void *entry;
+ unsigned long index = 0;
+
+ /* An array with a reserved entry is not empty */
+ XA_BUG_ON(xa, !xa_empty(xa));
+ xa_reserve(xa, 12345678, GFP_KERNEL);
+ XA_BUG_ON(xa, xa_empty(xa));
+ XA_BUG_ON(xa, xa_load(xa, 12345678));
+ xa_release(xa, 12345678);
+ XA_BUG_ON(xa, !xa_empty(xa));
+
+ /* Releasing a used entry does nothing */
+ xa_reserve(xa, 12345678, GFP_KERNEL);
+ XA_BUG_ON(xa, xa_store_index(xa, 12345678, GFP_NOWAIT) != NULL);
+ xa_release(xa, 12345678);
+ xa_erase_index(xa, 12345678);
+ XA_BUG_ON(xa, !xa_empty(xa));
+
+ /* cmpxchg sees a reserved entry as NULL */
+ xa_reserve(xa, 12345678, GFP_KERNEL);
+ XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, NULL, xa_mk_value(12345678),
+ GFP_NOWAIT) != NULL);
+ xa_release(xa, 12345678);
+ xa_erase_index(xa, 12345678);
+ XA_BUG_ON(xa, !xa_empty(xa));
+
+ /* Can iterate through a reserved entry */
+ xa_store_index(xa, 5, GFP_KERNEL);
+ xa_reserve(xa, 6, GFP_KERNEL);
+ xa_store_index(xa, 7, GFP_KERNEL);
+
+ xa_for_each(xa, entry, index, ULONG_MAX, XA_PRESENT) {
+ XA_BUG_ON(xa, index != 5 && index != 7);
+ }
+ xa_destroy(xa);
+}
+
+static noinline void check_xas_erase(struct xarray *xa)
+{
+ XA_STATE(xas, xa, 0);
+ void *entry;
+ unsigned long i, j;
+
+ for (i = 0; i < 200; i++) {
+ for (j = i; j < 2 * i + 17; j++) {
+ xas_set(&xas, j);
+ do {
+ xas_lock(&xas);
+ xas_store(&xas, xa_mk_value(j));
+ xas_unlock(&xas);
+ } while (xas_nomem(&xas, GFP_KERNEL));
+ }
+
+ xas_set(&xas, ULONG_MAX);
+ do {
+ xas_lock(&xas);
+ xas_store(&xas, xa_mk_value(0));
+ xas_unlock(&xas);
+ } while (xas_nomem(&xas, GFP_KERNEL));
+
+ xas_lock(&xas);
+ xas_store(&xas, NULL);
+
+ xas_set(&xas, 0);
+ j = i;
+ xas_for_each(&xas, entry, ULONG_MAX) {
+ XA_BUG_ON(xa, entry != xa_mk_value(j));
+ xas_store(&xas, NULL);
+ j++;
+ }
+ xas_unlock(&xas);
+ XA_BUG_ON(xa, !xa_empty(xa));
+ }
+}
+
+#ifdef CONFIG_XARRAY_MULTI
+static noinline void check_multi_store_1(struct xarray *xa, unsigned long index,
+ unsigned int order)
+{
+ XA_STATE(xas, xa, index);
+ unsigned long min = index & ~((1UL << order) - 1);
+ unsigned long max = min + (1UL << order);
+
+ xa_store_order(xa, index, order, xa_mk_value(index), GFP_KERNEL);
+ XA_BUG_ON(xa, xa_load(xa, min) != xa_mk_value(index));
+ XA_BUG_ON(xa, xa_load(xa, max - 1) != xa_mk_value(index));
+ XA_BUG_ON(xa, xa_load(xa, max) != NULL);
+ XA_BUG_ON(xa, xa_load(xa, min - 1) != NULL);
+
+ XA_BUG_ON(xa, xas_store(&xas, xa_mk_value(min)) != xa_mk_value(index));
+ XA_BUG_ON(xa, xa_load(xa, min) != xa_mk_value(min));
+ XA_BUG_ON(xa, xa_load(xa, max - 1) != xa_mk_value(min));
+ XA_BUG_ON(xa, xa_load(xa, max) != NULL);
+ XA_BUG_ON(xa, xa_load(xa, min - 1) != NULL);
+
+ xa_erase_index(xa, min);
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_multi_store_2(struct xarray *xa, unsigned long index,
+ unsigned int order)
+{
+ XA_STATE(xas, xa, index);
+ xa_store_order(xa, index, order, xa_mk_value(0), GFP_KERNEL);
+
+ XA_BUG_ON(xa, xas_store(&xas, xa_mk_value(1)) != xa_mk_value(0));
+ XA_BUG_ON(xa, xas.xa_index != index);
+ XA_BUG_ON(xa, xas_store(&xas, NULL) != xa_mk_value(1));
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+#endif
+
+static noinline void check_multi_store(struct xarray *xa)
+{
+#ifdef CONFIG_XARRAY_MULTI
+ unsigned long i, j, k;
+ unsigned int max_order = (sizeof(long) == 4) ? 30 : 60;
+
+ /* Loading from any position returns the same value */
+ xa_store_order(xa, 0, 1, xa_mk_value(0), GFP_KERNEL);
+ XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(0));
+ XA_BUG_ON(xa, xa_load(xa, 1) != xa_mk_value(0));
+ XA_BUG_ON(xa, xa_load(xa, 2) != NULL);
+ rcu_read_lock();
+ XA_BUG_ON(xa, xa_to_node(xa_head(xa))->count != 2);
+ XA_BUG_ON(xa, xa_to_node(xa_head(xa))->nr_values != 2);
+ rcu_read_unlock();
+
+ /* Storing adjacent to the value does not alter the value */
+ xa_store(xa, 3, xa, GFP_KERNEL);
+ XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(0));
+ XA_BUG_ON(xa, xa_load(xa, 1) != xa_mk_value(0));
+ XA_BUG_ON(xa, xa_load(xa, 2) != NULL);
+ rcu_read_lock();
+ XA_BUG_ON(xa, xa_to_node(xa_head(xa))->count != 3);
+ XA_BUG_ON(xa, xa_to_node(xa_head(xa))->nr_values != 2);
+ rcu_read_unlock();
+
+ /* Overwriting multiple indexes works */
+ xa_store_order(xa, 0, 2, xa_mk_value(1), GFP_KERNEL);
+ XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(1));
+ XA_BUG_ON(xa, xa_load(xa, 1) != xa_mk_value(1));
+ XA_BUG_ON(xa, xa_load(xa, 2) != xa_mk_value(1));
+ XA_BUG_ON(xa, xa_load(xa, 3) != xa_mk_value(1));
+ XA_BUG_ON(xa, xa_load(xa, 4) != NULL);
+ rcu_read_lock();
+ XA_BUG_ON(xa, xa_to_node(xa_head(xa))->count != 4);
+ XA_BUG_ON(xa, xa_to_node(xa_head(xa))->nr_values != 4);
+ rcu_read_unlock();
+
+ /* We can erase multiple values with a single store */
+ xa_store_order(xa, 0, 63, NULL, GFP_KERNEL);
+ XA_BUG_ON(xa, !xa_empty(xa));
+
+ /* Even when the first slot is empty but the others aren't */
+ xa_store_index(xa, 1, GFP_KERNEL);
+ xa_store_index(xa, 2, GFP_KERNEL);
+ xa_store_order(xa, 0, 2, NULL, GFP_KERNEL);
+ XA_BUG_ON(xa, !xa_empty(xa));
+
+ for (i = 0; i < max_order; i++) {
+ for (j = 0; j < max_order; j++) {
+ xa_store_order(xa, 0, i, xa_mk_value(i), GFP_KERNEL);
+ xa_store_order(xa, 0, j, xa_mk_value(j), GFP_KERNEL);
+
+ for (k = 0; k < max_order; k++) {
+ void *entry = xa_load(xa, (1UL << k) - 1);
+ if ((i < k) && (j < k))
+ XA_BUG_ON(xa, entry != NULL);
+ else
+ XA_BUG_ON(xa, entry != xa_mk_value(j));
+ }
+
+ xa_erase(xa, 0);
+ XA_BUG_ON(xa, !xa_empty(xa));
+ }
+ }
+
+ for (i = 0; i < 20; i++) {
+ check_multi_store_1(xa, 200, i);
+ check_multi_store_1(xa, 0, i);
+ check_multi_store_1(xa, (1UL << i) + 1, i);
+ }
+ check_multi_store_2(xa, 4095, 9);
+#endif
+}
+
+static DEFINE_XARRAY_ALLOC(xa0);
+
+static noinline void check_xa_alloc(void)
+{
+ int i;
+ u32 id;
+
+ /* An empty array should assign 0 to the first alloc */
+ xa_alloc_index(&xa0, 0, GFP_KERNEL);
+
+ /* Erasing it should make the array empty again */
+ xa_erase_index(&xa0, 0);
+ XA_BUG_ON(&xa0, !xa_empty(&xa0));
+
+ /* And it should assign 0 again */
+ xa_alloc_index(&xa0, 0, GFP_KERNEL);
+
+ /* The next assigned ID should be 1 */
+ xa_alloc_index(&xa0, 1, GFP_KERNEL);
+ xa_erase_index(&xa0, 1);
+
+ /* Storing a value should mark it used */
+ xa_store_index(&xa0, 1, GFP_KERNEL);
+ xa_alloc_index(&xa0, 2, GFP_KERNEL);
+
+ /* If we then erase 0, it should be free */
+ xa_erase_index(&xa0, 0);
+ xa_alloc_index(&xa0, 0, GFP_KERNEL);
+
+ xa_erase_index(&xa0, 1);
+ xa_erase_index(&xa0, 2);
+
+ for (i = 1; i < 5000; i++) {
+ xa_alloc_index(&xa0, i, GFP_KERNEL);
+ }
+
+ xa_destroy(&xa0);
+
+ id = 0xfffffffeU;
+ XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_value(0),
+ GFP_KERNEL) != 0);
+ XA_BUG_ON(&xa0, id != 0xfffffffeU);
+ XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_value(0),
+ GFP_KERNEL) != 0);
+ XA_BUG_ON(&xa0, id != 0xffffffffU);
+ XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_value(0),
+ GFP_KERNEL) != -ENOSPC);
+ XA_BUG_ON(&xa0, id != 0xffffffffU);
+ xa_destroy(&xa0);
+}
+
+static noinline void __check_store_iter(struct xarray *xa, unsigned long start,
+ unsigned int order, unsigned int present)
+{
+ XA_STATE_ORDER(xas, xa, start, order);
+ void *entry;
+ unsigned int count = 0;
+
+retry:
+ xas_lock(&xas);
+ xas_for_each_conflict(&xas, entry) {
+ XA_BUG_ON(xa, !xa_is_value(entry));
+ XA_BUG_ON(xa, entry < xa_mk_value(start));
+ XA_BUG_ON(xa, entry > xa_mk_value(start + (1UL << order) - 1));
+ count++;
+ }
+ xas_store(&xas, xa_mk_value(start));
+ xas_unlock(&xas);
+ if (xas_nomem(&xas, GFP_KERNEL)) {
+ count = 0;
+ goto retry;
+ }
+ XA_BUG_ON(xa, xas_error(&xas));
+ XA_BUG_ON(xa, count != present);
+ XA_BUG_ON(xa, xa_load(xa, start) != xa_mk_value(start));
+ XA_BUG_ON(xa, xa_load(xa, start + (1UL << order) - 1) !=
+ xa_mk_value(start));
+ xa_erase_index(xa, start);
+}
+
+static noinline void check_store_iter(struct xarray *xa)
+{
+ unsigned int i, j;
+ unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1;
+
+ for (i = 0; i < max_order; i++) {
+ unsigned int min = 1 << i;
+ unsigned int max = (2 << i) - 1;
+ __check_store_iter(xa, 0, i, 0);
+ XA_BUG_ON(xa, !xa_empty(xa));
+ __check_store_iter(xa, min, i, 0);
+ XA_BUG_ON(xa, !xa_empty(xa));
+
+ xa_store_index(xa, min, GFP_KERNEL);
+ __check_store_iter(xa, min, i, 1);
+ XA_BUG_ON(xa, !xa_empty(xa));
+ xa_store_index(xa, max, GFP_KERNEL);
+ __check_store_iter(xa, min, i, 1);
+ XA_BUG_ON(xa, !xa_empty(xa));
+
+ for (j = 0; j < min; j++)
+ xa_store_index(xa, j, GFP_KERNEL);
+ __check_store_iter(xa, 0, i, min);
+ XA_BUG_ON(xa, !xa_empty(xa));
+ for (j = 0; j < min; j++)
+ xa_store_index(xa, min + j, GFP_KERNEL);
+ __check_store_iter(xa, min, i, min);
+ XA_BUG_ON(xa, !xa_empty(xa));
+ }
+#ifdef CONFIG_XARRAY_MULTI
+ xa_store_index(xa, 63, GFP_KERNEL);
+ xa_store_index(xa, 65, GFP_KERNEL);
+ __check_store_iter(xa, 64, 2, 1);
+ xa_erase_index(xa, 63);
+#endif
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_multi_find(struct xarray *xa)
+{
+#ifdef CONFIG_XARRAY_MULTI
+ unsigned long index;
+
+ xa_store_order(xa, 12, 2, xa_mk_value(12), GFP_KERNEL);
+ XA_BUG_ON(xa, xa_store_index(xa, 16, GFP_KERNEL) != NULL);
+
+ index = 0;
+ XA_BUG_ON(xa, xa_find(xa, &index, ULONG_MAX, XA_PRESENT) !=
+ xa_mk_value(12));
+ XA_BUG_ON(xa, index != 12);
+ index = 13;
+ XA_BUG_ON(xa, xa_find(xa, &index, ULONG_MAX, XA_PRESENT) !=
+ xa_mk_value(12));
+ XA_BUG_ON(xa, (index < 12) || (index >= 16));
+ XA_BUG_ON(xa, xa_find_after(xa, &index, ULONG_MAX, XA_PRESENT) !=
+ xa_mk_value(16));
+ XA_BUG_ON(xa, index != 16);
+
+ xa_erase_index(xa, 12);
+ xa_erase_index(xa, 16);
+ XA_BUG_ON(xa, !xa_empty(xa));
+#endif
+}
+
+static noinline void check_multi_find_2(struct xarray *xa)
+{
+ unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 10 : 1;
+ unsigned int i, j;
+ void *entry;
+
+ for (i = 0; i < max_order; i++) {
+ unsigned long index = 1UL << i;
+ for (j = 0; j < index; j++) {
+ XA_STATE(xas, xa, j + index);
+ xa_store_index(xa, index - 1, GFP_KERNEL);
+ xa_store_order(xa, index, i, xa_mk_value(index),
+ GFP_KERNEL);
+ rcu_read_lock();
+ xas_for_each(&xas, entry, ULONG_MAX) {
+ xa_erase_index(xa, index);
+ }
+ rcu_read_unlock();
+ xa_erase_index(xa, index - 1);
+ XA_BUG_ON(xa, !xa_empty(xa));
+ }
+ }
+}
+
+static noinline void check_find(struct xarray *xa)
+{
+ unsigned long i, j, k;
+
+ XA_BUG_ON(xa, !xa_empty(xa));
+
+ /*
+ * Check xa_find with all pairs between 0 and 99 inclusive,
+ * starting at every index between 0 and 99
+ */
+ for (i = 0; i < 100; i++) {
+ XA_BUG_ON(xa, xa_store_index(xa, i, GFP_KERNEL) != NULL);
+ xa_set_mark(xa, i, XA_MARK_0);
+ for (j = 0; j < i; j++) {
+ XA_BUG_ON(xa, xa_store_index(xa, j, GFP_KERNEL) !=
+ NULL);
+ xa_set_mark(xa, j, XA_MARK_0);
+ for (k = 0; k < 100; k++) {
+ unsigned long index = k;
+ void *entry = xa_find(xa, &index, ULONG_MAX,
+ XA_PRESENT);
+ if (k <= j)
+ XA_BUG_ON(xa, index != j);
+ else if (k <= i)
+ XA_BUG_ON(xa, index != i);
+ else
+ XA_BUG_ON(xa, entry != NULL);
+
+ index = k;
+ entry = xa_find(xa, &index, ULONG_MAX,
+ XA_MARK_0);
+ if (k <= j)
+ XA_BUG_ON(xa, index != j);
+ else if (k <= i)
+ XA_BUG_ON(xa, index != i);
+ else
+ XA_BUG_ON(xa, entry != NULL);
+ }
+ xa_erase_index(xa, j);
+ XA_BUG_ON(xa, xa_get_mark(xa, j, XA_MARK_0));
+ XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_0));
+ }
+ xa_erase_index(xa, i);
+ XA_BUG_ON(xa, xa_get_mark(xa, i, XA_MARK_0));
+ }
+ XA_BUG_ON(xa, !xa_empty(xa));
+ check_multi_find(xa);
+ check_multi_find_2(xa);
+}
+
+/* See find_swap_entry() in mm/shmem.c */
+static noinline unsigned long xa_find_entry(struct xarray *xa, void *item)
+{
+ XA_STATE(xas, xa, 0);
+ unsigned int checked = 0;
+ void *entry;
+
+ rcu_read_lock();
+ xas_for_each(&xas, entry, ULONG_MAX) {
+ if (xas_retry(&xas, entry))
+ continue;
+ if (entry == item)
+ break;
+ checked++;
+ if ((checked % 4) != 0)
+ continue;
+ xas_pause(&xas);
+ }
+ rcu_read_unlock();
+
+ return entry ? xas.xa_index : -1;
+}
+
+static noinline void check_find_entry(struct xarray *xa)
+{
+#ifdef CONFIG_XARRAY_MULTI
+ unsigned int order;
+ unsigned long offset, index;
+
+ for (order = 0; order < 20; order++) {
+ for (offset = 0; offset < (1UL << (order + 3));
+ offset += (1UL << order)) {
+ for (index = 0; index < (1UL << (order + 5));
+ index += (1UL << order)) {
+ xa_store_order(xa, index, order,
+ xa_mk_value(index), GFP_KERNEL);
+ XA_BUG_ON(xa, xa_load(xa, index) !=
+ xa_mk_value(index));
+ XA_BUG_ON(xa, xa_find_entry(xa,
+ xa_mk_value(index)) != index);
+ }
+ XA_BUG_ON(xa, xa_find_entry(xa, xa) != -1);
+ xa_destroy(xa);
+ }
+ }
+#endif
+
+ XA_BUG_ON(xa, xa_find_entry(xa, xa) != -1);
+ xa_store_index(xa, ULONG_MAX, GFP_KERNEL);
+ XA_BUG_ON(xa, xa_find_entry(xa, xa) != -1);
+ XA_BUG_ON(xa, xa_find_entry(xa, xa_mk_value(LONG_MAX)) != -1);
+ xa_erase_index(xa, ULONG_MAX);
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_move_small(struct xarray *xa, unsigned long idx)
+{
+ XA_STATE(xas, xa, 0);
+ unsigned long i;
+
+ xa_store_index(xa, 0, GFP_KERNEL);
+ xa_store_index(xa, idx, GFP_KERNEL);
+
+ rcu_read_lock();
+ for (i = 0; i < idx * 4; i++) {
+ void *entry = xas_next(&xas);
+ if (i <= idx)
+ XA_BUG_ON(xa, xas.xa_node == XAS_RESTART);
+ XA_BUG_ON(xa, xas.xa_index != i);
+ if (i == 0 || i == idx)
+ XA_BUG_ON(xa, entry != xa_mk_value(i));
+ else
+ XA_BUG_ON(xa, entry != NULL);
+ }
+ xas_next(&xas);
+ XA_BUG_ON(xa, xas.xa_index != i);
+
+ do {
+ void *entry = xas_prev(&xas);
+ i--;
+ if (i <= idx)
+ XA_BUG_ON(xa, xas.xa_node == XAS_RESTART);
+ XA_BUG_ON(xa, xas.xa_index != i);
+ if (i == 0 || i == idx)
+ XA_BUG_ON(xa, entry != xa_mk_value(i));
+ else
+ XA_BUG_ON(xa, entry != NULL);
+ } while (i > 0);
+
+ xas_set(&xas, ULONG_MAX);
+ XA_BUG_ON(xa, xas_next(&xas) != NULL);
+ XA_BUG_ON(xa, xas.xa_index != ULONG_MAX);
+ XA_BUG_ON(xa, xas_next(&xas) != xa_mk_value(0));
+ XA_BUG_ON(xa, xas.xa_index != 0);
+ XA_BUG_ON(xa, xas_prev(&xas) != NULL);
+ XA_BUG_ON(xa, xas.xa_index != ULONG_MAX);
+ rcu_read_unlock();
+
+ xa_erase_index(xa, 0);
+ xa_erase_index(xa, idx);
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_move(struct xarray *xa)
+{
+ XA_STATE(xas, xa, (1 << 16) - 1);
+ unsigned long i;
+
+ for (i = 0; i < (1 << 16); i++)
+ XA_BUG_ON(xa, xa_store_index(xa, i, GFP_KERNEL) != NULL);
+
+ rcu_read_lock();
+ do {
+ void *entry = xas_prev(&xas);
+ i--;
+ XA_BUG_ON(xa, entry != xa_mk_value(i));
+ XA_BUG_ON(xa, i != xas.xa_index);
+ } while (i != 0);
+
+ XA_BUG_ON(xa, xas_prev(&xas) != NULL);
+ XA_BUG_ON(xa, xas.xa_index != ULONG_MAX);
+
+ do {
+ void *entry = xas_next(&xas);
+ XA_BUG_ON(xa, entry != xa_mk_value(i));
+ XA_BUG_ON(xa, i != xas.xa_index);
+ i++;
+ } while (i < (1 << 16));
+ rcu_read_unlock();
+
+ for (i = (1 << 8); i < (1 << 15); i++)
+ xa_erase_index(xa, i);
+
+ i = xas.xa_index;
+
+ rcu_read_lock();
+ do {
+ void *entry = xas_prev(&xas);
+ i--;
+ if ((i < (1 << 8)) || (i >= (1 << 15)))
+ XA_BUG_ON(xa, entry != xa_mk_value(i));
+ else
+ XA_BUG_ON(xa, entry != NULL);
+ XA_BUG_ON(xa, i != xas.xa_index);
+ } while (i != 0);
+
+ XA_BUG_ON(xa, xas_prev(&xas) != NULL);
+ XA_BUG_ON(xa, xas.xa_index != ULONG_MAX);
+
+ do {
+ void *entry = xas_next(&xas);
+ if ((i < (1 << 8)) || (i >= (1 << 15)))
+ XA_BUG_ON(xa, entry != xa_mk_value(i));
+ else
+ XA_BUG_ON(xa, entry != NULL);
+ XA_BUG_ON(xa, i != xas.xa_index);
+ i++;
+ } while (i < (1 << 16));
+ rcu_read_unlock();
+
+ xa_destroy(xa);
+
+ for (i = 0; i < 16; i++)
+ check_move_small(xa, 1UL << i);
+
+ for (i = 2; i < 16; i++)
+ check_move_small(xa, (1UL << i) - 1);
+}
+
+static noinline void xa_store_many_order(struct xarray *xa,
+ unsigned long index, unsigned order)
+{
+ XA_STATE_ORDER(xas, xa, index, order);
+ unsigned int i = 0;
+
+ do {
+ xas_lock(&xas);
+ XA_BUG_ON(xa, xas_find_conflict(&xas));
+ xas_create_range(&xas);
+ if (xas_error(&xas))
+ goto unlock;
+ for (i = 0; i < (1U << order); i++) {
+ XA_BUG_ON(xa, xas_store(&xas, xa_mk_value(index + i)));
+ xas_next(&xas);
+ }
+unlock:
+ xas_unlock(&xas);
+ } while (xas_nomem(&xas, GFP_KERNEL));
+
+ XA_BUG_ON(xa, xas_error(&xas));
+}
+
+static noinline void check_create_range_1(struct xarray *xa,
+ unsigned long index, unsigned order)
+{
+ unsigned long i;
+
+ xa_store_many_order(xa, index, order);
+ for (i = index; i < index + (1UL << order); i++)
+ xa_erase_index(xa, i);
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_create_range_2(struct xarray *xa, unsigned order)
+{
+ unsigned long i;
+ unsigned long nr = 1UL << order;
+
+ for (i = 0; i < nr * nr; i += nr)
+ xa_store_many_order(xa, i, order);
+ for (i = 0; i < nr * nr; i++)
+ xa_erase_index(xa, i);
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_create_range_3(void)
+{
+ XA_STATE(xas, NULL, 0);
+ xas_set_err(&xas, -EEXIST);
+ xas_create_range(&xas);
+ XA_BUG_ON(NULL, xas_error(&xas) != -EEXIST);
+}
+
+static noinline void check_create_range_4(struct xarray *xa,
+ unsigned long index, unsigned order)
+{
+ XA_STATE_ORDER(xas, xa, index, order);
+ unsigned long base = xas.xa_index;
+ unsigned long i = 0;
+
+ xa_store_index(xa, index, GFP_KERNEL);
+ do {
+ xas_lock(&xas);
+ xas_create_range(&xas);
+ if (xas_error(&xas))
+ goto unlock;
+ for (i = 0; i < (1UL << order); i++) {
+ void *old = xas_store(&xas, xa_mk_value(base + i));
+ if (xas.xa_index == index)
+ XA_BUG_ON(xa, old != xa_mk_value(base + i));
+ else
+ XA_BUG_ON(xa, old != NULL);
+ xas_next(&xas);
+ }
+unlock:
+ xas_unlock(&xas);
+ } while (xas_nomem(&xas, GFP_KERNEL));
+
+ XA_BUG_ON(xa, xas_error(&xas));
+
+ for (i = base; i < base + (1UL << order); i++)
+ xa_erase_index(xa, i);
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_create_range(struct xarray *xa)
+{
+ unsigned int order;
+ unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 12 : 1;
+
+ for (order = 0; order < max_order; order++) {
+ check_create_range_1(xa, 0, order);
+ check_create_range_1(xa, 1U << order, order);
+ check_create_range_1(xa, 2U << order, order);
+ check_create_range_1(xa, 3U << order, order);
+ check_create_range_1(xa, 1U << 24, order);
+ if (order < 10)
+ check_create_range_2(xa, order);
+
+ check_create_range_4(xa, 0, order);
+ check_create_range_4(xa, 1U << order, order);
+ check_create_range_4(xa, 2U << order, order);
+ check_create_range_4(xa, 3U << order, order);
+ check_create_range_4(xa, 1U << 24, order);
+
+ check_create_range_4(xa, 1, order);
+ check_create_range_4(xa, (1U << order) + 1, order);
+ check_create_range_4(xa, (2U << order) + 1, order);
+ check_create_range_4(xa, (2U << order) - 1, order);
+ check_create_range_4(xa, (3U << order) + 1, order);
+ check_create_range_4(xa, (3U << order) - 1, order);
+ check_create_range_4(xa, (1U << 24) + 1, order);
+ }
+
+ check_create_range_3();
+}
+
+static noinline void __check_store_range(struct xarray *xa, unsigned long first,
+ unsigned long last)
+{
+#ifdef CONFIG_XARRAY_MULTI
+ xa_store_range(xa, first, last, xa_mk_value(first), GFP_KERNEL);
+
+ XA_BUG_ON(xa, xa_load(xa, first) != xa_mk_value(first));
+ XA_BUG_ON(xa, xa_load(xa, last) != xa_mk_value(first));
+ XA_BUG_ON(xa, xa_load(xa, first - 1) != NULL);
+ XA_BUG_ON(xa, xa_load(xa, last + 1) != NULL);
+
+ xa_store_range(xa, first, last, NULL, GFP_KERNEL);
+#endif
+
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_store_range(struct xarray *xa)
+{
+ unsigned long i, j;
+
+ for (i = 0; i < 128; i++) {
+ for (j = i; j < 128; j++) {
+ __check_store_range(xa, i, j);
+ __check_store_range(xa, 128 + i, 128 + j);
+ __check_store_range(xa, 4095 + i, 4095 + j);
+ __check_store_range(xa, 4096 + i, 4096 + j);
+ __check_store_range(xa, 123456 + i, 123456 + j);
+ __check_store_range(xa, UINT_MAX + i, UINT_MAX + j);
+ }
+ }
+}
+
+static LIST_HEAD(shadow_nodes);
+
+static void test_update_node(struct xa_node *node)
+{
+ if (node->count && node->count == node->nr_values) {
+ if (list_empty(&node->private_list))
+ list_add(&shadow_nodes, &node->private_list);
+ } else {
+ if (!list_empty(&node->private_list))
+ list_del_init(&node->private_list);
+ }
+}
+
+static noinline void shadow_remove(struct xarray *xa)
+{
+ struct xa_node *node;
+
+ xa_lock(xa);
+ while ((node = list_first_entry_or_null(&shadow_nodes,
+ struct xa_node, private_list))) {
+ XA_STATE(xas, node->array, 0);
+ XA_BUG_ON(xa, node->array != xa);
+ list_del_init(&node->private_list);
+ xas.xa_node = xa_parent_locked(node->array, node);
+ xas.xa_offset = node->offset;
+ xas.xa_shift = node->shift + XA_CHUNK_SHIFT;
+ xas_set_update(&xas, test_update_node);
+ xas_store(&xas, NULL);
+ }
+ xa_unlock(xa);
+}
+
+static noinline void check_workingset(struct xarray *xa, unsigned long index)
+{
+ XA_STATE(xas, xa, index);
+ xas_set_update(&xas, test_update_node);
+
+ do {
+ xas_lock(&xas);
+ xas_store(&xas, xa_mk_value(0));
+ xas_next(&xas);
+ xas_store(&xas, xa_mk_value(1));
+ xas_unlock(&xas);
+ } while (xas_nomem(&xas, GFP_KERNEL));
+
+ XA_BUG_ON(xa, list_empty(&shadow_nodes));
+
+ xas_lock(&xas);
+ xas_next(&xas);
+ xas_store(&xas, &xas);
+ XA_BUG_ON(xa, !list_empty(&shadow_nodes));
+
+ xas_store(&xas, xa_mk_value(2));
+ xas_unlock(&xas);
+ XA_BUG_ON(xa, list_empty(&shadow_nodes));
+
+ shadow_remove(xa);
+ XA_BUG_ON(xa, !list_empty(&shadow_nodes));
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+/*
+ * Check that the pointer / value / sibling entries are accounted the
+ * way we expect them to be.
+ */
+static noinline void check_account(struct xarray *xa)
+{
+#ifdef CONFIG_XARRAY_MULTI
+ unsigned int order;
+
+ for (order = 1; order < 12; order++) {
+ XA_STATE(xas, xa, 1 << order);
+
+ xa_store_order(xa, 0, order, xa, GFP_KERNEL);
+ xas_load(&xas);
+ XA_BUG_ON(xa, xas.xa_node->count == 0);
+ XA_BUG_ON(xa, xas.xa_node->count > (1 << order));
+ XA_BUG_ON(xa, xas.xa_node->nr_values != 0);
+
+ xa_store_order(xa, 1 << order, order, xa_mk_value(1 << order),
+ GFP_KERNEL);
+ XA_BUG_ON(xa, xas.xa_node->count != xas.xa_node->nr_values * 2);
+
+ xa_erase(xa, 1 << order);
+ XA_BUG_ON(xa, xas.xa_node->nr_values != 0);
+
+ xa_erase(xa, 0);
+ XA_BUG_ON(xa, !xa_empty(xa));
+ }
+#endif
+}
+
+static noinline void check_destroy(struct xarray *xa)
+{
+ unsigned long index;
+
+ XA_BUG_ON(xa, !xa_empty(xa));
+
+ /* Destroying an empty array is a no-op */
+ xa_destroy(xa);
+ XA_BUG_ON(xa, !xa_empty(xa));
+
+ /* Destroying an array with a single entry */
+ for (index = 0; index < 1000; index++) {
+ xa_store_index(xa, index, GFP_KERNEL);
+ XA_BUG_ON(xa, xa_empty(xa));
+ xa_destroy(xa);
+ XA_BUG_ON(xa, !xa_empty(xa));
+ }
+
+ /* Destroying an array with a single entry at ULONG_MAX */
+ xa_store(xa, ULONG_MAX, xa, GFP_KERNEL);
+ XA_BUG_ON(xa, xa_empty(xa));
+ xa_destroy(xa);
+ XA_BUG_ON(xa, !xa_empty(xa));
+
+#ifdef CONFIG_XARRAY_MULTI
+ /* Destroying an array with a multi-index entry */
+ xa_store_order(xa, 1 << 11, 11, xa, GFP_KERNEL);
+ XA_BUG_ON(xa, xa_empty(xa));
+ xa_destroy(xa);
+ XA_BUG_ON(xa, !xa_empty(xa));
+#endif
+}
+
+static DEFINE_XARRAY(array);
+
+static int xarray_checks(void)
+{
+ check_xa_err(&array);
+ check_xas_retry(&array);
+ check_xa_load(&array);
+ check_xa_mark(&array);
+ check_xa_shrink(&array);
+ check_xas_erase(&array);
+ check_cmpxchg(&array);
+ check_reserve(&array);
+ check_multi_store(&array);
+ check_xa_alloc();
+ check_find(&array);
+ check_find_entry(&array);
+ check_account(&array);
+ check_destroy(&array);
+ check_move(&array);
+ check_create_range(&array);
+ check_store_range(&array);
+ check_store_iter(&array);
+
+ check_workingset(&array, 0);
+ check_workingset(&array, 64);
+ check_workingset(&array, 4096);
+
+ printk("XArray: %u of %u tests passed\n", tests_passed, tests_run);
+ return (tests_run == tests_passed) ? 0 : -EINVAL;
+}
+
+static void xarray_exit(void)
+{
+}
+
+module_init(xarray_checks);
+module_exit(xarray_exit);
+MODULE_AUTHOR("Matthew Wilcox <willy@infradead.org>");
+MODULE_LICENSE("GPL");
diff --git a/lib/xarray.c b/lib/xarray.c
new file mode 100644
index 000000000000..8b176f009c08
--- /dev/null
+++ b/lib/xarray.c
@@ -0,0 +1,2036 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * XArray implementation
+ * Copyright (c) 2017 Microsoft Corporation
+ * Author: Matthew Wilcox <willy@infradead.org>
+ */
+
+#include <linux/bitmap.h>
+#include <linux/export.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/xarray.h>
+
+/*
+ * Coding conventions in this file:
+ *
+ * @xa is used to refer to the entire xarray.
+ * @xas is the 'xarray operation state'. It may be either a pointer to
+ * an xa_state, or an xa_state stored on the stack. This is an unfortunate
+ * ambiguity.
+ * @index is the index of the entry being operated on
+ * @mark is an xa_mark_t; a small number indicating one of the mark bits.
+ * @node refers to an xa_node; usually the primary one being operated on by
+ * this function.
+ * @offset is the index into the slots array inside an xa_node.
+ * @parent refers to the @xa_node closer to the head than @node.
+ * @entry refers to something stored in a slot in the xarray
+ */
+
+static inline unsigned int xa_lock_type(const struct xarray *xa)
+{
+ return (__force unsigned int)xa->xa_flags & 3;
+}
+
+static inline void xas_lock_type(struct xa_state *xas, unsigned int lock_type)
+{
+ if (lock_type == XA_LOCK_IRQ)
+ xas_lock_irq(xas);
+ else if (lock_type == XA_LOCK_BH)
+ xas_lock_bh(xas);
+ else
+ xas_lock(xas);
+}
+
+static inline void xas_unlock_type(struct xa_state *xas, unsigned int lock_type)
+{
+ if (lock_type == XA_LOCK_IRQ)
+ xas_unlock_irq(xas);
+ else if (lock_type == XA_LOCK_BH)
+ xas_unlock_bh(xas);
+ else
+ xas_unlock(xas);
+}
+
+static inline bool xa_track_free(const struct xarray *xa)
+{
+ return xa->xa_flags & XA_FLAGS_TRACK_FREE;
+}
+
+static inline void xa_mark_set(struct xarray *xa, xa_mark_t mark)
+{
+ if (!(xa->xa_flags & XA_FLAGS_MARK(mark)))
+ xa->xa_flags |= XA_FLAGS_MARK(mark);
+}
+
+static inline void xa_mark_clear(struct xarray *xa, xa_mark_t mark)
+{
+ if (xa->xa_flags & XA_FLAGS_MARK(mark))
+ xa->xa_flags &= ~(XA_FLAGS_MARK(mark));
+}
+
+static inline unsigned long *node_marks(struct xa_node *node, xa_mark_t mark)
+{
+ return node->marks[(__force unsigned)mark];
+}
+
+static inline bool node_get_mark(struct xa_node *node,
+ unsigned int offset, xa_mark_t mark)
+{
+ return test_bit(offset, node_marks(node, mark));
+}
+
+/* returns true if the bit was set */
+static inline bool node_set_mark(struct xa_node *node, unsigned int offset,
+ xa_mark_t mark)
+{
+ return __test_and_set_bit(offset, node_marks(node, mark));
+}
+
+/* returns true if the bit was set */
+static inline bool node_clear_mark(struct xa_node *node, unsigned int offset,
+ xa_mark_t mark)
+{
+ return __test_and_clear_bit(offset, node_marks(node, mark));
+}
+
+static inline bool node_any_mark(struct xa_node *node, xa_mark_t mark)
+{
+ return !bitmap_empty(node_marks(node, mark), XA_CHUNK_SIZE);
+}
+
+static inline void node_mark_all(struct xa_node *node, xa_mark_t mark)
+{
+ bitmap_fill(node_marks(node, mark), XA_CHUNK_SIZE);
+}
+
+#define mark_inc(mark) do { \
+ mark = (__force xa_mark_t)((__force unsigned)(mark) + 1); \
+} while (0)
+
+/*
+ * xas_squash_marks() - Merge all marks to the first entry
+ * @xas: Array operation state.
+ *
+ * Set a mark on the first entry if any entry has it set. Clear marks on
+ * all sibling entries.
+ */
+static void xas_squash_marks(const struct xa_state *xas)
+{
+ unsigned int mark = 0;
+ unsigned int limit = xas->xa_offset + xas->xa_sibs + 1;
+
+ if (!xas->xa_sibs)
+ return;
+
+ do {
+ unsigned long *marks = xas->xa_node->marks[mark];
+ if (find_next_bit(marks, limit, xas->xa_offset + 1) == limit)
+ continue;
+ __set_bit(xas->xa_offset, marks);
+ bitmap_clear(marks, xas->xa_offset + 1, xas->xa_sibs);
+ } while (mark++ != (__force unsigned)XA_MARK_MAX);
+}
+
+/* extracts the offset within this node from the index */
+static unsigned int get_offset(unsigned long index, struct xa_node *node)
+{
+ return (index >> node->shift) & XA_CHUNK_MASK;
+}
+
+static void xas_set_offset(struct xa_state *xas)
+{
+ xas->xa_offset = get_offset(xas->xa_index, xas->xa_node);
+}
+
+/* move the index either forwards (find) or backwards (sibling slot) */
+static void xas_move_index(struct xa_state *xas, unsigned long offset)
+{
+ unsigned int shift = xas->xa_node->shift;
+ xas->xa_index &= ~XA_CHUNK_MASK << shift;
+ xas->xa_index += offset << shift;
+}
+
+static void xas_advance(struct xa_state *xas)
+{
+ xas->xa_offset++;
+ xas_move_index(xas, xas->xa_offset);
+}
+
+static void *set_bounds(struct xa_state *xas)
+{
+ xas->xa_node = XAS_BOUNDS;
+ return NULL;
+}
+
+/*
+ * Starts a walk. If the @xas is already valid, we assume that it's on
+ * the right path and just return where we've got to. If we're in an
+ * error state, return NULL. If the index is outside the current scope
+ * of the xarray, return NULL without changing @xas->xa_node. Otherwise
+ * set @xas->xa_node to NULL and return the current head of the array.
+ */
+static void *xas_start(struct xa_state *xas)
+{
+ void *entry;
+
+ if (xas_valid(xas))
+ return xas_reload(xas);
+ if (xas_error(xas))
+ return NULL;
+
+ entry = xa_head(xas->xa);
+ if (!xa_is_node(entry)) {
+ if (xas->xa_index)
+ return set_bounds(xas);
+ } else {
+ if ((xas->xa_index >> xa_to_node(entry)->shift) > XA_CHUNK_MASK)
+ return set_bounds(xas);
+ }
+
+ xas->xa_node = NULL;
+ return entry;
+}
+
+static void *xas_descend(struct xa_state *xas, struct xa_node *node)
+{
+ unsigned int offset = get_offset(xas->xa_index, node);
+ void *entry = xa_entry(xas->xa, node, offset);
+
+ xas->xa_node = node;
+ if (xa_is_sibling(entry)) {
+ offset = xa_to_sibling(entry);
+ entry = xa_entry(xas->xa, node, offset);
+ }
+
+ xas->xa_offset = offset;
+ return entry;
+}
+
+/**
+ * xas_load() - Load an entry from the XArray (advanced).
+ * @xas: XArray operation state.
+ *
+ * Usually walks the @xas to the appropriate state to load the entry
+ * stored at xa_index. However, it will do nothing and return %NULL if
+ * @xas is in an error state. xas_load() will never expand the tree.
+ *
+ * If the xa_state is set up to operate on a multi-index entry, xas_load()
+ * may return %NULL or an internal entry, even if there are entries
+ * present within the range specified by @xas.
+ *
+ * Context: Any context. The caller should hold the xa_lock or the RCU lock.
+ * Return: Usually an entry in the XArray, but see description for exceptions.
+ */
+void *xas_load(struct xa_state *xas)
+{
+ void *entry = xas_start(xas);
+
+ while (xa_is_node(entry)) {
+ struct xa_node *node = xa_to_node(entry);
+
+ if (xas->xa_shift > node->shift)
+ break;
+ entry = xas_descend(xas, node);
+ }
+ return entry;
+}
+EXPORT_SYMBOL_GPL(xas_load);
+
+/* Move the radix tree node cache here */
+extern struct kmem_cache *radix_tree_node_cachep;
+extern void radix_tree_node_rcu_free(struct rcu_head *head);
+
+#define XA_RCU_FREE ((struct xarray *)1)
+
+static void xa_node_free(struct xa_node *node)
+{
+ XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
+ node->array = XA_RCU_FREE;
+ call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
+}
+
+/*
+ * xas_destroy() - Free any resources allocated during the XArray operation.
+ * @xas: XArray operation state.
+ *
+ * This function is now internal-only.
+ */
+static void xas_destroy(struct xa_state *xas)
+{
+ struct xa_node *node = xas->xa_alloc;
+
+ if (!node)
+ return;
+ XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
+ kmem_cache_free(radix_tree_node_cachep, node);
+ xas->xa_alloc = NULL;
+}
+
+/**
+ * xas_nomem() - Allocate memory if needed.
+ * @xas: XArray operation state.
+ * @gfp: Memory allocation flags.
+ *
+ * If we need to add new nodes to the XArray, we try to allocate memory
+ * with GFP_NOWAIT while holding the lock, which will usually succeed.
+ * If it fails, @xas is flagged as needing memory to continue. The caller
+ * should drop the lock and call xas_nomem(). If xas_nomem() succeeds,
+ * the caller should retry the operation.
+ *
+ * Forward progress is guaranteed as one node is allocated here and
+ * stored in the xa_state where it will be found by xas_alloc(). More
+ * nodes will likely be found in the slab allocator, but we do not tie
+ * them up here.
+ *
+ * Return: true if memory was needed, and was successfully allocated.
+ */
+bool xas_nomem(struct xa_state *xas, gfp_t gfp)
+{
+ if (xas->xa_node != XA_ERROR(-ENOMEM)) {
+ xas_destroy(xas);
+ return false;
+ }
+ xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
+ if (!xas->xa_alloc)
+ return false;
+ XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list));
+ xas->xa_node = XAS_RESTART;
+ return true;
+}
+EXPORT_SYMBOL_GPL(xas_nomem);
+
+/*
+ * __xas_nomem() - Drop locks and allocate memory if needed.
+ * @xas: XArray operation state.
+ * @gfp: Memory allocation flags.
+ *
+ * Internal variant of xas_nomem().
+ *
+ * Return: true if memory was needed, and was successfully allocated.
+ */
+static bool __xas_nomem(struct xa_state *xas, gfp_t gfp)
+ __must_hold(xas->xa->xa_lock)
+{
+ unsigned int lock_type = xa_lock_type(xas->xa);
+
+ if (xas->xa_node != XA_ERROR(-ENOMEM)) {
+ xas_destroy(xas);
+ return false;
+ }
+ if (gfpflags_allow_blocking(gfp)) {
+ xas_unlock_type(xas, lock_type);
+ xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
+ xas_lock_type(xas, lock_type);
+ } else {
+ xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
+ }
+ if (!xas->xa_alloc)
+ return false;
+ XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list));
+ xas->xa_node = XAS_RESTART;
+ return true;
+}
+
+static void xas_update(struct xa_state *xas, struct xa_node *node)
+{
+ if (xas->xa_update)
+ xas->xa_update(node);
+ else
+ XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
+}
+
+static void *xas_alloc(struct xa_state *xas, unsigned int shift)
+{
+ struct xa_node *parent = xas->xa_node;
+ struct xa_node *node = xas->xa_alloc;
+
+ if (xas_invalid(xas))
+ return NULL;
+
+ if (node) {
+ xas->xa_alloc = NULL;
+ } else {
+ node = kmem_cache_alloc(radix_tree_node_cachep,
+ GFP_NOWAIT | __GFP_NOWARN);
+ if (!node) {
+ xas_set_err(xas, -ENOMEM);
+ return NULL;
+ }
+ }
+
+ if (parent) {
+ node->offset = xas->xa_offset;
+ parent->count++;
+ XA_NODE_BUG_ON(node, parent->count > XA_CHUNK_SIZE);
+ xas_update(xas, parent);
+ }
+ XA_NODE_BUG_ON(node, shift > BITS_PER_LONG);
+ XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
+ node->shift = shift;
+ node->count = 0;
+ node->nr_values = 0;
+ RCU_INIT_POINTER(node->parent, xas->xa_node);
+ node->array = xas->xa;
+
+ return node;
+}
+
+#ifdef CONFIG_XARRAY_MULTI
+/* Returns the number of indices covered by a given xa_state */
+static unsigned long xas_size(const struct xa_state *xas)
+{
+ return (xas->xa_sibs + 1UL) << xas->xa_shift;
+}
+#endif
+
+/*
+ * Use this to calculate the maximum index that will need to be created
+ * in order to add the entry described by @xas. Because we cannot store a
+ * multiple-index entry at index 0, the calculation is a little more complex
+ * than you might expect.
+ */
+static unsigned long xas_max(struct xa_state *xas)
+{
+ unsigned long max = xas->xa_index;
+
+#ifdef CONFIG_XARRAY_MULTI
+ if (xas->xa_shift || xas->xa_sibs) {
+ unsigned long mask = xas_size(xas) - 1;
+ max |= mask;
+ if (mask == max)
+ max++;
+ }
+#endif
+
+ return max;
+}
+
+/* The maximum index that can be contained in the array without expanding it */
+static unsigned long max_index(void *entry)
+{
+ if (!xa_is_node(entry))
+ return 0;
+ return (XA_CHUNK_SIZE << xa_to_node(entry)->shift) - 1;
+}
+
+static void xas_shrink(struct xa_state *xas)
+{
+ struct xarray *xa = xas->xa;
+ struct xa_node *node = xas->xa_node;
+
+ for (;;) {
+ void *entry;
+
+ XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE);
+ if (node->count != 1)
+ break;
+ entry = xa_entry_locked(xa, node, 0);
+ if (!entry)
+ break;
+ if (!xa_is_node(entry) && node->shift)
+ break;
+ xas->xa_node = XAS_BOUNDS;
+
+ RCU_INIT_POINTER(xa->xa_head, entry);
+ if (xa_track_free(xa) && !node_get_mark(node, 0, XA_FREE_MARK))
+ xa_mark_clear(xa, XA_FREE_MARK);
+
+ node->count = 0;
+ node->nr_values = 0;
+ if (!xa_is_node(entry))
+ RCU_INIT_POINTER(node->slots[0], XA_RETRY_ENTRY);
+ xas_update(xas, node);
+ xa_node_free(node);
+ if (!xa_is_node(entry))
+ break;
+ node = xa_to_node(entry);
+ node->parent = NULL;
+ }
+}
+
+/*
+ * xas_delete_node() - Attempt to delete an xa_node
+ * @xas: Array operation state.
+ *
+ * Attempts to delete the @xas->xa_node. This will fail if xa->node has
+ * a non-zero reference count.
+ */
+static void xas_delete_node(struct xa_state *xas)
+{
+ struct xa_node *node = xas->xa_node;
+
+ for (;;) {
+ struct xa_node *parent;
+
+ XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE);
+ if (node->count)
+ break;
+
+ parent = xa_parent_locked(xas->xa, node);
+ xas->xa_node = parent;
+ xas->xa_offset = node->offset;
+ xa_node_free(node);
+
+ if (!parent) {
+ xas->xa->xa_head = NULL;
+ xas->xa_node = XAS_BOUNDS;
+ return;
+ }
+
+ parent->slots[xas->xa_offset] = NULL;
+ parent->count--;
+ XA_NODE_BUG_ON(parent, parent->count > XA_CHUNK_SIZE);
+ node = parent;
+ xas_update(xas, node);
+ }
+
+ if (!node->parent)
+ xas_shrink(xas);
+}
+
+/**
+ * xas_free_nodes() - Free this node and all nodes that it references
+ * @xas: Array operation state.
+ * @top: Node to free
+ *
+ * This node has been removed from the tree. We must now free it and all
+ * of its subnodes. There may be RCU walkers with references into the tree,
+ * so we must replace all entries with retry markers.
+ */
+static void xas_free_nodes(struct xa_state *xas, struct xa_node *top)
+{
+ unsigned int offset = 0;
+ struct xa_node *node = top;
+
+ for (;;) {
+ void *entry = xa_entry_locked(xas->xa, node, offset);
+
+ if (xa_is_node(entry)) {
+ node = xa_to_node(entry);
+ offset = 0;
+ continue;
+ }
+ if (entry)
+ RCU_INIT_POINTER(node->slots[offset], XA_RETRY_ENTRY);
+ offset++;
+ while (offset == XA_CHUNK_SIZE) {
+ struct xa_node *parent;
+
+ parent = xa_parent_locked(xas->xa, node);
+ offset = node->offset + 1;
+ node->count = 0;
+ node->nr_values = 0;
+ xas_update(xas, node);
+ xa_node_free(node);
+ if (node == top)
+ return;
+ node = parent;
+ }
+ }
+}
+
+/*
+ * xas_expand adds nodes to the head of the tree until it has reached
+ * sufficient height to be able to contain @xas->xa_index
+ */
+static int xas_expand(struct xa_state *xas, void *head)
+{
+ struct xarray *xa = xas->xa;
+ struct xa_node *node = NULL;
+ unsigned int shift = 0;
+ unsigned long max = xas_max(xas);
+
+ if (!head) {
+ if (max == 0)
+ return 0;
+ while ((max >> shift) >= XA_CHUNK_SIZE)
+ shift += XA_CHUNK_SHIFT;
+ return shift + XA_CHUNK_SHIFT;
+ } else if (xa_is_node(head)) {
+ node = xa_to_node(head);
+ shift = node->shift + XA_CHUNK_SHIFT;
+ }
+ xas->xa_node = NULL;
+
+ while (max > max_index(head)) {
+ xa_mark_t mark = 0;
+
+ XA_NODE_BUG_ON(node, shift > BITS_PER_LONG);
+ node = xas_alloc(xas, shift);
+ if (!node)
+ return -ENOMEM;
+
+ node->count = 1;
+ if (xa_is_value(head))
+ node->nr_values = 1;
+ RCU_INIT_POINTER(node->slots[0], head);
+
+ /* Propagate the aggregated mark info to the new child */
+ for (;;) {
+ if (xa_track_free(xa) && mark == XA_FREE_MARK) {
+ node_mark_all(node, XA_FREE_MARK);
+ if (!xa_marked(xa, XA_FREE_MARK)) {
+ node_clear_mark(node, 0, XA_FREE_MARK);
+ xa_mark_set(xa, XA_FREE_MARK);
+ }
+ } else if (xa_marked(xa, mark)) {
+ node_set_mark(node, 0, mark);
+ }
+ if (mark == XA_MARK_MAX)
+ break;
+ mark_inc(mark);
+ }
+
+ /*
+ * Now that the new node is fully initialised, we can add
+ * it to the tree
+ */
+ if (xa_is_node(head)) {
+ xa_to_node(head)->offset = 0;
+ rcu_assign_pointer(xa_to_node(head)->parent, node);
+ }
+ head = xa_mk_node(node);
+ rcu_assign_pointer(xa->xa_head, head);
+ xas_update(xas, node);
+
+ shift += XA_CHUNK_SHIFT;
+ }
+
+ xas->xa_node = node;
+ return shift;
+}
+
+/*
+ * xas_create() - Create a slot to store an entry in.
+ * @xas: XArray operation state.
+ *
+ * Most users will not need to call this function directly, as it is called
+ * by xas_store(). It is useful for doing conditional store operations
+ * (see the xa_cmpxchg() implementation for an example).
+ *
+ * Return: If the slot already existed, returns the contents of this slot.
+ * If the slot was newly created, returns NULL. If it failed to create the
+ * slot, returns NULL and indicates the error in @xas.
+ */
+static void *xas_create(struct xa_state *xas)
+{
+ struct xarray *xa = xas->xa;
+ void *entry;
+ void __rcu **slot;
+ struct xa_node *node = xas->xa_node;
+ int shift;
+ unsigned int order = xas->xa_shift;
+
+ if (xas_top(node)) {
+ entry = xa_head_locked(xa);
+ xas->xa_node = NULL;
+ shift = xas_expand(xas, entry);
+ if (shift < 0)
+ return NULL;
+ entry = xa_head_locked(xa);
+ slot = &xa->xa_head;
+ } else if (xas_error(xas)) {
+ return NULL;
+ } else if (node) {
+ unsigned int offset = xas->xa_offset;
+
+ shift = node->shift;
+ entry = xa_entry_locked(xa, node, offset);
+ slot = &node->slots[offset];
+ } else {
+ shift = 0;
+ entry = xa_head_locked(xa);
+ slot = &xa->xa_head;
+ }
+
+ while (shift > order) {
+ shift -= XA_CHUNK_SHIFT;
+ if (!entry) {
+ node = xas_alloc(xas, shift);
+ if (!node)
+ break;
+ if (xa_track_free(xa))
+ node_mark_all(node, XA_FREE_MARK);
+ rcu_assign_pointer(*slot, xa_mk_node(node));
+ } else if (xa_is_node(entry)) {
+ node = xa_to_node(entry);
+ } else {
+ break;
+ }
+ entry = xas_descend(xas, node);
+ slot = &node->slots[xas->xa_offset];
+ }
+
+ return entry;
+}
+
+/**
+ * xas_create_range() - Ensure that stores to this range will succeed
+ * @xas: XArray operation state.
+ *
+ * Creates all of the slots in the range covered by @xas. Sets @xas to
+ * create single-index entries and positions it at the beginning of the
+ * range. This is for the benefit of users which have not yet been
+ * converted to use multi-index entries.
+ */
+void xas_create_range(struct xa_state *xas)
+{
+ unsigned long index = xas->xa_index;
+ unsigned char shift = xas->xa_shift;
+ unsigned char sibs = xas->xa_sibs;
+
+ xas->xa_index |= ((sibs + 1) << shift) - 1;
+ if (xas_is_node(xas) && xas->xa_node->shift == xas->xa_shift)
+ xas->xa_offset |= sibs;
+ xas->xa_shift = 0;
+ xas->xa_sibs = 0;
+
+ for (;;) {
+ xas_create(xas);
+ if (xas_error(xas))
+ goto restore;
+ if (xas->xa_index <= (index | XA_CHUNK_MASK))
+ goto success;
+ xas->xa_index -= XA_CHUNK_SIZE;
+
+ for (;;) {
+ struct xa_node *node = xas->xa_node;
+ xas->xa_node = xa_parent_locked(xas->xa, node);
+ xas->xa_offset = node->offset - 1;
+ if (node->offset != 0)
+ break;
+ }
+ }
+
+restore:
+ xas->xa_shift = shift;
+ xas->xa_sibs = sibs;
+ xas->xa_index = index;
+ return;
+success:
+ xas->xa_index = index;
+ if (xas->xa_node)
+ xas_set_offset(xas);
+}
+EXPORT_SYMBOL_GPL(xas_create_range);
+
+static void update_node(struct xa_state *xas, struct xa_node *node,
+ int count, int values)
+{
+ if (!node || (!count && !values))
+ return;
+
+ node->count += count;
+ node->nr_values += values;
+ XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE);
+ XA_NODE_BUG_ON(node, node->nr_values > XA_CHUNK_SIZE);
+ xas_update(xas, node);
+ if (count < 0)
+ xas_delete_node(xas);
+}
+
+/**
+ * xas_store() - Store this entry in the XArray.
+ * @xas: XArray operation state.
+ * @entry: New entry.
+ *
+ * If @xas is operating on a multi-index entry, the entry returned by this
+ * function is essentially meaningless (it may be an internal entry or it
+ * may be %NULL, even if there are non-NULL entries at some of the indices
+ * covered by the range). This is not a problem for any current users,
+ * and can be changed if needed.
+ *
+ * Return: The old entry at this index.
+ */
+void *xas_store(struct xa_state *xas, void *entry)
+{
+ struct xa_node *node;
+ void __rcu **slot = &xas->xa->xa_head;
+ unsigned int offset, max;
+ int count = 0;
+ int values = 0;
+ void *first, *next;
+ bool value = xa_is_value(entry);
+
+ if (entry)
+ first = xas_create(xas);
+ else
+ first = xas_load(xas);
+
+ if (xas_invalid(xas))
+ return first;
+ node = xas->xa_node;
+ if (node && (xas->xa_shift < node->shift))
+ xas->xa_sibs = 0;
+ if ((first == entry) && !xas->xa_sibs)
+ return first;
+
+ next = first;
+ offset = xas->xa_offset;
+ max = xas->xa_offset + xas->xa_sibs;
+ if (node) {
+ slot = &node->slots[offset];
+ if (xas->xa_sibs)
+ xas_squash_marks(xas);
+ }
+ if (!entry)
+ xas_init_marks(xas);
+
+ for (;;) {
+ /*
+ * Must clear the marks before setting the entry to NULL,
+ * otherwise xas_for_each_marked may find a NULL entry and
+ * stop early. rcu_assign_pointer contains a release barrier
+ * so the mark clearing will appear to happen before the
+ * entry is set to NULL.
+ */
+ rcu_assign_pointer(*slot, entry);
+ if (xa_is_node(next))
+ xas_free_nodes(xas, xa_to_node(next));
+ if (!node)
+ break;
+ count += !next - !entry;
+ values += !xa_is_value(first) - !value;
+ if (entry) {
+ if (offset == max)
+ break;
+ if (!xa_is_sibling(entry))
+ entry = xa_mk_sibling(xas->xa_offset);
+ } else {
+ if (offset == XA_CHUNK_MASK)
+ break;
+ }
+ next = xa_entry_locked(xas->xa, node, ++offset);
+ if (!xa_is_sibling(next)) {
+ if (!entry && (offset > max))
+ break;
+ first = next;
+ }
+ slot++;
+ }
+
+ update_node(xas, node, count, values);
+ return first;
+}
+EXPORT_SYMBOL_GPL(xas_store);
+
+/**
+ * xas_get_mark() - Returns the state of this mark.
+ * @xas: XArray operation state.
+ * @mark: Mark number.
+ *
+ * Return: true if the mark is set, false if the mark is clear or @xas
+ * is in an error state.
+ */
+bool xas_get_mark(const struct xa_state *xas, xa_mark_t mark)
+{
+ if (xas_invalid(xas))
+ return false;
+ if (!xas->xa_node)
+ return xa_marked(xas->xa, mark);
+ return node_get_mark(xas->xa_node, xas->xa_offset, mark);
+}
+EXPORT_SYMBOL_GPL(xas_get_mark);
+
+/**
+ * xas_set_mark() - Sets the mark on this entry and its parents.
+ * @xas: XArray operation state.
+ * @mark: Mark number.
+ *
+ * Sets the specified mark on this entry, and walks up the tree setting it
+ * on all the ancestor entries. Does nothing if @xas has not been walked to
+ * an entry, or is in an error state.
+ */
+void xas_set_mark(const struct xa_state *xas, xa_mark_t mark)
+{
+ struct xa_node *node = xas->xa_node;
+ unsigned int offset = xas->xa_offset;
+
+ if (xas_invalid(xas))
+ return;
+
+ while (node) {
+ if (node_set_mark(node, offset, mark))
+ return;
+ offset = node->offset;
+ node = xa_parent_locked(xas->xa, node);
+ }
+
+ if (!xa_marked(xas->xa, mark))
+ xa_mark_set(xas->xa, mark);
+}
+EXPORT_SYMBOL_GPL(xas_set_mark);
+
+/**
+ * xas_clear_mark() - Clears the mark on this entry and its parents.
+ * @xas: XArray operation state.
+ * @mark: Mark number.
+ *
+ * Clears the specified mark on this entry, and walks back to the head
+ * attempting to clear it on all the ancestor entries. Does nothing if
+ * @xas has not been walked to an entry, or is in an error state.
+ */
+void xas_clear_mark(const struct xa_state *xas, xa_mark_t mark)
+{
+ struct xa_node *node = xas->xa_node;
+ unsigned int offset = xas->xa_offset;
+
+ if (xas_invalid(xas))
+ return;
+
+ while (node) {
+ if (!node_clear_mark(node, offset, mark))
+ return;
+ if (node_any_mark(node, mark))
+ return;
+
+ offset = node->offset;
+ node = xa_parent_locked(xas->xa, node);
+ }
+
+ if (xa_marked(xas->xa, mark))
+ xa_mark_clear(xas->xa, mark);
+}
+EXPORT_SYMBOL_GPL(xas_clear_mark);
+
+/**
+ * xas_init_marks() - Initialise all marks for the entry
+ * @xas: Array operations state.
+ *
+ * Initialise all marks for the entry specified by @xas. If we're tracking
+ * free entries with a mark, we need to set it on all entries. All other
+ * marks are cleared.
+ *
+ * This implementation is not as efficient as it could be; we may walk
+ * up the tree multiple times.
+ */
+void xas_init_marks(const struct xa_state *xas)
+{
+ xa_mark_t mark = 0;
+
+ for (;;) {
+ if (xa_track_free(xas->xa) && mark == XA_FREE_MARK)
+ xas_set_mark(xas, mark);
+ else
+ xas_clear_mark(xas, mark);
+ if (mark == XA_MARK_MAX)
+ break;
+ mark_inc(mark);
+ }
+}
+EXPORT_SYMBOL_GPL(xas_init_marks);
+
+/**
+ * xas_pause() - Pause a walk to drop a lock.
+ * @xas: XArray operation state.
+ *
+ * Some users need to pause a walk and drop the lock they're holding in
+ * order to yield to a higher priority thread or carry out an operation
+ * on an entry. Those users should call this function before they drop
+ * the lock. It resets the @xas to be suitable for the next iteration
+ * of the loop after the user has reacquired the lock. If most entries
+ * found during a walk require you to call xas_pause(), the xa_for_each()
+ * iterator may be more appropriate.
+ *
+ * Note that xas_pause() only works for forward iteration. If a user needs
+ * to pause a reverse iteration, we will need a xas_pause_rev().
+ */
+void xas_pause(struct xa_state *xas)
+{
+ struct xa_node *node = xas->xa_node;
+
+ if (xas_invalid(xas))
+ return;
+
+ if (node) {
+ unsigned int offset = xas->xa_offset;
+ while (++offset < XA_CHUNK_SIZE) {
+ if (!xa_is_sibling(xa_entry(xas->xa, node, offset)))
+ break;
+ }
+ xas->xa_index += (offset - xas->xa_offset) << node->shift;
+ } else {
+ xas->xa_index++;
+ }
+ xas->xa_node = XAS_RESTART;
+}
+EXPORT_SYMBOL_GPL(xas_pause);
+
+/*
+ * __xas_prev() - Find the previous entry in the XArray.
+ * @xas: XArray operation state.
+ *
+ * Helper function for xas_prev() which handles all the complex cases
+ * out of line.
+ */
+void *__xas_prev(struct xa_state *xas)
+{
+ void *entry;
+
+ if (!xas_frozen(xas->xa_node))
+ xas->xa_index--;
+ if (xas_not_node(xas->xa_node))
+ return xas_load(xas);
+
+ if (xas->xa_offset != get_offset(xas->xa_index, xas->xa_node))
+ xas->xa_offset--;
+
+ while (xas->xa_offset == 255) {
+ xas->xa_offset = xas->xa_node->offset - 1;
+ xas->xa_node = xa_parent(xas->xa, xas->xa_node);
+ if (!xas->xa_node)
+ return set_bounds(xas);
+ }
+
+ for (;;) {
+ entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
+ if (!xa_is_node(entry))
+ return entry;
+
+ xas->xa_node = xa_to_node(entry);
+ xas_set_offset(xas);
+ }
+}
+EXPORT_SYMBOL_GPL(__xas_prev);
+
+/*
+ * __xas_next() - Find the next entry in the XArray.
+ * @xas: XArray operation state.
+ *
+ * Helper function for xas_next() which handles all the complex cases
+ * out of line.
+ */
+void *__xas_next(struct xa_state *xas)
+{
+ void *entry;
+
+ if (!xas_frozen(xas->xa_node))
+ xas->xa_index++;
+ if (xas_not_node(xas->xa_node))
+ return xas_load(xas);
+
+ if (xas->xa_offset != get_offset(xas->xa_index, xas->xa_node))
+ xas->xa_offset++;
+
+ while (xas->xa_offset == XA_CHUNK_SIZE) {
+ xas->xa_offset = xas->xa_node->offset + 1;
+ xas->xa_node = xa_parent(xas->xa, xas->xa_node);
+ if (!xas->xa_node)
+ return set_bounds(xas);
+ }
+
+ for (;;) {
+ entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
+ if (!xa_is_node(entry))
+ return entry;
+
+ xas->xa_node = xa_to_node(entry);
+ xas_set_offset(xas);
+ }
+}
+EXPORT_SYMBOL_GPL(__xas_next);
+
+/**
+ * xas_find() - Find the next present entry in the XArray.
+ * @xas: XArray operation state.
+ * @max: Highest index to return.
+ *
+ * If the @xas has not yet been walked to an entry, return the entry
+ * which has an index >= xas.xa_index. If it has been walked, the entry
+ * currently being pointed at has been processed, and so we move to the
+ * next entry.
+ *
+ * If no entry is found and the array is smaller than @max, the iterator
+ * is set to the smallest index not yet in the array. This allows @xas
+ * to be immediately passed to xas_store().
+ *
+ * Return: The entry, if found, otherwise %NULL.
+ */
+void *xas_find(struct xa_state *xas, unsigned long max)
+{
+ void *entry;
+
+ if (xas_error(xas))
+ return NULL;
+
+ if (!xas->xa_node) {
+ xas->xa_index = 1;
+ return set_bounds(xas);
+ } else if (xas_top(xas->xa_node)) {
+ entry = xas_load(xas);
+ if (entry || xas_not_node(xas->xa_node))
+ return entry;
+ } else if (!xas->xa_node->shift &&
+ xas->xa_offset != (xas->xa_index & XA_CHUNK_MASK)) {
+ xas->xa_offset = ((xas->xa_index - 1) & XA_CHUNK_MASK) + 1;
+ }
+
+ xas_advance(xas);
+
+ while (xas->xa_node && (xas->xa_index <= max)) {
+ if (unlikely(xas->xa_offset == XA_CHUNK_SIZE)) {
+ xas->xa_offset = xas->xa_node->offset + 1;
+ xas->xa_node = xa_parent(xas->xa, xas->xa_node);
+ continue;
+ }
+
+ entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
+ if (xa_is_node(entry)) {
+ xas->xa_node = xa_to_node(entry);
+ xas->xa_offset = 0;
+ continue;
+ }
+ if (entry && !xa_is_sibling(entry))
+ return entry;
+
+ xas_advance(xas);
+ }
+
+ if (!xas->xa_node)
+ xas->xa_node = XAS_BOUNDS;
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(xas_find);
+
+/**
+ * xas_find_marked() - Find the next marked entry in the XArray.
+ * @xas: XArray operation state.
+ * @max: Highest index to return.
+ * @mark: Mark number to search for.
+ *
+ * If the @xas has not yet been walked to an entry, return the marked entry
+ * which has an index >= xas.xa_index. If it has been walked, the entry
+ * currently being pointed at has been processed, and so we return the
+ * first marked entry with an index > xas.xa_index.
+ *
+ * If no marked entry is found and the array is smaller than @max, @xas is
+ * set to the bounds state and xas->xa_index is set to the smallest index
+ * not yet in the array. This allows @xas to be immediately passed to
+ * xas_store().
+ *
+ * If no entry is found before @max is reached, @xas is set to the restart
+ * state.
+ *
+ * Return: The entry, if found, otherwise %NULL.
+ */
+void *xas_find_marked(struct xa_state *xas, unsigned long max, xa_mark_t mark)
+{
+ bool advance = true;
+ unsigned int offset;
+ void *entry;
+
+ if (xas_error(xas))
+ return NULL;
+
+ if (!xas->xa_node) {
+ xas->xa_index = 1;
+ goto out;
+ } else if (xas_top(xas->xa_node)) {
+ advance = false;
+ entry = xa_head(xas->xa);
+ xas->xa_node = NULL;
+ if (xas->xa_index > max_index(entry))
+ goto bounds;
+ if (!xa_is_node(entry)) {
+ if (xa_marked(xas->xa, mark))
+ return entry;
+ xas->xa_index = 1;
+ goto out;
+ }
+ xas->xa_node = xa_to_node(entry);
+ xas->xa_offset = xas->xa_index >> xas->xa_node->shift;
+ }
+
+ while (xas->xa_index <= max) {
+ if (unlikely(xas->xa_offset == XA_CHUNK_SIZE)) {
+ xas->xa_offset = xas->xa_node->offset + 1;
+ xas->xa_node = xa_parent(xas->xa, xas->xa_node);
+ if (!xas->xa_node)
+ break;
+ advance = false;
+ continue;
+ }
+
+ if (!advance) {
+ entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
+ if (xa_is_sibling(entry)) {
+ xas->xa_offset = xa_to_sibling(entry);
+ xas_move_index(xas, xas->xa_offset);
+ }
+ }
+
+ offset = xas_find_chunk(xas, advance, mark);
+ if (offset > xas->xa_offset) {
+ advance = false;
+ xas_move_index(xas, offset);
+ /* Mind the wrap */
+ if ((xas->xa_index - 1) >= max)
+ goto max;
+ xas->xa_offset = offset;
+ if (offset == XA_CHUNK_SIZE)
+ continue;
+ }
+
+ entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
+ if (!xa_is_node(entry))
+ return entry;
+ xas->xa_node = xa_to_node(entry);
+ xas_set_offset(xas);
+ }
+
+out:
+ if (!max)
+ goto max;
+bounds:
+ xas->xa_node = XAS_BOUNDS;
+ return NULL;
+max:
+ xas->xa_node = XAS_RESTART;
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(xas_find_marked);
+
+/**
+ * xas_find_conflict() - Find the next present entry in a range.
+ * @xas: XArray operation state.
+ *
+ * The @xas describes both a range and a position within that range.
+ *
+ * Context: Any context. Expects xa_lock to be held.
+ * Return: The next entry in the range covered by @xas or %NULL.
+ */
+void *xas_find_conflict(struct xa_state *xas)
+{
+ void *curr;
+
+ if (xas_error(xas))
+ return NULL;
+
+ if (!xas->xa_node)
+ return NULL;
+
+ if (xas_top(xas->xa_node)) {
+ curr = xas_start(xas);
+ if (!curr)
+ return NULL;
+ while (xa_is_node(curr)) {
+ struct xa_node *node = xa_to_node(curr);
+ curr = xas_descend(xas, node);
+ }
+ if (curr)
+ return curr;
+ }
+
+ if (xas->xa_node->shift > xas->xa_shift)
+ return NULL;
+
+ for (;;) {
+ if (xas->xa_node->shift == xas->xa_shift) {
+ if ((xas->xa_offset & xas->xa_sibs) == xas->xa_sibs)
+ break;
+ } else if (xas->xa_offset == XA_CHUNK_MASK) {
+ xas->xa_offset = xas->xa_node->offset;
+ xas->xa_node = xa_parent_locked(xas->xa, xas->xa_node);
+ if (!xas->xa_node)
+ break;
+ continue;
+ }
+ curr = xa_entry_locked(xas->xa, xas->xa_node, ++xas->xa_offset);
+ if (xa_is_sibling(curr))
+ continue;
+ while (xa_is_node(curr)) {
+ xas->xa_node = xa_to_node(curr);
+ xas->xa_offset = 0;
+ curr = xa_entry_locked(xas->xa, xas->xa_node, 0);
+ }
+ if (curr)
+ return curr;
+ }
+ xas->xa_offset -= xas->xa_sibs;
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(xas_find_conflict);
+
+/**
+ * xa_init_flags() - Initialise an empty XArray with flags.
+ * @xa: XArray.
+ * @flags: XA_FLAG values.
+ *
+ * If you need to initialise an XArray with special flags (eg you need
+ * to take the lock from interrupt context), use this function instead
+ * of xa_init().
+ *
+ * Context: Any context.
+ */
+void xa_init_flags(struct xarray *xa, gfp_t flags)
+{
+ unsigned int lock_type;
+ static struct lock_class_key xa_lock_irq;
+ static struct lock_class_key xa_lock_bh;
+
+ spin_lock_init(&xa->xa_lock);
+ xa->xa_flags = flags;
+ xa->xa_head = NULL;
+
+ lock_type = xa_lock_type(xa);
+ if (lock_type == XA_LOCK_IRQ)
+ lockdep_set_class(&xa->xa_lock, &xa_lock_irq);
+ else if (lock_type == XA_LOCK_BH)
+ lockdep_set_class(&xa->xa_lock, &xa_lock_bh);
+}
+EXPORT_SYMBOL(xa_init_flags);
+
+/**
+ * xa_load() - Load an entry from an XArray.
+ * @xa: XArray.
+ * @index: index into array.
+ *
+ * Context: Any context. Takes and releases the RCU lock.
+ * Return: The entry at @index in @xa.
+ */
+void *xa_load(struct xarray *xa, unsigned long index)
+{
+ XA_STATE(xas, xa, index);
+ void *entry;
+
+ rcu_read_lock();
+ do {
+ entry = xas_load(&xas);
+ if (xa_is_zero(entry))
+ entry = NULL;
+ } while (xas_retry(&xas, entry));
+ rcu_read_unlock();
+
+ return entry;
+}
+EXPORT_SYMBOL(xa_load);
+
+static void *xas_result(struct xa_state *xas, void *curr)
+{
+ if (xa_is_zero(curr))
+ return NULL;
+ XA_NODE_BUG_ON(xas->xa_node, xa_is_internal(curr));
+ if (xas_error(xas))
+ curr = xas->xa_node;
+ return curr;
+}
+
+/**
+ * __xa_erase() - Erase this entry from the XArray while locked.
+ * @xa: XArray.
+ * @index: Index into array.
+ *
+ * If the entry at this index is a multi-index entry then all indices will
+ * be erased, and the entry will no longer be a multi-index entry.
+ * This function expects the xa_lock to be held on entry.
+ *
+ * Context: Any context. Expects xa_lock to be held on entry. May
+ * release and reacquire xa_lock if @gfp flags permit.
+ * Return: The old entry at this index.
+ */
+void *__xa_erase(struct xarray *xa, unsigned long index)
+{
+ XA_STATE(xas, xa, index);
+ return xas_result(&xas, xas_store(&xas, NULL));
+}
+EXPORT_SYMBOL_GPL(__xa_erase);
+
+/**
+ * xa_store() - Store this entry in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * After this function returns, loads from this index will return @entry.
+ * Storing into an existing multislot entry updates the entry of every index.
+ * The marks associated with @index are unaffected unless @entry is %NULL.
+ *
+ * Context: Process context. Takes and releases the xa_lock. May sleep
+ * if the @gfp flags permit.
+ * Return: The old entry at this index on success, xa_err(-EINVAL) if @entry
+ * cannot be stored in an XArray, or xa_err(-ENOMEM) if memory allocation
+ * failed.
+ */
+void *xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
+{
+ XA_STATE(xas, xa, index);
+ void *curr;
+
+ if (WARN_ON_ONCE(xa_is_internal(entry)))
+ return XA_ERROR(-EINVAL);
+
+ do {
+ xas_lock(&xas);
+ curr = xas_store(&xas, entry);
+ if (xa_track_free(xa) && entry)
+ xas_clear_mark(&xas, XA_FREE_MARK);
+ xas_unlock(&xas);
+ } while (xas_nomem(&xas, gfp));
+
+ return xas_result(&xas, curr);
+}
+EXPORT_SYMBOL(xa_store);
+
+/**
+ * __xa_store() - Store this entry in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * You must already be holding the xa_lock when calling this function.
+ * It will drop the lock if needed to allocate memory, and then reacquire
+ * it afterwards.
+ *
+ * Context: Any context. Expects xa_lock to be held on entry. May
+ * release and reacquire xa_lock if @gfp flags permit.
+ * Return: The old entry at this index or xa_err() if an error happened.
+ */
+void *__xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
+{
+ XA_STATE(xas, xa, index);
+ void *curr;
+
+ if (WARN_ON_ONCE(xa_is_internal(entry)))
+ return XA_ERROR(-EINVAL);
+
+ do {
+ curr = xas_store(&xas, entry);
+ if (xa_track_free(xa) && entry)
+ xas_clear_mark(&xas, XA_FREE_MARK);
+ } while (__xas_nomem(&xas, gfp));
+
+ return xas_result(&xas, curr);
+}
+EXPORT_SYMBOL(__xa_store);
+
+/**
+ * xa_cmpxchg() - Conditionally replace an entry in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @old: Old value to test against.
+ * @entry: New value to place in array.
+ * @gfp: Memory allocation flags.
+ *
+ * If the entry at @index is the same as @old, replace it with @entry.
+ * If the return value is equal to @old, then the exchange was successful.
+ *
+ * Context: Process context. Takes and releases the xa_lock. May sleep
+ * if the @gfp flags permit.
+ * Return: The old value at this index or xa_err() if an error happened.
+ */
+void *xa_cmpxchg(struct xarray *xa, unsigned long index,
+ void *old, void *entry, gfp_t gfp)
+{
+ XA_STATE(xas, xa, index);
+ void *curr;
+
+ if (WARN_ON_ONCE(xa_is_internal(entry)))
+ return XA_ERROR(-EINVAL);
+
+ do {
+ xas_lock(&xas);
+ curr = xas_load(&xas);
+ if (curr == XA_ZERO_ENTRY)
+ curr = NULL;
+ if (curr == old) {
+ xas_store(&xas, entry);
+ if (xa_track_free(xa) && entry)
+ xas_clear_mark(&xas, XA_FREE_MARK);
+ }
+ xas_unlock(&xas);
+ } while (xas_nomem(&xas, gfp));
+
+ return xas_result(&xas, curr);
+}
+EXPORT_SYMBOL(xa_cmpxchg);
+
+/**
+ * __xa_cmpxchg() - Store this entry in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @old: Old value to test against.
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * You must already be holding the xa_lock when calling this function.
+ * It will drop the lock if needed to allocate memory, and then reacquire
+ * it afterwards.
+ *
+ * Context: Any context. Expects xa_lock to be held on entry. May
+ * release and reacquire xa_lock if @gfp flags permit.
+ * Return: The old entry at this index or xa_err() if an error happened.
+ */
+void *__xa_cmpxchg(struct xarray *xa, unsigned long index,
+ void *old, void *entry, gfp_t gfp)
+{
+ XA_STATE(xas, xa, index);
+ void *curr;
+
+ if (WARN_ON_ONCE(xa_is_internal(entry)))
+ return XA_ERROR(-EINVAL);
+
+ do {
+ curr = xas_load(&xas);
+ if (curr == XA_ZERO_ENTRY)
+ curr = NULL;
+ if (curr == old) {
+ xas_store(&xas, entry);
+ if (xa_track_free(xa) && entry)
+ xas_clear_mark(&xas, XA_FREE_MARK);
+ }
+ } while (__xas_nomem(&xas, gfp));
+
+ return xas_result(&xas, curr);
+}
+EXPORT_SYMBOL(__xa_cmpxchg);
+
+/**
+ * xa_reserve() - Reserve this index in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @gfp: Memory allocation flags.
+ *
+ * Ensures there is somewhere to store an entry at @index in the array.
+ * If there is already something stored at @index, this function does
+ * nothing. If there was nothing there, the entry is marked as reserved.
+ * Loads from @index will continue to see a %NULL pointer until a
+ * subsequent store to @index.
+ *
+ * If you do not use the entry that you have reserved, call xa_release()
+ * or xa_erase() to free any unnecessary memory.
+ *
+ * Context: Process context. Takes and releases the xa_lock, IRQ or BH safe
+ * if specified in XArray flags. May sleep if the @gfp flags permit.
+ * Return: 0 if the reservation succeeded or -ENOMEM if it failed.
+ */
+int xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp)
+{
+ XA_STATE(xas, xa, index);
+ unsigned int lock_type = xa_lock_type(xa);
+ void *curr;
+
+ do {
+ xas_lock_type(&xas, lock_type);
+ curr = xas_load(&xas);
+ if (!curr)
+ xas_store(&xas, XA_ZERO_ENTRY);
+ xas_unlock_type(&xas, lock_type);
+ } while (xas_nomem(&xas, gfp));
+
+ return xas_error(&xas);
+}
+EXPORT_SYMBOL(xa_reserve);
+
+#ifdef CONFIG_XARRAY_MULTI
+static void xas_set_range(struct xa_state *xas, unsigned long first,
+ unsigned long last)
+{
+ unsigned int shift = 0;
+ unsigned long sibs = last - first;
+ unsigned int offset = XA_CHUNK_MASK;
+
+ xas_set(xas, first);
+
+ while ((first & XA_CHUNK_MASK) == 0) {
+ if (sibs < XA_CHUNK_MASK)
+ break;
+ if ((sibs == XA_CHUNK_MASK) && (offset < XA_CHUNK_MASK))
+ break;
+ shift += XA_CHUNK_SHIFT;
+ if (offset == XA_CHUNK_MASK)
+ offset = sibs & XA_CHUNK_MASK;
+ sibs >>= XA_CHUNK_SHIFT;
+ first >>= XA_CHUNK_SHIFT;
+ }
+
+ offset = first & XA_CHUNK_MASK;
+ if (offset + sibs > XA_CHUNK_MASK)
+ sibs = XA_CHUNK_MASK - offset;
+ if ((((first + sibs + 1) << shift) - 1) > last)
+ sibs -= 1;
+
+ xas->xa_shift = shift;
+ xas->xa_sibs = sibs;
+}
+
+/**
+ * xa_store_range() - Store this entry at a range of indices in the XArray.
+ * @xa: XArray.
+ * @first: First index to affect.
+ * @last: Last index to affect.
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * After this function returns, loads from any index between @first and @last,
+ * inclusive will return @entry.
+ * Storing into an existing multislot entry updates the entry of every index.
+ * The marks associated with @index are unaffected unless @entry is %NULL.
+ *
+ * Context: Process context. Takes and releases the xa_lock. May sleep
+ * if the @gfp flags permit.
+ * Return: %NULL on success, xa_err(-EINVAL) if @entry cannot be stored in
+ * an XArray, or xa_err(-ENOMEM) if memory allocation failed.
+ */
+void *xa_store_range(struct xarray *xa, unsigned long first,
+ unsigned long last, void *entry, gfp_t gfp)
+{
+ XA_STATE(xas, xa, 0);
+
+ if (WARN_ON_ONCE(xa_is_internal(entry)))
+ return XA_ERROR(-EINVAL);
+ if (last < first)
+ return XA_ERROR(-EINVAL);
+
+ do {
+ xas_lock(&xas);
+ if (entry) {
+ unsigned int order = (last == ~0UL) ? 64 :
+ ilog2(last + 1);
+ xas_set_order(&xas, last, order);
+ xas_create(&xas);
+ if (xas_error(&xas))
+ goto unlock;
+ }
+ do {
+ xas_set_range(&xas, first, last);
+ xas_store(&xas, entry);
+ if (xas_error(&xas))
+ goto unlock;
+ first += xas_size(&xas);
+ } while (first <= last);
+unlock:
+ xas_unlock(&xas);
+ } while (xas_nomem(&xas, gfp));
+
+ return xas_result(&xas, NULL);
+}
+EXPORT_SYMBOL(xa_store_range);
+#endif /* CONFIG_XARRAY_MULTI */
+
+/**
+ * __xa_alloc() - Find somewhere to store this entry in the XArray.
+ * @xa: XArray.
+ * @id: Pointer to ID.
+ * @max: Maximum ID to allocate (inclusive).
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * Allocates an unused ID in the range specified by @id and @max.
+ * Updates the @id pointer with the index, then stores the entry at that
+ * index. A concurrent lookup will not see an uninitialised @id.
+ *
+ * Context: Any context. Expects xa_lock to be held on entry. May
+ * release and reacquire xa_lock if @gfp flags permit.
+ * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if
+ * there is no more space in the XArray.
+ */
+int __xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry, gfp_t gfp)
+{
+ XA_STATE(xas, xa, 0);
+ int err;
+
+ if (WARN_ON_ONCE(xa_is_internal(entry)))
+ return -EINVAL;
+ if (WARN_ON_ONCE(!xa_track_free(xa)))
+ return -EINVAL;
+
+ if (!entry)
+ entry = XA_ZERO_ENTRY;
+
+ do {
+ xas.xa_index = *id;
+ xas_find_marked(&xas, max, XA_FREE_MARK);
+ if (xas.xa_node == XAS_RESTART)
+ xas_set_err(&xas, -ENOSPC);
+ xas_store(&xas, entry);
+ xas_clear_mark(&xas, XA_FREE_MARK);
+ } while (__xas_nomem(&xas, gfp));
+
+ err = xas_error(&xas);
+ if (!err)
+ *id = xas.xa_index;
+ return err;
+}
+EXPORT_SYMBOL(__xa_alloc);
+
+/**
+ * __xa_set_mark() - Set this mark on this entry while locked.
+ * @xa: XArray.
+ * @index: Index of entry.
+ * @mark: Mark number.
+ *
+ * Attempting to set a mark on a NULL entry does not succeed.
+ *
+ * Context: Any context. Expects xa_lock to be held on entry.
+ */
+void __xa_set_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
+{
+ XA_STATE(xas, xa, index);
+ void *entry = xas_load(&xas);
+
+ if (entry)
+ xas_set_mark(&xas, mark);
+}
+EXPORT_SYMBOL_GPL(__xa_set_mark);
+
+/**
+ * __xa_clear_mark() - Clear this mark on this entry while locked.
+ * @xa: XArray.
+ * @index: Index of entry.
+ * @mark: Mark number.
+ *
+ * Context: Any context. Expects xa_lock to be held on entry.
+ */
+void __xa_clear_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
+{
+ XA_STATE(xas, xa, index);
+ void *entry = xas_load(&xas);
+
+ if (entry)
+ xas_clear_mark(&xas, mark);
+}
+EXPORT_SYMBOL_GPL(__xa_clear_mark);
+
+/**
+ * xa_get_mark() - Inquire whether this mark is set on this entry.
+ * @xa: XArray.
+ * @index: Index of entry.
+ * @mark: Mark number.
+ *
+ * This function uses the RCU read lock, so the result may be out of date
+ * by the time it returns. If you need the result to be stable, use a lock.
+ *
+ * Context: Any context. Takes and releases the RCU lock.
+ * Return: True if the entry at @index has this mark set, false if it doesn't.
+ */
+bool xa_get_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
+{
+ XA_STATE(xas, xa, index);
+ void *entry;
+
+ rcu_read_lock();
+ entry = xas_start(&xas);
+ while (xas_get_mark(&xas, mark)) {
+ if (!xa_is_node(entry))
+ goto found;
+ entry = xas_descend(&xas, xa_to_node(entry));
+ }
+ rcu_read_unlock();
+ return false;
+ found:
+ rcu_read_unlock();
+ return true;
+}
+EXPORT_SYMBOL(xa_get_mark);
+
+/**
+ * xa_set_mark() - Set this mark on this entry.
+ * @xa: XArray.
+ * @index: Index of entry.
+ * @mark: Mark number.
+ *
+ * Attempting to set a mark on a NULL entry does not succeed.
+ *
+ * Context: Process context. Takes and releases the xa_lock.
+ */
+void xa_set_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
+{
+ xa_lock(xa);
+ __xa_set_mark(xa, index, mark);
+ xa_unlock(xa);
+}
+EXPORT_SYMBOL(xa_set_mark);
+
+/**
+ * xa_clear_mark() - Clear this mark on this entry.
+ * @xa: XArray.
+ * @index: Index of entry.
+ * @mark: Mark number.
+ *
+ * Clearing a mark always succeeds.
+ *
+ * Context: Process context. Takes and releases the xa_lock.
+ */
+void xa_clear_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
+{
+ xa_lock(xa);
+ __xa_clear_mark(xa, index, mark);
+ xa_unlock(xa);
+}
+EXPORT_SYMBOL(xa_clear_mark);
+
+/**
+ * xa_find() - Search the XArray for an entry.
+ * @xa: XArray.
+ * @indexp: Pointer to an index.
+ * @max: Maximum index to search to.
+ * @filter: Selection criterion.
+ *
+ * Finds the entry in @xa which matches the @filter, and has the lowest
+ * index that is at least @indexp and no more than @max.
+ * If an entry is found, @indexp is updated to be the index of the entry.
+ * This function is protected by the RCU read lock, so it may not find
+ * entries which are being simultaneously added. It will not return an
+ * %XA_RETRY_ENTRY; if you need to see retry entries, use xas_find().
+ *
+ * Context: Any context. Takes and releases the RCU lock.
+ * Return: The entry, if found, otherwise %NULL.
+ */
+void *xa_find(struct xarray *xa, unsigned long *indexp,
+ unsigned long max, xa_mark_t filter)
+{
+ XA_STATE(xas, xa, *indexp);
+ void *entry;
+
+ rcu_read_lock();
+ do {
+ if ((__force unsigned int)filter < XA_MAX_MARKS)
+ entry = xas_find_marked(&xas, max, filter);
+ else
+ entry = xas_find(&xas, max);
+ } while (xas_retry(&xas, entry));
+ rcu_read_unlock();
+
+ if (entry)
+ *indexp = xas.xa_index;
+ return entry;
+}
+EXPORT_SYMBOL(xa_find);
+
+/**
+ * xa_find_after() - Search the XArray for a present entry.
+ * @xa: XArray.
+ * @indexp: Pointer to an index.
+ * @max: Maximum index to search to.
+ * @filter: Selection criterion.
+ *
+ * Finds the entry in @xa which matches the @filter and has the lowest
+ * index that is above @indexp and no more than @max.
+ * If an entry is found, @indexp is updated to be the index of the entry.
+ * This function is protected by the RCU read lock, so it may miss entries
+ * which are being simultaneously added. It will not return an
+ * %XA_RETRY_ENTRY; if you need to see retry entries, use xas_find().
+ *
+ * Context: Any context. Takes and releases the RCU lock.
+ * Return: The pointer, if found, otherwise %NULL.
+ */
+void *xa_find_after(struct xarray *xa, unsigned long *indexp,
+ unsigned long max, xa_mark_t filter)
+{
+ XA_STATE(xas, xa, *indexp + 1);
+ void *entry;
+
+ rcu_read_lock();
+ for (;;) {
+ if ((__force unsigned int)filter < XA_MAX_MARKS)
+ entry = xas_find_marked(&xas, max, filter);
+ else
+ entry = xas_find(&xas, max);
+ if (xas.xa_shift) {
+ if (xas.xa_index & ((1UL << xas.xa_shift) - 1))
+ continue;
+ } else {
+ if (xas.xa_offset < (xas.xa_index & XA_CHUNK_MASK))
+ continue;
+ }
+ if (!xas_retry(&xas, entry))
+ break;
+ }
+ rcu_read_unlock();
+
+ if (entry)
+ *indexp = xas.xa_index;
+ return entry;
+}
+EXPORT_SYMBOL(xa_find_after);
+
+static unsigned int xas_extract_present(struct xa_state *xas, void **dst,
+ unsigned long max, unsigned int n)
+{
+ void *entry;
+ unsigned int i = 0;
+
+ rcu_read_lock();
+ xas_for_each(xas, entry, max) {
+ if (xas_retry(xas, entry))
+ continue;
+ dst[i++] = entry;
+ if (i == n)
+ break;
+ }
+ rcu_read_unlock();
+
+ return i;
+}
+
+static unsigned int xas_extract_marked(struct xa_state *xas, void **dst,
+ unsigned long max, unsigned int n, xa_mark_t mark)
+{
+ void *entry;
+ unsigned int i = 0;
+
+ rcu_read_lock();
+ xas_for_each_marked(xas, entry, max, mark) {
+ if (xas_retry(xas, entry))
+ continue;
+ dst[i++] = entry;
+ if (i == n)
+ break;
+ }
+ rcu_read_unlock();
+
+ return i;
+}
+
+/**
+ * xa_extract() - Copy selected entries from the XArray into a normal array.
+ * @xa: The source XArray to copy from.
+ * @dst: The buffer to copy entries into.
+ * @start: The first index in the XArray eligible to be selected.
+ * @max: The last index in the XArray eligible to be selected.
+ * @n: The maximum number of entries to copy.
+ * @filter: Selection criterion.
+ *
+ * Copies up to @n entries that match @filter from the XArray. The
+ * copied entries will have indices between @start and @max, inclusive.
+ *
+ * The @filter may be an XArray mark value, in which case entries which are
+ * marked with that mark will be copied. It may also be %XA_PRESENT, in
+ * which case all entries which are not NULL will be copied.
+ *
+ * The entries returned may not represent a snapshot of the XArray at a
+ * moment in time. For example, if another thread stores to index 5, then
+ * index 10, calling xa_extract() may return the old contents of index 5
+ * and the new contents of index 10. Indices not modified while this
+ * function is running will not be skipped.
+ *
+ * If you need stronger guarantees, holding the xa_lock across calls to this
+ * function will prevent concurrent modification.
+ *
+ * Context: Any context. Takes and releases the RCU lock.
+ * Return: The number of entries copied.
+ */
+unsigned int xa_extract(struct xarray *xa, void **dst, unsigned long start,
+ unsigned long max, unsigned int n, xa_mark_t filter)
+{
+ XA_STATE(xas, xa, start);
+
+ if (!n)
+ return 0;
+
+ if ((__force unsigned int)filter < XA_MAX_MARKS)
+ return xas_extract_marked(&xas, dst, max, n, filter);
+ return xas_extract_present(&xas, dst, max, n);
+}
+EXPORT_SYMBOL(xa_extract);
+
+/**
+ * xa_destroy() - Free all internal data structures.
+ * @xa: XArray.
+ *
+ * After calling this function, the XArray is empty and has freed all memory
+ * allocated for its internal data structures. You are responsible for
+ * freeing the objects referenced by the XArray.
+ *
+ * Context: Any context. Takes and releases the xa_lock, interrupt-safe.
+ */
+void xa_destroy(struct xarray *xa)
+{
+ XA_STATE(xas, xa, 0);
+ unsigned long flags;
+ void *entry;
+
+ xas.xa_node = NULL;
+ xas_lock_irqsave(&xas, flags);
+ entry = xa_head_locked(xa);
+ RCU_INIT_POINTER(xa->xa_head, NULL);
+ xas_init_marks(&xas);
+ /* lockdep checks we're still holding the lock in xas_free_nodes() */
+ if (xa_is_node(entry))
+ xas_free_nodes(&xas, xa_to_node(entry));
+ xas_unlock_irqrestore(&xas, flags);
+}
+EXPORT_SYMBOL(xa_destroy);
+
+#ifdef XA_DEBUG
+void xa_dump_node(const struct xa_node *node)
+{
+ unsigned i, j;
+
+ if (!node)
+ return;
+ if ((unsigned long)node & 3) {
+ pr_cont("node %px\n", node);
+ return;
+ }
+
+ pr_cont("node %px %s %d parent %px shift %d count %d values %d "
+ "array %px list %px %px marks",
+ node, node->parent ? "offset" : "max", node->offset,
+ node->parent, node->shift, node->count, node->nr_values,
+ node->array, node->private_list.prev, node->private_list.next);
+ for (i = 0; i < XA_MAX_MARKS; i++)
+ for (j = 0; j < XA_MARK_LONGS; j++)
+ pr_cont(" %lx", node->marks[i][j]);
+ pr_cont("\n");
+}
+
+void xa_dump_index(unsigned long index, unsigned int shift)
+{
+ if (!shift)
+ pr_info("%lu: ", index);
+ else if (shift >= BITS_PER_LONG)
+ pr_info("0-%lu: ", ~0UL);
+ else
+ pr_info("%lu-%lu: ", index, index | ((1UL << shift) - 1));
+}
+
+void xa_dump_entry(const void *entry, unsigned long index, unsigned long shift)
+{
+ if (!entry)
+ return;
+
+ xa_dump_index(index, shift);
+
+ if (xa_is_node(entry)) {
+ if (shift == 0) {
+ pr_cont("%px\n", entry);
+ } else {
+ unsigned long i;
+ struct xa_node *node = xa_to_node(entry);
+ xa_dump_node(node);
+ for (i = 0; i < XA_CHUNK_SIZE; i++)
+ xa_dump_entry(node->slots[i],
+ index + (i << node->shift), node->shift);
+ }
+ } else if (xa_is_value(entry))
+ pr_cont("value %ld (0x%lx) [%px]\n", xa_to_value(entry),
+ xa_to_value(entry), entry);
+ else if (!xa_is_internal(entry))
+ pr_cont("%px\n", entry);
+ else if (xa_is_retry(entry))
+ pr_cont("retry (%ld)\n", xa_to_internal(entry));
+ else if (xa_is_sibling(entry))
+ pr_cont("sibling (slot %ld)\n", xa_to_sibling(entry));
+ else if (xa_is_zero(entry))
+ pr_cont("zero (%ld)\n", xa_to_internal(entry));
+ else
+ pr_cont("UNKNOWN ENTRY (%px)\n", entry);
+}
+
+void xa_dump(const struct xarray *xa)
+{
+ void *entry = xa->xa_head;
+ unsigned int shift = 0;
+
+ pr_info("xarray: %px head %px flags %x marks %d %d %d\n", xa, entry,
+ xa->xa_flags, xa_marked(xa, XA_MARK_0),
+ xa_marked(xa, XA_MARK_1), xa_marked(xa, XA_MARK_2));
+ if (xa_is_node(entry))
+ shift = xa_to_node(entry)->shift + XA_CHUNK_SHIFT;
+ xa_dump_entry(entry, 0, shift);
+}
+#endif