aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Kconfig8
-rw-r--r--lib/Kconfig.debug21
-rw-r--r--lib/Kconfig.kasan9
-rw-r--r--lib/Makefile8
-rw-r--r--lib/bch.c17
-rw-r--r--lib/chacha20.c6
-rw-r--r--lib/crc-t10dif.c57
-rw-r--r--lib/crc32.c11
-rw-r--r--lib/debug_locks.c6
-rw-r--r--lib/idr.c401
-rw-r--r--lib/memcat_p.c34
-rw-r--r--lib/nlattr.c269
-rw-r--r--lib/percpu-refcount.c28
-rw-r--r--lib/radix-tree.c834
-rw-r--r--lib/string.c1
-rw-r--r--lib/test_bpf.c1
-rw-r--r--lib/test_ida.c4
-rw-r--r--lib/test_kasan.c70
-rw-r--r--lib/test_memcat_p.c115
-rw-r--r--lib/test_xarray.c1238
-rw-r--r--lib/udivmoddi4.c310
-rw-r--r--lib/umoddi3.c32
-rw-r--r--lib/vsprintf.c245
-rw-r--r--lib/xarray.c2036
-rw-r--r--lib/xz/xz_crc32.c1
-rw-r--r--lib/xz/xz_private.h4
26 files changed, 4628 insertions, 1138 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index a3928d4438b5..d1573a16aa92 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -399,8 +399,11 @@ config INTERVAL_TREE
for more information.
-config RADIX_TREE_MULTIORDER
+config XARRAY_MULTI
bool
+ help
+ Support entries which occupy multiple consecutive indices in the
+ XArray.
config ASSOCIATIVE_ARRAY
bool
@@ -621,3 +624,6 @@ config GENERIC_LIB_CMPDI2
config GENERIC_LIB_UCMPDI2
bool
+
+config GENERIC_LIB_UMODDI3
+ bool
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 613316724c6a..e0ba05e6f6bd 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1179,7 +1179,7 @@ config LOCKDEP
bool
depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
select STACKTRACE
- select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC && !X86
+ select FRAME_POINTER if !MIPS && !PPC && !ARM && !S390 && !MICROBLAZE && !ARC && !X86
select KALLSYMS
select KALLSYMS_ALL
@@ -1277,13 +1277,13 @@ config WARN_ALL_UNSEEDED_RANDOM
time. This is really bad from a security perspective, and
so architecture maintainers really need to do what they can
to get the CRNG seeded sooner after the system is booted.
- However, since users can not do anything actionble to
+ However, since users cannot do anything actionable to
address this, by default the kernel will issue only a single
warning for the first use of unseeded randomness.
Say Y here if you want to receive warnings for all uses of
unseeded randomness. This will be of use primarily for
- those developers interersted in improving the security of
+ those developers interested in improving the security of
Linux kernels running on their architecture (or
subarchitecture).
@@ -1590,7 +1590,7 @@ config FAULT_INJECTION_STACKTRACE_FILTER
depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
depends on !X86_64
select STACKTRACE
- select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !X86
+ select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM && !ARC && !X86
help
Provide stacktrace filter for fault-injection capabilities
@@ -1599,7 +1599,7 @@ config LATENCYTOP
depends on DEBUG_KERNEL
depends on STACKTRACE_SUPPORT
depends on PROC_FS
- select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !X86
+ select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM && !ARC && !X86
select KALLSYMS
select KALLSYMS_ALL
select STACKTRACE
@@ -1813,6 +1813,9 @@ config TEST_BITFIELD
config TEST_UUID
tristate "Test functions located in the uuid module at runtime"
+config TEST_XARRAY
+ tristate "Test the XArray code at runtime"
+
config TEST_OVERFLOW
tristate "Test check_*_overflow() functions at runtime"
@@ -1965,6 +1968,14 @@ config TEST_DEBUG_VIRTUAL
If unsure, say N.
+config TEST_MEMCAT_P
+ tristate "Test memcat_p() helper function"
+ help
+ Test the memcat_p() helper for correctly merging two
+ pointer arrays together.
+
+ If unsure, say N.
+
endif # RUNTIME_TESTING_MENU
config MEMTEST
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index befb127507c0..d0bad1bd9a2b 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -57,6 +57,15 @@ config KASAN_INLINE
endchoice
+config KASAN_S390_4_LEVEL_PAGING
+ bool "KASan: use 4-level paging"
+ depends on KASAN && S390
+ help
+ Compiling the kernel with KASan disables automatic 3-level vs
+ 4-level paging selection. 3-level paging is used by default (up
+ to 3TB of RAM with KASan enabled). This options allows to force
+ 4-level paging instead.
+
config TEST_KASAN
tristate "Module for testing kasan for bug detection"
depends on m && KASAN
diff --git a/lib/Makefile b/lib/Makefile
index 8e0aca8a2f09..988949c4fd3a 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -18,13 +18,13 @@ KCOV_INSTRUMENT_debugobjects.o := n
KCOV_INSTRUMENT_dynamic_debug.o := n
lib-y := ctype.o string.o vsprintf.o cmdline.o \
- rbtree.o radix-tree.o timerqueue.o\
+ rbtree.o radix-tree.o timerqueue.o xarray.o \
idr.o int_sqrt.o extable.o \
sha1.o chacha20.o irq_regs.o argv_split.o \
flex_proportions.o ratelimit.o show_mem.o \
is_single_threaded.o plist.o decompress.o kobject_uevent.o \
earlycpio.o seq_buf.o siphash.o dec_and_lock.o \
- nmi_backtrace.o nodemask.o win_minmax.o
+ nmi_backtrace.o nodemask.o win_minmax.o memcat_p.o
lib-$(CONFIG_PRINTK) += dump_stack.o
lib-$(CONFIG_MMU) += ioremap.o
@@ -70,9 +70,11 @@ obj-$(CONFIG_TEST_PRINTF) += test_printf.o
obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
obj-$(CONFIG_TEST_BITFIELD) += test_bitfield.o
obj-$(CONFIG_TEST_UUID) += test_uuid.o
+obj-$(CONFIG_TEST_XARRAY) += test_xarray.o
obj-$(CONFIG_TEST_PARMAN) += test_parman.o
obj-$(CONFIG_TEST_KMOD) += test_kmod.o
obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o
+obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o
ifeq ($(CONFIG_DEBUG_KOBJECT),y)
CFLAGS_kobject.o += -DDEBUG
@@ -121,7 +123,6 @@ obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/
obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/
obj-$(CONFIG_REED_SOLOMON) += reed_solomon/
obj-$(CONFIG_BCH) += bch.o
-CFLAGS_bch.o := $(call cc-option,-Wframe-larger-than=4500)
obj-$(CONFIG_LZO_COMPRESS) += lzo/
obj-$(CONFIG_LZO_DECOMPRESS) += lzo/
obj-$(CONFIG_LZ4_COMPRESS) += lz4/
@@ -273,3 +274,4 @@ obj-$(CONFIG_GENERIC_LIB_LSHRDI3) += lshrdi3.o
obj-$(CONFIG_GENERIC_LIB_MULDI3) += muldi3.o
obj-$(CONFIG_GENERIC_LIB_CMPDI2) += cmpdi2.o
obj-$(CONFIG_GENERIC_LIB_UCMPDI2) += ucmpdi2.o
+obj-$(CONFIG_GENERIC_LIB_UMODDI3) += umoddi3.o udivmoddi4.o
diff --git a/lib/bch.c b/lib/bch.c
index 7b0f2006698b..5db6d3a4c8a6 100644
--- a/lib/bch.c
+++ b/lib/bch.c
@@ -79,20 +79,19 @@
#define GF_T(_p) (CONFIG_BCH_CONST_T)
#define GF_N(_p) ((1 << (CONFIG_BCH_CONST_M))-1)
#define BCH_MAX_M (CONFIG_BCH_CONST_M)
+#define BCH_MAX_T (CONFIG_BCH_CONST_T)
#else
#define GF_M(_p) ((_p)->m)
#define GF_T(_p) ((_p)->t)
#define GF_N(_p) ((_p)->n)
-#define BCH_MAX_M 15
+#define BCH_MAX_M 15 /* 2KB */
+#define BCH_MAX_T 64 /* 64 bit correction */
#endif
-#define BCH_MAX_T (((1 << BCH_MAX_M) - 1) / BCH_MAX_M)
-
#define BCH_ECC_WORDS(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32)
#define BCH_ECC_BYTES(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8)
#define BCH_ECC_MAX_WORDS DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 32)
-#define BCH_ECC_MAX_BYTES DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 8)
#ifndef dbg
#define dbg(_fmt, args...) do {} while (0)
@@ -202,6 +201,9 @@ void encode_bch(struct bch_control *bch, const uint8_t *data,
const uint32_t * const tab3 = tab2 + 256*(l+1);
const uint32_t *pdata, *p0, *p1, *p2, *p3;
+ if (WARN_ON(r_bytes > sizeof(r)))
+ return;
+
if (ecc) {
/* load ecc parity bytes into internal 32-bit buffer */
load_ecc8(bch, bch->ecc_buf, ecc);
@@ -1285,6 +1287,13 @@ struct bch_control *init_bch(int m, int t, unsigned int prim_poly)
*/
goto fail;
+ if (t > BCH_MAX_T)
+ /*
+ * we can support larger than 64 bits if necessary, at the
+ * cost of higher stack usage.
+ */
+ goto fail;
+
/* sanity checks */
if ((t < 1) || (m*t >= ((1 << m)-1)))
/* invalid t value */
diff --git a/lib/chacha20.c b/lib/chacha20.c
index c1cc50fb68c9..d907fec6a9ed 100644
--- a/lib/chacha20.c
+++ b/lib/chacha20.c
@@ -16,9 +16,9 @@
#include <asm/unaligned.h>
#include <crypto/chacha20.h>
-void chacha20_block(u32 *state, u32 *stream)
+void chacha20_block(u32 *state, u8 *stream)
{
- u32 x[16], *out = stream;
+ u32 x[16];
int i;
for (i = 0; i < ARRAY_SIZE(x); i++)
@@ -67,7 +67,7 @@ void chacha20_block(u32 *state, u32 *stream)
}
for (i = 0; i < ARRAY_SIZE(x); i++)
- out[i] = cpu_to_le32(x[i] + state[i]);
+ put_unaligned_le32(x[i] + state[i], &stream[i * sizeof(u32)]);
state[12]++;
}
diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c
index 1ad33e555805..4d0d47c1ffbd 100644
--- a/lib/crc-t10dif.c
+++ b/lib/crc-t10dif.c
@@ -14,10 +14,47 @@
#include <linux/err.h>
#include <linux/init.h>
#include <crypto/hash.h>
+#include <crypto/algapi.h>
#include <linux/static_key.h>
+#include <linux/notifier.h>
-static struct crypto_shash *crct10dif_tfm;
+static struct crypto_shash __rcu *crct10dif_tfm;
static struct static_key crct10dif_fallback __read_mostly;
+static DEFINE_MUTEX(crc_t10dif_mutex);
+
+static int crc_t10dif_rehash(struct notifier_block *self, unsigned long val, void *data)
+{
+ struct crypto_alg *alg = data;
+ struct crypto_shash *new, *old;
+
+ if (val != CRYPTO_MSG_ALG_LOADED ||
+ static_key_false(&crct10dif_fallback) ||
+ strncmp(alg->cra_name, CRC_T10DIF_STRING, strlen(CRC_T10DIF_STRING)))
+ return 0;
+
+ mutex_lock(&crc_t10dif_mutex);
+ old = rcu_dereference_protected(crct10dif_tfm,
+ lockdep_is_held(&crc_t10dif_mutex));
+ if (!old) {
+ mutex_unlock(&crc_t10dif_mutex);
+ return 0;
+ }
+ new = crypto_alloc_shash("crct10dif", 0, 0);
+ if (IS_ERR(new)) {
+ mutex_unlock(&crc_t10dif_mutex);
+ return 0;
+ }
+ rcu_assign_pointer(crct10dif_tfm, new);
+ mutex_unlock(&crc_t10dif_mutex);
+
+ synchronize_rcu();
+ crypto_free_shash(old);
+ return 0;
+}
+
+static struct notifier_block crc_t10dif_nb = {
+ .notifier_call = crc_t10dif_rehash,
+};
__u16 crc_t10dif_update(__u16 crc, const unsigned char *buffer, size_t len)
{
@@ -30,11 +67,14 @@ __u16 crc_t10dif_update(__u16 crc, const unsigned char *buffer, size_t len)
if (static_key_false(&crct10dif_fallback))
return crc_t10dif_generic(crc, buffer, len);
- desc.shash.tfm = crct10dif_tfm;
+ rcu_read_lock();
+ desc.shash.tfm = rcu_dereference(crct10dif_tfm);
desc.shash.flags = 0;
*(__u16 *)desc.ctx = crc;
err = crypto_shash_update(&desc.shash, buffer, len);
+ rcu_read_unlock();
+
BUG_ON(err);
return *(__u16 *)desc.ctx;
@@ -49,6 +89,7 @@ EXPORT_SYMBOL(crc_t10dif);
static int __init crc_t10dif_mod_init(void)
{
+ crypto_register_notifier(&crc_t10dif_nb);
crct10dif_tfm = crypto_alloc_shash("crct10dif", 0, 0);
if (IS_ERR(crct10dif_tfm)) {
static_key_slow_inc(&crct10dif_fallback);
@@ -59,12 +100,24 @@ static int __init crc_t10dif_mod_init(void)
static void __exit crc_t10dif_mod_fini(void)
{
+ crypto_unregister_notifier(&crc_t10dif_nb);
crypto_free_shash(crct10dif_tfm);
}
module_init(crc_t10dif_mod_init);
module_exit(crc_t10dif_mod_fini);
+static int crc_t10dif_transform_show(char *buffer, const struct kernel_param *kp)
+{
+ if (static_key_false(&crct10dif_fallback))
+ return sprintf(buffer, "fallback\n");
+
+ return sprintf(buffer, "%s\n",
+ crypto_tfm_alg_driver_name(crypto_shash_tfm(crct10dif_tfm)));
+}
+
+module_param_call(transform, NULL, crc_t10dif_transform_show, NULL, 0644);
+
MODULE_DESCRIPTION("T10 DIF CRC calculation");
MODULE_LICENSE("GPL");
MODULE_SOFTDEP("pre: crct10dif");
diff --git a/lib/crc32.c b/lib/crc32.c
index a6c9afafc8c8..45b1d67a1767 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -183,21 +183,21 @@ static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
}
#if CRC_LE_BITS == 1
-u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len)
{
return crc32_le_generic(crc, p, len, NULL, CRC32_POLY_LE);
}
-u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len)
{
return crc32_le_generic(crc, p, len, NULL, CRC32C_POLY_LE);
}
#else
-u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len)
{
return crc32_le_generic(crc, p, len,
(const u32 (*)[256])crc32table_le, CRC32_POLY_LE);
}
-u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
+u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len)
{
return crc32_le_generic(crc, p, len,
(const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE);
@@ -206,6 +206,9 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
EXPORT_SYMBOL(crc32_le);
EXPORT_SYMBOL(__crc32c_le);
+u32 crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le);
+u32 __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le);
+
/*
* This multiplies the polynomials x and y modulo the given modulus.
* This follows the "little-endian" CRC convention that the lsbit
diff --git a/lib/debug_locks.c b/lib/debug_locks.c
index 96c4c633d95e..ce51749cc145 100644
--- a/lib/debug_locks.c
+++ b/lib/debug_locks.c
@@ -21,7 +21,7 @@
* that would just muddy the log. So we report the first one and
* shut up after that.
*/
-int debug_locks = 1;
+int debug_locks __read_mostly = 1;
EXPORT_SYMBOL_GPL(debug_locks);
/*
@@ -29,7 +29,7 @@ EXPORT_SYMBOL_GPL(debug_locks);
* 'silent failure': nothing is printed to the console when
* a locking bug is detected.
*/
-int debug_locks_silent;
+int debug_locks_silent __read_mostly;
EXPORT_SYMBOL_GPL(debug_locks_silent);
/*
@@ -37,7 +37,7 @@ EXPORT_SYMBOL_GPL(debug_locks_silent);
*/
int debug_locks_off(void)
{
- if (__debug_locks_off()) {
+ if (debug_locks && __debug_locks_off()) {
if (!debug_locks_silent) {
console_verbose();
return 1;
diff --git a/lib/idr.c b/lib/idr.c
index fab2fd5bc326..cb1db9b8d3f6 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -6,8 +6,6 @@
#include <linux/spinlock.h>
#include <linux/xarray.h>
-DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap);
-
/**
* idr_alloc_u32() - Allocate an ID.
* @idr: IDR handle.
@@ -39,10 +37,8 @@ int idr_alloc_u32(struct idr *idr, void *ptr, u32 *nextid,
unsigned int base = idr->idr_base;
unsigned int id = *nextid;
- if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
- return -EINVAL;
- if (WARN_ON_ONCE(!(idr->idr_rt.gfp_mask & ROOT_IS_IDR)))
- idr->idr_rt.gfp_mask |= IDR_RT_MARKER;
+ if (WARN_ON_ONCE(!(idr->idr_rt.xa_flags & ROOT_IS_IDR)))
+ idr->idr_rt.xa_flags |= IDR_RT_MARKER;
id = (id < base) ? 0 : id - base;
radix_tree_iter_init(&iter, id);
@@ -295,15 +291,13 @@ void *idr_replace(struct idr *idr, void *ptr, unsigned long id)
void __rcu **slot = NULL;
void *entry;
- if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
- return ERR_PTR(-EINVAL);
id -= idr->idr_base;
entry = __radix_tree_lookup(&idr->idr_rt, id, &node, &slot);
if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE))
return ERR_PTR(-ENOENT);
- __radix_tree_replace(&idr->idr_rt, node, slot, ptr, NULL);
+ __radix_tree_replace(&idr->idr_rt, node, slot, ptr);
return entry;
}
@@ -324,6 +318,9 @@ EXPORT_SYMBOL(idr_replace);
* free the individual IDs in it. You can use ida_is_empty() to find
* out whether the IDA has any IDs currently allocated.
*
+ * The IDA handles its own locking. It is safe to call any of the IDA
+ * functions without synchronisation in your code.
+ *
* IDs are currently limited to the range [0-INT_MAX]. If this is an awkward
* limitation, it should be quite straightforward to raise the maximum.
*/
@@ -331,161 +328,197 @@ EXPORT_SYMBOL(idr_replace);
/*
* Developer's notes:
*
- * The IDA uses the functionality provided by the IDR & radix tree to store
- * bitmaps in each entry. The IDR_FREE tag means there is at least one bit
- * free, unlike the IDR where it means at least one entry is free.
+ * The IDA uses the functionality provided by the XArray to store bitmaps in
+ * each entry. The XA_FREE_MARK is only cleared when all bits in the bitmap
+ * have been set.
*
- * I considered telling the radix tree that each slot is an order-10 node
- * and storing the bit numbers in the radix tree, but the radix tree can't
- * allow a single multiorder entry at index 0, which would significantly
- * increase memory consumption for the IDA. So instead we divide the index
- * by the number of bits in the leaf bitmap before doing a radix tree lookup.
+ * I considered telling the XArray that each slot is an order-10 node
+ * and indexing by bit number, but the XArray can't allow a single multi-index
+ * entry in the head, which would significantly increase memory consumption
+ * for the IDA. So instead we divide the index by the number of bits in the
+ * leaf bitmap before doing a radix tree lookup.
*
* As an optimisation, if there are only a few low bits set in any given
- * leaf, instead of allocating a 128-byte bitmap, we use the 'exceptional
- * entry' functionality of the radix tree to store BITS_PER_LONG - 2 bits
- * directly in the entry. By being really tricksy, we could store
- * BITS_PER_LONG - 1 bits, but there're diminishing returns after optimising
- * for 0-3 allocated IDs.
- *
- * We allow the radix tree 'exceptional' count to get out of date. Nothing
- * in the IDA nor the radix tree code checks it. If it becomes important
- * to maintain an accurate exceptional count, switch the rcu_assign_pointer()
- * calls to radix_tree_iter_replace() which will correct the exceptional
- * count.
- *
- * The IDA always requires a lock to alloc/free. If we add a 'test_bit'
+ * leaf, instead of allocating a 128-byte bitmap, we store the bits
+ * as a value entry. Value entries never have the XA_FREE_MARK cleared
+ * because we can always convert them into a bitmap entry.
+ *
+ * It would be possible to optimise further; once we've run out of a
+ * single 128-byte bitmap, we currently switch to a 576-byte node, put
+ * the 128-byte bitmap in the first entry and then start allocating extra
+ * 128-byte entries. We could instead use the 512 bytes of the node's
+ * data as a bitmap before moving to that scheme. I do not believe this
+ * is a worthwhile optimisation; Rasmus Villemoes surveyed the current
+ * users of the IDA and almost none of them use more than 1024 entries.
+ * Those that do use more than the 8192 IDs that the 512 bytes would
+ * provide.
+ *
+ * The IDA always uses a lock to alloc/free. If we add a 'test_bit'
* equivalent, it will still need locking. Going to RCU lookup would require
* using RCU to free bitmaps, and that's not trivial without embedding an
* RCU head in the bitmap, which adds a 2-pointer overhead to each 128-byte
* bitmap, which is excessive.
*/
-#define IDA_MAX (0x80000000U / IDA_BITMAP_BITS - 1)
-
-static int ida_get_new_above(struct ida *ida, int start)
+/**
+ * ida_alloc_range() - Allocate an unused ID.
+ * @ida: IDA handle.
+ * @min: Lowest ID to allocate.
+ * @max: Highest ID to allocate.
+ * @gfp: Memory allocation flags.
+ *
+ * Allocate an ID between @min and @max, inclusive. The allocated ID will
+ * not exceed %INT_MAX, even if @max is larger.
+ *
+ * Context: Any context.
+ * Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
+ * or %-ENOSPC if there are no free IDs.
+ */
+int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max,
+ gfp_t gfp)
{
- struct radix_tree_root *root = &ida->ida_rt;
- void __rcu **slot;
- struct radix_tree_iter iter;
- struct ida_bitmap *bitmap;
- unsigned long index;
- unsigned bit, ebit;
- int new;
-
- index = start / IDA_BITMAP_BITS;
- bit = start % IDA_BITMAP_BITS;
- ebit = bit + RADIX_TREE_EXCEPTIONAL_SHIFT;
-
- slot = radix_tree_iter_init(&iter, index);
- for (;;) {
- if (slot)
- slot = radix_tree_next_slot(slot, &iter,
- RADIX_TREE_ITER_TAGGED);
- if (!slot) {
- slot = idr_get_free(root, &iter, GFP_NOWAIT, IDA_MAX);
- if (IS_ERR(slot)) {
- if (slot == ERR_PTR(-ENOMEM))
- return -EAGAIN;
- return PTR_ERR(slot);
+ XA_STATE(xas, &ida->xa, min / IDA_BITMAP_BITS);
+ unsigned bit = min % IDA_BITMAP_BITS;
+ unsigned long flags;
+ struct ida_bitmap *bitmap, *alloc = NULL;
+
+ if ((int)min < 0)
+ return -ENOSPC;
+
+ if ((int)max < 0)
+ max = INT_MAX;
+
+retry:
+ xas_lock_irqsave(&xas, flags);
+next:
+ bitmap = xas_find_marked(&xas, max / IDA_BITMAP_BITS, XA_FREE_MARK);
+ if (xas.xa_index > min / IDA_BITMAP_BITS)
+ bit = 0;
+ if (xas.xa_index * IDA_BITMAP_BITS + bit > max)
+ goto nospc;
+
+ if (xa_is_value(bitmap)) {
+ unsigned long tmp = xa_to_value(bitmap);
+
+ if (bit < BITS_PER_XA_VALUE) {
+ bit = find_next_zero_bit(&tmp, BITS_PER_XA_VALUE, bit);
+ if (xas.xa_index * IDA_BITMAP_BITS + bit > max)
+ goto nospc;
+ if (bit < BITS_PER_XA_VALUE) {
+ tmp |= 1UL << bit;
+ xas_store(&xas, xa_mk_value(tmp));
+ goto out;
}
}
- if (iter.index > index) {
- bit = 0;
- ebit = RADIX_TREE_EXCEPTIONAL_SHIFT;
- }
- new = iter.index * IDA_BITMAP_BITS;
- bitmap = rcu_dereference_raw(*slot);
- if (radix_tree_exception(bitmap)) {
- unsigned long tmp = (unsigned long)bitmap;
- ebit = find_next_zero_bit(&tmp, BITS_PER_LONG, ebit);
- if (ebit < BITS_PER_LONG) {
- tmp |= 1UL << ebit;
- rcu_assign_pointer(*slot, (void *)tmp);
- return new + ebit -
- RADIX_TREE_EXCEPTIONAL_SHIFT;
- }
- bitmap = this_cpu_xchg(ida_bitmap, NULL);
- if (!bitmap)
- return -EAGAIN;
- bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT;
- rcu_assign_pointer(*slot, bitmap);
+ bitmap = alloc;
+ if (!bitmap)
+ bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT);
+ if (!bitmap)
+ goto alloc;
+ bitmap->bitmap[0] = tmp;
+ xas_store(&xas, bitmap);
+ if (xas_error(&xas)) {
+ bitmap->bitmap[0] = 0;
+ goto out;
}
+ }
- if (bitmap) {
- bit = find_next_zero_bit(bitmap->bitmap,
- IDA_BITMAP_BITS, bit);
- new += bit;
- if (new < 0)
- return -ENOSPC;
- if (bit == IDA_BITMAP_BITS)
- continue;
+ if (bitmap) {
+ bit = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, bit);
+ if (xas.xa_index * IDA_BITMAP_BITS + bit > max)
+ goto nospc;
+ if (bit == IDA_BITMAP_BITS)
+ goto next;
- __set_bit(bit, bitmap->bitmap);
- if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))
- radix_tree_iter_tag_clear(root, &iter,
- IDR_FREE);
+ __set_bit(bit, bitmap->bitmap);
+ if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))
+ xas_clear_mark(&xas, XA_FREE_MARK);
+ } else {
+ if (bit < BITS_PER_XA_VALUE) {
+ bitmap = xa_mk_value(1UL << bit);
} else {
- new += bit;
- if (new < 0)
- return -ENOSPC;
- if (ebit < BITS_PER_LONG) {
- bitmap = (void *)((1UL << ebit) |
- RADIX_TREE_EXCEPTIONAL_ENTRY);
- radix_tree_iter_replace(root, &iter, slot,
- bitmap);
- return new;
- }
- bitmap = this_cpu_xchg(ida_bitmap, NULL);
+ bitmap = alloc;
if (!bitmap)
- return -EAGAIN;
+ bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT);
+ if (!bitmap)
+ goto alloc;
__set_bit(bit, bitmap->bitmap);
- radix_tree_iter_replace(root, &iter, slot, bitmap);
}
-
- return new;
+ xas_store(&xas, bitmap);
+ }
+out:
+ xas_unlock_irqrestore(&xas, flags);
+ if (xas_nomem(&xas, gfp)) {
+ xas.xa_index = min / IDA_BITMAP_BITS;
+ bit = min % IDA_BITMAP_BITS;
+ goto retry;
}
+ if (bitmap != alloc)
+ kfree(alloc);
+ if (xas_error(&xas))
+ return xas_error(&xas);
+ return xas.xa_index * IDA_BITMAP_BITS + bit;
+alloc:
+ xas_unlock_irqrestore(&xas, flags);
+ alloc = kzalloc(sizeof(*bitmap), gfp);
+ if (!alloc)
+ return -ENOMEM;
+ xas_set(&xas, min / IDA_BITMAP_BITS);
+ bit = min % IDA_BITMAP_BITS;
+ goto retry;
+nospc:
+ xas_unlock_irqrestore(&xas, flags);
+ return -ENOSPC;
}
+EXPORT_SYMBOL(ida_alloc_range);
-static void ida_remove(struct ida *ida, int id)
+/**
+ * ida_free() - Release an allocated ID.
+ * @ida: IDA handle.
+ * @id: Previously allocated ID.
+ *
+ * Context: Any context.
+ */
+void ida_free(struct ida *ida, unsigned int id)
{
- unsigned long index = id / IDA_BITMAP_BITS;
- unsigned offset = id % IDA_BITMAP_BITS;
+ XA_STATE(xas, &ida->xa, id / IDA_BITMAP_BITS);
+ unsigned bit = id % IDA_BITMAP_BITS;
struct ida_bitmap *bitmap;
- unsigned long *btmp;
- struct radix_tree_iter iter;
- void __rcu **slot;
+ unsigned long flags;
- slot = radix_tree_iter_lookup(&ida->ida_rt, &iter, index);
- if (!slot)
- goto err;
+ BUG_ON((int)id < 0);
+
+ xas_lock_irqsave(&xas, flags);
+ bitmap = xas_load(&xas);
- bitmap = rcu_dereference_raw(*slot);
- if (radix_tree_exception(bitmap)) {
- btmp = (unsigned long *)slot;
- offset += RADIX_TREE_EXCEPTIONAL_SHIFT;
- if (offset >= BITS_PER_LONG)
+ if (xa_is_value(bitmap)) {
+ unsigned long v = xa_to_value(bitmap);
+ if (bit >= BITS_PER_XA_VALUE)
goto err;
+ if (!(v & (1UL << bit)))
+ goto err;
+ v &= ~(1UL << bit);
+ if (!v)
+ goto delete;
+ xas_store(&xas, xa_mk_value(v));
} else {
- btmp = bitmap->bitmap;
- }
- if (!test_bit(offset, btmp))
- goto err;
-
- __clear_bit(offset, btmp);
- radix_tree_iter_tag_set(&ida->ida_rt, &iter, IDR_FREE);
- if (radix_tree_exception(bitmap)) {
- if (rcu_dereference_raw(*slot) ==
- (void *)RADIX_TREE_EXCEPTIONAL_ENTRY)
- radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
- } else if (bitmap_empty(btmp, IDA_BITMAP_BITS)) {
- kfree(bitmap);
- radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
+ if (!test_bit(bit, bitmap->bitmap))
+ goto err;
+ __clear_bit(bit, bitmap->bitmap);
+ xas_set_mark(&xas, XA_FREE_MARK);
+ if (bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) {
+ kfree(bitmap);
+delete:
+ xas_store(&xas, NULL);
+ }
}
+ xas_unlock_irqrestore(&xas, flags);
return;
err:
+ xas_unlock_irqrestore(&xas, flags);
WARN(1, "ida_free called for id=%d which is not allocated.\n", id);
}
+EXPORT_SYMBOL(ida_free);
/**
* ida_destroy() - Free all IDs.
@@ -500,80 +533,60 @@ static void ida_remove(struct ida *ida, int id)
*/
void ida_destroy(struct ida *ida)
{
+ XA_STATE(xas, &ida->xa, 0);
+ struct ida_bitmap *bitmap;
unsigned long flags;
- struct radix_tree_iter iter;
- void __rcu **slot;
- xa_lock_irqsave(&ida->ida_rt, flags);
- radix_tree_for_each_slot(slot, &ida->ida_rt, &iter, 0) {
- struct ida_bitmap *bitmap = rcu_dereference_raw(*slot);
- if (!radix_tree_exception(bitmap))
+ xas_lock_irqsave(&xas, flags);
+ xas_for_each(&xas, bitmap, ULONG_MAX) {
+ if (!xa_is_value(bitmap))
kfree(bitmap);
- radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
+ xas_store(&xas, NULL);
}
- xa_unlock_irqrestore(&ida->ida_rt, flags);
+ xas_unlock_irqrestore(&xas, flags);
}
EXPORT_SYMBOL(ida_destroy);
-/**
- * ida_alloc_range() - Allocate an unused ID.
- * @ida: IDA handle.
- * @min: Lowest ID to allocate.
- * @max: Highest ID to allocate.
- * @gfp: Memory allocation flags.
- *
- * Allocate an ID between @min and @max, inclusive. The allocated ID will
- * not exceed %INT_MAX, even if @max is larger.
- *
- * Context: Any context.
- * Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
- * or %-ENOSPC if there are no free IDs.
- */
-int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max,
- gfp_t gfp)
-{
- int id = 0;
- unsigned long flags;
+#ifndef __KERNEL__
+extern void xa_dump_index(unsigned long index, unsigned int shift);
+#define IDA_CHUNK_SHIFT ilog2(IDA_BITMAP_BITS)
- if ((int)min < 0)
- return -ENOSPC;
-
- if ((int)max < 0)
- max = INT_MAX;
-
-again:
- xa_lock_irqsave(&ida->ida_rt, flags);
- id = ida_get_new_above(ida, min);
- if (id > (int)max) {
- ida_remove(ida, id);
- id = -ENOSPC;
- }
- xa_unlock_irqrestore(&ida->ida_rt, flags);
+static void ida_dump_entry(void *entry, unsigned long index)
+{
+ unsigned long i;
+
+ if (!entry)
+ return;
+
+ if (xa_is_node(entry)) {
+ struct xa_node *node = xa_to_node(entry);
+ unsigned int shift = node->shift + IDA_CHUNK_SHIFT +
+ XA_CHUNK_SHIFT;
+
+ xa_dump_index(index * IDA_BITMAP_BITS, shift);
+ xa_dump_node(node);
+ for (i = 0; i < XA_CHUNK_SIZE; i++)
+ ida_dump_entry(node->slots[i],
+ index | (i << node->shift));
+ } else if (xa_is_value(entry)) {
+ xa_dump_index(index * IDA_BITMAP_BITS, ilog2(BITS_PER_LONG));
+ pr_cont("value: data %lx [%px]\n", xa_to_value(entry), entry);
+ } else {
+ struct ida_bitmap *bitmap = entry;
- if (unlikely(id == -EAGAIN)) {
- if (!ida_pre_get(ida, gfp))
- return -ENOMEM;
- goto again;
+ xa_dump_index(index * IDA_BITMAP_BITS, IDA_CHUNK_SHIFT);
+ pr_cont("bitmap: %p data", bitmap);
+ for (i = 0; i < IDA_BITMAP_LONGS; i++)
+ pr_cont(" %lx", bitmap->bitmap[i]);
+ pr_cont("\n");
}
-
- return id;
}
-EXPORT_SYMBOL(ida_alloc_range);
-/**
- * ida_free() - Release an allocated ID.
- * @ida: IDA handle.
- * @id: Previously allocated ID.
- *
- * Context: Any context.
- */
-void ida_free(struct ida *ida, unsigned int id)
+static void ida_dump(struct ida *ida)
{
- unsigned long flags;
-
- BUG_ON((int)id < 0);
- xa_lock_irqsave(&ida->ida_rt, flags);
- ida_remove(ida, id);
- xa_unlock_irqrestore(&ida->ida_rt, flags);
+ struct xarray *xa = &ida->xa;
+ pr_debug("ida: %p node %p free %d\n", ida, xa->xa_head,
+ xa->xa_flags >> ROOT_TAG_SHIFT);
+ ida_dump_entry(xa->xa_head, 0);
}
-EXPORT_SYMBOL(ida_free);
+#endif
diff --git a/lib/memcat_p.c b/lib/memcat_p.c
new file mode 100644
index 000000000000..b810fbc66962
--- /dev/null
+++ b/lib/memcat_p.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/slab.h>
+
+/*
+ * Merge two NULL-terminated pointer arrays into a newly allocated
+ * array, which is also NULL-terminated. Nomenclature is inspired by
+ * memset_p() and memcat() found elsewhere in the kernel source tree.
+ */
+void **__memcat_p(void **a, void **b)
+{
+ void **p = a, **new;
+ int nr;
+
+ /* count the elements in both arrays */
+ for (nr = 0, p = a; *p; nr++, p++)
+ ;
+ for (p = b; *p; nr++, p++)
+ ;
+ /* one for the NULL-terminator */
+ nr++;
+
+ new = kmalloc_array(nr, sizeof(void *), GFP_KERNEL);
+ if (!new)
+ return NULL;
+
+ /* nr -> last index; p points to NULL in b[] */
+ for (nr--; nr >= 0; nr--, p = p == b ? &a[nr] : p - 1)
+ new[nr] = *p;
+
+ return new;
+}
+EXPORT_SYMBOL_GPL(__memcat_p);
+
diff --git a/lib/nlattr.c b/lib/nlattr.c
index e335bcafa9e4..d26de6156b97 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -45,12 +45,11 @@ static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = {
};
static int validate_nla_bitfield32(const struct nlattr *nla,
- u32 *valid_flags_allowed)
+ const u32 *valid_flags_mask)
{
const struct nla_bitfield32 *bf = nla_data(nla);
- u32 *valid_flags_mask = valid_flags_allowed;
- if (!valid_flags_allowed)
+ if (!valid_flags_mask)
return -EINVAL;
/*disallow invalid bit selector */
@@ -68,11 +67,99 @@ static int validate_nla_bitfield32(const struct nlattr *nla,
return 0;
}
+static int nla_validate_array(const struct nlattr *head, int len, int maxtype,
+ const struct nla_policy *policy,
+ struct netlink_ext_ack *extack)
+{
+ const struct nlattr *entry;
+ int rem;
+
+ nla_for_each_attr(entry, head, len, rem) {
+ int ret;
+
+ if (nla_len(entry) == 0)
+ continue;
+
+ if (nla_len(entry) < NLA_HDRLEN) {
+ NL_SET_ERR_MSG_ATTR(extack, entry,
+ "Array element too short");
+ return -ERANGE;
+ }
+
+ ret = nla_validate(nla_data(entry), nla_len(entry),
+ maxtype, policy, extack);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int nla_validate_int_range(const struct nla_policy *pt,
+ const struct nlattr *nla,
+ struct netlink_ext_ack *extack)
+{
+ bool validate_min, validate_max;
+ s64 value;
+
+ validate_min = pt->validation_type == NLA_VALIDATE_RANGE ||
+ pt->validation_type == NLA_VALIDATE_MIN;
+ validate_max = pt->validation_type == NLA_VALIDATE_RANGE ||
+ pt->validation_type == NLA_VALIDATE_MAX;
+
+ switch (pt->type) {
+ case NLA_U8:
+ value = nla_get_u8(nla);
+ break;
+ case NLA_U16:
+ value = nla_get_u16(nla);
+ break;
+ case NLA_U32:
+ value = nla_get_u32(nla);
+ break;
+ case NLA_S8:
+ value = nla_get_s8(nla);
+ break;
+ case NLA_S16:
+ value = nla_get_s16(nla);
+ break;
+ case NLA_S32:
+ value = nla_get_s32(nla);
+ break;
+ case NLA_S64:
+ value = nla_get_s64(nla);
+ break;
+ case NLA_U64:
+ /* treat this one specially, since it may not fit into s64 */
+ if ((validate_min && nla_get_u64(nla) < pt->min) ||
+ (validate_max && nla_get_u64(nla) > pt->max)) {
+ NL_SET_ERR_MSG_ATTR(extack, nla,
+ "integer out of range");
+ return -ERANGE;
+ }
+ return 0;
+ default:
+ WARN_ON(1);
+ return -EINVAL;
+ }
+
+ if ((validate_min && value < pt->min) ||
+ (validate_max && value > pt->max)) {
+ NL_SET_ERR_MSG_ATTR(extack, nla,
+ "integer out of range");
+ return -ERANGE;
+ }
+
+ return 0;
+}
+
static int validate_nla(const struct nlattr *nla, int maxtype,
- const struct nla_policy *policy)
+ const struct nla_policy *policy,
+ struct netlink_ext_ack *extack)
{
const struct nla_policy *pt;
int minlen = 0, attrlen = nla_len(nla), type = nla_type(nla);
+ int err = -ERANGE;
if (type <= 0 || type > maxtype)
return 0;
@@ -81,22 +168,40 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
BUG_ON(pt->type > NLA_TYPE_MAX);
- if (nla_attr_len[pt->type] && attrlen != nla_attr_len[pt->type]) {
+ if ((nla_attr_len[pt->type] && attrlen != nla_attr_len[pt->type]) ||
+ (pt->type == NLA_EXACT_LEN_WARN && attrlen != pt->len)) {
pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n",
current->comm, type);
}
switch (pt->type) {
+ case NLA_EXACT_LEN:
+ if (attrlen != pt->len)
+ goto out_err;
+ break;
+
+ case NLA_REJECT:
+ if (extack && pt->validation_data) {
+ NL_SET_BAD_ATTR(extack, nla);
+ extack->_msg = pt->validation_data;
+ return -EINVAL;
+ }
+ err = -EINVAL;
+ goto out_err;
+
case NLA_FLAG:
if (attrlen > 0)
- return -ERANGE;
+ goto out_err;
break;
case NLA_BITFIELD32:
if (attrlen != sizeof(struct nla_bitfield32))
- return -ERANGE;
+ goto out_err;
- return validate_nla_bitfield32(nla, pt->validation_data);
+ err = validate_nla_bitfield32(nla, pt->validation_data);
+ if (err)
+ goto out_err;
+ break;
case NLA_NUL_STRING:
if (pt->len)
@@ -104,13 +209,15 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
else
minlen = attrlen;
- if (!minlen || memchr(nla_data(nla), '\0', minlen) == NULL)
- return -EINVAL;
+ if (!minlen || memchr(nla_data(nla), '\0', minlen) == NULL) {
+ err = -EINVAL;
+ goto out_err;
+ }
/* fall through */
case NLA_STRING:
if (attrlen < 1)
- return -ERANGE;
+ goto out_err;
if (pt->len) {
char *buf = nla_data(nla);
@@ -119,32 +226,58 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
attrlen--;
if (attrlen > pt->len)
- return -ERANGE;
+ goto out_err;
}
break;
case NLA_BINARY:
if (pt->len && attrlen > pt->len)
- return -ERANGE;
+ goto out_err;
break;
- case NLA_NESTED_COMPAT:
- if (attrlen < pt->len)
- return -ERANGE;
- if (attrlen < NLA_ALIGN(pt->len))
- break;
- if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN)
- return -ERANGE;
- nla = nla_data(nla) + NLA_ALIGN(pt->len);
- if (attrlen < NLA_ALIGN(pt->len) + NLA_HDRLEN + nla_len(nla))
- return -ERANGE;
- break;
case NLA_NESTED:
/* a nested attributes is allowed to be empty; if its not,
* it must have a size of at least NLA_HDRLEN.
*/
if (attrlen == 0)
break;
+ if (attrlen < NLA_HDRLEN)
+ goto out_err;
+ if (pt->validation_data) {
+ err = nla_validate(nla_data(nla), nla_len(nla), pt->len,
+ pt->validation_data, extack);
+ if (err < 0) {
+ /*
+ * return directly to preserve the inner
+ * error message/attribute pointer
+ */
+ return err;
+ }
+ }
+ break;
+ case NLA_NESTED_ARRAY:
+ /* a nested array attribute is allowed to be empty; if its not,
+ * it must have a size of at least NLA_HDRLEN.
+ */
+ if (attrlen == 0)
+ break;
+ if (attrlen < NLA_HDRLEN)
+ goto out_err;
+ if (pt->validation_data) {
+ int err;
+
+ err = nla_validate_array(nla_data(nla), nla_len(nla),
+ pt->len, pt->validation_data,
+ extack);
+ if (err < 0) {
+ /*
+ * return directly to preserve the inner
+ * error message/attribute pointer
+ */
+ return err;
+ }
+ }
+ break;
default:
if (pt->len)
minlen = pt->len;
@@ -152,10 +285,34 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
minlen = nla_attr_minlen[pt->type];
if (attrlen < minlen)
- return -ERANGE;
+ goto out_err;
+ }
+
+ /* further validation */
+ switch (pt->validation_type) {
+ case NLA_VALIDATE_NONE:
+ /* nothing to do */
+ break;
+ case NLA_VALIDATE_RANGE:
+ case NLA_VALIDATE_MIN:
+ case NLA_VALIDATE_MAX:
+ err = nla_validate_int_range(pt, nla, extack);
+ if (err)
+ return err;
+ break;
+ case NLA_VALIDATE_FUNCTION:
+ if (pt->validate) {
+ err = pt->validate(nla, extack);
+ if (err)
+ return err;
+ }
+ break;
}
return 0;
+out_err:
+ NL_SET_ERR_MSG_ATTR(extack, nla, "Attribute failed policy validation");
+ return err;
}
/**
@@ -180,13 +337,10 @@ int nla_validate(const struct nlattr *head, int len, int maxtype,
int rem;
nla_for_each_attr(nla, head, len, rem) {
- int err = validate_nla(nla, maxtype, policy);
+ int err = validate_nla(nla, maxtype, policy, extack);
- if (err < 0) {
- if (extack)
- extack->bad_attr = nla;
+ if (err < 0)
return err;
- }
}
return 0;
@@ -237,42 +391,63 @@ EXPORT_SYMBOL(nla_policy_len);
*
* Returns 0 on success or a negative error code.
*/
-int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head,
- int len, const struct nla_policy *policy,
- struct netlink_ext_ack *extack)
+static int __nla_parse(struct nlattr **tb, int maxtype,
+ const struct nlattr *head, int len,
+ bool strict, const struct nla_policy *policy,
+ struct netlink_ext_ack *extack)
{
const struct nlattr *nla;
- int rem, err;
+ int rem;
memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
nla_for_each_attr(nla, head, len, rem) {
u16 type = nla_type(nla);
- if (type > 0 && type <= maxtype) {
- if (policy) {
- err = validate_nla(nla, maxtype, policy);
- if (err < 0) {
- NL_SET_ERR_MSG_ATTR(extack, nla,
- "Attribute failed policy validation");
- goto errout;
- }
+ if (type == 0 || type > maxtype) {
+ if (strict) {
+ NL_SET_ERR_MSG(extack, "Unknown attribute type");
+ return -EINVAL;
}
+ continue;
+ }
+ if (policy) {
+ int err = validate_nla(nla, maxtype, policy, extack);
- tb[type] = (struct nlattr *)nla;
+ if (err < 0)
+ return err;
}
+
+ tb[type] = (struct nlattr *)nla;
}
- if (unlikely(rem > 0))
+ if (unlikely(rem > 0)) {
pr_warn_ratelimited("netlink: %d bytes leftover after parsing attributes in process `%s'.\n",
rem, current->comm);
+ NL_SET_ERR_MSG(extack, "bytes leftover after parsing attributes");
+ if (strict)
+ return -EINVAL;
+ }
- err = 0;
-errout:
- return err;
+ return 0;
+}
+
+int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head,
+ int len, const struct nla_policy *policy,
+ struct netlink_ext_ack *extack)
+{
+ return __nla_parse(tb, maxtype, head, len, false, policy, extack);
}
EXPORT_SYMBOL(nla_parse);
+int nla_parse_strict(struct nlattr **tb, int maxtype, const struct nlattr *head,
+ int len, const struct nla_policy *policy,
+ struct netlink_ext_ack *extack)
+{
+ return __nla_parse(tb, maxtype, head, len, true, policy, extack);
+}
+EXPORT_SYMBOL(nla_parse_strict);
+
/**
* nla_find - Find a specific attribute in a stream of attributes
* @head: head of attribute stream
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index 9f96fa7bc000..de10b8c0bff6 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -356,11 +356,35 @@ EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
*/
void percpu_ref_reinit(struct percpu_ref *ref)
{
+ WARN_ON_ONCE(!percpu_ref_is_zero(ref));
+
+ percpu_ref_resurrect(ref);
+}
+EXPORT_SYMBOL_GPL(percpu_ref_reinit);
+
+/**
+ * percpu_ref_resurrect - modify a percpu refcount from dead to live
+ * @ref: perpcu_ref to resurrect
+ *
+ * Modify @ref so that it's in the same state as before percpu_ref_kill() was
+ * called. @ref must be dead but must not yet have exited.
+ *
+ * If @ref->release() frees @ref then the caller is responsible for
+ * guaranteeing that @ref->release() does not get called while this
+ * function is in progress.
+ *
+ * Note that percpu_ref_tryget[_live]() are safe to perform on @ref while
+ * this function is in progress.
+ */
+void percpu_ref_resurrect(struct percpu_ref *ref)
+{
+ unsigned long __percpu *percpu_count;
unsigned long flags;
spin_lock_irqsave(&percpu_ref_switch_lock, flags);
- WARN_ON_ONCE(!percpu_ref_is_zero(ref));
+ WARN_ON_ONCE(!(ref->percpu_count_ptr & __PERCPU_REF_DEAD));
+ WARN_ON_ONCE(__ref_is_percpu(ref, &percpu_count));
ref->percpu_count_ptr &= ~__PERCPU_REF_DEAD;
percpu_ref_get(ref);
@@ -368,4 +392,4 @@ void percpu_ref_reinit(struct percpu_ref *ref)
spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
}
-EXPORT_SYMBOL_GPL(percpu_ref_reinit);
+EXPORT_SYMBOL_GPL(percpu_ref_resurrect);
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index bc03ecc4dfd2..1106bb6aa01e 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -38,15 +38,13 @@
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/string.h>
+#include <linux/xarray.h>
-/* Number of nodes in fully populated tree of given height */
-static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly;
-
/*
* Radix tree node cache.
*/
-static struct kmem_cache *radix_tree_node_cachep;
+struct kmem_cache *radix_tree_node_cachep;
/*
* The radix tree is variable-height, so an insert operation not only has
@@ -98,24 +96,7 @@ static inline void *node_to_entry(void *ptr)
return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
}
-#define RADIX_TREE_RETRY node_to_entry(NULL)
-
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-/* Sibling slots point directly to another slot in the same node */
-static inline
-bool is_sibling_entry(const struct radix_tree_node *parent, void *node)
-{
- void __rcu **ptr = node;
- return (parent->slots <= ptr) &&
- (ptr < parent->slots + RADIX_TREE_MAP_SIZE);
-}
-#else
-static inline
-bool is_sibling_entry(const struct radix_tree_node *parent, void *node)
-{
- return false;
-}
-#endif
+#define RADIX_TREE_RETRY XA_RETRY_ENTRY
static inline unsigned long
get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot)
@@ -129,24 +110,13 @@ static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;
void __rcu **entry = rcu_dereference_raw(parent->slots[offset]);
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
- if (radix_tree_is_internal_node(entry)) {
- if (is_sibling_entry(parent, entry)) {
- void __rcu **sibentry;
- sibentry = (void __rcu **) entry_to_node(entry);
- offset = get_slot_offset(parent, sibentry);
- entry = rcu_dereference_raw(*sibentry);
- }
- }
-#endif
-
*nodep = (void *)entry;
return offset;
}
static inline gfp_t root_gfp_mask(const struct radix_tree_root *root)
{
- return root->gfp_mask & (__GFP_BITS_MASK & ~GFP_ZONEMASK);
+ return root->xa_flags & (__GFP_BITS_MASK & ~GFP_ZONEMASK);
}
static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
@@ -169,32 +139,32 @@ static inline int tag_get(const struct radix_tree_node *node, unsigned int tag,
static inline void root_tag_set(struct radix_tree_root *root, unsigned tag)
{
- root->gfp_mask |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT));
+ root->xa_flags |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT));
}
static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag)
{
- root->gfp_mask &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT));
+ root->xa_flags &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT));
}
static inline void root_tag_clear_all(struct radix_tree_root *root)
{
- root->gfp_mask &= (1 << ROOT_TAG_SHIFT) - 1;
+ root->xa_flags &= (__force gfp_t)((1 << ROOT_TAG_SHIFT) - 1);
}
static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag)
{
- return (__force int)root->gfp_mask & (1 << (tag + ROOT_TAG_SHIFT));
+ return (__force int)root->xa_flags & (1 << (tag + ROOT_TAG_SHIFT));
}
static inline unsigned root_tags_get(const struct radix_tree_root *root)
{
- return (__force unsigned)root->gfp_mask >> ROOT_TAG_SHIFT;
+ return (__force unsigned)root->xa_flags >> ROOT_TAG_SHIFT;
}
static inline bool is_idr(const struct radix_tree_root *root)
{
- return !!(root->gfp_mask & ROOT_IS_IDR);
+ return !!(root->xa_flags & ROOT_IS_IDR);
}
/*
@@ -254,7 +224,7 @@ radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag,
static unsigned int iter_offset(const struct radix_tree_iter *iter)
{
- return (iter->index >> iter_shift(iter)) & RADIX_TREE_MAP_MASK;
+ return iter->index & RADIX_TREE_MAP_MASK;
}
/*
@@ -277,99 +247,6 @@ static unsigned long next_index(unsigned long index,
return (index & ~node_maxindex(node)) + (offset << node->shift);
}
-#ifndef __KERNEL__
-static void dump_node(struct radix_tree_node *node, unsigned long index)
-{
- unsigned long i;
-
- pr_debug("radix node: %p offset %d indices %lu-%lu parent %p tags %lx %lx %lx shift %d count %d exceptional %d\n",
- node, node->offset, index, index | node_maxindex(node),
- node->parent,
- node->tags[0][0], node->tags[1][0], node->tags[2][0],
- node->shift, node->count, node->exceptional);
-
- for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
- unsigned long first = index | (i << node->shift);
- unsigned long last = first | ((1UL << node->shift) - 1);
- void *entry = node->slots[i];
- if (!entry)
- continue;
- if (entry == RADIX_TREE_RETRY) {
- pr_debug("radix retry offset %ld indices %lu-%lu parent %p\n",
- i, first, last, node);
- } else if (!radix_tree_is_internal_node(entry)) {
- pr_debug("radix entry %p offset %ld indices %lu-%lu parent %p\n",
- entry, i, first, last, node);
- } else if (is_sibling_entry(node, entry)) {
- pr_debug("radix sblng %p offset %ld indices %lu-%lu parent %p val %p\n",
- entry, i, first, last, node,
- *(void **)entry_to_node(entry));
- } else {
- dump_node(entry_to_node(entry), first);
- }
- }
-}
-
-/* For debug */
-static void radix_tree_dump(struct radix_tree_root *root)
-{
- pr_debug("radix root: %p rnode %p tags %x\n",
- root, root->rnode,
- root->gfp_mask >> ROOT_TAG_SHIFT);
- if (!radix_tree_is_internal_node(root->rnode))
- return;
- dump_node(entry_to_node(root->rnode), 0);
-}
-
-static void dump_ida_node(void *entry, unsigned long index)
-{
- unsigned long i;
-
- if (!entry)
- return;
-
- if (radix_tree_is_internal_node(entry)) {
- struct radix_tree_node *node = entry_to_node(entry);
-
- pr_debug("ida node: %p offset %d indices %lu-%lu parent %p free %lx shift %d count %d\n",
- node, node->offset, index * IDA_BITMAP_BITS,
- ((index | node_maxindex(node)) + 1) *
- IDA_BITMAP_BITS - 1,
- node->parent, node->tags[0][0], node->shift,
- node->count);
- for (i = 0; i < RADIX_TREE_MAP_SIZE; i++)
- dump_ida_node(node->slots[i],
- index | (i << node->shift));
- } else if (radix_tree_exceptional_entry(entry)) {
- pr_debug("ida excp: %p offset %d indices %lu-%lu data %lx\n",
- entry, (int)(index & RADIX_TREE_MAP_MASK),
- index * IDA_BITMAP_BITS,
- index * IDA_BITMAP_BITS + BITS_PER_LONG -
- RADIX_TREE_EXCEPTIONAL_SHIFT,
- (unsigned long)entry >>
- RADIX_TREE_EXCEPTIONAL_SHIFT);
- } else {
- struct ida_bitmap *bitmap = entry;
-
- pr_debug("ida btmp: %p offset %d indices %lu-%lu data", bitmap,
- (int)(index & RADIX_TREE_MAP_MASK),
- index * IDA_BITMAP_BITS,
- (index + 1) * IDA_BITMAP_BITS - 1);
- for (i = 0; i < IDA_BITMAP_LONGS; i++)
- pr_cont(" %lx", bitmap->bitmap[i]);
- pr_cont("\n");
- }
-}
-
-static void ida_dump(struct ida *ida)
-{
- struct radix_tree_root *root = &ida->ida_rt;
- pr_debug("ida: %p node %p free %d\n", ida, root->rnode,
- root->gfp_mask >> ROOT_TAG_SHIFT);
- dump_ida_node(root->rnode, 0);
-}
-#endif
-
/*
* This assumes that the caller has performed appropriate preallocation, and
* that the caller has pinned this thread of control to the current CPU.
@@ -378,7 +255,7 @@ static struct radix_tree_node *
radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent,
struct radix_tree_root *root,
unsigned int shift, unsigned int offset,
- unsigned int count, unsigned int exceptional)
+ unsigned int count, unsigned int nr_values)
{
struct radix_tree_node *ret = NULL;
@@ -425,14 +302,14 @@ out:
ret->shift = shift;
ret->offset = offset;
ret->count = count;
- ret->exceptional = exceptional;
+ ret->nr_values = nr_values;
ret->parent = parent;
- ret->root = root;
+ ret->array = root;
}
return ret;
}
-static void radix_tree_node_rcu_free(struct rcu_head *head)
+void radix_tree_node_rcu_free(struct rcu_head *head)
{
struct radix_tree_node *node =
container_of(head, struct radix_tree_node, rcu_head);
@@ -530,77 +407,10 @@ int radix_tree_maybe_preload(gfp_t gfp_mask)
}
EXPORT_SYMBOL(radix_tree_maybe_preload);
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-/*
- * Preload with enough objects to ensure that we can split a single entry
- * of order @old_order into many entries of size @new_order
- */
-int radix_tree_split_preload(unsigned int old_order, unsigned int new_order,
- gfp_t gfp_mask)
-{
- unsigned top = 1 << (old_order % RADIX_TREE_MAP_SHIFT);
- unsigned layers = (old_order / RADIX_TREE_MAP_SHIFT) -
- (new_order / RADIX_TREE_MAP_SHIFT);
- unsigned nr = 0;
-
- WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
- BUG_ON(new_order >= old_order);
-
- while (layers--)
- nr = nr * RADIX_TREE_MAP_SIZE + 1;
- return __radix_tree_preload(gfp_mask, top * nr);
-}
-#endif
-
-/*
- * The same as function above, but preload number of nodes required to insert
- * (1 << order) continuous naturally-aligned elements.
- */
-int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order)
-{
- unsigned long nr_subtrees;
- int nr_nodes, subtree_height;
-
- /* Preloading doesn't help anything with this gfp mask, skip it */
- if (!gfpflags_allow_blocking(gfp_mask)) {
- preempt_disable();
- return 0;
- }
-
- /*
- * Calculate number and height of fully populated subtrees it takes to
- * store (1 << order) elements.
- */
- nr_subtrees = 1 << order;
- for (subtree_height = 0; nr_subtrees > RADIX_TREE_MAP_SIZE;
- subtree_height++)
- nr_subtrees >>= RADIX_TREE_MAP_SHIFT;
-
- /*
- * The worst case is zero height tree with a single item at index 0 and
- * then inserting items starting at ULONG_MAX - (1 << order).
- *
- * This requires RADIX_TREE_MAX_PATH nodes to build branch from root to
- * 0-index item.
- */
- nr_nodes = RADIX_TREE_MAX_PATH;
-
- /* Plus branch to fully populated subtrees. */
- nr_nodes += RADIX_TREE_MAX_PATH - subtree_height;
-
- /* Root node is shared. */
- nr_nodes--;
-
- /* Plus nodes required to build subtrees. */
- nr_nodes += nr_subtrees * height_to_maxnodes[subtree_height];
-
- return __radix_tree_preload(gfp_mask, nr_nodes);
-}
-
static unsigned radix_tree_load_root(const struct radix_tree_root *root,
struct radix_tree_node **nodep, unsigned long *maxindex)
{
- struct radix_tree_node *node = rcu_dereference_raw(root->rnode);
+ struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);
*nodep = node;
@@ -629,7 +439,7 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
while (index > shift_maxindex(maxshift))
maxshift += RADIX_TREE_MAP_SHIFT;
- entry = rcu_dereference_raw(root->rnode);
+ entry = rcu_dereference_raw(root->xa_head);
if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE)))
goto out;
@@ -656,9 +466,9 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
BUG_ON(shift > BITS_PER_LONG);
if (radix_tree_is_internal_node(entry)) {
entry_to_node(entry)->parent = node;
- } else if (radix_tree_exceptional_entry(entry)) {
- /* Moving an exceptional root->rnode to a node */
- node->exceptional = 1;
+ } else if (xa_is_value(entry)) {
+ /* Moving a value entry root->xa_head to a node */
+ node->nr_values = 1;
}
/*
* entry was already in the radix tree, so we do not need
@@ -666,7 +476,7 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
*/
node->slots[0] = (void __rcu *)entry;
entry = node_to_entry(node);
- rcu_assign_pointer(root->rnode, entry);
+ rcu_assign_pointer(root->xa_head, entry);
shift += RADIX_TREE_MAP_SHIFT;
} while (shift <= maxshift);
out:
@@ -677,13 +487,12 @@ out:
* radix_tree_shrink - shrink radix tree to minimum height
* @root radix tree root
*/
-static inline bool radix_tree_shrink(struct radix_tree_root *root,
- radix_tree_update_node_t update_node)
+static inline bool radix_tree_shrink(struct radix_tree_root *root)
{
bool shrunk = false;
for (;;) {
- struct radix_tree_node *node = rcu_dereference_raw(root->rnode);
+ struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);
struct radix_tree_node *child;
if (!radix_tree_is_internal_node(node))
@@ -692,15 +501,20 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root,
/*
* The candidate node has more than one child, or its child
- * is not at the leftmost slot, or the child is a multiorder
- * entry, we cannot shrink.
+ * is not at the leftmost slot, we cannot shrink.
*/
if (node->count != 1)
break;
child = rcu_dereference_raw(node->slots[0]);
if (!child)
break;
- if (!radix_tree_is_internal_node(child) && node->shift)
+
+ /*
+ * For an IDR, we must not shrink entry 0 into the root in
+ * case somebody calls idr_replace() with a pointer that
+ * appears to be an internal entry
+ */
+ if (!node->shift && is_idr(root))
break;
if (radix_tree_is_internal_node(child))
@@ -711,9 +525,9 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root,
* moving the node from one part of the tree to another: if it
* was safe to dereference the old pointer to it
* (node->slots[0]), it will be safe to dereference the new
- * one (root->rnode) as far as dependent read barriers go.
+ * one (root->xa_head) as far as dependent read barriers go.
*/
- root->rnode = (void __rcu *)child;
+ root->xa_head = (void __rcu *)child;
if (is_idr(root) && !tag_get(node, IDR_FREE, 0))
root_tag_clear(root, IDR_FREE);
@@ -738,8 +552,6 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root,
node->count = 0;
if (!radix_tree_is_internal_node(child)) {
node->slots[0] = (void __rcu *)RADIX_TREE_RETRY;
- if (update_node)
- update_node(node);
}
WARN_ON_ONCE(!list_empty(&node->private_list));
@@ -751,8 +563,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root,
}
static bool delete_node(struct radix_tree_root *root,
- struct radix_tree_node *node,
- radix_tree_update_node_t update_node)
+ struct radix_tree_node *node)
{
bool deleted = false;
@@ -761,9 +572,8 @@ static bool delete_node(struct radix_tree_root *root,
if (node->count) {
if (node_to_entry(node) ==
- rcu_dereference_raw(root->rnode))
- deleted |= radix_tree_shrink(root,
- update_node);
+ rcu_dereference_raw(root->xa_head))
+ deleted |= radix_tree_shrink(root);
return deleted;
}
@@ -778,7 +588,7 @@ static bool delete_node(struct radix_tree_root *root,
*/
if (!is_idr(root))
root_tag_clear_all(root);
- root->rnode = NULL;
+ root->xa_head = NULL;
}
WARN_ON_ONCE(!list_empty(&node->private_list));
@@ -795,7 +605,6 @@ static bool delete_node(struct radix_tree_root *root,
* __radix_tree_create - create a slot in a radix tree
* @root: radix tree root
* @index: index key
- * @order: index occupies 2^order aligned slots
* @nodep: returns node
* @slotp: returns slot
*
@@ -803,36 +612,34 @@ static bool delete_node(struct radix_tree_root *root,
* at position @index in the radix tree @root.
*
* Until there is more than one item in the tree, no nodes are
- * allocated and @root->rnode is used as a direct slot instead of
+ * allocated and @root->xa_head is used as a direct slot instead of
* pointing to a node, in which case *@nodep will be NULL.
*
* Returns -ENOMEM, or 0 for success.
*/
-int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
- unsigned order, struct radix_tree_node **nodep,
- void __rcu ***slotp)
+static int __radix_tree_create(struct radix_tree_root *root,
+ unsigned long index, struct radix_tree_node **nodep,
+ void __rcu ***slotp)
{
struct radix_tree_node *node = NULL, *child;
- void __rcu **slot = (void __rcu **)&root->rnode;
+ void __rcu **slot = (void __rcu **)&root->xa_head;
unsigned long maxindex;
unsigned int shift, offset = 0;
- unsigned long max = index | ((1UL << order) - 1);
+ unsigned long max = index;
gfp_t gfp = root_gfp_mask(root);
shift = radix_tree_load_root(root, &child, &maxindex);
/* Make sure the tree is high enough. */
- if (order > 0 && max == ((1UL << order) - 1))
- max++;
if (max > maxindex) {
int error = radix_tree_extend(root, gfp, max, shift);
if (error < 0)
return error;
shift = error;
- child = rcu_dereference_raw(root->rnode);
+ child = rcu_dereference_raw(root->xa_head);
}
- while (shift > order) {
+ while (shift > 0) {
shift -= RADIX_TREE_MAP_SHIFT;
if (child == NULL) {
/* Have to add a child node. */
@@ -875,8 +682,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node)
for (;;) {
void *entry = rcu_dereference_raw(child->slots[offset]);
- if (radix_tree_is_internal_node(entry) &&
- !is_sibling_entry(child, entry)) {
+ if (xa_is_node(entry) && child->shift) {
child = entry_to_node(entry);
offset = 0;
continue;
@@ -894,96 +700,30 @@ static void radix_tree_free_nodes(struct radix_tree_node *node)
}
}
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
static inline int insert_entries(struct radix_tree_node *node,
- void __rcu **slot, void *item, unsigned order, bool replace)
-{
- struct radix_tree_node *child;
- unsigned i, n, tag, offset, tags = 0;
-
- if (node) {
- if (order > node->shift)
- n = 1 << (order - node->shift);
- else
- n = 1;
- offset = get_slot_offset(node, slot);
- } else {
- n = 1;
- offset = 0;
- }
-
- if (n > 1) {
- offset = offset & ~(n - 1);
- slot = &node->slots[offset];
- }
- child = node_to_entry(slot);
-
- for (i = 0; i < n; i++) {
- if (slot[i]) {
- if (replace) {
- node->count--;
- for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
- if (tag_get(node, tag, offset + i))
- tags |= 1 << tag;
- } else
- return -EEXIST;
- }
- }
-
- for (i = 0; i < n; i++) {
- struct radix_tree_node *old = rcu_dereference_raw(slot[i]);
- if (i) {
- rcu_assign_pointer(slot[i], child);
- for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
- if (tags & (1 << tag))
- tag_clear(node, tag, offset + i);
- } else {
- rcu_assign_pointer(slot[i], item);
- for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
- if (tags & (1 << tag))
- tag_set(node, tag, offset);
- }
- if (radix_tree_is_internal_node(old) &&
- !is_sibling_entry(node, old) &&
- (old != RADIX_TREE_RETRY))
- radix_tree_free_nodes(old);
- if (radix_tree_exceptional_entry(old))
- node->exceptional--;
- }
- if (node) {
- node->count += n;
- if (radix_tree_exceptional_entry(item))
- node->exceptional += n;
- }
- return n;
-}
-#else
-static inline int insert_entries(struct radix_tree_node *node,
- void __rcu **slot, void *item, unsigned order, bool replace)
+ void __rcu **slot, void *item, bool replace)
{
if (*slot)
return -EEXIST;
rcu_assign_pointer(*slot, item);
if (node) {
node->count++;
- if (radix_tree_exceptional_entry(item))
- node->exceptional++;
+ if (xa_is_value(item))
+ node->nr_values++;
}
return 1;
}
-#endif
/**
* __radix_tree_insert - insert into a radix tree
* @root: radix tree root
* @index: index key
- * @order: key covers the 2^order indices around index
* @item: item to insert
*
* Insert an item into the radix tree at position @index.
*/
-int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
- unsigned order, void *item)
+int radix_tree_insert(struct radix_tree_root *root, unsigned long index,
+ void *item)
{
struct radix_tree_node *node;
void __rcu **slot;
@@ -991,11 +731,11 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
BUG_ON(radix_tree_is_internal_node(item));
- error = __radix_tree_create(root, index, order, &node, &slot);
+ error = __radix_tree_create(root, index, &node, &slot);
if (error)
return error;
- error = insert_entries(node, slot, item, order, false);
+ error = insert_entries(node, slot, item, false);
if (error < 0)
return error;
@@ -1010,7 +750,7 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
return 0;
}
-EXPORT_SYMBOL(__radix_tree_insert);
+EXPORT_SYMBOL(radix_tree_insert);
/**
* __radix_tree_lookup - lookup an item in a radix tree
@@ -1023,7 +763,7 @@ EXPORT_SYMBOL(__radix_tree_insert);
* tree @root.
*
* Until there is more than one item in the tree, no nodes are
- * allocated and @root->rnode is used as a direct slot instead of
+ * allocated and @root->xa_head is used as a direct slot instead of
* pointing to a node, in which case *@nodep will be NULL.
*/
void *__radix_tree_lookup(const struct radix_tree_root *root,
@@ -1036,7 +776,7 @@ void *__radix_tree_lookup(const struct radix_tree_root *root,
restart:
parent = NULL;
- slot = (void __rcu **)&root->rnode;
+ slot = (void __rcu **)&root->xa_head;
radix_tree_load_root(root, &node, &maxindex);
if (index > maxindex)
return NULL;
@@ -1049,6 +789,8 @@ void *__radix_tree_lookup(const struct radix_tree_root *root,
parent = entry_to_node(node);
offset = radix_tree_descend(parent, &node, index);
slot = parent->slots + offset;
+ if (parent->shift == 0)
+ break;
}
if (nodep)
@@ -1100,36 +842,12 @@ void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index)
}
EXPORT_SYMBOL(radix_tree_lookup);
-static inline void replace_sibling_entries(struct radix_tree_node *node,
- void __rcu **slot, int count, int exceptional)
-{
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
- void *ptr = node_to_entry(slot);
- unsigned offset = get_slot_offset(node, slot) + 1;
-
- while (offset < RADIX_TREE_MAP_SIZE) {
- if (rcu_dereference_raw(node->slots[offset]) != ptr)
- break;
- if (count < 0) {
- node->slots[offset] = NULL;
- node->count--;
- }
- node->exceptional += exceptional;
- offset++;
- }
-#endif
-}
-
static void replace_slot(void __rcu **slot, void *item,
- struct radix_tree_node *node, int count, int exceptional)
+ struct radix_tree_node *node, int count, int values)
{
- if (WARN_ON_ONCE(radix_tree_is_internal_node(item)))
- return;
-
- if (node && (count || exceptional)) {
+ if (node && (count || values)) {
node->count += count;
- node->exceptional += exceptional;
- replace_sibling_entries(node, slot, count, exceptional);
+ node->nr_values += values;
}
rcu_assign_pointer(*slot, item);
@@ -1172,37 +890,31 @@ static int calculate_count(struct radix_tree_root *root,
* @node: pointer to tree node
* @slot: pointer to slot in @node
* @item: new item to store in the slot.
- * @update_node: callback for changing leaf nodes
*
* For use with __radix_tree_lookup(). Caller must hold tree write locked
* across slot lookup and replacement.
*/
void __radix_tree_replace(struct radix_tree_root *root,
struct radix_tree_node *node,
- void __rcu **slot, void *item,
- radix_tree_update_node_t update_node)
+ void __rcu **slot, void *item)
{
void *old = rcu_dereference_raw(*slot);
- int exceptional = !!radix_tree_exceptional_entry(item) -
- !!radix_tree_exceptional_entry(old);
+ int values = !!xa_is_value(item) - !!xa_is_value(old);
int count = calculate_count(root, node, slot, item, old);
/*
- * This function supports replacing exceptional entries and
+ * This function supports replacing value entries and
* deleting entries, but that needs accounting against the
- * node unless the slot is root->rnode.
+ * node unless the slot is root->xa_head.
*/
- WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->rnode) &&
- (count || exceptional));
- replace_slot(slot, item, node, count, exceptional);
+ WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) &&
+ (count || values));
+ replace_slot(slot, item, node, count, values);
if (!node)
return;
- if (update_node)
- update_node(node);
-
- delete_node(root, node, update_node);
+ delete_node(root, node);
}
/**
@@ -1211,12 +923,12 @@ void __radix_tree_replace(struct radix_tree_root *root,
* @slot: pointer to slot
* @item: new item to store in the slot.
*
- * For use with radix_tree_lookup_slot(), radix_tree_gang_lookup_slot(),
+ * For use with radix_tree_lookup_slot() and
* radix_tree_gang_lookup_tag_slot(). Caller must hold tree write locked
* across slot lookup and replacement.
*
* NOTE: This cannot be used to switch between non-entries (empty slots),
- * regular entries, and exceptional entries, as that requires accounting
+ * regular entries, and value entries, as that requires accounting
* inside the radix tree node. When switching from one type of entry or
* deleting, use __radix_tree_lookup() and __radix_tree_replace() or
* radix_tree_iter_replace().
@@ -1224,7 +936,7 @@ void __radix_tree_replace(struct radix_tree_root *root,
void radix_tree_replace_slot(struct radix_tree_root *root,
void __rcu **slot, void *item)
{
- __radix_tree_replace(root, NULL, slot, item, NULL);
+ __radix_tree_replace(root, NULL, slot, item);
}
EXPORT_SYMBOL(radix_tree_replace_slot);
@@ -1234,162 +946,16 @@ EXPORT_SYMBOL(radix_tree_replace_slot);
* @slot: pointer to slot
* @item: new item to store in the slot.
*
- * For use with radix_tree_split() and radix_tree_for_each_slot().
- * Caller must hold tree write locked across split and replacement.
+ * For use with radix_tree_for_each_slot().
+ * Caller must hold tree write locked.
*/
void radix_tree_iter_replace(struct radix_tree_root *root,
const struct radix_tree_iter *iter,
void __rcu **slot, void *item)
{
- __radix_tree_replace(root, iter->node, slot, item, NULL);
+ __radix_tree_replace(root, iter->node, slot, item);
}
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-/**
- * radix_tree_join - replace multiple entries with one multiorder entry
- * @root: radix tree root
- * @index: an index inside the new entry
- * @order: order of the new entry
- * @item: new entry
- *
- * Call this function to replace several entries with one larger entry.
- * The existing entries are presumed to not need freeing as a result of
- * this call.
- *
- * The replacement entry will have all the tags set on it that were set
- * on any of the entries it is replacing.
- */
-int radix_tree_join(struct radix_tree_root *root, unsigned long index,
- unsigned order, void *item)
-{
- struct radix_tree_node *node;
- void __rcu **slot;
- int error;
-
- BUG_ON(radix_tree_is_internal_node(item));
-
- error = __radix_tree_create(root, index, order, &node, &slot);
- if (!error)
- error = insert_entries(node, slot, item, order, true);
- if (error > 0)
- error = 0;
-
- return error;
-}
-
-/**
- * radix_tree_split - Split an entry into smaller entries
- * @root: radix tree root
- * @index: An index within the large entry
- * @order: Order of new entries
- *
- * Call this function as the first step in replacing a multiorder entry
- * with several entries of lower order. After this function returns,
- * loop over the relevant portion of the tree using radix_tree_for_each_slot()
- * and call radix_tree_iter_replace() to set up each new entry.
- *
- * The tags from this entry are replicated to all the new entries.
- *
- * The radix tree should be locked against modification during the entire
- * replacement operation. Lock-free lookups will see RADIX_TREE_RETRY which
- * should prompt RCU walkers to restart the lookup from the root.
- */
-int radix_tree_split(struct radix_tree_root *root, unsigned long index,
- unsigned order)
-{
- struct radix_tree_node *parent, *node, *child;
- void __rcu **slot;
- unsigned int offset, end;
- unsigned n, tag, tags = 0;
- gfp_t gfp = root_gfp_mask(root);
-
- if (!__radix_tree_lookup(root, index, &parent, &slot))
- return -ENOENT;
- if (!parent)
- return -ENOENT;
-
- offset = get_slot_offset(parent, slot);
-
- for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
- if (tag_get(parent, tag, offset))
- tags |= 1 << tag;
-
- for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) {
- if (!is_sibling_entry(parent,
- rcu_dereference_raw(parent->slots[end])))
- break;
- for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
- if (tags & (1 << tag))
- tag_set(parent, tag, end);
- /* rcu_assign_pointer ensures tags are set before RETRY */
- rcu_assign_pointer(parent->slots[end], RADIX_TREE_RETRY);
- }
- rcu_assign_pointer(parent->slots[offset], RADIX_TREE_RETRY);
- parent->exceptional -= (end - offset);
-
- if (order == parent->shift)
- return 0;
- if (order > parent->shift) {
- while (offset < end)
- offset += insert_entries(parent, &parent->slots[offset],
- RADIX_TREE_RETRY, order, true);
- return 0;
- }
-
- node = parent;
-
- for (;;) {
- if (node->shift > order) {
- child = radix_tree_node_alloc(gfp, node, root,
- node->shift - RADIX_TREE_MAP_SHIFT,
- offset, 0, 0);
- if (!child)
- goto nomem;
- if (node != parent) {
- node->count++;
- rcu_assign_pointer(node->slots[offset],
- node_to_entry(child));
- for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
- if (tags & (1 << tag))
- tag_set(node, tag, offset);
- }
-
- node = child;
- offset = 0;
- continue;
- }
-
- n = insert_entries(node, &node->slots[offset],
- RADIX_TREE_RETRY, order, false);
- BUG_ON(n > RADIX_TREE_MAP_SIZE);
-
- for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
- if (tags & (1 << tag))
- tag_set(node, tag, offset);
- offset += n;
-
- while (offset == RADIX_TREE_MAP_SIZE) {
- if (node == parent)
- break;
- offset = node->offset;
- child = node;
- node = node->parent;
- rcu_assign_pointer(node->slots[offset],
- node_to_entry(child));
- offset++;
- }
- if ((node == parent) && (offset == end))
- return 0;
- }
-
- nomem:
- /* Shouldn't happen; did user forget to preload? */
- /* TODO: free all the allocated nodes */
- WARN_ON(1);
- return -ENOMEM;
-}
-#endif
-
static void node_tag_set(struct radix_tree_root *root,
struct radix_tree_node *node,
unsigned int tag, unsigned int offset)
@@ -1447,18 +1013,6 @@ void *radix_tree_tag_set(struct radix_tree_root *root,
}
EXPORT_SYMBOL(radix_tree_tag_set);
-/**
- * radix_tree_iter_tag_set - set a tag on the current iterator entry
- * @root: radix tree root
- * @iter: iterator state
- * @tag: tag to set
- */
-void radix_tree_iter_tag_set(struct radix_tree_root *root,
- const struct radix_tree_iter *iter, unsigned int tag)
-{
- node_tag_set(root, iter->node, tag, iter_offset(iter));
-}
-
static void node_tag_clear(struct radix_tree_root *root,
struct radix_tree_node *node,
unsigned int tag, unsigned int offset)
@@ -1574,14 +1128,6 @@ int radix_tree_tag_get(const struct radix_tree_root *root,
}
EXPORT_SYMBOL(radix_tree_tag_get);
-static inline void __set_iter_shift(struct radix_tree_iter *iter,
- unsigned int shift)
-{
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
- iter->shift = shift;
-#endif
-}
-
/* Construct iter->tags bit-mask from node->tags[tag] array */
static void set_iter_tags(struct radix_tree_iter *iter,
struct radix_tree_node *node, unsigned offset,
@@ -1608,92 +1154,11 @@ static void set_iter_tags(struct radix_tree_iter *iter,
}
}
-#ifdef CONFIG_RADIX_TREE_MULTIORDER
-static void __rcu **skip_siblings(struct radix_tree_node **nodep,
- void __rcu **slot, struct radix_tree_iter *iter)
-{
- while (iter->index < iter->next_index) {
- *nodep = rcu_dereference_raw(*slot);
- if (*nodep && !is_sibling_entry(iter->node, *nodep))
- return slot;
- slot++;
- iter->index = __radix_tree_iter_add(iter, 1);
- iter->tags >>= 1;
- }
-
- *nodep = NULL;
- return NULL;
-}
-
-void __rcu **__radix_tree_next_slot(void __rcu **slot,
- struct radix_tree_iter *iter, unsigned flags)
-{
- unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
- struct radix_tree_node *node;
-
- slot = skip_siblings(&node, slot, iter);
-
- while (radix_tree_is_internal_node(node)) {
- unsigned offset;
- unsigned long next_index;
-
- if (node == RADIX_TREE_RETRY)
- return slot;
- node = entry_to_node(node);
- iter->node = node;
- iter->shift = node->shift;
-
- if (flags & RADIX_TREE_ITER_TAGGED) {
- offset = radix_tree_find_next_bit(node, tag, 0);
- if (offset == RADIX_TREE_MAP_SIZE)
- return NULL;
- slot = &node->slots[offset];
- iter->index = __radix_tree_iter_add(iter, offset);
- set_iter_tags(iter, node, offset, tag);
- node = rcu_dereference_raw(*slot);
- } else {
- offset = 0;
- slot = &node->slots[0];
- for (;;) {
- node = rcu_dereference_raw(*slot);
- if (node)
- break;
- slot++;
- offset++;
- if (offset == RADIX_TREE_MAP_SIZE)
- return NULL;
- }
- iter->index = __radix_tree_iter_add(iter, offset);
- }
- if ((flags & RADIX_TREE_ITER_CONTIG) && (offset > 0))
- goto none;
- next_index = (iter->index | shift_maxindex(iter->shift)) + 1;
- if (next_index < iter->next_index)
- iter->next_index = next_index;
- }
-
- return slot;
- none:
- iter->next_index = 0;
- return NULL;
-}
-EXPORT_SYMBOL(__radix_tree_next_slot);
-#else
-static void __rcu **skip_siblings(struct radix_tree_node **nodep,
- void __rcu **slot, struct radix_tree_iter *iter)
-{
- return slot;
-}
-#endif
-
void __rcu **radix_tree_iter_resume(void __rcu **slot,
struct radix_tree_iter *iter)
{
- struct radix_tree_node *node;
-
slot++;
iter->index = __radix_tree_iter_add(iter, 1);
- skip_siblings(&node, slot, iter);
iter->next_index = iter->index;
iter->tags = 0;
return NULL;
@@ -1744,8 +1209,7 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
iter->next_index = maxindex + 1;
iter->tags = 1;
iter->node = NULL;
- __set_iter_shift(iter, 0);
- return (void __rcu **)&root->rnode;
+ return (void __rcu **)&root->xa_head;
}
do {
@@ -1765,8 +1229,6 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
while (++offset < RADIX_TREE_MAP_SIZE) {
void *slot = rcu_dereference_raw(
node->slots[offset]);
- if (is_sibling_entry(node, slot))
- continue;
if (slot)
break;
}
@@ -1784,13 +1246,12 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
goto restart;
if (child == RADIX_TREE_RETRY)
break;
- } while (radix_tree_is_internal_node(child));
+ } while (node->shift && radix_tree_is_internal_node(child));
/* Update the iterator state */
- iter->index = (index &~ node_maxindex(node)) | (offset << node->shift);
+ iter->index = (index &~ node_maxindex(node)) | offset;
iter->next_index = (index | node_maxindex(node)) + 1;
iter->node = node;
- __set_iter_shift(iter, node->shift);
if (flags & RADIX_TREE_ITER_TAGGED)
set_iter_tags(iter, node, offset, tag);
@@ -1847,48 +1308,6 @@ radix_tree_gang_lookup(const struct radix_tree_root *root, void **results,
EXPORT_SYMBOL(radix_tree_gang_lookup);
/**
- * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree
- * @root: radix tree root
- * @results: where the results of the lookup are placed
- * @indices: where their indices should be placed (but usually NULL)
- * @first_index: start the lookup from this key
- * @max_items: place up to this many items at *results
- *
- * Performs an index-ascending scan of the tree for present items. Places
- * their slots at *@results and returns the number of items which were
- * placed at *@results.
- *
- * The implementation is naive.
- *
- * Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must
- * be dereferenced with radix_tree_deref_slot, and if using only RCU
- * protection, radix_tree_deref_slot may fail requiring a retry.
- */
-unsigned int
-radix_tree_gang_lookup_slot(const struct radix_tree_root *root,
- void __rcu ***results, unsigned long *indices,
- unsigned long first_index, unsigned int max_items)
-{
- struct radix_tree_iter iter;
- void __rcu **slot;
- unsigned int ret = 0;
-
- if (unlikely(!max_items))
- return 0;
-
- radix_tree_for_each_slot(slot, root, &iter, first_index) {
- results[ret] = slot;
- if (indices)
- indices[ret] = iter.index;
- if (++ret == max_items)
- break;
- }
-
- return ret;
-}
-EXPORT_SYMBOL(radix_tree_gang_lookup_slot);
-
-/**
* radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
* based on a tag
* @root: radix tree root
@@ -1964,28 +1383,11 @@ radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root,
}
EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
-/**
- * __radix_tree_delete_node - try to free node after clearing a slot
- * @root: radix tree root
- * @node: node containing @index
- * @update_node: callback for changing leaf nodes
- *
- * After clearing the slot at @index in @node from radix tree
- * rooted at @root, call this function to attempt freeing the
- * node and shrinking the tree.
- */
-void __radix_tree_delete_node(struct radix_tree_root *root,
- struct radix_tree_node *node,
- radix_tree_update_node_t update_node)
-{
- delete_node(root, node, update_node);
-}
-
static bool __radix_tree_delete(struct radix_tree_root *root,
struct radix_tree_node *node, void __rcu **slot)
{
void *old = rcu_dereference_raw(*slot);
- int exceptional = radix_tree_exceptional_entry(old) ? -1 : 0;
+ int values = xa_is_value(old) ? -1 : 0;
unsigned offset = get_slot_offset(node, slot);
int tag;
@@ -1995,8 +1397,8 @@ static bool __radix_tree_delete(struct radix_tree_root *root,
for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
node_tag_clear(root, node, tag, offset);
- replace_slot(slot, NULL, node, -1, exceptional);
- return node && delete_node(root, node, NULL);
+ replace_slot(slot, NULL, node, -1, values);
+ return node && delete_node(root, node);
}
/**
@@ -2068,19 +1470,6 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
}
EXPORT_SYMBOL(radix_tree_delete);
-void radix_tree_clear_tags(struct radix_tree_root *root,
- struct radix_tree_node *node,
- void __rcu **slot)
-{
- if (node) {
- unsigned int tag, offset = get_slot_offset(node, slot);
- for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
- node_tag_clear(root, node, tag, offset);
- } else {
- root_tag_clear_all(root);
- }
-}
-
/**
* radix_tree_tagged - test whether any items in the tree are tagged
* @root: radix tree root
@@ -2106,33 +1495,12 @@ void idr_preload(gfp_t gfp_mask)
}
EXPORT_SYMBOL(idr_preload);
-int ida_pre_get(struct ida *ida, gfp_t gfp)
-{
- /*
- * The IDA API has no preload_end() equivalent. Instead,
- * ida_get_new() can return -EAGAIN, prompting the caller
- * to return to the ida_pre_get() step.
- */
- if (!__radix_tree_preload(gfp, IDA_PRELOAD_SIZE))
- preempt_enable();
-
- if (!this_cpu_read(ida_bitmap)) {
- struct ida_bitmap *bitmap = kzalloc(sizeof(*bitmap), gfp);
- if (!bitmap)
- return 0;
- if (this_cpu_cmpxchg(ida_bitmap, NULL, bitmap))
- kfree(bitmap);
- }
-
- return 1;
-}
-
void __rcu **idr_get_free(struct radix_tree_root *root,
struct radix_tree_iter *iter, gfp_t gfp,
unsigned long max)
{
struct radix_tree_node *node = NULL, *child;
- void __rcu **slot = (void __rcu **)&root->rnode;
+ void __rcu **slot = (void __rcu **)&root->xa_head;
unsigned long maxindex, start = iter->next_index;
unsigned int shift, offset = 0;
@@ -2148,8 +1516,10 @@ void __rcu **idr_get_free(struct radix_tree_root *root,
if (error < 0)
return ERR_PTR(error);
shift = error;
- child = rcu_dereference_raw(root->rnode);
+ child = rcu_dereference_raw(root->xa_head);
}
+ if (start == 0 && shift == 0)
+ shift = RADIX_TREE_MAP_SHIFT;
while (shift) {
shift -= RADIX_TREE_MAP_SHIFT;
@@ -2192,7 +1562,6 @@ void __rcu **idr_get_free(struct radix_tree_root *root,
else
iter->next_index = 1;
iter->node = node;
- __set_iter_shift(iter, shift);
set_iter_tags(iter, node, offset, IDR_FREE);
return slot;
@@ -2211,10 +1580,10 @@ void __rcu **idr_get_free(struct radix_tree_root *root,
*/
void idr_destroy(struct idr *idr)
{
- struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.rnode);
+ struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.xa_head);
if (radix_tree_is_internal_node(node))
radix_tree_free_nodes(node);
- idr->idr_rt.rnode = NULL;
+ idr->idr_rt.xa_head = NULL;
root_tag_set(&idr->idr_rt, IDR_FREE);
}
EXPORT_SYMBOL(idr_destroy);
@@ -2228,31 +1597,6 @@ radix_tree_node_ctor(void *arg)
INIT_LIST_HEAD(&node->private_list);
}
-static __init unsigned long __maxindex(unsigned int height)
-{
- unsigned int width = height * RADIX_TREE_MAP_SHIFT;
- int shift = RADIX_TREE_INDEX_BITS - width;
-
- if (shift < 0)
- return ~0UL;
- if (shift >= BITS_PER_LONG)
- return 0UL;
- return ~0UL >> shift;
-}
-
-static __init void radix_tree_init_maxnodes(void)
-{
- unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1];
- unsigned int i, j;
-
- for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
- height_to_maxindex[i] = __maxindex(i);
- for (i = 0; i < ARRAY_SIZE(height_to_maxnodes); i++) {
- for (j = i; j > 0; j--)
- height_to_maxnodes[i] += height_to_maxindex[j - 1] + 1;
- }
-}
-
static int radix_tree_cpu_dead(unsigned int cpu)
{
struct radix_tree_preload *rtp;
@@ -2266,8 +1610,6 @@ static int radix_tree_cpu_dead(unsigned int cpu)
kmem_cache_free(radix_tree_node_cachep, node);
rtp->nr--;
}
- kfree(per_cpu(ida_bitmap, cpu));
- per_cpu(ida_bitmap, cpu) = NULL;
return 0;
}
@@ -2277,11 +1619,11 @@ void __init radix_tree_init(void)
BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32);
BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK);
+ BUILD_BUG_ON(XA_CHUNK_SIZE > 255);
radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
sizeof(struct radix_tree_node), 0,
SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
radix_tree_node_ctor);
- radix_tree_init_maxnodes();
ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead",
NULL, radix_tree_cpu_dead);
WARN_ON(ret < 0);
diff --git a/lib/string.c b/lib/string.c
index 2c0900a5d51a..38e4ca08e757 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -27,6 +27,7 @@
#include <linux/export.h>
#include <linux/bug.h>
#include <linux/errno.h>
+#include <linux/slab.h>
#include <asm/byteorder.h>
#include <asm/word-at-a-time.h>
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 08d3d59dca17..aa22bcaec1dc 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -6494,6 +6494,7 @@ static struct sk_buff *populate_skb(char *buf, int size)
skb->queue_mapping = SKB_QUEUE_MAP;
skb->vlan_tci = SKB_VLAN_TCI;
skb->vlan_proto = htons(ETH_P_IP);
+ dev_net_set(&dev, &init_net);
skb->dev = &dev;
skb->dev->ifindex = SKB_DEV_IFINDEX;
skb->dev->type = SKB_DEV_TYPE;
diff --git a/lib/test_ida.c b/lib/test_ida.c
index 2d1637d8136b..b06880625961 100644
--- a/lib/test_ida.c
+++ b/lib/test_ida.c
@@ -150,10 +150,10 @@ static void ida_check_conv(struct ida *ida)
IDA_BUG_ON(ida, !ida_is_empty(ida));
}
+static DEFINE_IDA(ida);
+
static int ida_checks(void)
{
- DEFINE_IDA(ida);
-
IDA_BUG_ON(&ida, !ida_is_empty(&ida));
ida_check_alloc(&ida);
ida_check_destroy(&ida);
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index ec657105edbf..51b78405bf24 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -579,6 +579,73 @@ static noinline void __init kmem_cache_invalid_free(void)
kmem_cache_destroy(cache);
}
+static noinline void __init kasan_memchr(void)
+{
+ char *ptr;
+ size_t size = 24;
+
+ pr_info("out-of-bounds in memchr\n");
+ ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO);
+ if (!ptr)
+ return;
+
+ memchr(ptr, '1', size + 1);
+ kfree(ptr);
+}
+
+static noinline void __init kasan_memcmp(void)
+{
+ char *ptr;
+ size_t size = 24;
+ int arr[9];
+
+ pr_info("out-of-bounds in memcmp\n");
+ ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO);
+ if (!ptr)
+ return;
+
+ memset(arr, 0, sizeof(arr));
+ memcmp(ptr, arr, size+1);
+ kfree(ptr);
+}
+
+static noinline void __init kasan_strings(void)
+{
+ char *ptr;
+ size_t size = 24;
+
+ pr_info("use-after-free in strchr\n");
+ ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO);
+ if (!ptr)
+ return;
+
+ kfree(ptr);
+
+ /*
+ * Try to cause only 1 invalid access (less spam in dmesg).
+ * For that we need ptr to point to zeroed byte.
+ * Skip metadata that could be stored in freed object so ptr
+ * will likely point to zeroed byte.
+ */
+ ptr += 16;
+ strchr(ptr, '1');
+
+ pr_info("use-after-free in strrchr\n");
+ strrchr(ptr, '1');
+
+ pr_info("use-after-free in strcmp\n");
+ strcmp(ptr, "2");
+
+ pr_info("use-after-free in strncmp\n");
+ strncmp(ptr, "2", 1);
+
+ pr_info("use-after-free in strlen\n");
+ strlen(ptr);
+
+ pr_info("use-after-free in strnlen\n");
+ strnlen(ptr, 1);
+}
+
static int __init kmalloc_tests_init(void)
{
/*
@@ -618,6 +685,9 @@ static int __init kmalloc_tests_init(void)
use_after_scope_test();
kmem_cache_double_free();
kmem_cache_invalid_free();
+ kasan_memchr();
+ kasan_memcmp();
+ kasan_strings();
kasan_restore_multi_shot(multishot);
diff --git a/lib/test_memcat_p.c b/lib/test_memcat_p.c
new file mode 100644
index 000000000000..849c477d49d0
--- /dev/null
+++ b/lib/test_memcat_p.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test cases for memcat_p() in lib/memcat_p.c
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+
+struct test_struct {
+ int num;
+ unsigned int magic;
+};
+
+#define MAGIC 0xf00ff00f
+/* Size of each of the NULL-terminated input arrays */
+#define INPUT_MAX 128
+/* Expected number of non-NULL elements in the output array */
+#define EXPECT (INPUT_MAX * 2 - 2)
+
+static int __init test_memcat_p_init(void)
+{
+ struct test_struct **in0, **in1, **out, **p;
+ int err = -ENOMEM, i, r, total = 0;
+
+ in0 = kcalloc(INPUT_MAX, sizeof(*in0), GFP_KERNEL);
+ if (!in0)
+ return err;
+
+ in1 = kcalloc(INPUT_MAX, sizeof(*in1), GFP_KERNEL);
+ if (!in1)
+ goto err_free_in0;
+
+ for (i = 0, r = 1; i < INPUT_MAX - 1; i++) {
+ in0[i] = kmalloc(sizeof(**in0), GFP_KERNEL);
+ if (!in0[i])
+ goto err_free_elements;
+
+ in1[i] = kmalloc(sizeof(**in1), GFP_KERNEL);
+ if (!in1[i]) {
+ kfree(in0[i]);
+ goto err_free_elements;
+ }
+
+ /* lifted from test_sort.c */
+ r = (r * 725861) % 6599;
+ in0[i]->num = r;
+ in1[i]->num = -r;
+ in0[i]->magic = MAGIC;
+ in1[i]->magic = MAGIC;
+ }
+
+ in0[i] = in1[i] = NULL;
+
+ out = memcat_p(in0, in1);
+ if (!out)
+ goto err_free_all_elements;
+
+ err = -EINVAL;
+ for (i = 0, p = out; *p && (i < INPUT_MAX * 2 - 1); p++, i++) {
+ total += (*p)->num;
+
+ if ((*p)->magic != MAGIC) {
+ pr_err("test failed: wrong magic at %d: %u\n", i,
+ (*p)->magic);
+ goto err_free_out;
+ }
+ }
+
+ if (total) {
+ pr_err("test failed: expected zero total, got %d\n", total);
+ goto err_free_out;
+ }
+
+ if (i != EXPECT) {
+ pr_err("test failed: expected output size %d, got %d\n",
+ EXPECT, i);
+ goto err_free_out;
+ }
+
+ for (i = 0; i < INPUT_MAX - 1; i++)
+ if (out[i] != in0[i] || out[i + INPUT_MAX - 1] != in1[i]) {
+ pr_err("test failed: wrong element order at %d\n", i);
+ goto err_free_out;
+ }
+
+ err = 0;
+ pr_info("test passed\n");
+
+err_free_out:
+ kfree(out);
+err_free_all_elements:
+ i = INPUT_MAX;
+err_free_elements:
+ for (i--; i >= 0; i--) {
+ kfree(in1[i]);
+ kfree(in0[i]);
+ }
+
+ kfree(in1);
+err_free_in0:
+ kfree(in0);
+
+ return err;
+}
+
+static void __exit test_memcat_p_exit(void)
+{
+}
+
+module_init(test_memcat_p_init);
+module_exit(test_memcat_p_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
new file mode 100644
index 000000000000..aa47754150ce
--- /dev/null
+++ b/lib/test_xarray.c
@@ -0,0 +1,1238 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * test_xarray.c: Test the XArray API
+ * Copyright (c) 2017-2018 Microsoft Corporation
+ * Author: Matthew Wilcox <willy@infradead.org>
+ */
+
+#include <linux/xarray.h>
+#include <linux/module.h>
+
+static unsigned int tests_run;
+static unsigned int tests_passed;
+
+#ifndef XA_DEBUG
+# ifdef __KERNEL__
+void xa_dump(const struct xarray *xa) { }
+# endif
+#undef XA_BUG_ON
+#define XA_BUG_ON(xa, x) do { \
+ tests_run++; \
+ if (x) { \
+ printk("BUG at %s:%d\n", __func__, __LINE__); \
+ xa_dump(xa); \
+ dump_stack(); \
+ } else { \
+ tests_passed++; \
+ } \
+} while (0)
+#endif
+
+static void *xa_store_index(struct xarray *xa, unsigned long index, gfp_t gfp)
+{
+ return xa_store(xa, index, xa_mk_value(index & LONG_MAX), gfp);
+}
+
+static void xa_alloc_index(struct xarray *xa, unsigned long index, gfp_t gfp)
+{
+ u32 id = 0;
+
+ XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_value(index & LONG_MAX),
+ gfp) != 0);
+ XA_BUG_ON(xa, id != index);
+}
+
+static void xa_erase_index(struct xarray *xa, unsigned long index)
+{
+ XA_BUG_ON(xa, xa_erase(xa, index) != xa_mk_value(index & LONG_MAX));
+ XA_BUG_ON(xa, xa_load(xa, index) != NULL);
+}
+
+/*
+ * If anyone needs this, please move it to xarray.c. We have no current
+ * users outside the test suite because all current multislot users want
+ * to use the advanced API.
+ */
+static void *xa_store_order(struct xarray *xa, unsigned long index,
+ unsigned order, void *entry, gfp_t gfp)
+{
+ XA_STATE_ORDER(xas, xa, index, order);
+ void *curr;
+
+ do {
+ xas_lock(&xas);
+ curr = xas_store(&xas, entry);
+ xas_unlock(&xas);
+ } while (xas_nomem(&xas, gfp));
+
+ return curr;
+}
+
+static noinline void check_xa_err(struct xarray *xa)
+{
+ XA_BUG_ON(xa, xa_err(xa_store_index(xa, 0, GFP_NOWAIT)) != 0);
+ XA_BUG_ON(xa, xa_err(xa_erase(xa, 0)) != 0);
+#ifndef __KERNEL__
+ /* The kernel does not fail GFP_NOWAIT allocations */
+ XA_BUG_ON(xa, xa_err(xa_store_index(xa, 1, GFP_NOWAIT)) != -ENOMEM);
+ XA_BUG_ON(xa, xa_err(xa_store_index(xa, 1, GFP_NOWAIT)) != -ENOMEM);
+#endif
+ XA_BUG_ON(xa, xa_err(xa_store_index(xa, 1, GFP_KERNEL)) != 0);
+ XA_BUG_ON(xa, xa_err(xa_store(xa, 1, xa_mk_value(0), GFP_KERNEL)) != 0);
+ XA_BUG_ON(xa, xa_err(xa_erase(xa, 1)) != 0);
+// kills the test-suite :-(
+// XA_BUG_ON(xa, xa_err(xa_store(xa, 0, xa_mk_internal(0), 0)) != -EINVAL);
+}
+
+static noinline void check_xas_retry(struct xarray *xa)
+{
+ XA_STATE(xas, xa, 0);
+ void *entry;
+
+ xa_store_index(xa, 0, GFP_KERNEL);
+ xa_store_index(xa, 1, GFP_KERNEL);
+
+ rcu_read_lock();
+ XA_BUG_ON(xa, xas_find(&xas, ULONG_MAX) != xa_mk_value(0));
+ xa_erase_index(xa, 1);
+ XA_BUG_ON(xa, !xa_is_retry(xas_reload(&xas)));
+ XA_BUG_ON(xa, xas_retry(&xas, NULL));
+ XA_BUG_ON(xa, xas_retry(&xas, xa_mk_value(0)));
+ xas_reset(&xas);
+ XA_BUG_ON(xa, xas.xa_node != XAS_RESTART);
+ XA_BUG_ON(xa, xas_next_entry(&xas, ULONG_MAX) != xa_mk_value(0));
+ XA_BUG_ON(xa, xas.xa_node != NULL);
+
+ XA_BUG_ON(xa, xa_store_index(xa, 1, GFP_KERNEL) != NULL);
+ XA_BUG_ON(xa, !xa_is_internal(xas_reload(&xas)));
+ xas.xa_node = XAS_RESTART;
+ XA_BUG_ON(xa, xas_next_entry(&xas, ULONG_MAX) != xa_mk_value(0));
+ rcu_read_unlock();
+
+ /* Make sure we can iterate through retry entries */
+ xas_lock(&xas);
+ xas_set(&xas, 0);
+ xas_store(&xas, XA_RETRY_ENTRY);
+ xas_set(&xas, 1);
+ xas_store(&xas, XA_RETRY_ENTRY);
+
+ xas_set(&xas, 0);
+ xas_for_each(&xas, entry, ULONG_MAX) {
+ xas_store(&xas, xa_mk_value(xas.xa_index));
+ }
+ xas_unlock(&xas);
+
+ xa_erase_index(xa, 0);
+ xa_erase_index(xa, 1);
+}
+
+static noinline void check_xa_load(struct xarray *xa)
+{
+ unsigned long i, j;
+
+ for (i = 0; i < 1024; i++) {
+ for (j = 0; j < 1024; j++) {
+ void *entry = xa_load(xa, j);
+ if (j < i)
+ XA_BUG_ON(xa, xa_to_value(entry) != j);
+ else
+ XA_BUG_ON(xa, entry);
+ }
+ XA_BUG_ON(xa, xa_store_index(xa, i, GFP_KERNEL) != NULL);
+ }
+
+ for (i = 0; i < 1024; i++) {
+ for (j = 0; j < 1024; j++) {
+ void *entry = xa_load(xa, j);
+ if (j >= i)
+ XA_BUG_ON(xa, xa_to_value(entry) != j);
+ else
+ XA_BUG_ON(xa, entry);
+ }
+ xa_erase_index(xa, i);
+ }
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index)
+{
+ unsigned int order;
+ unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 8 : 1;
+
+ /* NULL elements have no marks set */
+ XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0));
+ xa_set_mark(xa, index, XA_MARK_0);
+ XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0));
+
+ /* Storing a pointer will not make a mark appear */
+ XA_BUG_ON(xa, xa_store_index(xa, index, GFP_KERNEL) != NULL);
+ XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0));
+ xa_set_mark(xa, index, XA_MARK_0);
+ XA_BUG_ON(xa, !xa_get_mark(xa, index, XA_MARK_0));
+
+ /* Setting one mark will not set another mark */
+ XA_BUG_ON(xa, xa_get_mark(xa, index + 1, XA_MARK_0));
+ XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_1));
+
+ /* Storing NULL clears marks, and they can't be set again */
+ xa_erase_index(xa, index);
+ XA_BUG_ON(xa, !xa_empty(xa));
+ XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0));
+ xa_set_mark(xa, index, XA_MARK_0);
+ XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0));
+
+ /*
+ * Storing a multi-index entry over entries with marks gives the
+ * entire entry the union of the marks
+ */
+ BUG_ON((index % 4) != 0);
+ for (order = 2; order < max_order; order++) {
+ unsigned long base = round_down(index, 1UL << order);
+ unsigned long next = base + (1UL << order);
+ unsigned long i;
+
+ XA_BUG_ON(xa, xa_store_index(xa, index + 1, GFP_KERNEL));
+ xa_set_mark(xa, index + 1, XA_MARK_0);
+ XA_BUG_ON(xa, xa_store_index(xa, index + 2, GFP_KERNEL));
+ xa_set_mark(xa, index + 2, XA_MARK_1);
+ XA_BUG_ON(xa, xa_store_index(xa, next, GFP_KERNEL));
+ xa_store_order(xa, index, order, xa_mk_value(index),
+ GFP_KERNEL);
+ for (i = base; i < next; i++) {
+ XA_STATE(xas, xa, i);
+ unsigned int seen = 0;
+ void *entry;
+
+ XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_0));
+ XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_1));
+ XA_BUG_ON(xa, xa_get_mark(xa, i, XA_MARK_2));
+
+ /* We should see two elements in the array */
+ xas_for_each(&xas, entry, ULONG_MAX)
+ seen++;
+ XA_BUG_ON(xa, seen != 2);
+
+ /* One of which is marked */
+ xas_set(&xas, 0);
+ seen = 0;
+ xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_0)
+ seen++;
+ XA_BUG_ON(xa, seen != 1);
+ }
+ XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_0));
+ XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_1));
+ XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_2));
+ xa_erase_index(xa, index);
+ xa_erase_index(xa, next);
+ XA_BUG_ON(xa, !xa_empty(xa));
+ }
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_xa_mark_2(struct xarray *xa)
+{
+ XA_STATE(xas, xa, 0);
+ unsigned long index;
+ unsigned int count = 0;
+ void *entry;
+
+ xa_store_index(xa, 0, GFP_KERNEL);
+ xa_set_mark(xa, 0, XA_MARK_0);
+ xas_lock(&xas);
+ xas_load(&xas);
+ xas_init_marks(&xas);
+ xas_unlock(&xas);
+ XA_BUG_ON(xa, !xa_get_mark(xa, 0, XA_MARK_0) == 0);
+
+ for (index = 3500; index < 4500; index++) {
+ xa_store_index(xa, index, GFP_KERNEL);
+ xa_set_mark(xa, index, XA_MARK_0);
+ }
+
+ xas_reset(&xas);
+ rcu_read_lock();
+ xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_0)
+ count++;
+ rcu_read_unlock();
+ XA_BUG_ON(xa, count != 1000);
+
+ xas_lock(&xas);
+ xas_for_each(&xas, entry, ULONG_MAX) {
+ xas_init_marks(&xas);
+ XA_BUG_ON(xa, !xa_get_mark(xa, xas.xa_index, XA_MARK_0));
+ XA_BUG_ON(xa, !xas_get_mark(&xas, XA_MARK_0));
+ }
+ xas_unlock(&xas);
+
+ xa_destroy(xa);
+}
+
+static noinline void check_xa_mark(struct xarray *xa)
+{
+ unsigned long index;
+
+ for (index = 0; index < 16384; index += 4)
+ check_xa_mark_1(xa, index);
+
+ check_xa_mark_2(xa);
+}
+
+static noinline void check_xa_shrink(struct xarray *xa)
+{
+ XA_STATE(xas, xa, 1);
+ struct xa_node *node;
+ unsigned int order;
+ unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 15 : 1;
+
+ XA_BUG_ON(xa, !xa_empty(xa));
+ XA_BUG_ON(xa, xa_store_index(xa, 0, GFP_KERNEL) != NULL);
+ XA_BUG_ON(xa, xa_store_index(xa, 1, GFP_KERNEL) != NULL);
+
+ /*
+ * Check that erasing the entry at 1 shrinks the tree and properly
+ * marks the node as being deleted.
+ */
+ xas_lock(&xas);
+ XA_BUG_ON(xa, xas_load(&xas) != xa_mk_value(1));
+ node = xas.xa_node;
+ XA_BUG_ON(xa, xa_entry_locked(xa, node, 0) != xa_mk_value(0));
+ XA_BUG_ON(xa, xas_store(&xas, NULL) != xa_mk_value(1));
+ XA_BUG_ON(xa, xa_load(xa, 1) != NULL);
+ XA_BUG_ON(xa, xas.xa_node != XAS_BOUNDS);
+ XA_BUG_ON(xa, xa_entry_locked(xa, node, 0) != XA_RETRY_ENTRY);
+ XA_BUG_ON(xa, xas_load(&xas) != NULL);
+ xas_unlock(&xas);
+ XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(0));
+ xa_erase_index(xa, 0);
+ XA_BUG_ON(xa, !xa_empty(xa));
+
+ for (order = 0; order < max_order; order++) {
+ unsigned long max = (1UL << order) - 1;
+ xa_store_order(xa, 0, order, xa_mk_value(0), GFP_KERNEL);
+ XA_BUG_ON(xa, xa_load(xa, max) != xa_mk_value(0));
+ XA_BUG_ON(xa, xa_load(xa, max + 1) != NULL);
+ rcu_read_lock();
+ node = xa_head(xa);
+ rcu_read_unlock();
+ XA_BUG_ON(xa, xa_store_index(xa, ULONG_MAX, GFP_KERNEL) !=
+ NULL);
+ rcu_read_lock();
+ XA_BUG_ON(xa, xa_head(xa) == node);
+ rcu_read_unlock();
+ XA_BUG_ON(xa, xa_load(xa, max + 1) != NULL);
+ xa_erase_index(xa, ULONG_MAX);
+ XA_BUG_ON(xa, xa->xa_head != node);
+ xa_erase_index(xa, 0);
+ }
+}
+
+static noinline void check_cmpxchg(struct xarray *xa)
+{
+ void *FIVE = xa_mk_value(5);
+ void *SIX = xa_mk_value(6);
+ void *LOTS = xa_mk_value(12345678);
+
+ XA_BUG_ON(xa, !xa_empty(xa));
+ XA_BUG_ON(xa, xa_store_index(xa, 12345678, GFP_KERNEL) != NULL);
+ XA_BUG_ON(xa, xa_insert(xa, 12345678, xa, GFP_KERNEL) != -EEXIST);
+ XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, SIX, FIVE, GFP_KERNEL) != LOTS);
+ XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, LOTS, FIVE, GFP_KERNEL) != LOTS);
+ XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, FIVE, LOTS, GFP_KERNEL) != FIVE);
+ XA_BUG_ON(xa, xa_cmpxchg(xa, 5, FIVE, NULL, GFP_KERNEL) != NULL);
+ XA_BUG_ON(xa, xa_cmpxchg(xa, 5, NULL, FIVE, GFP_KERNEL) != NULL);
+ xa_erase_index(xa, 12345678);
+ xa_erase_index(xa, 5);
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_reserve(struct xarray *xa)
+{
+ void *entry;
+ unsigned long index = 0;
+
+ /* An array with a reserved entry is not empty */
+ XA_BUG_ON(xa, !xa_empty(xa));
+ xa_reserve(xa, 12345678, GFP_KERNEL);
+ XA_BUG_ON(xa, xa_empty(xa));
+ XA_BUG_ON(xa, xa_load(xa, 12345678));
+ xa_release(xa, 12345678);
+ XA_BUG_ON(xa, !xa_empty(xa));
+
+ /* Releasing a used entry does nothing */
+ xa_reserve(xa, 12345678, GFP_KERNEL);
+ XA_BUG_ON(xa, xa_store_index(xa, 12345678, GFP_NOWAIT) != NULL);
+ xa_release(xa, 12345678);
+ xa_erase_index(xa, 12345678);
+ XA_BUG_ON(xa, !xa_empty(xa));
+
+ /* cmpxchg sees a reserved entry as NULL */
+ xa_reserve(xa, 12345678, GFP_KERNEL);
+ XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, NULL, xa_mk_value(12345678),
+ GFP_NOWAIT) != NULL);
+ xa_release(xa, 12345678);
+ xa_erase_index(xa, 12345678);
+ XA_BUG_ON(xa, !xa_empty(xa));
+
+ /* Can iterate through a reserved entry */
+ xa_store_index(xa, 5, GFP_KERNEL);
+ xa_reserve(xa, 6, GFP_KERNEL);
+ xa_store_index(xa, 7, GFP_KERNEL);
+
+ xa_for_each(xa, entry, index, ULONG_MAX, XA_PRESENT) {
+ XA_BUG_ON(xa, index != 5 && index != 7);
+ }
+ xa_destroy(xa);
+}
+
+static noinline void check_xas_erase(struct xarray *xa)
+{
+ XA_STATE(xas, xa, 0);
+ void *entry;
+ unsigned long i, j;
+
+ for (i = 0; i < 200; i++) {
+ for (j = i; j < 2 * i + 17; j++) {
+ xas_set(&xas, j);
+ do {
+ xas_lock(&xas);
+ xas_store(&xas, xa_mk_value(j));
+ xas_unlock(&xas);
+ } while (xas_nomem(&xas, GFP_KERNEL));
+ }
+
+ xas_set(&xas, ULONG_MAX);
+ do {
+ xas_lock(&xas);
+ xas_store(&xas, xa_mk_value(0));
+ xas_unlock(&xas);
+ } while (xas_nomem(&xas, GFP_KERNEL));
+
+ xas_lock(&xas);
+ xas_store(&xas, NULL);
+
+ xas_set(&xas, 0);
+ j = i;
+ xas_for_each(&xas, entry, ULONG_MAX) {
+ XA_BUG_ON(xa, entry != xa_mk_value(j));
+ xas_store(&xas, NULL);
+ j++;
+ }
+ xas_unlock(&xas);
+ XA_BUG_ON(xa, !xa_empty(xa));
+ }
+}
+
+#ifdef CONFIG_XARRAY_MULTI
+static noinline void check_multi_store_1(struct xarray *xa, unsigned long index,
+ unsigned int order)
+{
+ XA_STATE(xas, xa, index);
+ unsigned long min = index & ~((1UL << order) - 1);
+ unsigned long max = min + (1UL << order);
+
+ xa_store_order(xa, index, order, xa_mk_value(index), GFP_KERNEL);
+ XA_BUG_ON(xa, xa_load(xa, min) != xa_mk_value(index));
+ XA_BUG_ON(xa, xa_load(xa, max - 1) != xa_mk_value(index));
+ XA_BUG_ON(xa, xa_load(xa, max) != NULL);
+ XA_BUG_ON(xa, xa_load(xa, min - 1) != NULL);
+
+ XA_BUG_ON(xa, xas_store(&xas, xa_mk_value(min)) != xa_mk_value(index));
+ XA_BUG_ON(xa, xa_load(xa, min) != xa_mk_value(min));
+ XA_BUG_ON(xa, xa_load(xa, max - 1) != xa_mk_value(min));
+ XA_BUG_ON(xa, xa_load(xa, max) != NULL);
+ XA_BUG_ON(xa, xa_load(xa, min - 1) != NULL);
+
+ xa_erase_index(xa, min);
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_multi_store_2(struct xarray *xa, unsigned long index,
+ unsigned int order)
+{
+ XA_STATE(xas, xa, index);
+ xa_store_order(xa, index, order, xa_mk_value(0), GFP_KERNEL);
+
+ XA_BUG_ON(xa, xas_store(&xas, xa_mk_value(1)) != xa_mk_value(0));
+ XA_BUG_ON(xa, xas.xa_index != index);
+ XA_BUG_ON(xa, xas_store(&xas, NULL) != xa_mk_value(1));
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+#endif
+
+static noinline void check_multi_store(struct xarray *xa)
+{
+#ifdef CONFIG_XARRAY_MULTI
+ unsigned long i, j, k;
+ unsigned int max_order = (sizeof(long) == 4) ? 30 : 60;
+
+ /* Loading from any position returns the same value */
+ xa_store_order(xa, 0, 1, xa_mk_value(0), GFP_KERNEL);
+ XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(0));
+ XA_BUG_ON(xa, xa_load(xa, 1) != xa_mk_value(0));
+ XA_BUG_ON(xa, xa_load(xa, 2) != NULL);
+ rcu_read_lock();
+ XA_BUG_ON(xa, xa_to_node(xa_head(xa))->count != 2);
+ XA_BUG_ON(xa, xa_to_node(xa_head(xa))->nr_values != 2);
+ rcu_read_unlock();
+
+ /* Storing adjacent to the value does not alter the value */
+ xa_store(xa, 3, xa, GFP_KERNEL);
+ XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(0));
+ XA_BUG_ON(xa, xa_load(xa, 1) != xa_mk_value(0));
+ XA_BUG_ON(xa, xa_load(xa, 2) != NULL);
+ rcu_read_lock();
+ XA_BUG_ON(xa, xa_to_node(xa_head(xa))->count != 3);
+ XA_BUG_ON(xa, xa_to_node(xa_head(xa))->nr_values != 2);
+ rcu_read_unlock();
+
+ /* Overwriting multiple indexes works */
+ xa_store_order(xa, 0, 2, xa_mk_value(1), GFP_KERNEL);
+ XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(1));
+ XA_BUG_ON(xa, xa_load(xa, 1) != xa_mk_value(1));
+ XA_BUG_ON(xa, xa_load(xa, 2) != xa_mk_value(1));
+ XA_BUG_ON(xa, xa_load(xa, 3) != xa_mk_value(1));
+ XA_BUG_ON(xa, xa_load(xa, 4) != NULL);
+ rcu_read_lock();
+ XA_BUG_ON(xa, xa_to_node(xa_head(xa))->count != 4);
+ XA_BUG_ON(xa, xa_to_node(xa_head(xa))->nr_values != 4);
+ rcu_read_unlock();
+
+ /* We can erase multiple values with a single store */
+ xa_store_order(xa, 0, 63, NULL, GFP_KERNEL);
+ XA_BUG_ON(xa, !xa_empty(xa));
+
+ /* Even when the first slot is empty but the others aren't */
+ xa_store_index(xa, 1, GFP_KERNEL);
+ xa_store_index(xa, 2, GFP_KERNEL);
+ xa_store_order(xa, 0, 2, NULL, GFP_KERNEL);
+ XA_BUG_ON(xa, !xa_empty(xa));
+
+ for (i = 0; i < max_order; i++) {
+ for (j = 0; j < max_order; j++) {
+ xa_store_order(xa, 0, i, xa_mk_value(i), GFP_KERNEL);
+ xa_store_order(xa, 0, j, xa_mk_value(j), GFP_KERNEL);
+
+ for (k = 0; k < max_order; k++) {
+ void *entry = xa_load(xa, (1UL << k) - 1);
+ if ((i < k) && (j < k))
+ XA_BUG_ON(xa, entry != NULL);
+ else
+ XA_BUG_ON(xa, entry != xa_mk_value(j));
+ }
+
+ xa_erase(xa, 0);
+ XA_BUG_ON(xa, !xa_empty(xa));
+ }
+ }
+
+ for (i = 0; i < 20; i++) {
+ check_multi_store_1(xa, 200, i);
+ check_multi_store_1(xa, 0, i);
+ check_multi_store_1(xa, (1UL << i) + 1, i);
+ }
+ check_multi_store_2(xa, 4095, 9);
+#endif
+}
+
+static DEFINE_XARRAY_ALLOC(xa0);
+
+static noinline void check_xa_alloc(void)
+{
+ int i;
+ u32 id;
+
+ /* An empty array should assign 0 to the first alloc */
+ xa_alloc_index(&xa0, 0, GFP_KERNEL);
+
+ /* Erasing it should make the array empty again */
+ xa_erase_index(&xa0, 0);
+ XA_BUG_ON(&xa0, !xa_empty(&xa0));
+
+ /* And it should assign 0 again */
+ xa_alloc_index(&xa0, 0, GFP_KERNEL);
+
+ /* The next assigned ID should be 1 */
+ xa_alloc_index(&xa0, 1, GFP_KERNEL);
+ xa_erase_index(&xa0, 1);
+
+ /* Storing a value should mark it used */
+ xa_store_index(&xa0, 1, GFP_KERNEL);
+ xa_alloc_index(&xa0, 2, GFP_KERNEL);
+
+ /* If we then erase 0, it should be free */
+ xa_erase_index(&xa0, 0);
+ xa_alloc_index(&xa0, 0, GFP_KERNEL);
+
+ xa_erase_index(&xa0, 1);
+ xa_erase_index(&xa0, 2);
+
+ for (i = 1; i < 5000; i++) {
+ xa_alloc_index(&xa0, i, GFP_KERNEL);
+ }
+
+ xa_destroy(&xa0);
+
+ id = 0xfffffffeU;
+ XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_value(0),
+ GFP_KERNEL) != 0);
+ XA_BUG_ON(&xa0, id != 0xfffffffeU);
+ XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_value(0),
+ GFP_KERNEL) != 0);
+ XA_BUG_ON(&xa0, id != 0xffffffffU);
+ XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_value(0),
+ GFP_KERNEL) != -ENOSPC);
+ XA_BUG_ON(&xa0, id != 0xffffffffU);
+ xa_destroy(&xa0);
+}
+
+static noinline void __check_store_iter(struct xarray *xa, unsigned long start,
+ unsigned int order, unsigned int present)
+{
+ XA_STATE_ORDER(xas, xa, start, order);
+ void *entry;
+ unsigned int count = 0;
+
+retry:
+ xas_lock(&xas);
+ xas_for_each_conflict(&xas, entry) {
+ XA_BUG_ON(xa, !xa_is_value(entry));
+ XA_BUG_ON(xa, entry < xa_mk_value(start));
+ XA_BUG_ON(xa, entry > xa_mk_value(start + (1UL << order) - 1));
+ count++;
+ }
+ xas_store(&xas, xa_mk_value(start));
+ xas_unlock(&xas);
+ if (xas_nomem(&xas, GFP_KERNEL)) {
+ count = 0;
+ goto retry;
+ }
+ XA_BUG_ON(xa, xas_error(&xas));
+ XA_BUG_ON(xa, count != present);
+ XA_BUG_ON(xa, xa_load(xa, start) != xa_mk_value(start));
+ XA_BUG_ON(xa, xa_load(xa, start + (1UL << order) - 1) !=
+ xa_mk_value(start));
+ xa_erase_index(xa, start);
+}
+
+static noinline void check_store_iter(struct xarray *xa)
+{
+ unsigned int i, j;
+ unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1;
+
+ for (i = 0; i < max_order; i++) {
+ unsigned int min = 1 << i;
+ unsigned int max = (2 << i) - 1;
+ __check_store_iter(xa, 0, i, 0);
+ XA_BUG_ON(xa, !xa_empty(xa));
+ __check_store_iter(xa, min, i, 0);
+ XA_BUG_ON(xa, !xa_empty(xa));
+
+ xa_store_index(xa, min, GFP_KERNEL);
+ __check_store_iter(xa, min, i, 1);
+ XA_BUG_ON(xa, !xa_empty(xa));
+ xa_store_index(xa, max, GFP_KERNEL);
+ __check_store_iter(xa, min, i, 1);
+ XA_BUG_ON(xa, !xa_empty(xa));
+
+ for (j = 0; j < min; j++)
+ xa_store_index(xa, j, GFP_KERNEL);
+ __check_store_iter(xa, 0, i, min);
+ XA_BUG_ON(xa, !xa_empty(xa));
+ for (j = 0; j < min; j++)
+ xa_store_index(xa, min + j, GFP_KERNEL);
+ __check_store_iter(xa, min, i, min);
+ XA_BUG_ON(xa, !xa_empty(xa));
+ }
+#ifdef CONFIG_XARRAY_MULTI
+ xa_store_index(xa, 63, GFP_KERNEL);
+ xa_store_index(xa, 65, GFP_KERNEL);
+ __check_store_iter(xa, 64, 2, 1);
+ xa_erase_index(xa, 63);
+#endif
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_multi_find(struct xarray *xa)
+{
+#ifdef CONFIG_XARRAY_MULTI
+ unsigned long index;
+
+ xa_store_order(xa, 12, 2, xa_mk_value(12), GFP_KERNEL);
+ XA_BUG_ON(xa, xa_store_index(xa, 16, GFP_KERNEL) != NULL);
+
+ index = 0;
+ XA_BUG_ON(xa, xa_find(xa, &index, ULONG_MAX, XA_PRESENT) !=
+ xa_mk_value(12));
+ XA_BUG_ON(xa, index != 12);
+ index = 13;
+ XA_BUG_ON(xa, xa_find(xa, &index, ULONG_MAX, XA_PRESENT) !=
+ xa_mk_value(12));
+ XA_BUG_ON(xa, (index < 12) || (index >= 16));
+ XA_BUG_ON(xa, xa_find_after(xa, &index, ULONG_MAX, XA_PRESENT) !=
+ xa_mk_value(16));
+ XA_BUG_ON(xa, index != 16);
+
+ xa_erase_index(xa, 12);
+ xa_erase_index(xa, 16);
+ XA_BUG_ON(xa, !xa_empty(xa));
+#endif
+}
+
+static noinline void check_multi_find_2(struct xarray *xa)
+{
+ unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 10 : 1;
+ unsigned int i, j;
+ void *entry;
+
+ for (i = 0; i < max_order; i++) {
+ unsigned long index = 1UL << i;
+ for (j = 0; j < index; j++) {
+ XA_STATE(xas, xa, j + index);
+ xa_store_index(xa, index - 1, GFP_KERNEL);
+ xa_store_order(xa, index, i, xa_mk_value(index),
+ GFP_KERNEL);
+ rcu_read_lock();
+ xas_for_each(&xas, entry, ULONG_MAX) {
+ xa_erase_index(xa, index);
+ }
+ rcu_read_unlock();
+ xa_erase_index(xa, index - 1);
+ XA_BUG_ON(xa, !xa_empty(xa));
+ }
+ }
+}
+
+static noinline void check_find(struct xarray *xa)
+{
+ unsigned long i, j, k;
+
+ XA_BUG_ON(xa, !xa_empty(xa));
+
+ /*
+ * Check xa_find with all pairs between 0 and 99 inclusive,
+ * starting at every index between 0 and 99
+ */
+ for (i = 0; i < 100; i++) {
+ XA_BUG_ON(xa, xa_store_index(xa, i, GFP_KERNEL) != NULL);
+ xa_set_mark(xa, i, XA_MARK_0);
+ for (j = 0; j < i; j++) {
+ XA_BUG_ON(xa, xa_store_index(xa, j, GFP_KERNEL) !=
+ NULL);
+ xa_set_mark(xa, j, XA_MARK_0);
+ for (k = 0; k < 100; k++) {
+ unsigned long index = k;
+ void *entry = xa_find(xa, &index, ULONG_MAX,
+ XA_PRESENT);
+ if (k <= j)
+ XA_BUG_ON(xa, index != j);
+ else if (k <= i)
+ XA_BUG_ON(xa, index != i);
+ else
+ XA_BUG_ON(xa, entry != NULL);
+
+ index = k;
+ entry = xa_find(xa, &index, ULONG_MAX,
+ XA_MARK_0);
+ if (k <= j)
+ XA_BUG_ON(xa, index != j);
+ else if (k <= i)
+ XA_BUG_ON(xa, index != i);
+ else
+ XA_BUG_ON(xa, entry != NULL);
+ }
+ xa_erase_index(xa, j);
+ XA_BUG_ON(xa, xa_get_mark(xa, j, XA_MARK_0));
+ XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_0));
+ }
+ xa_erase_index(xa, i);
+ XA_BUG_ON(xa, xa_get_mark(xa, i, XA_MARK_0));
+ }
+ XA_BUG_ON(xa, !xa_empty(xa));
+ check_multi_find(xa);
+ check_multi_find_2(xa);
+}
+
+/* See find_swap_entry() in mm/shmem.c */
+static noinline unsigned long xa_find_entry(struct xarray *xa, void *item)
+{
+ XA_STATE(xas, xa, 0);
+ unsigned int checked = 0;
+ void *entry;
+
+ rcu_read_lock();
+ xas_for_each(&xas, entry, ULONG_MAX) {
+ if (xas_retry(&xas, entry))
+ continue;
+ if (entry == item)
+ break;
+ checked++;
+ if ((checked % 4) != 0)
+ continue;
+ xas_pause(&xas);
+ }
+ rcu_read_unlock();
+
+ return entry ? xas.xa_index : -1;
+}
+
+static noinline void check_find_entry(struct xarray *xa)
+{
+#ifdef CONFIG_XARRAY_MULTI
+ unsigned int order;
+ unsigned long offset, index;
+
+ for (order = 0; order < 20; order++) {
+ for (offset = 0; offset < (1UL << (order + 3));
+ offset += (1UL << order)) {
+ for (index = 0; index < (1UL << (order + 5));
+ index += (1UL << order)) {
+ xa_store_order(xa, index, order,
+ xa_mk_value(index), GFP_KERNEL);
+ XA_BUG_ON(xa, xa_load(xa, index) !=
+ xa_mk_value(index));
+ XA_BUG_ON(xa, xa_find_entry(xa,
+ xa_mk_value(index)) != index);
+ }
+ XA_BUG_ON(xa, xa_find_entry(xa, xa) != -1);
+ xa_destroy(xa);
+ }
+ }
+#endif
+
+ XA_BUG_ON(xa, xa_find_entry(xa, xa) != -1);
+ xa_store_index(xa, ULONG_MAX, GFP_KERNEL);
+ XA_BUG_ON(xa, xa_find_entry(xa, xa) != -1);
+ XA_BUG_ON(xa, xa_find_entry(xa, xa_mk_value(LONG_MAX)) != -1);
+ xa_erase_index(xa, ULONG_MAX);
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_move_small(struct xarray *xa, unsigned long idx)
+{
+ XA_STATE(xas, xa, 0);
+ unsigned long i;
+
+ xa_store_index(xa, 0, GFP_KERNEL);
+ xa_store_index(xa, idx, GFP_KERNEL);
+
+ rcu_read_lock();
+ for (i = 0; i < idx * 4; i++) {
+ void *entry = xas_next(&xas);
+ if (i <= idx)
+ XA_BUG_ON(xa, xas.xa_node == XAS_RESTART);
+ XA_BUG_ON(xa, xas.xa_index != i);
+ if (i == 0 || i == idx)
+ XA_BUG_ON(xa, entry != xa_mk_value(i));
+ else
+ XA_BUG_ON(xa, entry != NULL);
+ }
+ xas_next(&xas);
+ XA_BUG_ON(xa, xas.xa_index != i);
+
+ do {
+ void *entry = xas_prev(&xas);
+ i--;
+ if (i <= idx)
+ XA_BUG_ON(xa, xas.xa_node == XAS_RESTART);
+ XA_BUG_ON(xa, xas.xa_index != i);
+ if (i == 0 || i == idx)
+ XA_BUG_ON(xa, entry != xa_mk_value(i));
+ else
+ XA_BUG_ON(xa, entry != NULL);
+ } while (i > 0);
+
+ xas_set(&xas, ULONG_MAX);
+ XA_BUG_ON(xa, xas_next(&xas) != NULL);
+ XA_BUG_ON(xa, xas.xa_index != ULONG_MAX);
+ XA_BUG_ON(xa, xas_next(&xas) != xa_mk_value(0));
+ XA_BUG_ON(xa, xas.xa_index != 0);
+ XA_BUG_ON(xa, xas_prev(&xas) != NULL);
+ XA_BUG_ON(xa, xas.xa_index != ULONG_MAX);
+ rcu_read_unlock();
+
+ xa_erase_index(xa, 0);
+ xa_erase_index(xa, idx);
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_move(struct xarray *xa)
+{
+ XA_STATE(xas, xa, (1 << 16) - 1);
+ unsigned long i;
+
+ for (i = 0; i < (1 << 16); i++)
+ XA_BUG_ON(xa, xa_store_index(xa, i, GFP_KERNEL) != NULL);
+
+ rcu_read_lock();
+ do {
+ void *entry = xas_prev(&xas);
+ i--;
+ XA_BUG_ON(xa, entry != xa_mk_value(i));
+ XA_BUG_ON(xa, i != xas.xa_index);
+ } while (i != 0);
+
+ XA_BUG_ON(xa, xas_prev(&xas) != NULL);
+ XA_BUG_ON(xa, xas.xa_index != ULONG_MAX);
+
+ do {
+ void *entry = xas_next(&xas);
+ XA_BUG_ON(xa, entry != xa_mk_value(i));
+ XA_BUG_ON(xa, i != xas.xa_index);
+ i++;
+ } while (i < (1 << 16));
+ rcu_read_unlock();
+
+ for (i = (1 << 8); i < (1 << 15); i++)
+ xa_erase_index(xa, i);
+
+ i = xas.xa_index;
+
+ rcu_read_lock();
+ do {
+ void *entry = xas_prev(&xas);
+ i--;
+ if ((i < (1 << 8)) || (i >= (1 << 15)))
+ XA_BUG_ON(xa, entry != xa_mk_value(i));
+ else
+ XA_BUG_ON(xa, entry != NULL);
+ XA_BUG_ON(xa, i != xas.xa_index);
+ } while (i != 0);
+
+ XA_BUG_ON(xa, xas_prev(&xas) != NULL);
+ XA_BUG_ON(xa, xas.xa_index != ULONG_MAX);
+
+ do {
+ void *entry = xas_next(&xas);
+ if ((i < (1 << 8)) || (i >= (1 << 15)))
+ XA_BUG_ON(xa, entry != xa_mk_value(i));
+ else
+ XA_BUG_ON(xa, entry != NULL);
+ XA_BUG_ON(xa, i != xas.xa_index);
+ i++;
+ } while (i < (1 << 16));
+ rcu_read_unlock();
+
+ xa_destroy(xa);
+
+ for (i = 0; i < 16; i++)
+ check_move_small(xa, 1UL << i);
+
+ for (i = 2; i < 16; i++)
+ check_move_small(xa, (1UL << i) - 1);
+}
+
+static noinline void xa_store_many_order(struct xarray *xa,
+ unsigned long index, unsigned order)
+{
+ XA_STATE_ORDER(xas, xa, index, order);
+ unsigned int i = 0;
+
+ do {
+ xas_lock(&xas);
+ XA_BUG_ON(xa, xas_find_conflict(&xas));
+ xas_create_range(&xas);
+ if (xas_error(&xas))
+ goto unlock;
+ for (i = 0; i < (1U << order); i++) {
+ XA_BUG_ON(xa, xas_store(&xas, xa_mk_value(index + i)));
+ xas_next(&xas);
+ }
+unlock:
+ xas_unlock(&xas);
+ } while (xas_nomem(&xas, GFP_KERNEL));
+
+ XA_BUG_ON(xa, xas_error(&xas));
+}
+
+static noinline void check_create_range_1(struct xarray *xa,
+ unsigned long index, unsigned order)
+{
+ unsigned long i;
+
+ xa_store_many_order(xa, index, order);
+ for (i = index; i < index + (1UL << order); i++)
+ xa_erase_index(xa, i);
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_create_range_2(struct xarray *xa, unsigned order)
+{
+ unsigned long i;
+ unsigned long nr = 1UL << order;
+
+ for (i = 0; i < nr * nr; i += nr)
+ xa_store_many_order(xa, i, order);
+ for (i = 0; i < nr * nr; i++)
+ xa_erase_index(xa, i);
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_create_range_3(void)
+{
+ XA_STATE(xas, NULL, 0);
+ xas_set_err(&xas, -EEXIST);
+ xas_create_range(&xas);
+ XA_BUG_ON(NULL, xas_error(&xas) != -EEXIST);
+}
+
+static noinline void check_create_range_4(struct xarray *xa,
+ unsigned long index, unsigned order)
+{
+ XA_STATE_ORDER(xas, xa, index, order);
+ unsigned long base = xas.xa_index;
+ unsigned long i = 0;
+
+ xa_store_index(xa, index, GFP_KERNEL);
+ do {
+ xas_lock(&xas);
+ xas_create_range(&xas);
+ if (xas_error(&xas))
+ goto unlock;
+ for (i = 0; i < (1UL << order); i++) {
+ void *old = xas_store(&xas, xa_mk_value(base + i));
+ if (xas.xa_index == index)
+ XA_BUG_ON(xa, old != xa_mk_value(base + i));
+ else
+ XA_BUG_ON(xa, old != NULL);
+ xas_next(&xas);
+ }
+unlock:
+ xas_unlock(&xas);
+ } while (xas_nomem(&xas, GFP_KERNEL));
+
+ XA_BUG_ON(xa, xas_error(&xas));
+
+ for (i = base; i < base + (1UL << order); i++)
+ xa_erase_index(xa, i);
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_create_range(struct xarray *xa)
+{
+ unsigned int order;
+ unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 12 : 1;
+
+ for (order = 0; order < max_order; order++) {
+ check_create_range_1(xa, 0, order);
+ check_create_range_1(xa, 1U << order, order);
+ check_create_range_1(xa, 2U << order, order);
+ check_create_range_1(xa, 3U << order, order);
+ check_create_range_1(xa, 1U << 24, order);
+ if (order < 10)
+ check_create_range_2(xa, order);
+
+ check_create_range_4(xa, 0, order);
+ check_create_range_4(xa, 1U << order, order);
+ check_create_range_4(xa, 2U << order, order);
+ check_create_range_4(xa, 3U << order, order);
+ check_create_range_4(xa, 1U << 24, order);
+
+ check_create_range_4(xa, 1, order);
+ check_create_range_4(xa, (1U << order) + 1, order);
+ check_create_range_4(xa, (2U << order) + 1, order);
+ check_create_range_4(xa, (2U << order) - 1, order);
+ check_create_range_4(xa, (3U << order) + 1, order);
+ check_create_range_4(xa, (3U << order) - 1, order);
+ check_create_range_4(xa, (1U << 24) + 1, order);
+ }
+
+ check_create_range_3();
+}
+
+static noinline void __check_store_range(struct xarray *xa, unsigned long first,
+ unsigned long last)
+{
+#ifdef CONFIG_XARRAY_MULTI
+ xa_store_range(xa, first, last, xa_mk_value(first), GFP_KERNEL);
+
+ XA_BUG_ON(xa, xa_load(xa, first) != xa_mk_value(first));
+ XA_BUG_ON(xa, xa_load(xa, last) != xa_mk_value(first));
+ XA_BUG_ON(xa, xa_load(xa, first - 1) != NULL);
+ XA_BUG_ON(xa, xa_load(xa, last + 1) != NULL);
+
+ xa_store_range(xa, first, last, NULL, GFP_KERNEL);
+#endif
+
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+static noinline void check_store_range(struct xarray *xa)
+{
+ unsigned long i, j;
+
+ for (i = 0; i < 128; i++) {
+ for (j = i; j < 128; j++) {
+ __check_store_range(xa, i, j);
+ __check_store_range(xa, 128 + i, 128 + j);
+ __check_store_range(xa, 4095 + i, 4095 + j);
+ __check_store_range(xa, 4096 + i, 4096 + j);
+ __check_store_range(xa, 123456 + i, 123456 + j);
+ __check_store_range(xa, UINT_MAX + i, UINT_MAX + j);
+ }
+ }
+}
+
+static LIST_HEAD(shadow_nodes);
+
+static void test_update_node(struct xa_node *node)
+{
+ if (node->count && node->count == node->nr_values) {
+ if (list_empty(&node->private_list))
+ list_add(&shadow_nodes, &node->private_list);
+ } else {
+ if (!list_empty(&node->private_list))
+ list_del_init(&node->private_list);
+ }
+}
+
+static noinline void shadow_remove(struct xarray *xa)
+{
+ struct xa_node *node;
+
+ xa_lock(xa);
+ while ((node = list_first_entry_or_null(&shadow_nodes,
+ struct xa_node, private_list))) {
+ XA_STATE(xas, node->array, 0);
+ XA_BUG_ON(xa, node->array != xa);
+ list_del_init(&node->private_list);
+ xas.xa_node = xa_parent_locked(node->array, node);
+ xas.xa_offset = node->offset;
+ xas.xa_shift = node->shift + XA_CHUNK_SHIFT;
+ xas_set_update(&xas, test_update_node);
+ xas_store(&xas, NULL);
+ }
+ xa_unlock(xa);
+}
+
+static noinline void check_workingset(struct xarray *xa, unsigned long index)
+{
+ XA_STATE(xas, xa, index);
+ xas_set_update(&xas, test_update_node);
+
+ do {
+ xas_lock(&xas);
+ xas_store(&xas, xa_mk_value(0));
+ xas_next(&xas);
+ xas_store(&xas, xa_mk_value(1));
+ xas_unlock(&xas);
+ } while (xas_nomem(&xas, GFP_KERNEL));
+
+ XA_BUG_ON(xa, list_empty(&shadow_nodes));
+
+ xas_lock(&xas);
+ xas_next(&xas);
+ xas_store(&xas, &xas);
+ XA_BUG_ON(xa, !list_empty(&shadow_nodes));
+
+ xas_store(&xas, xa_mk_value(2));
+ xas_unlock(&xas);
+ XA_BUG_ON(xa, list_empty(&shadow_nodes));
+
+ shadow_remove(xa);
+ XA_BUG_ON(xa, !list_empty(&shadow_nodes));
+ XA_BUG_ON(xa, !xa_empty(xa));
+}
+
+/*
+ * Check that the pointer / value / sibling entries are accounted the
+ * way we expect them to be.
+ */
+static noinline void check_account(struct xarray *xa)
+{
+#ifdef CONFIG_XARRAY_MULTI
+ unsigned int order;
+
+ for (order = 1; order < 12; order++) {
+ XA_STATE(xas, xa, 1 << order);
+
+ xa_store_order(xa, 0, order, xa, GFP_KERNEL);
+ xas_load(&xas);
+ XA_BUG_ON(xa, xas.xa_node->count == 0);
+ XA_BUG_ON(xa, xas.xa_node->count > (1 << order));
+ XA_BUG_ON(xa, xas.xa_node->nr_values != 0);
+
+ xa_store_order(xa, 1 << order, order, xa_mk_value(1 << order),
+ GFP_KERNEL);
+ XA_BUG_ON(xa, xas.xa_node->count != xas.xa_node->nr_values * 2);
+
+ xa_erase(xa, 1 << order);
+ XA_BUG_ON(xa, xas.xa_node->nr_values != 0);
+
+ xa_erase(xa, 0);
+ XA_BUG_ON(xa, !xa_empty(xa));
+ }
+#endif
+}
+
+static noinline void check_destroy(struct xarray *xa)
+{
+ unsigned long index;
+
+ XA_BUG_ON(xa, !xa_empty(xa));
+
+ /* Destroying an empty array is a no-op */
+ xa_destroy(xa);
+ XA_BUG_ON(xa, !xa_empty(xa));
+
+ /* Destroying an array with a single entry */
+ for (index = 0; index < 1000; index++) {
+ xa_store_index(xa, index, GFP_KERNEL);
+ XA_BUG_ON(xa, xa_empty(xa));
+ xa_destroy(xa);
+ XA_BUG_ON(xa, !xa_empty(xa));
+ }
+
+ /* Destroying an array with a single entry at ULONG_MAX */
+ xa_store(xa, ULONG_MAX, xa, GFP_KERNEL);
+ XA_BUG_ON(xa, xa_empty(xa));
+ xa_destroy(xa);
+ XA_BUG_ON(xa, !xa_empty(xa));
+
+#ifdef CONFIG_XARRAY_MULTI
+ /* Destroying an array with a multi-index entry */
+ xa_store_order(xa, 1 << 11, 11, xa, GFP_KERNEL);
+ XA_BUG_ON(xa, xa_empty(xa));
+ xa_destroy(xa);
+ XA_BUG_ON(xa, !xa_empty(xa));
+#endif
+}
+
+static DEFINE_XARRAY(array);
+
+static int xarray_checks(void)
+{
+ check_xa_err(&array);
+ check_xas_retry(&array);
+ check_xa_load(&array);
+ check_xa_mark(&array);
+ check_xa_shrink(&array);
+ check_xas_erase(&array);
+ check_cmpxchg(&array);
+ check_reserve(&array);
+ check_multi_store(&array);
+ check_xa_alloc();
+ check_find(&array);
+ check_find_entry(&array);
+ check_account(&array);
+ check_destroy(&array);
+ check_move(&array);
+ check_create_range(&array);
+ check_store_range(&array);
+ check_store_iter(&array);
+
+ check_workingset(&array, 0);
+ check_workingset(&array, 64);
+ check_workingset(&array, 4096);
+
+ printk("XArray: %u of %u tests passed\n", tests_passed, tests_run);
+ return (tests_run == tests_passed) ? 0 : -EINVAL;
+}
+
+static void xarray_exit(void)
+{
+}
+
+module_init(xarray_checks);
+module_exit(xarray_exit);
+MODULE_AUTHOR("Matthew Wilcox <willy@infradead.org>");
+MODULE_LICENSE("GPL");
diff --git a/lib/udivmoddi4.c b/lib/udivmoddi4.c
new file mode 100644
index 000000000000..c08bc8a5f1cf
--- /dev/null
+++ b/lib/udivmoddi4.c
@@ -0,0 +1,310 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see the file COPYING, or write
+ * to the Free Software Foundation, Inc.
+ */
+
+#include <linux/libgcc.h>
+
+#define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz(X))
+
+#define W_TYPE_SIZE 32
+
+#define __ll_B ((unsigned long) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((unsigned long) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((unsigned long) (t) >> (W_TYPE_SIZE / 2))
+
+/* If we still don't have umul_ppmm, define it using plain C. */
+#if !defined(umul_ppmm)
+#define umul_ppmm(w1, w0, u, v) \
+ do { \
+ unsigned long __x0, __x1, __x2, __x3; \
+ unsigned short __ul, __vl, __uh, __vh; \
+ \
+ __ul = __ll_lowpart(u); \
+ __uh = __ll_highpart(u); \
+ __vl = __ll_lowpart(v); \
+ __vh = __ll_highpart(v); \
+ \
+ __x0 = (unsigned long) __ul * __vl; \
+ __x1 = (unsigned long) __ul * __vh; \
+ __x2 = (unsigned long) __uh * __vl; \
+ __x3 = (unsigned long) __uh * __vh; \
+ \
+ __x1 += __ll_highpart(__x0); \
+ __x1 += __x2; \
+ if (__x1 < __x2) \
+ __x3 += __ll_B; \
+ \
+ (w1) = __x3 + __ll_highpart(__x1); \
+ (w0) = __ll_lowpart(__x1) * __ll_B + __ll_lowpart(__x0);\
+ } while (0)
+#endif
+
+#if !defined(sub_ddmmss)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+ do { \
+ unsigned long __x; \
+ __x = (al) - (bl); \
+ (sh) = (ah) - (bh) - (__x > (al)); \
+ (sl) = __x; \
+ } while (0)
+#endif
+
+/* Define this unconditionally, so it can be used for debugging. */
+#define __udiv_qrnnd_c(q, r, n1, n0, d) \
+ do { \
+ unsigned long __d1, __d0, __q1, __q0; \
+ unsigned long __r1, __r0, __m; \
+ __d1 = __ll_highpart(d); \
+ __d0 = __ll_lowpart(d); \
+ \
+ __r1 = (n1) % __d1; \
+ __q1 = (n1) / __d1; \
+ __m = (unsigned long) __q1 * __d0; \
+ __r1 = __r1 * __ll_B | __ll_highpart(n0); \
+ if (__r1 < __m) { \
+ __q1--, __r1 += (d); \
+ if (__r1 >= (d)) \
+ if (__r1 < __m) \
+ __q1--, __r1 += (d); \
+ } \
+ __r1 -= __m; \
+ \
+ __r0 = __r1 % __d1; \
+ __q0 = __r1 / __d1; \
+ __m = (unsigned long) __q0 * __d0; \
+ __r0 = __r0 * __ll_B | __ll_lowpart(n0); \
+ if (__r0 < __m) { \
+ __q0--, __r0 += (d); \
+ if (__r0 >= (d)) \
+ if (__r0 < __m) \
+ __q0--, __r0 += (d); \
+ } \
+ __r0 -= __m; \
+ \
+ (q) = (unsigned long) __q1 * __ll_B | __q0; \
+ (r) = __r0; \
+ } while (0)
+
+/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c. */
+#if !defined(udiv_qrnnd)
+#define UDIV_NEEDS_NORMALIZATION 1
+#define udiv_qrnnd __udiv_qrnnd_c
+#endif
+
+unsigned long long __udivmoddi4(unsigned long long u, unsigned long long v,
+ unsigned long long *rp)
+{
+ const DWunion nn = {.ll = u };
+ const DWunion dd = {.ll = v };
+ DWunion rr, ww;
+ unsigned long d0, d1, n0, n1, n2;
+ unsigned long q0 = 0, q1 = 0;
+ unsigned long b, bm;
+
+ d0 = dd.s.low;
+ d1 = dd.s.high;
+ n0 = nn.s.low;
+ n1 = nn.s.high;
+
+#if !UDIV_NEEDS_NORMALIZATION
+
+ if (d1 == 0) {
+ if (d0 > n1) {
+ /* 0q = nn / 0D */
+
+ udiv_qrnnd(q0, n0, n1, n0, d0);
+ q1 = 0;
+
+ /* Remainder in n0. */
+ } else {
+ /* qq = NN / 0d */
+
+ if (d0 == 0)
+ /* Divide intentionally by zero. */
+ d0 = 1 / d0;
+
+ udiv_qrnnd(q1, n1, 0, n1, d0);
+ udiv_qrnnd(q0, n0, n1, n0, d0);
+
+ /* Remainder in n0. */
+ }
+
+ if (rp != 0) {
+ rr.s.low = n0;
+ rr.s.high = 0;
+ *rp = rr.ll;
+ }
+
+#else /* UDIV_NEEDS_NORMALIZATION */
+
+ if (d1 == 0) {
+ if (d0 > n1) {
+ /* 0q = nn / 0D */
+
+ count_leading_zeros(bm, d0);
+
+ if (bm != 0) {
+ /*
+ * Normalize, i.e. make the most significant bit
+ * of the denominator set.
+ */
+
+ d0 = d0 << bm;
+ n1 = (n1 << bm) | (n0 >> (W_TYPE_SIZE - bm));
+ n0 = n0 << bm;
+ }
+
+ udiv_qrnnd(q0, n0, n1, n0, d0);
+ q1 = 0;
+
+ /* Remainder in n0 >> bm. */
+ } else {
+ /* qq = NN / 0d */
+
+ if (d0 == 0)
+ /* Divide intentionally by zero. */
+ d0 = 1 / d0;
+
+ count_leading_zeros(bm, d0);
+
+ if (bm == 0) {
+ /*
+ * From (n1 >= d0) /\ (the most significant bit
+ * of d0 is set), conclude (the most significant
+ * bit of n1 is set) /\ (theleading quotient
+ * digit q1 = 1).
+ *
+ * This special case is necessary, not an
+ * optimization. (Shifts counts of W_TYPE_SIZE
+ * are undefined.)
+ */
+
+ n1 -= d0;
+ q1 = 1;
+ } else {
+ /* Normalize. */
+
+ b = W_TYPE_SIZE - bm;
+
+ d0 = d0 << bm;
+ n2 = n1 >> b;
+ n1 = (n1 << bm) | (n0 >> b);
+ n0 = n0 << bm;
+
+ udiv_qrnnd(q1, n1, n2, n1, d0);
+ }
+
+ /* n1 != d0... */
+
+ udiv_qrnnd(q0, n0, n1, n0, d0);
+
+ /* Remainder in n0 >> bm. */
+ }
+
+ if (rp != 0) {
+ rr.s.low = n0 >> bm;
+ rr.s.high = 0;
+ *rp = rr.ll;
+ }
+
+#endif /* UDIV_NEEDS_NORMALIZATION */
+
+ } else {
+ if (d1 > n1) {
+ /* 00 = nn / DD */
+
+ q0 = 0;
+ q1 = 0;
+
+ /* Remainder in n1n0. */
+ if (rp != 0) {
+ rr.s.low = n0;
+ rr.s.high = n1;
+ *rp = rr.ll;
+ }
+ } else {
+ /* 0q = NN / dd */
+
+ count_leading_zeros(bm, d1);
+ if (bm == 0) {
+ /*
+ * From (n1 >= d1) /\ (the most significant bit
+ * of d1 is set), conclude (the most significant
+ * bit of n1 is set) /\ (the quotient digit q0 =
+ * 0 or 1).
+ *
+ * This special case is necessary, not an
+ * optimization.
+ */
+
+ /*
+ * The condition on the next line takes
+ * advantage of that n1 >= d1 (true due to
+ * program flow).
+ */
+ if (n1 > d1 || n0 >= d0) {
+ q0 = 1;
+ sub_ddmmss(n1, n0, n1, n0, d1, d0);
+ } else {
+ q0 = 0;
+ }
+
+ q1 = 0;
+
+ if (rp != 0) {
+ rr.s.low = n0;
+ rr.s.high = n1;
+ *rp = rr.ll;
+ }
+ } else {
+ unsigned long m1, m0;
+ /* Normalize. */
+
+ b = W_TYPE_SIZE - bm;
+
+ d1 = (d1 << bm) | (d0 >> b);
+ d0 = d0 << bm;
+ n2 = n1 >> b;
+ n1 = (n1 << bm) | (n0 >> b);
+ n0 = n0 << bm;
+
+ udiv_qrnnd(q0, n1, n2, n1, d1);
+ umul_ppmm(m1, m0, q0, d0);
+
+ if (m1 > n1 || (m1 == n1 && m0 > n0)) {
+ q0--;
+ sub_ddmmss(m1, m0, m1, m0, d1, d0);
+ }
+
+ q1 = 0;
+
+ /* Remainder in (n1n0 - m1m0) >> bm. */
+ if (rp != 0) {
+ sub_ddmmss(n1, n0, n1, n0, m1, m0);
+ rr.s.low = (n1 << b) | (n0 >> bm);
+ rr.s.high = n1 >> bm;
+ *rp = rr.ll;
+ }
+ }
+ }
+ }
+
+ ww.s.low = q0;
+ ww.s.high = q1;
+
+ return ww.ll;
+}
diff --git a/lib/umoddi3.c b/lib/umoddi3.c
new file mode 100644
index 000000000000..d7bbf0f85197
--- /dev/null
+++ b/lib/umoddi3.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see the file COPYING, or write
+ * to the Free Software Foundation, Inc.
+ */
+
+#include <linux/module.h>
+#include <linux/libgcc.h>
+
+extern unsigned long long __udivmoddi4(unsigned long long u,
+ unsigned long long v,
+ unsigned long long *rp);
+
+unsigned long long __umoddi3(unsigned long long u, unsigned long long v)
+{
+ unsigned long long w;
+ (void)__udivmoddi4(u, v, &w);
+ return w;
+}
+EXPORT_SYMBOL(__umoddi3);
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index d5b3a3f95c01..37a54a6dd594 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -613,6 +613,109 @@ char *string(char *buf, char *end, const char *s, struct printf_spec spec)
}
static noinline_for_stack
+char *pointer_string(char *buf, char *end, const void *ptr,
+ struct printf_spec spec)
+{
+ spec.base = 16;
+ spec.flags |= SMALL;
+ if (spec.field_width == -1) {
+ spec.field_width = 2 * sizeof(ptr);
+ spec.flags |= ZEROPAD;
+ }
+
+ return number(buf, end, (unsigned long int)ptr, spec);
+}
+
+/* Make pointers available for printing early in the boot sequence. */
+static int debug_boot_weak_hash __ro_after_init;
+
+static int __init debug_boot_weak_hash_enable(char *str)
+{
+ debug_boot_weak_hash = 1;
+ pr_info("debug_boot_weak_hash enabled\n");
+ return 0;
+}
+early_param("debug_boot_weak_hash", debug_boot_weak_hash_enable);
+
+static DEFINE_STATIC_KEY_TRUE(not_filled_random_ptr_key);
+static siphash_key_t ptr_key __read_mostly;
+
+static void enable_ptr_key_workfn(struct work_struct *work)
+{
+ get_random_bytes(&ptr_key, sizeof(ptr_key));
+ /* Needs to run from preemptible context */
+ static_branch_disable(&not_filled_random_ptr_key);
+}
+
+static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn);
+
+static void fill_random_ptr_key(struct random_ready_callback *unused)
+{
+ /* This may be in an interrupt handler. */
+ queue_work(system_unbound_wq, &enable_ptr_key_work);
+}
+
+static struct random_ready_callback random_ready = {
+ .func = fill_random_ptr_key
+};
+
+static int __init initialize_ptr_random(void)
+{
+ int key_size = sizeof(ptr_key);
+ int ret;
+
+ /* Use hw RNG if available. */
+ if (get_random_bytes_arch(&ptr_key, key_size) == key_size) {
+ static_branch_disable(&not_filled_random_ptr_key);
+ return 0;
+ }
+
+ ret = add_random_ready_callback(&random_ready);
+ if (!ret) {
+ return 0;
+ } else if (ret == -EALREADY) {
+ /* This is in preemptible context */
+ enable_ptr_key_workfn(&enable_ptr_key_work);
+ return 0;
+ }
+
+ return ret;
+}
+early_initcall(initialize_ptr_random);
+
+/* Maps a pointer to a 32 bit unique identifier. */
+static char *ptr_to_id(char *buf, char *end, const void *ptr,
+ struct printf_spec spec)
+{
+ const char *str = sizeof(ptr) == 8 ? "(____ptrval____)" : "(ptrval)";
+ unsigned long hashval;
+
+ /* When debugging early boot use non-cryptographically secure hash. */
+ if (unlikely(debug_boot_weak_hash)) {
+ hashval = hash_long((unsigned long)ptr, 32);
+ return pointer_string(buf, end, (const void *)hashval, spec);
+ }
+
+ if (static_branch_unlikely(&not_filled_random_ptr_key)) {
+ spec.field_width = 2 * sizeof(ptr);
+ /* string length must be less than default_width */
+ return string(buf, end, str, spec);
+ }
+
+#ifdef CONFIG_64BIT
+ hashval = (unsigned long)siphash_1u64((u64)ptr, &ptr_key);
+ /*
+ * Mask off the first 32 bits, this makes explicit that we have
+ * modified the address (and 32 bits is plenty for a unique ID).
+ */
+ hashval = hashval & 0xffffffff;
+#else
+ hashval = (unsigned long)siphash_1u32((u32)ptr, &ptr_key);
+#endif
+ return pointer_string(buf, end, (const void *)hashval, spec);
+}
+
+static noinline_for_stack
char *dentry_name(char *buf, char *end, const struct dentry *d, struct printf_spec spec,
const char *fmt)
{
@@ -1357,20 +1460,6 @@ char *uuid_string(char *buf, char *end, const u8 *addr,
return string(buf, end, uuid, spec);
}
-static noinline_for_stack
-char *pointer_string(char *buf, char *end, const void *ptr,
- struct printf_spec spec)
-{
- spec.base = 16;
- spec.flags |= SMALL;
- if (spec.field_width == -1) {
- spec.field_width = 2 * sizeof(ptr);
- spec.flags |= ZEROPAD;
- }
-
- return number(buf, end, (unsigned long int)ptr, spec);
-}
-
int kptr_restrict __read_mostly;
static noinline_for_stack
@@ -1421,7 +1510,8 @@ char *restricted_pointer(char *buf, char *end, const void *ptr,
}
static noinline_for_stack
-char *netdev_bits(char *buf, char *end, const void *addr, const char *fmt)
+char *netdev_bits(char *buf, char *end, const void *addr,
+ struct printf_spec spec, const char *fmt)
{
unsigned long long num;
int size;
@@ -1432,9 +1522,7 @@ char *netdev_bits(char *buf, char *end, const void *addr, const char *fmt)
size = sizeof(netdev_features_t);
break;
default:
- num = (unsigned long)addr;
- size = sizeof(unsigned long);
- break;
+ return ptr_to_id(buf, end, addr, spec);
}
return special_hex_number(buf, end, num, size);
@@ -1474,7 +1562,7 @@ char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec,
#ifdef CONFIG_COMMON_CLK
return string(buf, end, __clk_get_name(clk), spec);
#else
- return special_hex_number(buf, end, (unsigned long)clk, sizeof(unsigned long));
+ return ptr_to_id(buf, end, clk, spec);
#endif
}
}
@@ -1596,6 +1684,7 @@ char *device_node_string(char *buf, char *end, struct device_node *dn,
fmt = "f";
for (pass = false; strspn(fmt,"fnpPFcC"); fmt++, pass = true) {
+ int precision;
if (pass) {
if (buf < end)
*buf = ':';
@@ -1607,7 +1696,11 @@ char *device_node_string(char *buf, char *end, struct device_node *dn,
buf = device_node_gen_full_name(dn, buf, end);
break;
case 'n': /* name */
- buf = string(buf, end, dn->name, str_spec);
+ p = kbasename(of_node_full_name(dn));
+ precision = str_spec.precision;
+ str_spec.precision = strchrnul(p, '@') - p;
+ buf = string(buf, end, p, str_spec);
+ str_spec.precision = precision;
break;
case 'p': /* phandle */
buf = number(buf, end, (unsigned int)dn->phandle, num_spec);
@@ -1651,94 +1744,6 @@ char *device_node_string(char *buf, char *end, struct device_node *dn,
return widen_string(buf, buf - buf_start, end, spec);
}
-/* Make pointers available for printing early in the boot sequence. */
-static int debug_boot_weak_hash __ro_after_init;
-
-static int __init debug_boot_weak_hash_enable(char *str)
-{
- debug_boot_weak_hash = 1;
- pr_info("debug_boot_weak_hash enabled\n");
- return 0;
-}
-early_param("debug_boot_weak_hash", debug_boot_weak_hash_enable);
-
-static DEFINE_STATIC_KEY_TRUE(not_filled_random_ptr_key);
-static siphash_key_t ptr_key __read_mostly;
-
-static void enable_ptr_key_workfn(struct work_struct *work)
-{
- get_random_bytes(&ptr_key, sizeof(ptr_key));
- /* Needs to run from preemptible context */
- static_branch_disable(&not_filled_random_ptr_key);
-}
-
-static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn);
-
-static void fill_random_ptr_key(struct random_ready_callback *unused)
-{
- /* This may be in an interrupt handler. */
- queue_work(system_unbound_wq, &enable_ptr_key_work);
-}
-
-static struct random_ready_callback random_ready = {
- .func = fill_random_ptr_key
-};
-
-static int __init initialize_ptr_random(void)
-{
- int key_size = sizeof(ptr_key);
- int ret;
-
- /* Use hw RNG if available. */
- if (get_random_bytes_arch(&ptr_key, key_size) == key_size) {
- static_branch_disable(&not_filled_random_ptr_key);
- return 0;
- }
-
- ret = add_random_ready_callback(&random_ready);
- if (!ret) {
- return 0;
- } else if (ret == -EALREADY) {
- /* This is in preemptible context */
- enable_ptr_key_workfn(&enable_ptr_key_work);
- return 0;
- }
-
- return ret;
-}
-early_initcall(initialize_ptr_random);
-
-/* Maps a pointer to a 32 bit unique identifier. */
-static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec spec)
-{
- const char *str = sizeof(ptr) == 8 ? "(____ptrval____)" : "(ptrval)";
- unsigned long hashval;
-
- /* When debugging early boot use non-cryptographically secure hash. */
- if (unlikely(debug_boot_weak_hash)) {
- hashval = hash_long((unsigned long)ptr, 32);
- return pointer_string(buf, end, (const void *)hashval, spec);
- }
-
- if (static_branch_unlikely(&not_filled_random_ptr_key)) {
- spec.field_width = 2 * sizeof(ptr);
- /* string length must be less than default_width */
- return string(buf, end, str, spec);
- }
-
-#ifdef CONFIG_64BIT
- hashval = (unsigned long)siphash_1u64((u64)ptr, &ptr_key);
- /*
- * Mask off the first 32 bits, this makes explicit that we have
- * modified the address (and 32 bits is plenty for a unique ID).
- */
- hashval = hashval & 0xffffffff;
-#else
- hashval = (unsigned long)siphash_1u32((u32)ptr, &ptr_key);
-#endif
- return pointer_string(buf, end, (const void *)hashval, spec);
-}
-
/*
* Show a '%p' thing. A kernel extension is that the '%p' is followed
* by an extra set of alphanumeric characters that are extended format
@@ -1833,17 +1838,15 @@ static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec spec)
* p page flags (see struct page) given as pointer to unsigned long
* g gfp flags (GFP_* and __GFP_*) given as pointer to gfp_t
* v vma flags (VM_*) given as pointer to unsigned long
- * - 'O' For a kobject based struct. Must be one of the following:
- * - 'OF[fnpPcCF]' For a device tree object
- * Without any optional arguments prints the full_name
- * f device node full_name
- * n device node name
- * p device node phandle
- * P device node path spec (name + @unit)
- * F device node flags
- * c major compatible string
- * C full compatible string
- *
+ * - 'OF[fnpPcCF]' For a device tree object
+ * Without any optional arguments prints the full_name
+ * f device node full_name
+ * n device node name
+ * p device node phandle
+ * P device node path spec (name + @unit)
+ * F device node flags
+ * c major compatible string
+ * C full compatible string
* - 'x' For printing the address. Equivalent to "%lx".
*
* ** When making changes please also update:
@@ -1944,7 +1947,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
break;
return restricted_pointer(buf, end, ptr, spec);
case 'N':
- return netdev_bits(buf, end, ptr, fmt);
+ return netdev_bits(buf, end, ptr, spec, fmt);
case 'a':
return address_val(buf, end, ptr, fmt);
case 'd':
@@ -2794,7 +2797,7 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
copy = end - str;
memcpy(str, args, copy);
str += len;
- args += len;
+ args += len + 1;
}
}
if (process)
diff --git a/lib/xarray.c b/lib/xarray.c
new file mode 100644
index 000000000000..8b176f009c08
--- /dev/null
+++ b/lib/xarray.c
@@ -0,0 +1,2036 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * XArray implementation
+ * Copyright (c) 2017 Microsoft Corporation
+ * Author: Matthew Wilcox <willy@infradead.org>
+ */
+
+#include <linux/bitmap.h>
+#include <linux/export.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/xarray.h>
+
+/*
+ * Coding conventions in this file:
+ *
+ * @xa is used to refer to the entire xarray.
+ * @xas is the 'xarray operation state'. It may be either a pointer to
+ * an xa_state, or an xa_state stored on the stack. This is an unfortunate
+ * ambiguity.
+ * @index is the index of the entry being operated on
+ * @mark is an xa_mark_t; a small number indicating one of the mark bits.
+ * @node refers to an xa_node; usually the primary one being operated on by
+ * this function.
+ * @offset is the index into the slots array inside an xa_node.
+ * @parent refers to the @xa_node closer to the head than @node.
+ * @entry refers to something stored in a slot in the xarray
+ */
+
+static inline unsigned int xa_lock_type(const struct xarray *xa)
+{
+ return (__force unsigned int)xa->xa_flags & 3;
+}
+
+static inline void xas_lock_type(struct xa_state *xas, unsigned int lock_type)
+{
+ if (lock_type == XA_LOCK_IRQ)
+ xas_lock_irq(xas);
+ else if (lock_type == XA_LOCK_BH)
+ xas_lock_bh(xas);
+ else
+ xas_lock(xas);
+}
+
+static inline void xas_unlock_type(struct xa_state *xas, unsigned int lock_type)
+{
+ if (lock_type == XA_LOCK_IRQ)
+ xas_unlock_irq(xas);
+ else if (lock_type == XA_LOCK_BH)
+ xas_unlock_bh(xas);
+ else
+ xas_unlock(xas);
+}
+
+static inline bool xa_track_free(const struct xarray *xa)
+{
+ return xa->xa_flags & XA_FLAGS_TRACK_FREE;
+}
+
+static inline void xa_mark_set(struct xarray *xa, xa_mark_t mark)
+{
+ if (!(xa->xa_flags & XA_FLAGS_MARK(mark)))
+ xa->xa_flags |= XA_FLAGS_MARK(mark);
+}
+
+static inline void xa_mark_clear(struct xarray *xa, xa_mark_t mark)
+{
+ if (xa->xa_flags & XA_FLAGS_MARK(mark))
+ xa->xa_flags &= ~(XA_FLAGS_MARK(mark));
+}
+
+static inline unsigned long *node_marks(struct xa_node *node, xa_mark_t mark)
+{
+ return node->marks[(__force unsigned)mark];
+}
+
+static inline bool node_get_mark(struct xa_node *node,
+ unsigned int offset, xa_mark_t mark)
+{
+ return test_bit(offset, node_marks(node, mark));
+}
+
+/* returns true if the bit was set */
+static inline bool node_set_mark(struct xa_node *node, unsigned int offset,
+ xa_mark_t mark)
+{
+ return __test_and_set_bit(offset, node_marks(node, mark));
+}
+
+/* returns true if the bit was set */
+static inline bool node_clear_mark(struct xa_node *node, unsigned int offset,
+ xa_mark_t mark)
+{
+ return __test_and_clear_bit(offset, node_marks(node, mark));
+}
+
+static inline bool node_any_mark(struct xa_node *node, xa_mark_t mark)
+{
+ return !bitmap_empty(node_marks(node, mark), XA_CHUNK_SIZE);
+}
+
+static inline void node_mark_all(struct xa_node *node, xa_mark_t mark)
+{
+ bitmap_fill(node_marks(node, mark), XA_CHUNK_SIZE);
+}
+
+#define mark_inc(mark) do { \
+ mark = (__force xa_mark_t)((__force unsigned)(mark) + 1); \
+} while (0)
+
+/*
+ * xas_squash_marks() - Merge all marks to the first entry
+ * @xas: Array operation state.
+ *
+ * Set a mark on the first entry if any entry has it set. Clear marks on
+ * all sibling entries.
+ */
+static void xas_squash_marks(const struct xa_state *xas)
+{
+ unsigned int mark = 0;
+ unsigned int limit = xas->xa_offset + xas->xa_sibs + 1;
+
+ if (!xas->xa_sibs)
+ return;
+
+ do {
+ unsigned long *marks = xas->xa_node->marks[mark];
+ if (find_next_bit(marks, limit, xas->xa_offset + 1) == limit)
+ continue;
+ __set_bit(xas->xa_offset, marks);
+ bitmap_clear(marks, xas->xa_offset + 1, xas->xa_sibs);
+ } while (mark++ != (__force unsigned)XA_MARK_MAX);
+}
+
+/* extracts the offset within this node from the index */
+static unsigned int get_offset(unsigned long index, struct xa_node *node)
+{
+ return (index >> node->shift) & XA_CHUNK_MASK;
+}
+
+static void xas_set_offset(struct xa_state *xas)
+{
+ xas->xa_offset = get_offset(xas->xa_index, xas->xa_node);
+}
+
+/* move the index either forwards (find) or backwards (sibling slot) */
+static void xas_move_index(struct xa_state *xas, unsigned long offset)
+{
+ unsigned int shift = xas->xa_node->shift;
+ xas->xa_index &= ~XA_CHUNK_MASK << shift;
+ xas->xa_index += offset << shift;
+}
+
+static void xas_advance(struct xa_state *xas)
+{
+ xas->xa_offset++;
+ xas_move_index(xas, xas->xa_offset);
+}
+
+static void *set_bounds(struct xa_state *xas)
+{
+ xas->xa_node = XAS_BOUNDS;
+ return NULL;
+}
+
+/*
+ * Starts a walk. If the @xas is already valid, we assume that it's on
+ * the right path and just return where we've got to. If we're in an
+ * error state, return NULL. If the index is outside the current scope
+ * of the xarray, return NULL without changing @xas->xa_node. Otherwise
+ * set @xas->xa_node to NULL and return the current head of the array.
+ */
+static void *xas_start(struct xa_state *xas)
+{
+ void *entry;
+
+ if (xas_valid(xas))
+ return xas_reload(xas);
+ if (xas_error(xas))
+ return NULL;
+
+ entry = xa_head(xas->xa);
+ if (!xa_is_node(entry)) {
+ if (xas->xa_index)
+ return set_bounds(xas);
+ } else {
+ if ((xas->xa_index >> xa_to_node(entry)->shift) > XA_CHUNK_MASK)
+ return set_bounds(xas);
+ }
+
+ xas->xa_node = NULL;
+ return entry;
+}
+
+static void *xas_descend(struct xa_state *xas, struct xa_node *node)
+{
+ unsigned int offset = get_offset(xas->xa_index, node);
+ void *entry = xa_entry(xas->xa, node, offset);
+
+ xas->xa_node = node;
+ if (xa_is_sibling(entry)) {
+ offset = xa_to_sibling(entry);
+ entry = xa_entry(xas->xa, node, offset);
+ }
+
+ xas->xa_offset = offset;
+ return entry;
+}
+
+/**
+ * xas_load() - Load an entry from the XArray (advanced).
+ * @xas: XArray operation state.
+ *
+ * Usually walks the @xas to the appropriate state to load the entry
+ * stored at xa_index. However, it will do nothing and return %NULL if
+ * @xas is in an error state. xas_load() will never expand the tree.
+ *
+ * If the xa_state is set up to operate on a multi-index entry, xas_load()
+ * may return %NULL or an internal entry, even if there are entries
+ * present within the range specified by @xas.
+ *
+ * Context: Any context. The caller should hold the xa_lock or the RCU lock.
+ * Return: Usually an entry in the XArray, but see description for exceptions.
+ */
+void *xas_load(struct xa_state *xas)
+{
+ void *entry = xas_start(xas);
+
+ while (xa_is_node(entry)) {
+ struct xa_node *node = xa_to_node(entry);
+
+ if (xas->xa_shift > node->shift)
+ break;
+ entry = xas_descend(xas, node);
+ }
+ return entry;
+}
+EXPORT_SYMBOL_GPL(xas_load);
+
+/* Move the radix tree node cache here */
+extern struct kmem_cache *radix_tree_node_cachep;
+extern void radix_tree_node_rcu_free(struct rcu_head *head);
+
+#define XA_RCU_FREE ((struct xarray *)1)
+
+static void xa_node_free(struct xa_node *node)
+{
+ XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
+ node->array = XA_RCU_FREE;
+ call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
+}
+
+/*
+ * xas_destroy() - Free any resources allocated during the XArray operation.
+ * @xas: XArray operation state.
+ *
+ * This function is now internal-only.
+ */
+static void xas_destroy(struct xa_state *xas)
+{
+ struct xa_node *node = xas->xa_alloc;
+
+ if (!node)
+ return;
+ XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
+ kmem_cache_free(radix_tree_node_cachep, node);
+ xas->xa_alloc = NULL;
+}
+
+/**
+ * xas_nomem() - Allocate memory if needed.
+ * @xas: XArray operation state.
+ * @gfp: Memory allocation flags.
+ *
+ * If we need to add new nodes to the XArray, we try to allocate memory
+ * with GFP_NOWAIT while holding the lock, which will usually succeed.
+ * If it fails, @xas is flagged as needing memory to continue. The caller
+ * should drop the lock and call xas_nomem(). If xas_nomem() succeeds,
+ * the caller should retry the operation.
+ *
+ * Forward progress is guaranteed as one node is allocated here and
+ * stored in the xa_state where it will be found by xas_alloc(). More
+ * nodes will likely be found in the slab allocator, but we do not tie
+ * them up here.
+ *
+ * Return: true if memory was needed, and was successfully allocated.
+ */
+bool xas_nomem(struct xa_state *xas, gfp_t gfp)
+{
+ if (xas->xa_node != XA_ERROR(-ENOMEM)) {
+ xas_destroy(xas);
+ return false;
+ }
+ xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
+ if (!xas->xa_alloc)
+ return false;
+ XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list));
+ xas->xa_node = XAS_RESTART;
+ return true;
+}
+EXPORT_SYMBOL_GPL(xas_nomem);
+
+/*
+ * __xas_nomem() - Drop locks and allocate memory if needed.
+ * @xas: XArray operation state.
+ * @gfp: Memory allocation flags.
+ *
+ * Internal variant of xas_nomem().
+ *
+ * Return: true if memory was needed, and was successfully allocated.
+ */
+static bool __xas_nomem(struct xa_state *xas, gfp_t gfp)
+ __must_hold(xas->xa->xa_lock)
+{
+ unsigned int lock_type = xa_lock_type(xas->xa);
+
+ if (xas->xa_node != XA_ERROR(-ENOMEM)) {
+ xas_destroy(xas);
+ return false;
+ }
+ if (gfpflags_allow_blocking(gfp)) {
+ xas_unlock_type(xas, lock_type);
+ xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
+ xas_lock_type(xas, lock_type);
+ } else {
+ xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
+ }
+ if (!xas->xa_alloc)
+ return false;
+ XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list));
+ xas->xa_node = XAS_RESTART;
+ return true;
+}
+
+static void xas_update(struct xa_state *xas, struct xa_node *node)
+{
+ if (xas->xa_update)
+ xas->xa_update(node);
+ else
+ XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
+}
+
+static void *xas_alloc(struct xa_state *xas, unsigned int shift)
+{
+ struct xa_node *parent = xas->xa_node;
+ struct xa_node *node = xas->xa_alloc;
+
+ if (xas_invalid(xas))
+ return NULL;
+
+ if (node) {
+ xas->xa_alloc = NULL;
+ } else {
+ node = kmem_cache_alloc(radix_tree_node_cachep,
+ GFP_NOWAIT | __GFP_NOWARN);
+ if (!node) {
+ xas_set_err(xas, -ENOMEM);
+ return NULL;
+ }
+ }
+
+ if (parent) {
+ node->offset = xas->xa_offset;
+ parent->count++;
+ XA_NODE_BUG_ON(node, parent->count > XA_CHUNK_SIZE);
+ xas_update(xas, parent);
+ }
+ XA_NODE_BUG_ON(node, shift > BITS_PER_LONG);
+ XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
+ node->shift = shift;
+ node->count = 0;
+ node->nr_values = 0;
+ RCU_INIT_POINTER(node->parent, xas->xa_node);
+ node->array = xas->xa;
+
+ return node;
+}
+
+#ifdef CONFIG_XARRAY_MULTI
+/* Returns the number of indices covered by a given xa_state */
+static unsigned long xas_size(const struct xa_state *xas)
+{
+ return (xas->xa_sibs + 1UL) << xas->xa_shift;
+}
+#endif
+
+/*
+ * Use this to calculate the maximum index that will need to be created
+ * in order to add the entry described by @xas. Because we cannot store a
+ * multiple-index entry at index 0, the calculation is a little more complex
+ * than you might expect.
+ */
+static unsigned long xas_max(struct xa_state *xas)
+{
+ unsigned long max = xas->xa_index;
+
+#ifdef CONFIG_XARRAY_MULTI
+ if (xas->xa_shift || xas->xa_sibs) {
+ unsigned long mask = xas_size(xas) - 1;
+ max |= mask;
+ if (mask == max)
+ max++;
+ }
+#endif
+
+ return max;
+}
+
+/* The maximum index that can be contained in the array without expanding it */
+static unsigned long max_index(void *entry)
+{
+ if (!xa_is_node(entry))
+ return 0;
+ return (XA_CHUNK_SIZE << xa_to_node(entry)->shift) - 1;
+}
+
+static void xas_shrink(struct xa_state *xas)
+{
+ struct xarray *xa = xas->xa;
+ struct xa_node *node = xas->xa_node;
+
+ for (;;) {
+ void *entry;
+
+ XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE);
+ if (node->count != 1)
+ break;
+ entry = xa_entry_locked(xa, node, 0);
+ if (!entry)
+ break;
+ if (!xa_is_node(entry) && node->shift)
+ break;
+ xas->xa_node = XAS_BOUNDS;
+
+ RCU_INIT_POINTER(xa->xa_head, entry);
+ if (xa_track_free(xa) && !node_get_mark(node, 0, XA_FREE_MARK))
+ xa_mark_clear(xa, XA_FREE_MARK);
+
+ node->count = 0;
+ node->nr_values = 0;
+ if (!xa_is_node(entry))
+ RCU_INIT_POINTER(node->slots[0], XA_RETRY_ENTRY);
+ xas_update(xas, node);
+ xa_node_free(node);
+ if (!xa_is_node(entry))
+ break;
+ node = xa_to_node(entry);
+ node->parent = NULL;
+ }
+}
+
+/*
+ * xas_delete_node() - Attempt to delete an xa_node
+ * @xas: Array operation state.
+ *
+ * Attempts to delete the @xas->xa_node. This will fail if xa->node has
+ * a non-zero reference count.
+ */
+static void xas_delete_node(struct xa_state *xas)
+{
+ struct xa_node *node = xas->xa_node;
+
+ for (;;) {
+ struct xa_node *parent;
+
+ XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE);
+ if (node->count)
+ break;
+
+ parent = xa_parent_locked(xas->xa, node);
+ xas->xa_node = parent;
+ xas->xa_offset = node->offset;
+ xa_node_free(node);
+
+ if (!parent) {
+ xas->xa->xa_head = NULL;
+ xas->xa_node = XAS_BOUNDS;
+ return;
+ }
+
+ parent->slots[xas->xa_offset] = NULL;
+ parent->count--;
+ XA_NODE_BUG_ON(parent, parent->count > XA_CHUNK_SIZE);
+ node = parent;
+ xas_update(xas, node);
+ }
+
+ if (!node->parent)
+ xas_shrink(xas);
+}
+
+/**
+ * xas_free_nodes() - Free this node and all nodes that it references
+ * @xas: Array operation state.
+ * @top: Node to free
+ *
+ * This node has been removed from the tree. We must now free it and all
+ * of its subnodes. There may be RCU walkers with references into the tree,
+ * so we must replace all entries with retry markers.
+ */
+static void xas_free_nodes(struct xa_state *xas, struct xa_node *top)
+{
+ unsigned int offset = 0;
+ struct xa_node *node = top;
+
+ for (;;) {
+ void *entry = xa_entry_locked(xas->xa, node, offset);
+
+ if (xa_is_node(entry)) {
+ node = xa_to_node(entry);
+ offset = 0;
+ continue;
+ }
+ if (entry)
+ RCU_INIT_POINTER(node->slots[offset], XA_RETRY_ENTRY);
+ offset++;
+ while (offset == XA_CHUNK_SIZE) {
+ struct xa_node *parent;
+
+ parent = xa_parent_locked(xas->xa, node);
+ offset = node->offset + 1;
+ node->count = 0;
+ node->nr_values = 0;
+ xas_update(xas, node);
+ xa_node_free(node);
+ if (node == top)
+ return;
+ node = parent;
+ }
+ }
+}
+
+/*
+ * xas_expand adds nodes to the head of the tree until it has reached
+ * sufficient height to be able to contain @xas->xa_index
+ */
+static int xas_expand(struct xa_state *xas, void *head)
+{
+ struct xarray *xa = xas->xa;
+ struct xa_node *node = NULL;
+ unsigned int shift = 0;
+ unsigned long max = xas_max(xas);
+
+ if (!head) {
+ if (max == 0)
+ return 0;
+ while ((max >> shift) >= XA_CHUNK_SIZE)
+ shift += XA_CHUNK_SHIFT;
+ return shift + XA_CHUNK_SHIFT;
+ } else if (xa_is_node(head)) {
+ node = xa_to_node(head);
+ shift = node->shift + XA_CHUNK_SHIFT;
+ }
+ xas->xa_node = NULL;
+
+ while (max > max_index(head)) {
+ xa_mark_t mark = 0;
+
+ XA_NODE_BUG_ON(node, shift > BITS_PER_LONG);
+ node = xas_alloc(xas, shift);
+ if (!node)
+ return -ENOMEM;
+
+ node->count = 1;
+ if (xa_is_value(head))
+ node->nr_values = 1;
+ RCU_INIT_POINTER(node->slots[0], head);
+
+ /* Propagate the aggregated mark info to the new child */
+ for (;;) {
+ if (xa_track_free(xa) && mark == XA_FREE_MARK) {
+ node_mark_all(node, XA_FREE_MARK);
+ if (!xa_marked(xa, XA_FREE_MARK)) {
+ node_clear_mark(node, 0, XA_FREE_MARK);
+ xa_mark_set(xa, XA_FREE_MARK);
+ }
+ } else if (xa_marked(xa, mark)) {
+ node_set_mark(node, 0, mark);
+ }
+ if (mark == XA_MARK_MAX)
+ break;
+ mark_inc(mark);
+ }
+
+ /*
+ * Now that the new node is fully initialised, we can add
+ * it to the tree
+ */
+ if (xa_is_node(head)) {
+ xa_to_node(head)->offset = 0;
+ rcu_assign_pointer(xa_to_node(head)->parent, node);
+ }
+ head = xa_mk_node(node);
+ rcu_assign_pointer(xa->xa_head, head);
+ xas_update(xas, node);
+
+ shift += XA_CHUNK_SHIFT;
+ }
+
+ xas->xa_node = node;
+ return shift;
+}
+
+/*
+ * xas_create() - Create a slot to store an entry in.
+ * @xas: XArray operation state.
+ *
+ * Most users will not need to call this function directly, as it is called
+ * by xas_store(). It is useful for doing conditional store operations
+ * (see the xa_cmpxchg() implementation for an example).
+ *
+ * Return: If the slot already existed, returns the contents of this slot.
+ * If the slot was newly created, returns NULL. If it failed to create the
+ * slot, returns NULL and indicates the error in @xas.
+ */
+static void *xas_create(struct xa_state *xas)
+{
+ struct xarray *xa = xas->xa;
+ void *entry;
+ void __rcu **slot;
+ struct xa_node *node = xas->xa_node;
+ int shift;
+ unsigned int order = xas->xa_shift;
+
+ if (xas_top(node)) {
+ entry = xa_head_locked(xa);
+ xas->xa_node = NULL;
+ shift = xas_expand(xas, entry);
+ if (shift < 0)
+ return NULL;
+ entry = xa_head_locked(xa);
+ slot = &xa->xa_head;
+ } else if (xas_error(xas)) {
+ return NULL;
+ } else if (node) {
+ unsigned int offset = xas->xa_offset;
+
+ shift = node->shift;
+ entry = xa_entry_locked(xa, node, offset);
+ slot = &node->slots[offset];
+ } else {
+ shift = 0;
+ entry = xa_head_locked(xa);
+ slot = &xa->xa_head;
+ }
+
+ while (shift > order) {
+ shift -= XA_CHUNK_SHIFT;
+ if (!entry) {
+ node = xas_alloc(xas, shift);
+ if (!node)
+ break;
+ if (xa_track_free(xa))
+ node_mark_all(node, XA_FREE_MARK);
+ rcu_assign_pointer(*slot, xa_mk_node(node));
+ } else if (xa_is_node(entry)) {
+ node = xa_to_node(entry);
+ } else {
+ break;
+ }
+ entry = xas_descend(xas, node);
+ slot = &node->slots[xas->xa_offset];
+ }
+
+ return entry;
+}
+
+/**
+ * xas_create_range() - Ensure that stores to this range will succeed
+ * @xas: XArray operation state.
+ *
+ * Creates all of the slots in the range covered by @xas. Sets @xas to
+ * create single-index entries and positions it at the beginning of the
+ * range. This is for the benefit of users which have not yet been
+ * converted to use multi-index entries.
+ */
+void xas_create_range(struct xa_state *xas)
+{
+ unsigned long index = xas->xa_index;
+ unsigned char shift = xas->xa_shift;
+ unsigned char sibs = xas->xa_sibs;
+
+ xas->xa_index |= ((sibs + 1) << shift) - 1;
+ if (xas_is_node(xas) && xas->xa_node->shift == xas->xa_shift)
+ xas->xa_offset |= sibs;
+ xas->xa_shift = 0;
+ xas->xa_sibs = 0;
+
+ for (;;) {
+ xas_create(xas);
+ if (xas_error(xas))
+ goto restore;
+ if (xas->xa_index <= (index | XA_CHUNK_MASK))
+ goto success;
+ xas->xa_index -= XA_CHUNK_SIZE;
+
+ for (;;) {
+ struct xa_node *node = xas->xa_node;
+ xas->xa_node = xa_parent_locked(xas->xa, node);
+ xas->xa_offset = node->offset - 1;
+ if (node->offset != 0)
+ break;
+ }
+ }
+
+restore:
+ xas->xa_shift = shift;
+ xas->xa_sibs = sibs;
+ xas->xa_index = index;
+ return;
+success:
+ xas->xa_index = index;
+ if (xas->xa_node)
+ xas_set_offset(xas);
+}
+EXPORT_SYMBOL_GPL(xas_create_range);
+
+static void update_node(struct xa_state *xas, struct xa_node *node,
+ int count, int values)
+{
+ if (!node || (!count && !values))
+ return;
+
+ node->count += count;
+ node->nr_values += values;
+ XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE);
+ XA_NODE_BUG_ON(node, node->nr_values > XA_CHUNK_SIZE);
+ xas_update(xas, node);
+ if (count < 0)
+ xas_delete_node(xas);
+}
+
+/**
+ * xas_store() - Store this entry in the XArray.
+ * @xas: XArray operation state.
+ * @entry: New entry.
+ *
+ * If @xas is operating on a multi-index entry, the entry returned by this
+ * function is essentially meaningless (it may be an internal entry or it
+ * may be %NULL, even if there are non-NULL entries at some of the indices
+ * covered by the range). This is not a problem for any current users,
+ * and can be changed if needed.
+ *
+ * Return: The old entry at this index.
+ */
+void *xas_store(struct xa_state *xas, void *entry)
+{
+ struct xa_node *node;
+ void __rcu **slot = &xas->xa->xa_head;
+ unsigned int offset, max;
+ int count = 0;
+ int values = 0;
+ void *first, *next;
+ bool value = xa_is_value(entry);
+
+ if (entry)
+ first = xas_create(xas);
+ else
+ first = xas_load(xas);
+
+ if (xas_invalid(xas))
+ return first;
+ node = xas->xa_node;
+ if (node && (xas->xa_shift < node->shift))
+ xas->xa_sibs = 0;
+ if ((first == entry) && !xas->xa_sibs)
+ return first;
+
+ next = first;
+ offset = xas->xa_offset;
+ max = xas->xa_offset + xas->xa_sibs;
+ if (node) {
+ slot = &node->slots[offset];
+ if (xas->xa_sibs)
+ xas_squash_marks(xas);
+ }
+ if (!entry)
+ xas_init_marks(xas);
+
+ for (;;) {
+ /*
+ * Must clear the marks before setting the entry to NULL,
+ * otherwise xas_for_each_marked may find a NULL entry and
+ * stop early. rcu_assign_pointer contains a release barrier
+ * so the mark clearing will appear to happen before the
+ * entry is set to NULL.
+ */
+ rcu_assign_pointer(*slot, entry);
+ if (xa_is_node(next))
+ xas_free_nodes(xas, xa_to_node(next));
+ if (!node)
+ break;
+ count += !next - !entry;
+ values += !xa_is_value(first) - !value;
+ if (entry) {
+ if (offset == max)
+ break;
+ if (!xa_is_sibling(entry))
+ entry = xa_mk_sibling(xas->xa_offset);
+ } else {
+ if (offset == XA_CHUNK_MASK)
+ break;
+ }
+ next = xa_entry_locked(xas->xa, node, ++offset);
+ if (!xa_is_sibling(next)) {
+ if (!entry && (offset > max))
+ break;
+ first = next;
+ }
+ slot++;
+ }
+
+ update_node(xas, node, count, values);
+ return first;
+}
+EXPORT_SYMBOL_GPL(xas_store);
+
+/**
+ * xas_get_mark() - Returns the state of this mark.
+ * @xas: XArray operation state.
+ * @mark: Mark number.
+ *
+ * Return: true if the mark is set, false if the mark is clear or @xas
+ * is in an error state.
+ */
+bool xas_get_mark(const struct xa_state *xas, xa_mark_t mark)
+{
+ if (xas_invalid(xas))
+ return false;
+ if (!xas->xa_node)
+ return xa_marked(xas->xa, mark);
+ return node_get_mark(xas->xa_node, xas->xa_offset, mark);
+}
+EXPORT_SYMBOL_GPL(xas_get_mark);
+
+/**
+ * xas_set_mark() - Sets the mark on this entry and its parents.
+ * @xas: XArray operation state.
+ * @mark: Mark number.
+ *
+ * Sets the specified mark on this entry, and walks up the tree setting it
+ * on all the ancestor entries. Does nothing if @xas has not been walked to
+ * an entry, or is in an error state.
+ */
+void xas_set_mark(const struct xa_state *xas, xa_mark_t mark)
+{
+ struct xa_node *node = xas->xa_node;
+ unsigned int offset = xas->xa_offset;
+
+ if (xas_invalid(xas))
+ return;
+
+ while (node) {
+ if (node_set_mark(node, offset, mark))
+ return;
+ offset = node->offset;
+ node = xa_parent_locked(xas->xa, node);
+ }
+
+ if (!xa_marked(xas->xa, mark))
+ xa_mark_set(xas->xa, mark);
+}
+EXPORT_SYMBOL_GPL(xas_set_mark);
+
+/**
+ * xas_clear_mark() - Clears the mark on this entry and its parents.
+ * @xas: XArray operation state.
+ * @mark: Mark number.
+ *
+ * Clears the specified mark on this entry, and walks back to the head
+ * attempting to clear it on all the ancestor entries. Does nothing if
+ * @xas has not been walked to an entry, or is in an error state.
+ */
+void xas_clear_mark(const struct xa_state *xas, xa_mark_t mark)
+{
+ struct xa_node *node = xas->xa_node;
+ unsigned int offset = xas->xa_offset;
+
+ if (xas_invalid(xas))
+ return;
+
+ while (node) {
+ if (!node_clear_mark(node, offset, mark))
+ return;
+ if (node_any_mark(node, mark))
+ return;
+
+ offset = node->offset;
+ node = xa_parent_locked(xas->xa, node);
+ }
+
+ if (xa_marked(xas->xa, mark))
+ xa_mark_clear(xas->xa, mark);
+}
+EXPORT_SYMBOL_GPL(xas_clear_mark);
+
+/**
+ * xas_init_marks() - Initialise all marks for the entry
+ * @xas: Array operations state.
+ *
+ * Initialise all marks for the entry specified by @xas. If we're tracking
+ * free entries with a mark, we need to set it on all entries. All other
+ * marks are cleared.
+ *
+ * This implementation is not as efficient as it could be; we may walk
+ * up the tree multiple times.
+ */
+void xas_init_marks(const struct xa_state *xas)
+{
+ xa_mark_t mark = 0;
+
+ for (;;) {
+ if (xa_track_free(xas->xa) && mark == XA_FREE_MARK)
+ xas_set_mark(xas, mark);
+ else
+ xas_clear_mark(xas, mark);
+ if (mark == XA_MARK_MAX)
+ break;
+ mark_inc(mark);
+ }
+}
+EXPORT_SYMBOL_GPL(xas_init_marks);
+
+/**
+ * xas_pause() - Pause a walk to drop a lock.
+ * @xas: XArray operation state.
+ *
+ * Some users need to pause a walk and drop the lock they're holding in
+ * order to yield to a higher priority thread or carry out an operation
+ * on an entry. Those users should call this function before they drop
+ * the lock. It resets the @xas to be suitable for the next iteration
+ * of the loop after the user has reacquired the lock. If most entries
+ * found during a walk require you to call xas_pause(), the xa_for_each()
+ * iterator may be more appropriate.
+ *
+ * Note that xas_pause() only works for forward iteration. If a user needs
+ * to pause a reverse iteration, we will need a xas_pause_rev().
+ */
+void xas_pause(struct xa_state *xas)
+{
+ struct xa_node *node = xas->xa_node;
+
+ if (xas_invalid(xas))
+ return;
+
+ if (node) {
+ unsigned int offset = xas->xa_offset;
+ while (++offset < XA_CHUNK_SIZE) {
+ if (!xa_is_sibling(xa_entry(xas->xa, node, offset)))
+ break;
+ }
+ xas->xa_index += (offset - xas->xa_offset) << node->shift;
+ } else {
+ xas->xa_index++;
+ }
+ xas->xa_node = XAS_RESTART;
+}
+EXPORT_SYMBOL_GPL(xas_pause);
+
+/*
+ * __xas_prev() - Find the previous entry in the XArray.
+ * @xas: XArray operation state.
+ *
+ * Helper function for xas_prev() which handles all the complex cases
+ * out of line.
+ */
+void *__xas_prev(struct xa_state *xas)
+{
+ void *entry;
+
+ if (!xas_frozen(xas->xa_node))
+ xas->xa_index--;
+ if (xas_not_node(xas->xa_node))
+ return xas_load(xas);
+
+ if (xas->xa_offset != get_offset(xas->xa_index, xas->xa_node))
+ xas->xa_offset--;
+
+ while (xas->xa_offset == 255) {
+ xas->xa_offset = xas->xa_node->offset - 1;
+ xas->xa_node = xa_parent(xas->xa, xas->xa_node);
+ if (!xas->xa_node)
+ return set_bounds(xas);
+ }
+
+ for (;;) {
+ entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
+ if (!xa_is_node(entry))
+ return entry;
+
+ xas->xa_node = xa_to_node(entry);
+ xas_set_offset(xas);
+ }
+}
+EXPORT_SYMBOL_GPL(__xas_prev);
+
+/*
+ * __xas_next() - Find the next entry in the XArray.
+ * @xas: XArray operation state.
+ *
+ * Helper function for xas_next() which handles all the complex cases
+ * out of line.
+ */
+void *__xas_next(struct xa_state *xas)
+{
+ void *entry;
+
+ if (!xas_frozen(xas->xa_node))
+ xas->xa_index++;
+ if (xas_not_node(xas->xa_node))
+ return xas_load(xas);
+
+ if (xas->xa_offset != get_offset(xas->xa_index, xas->xa_node))
+ xas->xa_offset++;
+
+ while (xas->xa_offset == XA_CHUNK_SIZE) {
+ xas->xa_offset = xas->xa_node->offset + 1;
+ xas->xa_node = xa_parent(xas->xa, xas->xa_node);
+ if (!xas->xa_node)
+ return set_bounds(xas);
+ }
+
+ for (;;) {
+ entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
+ if (!xa_is_node(entry))
+ return entry;
+
+ xas->xa_node = xa_to_node(entry);
+ xas_set_offset(xas);
+ }
+}
+EXPORT_SYMBOL_GPL(__xas_next);
+
+/**
+ * xas_find() - Find the next present entry in the XArray.
+ * @xas: XArray operation state.
+ * @max: Highest index to return.
+ *
+ * If the @xas has not yet been walked to an entry, return the entry
+ * which has an index >= xas.xa_index. If it has been walked, the entry
+ * currently being pointed at has been processed, and so we move to the
+ * next entry.
+ *
+ * If no entry is found and the array is smaller than @max, the iterator
+ * is set to the smallest index not yet in the array. This allows @xas
+ * to be immediately passed to xas_store().
+ *
+ * Return: The entry, if found, otherwise %NULL.
+ */
+void *xas_find(struct xa_state *xas, unsigned long max)
+{
+ void *entry;
+
+ if (xas_error(xas))
+ return NULL;
+
+ if (!xas->xa_node) {
+ xas->xa_index = 1;
+ return set_bounds(xas);
+ } else if (xas_top(xas->xa_node)) {
+ entry = xas_load(xas);
+ if (entry || xas_not_node(xas->xa_node))
+ return entry;
+ } else if (!xas->xa_node->shift &&
+ xas->xa_offset != (xas->xa_index & XA_CHUNK_MASK)) {
+ xas->xa_offset = ((xas->xa_index - 1) & XA_CHUNK_MASK) + 1;
+ }
+
+ xas_advance(xas);
+
+ while (xas->xa_node && (xas->xa_index <= max)) {
+ if (unlikely(xas->xa_offset == XA_CHUNK_SIZE)) {
+ xas->xa_offset = xas->xa_node->offset + 1;
+ xas->xa_node = xa_parent(xas->xa, xas->xa_node);
+ continue;
+ }
+
+ entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
+ if (xa_is_node(entry)) {
+ xas->xa_node = xa_to_node(entry);
+ xas->xa_offset = 0;
+ continue;
+ }
+ if (entry && !xa_is_sibling(entry))
+ return entry;
+
+ xas_advance(xas);
+ }
+
+ if (!xas->xa_node)
+ xas->xa_node = XAS_BOUNDS;
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(xas_find);
+
+/**
+ * xas_find_marked() - Find the next marked entry in the XArray.
+ * @xas: XArray operation state.
+ * @max: Highest index to return.
+ * @mark: Mark number to search for.
+ *
+ * If the @xas has not yet been walked to an entry, return the marked entry
+ * which has an index >= xas.xa_index. If it has been walked, the entry
+ * currently being pointed at has been processed, and so we return the
+ * first marked entry with an index > xas.xa_index.
+ *
+ * If no marked entry is found and the array is smaller than @max, @xas is
+ * set to the bounds state and xas->xa_index is set to the smallest index
+ * not yet in the array. This allows @xas to be immediately passed to
+ * xas_store().
+ *
+ * If no entry is found before @max is reached, @xas is set to the restart
+ * state.
+ *
+ * Return: The entry, if found, otherwise %NULL.
+ */
+void *xas_find_marked(struct xa_state *xas, unsigned long max, xa_mark_t mark)
+{
+ bool advance = true;
+ unsigned int offset;
+ void *entry;
+
+ if (xas_error(xas))
+ return NULL;
+
+ if (!xas->xa_node) {
+ xas->xa_index = 1;
+ goto out;
+ } else if (xas_top(xas->xa_node)) {
+ advance = false;
+ entry = xa_head(xas->xa);
+ xas->xa_node = NULL;
+ if (xas->xa_index > max_index(entry))
+ goto bounds;
+ if (!xa_is_node(entry)) {
+ if (xa_marked(xas->xa, mark))
+ return entry;
+ xas->xa_index = 1;
+ goto out;
+ }
+ xas->xa_node = xa_to_node(entry);
+ xas->xa_offset = xas->xa_index >> xas->xa_node->shift;
+ }
+
+ while (xas->xa_index <= max) {
+ if (unlikely(xas->xa_offset == XA_CHUNK_SIZE)) {
+ xas->xa_offset = xas->xa_node->offset + 1;
+ xas->xa_node = xa_parent(xas->xa, xas->xa_node);
+ if (!xas->xa_node)
+ break;
+ advance = false;
+ continue;
+ }
+
+ if (!advance) {
+ entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
+ if (xa_is_sibling(entry)) {
+ xas->xa_offset = xa_to_sibling(entry);
+ xas_move_index(xas, xas->xa_offset);
+ }
+ }
+
+ offset = xas_find_chunk(xas, advance, mark);
+ if (offset > xas->xa_offset) {
+ advance = false;
+ xas_move_index(xas, offset);
+ /* Mind the wrap */
+ if ((xas->xa_index - 1) >= max)
+ goto max;
+ xas->xa_offset = offset;
+ if (offset == XA_CHUNK_SIZE)
+ continue;
+ }
+
+ entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
+ if (!xa_is_node(entry))
+ return entry;
+ xas->xa_node = xa_to_node(entry);
+ xas_set_offset(xas);
+ }
+
+out:
+ if (!max)
+ goto max;
+bounds:
+ xas->xa_node = XAS_BOUNDS;
+ return NULL;
+max:
+ xas->xa_node = XAS_RESTART;
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(xas_find_marked);
+
+/**
+ * xas_find_conflict() - Find the next present entry in a range.
+ * @xas: XArray operation state.
+ *
+ * The @xas describes both a range and a position within that range.
+ *
+ * Context: Any context. Expects xa_lock to be held.
+ * Return: The next entry in the range covered by @xas or %NULL.
+ */
+void *xas_find_conflict(struct xa_state *xas)
+{
+ void *curr;
+
+ if (xas_error(xas))
+ return NULL;
+
+ if (!xas->xa_node)
+ return NULL;
+
+ if (xas_top(xas->xa_node)) {
+ curr = xas_start(xas);
+ if (!curr)
+ return NULL;
+ while (xa_is_node(curr)) {
+ struct xa_node *node = xa_to_node(curr);
+ curr = xas_descend(xas, node);
+ }
+ if (curr)
+ return curr;
+ }
+
+ if (xas->xa_node->shift > xas->xa_shift)
+ return NULL;
+
+ for (;;) {
+ if (xas->xa_node->shift == xas->xa_shift) {
+ if ((xas->xa_offset & xas->xa_sibs) == xas->xa_sibs)
+ break;
+ } else if (xas->xa_offset == XA_CHUNK_MASK) {
+ xas->xa_offset = xas->xa_node->offset;
+ xas->xa_node = xa_parent_locked(xas->xa, xas->xa_node);
+ if (!xas->xa_node)
+ break;
+ continue;
+ }
+ curr = xa_entry_locked(xas->xa, xas->xa_node, ++xas->xa_offset);
+ if (xa_is_sibling(curr))
+ continue;
+ while (xa_is_node(curr)) {
+ xas->xa_node = xa_to_node(curr);
+ xas->xa_offset = 0;
+ curr = xa_entry_locked(xas->xa, xas->xa_node, 0);
+ }
+ if (curr)
+ return curr;
+ }
+ xas->xa_offset -= xas->xa_sibs;
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(xas_find_conflict);
+
+/**
+ * xa_init_flags() - Initialise an empty XArray with flags.
+ * @xa: XArray.
+ * @flags: XA_FLAG values.
+ *
+ * If you need to initialise an XArray with special flags (eg you need
+ * to take the lock from interrupt context), use this function instead
+ * of xa_init().
+ *
+ * Context: Any context.
+ */
+void xa_init_flags(struct xarray *xa, gfp_t flags)
+{
+ unsigned int lock_type;
+ static struct lock_class_key xa_lock_irq;
+ static struct lock_class_key xa_lock_bh;
+
+ spin_lock_init(&xa->xa_lock);
+ xa->xa_flags = flags;
+ xa->xa_head = NULL;
+
+ lock_type = xa_lock_type(xa);
+ if (lock_type == XA_LOCK_IRQ)
+ lockdep_set_class(&xa->xa_lock, &xa_lock_irq);
+ else if (lock_type == XA_LOCK_BH)
+ lockdep_set_class(&xa->xa_lock, &xa_lock_bh);
+}
+EXPORT_SYMBOL(xa_init_flags);
+
+/**
+ * xa_load() - Load an entry from an XArray.
+ * @xa: XArray.
+ * @index: index into array.
+ *
+ * Context: Any context. Takes and releases the RCU lock.
+ * Return: The entry at @index in @xa.
+ */
+void *xa_load(struct xarray *xa, unsigned long index)
+{
+ XA_STATE(xas, xa, index);
+ void *entry;
+
+ rcu_read_lock();
+ do {
+ entry = xas_load(&xas);
+ if (xa_is_zero(entry))
+ entry = NULL;
+ } while (xas_retry(&xas, entry));
+ rcu_read_unlock();
+
+ return entry;
+}
+EXPORT_SYMBOL(xa_load);
+
+static void *xas_result(struct xa_state *xas, void *curr)
+{
+ if (xa_is_zero(curr))
+ return NULL;
+ XA_NODE_BUG_ON(xas->xa_node, xa_is_internal(curr));
+ if (xas_error(xas))
+ curr = xas->xa_node;
+ return curr;
+}
+
+/**
+ * __xa_erase() - Erase this entry from the XArray while locked.
+ * @xa: XArray.
+ * @index: Index into array.
+ *
+ * If the entry at this index is a multi-index entry then all indices will
+ * be erased, and the entry will no longer be a multi-index entry.
+ * This function expects the xa_lock to be held on entry.
+ *
+ * Context: Any context. Expects xa_lock to be held on entry. May
+ * release and reacquire xa_lock if @gfp flags permit.
+ * Return: The old entry at this index.
+ */
+void *__xa_erase(struct xarray *xa, unsigned long index)
+{
+ XA_STATE(xas, xa, index);
+ return xas_result(&xas, xas_store(&xas, NULL));
+}
+EXPORT_SYMBOL_GPL(__xa_erase);
+
+/**
+ * xa_store() - Store this entry in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * After this function returns, loads from this index will return @entry.
+ * Storing into an existing multislot entry updates the entry of every index.
+ * The marks associated with @index are unaffected unless @entry is %NULL.
+ *
+ * Context: Process context. Takes and releases the xa_lock. May sleep
+ * if the @gfp flags permit.
+ * Return: The old entry at this index on success, xa_err(-EINVAL) if @entry
+ * cannot be stored in an XArray, or xa_err(-ENOMEM) if memory allocation
+ * failed.
+ */
+void *xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
+{
+ XA_STATE(xas, xa, index);
+ void *curr;
+
+ if (WARN_ON_ONCE(xa_is_internal(entry)))
+ return XA_ERROR(-EINVAL);
+
+ do {
+ xas_lock(&xas);
+ curr = xas_store(&xas, entry);
+ if (xa_track_free(xa) && entry)
+ xas_clear_mark(&xas, XA_FREE_MARK);
+ xas_unlock(&xas);
+ } while (xas_nomem(&xas, gfp));
+
+ return xas_result(&xas, curr);
+}
+EXPORT_SYMBOL(xa_store);
+
+/**
+ * __xa_store() - Store this entry in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * You must already be holding the xa_lock when calling this function.
+ * It will drop the lock if needed to allocate memory, and then reacquire
+ * it afterwards.
+ *
+ * Context: Any context. Expects xa_lock to be held on entry. May
+ * release and reacquire xa_lock if @gfp flags permit.
+ * Return: The old entry at this index or xa_err() if an error happened.
+ */
+void *__xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
+{
+ XA_STATE(xas, xa, index);
+ void *curr;
+
+ if (WARN_ON_ONCE(xa_is_internal(entry)))
+ return XA_ERROR(-EINVAL);
+
+ do {
+ curr = xas_store(&xas, entry);
+ if (xa_track_free(xa) && entry)
+ xas_clear_mark(&xas, XA_FREE_MARK);
+ } while (__xas_nomem(&xas, gfp));
+
+ return xas_result(&xas, curr);
+}
+EXPORT_SYMBOL(__xa_store);
+
+/**
+ * xa_cmpxchg() - Conditionally replace an entry in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @old: Old value to test against.
+ * @entry: New value to place in array.
+ * @gfp: Memory allocation flags.
+ *
+ * If the entry at @index is the same as @old, replace it with @entry.
+ * If the return value is equal to @old, then the exchange was successful.
+ *
+ * Context: Process context. Takes and releases the xa_lock. May sleep
+ * if the @gfp flags permit.
+ * Return: The old value at this index or xa_err() if an error happened.
+ */
+void *xa_cmpxchg(struct xarray *xa, unsigned long index,
+ void *old, void *entry, gfp_t gfp)
+{
+ XA_STATE(xas, xa, index);
+ void *curr;
+
+ if (WARN_ON_ONCE(xa_is_internal(entry)))
+ return XA_ERROR(-EINVAL);
+
+ do {
+ xas_lock(&xas);
+ curr = xas_load(&xas);
+ if (curr == XA_ZERO_ENTRY)
+ curr = NULL;
+ if (curr == old) {
+ xas_store(&xas, entry);
+ if (xa_track_free(xa) && entry)
+ xas_clear_mark(&xas, XA_FREE_MARK);
+ }
+ xas_unlock(&xas);
+ } while (xas_nomem(&xas, gfp));
+
+ return xas_result(&xas, curr);
+}
+EXPORT_SYMBOL(xa_cmpxchg);
+
+/**
+ * __xa_cmpxchg() - Store this entry in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @old: Old value to test against.
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * You must already be holding the xa_lock when calling this function.
+ * It will drop the lock if needed to allocate memory, and then reacquire
+ * it afterwards.
+ *
+ * Context: Any context. Expects xa_lock to be held on entry. May
+ * release and reacquire xa_lock if @gfp flags permit.
+ * Return: The old entry at this index or xa_err() if an error happened.
+ */
+void *__xa_cmpxchg(struct xarray *xa, unsigned long index,
+ void *old, void *entry, gfp_t gfp)
+{
+ XA_STATE(xas, xa, index);
+ void *curr;
+
+ if (WARN_ON_ONCE(xa_is_internal(entry)))
+ return XA_ERROR(-EINVAL);
+
+ do {
+ curr = xas_load(&xas);
+ if (curr == XA_ZERO_ENTRY)
+ curr = NULL;
+ if (curr == old) {
+ xas_store(&xas, entry);
+ if (xa_track_free(xa) && entry)
+ xas_clear_mark(&xas, XA_FREE_MARK);
+ }
+ } while (__xas_nomem(&xas, gfp));
+
+ return xas_result(&xas, curr);
+}
+EXPORT_SYMBOL(__xa_cmpxchg);
+
+/**
+ * xa_reserve() - Reserve this index in the XArray.
+ * @xa: XArray.
+ * @index: Index into array.
+ * @gfp: Memory allocation flags.
+ *
+ * Ensures there is somewhere to store an entry at @index in the array.
+ * If there is already something stored at @index, this function does
+ * nothing. If there was nothing there, the entry is marked as reserved.
+ * Loads from @index will continue to see a %NULL pointer until a
+ * subsequent store to @index.
+ *
+ * If you do not use the entry that you have reserved, call xa_release()
+ * or xa_erase() to free any unnecessary memory.
+ *
+ * Context: Process context. Takes and releases the xa_lock, IRQ or BH safe
+ * if specified in XArray flags. May sleep if the @gfp flags permit.
+ * Return: 0 if the reservation succeeded or -ENOMEM if it failed.
+ */
+int xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp)
+{
+ XA_STATE(xas, xa, index);
+ unsigned int lock_type = xa_lock_type(xa);
+ void *curr;
+
+ do {
+ xas_lock_type(&xas, lock_type);
+ curr = xas_load(&xas);
+ if (!curr)
+ xas_store(&xas, XA_ZERO_ENTRY);
+ xas_unlock_type(&xas, lock_type);
+ } while (xas_nomem(&xas, gfp));
+
+ return xas_error(&xas);
+}
+EXPORT_SYMBOL(xa_reserve);
+
+#ifdef CONFIG_XARRAY_MULTI
+static void xas_set_range(struct xa_state *xas, unsigned long first,
+ unsigned long last)
+{
+ unsigned int shift = 0;
+ unsigned long sibs = last - first;
+ unsigned int offset = XA_CHUNK_MASK;
+
+ xas_set(xas, first);
+
+ while ((first & XA_CHUNK_MASK) == 0) {
+ if (sibs < XA_CHUNK_MASK)
+ break;
+ if ((sibs == XA_CHUNK_MASK) && (offset < XA_CHUNK_MASK))
+ break;
+ shift += XA_CHUNK_SHIFT;
+ if (offset == XA_CHUNK_MASK)
+ offset = sibs & XA_CHUNK_MASK;
+ sibs >>= XA_CHUNK_SHIFT;
+ first >>= XA_CHUNK_SHIFT;
+ }
+
+ offset = first & XA_CHUNK_MASK;
+ if (offset + sibs > XA_CHUNK_MASK)
+ sibs = XA_CHUNK_MASK - offset;
+ if ((((first + sibs + 1) << shift) - 1) > last)
+ sibs -= 1;
+
+ xas->xa_shift = shift;
+ xas->xa_sibs = sibs;
+}
+
+/**
+ * xa_store_range() - Store this entry at a range of indices in the XArray.
+ * @xa: XArray.
+ * @first: First index to affect.
+ * @last: Last index to affect.
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * After this function returns, loads from any index between @first and @last,
+ * inclusive will return @entry.
+ * Storing into an existing multislot entry updates the entry of every index.
+ * The marks associated with @index are unaffected unless @entry is %NULL.
+ *
+ * Context: Process context. Takes and releases the xa_lock. May sleep
+ * if the @gfp flags permit.
+ * Return: %NULL on success, xa_err(-EINVAL) if @entry cannot be stored in
+ * an XArray, or xa_err(-ENOMEM) if memory allocation failed.
+ */
+void *xa_store_range(struct xarray *xa, unsigned long first,
+ unsigned long last, void *entry, gfp_t gfp)
+{
+ XA_STATE(xas, xa, 0);
+
+ if (WARN_ON_ONCE(xa_is_internal(entry)))
+ return XA_ERROR(-EINVAL);
+ if (last < first)
+ return XA_ERROR(-EINVAL);
+
+ do {
+ xas_lock(&xas);
+ if (entry) {
+ unsigned int order = (last == ~0UL) ? 64 :
+ ilog2(last + 1);
+ xas_set_order(&xas, last, order);
+ xas_create(&xas);
+ if (xas_error(&xas))
+ goto unlock;
+ }
+ do {
+ xas_set_range(&xas, first, last);
+ xas_store(&xas, entry);
+ if (xas_error(&xas))
+ goto unlock;
+ first += xas_size(&xas);
+ } while (first <= last);
+unlock:
+ xas_unlock(&xas);
+ } while (xas_nomem(&xas, gfp));
+
+ return xas_result(&xas, NULL);
+}
+EXPORT_SYMBOL(xa_store_range);
+#endif /* CONFIG_XARRAY_MULTI */
+
+/**
+ * __xa_alloc() - Find somewhere to store this entry in the XArray.
+ * @xa: XArray.
+ * @id: Pointer to ID.
+ * @max: Maximum ID to allocate (inclusive).
+ * @entry: New entry.
+ * @gfp: Memory allocation flags.
+ *
+ * Allocates an unused ID in the range specified by @id and @max.
+ * Updates the @id pointer with the index, then stores the entry at that
+ * index. A concurrent lookup will not see an uninitialised @id.
+ *
+ * Context: Any context. Expects xa_lock to be held on entry. May
+ * release and reacquire xa_lock if @gfp flags permit.
+ * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if
+ * there is no more space in the XArray.
+ */
+int __xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry, gfp_t gfp)
+{
+ XA_STATE(xas, xa, 0);
+ int err;
+
+ if (WARN_ON_ONCE(xa_is_internal(entry)))
+ return -EINVAL;
+ if (WARN_ON_ONCE(!xa_track_free(xa)))
+ return -EINVAL;
+
+ if (!entry)
+ entry = XA_ZERO_ENTRY;
+
+ do {
+ xas.xa_index = *id;
+ xas_find_marked(&xas, max, XA_FREE_MARK);
+ if (xas.xa_node == XAS_RESTART)
+ xas_set_err(&xas, -ENOSPC);
+ xas_store(&xas, entry);
+ xas_clear_mark(&xas, XA_FREE_MARK);
+ } while (__xas_nomem(&xas, gfp));
+
+ err = xas_error(&xas);
+ if (!err)
+ *id = xas.xa_index;
+ return err;
+}
+EXPORT_SYMBOL(__xa_alloc);
+
+/**
+ * __xa_set_mark() - Set this mark on this entry while locked.
+ * @xa: XArray.
+ * @index: Index of entry.
+ * @mark: Mark number.
+ *
+ * Attempting to set a mark on a NULL entry does not succeed.
+ *
+ * Context: Any context. Expects xa_lock to be held on entry.
+ */
+void __xa_set_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
+{
+ XA_STATE(xas, xa, index);
+ void *entry = xas_load(&xas);
+
+ if (entry)
+ xas_set_mark(&xas, mark);
+}
+EXPORT_SYMBOL_GPL(__xa_set_mark);
+
+/**
+ * __xa_clear_mark() - Clear this mark on this entry while locked.
+ * @xa: XArray.
+ * @index: Index of entry.
+ * @mark: Mark number.
+ *
+ * Context: Any context. Expects xa_lock to be held on entry.
+ */
+void __xa_clear_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
+{
+ XA_STATE(xas, xa, index);
+ void *entry = xas_load(&xas);
+
+ if (entry)
+ xas_clear_mark(&xas, mark);
+}
+EXPORT_SYMBOL_GPL(__xa_clear_mark);
+
+/**
+ * xa_get_mark() - Inquire whether this mark is set on this entry.
+ * @xa: XArray.
+ * @index: Index of entry.
+ * @mark: Mark number.
+ *
+ * This function uses the RCU read lock, so the result may be out of date
+ * by the time it returns. If you need the result to be stable, use a lock.
+ *
+ * Context: Any context. Takes and releases the RCU lock.
+ * Return: True if the entry at @index has this mark set, false if it doesn't.
+ */
+bool xa_get_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
+{
+ XA_STATE(xas, xa, index);
+ void *entry;
+
+ rcu_read_lock();
+ entry = xas_start(&xas);
+ while (xas_get_mark(&xas, mark)) {
+ if (!xa_is_node(entry))
+ goto found;
+ entry = xas_descend(&xas, xa_to_node(entry));
+ }
+ rcu_read_unlock();
+ return false;
+ found:
+ rcu_read_unlock();
+ return true;
+}
+EXPORT_SYMBOL(xa_get_mark);
+
+/**
+ * xa_set_mark() - Set this mark on this entry.
+ * @xa: XArray.
+ * @index: Index of entry.
+ * @mark: Mark number.
+ *
+ * Attempting to set a mark on a NULL entry does not succeed.
+ *
+ * Context: Process context. Takes and releases the xa_lock.
+ */
+void xa_set_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
+{
+ xa_lock(xa);
+ __xa_set_mark(xa, index, mark);
+ xa_unlock(xa);
+}
+EXPORT_SYMBOL(xa_set_mark);
+
+/**
+ * xa_clear_mark() - Clear this mark on this entry.
+ * @xa: XArray.
+ * @index: Index of entry.
+ * @mark: Mark number.
+ *
+ * Clearing a mark always succeeds.
+ *
+ * Context: Process context. Takes and releases the xa_lock.
+ */
+void xa_clear_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
+{
+ xa_lock(xa);
+ __xa_clear_mark(xa, index, mark);
+ xa_unlock(xa);
+}
+EXPORT_SYMBOL(xa_clear_mark);
+
+/**
+ * xa_find() - Search the XArray for an entry.
+ * @xa: XArray.
+ * @indexp: Pointer to an index.
+ * @max: Maximum index to search to.
+ * @filter: Selection criterion.
+ *
+ * Finds the entry in @xa which matches the @filter, and has the lowest
+ * index that is at least @indexp and no more than @max.
+ * If an entry is found, @indexp is updated to be the index of the entry.
+ * This function is protected by the RCU read lock, so it may not find
+ * entries which are being simultaneously added. It will not return an
+ * %XA_RETRY_ENTRY; if you need to see retry entries, use xas_find().
+ *
+ * Context: Any context. Takes and releases the RCU lock.
+ * Return: The entry, if found, otherwise %NULL.
+ */
+void *xa_find(struct xarray *xa, unsigned long *indexp,
+ unsigned long max, xa_mark_t filter)
+{
+ XA_STATE(xas, xa, *indexp);
+ void *entry;
+
+ rcu_read_lock();
+ do {
+ if ((__force unsigned int)filter < XA_MAX_MARKS)
+ entry = xas_find_marked(&xas, max, filter);
+ else
+ entry = xas_find(&xas, max);
+ } while (xas_retry(&xas, entry));
+ rcu_read_unlock();
+
+ if (entry)
+ *indexp = xas.xa_index;
+ return entry;
+}
+EXPORT_SYMBOL(xa_find);
+
+/**
+ * xa_find_after() - Search the XArray for a present entry.
+ * @xa: XArray.
+ * @indexp: Pointer to an index.
+ * @max: Maximum index to search to.
+ * @filter: Selection criterion.
+ *
+ * Finds the entry in @xa which matches the @filter and has the lowest
+ * index that is above @indexp and no more than @max.
+ * If an entry is found, @indexp is updated to be the index of the entry.
+ * This function is protected by the RCU read lock, so it may miss entries
+ * which are being simultaneously added. It will not return an
+ * %XA_RETRY_ENTRY; if you need to see retry entries, use xas_find().
+ *
+ * Context: Any context. Takes and releases the RCU lock.
+ * Return: The pointer, if found, otherwise %NULL.
+ */
+void *xa_find_after(struct xarray *xa, unsigned long *indexp,
+ unsigned long max, xa_mark_t filter)
+{
+ XA_STATE(xas, xa, *indexp + 1);
+ void *entry;
+
+ rcu_read_lock();
+ for (;;) {
+ if ((__force unsigned int)filter < XA_MAX_MARKS)
+ entry = xas_find_marked(&xas, max, filter);
+ else
+ entry = xas_find(&xas, max);
+ if (xas.xa_shift) {
+ if (xas.xa_index & ((1UL << xas.xa_shift) - 1))
+ continue;
+ } else {
+ if (xas.xa_offset < (xas.xa_index & XA_CHUNK_MASK))
+ continue;
+ }
+ if (!xas_retry(&xas, entry))
+ break;
+ }
+ rcu_read_unlock();
+
+ if (entry)
+ *indexp = xas.xa_index;
+ return entry;
+}
+EXPORT_SYMBOL(xa_find_after);
+
+static unsigned int xas_extract_present(struct xa_state *xas, void **dst,
+ unsigned long max, unsigned int n)
+{
+ void *entry;
+ unsigned int i = 0;
+
+ rcu_read_lock();
+ xas_for_each(xas, entry, max) {
+ if (xas_retry(xas, entry))
+ continue;
+ dst[i++] = entry;
+ if (i == n)
+ break;
+ }
+ rcu_read_unlock();
+
+ return i;
+}
+
+static unsigned int xas_extract_marked(struct xa_state *xas, void **dst,
+ unsigned long max, unsigned int n, xa_mark_t mark)
+{
+ void *entry;
+ unsigned int i = 0;
+
+ rcu_read_lock();
+ xas_for_each_marked(xas, entry, max, mark) {
+ if (xas_retry(xas, entry))
+ continue;
+ dst[i++] = entry;
+ if (i == n)
+ break;
+ }
+ rcu_read_unlock();
+
+ return i;
+}
+
+/**
+ * xa_extract() - Copy selected entries from the XArray into a normal array.
+ * @xa: The source XArray to copy from.
+ * @dst: The buffer to copy entries into.
+ * @start: The first index in the XArray eligible to be selected.
+ * @max: The last index in the XArray eligible to be selected.
+ * @n: The maximum number of entries to copy.
+ * @filter: Selection criterion.
+ *
+ * Copies up to @n entries that match @filter from the XArray. The
+ * copied entries will have indices between @start and @max, inclusive.
+ *
+ * The @filter may be an XArray mark value, in which case entries which are
+ * marked with that mark will be copied. It may also be %XA_PRESENT, in
+ * which case all entries which are not NULL will be copied.
+ *
+ * The entries returned may not represent a snapshot of the XArray at a
+ * moment in time. For example, if another thread stores to index 5, then
+ * index 10, calling xa_extract() may return the old contents of index 5
+ * and the new contents of index 10. Indices not modified while this
+ * function is running will not be skipped.
+ *
+ * If you need stronger guarantees, holding the xa_lock across calls to this
+ * function will prevent concurrent modification.
+ *
+ * Context: Any context. Takes and releases the RCU lock.
+ * Return: The number of entries copied.
+ */
+unsigned int xa_extract(struct xarray *xa, void **dst, unsigned long start,
+ unsigned long max, unsigned int n, xa_mark_t filter)
+{
+ XA_STATE(xas, xa, start);
+
+ if (!n)
+ return 0;
+
+ if ((__force unsigned int)filter < XA_MAX_MARKS)
+ return xas_extract_marked(&xas, dst, max, n, filter);
+ return xas_extract_present(&xas, dst, max, n);
+}
+EXPORT_SYMBOL(xa_extract);
+
+/**
+ * xa_destroy() - Free all internal data structures.
+ * @xa: XArray.
+ *
+ * After calling this function, the XArray is empty and has freed all memory
+ * allocated for its internal data structures. You are responsible for
+ * freeing the objects referenced by the XArray.
+ *
+ * Context: Any context. Takes and releases the xa_lock, interrupt-safe.
+ */
+void xa_destroy(struct xarray *xa)
+{
+ XA_STATE(xas, xa, 0);
+ unsigned long flags;
+ void *entry;
+
+ xas.xa_node = NULL;
+ xas_lock_irqsave(&xas, flags);
+ entry = xa_head_locked(xa);
+ RCU_INIT_POINTER(xa->xa_head, NULL);
+ xas_init_marks(&xas);
+ /* lockdep checks we're still holding the lock in xas_free_nodes() */
+ if (xa_is_node(entry))
+ xas_free_nodes(&xas, xa_to_node(entry));
+ xas_unlock_irqrestore(&xas, flags);
+}
+EXPORT_SYMBOL(xa_destroy);
+
+#ifdef XA_DEBUG
+void xa_dump_node(const struct xa_node *node)
+{
+ unsigned i, j;
+
+ if (!node)
+ return;
+ if ((unsigned long)node & 3) {
+ pr_cont("node %px\n", node);
+ return;
+ }
+
+ pr_cont("node %px %s %d parent %px shift %d count %d values %d "
+ "array %px list %px %px marks",
+ node, node->parent ? "offset" : "max", node->offset,
+ node->parent, node->shift, node->count, node->nr_values,
+ node->array, node->private_list.prev, node->private_list.next);
+ for (i = 0; i < XA_MAX_MARKS; i++)
+ for (j = 0; j < XA_MARK_LONGS; j++)
+ pr_cont(" %lx", node->marks[i][j]);
+ pr_cont("\n");
+}
+
+void xa_dump_index(unsigned long index, unsigned int shift)
+{
+ if (!shift)
+ pr_info("%lu: ", index);
+ else if (shift >= BITS_PER_LONG)
+ pr_info("0-%lu: ", ~0UL);
+ else
+ pr_info("%lu-%lu: ", index, index | ((1UL << shift) - 1));
+}
+
+void xa_dump_entry(const void *entry, unsigned long index, unsigned long shift)
+{
+ if (!entry)
+ return;
+
+ xa_dump_index(index, shift);
+
+ if (xa_is_node(entry)) {
+ if (shift == 0) {
+ pr_cont("%px\n", entry);
+ } else {
+ unsigned long i;
+ struct xa_node *node = xa_to_node(entry);
+ xa_dump_node(node);
+ for (i = 0; i < XA_CHUNK_SIZE; i++)
+ xa_dump_entry(node->slots[i],
+ index + (i << node->shift), node->shift);
+ }
+ } else if (xa_is_value(entry))
+ pr_cont("value %ld (0x%lx) [%px]\n", xa_to_value(entry),
+ xa_to_value(entry), entry);
+ else if (!xa_is_internal(entry))
+ pr_cont("%px\n", entry);
+ else if (xa_is_retry(entry))
+ pr_cont("retry (%ld)\n", xa_to_internal(entry));
+ else if (xa_is_sibling(entry))
+ pr_cont("sibling (slot %ld)\n", xa_to_sibling(entry));
+ else if (xa_is_zero(entry))
+ pr_cont("zero (%ld)\n", xa_to_internal(entry));
+ else
+ pr_cont("UNKNOWN ENTRY (%px)\n", entry);
+}
+
+void xa_dump(const struct xarray *xa)
+{
+ void *entry = xa->xa_head;
+ unsigned int shift = 0;
+
+ pr_info("xarray: %px head %px flags %x marks %d %d %d\n", xa, entry,
+ xa->xa_flags, xa_marked(xa, XA_MARK_0),
+ xa_marked(xa, XA_MARK_1), xa_marked(xa, XA_MARK_2));
+ if (xa_is_node(entry))
+ shift = xa_to_node(entry)->shift + XA_CHUNK_SHIFT;
+ xa_dump_entry(entry, 0, shift);
+}
+#endif
diff --git a/lib/xz/xz_crc32.c b/lib/xz/xz_crc32.c
index 25a5d87e2e4c..912aae5fa09e 100644
--- a/lib/xz/xz_crc32.c
+++ b/lib/xz/xz_crc32.c
@@ -15,7 +15,6 @@
* but they are bigger and use more memory for the lookup table.
*/
-#include <linux/crc32poly.h>
#include "xz_private.h"
/*
diff --git a/lib/xz/xz_private.h b/lib/xz/xz_private.h
index 482b90f363fe..09360ebb510e 100644
--- a/lib/xz/xz_private.h
+++ b/lib/xz/xz_private.h
@@ -102,6 +102,10 @@
# endif
#endif
+#ifndef CRC32_POLY_LE
+#define CRC32_POLY_LE 0xedb88320
+#endif
+
/*
* Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used
* before calling xz_dec_lzma2_run().