blob: cdd43086b55bc6a6b527cd55444fee14f7e8438e [file] [log] [blame]
Kent Overstreet798ab482013-08-16 22:04:37 +00001/*
2 * Percpu IDA library
3 *
4 * Copyright (C) 2013 Datera, Inc. Kent Overstreet
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2, or (at
9 * your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful, but
12 * WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * General Public License for more details.
15 */
16
Ingo Molnarfd771232017-02-02 20:56:33 +010017#include <linux/mm.h>
Kent Overstreet798ab482013-08-16 22:04:37 +000018#include <linux/bitmap.h>
19#include <linux/bitops.h>
20#include <linux/bug.h>
21#include <linux/err.h>
22#include <linux/export.h>
Kent Overstreet798ab482013-08-16 22:04:37 +000023#include <linux/init.h>
24#include <linux/kernel.h>
25#include <linux/percpu.h>
Ingo Molnar174cd4b2017-02-02 19:15:33 +010026#include <linux/sched/signal.h>
Kent Overstreet798ab482013-08-16 22:04:37 +000027#include <linux/string.h>
28#include <linux/spinlock.h>
29#include <linux/percpu_ida.h>
Sebastian Andrzej Siewiore4f99492014-04-09 11:58:17 +020030#include <linux/locallock.h>
31
32static DEFINE_LOCAL_IRQ_LOCK(irq_off_lock);
Kent Overstreet798ab482013-08-16 22:04:37 +000033
Kent Overstreet798ab482013-08-16 22:04:37 +000034struct percpu_ida_cpu {
35 /*
36 * Even though this is percpu, we need a lock for tag stealing by remote
37 * CPUs:
38 */
39 spinlock_t lock;
40
41 /* nr_free/freelist form a stack of free IDs */
42 unsigned nr_free;
43 unsigned freelist[];
44};
45
46static inline void move_tags(unsigned *dst, unsigned *dst_nr,
47 unsigned *src, unsigned *src_nr,
48 unsigned nr)
49{
50 *src_nr -= nr;
51 memcpy(dst + *dst_nr, src + *src_nr, sizeof(unsigned) * nr);
52 *dst_nr += nr;
53}
54
55/*
56 * Try to steal tags from a remote cpu's percpu freelist.
57 *
Shaohua Lid8355022013-12-31 11:38:27 +080058 * We first check how many percpu freelists have tags
Kent Overstreet798ab482013-08-16 22:04:37 +000059 *
60 * Then we iterate through the cpus until we find some tags - we don't attempt
61 * to find the "best" cpu to steal from, to keep cacheline bouncing to a
62 * minimum.
63 */
64static inline void steal_tags(struct percpu_ida *pool,
65 struct percpu_ida_cpu *tags)
66{
67 unsigned cpus_have_tags, cpu = pool->cpu_last_stolen;
68 struct percpu_ida_cpu *remote;
69
70 for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags);
Shaohua Lid8355022013-12-31 11:38:27 +080071 cpus_have_tags; cpus_have_tags--) {
Kent Overstreet798ab482013-08-16 22:04:37 +000072 cpu = cpumask_next(cpu, &pool->cpus_have_tags);
73
74 if (cpu >= nr_cpu_ids) {
75 cpu = cpumask_first(&pool->cpus_have_tags);
76 if (cpu >= nr_cpu_ids)
77 BUG();
78 }
79
80 pool->cpu_last_stolen = cpu;
81 remote = per_cpu_ptr(pool->tag_cpu, cpu);
82
83 cpumask_clear_cpu(cpu, &pool->cpus_have_tags);
84
85 if (remote == tags)
86 continue;
87
88 spin_lock(&remote->lock);
89
90 if (remote->nr_free) {
91 memcpy(tags->freelist,
92 remote->freelist,
93 sizeof(unsigned) * remote->nr_free);
94
95 tags->nr_free = remote->nr_free;
96 remote->nr_free = 0;
97 }
98
99 spin_unlock(&remote->lock);
100
101 if (tags->nr_free)
102 break;
103 }
104}
105
106/*
107 * Pop up to IDA_PCPU_BATCH_MOVE IDs off the global freelist, and push them onto
108 * our percpu freelist:
109 */
110static inline void alloc_global_tags(struct percpu_ida *pool,
111 struct percpu_ida_cpu *tags)
112{
113 move_tags(tags->freelist, &tags->nr_free,
114 pool->freelist, &pool->nr_free,
Shaohua Lie26b53d2013-10-15 09:05:01 +0800115 min(pool->nr_free, pool->percpu_batch_size));
Kent Overstreet798ab482013-08-16 22:04:37 +0000116}
117
Nick Swenson6b378382013-10-02 17:55:45 -0700118static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags)
Kent Overstreet798ab482013-08-16 22:04:37 +0000119{
120 int tag = -ENOSPC;
121
122 spin_lock(&tags->lock);
123 if (tags->nr_free)
124 tag = tags->freelist[--tags->nr_free];
125 spin_unlock(&tags->lock);
126
127 return tag;
128}
129
130/**
131 * percpu_ida_alloc - allocate a tag
132 * @pool: pool to allocate from
Kent Overstreet6f6b5d12014-01-19 08:26:37 +0000133 * @state: task state for prepare_to_wait
Kent Overstreet798ab482013-08-16 22:04:37 +0000134 *
135 * Returns a tag - an integer in the range [0..nr_tags) (passed to
136 * tag_pool_init()), or otherwise -ENOSPC on allocation failure.
137 *
138 * Safe to be called from interrupt context (assuming it isn't passed
Nicholas Bellinger555b2702014-01-20 03:36:24 +0000139 * TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, of course).
Kent Overstreet798ab482013-08-16 22:04:37 +0000140 *
141 * @gfp indicates whether or not to wait until a free id is available (it's not
Mel Gorman71baba42015-11-06 16:28:28 -0800142 * used for internal memory allocations); thus if passed __GFP_RECLAIM we may sleep
Kent Overstreet798ab482013-08-16 22:04:37 +0000143 * however long it takes until another thread frees an id (same semantics as a
144 * mempool).
145 *
Nicholas Bellinger555b2702014-01-20 03:36:24 +0000146 * Will not fail if passed TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE.
Kent Overstreet798ab482013-08-16 22:04:37 +0000147 */
Kent Overstreet6f6b5d12014-01-19 08:26:37 +0000148int percpu_ida_alloc(struct percpu_ida *pool, int state)
Kent Overstreet798ab482013-08-16 22:04:37 +0000149{
150 DEFINE_WAIT(wait);
151 struct percpu_ida_cpu *tags;
152 unsigned long flags;
153 int tag;
154
Sebastian Andrzej Siewiore4f99492014-04-09 11:58:17 +0200155 local_lock_irqsave(irq_off_lock, flags);
Kent Overstreet798ab482013-08-16 22:04:37 +0000156 tags = this_cpu_ptr(pool->tag_cpu);
157
158 /* Fastpath */
Nick Swenson6b378382013-10-02 17:55:45 -0700159 tag = alloc_local_tag(tags);
Kent Overstreet798ab482013-08-16 22:04:37 +0000160 if (likely(tag >= 0)) {
Sebastian Andrzej Siewiore4f99492014-04-09 11:58:17 +0200161 local_unlock_irqrestore(irq_off_lock, flags);
Kent Overstreet798ab482013-08-16 22:04:37 +0000162 return tag;
163 }
164
165 while (1) {
166 spin_lock(&pool->lock);
167
168 /*
169 * prepare_to_wait() must come before steal_tags(), in case
170 * percpu_ida_free() on another cpu flips a bit in
171 * cpus_have_tags
172 *
173 * global lock held and irqs disabled, don't need percpu lock
174 */
Kent Overstreet6f6b5d12014-01-19 08:26:37 +0000175 if (state != TASK_RUNNING)
176 prepare_to_wait(&pool->wait, &wait, state);
Kent Overstreet798ab482013-08-16 22:04:37 +0000177
178 if (!tags->nr_free)
179 alloc_global_tags(pool, tags);
Sebastian Andrzej Siewiore4f99492014-04-09 11:58:17 +0200180
Kent Overstreet798ab482013-08-16 22:04:37 +0000181 if (!tags->nr_free)
182 steal_tags(pool, tags);
183
184 if (tags->nr_free) {
185 tag = tags->freelist[--tags->nr_free];
186 if (tags->nr_free)
187 cpumask_set_cpu(smp_processor_id(),
188 &pool->cpus_have_tags);
189 }
190
191 spin_unlock(&pool->lock);
Sebastian Andrzej Siewiore4f99492014-04-09 11:58:17 +0200192 local_unlock_irqrestore(irq_off_lock, flags);
Kent Overstreet798ab482013-08-16 22:04:37 +0000193
Kent Overstreet6f6b5d12014-01-19 08:26:37 +0000194 if (tag >= 0 || state == TASK_RUNNING)
Kent Overstreet798ab482013-08-16 22:04:37 +0000195 break;
196
Nicholas Bellinger555b2702014-01-20 03:36:24 +0000197 if (signal_pending_state(state, current)) {
198 tag = -ERESTARTSYS;
199 break;
200 }
201
Kent Overstreet798ab482013-08-16 22:04:37 +0000202 schedule();
203
Sebastian Andrzej Siewiore4f99492014-04-09 11:58:17 +0200204 local_lock_irqsave(irq_off_lock, flags);
Kent Overstreet798ab482013-08-16 22:04:37 +0000205 tags = this_cpu_ptr(pool->tag_cpu);
206 }
Kent Overstreet6f6b5d12014-01-19 08:26:37 +0000207 if (state != TASK_RUNNING)
208 finish_wait(&pool->wait, &wait);
Kent Overstreet798ab482013-08-16 22:04:37 +0000209
Kent Overstreet798ab482013-08-16 22:04:37 +0000210 return tag;
211}
212EXPORT_SYMBOL_GPL(percpu_ida_alloc);
213
214/**
215 * percpu_ida_free - free a tag
216 * @pool: pool @tag was allocated from
217 * @tag: a tag previously allocated with percpu_ida_alloc()
218 *
219 * Safe to be called from interrupt context.
220 */
221void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
222{
223 struct percpu_ida_cpu *tags;
224 unsigned long flags;
225 unsigned nr_free;
226
227 BUG_ON(tag >= pool->nr_tags);
228
Sebastian Andrzej Siewiore4f99492014-04-09 11:58:17 +0200229 local_lock_irqsave(irq_off_lock, flags);
Kent Overstreet798ab482013-08-16 22:04:37 +0000230 tags = this_cpu_ptr(pool->tag_cpu);
231
232 spin_lock(&tags->lock);
233 tags->freelist[tags->nr_free++] = tag;
234
235 nr_free = tags->nr_free;
236 spin_unlock(&tags->lock);
237
238 if (nr_free == 1) {
239 cpumask_set_cpu(smp_processor_id(),
240 &pool->cpus_have_tags);
241 wake_up(&pool->wait);
242 }
243
Shaohua Lie26b53d2013-10-15 09:05:01 +0800244 if (nr_free == pool->percpu_max_size) {
Kent Overstreet798ab482013-08-16 22:04:37 +0000245 spin_lock(&pool->lock);
246
247 /*
248 * Global lock held and irqs disabled, don't need percpu
249 * lock
250 */
Shaohua Lie26b53d2013-10-15 09:05:01 +0800251 if (tags->nr_free == pool->percpu_max_size) {
Kent Overstreet798ab482013-08-16 22:04:37 +0000252 move_tags(pool->freelist, &pool->nr_free,
253 tags->freelist, &tags->nr_free,
Shaohua Lie26b53d2013-10-15 09:05:01 +0800254 pool->percpu_batch_size);
Kent Overstreet798ab482013-08-16 22:04:37 +0000255
256 wake_up(&pool->wait);
257 }
258 spin_unlock(&pool->lock);
259 }
260
Sebastian Andrzej Siewiore4f99492014-04-09 11:58:17 +0200261 local_unlock_irqrestore(irq_off_lock, flags);
Kent Overstreet798ab482013-08-16 22:04:37 +0000262}
263EXPORT_SYMBOL_GPL(percpu_ida_free);
264
265/**
266 * percpu_ida_destroy - release a tag pool's resources
267 * @pool: pool to free
268 *
269 * Frees the resources allocated by percpu_ida_init().
270 */
271void percpu_ida_destroy(struct percpu_ida *pool)
272{
273 free_percpu(pool->tag_cpu);
274 free_pages((unsigned long) pool->freelist,
275 get_order(pool->nr_tags * sizeof(unsigned)));
276}
277EXPORT_SYMBOL_GPL(percpu_ida_destroy);
278
279/**
280 * percpu_ida_init - initialize a percpu tag pool
281 * @pool: pool to initialize
282 * @nr_tags: number of tags that will be available for allocation
283 *
284 * Initializes @pool so that it can be used to allocate tags - integers in the
285 * range [0, nr_tags). Typically, they'll be used by driver code to refer to a
286 * preallocated array of tag structures.
287 *
288 * Allocation is percpu, but sharding is limited by nr_tags - for best
289 * performance, the workload should not span more cpus than nr_tags / 128.
290 */
Shaohua Lie26b53d2013-10-15 09:05:01 +0800291int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags,
292 unsigned long max_size, unsigned long batch_size)
Kent Overstreet798ab482013-08-16 22:04:37 +0000293{
294 unsigned i, cpu, order;
295
296 memset(pool, 0, sizeof(*pool));
297
298 init_waitqueue_head(&pool->wait);
299 spin_lock_init(&pool->lock);
300 pool->nr_tags = nr_tags;
Shaohua Lie26b53d2013-10-15 09:05:01 +0800301 pool->percpu_max_size = max_size;
302 pool->percpu_batch_size = batch_size;
Kent Overstreet798ab482013-08-16 22:04:37 +0000303
304 /* Guard against overflow */
305 if (nr_tags > (unsigned) INT_MAX + 1) {
306 pr_err("percpu_ida_init(): nr_tags too large\n");
307 return -EINVAL;
308 }
309
310 order = get_order(nr_tags * sizeof(unsigned));
311 pool->freelist = (void *) __get_free_pages(GFP_KERNEL, order);
312 if (!pool->freelist)
313 return -ENOMEM;
314
315 for (i = 0; i < nr_tags; i++)
316 pool->freelist[i] = i;
317
318 pool->nr_free = nr_tags;
319
320 pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) +
Shaohua Lie26b53d2013-10-15 09:05:01 +0800321 pool->percpu_max_size * sizeof(unsigned),
Kent Overstreet798ab482013-08-16 22:04:37 +0000322 sizeof(unsigned));
323 if (!pool->tag_cpu)
324 goto err;
325
326 for_each_possible_cpu(cpu)
327 spin_lock_init(&per_cpu_ptr(pool->tag_cpu, cpu)->lock);
328
329 return 0;
330err:
331 percpu_ida_destroy(pool);
332 return -ENOMEM;
333}
Shaohua Lie26b53d2013-10-15 09:05:01 +0800334EXPORT_SYMBOL_GPL(__percpu_ida_init);
Shaohua Li7fc2ba12013-10-15 09:05:02 +0800335
336/**
337 * percpu_ida_for_each_free - iterate free ids of a pool
338 * @pool: pool to iterate
339 * @fn: interate callback function
340 * @data: parameter for @fn
341 *
342 * Note, this doesn't guarantee to iterate all free ids restrictly. Some free
343 * ids might be missed, some might be iterated duplicated, and some might
344 * be iterated and not free soon.
345 */
346int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
347 void *data)
348{
349 unsigned long flags;
350 struct percpu_ida_cpu *remote;
351 unsigned cpu, i, err = 0;
352
Sebastian Andrzej Siewiore4f99492014-04-09 11:58:17 +0200353 local_lock_irqsave(irq_off_lock, flags);
Shaohua Li7fc2ba12013-10-15 09:05:02 +0800354 for_each_possible_cpu(cpu) {
355 remote = per_cpu_ptr(pool->tag_cpu, cpu);
356 spin_lock(&remote->lock);
357 for (i = 0; i < remote->nr_free; i++) {
358 err = fn(remote->freelist[i], data);
359 if (err)
360 break;
361 }
362 spin_unlock(&remote->lock);
363 if (err)
364 goto out;
365 }
366
367 spin_lock(&pool->lock);
368 for (i = 0; i < pool->nr_free; i++) {
369 err = fn(pool->freelist[i], data);
370 if (err)
371 break;
372 }
373 spin_unlock(&pool->lock);
374out:
Sebastian Andrzej Siewiore4f99492014-04-09 11:58:17 +0200375 local_unlock_irqrestore(irq_off_lock, flags);
Shaohua Li7fc2ba12013-10-15 09:05:02 +0800376 return err;
377}
378EXPORT_SYMBOL_GPL(percpu_ida_for_each_free);
Shaohua Li1dddc012013-10-15 09:05:03 +0800379
380/**
381 * percpu_ida_free_tags - return free tags number of a specific cpu or global pool
382 * @pool: pool related
383 * @cpu: specific cpu or global pool if @cpu == nr_cpu_ids
384 *
385 * Note: this just returns a snapshot of free tags number.
386 */
387unsigned percpu_ida_free_tags(struct percpu_ida *pool, int cpu)
388{
389 struct percpu_ida_cpu *remote;
390 if (cpu == nr_cpu_ids)
391 return pool->nr_free;
392 remote = per_cpu_ptr(pool->tag_cpu, cpu);
393 return remote->nr_free;
394}
395EXPORT_SYMBOL_GPL(percpu_ida_free_tags);