blob: ac04f31a632e73a602d60ce5427ae53d8c97236c [file] [log] [blame]
Robert Olsson19baf832005-06-21 12:43:18 -07001/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version
5 * 2 of the License, or (at your option) any later version.
6 *
7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
8 * & Swedish University of Agricultural Sciences.
9 *
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090010 * Jens Laas <jens.laas@data.slu.se> Swedish University of
Robert Olsson19baf832005-06-21 12:43:18 -070011 * Agricultural Sciences.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090012 *
Robert Olsson19baf832005-06-21 12:43:18 -070013 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
14 *
Lucas De Marchi25985ed2011-03-30 22:57:33 -030015 * This work is based on the LPC-trie which is originally described in:
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090016 *
Robert Olsson19baf832005-06-21 12:43:18 -070017 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
Justin P. Mattock631dd1a2010-10-18 11:03:14 +020019 * http://www.csc.kth.se/~snilsson/software/dyntrie2/
Robert Olsson19baf832005-06-21 12:43:18 -070020 *
21 *
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
24 *
Robert Olsson19baf832005-06-21 12:43:18 -070025 *
26 * Code from fib_hash has been reused which includes the following header:
27 *
28 *
29 * INET An implementation of the TCP/IP protocol suite for the LINUX
30 * operating system. INET is implemented using the BSD Socket
31 * interface as the means of communication with the user level.
32 *
33 * IPv4 FIB: lookup engine and maintenance routines.
34 *
35 *
36 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
37 *
38 * This program is free software; you can redistribute it and/or
39 * modify it under the terms of the GNU General Public License
40 * as published by the Free Software Foundation; either version
41 * 2 of the License, or (at your option) any later version.
Robert Olssonfd966252005-12-22 11:25:10 -080042 *
43 * Substantial contributions to this work comes from:
44 *
45 * David S. Miller, <davem@davemloft.net>
46 * Stephen Hemminger <shemminger@osdl.org>
47 * Paul E. McKenney <paulmck@us.ibm.com>
48 * Patrick McHardy <kaber@trash.net>
Robert Olsson19baf832005-06-21 12:43:18 -070049 */
50
Jens Låås80b71b82009-08-28 23:57:15 -070051#define VERSION "0.409"
Robert Olsson19baf832005-06-21 12:43:18 -070052
Robert Olsson19baf832005-06-21 12:43:18 -070053#include <asm/uaccess.h>
Jiri Slaby1977f032007-10-18 23:40:25 -070054#include <linux/bitops.h>
Robert Olsson19baf832005-06-21 12:43:18 -070055#include <linux/types.h>
56#include <linux/kernel.h>
Robert Olsson19baf832005-06-21 12:43:18 -070057#include <linux/mm.h>
58#include <linux/string.h>
59#include <linux/socket.h>
60#include <linux/sockios.h>
61#include <linux/errno.h>
62#include <linux/in.h>
63#include <linux/inet.h>
Stephen Hemmingercd8787a2006-01-03 14:38:34 -080064#include <linux/inetdevice.h>
Robert Olsson19baf832005-06-21 12:43:18 -070065#include <linux/netdevice.h>
66#include <linux/if_arp.h>
67#include <linux/proc_fs.h>
Robert Olsson2373ce12005-08-25 13:01:29 -070068#include <linux/rcupdate.h>
Robert Olsson19baf832005-06-21 12:43:18 -070069#include <linux/skbuff.h>
70#include <linux/netlink.h>
71#include <linux/init.h>
72#include <linux/list.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090073#include <linux/slab.h>
Paul Gortmakerbc3b2d72011-07-15 11:47:34 -040074#include <linux/export.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020075#include <net/net_namespace.h>
Robert Olsson19baf832005-06-21 12:43:18 -070076#include <net/ip.h>
77#include <net/protocol.h>
78#include <net/route.h>
79#include <net/tcp.h>
80#include <net/sock.h>
81#include <net/ip_fib.h>
82#include "fib_lookup.h"
83
Robert Olsson06ef9212006-03-20 21:35:01 -080084#define MAX_STAT_DEPTH 32
Robert Olsson19baf832005-06-21 12:43:18 -070085
Robert Olsson19baf832005-06-21 12:43:18 -070086#define KEYLENGTH (8*sizeof(t_key))
Robert Olsson19baf832005-06-21 12:43:18 -070087
Robert Olsson19baf832005-06-21 12:43:18 -070088typedef unsigned int t_key;
89
Alexander Duyck64c9b6f2014-12-31 10:55:35 -080090#define IS_TNODE(n) ((n)->bits)
91#define IS_LEAF(n) (!(n)->bits)
Robert Olsson2373ce12005-08-25 13:01:29 -070092
Alexander Duyck9f9e6362014-12-31 10:55:54 -080093#define get_shift(_kv) (KEYLENGTH - (_kv)->pos - (_kv)->bits)
94#define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> get_shift(_kv))
95
Alexander Duyck64c9b6f2014-12-31 10:55:35 -080096struct tnode {
97 t_key key;
98 unsigned char bits; /* 2log(KEYLENGTH) bits needed */
99 unsigned char pos; /* 2log(KEYLENGTH) bits needed */
100 struct tnode __rcu *parent;
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800101 struct rcu_head rcu;
Alexander Duyckadaf9812014-12-31 10:55:47 -0800102 union {
103 /* The fields in this struct are valid if bits > 0 (TNODE) */
104 struct {
105 unsigned int full_children; /* KEYLENGTH bits needed */
106 unsigned int empty_children; /* KEYLENGTH bits needed */
107 struct tnode __rcu *child[0];
108 };
109 /* This list pointer if valid if bits == 0 (LEAF) */
110 struct hlist_head list;
111 };
Robert Olsson19baf832005-06-21 12:43:18 -0700112};
113
114struct leaf_info {
115 struct hlist_node hlist;
116 int plen;
Eric Dumazet5c745012011-07-18 03:16:33 +0000117 u32 mask_plen; /* ntohl(inet_make_mask(plen)) */
Robert Olsson19baf832005-06-21 12:43:18 -0700118 struct list_head falh;
Eric Dumazet5c745012011-07-18 03:16:33 +0000119 struct rcu_head rcu;
Robert Olsson19baf832005-06-21 12:43:18 -0700120};
121
Robert Olsson19baf832005-06-21 12:43:18 -0700122#ifdef CONFIG_IP_FIB_TRIE_STATS
123struct trie_use_stats {
124 unsigned int gets;
125 unsigned int backtrack;
126 unsigned int semantic_match_passed;
127 unsigned int semantic_match_miss;
128 unsigned int null_node_hit;
Robert Olsson2f368952005-07-05 15:02:40 -0700129 unsigned int resize_node_skipped;
Robert Olsson19baf832005-06-21 12:43:18 -0700130};
131#endif
132
133struct trie_stat {
134 unsigned int totdepth;
135 unsigned int maxdepth;
136 unsigned int tnodes;
137 unsigned int leaves;
138 unsigned int nullpointers;
Stephen Hemminger93672292008-01-22 21:54:05 -0800139 unsigned int prefixes;
Robert Olsson06ef9212006-03-20 21:35:01 -0800140 unsigned int nodesizes[MAX_STAT_DEPTH];
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700141};
Robert Olsson19baf832005-06-21 12:43:18 -0700142
143struct trie {
Alexander Duyckadaf9812014-12-31 10:55:47 -0800144 struct tnode __rcu *trie;
Robert Olsson19baf832005-06-21 12:43:18 -0700145#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -0800146 struct trie_use_stats __percpu *stats;
Robert Olsson19baf832005-06-21 12:43:18 -0700147#endif
Robert Olsson19baf832005-06-21 12:43:18 -0700148};
149
Alexander Duyckadaf9812014-12-31 10:55:47 -0800150static void tnode_put_child_reorg(struct tnode *tn, int i, struct tnode *n,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800151 int wasfull);
Alexander Duyckadaf9812014-12-31 10:55:47 -0800152static struct tnode *resize(struct trie *t, struct tnode *tn);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700153static struct tnode *inflate(struct trie *t, struct tnode *tn);
154static struct tnode *halve(struct trie *t, struct tnode *tn);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700155/* tnodes to free after resize(); protected by RTNL */
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800156static struct callback_head *tnode_free_head;
Jarek Poplawskic3059472009-07-14 08:33:08 +0000157static size_t tnode_free_size;
158
159/*
160 * synchronize_rcu after call_rcu for that many pages; it should be especially
161 * useful before resizing the root node with PREEMPT_NONE configs; the value was
162 * obtained experimentally, aiming to avoid visible slowdown.
163 */
164static const int sync_pages = 128;
Robert Olsson19baf832005-06-21 12:43:18 -0700165
Christoph Lametere18b8902006-12-06 20:33:20 -0800166static struct kmem_cache *fn_alias_kmem __read_mostly;
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -0800167static struct kmem_cache *trie_leaf_kmem __read_mostly;
Robert Olsson19baf832005-06-21 12:43:18 -0700168
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800169/* caller must hold RTNL */
170#define node_parent(n) rtnl_dereference((n)->parent)
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700171
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800172/* caller must hold RCU read lock or RTNL */
173#define node_parent_rcu(n) rcu_dereference_rtnl((n)->parent)
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700174
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800175/* wrapper for rcu_assign_pointer */
Alexander Duyckadaf9812014-12-31 10:55:47 -0800176static inline void node_set_parent(struct tnode *n, struct tnode *tp)
Stephen Hemminger06801912007-08-10 15:22:13 -0700177{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800178 if (n)
179 rcu_assign_pointer(n->parent, tp);
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800180}
181
182#define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER((n)->parent, p)
183
184/* This provides us with the number of children in this node, in the case of a
185 * leaf this will return 0 meaning none of the children are accessible.
186 */
187static inline int tnode_child_length(const struct tnode *tn)
188{
189 return (1ul << tn->bits) & ~(1ul);
Stephen Hemminger06801912007-08-10 15:22:13 -0700190}
Robert Olsson2373ce12005-08-25 13:01:29 -0700191
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700192/*
193 * caller must hold RTNL
194 */
Alexander Duyckadaf9812014-12-31 10:55:47 -0800195static inline struct tnode *tnode_get_child(const struct tnode *tn, unsigned int i)
Robert Olsson19baf832005-06-21 12:43:18 -0700196{
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800197 BUG_ON(i >= tnode_child_length(tn));
Robert Olsson19baf832005-06-21 12:43:18 -0700198
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700199 return rtnl_dereference(tn->child[i]);
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800200}
201
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700202/*
203 * caller must hold RCU read lock or RTNL
204 */
Alexander Duyckadaf9812014-12-31 10:55:47 -0800205static inline struct tnode *tnode_get_child_rcu(const struct tnode *tn, unsigned int i)
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800206{
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800207 BUG_ON(i >= tnode_child_length(tn));
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800208
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700209 return rcu_dereference_rtnl(tn->child[i]);
Robert Olsson19baf832005-06-21 12:43:18 -0700210}
211
David S. Miller3b004562011-02-16 14:56:22 -0800212static inline t_key mask_pfx(t_key k, unsigned int l)
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -0700213{
214 return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
215}
216
David S. Miller3b004562011-02-16 14:56:22 -0800217static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits)
Robert Olsson19baf832005-06-21 12:43:18 -0700218{
Olof Johansson91b9a272005-08-09 20:24:39 -0700219 if (offset < KEYLENGTH)
Robert Olsson19baf832005-06-21 12:43:18 -0700220 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
Olof Johansson91b9a272005-08-09 20:24:39 -0700221 else
Robert Olsson19baf832005-06-21 12:43:18 -0700222 return 0;
223}
224
225static inline int tkey_equals(t_key a, t_key b)
226{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700227 return a == b;
Robert Olsson19baf832005-06-21 12:43:18 -0700228}
229
230static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
231{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700232 if (bits == 0 || offset >= KEYLENGTH)
233 return 1;
Olof Johansson91b9a272005-08-09 20:24:39 -0700234 bits = bits > KEYLENGTH ? KEYLENGTH : bits;
235 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700236}
Robert Olsson19baf832005-06-21 12:43:18 -0700237
238static inline int tkey_mismatch(t_key a, int offset, t_key b)
239{
240 t_key diff = a ^ b;
241 int i = offset;
242
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700243 if (!diff)
244 return 0;
245 while ((diff << i) >> (KEYLENGTH-1) == 0)
Robert Olsson19baf832005-06-21 12:43:18 -0700246 i++;
247 return i;
248}
249
Robert Olsson19baf832005-06-21 12:43:18 -0700250/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900251 To understand this stuff, an understanding of keys and all their bits is
252 necessary. Every node in the trie has a key associated with it, but not
Robert Olsson19baf832005-06-21 12:43:18 -0700253 all of the bits in that key are significant.
254
255 Consider a node 'n' and its parent 'tp'.
256
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900257 If n is a leaf, every bit in its key is significant. Its presence is
258 necessitated by path compression, since during a tree traversal (when
259 searching for a leaf - unless we are doing an insertion) we will completely
260 ignore all skipped bits we encounter. Thus we need to verify, at the end of
261 a potentially successful search, that we have indeed been walking the
Robert Olsson19baf832005-06-21 12:43:18 -0700262 correct key path.
263
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900264 Note that we can never "miss" the correct key in the tree if present by
265 following the wrong path. Path compression ensures that segments of the key
266 that are the same for all keys with a given prefix are skipped, but the
267 skipped part *is* identical for each node in the subtrie below the skipped
268 bit! trie_insert() in this implementation takes care of that - note the
Robert Olsson19baf832005-06-21 12:43:18 -0700269 call to tkey_sub_equals() in trie_insert().
270
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900271 if n is an internal node - a 'tnode' here, the various parts of its key
Robert Olsson19baf832005-06-21 12:43:18 -0700272 have many different meanings.
273
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900274 Example:
Robert Olsson19baf832005-06-21 12:43:18 -0700275 _________________________________________________________________
276 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
277 -----------------------------------------------------------------
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900278 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
Robert Olsson19baf832005-06-21 12:43:18 -0700279
280 _________________________________________________________________
281 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
282 -----------------------------------------------------------------
283 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
284
285 tp->pos = 7
286 tp->bits = 3
287 n->pos = 15
Olof Johansson91b9a272005-08-09 20:24:39 -0700288 n->bits = 4
Robert Olsson19baf832005-06-21 12:43:18 -0700289
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900290 First, let's just ignore the bits that come before the parent tp, that is
291 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
Robert Olsson19baf832005-06-21 12:43:18 -0700292 not use them for anything.
293
294 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900295 index into the parent's child array. That is, they will be used to find
Robert Olsson19baf832005-06-21 12:43:18 -0700296 'n' among tp's children.
297
298 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
299 for the node n.
300
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900301 All the bits we have seen so far are significant to the node n. The rest
Robert Olsson19baf832005-06-21 12:43:18 -0700302 of the bits are really not needed or indeed known in n->key.
303
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900304 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
Robert Olsson19baf832005-06-21 12:43:18 -0700305 n's child array, and will of course be different for each child.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900306
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700307
Robert Olsson19baf832005-06-21 12:43:18 -0700308 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
309 at this point.
310
311*/
312
Denis V. Lunevf5026fa2007-12-13 09:47:57 -0800313static const int halve_threshold = 25;
314static const int inflate_threshold = 50;
Jarek Poplawski345aa032009-07-07 19:39:16 -0700315static const int halve_threshold_root = 15;
Jens Låås80b71b82009-08-28 23:57:15 -0700316static const int inflate_threshold_root = 30;
Robert Olsson2373ce12005-08-25 13:01:29 -0700317
318static void __alias_free_mem(struct rcu_head *head)
319{
320 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
321 kmem_cache_free(fn_alias_kmem, fa);
322}
323
324static inline void alias_free_mem_rcu(struct fib_alias *fa)
325{
326 call_rcu(&fa->rcu, __alias_free_mem);
327}
328
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800329#define TNODE_KMALLOC_MAX \
Alexander Duyckadaf9812014-12-31 10:55:47 -0800330 ilog2((PAGE_SIZE - sizeof(struct tnode)) / sizeof(struct tnode *))
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800331
332static void __node_free_rcu(struct rcu_head *head)
Robert Olsson2373ce12005-08-25 13:01:29 -0700333{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800334 struct tnode *n = container_of(head, struct tnode, rcu);
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800335
336 if (IS_LEAF(n))
337 kmem_cache_free(trie_leaf_kmem, n);
338 else if (n->bits <= TNODE_KMALLOC_MAX)
339 kfree(n);
340 else
341 vfree(n);
Robert Olsson2373ce12005-08-25 13:01:29 -0700342}
343
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800344#define node_free(n) call_rcu(&n->rcu, __node_free_rcu)
Stephen Hemminger387a5482008-04-10 03:47:34 -0700345
Robert Olsson2373ce12005-08-25 13:01:29 -0700346static inline void free_leaf_info(struct leaf_info *leaf)
347{
Lai Jiangshanbceb0f42011-03-18 11:42:34 +0800348 kfree_rcu(leaf, rcu);
Robert Olsson2373ce12005-08-25 13:01:29 -0700349}
350
Eric Dumazet8d965442008-01-13 22:31:44 -0800351static struct tnode *tnode_alloc(size_t size)
Robert Olsson2373ce12005-08-25 13:01:29 -0700352{
Robert Olsson2373ce12005-08-25 13:01:29 -0700353 if (size <= PAGE_SIZE)
Eric Dumazet8d965442008-01-13 22:31:44 -0800354 return kzalloc(size, GFP_KERNEL);
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700355 else
Eric Dumazet7a1c8e52010-11-20 07:46:35 +0000356 return vzalloc(size);
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700357}
Robert Olsson2373ce12005-08-25 13:01:29 -0700358
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700359static void tnode_free_safe(struct tnode *tn)
360{
361 BUG_ON(IS_LEAF(tn));
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800362 tn->rcu.next = tnode_free_head;
363 tnode_free_head = &tn->rcu;
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700364}
365
366static void tnode_free_flush(void)
367{
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800368 struct callback_head *head;
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700369
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800370 while ((head = tnode_free_head)) {
371 struct tnode *tn = container_of(head, struct tnode, rcu);
372
373 tnode_free_head = head->next;
374 tnode_free_size += offsetof(struct tnode, child[1 << tn->bits]);
375
376 node_free(tn);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700377 }
Jarek Poplawskic3059472009-07-14 08:33:08 +0000378
379 if (tnode_free_size >= PAGE_SIZE * sync_pages) {
380 tnode_free_size = 0;
381 synchronize_rcu();
382 }
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700383}
384
Alexander Duyckadaf9812014-12-31 10:55:47 -0800385static struct tnode *leaf_new(t_key key)
Robert Olsson19baf832005-06-21 12:43:18 -0700386{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800387 struct tnode *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700388 if (l) {
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800389 l->parent = NULL;
390 /* set key and pos to reflect full key value
391 * any trailing zeros in the key should be ignored
392 * as the nodes are searched
393 */
394 l->key = key;
395 l->pos = KEYLENGTH;
396 /* set bits to 0 indicating we are not a tnode */
397 l->bits = 0;
398
Robert Olsson19baf832005-06-21 12:43:18 -0700399 INIT_HLIST_HEAD(&l->list);
400 }
401 return l;
402}
403
404static struct leaf_info *leaf_info_new(int plen)
405{
406 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -0700407 if (li) {
408 li->plen = plen;
Eric Dumazet5c745012011-07-18 03:16:33 +0000409 li->mask_plen = ntohl(inet_make_mask(plen));
Robert Olsson2373ce12005-08-25 13:01:29 -0700410 INIT_LIST_HEAD(&li->falh);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700411 }
Robert Olsson2373ce12005-08-25 13:01:29 -0700412 return li;
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700413}
414
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800415static struct tnode *tnode_new(t_key key, int pos, int bits)
Robert Olsson19baf832005-06-21 12:43:18 -0700416{
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800417 size_t sz = offsetof(struct tnode, child[1 << bits]);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700418 struct tnode *tn = tnode_alloc(sz);
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800419 unsigned int shift = pos + bits;
420
421 /* verify bits and pos their msb bits clear and values are valid */
422 BUG_ON(!bits || (shift > KEYLENGTH));
Robert Olsson19baf832005-06-21 12:43:18 -0700423
Olof Johansson91b9a272005-08-09 20:24:39 -0700424 if (tn) {
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800425 tn->parent = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -0700426 tn->pos = pos;
427 tn->bits = bits;
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800428 tn->key = mask_pfx(key, pos);
Robert Olsson19baf832005-06-21 12:43:18 -0700429 tn->full_children = 0;
430 tn->empty_children = 1<<bits;
431 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700432
Eric Dumazeta034ee32010-09-09 23:32:28 +0000433 pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode),
Alexander Duyckadaf9812014-12-31 10:55:47 -0800434 sizeof(struct tnode *) << bits);
Robert Olsson19baf832005-06-21 12:43:18 -0700435 return tn;
436}
437
Robert Olsson19baf832005-06-21 12:43:18 -0700438/*
439 * Check whether a tnode 'n' is "full", i.e. it is an internal node
440 * and no bits are skipped. See discussion in dyntree paper p. 6
441 */
442
Alexander Duyckadaf9812014-12-31 10:55:47 -0800443static inline int tnode_full(const struct tnode *tn, const struct tnode *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700444{
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800445 return n && IS_TNODE(n) && (n->pos == (tn->pos + tn->bits));
Robert Olsson19baf832005-06-21 12:43:18 -0700446}
447
Lin Ming61648d92012-07-29 02:00:03 +0000448static inline void put_child(struct tnode *tn, int i,
Alexander Duyckadaf9812014-12-31 10:55:47 -0800449 struct tnode *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700450{
451 tnode_put_child_reorg(tn, i, n, -1);
452}
453
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700454 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700455 * Add a child at position i overwriting the old value.
456 * Update the value of full_children and empty_children.
457 */
458
Alexander Duyckadaf9812014-12-31 10:55:47 -0800459static void tnode_put_child_reorg(struct tnode *tn, int i, struct tnode *n,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800460 int wasfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700461{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800462 struct tnode *chi = rtnl_dereference(tn->child[i]);
Robert Olsson19baf832005-06-21 12:43:18 -0700463 int isfull;
464
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700465 BUG_ON(i >= 1<<tn->bits);
466
Robert Olsson19baf832005-06-21 12:43:18 -0700467 /* update emptyChildren */
468 if (n == NULL && chi != NULL)
469 tn->empty_children++;
470 else if (n != NULL && chi == NULL)
471 tn->empty_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700472
Robert Olsson19baf832005-06-21 12:43:18 -0700473 /* update fullChildren */
Olof Johansson91b9a272005-08-09 20:24:39 -0700474 if (wasfull == -1)
Robert Olsson19baf832005-06-21 12:43:18 -0700475 wasfull = tnode_full(tn, chi);
476
477 isfull = tnode_full(tn, n);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700478 if (wasfull && !isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700479 tn->full_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700480 else if (!wasfull && isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700481 tn->full_children++;
Olof Johansson91b9a272005-08-09 20:24:39 -0700482
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800483 node_set_parent(n, tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700484
Eric Dumazetcf778b02012-01-12 04:41:32 +0000485 rcu_assign_pointer(tn->child[i], n);
Robert Olsson19baf832005-06-21 12:43:18 -0700486}
487
Jens Låås80b71b82009-08-28 23:57:15 -0700488#define MAX_WORK 10
Alexander Duyckadaf9812014-12-31 10:55:47 -0800489static struct tnode *resize(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700490{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800491 struct tnode *old_tn, *n = NULL;
Robert Olssone6308be2005-10-04 13:01:58 -0700492 int inflate_threshold_use;
493 int halve_threshold_use;
Jens Låås80b71b82009-08-28 23:57:15 -0700494 int max_work;
Robert Olsson19baf832005-06-21 12:43:18 -0700495
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900496 if (!tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700497 return NULL;
498
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700499 pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
500 tn, inflate_threshold, halve_threshold);
Robert Olsson19baf832005-06-21 12:43:18 -0700501
502 /* No children */
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800503 if (tn->empty_children > (tnode_child_length(tn) - 1))
504 goto no_children;
505
Robert Olsson19baf832005-06-21 12:43:18 -0700506 /* One child */
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800507 if (tn->empty_children == (tnode_child_length(tn) - 1))
Jens Låås80b71b82009-08-28 23:57:15 -0700508 goto one_child;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700509 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700510 * Double as long as the resulting node has a number of
511 * nonempty nodes that are above the threshold.
512 */
513
514 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700515 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
516 * the Helsinki University of Technology and Matti Tikkanen of Nokia
Robert Olsson19baf832005-06-21 12:43:18 -0700517 * Telecommunications, page 6:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700518 * "A node is doubled if the ratio of non-empty children to all
Robert Olsson19baf832005-06-21 12:43:18 -0700519 * children in the *doubled* node is at least 'high'."
520 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700521 * 'high' in this instance is the variable 'inflate_threshold'. It
522 * is expressed as a percentage, so we multiply it with
523 * tnode_child_length() and instead of multiplying by 2 (since the
524 * child array will be doubled by inflate()) and multiplying
525 * the left-hand side by 100 (to handle the percentage thing) we
Robert Olsson19baf832005-06-21 12:43:18 -0700526 * multiply the left-hand side by 50.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700527 *
528 * The left-hand side may look a bit weird: tnode_child_length(tn)
529 * - tn->empty_children is of course the number of non-null children
530 * in the current node. tn->full_children is the number of "full"
Robert Olsson19baf832005-06-21 12:43:18 -0700531 * children, that is non-null tnodes with a skip value of 0.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700532 * All of those will be doubled in the resulting inflated tnode, so
Robert Olsson19baf832005-06-21 12:43:18 -0700533 * we just count them one extra time here.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700534 *
Robert Olsson19baf832005-06-21 12:43:18 -0700535 * A clearer way to write this would be:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700536 *
Robert Olsson19baf832005-06-21 12:43:18 -0700537 * to_be_doubled = tn->full_children;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700538 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
Robert Olsson19baf832005-06-21 12:43:18 -0700539 * tn->full_children;
540 *
541 * new_child_length = tnode_child_length(tn) * 2;
542 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700543 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
Robert Olsson19baf832005-06-21 12:43:18 -0700544 * new_child_length;
545 * if (new_fill_factor >= inflate_threshold)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700546 *
547 * ...and so on, tho it would mess up the while () loop.
548 *
Robert Olsson19baf832005-06-21 12:43:18 -0700549 * anyway,
550 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
551 * inflate_threshold
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700552 *
Robert Olsson19baf832005-06-21 12:43:18 -0700553 * avoid a division:
554 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
555 * inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700556 *
Robert Olsson19baf832005-06-21 12:43:18 -0700557 * expand not_to_be_doubled and to_be_doubled, and shorten:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700558 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700559 * tn->full_children) >= inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700560 *
Robert Olsson19baf832005-06-21 12:43:18 -0700561 * expand new_child_length:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700562 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700563 * tn->full_children) >=
Robert Olsson19baf832005-06-21 12:43:18 -0700564 * inflate_threshold * tnode_child_length(tn) * 2
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700565 *
Robert Olsson19baf832005-06-21 12:43:18 -0700566 * shorten again:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700567 * 50 * (tn->full_children + tnode_child_length(tn) -
Olof Johansson91b9a272005-08-09 20:24:39 -0700568 * tn->empty_children) >= inflate_threshold *
Robert Olsson19baf832005-06-21 12:43:18 -0700569 * tnode_child_length(tn)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700570 *
Robert Olsson19baf832005-06-21 12:43:18 -0700571 */
572
Robert Olssone6308be2005-10-04 13:01:58 -0700573 /* Keep root node larger */
574
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800575 if (!node_parent(tn)) {
Jens Låås80b71b82009-08-28 23:57:15 -0700576 inflate_threshold_use = inflate_threshold_root;
577 halve_threshold_use = halve_threshold_root;
Eric Dumazeta034ee32010-09-09 23:32:28 +0000578 } else {
Robert Olssone6308be2005-10-04 13:01:58 -0700579 inflate_threshold_use = inflate_threshold;
Jens Låås80b71b82009-08-28 23:57:15 -0700580 halve_threshold_use = halve_threshold;
581 }
Robert Olssone6308be2005-10-04 13:01:58 -0700582
Jens Låås80b71b82009-08-28 23:57:15 -0700583 max_work = MAX_WORK;
584 while ((tn->full_children > 0 && max_work-- &&
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800585 50 * (tn->full_children + tnode_child_length(tn)
586 - tn->empty_children)
587 >= inflate_threshold_use * tnode_child_length(tn))) {
Robert Olsson19baf832005-06-21 12:43:18 -0700588
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700589 old_tn = tn;
590 tn = inflate(t, tn);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800591
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700592 if (IS_ERR(tn)) {
593 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700594#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -0800595 this_cpu_inc(t->stats->resize_node_skipped);
Robert Olsson2f368952005-07-05 15:02:40 -0700596#endif
597 break;
598 }
Robert Olsson19baf832005-06-21 12:43:18 -0700599 }
600
Jens Låås80b71b82009-08-28 23:57:15 -0700601 /* Return if at least one inflate is run */
Eric Dumazeta034ee32010-09-09 23:32:28 +0000602 if (max_work != MAX_WORK)
Alexander Duyckadaf9812014-12-31 10:55:47 -0800603 return tn;
Jens Låås80b71b82009-08-28 23:57:15 -0700604
Robert Olsson19baf832005-06-21 12:43:18 -0700605 /*
606 * Halve as long as the number of empty children in this
607 * node is above threshold.
608 */
Robert Olsson2f368952005-07-05 15:02:40 -0700609
Jens Låås80b71b82009-08-28 23:57:15 -0700610 max_work = MAX_WORK;
611 while (tn->bits > 1 && max_work-- &&
Robert Olsson19baf832005-06-21 12:43:18 -0700612 100 * (tnode_child_length(tn) - tn->empty_children) <
Robert Olssone6308be2005-10-04 13:01:58 -0700613 halve_threshold_use * tnode_child_length(tn)) {
Robert Olsson19baf832005-06-21 12:43:18 -0700614
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700615 old_tn = tn;
616 tn = halve(t, tn);
617 if (IS_ERR(tn)) {
618 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700619#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -0800620 this_cpu_inc(t->stats->resize_node_skipped);
Robert Olsson2f368952005-07-05 15:02:40 -0700621#endif
622 break;
623 }
624 }
625
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700626
Robert Olsson19baf832005-06-21 12:43:18 -0700627 /* Only one child remains */
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800628 if (tn->empty_children == (tnode_child_length(tn) - 1)) {
629 unsigned long i;
Jens Låås80b71b82009-08-28 23:57:15 -0700630one_child:
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800631 for (i = tnode_child_length(tn); !n && i;)
632 n = tnode_get_child(tn, --i);
633no_children:
634 /* compress one level */
635 node_set_parent(n, NULL);
636 tnode_free_safe(tn);
637 return n;
Jens Låås80b71b82009-08-28 23:57:15 -0700638 }
Alexander Duyckadaf9812014-12-31 10:55:47 -0800639 return tn;
Robert Olsson19baf832005-06-21 12:43:18 -0700640}
641
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700642
643static void tnode_clean_free(struct tnode *tn)
644{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800645 struct tnode *tofree;
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700646 int i;
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700647
648 for (i = 0; i < tnode_child_length(tn); i++) {
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800649 tofree = rtnl_dereference(tn->child[i]);
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700650 if (tofree)
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800651 node_free(tofree);
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700652 }
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800653 node_free(tn);
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700654}
655
Alexander Duyckadaf9812014-12-31 10:55:47 -0800656static struct tnode *inflate(struct trie *t, struct tnode *oldtnode)
Robert Olsson19baf832005-06-21 12:43:18 -0700657{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800658 int olen = tnode_child_length(oldtnode);
659 struct tnode *tn;
Robert Olsson19baf832005-06-21 12:43:18 -0700660 int i;
661
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700662 pr_debug("In inflate\n");
Robert Olsson19baf832005-06-21 12:43:18 -0700663
664 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
665
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700666 if (!tn)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700667 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700668
669 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700670 * Preallocate and store tnodes before the actual work so we
671 * don't get into an inconsistent state if memory allocation
672 * fails. In case of failure we return the oldnode and inflate
Robert Olsson2f368952005-07-05 15:02:40 -0700673 * of tnode is ignored.
674 */
Olof Johansson91b9a272005-08-09 20:24:39 -0700675
676 for (i = 0; i < olen; i++) {
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800677 struct tnode *inode;
Robert Olsson2f368952005-07-05 15:02:40 -0700678
Alexander Duyckadaf9812014-12-31 10:55:47 -0800679 inode = tnode_get_child(oldtnode, i);
680 if (tnode_full(oldtnode, inode) && inode->bits > 1) {
Robert Olsson2f368952005-07-05 15:02:40 -0700681 struct tnode *left, *right;
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -0700682 t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700683
Robert Olsson2f368952005-07-05 15:02:40 -0700684 left = tnode_new(inode->key&(~m), inode->pos + 1,
685 inode->bits - 1);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700686 if (!left)
687 goto nomem;
Olof Johansson91b9a272005-08-09 20:24:39 -0700688
Robert Olsson2f368952005-07-05 15:02:40 -0700689 right = tnode_new(inode->key|m, inode->pos + 1,
690 inode->bits - 1);
691
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900692 if (!right) {
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800693 node_free(left);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700694 goto nomem;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900695 }
Robert Olsson2f368952005-07-05 15:02:40 -0700696
Alexander Duyckadaf9812014-12-31 10:55:47 -0800697 put_child(tn, 2*i, left);
698 put_child(tn, 2*i+1, right);
Robert Olsson2f368952005-07-05 15:02:40 -0700699 }
700 }
701
Olof Johansson91b9a272005-08-09 20:24:39 -0700702 for (i = 0; i < olen; i++) {
Alexander Duyckadaf9812014-12-31 10:55:47 -0800703 struct tnode *inode = tnode_get_child(oldtnode, i);
Olof Johansson91b9a272005-08-09 20:24:39 -0700704 struct tnode *left, *right;
705 int size, j;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700706
Robert Olsson19baf832005-06-21 12:43:18 -0700707 /* An empty child */
Alexander Duyckadaf9812014-12-31 10:55:47 -0800708 if (inode == NULL)
Robert Olsson19baf832005-06-21 12:43:18 -0700709 continue;
710
711 /* A leaf or an internal node with skipped bits */
Alexander Duyckadaf9812014-12-31 10:55:47 -0800712 if (!tnode_full(oldtnode, inode)) {
baker.zhangbbe34cf2013-10-01 07:45:09 +0800713 put_child(tn,
Alexander Duyckadaf9812014-12-31 10:55:47 -0800714 tkey_extract_bits(inode->key, tn->pos, tn->bits),
715 inode);
Robert Olsson19baf832005-06-21 12:43:18 -0700716 continue;
717 }
718
719 /* An internal node with two children */
Robert Olsson19baf832005-06-21 12:43:18 -0700720 if (inode->bits == 1) {
Lin Ming61648d92012-07-29 02:00:03 +0000721 put_child(tn, 2*i, rtnl_dereference(inode->child[0]));
722 put_child(tn, 2*i+1, rtnl_dereference(inode->child[1]));
Robert Olsson19baf832005-06-21 12:43:18 -0700723
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700724 tnode_free_safe(inode);
Olof Johansson91b9a272005-08-09 20:24:39 -0700725 continue;
Robert Olsson19baf832005-06-21 12:43:18 -0700726 }
727
Olof Johansson91b9a272005-08-09 20:24:39 -0700728 /* An internal node with more than two children */
Robert Olsson19baf832005-06-21 12:43:18 -0700729
Olof Johansson91b9a272005-08-09 20:24:39 -0700730 /* We will replace this node 'inode' with two new
731 * ones, 'left' and 'right', each with half of the
732 * original children. The two new nodes will have
733 * a position one bit further down the key and this
734 * means that the "significant" part of their keys
735 * (see the discussion near the top of this file)
736 * will differ by one bit, which will be "0" in
737 * left's key and "1" in right's key. Since we are
738 * moving the key position by one step, the bit that
739 * we are moving away from - the bit at position
740 * (inode->pos) - is the one that will differ between
741 * left and right. So... we synthesize that bit in the
742 * two new keys.
743 * The mask 'm' below will be a single "one" bit at
744 * the position (inode->pos)
745 */
Robert Olsson19baf832005-06-21 12:43:18 -0700746
Olof Johansson91b9a272005-08-09 20:24:39 -0700747 /* Use the old key, but set the new significant
748 * bit to zero.
749 */
Robert Olsson19baf832005-06-21 12:43:18 -0700750
Alexander Duyckadaf9812014-12-31 10:55:47 -0800751 left = tnode_get_child(tn, 2*i);
Lin Ming61648d92012-07-29 02:00:03 +0000752 put_child(tn, 2*i, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -0700753
Olof Johansson91b9a272005-08-09 20:24:39 -0700754 BUG_ON(!left);
Robert Olsson2f368952005-07-05 15:02:40 -0700755
Alexander Duyckadaf9812014-12-31 10:55:47 -0800756 right = tnode_get_child(tn, 2*i+1);
Lin Ming61648d92012-07-29 02:00:03 +0000757 put_child(tn, 2*i+1, NULL);
Robert Olsson2f368952005-07-05 15:02:40 -0700758
Olof Johansson91b9a272005-08-09 20:24:39 -0700759 BUG_ON(!right);
Robert Olsson2f368952005-07-05 15:02:40 -0700760
Olof Johansson91b9a272005-08-09 20:24:39 -0700761 size = tnode_child_length(left);
762 for (j = 0; j < size; j++) {
Lin Ming61648d92012-07-29 02:00:03 +0000763 put_child(left, j, rtnl_dereference(inode->child[j]));
764 put_child(right, j, rtnl_dereference(inode->child[j + size]));
Robert Olsson19baf832005-06-21 12:43:18 -0700765 }
Lin Ming61648d92012-07-29 02:00:03 +0000766 put_child(tn, 2*i, resize(t, left));
767 put_child(tn, 2*i+1, resize(t, right));
Olof Johansson91b9a272005-08-09 20:24:39 -0700768
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700769 tnode_free_safe(inode);
Robert Olsson19baf832005-06-21 12:43:18 -0700770 }
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700771 tnode_free_safe(oldtnode);
Robert Olsson19baf832005-06-21 12:43:18 -0700772 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700773nomem:
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700774 tnode_clean_free(tn);
775 return ERR_PTR(-ENOMEM);
Robert Olsson19baf832005-06-21 12:43:18 -0700776}
777
Alexander Duyckadaf9812014-12-31 10:55:47 -0800778static struct tnode *halve(struct trie *t, struct tnode *oldtnode)
Robert Olsson19baf832005-06-21 12:43:18 -0700779{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800780 int olen = tnode_child_length(oldtnode);
781 struct tnode *tn, *left, *right;
Robert Olsson19baf832005-06-21 12:43:18 -0700782 int i;
Robert Olsson19baf832005-06-21 12:43:18 -0700783
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700784 pr_debug("In halve\n");
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700785
786 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
Robert Olsson19baf832005-06-21 12:43:18 -0700787
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700788 if (!tn)
789 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700790
791 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700792 * Preallocate and store tnodes before the actual work so we
793 * don't get into an inconsistent state if memory allocation
794 * fails. In case of failure we return the oldnode and halve
Robert Olsson2f368952005-07-05 15:02:40 -0700795 * of tnode is ignored.
796 */
797
Olof Johansson91b9a272005-08-09 20:24:39 -0700798 for (i = 0; i < olen; i += 2) {
Robert Olsson2f368952005-07-05 15:02:40 -0700799 left = tnode_get_child(oldtnode, i);
800 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700801
Robert Olsson2f368952005-07-05 15:02:40 -0700802 /* Two nonempty children */
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700803 if (left && right) {
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700804 struct tnode *newn;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700805
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700806 newn = tnode_new(left->key, tn->pos + tn->bits, 1);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700807
808 if (!newn)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700809 goto nomem;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700810
Alexander Duyckadaf9812014-12-31 10:55:47 -0800811 put_child(tn, i/2, newn);
Robert Olsson2f368952005-07-05 15:02:40 -0700812 }
Robert Olsson2f368952005-07-05 15:02:40 -0700813
Robert Olsson2f368952005-07-05 15:02:40 -0700814 }
Robert Olsson19baf832005-06-21 12:43:18 -0700815
Olof Johansson91b9a272005-08-09 20:24:39 -0700816 for (i = 0; i < olen; i += 2) {
817 struct tnode *newBinNode;
818
Robert Olsson19baf832005-06-21 12:43:18 -0700819 left = tnode_get_child(oldtnode, i);
820 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700821
Robert Olsson19baf832005-06-21 12:43:18 -0700822 /* At least one of the children is empty */
823 if (left == NULL) {
824 if (right == NULL) /* Both are empty */
825 continue;
Lin Ming61648d92012-07-29 02:00:03 +0000826 put_child(tn, i/2, right);
Olof Johansson91b9a272005-08-09 20:24:39 -0700827 continue;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700828 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700829
830 if (right == NULL) {
Lin Ming61648d92012-07-29 02:00:03 +0000831 put_child(tn, i/2, left);
Olof Johansson91b9a272005-08-09 20:24:39 -0700832 continue;
833 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700834
Robert Olsson19baf832005-06-21 12:43:18 -0700835 /* Two nonempty children */
Alexander Duyckadaf9812014-12-31 10:55:47 -0800836 newBinNode = tnode_get_child(tn, i/2);
Lin Ming61648d92012-07-29 02:00:03 +0000837 put_child(tn, i/2, NULL);
838 put_child(newBinNode, 0, left);
839 put_child(newBinNode, 1, right);
840 put_child(tn, i/2, resize(t, newBinNode));
Robert Olsson19baf832005-06-21 12:43:18 -0700841 }
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700842 tnode_free_safe(oldtnode);
Robert Olsson19baf832005-06-21 12:43:18 -0700843 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700844nomem:
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700845 tnode_clean_free(tn);
846 return ERR_PTR(-ENOMEM);
Robert Olsson19baf832005-06-21 12:43:18 -0700847}
848
Robert Olsson772cb712005-09-19 15:31:18 -0700849/* readside must use rcu_read_lock currently dump routines
Robert Olsson2373ce12005-08-25 13:01:29 -0700850 via get_fa_head and dump */
851
Alexander Duyckadaf9812014-12-31 10:55:47 -0800852static struct leaf_info *find_leaf_info(struct tnode *l, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700853{
Robert Olsson772cb712005-09-19 15:31:18 -0700854 struct hlist_head *head = &l->list;
Robert Olsson19baf832005-06-21 12:43:18 -0700855 struct leaf_info *li;
856
Sasha Levinb67bfe02013-02-27 17:06:00 -0800857 hlist_for_each_entry_rcu(li, head, hlist)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700858 if (li->plen == plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700859 return li;
Olof Johansson91b9a272005-08-09 20:24:39 -0700860
Robert Olsson19baf832005-06-21 12:43:18 -0700861 return NULL;
862}
863
Alexander Duyckadaf9812014-12-31 10:55:47 -0800864static inline struct list_head *get_fa_head(struct tnode *l, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700865{
Robert Olsson772cb712005-09-19 15:31:18 -0700866 struct leaf_info *li = find_leaf_info(l, plen);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700867
Olof Johansson91b9a272005-08-09 20:24:39 -0700868 if (!li)
869 return NULL;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700870
Olof Johansson91b9a272005-08-09 20:24:39 -0700871 return &li->falh;
Robert Olsson19baf832005-06-21 12:43:18 -0700872}
873
874static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
875{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900876 struct leaf_info *li = NULL, *last = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -0700877
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900878 if (hlist_empty(head)) {
879 hlist_add_head_rcu(&new->hlist, head);
880 } else {
Sasha Levinb67bfe02013-02-27 17:06:00 -0800881 hlist_for_each_entry(li, head, hlist) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900882 if (new->plen > li->plen)
883 break;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700884
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900885 last = li;
886 }
887 if (last)
Ken Helias1d023282014-08-06 16:09:16 -0700888 hlist_add_behind_rcu(&new->hlist, &last->hlist);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900889 else
890 hlist_add_before_rcu(&new->hlist, &li->hlist);
891 }
Robert Olsson19baf832005-06-21 12:43:18 -0700892}
893
Robert Olsson2373ce12005-08-25 13:01:29 -0700894/* rcu_read_lock needs to be hold by caller from readside */
Alexander Duyckadaf9812014-12-31 10:55:47 -0800895static struct tnode *fib_find_node(struct trie *t, u32 key)
Robert Olsson19baf832005-06-21 12:43:18 -0700896{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800897 struct tnode *n = rcu_dereference_rtnl(t->trie);
Robert Olsson19baf832005-06-21 12:43:18 -0700898
Alexander Duyck939afb02014-12-31 10:56:00 -0800899 while (n) {
900 unsigned long index = get_index(key, n);
901
902 /* This bit of code is a bit tricky but it combines multiple
903 * checks into a single check. The prefix consists of the
904 * prefix plus zeros for the bits in the cindex. The index
905 * is the difference between the key and this value. From
906 * this we can actually derive several pieces of data.
907 * if !(index >> bits)
908 * we know the value is cindex
909 * else
910 * we have a mismatch in skip bits and failed
911 */
912 if (index >> n->bits)
913 return NULL;
914
915 /* we have found a leaf. Prefixes have already been compared */
916 if (IS_LEAF(n))
Robert Olsson19baf832005-06-21 12:43:18 -0700917 break;
Alexander Duyck939afb02014-12-31 10:56:00 -0800918
919 n = rcu_dereference_rtnl(n->child[index]);
Robert Olsson19baf832005-06-21 12:43:18 -0700920 }
Robert Olsson19baf832005-06-21 12:43:18 -0700921
Alexander Duyck939afb02014-12-31 10:56:00 -0800922 return n;
Robert Olsson19baf832005-06-21 12:43:18 -0700923}
924
Jarek Poplawski7b855762009-06-18 00:28:51 -0700925static void trie_rebalance(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700926{
Robert Olsson19baf832005-06-21 12:43:18 -0700927 int wasfull;
Robert Olsson3ed18d72009-05-21 15:20:59 -0700928 t_key cindex, key;
Stephen Hemminger06801912007-08-10 15:22:13 -0700929 struct tnode *tp;
Robert Olsson19baf832005-06-21 12:43:18 -0700930
Robert Olsson3ed18d72009-05-21 15:20:59 -0700931 key = tn->key;
932
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800933 while (tn != NULL && (tp = node_parent(tn)) != NULL) {
Robert Olsson19baf832005-06-21 12:43:18 -0700934 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
935 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
Alexander Duyckadaf9812014-12-31 10:55:47 -0800936 tn = resize(t, tn);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800937
Alexander Duyckadaf9812014-12-31 10:55:47 -0800938 tnode_put_child_reorg(tp, cindex, tn, wasfull);
Olof Johansson91b9a272005-08-09 20:24:39 -0700939
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800940 tp = node_parent(tn);
Jarek Poplawski008440e2009-06-30 12:47:19 -0700941 if (!tp)
Alexander Duyckadaf9812014-12-31 10:55:47 -0800942 rcu_assign_pointer(t->trie, tn);
Jarek Poplawski008440e2009-06-30 12:47:19 -0700943
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700944 tnode_free_flush();
Stephen Hemminger06801912007-08-10 15:22:13 -0700945 if (!tp)
Robert Olsson19baf832005-06-21 12:43:18 -0700946 break;
Stephen Hemminger06801912007-08-10 15:22:13 -0700947 tn = tp;
Robert Olsson19baf832005-06-21 12:43:18 -0700948 }
Stephen Hemminger06801912007-08-10 15:22:13 -0700949
Robert Olsson19baf832005-06-21 12:43:18 -0700950 /* Handle last (top) tnode */
Jarek Poplawski7b855762009-06-18 00:28:51 -0700951 if (IS_TNODE(tn))
Alexander Duyckadaf9812014-12-31 10:55:47 -0800952 tn = resize(t, tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700953
Alexander Duyckadaf9812014-12-31 10:55:47 -0800954 rcu_assign_pointer(t->trie, tn);
Jarek Poplawski7b855762009-06-18 00:28:51 -0700955 tnode_free_flush();
Robert Olsson19baf832005-06-21 12:43:18 -0700956}
957
Robert Olsson2373ce12005-08-25 13:01:29 -0700958/* only used from updater-side */
959
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -0800960static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700961{
962 int pos, newpos;
963 struct tnode *tp = NULL, *tn = NULL;
Alexander Duyckadaf9812014-12-31 10:55:47 -0800964 struct tnode *n;
965 struct tnode *l;
Robert Olsson19baf832005-06-21 12:43:18 -0700966 int missbit;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700967 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -0700968 struct leaf_info *li;
969 t_key cindex;
970
971 pos = 0;
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700972 n = rtnl_dereference(t->trie);
Robert Olsson19baf832005-06-21 12:43:18 -0700973
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700974 /* If we point to NULL, stop. Either the tree is empty and we should
975 * just put a new leaf in if, or we have reached an empty child slot,
Robert Olsson19baf832005-06-21 12:43:18 -0700976 * and we should just put our new leaf in that.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700977 * If we point to a T_TNODE, check if it matches our key. Note that
978 * a T_TNODE might be skipping any number of bits - its 'pos' need
Robert Olsson19baf832005-06-21 12:43:18 -0700979 * not be the parent's 'pos'+'bits'!
980 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700981 * If it does match the current key, get pos/bits from it, extract
Robert Olsson19baf832005-06-21 12:43:18 -0700982 * the index from our key, push the T_TNODE and walk the tree.
983 *
984 * If it doesn't, we have to replace it with a new T_TNODE.
985 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700986 * If we point to a T_LEAF, it might or might not have the same key
987 * as we do. If it does, just change the value, update the T_LEAF's
988 * value, and return it.
Robert Olsson19baf832005-06-21 12:43:18 -0700989 * If it doesn't, we need to replace it with a T_TNODE.
990 */
991
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800992 while (n && IS_TNODE(n)) {
Alexander Duyckadaf9812014-12-31 10:55:47 -0800993 if (tkey_sub_equals(n->key, pos, n->pos-pos, key)) {
994 tp = n;
995 pos = n->pos + n->bits;
996 n = tnode_get_child(n,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800997 tkey_extract_bits(key,
Alexander Duyckadaf9812014-12-31 10:55:47 -0800998 n->pos,
999 n->bits));
Robert Olsson19baf832005-06-21 12:43:18 -07001000
Alexander Duyckadaf9812014-12-31 10:55:47 -08001001 BUG_ON(n && node_parent(n) != tp);
Olof Johansson91b9a272005-08-09 20:24:39 -07001002 } else
Robert Olsson19baf832005-06-21 12:43:18 -07001003 break;
1004 }
1005
1006 /*
1007 * n ----> NULL, LEAF or TNODE
1008 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001009 * tp is n's (parent) ----> NULL or TNODE
Robert Olsson19baf832005-06-21 12:43:18 -07001010 */
1011
Olof Johansson91b9a272005-08-09 20:24:39 -07001012 BUG_ON(tp && IS_LEAF(tp));
Robert Olsson19baf832005-06-21 12:43:18 -07001013
1014 /* Case 1: n is a leaf. Compare prefixes */
1015
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001016 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
Robert Olsson19baf832005-06-21 12:43:18 -07001017 li = leaf_info_new(plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001018
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001019 if (!li)
1020 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001021
1022 fa_head = &li->falh;
Alexander Duyckadaf9812014-12-31 10:55:47 -08001023 insert_leaf_info(&n->list, li);
Robert Olsson19baf832005-06-21 12:43:18 -07001024 goto done;
1025 }
Alexander Duyck64c9b6f2014-12-31 10:55:35 -08001026 l = leaf_new(key);
Robert Olsson19baf832005-06-21 12:43:18 -07001027
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001028 if (!l)
1029 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001030
Robert Olsson19baf832005-06-21 12:43:18 -07001031 li = leaf_info_new(plen);
1032
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001033 if (!li) {
Alexander Duyck37fd30f2014-12-31 10:55:41 -08001034 node_free(l);
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001035 return NULL;
Robert Olssonf835e472005-06-28 15:00:39 -07001036 }
Robert Olsson19baf832005-06-21 12:43:18 -07001037
1038 fa_head = &li->falh;
1039 insert_leaf_info(&l->list, li);
1040
Robert Olsson19baf832005-06-21 12:43:18 -07001041 if (t->trie && n == NULL) {
Olof Johansson91b9a272005-08-09 20:24:39 -07001042 /* Case 2: n is NULL, and will just insert a new leaf */
Robert Olsson19baf832005-06-21 12:43:18 -07001043
Alexander Duyckadaf9812014-12-31 10:55:47 -08001044 node_set_parent(l, tp);
Robert Olsson19baf832005-06-21 12:43:18 -07001045
Olof Johansson91b9a272005-08-09 20:24:39 -07001046 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
Alexander Duyckadaf9812014-12-31 10:55:47 -08001047 put_child(tp, cindex, l);
Olof Johansson91b9a272005-08-09 20:24:39 -07001048 } else {
1049 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001050 /*
1051 * Add a new tnode here
Robert Olsson19baf832005-06-21 12:43:18 -07001052 * first tnode need some special handling
1053 */
1054
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001055 if (n) {
baker.zhang4c60f1d2013-10-08 11:36:51 +08001056 pos = tp ? tp->pos+tp->bits : 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001057 newpos = tkey_mismatch(key, pos, n->key);
1058 tn = tnode_new(n->key, newpos, 1);
Olof Johansson91b9a272005-08-09 20:24:39 -07001059 } else {
Robert Olsson19baf832005-06-21 12:43:18 -07001060 newpos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001061 tn = tnode_new(key, newpos, 1); /* First tnode */
Robert Olsson19baf832005-06-21 12:43:18 -07001062 }
Robert Olsson19baf832005-06-21 12:43:18 -07001063
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001064 if (!tn) {
Robert Olssonf835e472005-06-28 15:00:39 -07001065 free_leaf_info(li);
Alexander Duyck37fd30f2014-12-31 10:55:41 -08001066 node_free(l);
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001067 return NULL;
Olof Johansson91b9a272005-08-09 20:24:39 -07001068 }
1069
Alexander Duyckadaf9812014-12-31 10:55:47 -08001070 node_set_parent(tn, tp);
Robert Olsson19baf832005-06-21 12:43:18 -07001071
Olof Johansson91b9a272005-08-09 20:24:39 -07001072 missbit = tkey_extract_bits(key, newpos, 1);
Alexander Duyckadaf9812014-12-31 10:55:47 -08001073 put_child(tn, missbit, l);
Lin Ming61648d92012-07-29 02:00:03 +00001074 put_child(tn, 1-missbit, n);
Robert Olsson19baf832005-06-21 12:43:18 -07001075
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001076 if (tp) {
Robert Olsson19baf832005-06-21 12:43:18 -07001077 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
Alexander Duyckadaf9812014-12-31 10:55:47 -08001078 put_child(tp, cindex, tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001079 } else {
Alexander Duyckadaf9812014-12-31 10:55:47 -08001080 rcu_assign_pointer(t->trie, tn);
Robert Olsson19baf832005-06-21 12:43:18 -07001081 }
Alexander Duycke962f302014-12-10 21:49:22 -08001082
1083 tp = tn;
Robert Olsson19baf832005-06-21 12:43:18 -07001084 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001085
1086 if (tp && tp->pos + tp->bits > 32)
Joe Perches058bd4d2012-03-11 18:36:11 +00001087 pr_warn("fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
1088 tp, tp->pos, tp->bits, key, plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001089
Robert Olsson19baf832005-06-21 12:43:18 -07001090 /* Rebalance the trie */
Robert Olsson2373ce12005-08-25 13:01:29 -07001091
Jarek Poplawski7b855762009-06-18 00:28:51 -07001092 trie_rebalance(t, tp);
Robert Olssonf835e472005-06-28 15:00:39 -07001093done:
Robert Olsson19baf832005-06-21 12:43:18 -07001094 return fa_head;
1095}
1096
Robert Olssond562f1f2007-03-26 14:22:22 -07001097/*
1098 * Caller must hold RTNL.
1099 */
Stephen Hemminger16c6cf82009-09-20 10:35:36 +00001100int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
Robert Olsson19baf832005-06-21 12:43:18 -07001101{
1102 struct trie *t = (struct trie *) tb->tb_data;
1103 struct fib_alias *fa, *new_fa;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001104 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001105 struct fib_info *fi;
Thomas Graf4e902c52006-08-17 18:14:52 -07001106 int plen = cfg->fc_dst_len;
1107 u8 tos = cfg->fc_tos;
Robert Olsson19baf832005-06-21 12:43:18 -07001108 u32 key, mask;
1109 int err;
Alexander Duyckadaf9812014-12-31 10:55:47 -08001110 struct tnode *l;
Robert Olsson19baf832005-06-21 12:43:18 -07001111
1112 if (plen > 32)
1113 return -EINVAL;
1114
Thomas Graf4e902c52006-08-17 18:14:52 -07001115 key = ntohl(cfg->fc_dst);
Robert Olsson19baf832005-06-21 12:43:18 -07001116
Patrick McHardy2dfe55b2006-08-10 23:08:33 -07001117 pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
Robert Olsson19baf832005-06-21 12:43:18 -07001118
Olof Johansson91b9a272005-08-09 20:24:39 -07001119 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001120
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001121 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001122 return -EINVAL;
1123
1124 key = key & mask;
1125
Thomas Graf4e902c52006-08-17 18:14:52 -07001126 fi = fib_create_info(cfg);
1127 if (IS_ERR(fi)) {
1128 err = PTR_ERR(fi);
Robert Olsson19baf832005-06-21 12:43:18 -07001129 goto err;
Thomas Graf4e902c52006-08-17 18:14:52 -07001130 }
Robert Olsson19baf832005-06-21 12:43:18 -07001131
1132 l = fib_find_node(t, key);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001133 fa = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001134
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001135 if (l) {
Robert Olsson19baf832005-06-21 12:43:18 -07001136 fa_head = get_fa_head(l, plen);
1137 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1138 }
1139
1140 /* Now fa, if non-NULL, points to the first fib alias
1141 * with the same keys [prefix,tos,priority], if such key already
1142 * exists or to the node before which we will insert new one.
1143 *
1144 * If fa is NULL, we will need to allocate a new one and
1145 * insert to the head of f.
1146 *
1147 * If f is NULL, no fib node matched the destination key
1148 * and we need to allocate a new one of those as well.
1149 */
1150
Julian Anastasov936f6f82008-01-28 21:18:06 -08001151 if (fa && fa->fa_tos == tos &&
1152 fa->fa_info->fib_priority == fi->fib_priority) {
1153 struct fib_alias *fa_first, *fa_match;
Robert Olsson19baf832005-06-21 12:43:18 -07001154
1155 err = -EEXIST;
Thomas Graf4e902c52006-08-17 18:14:52 -07001156 if (cfg->fc_nlflags & NLM_F_EXCL)
Robert Olsson19baf832005-06-21 12:43:18 -07001157 goto out;
1158
Julian Anastasov936f6f82008-01-28 21:18:06 -08001159 /* We have 2 goals:
1160 * 1. Find exact match for type, scope, fib_info to avoid
1161 * duplicate routes
1162 * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
1163 */
1164 fa_match = NULL;
1165 fa_first = fa;
1166 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1167 list_for_each_entry_continue(fa, fa_head, fa_list) {
1168 if (fa->fa_tos != tos)
1169 break;
1170 if (fa->fa_info->fib_priority != fi->fib_priority)
1171 break;
1172 if (fa->fa_type == cfg->fc_type &&
Julian Anastasov936f6f82008-01-28 21:18:06 -08001173 fa->fa_info == fi) {
1174 fa_match = fa;
1175 break;
1176 }
1177 }
1178
Thomas Graf4e902c52006-08-17 18:14:52 -07001179 if (cfg->fc_nlflags & NLM_F_REPLACE) {
Robert Olsson19baf832005-06-21 12:43:18 -07001180 struct fib_info *fi_drop;
1181 u8 state;
1182
Julian Anastasov936f6f82008-01-28 21:18:06 -08001183 fa = fa_first;
1184 if (fa_match) {
1185 if (fa == fa_match)
1186 err = 0;
Joonwoo Park67250332008-01-18 03:45:18 -08001187 goto out;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001188 }
Robert Olsson2373ce12005-08-25 13:01:29 -07001189 err = -ENOBUFS;
Christoph Lametere94b1762006-12-06 20:33:17 -08001190 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -07001191 if (new_fa == NULL)
1192 goto out;
Robert Olsson19baf832005-06-21 12:43:18 -07001193
1194 fi_drop = fa->fa_info;
Robert Olsson2373ce12005-08-25 13:01:29 -07001195 new_fa->fa_tos = fa->fa_tos;
1196 new_fa->fa_info = fi;
Thomas Graf4e902c52006-08-17 18:14:52 -07001197 new_fa->fa_type = cfg->fc_type;
Robert Olsson19baf832005-06-21 12:43:18 -07001198 state = fa->fa_state;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001199 new_fa->fa_state = state & ~FA_S_ACCESSED;
Robert Olsson19baf832005-06-21 12:43:18 -07001200
Robert Olsson2373ce12005-08-25 13:01:29 -07001201 list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
1202 alias_free_mem_rcu(fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001203
1204 fib_release_info(fi_drop);
1205 if (state & FA_S_ACCESSED)
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +00001206 rt_cache_flush(cfg->fc_nlinfo.nl_net);
Milan Kocianb8f55832007-05-23 14:55:06 -07001207 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
1208 tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
Robert Olsson19baf832005-06-21 12:43:18 -07001209
Olof Johansson91b9a272005-08-09 20:24:39 -07001210 goto succeeded;
Robert Olsson19baf832005-06-21 12:43:18 -07001211 }
1212 /* Error if we find a perfect match which
1213 * uses the same scope, type, and nexthop
1214 * information.
1215 */
Julian Anastasov936f6f82008-01-28 21:18:06 -08001216 if (fa_match)
1217 goto out;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001218
Thomas Graf4e902c52006-08-17 18:14:52 -07001219 if (!(cfg->fc_nlflags & NLM_F_APPEND))
Julian Anastasov936f6f82008-01-28 21:18:06 -08001220 fa = fa_first;
Robert Olsson19baf832005-06-21 12:43:18 -07001221 }
1222 err = -ENOENT;
Thomas Graf4e902c52006-08-17 18:14:52 -07001223 if (!(cfg->fc_nlflags & NLM_F_CREATE))
Robert Olsson19baf832005-06-21 12:43:18 -07001224 goto out;
1225
1226 err = -ENOBUFS;
Christoph Lametere94b1762006-12-06 20:33:17 -08001227 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
Robert Olsson19baf832005-06-21 12:43:18 -07001228 if (new_fa == NULL)
1229 goto out;
1230
1231 new_fa->fa_info = fi;
1232 new_fa->fa_tos = tos;
Thomas Graf4e902c52006-08-17 18:14:52 -07001233 new_fa->fa_type = cfg->fc_type;
Robert Olsson19baf832005-06-21 12:43:18 -07001234 new_fa->fa_state = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001235 /*
1236 * Insert new entry to the list.
1237 */
1238
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001239 if (!fa_head) {
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001240 fa_head = fib_insert_node(t, key, plen);
1241 if (unlikely(!fa_head)) {
1242 err = -ENOMEM;
Robert Olssonf835e472005-06-28 15:00:39 -07001243 goto out_free_new_fa;
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001244 }
Robert Olssonf835e472005-06-28 15:00:39 -07001245 }
Robert Olsson19baf832005-06-21 12:43:18 -07001246
David S. Miller21d8c492011-04-14 14:49:37 -07001247 if (!plen)
1248 tb->tb_num_default++;
1249
Robert Olsson2373ce12005-08-25 13:01:29 -07001250 list_add_tail_rcu(&new_fa->fa_list,
1251 (fa ? &fa->fa_list : fa_head));
Robert Olsson19baf832005-06-21 12:43:18 -07001252
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +00001253 rt_cache_flush(cfg->fc_nlinfo.nl_net);
Thomas Graf4e902c52006-08-17 18:14:52 -07001254 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
Milan Kocianb8f55832007-05-23 14:55:06 -07001255 &cfg->fc_nlinfo, 0);
Robert Olsson19baf832005-06-21 12:43:18 -07001256succeeded:
1257 return 0;
Robert Olssonf835e472005-06-28 15:00:39 -07001258
1259out_free_new_fa:
1260 kmem_cache_free(fn_alias_kmem, new_fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001261out:
1262 fib_release_info(fi);
Olof Johansson91b9a272005-08-09 20:24:39 -07001263err:
Robert Olsson19baf832005-06-21 12:43:18 -07001264 return err;
1265}
1266
Robert Olsson772cb712005-09-19 15:31:18 -07001267/* should be called with rcu_read_lock */
Alexander Duyckadaf9812014-12-31 10:55:47 -08001268static int check_leaf(struct fib_table *tb, struct trie *t, struct tnode *l,
David S. Miller22bd5b92011-03-11 19:54:08 -05001269 t_key key, const struct flowi4 *flp,
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001270 struct fib_result *res, int fib_flags)
Robert Olsson19baf832005-06-21 12:43:18 -07001271{
Robert Olsson19baf832005-06-21 12:43:18 -07001272 struct leaf_info *li;
1273 struct hlist_head *hhead = &l->list;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001274
Sasha Levinb67bfe02013-02-27 17:06:00 -08001275 hlist_for_each_entry_rcu(li, hhead, hlist) {
David S. Miller3be06862011-03-07 15:01:10 -08001276 struct fib_alias *fa;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001277
Eric Dumazet5c745012011-07-18 03:16:33 +00001278 if (l->key != (key & li->mask_plen))
Robert Olsson19baf832005-06-21 12:43:18 -07001279 continue;
1280
David S. Miller3be06862011-03-07 15:01:10 -08001281 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
1282 struct fib_info *fi = fa->fa_info;
1283 int nhsel, err;
1284
David S. Miller22bd5b92011-03-11 19:54:08 -05001285 if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
David S. Miller3be06862011-03-07 15:01:10 -08001286 continue;
David S. Millerdccd9ecc2012-05-10 22:16:32 -04001287 if (fi->fib_dead)
1288 continue;
David S. Miller37e826c2011-03-24 18:06:47 -07001289 if (fa->fa_info->fib_scope < flp->flowi4_scope)
David S. Miller3be06862011-03-07 15:01:10 -08001290 continue;
1291 fib_alias_accessed(fa);
1292 err = fib_props[fa->fa_type].error;
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001293 if (unlikely(err < 0)) {
David S. Miller3be06862011-03-07 15:01:10 -08001294#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -08001295 this_cpu_inc(t->stats->semantic_match_passed);
David S. Miller3be06862011-03-07 15:01:10 -08001296#endif
Julian Anastasov1fbc7842011-03-25 20:33:23 -07001297 return err;
David S. Miller3be06862011-03-07 15:01:10 -08001298 }
1299 if (fi->fib_flags & RTNH_F_DEAD)
1300 continue;
1301 for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
1302 const struct fib_nh *nh = &fi->fib_nh[nhsel];
1303
1304 if (nh->nh_flags & RTNH_F_DEAD)
1305 continue;
David S. Miller22bd5b92011-03-11 19:54:08 -05001306 if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif)
David S. Miller3be06862011-03-07 15:01:10 -08001307 continue;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001308
Robert Olsson19baf832005-06-21 12:43:18 -07001309#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -08001310 this_cpu_inc(t->stats->semantic_match_passed);
Robert Olsson19baf832005-06-21 12:43:18 -07001311#endif
Eric Dumazet5c745012011-07-18 03:16:33 +00001312 res->prefixlen = li->plen;
David S. Miller3be06862011-03-07 15:01:10 -08001313 res->nh_sel = nhsel;
1314 res->type = fa->fa_type;
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001315 res->scope = fi->fib_scope;
David S. Miller3be06862011-03-07 15:01:10 -08001316 res->fi = fi;
1317 res->table = tb;
1318 res->fa_head = &li->falh;
1319 if (!(fib_flags & FIB_LOOKUP_NOREF))
Eric Dumazet5c745012011-07-18 03:16:33 +00001320 atomic_inc(&fi->fib_clntref);
David S. Miller3be06862011-03-07 15:01:10 -08001321 return 0;
1322 }
1323 }
1324
1325#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -08001326 this_cpu_inc(t->stats->semantic_match_miss);
David S. Miller3be06862011-03-07 15:01:10 -08001327#endif
Robert Olsson19baf832005-06-21 12:43:18 -07001328 }
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001329
Ben Hutchings2e655572008-07-10 16:52:52 -07001330 return 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001331}
1332
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001333static inline t_key prefix_mismatch(t_key key, struct tnode *n)
1334{
1335 t_key prefix = n->key;
1336
1337 return (key ^ prefix) & (prefix | -prefix);
1338}
1339
David S. Miller22bd5b92011-03-11 19:54:08 -05001340int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001341 struct fib_result *res, int fib_flags)
Robert Olsson19baf832005-06-21 12:43:18 -07001342{
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001343 struct trie *t = (struct trie *)tb->tb_data;
Alexander Duyck8274a972014-12-31 10:55:29 -08001344#ifdef CONFIG_IP_FIB_TRIE_STATS
1345 struct trie_use_stats __percpu *stats = t->stats;
1346#endif
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001347 const t_key key = ntohl(flp->daddr);
1348 struct tnode *n, *pn;
1349 t_key cindex;
1350 int ret = 1;
Olof Johansson91b9a272005-08-09 20:24:39 -07001351
Robert Olsson2373ce12005-08-25 13:01:29 -07001352 rcu_read_lock();
Robert Olsson19baf832005-06-21 12:43:18 -07001353
Robert Olsson2373ce12005-08-25 13:01:29 -07001354 n = rcu_dereference(t->trie);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001355 if (!n)
Robert Olsson19baf832005-06-21 12:43:18 -07001356 goto failed;
1357
1358#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -08001359 this_cpu_inc(stats->gets);
Robert Olsson19baf832005-06-21 12:43:18 -07001360#endif
1361
Alexander Duyckadaf9812014-12-31 10:55:47 -08001362 pn = n;
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001363 cindex = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001364
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001365 /* Step 1: Travel to the longest prefix match in the trie */
1366 for (;;) {
1367 unsigned long index = get_index(key, n);
Robert Olsson19baf832005-06-21 12:43:18 -07001368
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001369 /* This bit of code is a bit tricky but it combines multiple
1370 * checks into a single check. The prefix consists of the
1371 * prefix plus zeros for the "bits" in the prefix. The index
1372 * is the difference between the key and this value. From
1373 * this we can actually derive several pieces of data.
1374 * if !(index >> bits)
1375 * we know the value is child index
1376 * else
1377 * we have a mismatch in skip bits and failed
1378 */
1379 if (index >> n->bits)
1380 break;
Robert Olsson19baf832005-06-21 12:43:18 -07001381
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001382 /* we have found a leaf. Prefixes have already been compared */
1383 if (IS_LEAF(n))
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001384 goto found;
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001385
1386 /* only record pn and cindex if we are going to be chopping
1387 * bits later. Otherwise we are just wasting cycles.
1388 */
1389 if (index) {
1390 pn = n;
1391 cindex = index;
Olof Johansson91b9a272005-08-09 20:24:39 -07001392 }
1393
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001394 n = rcu_dereference(n->child[index]);
1395 if (unlikely(!n))
Robert Olsson19baf832005-06-21 12:43:18 -07001396 goto backtrace;
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001397 }
1398
1399 /* Step 2: Sort out leaves and begin backtracing for longest prefix */
1400 for (;;) {
1401 /* record the pointer where our next node pointer is stored */
1402 struct tnode __rcu **cptr = n->child;
1403
1404 /* This test verifies that none of the bits that differ
1405 * between the key and the prefix exist in the region of
1406 * the lsb and higher in the prefix.
1407 */
1408 if (unlikely(prefix_mismatch(key, n)))
1409 goto backtrace;
1410
1411 /* exit out and process leaf */
1412 if (unlikely(IS_LEAF(n)))
1413 break;
1414
1415 /* Don't bother recording parent info. Since we are in
1416 * prefix match mode we will have to come back to wherever
1417 * we started this traversal anyway
1418 */
1419
1420 while ((n = rcu_dereference(*cptr)) == NULL) {
1421backtrace:
1422#ifdef CONFIG_IP_FIB_TRIE_STATS
1423 if (!n)
1424 this_cpu_inc(stats->null_node_hit);
1425#endif
1426 /* If we are at cindex 0 there are no more bits for
1427 * us to strip at this level so we must ascend back
1428 * up one level to see if there are any more bits to
1429 * be stripped there.
1430 */
1431 while (!cindex) {
1432 t_key pkey = pn->key;
1433
1434 pn = node_parent_rcu(pn);
1435 if (unlikely(!pn))
1436 goto failed;
1437#ifdef CONFIG_IP_FIB_TRIE_STATS
1438 this_cpu_inc(stats->backtrack);
1439#endif
1440 /* Get Child's index */
1441 cindex = get_index(pkey, pn);
1442 }
1443
1444 /* strip the least significant bit from the cindex */
1445 cindex &= cindex - 1;
1446
1447 /* grab pointer for next child node */
1448 cptr = &pn->child[cindex];
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001449 }
Robert Olsson19baf832005-06-21 12:43:18 -07001450 }
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001451
Robert Olsson19baf832005-06-21 12:43:18 -07001452found:
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001453 /* Step 3: Process the leaf, if that fails fall back to backtracing */
1454 ret = check_leaf(tb, t, n, key, flp, res, fib_flags);
1455 if (unlikely(ret > 0))
1456 goto backtrace;
1457failed:
Robert Olsson2373ce12005-08-25 13:01:29 -07001458 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001459 return ret;
1460}
Florian Westphal6fc01432011-08-25 13:46:12 +02001461EXPORT_SYMBOL_GPL(fib_table_lookup);
Robert Olsson19baf832005-06-21 12:43:18 -07001462
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001463/*
1464 * Remove the leaf and return parent.
1465 */
Alexander Duyckadaf9812014-12-31 10:55:47 -08001466static void trie_leaf_remove(struct trie *t, struct tnode *l)
Robert Olsson19baf832005-06-21 12:43:18 -07001467{
Alexander Duyck64c9b6f2014-12-31 10:55:35 -08001468 struct tnode *tp = node_parent(l);
Robert Olsson19baf832005-06-21 12:43:18 -07001469
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001470 pr_debug("entering trie_leaf_remove(%p)\n", l);
Robert Olsson19baf832005-06-21 12:43:18 -07001471
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001472 if (tp) {
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001473 t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits);
Lin Ming61648d92012-07-29 02:00:03 +00001474 put_child(tp, cindex, NULL);
Jarek Poplawski7b855762009-06-18 00:28:51 -07001475 trie_rebalance(t, tp);
Olof Johansson91b9a272005-08-09 20:24:39 -07001476 } else
Stephen Hemmingera9b3cd72011-08-01 16:19:00 +00001477 RCU_INIT_POINTER(t->trie, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -07001478
Alexander Duyck37fd30f2014-12-31 10:55:41 -08001479 node_free(l);
Robert Olsson19baf832005-06-21 12:43:18 -07001480}
1481
Robert Olssond562f1f2007-03-26 14:22:22 -07001482/*
1483 * Caller must hold RTNL.
1484 */
Stephen Hemminger16c6cf82009-09-20 10:35:36 +00001485int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
Robert Olsson19baf832005-06-21 12:43:18 -07001486{
1487 struct trie *t = (struct trie *) tb->tb_data;
1488 u32 key, mask;
Thomas Graf4e902c52006-08-17 18:14:52 -07001489 int plen = cfg->fc_dst_len;
1490 u8 tos = cfg->fc_tos;
Robert Olsson19baf832005-06-21 12:43:18 -07001491 struct fib_alias *fa, *fa_to_delete;
1492 struct list_head *fa_head;
Alexander Duyckadaf9812014-12-31 10:55:47 -08001493 struct tnode *l;
Olof Johansson91b9a272005-08-09 20:24:39 -07001494 struct leaf_info *li;
1495
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001496 if (plen > 32)
Robert Olsson19baf832005-06-21 12:43:18 -07001497 return -EINVAL;
1498
Thomas Graf4e902c52006-08-17 18:14:52 -07001499 key = ntohl(cfg->fc_dst);
Olof Johansson91b9a272005-08-09 20:24:39 -07001500 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001501
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001502 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001503 return -EINVAL;
1504
1505 key = key & mask;
1506 l = fib_find_node(t, key);
1507
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001508 if (!l)
Robert Olsson19baf832005-06-21 12:43:18 -07001509 return -ESRCH;
1510
Igor Maravicad5b3102012-08-13 10:26:08 +02001511 li = find_leaf_info(l, plen);
1512
1513 if (!li)
1514 return -ESRCH;
1515
1516 fa_head = &li->falh;
Robert Olsson19baf832005-06-21 12:43:18 -07001517 fa = fib_find_alias(fa_head, tos, 0);
1518
1519 if (!fa)
1520 return -ESRCH;
1521
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001522 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
Robert Olsson19baf832005-06-21 12:43:18 -07001523
1524 fa_to_delete = NULL;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001525 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1526 list_for_each_entry_continue(fa, fa_head, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001527 struct fib_info *fi = fa->fa_info;
1528
1529 if (fa->fa_tos != tos)
1530 break;
1531
Thomas Graf4e902c52006-08-17 18:14:52 -07001532 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
1533 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
David S. Miller37e826c2011-03-24 18:06:47 -07001534 fa->fa_info->fib_scope == cfg->fc_scope) &&
Julian Anastasov74cb3c12011-03-19 12:13:46 +00001535 (!cfg->fc_prefsrc ||
1536 fi->fib_prefsrc == cfg->fc_prefsrc) &&
Thomas Graf4e902c52006-08-17 18:14:52 -07001537 (!cfg->fc_protocol ||
1538 fi->fib_protocol == cfg->fc_protocol) &&
1539 fib_nh_match(cfg, fi) == 0) {
Robert Olsson19baf832005-06-21 12:43:18 -07001540 fa_to_delete = fa;
1541 break;
1542 }
1543 }
1544
Olof Johansson91b9a272005-08-09 20:24:39 -07001545 if (!fa_to_delete)
1546 return -ESRCH;
Robert Olsson19baf832005-06-21 12:43:18 -07001547
Olof Johansson91b9a272005-08-09 20:24:39 -07001548 fa = fa_to_delete;
Thomas Graf4e902c52006-08-17 18:14:52 -07001549 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
Milan Kocianb8f55832007-05-23 14:55:06 -07001550 &cfg->fc_nlinfo, 0);
Robert Olsson19baf832005-06-21 12:43:18 -07001551
Robert Olsson2373ce12005-08-25 13:01:29 -07001552 list_del_rcu(&fa->fa_list);
Robert Olsson19baf832005-06-21 12:43:18 -07001553
David S. Miller21d8c492011-04-14 14:49:37 -07001554 if (!plen)
1555 tb->tb_num_default--;
1556
Olof Johansson91b9a272005-08-09 20:24:39 -07001557 if (list_empty(fa_head)) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001558 hlist_del_rcu(&li->hlist);
Olof Johansson91b9a272005-08-09 20:24:39 -07001559 free_leaf_info(li);
Robert Olsson2373ce12005-08-25 13:01:29 -07001560 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001561
1562 if (hlist_empty(&l->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001563 trie_leaf_remove(t, l);
Olof Johansson91b9a272005-08-09 20:24:39 -07001564
1565 if (fa->fa_state & FA_S_ACCESSED)
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +00001566 rt_cache_flush(cfg->fc_nlinfo.nl_net);
Olof Johansson91b9a272005-08-09 20:24:39 -07001567
Robert Olsson2373ce12005-08-25 13:01:29 -07001568 fib_release_info(fa->fa_info);
1569 alias_free_mem_rcu(fa);
Olof Johansson91b9a272005-08-09 20:24:39 -07001570 return 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001571}
1572
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001573static int trie_flush_list(struct list_head *head)
Robert Olsson19baf832005-06-21 12:43:18 -07001574{
1575 struct fib_alias *fa, *fa_node;
1576 int found = 0;
1577
1578 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1579 struct fib_info *fi = fa->fa_info;
Robert Olsson19baf832005-06-21 12:43:18 -07001580
Robert Olsson2373ce12005-08-25 13:01:29 -07001581 if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
1582 list_del_rcu(&fa->fa_list);
1583 fib_release_info(fa->fa_info);
1584 alias_free_mem_rcu(fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001585 found++;
1586 }
1587 }
1588 return found;
1589}
1590
Alexander Duyckadaf9812014-12-31 10:55:47 -08001591static int trie_flush_leaf(struct tnode *l)
Robert Olsson19baf832005-06-21 12:43:18 -07001592{
1593 int found = 0;
1594 struct hlist_head *lih = &l->list;
Sasha Levinb67bfe02013-02-27 17:06:00 -08001595 struct hlist_node *tmp;
Robert Olsson19baf832005-06-21 12:43:18 -07001596 struct leaf_info *li = NULL;
1597
Sasha Levinb67bfe02013-02-27 17:06:00 -08001598 hlist_for_each_entry_safe(li, tmp, lih, hlist) {
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001599 found += trie_flush_list(&li->falh);
Robert Olsson19baf832005-06-21 12:43:18 -07001600
1601 if (list_empty(&li->falh)) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001602 hlist_del_rcu(&li->hlist);
Robert Olsson19baf832005-06-21 12:43:18 -07001603 free_leaf_info(li);
1604 }
1605 }
1606 return found;
1607}
1608
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001609/*
1610 * Scan for the next right leaf starting at node p->child[idx]
1611 * Since we have back pointer, no recursion necessary.
1612 */
Alexander Duyckadaf9812014-12-31 10:55:47 -08001613static struct tnode *leaf_walk_rcu(struct tnode *p, struct tnode *c)
Robert Olsson19baf832005-06-21 12:43:18 -07001614{
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001615 do {
1616 t_key idx;
Robert Olsson19baf832005-06-21 12:43:18 -07001617
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001618 if (c)
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001619 idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001620 else
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001621 idx = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001622
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001623 while (idx < 1u << p->bits) {
1624 c = tnode_get_child_rcu(p, idx++);
Robert Olsson2373ce12005-08-25 13:01:29 -07001625 if (!c)
Olof Johansson91b9a272005-08-09 20:24:39 -07001626 continue;
Robert Olsson19baf832005-06-21 12:43:18 -07001627
Eric Dumazetaab515d2013-08-05 11:18:49 -07001628 if (IS_LEAF(c))
Alexander Duyckadaf9812014-12-31 10:55:47 -08001629 return c;
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001630
1631 /* Rescan start scanning in new node */
Alexander Duyckadaf9812014-12-31 10:55:47 -08001632 p = c;
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001633 idx = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001634 }
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001635
1636 /* Node empty, walk back up to parent */
Alexander Duyckadaf9812014-12-31 10:55:47 -08001637 c = p;
Eric Dumazeta034ee32010-09-09 23:32:28 +00001638 } while ((p = node_parent_rcu(c)) != NULL);
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001639
1640 return NULL; /* Root of trie */
1641}
1642
Alexander Duyckadaf9812014-12-31 10:55:47 -08001643static struct tnode *trie_firstleaf(struct trie *t)
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001644{
Alexander Duyckadaf9812014-12-31 10:55:47 -08001645 struct tnode *n = rcu_dereference_rtnl(t->trie);
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001646
1647 if (!n)
1648 return NULL;
1649
1650 if (IS_LEAF(n)) /* trie is just a leaf */
Alexander Duyckadaf9812014-12-31 10:55:47 -08001651 return n;
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001652
1653 return leaf_walk_rcu(n, NULL);
1654}
1655
Alexander Duyckadaf9812014-12-31 10:55:47 -08001656static struct tnode *trie_nextleaf(struct tnode *l)
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001657{
Alexander Duyckadaf9812014-12-31 10:55:47 -08001658 struct tnode *p = node_parent_rcu(l);
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001659
1660 if (!p)
1661 return NULL; /* trie with just one leaf */
1662
Alexander Duyckadaf9812014-12-31 10:55:47 -08001663 return leaf_walk_rcu(p, l);
Robert Olsson19baf832005-06-21 12:43:18 -07001664}
1665
Alexander Duyckadaf9812014-12-31 10:55:47 -08001666static struct tnode *trie_leafindex(struct trie *t, int index)
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001667{
Alexander Duyckadaf9812014-12-31 10:55:47 -08001668 struct tnode *l = trie_firstleaf(t);
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001669
Stephen Hemmingerec28cf72008-02-11 21:12:49 -08001670 while (l && index-- > 0)
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001671 l = trie_nextleaf(l);
Stephen Hemmingerec28cf72008-02-11 21:12:49 -08001672
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001673 return l;
1674}
1675
1676
Robert Olssond562f1f2007-03-26 14:22:22 -07001677/*
1678 * Caller must hold RTNL.
1679 */
Stephen Hemminger16c6cf82009-09-20 10:35:36 +00001680int fib_table_flush(struct fib_table *tb)
Robert Olsson19baf832005-06-21 12:43:18 -07001681{
1682 struct trie *t = (struct trie *) tb->tb_data;
Alexander Duyckadaf9812014-12-31 10:55:47 -08001683 struct tnode *l, *ll = NULL;
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001684 int found = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001685
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001686 for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) {
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001687 found += trie_flush_leaf(l);
Robert Olsson19baf832005-06-21 12:43:18 -07001688
1689 if (ll && hlist_empty(&ll->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001690 trie_leaf_remove(t, ll);
Robert Olsson19baf832005-06-21 12:43:18 -07001691 ll = l;
1692 }
1693
1694 if (ll && hlist_empty(&ll->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001695 trie_leaf_remove(t, ll);
Robert Olsson19baf832005-06-21 12:43:18 -07001696
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001697 pr_debug("trie_flush found=%d\n", found);
Robert Olsson19baf832005-06-21 12:43:18 -07001698 return found;
1699}
1700
Pavel Emelyanov4aa2c462010-10-28 02:00:43 +00001701void fib_free_table(struct fib_table *tb)
1702{
Alexander Duyck8274a972014-12-31 10:55:29 -08001703#ifdef CONFIG_IP_FIB_TRIE_STATS
1704 struct trie *t = (struct trie *)tb->tb_data;
1705
1706 free_percpu(t->stats);
1707#endif /* CONFIG_IP_FIB_TRIE_STATS */
Pavel Emelyanov4aa2c462010-10-28 02:00:43 +00001708 kfree(tb);
1709}
1710
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001711static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
1712 struct fib_table *tb,
Robert Olsson19baf832005-06-21 12:43:18 -07001713 struct sk_buff *skb, struct netlink_callback *cb)
1714{
1715 int i, s_i;
1716 struct fib_alias *fa;
Al Viro32ab5f82006-09-26 22:21:45 -07001717 __be32 xkey = htonl(key);
Robert Olsson19baf832005-06-21 12:43:18 -07001718
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001719 s_i = cb->args[5];
Robert Olsson19baf832005-06-21 12:43:18 -07001720 i = 0;
1721
Robert Olsson2373ce12005-08-25 13:01:29 -07001722 /* rcu_read_lock is hold by caller */
1723
1724 list_for_each_entry_rcu(fa, fah, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001725 if (i < s_i) {
1726 i++;
1727 continue;
1728 }
Robert Olsson19baf832005-06-21 12:43:18 -07001729
Eric W. Biederman15e47302012-09-07 20:12:54 +00001730 if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
Robert Olsson19baf832005-06-21 12:43:18 -07001731 cb->nlh->nlmsg_seq,
1732 RTM_NEWROUTE,
1733 tb->tb_id,
1734 fa->fa_type,
Thomas Grafbe403ea2006-08-17 18:15:17 -07001735 xkey,
Robert Olsson19baf832005-06-21 12:43:18 -07001736 plen,
1737 fa->fa_tos,
Stephen Hemminger64347f72008-01-22 21:55:01 -08001738 fa->fa_info, NLM_F_MULTI) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001739 cb->args[5] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001740 return -1;
Olof Johansson91b9a272005-08-09 20:24:39 -07001741 }
Robert Olsson19baf832005-06-21 12:43:18 -07001742 i++;
1743 }
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001744 cb->args[5] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001745 return skb->len;
1746}
1747
Alexander Duyckadaf9812014-12-31 10:55:47 -08001748static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb,
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001749 struct sk_buff *skb, struct netlink_callback *cb)
Robert Olsson19baf832005-06-21 12:43:18 -07001750{
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001751 struct leaf_info *li;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001752 int i, s_i;
Robert Olsson19baf832005-06-21 12:43:18 -07001753
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001754 s_i = cb->args[4];
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001755 i = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001756
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001757 /* rcu_read_lock is hold by caller */
Sasha Levinb67bfe02013-02-27 17:06:00 -08001758 hlist_for_each_entry_rcu(li, &l->list, hlist) {
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001759 if (i < s_i) {
1760 i++;
Robert Olsson19baf832005-06-21 12:43:18 -07001761 continue;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001762 }
Robert Olsson19baf832005-06-21 12:43:18 -07001763
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001764 if (i > s_i)
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001765 cb->args[5] = 0;
Olof Johansson91b9a272005-08-09 20:24:39 -07001766
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001767 if (list_empty(&li->falh))
Robert Olsson19baf832005-06-21 12:43:18 -07001768 continue;
1769
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001770 if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001771 cb->args[4] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001772 return -1;
1773 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001774 i++;
Robert Olsson19baf832005-06-21 12:43:18 -07001775 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001776
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001777 cb->args[4] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001778 return skb->len;
1779}
1780
Stephen Hemminger16c6cf82009-09-20 10:35:36 +00001781int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
1782 struct netlink_callback *cb)
Robert Olsson19baf832005-06-21 12:43:18 -07001783{
Alexander Duyckadaf9812014-12-31 10:55:47 -08001784 struct tnode *l;
Robert Olsson19baf832005-06-21 12:43:18 -07001785 struct trie *t = (struct trie *) tb->tb_data;
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001786 t_key key = cb->args[2];
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001787 int count = cb->args[3];
Robert Olsson19baf832005-06-21 12:43:18 -07001788
Robert Olsson2373ce12005-08-25 13:01:29 -07001789 rcu_read_lock();
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001790 /* Dump starting at last key.
1791 * Note: 0.0.0.0/0 (ie default) is first key.
1792 */
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001793 if (count == 0)
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001794 l = trie_firstleaf(t);
1795 else {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001796 /* Normally, continue from last key, but if that is missing
1797 * fallback to using slow rescan
1798 */
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001799 l = fib_find_node(t, key);
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001800 if (!l)
1801 l = trie_leafindex(t, count);
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001802 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001803
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001804 while (l) {
1805 cb->args[2] = l->key;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001806 if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001807 cb->args[3] = count;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001808 rcu_read_unlock();
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001809 return -1;
Robert Olsson19baf832005-06-21 12:43:18 -07001810 }
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001811
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001812 ++count;
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001813 l = trie_nextleaf(l);
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001814 memset(&cb->args[4], 0,
1815 sizeof(cb->args) - 4*sizeof(cb->args[0]));
Robert Olsson19baf832005-06-21 12:43:18 -07001816 }
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001817 cb->args[3] = count;
Robert Olsson2373ce12005-08-25 13:01:29 -07001818 rcu_read_unlock();
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001819
Robert Olsson19baf832005-06-21 12:43:18 -07001820 return skb->len;
Robert Olsson19baf832005-06-21 12:43:18 -07001821}
1822
David S. Miller5348ba82011-02-01 15:30:56 -08001823void __init fib_trie_init(void)
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001824{
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001825 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
1826 sizeof(struct fib_alias),
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -08001827 0, SLAB_PANIC, NULL);
1828
1829 trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
Alexander Duyckadaf9812014-12-31 10:55:47 -08001830 max(sizeof(struct tnode),
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -08001831 sizeof(struct leaf_info)),
1832 0, SLAB_PANIC, NULL);
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001833}
Robert Olsson19baf832005-06-21 12:43:18 -07001834
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001835
David S. Miller5348ba82011-02-01 15:30:56 -08001836struct fib_table *fib_trie_table(u32 id)
Robert Olsson19baf832005-06-21 12:43:18 -07001837{
1838 struct fib_table *tb;
1839 struct trie *t;
1840
Robert Olsson19baf832005-06-21 12:43:18 -07001841 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
1842 GFP_KERNEL);
1843 if (tb == NULL)
1844 return NULL;
1845
1846 tb->tb_id = id;
Denis V. Lunev971b8932007-12-08 00:32:23 -08001847 tb->tb_default = -1;
David S. Miller21d8c492011-04-14 14:49:37 -07001848 tb->tb_num_default = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001849
1850 t = (struct trie *) tb->tb_data;
Alexander Duyck8274a972014-12-31 10:55:29 -08001851 RCU_INIT_POINTER(t->trie, NULL);
1852#ifdef CONFIG_IP_FIB_TRIE_STATS
1853 t->stats = alloc_percpu(struct trie_use_stats);
1854 if (!t->stats) {
1855 kfree(tb);
1856 tb = NULL;
1857 }
1858#endif
Robert Olsson19baf832005-06-21 12:43:18 -07001859
Robert Olsson19baf832005-06-21 12:43:18 -07001860 return tb;
1861}
1862
Robert Olsson19baf832005-06-21 12:43:18 -07001863#ifdef CONFIG_PROC_FS
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001864/* Depth first Trie walk iterator */
1865struct fib_trie_iter {
Denis V. Lunev1c340b22008-01-10 03:27:17 -08001866 struct seq_net_private p;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07001867 struct fib_table *tb;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001868 struct tnode *tnode;
Eric Dumazeta034ee32010-09-09 23:32:28 +00001869 unsigned int index;
1870 unsigned int depth;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001871};
Robert Olsson19baf832005-06-21 12:43:18 -07001872
Alexander Duyckadaf9812014-12-31 10:55:47 -08001873static struct tnode *fib_trie_get_next(struct fib_trie_iter *iter)
Robert Olsson19baf832005-06-21 12:43:18 -07001874{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001875 struct tnode *tn = iter->tnode;
Eric Dumazeta034ee32010-09-09 23:32:28 +00001876 unsigned int cindex = iter->index;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001877 struct tnode *p;
1878
Eric W. Biederman6640e692007-01-24 14:42:04 -08001879 /* A single entry routing table */
1880 if (!tn)
1881 return NULL;
1882
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001883 pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
1884 iter->tnode, iter->index, iter->depth);
1885rescan:
1886 while (cindex < (1<<tn->bits)) {
Alexander Duyckadaf9812014-12-31 10:55:47 -08001887 struct tnode *n = tnode_get_child_rcu(tn, cindex);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001888
1889 if (n) {
1890 if (IS_LEAF(n)) {
1891 iter->tnode = tn;
1892 iter->index = cindex + 1;
1893 } else {
1894 /* push down one level */
Alexander Duyckadaf9812014-12-31 10:55:47 -08001895 iter->tnode = n;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001896 iter->index = 0;
1897 ++iter->depth;
1898 }
1899 return n;
1900 }
1901
1902 ++cindex;
1903 }
1904
1905 /* Current node exhausted, pop back up */
Alexander Duyckadaf9812014-12-31 10:55:47 -08001906 p = node_parent_rcu(tn);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001907 if (p) {
1908 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
1909 tn = p;
1910 --iter->depth;
1911 goto rescan;
1912 }
1913
1914 /* got root? */
Robert Olsson19baf832005-06-21 12:43:18 -07001915 return NULL;
1916}
1917
Alexander Duyckadaf9812014-12-31 10:55:47 -08001918static struct tnode *fib_trie_get_first(struct fib_trie_iter *iter,
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001919 struct trie *t)
Robert Olsson19baf832005-06-21 12:43:18 -07001920{
Alexander Duyckadaf9812014-12-31 10:55:47 -08001921 struct tnode *n;
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08001922
Stephen Hemminger132adf52007-03-08 20:44:43 -08001923 if (!t)
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08001924 return NULL;
1925
1926 n = rcu_dereference(t->trie);
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07001927 if (!n)
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08001928 return NULL;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001929
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07001930 if (IS_TNODE(n)) {
Alexander Duyckadaf9812014-12-31 10:55:47 -08001931 iter->tnode = n;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07001932 iter->index = 0;
1933 iter->depth = 1;
1934 } else {
1935 iter->tnode = NULL;
1936 iter->index = 0;
1937 iter->depth = 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001938 }
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07001939
1940 return n;
Robert Olsson19baf832005-06-21 12:43:18 -07001941}
1942
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001943static void trie_collect_stats(struct trie *t, struct trie_stat *s)
Robert Olsson19baf832005-06-21 12:43:18 -07001944{
Alexander Duyckadaf9812014-12-31 10:55:47 -08001945 struct tnode *n;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001946 struct fib_trie_iter iter;
Robert Olsson19baf832005-06-21 12:43:18 -07001947
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001948 memset(s, 0, sizeof(*s));
Robert Olsson19baf832005-06-21 12:43:18 -07001949
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001950 rcu_read_lock();
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07001951 for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001952 if (IS_LEAF(n)) {
Stephen Hemminger93672292008-01-22 21:54:05 -08001953 struct leaf_info *li;
Stephen Hemminger93672292008-01-22 21:54:05 -08001954
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001955 s->leaves++;
1956 s->totdepth += iter.depth;
1957 if (iter.depth > s->maxdepth)
1958 s->maxdepth = iter.depth;
Stephen Hemminger93672292008-01-22 21:54:05 -08001959
Alexander Duyckadaf9812014-12-31 10:55:47 -08001960 hlist_for_each_entry_rcu(li, &n->list, hlist)
Stephen Hemminger93672292008-01-22 21:54:05 -08001961 ++s->prefixes;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001962 } else {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001963 int i;
Robert Olsson19baf832005-06-21 12:43:18 -07001964
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001965 s->tnodes++;
Alexander Duyckadaf9812014-12-31 10:55:47 -08001966 if (n->bits < MAX_STAT_DEPTH)
1967 s->nodesizes[n->bits]++;
Robert Olsson06ef9212006-03-20 21:35:01 -08001968
Alexander Duyckadaf9812014-12-31 10:55:47 -08001969 for (i = 0; i < tnode_child_length(n); i++)
1970 if (!rcu_access_pointer(n->child[i]))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001971 s->nullpointers++;
1972 }
1973 }
1974 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001975}
1976
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001977/*
Robert Olsson19baf832005-06-21 12:43:18 -07001978 * This outputs /proc/net/fib_triestats
Robert Olsson19baf832005-06-21 12:43:18 -07001979 */
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001980static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
Robert Olsson19baf832005-06-21 12:43:18 -07001981{
Eric Dumazeta034ee32010-09-09 23:32:28 +00001982 unsigned int i, max, pointers, bytes, avdepth;
Robert Olsson19baf832005-06-21 12:43:18 -07001983
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001984 if (stat->leaves)
1985 avdepth = stat->totdepth*100 / stat->leaves;
1986 else
1987 avdepth = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001988
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001989 seq_printf(seq, "\tAver depth: %u.%02d\n",
1990 avdepth / 100, avdepth % 100);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001991 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
Robert Olsson19baf832005-06-21 12:43:18 -07001992
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001993 seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
Alexander Duyckadaf9812014-12-31 10:55:47 -08001994 bytes = sizeof(struct tnode) * stat->leaves;
Stephen Hemminger93672292008-01-22 21:54:05 -08001995
1996 seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes);
1997 bytes += sizeof(struct leaf_info) * stat->prefixes;
1998
Stephen Hemminger187b5182008-01-12 20:55:55 -08001999 seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002000 bytes += sizeof(struct tnode) * stat->tnodes;
Robert Olsson19baf832005-06-21 12:43:18 -07002001
Robert Olsson06ef9212006-03-20 21:35:01 -08002002 max = MAX_STAT_DEPTH;
2003 while (max > 0 && stat->nodesizes[max-1] == 0)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002004 max--;
Robert Olsson19baf832005-06-21 12:43:18 -07002005
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002006 pointers = 0;
Jerry Snitselaarf585a992013-07-22 12:01:58 -07002007 for (i = 1; i < max; i++)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002008 if (stat->nodesizes[i] != 0) {
Stephen Hemminger187b5182008-01-12 20:55:55 -08002009 seq_printf(seq, " %u: %u", i, stat->nodesizes[i]);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002010 pointers += (1<<i) * stat->nodesizes[i];
2011 }
2012 seq_putc(seq, '\n');
Stephen Hemminger187b5182008-01-12 20:55:55 -08002013 seq_printf(seq, "\tPointers: %u\n", pointers);
Robert Olsson19baf832005-06-21 12:43:18 -07002014
Alexander Duyckadaf9812014-12-31 10:55:47 -08002015 bytes += sizeof(struct tnode *) * pointers;
Stephen Hemminger187b5182008-01-12 20:55:55 -08002016 seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2017 seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024);
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002018}
Robert Olsson19baf832005-06-21 12:43:18 -07002019
2020#ifdef CONFIG_IP_FIB_TRIE_STATS
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002021static void trie_show_usage(struct seq_file *seq,
Alexander Duyck8274a972014-12-31 10:55:29 -08002022 const struct trie_use_stats __percpu *stats)
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002023{
Alexander Duyck8274a972014-12-31 10:55:29 -08002024 struct trie_use_stats s = { 0 };
2025 int cpu;
2026
2027 /* loop through all of the CPUs and gather up the stats */
2028 for_each_possible_cpu(cpu) {
2029 const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu);
2030
2031 s.gets += pcpu->gets;
2032 s.backtrack += pcpu->backtrack;
2033 s.semantic_match_passed += pcpu->semantic_match_passed;
2034 s.semantic_match_miss += pcpu->semantic_match_miss;
2035 s.null_node_hit += pcpu->null_node_hit;
2036 s.resize_node_skipped += pcpu->resize_node_skipped;
2037 }
2038
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002039 seq_printf(seq, "\nCounters:\n---------\n");
Alexander Duyck8274a972014-12-31 10:55:29 -08002040 seq_printf(seq, "gets = %u\n", s.gets);
2041 seq_printf(seq, "backtracks = %u\n", s.backtrack);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002042 seq_printf(seq, "semantic match passed = %u\n",
Alexander Duyck8274a972014-12-31 10:55:29 -08002043 s.semantic_match_passed);
2044 seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss);
2045 seq_printf(seq, "null node hit= %u\n", s.null_node_hit);
2046 seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped);
Robert Olsson19baf832005-06-21 12:43:18 -07002047}
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002048#endif /* CONFIG_IP_FIB_TRIE_STATS */
2049
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002050static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002051{
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002052 if (tb->tb_id == RT_TABLE_LOCAL)
2053 seq_puts(seq, "Local:\n");
2054 else if (tb->tb_id == RT_TABLE_MAIN)
2055 seq_puts(seq, "Main:\n");
2056 else
2057 seq_printf(seq, "Id %d:\n", tb->tb_id);
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002058}
Robert Olsson19baf832005-06-21 12:43:18 -07002059
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002060
Robert Olsson19baf832005-06-21 12:43:18 -07002061static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2062{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002063 struct net *net = (struct net *)seq->private;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002064 unsigned int h;
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002065
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002066 seq_printf(seq,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002067 "Basic info: size of leaf:"
2068 " %Zd bytes, size of tnode: %Zd bytes.\n",
Alexander Duyckadaf9812014-12-31 10:55:47 -08002069 sizeof(struct tnode), sizeof(struct tnode));
Olof Johansson91b9a272005-08-09 20:24:39 -07002070
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002071 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2072 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002073 struct fib_table *tb;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002074
Sasha Levinb67bfe02013-02-27 17:06:00 -08002075 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002076 struct trie *t = (struct trie *) tb->tb_data;
2077 struct trie_stat stat;
2078
2079 if (!t)
2080 continue;
2081
2082 fib_table_print(seq, tb);
2083
2084 trie_collect_stats(t, &stat);
2085 trie_show_stats(seq, &stat);
2086#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -08002087 trie_show_usage(seq, t->stats);
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002088#endif
2089 }
2090 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002091
Robert Olsson19baf832005-06-21 12:43:18 -07002092 return 0;
2093}
2094
Robert Olsson19baf832005-06-21 12:43:18 -07002095static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2096{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002097 return single_open_net(inode, file, fib_triestat_seq_show);
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002098}
2099
Arjan van de Ven9a321442007-02-12 00:55:35 -08002100static const struct file_operations fib_triestat_fops = {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002101 .owner = THIS_MODULE,
2102 .open = fib_triestat_seq_open,
2103 .read = seq_read,
2104 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002105 .release = single_release_net,
Robert Olsson19baf832005-06-21 12:43:18 -07002106};
2107
Alexander Duyckadaf9812014-12-31 10:55:47 -08002108static struct tnode *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
Robert Olsson19baf832005-06-21 12:43:18 -07002109{
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002110 struct fib_trie_iter *iter = seq->private;
2111 struct net *net = seq_file_net(seq);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002112 loff_t idx = 0;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002113 unsigned int h;
Robert Olsson19baf832005-06-21 12:43:18 -07002114
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002115 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2116 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002117 struct fib_table *tb;
2118
Sasha Levinb67bfe02013-02-27 17:06:00 -08002119 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
Alexander Duyckadaf9812014-12-31 10:55:47 -08002120 struct tnode *n;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002121
2122 for (n = fib_trie_get_first(iter,
2123 (struct trie *) tb->tb_data);
2124 n; n = fib_trie_get_next(iter))
2125 if (pos == idx++) {
2126 iter->tb = tb;
2127 return n;
2128 }
2129 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002130 }
Robert Olsson19baf832005-06-21 12:43:18 -07002131
Robert Olsson19baf832005-06-21 12:43:18 -07002132 return NULL;
2133}
2134
2135static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08002136 __acquires(RCU)
Robert Olsson19baf832005-06-21 12:43:18 -07002137{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002138 rcu_read_lock();
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002139 return fib_trie_get_idx(seq, *pos);
Robert Olsson19baf832005-06-21 12:43:18 -07002140}
2141
2142static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2143{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002144 struct fib_trie_iter *iter = seq->private;
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002145 struct net *net = seq_file_net(seq);
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002146 struct fib_table *tb = iter->tb;
2147 struct hlist_node *tb_node;
2148 unsigned int h;
Alexander Duyckadaf9812014-12-31 10:55:47 -08002149 struct tnode *n;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002150
Robert Olsson19baf832005-06-21 12:43:18 -07002151 ++*pos;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002152 /* next node in same table */
2153 n = fib_trie_get_next(iter);
2154 if (n)
2155 return n;
Olof Johansson91b9a272005-08-09 20:24:39 -07002156
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002157 /* walk rest of this hash chain */
2158 h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
Eric Dumazet0a5c0472011-03-31 01:51:35 -07002159 while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) {
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002160 tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
2161 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2162 if (n)
2163 goto found;
2164 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002165
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002166 /* new hash chain */
2167 while (++h < FIB_TABLE_HASHSZ) {
2168 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
Sasha Levinb67bfe02013-02-27 17:06:00 -08002169 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002170 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2171 if (n)
2172 goto found;
2173 }
2174 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002175 return NULL;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002176
2177found:
2178 iter->tb = tb;
2179 return n;
Robert Olsson19baf832005-06-21 12:43:18 -07002180}
2181
2182static void fib_trie_seq_stop(struct seq_file *seq, void *v)
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08002183 __releases(RCU)
Robert Olsson19baf832005-06-21 12:43:18 -07002184{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002185 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07002186}
2187
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002188static void seq_indent(struct seq_file *seq, int n)
2189{
Eric Dumazeta034ee32010-09-09 23:32:28 +00002190 while (n-- > 0)
2191 seq_puts(seq, " ");
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002192}
Robert Olsson19baf832005-06-21 12:43:18 -07002193
Eric Dumazet28d36e32008-01-14 23:09:56 -08002194static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002195{
Stephen Hemminger132adf52007-03-08 20:44:43 -08002196 switch (s) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002197 case RT_SCOPE_UNIVERSE: return "universe";
2198 case RT_SCOPE_SITE: return "site";
2199 case RT_SCOPE_LINK: return "link";
2200 case RT_SCOPE_HOST: return "host";
2201 case RT_SCOPE_NOWHERE: return "nowhere";
2202 default:
Eric Dumazet28d36e32008-01-14 23:09:56 -08002203 snprintf(buf, len, "scope=%d", s);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002204 return buf;
2205 }
2206}
2207
Jan Engelhardt36cbd3d2009-08-05 10:42:58 -07002208static const char *const rtn_type_names[__RTN_MAX] = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002209 [RTN_UNSPEC] = "UNSPEC",
2210 [RTN_UNICAST] = "UNICAST",
2211 [RTN_LOCAL] = "LOCAL",
2212 [RTN_BROADCAST] = "BROADCAST",
2213 [RTN_ANYCAST] = "ANYCAST",
2214 [RTN_MULTICAST] = "MULTICAST",
2215 [RTN_BLACKHOLE] = "BLACKHOLE",
2216 [RTN_UNREACHABLE] = "UNREACHABLE",
2217 [RTN_PROHIBIT] = "PROHIBIT",
2218 [RTN_THROW] = "THROW",
2219 [RTN_NAT] = "NAT",
2220 [RTN_XRESOLVE] = "XRESOLVE",
2221};
2222
Eric Dumazeta034ee32010-09-09 23:32:28 +00002223static inline const char *rtn_type(char *buf, size_t len, unsigned int t)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002224{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002225 if (t < __RTN_MAX && rtn_type_names[t])
2226 return rtn_type_names[t];
Eric Dumazet28d36e32008-01-14 23:09:56 -08002227 snprintf(buf, len, "type %u", t);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002228 return buf;
2229}
2230
2231/* Pretty print the trie */
Robert Olsson19baf832005-06-21 12:43:18 -07002232static int fib_trie_seq_show(struct seq_file *seq, void *v)
2233{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002234 const struct fib_trie_iter *iter = seq->private;
Alexander Duyckadaf9812014-12-31 10:55:47 -08002235 struct tnode *n = v;
Robert Olsson19baf832005-06-21 12:43:18 -07002236
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002237 if (!node_parent_rcu(n))
2238 fib_table_print(seq, iter->tb);
Robert Olsson095b8502007-01-26 19:06:01 -08002239
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002240 if (IS_TNODE(n)) {
Alexander Duyckadaf9812014-12-31 10:55:47 -08002241 __be32 prf = htonl(n->key);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002242
Alexander Duyckadaf9812014-12-31 10:55:47 -08002243 seq_indent(seq, iter->depth - 1);
Harvey Harrison673d57e2008-10-31 00:53:57 -07002244 seq_printf(seq, " +-- %pI4/%d %d %d %d\n",
Alexander Duyckadaf9812014-12-31 10:55:47 -08002245 &prf, n->pos, n->bits, n->full_children,
2246 n->empty_children);
Olof Johansson91b9a272005-08-09 20:24:39 -07002247 } else {
Stephen Hemminger13280422008-01-22 21:54:37 -08002248 struct leaf_info *li;
Alexander Duyckadaf9812014-12-31 10:55:47 -08002249 __be32 val = htonl(n->key);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002250
2251 seq_indent(seq, iter->depth);
Harvey Harrison673d57e2008-10-31 00:53:57 -07002252 seq_printf(seq, " |-- %pI4\n", &val);
Eric Dumazet28d36e32008-01-14 23:09:56 -08002253
Alexander Duyckadaf9812014-12-31 10:55:47 -08002254 hlist_for_each_entry_rcu(li, &n->list, hlist) {
Stephen Hemminger13280422008-01-22 21:54:37 -08002255 struct fib_alias *fa;
Eric Dumazet28d36e32008-01-14 23:09:56 -08002256
Stephen Hemminger13280422008-01-22 21:54:37 -08002257 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2258 char buf1[32], buf2[32];
Eric Dumazet28d36e32008-01-14 23:09:56 -08002259
Stephen Hemminger13280422008-01-22 21:54:37 -08002260 seq_indent(seq, iter->depth+1);
2261 seq_printf(seq, " /%d %s %s", li->plen,
2262 rtn_scope(buf1, sizeof(buf1),
David S. Miller37e826c2011-03-24 18:06:47 -07002263 fa->fa_info->fib_scope),
Stephen Hemminger13280422008-01-22 21:54:37 -08002264 rtn_type(buf2, sizeof(buf2),
2265 fa->fa_type));
2266 if (fa->fa_tos)
Denis V. Lunevb9c4d822008-02-05 02:58:45 -08002267 seq_printf(seq, " tos=%d", fa->fa_tos);
Stephen Hemminger13280422008-01-22 21:54:37 -08002268 seq_putc(seq, '\n');
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002269 }
2270 }
Robert Olsson19baf832005-06-21 12:43:18 -07002271 }
2272
2273 return 0;
2274}
2275
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002276static const struct seq_operations fib_trie_seq_ops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002277 .start = fib_trie_seq_start,
2278 .next = fib_trie_seq_next,
2279 .stop = fib_trie_seq_stop,
2280 .show = fib_trie_seq_show,
Robert Olsson19baf832005-06-21 12:43:18 -07002281};
2282
2283static int fib_trie_seq_open(struct inode *inode, struct file *file)
2284{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002285 return seq_open_net(inode, file, &fib_trie_seq_ops,
2286 sizeof(struct fib_trie_iter));
Robert Olsson19baf832005-06-21 12:43:18 -07002287}
2288
Arjan van de Ven9a321442007-02-12 00:55:35 -08002289static const struct file_operations fib_trie_fops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002290 .owner = THIS_MODULE,
2291 .open = fib_trie_seq_open,
2292 .read = seq_read,
2293 .llseek = seq_lseek,
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002294 .release = seq_release_net,
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002295};
2296
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002297struct fib_route_iter {
2298 struct seq_net_private p;
2299 struct trie *main_trie;
2300 loff_t pos;
2301 t_key key;
2302};
2303
Alexander Duyckadaf9812014-12-31 10:55:47 -08002304static struct tnode *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos)
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002305{
Alexander Duyckadaf9812014-12-31 10:55:47 -08002306 struct tnode *l = NULL;
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002307 struct trie *t = iter->main_trie;
2308
2309 /* use cache location of last found key */
2310 if (iter->pos > 0 && pos >= iter->pos && (l = fib_find_node(t, iter->key)))
2311 pos -= iter->pos;
2312 else {
2313 iter->pos = 0;
2314 l = trie_firstleaf(t);
2315 }
2316
2317 while (l && pos-- > 0) {
2318 iter->pos++;
2319 l = trie_nextleaf(l);
2320 }
2321
2322 if (l)
2323 iter->key = pos; /* remember it */
2324 else
2325 iter->pos = 0; /* forget it */
2326
2327 return l;
2328}
2329
2330static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
2331 __acquires(RCU)
2332{
2333 struct fib_route_iter *iter = seq->private;
2334 struct fib_table *tb;
2335
2336 rcu_read_lock();
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002337 tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002338 if (!tb)
2339 return NULL;
2340
2341 iter->main_trie = (struct trie *) tb->tb_data;
2342 if (*pos == 0)
2343 return SEQ_START_TOKEN;
2344 else
2345 return fib_route_get_idx(iter, *pos - 1);
2346}
2347
2348static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2349{
2350 struct fib_route_iter *iter = seq->private;
Alexander Duyckadaf9812014-12-31 10:55:47 -08002351 struct tnode *l = v;
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002352
2353 ++*pos;
2354 if (v == SEQ_START_TOKEN) {
2355 iter->pos = 0;
2356 l = trie_firstleaf(iter->main_trie);
2357 } else {
2358 iter->pos++;
2359 l = trie_nextleaf(l);
2360 }
2361
2362 if (l)
2363 iter->key = l->key;
2364 else
2365 iter->pos = 0;
2366 return l;
2367}
2368
2369static void fib_route_seq_stop(struct seq_file *seq, void *v)
2370 __releases(RCU)
2371{
2372 rcu_read_unlock();
2373}
2374
Eric Dumazeta034ee32010-09-09 23:32:28 +00002375static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002376{
Eric Dumazeta034ee32010-09-09 23:32:28 +00002377 unsigned int flags = 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002378
Eric Dumazeta034ee32010-09-09 23:32:28 +00002379 if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
2380 flags = RTF_REJECT;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002381 if (fi && fi->fib_nh->nh_gw)
2382 flags |= RTF_GATEWAY;
Al Viro32ab5f82006-09-26 22:21:45 -07002383 if (mask == htonl(0xFFFFFFFF))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002384 flags |= RTF_HOST;
2385 flags |= RTF_UP;
2386 return flags;
2387}
2388
2389/*
2390 * This outputs /proc/net/route.
2391 * The format of the file is not supposed to be changed
Eric Dumazeta034ee32010-09-09 23:32:28 +00002392 * and needs to be same as fib_hash output to avoid breaking
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002393 * legacy utilities
2394 */
2395static int fib_route_seq_show(struct seq_file *seq, void *v)
2396{
Alexander Duyckadaf9812014-12-31 10:55:47 -08002397 struct tnode *l = v;
Stephen Hemminger13280422008-01-22 21:54:37 -08002398 struct leaf_info *li;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002399
2400 if (v == SEQ_START_TOKEN) {
2401 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2402 "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2403 "\tWindow\tIRTT");
2404 return 0;
2405 }
2406
Sasha Levinb67bfe02013-02-27 17:06:00 -08002407 hlist_for_each_entry_rcu(li, &l->list, hlist) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002408 struct fib_alias *fa;
Al Viro32ab5f82006-09-26 22:21:45 -07002409 __be32 mask, prefix;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002410
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002411 mask = inet_make_mask(li->plen);
2412 prefix = htonl(l->key);
2413
2414 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
Herbert Xu1371e372005-10-15 09:42:39 +10002415 const struct fib_info *fi = fa->fa_info;
Eric Dumazeta034ee32010-09-09 23:32:28 +00002416 unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002417
2418 if (fa->fa_type == RTN_BROADCAST
2419 || fa->fa_type == RTN_MULTICAST)
2420 continue;
2421
Tetsuo Handa652586d2013-11-14 14:31:57 -08002422 seq_setwidth(seq, 127);
2423
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002424 if (fi)
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002425 seq_printf(seq,
2426 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
Tetsuo Handa652586d2013-11-14 14:31:57 -08002427 "%d\t%08X\t%d\t%u\t%u",
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002428 fi->fib_dev ? fi->fib_dev->name : "*",
2429 prefix,
2430 fi->fib_nh->nh_gw, flags, 0, 0,
2431 fi->fib_priority,
2432 mask,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002433 (fi->fib_advmss ?
2434 fi->fib_advmss + 40 : 0),
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002435 fi->fib_window,
Tetsuo Handa652586d2013-11-14 14:31:57 -08002436 fi->fib_rtt >> 3);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002437 else
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002438 seq_printf(seq,
2439 "*\t%08X\t%08X\t%04X\t%d\t%u\t"
Tetsuo Handa652586d2013-11-14 14:31:57 -08002440 "%d\t%08X\t%d\t%u\t%u",
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002441 prefix, 0, flags, 0, 0, 0,
Tetsuo Handa652586d2013-11-14 14:31:57 -08002442 mask, 0, 0, 0);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002443
Tetsuo Handa652586d2013-11-14 14:31:57 -08002444 seq_pad(seq, '\n');
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002445 }
2446 }
2447
2448 return 0;
2449}
2450
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002451static const struct seq_operations fib_route_seq_ops = {
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002452 .start = fib_route_seq_start,
2453 .next = fib_route_seq_next,
2454 .stop = fib_route_seq_stop,
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002455 .show = fib_route_seq_show,
2456};
2457
2458static int fib_route_seq_open(struct inode *inode, struct file *file)
2459{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002460 return seq_open_net(inode, file, &fib_route_seq_ops,
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002461 sizeof(struct fib_route_iter));
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002462}
2463
Arjan van de Ven9a321442007-02-12 00:55:35 -08002464static const struct file_operations fib_route_fops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002465 .owner = THIS_MODULE,
2466 .open = fib_route_seq_open,
2467 .read = seq_read,
2468 .llseek = seq_lseek,
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002469 .release = seq_release_net,
Robert Olsson19baf832005-06-21 12:43:18 -07002470};
2471
Denis V. Lunev61a02652008-01-10 03:21:09 -08002472int __net_init fib_proc_init(struct net *net)
Robert Olsson19baf832005-06-21 12:43:18 -07002473{
Gao fengd4beaa62013-02-18 01:34:54 +00002474 if (!proc_create("fib_trie", S_IRUGO, net->proc_net, &fib_trie_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002475 goto out1;
2476
Gao fengd4beaa62013-02-18 01:34:54 +00002477 if (!proc_create("fib_triestat", S_IRUGO, net->proc_net,
2478 &fib_triestat_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002479 goto out2;
2480
Gao fengd4beaa62013-02-18 01:34:54 +00002481 if (!proc_create("route", S_IRUGO, net->proc_net, &fib_route_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002482 goto out3;
2483
Robert Olsson19baf832005-06-21 12:43:18 -07002484 return 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002485
2486out3:
Gao fengece31ff2013-02-18 01:34:56 +00002487 remove_proc_entry("fib_triestat", net->proc_net);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002488out2:
Gao fengece31ff2013-02-18 01:34:56 +00002489 remove_proc_entry("fib_trie", net->proc_net);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002490out1:
2491 return -ENOMEM;
Robert Olsson19baf832005-06-21 12:43:18 -07002492}
2493
Denis V. Lunev61a02652008-01-10 03:21:09 -08002494void __net_exit fib_proc_exit(struct net *net)
Robert Olsson19baf832005-06-21 12:43:18 -07002495{
Gao fengece31ff2013-02-18 01:34:56 +00002496 remove_proc_entry("fib_trie", net->proc_net);
2497 remove_proc_entry("fib_triestat", net->proc_net);
2498 remove_proc_entry("route", net->proc_net);
Robert Olsson19baf832005-06-21 12:43:18 -07002499}
2500
2501#endif /* CONFIG_PROC_FS */