blob: 3fe4dd917ce12cc3aeef8db6ab48ec4d5db25c4e [file] [log] [blame]
Robert Olsson19baf832005-06-21 12:43:18 -07001/*
2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version
5 * 2 of the License, or (at your option) any later version.
6 *
7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
8 * & Swedish University of Agricultural Sciences.
9 *
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090010 * Jens Laas <jens.laas@data.slu.se> Swedish University of
Robert Olsson19baf832005-06-21 12:43:18 -070011 * Agricultural Sciences.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090012 *
Robert Olsson19baf832005-06-21 12:43:18 -070013 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
14 *
Lucas De Marchi25985ed2011-03-30 22:57:33 -030015 * This work is based on the LPC-trie which is originally described in:
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090016 *
Robert Olsson19baf832005-06-21 12:43:18 -070017 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
Justin P. Mattock631dd1a2010-10-18 11:03:14 +020019 * http://www.csc.kth.se/~snilsson/software/dyntrie2/
Robert Olsson19baf832005-06-21 12:43:18 -070020 *
21 *
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
24 *
Robert Olsson19baf832005-06-21 12:43:18 -070025 *
26 * Code from fib_hash has been reused which includes the following header:
27 *
28 *
29 * INET An implementation of the TCP/IP protocol suite for the LINUX
30 * operating system. INET is implemented using the BSD Socket
31 * interface as the means of communication with the user level.
32 *
33 * IPv4 FIB: lookup engine and maintenance routines.
34 *
35 *
36 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
37 *
38 * This program is free software; you can redistribute it and/or
39 * modify it under the terms of the GNU General Public License
40 * as published by the Free Software Foundation; either version
41 * 2 of the License, or (at your option) any later version.
Robert Olssonfd966252005-12-22 11:25:10 -080042 *
43 * Substantial contributions to this work comes from:
44 *
45 * David S. Miller, <davem@davemloft.net>
46 * Stephen Hemminger <shemminger@osdl.org>
47 * Paul E. McKenney <paulmck@us.ibm.com>
48 * Patrick McHardy <kaber@trash.net>
Robert Olsson19baf832005-06-21 12:43:18 -070049 */
50
Jens Låås80b71b82009-08-28 23:57:15 -070051#define VERSION "0.409"
Robert Olsson19baf832005-06-21 12:43:18 -070052
Robert Olsson19baf832005-06-21 12:43:18 -070053#include <asm/uaccess.h>
Jiri Slaby1977f032007-10-18 23:40:25 -070054#include <linux/bitops.h>
Robert Olsson19baf832005-06-21 12:43:18 -070055#include <linux/types.h>
56#include <linux/kernel.h>
Robert Olsson19baf832005-06-21 12:43:18 -070057#include <linux/mm.h>
58#include <linux/string.h>
59#include <linux/socket.h>
60#include <linux/sockios.h>
61#include <linux/errno.h>
62#include <linux/in.h>
63#include <linux/inet.h>
Stephen Hemmingercd8787a2006-01-03 14:38:34 -080064#include <linux/inetdevice.h>
Robert Olsson19baf832005-06-21 12:43:18 -070065#include <linux/netdevice.h>
66#include <linux/if_arp.h>
67#include <linux/proc_fs.h>
Robert Olsson2373ce12005-08-25 13:01:29 -070068#include <linux/rcupdate.h>
Robert Olsson19baf832005-06-21 12:43:18 -070069#include <linux/skbuff.h>
70#include <linux/netlink.h>
71#include <linux/init.h>
72#include <linux/list.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090073#include <linux/slab.h>
Paul Gortmakerbc3b2d72011-07-15 11:47:34 -040074#include <linux/export.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020075#include <net/net_namespace.h>
Robert Olsson19baf832005-06-21 12:43:18 -070076#include <net/ip.h>
77#include <net/protocol.h>
78#include <net/route.h>
79#include <net/tcp.h>
80#include <net/sock.h>
81#include <net/ip_fib.h>
82#include "fib_lookup.h"
83
Robert Olsson06ef9212006-03-20 21:35:01 -080084#define MAX_STAT_DEPTH 32
Robert Olsson19baf832005-06-21 12:43:18 -070085
Robert Olsson19baf832005-06-21 12:43:18 -070086#define KEYLENGTH (8*sizeof(t_key))
Robert Olsson19baf832005-06-21 12:43:18 -070087
Robert Olsson19baf832005-06-21 12:43:18 -070088typedef unsigned int t_key;
89
Alexander Duyck64c9b6f2014-12-31 10:55:35 -080090#define IS_TNODE(n) ((n)->bits)
91#define IS_LEAF(n) (!(n)->bits)
Robert Olsson2373ce12005-08-25 13:01:29 -070092
Alexander Duyck9f9e6362014-12-31 10:55:54 -080093#define get_shift(_kv) (KEYLENGTH - (_kv)->pos - (_kv)->bits)
94#define get_index(_key, _kv) (((_key) ^ (_kv)->key) >> get_shift(_kv))
95
Alexander Duyck64c9b6f2014-12-31 10:55:35 -080096struct tnode {
97 t_key key;
98 unsigned char bits; /* 2log(KEYLENGTH) bits needed */
99 unsigned char pos; /* 2log(KEYLENGTH) bits needed */
100 struct tnode __rcu *parent;
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800101 struct rcu_head rcu;
Alexander Duyckadaf9812014-12-31 10:55:47 -0800102 union {
103 /* The fields in this struct are valid if bits > 0 (TNODE) */
104 struct {
105 unsigned int full_children; /* KEYLENGTH bits needed */
106 unsigned int empty_children; /* KEYLENGTH bits needed */
107 struct tnode __rcu *child[0];
108 };
109 /* This list pointer if valid if bits == 0 (LEAF) */
110 struct hlist_head list;
111 };
Robert Olsson19baf832005-06-21 12:43:18 -0700112};
113
114struct leaf_info {
115 struct hlist_node hlist;
116 int plen;
Eric Dumazet5c745012011-07-18 03:16:33 +0000117 u32 mask_plen; /* ntohl(inet_make_mask(plen)) */
Robert Olsson19baf832005-06-21 12:43:18 -0700118 struct list_head falh;
Eric Dumazet5c745012011-07-18 03:16:33 +0000119 struct rcu_head rcu;
Robert Olsson19baf832005-06-21 12:43:18 -0700120};
121
Robert Olsson19baf832005-06-21 12:43:18 -0700122#ifdef CONFIG_IP_FIB_TRIE_STATS
123struct trie_use_stats {
124 unsigned int gets;
125 unsigned int backtrack;
126 unsigned int semantic_match_passed;
127 unsigned int semantic_match_miss;
128 unsigned int null_node_hit;
Robert Olsson2f368952005-07-05 15:02:40 -0700129 unsigned int resize_node_skipped;
Robert Olsson19baf832005-06-21 12:43:18 -0700130};
131#endif
132
133struct trie_stat {
134 unsigned int totdepth;
135 unsigned int maxdepth;
136 unsigned int tnodes;
137 unsigned int leaves;
138 unsigned int nullpointers;
Stephen Hemminger93672292008-01-22 21:54:05 -0800139 unsigned int prefixes;
Robert Olsson06ef9212006-03-20 21:35:01 -0800140 unsigned int nodesizes[MAX_STAT_DEPTH];
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700141};
Robert Olsson19baf832005-06-21 12:43:18 -0700142
143struct trie {
Alexander Duyckadaf9812014-12-31 10:55:47 -0800144 struct tnode __rcu *trie;
Robert Olsson19baf832005-06-21 12:43:18 -0700145#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -0800146 struct trie_use_stats __percpu *stats;
Robert Olsson19baf832005-06-21 12:43:18 -0700147#endif
Robert Olsson19baf832005-06-21 12:43:18 -0700148};
149
Alexander Duyckadaf9812014-12-31 10:55:47 -0800150static void tnode_put_child_reorg(struct tnode *tn, int i, struct tnode *n,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800151 int wasfull);
Alexander Duyckadaf9812014-12-31 10:55:47 -0800152static struct tnode *resize(struct trie *t, struct tnode *tn);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700153static struct tnode *inflate(struct trie *t, struct tnode *tn);
154static struct tnode *halve(struct trie *t, struct tnode *tn);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700155/* tnodes to free after resize(); protected by RTNL */
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800156static struct callback_head *tnode_free_head;
Jarek Poplawskic3059472009-07-14 08:33:08 +0000157static size_t tnode_free_size;
158
159/*
160 * synchronize_rcu after call_rcu for that many pages; it should be especially
161 * useful before resizing the root node with PREEMPT_NONE configs; the value was
162 * obtained experimentally, aiming to avoid visible slowdown.
163 */
164static const int sync_pages = 128;
Robert Olsson19baf832005-06-21 12:43:18 -0700165
Christoph Lametere18b8902006-12-06 20:33:20 -0800166static struct kmem_cache *fn_alias_kmem __read_mostly;
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -0800167static struct kmem_cache *trie_leaf_kmem __read_mostly;
Robert Olsson19baf832005-06-21 12:43:18 -0700168
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800169/* caller must hold RTNL */
170#define node_parent(n) rtnl_dereference((n)->parent)
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700171
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800172/* caller must hold RCU read lock or RTNL */
173#define node_parent_rcu(n) rcu_dereference_rtnl((n)->parent)
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700174
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800175/* wrapper for rcu_assign_pointer */
Alexander Duyckadaf9812014-12-31 10:55:47 -0800176static inline void node_set_parent(struct tnode *n, struct tnode *tp)
Stephen Hemminger06801912007-08-10 15:22:13 -0700177{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800178 if (n)
179 rcu_assign_pointer(n->parent, tp);
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800180}
181
182#define NODE_INIT_PARENT(n, p) RCU_INIT_POINTER((n)->parent, p)
183
184/* This provides us with the number of children in this node, in the case of a
185 * leaf this will return 0 meaning none of the children are accessible.
186 */
187static inline int tnode_child_length(const struct tnode *tn)
188{
189 return (1ul << tn->bits) & ~(1ul);
Stephen Hemminger06801912007-08-10 15:22:13 -0700190}
Robert Olsson2373ce12005-08-25 13:01:29 -0700191
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700192/*
193 * caller must hold RTNL
194 */
Alexander Duyckadaf9812014-12-31 10:55:47 -0800195static inline struct tnode *tnode_get_child(const struct tnode *tn, unsigned int i)
Robert Olsson19baf832005-06-21 12:43:18 -0700196{
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800197 BUG_ON(i >= tnode_child_length(tn));
Robert Olsson19baf832005-06-21 12:43:18 -0700198
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700199 return rtnl_dereference(tn->child[i]);
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800200}
201
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700202/*
203 * caller must hold RCU read lock or RTNL
204 */
Alexander Duyckadaf9812014-12-31 10:55:47 -0800205static inline struct tnode *tnode_get_child_rcu(const struct tnode *tn, unsigned int i)
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800206{
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800207 BUG_ON(i >= tnode_child_length(tn));
Eric Dumazetb59cfbf2008-01-18 03:31:36 -0800208
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700209 return rcu_dereference_rtnl(tn->child[i]);
Robert Olsson19baf832005-06-21 12:43:18 -0700210}
211
David S. Miller3b004562011-02-16 14:56:22 -0800212static inline t_key mask_pfx(t_key k, unsigned int l)
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -0700213{
214 return (l == 0) ? 0 : k >> (KEYLENGTH-l) << (KEYLENGTH-l);
215}
216
David S. Miller3b004562011-02-16 14:56:22 -0800217static inline t_key tkey_extract_bits(t_key a, unsigned int offset, unsigned int bits)
Robert Olsson19baf832005-06-21 12:43:18 -0700218{
Olof Johansson91b9a272005-08-09 20:24:39 -0700219 if (offset < KEYLENGTH)
Robert Olsson19baf832005-06-21 12:43:18 -0700220 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
Olof Johansson91b9a272005-08-09 20:24:39 -0700221 else
Robert Olsson19baf832005-06-21 12:43:18 -0700222 return 0;
223}
224
225static inline int tkey_equals(t_key a, t_key b)
226{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700227 return a == b;
Robert Olsson19baf832005-06-21 12:43:18 -0700228}
229
230static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
231{
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700232 if (bits == 0 || offset >= KEYLENGTH)
233 return 1;
Olof Johansson91b9a272005-08-09 20:24:39 -0700234 bits = bits > KEYLENGTH ? KEYLENGTH : bits;
235 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700236}
Robert Olsson19baf832005-06-21 12:43:18 -0700237
238static inline int tkey_mismatch(t_key a, int offset, t_key b)
239{
240 t_key diff = a ^ b;
241 int i = offset;
242
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700243 if (!diff)
244 return 0;
245 while ((diff << i) >> (KEYLENGTH-1) == 0)
Robert Olsson19baf832005-06-21 12:43:18 -0700246 i++;
247 return i;
248}
249
Robert Olsson19baf832005-06-21 12:43:18 -0700250/*
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900251 To understand this stuff, an understanding of keys and all their bits is
252 necessary. Every node in the trie has a key associated with it, but not
Robert Olsson19baf832005-06-21 12:43:18 -0700253 all of the bits in that key are significant.
254
255 Consider a node 'n' and its parent 'tp'.
256
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900257 If n is a leaf, every bit in its key is significant. Its presence is
258 necessitated by path compression, since during a tree traversal (when
259 searching for a leaf - unless we are doing an insertion) we will completely
260 ignore all skipped bits we encounter. Thus we need to verify, at the end of
261 a potentially successful search, that we have indeed been walking the
Robert Olsson19baf832005-06-21 12:43:18 -0700262 correct key path.
263
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900264 Note that we can never "miss" the correct key in the tree if present by
265 following the wrong path. Path compression ensures that segments of the key
266 that are the same for all keys with a given prefix are skipped, but the
267 skipped part *is* identical for each node in the subtrie below the skipped
268 bit! trie_insert() in this implementation takes care of that - note the
Robert Olsson19baf832005-06-21 12:43:18 -0700269 call to tkey_sub_equals() in trie_insert().
270
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900271 if n is an internal node - a 'tnode' here, the various parts of its key
Robert Olsson19baf832005-06-21 12:43:18 -0700272 have many different meanings.
273
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900274 Example:
Robert Olsson19baf832005-06-21 12:43:18 -0700275 _________________________________________________________________
276 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
277 -----------------------------------------------------------------
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900278 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
Robert Olsson19baf832005-06-21 12:43:18 -0700279
280 _________________________________________________________________
281 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
282 -----------------------------------------------------------------
283 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
284
285 tp->pos = 7
286 tp->bits = 3
287 n->pos = 15
Olof Johansson91b9a272005-08-09 20:24:39 -0700288 n->bits = 4
Robert Olsson19baf832005-06-21 12:43:18 -0700289
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900290 First, let's just ignore the bits that come before the parent tp, that is
291 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
Robert Olsson19baf832005-06-21 12:43:18 -0700292 not use them for anything.
293
294 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900295 index into the parent's child array. That is, they will be used to find
Robert Olsson19baf832005-06-21 12:43:18 -0700296 'n' among tp's children.
297
298 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
299 for the node n.
300
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900301 All the bits we have seen so far are significant to the node n. The rest
Robert Olsson19baf832005-06-21 12:43:18 -0700302 of the bits are really not needed or indeed known in n->key.
303
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900304 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
Robert Olsson19baf832005-06-21 12:43:18 -0700305 n's child array, and will of course be different for each child.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900306
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700307
Robert Olsson19baf832005-06-21 12:43:18 -0700308 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
309 at this point.
310
311*/
312
Denis V. Lunevf5026fa2007-12-13 09:47:57 -0800313static const int halve_threshold = 25;
314static const int inflate_threshold = 50;
Jarek Poplawski345aa032009-07-07 19:39:16 -0700315static const int halve_threshold_root = 15;
Jens Låås80b71b82009-08-28 23:57:15 -0700316static const int inflate_threshold_root = 30;
Robert Olsson2373ce12005-08-25 13:01:29 -0700317
318static void __alias_free_mem(struct rcu_head *head)
319{
320 struct fib_alias *fa = container_of(head, struct fib_alias, rcu);
321 kmem_cache_free(fn_alias_kmem, fa);
322}
323
324static inline void alias_free_mem_rcu(struct fib_alias *fa)
325{
326 call_rcu(&fa->rcu, __alias_free_mem);
327}
328
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800329#define TNODE_KMALLOC_MAX \
Alexander Duyckadaf9812014-12-31 10:55:47 -0800330 ilog2((PAGE_SIZE - sizeof(struct tnode)) / sizeof(struct tnode *))
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800331
332static void __node_free_rcu(struct rcu_head *head)
Robert Olsson2373ce12005-08-25 13:01:29 -0700333{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800334 struct tnode *n = container_of(head, struct tnode, rcu);
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800335
336 if (IS_LEAF(n))
337 kmem_cache_free(trie_leaf_kmem, n);
338 else if (n->bits <= TNODE_KMALLOC_MAX)
339 kfree(n);
340 else
341 vfree(n);
Robert Olsson2373ce12005-08-25 13:01:29 -0700342}
343
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800344#define node_free(n) call_rcu(&n->rcu, __node_free_rcu)
Stephen Hemminger387a5482008-04-10 03:47:34 -0700345
Robert Olsson2373ce12005-08-25 13:01:29 -0700346static inline void free_leaf_info(struct leaf_info *leaf)
347{
Lai Jiangshanbceb0f42011-03-18 11:42:34 +0800348 kfree_rcu(leaf, rcu);
Robert Olsson2373ce12005-08-25 13:01:29 -0700349}
350
Eric Dumazet8d965442008-01-13 22:31:44 -0800351static struct tnode *tnode_alloc(size_t size)
Robert Olsson2373ce12005-08-25 13:01:29 -0700352{
Robert Olsson2373ce12005-08-25 13:01:29 -0700353 if (size <= PAGE_SIZE)
Eric Dumazet8d965442008-01-13 22:31:44 -0800354 return kzalloc(size, GFP_KERNEL);
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700355 else
Eric Dumazet7a1c8e52010-11-20 07:46:35 +0000356 return vzalloc(size);
Stephen Hemminger15be75c2008-04-10 02:56:38 -0700357}
Robert Olsson2373ce12005-08-25 13:01:29 -0700358
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700359static void tnode_free_safe(struct tnode *tn)
360{
361 BUG_ON(IS_LEAF(tn));
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800362 tn->rcu.next = tnode_free_head;
363 tnode_free_head = &tn->rcu;
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700364}
365
366static void tnode_free_flush(void)
367{
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800368 struct callback_head *head;
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700369
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800370 while ((head = tnode_free_head)) {
371 struct tnode *tn = container_of(head, struct tnode, rcu);
372
373 tnode_free_head = head->next;
374 tnode_free_size += offsetof(struct tnode, child[1 << tn->bits]);
375
376 node_free(tn);
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700377 }
Jarek Poplawskic3059472009-07-14 08:33:08 +0000378
379 if (tnode_free_size >= PAGE_SIZE * sync_pages) {
380 tnode_free_size = 0;
381 synchronize_rcu();
382 }
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700383}
384
Alexander Duyckadaf9812014-12-31 10:55:47 -0800385static struct tnode *leaf_new(t_key key)
Robert Olsson19baf832005-06-21 12:43:18 -0700386{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800387 struct tnode *l = kmem_cache_alloc(trie_leaf_kmem, GFP_KERNEL);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700388 if (l) {
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800389 l->parent = NULL;
390 /* set key and pos to reflect full key value
391 * any trailing zeros in the key should be ignored
392 * as the nodes are searched
393 */
394 l->key = key;
395 l->pos = KEYLENGTH;
396 /* set bits to 0 indicating we are not a tnode */
397 l->bits = 0;
398
Robert Olsson19baf832005-06-21 12:43:18 -0700399 INIT_HLIST_HEAD(&l->list);
400 }
401 return l;
402}
403
404static struct leaf_info *leaf_info_new(int plen)
405{
406 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -0700407 if (li) {
408 li->plen = plen;
Eric Dumazet5c745012011-07-18 03:16:33 +0000409 li->mask_plen = ntohl(inet_make_mask(plen));
Robert Olsson2373ce12005-08-25 13:01:29 -0700410 INIT_LIST_HEAD(&li->falh);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700411 }
Robert Olsson2373ce12005-08-25 13:01:29 -0700412 return li;
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700413}
414
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800415static struct tnode *tnode_new(t_key key, int pos, int bits)
Robert Olsson19baf832005-06-21 12:43:18 -0700416{
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800417 size_t sz = offsetof(struct tnode, child[1 << bits]);
Patrick McHardyf0e36f82005-07-05 14:44:55 -0700418 struct tnode *tn = tnode_alloc(sz);
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800419 unsigned int shift = pos + bits;
420
421 /* verify bits and pos their msb bits clear and values are valid */
422 BUG_ON(!bits || (shift > KEYLENGTH));
Robert Olsson19baf832005-06-21 12:43:18 -0700423
Olof Johansson91b9a272005-08-09 20:24:39 -0700424 if (tn) {
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800425 tn->parent = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -0700426 tn->pos = pos;
427 tn->bits = bits;
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800428 tn->key = mask_pfx(key, pos);
Robert Olsson19baf832005-06-21 12:43:18 -0700429 tn->full_children = 0;
430 tn->empty_children = 1<<bits;
431 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700432
Eric Dumazeta034ee32010-09-09 23:32:28 +0000433 pr_debug("AT %p s=%zu %zu\n", tn, sizeof(struct tnode),
Alexander Duyckadaf9812014-12-31 10:55:47 -0800434 sizeof(struct tnode *) << bits);
Robert Olsson19baf832005-06-21 12:43:18 -0700435 return tn;
436}
437
Robert Olsson19baf832005-06-21 12:43:18 -0700438/*
439 * Check whether a tnode 'n' is "full", i.e. it is an internal node
440 * and no bits are skipped. See discussion in dyntree paper p. 6
441 */
442
Alexander Duyckadaf9812014-12-31 10:55:47 -0800443static inline int tnode_full(const struct tnode *tn, const struct tnode *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700444{
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800445 return n && IS_TNODE(n) && (n->pos == (tn->pos + tn->bits));
Robert Olsson19baf832005-06-21 12:43:18 -0700446}
447
Lin Ming61648d92012-07-29 02:00:03 +0000448static inline void put_child(struct tnode *tn, int i,
Alexander Duyckadaf9812014-12-31 10:55:47 -0800449 struct tnode *n)
Robert Olsson19baf832005-06-21 12:43:18 -0700450{
451 tnode_put_child_reorg(tn, i, n, -1);
452}
453
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700454 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700455 * Add a child at position i overwriting the old value.
456 * Update the value of full_children and empty_children.
457 */
458
Alexander Duyckadaf9812014-12-31 10:55:47 -0800459static void tnode_put_child_reorg(struct tnode *tn, int i, struct tnode *n,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800460 int wasfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700461{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800462 struct tnode *chi = rtnl_dereference(tn->child[i]);
Robert Olsson19baf832005-06-21 12:43:18 -0700463 int isfull;
464
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700465 BUG_ON(i >= 1<<tn->bits);
466
Robert Olsson19baf832005-06-21 12:43:18 -0700467 /* update emptyChildren */
468 if (n == NULL && chi != NULL)
469 tn->empty_children++;
470 else if (n != NULL && chi == NULL)
471 tn->empty_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700472
Robert Olsson19baf832005-06-21 12:43:18 -0700473 /* update fullChildren */
Olof Johansson91b9a272005-08-09 20:24:39 -0700474 if (wasfull == -1)
Robert Olsson19baf832005-06-21 12:43:18 -0700475 wasfull = tnode_full(tn, chi);
476
477 isfull = tnode_full(tn, n);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700478 if (wasfull && !isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700479 tn->full_children--;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700480 else if (!wasfull && isfull)
Robert Olsson19baf832005-06-21 12:43:18 -0700481 tn->full_children++;
Olof Johansson91b9a272005-08-09 20:24:39 -0700482
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800483 node_set_parent(n, tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700484
Eric Dumazetcf778b02012-01-12 04:41:32 +0000485 rcu_assign_pointer(tn->child[i], n);
Robert Olsson19baf832005-06-21 12:43:18 -0700486}
487
Jens Låås80b71b82009-08-28 23:57:15 -0700488#define MAX_WORK 10
Alexander Duyckadaf9812014-12-31 10:55:47 -0800489static struct tnode *resize(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700490{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800491 struct tnode *old_tn, *n = NULL;
Robert Olssone6308be2005-10-04 13:01:58 -0700492 int inflate_threshold_use;
493 int halve_threshold_use;
Jens Låås80b71b82009-08-28 23:57:15 -0700494 int max_work;
Robert Olsson19baf832005-06-21 12:43:18 -0700495
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900496 if (!tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700497 return NULL;
498
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700499 pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
500 tn, inflate_threshold, halve_threshold);
Robert Olsson19baf832005-06-21 12:43:18 -0700501
502 /* No children */
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800503 if (tn->empty_children > (tnode_child_length(tn) - 1))
504 goto no_children;
505
Robert Olsson19baf832005-06-21 12:43:18 -0700506 /* One child */
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800507 if (tn->empty_children == (tnode_child_length(tn) - 1))
Jens Låås80b71b82009-08-28 23:57:15 -0700508 goto one_child;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700509 /*
Robert Olsson19baf832005-06-21 12:43:18 -0700510 * Double as long as the resulting node has a number of
511 * nonempty nodes that are above the threshold.
512 */
513
514 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700515 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
516 * the Helsinki University of Technology and Matti Tikkanen of Nokia
Robert Olsson19baf832005-06-21 12:43:18 -0700517 * Telecommunications, page 6:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700518 * "A node is doubled if the ratio of non-empty children to all
Robert Olsson19baf832005-06-21 12:43:18 -0700519 * children in the *doubled* node is at least 'high'."
520 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700521 * 'high' in this instance is the variable 'inflate_threshold'. It
522 * is expressed as a percentage, so we multiply it with
523 * tnode_child_length() and instead of multiplying by 2 (since the
524 * child array will be doubled by inflate()) and multiplying
525 * the left-hand side by 100 (to handle the percentage thing) we
Robert Olsson19baf832005-06-21 12:43:18 -0700526 * multiply the left-hand side by 50.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700527 *
528 * The left-hand side may look a bit weird: tnode_child_length(tn)
529 * - tn->empty_children is of course the number of non-null children
530 * in the current node. tn->full_children is the number of "full"
Robert Olsson19baf832005-06-21 12:43:18 -0700531 * children, that is non-null tnodes with a skip value of 0.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700532 * All of those will be doubled in the resulting inflated tnode, so
Robert Olsson19baf832005-06-21 12:43:18 -0700533 * we just count them one extra time here.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700534 *
Robert Olsson19baf832005-06-21 12:43:18 -0700535 * A clearer way to write this would be:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700536 *
Robert Olsson19baf832005-06-21 12:43:18 -0700537 * to_be_doubled = tn->full_children;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700538 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
Robert Olsson19baf832005-06-21 12:43:18 -0700539 * tn->full_children;
540 *
541 * new_child_length = tnode_child_length(tn) * 2;
542 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700543 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
Robert Olsson19baf832005-06-21 12:43:18 -0700544 * new_child_length;
545 * if (new_fill_factor >= inflate_threshold)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700546 *
547 * ...and so on, tho it would mess up the while () loop.
548 *
Robert Olsson19baf832005-06-21 12:43:18 -0700549 * anyway,
550 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
551 * inflate_threshold
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700552 *
Robert Olsson19baf832005-06-21 12:43:18 -0700553 * avoid a division:
554 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
555 * inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700556 *
Robert Olsson19baf832005-06-21 12:43:18 -0700557 * expand not_to_be_doubled and to_be_doubled, and shorten:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700558 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700559 * tn->full_children) >= inflate_threshold * new_child_length
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700560 *
Robert Olsson19baf832005-06-21 12:43:18 -0700561 * expand new_child_length:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700562 * 100 * (tnode_child_length(tn) - tn->empty_children +
Olof Johansson91b9a272005-08-09 20:24:39 -0700563 * tn->full_children) >=
Robert Olsson19baf832005-06-21 12:43:18 -0700564 * inflate_threshold * tnode_child_length(tn) * 2
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700565 *
Robert Olsson19baf832005-06-21 12:43:18 -0700566 * shorten again:
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700567 * 50 * (tn->full_children + tnode_child_length(tn) -
Olof Johansson91b9a272005-08-09 20:24:39 -0700568 * tn->empty_children) >= inflate_threshold *
Robert Olsson19baf832005-06-21 12:43:18 -0700569 * tnode_child_length(tn)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700570 *
Robert Olsson19baf832005-06-21 12:43:18 -0700571 */
572
Robert Olssone6308be2005-10-04 13:01:58 -0700573 /* Keep root node larger */
574
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800575 if (!node_parent(tn)) {
Jens Låås80b71b82009-08-28 23:57:15 -0700576 inflate_threshold_use = inflate_threshold_root;
577 halve_threshold_use = halve_threshold_root;
Eric Dumazeta034ee32010-09-09 23:32:28 +0000578 } else {
Robert Olssone6308be2005-10-04 13:01:58 -0700579 inflate_threshold_use = inflate_threshold;
Jens Låås80b71b82009-08-28 23:57:15 -0700580 halve_threshold_use = halve_threshold;
581 }
Robert Olssone6308be2005-10-04 13:01:58 -0700582
Jens Låås80b71b82009-08-28 23:57:15 -0700583 max_work = MAX_WORK;
584 while ((tn->full_children > 0 && max_work-- &&
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800585 50 * (tn->full_children + tnode_child_length(tn)
586 - tn->empty_children)
587 >= inflate_threshold_use * tnode_child_length(tn))) {
Robert Olsson19baf832005-06-21 12:43:18 -0700588
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700589 old_tn = tn;
590 tn = inflate(t, tn);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800591
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700592 if (IS_ERR(tn)) {
593 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700594#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -0800595 this_cpu_inc(t->stats->resize_node_skipped);
Robert Olsson2f368952005-07-05 15:02:40 -0700596#endif
597 break;
598 }
Robert Olsson19baf832005-06-21 12:43:18 -0700599 }
600
Jens Låås80b71b82009-08-28 23:57:15 -0700601 /* Return if at least one inflate is run */
Eric Dumazeta034ee32010-09-09 23:32:28 +0000602 if (max_work != MAX_WORK)
Alexander Duyckadaf9812014-12-31 10:55:47 -0800603 return tn;
Jens Låås80b71b82009-08-28 23:57:15 -0700604
Robert Olsson19baf832005-06-21 12:43:18 -0700605 /*
606 * Halve as long as the number of empty children in this
607 * node is above threshold.
608 */
Robert Olsson2f368952005-07-05 15:02:40 -0700609
Jens Låås80b71b82009-08-28 23:57:15 -0700610 max_work = MAX_WORK;
611 while (tn->bits > 1 && max_work-- &&
Robert Olsson19baf832005-06-21 12:43:18 -0700612 100 * (tnode_child_length(tn) - tn->empty_children) <
Robert Olssone6308be2005-10-04 13:01:58 -0700613 halve_threshold_use * tnode_child_length(tn)) {
Robert Olsson19baf832005-06-21 12:43:18 -0700614
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700615 old_tn = tn;
616 tn = halve(t, tn);
617 if (IS_ERR(tn)) {
618 tn = old_tn;
Robert Olsson2f368952005-07-05 15:02:40 -0700619#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -0800620 this_cpu_inc(t->stats->resize_node_skipped);
Robert Olsson2f368952005-07-05 15:02:40 -0700621#endif
622 break;
623 }
624 }
625
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700626
Robert Olsson19baf832005-06-21 12:43:18 -0700627 /* Only one child remains */
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800628 if (tn->empty_children == (tnode_child_length(tn) - 1)) {
629 unsigned long i;
Jens Låås80b71b82009-08-28 23:57:15 -0700630one_child:
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800631 for (i = tnode_child_length(tn); !n && i;)
632 n = tnode_get_child(tn, --i);
633no_children:
634 /* compress one level */
635 node_set_parent(n, NULL);
636 tnode_free_safe(tn);
637 return n;
Jens Låås80b71b82009-08-28 23:57:15 -0700638 }
Alexander Duyckadaf9812014-12-31 10:55:47 -0800639 return tn;
Robert Olsson19baf832005-06-21 12:43:18 -0700640}
641
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700642
643static void tnode_clean_free(struct tnode *tn)
644{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800645 struct tnode *tofree;
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700646 int i;
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700647
648 for (i = 0; i < tnode_child_length(tn); i++) {
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800649 tofree = rtnl_dereference(tn->child[i]);
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700650 if (tofree)
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800651 node_free(tofree);
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700652 }
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800653 node_free(tn);
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700654}
655
Alexander Duyckadaf9812014-12-31 10:55:47 -0800656static struct tnode *inflate(struct trie *t, struct tnode *oldtnode)
Robert Olsson19baf832005-06-21 12:43:18 -0700657{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800658 int olen = tnode_child_length(oldtnode);
659 struct tnode *tn;
Robert Olsson19baf832005-06-21 12:43:18 -0700660 int i;
661
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700662 pr_debug("In inflate\n");
Robert Olsson19baf832005-06-21 12:43:18 -0700663
664 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
665
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700666 if (!tn)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700667 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700668
669 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700670 * Preallocate and store tnodes before the actual work so we
671 * don't get into an inconsistent state if memory allocation
672 * fails. In case of failure we return the oldnode and inflate
Robert Olsson2f368952005-07-05 15:02:40 -0700673 * of tnode is ignored.
674 */
Olof Johansson91b9a272005-08-09 20:24:39 -0700675
676 for (i = 0; i < olen; i++) {
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800677 struct tnode *inode;
Robert Olsson2f368952005-07-05 15:02:40 -0700678
Alexander Duyckadaf9812014-12-31 10:55:47 -0800679 inode = tnode_get_child(oldtnode, i);
680 if (tnode_full(oldtnode, inode) && inode->bits > 1) {
Robert Olsson2f368952005-07-05 15:02:40 -0700681 struct tnode *left, *right;
Stephen Hemmingerab66b4a2007-08-10 15:22:58 -0700682 t_key m = ~0U << (KEYLENGTH - 1) >> inode->pos;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700683
Robert Olsson2f368952005-07-05 15:02:40 -0700684 left = tnode_new(inode->key&(~m), inode->pos + 1,
685 inode->bits - 1);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700686 if (!left)
687 goto nomem;
Olof Johansson91b9a272005-08-09 20:24:39 -0700688
Robert Olsson2f368952005-07-05 15:02:40 -0700689 right = tnode_new(inode->key|m, inode->pos + 1,
690 inode->bits - 1);
691
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900692 if (!right) {
Alexander Duyck37fd30f2014-12-31 10:55:41 -0800693 node_free(left);
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700694 goto nomem;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900695 }
Robert Olsson2f368952005-07-05 15:02:40 -0700696
Alexander Duyckadaf9812014-12-31 10:55:47 -0800697 put_child(tn, 2*i, left);
698 put_child(tn, 2*i+1, right);
Robert Olsson2f368952005-07-05 15:02:40 -0700699 }
700 }
701
Olof Johansson91b9a272005-08-09 20:24:39 -0700702 for (i = 0; i < olen; i++) {
Alexander Duyckadaf9812014-12-31 10:55:47 -0800703 struct tnode *inode = tnode_get_child(oldtnode, i);
Olof Johansson91b9a272005-08-09 20:24:39 -0700704 struct tnode *left, *right;
705 int size, j;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700706
Robert Olsson19baf832005-06-21 12:43:18 -0700707 /* An empty child */
Alexander Duyckadaf9812014-12-31 10:55:47 -0800708 if (inode == NULL)
Robert Olsson19baf832005-06-21 12:43:18 -0700709 continue;
710
711 /* A leaf or an internal node with skipped bits */
Alexander Duyckadaf9812014-12-31 10:55:47 -0800712 if (!tnode_full(oldtnode, inode)) {
baker.zhangbbe34cf2013-10-01 07:45:09 +0800713 put_child(tn,
Alexander Duyckadaf9812014-12-31 10:55:47 -0800714 tkey_extract_bits(inode->key, tn->pos, tn->bits),
715 inode);
Robert Olsson19baf832005-06-21 12:43:18 -0700716 continue;
717 }
718
719 /* An internal node with two children */
Robert Olsson19baf832005-06-21 12:43:18 -0700720 if (inode->bits == 1) {
Lin Ming61648d92012-07-29 02:00:03 +0000721 put_child(tn, 2*i, rtnl_dereference(inode->child[0]));
722 put_child(tn, 2*i+1, rtnl_dereference(inode->child[1]));
Robert Olsson19baf832005-06-21 12:43:18 -0700723
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700724 tnode_free_safe(inode);
Olof Johansson91b9a272005-08-09 20:24:39 -0700725 continue;
Robert Olsson19baf832005-06-21 12:43:18 -0700726 }
727
Olof Johansson91b9a272005-08-09 20:24:39 -0700728 /* An internal node with more than two children */
Robert Olsson19baf832005-06-21 12:43:18 -0700729
Olof Johansson91b9a272005-08-09 20:24:39 -0700730 /* We will replace this node 'inode' with two new
731 * ones, 'left' and 'right', each with half of the
732 * original children. The two new nodes will have
733 * a position one bit further down the key and this
734 * means that the "significant" part of their keys
735 * (see the discussion near the top of this file)
736 * will differ by one bit, which will be "0" in
737 * left's key and "1" in right's key. Since we are
738 * moving the key position by one step, the bit that
739 * we are moving away from - the bit at position
740 * (inode->pos) - is the one that will differ between
741 * left and right. So... we synthesize that bit in the
742 * two new keys.
743 * The mask 'm' below will be a single "one" bit at
744 * the position (inode->pos)
745 */
Robert Olsson19baf832005-06-21 12:43:18 -0700746
Olof Johansson91b9a272005-08-09 20:24:39 -0700747 /* Use the old key, but set the new significant
748 * bit to zero.
749 */
Robert Olsson19baf832005-06-21 12:43:18 -0700750
Alexander Duyckadaf9812014-12-31 10:55:47 -0800751 left = tnode_get_child(tn, 2*i);
Lin Ming61648d92012-07-29 02:00:03 +0000752 put_child(tn, 2*i, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -0700753
Olof Johansson91b9a272005-08-09 20:24:39 -0700754 BUG_ON(!left);
Robert Olsson2f368952005-07-05 15:02:40 -0700755
Alexander Duyckadaf9812014-12-31 10:55:47 -0800756 right = tnode_get_child(tn, 2*i+1);
Lin Ming61648d92012-07-29 02:00:03 +0000757 put_child(tn, 2*i+1, NULL);
Robert Olsson2f368952005-07-05 15:02:40 -0700758
Olof Johansson91b9a272005-08-09 20:24:39 -0700759 BUG_ON(!right);
Robert Olsson2f368952005-07-05 15:02:40 -0700760
Olof Johansson91b9a272005-08-09 20:24:39 -0700761 size = tnode_child_length(left);
762 for (j = 0; j < size; j++) {
Lin Ming61648d92012-07-29 02:00:03 +0000763 put_child(left, j, rtnl_dereference(inode->child[j]));
764 put_child(right, j, rtnl_dereference(inode->child[j + size]));
Robert Olsson19baf832005-06-21 12:43:18 -0700765 }
Lin Ming61648d92012-07-29 02:00:03 +0000766 put_child(tn, 2*i, resize(t, left));
767 put_child(tn, 2*i+1, resize(t, right));
Olof Johansson91b9a272005-08-09 20:24:39 -0700768
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700769 tnode_free_safe(inode);
Robert Olsson19baf832005-06-21 12:43:18 -0700770 }
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700771 tnode_free_safe(oldtnode);
Robert Olsson19baf832005-06-21 12:43:18 -0700772 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700773nomem:
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700774 tnode_clean_free(tn);
775 return ERR_PTR(-ENOMEM);
Robert Olsson19baf832005-06-21 12:43:18 -0700776}
777
Alexander Duyckadaf9812014-12-31 10:55:47 -0800778static struct tnode *halve(struct trie *t, struct tnode *oldtnode)
Robert Olsson19baf832005-06-21 12:43:18 -0700779{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800780 int olen = tnode_child_length(oldtnode);
781 struct tnode *tn, *left, *right;
Robert Olsson19baf832005-06-21 12:43:18 -0700782 int i;
Robert Olsson19baf832005-06-21 12:43:18 -0700783
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700784 pr_debug("In halve\n");
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700785
786 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
Robert Olsson19baf832005-06-21 12:43:18 -0700787
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700788 if (!tn)
789 return ERR_PTR(-ENOMEM);
Robert Olsson2f368952005-07-05 15:02:40 -0700790
791 /*
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700792 * Preallocate and store tnodes before the actual work so we
793 * don't get into an inconsistent state if memory allocation
794 * fails. In case of failure we return the oldnode and halve
Robert Olsson2f368952005-07-05 15:02:40 -0700795 * of tnode is ignored.
796 */
797
Olof Johansson91b9a272005-08-09 20:24:39 -0700798 for (i = 0; i < olen; i += 2) {
Robert Olsson2f368952005-07-05 15:02:40 -0700799 left = tnode_get_child(oldtnode, i);
800 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700801
Robert Olsson2f368952005-07-05 15:02:40 -0700802 /* Two nonempty children */
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700803 if (left && right) {
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700804 struct tnode *newn;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700805
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700806 newn = tnode_new(left->key, tn->pos + tn->bits, 1);
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700807
808 if (!newn)
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700809 goto nomem;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700810
Alexander Duyckadaf9812014-12-31 10:55:47 -0800811 put_child(tn, i/2, newn);
Robert Olsson2f368952005-07-05 15:02:40 -0700812 }
Robert Olsson2f368952005-07-05 15:02:40 -0700813
Robert Olsson2f368952005-07-05 15:02:40 -0700814 }
Robert Olsson19baf832005-06-21 12:43:18 -0700815
Olof Johansson91b9a272005-08-09 20:24:39 -0700816 for (i = 0; i < olen; i += 2) {
817 struct tnode *newBinNode;
818
Robert Olsson19baf832005-06-21 12:43:18 -0700819 left = tnode_get_child(oldtnode, i);
820 right = tnode_get_child(oldtnode, i+1);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700821
Robert Olsson19baf832005-06-21 12:43:18 -0700822 /* At least one of the children is empty */
823 if (left == NULL) {
824 if (right == NULL) /* Both are empty */
825 continue;
Lin Ming61648d92012-07-29 02:00:03 +0000826 put_child(tn, i/2, right);
Olof Johansson91b9a272005-08-09 20:24:39 -0700827 continue;
Stephen Hemminger0c7770c2005-08-23 21:59:41 -0700828 }
Olof Johansson91b9a272005-08-09 20:24:39 -0700829
830 if (right == NULL) {
Lin Ming61648d92012-07-29 02:00:03 +0000831 put_child(tn, i/2, left);
Olof Johansson91b9a272005-08-09 20:24:39 -0700832 continue;
833 }
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700834
Robert Olsson19baf832005-06-21 12:43:18 -0700835 /* Two nonempty children */
Alexander Duyckadaf9812014-12-31 10:55:47 -0800836 newBinNode = tnode_get_child(tn, i/2);
Lin Ming61648d92012-07-29 02:00:03 +0000837 put_child(tn, i/2, NULL);
838 put_child(newBinNode, 0, left);
839 put_child(newBinNode, 1, right);
840 put_child(tn, i/2, resize(t, newBinNode));
Robert Olsson19baf832005-06-21 12:43:18 -0700841 }
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700842 tnode_free_safe(oldtnode);
Robert Olsson19baf832005-06-21 12:43:18 -0700843 return tn;
Robert Olsson2f80b3c2005-08-09 20:25:06 -0700844nomem:
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700845 tnode_clean_free(tn);
846 return ERR_PTR(-ENOMEM);
Robert Olsson19baf832005-06-21 12:43:18 -0700847}
848
Robert Olsson772cb712005-09-19 15:31:18 -0700849/* readside must use rcu_read_lock currently dump routines
Robert Olsson2373ce12005-08-25 13:01:29 -0700850 via get_fa_head and dump */
851
Alexander Duyckadaf9812014-12-31 10:55:47 -0800852static struct leaf_info *find_leaf_info(struct tnode *l, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700853{
Robert Olsson772cb712005-09-19 15:31:18 -0700854 struct hlist_head *head = &l->list;
Robert Olsson19baf832005-06-21 12:43:18 -0700855 struct leaf_info *li;
856
Sasha Levinb67bfe02013-02-27 17:06:00 -0800857 hlist_for_each_entry_rcu(li, head, hlist)
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700858 if (li->plen == plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700859 return li;
Olof Johansson91b9a272005-08-09 20:24:39 -0700860
Robert Olsson19baf832005-06-21 12:43:18 -0700861 return NULL;
862}
863
Alexander Duyckadaf9812014-12-31 10:55:47 -0800864static inline struct list_head *get_fa_head(struct tnode *l, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700865{
Robert Olsson772cb712005-09-19 15:31:18 -0700866 struct leaf_info *li = find_leaf_info(l, plen);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700867
Olof Johansson91b9a272005-08-09 20:24:39 -0700868 if (!li)
869 return NULL;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700870
Olof Johansson91b9a272005-08-09 20:24:39 -0700871 return &li->falh;
Robert Olsson19baf832005-06-21 12:43:18 -0700872}
873
874static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
875{
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900876 struct leaf_info *li = NULL, *last = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -0700877
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900878 if (hlist_empty(head)) {
879 hlist_add_head_rcu(&new->hlist, head);
880 } else {
Sasha Levinb67bfe02013-02-27 17:06:00 -0800881 hlist_for_each_entry(li, head, hlist) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900882 if (new->plen > li->plen)
883 break;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700884
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900885 last = li;
886 }
887 if (last)
Ken Helias1d023282014-08-06 16:09:16 -0700888 hlist_add_behind_rcu(&new->hlist, &last->hlist);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900889 else
890 hlist_add_before_rcu(&new->hlist, &li->hlist);
891 }
Robert Olsson19baf832005-06-21 12:43:18 -0700892}
893
Robert Olsson2373ce12005-08-25 13:01:29 -0700894/* rcu_read_lock needs to be hold by caller from readside */
895
Alexander Duyckadaf9812014-12-31 10:55:47 -0800896static struct tnode *fib_find_node(struct trie *t, u32 key)
Robert Olsson19baf832005-06-21 12:43:18 -0700897{
Alexander Duyckadaf9812014-12-31 10:55:47 -0800898 struct tnode *n = rcu_dereference_rtnl(t->trie);
899 int pos = 0;
Robert Olsson19baf832005-06-21 12:43:18 -0700900
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800901 while (n && IS_TNODE(n)) {
Alexander Duyckadaf9812014-12-31 10:55:47 -0800902 if (tkey_sub_equals(n->key, pos, n->pos-pos, key)) {
903 pos = n->pos + n->bits;
904 n = tnode_get_child_rcu(n,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800905 tkey_extract_bits(key,
Alexander Duyckadaf9812014-12-31 10:55:47 -0800906 n->pos,
907 n->bits));
Olof Johansson91b9a272005-08-09 20:24:39 -0700908 } else
Robert Olsson19baf832005-06-21 12:43:18 -0700909 break;
910 }
911 /* Case we have found a leaf. Compare prefixes */
912
Olof Johansson91b9a272005-08-09 20:24:39 -0700913 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key))
Alexander Duyckadaf9812014-12-31 10:55:47 -0800914 return n;
Olof Johansson91b9a272005-08-09 20:24:39 -0700915
Robert Olsson19baf832005-06-21 12:43:18 -0700916 return NULL;
917}
918
Jarek Poplawski7b855762009-06-18 00:28:51 -0700919static void trie_rebalance(struct trie *t, struct tnode *tn)
Robert Olsson19baf832005-06-21 12:43:18 -0700920{
Robert Olsson19baf832005-06-21 12:43:18 -0700921 int wasfull;
Robert Olsson3ed18d72009-05-21 15:20:59 -0700922 t_key cindex, key;
Stephen Hemminger06801912007-08-10 15:22:13 -0700923 struct tnode *tp;
Robert Olsson19baf832005-06-21 12:43:18 -0700924
Robert Olsson3ed18d72009-05-21 15:20:59 -0700925 key = tn->key;
926
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800927 while (tn != NULL && (tp = node_parent(tn)) != NULL) {
Robert Olsson19baf832005-06-21 12:43:18 -0700928 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
929 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
Alexander Duyckadaf9812014-12-31 10:55:47 -0800930 tn = resize(t, tn);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800931
Alexander Duyckadaf9812014-12-31 10:55:47 -0800932 tnode_put_child_reorg(tp, cindex, tn, wasfull);
Olof Johansson91b9a272005-08-09 20:24:39 -0700933
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800934 tp = node_parent(tn);
Jarek Poplawski008440e2009-06-30 12:47:19 -0700935 if (!tp)
Alexander Duyckadaf9812014-12-31 10:55:47 -0800936 rcu_assign_pointer(t->trie, tn);
Jarek Poplawski008440e2009-06-30 12:47:19 -0700937
Jarek Poplawskie0f7cb82009-06-15 02:31:29 -0700938 tnode_free_flush();
Stephen Hemminger06801912007-08-10 15:22:13 -0700939 if (!tp)
Robert Olsson19baf832005-06-21 12:43:18 -0700940 break;
Stephen Hemminger06801912007-08-10 15:22:13 -0700941 tn = tp;
Robert Olsson19baf832005-06-21 12:43:18 -0700942 }
Stephen Hemminger06801912007-08-10 15:22:13 -0700943
Robert Olsson19baf832005-06-21 12:43:18 -0700944 /* Handle last (top) tnode */
Jarek Poplawski7b855762009-06-18 00:28:51 -0700945 if (IS_TNODE(tn))
Alexander Duyckadaf9812014-12-31 10:55:47 -0800946 tn = resize(t, tn);
Robert Olsson19baf832005-06-21 12:43:18 -0700947
Alexander Duyckadaf9812014-12-31 10:55:47 -0800948 rcu_assign_pointer(t->trie, tn);
Jarek Poplawski7b855762009-06-18 00:28:51 -0700949 tnode_free_flush();
Robert Olsson19baf832005-06-21 12:43:18 -0700950}
951
Robert Olsson2373ce12005-08-25 13:01:29 -0700952/* only used from updater-side */
953
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -0800954static struct list_head *fib_insert_node(struct trie *t, u32 key, int plen)
Robert Olsson19baf832005-06-21 12:43:18 -0700955{
956 int pos, newpos;
957 struct tnode *tp = NULL, *tn = NULL;
Alexander Duyckadaf9812014-12-31 10:55:47 -0800958 struct tnode *n;
959 struct tnode *l;
Robert Olsson19baf832005-06-21 12:43:18 -0700960 int missbit;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700961 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -0700962 struct leaf_info *li;
963 t_key cindex;
964
965 pos = 0;
Eric Dumazet0a5c0472011-03-31 01:51:35 -0700966 n = rtnl_dereference(t->trie);
Robert Olsson19baf832005-06-21 12:43:18 -0700967
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700968 /* If we point to NULL, stop. Either the tree is empty and we should
969 * just put a new leaf in if, or we have reached an empty child slot,
Robert Olsson19baf832005-06-21 12:43:18 -0700970 * and we should just put our new leaf in that.
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700971 * If we point to a T_TNODE, check if it matches our key. Note that
972 * a T_TNODE might be skipping any number of bits - its 'pos' need
Robert Olsson19baf832005-06-21 12:43:18 -0700973 * not be the parent's 'pos'+'bits'!
974 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700975 * If it does match the current key, get pos/bits from it, extract
Robert Olsson19baf832005-06-21 12:43:18 -0700976 * the index from our key, push the T_TNODE and walk the tree.
977 *
978 * If it doesn't, we have to replace it with a new T_TNODE.
979 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -0700980 * If we point to a T_LEAF, it might or might not have the same key
981 * as we do. If it does, just change the value, update the T_LEAF's
982 * value, and return it.
Robert Olsson19baf832005-06-21 12:43:18 -0700983 * If it doesn't, we need to replace it with a T_TNODE.
984 */
985
Alexander Duyck64c9b6f2014-12-31 10:55:35 -0800986 while (n && IS_TNODE(n)) {
Alexander Duyckadaf9812014-12-31 10:55:47 -0800987 if (tkey_sub_equals(n->key, pos, n->pos-pos, key)) {
988 tp = n;
989 pos = n->pos + n->bits;
990 n = tnode_get_child(n,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -0800991 tkey_extract_bits(key,
Alexander Duyckadaf9812014-12-31 10:55:47 -0800992 n->pos,
993 n->bits));
Robert Olsson19baf832005-06-21 12:43:18 -0700994
Alexander Duyckadaf9812014-12-31 10:55:47 -0800995 BUG_ON(n && node_parent(n) != tp);
Olof Johansson91b9a272005-08-09 20:24:39 -0700996 } else
Robert Olsson19baf832005-06-21 12:43:18 -0700997 break;
998 }
999
1000 /*
1001 * n ----> NULL, LEAF or TNODE
1002 *
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001003 * tp is n's (parent) ----> NULL or TNODE
Robert Olsson19baf832005-06-21 12:43:18 -07001004 */
1005
Olof Johansson91b9a272005-08-09 20:24:39 -07001006 BUG_ON(tp && IS_LEAF(tp));
Robert Olsson19baf832005-06-21 12:43:18 -07001007
1008 /* Case 1: n is a leaf. Compare prefixes */
1009
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001010 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
Robert Olsson19baf832005-06-21 12:43:18 -07001011 li = leaf_info_new(plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001012
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001013 if (!li)
1014 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001015
1016 fa_head = &li->falh;
Alexander Duyckadaf9812014-12-31 10:55:47 -08001017 insert_leaf_info(&n->list, li);
Robert Olsson19baf832005-06-21 12:43:18 -07001018 goto done;
1019 }
Alexander Duyck64c9b6f2014-12-31 10:55:35 -08001020 l = leaf_new(key);
Robert Olsson19baf832005-06-21 12:43:18 -07001021
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001022 if (!l)
1023 return NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001024
Robert Olsson19baf832005-06-21 12:43:18 -07001025 li = leaf_info_new(plen);
1026
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001027 if (!li) {
Alexander Duyck37fd30f2014-12-31 10:55:41 -08001028 node_free(l);
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001029 return NULL;
Robert Olssonf835e472005-06-28 15:00:39 -07001030 }
Robert Olsson19baf832005-06-21 12:43:18 -07001031
1032 fa_head = &li->falh;
1033 insert_leaf_info(&l->list, li);
1034
Robert Olsson19baf832005-06-21 12:43:18 -07001035 if (t->trie && n == NULL) {
Olof Johansson91b9a272005-08-09 20:24:39 -07001036 /* Case 2: n is NULL, and will just insert a new leaf */
Robert Olsson19baf832005-06-21 12:43:18 -07001037
Alexander Duyckadaf9812014-12-31 10:55:47 -08001038 node_set_parent(l, tp);
Robert Olsson19baf832005-06-21 12:43:18 -07001039
Olof Johansson91b9a272005-08-09 20:24:39 -07001040 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
Alexander Duyckadaf9812014-12-31 10:55:47 -08001041 put_child(tp, cindex, l);
Olof Johansson91b9a272005-08-09 20:24:39 -07001042 } else {
1043 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001044 /*
1045 * Add a new tnode here
Robert Olsson19baf832005-06-21 12:43:18 -07001046 * first tnode need some special handling
1047 */
1048
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001049 if (n) {
baker.zhang4c60f1d2013-10-08 11:36:51 +08001050 pos = tp ? tp->pos+tp->bits : 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001051 newpos = tkey_mismatch(key, pos, n->key);
1052 tn = tnode_new(n->key, newpos, 1);
Olof Johansson91b9a272005-08-09 20:24:39 -07001053 } else {
Robert Olsson19baf832005-06-21 12:43:18 -07001054 newpos = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001055 tn = tnode_new(key, newpos, 1); /* First tnode */
Robert Olsson19baf832005-06-21 12:43:18 -07001056 }
Robert Olsson19baf832005-06-21 12:43:18 -07001057
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001058 if (!tn) {
Robert Olssonf835e472005-06-28 15:00:39 -07001059 free_leaf_info(li);
Alexander Duyck37fd30f2014-12-31 10:55:41 -08001060 node_free(l);
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001061 return NULL;
Olof Johansson91b9a272005-08-09 20:24:39 -07001062 }
1063
Alexander Duyckadaf9812014-12-31 10:55:47 -08001064 node_set_parent(tn, tp);
Robert Olsson19baf832005-06-21 12:43:18 -07001065
Olof Johansson91b9a272005-08-09 20:24:39 -07001066 missbit = tkey_extract_bits(key, newpos, 1);
Alexander Duyckadaf9812014-12-31 10:55:47 -08001067 put_child(tn, missbit, l);
Lin Ming61648d92012-07-29 02:00:03 +00001068 put_child(tn, 1-missbit, n);
Robert Olsson19baf832005-06-21 12:43:18 -07001069
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001070 if (tp) {
Robert Olsson19baf832005-06-21 12:43:18 -07001071 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
Alexander Duyckadaf9812014-12-31 10:55:47 -08001072 put_child(tp, cindex, tn);
Olof Johansson91b9a272005-08-09 20:24:39 -07001073 } else {
Alexander Duyckadaf9812014-12-31 10:55:47 -08001074 rcu_assign_pointer(t->trie, tn);
Robert Olsson19baf832005-06-21 12:43:18 -07001075 }
Alexander Duycke962f302014-12-10 21:49:22 -08001076
1077 tp = tn;
Robert Olsson19baf832005-06-21 12:43:18 -07001078 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001079
1080 if (tp && tp->pos + tp->bits > 32)
Joe Perches058bd4d2012-03-11 18:36:11 +00001081 pr_warn("fib_trie tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
1082 tp, tp->pos, tp->bits, key, plen);
Olof Johansson91b9a272005-08-09 20:24:39 -07001083
Robert Olsson19baf832005-06-21 12:43:18 -07001084 /* Rebalance the trie */
Robert Olsson2373ce12005-08-25 13:01:29 -07001085
Jarek Poplawski7b855762009-06-18 00:28:51 -07001086 trie_rebalance(t, tp);
Robert Olssonf835e472005-06-28 15:00:39 -07001087done:
Robert Olsson19baf832005-06-21 12:43:18 -07001088 return fa_head;
1089}
1090
Robert Olssond562f1f2007-03-26 14:22:22 -07001091/*
1092 * Caller must hold RTNL.
1093 */
Stephen Hemminger16c6cf82009-09-20 10:35:36 +00001094int fib_table_insert(struct fib_table *tb, struct fib_config *cfg)
Robert Olsson19baf832005-06-21 12:43:18 -07001095{
1096 struct trie *t = (struct trie *) tb->tb_data;
1097 struct fib_alias *fa, *new_fa;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001098 struct list_head *fa_head = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001099 struct fib_info *fi;
Thomas Graf4e902c52006-08-17 18:14:52 -07001100 int plen = cfg->fc_dst_len;
1101 u8 tos = cfg->fc_tos;
Robert Olsson19baf832005-06-21 12:43:18 -07001102 u32 key, mask;
1103 int err;
Alexander Duyckadaf9812014-12-31 10:55:47 -08001104 struct tnode *l;
Robert Olsson19baf832005-06-21 12:43:18 -07001105
1106 if (plen > 32)
1107 return -EINVAL;
1108
Thomas Graf4e902c52006-08-17 18:14:52 -07001109 key = ntohl(cfg->fc_dst);
Robert Olsson19baf832005-06-21 12:43:18 -07001110
Patrick McHardy2dfe55b2006-08-10 23:08:33 -07001111 pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen);
Robert Olsson19baf832005-06-21 12:43:18 -07001112
Olof Johansson91b9a272005-08-09 20:24:39 -07001113 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001114
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001115 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001116 return -EINVAL;
1117
1118 key = key & mask;
1119
Thomas Graf4e902c52006-08-17 18:14:52 -07001120 fi = fib_create_info(cfg);
1121 if (IS_ERR(fi)) {
1122 err = PTR_ERR(fi);
Robert Olsson19baf832005-06-21 12:43:18 -07001123 goto err;
Thomas Graf4e902c52006-08-17 18:14:52 -07001124 }
Robert Olsson19baf832005-06-21 12:43:18 -07001125
1126 l = fib_find_node(t, key);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001127 fa = NULL;
Robert Olsson19baf832005-06-21 12:43:18 -07001128
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001129 if (l) {
Robert Olsson19baf832005-06-21 12:43:18 -07001130 fa_head = get_fa_head(l, plen);
1131 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1132 }
1133
1134 /* Now fa, if non-NULL, points to the first fib alias
1135 * with the same keys [prefix,tos,priority], if such key already
1136 * exists or to the node before which we will insert new one.
1137 *
1138 * If fa is NULL, we will need to allocate a new one and
1139 * insert to the head of f.
1140 *
1141 * If f is NULL, no fib node matched the destination key
1142 * and we need to allocate a new one of those as well.
1143 */
1144
Julian Anastasov936f6f82008-01-28 21:18:06 -08001145 if (fa && fa->fa_tos == tos &&
1146 fa->fa_info->fib_priority == fi->fib_priority) {
1147 struct fib_alias *fa_first, *fa_match;
Robert Olsson19baf832005-06-21 12:43:18 -07001148
1149 err = -EEXIST;
Thomas Graf4e902c52006-08-17 18:14:52 -07001150 if (cfg->fc_nlflags & NLM_F_EXCL)
Robert Olsson19baf832005-06-21 12:43:18 -07001151 goto out;
1152
Julian Anastasov936f6f82008-01-28 21:18:06 -08001153 /* We have 2 goals:
1154 * 1. Find exact match for type, scope, fib_info to avoid
1155 * duplicate routes
1156 * 2. Find next 'fa' (or head), NLM_F_APPEND inserts before it
1157 */
1158 fa_match = NULL;
1159 fa_first = fa;
1160 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1161 list_for_each_entry_continue(fa, fa_head, fa_list) {
1162 if (fa->fa_tos != tos)
1163 break;
1164 if (fa->fa_info->fib_priority != fi->fib_priority)
1165 break;
1166 if (fa->fa_type == cfg->fc_type &&
Julian Anastasov936f6f82008-01-28 21:18:06 -08001167 fa->fa_info == fi) {
1168 fa_match = fa;
1169 break;
1170 }
1171 }
1172
Thomas Graf4e902c52006-08-17 18:14:52 -07001173 if (cfg->fc_nlflags & NLM_F_REPLACE) {
Robert Olsson19baf832005-06-21 12:43:18 -07001174 struct fib_info *fi_drop;
1175 u8 state;
1176
Julian Anastasov936f6f82008-01-28 21:18:06 -08001177 fa = fa_first;
1178 if (fa_match) {
1179 if (fa == fa_match)
1180 err = 0;
Joonwoo Park67250332008-01-18 03:45:18 -08001181 goto out;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001182 }
Robert Olsson2373ce12005-08-25 13:01:29 -07001183 err = -ENOBUFS;
Christoph Lametere94b1762006-12-06 20:33:17 -08001184 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
Robert Olsson2373ce12005-08-25 13:01:29 -07001185 if (new_fa == NULL)
1186 goto out;
Robert Olsson19baf832005-06-21 12:43:18 -07001187
1188 fi_drop = fa->fa_info;
Robert Olsson2373ce12005-08-25 13:01:29 -07001189 new_fa->fa_tos = fa->fa_tos;
1190 new_fa->fa_info = fi;
Thomas Graf4e902c52006-08-17 18:14:52 -07001191 new_fa->fa_type = cfg->fc_type;
Robert Olsson19baf832005-06-21 12:43:18 -07001192 state = fa->fa_state;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001193 new_fa->fa_state = state & ~FA_S_ACCESSED;
Robert Olsson19baf832005-06-21 12:43:18 -07001194
Robert Olsson2373ce12005-08-25 13:01:29 -07001195 list_replace_rcu(&fa->fa_list, &new_fa->fa_list);
1196 alias_free_mem_rcu(fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001197
1198 fib_release_info(fi_drop);
1199 if (state & FA_S_ACCESSED)
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +00001200 rt_cache_flush(cfg->fc_nlinfo.nl_net);
Milan Kocianb8f55832007-05-23 14:55:06 -07001201 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
1202 tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE);
Robert Olsson19baf832005-06-21 12:43:18 -07001203
Olof Johansson91b9a272005-08-09 20:24:39 -07001204 goto succeeded;
Robert Olsson19baf832005-06-21 12:43:18 -07001205 }
1206 /* Error if we find a perfect match which
1207 * uses the same scope, type, and nexthop
1208 * information.
1209 */
Julian Anastasov936f6f82008-01-28 21:18:06 -08001210 if (fa_match)
1211 goto out;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001212
Thomas Graf4e902c52006-08-17 18:14:52 -07001213 if (!(cfg->fc_nlflags & NLM_F_APPEND))
Julian Anastasov936f6f82008-01-28 21:18:06 -08001214 fa = fa_first;
Robert Olsson19baf832005-06-21 12:43:18 -07001215 }
1216 err = -ENOENT;
Thomas Graf4e902c52006-08-17 18:14:52 -07001217 if (!(cfg->fc_nlflags & NLM_F_CREATE))
Robert Olsson19baf832005-06-21 12:43:18 -07001218 goto out;
1219
1220 err = -ENOBUFS;
Christoph Lametere94b1762006-12-06 20:33:17 -08001221 new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL);
Robert Olsson19baf832005-06-21 12:43:18 -07001222 if (new_fa == NULL)
1223 goto out;
1224
1225 new_fa->fa_info = fi;
1226 new_fa->fa_tos = tos;
Thomas Graf4e902c52006-08-17 18:14:52 -07001227 new_fa->fa_type = cfg->fc_type;
Robert Olsson19baf832005-06-21 12:43:18 -07001228 new_fa->fa_state = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001229 /*
1230 * Insert new entry to the list.
1231 */
1232
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001233 if (!fa_head) {
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001234 fa_head = fib_insert_node(t, key, plen);
1235 if (unlikely(!fa_head)) {
1236 err = -ENOMEM;
Robert Olssonf835e472005-06-28 15:00:39 -07001237 goto out_free_new_fa;
Stephen Hemmingerfea86ad2008-01-12 20:57:07 -08001238 }
Robert Olssonf835e472005-06-28 15:00:39 -07001239 }
Robert Olsson19baf832005-06-21 12:43:18 -07001240
David S. Miller21d8c492011-04-14 14:49:37 -07001241 if (!plen)
1242 tb->tb_num_default++;
1243
Robert Olsson2373ce12005-08-25 13:01:29 -07001244 list_add_tail_rcu(&new_fa->fa_list,
1245 (fa ? &fa->fa_list : fa_head));
Robert Olsson19baf832005-06-21 12:43:18 -07001246
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +00001247 rt_cache_flush(cfg->fc_nlinfo.nl_net);
Thomas Graf4e902c52006-08-17 18:14:52 -07001248 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id,
Milan Kocianb8f55832007-05-23 14:55:06 -07001249 &cfg->fc_nlinfo, 0);
Robert Olsson19baf832005-06-21 12:43:18 -07001250succeeded:
1251 return 0;
Robert Olssonf835e472005-06-28 15:00:39 -07001252
1253out_free_new_fa:
1254 kmem_cache_free(fn_alias_kmem, new_fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001255out:
1256 fib_release_info(fi);
Olof Johansson91b9a272005-08-09 20:24:39 -07001257err:
Robert Olsson19baf832005-06-21 12:43:18 -07001258 return err;
1259}
1260
Robert Olsson772cb712005-09-19 15:31:18 -07001261/* should be called with rcu_read_lock */
Alexander Duyckadaf9812014-12-31 10:55:47 -08001262static int check_leaf(struct fib_table *tb, struct trie *t, struct tnode *l,
David S. Miller22bd5b92011-03-11 19:54:08 -05001263 t_key key, const struct flowi4 *flp,
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001264 struct fib_result *res, int fib_flags)
Robert Olsson19baf832005-06-21 12:43:18 -07001265{
Robert Olsson19baf832005-06-21 12:43:18 -07001266 struct leaf_info *li;
1267 struct hlist_head *hhead = &l->list;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001268
Sasha Levinb67bfe02013-02-27 17:06:00 -08001269 hlist_for_each_entry_rcu(li, hhead, hlist) {
David S. Miller3be06862011-03-07 15:01:10 -08001270 struct fib_alias *fa;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001271
Eric Dumazet5c745012011-07-18 03:16:33 +00001272 if (l->key != (key & li->mask_plen))
Robert Olsson19baf832005-06-21 12:43:18 -07001273 continue;
1274
David S. Miller3be06862011-03-07 15:01:10 -08001275 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
1276 struct fib_info *fi = fa->fa_info;
1277 int nhsel, err;
1278
David S. Miller22bd5b92011-03-11 19:54:08 -05001279 if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos)
David S. Miller3be06862011-03-07 15:01:10 -08001280 continue;
David S. Millerdccd9ecc2012-05-10 22:16:32 -04001281 if (fi->fib_dead)
1282 continue;
David S. Miller37e826c2011-03-24 18:06:47 -07001283 if (fa->fa_info->fib_scope < flp->flowi4_scope)
David S. Miller3be06862011-03-07 15:01:10 -08001284 continue;
1285 fib_alias_accessed(fa);
1286 err = fib_props[fa->fa_type].error;
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001287 if (unlikely(err < 0)) {
David S. Miller3be06862011-03-07 15:01:10 -08001288#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -08001289 this_cpu_inc(t->stats->semantic_match_passed);
David S. Miller3be06862011-03-07 15:01:10 -08001290#endif
Julian Anastasov1fbc7842011-03-25 20:33:23 -07001291 return err;
David S. Miller3be06862011-03-07 15:01:10 -08001292 }
1293 if (fi->fib_flags & RTNH_F_DEAD)
1294 continue;
1295 for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
1296 const struct fib_nh *nh = &fi->fib_nh[nhsel];
1297
1298 if (nh->nh_flags & RTNH_F_DEAD)
1299 continue;
David S. Miller22bd5b92011-03-11 19:54:08 -05001300 if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif)
David S. Miller3be06862011-03-07 15:01:10 -08001301 continue;
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001302
Robert Olsson19baf832005-06-21 12:43:18 -07001303#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -08001304 this_cpu_inc(t->stats->semantic_match_passed);
Robert Olsson19baf832005-06-21 12:43:18 -07001305#endif
Eric Dumazet5c745012011-07-18 03:16:33 +00001306 res->prefixlen = li->plen;
David S. Miller3be06862011-03-07 15:01:10 -08001307 res->nh_sel = nhsel;
1308 res->type = fa->fa_type;
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001309 res->scope = fi->fib_scope;
David S. Miller3be06862011-03-07 15:01:10 -08001310 res->fi = fi;
1311 res->table = tb;
1312 res->fa_head = &li->falh;
1313 if (!(fib_flags & FIB_LOOKUP_NOREF))
Eric Dumazet5c745012011-07-18 03:16:33 +00001314 atomic_inc(&fi->fib_clntref);
David S. Miller3be06862011-03-07 15:01:10 -08001315 return 0;
1316 }
1317 }
1318
1319#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -08001320 this_cpu_inc(t->stats->semantic_match_miss);
David S. Miller3be06862011-03-07 15:01:10 -08001321#endif
Robert Olsson19baf832005-06-21 12:43:18 -07001322 }
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001323
Ben Hutchings2e655572008-07-10 16:52:52 -07001324 return 1;
Robert Olsson19baf832005-06-21 12:43:18 -07001325}
1326
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001327static inline t_key prefix_mismatch(t_key key, struct tnode *n)
1328{
1329 t_key prefix = n->key;
1330
1331 return (key ^ prefix) & (prefix | -prefix);
1332}
1333
David S. Miller22bd5b92011-03-11 19:54:08 -05001334int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp,
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001335 struct fib_result *res, int fib_flags)
Robert Olsson19baf832005-06-21 12:43:18 -07001336{
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001337 struct trie *t = (struct trie *)tb->tb_data;
Alexander Duyck8274a972014-12-31 10:55:29 -08001338#ifdef CONFIG_IP_FIB_TRIE_STATS
1339 struct trie_use_stats __percpu *stats = t->stats;
1340#endif
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001341 const t_key key = ntohl(flp->daddr);
1342 struct tnode *n, *pn;
1343 t_key cindex;
1344 int ret = 1;
Olof Johansson91b9a272005-08-09 20:24:39 -07001345
Robert Olsson2373ce12005-08-25 13:01:29 -07001346 rcu_read_lock();
Robert Olsson19baf832005-06-21 12:43:18 -07001347
Robert Olsson2373ce12005-08-25 13:01:29 -07001348 n = rcu_dereference(t->trie);
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001349 if (!n)
Robert Olsson19baf832005-06-21 12:43:18 -07001350 goto failed;
1351
1352#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -08001353 this_cpu_inc(stats->gets);
Robert Olsson19baf832005-06-21 12:43:18 -07001354#endif
1355
Alexander Duyckadaf9812014-12-31 10:55:47 -08001356 pn = n;
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001357 cindex = 0;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001358
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001359 /* Step 1: Travel to the longest prefix match in the trie */
1360 for (;;) {
1361 unsigned long index = get_index(key, n);
Robert Olsson19baf832005-06-21 12:43:18 -07001362
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001363 /* This bit of code is a bit tricky but it combines multiple
1364 * checks into a single check. The prefix consists of the
1365 * prefix plus zeros for the "bits" in the prefix. The index
1366 * is the difference between the key and this value. From
1367 * this we can actually derive several pieces of data.
1368 * if !(index >> bits)
1369 * we know the value is child index
1370 * else
1371 * we have a mismatch in skip bits and failed
1372 */
1373 if (index >> n->bits)
1374 break;
Robert Olsson19baf832005-06-21 12:43:18 -07001375
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001376 /* we have found a leaf. Prefixes have already been compared */
1377 if (IS_LEAF(n))
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001378 goto found;
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001379
1380 /* only record pn and cindex if we are going to be chopping
1381 * bits later. Otherwise we are just wasting cycles.
1382 */
1383 if (index) {
1384 pn = n;
1385 cindex = index;
Olof Johansson91b9a272005-08-09 20:24:39 -07001386 }
1387
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001388 n = rcu_dereference(n->child[index]);
1389 if (unlikely(!n))
Robert Olsson19baf832005-06-21 12:43:18 -07001390 goto backtrace;
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001391 }
1392
1393 /* Step 2: Sort out leaves and begin backtracing for longest prefix */
1394 for (;;) {
1395 /* record the pointer where our next node pointer is stored */
1396 struct tnode __rcu **cptr = n->child;
1397
1398 /* This test verifies that none of the bits that differ
1399 * between the key and the prefix exist in the region of
1400 * the lsb and higher in the prefix.
1401 */
1402 if (unlikely(prefix_mismatch(key, n)))
1403 goto backtrace;
1404
1405 /* exit out and process leaf */
1406 if (unlikely(IS_LEAF(n)))
1407 break;
1408
1409 /* Don't bother recording parent info. Since we are in
1410 * prefix match mode we will have to come back to wherever
1411 * we started this traversal anyway
1412 */
1413
1414 while ((n = rcu_dereference(*cptr)) == NULL) {
1415backtrace:
1416#ifdef CONFIG_IP_FIB_TRIE_STATS
1417 if (!n)
1418 this_cpu_inc(stats->null_node_hit);
1419#endif
1420 /* If we are at cindex 0 there are no more bits for
1421 * us to strip at this level so we must ascend back
1422 * up one level to see if there are any more bits to
1423 * be stripped there.
1424 */
1425 while (!cindex) {
1426 t_key pkey = pn->key;
1427
1428 pn = node_parent_rcu(pn);
1429 if (unlikely(!pn))
1430 goto failed;
1431#ifdef CONFIG_IP_FIB_TRIE_STATS
1432 this_cpu_inc(stats->backtrack);
1433#endif
1434 /* Get Child's index */
1435 cindex = get_index(pkey, pn);
1436 }
1437
1438 /* strip the least significant bit from the cindex */
1439 cindex &= cindex - 1;
1440
1441 /* grab pointer for next child node */
1442 cptr = &pn->child[cindex];
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001443 }
Robert Olsson19baf832005-06-21 12:43:18 -07001444 }
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001445
Robert Olsson19baf832005-06-21 12:43:18 -07001446found:
Alexander Duyck9f9e6362014-12-31 10:55:54 -08001447 /* Step 3: Process the leaf, if that fails fall back to backtracing */
1448 ret = check_leaf(tb, t, n, key, flp, res, fib_flags);
1449 if (unlikely(ret > 0))
1450 goto backtrace;
1451failed:
Robert Olsson2373ce12005-08-25 13:01:29 -07001452 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001453 return ret;
1454}
Florian Westphal6fc01432011-08-25 13:46:12 +02001455EXPORT_SYMBOL_GPL(fib_table_lookup);
Robert Olsson19baf832005-06-21 12:43:18 -07001456
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001457/*
1458 * Remove the leaf and return parent.
1459 */
Alexander Duyckadaf9812014-12-31 10:55:47 -08001460static void trie_leaf_remove(struct trie *t, struct tnode *l)
Robert Olsson19baf832005-06-21 12:43:18 -07001461{
Alexander Duyck64c9b6f2014-12-31 10:55:35 -08001462 struct tnode *tp = node_parent(l);
Robert Olsson19baf832005-06-21 12:43:18 -07001463
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001464 pr_debug("entering trie_leaf_remove(%p)\n", l);
Robert Olsson19baf832005-06-21 12:43:18 -07001465
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001466 if (tp) {
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001467 t_key cindex = tkey_extract_bits(l->key, tp->pos, tp->bits);
Lin Ming61648d92012-07-29 02:00:03 +00001468 put_child(tp, cindex, NULL);
Jarek Poplawski7b855762009-06-18 00:28:51 -07001469 trie_rebalance(t, tp);
Olof Johansson91b9a272005-08-09 20:24:39 -07001470 } else
Stephen Hemmingera9b3cd72011-08-01 16:19:00 +00001471 RCU_INIT_POINTER(t->trie, NULL);
Robert Olsson19baf832005-06-21 12:43:18 -07001472
Alexander Duyck37fd30f2014-12-31 10:55:41 -08001473 node_free(l);
Robert Olsson19baf832005-06-21 12:43:18 -07001474}
1475
Robert Olssond562f1f2007-03-26 14:22:22 -07001476/*
1477 * Caller must hold RTNL.
1478 */
Stephen Hemminger16c6cf82009-09-20 10:35:36 +00001479int fib_table_delete(struct fib_table *tb, struct fib_config *cfg)
Robert Olsson19baf832005-06-21 12:43:18 -07001480{
1481 struct trie *t = (struct trie *) tb->tb_data;
1482 u32 key, mask;
Thomas Graf4e902c52006-08-17 18:14:52 -07001483 int plen = cfg->fc_dst_len;
1484 u8 tos = cfg->fc_tos;
Robert Olsson19baf832005-06-21 12:43:18 -07001485 struct fib_alias *fa, *fa_to_delete;
1486 struct list_head *fa_head;
Alexander Duyckadaf9812014-12-31 10:55:47 -08001487 struct tnode *l;
Olof Johansson91b9a272005-08-09 20:24:39 -07001488 struct leaf_info *li;
1489
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001490 if (plen > 32)
Robert Olsson19baf832005-06-21 12:43:18 -07001491 return -EINVAL;
1492
Thomas Graf4e902c52006-08-17 18:14:52 -07001493 key = ntohl(cfg->fc_dst);
Olof Johansson91b9a272005-08-09 20:24:39 -07001494 mask = ntohl(inet_make_mask(plen));
Robert Olsson19baf832005-06-21 12:43:18 -07001495
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001496 if (key & ~mask)
Robert Olsson19baf832005-06-21 12:43:18 -07001497 return -EINVAL;
1498
1499 key = key & mask;
1500 l = fib_find_node(t, key);
1501
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001502 if (!l)
Robert Olsson19baf832005-06-21 12:43:18 -07001503 return -ESRCH;
1504
Igor Maravicad5b3102012-08-13 10:26:08 +02001505 li = find_leaf_info(l, plen);
1506
1507 if (!li)
1508 return -ESRCH;
1509
1510 fa_head = &li->falh;
Robert Olsson19baf832005-06-21 12:43:18 -07001511 fa = fib_find_alias(fa_head, tos, 0);
1512
1513 if (!fa)
1514 return -ESRCH;
1515
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001516 pr_debug("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
Robert Olsson19baf832005-06-21 12:43:18 -07001517
1518 fa_to_delete = NULL;
Julian Anastasov936f6f82008-01-28 21:18:06 -08001519 fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list);
1520 list_for_each_entry_continue(fa, fa_head, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001521 struct fib_info *fi = fa->fa_info;
1522
1523 if (fa->fa_tos != tos)
1524 break;
1525
Thomas Graf4e902c52006-08-17 18:14:52 -07001526 if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) &&
1527 (cfg->fc_scope == RT_SCOPE_NOWHERE ||
David S. Miller37e826c2011-03-24 18:06:47 -07001528 fa->fa_info->fib_scope == cfg->fc_scope) &&
Julian Anastasov74cb3c12011-03-19 12:13:46 +00001529 (!cfg->fc_prefsrc ||
1530 fi->fib_prefsrc == cfg->fc_prefsrc) &&
Thomas Graf4e902c52006-08-17 18:14:52 -07001531 (!cfg->fc_protocol ||
1532 fi->fib_protocol == cfg->fc_protocol) &&
1533 fib_nh_match(cfg, fi) == 0) {
Robert Olsson19baf832005-06-21 12:43:18 -07001534 fa_to_delete = fa;
1535 break;
1536 }
1537 }
1538
Olof Johansson91b9a272005-08-09 20:24:39 -07001539 if (!fa_to_delete)
1540 return -ESRCH;
Robert Olsson19baf832005-06-21 12:43:18 -07001541
Olof Johansson91b9a272005-08-09 20:24:39 -07001542 fa = fa_to_delete;
Thomas Graf4e902c52006-08-17 18:14:52 -07001543 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id,
Milan Kocianb8f55832007-05-23 14:55:06 -07001544 &cfg->fc_nlinfo, 0);
Robert Olsson19baf832005-06-21 12:43:18 -07001545
Robert Olsson2373ce12005-08-25 13:01:29 -07001546 list_del_rcu(&fa->fa_list);
Robert Olsson19baf832005-06-21 12:43:18 -07001547
David S. Miller21d8c492011-04-14 14:49:37 -07001548 if (!plen)
1549 tb->tb_num_default--;
1550
Olof Johansson91b9a272005-08-09 20:24:39 -07001551 if (list_empty(fa_head)) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001552 hlist_del_rcu(&li->hlist);
Olof Johansson91b9a272005-08-09 20:24:39 -07001553 free_leaf_info(li);
Robert Olsson2373ce12005-08-25 13:01:29 -07001554 }
Olof Johansson91b9a272005-08-09 20:24:39 -07001555
1556 if (hlist_empty(&l->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001557 trie_leaf_remove(t, l);
Olof Johansson91b9a272005-08-09 20:24:39 -07001558
1559 if (fa->fa_state & FA_S_ACCESSED)
Nicolas Dichtel4ccfe6d2012-09-07 00:45:29 +00001560 rt_cache_flush(cfg->fc_nlinfo.nl_net);
Olof Johansson91b9a272005-08-09 20:24:39 -07001561
Robert Olsson2373ce12005-08-25 13:01:29 -07001562 fib_release_info(fa->fa_info);
1563 alias_free_mem_rcu(fa);
Olof Johansson91b9a272005-08-09 20:24:39 -07001564 return 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001565}
1566
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001567static int trie_flush_list(struct list_head *head)
Robert Olsson19baf832005-06-21 12:43:18 -07001568{
1569 struct fib_alias *fa, *fa_node;
1570 int found = 0;
1571
1572 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1573 struct fib_info *fi = fa->fa_info;
Robert Olsson19baf832005-06-21 12:43:18 -07001574
Robert Olsson2373ce12005-08-25 13:01:29 -07001575 if (fi && (fi->fib_flags & RTNH_F_DEAD)) {
1576 list_del_rcu(&fa->fa_list);
1577 fib_release_info(fa->fa_info);
1578 alias_free_mem_rcu(fa);
Robert Olsson19baf832005-06-21 12:43:18 -07001579 found++;
1580 }
1581 }
1582 return found;
1583}
1584
Alexander Duyckadaf9812014-12-31 10:55:47 -08001585static int trie_flush_leaf(struct tnode *l)
Robert Olsson19baf832005-06-21 12:43:18 -07001586{
1587 int found = 0;
1588 struct hlist_head *lih = &l->list;
Sasha Levinb67bfe02013-02-27 17:06:00 -08001589 struct hlist_node *tmp;
Robert Olsson19baf832005-06-21 12:43:18 -07001590 struct leaf_info *li = NULL;
1591
Sasha Levinb67bfe02013-02-27 17:06:00 -08001592 hlist_for_each_entry_safe(li, tmp, lih, hlist) {
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001593 found += trie_flush_list(&li->falh);
Robert Olsson19baf832005-06-21 12:43:18 -07001594
1595 if (list_empty(&li->falh)) {
Robert Olsson2373ce12005-08-25 13:01:29 -07001596 hlist_del_rcu(&li->hlist);
Robert Olsson19baf832005-06-21 12:43:18 -07001597 free_leaf_info(li);
1598 }
1599 }
1600 return found;
1601}
1602
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001603/*
1604 * Scan for the next right leaf starting at node p->child[idx]
1605 * Since we have back pointer, no recursion necessary.
1606 */
Alexander Duyckadaf9812014-12-31 10:55:47 -08001607static struct tnode *leaf_walk_rcu(struct tnode *p, struct tnode *c)
Robert Olsson19baf832005-06-21 12:43:18 -07001608{
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001609 do {
1610 t_key idx;
Robert Olsson19baf832005-06-21 12:43:18 -07001611
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001612 if (c)
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001613 idx = tkey_extract_bits(c->key, p->pos, p->bits) + 1;
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001614 else
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001615 idx = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001616
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001617 while (idx < 1u << p->bits) {
1618 c = tnode_get_child_rcu(p, idx++);
Robert Olsson2373ce12005-08-25 13:01:29 -07001619 if (!c)
Olof Johansson91b9a272005-08-09 20:24:39 -07001620 continue;
Robert Olsson19baf832005-06-21 12:43:18 -07001621
Eric Dumazetaab515d2013-08-05 11:18:49 -07001622 if (IS_LEAF(c))
Alexander Duyckadaf9812014-12-31 10:55:47 -08001623 return c;
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001624
1625 /* Rescan start scanning in new node */
Alexander Duyckadaf9812014-12-31 10:55:47 -08001626 p = c;
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001627 idx = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001628 }
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001629
1630 /* Node empty, walk back up to parent */
Alexander Duyckadaf9812014-12-31 10:55:47 -08001631 c = p;
Eric Dumazeta034ee32010-09-09 23:32:28 +00001632 } while ((p = node_parent_rcu(c)) != NULL);
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001633
1634 return NULL; /* Root of trie */
1635}
1636
Alexander Duyckadaf9812014-12-31 10:55:47 -08001637static struct tnode *trie_firstleaf(struct trie *t)
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001638{
Alexander Duyckadaf9812014-12-31 10:55:47 -08001639 struct tnode *n = rcu_dereference_rtnl(t->trie);
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001640
1641 if (!n)
1642 return NULL;
1643
1644 if (IS_LEAF(n)) /* trie is just a leaf */
Alexander Duyckadaf9812014-12-31 10:55:47 -08001645 return n;
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001646
1647 return leaf_walk_rcu(n, NULL);
1648}
1649
Alexander Duyckadaf9812014-12-31 10:55:47 -08001650static struct tnode *trie_nextleaf(struct tnode *l)
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001651{
Alexander Duyckadaf9812014-12-31 10:55:47 -08001652 struct tnode *p = node_parent_rcu(l);
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001653
1654 if (!p)
1655 return NULL; /* trie with just one leaf */
1656
Alexander Duyckadaf9812014-12-31 10:55:47 -08001657 return leaf_walk_rcu(p, l);
Robert Olsson19baf832005-06-21 12:43:18 -07001658}
1659
Alexander Duyckadaf9812014-12-31 10:55:47 -08001660static struct tnode *trie_leafindex(struct trie *t, int index)
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001661{
Alexander Duyckadaf9812014-12-31 10:55:47 -08001662 struct tnode *l = trie_firstleaf(t);
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001663
Stephen Hemmingerec28cf72008-02-11 21:12:49 -08001664 while (l && index-- > 0)
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001665 l = trie_nextleaf(l);
Stephen Hemmingerec28cf72008-02-11 21:12:49 -08001666
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001667 return l;
1668}
1669
1670
Robert Olssond562f1f2007-03-26 14:22:22 -07001671/*
1672 * Caller must hold RTNL.
1673 */
Stephen Hemminger16c6cf82009-09-20 10:35:36 +00001674int fib_table_flush(struct fib_table *tb)
Robert Olsson19baf832005-06-21 12:43:18 -07001675{
1676 struct trie *t = (struct trie *) tb->tb_data;
Alexander Duyckadaf9812014-12-31 10:55:47 -08001677 struct tnode *l, *ll = NULL;
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001678 int found = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001679
Stephen Hemminger82cfbb02008-01-22 21:55:32 -08001680 for (l = trie_firstleaf(t); l; l = trie_nextleaf(l)) {
Stephen Hemmingeref3660c2008-04-10 03:46:12 -07001681 found += trie_flush_leaf(l);
Robert Olsson19baf832005-06-21 12:43:18 -07001682
1683 if (ll && hlist_empty(&ll->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001684 trie_leaf_remove(t, ll);
Robert Olsson19baf832005-06-21 12:43:18 -07001685 ll = l;
1686 }
1687
1688 if (ll && hlist_empty(&ll->list))
Stephen Hemminger9195bef2008-01-22 21:56:34 -08001689 trie_leaf_remove(t, ll);
Robert Olsson19baf832005-06-21 12:43:18 -07001690
Stephen Hemminger0c7770c2005-08-23 21:59:41 -07001691 pr_debug("trie_flush found=%d\n", found);
Robert Olsson19baf832005-06-21 12:43:18 -07001692 return found;
1693}
1694
Pavel Emelyanov4aa2c462010-10-28 02:00:43 +00001695void fib_free_table(struct fib_table *tb)
1696{
Alexander Duyck8274a972014-12-31 10:55:29 -08001697#ifdef CONFIG_IP_FIB_TRIE_STATS
1698 struct trie *t = (struct trie *)tb->tb_data;
1699
1700 free_percpu(t->stats);
1701#endif /* CONFIG_IP_FIB_TRIE_STATS */
Pavel Emelyanov4aa2c462010-10-28 02:00:43 +00001702 kfree(tb);
1703}
1704
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001705static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah,
1706 struct fib_table *tb,
Robert Olsson19baf832005-06-21 12:43:18 -07001707 struct sk_buff *skb, struct netlink_callback *cb)
1708{
1709 int i, s_i;
1710 struct fib_alias *fa;
Al Viro32ab5f82006-09-26 22:21:45 -07001711 __be32 xkey = htonl(key);
Robert Olsson19baf832005-06-21 12:43:18 -07001712
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001713 s_i = cb->args[5];
Robert Olsson19baf832005-06-21 12:43:18 -07001714 i = 0;
1715
Robert Olsson2373ce12005-08-25 13:01:29 -07001716 /* rcu_read_lock is hold by caller */
1717
1718 list_for_each_entry_rcu(fa, fah, fa_list) {
Robert Olsson19baf832005-06-21 12:43:18 -07001719 if (i < s_i) {
1720 i++;
1721 continue;
1722 }
Robert Olsson19baf832005-06-21 12:43:18 -07001723
Eric W. Biederman15e47302012-09-07 20:12:54 +00001724 if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid,
Robert Olsson19baf832005-06-21 12:43:18 -07001725 cb->nlh->nlmsg_seq,
1726 RTM_NEWROUTE,
1727 tb->tb_id,
1728 fa->fa_type,
Thomas Grafbe403ea2006-08-17 18:15:17 -07001729 xkey,
Robert Olsson19baf832005-06-21 12:43:18 -07001730 plen,
1731 fa->fa_tos,
Stephen Hemminger64347f72008-01-22 21:55:01 -08001732 fa->fa_info, NLM_F_MULTI) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001733 cb->args[5] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001734 return -1;
Olof Johansson91b9a272005-08-09 20:24:39 -07001735 }
Robert Olsson19baf832005-06-21 12:43:18 -07001736 i++;
1737 }
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001738 cb->args[5] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001739 return skb->len;
1740}
1741
Alexander Duyckadaf9812014-12-31 10:55:47 -08001742static int fn_trie_dump_leaf(struct tnode *l, struct fib_table *tb,
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001743 struct sk_buff *skb, struct netlink_callback *cb)
Robert Olsson19baf832005-06-21 12:43:18 -07001744{
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001745 struct leaf_info *li;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001746 int i, s_i;
Robert Olsson19baf832005-06-21 12:43:18 -07001747
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001748 s_i = cb->args[4];
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001749 i = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001750
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001751 /* rcu_read_lock is hold by caller */
Sasha Levinb67bfe02013-02-27 17:06:00 -08001752 hlist_for_each_entry_rcu(li, &l->list, hlist) {
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001753 if (i < s_i) {
1754 i++;
Robert Olsson19baf832005-06-21 12:43:18 -07001755 continue;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001756 }
Robert Olsson19baf832005-06-21 12:43:18 -07001757
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001758 if (i > s_i)
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001759 cb->args[5] = 0;
Olof Johansson91b9a272005-08-09 20:24:39 -07001760
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001761 if (list_empty(&li->falh))
Robert Olsson19baf832005-06-21 12:43:18 -07001762 continue;
1763
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001764 if (fn_trie_dump_fa(l->key, li->plen, &li->falh, tb, skb, cb) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001765 cb->args[4] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001766 return -1;
1767 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001768 i++;
Robert Olsson19baf832005-06-21 12:43:18 -07001769 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001770
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001771 cb->args[4] = i;
Robert Olsson19baf832005-06-21 12:43:18 -07001772 return skb->len;
1773}
1774
Stephen Hemminger16c6cf82009-09-20 10:35:36 +00001775int fib_table_dump(struct fib_table *tb, struct sk_buff *skb,
1776 struct netlink_callback *cb)
Robert Olsson19baf832005-06-21 12:43:18 -07001777{
Alexander Duyckadaf9812014-12-31 10:55:47 -08001778 struct tnode *l;
Robert Olsson19baf832005-06-21 12:43:18 -07001779 struct trie *t = (struct trie *) tb->tb_data;
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001780 t_key key = cb->args[2];
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001781 int count = cb->args[3];
Robert Olsson19baf832005-06-21 12:43:18 -07001782
Robert Olsson2373ce12005-08-25 13:01:29 -07001783 rcu_read_lock();
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001784 /* Dump starting at last key.
1785 * Note: 0.0.0.0/0 (ie default) is first key.
1786 */
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001787 if (count == 0)
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001788 l = trie_firstleaf(t);
1789 else {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001790 /* Normally, continue from last key, but if that is missing
1791 * fallback to using slow rescan
1792 */
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001793 l = fib_find_node(t, key);
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001794 if (!l)
1795 l = trie_leafindex(t, count);
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001796 }
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001797
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001798 while (l) {
1799 cb->args[2] = l->key;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001800 if (fn_trie_dump_leaf(l, tb, skb, cb) < 0) {
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001801 cb->args[3] = count;
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001802 rcu_read_unlock();
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001803 return -1;
Robert Olsson19baf832005-06-21 12:43:18 -07001804 }
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001805
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001806 ++count;
Stephen Hemmingerd5ce8a02008-01-22 21:57:22 -08001807 l = trie_nextleaf(l);
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001808 memset(&cb->args[4], 0,
1809 sizeof(cb->args) - 4*sizeof(cb->args[0]));
Robert Olsson19baf832005-06-21 12:43:18 -07001810 }
Stephen Hemminger71d67e62008-01-31 16:45:47 -08001811 cb->args[3] = count;
Robert Olsson2373ce12005-08-25 13:01:29 -07001812 rcu_read_unlock();
Stephen Hemmingera88ee222008-01-22 21:56:11 -08001813
Robert Olsson19baf832005-06-21 12:43:18 -07001814 return skb->len;
Robert Olsson19baf832005-06-21 12:43:18 -07001815}
1816
David S. Miller5348ba82011-02-01 15:30:56 -08001817void __init fib_trie_init(void)
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001818{
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001819 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
1820 sizeof(struct fib_alias),
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -08001821 0, SLAB_PANIC, NULL);
1822
1823 trie_leaf_kmem = kmem_cache_create("ip_fib_trie",
Alexander Duyckadaf9812014-12-31 10:55:47 -08001824 max(sizeof(struct tnode),
Stephen Hemmingerbc3c8c12008-01-22 21:51:50 -08001825 sizeof(struct leaf_info)),
1826 0, SLAB_PANIC, NULL);
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001827}
Robert Olsson19baf832005-06-21 12:43:18 -07001828
Stephen Hemminger7f9b8052008-01-14 23:14:20 -08001829
David S. Miller5348ba82011-02-01 15:30:56 -08001830struct fib_table *fib_trie_table(u32 id)
Robert Olsson19baf832005-06-21 12:43:18 -07001831{
1832 struct fib_table *tb;
1833 struct trie *t;
1834
Robert Olsson19baf832005-06-21 12:43:18 -07001835 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
1836 GFP_KERNEL);
1837 if (tb == NULL)
1838 return NULL;
1839
1840 tb->tb_id = id;
Denis V. Lunev971b8932007-12-08 00:32:23 -08001841 tb->tb_default = -1;
David S. Miller21d8c492011-04-14 14:49:37 -07001842 tb->tb_num_default = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001843
1844 t = (struct trie *) tb->tb_data;
Alexander Duyck8274a972014-12-31 10:55:29 -08001845 RCU_INIT_POINTER(t->trie, NULL);
1846#ifdef CONFIG_IP_FIB_TRIE_STATS
1847 t->stats = alloc_percpu(struct trie_use_stats);
1848 if (!t->stats) {
1849 kfree(tb);
1850 tb = NULL;
1851 }
1852#endif
Robert Olsson19baf832005-06-21 12:43:18 -07001853
Robert Olsson19baf832005-06-21 12:43:18 -07001854 return tb;
1855}
1856
Robert Olsson19baf832005-06-21 12:43:18 -07001857#ifdef CONFIG_PROC_FS
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001858/* Depth first Trie walk iterator */
1859struct fib_trie_iter {
Denis V. Lunev1c340b22008-01-10 03:27:17 -08001860 struct seq_net_private p;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07001861 struct fib_table *tb;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001862 struct tnode *tnode;
Eric Dumazeta034ee32010-09-09 23:32:28 +00001863 unsigned int index;
1864 unsigned int depth;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001865};
Robert Olsson19baf832005-06-21 12:43:18 -07001866
Alexander Duyckadaf9812014-12-31 10:55:47 -08001867static struct tnode *fib_trie_get_next(struct fib_trie_iter *iter)
Robert Olsson19baf832005-06-21 12:43:18 -07001868{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001869 struct tnode *tn = iter->tnode;
Eric Dumazeta034ee32010-09-09 23:32:28 +00001870 unsigned int cindex = iter->index;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001871 struct tnode *p;
1872
Eric W. Biederman6640e692007-01-24 14:42:04 -08001873 /* A single entry routing table */
1874 if (!tn)
1875 return NULL;
1876
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001877 pr_debug("get_next iter={node=%p index=%d depth=%d}\n",
1878 iter->tnode, iter->index, iter->depth);
1879rescan:
1880 while (cindex < (1<<tn->bits)) {
Alexander Duyckadaf9812014-12-31 10:55:47 -08001881 struct tnode *n = tnode_get_child_rcu(tn, cindex);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001882
1883 if (n) {
1884 if (IS_LEAF(n)) {
1885 iter->tnode = tn;
1886 iter->index = cindex + 1;
1887 } else {
1888 /* push down one level */
Alexander Duyckadaf9812014-12-31 10:55:47 -08001889 iter->tnode = n;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001890 iter->index = 0;
1891 ++iter->depth;
1892 }
1893 return n;
1894 }
1895
1896 ++cindex;
1897 }
1898
1899 /* Current node exhausted, pop back up */
Alexander Duyckadaf9812014-12-31 10:55:47 -08001900 p = node_parent_rcu(tn);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001901 if (p) {
1902 cindex = tkey_extract_bits(tn->key, p->pos, p->bits)+1;
1903 tn = p;
1904 --iter->depth;
1905 goto rescan;
1906 }
1907
1908 /* got root? */
Robert Olsson19baf832005-06-21 12:43:18 -07001909 return NULL;
1910}
1911
Alexander Duyckadaf9812014-12-31 10:55:47 -08001912static struct tnode *fib_trie_get_first(struct fib_trie_iter *iter,
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001913 struct trie *t)
Robert Olsson19baf832005-06-21 12:43:18 -07001914{
Alexander Duyckadaf9812014-12-31 10:55:47 -08001915 struct tnode *n;
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08001916
Stephen Hemminger132adf52007-03-08 20:44:43 -08001917 if (!t)
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08001918 return NULL;
1919
1920 n = rcu_dereference(t->trie);
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07001921 if (!n)
Robert Olsson5ddf0eb2006-03-20 21:34:12 -08001922 return NULL;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001923
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07001924 if (IS_TNODE(n)) {
Alexander Duyckadaf9812014-12-31 10:55:47 -08001925 iter->tnode = n;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07001926 iter->index = 0;
1927 iter->depth = 1;
1928 } else {
1929 iter->tnode = NULL;
1930 iter->index = 0;
1931 iter->depth = 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001932 }
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07001933
1934 return n;
Robert Olsson19baf832005-06-21 12:43:18 -07001935}
1936
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001937static void trie_collect_stats(struct trie *t, struct trie_stat *s)
Robert Olsson19baf832005-06-21 12:43:18 -07001938{
Alexander Duyckadaf9812014-12-31 10:55:47 -08001939 struct tnode *n;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001940 struct fib_trie_iter iter;
Robert Olsson19baf832005-06-21 12:43:18 -07001941
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001942 memset(s, 0, sizeof(*s));
Robert Olsson19baf832005-06-21 12:43:18 -07001943
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001944 rcu_read_lock();
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07001945 for (n = fib_trie_get_first(&iter, t); n; n = fib_trie_get_next(&iter)) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001946 if (IS_LEAF(n)) {
Stephen Hemminger93672292008-01-22 21:54:05 -08001947 struct leaf_info *li;
Stephen Hemminger93672292008-01-22 21:54:05 -08001948
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001949 s->leaves++;
1950 s->totdepth += iter.depth;
1951 if (iter.depth > s->maxdepth)
1952 s->maxdepth = iter.depth;
Stephen Hemminger93672292008-01-22 21:54:05 -08001953
Alexander Duyckadaf9812014-12-31 10:55:47 -08001954 hlist_for_each_entry_rcu(li, &n->list, hlist)
Stephen Hemminger93672292008-01-22 21:54:05 -08001955 ++s->prefixes;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001956 } else {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001957 int i;
Robert Olsson19baf832005-06-21 12:43:18 -07001958
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001959 s->tnodes++;
Alexander Duyckadaf9812014-12-31 10:55:47 -08001960 if (n->bits < MAX_STAT_DEPTH)
1961 s->nodesizes[n->bits]++;
Robert Olsson06ef9212006-03-20 21:35:01 -08001962
Alexander Duyckadaf9812014-12-31 10:55:47 -08001963 for (i = 0; i < tnode_child_length(n); i++)
1964 if (!rcu_access_pointer(n->child[i]))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001965 s->nullpointers++;
1966 }
1967 }
1968 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07001969}
1970
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07001971/*
Robert Olsson19baf832005-06-21 12:43:18 -07001972 * This outputs /proc/net/fib_triestats
Robert Olsson19baf832005-06-21 12:43:18 -07001973 */
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001974static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat)
Robert Olsson19baf832005-06-21 12:43:18 -07001975{
Eric Dumazeta034ee32010-09-09 23:32:28 +00001976 unsigned int i, max, pointers, bytes, avdepth;
Robert Olsson19baf832005-06-21 12:43:18 -07001977
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001978 if (stat->leaves)
1979 avdepth = stat->totdepth*100 / stat->leaves;
1980 else
1981 avdepth = 0;
Robert Olsson19baf832005-06-21 12:43:18 -07001982
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08001983 seq_printf(seq, "\tAver depth: %u.%02d\n",
1984 avdepth / 100, avdepth % 100);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001985 seq_printf(seq, "\tMax depth: %u\n", stat->maxdepth);
Robert Olsson19baf832005-06-21 12:43:18 -07001986
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001987 seq_printf(seq, "\tLeaves: %u\n", stat->leaves);
Alexander Duyckadaf9812014-12-31 10:55:47 -08001988 bytes = sizeof(struct tnode) * stat->leaves;
Stephen Hemminger93672292008-01-22 21:54:05 -08001989
1990 seq_printf(seq, "\tPrefixes: %u\n", stat->prefixes);
1991 bytes += sizeof(struct leaf_info) * stat->prefixes;
1992
Stephen Hemminger187b5182008-01-12 20:55:55 -08001993 seq_printf(seq, "\tInternal nodes: %u\n\t", stat->tnodes);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001994 bytes += sizeof(struct tnode) * stat->tnodes;
Robert Olsson19baf832005-06-21 12:43:18 -07001995
Robert Olsson06ef9212006-03-20 21:35:01 -08001996 max = MAX_STAT_DEPTH;
1997 while (max > 0 && stat->nodesizes[max-1] == 0)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07001998 max--;
Robert Olsson19baf832005-06-21 12:43:18 -07001999
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002000 pointers = 0;
Jerry Snitselaarf585a992013-07-22 12:01:58 -07002001 for (i = 1; i < max; i++)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002002 if (stat->nodesizes[i] != 0) {
Stephen Hemminger187b5182008-01-12 20:55:55 -08002003 seq_printf(seq, " %u: %u", i, stat->nodesizes[i]);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002004 pointers += (1<<i) * stat->nodesizes[i];
2005 }
2006 seq_putc(seq, '\n');
Stephen Hemminger187b5182008-01-12 20:55:55 -08002007 seq_printf(seq, "\tPointers: %u\n", pointers);
Robert Olsson19baf832005-06-21 12:43:18 -07002008
Alexander Duyckadaf9812014-12-31 10:55:47 -08002009 bytes += sizeof(struct tnode *) * pointers;
Stephen Hemminger187b5182008-01-12 20:55:55 -08002010 seq_printf(seq, "Null ptrs: %u\n", stat->nullpointers);
2011 seq_printf(seq, "Total size: %u kB\n", (bytes + 1023) / 1024);
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002012}
Robert Olsson19baf832005-06-21 12:43:18 -07002013
2014#ifdef CONFIG_IP_FIB_TRIE_STATS
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002015static void trie_show_usage(struct seq_file *seq,
Alexander Duyck8274a972014-12-31 10:55:29 -08002016 const struct trie_use_stats __percpu *stats)
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002017{
Alexander Duyck8274a972014-12-31 10:55:29 -08002018 struct trie_use_stats s = { 0 };
2019 int cpu;
2020
2021 /* loop through all of the CPUs and gather up the stats */
2022 for_each_possible_cpu(cpu) {
2023 const struct trie_use_stats *pcpu = per_cpu_ptr(stats, cpu);
2024
2025 s.gets += pcpu->gets;
2026 s.backtrack += pcpu->backtrack;
2027 s.semantic_match_passed += pcpu->semantic_match_passed;
2028 s.semantic_match_miss += pcpu->semantic_match_miss;
2029 s.null_node_hit += pcpu->null_node_hit;
2030 s.resize_node_skipped += pcpu->resize_node_skipped;
2031 }
2032
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002033 seq_printf(seq, "\nCounters:\n---------\n");
Alexander Duyck8274a972014-12-31 10:55:29 -08002034 seq_printf(seq, "gets = %u\n", s.gets);
2035 seq_printf(seq, "backtracks = %u\n", s.backtrack);
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002036 seq_printf(seq, "semantic match passed = %u\n",
Alexander Duyck8274a972014-12-31 10:55:29 -08002037 s.semantic_match_passed);
2038 seq_printf(seq, "semantic match miss = %u\n", s.semantic_match_miss);
2039 seq_printf(seq, "null node hit= %u\n", s.null_node_hit);
2040 seq_printf(seq, "skipped node resize = %u\n\n", s.resize_node_skipped);
Robert Olsson19baf832005-06-21 12:43:18 -07002041}
Stephen Hemminger66a2f7f2008-01-12 21:23:17 -08002042#endif /* CONFIG_IP_FIB_TRIE_STATS */
2043
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002044static void fib_table_print(struct seq_file *seq, struct fib_table *tb)
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002045{
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002046 if (tb->tb_id == RT_TABLE_LOCAL)
2047 seq_puts(seq, "Local:\n");
2048 else if (tb->tb_id == RT_TABLE_MAIN)
2049 seq_puts(seq, "Main:\n");
2050 else
2051 seq_printf(seq, "Id %d:\n", tb->tb_id);
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002052}
Robert Olsson19baf832005-06-21 12:43:18 -07002053
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002054
Robert Olsson19baf832005-06-21 12:43:18 -07002055static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2056{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002057 struct net *net = (struct net *)seq->private;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002058 unsigned int h;
Eric W. Biederman877a9bf2007-12-07 00:47:47 -08002059
Stephen Hemmingerd717a9a2008-01-14 23:11:54 -08002060 seq_printf(seq,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002061 "Basic info: size of leaf:"
2062 " %Zd bytes, size of tnode: %Zd bytes.\n",
Alexander Duyckadaf9812014-12-31 10:55:47 -08002063 sizeof(struct tnode), sizeof(struct tnode));
Olof Johansson91b9a272005-08-09 20:24:39 -07002064
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002065 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2066 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002067 struct fib_table *tb;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002068
Sasha Levinb67bfe02013-02-27 17:06:00 -08002069 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002070 struct trie *t = (struct trie *) tb->tb_data;
2071 struct trie_stat stat;
2072
2073 if (!t)
2074 continue;
2075
2076 fib_table_print(seq, tb);
2077
2078 trie_collect_stats(t, &stat);
2079 trie_show_stats(seq, &stat);
2080#ifdef CONFIG_IP_FIB_TRIE_STATS
Alexander Duyck8274a972014-12-31 10:55:29 -08002081 trie_show_usage(seq, t->stats);
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002082#endif
2083 }
2084 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002085
Robert Olsson19baf832005-06-21 12:43:18 -07002086 return 0;
2087}
2088
Robert Olsson19baf832005-06-21 12:43:18 -07002089static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2090{
Pavel Emelyanovde05c552008-07-18 04:07:21 -07002091 return single_open_net(inode, file, fib_triestat_seq_show);
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002092}
2093
Arjan van de Ven9a321442007-02-12 00:55:35 -08002094static const struct file_operations fib_triestat_fops = {
Stephen Hemmingerc877efb2005-07-19 14:01:51 -07002095 .owner = THIS_MODULE,
2096 .open = fib_triestat_seq_open,
2097 .read = seq_read,
2098 .llseek = seq_lseek,
Pavel Emelyanovb6fcbdb2008-07-18 04:07:44 -07002099 .release = single_release_net,
Robert Olsson19baf832005-06-21 12:43:18 -07002100};
2101
Alexander Duyckadaf9812014-12-31 10:55:47 -08002102static struct tnode *fib_trie_get_idx(struct seq_file *seq, loff_t pos)
Robert Olsson19baf832005-06-21 12:43:18 -07002103{
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002104 struct fib_trie_iter *iter = seq->private;
2105 struct net *net = seq_file_net(seq);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002106 loff_t idx = 0;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002107 unsigned int h;
Robert Olsson19baf832005-06-21 12:43:18 -07002108
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002109 for (h = 0; h < FIB_TABLE_HASHSZ; h++) {
2110 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002111 struct fib_table *tb;
2112
Sasha Levinb67bfe02013-02-27 17:06:00 -08002113 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
Alexander Duyckadaf9812014-12-31 10:55:47 -08002114 struct tnode *n;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002115
2116 for (n = fib_trie_get_first(iter,
2117 (struct trie *) tb->tb_data);
2118 n; n = fib_trie_get_next(iter))
2119 if (pos == idx++) {
2120 iter->tb = tb;
2121 return n;
2122 }
2123 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002124 }
Robert Olsson19baf832005-06-21 12:43:18 -07002125
Robert Olsson19baf832005-06-21 12:43:18 -07002126 return NULL;
2127}
2128
2129static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08002130 __acquires(RCU)
Robert Olsson19baf832005-06-21 12:43:18 -07002131{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002132 rcu_read_lock();
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002133 return fib_trie_get_idx(seq, *pos);
Robert Olsson19baf832005-06-21 12:43:18 -07002134}
2135
2136static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2137{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002138 struct fib_trie_iter *iter = seq->private;
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002139 struct net *net = seq_file_net(seq);
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002140 struct fib_table *tb = iter->tb;
2141 struct hlist_node *tb_node;
2142 unsigned int h;
Alexander Duyckadaf9812014-12-31 10:55:47 -08002143 struct tnode *n;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002144
Robert Olsson19baf832005-06-21 12:43:18 -07002145 ++*pos;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002146 /* next node in same table */
2147 n = fib_trie_get_next(iter);
2148 if (n)
2149 return n;
Olof Johansson91b9a272005-08-09 20:24:39 -07002150
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002151 /* walk rest of this hash chain */
2152 h = tb->tb_id & (FIB_TABLE_HASHSZ - 1);
Eric Dumazet0a5c0472011-03-31 01:51:35 -07002153 while ((tb_node = rcu_dereference(hlist_next_rcu(&tb->tb_hlist)))) {
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002154 tb = hlist_entry(tb_node, struct fib_table, tb_hlist);
2155 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2156 if (n)
2157 goto found;
2158 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002159
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002160 /* new hash chain */
2161 while (++h < FIB_TABLE_HASHSZ) {
2162 struct hlist_head *head = &net->ipv4.fib_table_hash[h];
Sasha Levinb67bfe02013-02-27 17:06:00 -08002163 hlist_for_each_entry_rcu(tb, head, tb_hlist) {
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002164 n = fib_trie_get_first(iter, (struct trie *) tb->tb_data);
2165 if (n)
2166 goto found;
2167 }
2168 }
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002169 return NULL;
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002170
2171found:
2172 iter->tb = tb;
2173 return n;
Robert Olsson19baf832005-06-21 12:43:18 -07002174}
2175
2176static void fib_trie_seq_stop(struct seq_file *seq, void *v)
Stephen Hemmingerc95aaf92008-01-12 21:25:02 -08002177 __releases(RCU)
Robert Olsson19baf832005-06-21 12:43:18 -07002178{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002179 rcu_read_unlock();
Robert Olsson19baf832005-06-21 12:43:18 -07002180}
2181
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002182static void seq_indent(struct seq_file *seq, int n)
2183{
Eric Dumazeta034ee32010-09-09 23:32:28 +00002184 while (n-- > 0)
2185 seq_puts(seq, " ");
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002186}
Robert Olsson19baf832005-06-21 12:43:18 -07002187
Eric Dumazet28d36e32008-01-14 23:09:56 -08002188static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002189{
Stephen Hemminger132adf52007-03-08 20:44:43 -08002190 switch (s) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002191 case RT_SCOPE_UNIVERSE: return "universe";
2192 case RT_SCOPE_SITE: return "site";
2193 case RT_SCOPE_LINK: return "link";
2194 case RT_SCOPE_HOST: return "host";
2195 case RT_SCOPE_NOWHERE: return "nowhere";
2196 default:
Eric Dumazet28d36e32008-01-14 23:09:56 -08002197 snprintf(buf, len, "scope=%d", s);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002198 return buf;
2199 }
2200}
2201
Jan Engelhardt36cbd3d2009-08-05 10:42:58 -07002202static const char *const rtn_type_names[__RTN_MAX] = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002203 [RTN_UNSPEC] = "UNSPEC",
2204 [RTN_UNICAST] = "UNICAST",
2205 [RTN_LOCAL] = "LOCAL",
2206 [RTN_BROADCAST] = "BROADCAST",
2207 [RTN_ANYCAST] = "ANYCAST",
2208 [RTN_MULTICAST] = "MULTICAST",
2209 [RTN_BLACKHOLE] = "BLACKHOLE",
2210 [RTN_UNREACHABLE] = "UNREACHABLE",
2211 [RTN_PROHIBIT] = "PROHIBIT",
2212 [RTN_THROW] = "THROW",
2213 [RTN_NAT] = "NAT",
2214 [RTN_XRESOLVE] = "XRESOLVE",
2215};
2216
Eric Dumazeta034ee32010-09-09 23:32:28 +00002217static inline const char *rtn_type(char *buf, size_t len, unsigned int t)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002218{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002219 if (t < __RTN_MAX && rtn_type_names[t])
2220 return rtn_type_names[t];
Eric Dumazet28d36e32008-01-14 23:09:56 -08002221 snprintf(buf, len, "type %u", t);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002222 return buf;
2223}
2224
2225/* Pretty print the trie */
Robert Olsson19baf832005-06-21 12:43:18 -07002226static int fib_trie_seq_show(struct seq_file *seq, void *v)
2227{
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002228 const struct fib_trie_iter *iter = seq->private;
Alexander Duyckadaf9812014-12-31 10:55:47 -08002229 struct tnode *n = v;
Robert Olsson19baf832005-06-21 12:43:18 -07002230
Stephen Hemminger3d3b2d22008-03-23 22:43:56 -07002231 if (!node_parent_rcu(n))
2232 fib_table_print(seq, iter->tb);
Robert Olsson095b8502007-01-26 19:06:01 -08002233
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002234 if (IS_TNODE(n)) {
Alexander Duyckadaf9812014-12-31 10:55:47 -08002235 __be32 prf = htonl(n->key);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002236
Alexander Duyckadaf9812014-12-31 10:55:47 -08002237 seq_indent(seq, iter->depth - 1);
Harvey Harrison673d57e2008-10-31 00:53:57 -07002238 seq_printf(seq, " +-- %pI4/%d %d %d %d\n",
Alexander Duyckadaf9812014-12-31 10:55:47 -08002239 &prf, n->pos, n->bits, n->full_children,
2240 n->empty_children);
Olof Johansson91b9a272005-08-09 20:24:39 -07002241 } else {
Stephen Hemminger13280422008-01-22 21:54:37 -08002242 struct leaf_info *li;
Alexander Duyckadaf9812014-12-31 10:55:47 -08002243 __be32 val = htonl(n->key);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002244
2245 seq_indent(seq, iter->depth);
Harvey Harrison673d57e2008-10-31 00:53:57 -07002246 seq_printf(seq, " |-- %pI4\n", &val);
Eric Dumazet28d36e32008-01-14 23:09:56 -08002247
Alexander Duyckadaf9812014-12-31 10:55:47 -08002248 hlist_for_each_entry_rcu(li, &n->list, hlist) {
Stephen Hemminger13280422008-01-22 21:54:37 -08002249 struct fib_alias *fa;
Eric Dumazet28d36e32008-01-14 23:09:56 -08002250
Stephen Hemminger13280422008-01-22 21:54:37 -08002251 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
2252 char buf1[32], buf2[32];
Eric Dumazet28d36e32008-01-14 23:09:56 -08002253
Stephen Hemminger13280422008-01-22 21:54:37 -08002254 seq_indent(seq, iter->depth+1);
2255 seq_printf(seq, " /%d %s %s", li->plen,
2256 rtn_scope(buf1, sizeof(buf1),
David S. Miller37e826c2011-03-24 18:06:47 -07002257 fa->fa_info->fib_scope),
Stephen Hemminger13280422008-01-22 21:54:37 -08002258 rtn_type(buf2, sizeof(buf2),
2259 fa->fa_type));
2260 if (fa->fa_tos)
Denis V. Lunevb9c4d822008-02-05 02:58:45 -08002261 seq_printf(seq, " tos=%d", fa->fa_tos);
Stephen Hemminger13280422008-01-22 21:54:37 -08002262 seq_putc(seq, '\n');
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002263 }
2264 }
Robert Olsson19baf832005-06-21 12:43:18 -07002265 }
2266
2267 return 0;
2268}
2269
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002270static const struct seq_operations fib_trie_seq_ops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002271 .start = fib_trie_seq_start,
2272 .next = fib_trie_seq_next,
2273 .stop = fib_trie_seq_stop,
2274 .show = fib_trie_seq_show,
Robert Olsson19baf832005-06-21 12:43:18 -07002275};
2276
2277static int fib_trie_seq_open(struct inode *inode, struct file *file)
2278{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002279 return seq_open_net(inode, file, &fib_trie_seq_ops,
2280 sizeof(struct fib_trie_iter));
Robert Olsson19baf832005-06-21 12:43:18 -07002281}
2282
Arjan van de Ven9a321442007-02-12 00:55:35 -08002283static const struct file_operations fib_trie_fops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002284 .owner = THIS_MODULE,
2285 .open = fib_trie_seq_open,
2286 .read = seq_read,
2287 .llseek = seq_lseek,
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002288 .release = seq_release_net,
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002289};
2290
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002291struct fib_route_iter {
2292 struct seq_net_private p;
2293 struct trie *main_trie;
2294 loff_t pos;
2295 t_key key;
2296};
2297
Alexander Duyckadaf9812014-12-31 10:55:47 -08002298static struct tnode *fib_route_get_idx(struct fib_route_iter *iter, loff_t pos)
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002299{
Alexander Duyckadaf9812014-12-31 10:55:47 -08002300 struct tnode *l = NULL;
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002301 struct trie *t = iter->main_trie;
2302
2303 /* use cache location of last found key */
2304 if (iter->pos > 0 && pos >= iter->pos && (l = fib_find_node(t, iter->key)))
2305 pos -= iter->pos;
2306 else {
2307 iter->pos = 0;
2308 l = trie_firstleaf(t);
2309 }
2310
2311 while (l && pos-- > 0) {
2312 iter->pos++;
2313 l = trie_nextleaf(l);
2314 }
2315
2316 if (l)
2317 iter->key = pos; /* remember it */
2318 else
2319 iter->pos = 0; /* forget it */
2320
2321 return l;
2322}
2323
2324static void *fib_route_seq_start(struct seq_file *seq, loff_t *pos)
2325 __acquires(RCU)
2326{
2327 struct fib_route_iter *iter = seq->private;
2328 struct fib_table *tb;
2329
2330 rcu_read_lock();
YOSHIFUJI Hideaki12188542008-03-26 02:36:06 +09002331 tb = fib_get_table(seq_file_net(seq), RT_TABLE_MAIN);
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002332 if (!tb)
2333 return NULL;
2334
2335 iter->main_trie = (struct trie *) tb->tb_data;
2336 if (*pos == 0)
2337 return SEQ_START_TOKEN;
2338 else
2339 return fib_route_get_idx(iter, *pos - 1);
2340}
2341
2342static void *fib_route_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2343{
2344 struct fib_route_iter *iter = seq->private;
Alexander Duyckadaf9812014-12-31 10:55:47 -08002345 struct tnode *l = v;
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002346
2347 ++*pos;
2348 if (v == SEQ_START_TOKEN) {
2349 iter->pos = 0;
2350 l = trie_firstleaf(iter->main_trie);
2351 } else {
2352 iter->pos++;
2353 l = trie_nextleaf(l);
2354 }
2355
2356 if (l)
2357 iter->key = l->key;
2358 else
2359 iter->pos = 0;
2360 return l;
2361}
2362
2363static void fib_route_seq_stop(struct seq_file *seq, void *v)
2364 __releases(RCU)
2365{
2366 rcu_read_unlock();
2367}
2368
Eric Dumazeta034ee32010-09-09 23:32:28 +00002369static unsigned int fib_flag_trans(int type, __be32 mask, const struct fib_info *fi)
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002370{
Eric Dumazeta034ee32010-09-09 23:32:28 +00002371 unsigned int flags = 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002372
Eric Dumazeta034ee32010-09-09 23:32:28 +00002373 if (type == RTN_UNREACHABLE || type == RTN_PROHIBIT)
2374 flags = RTF_REJECT;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002375 if (fi && fi->fib_nh->nh_gw)
2376 flags |= RTF_GATEWAY;
Al Viro32ab5f82006-09-26 22:21:45 -07002377 if (mask == htonl(0xFFFFFFFF))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002378 flags |= RTF_HOST;
2379 flags |= RTF_UP;
2380 return flags;
2381}
2382
2383/*
2384 * This outputs /proc/net/route.
2385 * The format of the file is not supposed to be changed
Eric Dumazeta034ee32010-09-09 23:32:28 +00002386 * and needs to be same as fib_hash output to avoid breaking
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002387 * legacy utilities
2388 */
2389static int fib_route_seq_show(struct seq_file *seq, void *v)
2390{
Alexander Duyckadaf9812014-12-31 10:55:47 -08002391 struct tnode *l = v;
Stephen Hemminger13280422008-01-22 21:54:37 -08002392 struct leaf_info *li;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002393
2394 if (v == SEQ_START_TOKEN) {
2395 seq_printf(seq, "%-127s\n", "Iface\tDestination\tGateway "
2396 "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
2397 "\tWindow\tIRTT");
2398 return 0;
2399 }
2400
Sasha Levinb67bfe02013-02-27 17:06:00 -08002401 hlist_for_each_entry_rcu(li, &l->list, hlist) {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002402 struct fib_alias *fa;
Al Viro32ab5f82006-09-26 22:21:45 -07002403 __be32 mask, prefix;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002404
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002405 mask = inet_make_mask(li->plen);
2406 prefix = htonl(l->key);
2407
2408 list_for_each_entry_rcu(fa, &li->falh, fa_list) {
Herbert Xu1371e372005-10-15 09:42:39 +10002409 const struct fib_info *fi = fa->fa_info;
Eric Dumazeta034ee32010-09-09 23:32:28 +00002410 unsigned int flags = fib_flag_trans(fa->fa_type, mask, fi);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002411
2412 if (fa->fa_type == RTN_BROADCAST
2413 || fa->fa_type == RTN_MULTICAST)
2414 continue;
2415
Tetsuo Handa652586d2013-11-14 14:31:57 -08002416 seq_setwidth(seq, 127);
2417
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002418 if (fi)
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002419 seq_printf(seq,
2420 "%s\t%08X\t%08X\t%04X\t%d\t%u\t"
Tetsuo Handa652586d2013-11-14 14:31:57 -08002421 "%d\t%08X\t%d\t%u\t%u",
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002422 fi->fib_dev ? fi->fib_dev->name : "*",
2423 prefix,
2424 fi->fib_nh->nh_gw, flags, 0, 0,
2425 fi->fib_priority,
2426 mask,
Stephen Hemmingera07f5f52008-01-22 21:53:36 -08002427 (fi->fib_advmss ?
2428 fi->fib_advmss + 40 : 0),
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002429 fi->fib_window,
Tetsuo Handa652586d2013-11-14 14:31:57 -08002430 fi->fib_rtt >> 3);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002431 else
Pavel Emelyanov5e659e42008-04-24 01:02:16 -07002432 seq_printf(seq,
2433 "*\t%08X\t%08X\t%04X\t%d\t%u\t"
Tetsuo Handa652586d2013-11-14 14:31:57 -08002434 "%d\t%08X\t%d\t%u\t%u",
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002435 prefix, 0, flags, 0, 0, 0,
Tetsuo Handa652586d2013-11-14 14:31:57 -08002436 mask, 0, 0, 0);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002437
Tetsuo Handa652586d2013-11-14 14:31:57 -08002438 seq_pad(seq, '\n');
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002439 }
2440 }
2441
2442 return 0;
2443}
2444
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002445static const struct seq_operations fib_route_seq_ops = {
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002446 .start = fib_route_seq_start,
2447 .next = fib_route_seq_next,
2448 .stop = fib_route_seq_stop,
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002449 .show = fib_route_seq_show,
2450};
2451
2452static int fib_route_seq_open(struct inode *inode, struct file *file)
2453{
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002454 return seq_open_net(inode, file, &fib_route_seq_ops,
Stephen Hemminger8315f5d2008-02-11 21:14:39 -08002455 sizeof(struct fib_route_iter));
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002456}
2457
Arjan van de Ven9a321442007-02-12 00:55:35 -08002458static const struct file_operations fib_route_fops = {
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002459 .owner = THIS_MODULE,
2460 .open = fib_route_seq_open,
2461 .read = seq_read,
2462 .llseek = seq_lseek,
Denis V. Lunev1c340b22008-01-10 03:27:17 -08002463 .release = seq_release_net,
Robert Olsson19baf832005-06-21 12:43:18 -07002464};
2465
Denis V. Lunev61a02652008-01-10 03:21:09 -08002466int __net_init fib_proc_init(struct net *net)
Robert Olsson19baf832005-06-21 12:43:18 -07002467{
Gao fengd4beaa62013-02-18 01:34:54 +00002468 if (!proc_create("fib_trie", S_IRUGO, net->proc_net, &fib_trie_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002469 goto out1;
2470
Gao fengd4beaa62013-02-18 01:34:54 +00002471 if (!proc_create("fib_triestat", S_IRUGO, net->proc_net,
2472 &fib_triestat_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002473 goto out2;
2474
Gao fengd4beaa62013-02-18 01:34:54 +00002475 if (!proc_create("route", S_IRUGO, net->proc_net, &fib_route_fops))
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002476 goto out3;
2477
Robert Olsson19baf832005-06-21 12:43:18 -07002478 return 0;
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002479
2480out3:
Gao fengece31ff2013-02-18 01:34:56 +00002481 remove_proc_entry("fib_triestat", net->proc_net);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002482out2:
Gao fengece31ff2013-02-18 01:34:56 +00002483 remove_proc_entry("fib_trie", net->proc_net);
Stephen Hemmingercb7b5932005-09-09 13:35:42 -07002484out1:
2485 return -ENOMEM;
Robert Olsson19baf832005-06-21 12:43:18 -07002486}
2487
Denis V. Lunev61a02652008-01-10 03:21:09 -08002488void __net_exit fib_proc_exit(struct net *net)
Robert Olsson19baf832005-06-21 12:43:18 -07002489{
Gao fengece31ff2013-02-18 01:34:56 +00002490 remove_proc_entry("fib_trie", net->proc_net);
2491 remove_proc_entry("fib_triestat", net->proc_net);
2492 remove_proc_entry("route", net->proc_net);
Robert Olsson19baf832005-06-21 12:43:18 -07002493}
2494
2495#endif /* CONFIG_PROC_FS */