blob: 2d2ac23b8606deace67cad8dfe1d597705b79c97 [file] [log] [blame]
/*
 *	Linux IPv6 multicast routing support for BSD pim6sd
 *	Based on net/ipv4/ipmr.c.
 *
 *	(c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
 *		LSIIT Laboratory, Strasbourg, France
 *	(c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
 *		6WIND, Paris, France
 *	Copyright (C)2007,2008 USAGI/WIDE Project
 *		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 */
18
19#include <asm/system.h>
20#include <asm/uaccess.h>
21#include <linux/types.h>
22#include <linux/sched.h>
23#include <linux/errno.h>
24#include <linux/timer.h>
25#include <linux/mm.h>
26#include <linux/kernel.h>
27#include <linux/fcntl.h>
28#include <linux/stat.h>
29#include <linux/socket.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090030#include <linux/inet.h>
31#include <linux/netdevice.h>
32#include <linux/inetdevice.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090033#include <linux/proc_fs.h>
34#include <linux/seq_file.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090035#include <linux/init.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090036#include <net/protocol.h>
37#include <linux/skbuff.h>
38#include <net/sock.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090039#include <net/raw.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090040#include <linux/notifier.h>
41#include <linux/if_arp.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090042#include <net/checksum.h>
43#include <net/netlink.h>
44
45#include <net/ipv6.h>
46#include <net/ip6_route.h>
47#include <linux/mroute6.h>
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +090048#include <linux/pim.h>
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090049#include <net/addrconf.h>
50#include <linux/netfilter_ipv6.h>
51
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090052/* Big lock, protecting vif table, mrt cache and mroute socket state.
53 Note that the changes are semaphored via rtnl_lock.
54 */
55
56static DEFINE_RWLOCK(mrt_lock);
57
58/*
59 * Multicast router control variables
60 */
61
Benjamin Thery4e168802008-12-10 16:15:08 -080062#define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090063
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090064static struct mfc6_cache *mfc_unres_queue; /* Queue of unresolved entries */
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090065
66/* Special spinlock for queue of unresolved entries */
67static DEFINE_SPINLOCK(mfc_unres_lock);
68
/* We return to Alan's original scheme. The hash table of resolved
   entries is changed only in process context and is protected
   by the weak lock mrt_lock. The queue of unresolved entries is
   protected by the strong spinlock mfc_unres_lock.

   With this scheme the data path takes no exclusive locks at all.
 */
76
77static struct kmem_cache *mrt_cachep __read_mostly;
78
79static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
Rami Rosen6ac7eb02008-04-10 12:40:10 +030080static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090081static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);
82
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +090083#ifdef CONFIG_IPV6_PIMSM_V2
84static struct inet6_protocol pim6_protocol;
85#endif
86
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +090087static struct timer_list ipmr_expire_timer;
88
89
90#ifdef CONFIG_PROC_FS
91
/*
 * Cursor used while dumping the multicast forwarding cache through
 * seq_file.
 */
struct ipmr_mfc_iter {
	/*
	 * List currently being walked: the resolved hash array, the address
	 * of mfc_unres_queue, or NULL once the walk has run off the end.
	 */
	struct mfc6_cache **cache;
	/* Hash line reached inside the resolved array. */
	int ct;
};
96
97
98static struct mfc6_cache *ipmr_mfc_seq_idx(struct ipmr_mfc_iter *it, loff_t pos)
99{
100 struct mfc6_cache *mfc;
101
Benjamin Thery4a6258a2008-12-10 16:24:07 -0800102 it->cache = init_net.ipv6.mfc6_cache_array;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900103 read_lock(&mrt_lock);
Benjamin Thery4a6258a2008-12-10 16:24:07 -0800104 for (it->ct = 0; it->ct < MFC6_LINES; it->ct++)
105 for (mfc = init_net.ipv6.mfc6_cache_array[it->ct];
106 mfc; mfc = mfc->next)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900107 if (pos-- == 0)
108 return mfc;
109 read_unlock(&mrt_lock);
110
111 it->cache = &mfc_unres_queue;
112 spin_lock_bh(&mfc_unres_lock);
113 for (mfc = mfc_unres_queue; mfc; mfc = mfc->next)
114 if (pos-- == 0)
115 return mfc;
116 spin_unlock_bh(&mfc_unres_lock);
117
118 it->cache = NULL;
119 return NULL;
120}
121
122
123
124
125/*
126 * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
127 */
128
/* Cursor used while dumping the multicast interface table via seq_file. */
struct ipmr_vif_iter {
	/* Index into vif6_table of the slot the walk has reached. */
	int ct;
};
132
133static struct mif_device *ip6mr_vif_seq_idx(struct ipmr_vif_iter *iter,
134 loff_t pos)
135{
Benjamin Thery4e168802008-12-10 16:15:08 -0800136 for (iter->ct = 0; iter->ct < init_net.ipv6.maxvif; ++iter->ct) {
137 if (!MIF_EXISTS(&init_net, iter->ct))
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900138 continue;
139 if (pos-- == 0)
Benjamin Thery4e168802008-12-10 16:15:08 -0800140 return &init_net.ipv6.vif6_table[iter->ct];
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900141 }
142 return NULL;
143}
144
145static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
146 __acquires(mrt_lock)
147{
148 read_lock(&mrt_lock);
149 return (*pos ? ip6mr_vif_seq_idx(seq->private, *pos - 1)
150 : SEQ_START_TOKEN);
151}
152
153static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
154{
155 struct ipmr_vif_iter *iter = seq->private;
156
157 ++*pos;
158 if (v == SEQ_START_TOKEN)
159 return ip6mr_vif_seq_idx(iter, 0);
160
Benjamin Thery4e168802008-12-10 16:15:08 -0800161 while (++iter->ct < init_net.ipv6.maxvif) {
162 if (!MIF_EXISTS(&init_net, iter->ct))
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900163 continue;
Benjamin Thery4e168802008-12-10 16:15:08 -0800164 return &init_net.ipv6.vif6_table[iter->ct];
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900165 }
166 return NULL;
167}
168
169static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
170 __releases(mrt_lock)
171{
172 read_unlock(&mrt_lock);
173}
174
175static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
176{
177 if (v == SEQ_START_TOKEN) {
178 seq_puts(seq,
179 "Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
180 } else {
181 const struct mif_device *vif = v;
182 const char *name = vif->dev ? vif->dev->name : "none";
183
184 seq_printf(seq,
Al Virod430a222008-06-02 10:59:02 +0100185 "%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
Benjamin Thery4e168802008-12-10 16:15:08 -0800186 vif - init_net.ipv6.vif6_table,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900187 name, vif->bytes_in, vif->pkt_in,
188 vif->bytes_out, vif->pkt_out,
189 vif->flags);
190 }
191 return 0;
192}
193
194static struct seq_operations ip6mr_vif_seq_ops = {
195 .start = ip6mr_vif_seq_start,
196 .next = ip6mr_vif_seq_next,
197 .stop = ip6mr_vif_seq_stop,
198 .show = ip6mr_vif_seq_show,
199};
200
201static int ip6mr_vif_open(struct inode *inode, struct file *file)
202{
203 return seq_open_private(file, &ip6mr_vif_seq_ops,
204 sizeof(struct ipmr_vif_iter));
205}
206
207static struct file_operations ip6mr_vif_fops = {
208 .owner = THIS_MODULE,
209 .open = ip6mr_vif_open,
210 .read = seq_read,
211 .llseek = seq_lseek,
Benjamin Theryeedd7262008-11-20 04:16:12 -0800212 .release = seq_release_private,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900213};
214
215static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
216{
217 return (*pos ? ipmr_mfc_seq_idx(seq->private, *pos - 1)
218 : SEQ_START_TOKEN);
219}
220
221static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
222{
223 struct mfc6_cache *mfc = v;
224 struct ipmr_mfc_iter *it = seq->private;
225
226 ++*pos;
227
228 if (v == SEQ_START_TOKEN)
229 return ipmr_mfc_seq_idx(seq->private, 0);
230
231 if (mfc->next)
232 return mfc->next;
233
234 if (it->cache == &mfc_unres_queue)
235 goto end_of_list;
236
Benjamin Thery4a6258a2008-12-10 16:24:07 -0800237 BUG_ON(it->cache != init_net.ipv6.mfc6_cache_array);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900238
Benjamin Thery4a6258a2008-12-10 16:24:07 -0800239 while (++it->ct < MFC6_LINES) {
240 mfc = init_net.ipv6.mfc6_cache_array[it->ct];
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900241 if (mfc)
242 return mfc;
243 }
244
245 /* exhausted cache_array, show unresolved */
246 read_unlock(&mrt_lock);
247 it->cache = &mfc_unres_queue;
248 it->ct = 0;
249
250 spin_lock_bh(&mfc_unres_lock);
251 mfc = mfc_unres_queue;
252 if (mfc)
253 return mfc;
254
255 end_of_list:
256 spin_unlock_bh(&mfc_unres_lock);
257 it->cache = NULL;
258
259 return NULL;
260}
261
262static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
263{
264 struct ipmr_mfc_iter *it = seq->private;
265
266 if (it->cache == &mfc_unres_queue)
267 spin_unlock_bh(&mfc_unres_lock);
Benjamin Thery4a6258a2008-12-10 16:24:07 -0800268 else if (it->cache == init_net.ipv6.mfc6_cache_array)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900269 read_unlock(&mrt_lock);
270}
271
272static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
273{
274 int n;
275
276 if (v == SEQ_START_TOKEN) {
277 seq_puts(seq,
278 "Group "
279 "Origin "
280 "Iif Pkts Bytes Wrong Oifs\n");
281 } else {
282 const struct mfc6_cache *mfc = v;
283 const struct ipmr_mfc_iter *it = seq->private;
284
Benjamin Thery999890b2008-12-03 22:22:16 -0800285 seq_printf(seq, "%pI6 %pI6 %-3hd",
Harvey Harrison0c6ce782008-10-28 16:09:23 -0700286 &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
Benjamin Thery1ea472e2008-12-03 22:21:47 -0800287 mfc->mf6c_parent);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900288
289 if (it->cache != &mfc_unres_queue) {
Benjamin Thery1ea472e2008-12-03 22:21:47 -0800290 seq_printf(seq, " %8lu %8lu %8lu",
291 mfc->mfc_un.res.pkt,
292 mfc->mfc_un.res.bytes,
293 mfc->mfc_un.res.wrong_if);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900294 for (n = mfc->mfc_un.res.minvif;
295 n < mfc->mfc_un.res.maxvif; n++) {
Benjamin Thery4e168802008-12-10 16:15:08 -0800296 if (MIF_EXISTS(&init_net, n) &&
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900297 mfc->mfc_un.res.ttls[n] < 255)
298 seq_printf(seq,
299 " %2d:%-3d",
300 n, mfc->mfc_un.res.ttls[n]);
301 }
Benjamin Thery1ea472e2008-12-03 22:21:47 -0800302 } else {
303 /* unresolved mfc_caches don't contain
304 * pkt, bytes and wrong_if values
305 */
306 seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900307 }
308 seq_putc(seq, '\n');
309 }
310 return 0;
311}
312
313static struct seq_operations ipmr_mfc_seq_ops = {
314 .start = ipmr_mfc_seq_start,
315 .next = ipmr_mfc_seq_next,
316 .stop = ipmr_mfc_seq_stop,
317 .show = ipmr_mfc_seq_show,
318};
319
320static int ipmr_mfc_open(struct inode *inode, struct file *file)
321{
322 return seq_open_private(file, &ipmr_mfc_seq_ops,
323 sizeof(struct ipmr_mfc_iter));
324}
325
326static struct file_operations ip6mr_mfc_fops = {
327 .owner = THIS_MODULE,
328 .open = ipmr_mfc_open,
329 .read = seq_read,
330 .llseek = seq_lseek,
Benjamin Theryeedd7262008-11-20 04:16:12 -0800331 .release = seq_release_private,
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900332};
333#endif
334
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900335#ifdef CONFIG_IPV6_PIMSM_V2
336static int reg_vif_num = -1;
337
338static int pim6_rcv(struct sk_buff *skb)
339{
340 struct pimreghdr *pim;
341 struct ipv6hdr *encap;
342 struct net_device *reg_dev = NULL;
343
344 if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
345 goto drop;
346
347 pim = (struct pimreghdr *)skb_transport_header(skb);
348 if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
349 (pim->flags & PIM_NULL_REGISTER) ||
350 (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
Al Viroec6b4862008-04-26 22:28:58 -0700351 csum_fold(skb_checksum(skb, 0, skb->len, 0))))
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900352 goto drop;
353
354 /* check if the inner packet is destined to mcast group */
355 encap = (struct ipv6hdr *)(skb_transport_header(skb) +
356 sizeof(*pim));
357
358 if (!ipv6_addr_is_multicast(&encap->daddr) ||
359 encap->payload_len == 0 ||
360 ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
361 goto drop;
362
363 read_lock(&mrt_lock);
364 if (reg_vif_num >= 0)
Benjamin Thery4e168802008-12-10 16:15:08 -0800365 reg_dev = init_net.ipv6.vif6_table[reg_vif_num].dev;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900366 if (reg_dev)
367 dev_hold(reg_dev);
368 read_unlock(&mrt_lock);
369
370 if (reg_dev == NULL)
371 goto drop;
372
373 skb->mac_header = skb->network_header;
374 skb_pull(skb, (u8 *)encap - skb->data);
375 skb_reset_network_header(skb);
376 skb->dev = reg_dev;
377 skb->protocol = htons(ETH_P_IP);
378 skb->ip_summed = 0;
379 skb->pkt_type = PACKET_HOST;
380 dst_release(skb->dst);
Pavel Emelyanovdc58c782008-05-21 14:17:54 -0700381 reg_dev->stats.rx_bytes += skb->len;
382 reg_dev->stats.rx_packets++;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900383 skb->dst = NULL;
384 nf_reset(skb);
385 netif_rx(skb);
386 dev_put(reg_dev);
387 return 0;
388 drop:
389 kfree_skb(skb);
390 return 0;
391}
392
393static struct inet6_protocol pim6_protocol = {
394 .handler = pim6_rcv,
395};
396
397/* Service routines creating virtual interfaces: PIMREG */
398
399static int reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
400{
401 read_lock(&mrt_lock);
Pavel Emelyanovdc58c782008-05-21 14:17:54 -0700402 dev->stats.tx_bytes += skb->len;
403 dev->stats.tx_packets++;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900404 ip6mr_cache_report(skb, reg_vif_num, MRT6MSG_WHOLEPKT);
405 read_unlock(&mrt_lock);
406 kfree_skb(skb);
407 return 0;
408}
409
Stephen Hemminger007c3832008-11-20 20:28:35 -0800410static const struct net_device_ops reg_vif_netdev_ops = {
411 .ndo_start_xmit = reg_vif_xmit,
412};
413
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900414static void reg_vif_setup(struct net_device *dev)
415{
416 dev->type = ARPHRD_PIMREG;
417 dev->mtu = 1500 - sizeof(struct ipv6hdr) - 8;
418 dev->flags = IFF_NOARP;
Stephen Hemminger007c3832008-11-20 20:28:35 -0800419 dev->netdev_ops = &reg_vif_netdev_ops;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900420 dev->destructor = free_netdev;
421}
422
423static struct net_device *ip6mr_reg_vif(void)
424{
425 struct net_device *dev;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900426
Pavel Emelyanovdc58c782008-05-21 14:17:54 -0700427 dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900428 if (dev == NULL)
429 return NULL;
430
431 if (register_netdevice(dev)) {
432 free_netdev(dev);
433 return NULL;
434 }
435 dev->iflink = 0;
436
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900437 if (dev_open(dev))
438 goto failure;
439
Wang Chen7af3db72008-07-14 20:54:54 -0700440 dev_hold(dev);
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900441 return dev;
442
443failure:
444 /* allow the register to be completed before unregistering. */
445 rtnl_unlock();
446 rtnl_lock();
447
448 unregister_netdevice(dev);
449 return NULL;
450}
451#endif
452
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900453/*
454 * Delete a VIF entry
455 */
456
457static int mif6_delete(int vifi)
458{
459 struct mif_device *v;
460 struct net_device *dev;
Benjamin Thery4e168802008-12-10 16:15:08 -0800461 if (vifi < 0 || vifi >= init_net.ipv6.maxvif)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900462 return -EADDRNOTAVAIL;
463
Benjamin Thery4e168802008-12-10 16:15:08 -0800464 v = &init_net.ipv6.vif6_table[vifi];
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900465
466 write_lock_bh(&mrt_lock);
467 dev = v->dev;
468 v->dev = NULL;
469
470 if (!dev) {
471 write_unlock_bh(&mrt_lock);
472 return -EADDRNOTAVAIL;
473 }
474
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900475#ifdef CONFIG_IPV6_PIMSM_V2
476 if (vifi == reg_vif_num)
477 reg_vif_num = -1;
478#endif
479
Benjamin Thery4e168802008-12-10 16:15:08 -0800480 if (vifi + 1 == init_net.ipv6.maxvif) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900481 int tmp;
482 for (tmp = vifi - 1; tmp >= 0; tmp--) {
Benjamin Thery4e168802008-12-10 16:15:08 -0800483 if (MIF_EXISTS(&init_net, tmp))
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900484 break;
485 }
Benjamin Thery4e168802008-12-10 16:15:08 -0800486 init_net.ipv6.maxvif = tmp + 1;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900487 }
488
489 write_unlock_bh(&mrt_lock);
490
491 dev_set_allmulti(dev, -1);
492
493 if (v->flags & MIFF_REGISTER)
494 unregister_netdevice(dev);
495
496 dev_put(dev);
497 return 0;
498}
499
Benjamin Thery58701ad2008-12-10 16:22:34 -0800500static inline void ip6mr_cache_free(struct mfc6_cache *c)
501{
502 release_net(mfc6_net(c));
503 kmem_cache_free(mrt_cachep, c);
504}
505
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900506/* Destroy an unresolved cache entry, killing queued skbs
507 and reporting error to netlink readers.
508 */
509
510static void ip6mr_destroy_unres(struct mfc6_cache *c)
511{
512 struct sk_buff *skb;
513
Benjamin Thery4045e572008-12-10 16:27:21 -0800514 atomic_dec(&init_net.ipv6.cache_resolve_queue_len);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900515
516 while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
517 if (ipv6_hdr(skb)->version == 0) {
518 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
519 nlh->nlmsg_type = NLMSG_ERROR;
520 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
521 skb_trim(skb, nlh->nlmsg_len);
522 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
523 rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
524 } else
525 kfree_skb(skb);
526 }
527
Benjamin Thery58701ad2008-12-10 16:22:34 -0800528 ip6mr_cache_free(c);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900529}
530
531
532/* Single timer process for all the unresolved queue. */
533
534static void ipmr_do_expire_process(unsigned long dummy)
535{
536 unsigned long now = jiffies;
537 unsigned long expires = 10 * HZ;
538 struct mfc6_cache *c, **cp;
539
540 cp = &mfc_unres_queue;
541
542 while ((c = *cp) != NULL) {
543 if (time_after(c->mfc_un.unres.expires, now)) {
544 /* not yet... */
545 unsigned long interval = c->mfc_un.unres.expires - now;
546 if (interval < expires)
547 expires = interval;
548 cp = &c->next;
549 continue;
550 }
551
552 *cp = c->next;
553 ip6mr_destroy_unres(c);
554 }
555
Benjamin Thery4045e572008-12-10 16:27:21 -0800556 if (mfc_unres_queue != NULL)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900557 mod_timer(&ipmr_expire_timer, jiffies + expires);
558}
559
560static void ipmr_expire_process(unsigned long dummy)
561{
562 if (!spin_trylock(&mfc_unres_lock)) {
563 mod_timer(&ipmr_expire_timer, jiffies + 1);
564 return;
565 }
566
Benjamin Thery4045e572008-12-10 16:27:21 -0800567 if (mfc_unres_queue != NULL)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900568 ipmr_do_expire_process(dummy);
569
570 spin_unlock(&mfc_unres_lock);
571}
572
573/* Fill oifs list. It is called under write locked mrt_lock. */
574
575static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
576{
577 int vifi;
578
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300579 cache->mfc_un.res.minvif = MAXMIFS;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900580 cache->mfc_un.res.maxvif = 0;
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300581 memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900582
Benjamin Thery4e168802008-12-10 16:15:08 -0800583 for (vifi = 0; vifi < init_net.ipv6.maxvif; vifi++) {
584 if (MIF_EXISTS(&init_net, vifi) &&
585 ttls[vifi] && ttls[vifi] < 255) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900586 cache->mfc_un.res.ttls[vifi] = ttls[vifi];
587 if (cache->mfc_un.res.minvif > vifi)
588 cache->mfc_un.res.minvif = vifi;
589 if (cache->mfc_un.res.maxvif <= vifi)
590 cache->mfc_un.res.maxvif = vifi + 1;
591 }
592 }
593}
594
595static int mif6_add(struct mif6ctl *vifc, int mrtsock)
596{
597 int vifi = vifc->mif6c_mifi;
Benjamin Thery4e168802008-12-10 16:15:08 -0800598 struct mif_device *v = &init_net.ipv6.vif6_table[vifi];
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900599 struct net_device *dev;
Wang Chen5ae7b442008-07-14 20:54:23 -0700600 int err;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900601
602 /* Is vif busy ? */
Benjamin Thery4e168802008-12-10 16:15:08 -0800603 if (MIF_EXISTS(&init_net, vifi))
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900604 return -EADDRINUSE;
605
606 switch (vifc->mif6c_flags) {
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900607#ifdef CONFIG_IPV6_PIMSM_V2
608 case MIFF_REGISTER:
609 /*
610 * Special Purpose VIF in PIM
611 * All the packets will be sent to the daemon
612 */
613 if (reg_vif_num >= 0)
614 return -EADDRINUSE;
615 dev = ip6mr_reg_vif();
616 if (!dev)
617 return -ENOBUFS;
Wang Chen5ae7b442008-07-14 20:54:23 -0700618 err = dev_set_allmulti(dev, 1);
619 if (err) {
620 unregister_netdevice(dev);
Wang Chen7af3db72008-07-14 20:54:54 -0700621 dev_put(dev);
Wang Chen5ae7b442008-07-14 20:54:23 -0700622 return err;
623 }
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900624 break;
625#endif
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900626 case 0:
627 dev = dev_get_by_index(&init_net, vifc->mif6c_pifi);
628 if (!dev)
629 return -EADDRNOTAVAIL;
Wang Chen5ae7b442008-07-14 20:54:23 -0700630 err = dev_set_allmulti(dev, 1);
Wang Chen7af3db72008-07-14 20:54:54 -0700631 if (err) {
632 dev_put(dev);
Wang Chen5ae7b442008-07-14 20:54:23 -0700633 return err;
Wang Chen7af3db72008-07-14 20:54:54 -0700634 }
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900635 break;
636 default:
637 return -EINVAL;
638 }
639
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900640 /*
641 * Fill in the VIF structures
642 */
643 v->rate_limit = vifc->vifc_rate_limit;
644 v->flags = vifc->mif6c_flags;
645 if (!mrtsock)
646 v->flags |= VIFF_STATIC;
647 v->threshold = vifc->vifc_threshold;
648 v->bytes_in = 0;
649 v->bytes_out = 0;
650 v->pkt_in = 0;
651 v->pkt_out = 0;
652 v->link = dev->ifindex;
653 if (v->flags & MIFF_REGISTER)
654 v->link = dev->iflink;
655
656 /* And finish update writing critical data */
657 write_lock_bh(&mrt_lock);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900658 v->dev = dev;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900659#ifdef CONFIG_IPV6_PIMSM_V2
660 if (v->flags & MIFF_REGISTER)
661 reg_vif_num = vifi;
662#endif
Benjamin Thery4e168802008-12-10 16:15:08 -0800663 if (vifi + 1 > init_net.ipv6.maxvif)
664 init_net.ipv6.maxvif = vifi + 1;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900665 write_unlock_bh(&mrt_lock);
666 return 0;
667}
668
669static struct mfc6_cache *ip6mr_cache_find(struct in6_addr *origin, struct in6_addr *mcastgrp)
670{
671 int line = MFC6_HASH(mcastgrp, origin);
672 struct mfc6_cache *c;
673
Benjamin Thery4a6258a2008-12-10 16:24:07 -0800674 for (c = init_net.ipv6.mfc6_cache_array[line]; c; c = c->next) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900675 if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
676 ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
677 break;
678 }
679 return c;
680}
681
682/*
683 * Allocate a multicast cache entry
684 */
Benjamin Thery58701ad2008-12-10 16:22:34 -0800685static struct mfc6_cache *ip6mr_cache_alloc(struct net *net)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900686{
Joe Perches36cbac52008-12-03 22:27:25 -0800687 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900688 if (c == NULL)
689 return NULL;
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300690 c->mfc_un.res.minvif = MAXMIFS;
Benjamin Thery58701ad2008-12-10 16:22:34 -0800691 mfc6_net_set(c, net);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900692 return c;
693}
694
Benjamin Thery58701ad2008-12-10 16:22:34 -0800695static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900696{
Joe Perches36cbac52008-12-03 22:27:25 -0800697 struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900698 if (c == NULL)
699 return NULL;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900700 skb_queue_head_init(&c->mfc_un.unres.unresolved);
701 c->mfc_un.unres.expires = jiffies + 10 * HZ;
Benjamin Thery58701ad2008-12-10 16:22:34 -0800702 mfc6_net_set(c, net);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900703 return c;
704}
705
706/*
707 * A cache entry has gone into a resolved state from queued
708 */
709
710static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
711{
712 struct sk_buff *skb;
713
714 /*
715 * Play the pending entries through our router
716 */
717
718 while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
719 if (ipv6_hdr(skb)->version == 0) {
720 int err;
721 struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
722
723 if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
YOSHIFUJI Hideaki549e0282008-04-05 22:17:39 +0900724 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900725 } else {
726 nlh->nlmsg_type = NLMSG_ERROR;
727 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
728 skb_trim(skb, nlh->nlmsg_len);
729 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
730 }
731 err = rtnl_unicast(skb, &init_net, NETLINK_CB(skb).pid);
732 } else
733 ip6_mr_forward(skb, c);
734 }
735}
736
737/*
738 * Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
739 * expects the following bizarre scheme.
740 *
741 * Called under mrt_lock.
742 */
743
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300744static int ip6mr_cache_report(struct sk_buff *pkt, mifi_t mifi, int assert)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900745{
746 struct sk_buff *skb;
747 struct mrt6msg *msg;
748 int ret;
749
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900750#ifdef CONFIG_IPV6_PIMSM_V2
751 if (assert == MRT6MSG_WHOLEPKT)
752 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
753 +sizeof(*msg));
754 else
755#endif
756 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900757
758 if (!skb)
759 return -ENOBUFS;
760
761 /* I suppose that internal messages
762 * do not require checksums */
763
764 skb->ip_summed = CHECKSUM_UNNECESSARY;
765
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900766#ifdef CONFIG_IPV6_PIMSM_V2
767 if (assert == MRT6MSG_WHOLEPKT) {
768 /* Ugly, but we have no choice with this interface.
769 Duplicate old header, fix length etc.
770 And all this only to mangle msg->im6_msgtype and
771 to set msg->im6_mbz to "mbz" :-)
772 */
773 skb_push(skb, -skb_network_offset(pkt));
774
775 skb_push(skb, sizeof(*msg));
776 skb_reset_transport_header(skb);
777 msg = (struct mrt6msg *)skb_transport_header(skb);
778 msg->im6_mbz = 0;
779 msg->im6_msgtype = MRT6MSG_WHOLEPKT;
780 msg->im6_mif = reg_vif_num;
781 msg->im6_pad = 0;
782 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
783 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
784
785 skb->ip_summed = CHECKSUM_UNNECESSARY;
786 } else
787#endif
788 {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900789 /*
790 * Copy the IP header
791 */
792
793 skb_put(skb, sizeof(struct ipv6hdr));
794 skb_reset_network_header(skb);
795 skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
796
797 /*
798 * Add our header
799 */
800 skb_put(skb, sizeof(*msg));
801 skb_reset_transport_header(skb);
802 msg = (struct mrt6msg *)skb_transport_header(skb);
803
804 msg->im6_mbz = 0;
805 msg->im6_msgtype = assert;
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300806 msg->im6_mif = mifi;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900807 msg->im6_pad = 0;
808 ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
809 ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
810
811 skb->dst = dst_clone(pkt->dst);
812 skb->ip_summed = CHECKSUM_UNNECESSARY;
813
814 skb_pull(skb, sizeof(struct ipv6hdr));
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +0900815 }
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900816
Benjamin Therybd91b8b2008-12-10 16:07:08 -0800817 if (init_net.ipv6.mroute6_sk == NULL) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900818 kfree_skb(skb);
819 return -EINVAL;
820 }
821
822 /*
823 * Deliver to user space multicast routing algorithms
824 */
Benjamin Therybd91b8b2008-12-10 16:07:08 -0800825 ret = sock_queue_rcv_skb(init_net.ipv6.mroute6_sk, skb);
826 if (ret < 0) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900827 if (net_ratelimit())
828 printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
829 kfree_skb(skb);
830 }
831
832 return ret;
833}
834
835/*
836 * Queue a packet for resolution. It gets locked cache entry!
837 */
838
839static int
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300840ip6mr_cache_unresolved(mifi_t mifi, struct sk_buff *skb)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900841{
842 int err;
843 struct mfc6_cache *c;
844
845 spin_lock_bh(&mfc_unres_lock);
846 for (c = mfc_unres_queue; c; c = c->next) {
Benjamin Thery4045e572008-12-10 16:27:21 -0800847 if (net_eq(mfc6_net(c), &init_net) &&
848 ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900849 ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
850 break;
851 }
852
853 if (c == NULL) {
854 /*
855 * Create a new entry if allowable
856 */
857
Benjamin Thery4045e572008-12-10 16:27:21 -0800858 if (atomic_read(&init_net.ipv6.cache_resolve_queue_len) >= 10 ||
Benjamin Thery58701ad2008-12-10 16:22:34 -0800859 (c = ip6mr_cache_alloc_unres(&init_net)) == NULL) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900860 spin_unlock_bh(&mfc_unres_lock);
861
862 kfree_skb(skb);
863 return -ENOBUFS;
864 }
865
866 /*
867 * Fill in the new cache entry
868 */
869 c->mf6c_parent = -1;
870 c->mf6c_origin = ipv6_hdr(skb)->saddr;
871 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
872
873 /*
874 * Reflect first query at pim6sd
875 */
Rami Rosen6ac7eb02008-04-10 12:40:10 +0300876 if ((err = ip6mr_cache_report(skb, mifi, MRT6MSG_NOCACHE)) < 0) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900877 /* If the report failed throw the cache entry
878 out - Brad Parker
879 */
880 spin_unlock_bh(&mfc_unres_lock);
881
Benjamin Thery58701ad2008-12-10 16:22:34 -0800882 ip6mr_cache_free(c);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900883 kfree_skb(skb);
884 return err;
885 }
886
Benjamin Thery4045e572008-12-10 16:27:21 -0800887 atomic_inc(&init_net.ipv6.cache_resolve_queue_len);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900888 c->next = mfc_unres_queue;
889 mfc_unres_queue = c;
890
891 ipmr_do_expire_process(1);
892 }
893
894 /*
895 * See if we can append the packet
896 */
897 if (c->mfc_un.unres.unresolved.qlen > 3) {
898 kfree_skb(skb);
899 err = -ENOBUFS;
900 } else {
901 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
902 err = 0;
903 }
904
905 spin_unlock_bh(&mfc_unres_lock);
906 return err;
907}
908
909/*
910 * MFC6 cache manipulation by user space
911 */
912
913static int ip6mr_mfc_delete(struct mf6cctl *mfc)
914{
915 int line;
916 struct mfc6_cache *c, **cp;
917
918 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
919
Benjamin Thery4a6258a2008-12-10 16:24:07 -0800920 for (cp = &init_net.ipv6.mfc6_cache_array[line];
921 (c = *cp) != NULL; cp = &c->next) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900922 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
923 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
924 write_lock_bh(&mrt_lock);
925 *cp = c->next;
926 write_unlock_bh(&mrt_lock);
927
Benjamin Thery58701ad2008-12-10 16:22:34 -0800928 ip6mr_cache_free(c);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900929 return 0;
930 }
931 }
932 return -ENOENT;
933}
934
935static int ip6mr_device_event(struct notifier_block *this,
936 unsigned long event, void *ptr)
937{
938 struct net_device *dev = ptr;
939 struct mif_device *v;
940 int ct;
941
YOSHIFUJI Hideaki721499e2008-07-19 22:34:43 -0700942 if (!net_eq(dev_net(dev), &init_net))
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900943 return NOTIFY_DONE;
944
945 if (event != NETDEV_UNREGISTER)
946 return NOTIFY_DONE;
947
Benjamin Thery4e168802008-12-10 16:15:08 -0800948 v = &init_net.ipv6.vif6_table[0];
949 for (ct = 0; ct < init_net.ipv6.maxvif; ct++, v++) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +0900950 if (v->dev == dev)
951 mif6_delete(ct);
952 }
953 return NOTIFY_DONE;
954}
955
956static struct notifier_block ip6_mr_notifier = {
957 .notifier_call = ip6mr_device_event
958};
959
/*
 *	Setup for IPv6 multicast routing
 */
963
Benjamin Thery4e168802008-12-10 16:15:08 -0800964static int __net_init ip6mr_net_init(struct net *net)
965{
966 int err = 0;
967
968 net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
969 GFP_KERNEL);
970 if (!net->ipv6.vif6_table) {
971 err = -ENOMEM;
972 goto fail;
973 }
Benjamin Thery4a6258a2008-12-10 16:24:07 -0800974
975 /* Forwarding cache */
976 net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES,
977 sizeof(struct mfc6_cache *),
978 GFP_KERNEL);
979 if (!net->ipv6.mfc6_cache_array) {
980 err = -ENOMEM;
981 goto fail_mfc6_cache;
982 }
983 return 0;
984
985fail_mfc6_cache:
986 kfree(net->ipv6.vif6_table);
Benjamin Thery4e168802008-12-10 16:15:08 -0800987fail:
988 return err;
989}
990
991static void __net_exit ip6mr_net_exit(struct net *net)
992{
Benjamin Thery4a6258a2008-12-10 16:24:07 -0800993 kfree(net->ipv6.mfc6_cache_array);
Benjamin Thery4e168802008-12-10 16:15:08 -0800994 kfree(net->ipv6.vif6_table);
995}
996
997static struct pernet_operations ip6mr_net_ops = {
998 .init = ip6mr_net_init,
999 .exit = ip6mr_net_exit,
1000};
1001
Wang Chen623d1a12008-07-03 12:13:30 +08001002int __init ip6_mr_init(void)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001003{
Wang Chen623d1a12008-07-03 12:13:30 +08001004 int err;
1005
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001006 mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1007 sizeof(struct mfc6_cache),
1008 0, SLAB_HWCACHE_ALIGN,
1009 NULL);
1010 if (!mrt_cachep)
Wang Chen623d1a12008-07-03 12:13:30 +08001011 return -ENOMEM;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001012
Benjamin Thery4e168802008-12-10 16:15:08 -08001013 err = register_pernet_subsys(&ip6mr_net_ops);
1014 if (err)
1015 goto reg_pernet_fail;
1016
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001017 setup_timer(&ipmr_expire_timer, ipmr_expire_process, 0);
Wang Chen623d1a12008-07-03 12:13:30 +08001018 err = register_netdevice_notifier(&ip6_mr_notifier);
1019 if (err)
1020 goto reg_notif_fail;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001021#ifdef CONFIG_PROC_FS
Wang Chen623d1a12008-07-03 12:13:30 +08001022 err = -ENOMEM;
1023 if (!proc_net_fops_create(&init_net, "ip6_mr_vif", 0, &ip6mr_vif_fops))
1024 goto proc_vif_fail;
1025 if (!proc_net_fops_create(&init_net, "ip6_mr_cache",
1026 0, &ip6mr_mfc_fops))
1027 goto proc_cache_fail;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001028#endif
Wang Chen623d1a12008-07-03 12:13:30 +08001029 return 0;
Wang Chen623d1a12008-07-03 12:13:30 +08001030#ifdef CONFIG_PROC_FS
Wang Chen623d1a12008-07-03 12:13:30 +08001031proc_cache_fail:
1032 proc_net_remove(&init_net, "ip6_mr_vif");
Benjamin Thery87b30a62008-11-10 16:34:11 -08001033proc_vif_fail:
1034 unregister_netdevice_notifier(&ip6_mr_notifier);
Wang Chen623d1a12008-07-03 12:13:30 +08001035#endif
Benjamin Thery87b30a62008-11-10 16:34:11 -08001036reg_notif_fail:
1037 del_timer(&ipmr_expire_timer);
Benjamin Thery4e168802008-12-10 16:15:08 -08001038 unregister_pernet_subsys(&ip6mr_net_ops);
1039reg_pernet_fail:
Benjamin Thery87b30a62008-11-10 16:34:11 -08001040 kmem_cache_destroy(mrt_cachep);
Wang Chen623d1a12008-07-03 12:13:30 +08001041 return err;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001042}
1043
Wang Chen623d1a12008-07-03 12:13:30 +08001044void ip6_mr_cleanup(void)
1045{
1046#ifdef CONFIG_PROC_FS
1047 proc_net_remove(&init_net, "ip6_mr_cache");
1048 proc_net_remove(&init_net, "ip6_mr_vif");
1049#endif
1050 unregister_netdevice_notifier(&ip6_mr_notifier);
1051 del_timer(&ipmr_expire_timer);
Benjamin Thery4e168802008-12-10 16:15:08 -08001052 unregister_pernet_subsys(&ip6mr_net_ops);
Wang Chen623d1a12008-07-03 12:13:30 +08001053 kmem_cache_destroy(mrt_cachep);
1054}
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001055
1056static int ip6mr_mfc_add(struct mf6cctl *mfc, int mrtsock)
1057{
1058 int line;
1059 struct mfc6_cache *uc, *c, **cp;
Rami Rosen6ac7eb02008-04-10 12:40:10 +03001060 unsigned char ttls[MAXMIFS];
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001061 int i;
1062
Rami Rosen6ac7eb02008-04-10 12:40:10 +03001063 memset(ttls, 255, MAXMIFS);
1064 for (i = 0; i < MAXMIFS; i++) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001065 if (IF_ISSET(i, &mfc->mf6cc_ifset))
1066 ttls[i] = 1;
1067
1068 }
1069
1070 line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1071
Benjamin Thery4a6258a2008-12-10 16:24:07 -08001072 for (cp = &init_net.ipv6.mfc6_cache_array[line];
1073 (c = *cp) != NULL; cp = &c->next) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001074 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1075 ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
1076 break;
1077 }
1078
1079 if (c != NULL) {
1080 write_lock_bh(&mrt_lock);
1081 c->mf6c_parent = mfc->mf6cc_parent;
1082 ip6mr_update_thresholds(c, ttls);
1083 if (!mrtsock)
1084 c->mfc_flags |= MFC_STATIC;
1085 write_unlock_bh(&mrt_lock);
1086 return 0;
1087 }
1088
1089 if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1090 return -EINVAL;
1091
Benjamin Thery58701ad2008-12-10 16:22:34 -08001092 c = ip6mr_cache_alloc(&init_net);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001093 if (c == NULL)
1094 return -ENOMEM;
1095
1096 c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1097 c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1098 c->mf6c_parent = mfc->mf6cc_parent;
1099 ip6mr_update_thresholds(c, ttls);
1100 if (!mrtsock)
1101 c->mfc_flags |= MFC_STATIC;
1102
1103 write_lock_bh(&mrt_lock);
Benjamin Thery4a6258a2008-12-10 16:24:07 -08001104 c->next = init_net.ipv6.mfc6_cache_array[line];
1105 init_net.ipv6.mfc6_cache_array[line] = c;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001106 write_unlock_bh(&mrt_lock);
1107
1108 /*
1109 * Check to see if we resolved a queued list. If so we
1110 * need to send on the frames and tidy up.
1111 */
1112 spin_lock_bh(&mfc_unres_lock);
1113 for (cp = &mfc_unres_queue; (uc = *cp) != NULL;
1114 cp = &uc->next) {
Benjamin Thery4045e572008-12-10 16:27:21 -08001115 if (net_eq(mfc6_net(uc), &init_net) &&
1116 ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001117 ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1118 *cp = uc->next;
Benjamin Thery4045e572008-12-10 16:27:21 -08001119 atomic_dec(&init_net.ipv6.cache_resolve_queue_len);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001120 break;
1121 }
1122 }
Benjamin Thery4045e572008-12-10 16:27:21 -08001123 if (mfc_unres_queue == NULL)
1124 del_timer(&ipmr_expire_timer);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001125 spin_unlock_bh(&mfc_unres_lock);
1126
1127 if (uc) {
1128 ip6mr_cache_resolve(uc, c);
Benjamin Thery58701ad2008-12-10 16:22:34 -08001129 ip6mr_cache_free(uc);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001130 }
1131 return 0;
1132}
1133
1134/*
1135 * Close the multicast socket, and clear the vif tables etc
1136 */
1137
1138static void mroute_clean_tables(struct sock *sk)
1139{
1140 int i;
1141
1142 /*
1143 * Shut down all active vif entries
1144 */
Benjamin Thery4e168802008-12-10 16:15:08 -08001145 for (i = 0; i < init_net.ipv6.maxvif; i++) {
1146 if (!(init_net.ipv6.vif6_table[i].flags & VIFF_STATIC))
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001147 mif6_delete(i);
1148 }
1149
1150 /*
1151 * Wipe the cache
1152 */
Benjamin Thery4a6258a2008-12-10 16:24:07 -08001153 for (i = 0; i < MFC6_LINES; i++) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001154 struct mfc6_cache *c, **cp;
1155
Benjamin Thery4a6258a2008-12-10 16:24:07 -08001156 cp = &init_net.ipv6.mfc6_cache_array[i];
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001157 while ((c = *cp) != NULL) {
1158 if (c->mfc_flags & MFC_STATIC) {
1159 cp = &c->next;
1160 continue;
1161 }
1162 write_lock_bh(&mrt_lock);
1163 *cp = c->next;
1164 write_unlock_bh(&mrt_lock);
1165
Benjamin Thery58701ad2008-12-10 16:22:34 -08001166 ip6mr_cache_free(c);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001167 }
1168 }
1169
Benjamin Thery4045e572008-12-10 16:27:21 -08001170 if (atomic_read(&init_net.ipv6.cache_resolve_queue_len) != 0) {
1171 struct mfc6_cache *c, **cp;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001172
1173 spin_lock_bh(&mfc_unres_lock);
Benjamin Thery4045e572008-12-10 16:27:21 -08001174 cp = &mfc_unres_queue;
1175 while ((c = *cp) != NULL) {
1176 if (!net_eq(mfc6_net(c), &init_net)) {
1177 cp = &c->next;
1178 continue;
1179 }
1180 *cp = c->next;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001181 ip6mr_destroy_unres(c);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001182 }
1183 spin_unlock_bh(&mfc_unres_lock);
1184 }
1185}
1186
1187static int ip6mr_sk_init(struct sock *sk)
1188{
1189 int err = 0;
1190
1191 rtnl_lock();
1192 write_lock_bh(&mrt_lock);
Benjamin Therybd91b8b2008-12-10 16:07:08 -08001193 if (likely(init_net.ipv6.mroute6_sk == NULL))
1194 init_net.ipv6.mroute6_sk = sk;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001195 else
1196 err = -EADDRINUSE;
1197 write_unlock_bh(&mrt_lock);
1198
1199 rtnl_unlock();
1200
1201 return err;
1202}
1203
1204int ip6mr_sk_done(struct sock *sk)
1205{
1206 int err = 0;
1207
1208 rtnl_lock();
Benjamin Therybd91b8b2008-12-10 16:07:08 -08001209 if (sk == init_net.ipv6.mroute6_sk) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001210 write_lock_bh(&mrt_lock);
Benjamin Therybd91b8b2008-12-10 16:07:08 -08001211 init_net.ipv6.mroute6_sk = NULL;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001212 write_unlock_bh(&mrt_lock);
1213
1214 mroute_clean_tables(sk);
1215 } else
1216 err = -EACCES;
1217 rtnl_unlock();
1218
1219 return err;
1220}
1221
1222/*
1223 * Socket options and virtual interface manipulation. The whole
1224 * virtual interface system is a complete heap, but unfortunately
1225 * that's how BSD mrouted happens to think. Maybe one day with a proper
1226 * MOSPF/PIM router set up we can clean this up.
1227 */
1228
1229int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
1230{
1231 int ret;
1232 struct mif6ctl vif;
1233 struct mf6cctl mfc;
1234 mifi_t mifi;
1235
1236 if (optname != MRT6_INIT) {
Benjamin Therybd91b8b2008-12-10 16:07:08 -08001237 if (sk != init_net.ipv6.mroute6_sk && !capable(CAP_NET_ADMIN))
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001238 return -EACCES;
1239 }
1240
1241 switch (optname) {
1242 case MRT6_INIT:
1243 if (sk->sk_type != SOCK_RAW ||
1244 inet_sk(sk)->num != IPPROTO_ICMPV6)
1245 return -EOPNOTSUPP;
1246 if (optlen < sizeof(int))
1247 return -EINVAL;
1248
1249 return ip6mr_sk_init(sk);
1250
1251 case MRT6_DONE:
1252 return ip6mr_sk_done(sk);
1253
1254 case MRT6_ADD_MIF:
1255 if (optlen < sizeof(vif))
1256 return -EINVAL;
1257 if (copy_from_user(&vif, optval, sizeof(vif)))
1258 return -EFAULT;
Rami Rosen6ac7eb02008-04-10 12:40:10 +03001259 if (vif.mif6c_mifi >= MAXMIFS)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001260 return -ENFILE;
1261 rtnl_lock();
Benjamin Therybd91b8b2008-12-10 16:07:08 -08001262 ret = mif6_add(&vif, sk == init_net.ipv6.mroute6_sk);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001263 rtnl_unlock();
1264 return ret;
1265
1266 case MRT6_DEL_MIF:
1267 if (optlen < sizeof(mifi_t))
1268 return -EINVAL;
1269 if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1270 return -EFAULT;
1271 rtnl_lock();
1272 ret = mif6_delete(mifi);
1273 rtnl_unlock();
1274 return ret;
1275
1276 /*
1277 * Manipulate the forwarding caches. These live
1278 * in a sort of kernel/user symbiosis.
1279 */
1280 case MRT6_ADD_MFC:
1281 case MRT6_DEL_MFC:
1282 if (optlen < sizeof(mfc))
1283 return -EINVAL;
1284 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1285 return -EFAULT;
1286 rtnl_lock();
1287 if (optname == MRT6_DEL_MFC)
1288 ret = ip6mr_mfc_delete(&mfc);
1289 else
Benjamin Therybd91b8b2008-12-10 16:07:08 -08001290 ret = ip6mr_mfc_add(&mfc, sk == init_net.ipv6.mroute6_sk);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001291 rtnl_unlock();
1292 return ret;
1293
1294 /*
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001295 * Control PIM assert (to activate pim will activate assert)
1296 */
1297 case MRT6_ASSERT:
1298 {
1299 int v;
1300 if (get_user(v, (int __user *)optval))
1301 return -EFAULT;
Benjamin Therya21f3f92008-12-10 16:28:44 -08001302 init_net.ipv6.mroute_do_assert = !!v;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001303 return 0;
1304 }
1305
1306#ifdef CONFIG_IPV6_PIMSM_V2
1307 case MRT6_PIM:
1308 {
YOSHIFUJI Hideakia9f83bf2008-04-10 15:41:28 +09001309 int v;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001310 if (get_user(v, (int __user *)optval))
1311 return -EFAULT;
1312 v = !!v;
1313 rtnl_lock();
1314 ret = 0;
Benjamin Therya21f3f92008-12-10 16:28:44 -08001315 if (v != init_net.ipv6.mroute_do_pim) {
1316 init_net.ipv6.mroute_do_pim = v;
1317 init_net.ipv6.mroute_do_assert = v;
1318 if (init_net.ipv6.mroute_do_pim)
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001319 ret = inet6_add_protocol(&pim6_protocol,
1320 IPPROTO_PIM);
1321 else
1322 ret = inet6_del_protocol(&pim6_protocol,
1323 IPPROTO_PIM);
1324 if (ret < 0)
1325 ret = -EAGAIN;
1326 }
1327 rtnl_unlock();
1328 return ret;
1329 }
1330
1331#endif
1332 /*
Rami Rosen7d120c52008-04-23 14:35:13 +03001333 * Spurious command, or MRT6_VERSION which you cannot
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001334 * set.
1335 */
1336 default:
1337 return -ENOPROTOOPT;
1338 }
1339}
1340
/*
 *	getsockopt() support for the IPv6 multicast routing system.
 */
1344
1345int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1346 int __user *optlen)
1347{
1348 int olr;
1349 int val;
1350
1351 switch (optname) {
1352 case MRT6_VERSION:
1353 val = 0x0305;
1354 break;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001355#ifdef CONFIG_IPV6_PIMSM_V2
1356 case MRT6_PIM:
Benjamin Therya21f3f92008-12-10 16:28:44 -08001357 val = init_net.ipv6.mroute_do_pim;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001358 break;
1359#endif
1360 case MRT6_ASSERT:
Benjamin Therya21f3f92008-12-10 16:28:44 -08001361 val = init_net.ipv6.mroute_do_assert;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001362 break;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001363 default:
1364 return -ENOPROTOOPT;
1365 }
1366
1367 if (get_user(olr, optlen))
1368 return -EFAULT;
1369
1370 olr = min_t(int, olr, sizeof(int));
1371 if (olr < 0)
1372 return -EINVAL;
1373
1374 if (put_user(olr, optlen))
1375 return -EFAULT;
1376 if (copy_to_user(optval, &val, olr))
1377 return -EFAULT;
1378 return 0;
1379}
1380
/*
 *	The IPv6 multicast routing ioctl support routines.
 */
1384
1385int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1386{
1387 struct sioc_sg_req6 sr;
1388 struct sioc_mif_req6 vr;
1389 struct mif_device *vif;
1390 struct mfc6_cache *c;
1391
1392 switch (cmd) {
1393 case SIOCGETMIFCNT_IN6:
1394 if (copy_from_user(&vr, arg, sizeof(vr)))
1395 return -EFAULT;
Benjamin Thery4e168802008-12-10 16:15:08 -08001396 if (vr.mifi >= init_net.ipv6.maxvif)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001397 return -EINVAL;
1398 read_lock(&mrt_lock);
Benjamin Thery4e168802008-12-10 16:15:08 -08001399 vif = &init_net.ipv6.vif6_table[vr.mifi];
1400 if (MIF_EXISTS(&init_net, vr.mifi)) {
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001401 vr.icount = vif->pkt_in;
1402 vr.ocount = vif->pkt_out;
1403 vr.ibytes = vif->bytes_in;
1404 vr.obytes = vif->bytes_out;
1405 read_unlock(&mrt_lock);
1406
1407 if (copy_to_user(arg, &vr, sizeof(vr)))
1408 return -EFAULT;
1409 return 0;
1410 }
1411 read_unlock(&mrt_lock);
1412 return -EADDRNOTAVAIL;
1413 case SIOCGETSGCNT_IN6:
1414 if (copy_from_user(&sr, arg, sizeof(sr)))
1415 return -EFAULT;
1416
1417 read_lock(&mrt_lock);
1418 c = ip6mr_cache_find(&sr.src.sin6_addr, &sr.grp.sin6_addr);
1419 if (c) {
1420 sr.pktcnt = c->mfc_un.res.pkt;
1421 sr.bytecnt = c->mfc_un.res.bytes;
1422 sr.wrong_if = c->mfc_un.res.wrong_if;
1423 read_unlock(&mrt_lock);
1424
1425 if (copy_to_user(arg, &sr, sizeof(sr)))
1426 return -EFAULT;
1427 return 0;
1428 }
1429 read_unlock(&mrt_lock);
1430 return -EADDRNOTAVAIL;
1431 default:
1432 return -ENOIOCTLCMD;
1433 }
1434}
1435
1436
1437static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1438{
Denis V. Lunev483a47d2008-10-08 11:09:27 -07001439 IP6_INC_STATS_BH(dev_net(skb->dst->dev), ip6_dst_idev(skb->dst),
1440 IPSTATS_MIB_OUTFORWDATAGRAMS);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001441 return dst_output(skb);
1442}
1443
1444/*
1445 * Processing handlers for ip6mr_forward
1446 */
1447
1448static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1449{
1450 struct ipv6hdr *ipv6h;
Benjamin Thery4e168802008-12-10 16:15:08 -08001451 struct mif_device *vif = &init_net.ipv6.vif6_table[vifi];
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001452 struct net_device *dev;
1453 struct dst_entry *dst;
1454 struct flowi fl;
1455
1456 if (vif->dev == NULL)
1457 goto out_free;
1458
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001459#ifdef CONFIG_IPV6_PIMSM_V2
1460 if (vif->flags & MIFF_REGISTER) {
1461 vif->pkt_out++;
1462 vif->bytes_out += skb->len;
Pavel Emelyanovdc58c782008-05-21 14:17:54 -07001463 vif->dev->stats.tx_bytes += skb->len;
1464 vif->dev->stats.tx_packets++;
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001465 ip6mr_cache_report(skb, vifi, MRT6MSG_WHOLEPKT);
1466 kfree_skb(skb);
1467 return 0;
1468 }
1469#endif
1470
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001471 ipv6h = ipv6_hdr(skb);
1472
1473 fl = (struct flowi) {
1474 .oif = vif->link,
1475 .nl_u = { .ip6_u =
1476 { .daddr = ipv6h->daddr, }
1477 }
1478 };
1479
1480 dst = ip6_route_output(&init_net, NULL, &fl);
1481 if (!dst)
1482 goto out_free;
1483
1484 dst_release(skb->dst);
1485 skb->dst = dst;
1486
1487 /*
1488 * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
1489 * not only before forwarding, but after forwarding on all output
1490 * interfaces. It is clear, if mrouter runs a multicasting
1491 * program, it should receive packets not depending to what interface
1492 * program is joined.
1493 * If we will not make it, the program will have to join on all
1494 * interfaces. On the other hand, multihoming host (or router, but
1495 * not mrouter) cannot join to more than one interface - it will
1496 * result in receiving multiple packets.
1497 */
1498 dev = vif->dev;
1499 skb->dev = dev;
1500 vif->pkt_out++;
1501 vif->bytes_out += skb->len;
1502
1503 /* We are about to write */
1504 /* XXX: extension headers? */
1505 if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
1506 goto out_free;
1507
1508 ipv6h = ipv6_hdr(skb);
1509 ipv6h->hop_limit--;
1510
1511 IP6CB(skb)->flags |= IP6SKB_FORWARDED;
1512
1513 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dev,
1514 ip6mr_forward2_finish);
1515
1516out_free:
1517 kfree_skb(skb);
1518 return 0;
1519}
1520
1521static int ip6mr_find_vif(struct net_device *dev)
1522{
1523 int ct;
Benjamin Thery4e168802008-12-10 16:15:08 -08001524 for (ct = init_net.ipv6.maxvif - 1; ct >= 0; ct--) {
1525 if (init_net.ipv6.vif6_table[ct].dev == dev)
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001526 break;
1527 }
1528 return ct;
1529}
1530
1531static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
1532{
1533 int psend = -1;
1534 int vif, ct;
1535
1536 vif = cache->mf6c_parent;
1537 cache->mfc_un.res.pkt++;
1538 cache->mfc_un.res.bytes += skb->len;
1539
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001540 /*
1541 * Wrong interface: drop packet and (maybe) send PIM assert.
1542 */
Benjamin Thery4e168802008-12-10 16:15:08 -08001543 if (init_net.ipv6.vif6_table[vif].dev != skb->dev) {
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001544 int true_vifi;
1545
1546 cache->mfc_un.res.wrong_if++;
1547 true_vifi = ip6mr_find_vif(skb->dev);
1548
Benjamin Therya21f3f92008-12-10 16:28:44 -08001549 if (true_vifi >= 0 && init_net.ipv6.mroute_do_assert &&
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001550 /* pimsm uses asserts, when switching from RPT to SPT,
1551 so that we cannot check that packet arrived on an oif.
1552 It is bad, but otherwise we would need to move pretty
1553 large chunk of pimd to kernel. Ough... --ANK
1554 */
Benjamin Therya21f3f92008-12-10 16:28:44 -08001555 (init_net.ipv6.mroute_do_pim ||
1556 cache->mfc_un.res.ttls[true_vifi] < 255) &&
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001557 time_after(jiffies,
1558 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1559 cache->mfc_un.res.last_assert = jiffies;
1560 ip6mr_cache_report(skb, true_vifi, MRT6MSG_WRONGMIF);
1561 }
1562 goto dont_forward;
1563 }
1564
Benjamin Thery4e168802008-12-10 16:15:08 -08001565 init_net.ipv6.vif6_table[vif].pkt_in++;
1566 init_net.ipv6.vif6_table[vif].bytes_in += skb->len;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001567
1568 /*
1569 * Forward the frame
1570 */
1571 for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
1572 if (ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
1573 if (psend != -1) {
1574 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
1575 if (skb2)
1576 ip6mr_forward2(skb2, cache, psend);
1577 }
1578 psend = ct;
1579 }
1580 }
1581 if (psend != -1) {
1582 ip6mr_forward2(skb, cache, psend);
1583 return 0;
1584 }
1585
YOSHIFUJI Hideaki14fb64e2008-04-03 09:22:54 +09001586dont_forward:
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001587 kfree_skb(skb);
1588 return 0;
1589}
1590
1591
1592/*
1593 * Multicast packets for forwarding arrive here
1594 */
1595
1596int ip6_mr_input(struct sk_buff *skb)
1597{
1598 struct mfc6_cache *cache;
1599
1600 read_lock(&mrt_lock);
1601 cache = ip6mr_cache_find(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
1602
1603 /*
1604 * No usable cache entry
1605 */
1606 if (cache == NULL) {
1607 int vif;
1608
1609 vif = ip6mr_find_vif(skb->dev);
1610 if (vif >= 0) {
1611 int err = ip6mr_cache_unresolved(vif, skb);
1612 read_unlock(&mrt_lock);
1613
1614 return err;
1615 }
1616 read_unlock(&mrt_lock);
1617 kfree_skb(skb);
1618 return -ENODEV;
1619 }
1620
1621 ip6_mr_forward(skb, cache);
1622
1623 read_unlock(&mrt_lock);
1624
1625 return 0;
1626}
1627
1628
1629static int
1630ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
1631{
1632 int ct;
1633 struct rtnexthop *nhp;
Benjamin Thery4e168802008-12-10 16:15:08 -08001634 struct net_device *dev = init_net.ipv6.vif6_table[c->mf6c_parent].dev;
YOSHIFUJI Hideaki549e0282008-04-05 22:17:39 +09001635 u8 *b = skb_tail_pointer(skb);
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001636 struct rtattr *mp_head;
1637
1638 if (dev)
1639 RTA_PUT(skb, RTA_IIF, 4, &dev->ifindex);
1640
1641 mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
1642
1643 for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
1644 if (c->mfc_un.res.ttls[ct] < 255) {
1645 if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
1646 goto rtattr_failure;
1647 nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
1648 nhp->rtnh_flags = 0;
1649 nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
Benjamin Thery4e168802008-12-10 16:15:08 -08001650 nhp->rtnh_ifindex = init_net.ipv6.vif6_table[ct].dev->ifindex;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001651 nhp->rtnh_len = sizeof(*nhp);
1652 }
1653 }
1654 mp_head->rta_type = RTA_MULTIPATH;
YOSHIFUJI Hideaki549e0282008-04-05 22:17:39 +09001655 mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
YOSHIFUJI Hideaki7bc570c2008-04-03 09:22:53 +09001656 rtm->rtm_type = RTN_MULTICAST;
1657 return 1;
1658
1659rtattr_failure:
1660 nlmsg_trim(skb, b);
1661 return -EMSGSIZE;
1662}
1663
1664int ip6mr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
1665{
1666 int err;
1667 struct mfc6_cache *cache;
1668 struct rt6_info *rt = (struct rt6_info *)skb->dst;
1669
1670 read_lock(&mrt_lock);
1671 cache = ip6mr_cache_find(&rt->rt6i_src.addr, &rt->rt6i_dst.addr);
1672
1673 if (!cache) {
1674 struct sk_buff *skb2;
1675 struct ipv6hdr *iph;
1676 struct net_device *dev;
1677 int vif;
1678
1679 if (nowait) {
1680 read_unlock(&mrt_lock);
1681 return -EAGAIN;
1682 }
1683
1684 dev = skb->dev;
1685 if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
1686 read_unlock(&mrt_lock);
1687 return -ENODEV;
1688 }
1689
1690 /* really correct? */
1691 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
1692 if (!skb2) {
1693 read_unlock(&mrt_lock);
1694 return -ENOMEM;
1695 }
1696
1697 skb_reset_transport_header(skb2);
1698
1699 skb_put(skb2, sizeof(struct ipv6hdr));
1700 skb_reset_network_header(skb2);
1701
1702 iph = ipv6_hdr(skb2);
1703 iph->version = 0;
1704 iph->priority = 0;
1705 iph->flow_lbl[0] = 0;
1706 iph->flow_lbl[1] = 0;
1707 iph->flow_lbl[2] = 0;
1708 iph->payload_len = 0;
1709 iph->nexthdr = IPPROTO_NONE;
1710 iph->hop_limit = 0;
1711 ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
1712 ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
1713
1714 err = ip6mr_cache_unresolved(vif, skb2);
1715 read_unlock(&mrt_lock);
1716
1717 return err;
1718 }
1719
1720 if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
1721 cache->mfc_flags |= MFC_NOTIFY;
1722
1723 err = ip6mr_fill_mroute(skb, cache, rtm);
1724 read_unlock(&mrt_lock);
1725 return err;
1726}
1727