blob: cbfc37f5f05a9305bb64791702e0343ab219da5b [file] [log] [blame]
Pravin B Shelarc5441932013-03-25 14:49:35 +00001/*
2 * Copyright (c) 2013 Nicira, Inc.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of version 2 of the GNU General Public
6 * License as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License
14 * along with this program; if not, write to the Free Software
15 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16 * 02110-1301, USA
17 */
18
19#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21#include <linux/capability.h>
22#include <linux/module.h>
23#include <linux/types.h>
24#include <linux/kernel.h>
25#include <linux/slab.h>
26#include <linux/uaccess.h>
27#include <linux/skbuff.h>
28#include <linux/netdevice.h>
29#include <linux/in.h>
30#include <linux/tcp.h>
31#include <linux/udp.h>
32#include <linux/if_arp.h>
33#include <linux/mroute.h>
34#include <linux/init.h>
35#include <linux/in6.h>
36#include <linux/inetdevice.h>
37#include <linux/igmp.h>
38#include <linux/netfilter_ipv4.h>
39#include <linux/etherdevice.h>
40#include <linux/if_ether.h>
41#include <linux/if_vlan.h>
42#include <linux/rculist.h>
43
44#include <net/sock.h>
45#include <net/ip.h>
46#include <net/icmp.h>
47#include <net/protocol.h>
48#include <net/ip_tunnels.h>
49#include <net/arp.h>
50#include <net/checksum.h>
51#include <net/dsfield.h>
52#include <net/inet_ecn.h>
53#include <net/xfrm.h>
54#include <net/net_namespace.h>
55#include <net/netns/generic.h>
56#include <net/rtnetlink.h>
57
58#if IS_ENABLED(CONFIG_IPV6)
59#include <net/ipv6.h>
60#include <net/ip6_fib.h>
61#include <net/ip6_route.h>
62#endif
63
64static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn,
65 __be32 key, __be32 remote)
66{
67 return hash_32((__force u32)key ^ (__force u32)remote,
68 IP_TNL_HASH_BITS);
69}
70
/* Often modified stats are per cpu, other are shared (netdev->stats) */
struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
						struct rtnl_link_stats64 *tot)
{
	int i;

	/* Sum the lockless per-cpu counters.  The begin/retry pair rereads
	 * a CPU's counters if a writer updated them concurrently, so each
	 * CPU contributes a consistent 64-bit snapshot even on 32-bit hosts.
	 */
	for_each_possible_cpu(i) {
		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
		u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
		unsigned int start;

		do {
			start = u64_stats_fetch_begin_bh(&tstats->syncp);
			rx_packets = tstats->rx_packets;
			tx_packets = tstats->tx_packets;
			rx_bytes = tstats->rx_bytes;
			tx_bytes = tstats->tx_bytes;
		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start));

		tot->rx_packets += rx_packets;
		tot->tx_packets += tx_packets;
		tot->rx_bytes   += rx_bytes;
		tot->tx_bytes   += tx_bytes;
	}

	/* Error/exception counters are rare and kept in the shared
	 * netdev->stats block; copy them over verbatim. */
	tot->multicast = dev->stats.multicast;

	tot->rx_crc_errors = dev->stats.rx_crc_errors;
	tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
	tot->rx_length_errors = dev->stats.rx_length_errors;
	tot->rx_frame_errors = dev->stats.rx_frame_errors;
	tot->rx_errors = dev->stats.rx_errors;

	tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
	tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
	tot->tx_dropped = dev->stats.tx_dropped;
	tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
	tot->tx_errors = dev->stats.tx_errors;

	tot->collisions = dev->stats.collisions;

	return tot;
}
EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
115
116static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
117 __be16 flags, __be32 key)
118{
119 if (p->i_flags & TUNNEL_KEY) {
120 if (flags & TUNNEL_KEY)
121 return key == p->i_key;
122 else
123 /* key expected, none present */
124 return false;
125 } else
126 return !(flags & TUNNEL_KEY);
127}
128
129/* Fallback tunnel: no source, no destination, no key, no options
130
131 Tunnel hash table:
132 We require exact key match i.e. if a key is present in packet
133 it will match only tunnel with the same key; if it is not present,
134 it will match only keyless tunnel.
135
136 All keysless packets, if not matched configured keyless tunnels
137 will match fallback tunnel.
138 Given src, dst and key, find appropriate for input tunnel.
139*/
140struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
141 int link, __be16 flags,
142 __be32 remote, __be32 local,
143 __be32 key)
144{
145 unsigned int hash;
146 struct ip_tunnel *t, *cand = NULL;
147 struct hlist_head *head;
148
149 hash = ip_tunnel_hash(itn, key, remote);
150 head = &itn->tunnels[hash];
151
152 hlist_for_each_entry_rcu(t, head, hash_node) {
153 if (local != t->parms.iph.saddr ||
154 remote != t->parms.iph.daddr ||
155 !(t->dev->flags & IFF_UP))
156 continue;
157
158 if (!ip_tunnel_key_match(&t->parms, flags, key))
159 continue;
160
161 if (t->parms.link == link)
162 return t;
163 else
164 cand = t;
165 }
166
167 hlist_for_each_entry_rcu(t, head, hash_node) {
168 if (remote != t->parms.iph.daddr ||
169 !(t->dev->flags & IFF_UP))
170 continue;
171
172 if (!ip_tunnel_key_match(&t->parms, flags, key))
173 continue;
174
175 if (t->parms.link == link)
176 return t;
177 else if (!cand)
178 cand = t;
179 }
180
181 hash = ip_tunnel_hash(itn, key, 0);
182 head = &itn->tunnels[hash];
183
184 hlist_for_each_entry_rcu(t, head, hash_node) {
185 if ((local != t->parms.iph.saddr &&
186 (local != t->parms.iph.daddr ||
187 !ipv4_is_multicast(local))) ||
188 !(t->dev->flags & IFF_UP))
189 continue;
190
191 if (!ip_tunnel_key_match(&t->parms, flags, key))
192 continue;
193
194 if (t->parms.link == link)
195 return t;
196 else if (!cand)
197 cand = t;
198 }
199
200 if (flags & TUNNEL_NO_KEY)
201 goto skip_key_lookup;
202
203 hlist_for_each_entry_rcu(t, head, hash_node) {
204 if (t->parms.i_key != key ||
205 !(t->dev->flags & IFF_UP))
206 continue;
207
208 if (t->parms.link == link)
209 return t;
210 else if (!cand)
211 cand = t;
212 }
213
214skip_key_lookup:
215 if (cand)
216 return cand;
217
218 if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
219 return netdev_priv(itn->fb_tunnel_dev);
220
221
222 return NULL;
223}
224EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
225
226static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
227 struct ip_tunnel_parm *parms)
228{
229 unsigned int h;
230 __be32 remote;
231
232 if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
233 remote = parms->iph.daddr;
234 else
235 remote = 0;
236
237 h = ip_tunnel_hash(itn, parms->i_key, remote);
238 return &itn->tunnels[h];
239}
240
241static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
242{
243 struct hlist_head *head = ip_bucket(itn, &t->parms);
244
245 hlist_add_head_rcu(&t->hash_node, head);
246}
247
/* Unlink a tunnel from its hash bucket.  Concurrent RCU readers may
 * still observe it until a grace period passes; the _init variant
 * re-initializes the node so a subsequent ip_tunnel_add() is safe.
 */
static void ip_tunnel_del(struct ip_tunnel *t)
{
	hlist_del_init_rcu(&t->hash_node);
}
252
253static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
254 struct ip_tunnel_parm *parms,
255 int type)
256{
257 __be32 remote = parms->iph.daddr;
258 __be32 local = parms->iph.saddr;
259 __be32 key = parms->i_key;
260 int link = parms->link;
261 struct ip_tunnel *t = NULL;
262 struct hlist_head *head = ip_bucket(itn, parms);
263
264 hlist_for_each_entry_rcu(t, head, hash_node) {
265 if (local == t->parms.iph.saddr &&
266 remote == t->parms.iph.daddr &&
267 key == t->parms.i_key &&
268 link == t->parms.link &&
269 type == t->dev->type)
270 break;
271 }
272 return t;
273}
274
/* Allocate and register a tunnel net_device.
 *
 * The device name comes from @parms if set, otherwise it is derived
 * from the rtnl ops kind plus a "%d" template that register_netdevice()
 * expands to a unique index.  Must run under RTNL.
 *
 * Returns the new device or an ERR_PTR() on failure.
 */
static struct net_device *__ip_tunnel_create(struct net *net,
					     const struct rtnl_link_ops *ops,
					     struct ip_tunnel_parm *parms)
{
	int err;
	struct ip_tunnel *tunnel;
	struct net_device *dev;
	char name[IFNAMSIZ];

	if (parms->name[0])
		strlcpy(name, parms->name, IFNAMSIZ);
	else {
		/* Need room for the kind, "%d" and the terminating NUL. */
		if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
			err = -E2BIG;
			goto failed;
		}
		strlcpy(name, ops->kind, IFNAMSIZ);
		strncat(name, "%d", 2);
	}

	ASSERT_RTNL();
	dev = alloc_netdev(ops->priv_size, name, ops->setup);
	if (!dev) {
		err = -ENOMEM;
		goto failed;
	}
	dev_net_set(dev, net);

	dev->rtnl_link_ops = ops;

	/* Seed the private area before registration so ndo_init callbacks
	 * see the requested parameters. */
	tunnel = netdev_priv(dev);
	tunnel->parms = *parms;

	err = register_netdevice(dev);
	if (err)
		goto failed_free;

	return dev;

failed_free:
	free_netdev(dev);
failed:
	return ERR_PTR(err);
}
319
320static inline struct rtable *ip_route_output_tunnel(struct net *net,
321 struct flowi4 *fl4,
322 int proto,
323 __be32 daddr, __be32 saddr,
324 __be32 key, __u8 tos, int oif)
325{
326 memset(fl4, 0, sizeof(*fl4));
327 fl4->flowi4_oif = oif;
328 fl4->daddr = daddr;
329 fl4->saddr = saddr;
330 fl4->flowi4_tos = tos;
331 fl4->flowi4_proto = proto;
332 fl4->fl4_gre_key = key;
333 return ip_route_output_key(net, fl4);
334}
335
/* Bind the tunnel to an underlying device and compute a sensible MTU.
 *
 * If a destination is configured, a trial route lookup identifies the
 * egress device; otherwise fall back to the explicitly configured link.
 * The chosen device's headroom and MTU feed into this device's
 * needed_headroom and the returned MTU (clamped to a minimum of 68,
 * the IPv4 minimum).  Returns the suggested MTU; caller assigns it.
 */
static int ip_tunnel_bind_dev(struct net_device *dev)
{
	struct net_device *tdev = NULL;
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *iph;
	int hlen = LL_MAX_HEADER;
	int mtu = ETH_DATA_LEN;
	int t_hlen = tunnel->hlen + sizeof(struct iphdr);

	iph = &tunnel->parms.iph;

	/* Guess output device to choose reasonable mtu and needed_headroom */
	if (iph->daddr) {
		struct flowi4 fl4;
		struct rtable *rt;

		rt = ip_route_output_tunnel(dev_net(dev), &fl4,
					    tunnel->parms.iph.protocol,
					    iph->daddr, iph->saddr,
					    tunnel->parms.o_key,
					    RT_TOS(iph->tos),
					    tunnel->parms.link);
		if (!IS_ERR(rt)) {
			tdev = rt->dst.dev;
			ip_rt_put(rt);
		}
		if (dev->type != ARPHRD_ETHER)
			dev->flags |= IFF_POINTOPOINT;
	}

	/* No route (or no daddr): fall back to the configured link, if any. */
	if (!tdev && tunnel->parms.link)
		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);

	if (tdev) {
		hlen = tdev->hard_header_len + tdev->needed_headroom;
		mtu = tdev->mtu;
	}
	dev->iflink = tunnel->parms.link;

	/* Reserve space for both the lower device's headers and our own
	 * encapsulation (tunnel header + outer IP header). */
	dev->needed_headroom = t_hlen + hlen;
	mtu -= (dev->hard_header_len + t_hlen);

	if (mtu < 68)
		mtu = 68;

	return mtu;
}
383
384static struct ip_tunnel *ip_tunnel_create(struct net *net,
385 struct ip_tunnel_net *itn,
386 struct ip_tunnel_parm *parms)
387{
388 struct ip_tunnel *nt, *fbt;
389 struct net_device *dev;
390
391 BUG_ON(!itn->fb_tunnel_dev);
392 fbt = netdev_priv(itn->fb_tunnel_dev);
393 dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
394 if (IS_ERR(dev))
395 return NULL;
396
397 dev->mtu = ip_tunnel_bind_dev(dev);
398
399 nt = netdev_priv(dev);
400 ip_tunnel_add(itn, nt);
401 return nt;
402}
403
/* Common receive path for IP-in-IP style tunnels.
 *
 * Called by the protocol handler after the tunnel has been looked up.
 * Strips the tunnel header, validates checksum/sequence expectations
 * against the tunnel's configuration, decapsulates ECN and hands the
 * inner packet to the stack via GRO.  Consumes @skb in all cases and
 * always returns 0.
 */
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
		  const struct tnl_ptk_info *tpi, bool log_ecn_error)
{
	struct pcpu_tstats *tstats;
	const struct iphdr *iph = ip_hdr(skb);
	int err;

	secpath_reset(skb);

	skb->protocol = tpi->proto;

	/* Strip the tunnel header and fix up the checksum for the pulled
	 * bytes; the inner packet now starts at the data pointer. */
	skb->mac_header = skb->network_header;
	__pskb_pull(skb, tunnel->hlen);
	skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen);
#ifdef CONFIG_NET_IPGRE_BROADCAST
	if (ipv4_is_multicast(iph->daddr)) {
		/* Looped back packet, drop it! */
		if (rt_is_output_route(skb_rtable(skb)))
			goto drop;
		tunnel->dev->stats.multicast++;
		skb->pkt_type = PACKET_BROADCAST;
	}
#endif

	/* Checksum presence must agree with the tunnel config: a packet
	 * with a checksum on a no-csum tunnel (or vice versa) is invalid. */
	if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
	    ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
		tunnel->dev->stats.rx_crc_errors++;
		tunnel->dev->stats.rx_errors++;
		goto drop;
	}

	/* Sequence-number enforcement: drop missing or stale sequence
	 * numbers (signed wraparound-safe comparison). */
	if (tunnel->parms.i_flags&TUNNEL_SEQ) {
		if (!(tpi->flags&TUNNEL_SEQ) ||
		    (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
			tunnel->dev->stats.rx_fifo_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}
		tunnel->i_seqno = ntohl(tpi->seq) + 1;
	}

	/* Warning: All skb pointers will be invalidated! */
	if (tunnel->dev->type == ARPHRD_ETHER) {
		if (!pskb_may_pull(skb, ETH_HLEN)) {
			tunnel->dev->stats.rx_length_errors++;
			tunnel->dev->stats.rx_errors++;
			goto drop;
		}

		/* pskb_may_pull() may have reallocated; refetch iph. */
		iph = ip_hdr(skb);
		skb->protocol = eth_type_trans(skb, tunnel->dev);
		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
	}

	skb->pkt_type = PACKET_HOST;
	__skb_tunnel_rx(skb, tunnel->dev);

	skb_reset_network_header(skb);
	/* err > 1 means the outer header said CE but the inner packet is
	 * not ECN-capable: drop.  err == 1 is only logged. */
	err = IP_ECN_decapsulate(iph, skb);
	if (unlikely(err)) {
		if (log_ecn_error)
			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
					&iph->saddr, iph->tos);
		if (err > 1) {
			++tunnel->dev->stats.rx_frame_errors;
			++tunnel->dev->stats.rx_errors;
			goto drop;
		}
	}

	tstats = this_cpu_ptr(tunnel->dev->tstats);
	u64_stats_update_begin(&tstats->syncp);
	tstats->rx_packets++;
	tstats->rx_bytes += skb->len;
	u64_stats_update_end(&tstats->syncp);

	gro_cells_receive(&tunnel->gro_cells, skb);
	return 0;

drop:
	kfree_skb(skb);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
488
/* Path-MTU handling for a packet about to be tunneled over route @rt.
 *
 * Computes the effective tunnel MTU, propagates it to the cached inner
 * dst, and — when the (non-GSO) packet would not fit — sends the
 * appropriate "too big" ICMP/ICMPv6 error back to the sender and
 * returns -E2BIG so the caller aborts transmission.  Returns 0 when
 * the packet may proceed.
 */
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
			   struct rtable *rt, __be16 df)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	/* Size of the inner packet, i.e. without our encapsulation. */
	int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
	int mtu;

	if (df)
		/* DF set: we may not fragment, so subtract all overhead
		 * from the outer route's MTU. */
		mtu = dst_mtu(&rt->dst) - dev->hard_header_len
					- sizeof(struct iphdr) - tunnel->hlen;
	else
		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;

	if (skb_dst(skb))
		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);

	if (skb->protocol == htons(ETH_P_IP)) {
		if (!skb_is_gso(skb) &&
		    (df & htons(IP_DF)) && mtu < pkt_size) {
			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
			return -E2BIG;
		}
	}
#if IS_ENABLED(CONFIG_IPV6)
	else if (skb->protocol == htons(ETH_P_IPV6)) {
		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);

		/* Record the reduced MTU on the IPv6 route, but only for
		 * host routes or unicast tunnels, and never below the
		 * IPv6 minimum MTU. */
		if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
			   mtu >= IPV6_MIN_MTU) {
			if ((tunnel->parms.iph.daddr &&
			    !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
			    rt6->rt6i_dst.plen == 128) {
				rt6->rt6i_flags |= RTF_MODIFIED;
				dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
			}
		}

		if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
		    mtu < pkt_size) {
			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
			return -E2BIG;
		}
	}
#endif
	return 0;
}
535
/* Common transmit path for IP tunnels.
 *
 * @tnl_params supplies the template outer IP header (from the tunnel
 * configuration).  Resolves the outer destination (including NBMA
 * tunnels where daddr == 0), routes the outer packet, enforces PMTU,
 * derives tos/ttl/df from config or the inner header, prepends the
 * outer IP header and transmits.  Consumes @skb on every path.
 */
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
		    const struct iphdr *tnl_params)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	const struct iphdr *inner_iph;
	struct iphdr *iph;
	struct flowi4 fl4;
	u8 tos, ttl;
	__be16 df;
	struct rtable *rt;		/* Route to the other host */
	struct net_device *tdev;	/* Device to other host */
	unsigned int max_headroom;	/* The extra header space needed */
	__be32 dst;

	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);

	/* The IPv4 control block is about to be reused for the outer
	 * transmission; clear any state left by the inner path. */
	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
	dst = tnl_params->daddr;
	if (dst == 0) {
		/* NBMA tunnel: no configured destination, so derive the
		 * outer daddr from the inner packet's routing state. */

		if (skb_dst(skb) == NULL) {
			dev->stats.tx_fifo_errors++;
			goto tx_error;
		}

		if (skb->protocol == htons(ETH_P_IP)) {
			rt = skb_rtable(skb);
			dst = rt_nexthop(rt, inner_iph->daddr);
		}
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6)) {
			const struct in6_addr *addr6;
			struct neighbour *neigh;
			bool do_tx_error_icmp;
			int addr_type;

			/* Over IPv6 the next hop must be an IPv4-compatible
			 * address whose low 32 bits give the outer daddr. */
			neigh = dst_neigh_lookup(skb_dst(skb),
						 &ipv6_hdr(skb)->daddr);
			if (neigh == NULL)
				goto tx_error;

			addr6 = (const struct in6_addr *)&neigh->primary_key;
			addr_type = ipv6_addr_type(addr6);

			if (addr_type == IPV6_ADDR_ANY) {
				addr6 = &ipv6_hdr(skb)->daddr;
				addr_type = ipv6_addr_type(addr6);
			}

			if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
				do_tx_error_icmp = true;
			else {
				do_tx_error_icmp = false;
				dst = addr6->s6_addr32[3];
			}
			neigh_release(neigh);
			if (do_tx_error_icmp)
				goto tx_error_icmp;
		}
#endif
		else
			goto tx_error;
	}

	/* Low bit of the configured TOS means "inherit from inner packet". */
	tos = tnl_params->tos;
	if (tos & 0x1) {
		tos &= ~0x1;
		if (skb->protocol == htons(ETH_P_IP))
			tos = inner_iph->tos;
		else if (skb->protocol == htons(ETH_P_IPV6))
			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
	}

	rt = ip_route_output_tunnel(dev_net(dev), &fl4,
				    tunnel->parms.iph.protocol,
				    dst, tnl_params->saddr,
				    tunnel->parms.o_key,
				    RT_TOS(tos),
				    tunnel->parms.link);
	if (IS_ERR(rt)) {
		dev->stats.tx_carrier_errors++;
		goto tx_error;
	}
	tdev = rt->dst.dev;

	/* Routing back to ourselves would recurse; count it as a collision. */
	if (tdev == dev) {
		ip_rt_put(rt);
		dev->stats.collisions++;
		goto tx_error;
	}


	if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
		ip_rt_put(rt);
		goto tx_error;
	}

	/* Recent ICMP errors on this tunnel: signal link failure to the
	 * sender while the error budget lasts, then reset it. */
	if (tunnel->err_count > 0) {
		if (time_before(jiffies,
				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
			tunnel->err_count--;

			dst_link_failure(skb);
		} else
			tunnel->err_count = 0;
	}

	/* TTL 0 in the config means "inherit from the inner packet"
	 * (or from the outer route for non-IP payloads). */
	ttl = tnl_params->ttl;
	if (ttl == 0) {
		if (skb->protocol == htons(ETH_P_IP))
			ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
		else if (skb->protocol == htons(ETH_P_IPV6))
			ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
		else
			ttl = ip4_dst_hoplimit(&rt->dst);
	}

	/* Propagate the inner DF bit into the outer header. */
	df = tnl_params->frag_off;
	if (skb->protocol == htons(ETH_P_IP))
		df |= (inner_iph->frag_off&htons(IP_DF));

	max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr)
			+ rt->dst.header_len;
	if (max_headroom > dev->needed_headroom) {
		dev->needed_headroom = max_headroom;
		if (skb_cow_head(skb, dev->needed_headroom)) {
			dev->stats.tx_dropped++;
			dev_kfree_skb(skb);
			return;
		}
	}

	/* Attach the outer route; this transfers the rt reference. */
	skb_dst_drop(skb);
	skb_dst_set(skb, &rt->dst);

	/* Push down and install the IP header. */
	skb_push(skb, sizeof(struct iphdr));
	skb_reset_network_header(skb);

	iph = ip_hdr(skb);
	inner_iph = (const struct iphdr *)skb_inner_network_header(skb);

	iph->version	=	4;
	iph->ihl	=	sizeof(struct iphdr) >> 2;
	iph->frag_off	=	df;
	iph->protocol	=	tnl_params->protocol;
	iph->tos	=	ip_tunnel_ecn_encap(tos, inner_iph, skb);
	iph->daddr	=	fl4.daddr;
	iph->saddr	=	fl4.saddr;
	iph->ttl	=	ttl;
	tunnel_ip_select_ident(skb, inner_iph, &rt->dst);

	iptunnel_xmit(skb, dev);
	return;

#if IS_ENABLED(CONFIG_IPV6)
tx_error_icmp:
	dst_link_failure(skb);
#endif
tx_error:
	dev->stats.tx_errors++;
	dev_kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
703
/* Apply new parameters @p to an existing tunnel @t.
 *
 * The tunnel is removed from and re-inserted into the hash table
 * around the address/key updates, because those fields determine its
 * bucket.  If the underlying link changed, the device is rebound and
 * (optionally) its MTU refreshed.  Must run under RTNL.
 */
static void ip_tunnel_update(struct ip_tunnel_net *itn,
			     struct ip_tunnel *t,
			     struct net_device *dev,
			     struct ip_tunnel_parm *p,
			     bool set_mtu)
{
	ip_tunnel_del(t);
	t->parms.iph.saddr = p->iph.saddr;
	t->parms.iph.daddr = p->iph.daddr;
	t->parms.i_key = p->i_key;
	t->parms.o_key = p->o_key;
	if (dev->type != ARPHRD_ETHER) {
		/* Point-to-point devices expose the endpoints as the
		 * device's own hardware/broadcast addresses. */
		memcpy(dev->dev_addr, &p->iph.saddr, 4);
		memcpy(dev->broadcast, &p->iph.daddr, 4);
	}
	ip_tunnel_add(itn, t);

	t->parms.iph.ttl = p->iph.ttl;
	t->parms.iph.tos = p->iph.tos;
	t->parms.iph.frag_off = p->iph.frag_off;

	if (t->parms.link != p->link) {
		int mtu;

		t->parms.link = p->link;
		mtu = ip_tunnel_bind_dev(dev);
		if (set_mtu)
			dev->mtu = mtu;
	}
	netdev_state_change(dev);
}
735
/* Legacy SIOC{GET,ADD,CHG,DEL}TUNNEL ioctl handler shared by all IP
 * tunnel types.  @p is the user-supplied parameter block (already
 * copied in by the caller); on SIOCGETTUNNEL it is filled with the
 * tunnel's current parameters for copy-out.  Returns 0 or a negative
 * errno.
 */
int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
{
	int err = 0;
	struct ip_tunnel *t;
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	BUG_ON(!itn->fb_tunnel_dev);
	switch (cmd) {
	case SIOCGETTUNNEL:
		/* On the fallback device the request selects a tunnel by
		 * parameters; on a real tunnel it reports that tunnel. */
		t = NULL;
		if (dev == itn->fb_tunnel_dev)
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
		if (t == NULL)
			t = netdev_priv(dev);
		memcpy(p, &t->parms, sizeof(*p));
		break;

	case SIOCADDTUNNEL:
	case SIOCCHGTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;
		/* A fixed TTL forbids fragmentation of the outer packet. */
		if (p->iph.ttl)
			p->iph.frag_off |= htons(IP_DF);
		/* Normalize: ignore key values when keying is disabled. */
		if (!(p->i_flags&TUNNEL_KEY))
			p->i_key = 0;
		if (!(p->o_flags&TUNNEL_KEY))
			p->o_key = 0;

		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);

		if (!t && (cmd == SIOCADDTUNNEL))
			t = ip_tunnel_create(net, itn, p);

		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
			if (t != NULL) {
				/* Parameters already belong to another
				 * tunnel device: refuse the change. */
				if (t->dev != dev) {
					err = -EEXIST;
					break;
				}
			} else {
				/* Changing this device in place: the new
				 * addressing mode (broadcast/ptp) must
				 * match the device's current mode. */
				unsigned int nflags = 0;

				if (ipv4_is_multicast(p->iph.daddr))
					nflags = IFF_BROADCAST;
				else if (p->iph.daddr)
					nflags = IFF_POINTOPOINT;

				if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
					err = -EINVAL;
					break;
				}

				t = netdev_priv(dev);
			}
		}

		if (t) {
			err = 0;
			ip_tunnel_update(itn, t, dev, p, true);
		} else
			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
		break;

	case SIOCDELTUNNEL:
		err = -EPERM;
		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
			goto done;

		if (dev == itn->fb_tunnel_dev) {
			/* Deleting via the fallback device: select the
			 * victim by parameters; the fallback itself may
			 * never be deleted this way. */
			err = -ENOENT;
			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
			if (t == NULL)
				goto done;
			err = -EPERM;
			if (t == netdev_priv(itn->fb_tunnel_dev))
				goto done;
			dev = t->dev;
		}
		unregister_netdevice(dev);
		err = 0;
		break;

	default:
		err = -EINVAL;
	}

done:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
829
830int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
831{
832 struct ip_tunnel *tunnel = netdev_priv(dev);
833 int t_hlen = tunnel->hlen + sizeof(struct iphdr);
834
835 if (new_mtu < 68 ||
836 new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
837 return -EINVAL;
838 dev->mtu = new_mtu;
839 return 0;
840}
841EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
842
/* net_device destructor: release per-tunnel resources (GRO cells and
 * per-cpu stats) before freeing the device itself.  Installed as
 * dev->destructor by ip_tunnel_init().
 */
static void ip_tunnel_dev_free(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);

	gro_cells_destroy(&tunnel->gro_cells);
	free_percpu(dev->tstats);
	free_netdev(dev);
}
851
852void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
853{
854 struct net *net = dev_net(dev);
855 struct ip_tunnel *tunnel = netdev_priv(dev);
856 struct ip_tunnel_net *itn;
857
858 itn = net_generic(net, tunnel->ip_tnl_net_id);
859
860 if (itn->fb_tunnel_dev != dev) {
861 ip_tunnel_del(netdev_priv(dev));
862 unregister_netdevice_queue(dev, head);
863 }
864}
865EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
866
Eric Dumazetd3b6f612013-06-07 13:26:05 -0700867int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
Pravin B Shelarc5441932013-03-25 14:49:35 +0000868 struct rtnl_link_ops *ops, char *devname)
869{
870 struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
871 struct ip_tunnel_parm parms;
872
873 itn->tunnels = kzalloc(IP_TNL_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL);
874 if (!itn->tunnels)
875 return -ENOMEM;
876
877 if (!ops) {
878 itn->fb_tunnel_dev = NULL;
879 return 0;
880 }
881 memset(&parms, 0, sizeof(parms));
882 if (devname)
883 strlcpy(parms.name, devname, IFNAMSIZ);
884
885 rtnl_lock();
886 itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
887 rtnl_unlock();
888 if (IS_ERR(itn->fb_tunnel_dev)) {
889 kfree(itn->tunnels);
890 return PTR_ERR(itn->fb_tunnel_dev);
891 }
892
893 return 0;
894}
895EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
896
/* Queue every tunnel of this netns (all hash buckets, then the
 * fallback device) for batched unregistration on @head.  Uses the
 * _safe iterator because unregistration unlinks entries.  Caller holds
 * RTNL and flushes @head with unregister_netdevice_many().
 */
static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head)
{
	int h;

	for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
		struct ip_tunnel *t;
		struct hlist_node *n;
		struct hlist_head *thead = &itn->tunnels[h];

		hlist_for_each_entry_safe(t, n, thead, hash_node)
			unregister_netdevice_queue(t->dev, head);
	}
	if (itn->fb_tunnel_dev)
		unregister_netdevice_queue(itn->fb_tunnel_dev, head);
}
912
/* Per-netns teardown: unregister every tunnel device in one batch
 * under RTNL, then free the hash table allocated by
 * ip_tunnel_init_net().
 */
void ip_tunnel_delete_net(struct ip_tunnel_net *itn)
{
	LIST_HEAD(list);

	rtnl_lock();
	ip_tunnel_destroy(itn, &list);
	unregister_netdevice_many(&list);
	rtnl_unlock();
	kfree(itn->tunnels);
}
EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
924
/* rtnl newlink helper: register @dev as a tunnel with parameters @p.
 *
 * Rejects duplicates of an existing tunnel (-EEXIST).  After
 * registration the device gets a random MAC if none was supplied
 * (Ethernet-type tunnels), is bound to its underlying device for MTU
 * purposes (unless IFLA_MTU was given) and inserted into the hash
 * table.  Returns 0 or a negative errno.
 */
int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
		      struct ip_tunnel_parm *p)
{
	struct ip_tunnel *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel_net *itn;
	int mtu;
	int err;

	nt = netdev_priv(dev);
	itn = net_generic(net, nt->ip_tnl_net_id);

	if (ip_tunnel_find(itn, p, dev->type))
		return -EEXIST;

	nt->parms = *p;
	err = register_netdevice(dev);
	if (err)
		goto out;

	if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
		eth_hw_addr_random(dev);

	mtu = ip_tunnel_bind_dev(dev);
	if (!tb[IFLA_MTU])
		dev->mtu = mtu;

	ip_tunnel_add(itn, nt);

out:
	return err;
}
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
958
/* rtnl changelink helper: apply parameters @p to tunnel device @dev.
 *
 * The fallback device cannot be reconfigured.  If another tunnel
 * already owns @p, return -EEXIST; otherwise verify (for non-Ethernet
 * tunnels) that the new addressing mode matches the device's
 * broadcast/point-to-point flags, then update in place.  The MTU is
 * refreshed only when IFLA_MTU was not explicitly supplied.
 */
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
			 struct ip_tunnel_parm *p)
{
	struct ip_tunnel *t, *nt;
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);

	if (dev == itn->fb_tunnel_dev)
		return -EINVAL;

	nt = netdev_priv(dev);

	t = ip_tunnel_find(itn, p, dev->type);

	if (t) {
		if (t->dev != dev)
			return -EEXIST;
	} else {
		t = nt;

		if (dev->type != ARPHRD_ETHER) {
			unsigned int nflags = 0;

			if (ipv4_is_multicast(p->iph.daddr))
				nflags = IFF_BROADCAST;
			else if (p->iph.daddr)
				nflags = IFF_POINTOPOINT;

			if ((dev->flags ^ nflags) &
			    (IFF_POINTOPOINT | IFF_BROADCAST))
				return -EINVAL;
		}
	}

	ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
998
/* ndo_init for tunnel devices: allocate per-cpu stats and GRO cells,
 * install the destructor that frees them, and seed the private area
 * (back-pointer, name, outer IP header skeleton).  Returns 0 or a
 * negative errno with everything unwound.
 */
int ip_tunnel_init(struct net_device *dev)
{
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct iphdr *iph = &tunnel->parms.iph;
	int err;

	dev->destructor = ip_tunnel_dev_free;
	dev->tstats = alloc_percpu(struct pcpu_tstats);
	if (!dev->tstats)
		return -ENOMEM;

	err = gro_cells_init(&tunnel->gro_cells, dev);
	if (err) {
		/* Unwind the stats allocation; the destructor will not
		 * run for a device whose init failed. */
		free_percpu(dev->tstats);
		return err;
	}

	tunnel->dev = dev;
	strcpy(tunnel->parms.name, dev->name);
	iph->version		= 4;
	iph->ihl		= 5;

	return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);
1024
/* ndo_uninit: remove the tunnel from the per-netns hash table when its
 * device is being unregistered.  The fallback device stays hashed; it
 * is torn down by the netns exit path.
 */
void ip_tunnel_uninit(struct net_device *dev)
{
	struct net *net = dev_net(dev);
	struct ip_tunnel *tunnel = netdev_priv(dev);
	struct ip_tunnel_net *itn;

	itn = net_generic(net, tunnel->ip_tnl_net_id);
	/* fb_tunnel_dev will be unregisted in net-exit call. */
	if (itn->fb_tunnel_dev != dev)
		ip_tunnel_del(netdev_priv(dev));
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1037
1038/* Do least required initialization, rest of init is done in tunnel_init call */
1039void ip_tunnel_setup(struct net_device *dev, int net_id)
1040{
1041 struct ip_tunnel *tunnel = netdev_priv(dev);
1042 tunnel->ip_tnl_net_id = net_id;
1043}
1044EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1045
1046MODULE_LICENSE("GPL");