blob: 403e283028690a173b112f1600ef2ecf1b2f1550 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * ROUTE - implementation of the IP router.
7 *
Jesper Juhl02c30a82005-05-05 16:16:16 -07008 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -07009 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 * Alan Cox, <gw4pts@gw4pts.ampr.org>
11 * Linus Torvalds, <Linus.Torvalds@helsinki.fi>
12 * Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 *
14 * Fixes:
15 * Alan Cox : Verify area fixes.
16 * Alan Cox : cli() protects routing changes
17 * Rui Oliveira : ICMP routing table updates
18 * (rco@di.uminho.pt) Routing table insertion and update
19 * Linus Torvalds : Rewrote bits to be sensible
20 * Alan Cox : Added BSD route gw semantics
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090021 * Alan Cox : Super /proc >4K
Linus Torvalds1da177e2005-04-16 15:20:36 -070022 * Alan Cox : MTU in route table
23 * Alan Cox : MSS actually. Also added the window
24 * clamper.
25 * Sam Lantinga : Fixed route matching in rt_del()
26 * Alan Cox : Routing cache support.
27 * Alan Cox : Removed compatibility cruft.
28 * Alan Cox : RTF_REJECT support.
29 * Alan Cox : TCP irtt support.
30 * Jonathan Naylor : Added Metric support.
31 * Miquel van Smoorenburg : BSD API fixes.
32 * Miquel van Smoorenburg : Metrics.
33 * Alan Cox : Use __u32 properly
34 * Alan Cox : Aligned routing errors more closely with BSD
35 * our system is still very different.
36 * Alan Cox : Faster /proc handling
37 * Alexey Kuznetsov : Massive rework to support tree based routing,
38 * routing caches and better behaviour.
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +090039 *
Linus Torvalds1da177e2005-04-16 15:20:36 -070040 * Olaf Erb : irtt wasn't being copied right.
41 * Bjorn Ekwall : Kerneld route support.
42 * Alan Cox : Multicast fixed (I hope)
43 * Pavel Krauz : Limited broadcast fixed
44 * Mike McLagan : Routing by source
45 * Alexey Kuznetsov : End of old history. Split to fib.c and
46 * route.c and rewritten from scratch.
47 * Andi Kleen : Load-limit warning messages.
48 * Vitaly E. Lavrov : Transparent proxy revived after year coma.
49 * Vitaly E. Lavrov : Race condition in ip_route_input_slow.
50 * Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow.
51 * Vladimir V. Ivanov : IP rule info (flowid) is really useful.
52 * Marc Boucher : routing by fwmark
53 * Robert Olsson : Added rt_cache statistics
54 * Arnaldo C. Melo : Convert proc stuff to seq_file
Eric Dumazetbb1d23b2005-07-05 15:00:32 -070055 * Eric Dumazet : hashed spinlocks and rt_check_expire() fixes.
Ilia Sotnikovcef26852006-03-25 01:38:55 -080056 * Ilia Sotnikov : Ignore TOS on PMTUD and Redirect
57 * Ilia Sotnikov : Removed TOS from hash calculations
Linus Torvalds1da177e2005-04-16 15:20:36 -070058 *
59 * This program is free software; you can redistribute it and/or
60 * modify it under the terms of the GNU General Public License
61 * as published by the Free Software Foundation; either version
62 * 2 of the License, or (at your option) any later version.
63 */
64
Joe Perchesafd465032012-03-12 07:03:32 +000065#define pr_fmt(fmt) "IPv4: " fmt
66
Linus Torvalds1da177e2005-04-16 15:20:36 -070067#include <linux/module.h>
68#include <asm/uaccess.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070069#include <linux/bitops.h>
70#include <linux/types.h>
71#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070072#include <linux/mm.h>
73#include <linux/string.h>
74#include <linux/socket.h>
75#include <linux/sockios.h>
76#include <linux/errno.h>
77#include <linux/in.h>
78#include <linux/inet.h>
79#include <linux/netdevice.h>
80#include <linux/proc_fs.h>
81#include <linux/init.h>
82#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070083#include <linux/inetdevice.h>
84#include <linux/igmp.h>
85#include <linux/pkt_sched.h>
86#include <linux/mroute.h>
87#include <linux/netfilter_ipv4.h>
88#include <linux/random.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070089#include <linux/rcupdate.h>
90#include <linux/times.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090091#include <linux/slab.h>
Herbert Xu352e5122007-11-13 21:34:06 -080092#include <net/dst.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020093#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070094#include <net/protocol.h>
95#include <net/ip.h>
96#include <net/route.h>
97#include <net/inetpeer.h>
98#include <net/sock.h>
99#include <net/ip_fib.h>
100#include <net/arp.h>
101#include <net/tcp.h>
102#include <net/icmp.h>
103#include <net/xfrm.h>
Tom Tucker8d717402006-07-30 20:43:36 -0700104#include <net/netevent.h>
Thomas Graf63f34442007-03-22 11:55:17 -0700105#include <net/rtnetlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700106#ifdef CONFIG_SYSCTL
107#include <linux/sysctl.h>
Shan Wei7426a562012-04-18 18:05:46 +0000108#include <linux/kmemleak.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700109#endif
David S. Miller6e5714e2011-08-03 20:50:44 -0700110#include <net/secure_seq.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -0700111
David S. Miller68a5e3d2011-03-11 20:07:33 -0500112#define RT_FL_TOS(oldflp4) \
Julian Anastasovf61759e2011-12-02 11:39:42 +0000113 ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700114
115#define IP_MAX_MTU 0xFFF0
116
117#define RT_GC_TIMEOUT (300*HZ)
118
/* Tunables (most are exported via sysctl elsewhere in this file). */
static int ip_rt_max_size;
/* ICMP redirect rate limiting: burst count, cost per redirect, and the
 * silence period before the limiter resets.
 */
static int ip_rt_redirect_number __read_mostly	= 9;
static int ip_rt_redirect_load __read_mostly	= HZ / 50;
static int ip_rt_redirect_silence __read_mostly	= ((HZ / 50) << (9 + 1));
/* ICMP error (e.g. dest-unreachable) token-bucket rate limiting. */
static int ip_rt_error_cost __read_mostly	= HZ;
static int ip_rt_error_burst __read_mostly	= 5 * HZ;
/* Learned PMTU values expire after this long; clamp floor for PMTU/advmss. */
static int ip_rt_mtu_expires __read_mostly	= 10 * 60 * HZ;
static int ip_rt_min_pmtu __read_mostly		= 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly	= 256;
Eric Dumazet9f28a2f2011-12-21 15:47:16 -0500128
Linus Torvalds1da177e2005-04-16 15:20:36 -0700129/*
130 * Interface to generic destination cache.
131 */
132
133static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
David S. Miller0dbaee32010-12-13 12:52:14 -0800134static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
Steffen Klassertebb762f2011-11-23 02:12:51 +0000135static unsigned int ipv4_mtu(const struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700136static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
137static void ipv4_link_failure(struct sk_buff *skb);
David S. Miller6700c272012-07-17 03:29:28 -0700138static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
139 struct sk_buff *skb, u32 mtu);
140static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
141 struct sk_buff *skb);
David S. Millercaacf052012-07-31 15:06:50 -0700142static void ipv4_dst_destroy(struct dst_entry *dst);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700143
/* dst_ops.ifdown callback: IPv4 keeps no per-device state here, so this
 * is intentionally a no-op.
 */
static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			    int how)
{
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148
/* dst_ops.cow_metrics callback.  IPv4 routes get their metrics when they
 * are created, so being asked to copy-on-write here indicates a bug:
 * warn once and refuse.
 */
static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	WARN_ON(1);
	return NULL;
}
154
David S. Millerf894cbf2012-07-02 21:52:24 -0700155static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
156 struct sk_buff *skb,
157 const void *daddr);
David S. Millerd3aaeb32011-07-18 00:40:17 -0700158
/* Destination-cache operations for IPv4 dst entries. */
static struct dst_ops ipv4_dst_ops = {
	.family =		AF_INET,
	.protocol =		cpu_to_be16(ETH_P_IP),
	.check =		ipv4_dst_check,
	.default_advmss =	ipv4_default_advmss,
	.mtu =			ipv4_mtu,
	.cow_metrics =		ipv4_cow_metrics,
	.destroy =		ipv4_dst_destroy,
	.ifdown =		ipv4_dst_ifdown,
	.negative_advice =	ipv4_negative_advice,
	.link_failure =		ipv4_link_failure,
	.update_pmtu =		ip_rt_update_pmtu,
	.redirect =		ip_do_redirect,
	.local_out =		__ip_local_out,
	.neigh_lookup =		ipv4_neigh_lookup,
};
175
#define ECN_OR_COST(class)	TC_PRIO_##class

/* Map IP TOS values to packet-scheduler priority bands.  Odd entries
 * (low TOS bit set) use ECN_OR_COST() to pick the corresponding band.
 */
const __u8 ip_tos2prio[16] = {
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK)
};
EXPORT_SYMBOL(ip_tos2prio);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700197
/* Per-cpu statistics, dumped through /proc/net/stat/rt_cache below. */
static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200
Linus Torvalds1da177e2005-04-16 15:20:36 -0700201#ifdef CONFIG_PROC_FS
/* /proc/net/rt_cache: only the header line is ever produced (see
 * rt_cache_seq_show), so emit the start token at position 0 only.
 */
static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (*pos)
		return NULL;
	return SEQ_START_TOKEN;
}
208
209static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
210{
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211 ++*pos;
David S. Miller89aef892012-07-17 11:00:09 -0700212 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213}
214
/* Nothing was locked in ->start, so there is nothing to release. */
static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}
218
/* Print the legacy rt_cache header; no data rows follow. */
static int rt_cache_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-127s\n",
			   "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
			   "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
			   "HHUptod\tSpecDst");
	return 0;
}
228
/* seq_file iterator for /proc/net/rt_cache. */
static const struct seq_operations rt_cache_seq_ops = {
	.start  = rt_cache_seq_start,
	.next   = rt_cache_seq_next,
	.stop   = rt_cache_seq_stop,
	.show   = rt_cache_seq_show,
};
235
/* open() handler for /proc/net/rt_cache. */
static int rt_cache_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cache_seq_ops);
}
240
/* file_operations for /proc/net/rt_cache. */
static const struct file_operations rt_cache_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_cache_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
248
249
/*
 * /proc/net/stat/rt_cache: iterate per-cpu statistics.  Position 0 is
 * the header token; positions >= 1 map to possible CPU id (pos - 1),
 * skipping holes in the cpu_possible mask.
 */
static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;	/* remember where to resume */
		return &per_cpu(rt_cache_stat, cpu);
	}
	return NULL;
}
265
/* Advance to the next possible CPU's statistics, or NULL at the end. */
static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return &per_cpu(rt_cache_stat, cpu);
	}
	return NULL;

}
279
/* Nothing to release: ->start takes no locks. */
static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{

}
284
/* Emit one row of per-cpu stats (or the header for the start token).
 * The first column is the global dst entry count, not a per-cpu value.
 */
static int rt_cpu_seq_show(struct seq_file *seq, void *v)
{
	struct rt_cache_stat *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries  in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src  out_hit out_slow_tot out_slow_mc  gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
		return 0;
	}

	seq_printf(seq,"%08x  %08x %08x %08x %08x %08x %08x %08x "
		   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
		   dst_entries_get_slow(&ipv4_dst_ops),
		   st->in_hit,
		   st->in_slow_tot,
		   st->in_slow_mc,
		   st->in_no_route,
		   st->in_brd,
		   st->in_martian_dst,
		   st->in_martian_src,

		   st->out_hit,
		   st->out_slow_tot,
		   st->out_slow_mc,

		   st->gc_total,
		   st->gc_ignored,
		   st->gc_goal_miss,
		   st->gc_dst_overflow,
		   st->in_hlist_search,
		   st->out_hlist_search
		);
	return 0;
}
318
/* seq_file iterator for /proc/net/stat/rt_cache. */
static const struct seq_operations rt_cpu_seq_ops = {
	.start  = rt_cpu_seq_start,
	.next   = rt_cpu_seq_next,
	.stop   = rt_cpu_seq_stop,
	.show   = rt_cpu_seq_show,
};
325
326
/* open() handler for /proc/net/stat/rt_cache. */
static int rt_cpu_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cpu_seq_ops);
}
331
/* file_operations for /proc/net/stat/rt_cache. */
static const struct file_operations rt_cpu_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_cpu_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
339
Patrick McHardyc7066f72011-01-14 13:36:42 +0100340#ifdef CONFIG_IP_ROUTE_CLASSID
Alexey Dobriyana661c412009-11-25 15:40:35 -0800341static int rt_acct_proc_show(struct seq_file *m, void *v)
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800342{
Alexey Dobriyana661c412009-11-25 15:40:35 -0800343 struct ip_rt_acct *dst, *src;
344 unsigned int i, j;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800345
Alexey Dobriyana661c412009-11-25 15:40:35 -0800346 dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
347 if (!dst)
348 return -ENOMEM;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800349
Alexey Dobriyana661c412009-11-25 15:40:35 -0800350 for_each_possible_cpu(i) {
351 src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
352 for (j = 0; j < 256; j++) {
353 dst[j].o_bytes += src[j].o_bytes;
354 dst[j].o_packets += src[j].o_packets;
355 dst[j].i_bytes += src[j].i_bytes;
356 dst[j].i_packets += src[j].i_packets;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800357 }
358 }
Alexey Dobriyana661c412009-11-25 15:40:35 -0800359
360 seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
361 kfree(dst);
362 return 0;
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800363}
Alexey Dobriyana661c412009-11-25 15:40:35 -0800364
/* open() handler for /proc/net/rt_acct. */
static int rt_acct_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, rt_acct_proc_show, NULL);
}
369
/* file_operations for /proc/net/rt_acct. */
static const struct file_operations rt_acct_proc_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_acct_proc_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = single_release,
};
Pavel Emelyanov78c686e2007-12-05 21:13:48 -0800377#endif
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800378
/*
 * Per-netns setup: register /proc/net/rt_cache, /proc/net/stat/rt_cache
 * and (with CONFIG_IP_ROUTE_CLASSID) /proc/net/rt_acct.  On failure the
 * already-created entries are unwound via the goto ladder and -ENOMEM
 * is returned.
 */
static int __net_init ip_rt_do_proc_init(struct net *net)
{
	struct proc_dir_entry *pde;

	pde = proc_create("rt_cache", S_IRUGO, net->proc_net,
			  &rt_cache_seq_fops);
	if (!pde)
		goto err1;

	pde = proc_create("rt_cache", S_IRUGO,
			  net->proc_net_stat, &rt_cpu_seq_fops);
	if (!pde)
		goto err2;

#ifdef CONFIG_IP_ROUTE_CLASSID
	pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
	if (!pde)
		goto err3;
#endif
	return 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
err3:
	remove_proc_entry("rt_cache", net->proc_net_stat);
#endif
err2:
	remove_proc_entry("rt_cache", net->proc_net);
err1:
	return -ENOMEM;
}
Denis V. Lunev73b38712008-02-28 20:51:18 -0800409
/* Per-netns teardown: remove everything ip_rt_do_proc_init created. */
static void __net_exit ip_rt_do_proc_exit(struct net *net)
{
	remove_proc_entry("rt_cache", net->proc_net_stat);
	remove_proc_entry("rt_cache", net->proc_net);
#ifdef CONFIG_IP_ROUTE_CLASSID
	remove_proc_entry("rt_acct", net->proc_net);
#endif
}
418
/* pernet hooks tying the proc entries to network-namespace lifetime. */
static struct pernet_operations ip_rt_proc_ops __net_initdata =	{
	.init = ip_rt_do_proc_init,
	.exit = ip_rt_do_proc_exit,
};
423
/* Register the pernet proc hooks at boot. */
static int __init ip_rt_proc_init(void)
{
	return register_pernet_subsys(&ip_rt_proc_ops);
}
428
Pavel Emelyanov107f1632007-12-05 21:14:28 -0800429#else
/* No procfs: nothing to set up. */
static inline int ip_rt_proc_init(void)
{
	return 0;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700434#endif /* CONFIG_PROC_FS */
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900435
/* A cached route is stale once its generation id no longer matches the
 * namespace's current one (bumped by rt_cache_flush()).
 */
static inline bool rt_is_expired(const struct rtable *rth)
{
	return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
}
440
/* Invalidate all cached routes in @net by bumping the generation id;
 * rt_is_expired() will then reject every existing entry.
 */
void rt_cache_flush(struct net *net)
{
	rt_genid_bump(net);
}
445
/*
 * dst_ops.neigh_lookup callback: resolve the neighbour for a route.
 * Key preference: the route's gateway if set, else the packet's
 * destination address, else the raw @daddr the caller supplied.
 * Falls back to creating a new ARP entry if none is cached.
 */
static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
{
	struct net_device *dev = dst->dev;
	const __be32 *pkey = daddr;
	const struct rtable *rt;
	struct neighbour *n;

	rt = (const struct rtable *) dst;
	if (rt->rt_gateway)
		pkey = (const __be32 *) &rt->rt_gateway;
	else if (skb)
		pkey = &ip_hdr(skb)->daddr;

	n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
	if (n)
		return n;
	return neigh_create(&arp_tbl, pkey, dev);
}
466
Linus Torvalds1da177e2005-04-16 15:20:36 -0700467/*
468 * Peer allocation may fail only in serious out-of-memory conditions. However
469 * we still can generate some output.
470 * Random ID selection looks a bit dangerous because we have no chances to
471 * select ID being unique in a reasonable period of time.
472 * But broken packet identifier may be better than no packet at all.
473 */
/*
 * Fallback IP-ID generator used when no inet_peer could be allocated.
 * Chains a keyed hash through a single global counter, mixed with the
 * destination address, under a spinlock.
 * NOTE(review): IDs from a single global chain are more predictable
 * than per-destination counters — acceptable only as an OOM fallback;
 * confirm against current upstream ID-generation hardening.
 */
static void ip_select_fb_ident(struct iphdr *iph)
{
	static DEFINE_SPINLOCK(ip_fb_id_lock);
	static u32 ip_fallback_id;
	u32 salt;

	spin_lock_bh(&ip_fb_id_lock);
	salt = secure_ip_id((__force __be32)ip_fallback_id ^ iph->daddr);
	iph->id = htons(salt & 0xFFFF);
	ip_fallback_id = salt;	/* carry state into the next invocation */
	spin_unlock_bh(&ip_fb_id_lock);
}
486
/*
 * Choose the IPv4 identification field for an outgoing header.
 * Prefer the per-destination counter kept in the inet_peer cache;
 * fall back to ip_select_fb_ident() if no peer can be obtained.
 */
void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more)
{
	struct net *net = dev_net(dst->dev);
	struct inet_peer *peer;

	peer = inet_getpeer_v4(net->ipv4.peers, iph->daddr, 1);
	if (peer) {
		iph->id = htons(inet_getid(peer, more));
		inet_putpeer(peer);
		return;
	}

	ip_select_fb_ident(iph);
}
EXPORT_SYMBOL(__ip_select_ident);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502
/*
 * Initialize @fl4 for an output route lookup.  When a socket is given,
 * its bound device, mark, TOS and protocol override the caller-supplied
 * values taken from the packet header.
 */
static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
			     const struct iphdr *iph,
			     int oif, u8 tos,
			     u8 prot, u32 mark, int flow_flags)
{
	if (sk) {
		const struct inet_sock *inet = inet_sk(sk);

		oif = sk->sk_bound_dev_if;
		mark = sk->sk_mark;
		tos = RT_CONN_FLAGS(sk);
		prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
	}
	flowi4_init_output(fl4, oif, mark, tos,
			   RT_SCOPE_UNIVERSE, prot,
			   flow_flags,
			   iph->daddr, iph->saddr, 0, 0);
}
521
/* Build a flow key from an skb's IP header and receiving device,
 * letting socket state (if any) override via __build_flow_key().
 */
static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
			       const struct sock *sk)
{
	const struct iphdr *iph = ip_hdr(skb);
	int oif = skb->dev->ifindex;
	u8 tos = RT_TOS(iph->tos);
	u8 prot = iph->protocol;
	u32 mark = skb->mark;

	__build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0);
}
533
/*
 * Build a flow key from socket state alone (no packet available).
 * With a strict-source-route IP option the first-hop address replaces
 * the socket's destination.  inet_opt is read under rcu_read_lock.
 */
static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ip_options_rcu *inet_opt;
	__be32 daddr = inet->inet_daddr;

	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;
	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
			   inet_sk_flowi_flags(sk),
			   daddr, inet->inet_saddr, 0, 0);
	rcu_read_unlock();
}
551
/* Fill @fl4 from the packet headers when an skb is available,
 * otherwise from the socket alone.
 */
static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
				 const struct sk_buff *skb)
{
	if (!skb) {
		build_sk_flow_key(fl4, sk);
		return;
	}
	build_skb_flow_key(fl4, skb, sk);
}
560
/* Free a route after the current RCU grace period. */
static inline void rt_free(struct rtable *rt)
{
	call_rcu(&rt->dst.rcu_head, dst_rcu_free);
}
565
566static DEFINE_SPINLOCK(fnhe_lock);
David S. Miller4895c772012-07-17 04:19:00 -0700567
/*
 * Pick the least-recently-stamped exception in a hash bucket so it can
 * be recycled, dropping any route still cached on it.  Called with
 * fnhe_lock held (see update_or_create_fnhe).
 */
static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{
	struct fib_nh_exception *fnhe, *oldest;
	struct rtable *orig;

	oldest = rcu_dereference(hash->chain);
	for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
			oldest = fnhe;
	}
	orig = rcu_dereference(oldest->fnhe_rth);
	if (orig) {
		RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
		rt_free(orig);
	}
	return oldest;
}
586
David S. Millerd3a25c92012-07-17 13:23:08 -0700587static inline u32 fnhe_hashfun(__be32 daddr)
588{
589 u32 hval;
590
591 hval = (__force u32) daddr;
592 hval ^= (hval >> 11) ^ (hval >> 22);
593
594 return hval & (FNHE_HASH_SIZE - 1);
595}
596
/* Apply a nexthop exception's learned PMTU/expiry (and redirect
 * gateway, if any) to a cached route.
 */
static void fill_route_from_fnhe(struct rtable *rt, struct fib_nh_exception *fnhe)
{
	rt->rt_pmtu = fnhe->fnhe_pmtu;
	rt->dst.expires = fnhe->fnhe_expires;

	if (fnhe->fnhe_gw) {
		rt->rt_flags |= RTCF_REDIRECTED;
		rt->rt_gateway = fnhe->fnhe_gw;
		rt->rt_uses_gateway = 1;
	}
}
608
/*
 * Record a per-destination exception (ICMP redirect gateway and/or
 * learned PMTU) on a nexthop.  An existing entry for @daddr is updated
 * in place; otherwise a new one is created, recycling the oldest entry
 * in the bucket when the chain grows past FNHE_RECLAIM_DEPTH.  All
 * mutation happens under fnhe_lock; readers walk the chains under RCU.
 */
static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
				  u32 pmtu, unsigned long expires)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe;
	struct rtable *rt;
	unsigned int i;
	int depth;
	u32 hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	/* Lazily allocate the per-nexthop exception table. */
	hash = nh->nh_exceptions;
	if (!hash) {
		hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC);
		if (!hash)
			goto out_unlock;
		nh->nh_exceptions = hash;
	}

	hash += hval;

	depth = 0;
	for (fnhe = rcu_dereference(hash->chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr)
			break;
		depth++;
	}

	if (fnhe) {
		if (gw)
			fnhe->fnhe_gw = gw;
		if (pmtu) {
			fnhe->fnhe_pmtu = pmtu;
			/* never store 0; 0 means "no expiry recorded" */
			fnhe->fnhe_expires = max(1UL, expires);
		}
		/* Update all cached dsts too */
		rt = rcu_dereference(fnhe->fnhe_rth);
		if (rt)
			fill_route_from_fnhe(rt, fnhe);
	} else {
		if (depth > FNHE_RECLAIM_DEPTH)
			fnhe = fnhe_oldest(hash);
		else {
			fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
			if (!fnhe)
				goto out_unlock;

			fnhe->fnhe_next = hash->chain;
			rcu_assign_pointer(hash->chain, fnhe);
		}
		fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev));
		fnhe->fnhe_daddr = daddr;
		fnhe->fnhe_gw = gw;
		fnhe->fnhe_pmtu = pmtu;
		fnhe->fnhe_expires = expires;

		/* Exception created; mark the cached routes for the nexthop
		 * stale, so anyone caching it rechecks if this exception
		 * applies to them.
		 */
		for_each_possible_cpu(i) {
			struct rtable __rcu **prt;
			prt = per_cpu_ptr(nh->nh_pcpu_rth_output, i);
			rt = rcu_dereference(*prt);
			if (rt)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
		}
	}

	fnhe->fnhe_stamp = jiffies;

out_unlock:
	spin_unlock_bh(&fnhe_lock);
	return;
}
686
/*
 * Handle a received ICMP redirect for route @rt.
 *
 * The new gateway is validated before being accepted: the redirect must
 * come from the route's current gateway, redirects must be enabled on
 * the receiving device, and the advertised gateway must be a sane
 * unicast/on-link address.  An accepted redirect is stored as a nexthop
 * exception; when @kill_route is set the current dst is also marked
 * obsolete so callers re-resolve.  Rejected redirects are optionally
 * logged (rate-limited) under CONFIG_IP_ROUTE_VERBOSE.
 */
static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
			     bool kill_route)
{
	__be32 new_gw = icmp_hdr(skb)->un.gateway;
	__be32 old_gw = ip_hdr(skb)->saddr;
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct fib_result res;
	struct neighbour *n;
	struct net *net;

	/* Only the four defined redirect codes are honoured. */
	switch (icmp_hdr(skb)->code & 7) {
	case ICMP_REDIR_NET:
	case ICMP_REDIR_NETTOS:
	case ICMP_REDIR_HOST:
	case ICMP_REDIR_HOSTTOS:
		break;

	default:
		return;
	}

	/* A redirect may only come from our current gateway. */
	if (rt->rt_gateway != old_gw)
		return;

	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		return;

	net = dev_net(dev);
	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
	    ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
	    ipv4_is_zeronet(new_gw))
		goto reject_redirect;

	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
			goto reject_redirect;
		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
			goto reject_redirect;
	} else {
		if (inet_addr_type(net, new_gw) != RTN_UNICAST)
			goto reject_redirect;
	}

	n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
	if (n) {
		if (!(n->nud_state & NUD_VALID)) {
			/* Gateway unresolved: kick resolution, accept later. */
			neigh_event_send(n, NULL);
		} else {
			if (fib_lookup(net, fl4, &res) == 0) {
				struct fib_nh *nh = &FIB_RES_NH(res);

				update_or_create_fnhe(nh, fl4->daddr, new_gw,
						      0, 0);
			}
			if (kill_route)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
			call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
		}
		neigh_release(n);
	}
	return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev)) {
		const struct iphdr *iph = (const struct iphdr *) skb->data;
		__be32 daddr = iph->daddr;
		__be32 saddr = iph->saddr;

		net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
				     "  Advised path = %pI4 -> %pI4\n",
				     &old_gw, dev->name, &new_gw,
				     &saddr, &daddr);
	}
#endif
	;
}
766
David S. Miller4895c772012-07-17 04:19:00 -0700767static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
768{
769 struct rtable *rt;
770 struct flowi4 fl4;
771
772 rt = (struct rtable *) dst;
773
774 ip_rt_build_flow_key(&fl4, sk, skb);
David S. Millerceb33202012-07-17 11:31:28 -0700775 __ip_do_redirect(rt, skb, &fl4, true);
David S. Miller4895c772012-07-17 04:19:00 -0700776}
777
Linus Torvalds1da177e2005-04-16 15:20:36 -0700778static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
779{
Eric Dumazetee6b9672008-03-05 18:30:47 -0800780 struct rtable *rt = (struct rtable *)dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700781 struct dst_entry *ret = dst;
782
783 if (rt) {
Timo Teräsd11a4dc2010-03-18 23:20:20 +0000784 if (dst->obsolete > 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700785 ip_rt_put(rt);
786 ret = NULL;
David S. Miller59436342012-07-10 06:58:42 -0700787 } else if ((rt->rt_flags & RTCF_REDIRECTED) ||
788 rt->dst.expires) {
David S. Miller89aef892012-07-17 11:00:09 -0700789 ip_rt_put(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700790 ret = NULL;
791 }
792 }
793 return ret;
794}
795
/*
 * Algorithm:
 *	1. The first ip_rt_redirect_number redirects are sent
 *	   with exponential backoff, then we stop sending them altogether,
 *	   assuming that the host ignores our redirects.
 *	2. If we did not see packets requiring redirects
 *	   during ip_rt_redirect_silence, we assume that the host
 *	   has forgotten the redirected route and start sending redirects again.
 *
 * This algorithm is much cheaper and more intelligent than dumb load limiting
 * in icmp.c.
 *
 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
 * and "frag. need" (breaks PMTU discovery) in icmp.c.
 */
811
/* Send an ICMP redirect back to the sender of @skb, rate-limited.
 *
 * Rate limiting is per source host via inet_peer state: the first
 * ip_rt_redirect_number redirects are spaced with exponential backoff
 * (ip_rt_redirect_load << rate_tokens); after that we stay silent
 * until ip_rt_redirect_silence elapses without redirect-worthy
 * traffic, which resets the counter.  If no peer state is available
 * (allocation failure) the redirect is sent unthrottled.
 *
 * NOTE(review): peer->rate_tokens is also used by the ICMP error rate
 * limiter in ip_error() for the same peer — heavy error traffic from a
 * host can interact with this backoff; confirm this sharing is intended.
 */
void ip_rt_send_redirect(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct in_device *in_dev;
	struct inet_peer *peer;
	struct net *net;
	int log_martians;

	/* Snapshot device policy under RCU, then drop the lock. */
	rcu_read_lock();
	in_dev = __in_dev_get_rcu(rt->dst.dev);
	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
		rcu_read_unlock();
		return;
	}
	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
	rcu_read_unlock();

	net = dev_net(rt->dst.dev);
	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
	if (!peer) {
		/* No peer state: send without rate limiting. */
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
			  rt_nexthop(rt, ip_hdr(skb)->daddr));
		return;
	}

	/* No redirected packets during ip_rt_redirect_silence;
	 * reset the algorithm.
	 */
	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
		peer->rate_tokens = 0;

	/* Too many ignored redirects; do not send anything, just
	 * record the time of the last redirect-worthy packet.
	 */
	if (peer->rate_tokens >= ip_rt_redirect_number) {
		peer->rate_last = jiffies;
		goto out_put_peer;
	}

	/* Check for load limit; set rate_last to the latest sent
	 * redirect.
	 */
	if (peer->rate_tokens == 0 ||
	    time_after(jiffies,
		       (peer->rate_last +
			(ip_rt_redirect_load << peer->rate_tokens)))) {
		__be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);

		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
		peer->rate_last = jiffies;
		++peer->rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
		/* Log once, exactly when the host hits the give-up limit. */
		if (log_martians &&
		    peer->rate_tokens == ip_rt_redirect_number)
			net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
					     &ip_hdr(skb)->saddr, inet_iif(skb),
					     &ip_hdr(skb)->daddr, &gw);
#endif
	}
out_put_peer:
	inet_putpeer(peer);
}
874
875static int ip_error(struct sk_buff *skb)
876{
David S. Miller251da412012-06-26 16:27:09 -0700877 struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
Eric Dumazet511c3f92009-06-02 05:14:27 +0000878 struct rtable *rt = skb_rtable(skb);
David S. Miller92d86822011-02-04 15:55:25 -0800879 struct inet_peer *peer;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700880 unsigned long now;
David S. Miller251da412012-06-26 16:27:09 -0700881 struct net *net;
David S. Miller92d86822011-02-04 15:55:25 -0800882 bool send;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700883 int code;
884
David S. Miller251da412012-06-26 16:27:09 -0700885 net = dev_net(rt->dst.dev);
886 if (!IN_DEV_FORWARD(in_dev)) {
887 switch (rt->dst.error) {
888 case EHOSTUNREACH:
889 IP_INC_STATS_BH(net, IPSTATS_MIB_INADDRERRORS);
890 break;
891
892 case ENETUNREACH:
893 IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
894 break;
895 }
896 goto out;
897 }
898
Changli Gaod8d1f302010-06-10 23:31:35 -0700899 switch (rt->dst.error) {
Joe Perches4500ebf2011-07-01 09:43:07 +0000900 case EINVAL:
901 default:
902 goto out;
903 case EHOSTUNREACH:
904 code = ICMP_HOST_UNREACH;
905 break;
906 case ENETUNREACH:
907 code = ICMP_NET_UNREACH;
David S. Miller251da412012-06-26 16:27:09 -0700908 IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
Joe Perches4500ebf2011-07-01 09:43:07 +0000909 break;
910 case EACCES:
911 code = ICMP_PKT_FILTERED;
912 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700913 }
914
David S. Miller1d861aa2012-07-10 03:58:16 -0700915 peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
David S. Miller92d86822011-02-04 15:55:25 -0800916
917 send = true;
918 if (peer) {
919 now = jiffies;
920 peer->rate_tokens += now - peer->rate_last;
921 if (peer->rate_tokens > ip_rt_error_burst)
922 peer->rate_tokens = ip_rt_error_burst;
923 peer->rate_last = now;
924 if (peer->rate_tokens >= ip_rt_error_cost)
925 peer->rate_tokens -= ip_rt_error_cost;
926 else
927 send = false;
David S. Miller1d861aa2012-07-10 03:58:16 -0700928 inet_putpeer(peer);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700929 }
David S. Miller92d86822011-02-04 15:55:25 -0800930 if (send)
931 icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700932
933out: kfree_skb(skb);
934 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +0900935}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700936
/* Record a learned path MTU @mtu for flow @fl4 using route @rt.
 *
 * The value is stored as a nexthop exception on the FIB nexthop that
 * matches the flow, expiring ip_rt_mtu_expires from now.  Nothing is
 * done when the RTAX_MTU metric is locked, when the advised MTU
 * exceeds the device MTU, or when the same value is already recorded
 * with plenty of lifetime left.
 */
static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
	struct dst_entry *dst = &rt->dst;
	struct fib_result res;

	if (dst_metric_locked(dst, RTAX_MTU))
		return;

	if (dst->dev->mtu < mtu)
		return;

	/* Never go below the configured PMTU floor. */
	if (mtu < ip_rt_min_pmtu)
		mtu = ip_rt_min_pmtu;

	/* Same value already stored with more than half its lifetime
	 * remaining: skip the FIB lookup and fnhe update.
	 */
	if (rt->rt_pmtu == mtu &&
	    time_before(jiffies, dst->expires - ip_rt_mtu_expires / 2))
		return;

	rcu_read_lock();
	if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) {
		struct fib_nh *nh = &FIB_RES_NH(res);

		update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
				      jiffies + ip_rt_mtu_expires);
	}
	rcu_read_unlock();
}
964
David S. Miller4895c772012-07-17 04:19:00 -0700965static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
966 struct sk_buff *skb, u32 mtu)
967{
968 struct rtable *rt = (struct rtable *) dst;
969 struct flowi4 fl4;
970
971 ip_rt_build_flow_key(&fl4, sk, skb);
Steffen Klassertd851c122012-10-07 22:47:25 +0000972 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller4895c772012-07-17 04:19:00 -0700973}
974
David S. Miller36393392012-06-14 22:21:46 -0700975void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
976 int oif, u32 mark, u8 protocol, int flow_flags)
977{
David S. Miller4895c772012-07-17 04:19:00 -0700978 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Miller36393392012-06-14 22:21:46 -0700979 struct flowi4 fl4;
980 struct rtable *rt;
981
David S. Miller4895c772012-07-17 04:19:00 -0700982 __build_flow_key(&fl4, NULL, iph, oif,
983 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Miller36393392012-06-14 22:21:46 -0700984 rt = __ip_route_output_key(net, &fl4);
985 if (!IS_ERR(rt)) {
David S. Miller4895c772012-07-17 04:19:00 -0700986 __ip_rt_update_pmtu(rt, &fl4, mtu);
David S. Miller36393392012-06-14 22:21:46 -0700987 ip_rt_put(rt);
988 }
989}
990EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
991
Steffen Klassert9cb3a502013-01-21 01:59:11 +0000992static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
David S. Miller36393392012-06-14 22:21:46 -0700993{
David S. Miller4895c772012-07-17 04:19:00 -0700994 const struct iphdr *iph = (const struct iphdr *) skb->data;
995 struct flowi4 fl4;
996 struct rtable *rt;
David S. Miller36393392012-06-14 22:21:46 -0700997
David S. Miller4895c772012-07-17 04:19:00 -0700998 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
999 rt = __ip_route_output_key(sock_net(sk), &fl4);
1000 if (!IS_ERR(rt)) {
1001 __ip_rt_update_pmtu(rt, &fl4, mtu);
1002 ip_rt_put(rt);
1003 }
David S. Miller36393392012-06-14 22:21:46 -07001004}
Steffen Klassert9cb3a502013-01-21 01:59:11 +00001005
/* Update the PMTU for @sk's flow and refresh the socket's cached route
 * if the update invalidated it.
 *
 * When the socket is owned by user context, or has no cached route,
 * fall back to __ipv4_sk_update_pmtu(), which operates on a standalone
 * lookup.  Otherwise apply the update to the cached route's path entry
 * and re-lookup/re-install the socket route whenever the cached dst
 * fails validation.  Runs under the socket's BH lock.
 */
void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;
	struct dst_entry *dst;
	bool new = false;	/* true when rt came from a fresh lookup we own */

	bh_lock_sock(sk);
	rt = (struct rtable *) __sk_dst_get(sk);

	if (sock_owned_by_user(sk) || !rt) {
		__ipv4_sk_update_pmtu(skb, sk, mtu);
		goto out;
	}

	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);

	/* Cached route no longer validates: take a fresh one. */
	if (!__sk_dst_check(sk, 0)) {
		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	/* Apply the update to the route at the end of the dst chain. */
	__ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);

	/* The PMTU update may have invalidated the dst; relookup. */
	dst = dst_check(&rt->dst, 0);
	if (!dst) {
		if (new)
			dst_release(&rt->dst);

		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	/* Install the freshly looked-up route on the socket. */
	if (new)
		__sk_dst_set(sk, &rt->dst);

out:
	bh_unlock_sock(sk);
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);
David S. Millerf39925d2011-02-09 22:00:16 -08001053
David S. Millerb42597e2012-07-11 21:25:45 -07001054void ipv4_redirect(struct sk_buff *skb, struct net *net,
1055 int oif, u32 mark, u8 protocol, int flow_flags)
1056{
David S. Miller4895c772012-07-17 04:19:00 -07001057 const struct iphdr *iph = (const struct iphdr *) skb->data;
David S. Millerb42597e2012-07-11 21:25:45 -07001058 struct flowi4 fl4;
1059 struct rtable *rt;
1060
David S. Miller4895c772012-07-17 04:19:00 -07001061 __build_flow_key(&fl4, NULL, iph, oif,
1062 RT_TOS(iph->tos), protocol, mark, flow_flags);
David S. Millerb42597e2012-07-11 21:25:45 -07001063 rt = __ip_route_output_key(net, &fl4);
1064 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001065 __ip_do_redirect(rt, skb, &fl4, false);
David S. Millerb42597e2012-07-11 21:25:45 -07001066 ip_rt_put(rt);
1067 }
1068}
1069EXPORT_SYMBOL_GPL(ipv4_redirect);
1070
1071void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
1072{
David S. Miller4895c772012-07-17 04:19:00 -07001073 const struct iphdr *iph = (const struct iphdr *) skb->data;
1074 struct flowi4 fl4;
1075 struct rtable *rt;
David S. Millerb42597e2012-07-11 21:25:45 -07001076
David S. Miller4895c772012-07-17 04:19:00 -07001077 __build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
1078 rt = __ip_route_output_key(sock_net(sk), &fl4);
1079 if (!IS_ERR(rt)) {
David S. Millerceb33202012-07-17 11:31:28 -07001080 __ip_do_redirect(rt, skb, &fl4, false);
David S. Miller4895c772012-07-17 04:19:00 -07001081 ip_rt_put(rt);
1082 }
David S. Millerb42597e2012-07-11 21:25:45 -07001083}
1084EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
1085
/* dst_ops->check hook: decide whether a cached dst is still usable.
 *
 * All IPV4 dsts are created with ->obsolete set to the value
 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
 * into this function always.
 *
 * When a PMTU/redirect information update invalidates a route,
 * this is indicated by setting obsolete to DST_OBSOLETE_KILL or
 * DST_OBSOLETE_DEAD by dst_free().
 *
 * @cookie is unused here.  Returns @dst when still valid, or NULL to
 * make the caller perform a fresh route lookup.
 */
static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
{
	struct rtable *rt = (struct rtable *) dst;

	if (dst->obsolete != DST_OBSOLETE_FORCE_CHK || rt_is_expired(rt))
		return NULL;
	return dst;
}
1102
/* dst_ops->link_failure hook: the link-layer path for @skb's route has
 * failed.  Report host-unreachable to the sender and expire the cached
 * route attached to the skb.
 */
static void ipv4_link_failure(struct sk_buff *skb)
{
	struct rtable *rt;

	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);

	/* Expire the route now (timeout 0) so it is not reused;
	 * ipv4_negative_advice()/ipv4_mtu() both honour dst.expires.
	 */
	rt = skb_rtable(skb);
	if (rt)
		dst_set_expires(&rt->dst, 0);
}
1113
1114static int ip_rt_bug(struct sk_buff *skb)
1115{
Joe Perches91df42b2012-05-15 14:11:54 +00001116 pr_debug("%s: %pI4 -> %pI4, %s\n",
1117 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
1118 skb->dev ? skb->dev->name : "?");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001119 kfree_skb(skb);
Dave Jonesc378a9c2011-05-21 07:16:42 +00001120 WARN_ON(1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001121 return 0;
1122}
1123
/*
   We do not cache the source address of the outgoing interface,
   because it is used only by IP RR, TS and SRR options,
   so that it is out of the fast path.

   BTW remember: "addr" is allowed to be not aligned
   in IP options!
 */
1132
/* Write into @addr the 4-byte source address to use in IP RR/TS/SRR
 * options for packets on route @rt.
 *
 * For output routes this is the packet's own source address.  For
 * input routes, look up the route toward the packet's destination and
 * take its preferred source address; if the lookup fails, fall back to
 * an address of the output device.
 *
 * @addr may be unaligned (it points into the IP options area), hence
 * the byte-wise memcpy rather than a 32-bit store.
 */
void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
{
	__be32 src;

	if (rt_is_output_route(rt))
		src = ip_hdr(skb)->saddr;
	else {
		struct fib_result res;
		struct flowi4 fl4;
		struct iphdr *iph;

		iph = ip_hdr(skb);

		/* Rebuild the flow key from the received packet. */
		memset(&fl4, 0, sizeof(fl4));
		fl4.daddr = iph->daddr;
		fl4.saddr = iph->saddr;
		fl4.flowi4_tos = RT_TOS(iph->tos);
		fl4.flowi4_oif = rt->dst.dev->ifindex;
		fl4.flowi4_iif = skb->dev->ifindex;
		fl4.flowi4_mark = skb->mark;

		rcu_read_lock();
		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
			src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
		else
			src = inet_select_addr(rt->dst.dev,
					       rt_nexthop(rt, iph->daddr),
					       RT_SCOPE_UNIVERSE);
		rcu_read_unlock();
	}
	memcpy(addr, &src, 4);
}
1165
Patrick McHardyc7066f72011-01-14 13:36:42 +01001166#ifdef CONFIG_IP_ROUTE_CLASSID
Linus Torvalds1da177e2005-04-16 15:20:36 -07001167static void set_class_tag(struct rtable *rt, u32 tag)
1168{
Changli Gaod8d1f302010-06-10 23:31:35 -07001169 if (!(rt->dst.tclassid & 0xFFFF))
1170 rt->dst.tclassid |= tag & 0xFFFF;
1171 if (!(rt->dst.tclassid & 0xFFFF0000))
1172 rt->dst.tclassid |= tag & 0xFFFF0000;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001173}
1174#endif
1175
David S. Miller0dbaee32010-12-13 12:52:14 -08001176static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
1177{
1178 unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);
1179
1180 if (advmss == 0) {
1181 advmss = max_t(unsigned int, dst->dev->mtu - 40,
1182 ip_rt_min_advmss);
1183 if (advmss > 65535 - 40)
1184 advmss = 65535 - 40;
1185 }
1186 return advmss;
1187}
1188
/* dst_ops->mtu hook: report the effective MTU of this route.
 *
 * Precedence:
 *   1. a learned, not-yet-expired PMTU (rt->rt_pmtu);
 *   2. an explicit RTAX_MTU metric;
 *   3. the output device MTU, lowered to 576 when the MTU metric is
 *      locked and the route goes via a gateway, and finally capped
 *      at IP_MAX_MTU.
 */
static unsigned int ipv4_mtu(const struct dst_entry *dst)
{
	const struct rtable *rt = (const struct rtable *) dst;
	unsigned int mtu = rt->rt_pmtu;

	/* Fall back to the metric when no PMTU is known or it expired. */
	if (!mtu || time_after_eq(jiffies, rt->dst.expires))
		mtu = dst_metric_raw(dst, RTAX_MTU);

	if (mtu)
		return mtu;

	mtu = dst->dev->mtu;

	if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
		if (rt->rt_uses_gateway && mtu > 576)
			mtu = 576;
	}

	if (mtu > IP_MAX_MTU)
		mtu = IP_MAX_MTU;

	return mtu;
}
1212
David S. Millerf2bb4be2012-07-17 12:20:47 -07001213static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
David S. Miller4895c772012-07-17 04:19:00 -07001214{
1215 struct fnhe_hash_bucket *hash = nh->nh_exceptions;
1216 struct fib_nh_exception *fnhe;
1217 u32 hval;
1218
David S. Millerf2bb4be2012-07-17 12:20:47 -07001219 if (!hash)
1220 return NULL;
1221
David S. Millerd3a25c92012-07-17 13:23:08 -07001222 hval = fnhe_hashfun(daddr);
David S. Miller4895c772012-07-17 04:19:00 -07001223
1224 for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
1225 fnhe = rcu_dereference(fnhe->fnhe_next)) {
David S. Millerf2bb4be2012-07-17 12:20:47 -07001226 if (fnhe->fnhe_daddr == daddr)
1227 return fnhe;
1228 }
1229 return NULL;
1230}
David S. Miller4895c772012-07-17 04:19:00 -07001231
/* Bind route @rt to nexthop exception @fnhe, under fnhe_lock.
 *
 * Refreshes @rt from the exception (gateway/PMTU/expiry via
 * fill_route_from_fnhe()), installs @rt as the exception's cached
 * route and frees the previously cached one.  Exception data from an
 * older generation is cleared first.  Returns true when the route was
 * bound; false when @fnhe no longer matches @daddr (the caller then
 * treats the route as uncached).
 */
static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
			      __be32 daddr)
{
	bool ret = false;

	spin_lock_bh(&fnhe_lock);

	if (daddr == fnhe->fnhe_daddr) {
		int genid = fnhe_genid(dev_net(rt->dst.dev));
		struct rtable *orig = rcu_dereference(fnhe->fnhe_rth);

		/* A genid bump invalidates learned gateway/PMTU state. */
		if (fnhe->fnhe_genid != genid) {
			fnhe->fnhe_genid = genid;
			fnhe->fnhe_gw = 0;
			fnhe->fnhe_pmtu = 0;
			fnhe->fnhe_expires = 0;
		}
		fill_route_from_fnhe(rt, fnhe);
		if (!rt->rt_gateway)
			rt->rt_gateway = daddr;

		/* Publish @rt as the cached route, then drop the old one. */
		rcu_assign_pointer(fnhe->fnhe_rth, rt);
		if (orig)
			rt_free(orig);

		fnhe->fnhe_stamp = jiffies;
		ret = true;
	}
	spin_unlock_bh(&fnhe_lock);

	return ret;
}
1264
David S. Millercaacf052012-07-31 15:06:50 -07001265static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
David S. Millerf2bb4be2012-07-17 12:20:47 -07001266{
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001267 struct rtable *orig, *prev, **p;
David S. Millercaacf052012-07-31 15:06:50 -07001268 bool ret = true;
David S. Millerf2bb4be2012-07-17 12:20:47 -07001269
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001270 if (rt_is_input_route(rt)) {
Eric Dumazet54764bb2012-07-31 01:08:23 +00001271 p = (struct rtable **)&nh->nh_rth_input;
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001272 } else {
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001273 p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
1274 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001275 orig = *p;
1276
1277 prev = cmpxchg(p, orig, rt);
1278 if (prev == orig) {
David S. Millerf2bb4be2012-07-17 12:20:47 -07001279 if (orig)
Eric Dumazet54764bb2012-07-31 01:08:23 +00001280 rt_free(orig);
Julian Anastasov155e8332012-10-08 11:41:18 +00001281 } else
David S. Millercaacf052012-07-31 15:06:50 -07001282 ret = false;
David S. Millercaacf052012-07-31 15:06:50 -07001283
1284 return ret;
1285}
1286
/* Routes that could not be cached on a FIB nexthop (DST_NOCACHE) are
 * kept on this global list so rt_flush_dev() can still find them when
 * their device goes away.  Protected by rt_uncached_lock (BH-safe).
 */
static DEFINE_SPINLOCK(rt_uncached_lock);
static LIST_HEAD(rt_uncached_list);

/* Link @rt onto the global uncached-route list. */
static void rt_add_uncached_list(struct rtable *rt)
{
	spin_lock_bh(&rt_uncached_lock);
	list_add_tail(&rt->rt_uncached, &rt_uncached_list);
	spin_unlock_bh(&rt_uncached_lock);
}
1296
/* dst_ops->destroy hook: unlink the route from the uncached list (if
 * it was ever added there) before the memory is released.
 *
 * NOTE(review): the unlocked list_empty() pre-check assumes no one
 * concurrently links/unlinks this rt at destroy time — appears safe
 * since destroy runs once the last reference is gone; confirm.
 */
static void ipv4_dst_destroy(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable *) dst;

	if (!list_empty(&rt->rt_uncached)) {
		spin_lock_bh(&rt_uncached_lock);
		list_del(&rt->rt_uncached);
		spin_unlock_bh(&rt_uncached_lock);
	}
}
1307
/* Divert uncached routes away from @dev (e.g. on device teardown).
 *
 * Uncached routes are reachable only through rt_uncached_list; walk it
 * and repoint every entry using @dev at the namespace loopback device,
 * moving the device reference (dev_hold new, dev_put old) so @dev's
 * refcount can drop to zero.
 */
void rt_flush_dev(struct net_device *dev)
{
	if (!list_empty(&rt_uncached_list)) {
		struct net *net = dev_net(dev);
		struct rtable *rt;

		spin_lock_bh(&rt_uncached_lock);
		list_for_each_entry(rt, &rt_uncached_list, rt_uncached) {
			if (rt->dst.dev != dev)
				continue;
			rt->dst.dev = net->loopback_dev;
			dev_hold(rt->dst.dev);
			dev_put(dev);
		}
		spin_unlock_bh(&rt_uncached_lock);
	}
}
1325
Eric Dumazet4331deb2012-07-25 05:11:23 +00001326static bool rt_cache_valid(const struct rtable *rt)
David S. Millerd2d68ba2012-07-17 12:58:50 -07001327{
Eric Dumazet4331deb2012-07-25 05:11:23 +00001328 return rt &&
1329 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1330 !rt_is_expired(rt);
David S. Millerd2d68ba2012-07-17 12:58:50 -07001331}
1332
/* Finish constructing route @rt from FIB lookup state.
 *
 * When @fi is set, copies nexthop attributes (gateway, metrics,
 * classid) from the FIB result, then tries to cache the route: in the
 * matching nexthop exception @fnhe when present, otherwise in the FIB
 * nexthop's cache slot (unless DST_NOCACHE).  Routes that end up
 * uncached are flagged DST_NOCACHE and parked on the global uncached
 * list.  @type is currently unused; @itag feeds the classid tag.
 */
static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
			   const struct fib_result *res,
			   struct fib_nh_exception *fnhe,
			   struct fib_info *fi, u16 type, u32 itag)
{
	bool cached = false;

	if (fi) {
		struct fib_nh *nh = &FIB_RES_NH(*res);

		/* Only link-scope gateways are used directly. */
		if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) {
			rt->rt_gateway = nh->nh_gw;
			rt->rt_uses_gateway = 1;
		}
		dst_init_metrics(&rt->dst, fi->fib_metrics, true);
#ifdef CONFIG_IP_ROUTE_CLASSID
		rt->dst.tclassid = nh->nh_tclassid;
#endif
		if (unlikely(fnhe))
			cached = rt_bind_exception(rt, fnhe, daddr);
		else if (!(rt->dst.flags & DST_NOCACHE))
			cached = rt_cache_route(nh, rt);
		if (unlikely(!cached)) {
			/* Routes we intend to cache in nexthop exception or
			 * FIB nexthop have the DST_NOCACHE bit clear.
			 * However, if we are unsuccessful at storing this
			 * route into the cache we really need to set it.
			 */
			rt->dst.flags |= DST_NOCACHE;
			if (!rt->rt_gateway)
				rt->rt_gateway = daddr;
			rt_add_uncached_list(rt);
		}
	} else
		rt_add_uncached_list(rt);

#ifdef CONFIG_IP_ROUTE_CLASSID
#ifdef CONFIG_IP_MULTIPLE_TABLES
	set_class_tag(rt, res->tclassid);
#endif
	set_class_tag(rt, itag);
#endif
}
1376
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001377static struct rtable *rt_dst_alloc(struct net_device *dev,
David S. Millerf2bb4be2012-07-17 12:20:47 -07001378 bool nopolicy, bool noxfrm, bool will_cache)
David S. Miller0c4dcd52011-02-17 15:42:37 -08001379{
David S. Millerf5b0a872012-07-19 12:31:33 -07001380 return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
David S. Millerc6cffba2012-07-26 11:14:38 +00001381 (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001382 (nopolicy ? DST_NOPOLICY : 0) |
1383 (noxfrm ? DST_NOXFRM : 0));
David S. Miller0c4dcd52011-02-17 15:42:37 -08001384}
1385
/* Input route resolution for multicast destinations.
 *
 * Validates the source address, then builds an uncached multicast
 * rtable: its input handler is ip_local_deliver() when @our indicates
 * local group membership (RTCF_LOCAL), or ip_mr_input() when multicast
 * forwarding applies; its output handler is ip_rt_bug() since such
 * routes must never transmit.  Returns 0 with skb->dst set, or -errno.
 *
 * Called in an rcu_read_lock() section.
 */
static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			     u8 tos, struct net_device *dev, int our)
{
	struct rtable *rth;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	u32 itag = 0;
	int err;

	/* Primary sanity checks. */

	if (in_dev == NULL)
		return -EINVAL;

	/* Source must be unicast and the frame really IPv4. */
	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
	    skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	/* Loopback sources are martian unless route_localnet is set. */
	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(saddr))
			goto e_inval;

	if (ipv4_is_zeronet(saddr)) {
		/* 0.0.0.0 sources are only valid for local-subnet
		 * multicast (e.g. hosts still acquiring an address).
		 */
		if (!ipv4_is_local_multicast(daddr))
			goto e_inval;
	} else {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto e_err;
	}
	/* Uncached route (will_cache = false); dst device is loopback,
	 * delivery is decided by dst.input set below.
	 */
	rth = rt_dst_alloc(dev_net(dev)->loopback_dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
	if (!rth)
		goto e_nobufs;

#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif
	rth->dst.output = ip_rt_bug;

	rth->rt_genid = rt_genid(dev_net(dev));
	rth->rt_flags = RTCF_MULTICAST;
	rth->rt_type = RTN_MULTICAST;
	rth->rt_is_input= 1;
	rth->rt_iif = 0;
	rth->rt_pmtu = 0;
	rth->rt_gateway = 0;
	rth->rt_uses_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);
	if (our) {
		rth->dst.input= ip_local_deliver;
		rth->rt_flags |= RTCF_LOCAL;
	}

#ifdef CONFIG_IP_MROUTE
	if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
		rth->dst.input = ip_mr_input;
#endif
	RT_CACHE_STAT_INC(in_slow_mc);

	skb_dst_set(skb, &rth->dst);
	return 0;

e_nobufs:
	return -ENOBUFS;
e_inval:
	return -EINVAL;
e_err:
	return err;
}
1457
1458
1459static void ip_handle_martian_source(struct net_device *dev,
1460 struct in_device *in_dev,
1461 struct sk_buff *skb,
Al Viro9e12bb22006-09-26 21:25:20 -07001462 __be32 daddr,
1463 __be32 saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001464{
1465 RT_CACHE_STAT_INC(in_martian_src);
1466#ifdef CONFIG_IP_ROUTE_VERBOSE
1467 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
1468 /*
1469 * RFC1812 recommendation, if source is martian,
1470 * the only hint is MAC header.
1471 */
Joe Perches058bd4d2012-03-11 18:36:11 +00001472 pr_warn("martian source %pI4 from %pI4, on dev %s\n",
Harvey Harrison673d57e2008-10-31 00:53:57 -07001473 &daddr, &saddr, dev->name);
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001474 if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
Joe Perches058bd4d2012-03-11 18:36:11 +00001475 print_hex_dump(KERN_WARNING, "ll header: ",
1476 DUMP_PREFIX_OFFSET, 16, 1,
1477 skb_mac_header(skb),
1478 dev->hard_header_len, true);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001479 }
1480 }
1481#endif
1482}
1483
/* called in rcu_read_lock() section.
 *
 * Build (or reuse a cached) forwarding route for a received packet whose
 * FIB lookup resolved to a unicast next hop. On success the resulting
 * dst is attached to @skb and 0 is returned; a negative errno is
 * returned otherwise.
 */
static int __mkroute_input(struct sk_buff *skb,
			   const struct fib_result *res,
			   struct in_device *in_dev,
			   __be32 daddr, __be32 saddr, u32 tos)
{
	struct rtable *rth;
	int err;
	struct in_device *out_dev;
	unsigned int flags = 0;
	bool do_cache;
	u32 itag;

	/* get a working reference to the output device */
	out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
	if (out_dev == NULL) {
		net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
		return -EINVAL;
	}

	/* Reverse-path validation of the source; also yields the
	 * classid tag (itag) used below.
	 */
	err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
				  in_dev->dev, in_dev, &itag);
	if (err < 0) {
		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
					 saddr);

		goto cleanup;
	}

	/* Only cache in the nexthop when there is fib info and no classid
	 * tag; a redirect decision below also disables caching.
	 */
	do_cache = res->fi && !itag;
	if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
	    (IN_DEV_SHARED_MEDIA(out_dev) ||
	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) {
		flags |= RTCF_DOREDIRECT;
		do_cache = false;
	}

	if (skb->protocol != htons(ETH_P_IP)) {
		/* Not IP (i.e. ARP). Do not create route, if it is
		 * invalid for proxy arp. DNAT routes are always valid.
		 *
		 * Proxy arp feature have been extended to allow, ARP
		 * replies back to the same interface, to support
		 * Private VLAN switch technologies. See arp.c.
		 */
		if (out_dev == in_dev &&
		    IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
			err = -EINVAL;
			goto cleanup;
		}
	}

	/* Fast path: reuse the route cached in the nexthop, if valid. */
	if (do_cache) {
		rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
		if (rt_cache_valid(rth)) {
			skb_dst_set_noref(skb, &rth->dst);
			goto out;
		}
	}

	rth = rt_dst_alloc(out_dev->dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
	if (!rth) {
		err = -ENOBUFS;
		goto cleanup;
	}

	rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
	rth->rt_flags = flags;
	rth->rt_type = res->type;
	rth->rt_is_input = 1;
	rth->rt_iif = 0;
	rth->rt_pmtu = 0;
	rth->rt_gateway = 0;
	rth->rt_uses_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);

	/* Forwarded traffic: deliver via ip_forward, emit via ip_output. */
	rth->dst.input = ip_forward;
	rth->dst.output = ip_output;

	rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag);
	skb_dst_set(skb, &rth->dst);
out:
	err = 0;
 cleanup:
	return err;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001572
Stephen Hemminger5969f712008-04-10 01:52:09 -07001573static int ip_mkroute_input(struct sk_buff *skb,
1574 struct fib_result *res,
David S. Miller68a5e3d2011-03-11 20:07:33 -05001575 const struct flowi4 *fl4,
Stephen Hemminger5969f712008-04-10 01:52:09 -07001576 struct in_device *in_dev,
1577 __be32 daddr, __be32 saddr, u32 tos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001578{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001579#ifdef CONFIG_IP_ROUTE_MULTIPATH
David S. Millerff3fccb2011-03-10 16:23:24 -08001580 if (res->fi && res->fi->fib_nhs > 1)
David S. Miller1b7fe5932011-03-10 17:01:16 -08001581 fib_select_multipath(res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001582#endif
1583
1584 /* create a routing cache entry */
David S. Millerc6cffba2012-07-26 11:14:38 +00001585 return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001586}
1587
/*
 *	NOTE. We drop all the packets that have a local source
 *	address, because every properly looped-back packet
 *	must have the correct destination already attached by the output routine.
 *
 *	Such an approach solves two big problems:
 *	1. Non-simplex devices are handled properly.
 *	2. IP spoofing attempts are filtered with 100% guarantee.
 *	called with rcu_read_lock()
 */
1598
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			       u8 tos, struct net_device *dev)
{
	struct fib_result res;
	struct in_device *in_dev = __in_dev_get_rcu(dev);
	struct flowi4	fl4;
	unsigned int	flags = 0;
	u32		itag = 0;
	struct rtable	*rth;
	int		err = -EINVAL;
	struct net    *net = dev_net(dev);
	bool do_cache;

	/* IP on this device is disabled. */

	if (!in_dev)
		goto out;

	/* Check for the most weird martians, which can be not detected
	   by fib_lookup.
	 */

	if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
		goto martian_source;

	res.fi = NULL;
	if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
		goto brd_input;

	/* Accept zero addresses only to limited broadcast;
	 * I even do not know to fix it or not. Waiting for complains :-)
	 */
	if (ipv4_is_zeronet(saddr))
		goto martian_source;

	if (ipv4_is_zeronet(daddr))
		goto martian_destination;

	/* Following code try to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
	 * and call it once if daddr or/and saddr are loopback addresses
	 */
	if (ipv4_is_loopback(daddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_destination;
	} else if (ipv4_is_loopback(saddr)) {
		if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
			goto martian_source;
	}

	/*
	 *	Now we are ready to route packet.
	 */
	fl4.flowi4_oif = 0;
	fl4.flowi4_iif = dev->ifindex;
	fl4.flowi4_mark = skb->mark;
	fl4.flowi4_tos = tos;
	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
	fl4.daddr = daddr;
	fl4.saddr = saddr;
	err = fib_lookup(net, &fl4, &res);
	if (err != 0)
		goto no_route;

	RT_CACHE_STAT_INC(in_slow_tot);

	if (res.type == RTN_BROADCAST)
		goto brd_input;

	if (res.type == RTN_LOCAL) {
		/* Destination is one of our own addresses: validate the
		 * source against the FIB, then build a local route.
		 */
		err = fib_validate_source(skb, saddr, daddr, tos,
					  LOOPBACK_IFINDEX,
					  dev, in_dev, &itag);
		if (err < 0)
			goto martian_source_keep_err;
		goto local_input;
	}

	/* From here on the packet would have to be forwarded. */
	if (!IN_DEV_FORWARD(in_dev))
		goto no_route;
	if (res.type != RTN_UNICAST)
		goto martian_destination;

	err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
out:	return err;

brd_input:
	/* Broadcast destination: only real IP frames qualify. */
	if (skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (!ipv4_is_zeronet(saddr)) {
		err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
					  in_dev, &itag);
		if (err < 0)
			goto martian_source_keep_err;
	}
	flags |= RTCF_BROADCAST;
	res.type = RTN_BROADCAST;
	RT_CACHE_STAT_INC(in_brd);

local_input:
	/* Try the route cached in the nexthop first; caching is only
	 * attempted when there is fib info and no classid tag.
	 */
	do_cache = false;
	if (res.fi) {
		if (!itag) {
			rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
			if (rt_cache_valid(rth)) {
				skb_dst_set_noref(skb, &rth->dst);
				err = 0;
				goto out;
			}
			do_cache = true;
		}
	}

	rth = rt_dst_alloc(net->loopback_dev,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
	if (!rth)
		goto e_nobufs;

	/* Local routes deliver up the stack and must never be used for
	 * output.
	 */
	rth->dst.input= ip_local_deliver;
	rth->dst.output= ip_rt_bug;
#ifdef CONFIG_IP_ROUTE_CLASSID
	rth->dst.tclassid = itag;
#endif

	rth->rt_genid = rt_genid(net);
	rth->rt_flags 	= flags|RTCF_LOCAL;
	rth->rt_type	= res.type;
	rth->rt_is_input = 1;
	rth->rt_iif	= 0;
	rth->rt_pmtu	= 0;
	rth->rt_gateway	= 0;
	rth->rt_uses_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);
	if (res.type == RTN_UNREACHABLE) {
		/* Unreachable: route the packet into the ICMP error path. */
		rth->dst.input= ip_error;
		rth->dst.error= -err;
		rth->rt_flags 	&= ~RTCF_LOCAL;
	}
	if (do_cache)
		rt_cache_route(&FIB_RES_NH(res), rth);
	skb_dst_set(skb, &rth->dst);
	err = 0;
	goto out;

no_route:
	/* No matching route: build an RTN_UNREACHABLE local route so the
	 * error is generated on delivery.
	 */
	RT_CACHE_STAT_INC(in_no_route);
	res.type = RTN_UNREACHABLE;
	if (err == -ESRCH)
		err = -ENETUNREACH;
	goto local_input;

	/*
	 *	Do not cache martian addresses: they should be logged (RFC1812)
	 */
martian_destination:
	RT_CACHE_STAT_INC(in_martian_dst);
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev))
		net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
				     &daddr, &saddr, dev->name);
#endif

e_inval:
	err = -EINVAL;
	goto out;

e_nobufs:
	err = -ENOBUFS;
	goto out;

martian_source:
	err = -EINVAL;
martian_source_keep_err:
	/* Log the martian source; err was set either above or by
	 * fib_validate_source().
	 */
	ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
	goto out;
}
1775
David S. Millerc6cffba2012-07-26 11:14:38 +00001776int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1777 u8 tos, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001778{
Eric Dumazet96d36222010-06-02 19:21:31 +00001779 int res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001780
Eric Dumazet96d36222010-06-02 19:21:31 +00001781 rcu_read_lock();
1782
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783 /* Multicast recognition logic is moved from route cache to here.
1784 The problem was that too many Ethernet cards have broken/missing
1785 hardware multicast filters :-( As result the host on multicasting
1786 network acquires a lot of useless route cache entries, sort of
1787 SDR messages from all the world. Now we try to get rid of them.
1788 Really, provided software IP multicast filter is organized
1789 reasonably (at least, hashed), it does not result in a slowdown
1790 comparing with route cache reject entries.
1791 Note, that multicast routers are not affected, because
1792 route cache entry is created eventually.
1793 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08001794 if (ipv4_is_multicast(daddr)) {
Eric Dumazet96d36222010-06-02 19:21:31 +00001795 struct in_device *in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001796
Eric Dumazet96d36222010-06-02 19:21:31 +00001797 if (in_dev) {
David S. Millerdbdd9a52011-03-10 16:34:38 -08001798 int our = ip_check_mc_rcu(in_dev, daddr, saddr,
1799 ip_hdr(skb)->protocol);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001800 if (our
1801#ifdef CONFIG_IP_MROUTE
Joe Perches9d4fb272009-11-23 10:41:23 -08001802 ||
1803 (!ipv4_is_local_multicast(daddr) &&
1804 IN_DEV_MFORWARD(in_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001805#endif
Joe Perches9d4fb272009-11-23 10:41:23 -08001806 ) {
Eric Dumazet96d36222010-06-02 19:21:31 +00001807 int res = ip_route_input_mc(skb, daddr, saddr,
1808 tos, dev, our);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001809 rcu_read_unlock();
Eric Dumazet96d36222010-06-02 19:21:31 +00001810 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001811 }
1812 }
1813 rcu_read_unlock();
1814 return -EINVAL;
1815 }
David S. Millerc10237e2012-06-27 17:05:06 -07001816 res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
Eric Dumazet96d36222010-06-02 19:21:31 +00001817 rcu_read_unlock();
1818 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001819}
David S. Millerc6cffba2012-07-26 11:14:38 +00001820EXPORT_SYMBOL(ip_route_input_noref);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001821
/* called with rcu_read_lock().
 *
 * Build (or reuse a cached) output route for the flow described by @fl4,
 * given the FIB lookup result @res and the chosen output device.
 * Returns the rtable on success or an ERR_PTR() on failure.
 */
static struct rtable *__mkroute_output(const struct fib_result *res,
				       const struct flowi4 *fl4, int orig_oif,
				       struct net_device *dev_out,
				       unsigned int flags)
{
	struct fib_info *fi = res->fi;
	struct fib_nh_exception *fnhe;
	struct in_device *in_dev;
	u16 type = res->type;
	struct rtable *rth;
	bool do_cache;

	in_dev = __in_dev_get_rcu(dev_out);
	if (!in_dev)
		return ERR_PTR(-EINVAL);

	/* Loopback sources may only leave through a loopback device,
	 * unless route_localnet is enabled.
	 */
	if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
		if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
			return ERR_PTR(-EINVAL);

	/* Refine the route type from the destination address class. */
	if (ipv4_is_lbcast(fl4->daddr))
		type = RTN_BROADCAST;
	else if (ipv4_is_multicast(fl4->daddr))
		type = RTN_MULTICAST;
	else if (ipv4_is_zeronet(fl4->daddr))
		return ERR_PTR(-EINVAL);

	if (dev_out->flags & IFF_LOOPBACK)
		flags |= RTCF_LOCAL;

	do_cache = true;
	if (type == RTN_BROADCAST) {
		flags |= RTCF_BROADCAST | RTCF_LOCAL;
		fi = NULL;
	} else if (type == RTN_MULTICAST) {
		flags |= RTCF_MULTICAST | RTCF_LOCAL;
		if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
				     fl4->flowi4_proto))
			flags &= ~RTCF_LOCAL;
		else
			do_cache = false;
		/* If multicast route do not exist use
		 * default one, but do not gateway in this case.
		 * Yes, it is hack.
		 */
		if (fi && res->prefixlen < 4)
			fi = NULL;
	}

	/* Try a cached route first: either a per-destination exception
	 * (fnhe) or the per-cpu nexthop output cache.
	 */
	fnhe = NULL;
	do_cache &= fi != NULL;
	if (do_cache) {
		struct rtable __rcu **prth;
		struct fib_nh *nh = &FIB_RES_NH(*res);

		fnhe = find_exception(nh, fl4->daddr);
		if (fnhe)
			prth = &fnhe->fnhe_rth;
		else {
			/* A caller-supplied nexthop (FLOWI_FLAG_KNOWN_NH)
			 * that the FIB nexthop does not itself provide
			 * cannot be served from the shared cache.
			 */
			if (unlikely(fl4->flowi4_flags &
				     FLOWI_FLAG_KNOWN_NH &&
				     !(nh->nh_gw &&
				       nh->nh_scope == RT_SCOPE_LINK))) {
				do_cache = false;
				goto add;
			}
			prth = __this_cpu_ptr(nh->nh_pcpu_rth_output);
		}
		rth = rcu_dereference(*prth);
		if (rt_cache_valid(rth)) {
			dst_hold(&rth->dst);
			return rth;
		}
	}

add:
	rth = rt_dst_alloc(dev_out,
			   IN_DEV_CONF_GET(in_dev, NOPOLICY),
			   IN_DEV_CONF_GET(in_dev, NOXFRM),
			   do_cache);
	if (!rth)
		return ERR_PTR(-ENOBUFS);

	rth->dst.output = ip_output;

	rth->rt_genid = rt_genid(dev_net(dev_out));
	rth->rt_flags	= flags;
	rth->rt_type	= type;
	rth->rt_is_input = 0;
	rth->rt_iif	= orig_oif ? : 0;
	rth->rt_pmtu	= 0;
	rth->rt_gateway = 0;
	rth->rt_uses_gateway = 0;
	INIT_LIST_HEAD(&rth->rt_uncached);

	RT_CACHE_STAT_INC(out_slow_tot);

	if (flags & RTCF_LOCAL)
		rth->dst.input = ip_local_deliver;
	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
		/* Locally-destined broadcast/multicast leaving a real
		 * device also needs a local copy: use ip_mc_output.
		 */
		if (flags & RTCF_LOCAL &&
		    !(dev_out->flags & IFF_LOOPBACK)) {
			rth->dst.output = ip_mc_output;
			RT_CACHE_STAT_INC(out_slow_mc);
		}
#ifdef CONFIG_IP_MROUTE
		if (type == RTN_MULTICAST) {
			if (IN_DEV_MFORWARD(in_dev) &&
			    !ipv4_is_local_multicast(fl4->daddr)) {
				rth->dst.input = ip_mr_input;
				rth->dst.output = ip_mc_output;
			}
		}
#endif
	}

	rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);

	return rth;
}
1943
Linus Torvalds1da177e2005-04-16 15:20:36 -07001944/*
1945 * Major route resolver routine.
1946 */
1947
David S. Miller89aef892012-07-17 11:00:09 -07001948struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001949{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001950 struct net_device *dev_out = NULL;
Julian Anastasovf61759e2011-12-02 11:39:42 +00001951 __u8 tos = RT_FL_TOS(fl4);
David S. Miller813b3b52011-04-28 14:48:42 -07001952 unsigned int flags = 0;
1953 struct fib_result res;
David S. Miller5ada5522011-02-17 15:29:00 -08001954 struct rtable *rth;
David S. Miller813b3b52011-04-28 14:48:42 -07001955 int orig_oif;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001956
David S. Miller85b91b02012-07-13 08:21:29 -07001957 res.tclassid = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001958 res.fi = NULL;
David S. Miller8b96d222012-06-11 02:01:56 -07001959 res.table = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960
David S. Miller813b3b52011-04-28 14:48:42 -07001961 orig_oif = fl4->flowi4_oif;
1962
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00001963 fl4->flowi4_iif = LOOPBACK_IFINDEX;
David S. Miller813b3b52011-04-28 14:48:42 -07001964 fl4->flowi4_tos = tos & IPTOS_RT_MASK;
1965 fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
1966 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
David S. Miller44713b62011-03-04 21:24:47 -08001967
David S. Miller010c2702011-02-17 15:37:09 -08001968 rcu_read_lock();
David S. Miller813b3b52011-04-28 14:48:42 -07001969 if (fl4->saddr) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08001970 rth = ERR_PTR(-EINVAL);
David S. Miller813b3b52011-04-28 14:48:42 -07001971 if (ipv4_is_multicast(fl4->saddr) ||
1972 ipv4_is_lbcast(fl4->saddr) ||
1973 ipv4_is_zeronet(fl4->saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001974 goto out;
1975
Linus Torvalds1da177e2005-04-16 15:20:36 -07001976 /* I removed check for oif == dev_out->oif here.
1977 It was wrong for two reasons:
Denis V. Lunev1ab35272008-01-22 22:04:30 -08001978 1. ip_dev_find(net, saddr) can return wrong iface, if saddr
1979 is assigned to multiple interfaces.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001980 2. Moreover, we are allowed to send packets with saddr
1981 of another iface. --ANK
1982 */
1983
David S. Miller813b3b52011-04-28 14:48:42 -07001984 if (fl4->flowi4_oif == 0 &&
1985 (ipv4_is_multicast(fl4->daddr) ||
1986 ipv4_is_lbcast(fl4->daddr))) {
Julian Anastasova210d012008-10-01 07:28:28 -07001987 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07001988 dev_out = __ip_dev_find(net, fl4->saddr, false);
Julian Anastasova210d012008-10-01 07:28:28 -07001989 if (dev_out == NULL)
1990 goto out;
1991
Linus Torvalds1da177e2005-04-16 15:20:36 -07001992 /* Special hack: user can direct multicasts
1993 and limited broadcast via necessary interface
1994 without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
1995 This hack is not just for fun, it allows
1996 vic,vat and friends to work.
1997 They bind socket to loopback, set ttl to zero
1998 and expect that it will work.
1999 From the viewpoint of routing cache they are broken,
2000 because we are not allowed to build multicast path
2001 with loopback source addr (look, routing cache
2002 cannot know, that ttl is zero, so that packet
2003 will not leave this host and route is valid).
2004 Luckily, this hack is good workaround.
2005 */
2006
David S. Miller813b3b52011-04-28 14:48:42 -07002007 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002008 goto make_route;
2009 }
Julian Anastasova210d012008-10-01 07:28:28 -07002010
David S. Miller813b3b52011-04-28 14:48:42 -07002011 if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
Julian Anastasova210d012008-10-01 07:28:28 -07002012 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07002013 if (!__ip_dev_find(net, fl4->saddr, false))
Julian Anastasova210d012008-10-01 07:28:28 -07002014 goto out;
Julian Anastasova210d012008-10-01 07:28:28 -07002015 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002016 }
2017
2018
David S. Miller813b3b52011-04-28 14:48:42 -07002019 if (fl4->flowi4_oif) {
2020 dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
David S. Millerb23dd4f2011-03-02 14:31:35 -08002021 rth = ERR_PTR(-ENODEV);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002022 if (dev_out == NULL)
2023 goto out;
Herbert Xue5ed6392005-10-03 14:35:55 -07002024
2025 /* RACE: Check return value of inet_select_addr instead. */
Eric Dumazetfc75fc82010-12-22 04:39:39 +00002026 if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08002027 rth = ERR_PTR(-ENETUNREACH);
Eric Dumazetfc75fc82010-12-22 04:39:39 +00002028 goto out;
2029 }
David S. Miller813b3b52011-04-28 14:48:42 -07002030 if (ipv4_is_local_multicast(fl4->daddr) ||
2031 ipv4_is_lbcast(fl4->daddr)) {
2032 if (!fl4->saddr)
2033 fl4->saddr = inet_select_addr(dev_out, 0,
2034 RT_SCOPE_LINK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002035 goto make_route;
2036 }
David S. Miller813b3b52011-04-28 14:48:42 -07002037 if (fl4->saddr) {
2038 if (ipv4_is_multicast(fl4->daddr))
2039 fl4->saddr = inet_select_addr(dev_out, 0,
2040 fl4->flowi4_scope);
2041 else if (!fl4->daddr)
2042 fl4->saddr = inet_select_addr(dev_out, 0,
2043 RT_SCOPE_HOST);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002044 }
2045 }
2046
David S. Miller813b3b52011-04-28 14:48:42 -07002047 if (!fl4->daddr) {
2048 fl4->daddr = fl4->saddr;
2049 if (!fl4->daddr)
2050 fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
Denis V. Lunevb40afd02008-01-22 22:06:19 -08002051 dev_out = net->loopback_dev;
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00002052 fl4->flowi4_oif = LOOPBACK_IFINDEX;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002053 res.type = RTN_LOCAL;
2054 flags |= RTCF_LOCAL;
2055 goto make_route;
2056 }
2057
David S. Miller813b3b52011-04-28 14:48:42 -07002058 if (fib_lookup(net, fl4, &res)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002059 res.fi = NULL;
David S. Miller8b96d222012-06-11 02:01:56 -07002060 res.table = NULL;
David S. Miller813b3b52011-04-28 14:48:42 -07002061 if (fl4->flowi4_oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002062 /* Apparently, routing tables are wrong. Assume,
2063 that the destination is on link.
2064
2065 WHY? DW.
2066 Because we are allowed to send to iface
2067 even if it has NO routes and NO assigned
2068 addresses. When oif is specified, routing
2069 tables are looked up with only one purpose:
2070 to catch if destination is gatewayed, rather than
2071 direct. Moreover, if MSG_DONTROUTE is set,
2072 we send packet, ignoring both routing tables
2073 and ifaddr state. --ANK
2074
2075
2076 We could make it even if oif is unknown,
2077 likely IPv6, but we do not.
2078 */
2079
David S. Miller813b3b52011-04-28 14:48:42 -07002080 if (fl4->saddr == 0)
2081 fl4->saddr = inet_select_addr(dev_out, 0,
2082 RT_SCOPE_LINK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002083 res.type = RTN_UNICAST;
2084 goto make_route;
2085 }
David S. Millerb23dd4f2011-03-02 14:31:35 -08002086 rth = ERR_PTR(-ENETUNREACH);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002087 goto out;
2088 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002089
2090 if (res.type == RTN_LOCAL) {
David S. Miller813b3b52011-04-28 14:48:42 -07002091 if (!fl4->saddr) {
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002092 if (res.fi->fib_prefsrc)
David S. Miller813b3b52011-04-28 14:48:42 -07002093 fl4->saddr = res.fi->fib_prefsrc;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002094 else
David S. Miller813b3b52011-04-28 14:48:42 -07002095 fl4->saddr = fl4->daddr;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002096 }
Denis V. Lunevb40afd02008-01-22 22:06:19 -08002097 dev_out = net->loopback_dev;
David S. Miller813b3b52011-04-28 14:48:42 -07002098 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002099 flags |= RTCF_LOCAL;
2100 goto make_route;
2101 }
2102
2103#ifdef CONFIG_IP_ROUTE_MULTIPATH
David S. Miller813b3b52011-04-28 14:48:42 -07002104 if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
David S. Miller1b7fe5932011-03-10 17:01:16 -08002105 fib_select_multipath(&res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002106 else
2107#endif
David S. Miller21d8c492011-04-14 14:49:37 -07002108 if (!res.prefixlen &&
2109 res.table->tb_num_default > 1 &&
David S. Miller813b3b52011-04-28 14:48:42 -07002110 res.type == RTN_UNICAST && !fl4->flowi4_oif)
David S. Miller0c838ff2011-01-31 16:16:50 -08002111 fib_select_default(&res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002112
David S. Miller813b3b52011-04-28 14:48:42 -07002113 if (!fl4->saddr)
2114 fl4->saddr = FIB_RES_PREFSRC(net, res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002115
Linus Torvalds1da177e2005-04-16 15:20:36 -07002116 dev_out = FIB_RES_DEV(res);
David S. Miller813b3b52011-04-28 14:48:42 -07002117 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002118
2119
2120make_route:
David Miller1a00fee2012-07-01 02:02:56 +00002121 rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002122
David S. Miller010c2702011-02-17 15:37:09 -08002123out:
2124 rcu_read_unlock();
David S. Millerb23dd4f2011-03-02 14:31:35 -08002125 return rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002126}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002127EXPORT_SYMBOL_GPL(__ip_route_output_key);
2128
/* dst_ops->check for blackhole routes: always report the cached dst as
 * invalid (NULL), regardless of the cookie, so it is never reused.
 */
static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
{
	return NULL;
}
2133
Steffen Klassertebb762f2011-11-23 02:12:51 +00002134static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
Roland Dreierec831ea2011-01-31 13:16:00 -08002135{
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002136 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2137
2138 return mtu ? : dst->dev->mtu;
Roland Dreierec831ea2011-01-31 13:16:00 -08002139}
2140
/* dst_ops->update_pmtu for blackhole routes: deliberately a no-op —
 * a blackhole dst keeps no usable path state to update.
 */
static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
					  struct sk_buff *skb, u32 mtu)
{
}
2145
/* dst_ops->redirect for blackhole routes: deliberately a no-op —
 * ICMP redirects are ignored on a blackhole dst.
 */
static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
				       struct sk_buff *skb)
{
}
2150
/* dst_ops->cow_metrics for blackhole routes: refuse to create a writable
 * metrics copy (return NULL) — blackhole metrics are never modified.
 */
static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
					  unsigned long old)
{
	return NULL;
}
2156
/* dst_ops used for the routes built by ipv4_blackhole_route(): the
 * state-changing callbacks (check/update_pmtu/redirect/cow_metrics) are
 * all stubs, while mtu/default_advmss/neigh_lookup still return usable
 * answers for code that inspects the dst.
 */
static struct dst_ops ipv4_dst_blackhole_ops = {
	.family			=	AF_INET,
	.protocol		=	cpu_to_be16(ETH_P_IP),
	.check			=	ipv4_blackhole_dst_check,
	.mtu			=	ipv4_blackhole_mtu,
	.default_advmss		=	ipv4_default_advmss,
	.update_pmtu		=	ipv4_rt_blackhole_update_pmtu,
	.redirect		=	ipv4_rt_blackhole_redirect,
	.cow_metrics		=	ipv4_rt_blackhole_cow_metrics,
	.neigh_lookup		=	ipv4_neigh_lookup,
};
2168
/* Build a "blackhole" clone of @dst_orig: a route whose input/output
 * handlers are dst_discard (packets are silently dropped) but which
 * copies enough of the original's state (device, iif, flags, type,
 * gateway, pmtu, genid) that code inspecting the dst keeps working.
 *
 * Always consumes a reference on @dst_orig (dst_release below).
 * Returns the new dst entry, or ERR_PTR(-ENOMEM) if allocation failed.
 */
struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
{
	struct rtable *ort = (struct rtable *) dst_orig;
	struct rtable *rt;

	rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
	if (rt) {
		struct dst_entry *new = &rt->dst;

		new->__use = 1;
		/* Both directions lead nowhere: drop everything. */
		new->input = dst_discard;
		new->output = dst_discard;

		new->dev = ort->dst.dev;
		if (new->dev)
			dev_hold(new->dev);

		rt->rt_is_input = ort->rt_is_input;
		rt->rt_iif = ort->rt_iif;
		rt->rt_pmtu = ort->rt_pmtu;

		rt->rt_genid = rt_genid(net);
		rt->rt_flags = ort->rt_flags;
		rt->rt_type = ort->rt_type;
		rt->rt_gateway = ort->rt_gateway;
		rt->rt_uses_gateway = ort->rt_uses_gateway;

		INIT_LIST_HEAD(&rt->rt_uncached);

		/* NOTE(review): dst_free() on the freshly built entry appears
		 * intentional here (blackhole dsts live only via the reference
		 * taken by dst_alloc) — confirm against dst_alloc()/dst_free()
		 * lifetime rules before changing.
		 */
		dst_free(new);
	}

	dst_release(dst_orig);

	return rt ? &rt->dst : ERR_PTR(-ENOMEM);
}
2205
David S. Miller9d6ec932011-03-12 01:12:47 -05002206struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
David S. Millerb23dd4f2011-03-02 14:31:35 -08002207 struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002208{
David S. Miller9d6ec932011-03-12 01:12:47 -05002209 struct rtable *rt = __ip_route_output_key(net, flp4);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002210
David S. Millerb23dd4f2011-03-02 14:31:35 -08002211 if (IS_ERR(rt))
2212 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002213
David S. Miller56157872011-05-02 14:37:45 -07002214 if (flp4->flowi4_proto)
David S. Miller9d6ec932011-03-12 01:12:47 -05002215 rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
2216 flowi4_to_flowi(flp4),
2217 sk, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002218
David S. Millerb23dd4f2011-03-02 14:31:35 -08002219 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002220}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002221EXPORT_SYMBOL_GPL(ip_route_output_flow);
2222
/* Fill one RTM_NEWROUTE netlink message describing the route attached
 * to @skb (skb_rtable(skb)) into that same skb.
 *
 * @dst/@src: addresses to report in RTA_DST/RTA_SRC.
 * @fl4:      the flow used for the lookup; supplies tos, prefsrc, mark.
 * @portid/@seq/@event/@flags: netlink header fields for nlmsg_put().
 * @nowait:   forwarded to ipmr_get_route() for the multicast case — if
 *            clear, that call may sleep to resolve the route.
 *
 * Returns the result of nlmsg_end() on success, 0 when ipmr_get_route()
 * fully handled the reply, or -EMSGSIZE when the skb ran out of room
 * (the partially built message is cancelled).
 */
static int rt_fill_info(struct net *net,  __be32 dst, __be32 src,
			struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
			u32 seq, int event, int nowait, unsigned int flags)
{
	struct rtable *rt = skb_rtable(skb);
	struct rtmsg *r;
	struct nlmsghdr *nlh;
	unsigned long expires = 0;
	u32 error;
	u32 metrics[RTAX_MAX];

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	r = nlmsg_data(nlh);
	r->rtm_family	 = AF_INET;
	r->rtm_dst_len	= 32;
	r->rtm_src_len	= 0;
	r->rtm_tos	= fl4->flowi4_tos;
	r->rtm_table	= RT_TABLE_MAIN;
	if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN))
		goto nla_put_failure;
	r->rtm_type	= rt->rt_type;
	r->rtm_scope	= RT_SCOPE_UNIVERSE;
	r->rtm_protocol = RTPROT_UNSPEC;
	r->rtm_flags	= (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
	if (rt->rt_flags & RTCF_NOTIFY)
		r->rtm_flags |= RTM_F_NOTIFY;

	if (nla_put_be32(skb, RTA_DST, dst))
		goto nla_put_failure;
	if (src) {
		/* A specific source was requested: report a host src len. */
		r->rtm_src_len = 32;
		if (nla_put_be32(skb, RTA_SRC, src))
			goto nla_put_failure;
	}
	if (rt->dst.dev &&
	    nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
		goto nla_put_failure;
#ifdef CONFIG_IP_ROUTE_CLASSID
	if (rt->dst.tclassid &&
	    nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
		goto nla_put_failure;
#endif
	/* Only report a preferred source for output routes, and only when
	 * it differs from the src already reported above.
	 */
	if (!rt_is_input_route(rt) &&
	    fl4->saddr != src) {
		if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr))
			goto nla_put_failure;
	}
	if (rt->rt_uses_gateway &&
	    nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway))
		goto nla_put_failure;

	/* Convert the absolute expiry (jiffies) into a remaining time;
	 * an already-expired entry is reported as 0.
	 */
	expires = rt->dst.expires;
	if (expires) {
		unsigned long now = jiffies;

		if (time_before(now, expires))
			expires -= now;
		else
			expires = 0;
	}

	memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
	/* A still-valid learned PMTU overrides the stored MTU metric. */
	if (rt->rt_pmtu && expires)
		metrics[RTAX_MTU - 1] = rt->rt_pmtu;
	if (rtnetlink_put_metrics(skb, metrics) < 0)
		goto nla_put_failure;

	if (fl4->flowi4_mark &&
	    nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
		goto nla_put_failure;

	error = rt->dst.error;

	if (rt_is_input_route(rt)) {
#ifdef CONFIG_IP_MROUTE
		/* Forwarded multicast: let the multicast router code fill
		 * in (or fully answer) the request.
		 */
		if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
		    IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
			int err = ipmr_get_route(net, skb,
						 fl4->saddr, fl4->daddr,
						 r, nowait);
			if (err <= 0) {
				if (!nowait) {
					if (err == 0)
						return 0;
					goto nla_put_failure;
				} else {
					if (err == -EMSGSIZE)
						goto nla_put_failure;
					error = err;
				}
			}
		} else
#endif
			if (nla_put_u32(skb, RTA_IIF, rt->rt_iif))
				goto nla_put_failure;
	}

	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2332
/* Netlink RTM_GETROUTE handler: resolve the route described by the
 * request's attributes and unicast an RTM_NEWROUTE reply back to the
 * sender.
 *
 * Two lookup modes, chosen by the presence of RTA_IIF:
 *  - input:  a dummy skb is built and run through ip_route_input() as
 *            if it had arrived on the given interface;
 *  - output: a plain ip_route_output_key() lookup on the flow.
 *
 * Returns 0 on success or a negative errno.
 */
static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(in_skb->sk);
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be32 dst = 0;
	__be32 src = 0;
	u32 iif;
	int err;
	int mark;
	struct sk_buff *skb;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
	if (err < 0)
		goto errout;

	rtm = nlmsg_data(nlh);

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL) {
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers, this skb can pass
	   through good chunk of routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reset_network_header(skb);

	/* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
	ip_hdr(skb)->protocol = IPPROTO_ICMP;
	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));

	/* All attributes are optional; absent ones default to 0. */
	src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
	dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
	mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;

	memset(&fl4, 0, sizeof(fl4));
	fl4.daddr = dst;
	fl4.saddr = src;
	fl4.flowi4_tos = rtm->rtm_tos;
	fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
	fl4.flowi4_mark = mark;

	if (iif) {
		struct net_device *dev;

		dev = __dev_get_by_index(net, iif);
		if (dev == NULL) {
			err = -ENODEV;
			goto errout_free;
		}

		skb->protocol	= htons(ETH_P_IP);
		skb->dev	= dev;
		skb->mark	= mark;
		/* ip_route_input() expects softirq context. */
		local_bh_disable();
		err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
		local_bh_enable();

		rt = skb_rtable(skb);
		/* The lookup may "succeed" with an error route attached. */
		if (err == 0 && rt->dst.error)
			err = -rt->dst.error;
	} else {
		rt = ip_route_output_key(net, &fl4);

		err = 0;
		if (IS_ERR(rt))
			err = PTR_ERR(rt);
	}

	if (err)
		goto errout_free;

	skb_dst_set(skb, &rt->dst);
	if (rtm->rtm_flags & RTM_F_NOTIFY)
		rt->rt_flags |= RTCF_NOTIFY;

	err = rt_fill_info(net, dst, src, &fl4, skb,
			   NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
			   RTM_NEWROUTE, 0, 0);
	if (err <= 0)
		goto errout_free;

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;

errout_free:
	kfree_skb(skb);
	goto errout;
}
2429
/* RTM_GETROUTE dump callback: nothing is dumped here (presumably since
 * the removal of the routing cache there are no cached entries to walk);
 * returning skb->len reports an empty, completed dump.
 */
int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	return skb->len;
}
2434
/* Multicast configuration changed on @in_dev: flush the route cache of
 * the device's network namespace so stale decisions are discarded.
 */
void ip_rt_multicast_event(struct in_device *in_dev)
{
	rt_cache_flush(dev_net(in_dev->dev));
}
2439
#ifdef CONFIG_SYSCTL
/* Tunables exposed via the ipv4_route_table sysctls below.  The
 * *_interval values are in jiffies (proc handlers convert units).
 */
static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;
static int ip_rt_gc_interval __read_mostly  = 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly	= HZ / 2;
static int ip_rt_gc_elasticity __read_mostly	= 8;
2445
Denis V. Lunev81c684d2008-07-08 03:05:28 -07002446static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
Alexey Dobriyan8d65af72009-09-23 15:57:19 -07002447 void __user *buffer,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002448 size_t *lenp, loff_t *ppos)
2449{
Timo Teräs5aad1de2013-05-27 20:46:33 +00002450 struct net *net = (struct net *)__ctl->extra1;
2451
Linus Torvalds1da177e2005-04-16 15:20:36 -07002452 if (write) {
Timo Teräs5aad1de2013-05-27 20:46:33 +00002453 rt_cache_flush(net);
2454 fnhe_genid_bump(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002455 return 0;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09002456 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002457
2458 return -EINVAL;
2459}
2460
/* Global (non-per-netns) route tunables under /proc/sys/net/ipv4/route/.
 * Entries using proc_dointvec_jiffies / _ms_jiffies store jiffies but
 * present seconds / milliseconds to userspace.  Terminated by the empty
 * sentinel entry.
 */
static ctl_table ipv4_route_table[] = {
	{
		.procname	= "gc_thresh",
		.data		= &ipv4_dst_ops.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "max_size",
		.data		= &ip_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* Deprecated. Use gc_min_interval_ms */

		.procname	= "gc_min_interval",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		/* Same variable as above, exposed in milliseconds. */
		.procname	= "gc_min_interval_ms",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "gc_timeout",
		.data		= &ip_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_interval",
		.data		= &ip_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "redirect_load",
		.data		= &ip_rt_redirect_load,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_number",
		.data		= &ip_rt_redirect_number,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_silence",
		.data		= &ip_rt_redirect_silence,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_cost",
		.data		= &ip_rt_error_cost,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_burst",
		.data		= &ip_rt_error_burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "gc_elasticity",
		.data		= &ip_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "mtu_expires",
		.data		= &ip_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "min_pmtu",
		.data		= &ip_rt_min_pmtu,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "min_adv_mss",
		.data		= &ip_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};
Denis V. Lunev39a23e72008-07-05 19:02:33 -07002571
/* Per-netns "flush" sysctl (write-only, mode 0200).  This static table
 * is the template: non-init netns get a kmemdup'd copy in
 * sysctl_route_net_init(), with ->extra1 pointing at the owning netns.
 */
static struct ctl_table ipv4_route_flush_table[] = {
	{
		.procname	= "flush",
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= ipv4_sysctl_rtcache_flush,
	},
	{ },
};
2581
/* Per-netns init: register net/ipv4/route sysctls for @net.  The init
 * netns uses the static ipv4_route_flush_table directly; every other
 * netns gets its own kmemdup'd copy so ->extra1 and ->procname can be
 * modified independently.  Returns 0 or -ENOMEM.
 */
static __net_init int sysctl_route_net_init(struct net *net)
{
	struct ctl_table *tbl;

	tbl = ipv4_route_flush_table;
	if (!net_eq(net, &init_net)) {
		tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
		if (tbl == NULL)
			goto err_dup;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			tbl[0].procname = NULL;
	}
	/* Let the flush handler recover which netns it belongs to. */
	tbl[0].extra1 = net;

	net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
	if (net->ipv4.route_hdr == NULL)
		goto err_reg;
	return 0;

err_reg:
	/* Only free the duplicate, never the static template. */
	if (tbl != ipv4_route_flush_table)
		kfree(tbl);
err_dup:
	return -ENOMEM;
}
2609
2610static __net_exit void sysctl_route_net_exit(struct net *net)
2611{
2612 struct ctl_table *tbl;
2613
2614 tbl = net->ipv4.route_hdr->ctl_table_arg;
2615 unregister_net_sysctl_table(net->ipv4.route_hdr);
2616 BUG_ON(tbl == ipv4_route_flush_table);
2617 kfree(tbl);
2618}
2619
/* Pernet hooks wiring the route sysctls to netns creation/destruction. */
static __net_initdata struct pernet_operations sysctl_route_ops = {
	.init = sysctl_route_net_init,
	.exit = sysctl_route_net_exit,
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002624#endif
2625
/* Per-netns init of the route/fnhe generation counters (start at 0) and
 * a random seed for the per-device address generation id.
 */
static __net_init int rt_genid_init(struct net *net)
{
	atomic_set(&net->rt_genid, 0);
	atomic_set(&net->fnhe_genid, 0);
	get_random_bytes(&net->ipv4.dev_addr_genid,
			 sizeof(net->ipv4.dev_addr_genid));
	return 0;
}
2634
/* Pernet hooks for generation-id setup; no teardown needed. */
static __net_initdata struct pernet_operations rt_genid_ops = {
	.init = rt_genid_init,
};
2638
David S. Millerc3426b42012-06-09 16:27:05 -07002639static int __net_init ipv4_inetpeer_init(struct net *net)
2640{
2641 struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);
2642
2643 if (!bp)
2644 return -ENOMEM;
2645 inet_peer_base_init(bp);
2646 net->ipv4.peers = bp;
2647 return 0;
2648}
2649
2650static void __net_exit ipv4_inetpeer_exit(struct net *net)
2651{
2652 struct inet_peer_base *bp = net->ipv4.peers;
2653
2654 net->ipv4.peers = NULL;
David S. Miller56a6b242012-06-09 16:32:41 -07002655 inetpeer_invalidate_tree(bp);
David S. Millerc3426b42012-06-09 16:27:05 -07002656 kfree(bp);
2657}
2658
/* Pernet hooks for the per-namespace inetpeer base. */
static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
	.init	=	ipv4_inetpeer_init,
	.exit	=	ipv4_inetpeer_exit,
};
Denis V. Lunev9f5e97e2008-07-05 19:02:59 -07002663
#ifdef CONFIG_IP_ROUTE_CLASSID
/* Per-CPU route accounting buffer; allocated in ip_rt_init() below. */
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002667
/* Boot-time initialization of the IPv4 routing subsystem: allocate the
 * dst slab (shared with the blackhole ops), set up dst entry counters,
 * initialize devinet and the FIB, create procfs files, hook up xfrm,
 * register the RTM_GETROUTE handler and the pernet subsystems.
 * Allocation failures here are fatal (panic) — routing is not optional.
 */
int __init ip_rt_init(void)
{
	int rc = 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
	if (!ip_rt_acct)
		panic("IP: failed to allocate ip_rt_acct\n");
#endif

	ipv4_dst_ops.kmem_cachep =
		kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	/* Blackhole dsts come from the same slab as regular ones. */
	ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;

	if (dst_entries_init(&ipv4_dst_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_ops counter\n");

	if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");

	/* Effectively disable gc thresholds/size limits by default. */
	ipv4_dst_ops.gc_thresh = ~0;
	ip_rt_max_size = INT_MAX;

	devinet_init();
	ip_fib_init();

	if (ip_rt_proc_init())
		pr_err("Unable to create route proc files\n");
#ifdef CONFIG_XFRM
	xfrm_init();
	xfrm4_init();
#endif
	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);

#ifdef CONFIG_SYSCTL
	register_pernet_subsys(&sysctl_route_ops);
#endif
	register_pernet_subsys(&rt_genid_ops);
	register_pernet_subsys(&ipv4_inetpeer_ops);
	return rc;
}
2711
#ifdef CONFIG_SYSCTL
/*
 * Register the static net/ipv4/route sysctl table for the initial
 * namespace.  Exists only to paper over the ipv4 init ordering; as the
 * original comment says, it should go away once that is sanitized.
 */
void __init ip_static_sysctl_init(void)
{
	register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
}
#endif