/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		ROUTE - implementation of the IP router.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *		Linus Torvalds, <Linus.Torvalds@helsinki.fi>
 *		Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *		Alan Cox	:	Verify area fixes.
 *		Alan Cox	:	cli() protects routing changes
 *		Rui Oliveira	:	ICMP routing table updates
 *		(rco@di.uminho.pt)	Routing table insertion and update
 *		Linus Torvalds	:	Rewrote bits to be sensible
 *		Alan Cox	:	Added BSD route gw semantics
 *		Alan Cox	:	Super /proc >4K
 *		Alan Cox	:	MTU in route table
 *		Alan Cox	:	MSS actually. Also added the window
 *					clamper.
 *		Sam Lantinga	:	Fixed route matching in rt_del()
 *		Alan Cox	:	Routing cache support.
 *		Alan Cox	:	Removed compatibility cruft.
 *		Alan Cox	:	RTF_REJECT support.
 *		Alan Cox	:	TCP irtt support.
 *		Jonathan Naylor	:	Added Metric support.
 *	Miquel van Smoorenburg	:	BSD API fixes.
 *	Miquel van Smoorenburg	:	Metrics.
 *		Alan Cox	:	Use __u32 properly
 *		Alan Cox	:	Aligned routing errors more closely with BSD
 *					our system is still very different.
 *		Alan Cox	:	Faster /proc handling
 *	Alexey Kuznetsov	:	Massive rework to support tree based routing,
 *					routing caches and better behaviour.
 *
 *		Olaf Erb	:	irtt wasn't being copied right.
 *		Bjorn Ekwall	:	Kerneld route support.
 *		Alan Cox	:	Multicast fixed (I hope)
 *		Pavel Krauz	:	Limited broadcast fixed
 *		Mike McLagan	:	Routing by source
 *	Alexey Kuznetsov	:	End of old history. Split to fib.c and
 *					route.c and rewritten from scratch.
 *		Andi Kleen	:	Load-limit warning messages.
 *	Vitaly E. Lavrov	:	Transparent proxy revived after year coma.
 *	Vitaly E. Lavrov	:	Race condition in ip_route_input_slow.
 *	Tobias Ringstrom	:	Uninitialized res.type in ip_route_output_slow.
 *	Vladimir V. Ivanov	:	IP rule info (flowid) is really useful.
 *		Marc Boucher	:	routing by fwmark
 *	Robert Olsson		:	Added rt_cache statistics
 *	Arnaldo C. Melo		:	Convert proc stuff to seq_file
 *	Eric Dumazet		:	hashed spinlocks and rt_check_expire() fixes.
 *	Ilia Sotnikov		:	Ignore TOS on PMTUD and Redirect
 *	Ilia Sotnikov		:	Removed TOS from hash calculations
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */

#define pr_fmt(fmt) "IPv4: " fmt

#include <linux/module.h>
#include <asm/uaccess.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/pkt_sched.h>
#include <linux/mroute.h>
#include <linux/netfilter_ipv4.h>
#include <linux/random.h>
#include <linux/rcupdate.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/jhash.h>
#include <net/dst.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/inetpeer.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include <net/arp.h>
#include <net/tcp.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/netevent.h>
#include <net/rtnetlink.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#include <linux/kmemleak.h>
#endif
#include <net/secure_seq.h>

#define RT_FL_TOS(oldflp4) \
	((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK))

#define IP_MAX_MTU	0xFFF0

#define RT_GC_TIMEOUT (300*HZ)

static int ip_rt_max_size;
static int ip_rt_redirect_number __read_mostly	= 9;
static int ip_rt_redirect_load __read_mostly	= HZ / 50;
static int ip_rt_redirect_silence __read_mostly	= ((HZ / 50) << (9 + 1));
static int ip_rt_error_cost __read_mostly	= HZ;
static int ip_rt_error_burst __read_mostly	= 5 * HZ;
static int ip_rt_mtu_expires __read_mostly	= 10 * 60 * HZ;
static int ip_rt_min_pmtu __read_mostly		= 512 + 20 + 20;
static int ip_rt_min_advmss __read_mostly	= 256;

/*
 *	Interface to generic destination cache.
 */

static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
static unsigned int ipv4_default_advmss(const struct dst_entry *dst);
static unsigned int ipv4_mtu(const struct dst_entry *dst);
static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
static void ipv4_link_failure(struct sk_buff *skb);
static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			      struct sk_buff *skb, u32 mtu);
static void ip_do_redirect(struct dst_entry *dst, struct sock *sk,
			   struct sk_buff *skb);
static void ipv4_dst_destroy(struct dst_entry *dst);

static void ipv4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
			    int how)
{
}

static u32 *ipv4_cow_metrics(struct dst_entry *dst, unsigned long old)
{
	WARN_ON(1);
	return NULL;
}

static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr);

static struct dst_ops ipv4_dst_ops = {
	.family =		AF_INET,
	.protocol =		cpu_to_be16(ETH_P_IP),
	.check =		ipv4_dst_check,
	.default_advmss =	ipv4_default_advmss,
	.mtu =			ipv4_mtu,
	.cow_metrics =		ipv4_cow_metrics,
	.destroy =		ipv4_dst_destroy,
	.ifdown =		ipv4_dst_ifdown,
	.negative_advice =	ipv4_negative_advice,
	.link_failure =		ipv4_link_failure,
	.update_pmtu =		ip_rt_update_pmtu,
	.redirect =		ip_do_redirect,
	.local_out =		__ip_local_out,
	.neigh_lookup =		ipv4_neigh_lookup,
};

#define ECN_OR_COST(class)	TC_PRIO_##class

const __u8 ip_tos2prio[16] = {
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BESTEFFORT,
	ECN_OR_COST(BESTEFFORT),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_BULK,
	ECN_OR_COST(BULK),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE,
	ECN_OR_COST(INTERACTIVE),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK),
	TC_PRIO_INTERACTIVE_BULK,
	ECN_OR_COST(INTERACTIVE_BULK)
};
EXPORT_SYMBOL(ip_tos2prio);
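
/*
 * Illustrative sketch: ip_tos2prio is normally indexed with the TOS byte
 * shifted right by one (e.g. rt_tos2priority() in <net/route.h> does
 * ip_tos2prio[IPTOS_TOS(tos) >> 1]).  For IPTOS_LOWDELAY this gives:
 *
 *	u8 tos  = 0x10;					// IPTOS_LOWDELAY
 *	u8 prio = ip_tos2prio[IPTOS_TOS(tos) >> 1];	// index 8 -> TC_PRIO_INTERACTIVE
 *
 * Because ECN_OR_COST(class) expands to TC_PRIO_##class, the odd slots give
 * the same priority as their even neighbours, so the low TOS bit (0x02)
 * does not change the chosen class.
 */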

static DEFINE_PER_CPU(struct rt_cache_stat, rt_cache_stat);
#define RT_CACHE_STAT_INC(field) __this_cpu_inc(rt_cache_stat.field)

#ifdef CONFIG_PROC_FS
static void *rt_cache_seq_start(struct seq_file *seq, loff_t *pos)
{
	if (*pos)
		return NULL;
	return SEQ_START_TOKEN;
}

static void *rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	++*pos;
	return NULL;
}

static void rt_cache_seq_stop(struct seq_file *seq, void *v)
{
}

static int rt_cache_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_printf(seq, "%-127s\n",
			   "Iface\tDestination\tGateway \tFlags\t\tRefCnt\tUse\t"
			   "Metric\tSource\t\tMTU\tWindow\tIRTT\tTOS\tHHRef\t"
			   "HHUptod\tSpecDst");
	return 0;
}

static const struct seq_operations rt_cache_seq_ops = {
	.start  = rt_cache_seq_start,
	.next   = rt_cache_seq_next,
	.stop   = rt_cache_seq_stop,
	.show   = rt_cache_seq_show,
};

static int rt_cache_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cache_seq_ops);
}

static const struct file_operations rt_cache_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_cache_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};


static void *rt_cpu_seq_start(struct seq_file *seq, loff_t *pos)
{
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return &per_cpu(rt_cache_stat, cpu);
	}
	return NULL;
}

static void *rt_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return &per_cpu(rt_cache_stat, cpu);
	}
	return NULL;

}

static void rt_cpu_seq_stop(struct seq_file *seq, void *v)
{

}

static int rt_cpu_seq_show(struct seq_file *seq, void *v)
{
	struct rt_cache_stat *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries in_hit in_slow_tot in_slow_mc in_no_route in_brd in_martian_dst in_martian_src out_hit out_slow_tot out_slow_mc gc_total gc_ignored gc_goal_miss gc_dst_overflow in_hlist_search out_hlist_search\n");
		return 0;
	}

	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
		   " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n",
		   dst_entries_get_slow(&ipv4_dst_ops),
		   st->in_hit,
		   st->in_slow_tot,
		   st->in_slow_mc,
		   st->in_no_route,
		   st->in_brd,
		   st->in_martian_dst,
		   st->in_martian_src,

		   st->out_hit,
		   st->out_slow_tot,
		   st->out_slow_mc,

		   st->gc_total,
		   st->gc_ignored,
		   st->gc_goal_miss,
		   st->gc_dst_overflow,
		   st->in_hlist_search,
		   st->out_hlist_search
		);
	return 0;
}

static const struct seq_operations rt_cpu_seq_ops = {
	.start  = rt_cpu_seq_start,
	.next   = rt_cpu_seq_next,
	.stop   = rt_cpu_seq_stop,
	.show   = rt_cpu_seq_show,
};


static int rt_cpu_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &rt_cpu_seq_ops);
}

static const struct file_operations rt_cpu_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = rt_cpu_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};

#ifdef CONFIG_IP_ROUTE_CLASSID
static int rt_acct_proc_show(struct seq_file *m, void *v)
{
	struct ip_rt_acct *dst, *src;
	unsigned int i, j;

	dst = kcalloc(256, sizeof(struct ip_rt_acct), GFP_KERNEL);
	if (!dst)
		return -ENOMEM;

	for_each_possible_cpu(i) {
		src = (struct ip_rt_acct *)per_cpu_ptr(ip_rt_acct, i);
		for (j = 0; j < 256; j++) {
			dst[j].o_bytes   += src[j].o_bytes;
			dst[j].o_packets += src[j].o_packets;
			dst[j].i_bytes   += src[j].i_bytes;
			dst[j].i_packets += src[j].i_packets;
		}
	}

	seq_write(m, dst, 256 * sizeof(struct ip_rt_acct));
	kfree(dst);
	return 0;
}

static int rt_acct_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, rt_acct_proc_show, NULL);
}

static const struct file_operations rt_acct_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= rt_acct_proc_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
#endif

static int __net_init ip_rt_do_proc_init(struct net *net)
{
	struct proc_dir_entry *pde;

	pde = proc_create("rt_cache", S_IRUGO, net->proc_net,
			  &rt_cache_seq_fops);
	if (!pde)
		goto err1;

	pde = proc_create("rt_cache", S_IRUGO,
			  net->proc_net_stat, &rt_cpu_seq_fops);
	if (!pde)
		goto err2;

#ifdef CONFIG_IP_ROUTE_CLASSID
	pde = proc_create("rt_acct", 0, net->proc_net, &rt_acct_proc_fops);
	if (!pde)
		goto err3;
#endif
	return 0;

#ifdef CONFIG_IP_ROUTE_CLASSID
err3:
	remove_proc_entry("rt_cache", net->proc_net_stat);
#endif
err2:
	remove_proc_entry("rt_cache", net->proc_net);
err1:
	return -ENOMEM;
}

static void __net_exit ip_rt_do_proc_exit(struct net *net)
{
	remove_proc_entry("rt_cache", net->proc_net_stat);
	remove_proc_entry("rt_cache", net->proc_net);
#ifdef CONFIG_IP_ROUTE_CLASSID
	remove_proc_entry("rt_acct", net->proc_net);
#endif
}

static struct pernet_operations ip_rt_proc_ops __net_initdata = {
	.init = ip_rt_do_proc_init,
	.exit = ip_rt_do_proc_exit,
};

static int __init ip_rt_proc_init(void)
{
	return register_pernet_subsys(&ip_rt_proc_ops);
}

#else
static inline int ip_rt_proc_init(void)
{
	return 0;
}
#endif /* CONFIG_PROC_FS */

static inline bool rt_is_expired(const struct rtable *rth)
{
	return rth->rt_genid != rt_genid(dev_net(rth->dst.dev));
}

void rt_cache_flush(struct net *net)
{
	rt_genid_bump(net);
}

static struct neighbour *ipv4_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
{
	struct net_device *dev = dst->dev;
	const __be32 *pkey = daddr;
	const struct rtable *rt;
	struct neighbour *n;

	rt = (const struct rtable *) dst;
	if (rt->rt_gateway)
		pkey = (const __be32 *) &rt->rt_gateway;
	else if (skb)
		pkey = &ip_hdr(skb)->daddr;

	n = __ipv4_neigh_lookup(dev, *(__force u32 *)pkey);
	if (n)
		return n;
	return neigh_create(&arp_tbl, pkey, dev);
}

#define IP_IDENTS_SZ 2048u
struct ip_ident_bucket {
	atomic_t	id;
	u32		stamp32;
};

static struct ip_ident_bucket *ip_idents __read_mostly;

/* In order to protect privacy, we add a perturbation to identifiers
 * if one generator is seldom used. This makes it hard for an attacker
 * to infer how many packets were sent between two points in time.
 */
u32 ip_idents_reserve(u32 hash, int segs)
{
	struct ip_ident_bucket *bucket = ip_idents + hash % IP_IDENTS_SZ;
	u32 old = ACCESS_ONCE(bucket->stamp32);
	u32 now = (u32)jiffies;
	u32 delta = 0;

	if (old != now && cmpxchg(&bucket->stamp32, old, now) == old) {
		u64 x = prandom_u32();

		x *= (now - old);
		delta = (u32)(x >> 32);
	}

	return atomic_add_return(segs + delta, &bucket->id) - segs;
}
EXPORT_SYMBOL(ip_idents_reserve);
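
/*
 * Worked example of the perturbation above: "x" is a 64-bit product of a
 * random 32-bit value and the bucket's idle time, so "delta" ends up roughly
 * uniform in [0, now - old).  If a bucket was last touched 100 jiffies ago
 * and prandom_u32() happens to return 0x80000000 (half the 32-bit range):
 *
 *	u64 x     = 0x80000000ULL * 100;	// idle time scaled by the random value
 *	u32 delta = (u32)(x >> 32);		// == 50, about half the idle window
 *
 * so the next reserved ID jumps ahead by ~50 extra values, hiding how many
 * packets actually consumed IDs from this bucket while it sat idle.
 */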

void __ip_select_ident(struct iphdr *iph, int segs)
{
	static u32 ip_idents_hashrnd __read_mostly;
	static bool hashrnd_initialized = false;
	u32 hash, id;

	if (unlikely(!hashrnd_initialized)) {
		hashrnd_initialized = true;
		get_random_bytes(&ip_idents_hashrnd, sizeof(ip_idents_hashrnd));
	}

	hash = jhash_3words((__force u32)iph->daddr,
			    (__force u32)iph->saddr,
			    iph->protocol,
			    ip_idents_hashrnd);
	id = ip_idents_reserve(hash, segs);
	iph->id = htons(id);
}
EXPORT_SYMBOL(__ip_select_ident);

static void __build_flow_key(struct flowi4 *fl4, const struct sock *sk,
			     const struct iphdr *iph,
			     int oif, u8 tos,
			     u8 prot, u32 mark, int flow_flags)
{
	if (sk) {
		const struct inet_sock *inet = inet_sk(sk);

		oif = sk->sk_bound_dev_if;
		mark = sk->sk_mark;
		tos = RT_CONN_FLAGS(sk);
		prot = inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol;
	}
	flowi4_init_output(fl4, oif, mark, tos,
			   RT_SCOPE_UNIVERSE, prot,
			   flow_flags,
			   iph->daddr, iph->saddr, 0, 0);
}

static void build_skb_flow_key(struct flowi4 *fl4, const struct sk_buff *skb,
			       const struct sock *sk)
{
	const struct iphdr *iph = ip_hdr(skb);
	int oif = skb->dev->ifindex;
	u8 tos = RT_TOS(iph->tos);
	u8 prot = iph->protocol;
	u32 mark = skb->mark;

	__build_flow_key(fl4, sk, iph, oif, tos, prot, mark, 0);
}

static void build_sk_flow_key(struct flowi4 *fl4, const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ip_options_rcu *inet_opt;
	__be32 daddr = inet->inet_daddr;

	rcu_read_lock();
	inet_opt = rcu_dereference(inet->inet_opt);
	if (inet_opt && inet_opt->opt.srr)
		daddr = inet_opt->opt.faddr;
	flowi4_init_output(fl4, sk->sk_bound_dev_if, sk->sk_mark,
			   RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
			   inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
			   inet_sk_flowi_flags(sk),
			   daddr, inet->inet_saddr, 0, 0);
	rcu_read_unlock();
}

static void ip_rt_build_flow_key(struct flowi4 *fl4, const struct sock *sk,
				 const struct sk_buff *skb)
{
	if (skb)
		build_skb_flow_key(fl4, skb, sk);
	else
		build_sk_flow_key(fl4, sk);
}

static inline void rt_free(struct rtable *rt)
{
	call_rcu(&rt->dst.rcu_head, dst_rcu_free);
}

static DEFINE_SPINLOCK(fnhe_lock);

static struct fib_nh_exception *fnhe_oldest(struct fnhe_hash_bucket *hash)
{
	struct fib_nh_exception *fnhe, *oldest;
	struct rtable *orig;

	oldest = rcu_dereference(hash->chain);
	for (fnhe = rcu_dereference(oldest->fnhe_next); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (time_before(fnhe->fnhe_stamp, oldest->fnhe_stamp))
			oldest = fnhe;
	}
	orig = rcu_dereference(oldest->fnhe_rth);
	if (orig) {
		RCU_INIT_POINTER(oldest->fnhe_rth, NULL);
		rt_free(orig);
	}
	return oldest;
}

static inline u32 fnhe_hashfun(__be32 daddr)
{
	u32 hval;

	hval = (__force u32) daddr;
	hval ^= (hval >> 11) ^ (hval >> 22);

	return hval & (FNHE_HASH_SIZE - 1);
}

static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw,
				  u32 pmtu, unsigned long expires)
{
	struct fnhe_hash_bucket *hash;
	struct fib_nh_exception *fnhe;
	int depth;
	u32 hval = fnhe_hashfun(daddr);

	spin_lock_bh(&fnhe_lock);

	hash = nh->nh_exceptions;
	if (!hash) {
		hash = kzalloc(FNHE_HASH_SIZE * sizeof(*hash), GFP_ATOMIC);
		if (!hash)
			goto out_unlock;
		nh->nh_exceptions = hash;
	}

	hash += hval;

	depth = 0;
	for (fnhe = rcu_dereference(hash->chain); fnhe;
	     fnhe = rcu_dereference(fnhe->fnhe_next)) {
		if (fnhe->fnhe_daddr == daddr)
			break;
		depth++;
	}

	if (fnhe) {
		if (gw)
			fnhe->fnhe_gw = gw;
		if (pmtu) {
			fnhe->fnhe_pmtu = pmtu;
			fnhe->fnhe_expires = expires;
		}
	} else {
		if (depth > FNHE_RECLAIM_DEPTH)
			fnhe = fnhe_oldest(hash);
		else {
			fnhe = kzalloc(sizeof(*fnhe), GFP_ATOMIC);
			if (!fnhe)
				goto out_unlock;

			fnhe->fnhe_next = hash->chain;
			rcu_assign_pointer(hash->chain, fnhe);
		}
		fnhe->fnhe_daddr = daddr;
		fnhe->fnhe_gw = gw;
		fnhe->fnhe_pmtu = pmtu;
		fnhe->fnhe_expires = expires;
	}

	fnhe->fnhe_stamp = jiffies;

out_unlock:
	spin_unlock_bh(&fnhe_lock);
	return;
}
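
/*
 * The callers below use the gw/pmtu arguments selectively: redirect handling
 * records a learned gateway with
 *
 *	update_or_create_fnhe(nh, fl4->daddr, new_gw, 0, 0);
 *
 * while PMTU learning records a reduced MTU plus an expiry with
 *
 *	update_or_create_fnhe(nh, fl4->daddr, 0, mtu, jiffies + ip_rt_mtu_expires);
 *
 * A zero gw or pmtu therefore leaves that field of an existing exception
 * untouched.
 */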

static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flowi4 *fl4,
			     bool kill_route)
{
	__be32 new_gw = icmp_hdr(skb)->un.gateway;
	__be32 old_gw = ip_hdr(skb)->saddr;
	struct net_device *dev = skb->dev;
	struct in_device *in_dev;
	struct fib_result res;
	struct neighbour *n;
	struct net *net;

	switch (icmp_hdr(skb)->code & 7) {
	case ICMP_REDIR_NET:
	case ICMP_REDIR_NETTOS:
	case ICMP_REDIR_HOST:
	case ICMP_REDIR_HOSTTOS:
		break;

	default:
		return;
	}

	if (rt->rt_gateway != old_gw)
		return;

	in_dev = __in_dev_get_rcu(dev);
	if (!in_dev)
		return;

	net = dev_net(dev);
	if (new_gw == old_gw || !IN_DEV_RX_REDIRECTS(in_dev) ||
	    ipv4_is_multicast(new_gw) || ipv4_is_lbcast(new_gw) ||
	    ipv4_is_zeronet(new_gw))
		goto reject_redirect;

	if (!IN_DEV_SHARED_MEDIA(in_dev)) {
		if (!inet_addr_onlink(in_dev, new_gw, old_gw))
			goto reject_redirect;
		if (IN_DEV_SEC_REDIRECTS(in_dev) && ip_fib_check_default(new_gw, dev))
			goto reject_redirect;
	} else {
		if (inet_addr_type(net, new_gw) != RTN_UNICAST)
			goto reject_redirect;
	}

	n = ipv4_neigh_lookup(&rt->dst, NULL, &new_gw);
	if (n) {
		if (!(n->nud_state & NUD_VALID)) {
			neigh_event_send(n, NULL);
		} else {
			if (fib_lookup(net, fl4, &res) == 0) {
				struct fib_nh *nh = &FIB_RES_NH(res);

				update_or_create_fnhe(nh, fl4->daddr, new_gw,
						      0, 0);
			}
			if (kill_route)
				rt->dst.obsolete = DST_OBSOLETE_KILL;
			call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, n);
		}
		neigh_release(n);
	}
	return;

reject_redirect:
#ifdef CONFIG_IP_ROUTE_VERBOSE
	if (IN_DEV_LOG_MARTIANS(in_dev)) {
		const struct iphdr *iph = (const struct iphdr *) skb->data;
		__be32 daddr = iph->daddr;
		__be32 saddr = iph->saddr;

		net_info_ratelimited("Redirect from %pI4 on %s about %pI4 ignored\n"
				     "  Advised path = %pI4 -> %pI4\n",
				     &old_gw, dev->name, &new_gw,
				     &saddr, &daddr);
	}
#endif
	;
}

static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb)
{
	struct rtable *rt;
	struct flowi4 fl4;
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	int oif = skb->dev->ifindex;
	u8 tos = RT_TOS(iph->tos);
	u8 prot = iph->protocol;
	u32 mark = skb->mark;

	rt = (struct rtable *) dst;

	__build_flow_key(&fl4, sk, iph, oif, tos, prot, mark, 0);
	__ip_do_redirect(rt, skb, &fl4, true);
}

static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst)
{
	struct rtable *rt = (struct rtable *)dst;
	struct dst_entry *ret = dst;

	if (rt) {
		if (dst->obsolete > 0) {
			ip_rt_put(rt);
			ret = NULL;
		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
			   rt->dst.expires) {
			ip_rt_put(rt);
			ret = NULL;
		}
	}
	return ret;
}

/*
 * Algorithm:
 *	1. The first ip_rt_redirect_number redirects are sent
 *	   with exponential backoff, then we stop sending them at all,
 *	   assuming that the host ignores our redirects.
 *	2. If we did not see packets requiring redirects
 *	   during ip_rt_redirect_silence, we assume that the host
 *	   has forgotten the redirected route and start sending redirects again.
 *
 * This algorithm is much cheaper and more intelligent than dumb load limiting
 * in icmp.c.
 *
 * NOTE. Do not forget to inhibit load limiting for redirects (redundant)
 * and "frag. need" (breaks PMTU discovery) in icmp.c.
 */
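
/*
 * Rough timeline with the defaults above: the first redirect to a peer is
 * sent immediately (rate_tokens == 0); redirect n+1 is then only sent once
 *
 *	time_after(jiffies, rate_last + (ip_rt_redirect_load << rate_tokens))
 *
 * holds, so the gap doubles each time ((HZ/50) << 1, << 2, ...).  After
 * ip_rt_redirect_number (9) redirects the peer is silenced until no
 * redirect-triggering packets arrive for ip_rt_redirect_silence
 * ((HZ/50) << 10) jiffies, which resets rate_tokens and restarts the cycle.
 */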

void ip_rt_send_redirect(struct sk_buff *skb)
{
	struct rtable *rt = skb_rtable(skb);
	struct in_device *in_dev;
	struct inet_peer *peer;
	struct net *net;
	int log_martians;

	rcu_read_lock();
	in_dev = __in_dev_get_rcu(rt->dst.dev);
	if (!in_dev || !IN_DEV_TX_REDIRECTS(in_dev)) {
		rcu_read_unlock();
		return;
	}
	log_martians = IN_DEV_LOG_MARTIANS(in_dev);
	rcu_read_unlock();

	net = dev_net(rt->dst.dev);
	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);
	if (!peer) {
		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
			  rt_nexthop(rt, ip_hdr(skb)->daddr));
		return;
	}

	/* No redirected packets during ip_rt_redirect_silence;
	 * reset the algorithm.
	 */
	if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence))
		peer->rate_tokens = 0;

	/* Too many ignored redirects; do not send anything.
	 * Set peer->rate_last to the last seen redirected packet.
	 */
	if (peer->rate_tokens >= ip_rt_redirect_number) {
		peer->rate_last = jiffies;
		goto out_put_peer;
	}

	/* Check for load limit; set rate_last to the latest sent
	 * redirect.
	 */
	if (peer->rate_tokens == 0 ||
	    time_after(jiffies,
		       (peer->rate_last +
			(ip_rt_redirect_load << peer->rate_tokens)))) {
		__be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr);

		icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw);
		peer->rate_last = jiffies;
		++peer->rate_tokens;
#ifdef CONFIG_IP_ROUTE_VERBOSE
		if (log_martians &&
		    peer->rate_tokens == ip_rt_redirect_number)
			net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n",
					     &ip_hdr(skb)->saddr, inet_iif(skb),
					     &ip_hdr(skb)->daddr, &gw);
#endif
	}
out_put_peer:
	inet_putpeer(peer);
}

static int ip_error(struct sk_buff *skb)
{
	struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
	struct rtable *rt = skb_rtable(skb);
	struct inet_peer *peer;
	unsigned long now;
	struct net *net;
	bool send;
	int code;

	net = dev_net(rt->dst.dev);
	if (!IN_DEV_FORWARD(in_dev)) {
		switch (rt->dst.error) {
		case EHOSTUNREACH:
			IP_INC_STATS_BH(net, IPSTATS_MIB_INADDRERRORS);
			break;

		case ENETUNREACH:
			IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
			break;
		}
		goto out;
	}

	switch (rt->dst.error) {
	case EINVAL:
	default:
		goto out;
	case EHOSTUNREACH:
		code = ICMP_HOST_UNREACH;
		break;
	case ENETUNREACH:
		code = ICMP_NET_UNREACH;
		IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
		break;
	case EACCES:
		code = ICMP_PKT_FILTERED;
		break;
	}

	peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1);

	send = true;
	if (peer) {
		now = jiffies;
		peer->rate_tokens += now - peer->rate_last;
		if (peer->rate_tokens > ip_rt_error_burst)
			peer->rate_tokens = ip_rt_error_burst;
		peer->rate_last = now;
		if (peer->rate_tokens >= ip_rt_error_cost)
			peer->rate_tokens -= ip_rt_error_cost;
		else
			send = false;
		inet_putpeer(peer);
	}
	if (send)
		icmp_send(skb, ICMP_DEST_UNREACH, code, 0);

out:	kfree_skb(skb);
	return 0;
}

static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
{
	struct dst_entry *dst = &rt->dst;
	struct fib_result res;

	if (dst_metric_locked(dst, RTAX_MTU))
		return;

	if (dst->dev->mtu < mtu)
		return;

	if (mtu < ip_rt_min_pmtu)
		mtu = ip_rt_min_pmtu;

	if (!rt->rt_pmtu) {
		dst->obsolete = DST_OBSOLETE_KILL;
	} else {
		rt->rt_pmtu = mtu;
		dst->expires = max(1UL, jiffies + ip_rt_mtu_expires);
	}

	rcu_read_lock();
	if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) {
		struct fib_nh *nh = &FIB_RES_NH(res);

		update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
				      jiffies + ip_rt_mtu_expires);
	}
	rcu_read_unlock();
}

static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
			      struct sk_buff *skb, u32 mtu)
{
	struct rtable *rt = (struct rtable *) dst;
	struct flowi4 fl4;

	ip_rt_build_flow_key(&fl4, sk, skb);
	__ip_rt_update_pmtu(rt, &fl4, mtu);
}

void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu,
		      int oif, u32 mark, u8 protocol, int flow_flags)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	__build_flow_key(&fl4, NULL, iph, oif,
			 RT_TOS(iph->tos), protocol, mark, flow_flags);
	rt = __ip_route_output_key(net, &fl4);
	if (!IS_ERR(rt)) {
		__ip_rt_update_pmtu(rt, &fl4, mtu);
		ip_rt_put(rt);
	}
}
EXPORT_SYMBOL_GPL(ipv4_update_pmtu);
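
/*
 * Usage sketch (hypothetical parameters): a protocol or tunnel error handler
 * that has parsed an ICMP FRAG_NEEDED message into "mtu", and whose skb->data
 * points at the offending IP header "iph", could propagate the new path MTU
 * without holding a socket roughly like this:
 *
 *	ipv4_update_pmtu(skb, dev_net(skb->dev), mtu, 0, skb->mark,
 *			 iph->protocol, 0);
 *
 * The helper rebuilds a flow key from that header, looks up the matching
 * route and applies __ip_rt_update_pmtu() to it.
 */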

static void __ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
	rt = __ip_route_output_key(sock_net(sk), &fl4);
	if (!IS_ERR(rt)) {
		__ip_rt_update_pmtu(rt, &fl4, mtu);
		ip_rt_put(rt);
	}
}

void ipv4_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, u32 mtu)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;
	struct dst_entry *odst = NULL;
	bool new = false;

	bh_lock_sock(sk);
	odst = sk_dst_get(sk);

	if (sock_owned_by_user(sk) || !odst) {
		__ipv4_sk_update_pmtu(skb, sk, mtu);
		goto out;
	}

	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);

	rt = (struct rtable *)odst;
	if (odst->obsolete && odst->ops->check(odst, 0) == NULL) {
		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	__ip_rt_update_pmtu((struct rtable *) rt->dst.path, &fl4, mtu);

	if (!dst_check(&rt->dst, 0)) {
		if (new)
			dst_release(&rt->dst);

		rt = ip_route_output_flow(sock_net(sk), &fl4, sk);
		if (IS_ERR(rt))
			goto out;

		new = true;
	}

	if (new)
		sk_dst_set(sk, &rt->dst);

out:
	bh_unlock_sock(sk);
	dst_release(odst);
}
EXPORT_SYMBOL_GPL(ipv4_sk_update_pmtu);

void ipv4_redirect(struct sk_buff *skb, struct net *net,
		   int oif, u32 mark, u8 protocol, int flow_flags)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	__build_flow_key(&fl4, NULL, iph, oif,
			 RT_TOS(iph->tos), protocol, mark, flow_flags);
	rt = __ip_route_output_key(net, &fl4);
	if (!IS_ERR(rt)) {
		__ip_do_redirect(rt, skb, &fl4, false);
		ip_rt_put(rt);
	}
}
EXPORT_SYMBOL_GPL(ipv4_redirect);

void ipv4_sk_redirect(struct sk_buff *skb, struct sock *sk)
{
	const struct iphdr *iph = (const struct iphdr *) skb->data;
	struct flowi4 fl4;
	struct rtable *rt;

	__build_flow_key(&fl4, sk, iph, 0, 0, 0, 0, 0);
	rt = __ip_route_output_key(sock_net(sk), &fl4);
	if (!IS_ERR(rt)) {
		__ip_do_redirect(rt, skb, &fl4, false);
		ip_rt_put(rt);
	}
}
EXPORT_SYMBOL_GPL(ipv4_sk_redirect);
1078
David S. Millerefbc3682011-12-01 13:38:59 -05001079static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie)
1080{
1081 struct rtable *rt = (struct rtable *) dst;
1082
David S. Millerceb33202012-07-17 11:31:28 -07001083 /* All IPV4 dsts are created with ->obsolete set to the value
1084 * DST_OBSOLETE_FORCE_CHK which forces validation calls down
1085 * into this function always.
1086 *
1087 * When a PMTU/redirect information update invalidates a
1088 * route, this is indicated by setting obsolete to
1089 * DST_OBSOLETE_KILL.
1090 */
1091 if (dst->obsolete == DST_OBSOLETE_KILL || rt_is_expired(rt))
David S. Millerefbc3682011-12-01 13:38:59 -05001092 return NULL;
Timo Teräsd11a4dc2010-03-18 23:20:20 +00001093 return dst;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001094}
1095
Linus Torvalds1da177e2005-04-16 15:20:36 -07001096static void ipv4_link_failure(struct sk_buff *skb)
1097{
1098 struct rtable *rt;
1099
1100 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
1101
Eric Dumazet511c3f92009-06-02 05:14:27 +00001102 rt = skb_rtable(skb);
David S. Miller59436342012-07-10 06:58:42 -07001103 if (rt)
1104 dst_set_expires(&rt->dst, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001105}
1106
1107static int ip_rt_bug(struct sk_buff *skb)
1108{
Joe Perches91df42b2012-05-15 14:11:54 +00001109 pr_debug("%s: %pI4 -> %pI4, %s\n",
1110 __func__, &ip_hdr(skb)->saddr, &ip_hdr(skb)->daddr,
1111 skb->dev ? skb->dev->name : "?");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001112 kfree_skb(skb);
Dave Jonesc378a9c2011-05-21 07:16:42 +00001113 WARN_ON(1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001114 return 0;
1115}

/*
   We do not cache the source address of the outgoing interface,
   because it is used only by IP RR, TS and SRR options,
   so it stays out of the fast path.

   BTW remember: "addr" is allowed to be unaligned
   in IP options!
 */
1125
David S. Miller8e363602011-05-13 17:29:41 -04001126void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001127{
Al Viroa61ced52006-09-26 21:27:54 -07001128 __be32 src;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001129
David S. Millerc7537962010-11-11 17:07:48 -08001130 if (rt_is_output_route(rt))
David S. Millerc5be24f2011-05-13 18:01:21 -04001131 src = ip_hdr(skb)->saddr;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001132 else {
David S. Miller8e363602011-05-13 17:29:41 -04001133 struct fib_result res;
1134 struct flowi4 fl4;
1135 struct iphdr *iph;
1136
1137 iph = ip_hdr(skb);
1138
1139 memset(&fl4, 0, sizeof(fl4));
1140 fl4.daddr = iph->daddr;
1141 fl4.saddr = iph->saddr;
Julian Anastasovb0fe4a32011-07-23 02:00:41 +00001142 fl4.flowi4_tos = RT_TOS(iph->tos);
David S. Miller8e363602011-05-13 17:29:41 -04001143 fl4.flowi4_oif = rt->dst.dev->ifindex;
1144 fl4.flowi4_iif = skb->dev->ifindex;
1145 fl4.flowi4_mark = skb->mark;
David S. Miller5e2b61f2011-03-04 21:47:09 -08001146
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001147 rcu_read_lock();
David S. Miller68a5e3d2011-03-11 20:07:33 -05001148 if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
David S. Miller436c3b62011-03-24 17:42:21 -07001149 src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001150 else
David S. Millerf8126f12012-07-13 05:03:45 -07001151 src = inet_select_addr(rt->dst.dev,
1152 rt_nexthop(rt, iph->daddr),
1153 RT_SCOPE_UNIVERSE);
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001154 rcu_read_unlock();
1155 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001156 memcpy(addr, &src, 4);
1157}
1158
Patrick McHardyc7066f72011-01-14 13:36:42 +01001159#ifdef CONFIG_IP_ROUTE_CLASSID
Linus Torvalds1da177e2005-04-16 15:20:36 -07001160static void set_class_tag(struct rtable *rt, u32 tag)
1161{
Changli Gaod8d1f302010-06-10 23:31:35 -07001162 if (!(rt->dst.tclassid & 0xFFFF))
1163 rt->dst.tclassid |= tag & 0xFFFF;
1164 if (!(rt->dst.tclassid & 0xFFFF0000))
1165 rt->dst.tclassid |= tag & 0xFFFF0000;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001166}
1167#endif
1168
David S. Miller0dbaee32010-12-13 12:52:14 -08001169static unsigned int ipv4_default_advmss(const struct dst_entry *dst)
1170{
1171 unsigned int advmss = dst_metric_raw(dst, RTAX_ADVMSS);
1172
1173 if (advmss == 0) {
1174 advmss = max_t(unsigned int, dst->dev->mtu - 40,
1175 ip_rt_min_advmss);
1176 if (advmss > 65535 - 40)
1177 advmss = 65535 - 40;
1178 }
1179 return advmss;
1180}
1181
Steffen Klassertebb762f2011-11-23 02:12:51 +00001182static unsigned int ipv4_mtu(const struct dst_entry *dst)
David S. Millerd33e4552010-12-14 13:01:14 -08001183{
Steffen Klassert261663b2011-11-23 02:14:50 +00001184 const struct rtable *rt = (const struct rtable *) dst;
David S. Miller59436342012-07-10 06:58:42 -07001185 unsigned int mtu = rt->rt_pmtu;
1186
Alexander Duyck98d75c32012-08-27 06:30:01 +00001187 if (!mtu || time_after_eq(jiffies, rt->dst.expires))
David S. Miller59436342012-07-10 06:58:42 -07001188 mtu = dst_metric_raw(dst, RTAX_MTU);
Steffen Klassert618f9bc2011-11-23 02:13:31 +00001189
Steffen Klassert38d523e2013-01-16 20:55:01 +00001190 if (mtu)
Steffen Klassert618f9bc2011-11-23 02:13:31 +00001191 return mtu;
1192
1193 mtu = dst->dev->mtu;
David S. Millerd33e4552010-12-14 13:01:14 -08001194
1195 if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
Julian Anastasov155e8332012-10-08 11:41:18 +00001196 if (rt->rt_uses_gateway && mtu > 576)
David S. Millerd33e4552010-12-14 13:01:14 -08001197 mtu = 576;
1198 }
1199
1200 if (mtu > IP_MAX_MTU)
1201 mtu = IP_MAX_MTU;
1202
1203 return mtu;
1204}
1205
David S. Millerf2bb4be2012-07-17 12:20:47 -07001206static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr)
David S. Miller4895c772012-07-17 04:19:00 -07001207{
1208 struct fnhe_hash_bucket *hash = nh->nh_exceptions;
1209 struct fib_nh_exception *fnhe;
1210 u32 hval;
1211
David S. Millerf2bb4be2012-07-17 12:20:47 -07001212 if (!hash)
1213 return NULL;
1214
David S. Millerd3a25c92012-07-17 13:23:08 -07001215 hval = fnhe_hashfun(daddr);
David S. Miller4895c772012-07-17 04:19:00 -07001216
1217 for (fnhe = rcu_dereference(hash[hval].chain); fnhe;
1218 fnhe = rcu_dereference(fnhe->fnhe_next)) {
David S. Millerf2bb4be2012-07-17 12:20:47 -07001219 if (fnhe->fnhe_daddr == daddr)
1220 return fnhe;
1221 }
1222 return NULL;
1223}
David S. Miller4895c772012-07-17 04:19:00 -07001224
David S. Millercaacf052012-07-31 15:06:50 -07001225static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe,
David S. Millerf2bb4be2012-07-17 12:20:47 -07001226 __be32 daddr)
1227{
David S. Millercaacf052012-07-31 15:06:50 -07001228 bool ret = false;
1229
David S. Millerc5038a82012-07-31 15:02:02 -07001230 spin_lock_bh(&fnhe_lock);
Julian Anastasovaee06da2012-07-18 10:15:35 +00001231
David S. Millerc5038a82012-07-31 15:02:02 -07001232 if (daddr == fnhe->fnhe_daddr) {
Steffen Klassert13d82bf2012-10-17 21:17:44 +00001233 struct rtable *orig = rcu_dereference(fnhe->fnhe_rth);
1234 if (orig && rt_is_expired(orig)) {
1235 fnhe->fnhe_gw = 0;
1236 fnhe->fnhe_pmtu = 0;
1237 fnhe->fnhe_expires = 0;
1238 }
David S. Millerc5038a82012-07-31 15:02:02 -07001239 if (fnhe->fnhe_pmtu) {
1240 unsigned long expires = fnhe->fnhe_expires;
1241 unsigned long diff = expires - jiffies;
David S. Millerf2bb4be2012-07-17 12:20:47 -07001242
David S. Millerc5038a82012-07-31 15:02:02 -07001243 if (time_before(jiffies, expires)) {
1244 rt->rt_pmtu = fnhe->fnhe_pmtu;
1245 dst_set_expires(&rt->dst, diff);
1246 }
David S. Miller4895c772012-07-17 04:19:00 -07001247 }
David S. Millerc5038a82012-07-31 15:02:02 -07001248 if (fnhe->fnhe_gw) {
1249 rt->rt_flags |= RTCF_REDIRECTED;
1250 rt->rt_gateway = fnhe->fnhe_gw;
Julian Anastasov155e8332012-10-08 11:41:18 +00001251 rt->rt_uses_gateway = 1;
1252 } else if (!rt->rt_gateway)
1253 rt->rt_gateway = daddr;
David S. Millerf2bb4be2012-07-17 12:20:47 -07001254
David S. Millerc5038a82012-07-31 15:02:02 -07001255 rcu_assign_pointer(fnhe->fnhe_rth, rt);
1256 if (orig)
1257 rt_free(orig);
1258
1259 fnhe->fnhe_stamp = jiffies;
David S. Millercaacf052012-07-31 15:06:50 -07001260 ret = true;
David S. Millerc5038a82012-07-31 15:02:02 -07001261 }
1262 spin_unlock_bh(&fnhe_lock);
David S. Millercaacf052012-07-31 15:06:50 -07001263
1264 return ret;
Eric Dumazet54764bb2012-07-31 01:08:23 +00001265}
1266
David S. Millercaacf052012-07-31 15:06:50 -07001267static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt)
David S. Millerf2bb4be2012-07-17 12:20:47 -07001268{
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001269 struct rtable *orig, *prev, **p;
David S. Millercaacf052012-07-31 15:06:50 -07001270 bool ret = true;
David S. Millerf2bb4be2012-07-17 12:20:47 -07001271
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001272 if (rt_is_input_route(rt)) {
Eric Dumazet54764bb2012-07-31 01:08:23 +00001273 p = (struct rtable **)&nh->nh_rth_input;
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001274 } else {
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001275 p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output);
1276 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001277 orig = *p;
1278
1279 prev = cmpxchg(p, orig, rt);
1280 if (prev == orig) {
David S. Millerf2bb4be2012-07-17 12:20:47 -07001281 if (orig)
Eric Dumazet54764bb2012-07-31 01:08:23 +00001282 rt_free(orig);
Julian Anastasov155e8332012-10-08 11:41:18 +00001283 } else
David S. Millercaacf052012-07-31 15:06:50 -07001284 ret = false;
David S. Millercaacf052012-07-31 15:06:50 -07001285
1286 return ret;
1287}
1288
1289static DEFINE_SPINLOCK(rt_uncached_lock);
1290static LIST_HEAD(rt_uncached_list);
1291
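/* Routes that could not be cached in a nexthop are kept on a global
 * list so they can still be found when their output device goes away.
 */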
1292static void rt_add_uncached_list(struct rtable *rt)
1293{
1294 spin_lock_bh(&rt_uncached_lock);
1295 list_add_tail(&rt->rt_uncached, &rt_uncached_list);
1296 spin_unlock_bh(&rt_uncached_lock);
1297}
1298
1299static void ipv4_dst_destroy(struct dst_entry *dst)
1300{
1301 struct rtable *rt = (struct rtable *) dst;
1302
Eric Dumazet78df76a2012-08-24 05:40:47 +00001303 if (!list_empty(&rt->rt_uncached)) {
David S. Millercaacf052012-07-31 15:06:50 -07001304 spin_lock_bh(&rt_uncached_lock);
1305 list_del(&rt->rt_uncached);
1306 spin_unlock_bh(&rt_uncached_lock);
1307 }
1308}
1309
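/* Detach uncached routes from a disappearing device: every route on the
 * uncached list still pointing at dev is repointed at the loopback
 * device so that dev's reference count can drop to zero.
 */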
1310void rt_flush_dev(struct net_device *dev)
1311{
1312 if (!list_empty(&rt_uncached_list)) {
1313 struct net *net = dev_net(dev);
1314 struct rtable *rt;
1315
1316 spin_lock_bh(&rt_uncached_lock);
1317 list_for_each_entry(rt, &rt_uncached_list, rt_uncached) {
1318 if (rt->dst.dev != dev)
1319 continue;
1320 rt->dst.dev = net->loopback_dev;
1321 dev_hold(rt->dst.dev);
1322 dev_put(dev);
1323 }
1324 spin_unlock_bh(&rt_uncached_lock);
David S. Miller4895c772012-07-17 04:19:00 -07001325 }
1326}
1327
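/* A cached route may be reused only while it carries the
 * DST_OBSOLETE_FORCE_CHK marker and its generation id is still current.
 */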
Eric Dumazet4331deb2012-07-25 05:11:23 +00001328static bool rt_cache_valid(const struct rtable *rt)
David S. Millerd2d68ba2012-07-17 12:58:50 -07001329{
Eric Dumazet4331deb2012-07-25 05:11:23 +00001330 return rt &&
1331 rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK &&
1332 !rt_is_expired(rt);
David S. Millerd2d68ba2012-07-17 12:58:50 -07001333}
1334
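/* Fill in the nexthop-derived fields of a new route (gateway, metrics,
 * classid) and try to cache it: in the nexthop exception when one was
 * found, otherwise in the FIB nexthop itself.  If caching fails, the
 * route is marked DST_NOCACHE and parked on the uncached list.
 */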
David S. Millerf2bb4be2012-07-17 12:20:47 -07001335static void rt_set_nexthop(struct rtable *rt, __be32 daddr,
David S. Miller5e2b61f2011-03-04 21:47:09 -08001336 const struct fib_result *res,
David S. Millerf2bb4be2012-07-17 12:20:47 -07001337 struct fib_nh_exception *fnhe,
David S. Miller982721f2011-02-16 21:44:24 -08001338 struct fib_info *fi, u16 type, u32 itag)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001339{
David S. Millercaacf052012-07-31 15:06:50 -07001340 bool cached = false;
1341
Linus Torvalds1da177e2005-04-16 15:20:36 -07001342 if (fi) {
David S. Miller4895c772012-07-17 04:19:00 -07001343 struct fib_nh *nh = &FIB_RES_NH(*res);
1344
Julian Anastasov155e8332012-10-08 11:41:18 +00001345 if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) {
David S. Miller4895c772012-07-17 04:19:00 -07001346 rt->rt_gateway = nh->nh_gw;
Julian Anastasov155e8332012-10-08 11:41:18 +00001347 rt->rt_uses_gateway = 1;
1348 }
David S. Miller28605832012-07-17 14:55:59 -07001349 dst_init_metrics(&rt->dst, fi->fib_metrics, true);
Patrick McHardyc7066f72011-01-14 13:36:42 +01001350#ifdef CONFIG_IP_ROUTE_CLASSID
David S. Millerf2bb4be2012-07-17 12:20:47 -07001351 rt->dst.tclassid = nh->nh_tclassid;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001352#endif
David S. Millerc5038a82012-07-31 15:02:02 -07001353 if (unlikely(fnhe))
David S. Millercaacf052012-07-31 15:06:50 -07001354 cached = rt_bind_exception(rt, fnhe, daddr);
David S. Millerc5038a82012-07-31 15:02:02 -07001355 else if (!(rt->dst.flags & DST_NOCACHE))
David S. Millercaacf052012-07-31 15:06:50 -07001356 cached = rt_cache_route(nh, rt);
Julian Anastasov155e8332012-10-08 11:41:18 +00001357 if (unlikely(!cached)) {
1358			/* Routes we intend to cache in a nexthop exception or
1359			 * FIB nexthop have the DST_NOCACHE bit clear.
1360			 * However, if we are unsuccessful at storing this
1361			 * route into the cache, we really need to set it.
1362 */
1363 rt->dst.flags |= DST_NOCACHE;
1364 if (!rt->rt_gateway)
1365 rt->rt_gateway = daddr;
1366 rt_add_uncached_list(rt);
1367 }
1368 } else
David S. Millercaacf052012-07-31 15:06:50 -07001369 rt_add_uncached_list(rt);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001370
Patrick McHardyc7066f72011-01-14 13:36:42 +01001371#ifdef CONFIG_IP_ROUTE_CLASSID
Linus Torvalds1da177e2005-04-16 15:20:36 -07001372#ifdef CONFIG_IP_MULTIPLE_TABLES
David S. Miller85b91b02012-07-13 08:21:29 -07001373 set_class_tag(rt, res->tclassid);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001374#endif
1375 set_class_tag(rt, itag);
1376#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001377}
1378
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001379static struct rtable *rt_dst_alloc(struct net_device *dev,
David S. Millerf2bb4be2012-07-17 12:20:47 -07001380 bool nopolicy, bool noxfrm, bool will_cache)
David S. Miller0c4dcd52011-02-17 15:42:37 -08001381{
David S. Millerf5b0a872012-07-19 12:31:33 -07001382 return dst_alloc(&ipv4_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK,
David S. Millerc6cffba2012-07-26 11:14:38 +00001383 (will_cache ? 0 : (DST_HOST | DST_NOCACHE)) |
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001384 (nopolicy ? DST_NOPOLICY : 0) |
1385 (noxfrm ? DST_NOXFRM : 0));
David S. Miller0c4dcd52011-02-17 15:42:37 -08001386}
1387
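/* Build the input route for a multicast packet that this host either
 * delivers locally (our != 0) or may have to forward via ip_mr_input.
 */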
Eric Dumazet96d36222010-06-02 19:21:31 +00001388/* called in rcu_read_lock() section */
Al Viro9e12bb22006-09-26 21:25:20 -07001389static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001390 u8 tos, struct net_device *dev, int our)
1391{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001392 struct rtable *rth;
Eric Dumazet96d36222010-06-02 19:21:31 +00001393 struct in_device *in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001394 u32 itag = 0;
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001395 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001396
1397 /* Primary sanity checks. */
1398
1399 if (in_dev == NULL)
1400 return -EINVAL;
1401
Jan Engelhardt1e637c72008-01-21 03:18:08 -08001402 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
Thomas Grafd0daebc32012-06-12 00:44:01 +00001403 skb->protocol != htons(ETH_P_IP))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001404 goto e_inval;
1405
Thomas Grafd0daebc32012-06-12 00:44:01 +00001406 if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
1407 if (ipv4_is_loopback(saddr))
1408 goto e_inval;
1409
Joe Perchesf97c1e02007-12-16 13:45:43 -08001410 if (ipv4_is_zeronet(saddr)) {
1411 if (!ipv4_is_local_multicast(daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001412 goto e_inval;
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001413 } else {
David S. Miller9e56e382012-06-28 18:54:02 -07001414 err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
1415 in_dev, &itag);
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001416 if (err < 0)
1417 goto e_err;
1418 }
Benjamin LaHaise4e7b2f12012-03-27 15:55:32 +00001419 rth = rt_dst_alloc(dev_net(dev)->loopback_dev,
David S. Millerf2bb4be2012-07-17 12:20:47 -07001420 IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001421 if (!rth)
1422 goto e_nobufs;
1423
Patrick McHardyc7066f72011-01-14 13:36:42 +01001424#ifdef CONFIG_IP_ROUTE_CLASSID
Changli Gaod8d1f302010-06-10 23:31:35 -07001425 rth->dst.tclassid = itag;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001426#endif
David S. Millercf911662011-04-28 14:31:47 -07001427 rth->dst.output = ip_rt_bug;
1428
Denis V. Luneve84f84f2008-07-05 19:04:32 -07001429 rth->rt_genid = rt_genid(dev_net(dev));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001430 rth->rt_flags = RTCF_MULTICAST;
Eric Dumazet29e75252008-01-31 17:05:09 -08001431 rth->rt_type = RTN_MULTICAST;
David S. Miller9917e1e82012-07-17 14:44:26 -07001432	rth->rt_is_input = 1;
David S. Miller13378ca2012-07-23 13:57:45 -07001433 rth->rt_iif = 0;
David S. Miller59436342012-07-10 06:58:42 -07001434 rth->rt_pmtu = 0;
David S. Millerf8126f12012-07-13 05:03:45 -07001435 rth->rt_gateway = 0;
Julian Anastasov155e8332012-10-08 11:41:18 +00001436 rth->rt_uses_gateway = 0;
David S. Millercaacf052012-07-31 15:06:50 -07001437 INIT_LIST_HEAD(&rth->rt_uncached);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001438 if (our) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001439		rth->dst.input = ip_local_deliver;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001440 rth->rt_flags |= RTCF_LOCAL;
1441 }
1442
1443#ifdef CONFIG_IP_MROUTE
Joe Perchesf97c1e02007-12-16 13:45:43 -08001444 if (!ipv4_is_local_multicast(daddr) && IN_DEV_MFORWARD(in_dev))
Changli Gaod8d1f302010-06-10 23:31:35 -07001445 rth->dst.input = ip_mr_input;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001446#endif
1447 RT_CACHE_STAT_INC(in_slow_mc);
1448
David S. Miller89aef892012-07-17 11:00:09 -07001449 skb_dst_set(skb, &rth->dst);
1450 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001451
1452e_nobufs:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001453 return -ENOBUFS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001454e_inval:
Eric Dumazet96d36222010-06-02 19:21:31 +00001455 return -EINVAL;
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001456e_err:
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001457 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001458}
1459
1460
1461static void ip_handle_martian_source(struct net_device *dev,
1462 struct in_device *in_dev,
1463 struct sk_buff *skb,
Al Viro9e12bb22006-09-26 21:25:20 -07001464 __be32 daddr,
1465 __be32 saddr)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001466{
1467 RT_CACHE_STAT_INC(in_martian_src);
1468#ifdef CONFIG_IP_ROUTE_VERBOSE
1469 if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
1470 /*
1471		 * RFC1812 recommendation: if the source is martian,
1472		 * the only hint is the MAC header.
1473 */
Joe Perches058bd4d2012-03-11 18:36:11 +00001474 pr_warn("martian source %pI4 from %pI4, on dev %s\n",
Harvey Harrison673d57e2008-10-31 00:53:57 -07001475 &daddr, &saddr, dev->name);
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001476 if (dev->hard_header_len && skb_mac_header_was_set(skb)) {
Joe Perches058bd4d2012-03-11 18:36:11 +00001477 print_hex_dump(KERN_WARNING, "ll header: ",
1478 DUMP_PREFIX_OFFSET, 16, 1,
1479 skb_mac_header(skb),
1480 dev->hard_header_len, true);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001481 }
1482 }
1483#endif
1484}
1485
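/* Build (or reuse from the nexthop cache) the forwarding route for an
 * input packet whose FIB lookup resolved to a unicast nexthop.
 */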
Eric Dumazet47360222010-06-03 04:13:21 +00001486/* called in rcu_read_lock() section */
Stephen Hemminger5969f712008-04-10 01:52:09 -07001487static int __mkroute_input(struct sk_buff *skb,
David S. Miller982721f2011-02-16 21:44:24 -08001488 const struct fib_result *res,
Stephen Hemminger5969f712008-04-10 01:52:09 -07001489 struct in_device *in_dev,
David S. Millerc6cffba2012-07-26 11:14:38 +00001490 __be32 daddr, __be32 saddr, u32 tos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001491{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001492 struct rtable *rth;
1493 int err;
1494 struct in_device *out_dev;
Eric Dumazet47360222010-06-03 04:13:21 +00001495 unsigned int flags = 0;
David S. Millerd2d68ba2012-07-17 12:58:50 -07001496 bool do_cache;
Li RongQing809d0bf2014-05-22 16:36:55 +08001497 u32 itag = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001498
1499 /* get a working reference to the output device */
Eric Dumazet47360222010-06-03 04:13:21 +00001500 out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001501 if (out_dev == NULL) {
Joe Perchese87cc472012-05-13 21:56:26 +00001502 net_crit_ratelimited("Bug in ip_route_input_slow(). Please report.\n");
Linus Torvalds1da177e2005-04-16 15:20:36 -07001503 return -EINVAL;
1504 }
1505
Michael Smith5c04c812011-04-07 04:51:50 +00001506 err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
David S. Miller9e56e382012-06-28 18:54:02 -07001507 in_dev->dev, in_dev, &itag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001508 if (err < 0) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001509 ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001510 saddr);
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001511
Linus Torvalds1da177e2005-04-16 15:20:36 -07001512 goto cleanup;
1513 }
1514
Julian Anastasove81da0e2012-10-08 11:41:15 +00001515 do_cache = res->fi && !itag;
1516 if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) &&
Hannes Frederic Sowa8c6dafe2015-01-23 12:01:26 +01001517 skb->protocol == htons(ETH_P_IP) &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001518 (IN_DEV_SHARED_MEDIA(out_dev) ||
Hannes Frederic Sowa8c6dafe2015-01-23 12:01:26 +01001519 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
1520 IPCB(skb)->flags |= IPSKB_DOREDIRECT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001521
1522 if (skb->protocol != htons(ETH_P_IP)) {
1523		/* Not IP (i.e. ARP). Do not create a route if it is
1524 * invalid for proxy arp. DNAT routes are always valid.
Jesper Dangaard Brouer65324142010-01-05 05:50:47 +00001525 *
1526		 * The proxy arp feature has been extended to allow ARP
1527		 * replies back on the same interface, to support
1528 * Private VLAN switch technologies. See arp.c.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001529 */
Jesper Dangaard Brouer65324142010-01-05 05:50:47 +00001530 if (out_dev == in_dev &&
1531 IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001532 err = -EINVAL;
1533 goto cleanup;
1534 }
1535 }
1536
Julian Anastasove81da0e2012-10-08 11:41:15 +00001537 if (do_cache) {
1538 rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input);
1539 if (rt_cache_valid(rth)) {
1540 skb_dst_set_noref(skb, &rth->dst);
1541 goto out;
David S. Millerd2d68ba2012-07-17 12:58:50 -07001542 }
1543 }
David S. Millerf2bb4be2012-07-17 12:20:47 -07001544
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001545 rth = rt_dst_alloc(out_dev->dev,
1546 IN_DEV_CONF_GET(in_dev, NOPOLICY),
David S. Millerd2d68ba2012-07-17 12:58:50 -07001547 IN_DEV_CONF_GET(out_dev, NOXFRM), do_cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001548 if (!rth) {
1549 err = -ENOBUFS;
1550 goto cleanup;
1551 }
1552
David S. Millercf911662011-04-28 14:31:47 -07001553 rth->rt_genid = rt_genid(dev_net(rth->dst.dev));
1554 rth->rt_flags = flags;
1555 rth->rt_type = res->type;
David S. Miller9917e1e82012-07-17 14:44:26 -07001556 rth->rt_is_input = 1;
David S. Miller13378ca2012-07-23 13:57:45 -07001557 rth->rt_iif = 0;
David S. Miller59436342012-07-10 06:58:42 -07001558 rth->rt_pmtu = 0;
David S. Millerf8126f12012-07-13 05:03:45 -07001559 rth->rt_gateway = 0;
Julian Anastasov155e8332012-10-08 11:41:18 +00001560 rth->rt_uses_gateway = 0;
David S. Millercaacf052012-07-31 15:06:50 -07001561 INIT_LIST_HEAD(&rth->rt_uncached);
Duan Jionge82a32a2014-02-17 15:23:43 +08001562 RT_CACHE_STAT_INC(in_slow_tot);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001563
Changli Gaod8d1f302010-06-10 23:31:35 -07001564 rth->dst.input = ip_forward;
1565 rth->dst.output = ip_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001566
David S. Millerd2d68ba2012-07-17 12:58:50 -07001567 rt_set_nexthop(rth, daddr, res, NULL, res->fi, res->type, itag);
David S. Millerc6cffba2012-07-26 11:14:38 +00001568 skb_dst_set(skb, &rth->dst);
David S. Millerd2d68ba2012-07-17 12:58:50 -07001569out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001570 err = 0;
1571 cleanup:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001572 return err;
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001573}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001574
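/* Pick a nexthop (multipath-aware) and create the input route for it. */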
Stephen Hemminger5969f712008-04-10 01:52:09 -07001575static int ip_mkroute_input(struct sk_buff *skb,
1576 struct fib_result *res,
David S. Miller68a5e3d2011-03-11 20:07:33 -05001577 const struct flowi4 *fl4,
Stephen Hemminger5969f712008-04-10 01:52:09 -07001578 struct in_device *in_dev,
1579 __be32 daddr, __be32 saddr, u32 tos)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001580{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001581#ifdef CONFIG_IP_ROUTE_MULTIPATH
David S. Millerff3fccb2011-03-10 16:23:24 -08001582 if (res->fi && res->fi->fib_nhs > 1)
David S. Miller1b7fe5932011-03-10 17:01:16 -08001583 fib_select_multipath(res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001584#endif
1585
1586 /* create a routing cache entry */
David S. Millerc6cffba2012-07-26 11:14:38 +00001587 return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001588}
1589
Linus Torvalds1da177e2005-04-16 15:20:36 -07001590/*
1591 * NOTE. We drop all the packets that have local source
1592 * addresses, because every properly looped back packet
1593 * must have correct destination already attached by output routine.
1594 *
1595 * Such an approach solves two big problems:
1596 * 1. Non-simplex devices are handled properly.
1597 * 2. IP spoofing attempts are filtered with a 100% guarantee.
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001598 * called with rcu_read_lock()
Linus Torvalds1da177e2005-04-16 15:20:36 -07001599 */
1600
Al Viro9e12bb22006-09-26 21:25:20 -07001601static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
David S. Millerc10237e2012-06-27 17:05:06 -07001602 u8 tos, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001603{
1604 struct fib_result res;
Eric Dumazet96d36222010-06-02 19:21:31 +00001605 struct in_device *in_dev = __in_dev_get_rcu(dev);
David S. Miller68a5e3d2011-03-11 20:07:33 -05001606 struct flowi4 fl4;
Eric Dumazet95c96172012-04-15 05:58:06 +00001607 unsigned int flags = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001608 u32 itag = 0;
Eric Dumazet95c96172012-04-15 05:58:06 +00001609 struct rtable *rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001610 int err = -EINVAL;
Daniel Baluta5e73ea12012-04-15 01:34:41 +00001611 struct net *net = dev_net(dev);
David S. Millerd2d68ba2012-07-17 12:58:50 -07001612 bool do_cache;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001613
1614 /* IP on this device is disabled. */
1615
1616 if (!in_dev)
1617 goto out;
1618
1619	/* Check for the most weird martians, which cannot be detected
1620 by fib_lookup.
1621 */
1622
Thomas Grafd0daebc32012-06-12 00:44:01 +00001623 if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001624 goto martian_source;
1625
David S. Millerd2d68ba2012-07-17 12:58:50 -07001626 res.fi = NULL;
Andy Walls27a954b2010-10-17 15:11:22 +00001627 if (ipv4_is_lbcast(daddr) || (saddr == 0 && daddr == 0))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001628 goto brd_input;
1629
1630 /* Accept zero addresses only to limited broadcast;
1631	 * I do not even know whether to fix it or not. Waiting for complaints :-)
1632 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08001633 if (ipv4_is_zeronet(saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001634 goto martian_source;
1635
Thomas Grafd0daebc32012-06-12 00:44:01 +00001636 if (ipv4_is_zeronet(daddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001637 goto martian_destination;
1638
Eric Dumazet9eb43e72012-08-03 21:27:25 +00001639	/* The following code tries to avoid calling IN_DEV_NET_ROUTE_LOCALNET(),
1640	 * and calls it only once when daddr and/or saddr are loopback addresses
1641 */
1642 if (ipv4_is_loopback(daddr)) {
1643 if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
Thomas Grafd0daebc32012-06-12 00:44:01 +00001644 goto martian_destination;
Eric Dumazet9eb43e72012-08-03 21:27:25 +00001645 } else if (ipv4_is_loopback(saddr)) {
1646 if (!IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
Thomas Grafd0daebc32012-06-12 00:44:01 +00001647 goto martian_source;
1648 }
1649
Linus Torvalds1da177e2005-04-16 15:20:36 -07001650 /*
1651	 * Now we are ready to route the packet.
1652 */
David S. Miller68a5e3d2011-03-11 20:07:33 -05001653 fl4.flowi4_oif = 0;
1654 fl4.flowi4_iif = dev->ifindex;
1655 fl4.flowi4_mark = skb->mark;
1656 fl4.flowi4_tos = tos;
1657 fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
1658 fl4.daddr = daddr;
1659 fl4.saddr = saddr;
1660 err = fib_lookup(net, &fl4, &res);
David S. Miller251da412012-06-26 16:27:09 -07001661 if (err != 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001662 goto no_route;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001663
Linus Torvalds1da177e2005-04-16 15:20:36 -07001664 if (res.type == RTN_BROADCAST)
1665 goto brd_input;
1666
1667 if (res.type == RTN_LOCAL) {
Michael Smith5c04c812011-04-07 04:51:50 +00001668 err = fib_validate_source(skb, saddr, daddr, tos,
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00001669 LOOPBACK_IFINDEX,
David S. Miller9e56e382012-06-28 18:54:02 -07001670 dev, in_dev, &itag);
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001671 if (err < 0)
1672 goto martian_source_keep_err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001673 goto local_input;
1674 }
1675
1676 if (!IN_DEV_FORWARD(in_dev))
David S. Miller251da412012-06-26 16:27:09 -07001677 goto no_route;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001678 if (res.type != RTN_UNICAST)
1679 goto martian_destination;
1680
David S. Miller68a5e3d2011-03-11 20:07:33 -05001681 err = ip_mkroute_input(skb, &res, &fl4, in_dev, daddr, saddr, tos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001682out: return err;
1683
1684brd_input:
1685 if (skb->protocol != htons(ETH_P_IP))
1686 goto e_inval;
1687
David S. Miller41347dc2012-06-28 04:05:27 -07001688 if (!ipv4_is_zeronet(saddr)) {
David S. Miller9e56e382012-06-28 18:54:02 -07001689 err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
1690 in_dev, &itag);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001691 if (err < 0)
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001692 goto martian_source_keep_err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001693 }
1694 flags |= RTCF_BROADCAST;
1695 res.type = RTN_BROADCAST;
1696 RT_CACHE_STAT_INC(in_brd);
1697
1698local_input:
David S. Millerd2d68ba2012-07-17 12:58:50 -07001699 do_cache = false;
1700 if (res.fi) {
David S. Millerfe3edf42012-07-23 13:22:20 -07001701 if (!itag) {
Eric Dumazet54764bb2012-07-31 01:08:23 +00001702 rth = rcu_dereference(FIB_RES_NH(res).nh_rth_input);
David S. Millerd2d68ba2012-07-17 12:58:50 -07001703 if (rt_cache_valid(rth)) {
David S. Millerc6cffba2012-07-26 11:14:38 +00001704 skb_dst_set_noref(skb, &rth->dst);
1705 err = 0;
1706 goto out;
David S. Millerd2d68ba2012-07-17 12:58:50 -07001707 }
1708 do_cache = true;
1709 }
1710 }
1711
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001712 rth = rt_dst_alloc(net->loopback_dev,
David S. Millerd2d68ba2012-07-17 12:58:50 -07001713 IN_DEV_CONF_GET(in_dev, NOPOLICY), false, do_cache);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001714 if (!rth)
1715 goto e_nobufs;
1716
David S. Millercf911662011-04-28 14:31:47 -07001717	rth->dst.input = ip_local_deliver;
Changli Gaod8d1f302010-06-10 23:31:35 -07001718	rth->dst.output = ip_rt_bug;
David S. Millercf911662011-04-28 14:31:47 -07001719#ifdef CONFIG_IP_ROUTE_CLASSID
1720 rth->dst.tclassid = itag;
1721#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001722
David S. Millercf911662011-04-28 14:31:47 -07001723 rth->rt_genid = rt_genid(net);
1724 rth->rt_flags = flags|RTCF_LOCAL;
1725 rth->rt_type = res.type;
David S. Miller9917e1e82012-07-17 14:44:26 -07001726 rth->rt_is_input = 1;
David S. Miller13378ca2012-07-23 13:57:45 -07001727 rth->rt_iif = 0;
David S. Miller59436342012-07-10 06:58:42 -07001728 rth->rt_pmtu = 0;
David S. Millerf8126f12012-07-13 05:03:45 -07001729 rth->rt_gateway = 0;
Julian Anastasov155e8332012-10-08 11:41:18 +00001730 rth->rt_uses_gateway = 0;
David S. Millercaacf052012-07-31 15:06:50 -07001731 INIT_LIST_HEAD(&rth->rt_uncached);
Duan Jionge82a32a2014-02-17 15:23:43 +08001732 RT_CACHE_STAT_INC(in_slow_tot);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001733 if (res.type == RTN_UNREACHABLE) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001734		rth->dst.input = ip_error;
1735		rth->dst.error = -err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001736 rth->rt_flags &= ~RTCF_LOCAL;
1737 }
Alexei Starovoitov62713c42013-11-19 19:12:34 -08001738 if (do_cache) {
1739 if (unlikely(!rt_cache_route(&FIB_RES_NH(res), rth))) {
1740 rth->dst.flags |= DST_NOCACHE;
1741 rt_add_uncached_list(rth);
1742 }
1743 }
David S. Miller89aef892012-07-17 11:00:09 -07001744 skb_dst_set(skb, &rth->dst);
David S. Millerb23dd4f2011-03-02 14:31:35 -08001745 err = 0;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001746 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001747
1748no_route:
1749 RT_CACHE_STAT_INC(in_no_route);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001750 res.type = RTN_UNREACHABLE;
Mitsuru Chinen7f538782007-12-07 01:07:24 -08001751 if (err == -ESRCH)
1752 err = -ENETUNREACH;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001753 goto local_input;
1754
1755 /*
1756 * Do not cache martian addresses: they should be logged (RFC1812)
1757 */
1758martian_destination:
1759 RT_CACHE_STAT_INC(in_martian_dst);
1760#ifdef CONFIG_IP_ROUTE_VERBOSE
Joe Perchese87cc472012-05-13 21:56:26 +00001761 if (IN_DEV_LOG_MARTIANS(in_dev))
1762 net_warn_ratelimited("martian destination %pI4 from %pI4, dev %s\n",
1763 &daddr, &saddr, dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001764#endif
Dietmar Eggemann2c2910a2005-06-28 13:06:23 -07001765
Linus Torvalds1da177e2005-04-16 15:20:36 -07001766e_inval:
1767 err = -EINVAL;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001768 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001769
1770e_nobufs:
1771 err = -ENOBUFS;
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001772 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001773
1774martian_source:
Eric Dumazetb5f7e752010-06-02 12:05:27 +00001775 err = -EINVAL;
1776martian_source_keep_err:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001777 ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001778 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001779}
1780
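/* Input route lookup used on the receive path.  The resulting dst is
 * attached to the skb and the caller takes no reference of its own
 * (hence _noref); on the cached fast paths it is set with
 * skb_dst_set_noref() and is only valid inside the current RCU section.
 */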
David S. Millerc6cffba2012-07-26 11:14:38 +00001781int ip_route_input_noref(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1782 u8 tos, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783{
Eric Dumazet96d36222010-06-02 19:21:31 +00001784 int res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001785
Eric Dumazet96d36222010-06-02 19:21:31 +00001786 rcu_read_lock();
1787
Linus Torvalds1da177e2005-04-16 15:20:36 -07001788 /* Multicast recognition logic is moved from route cache to here.
1789 The problem was that too many Ethernet cards have broken/missing
1790	   hardware multicast filters :-( As a result, a host on a multicasting
1791 network acquires a lot of useless route cache entries, sort of
1792	   SDR messages from all over the world. Now we try to get rid of them.
1793	   Really, provided the software IP multicast filter is organized
1794 reasonably (at least, hashed), it does not result in a slowdown
1795	   compared with route cache reject entries.
1796	   Note that multicast routers are not affected, because
1797	   a route cache entry is created eventually.
1798 */
Joe Perchesf97c1e02007-12-16 13:45:43 -08001799 if (ipv4_is_multicast(daddr)) {
Eric Dumazet96d36222010-06-02 19:21:31 +00001800 struct in_device *in_dev = __in_dev_get_rcu(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001801
Eric Dumazet96d36222010-06-02 19:21:31 +00001802 if (in_dev) {
David S. Millerdbdd9a52011-03-10 16:34:38 -08001803 int our = ip_check_mc_rcu(in_dev, daddr, saddr,
1804 ip_hdr(skb)->protocol);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001805 if (our
1806#ifdef CONFIG_IP_MROUTE
Joe Perches9d4fb272009-11-23 10:41:23 -08001807 ||
1808 (!ipv4_is_local_multicast(daddr) &&
1809 IN_DEV_MFORWARD(in_dev))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001810#endif
Joe Perches9d4fb272009-11-23 10:41:23 -08001811 ) {
Eric Dumazet96d36222010-06-02 19:21:31 +00001812 int res = ip_route_input_mc(skb, daddr, saddr,
1813 tos, dev, our);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001814 rcu_read_unlock();
Eric Dumazet96d36222010-06-02 19:21:31 +00001815 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001816 }
1817 }
1818 rcu_read_unlock();
1819 return -EINVAL;
1820 }
David S. Millerc10237e2012-06-27 17:05:06 -07001821 res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
Eric Dumazet96d36222010-06-02 19:21:31 +00001822 rcu_read_unlock();
1823 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001824}
David S. Millerc6cffba2012-07-26 11:14:38 +00001825EXPORT_SYMBOL(ip_route_input_noref);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001826
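/* Build the output route for a FIB lookup result, preferring a route
 * already cached in the matching nexthop exception or in the nexthop's
 * per-CPU output slot when that cached copy is still valid.
 */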
Eric Dumazetebc0ffa2010-10-05 10:41:36 +00001827/* called with rcu_read_lock() */
David S. Miller982721f2011-02-16 21:44:24 -08001828static struct rtable *__mkroute_output(const struct fib_result *res,
David Miller1a00fee2012-07-01 02:02:56 +00001829 const struct flowi4 *fl4, int orig_oif,
Julian Anastasovf61759e2011-12-02 11:39:42 +00001830 struct net_device *dev_out,
David S. Miller5ada5522011-02-17 15:29:00 -08001831 unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001832{
David S. Miller982721f2011-02-16 21:44:24 -08001833 struct fib_info *fi = res->fi;
David S. Millerf2bb4be2012-07-17 12:20:47 -07001834 struct fib_nh_exception *fnhe;
David S. Miller5ada5522011-02-17 15:29:00 -08001835 struct in_device *in_dev;
David S. Miller982721f2011-02-16 21:44:24 -08001836 u16 type = res->type;
David S. Miller5ada5522011-02-17 15:29:00 -08001837 struct rtable *rth;
Julian Anastasovc92b9652012-10-08 11:41:19 +00001838 bool do_cache;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001839
Thomas Grafd0daebc32012-06-12 00:44:01 +00001840 in_dev = __in_dev_get_rcu(dev_out);
1841 if (!in_dev)
David S. Miller5ada5522011-02-17 15:29:00 -08001842 return ERR_PTR(-EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001843
Thomas Grafd0daebc32012-06-12 00:44:01 +00001844 if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev)))
1845 if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK))
1846 return ERR_PTR(-EINVAL);
1847
David S. Miller68a5e3d2011-03-11 20:07:33 -05001848 if (ipv4_is_lbcast(fl4->daddr))
David S. Miller982721f2011-02-16 21:44:24 -08001849 type = RTN_BROADCAST;
David S. Miller68a5e3d2011-03-11 20:07:33 -05001850 else if (ipv4_is_multicast(fl4->daddr))
David S. Miller982721f2011-02-16 21:44:24 -08001851 type = RTN_MULTICAST;
David S. Miller68a5e3d2011-03-11 20:07:33 -05001852 else if (ipv4_is_zeronet(fl4->daddr))
David S. Miller5ada5522011-02-17 15:29:00 -08001853 return ERR_PTR(-EINVAL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001854
1855 if (dev_out->flags & IFF_LOOPBACK)
1856 flags |= RTCF_LOCAL;
1857
Julian Anastasov63617422012-11-22 23:04:14 +02001858 do_cache = true;
David S. Miller982721f2011-02-16 21:44:24 -08001859 if (type == RTN_BROADCAST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001860 flags |= RTCF_BROADCAST | RTCF_LOCAL;
David S. Miller982721f2011-02-16 21:44:24 -08001861 fi = NULL;
1862 } else if (type == RTN_MULTICAST) {
Eric Dumazetdd28d1a2010-09-29 11:53:50 +00001863 flags |= RTCF_MULTICAST | RTCF_LOCAL;
David S. Miller813b3b52011-04-28 14:48:42 -07001864 if (!ip_check_mc_rcu(in_dev, fl4->daddr, fl4->saddr,
1865 fl4->flowi4_proto))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001866 flags &= ~RTCF_LOCAL;
Julian Anastasov63617422012-11-22 23:04:14 +02001867 else
1868 do_cache = false;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001869		/* If a multicast route does not exist, use the
Eric Dumazetdd28d1a2010-09-29 11:53:50 +00001870		 * default one, but do not gateway in this case.
1871		 * Yes, it is a hack.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001872 */
David S. Miller982721f2011-02-16 21:44:24 -08001873 if (fi && res->prefixlen < 4)
1874 fi = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001875 }
1876
David S. Millerf2bb4be2012-07-17 12:20:47 -07001877 fnhe = NULL;
Julian Anastasov63617422012-11-22 23:04:14 +02001878 do_cache &= fi != NULL;
1879 if (do_cache) {
David S. Millerc5038a82012-07-31 15:02:02 -07001880 struct rtable __rcu **prth;
Julian Anastasovc92b9652012-10-08 11:41:19 +00001881 struct fib_nh *nh = &FIB_RES_NH(*res);
Eric Dumazetd26b3a72012-07-31 05:45:30 +00001882
Julian Anastasovc92b9652012-10-08 11:41:19 +00001883 fnhe = find_exception(nh, fl4->daddr);
David S. Millerc5038a82012-07-31 15:02:02 -07001884 if (fnhe)
1885 prth = &fnhe->fnhe_rth;
Julian Anastasovc92b9652012-10-08 11:41:19 +00001886 else {
1887 if (unlikely(fl4->flowi4_flags &
1888 FLOWI_FLAG_KNOWN_NH &&
1889 !(nh->nh_gw &&
1890 nh->nh_scope == RT_SCOPE_LINK))) {
1891 do_cache = false;
1892 goto add;
1893 }
1894 prth = __this_cpu_ptr(nh->nh_pcpu_rth_output);
1895 }
David S. Millerc5038a82012-07-31 15:02:02 -07001896 rth = rcu_dereference(*prth);
1897 if (rt_cache_valid(rth)) {
1898 dst_hold(&rth->dst);
1899 return rth;
David S. Millerf2bb4be2012-07-17 12:20:47 -07001900 }
1901 }
Julian Anastasovc92b9652012-10-08 11:41:19 +00001902
1903add:
David S. Miller5c1e6aa2011-04-28 14:13:38 -07001904 rth = rt_dst_alloc(dev_out,
1905 IN_DEV_CONF_GET(in_dev, NOPOLICY),
David S. Millerf2bb4be2012-07-17 12:20:47 -07001906 IN_DEV_CONF_GET(in_dev, NOXFRM),
Julian Anastasovc92b9652012-10-08 11:41:19 +00001907 do_cache);
Dimitris Michailidis8391d072010-10-07 14:48:38 +00001908 if (!rth)
David S. Miller5ada5522011-02-17 15:29:00 -08001909 return ERR_PTR(-ENOBUFS);
Dimitris Michailidis8391d072010-10-07 14:48:38 +00001910
David S. Millercf911662011-04-28 14:31:47 -07001911 rth->dst.output = ip_output;
1912
David S. Millercf911662011-04-28 14:31:47 -07001913 rth->rt_genid = rt_genid(dev_net(dev_out));
1914 rth->rt_flags = flags;
1915 rth->rt_type = type;
David S. Miller9917e1e82012-07-17 14:44:26 -07001916 rth->rt_is_input = 0;
David S. Miller13378ca2012-07-23 13:57:45 -07001917 rth->rt_iif = orig_oif ? : 0;
David S. Miller59436342012-07-10 06:58:42 -07001918 rth->rt_pmtu = 0;
David S. Millerf8126f12012-07-13 05:03:45 -07001919 rth->rt_gateway = 0;
Julian Anastasov155e8332012-10-08 11:41:18 +00001920 rth->rt_uses_gateway = 0;
David S. Millercaacf052012-07-31 15:06:50 -07001921 INIT_LIST_HEAD(&rth->rt_uncached);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001922
1923 RT_CACHE_STAT_INC(out_slow_tot);
1924
David S. Miller41347dc2012-06-28 04:05:27 -07001925 if (flags & RTCF_LOCAL)
Changli Gaod8d1f302010-06-10 23:31:35 -07001926 rth->dst.input = ip_local_deliver;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001927 if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
YOSHIFUJI Hideakie905a9e2007-02-09 23:24:47 +09001928 if (flags & RTCF_LOCAL &&
Linus Torvalds1da177e2005-04-16 15:20:36 -07001929 !(dev_out->flags & IFF_LOOPBACK)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001930 rth->dst.output = ip_mc_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001931 RT_CACHE_STAT_INC(out_slow_mc);
1932 }
1933#ifdef CONFIG_IP_MROUTE
David S. Miller982721f2011-02-16 21:44:24 -08001934 if (type == RTN_MULTICAST) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001935 if (IN_DEV_MFORWARD(in_dev) &&
David S. Miller813b3b52011-04-28 14:48:42 -07001936 !ipv4_is_local_multicast(fl4->daddr)) {
Changli Gaod8d1f302010-06-10 23:31:35 -07001937 rth->dst.input = ip_mr_input;
1938 rth->dst.output = ip_mc_output;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001939 }
1940 }
1941#endif
1942 }
1943
David S. Millerf2bb4be2012-07-17 12:20:47 -07001944 rt_set_nexthop(rth, fl4->daddr, res, fnhe, fi, type, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001945
David S. Miller5ada5522011-02-17 15:29:00 -08001946 return rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001947}
1948
Linus Torvalds1da177e2005-04-16 15:20:36 -07001949/*
1950 * Major route resolver routine.
1951 */
1952
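/* Resolve the output device and source address for the flow described
 * by fl4 (handling multicast, broadcast and oif-bound special cases),
 * do the FIB lookup and hand the result to __mkroute_output().
 */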
David S. Miller89aef892012-07-17 11:00:09 -07001953struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001954{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001955 struct net_device *dev_out = NULL;
Julian Anastasovf61759e2011-12-02 11:39:42 +00001956 __u8 tos = RT_FL_TOS(fl4);
David S. Miller813b3b52011-04-28 14:48:42 -07001957 unsigned int flags = 0;
1958 struct fib_result res;
David S. Miller5ada5522011-02-17 15:29:00 -08001959 struct rtable *rth;
David S. Miller813b3b52011-04-28 14:48:42 -07001960 int orig_oif;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001961
David S. Miller85b91b02012-07-13 08:21:29 -07001962 res.tclassid = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001963 res.fi = NULL;
David S. Miller8b96d222012-06-11 02:01:56 -07001964 res.table = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001965
David S. Miller813b3b52011-04-28 14:48:42 -07001966 orig_oif = fl4->flowi4_oif;
1967
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00001968 fl4->flowi4_iif = LOOPBACK_IFINDEX;
David S. Miller813b3b52011-04-28 14:48:42 -07001969 fl4->flowi4_tos = tos & IPTOS_RT_MASK;
1970 fl4->flowi4_scope = ((tos & RTO_ONLINK) ?
1971 RT_SCOPE_LINK : RT_SCOPE_UNIVERSE);
David S. Miller44713b62011-03-04 21:24:47 -08001972
David S. Miller010c2702011-02-17 15:37:09 -08001973 rcu_read_lock();
David S. Miller813b3b52011-04-28 14:48:42 -07001974 if (fl4->saddr) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08001975 rth = ERR_PTR(-EINVAL);
David S. Miller813b3b52011-04-28 14:48:42 -07001976 if (ipv4_is_multicast(fl4->saddr) ||
1977 ipv4_is_lbcast(fl4->saddr) ||
1978 ipv4_is_zeronet(fl4->saddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001979 goto out;
1980
Linus Torvalds1da177e2005-04-16 15:20:36 -07001981		/* I removed the check for oif == dev_out->oif here.
1982 It was wrong for two reasons:
Denis V. Lunev1ab35272008-01-22 22:04:30 -08001983		   1. ip_dev_find(net, saddr) can return the wrong iface if saddr
1984 is assigned to multiple interfaces.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001985 2. Moreover, we are allowed to send packets with saddr
1986 of another iface. --ANK
1987 */
1988
David S. Miller813b3b52011-04-28 14:48:42 -07001989 if (fl4->flowi4_oif == 0 &&
1990 (ipv4_is_multicast(fl4->daddr) ||
1991 ipv4_is_lbcast(fl4->daddr))) {
Julian Anastasova210d012008-10-01 07:28:28 -07001992 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07001993 dev_out = __ip_dev_find(net, fl4->saddr, false);
Julian Anastasova210d012008-10-01 07:28:28 -07001994 if (dev_out == NULL)
1995 goto out;
1996
Linus Torvalds1da177e2005-04-16 15:20:36 -07001997 /* Special hack: user can direct multicasts
1998			   and limited broadcast via the necessary interface
1999 without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
2000 This hack is not just for fun, it allows
2001			   vic, vat and friends to work.
2002			   They bind the socket to loopback, set ttl to zero
2003			   and expect that it will work.
2004			   From the viewpoint of the routing cache they are broken,
2005			   because we are not allowed to build a multicast path
2006			   with a loopback source addr (look, the routing cache
2007			   cannot know that ttl is zero, so the packet
2008			   will not leave this host and the route is valid).
2009			   Luckily, this hack is a good workaround.
2010 */
2011
David S. Miller813b3b52011-04-28 14:48:42 -07002012 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002013 goto make_route;
2014 }
Julian Anastasova210d012008-10-01 07:28:28 -07002015
David S. Miller813b3b52011-04-28 14:48:42 -07002016 if (!(fl4->flowi4_flags & FLOWI_FLAG_ANYSRC)) {
Julian Anastasova210d012008-10-01 07:28:28 -07002017 /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
David S. Miller813b3b52011-04-28 14:48:42 -07002018 if (!__ip_dev_find(net, fl4->saddr, false))
Julian Anastasova210d012008-10-01 07:28:28 -07002019 goto out;
Julian Anastasova210d012008-10-01 07:28:28 -07002020 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002021 }
2022
2023
David S. Miller813b3b52011-04-28 14:48:42 -07002024 if (fl4->flowi4_oif) {
2025 dev_out = dev_get_by_index_rcu(net, fl4->flowi4_oif);
David S. Millerb23dd4f2011-03-02 14:31:35 -08002026 rth = ERR_PTR(-ENODEV);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002027 if (dev_out == NULL)
2028 goto out;
Herbert Xue5ed6392005-10-03 14:35:55 -07002029
2030 /* RACE: Check return value of inet_select_addr instead. */
Eric Dumazetfc75fc82010-12-22 04:39:39 +00002031 if (!(dev_out->flags & IFF_UP) || !__in_dev_get_rcu(dev_out)) {
David S. Millerb23dd4f2011-03-02 14:31:35 -08002032 rth = ERR_PTR(-ENETUNREACH);
Eric Dumazetfc75fc82010-12-22 04:39:39 +00002033 goto out;
2034 }
David S. Miller813b3b52011-04-28 14:48:42 -07002035 if (ipv4_is_local_multicast(fl4->daddr) ||
2036 ipv4_is_lbcast(fl4->daddr)) {
2037 if (!fl4->saddr)
2038 fl4->saddr = inet_select_addr(dev_out, 0,
2039 RT_SCOPE_LINK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002040 goto make_route;
2041 }
Jiri Bencb15e22d2013-10-04 17:04:48 +02002042 if (!fl4->saddr) {
David S. Miller813b3b52011-04-28 14:48:42 -07002043 if (ipv4_is_multicast(fl4->daddr))
2044 fl4->saddr = inet_select_addr(dev_out, 0,
2045 fl4->flowi4_scope);
2046 else if (!fl4->daddr)
2047 fl4->saddr = inet_select_addr(dev_out, 0,
2048 RT_SCOPE_HOST);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002049 }
2050 }
2051
David S. Miller813b3b52011-04-28 14:48:42 -07002052 if (!fl4->daddr) {
2053 fl4->daddr = fl4->saddr;
2054 if (!fl4->daddr)
2055 fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK);
Denis V. Lunevb40afd02008-01-22 22:06:19 -08002056 dev_out = net->loopback_dev;
Pavel Emelyanov1fb94892012-08-08 21:53:36 +00002057 fl4->flowi4_oif = LOOPBACK_IFINDEX;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002058 res.type = RTN_LOCAL;
2059 flags |= RTCF_LOCAL;
2060 goto make_route;
2061 }
2062
David S. Miller813b3b52011-04-28 14:48:42 -07002063 if (fib_lookup(net, fl4, &res)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002064 res.fi = NULL;
David S. Miller8b96d222012-06-11 02:01:56 -07002065 res.table = NULL;
David S. Miller813b3b52011-04-28 14:48:42 -07002066 if (fl4->flowi4_oif) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002067			/* Apparently, routing tables are wrong. Assume
2068 that the destination is on link.
2069
2070 WHY? DW.
2071			   Because we are allowed to send to an iface
2072 even if it has NO routes and NO assigned
2073 addresses. When oif is specified, routing
2074 tables are looked up with only one purpose:
2075			   to catch whether the destination is gatewayed, rather than
2076			   direct. Moreover, if MSG_DONTROUTE is set,
2077			   we send the packet, ignoring both routing tables
2078 and ifaddr state. --ANK
2079
2080
2081 We could make it even if oif is unknown,
2082 likely IPv6, but we do not.
2083 */
2084
David S. Miller813b3b52011-04-28 14:48:42 -07002085 if (fl4->saddr == 0)
2086 fl4->saddr = inet_select_addr(dev_out, 0,
2087 RT_SCOPE_LINK);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002088 res.type = RTN_UNICAST;
2089 goto make_route;
2090 }
David S. Millerb23dd4f2011-03-02 14:31:35 -08002091 rth = ERR_PTR(-ENETUNREACH);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002092 goto out;
2093 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002094
2095 if (res.type == RTN_LOCAL) {
David S. Miller813b3b52011-04-28 14:48:42 -07002096 if (!fl4->saddr) {
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002097 if (res.fi->fib_prefsrc)
David S. Miller813b3b52011-04-28 14:48:42 -07002098 fl4->saddr = res.fi->fib_prefsrc;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002099 else
David S. Miller813b3b52011-04-28 14:48:42 -07002100 fl4->saddr = fl4->daddr;
Joel Sing9fc3bbb2011-01-03 20:24:20 +00002101 }
Denis V. Lunevb40afd02008-01-22 22:06:19 -08002102 dev_out = net->loopback_dev;
David S. Miller813b3b52011-04-28 14:48:42 -07002103 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002104 flags |= RTCF_LOCAL;
2105 goto make_route;
2106 }
2107
2108#ifdef CONFIG_IP_ROUTE_MULTIPATH
David S. Miller813b3b52011-04-28 14:48:42 -07002109 if (res.fi->fib_nhs > 1 && fl4->flowi4_oif == 0)
David S. Miller1b7fe5932011-03-10 17:01:16 -08002110 fib_select_multipath(&res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002111 else
2112#endif
David S. Miller21d8c492011-04-14 14:49:37 -07002113 if (!res.prefixlen &&
2114 res.table->tb_num_default > 1 &&
David S. Miller813b3b52011-04-28 14:48:42 -07002115 res.type == RTN_UNICAST && !fl4->flowi4_oif)
David S. Miller0c838ff2011-01-31 16:16:50 -08002116 fib_select_default(&res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002117
David S. Miller813b3b52011-04-28 14:48:42 -07002118 if (!fl4->saddr)
2119 fl4->saddr = FIB_RES_PREFSRC(net, res);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002120
Linus Torvalds1da177e2005-04-16 15:20:36 -07002121 dev_out = FIB_RES_DEV(res);
David S. Miller813b3b52011-04-28 14:48:42 -07002122 fl4->flowi4_oif = dev_out->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002123
2124
2125make_route:
David Miller1a00fee2012-07-01 02:02:56 +00002126 rth = __mkroute_output(&res, fl4, orig_oif, dev_out, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002127
David S. Miller010c2702011-02-17 15:37:09 -08002128out:
2129 rcu_read_unlock();
David S. Millerb23dd4f2011-03-02 14:31:35 -08002130 return rth;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002131}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002132EXPORT_SYMBOL_GPL(__ip_route_output_key);
2133
Jianzhao Wangae2688d2010-09-08 14:35:43 -07002134static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
2135{
2136 return NULL;
2137}
2138
Steffen Klassertebb762f2011-11-23 02:12:51 +00002139static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
Roland Dreierec831ea2011-01-31 13:16:00 -08002140{
Steffen Klassert618f9bc2011-11-23 02:13:31 +00002141 unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2142
2143 return mtu ? : dst->dev->mtu;
Roland Dreierec831ea2011-01-31 13:16:00 -08002144}
2145
David S. Miller6700c272012-07-17 03:29:28 -07002146static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
2147 struct sk_buff *skb, u32 mtu)
David S. Miller14e50e52007-05-24 18:17:54 -07002148{
2149}
2150
David S. Miller6700c272012-07-17 03:29:28 -07002151static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
2152 struct sk_buff *skb)
David S. Millerb587ee32012-07-12 00:39:24 -07002153{
2154}
2155
Held Bernhard0972ddb2011-04-24 22:07:32 +00002156static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
2157 unsigned long old)
2158{
2159 return NULL;
2160}
2161
David S. Miller14e50e52007-05-24 18:17:54 -07002162static struct dst_ops ipv4_dst_blackhole_ops = {
2163 .family = AF_INET,
Harvey Harrison09640e62009-02-01 00:45:17 -08002164 .protocol = cpu_to_be16(ETH_P_IP),
Jianzhao Wangae2688d2010-09-08 14:35:43 -07002165 .check = ipv4_blackhole_dst_check,
Steffen Klassertebb762f2011-11-23 02:12:51 +00002166 .mtu = ipv4_blackhole_mtu,
Eric Dumazet214f45c2011-02-18 11:39:01 -08002167 .default_advmss = ipv4_default_advmss,
David S. Miller14e50e52007-05-24 18:17:54 -07002168 .update_pmtu = ipv4_rt_blackhole_update_pmtu,
David S. Millerb587ee32012-07-12 00:39:24 -07002169 .redirect = ipv4_rt_blackhole_redirect,
Held Bernhard0972ddb2011-04-24 22:07:32 +00002170 .cow_metrics = ipv4_rt_blackhole_cow_metrics,
David S. Millerd3aaeb32011-07-18 00:40:17 -07002171 .neigh_lookup = ipv4_neigh_lookup,
David S. Miller14e50e52007-05-24 18:17:54 -07002172};
2173
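/* Produce a "blackhole" copy of dst_orig: a route backed by
 * ipv4_dst_blackhole_ops whose input/output handlers silently discard
 * packets.  The original dst is released before returning.
 */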
David S. Miller2774c132011-03-01 14:59:04 -08002174struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
David S. Miller14e50e52007-05-24 18:17:54 -07002175{
David S. Miller2774c132011-03-01 14:59:04 -08002176 struct rtable *ort = (struct rtable *) dst_orig;
David S. Millerf5b0a872012-07-19 12:31:33 -07002177 struct rtable *rt;
David S. Miller14e50e52007-05-24 18:17:54 -07002178
David S. Millerf5b0a872012-07-19 12:31:33 -07002179 rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
David S. Miller14e50e52007-05-24 18:17:54 -07002180 if (rt) {
Changli Gaod8d1f302010-06-10 23:31:35 -07002181 struct dst_entry *new = &rt->dst;
David S. Miller14e50e52007-05-24 18:17:54 -07002182
David S. Miller14e50e52007-05-24 18:17:54 -07002183 new->__use = 1;
Herbert Xu352e5122007-11-13 21:34:06 -08002184 new->input = dst_discard;
2185 new->output = dst_discard;
David S. Miller14e50e52007-05-24 18:17:54 -07002186
Changli Gaod8d1f302010-06-10 23:31:35 -07002187 new->dev = ort->dst.dev;
David S. Miller14e50e52007-05-24 18:17:54 -07002188 if (new->dev)
2189 dev_hold(new->dev);
2190
David S. Miller9917e1e82012-07-17 14:44:26 -07002191 rt->rt_is_input = ort->rt_is_input;
David S. Miller5e2b61f2011-03-04 21:47:09 -08002192 rt->rt_iif = ort->rt_iif;
David S. Miller59436342012-07-10 06:58:42 -07002193 rt->rt_pmtu = ort->rt_pmtu;
David S. Miller14e50e52007-05-24 18:17:54 -07002194
Denis V. Luneve84f84f2008-07-05 19:04:32 -07002195 rt->rt_genid = rt_genid(net);
David S. Miller14e50e52007-05-24 18:17:54 -07002196 rt->rt_flags = ort->rt_flags;
2197 rt->rt_type = ort->rt_type;
David S. Miller14e50e52007-05-24 18:17:54 -07002198 rt->rt_gateway = ort->rt_gateway;
Julian Anastasov155e8332012-10-08 11:41:18 +00002199 rt->rt_uses_gateway = ort->rt_uses_gateway;
David S. Miller14e50e52007-05-24 18:17:54 -07002200
David S. Millercaacf052012-07-31 15:06:50 -07002201 INIT_LIST_HEAD(&rt->rt_uncached);
2202
David S. Miller14e50e52007-05-24 18:17:54 -07002203 dst_free(new);
2204 }
2205
David S. Miller2774c132011-03-01 14:59:04 -08002206 dst_release(dst_orig);
2207
2208 return rt ? &rt->dst : ERR_PTR(-ENOMEM);
David S. Miller14e50e52007-05-24 18:17:54 -07002209}
2210
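/* Output route lookup that, when a transport protocol is set in the
 * flow, also passes the result through xfrm_lookup() so IPsec policy is
 * applied.  A minimal caller sketch (hypothetical context: net, sk,
 * daddr and rt are assumed to come from the caller; error handling
 * trimmed):
 *
 *	struct flowi4 fl4;
 *
 *	memset(&fl4, 0, sizeof(fl4));
 *	fl4.daddr = daddr;
 *	fl4.flowi4_proto = IPPROTO_UDP;
 *	rt = ip_route_output_flow(net, &fl4, sk);
 *	if (IS_ERR(rt))
 *		return PTR_ERR(rt);
 */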
David S. Miller9d6ec932011-03-12 01:12:47 -05002211struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4,
David S. Millerb23dd4f2011-03-02 14:31:35 -08002212 struct sock *sk)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002213{
David S. Miller9d6ec932011-03-12 01:12:47 -05002214 struct rtable *rt = __ip_route_output_key(net, flp4);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002215
David S. Millerb23dd4f2011-03-02 14:31:35 -08002216 if (IS_ERR(rt))
2217 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002218
David S. Miller56157872011-05-02 14:37:45 -07002219 if (flp4->flowi4_proto)
David S. Miller9d6ec932011-03-12 01:12:47 -05002220 rt = (struct rtable *) xfrm_lookup(net, &rt->dst,
2221 flowi4_to_flowi(flp4),
2222 sk, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002223
David S. Millerb23dd4f2011-03-02 14:31:35 -08002224 return rt;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002225}
Arnaldo Carvalho de Melod8c97a92005-08-09 20:12:12 -07002226EXPORT_SYMBOL_GPL(ip_route_output_flow);
2227
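/* Encode a route as an RTM_NEWROUTE netlink message: the rtmsg header
 * plus RTA_DST/SRC/OIF/GATEWAY/metrics attributes, with expiry and error
 * reported through the trailing cacheinfo block.
 */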
David S. Millerf1ce3062012-07-12 10:10:17 -07002228static int rt_fill_info(struct net *net, __be32 dst, __be32 src,
Eric W. Biederman15e47302012-09-07 20:12:54 +00002229 struct flowi4 *fl4, struct sk_buff *skb, u32 portid,
David S. Millerf1ce3062012-07-12 10:10:17 -07002230 u32 seq, int event, int nowait, unsigned int flags)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002231{
Eric Dumazet511c3f92009-06-02 05:14:27 +00002232 struct rtable *rt = skb_rtable(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002233 struct rtmsg *r;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002234 struct nlmsghdr *nlh;
Steffen Klassert2bc8ca42011-10-11 01:12:02 +00002235 unsigned long expires = 0;
David S. Millerf1850712012-07-10 07:26:01 -07002236 u32 error;
Julian Anastasov521f5492012-07-20 12:02:08 +03002237 u32 metrics[RTAX_MAX];
Thomas Grafbe403ea2006-08-17 18:15:17 -07002238
Eric W. Biederman15e47302012-09-07 20:12:54 +00002239 nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags);
Thomas Grafbe403ea2006-08-17 18:15:17 -07002240 if (nlh == NULL)
Patrick McHardy26932562007-01-31 23:16:40 -08002241 return -EMSGSIZE;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002242
2243 r = nlmsg_data(nlh);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002244 r->rtm_family = AF_INET;
2245 r->rtm_dst_len = 32;
2246 r->rtm_src_len = 0;
David Millerd6c0a4f2012-07-01 02:02:59 +00002247 r->rtm_tos = fl4->flowi4_tos;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002248 r->rtm_table = RT_TABLE_MAIN;
David S. Millerf3756b72012-04-01 20:39:02 -04002249 if (nla_put_u32(skb, RTA_TABLE, RT_TABLE_MAIN))
2250 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002251 r->rtm_type = rt->rt_type;
2252 r->rtm_scope = RT_SCOPE_UNIVERSE;
2253 r->rtm_protocol = RTPROT_UNSPEC;
2254 r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED;
2255 if (rt->rt_flags & RTCF_NOTIFY)
2256 r->rtm_flags |= RTM_F_NOTIFY;
Hannes Frederic Sowa8c6dafe2015-01-23 12:01:26 +01002257 if (IPCB(skb)->flags & IPSKB_DOREDIRECT)
2258 r->rtm_flags |= RTCF_DOREDIRECT;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002259
David S. Millerf1ce3062012-07-12 10:10:17 -07002260 if (nla_put_be32(skb, RTA_DST, dst))
David S. Millerf3756b72012-04-01 20:39:02 -04002261 goto nla_put_failure;
David Miller1a00fee2012-07-01 02:02:56 +00002262 if (src) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002263 r->rtm_src_len = 32;
David Miller1a00fee2012-07-01 02:02:56 +00002264 if (nla_put_be32(skb, RTA_SRC, src))
David S. Millerf3756b72012-04-01 20:39:02 -04002265 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002266 }
David S. Millerf3756b72012-04-01 20:39:02 -04002267 if (rt->dst.dev &&
2268 nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex))
2269 goto nla_put_failure;
Patrick McHardyc7066f72011-01-14 13:36:42 +01002270#ifdef CONFIG_IP_ROUTE_CLASSID
David S. Millerf3756b72012-04-01 20:39:02 -04002271 if (rt->dst.tclassid &&
2272 nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
2273 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002274#endif
David S. Miller41347dc2012-06-28 04:05:27 -07002275 if (!rt_is_input_route(rt) &&
David Millerd6c0a4f2012-07-01 02:02:59 +00002276 fl4->saddr != src) {
2277 if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr))
David S. Millerf3756b72012-04-01 20:39:02 -04002278 goto nla_put_failure;
2279 }
Julian Anastasov155e8332012-10-08 11:41:18 +00002280 if (rt->rt_uses_gateway &&
David S. Millerf3756b72012-04-01 20:39:02 -04002281 nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway))
2282 goto nla_put_failure;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002283
Steffen Klassertee9a8f72012-10-08 00:56:54 +00002284 expires = rt->dst.expires;
2285 if (expires) {
2286 unsigned long now = jiffies;
2287
2288 if (time_before(now, expires))
2289 expires -= now;
2290 else
2291 expires = 0;
2292 }
2293
Julian Anastasov521f5492012-07-20 12:02:08 +03002294 memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics));
Steffen Klassertee9a8f72012-10-08 00:56:54 +00002295 if (rt->rt_pmtu && expires)
Julian Anastasov521f5492012-07-20 12:02:08 +03002296 metrics[RTAX_MTU - 1] = rt->rt_pmtu;
2297 if (rtnetlink_put_metrics(skb, metrics) < 0)
Thomas Grafbe403ea2006-08-17 18:15:17 -07002298 goto nla_put_failure;
2299
David Millerb4869882012-07-01 02:03:01 +00002300 if (fl4->flowi4_mark &&
stephen hemminger68aaed52012-10-10 08:27:25 +00002301 nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark))
David S. Millerf3756b72012-04-01 20:39:02 -04002302 goto nla_put_failure;
Eric Dumazet963bfee2010-07-20 22:03:14 +00002303
Changli Gaod8d1f302010-06-10 23:31:35 -07002304 error = rt->dst.error;
Thomas Grafbe403ea2006-08-17 18:15:17 -07002305
David S. Millerc7537962010-11-11 17:07:48 -08002306 if (rt_is_input_route(rt)) {
Nicolas Dichtel8caaf7b2012-12-04 01:03:07 +00002307#ifdef CONFIG_IP_MROUTE
2308 if (ipv4_is_multicast(dst) && !ipv4_is_local_multicast(dst) &&
2309 IPV4_DEVCONF_ALL(net, MC_FORWARDING)) {
2310 int err = ipmr_get_route(net, skb,
2311 fl4->saddr, fl4->daddr,
2312 r, nowait);
2313 if (err <= 0) {
2314 if (!nowait) {
2315 if (err == 0)
2316 return 0;
2317 goto nla_put_failure;
2318 } else {
2319 if (err == -EMSGSIZE)
2320 goto nla_put_failure;
2321 error = err;
2322 }
2323 }
2324 } else
2325#endif
Julian Anastasovda1bba12014-04-13 18:08:02 +03002326 if (nla_put_u32(skb, RTA_IIF, skb->dev->ifindex))
Nicolas Dichtel8caaf7b2012-12-04 01:03:07 +00002327 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002328 }
2329
David S. Millerf1850712012-07-10 07:26:01 -07002330 if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0)
Thomas Grafe3703b32006-11-27 09:27:07 -08002331 goto nla_put_failure;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002332
Thomas Grafbe403ea2006-08-17 18:15:17 -07002333 return nlmsg_end(skb, nlh);
2334
2335nla_put_failure:
Patrick McHardy26932562007-01-31 23:16:40 -08002336 nlmsg_cancel(skb, nlh);
2337 return -EMSGSIZE;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002338}
2339
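/* RTM_GETROUTE handler: build a dummy skb carrying a minimal IP header,
 * resolve the route (input lookup when RTA_IIF is given, output lookup
 * otherwise) and answer the requester via rt_fill_info().
 */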
static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(in_skb->sk);
	struct rtmsg *rtm;
	struct nlattr *tb[RTA_MAX+1];
	struct rtable *rt = NULL;
	struct flowi4 fl4;
	__be32 dst = 0;
	__be32 src = 0;
	u32 iif;
	int err;
	int mark;
	struct sk_buff *skb;

	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy);
	if (err < 0)
		goto errout;

	rtm = nlmsg_data(nlh);

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (skb == NULL) {
		err = -ENOBUFS;
		goto errout;
	}

	/* Reserve room for dummy headers; this skb can pass
	 * through a good chunk of the routing engine.
	 */
	skb_reset_mac_header(skb);
	skb_reset_network_header(skb);

	/* Bugfix: need to give ip_route_input enough of an IP header to not gag. */
	ip_hdr(skb)->protocol = IPPROTO_ICMP;
	skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr));

	src = tb[RTA_SRC] ? nla_get_be32(tb[RTA_SRC]) : 0;
	dst = tb[RTA_DST] ? nla_get_be32(tb[RTA_DST]) : 0;
	iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0;
	mark = tb[RTA_MARK] ? nla_get_u32(tb[RTA_MARK]) : 0;

	memset(&fl4, 0, sizeof(fl4));
	fl4.daddr = dst;
	fl4.saddr = src;
	fl4.flowi4_tos = rtm->rtm_tos;
	fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0;
	fl4.flowi4_mark = mark;

	if (iif) {
		struct net_device *dev;

		dev = __dev_get_by_index(net, iif);
		if (dev == NULL) {
			err = -ENODEV;
			goto errout_free;
		}

		skb->protocol = htons(ETH_P_IP);
		skb->dev = dev;
		skb->mark = mark;
		local_bh_disable();
		err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev);
		local_bh_enable();

		rt = skb_rtable(skb);
		if (err == 0 && rt->dst.error)
			err = -rt->dst.error;
	} else {
		rt = ip_route_output_key(net, &fl4);

		err = 0;
		if (IS_ERR(rt))
			err = PTR_ERR(rt);
	}

	if (err)
		goto errout_free;

	skb_dst_set(skb, &rt->dst);
	if (rtm->rtm_flags & RTM_F_NOTIFY)
		rt->rt_flags |= RTCF_NOTIFY;

	err = rt_fill_info(net, dst, src, &fl4, skb,
			   NETLINK_CB(in_skb).portid, nlh->nlmsg_seq,
			   RTM_NEWROUTE, 0, 0);
	if (err <= 0)
		goto errout_free;

	err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid);
errout:
	return err;

errout_free:
	kfree_skb(skb);
	goto errout;
}
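
/*
 * Illustrative only (userspace sketch, not kernel code): a minimal
 * RTM_GETROUTE request of the kind inet_rtm_getroute() services.
 * "ip route get <addr>" performs the same query over rtnetlink.  Types
 * and constants come from <linux/netlink.h> and <linux/rtnetlink.h>;
 * error handling is omitted and the destination is an arbitrary example.
 *
 *	struct {
 *		struct nlmsghdr nlh;
 *		struct rtmsg    rtm;
 *		struct rtattr   rta;
 *		__be32          dst;
 *	} req = {
 *		.nlh.nlmsg_len   = sizeof(req),
 *		.nlh.nlmsg_type  = RTM_GETROUTE,
 *		.nlh.nlmsg_flags = NLM_F_REQUEST,
 *		.rtm.rtm_family  = AF_INET,
 *		.rta.rta_type    = RTA_DST,
 *		.rta.rta_len     = RTA_LENGTH(sizeof(__be32)),
 *		.dst             = htonl(INADDR_LOOPBACK),
 *	};
 *	struct sockaddr_nl kernel = { .nl_family = AF_NETLINK };
 *	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
 *	char buf[4096];
 *
 *	sendto(fd, &req, sizeof(req), 0,
 *	       (struct sockaddr *)&kernel, sizeof(kernel));
 *	recv(fd, buf, sizeof(buf), 0);
 *
 * The reply is the RTM_NEWROUTE message built by rt_fill_info() above.
 */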

/* Route cache dumps are no longer supported; report an empty dump. */
int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
	return skb->len;
}

void ip_rt_multicast_event(struct in_device *in_dev)
{
	rt_cache_flush(dev_net(in_dev->dev));
}

#ifdef CONFIG_SYSCTL
static int ip_rt_gc_timeout __read_mostly	= RT_GC_TIMEOUT;
static int ip_rt_gc_interval __read_mostly	= 60 * HZ;
static int ip_rt_gc_min_interval __read_mostly	= HZ / 2;
static int ip_rt_gc_elasticity __read_mostly	= 8;

static int ipv4_sysctl_rtcache_flush(ctl_table *__ctl, int write,
				     void __user *buffer,
				     size_t *lenp, loff_t *ppos)
{
	if (write) {
		rt_cache_flush((struct net *)__ctl->extra1);
		return 0;
	}

	return -EINVAL;
}

static ctl_table ipv4_route_table[] = {
	{
		.procname	= "gc_thresh",
		.data		= &ipv4_dst_ops.gc_thresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "max_size",
		.data		= &ip_rt_max_size,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		/* Deprecated. Use gc_min_interval_ms */

		.procname	= "gc_min_interval",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_min_interval_ms",
		.data		= &ip_rt_gc_min_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_ms_jiffies,
	},
	{
		.procname	= "gc_timeout",
		.data		= &ip_rt_gc_timeout,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "gc_interval",
		.data		= &ip_rt_gc_interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "redirect_load",
		.data		= &ip_rt_redirect_load,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_number",
		.data		= &ip_rt_redirect_number,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "redirect_silence",
		.data		= &ip_rt_redirect_silence,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_cost",
		.data		= &ip_rt_error_cost,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "error_burst",
		.data		= &ip_rt_error_burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "gc_elasticity",
		.data		= &ip_rt_gc_elasticity,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "mtu_expires",
		.data		= &ip_rt_mtu_expires,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "min_pmtu",
		.data		= &ip_rt_min_pmtu,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "min_adv_mss",
		.data		= &ip_rt_min_advmss,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }
};

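/*
 * Usage note: ipv4_route_table above is registered at "net/ipv4/route"
 * (see ip_static_sysctl_init() below), so each entry appears as
 * /proc/sys/net/ipv4/route/<procname> and may be tuned at run time,
 * e.g. via "sysctl net.ipv4.route.gc_timeout".  Entries handled by
 * proc_dointvec_jiffies are read and written in seconds.
 */
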
static struct ctl_table ipv4_route_flush_table[] = {
	{
		.procname	= "flush",
		.maxlen		= sizeof(int),
		.mode		= 0200,
		.proc_handler	= ipv4_sysctl_rtcache_flush,
	},
	{ },
};

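/*
 * Illustrative only (userspace, not kernel code): any write to the
 * per-namespace "flush" entry lands in ipv4_sysctl_rtcache_flush() above,
 * which calls rt_cache_flush() for that namespace.  Assuming the usual
 * procfs mount point:
 *
 *	int fd = open("/proc/sys/net/ipv4/route/flush", O_WRONLY);
 *
 *	if (fd >= 0) {
 *		write(fd, "1", 1);
 *		close(fd);
 *	}
 *
 * The written value is ignored; the write itself triggers the flush.
 */
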
static __net_init int sysctl_route_net_init(struct net *net)
{
	struct ctl_table *tbl;

	tbl = ipv4_route_flush_table;
	if (!net_eq(net, &init_net)) {
		tbl = kmemdup(tbl, sizeof(ipv4_route_flush_table), GFP_KERNEL);
		if (tbl == NULL)
			goto err_dup;

		/* Don't export sysctls to unprivileged users */
		if (net->user_ns != &init_user_ns)
			tbl[0].procname = NULL;
	}
	tbl[0].extra1 = net;

	net->ipv4.route_hdr = register_net_sysctl(net, "net/ipv4/route", tbl);
	if (net->ipv4.route_hdr == NULL)
		goto err_reg;
	return 0;

err_reg:
	if (tbl != ipv4_route_flush_table)
		kfree(tbl);
err_dup:
	return -ENOMEM;
}

static __net_exit void sysctl_route_net_exit(struct net *net)
{
	struct ctl_table *tbl;

	tbl = net->ipv4.route_hdr->ctl_table_arg;
	unregister_net_sysctl_table(net->ipv4.route_hdr);
	BUG_ON(tbl == ipv4_route_flush_table);
	kfree(tbl);
}

static __net_initdata struct pernet_operations sysctl_route_ops = {
	.init = sysctl_route_net_init,
	.exit = sysctl_route_net_exit,
};
#endif

static __net_init int rt_genid_init(struct net *net)
{
	atomic_set(&net->rt_genid, 0);
	get_random_bytes(&net->ipv4.dev_addr_genid,
			 sizeof(net->ipv4.dev_addr_genid));
	return 0;
}

static __net_initdata struct pernet_operations rt_genid_ops = {
	.init = rt_genid_init,
};

static int __net_init ipv4_inetpeer_init(struct net *net)
{
	struct inet_peer_base *bp = kmalloc(sizeof(*bp), GFP_KERNEL);

	if (!bp)
		return -ENOMEM;
	inet_peer_base_init(bp);
	net->ipv4.peers = bp;
	return 0;
}

static void __net_exit ipv4_inetpeer_exit(struct net *net)
{
	struct inet_peer_base *bp = net->ipv4.peers;

	net->ipv4.peers = NULL;
	inetpeer_invalidate_tree(bp);
	kfree(bp);
}

static __net_initdata struct pernet_operations ipv4_inetpeer_ops = {
	.init = ipv4_inetpeer_init,
	.exit = ipv4_inetpeer_exit,
};

#ifdef CONFIG_IP_ROUTE_CLASSID
struct ip_rt_acct __percpu *ip_rt_acct __read_mostly;
#endif /* CONFIG_IP_ROUTE_CLASSID */

int __init ip_rt_init(void)
{
	int rc = 0;

	ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL);
	if (!ip_idents)
		panic("IP: failed to allocate ip_idents\n");

	prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents));

#ifdef CONFIG_IP_ROUTE_CLASSID
	ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct));
	if (!ip_rt_acct)
		panic("IP: failed to allocate ip_rt_acct\n");
#endif

	ipv4_dst_ops.kmem_cachep =
		kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0,
				  SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep;

	if (dst_entries_init(&ipv4_dst_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_ops counter\n");

	if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0)
		panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n");

	ipv4_dst_ops.gc_thresh = ~0;
	ip_rt_max_size = INT_MAX;

	devinet_init();
	ip_fib_init();

	if (ip_rt_proc_init())
		pr_err("Unable to create route proc files\n");
#ifdef CONFIG_XFRM
	xfrm_init();
	xfrm4_init();
#endif
	rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL);

#ifdef CONFIG_SYSCTL
	register_pernet_subsys(&sysctl_route_ops);
#endif
	register_pernet_subsys(&rt_genid_ops);
	register_pernet_subsys(&ipv4_inetpeer_ops);
	return rc;
}

#ifdef CONFIG_SYSCTL
/*
 * We really need to sanitize the damn ipv4 init order, then all
 * this nonsense will go away.
 */
void __init ip_static_sysctl_init(void)
{
	register_net_sysctl(&init_net, "net/ipv4/route", ipv4_route_table);
}
#endif