/*
 *	NET3	Protocol independent device support routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *	Derived from the non IP parts of dev.c 1.0.19
 *		Authors:	Ross Biro
 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 *	Additional Authors:
 *		Florian la Roche <rzsfl@rz.uni-sb.de>
 *		Alan Cox <gw4pts@gw4pts.ampr.org>
 *		David Hinds <dahinds@users.sourceforge.net>
 *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *		Adam Sulmicki <adam@cfar.umd.edu>
 *		Pekka Riikonen <priikone@poesidon.pspt.fi>
 *
 *	Changes:
 *		D.J. Barrow	:	Fixed bug where dev->refcnt gets set
 *					to 2 if register_netdev gets called
 *					before net_dev_init & also removed a
 *					few lines of code in the process.
 *		Alan Cox	:	device private ioctl copies fields back.
 *		Alan Cox	:	Transmit queue code does relevant
 *					stunts to keep the queue safe.
 *		Alan Cox	:	Fixed double lock.
 *		Alan Cox	:	Fixed promisc NULL pointer trap
 *		????????	:	Support the full private ioctl range
 *		Alan Cox	:	Moved ioctl permission check into
 *					drivers
 *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
 *		Alan Cox	:	100 backlog just doesn't cut it when
 *					you start doing multicast video 8)
 *		Alan Cox	:	Rewrote net_bh and list manager.
 *		Alan Cox	:	Fix ETH_P_ALL echoback lengths.
 *		Alan Cox	:	Took out transmit every packet pass
 *					Saved a few bytes in the ioctl handler
 *		Alan Cox	:	Network driver sets packet type before
 *					calling netif_rx. Saves a function
 *					call a packet.
 *		Alan Cox	:	Hashed net_bh()
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
 *		Alan Cox	:	Device lock protection.
 *		Alan Cox	:	Fixed nasty side effect of device close
 *					changes.
 *		Rudi Cilibrasi	:	Pass the right thing to
 *					set_mac_address()
 *		Dave Miller	:	32bit quantity for the device lock to
 *					make it work out on a Sparc.
 *		Bjorn Ekwall	:	Added KERNELD hack.
 *		Alan Cox	:	Cleaned up the backlog initialise.
 *		Craig Metz	:	SIOCGIFCONF fix if space for under
 *					1 device.
 *		Thomas Bogendoerfer:	Return ENODEV for dev_open, if there
 *					is no device open function.
 *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
 *		Michael Chastain:	Fix signed/unsigned for SIOCGIFCONF
 *		Cyrus Durgin	:	Cleaned for KMOD
 *		Adam Sulmicki	:	Bug Fix : Network Device Unload
 *					A network device unload needs to purge
 *					the backlog queue.
 *		Paul Rusty Russell:	SIOCSIFNAME
 *		Pekka Riikonen	:	Netdev boot-time settings code
 *		Andrew Morton	:	Make unregister_netdevice wait
 *					indefinitely on dev->refcnt
 *		J Hadi Salim	:	- Backlog queue sampling
 *					- netif_rx() feedback
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
#include <linux/if_bridge.h>
#include <linux/if_macvlan.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <net/wext.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>

/*
 *	The list of packet types we will receive (as opposed to discard)
 *	and the routines to invoke.
 *
 *	Why 16. Because with 16 the only overlap we get on a hash of the
 *	low nibble of the protocol value is RARP/SNAP/X.25.
 *
 *	NOTE:  That is no longer true with the addition of VLAN tags.  Not
 *	       sure which should go first, but I bet it won't make much
 *	       difference if we are running VLANs.  The good news is that
 *	       this protocol won't be in the list unless compiled in, so
 *	       the average user (w/out VLANs) will not be adversely affected.
 *	       --BLG
 *
 *		0800	IP
 *		8100	802.1Q VLAN
 *		0001	802.3
 *		0002	AX.25
 *		0004	802.2
 *		8035	RARP
 *		0005	SNAP
 *		0805	X.25
 *		0806	ARP
 *		8137	IPX
 *		0009	Localtalk
 *		86DD	IPv6
 */

static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[16] __read_mostly;	/* 16 way hashed list */
static struct list_head ptype_all __read_mostly;	/* Taps */

#ifdef CONFIG_NET_DMA
struct net_dma {
	struct dma_client client;
	spinlock_t lock;
	cpumask_t channel_mask;
	struct dma_chan *channels[NR_CPUS];
};

static enum dma_state_client
netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
	enum dma_state state);

static struct net_dma net_dma = {
	.client = {
		.event_callback = netdev_dma_event,
	},
};
#endif

/*
 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading.
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base_head list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * See, for example usages, register_netdevice() and
 * unregister_netdevice(), which must be called with the rtnl
 * semaphore held.
 */
DEFINE_RWLOCK(dev_base_lock);

EXPORT_SYMBOL(dev_base_lock);

#define NETDEV_HASHBITS	8
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)

static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
	return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
}

static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
}

/* Device list insertion */
static int list_netdevice(struct net_device *dev)
{
	struct net *net = dev->nd_net;

	ASSERT_RTNL();

	write_lock_bh(&dev_base_lock);
	list_add_tail(&dev->dev_list, &net->dev_base_head);
	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
	write_unlock_bh(&dev_base_lock);
	return 0;
}

/* Device list removal */
static void unlist_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	/* Unlink dev from the device chain */
	write_lock_bh(&dev_base_lock);
	list_del(&dev->dev_list);
	hlist_del(&dev->name_hlist);
	hlist_del(&dev->index_hlist);
	write_unlock_bh(&dev_base_lock);
}

/*
 *	Our notifier list
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.
 */

DEFINE_PER_CPU(struct softnet_data, softnet_data);

extern int netdev_kobject_init(void);
extern int netdev_register_kobject(struct net_device *);
extern void netdev_unregister_kobject(struct net_device *);

#ifdef CONFIG_DEBUG_LOCK_ALLOC
/*
 * register_netdevice() inits dev->_xmit_lock and sets lockdep class
 * according to dev->type
 */
static const unsigned short netdev_lock_type[] =
	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
	 ARPHRD_NONE};

static const char *netdev_lock_name[] =
	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
	 "_xmit_NONE"};

static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];

static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
		if (netdev_lock_type[i] == dev_type)
			return i;
	/* the last key is used by default */
	return ARRAY_SIZE(netdev_lock_type) - 1;
}

static inline void netdev_set_lockdep_class(spinlock_t *lock,
					    unsigned short dev_type)
{
	int i;

	i = netdev_lock_pos(dev_type);
	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
				   netdev_lock_name[i]);
}
#else
static inline void netdev_set_lockdep_class(spinlock_t *lock,
					    unsigned short dev_type)
{
}
#endif

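/*
 * Example (illustrative only, not part of this file): per the comment
 * above, register_netdevice() applies the per-type lockdep class roughly
 * like this for an Ethernet device:
 *
 *	netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
 *
 * after which lockdep reports name the transmit lock "_xmit_ETHER".
 */
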
/*******************************************************************************

		Protocol management and registration routines

*******************************************************************************/

/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers, mangling input packets,
 *	MUST BE last in hash buckets and checking protocol handlers
 *	MUST start from promiscuous ptype_all chain in net_bh.
 *	It is true now, do not change it.
 *	Explanation follows: if protocol handler, mangling packet, will
 *	be the first on list, it is not able to sense, that packet
 *	is cloned and should be copied-on-write, so that it will
 *	change it and subsequent readers will get broken packet.
 *							--ANK (980803)
 */

/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed &packet_type
 *	is linked into kernel lists and may not be freed until it has been
 *	removed from the kernel lists.
 *
 *	This call does not sleep and therefore cannot guarantee that all
 *	CPUs that are in the middle of receiving packets will see the new
 *	packet type (until the next received packet).
 */

void dev_add_pack(struct packet_type *pt)
{
	int hash;

	spin_lock_bh(&ptype_lock);
	if (pt->type == htons(ETH_P_ALL))
		list_add_rcu(&pt->list, &ptype_all);
	else {
		hash = ntohs(pt->type) & 15;
		list_add_rcu(&pt->list, &ptype_base[hash]);
	}
	spin_unlock_bh(&ptype_lock);
}

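/*
 * Example (hypothetical module code, not part of this file): a protocol
 * that wants every IPv4 frame would register a handler like this and
 * later remove it with dev_remove_pack(&my_pt):
 *
 *	static struct packet_type my_pt = {
 *		.type = __constant_htons(ETH_P_IP),
 *		.func = my_rcv,
 *	};
 *
 *	dev_add_pack(&my_pt);
 */
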
/**
 *	__dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	The packet type might still be in use by receivers
 *	and must not be freed until after all the CPUs have gone
 *	through a quiescent state.
 */
void __dev_remove_pack(struct packet_type *pt)
{
	struct list_head *head;
	struct packet_type *pt1;

	spin_lock_bh(&ptype_lock);

	if (pt->type == htons(ETH_P_ALL))
		head = &ptype_all;
	else
		head = &ptype_base[ntohs(pt->type) & 15];

	list_for_each_entry(pt1, head, list) {
		if (pt == pt1) {
			list_del_rcu(&pt->list);
			goto out;
		}
	}

	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out:
	spin_unlock_bh(&ptype_lock);
}

/**
 *	dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);

	synchronize_net();
}

/*******************************************************************************

		      Device Boot-time Settings Routines

*******************************************************************************/

/* Boot time configuration table */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];

/**
 *	netdev_boot_setup_add	- add new setup entry
 *	@name: name of the device
 *	@map: configured settings for the device
 *
 *	Adds new setup entry to the dev_boot_setup list.  The function
 *	returns 0 on error and 1 on success.  This is a generic routine
 *	for all netdevices.
 */
static int netdev_boot_setup_add(char *name, struct ifmap *map)
{
	struct netdev_boot_setup *s;
	int i;

	s = dev_boot_setup;
	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
			memset(s[i].name, 0, sizeof(s[i].name));
			strcpy(s[i].name, name);
			memcpy(&s[i].map, map, sizeof(s[i].map));
			break;
		}
	}

	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
}

/**
 *	netdev_boot_setup_check	- check boot time settings
 *	@dev: the netdevice
 *
 *	Check boot time settings for the device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings found, 1 if they are.
 */
int netdev_boot_setup_check(struct net_device *dev)
{
	struct netdev_boot_setup *s = dev_boot_setup;
	int i;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
		    !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
			dev->irq	= s[i].map.irq;
			dev->base_addr	= s[i].map.base_addr;
			dev->mem_start	= s[i].map.mem_start;
			dev->mem_end	= s[i].map.mem_end;
			return 1;
		}
	}
	return 0;
}


/**
 *	netdev_boot_base	- get address from boot time settings
 *	@prefix: prefix for network device
 *	@unit: id for network device
 *
 *	Check boot time settings for the base address of device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings found.
 */
unsigned long netdev_boot_base(const char *prefix, int unit)
{
	const struct netdev_boot_setup *s = dev_boot_setup;
	char name[IFNAMSIZ];
	int i;

	sprintf(name, "%s%d", prefix, unit);

	/*
	 * If device already registered then return base of 1
	 * to indicate not to probe for this interface
	 */
	if (__dev_get_by_name(&init_net, name))
		return 1;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
		if (!strcmp(name, s[i].name))
			return s[i].map.base_addr;
	return 0;
}

/*
 * Saves at boot time configured settings for any netdevice.
 */
int __init netdev_boot_setup(char *str)
{
	int ints[5];
	struct ifmap map;

	str = get_options(str, ARRAY_SIZE(ints), ints);
	if (!str || !*str)
		return 0;

	/* Save settings */
	memset(&map, 0, sizeof(map));
	if (ints[0] > 0)
		map.irq = ints[1];
	if (ints[0] > 1)
		map.base_addr = ints[2];
	if (ints[0] > 2)
		map.mem_start = ints[3];
	if (ints[0] > 3)
		map.mem_end = ints[4];

	/* Add new entry to the list */
	return netdev_boot_setup_add(str, &map);
}

__setup("netdev=", netdev_boot_setup);

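/*
 * Example (illustrative only): with the parser above, a kernel command
 * line of "netdev=9,0x300,0,0,eth0" stores irq 9 and base address 0x300
 * for the device that will later probe as "eth0"; the trailing string
 * left over by get_options() becomes the entry name.
 */
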
/*******************************************************************************

			    Device Interface Subroutines

*******************************************************************************/

/**
 *	__dev_get_by_name	- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. Must be called under RTNL semaphore
 *	or @dev_base_lock. If the name is found a pointer to the device
 *	is returned. If the name is not found then %NULL is returned. The
 *	reference counters are not incremented so the caller must be
 *	careful with locks.
 */

struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
	struct hlist_node *p;

	hlist_for_each(p, dev_name_hash(net, name)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, name_hlist);
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;
	}
	return NULL;
}

/**
 *	dev_get_by_name		- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. This can be called from any
 *	context and does its own locking. The returned handle has
 *	the usage count incremented and the caller must use dev_put() to
 *	release it when it is no longer needed. %NULL is returned if no
 *	matching device is found.
 */

struct net_device *dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(net, name);
	if (dev)
		dev_hold(dev);
	read_unlock(&dev_base_lock);
	return dev;
}

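/*
 * Example (illustrative only): a typical refcounted lookup from process
 * context, here assuming the initial namespace:
 *
 *	struct net_device *dev = dev_get_by_name(&init_net, "eth0");
 *	if (dev) {
 *		... use dev ...
 *		dev_put(dev);
 *	}
 */
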
/**
 *	__dev_get_by_index	- find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold either the RTNL semaphore
 *	or @dev_base_lock.
 */

struct net_device *__dev_get_by_index(struct net *net, int ifindex)
{
	struct hlist_node *p;

	hlist_for_each(p, dev_index_hash(net, ifindex)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, index_hlist);
		if (dev->ifindex == ifindex)
			return dev;
	}
	return NULL;
}


/**
 *	dev_get_by_index	- find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_index(net, ifindex);
	if (dev)
		dev_hold(dev);
	read_unlock(&dev_base_lock);
	return dev;
}

/**
 *	dev_getbyhwaddr	- find a device by its hardware address
 *	@net: the applicable net namespace
 *	@type: media type of device
 *	@ha: hardware address
 *
 *	Search for an interface by MAC address. Returns %NULL if the device
 *	is not found or a pointer to the device. The caller must hold the
 *	rtnl semaphore. The returned device has not had its ref count increased
 *	and the caller must therefore be careful about locking.
 *
 *	BUGS:
 *	If the API was consistent this would be __dev_get_by_hwaddr
 */

struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
{
	struct net_device *dev;

	ASSERT_RTNL();

	for_each_netdev(&init_net, dev)
		if (dev->type == type &&
		    !memcmp(dev->dev_addr, ha, dev->addr_len))
			return dev;

	return NULL;
}

EXPORT_SYMBOL(dev_getbyhwaddr);

struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	ASSERT_RTNL();
	for_each_netdev(net, dev)
		if (dev->type == type)
			return dev;

	return NULL;
}

EXPORT_SYMBOL(__dev_getfirstbyhwtype);

struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	rtnl_lock();
	dev = __dev_getfirstbyhwtype(net, type);
	if (dev)
		dev_hold(dev);
	rtnl_unlock();
	return dev;
}

EXPORT_SYMBOL(dev_getfirstbyhwtype);

/**
 *	dev_get_by_flags - find any device with given flags
 *	@net: the applicable net namespace
 *	@if_flags: IFF_* values
 *	@mask: bitmask of bits in if_flags to check
 *
 *	Search for any interface with the given flags. Returns %NULL if no
 *	matching device is found, or a pointer to the device. The device
 *	returned has had a reference added and the pointer is safe until the
 *	user calls dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
{
	struct net_device *dev, *ret;

	ret = NULL;
	read_lock(&dev_base_lock);
	for_each_netdev(net, dev) {
		if (((dev->flags ^ if_flags) & mask) == 0) {
			dev_hold(dev);
			ret = dev;
			break;
		}
	}
	read_unlock(&dev_base_lock);
	return ret;
}

/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names to
 *	allow sysfs to work.  We also disallow any kind of
 *	whitespace.
 */
int dev_valid_name(const char *name)
{
	if (*name == '\0')
		return 0;
	if (strlen(name) >= IFNAMSIZ)
		return 0;
	if (!strcmp(name, ".") || !strcmp(name, ".."))
		return 0;

	while (*name) {
		if (*name == '/' || isspace(*name))
			return 0;
		name++;
	}
	return 1;
}

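/*
 * Examples (follow directly from the checks above): dev_valid_name("eth0")
 * returns 1, while "", ".", "..", "a/b", any name containing whitespace,
 * and any name of IFNAMSIZ (16) or more characters all return 0.
 */
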
/**
 *	__dev_alloc_name - allocate a name for a device
 *	@net: network namespace to allocate the device name in
 *	@name: name format string
 *	@buf:  scratch buffer and result name string
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
 *	id. It scans list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */

static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
	int i = 0;
	const char *p;
	const int max_netdevices = 8*PAGE_SIZE;
	long *inuse;
	struct net_device *d;

	p = strnchr(name, IFNAMSIZ-1, '%');
	if (p) {
		/*
		 * Verify the string as this thing may have come from
		 * the user.  There must be either one "%d" and no other "%"
		 * characters.
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* Use one page as a bit array of possible slots */
		inuse = (long *) get_zeroed_page(GFP_ATOMIC);
		if (!inuse)
			return -ENOMEM;

		for_each_netdev(net, d) {
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			/* avoid cases where sscanf is not exact inverse of printf */
			snprintf(buf, IFNAMSIZ, name, i);
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

	snprintf(buf, IFNAMSIZ, name, i);
	if (!__dev_get_by_name(net, buf))
		return i;

	/* It is possible to run out of possible slots
	 * when the name is long and there isn't enough space left
	 * for the digits, or if all bits are used.
	 */
	return -ENFILE;
}

/**
 *	dev_alloc_name - allocate a name for a device
 *	@dev: device
 *	@name: name format string
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
 *	id. It scans list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */

int dev_alloc_name(struct net_device *dev, const char *name)
{
	char buf[IFNAMSIZ];
	struct net *net;
	int ret;

	BUG_ON(!dev->nd_net);
	net = dev->nd_net;
	ret = __dev_alloc_name(net, name, buf);
	if (ret >= 0)
		strlcpy(dev->name, buf, IFNAMSIZ);
	return ret;
}


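/*
 * Example (illustrative only): a driver that wants the usual ethN naming
 * calls, with the appropriate lock held:
 *
 *	err = dev_alloc_name(dev, "eth%d");
 *
 * which picks the lowest free unit, e.g. "eth2" when eth0 and eth1 exist.
 */
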
/**
 *	dev_change_name - change name of a device
 *	@dev: device
 *	@newname: name (or format string) must be at least IFNAMSIZ
 *
 *	Change the name of a device; a format string such as "eth%d"
 *	may be passed for wildcarding.
 */
int dev_change_name(struct net_device *dev, char *newname)
{
	char oldname[IFNAMSIZ];
	int err = 0;
	int ret;
	struct net *net;

	ASSERT_RTNL();
	BUG_ON(!dev->nd_net);

	net = dev->nd_net;
	if (dev->flags & IFF_UP)
		return -EBUSY;

	if (!dev_valid_name(newname))
		return -EINVAL;

	memcpy(oldname, dev->name, IFNAMSIZ);

	if (strchr(newname, '%')) {
		err = dev_alloc_name(dev, newname);
		if (err < 0)
			return err;
		strcpy(newname, dev->name);
	}
	else if (__dev_get_by_name(net, newname))
		return -EEXIST;
	else
		strlcpy(dev->name, newname, IFNAMSIZ);

rollback:
	device_rename(&dev->dev, dev->name);

	write_lock_bh(&dev_base_lock);
	hlist_del(&dev->name_hlist);
	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
	write_unlock_bh(&dev_base_lock);

	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
	ret = notifier_to_errno(ret);

	if (ret) {
		if (err) {
			printk(KERN_ERR
			       "%s: name change rollback failed: %d.\n",
			       dev->name, ret);
		} else {
			err = ret;
			memcpy(dev->name, oldname, IFNAMSIZ);
			goto rollback;
		}
	}

	return err;
}

/**
 *	netdev_features_change - device changes features
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed features.
 */
void netdev_features_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);

/**
 *	netdev_state_change - device changes state
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed state. This function calls
 *	the notifier chains for netdev_chain and sends a NEWLINK message
 *	to the routing socket.
 */
void netdev_state_change(struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
		call_netdevice_notifiers(NETDEV_CHANGE, dev);
		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
	}
}

/**
 *	dev_load	- load a network module
 *	@net: the applicable net namespace
 *	@name: name of interface
 *
 *	If a network interface is not present and the process has suitable
 *	privileges this function loads the module. If module loading is not
 *	available in this kernel then it becomes a nop.
 */

void dev_load(struct net *net, const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(net, name);
	read_unlock(&dev_base_lock);

	if (!dev && capable(CAP_SYS_MODULE))
		request_module("%s", name);
}

static int default_rebuild_header(struct sk_buff *skb)
{
	printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
	       skb->dev ? skb->dev->name : "NULL!!!");
	kfree_skb(skb);
	return 1;
}

/**
 *	dev_open	- prepare an interface for use.
 *	@dev:	device to open
 *
 *	Takes a device from down to up state. The device's private open
 *	function is invoked and then the multicast lists are loaded. Finally
 *	the device is moved into the up state and a %NETDEV_UP message is
 *	sent to the netdev notifier chain.
 *
 *	Calling this function on an active interface is a nop. On a failure
 *	a negative errno code is returned.
 */
int dev_open(struct net_device *dev)
{
	int ret = 0;

	/*
	 *	Is it already up?
	 */

	if (dev->flags & IFF_UP)
		return 0;

	/*
	 *	Is it even present?
	 */
	if (!netif_device_present(dev))
		return -ENODEV;

	/*
	 *	Call device private open method
	 */
	set_bit(__LINK_STATE_START, &dev->state);
	if (dev->open) {
		ret = dev->open(dev);
		if (ret)
			clear_bit(__LINK_STATE_START, &dev->state);
	}

	/*
	 *	If it went open OK then:
	 */

	if (!ret) {
		/*
		 *	Set the flags.
		 */
		dev->flags |= IFF_UP;

		/*
		 *	Initialize multicasting status
		 */
		dev_set_rx_mode(dev);

		/*
		 *	Wakeup transmit queue engine
		 */
		dev_activate(dev);

		/*
		 *	... and announce new interface.
		 */
		call_netdevice_notifiers(NETDEV_UP, dev);
	}
	return ret;
}

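/*
 * Example (illustrative only): kernel code brings an interface up under
 * the rtnl semaphore, mirroring what the ioctl path does:
 *
 *	rtnl_lock();
 *	err = dev_open(dev);
 *	rtnl_unlock();
 */
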
/**
 *	dev_close - shutdown an interface.
 *	@dev: device to shutdown
 *
 *	This function moves an active device into down state. A
 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *	chain.
 */
int dev_close(struct net_device *dev)
{
	might_sleep();

	if (!(dev->flags & IFF_UP))
		return 0;

	/*
	 *	Tell people we are going down, so that they can
	 *	prepare for death while the device is still operating.
	 */
	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

	dev_deactivate(dev);

	clear_bit(__LINK_STATE_START, &dev->state);

	/* Synchronize to scheduled poll. We cannot touch poll list,
	 * it can be even on different cpu. So just clear netif_running().
	 *
	 * dev->stop() will invoke napi_disable() on all of its
	 * napi_struct instances on this device.
	 */
	smp_mb__after_clear_bit(); /* Commit netif_running(). */

	/*
	 *	Call the device specific close. This cannot fail.
	 *	Only if device is UP
	 *
	 *	We allow it to be called even after a DETACH hot-plug
	 *	event.
	 */
	if (dev->stop)
		dev->stop(dev);

	/*
	 *	Device is now down.
	 */

	dev->flags &= ~IFF_UP;

	/*
	 *	Tell people we are down
	 */
	call_netdevice_notifiers(NETDEV_DOWN, dev);

	return 0;
}


static int dev_boot_phase = 1;

/*
 *	Device change register/unregister. These are not inline or static
 *	as we export them to the world.
 */

/**
 *	register_netdevice_notifier - register a network notifier block
 *	@nb: notifier
 *
 *	Register a notifier to be called when network device events occur.
 *	The notifier passed is linked into the kernel structures and must
 *	not be reused until it has been unregistered. A negative errno code
 *	is returned on a failure.
 *
 *	When registered all registration and up events are replayed
 *	to the new notifier to allow the device to have a race-free
 *	view of the network device list.
 */

int register_netdevice_notifier(struct notifier_block *nb)
{
	struct net_device *dev;
	struct net_device *last;
	struct net *net;
	int err;

	rtnl_lock();
	err = raw_notifier_chain_register(&netdev_chain, nb);
	if (err)
		goto unlock;
	if (dev_boot_phase)
		goto unlock;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
			err = notifier_to_errno(err);
			if (err)
				goto rollback;

			if (!(dev->flags & IFF_UP))
				continue;

			nb->notifier_call(nb, NETDEV_UP, dev);
		}
	}

unlock:
	rtnl_unlock();
	return err;

rollback:
	last = dev;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			if (dev == last)
				break;

			if (dev->flags & IFF_UP) {
				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
				nb->notifier_call(nb, NETDEV_DOWN, dev);
			}
			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
		}
	}
	goto unlock;
}

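/*
 * Example (hypothetical module code, not part of this file): in this era
 * the notifier's third argument is the net_device itself:
 *
 *	static int my_netdev_event(struct notifier_block *nb,
 *				   unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UP)
 *			printk(KERN_INFO "%s is up\n", dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_nb = {
 *		.notifier_call = my_netdev_event,
 *	};
 *
 *	register_netdevice_notifier(&my_nb);
 */
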
/**
 *	unregister_netdevice_notifier - unregister a network notifier block
 *	@nb: notifier
 *
 *	Unregister a notifier previously registered by
 *	register_netdevice_notifier(). The notifier is unlinked from the
 *	kernel structures and may then be reused. A negative errno code
 *	is returned on a failure.
 */

int unregister_netdevice_notifier(struct notifier_block *nb)
{
	int err;

	rtnl_lock();
	err = raw_notifier_chain_unregister(&netdev_chain, nb);
	rtnl_unlock();
	return err;
}

/**
 *	call_netdevice_notifiers - call all network notifier blocks
 *	@val: value passed unmodified to notifier function
 *	@dev: net_device pointer passed unmodified to notifier function
 *
 *	Call all network notifier blocks.  Parameters and return value
 *	are as for raw_notifier_call_chain().
 */

int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
	return raw_notifier_call_chain(&netdev_chain, val, dev);
}

/* When > 0 there are consumers of rx skb time stamps */
static atomic_t netstamp_needed = ATOMIC_INIT(0);

void net_enable_timestamp(void)
{
	atomic_inc(&netstamp_needed);
}

void net_disable_timestamp(void)
{
	atomic_dec(&netstamp_needed);
}

static inline void net_timestamp(struct sk_buff *skb)
{
	if (atomic_read(&netstamp_needed))
		__net_timestamp(skb);
	else
		skb->tstamp.tv64 = 0;
}

/*
 *	Support routine. Sends outgoing frames to any network
 *	taps currently in use.
 */

static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
	struct packet_type *ptype;

	net_timestamp(skb);

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		/* Never send packets back to the socket
		 * they originated from - MvS (miquels@drinkel.ow.org)
		 */
		if ((ptype->dev == dev || !ptype->dev) &&
		    (ptype->af_packet_priv == NULL ||
		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (!skb2)
				break;

			/* skb->nh should be correctly
			   set by sender, so that the second statement is
			   just protection against buggy protocols.
			 */
			skb_reset_mac_header(skb2);

			if (skb_network_header(skb2) < skb2->data ||
			    skb2->network_header > skb2->tail) {
				if (net_ratelimit())
					printk(KERN_CRIT "protocol %04x is "
					       "buggy, dev %s\n",
					       skb2->protocol, dev->name);
				skb_reset_network_header(skb2);
			}

			skb2->transport_header = skb2->network_header;
			skb2->pkt_type = PACKET_OUTGOING;
			ptype->func(skb2, skb->dev, ptype, skb->dev);
		}
	}
	rcu_read_unlock();
}


void __netif_schedule(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
		unsigned long flags;
		struct softnet_data *sd;

		local_irq_save(flags);
		sd = &__get_cpu_var(softnet_data);
		dev->next_sched = sd->output_queue;
		sd->output_queue = dev;
		raise_softirq_irqoff(NET_TX_SOFTIRQ);
		local_irq_restore(flags);
	}
}
EXPORT_SYMBOL(__netif_schedule);

void dev_kfree_skb_irq(struct sk_buff *skb)
{
	if (atomic_dec_and_test(&skb->users)) {
		struct softnet_data *sd;
		unsigned long flags;

		local_irq_save(flags);
		sd = &__get_cpu_var(softnet_data);
		skb->next = sd->completion_queue;
		sd->completion_queue = skb;
		raise_softirq_irqoff(NET_TX_SOFTIRQ);
		local_irq_restore(flags);
	}
}
EXPORT_SYMBOL(dev_kfree_skb_irq);

void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (in_irq() || irqs_disabled())
		dev_kfree_skb_irq(skb);
	else
		dev_kfree_skb(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);

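/*
 * Example (illustrative only): a driver TX-completion handler that can run
 * in hard-irq context must not call dev_kfree_skb() directly; it frees
 * transmitted skbs with dev_kfree_skb_irq(skb), or with
 * dev_kfree_skb_any(skb) when the calling context is not known in advance.
 */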

/**
 * netif_device_detach - mark device as removed
 * @dev: network device
 *
 * Mark device as removed from system and therefore no longer available.
 */
void netif_device_detach(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_stop_queue(dev);
	}
}
EXPORT_SYMBOL(netif_device_detach);

/**
 * netif_device_attach - mark device as attached
 * @dev: network device
 *
 * Mark device as attached to the system and restart it if needed.
 */
void netif_device_attach(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
	    netif_running(dev)) {
		netif_wake_queue(dev);
		__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_device_attach);


/*
 * Invalidate hardware checksum when packet is to be mangled, and
 * complete checksum manually on outgoing path.
 */
int skb_checksum_help(struct sk_buff *skb)
{
	__wsum csum;
	int ret = 0, offset;

	if (skb->ip_summed == CHECKSUM_COMPLETE)
		goto out_set_summed;

	if (unlikely(skb_shinfo(skb)->gso_size)) {
		/* Let GSO fix up the checksum. */
		goto out_set_summed;
	}

	if (skb_cloned(skb)) {
		ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
		if (ret)
			goto out;
	}

	offset = skb->csum_start - skb_headroom(skb);
	BUG_ON(offset > (int)skb->len);
	csum = skb_checksum(skb, offset, skb->len-offset, 0);

	offset = skb_headlen(skb) - offset;
	BUG_ON(offset <= 0);
	BUG_ON(skb->csum_offset + 2 > offset);

	*(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) =
		csum_fold(csum);
out_set_summed:
	skb->ip_summed = CHECKSUM_NONE;
out:
	return ret;
}

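/*
 * Example (illustrative only; the real transmit path applies more
 * per-protocol feature checks): a caller falls back to software
 * checksumming roughly like this when the device cannot checksum
 * the packet itself:
 *
 *	if (skb->ip_summed == CHECKSUM_PARTIAL &&
 *	    !(dev->features & NETIF_F_ALL_CSUM) &&
 *	    skb_checksum_help(skb))
 *		goto out_kfree_skb;
 */
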
/**
 *	skb_gso_segment - Perform segmentation on skb.
 *	@skb: buffer to segment
 *	@features: features for the output path (see dev->features)
 *
 *	This function segments the given skb and returns a list of segments.
 *
 *	It may return NULL if the skb requires no segmentation.  This is
 *	only possible when GSO is used for verifying header integrity.
 */
struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
{
	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
	struct packet_type *ptype;
	__be16 type = skb->protocol;
	int err;

	BUG_ON(skb_shinfo(skb)->frag_list);

	skb_reset_mac_header(skb);
	skb->mac_len = skb->network_header - skb->mac_header;
	__skb_pull(skb, skb->mac_len);

	if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
		if (skb_header_cloned(skb) &&
		    (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
			return ERR_PTR(err);
	}

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
		if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
			if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
				err = ptype->gso_send_check(skb);
				segs = ERR_PTR(err);
				if (err || skb_gso_ok(skb, features))
					break;
				__skb_push(skb, (skb->data -
						 skb_network_header(skb)));
			}
			segs = ptype->gso_segment(skb, features);
			break;
		}
	}
	rcu_read_unlock();

	__skb_push(skb, skb->data - skb_mac_header(skb));

	return segs;
}

EXPORT_SYMBOL(skb_gso_segment);

Herbert Xufb286bb2005-11-10 13:01:24 -08001441/* Take action when hardware reception checksum errors are detected. */
1442#ifdef CONFIG_BUG
1443void netdev_rx_csum_fault(struct net_device *dev)
1444{
1445 if (net_ratelimit()) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001446 printk(KERN_ERR "%s: hw csum failure.\n",
Stephen Hemminger246a4212005-12-08 15:21:39 -08001447 dev ? dev->name : "<unknown>");
Herbert Xufb286bb2005-11-10 13:01:24 -08001448 dump_stack();
1449 }
1450}
1451EXPORT_SYMBOL(netdev_rx_csum_fault);
1452#endif
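
/*
 * Illustrative sketch (editor's addition): the kind of caller this helper
 * serves. The software checksum fallback runs only after a hardware-supplied
 * checksum already indicated failure; if the recomputation then says the
 * packet is fine, the device's checksum was wrong and gets reported. This
 * roughly mirrors __skb_checksum_complete() in net/core/datagram.c and is
 * shown here only as a sketch.
 */
#if 0
static __sum16 example_checksum_fallback(struct sk_buff *skb)
{
	/* Software recomputation, seeded with the hardware-supplied value. */
	__sum16 sum = csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));

	if (!sum && skb->ip_summed == CHECKSUM_COMPLETE)
		netdev_rx_csum_fault(skb->dev);	/* hw checksum was wrong */
	return sum;
}
#endif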
1453
Linus Torvalds1da177e2005-04-16 15:20:36 -07001454/* Actually, we should eliminate this check as soon as we know that:
 1455 * 1. The IOMMU is present and allows mapping all of the memory.
1456 * 2. No high memory really exists on this machine.
1457 */
1458
1459static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1460{
Herbert Xu3d3a8532006-06-27 13:33:10 -07001461#ifdef CONFIG_HIGHMEM
Linus Torvalds1da177e2005-04-16 15:20:36 -07001462 int i;
1463
1464 if (dev->features & NETIF_F_HIGHDMA)
1465 return 0;
1466
1467 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1468 if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1469 return 1;
1470
Herbert Xu3d3a8532006-06-27 13:33:10 -07001471#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001472 return 0;
1473}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001474
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001475struct dev_gso_cb {
1476 void (*destructor)(struct sk_buff *skb);
1477};
1478
1479#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1480
1481static void dev_gso_skb_destructor(struct sk_buff *skb)
1482{
1483 struct dev_gso_cb *cb;
1484
1485 do {
1486 struct sk_buff *nskb = skb->next;
1487
1488 skb->next = nskb->next;
1489 nskb->next = NULL;
1490 kfree_skb(nskb);
1491 } while (skb->next);
1492
1493 cb = DEV_GSO_CB(skb);
1494 if (cb->destructor)
1495 cb->destructor(skb);
1496}
1497
1498/**
1499 * dev_gso_segment - Perform emulated hardware segmentation on skb.
1500 * @skb: buffer to segment
1501 *
1502 * This function segments the given skb and stores the list of segments
1503 * in skb->next.
1504 */
1505static int dev_gso_segment(struct sk_buff *skb)
1506{
1507 struct net_device *dev = skb->dev;
1508 struct sk_buff *segs;
Herbert Xu576a30e2006-06-27 13:22:38 -07001509 int features = dev->features & ~(illegal_highdma(dev, skb) ?
1510 NETIF_F_SG : 0);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001511
Herbert Xu576a30e2006-06-27 13:22:38 -07001512 segs = skb_gso_segment(skb, features);
1513
1514 /* Verifying header integrity only. */
1515 if (!segs)
1516 return 0;
1517
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001518 if (unlikely(IS_ERR(segs)))
1519 return PTR_ERR(segs);
1520
1521 skb->next = segs;
1522 DEV_GSO_CB(skb)->destructor = skb->destructor;
1523 skb->destructor = dev_gso_skb_destructor;
1524
1525 return 0;
1526}
1527
1528int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
1529{
1530 if (likely(!skb->next)) {
Stephen Hemminger9be9a6b2007-04-20 17:02:45 -07001531 if (!list_empty(&ptype_all))
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001532 dev_queue_xmit_nit(skb, dev);
1533
Herbert Xu576a30e2006-06-27 13:22:38 -07001534 if (netif_needs_gso(dev, skb)) {
1535 if (unlikely(dev_gso_segment(skb)))
1536 goto out_kfree_skb;
1537 if (skb->next)
1538 goto gso;
1539 }
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001540
Herbert Xu576a30e2006-06-27 13:22:38 -07001541 return dev->hard_start_xmit(skb, dev);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001542 }
1543
Herbert Xu576a30e2006-06-27 13:22:38 -07001544gso:
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001545 do {
1546 struct sk_buff *nskb = skb->next;
1547 int rc;
1548
1549 skb->next = nskb->next;
1550 nskb->next = NULL;
1551 rc = dev->hard_start_xmit(nskb, dev);
1552 if (unlikely(rc)) {
Michael Chanf54d9e82006-06-25 23:57:04 -07001553 nskb->next = skb->next;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001554 skb->next = nskb;
1555 return rc;
1556 }
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07001557 if (unlikely((netif_queue_stopped(dev) ||
1558 netif_subqueue_stopped(dev, skb->queue_mapping)) &&
1559 skb->next))
Michael Chanf54d9e82006-06-25 23:57:04 -07001560 return NETDEV_TX_BUSY;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001561 } while (skb->next);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001562
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001563 skb->destructor = DEV_GSO_CB(skb)->destructor;
1564
1565out_kfree_skb:
1566 kfree_skb(skb);
1567 return 0;
1568}
1569
Linus Torvalds1da177e2005-04-16 15:20:36 -07001570/**
1571 * dev_queue_xmit - transmit a buffer
1572 * @skb: buffer to transmit
1573 *
1574 * Queue a buffer for transmission to a network device. The caller must
1575 * have set the device and priority and built the buffer before calling
1576 * this function. The function can be called from an interrupt.
1577 *
1578 * A negative errno code is returned on a failure. A success does not
1579 * guarantee the frame will be transmitted as it may be dropped due
1580 * to congestion or traffic shaping.
Ben Greearaf191362005-04-24 20:12:36 -07001581 *
1582 * -----------------------------------------------------------------------------------
1583 * I notice this method can also return errors from the queue disciplines,
1584 * including NET_XMIT_DROP, which is a positive value. So, errors can also
1585 * be positive.
1586 *
1587 * Regardless of the return value, the skb is consumed, so it is currently
1588 * difficult to retry a send to this method. (You can bump the ref count
1589 * before sending to hold a reference for retry if you are careful.)
1590 *
1591 * When calling this method, interrupts MUST be enabled. This is because
1592 * the BH enable code must have IRQs enabled so that it will not deadlock.
1593 * --BLG
Linus Torvalds1da177e2005-04-16 15:20:36 -07001594 */
1595
1596int dev_queue_xmit(struct sk_buff *skb)
1597{
1598 struct net_device *dev = skb->dev;
1599 struct Qdisc *q;
1600 int rc = -ENOMEM;
1601
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001602 /* GSO will handle the following emulations directly. */
1603 if (netif_needs_gso(dev, skb))
1604 goto gso;
1605
Linus Torvalds1da177e2005-04-16 15:20:36 -07001606 if (skb_shinfo(skb)->frag_list &&
1607 !(dev->features & NETIF_F_FRAGLIST) &&
Herbert Xu364c6ba2006-06-09 16:10:40 -07001608 __skb_linearize(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001609 goto out_kfree_skb;
1610
 1611	/* A fragmented skb is linearized if the device does not support SG,
 1612	 * or if at least one of the fragments is in highmem and the device
1613 * does not support DMA from it.
1614 */
1615 if (skb_shinfo(skb)->nr_frags &&
1616 (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
Herbert Xu364c6ba2006-06-09 16:10:40 -07001617 __skb_linearize(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001618 goto out_kfree_skb;
1619
1620 /* If packet is not checksummed and device does not support
1621 * checksumming for this protocol, complete checksumming here.
1622 */
Herbert Xu663ead32007-04-09 11:59:07 -07001623 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1624 skb_set_transport_header(skb, skb->csum_start -
1625 skb_headroom(skb));
1626
Herbert Xua2988302007-06-28 13:44:37 -07001627 if (!(dev->features & NETIF_F_GEN_CSUM) &&
1628 !((dev->features & NETIF_F_IP_CSUM) &&
1629 skb->protocol == htons(ETH_P_IP)) &&
1630 !((dev->features & NETIF_F_IPV6_CSUM) &&
1631 skb->protocol == htons(ETH_P_IPV6)))
Herbert Xu663ead32007-04-09 11:59:07 -07001632 if (skb_checksum_help(skb))
1633 goto out_kfree_skb;
1634 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001635
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001636gso:
Eric Dumazet2d7ceec2005-09-27 15:22:58 -07001637 spin_lock_prefetch(&dev->queue_lock);
1638
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001639 /* Disable soft irqs for various locks below. Also
1640 * stops preemption for RCU.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001641 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001642 rcu_read_lock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001643
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001644 /* Updates of qdisc are serialized by queue_lock.
 1645	 * The struct Qdisc which is pointed to by qdisc is now an
 1646	 * RCU structure - it may be accessed without acquiring
Linus Torvalds1da177e2005-04-16 15:20:36 -07001647 * a lock (but the structure may be stale.) The freeing of the
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001648 * qdisc will be deferred until it's known that there are no
Linus Torvalds1da177e2005-04-16 15:20:36 -07001649 * more references to it.
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001650 *
1651 * If the qdisc has an enqueue function, we still need to
Linus Torvalds1da177e2005-04-16 15:20:36 -07001652 * hold the queue_lock before calling it, since queue_lock
1653 * also serializes access to the device queue.
1654 */
1655
1656 q = rcu_dereference(dev->qdisc);
1657#ifdef CONFIG_NET_CLS_ACT
1658 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1659#endif
1660 if (q->enqueue) {
1661 /* Grab device queue */
1662 spin_lock(&dev->queue_lock);
Patrick McHardy85670cc2006-09-27 16:45:45 -07001663 q = dev->qdisc;
1664 if (q->enqueue) {
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07001665 /* reset queue_mapping to zero */
1666 skb->queue_mapping = 0;
Patrick McHardy85670cc2006-09-27 16:45:45 -07001667 rc = q->enqueue(skb, q);
1668 qdisc_run(dev);
1669 spin_unlock(&dev->queue_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001670
Patrick McHardy85670cc2006-09-27 16:45:45 -07001671 rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1672 goto out;
1673 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001674 spin_unlock(&dev->queue_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001675 }
1676
1677 /* The device has no queue. Common case for software devices:
 1678	   loopback and all sorts of tunnels...
 1679
Herbert Xu932ff272006-06-09 12:20:56 -07001680	   Really, it is unlikely that netif_tx_lock protection is necessary
 1681	   here. (e.g. loopback and IP tunnels are clean, ignoring statistics
Linus Torvalds1da177e2005-04-16 15:20:36 -07001682	   counters.)
 1683	   However, it is possible that they rely on the protection
 1684	   made by us here.
 1685
 1686	   Check this and shoot the lock. It is not prone to deadlocks.
 1687	   Either shoot the noqueue qdisc; it is even simpler 8)
1688 */
1689 if (dev->flags & IFF_UP) {
1690 int cpu = smp_processor_id(); /* ok because BHs are off */
1691
1692 if (dev->xmit_lock_owner != cpu) {
1693
1694 HARD_TX_LOCK(dev, cpu);
1695
Peter P Waskiewicz Jrf25f4e42007-07-06 13:36:20 -07001696 if (!netif_queue_stopped(dev) &&
1697 !netif_subqueue_stopped(dev, skb->queue_mapping)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001698 rc = 0;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001699 if (!dev_hard_start_xmit(skb, dev)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001700 HARD_TX_UNLOCK(dev);
1701 goto out;
1702 }
1703 }
1704 HARD_TX_UNLOCK(dev);
1705 if (net_ratelimit())
1706 printk(KERN_CRIT "Virtual device %s asks to "
1707 "queue packet!\n", dev->name);
1708 } else {
 1709			/* Recursion detected! It is possible,
 1710			 * unfortunately. */
1711 if (net_ratelimit())
1712 printk(KERN_CRIT "Dead loop on virtual device "
1713 "%s, fix it urgently!\n", dev->name);
1714 }
1715 }
1716
1717 rc = -ENETDOWN;
Herbert Xud4828d82006-06-22 02:28:18 -07001718 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001719
1720out_kfree_skb:
1721 kfree_skb(skb);
1722 return rc;
1723out:
Herbert Xud4828d82006-06-22 02:28:18 -07001724 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001725 return rc;
1726}
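
/*
 * Illustrative sketch (editor's addition): a minimal in-kernel transmit
 * through dev_queue_xmit(). ETH_P_IP and the payload are arbitrary example
 * values, and building the link-layer header is omitted for brevity.
 * Remember that the skb is consumed whether or not the call succeeds.
 */
#if 0
static int example_xmit(struct net_device *dev, const void *payload, int len)
{
	struct sk_buff *skb = alloc_skb(len + LL_RESERVED_SPACE(dev),
					GFP_ATOMIC);

	if (!skb)
		return -ENOMEM;
	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	memcpy(skb_put(skb, len), payload, len);
	skb->dev = dev;
	skb->protocol = htons(ETH_P_IP);

	/* May return a negative errno or a positive NET_XMIT_* code. */
	return dev_queue_xmit(skb);
}
#endif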
1727
1728
1729/*=======================================================================
1730 Receiver routines
1731 =======================================================================*/
1732
Stephen Hemminger6b2bedc2007-03-12 14:33:50 -07001733int netdev_max_backlog __read_mostly = 1000;
1734int netdev_budget __read_mostly = 300;
1735int weight_p __read_mostly = 64; /* old backlog weight */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001736
1737DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1738
1739
Linus Torvalds1da177e2005-04-16 15:20:36 -07001740/**
1741 * netif_rx - post buffer to the network code
1742 * @skb: buffer to post
1743 *
1744 * This function receives a packet from a device driver and queues it for
1745 * the upper (protocol) levels to process. It always succeeds. The buffer
1746 * may be dropped during processing for congestion control or by the
1747 * protocol layers.
1748 *
1749 * return values:
1750 * NET_RX_SUCCESS (no congestion)
1751 * NET_RX_CN_LOW (low congestion)
1752 * NET_RX_CN_MOD (moderate congestion)
1753 * NET_RX_CN_HIGH (high congestion)
1754 * NET_RX_DROP (packet was dropped)
1755 *
1756 */
1757
1758int netif_rx(struct sk_buff *skb)
1759{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001760 struct softnet_data *queue;
1761 unsigned long flags;
1762
1763 /* if netpoll wants it, pretend we never saw it */
1764 if (netpoll_rx(skb))
1765 return NET_RX_DROP;
1766
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07001767 if (!skb->tstamp.tv64)
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001768 net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001769
1770 /*
 1771	 * The code is rearranged so that the path is the shortest
 1772	 * when the CPU is congested but still operating.
1773 */
1774 local_irq_save(flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001775 queue = &__get_cpu_var(softnet_data);
1776
1777 __get_cpu_var(netdev_rx_stat).total++;
1778 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1779 if (queue->input_pkt_queue.qlen) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001780enqueue:
1781 dev_hold(skb->dev);
1782 __skb_queue_tail(&queue->input_pkt_queue, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783 local_irq_restore(flags);
Stephen Hemminger34008d82005-06-23 20:10:00 -07001784 return NET_RX_SUCCESS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001785 }
1786
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001787 napi_schedule(&queue->backlog);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001788 goto enqueue;
1789 }
1790
Linus Torvalds1da177e2005-04-16 15:20:36 -07001791 __get_cpu_var(netdev_rx_stat).dropped++;
1792 local_irq_restore(flags);
1793
1794 kfree_skb(skb);
1795 return NET_RX_DROP;
1796}
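
/*
 * Illustrative sketch (editor's addition): the classic non-NAPI receive
 * path, with a driver interrupt handler feeding netif_rx(). The
 * example_hw_*() names are hypothetical hardware accessors.
 */
#if 0
static void example_isr_rx(struct net_device *dev)
{
	unsigned int len = example_hw_rx_len(dev);	/* hypothetical */
	struct sk_buff *skb = netdev_alloc_skb(dev, len + NET_IP_ALIGN);

	if (!skb)
		return;				/* drop on memory pressure */
	skb_reserve(skb, NET_IP_ALIGN);		/* align the IP header */
	example_hw_copy_rx(dev, skb_put(skb, len));	/* hypothetical */
	skb->protocol = eth_type_trans(skb, dev);
	netif_rx(skb);				/* queue to per-CPU backlog */
}
#endif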
1797
1798int netif_rx_ni(struct sk_buff *skb)
1799{
1800 int err;
1801
1802 preempt_disable();
1803 err = netif_rx(skb);
1804 if (local_softirq_pending())
1805 do_softirq();
1806 preempt_enable();
1807
1808 return err;
1809}
1810
1811EXPORT_SYMBOL(netif_rx_ni);
1812
David S. Millerf2ccd8f2005-08-09 19:34:12 -07001813static inline struct net_device *skb_bond(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001814{
1815 struct net_device *dev = skb->dev;
1816
Jay Vosburgh8f903c72006-02-21 16:36:44 -08001817 if (dev->master) {
David S. Miller7ea49ed2006-08-14 17:08:36 -07001818 if (skb_bond_should_drop(skb)) {
Jay Vosburgh8f903c72006-02-21 16:36:44 -08001819 kfree_skb(skb);
1820 return NULL;
1821 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001822 skb->dev = dev->master;
Jay Vosburgh8f903c72006-02-21 16:36:44 -08001823 }
David S. Millerf2ccd8f2005-08-09 19:34:12 -07001824
1825 return dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001826}
1827
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001828
Linus Torvalds1da177e2005-04-16 15:20:36 -07001829static void net_tx_action(struct softirq_action *h)
1830{
1831 struct softnet_data *sd = &__get_cpu_var(softnet_data);
1832
1833 if (sd->completion_queue) {
1834 struct sk_buff *clist;
1835
1836 local_irq_disable();
1837 clist = sd->completion_queue;
1838 sd->completion_queue = NULL;
1839 local_irq_enable();
1840
1841 while (clist) {
1842 struct sk_buff *skb = clist;
1843 clist = clist->next;
1844
1845 BUG_TRAP(!atomic_read(&skb->users));
1846 __kfree_skb(skb);
1847 }
1848 }
1849
1850 if (sd->output_queue) {
1851 struct net_device *head;
1852
1853 local_irq_disable();
1854 head = sd->output_queue;
1855 sd->output_queue = NULL;
1856 local_irq_enable();
1857
1858 while (head) {
1859 struct net_device *dev = head;
1860 head = head->next_sched;
1861
1862 smp_mb__before_clear_bit();
1863 clear_bit(__LINK_STATE_SCHED, &dev->state);
1864
1865 if (spin_trylock(&dev->queue_lock)) {
1866 qdisc_run(dev);
1867 spin_unlock(&dev->queue_lock);
1868 } else {
1869 netif_schedule(dev);
1870 }
1871 }
1872 }
1873}
1874
Stephen Hemminger6f05f622007-03-08 20:46:03 -08001875static inline int deliver_skb(struct sk_buff *skb,
1876 struct packet_type *pt_prev,
1877 struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001878{
1879 atomic_inc(&skb->users);
David S. Millerf2ccd8f2005-08-09 19:34:12 -07001880 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001881}
1882
1883#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
Stephen Hemminger6229e362007-03-21 13:38:47 -07001884/* These hooks are defined here for ATM */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001885struct net_bridge;
1886struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
1887 unsigned char *addr);
Stephen Hemminger6229e362007-03-21 13:38:47 -07001888void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001889
Stephen Hemminger6229e362007-03-21 13:38:47 -07001890/*
 1891 * If the bridge module is loaded, call the bridging hook.
 1892 * Returns NULL if the packet was consumed.
1893 */
1894struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
1895 struct sk_buff *skb) __read_mostly;
1896static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
1897 struct packet_type **pt_prev, int *ret,
1898 struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001899{
1900 struct net_bridge_port *port;
1901
Stephen Hemminger6229e362007-03-21 13:38:47 -07001902 if (skb->pkt_type == PACKET_LOOPBACK ||
1903 (port = rcu_dereference(skb->dev->br_port)) == NULL)
1904 return skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001905
1906 if (*pt_prev) {
Stephen Hemminger6229e362007-03-21 13:38:47 -07001907 *ret = deliver_skb(skb, *pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001908 *pt_prev = NULL;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001909 }
1910
Stephen Hemminger6229e362007-03-21 13:38:47 -07001911 return br_handle_frame_hook(port, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001912}
1913#else
Stephen Hemminger6229e362007-03-21 13:38:47 -07001914#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001915#endif
1916
Patrick McHardyb863ceb2007-07-14 18:55:06 -07001917#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
1918struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
1919EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
1920
1921static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
1922 struct packet_type **pt_prev,
1923 int *ret,
1924 struct net_device *orig_dev)
1925{
1926 if (skb->dev->macvlan_port == NULL)
1927 return skb;
1928
1929 if (*pt_prev) {
1930 *ret = deliver_skb(skb, *pt_prev, orig_dev);
1931 *pt_prev = NULL;
1932 }
1933 return macvlan_handle_frame_hook(skb);
1934}
1935#else
1936#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb)
1937#endif
1938
Linus Torvalds1da177e2005-04-16 15:20:36 -07001939#ifdef CONFIG_NET_CLS_ACT
 1940/* TODO: Maybe we should just force sch_ingress to be compiled in
 1941 * when CONFIG_NET_CLS_ACT is enabled? Otherwise we pay for some useless
 1942 * instructions (a compare and two extra stores) right now if we don't
 1943 * have it on but do have CONFIG_NET_CLS_ACT.
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001944 * NOTE: This doesn't stop any functionality; if you don't have
 1945 * the ingress scheduler, you just can't add policies on ingress.
1946 *
1947 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001948static int ing_filter(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001949{
1950 struct Qdisc *q;
1951 struct net_device *dev = skb->dev;
1952 int result = TC_ACT_OK;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001953
Linus Torvalds1da177e2005-04-16 15:20:36 -07001954 if (dev->qdisc_ingress) {
1955 __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
1956 if (MAX_RED_LOOP < ttl++) {
Patrick McHardyc01003c2007-03-29 11:46:52 -07001957			printk(KERN_WARNING "Redir loop detected, dropping packet (%d->%d)\n",
1958 skb->iif, skb->dev->ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001959 return TC_ACT_SHOT;
1960 }
1961
1962 skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
1963
1964 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
David S. Miller86e65da2005-08-09 19:36:29 -07001965
Patrick McHardyfd44de72007-04-16 17:07:08 -07001966 spin_lock(&dev->ingress_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001967 if ((q = dev->qdisc_ingress) != NULL)
1968 result = q->enqueue(skb, q);
Patrick McHardyfd44de72007-04-16 17:07:08 -07001969 spin_unlock(&dev->ingress_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001970
1971 }
1972
1973 return result;
1974}
1975#endif
1976
1977int netif_receive_skb(struct sk_buff *skb)
1978{
1979 struct packet_type *ptype, *pt_prev;
David S. Millerf2ccd8f2005-08-09 19:34:12 -07001980 struct net_device *orig_dev;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001981 int ret = NET_RX_DROP;
Al Viro252e3342006-11-14 20:48:11 -08001982 __be16 type;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001983
1984 /* if we've gotten here through NAPI, check netpoll */
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001985 if (netpoll_receive_skb(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001986 return NET_RX_DROP;
1987
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07001988 if (!skb->tstamp.tv64)
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001989 net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001990
Patrick McHardyc01003c2007-03-29 11:46:52 -07001991 if (!skb->iif)
1992 skb->iif = skb->dev->ifindex;
David S. Miller86e65da2005-08-09 19:36:29 -07001993
David S. Millerf2ccd8f2005-08-09 19:34:12 -07001994 orig_dev = skb_bond(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001995
Jay Vosburgh8f903c72006-02-21 16:36:44 -08001996 if (!orig_dev)
1997 return NET_RX_DROP;
1998
Linus Torvalds1da177e2005-04-16 15:20:36 -07001999 __get_cpu_var(netdev_rx_stat).total++;
2000
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07002001 skb_reset_network_header(skb);
Arnaldo Carvalho de Melobadff6d2007-03-13 13:06:52 -03002002 skb_reset_transport_header(skb);
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07002003 skb->mac_len = skb->network_header - skb->mac_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002004
2005 pt_prev = NULL;
2006
2007 rcu_read_lock();
2008
2009#ifdef CONFIG_NET_CLS_ACT
2010 if (skb->tc_verd & TC_NCLS) {
2011 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2012 goto ncls;
2013 }
2014#endif
2015
2016 list_for_each_entry_rcu(ptype, &ptype_all, list) {
2017 if (!ptype->dev || ptype->dev == skb->dev) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002018 if (pt_prev)
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002019 ret = deliver_skb(skb, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002020 pt_prev = ptype;
2021 }
2022 }
2023
2024#ifdef CONFIG_NET_CLS_ACT
2025 if (pt_prev) {
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002026 ret = deliver_skb(skb, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002027		pt_prev = NULL; /* no one else should process this after */
2028 } else {
2029 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2030 }
2031
2032 ret = ing_filter(skb);
2033
2034 if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
2035 kfree_skb(skb);
2036 goto out;
2037 }
2038
2039 skb->tc_verd = 0;
2040ncls:
2041#endif
2042
Stephen Hemminger6229e362007-03-21 13:38:47 -07002043 skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2044 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002045 goto out;
Patrick McHardyb863ceb2007-07-14 18:55:06 -07002046 skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2047 if (!skb)
2048 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002049
2050 type = skb->protocol;
2051 list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
2052 if (ptype->type == type &&
2053 (!ptype->dev || ptype->dev == skb->dev)) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002054 if (pt_prev)
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002055 ret = deliver_skb(skb, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002056 pt_prev = ptype;
2057 }
2058 }
2059
2060 if (pt_prev) {
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002061 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002062 } else {
2063 kfree_skb(skb);
 2064		/* Jamal, now you will not be able to escape explaining
 2065		 * to me how you were going to use this. :-)
2066 */
2067 ret = NET_RX_DROP;
2068 }
2069
2070out:
2071 rcu_read_unlock();
2072 return ret;
2073}
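
/*
 * Illustrative sketch (editor's addition): how the ptype lists consulted
 * above get populated. A protocol tap registers a struct packet_type with
 * dev_add_pack(); type ETH_P_ALL lands on ptype_all, anything else on the
 * ptype_base hash. example_rcv() and example_ptype are hypothetical.
 */
#if 0
static int example_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev)
{
	/* deliver_skb() took a reference on our behalf; release it here. */
	kfree_skb(skb);
	return 0;
}

static struct packet_type example_ptype = {
	.type = __constant_htons(ETH_P_ALL),	/* tap every protocol */
	.func = example_rcv,
};

/* dev_add_pack(&example_ptype) at init, dev_remove_pack() at exit. */
#endif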
2074
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002075static int process_backlog(struct napi_struct *napi, int quota)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002076{
2077 int work = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002078 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2079 unsigned long start_time = jiffies;
2080
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002081 napi->weight = weight_p;
2082 do {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002083 struct sk_buff *skb;
2084 struct net_device *dev;
2085
2086 local_irq_disable();
2087 skb = __skb_dequeue(&queue->input_pkt_queue);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002088 if (!skb) {
2089 __napi_complete(napi);
2090 local_irq_enable();
2091 break;
2092 }
2093
Linus Torvalds1da177e2005-04-16 15:20:36 -07002094 local_irq_enable();
2095
2096 dev = skb->dev;
2097
2098 netif_receive_skb(skb);
2099
2100 dev_put(dev);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002101 } while (++work < quota && jiffies == start_time);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002102
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002103 return work;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002104}
2105
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002106/**
2107 * __napi_schedule - schedule for receive
 2108 * @n: entry to schedule
 2109 *
 2110 * The entry's receive function will be scheduled to run.
2111 */
2112void fastcall __napi_schedule(struct napi_struct *n)
2113{
2114 unsigned long flags;
2115
2116 local_irq_save(flags);
2117 list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2118 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2119 local_irq_restore(flags);
2120}
2121EXPORT_SYMBOL(__napi_schedule);
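
/*
 * Illustrative sketch (editor's addition): the driver side of the NAPI
 * machinery that net_rx_action() below drives. netif_napi_add(),
 * netif_rx_schedule() and netif_rx_complete() are the driver-facing
 * helpers of this era; the example_*() names are hypothetical.
 */
#if 0
static struct napi_struct example_napi;	/* registered at probe time with
					 * netif_napi_add(dev, &example_napi,
					 * example_poll, 64) */

static int example_poll(struct napi_struct *napi, int budget)
{
	int work = example_hw_rx(napi, budget);	/* hypothetical RX loop */

	if (work < budget) {
		netif_rx_complete(napi->dev, napi);	/* leave poll list */
		example_hw_irq_enable(napi->dev);	/* hypothetical */
	}
	return work;	/* must never exceed budget; see WARN_ON_ONCE below */
}

static irqreturn_t example_isr(int irq, void *dev_id)
{
	struct net_device *dev = dev_id;

	example_hw_irq_disable(dev);		/* hypothetical */
	netif_rx_schedule(dev, &example_napi);	/* runs example_poll() soon */
	return IRQ_HANDLED;
}
#endif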
2122
2123
Linus Torvalds1da177e2005-04-16 15:20:36 -07002124static void net_rx_action(struct softirq_action *h)
2125{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002126 struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002127 unsigned long start_time = jiffies;
Stephen Hemminger51b0bde2005-06-23 20:14:40 -07002128 int budget = netdev_budget;
Matt Mackall53fb95d2005-08-11 19:27:43 -07002129 void *have;
2130
Linus Torvalds1da177e2005-04-16 15:20:36 -07002131 local_irq_disable();
2132
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002133 while (!list_empty(list)) {
2134 struct napi_struct *n;
2135 int work, weight;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002136
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002137 /* If softirq window is exhuasted then punt.
2138 *
2139 * Note that this is a slight policy change from the
2140 * previous NAPI code, which would allow up to 2
2141 * jiffies to pass before breaking out. The test
2142 * used to be "jiffies - start_time > 1".
2143 */
2144 if (unlikely(budget <= 0 || jiffies != start_time))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002145 goto softnet_break;
2146
2147 local_irq_enable();
2148
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002149 /* Even though interrupts have been re-enabled, this
2150 * access is safe because interrupts can only add new
2151 * entries to the tail of this list, and only ->poll()
2152 * calls can remove this head entry from the list.
2153 */
2154 n = list_entry(list->next, struct napi_struct, poll_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002155
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002156 have = netpoll_poll_lock(n);
2157
2158 weight = n->weight;
2159
2160 work = n->poll(n, weight);
2161
2162 WARN_ON_ONCE(work > weight);
2163
2164 budget -= work;
2165
2166 local_irq_disable();
2167
2168 /* Drivers must not modify the NAPI state if they
2169 * consume the entire weight. In such cases this code
2170 * still "owns" the NAPI instance and therefore can
2171 * move the instance around on the list at-will.
2172 */
2173 if (unlikely(work == weight))
2174 list_move_tail(&n->poll_list, list);
2175
2176 netpoll_poll_unlock(have);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002177 }
2178out:
Shannon Nelson515e06c2007-06-23 23:09:23 -07002179 local_irq_enable();
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002180
Chris Leechdb217332006-06-17 21:24:58 -07002181#ifdef CONFIG_NET_DMA
2182 /*
2183 * There may not be any more sk_buffs coming right now, so push
2184 * any pending DMA copies to hardware
2185 */
Dan Williamsd379b012007-07-09 11:56:42 -07002186 if (!cpus_empty(net_dma.channel_mask)) {
2187 int chan_idx;
2188 for_each_cpu_mask(chan_idx, net_dma.channel_mask) {
2189 struct dma_chan *chan = net_dma.channels[chan_idx];
2190 if (chan)
2191 dma_async_memcpy_issue_pending(chan);
2192 }
Chris Leechdb217332006-06-17 21:24:58 -07002193 }
2194#endif
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002195
Linus Torvalds1da177e2005-04-16 15:20:36 -07002196 return;
2197
2198softnet_break:
2199 __get_cpu_var(netdev_rx_stat).time_squeeze++;
2200 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2201 goto out;
2202}
2203
2204static gifconf_func_t * gifconf_list [NPROTO];
2205
2206/**
2207 * register_gifconf - register a SIOCGIF handler
2208 * @family: Address family
2209 * @gifconf: Function handler
2210 *
2211 * Register protocol dependent address dumping routines. The handler
2212 * that is passed must not be freed or reused until it has been replaced
2213 * by another handler.
2214 */
2215int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2216{
2217 if (family >= NPROTO)
2218 return -EINVAL;
2219 gifconf_list[family] = gifconf;
2220 return 0;
2221}
2222
2223
2224/*
2225 * Map an interface index to its name (SIOCGIFNAME)
2226 */
2227
2228/*
2229 * We need this ioctl for efficient implementation of the
2230 * if_indextoname() function required by the IPv6 API. Without
2231 * it, we would have to search all the interfaces to find a
2232 * match. --pb
2233 */
2234
Eric W. Biederman881d9662007-09-17 11:56:21 -07002235static int dev_ifname(struct net *net, struct ifreq __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002236{
2237 struct net_device *dev;
2238 struct ifreq ifr;
2239
2240 /*
2241 * Fetch the caller's info block.
2242 */
2243
2244 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2245 return -EFAULT;
2246
2247 read_lock(&dev_base_lock);
Eric W. Biederman881d9662007-09-17 11:56:21 -07002248 dev = __dev_get_by_index(net, ifr.ifr_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002249 if (!dev) {
2250 read_unlock(&dev_base_lock);
2251 return -ENODEV;
2252 }
2253
2254 strcpy(ifr.ifr_name, dev->name);
2255 read_unlock(&dev_base_lock);
2256
2257 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2258 return -EFAULT;
2259 return 0;
2260}
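
/*
 * Illustrative sketch (editor's addition): the userspace view of the
 * SIOCGIFNAME handler above, essentially what if_indextoname() does.
 * Plain userspace C; assumes <sys/socket.h>, <sys/ioctl.h>, <net/if.h>,
 * <stdio.h>, <string.h> and <unistd.h>, with error handling trimmed.
 */
#if 0
static void example_print_ifname(int ifindex)
{
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_ifindex = ifindex;
	if (ioctl(fd, SIOCGIFNAME, &ifr) == 0)
		printf("ifindex %d is %s\n", ifindex, ifr.ifr_name);
	close(fd);
}
#endif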
2261
2262/*
2263 * Perform a SIOCGIFCONF call. This structure will change
2264 * size eventually, and there is nothing I can do about it.
2265 * Thus we will need a 'compatibility mode'.
2266 */
2267
Eric W. Biederman881d9662007-09-17 11:56:21 -07002268static int dev_ifconf(struct net *net, char __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002269{
2270 struct ifconf ifc;
2271 struct net_device *dev;
2272 char __user *pos;
2273 int len;
2274 int total;
2275 int i;
2276
2277 /*
2278 * Fetch the caller's info block.
2279 */
2280
2281 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2282 return -EFAULT;
2283
2284 pos = ifc.ifc_buf;
2285 len = ifc.ifc_len;
2286
2287 /*
2288 * Loop over the interfaces, and write an info block for each.
2289 */
2290
2291 total = 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -07002292 for_each_netdev(net, dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002293 for (i = 0; i < NPROTO; i++) {
2294 if (gifconf_list[i]) {
2295 int done;
2296 if (!pos)
2297 done = gifconf_list[i](dev, NULL, 0);
2298 else
2299 done = gifconf_list[i](dev, pos + total,
2300 len - total);
2301 if (done < 0)
2302 return -EFAULT;
2303 total += done;
2304 }
2305 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002306 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002307
2308 /*
2309 * All done. Write the updated control block back to the caller.
2310 */
2311 ifc.ifc_len = total;
2312
2313 /*
2314 * Both BSD and Solaris return 0 here, so we do too.
2315 */
2316 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2317}
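
/*
 * Illustrative sketch (editor's addition): calling SIOCGIFCONF from
 * userspace against the handler above. A fixed buffer keeps the sketch
 * short; real callers grow the buffer until ifc_len stops increasing.
 * Same userspace headers as the SIOCGIFNAME sketch.
 */
#if 0
static void example_list_interfaces(void)
{
	struct ifreq reqs[16];
	struct ifconf ifc;
	int i, fd = socket(AF_INET, SOCK_DGRAM, 0);

	ifc.ifc_len = sizeof(reqs);
	ifc.ifc_req = reqs;
	if (ioctl(fd, SIOCGIFCONF, &ifc) == 0)
		for (i = 0; i < ifc.ifc_len / (int)sizeof(struct ifreq); i++)
			printf("%s\n", reqs[i].ifr_name);
	close(fd);
}
#endif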
2318
2319#ifdef CONFIG_PROC_FS
2320/*
2321 * This is invoked by the /proc filesystem handler to display a device
2322 * in detail.
2323 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002324void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2325{
Eric W. Biederman881d9662007-09-17 11:56:21 -07002326 struct net *net = seq->private;
Pavel Emelianov7562f872007-05-03 15:13:45 -07002327 loff_t off;
2328 struct net_device *dev;
2329
Linus Torvalds1da177e2005-04-16 15:20:36 -07002330 read_lock(&dev_base_lock);
Pavel Emelianov7562f872007-05-03 15:13:45 -07002331 if (!*pos)
2332 return SEQ_START_TOKEN;
2333
2334 off = 1;
Eric W. Biederman881d9662007-09-17 11:56:21 -07002335 for_each_netdev(net, dev)
Pavel Emelianov7562f872007-05-03 15:13:45 -07002336 if (off++ == *pos)
2337 return dev;
2338
2339 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002340}
2341
2342void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2343{
Eric W. Biederman881d9662007-09-17 11:56:21 -07002344 struct net *net = seq->private;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002345 ++*pos;
Pavel Emelianov7562f872007-05-03 15:13:45 -07002346 return v == SEQ_START_TOKEN ?
Eric W. Biederman881d9662007-09-17 11:56:21 -07002347 first_net_device(net) : next_net_device((struct net_device *)v);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002348}
2349
2350void dev_seq_stop(struct seq_file *seq, void *v)
2351{
2352 read_unlock(&dev_base_lock);
2353}
2354
2355static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2356{
Rusty Russellc45d2862007-03-28 14:29:08 -07002357 struct net_device_stats *stats = dev->get_stats(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002358
Rusty Russell5a1b5892007-04-28 21:04:03 -07002359 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2360 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2361 dev->name, stats->rx_bytes, stats->rx_packets,
2362 stats->rx_errors,
2363 stats->rx_dropped + stats->rx_missed_errors,
2364 stats->rx_fifo_errors,
2365 stats->rx_length_errors + stats->rx_over_errors +
2366 stats->rx_crc_errors + stats->rx_frame_errors,
2367 stats->rx_compressed, stats->multicast,
2368 stats->tx_bytes, stats->tx_packets,
2369 stats->tx_errors, stats->tx_dropped,
2370 stats->tx_fifo_errors, stats->collisions,
2371 stats->tx_carrier_errors +
2372 stats->tx_aborted_errors +
2373 stats->tx_window_errors +
2374 stats->tx_heartbeat_errors,
2375 stats->tx_compressed);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002376}
2377
2378/*
 2379 * Called from the PROCfs module. This now uses the new arbitrary-sized
 2380 * /proc/net interface to create /proc/net/dev.
2381 */
2382static int dev_seq_show(struct seq_file *seq, void *v)
2383{
2384 if (v == SEQ_START_TOKEN)
2385 seq_puts(seq, "Inter-| Receive "
2386 " | Transmit\n"
2387 " face |bytes packets errs drop fifo frame "
2388 "compressed multicast|bytes packets errs "
2389 "drop fifo colls carrier compressed\n");
2390 else
2391 dev_seq_printf_stats(seq, v);
2392 return 0;
2393}
2394
2395static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2396{
2397 struct netif_rx_stats *rc = NULL;
2398
2399 while (*pos < NR_CPUS)
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002400 if (cpu_online(*pos)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002401 rc = &per_cpu(netdev_rx_stat, *pos);
2402 break;
2403 } else
2404 ++*pos;
2405 return rc;
2406}
2407
2408static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2409{
2410 return softnet_get_online(pos);
2411}
2412
2413static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2414{
2415 ++*pos;
2416 return softnet_get_online(pos);
2417}
2418
2419static void softnet_seq_stop(struct seq_file *seq, void *v)
2420{
2421}
2422
2423static int softnet_seq_show(struct seq_file *seq, void *v)
2424{
2425 struct netif_rx_stats *s = v;
2426
2427 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
Stephen Hemminger31aa02c2005-06-23 20:12:48 -07002428 s->total, s->dropped, s->time_squeeze, 0,
Stephen Hemmingerc1ebcdb2005-06-23 20:08:59 -07002429 0, 0, 0, 0, /* was fastroute */
 2430		   s->cpu_collision);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002431 return 0;
2432}
2433
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002434static const struct seq_operations dev_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002435 .start = dev_seq_start,
2436 .next = dev_seq_next,
2437 .stop = dev_seq_stop,
2438 .show = dev_seq_show,
2439};
2440
2441static int dev_seq_open(struct inode *inode, struct file *file)
2442{
Eric W. Biederman881d9662007-09-17 11:56:21 -07002443 struct seq_file *seq;
2444 int res;
2445 res = seq_open(file, &dev_seq_ops);
2446 if (!res) {
2447 seq = file->private_data;
Eric W. Biederman077130c2007-09-13 09:18:57 +02002448 seq->private = get_proc_net(inode);
2449 if (!seq->private) {
2450 seq_release(inode, file);
2451 res = -ENXIO;
2452 }
Eric W. Biederman881d9662007-09-17 11:56:21 -07002453 }
2454 return res;
2455}
2456
2457static int dev_seq_release(struct inode *inode, struct file *file)
2458{
2459 struct seq_file *seq = file->private_data;
2460 struct net *net = seq->private;
2461 put_net(net);
2462 return seq_release(inode, file);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002463}
2464
Arjan van de Ven9a321442007-02-12 00:55:35 -08002465static const struct file_operations dev_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002466 .owner = THIS_MODULE,
2467 .open = dev_seq_open,
2468 .read = seq_read,
2469 .llseek = seq_lseek,
Eric W. Biederman881d9662007-09-17 11:56:21 -07002470 .release = dev_seq_release,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002471};
2472
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002473static const struct seq_operations softnet_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002474 .start = softnet_seq_start,
2475 .next = softnet_seq_next,
2476 .stop = softnet_seq_stop,
2477 .show = softnet_seq_show,
2478};
2479
2480static int softnet_seq_open(struct inode *inode, struct file *file)
2481{
2482 return seq_open(file, &softnet_seq_ops);
2483}
2484
Arjan van de Ven9a321442007-02-12 00:55:35 -08002485static const struct file_operations softnet_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002486 .owner = THIS_MODULE,
2487 .open = softnet_seq_open,
2488 .read = seq_read,
2489 .llseek = seq_lseek,
2490 .release = seq_release,
2491};
2492
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07002493static void *ptype_get_idx(loff_t pos)
2494{
2495 struct packet_type *pt = NULL;
2496 loff_t i = 0;
2497 int t;
2498
2499 list_for_each_entry_rcu(pt, &ptype_all, list) {
2500 if (i == pos)
2501 return pt;
2502 ++i;
2503 }
2504
2505 for (t = 0; t < 16; t++) {
2506 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2507 if (i == pos)
2508 return pt;
2509 ++i;
2510 }
2511 }
2512 return NULL;
2513}
2514
2515static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2516{
2517 rcu_read_lock();
2518 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2519}
2520
2521static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2522{
2523 struct packet_type *pt;
2524 struct list_head *nxt;
2525 int hash;
2526
2527 ++*pos;
2528 if (v == SEQ_START_TOKEN)
2529 return ptype_get_idx(0);
2530
2531 pt = v;
2532 nxt = pt->list.next;
2533 if (pt->type == htons(ETH_P_ALL)) {
2534 if (nxt != &ptype_all)
2535 goto found;
2536 hash = 0;
2537 nxt = ptype_base[0].next;
2538 } else
2539 hash = ntohs(pt->type) & 15;
2540
2541 while (nxt == &ptype_base[hash]) {
2542 if (++hash >= 16)
2543 return NULL;
2544 nxt = ptype_base[hash].next;
2545 }
2546found:
2547 return list_entry(nxt, struct packet_type, list);
2548}
2549
2550static void ptype_seq_stop(struct seq_file *seq, void *v)
2551{
2552 rcu_read_unlock();
2553}
2554
2555static void ptype_seq_decode(struct seq_file *seq, void *sym)
2556{
2557#ifdef CONFIG_KALLSYMS
2558 unsigned long offset = 0, symsize;
2559 const char *symname;
2560 char *modname;
2561 char namebuf[128];
2562
2563 symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
2564 &modname, namebuf);
2565
2566 if (symname) {
2567 char *delim = ":";
2568
2569 if (!modname)
2570 modname = delim = "";
2571 seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim,
2572 symname, offset);
2573 return;
2574 }
2575#endif
2576
2577 seq_printf(seq, "[%p]", sym);
2578}
2579
2580static int ptype_seq_show(struct seq_file *seq, void *v)
2581{
2582 struct packet_type *pt = v;
2583
2584 if (v == SEQ_START_TOKEN)
2585 seq_puts(seq, "Type Device Function\n");
2586 else {
2587 if (pt->type == htons(ETH_P_ALL))
2588 seq_puts(seq, "ALL ");
2589 else
2590 seq_printf(seq, "%04x", ntohs(pt->type));
2591
2592 seq_printf(seq, " %-8s ",
2593 pt->dev ? pt->dev->name : "");
2594 ptype_seq_decode(seq, pt->func);
2595 seq_putc(seq, '\n');
2596 }
2597
2598 return 0;
2599}
2600
2601static const struct seq_operations ptype_seq_ops = {
2602 .start = ptype_seq_start,
2603 .next = ptype_seq_next,
2604 .stop = ptype_seq_stop,
2605 .show = ptype_seq_show,
2606};
2607
2608static int ptype_seq_open(struct inode *inode, struct file *file)
2609{
2610 return seq_open(file, &ptype_seq_ops);
2611}
2612
2613static const struct file_operations ptype_seq_fops = {
2614 .owner = THIS_MODULE,
2615 .open = ptype_seq_open,
2616 .read = seq_read,
2617 .llseek = seq_lseek,
2618 .release = seq_release,
2619};
2620
2621
Eric W. Biederman881d9662007-09-17 11:56:21 -07002622static int dev_proc_net_init(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002623{
2624 int rc = -ENOMEM;
2625
Eric W. Biederman881d9662007-09-17 11:56:21 -07002626 if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002627 goto out;
Eric W. Biederman881d9662007-09-17 11:56:21 -07002628 if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002629 goto out_dev;
Eric W. Biederman881d9662007-09-17 11:56:21 -07002630 if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02002631 goto out_softnet;
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07002632
Eric W. Biederman881d9662007-09-17 11:56:21 -07002633 if (wext_proc_init(net))
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02002634 goto out_ptype;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002635 rc = 0;
2636out:
2637 return rc;
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02002638out_ptype:
Eric W. Biederman881d9662007-09-17 11:56:21 -07002639 proc_net_remove(net, "ptype");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002640out_softnet:
Eric W. Biederman881d9662007-09-17 11:56:21 -07002641 proc_net_remove(net, "softnet_stat");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002642out_dev:
Eric W. Biederman881d9662007-09-17 11:56:21 -07002643 proc_net_remove(net, "dev");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002644 goto out;
2645}
Eric W. Biederman881d9662007-09-17 11:56:21 -07002646
2647static void dev_proc_net_exit(struct net *net)
2648{
2649 wext_proc_exit(net);
2650
2651 proc_net_remove(net, "ptype");
2652 proc_net_remove(net, "softnet_stat");
2653 proc_net_remove(net, "dev");
2654}
2655
2656static struct pernet_operations dev_proc_ops = {
2657 .init = dev_proc_net_init,
2658 .exit = dev_proc_net_exit,
2659};
2660
2661static int __init dev_proc_init(void)
2662{
2663 return register_pernet_subsys(&dev_proc_ops);
2664}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002665#else
2666#define dev_proc_init() 0
2667#endif /* CONFIG_PROC_FS */
2668
2669
2670/**
2671 * netdev_set_master - set up master/slave pair
2672 * @slave: slave device
2673 * @master: new master device
2674 *
2675 * Changes the master device of the slave. Pass %NULL to break the
2676 * bonding. The caller must hold the RTNL semaphore. On a failure
2677 * a negative errno code is returned. On success the reference counts
2678 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2679 * function returns zero.
2680 */
2681int netdev_set_master(struct net_device *slave, struct net_device *master)
2682{
2683 struct net_device *old = slave->master;
2684
2685 ASSERT_RTNL();
2686
2687 if (master) {
2688 if (old)
2689 return -EBUSY;
2690 dev_hold(master);
2691 }
2692
2693 slave->master = master;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002694
Linus Torvalds1da177e2005-04-16 15:20:36 -07002695 synchronize_net();
2696
2697 if (old)
2698 dev_put(old);
2699
2700 if (master)
2701 slave->flags |= IFF_SLAVE;
2702 else
2703 slave->flags &= ~IFF_SLAVE;
2704
2705 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2706 return 0;
2707}
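
/*
 * Illustrative sketch (editor's addition): enslaving a device in the style
 * of the bonding driver, and releasing it again by passing NULL. Must run
 * under the RTNL, as the ASSERT_RTNL() above enforces.
 */
#if 0
static int example_enslave(struct net_device *bond_dev,
			   struct net_device *slave_dev)
{
	int err;

	rtnl_lock();
	err = netdev_set_master(slave_dev, bond_dev);	/* enslave */
	if (!err)
		err = netdev_set_master(slave_dev, NULL);	/* release */
	rtnl_unlock();
	return err;
}
#endif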
2708
Patrick McHardy4417da62007-06-27 01:28:10 -07002709static void __dev_set_promiscuity(struct net_device *dev, int inc)
2710{
2711 unsigned short old_flags = dev->flags;
2712
Patrick McHardy24023452007-07-14 18:51:31 -07002713 ASSERT_RTNL();
2714
Patrick McHardy4417da62007-06-27 01:28:10 -07002715 if ((dev->promiscuity += inc) == 0)
2716 dev->flags &= ~IFF_PROMISC;
2717 else
2718 dev->flags |= IFF_PROMISC;
2719 if (dev->flags != old_flags) {
2720 printk(KERN_INFO "device %s %s promiscuous mode\n",
2721 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2722 "left");
2723 audit_log(current->audit_context, GFP_ATOMIC,
2724 AUDIT_ANOM_PROMISCUOUS,
2725 "dev=%s prom=%d old_prom=%d auid=%u",
2726 dev->name, (dev->flags & IFF_PROMISC),
2727 (old_flags & IFF_PROMISC),
2728 audit_get_loginuid(current->audit_context));
Patrick McHardy24023452007-07-14 18:51:31 -07002729
2730 if (dev->change_rx_flags)
2731 dev->change_rx_flags(dev, IFF_PROMISC);
Patrick McHardy4417da62007-06-27 01:28:10 -07002732 }
2733}
2734
Linus Torvalds1da177e2005-04-16 15:20:36 -07002735/**
2736 * dev_set_promiscuity - update promiscuity count on a device
2737 * @dev: device
2738 * @inc: modifier
2739 *
Stephen Hemminger3041a062006-05-26 13:25:24 -07002740 * Add or remove promiscuity from a device. While the count in the device
Linus Torvalds1da177e2005-04-16 15:20:36 -07002741 * remains above zero the interface remains promiscuous. Once it hits zero
 2742 * the device reverts to normal filtering operation. A negative inc
2743 * value is used to drop promiscuity on the device.
2744 */
2745void dev_set_promiscuity(struct net_device *dev, int inc)
2746{
2747 unsigned short old_flags = dev->flags;
2748
Patrick McHardy4417da62007-06-27 01:28:10 -07002749 __dev_set_promiscuity(dev, inc);
2750 if (dev->flags != old_flags)
2751 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002752}
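
/*
 * Illustrative sketch (editor's addition): how a packet-capture style user
 * might toggle promiscuous mode with balanced +1/-1 increments, under the
 * RTNL since __dev_set_promiscuity() asserts it. example_* names are
 * hypothetical.
 */
#if 0
static void example_capture_start(struct net_device *dev)
{
	rtnl_lock();
	dev_set_promiscuity(dev, 1);	/* count 0 -> 1, IFF_PROMISC set */
	rtnl_unlock();
}

static void example_capture_stop(struct net_device *dev)
{
	rtnl_lock();
	dev_set_promiscuity(dev, -1);	/* count back to 0, flag cleared */
	rtnl_unlock();
}
#endif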
2753
2754/**
2755 * dev_set_allmulti - update allmulti count on a device
2756 * @dev: device
2757 * @inc: modifier
2758 *
2759 * Add or remove reception of all multicast frames to a device. While the
 2760 * count in the device remains above zero the interface remains listening
 2761 * to all multicast frames. Once it hits zero the device reverts to normal
2762 * filtering operation. A negative @inc value is used to drop the counter
2763 * when releasing a resource needing all multicasts.
2764 */
2765
2766void dev_set_allmulti(struct net_device *dev, int inc)
2767{
2768 unsigned short old_flags = dev->flags;
2769
Patrick McHardy24023452007-07-14 18:51:31 -07002770 ASSERT_RTNL();
2771
Linus Torvalds1da177e2005-04-16 15:20:36 -07002772 dev->flags |= IFF_ALLMULTI;
2773 if ((dev->allmulti += inc) == 0)
2774 dev->flags &= ~IFF_ALLMULTI;
Patrick McHardy24023452007-07-14 18:51:31 -07002775 if (dev->flags ^ old_flags) {
2776 if (dev->change_rx_flags)
2777 dev->change_rx_flags(dev, IFF_ALLMULTI);
Patrick McHardy4417da62007-06-27 01:28:10 -07002778 dev_set_rx_mode(dev);
Patrick McHardy24023452007-07-14 18:51:31 -07002779 }
Patrick McHardy4417da62007-06-27 01:28:10 -07002780}
2781
2782/*
2783 * Upload unicast and multicast address lists to device and
 2784 * configure RX filtering. When the device doesn't support unicast
 2785 * filtering, it is put in promiscuous mode while unicast addresses
2786 * are present.
2787 */
2788void __dev_set_rx_mode(struct net_device *dev)
2789{
2790 /* dev_open will call this function so the list will stay sane. */
2791 if (!(dev->flags&IFF_UP))
2792 return;
2793
2794 if (!netif_device_present(dev))
YOSHIFUJI Hideaki40b77c92007-07-19 10:43:23 +09002795 return;
Patrick McHardy4417da62007-06-27 01:28:10 -07002796
2797 if (dev->set_rx_mode)
2798 dev->set_rx_mode(dev);
2799 else {
 2800		/* Unicast address changes may only happen under the rtnl,
2801 * therefore calling __dev_set_promiscuity here is safe.
2802 */
2803 if (dev->uc_count > 0 && !dev->uc_promisc) {
2804 __dev_set_promiscuity(dev, 1);
2805 dev->uc_promisc = 1;
2806 } else if (dev->uc_count == 0 && dev->uc_promisc) {
2807 __dev_set_promiscuity(dev, -1);
2808 dev->uc_promisc = 0;
2809 }
2810
2811 if (dev->set_multicast_list)
2812 dev->set_multicast_list(dev);
2813 }
2814}
2815
2816void dev_set_rx_mode(struct net_device *dev)
2817{
2818 netif_tx_lock_bh(dev);
2819 __dev_set_rx_mode(dev);
2820 netif_tx_unlock_bh(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002821}
2822
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07002823int __dev_addr_delete(struct dev_addr_list **list, int *count,
2824 void *addr, int alen, int glbl)
Patrick McHardybf742482007-06-27 01:26:19 -07002825{
2826 struct dev_addr_list *da;
2827
2828 for (; (da = *list) != NULL; list = &da->next) {
2829 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
2830 alen == da->da_addrlen) {
2831 if (glbl) {
2832 int old_glbl = da->da_gusers;
2833 da->da_gusers = 0;
2834 if (old_glbl == 0)
2835 break;
2836 }
2837 if (--da->da_users)
2838 return 0;
2839
2840 *list = da->next;
2841 kfree(da);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07002842 (*count)--;
Patrick McHardybf742482007-06-27 01:26:19 -07002843 return 0;
2844 }
2845 }
2846 return -ENOENT;
2847}
2848
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07002849int __dev_addr_add(struct dev_addr_list **list, int *count,
2850 void *addr, int alen, int glbl)
Patrick McHardybf742482007-06-27 01:26:19 -07002851{
2852 struct dev_addr_list *da;
2853
2854 for (da = *list; da != NULL; da = da->next) {
2855 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
2856 da->da_addrlen == alen) {
2857 if (glbl) {
2858 int old_glbl = da->da_gusers;
2859 da->da_gusers = 1;
2860 if (old_glbl)
2861 return 0;
2862 }
2863 da->da_users++;
2864 return 0;
2865 }
2866 }
2867
2868 da = kmalloc(sizeof(*da), GFP_ATOMIC);
2869 if (da == NULL)
2870 return -ENOMEM;
2871 memcpy(da->da_addr, addr, alen);
2872 da->da_addrlen = alen;
2873 da->da_users = 1;
2874 da->da_gusers = glbl ? 1 : 0;
2875 da->next = *list;
2876 *list = da;
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07002877 (*count)++;
Patrick McHardybf742482007-06-27 01:26:19 -07002878 return 0;
2879}
2880
Patrick McHardy4417da62007-06-27 01:28:10 -07002881/**
2882 * dev_unicast_delete - Release secondary unicast address.
2883 * @dev: device
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07002884 * @addr: address to delete
2885 * @alen: length of @addr
Patrick McHardy4417da62007-06-27 01:28:10 -07002886 *
2887 * Release reference to a secondary unicast address and remove it
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07002888 * from the device if the reference count drops to zero.
Patrick McHardy4417da62007-06-27 01:28:10 -07002889 *
2890 * The caller must hold the rtnl_mutex.
2891 */
2892int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
2893{
2894 int err;
2895
2896 ASSERT_RTNL();
2897
2898 netif_tx_lock_bh(dev);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07002899 err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
2900 if (!err)
Patrick McHardy4417da62007-06-27 01:28:10 -07002901 __dev_set_rx_mode(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07002902 netif_tx_unlock_bh(dev);
2903 return err;
2904}
2905EXPORT_SYMBOL(dev_unicast_delete);
2906
2907/**
2908 * dev_unicast_add - add a secondary unicast address
2909 * @dev: device
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07002910 *	@addr: address to add
2911 * @alen: length of @addr
Patrick McHardy4417da62007-06-27 01:28:10 -07002912 *
2913 * Add a secondary unicast address to the device or increase
2914 * the reference count if it already exists.
2915 *
2916 * The caller must hold the rtnl_mutex.
2917 */
2918int dev_unicast_add(struct net_device *dev, void *addr, int alen)
2919{
2920 int err;
2921
2922 ASSERT_RTNL();
2923
2924 netif_tx_lock_bh(dev);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07002925 err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
2926 if (!err)
Patrick McHardy4417da62007-06-27 01:28:10 -07002927 __dev_set_rx_mode(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07002928 netif_tx_unlock_bh(dev);
2929 return err;
2930}
2931EXPORT_SYMBOL(dev_unicast_add);
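
/*
 * Illustrative sketch (editor's addition): balanced use of the secondary
 * unicast helpers above, e.g. from a virtual device stacked on a real one.
 * The six-byte address is an arbitrary locally-administered example value;
 * example_listen_extra_mac() is hypothetical.
 */
#if 0
static int example_listen_extra_mac(struct net_device *lower_dev)
{
	static const unsigned char extra[ETH_ALEN] =
		{ 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };	/* arbitrary */
	int err;

	ASSERT_RTNL();
	err = dev_unicast_add(lower_dev, (void *)extra, ETH_ALEN);
	if (err)
		return err;
	/* ... later, when the address is no longer needed: */
	return dev_unicast_delete(lower_dev, (void *)extra, ETH_ALEN);
}
#endif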
2932
Denis Cheng12972622007-07-18 02:12:56 -07002933static void __dev_addr_discard(struct dev_addr_list **list)
2934{
2935 struct dev_addr_list *tmp;
2936
2937 while (*list != NULL) {
2938 tmp = *list;
2939 *list = tmp->next;
2940 if (tmp->da_users > tmp->da_gusers)
2941 printk("__dev_addr_discard: address leakage! "
2942 "da_users=%d\n", tmp->da_users);
2943 kfree(tmp);
2944 }
2945}
2946
Denis Cheng26cc2522007-07-18 02:12:03 -07002947static void dev_addr_discard(struct net_device *dev)
Patrick McHardy4417da62007-06-27 01:28:10 -07002948{
2949 netif_tx_lock_bh(dev);
Denis Cheng26cc2522007-07-18 02:12:03 -07002950
Patrick McHardy4417da62007-06-27 01:28:10 -07002951 __dev_addr_discard(&dev->uc_list);
2952 dev->uc_count = 0;
Patrick McHardy4417da62007-06-27 01:28:10 -07002953
Denis Cheng456ad752007-07-18 02:10:54 -07002954 __dev_addr_discard(&dev->mc_list);
2955 dev->mc_count = 0;
Denis Cheng26cc2522007-07-18 02:12:03 -07002956
Denis Cheng456ad752007-07-18 02:10:54 -07002957 netif_tx_unlock_bh(dev);
2958}
2959
Linus Torvalds1da177e2005-04-16 15:20:36 -07002960unsigned dev_get_flags(const struct net_device *dev)
2961{
2962 unsigned flags;
2963
2964 flags = (dev->flags & ~(IFF_PROMISC |
2965 IFF_ALLMULTI |
Stefan Rompfb00055a2006-03-20 17:09:11 -08002966 IFF_RUNNING |
2967 IFF_LOWER_UP |
2968 IFF_DORMANT)) |
Linus Torvalds1da177e2005-04-16 15:20:36 -07002969 (dev->gflags & (IFF_PROMISC |
2970 IFF_ALLMULTI));
2971
Stefan Rompfb00055a2006-03-20 17:09:11 -08002972 if (netif_running(dev)) {
2973 if (netif_oper_up(dev))
2974 flags |= IFF_RUNNING;
2975 if (netif_carrier_ok(dev))
2976 flags |= IFF_LOWER_UP;
2977 if (netif_dormant(dev))
2978 flags |= IFF_DORMANT;
2979 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002980
2981 return flags;
2982}
2983
2984int dev_change_flags(struct net_device *dev, unsigned flags)
2985{
Thomas Graf7c355f52007-06-05 16:03:03 -07002986 int ret, changes;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002987 int old_flags = dev->flags;
2988
Patrick McHardy24023452007-07-14 18:51:31 -07002989 ASSERT_RTNL();
2990
Linus Torvalds1da177e2005-04-16 15:20:36 -07002991 /*
2992 * Set the flags on our device.
2993 */
2994
2995 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2996 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2997 IFF_AUTOMEDIA)) |
2998 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2999 IFF_ALLMULTI));
3000
3001 /*
3002 * Load in the correct multicast list now the flags have changed.
3003 */
3004
Patrick McHardy24023452007-07-14 18:51:31 -07003005 if (dev->change_rx_flags && (dev->flags ^ flags) & IFF_MULTICAST)
3006 dev->change_rx_flags(dev, IFF_MULTICAST);
3007
Patrick McHardy4417da62007-06-27 01:28:10 -07003008 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003009
3010 /*
 3011	 *	Have we downed the interface? We handle IFF_UP ourselves
3012 * according to user attempts to set it, rather than blindly
3013 * setting it.
3014 */
3015
3016 ret = 0;
3017 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
3018 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3019
3020 if (!ret)
Patrick McHardy4417da62007-06-27 01:28:10 -07003021 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003022 }
3023
3024 if (dev->flags & IFF_UP &&
3025 ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
3026 IFF_VOLATILE)))
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003027 call_netdevice_notifiers(NETDEV_CHANGE, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003028
3029 if ((flags ^ dev->gflags) & IFF_PROMISC) {
3030 int inc = (flags & IFF_PROMISC) ? +1 : -1;
3031 dev->gflags ^= IFF_PROMISC;
3032 dev_set_promiscuity(dev, inc);
3033 }
3034
3035 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
3036 is important. Some (broken) drivers set IFF_PROMISC, when
3037 IFF_ALLMULTI is requested not asking us and not reporting.
3038 */
3039 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
3040 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
3041 dev->gflags ^= IFF_ALLMULTI;
3042 dev_set_allmulti(dev, inc);
3043 }
3044
Thomas Graf7c355f52007-06-05 16:03:03 -07003045 /* Exclude state transition flags, already notified */
3046 changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
3047 if (changes)
3048 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003049
3050 return ret;
3051}
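
/*
 * Illustrative sketch (not part of the original file): how kernel code
 * can toggle IFF_UP through dev_change_flags(), which opens or closes
 * the device as a side effect.  The RTNL lock must be held, as asserted
 * above.  "example_set_up" is a hypothetical helper, not a kernel symbol.
 */
#if 0
static int example_set_up(struct net_device *dev, int up)
{
	unsigned flags;
	int err;

	rtnl_lock();
	flags = dev_get_flags(dev);
	if (up)
		flags |= IFF_UP;
	else
		flags &= ~IFF_UP;
	err = dev_change_flags(dev, flags);	/* calls dev_open()/dev_close() */
	rtnl_unlock();
	return err;
}
#endif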

int dev_set_mtu(struct net_device *dev, int new_mtu)
{
	int err;

	if (new_mtu == dev->mtu)
		return 0;

	/* MTU must not be negative. */
	if (new_mtu < 0)
		return -EINVAL;

	if (!netif_device_present(dev))
		return -ENODEV;

	err = 0;
	if (dev->change_mtu)
		err = dev->change_mtu(dev, new_mtu);
	else
		dev->mtu = new_mtu;
	if (!err && dev->flags & IFF_UP)
		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
	return err;
}
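
/*
 * Illustrative sketch (not part of the original file): a tunnel-style
 * driver shrinking the MTU of an underlying device to make room for its
 * encapsulation header.  "example_clamp_mtu" and the 20-byte overhead
 * are assumptions made up for this example.
 */
#if 0
static int example_clamp_mtu(struct net_device *lower)
{
	int err = 0;

	rtnl_lock();
	if (lower->mtu > 1500 - 20)
		err = dev_set_mtu(lower, 1500 - 20);
	rtnl_unlock();
	/* NETDEV_CHANGEMTU was raised above if the device was up */
	return err;
}
#endif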

int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
{
	int err;

	if (!dev->set_mac_address)
		return -EOPNOTSUPP;
	if (sa->sa_family != dev->type)
		return -EINVAL;
	if (!netif_device_present(dev))
		return -ENODEV;
	err = dev->set_mac_address(dev, sa);
	if (!err)
		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
	return err;
}
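
/*
 * Illustrative sketch (not part of the original file): programming a MAC
 * address from inside the kernel.  sa_family must match dev->type, and
 * the address must fit in sa_data.  "example_set_mac" is a hypothetical
 * helper.
 */
#if 0
static int example_set_mac(struct net_device *dev, const unsigned char *addr)
{
	struct sockaddr sa;
	int err;

	sa.sa_family = dev->type;		/* e.g. ARPHRD_ETHER */
	memcpy(sa.sa_data, addr, dev->addr_len);
	rtnl_lock();
	err = dev_set_mac_address(dev, &sa);	/* fires NETDEV_CHANGEADDR */
	rtnl_unlock();
	return err;
}
#endif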

/*
 *	Perform the SIOCxIFxxx calls.
 */
static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
{
	int err;
	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);

	if (!dev)
		return -ENODEV;

	switch (cmd) {
		case SIOCGIFFLAGS:	/* Get interface flags */
			ifr->ifr_flags = dev_get_flags(dev);
			return 0;

		case SIOCSIFFLAGS:	/* Set interface flags */
			return dev_change_flags(dev, ifr->ifr_flags);

		case SIOCGIFMETRIC:	/* Get the metric on the interface
					   (currently unused) */
			ifr->ifr_metric = 0;
			return 0;

		case SIOCSIFMETRIC:	/* Set the metric on the interface
					   (currently unused) */
			return -EOPNOTSUPP;

		case SIOCGIFMTU:	/* Get the MTU of a device */
			ifr->ifr_mtu = dev->mtu;
			return 0;

		case SIOCSIFMTU:	/* Set the MTU of a device */
			return dev_set_mtu(dev, ifr->ifr_mtu);

		case SIOCGIFHWADDR:
			if (!dev->addr_len)
				memset(ifr->ifr_hwaddr.sa_data, 0,
				       sizeof ifr->ifr_hwaddr.sa_data);
			else
				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
				       min(sizeof ifr->ifr_hwaddr.sa_data,
					   (size_t) dev->addr_len));
			ifr->ifr_hwaddr.sa_family = dev->type;
			return 0;

		case SIOCSIFHWADDR:
			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);

		case SIOCSIFHWBROADCAST:
			if (ifr->ifr_hwaddr.sa_family != dev->type)
				return -EINVAL;
			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
			       min(sizeof ifr->ifr_hwaddr.sa_data,
				   (size_t) dev->addr_len));
			call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
			return 0;

		case SIOCGIFMAP:
			ifr->ifr_map.mem_start = dev->mem_start;
			ifr->ifr_map.mem_end   = dev->mem_end;
			ifr->ifr_map.base_addr = dev->base_addr;
			ifr->ifr_map.irq       = dev->irq;
			ifr->ifr_map.dma       = dev->dma;
			ifr->ifr_map.port      = dev->if_port;
			return 0;

		case SIOCSIFMAP:
			if (dev->set_config) {
				if (!netif_device_present(dev))
					return -ENODEV;
				return dev->set_config(dev, &ifr->ifr_map);
			}
			return -EOPNOTSUPP;

		case SIOCADDMULTI:
			if (!dev->set_multicast_list ||
			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
				return -EINVAL;
			if (!netif_device_present(dev))
				return -ENODEV;
			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
					  dev->addr_len, 1);

		case SIOCDELMULTI:
			if (!dev->set_multicast_list ||
			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
				return -EINVAL;
			if (!netif_device_present(dev))
				return -ENODEV;
			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
					     dev->addr_len, 1);

		case SIOCGIFINDEX:
			ifr->ifr_ifindex = dev->ifindex;
			return 0;

		case SIOCGIFTXQLEN:
			ifr->ifr_qlen = dev->tx_queue_len;
			return 0;

		case SIOCSIFTXQLEN:
			if (ifr->ifr_qlen < 0)
				return -EINVAL;
			dev->tx_queue_len = ifr->ifr_qlen;
			return 0;

		case SIOCSIFNAME:
			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
			return dev_change_name(dev, ifr->ifr_newname);

		/*
		 *	Unknown or private ioctl
		 */

		default:
			if ((cmd >= SIOCDEVPRIVATE &&
			     cmd <= SIOCDEVPRIVATE + 15) ||
			    cmd == SIOCBONDENSLAVE ||
			    cmd == SIOCBONDRELEASE ||
			    cmd == SIOCBONDSETHWADDR ||
			    cmd == SIOCBONDSLAVEINFOQUERY ||
			    cmd == SIOCBONDINFOQUERY ||
			    cmd == SIOCBONDCHANGEACTIVE ||
			    cmd == SIOCGMIIPHY ||
			    cmd == SIOCGMIIREG ||
			    cmd == SIOCSMIIREG ||
			    cmd == SIOCBRADDIF ||
			    cmd == SIOCBRDELIF ||
			    cmd == SIOCWANDEV) {
				err = -EOPNOTSUPP;
				if (dev->do_ioctl) {
					if (netif_device_present(dev))
						err = dev->do_ioctl(dev, ifr,
								    cmd);
					else
						err = -ENODEV;
				}
			} else
				err = -EINVAL;

	}
	return err;
}

/*
 *	This function handles all "interface"-type I/O control requests. The
 *	actual 'doing' part of this is dev_ifsioc above.
 */

/**
 *	dev_ioctl	-	network device ioctl
 *	@net: the applicable net namespace
 *	@cmd: command to issue
 *	@arg: pointer to a struct ifreq in user space
 *
 *	Issue ioctl functions to devices. This is normally called by the
 *	user space syscall interfaces but can sometimes be useful for
 *	other purposes. The return value is the return from the syscall if
 *	positive or a negative errno code on error.
 */

int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	int ret;
	char *colon;

	/* One special case: SIOCGIFCONF takes an ifconf argument
	   and grabs the rtnl lock, because it sleeps writing
	   to user space.
	 */

	if (cmd == SIOCGIFCONF) {
		rtnl_lock();
		ret = dev_ifconf(net, (char __user *) arg);
		rtnl_unlock();
		return ret;
	}
	if (cmd == SIOCGIFNAME)
		return dev_ifname(net, (struct ifreq __user *)arg);

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		return -EFAULT;

	ifr.ifr_name[IFNAMSIZ-1] = 0;

	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	/*
	 *	See which interface the caller is talking about.
	 */

	switch (cmd) {
		/*
		 *	These ioctl calls:
		 *	- can be done by all.
		 *	- atomic and do not require locking.
		 *	- return a value
		 */
		case SIOCGIFFLAGS:
		case SIOCGIFMETRIC:
		case SIOCGIFMTU:
		case SIOCGIFHWADDR:
		case SIOCGIFSLAVE:
		case SIOCGIFMAP:
		case SIOCGIFINDEX:
		case SIOCGIFTXQLEN:
			dev_load(net, ifr.ifr_name);
			read_lock(&dev_base_lock);
			ret = dev_ifsioc(net, &ifr, cmd);
			read_unlock(&dev_base_lock);
			if (!ret) {
				if (colon)
					*colon = ':';
				if (copy_to_user(arg, &ifr,
						 sizeof(struct ifreq)))
					ret = -EFAULT;
			}
			return ret;

		case SIOCETHTOOL:
			dev_load(net, ifr.ifr_name);
			rtnl_lock();
			ret = dev_ethtool(net, &ifr);
			rtnl_unlock();
			if (!ret) {
				if (colon)
					*colon = ':';
				if (copy_to_user(arg, &ifr,
						 sizeof(struct ifreq)))
					ret = -EFAULT;
			}
			return ret;

		/*
		 *	These ioctl calls:
		 *	- require superuser power.
		 *	- require strict serialization.
		 *	- return a value
		 */
		case SIOCGMIIPHY:
		case SIOCGMIIREG:
		case SIOCSIFNAME:
			if (!capable(CAP_NET_ADMIN))
				return -EPERM;
			dev_load(net, ifr.ifr_name);
			rtnl_lock();
			ret = dev_ifsioc(net, &ifr, cmd);
			rtnl_unlock();
			if (!ret) {
				if (colon)
					*colon = ':';
				if (copy_to_user(arg, &ifr,
						 sizeof(struct ifreq)))
					ret = -EFAULT;
			}
			return ret;

		/*
		 *	These ioctl calls:
		 *	- require superuser power.
		 *	- require strict serialization.
		 *	- do not return a value
		 */
		case SIOCSIFFLAGS:
		case SIOCSIFMETRIC:
		case SIOCSIFMTU:
		case SIOCSIFMAP:
		case SIOCSIFHWADDR:
		case SIOCSIFSLAVE:
		case SIOCADDMULTI:
		case SIOCDELMULTI:
		case SIOCSIFHWBROADCAST:
		case SIOCSIFTXQLEN:
		case SIOCSMIIREG:
		case SIOCBONDENSLAVE:
		case SIOCBONDRELEASE:
		case SIOCBONDSETHWADDR:
		case SIOCBONDCHANGEACTIVE:
		case SIOCBRADDIF:
		case SIOCBRDELIF:
			if (!capable(CAP_NET_ADMIN))
				return -EPERM;
			/* fall through */
		case SIOCBONDSLAVEINFOQUERY:
		case SIOCBONDINFOQUERY:
			dev_load(net, ifr.ifr_name);
			rtnl_lock();
			ret = dev_ifsioc(net, &ifr, cmd);
			rtnl_unlock();
			return ret;

		case SIOCGIFMEM:
			/* Get the per device memory space. We can add this but
			 * currently do not support it */
		case SIOCSIFMEM:
			/* Set the per device memory buffer space.
			 * Not applicable in our case */
		case SIOCSIFLINK:
			return -EINVAL;

		/*
		 *	Unknown or private ioctl.
		 */
		default:
			if (cmd == SIOCWANDEV ||
			    (cmd >= SIOCDEVPRIVATE &&
			     cmd <= SIOCDEVPRIVATE + 15)) {
				dev_load(net, ifr.ifr_name);
				rtnl_lock();
				ret = dev_ifsioc(net, &ifr, cmd);
				rtnl_unlock();
				if (!ret && copy_to_user(arg, &ifr,
							 sizeof(struct ifreq)))
					ret = -EFAULT;
				return ret;
			}
			/* Take care of Wireless Extensions */
			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
				return wext_handle_ioctl(net, &ifr, cmd, arg);
			return -EINVAL;
	}
}
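
/*
 * Illustrative sketch (not part of the original file): the user-space
 * side of the ioctls dispatched above.  This is ordinary application
 * code, reproduced only to document the calling convention; "eth0" is
 * an assumed interface name.
 */
#if 0
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>

int main(void)
{
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
	if (ioctl(fd, SIOCGIFMTU, &ifr) == 0)		/* lands in dev_ifsioc() */
		printf("mtu %d\n", ifr.ifr_mtu);
	if (ioctl(fd, SIOCGIFFLAGS, &ifr) == 0)
		printf("up %d\n", !!(ifr.ifr_flags & IFF_UP));
	close(fd);
	return 0;
}
#endif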


/**
 *	dev_new_index	-	allocate an ifindex
 *	@net: the applicable net namespace
 *
 *	Returns a suitable unique value for a new device interface
 *	number.  The caller must hold the rtnl semaphore or the
 *	dev_base_lock to be sure it remains unique.
 */
static int dev_new_index(struct net *net)
{
	static int ifindex;
	for (;;) {
		if (++ifindex <= 0)
			ifindex = 1;
		if (!__dev_get_by_index(net, ifindex))
			return ifindex;
	}
}

/* Delayed registration/unregistration */
static DEFINE_SPINLOCK(net_todo_list_lock);
static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);

static void net_set_todo(struct net_device *dev)
{
	spin_lock(&net_todo_list_lock);
	list_add_tail(&dev->todo_list, &net_todo_list);
	spin_unlock(&net_todo_list_lock);
}

/**
 *	register_netdevice	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	Callers must hold the rtnl semaphore. You may want
 *	register_netdev() instead of this.
 *
 *	BUGS:
 *	The locking appears insufficient to guarantee two parallel registers
 *	will not get the same name.
 */

int register_netdevice(struct net_device *dev)
{
	struct hlist_head *head;
	struct hlist_node *p;
	int ret;
	struct net *net;

	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();

	might_sleep();

	/* When net_device's are persistent, this will be fatal. */
	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
	BUG_ON(!dev->nd_net);
	net = dev->nd_net;

	spin_lock_init(&dev->queue_lock);
	spin_lock_init(&dev->_xmit_lock);
	netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
	dev->xmit_lock_owner = -1;
	spin_lock_init(&dev->ingress_lock);

	dev->iflink = -1;

	/* Init, if this function is available */
	if (dev->init) {
		ret = dev->init(dev);
		if (ret) {
			if (ret > 0)
				ret = -EIO;
			goto out;
		}
	}

	if (!dev_valid_name(dev->name)) {
		ret = -EINVAL;
		goto err_uninit;
	}

	dev->ifindex = dev_new_index(net);
	if (dev->iflink == -1)
		dev->iflink = dev->ifindex;

	/* Check for existence of name */
	head = dev_name_hash(net, dev->name);
	hlist_for_each(p, head) {
		struct net_device *d
			= hlist_entry(p, struct net_device, name_hlist);
		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
			ret = -EEXIST;
			goto err_uninit;
		}
	}

	/* Fix illegal checksum combinations */
	if ((dev->features & NETIF_F_HW_CSUM) &&
	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
		       dev->name);
		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
	}

	if ((dev->features & NETIF_F_NO_CSUM) &&
	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
		       dev->name);
		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
	}

	/* Fix illegal SG+CSUM combinations. */
	if ((dev->features & NETIF_F_SG) &&
	    !(dev->features & NETIF_F_ALL_CSUM)) {
		printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
		       dev->name);
		dev->features &= ~NETIF_F_SG;
	}

	/* TSO requires that SG is present as well. */
	if ((dev->features & NETIF_F_TSO) &&
	    !(dev->features & NETIF_F_SG)) {
		printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
		       dev->name);
		dev->features &= ~NETIF_F_TSO;
	}
	if (dev->features & NETIF_F_UFO) {
		if (!(dev->features & NETIF_F_HW_CSUM)) {
			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
					"NETIF_F_HW_CSUM feature.\n",
			       dev->name);
			dev->features &= ~NETIF_F_UFO;
		}
		if (!(dev->features & NETIF_F_SG)) {
			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
					"NETIF_F_SG feature.\n",
			       dev->name);
			dev->features &= ~NETIF_F_UFO;
		}
	}

	/*
	 *	Default rebuild_header routine; it should never be called.
	 *	It is installed purely as a bug trap.
	 */
	if (!dev->rebuild_header)
		dev->rebuild_header = default_rebuild_header;

	ret = netdev_register_kobject(dev);
	if (ret)
		goto err_uninit;
	dev->reg_state = NETREG_REGISTERED;

	/*
	 *	Default initial state at registry is that the
	 *	device is present.
	 */

	set_bit(__LINK_STATE_PRESENT, &dev->state);

	dev_init_scheduler(dev);
	dev_hold(dev);
	list_netdevice(dev);

	/* Notify protocols, that a new device appeared. */
	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
	ret = notifier_to_errno(ret);
	if (ret)
		unregister_netdevice(dev);

out:
	return ret;

err_uninit:
	if (dev->uninit)
		dev->uninit(dev);
	goto out;
}

/**
 *	register_netdev	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	This is a wrapper around register_netdevice that takes the rtnl semaphore
 *	and expands the device name if you passed a format string to
 *	alloc_netdev.
 */
int register_netdev(struct net_device *dev)
{
	int err;

	rtnl_lock();

	/*
	 * If the name is a format string the caller wants us to do a
	 * name allocation.
	 */
	if (strchr(dev->name, '%')) {
		err = dev_alloc_name(dev, dev->name);
		if (err < 0)
			goto out;
	}

	err = register_netdevice(dev);
out:
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(register_netdev);
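
/*
 * Illustrative sketch (not part of the original file): the usual driver
 * pairing of alloc_netdev()/register_netdev(), including the rule that a
 * never-registered device is released with a plain free_netdev().  The
 * "example_" names are hypothetical.
 */
#if 0
static void example_setup(struct net_device *dev)
{
	ether_setup(dev);	/* Ethernet defaults for type, MTU, etc. */
}

static struct net_device *example_probe(void)
{
	struct net_device *dev;

	/* "%d" in the name is expanded by register_netdev() */
	dev = alloc_netdev(0, "ex%d", example_setup);
	if (!dev)
		return NULL;
	if (register_netdev(dev)) {
		free_netdev(dev);	/* safe: still NETREG_UNINITIALIZED */
		return NULL;
	}
	return dev;
}
#endif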

/*
 * netdev_wait_allrefs - wait until all references are gone.
 *
 * This is called when unregistering network devices.
 *
 * Any protocol or device that holds a reference should register
 * for netdevice notification, and cleanup and put back the
 * reference if they receive an UNREGISTER event.
 * We can get stuck here if buggy protocols don't correctly
 * call dev_put.
 */
static void netdev_wait_allrefs(struct net_device *dev)
{
	unsigned long rebroadcast_time, warning_time;

	rebroadcast_time = warning_time = jiffies;
	while (atomic_read(&dev->refcnt) != 0) {
		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
			rtnl_lock();

			/* Rebroadcast unregister notification */
			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
				     &dev->state)) {
				/* We must not have linkwatch events
				 * pending on unregister. If this
				 * happens, we simply run the queue
				 * unscheduled, resulting in a noop
				 * for this device.
				 */
				linkwatch_run_queue();
			}

			__rtnl_unlock();

			rebroadcast_time = jiffies;
		}

		msleep(250);

		if (time_after(jiffies, warning_time + 10 * HZ)) {
			printk(KERN_EMERG "unregister_netdevice: "
			       "waiting for %s to become free. Usage "
			       "count = %d\n",
			       dev->name, atomic_read(&dev->refcnt));
			warning_time = jiffies;
		}
	}
}
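
/*
 * Illustrative sketch (not part of the original file): the reference
 * protocol netdev_wait_allrefs() relies on.  A subsystem that took a
 * dev_hold() reference registers a notifier (register_netdevice_notifier)
 * and drops the reference on NETDEV_UNREGISTER so the refcnt can reach
 * zero.  The "example_" names are hypothetical.
 */
#if 0
static struct net_device *example_dev;	/* obtained elsewhere via dev_hold() */

static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	if (event == NETDEV_UNREGISTER && dev == example_dev) {
		example_dev = NULL;
		dev_put(dev);	/* lets netdev_wait_allrefs() terminate */
	}
	return NOTIFY_DONE;
}

static struct notifier_block example_notifier = {
	.notifier_call = example_netdev_event,
};
#endif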

/* The sequence is:
 *
 *	rtnl_lock();
 *	...
 *	register_netdevice(x1);
 *	register_netdevice(x2);
 *	...
 *	unregister_netdevice(y1);
 *	unregister_netdevice(y2);
 *	...
 *	rtnl_unlock();
 *	free_netdev(y1);
 *	free_netdev(y2);
 *
 * We are invoked by rtnl_unlock() after it drops the semaphore.
 * This allows us to deal with problems:
 * 1) We can delete sysfs objects which invoke hotplug
 *    without deadlocking with linkwatch via keventd.
 * 2) Since we run with the RTNL semaphore not held, we can sleep
 *    safely in order to wait for the netdev refcnt to drop to zero.
 */
static DEFINE_MUTEX(net_todo_run_mutex);
void netdev_run_todo(void)
{
	struct list_head list;

	/* Need to guard against multiple cpu's getting out of order. */
	mutex_lock(&net_todo_run_mutex);

	/* Not safe to do outside the semaphore.  We must not return
	 * until all unregister events invoked by the local processor
	 * have been completed (either by this todo run, or one on
	 * another cpu).
	 */
	if (list_empty(&net_todo_list))
		goto out;

	/* Snapshot list, allow later requests */
	spin_lock(&net_todo_list_lock);
	list_replace_init(&net_todo_list, &list);
	spin_unlock(&net_todo_list_lock);

	while (!list_empty(&list)) {
		struct net_device *dev
			= list_entry(list.next, struct net_device, todo_list);
		list_del(&dev->todo_list);

		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
			printk(KERN_ERR "network todo '%s' but state %d\n",
			       dev->name, dev->reg_state);
			dump_stack();
			continue;
		}

		dev->reg_state = NETREG_UNREGISTERED;

		netdev_wait_allrefs(dev);

		/* paranoia */
		BUG_ON(atomic_read(&dev->refcnt));
		BUG_TRAP(!dev->ip_ptr);
		BUG_TRAP(!dev->ip6_ptr);
		BUG_TRAP(!dev->dn_ptr);

		if (dev->destructor)
			dev->destructor(dev);

		/* Free network device */
		kobject_put(&dev->dev.kobj);
	}

out:
	mutex_unlock(&net_todo_run_mutex);
}

static struct net_device_stats *internal_stats(struct net_device *dev)
{
	return &dev->stats;
}

/**
 *	alloc_netdev_mq - allocate network device
 *	@sizeof_priv:	size of private data to allocate space for
 *	@name:		device name format string
 *	@setup:		callback to initialize device
 *	@queue_count:	the number of subqueues to allocate
 *
 *	Allocates a struct net_device with private data area for driver use
 *	and performs basic initialization.  Also allocates subqueue structs
 *	for each queue on the device at the end of the netdevice.
 */
struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
		void (*setup)(struct net_device *), unsigned int queue_count)
{
	void *p;
	struct net_device *dev;
	int alloc_size;

	BUG_ON(strlen(name) >= sizeof(dev->name));

	/* ensure 32-byte alignment of both the device and private area */
	alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST +
		      (sizeof(struct net_device_subqueue) * (queue_count - 1))) &
		     ~NETDEV_ALIGN_CONST;
	alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;

	p = kzalloc(alloc_size, GFP_KERNEL);
	if (!p) {
		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
		return NULL;
	}

	dev = (struct net_device *)
		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
	dev->padded = (char *)dev - (char *)p;
	dev->nd_net = &init_net;

	if (sizeof_priv) {
		dev->priv = ((char *)dev +
			     ((sizeof(struct net_device) +
			       (sizeof(struct net_device_subqueue) *
				(queue_count - 1)) + NETDEV_ALIGN_CONST)
			      & ~NETDEV_ALIGN_CONST));
	}

	dev->egress_subqueue_count = queue_count;

	dev->get_stats = internal_stats;
	netpoll_netdev_init(dev);
	setup(dev);
	strcpy(dev->name, name);
	return dev;
}
EXPORT_SYMBOL(alloc_netdev_mq);
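
/*
 * Illustrative sketch (not part of the original file): allocating a
 * multiqueue device with a driver-private area.  "struct example_priv"
 * and the queue count of 4 are assumptions made up for this example.
 */
#if 0
struct example_priv {
	spinlock_t lock;
	unsigned long tx_drops;
};

static struct net_device *example_alloc(void)
{
	struct net_device *dev;
	struct example_priv *priv;

	dev = alloc_netdev_mq(sizeof(struct example_priv), "ex%d",
			      ether_setup, 4);	/* 4 TX subqueues */
	if (!dev)
		return NULL;
	priv = netdev_priv(dev);	/* the aligned private area set up above */
	spin_lock_init(&priv->lock);
	return dev;
}
#endif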

/**
 *	free_netdev - free network device
 *	@dev: device
 *
 *	This function does the last stage of destroying an allocated device
 *	interface. The reference to the device object is released.
 *	If this is the last reference then it will be freed.
 */
void free_netdev(struct net_device *dev)
{
	/* Compatibility with error handling in drivers */
	if (dev->reg_state == NETREG_UNINITIALIZED) {
		kfree((char *)dev - dev->padded);
		return;
	}

	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
	dev->reg_state = NETREG_RELEASED;

	/* will free via device release */
	put_device(&dev->dev);
}

/* Synchronize with packet receive processing. */
void synchronize_net(void)
{
	might_sleep();
	synchronize_rcu();
}

/**
 *	unregister_netdevice - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	Callers must hold the rtnl semaphore. You may want
 *	unregister_netdev() instead of this.
 */

void unregister_netdevice(struct net_device *dev)
{
	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();

	/* Some devices call without registering for initialization unwind. */
	if (dev->reg_state == NETREG_UNINITIALIZED) {
		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
				  "was registered\n", dev->name, dev);

		WARN_ON(1);
		return;
	}

	BUG_ON(dev->reg_state != NETREG_REGISTERED);

	/* If device is running, close it first. */
	if (dev->flags & IFF_UP)
		dev_close(dev);

	/* And unlink it from device chain. */
	unlist_netdevice(dev);

	dev->reg_state = NETREG_UNREGISTERING;

	synchronize_net();

	/* Shutdown queueing discipline. */
	dev_shutdown(dev);

	/* Notify protocols, that we are about to destroy
	   this device. They should clean all the things.
	*/
	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

	/*
	 *	Flush the unicast and multicast chains
	 */
	dev_addr_discard(dev);

	if (dev->uninit)
		dev->uninit(dev);

	/* Notifier chain MUST detach us from master device. */
	BUG_TRAP(!dev->master);

	/* Remove entries from kobject tree */
	netdev_unregister_kobject(dev);

	/* Finish processing unregister after unlock */
	net_set_todo(dev);

	synchronize_net();

	dev_put(dev);
}

/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore.  In general you want to use this and not
 *	unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);
	rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);
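
/*
 * Illustrative sketch (not part of the original file): driver teardown
 * order.  unregister_netdev() runs the whole shutdown sequence above and,
 * via netdev_run_todo(), waits for the refcnt to drop; only then may
 * free_netdev() run.  "example_remove" is a hypothetical name.
 */
#if 0
static void example_remove(struct net_device *dev)
{
	unregister_netdev(dev);	/* close, unlink, notify, wait for refs */
	free_netdev(dev);	/* drops the final kobject reference */
}
#endif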

/**
 *	dev_change_net_namespace - move device to a different network namespace
 *	@dev: device
 *	@net: network namespace
 *	@pat: If not NULL name pattern to try if the current device name
 *	      is already taken in the destination network namespace.
 *
 *	This function shuts down a device interface and moves it
 *	to a new network namespace. On success 0 is returned, on
 *	a failure a negative errno code is returned.
 *
 *	Callers must hold the rtnl semaphore.
 */

int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
{
	char buf[IFNAMSIZ];
	const char *destname;
	int err;

	ASSERT_RTNL();

	/* Don't allow namespace local devices to be moved. */
	err = -EINVAL;
	if (dev->features & NETIF_F_NETNS_LOCAL)
		goto out;

	/* Ensure the device has been registered */
	err = -EINVAL;
	if (dev->reg_state != NETREG_REGISTERED)
		goto out;

	/* Get out if there is nothing to do */
	err = 0;
	if (dev->nd_net == net)
		goto out;

	/* Pick the destination device name, and ensure
	 * we can use it in the destination network namespace.
	 */
	err = -EEXIST;
	destname = dev->name;
	if (__dev_get_by_name(net, destname)) {
		/* We get here if we can't use the current device name */
		if (!pat)
			goto out;
		if (!dev_valid_name(pat))
			goto out;
		if (strchr(pat, '%')) {
			if (__dev_alloc_name(net, pat, buf) < 0)
				goto out;
			destname = buf;
		} else
			destname = pat;
		if (__dev_get_by_name(net, destname))
			goto out;
	}

	/*
	 * And now a mini version of register_netdevice and unregister_netdevice.
	 */

	/* If device is running close it first. */
	if (dev->flags & IFF_UP)
		dev_close(dev);

	/* And unlink it from device chain */
	err = -ENODEV;
	unlist_netdevice(dev);

	synchronize_net();

	/* Shutdown queueing discipline. */
	dev_shutdown(dev);

	/* Notify protocols, that we are about to destroy
	   this device. They should clean all the things.
	*/
	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

	/*
	 *	Flush the unicast and multicast chains
	 */
	dev_addr_discard(dev);

	/* Actually switch the network namespace */
	dev->nd_net = net;

	/* Assign the new device name */
	if (destname != dev->name)
		strcpy(dev->name, destname);

	/* If there is an ifindex conflict assign a new one */
	if (__dev_get_by_index(net, dev->ifindex)) {
		int iflink = (dev->iflink == dev->ifindex);
		dev->ifindex = dev_new_index(net);
		if (iflink)
			dev->iflink = dev->ifindex;
	}

	/* Fixup kobjects */
	err = device_rename(&dev->dev, dev->name);
	WARN_ON(err);

	/* Add the device back in the hashes */
	list_netdevice(dev);

	/* Notify protocols, that a new device appeared. */
	call_netdevice_notifiers(NETDEV_REGISTER, dev);

	synchronize_net();
	err = 0;
out:
	return err;
}
4061
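/*
 * Illustrative sketch (not part of the original file): moving a device
 * into another namespace with a "dev%d" fallback name, the same pattern
 * default_device_exit() uses further below.  "example_move" is a
 * hypothetical helper; the caller is assumed to hold a reference on @net.
 */
#if 0
static int example_move(struct net_device *dev, struct net *net)
{
	int err;

	rtnl_lock();
	err = dev_change_net_namespace(dev, net, "dev%d");
	rtnl_unlock();
	return err;
}
#endif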

static int dev_cpu_callback(struct notifier_block *nfb,
			    unsigned long action,
			    void *ocpu)
{
	struct sk_buff **list_skb;
	struct net_device **list_net;
	struct sk_buff *skb;
	unsigned int cpu, oldcpu = (unsigned long)ocpu;
	struct softnet_data *sd, *oldsd;

	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
		return NOTIFY_OK;

	local_irq_disable();
	cpu = smp_processor_id();
	sd = &per_cpu(softnet_data, cpu);
	oldsd = &per_cpu(softnet_data, oldcpu);

	/* Find end of our completion_queue. */
	list_skb = &sd->completion_queue;
	while (*list_skb)
		list_skb = &(*list_skb)->next;
	/* Append completion queue from offline CPU. */
	*list_skb = oldsd->completion_queue;
	oldsd->completion_queue = NULL;

	/* Find end of our output_queue. */
	list_net = &sd->output_queue;
	while (*list_net)
		list_net = &(*list_net)->next_sched;
	/* Append output queue from offline CPU. */
	*list_net = oldsd->output_queue;
	oldsd->output_queue = NULL;

	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_enable();

	/* Process offline CPU's input_pkt_queue */
	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
		netif_rx(skb);

	return NOTIFY_OK;
}

#ifdef CONFIG_NET_DMA
/**
 * net_dma_rebalance - try to maintain one DMA channel per CPU
 * @net_dma: DMA client and associated data (lock, channels, channel_mask)
 *
 * This is called when the number of channels allocated to the net_dma client
 * changes.  The net_dma client tries to have one DMA channel per CPU.
 */

static void net_dma_rebalance(struct net_dma *net_dma)
{
	unsigned int cpu, i, n, chan_idx;
	struct dma_chan *chan;

	if (cpus_empty(net_dma->channel_mask)) {
		for_each_online_cpu(cpu)
			rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
		return;
	}

	i = 0;
	cpu = first_cpu(cpu_online_map);

	for_each_cpu_mask(chan_idx, net_dma->channel_mask) {
		chan = net_dma->channels[chan_idx];

		n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
		     + (i < (num_online_cpus() %
			     cpus_weight(net_dma->channel_mask)) ? 1 : 0));

		while (n) {
			per_cpu(softnet_data, cpu).net_dma = chan;
			cpu = next_cpu(cpu, cpu_online_map);
			n--;
		}
		i++;
	}
}

/**
 * netdev_dma_event - event callback for the net_dma_client
 * @client: should always be net_dma_client
 * @chan: DMA channel for the event
 * @state: DMA state to be handled
 */
static enum dma_state_client
netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
	enum dma_state state)
{
	int i, found = 0, pos = -1;
	struct net_dma *net_dma =
		container_of(client, struct net_dma, client);
	enum dma_state_client ack = DMA_DUP; /* default: take no action */

	spin_lock(&net_dma->lock);
	switch (state) {
	case DMA_RESOURCE_AVAILABLE:
		for (i = 0; i < NR_CPUS; i++)
			if (net_dma->channels[i] == chan) {
				found = 1;
				break;
			} else if (net_dma->channels[i] == NULL && pos < 0)
				pos = i;

		if (!found && pos >= 0) {
			ack = DMA_ACK;
			net_dma->channels[pos] = chan;
			cpu_set(pos, net_dma->channel_mask);
			net_dma_rebalance(net_dma);
		}
		break;
	case DMA_RESOURCE_REMOVED:
		for (i = 0; i < NR_CPUS; i++)
			if (net_dma->channels[i] == chan) {
				found = 1;
				pos = i;
				break;
			}

		if (found) {
			ack = DMA_ACK;
			cpu_clear(pos, net_dma->channel_mask);
			net_dma->channels[i] = NULL;
			net_dma_rebalance(net_dma);
		}
		break;
	default:
		break;
	}
	spin_unlock(&net_dma->lock);

	return ack;
}

/**
 * netdev_dma_register - register the networking subsystem as a DMA client
 */
static int __init netdev_dma_register(void)
{
	spin_lock_init(&net_dma.lock);
	dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
	dma_async_client_register(&net_dma.client);
	dma_async_client_chan_request(&net_dma.client);
	return 0;
}

#else
static int __init netdev_dma_register(void) { return -ENODEV; }
#endif /* CONFIG_NET_DMA */

/**
 *	netdev_compute_features - compute conjunction of two feature sets
 *	@all: first feature set
 *	@one: second feature set
 *
 *	Computes a new feature set after adding a device with feature set
 *	@one to the master device with current feature set @all.  Returns
 *	the new feature set.
 */
int netdev_compute_features(unsigned long all, unsigned long one)
{
	/* if device needs checksumming, downgrade to hw checksumming */
	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
		all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;

	/* if device can't do all checksum, downgrade to ipv4/ipv6 */
	if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM))
		all ^= NETIF_F_HW_CSUM
		       | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;

	if (one & NETIF_F_GSO)
		one |= NETIF_F_GSO_SOFTWARE;
	one |= NETIF_F_GSO;

	/* If even one device supports robust GSO, enable it for all. */
	if (one & NETIF_F_GSO_ROBUST)
		all |= NETIF_F_GSO_ROBUST;

	all &= one | NETIF_F_LLTX;

	if (!(all & NETIF_F_ALL_CSUM))
		all &= ~NETIF_F_SG;
	if (!(all & NETIF_F_SG))
		all &= ~NETIF_F_GSO_MASK;

	return all;
}
EXPORT_SYMBOL(netdev_compute_features);
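
/*
 * Illustrative sketch (not part of the original file): how a master
 * device in the bonding/bridging mould might fold each slave's feature
 * set into its own with netdev_compute_features().  "example_recompute"
 * and the slave array are assumptions made up for this example.
 */
#if 0
static void example_recompute(struct net_device *master,
			      struct net_device *slaves[], int n)
{
	unsigned long features = master->features;
	int i;

	for (i = 0; i < n; i++)
		features = netdev_compute_features(features,
						   slaves[i]->features);
	/* e.g. NETIF_F_SG is dropped if any slave lacks checksum support */
	master->features = features;
}
#endif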

static struct hlist_head *netdev_create_hash(void)
{
	int i;
	struct hlist_head *hash;

	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
	if (hash != NULL)
		for (i = 0; i < NETDEV_HASHENTRIES; i++)
			INIT_HLIST_HEAD(&hash[i]);

	return hash;
}

/* Initialize per network namespace state */
static int netdev_init(struct net *net)
{
	INIT_LIST_HEAD(&net->dev_base_head);
	rwlock_init(&dev_base_lock);

	net->dev_name_head = netdev_create_hash();
	if (net->dev_name_head == NULL)
		goto err_name;

	net->dev_index_head = netdev_create_hash();
	if (net->dev_index_head == NULL)
		goto err_idx;

	return 0;

err_idx:
	kfree(net->dev_name_head);
err_name:
	return -ENOMEM;
}

static void netdev_exit(struct net *net)
{
	kfree(net->dev_name_head);
	kfree(net->dev_index_head);
}

static struct pernet_operations netdev_net_ops = {
	.init = netdev_init,
	.exit = netdev_exit,
};

static void default_device_exit(struct net *net)
{
	struct net_device *dev, *next;
	/*
	 * Push all migratable network devices back to the
	 * initial network namespace
	 */
	rtnl_lock();
	for_each_netdev_safe(net, dev, next) {
		int err;

		/* Ignore unmoveable devices (e.g. loopback) */
		if (dev->features & NETIF_F_NETNS_LOCAL)
			continue;

		/* Push remaining network devices to init_net */
		err = dev_change_net_namespace(dev, &init_net, "dev%d");
		if (err) {
			printk(KERN_WARNING "%s: failed to move %s to init_net: %d\n",
			       __func__, dev->name, err);
			unregister_netdevice(dev);
		}
	}
	rtnl_unlock();
}

static struct pernet_operations default_device_ops = {
	.exit = default_device_exit,
};

/*
 *	Initialize the DEV module. At boot time this walks the device list and
 *	unhooks any devices that fail to initialise (normally hardware not
 *	present) and leaves us with a valid list of present and active devices.
 *
 */

/*
 *	This is called single threaded during boot, so no need
 *	to take the rtnl semaphore.
 */
static int __init net_dev_init(void)
{
	int i, rc = -ENOMEM;

	BUG_ON(!dev_boot_phase);

	if (dev_proc_init())
		goto out;

	if (netdev_kobject_init())
		goto out;

	INIT_LIST_HEAD(&ptype_all);
	for (i = 0; i < 16; i++)
		INIT_LIST_HEAD(&ptype_base[i]);

	if (register_pernet_subsys(&netdev_net_ops))
		goto out;

	if (register_pernet_device(&default_device_ops))
		goto out;

	/*
	 *	Initialise the packet receive queues.
	 */

	for_each_possible_cpu(i) {
		struct softnet_data *queue;

		queue = &per_cpu(softnet_data, i);
		skb_queue_head_init(&queue->input_pkt_queue);
		queue->completion_queue = NULL;
		INIT_LIST_HEAD(&queue->poll_list);

		queue->backlog.poll = process_backlog;
		queue->backlog.weight = weight_p;
	}

	netdev_dma_register();

	dev_boot_phase = 0;

	open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
	open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);

	hotcpu_notifier(dev_cpu_callback, 0);
	dst_init();
	dev_mcast_init();
	rc = 0;
out:
	return rc;
}

subsys_initcall(net_dev_init);

EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(dev_get_flags);

#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
#endif

#ifdef CONFIG_KMOD
EXPORT_SYMBOL(dev_load);
#endif

EXPORT_PER_CPU_SYMBOL(softnet_data);