blob: 812e57900591e40e37a615d2d8e9012d21ae370a [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * net/sched/sch_api.c Packet scheduler API.
3 *
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
8 *
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10 *
11 * Fixes:
12 *
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
16 */
17
Linus Torvalds1da177e2005-04-16 15:20:36 -070018#include <linux/module.h>
19#include <linux/types.h>
20#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070021#include <linux/string.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070022#include <linux/errno.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/skbuff.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/init.h>
25#include <linux/proc_fs.h>
26#include <linux/seq_file.h>
27#include <linux/kmod.h>
28#include <linux/list.h>
Patrick McHardy41794772007-03-16 01:19:15 -070029#include <linux/hrtimer.h>
Jarek Poplawski25bfcd52008-08-18 20:53:34 -070030#include <linux/lockdep.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090031#include <linux/slab.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070032
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020033#include <net/net_namespace.h>
Denis V. Lunevb8542722007-12-01 00:21:31 +110034#include <net/sock.h>
Arnaldo Carvalho de Melodc5fc572007-03-25 23:06:12 -070035#include <net/netlink.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070036#include <net/pkt_sched.h>
37
Tom Goff7316ae82010-03-19 15:40:13 +000038static int qdisc_notify(struct net *net, struct sk_buff *oskb,
39 struct nlmsghdr *n, u32 clid,
Linus Torvalds1da177e2005-04-16 15:20:36 -070040 struct Qdisc *old, struct Qdisc *new);
Tom Goff7316ae82010-03-19 15:40:13 +000041static int tclass_notify(struct net *net, struct sk_buff *oskb,
42 struct nlmsghdr *n, struct Qdisc *q,
43 unsigned long cl, int event);
Linus Torvalds1da177e2005-04-16 15:20:36 -070044
45/*
46
47 Short review.
48 -------------
49
50 This file consists of two interrelated parts:
51
52 1. queueing disciplines manager frontend.
53 2. traffic classes manager frontend.
54
55 Generally, queueing discipline ("qdisc") is a black box,
56 which is able to enqueue packets and to dequeue them (when
57 device is ready to send something) in order and at times
58 determined by algorithm hidden in it.
59
   qdisc's are divided into two categories:
61 - "queues", which have no internal structure visible from outside.
62 - "schedulers", which split all the packets to "traffic classes",
63 using "packet classifiers" (look at cls_api.c)
64
65 In turn, classes may have child qdiscs (as rule, queues)
66 attached to them etc. etc. etc.
67
68 The goal of the routines in this file is to translate
69 information supplied by user in the form of handles
70 to more intelligible for kernel form, to make some sanity
71 checks and part of work, which is common to all qdiscs
72 and to provide rtnetlink notifications.
73
74 All real intelligent work is done inside qdisc modules.
75
76
77
78 Every discipline has two major routines: enqueue and dequeue.
79
80 ---dequeue
81
82 dequeue usually returns a skb to send. It is allowed to return NULL,
83 but it does not mean that queue is empty, it just means that
84 discipline does not want to send anything this time.
85 Queue is really empty if q->q.qlen == 0.
86 For complicated disciplines with multiple queues q->q is not
87 real packet queue, but however q->q.qlen must be valid.
88
89 ---enqueue
90
91 enqueue returns 0, if packet was enqueued successfully.
92 If packet (this one or another one) was dropped, it returns
93 not zero error code.
94 NET_XMIT_DROP - this packet dropped
95 Expected action: do not backoff, but wait until queue will clear.
96 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
97 Expected action: backoff or ignore
98 NET_XMIT_POLICED - dropped by police.
99 Expected action: backoff or error to real-time apps.
100
101 Auxiliary routines:
102
Jarek Poplawski99c0db22008-10-31 00:45:27 -0700103 ---peek
104
105 like dequeue but without removing a packet from the queue
106
Linus Torvalds1da177e2005-04-16 15:20:36 -0700107 ---reset
108
109 returns qdisc to initial state: purge all buffers, clear all
110 timers, counters (except for statistics) etc.
111
112 ---init
113
114 initializes newly created qdisc.
115
116 ---destroy
117
118 destroys resources allocated by init and during lifetime of qdisc.
119
120 ---change
121
122 changes qdisc parameters.
123 */
124
125/* Protects list of registered TC modules. It is pure SMP lock. */
126static DEFINE_RWLOCK(qdisc_mod_lock);
127
128
129/************************************************
130 * Queueing disciplines manipulation. *
131 ************************************************/
132
133
stephen hemminger6da7c8f2013-08-27 16:19:08 -0700134/* Qdisc to use by default */
135
136const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
137EXPORT_SYMBOL(default_qdisc_ops);
138
Linus Torvalds1da177e2005-04-16 15:20:36 -0700139/* The list of all installed queueing disciplines. */
140
141static struct Qdisc_ops *qdisc_base;
142
/* Register/unregister queueing discipline */
144
/* Register a queueing discipline with the global list of TC modules.
 * Missing optional ops are filled in with no-op defaults; inconsistent
 * ops tables are rejected. Returns 0 on success, -EEXIST when an ops
 * with the same id is already registered, -EINVAL on inconsistency.
 */
int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->peek == NULL) {
		/* A default peek can only be supplied when there is no
		 * custom dequeue; a custom dequeue without a matching peek
		 * would pair incompatible implementations.
		 */
		if (qops->dequeue == NULL)
			qops->peek = noop_qdisc_ops.peek;
		else
			goto out_einval;
	}
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	if (qops->cl_ops) {
		const struct Qdisc_class_ops *cops = qops->cl_ops;

		/* Classful qdiscs must implement the minimal class API. */
		if (!(cops->get && cops->put && cops->walk && cops->leaf))
			goto out_einval;

		/* Filter attachment implies filter bind/unbind support. */
		if (cops->tcf_chain && !(cops->bind_tcf && cops->unbind_tcf))
			goto out_einval;
	}

	/* Append at the tail of the singly linked list; qp points at the
	 * terminating NULL next pointer after the search loop above. */
	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;

out_einval:
	rc = -EINVAL;
	goto out;
}
EXPORT_SYMBOL(register_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700188
189int unregister_qdisc(struct Qdisc_ops *qops)
190{
191 struct Qdisc_ops *q, **qp;
192 int err = -ENOENT;
193
194 write_lock(&qdisc_mod_lock);
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000195 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700196 if (q == qops)
197 break;
198 if (q) {
199 *qp = q->next;
200 q->next = NULL;
201 err = 0;
202 }
203 write_unlock(&qdisc_mod_lock);
204 return err;
205}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800206EXPORT_SYMBOL(unregister_qdisc);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700207
/* Get default qdisc if not otherwise specified */
/* Copies the id of the current default qdisc into @name (at most @len
 * bytes, always NUL-terminated). The read lock keeps default_qdisc_ops
 * stable for the duration of the copy.
 */
void qdisc_get_default(char *name, size_t len)
{
	read_lock(&qdisc_mod_lock);
	strlcpy(name, default_qdisc_ops->id, len);
	read_unlock(&qdisc_mod_lock);
}
215
216static struct Qdisc_ops *qdisc_lookup_default(const char *name)
217{
218 struct Qdisc_ops *q = NULL;
219
220 for (q = qdisc_base; q; q = q->next) {
221 if (!strcmp(name, q->id)) {
222 if (!try_module_get(q->owner))
223 q = NULL;
224 break;
225 }
226 }
227
228 return q;
229}
230
/* Set new default qdisc to use */
/* Switch default_qdisc_ops to the discipline named @name, loading the
 * sch_<name> module if needed. Returns 0 on success, -EPERM without
 * CAP_NET_ADMIN, -ENOENT when the discipline cannot be found/loaded.
 */
int qdisc_set_default(const char *name)
{
	const struct Qdisc_ops *ops;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

	write_lock(&qdisc_mod_lock);
	ops = qdisc_lookup_default(name);
	if (!ops) {
		/* Not found, drop lock and try to load module */
		write_unlock(&qdisc_mod_lock);
		request_module("sch_%s", name);
		write_lock(&qdisc_mod_lock);

		/* Re-check: the module load may have registered it. */
		ops = qdisc_lookup_default(name);
	}

	if (ops) {
		/* Set new default: release the old default's module ref;
		 * qdisc_lookup_default() already took one on @ops. */
		module_put(default_qdisc_ops->owner);
		default_qdisc_ops = ops;
	}
	write_unlock(&qdisc_mod_lock);

	return ops ? 0 : -ENOENT;
}
259
Linus Torvalds1da177e2005-04-16 15:20:36 -0700260/* We know handle. Find qdisc among all qdisc's attached to device
261 (root qdisc, all its children, children of children etc.)
262 */
263
Hannes Eder6113b742008-11-28 03:06:46 -0800264static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
David S. Miller8123b422008-08-08 23:23:39 -0700265{
266 struct Qdisc *q;
267
268 if (!(root->flags & TCQ_F_BUILTIN) &&
269 root->handle == handle)
270 return root;
271
272 list_for_each_entry(q, &root->list, list) {
273 if (q->handle == handle)
274 return q;
275 }
276 return NULL;
277}
278
/* Link a newly created qdisc onto the device root qdisc's list so it
 * can later be found by handle (see qdisc_match_from_root()). Root and
 * ingress qdiscs are reachable directly and are not listed.
 */
static void qdisc_list_add(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
		list_add_tail(&q->list, &qdisc_dev(q)->qdisc->list);
}
284
/* Counterpart of qdisc_list_add(): only qdiscs that were listed there
 * (neither root nor ingress) are actually on a list.
 */
void qdisc_list_del(struct Qdisc *q)
{
	if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS))
		list_del(&q->list);
}
EXPORT_SYMBOL(qdisc_list_del);
291
/* Find a qdisc on @dev by handle: search the egress root hierarchy
 * first, then the ingress hierarchy if the device has an ingress queue.
 * Returns NULL when no qdisc with @handle exists.
 */
struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;

	q = qdisc_match_from_root(dev->qdisc, handle);
	if (q)
		goto out;

	if (dev_ingress_queue(dev))
		q = qdisc_match_from_root(
			dev_ingress_queue(dev)->qdisc_sleeping,
			handle);
out:
	return q;
}
307
Linus Torvalds1da177e2005-04-16 15:20:36 -0700308static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
309{
310 unsigned long cl;
311 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -0800312 const struct Qdisc_class_ops *cops = p->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700313
314 if (cops == NULL)
315 return NULL;
316 cl = cops->get(p, classid);
317
318 if (cl == 0)
319 return NULL;
320 leaf = cops->leaf(p, cl);
321 cops->put(p, cl);
322 return leaf;
323}
324
325/* Find queueing discipline by name */
326
Patrick McHardy1e904742008-01-22 22:11:17 -0800327static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700328{
329 struct Qdisc_ops *q = NULL;
330
331 if (kind) {
332 read_lock(&qdisc_mod_lock);
333 for (q = qdisc_base; q; q = q->next) {
Patrick McHardy1e904742008-01-22 22:11:17 -0800334 if (nla_strcmp(kind, q->id) == 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700335 if (!try_module_get(q->owner))
336 q = NULL;
337 break;
338 }
339 }
340 read_unlock(&qdisc_mod_lock);
341 }
342 return q;
343}
344
Jesper Dangaard Brouer8a8e3d82013-08-14 23:47:11 +0200345/* The linklayer setting were not transferred from iproute2, in older
346 * versions, and the rate tables lookup systems have been dropped in
347 * the kernel. To keep backward compatible with older iproute2 tc
348 * utils, we detect the linklayer setting by detecting if the rate
349 * table were modified.
350 *
351 * For linklayer ATM table entries, the rate table will be aligned to
352 * 48 bytes, thus some table entries will contain the same value. The
353 * mpu (min packet unit) is also encoded into the old rate table, thus
354 * starting from the mpu, we find low and high table entries for
355 * mapping this cell. If these entries contain the same value, when
356 * the rate tables have been modified for linklayer ATM.
357 *
358 * This is done by rounding mpu to the nearest 48 bytes cell/entry,
359 * and then roundup to the next cell, calc the table entry one below,
360 * and compare.
361 */
static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
{
	/* Round mpu up to an ATM 48-byte cell boundary, then to the next
	 * cell, and convert both byte sizes to rate-table slot indexes. */
	int low = roundup(r->mpu, 48);
	int high = roundup(low+1, 48);
	int cell_low = low >> r->cell_log;
	int cell_high = (high >> r->cell_log) - 1;

	/* rtab is too inaccurate at rates > 100Mbit/s */
	if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
		pr_debug("TC linklayer: Giving up ATM detection\n");
		return TC_LINKLAYER_ETHERNET;
	}

	/* Two distinct slots holding the same value means iproute2
	 * aligned the table to 48-byte cells, i.e. built it for ATM
	 * (see the block comment above this function). */
	if ((cell_high > cell_low) && (cell_high < 256)
	    && (rtab[cell_low] == rtab[cell_high])) {
		pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
			 cell_low, cell_high, rtab[cell_high]);
		return TC_LINKLAYER_ATM;
	}
	return TC_LINKLAYER_ETHERNET;
}
383
Linus Torvalds1da177e2005-04-16 15:20:36 -0700384static struct qdisc_rate_table *qdisc_rtab_list;
385
/* Get (or share) a rate table built from spec @r and TCA_RATE-style
 * attribute @tab. Identical tables are refcounted and shared; release
 * with qdisc_put_rtab(). Returns NULL on invalid input or OOM.
 */
struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
{
	struct qdisc_rate_table *rtab;

	/* Reject obviously invalid specs and wrongly sized tables. */
	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
	    nla_len(tab) != TC_RTAB_SIZE)
		return NULL;

	/* Reuse an existing table when both spec and data match. */
	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (!memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) &&
		    !memcmp(&rtab->data, nla_data(tab), 1024)) {
			rtab->refcnt++;
			return rtab;
		}
	}

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), 1024);
		/* Older iproute2 never set linklayer; infer it from the
		 * table contents (see __detect_linklayer()). */
		if (r->linklayer == TC_LINKLAYER_UNAWARE)
			r->linklayer = __detect_linklayer(r, rtab->data);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	}
	return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700415
416void qdisc_put_rtab(struct qdisc_rate_table *tab)
417{
418 struct qdisc_rate_table *rtab, **rtabp;
419
420 if (!tab || --tab->refcnt)
421 return;
422
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000423 for (rtabp = &qdisc_rtab_list;
424 (rtab = *rtabp) != NULL;
425 rtabp = &rtab->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700426 if (rtab == tab) {
427 *rtabp = rtab->next;
428 kfree(rtab);
429 return;
430 }
431 }
432}
Patrick McHardy62e3ba12008-01-22 22:10:23 -0800433EXPORT_SYMBOL(qdisc_put_rtab);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700434
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700435static LIST_HEAD(qdisc_stab_list);
436static DEFINE_SPINLOCK(qdisc_stab_lock);
437
438static const struct nla_policy stab_policy[TCA_STAB_MAX + 1] = {
439 [TCA_STAB_BASE] = { .len = sizeof(struct tc_sizespec) },
440 [TCA_STAB_DATA] = { .type = NLA_BINARY },
441};
442
/* Parse a TCA_STAB attribute and return a (possibly shared) size table.
 * Tables with identical sizespec and data are refcounted and shared;
 * release with qdisc_put_stab(). Returns ERR_PTR() on malformed input
 * or allocation failure.
 */
static struct qdisc_size_table *qdisc_get_stab(struct nlattr *opt)
{
	struct nlattr *tb[TCA_STAB_MAX + 1];
	struct qdisc_size_table *stab;
	struct tc_sizespec *s;
	unsigned int tsize = 0;
	u16 *tab = NULL;
	int err;

	err = nla_parse_nested(tb, TCA_STAB_MAX, opt, stab_policy);
	if (err < 0)
		return ERR_PTR(err);
	if (!tb[TCA_STAB_BASE])
		return ERR_PTR(-EINVAL);

	s = nla_data(tb[TCA_STAB_BASE]);

	if (s->tsize > 0) {
		if (!tb[TCA_STAB_DATA])
			return ERR_PTR(-EINVAL);
		tab = nla_data(tb[TCA_STAB_DATA]);
		tsize = nla_len(tb[TCA_STAB_DATA]) / sizeof(u16);
	}

	/* The declared entry count must match the actual data length. */
	if (tsize != s->tsize || (!tab && tsize > 0))
		return ERR_PTR(-EINVAL);

	spin_lock(&qdisc_stab_lock);

	/* Share an existing identical table if one is already listed. */
	list_for_each_entry(stab, &qdisc_stab_list, list) {
		if (memcmp(&stab->szopts, s, sizeof(*s)))
			continue;
		if (tsize > 0 && memcmp(stab->data, tab, tsize * sizeof(u16)))
			continue;
		stab->refcnt++;
		spin_unlock(&qdisc_stab_lock);
		return stab;
	}

	/* Drop the lock across the sleeping allocation below; a racing
	 * insertion of an identical table only costs a duplicate. */
	spin_unlock(&qdisc_stab_lock);

	stab = kmalloc(sizeof(*stab) + tsize * sizeof(u16), GFP_KERNEL);
	if (!stab)
		return ERR_PTR(-ENOMEM);

	stab->refcnt = 1;
	stab->szopts = *s;
	if (tsize > 0)
		memcpy(stab->data, tab, tsize * sizeof(u16));

	spin_lock(&qdisc_stab_lock);
	list_add_tail(&stab->list, &qdisc_stab_list);
	spin_unlock(&qdisc_stab_lock);

	return stab;
}
499
/* RCU callback: actually free a size table after the grace period. */
static void stab_kfree_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct qdisc_size_table, rcu));
}
504
/* Drop one reference on size table @tab; on the last reference unlink
 * it and schedule the actual free after an RCU-bh grace period, since
 * readers may still be traversing it locklessly. NULL is a no-op.
 */
void qdisc_put_stab(struct qdisc_size_table *tab)
{
	if (!tab)
		return;

	spin_lock(&qdisc_stab_lock);

	if (--tab->refcnt == 0) {
		list_del(&tab->list);
		call_rcu_bh(&tab->rcu, stab_kfree_rcu);
	}

	spin_unlock(&qdisc_stab_lock);
}
EXPORT_SYMBOL(qdisc_put_stab);
520
/* Emit a nested TCA_STAB attribute describing @stab into @skb.
 * Only the sizespec is dumped, not the table data. Returns the new
 * message length, or -1 if the skb ran out of room.
 */
static int qdisc_dump_stab(struct sk_buff *skb, struct qdisc_size_table *stab)
{
	struct nlattr *nest;

	nest = nla_nest_start(skb, TCA_STAB);
	if (nest == NULL)
		goto nla_put_failure;
	if (nla_put(skb, TCA_STAB_BASE, sizeof(stab->szopts), &stab->szopts))
		goto nla_put_failure;
	nla_nest_end(skb, nest);

	return skb->len;

nla_put_failure:
	return -1;
}
537
/* Compute the size-table-adjusted packet length for @skb and store it
 * in the qdisc cb. The result is clamped to at least 1.
 */
void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab)
{
	int pkt_len, slot;

	pkt_len = skb->len + stab->szopts.overhead;
	if (unlikely(!stab->szopts.tsize))
		goto out;

	/* Map the aligned length onto a table slot index. */
	slot = pkt_len + stab->szopts.cell_align;
	if (unlikely(slot < 0))
		slot = 0;

	slot >>= stab->szopts.cell_log;
	if (likely(slot < stab->szopts.tsize))
		pkt_len = stab->data[slot];
	else
		/* Past the end of the table: extrapolate from the last
		 * entry plus the wrapped-around remainder slot. */
		pkt_len = stab->data[stab->szopts.tsize - 1] *
				(slot / stab->szopts.tsize) +
				stab->data[slot % stab->szopts.tsize];

	pkt_len <<= stab->szopts.size_log;
out:
	if (unlikely(pkt_len < 1))
		pkt_len = 1;
	qdisc_skb_cb(skb)->pkt_len = pkt_len;
}
EXPORT_SYMBOL(__qdisc_calculate_pkt_len);
Jussi Kivilinna175f9c12008-07-20 00:08:47 -0700565
Jarek Poplawskib00355d2009-02-01 01:12:42 -0800566void qdisc_warn_nonwc(char *txt, struct Qdisc *qdisc)
567{
568 if (!(qdisc->flags & TCQ_F_WARN_NONWC)) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +0000569 pr_warn("%s: %s qdisc %X: is non-work-conserving?\n",
570 txt, qdisc->ops->id, qdisc->handle >> 16);
Jarek Poplawskib00355d2009-02-01 01:12:42 -0800571 qdisc->flags |= TCQ_F_WARN_NONWC;
572 }
573}
574EXPORT_SYMBOL(qdisc_warn_nonwc);
575
/* hrtimer callback: the throttled qdisc's deadline has passed; clear
 * the throttled state and reschedule its root so dequeue runs again.
 */
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);

	qdisc_unthrottled(wd->qdisc);
	__netif_schedule(qdisc_root(wd->qdisc));

	return HRTIMER_NORESTART;
}
586
/* Initialize a watchdog bound to @qdisc, using an absolute monotonic
 * hrtimer that fires qdisc_watchdog().
 */
void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init);
594
/* Mark the qdisc throttled and arm its watchdog to fire at absolute
 * time @expires (nanoseconds).
 */
void qdisc_watchdog_schedule_ns(struct qdisc_watchdog *wd, u64 expires)
{
	/* No point arming a timer for a qdisc that is being torn down. */
	if (test_bit(__QDISC_STATE_DEACTIVATED,
		     &qdisc_root_sleeping(wd->qdisc)->state))
		return;

	qdisc_throttled(wd->qdisc);

	hrtimer_start(&wd->timer,
		      ns_to_ktime(expires),
		      HRTIMER_MODE_ABS);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule_ns);
Patrick McHardy41794772007-03-16 01:19:15 -0700608
/* Cancel a pending watchdog timer and clear the throttled state. */
void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
	qdisc_unthrottled(wd->qdisc);
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615
Adrian Bunka94f7792008-07-22 14:20:11 -0700616static struct hlist_head *qdisc_class_hash_alloc(unsigned int n)
Patrick McHardy6fe1c7a2008-07-05 23:21:31 -0700617{
618 unsigned int size = n * sizeof(struct hlist_head), i;
619 struct hlist_head *h;
620
621 if (size <= PAGE_SIZE)
622 h = kmalloc(size, GFP_KERNEL);
623 else
624 h = (struct hlist_head *)
625 __get_free_pages(GFP_KERNEL, get_order(size));
626
627 if (h != NULL) {
628 for (i = 0; i < n; i++)
629 INIT_HLIST_HEAD(&h[i]);
630 }
631 return h;
632}
633
/* Free a table allocated by qdisc_class_hash_alloc(); the size decides
 * whether it came from kmalloc or the page allocator.
 */
static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n)
{
	unsigned int size = n * sizeof(struct hlist_head);

	if (size <= PAGE_SIZE)
		kfree(h);
	else
		free_pages((unsigned long)h, get_order(size));
}
643
/* Double the class hash of @sch when its load factor exceeds 0.75.
 * The rehash runs under sch_tree_lock() so concurrent class lookups
 * never observe a half-moved table; a failed allocation simply leaves
 * the old (still correct) table in place.
 */
void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash)
{
	struct Qdisc_class_common *cl;
	struct hlist_node *next;
	struct hlist_head *nhash, *ohash;
	unsigned int nsize, nmask, osize;
	unsigned int i, h;

	/* Rehash when load factor exceeds 0.75 */
	if (clhash->hashelems * 4 <= clhash->hashsize * 3)
		return;
	nsize = clhash->hashsize * 2;
	nmask = nsize - 1;
	nhash = qdisc_class_hash_alloc(nsize);
	if (nhash == NULL)
		return;

	ohash = clhash->hash;
	osize = clhash->hashsize;

	sch_tree_lock(sch);
	for (i = 0; i < osize; i++) {
		/* _safe variant: hlist_add_head relinks each node. */
		hlist_for_each_entry_safe(cl, next, &ohash[i], hnode) {
			h = qdisc_class_hash(cl->classid, nmask);
			hlist_add_head(&cl->hnode, &nhash[h]);
		}
	}
	clhash->hash = nhash;
	clhash->hashsize = nsize;
	clhash->hashmask = nmask;
	sch_tree_unlock(sch);

	qdisc_class_hash_free(ohash, osize);
}
EXPORT_SYMBOL(qdisc_class_hash_grow);
679
680int qdisc_class_hash_init(struct Qdisc_class_hash *clhash)
681{
682 unsigned int size = 4;
683
684 clhash->hash = qdisc_class_hash_alloc(size);
685 if (clhash->hash == NULL)
686 return -ENOMEM;
687 clhash->hashsize = size;
688 clhash->hashmask = size - 1;
689 clhash->hashelems = 0;
690 return 0;
691}
692EXPORT_SYMBOL(qdisc_class_hash_init);
693
/* Release the bucket array of @clhash (the classes themselves are the
 * owner qdisc's responsibility).
 */
void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash)
{
	qdisc_class_hash_free(clhash->hash, clhash->hashsize);
}
EXPORT_SYMBOL(qdisc_class_hash_destroy);
699
/* Insert class @cl into @clhash, bucketed by its classid. */
void qdisc_class_hash_insert(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	unsigned int h;

	INIT_HLIST_NODE(&cl->hnode);
	h = qdisc_class_hash(cl->classid, clhash->hashmask);
	hlist_add_head(&cl->hnode, &clhash->hash[h]);
	clhash->hashelems++;
}
EXPORT_SYMBOL(qdisc_class_hash_insert);
711
/* Remove class @cl from @clhash and update the element count. */
void qdisc_class_hash_remove(struct Qdisc_class_hash *clhash,
			     struct Qdisc_class_common *cl)
{
	hlist_del(&cl->hnode);
	clhash->hashelems--;
}
EXPORT_SYMBOL(qdisc_class_hash_remove);
719
/* Allocate an unique handle from space managed by kernel
 * Possible range is [8000-FFFF]:0000 (0x8000 values)
 */
static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x8000;	/* one probe per handle in the range */
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		/* Wrap back to 8000:0000 when we'd collide with TC_H_ROOT. */
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
		if (!qdisc_lookup(dev, autohandle))
			return autohandle;
		cond_resched();
	} while	(--i > 0);

	/* Entire range in use on this device. */
	return 0;
}
739
/* Propagate a queue-length decrease of @n up the chain of ancestor
 * qdiscs, giving each classful parent a chance (via qlen_notify) to
 * deactivate a class that has become empty.
 */
void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
{
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;

	if (n == 0)
		return;
	while ((parentid = sch->parent)) {
		/* Ingress qdiscs have no meaningful ancestry to walk. */
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			return;

		sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid));
		if (sch == NULL) {
			/* Only the root may legitimately have no parent. */
			WARN_ON(parentid != TC_H_ROOT);
			return;
		}
		cops = sch->ops->cl_ops;
		if (cops->qlen_notify) {
			cl = cops->get(sch, parentid);
			cops->qlen_notify(sch, cl);
			cops->put(sch, cl);
		}
		sch->q.qlen -= n;
	}
}
EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700767
/* Send an rtnetlink notification about a qdisc change, then drop the
 * old qdisc (if any). The notification goes out before destruction so
 * listeners see the old handle while it is still valid.
 */
static void notify_and_destroy(struct net *net, struct sk_buff *skb,
			       struct nlmsghdr *n, u32 clid,
			       struct Qdisc *old, struct Qdisc *new)
{
	if (new || old)
		qdisc_notify(net, skb, n, clid, old, new);

	if (old)
		qdisc_destroy(old);
}
778
779/* Graft qdisc "new" to class "classid" of qdisc "parent" or
780 * to device "dev".
781 *
782 * When appropriate send a netlink notification using 'skb'
783 * and "n".
784 *
785 * On success, destroy old qdisc.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700786 */
787
static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       struct sk_buff *skb, struct nlmsghdr *n, u32 classid,
		       struct Qdisc *new, struct Qdisc *old)
{
	struct Qdisc *q = old;
	struct net *net = dev_net(dev);
	int err = 0;

	if (parent == NULL) {
		/* Grafting at the device root (egress or ingress). */
		unsigned int i, num_q, ingress;

		ingress = 0;
		num_q = dev->num_tx_queues;
		if ((q && q->flags & TCQ_F_INGRESS) ||
		    (new && new->flags & TCQ_F_INGRESS)) {
			/* Ingress: exactly one queue, which must exist. */
			num_q = 1;
			ingress = 1;
			if (!dev_ingress_queue(dev))
				return -ENOENT;
		}

		if (dev->flags & IFF_UP)
			dev_deactivate(dev);

		/* Multiqueue-aware qdiscs attach themselves; skip the
		 * per-queue grafting loop below. */
		if (new && new->ops->attach) {
			new->ops->attach(new);
			num_q = 0;
		}

		for (i = 0; i < num_q; i++) {
			struct netdev_queue *dev_queue = dev_ingress_queue(dev);

			if (!ingress)
				dev_queue = netdev_get_tx_queue(dev, i);

			old = dev_graft_qdisc(dev_queue, new);
			/* One reference per tx queue beyond the first. */
			if (new && i > 0)
				atomic_inc(&new->refcnt);

			if (!ingress)
				qdisc_destroy(old);
		}

		if (!ingress) {
			notify_and_destroy(net, skb, n, classid,
					   dev->qdisc, new);
			/* dev->qdisc holds its own reference. */
			if (new && !new->ops->attach)
				atomic_inc(&new->refcnt);
			dev->qdisc = new ? : &noop_qdisc;
		} else {
			notify_and_destroy(net, skb, n, classid, old, new);
		}

		if (dev->flags & IFF_UP)
			dev_activate(dev);
	} else {
		/* Grafting into a class of a classful parent qdisc. */
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		err = -EOPNOTSUPP;
		if (cops && cops->graft) {
			unsigned long cl = cops->get(parent, classid);
			if (cl) {
				err = cops->graft(parent, cl, new, &old);
				cops->put(parent, cl);
			} else
				err = -ENOENT;
		}
		if (!err)
			notify_and_destroy(net, skb, n, classid, old, new);
	}
	return err;
}
860
/* lockdep annotation is needed for ingress; egress gets it only for name.
 * Separate classes keep lockdep from conflating the rx (ingress) and tx
 * qdisc locks, which are taken in different contexts.
 */
static struct lock_class_key qdisc_tx_lock;
static struct lock_class_key qdisc_rx_lock;
864
/*
   Allocate and initialize new qdisc.

   Parameters are passed via opt.

   Returns the new qdisc on success; on failure returns NULL and stores a
   negative errno in *errp.  A special case is -EAGAIN: the RTNL lock was
   dropped to load a scheduler module, so the caller must replay the whole
   request (the device may have changed meanwhile).
 */

static struct Qdisc *
qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue,
	     struct Qdisc *p, u32 parent, u32 handle,
	     struct nlattr **tca, int *errp)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;
	struct qdisc_size_table *stab;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_MODULES
	/* Unknown kind: try auto-loading the sch_<kind> module. */
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load. So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request. We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (ops == NULL)
		goto err_out;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		/* Ingress qdiscs get the fixed ffff:0000 handle and the rx
		 * lockdep class (see qdisc_rx_lock above). */
		sch->flags |= TCQ_F_INGRESS;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
		lockdep_set_class(qdisc_lock(sch), &qdisc_rx_lock);
	} else {
		if (handle == 0) {
			/* handle 0 means "pick one for me". */
			handle = qdisc_alloc_handle(dev);
			err = -ENOMEM;
			if (handle == 0)
				goto err_out3;
		}
		lockdep_set_class(qdisc_lock(sch), &qdisc_tx_lock);
		if (!netif_is_multiqueue(dev))
			sch->flags |= TCQ_F_ONETXQUEUE;
	}

	sch->handle = handle;

	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
		if (tca[TCA_STAB]) {
			stab = qdisc_get_stab(tca[TCA_STAB]);
			if (IS_ERR(stab)) {
				err = PTR_ERR(stab);
				goto err_out4;
			}
			rcu_assign_pointer(sch->stab, stab);
		}
		if (tca[TCA_RATE]) {
			spinlock_t *root_lock;

			/* Rate estimators are not supported on mq roots. */
			err = -EOPNOTSUPP;
			if (sch->flags & TCQ_F_MQROOT)
				goto err_out4;

			if ((sch->parent != TC_H_ROOT) &&
			    !(sch->flags & TCQ_F_INGRESS) &&
			    (!p || !(p->flags & TCQ_F_MQROOT)))
				root_lock = qdisc_root_sleeping_lock(sch);
			else
				root_lock = qdisc_lock(sch);

			err = gen_new_estimator(&sch->bstats, &sch->rate_est,
						root_lock, tca[TCA_RATE]);
			if (err)
				goto err_out4;
		}

		qdisc_list_add(sch);

		return sch;
	}
err_out3:
	/* NOTE(review): drops the device reference presumably taken in
	 * qdisc_alloc() — confirm against qdisc_alloc's dev_hold. */
	dev_put(dev);
	kfree((char *) sch - sch->padded);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;

err_out4:
	/*
	 * Any broken qdiscs that would require a ops->reset() here?
	 * The qdisc was never in action so it shouldn't be necessary.
	 */
	qdisc_put_stab(rtnl_dereference(sch->stab));
	if (ops->destroy)
		ops->destroy(sch);
	goto err_out3;
}
993
/* Apply a change request (TCA_OPTIONS / TCA_STAB / TCA_RATE) to an
 * existing qdisc.  Returns 0 on success or a negative errno; once
 * ops->change() has succeeded the remaining steps are best-effort
 * because the change cannot be undone.
 */
static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
{
	struct qdisc_size_table *ostab, *stab = NULL;
	int err = 0;

	if (tca[TCA_OPTIONS]) {
		if (sch->ops->change == NULL)
			return -EINVAL;
		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
		if (err)
			return err;
	}

	if (tca[TCA_STAB]) {
		stab = qdisc_get_stab(tca[TCA_STAB]);
		if (IS_ERR(stab))
			return PTR_ERR(stab);
	}

	/* Publish the new size table (or NULL) before releasing the old
	 * one; readers access sch->stab under RCU. */
	ostab = rtnl_dereference(sch->stab);
	rcu_assign_pointer(sch->stab, stab);
	qdisc_put_stab(ostab);

	if (tca[TCA_RATE]) {
		/* NB: ignores errors from replace_estimator
		   because change can't be undone. */
		if (sch->flags & TCQ_F_MQROOT)
			goto out;
		gen_replace_estimator(&sch->bstats, &sch->rate_est,
				      qdisc_root_sleeping_lock(sch),
				      tca[TCA_RATE]);
	}
out:
	return 0;
}
1029
/* Walker state for loop detection when grafting a qdisc: 'p' is the
 * candidate parent being grafted, 'depth' bounds the recursion. */
struct check_loop_arg {
	struct qdisc_walker w;
	struct Qdisc *p;
	int depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
1037
1038static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
1039{
1040 struct check_loop_arg arg;
1041
1042 if (q->ops->cl_ops == NULL)
1043 return 0;
1044
1045 arg.w.stop = arg.w.skip = arg.w.count = 0;
1046 arg.w.fn = check_loop_fn;
1047 arg.depth = depth;
1048 arg.p = p;
1049 q->ops->cl_ops->walk(q, &arg.w);
1050 return arg.w.stop ? -ELOOP : 0;
1051}
1052
1053static int
1054check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
1055{
1056 struct Qdisc *leaf;
Eric Dumazet20fea082007-11-14 01:44:41 -08001057 const struct Qdisc_class_ops *cops = q->ops->cl_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001058 struct check_loop_arg *arg = (struct check_loop_arg *)w;
1059
1060 leaf = cops->leaf(q, cl);
1061 if (leaf) {
1062 if (leaf == arg->p || arg->depth > 7)
1063 return -ELOOP;
1064 return check_loop(leaf, arg->p, arg->depth + 1);
1065 }
1066 return 0;
1067}
1068
/*
 * Delete/get qdisc.
 *
 * Netlink handler for RTM_DELQDISC and RTM_GETQDISC.  Locates the qdisc
 * either by parent (tcm_parent) or by handle (tcm_handle), then deletes
 * it via qdisc_graft(..., new=NULL) or echoes it back via qdisc_notify().
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	/* Only GET is unprivileged; DEL requires CAP_NET_ADMIN. */
	if ((n->nlmsg_type != RTM_GETQDISC) && !capable(CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	clid = tcm->tcm_parent;
	if (clid) {
		/* Resolve the target through its parent. */
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}
		if (!q)
			return -ENOENT;

		/* If a handle was also given, it must agree. */
		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
			return -EINVAL;
	} else {
		q = qdisc_lookup(dev, tcm->tcm_handle);
		if (!q)
			return -ENOENT;
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid)
			return -EINVAL;
		if (q->handle == 0)
			return -ENOENT;
		/* Grafting NULL in place of q deletes it and notifies. */
		err = qdisc_graft(dev, p, skb, n, clid, NULL, q);
		if (err != 0)
			return err;
	} else {
		qdisc_notify(net, skb, n, clid, NULL, q);
	}
	return 0;
}
1136
/*
 * Create/change qdisc.
 *
 * Netlink handler for RTM_NEWQDISC.  Depending on the NLM_F_* flags and
 * whether a matching qdisc already exists, this either changes the
 * existing qdisc in place (qdisc_change) or creates a new one and grafts
 * it (qdisc_create + qdisc_graft).  qdisc_create may return -EAGAIN after
 * a module autoload, in which case the whole request is replayed.
 */

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	if (!capable(CAP_NET_ADMIN))
		return -EPERM;

replay:
	/* Reinit, just in case something touches this. */
	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	tcm = nlmsg_data(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;


	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				p = qdisc_lookup(dev, TC_H_MAJ(clid));
				if (!p)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else if (dev_ingress_queue_create(dev)) {
				q = dev_ingress_queue(dev)->qdisc_sleeping;
			}
		} else {
			q = dev->qdisc;
		}

		/* It may be default qdisc, ignore it */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				/* Caller named a specific handle. */
				if (q && !(n->nlmsg_flags & NLM_F_REPLACE))
					return -EEXIST;
				if (TC_H_MIN(tcm->tcm_handle))
					return -EINVAL;
				q = qdisc_lookup(dev, tcm->tcm_handle);
				if (!q)
					goto create_n_graft;
				if (n->nlmsg_flags & NLM_F_EXCL)
					return -EEXIST;
				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
					return -EINVAL;
				/* Reusing an existing qdisc: make sure the
				 * graft would not create a cycle. */
				if (q == p ||
				    (p && check_loop(q, p, 0)))
					return -ELOOP;
				atomic_inc(&q->refcnt);
				goto graft;
			} else {
				if (!q)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 * We know, that some child q is already
				 * attached to this parent and have choice:
				 * either to change it or to create/graft new one.
				 *
				 * 1. We are allowed to create/graft only
				 * if CREATE and REPLACE flags are set.
				 *
				 * 2. If EXCL is set, requestor wanted to say,
				 * that qdisc tcm_handle is not expected
				 * to exist, so that we choose create/graft too.
				 *
				 * 3. The last case is when no flags are set.
				 * Alas, it is sort of hole in API, we
				 * cannot decide what to do unambiguously.
				 * For now we select create/graft, if
				 * user gave KIND, which does not match existing.
				 */
				if ((n->nlmsg_flags & NLM_F_CREATE) &&
				    (n->nlmsg_flags & NLM_F_REPLACE) &&
				    ((n->nlmsg_flags & NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle)
			return -EINVAL;
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (q == NULL)
		return -ENOENT;
	if (n->nlmsg_flags & NLM_F_EXCL)
		return -EEXIST;
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;
	err = qdisc_change(q, tca);
	if (err == 0)
		qdisc_notify(net, skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags & NLM_F_CREATE))
		return -ENOENT;
	if (clid == TC_H_INGRESS) {
		if (dev_ingress_queue(dev))
			q = qdisc_create(dev, dev_ingress_queue(dev), p,
					 tcm->tcm_parent, tcm->tcm_parent,
					 tca, &err);
		else
			err = -ENOENT;
	} else {
		struct netdev_queue *dev_queue;

		/* Let a classful parent pick the tx queue if it can. */
		if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue)
			dev_queue = p->ops->cl_ops->select_queue(p, tcm);
		else if (p)
			dev_queue = p->dev_queue;
		else
			dev_queue = netdev_get_tx_queue(dev, 0);

		q = qdisc_create(dev, dev_queue, p,
				 tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err);
	}
	if (q == NULL) {
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	err = qdisc_graft(dev, p, skb, n, clid, q, NULL);
	if (err) {
		if (q)
			qdisc_destroy(q);
		return err;
	}

	return 0;
}
1294
/* Serialize one qdisc into a netlink message on @skb.  Returns skb->len
 * on success; on any failure the partial message is trimmed back to the
 * saved tail pointer and -1 is returned. */
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point */
	struct gnet_dump d;
	struct qdisc_size_table *stab;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	/* tcm_info carries the refcount in dump replies. */
	tcm->tcm_info = atomic_read(&q->refcnt);
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	q->qstats.qlen = q->q.qlen;

	stab = rtnl_dereference(q->stab);
	if (stab && qdisc_dump_stab(skb, stab) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 qdisc_root_sleeping_lock(q), &d) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->bstats, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, &q->qstats) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
1348
Eric Dumazet53b0f082010-05-22 20:37:44 +00001349static bool tc_qdisc_dump_ignore(struct Qdisc *q)
1350{
1351 return (q->flags & TCQ_F_BUILTIN) ? true : false;
1352}
1353
/* Send an RTM_DELQDISC/RTM_NEWQDISC notification describing the
 * transition from @old to @new.  Builtin qdiscs are skipped; if nothing
 * ends up in the message (or filling fails) the skb is freed and -EINVAL
 * is returned. */
static int qdisc_notify(struct net *net, struct sk_buff *oskb,
			struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new)
{
	struct sk_buff *skb;
	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && !tc_qdisc_dump_ignore(old)) {
		if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
				  0, RTM_DELQDISC) < 0)
			goto err_out;
	}
	if (new && !tc_qdisc_dump_ignore(new)) {
		/* NLM_F_REPLACE signals old was swapped for new. */
		if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
				  old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
				      n->nlmsg_flags & NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}
1384
/* Dump @root and every qdisc on its ->list into @skb, skipping the first
 * s_q_idx entries (resume point of an interrupted netlink dump).  The
 * running index is returned through *q_idx_p so the caller can save it
 * in cb->args.  Returns 0 when done, -1 when the skb filled up. */
static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
			      struct netlink_callback *cb,
			      int *q_idx_p, int s_q_idx)
{
	int ret = 0, q_idx = *q_idx_p;
	struct Qdisc *q;

	if (!root)
		return 0;

	/* The root itself is entry 0. */
	q = root;
	if (q_idx < s_q_idx) {
		q_idx++;
	} else {
		if (!tc_qdisc_dump_ignore(q) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}
	list_for_each_entry(q, &root->list, list) {
		if (q_idx < s_q_idx) {
			q_idx++;
			continue;
		}
		if (!tc_qdisc_dump_ignore(q) &&
		    tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
				  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
			goto done;
		q_idx++;
	}

out:
	*q_idx_p = q_idx;
	return ret;
done:
	ret = -1;
	goto out;
}
1424
/* Netlink dump callback for RTM_GETQDISC: walk every device in the
 * namespace under RCU and dump its root and ingress qdisc hierarchies.
 * cb->args[0]/[1] hold the device and qdisc resume indices. */
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];

	rcu_read_lock();
	idx = 0;
	for_each_netdev_rcu(net, dev) {
		struct netdev_queue *dev_queue;

		if (idx < s_idx)
			goto cont;
		/* Past the resume device: start its qdiscs from 0. */
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;

		if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0)
			goto done;

		dev_queue = dev_ingress_queue(dev);
		if (dev_queue &&
		    tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb,
				       &q_idx, s_q_idx) < 0)
			goto done;

cont:
		idx++;
	}

done:
	rcu_read_unlock();

	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}
1467
1468
1469
1470/************************************************
1471 * Traffic classes manipulation. *
1472 ************************************************/
1473
1474
1475
/* Netlink handler for RTM_NEWTCLASS / RTM_DELTCLASS / RTM_GETTCLASS:
 * resolve the owning qdisc from the (parent, handle) pair, look up the
 * class, then dispatch to the qdisc's class ops (change/delete) or emit
 * a notification for GET. */
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = nlmsg_data(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 portid;		/* reused to hold the (completed) parent id */
	u32 clid;
	u32 qid;
	int err;

	/* Only GET is unprivileged. */
	if ((n->nlmsg_type != RTM_GETTCLASS) && !capable(CAP_NET_ADMIN))
		return -EPERM;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	dev = __dev_get_by_index(net, tcm->tcm_ifindex);
	if (!dev)
		return -ENODEV;

	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT - class is root, which has no parent.
	   parent == X:0 - parent is root class.
	   parent == X:Y - parent is a node in hierarchy.
	   parent == 0:Y - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0 - generate handle from kernel pool.
	   handle == 0:Y - class is X:Y, where X:0 is qdisc.
	   handle == X:Y - clear.
	   handle == X:0 - root class.
	 */

	/* Step 1. Determine qdisc handle X:0 */

	portid = tcm->tcm_parent;
	clid = tcm->tcm_handle;
	qid = TC_H_MAJ(clid);

	if (portid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(portid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc->handle;

		/* Now qid is genuine qdisc handle consistent
		 * both with parent and child.
		 *
		 * TC_H_MAJ(portid) still may be unspecified, complete it now.
		 */
		if (portid)
			portid = TC_H_MAKE(qid, portid);
	} else {
		if (qid == 0)
			qid = dev->qdisc->handle;
	}

	/* OK. Locate qdisc */
	q = qdisc_lookup(dev, qid);
	if (!q)
		return -ENOENT;

	/* And check that it supports classes */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get class */
	if (clid == 0) {
		if (portid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->get(q, clid);

	if (cl == 0) {
		/* Class does not exist: only NEW+CREATE may proceed. */
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS ||
		    !(n->nlmsg_flags & NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags & NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = -EOPNOTSUPP;
			if (cops->delete)
				err = cops->delete(q, cl);
			if (err == 0)
				tclass_notify(net, skb, n, q, cl, RTM_DELTCLASS);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	/* Create or update the class via the qdisc's change op. */
	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, portid, tca, &new_cl);
	if (err == 0)
		tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);

out:
	/* Drop the reference taken by cops->get(). */
	if (cl)
		cops->put(q, cl);

	return err;
}
1606
1607
/* Build one RTM_*TCLASS netlink message describing class @cl of qdisc @q
 * into @skb.  Returns the new skb length on success; on failure every
 * byte written since entry is trimmed off again and -1 is returned.
 */
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 portid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr *nlh;
	unsigned char *b = skb_tail_pointer(skb);	/* rollback point for error paths */
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
	if (!nlh)
		goto out_nlmsg_trim;
	tcm = nlmsg_data(nlh);
	tcm->tcm_family = AF_UNSPEC;
	/* Zero the pad fields so no uninitialized kernel memory leaks to userspace. */
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	if (nla_put_string(skb, TCA_KIND, q->ops->id))
		goto nla_put_failure;
	/* Class-specific attributes; the dump callback may also rewrite
	 * tcm_parent/tcm_handle/tcm_info for the class being described. */
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS,
					 qdisc_root_sleeping_lock(q), &d) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

out_nlmsg_trim:
nla_put_failure:
	/* Both failure labels roll the skb back to its state at entry. */
	nlmsg_trim(skb, b);
	return -1;
}
1652
Tom Goff7316ae82010-03-19 15:40:13 +00001653static int tclass_notify(struct net *net, struct sk_buff *oskb,
1654 struct nlmsghdr *n, struct Qdisc *q,
1655 unsigned long cl, int event)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001656{
1657 struct sk_buff *skb;
Eric W. Biederman15e47302012-09-07 20:12:54 +00001658 u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001659
1660 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1661 if (!skb)
1662 return -ENOBUFS;
1663
Eric W. Biederman15e47302012-09-07 20:12:54 +00001664 if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001665 kfree_skb(skb);
1666 return -EINVAL;
1667 }
1668
Eric W. Biederman15e47302012-09-07 20:12:54 +00001669 return rtnetlink_send(skb, net, portid, RTNLGRP_TC,
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001670 n->nlmsg_flags & NLM_F_ECHO);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001671}
1672
/* State bundle passed through the class walker while dumping classes:
 * carries the destination skb and netlink dump context alongside the
 * walker so the walk callback can recover them.
 */
struct qdisc_dump_args {
	struct qdisc_walker w;		/* must stay first: callback casts &w back */
	struct sk_buff *skb;		/* message buffer being filled */
	struct netlink_callback *cb;	/* netlink dump state (seq, portid) */
};
1678
1679static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1680{
1681 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1682
Eric W. Biederman15e47302012-09-07 20:12:54 +00001683 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
1685}
1686
David S. Miller30723672008-07-18 22:50:15 -07001687static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
1688 struct tcmsg *tcm, struct netlink_callback *cb,
1689 int *t_p, int s_t)
1690{
1691 struct qdisc_dump_args arg;
1692
1693 if (tc_qdisc_dump_ignore(q) ||
1694 *t_p < s_t || !q->ops->cl_ops ||
1695 (tcm->tcm_parent &&
1696 TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
1697 (*t_p)++;
1698 return 0;
1699 }
1700 if (*t_p > s_t)
1701 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
1702 arg.w.fn = qdisc_class_dump;
1703 arg.skb = skb;
1704 arg.cb = cb;
1705 arg.w.stop = 0;
1706 arg.w.skip = cb->args[1];
1707 arg.w.count = 0;
1708 q->ops->cl_ops->walk(q, &arg.w);
1709 cb->args[1] = arg.w.count;
1710 if (arg.w.stop)
1711 return -1;
1712 (*t_p)++;
1713 return 0;
1714}
1715
1716static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
1717 struct tcmsg *tcm, struct netlink_callback *cb,
1718 int *t_p, int s_t)
1719{
1720 struct Qdisc *q;
1721
1722 if (!root)
1723 return 0;
1724
1725 if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
1726 return -1;
1727
1728 list_for_each_entry(q, &root->list, list) {
1729 if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0)
1730 return -1;
1731 }
1732
1733 return 0;
1734}
1735
Linus Torvalds1da177e2005-04-16 15:20:36 -07001736static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1737{
David S. Miller02ef22c2012-06-26 21:50:05 -07001738 struct tcmsg *tcm = nlmsg_data(cb->nlh);
David S. Miller30723672008-07-18 22:50:15 -07001739 struct net *net = sock_net(skb->sk);
1740 struct netdev_queue *dev_queue;
1741 struct net_device *dev;
1742 int t, s_t;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001743
Hong zhi guo573ce262013-03-27 06:47:04 +00001744 if (nlmsg_len(cb->nlh) < sizeof(*tcm))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001745 return 0;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001746 dev = dev_get_by_index(net, tcm->tcm_ifindex);
1747 if (!dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001748 return 0;
1749
1750 s_t = cb->args[0];
1751 t = 0;
1752
Patrick McHardyaf356af2009-09-04 06:41:18 +00001753 if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001754 goto done;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001755
Eric Dumazet24824a02010-10-02 06:11:55 +00001756 dev_queue = dev_ingress_queue(dev);
1757 if (dev_queue &&
1758 tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
1759 &t, s_t) < 0)
David S. Miller30723672008-07-18 22:50:15 -07001760 goto done;
1761
1762done:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001763 cb->args[0] = t;
1764
1765 dev_put(dev);
1766 return skb->len;
1767}
1768
1769/* Main classifier routine: scans classifier chain attached
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001770 * to this qdisc, (optionally) tests for protocol and asks
1771 * specific classifiers.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001772 */
Eric Dumazetdc7f9f62011-07-05 23:25:42 +00001773int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp,
Patrick McHardy73ca4912007-07-15 00:02:31 -07001774 struct tcf_result *res)
1775{
1776 __be16 protocol = skb->protocol;
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001777 int err;
Patrick McHardy73ca4912007-07-15 00:02:31 -07001778
1779 for (; tp; tp = tp->next) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001780 if (tp->protocol != protocol &&
1781 tp->protocol != htons(ETH_P_ALL))
1782 continue;
1783 err = tp->classify(skb, tp, res);
1784
1785 if (err >= 0) {
Patrick McHardy73ca4912007-07-15 00:02:31 -07001786#ifdef CONFIG_NET_CLS_ACT
1787 if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
1788 skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
1789#endif
1790 return err;
1791 }
1792 }
1793 return -1;
1794}
1795EXPORT_SYMBOL(tc_classify_compat);
1796
/* Classify @skb against chain @tp, honouring TC_ACT_RECLASSIFY verdicts.
 * With CONFIG_NET_CLS_ACT a reclassify verdict restarts the scan from the
 * head of the chain, bounded by MAX_REC_LOOP restarts; beyond that the
 * packet is shot to avoid a reclassification livelock.  Without
 * CONFIG_NET_CLS_ACT this is a plain call to tc_classify_compat().
 */
int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
		struct tcf_result *res)
{
	int err = 0;
#ifdef CONFIG_NET_CLS_ACT
	const struct tcf_proto *otp = tp;	/* chain head, for restarts */
reclassify:
#endif

	err = tc_classify_compat(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
	if (err == TC_ACT_RECLASSIFY) {
		/* Restart count is carried in the skb's tc_verd field. */
		u32 verd = G_TC_VERD(skb->tc_verd);
		tp = otp;

		if (verd++ >= MAX_REC_LOOP) {
			/* Too many restarts: log (ratelimited) and drop. */
			net_notice_ratelimited("%s: packet reclassify loop rule prio %u protocol %02x\n",
					       tp->q->ops->id,
					       tp->prio & 0xffff,
					       ntohs(tp->protocol));
			return TC_ACT_SHOT;
		}
		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
		goto reclassify;
	}
#endif
	return err;
}
EXPORT_SYMBOL(tc_classify);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001826
Patrick McHardya48b5a62007-03-23 11:29:43 -07001827void tcf_destroy(struct tcf_proto *tp)
1828{
1829 tp->ops->destroy(tp);
1830 module_put(tp->ops->owner);
1831 kfree(tp);
1832}
1833
Patrick McHardyff31ab52008-07-01 19:52:38 -07001834void tcf_destroy_chain(struct tcf_proto **fl)
Patrick McHardya48b5a62007-03-23 11:29:43 -07001835{
1836 struct tcf_proto *tp;
1837
Patrick McHardyff31ab52008-07-01 19:52:38 -07001838 while ((tp = *fl) != NULL) {
1839 *fl = tp->next;
Patrick McHardya48b5a62007-03-23 11:29:43 -07001840 tcf_destroy(tp);
1841 }
1842}
1843EXPORT_SYMBOL(tcf_destroy_chain);
1844
Linus Torvalds1da177e2005-04-16 15:20:36 -07001845#ifdef CONFIG_PROC_FS
1846static int psched_show(struct seq_file *seq, void *v)
1847{
Patrick McHardy3c0cfc12007-10-10 16:32:41 -07001848 struct timespec ts;
1849
1850 hrtimer_get_res(CLOCK_MONOTONIC, &ts);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001851 seq_printf(seq, "%08x %08x %08x %08x\n",
Jarek Poplawskica44d6e2009-06-15 02:31:47 -07001852 (u32)NSEC_PER_USEC, (u32)PSCHED_TICKS2NS(1),
Patrick McHardy514bca32007-03-16 12:34:52 -07001853 1000000,
Patrick McHardy3c0cfc12007-10-10 16:32:41 -07001854 (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001855
1856 return 0;
1857}
1858
/* open() hook for /proc/net/psched: hand off to the seq_file helper. */
static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, NULL);
}
1863
/* File operations for the read-only /proc/net/psched seq_file. */
static const struct file_operations psched_fops = {
	.owner = THIS_MODULE,
	.open = psched_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
Tom Goff7316ae82010-03-19 15:40:13 +00001871
1872static int __net_init psched_net_init(struct net *net)
1873{
1874 struct proc_dir_entry *e;
1875
Gao fengd4beaa62013-02-18 01:34:54 +00001876 e = proc_create("psched", 0, net->proc_net, &psched_fops);
Tom Goff7316ae82010-03-19 15:40:13 +00001877 if (e == NULL)
1878 return -ENOMEM;
1879
1880 return 0;
1881}
1882
/* Remove this namespace's /proc/net/psched entry on namespace teardown. */
static void __net_exit psched_net_exit(struct net *net)
{
	remove_proc_entry("psched", net->proc_net);
}
1887#else
/* !CONFIG_PROC_FS stub: no proc entry to create per namespace. */
static int __net_init psched_net_init(struct net *net)
{
	return 0;
}
1892
/* !CONFIG_PROC_FS stub: nothing to tear down. */
static void __net_exit psched_net_exit(struct net *net)
{
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001896#endif
1897
/* Per-network-namespace setup/teardown of the psched proc entry. */
static struct pernet_operations psched_net_ops = {
	.init = psched_net_init,
	.exit = psched_net_exit,
};
1902
Linus Torvalds1da177e2005-04-16 15:20:36 -07001903static int __init pktsched_init(void)
1904{
Tom Goff7316ae82010-03-19 15:40:13 +00001905 int err;
1906
1907 err = register_pernet_subsys(&psched_net_ops);
1908 if (err) {
Eric Dumazetcc7ec452011-01-19 19:26:56 +00001909 pr_err("pktsched_init: "
Tom Goff7316ae82010-03-19 15:40:13 +00001910 "cannot initialize per netns operations\n");
1911 return err;
1912 }
1913
stephen hemminger6da7c8f2013-08-27 16:19:08 -07001914 register_qdisc(&pfifo_fast_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001915 register_qdisc(&pfifo_qdisc_ops);
1916 register_qdisc(&bfifo_qdisc_ops);
Hagen Paul Pfeifer57dbb2d2010-01-24 12:30:59 +00001917 register_qdisc(&pfifo_head_drop_qdisc_ops);
David S. Miller6ec1c692009-09-06 01:58:51 -07001918 register_qdisc(&mq_qdisc_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001919
Greg Rosec7ac8672011-06-10 01:27:09 +00001920 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL);
1921 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL);
1922 rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, NULL);
1923 rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL);
1924 rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL);
1925 rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, NULL);
Thomas Grafbe577dd2007-03-22 11:55:50 -07001926
Linus Torvalds1da177e2005-04-16 15:20:36 -07001927 return 0;
1928}
1929
1930subsys_initcall(pktsched_init);