aboutsummaryrefslogtreecommitdiff
path: root/net/sched
diff options
context:
space:
mode:
Diffstat (limited to 'net/sched')
-rw-r--r--net/sched/Kconfig20
-rw-r--r--net/sched/Makefile2
-rw-r--r--net/sched/act_ipt.c46
-rw-r--r--net/sched/act_skbedit.c203
-rw-r--r--net/sched/cls_flow.c28
-rw-r--r--net/sched/em_cmp.c9
-rw-r--r--net/sched/sch_dsmark.c8
-rw-r--r--net/sched/sch_generic.c32
-rw-r--r--net/sched/sch_multiq.c477
-rw-r--r--net/sched/sch_netem.c18
-rw-r--r--net/sched/sch_prio.c6
-rw-r--r--net/sched/sch_sfq.c4
12 files changed, 789 insertions, 64 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 9437b27ff84..6767e54155d 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -106,6 +106,15 @@ config NET_SCH_PRIO
To compile this code as a module, choose M here: the
module will be called sch_prio.
+config NET_SCH_MULTIQ
+ tristate "Hardware Multiqueue-aware Multi Band Queuing (MULTIQ)"
+ ---help---
+ Say Y here if you want to use an n-band queue packet scheduler
+ to support devices that have multiple hardware transmit queues.
+
+ To compile this code as a module, choose M here: the
+ module will be called sch_multiq.
+
config NET_SCH_RED
tristate "Random Early Detection (RED)"
---help---
@@ -476,6 +485,17 @@ config NET_ACT_SIMP
To compile this code as a module, choose M here: the
module will be called simple.
+config NET_ACT_SKBEDIT
+ tristate "SKB Editing"
+ depends on NET_CLS_ACT
+ ---help---
+ Say Y here to change skb priority or queue_mapping settings.
+
+ If unsure, say N.
+
+ To compile this code as a module, choose M here: the
+ module will be called skbedit.
+
config NET_CLS_IND
bool "Incoming device classification"
depends on NET_CLS_U32 || NET_CLS_FW
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 1d2b0f7df84..e60c9925b26 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_NET_ACT_IPT) += act_ipt.o
obj-$(CONFIG_NET_ACT_NAT) += act_nat.o
obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o
obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o
+obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o
obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
@@ -26,6 +27,7 @@ obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o
obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o
obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o
obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o
+obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o
obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o
obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index d1263b3c96c..0453d79ebf5 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -40,6 +40,7 @@ static struct tcf_hashinfo ipt_hash_info = {
static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook)
{
+ struct xt_tgchk_param par;
struct xt_target *target;
int ret = 0;
@@ -49,29 +50,30 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int
return -ENOENT;
t->u.kernel.target = target;
-
- ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
- table, hook, 0, 0);
- if (ret) {
+ par.table = table;
+ par.entryinfo = NULL;
+ par.target = target;
+ par.targinfo = t->data;
+ par.hook_mask = hook;
+ par.family = NFPROTO_IPV4;
+
+ ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false);
+ if (ret < 0) {
module_put(t->u.kernel.target->me);
return ret;
}
- if (t->u.kernel.target->checkentry
- && !t->u.kernel.target->checkentry(table, NULL,
- t->u.kernel.target, t->data,
- hook)) {
- module_put(t->u.kernel.target->me);
- ret = -EINVAL;
- }
-
- return ret;
+ return 0;
}
static void ipt_destroy_target(struct ipt_entry_target *t)
{
- if (t->u.kernel.target->destroy)
- t->u.kernel.target->destroy(t->u.kernel.target, t->data);
- module_put(t->u.kernel.target->me);
+ struct xt_tgdtor_param par = {
+ .target = t->u.kernel.target,
+ .targinfo = t->data,
+ };
+ if (par.target->destroy != NULL)
+ par.target->destroy(&par);
+ module_put(par.target->me);
}
static int tcf_ipt_release(struct tcf_ipt *ipt, int bind)
@@ -196,6 +198,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
{
int ret = 0, result = 0;
struct tcf_ipt *ipt = a->priv;
+ struct xt_target_param par;
if (skb_cloned(skb)) {
if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
@@ -211,10 +214,13 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
/* yes, we have to worry about both in and out dev
worry later - danger - this API seems to have changed
from earlier kernels */
- ret = ipt->tcfi_t->u.kernel.target->target(skb, skb->dev, NULL,
- ipt->tcfi_hook,
- ipt->tcfi_t->u.kernel.target,
- ipt->tcfi_t->data);
+ par.in = skb->dev;
+ par.out = NULL;
+ par.hooknum = ipt->tcfi_hook;
+ par.target = ipt->tcfi_t->u.kernel.target;
+ par.targinfo = ipt->tcfi_t->data;
+ ret = par.target->target(skb, &par);
+
switch (ret) {
case NF_ACCEPT:
result = TC_ACT_OK;
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
new file mode 100644
index 00000000000..fe9777e77f3
--- /dev/null
+++ b/net/sched/act_skbedit.c
@@ -0,0 +1,203 @@
+/*
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Alexander Duyck <alexander.h.duyck@intel.com>
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+#include <linux/tc_act/tc_skbedit.h>
+#include <net/tc_act/tc_skbedit.h>
+
+#define SKBEDIT_TAB_MASK 15
+static struct tcf_common *tcf_skbedit_ht[SKBEDIT_TAB_MASK + 1];
+static u32 skbedit_idx_gen;
+static DEFINE_RWLOCK(skbedit_lock);
+
+static struct tcf_hashinfo skbedit_hash_info = {
+ .htab = tcf_skbedit_ht,
+ .hmask = SKBEDIT_TAB_MASK,
+ .lock = &skbedit_lock,
+};
+
+static int tcf_skbedit(struct sk_buff *skb, struct tc_action *a,
+ struct tcf_result *res)
+{
+ struct tcf_skbedit *d = a->priv;
+
+ spin_lock(&d->tcf_lock);
+ d->tcf_tm.lastuse = jiffies;
+ d->tcf_bstats.bytes += qdisc_pkt_len(skb);
+ d->tcf_bstats.packets++;
+
+ if (d->flags & SKBEDIT_F_PRIORITY)
+ skb->priority = d->priority;
+ if (d->flags & SKBEDIT_F_QUEUE_MAPPING &&
+ skb->dev->real_num_tx_queues > d->queue_mapping)
+ skb_set_queue_mapping(skb, d->queue_mapping);
+
+ spin_unlock(&d->tcf_lock);
+ return d->tcf_action;
+}
+
+static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
+ [TCA_SKBEDIT_PARMS] = { .len = sizeof(struct tc_skbedit) },
+ [TCA_SKBEDIT_PRIORITY] = { .len = sizeof(u32) },
+ [TCA_SKBEDIT_QUEUE_MAPPING] = { .len = sizeof(u16) },
+};
+
+static int tcf_skbedit_init(struct nlattr *nla, struct nlattr *est,
+ struct tc_action *a, int ovr, int bind)
+{
+ struct nlattr *tb[TCA_SKBEDIT_MAX + 1];
+ struct tc_skbedit *parm;
+ struct tcf_skbedit *d;
+ struct tcf_common *pc;
+ u32 flags = 0, *priority = NULL;
+ u16 *queue_mapping = NULL;
+ int ret = 0, err;
+
+ if (nla == NULL)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, TCA_SKBEDIT_MAX, nla, skbedit_policy);
+ if (err < 0)
+ return err;
+
+ if (tb[TCA_SKBEDIT_PARMS] == NULL)
+ return -EINVAL;
+
+ if (tb[TCA_SKBEDIT_PRIORITY] != NULL) {
+ flags |= SKBEDIT_F_PRIORITY;
+ priority = nla_data(tb[TCA_SKBEDIT_PRIORITY]);
+ }
+
+ if (tb[TCA_SKBEDIT_QUEUE_MAPPING] != NULL) {
+ flags |= SKBEDIT_F_QUEUE_MAPPING;
+ queue_mapping = nla_data(tb[TCA_SKBEDIT_QUEUE_MAPPING]);
+ }
+ if (!flags)
+ return -EINVAL;
+
+ parm = nla_data(tb[TCA_SKBEDIT_PARMS]);
+
+ pc = tcf_hash_check(parm->index, a, bind, &skbedit_hash_info);
+ if (!pc) {
+ pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind,
+ &skbedit_idx_gen, &skbedit_hash_info);
+ if (unlikely(!pc))
+ return -ENOMEM;
+
+ d = to_skbedit(pc);
+ ret = ACT_P_CREATED;
+ } else {
+ d = to_skbedit(pc);
+ if (!ovr) {
+ tcf_hash_release(pc, bind, &skbedit_hash_info);
+ return -EEXIST;
+ }
+ }
+
+ spin_lock_bh(&d->tcf_lock);
+
+ d->flags = flags;
+ if (flags & SKBEDIT_F_PRIORITY)
+ d->priority = *priority;
+ if (flags & SKBEDIT_F_QUEUE_MAPPING)
+ d->queue_mapping = *queue_mapping;
+ d->tcf_action = parm->action;
+
+ spin_unlock_bh(&d->tcf_lock);
+
+ if (ret == ACT_P_CREATED)
+ tcf_hash_insert(pc, &skbedit_hash_info);
+ return ret;
+}
+
+static inline int tcf_skbedit_cleanup(struct tc_action *a, int bind)
+{
+ struct tcf_skbedit *d = a->priv;
+
+ if (d)
+ return tcf_hash_release(&d->common, bind, &skbedit_hash_info);
+ return 0;
+}
+
+static inline int tcf_skbedit_dump(struct sk_buff *skb, struct tc_action *a,
+ int bind, int ref)
+{
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tcf_skbedit *d = a->priv;
+ struct tc_skbedit opt;
+ struct tcf_t t;
+
+ opt.index = d->tcf_index;
+ opt.refcnt = d->tcf_refcnt - ref;
+ opt.bindcnt = d->tcf_bindcnt - bind;
+ opt.action = d->tcf_action;
+ NLA_PUT(skb, TCA_SKBEDIT_PARMS, sizeof(opt), &opt);
+ if (d->flags & SKBEDIT_F_PRIORITY)
+ NLA_PUT(skb, TCA_SKBEDIT_PRIORITY, sizeof(d->priority),
+ &d->priority);
+ if (d->flags & SKBEDIT_F_QUEUE_MAPPING)
+ NLA_PUT(skb, TCA_SKBEDIT_QUEUE_MAPPING,
+ sizeof(d->queue_mapping), &d->queue_mapping);
+ t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
+ t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
+ t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
+ NLA_PUT(skb, TCA_SKBEDIT_TM, sizeof(t), &t);
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static struct tc_action_ops act_skbedit_ops = {
+ .kind = "skbedit",
+ .hinfo = &skbedit_hash_info,
+ .type = TCA_ACT_SKBEDIT,
+ .capab = TCA_CAP_NONE,
+ .owner = THIS_MODULE,
+ .act = tcf_skbedit,
+ .dump = tcf_skbedit_dump,
+ .cleanup = tcf_skbedit_cleanup,
+ .init = tcf_skbedit_init,
+ .walk = tcf_generic_walker,
+};
+
+MODULE_AUTHOR("Alexander Duyck, <alexander.h.duyck@intel.com>");
+MODULE_DESCRIPTION("SKB Editing");
+MODULE_LICENSE("GPL");
+
+static int __init skbedit_init_module(void)
+{
+ return tcf_register_action(&act_skbedit_ops);
+}
+
+static void __exit skbedit_cleanup_module(void)
+{
+ tcf_unregister_action(&act_skbedit_ops);
+}
+
+module_init(skbedit_init_module);
+module_exit(skbedit_cleanup_module);
diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c
index 8f63a1a9401..0ebaff637e3 100644
--- a/net/sched/cls_flow.c
+++ b/net/sched/cls_flow.c
@@ -67,9 +67,9 @@ static inline u32 addr_fold(void *addr)
static u32 flow_get_src(const struct sk_buff *skb)
{
switch (skb->protocol) {
- case __constant_htons(ETH_P_IP):
+ case htons(ETH_P_IP):
return ntohl(ip_hdr(skb)->saddr);
- case __constant_htons(ETH_P_IPV6):
+ case htons(ETH_P_IPV6):
return ntohl(ipv6_hdr(skb)->saddr.s6_addr32[3]);
default:
return addr_fold(skb->sk);
@@ -79,9 +79,9 @@ static u32 flow_get_src(const struct sk_buff *skb)
static u32 flow_get_dst(const struct sk_buff *skb)
{
switch (skb->protocol) {
- case __constant_htons(ETH_P_IP):
+ case htons(ETH_P_IP):
return ntohl(ip_hdr(skb)->daddr);
- case __constant_htons(ETH_P_IPV6):
+ case htons(ETH_P_IPV6):
return ntohl(ipv6_hdr(skb)->daddr.s6_addr32[3]);
default:
return addr_fold(skb->dst) ^ (__force u16)skb->protocol;
@@ -91,9 +91,9 @@ static u32 flow_get_dst(const struct sk_buff *skb)
static u32 flow_get_proto(const struct sk_buff *skb)
{
switch (skb->protocol) {
- case __constant_htons(ETH_P_IP):
+ case htons(ETH_P_IP):
return ip_hdr(skb)->protocol;
- case __constant_htons(ETH_P_IPV6):
+ case htons(ETH_P_IPV6):
return ipv6_hdr(skb)->nexthdr;
default:
return 0;
@@ -120,7 +120,7 @@ static u32 flow_get_proto_src(const struct sk_buff *skb)
u32 res = 0;
switch (skb->protocol) {
- case __constant_htons(ETH_P_IP): {
+ case htons(ETH_P_IP): {
struct iphdr *iph = ip_hdr(skb);
if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
@@ -128,7 +128,7 @@ static u32 flow_get_proto_src(const struct sk_buff *skb)
res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4));
break;
}
- case __constant_htons(ETH_P_IPV6): {
+ case htons(ETH_P_IPV6): {
struct ipv6hdr *iph = ipv6_hdr(skb);
if (has_ports(iph->nexthdr))
@@ -147,7 +147,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb)
u32 res = 0;
switch (skb->protocol) {
- case __constant_htons(ETH_P_IP): {
+ case htons(ETH_P_IP): {
struct iphdr *iph = ip_hdr(skb);
if (!(iph->frag_off&htons(IP_MF|IP_OFFSET)) &&
@@ -155,7 +155,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb)
res = ntohs(*(__be16 *)((void *)iph + iph->ihl * 4 + 2));
break;
}
- case __constant_htons(ETH_P_IPV6): {
+ case htons(ETH_P_IPV6): {
struct ipv6hdr *iph = ipv6_hdr(skb);
if (has_ports(iph->nexthdr))
@@ -213,9 +213,9 @@ static u32 flow_get_nfct(const struct sk_buff *skb)
static u32 flow_get_nfct_src(const struct sk_buff *skb)
{
switch (skb->protocol) {
- case __constant_htons(ETH_P_IP):
+ case htons(ETH_P_IP):
return ntohl(CTTUPLE(skb, src.u3.ip));
- case __constant_htons(ETH_P_IPV6):
+ case htons(ETH_P_IPV6):
return ntohl(CTTUPLE(skb, src.u3.ip6[3]));
}
fallback:
@@ -225,9 +225,9 @@ fallback:
static u32 flow_get_nfct_dst(const struct sk_buff *skb)
{
switch (skb->protocol) {
- case __constant_htons(ETH_P_IP):
+ case htons(ETH_P_IP):
return ntohl(CTTUPLE(skb, dst.u3.ip));
- case __constant_htons(ETH_P_IPV6):
+ case htons(ETH_P_IPV6):
return ntohl(CTTUPLE(skb, dst.u3.ip6[3]));
}
fallback:
diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c
index cc49c932641..bc450397487 100644
--- a/net/sched/em_cmp.c
+++ b/net/sched/em_cmp.c
@@ -14,6 +14,7 @@
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/tc_ematch/tc_em_cmp.h>
+#include <asm/unaligned.h>
#include <net/pkt_cls.h>
static inline int cmp_needs_transformation(struct tcf_em_cmp *cmp)
@@ -37,8 +38,7 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em,
break;
case TCF_EM_ALIGN_U16:
- val = *ptr << 8;
- val |= *(ptr+1);
+ val = get_unaligned_be16(ptr);
if (cmp_needs_transformation(cmp))
val = be16_to_cpu(val);
@@ -47,10 +47,7 @@ static int em_cmp_match(struct sk_buff *skb, struct tcf_ematch *em,
case TCF_EM_ALIGN_U32:
/* Worth checking boundries? The branching seems
* to get worse. Visit again. */
- val = *ptr << 24;
- val |= *(ptr+1) << 16;
- val |= *(ptr+2) << 8;
- val |= *(ptr+3);
+ val = get_unaligned_be32(ptr);
if (cmp_needs_transformation(cmp))
val = be32_to_cpu(val);
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index edd1298f85f..ba43aab3a85 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -202,7 +202,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (p->set_tc_index) {
switch (skb->protocol) {
- case __constant_htons(ETH_P_IP):
+ case htons(ETH_P_IP):
if (skb_cow_head(skb, sizeof(struct iphdr)))
goto drop;
@@ -210,7 +210,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
& ~INET_ECN_MASK;
break;
- case __constant_htons(ETH_P_IPV6):
+ case htons(ETH_P_IPV6):
if (skb_cow_head(skb, sizeof(struct ipv6hdr)))
goto drop;
@@ -289,11 +289,11 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
pr_debug("index %d->%d\n", skb->tc_index, index);
switch (skb->protocol) {
- case __constant_htons(ETH_P_IP):
+ case htons(ETH_P_IP):
ipv4_change_dsfield(ip_hdr(skb), p->mask[index],
p->value[index]);
break;
- case __constant_htons(ETH_P_IPV6):
+ case htons(ETH_P_IPV6):
ipv6_change_dsfield(ipv6_hdr(skb), p->mask[index],
p->value[index]);
break;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 9634091ee2f..7b5572d6beb 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -44,23 +44,30 @@ static inline int qdisc_qlen(struct Qdisc *q)
static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
{
- if (unlikely(skb->next))
- q->gso_skb = skb;
- else
- q->ops->requeue(skb, q);
-
+ q->gso_skb = skb;
+ q->qstats.requeues++;
__netif_schedule(q);
+
return 0;
}
static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
{
- struct sk_buff *skb;
+ struct sk_buff *skb = q->gso_skb;
- if ((skb = q->gso_skb))
- q->gso_skb = NULL;
- else
+ if (unlikely(skb)) {
+ struct net_device *dev = qdisc_dev(q);
+ struct netdev_queue *txq;
+
+ /* check the reason of requeuing without tx lock first */
+ txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
+ if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
+ q->gso_skb = NULL;
+ else
+ skb = NULL;
+ } else {
skb = q->dequeue(q);
+ }
return skb;
}
@@ -215,10 +222,9 @@ static void dev_watchdog(unsigned long arg)
time_after(jiffies, (dev->trans_start +
dev->watchdog_timeo))) {
char drivername[64];
- printk(KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
+ WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
dev->name, netdev_drivername(dev, drivername, 64));
dev->tx_timeout(dev);
- WARN_ON_ONCE(1);
}
if (!mod_timer(&dev->watchdog_timer,
round_jiffies(jiffies +
@@ -328,6 +334,7 @@ struct Qdisc noop_qdisc = {
.flags = TCQ_F_BUILTIN,
.ops = &noop_qdisc_ops,
.list = LIST_HEAD_INIT(noop_qdisc.list),
+ .requeue.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
.q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
.dev_queue = &noop_netdev_queue,
};
@@ -353,6 +360,7 @@ static struct Qdisc noqueue_qdisc = {
.flags = TCQ_F_BUILTIN,
.ops = &noqueue_qdisc_ops,
.list = LIST_HEAD_INIT(noqueue_qdisc.list),
+ .requeue.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
.q.lock = __SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
.dev_queue = &noqueue_netdev_queue,
};
@@ -473,6 +481,7 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
sch->padded = (char *) sch - (char *) p;
INIT_LIST_HEAD(&sch->list);
+ skb_queue_head_init(&sch->requeue);
skb_queue_head_init(&sch->q);
sch->ops = ops;
sch->enqueue = ops->enqueue;
@@ -541,6 +550,7 @@ void qdisc_destroy(struct Qdisc *qdisc)
dev_put(qdisc_dev(qdisc));
kfree_skb(qdisc->gso_skb);
+ __skb_queue_purge(&qdisc->requeue);
kfree((char *) qdisc - qdisc->padded);
}
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
new file mode 100644
index 00000000000..915f3149dde
--- /dev/null
+++ b/net/sched/sch_multiq.c
@@ -0,0 +1,477 @@
+/*
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Author: Alexander Duyck <alexander.h.duyck@intel.com>
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/pkt_sched.h>
+
+
+struct multiq_sched_data {
+ u16 bands;
+ u16 max_bands;
+ u16 curband;
+ struct tcf_proto *filter_list;
+ struct Qdisc **queues;
+};
+
+
+static struct Qdisc *
+multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ u32 band;
+ struct tcf_result res;
+ int err;
+
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
+ err = tc_classify(skb, q->filter_list, &res);
+#ifdef CONFIG_NET_CLS_ACT
+ switch (err) {
+ case TC_ACT_STOLEN:
+ case TC_ACT_QUEUED:
+ *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
+ case TC_ACT_SHOT:
+ return NULL;
+ }
+#endif
+ band = skb_get_queue_mapping(skb);
+
+ if (band >= q->bands)
+ return q->queues[0];
+
+ return q->queues[band];
+}
+
+static int
+multiq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct Qdisc *qdisc;
+ int ret;
+
+ qdisc = multiq_classify(skb, sch, &ret);
+#ifdef CONFIG_NET_CLS_ACT
+ if (qdisc == NULL) {
+
+ if (ret & __NET_XMIT_BYPASS)
+ sch->qstats.drops++;
+ kfree_skb(skb);
+ return ret;
+ }
+#endif
+
+ ret = qdisc_enqueue(skb, qdisc);
+ if (ret == NET_XMIT_SUCCESS) {
+ sch->bstats.bytes += qdisc_pkt_len(skb);
+ sch->bstats.packets++;
+ sch->q.qlen++;
+ return NET_XMIT_SUCCESS;
+ }
+ if (net_xmit_drop_count(ret))
+ sch->qstats.drops++;
+ return ret;
+}
+
+
+static int
+multiq_requeue(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct Qdisc *qdisc;
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ int ret;
+
+ qdisc = multiq_classify(skb, sch, &ret);
+#ifdef CONFIG_NET_CLS_ACT
+ if (qdisc == NULL) {
+ if (ret & __NET_XMIT_BYPASS)
+ sch->qstats.drops++;
+ kfree_skb(skb);
+ return ret;
+ }
+#endif
+
+ ret = qdisc->ops->requeue(skb, qdisc);
+ if (ret == NET_XMIT_SUCCESS) {
+ sch->q.qlen++;
+ sch->qstats.requeues++;
+ if (q->curband)
+ q->curband--;
+ else
+ q->curband = q->bands - 1;
+ return NET_XMIT_SUCCESS;
+ }
+ if (net_xmit_drop_count(ret))
+ sch->qstats.drops++;
+ return ret;
+}
+
+
+static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ struct Qdisc *qdisc;
+ struct sk_buff *skb;
+ int band;
+
+ for (band = 0; band < q->bands; band++) {
+ /* cycle through bands to ensure fairness */
+ q->curband++;
+ if (q->curband >= q->bands)
+ q->curband = 0;
+
+ /* Check that target subqueue is available before
+ * pulling an skb to avoid excessive requeues
+ */
+ if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) {
+ qdisc = q->queues[q->curband];
+ skb = qdisc->dequeue(qdisc);
+ if (skb) {
+ sch->q.qlen--;
+ return skb;
+ }
+ }
+ }
+ return NULL;
+
+}
+
+static unsigned int multiq_drop(struct Qdisc *sch)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ int band;
+ unsigned int len;
+ struct Qdisc *qdisc;
+
+ for (band = q->bands-1; band >= 0; band--) {
+ qdisc = q->queues[band];
+ if (qdisc->ops->drop) {
+ len = qdisc->ops->drop(qdisc);
+ if (len != 0) {
+ sch->q.qlen--;
+ return len;
+ }
+ }
+ }
+ return 0;
+}
+
+
+static void
+multiq_reset(struct Qdisc *sch)
+{
+ u16 band;
+ struct multiq_sched_data *q = qdisc_priv(sch);
+
+ for (band = 0; band < q->bands; band++)
+ qdisc_reset(q->queues[band]);
+ sch->q.qlen = 0;
+ q->curband = 0;
+}
+
+static void
+multiq_destroy(struct Qdisc *sch)
+{
+ int band;
+ struct multiq_sched_data *q = qdisc_priv(sch);
+
+ tcf_destroy_chain(&q->filter_list);
+ for (band = 0; band < q->bands; band++)
+ qdisc_destroy(q->queues[band]);
+
+ kfree(q->queues);
+}
+
+static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ struct tc_multiq_qopt *qopt;
+ int i;
+
+ if (!netif_is_multiqueue(qdisc_dev(sch)))
+ return -EINVAL;
+ if (nla_len(opt) < sizeof(*qopt))
+ return -EINVAL;
+
+ qopt = nla_data(opt);
+
+ qopt->bands = qdisc_dev(sch)->real_num_tx_queues;
+
+ sch_tree_lock(sch);
+ q->bands = qopt->bands;
+ for (i = q->bands; i < q->max_bands; i++) {
+ if (q->queues[i] != &noop_qdisc) {
+ struct Qdisc *child = xchg(&q->queues[i], &noop_qdisc);
+ qdisc_tree_decrease_qlen(child, child->q.qlen);
+ qdisc_destroy(child);
+ }
+ }
+
+ sch_tree_unlock(sch);
+
+ for (i = 0; i < q->bands; i++) {
+ if (q->queues[i] == &noop_qdisc) {
+ struct Qdisc *child;
+ child = qdisc_create_dflt(qdisc_dev(sch),
+ sch->dev_queue,
+ &pfifo_qdisc_ops,
+ TC_H_MAKE(sch->handle,
+ i + 1));
+ if (child) {
+ sch_tree_lock(sch);
+ child = xchg(&q->queues[i], child);
+
+ if (child != &noop_qdisc) {
+ qdisc_tree_decrease_qlen(child,
+ child->q.qlen);
+ qdisc_destroy(child);
+ }
+ sch_tree_unlock(sch);
+ }
+ }
+ }
+ return 0;
+}
+
+static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ int i, err;
+
+ q->queues = NULL;
+
+ if (opt == NULL)
+ return -EINVAL;
+
+ q->max_bands = qdisc_dev(sch)->num_tx_queues;
+
+ q->queues = kcalloc(q->max_bands, sizeof(struct Qdisc *), GFP_KERNEL);
+ if (!q->queues)
+ return -ENOBUFS;
+ for (i = 0; i < q->max_bands; i++)
+ q->queues[i] = &noop_qdisc;
+
+ err = multiq_tune(sch,opt);
+
+ if (err)
+ kfree(q->queues);
+
+ return err;
+}
+
+static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ unsigned char *b = skb_tail_pointer(skb);
+ struct tc_multiq_qopt opt;
+
+ opt.bands = q->bands;
+ opt.max_bands = q->max_bands;
+
+ NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
+
+ return skb->len;
+
+nla_put_failure:
+ nlmsg_trim(skb, b);
+ return -1;
+}
+
+static int multiq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
+ struct Qdisc **old)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ unsigned long band = arg - 1;
+
+ if (band >= q->bands)
+ return -EINVAL;
+
+ if (new == NULL)
+ new = &noop_qdisc;
+
+ sch_tree_lock(sch);
+ *old = q->queues[band];
+ q->queues[band] = new;
+ qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
+ qdisc_reset(*old);
+ sch_tree_unlock(sch);
+
+ return 0;
+}
+
+static struct Qdisc *
+multiq_leaf(struct Qdisc *sch, unsigned long arg)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ unsigned long band = arg - 1;
+
+ if (band >= q->bands)
+ return NULL;
+
+ return q->queues[band];
+}
+
+static unsigned long multiq_get(struct Qdisc *sch, u32 classid)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ unsigned long band = TC_H_MIN(classid);
+
+ if (band - 1 >= q->bands)
+ return 0;
+ return band;
+}
+
+static unsigned long multiq_bind(struct Qdisc *sch, unsigned long parent,
+ u32 classid)
+{
+ return multiq_get(sch, classid);
+}
+
+
+static void multiq_put(struct Qdisc *q, unsigned long cl)
+{
+ return;
+}
+
+static int multiq_change(struct Qdisc *sch, u32 handle, u32 parent,
+ struct nlattr **tca, unsigned long *arg)
+{
+ unsigned long cl = *arg;
+ struct multiq_sched_data *q = qdisc_priv(sch);
+
+ if (cl - 1 > q->bands)
+ return -ENOENT;
+ return 0;
+}
+
+static int multiq_delete(struct Qdisc *sch, unsigned long cl)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ if (cl - 1 > q->bands)
+ return -ENOENT;
+ return 0;
+}
+
+
+static int multiq_dump_class(struct Qdisc *sch, unsigned long cl,
+ struct sk_buff *skb, struct tcmsg *tcm)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+
+ if (cl - 1 > q->bands)
+ return -ENOENT;
+ tcm->tcm_handle |= TC_H_MIN(cl);
+ if (q->queues[cl-1])
+ tcm->tcm_info = q->queues[cl-1]->handle;
+ return 0;
+}
+
+static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
+ struct gnet_dump *d)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ struct Qdisc *cl_q;
+
+ cl_q = q->queues[cl - 1];
+ if (gnet_stats_copy_basic(d, &cl_q->bstats) < 0 ||
+ gnet_stats_copy_queue(d, &cl_q->qstats) < 0)
+ return -1;
+
+ return 0;
+}
+
+static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+ int band;
+
+ if (arg->stop)
+ return;
+
+ for (band = 0; band < q->bands; band++) {
+ if (arg->count < arg->skip) {
+ arg->count++;
+ continue;
+ }
+ if (arg->fn(sch, band+1, arg) < 0) {
+ arg->stop = 1;
+ break;
+ }
+ arg->count++;
+ }
+}
+
+static struct tcf_proto **multiq_find_tcf(struct Qdisc *sch, unsigned long cl)
+{
+ struct multiq_sched_data *q = qdisc_priv(sch);
+
+ if (cl)
+ return NULL;
+ return &q->filter_list;
+}
+
+static const struct Qdisc_class_ops multiq_class_ops = {
+ .graft = multiq_graft,
+ .leaf = multiq_leaf,
+ .get = multiq_get,
+ .put = multiq_put,
+ .change = multiq_change,
+ .delete = multiq_delete,
+ .walk = multiq_walk,
+ .tcf_chain = multiq_find_tcf,
+ .bind_tcf = multiq_bind,
+ .unbind_tcf = multiq_put,
+ .dump = multiq_dump_class,
+ .dump_stats = multiq_dump_class_stats,
+};
+
+static struct Qdisc_ops multiq_qdisc_ops __read_mostly = {
+ .next = NULL,
+ .cl_ops = &multiq_class_ops,
+ .id = "multiq",
+ .priv_size = sizeof(struct multiq_sched_data),
+ .enqueue = multiq_enqueue,
+ .dequeue = multiq_dequeue,
+ .requeue = multiq_requeue,
+ .drop = multiq_drop,
+ .init = multiq_init,
+ .reset = multiq_reset,
+ .destroy = multiq_destroy,
+ .change = multiq_tune,
+ .dump = multiq_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __init multiq_module_init(void)
+{
+ return register_qdisc(&multiq_qdisc_ops);
+}
+
+static void __exit multiq_module_exit(void)
+{
+ unregister_qdisc(&multiq_qdisc_ops);
+}
+
+module_init(multiq_module_init)
+module_exit(multiq_module_exit)
+
+MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 3781e55046d..a11959908d9 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -388,6 +388,20 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
[TCA_NETEM_CORRUPT] = { .len = sizeof(struct tc_netem_corrupt) },
};
+static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
+ const struct nla_policy *policy, int len)
+{
+ int nested_len = nla_len(nla) - NLA_ALIGN(len);
+
+ if (nested_len < 0)
+ return -EINVAL;
+ if (nested_len >= nla_attr_size(0))
+ return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
+ nested_len, policy);
+ memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
+ return 0;
+}
+
/* Parse netlink message to set options */
static int netem_change(struct Qdisc *sch, struct nlattr *opt)
{
@@ -399,8 +413,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
if (opt == NULL)
return -EINVAL;
- ret = nla_parse_nested_compat(tb, TCA_NETEM_MAX, opt, netem_policy,
- qopt, sizeof(*qopt));
+ qopt = nla_data(opt);
+ ret = parse_attr(tb, TCA_NETEM_MAX, opt, netem_policy, sizeof(*qopt));
if (ret < 0)
return ret;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index a6697c686c7..504a78cdb71 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -254,16 +254,12 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb)
{
struct prio_sched_data *q = qdisc_priv(sch);
unsigned char *b = skb_tail_pointer(skb);
- struct nlattr *nest;
struct tc_prio_qopt opt;
opt.bands = q->bands;
memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1);
- nest = nla_nest_compat_start(skb, TCA_OPTIONS, sizeof(opt), &opt);
- if (nest == NULL)
- goto nla_put_failure;
- nla_nest_compat_end(skb, nest);
+ NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
return skb->len;
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 6e041d10dbd..fe1508ef0d3 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -119,7 +119,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
u32 h, h2;
switch (skb->protocol) {
- case __constant_htons(ETH_P_IP):
+ case htons(ETH_P_IP):
{
const struct iphdr *iph = ip_hdr(skb);
h = iph->daddr;
@@ -134,7 +134,7 @@ static unsigned sfq_hash(struct sfq_sched_data *q, struct sk_buff *skb)
h2 ^= *(((u32*)iph) + iph->ihl);
break;
}
- case __constant_htons(ETH_P_IPV6):
+ case htons(ETH_P_IPV6):
{
struct ipv6hdr *iph = ipv6_hdr(skb);
h = iph->daddr.s6_addr32[3];