path: root/net/core/dev.c
diff options
authorJason Wang <jasowang@redhat.com>2014-01-10 16:18:26 +0800
committerDavid S. Miller <davem@davemloft.net>2014-01-10 13:23:08 -0500
commitf663dd9aaf9ed124f25f0f8452edf238f087ad50 (patch)
tree2aee7dfcfd373c6905de5b2a9f810f9e209156b6 /net/core/dev.c
parentb13ba1b83f524732523db1079e56478b32c85c96 (diff)
net: core: explicitly select a txq before doing l2 forwarding
Currently, the tx queue were selected implicitly in ndo_dfwd_start_xmit(). The will cause several issues: - NETIF_F_LLTX were removed for macvlan, so txq lock were done for macvlan instead of lower device which misses the necessary txq synchronization for lower device such as txq stopping or frozen required by dev watchdog or control path. - dev_hard_start_xmit() was called with NULL txq which bypasses the net device watchdog. - dev_hard_start_xmit() does not check txq everywhere which will lead a crash when tso is disabled for lower device. Fix this by explicitly introducing a new param for .ndo_select_queue() for just selecting queues in the case of l2 forwarding offload. netdev_pick_tx() was also extended to accept this parameter and dev_queue_xmit_accel() was used to do l2 forwarding transmission. With this fixes, NETIF_F_LLTX could be preserved for macvlan and there's no need to check txq against NULL in dev_hard_start_xmit(). Also there's no need to keep a dedicated ndo_dfwd_start_xmit() and we can just reuse the code of dev_queue_xmit() to do the transmission. In the future, it was also required for macvtap l2 forwarding support since it provides a necessary synchronization method. Cc: John Fastabend <john.r.fastabend@intel.com> Cc: Neil Horman <nhorman@tuxdriver.com> Cc: e1000-devel@lists.sourceforge.net Signed-off-by: Jason Wang <jasowang@redhat.com> Acked-by: Neil Horman <nhorman@tuxdriver.com> Acked-by: John Fastabend <john.r.fastabend@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/core/dev.c')
1 files changed, 17 insertions, 12 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 4fc17221545d..0ce469e5ec80 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2539,7 +2539,7 @@ static inline int skb_needs_linearize(struct sk_buff *skb,
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
- struct netdev_queue *txq, void *accel_priv)
+ struct netdev_queue *txq)
const struct net_device_ops *ops = dev->netdev_ops;
int rc = NETDEV_TX_OK;
@@ -2605,13 +2605,10 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
dev_queue_xmit_nit(skb, dev);
skb_len = skb->len;
- if (accel_priv)
- rc = ops->ndo_dfwd_start_xmit(skb, dev, accel_priv);
- else
rc = ops->ndo_start_xmit(skb, dev);
trace_net_dev_xmit(skb, rc, dev, skb_len);
- if (rc == NETDEV_TX_OK && txq)
+ if (rc == NETDEV_TX_OK)
return rc;
@@ -2627,10 +2624,7 @@ gso:
dev_queue_xmit_nit(nskb, dev);
skb_len = nskb->len;
- if (accel_priv)
- rc = ops->ndo_dfwd_start_xmit(nskb, dev, accel_priv);
- else
- rc = ops->ndo_start_xmit(nskb, dev);
+ rc = ops->ndo_start_xmit(nskb, dev);
trace_net_dev_xmit(nskb, rc, dev, skb_len);
if (unlikely(rc != NETDEV_TX_OK)) {
if (rc & ~NETDEV_TX_MASK)
@@ -2811,7 +2805,7 @@ EXPORT_SYMBOL(dev_loopback_xmit);
* the BH enable code must have IRQs enabled so that it will not deadlock.
* --BLG
-int dev_queue_xmit(struct sk_buff *skb)
+int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
struct net_device *dev = skb->dev;
struct netdev_queue *txq;
@@ -2827,7 +2821,7 @@ int dev_queue_xmit(struct sk_buff *skb)
- txq = netdev_pick_tx(dev, skb);
+ txq = netdev_pick_tx(dev, skb, accel_priv);
q = rcu_dereference_bh(txq->qdisc);
@@ -2863,7 +2857,7 @@ int dev_queue_xmit(struct sk_buff *skb)
if (!netif_xmit_stopped(txq)) {
- rc = dev_hard_start_xmit(skb, dev, txq, NULL);
+ rc = dev_hard_start_xmit(skb, dev, txq);
if (dev_xmit_complete(rc)) {
HARD_TX_UNLOCK(dev, txq);
@@ -2892,8 +2886,19 @@ out:
return rc;
+int dev_queue_xmit(struct sk_buff *skb)
+ return __dev_queue_xmit(skb, NULL);
+int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv)
+ return __dev_queue_xmit(skb, accel_priv);
Receiver routines