From b055629eaef7758b35dc91c76cf4f158025562bf Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 30 May 2008 22:18:35 +0100 Subject: [netdrvr] sfc: Report XAUI link down at default log level This is normal when the external link is down so don't report it as an error. Signed-off-by: Ben Hutchings Signed-off-by: Jeff Garzik --- drivers/net/sfc/falcon_xmac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/sfc/falcon_xmac.c b/drivers/net/sfc/falcon_xmac.c index dbdcee4b0f8..55c0d9760be 100644 --- a/drivers/net/sfc/falcon_xmac.c +++ b/drivers/net/sfc/falcon_xmac.c @@ -459,7 +459,7 @@ static int falcon_check_xaui_link_up(struct efx_nic *efx) tries--; } - EFX_ERR(efx, "Failed to bring XAUI link back up in %d tries!\n", + EFX_LOG(efx, "Failed to bring XAUI link back up in %d tries!\n", max_tries); return 0; } -- cgit v1.2.3 From 17a9440f7deb781935c76e2e55d376a35611a6f9 Mon Sep 17 00:00:00 2001 From: Wang Chen Date: Fri, 30 May 2008 11:18:55 +0800 Subject: [netdrvr] CS89X0: Add cleanup for dma after fail After request_dma() succeeding, any error path should do free_dma(). Signed-off-by: Wang Chen Signed-off-by: Jeff Garzik --- drivers/net/cs89x0.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/net/cs89x0.c b/drivers/net/cs89x0.c index 348371fda59..fba87abe78e 100644 --- a/drivers/net/cs89x0.c +++ b/drivers/net/cs89x0.c @@ -1394,7 +1394,11 @@ net_open(struct net_device *dev) #endif if (!result) { printk(KERN_ERR "%s: EEPROM is configured for unavailable media\n", dev->name); - release_irq: +release_dma: +#if ALLOW_DMA + free_dma(dev->dma); +#endif +release_irq: #if ALLOW_DMA release_dma_buff(lp); #endif @@ -1442,12 +1446,12 @@ net_open(struct net_device *dev) if ((result = detect_bnc(dev)) != DETECTED_NONE) break; printk(KERN_ERR "%s: no media detected\n", dev->name); - goto release_irq; + goto release_dma; } switch(result) { case DETECTED_NONE: printk(KERN_ERR "%s: no network cable attached to configured media\n", dev->name); - goto release_irq; + goto release_dma; case DETECTED_RJ45H: printk(KERN_INFO "%s: using half-duplex 10Base-T (RJ-45)\n", dev->name); break; -- cgit v1.2.3 From 6f94f709b5b1d3a9b5f1ff7d4f3534de6cde3ff6 Mon Sep 17 00:00:00 2001 From: Cesar Eduardo Barros Date: Thu, 29 May 2008 21:58:36 -0300 Subject: sc92031: remove bogus unlikely() Commit 5a0a92e67b5009a71e011658da04fb92dad8961f mentions len < ETH_ZLEN is true for ARP packets. This obviously is not unlikely. Signed-off-by: Cesar Eduardo Barros Signed-off-by: Jeff Garzik --- drivers/net/sc92031.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/sc92031.c b/drivers/net/sc92031.c index b4b63805ee8..61955f8d801 100644 --- a/drivers/net/sc92031.c +++ b/drivers/net/sc92031.c @@ -972,7 +972,7 @@ static int sc92031_start_xmit(struct sk_buff *skb, struct net_device *dev) skb_copy_and_csum_dev(skb, priv->tx_bufs + entry * TX_BUF_SIZE); len = skb->len; - if (unlikely(len < ETH_ZLEN)) { + if (len < ETH_ZLEN) { memset(priv->tx_bufs + entry * TX_BUF_SIZE + len, 0, ETH_ZLEN - len); len = ETH_ZLEN; -- cgit v1.2.3 From d399cf8c04c595d738d82d02ae2755b902a51571 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Wed, 28 May 2008 09:10:01 +0200 Subject: myri10ge: update driver version Update myri10ge version to 1.3.99-1.347. Signed-off-by: Brice Goglin Signed-off-by: Jeff Garzik --- drivers/net/myri10ge/myri10ge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c index 36be6efc639..e0d76c75aea 100644 --- a/drivers/net/myri10ge/myri10ge.c +++ b/drivers/net/myri10ge/myri10ge.c @@ -75,7 +75,7 @@ #include "myri10ge_mcp.h" #include "myri10ge_mcp_gen_header.h" -#define MYRI10GE_VERSION_STR "1.3.2-1.287" +#define MYRI10GE_VERSION_STR "1.3.99-1.347" MODULE_DESCRIPTION("Myricom 10G driver (10GbE)"); MODULE_AUTHOR("Maintainer: help@myri.com"); -- cgit v1.2.3 From 7eb2e25112bf920bb0a4d1cca445f3d96874c25f Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 26 May 2008 17:42:42 +1000 Subject: virtio: fix virtio_net xmit of freed skb bug If we fail to transmit a packet, we assume the queue is full and put the skb into last_xmit_skb. However, if more space frees up before we xmit it, we loop, and the result can be transmitting the same skb twice. Fix is simple: set skb to NULL if we've used it in some way, and check before sending. Signed-off-by: Rusty Russell Signed-off-by: Jeff Garzik --- drivers/net/virtio_net.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index fe7cdf2a2a2..d50f4fe352b 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -287,21 +287,25 @@ again: free_old_xmit_skbs(vi); /* If we has a buffer left over from last time, send it now. */ - if (vi->last_xmit_skb) { + if (unlikely(vi->last_xmit_skb)) { if (xmit_skb(vi, vi->last_xmit_skb) != 0) { /* Drop this skb: we only queue one. */ vi->dev->stats.tx_dropped++; kfree_skb(skb); + skb = NULL; goto stop_queue; } vi->last_xmit_skb = NULL; } /* Put new one in send queue and do transmit */ - __skb_queue_head(&vi->send, skb); - if (xmit_skb(vi, skb) != 0) { - vi->last_xmit_skb = skb; - goto stop_queue; + if (likely(skb)) { + __skb_queue_head(&vi->send, skb); + if (xmit_skb(vi, skb) != 0) { + vi->last_xmit_skb = skb; + skb = NULL; + goto stop_queue; + } } done: vi->svq->vq_ops->kick(vi->svq); -- cgit v1.2.3 From 11a3a1546d0adc36485c2ad4af7ab950712df6ff Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 26 May 2008 17:48:13 +1000 Subject: virtio: fix delayed xmit of packet and freeing of old packets. Because we cache the last failed-to-xmit packet, if there are no packets queued behind that one we may never send it (reproduced here as TCP stalls, "cured" by an outgoing ping). Cc: Mark McLoughlin Signed-off-by: Rusty Russell Signed-off-by: Jeff Garzik --- drivers/net/virtio_net.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index d50f4fe352b..5450eac9e26 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -47,6 +47,9 @@ struct virtnet_info /* Number of input buffers, and max we've ever had. */ unsigned int num, max; + /* For cleaning up after transmission. */ + struct tasklet_struct tasklet; + /* Receive & send queues. */ struct sk_buff_head recv; struct sk_buff_head send; @@ -68,8 +71,13 @@ static void skb_xmit_done(struct virtqueue *svq) /* Suppress further interrupts. */ svq->vq_ops->disable_cb(svq); + /* We were waiting for more output buffers. */ netif_wake_queue(vi->dev); + + /* Make sure we re-xmit last_xmit_skb: if there are no more packets + * queued, start_xmit won't be called. */ + tasklet_schedule(&vi->tasklet); } static void receive_skb(struct net_device *dev, struct sk_buff *skb, @@ -278,6 +286,18 @@ static int xmit_skb(struct virtnet_info *vi, struct sk_buff *skb) return vi->svq->vq_ops->add_buf(vi->svq, sg, num, 0, skb); } +static void xmit_tasklet(unsigned long data) +{ + struct virtnet_info *vi = (void *)data; + + netif_tx_lock_bh(vi->dev); + if (vi->last_xmit_skb && xmit_skb(vi, vi->last_xmit_skb) == 0) { + vi->svq->vq_ops->kick(vi->svq); + vi->last_xmit_skb = NULL; + } + netif_tx_unlock_bh(vi->dev); +} + static int start_xmit(struct sk_buff *skb, struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); @@ -432,6 +452,8 @@ static int virtnet_probe(struct virtio_device *vdev) skb_queue_head_init(&vi->recv); skb_queue_head_init(&vi->send); + tasklet_init(&vi->tasklet, xmit_tasklet, (unsigned long)vi); + err = register_netdev(dev); if (err) { pr_debug("virtio_net: registering device failed\n"); -- cgit v1.2.3 From 25f03dcf63d233c13970751253b62a678bd85ccc Mon Sep 17 00:00:00 2001 From: Joakim Tjernlund Date: Fri, 23 May 2008 18:11:26 +0800 Subject: ucc_geth_ethtool: Fix typo Signed-off-by: Joakim Tjernlund Signed-off-by: Li Yang Signed-off-by: Jeff Garzik --- drivers/net/ucc_geth_ethtool.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ucc_geth_ethtool.c b/drivers/net/ucc_geth_ethtool.c index 299b7f17695..b84ffd84c4e 100644 --- a/drivers/net/ucc_geth_ethtool.c +++ b/drivers/net/ucc_geth_ethtool.c @@ -308,7 +308,7 @@ static void uec_get_strings(struct net_device *netdev, u32 stringset, u8 *buf) buf += UEC_TX_FW_STATS_LEN * ETH_GSTRING_LEN; } if (stats_mode & UCC_GETH_STATISTICS_GATHERING_MODE_FIRMWARE_RX) - memcpy(buf, tx_fw_stat_gstrings, UEC_RX_FW_STATS_LEN * + memcpy(buf, rx_fw_stat_gstrings, UEC_RX_FW_STATS_LEN * ETH_GSTRING_LEN); } -- cgit v1.2.3 From 08722bc4a066705e3f5fb4a5a87ce717fe9f896e Mon Sep 17 00:00:00 2001 From: Li Yang Date: Fri, 23 May 2008 18:11:27 +0800 Subject: ucc_geth_ethtool: Add a missing HW stats counter Signed-off-by: Joakim Tjernlund Signed-off-by: Li Yang Signed-off-by: Jeff Garzik --- drivers/net/ucc_geth_ethtool.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ucc_geth_ethtool.c b/drivers/net/ucc_geth_ethtool.c index b84ffd84c4e..f5839c4a5cb 100644 --- a/drivers/net/ucc_geth_ethtool.c +++ b/drivers/net/ucc_geth_ethtool.c @@ -73,6 +73,7 @@ static char tx_fw_stat_gstrings[][ETH_GSTRING_LEN] = { "tx-frames-ok", "tx-excessive-differ-frames", "tx-256-511-frames", + "tx-512-1023-frames", "tx-1024-1518-frames", "tx-jumbo-frames", }; -- cgit v1.2.3 From aefdbf1a3b832a580a50cf3d1dcbb717be7cbdbe Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 23 May 2008 02:00:25 +0400 Subject: atl1: fix 4G memory corruption bug When using 4+ GB RAM and SWIOTLB is active, the driver corrupts memory by writing an skb after the relevant DMA page has been unmapped. Although this doesn't happen when *not* using bounce buffers, clearing the pointer to the DMA page after unmapping it fixes the problem. http://marc.info/?t=120861317000005&r=2&w=2 Signed-off-by: Alexey Dobriyan Signed-off-by: Jay Cliburn Signed-off-by: Jeff Garzik --- drivers/net/atlx/atl1.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/atlx/atl1.c b/drivers/net/atlx/atl1.c index 9c2394d4942..79325c4fb54 100644 --- a/drivers/net/atlx/atl1.c +++ b/drivers/net/atlx/atl1.c @@ -2023,6 +2023,7 @@ rrd_ok: /* Good Receive */ pci_unmap_page(adapter->pdev, buffer_info->dma, buffer_info->length, PCI_DMA_FROMDEVICE); + buffer_info->dma = 0; skb = buffer_info->skb; length = le16_to_cpu(rrd->xsz.xsum_sz.pkt_size); -- cgit v1.2.3 From 56997fa838e333cea33ab641d4aeedd23aef0eb1 Mon Sep 17 00:00:00 2001 From: Grant Grundler Date: Mon, 12 May 2008 00:37:51 -0600 Subject: [netdrvr] tulip: oops in tulip_interrupt when hibernating with swsusp/suspend2 The following patch is seems to fix the tulip suspend/resume panic: http://bugzilla.kernel.org/show_bug.cgi?id=8952#c46 My attempts at a cleaner patch failed and Pavel thinks this is OK. Original from: kernelbugs@tap.homeip.net Signed-off-by: Grant Grundler Signed-off-by: Jeff Garzik --- drivers/net/tulip/tulip_core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c index f9d13fa05d6..55670b5eb61 100644 --- a/drivers/net/tulip/tulip_core.c +++ b/drivers/net/tulip/tulip_core.c @@ -1729,12 +1729,15 @@ static int tulip_suspend (struct pci_dev *pdev, pm_message_t state) if (!dev) return -EINVAL; - if (netif_running(dev)) - tulip_down(dev); + if (!netif_running(dev)) + goto save_state; + + tulip_down(dev); netif_device_detach(dev); free_irq(dev->irq, dev); +save_state: pci_save_state(pdev); pci_disable_device(pdev); pci_set_power_state(pdev, pci_choose_state(pdev, state)); @@ -1754,6 +1757,9 @@ static int tulip_resume(struct pci_dev *pdev) pci_set_power_state(pdev, PCI_D0); pci_restore_state(pdev); + if (!netif_running(dev)) + return 0; + if ((retval = pci_enable_device(pdev))) { printk (KERN_ERR "tulip: pci_enable_device failed in resume\n"); return retval; -- cgit v1.2.3 From 537d59af73d894750cff14f90fe2b6d77fbab15b Mon Sep 17 00:00:00 2001 From: Dave Young Date: Sun, 1 Jun 2008 23:50:52 -0700 Subject: bluetooth: rfcomm_dev_state_change deadlock fix There's logic in __rfcomm_dlc_close: rfcomm_dlc_lock(d); d->state = BT_CLOSED; d->state_changed(d, err); rfcomm_dlc_unlock(d); In rfcomm_dev_state_change, it's possible that rfcomm_dev_put try to take the dlc lock, then we will deadlock. Here fixed it by unlock dlc before rfcomm_dev_get in rfcomm_dev_state_change. why not unlock just before rfcomm_dev_put? it's because there's another problem. rfcomm_dev_get/rfcomm_dev_del will take rfcomm_dev_lock, but in rfcomm_dev_add the lock order is : rfcomm_dev_lock --> dlc lock so I unlock dlc before the taken of rfcomm_dev_lock. Actually it's a regression caused by commit 1905f6c736cb618e07eca0c96e60e3c024023428 ("bluetooth : __rfcomm_dlc_close lock fix"), the dlc state_change could be two callbacks : rfcomm_sk_state_change and rfcomm_dev_state_change. I missed the rfcomm_sk_state_change that time. Thanks Arjan van de Ven for the effort in commit 4c8411f8c115def968820a4df6658ccfd55d7f1a ("bluetooth: fix locking bug in the rfcomm socket cleanup handling") but he missed the rfcomm_dev_state_change lock issue. Signed-off-by: Dave Young Acked-by: Marcel Holtmann Signed-off-by: David S. Miller --- net/bluetooth/rfcomm/tty.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c index c3f749abb2d..c9191871c1e 100644 --- a/net/bluetooth/rfcomm/tty.c +++ b/net/bluetooth/rfcomm/tty.c @@ -566,11 +566,22 @@ static void rfcomm_dev_state_change(struct rfcomm_dlc *dlc, int err) if (dlc->state == BT_CLOSED) { if (!dev->tty) { if (test_bit(RFCOMM_RELEASE_ONHUP, &dev->flags)) { - if (rfcomm_dev_get(dev->id) == NULL) + /* Drop DLC lock here to avoid deadlock + * 1. rfcomm_dev_get will take rfcomm_dev_lock + * but in rfcomm_dev_add there's lock order: + * rfcomm_dev_lock -> dlc lock + * 2. rfcomm_dev_put will deadlock if it's + * the last reference + */ + rfcomm_dlc_unlock(dlc); + if (rfcomm_dev_get(dev->id) == NULL) { + rfcomm_dlc_lock(dlc); return; + } rfcomm_dev_del(dev); rfcomm_dev_put(dev); + rfcomm_dlc_lock(dlc); } } else tty_hangup(dev->tty); -- cgit v1.2.3 From 7dccf1f4e1696c79bff064c3770867cc53cbc71c Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Tue, 3 Jun 2008 14:53:46 -0700 Subject: ax25: Fix NULL pointer dereference and lockup. From: Jarek Poplawski There is only one function in AX25 calling skb_append(), and it really looks suspicious: appends skb after previously enqueued one, but in the meantime this previous skb could be removed from the queue. This patch Fixes it the simple way, so this is not fully compatible with the current method, but testing hasn't shown any problems. Signed-off-by: Ralf Baechle Signed-off-by: David S. Miller --- net/ax25/ax25_subr.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index d8f21573317..034aa10a519 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -64,20 +64,15 @@ void ax25_frames_acked(ax25_cb *ax25, unsigned short nr) void ax25_requeue_frames(ax25_cb *ax25) { - struct sk_buff *skb, *skb_prev = NULL; + struct sk_buff *skb; /* * Requeue all the un-ack-ed frames on the output queue to be picked * up by ax25_kick called from the timer. This arrangement handles the * possibility of an empty output queue. */ - while ((skb = skb_dequeue(&ax25->ack_queue)) != NULL) { - if (skb_prev == NULL) - skb_queue_head(&ax25->write_queue, skb); - else - skb_append(skb_prev, skb, &ax25->write_queue); - skb_prev = skb; - } + while ((skb = skb_dequeue_tail(&ax25->ack_queue)) != NULL) + skb_queue_head(&ax25->write_queue, skb); } /* -- cgit v1.2.3 From 9ecad877948deb2871d29e03786a7d7911687009 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 3 Jun 2008 15:18:36 -0700 Subject: irda: Sock leak on error path in irda_create. Bad type/protocol specified result in sk leak. Fix is simple - release the sk if bad values are given, but to make it possible just to call sk_free(), I move some sk initialization a bit lower. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/irda/af_irda.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index ae54b20d047..3eb5bcc75f9 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1093,11 +1093,6 @@ static int irda_create(struct net *net, struct socket *sock, int protocol) init_waitqueue_head(&self->query_wait); - /* Initialise networking socket struct */ - sock_init_data(sock, sk); /* Note : set sk->sk_refcnt to 1 */ - sk->sk_family = PF_IRDA; - sk->sk_protocol = protocol; - switch (sock->type) { case SOCK_STREAM: sock->ops = &irda_stream_ops; @@ -1124,13 +1119,20 @@ static int irda_create(struct net *net, struct socket *sock, int protocol) self->max_sdu_size_rx = TTP_SAR_UNBOUND; break; default: + sk_free(sk); return -ESOCKTNOSUPPORT; } break; default: + sk_free(sk); return -ESOCKTNOSUPPORT; } + /* Initialise networking socket struct */ + sock_init_data(sock, sk); /* Note : set sk->sk_refcnt to 1 */ + sk->sk_family = PF_IRDA; + sk->sk_protocol = protocol; + /* Register as a client with IrLMP */ self->ckey = irlmp_register_client(0, NULL, NULL, NULL); self->mask.word = 0xffff; -- cgit v1.2.3 From 57c511d8d47caeeae375cb8106662c0bd6a7e7e0 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Tue, 3 Jun 2008 16:00:01 -0700 Subject: bridge: update URL This patch updates the URL of the bridge homepage. Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- Documentation/networking/bridge.txt | 2 +- MAINTAINERS | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/networking/bridge.txt b/Documentation/networking/bridge.txt index bdae2db4119..bec69a8a169 100644 --- a/Documentation/networking/bridge.txt +++ b/Documentation/networking/bridge.txt @@ -1,6 +1,6 @@ In order to use the Ethernet bridging functionality, you'll need the userspace tools. These programs and documentation are available -at http://bridge.sourceforge.net. The download page is +at http://www.linux-foundation.org/en/Net:Bridge. The download page is http://prdownloads.sourceforge.net/bridge. If you still have questions, don't hesitate to post to the mailing list diff --git a/MAINTAINERS b/MAINTAINERS index 0a6d2ca03ce..46fa1797707 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1611,7 +1611,7 @@ ETHERNET BRIDGE P: Stephen Hemminger M: shemminger@linux-foundation.org L: bridge@lists.linux-foundation.org -W: http://bridge.sourceforge.net/ +W: http://www.linux-foundation.org/en/Net:Bridge S: Maintained ETHERTEAM 16I DRIVER -- cgit v1.2.3 From b9f5f52cca3e94f1e7509f366aa250ebbe1ed0b5 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 3 Jun 2008 16:03:15 -0700 Subject: net: neighbour table ABI problem The neighbor table time of last use information is returned in the incorrect unit. Kernel to user space ABI's need to use USER_HZ (or milliseconds), otherwise the application has to try and discover the real system HZ value which is problematic. Linux has standardized on keeping USER_HZ consistent (100hz) even when kernel is running internally at some other value. This change is small, but it breaks the ABI for older version of iproute2 utilities. But these utilities are already broken since they are looking at the psched_hz values which are completely different. So let's just go ahead and fix both kernel and user space. Older utilities will just print wrong values. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/core/neighbour.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 5d9d7130bd6..3896de79dfb 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2057,9 +2057,9 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, goto nla_put_failure; } - ci.ndm_used = now - neigh->used; - ci.ndm_confirmed = now - neigh->confirmed; - ci.ndm_updated = now - neigh->updated; + ci.ndm_used = jiffies_to_clock_t(now - neigh->used); + ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed); + ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated); ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1; read_unlock_bh(&neigh->lock); -- cgit v1.2.3 From 7557af25155a82ac2dad73eec6b0166868bf8ea2 Mon Sep 17 00:00:00 2001 From: Brice Goglin Date: Tue, 3 Jun 2008 16:07:45 -0700 Subject: net_dma: remove duplicate assignment in dma_skb_copy_datagram_iovec No need to compute copy twice in the frags loop in dma_skb_copy_datagram_iovec(). Signed-off-by: Brice Goglin Acked-by: Shannon Nelson Signed-off-by: Maciej Sosnowski Signed-off-by: Dan Williams Signed-off-by: David S. Miller --- net/core/user_dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/user_dma.c b/net/core/user_dma.c index 0ad1cd57bc3..c77aff9c6eb 100644 --- a/net/core/user_dma.c +++ b/net/core/user_dma.c @@ -75,7 +75,7 @@ int dma_skb_copy_datagram_iovec(struct dma_chan *chan, end = start + skb_shinfo(skb)->frags[i].size; copy = end - offset; - if ((copy = end - offset) > 0) { + if (copy > 0) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; struct page *page = frag->page; -- cgit v1.2.3 From 51b77cae0d5aa8e1546fca855dcfe48ddfadfa9c Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 3 Jun 2008 16:36:01 -0700 Subject: route: Mark unused route cache flags as such. Also removes an obsolete check for the unused flag RTCF_MASQ. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/in_route.h | 12 ++++++------ net/ipv4/route.c | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/in_route.h b/include/linux/in_route.h index 61f25c30a2a..b261b8c915f 100644 --- a/include/linux/in_route.h +++ b/include/linux/in_route.h @@ -10,19 +10,19 @@ #define RTCF_NOPMTUDISC RTM_F_NOPMTUDISC #define RTCF_NOTIFY 0x00010000 -#define RTCF_DIRECTDST 0x00020000 +#define RTCF_DIRECTDST 0x00020000 /* unused */ #define RTCF_REDIRECTED 0x00040000 -#define RTCF_TPROXY 0x00080000 +#define RTCF_TPROXY 0x00080000 /* unused */ -#define RTCF_FAST 0x00200000 -#define RTCF_MASQ 0x00400000 -#define RTCF_SNAT 0x00800000 +#define RTCF_FAST 0x00200000 /* unused */ +#define RTCF_MASQ 0x00400000 /* unused */ +#define RTCF_SNAT 0x00800000 /* unused */ #define RTCF_DOREDIRECT 0x01000000 #define RTCF_DIRECTSRC 0x04000000 #define RTCF_DNAT 0x08000000 #define RTCF_BROADCAST 0x10000000 #define RTCF_MULTICAST 0x20000000 -#define RTCF_REJECT 0x40000000 +#define RTCF_REJECT 0x40000000 /* unused */ #define RTCF_LOCAL 0x80000000 #define RTCF_NAT (RTCF_DNAT|RTCF_SNAT) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index df41026b60d..96be336064f 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1792,7 +1792,7 @@ static int __mkroute_input(struct sk_buff *skb, if (err) flags |= RTCF_DIRECTSRC; - if (out_dev == in_dev && err && !(flags & RTCF_MASQ) && + if (out_dev == in_dev && err && (IN_DEV_SHARED_MEDIA(out_dev) || inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) flags |= RTCF_DOREDIRECT; -- cgit v1.2.3 From 1f9d11c7c99da706e33646c3a9080dd5a8ef9a0b Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 3 Jun 2008 16:36:27 -0700 Subject: route: Mark unused routing attributes as such Also removes an unused policy entry for an attribute which is only used in kernel->user direction. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 4 ++-- net/ipv4/fib_frontend.c | 1 - 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 44c81c74453..a2aec2c0cfb 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -267,10 +267,10 @@ enum rtattr_type_t RTA_PREFSRC, RTA_METRICS, RTA_MULTIPATH, - RTA_PROTOINFO, + RTA_PROTOINFO, /* no longer used */ RTA_FLOW, RTA_CACHEINFO, - RTA_SESSION, + RTA_SESSION, /* no longer used */ RTA_MP_ALGO, /* no longer used */ RTA_TABLE, __RTA_MAX diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 0f1557a4ac7..0b2ac6a3d90 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -506,7 +506,6 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX+1] = { [RTA_PREFSRC] = { .type = NLA_U32 }, [RTA_METRICS] = { .type = NLA_NESTED }, [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, - [RTA_PROTOINFO] = { .type = NLA_U32 }, [RTA_FLOW] = { .type = NLA_U32 }, }; -- cgit v1.2.3 From bc3ed28caaef55e7e3a9316464256353c5f9b1df Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 3 Jun 2008 16:36:54 -0700 Subject: netlink: Improve returned error codes Make nlmsg_trim(), nlmsg_cancel(), genlmsg_cancel(), and nla_nest_cancel() void functions. Return -EMSGSIZE instead of -1 if the provided message buffer is not big enough. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/genetlink.h | 4 ++-- include/net/netlink.h | 20 +++++++++----------- net/core/neighbour.c | 3 ++- net/core/rtnetlink.c | 3 ++- net/netlink/attr.c | 12 ++++++------ net/netlink/genetlink.c | 6 ++++-- net/sched/sch_dsmark.c | 6 ++++-- net/sched/sch_gred.c | 3 ++- net/sched/sch_hfsc.c | 2 +- net/sched/sch_red.c | 3 ++- net/wireless/nl80211.c | 12 ++++++++---- 11 files changed, 42 insertions(+), 32 deletions(-) diff --git a/include/net/genetlink.h b/include/net/genetlink.h index decdda54682..747c255d1df 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -162,9 +162,9 @@ static inline int genlmsg_end(struct sk_buff *skb, void *hdr) * @skb: socket buffer the message is stored in * @hdr: generic netlink message header */ -static inline int genlmsg_cancel(struct sk_buff *skb, void *hdr) +static inline void genlmsg_cancel(struct sk_buff *skb, void *hdr) { - return nlmsg_cancel(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN); + nlmsg_cancel(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN); } /** diff --git a/include/net/netlink.h b/include/net/netlink.h index 112dcdf7e34..dfc3701dfcc 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -556,14 +556,12 @@ static inline void *nlmsg_get_pos(struct sk_buff *skb) * @skb: socket buffer the message is stored in * @mark: mark to trim to * - * Trims the message to the provided mark. Returns -1. + * Trims the message to the provided mark. */ -static inline int nlmsg_trim(struct sk_buff *skb, const void *mark) +static inline void nlmsg_trim(struct sk_buff *skb, const void *mark) { if (mark) skb_trim(skb, (unsigned char *) mark - skb->data); - - return -1; } /** @@ -572,11 +570,11 @@ static inline int nlmsg_trim(struct sk_buff *skb, const void *mark) * @nlh: netlink message header * * Removes the complete netlink message including all - * attributes from the socket buffer again. Returns -1. + * attributes from the socket buffer again. */ -static inline int nlmsg_cancel(struct sk_buff *skb, struct nlmsghdr *nlh) +static inline void nlmsg_cancel(struct sk_buff *skb, struct nlmsghdr *nlh) { - return nlmsg_trim(skb, nlh); + nlmsg_trim(skb, nlh); } /** @@ -775,7 +773,7 @@ static inline int __nla_parse_nested_compat(struct nlattr *tb[], int maxtype, int nested_len = nla_len(nla) - NLA_ALIGN(len); if (nested_len < 0) - return -1; + return -EINVAL; if (nested_len >= nla_attr_size(0)) return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), nested_len, policy); @@ -1080,11 +1078,11 @@ static inline int nla_nest_compat_end(struct sk_buff *skb, struct nlattr *start) * @start: container attribute * * Removes the container attribute and including all nested - * attributes. Returns -1. + * attributes. Returns -EMSGSIZE */ -static inline int nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) +static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) { - return nlmsg_trim(skb, start); + nlmsg_trim(skb, start); } /** diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 3896de79dfb..65f01f71b3f 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1714,7 +1714,8 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) return nla_nest_end(skb, nest); nla_put_failure: - return nla_nest_cancel(skb, nest); + nla_nest_cancel(skb, nest); + return -EMSGSIZE; } static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index cf857c4dc7b..a9a77216310 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -498,7 +498,8 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) return nla_nest_end(skb, mx); nla_put_failure: - return nla_nest_cancel(skb, mx); + nla_nest_cancel(skb, mx); + return -EMSGSIZE; } int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, diff --git a/net/netlink/attr.c b/net/netlink/attr.c index feb326f4a75..47bbf45ae5d 100644 --- a/net/netlink/attr.c +++ b/net/netlink/attr.c @@ -400,13 +400,13 @@ void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) * @attrlen: length of attribute payload * @data: head of attribute payload * - * Returns -1 if the tailroom of the skb is insufficient to store + * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store * the attribute header and payload. */ int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) { if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen))) - return -1; + return -EMSGSIZE; __nla_put(skb, attrtype, attrlen, data); return 0; @@ -418,13 +418,13 @@ int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) * @attrlen: length of attribute payload * @data: head of attribute payload * - * Returns -1 if the tailroom of the skb is insufficient to store + * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store * the attribute payload. */ int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) { if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) - return -1; + return -EMSGSIZE; __nla_put_nohdr(skb, attrlen, data); return 0; @@ -436,13 +436,13 @@ int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) * @attrlen: length of attribute payload * @data: head of attribute payload * - * Returns -1 if the tailroom of the skb is insufficient to store + * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store * the attribute payload. */ int nla_append(struct sk_buff *skb, int attrlen, const void *data) { if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) - return -1; + return -EMSGSIZE; memcpy(skb_put(skb, attrlen), data, attrlen); return 0; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index d16929c9b4b..f5aa23c3e88 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -554,7 +554,8 @@ static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, return genlmsg_end(skb, hdr); nla_put_failure: - return genlmsg_cancel(skb, hdr); + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; } static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid, @@ -590,7 +591,8 @@ static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid, return genlmsg_end(skb, hdr); nla_put_failure: - return genlmsg_cancel(skb, hdr); + genlmsg_cancel(skb, hdr); + return -EMSGSIZE; } static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 0df911fd67b..64465bacbe7 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -444,7 +444,8 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl, return nla_nest_end(skb, opts); nla_put_failure: - return nla_nest_cancel(skb, opts); + nla_nest_cancel(skb, opts); + return -EMSGSIZE; } static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb) @@ -466,7 +467,8 @@ static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb) return nla_nest_end(skb, opts); nla_put_failure: - return nla_nest_cancel(skb, opts); + nla_nest_cancel(skb, opts); + return -EMSGSIZE; } static const struct Qdisc_class_ops dsmark_class_ops = { diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 3a9d226ff1e..c89fba56db5 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -582,7 +582,8 @@ append_opt: return nla_nest_end(skb, opts); nla_put_failure: - return nla_nest_cancel(skb, opts); + nla_nest_cancel(skb, opts); + return -EMSGSIZE; } static void gred_destroy(struct Qdisc *sch) diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 87293d0db1d..fdfaa3fcc16 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1360,7 +1360,7 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb, nla_put_failure: nla_nest_cancel(skb, nest); - return -1; + return -EMSGSIZE; } static int diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 3dcd493f4f4..5c569853b9c 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -281,7 +281,8 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb) return nla_nest_end(skb, opts); nla_put_failure: - return nla_nest_cancel(skb, opts); + nla_nest_cancel(skb, opts); + return -EMSGSIZE; } static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2bdd4dddc0e..fb75f265b39 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -187,7 +187,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, return genlmsg_end(msg, hdr); nla_put_failure: - return genlmsg_cancel(msg, hdr); + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; } static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) @@ -273,7 +274,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, return genlmsg_end(msg, hdr); nla_put_failure: - return genlmsg_cancel(msg, hdr); + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; } static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *cb) @@ -928,7 +930,8 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, return genlmsg_end(msg, hdr); nla_put_failure: - return genlmsg_cancel(msg, hdr); + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; } static int nl80211_dump_station(struct sk_buff *skb, @@ -1267,7 +1270,8 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq, return genlmsg_end(msg, hdr); nla_put_failure: - return genlmsg_cancel(msg, hdr); + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; } static int nl80211_dump_mpath(struct sk_buff *skb, -- cgit v1.2.3 From ab32cd793dca21eec846a8204390d9594ed994d5 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 3 Jun 2008 16:37:33 -0700 Subject: route: Remove unused ifa_anycast field The field was supposed to allow the creation of an anycast route by assigning an anycast address to an address prefix. It was never implemented so this field is unused and serves no purpose. Remove it. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 1 - net/ipv4/devinet.c | 9 --------- 2 files changed, 10 deletions(-) diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 7009b0cdd06..c6f51ad52d5 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -117,7 +117,6 @@ struct in_ifaddr __be32 ifa_address; __be32 ifa_mask; __be32 ifa_broadcast; - __be32 ifa_anycast; unsigned char ifa_scope; unsigned char ifa_flags; unsigned char ifa_prefixlen; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 6848e4760f3..79a7ef6209f 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -90,7 +90,6 @@ static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = { [IFA_LOCAL] = { .type = NLA_U32 }, [IFA_ADDRESS] = { .type = NLA_U32 }, [IFA_BROADCAST] = { .type = NLA_U32 }, - [IFA_ANYCAST] = { .type = NLA_U32 }, [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, }; @@ -536,9 +535,6 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh) if (tb[IFA_BROADCAST]) ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]); - if (tb[IFA_ANYCAST]) - ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]); - if (tb[IFA_LABEL]) nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); else @@ -745,7 +741,6 @@ int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg) break; inet_del_ifa(in_dev, ifap, 0); ifa->ifa_broadcast = 0; - ifa->ifa_anycast = 0; ifa->ifa_scope = 0; } @@ -1113,7 +1108,6 @@ static inline size_t inet_nlmsg_size(void) + nla_total_size(4) /* IFA_ADDRESS */ + nla_total_size(4) /* IFA_LOCAL */ + nla_total_size(4) /* IFA_BROADCAST */ - + nla_total_size(4) /* IFA_ANYCAST */ + nla_total_size(IFNAMSIZ); /* IFA_LABEL */ } @@ -1143,9 +1137,6 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, if (ifa->ifa_broadcast) NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast); - if (ifa->ifa_anycast) - NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast); - if (ifa->ifa_label[0]) NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label); -- cgit v1.2.3 From d2ee3f2c4b1db1320c1efb4dcaceeaf6c7e6c2d3 Mon Sep 17 00:00:00 2001 From: Dong Wei Date: Wed, 4 Jun 2008 09:57:51 -0700 Subject: netfilter: xt_connlimit: fix accouning when receive RST packet in ESTABLISHED state In xt_connlimit match module, the counter of an IP is decreased when the TCP packet is go through the chain with ip_conntrack state TW. Well, it's very natural that the server and client close the socket with FIN packet. But when the client/server close the socket with RST packet(using so_linger), the counter for this connection still exsit. The following patch can fix it which is based on linux-2.6.25.4 Signed-off-by: Dong Wei Acked-by: Jan Engelhardt Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/netfilter/xt_connlimit.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 2e89a00df92..70907f6baac 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -73,7 +73,8 @@ connlimit_iphash6(const union nf_inet_addr *addr, static inline bool already_closed(const struct nf_conn *conn) { if (nf_ct_protonum(conn) == IPPROTO_TCP) - return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT; + return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT || + conn->proto.tcp.state == TCP_CONNTRACK_CLOSE; else return 0; } -- cgit v1.2.3 From b9c698964614f71b9c8afeca163a945b4c2e2d20 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Wed, 4 Jun 2008 09:58:27 -0700 Subject: netfilter: nf_conntrack_ipv6: fix inconsistent lock state in nf_ct_frag6_gather() [ 63.531438] ================================= [ 63.531520] [ INFO: inconsistent lock state ] [ 63.531520] 2.6.26-rc4 #7 [ 63.531520] --------------------------------- [ 63.531520] inconsistent {softirq-on-W} -> {in-softirq-W} usage. [ 63.531520] tcpsic6/3864 [HC0[0]:SC1[1]:HE1:SE0] takes: [ 63.531520] (&q->lock#2){-+..}, at: [] ipv6_frag_rcv+0xd0/0xbd0 [ 63.531520] {softirq-on-W} state was registered at: [ 63.531520] [] __lock_acquire+0x3aa/0x1080 [ 63.531520] [] lock_acquire+0x76/0xa0 [ 63.531520] [] _spin_lock+0x2b/0x40 [ 63.531520] [] nf_ct_frag6_gather+0x3f6/0x910 ... According to this and another similar lockdep report inet_fragment locks are taken from nf_ct_frag6_gather() with softirqs enabled, but these locks are mainly used in softirq context, so disabling BHs is necessary. Reported-and-tested-by: Eric Sesterhenn Signed-off-by: Jarek Poplawski Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 2dccad48058..e65e26e210e 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -209,7 +209,9 @@ fq_find(__be32 id, struct in6_addr *src, struct in6_addr *dst) arg.dst = dst; hash = ip6qhashfn(id, src, dst); + local_bh_disable(); q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash); + local_bh_enable(); if (q == NULL) goto oom; @@ -638,10 +640,10 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) goto ret_orig; } - spin_lock(&fq->q.lock); + spin_lock_bh(&fq->q.lock); if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { - spin_unlock(&fq->q.lock); + spin_unlock_bh(&fq->q.lock); pr_debug("Can't insert skb to queue\n"); fq_put(fq); goto ret_orig; @@ -653,7 +655,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) if (ret_skb == NULL) pr_debug("Can't reassemble fragmented packets\n"); } - spin_unlock(&fq->q.lock); + spin_unlock_bh(&fq->q.lock); fq_put(fq); return ret_skb; -- cgit v1.2.3 From 8aca6cb1179ed9bef9351028c8d8af852903eae2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 4 Jun 2008 11:34:22 -0700 Subject: tcp: Fix inconsistency source (CA_Open only when !tcp_left_out(tp)) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is possible that this skip path causes TCP to end up into an invalid state where ca_state was left to CA_Open while some segments already came into sacked_out. If next valid ACK doesn't contain new SACK information TCP fails to enter into tcp_fastretrans_alert(). Thus at least high_seq is set incorrectly to a too high seqno because some new data segments could be sent in between (and also, limited transmit is not being correctly invoked there). Reordering in both directions can easily cause this situation to occur. I guess we would want to use tcp_moderate_cwnd(tp) there as well as it may be possible to use this to trigger oversized burst to network by sending an old ACK with huge amount of SACK info, but I'm a bit unsure about its effects (mainly to FlightSize), so to be on the safe side I just currently fixed it minimally to keep TCP's state consistent (obviously, such nasty ACKs have been possible this far). Though it seems that FlightSize is already underestimated by some amount, so probably on the long term we might want to trigger recovery there too, if appropriate, to make FlightSize calculation to resemble reality at the time when the losses where discovered (but such change scares me too much now and requires some more thinking anyway how to do that as it likely involves some code shuffling). This bug was found by Brian Vowell while running my TCP debug patch to find cause of another TCP issue (fackets_out miscount). Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b54d9d37b63..54a0b741278 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2483,6 +2483,20 @@ static inline void tcp_complete_cwr(struct sock *sk) tcp_ca_event(sk, CA_EVENT_COMPLETE_CWR); } +static void tcp_try_keep_open(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + int state = TCP_CA_Open; + + if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker) + state = TCP_CA_Disorder; + + if (inet_csk(sk)->icsk_ca_state != state) { + tcp_set_ca_state(sk, state); + tp->high_seq = tp->snd_nxt; + } +} + static void tcp_try_to_open(struct sock *sk, int flag) { struct tcp_sock *tp = tcp_sk(sk); @@ -2496,15 +2510,7 @@ static void tcp_try_to_open(struct sock *sk, int flag) tcp_enter_cwr(sk, 1); if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) { - int state = TCP_CA_Open; - - if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker) - state = TCP_CA_Disorder; - - if (inet_csk(sk)->icsk_ca_state != state) { - tcp_set_ca_state(sk, state); - tp->high_seq = tp->snd_nxt; - } + tcp_try_keep_open(sk); tcp_moderate_cwnd(tp); } else { tcp_cwnd_down(sk, flag); @@ -3310,8 +3316,11 @@ no_queue: return 1; old_ack: - if (TCP_SKB_CB(skb)->sacked) + if (TCP_SKB_CB(skb)->sacked) { tcp_sacktag_write_queue(sk, skb, prior_snd_una); + if (icsk->icsk_ca_state == TCP_CA_Open) + tcp_try_keep_open(sk); + } uninteresting_ack: SOCK_DEBUG(sk, "Ack %u out of %u:%u\n", ack, tp->snd_una, tp->snd_nxt); -- cgit v1.2.3 From e51171019bb0e1f9fb57c25bd2e38ce652eaea27 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Thu, 29 May 2008 19:55:05 +0900 Subject: [SCTP]: Fix NULL dereference of asoc. Commit 7cbca67c073263c179f605bdbbdc565ab29d801d ("[IPV6]: Support Source Address Selection API (RFC5014)") introduced NULL dereference of asoc to sctp_v6_get_saddr in net/sctp/ipv6.c. Pointed out by Johann Felix Soden . Signed-off-by: YOSHIFUJI Hideaki --- include/net/sctp/structs.h | 3 ++- net/sctp/ipv6.c | 5 +++-- net/sctp/protocol.c | 3 ++- net/sctp/transport.c | 2 +- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 0ce0443c5b7..917d425f054 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -548,7 +548,8 @@ struct sctp_af { struct dst_entry *(*get_dst) (struct sctp_association *asoc, union sctp_addr *daddr, union sctp_addr *saddr); - void (*get_saddr) (struct sctp_association *asoc, + void (*get_saddr) (struct sctp_sock *sk, + struct sctp_association *asoc, struct dst_entry *dst, union sctp_addr *daddr, union sctp_addr *saddr); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e45e44c6063..e4aac3266fc 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -299,7 +299,8 @@ static inline int sctp_v6_addr_match_len(union sctp_addr *s1, /* Fills in the source address(saddr) based on the destination address(daddr) * and asoc's bind address list. */ -static void sctp_v6_get_saddr(struct sctp_association *asoc, +static void sctp_v6_get_saddr(struct sctp_sock *sk, + struct sctp_association *asoc, struct dst_entry *dst, union sctp_addr *daddr, union sctp_addr *saddr) @@ -318,7 +319,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc, if (!asoc) { ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL, &daddr->v6.sin6_addr, - inet6_sk(asoc->base.sk)->srcprefs, + inet6_sk(&sk->inet.sk)->srcprefs, &saddr->v6.sin6_addr); SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: " NIP6_FMT "\n", NIP6(saddr->v6.sin6_addr)); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 0ec234b762c..13ee7fa92e0 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -519,7 +519,8 @@ out: /* For v4, the source address is cached in the route entry(dst). So no need * to cache it separately and hence this is an empty routine. */ -static void sctp_v4_get_saddr(struct sctp_association *asoc, +static void sctp_v4_get_saddr(struct sctp_sock *sk, + struct sctp_association *asoc, struct dst_entry *dst, union sctp_addr *daddr, union sctp_addr *saddr) diff --git a/net/sctp/transport.c b/net/sctp/transport.c index f4938f6c5ab..62082e7b797 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -291,7 +291,7 @@ void sctp_transport_route(struct sctp_transport *transport, if (saddr) memcpy(&transport->saddr, saddr, sizeof(union sctp_addr)); else - af->get_saddr(asoc, dst, daddr, &transport->saddr); + af->get_saddr(opt, asoc, dst, daddr, &transport->saddr); transport->dst = dst; if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) { -- cgit v1.2.3 From a3c960899e042bc1c2b730a2115fa32da7802039 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 4 Jun 2008 01:30:25 +0900 Subject: [IPV6] UDP: Possible dst leak in udpv6_sendmsg. ip6_sk_dst_lookup returns held dst entry. It should be released on all paths beyond this point. Add missed release when up->pending is set. Bug report and initial patch by Denis V. Lunev . Signed-off-by: YOSHIFUJI Hideaki Acked-by: Denis V. Lunev --- net/ipv6/udp.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1fd784f3e2e..47123bf5eb0 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -848,12 +848,14 @@ do_append_data: } else { dst_release(dst); } + dst = NULL; } if (err > 0) err = np->recverr ? net_xmit_errno(err) : 0; release_sock(sk); out: + dst_release(dst); fl6_sock_release(flowlabel); if (!err) return len; -- cgit v1.2.3 From 24ef0da7b864435f221f668bc8a324160d063e78 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Wed, 28 May 2008 16:54:22 +0200 Subject: [IPV6] ADDRCONF: Check range of prefix length As of now, the prefix length is not vaildated when adding or deleting addresses. The value is passed directly into the inet6_ifaddr structure and later passed on to memcmp() as length indicator which relies on the value never to exceed 128 (bits). Due to the missing check, the currently code allows for any 8 bit value to be passed on as prefix length while using the netlink interface, and any 32 bit value while using the ioctl interface. [Use unsigned int instead to generate better code - yoshfuji] Signed-off-by: Thomas Graf Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/addrconf.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3a835578fd1..c3b20c5afa3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2027,7 +2027,7 @@ err_exit: * Manual configuration of address on an interface */ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, - int plen, __u8 ifa_flags, __u32 prefered_lft, + unsigned int plen, __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft) { struct inet6_ifaddr *ifp; @@ -2039,6 +2039,9 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, ASSERT_RTNL(); + if (plen > 128) + return -EINVAL; + /* check the lifetime */ if (!valid_lft || prefered_lft > valid_lft) return -EINVAL; @@ -2095,12 +2098,15 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, } static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx, - int plen) + unsigned int plen) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; struct net_device *dev; + if (plen > 128) + return -EINVAL; + dev = __dev_get_by_index(net, ifindex); if (!dev) return -ENODEV; -- cgit v1.2.3 From 82836372311a5cbf9cc5f4f47f9b56cb9edfe90d Mon Sep 17 00:00:00 2001 From: Colin Date: Tue, 27 May 2008 00:04:43 +0800 Subject: [IPV6] TUNNEL6: Fix incoming packet length check for inter-protocol tunnel. I discover a strange behavior in [ipv4 in ipv6] tunnel. When IPv6 tunnel payload is less than 40(0x28), packet can be sent to network, received in physical interface, but not seen in IP tunnel interface. No counter increase in tunnel interface. Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/tunnel6.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 6323921b40b..669f280989c 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -109,7 +109,7 @@ static int tunnel46_rcv(struct sk_buff *skb) { struct xfrm6_tunnel *handler; - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto drop; for (handler = tunnel46_handlers; handler; handler = handler->next) -- cgit v1.2.3 From baa2bfb8aef24bb7fe1875b256918724b3884662 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Fri, 30 May 2008 11:35:03 +0900 Subject: [IPV4] TUNNEL4: Fix incoming packet length check for inter-protocol tunnel. Signed-off-by: YOSHIFUJI Hideaki --- net/ipv4/tunnel4.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c index d3b709a6f26..cb1f0e83830 100644 --- a/net/ipv4/tunnel4.c +++ b/net/ipv4/tunnel4.c @@ -97,7 +97,7 @@ static int tunnel64_rcv(struct sk_buff *skb) { struct xfrm_tunnel *handler; - if (!pskb_may_pull(skb, sizeof(struct iphdr))) + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto drop; for (handler = tunnel64_handlers; handler; handler = handler->next) -- cgit v1.2.3 From 4bed72e4f5502ea3322f0a00794815fa58951abe Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 27 May 2008 17:37:49 +0900 Subject: [IPV6] ADDRCONF: Allow longer lifetime on 64bit archs. - Allow longer lifetimes (>= 0x7fffffff/HZ) on 64bit archs by using unsigned long. - Shadow this arithmetic overflow workaround by introducing helper functions: addrconf_timeout_fixup() and addrconf_finite_timeout(). Signed-off-by: YOSHIFUJI Hideaki --- include/net/addrconf.h | 22 ++++++++++++ net/ipv6/addrconf.c | 97 ++++++++++++++++++++++++++------------------------ net/ipv6/route.c | 12 ++----- 3 files changed, 75 insertions(+), 56 deletions(-) diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 0a2f0372df3..bbd3d583c6e 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -94,6 +94,28 @@ extern void addrconf_join_solict(struct net_device *dev, extern void addrconf_leave_solict(struct inet6_dev *idev, struct in6_addr *addr); +static inline unsigned long addrconf_timeout_fixup(u32 timeout, + unsigned unit) +{ + if (timeout == 0xffffffff) + return ~0UL; + + /* + * Avoid arithmetic overflow. + * Assuming unit is constant and non-zero, this "if" statement + * will go away on 64bit archs. + */ + if (0xfffffffe > LONG_MAX / unit && timeout > LONG_MAX / unit) + return LONG_MAX / unit; + + return timeout; +} + +static inline int addrconf_finite_timeout(unsigned long timeout) +{ + return ~timeout; +} + /* * IPv6 Address Label subsystem (addrlabel.c) */ diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c3b20c5afa3..147588f4c7c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -731,8 +731,13 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) onlink = -1; spin_lock(&ifa->lock); - lifetime = min_t(unsigned long, - ifa->valid_lft, 0x7fffffffUL/HZ); + + lifetime = addrconf_timeout_fixup(ifa->valid_lft, HZ); + /* + * Note: Because this address is + * not permanent, lifetime < + * LONG_MAX / HZ here. + */ if (time_before(expires, ifa->tstamp + lifetime * HZ)) expires = ifa->tstamp + lifetime * HZ; @@ -1722,7 +1727,6 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) __u32 valid_lft; __u32 prefered_lft; int addr_type; - unsigned long rt_expires; struct inet6_dev *in6_dev; pinfo = (struct prefix_info *) opt; @@ -1764,28 +1768,23 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) * 2) Configure prefixes with the auto flag set */ - if (valid_lft == INFINITY_LIFE_TIME) - rt_expires = ~0UL; - else if (valid_lft >= 0x7FFFFFFF/HZ) { + if (pinfo->onlink) { + struct rt6_info *rt; + unsigned long rt_expires; + /* Avoid arithmetic overflow. Really, we could * save rt_expires in seconds, likely valid_lft, * but it would require division in fib gc, that it * not good. */ - rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ); - } else - rt_expires = valid_lft * HZ; + if (HZ > USER_HZ) + rt_expires = addrconf_timeout_fixup(valid_lft, HZ); + else + rt_expires = addrconf_timeout_fixup(valid_lft, USER_HZ); - /* - * We convert this (in jiffies) to clock_t later. - * Avoid arithmetic overflow there as well. - * Overflow can happen only if HZ < USER_HZ. - */ - if (HZ < USER_HZ && ~rt_expires && rt_expires > 0x7FFFFFFF / USER_HZ) - rt_expires = 0x7FFFFFFF / USER_HZ; + if (addrconf_finite_timeout(rt_expires)) + rt_expires *= HZ; - if (pinfo->onlink) { - struct rt6_info *rt; rt = rt6_lookup(dev_net(dev), &pinfo->prefix, NULL, dev->ifindex, 1); @@ -1794,7 +1793,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) if (valid_lft == 0) { ip6_del_rt(rt); rt = NULL; - } else if (~rt_expires) { + } else if (addrconf_finite_timeout(rt_expires)) { /* not infinity */ rt->rt6i_expires = jiffies + rt_expires; rt->rt6i_flags |= RTF_EXPIRES; @@ -1803,9 +1802,9 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) rt->rt6i_expires = 0; } } else if (valid_lft) { - int flags = RTF_ADDRCONF | RTF_PREFIX_RT; clock_t expires = 0; - if (~rt_expires) { + int flags = RTF_ADDRCONF | RTF_PREFIX_RT; + if (addrconf_finite_timeout(rt_expires)) { /* not infinity */ flags |= RTF_EXPIRES; expires = jiffies_to_clock_t(rt_expires); @@ -2036,6 +2035,7 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, int scope; u32 flags; clock_t expires; + unsigned long timeout; ASSERT_RTNL(); @@ -2055,22 +2055,23 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx, scope = ipv6_addr_scope(pfx); - if (valid_lft == INFINITY_LIFE_TIME) { - ifa_flags |= IFA_F_PERMANENT; - flags = 0; - expires = 0; - } else { - if (valid_lft >= 0x7FFFFFFF/HZ) - valid_lft = 0x7FFFFFFF/HZ; + timeout = addrconf_timeout_fixup(valid_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + expires = jiffies_to_clock_t(timeout * HZ); + valid_lft = timeout; flags = RTF_EXPIRES; - expires = jiffies_to_clock_t(valid_lft * HZ); + } else { + expires = 0; + flags = 0; + ifa_flags |= IFA_F_PERMANENT; } - if (prefered_lft == 0) - ifa_flags |= IFA_F_DEPRECATED; - else if ((prefered_lft >= 0x7FFFFFFF/HZ) && - (prefered_lft != INFINITY_LIFE_TIME)) - prefered_lft = 0x7FFFFFFF/HZ; + timeout = addrconf_timeout_fixup(prefered_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + if (timeout == 0) + ifa_flags |= IFA_F_DEPRECATED; + prefered_lft = timeout; + } ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags); @@ -3175,26 +3176,28 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, { u32 flags; clock_t expires; + unsigned long timeout; if (!valid_lft || (prefered_lft > valid_lft)) return -EINVAL; - if (valid_lft == INFINITY_LIFE_TIME) { - ifa_flags |= IFA_F_PERMANENT; - flags = 0; - expires = 0; - } else { - if (valid_lft >= 0x7FFFFFFF/HZ) - valid_lft = 0x7FFFFFFF/HZ; + timeout = addrconf_timeout_fixup(valid_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + expires = jiffies_to_clock_t(timeout * HZ); + valid_lft = timeout; flags = RTF_EXPIRES; - expires = jiffies_to_clock_t(valid_lft * HZ); + } else { + expires = 0; + flags = 0; + ifa_flags |= IFA_F_PERMANENT; } - if (prefered_lft == 0) - ifa_flags |= IFA_F_DEPRECATED; - else if ((prefered_lft >= 0x7FFFFFFF/HZ) && - (prefered_lft != INFINITY_LIFE_TIME)) - prefered_lft = 0x7FFFFFFF/HZ; + timeout = addrconf_timeout_fixup(prefered_lft, HZ); + if (addrconf_finite_timeout(timeout)) { + if (timeout == 0) + ifa_flags |= IFA_F_DEPRECATED; + prefered_lft = timeout; + } spin_lock_bh(&ifp->lock); ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 48534c6c073..220cffe9e63 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -446,7 +446,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, struct route_info *rinfo = (struct route_info *) opt; struct in6_addr prefix_buf, *prefix; unsigned int pref; - u32 lifetime; + unsigned long lifetime; struct rt6_info *rt; if (len < sizeof(struct route_info)) { @@ -472,13 +472,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, if (pref == ICMPV6_ROUTER_PREF_INVALID) pref = ICMPV6_ROUTER_PREF_MEDIUM; - lifetime = ntohl(rinfo->lifetime); - if (lifetime == 0xffffffff) { - /* infinity */ - } else if (lifetime > 0x7fffffff/HZ - 1) { - /* Avoid arithmetic overflow */ - lifetime = 0x7fffffff/HZ - 1; - } + lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ); if (rinfo->length == 3) prefix = (struct in6_addr *)rinfo->prefix; @@ -506,7 +500,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref); if (rt) { - if (lifetime == 0xffffffff) { + if (!addrconf_finite_timeout(lifetime)) { rt->rt6i_flags &= ~RTF_EXPIRES; } else { rt->rt6i_expires = jiffies + HZ * lifetime; -- cgit v1.2.3 From 05335c2220c4911b69cb1bdd79e603ab08088372 Mon Sep 17 00:00:00 2001 From: Yang Hongyang Date: Wed, 28 May 2008 16:23:47 +0800 Subject: [IPV6]: Fix the return value of get destination options with NULL data pointer If we pass NULL data buffer to getsockopt(), it will return 0, and the option length is set to -EFAULT: getsockopt(sk, IPPROTO_IPV6, IPV6_DSTOPTS, NULL, &len); This is because ipv6_getsockopt_sticky() will return -EFAULT or -EINVAL if some error occur. This patch fix this problem. Signed-off-by: Yang Hongyang Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/ipv6_sockglue.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 56d55fecf8e..aa7bedf780e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -975,6 +975,9 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, len = ipv6_getsockopt_sticky(sk, np->opt, optname, optval, len); release_sock(sk); + /* check if ipv6_getsockopt_sticky() returns err code */ + if (len < 0) + return len; return put_user(len, optlen); } -- cgit v1.2.3 From 95b496b66615d8c43f77702049b1bd01e2f06595 Mon Sep 17 00:00:00 2001 From: Yang Hongyang Date: Wed, 28 May 2008 16:27:28 +0800 Subject: [IPV6]: Fix the data length of get destination options with short length If get destination options with length which is not enough for that option,getsockopt() will still return the real length of the option, which is larger then the buffer space. This is because ipv6_getsockopt_sticky() returns the real length of the option. This patch fix this problem. Signed-off-by: Yang Hongyang Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/ipv6_sockglue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index aa7bedf780e..9293b9f0ac2 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -832,7 +832,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt, len = min_t(unsigned int, len, ipv6_optlen(hdr)); if (copy_to_user(optval, hdr, len)) return -EFAULT; - return ipv6_optlen(hdr); + return len; } static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, -- cgit v1.2.3 From 187e38384c4abfbbb1b880fab234d16c2df23a25 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 4 Jun 2008 13:01:37 +0900 Subject: [IPV6]: Check outgoing interface even if source address is unspecified. The outgoing interface index (ipi6_ifindex) in IPV6_PKTINFO ancillary data, is not checked if the source address (ipi6_addr) is unspecified. If the ipi6_ifindex is the not-exist interface, it should be fail. Based on patch from Shan Wei and Brian Haley . Signed-off-by: Shan Wei Signed-off-by: Brian Haley Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/datagram.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 94fa6ae77cf..53e3883f766 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -509,7 +509,6 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { int addr_type; - struct net_device *dev = NULL; if (!CMSG_OK(msg, cmsg)) { err = -EINVAL; @@ -522,6 +521,9 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, switch (cmsg->cmsg_type) { case IPV6_PKTINFO: case IPV6_2292PKTINFO: + { + struct net_device *dev = NULL; + if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct in6_pktinfo))) { err = -EINVAL; goto exit_f; @@ -535,32 +537,32 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, fl->oif = src_info->ipi6_ifindex; } - addr_type = ipv6_addr_type(&src_info->ipi6_addr); + addr_type = __ipv6_addr_type(&src_info->ipi6_addr); - if (addr_type == IPV6_ADDR_ANY) - break; + if (fl->oif) { + dev = dev_get_by_index(&init_net, fl->oif); + if (!dev) + return -ENODEV; + } else if (addr_type & IPV6_ADDR_LINKLOCAL) + return -EINVAL; - if (addr_type & IPV6_ADDR_LINKLOCAL) { - if (!src_info->ipi6_ifindex) - return -EINVAL; - else { - dev = dev_get_by_index(&init_net, src_info->ipi6_ifindex); - if (!dev) - return -ENODEV; - } - } - if (!ipv6_chk_addr(&init_net, &src_info->ipi6_addr, - dev, 0)) { - if (dev) - dev_put(dev); - err = -EINVAL; - goto exit_f; + if (addr_type != IPV6_ADDR_ANY) { + int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL; + if (!ipv6_chk_addr(&init_net, &src_info->ipi6_addr, + strict ? dev : NULL, 0)) + err = -EINVAL; + else + ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr); } + if (dev) dev_put(dev); - ipv6_addr_copy(&fl->fl6_src, &src_info->ipi6_addr); + if (err) + goto exit_f; + break; + } case IPV6_FLOWINFO: if (cmsg->cmsg_len < CMSG_LEN(4)) { -- cgit v1.2.3 From 91e1908f569dd96a25a3947de8771e6cc93999dd Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 4 Jun 2008 13:02:49 +0900 Subject: [IPV6] NETNS: Handle ancillary data in appropriate namespace. Signed-off-by: YOSHIFUJI Hideaki --- include/net/transp_v6.h | 3 ++- net/ipv6/datagram.c | 7 ++++--- net/ipv6/ip6_flowlabel.c | 2 +- net/ipv6/ipv6_sockglue.c | 2 +- net/ipv6/raw.c | 2 +- net/ipv6/udp.c | 2 +- 6 files changed, 10 insertions(+), 8 deletions(-) diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index 27394e0447d..112934a3288 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -40,7 +40,8 @@ extern int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb); -extern int datagram_send_ctl(struct msghdr *msg, +extern int datagram_send_ctl(struct net *net, + struct msghdr *msg, struct flowi *fl, struct ipv6_txoptions *opt, int *hlimit, int *tclass); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 53e3883f766..b9c2de84a8a 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -496,7 +496,8 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) return 0; } -int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, +int datagram_send_ctl(struct net *net, + struct msghdr *msg, struct flowi *fl, struct ipv6_txoptions *opt, int *hlimit, int *tclass) { @@ -540,7 +541,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, addr_type = __ipv6_addr_type(&src_info->ipi6_addr); if (fl->oif) { - dev = dev_get_by_index(&init_net, fl->oif); + dev = dev_get_by_index(net, fl->oif); if (!dev) return -ENODEV; } else if (addr_type & IPV6_ADDR_LINKLOCAL) @@ -548,7 +549,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, if (addr_type != IPV6_ADDR_ANY) { int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL; - if (!ipv6_chk_addr(&init_net, &src_info->ipi6_addr, + if (!ipv6_chk_addr(net, &src_info->ipi6_addr, strict ? dev : NULL, 0)) err = -EINVAL; else diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index eb7a940310f..37a4e777e34 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -354,7 +354,7 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval, msg.msg_control = (void*)(fl->opt+1); flowi.oif = 0; - err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk, &junk); + err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk, &junk); if (err) goto done; err = -EINVAL; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 9293b9f0ac2..3eef8e5b363 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -416,7 +416,7 @@ sticky_done: msg.msg_controllen = optlen; msg.msg_control = (void*)(opt+1); - retv = datagram_send_ctl(&msg, &fl, opt, &junk, &junk); + retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk); if (retv) goto done; update: diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 232e0dc45bf..603df76e052 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -813,7 +813,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(struct ipv6_txoptions); - err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass); + err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass); if (err < 0) { fl6_sock_release(flowlabel); return err; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 47123bf5eb0..1b35c472200 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -731,7 +731,7 @@ do_udp_sendmsg: memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass); + err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass); if (err < 0) { fl6_sock_release(flowlabel); return err; -- cgit v1.2.3 From 49d074f4009a7b5ce9c17b040f978abcb4d7f6f6 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 4 Jun 2008 15:49:06 +0400 Subject: [IPV6]: Do not change protocol for raw IPv6 sockets. It is not allowed to change underlying protocol for int fd = socket(PF_INET6, SOCK_RAW, IPPROTO_UDP); Signed-off-by: Denis V. Lunev Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/ipv6_sockglue.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 3eef8e5b363..1afe210d628 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -161,6 +161,9 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, struct ipv6_txoptions *opt; struct sk_buff *pktopt; + if (sk->sk_type == SOCK_RAW) + break; + if (sk->sk_protocol != IPPROTO_UDP && sk->sk_protocol != IPPROTO_UDPLITE && sk->sk_protocol != IPPROTO_TCP) -- cgit v1.2.3 From 36d926b94a9908937593e5669162305a071b9cc3 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 4 Jun 2008 15:49:07 +0400 Subject: [IPV6]: inet_sk(sk)->cork.opt leak IPv6 UDP sockets wth IPv4 mapped address use udp_sendmsg to send the data actually. In this case ip_flush_pending_frames should be called instead of ip6_flush_pending_frames. Signed-off-by: Denis V. Lunev Signed-off-by: YOSHIFUJI Hideaki --- include/net/udp.h | 1 + net/ipv4/udp.c | 3 ++- net/ipv6/udp.c | 4 +++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/net/udp.h b/include/net/udp.h index 3e55a99b0ba..ccce8370704 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -135,6 +135,7 @@ extern void udp_err(struct sk_buff *, u32); extern int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len); +extern void udp_flush_pending_frames(struct sock *sk); extern int udp_rcv(struct sk_buff *skb); extern int udp_ioctl(struct sock *sk, int cmd, unsigned long arg); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index db1cb7c96d6..56fcda3694b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -420,7 +420,7 @@ void udp_err(struct sk_buff *skb, u32 info) /* * Throw away all pending data and cancel the corking. Socket is locked. */ -static void udp_flush_pending_frames(struct sock *sk) +void udp_flush_pending_frames(struct sock *sk) { struct udp_sock *up = udp_sk(sk); @@ -430,6 +430,7 @@ static void udp_flush_pending_frames(struct sock *sk) ip_flush_pending_frames(sk); } } +EXPORT_SYMBOL(udp_flush_pending_frames); /** * udp4_hwcsum_outgoing - handle outgoing HW checksumming diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 1b35c472200..dd309626ae9 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -534,7 +534,9 @@ static void udp_v6_flush_pending_frames(struct sock *sk) { struct udp_sock *up = udp_sk(sk); - if (up->pending) { + if (up->pending == AF_INET) + udp_flush_pending_frames(sk); + else if (up->pending) { up->len = 0; up->pending = 0; ip6_flush_pending_frames(sk); -- cgit v1.2.3 From 9596cc826e2e52bfc318ca37a6c52fe3d72990a3 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 4 Jun 2008 15:49:08 +0400 Subject: [IPV6]: Do not change protocol for UDPv6 sockets with pending sent data. Signed-off-by: Denis V. Lunev Signed-off-by: YOSHIFUJI Hideaki --- net/ipv6/ipv6_sockglue.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 1afe210d628..26b83e512a0 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -164,9 +164,14 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (sk->sk_type == SOCK_RAW) break; - if (sk->sk_protocol != IPPROTO_UDP && - sk->sk_protocol != IPPROTO_UDPLITE && - sk->sk_protocol != IPPROTO_TCP) + if (sk->sk_protocol == IPPROTO_UDP || + sk->sk_protocol == IPPROTO_UDPLITE) { + struct udp_sock *up = udp_sk(sk); + if (up->pending == AF_INET6) { + retv = -EBUSY; + break; + } + } else if (sk->sk_protocol != IPPROTO_TCP) break; if (sk->sk_state != TCP_ESTABLISHED) { -- cgit v1.2.3 From a13366c632132bb9f8f2950a79773d8f68f4871e Mon Sep 17 00:00:00 2001 From: Adrian-Ken Rueegsegger Date: Wed, 4 Jun 2008 12:04:55 -0700 Subject: xfrm: xfrm_algo: correct usage of RIPEMD-160 This patch fixes the usage of RIPEMD-160 in xfrm_algo which in turn allows hmac(rmd160) to be used as authentication mechanism in IPsec ESP and AH (see RFC 2857). Signed-off-by: Adrian-Ken Rueegsegger Acked-by: Herbert Xu Signed-off-by: David S. Miller --- net/xfrm/xfrm_algo.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index ac765dd9c7f..23a2cc04b8c 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -200,8 +200,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, { - .name = "hmac(ripemd160)", - .compat = "ripemd160", + .name = "hmac(rmd160)", + .compat = "rmd160", .uinfo = { .auth = { -- cgit v1.2.3 From c03e05d81d70879273488206bfcb1805ebca9612 Mon Sep 17 00:00:00 2001 From: Mark Asselstine Date: Wed, 4 Jun 2008 12:06:28 -0700 Subject: sunhme: Cleanup use of deprecated calls to save_and_cli and restore_flags. Make use of local_irq_save and local_irq_restore rather then the deprecated save_and_cli and restore_flags calls. Signed-off-by: Mark Asselstine Signed-off-by: David S. Miller --- drivers/net/sunhme.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c index b4e7f30ea4e..1aa425be306 100644 --- a/drivers/net/sunhme.c +++ b/drivers/net/sunhme.c @@ -111,7 +111,7 @@ static __inline__ void tx_add_log(struct happy_meal *hp, unsigned int a, unsigne struct hme_tx_logent *tlp; unsigned long flags; - save_and_cli(flags); + local_irq_save(flags); tlp = &tx_log[txlog_cur_entry]; tlp->tstamp = (unsigned int)jiffies; tlp->tx_new = hp->tx_new; @@ -119,7 +119,7 @@ static __inline__ void tx_add_log(struct happy_meal *hp, unsigned int a, unsigne tlp->action = a; tlp->status = s; txlog_cur_entry = (txlog_cur_entry + 1) & (TX_LOG_LEN - 1); - restore_flags(flags); + local_irq_restore(flags); } static __inline__ void tx_dump_log(void) { -- cgit v1.2.3 From a6604471db5e7a33474a7f16c64d6b118fae3e74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= Date: Wed, 4 Jun 2008 12:07:44 -0700 Subject: tcp: fix skb vs fack_count out-of-sync condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This bug is able to corrupt fackets_out in very rare cases. In order for this to cause corruption: 1) DSACK in the middle of previous SACK block must be generated. 2) In order to take that particular branch, part or all of the DSACKed segment must already be SACKed so that we have that in cache in the first place. 3) The new info must be top enough so that fackets_out will be updated on this iteration. ...then fack_count is updated while skb wasn't, then we walk again that particular segment thus updating fack_count twice for a single skb and finally that value is assigned to fackets_out by tcp_sacktag_one. It is safe to call tcp_sacktag_one just once for a segment (at DSACK), no need to call again for plain SACK. Potential problem of the miscount are limited to premature entry to recovery and to inflated reordering metric (which could even cancel each other out in the most the luckiest scenarios :-)). Both are quite insignificant in worst case too and there exists also code to reset them (fackets_out once sacked_out becomes zero and reordering metric on RTO). This has been reported by a number of people, because it occurred quite rarely, it has been very evasive. Andy Furniss was able to get it to occur couple of times so that a bit more info was collected about the problem using a debug patch, though it still required lot of checking around. Thanks also to others who have tried to help here. This is listed as Bugzilla #10346. The bug was introduced by me in commit 68f8353b48 ([TCP]: Rewrite SACK block processing & sack_recv_cache use), I probably thought back then that there's need to scan that entry twice or didn't dare to make it go through it just once there. Going through twice would have required restoring fack_count after the walk but as noted above, I chose to drop the additional walk step altogether here. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 54a0b741278..eba873e9b56 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1392,9 +1392,9 @@ static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb, if (before(next_dup->start_seq, skip_to_seq)) { skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq, fack_count); - tcp_sacktag_walk(skb, sk, NULL, - next_dup->start_seq, next_dup->end_seq, - 1, fack_count, reord, flag); + skb = tcp_sacktag_walk(skb, sk, NULL, + next_dup->start_seq, next_dup->end_seq, + 1, fack_count, reord, flag); } return skb; -- cgit v1.2.3 From 4141ddc02a92a6e3e5793601554c6033e83c25b9 Mon Sep 17 00:00:00 2001 From: Gui Jianfeng Date: Wed, 4 Jun 2008 12:37:33 -0700 Subject: sctp: retran_path update bug fix If the current retran_path is the only active one, it should update it to the the next inactive one. Signed-off-by: Gui Jianfeng Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/associola.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/net/sctp/associola.c b/net/sctp/associola.c index b4cd2b71953..532634861db 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1203,6 +1203,9 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) struct list_head *head = &asoc->peer.transport_addr_list; struct list_head *pos; + if (asoc->peer.transport_count == 1) + return; + /* Find the next transport in a round-robin fashion. */ t = asoc->peer.retran_path; pos = &t->transports; @@ -1217,6 +1220,15 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) t = list_entry(pos, struct sctp_transport, transports); + /* We have exhausted the list, but didn't find any + * other active transports. If so, use the next + * transport. + */ + if (t == asoc->peer.retran_path) { + t = next; + break; + } + /* Try to find an active transport. */ if ((t->state == SCTP_ACTIVE) || @@ -1229,15 +1241,6 @@ void sctp_assoc_update_retran_path(struct sctp_association *asoc) if (!next) next = t; } - - /* We have exhausted the list, but didn't find any - * other active transports. If so, use the next - * transport. - */ - if (t == asoc->peer.retran_path) { - t = next; - break; - } } asoc->peer.retran_path = t; -- cgit v1.2.3 From 159c6bea37c54dfae44409467e0f17600722d541 Mon Sep 17 00:00:00 2001 From: Gui Jianfeng Date: Wed, 4 Jun 2008 12:38:07 -0700 Subject: sctp: Move sctp_v4_dst_saddr out of loop There's no need to execute sctp_v4_dst_saddr() for each iteration, just move it out of loop. Signed-off-by: Gui Jianfeng Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- net/sctp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 13ee7fa92e0..56bdaf7fc42 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -470,11 +470,11 @@ static struct dst_entry *sctp_v4_get_dst(struct sctp_association *asoc, /* Walk through the bind address list and look for a bind * address that matches the source address of the returned dst. */ + sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port)); rcu_read_lock(); list_for_each_entry_rcu(laddr, &bp->address_list, list) { if (!laddr->valid || (laddr->state != SCTP_ADDR_SRC)) continue; - sctp_v4_dst_saddr(&dst_saddr, dst, htons(bp->port)); if (sctp_v4_cmp_addr(&dst_saddr, &laddr->a)) goto out_unlock; } -- cgit v1.2.3 From a6465234814efda9ed1dccdba852953f7508e827 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 4 Jun 2008 12:38:43 -0700 Subject: sctp: Correctly implement Fast Recovery cwnd manipulations. Correctly keep track of Fast Recovery state and do not reduce congestion window multiple times during sucht state. Signed-off-by: Vlad Yasevich Tested-by: Wei Yongjun Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 8 +++++++- net/sctp/transport.c | 44 ++++++++++++++++++++++++++++++++------------ 2 files changed, 39 insertions(+), 13 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 917d425f054..67592072a32 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -902,7 +902,10 @@ struct sctp_transport { * calculation completes (i.e. the DATA chunk * is SACK'd) clear this flag. */ - int rto_pending; + __u8 rto_pending; + + /* Flag to track the current fast recovery state */ + __u8 fast_recovery; /* * These are the congestion stats. @@ -921,6 +924,9 @@ struct sctp_transport { /* Data that has been sent, but not acknowledged. */ __u32 flight_size; + /* TSN marking the fast recovery exit point */ + __u32 fast_recovery_exit; + /* Destination */ struct dst_entry *dst; /* Source address. */ diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 62082e7b797..9647fb27722 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -79,6 +79,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, peer->rttvar = 0; peer->srtt = 0; peer->rto_pending = 0; + peer->fast_recovery = 0; peer->last_time_heard = jiffies; peer->last_time_used = jiffies; @@ -403,11 +404,16 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, cwnd = transport->cwnd; flight_size = transport->flight_size; + /* See if we need to exit Fast Recovery first */ + if (transport->fast_recovery && + TSN_lte(transport->fast_recovery_exit, sack_ctsn)) + transport->fast_recovery = 0; + /* The appropriate cwnd increase algorithm is performed if, and only - * if the cumulative TSN has advanced and the congestion window is + * if the cumulative TSN whould advanced and the congestion window is * being fully utilized. */ - if ((transport->asoc->ctsn_ack_point >= sack_ctsn) || + if (TSN_lte(sack_ctsn, transport->asoc->ctsn_ack_point) || (flight_size < cwnd)) return; @@ -416,17 +422,23 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, pmtu = transport->asoc->pathmtu; if (cwnd <= ssthresh) { - /* RFC 2960 7.2.1, sctpimpguide-05 2.14.2 When cwnd is less - * than or equal to ssthresh an SCTP endpoint MUST use the - * slow start algorithm to increase cwnd only if the current - * congestion window is being fully utilized and an incoming - * SACK advances the Cumulative TSN Ack Point. Only when these - * two conditions are met can the cwnd be increased otherwise - * the cwnd MUST not be increased. If these conditions are met - * then cwnd MUST be increased by at most the lesser of - * 1) the total size of the previously outstanding DATA - * chunk(s) acknowledged, and 2) the destination's path MTU. + /* RFC 4960 7.2.1 + * o When cwnd is less than or equal to ssthresh, an SCTP + * endpoint MUST use the slow-start algorithm to increase + * cwnd only if the current congestion window is being fully + * utilized, an incoming SACK advances the Cumulative TSN + * Ack Point, and the data sender is not in Fast Recovery. + * Only when these three conditions are met can the cwnd be + * increased; otherwise, the cwnd MUST not be increased. + * If these conditions are met, then cwnd MUST be increased + * by, at most, the lesser of 1) the total size of the + * previously outstanding DATA chunk(s) acknowledged, and + * 2) the destination's path MTU. This upper bound protects + * against the ACK-Splitting attack outlined in [SAVAGE99]. */ + if (transport->fast_recovery) + return; + if (bytes_acked > pmtu) cwnd += pmtu; else @@ -502,6 +514,13 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport, * cwnd = ssthresh * partial_bytes_acked = 0 */ + if (transport->fast_recovery) + return; + + /* Mark Fast recovery */ + transport->fast_recovery = 1; + transport->fast_recovery_exit = transport->asoc->next_tsn - 1; + transport->ssthresh = max(transport->cwnd/2, 4*transport->asoc->pathmtu); transport->cwnd = transport->ssthresh; @@ -586,6 +605,7 @@ void sctp_transport_reset(struct sctp_transport *t) t->flight_size = 0; t->error_count = 0; t->rto_pending = 0; + t->fast_recovery = 0; /* Initialize the state information for SFR-CACC */ t->cacc.changeover_active = 0; -- cgit v1.2.3 From 62aeaff5ccd96462b7077046357a6d7886175a57 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 4 Jun 2008 12:39:11 -0700 Subject: sctp: Start T3-RTX timer when fast retransmitting lowest TSN When we are trying to fast retransmit the lowest outstanding TSN, we need to restart the T3-RTX timer, so that subsequent timeouts will correctly tag all the packets necessary for retransmissions. Signed-off-by: Vlad Yasevich Tested-by: Wei Yongjun Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 5 ++++- net/sctp/outqueue.c | 42 +++++++++++++++++++++++++++++++----------- net/sctp/transport.c | 4 ++-- 3 files changed, 37 insertions(+), 14 deletions(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 67592072a32..714dc43c034 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1051,7 +1051,7 @@ void sctp_transport_route(struct sctp_transport *, union sctp_addr *, struct sctp_sock *); void sctp_transport_pmtu(struct sctp_transport *); void sctp_transport_free(struct sctp_transport *); -void sctp_transport_reset_timers(struct sctp_transport *); +void sctp_transport_reset_timers(struct sctp_transport *, int); void sctp_transport_hold(struct sctp_transport *); void sctp_transport_put(struct sctp_transport *); void sctp_transport_update_rto(struct sctp_transport *, __u32); @@ -1141,6 +1141,9 @@ struct sctp_outq { /* How many unackd bytes do we have in-flight? */ __u32 outstanding_bytes; + /* Are we doing fast-rtx on this queue */ + char fast_rtx; + /* Corked? */ char cork; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 59edfd25a19..5d3c441e84d 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -208,6 +208,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) INIT_LIST_HEAD(&q->sacked); INIT_LIST_HEAD(&q->abandoned); + q->fast_rtx = 0; q->outstanding_bytes = 0; q->empty = 1; q->cork = 0; @@ -500,6 +501,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, case SCTP_RTXR_FAST_RTX: SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX); + q->fast_rtx = 1; break; case SCTP_RTXR_PMTUD: SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS); @@ -543,10 +545,13 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, sctp_xmit_t status; struct sctp_chunk *chunk, *chunk1; struct sctp_association *asoc; + int fast_rtx; int error = 0; + int timer = 0; asoc = q->asoc; lqueue = &q->retransmit; + fast_rtx = q->fast_rtx; /* RFC 2960 6.3.3 Handle T3-rtx Expiration * @@ -587,13 +592,12 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, switch (status) { case SCTP_XMIT_PMTU_FULL: /* Send this packet. */ - if ((error = sctp_packet_transmit(pkt)) == 0) - *start_timer = 1; + error = sctp_packet_transmit(pkt); /* If we are retransmitting, we should only * send a single packet. */ - if (rtx_timeout) { + if (rtx_timeout || fast_rtx) { list_add(lchunk, lqueue); lchunk = NULL; } @@ -603,8 +607,7 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, case SCTP_XMIT_RWND_FULL: /* Send this packet. */ - if ((error = sctp_packet_transmit(pkt)) == 0) - *start_timer = 1; + error = sctp_packet_transmit(pkt); /* Stop sending DATA as there is no more room * at the receiver. @@ -615,8 +618,7 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, case SCTP_XMIT_NAGLE_DELAY: /* Send this packet. */ - if ((error = sctp_packet_transmit(pkt)) == 0) - *start_timer = 1; + error = sctp_packet_transmit(pkt); /* Stop sending DATA because of nagle delay. */ list_add(lchunk, lqueue); @@ -635,7 +637,14 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, if (chunk->fast_retransmit > 0) chunk->fast_retransmit = -1; - *start_timer = 1; + /* Force start T3-rtx timer when fast retransmitting + * the earliest outstanding TSN + */ + if (!timer && fast_rtx && + ntohl(chunk->subh.data_hdr->tsn) == + asoc->ctsn_ack_point + 1) + timer = 2; + q->empty = 0; /* Retrieve a new chunk to bundle. */ @@ -643,12 +652,16 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, break; } + /* Set the timer if there were no errors */ + if (!error && !timer) + timer = 1; + /* If we are here due to a retransmit timeout or a fast * retransmit and if there are any chunks left in the retransmit * queue that could not fit in the PMTU sized packet, they need * to be marked as ineligible for a subsequent fast retransmit. */ - if (rtx_timeout && !lchunk) { + if (rtx_timeout && fast_rtx) { list_for_each_entry(chunk1, lqueue, transmitted_list) { if (chunk1->fast_retransmit > 0) chunk1->fast_retransmit = -1; @@ -656,6 +669,12 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, } } + *start_timer = timer; + + /* Clear fast retransmit hint */ + if (fast_rtx) + q->fast_rtx = 0; + return error; } @@ -862,7 +881,8 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) rtx_timeout, &start_timer); if (start_timer) - sctp_transport_reset_timers(transport); + sctp_transport_reset_timers(transport, + start_timer-1); /* This can happen on COOKIE-ECHO resend. Only * one chunk can get bundled with a COOKIE-ECHO. @@ -977,7 +997,7 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout) list_add_tail(&chunk->transmitted_list, &transport->transmitted); - sctp_transport_reset_timers(transport); + sctp_transport_reset_timers(transport, start_timer-1); q->empty = 0; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 9647fb27722..3f34f61221e 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -191,7 +191,7 @@ static void sctp_transport_destroy(struct sctp_transport *transport) /* Start T3_rtx timer if it is not already running and update the heartbeat * timer. This routine is called every time a DATA chunk is sent. */ -void sctp_transport_reset_timers(struct sctp_transport *transport) +void sctp_transport_reset_timers(struct sctp_transport *transport, int force) { /* RFC 2960 6.3.2 Retransmission Timer Rules * @@ -201,7 +201,7 @@ void sctp_transport_reset_timers(struct sctp_transport *transport) * address. */ - if (!timer_pending(&transport->T3_rtx_timer)) + if (force || !timer_pending(&transport->T3_rtx_timer)) if (!mod_timer(&transport->T3_rtx_timer, jiffies + transport->rto)) sctp_transport_hold(transport); -- cgit v1.2.3 From 8b750ce54bd8ab5f75d519ee450e1b0c5226ebe9 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 4 Jun 2008 12:39:36 -0700 Subject: sctp: Flush the queue only once during fast retransmit. When fast retransmit is triggered by a sack, we should flush the queue only once so that only 1 retransmit happens. Also, since we could potentially have non-fast-rtx chunks on the retransmit queue, we need make sure any chunks eligable for fast retransmit are sent first during fast retransmission. Signed-off-by: Vlad Yasevich Tested-by: Wei Yongjun Signed-off-by: David S. Miller --- net/sctp/outqueue.c | 82 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 48 insertions(+), 34 deletions(-) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 5d3c441e84d..ace6770e904 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -520,9 +520,15 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, * the sender SHOULD try to advance the "Advanced.Peer.Ack.Point" by * following the procedures outlined in C1 - C5. */ - sctp_generate_fwdtsn(q, q->asoc->ctsn_ack_point); + if (reason == SCTP_RTXR_T3_RTX) + sctp_generate_fwdtsn(q, q->asoc->ctsn_ack_point); - error = sctp_outq_flush(q, /* rtx_timeout */ 1); + /* Flush the queues only on timeout, since fast_rtx is only + * triggered during sack processing and the queue + * will be flushed at the end. + */ + if (reason != SCTP_RTXR_FAST_RTX) + error = sctp_outq_flush(q, /* rtx_timeout */ 1); if (error) q->asoc->base.sk->sk_err = -error; @@ -540,7 +546,6 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, int rtx_timeout, int *start_timer) { struct list_head *lqueue; - struct list_head *lchunk; struct sctp_transport *transport = pkt->transport; sctp_xmit_t status; struct sctp_chunk *chunk, *chunk1; @@ -548,12 +553,16 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, int fast_rtx; int error = 0; int timer = 0; + int done = 0; asoc = q->asoc; lqueue = &q->retransmit; fast_rtx = q->fast_rtx; - /* RFC 2960 6.3.3 Handle T3-rtx Expiration + /* This loop handles time-out retransmissions, fast retransmissions, + * and retransmissions due to opening of whindow. + * + * RFC 2960 6.3.3 Handle T3-rtx Expiration * * E3) Determine how many of the earliest (i.e., lowest TSN) * outstanding DATA chunks for the address for which the @@ -568,12 +577,12 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, * [Just to be painfully clear, if we are retransmitting * because a timeout just happened, we should send only ONE * packet of retransmitted data.] + * + * For fast retransmissions we also send only ONE packet. However, + * if we are just flushing the queue due to open window, we'll + * try to send as much as possible. */ - lchunk = sctp_list_dequeue(lqueue); - - while (lchunk) { - chunk = list_entry(lchunk, struct sctp_chunk, - transmitted_list); + list_for_each_entry_safe(chunk, chunk1, lqueue, transmitted_list) { /* Make sure that Gap Acked TSNs are not retransmitted. A * simple approach is just to move such TSNs out of the @@ -581,11 +590,18 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, * next chunk. */ if (chunk->tsn_gap_acked) { - list_add_tail(lchunk, &transport->transmitted); - lchunk = sctp_list_dequeue(lqueue); + list_del(&chunk->transmitted_list); + list_add_tail(&chunk->transmitted_list, + &transport->transmitted); continue; } + /* If we are doing fast retransmit, ignore non-fast_rtransmit + * chunks + */ + if (fast_rtx && !chunk->fast_retransmit) + continue; + /* Attempt to append this chunk to the packet. */ status = sctp_packet_append_chunk(pkt, chunk); @@ -597,12 +613,10 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, /* If we are retransmitting, we should only * send a single packet. */ - if (rtx_timeout || fast_rtx) { - list_add(lchunk, lqueue); - lchunk = NULL; - } + if (rtx_timeout || fast_rtx) + done = 1; - /* Bundle lchunk in the next round. */ + /* Bundle next chunk in the next round. */ break; case SCTP_XMIT_RWND_FULL: @@ -612,8 +626,7 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, /* Stop sending DATA as there is no more room * at the receiver. */ - list_add(lchunk, lqueue); - lchunk = NULL; + done = 1; break; case SCTP_XMIT_NAGLE_DELAY: @@ -621,15 +634,16 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, error = sctp_packet_transmit(pkt); /* Stop sending DATA because of nagle delay. */ - list_add(lchunk, lqueue); - lchunk = NULL; + done = 1; break; default: /* The append was successful, so add this chunk to * the transmitted list. */ - list_add_tail(lchunk, &transport->transmitted); + list_del(&chunk->transmitted_list); + list_add_tail(&chunk->transmitted_list, + &transport->transmitted); /* Mark the chunk as ineligible for fast retransmit * after it is retransmitted. @@ -646,9 +660,6 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, timer = 2; q->empty = 0; - - /* Retrieve a new chunk to bundle. */ - lchunk = sctp_list_dequeue(lqueue); break; } @@ -656,16 +667,19 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, if (!error && !timer) timer = 1; - /* If we are here due to a retransmit timeout or a fast - * retransmit and if there are any chunks left in the retransmit - * queue that could not fit in the PMTU sized packet, they need - * to be marked as ineligible for a subsequent fast retransmit. - */ - if (rtx_timeout && fast_rtx) { - list_for_each_entry(chunk1, lqueue, transmitted_list) { - if (chunk1->fast_retransmit > 0) - chunk1->fast_retransmit = -1; - } + if (done) + break; + } + + /* If we are here due to a retransmit timeout or a fast + * retransmit and if there are any chunks left in the retransmit + * queue that could not fit in the PMTU sized packet, they need + * to be marked as ineligible for a subsequent fast retransmit. + */ + if (rtx_timeout || fast_rtx) { + list_for_each_entry(chunk1, lqueue, transmitted_list) { + if (chunk1->fast_retransmit > 0) + chunk1->fast_retransmit = -1; } } -- cgit v1.2.3 From b9031d9d87b24e24cd32ea15b5f4220a1e8da909 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Wed, 4 Jun 2008 12:40:15 -0700 Subject: sctp: Fix ECN markings for IPv6 Commit e9df2e8fd8fbc95c57dbd1d33dada66c4627b44c ("[IPV6]: Use appropriate sock tclass setting for routing lookup.") also changed the way that ECN capable transports mark this capability in IPv6. As a result, SCTP was not marking ECN capablity because the traffic class was never set. This patch brings back the markings for IPv6 traffic. Signed-off-by: Vlad Yasevich Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 1 + net/sctp/ipv6.c | 6 ++++++ net/sctp/output.c | 2 +- net/sctp/protocol.c | 6 ++++++ 4 files changed, 14 insertions(+), 1 deletion(-) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 714dc43c034..7f25195f985 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -588,6 +588,7 @@ struct sctp_af { int (*is_ce) (const struct sk_buff *sk); void (*seq_dump_addr)(struct seq_file *seq, union sctp_addr *addr); + void (*ecn_capable)(struct sock *sk); __u16 net_header_len; int sockaddr_len; sa_family_t sa_family; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index e4aac3266fc..a2f4d4d5159 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -727,6 +727,11 @@ static void sctp_v6_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr) seq_printf(seq, NIP6_FMT " ", NIP6(addr->v6.sin6_addr)); } +static void sctp_v6_ecn_capable(struct sock *sk) +{ + inet6_sk(sk)->tclass |= INET_ECN_ECT_0; +} + /* Initialize a PF_INET6 socket msg_name. */ static void sctp_inet6_msgname(char *msgname, int *addr_len) { @@ -997,6 +1002,7 @@ static struct sctp_af sctp_af_inet6 = { .skb_iif = sctp_v6_skb_iif, .is_ce = sctp_v6_is_ce, .seq_dump_addr = sctp_v6_seq_dump_addr, + .ecn_capable = sctp_v6_ecn_capable, .net_header_len = sizeof(struct ipv6hdr), .sockaddr_len = sizeof(struct sockaddr_in6), #ifdef CONFIG_COMPAT diff --git a/net/sctp/output.c b/net/sctp/output.c index cf4f9fb6819..6d45bae93b4 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -548,7 +548,7 @@ int sctp_packet_transmit(struct sctp_packet *packet) * Note: The works for IPv6 layer checks this bit too later * in transmission. See IP6_ECN_flow_xmit(). */ - INET_ECN_xmit(nskb->sk); + (*tp->af_specific->ecn_capable)(nskb->sk); /* Set up the IP options. */ /* BUG: not implemented diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 56bdaf7fc42..b435a193c5d 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -617,6 +617,11 @@ static void sctp_v4_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr) seq_printf(seq, "%d.%d.%d.%d ", NIPQUAD(addr->v4.sin_addr)); } +static void sctp_v4_ecn_capable(struct sock *sk) +{ + INET_ECN_xmit(sk); +} + /* Event handler for inet address addition/deletion events. * The sctp_local_addr_list needs to be protocted by a spin lock since * multiple notifiers (say IPv4 and IPv6) may be running at the same @@ -935,6 +940,7 @@ static struct sctp_af sctp_af_inet = { .skb_iif = sctp_v4_skb_iif, .is_ce = sctp_v4_is_ce, .seq_dump_addr = sctp_v4_seq_dump_addr, + .ecn_capable = sctp_v4_ecn_capable, .net_header_len = sizeof(struct iphdr), .sockaddr_len = sizeof(struct sockaddr_in), #ifdef CONFIG_COMPAT -- cgit v1.2.3 From 48e6c51bd326ce9faf07fbdf84d361c9755b7035 Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Thu, 22 May 2008 17:06:36 +0200 Subject: b43legacy: Fix controller restart crash This fixes a kernel crash on rmmod, in the case where the controller was restarted before doing the rmmod. Signed-off-by: Michael Buesch Signed-off-by: John W. Linville --- drivers/net/wireless/b43legacy/main.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/net/wireless/b43legacy/main.c b/drivers/net/wireless/b43legacy/main.c index 14a5eea2573..204077c1387 100644 --- a/drivers/net/wireless/b43legacy/main.c +++ b/drivers/net/wireless/b43legacy/main.c @@ -3039,7 +3039,6 @@ static void b43legacy_set_pretbtt(struct b43legacy_wldev *dev) /* Locking: wl->mutex */ static void b43legacy_wireless_core_exit(struct b43legacy_wldev *dev) { - struct b43legacy_wl *wl = dev->wl; struct b43legacy_phy *phy = &dev->phy; u32 macctl; @@ -3054,12 +3053,6 @@ static void b43legacy_wireless_core_exit(struct b43legacy_wldev *dev) macctl |= B43legacy_MACCTL_PSM_JMP0; b43legacy_write32(dev, B43legacy_MMIO_MACCTL, macctl); - mutex_unlock(&wl->mutex); - /* Must unlock as it would otherwise deadlock. No races here. - * Cancel possibly pending workqueues. */ - cancel_work_sync(&dev->restart_work); - mutex_lock(&wl->mutex); - b43legacy_leds_exit(dev); b43legacy_rng_exit(dev->wl); b43legacy_pio_free(dev); @@ -3486,6 +3479,8 @@ static void b43legacy_chip_reset(struct work_struct *work) } } out: + if (err) + wl->current_dev = NULL; /* Failed to init the dev. */ mutex_unlock(&wl->mutex); if (err) b43legacyerr(wl, "Controller restart FAILED\n"); @@ -3618,9 +3613,11 @@ static void b43legacy_one_core_detach(struct ssb_device *dev) struct b43legacy_wldev *wldev; struct b43legacy_wl *wl; + /* Do not cancel ieee80211-workqueue based work here. + * See comment in b43legacy_remove(). */ + wldev = ssb_get_drvdata(dev); wl = wldev->wl; - cancel_work_sync(&wldev->restart_work); b43legacy_debugfs_remove_device(wldev); b43legacy_wireless_core_detach(wldev); list_del(&wldev->list); @@ -3789,6 +3786,10 @@ static void b43legacy_remove(struct ssb_device *dev) struct b43legacy_wl *wl = ssb_get_devtypedata(dev); struct b43legacy_wldev *wldev = ssb_get_drvdata(dev); + /* We must cancel any work here before unregistering from ieee80211, + * as the ieee80211 unreg will destroy the workqueue. */ + cancel_work_sync(&wldev->restart_work); + B43legacy_WARN_ON(!wl); if (wl->current_dev == wldev) ieee80211_unregister_hw(wl->hw); -- cgit v1.2.3 From b212f3378a9cfca4da52d7c7e6f79ead8ec287fc Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 28 May 2008 12:40:39 -0700 Subject: airo warning fix WARNING: space prohibited between function name and open parenthesis '(' #22: FILE: drivers/net/wireless/airo.c:2907: + while ((IN4500 (ai, COMMAND) & COMMAND_BUSY) && (delay < 10000)) { total: 0 errors, 1 warnings, 8 lines checked ./patches/wireless-airo-waitbusy-wont-delay.patch has style problems, please review. If any of these errors are false positives report them to the maintainer, see CHECKPATCH in MAINTAINERS. Please run checkpatch prior to sending patches Cc: Dan Williams Cc: Roel Kluin Signed-off-by: Andrew Morton Signed-off-by: John W. Linville --- drivers/net/wireless/airo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index 4e1c690ff45..32019fb878d 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c @@ -2905,7 +2905,7 @@ EXPORT_SYMBOL(init_airo_card); static int waitbusy (struct airo_info *ai) { int delay = 0; - while ((IN4500 (ai, COMMAND) & COMMAND_BUSY) && (delay < 10000)) { + while ((IN4500(ai, COMMAND) & COMMAND_BUSY) && (delay < 10000)) { udelay (10); if ((++delay % 20) == 0) OUT4500(ai, EVACK, EV_CLEARCOMMANDBUSY); -- cgit v1.2.3 From a6d4eae80157830af9c9d80de2daf6611696a34e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 29 May 2008 14:38:28 -0400 Subject: ipw2200: expire and use oldest BSS on adhoc create If there are no networks on the free list, expire the oldest one when creating a new adhoc network. Because ipw2200 and the ieee80211 stack don't actually cull old networks and place them back on the free list unless they are needed for new probe responses, over time the free list would become empty and creating an adhoc network would fail due to the ! list_empty(...) check. Signed-off-by: Dan Williams Signed-off-by: John W. Linville --- drivers/net/wireless/ipw2200.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/ipw2200.c b/drivers/net/wireless/ipw2200.c index d74c061994a..72933677482 100644 --- a/drivers/net/wireless/ipw2200.c +++ b/drivers/net/wireless/ipw2200.c @@ -7558,8 +7558,31 @@ static int ipw_associate(void *data) priv->ieee->iw_mode == IW_MODE_ADHOC && priv->config & CFG_ADHOC_CREATE && priv->config & CFG_STATIC_ESSID && - priv->config & CFG_STATIC_CHANNEL && - !list_empty(&priv->ieee->network_free_list)) { + priv->config & CFG_STATIC_CHANNEL) { + /* Use oldest network if the free list is empty */ + if (list_empty(&priv->ieee->network_free_list)) { + struct ieee80211_network *oldest = NULL; + struct ieee80211_network *target; + DECLARE_MAC_BUF(mac); + + list_for_each_entry(target, &priv->ieee->network_list, list) { + if ((oldest == NULL) || + (target->last_scanned < oldest->last_scanned)) + oldest = target; + } + + /* If there are no more slots, expire the oldest */ + list_del(&oldest->list); + target = oldest; + IPW_DEBUG_ASSOC("Expired '%s' (%s) from " + "network list.\n", + escape_essid(target->ssid, + target->ssid_len), + print_mac(mac, target->bssid)); + list_add_tail(&target->list, + &priv->ieee->network_free_list); + } + element = priv->ieee->network_free_list.next; network = list_entry(element, struct ieee80211_network, list); ipw_adhoc_create(priv, network); -- cgit v1.2.3 From a75eda43dc4a64d0bd0502da546871c01f70e899 Mon Sep 17 00:00:00 2001 From: Holger Schurig Date: Fri, 30 May 2008 14:53:22 +0200 Subject: libertas: fix command size for CMD_802_11_SUBSCRIBE_EVENT The size was two small by two bytes. Signed-off-by: Holger Schurig Signed-off-by: John W. Linville --- drivers/net/wireless/libertas/debugfs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/wireless/libertas/debugfs.c b/drivers/net/wireless/libertas/debugfs.c index ad2fabca911..0aa0ce3b2c4 100644 --- a/drivers/net/wireless/libertas/debugfs.c +++ b/drivers/net/wireless/libertas/debugfs.c @@ -312,8 +312,8 @@ static ssize_t lbs_threshold_write(uint16_t tlv_type, uint16_t event_mask, if (tlv_type != TLV_TYPE_BCNMISS) tlv->freq = freq; - /* The command header, the event mask, and the one TLV */ - events->hdr.size = cpu_to_le16(sizeof(events->hdr) + 2 + sizeof(*tlv)); + /* The command header, the action, the event mask, and one TLV */ + events->hdr.size = cpu_to_le16(sizeof(events->hdr) + 4 + sizeof(*tlv)); ret = lbs_cmd_with_response(priv, CMD_802_11_SUBSCRIBE_EVENT, events); -- cgit v1.2.3 From a3bafeedfff2ac5fa0a316bea4570e27900b6fcc Mon Sep 17 00:00:00 2001 From: Michael Buesch Date: Mon, 2 Jun 2008 16:15:23 +0200 Subject: ssb: Fix context assertion in ssb_pcicore_dev_irqvecs_enable This fixes a context assertion in ssb that makes b44 print out warnings on resume. This fixes the following kernel oops: http://www.kerneloops.org/oops.php?number=12732 http://www.kerneloops.org/oops.php?number=11410 Signed-off-by: Michael Buesch Signed-off-by: John W. Linville --- drivers/ssb/driver_pcicore.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/ssb/driver_pcicore.c b/drivers/ssb/driver_pcicore.c index 75def13e797..d28c5386809 100644 --- a/drivers/ssb/driver_pcicore.c +++ b/drivers/ssb/driver_pcicore.c @@ -537,12 +537,12 @@ int ssb_pcicore_dev_irqvecs_enable(struct ssb_pcicore *pc, int err = 0; u32 tmp; - might_sleep(); - if (!pdev) goto out; bus = pdev->bus; + might_sleep_if(pdev->id.coreid != SSB_DEV_PCI); + /* Enable interrupts for this device. */ if (bus->host_pci && ((pdev->id.revision >= 6) || (pdev->id.coreid == SSB_DEV_PCIE))) { -- cgit v1.2.3 From 4546002c813568829b70d00fab752de3999c3f1a Mon Sep 17 00:00:00 2001 From: Felix Homann Date: Thu, 29 May 2008 00:36:45 -0700 Subject: USB ID for Philips CPWUA054/00 Wireless USB Adapter 11g Enable the Philips CPWUA054/00 in p54usb. Cc: Jeff Garzik Signed-off-by: Andrew Morton Signed-off-by: John W. Linville --- drivers/net/wireless/p54/p54usb.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/wireless/p54/p54usb.c b/drivers/net/wireless/p54/p54usb.c index 98ddbb3b327..1610a7308c1 100644 --- a/drivers/net/wireless/p54/p54usb.c +++ b/drivers/net/wireless/p54/p54usb.c @@ -49,6 +49,7 @@ static struct usb_device_id p54u_table[] __devinitdata = { {USB_DEVICE(0x5041, 0x2235)}, /* Linksys WUSB54G Portable */ /* Version 2 devices (3887) */ + {USB_DEVICE(0x0471, 0x1230)}, /* Philips CPWUA054/00 */ {USB_DEVICE(0x050d, 0x7050)}, /* Belkin F5D7050 ver 1000 */ {USB_DEVICE(0x0572, 0x2000)}, /* Cohiba Proto board */ {USB_DEVICE(0x0572, 0x2002)}, /* Cohiba Proto board */ -- cgit v1.2.3 From 199f7d24ae59894243687a234a909f44a8724506 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 4 Jun 2008 15:07:32 -0700 Subject: lt2p: Fix possible WARN_ON from socket code when UDP socket is closed If an L2TP daemon closes a tunnel socket while packets are queued in the tunnel's reorder queue, a kernel warning is logged because the socket is closed while skbs are still referencing it. The fix is to purge the queue in the socket's release handler. WARNING: at include/net/sock.h:351 udp_lib_unhash+0x41/0x68() Pid: 12998, comm: openl2tpd Not tainted 2.6.25 #8 [] warn_on_slowpath+0x41/0x51 [] udp_lib_unhash+0x41/0x68 [] sk_common_release+0x23/0x90 [] udp_lib_close+0x8/0xa [] inet_release+0x42/0x48 [] sock_release+0x14/0x60 [] sock_close+0x29/0x30 [] __fput+0xad/0x15b [] fput+0x17/0x19 [] filp_close+0x50/0x5a [] sys_close+0x69/0x9f [] syscall_call+0x7/0xb Signed-off-by: James Chapman Signed-off-by: David S. Miller --- drivers/net/pppol2tp.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c index 8db342f2fdc..04c7e5b407f 100644 --- a/drivers/net/pppol2tp.c +++ b/drivers/net/pppol2tp.c @@ -1279,6 +1279,7 @@ out: static int pppol2tp_release(struct socket *sock) { struct sock *sk = sock->sk; + struct pppol2tp_session *session; int error; if (!sk) @@ -1296,9 +1297,18 @@ static int pppol2tp_release(struct socket *sock) sock_orphan(sk); sock->sk = NULL; + session = pppol2tp_sock_to_session(sk); + /* Purge any queued data */ skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_write_queue); + if (session != NULL) { + struct sk_buff *skb; + while ((skb = skb_dequeue(&session->reorder_q))) { + kfree_skb(skb); + sock_put(sk); + } + } release_sock(sk); -- cgit v1.2.3 From 22dd485022f3d0b162ceb5e67d85de7c3806aa20 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Wed, 4 Jun 2008 15:16:12 -0700 Subject: raw: Raw socket leak. The program below just leaks the raw kernel socket int main() { int fd = socket(PF_INET, SOCK_RAW, IPPROTO_UDP); struct sockaddr_in addr; memset(&addr, 0, sizeof(addr)); inet_aton("127.0.0.1", &addr.sin_addr); addr.sin_family = AF_INET; addr.sin_port = htons(2048); sendto(fd, "a", 1, MSG_MORE, &addr, sizeof(addr)); return 0; } Corked packet is allocated via sock_wmalloc which holds the owner socket, so one should uncork it and flush all pending data on close. Do this in the same way as in UDP. Signed-off-by: Denis V. Lunev Acked-by: Alexey Kuznetsov Signed-off-by: David S. Miller --- net/ipv4/raw.c | 9 +++++++++ net/ipv6/raw.c | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index fead049daf4..e7e091d365f 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -608,6 +608,14 @@ static void raw_close(struct sock *sk, long timeout) sk_common_release(sk); } +static int raw_destroy(struct sock *sk) +{ + lock_sock(sk); + ip_flush_pending_frames(sk); + release_sock(sk); + return 0; +} + /* This gets rid of all the nasties in af_inet. -DaveM */ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -820,6 +828,7 @@ struct proto raw_prot = { .name = "RAW", .owner = THIS_MODULE, .close = raw_close, + .destroy = raw_destroy, .connect = ip4_datagram_connect, .disconnect = udp_disconnect, .ioctl = raw_ioctl, diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 603df76e052..8fee9a15b2d 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1164,6 +1164,14 @@ static void rawv6_close(struct sock *sk, long timeout) sk_common_release(sk); } +static int raw6_destroy(struct sock *sk) +{ + lock_sock(sk); + ip6_flush_pending_frames(sk); + release_sock(sk); + return 0; +} + static int rawv6_init_sk(struct sock *sk) { struct raw6_sock *rp = raw6_sk(sk); @@ -1187,6 +1195,7 @@ struct proto rawv6_prot = { .name = "RAWv6", .owner = THIS_MODULE, .close = rawv6_close, + .destroy = raw6_destroy, .connect = ip6_datagram_connect, .disconnect = udp_disconnect, .ioctl = rawv6_ioctl, -- cgit v1.2.3 From 26af65cbeb2467a486ae4fc7242c94e470c67c50 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Wed, 4 Jun 2008 15:19:35 -0700 Subject: tcp: Increment OUTRSTS in tcp_send_active_reset() TCP "resets sent" counter is not incremented when a TCP Reset is sent via tcp_send_active_reset(). Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e399bde7813..ad993ecb481 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2131,6 +2131,8 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) TCP_SKB_CB(skb)->when = tcp_time_stamp; if (tcp_transmit_skb(sk, skb, 0, priority)) NET_INC_STATS(LINUX_MIB_TCPABORTFAILED); + + TCP_INC_STATS(TCP_MIB_OUTRSTS); } /* WARNING: This routine must only be called when we have already sent -- cgit v1.2.3 From 293ad60401da621b8b329abbe8c388edb25f658a Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Wed, 4 Jun 2008 15:45:58 -0700 Subject: tcp: Fix for race due to temporary drop of the socket lock in skb_splice_bits. skb_splice_bits temporary drops the socket lock while iterating over the socket queue in order to break a reverse locking condition which happens with sendfile. This, however, opens a window of opportunity for tcp_collapse() to aggregate skbs and thus potentially free the current skb used in skb_splice_bits and tcp_read_sock. This patch fixes the problem by (re-)getting the same "logical skb" after the lock has been temporary dropped. Based on idea and initial patch from Evgeniy Polyakov. Signed-off-by: Octavian Purdila Acked-by: Evgeniy Polyakov Signed-off-by: David S. Miller --- net/core/skbuff.c | 5 +++-- net/ipv4/tcp.c | 9 ++++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5c459f2b798..1e556d31211 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1445,6 +1445,7 @@ done: if (spd.nr_pages) { int ret; + struct sock *sk = __skb->sk; /* * Drop the socket lock, otherwise we have reverse @@ -1455,9 +1456,9 @@ done: * we call into ->sendpage() with the i_mutex lock held * and networking will grab the socket lock. */ - release_sock(__skb->sk); + release_sock(sk); ret = splice_to_pipe(pipe, &spd); - lock_sock(__skb->sk); + lock_sock(sk); return ret; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f8865313862..ab66683b804 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1227,7 +1227,14 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, copied += used; offset += used; } - if (offset != skb->len) + /* + * If recv_actor drops the lock (e.g. TCP splice + * receive) the skb pointer might be invalid when + * getting here: tcp_collapse might have deleted it + * while aggregating skbs from the socket queue. + */ + skb = tcp_recv_skb(sk, seq-1, &offset); + if (!skb || (offset+1 != skb->len)) break; } if (tcp_hdr(skb)->fin) { -- cgit v1.2.3 From 24b95685ffcdb3dc28f64b9e8af6ea3e8360fbc5 Mon Sep 17 00:00:00 2001 From: James Chapman Date: Wed, 4 Jun 2008 15:54:07 -0700 Subject: l2tp: Fix possible oops if transmitting or receiving when tunnel goes down Some problems have been experienced in the field which cause an oops in the pppol2tp driver if L2TP tunnels fail while passing data. The pppol2tp driver uses private data that is referenced via the sk->sk_user_data of its UDP and PPPoL2TP sockets. This patch makes sure that the driver uses sock_hold() when it holds a reference to the sk pointer. This affects its sendmsg(), recvmsg(), getname(), [gs]etsockopt() and ioctl() handlers. Tested by ISP where problem was seen. System has been up 10 days with no oops since running this patch. Without the patch, an oops would occur every 1-2 days. Signed-off-by: James Chapman Signed-off-by: David S. Miller --- drivers/net/pppol2tp.c | 101 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 78 insertions(+), 23 deletions(-) diff --git a/drivers/net/pppol2tp.c b/drivers/net/pppol2tp.c index 04c7e5b407f..70cfdb46aa2 100644 --- a/drivers/net/pppol2tp.c +++ b/drivers/net/pppol2tp.c @@ -240,12 +240,15 @@ static inline struct pppol2tp_session *pppol2tp_sock_to_session(struct sock *sk) if (sk == NULL) return NULL; + sock_hold(sk); session = (struct pppol2tp_session *)(sk->sk_user_data); - if (session == NULL) - return NULL; + if (session == NULL) { + sock_put(sk); + goto out; + } BUG_ON(session->magic != L2TP_SESSION_MAGIC); - +out: return session; } @@ -256,12 +259,15 @@ static inline struct pppol2tp_tunnel *pppol2tp_sock_to_tunnel(struct sock *sk) if (sk == NULL) return NULL; + sock_hold(sk); tunnel = (struct pppol2tp_tunnel *)(sk->sk_user_data); - if (tunnel == NULL) - return NULL; + if (tunnel == NULL) { + sock_put(sk); + goto out; + } BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); - +out: return tunnel; } @@ -716,12 +722,14 @@ discard: session->stats.rx_errors++; kfree_skb(skb); sock_put(session->sock); + sock_put(sock); return 0; error: /* Put UDP header back */ __skb_push(skb, sizeof(struct udphdr)); + sock_put(sock); no_tunnel: return 1; @@ -745,10 +753,13 @@ static int pppol2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) "%s: received %d bytes\n", tunnel->name, skb->len); if (pppol2tp_recv_core(sk, skb)) - goto pass_up; + goto pass_up_put; + sock_put(sk); return 0; +pass_up_put: + sock_put(sk); pass_up: return 1; } @@ -858,7 +869,7 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock); if (tunnel == NULL) - goto error; + goto error_put_sess; /* What header length is configured for this session? */ hdr_len = pppol2tp_l2tp_header_len(session); @@ -870,7 +881,7 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh sizeof(ppph) + total_len, 0, GFP_KERNEL); if (!skb) - goto error; + goto error_put_sess_tun; /* Reserve space for headers. */ skb_reserve(skb, NET_SKB_PAD); @@ -900,7 +911,7 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh error = memcpy_fromiovec(skb->data, m->msg_iov, total_len); if (error < 0) { kfree_skb(skb); - goto error; + goto error_put_sess_tun; } skb_put(skb, total_len); @@ -947,10 +958,33 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh session->stats.tx_errors++; } + return error; + +error_put_sess_tun: + sock_put(session->tunnel_sock); +error_put_sess: + sock_put(sk); error: return error; } +/* Automatically called when the skb is freed. + */ +static void pppol2tp_sock_wfree(struct sk_buff *skb) +{ + sock_put(skb->sk); +} + +/* For data skbs that we transmit, we associate with the tunnel socket + * but don't do accounting. + */ +static inline void pppol2tp_skb_set_owner_w(struct sk_buff *skb, struct sock *sk) +{ + sock_hold(sk); + skb->sk = sk; + skb->destructor = pppol2tp_sock_wfree; +} + /* Transmit function called by generic PPP driver. Sends PPP frame * over PPPoL2TP socket. * @@ -993,10 +1027,10 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) sk_tun = session->tunnel_sock; if (sk_tun == NULL) - goto abort; + goto abort_put_sess; tunnel = pppol2tp_sock_to_tunnel(sk_tun); if (tunnel == NULL) - goto abort; + goto abort_put_sess; /* What header length is configured for this session? */ hdr_len = pppol2tp_l2tp_header_len(session); @@ -1009,7 +1043,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) sizeof(struct udphdr) + hdr_len + sizeof(ppph); old_headroom = skb_headroom(skb); if (skb_cow_head(skb, headroom)) - goto abort; + goto abort_put_sess_tun; new_headroom = skb_headroom(skb); skb_orphan(skb); @@ -1069,7 +1103,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) /* Get routing info from the tunnel socket */ dst_release(skb->dst); skb->dst = dst_clone(__sk_dst_get(sk_tun)); - skb->sk = sk_tun; + pppol2tp_skb_set_owner_w(skb, sk_tun); /* Queue the packet to IP for output */ len = skb->len; @@ -1086,8 +1120,14 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) session->stats.tx_errors++; } + sock_put(sk_tun); + sock_put(sk); return 1; +abort_put_sess_tun: + sock_put(sk_tun); +abort_put_sess: + sock_put(sk); abort: /* Free the original skb */ kfree_skb(skb); @@ -1191,7 +1231,7 @@ static void pppol2tp_tunnel_destruct(struct sock *sk) { struct pppol2tp_tunnel *tunnel; - tunnel = pppol2tp_sock_to_tunnel(sk); + tunnel = sk->sk_user_data; if (tunnel == NULL) goto end; @@ -1230,10 +1270,12 @@ static void pppol2tp_session_destruct(struct sock *sk) if (sk->sk_user_data != NULL) { struct pppol2tp_tunnel *tunnel; - session = pppol2tp_sock_to_session(sk); + session = sk->sk_user_data; if (session == NULL) goto out; + BUG_ON(session->magic != L2TP_SESSION_MAGIC); + /* Don't use pppol2tp_sock_to_tunnel() here to * get the tunnel context because the tunnel * socket might have already been closed (its @@ -1611,7 +1653,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, error = ppp_register_channel(&po->chan); if (error) - goto end; + goto end_put_tun; /* This is how we get the session context from the socket. */ sk->sk_user_data = session; @@ -1631,6 +1673,8 @@ out_no_ppp: PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO, "%s: created\n", session->name); +end_put_tun: + sock_put(tunnel_sock); end: release_sock(sk); @@ -1678,6 +1722,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, *usockaddr_len = len; error = 0; + sock_put(sock->sk); end: return error; @@ -1916,14 +1961,17 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd, err = -EBADF; tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock); if (tunnel == NULL) - goto end; + goto end_put_sess; err = pppol2tp_tunnel_ioctl(tunnel, cmd, arg); - goto end; + sock_put(session->tunnel_sock); + goto end_put_sess; } err = pppol2tp_session_ioctl(session, cmd, arg); +end_put_sess: + sock_put(sk); end: return err; } @@ -2069,14 +2117,17 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname, err = -EBADF; tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock); if (tunnel == NULL) - goto end; + goto end_put_sess; err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val); + sock_put(session->tunnel_sock); } else err = pppol2tp_session_setsockopt(sk, session, optname, val); err = 0; +end_put_sess: + sock_put(sk); end: return err; } @@ -2191,20 +2242,24 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, err = -EBADF; tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock); if (tunnel == NULL) - goto end; + goto end_put_sess; err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val); + sock_put(session->tunnel_sock); } else err = pppol2tp_session_getsockopt(sk, session, optname, &val); err = -EFAULT; if (put_user(len, (int __user *) optlen)) - goto end; + goto end_put_sess; if (copy_to_user((void __user *) optval, &val, len)) - goto end; + goto end_put_sess; err = 0; + +end_put_sess: + sock_put(sk); end: return err; } -- cgit v1.2.3