From 7b2e497a06c0e93719fda88820e057b635e8fae2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 7 Aug 2006 16:09:04 -0700 Subject: [NET]: Assign skb->dev in netdev_alloc_skb Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- net/core/skbuff.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 022d8894c11..c54f3664bce 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -268,8 +268,10 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, struct sk_buff *skb; skb = alloc_skb(length + NET_SKB_PAD, gfp_mask); - if (likely(skb)) + if (likely(skb)) { skb_reserve(skb, NET_SKB_PAD); + skb->dev = dev; + } return skb; } -- cgit v1.2.3 From 8b5cc5ef40c83c6ea4c90b203bb2c8b17edfa11b Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 7 Aug 2006 20:09:20 -0700 Subject: [IPX]: Header length validation needed This patch will linearize and check there is enough data. It handles the pprop case as well as avoiding a whole audit of the routing code. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipx/af_ipx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index aa34ff4b707..c13e86b14f6 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1646,7 +1646,8 @@ static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_ty ipx_pktsize = ntohs(ipx->ipx_pktsize); /* Too small or invalid header? */ - if (ipx_pktsize < sizeof(struct ipxhdr) || ipx_pktsize > skb->len) + if (ipx_pktsize < sizeof(struct ipxhdr) || + !pskb_may_pull(skb, ipx_pktsize)) goto drop; if (ipx->ipx_checksum != IPX_NO_CHECKSUM && -- cgit v1.2.3 From 8d1502de27c46b365b5c86e17d173083d3d6c9ac Mon Sep 17 00:00:00 2001 From: Kirill Korotaev Date: Mon, 7 Aug 2006 20:44:22 -0700 Subject: [IPV4]: Limit rt cache size properly. From: Kirill Korotaev During OpenVZ stress testing we found that UDP traffic with random src can generate too much excessive rt hash growing leading finally to OOM and kernel panics. It was found that for 4GB i686 system (having 1048576 total pages and 225280 normal zone pages) kernel allocates the following route hash: syslog: IP route cache hash table entries: 262144 (order: 8, 1048576 bytes) => ip_rt_max_size = 4194304 entries, i.e. max rt size is 4194304 * 256b = 1Gb of RAM > normal_zone Attached the patch which removes HASH_HIGHMEM flag from alloc_large_system_hash() call. Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 19bd49d69d9..b873cbcdd0b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3157,7 +3157,7 @@ int __init ip_rt_init(void) rhash_entries, (num_physpages >= 128 * 1024) ? 15 : 17, - HASH_HIGHMEM, + 0, &rt_hash_log, &rt_hash_mask, 0); -- cgit v1.2.3 From aaf580601ff244df82324fff380ed6740f27ef03 Mon Sep 17 00:00:00 2001 From: Chen-Li Tien Date: Mon, 7 Aug 2006 20:49:07 -0700 Subject: [PKTGEN]: Fix oops when used with balance-tlb bonding Signed-off-by: Chen-Li Tien Signed-off-by: David S. Miller --- net/core/pktgen.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 67ed14ddabd..b1743375eb6 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2149,6 +2149,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, skb->mac.raw = ((u8 *) iph) - 14 - pkt_dev->nr_labels*sizeof(u32); skb->dev = odev; skb->pkt_type = PACKET_HOST; + skb->nh.iph = iph; + skb->h.uh = udph; if (pkt_dev->nfrags <= 0) pgh = (struct pktgen_hdr *)skb_put(skb, datalen); -- cgit v1.2.3 From 69d8c28c9578ce78b3dc1b9be36926d962282898 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 7 Aug 2006 20:52:10 -0700 Subject: [PKTGEN]: Make sure skb->{nh,h} are initialized in fill_packet_ipv6() too. Mirror the bug fix from fill_packet_ipv4() Signed-off-by: David S. Miller --- net/core/pktgen.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/pktgen.c b/net/core/pktgen.c index b1743375eb6..6a7320b39ed 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2462,6 +2462,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, skb->protocol = protocol; skb->dev = odev; skb->pkt_type = PACKET_HOST; + skb->nh.ipv6h = iph; + skb->h.uh = udph; if (pkt_dev->nfrags <= 0) pgh = (struct pktgen_hdr *)skb_put(skb, datalen); -- cgit v1.2.3 From bd37a088596ccdb2b2dd3299e25e333bca7a9a34 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 7 Aug 2006 21:04:15 -0700 Subject: [TCP]: SNMPv2 tcpOutSegs counter error Do not count retransmitted segments. Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5c08ea20a18..507adefbc17 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -466,7 +466,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, if (skb->len != tcp_header_size) tcp_event_data_sent(tp, skb, sk); - TCP_INC_STATS(TCP_MIB_OUTSEGS); + if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq) + TCP_INC_STATS(TCP_MIB_OUTSEGS); err = icsk->icsk_af_ops->queue_xmit(skb, 0); if (likely(err <= 0)) @@ -2157,10 +2158,9 @@ int tcp_connect(struct sock *sk) skb_shinfo(buff)->gso_size = 0; skb_shinfo(buff)->gso_type = 0; buff->csum = 0; + tp->snd_nxt = tp->write_seq; TCP_SKB_CB(buff)->seq = tp->write_seq++; TCP_SKB_CB(buff)->end_seq = tp->write_seq; - tp->snd_nxt = tp->write_seq; - tp->pushed_seq = tp->write_seq; /* Send it off. */ TCP_SKB_CB(buff)->when = tcp_time_stamp; @@ -2170,6 +2170,12 @@ int tcp_connect(struct sock *sk) sk_charge_skb(sk, buff); tp->packets_out += tcp_skb_pcount(buff); tcp_transmit_skb(sk, buff, 1, GFP_KERNEL); + + /* We change tp->snd_nxt after the tcp_transmit_skb() call + * in order to make this packet get counted in tcpOutSegs. + */ + tp->snd_nxt = tp->write_seq; + tp->pushed_seq = tp->write_seq; TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); /* Timer for repeating the SYN until an answer. */ -- cgit v1.2.3 From 70f8e78e150425b01c1099087ad3decacf7e4ccf Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 8 Aug 2006 16:47:37 -0700 Subject: [RTNETLINK]: Fix IFLA_ADDRESS handling. The ->set_mac_address handlers expect a pointer to a sockaddr which contains the MAC address, whereas IFLA_ADDRESS provides just the MAC address itself. So whip up a sockaddr to wrap around the netlink attribute for the ->set_mac_address call. Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 20e5bb73f14..30cc1ba6ed5 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -394,6 +394,9 @@ static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) } if (ida[IFLA_ADDRESS - 1]) { + struct sockaddr *sa; + int len; + if (!dev->set_mac_address) { err = -EOPNOTSUPP; goto out; @@ -405,7 +408,17 @@ static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (ida[IFLA_ADDRESS - 1]->rta_len != RTA_LENGTH(dev->addr_len)) goto out; - err = dev->set_mac_address(dev, RTA_DATA(ida[IFLA_ADDRESS - 1])); + len = sizeof(sa_family_t) + dev->addr_len; + sa = kmalloc(len, GFP_KERNEL); + if (!sa) { + err = -ENOMEM; + goto out; + } + sa->sa_family = dev->type; + memcpy(sa->sa_data, RTA_DATA(ida[IFLA_ADDRESS - 1]), + dev->addr_len); + err = dev->set_mac_address(dev, sa); + kfree(sa); if (err) goto out; send_addr_notify = 1; -- cgit v1.2.3 From 7b1ba8de569460894efa892457af7a37c0d574f9 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 8 Aug 2006 16:48:51 -0700 Subject: [IPX]: Another nonlinear receive fix Need to check some more cases in IPX receive. If the skb is purely fragments, the IPX header needs to be extracted. The function pskb_may_pull() may in theory invalidate all the pointers in the skb, so references to ipx header must be refreshed. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipx/af_ipx.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index c13e86b14f6..40196420486 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1642,14 +1642,17 @@ static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_ty if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) goto out; - ipx = ipx_hdr(skb); - ipx_pktsize = ntohs(ipx->ipx_pktsize); + if (!pskb_may_pull(skb, sizeof(struct ipxhdr))) + goto drop; + + ipx_pktsize = ntohs(ipxhdr(skb)->ipx_pktsize); /* Too small or invalid header? */ if (ipx_pktsize < sizeof(struct ipxhdr) || !pskb_may_pull(skb, ipx_pktsize)) goto drop; + ipx = ipx_hdr(skb); if (ipx->ipx_checksum != IPX_NO_CHECKSUM && ipx->ipx_checksum != ipx_cksum(ipx, ipx_pktsize)) goto drop; -- cgit v1.2.3 From 7c91767a6b701543c93ebcd611dab61deff3dad1 Mon Sep 17 00:00:00 2001 From: Dmitry Mishin Date: Wed, 9 Aug 2006 02:25:54 -0700 Subject: [NET]: add_timer -> mod_timer() in dst_run_gc() Patch from Dmitry Mishin : Replace add_timer() by mod_timer() in dst_run_gc in order to avoid BUG message. CPU1 CPU2 dst_run_gc() entered dst_run_gc() entered spin_lock(&dst_lock) ..... del_timer(&dst_gc_timer) fail to get lock .... mod_timer() <--- puts timer back to the list add_timer(&dst_gc_timer) <--- BUG because timer is in list already. Found during OpenVZ internal testing. At first we thought that it is OpenVZ specific as we added dst_run_gc(0) call in dst_dev_event(), but as Alexey pointed to me it is possible to trigger this condition in mainstream kernel. F.e. timer has fired on CPU2, but the handler was preeempted by an irq before dst_lock is tried. Meanwhile, someone on CPU1 adds an entry to gc list and starts the timer. If CPU2 was preempted long enough, this timer can expire simultaneously with resuming timer handler on CPU1, arriving exactly to the situation described. Signed-off-by: Dmitry Mishin Signed-off-by: Kirill Korotaev Signed-off-by: Alexey Kuznetsov Signed-off-by: David S. Miller --- net/core/dst.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net') diff --git a/net/core/dst.c b/net/core/dst.c index 470c05bc4cb..1a5e49da0e7 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -95,12 +95,11 @@ static void dst_run_gc(unsigned long dummy) dst_gc_timer_inc = DST_GC_INC; dst_gc_timer_expires = DST_GC_MIN; } - dst_gc_timer.expires = jiffies + dst_gc_timer_expires; #if RT_CACHE_DEBUG >= 2 printk("dst_total: %d/%d %ld\n", atomic_read(&dst_total), delayed, dst_gc_timer_expires); #endif - add_timer(&dst_gc_timer); + mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires); out: spin_unlock(&dst_lock); -- cgit v1.2.3