From 0eae88f31ca2b88911ce843452054139e028771f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Apr 2010 19:06:52 -0700 Subject: net: Fix various endianness glitches Sparse can help us find endianness bugs, but we need to make some cleanups to be able to more easily spot real bugs. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/udp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6/udp.c') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 90824852f59..92bf9033e24 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -91,9 +91,9 @@ static unsigned int udp6_portaddr_hash(struct net *net, if (ipv6_addr_any(addr6)) hash = jhash_1word(0, mix); else if (ipv6_addr_v4mapped(addr6)) - hash = jhash_1word(addr6->s6_addr32[3], mix); + hash = jhash_1word((__force u32)addr6->s6_addr32[3], mix); else - hash = jhash2(addr6->s6_addr32, 4, mix); + hash = jhash2((__force u32 *)addr6->s6_addr32, 4, mix); return hash ^ port; } -- cgit v1.2.3 From 13b52cd44670e3359055e9918d0e766d89836425 Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Fri, 23 Apr 2010 11:26:08 +0000 Subject: IPv6: Add dontfrag argument to relevant functions Add dontfrag argument to relevant functions for IPV6_DONTFRAG support, as well as allowing the value to be passed-in via ancillary cmsg data. Signed-off-by: Brian Haley Signed-off-by: David S. Miller --- net/ipv6/udp.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'net/ipv6/udp.c') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 92bf9033e24..39e3665d946 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -919,6 +919,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, int ulen = len; int hlimit = -1; int tclass = -1; + int dontfrag = -1; int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; int err; int connected = 0; @@ -1049,7 +1050,8 @@ do_udp_sendmsg: memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass); + err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, + &tclass, &dontfrag); if (err < 0) { fl6_sock_release(flowlabel); return err; @@ -1120,6 +1122,9 @@ do_udp_sendmsg: if (tclass < 0) tclass = np->tclass; + if (dontfrag < 0) + dontfrag = np->dontfrag; + if (msg->msg_flags&MSG_CONFIRM) goto do_confirm; back_from_confirm: @@ -1143,7 +1148,7 @@ do_append_data: err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen, sizeof(struct udphdr), hlimit, tclass, opt, &fl, (struct rt6_info*)dst, - corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); + corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag); if (err) udp_v6_flush_pending_frames(sk); else if (!corkreq) -- cgit v1.2.3 From 4b340ae20d0e2366792abe70f46629e576adaf5e Mon Sep 17 00:00:00 2001 From: Brian Haley Date: Fri, 23 Apr 2010 11:26:09 +0000 Subject: IPv6: Complete IPV6_DONTFRAG support Finally add support to detect a local IPV6_DONTFRAG event and return the relevant data to the user if they've enabled IPV6_RECVPATHMTU on the socket. The next recvmsg() will return no data, but have an IPV6_PATHMTU as ancillary data. Signed-off-by: Brian Haley Signed-off-by: David S. Miller --- net/ipv6/udp.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/ipv6/udp.c') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 39e3665d946..2850e35cee3 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -335,6 +335,9 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len); + if (np->rxpmtu && np->rxopt.bits.rxpmtu) + return ipv6_recv_rxpmtu(sk, msg, len); + try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), &peeked, &err); -- cgit v1.2.3 From c377411f2494a931ff7facdbb3a6839b1266bcf6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 27 Apr 2010 15:13:20 -0700 Subject: net: sk_add_backlog() take rmem_alloc into account Current socket backlog limit is not enough to really stop DDOS attacks, because user thread spend many time to process a full backlog each round, and user might crazy spin on socket lock. We should add backlog size and receive_queue size (aka rmem_alloc) to pace writers, and let user run without being slow down too much. Introduce a sk_rcvqueues_full() helper, to avoid taking socket lock in stress situations. Under huge stress from a multiqueue/RPS enabled NIC, a single flow udp receiver can now process ~200.000 pps (instead of ~100 pps before the patch) on a 8 core machine. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/udp.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net/ipv6/udp.c') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2850e35cee3..3ead20ad9d0 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -584,6 +584,10 @@ static void flush_stack(struct sock **stack, unsigned int count, sk = stack[i]; if (skb1) { + if (sk_rcvqueues_full(sk, skb)) { + kfree_skb(skb1); + goto drop; + } bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb1); @@ -759,6 +763,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, /* deliver */ + if (sk_rcvqueues_full(sk, skb)) { + sock_put(sk); + goto discard; + } bh_lock_sock(sk); if (!sock_owned_by_user(sk)) udpv6_queue_rcv_skb(sk, skb); -- cgit v1.2.3 From 4b0b72f7dd617b13abd1b04c947e15873e011a24 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Apr 2010 14:35:48 -0700 Subject: net: speedup udp receive path Since commit 95766fff ([UDP]: Add memory accounting.), each received packet needs one extra sock_lock()/sock_release() pair. This added latency because of possible backlog handling. Then later, ticket spinlocks added yet another latency source in case of DDOS. This patch introduces lock_sock_bh() and unlock_sock_bh() synchronization primitives, avoiding one atomic operation and backlog processing. skb_free_datagram_locked() uses them instead of full blown lock_sock()/release_sock(). skb is orphaned inside locked section for proper socket memory reclaim, and finally freed outside of it. UDP receive path now take the socket spinlock only once. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/udp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6/udp.c') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3ead20ad9d0..91c60f0090a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -424,7 +424,7 @@ out: return err; csum_copy_err: - lock_sock(sk); + lock_sock_bh(sk); if (!skb_kill_datagram(sk, skb, flags)) { if (is_udp4) UDP_INC_STATS_USER(sock_net(sk), @@ -433,7 +433,7 @@ csum_copy_err: UDP6_INC_STATS_USER(sock_net(sk), UDP_MIB_INERRORS, is_udplite); } - release_sock(sk); + unlock_sock_bh(sk); if (flags & MSG_DONTWAIT) return -EAGAIN; -- cgit v1.2.3 From f84af32cbca70a3c6d30463dc08c7984af11c277 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 28 Apr 2010 15:31:51 -0700 Subject: net: ip_queue_rcv_skb() helper When queueing a skb to socket, we can immediately release its dst if target socket do not use IP_CMSG_PKTINFO. tcp_data_queue() can drop dst too. This to benefit from a hot cache line and avoid the receiver, possibly on another cpu, to dirty this cache line himself. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6/udp.c') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 91c60f0090a..79359c8380b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -514,7 +514,7 @@ int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) goto drop; } - if ((rc = sock_queue_rcv_skb(sk, skb)) < 0) { + if ((rc = ip_queue_rcv_skb(sk, skb)) < 0) { /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) UDP6_INC_STATS_BH(sock_net(sk), -- cgit v1.2.3 From d6bc0149d8f2300bffa03ea6fea3ca39744277a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Thu, 6 May 2010 03:44:35 +0000 Subject: ipv6: udp: make short packet logging consistent with ipv4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adding addresses and ports to the short packet log message, like ipv4/udp.c does it, makes these messages a lot more useful: [ 822.182450] UDPv6: short packet: From [2001:db8:ffb4:3::1]:47839 23715/178 to [2001:db8:ffb4:3:5054:ff:feff:200]:1234 This requires us to drop logging in case pskb_may_pull() fails, which also is consistent with ipv4/udp.c Signed-off-by: Bjørn Mork Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/udp.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'net/ipv6/udp.c') diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 79359c8380b..3d7a2c0b836 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -699,7 +699,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, u32 ulen = 0; if (!pskb_may_pull(skb, sizeof(struct udphdr))) - goto short_packet; + goto discard; saddr = &ipv6_hdr(skb)->saddr; daddr = &ipv6_hdr(skb)->daddr; @@ -781,9 +781,14 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, return 0; short_packet: - LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n", + LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n", proto == IPPROTO_UDPLITE ? "-Lite" : "", - ulen, skb->len); + saddr, + ntohs(uh->source), + ulen, + skb->len, + daddr, + ntohs(uh->dest)); discard: UDP6_INC_STATS_BH(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); -- cgit v1.2.3