From 3294f202dc1acd82223e83ef59f272bd87bb06b2 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Wed, 11 Jun 2008 11:19:09 +0100
Subject: dccp ccid-3: Bug-Fix - Zero RTT is possible

In commit $(825de27d9e40b3117b29a79d412b7a4b78c5d815) (from 27th May, commit
message `dccp ccid-3: Fix "t_ipi explosion" bug'), the CCID-3 window counter
computation was fixed to cope with RTTs < 4 microseconds.

Such RTTs can be found e.g. when running CCID-3 over loopback. The fix removed
a check against RTT < 4, but introduced a divide-by-zero bug.

All steady-state RTTs in DCCP are filtered using dccp_sample_rtt(), which
ensures non-zero samples. However, a zero RTT is possible on initialisation,
when there is no RTT sample from the Request/Response exchange.

The fix is to use the fallback-RTT from RFC 4340, 3.4.

This is also better than just fixing update_win_count() since it allows other
parts of the code to always assume that the RTT is non-zero during the time
that the CCID is used.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/ccid3.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index f813077234b..0474f4c5707 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -329,8 +329,14 @@ static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb)
 			hctx->ccid3hctx_x    = rfc3390_initial_rate(sk);
 			hctx->ccid3hctx_t_ld = now;
 		} else {
-			/* Sender does not have RTT sample: X_pps = 1 pkt/sec */
-			hctx->ccid3hctx_x = hctx->ccid3hctx_s;
+			/*
+			 * Sender does not have RTT sample:
+			 * - set fallback RTT (RFC 4340, 3.4) since a RTT value
+			 *   is needed in several parts (e.g.  window counter);
+			 * - set sending rate X_pps = 1pps as per RFC 3448, 4.2.
+			 */
+			hctx->ccid3hctx_rtt = DCCP_FALLBACK_RTT;
+			hctx->ccid3hctx_x   = hctx->ccid3hctx_s;
 			hctx->ccid3hctx_x <<= 6;
 		}
 		ccid3_update_send_interval(hctx);
-- 
cgit v1.2.3


From 1e2f0e5e8376f2a0ada8760fc9d3104e1a81382b Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Wed, 11 Jun 2008 11:19:09 +0100
Subject: dccp: Fix sparse warnings

This patch fixes the following sparse warnings:
 * nested min(max()) expression:
   net/dccp/ccids/ccid3.c:91:21: warning: symbol '__x' shadows an earlier one
   net/dccp/ccids/ccid3.c:91:21: warning: symbol '__y' shadows an earlier one

 * Declaration of function prototypes in .c instead of .h file, resulting in
   "should it be static?" warnings.

 * Declared "struct dccpw" static (local to dccp_probe).

 * Disabled dccp_delayed_ack() - not fully removed due to RFC 4340, 11.3
   ("Receivers SHOULD implement delayed acknowledgement timers ...").

 * Used a different local variable name to avoid
   net/dccp/ackvec.c:293:13: warning: symbol 'state' shadows an earlier one
   net/dccp/ackvec.c:238:33: originally declared here

 * Removed unused functions `dccp_ackvector_print' and `dccp_ackvec_print'.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ackvec.c         | 29 ++---------------------------
 net/dccp/ccids/ccid3.c    |  4 ++--
 net/dccp/ccids/lib/tfrc.c |  8 --------
 net/dccp/ccids/lib/tfrc.h | 11 +++++++++--
 net/dccp/output.c         |  2 ++
 net/dccp/probe.c          |  2 +-
 6 files changed, 16 insertions(+), 40 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ackvec.c b/net/dccp/ackvec.c
index 6de4bd195d2..1e8be246ad1 100644
--- a/net/dccp/ackvec.c
+++ b/net/dccp/ackvec.c
@@ -290,12 +290,12 @@ int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
 
 		while (1) {
 			const u8 len = dccp_ackvec_len(av, index);
-			const u8 state = dccp_ackvec_state(av, index);
+			const u8 av_state = dccp_ackvec_state(av, index);
 			/*
 			 * valid packets not yet in av_buf have a reserved
 			 * entry, with a len equal to 0.
 			 */
-			if (state == DCCP_ACKVEC_STATE_NOT_RECEIVED &&
+			if (av_state == DCCP_ACKVEC_STATE_NOT_RECEIVED &&
 			    len == 0 && delta == 0) { /* Found our
 							 reserved seat! */
 				dccp_pr_debug("Found %llu reserved seat!\n",
@@ -325,31 +325,6 @@ out_duplicate:
 	return -EILSEQ;
 }
 
-#ifdef CONFIG_IP_DCCP_DEBUG
-void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len)
-{
-	dccp_pr_debug_cat("ACK vector len=%d, ackno=%llu |", len,
-			 (unsigned long long)ackno);
-
-	while (len--) {
-		const u8 state = (*vector & DCCP_ACKVEC_STATE_MASK) >> 6;
-		const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
-
-		dccp_pr_debug_cat("%d,%d|", state, rl);
-		++vector;
-	}
-
-	dccp_pr_debug_cat("\n");
-}
-
-void dccp_ackvec_print(const struct dccp_ackvec *av)
-{
-	dccp_ackvector_print(av->av_buf_ackno,
-			     av->av_buf + av->av_buf_head,
-			     av->av_vec_len);
-}
-#endif
-
 static void dccp_ackvec_throw_record(struct dccp_ackvec *av,
 				     struct dccp_ackvec_record *avr)
 {
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
index 0474f4c5707..a1929f33d70 100644
--- a/net/dccp/ccids/ccid3.c
+++ b/net/dccp/ccids/ccid3.c
@@ -159,8 +159,8 @@ static void ccid3_hc_tx_update_x(struct sock *sk, ktime_t *stamp)
 	} else if (ktime_us_delta(now, hctx->ccid3hctx_t_ld)
 				- (s64)hctx->ccid3hctx_rtt >= 0) {
 
-		hctx->ccid3hctx_x =
-			max(min(2 * hctx->ccid3hctx_x, min_rate),
+		hctx->ccid3hctx_x = min(2 * hctx->ccid3hctx_x, min_rate);
+		hctx->ccid3hctx_x = max(hctx->ccid3hctx_x,
 			    scaled_div(((__u64)hctx->ccid3hctx_s) << 6,
 				       hctx->ccid3hctx_rtt));
 		hctx->ccid3hctx_t_ld = now;
diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c
index d1dfbb8de64..97ecec0a8e7 100644
--- a/net/dccp/ccids/lib/tfrc.c
+++ b/net/dccp/ccids/lib/tfrc.c
@@ -14,14 +14,6 @@ module_param(tfrc_debug, bool, 0444);
 MODULE_PARM_DESC(tfrc_debug, "Enable debug messages");
 #endif
 
-extern int  tfrc_tx_packet_history_init(void);
-extern void tfrc_tx_packet_history_exit(void);
-extern int  tfrc_rx_packet_history_init(void);
-extern void tfrc_rx_packet_history_exit(void);
-
-extern int  tfrc_li_init(void);
-extern void tfrc_li_exit(void);
-
 static int __init tfrc_module_init(void)
 {
 	int rc = tfrc_li_init();
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index 1fb1187bbf1..ddd8107b927 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -58,7 +58,14 @@ static inline u32 tfrc_ewma(const u32 avg, const u32 newval, const u8 weight)
 	return avg ? (weight * avg + (10 - weight) * newval) / 10 : newval;
 }
 
-extern u32 tfrc_calc_x(u16 s, u32 R, u32 p);
-extern u32 tfrc_calc_x_reverse_lookup(u32 fvalue);
+extern u32  tfrc_calc_x(u16 s, u32 R, u32 p);
+extern u32  tfrc_calc_x_reverse_lookup(u32 fvalue);
 
+extern int  tfrc_tx_packet_history_init(void);
+extern void tfrc_tx_packet_history_exit(void);
+extern int  tfrc_rx_packet_history_init(void);
+extern void tfrc_rx_packet_history_exit(void);
+
+extern int  tfrc_li_init(void);
+extern void tfrc_li_exit(void);
 #endif /* _TFRC_H_ */
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 1f8a9b64c08..fe20068c5d8 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -508,6 +508,7 @@ void dccp_send_ack(struct sock *sk)
 
 EXPORT_SYMBOL_GPL(dccp_send_ack);
 
+#if 0
 /* FIXME: Is this still necessary (11.3) - currently nowhere used by DCCP. */
 void dccp_send_delayed_ack(struct sock *sk)
 {
@@ -538,6 +539,7 @@ void dccp_send_delayed_ack(struct sock *sk)
 	icsk->icsk_ack.timeout = timeout;
 	sk_reset_timer(sk, &icsk->icsk_delack_timer, timeout);
 }
+#endif
 
 void dccp_send_sync(struct sock *sk, const u64 ackno,
 		    const enum dccp_pkt_type pkt_type)
diff --git a/net/dccp/probe.c b/net/dccp/probe.c
index 0bcdc925027..81368a7f537 100644
--- a/net/dccp/probe.c
+++ b/net/dccp/probe.c
@@ -42,7 +42,7 @@ static int bufsize = 64 * 1024;
 
 static const char procname[] = "dccpprobe";
 
-struct {
+static struct {
 	struct kfifo	  *fifo;
 	spinlock_t	  lock;
 	wait_queue_head_t wait;
-- 
cgit v1.2.3


From 65907a433ac0ca450c4408080f24c6e4743386b2 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Wed, 11 Jun 2008 11:19:09 +0100
Subject: dccp ccid-2: Bug-Fix - Ack Vectors need to be ignored on request
 sockets

This fixes an oversight from an earlier patch, ensuring that Ack Vectors
are not processed on request sockets.

The issue is that Ack Vectors must not be parsed on request sockets, since
the Ack Vector feature depends on the selection of the (TX) CCID. During the
initial handshake the CCIDs are undefined, and so RFC 4340, 10.3 applies:

 "Using CCID-specific options and feature options during a negotiation
  for the corresponding CCID feature is NOT RECOMMENDED [...]"

And it is not even possible: when the server receives the Request from the
client, the CCID and Ack vector features are undefined; when the Ack finalising
the 3-way handshake arrives, the request socket has not been cloned yet into a
full socket. (This order is necessary, since otherwise the newly created socket
would have to be destroyed whenever an option error occurred - a malicious
hacker could simply send garbage options and exploit this.)

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/options.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/dccp/options.c b/net/dccp/options.c
index d2a84a2fece..43bc24e761d 100644
--- a/net/dccp/options.c
+++ b/net/dccp/options.c
@@ -107,9 +107,11 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq,
 		 *
 		 * CCID-specific options are ignored during connection setup, as
 		 * negotiation may still be in progress (see RFC 4340, 10.3).
+		 * The same applies to Ack Vectors, as these depend on the CCID.
 		 *
 		 */
-		if (dreq != NULL && opt >= 128)
+		if (dreq != NULL && (opt >= 128 ||
+		    opt == DCCPO_ACK_VECTOR_0 || opt == DCCPO_ACK_VECTOR_1))
 			goto ignore_option;
 
 		switch (opt) {
-- 
cgit v1.2.3


From 1e8a287c79f64226541f5c44aa52d4698bb84cf5 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Wed, 11 Jun 2008 11:19:10 +0100
Subject: dccp ccid-3: TFRC reverse-lookup Bug-Fix

This fixes a bug in the reverse lookup of p: given a value f(p), instead of p,
the function returned the smallest tabulated value f(p).

The smallest tabulated value of

   10^6 * f(p) = 10^6 * (sqrt(2*p/3) + 12 * sqrt(3*p/8) * (32 * p^3 + p))

for p=0.0001 is 8172.

Since this value is scaled by 10^6, the outcome of this bug is that a loss
of 8172/10^6 = 0.8172% was reported whenever the input was below the table
resolution of 0.01%.

This means that the value was over 80 times too high, resulting in large spikes
of the initial loss interval, thus unnecessarily reducing the throughput.

Also corrected the printk format (%u for u32).

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/lib/tfrc_equation.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/lib/tfrc_equation.c b/net/dccp/ccids/lib/tfrc_equation.c
index e4e64b76c10..2f20a29cffe 100644
--- a/net/dccp/ccids/lib/tfrc_equation.c
+++ b/net/dccp/ccids/lib/tfrc_equation.c
@@ -661,7 +661,7 @@ u32 tfrc_calc_x(u16 s, u32 R, u32 p)
 
 EXPORT_SYMBOL_GPL(tfrc_calc_x);
 
-/*
+/**
  *  tfrc_calc_x_reverse_lookup  -  try to find p given f(p)
  *
  *  @fvalue: function value to match, scaled by 1000000
@@ -676,11 +676,11 @@ u32 tfrc_calc_x_reverse_lookup(u32 fvalue)
 
 	/* Error cases. */
 	if (fvalue < tfrc_calc_x_lookup[0][1]) {
-		DCCP_WARN("fvalue %d smaller than resolution\n", fvalue);
-		return tfrc_calc_x_lookup[0][1];
+		DCCP_WARN("fvalue %u smaller than resolution\n", fvalue);
+		return TFRC_SMALLEST_P;
 	}
 	if (fvalue > tfrc_calc_x_lookup[TFRC_CALC_X_ARRSIZE - 1][0]) {
-		DCCP_WARN("fvalue %d exceeds bounds!\n", fvalue);
+		DCCP_WARN("fvalue %u exceeds bounds!\n", fvalue);
 		return 1000000;
 	}
 
-- 
cgit v1.2.3


From 7deb0f851003287d7e259bf6b33548b144c0f2d5 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Wed, 11 Jun 2008 11:19:10 +0100
Subject: dccp ccid-3: X truncated due to type conversion

This fixes a bug in computing the inter-packet-interval t_ipi = s/X:

 scaled_div32(a, b) uses u32 for b, but in "scaled_div32(s, X)" the type of the
 sending rate `X' is u64. Since X is scaled by 2^6, this truncates rates greater
 than 2^26 Bps (~537 Mbps).

Using full 64-bit division now.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/ccids/lib/tfrc.h | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'net')

diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index ddd8107b927..ed9857527ac 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -15,7 +15,7 @@
  *  (at your option) any later version.
  */
 #include <linux/types.h>
-#include <asm/div64.h>
+#include <linux/math64.h>
 #include "../../dccp.h"
 /* internal includes that this module exports: */
 #include "loss_interval.h"
@@ -29,21 +29,19 @@ extern int tfrc_debug;
 #endif
 
 /* integer-arithmetic divisions of type (a * 1000000)/b */
-static inline u64 scaled_div(u64 a, u32 b)
+static inline u64 scaled_div(u64 a, u64 b)
 {
 	BUG_ON(b==0);
-	a *= 1000000;
-	do_div(a, b);
-	return a;
+	return div64_u64(a * 1000000, b);
 }
 
-static inline u32 scaled_div32(u64 a, u32 b)
+static inline u32 scaled_div32(u64 a, u64 b)
 {
 	u64 result = scaled_div(a, b);
 
 	if (result > UINT_MAX) {
-		DCCP_CRIT("Overflow: a(%llu)/b(%u) > ~0U",
-			  (unsigned long long)a, b);
+		DCCP_CRIT("Overflow: %llu/%llu > UINT_MAX",
+			  (unsigned long long)a, (unsigned long long)b);
 		return UINT_MAX;
 	}
 	return result;
-- 
cgit v1.2.3


From be4c798a41bf626cdaacf96c382f116ed2f7dbe9 Mon Sep 17 00:00:00 2001
From: Gerrit Renker <gerrit@erg.abdn.ac.uk>
Date: Wed, 11 Jun 2008 11:19:10 +0100
Subject: dccp: Bug in initial acknowledgment number assignment

Step 8.5 in RFC 4340 says for the newly cloned socket

           Initialize S.GAR := S.ISS,

but what in fact the code (minisocks.c) does is

           Initialize S.GAR := S.ISR,

which is wrong (typo?) -- fixed by the patch.

Signed-off-by: Gerrit Renker <gerrit@erg.abdn.ac.uk>
---
 net/dccp/minisocks.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 33ad48321b0..66dca5bba85 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -165,12 +165,12 @@ out_free:
 		/* See dccp_v4_conn_request */
 		newdmsk->dccpms_sequence_window = req->rcv_wnd;
 
-		newdp->dccps_gar = newdp->dccps_isr = dreq->dreq_isr;
-		dccp_update_gsr(newsk, dreq->dreq_isr);
-
-		newdp->dccps_iss = dreq->dreq_iss;
+		newdp->dccps_gar = newdp->dccps_iss = dreq->dreq_iss;
 		dccp_update_gss(newsk, dreq->dreq_iss);
 
+		newdp->dccps_isr = dreq->dreq_isr;
+		dccp_update_gsr(newsk, dreq->dreq_isr);
+
 		/*
 		 * SWL and AWL are initially adjusted so that they are not less than
 		 * the initial Sequence Numbers received and sent, respectively:
-- 
cgit v1.2.3


From 20c61fbd8deb2ada0ac3acecf6156a986dbfff2d Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Mon, 28 Apr 2008 14:40:55 +0900
Subject: ipv6 mcast: Check address family of gf_group in
 getsockopt(MS_FILTER).

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/ipv6_sockglue.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 26b83e512a0..ce794d6acb7 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -874,6 +874,8 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 			return -EINVAL;
 		if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0)))
 			return -EFAULT;
+		if (gsf.gf_group.ss_family != AF_INET6)
+			return -EADDRNOTAVAIL;
 		lock_sock(sk);
 		err = ip6_mc_msfget(sk, &gsf,
 			(struct group_filter __user *)optval, optlen);
-- 
cgit v1.2.3


From 36e3deae8ba84865fd9eb3f2f21bbc00d49b7544 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 13 May 2008 02:52:55 +0900
Subject: ipv6 route: Fix route lifetime in netlink message.

1) We may have route lifetime larger than INT_MAX.
In that case we had a weird value in lifetime.
Use INT_MAX if lifetime does not fit in s32.

2) Lifetime is valid iff RTF_EXPIRES is set.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/route.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 220cffe9e63..d1f3e19b06c 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2196,8 +2196,12 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 
 	NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
 
-	expires = (rt->rt6i_flags & RTF_EXPIRES) ?
-			rt->rt6i_expires - jiffies : 0;
+	if (!(rt->rt6i_flags & RTF_EXPIRES))
+		expires = 0;
+	else if (rt->rt6i_expires - jiffies < INT_MAX)
+		expires = rt->rt6i_expires - jiffies;
+	else
+		expires = INT_MAX;
 
 	if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
 			       expires, rt->u.dst.error) < 0)
-- 
cgit v1.2.3


From e8766fc86b34d44a8c55a2f9d71da69e091b1ca4 Mon Sep 17 00:00:00 2001
From: Shan Wei <shanwei@cn.fujitsu.com>
Date: Tue, 10 Jun 2008 15:50:55 +0800
Subject: ipv6: Check the hop limit setting in ancillary data.

When specifying the outgoing hop limit as ancillary data for sendmsg(),
the kernel doesn't check the integer hop limit value as specified in
[RFC-3542] section 6.3.

Signed-off-by: Shan Wei <shanwei@cn.fujitsu.com>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/datagram.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index b9c2de84a8a..0f0f94a4033 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -705,6 +705,11 @@ int datagram_send_ctl(struct net *net,
 			}
 
 			*hlimit = *(int *)CMSG_DATA(cmsg);
+			if (*hlimit < -1 || *hlimit > 0xff) {
+				err = -EINVAL;
+				goto exit_f;
+			}
+
 			break;
 
 		case IPV6_TCLASS:
-- 
cgit v1.2.3


From 28d4488216645cd71402925cffde9528b0cfdb7e Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Thu, 12 Jun 2008 03:14:51 +0900
Subject: ipv6: Check IPV6_MULTICAST_LOOP option value.

Only 0 and 1 are valid for IPV6_MULTICAST_LOOP socket option,
and we should return an error of EINVAL otherwise, per RFC3493.

Based on patch from Shan Wei <shanwei@cn.fujitsu.com>.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/ipv6_sockglue.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index ce794d6acb7..9a3697172d5 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -458,6 +458,8 @@ done:
 	case IPV6_MULTICAST_LOOP:
 		if (optlen < sizeof(int))
 			goto e_inval;
+		if (val != valbool)
+			goto e_inval;
 		np->mc_loop = valbool;
 		retv = 0;
 		break;
-- 
cgit v1.2.3


From 1717699cd5130009b7cd6756e883d8582c1fe706 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Thu, 12 Jun 2008 03:27:26 +0900
Subject: ipv6: Fail with appropriate error code when setting not-applicable
 sockopt.

IPV6_MULTICAST_HOPS, for example, is not valid for stream sockets.
Since they are virtually unavailable for stream sockets,
we should return ENOPROTOOPT instead of EINVAL.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/ipv6_sockglue.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 9a3697172d5..c042ce19bd1 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -67,7 +67,7 @@ int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *))
 
 	/* RA packet may be delivered ONLY to IPPROTO_RAW socket */
 	if (sk->sk_type != SOCK_RAW || inet_sk(sk)->num != IPPROTO_RAW)
-		return -EINVAL;
+		return -ENOPROTOOPT;
 
 	new_ra = (sel>=0) ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL;
 
@@ -446,7 +446,7 @@ done:
 
 	case IPV6_MULTICAST_HOPS:
 		if (sk->sk_type == SOCK_STREAM)
-			goto e_inval;
+			break;
 		if (optlen < sizeof(int))
 			goto e_inval;
 		if (val > 255 || val < -1)
@@ -466,7 +466,7 @@ done:
 
 	case IPV6_MULTICAST_IF:
 		if (sk->sk_type == SOCK_STREAM)
-			goto e_inval;
+			break;
 		if (optlen < sizeof(int))
 			goto e_inval;
 
@@ -862,7 +862,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		if (sk->sk_protocol != IPPROTO_UDP &&
 		    sk->sk_protocol != IPPROTO_UDPLITE &&
 		    sk->sk_protocol != IPPROTO_TCP)
-			return -EINVAL;
+			return -ENOPROTOOPT;
 		if (sk->sk_state != TCP_ESTABLISHED)
 			return -ENOTCONN;
 		val = sk->sk_family;
-- 
cgit v1.2.3


From b66985b11b8b00e1ec65b89a3112510ac9a9ec6e Mon Sep 17 00:00:00 2001
From: Eric Leblond <eric@inl.fr>
Date: Wed, 11 Jun 2008 17:50:27 -0700
Subject: netfilter: Make nflog quiet when no one listen in userspace.

The message "nf_log_packet: can't log since no backend logging module loaded
in! Please either load one, or disable logging explicitly" was displayed for
each logged packet when no userspace application is listening to nflog events.
The message seems to warn about a missing kernel module but, as said above,
this is not the case. I thus propose to suppress the message (I don't see any
reason to flood the log because a user application has crashed).

Signed-off-by: Eric Leblond <eric@inl.fr>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_log.c | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index bc11d709203..9fda6ee95a3 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -92,10 +92,6 @@ void nf_log_packet(int pf,
 		vsnprintf(prefix, sizeof(prefix), fmt, args);
 		va_end(args);
 		logger->logfn(pf, hooknum, skb, in, out, loginfo, prefix);
-	} else if (net_ratelimit()) {
-		printk(KERN_WARNING "nf_log_packet: can\'t log since "
-		       "no backend logging module loaded in! Please either "
-		       "load one, or disable logging explicitly\n");
 	}
 	rcu_read_unlock();
 }
-- 
cgit v1.2.3


From ceeff7541e5a4ba8e8d97ffbae32b3f283cb7a3f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Wed, 11 Jun 2008 17:51:10 -0700
Subject: netfilter: nf_conntrack: fix ctnetlink related crash in
 nf_nat_setup_info()

When creation of a new conntrack entry in ctnetlink fails after having
set up the NAT mappings, the conntrack has an extension area allocated
that is not getting properly destroyed when freeing the conntrack again.
This means the NAT extension is still in the bysource hash, causing a
crash when walking over the hash chain the next time:

BUG: unable to handle kernel paging request at 00120fbd
IP: [<c03d394b>] nf_nat_setup_info+0x221/0x58a
*pde = 00000000
Oops: 0000 [#1] PREEMPT SMP

Pid: 2795, comm: conntrackd Not tainted (2.6.26-rc5 #1)
EIP: 0060:[<c03d394b>] EFLAGS: 00010206 CPU: 1
EIP is at nf_nat_setup_info+0x221/0x58a
EAX: 00120fbd EBX: 00120fbd ECX: 00000001 EDX: 00000000
ESI: 0000019e EDI: e853bbb4 EBP: e853bbc8 ESP: e853bb78
 DS: 007b ES: 007b FS: 00d8 GS: 0033 SS: 0068
Process conntrackd (pid: 2795, ti=e853a000 task=f7de10f0 task.ti=e853a000)
Stack: 00000000 e853bc2c e85672ec 00000008 c0561084 63c1db4a 00000000 00000000
       00000000 0002e109 61d2b1c3 00000000 00000000 00000000 01114e22 61d2b1c3
       00000000 00000000 f7444674 e853bc04 00000008 c038e728 0000000a f7444674
Call Trace:
 [<c038e728>] nla_parse+0x5c/0xb0
 [<c0397c1b>] ctnetlink_change_status+0x190/0x1c6
 [<c0397eec>] ctnetlink_new_conntrack+0x189/0x61f
 [<c0119aee>] update_curr+0x3d/0x52
 [<c03902d1>] nfnetlink_rcv_msg+0xc1/0xd8
 [<c0390228>] nfnetlink_rcv_msg+0x18/0xd8
 [<c0390210>] nfnetlink_rcv_msg+0x0/0xd8
 [<c038d2ce>] netlink_rcv_skb+0x2d/0x71
 [<c0390205>] nfnetlink_rcv+0x19/0x24
 [<c038d0f5>] netlink_unicast+0x1b3/0x216
 ...

Move invocation of the extension destructors to nf_conntrack_free()
to fix this problem.

Fixes http://bugzilla.kernel.org/show_bug.cgi?id=10875

Reported-and-Tested-by: Krzysztof Piotr Oledzki <ole@ans.pl>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_conntrack_core.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c4b1799da5d..662c1ccfee2 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -196,8 +196,6 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	if (l4proto && l4proto->destroy)
 		l4proto->destroy(ct);
 
-	nf_ct_ext_destroy(ct);
-
 	rcu_read_unlock();
 
 	spin_lock_bh(&nf_conntrack_lock);
@@ -520,6 +518,7 @@ static void nf_conntrack_free_rcu(struct rcu_head *head)
 
 void nf_conntrack_free(struct nf_conn *ct)
 {
+	nf_ct_ext_destroy(ct);
 	call_rcu(&ct->rcu, nf_conntrack_free_rcu);
 }
 EXPORT_SYMBOL_GPL(nf_conntrack_free);
-- 
cgit v1.2.3


From f23d60de719e639690b2dc5c2d0e4243ff614b7a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 12 Jun 2008 14:47:58 -0700
Subject: ipv6: Fix duplicate initialization of rawv6_prot.destroy

In changeset 22dd485022f3d0b162ceb5e67d85de7c3806aa20
("raw: Raw socket leak.") code was added so that we
flush pending frames on raw sockets to avoid leaks.

The ipv4 part was fine, but the ipv6 part was not
done correctly.  Unlike the ipv4 side, the ipv6 code
already has a .destroy method for rawv6_prot.

So now there were two assignments to this member, and
what the compiler does is use the last one, effectively
making the ipv6 parts of that changeset a NOP.

Fix this by removing the:

	.destroy	   = inet6_destroy_sock,

line, and adding an inet6_destroy_sock() call to the
end of raw6_destroy().

Noticed by Al Viro.

Signed-off-by: David S. Miller <davem@davemloft.net>
Acked-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 net/ipv6/raw.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 8fee9a15b2d..3aee12310d9 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1169,7 +1169,8 @@ static int raw6_destroy(struct sock *sk)
 	lock_sock(sk);
 	ip6_flush_pending_frames(sk);
 	release_sock(sk);
-	return 0;
+
+	return inet6_destroy_sock(sk);
 }
 
 static int rawv6_init_sk(struct sock *sk)
@@ -1200,7 +1201,6 @@ struct proto rawv6_prot = {
 	.disconnect	   = udp_disconnect,
 	.ioctl		   = rawv6_ioctl,
 	.init		   = rawv6_init_sk,
-	.destroy	   = inet6_destroy_sock,
 	.setsockopt	   = rawv6_setsockopt,
 	.getsockopt	   = rawv6_getsockopt,
 	.sendmsg	   = rawv6_sendmsg,
-- 
cgit v1.2.3


From ec0a196626bd12e0ba108d7daa6d95a4fb25c2c5 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 12 Jun 2008 16:31:35 -0700
Subject: tcp: Revert 'process defer accept as established' changes.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts two changesets, ec3c0982a2dd1e671bad8e9d26c28dcba0039d87
("[TCP]: TCP_DEFER_ACCEPT updates - process as established") and
the follow-on bug fix 9ae27e0adbf471c7a6b80102e38e1d5a346b3b38
("tcp: Fix slab corruption with ipv6 and tcp6fuzz").

This change causes several problems, first reported by Ingo Molnar
as a distcc-over-loopback regression where connections were getting
stuck.

Ilpo Järvinen first spotted the locking problems.  The new function
added by this code, tcp_defer_accept_check(), only has the
child socket locked, yet it is modifying state of the parent
listening socket.

Fixing that is non-trivial at best, because we can't simply just grab
the parent listening socket lock at this point, because it would
create an ABBA deadlock.  The normal ordering is parent listening
socket --> child socket, but this code path would require the
reverse lock ordering.

Next is a problem noticed by Vitaliy Gusev, he noted:

----------------------------------------
>--- a/net/ipv4/tcp_timer.c
>+++ b/net/ipv4/tcp_timer.c
>@@ -481,6 +481,11 @@ static void tcp_keepalive_timer (unsigned long data)
> 		goto death;
> 	}
>
>+	if (tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) {
>+		tcp_send_active_reset(sk, GFP_ATOMIC);
>+		goto death;

Here socket sk is not attached to listening socket's request queue. tcp_done()
will not call inet_csk_destroy_sock() (and tcp_v4_destroy_sock() which should
release this sk) as socket is not DEAD. Therefore socket sk will be lost for
freeing.
----------------------------------------

Finally, Alexey Kuznetsov argues that there might not even be any
real value or advantage to these new semantics even if we fix all
of the bugs:

----------------------------------------
Hiding from accept() sockets with only out-of-order data only
is the only thing which is impossible with old approach. Is this really
so valuable? My opinion: no, this is nothing but a new loophole
to consume memory without control.
----------------------------------------

So revert this thing for now.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_connection_sock.c | 11 +++++++---
 net/ipv4/tcp.c                  | 18 ++++++++++-------
 net/ipv4/tcp_input.c            | 45 -----------------------------------------
 net/ipv4/tcp_ipv4.c             |  8 --------
 net/ipv4/tcp_minisocks.c        | 32 +++++++++++------------------
 net/ipv4/tcp_timer.c            |  5 -----
 6 files changed, 31 insertions(+), 88 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 828ea211ff2..045e799d3e1 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -419,7 +419,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 	struct inet_connection_sock *icsk = inet_csk(parent);
 	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
 	struct listen_sock *lopt = queue->listen_opt;
-	int thresh = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
+	int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
+	int thresh = max_retries;
 	unsigned long now = jiffies;
 	struct request_sock **reqp, *req;
 	int i, budget;
@@ -455,6 +456,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 		}
 	}
 
+	if (queue->rskq_defer_accept)
+		max_retries = queue->rskq_defer_accept;
+
 	budget = 2 * (lopt->nr_table_entries / (timeout / interval));
 	i = lopt->clock_hand;
 
@@ -462,8 +466,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 		reqp=&lopt->syn_table[i];
 		while ((req = *reqp) != NULL) {
 			if (time_after_eq(now, req->expires)) {
-				if (req->retrans < thresh &&
-				    !req->rsk_ops->rtx_syn_ack(parent, req)) {
+				if ((req->retrans < (inet_rsk(req)->acked ? max_retries : thresh)) &&
+				    (inet_rsk(req)->acked ||
+				     !req->rsk_ops->rtx_syn_ack(parent, req))) {
 					unsigned long timeo;
 
 					if (req->retrans++ == 0)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ab66683b804..fc54a48fde1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2112,12 +2112,15 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		break;
 
 	case TCP_DEFER_ACCEPT:
-		if (val < 0) {
-			err = -EINVAL;
-		} else {
-			if (val > MAX_TCP_ACCEPT_DEFERRED)
-				val = MAX_TCP_ACCEPT_DEFERRED;
-			icsk->icsk_accept_queue.rskq_defer_accept = val;
+		icsk->icsk_accept_queue.rskq_defer_accept = 0;
+		if (val > 0) {
+			/* Translate value in seconds to number of
+			 * retransmits */
+			while (icsk->icsk_accept_queue.rskq_defer_accept < 32 &&
+			       val > ((TCP_TIMEOUT_INIT / HZ) <<
+				       icsk->icsk_accept_queue.rskq_defer_accept))
+				icsk->icsk_accept_queue.rskq_defer_accept++;
+			icsk->icsk_accept_queue.rskq_defer_accept++;
 		}
 		break;
 
@@ -2299,7 +2302,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 			val = (val ? : sysctl_tcp_fin_timeout) / HZ;
 		break;
 	case TCP_DEFER_ACCEPT:
-		val = icsk->icsk_accept_queue.rskq_defer_accept;
+		val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 :
+			((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
 		break;
 	case TCP_WINDOW_CLAMP:
 		val = tp->window_clamp;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eba873e9b56..cad73b7dfef 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4541,49 +4541,6 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
 	}
 }
 
-static int tcp_defer_accept_check(struct sock *sk)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	if (tp->defer_tcp_accept.request) {
-		int queued_data =  tp->rcv_nxt - tp->copied_seq;
-		int hasfin =  !skb_queue_empty(&sk->sk_receive_queue) ?
-			tcp_hdr((struct sk_buff *)
-				sk->sk_receive_queue.prev)->fin : 0;
-
-		if (queued_data && hasfin)
-			queued_data--;
-
-		if (queued_data &&
-		    tp->defer_tcp_accept.listen_sk->sk_state == TCP_LISTEN) {
-			if (sock_flag(sk, SOCK_KEEPOPEN)) {
-				inet_csk_reset_keepalive_timer(sk,
-							       keepalive_time_when(tp));
-			} else {
-				inet_csk_delete_keepalive_timer(sk);
-			}
-
-			inet_csk_reqsk_queue_add(
-				tp->defer_tcp_accept.listen_sk,
-				tp->defer_tcp_accept.request,
-				sk);
-
-			tp->defer_tcp_accept.listen_sk->sk_data_ready(
-				tp->defer_tcp_accept.listen_sk, 0);
-
-			sock_put(tp->defer_tcp_accept.listen_sk);
-			sock_put(sk);
-			tp->defer_tcp_accept.listen_sk = NULL;
-			tp->defer_tcp_accept.request = NULL;
-		} else if (hasfin ||
-			   tp->defer_tcp_accept.listen_sk->sk_state != TCP_LISTEN) {
-			tcp_reset(sk);
-			return -1;
-		}
-	}
-	return 0;
-}
-
 static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -4944,8 +4901,6 @@ step5:
 
 	tcp_data_snd_check(sk);
 	tcp_ack_snd_check(sk);
-
-	tcp_defer_accept_check(sk);
 	return 0;
 
 csum_error:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4f8485c67d1..97a230026e1 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1918,14 +1918,6 @@ int tcp_v4_destroy_sock(struct sock *sk)
 		sk->sk_sndmsg_page = NULL;
 	}
 
-	if (tp->defer_tcp_accept.request) {
-		reqsk_free(tp->defer_tcp_accept.request);
-		sock_put(tp->defer_tcp_accept.listen_sk);
-		sock_put(sk);
-		tp->defer_tcp_accept.listen_sk = NULL;
-		tp->defer_tcp_accept.request = NULL;
-	}
-
 	atomic_dec(&tcp_sockets_allocated);
 
 	return 0;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 019c8c16e5c..8245247a6ce 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -571,8 +571,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 	   does sequence test, SYN is truncated, and thus we consider
 	   it a bare ACK.
 
-	   Both ends (listening sockets) accept the new incoming
-	   connection and try to talk to each other. 8-)
+	   If icsk->icsk_accept_queue.rskq_defer_accept is set, we silently
+	   drop this bare ACK.  Otherwise, we create an established connection.
+	   Both ends (listening sockets) accept the new incoming connection and
+	   try to talk to each other. 8-)
 
 	   Note: This case is both harmless, and rare.  Possibility is about the
 	   same as us discovering intelligent life on another plant tomorrow.
@@ -640,6 +642,13 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 		if (!(flg & TCP_FLAG_ACK))
 			return NULL;
 
+		/* If TCP_DEFER_ACCEPT is set, drop bare ACK but mark req as acked. */
+		if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
+		    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
+			inet_rsk(req)->acked = 1;
+			return NULL;
+		}
+
 		/* OK, ACK is valid, create big socket and
 		 * feed this segment to it. It will repeat all
 		 * the tests. THIS SEGMENT MUST MOVE SOCKET TO
@@ -678,24 +687,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 		inet_csk_reqsk_queue_unlink(sk, req, prev);
 		inet_csk_reqsk_queue_removed(sk, req);
 
-		if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
-		    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
-
-			/* the accept queue handling is done is est recv slow
-			 * path so lets make sure to start there
-			 */
-			tcp_sk(child)->pred_flags = 0;
-			sock_hold(sk);
-			sock_hold(child);
-			tcp_sk(child)->defer_tcp_accept.listen_sk = sk;
-			tcp_sk(child)->defer_tcp_accept.request = req;
-
-			inet_csk_reset_keepalive_timer(child,
-						       inet_csk(sk)->icsk_accept_queue.rskq_defer_accept * HZ);
-		} else {
-			inet_csk_reqsk_queue_add(sk, req, child);
-		}
-
+		inet_csk_reqsk_queue_add(sk, req, child);
 		return child;
 
 	listen_overflow:
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 4de68cf5f2a..63ed9d6830e 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -489,11 +489,6 @@ static void tcp_keepalive_timer (unsigned long data)
 		goto death;
 	}
 
-	if (tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) {
-		tcp_send_active_reset(sk, GFP_ATOMIC);
-		goto death;
-	}
-
 	if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
 		goto out;
 
-- 
cgit v1.2.3