From e6c654c9c09258dc0f82d1baa9ce69aa68bc735e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 27 Jan 2017 08:11:44 -0800
Subject: can: Fix kernel panic at security_sock_rcv_skb

[ Upstream commit f1712c73714088a7252d276a57126d56c7d37e64 ]

Zhang Yanmin reported crashes [1] and provided a patch adding a
synchronize_rcu() call in can_rx_unregister()

The main problem seems that the sockets themselves are not RCU
protected.

If CAN uses RCU for delivery, then sockets should be freed only after
one RCU grace period.

Recent kernels could use sock_set_flag(sk, SOCK_RCU_FREE), but let's
ease stable backports with the following fix instead.

[1]
BUG: unable to handle kernel NULL pointer dereference at (null)
IP: [<ffffffff81495e25>] selinux_socket_sock_rcv_skb+0x65/0x2a0

Call Trace:
 <IRQ>
 [<ffffffff81485d8c>] security_sock_rcv_skb+0x4c/0x60
 [<ffffffff81d55771>] sk_filter+0x41/0x210
 [<ffffffff81d12913>] sock_queue_rcv_skb+0x53/0x3a0
 [<ffffffff81f0a2b3>] raw_rcv+0x2a3/0x3c0
 [<ffffffff81f06eab>] can_rcv_filter+0x12b/0x370
 [<ffffffff81f07af9>] can_receive+0xd9/0x120
 [<ffffffff81f07beb>] can_rcv+0xab/0x100
 [<ffffffff81d362ac>] __netif_receive_skb_core+0xd8c/0x11f0
 [<ffffffff81d36734>] __netif_receive_skb+0x24/0xb0
 [<ffffffff81d37f67>] process_backlog+0x127/0x280
 [<ffffffff81d36f7b>] net_rx_action+0x33b/0x4f0
 [<ffffffff810c88d4>] __do_softirq+0x184/0x440
 [<ffffffff81f9e86c>] do_softirq_own_stack+0x1c/0x30
 <EOI>
 [<ffffffff810c76fb>] do_softirq.part.18+0x3b/0x40
 [<ffffffff810c8bed>] do_softirq+0x1d/0x20
 [<ffffffff81d30085>] netif_rx_ni+0xe5/0x110
 [<ffffffff8199cc87>] slcan_receive_buf+0x507/0x520
 [<ffffffff8167ef7c>] flush_to_ldisc+0x21c/0x230
 [<ffffffff810e3baf>] process_one_work+0x24f/0x670
 [<ffffffff810e44ed>] worker_thread+0x9d/0x6f0
 [<ffffffff810e4450>] ? rescuer_thread+0x480/0x480
 [<ffffffff810ebafc>] kthread+0x12c/0x150
 [<ffffffff81f9ccef>] ret_from_fork+0x3f/0x70

Reported-by: Zhang Yanmin <yanmin.zhang@intel.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/can/af_can.c | 12 ++++++++++--
 net/can/af_can.h |  3 ++-
 net/can/bcm.c    |  4 ++--
 net/can/gw.c     |  2 +-
 net/can/raw.c    |  4 ++--
 5 files changed, 17 insertions(+), 8 deletions(-)

(limited to 'net')
diff --git a/net/can/af_can.c b/net/can/af_can.c
index 166d436196c1..928f58064098 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -445,6 +445,7 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask,
  * @func: callback function on filter match
  * @data: returned parameter for callback function
  * @ident: string for calling module identification
+ * @sk: socket pointer (might be NULL)
  *
  * Description:
  *  Invokes the callback function with the received sk_buff and the given
@@ -468,7 +469,7 @@ static struct hlist_head *find_rcv_list(canid_t *can_id, canid_t *mask,
  */
 int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask,
 		    void (*func)(struct sk_buff *, void *), void *data,
-		    char *ident)
+		    char *ident, struct sock *sk)
 {
 	struct receiver *r;
 	struct hlist_head *rl;
@@ -496,6 +497,7 @@ int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask,
 		r->func    = func;
 		r->data    = data;
 		r->ident   = ident;
+		r->sk      = sk;
 
 		hlist_add_head_rcu(&r->list, rl);
 		d->entries++;
@@ -520,8 +522,11 @@ EXPORT_SYMBOL(can_rx_register);
 static void can_rx_delete_receiver(struct rcu_head *rp)
 {
 	struct receiver *r = container_of(rp, struct receiver, rcu);
+	struct sock *sk = r->sk;
 
 	kmem_cache_free(rcv_cache, r);
+	if (sk)
+		sock_put(sk);
 }
 
 /**
@@ -596,8 +601,11 @@ void can_rx_unregister(struct net_device *dev, canid_t can_id, canid_t mask,
 	spin_unlock(&can_rcvlists_lock);
 
 	/* schedule the receiver item for deletion */
-	if (r)
+	if (r) {
+		if (r->sk)
+			sock_hold(r->sk);
 		call_rcu(&r->rcu, can_rx_delete_receiver);
+	}
 }
 EXPORT_SYMBOL(can_rx_unregister);
 
diff --git a/net/can/af_can.h b/net/can/af_can.h
index fca0fe9fc45a..b86f5129e838 100644
--- a/net/can/af_can.h
+++ b/net/can/af_can.h
@@ -50,13 +50,14 @@
 
 struct receiver {
 	struct hlist_node list;
-	struct rcu_head rcu;
 	canid_t can_id;
 	canid_t mask;
 	unsigned long matches;
 	void (*func)(struct sk_buff *, void *);
 	void *data;
 	char *ident;
+	struct sock *sk;
+	struct rcu_head rcu;
 };
 
 #define CAN_SFF_RCV_ARRAY_SZ (1 << CAN_SFF_ID_BITS)
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 24d66c1cc0cd..4ccfd356baed 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1179,7 +1179,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
 				err = can_rx_register(dev, op->can_id,
 						      REGMASK(op->can_id),
 						      bcm_rx_handler, op,
-						      "bcm");
+						      "bcm", sk);
 
 				op->rx_reg_dev = dev;
 				dev_put(dev);
@@ -1188,7 +1188,7 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg,
 		} else
 			err = can_rx_register(NULL, op->can_id,
 					      REGMASK(op->can_id),
-					      bcm_rx_handler, op, "bcm");
+					      bcm_rx_handler, op, "bcm", sk);
 		if (err) {
 			/* this bcm rx op is broken -> remove it */
 			list_del(&op->list);
diff --git a/net/can/gw.c b/net/can/gw.c
index 455168718c2e..77c8af4047ef 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -442,7 +442,7 @@ static inline int cgw_register_filter(struct cgw_job *gwj)
 {
 	return can_rx_register(gwj->src.dev, gwj->ccgw.filter.can_id,
 			       gwj->ccgw.filter.can_mask, can_can_gw_rcv,
-			       gwj, "gw");
+			       gwj, "gw", NULL);
 }
 
 static inline void cgw_unregister_filter(struct cgw_job *gwj)
diff --git a/net/can/raw.c b/net/can/raw.c
index 56af689ca999..e9403a26a1d5 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -190,7 +190,7 @@ static int raw_enable_filters(struct net_device *dev, struct sock *sk,
 	for (i = 0; i < count; i++) {
 		err = can_rx_register(dev, filter[i].can_id,
 				      filter[i].can_mask,
-				      raw_rcv, sk, "raw");
+				      raw_rcv, sk, "raw", sk);
 		if (err) {
 			/* clean up successfully registered filters */
 			while (--i >= 0)
@@ -211,7 +211,7 @@ static int raw_enable_errfilter(struct net_device *dev, struct sock *sk,
 
 	if (err_mask)
 		err = can_rx_register(dev, 0, err_mask | CAN_ERR_FLAG,
-				      raw_rcv, sk, "raw");
+				      raw_rcv, sk, "raw", sk);
 
 	return err;
 }
-- 
cgit v1.2.3


From 41e07a7e01d951cfd4c9a7dac90c921269d89513 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 23 Jan 2017 16:43:06 -0800
Subject: ipv6: fix ip6_tnl_parse_tlv_enc_lim()

[ Upstream commit fbfa743a9d2a0ffa24251764f10afc13eb21e739 ]

This function suffers from multiple issues.

First one is that pskb_may_pull() may reallocate skb->head,
so the 'raw' pointer needs either to be reloaded or not used at all.

Second issue is that NEXTHDR_DEST handling does not validate
that the options are present in skb->data, so we might read
garbage or access non existent memory.

With help from Willem de Bruijn.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov  <dvyukov@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/ip6_tunnel.c | 34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 2994d1f1a661..3fed666b95ae 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -479,18 +479,19 @@ ip6_tnl_dev_uninit(struct net_device *dev)
 
 __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
 {
-	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw;
-	__u8 nexthdr = ipv6h->nexthdr;
-	__u16 off = sizeof(*ipv6h);
+	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)raw;
+	unsigned int nhoff = raw - skb->data;
+	unsigned int off = nhoff + sizeof(*ipv6h);
+	u8 next, nexthdr = ipv6h->nexthdr;
 
 	while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
-		__u16 optlen = 0;
 		struct ipv6_opt_hdr *hdr;
-		if (raw + off + sizeof(*hdr) > skb->data &&
-		    !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr)))
+		u16 optlen;
+
+		if (!pskb_may_pull(skb, off + sizeof(*hdr)))
 			break;
 
-		hdr = (struct ipv6_opt_hdr *) (raw + off);
+		hdr = (struct ipv6_opt_hdr *)(skb->data + off);
 		if (nexthdr == NEXTHDR_FRAGMENT) {
 			struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
 			if (frag_hdr->frag_off)
@@ -501,20 +502,29 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
 		} else {
 			optlen = ipv6_optlen(hdr);
 		}
+		/* cache hdr->nexthdr, since pskb_may_pull() might
+		 * invalidate hdr
+		 */
+		next = hdr->nexthdr;
 		if (nexthdr == NEXTHDR_DEST) {
-			__u16 i = off + 2;
+			u16 i = 2;
+
+			/* Remember : hdr is no longer valid at this point. */
+			if (!pskb_may_pull(skb, off + optlen))
+				break;
+
 			while (1) {
 				struct ipv6_tlv_tnl_enc_lim *tel;
 
 				/* No more room for encapsulation limit */
-				if (i + sizeof (*tel) > off + optlen)
+				if (i + sizeof(*tel) > optlen)
 					break;
 
-				tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i];
+				tel = (struct ipv6_tlv_tnl_enc_lim *) skb->data + off + i;
 				/* return index of option if found and valid */
 				if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
 				    tel->length == 1)
-					return i;
+					return i + off - nhoff;
 				/* else jump to next option */
 				if (tel->type)
 					i += tel->length + 2;
@@ -522,7 +532,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
 					i++;
 			}
 		}
-		nexthdr = hdr->nexthdr;
+		nexthdr = next;
 		off += optlen;
 	}
 	return 0;
-- 
cgit v1.2.3


From 3f5b5134ec7c9418415e4e660adebe6c6ec112fd Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 1 Feb 2017 11:46:32 +0300
Subject: ipv6: pointer math error in ip6_tnl_parse_tlv_enc_lim()

[ Upstream commit 63117f09c768be05a0bf465911297dc76394f686 ]

Casting is a high precedence operation but "off" and "i" are in terms of
bytes so we need to have some parenthesis here.

Fixes: fbfa743a9d2a ("ipv6: fix ip6_tnl_parse_tlv_enc_lim()")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/ip6_tunnel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 3fed666b95ae..6c6161763c2f 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -520,7 +520,7 @@ __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw)
 				if (i + sizeof(*tel) > optlen)
 					break;
 
-				tel = (struct ipv6_tlv_tnl_enc_lim *) skb->data + off + i;
+				tel = (struct ipv6_tlv_tnl_enc_lim *)(skb->data + off + i);
 				/* return index of option if found and valid */
 				if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
 				    tel->length == 1)
-- 
cgit v1.2.3


From f3ece3b38437fd06b60a74cd640d95f8d32f5b96 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 1 Feb 2017 08:33:53 -0800
Subject: tcp: fix 0 divide in __tcp_select_window()

[ Upstream commit 06425c308b92eaf60767bc71d359f4cbc7a561f8 ]

syszkaller fuzzer was able to trigger a divide by zero, when
TCP window scaling is not enabled.

SO_RCVBUF can be used not only to increase sk_rcvbuf, also
to decrease it below current receive buffers utilization.

If mss is negative or 0, just return a zero TCP window.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov  <dvyukov@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp_output.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 0795647e94c6..de95714d021c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2383,9 +2383,11 @@ u32 __tcp_select_window(struct sock *sk)
 	int full_space = min_t(int, tp->window_clamp, allowed_space);
 	int window;
 
-	if (mss > full_space)
+	if (unlikely(mss > full_space)) {
 		mss = full_space;
-
+		if (mss <= 0)
+			return 0;
+	}
 	if (free_space < (full_space >> 1)) {
 		icsk->icsk_ack.quick = 0;
 
-- 
cgit v1.2.3


From 22449a01baa59fc7fb2a09df26f55faf967828bc Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 2 Feb 2017 10:31:35 -0800
Subject: net: use a work queue to defer net_disable_timestamp() work

[ Upstream commit 5fa8bbda38c668e56b0c6cdecced2eac2fe36dec ]

Dmitry reported a warning [1] showing that we were calling
net_disable_timestamp() -> static_key_slow_dec() from a non
process context.

Grabbing a mutex while holding a spinlock or rcu_read_lock()
is not allowed.

As Cong suggested, we now use a work queue.

It is possible netstamp_clear() exits while netstamp_needed_deferred
is not zero, but it is probably not worth trying to do better than that.

netstamp_needed_deferred atomic tracks the exact number of deferred
decrements.

[1]
[ INFO: suspicious RCU usage. ]
4.10.0-rc5+ #192 Not tainted
-------------------------------
./include/linux/rcupdate.h:561 Illegal context switch in RCU read-side
critical section!

other info that might help us debug this:

rcu_scheduler_active = 2, debug_locks = 0
2 locks held by syz-executor14/23111:
 #0:  (sk_lock-AF_INET6){+.+.+.}, at: [<ffffffff83a35c35>] lock_sock
include/net/sock.h:1454 [inline]
 #0:  (sk_lock-AF_INET6){+.+.+.}, at: [<ffffffff83a35c35>]
rawv6_sendmsg+0x1e65/0x3ec0 net/ipv6/raw.c:919
 #1:  (rcu_read_lock){......}, at: [<ffffffff83ae2678>] nf_hook
include/linux/netfilter.h:201 [inline]
 #1:  (rcu_read_lock){......}, at: [<ffffffff83ae2678>]
__ip6_local_out+0x258/0x840 net/ipv6/output_core.c:160

stack backtrace:
CPU: 2 PID: 23111 Comm: syz-executor14 Not tainted 4.10.0-rc5+ #192
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:15 [inline]
 dump_stack+0x2ee/0x3ef lib/dump_stack.c:51
 lockdep_rcu_suspicious+0x139/0x180 kernel/locking/lockdep.c:4452
 rcu_preempt_sleep_check include/linux/rcupdate.h:560 [inline]
 ___might_sleep+0x560/0x650 kernel/sched/core.c:7748
 __might_sleep+0x95/0x1a0 kernel/sched/core.c:7739
 mutex_lock_nested+0x24f/0x1730 kernel/locking/mutex.c:752
 atomic_dec_and_mutex_lock+0x119/0x160 kernel/locking/mutex.c:1060
 __static_key_slow_dec+0x7a/0x1e0 kernel/jump_label.c:149
 static_key_slow_dec+0x51/0x90 kernel/jump_label.c:174
 net_disable_timestamp+0x3b/0x50 net/core/dev.c:1728
 sock_disable_timestamp+0x98/0xc0 net/core/sock.c:403
 __sk_destruct+0x27d/0x6b0 net/core/sock.c:1441
 sk_destruct+0x47/0x80 net/core/sock.c:1460
 __sk_free+0x57/0x230 net/core/sock.c:1468
 sock_wfree+0xae/0x120 net/core/sock.c:1645
 skb_release_head_state+0xfc/0x200 net/core/skbuff.c:655
 skb_release_all+0x15/0x60 net/core/skbuff.c:668
 __kfree_skb+0x15/0x20 net/core/skbuff.c:684
 kfree_skb+0x16e/0x4c0 net/core/skbuff.c:705
 inet_frag_destroy+0x121/0x290 net/ipv4/inet_fragment.c:304
 inet_frag_put include/net/inet_frag.h:133 [inline]
 nf_ct_frag6_gather+0x1106/0x3840
net/ipv6/netfilter/nf_conntrack_reasm.c:617
 ipv6_defrag+0x1be/0x2b0 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68
 nf_hook_entry_hookfn include/linux/netfilter.h:102 [inline]
 nf_hook_slow+0xc3/0x290 net/netfilter/core.c:310
 nf_hook include/linux/netfilter.h:212 [inline]
 __ip6_local_out+0x489/0x840 net/ipv6/output_core.c:160
 ip6_local_out+0x2d/0x170 net/ipv6/output_core.c:170
 ip6_send_skb+0xa1/0x340 net/ipv6/ip6_output.c:1722
 ip6_push_pending_frames+0xb3/0xe0 net/ipv6/ip6_output.c:1742
 rawv6_push_pending_frames net/ipv6/raw.c:613 [inline]
 rawv6_sendmsg+0x2d1a/0x3ec0 net/ipv6/raw.c:927
 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744
 sock_sendmsg_nosec net/socket.c:635 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:645
 sock_write_iter+0x326/0x600 net/socket.c:848
 do_iter_readv_writev+0x2e3/0x5b0 fs/read_write.c:695
 do_readv_writev+0x42c/0x9b0 fs/read_write.c:872
 vfs_writev+0x87/0xc0 fs/read_write.c:911
 do_writev+0x110/0x2c0 fs/read_write.c:944
 SYSC_writev fs/read_write.c:1017 [inline]
 SyS_writev+0x27/0x30 fs/read_write.c:1014
 entry_SYSCALL_64_fastpath+0x1f/0xc2
RIP: 0033:0x445559
RSP: 002b:00007f6f46fceb58 EFLAGS: 00000292 ORIG_RAX: 0000000000000014
RAX: ffffffffffffffda RBX: 0000000000000005 RCX: 0000000000445559
RDX: 0000000000000001 RSI: 0000000020f1eff0 RDI: 0000000000000005
RBP: 00000000006e19c0 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000292 R12: 0000000000700000
R13: 0000000020f59000 R14: 0000000000000015 R15: 0000000000020400
BUG: sleeping function called from invalid context at
kernel/locking/mutex.c:752
in_atomic(): 1, irqs_disabled(): 0, pid: 23111, name: syz-executor14
INFO: lockdep is turned off.
CPU: 2 PID: 23111 Comm: syz-executor14 Not tainted 4.10.0-rc5+ #192
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs
01/01/2011
Call Trace:
 __dump_stack lib/dump_stack.c:15 [inline]
 dump_stack+0x2ee/0x3ef lib/dump_stack.c:51
 ___might_sleep+0x47e/0x650 kernel/sched/core.c:7780
 __might_sleep+0x95/0x1a0 kernel/sched/core.c:7739
 mutex_lock_nested+0x24f/0x1730 kernel/locking/mutex.c:752
 atomic_dec_and_mutex_lock+0x119/0x160 kernel/locking/mutex.c:1060
 __static_key_slow_dec+0x7a/0x1e0 kernel/jump_label.c:149
 static_key_slow_dec+0x51/0x90 kernel/jump_label.c:174
 net_disable_timestamp+0x3b/0x50 net/core/dev.c:1728
 sock_disable_timestamp+0x98/0xc0 net/core/sock.c:403
 __sk_destruct+0x27d/0x6b0 net/core/sock.c:1441
 sk_destruct+0x47/0x80 net/core/sock.c:1460
 __sk_free+0x57/0x230 net/core/sock.c:1468
 sock_wfree+0xae/0x120 net/core/sock.c:1645
 skb_release_head_state+0xfc/0x200 net/core/skbuff.c:655
 skb_release_all+0x15/0x60 net/core/skbuff.c:668
 __kfree_skb+0x15/0x20 net/core/skbuff.c:684
 kfree_skb+0x16e/0x4c0 net/core/skbuff.c:705
 inet_frag_destroy+0x121/0x290 net/ipv4/inet_fragment.c:304
 inet_frag_put include/net/inet_frag.h:133 [inline]
 nf_ct_frag6_gather+0x1106/0x3840
net/ipv6/netfilter/nf_conntrack_reasm.c:617
 ipv6_defrag+0x1be/0x2b0 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c:68
 nf_hook_entry_hookfn include/linux/netfilter.h:102 [inline]
 nf_hook_slow+0xc3/0x290 net/netfilter/core.c:310
 nf_hook include/linux/netfilter.h:212 [inline]
 __ip6_local_out+0x489/0x840 net/ipv6/output_core.c:160
 ip6_local_out+0x2d/0x170 net/ipv6/output_core.c:170
 ip6_send_skb+0xa1/0x340 net/ipv6/ip6_output.c:1722
 ip6_push_pending_frames+0xb3/0xe0 net/ipv6/ip6_output.c:1742
 rawv6_push_pending_frames net/ipv6/raw.c:613 [inline]
 rawv6_sendmsg+0x2d1a/0x3ec0 net/ipv6/raw.c:927
 inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744
 sock_sendmsg_nosec net/socket.c:635 [inline]
 sock_sendmsg+0xca/0x110 net/socket.c:645
 sock_write_iter+0x326/0x600 net/socket.c:848
 do_iter_readv_writev+0x2e3/0x5b0 fs/read_write.c:695
 do_readv_writev+0x42c/0x9b0 fs/read_write.c:872
 vfs_writev+0x87/0xc0 fs/read_write.c:911
 do_writev+0x110/0x2c0 fs/read_write.c:944
 SYSC_writev fs/read_write.c:1017 [inline]
 SyS_writev+0x27/0x30 fs/read_write.c:1014
 entry_SYSCALL_64_fastpath+0x1f/0xc2
RIP: 0033:0x445559

Fixes: b90e5794c5bd ("net: dont call jump_label_dec from irq context")
Suggested-by: Cong Wang <xiyou.wangcong@gmail.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/core/dev.c | 31 +++++++++++++------------------
 1 file changed, 13 insertions(+), 18 deletions(-)

(limited to 'net')

diff --git a/net/core/dev.c b/net/core/dev.c
index 0798a0f1b395..08215a85c742 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1676,24 +1676,19 @@ EXPORT_SYMBOL_GPL(net_dec_ingress_queue);
 
 static struct static_key netstamp_needed __read_mostly;
 #ifdef HAVE_JUMP_LABEL
-/* We are not allowed to call static_key_slow_dec() from irq context
- * If net_disable_timestamp() is called from irq context, defer the
- * static_key_slow_dec() calls.
- */
 static atomic_t netstamp_needed_deferred;
-#endif
-
-void net_enable_timestamp(void)
+static void netstamp_clear(struct work_struct *work)
 {
-#ifdef HAVE_JUMP_LABEL
 	int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
 
-	if (deferred) {
-		while (--deferred)
-			static_key_slow_dec(&netstamp_needed);
-		return;
-	}
+	while (deferred--)
+		static_key_slow_dec(&netstamp_needed);
+}
+static DECLARE_WORK(netstamp_work, netstamp_clear);
 #endif
+
+void net_enable_timestamp(void)
+{
 	static_key_slow_inc(&netstamp_needed);
 }
 EXPORT_SYMBOL(net_enable_timestamp);
@@ -1701,12 +1696,12 @@ EXPORT_SYMBOL(net_enable_timestamp);
 void net_disable_timestamp(void)
 {
 #ifdef HAVE_JUMP_LABEL
-	if (in_interrupt()) {
-		atomic_inc(&netstamp_needed_deferred);
-		return;
-	}
-#endif
+	/* net_disable_timestamp() can be called from non process context */
+	atomic_inc(&netstamp_needed_deferred);
+	schedule_work(&netstamp_work);
+#else
 	static_key_slow_dec(&netstamp_needed);
+#endif
 }
 EXPORT_SYMBOL(net_disable_timestamp);
 
-- 
cgit v1.2.3


From 2d9c2e011fd3f1be4e5643d6ad186faa5e50d4d1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 4 Feb 2017 11:16:52 -0800
Subject: ipv4: keep skb->dst around in presence of IP options

[ Upstream commit 34b2cef20f19c87999fff3da4071e66937db9644 ]

Andrey Konovalov got crashes in __ip_options_echo() when a NULL skb->dst
is accessed.

ipv4_pktinfo_prepare() should not drop the dst if (evil) IP options
are present.

We could refine the test to the presence of ts_needtime or srr,
but IP options are not often used, so let's be conservative.

Thanks to syzkaller team for finding this bug.

Fixes: d826eb14ecef ("ipv4: PKTINFO doesnt need dst reference")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/ip_sockglue.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 9ce202549e7a..bc14c5bb124b 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1192,7 +1192,14 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
 		pktinfo->ipi_ifindex = 0;
 		pktinfo->ipi_spec_dst.s_addr = 0;
 	}
-	skb_dst_drop(skb);
+	/* We need to keep the dst for __ip_options_echo()
+	 * We could restrict the test to opt.ts_needtime || opt.srr,
+	 * but the following is good enough as IP options are not often used.
+	 */
+	if (unlikely(IPCB(skb)->opt.optlen))
+		skb_dst_force(skb);
+	else
+		skb_dst_drop(skb);
 }
 
 int ip_setsockopt(struct sock *sk, int level,
-- 
cgit v1.2.3


From 13c3646dac70dbad417cce1ddfd87bd6f8650224 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 3 Feb 2017 00:03:26 -0800
Subject: netlabel: out of bound access in cipso_v4_validate()

[ Upstream commit d71b7896886345c53ef1d84bda2bc758554f5d61 ]

syzkaller found another out of bound access in ip_options_compile(),
or more exactly in cipso_v4_validate()

Fixes: 20e2a8648596 ("cipso: handle CIPSO options correctly when NetLabel is disabled")
Fixes: 446fda4f2682 ("[NetLabel]: CIPSOv4 engine")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov  <dvyukov@google.com>
Cc: Paul Moore <paul@paul-moore.com>
Acked-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/cipso_ipv4.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index bdb2a07ec363..6cc3e1d602fb 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1657,6 +1657,10 @@ int cipso_v4_validate(const struct sk_buff *skb, unsigned char **option)
 				goto validate_return_locked;
 			}
 
+		if (opt_iter + 1 == opt_len) {
+			err_offset = opt_iter;
+			goto validate_return_locked;
+		}
 		tag_len = tag[1];
 		if (tag_len > (opt_len - opt_iter)) {
 			err_offset = opt_iter + 1;
-- 
cgit v1.2.3


From eaa3a58f450383aaa8632eea23f2a70d85c55d9d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 4 Feb 2017 23:18:55 -0800
Subject: ip6_gre: fix ip6gre_err() invalid reads

[ Upstream commit 7892032cfe67f4bde6fc2ee967e45a8fbaf33756 ]

Andrey Konovalov reported out of bound accesses in ip6gre_err()

If GRE flags contains GRE_KEY, the following expression
*(((__be32 *)p) + (grehlen / 4) - 1)

accesses data ~40 bytes after the expected point, since
grehlen includes the size of IPv6 headers.

Let's use a "struct gre_base_hdr *greh" pointer to make this
code more readable.

p[1] becomes greh->protocol.
grhlen is the GRE header length.

Fixes: c12b395a4664 ("gre: Support GRE over IPv6")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/ip6_gre.c | 41 ++++++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 19 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 17430f341073..e89135828c3d 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -55,6 +55,7 @@
 #include <net/ip6_fib.h>
 #include <net/ip6_route.h>
 #include <net/ip6_tunnel.h>
+#include <net/gre.h>
 
 
 static bool log_ecn_error = true;
@@ -367,35 +368,37 @@ static void ip6gre_tunnel_uninit(struct net_device *dev)
 
 
 static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
-		u8 type, u8 code, int offset, __be32 info)
+		       u8 type, u8 code, int offset, __be32 info)
 {
-	const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data;
-	__be16 *p = (__be16 *)(skb->data + offset);
-	int grehlen = offset + 4;
+	const struct gre_base_hdr *greh;
+	const struct ipv6hdr *ipv6h;
+	int grehlen = sizeof(*greh);
 	struct ip6_tnl *t;
+	int key_off = 0;
 	__be16 flags;
+	__be32 key;
 
-	flags = p[0];
-	if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) {
-		if (flags&(GRE_VERSION|GRE_ROUTING))
-			return;
-		if (flags&GRE_KEY) {
-			grehlen += 4;
-			if (flags&GRE_CSUM)
-				grehlen += 4;
-		}
+	if (!pskb_may_pull(skb, offset + grehlen))
+		return;
+	greh = (const struct gre_base_hdr *)(skb->data + offset);
+	flags = greh->flags;
+	if (flags & (GRE_VERSION | GRE_ROUTING))
+		return;
+	if (flags & GRE_CSUM)
+		grehlen += 4;
+	if (flags & GRE_KEY) {
+		key_off = grehlen + offset;
+		grehlen += 4;
 	}
 
-	/* If only 8 bytes returned, keyed message will be dropped here */
-	if (!pskb_may_pull(skb, grehlen))
+	if (!pskb_may_pull(skb, offset + grehlen))
 		return;
 	ipv6h = (const struct ipv6hdr *)skb->data;
-	p = (__be16 *)(skb->data + offset);
+	greh = (const struct gre_base_hdr *)(skb->data + offset);
+	key = key_off ? *(__be32 *)(skb->data + key_off) : 0;
 
 	t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr,
-				flags & GRE_KEY ?
-				*(((__be32 *)p) + (grehlen / 4) - 1) : 0,
-				p[1]);
+				 key, greh->protocol);
 	if (!t)
 		return;
 
-- 
cgit v1.2.3


From 96ada0a978fa1a186477416d2932e8b36a3dfc99 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 5 Feb 2017 20:23:22 -0800
Subject: ipv6: tcp: add a missing tcp_v6_restore_cb()

[ Upstream commit ebf6c9cb23d7e56eec8575a88071dec97ad5c6e2 ]

Dmitry reported use-after-free in ip6_datagram_recv_specific_ctl()

A similar bug was fixed in commit 8ce48623f0cf ("ipv6: tcp: restore
IP6CB for pktoptions skbs"), but I missed another spot.

tcp_v6_syn_recv_sock() can indeed set np->pktoptions from ireq->pktopts

Fixes: 971f10eca186 ("tcp: better TCP_SKB_CB layout to reduce cache line misses")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/tcp_ipv6.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

(limited to 'net')

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5f581616bf6a..76a8c8057a23 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -974,6 +974,16 @@ drop:
 	return 0; /* don't send reset */
 }
 
+static void tcp_v6_restore_cb(struct sk_buff *skb)
+{
+	/* We need to move header back to the beginning if xfrm6_policy_check()
+	 * and tcp_v6_fill_cb() are going to be called again.
+	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
+	 */
+	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
+		sizeof(struct inet6_skb_parm));
+}
+
 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
 					 struct request_sock *req,
 					 struct dst_entry *dst,
@@ -1163,8 +1173,10 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
 						      sk_gfp_atomic(sk, GFP_ATOMIC));
 			consume_skb(ireq->pktopts);
 			ireq->pktopts = NULL;
-			if (newnp->pktoptions)
+			if (newnp->pktoptions) {
+				tcp_v6_restore_cb(newnp->pktoptions);
 				skb_set_owner_r(newnp->pktoptions, newsk);
+			}
 		}
 	}
 
@@ -1179,16 +1191,6 @@ out:
 	return NULL;
 }
 
-static void tcp_v6_restore_cb(struct sk_buff *skb)
-{
-	/* We need to move header back to the beginning if xfrm6_policy_check()
-	 * and tcp_v6_fill_cb() are going to be called again.
-	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
-	 */
-	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
-		sizeof(struct inet6_skb_parm));
-}
-
 /* The socket must have it's spinlock held when we get
  * here, unless it is a TCP_LISTEN socket.
  *
-- 
cgit v1.2.3


From 82e9f6b90a0e7fbc017bdce23845c7580db6f657 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 3 Feb 2017 14:59:38 -0800
Subject: tcp: avoid infinite loop in tcp_splice_read()

[ Upstream commit ccf7abb93af09ad0868ae9033d1ca8108bdaec82 ]

Splicing from TCP socket is vulnerable when a packet with URG flag is
received and stored into receive queue.

__tcp_splice_read() returns 0, and sk_wait_data() immediately
returns since there is the problematic skb in queue.

This is a nice way to burn cpu (aka infinite loop) and trigger
soft lockups.

Again, this gem was found by syzkaller tool.

Fixes: 9c55e01c0cc8 ("[TCP]: Splice receive support.")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Dmitry Vyukov  <dvyukov@google.com>
Cc: Willy Tarreau <w@1wt.eu>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/tcp.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 69daa81736f6..600dcda840d1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -783,6 +783,12 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
 				ret = -EAGAIN;
 				break;
 			}
+			/* if __tcp_splice_read() got nothing while we have
+			 * an skb in receive queue, we do not want to loop.
+			 * This might happen with URG data.
+			 */
+			if (!skb_queue_empty(&sk->sk_receive_queue))
+				break;
 			sk_wait_data(sk, &timeo, NULL);
 			if (signal_pending(current)) {
 				ret = sock_intr_errno(timeo);
-- 
cgit v1.2.3


From a4226c7ebfb5748447f1640c97f0306ed69e44f8 Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Date: Mon, 6 Feb 2017 18:10:31 -0200
Subject: sctp: avoid BUG_ON on sctp_wait_for_sndbuf

[ Upstream commit 2dcab598484185dea7ec22219c76dcdd59e3cb90 ]

Alexander Popov reported that an application may trigger a BUG_ON in
sctp_wait_for_sndbuf if the socket tx buffer is full, a thread is
waiting on it to queue more data and meanwhile another thread peels off
the association being used by the first thread.

This patch replaces the BUG_ON call with a proper error handling. It
will return -EPIPE to the original sendmsg call, similarly to what would
have been done if the association wasn't found in the first place.

Acked-by: Alexander Popov <alex.popov@linux.com>
Signed-off-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Reviewed-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/sctp/socket.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net')

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index b5fd4ab56156..138f2d667212 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -6960,7 +6960,8 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p,
 		 */
 		release_sock(sk);
 		current_timeo = schedule_timeout(current_timeo);
-		BUG_ON(sk != asoc->base.sk);
+		if (sk != asoc->base.sk)
+			goto do_error;
 		lock_sock(sk);
 
 		*timeo_p = current_timeo;
-- 
cgit v1.2.3


From 6f99825e7632ba94b0e5f9986e5f64d9eb9370b1 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Wed, 8 Feb 2017 10:02:13 -0800
Subject: sit: fix a double free on error path

[ Upstream commit d7426c69a1942b2b9b709bf66b944ff09f561484 ]

Dmitry reported a double free in sit_init_net():

  kernel BUG at mm/percpu.c:689!
  invalid opcode: 0000 [#1] SMP KASAN
  Dumping ftrace buffer:
     (ftrace buffer empty)
  Modules linked in:
  CPU: 0 PID: 15692 Comm: syz-executor1 Not tainted 4.10.0-rc6-next-20170206 #1
  Hardware name: Google Google Compute Engine/Google Compute Engine,
  BIOS Google 01/01/2011
  task: ffff8801c9cc27c0 task.stack: ffff88017d1d8000
  RIP: 0010:pcpu_free_area+0x68b/0x810 mm/percpu.c:689
  RSP: 0018:ffff88017d1df488 EFLAGS: 00010046
  RAX: 0000000000010000 RBX: 00000000000007c0 RCX: ffffc90002829000
  RDX: 0000000000010000 RSI: ffffffff81940efb RDI: ffff8801db841d94
  RBP: ffff88017d1df590 R08: dffffc0000000000 R09: 1ffffffff0bb3bdd
  R10: dffffc0000000000 R11: 00000000000135dd R12: ffff8801db841d80
  R13: 0000000000038e40 R14: 00000000000007c0 R15: 00000000000007c0
  FS:  00007f6ea608f700(0000) GS:ffff8801dbe00000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 000000002000aff8 CR3: 00000001c8d44000 CR4: 00000000001426f0
  DR0: 0000000020000000 DR1: 0000000020000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600
  Call Trace:
   free_percpu+0x212/0x520 mm/percpu.c:1264
   ipip6_dev_free+0x43/0x60 net/ipv6/sit.c:1335
   sit_init_net+0x3cb/0xa10 net/ipv6/sit.c:1831
   ops_init+0x10a/0x530 net/core/net_namespace.c:115
   setup_net+0x2ed/0x690 net/core/net_namespace.c:291
   copy_net_ns+0x26c/0x530 net/core/net_namespace.c:396
   create_new_namespaces+0x409/0x860 kernel/nsproxy.c:106
   unshare_nsproxy_namespaces+0xae/0x1e0 kernel/nsproxy.c:205
   SYSC_unshare kernel/fork.c:2281 [inline]
   SyS_unshare+0x64e/0xfc0 kernel/fork.c:2231
   entry_SYSCALL_64_fastpath+0x1f/0xc2

This is because when tunnel->dst_cache init fails, we free dev->tstats
once in ipip6_tunnel_init() and twice in sit_init_net(). This looks
redundant but its ndo_uinit() does not seem enough to clean up everything
here. So avoid this by setting dev->tstats to NULL after the first free,
at least for -net.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv6/sit.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 3da2b16356eb..184f0fe35dc6 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1389,6 +1389,7 @@ static int ipip6_tunnel_init(struct net_device *dev)
 	tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
 	if (!tunnel->dst_cache) {
 		free_percpu(dev->tstats);
+		dev->tstats = NULL;
 		return -ENOMEM;
 	}
 
-- 
cgit v1.2.3


From 58691e5b4f277cf1876dc95654a794b093f88a0f Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Tue, 7 Feb 2017 15:57:20 -0500
Subject: net: introduce device min_header_len

[ Upstream commit 217e6fa24ce28ec87fca8da93c9016cb78028612 ]

The stack must not pass packets to device drivers that are shorter
than the minimum link layer header length.

Previously, packet sockets would drop packets smaller than or equal
to dev->hard_header_len, but this has false positives. Zero length
payload is used over Ethernet. Other link layer protocols support
variable length headers. Support for validation of these protocols
removed the min length check for all protocols.

Introduce an explicit dev->min_header_len parameter and drop all
packets below this value. Initially, set it to non-zero only for
Ethernet and loopback. Other protocols can follow in a patch to
net-next.

Fixes: 9ed988cd5915 ("packet: validate variable length ll headers")
Reported-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Acked-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ethernet/eth.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net')

diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index de85d4e1cf43..52dcd414c2af 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -353,6 +353,7 @@ void ether_setup(struct net_device *dev)
 	dev->header_ops		= &eth_header_ops;
 	dev->type		= ARPHRD_ETHER;
 	dev->hard_header_len 	= ETH_HLEN;
+	dev->min_header_len	= ETH_HLEN;
 	dev->mtu		= ETH_DATA_LEN;
 	dev->addr_len		= ETH_ALEN;
 	dev->tx_queue_len	= 1000;	/* Ethernet wants good queues */
-- 
cgit v1.2.3


From 9117c897c9aa213fa0f682720f88a259b7ce7af9 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Tue, 7 Feb 2017 15:57:21 -0500
Subject: packet: round up linear to header len

[ Upstream commit 57031eb794906eea4e1c7b31dc1e2429c0af0c66 ]

Link layer protocols may unconditionally pull headers, as Ethernet
does in eth_type_trans. Ensure that the entire link layer header
always lies in the skb linear segment. tpacket_snd has such a check.
Extend this to packet_snd.

Variable length link layer headers complicate the computation
somewhat. Here skb->len may be smaller than dev->hard_header_len.

Round up the linear length to be at least as long as the smallest of
the two.

Reported-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/packet/af_packet.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'net')

diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f223d1c80ccf..f2d28ed74a0a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2637,7 +2637,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	int vnet_hdr_len;
 	struct packet_sock *po = pkt_sk(sk);
 	unsigned short gso_type = 0;
-	int hlen, tlen;
+	int hlen, tlen, linear;
 	int extra_len = 0;
 	ssize_t n;
 
@@ -2741,8 +2741,9 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	err = -ENOBUFS;
 	hlen = LL_RESERVED_SPACE(dev);
 	tlen = dev->needed_tailroom;
-	skb = packet_alloc_skb(sk, hlen + tlen, hlen, len,
-			       __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len),
+	linear = __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len);
+	linear = max(linear, min_t(int, len, dev->hard_header_len));
+	skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, linear,
 			       msg->msg_flags & MSG_DONTWAIT, &err);
 	if (skb == NULL)
 		goto out_unlock;
-- 
cgit v1.2.3


From afb4feb8bfef716b12864459d47672cab56dbfd7 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Tue, 7 Feb 2017 12:59:46 -0800
Subject: ping: fix a null pointer dereference

[ Upstream commit 73d2c6678e6c3af7e7a42b1e78cd0211782ade32 ]

Andrey reported a kernel crash:

  general protection fault: 0000 [#1] SMP KASAN
  Dumping ftrace buffer:
     (ftrace buffer empty)
  Modules linked in:
  CPU: 2 PID: 3880 Comm: syz-executor1 Not tainted 4.10.0-rc6+ #124
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
  task: ffff880060048040 task.stack: ffff880069be8000
  RIP: 0010:ping_v4_push_pending_frames net/ipv4/ping.c:647 [inline]
  RIP: 0010:ping_v4_sendmsg+0x1acd/0x23f0 net/ipv4/ping.c:837
  RSP: 0018:ffff880069bef8b8 EFLAGS: 00010206
  RAX: dffffc0000000000 RBX: ffff880069befb90 RCX: 0000000000000000
  RDX: 0000000000000018 RSI: ffff880069befa30 RDI: 00000000000000c2
  RBP: ffff880069befbb8 R08: 0000000000000008 R09: 0000000000000000
  R10: 0000000000000002 R11: 0000000000000000 R12: ffff880069befab0
  R13: ffff88006c624a80 R14: ffff880069befa70 R15: 0000000000000000
  FS:  00007f6f7c716700(0000) GS:ffff88006de00000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00000000004a6f28 CR3: 000000003a134000 CR4: 00000000000006e0
  Call Trace:
   inet_sendmsg+0x164/0x5b0 net/ipv4/af_inet.c:744
   sock_sendmsg_nosec net/socket.c:635 [inline]
   sock_sendmsg+0xca/0x110 net/socket.c:645
   SYSC_sendto+0x660/0x810 net/socket.c:1687
   SyS_sendto+0x40/0x50 net/socket.c:1655
   entry_SYSCALL_64_fastpath+0x1f/0xc2

This is because we miss a check for NULL pointer for skb_peek() when
the queue is empty. Other places already have the same check.

Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind")
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Tested-by: Andrey Konovalov <andreyknvl@google.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/ipv4/ping.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'net')

diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 23160d2b3f71..3a00512addbc 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -645,6 +645,8 @@ static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh,
 {
 	struct sk_buff *skb = skb_peek(&sk->sk_write_queue);
 
+	if (!skb)
+		return 0;
 	pfh->wcheck = csum_partial((char *)&pfh->icmph,
 		sizeof(struct icmphdr), pfh->wcheck);
 	pfh->icmph.checksum = csum_fold(pfh->wcheck);
-- 
cgit v1.2.3


From b9dee56027ae5704e3c4bc243a0a3313ea3814ec Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 9 Feb 2017 16:15:52 -0800
Subject: l2tp: do not use udp_ioctl()

[ Upstream commit 72fb96e7bdbbdd4421b0726992496531060f3636 ]

udp_ioctl(), as its name suggests, is used by UDP protocols,
but is also used by L2TP :(

L2TP should use its own handler, because it really does not
look the same.

SIOCINQ for instance should not assume UDP checksum or headers.

Thanks to Andrey and syzkaller team for providing the report
and a nice reproducer.

While crashes only happen on recent kernels (after commit
7c13f97ffde6 ("udp: do fwd memory scheduling on dequeue")), this
probably needs to be backported to older kernels.

Fixes: 7c13f97ffde6 ("udp: do fwd memory scheduling on dequeue")
Fixes: 85584672012e ("udp: Fix udp_poll() and ioctl()")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Andrey Konovalov <andreyknvl@google.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 net/l2tp/l2tp_core.h |  1 +
 net/l2tp/l2tp_ip.c   | 27 ++++++++++++++++++++++++++-
 net/l2tp/l2tp_ip6.c  |  2 +-
 3 files changed, 28 insertions(+), 2 deletions(-)

(limited to 'net')

diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 5871537af387..763e8e241ce3 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -273,6 +273,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb,
 int l2tp_nl_register_ops(enum l2tp_pwtype pw_type,
 			 const struct l2tp_nl_cmd_ops *ops);
 void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);
+int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg);
 
 /* Session reference counts. Incremented when code obtains a reference
  * to a session.
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index d0e906d39642..445b7cd0826a 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -11,6 +11,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <asm/ioctls.h>
 #include <linux/icmp.h>
 #include <linux/module.h>
 #include <linux/skbuff.h>
@@ -555,6 +556,30 @@ out:
 	return err ? err : copied;
 }
 
+int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg)
+{
+	struct sk_buff *skb;
+	int amount;
+
+	switch (cmd) {
+	case SIOCOUTQ:
+		amount = sk_wmem_alloc_get(sk);
+		break;
+	case SIOCINQ:
+		spin_lock_bh(&sk->sk_receive_queue.lock);
+		skb = skb_peek(&sk->sk_receive_queue);
+		amount = skb ? skb->len : 0;
+		spin_unlock_bh(&sk->sk_receive_queue.lock);
+		break;
+
+	default:
+		return -ENOIOCTLCMD;
+	}
+
+	return put_user(amount, (int __user *)arg);
+}
+EXPORT_SYMBOL(l2tp_ioctl);
+
 static struct proto l2tp_ip_prot = {
 	.name		   = "L2TP/IP",
 	.owner		   = THIS_MODULE,
@@ -563,7 +588,7 @@ static struct proto l2tp_ip_prot = {
 	.bind		   = l2tp_ip_bind,
 	.connect	   = l2tp_ip_connect,
 	.disconnect	   = l2tp_ip_disconnect,
-	.ioctl		   = udp_ioctl,
+	.ioctl		   = l2tp_ioctl,
 	.destroy	   = l2tp_ip_destroy_sock,
 	.setsockopt	   = ip_setsockopt,
 	.getsockopt	   = ip_getsockopt,
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 3c4f867d3633..bcdab1cba773 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -714,7 +714,7 @@ static struct proto l2tp_ip6_prot = {
 	.bind		   = l2tp_ip6_bind,
 	.connect	   = l2tp_ip6_connect,
 	.disconnect	   = l2tp_ip6_disconnect,
-	.ioctl		   = udp_ioctl,
+	.ioctl		   = l2tp_ioctl,
 	.destroy	   = l2tp_ip6_destroy_sock,
 	.setsockopt	   = ipv6_setsockopt,
 	.getsockopt	   = ipv6_getsockopt,
-- 
cgit v1.2.3