From df32cc193ad88f7b1326b90af799c927b27f7654 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Mon, 1 Nov 2010 12:55:52 -0700
Subject: net: check queue_index from sock is valid for device

In dev_pick_tx recompute the queue index if the value stored in the
socket is greater than or equal to the number of real queues for the
device.  The saved index in the sock structure is not guaranteed to
be appropriate for the egress device (this could happen on a route
change or in presence of tunnelling).  The result of the queue index
being bad would be to return a bogus queue (crash could prersumably
follow).

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/core')

diff --git a/net/core/dev.c b/net/core/dev.c
index 35dfb831848..0dd54a69dac 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2131,7 +2131,7 @@ static struct netdev_queue *dev_pick_tx(struct net_device *dev,
 	} else {
 		struct sock *sk = skb->sk;
 		queue_index = sk_tx_queue_get(sk);
-		if (queue_index < 0) {
+		if (queue_index < 0 || queue_index >= dev->real_num_tx_queues) {
 
 			queue_index = 0;
 			if (dev->real_num_tx_queues > 1)
-- 
cgit v1.2.3


From 86c2c0a8a4965ae5cbc0ff97ed39a4472e8e9b23 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Sat, 6 Nov 2010 20:11:38 +0000
Subject: NET: pktgen - fix compile warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This should fix the following warning:

net/core/pktgen.c: In function ‘pktgen_if_write’:
net/core/pktgen.c:890: warning: comparison of distinct pointer types lacks a cast

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
Reviewed-by: Nelson Elhage <nelhage@ksplice.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/core')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index fbce4b05a53..1992cd050e2 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -887,7 +887,7 @@ static ssize_t pktgen_if_write(struct file *file,
 	i += len;
 
 	if (debug) {
-		size_t copy = min(count, 1023);
+		size_t copy = min_t(size_t, count, 1023);
 		char tb[copy + 1];
 		if (copy_from_user(tb, user_buffer, copy))
 			return -EFAULT;
-- 
cgit v1.2.3


From eb589063ed482f5592b1378e4136d6998419af6e Mon Sep 17 00:00:00 2001
From: Junchang Wang <junchangwang@gmail.com>
Date: Sun, 7 Nov 2010 23:19:43 +0000
Subject: pktgen: correct uninitialized queue_map

This fix a bug reported by backyes.
Right the first time pktgen's using queue_map that's not been initialized
by set_cur_queue_map(pkt_dev);

Signed-off-by: Junchang Wang <junchangwang@gmail.com>
Signed-off-by: Backyes <backyes@mail.ustc.edu.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/pktgen.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/core')

diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 1992cd050e2..33bc3823ac6 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2612,8 +2612,8 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev,
 	/* Update any of the values, used when we're incrementing various
 	 * fields.
 	 */
-	queue_map = pkt_dev->cur_queue_map;
 	mod_cur_headers(pkt_dev);
+	queue_map = pkt_dev->cur_queue_map;
 
 	datalen = (odev->hard_header_len + 16) & ~0xf;
 
@@ -2976,8 +2976,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	/* Update any of the values, used when we're incrementing various
 	 * fields.
 	 */
-	queue_map = pkt_dev->cur_queue_map;
 	mod_cur_headers(pkt_dev);
+	queue_map = pkt_dev->cur_queue_map;
 
 	skb = __netdev_alloc_skb(odev,
 				 pkt_dev->cur_pkt_size + 64
-- 
cgit v1.2.3


From 332dd96f7ac15e937088fe11f15cfe0210e8edd1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 9 Nov 2010 11:46:33 -0800
Subject: net/dst: dst_dev_event() called after other notifiers

Followup of commit ef885afbf8a37689 (net: use rcu_barrier() in
rollback_registered_many)

dst_dev_event() scans a garbage dst list that might be feeded by various
network notifiers at device dismantle time.

Its important to call dst_dev_event() after other notifiers, or we might
enter the infamous msleep(250) in netdev_wait_allrefs(), and wait one
second before calling again call_netdevice_notifiers(NETDEV_UNREGISTER,
dev) to properly remove last device references.

Use priority -10 to let dst_dev_notifier be called after other network
notifiers (they have the default 0 priority)

Reported-by: Ben Greear <greearb@candelatech.com>
Reported-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Reported-by: Octavian Purdila <opurdila@ixiacom.com>
Reported-by: Benjamin LaHaise <bcrl@kvack.org>
Tested-by: Ben Greear <greearb@candelatech.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dst.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'net/core')

diff --git a/net/core/dst.c b/net/core/dst.c
index 8abe628b79f..b99c7c7ffce 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -370,6 +370,7 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event,
 
 static struct notifier_block dst_dev_notifier = {
 	.notifier_call	= dst_dev_event,
+	.priority = -10, /* must be called after other network notifiers */
 };
 
 void __init dst_init(void)
-- 
cgit v1.2.3


From 57fe93b374a6b8711995c2d466c502af9f3a08bb Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 10 Nov 2010 10:38:24 -0800
Subject: filter: make sure filters dont read uninitialized memory

There is a possibility malicious users can get limited information about
uninitialized stack mem array. Even if sk_run_filter() result is bound
to packet length (0 .. 65535), we could imagine this can be used by
hostile user.

Initializing mem[] array, like Dan Rosenberg suggested in his patch is
expensive since most filters dont even use this array.

Its hard to make the filter validation in sk_chk_filter(), because of
the jumps. This might be done later.

In this patch, I use a bitmap (a single long var) so that only filters
using mem[] loads/stores pay the price of added security checks.

For other filters, additional cost is a single instruction.

[ Since we access fentry->k a lot now, cache it in a local variable
  and mark filter entry pointer as const. -DaveM ]

Reported-by: Dan Rosenberg <drosenberg@vsecurity.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/filter.c | 64 ++++++++++++++++++++++++++++++-------------------------
 1 file changed, 35 insertions(+), 29 deletions(-)

(limited to 'net/core')

diff --git a/net/core/filter.c b/net/core/filter.c
index 7beaec36b54..23e9b2a6b4c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -112,39 +112,41 @@ EXPORT_SYMBOL(sk_filter);
  */
 unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
 {
-	struct sock_filter *fentry;	/* We walk down these */
 	void *ptr;
 	u32 A = 0;			/* Accumulator */
 	u32 X = 0;			/* Index Register */
 	u32 mem[BPF_MEMWORDS];		/* Scratch Memory Store */
+	unsigned long memvalid = 0;
 	u32 tmp;
 	int k;
 	int pc;
 
+	BUILD_BUG_ON(BPF_MEMWORDS > BITS_PER_LONG);
 	/*
 	 * Process array of filter instructions.
 	 */
 	for (pc = 0; pc < flen; pc++) {
-		fentry = &filter[pc];
+		const struct sock_filter *fentry = &filter[pc];
+		u32 f_k = fentry->k;
 
 		switch (fentry->code) {
 		case BPF_S_ALU_ADD_X:
 			A += X;
 			continue;
 		case BPF_S_ALU_ADD_K:
-			A += fentry->k;
+			A += f_k;
 			continue;
 		case BPF_S_ALU_SUB_X:
 			A -= X;
 			continue;
 		case BPF_S_ALU_SUB_K:
-			A -= fentry->k;
+			A -= f_k;
 			continue;
 		case BPF_S_ALU_MUL_X:
 			A *= X;
 			continue;
 		case BPF_S_ALU_MUL_K:
-			A *= fentry->k;
+			A *= f_k;
 			continue;
 		case BPF_S_ALU_DIV_X:
 			if (X == 0)
@@ -152,49 +154,49 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
 			A /= X;
 			continue;
 		case BPF_S_ALU_DIV_K:
-			A /= fentry->k;
+			A /= f_k;
 			continue;
 		case BPF_S_ALU_AND_X:
 			A &= X;
 			continue;
 		case BPF_S_ALU_AND_K:
-			A &= fentry->k;
+			A &= f_k;
 			continue;
 		case BPF_S_ALU_OR_X:
 			A |= X;
 			continue;
 		case BPF_S_ALU_OR_K:
-			A |= fentry->k;
+			A |= f_k;
 			continue;
 		case BPF_S_ALU_LSH_X:
 			A <<= X;
 			continue;
 		case BPF_S_ALU_LSH_K:
-			A <<= fentry->k;
+			A <<= f_k;
 			continue;
 		case BPF_S_ALU_RSH_X:
 			A >>= X;
 			continue;
 		case BPF_S_ALU_RSH_K:
-			A >>= fentry->k;
+			A >>= f_k;
 			continue;
 		case BPF_S_ALU_NEG:
 			A = -A;
 			continue;
 		case BPF_S_JMP_JA:
-			pc += fentry->k;
+			pc += f_k;
 			continue;
 		case BPF_S_JMP_JGT_K:
-			pc += (A > fentry->k) ? fentry->jt : fentry->jf;
+			pc += (A > f_k) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JGE_K:
-			pc += (A >= fentry->k) ? fentry->jt : fentry->jf;
+			pc += (A >= f_k) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JEQ_K:
-			pc += (A == fentry->k) ? fentry->jt : fentry->jf;
+			pc += (A == f_k) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JSET_K:
-			pc += (A & fentry->k) ? fentry->jt : fentry->jf;
+			pc += (A & f_k) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_JMP_JGT_X:
 			pc += (A > X) ? fentry->jt : fentry->jf;
@@ -209,7 +211,7 @@ unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int
 			pc += (A & X) ? fentry->jt : fentry->jf;
 			continue;
 		case BPF_S_LD_W_ABS:
-			k = fentry->k;
+			k = f_k;
 load_w:
 			ptr = load_pointer(skb, k, 4, &tmp);
 			if (ptr != NULL) {
@@ -218,7 +220,7 @@ load_w:
 			}
 			break;
 		case BPF_S_LD_H_ABS:
-			k = fentry->k;
+			k = f_k;
 load_h:
 			ptr = load_pointer(skb, k, 2, &tmp);
 			if (ptr != NULL) {
@@ -227,7 +229,7 @@ load_h:
 			}
 			break;
 		case BPF_S_LD_B_ABS:
-			k = fentry->k;
+			k = f_k;
 load_b:
 			ptr = load_pointer(skb, k, 1, &tmp);
 			if (ptr != NULL) {
@@ -242,32 +244,34 @@ load_b:
 			X = skb->len;
 			continue;
 		case BPF_S_LD_W_IND:
-			k = X + fentry->k;
+			k = X + f_k;
 			goto load_w;
 		case BPF_S_LD_H_IND:
-			k = X + fentry->k;
+			k = X + f_k;
 			goto load_h;
 		case BPF_S_LD_B_IND:
-			k = X + fentry->k;
+			k = X + f_k;
 			goto load_b;
 		case BPF_S_LDX_B_MSH:
-			ptr = load_pointer(skb, fentry->k, 1, &tmp);
+			ptr = load_pointer(skb, f_k, 1, &tmp);
 			if (ptr != NULL) {
 				X = (*(u8 *)ptr & 0xf) << 2;
 				continue;
 			}
 			return 0;
 		case BPF_S_LD_IMM:
-			A = fentry->k;
+			A = f_k;
 			continue;
 		case BPF_S_LDX_IMM:
-			X = fentry->k;
+			X = f_k;
 			continue;
 		case BPF_S_LD_MEM:
-			A = mem[fentry->k];
+			A = (memvalid & (1UL << f_k)) ?
+				mem[f_k] : 0;
 			continue;
 		case BPF_S_LDX_MEM:
-			X = mem[fentry->k];
+			X = (memvalid & (1UL << f_k)) ?
+				mem[f_k] : 0;
 			continue;
 		case BPF_S_MISC_TAX:
 			X = A;
@@ -276,14 +280,16 @@ load_b:
 			A = X;
 			continue;
 		case BPF_S_RET_K:
-			return fentry->k;
+			return f_k;
 		case BPF_S_RET_A:
 			return A;
 		case BPF_S_ST:
-			mem[fentry->k] = A;
+			memvalid |= 1UL << f_k;
+			mem[f_k] = A;
 			continue;
 		case BPF_S_STX:
-			mem[fentry->k] = X;
+			memvalid |= 1UL << f_k;
+			mem[f_k] = X;
 			continue;
 		default:
 			WARN_ON(1);
-- 
cgit v1.2.3


From 8d987e5c75107ca7515fa19e857cfa24aab6ec8f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 9 Nov 2010 23:24:26 +0000
Subject: net: avoid limits overflow

Robin Holt tried to boot a 16TB machine and found some limits were
reached : sysctl_tcp_mem[2], sysctl_udp_mem[2]

We can switch infrastructure to use long "instead" of "int", now
atomic_long_t primitives are available for free.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Reported-by: Robin Holt <holt@sgi.com>
Reviewed-by: Robin Holt <holt@sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

(limited to 'net/core')

diff --git a/net/core/sock.c b/net/core/sock.c
index 3eed5424e65..fb608011146 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1653,10 +1653,10 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
 {
 	struct proto *prot = sk->sk_prot;
 	int amt = sk_mem_pages(size);
-	int allocated;
+	long allocated;
 
 	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
-	allocated = atomic_add_return(amt, prot->memory_allocated);
+	allocated = atomic_long_add_return(amt, prot->memory_allocated);
 
 	/* Under limit. */
 	if (allocated <= prot->sysctl_mem[0]) {
@@ -1714,7 +1714,7 @@ suppress_allocation:
 
 	/* Alas. Undo changes. */
 	sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
-	atomic_sub(amt, prot->memory_allocated);
+	atomic_long_sub(amt, prot->memory_allocated);
 	return 0;
 }
 EXPORT_SYMBOL(__sk_mem_schedule);
@@ -1727,12 +1727,12 @@ void __sk_mem_reclaim(struct sock *sk)
 {
 	struct proto *prot = sk->sk_prot;
 
-	atomic_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
+	atomic_long_sub(sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT,
 		   prot->memory_allocated);
 	sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
 
 	if (prot->memory_pressure && *prot->memory_pressure &&
-	    (atomic_read(prot->memory_allocated) < prot->sysctl_mem[0]))
+	    (atomic_long_read(prot->memory_allocated) < prot->sysctl_mem[0]))
 		*prot->memory_pressure = 0;
 }
 EXPORT_SYMBOL(__sk_mem_reclaim);
@@ -2452,12 +2452,12 @@ static char proto_method_implemented(const void *method)
 
 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
 {
-	seq_printf(seq, "%-9s %4u %6d  %6d   %-3s %6u   %-3s  %-10s "
+	seq_printf(seq, "%-9s %4u %6d  %6ld   %-3s %6u   %-3s  %-10s "
 			"%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
 		   proto->name,
 		   proto->obj_size,
 		   sock_prot_inuse_get(seq_file_net(seq), proto),
-		   proto->memory_allocated != NULL ? atomic_read(proto->memory_allocated) : -1,
+		   proto->memory_allocated != NULL ? atomic_long_read(proto->memory_allocated) : -1L,
 		   proto->memory_pressure != NULL ? *proto->memory_pressure ? "yes" : "no" : "NI",
 		   proto->max_header,
 		   proto->slab == NULL ? "no" : "yes",
-- 
cgit v1.2.3


From 369cf77a6a3e41b1110506ddf43d45804103bfde Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@infradead.org>
Date: Thu, 11 Nov 2010 15:47:59 +0000
Subject: rtnetlink: Fix message size calculation for link messages

nlmsg_total_size() calculates the length of a netlink message
including header and alignment. nla_total_size() calculates the
space an individual attribute consumes which was meant to be used
in this context.

Also, ensure to account for the attribute header for the
IFLA_INFO_XSTATS attribute as implementations of get_xstats_size()
seem to assume that we do so.

The addition of two message headers minus the missing attribute
header resulted in a calculated message size that was larger than
required. Therefore we never risked running out of skb tailroom.

Signed-off-by: Thomas Graf <tgraf@infradead.org>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'net/core')

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 8121268ddbd..841c287ef40 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -347,16 +347,17 @@ static size_t rtnl_link_get_size(const struct net_device *dev)
 	if (!ops)
 		return 0;
 
-	size = nlmsg_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */
-	       nlmsg_total_size(strlen(ops->kind) + 1);	 /* IFLA_INFO_KIND */
+	size = nla_total_size(sizeof(struct nlattr)) + /* IFLA_LINKINFO */
+	       nla_total_size(strlen(ops->kind) + 1);  /* IFLA_INFO_KIND */
 
 	if (ops->get_size)
 		/* IFLA_INFO_DATA + nested data */
-		size += nlmsg_total_size(sizeof(struct nlattr)) +
+		size += nla_total_size(sizeof(struct nlattr)) +
 			ops->get_size(dev);
 
 	if (ops->get_xstats_size)
-		size += ops->get_xstats_size(dev);	/* IFLA_INFO_XSTATS */
+		/* IFLA_INFO_XSTATS */
+		size += nla_total_size(ops->get_xstats_size(dev));
 
 	return size;
 }
-- 
cgit v1.2.3


From 7d8e76bf9ac3604897f0ce12e8bf09b68c2a2c89 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Tue, 16 Nov 2010 19:42:53 +0000
Subject: net: zero kobject in rx_queue_release

netif_set_real_num_rx_queues() can decrement and increment
the number of rx queues. For example ixgbe does this as
features and offloads are toggled. Presumably this could
also happen across down/up on most devices if the available
resources changed (cpu offlined).

The kobject needs to be zero'd in this case so that the
state is not preserved across kobject_put()/kobject_init_and_add().

This resolves the following error report.

ixgbe 0000:03:00.0: eth2: NIC Link is Up 10 Gbps, Flow Control: RX/TX
kobject (ffff880324b83210): tried to init an initialized object, something is seriously wrong.
Pid: 1972, comm: lldpad Not tainted 2.6.37-rc18021qaz+ #169
Call Trace:
 [<ffffffff8121c940>] kobject_init+0x3a/0x83
 [<ffffffff8121cf77>] kobject_init_and_add+0x23/0x57
 [<ffffffff8107b800>] ? mark_lock+0x21/0x267
 [<ffffffff813c6d11>] net_rx_queue_update_kobjects+0x63/0xc6
 [<ffffffff813b5e0e>] netif_set_real_num_rx_queues+0x5f/0x78
 [<ffffffffa0261d49>] ixgbe_set_num_queues+0x1c6/0x1ca [ixgbe]
 [<ffffffffa0262509>] ixgbe_init_interrupt_scheme+0x1e/0x79c [ixgbe]
 [<ffffffffa0274596>] ixgbe_dcbnl_set_state+0x167/0x189 [ixgbe]

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net-sysfs.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'net/core')

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index a5ff5a89f37..7f902cad10f 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -712,15 +712,21 @@ static void rx_queue_release(struct kobject *kobj)
 
 
 	map = rcu_dereference_raw(queue->rps_map);
-	if (map)
+	if (map) {
+		RCU_INIT_POINTER(queue->rps_map, NULL);
 		call_rcu(&map->rcu, rps_map_release);
+	}
 
 	flow_table = rcu_dereference_raw(queue->rps_flow_table);
-	if (flow_table)
+	if (flow_table) {
+		RCU_INIT_POINTER(queue->rps_flow_table, NULL);
 		call_rcu(&flow_table->rcu, rps_dev_flow_table_release);
+	}
 
 	if (atomic_dec_and_test(&first->count))
 		kfree(first);
+	else
+		memset(kobj, 0, sizeof(*kobj));
 }
 
 static struct kobj_type rx_queue_ktype = {
-- 
cgit v1.2.3


From 0302b8622ce696af1cda22fcf207d3793350e896 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Thu, 18 Nov 2010 13:02:37 +0000
Subject: net: fix kernel-doc for sk_filter_rcu_release

Fix kernel-doc warning for sk_filter_rcu_release():

Warning(net/core/filter.c:586): missing initial short description on line:
 * 	sk_filter_rcu_release: Release a socket filter by rcu_head

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc:	"David S. Miller" <davem@davemloft.net>
Cc:	netdev@vger.kernel.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/filter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'net/core')

diff --git a/net/core/filter.c b/net/core/filter.c
index 23e9b2a6b4c..c1ee800bc08 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -589,7 +589,7 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 EXPORT_SYMBOL(sk_chk_filter);
 
 /**
- * 	sk_filter_rcu_release: Release a socket filter by rcu_head
+ * 	sk_filter_rcu_release - Release a socket filter by rcu_head
  *	@rcu: rcu_head that contains the sk_filter to free
  */
 static void sk_filter_rcu_release(struct rcu_head *rcu)
-- 
cgit v1.2.3


From 7a1c8e5ab120a5f352e78bbc1fa5bb64e6f23639 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 20 Nov 2010 07:46:35 +0000
Subject: net: allow GFP_HIGHMEM in __vmalloc()

We forgot to use __GFP_HIGHMEM in several __vmalloc() calls.

In ceph, add the missing flag.

In fib_trie.c, xfrm_hash.c and request_sock.c, using vzalloc() is
cleaner and allows using HIGHMEM pages as well.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/request_sock.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'net/core')

diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 7552495aff7..fceeb37d716 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -45,9 +45,7 @@ int reqsk_queue_alloc(struct request_sock_queue *queue,
 	nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
 	lopt_size += nr_table_entries * sizeof(struct request_sock *);
 	if (lopt_size > PAGE_SIZE)
-		lopt = __vmalloc(lopt_size,
-			GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
-			PAGE_KERNEL);
+		lopt = vzalloc(lopt_size);
 	else
 		lopt = kzalloc(lopt_size, GFP_KERNEL);
 	if (lopt == NULL)
-- 
cgit v1.2.3


From 46bcf14f44d8f31ecfdc8b6708ec15a3b33316d9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 6 Dec 2010 09:29:43 -0800
Subject: filter: fix sk_filter rcu handling

Pavel Emelyanov tried to fix a race between sk_filter_(de|at)tach and
sk_clone() in commit 47e958eac280c263397

Problem is we can have several clones sharing a common sk_filter, and
these clones might want to sk_filter_attach() their own filters at the
same time, and can overwrite old_filter->rcu, corrupting RCU queues.

We can not use filter->rcu without being sure no other thread could do
the same thing.

Switch code to a more conventional ref-counting technique : Do the
atomic decrement immediately and queue one rcu call back when last
reference is released.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/filter.c | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

(limited to 'net/core')

diff --git a/net/core/filter.c b/net/core/filter.c
index c1ee800bc08..ae21a0d3c4a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -589,23 +589,16 @@ int sk_chk_filter(struct sock_filter *filter, int flen)
 EXPORT_SYMBOL(sk_chk_filter);
 
 /**
- * 	sk_filter_rcu_release - Release a socket filter by rcu_head
+ * 	sk_filter_release_rcu - Release a socket filter by rcu_head
  *	@rcu: rcu_head that contains the sk_filter to free
  */
-static void sk_filter_rcu_release(struct rcu_head *rcu)
+void sk_filter_release_rcu(struct rcu_head *rcu)
 {
 	struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu);
 
-	sk_filter_release(fp);
-}
-
-static void sk_filter_delayed_uncharge(struct sock *sk, struct sk_filter *fp)
-{
-	unsigned int size = sk_filter_len(fp);
-
-	atomic_sub(size, &sk->sk_omem_alloc);
-	call_rcu_bh(&fp->rcu, sk_filter_rcu_release);
+	kfree(fp);
 }
+EXPORT_SYMBOL(sk_filter_release_rcu);
 
 /**
  *	sk_attach_filter - attach a socket filter
@@ -649,7 +642,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk)
 	rcu_assign_pointer(sk->sk_filter, fp);
 
 	if (old_fp)
-		sk_filter_delayed_uncharge(sk, old_fp);
+		sk_filter_uncharge(sk, old_fp);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(sk_attach_filter);
@@ -663,7 +656,7 @@ int sk_detach_filter(struct sock *sk)
 					   sock_owned_by_user(sk));
 	if (filter) {
 		rcu_assign_pointer(sk->sk_filter, NULL);
-		sk_filter_delayed_uncharge(sk, filter);
+		sk_filter_uncharge(sk, filter);
 		ret = 0;
 	}
 	return ret;
-- 
cgit v1.2.3


From a19faf0250e09b16cac169354126404bc8aa342b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 5 Dec 2010 18:50:32 +0000
Subject: net: fix skb_defer_rx_timestamp()

After commit c1f19b51d1d8 (net: support time stamping in phy devices.),
kernel might crash if CONFIG_NETWORK_PHY_TIMESTAMPING=y and
skb_defer_rx_timestamp() handles a packet without an ethernet header.

Fixes kernel bugzilla #24102

Reference: https://bugzilla.kernel.org/show_bug.cgi?id=24102
Reported-and-tested-by: Andrew Watts <akwatts@ymail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/timestamping.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net/core')

diff --git a/net/core/timestamping.c b/net/core/timestamping.c
index 0ae6c22da85..c19bb4ee405 100644
--- a/net/core/timestamping.c
+++ b/net/core/timestamping.c
@@ -96,11 +96,13 @@ bool skb_defer_rx_timestamp(struct sk_buff *skb)
 	struct phy_device *phydev;
 	unsigned int type;
 
-	skb_push(skb, ETH_HLEN);
+	if (skb_headroom(skb) < ETH_HLEN)
+		return false;
+	__skb_push(skb, ETH_HLEN);
 
 	type = classify(skb);
 
-	skb_pull(skb, ETH_HLEN);
+	__skb_pull(skb, ETH_HLEN);
 
 	switch (type) {
 	case PTP_CLASS_V1_IPV4:
-- 
cgit v1.2.3