aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
authorJens Axboe <jens.axboe@oracle.com>2009-05-22 20:25:34 +0200
committerJens Axboe <jens.axboe@oracle.com>2009-05-22 20:25:34 +0200
commite4b636366c00738b9609cda307014d71b1225b7f (patch)
tree760b67b3624eda62e943e48ce93635c30a5b47bf /net/ipv4
parentb9ed7252d219c1c663944bf03846eabb515dbe75 (diff)
parent279e677faa775ad16e75c32e1bf4a37f8158bc61 (diff)
Merge branch 'master' into for-2.6.31
Conflicts: drivers/block/hd.c drivers/block/mg_disk.c Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/Kconfig2
-rw-r--r--net/ipv4/netfilter/arp_tables.c125
-rw-r--r--net/ipv4/netfilter/ip_tables.c126
-rw-r--r--net/ipv4/route.c2
-rw-r--r--net/ipv4/tcp_input.c10
5 files changed, 73 insertions, 192 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index b2cf91e4cca..9d26a3da37e 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -407,7 +407,7 @@ config INET_XFRM_MODE_BEET
If unsure, say Y.
config INET_LRO
- tristate "Large Receive Offload (ipv4/tcp)"
+ bool "Large Receive Offload (ipv4/tcp)"
---help---
Support for Large Receive Offload (ipv4/tcp).
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 5ba533d234d..831fe1879dc 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -253,9 +253,9 @@ unsigned int arpt_do_table(struct sk_buff *skb,
indev = in ? in->name : nulldevname;
outdev = out ? out->name : nulldevname;
- rcu_read_lock_bh();
- private = rcu_dereference(table->private);
- table_base = rcu_dereference(private->entries[smp_processor_id()]);
+ xt_info_rdlock_bh();
+ private = table->private;
+ table_base = private->entries[smp_processor_id()];
e = get_entry(table_base, private->hook_entry[hook]);
back = get_entry(table_base, private->underflow[hook]);
@@ -273,6 +273,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
(2 * skb->dev->addr_len);
+
ADD_COUNTER(e->counters, hdr_len, 1);
t = arpt_get_target(e);
@@ -328,8 +329,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
e = (void *)e + e->next_offset;
}
} while (!hotdrop);
-
- rcu_read_unlock_bh();
+ xt_info_rdunlock_bh();
if (hotdrop)
return NF_DROP;
@@ -711,9 +711,12 @@ static void get_counters(const struct xt_table_info *t,
/* Instead of clearing (by a previous call to memset())
* the counters and using adds, we set the counters
* with data used by 'current' CPU
- * We dont care about preemption here.
+ *
+ * Bottom half has to be disabled to prevent deadlock
+ * if new softirq were to run and call ipt_do_table
*/
- curcpu = raw_smp_processor_id();
+ local_bh_disable();
+ curcpu = smp_processor_id();
i = 0;
ARPT_ENTRY_ITERATE(t->entries[curcpu],
@@ -726,73 +729,22 @@ static void get_counters(const struct xt_table_info *t,
if (cpu == curcpu)
continue;
i = 0;
+ xt_info_wrlock(cpu);
ARPT_ENTRY_ITERATE(t->entries[cpu],
t->size,
add_entry_to_counter,
counters,
&i);
+ xt_info_wrunlock(cpu);
}
-}
-
-
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static int
-add_counter_to_entry(struct arpt_entry *e,
- const struct xt_counters addme[],
- unsigned int *i)
-{
- ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
- (*i)++;
- return 0;
-}
-
-/* Take values from counters and add them back onto the current cpu */
-static void put_counters(struct xt_table_info *t,
- const struct xt_counters counters[])
-{
- unsigned int i, cpu;
-
- local_bh_disable();
- cpu = smp_processor_id();
- i = 0;
- ARPT_ENTRY_ITERATE(t->entries[cpu],
- t->size,
- add_counter_to_entry,
- counters,
- &i);
local_bh_enable();
}
-static inline int
-zero_entry_counter(struct arpt_entry *e, void *arg)
-{
- e->counters.bcnt = 0;
- e->counters.pcnt = 0;
- return 0;
-}
-
-static void
-clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
-{
- unsigned int cpu;
- const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
-
- memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
- for_each_possible_cpu(cpu) {
- memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
- ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
- zero_entry_counter, NULL);
- }
-}
-
static struct xt_counters *alloc_counters(struct xt_table *table)
{
unsigned int countersize;
struct xt_counters *counters;
struct xt_table_info *private = table->private;
- struct xt_table_info *info;
/* We need atomic snapshot of counters: rest doesn't change
* (other than comefrom, which userspace doesn't care
@@ -802,30 +754,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table)
counters = vmalloc_node(countersize, numa_node_id());
if (counters == NULL)
- goto nomem;
-
- info = xt_alloc_table_info(private->size);
- if (!info)
- goto free_counters;
-
- clone_counters(info, private);
-
- mutex_lock(&table->lock);
- xt_table_entry_swap_rcu(private, info);
- synchronize_net(); /* Wait until smoke has cleared */
+ return ERR_PTR(-ENOMEM);
- get_counters(info, counters);
- put_counters(private, counters);
- mutex_unlock(&table->lock);
-
- xt_free_table_info(info);
+ get_counters(private, counters);
return counters;
-
- free_counters:
- vfree(counters);
- nomem:
- return ERR_PTR(-ENOMEM);
}
static int copy_entries_to_user(unsigned int total_size,
@@ -1094,8 +1027,9 @@ static int __do_replace(struct net *net, const char *name,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
- /* Get the old counters. */
+ /* Get the old counters, and synchronize with replace */
get_counters(oldinfo, counters);
+
/* Decrease module usage counts and free resource */
loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
@@ -1165,10 +1099,23 @@ static int do_replace(struct net *net, void __user *user, unsigned int len)
return ret;
}
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct arpt_entry *e,
+ const struct xt_counters addme[],
+ unsigned int *i)
+{
+ ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+ (*i)++;
+ return 0;
+}
+
static int do_add_counters(struct net *net, void __user *user, unsigned int len,
int compat)
{
- unsigned int i;
+ unsigned int i, curcpu;
struct xt_counters_info tmp;
struct xt_counters *paddc;
unsigned int num_counters;
@@ -1224,26 +1171,26 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
goto free;
}
- mutex_lock(&t->lock);
+ local_bh_disable();
private = t->private;
if (private->number != num_counters) {
ret = -EINVAL;
goto unlock_up_free;
}
- preempt_disable();
i = 0;
/* Choose the copy that is on our node */
- loc_cpu_entry = private->entries[smp_processor_id()];
+ curcpu = smp_processor_id();
+ loc_cpu_entry = private->entries[curcpu];
+ xt_info_wrlock(curcpu);
ARPT_ENTRY_ITERATE(loc_cpu_entry,
private->size,
add_counter_to_entry,
paddc,
&i);
- preempt_enable();
+ xt_info_wrunlock(curcpu);
unlock_up_free:
- mutex_unlock(&t->lock);
-
+ local_bh_enable();
xt_table_unlock(t);
module_put(t->me);
free:
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 810c0b62c7d..2ec8d7290c4 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -338,10 +338,9 @@ ipt_do_table(struct sk_buff *skb,
tgpar.hooknum = hook;
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
-
- rcu_read_lock_bh();
- private = rcu_dereference(table->private);
- table_base = rcu_dereference(private->entries[smp_processor_id()]);
+ xt_info_rdlock_bh();
+ private = table->private;
+ table_base = private->entries[smp_processor_id()];
e = get_entry(table_base, private->hook_entry[hook]);
@@ -436,8 +435,7 @@ ipt_do_table(struct sk_buff *skb,
e = (void *)e + e->next_offset;
}
} while (!hotdrop);
-
- rcu_read_unlock_bh();
+ xt_info_rdunlock_bh();
#ifdef DEBUG_ALLOW_ALL
return NF_ACCEPT;
@@ -896,10 +894,13 @@ get_counters(const struct xt_table_info *t,
/* Instead of clearing (by a previous call to memset())
* the counters and using adds, we set the counters
- * with data used by 'current' CPU
- * We dont care about preemption here.
+ * with data used by 'current' CPU.
+ *
+ * Bottom half has to be disabled to prevent deadlock
+ * if new softirq were to run and call ipt_do_table
*/
- curcpu = raw_smp_processor_id();
+ local_bh_disable();
+ curcpu = smp_processor_id();
i = 0;
IPT_ENTRY_ITERATE(t->entries[curcpu],
@@ -912,74 +913,22 @@ get_counters(const struct xt_table_info *t,
if (cpu == curcpu)
continue;
i = 0;
+ xt_info_wrlock(cpu);
IPT_ENTRY_ITERATE(t->entries[cpu],
t->size,
add_entry_to_counter,
counters,
&i);
+ xt_info_wrunlock(cpu);
}
-
-}
-
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static int
-add_counter_to_entry(struct ipt_entry *e,
- const struct xt_counters addme[],
- unsigned int *i)
-{
- ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
- (*i)++;
- return 0;
-}
-
-/* Take values from counters and add them back onto the current cpu */
-static void put_counters(struct xt_table_info *t,
- const struct xt_counters counters[])
-{
- unsigned int i, cpu;
-
- local_bh_disable();
- cpu = smp_processor_id();
- i = 0;
- IPT_ENTRY_ITERATE(t->entries[cpu],
- t->size,
- add_counter_to_entry,
- counters,
- &i);
local_bh_enable();
}
-
-static inline int
-zero_entry_counter(struct ipt_entry *e, void *arg)
-{
- e->counters.bcnt = 0;
- e->counters.pcnt = 0;
- return 0;
-}
-
-static void
-clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
-{
- unsigned int cpu;
- const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
-
- memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
- for_each_possible_cpu(cpu) {
- memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
- IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
- zero_entry_counter, NULL);
- }
-}
-
static struct xt_counters * alloc_counters(struct xt_table *table)
{
unsigned int countersize;
struct xt_counters *counters;
struct xt_table_info *private = table->private;
- struct xt_table_info *info;
/* We need atomic snapshot of counters: rest doesn't change
(other than comefrom, which userspace doesn't care
@@ -988,30 +937,11 @@ static struct xt_counters * alloc_counters(struct xt_table *table)
counters = vmalloc_node(countersize, numa_node_id());
if (counters == NULL)
- goto nomem;
+ return ERR_PTR(-ENOMEM);
- info = xt_alloc_table_info(private->size);
- if (!info)
- goto free_counters;
-
- clone_counters(info, private);
-
- mutex_lock(&table->lock);
- xt_table_entry_swap_rcu(private, info);
- synchronize_net(); /* Wait until smoke has cleared */
-
- get_counters(info, counters);
- put_counters(private, counters);
- mutex_unlock(&table->lock);
-
- xt_free_table_info(info);
+ get_counters(private, counters);
return counters;
-
- free_counters:
- vfree(counters);
- nomem:
- return ERR_PTR(-ENOMEM);
}
static int
@@ -1306,8 +1236,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
- /* Get the old counters. */
+ /* Get the old counters, and synchronize with replace */
get_counters(oldinfo, counters);
+
/* Decrease module usage counts and free resource */
loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
@@ -1377,11 +1308,23 @@ do_replace(struct net *net, void __user *user, unsigned int len)
return ret;
}
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct ipt_entry *e,
+ const struct xt_counters addme[],
+ unsigned int *i)
+{
+ ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+ (*i)++;
+ return 0;
+}
static int
do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
{
- unsigned int i;
+ unsigned int i, curcpu;
struct xt_counters_info tmp;
struct xt_counters *paddc;
unsigned int num_counters;
@@ -1437,25 +1380,26 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat
goto free;
}
- mutex_lock(&t->lock);
+ local_bh_disable();
private = t->private;
if (private->number != num_counters) {
ret = -EINVAL;
goto unlock_up_free;
}
- preempt_disable();
i = 0;
/* Choose the copy that is on our node */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ curcpu = smp_processor_id();
+ loc_cpu_entry = private->entries[curcpu];
+ xt_info_wrlock(curcpu);
IPT_ENTRY_ITERATE(loc_cpu_entry,
private->size,
add_counter_to_entry,
paddc,
&i);
- preempt_enable();
+ xt_info_wrunlock(curcpu);
unlock_up_free:
- mutex_unlock(&t->lock);
+ local_bh_enable();
xt_table_unlock(t);
module_put(t->me);
free:
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c40debe51b3..c4c60e9f068 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3397,7 +3397,7 @@ int __init ip_rt_init(void)
0,
&rt_hash_log,
&rt_hash_mask,
- 0);
+ rhash_entries ? 0 : 512 * 1024);
memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket));
rt_hash_lock_init();
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c96a6bb2543..eec3e6f9956 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -597,16 +597,6 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
tcp_grow_window(sk, skb);
}
-static u32 tcp_rto_min(struct sock *sk)
-{
- struct dst_entry *dst = __sk_dst_get(sk);
- u32 rto_min = TCP_RTO_MIN;
-
- if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
- rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
- return rto_min;
-}
-
/* Called to compute a smoothed rtt estimate. The data fed to this
* routine either comes from timestamps, or from segments that were
* known _not_ to have been retransmitted [see Karn/Partridge