aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/netfilter/arp_tables.c125
-rw-r--r--net/ipv4/netfilter/ip_tables.c126
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c3
-rw-r--r--net/ipv4/route.c2
-rw-r--r--net/ipv4/tcp.c6
-rw-r--r--net/ipv4/tcp_input.c13
-rw-r--r--net/ipv4/tcp_output.c2
-rw-r--r--net/ipv4/udp.c3
8 files changed, 83 insertions, 197 deletions
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 5ba533d234d..831fe1879dc 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -253,9 +253,9 @@ unsigned int arpt_do_table(struct sk_buff *skb,
indev = in ? in->name : nulldevname;
outdev = out ? out->name : nulldevname;
- rcu_read_lock_bh();
- private = rcu_dereference(table->private);
- table_base = rcu_dereference(private->entries[smp_processor_id()]);
+ xt_info_rdlock_bh();
+ private = table->private;
+ table_base = private->entries[smp_processor_id()];
e = get_entry(table_base, private->hook_entry[hook]);
back = get_entry(table_base, private->underflow[hook]);
@@ -273,6 +273,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
(2 * skb->dev->addr_len);
+
ADD_COUNTER(e->counters, hdr_len, 1);
t = arpt_get_target(e);
@@ -328,8 +329,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
e = (void *)e + e->next_offset;
}
} while (!hotdrop);
-
- rcu_read_unlock_bh();
+ xt_info_rdunlock_bh();
if (hotdrop)
return NF_DROP;
@@ -711,9 +711,12 @@ static void get_counters(const struct xt_table_info *t,
/* Instead of clearing (by a previous call to memset())
* the counters and using adds, we set the counters
* with data used by 'current' CPU
- * We dont care about preemption here.
+ *
+ * Bottom half has to be disabled to prevent deadlock
+ * if new softirq were to run and call ipt_do_table
*/
- curcpu = raw_smp_processor_id();
+ local_bh_disable();
+ curcpu = smp_processor_id();
i = 0;
ARPT_ENTRY_ITERATE(t->entries[curcpu],
@@ -726,73 +729,22 @@ static void get_counters(const struct xt_table_info *t,
if (cpu == curcpu)
continue;
i = 0;
+ xt_info_wrlock(cpu);
ARPT_ENTRY_ITERATE(t->entries[cpu],
t->size,
add_entry_to_counter,
counters,
&i);
+ xt_info_wrunlock(cpu);
}
-}
-
-
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static int
-add_counter_to_entry(struct arpt_entry *e,
- const struct xt_counters addme[],
- unsigned int *i)
-{
- ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
- (*i)++;
- return 0;
-}
-
-/* Take values from counters and add them back onto the current cpu */
-static void put_counters(struct xt_table_info *t,
- const struct xt_counters counters[])
-{
- unsigned int i, cpu;
-
- local_bh_disable();
- cpu = smp_processor_id();
- i = 0;
- ARPT_ENTRY_ITERATE(t->entries[cpu],
- t->size,
- add_counter_to_entry,
- counters,
- &i);
local_bh_enable();
}
-static inline int
-zero_entry_counter(struct arpt_entry *e, void *arg)
-{
- e->counters.bcnt = 0;
- e->counters.pcnt = 0;
- return 0;
-}
-
-static void
-clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
-{
- unsigned int cpu;
- const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
-
- memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
- for_each_possible_cpu(cpu) {
- memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
- ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
- zero_entry_counter, NULL);
- }
-}
-
static struct xt_counters *alloc_counters(struct xt_table *table)
{
unsigned int countersize;
struct xt_counters *counters;
struct xt_table_info *private = table->private;
- struct xt_table_info *info;
/* We need atomic snapshot of counters: rest doesn't change
* (other than comefrom, which userspace doesn't care
@@ -802,30 +754,11 @@ static struct xt_counters *alloc_counters(struct xt_table *table)
counters = vmalloc_node(countersize, numa_node_id());
if (counters == NULL)
- goto nomem;
-
- info = xt_alloc_table_info(private->size);
- if (!info)
- goto free_counters;
-
- clone_counters(info, private);
-
- mutex_lock(&table->lock);
- xt_table_entry_swap_rcu(private, info);
- synchronize_net(); /* Wait until smoke has cleared */
+ return ERR_PTR(-ENOMEM);
- get_counters(info, counters);
- put_counters(private, counters);
- mutex_unlock(&table->lock);
-
- xt_free_table_info(info);
+ get_counters(private, counters);
return counters;
-
- free_counters:
- vfree(counters);
- nomem:
- return ERR_PTR(-ENOMEM);
}
static int copy_entries_to_user(unsigned int total_size,
@@ -1094,8 +1027,9 @@ static int __do_replace(struct net *net, const char *name,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
- /* Get the old counters. */
+ /* Get the old counters, and synchronize with replace */
get_counters(oldinfo, counters);
+
/* Decrease module usage counts and free resource */
loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
@@ -1165,10 +1099,23 @@ static int do_replace(struct net *net, void __user *user, unsigned int len)
return ret;
}
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct arpt_entry *e,
+ const struct xt_counters addme[],
+ unsigned int *i)
+{
+ ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+ (*i)++;
+ return 0;
+}
+
static int do_add_counters(struct net *net, void __user *user, unsigned int len,
int compat)
{
- unsigned int i;
+ unsigned int i, curcpu;
struct xt_counters_info tmp;
struct xt_counters *paddc;
unsigned int num_counters;
@@ -1224,26 +1171,26 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
goto free;
}
- mutex_lock(&t->lock);
+ local_bh_disable();
private = t->private;
if (private->number != num_counters) {
ret = -EINVAL;
goto unlock_up_free;
}
- preempt_disable();
i = 0;
/* Choose the copy that is on our node */
- loc_cpu_entry = private->entries[smp_processor_id()];
+ curcpu = smp_processor_id();
+ loc_cpu_entry = private->entries[curcpu];
+ xt_info_wrlock(curcpu);
ARPT_ENTRY_ITERATE(loc_cpu_entry,
private->size,
add_counter_to_entry,
paddc,
&i);
- preempt_enable();
+ xt_info_wrunlock(curcpu);
unlock_up_free:
- mutex_unlock(&t->lock);
-
+ local_bh_enable();
xt_table_unlock(t);
module_put(t->me);
free:
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 810c0b62c7d..2ec8d7290c4 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -338,10 +338,9 @@ ipt_do_table(struct sk_buff *skb,
tgpar.hooknum = hook;
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
-
- rcu_read_lock_bh();
- private = rcu_dereference(table->private);
- table_base = rcu_dereference(private->entries[smp_processor_id()]);
+ xt_info_rdlock_bh();
+ private = table->private;
+ table_base = private->entries[smp_processor_id()];
e = get_entry(table_base, private->hook_entry[hook]);
@@ -436,8 +435,7 @@ ipt_do_table(struct sk_buff *skb,
e = (void *)e + e->next_offset;
}
} while (!hotdrop);
-
- rcu_read_unlock_bh();
+ xt_info_rdunlock_bh();
#ifdef DEBUG_ALLOW_ALL
return NF_ACCEPT;
@@ -896,10 +894,13 @@ get_counters(const struct xt_table_info *t,
/* Instead of clearing (by a previous call to memset())
* the counters and using adds, we set the counters
- * with data used by 'current' CPU
- * We dont care about preemption here.
+ * with data used by 'current' CPU.
+ *
+ * Bottom half has to be disabled to prevent deadlock
+ * if new softirq were to run and call ipt_do_table
*/
- curcpu = raw_smp_processor_id();
+ local_bh_disable();
+ curcpu = smp_processor_id();
i = 0;
IPT_ENTRY_ITERATE(t->entries[curcpu],
@@ -912,74 +913,22 @@ get_counters(const struct xt_table_info *t,
if (cpu == curcpu)
continue;
i = 0;
+ xt_info_wrlock(cpu);
IPT_ENTRY_ITERATE(t->entries[cpu],
t->size,
add_entry_to_counter,
counters,
&i);
+ xt_info_wrunlock(cpu);
}
-
-}
-
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static int
-add_counter_to_entry(struct ipt_entry *e,
- const struct xt_counters addme[],
- unsigned int *i)
-{
- ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
- (*i)++;
- return 0;
-}
-
-/* Take values from counters and add them back onto the current cpu */
-static void put_counters(struct xt_table_info *t,
- const struct xt_counters counters[])
-{
- unsigned int i, cpu;
-
- local_bh_disable();
- cpu = smp_processor_id();
- i = 0;
- IPT_ENTRY_ITERATE(t->entries[cpu],
- t->size,
- add_counter_to_entry,
- counters,
- &i);
local_bh_enable();
}
-
-static inline int
-zero_entry_counter(struct ipt_entry *e, void *arg)
-{
- e->counters.bcnt = 0;
- e->counters.pcnt = 0;
- return 0;
-}
-
-static void
-clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
-{
- unsigned int cpu;
- const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
-
- memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
- for_each_possible_cpu(cpu) {
- memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
- IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
- zero_entry_counter, NULL);
- }
-}
-
static struct xt_counters * alloc_counters(struct xt_table *table)
{
unsigned int countersize;
struct xt_counters *counters;
struct xt_table_info *private = table->private;
- struct xt_table_info *info;
/* We need atomic snapshot of counters: rest doesn't change
(other than comefrom, which userspace doesn't care
@@ -988,30 +937,11 @@ static struct xt_counters * alloc_counters(struct xt_table *table)
counters = vmalloc_node(countersize, numa_node_id());
if (counters == NULL)
- goto nomem;
+ return ERR_PTR(-ENOMEM);
- info = xt_alloc_table_info(private->size);
- if (!info)
- goto free_counters;
-
- clone_counters(info, private);
-
- mutex_lock(&table->lock);
- xt_table_entry_swap_rcu(private, info);
- synchronize_net(); /* Wait until smoke has cleared */
-
- get_counters(info, counters);
- put_counters(private, counters);
- mutex_unlock(&table->lock);
-
- xt_free_table_info(info);
+ get_counters(private, counters);
return counters;
-
- free_counters:
- vfree(counters);
- nomem:
- return ERR_PTR(-ENOMEM);
}
static int
@@ -1306,8 +1236,9 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
- /* Get the old counters. */
+ /* Get the old counters, and synchronize with replace */
get_counters(oldinfo, counters);
+
/* Decrease module usage counts and free resource */
loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
@@ -1377,11 +1308,23 @@ do_replace(struct net *net, void __user *user, unsigned int len)
return ret;
}
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct ipt_entry *e,
+ const struct xt_counters addme[],
+ unsigned int *i)
+{
+ ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+ (*i)++;
+ return 0;
+}
static int
do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
{
- unsigned int i;
+ unsigned int i, curcpu;
struct xt_counters_info tmp;
struct xt_counters *paddc;
unsigned int num_counters;
@@ -1437,25 +1380,26 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat
goto free;
}
- mutex_lock(&t->lock);
+ local_bh_disable();
private = t->private;
if (private->number != num_counters) {
ret = -EINVAL;
goto unlock_up_free;
}
- preempt_disable();
i = 0;
/* Choose the copy that is on our node */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ curcpu = smp_processor_id();
+ loc_cpu_entry = private->entries[curcpu];
+ xt_info_wrlock(curcpu);
IPT_ENTRY_ITERATE(loc_cpu_entry,
private->size,
add_counter_to_entry,
paddc,
&i);
- preempt_enable();
+ xt_info_wrunlock(curcpu);
unlock_up_free:
- mutex_unlock(&t->lock);
+ local_bh_enable();
xt_table_unlock(t);
module_put(t->me);
free:
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index fe65187810f..3229e0a81ba 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -211,7 +211,8 @@ find_best_ips_proto(struct nf_conntrack_tuple *tuple,
minip = ntohl(range->min_ip);
maxip = ntohl(range->max_ip);
j = jhash_2words((__force u32)tuple->src.u3.ip,
- (__force u32)tuple->dst.u3.ip, 0);
+ range->flags & IP_NAT_RANGE_PERSISTENT ?
+ (__force u32)tuple->dst.u3.ip : 0, 0);
j = ((u64)j * (maxip - minip + 1)) >> 32;
*var_ipp = htonl(minip + j);
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index c40debe51b3..c4c60e9f068 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3397,7 +3397,7 @@ int __init ip_rt_init(void)
0,
&rt_hash_log,
&rt_hash_mask,
- 0);
+ rhash_entries ? 0 : 512 * 1024);
memset(rt_hash_table, 0, (rt_hash_mask + 1) * sizeof(struct rt_hash_bucket));
rt_hash_lock_init();
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index fafbec8b073..1d7f49c6f0c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2511,6 +2511,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
struct sk_buff *p;
struct tcphdr *th;
struct tcphdr *th2;
+ unsigned int len;
unsigned int thlen;
unsigned int flags;
unsigned int mss = 1;
@@ -2531,6 +2532,7 @@ struct sk_buff **tcp_gro_receive(struct sk_buff **head, struct sk_buff *skb)
skb_gro_pull(skb, thlen);
+ len = skb_gro_len(skb);
flags = tcp_flag_word(th);
for (; (p = *head); head = &p->next) {
@@ -2561,7 +2563,7 @@ found:
mss = skb_shinfo(p)->gso_size;
- flush |= (skb_gro_len(skb) > mss) | !skb_gro_len(skb);
+ flush |= (len > mss) | !len;
flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq);
if (flush || skb_gro_receive(head, skb)) {
@@ -2574,7 +2576,7 @@ found:
tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH);
out_check_final:
- flush = skb_gro_len(skb) < mss;
+ flush = len < mss;
flush |= flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST |
TCP_FLAG_SYN | TCP_FLAG_FIN);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2bc8e27a163..eec3e6f9956 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -597,16 +597,6 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
tcp_grow_window(sk, skb);
}
-static u32 tcp_rto_min(struct sock *sk)
-{
- struct dst_entry *dst = __sk_dst_get(sk);
- u32 rto_min = TCP_RTO_MIN;
-
- if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
- rto_min = dst_metric_rtt(dst, RTAX_RTO_MIN);
- return rto_min;
-}
-
/* Called to compute a smoothed rtt estimate. The data fed to this
* routine either comes from timestamps, or from segments that were
* known _not_ to have been retransmitted [see Karn/Partridge
@@ -928,6 +918,8 @@ static void tcp_init_metrics(struct sock *sk)
tcp_set_rto(sk);
if (inet_csk(sk)->icsk_rto < TCP_TIMEOUT_INIT && !tp->rx_opt.saw_tstamp)
goto reset;
+
+cwnd:
tp->snd_cwnd = tcp_init_cwnd(tp, dst);
tp->snd_cwnd_stamp = tcp_time_stamp;
return;
@@ -942,6 +934,7 @@ reset:
tp->mdev = tp->mdev_max = tp->rttvar = TCP_TIMEOUT_INIT;
inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
}
+ goto cwnd;
}
static void tcp_update_reordering(struct sock *sk, const int metric,
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 53300fa2359..59aec609cec 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -778,7 +778,7 @@ static void tcp_adjust_pcount(struct sock *sk, struct sk_buff *skb, int decr)
if (tp->lost_skb_hint &&
before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) &&
- (tcp_is_fack(tp) || TCP_SKB_CB(skb)->sacked))
+ (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)))
tp->lost_cnt_hint -= decr;
tcp_verify_left_out(tp);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index bda08a09357..7a1d1ce22e6 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -222,7 +222,7 @@ fail:
return error;
}
-int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
+static int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
{
struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
@@ -1823,7 +1823,6 @@ EXPORT_SYMBOL(udp_lib_getsockopt);
EXPORT_SYMBOL(udp_lib_setsockopt);
EXPORT_SYMBOL(udp_poll);
EXPORT_SYMBOL(udp_lib_get_port);
-EXPORT_SYMBOL(ipv4_rcv_saddr_equal);
#ifdef CONFIG_PROC_FS
EXPORT_SYMBOL(udp_proc_register);