aboutsummaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorEric Dumazet <edumazet@google.com>2014-06-02 05:26:03 -0700
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2014-08-14 09:24:15 +0800
commitff1f69a89a613223c57c13190a6c9be928ac4b9d (patch)
treead430caf7c9cc77a23f562ab5a625213280dbf20 /include
parentf5980d099295d4113635ded88425834a38d17125 (diff)
inetpeer: get rid of ip_id_count
[ Upstream commit 73f156a6e8c1074ac6327e0abd1169e95eb66463 ] Ideally, we would need to generate IP ID using a per destination IP generator. linux kernels used inet_peer cache for this purpose, but this had a huge cost on servers disabling MTU discovery. 1) each inet_peer struct consumes 192 bytes 2) inetpeer cache uses a binary tree of inet_peer structs, with a nominal size of ~66000 elements under load. 3) lookups in this tree are hitting a lot of cache lines, as tree depth is about 20. 4) If server deals with many tcp flows, we have a high probability of not finding the inet_peer, allocating a fresh one, inserting it in the tree with same initial ip_id_count, (cf secure_ip_id()) 5) We garbage collect inet_peer aggressively. IP ID generation do not have to be 'perfect' Goal is trying to avoid duplicates in a short period of time, so that reassembly units have a chance to complete reassembly of fragments belonging to one message before receiving other fragments with a recycled ID. We simply use an array of generators, and a Jenkin hash using the dst IP as a key. ipv6_select_ident() is put back into net/ipv6/ip6_output.c where it belongs (it is only used from this file) secure_ip_id() and secure_ipv6_id() no longer are needed. Rename ip_select_ident_more() to ip_select_ident_segs() to avoid unnecessary decrement/increment of the number of segments. Signed-off-by: Eric Dumazet <edumazet@google.com> Signed-off-by: David S. Miller <davem@davemloft.net> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'include')
-rw-r--r--include/net/inetpeer.h16
-rw-r--r--include/net/ip.h40
-rw-r--r--include/net/ipv6.h11
-rw-r--r--include/net/secure_seq.h2
4 files changed, 33 insertions, 36 deletions
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 6ca347a0717e..bb06fd26a7bd 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -41,14 +41,13 @@ struct inet_peer {
struct rcu_head gc_rcu;
};
/*
- * Once inet_peer is queued for deletion (refcnt == -1), following fields
- * are not available: rid, ip_id_count
+ * Once inet_peer is queued for deletion (refcnt == -1), following field
+ * is not available: rid
* We can share memory with rcu_head to help keep inet_peer small.
*/
union {
struct {
atomic_t rid; /* Frag reception counter */
- atomic_t ip_id_count; /* IP ID for the next packet */
};
struct rcu_head rcu;
struct inet_peer *gc_next;
@@ -166,7 +165,7 @@ extern void inetpeer_invalidate_tree(struct inet_peer_base *);
extern void inetpeer_invalidate_family(int family);
/*
- * temporary check to make sure we dont access rid, ip_id_count, tcp_ts,
+ * temporary check to make sure we dont access rid, tcp_ts,
* tcp_ts_stamp if no refcount is taken on inet_peer
*/
static inline void inet_peer_refcheck(const struct inet_peer *p)
@@ -174,13 +173,4 @@ static inline void inet_peer_refcheck(const struct inet_peer *p)
WARN_ON_ONCE(atomic_read(&p->refcnt) <= 0);
}
-
-/* can be called with or without local BH being disabled */
-static inline int inet_getid(struct inet_peer *p, int more)
-{
- more++;
- inet_peer_refcheck(p);
- return atomic_add_return(more, &p->ip_id_count) - more;
-}
-
#endif /* _NET_INETPEER_H */
diff --git a/include/net/ip.h b/include/net/ip.h
index 788f1d8a796f..dd72c8f93797 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -252,9 +252,19 @@ int ip_dont_fragment(struct sock *sk, struct dst_entry *dst)
!(dst_metric_locked(dst, RTAX_MTU)));
}
-extern void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more);
+#define IP_IDENTS_SZ 2048u
+extern atomic_t *ip_idents;
-static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk)
+static inline u32 ip_idents_reserve(u32 hash, int segs)
+{
+ atomic_t *id_ptr = ip_idents + hash % IP_IDENTS_SZ;
+
+ return atomic_add_return(segs, id_ptr) - segs;
+}
+
+void __ip_select_ident(struct iphdr *iph, int segs);
+
+static inline void ip_select_ident_segs(struct sk_buff *skb, struct sock *sk, int segs)
{
struct iphdr *iph = ip_hdr(skb);
@@ -264,24 +274,20 @@ static inline void ip_select_ident(struct sk_buff *skb, struct dst_entry *dst, s
* does not change, they drop every other packet in
* a TCP stream using header compression.
*/
- iph->id = (sk && inet_sk(sk)->inet_daddr) ?
- htons(inet_sk(sk)->inet_id++) : 0;
- } else
- __ip_select_ident(iph, dst, 0);
-}
-
-static inline void ip_select_ident_more(struct sk_buff *skb, struct dst_entry *dst, struct sock *sk, int more)
-{
- struct iphdr *iph = ip_hdr(skb);
-
- if ((iph->frag_off & htons(IP_DF)) && !skb->local_df) {
if (sk && inet_sk(sk)->inet_daddr) {
iph->id = htons(inet_sk(sk)->inet_id);
- inet_sk(sk)->inet_id += 1 + more;
- } else
+ inet_sk(sk)->inet_id += segs;
+ } else {
iph->id = 0;
- } else
- __ip_select_ident(iph, dst, more);
+ }
+ } else {
+ __ip_select_ident(iph, segs);
+ }
+}
+
+static inline void ip_select_ident(struct sk_buff *skb, struct sock *sk)
+{
+ ip_select_ident_segs(skb, sk, 1);
}
/*
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 9e093fc33dab..087370ff05f1 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -530,14 +530,19 @@ static inline u32 ipv6_addr_hash(const struct in6_addr *a)
}
/* more secured version of ipv6_addr_hash() */
-static inline u32 ipv6_addr_jhash(const struct in6_addr *a)
+static inline u32 __ipv6_addr_jhash(const struct in6_addr *a, const u32 initval)
{
u32 v = (__force u32)a->s6_addr32[0] ^ (__force u32)a->s6_addr32[1];
return jhash_3words(v,
(__force u32)a->s6_addr32[2],
(__force u32)a->s6_addr32[3],
- ipv6_hash_secret);
+ initval);
+}
+
+static inline u32 ipv6_addr_jhash(const struct in6_addr *a)
+{
+ return __ipv6_addr_jhash(a, ipv6_hash_secret);
}
static inline bool ipv6_addr_loopback(const struct in6_addr *a)
@@ -649,8 +654,6 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add
return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr));
}
-extern void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt);
-
/*
* Header manipulation
*/
diff --git a/include/net/secure_seq.h b/include/net/secure_seq.h
index c2e542b27a5a..b1c3d1c63c4e 100644
--- a/include/net/secure_seq.h
+++ b/include/net/secure_seq.h
@@ -3,8 +3,6 @@
#include <linux/types.h>
-extern __u32 secure_ip_id(__be32 daddr);
-extern __u32 secure_ipv6_id(const __be32 daddr[4]);
extern u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport);
extern u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport);