aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c6
-rw-r--r--net/core/dev.c6
-rw-r--r--net/core/skbuff.c1
-rw-r--r--net/core/sock.c3
-rw-r--r--net/core/sock_reuseport.c12
-rw-r--r--net/dccp/ipv4.c13
-rw-r--r--net/dsa/Kconfig5
-rw-r--r--net/ipv4/ah4.c3
-rw-r--r--net/ipv4/cipso_ipv4.c24
-rw-r--r--net/ipv4/gre_offload.c2
-rw-r--r--net/ipv4/inet_connection_sock.c9
-rw-r--r--net/ipv4/inet_hashtables.c5
-rw-r--r--net/ipv4/ipip.c59
-rw-r--r--net/ipv4/syncookies.c2
-rw-r--r--net/ipv4/tcp.c18
-rw-r--r--net/ipv4/tcp_input.c2
-rw-r--r--net/ipv4/tcp_ipv4.c21
-rw-r--r--net/ipv4/tcp_nv.c2
-rw-r--r--net/ipv4/tcp_output.c12
-rw-r--r--net/ipv4/udp.c5
-rw-r--r--net/ipv4/udp_offload.c2
-rw-r--r--net/ipv6/addrconf.c1
-rw-r--r--net/ipv6/ip6_flowlabel.c1
-rw-r--r--net/ipv6/ip6_gre.c20
-rw-r--r--net/ipv6/ip6_offload.c2
-rw-r--r--net/ipv6/ip6_output.c4
-rw-r--r--net/l2tp/l2tp_ppp.c3
-rw-r--r--net/mac80211/key.c54
-rw-r--r--net/netfilter/nf_conntrack_core.c2
-rw-r--r--net/netfilter/nf_nat_core.c146
-rw-r--r--net/netfilter/nft_meta.c28
-rw-r--r--net/netlink/af_netlink.c30
-rw-r--r--net/netlink/af_netlink.h1
-rw-r--r--net/packet/af_packet.c24
-rw-r--r--net/sched/sch_api.c2
-rw-r--r--net/sctp/input.c2
-rw-r--r--net/sctp/ipv6.c11
-rw-r--r--net/sctp/socket.c36
-rw-r--r--net/unix/diag.c2
39 files changed, 357 insertions, 224 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 8d213f974448..4a47074d1d7f 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -376,6 +376,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
dev->name);
vlan_vid_add(dev, htons(ETH_P_8021Q), 0);
}
+ if (event == NETDEV_DOWN &&
+ (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
+ vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
vlan_info = rtnl_dereference(dev->vlan_info);
if (!vlan_info)
@@ -423,9 +426,6 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
struct net_device *tmp;
LIST_HEAD(close_list);
- if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)
- vlan_vid_del(dev, htons(ETH_P_8021Q), 0);
-
/* Put all VLANs for this dev in the down state too. */
vlan_group_for_each_dev(grp, i, vlandev) {
flgs = vlandev->flags;
diff --git a/net/core/dev.c b/net/core/dev.c
index cfed44b8e175..3235360cd9a4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1117,9 +1117,8 @@ static int dev_alloc_name_ns(struct net *net,
return ret;
}
-static int dev_get_valid_name(struct net *net,
- struct net_device *dev,
- const char *name)
+int dev_get_valid_name(struct net *net, struct net_device *dev,
+ const char *name)
{
BUG_ON(!net);
@@ -1135,6 +1134,7 @@ static int dev_get_valid_name(struct net *net,
return 0;
}
+EXPORT_SYMBOL(dev_get_valid_name);
/**
* dev_change_name - change name of a device
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 9fa6bea3dd3f..d4cb8bfdb83c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4390,6 +4390,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
if (!xnet)
return;
+ ipvs_reset(skb);
skb_orphan(skb);
skb->mark = 0;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index d8e0aed19903..8d15848c3a22 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1526,6 +1526,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
sock_reset_flag(newsk, SOCK_DONE);
+ cgroup_sk_alloc(&newsk->sk_cgrp_data);
skb_queue_head_init(&newsk->sk_error_queue);
filter = rcu_dereference_protected(newsk->sk_filter, 1);
@@ -1560,8 +1561,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
atomic64_set(&newsk->sk_cookie, 0);
mem_cgroup_sk_alloc(newsk);
- cgroup_sk_alloc(&newsk->sk_cgrp_data);
-
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c
index 9a1a352fd1eb..77f396b679ce 100644
--- a/net/core/sock_reuseport.c
+++ b/net/core/sock_reuseport.c
@@ -36,9 +36,14 @@ int reuseport_alloc(struct sock *sk)
* soft irq of receive path or setsockopt from process context
*/
spin_lock_bh(&reuseport_lock);
- WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb,
- lockdep_is_held(&reuseport_lock)),
- "multiple allocations for the same socket");
+
+ /* Allocation attempts can occur concurrently via the setsockopt path
+ * and the bind/hash path. Nothing to do when we lose the race.
+ */
+ if (rcu_dereference_protected(sk->sk_reuseport_cb,
+ lockdep_is_held(&reuseport_lock)))
+ goto out;
+
reuse = __reuseport_alloc(INIT_SOCKS);
if (!reuse) {
spin_unlock_bh(&reuseport_lock);
@@ -49,6 +54,7 @@ int reuseport_alloc(struct sock *sk)
reuse->num_socks = 1;
rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
+out:
spin_unlock_bh(&reuseport_lock);
return 0;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 8fc160098e11..8c7799cdd3cf 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -414,8 +414,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
sk_daddr_set(newsk, ireq->ir_rmt_addr);
sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
newinet->inet_saddr = ireq->ir_loc_addr;
- newinet->inet_opt = ireq->opt;
- ireq->opt = NULL;
+ RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = ip_hdr(skb)->ttl;
newinet->inet_id = jiffies;
@@ -430,7 +429,10 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk,
if (__inet_inherit_port(sk, newsk) < 0)
goto put_and_exit;
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
-
+ if (*own_req)
+ ireq->ireq_opt = NULL;
+ else
+ newinet->inet_opt = NULL;
return newsk;
exit_overflow:
@@ -441,6 +443,7 @@ exit:
__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
return NULL;
put_and_exit:
+ newinet->inet_opt = NULL;
inet_csk_prepare_forced_close(newsk);
dccp_done(newsk);
goto exit;
@@ -492,7 +495,7 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req
ireq->ir_rmt_addr);
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
- ireq->opt);
+ ireq_opt_deref(ireq));
err = net_xmit_eval(err);
}
@@ -548,7 +551,7 @@ out:
static void dccp_v4_reqsk_destructor(struct request_sock *req)
{
dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
- kfree(inet_rsk(req)->opt);
+ kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
}
void dccp_syn_ack_timeout(const struct request_sock *req)
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 96e47c539bee..39bb5b3a82f2 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -1,12 +1,13 @@
config HAVE_NET_DSA
def_bool y
- depends on NETDEVICES && !S390
+ depends on INET && NETDEVICES && !S390
# Drivers must select NET_DSA and the appropriate tagging format
config NET_DSA
tristate "Distributed Switch Architecture"
- depends on HAVE_NET_DSA && NET_SWITCHDEV
+ depends on HAVE_NET_DSA
+ select NET_SWITCHDEV
select PHYLIB
---help---
Say Y if you want to enable support for the hardware switches supported
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index f2a71025a770..22377c8ff14b 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -270,6 +270,9 @@ static void ah_input_done(struct crypto_async_request *base, int err)
int ihl = ip_hdrlen(skb);
int ah_hlen = (ah->hdrlen + 2) << 2;
+ if (err)
+ goto out;
+
work_iph = AH_SKB_CB(skb)->tmp;
auth_data = ah_tmp_auth(work_iph, ihl);
icv = ah_tmp_icv(ahp->ahash, auth_data, ahp->icv_trunc_len);
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index ae206163c273..972353cd1778 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1943,7 +1943,7 @@ int cipso_v4_req_setattr(struct request_sock *req,
buf = NULL;
req_inet = inet_rsk(req);
- opt = xchg(&req_inet->opt, opt);
+ opt = xchg((__force struct ip_options_rcu **)&req_inet->ireq_opt, opt);
if (opt)
kfree_rcu(opt, rcu);
@@ -1965,11 +1965,13 @@ req_setattr_failure:
* values on failure.
*
*/
-static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
+static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr)
{
+ struct ip_options_rcu *opt = rcu_dereference_protected(*opt_ptr, 1);
int hdr_delta = 0;
- struct ip_options_rcu *opt = *opt_ptr;
+ if (!opt || opt->opt.cipso == 0)
+ return 0;
if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) {
u8 cipso_len;
u8 cipso_off;
@@ -2031,14 +2033,10 @@ static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
*/
void cipso_v4_sock_delattr(struct sock *sk)
{
- int hdr_delta;
- struct ip_options_rcu *opt;
struct inet_sock *sk_inet;
+ int hdr_delta;
sk_inet = inet_sk(sk);
- opt = rcu_dereference_protected(sk_inet->inet_opt, 1);
- if (!opt || opt->opt.cipso == 0)
- return;
hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt);
if (sk_inet->is_icsk && hdr_delta > 0) {
@@ -2058,15 +2056,7 @@ void cipso_v4_sock_delattr(struct sock *sk)
*/
void cipso_v4_req_delattr(struct request_sock *req)
{
- struct ip_options_rcu *opt;
- struct inet_request_sock *req_inet;
-
- req_inet = inet_rsk(req);
- opt = req_inet->opt;
- if (!opt || opt->opt.cipso == 0)
- return;
-
- cipso_v4_delopt(&req_inet->opt);
+ cipso_v4_delopt(&inet_rsk(req)->ireq_opt);
}
/**
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index d5cac99170b1..8c72034df28e 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -98,7 +98,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
greh = (struct gre_base_hdr *)skb_transport_header(skb);
pcsum = (__sum16 *)(greh + 1);
- if (gso_partial) {
+ if (gso_partial && skb_is_gso(skb)) {
unsigned int partial_adj;
/* Adjust checksum to account for the fact that
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index cf3d5674846a..d1cab49393e2 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -407,9 +407,11 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk,
{
const struct inet_request_sock *ireq = inet_rsk(req);
struct net *net = read_pnet(&ireq->ireq_net);
- struct ip_options_rcu *opt = ireq->opt;
+ struct ip_options_rcu *opt;
struct rtable *rt;
+ opt = ireq_opt_deref(ireq);
+
flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol, inet_sk_flowi_flags(sk),
@@ -443,10 +445,9 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
struct flowi4 *fl4;
struct rtable *rt;
+ opt = rcu_dereference(ireq->ireq_opt);
fl4 = &newinet->cork.fl.u.ip4;
- rcu_read_lock();
- opt = rcu_dereference(newinet->inet_opt);
flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol, inet_sk_flowi_flags(sk),
@@ -459,13 +460,11 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk,
goto no_route;
if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
goto route_err;
- rcu_read_unlock();
return &rt->dst;
route_err:
ip_rt_put(rt);
no_route:
- rcu_read_unlock();
__IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
return NULL;
}
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index ca97835bfec4..b9bcf3db3af9 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -455,10 +455,7 @@ static int inet_reuseport_add_sock(struct sock *sk,
return reuseport_add_sock(sk, sk2);
}
- /* Initial allocation may have already happened via setsockopt */
- if (!rcu_access_pointer(sk->sk_reuseport_cb))
- return reuseport_alloc(sk);
- return 0;
+ return reuseport_alloc(sk);
}
int __inet_hash(struct sock *sk, struct sock *osk,
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index c9392589c415..56d71a004dce 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -128,43 +128,68 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly;
static int ipip_err(struct sk_buff *skb, u32 info)
{
-
-/* All the routers (except for Linux) return only
- 8 bytes of packet payload. It means, that precise relaying of
- ICMP in the real Internet is absolutely infeasible.
- */
+ /* All the routers (except for Linux) return only
+ * 8 bytes of packet payload. It means, that precise relaying of
+ * ICMP in the real Internet is absolutely infeasible.
+ */
struct net *net = dev_net(skb->dev);
struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
const struct iphdr *iph = (const struct iphdr *)skb->data;
- struct ip_tunnel *t;
- int err;
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
+ struct ip_tunnel *t;
+ int err = 0;
+
+ switch (type) {
+ case ICMP_DEST_UNREACH:
+ switch (code) {
+ case ICMP_SR_FAILED:
+ /* Impossible event. */
+ goto out;
+ default:
+ /* All others are translated to HOST_UNREACH.
+ * rfc2003 contains "deep thoughts" about NET_UNREACH,
+ * I believe they are just ether pollution. --ANK
+ */
+ break;
+ }
+ break;
+
+ case ICMP_TIME_EXCEEDED:
+ if (code != ICMP_EXC_TTL)
+ goto out;
+ break;
+
+ case ICMP_REDIRECT:
+ break;
+
+ default:
+ goto out;
+ }
- err = -ENOENT;
t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
iph->daddr, iph->saddr, 0);
- if (!t)
+ if (!t) {
+ err = -ENOENT;
goto out;
+ }
if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
- ipv4_update_pmtu(skb, dev_net(skb->dev), info,
- t->parms.link, 0, iph->protocol, 0);
- err = 0;
+ ipv4_update_pmtu(skb, net, info, t->parms.link, 0,
+ iph->protocol, 0);
goto out;
}
if (type == ICMP_REDIRECT) {
- ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
- iph->protocol, 0);
- err = 0;
+ ipv4_redirect(skb, net, t->parms.link, 0, iph->protocol, 0);
goto out;
}
- if (t->parms.iph.daddr == 0)
+ if (t->parms.iph.daddr == 0) {
+ err = -ENOENT;
goto out;
+ }
- err = 0;
if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
goto out;
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index b6f710d515d0..0597ad73a1fa 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -354,7 +354,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
/* We throwed the options of the initial SYN away, so we hope
* the ACK carries the same options again (see RFC1122 4.2.3.8)
*/
- ireq->opt = tcp_v4_save_options(skb);
+ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(skb));
if (security_inet_conn_request(sk, skb, req)) {
reqsk_free(req);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6b3d27e50317..dd33c785ce16 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -431,7 +431,7 @@ EXPORT_SYMBOL(tcp_init_sock);
static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb)
{
- if (tsflags) {
+ if (tsflags && skb) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
@@ -966,10 +966,8 @@ new_segment:
copied += copy;
offset += copy;
size -= copy;
- if (!size) {
- tcp_tx_timestamp(sk, sk->sk_tsflags, skb);
+ if (!size)
goto out;
- }
if (skb->len < size_goal || (flags & MSG_OOB))
continue;
@@ -995,8 +993,11 @@ wait_for_memory:
}
out:
- if (copied && !(flags & MSG_SENDPAGE_NOTLAST))
- tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
+ if (copied) {
+ tcp_tx_timestamp(sk, sk->sk_tsflags, tcp_write_queue_tail(sk));
+ if (!(flags & MSG_SENDPAGE_NOTLAST))
+ tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
+ }
return copied;
do_error:
@@ -1289,7 +1290,6 @@ new_segment:
copied += copy;
if (!msg_data_left(msg)) {
- tcp_tx_timestamp(sk, sockc.tsflags, skb);
if (unlikely(flags & MSG_EOR))
TCP_SKB_CB(skb)->eor = 1;
goto out;
@@ -1320,8 +1320,10 @@ wait_for_memory:
}
out:
- if (copied)
+ if (copied) {
+ tcp_tx_timestamp(sk, sockc.tsflags, tcp_write_queue_tail(sk));
tcp_push(sk, flags, mss_now, tp->nonagle, size_goal);
+ }
out_nopush:
release_sock(sk);
return copied + copied_syn;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index c03850771a4e..8fcd0c642742 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6237,7 +6237,7 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops,
struct inet_request_sock *ireq = inet_rsk(req);
kmemcheck_annotate_bitfield(ireq, flags);
- ireq->opt = NULL;
+ ireq->ireq_opt = NULL;
#if IS_ENABLED(CONFIG_IPV6)
ireq->pktopts = NULL;
#endif
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 672fffcde28c..3336e1534bc5 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -868,7 +868,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
- ireq->opt);
+ ireq_opt_deref(ireq));
err = net_xmit_eval(err);
}
@@ -880,7 +880,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
*/
static void tcp_v4_reqsk_destructor(struct request_sock *req)
{
- kfree(inet_rsk(req)->opt);
+ kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
}
#ifdef CONFIG_TCP_MD5SIG
@@ -1206,7 +1206,7 @@ static void tcp_v4_init_req(struct request_sock *req,
sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
- ireq->opt = tcp_v4_save_options(skb);
+ RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(skb));
}
static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
@@ -1302,10 +1302,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
sk_daddr_set(newsk, ireq->ir_rmt_addr);
sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
newsk->sk_bound_dev_if = ireq->ir_iif;
- newinet->inet_saddr = ireq->ir_loc_addr;
- inet_opt = ireq->opt;
- rcu_assign_pointer(newinet->inet_opt, inet_opt);
- ireq->opt = NULL;
+ newinet->inet_saddr = ireq->ir_loc_addr;
+ inet_opt = rcu_dereference(ireq->ireq_opt);
+ RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
newinet->mc_index = inet_iif(skb);
newinet->mc_ttl = ip_hdr(skb)->ttl;
newinet->rcv_tos = ip_hdr(skb)->tos;
@@ -1353,9 +1352,12 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
if (__inet_inherit_port(sk, newsk) < 0)
goto put_and_exit;
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
- if (*own_req)
+ if (likely(*own_req)) {
tcp_move_syn(newtp, req);
-
+ ireq->ireq_opt = NULL;
+ } else {
+ newinet->inet_opt = NULL;
+ }
return newsk;
exit_overflow:
@@ -1366,6 +1368,7 @@ exit:
tcp_listendrop(sk);
return NULL;
put_and_exit:
+ newinet->inet_opt = NULL;
inet_csk_prepare_forced_close(newsk);
tcp_done(newsk);
goto exit;
diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c
index 5de82a8d4d87..e45e2c41c7bd 100644
--- a/net/ipv4/tcp_nv.c
+++ b/net/ipv4/tcp_nv.c
@@ -263,7 +263,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample)
/* rate in 100's bits per second */
rate64 = ((u64)sample->in_flight) * 8000000;
- rate = (u32)div64_u64(rate64, (u64)(avg_rtt * 100));
+ rate = (u32)div64_u64(rate64, (u64)(avg_rtt ?: 1) * 100);
/* Remember the maximum rate seen during this RTT
* Note: It may be more than one RTT. This function should be
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 85920707c4d3..3d7b59ecc76c 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1996,6 +1996,7 @@ static int tcp_mtu_probe(struct sock *sk)
nskb->ip_summed = skb->ip_summed;
tcp_insert_write_queue_before(nskb, skb, sk);
+ tcp_highest_sack_replace(sk, skb, nskb);
len = 0;
tcp_for_write_queue_from_safe(skb, next, sk) {
@@ -2535,7 +2536,7 @@ static void tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb)
BUG_ON(tcp_skb_pcount(skb) != 1 || tcp_skb_pcount(next_skb) != 1);
- tcp_highest_sack_combine(sk, next_skb, skb);
+ tcp_highest_sack_replace(sk, next_skb, skb);
tcp_unlink_write_queue(next_skb, sk);
@@ -3109,13 +3110,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
tcp_ecn_make_synack(req, th);
th->source = htons(ireq->ir_num);
th->dest = ireq->ir_rmt_port;
- /* Setting of flags are superfluous here for callers (and ECE is
- * not even correctly set)
- */
- tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
- TCPHDR_SYN | TCPHDR_ACK);
-
- th->seq = htonl(TCP_SKB_CB(skb)->seq);
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ th->seq = htonl(tcp_rsk(req)->snt_isn);
/* XXX data is queued and acked as is. No buffer/window check */
th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 4363b1e89bdf..bef4a94ce1a0 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -222,10 +222,7 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot,
}
}
- /* Initial allocation may have already happened via setsockopt */
- if (!rcu_access_pointer(sk->sk_reuseport_cb))
- return reuseport_alloc(sk);
- return 0;
+ return reuseport_alloc(sk);
}
/**
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 0932c85b42af..6401574cd638 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -122,7 +122,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
* will be using a length value equal to only one MSS sized
* segment instead of the entire frame.
*/
- if (gso_partial) {
+ if (gso_partial && skb_is_gso(skb)) {
uh->len = htons(skb_shinfo(skb)->gso_size +
SKB_GSO_CB(skb)->data_offset +
skb->head - (unsigned char *)uh);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index cc101b1be903..a4fb90c4819f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3299,6 +3299,7 @@ static void addrconf_permanent_addr(struct net_device *dev)
if ((ifp->flags & IFA_F_PERMANENT) &&
fixup_permanent_addr(idev, ifp) < 0) {
write_unlock_bh(&idev->lock);
+ in6_ifa_hold(ifp);
ipv6_del_addr(ifp);
write_lock_bh(&idev->lock);
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index b912f0dbaf72..b82e439804d1 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -315,6 +315,7 @@ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
}
opt_space->dst1opt = fopt->dst1opt;
opt_space->opt_flen = fopt->opt_flen;
+ opt_space->tot_len = fopt->tot_len;
return opt_space;
}
EXPORT_SYMBOL_GPL(fl6_merge_options);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 41c10486cf7e..e9b14e3493f2 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -408,13 +408,16 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
case ICMPV6_DEST_UNREACH:
net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
t->parms.name);
- break;
+ if (code != ICMPV6_PORT_UNREACH)
+ break;
+ return;
case ICMPV6_TIME_EXCEED:
if (code == ICMPV6_EXC_HOPLIMIT) {
net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
t->parms.name);
+ break;
}
- break;
+ return;
case ICMPV6_PARAMPROB:
teli = 0;
if (code == ICMPV6_HDR_FIELD)
@@ -430,7 +433,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
t->parms.name);
}
- break;
+ return;
case ICMPV6_PKT_TOOBIG:
mtu = be32_to_cpu(info) - offset - t->tun_hlen;
if (t->dev->type == ARPHRD_ETHER)
@@ -438,7 +441,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
t->dev->mtu = mtu;
- break;
+ return;
}
if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO))
@@ -505,8 +508,8 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
__u32 *pmtu, __be16 proto)
{
struct ip6_tnl *tunnel = netdev_priv(dev);
- __be16 protocol = (dev->type == ARPHRD_ETHER) ?
- htons(ETH_P_TEB) : proto;
+ struct dst_entry *dst = skb_dst(skb);
+ __be16 protocol;
if (dev->type == ARPHRD_ETHER)
IPCB(skb)->flags = 0;
@@ -520,9 +523,14 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
tunnel->o_seqno++;
/* Push GRE header. */
+ protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
+ /* TooBig packet may have updated dst->dev's mtu */
+ if (dst && dst_mtu(dst) > dst->dev->mtu)
+ dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
+
return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
NEXTHDR_GRE);
}
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 424fbe1f8978..649f4d87b318 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -105,7 +105,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
for (skb = segs; skb; skb = skb->next) {
ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff);
- if (gso_partial)
+ if (gso_partial && skb_is_gso(skb))
payload_len = skb_shinfo(skb)->gso_size +
SKB_GSO_CB(skb)->data_offset +
skb->head - (unsigned char *)(ipv6h + 1);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index e0236e902ea7..6e01c9a8dfd3 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1215,11 +1215,11 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
if (WARN_ON(v6_cork->opt))
return -EINVAL;
- v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
+ v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
if (unlikely(!v6_cork->opt))
return -ENOBUFS;
- v6_cork->opt->tot_len = opt->tot_len;
+ v6_cork->opt->tot_len = sizeof(*opt);
v6_cork->opt->opt_flen = opt->opt_flen;
v6_cork->opt->opt_nflen = opt->opt_nflen;
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 1696f1fd5877..163f1fa53917 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -993,6 +993,9 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session,
session->name, cmd, arg);
sk = ps->sock;
+ if (!sk)
+ return -EBADR;
+
sock_hold(sk);
switch (cmd) {
diff --git a/net/mac80211/key.c b/net/mac80211/key.c
index edd6f2945f69..4c625a325ce2 100644
--- a/net/mac80211/key.c
+++ b/net/mac80211/key.c
@@ -4,7 +4,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007-2008 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright 2015 Intel Deutschland GmbH
+ * Copyright 2015-2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -19,6 +19,7 @@
#include <linux/slab.h>
#include <linux/export.h>
#include <net/mac80211.h>
+#include <crypto/algapi.h>
#include <asm/unaligned.h>
#include "ieee80211_i.h"
#include "driver-ops.h"
@@ -608,6 +609,39 @@ void ieee80211_key_free_unused(struct ieee80211_key *key)
ieee80211_key_free_common(key);
}
+static bool ieee80211_key_identical(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_key *old,
+ struct ieee80211_key *new)
+{
+ u8 tkip_old[WLAN_KEY_LEN_TKIP], tkip_new[WLAN_KEY_LEN_TKIP];
+ u8 *tk_old, *tk_new;
+
+ if (!old || new->conf.keylen != old->conf.keylen)
+ return false;
+
+ tk_old = old->conf.key;
+ tk_new = new->conf.key;
+
+ /*
+ * In station mode, don't compare the TX MIC key, as it's never used
+ * and offloaded rekeying may not care to send it to the host. This
+ * is the case in iwlwifi, for example.
+ */
+ if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+ new->conf.cipher == WLAN_CIPHER_SUITE_TKIP &&
+ new->conf.keylen == WLAN_KEY_LEN_TKIP &&
+ !(new->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) {
+ memcpy(tkip_old, tk_old, WLAN_KEY_LEN_TKIP);
+ memcpy(tkip_new, tk_new, WLAN_KEY_LEN_TKIP);
+ memset(tkip_old + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, 0, 8);
+ memset(tkip_new + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, 0, 8);
+ tk_old = tkip_old;
+ tk_new = tkip_new;
+ }
+
+ return !crypto_memneq(tk_old, tk_new, new->conf.keylen);
+}
+
int ieee80211_key_link(struct ieee80211_key *key,
struct ieee80211_sub_if_data *sdata,
struct sta_info *sta)
@@ -619,9 +653,6 @@ int ieee80211_key_link(struct ieee80211_key *key,
pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE;
idx = key->conf.keyidx;
- key->local = sdata->local;
- key->sdata = sdata;
- key->sta = sta;
mutex_lock(&sdata->local->key_mtx);
@@ -632,6 +663,20 @@ int ieee80211_key_link(struct ieee80211_key *key,
else
old_key = key_mtx_dereference(sdata->local, sdata->keys[idx]);
+ /*
+ * Silently accept key re-installation without really installing the
+ * new version of the key to avoid nonce reuse or replay issues.
+ */
+ if (ieee80211_key_identical(sdata, old_key, key)) {
+ ieee80211_key_free_unused(key);
+ ret = 0;
+ goto out;
+ }
+
+ key->local = sdata->local;
+ key->sdata = sdata;
+ key->sta = sta;
+
increment_tailroom_need_count(sdata);
ieee80211_key_replace(sdata, sta, pairwise, old_key, key);
@@ -647,6 +692,7 @@ int ieee80211_key_link(struct ieee80211_key *key,
ret = 0;
}
+ out:
mutex_unlock(&sdata->local->key_mtx);
return ret;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index ed9ce7c63252..750b8bf13e60 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -689,7 +689,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb,
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
if (l4proto->allow_clash &&
- !nfct_nat(ct) &&
+ ((ct->status & IPS_NAT_DONE_MASK) == 0) &&
!nf_ct_is_dying(ct) &&
atomic_inc_not_zero(&ct->ct_general.use)) {
nf_ct_acct_merge(ct, ctinfo, (struct nf_conn *)skb->nfct);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 2916f4815c9c..624d6e4dcd5c 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -30,19 +30,17 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <linux/netfilter/nf_nat.h>
+static DEFINE_SPINLOCK(nf_nat_lock);
+
static DEFINE_MUTEX(nf_nat_proto_mutex);
static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
__read_mostly;
static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO]
__read_mostly;
-struct nf_nat_conn_key {
- const struct net *net;
- const struct nf_conntrack_tuple *tuple;
- const struct nf_conntrack_zone *zone;
-};
-
-static struct rhltable nf_nat_bysource_table;
+static struct hlist_head *nf_nat_bysource __read_mostly;
+static unsigned int nf_nat_htable_size __read_mostly;
+static unsigned int nf_nat_hash_rnd __read_mostly;
inline const struct nf_nat_l3proto *
__nf_nat_l3proto_find(u8 family)
@@ -121,17 +119,19 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
EXPORT_SYMBOL(nf_xfrm_me_harder);
#endif /* CONFIG_XFRM */
-static u32 nf_nat_bysource_hash(const void *data, u32 len, u32 seed)
+/* We keep an extra hash for each conntrack, for fast searching. */
+static inline unsigned int
+hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
{
- const struct nf_conntrack_tuple *t;
- const struct nf_conn *ct = data;
+ unsigned int hash;
+
+ get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
- t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
/* Original src, to ensure we map it consistently if poss. */
+ hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
+ tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n));
- seed ^= net_hash_mix(nf_ct_net(ct));
- return jhash2((const u32 *)&t->src, sizeof(t->src) / sizeof(u32),
- t->dst.protonum ^ seed);
+ return reciprocal_scale(hash, nf_nat_htable_size);
}
/* Is this tuple already taken? (not by us) */
@@ -187,28 +187,6 @@ same_src(const struct nf_conn *ct,
t->src.u.all == tuple->src.u.all);
}
-static int nf_nat_bysource_cmp(struct rhashtable_compare_arg *arg,
- const void *obj)
-{
- const struct nf_nat_conn_key *key = arg->key;
- const struct nf_conn *ct = obj;
-
- if (!same_src(ct, key->tuple) ||
- !net_eq(nf_ct_net(ct), key->net) ||
- !nf_ct_zone_equal(ct, key->zone, IP_CT_DIR_ORIGINAL))
- return 1;
-
- return 0;
-}
-
-static struct rhashtable_params nf_nat_bysource_params = {
- .head_offset = offsetof(struct nf_conn, nat_bysource),
- .obj_hashfn = nf_nat_bysource_hash,
- .obj_cmpfn = nf_nat_bysource_cmp,
- .nelem_hint = 256,
- .min_size = 1024,
-};
-
/* Only called for SRC manip */
static int
find_appropriate_src(struct net *net,
@@ -219,26 +197,22 @@ find_appropriate_src(struct net *net,
struct nf_conntrack_tuple *result,
const struct nf_nat_range *range)
{
+ unsigned int h = hash_by_src(net, tuple);
const struct nf_conn *ct;
- struct nf_nat_conn_key key = {
- .net = net,
- .tuple = tuple,
- .zone = zone
- };
- struct rhlist_head *hl, *h;
-
- hl = rhltable_lookup(&nf_nat_bysource_table, &key,
- nf_nat_bysource_params);
- rhl_for_each_entry_rcu(ct, h, hl, nat_bysource) {
- nf_ct_invert_tuplepr(result,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
- result->dst = tuple->dst;
-
- if (in_range(l3proto, l4proto, result, range))
- return 1;
+ hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) {
+ if (same_src(ct, tuple) &&
+ net_eq(net, nf_ct_net(ct)) &&
+ nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
+ /* Copy source part from reply tuple. */
+ nf_ct_invert_tuplepr(result,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ result->dst = tuple->dst;
+
+ if (in_range(l3proto, l4proto, result, range))
+ return 1;
+ }
}
-
return 0;
}
@@ -411,6 +385,7 @@ nf_nat_setup_info(struct nf_conn *ct,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype)
{
+ struct net *net = nf_ct_net(ct);
struct nf_conntrack_tuple curr_tuple, new_tuple;
struct nf_conn_nat *nat;
@@ -452,19 +427,16 @@ nf_nat_setup_info(struct nf_conn *ct,
}
if (maniptype == NF_NAT_MANIP_SRC) {
- struct nf_nat_conn_key key = {
- .net = nf_ct_net(ct),
- .tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- .zone = nf_ct_zone(ct),
- };
- int err;
-
- err = rhltable_insert_key(&nf_nat_bysource_table,
- &key,
- &ct->nat_bysource,
- nf_nat_bysource_params);
- if (err)
- return NF_DROP;
+ unsigned int srchash;
+
+ srchash = hash_by_src(net,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ spin_lock_bh(&nf_nat_lock);
+ /* nf_conntrack_alter_reply might re-allocate extension aera */
+ nat = nfct_nat(ct);
+ hlist_add_head_rcu(&ct->nat_bysource,
+ &nf_nat_bysource[srchash]);
+ spin_unlock_bh(&nf_nat_lock);
}
/* It's done. */
@@ -550,10 +522,6 @@ struct nf_nat_proto_clean {
static int nf_nat_proto_remove(struct nf_conn *i, void *data)
{
const struct nf_nat_proto_clean *clean = data;
- struct nf_conn_nat *nat = nfct_nat(i);
-
- if (!nat)
- return 0;
if ((clean->l3proto && nf_ct_l3num(i) != clean->l3proto) ||
(clean->l4proto && nf_ct_protonum(i) != clean->l4proto))
@@ -564,12 +532,10 @@ static int nf_nat_proto_remove(struct nf_conn *i, void *data)
static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
{
- struct nf_conn_nat *nat = nfct_nat(ct);
-
if (nf_nat_proto_remove(ct, data))
return 1;
- if (!nat)
+ if ((ct->status & IPS_SRC_NAT_DONE) == 0)
return 0;
/* This netns is being destroyed, and conntrack has nat null binding.
@@ -578,9 +544,10 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
* Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack()
* will delete entry from already-freed table.
*/
+ spin_lock_bh(&nf_nat_lock);
+ hlist_del_rcu(&ct->nat_bysource);
ct->status &= ~IPS_NAT_DONE_MASK;
- rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
- nf_nat_bysource_params);
+ spin_unlock_bh(&nf_nat_lock);
/* don't delete conntrack. Although that would make things a lot
* simpler, we'd end up flushing all conntracks on nat rmmod.
@@ -705,13 +672,11 @@ EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister);
/* No one using conntrack by the time this called. */
static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
- struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT);
-
- if (!nat)
- return;
-
- rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource,
- nf_nat_bysource_params);
+ if (ct->status & IPS_SRC_NAT_DONE) {
+ spin_lock_bh(&nf_nat_lock);
+ hlist_del_rcu(&ct->nat_bysource);
+ spin_unlock_bh(&nf_nat_lock);
+ }
}
static struct nf_ct_ext_type nat_extend __read_mostly = {
@@ -846,13 +811,16 @@ static int __init nf_nat_init(void)
{
int ret;
- ret = rhltable_init(&nf_nat_bysource_table, &nf_nat_bysource_params);
- if (ret)
- return ret;
+ /* Leave them the same for the moment. */
+ nf_nat_htable_size = nf_conntrack_htable_size;
+
+ nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
+ if (!nf_nat_bysource)
+ return -ENOMEM;
ret = nf_ct_extend_register(&nat_extend);
if (ret < 0) {
- rhltable_destroy(&nf_nat_bysource_table);
+ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
return ret;
}
@@ -876,7 +844,7 @@ static int __init nf_nat_init(void)
return 0;
cleanup_extend:
- rhltable_destroy(&nf_nat_bysource_table);
+ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
nf_ct_extend_unregister(&nat_extend);
return ret;
}
@@ -896,8 +864,8 @@ static void __exit nf_nat_cleanup(void)
for (i = 0; i < NFPROTO_NUMPROTO; i++)
kfree(nf_nat_l4protos[i]);
-
- rhltable_destroy(&nf_nat_bysource_table);
+ synchronize_net();
+ nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
}
MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 6c1e0246706e..7c3395513ff0 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -159,8 +159,34 @@ void nft_meta_get_eval(const struct nft_expr *expr,
else
*dest = PACKET_BROADCAST;
break;
+ case NFPROTO_NETDEV:
+ switch (skb->protocol) {
+ case htons(ETH_P_IP): {
+ int noff = skb_network_offset(skb);
+ struct iphdr *iph, _iph;
+
+ iph = skb_header_pointer(skb, noff,
+ sizeof(_iph), &_iph);
+ if (!iph)
+ goto err;
+
+ if (ipv4_is_multicast(iph->daddr))
+ *dest = PACKET_MULTICAST;
+ else
+ *dest = PACKET_BROADCAST;
+
+ break;
+ }
+ case htons(ETH_P_IPV6):
+ *dest = PACKET_MULTICAST;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ goto err;
+ }
+ break;
default:
- WARN_ON(1);
+ WARN_ON_ONCE(1);
goto err;
}
break;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 2a5775f8a6ca..c9fac08a53b1 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2077,7 +2077,7 @@ static int netlink_dump(struct sock *sk)
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
struct module *module;
- int len, err = -ENOBUFS;
+ int err = -ENOBUFS;
int alloc_min_size;
int alloc_size;
@@ -2124,9 +2124,11 @@ static int netlink_dump(struct sock *sk)
skb_reserve(skb, skb_tailroom(skb) - alloc_size);
netlink_skb_set_owner_r(skb, sk);
- len = cb->dump(skb, cb);
+ if (nlk->dump_done_errno > 0)
+ nlk->dump_done_errno = cb->dump(skb, cb);
- if (len > 0) {
+ if (nlk->dump_done_errno > 0 ||
+ skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) {
mutex_unlock(nlk->cb_mutex);
if (sk_filter(sk, skb))
@@ -2136,13 +2138,15 @@ static int netlink_dump(struct sock *sk)
return 0;
}
- nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI);
- if (!nlh)
+ nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE,
+ sizeof(nlk->dump_done_errno), NLM_F_MULTI);
+ if (WARN_ON(!nlh))
goto errout_skb;
nl_dump_check_consistent(cb, nlh);
- memcpy(nlmsg_data(nlh), &len, sizeof(len));
+ memcpy(nlmsg_data(nlh), &nlk->dump_done_errno,
+ sizeof(nlk->dump_done_errno));
if (sk_filter(sk, skb))
kfree_skb(skb);
@@ -2207,16 +2211,18 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
cb->min_dump_alloc = control->min_dump_alloc;
cb->skb = skb;
+ if (cb->start) {
+ ret = cb->start(cb);
+ if (ret)
+ goto error_unlock;
+ }
+
nlk->cb_running = true;
+ nlk->dump_done_errno = INT_MAX;
mutex_unlock(nlk->cb_mutex);
- ret = 0;
- if (cb->start)
- ret = cb->start(cb);
-
- if (!ret)
- ret = netlink_dump(sk);
+ ret = netlink_dump(sk);
sock_put(sk);
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 4fdb38318977..bae961cfa3ad 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -24,6 +24,7 @@ struct netlink_sock {
wait_queue_head_t wait;
bool bound;
bool cb_running;
+ int dump_done_errno;
struct netlink_callback cb;
struct mutex *cb_mutex;
struct mutex cb_def_mutex;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index dd181bdadfb8..f635b773d28f 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1721,7 +1721,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
out:
if (err && rollover) {
- kfree(rollover);
+ kfree_rcu(rollover, rcu);
po->rollover = NULL;
}
mutex_unlock(&fanout_mutex);
@@ -1748,8 +1748,10 @@ static struct packet_fanout *fanout_release(struct sock *sk)
else
f = NULL;
- if (po->rollover)
+ if (po->rollover) {
kfree_rcu(po->rollover, rcu);
+ po->rollover = NULL;
+ }
}
mutex_unlock(&fanout_mutex);
@@ -3852,6 +3854,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
void *data = &val;
union tpacket_stats_u st;
struct tpacket_rollover_stats rstats;
+ struct packet_rollover *rollover;
if (level != SOL_PACKET)
return -ENOPROTOOPT;
@@ -3930,13 +3933,18 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
0);
break;
case PACKET_ROLLOVER_STATS:
- if (!po->rollover)
+ rcu_read_lock();
+ rollover = rcu_dereference(po->rollover);
+ if (rollover) {
+ rstats.tp_all = atomic_long_read(&rollover->num);
+ rstats.tp_huge = atomic_long_read(&rollover->num_huge);
+ rstats.tp_failed = atomic_long_read(&rollover->num_failed);
+ data = &rstats;
+ lv = sizeof(rstats);
+ }
+ rcu_read_unlock();
+ if (!rollover)
return -EINVAL;
- rstats.tp_all = atomic_long_read(&po->rollover->num);
- rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
- rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
- data = &rstats;
- lv = sizeof(rstats);
break;
case PACKET_TX_HAS_OFF:
val = po->tp_tx_has_off;
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 7bf29e3fd71c..76c20745b502 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -296,6 +296,8 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
struct Qdisc *q;
+ if (!handle)
+ return NULL;
q = qdisc_match_from_root(dev->qdisc, handle);
if (q)
goto out;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 6c79915c7dbc..68b84d3a7cac 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -421,7 +421,7 @@ void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t,
{
struct dst_entry *dst;
- if (!t)
+ if (sock_owned_by_user(sk) || !t)
return;
dst = sctp_transport_dst_check(t);
if (dst)
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index ca4a63e3eadd..5d015270e454 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -806,9 +806,10 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname,
addr->v6.sin6_flowinfo = 0;
addr->v6.sin6_port = sh->source;
addr->v6.sin6_addr = ipv6_hdr(skb)->saddr;
- if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) {
+ if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
addr->v6.sin6_scope_id = sctp_v6_skb_iif(skb);
- }
+ else
+ addr->v6.sin6_scope_id = 0;
}
*addr_len = sctp_v6_addr_to_user(sctp_sk(skb->sk), addr);
@@ -881,8 +882,10 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr)
net = sock_net(&opt->inet.sk);
rcu_read_lock();
dev = dev_get_by_index_rcu(net, addr->v6.sin6_scope_id);
- if (!dev ||
- !ipv6_chk_addr(net, &addr->v6.sin6_addr, dev, 0)) {
+ if (!dev || !(opt->inet.freebind ||
+ net->ipv6.sysctl.ip_nonlocal_bind ||
+ ipv6_chk_addr(net, &addr->v6.sin6_addr,
+ dev, 0))) {
rcu_read_unlock();
return 0;
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 3ef725229449..c062ceae19e6 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -168,6 +168,36 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk)
sk_mem_charge(sk, chunk->skb->truesize);
}
+static void sctp_clear_owner_w(struct sctp_chunk *chunk)
+{
+ skb_orphan(chunk->skb);
+}
+
+static void sctp_for_each_tx_datachunk(struct sctp_association *asoc,
+ void (*cb)(struct sctp_chunk *))
+
+{
+ struct sctp_outq *q = &asoc->outqueue;
+ struct sctp_transport *t;
+ struct sctp_chunk *chunk;
+
+ list_for_each_entry(t, &asoc->peer.transport_addr_list, transports)
+ list_for_each_entry(chunk, &t->transmitted, transmitted_list)
+ cb(chunk);
+
+ list_for_each_entry(chunk, &q->retransmit, list)
+ cb(chunk);
+
+ list_for_each_entry(chunk, &q->sacked, list)
+ cb(chunk);
+
+ list_for_each_entry(chunk, &q->abandoned, list)
+ cb(chunk);
+
+ list_for_each_entry(chunk, &q->out_chunk_list, list)
+ cb(chunk);
+}
+
/* Verify that this is a valid address. */
static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr,
int len)
@@ -4734,6 +4764,10 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp)
struct socket *sock;
int err = 0;
+ /* Do not peel off from one netns to another one. */
+ if (!net_eq(current->nsproxy->net_ns, sock_net(sk)))
+ return -EINVAL;
+
if (!asoc)
return -EINVAL;
@@ -7826,7 +7860,9 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
* paths won't try to lock it and then oldsk.
*/
lock_sock_nested(newsk, SINGLE_DEPTH_NESTING);
+ sctp_for_each_tx_datachunk(assoc, sctp_clear_owner_w);
sctp_assoc_migrate(assoc, newsk);
+ sctp_for_each_tx_datachunk(assoc, sctp_set_owner_w);
/* If the association on the newsk is already closed before accept()
* is called, set RCV_SHUTDOWN flag.
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 4d9679701a6d..384c84e83462 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -257,6 +257,8 @@ static int unix_diag_get_exact(struct sk_buff *in_skb,
err = -ENOENT;
if (sk == NULL)
goto out_nosk;
+ if (!net_eq(sock_net(sk), net))
+ goto out;
err = sock_diag_check_cookie(sk, req->udiag_cookie);
if (err)