aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/9p/client.c3
-rw-r--r--net/ax25/af_ax25.c3
-rw-r--r--net/batman-adv/distributed-arp-table.c7
-rw-r--r--net/batman-adv/network-coding.c7
-rw-r--r--net/batman-adv/routing.c9
-rw-r--r--net/batman-adv/send.c6
-rw-r--r--net/batman-adv/soft-interface.c15
-rw-r--r--net/batman-adv/translation-table.c19
-rw-r--r--net/batman-adv/types.h10
-rw-r--r--net/bluetooth/sco.c3
-rw-r--r--net/bridge/br_fdb.c2
-rw-r--r--net/bridge/br_ioctl.c7
-rw-r--r--net/bridge/br_mdb.c3
-rw-r--r--net/bridge/br_multicast.c11
-rw-r--r--net/bridge/br_stp_if.c9
-rw-r--r--net/caif/caif_socket.c8
-rw-r--r--net/ceph/crush/mapper.c16
-rw-r--r--net/ceph/messenger.c4
-rw-r--r--net/ceph/osdmap.c156
-rw-r--r--net/core/datagram.c57
-rw-r--r--net/core/dev.c41
-rw-r--r--net/core/dst.c3
-rw-r--r--net/core/ethtool.c2
-rw-r--r--net/core/fib_rules.c14
-rw-r--r--net/core/filter.c5
-rw-r--r--net/core/neighbour.c17
-rw-r--r--net/core/pktgen.c4
-rw-r--r--net/core/rtnetlink.c217
-rw-r--r--net/core/scm.c2
-rw-r--r--net/core/skbuff.c17
-rw-r--r--net/core/sock.c7
-rw-r--r--net/core/sysctl_net_core.c1
-rw-r--r--net/dccp/ipv6.c33
-rw-r--r--net/decnet/af_decnet.c3
-rw-r--r--net/decnet/dn_route.c9
-rw-r--r--net/ipv4/af_inet.c5
-rw-r--r--net/ipv4/datagram.c16
-rw-r--r--net/ipv4/devinet.c4
-rw-r--r--net/ipv4/fib_frontend.c4
-rw-r--r--net/ipv4/fou.c3
-rw-r--r--net/ipv4/gre_offload.c3
-rw-r--r--net/ipv4/ip_fragment.c7
-rw-r--r--net/ipv4/ip_tunnel.c8
-rw-r--r--net/ipv4/ip_vti.c14
-rw-r--r--net/ipv4/ipmr.c21
-rw-r--r--net/ipv4/netfilter/arp_tables.c337
-rw-r--r--net/ipv4/netfilter/ip_tables.c371
-rw-r--r--net/ipv4/netfilter/nf_nat_masquerade_ipv4.c12
-rw-r--r--net/ipv4/route.c16
-rw-r--r--net/ipv4/sysctl_net_ipv4.c4
-rw-r--r--net/ipv4/tcp.c17
-rw-r--r--net/ipv4/tcp_cong.c6
-rw-r--r--net/ipv4/tcp_cubic.c22
-rw-r--r--net/ipv4/tcp_fastopen.c2
-rw-r--r--net/ipv4/tcp_input.c23
-rw-r--r--net/ipv4/tcp_ipv4.c8
-rw-r--r--net/ipv4/tcp_metrics.c2
-rw-r--r--net/ipv4/tcp_minisocks.c5
-rw-r--r--net/ipv4/tcp_output.c7
-rw-r--r--net/ipv4/tcp_yeah.c2
-rw-r--r--net/ipv4/udp.c41
-rw-r--r--net/ipv4/xfrm4_policy.c46
-rw-r--r--net/ipv6/addrconf.c18
-rw-r--r--net/ipv6/addrlabel.c2
-rw-r--r--net/ipv6/af_inet6.c16
-rw-r--r--net/ipv6/datagram.c24
-rw-r--r--net/ipv6/exthdrs.c3
-rw-r--r--net/ipv6/exthdrs_core.c6
-rw-r--r--net/ipv6/exthdrs_offload.c2
-rw-r--r--net/ipv6/inet6_connection_sock.c11
-rw-r--r--net/ipv6/ip6_gre.c9
-rw-r--r--net/ipv6/ip6_input.c6
-rw-r--r--net/ipv6/ip6_output.c3
-rw-r--r--net/ipv6/ip6_tunnel.c2
-rw-r--r--net/ipv6/ip6_vti.c16
-rw-r--r--net/ipv6/ip6mr.c17
-rw-r--r--net/ipv6/ipv6_sockglue.c36
-rw-r--r--net/ipv6/mcast.c2
-rw-r--r--net/ipv6/netfilter/ip6_tables.c365
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c5
-rw-r--r--net/ipv6/raw.c8
-rw-r--r--net/ipv6/reassembly.c10
-rw-r--r--net/ipv6/sit.c27
-rw-r--r--net/ipv6/syncookies.c2
-rw-r--r--net/ipv6/tcp_ipv6.c39
-rw-r--r--net/ipv6/udp.c24
-rw-r--r--net/ipv6/xfrm6_mode_tunnel.c2
-rw-r--r--net/ipv6/xfrm6_policy.c53
-rw-r--r--net/irda/af_irda.c3
-rw-r--r--net/irda/irlmp.c2
-rw-r--r--net/l2tp/l2tp_core.c11
-rw-r--r--net/l2tp/l2tp_ip.c8
-rw-r--r--net/l2tp/l2tp_ip6.c16
-rw-r--r--net/llc/af_llc.c1
-rw-r--r--net/mac80211/agg-rx.c2
-rw-r--r--net/mac80211/debugfs_netdev.c1
-rw-r--r--net/mac80211/ibss.c1
-rw-r--r--net/mac80211/main.c3
-rw-r--r--net/mac80211/mesh.c15
-rw-r--r--net/mac80211/mesh.h4
-rw-r--r--net/mac80211/mlme.c2
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c2
-rw-r--r--net/mac80211/rx.c6
-rw-r--r--net/mac80211/scan.c12
-rw-r--r--net/mac80211/tx.c3
-rw-r--r--net/mac802154/mib.c6
-rw-r--r--net/netfilter/core.c1
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c16
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c78
-rw-r--r--net/netfilter/ipvs/ip_vs_pe_sip.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_sched.c12
-rw-r--r--net/netfilter/ipvs/ip_vs_sync.c2
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c28
-rw-r--r--net/netfilter/nf_conntrack_core.c4
-rw-r--r--net/netfilter/nf_conntrack_expect.c3
-rw-r--r--net/netfilter/nf_conntrack_netlink.c5
-rw-r--r--net/netfilter/nf_internals.h1
-rw-r--r--net/netfilter/nf_log.c30
-rw-r--r--net/netfilter/nf_queue.c17
-rw-r--r--net/netfilter/nf_tables_api.c5
-rw-r--r--net/netfilter/nfnetlink.c8
-rw-r--r--net/netfilter/nfnetlink_cthelper.c7
-rw-r--r--net/netfilter/nfnetlink_queue_core.c24
-rw-r--r--net/netfilter/nft_compat.c38
-rw-r--r--net/netfilter/x_tables.c295
-rw-r--r--net/netfilter/xt_cgroup.c2
-rw-r--r--net/netlink/af_netlink.c121
-rw-r--r--net/netlink/af_netlink.h9
-rw-r--r--net/openvswitch/datapath.c4
-rw-r--r--net/openvswitch/flow_table.c23
-rw-r--r--net/openvswitch/flow_table.h2
-rw-r--r--net/packet/af_packet.c196
-rw-r--r--net/phonet/af_phonet.c4
-rw-r--r--net/rds/connection.c6
-rw-r--r--net/rds/ib_rdma.c4
-rw-r--r--net/rds/info.c2
-rw-r--r--net/rds/send.c4
-rw-r--r--net/rds/tcp_recv.c11
-rw-r--r--net/rfkill/core.c16
-rw-r--r--net/rose/af_rose.c3
-rw-r--r--net/sched/cls_api.c5
-rw-r--r--net/sched/sch_api.c18
-rw-r--r--net/sched/sch_cbq.c12
-rw-r--r--net/sched/sch_choke.c6
-rw-r--r--net/sched/sch_codel.c10
-rw-r--r--net/sched/sch_drr.c9
-rw-r--r--net/sched/sch_dsmark.c11
-rw-r--r--net/sched/sch_fq.c4
-rw-r--r--net/sched/sch_fq_codel.c17
-rw-r--r--net/sched/sch_generic.c9
-rw-r--r--net/sched/sch_hfsc.c9
-rw-r--r--net/sched/sch_hhf.c10
-rw-r--r--net/sched/sch_htb.c24
-rw-r--r--net/sched/sch_multiq.c16
-rw-r--r--net/sched/sch_netem.c74
-rw-r--r--net/sched/sch_pie.c5
-rw-r--r--net/sched/sch_prio.c15
-rw-r--r--net/sched/sch_qfq.c9
-rw-r--r--net/sched/sch_red.c10
-rw-r--r--net/sched/sch_sfb.c10
-rw-r--r--net/sched/sch_sfq.c16
-rw-r--r--net/sched/sch_tbf.c15
-rw-r--r--net/sctp/auth.c4
-rw-r--r--net/sctp/ipv6.c10
-rw-r--r--net/sctp/output.c4
-rw-r--r--net/sctp/protocol.c64
-rw-r--r--net/sctp/sm_make_chunk.c4
-rw-r--r--net/sctp/sm_statefuns.c6
-rw-r--r--net/sctp/socket.c59
-rw-r--r--net/sctp/sysctl.c2
-rw-r--r--net/socket.c62
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c4
-rw-r--r--net/sunrpc/backchannel_rqst.c2
-rw-r--r--net/sunrpc/cache.c8
-rw-r--r--net/sunrpc/clnt.c12
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c11
-rw-r--r--net/sunrpc/xprtrdma/verbs.c40
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h1
-rw-r--r--net/tipc/socket.c1
-rw-r--r--net/unix/af_unix.c314
-rw-r--r--net/unix/diag.c2
-rw-r--r--net/unix/garbage.c16
-rw-r--r--net/vmw_vsock/af_vsock.c21
-rw-r--r--net/wireless/core.c2
-rw-r--r--net/wireless/nl80211.c2
-rw-r--r--net/wireless/wext-compat.c2
-rw-r--r--net/wireless/wext-core.c77
-rw-r--r--net/x25/x25_facilities.c1
-rw-r--r--net/xfrm/xfrm_input.c20
-rw-r--r--net/xfrm/xfrm_policy.c38
190 files changed, 2780 insertions, 1848 deletions
diff --git a/net/9p/client.c b/net/9p/client.c
index e86a9bea1d16..53fe98ee8a55 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -850,7 +850,8 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
if (err < 0) {
if (err == -EIO)
c->status = Disconnected;
- goto reterr;
+ if (err != -ERESTARTSYS)
+ goto reterr;
}
if (req->status == REQ_STATUS_ERROR) {
p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index c35c3f48fc0f..1428c3ff3341 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -806,6 +806,9 @@ static int ax25_create(struct net *net, struct socket *sock, int protocol,
struct sock *sk;
ax25_cb *ax25;
+ if (protocol < 0 || protocol > SK_PROTOCOL_MAX)
+ return -EINVAL;
+
if (!net_eq(net, &init_net))
return -EAFNOSUPPORT;
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index b5981113c9a7..4bbd72e90756 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -15,6 +15,7 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/bitops.h>
#include <linux/if_ether.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
@@ -422,7 +423,7 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res,
int j;
/* check if orig node candidate is running DAT */
- if (!(candidate->capabilities & BATADV_ORIG_CAPA_HAS_DAT))
+ if (!test_bit(BATADV_ORIG_CAPA_HAS_DAT, &candidate->capabilities))
goto out;
/* Check if this node has already been selected... */
@@ -682,9 +683,9 @@ static void batadv_dat_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
uint16_t tvlv_value_len)
{
if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND)
- orig->capabilities &= ~BATADV_ORIG_CAPA_HAS_DAT;
+ clear_bit(BATADV_ORIG_CAPA_HAS_DAT, &orig->capabilities);
else
- orig->capabilities |= BATADV_ORIG_CAPA_HAS_DAT;
+ set_bit(BATADV_ORIG_CAPA_HAS_DAT, &orig->capabilities);
}
/**
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index 8d04d174669e..65d19690d8ae 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -15,6 +15,7 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/bitops.h>
#include <linux/debugfs.h>
#include "main.h"
@@ -105,9 +106,9 @@ static void batadv_nc_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv,
uint16_t tvlv_value_len)
{
if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND)
- orig->capabilities &= ~BATADV_ORIG_CAPA_HAS_NC;
+ clear_bit(BATADV_ORIG_CAPA_HAS_NC, &orig->capabilities);
else
- orig->capabilities |= BATADV_ORIG_CAPA_HAS_NC;
+ set_bit(BATADV_ORIG_CAPA_HAS_NC, &orig->capabilities);
}
/**
@@ -871,7 +872,7 @@ void batadv_nc_update_nc_node(struct batadv_priv *bat_priv,
goto out;
/* check if orig node is network coding enabled */
- if (!(orig_node->capabilities & BATADV_ORIG_CAPA_HAS_NC))
+ if (!test_bit(BATADV_ORIG_CAPA_HAS_NC, &orig_node->capabilities))
goto out;
/* accept ogms from 'good' neighbors and single hop neighbors */
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 35f76f2f7824..bbb1d04e7f8f 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -88,6 +88,15 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
neigh_node = NULL;
spin_lock_bh(&orig_node->neigh_list_lock);
+ /* curr_router used earlier may not be the current orig_ifinfo->router
+ * anymore because it was dereferenced outside of the neigh_list_lock
+ * protected region. After the new best neighbor has replace the current
+ * best neighbor the reference counter needs to decrease. Consequently,
+ * the code needs to ensure the curr_router variable contains a pointer
+ * to the replaced best neighbor.
+ */
+ curr_router = rcu_dereference_protected(orig_ifinfo->router, true);
+
rcu_assign_pointer(orig_ifinfo->router, neigh_node);
spin_unlock_bh(&orig_node->neigh_list_lock);
batadv_orig_ifinfo_free_ref(orig_ifinfo);
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 3d64ed20c393..6004c2de7b2a 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -611,6 +611,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
if (pending) {
hlist_del(&forw_packet->list);
+ if (!forw_packet->own)
+ atomic_inc(&bat_priv->bcast_queue_left);
+
batadv_forw_packet_free(forw_packet);
}
}
@@ -638,6 +641,9 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv,
if (pending) {
hlist_del(&forw_packet->list);
+ if (!forw_packet->own)
+ atomic_inc(&bat_priv->batman_queue_left);
+
batadv_forw_packet_free(forw_packet);
}
}
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 5467955eb27c..0bb7cae486b3 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -173,6 +173,7 @@ static int batadv_interface_tx(struct sk_buff *skb,
int gw_mode;
enum batadv_forw_mode forw_mode;
struct batadv_orig_node *mcast_single_orig = NULL;
+ int network_offset = ETH_HLEN;
if (atomic_read(&bat_priv->mesh_state) != BATADV_MESH_ACTIVE)
goto dropped;
@@ -185,14 +186,18 @@ static int batadv_interface_tx(struct sk_buff *skb,
case ETH_P_8021Q:
vhdr = vlan_eth_hdr(skb);
- if (vhdr->h_vlan_encapsulated_proto != ethertype)
+ if (vhdr->h_vlan_encapsulated_proto != ethertype) {
+ network_offset += VLAN_HLEN;
break;
+ }
/* fall through */
case ETH_P_BATMAN:
goto dropped;
}
+ skb_set_network_header(skb, network_offset);
+
if (batadv_bla_tx(bat_priv, skb, vid))
goto dropped;
@@ -373,11 +378,17 @@ void batadv_interface_rx(struct net_device *soft_iface,
*/
nf_reset(skb);
+ if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
+ goto dropped;
+
vid = batadv_get_vid(skb, 0);
ethhdr = eth_hdr(skb);
switch (ntohs(ethhdr->h_proto)) {
case ETH_P_8021Q:
+ if (!pskb_may_pull(skb, VLAN_ETH_HLEN))
+ goto dropped;
+
vhdr = (struct vlan_ethhdr *)skb->data;
if (vhdr->h_vlan_encapsulated_proto != ethertype)
@@ -389,8 +400,6 @@ void batadv_interface_rx(struct net_device *soft_iface,
}
/* skb->dev & skb->pkt_type are set here */
- if (unlikely(!pskb_may_pull(skb, ETH_HLEN)))
- goto dropped;
skb->protocol = eth_type_trans(skb, soft_iface);
/* should not be necessary anymore as we use skb_pull_rcsum()
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 5f59e7f899a0..58ad6ba429b3 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -15,6 +15,7 @@
* along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
+#include <linux/bitops.h>
#include "main.h"
#include "translation-table.h"
#include "soft-interface.h"
@@ -1015,6 +1016,7 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv,
struct batadv_tt_local_entry *tt_local_entry;
uint16_t flags, curr_flags = BATADV_NO_FLAGS;
struct batadv_softif_vlan *vlan;
+ void *tt_entry_exists;
tt_local_entry = batadv_tt_local_hash_find(bat_priv, addr, vid);
if (!tt_local_entry)
@@ -1042,7 +1044,15 @@ uint16_t batadv_tt_local_remove(struct batadv_priv *bat_priv,
* immediately purge it
*/
batadv_tt_local_event(bat_priv, tt_local_entry, BATADV_TT_CLIENT_DEL);
- hlist_del_rcu(&tt_local_entry->common.hash_entry);
+
+ tt_entry_exists = batadv_hash_remove(bat_priv->tt.local_hash,
+ batadv_compare_tt,
+ batadv_choose_tt,
+ &tt_local_entry->common);
+ if (!tt_entry_exists)
+ goto out;
+
+ /* extra call to free the local tt entry */
batadv_tt_local_entry_free_ref(tt_local_entry);
/* decrease the reference held for this vlan */
@@ -1844,7 +1854,7 @@ void batadv_tt_global_del_orig(struct batadv_priv *bat_priv,
}
spin_unlock_bh(list_lock);
}
- orig_node->capa_initialized &= ~BATADV_ORIG_CAPA_HAS_TT;
+ clear_bit(BATADV_ORIG_CAPA_HAS_TT, &orig_node->capa_initialized);
}
static bool batadv_tt_global_to_purge(struct batadv_tt_global_entry *tt_global,
@@ -2804,7 +2814,7 @@ static void _batadv_tt_update_changes(struct batadv_priv *bat_priv,
return;
}
}
- orig_node->capa_initialized |= BATADV_ORIG_CAPA_HAS_TT;
+ set_bit(BATADV_ORIG_CAPA_HAS_TT, &orig_node->capa_initialized);
}
static void batadv_tt_fill_gtable(struct batadv_priv *bat_priv,
@@ -3304,7 +3314,8 @@ static void batadv_tt_update_orig(struct batadv_priv *bat_priv,
bool has_tt_init;
tt_vlan = (struct batadv_tvlv_tt_vlan_data *)tt_buff;
- has_tt_init = orig_node->capa_initialized & BATADV_ORIG_CAPA_HAS_TT;
+ has_tt_init = test_bit(BATADV_ORIG_CAPA_HAS_TT,
+ &orig_node->capa_initialized);
/* orig table not initialised AND first diff is in the OGM OR the ttvn
* increased by one -> we can apply the attached changes
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 8854c05622a9..fdf65b50e3ec 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -258,8 +258,8 @@ struct batadv_orig_node {
struct hlist_node mcast_want_all_ipv4_node;
struct hlist_node mcast_want_all_ipv6_node;
#endif
- uint8_t capabilities;
- uint8_t capa_initialized;
+ unsigned long capabilities;
+ unsigned long capa_initialized;
atomic_t last_ttvn;
unsigned char *tt_buff;
int16_t tt_buff_len;
@@ -298,9 +298,9 @@ struct batadv_orig_node {
* (= orig node announces a tvlv of type BATADV_TVLV_MCAST)
*/
enum batadv_orig_capabilities {
- BATADV_ORIG_CAPA_HAS_DAT = BIT(0),
- BATADV_ORIG_CAPA_HAS_NC = BIT(1),
- BATADV_ORIG_CAPA_HAS_TT = BIT(2),
+ BATADV_ORIG_CAPA_HAS_DAT,
+ BATADV_ORIG_CAPA_HAS_NC,
+ BATADV_ORIG_CAPA_HAS_TT,
BATADV_ORIG_CAPA_HAS_MCAST = BIT(3),
};
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 7ee9e4ab00f8..b3ef78a644ed 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -520,6 +520,9 @@ static int sco_sock_bind(struct socket *sock, struct sockaddr *addr, int addr_le
if (!addr || addr->sa_family != AF_BLUETOOTH)
return -EINVAL;
+ if (addr_len < sizeof(struct sockaddr_sco))
+ return -EINVAL;
+
lock_sock(sk);
if (sk->sk_state != BT_OPEN) {
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index 6f6c95cfe8f2..eab8862d9f88 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -789,9 +789,11 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p,
int err = 0;
if (ndm->ndm_flags & NTF_USE) {
+ local_bh_disable();
rcu_read_lock();
br_fdb_update(p->br, p, addr, vid, true);
rcu_read_unlock();
+ local_bh_enable();
} else {
spin_lock_bh(&p->br->hash_lock);
err = fdb_add_entry(p, addr, ndm->ndm_state,
diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index a9a4a1b7863d..f876f707fd9e 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -21,18 +21,19 @@
#include <asm/uaccess.h>
#include "br_private.h"
-/* called with RTNL */
static int get_bridge_ifindices(struct net *net, int *indices, int num)
{
struct net_device *dev;
int i = 0;
- for_each_netdev(net, dev) {
+ rcu_read_lock();
+ for_each_netdev_rcu(net, dev) {
if (i >= num)
break;
if (dev->priv_flags & IFF_EBRIDGE)
indices[i++] = dev->ifindex;
}
+ rcu_read_unlock();
return i;
}
@@ -247,9 +248,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN))
return -EPERM;
- spin_lock_bh(&br->lock);
br_stp_set_bridge_priority(br, args[1]);
- spin_unlock_bh(&br->lock);
return 0;
case BRCTL_SET_PORT_PRIORITY:
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 5df05269d17a..cc641541d38f 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -347,7 +347,6 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port,
return -ENOMEM;
rcu_assign_pointer(*pp, p);
- br_mdb_notify(br->dev, port, group, RTM_NEWMDB);
return 0;
}
@@ -370,6 +369,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br,
if (!p || p->br != br || p->state == BR_STATE_DISABLED)
return -EINVAL;
+ memset(&ip, 0, sizeof(ip));
ip.proto = entry->addr.proto;
if (ip.proto == htons(ETH_P_IP))
ip.u.ip4 = entry->addr.u.ip4;
@@ -416,6 +416,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry)
if (!netif_running(br->dev) || br->multicast_disabled)
return -EINVAL;
+ memset(&ip, 0, sizeof(ip));
ip.proto = entry->addr.proto;
if (ip.proto == htons(ETH_P_IP)) {
if (timer_pending(&br->ip4_other_query.timer))
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index c465876c7861..c08f510fce30 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1071,7 +1071,7 @@ static int br_ip6_multicast_mld2_report(struct net_bridge *br,
err = br_ip6_multicast_add_group(br, port, &grec->grec_mca,
vid);
- if (!err)
+ if (err)
break;
}
@@ -1166,6 +1166,9 @@ static void br_multicast_add_router(struct net_bridge *br,
struct net_bridge_port *p;
struct hlist_node *slot = NULL;
+ if (!hlist_unhashed(&port->rlist))
+ return;
+
hlist_for_each_entry(p, &br->router_list, rlist) {
if ((unsigned long) port >= (unsigned long) p)
break;
@@ -1193,12 +1196,8 @@ static void br_multicast_mark_router(struct net_bridge *br,
if (port->multicast_router != 1)
return;
- if (!hlist_unhashed(&port->rlist))
- goto timer;
-
br_multicast_add_router(br, port);
-timer:
mod_timer(&port->multicast_router_timer,
now + br->multicast_querier_interval);
}
@@ -1821,7 +1820,7 @@ static void br_multicast_query_expired(struct net_bridge *br,
if (query->startup_sent < br->multicast_startup_query_count)
query->startup_sent++;
- RCU_INIT_POINTER(querier, NULL);
+ RCU_INIT_POINTER(querier->port, NULL);
br_multicast_send_query(br, NULL, query);
spin_unlock(&br->multicast_lock);
}
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 41146872c1b4..ce658abdc2c8 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -128,7 +128,10 @@ static void br_stp_start(struct net_bridge *br)
char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL };
char *envp[] = { NULL };
- r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
+ if (net_eq(dev_net(br->dev), &init_net))
+ r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
+ else
+ r = -ENOENT;
spin_lock_bh(&br->lock);
@@ -243,12 +246,13 @@ bool br_stp_recalculate_bridge_id(struct net_bridge *br)
return true;
}
-/* called under bridge lock */
+/* Acquires and releases bridge lock */
void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio)
{
struct net_bridge_port *p;
int wasroot;
+ spin_lock_bh(&br->lock);
wasroot = br_is_root_bridge(br);
list_for_each_entry(p, &br->port_list, list) {
@@ -266,6 +270,7 @@ void br_stp_set_bridge_priority(struct net_bridge *br, u16 newprio)
br_port_state_selection(br);
if (br_is_root_bridge(br) && !wasroot)
br_become_root_bridge(br);
+ spin_unlock_bh(&br->lock);
}
/* called under bridge lock */
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 765c78110fa8..5e10ee0efffb 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -330,6 +330,10 @@ static long caif_stream_data_wait(struct sock *sk, long timeo)
release_sock(sk);
timeo = schedule_timeout(timeo);
lock_sock(sk);
+
+ if (sock_flag(sk, SOCK_DEAD))
+ break;
+
clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
}
@@ -374,6 +378,10 @@ static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
struct sk_buff *skb;
lock_sock(sk);
+ if (sock_flag(sk, SOCK_DEAD)) {
+ err = -ECONNRESET;
+ goto unlock;
+ }
skb = skb_dequeue(&sk->sk_receive_queue);
caif_check_flow_release(sk);
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index a1ef53c04415..b1f2d1f44d37 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -290,6 +290,7 @@ static int is_out(const struct crush_map *map,
* @type: the type of item to choose
* @out: pointer to output vector
* @outpos: our position in that vector
+ * @out_size: size of the out vector
* @tries: number of attempts to make
* @recurse_tries: number of attempts to have recursive chooseleaf make
* @local_retries: localized retries
@@ -304,6 +305,7 @@ static int crush_choose_firstn(const struct crush_map *map,
const __u32 *weight, int weight_max,
int x, int numrep, int type,
int *out, int outpos,
+ int out_size,
unsigned int tries,
unsigned int recurse_tries,
unsigned int local_retries,
@@ -322,6 +324,7 @@ static int crush_choose_firstn(const struct crush_map *map,
int item = 0;
int itemtype;
int collide, reject;
+ int count = out_size;
dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d\n",
recurse_to_leaf ? "_LEAF" : "",
@@ -329,7 +332,7 @@ static int crush_choose_firstn(const struct crush_map *map,
tries, recurse_tries, local_retries, local_fallback_retries,
parent_r);
- for (rep = outpos; rep < numrep; rep++) {
+ for (rep = outpos; rep < numrep && count > 0 ; rep++) {
/* keep trying until we get a non-out, non-colliding item */
ftotal = 0;
skip_rep = 0;
@@ -403,7 +406,7 @@ static int crush_choose_firstn(const struct crush_map *map,
map->buckets[-1-item],
weight, weight_max,
x, outpos+1, 0,
- out2, outpos,
+ out2, outpos, count,
recurse_tries, 0,
local_retries,
local_fallback_retries,
@@ -463,6 +466,7 @@ reject:
dprintk("CHOOSE got %d\n", item);
out[outpos] = item;
outpos++;
+ count--;
}
dprintk("CHOOSE returns %d\n", outpos);
@@ -654,6 +658,7 @@ int crush_do_rule(const struct crush_map *map,
__u32 step;
int i, j;
int numrep;
+ int out_size;
/*
* the original choose_total_tries value was off by one (it
* counted "retries" and not "tries"). add one.
@@ -761,6 +766,7 @@ int crush_do_rule(const struct crush_map *map,
x, numrep,
curstep->arg2,
o+osize, j,
+ result_max-osize,
choose_tries,
recurse_tries,
choose_local_retries,
@@ -770,11 +776,13 @@ int crush_do_rule(const struct crush_map *map,
c+osize,
0);
} else {
+ out_size = ((numrep < (result_max-osize)) ?
+ numrep : (result_max-osize));
crush_choose_indep(
map,
map->buckets[-1-w[i]],
weight, weight_max,
- x, numrep, numrep,
+ x, out_size, numrep,
curstep->arg2,
o+osize, j,
choose_tries,
@@ -783,7 +791,7 @@ int crush_do_rule(const struct crush_map *map,
recurse_to_leaf,
c+osize,
0);
- osize += numrep;
+ osize += out_size;
}
}
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 56a65536c8f1..1519051603ff 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -2287,7 +2287,7 @@ static int read_partial_message(struct ceph_connection *con)
con->in_base_pos = -front_len - middle_len - data_len -
sizeof(m->footer);
con->in_tag = CEPH_MSGR_TAG_READY;
- return 0;
+ return 1;
} else if ((s64)seq - (s64)con->in_seq > 1) {
pr_err("read_partial_message bad seq %lld expected %lld\n",
seq, con->in_seq + 1);
@@ -2320,7 +2320,7 @@ static int read_partial_message(struct ceph_connection *con)
sizeof(m->footer);
con->in_tag = CEPH_MSGR_TAG_READY;
con->in_seq++;
- return 0;
+ return 1;
}
BUG_ON(!con->in_msg);
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index b8c3fde5b04f..60441b6a9546 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1167,6 +1167,115 @@ struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
}
/*
+ * Encoding order is (new_up_client, new_state, new_weight). Need to
+ * apply in the (new_weight, new_state, new_up_client) order, because
+ * an incremental map may look like e.g.
+ *
+ * new_up_client: { osd=6, addr=... } # set osd_state and addr
+ * new_state: { osd=6, xorstate=EXISTS } # clear osd_state
+ */
+static int decode_new_up_state_weight(void **p, void *end,
+ struct ceph_osdmap *map)
+{
+ void *new_up_client;
+ void *new_state;
+ void *new_weight_end;
+ u32 len;
+
+ new_up_client = *p;
+ ceph_decode_32_safe(p, end, len, e_inval);
+ len *= sizeof(u32) + sizeof(struct ceph_entity_addr);
+ ceph_decode_need(p, end, len, e_inval);
+ *p += len;
+
+ new_state = *p;
+ ceph_decode_32_safe(p, end, len, e_inval);
+ len *= sizeof(u32) + sizeof(u8);
+ ceph_decode_need(p, end, len, e_inval);
+ *p += len;
+
+ /* new_weight */
+ ceph_decode_32_safe(p, end, len, e_inval);
+ while (len--) {
+ s32 osd;
+ u32 w;
+
+ ceph_decode_need(p, end, 2*sizeof(u32), e_inval);
+ osd = ceph_decode_32(p);
+ w = ceph_decode_32(p);
+ BUG_ON(osd >= map->max_osd);
+ pr_info("osd%d weight 0x%x %s\n", osd, w,
+ w == CEPH_OSD_IN ? "(in)" :
+ (w == CEPH_OSD_OUT ? "(out)" : ""));
+ map->osd_weight[osd] = w;
+
+ /*
+ * If we are marking in, set the EXISTS, and clear the
+ * AUTOOUT and NEW bits.
+ */
+ if (w) {
+ map->osd_state[osd] |= CEPH_OSD_EXISTS;
+ map->osd_state[osd] &= ~(CEPH_OSD_AUTOOUT |
+ CEPH_OSD_NEW);
+ }
+ }
+ new_weight_end = *p;
+
+ /* new_state (up/down) */
+ *p = new_state;
+ len = ceph_decode_32(p);
+ while (len--) {
+ s32 osd;
+ u8 xorstate;
+ int ret;
+
+ osd = ceph_decode_32(p);
+ xorstate = ceph_decode_8(p);
+ if (xorstate == 0)
+ xorstate = CEPH_OSD_UP;
+ BUG_ON(osd >= map->max_osd);
+ if ((map->osd_state[osd] & CEPH_OSD_UP) &&
+ (xorstate & CEPH_OSD_UP))
+ pr_info("osd%d down\n", osd);
+ if ((map->osd_state[osd] & CEPH_OSD_EXISTS) &&
+ (xorstate & CEPH_OSD_EXISTS)) {
+ pr_info("osd%d does not exist\n", osd);
+ map->osd_weight[osd] = CEPH_OSD_IN;
+ ret = set_primary_affinity(map, osd,
+ CEPH_OSD_DEFAULT_PRIMARY_AFFINITY);
+ if (ret)
+ return ret;
+ memset(map->osd_addr + osd, 0, sizeof(*map->osd_addr));
+ map->osd_state[osd] = 0;
+ } else {
+ map->osd_state[osd] ^= xorstate;
+ }
+ }
+
+ /* new_up_client */
+ *p = new_up_client;
+ len = ceph_decode_32(p);
+ while (len--) {
+ s32 osd;
+ struct ceph_entity_addr addr;
+
+ osd = ceph_decode_32(p);
+ ceph_decode_copy(p, &addr, sizeof(addr));
+ ceph_decode_addr(&addr);
+ BUG_ON(osd >= map->max_osd);
+ pr_info("osd%d up\n", osd);
+ map->osd_state[osd] |= CEPH_OSD_EXISTS | CEPH_OSD_UP;
+ map->osd_addr[osd] = addr;
+ }
+
+ *p = new_weight_end;
+ return 0;
+
+e_inval:
+ return -EINVAL;
+}
+
+/*
* decode and apply an incremental map update.
*/
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
@@ -1265,49 +1374,10 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
__remove_pg_pool(&map->pg_pools, pi);
}
- /* new_up */
- ceph_decode_32_safe(p, end, len, e_inval);
- while (len--) {
- u32 osd;
- struct ceph_entity_addr addr;
- ceph_decode_32_safe(p, end, osd, e_inval);
- ceph_decode_copy_safe(p, end, &addr, sizeof(addr), e_inval);
- ceph_decode_addr(&addr);
- pr_info("osd%d up\n", osd);
- BUG_ON(osd >= map->max_osd);
- map->osd_state[osd] |= CEPH_OSD_UP;
- map->osd_addr[osd] = addr;
- }
-
- /* new_state */
- ceph_decode_32_safe(p, end, len, e_inval);
- while (len--) {
- u32 osd;
- u8 xorstate;
- ceph_decode_32_safe(p, end, osd, e_inval);
- xorstate = **(u8 **)p;
- (*p)++; /* clean flag */
- if (xorstate == 0)
- xorstate = CEPH_OSD_UP;
- if (xorstate & CEPH_OSD_UP)
- pr_info("osd%d down\n", osd);
- if (osd < map->max_osd)
- map->osd_state[osd] ^= xorstate;
- }
-
- /* new_weight */
- ceph_decode_32_safe(p, end, len, e_inval);
- while (len--) {
- u32 osd, off;
- ceph_decode_need(p, end, sizeof(u32)*2, e_inval);
- osd = ceph_decode_32(p);
- off = ceph_decode_32(p);
- pr_info("osd%d weight 0x%x %s\n", osd, off,
- off == CEPH_OSD_IN ? "(in)" :
- (off == CEPH_OSD_OUT ? "(out)" : ""));
- if (osd < map->max_osd)
- map->osd_weight[osd] = off;
- }
+ /* new_up_client, new_state, new_weight */
+ err = decode_new_up_state_weight(p, end, map);
+ if (err)
+ goto bad;
/* new_pg_temp */
err = decode_new_pg_temp(p, end, map);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index fdbc9a81d4c2..2850ab32f28a 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -130,6 +130,35 @@ out_noerr:
goto out;
}
+static struct sk_buff *skb_set_peeked(struct sk_buff *skb)
+{
+ struct sk_buff *nskb;
+
+ if (skb->peeked)
+ return skb;
+
+ /* We have to unshare an skb before modifying it. */
+ if (!skb_shared(skb))
+ goto done;
+
+ nskb = skb_clone(skb, GFP_ATOMIC);
+ if (!nskb)
+ return ERR_PTR(-ENOMEM);
+
+ skb->prev->next = nskb;
+ skb->next->prev = nskb;
+ nskb->prev = skb->prev;
+ nskb->next = skb->next;
+
+ consume_skb(skb);
+ skb = nskb;
+
+done:
+ skb->peeked = 1;
+
+ return skb;
+}
+
/**
* __skb_recv_datagram - Receive a datagram skbuff
* @sk: socket
@@ -164,7 +193,9 @@ out_noerr:
struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
int *peeked, int *off, int *err)
{
+ struct sk_buff_head *queue = &sk->sk_receive_queue;
struct sk_buff *skb, *last;
+ unsigned long cpu_flags;
long timeo;
/*
* Caller is allowed not to check sk->sk_err before skb_recv_datagram()
@@ -183,8 +214,6 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
* Look at current nfs client by the way...
* However, this function was correct in any case. 8)
*/
- unsigned long cpu_flags;
- struct sk_buff_head *queue = &sk->sk_receive_queue;
int _off = *off;
last = (struct sk_buff *)queue;
@@ -198,7 +227,12 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
_off -= skb->len;
continue;
}
- skb->peeked = 1;
+
+ skb = skb_set_peeked(skb);
+ error = PTR_ERR(skb);
+ if (IS_ERR(skb))
+ goto unlock_err;
+
atomic_inc(&skb->users);
} else
__skb_unlink(skb, queue);
@@ -222,6 +256,8 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
return NULL;
+unlock_err:
+ spin_unlock_irqrestore(&queue->lock, cpu_flags);
no_packet:
*err = error;
return NULL;
@@ -744,7 +780,8 @@ __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
!skb->csum_complete_sw)
netdev_rx_csum_fault(skb->dev);
}
- skb->csum_valid = !sum;
+ if (!skb_shared(skb))
+ skb->csum_valid = !sum;
return sum;
}
EXPORT_SYMBOL(__skb_checksum_complete_head);
@@ -764,11 +801,13 @@ __sum16 __skb_checksum_complete(struct sk_buff *skb)
netdev_rx_csum_fault(skb->dev);
}
- /* Save full packet checksum */
- skb->csum = csum;
- skb->ip_summed = CHECKSUM_COMPLETE;
- skb->csum_complete_sw = 1;
- skb->csum_valid = !sum;
+ if (!skb_shared(skb)) {
+ /* Save full packet checksum */
+ skb->csum = csum;
+ skb->ip_summed = CHECKSUM_COMPLETE;
+ skb->csum_complete_sw = 1;
+ skb->csum_valid = !sum;
+ }
return sum;
}
diff --git a/net/core/dev.c b/net/core/dev.c
index 9c18ef86044d..1cbcf08cc224 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3317,6 +3317,8 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
local_irq_save(flags);
rps_lock(sd);
+ if (!netif_running(skb->dev))
+ goto drop;
qlen = skb_queue_len(&sd->input_pkt_queue);
if (qlen <= netdev_max_backlog && !skb_flow_limit(skb, qlen)) {
if (skb_queue_len(&sd->input_pkt_queue)) {
@@ -3338,6 +3340,7 @@ enqueue:
goto enqueue;
}
+drop:
sd->dropped++;
rps_unlock(sd);
@@ -3675,8 +3678,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
pt_prev = NULL;
- rcu_read_lock();
-
another_round:
skb->skb_iif = skb->dev->ifindex;
@@ -3686,7 +3687,7 @@ another_round:
skb->protocol == cpu_to_be16(ETH_P_8021AD)) {
skb = skb_vlan_untag(skb);
if (unlikely(!skb))
- goto unlock;
+ goto out;
}
#ifdef CONFIG_NET_CLS_ACT
@@ -3711,7 +3712,7 @@ skip_taps:
#ifdef CONFIG_NET_CLS_ACT
skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
if (!skb)
- goto unlock;
+ goto out;
ncls:
#endif
@@ -3726,7 +3727,7 @@ ncls:
if (vlan_do_receive(&skb))
goto another_round;
else if (unlikely(!skb))
- goto unlock;
+ goto out;
}
rx_handler = rcu_dereference(skb->dev->rx_handler);
@@ -3738,7 +3739,7 @@ ncls:
switch (rx_handler(&skb)) {
case RX_HANDLER_CONSUMED:
ret = NET_RX_SUCCESS;
- goto unlock;
+ goto out;
case RX_HANDLER_ANOTHER:
goto another_round;
case RX_HANDLER_EXACT:
@@ -3790,8 +3791,7 @@ drop:
ret = NET_RX_DROP;
}
-unlock:
- rcu_read_unlock();
+out:
return ret;
}
@@ -3822,29 +3822,30 @@ static int __netif_receive_skb(struct sk_buff *skb)
static int netif_receive_skb_internal(struct sk_buff *skb)
{
+ int ret;
+
net_timestamp_check(netdev_tstamp_prequeue, skb);
if (skb_defer_rx_timestamp(skb))
return NET_RX_SUCCESS;
+ rcu_read_lock();
+
#ifdef CONFIG_RPS
if (static_key_false(&rps_needed)) {
struct rps_dev_flow voidflow, *rflow = &voidflow;
- int cpu, ret;
-
- rcu_read_lock();
-
- cpu = get_rps_cpu(skb->dev, skb, &rflow);
+ int cpu = get_rps_cpu(skb->dev, skb, &rflow);
if (cpu >= 0) {
ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
rcu_read_unlock();
return ret;
}
- rcu_read_unlock();
}
#endif
- return __netif_receive_skb(skb);
+ ret = __netif_receive_skb(skb);
+ rcu_read_unlock();
+ return ret;
}
/**
@@ -4384,8 +4385,10 @@ static int process_backlog(struct napi_struct *napi, int quota)
struct sk_buff *skb;
while ((skb = __skb_dequeue(&sd->process_queue))) {
+ rcu_read_lock();
local_irq_enable();
__netif_receive_skb(skb);
+ rcu_read_unlock();
local_irq_disable();
input_queue_head_incr(sd);
if (++work >= quota) {
@@ -5129,7 +5132,7 @@ static int __netdev_upper_dev_link(struct net_device *dev,
if (__netdev_find_adj(upper_dev, dev, &upper_dev->all_adj_list.upper))
return -EBUSY;
- if (__netdev_find_adj(dev, upper_dev, &dev->all_adj_list.upper))
+ if (__netdev_find_adj(dev, upper_dev, &dev->adj_list.upper))
return -EEXIST;
if (master && netdev_master_upper_dev_get(dev))
@@ -5916,6 +5919,7 @@ static void rollback_registered_many(struct list_head *head)
unlist_netdevice(dev);
dev->reg_state = NETREG_UNREGISTERING;
+ on_each_cpu(flush_backlog, dev, 1);
}
synchronize_net();
@@ -6177,7 +6181,8 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
struct netdev_queue *tx;
size_t sz = count * sizeof(*tx);
- BUG_ON(count < 1 || count > 0xffff);
+ if (count < 1 || count > 0xffff)
+ return -EINVAL;
tx = kzalloc(sz, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
if (!tx) {
@@ -6535,8 +6540,6 @@ void netdev_run_todo(void)
dev->reg_state = NETREG_UNREGISTERED;
- on_each_cpu(flush_backlog, dev, 1);
-
netdev_wait_allrefs(dev);
/* paranoia */
diff --git a/net/core/dst.c b/net/core/dst.c
index a028409ee438..57746a18c957 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -282,10 +282,11 @@ void dst_release(struct dst_entry *dst)
{
if (dst) {
int newrefcnt;
+ unsigned short nocache = dst->flags & DST_NOCACHE;
newrefcnt = atomic_dec_return(&dst->__refcnt);
WARN_ON(newrefcnt < 0);
- if (unlikely(dst->flags & DST_NOCACHE) && !newrefcnt)
+ if (!newrefcnt && unlikely(nocache))
call_rcu(&dst->rcu_head, dst_destroy_rcu);
}
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 06dfb293e5aa..14bb1583947e 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -1257,7 +1257,7 @@ static int ethtool_get_strings(struct net_device *dev, void __user *useraddr)
gstrings.len = ret;
- data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER);
+ data = kcalloc(gstrings.len, ETH_GSTRING_LEN, GFP_USER);
if (!data)
return -ENOMEM;
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 185c341fafbd..99ae718b79be 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -621,15 +621,17 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb,
{
int idx = 0;
struct fib_rule *rule;
+ int err = 0;
rcu_read_lock();
list_for_each_entry_rcu(rule, &ops->rules_list, list) {
if (idx < cb->args[1])
goto skip;
- if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, RTM_NEWRULE,
- NLM_F_MULTI, ops) < 0)
+ err = fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, RTM_NEWRULE,
+ NLM_F_MULTI, ops);
+ if (err < 0)
break;
skip:
idx++;
@@ -638,7 +640,7 @@ skip:
cb->args[1] = idx;
rules_ops_put(ops);
- return skb->len;
+ return err;
}
static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
@@ -654,7 +656,9 @@ static int fib_nl_dumprule(struct sk_buff *skb, struct netlink_callback *cb)
if (ops == NULL)
return -EAFNOSUPPORT;
- return dump_rules(skb, cb, ops);
+ dump_rules(skb, cb, ops);
+
+ return skb->len;
}
rcu_read_lock();
diff --git a/net/core/filter.c b/net/core/filter.c
index 647b12265e18..e6cde173851d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -728,6 +728,11 @@ int bpf_check_classic(const struct sock_filter *filter, unsigned int flen)
if (ftest->k == 0)
return -EINVAL;
break;
+ case BPF_ALU | BPF_LSH | BPF_K:
+ case BPF_ALU | BPF_RSH | BPF_K:
+ if (ftest->k >= 32)
+ return -EINVAL;
+ break;
case BPF_LD | BPF_MEM:
case BPF_LDX | BPF_MEM:
case BPF_ST:
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index ef31fef25e5a..0478423afd29 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -977,6 +977,8 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
rc = 0;
if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
goto out_unlock_bh;
+ if (neigh->dead)
+ goto out_dead;
if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
@@ -1033,6 +1035,13 @@ out_unlock_bh:
write_unlock(&neigh->lock);
local_bh_enable();
return rc;
+
+out_dead:
+ if (neigh->nud_state & NUD_STALE)
+ goto out_unlock_bh;
+ write_unlock_bh(&neigh->lock);
+ kfree_skb(skb);
+ return 1;
}
EXPORT_SYMBOL(__neigh_event_send);
@@ -1096,6 +1105,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
(old & (NUD_NOARP | NUD_PERMANENT)))
goto out;
+ if (neigh->dead)
+ goto out;
if (!(new & NUD_VALID)) {
neigh_del_timer(neigh);
@@ -1245,6 +1256,8 @@ EXPORT_SYMBOL(neigh_update);
*/
void __neigh_set_probe_once(struct neighbour *neigh)
{
+ if (neigh->dead)
+ return;
neigh->updated = jiffies;
if (!(neigh->nud_state & NUD_FAILED))
return;
@@ -2250,7 +2263,7 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
ndm->ndm_pad2 = 0;
ndm->ndm_flags = pn->flags | NTF_PROXY;
ndm->ndm_type = RTN_UNICAST;
- ndm->ndm_ifindex = pn->dev->ifindex;
+ ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
ndm->ndm_state = NUD_NONE;
if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
@@ -2324,7 +2337,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
if (h > s_h)
s_idx = 0;
for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
- if (dev_net(n->dev) != net)
+ if (pneigh_net(n) != net)
continue;
if (idx < s_idx)
goto next;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 0b320d93fb56..4ff3eacc99f5 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3490,8 +3490,10 @@ static int pktgen_thread_worker(void *arg)
pktgen_rem_thread(t);
/* Wait for kthread_stop */
- while (!kthread_should_stop()) {
+ for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
+ if (kthread_should_stop())
+ break;
schedule();
}
__set_current_state(TASK_RUNNING);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index c522f7a00eab..e1574edf4e43 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -805,7 +805,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
nla_total_size(sizeof(struct ifla_vf_vlan)) +
nla_total_size(sizeof(struct ifla_vf_spoofchk)) +
nla_total_size(sizeof(struct ifla_vf_rate)) +
- nla_total_size(sizeof(struct ifla_vf_link_state)));
+ nla_total_size(sizeof(struct ifla_vf_link_state)) +
+ nla_total_size(sizeof(struct ifla_vf_rss_query_en)));
return size;
} else
return 0;
@@ -1018,14 +1019,16 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
goto nla_put_failure;
if (1) {
- struct rtnl_link_ifmap map = {
- .mem_start = dev->mem_start,
- .mem_end = dev->mem_end,
- .base_addr = dev->base_addr,
- .irq = dev->irq,
- .dma = dev->dma,
- .port = dev->if_port,
- };
+ struct rtnl_link_ifmap map;
+
+ memset(&map, 0, sizeof(map));
+ map.mem_start = dev->mem_start;
+ map.mem_end = dev->mem_end;
+ map.base_addr = dev->base_addr;
+ map.irq = dev->irq;
+ map.dma = dev->dma;
+ map.port = dev->if_port;
+
if (nla_put(skb, IFLA_MAP, sizeof(map), &map))
goto nla_put_failure;
}
@@ -1075,14 +1078,16 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
struct ifla_vf_tx_rate vf_tx_rate;
struct ifla_vf_spoofchk vf_spoofchk;
struct ifla_vf_link_state vf_linkstate;
+ struct ifla_vf_rss_query_en vf_rss_query_en;
/*
* Not all SR-IOV capable drivers support the
- * spoofcheck query. Preset to -1 so the user
- * space tool can detect that the driver didn't
- * report anything.
+ * spoofcheck and "RSS query enable" query. Preset to
+ * -1 so the user space tool can detect that the driver
+ * didn't report anything.
*/
ivi.spoofchk = -1;
+ ivi.rss_query_en = -1;
memset(ivi.mac, 0, sizeof(ivi.mac));
/* The default value for VF link state is "auto"
* IFLA_VF_LINK_STATE_AUTO which equals zero
@@ -1095,7 +1100,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
vf_rate.vf =
vf_tx_rate.vf =
vf_spoofchk.vf =
- vf_linkstate.vf = ivi.vf;
+ vf_linkstate.vf =
+ vf_rss_query_en.vf = ivi.vf;
memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
vf_vlan.vlan = ivi.vlan;
@@ -1105,6 +1111,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
vf_rate.max_tx_rate = ivi.max_tx_rate;
vf_spoofchk.setting = ivi.spoofchk;
vf_linkstate.link_state = ivi.linkstate;
+ vf_rss_query_en.setting = ivi.rss_query_en;
vf = nla_nest_start(skb, IFLA_VF_INFO);
if (!vf) {
nla_nest_cancel(skb, vfinfo);
@@ -1119,7 +1126,10 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
&vf_spoofchk) ||
nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate),
- &vf_linkstate))
+ &vf_linkstate) ||
+ nla_put(skb, IFLA_VF_RSS_QUERY_EN,
+ sizeof(vf_rss_query_en),
+ &vf_rss_query_en))
goto nla_put_failure;
nla_nest_end(skb, vf);
}
@@ -1207,10 +1217,6 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
[IFLA_INFO_SLAVE_DATA] = { .type = NLA_NESTED },
};
-static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = {
- [IFLA_VF_INFO] = { .type = NLA_NESTED },
-};
-
static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
[IFLA_VF_MAC] = { .len = sizeof(struct ifla_vf_mac) },
[IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) },
@@ -1218,6 +1224,7 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
[IFLA_VF_SPOOFCHK] = { .len = sizeof(struct ifla_vf_spoofchk) },
[IFLA_VF_RATE] = { .len = sizeof(struct ifla_vf_rate) },
[IFLA_VF_LINK_STATE] = { .len = sizeof(struct ifla_vf_link_state) },
+ [IFLA_VF_RSS_QUERY_EN] = { .len = sizeof(struct ifla_vf_rss_query_en) },
};
static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
@@ -1356,85 +1363,98 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
return 0;
}
-static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
+static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
{
- int rem, err = -EINVAL;
- struct nlattr *vf;
const struct net_device_ops *ops = dev->netdev_ops;
+ int err = -EINVAL;
- nla_for_each_nested(vf, attr, rem) {
- switch (nla_type(vf)) {
- case IFLA_VF_MAC: {
- struct ifla_vf_mac *ivm;
- ivm = nla_data(vf);
- err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_mac)
- err = ops->ndo_set_vf_mac(dev, ivm->vf,
- ivm->mac);
- break;
- }
- case IFLA_VF_VLAN: {
- struct ifla_vf_vlan *ivv;
- ivv = nla_data(vf);
- err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_vlan)
- err = ops->ndo_set_vf_vlan(dev, ivv->vf,
- ivv->vlan,
- ivv->qos);
- break;
- }
- case IFLA_VF_TX_RATE: {
- struct ifla_vf_tx_rate *ivt;
- struct ifla_vf_info ivf;
- ivt = nla_data(vf);
- err = -EOPNOTSUPP;
- if (ops->ndo_get_vf_config)
- err = ops->ndo_get_vf_config(dev, ivt->vf,
- &ivf);
- if (err)
- break;
- err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_rate)
- err = ops->ndo_set_vf_rate(dev, ivt->vf,
- ivf.min_tx_rate,
- ivt->rate);
- break;
- }
- case IFLA_VF_RATE: {
- struct ifla_vf_rate *ivt;
- ivt = nla_data(vf);
- err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_rate)
- err = ops->ndo_set_vf_rate(dev, ivt->vf,
- ivt->min_tx_rate,
- ivt->max_tx_rate);
- break;
- }
- case IFLA_VF_SPOOFCHK: {
- struct ifla_vf_spoofchk *ivs;
- ivs = nla_data(vf);
- err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_spoofchk)
- err = ops->ndo_set_vf_spoofchk(dev, ivs->vf,
- ivs->setting);
- break;
- }
- case IFLA_VF_LINK_STATE: {
- struct ifla_vf_link_state *ivl;
- ivl = nla_data(vf);
- err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_link_state)
- err = ops->ndo_set_vf_link_state(dev, ivl->vf,
- ivl->link_state);
- break;
- }
- default:
- err = -EINVAL;
- break;
- }
- if (err)
- break;
+ if (tb[IFLA_VF_MAC]) {
+ struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]);
+
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_mac)
+ err = ops->ndo_set_vf_mac(dev, ivm->vf,
+ ivm->mac);
+ if (err < 0)
+ return err;
+ }
+
+ if (tb[IFLA_VF_VLAN]) {
+ struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]);
+
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_vlan)
+ err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan,
+ ivv->qos);
+ if (err < 0)
+ return err;
+ }
+
+ if (tb[IFLA_VF_TX_RATE]) {
+ struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]);
+ struct ifla_vf_info ivf;
+
+ err = -EOPNOTSUPP;
+ if (ops->ndo_get_vf_config)
+ err = ops->ndo_get_vf_config(dev, ivt->vf, &ivf);
+ if (err < 0)
+ return err;
+
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_rate)
+ err = ops->ndo_set_vf_rate(dev, ivt->vf,
+ ivf.min_tx_rate,
+ ivt->rate);
+ if (err < 0)
+ return err;
+ }
+
+ if (tb[IFLA_VF_RATE]) {
+ struct ifla_vf_rate *ivt = nla_data(tb[IFLA_VF_RATE]);
+
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_rate)
+ err = ops->ndo_set_vf_rate(dev, ivt->vf,
+ ivt->min_tx_rate,
+ ivt->max_tx_rate);
+ if (err < 0)
+ return err;
}
+
+ if (tb[IFLA_VF_SPOOFCHK]) {
+ struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]);
+
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_spoofchk)
+ err = ops->ndo_set_vf_spoofchk(dev, ivs->vf,
+ ivs->setting);
+ if (err < 0)
+ return err;
+ }
+
+ if (tb[IFLA_VF_LINK_STATE]) {
+ struct ifla_vf_link_state *ivl = nla_data(tb[IFLA_VF_LINK_STATE]);
+
+ err = -EOPNOTSUPP;
+ if (ops->ndo_set_vf_link_state)
+ err = ops->ndo_set_vf_link_state(dev, ivl->vf,
+ ivl->link_state);
+ if (err < 0)
+ return err;
+ }
+
+ if (tb[IFLA_VF_RSS_QUERY_EN]) {
+ struct ifla_vf_rss_query_en *ivrssq_en;
+
+ err = -EOPNOTSUPP;
+ ivrssq_en = nla_data(tb[IFLA_VF_RSS_QUERY_EN]);
+ if (ops->ndo_set_vf_rss_query_en)
+ err = ops->ndo_set_vf_rss_query_en(dev, ivrssq_en->vf,
+ ivrssq_en->setting);
+ if (err < 0)
+ return err;
+ }
+
return err;
}
@@ -1630,14 +1650,21 @@ static int do_setlink(const struct sk_buff *skb,
}
if (tb[IFLA_VFINFO_LIST]) {
+ struct nlattr *vfinfo[IFLA_VF_MAX + 1];
struct nlattr *attr;
int rem;
+
nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) {
- if (nla_type(attr) != IFLA_VF_INFO) {
+ if (nla_type(attr) != IFLA_VF_INFO ||
+ nla_len(attr) < NLA_HDRLEN) {
err = -EINVAL;
goto errout;
}
- err = do_setvfinfo(dev, attr);
+ err = nla_parse_nested(vfinfo, IFLA_VF_MAX, attr,
+ ifla_vf_policy);
+ if (err < 0)
+ goto errout;
+ err = do_setvfinfo(dev, vfinfo);
if (err < 0)
goto errout;
status |= DO_SETLINK_NOTIFY;
diff --git a/net/core/scm.c b/net/core/scm.c
index b442e7e25e60..d30eb057fa7b 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -306,6 +306,8 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
err = put_user(cmlen, &cm->cmsg_len);
if (!err) {
cmlen = CMSG_SPACE(i*sizeof(int));
+ if (msg->msg_controllen < cmlen)
+ cmlen = msg->msg_controllen;
msg->msg_control += cmlen;
msg->msg_controllen -= cmlen;
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index eea9e99e3a0a..7481660f3251 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2883,11 +2883,12 @@ EXPORT_SYMBOL(skb_append_datato_frags);
*/
unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
{
+ unsigned char *data = skb->data;
+
BUG_ON(len > skb->len);
- skb->len -= len;
- BUG_ON(skb->len < skb->data_len);
- skb_postpull_rcsum(skb, skb->data, len);
- return skb->data += len;
+ __skb_pull(skb, len);
+ skb_postpull_rcsum(skb, data, len);
+ return skb->data;
}
EXPORT_SYMBOL_GPL(skb_pull_rcsum);
@@ -3600,7 +3601,8 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb,
serr->ee.ee_info = tstype;
if (sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID) {
serr->ee.ee_data = skb_shinfo(skb)->tskey;
- if (sk->sk_protocol == IPPROTO_TCP)
+ if (sk->sk_protocol == IPPROTO_TCP &&
+ sk->sk_type == SOCK_STREAM)
serr->ee.ee_data -= sk->sk_tskey;
}
@@ -4109,7 +4111,8 @@ static struct sk_buff *skb_reorder_vlan_header(struct sk_buff *skb)
return NULL;
}
- memmove(skb->data - ETH_HLEN, skb->data - VLAN_ETH_HLEN, 2 * ETH_ALEN);
+ memmove(skb->data - ETH_HLEN, skb->data - skb->mac_len - VLAN_HLEN,
+ 2 * ETH_ALEN);
skb->mac_header += VLAN_HLEN;
return skb;
}
@@ -4201,7 +4204,7 @@ struct sk_buff *alloc_skb_with_frags(unsigned long header_len,
while (order) {
if (npages >= 1 << order) {
- page = alloc_pages(gfp_mask |
+ page = alloc_pages((gfp_mask & ~__GFP_WAIT) |
__GFP_COMP |
__GFP_NOWARN |
__GFP_NORETRY,
diff --git a/net/core/sock.c b/net/core/sock.c
index 2ac6eec30de3..e99645c41a1b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -422,8 +422,6 @@ static void sock_warn_obsolete_bsdism(const char *name)
}
}
-#define SK_FLAGS_TIMESTAMP ((1UL << SOCK_TIMESTAMP) | (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE))
-
static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
{
if (sk->sk_flags & flags) {
@@ -861,7 +859,8 @@ set_rcvbuf:
}
if (val & SOF_TIMESTAMPING_OPT_ID &&
!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
- if (sk->sk_protocol == IPPROTO_TCP) {
+ if (sk->sk_protocol == IPPROTO_TCP &&
+ sk->sk_type == SOCK_STREAM) {
if (sk->sk_state != TCP_ESTABLISHED) {
ret = -EINVAL;
break;
@@ -1854,7 +1853,7 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
pfrag->offset = 0;
if (SKB_FRAG_PAGE_ORDER) {
- pfrag->page = alloc_pages(gfp | __GFP_COMP |
+ pfrag->page = alloc_pages((gfp & ~__GFP_WAIT) | __GFP_COMP |
__GFP_NOWARN | __GFP_NORETRY,
SKB_FRAG_PAGE_ORDER);
if (likely(pfrag->page)) {
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index e731c96eac4b..8725b91120f8 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -23,7 +23,6 @@
#include <net/pkt_sched.h>
static int zero = 0;
-static int one = 1;
static int ushort_max = USHRT_MAX;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 6bcaa33cd804..7bcb22317841 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -238,7 +238,9 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
security_req_classify_flow(req, flowi6_to_flowi(&fl6));
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
+ rcu_read_unlock();
dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
@@ -255,7 +257,10 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req)
&ireq->ir_v6_loc_addr,
&ireq->ir_v6_rmt_addr);
fl6.daddr = ireq->ir_v6_rmt_addr;
- err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
+ rcu_read_lock();
+ err = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt),
+ np->tclass);
+ rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -450,6 +455,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
{
struct inet_request_sock *ireq = inet_rsk(req);
struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt;
struct inet_sock *newinet;
struct dccp6_sock *newdp6;
struct sock *newsk;
@@ -573,13 +579,15 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
* Yes, keeping reference count would be much more clever, but we make
* one more one thing there: reattach optmem to newsk.
*/
- if (np->opt != NULL)
- newnp->opt = ipv6_dup_options(newsk, np->opt);
-
+ opt = rcu_dereference(np->opt);
+ if (opt) {
+ opt = ipv6_dup_options(newsk, opt);
+ RCU_INIT_POINTER(newnp->opt, opt);
+ }
inet_csk(newsk)->icsk_ext_hdr_len = 0;
- if (newnp->opt != NULL)
- inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
- newnp->opt->opt_flen);
+ if (opt)
+ inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
+ opt->opt_flen;
dccp_sync_mss(newsk, dst_mtu(dst));
@@ -832,6 +840,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
struct ipv6_pinfo *np = inet6_sk(sk);
struct dccp_sock *dp = dccp_sk(sk);
struct in6_addr *saddr = NULL, *final_p, final;
+ struct ipv6_txoptions *opt;
struct flowi6 fl6;
struct dst_entry *dst;
int addr_type;
@@ -933,7 +942,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
fl6.fl6_sport = inet->inet_sport;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ final_p = fl6_update_dst(&fl6, opt, &final);
dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
@@ -953,9 +963,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
__ip6_dst_store(sk, dst, NULL, NULL);
icsk->icsk_ext_hdr_len = 0;
- if (np->opt != NULL)
- icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
- np->opt->opt_nflen);
+ if (opt)
+ icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen;
inet->inet_dport = usin->sin6_port;
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 25733d538147..2aeeb4f22e9d 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -678,6 +678,9 @@ static int dn_create(struct net *net, struct socket *sock, int protocol,
{
struct sock *sk;
+ if (protocol < 0 || protocol > SK_PROTOCOL_MAX)
+ return -EINVAL;
+
if (!net_eq(net, &init_net))
return -EAFNOSUPPORT;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index daccc4a36d80..4047341f6c07 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -1042,10 +1042,13 @@ source_ok:
if (!fld.daddr) {
fld.daddr = fld.saddr;
- err = -EADDRNOTAVAIL;
if (dev_out)
dev_put(dev_out);
+ err = -EINVAL;
dev_out = init_net.loopback_dev;
+ if (!dev_out->dn_ptr)
+ goto out;
+ err = -EADDRNOTAVAIL;
dev_hold(dev_out);
if (!fld.daddr) {
fld.daddr =
@@ -1118,6 +1121,8 @@ source_ok:
if (dev_out == NULL)
goto out;
dn_db = rcu_dereference_raw(dev_out->dn_ptr);
+ if (!dn_db)
+ goto e_inval;
/* Possible improvement - check all devices for local addr */
if (dn_dev_islocal(dev_out, fld.daddr)) {
dev_put(dev_out);
@@ -1159,6 +1164,8 @@ select_source:
dev_put(dev_out);
dev_out = init_net.loopback_dev;
dev_hold(dev_out);
+ if (!dev_out->dn_ptr)
+ goto e_inval;
fld.flowidn_oif = dev_out->ifindex;
if (res.fi)
dn_fib_info_put(res.fi);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index e67da4e6c324..6cf020ea4f46 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -228,6 +228,8 @@ int inet_listen(struct socket *sock, int backlog)
err = 0;
if (err)
goto out;
+
+ tcp_fastopen_init_key_once(true);
}
err = inet_csk_listen_start(sk, backlog);
if (err)
@@ -257,6 +259,9 @@ static int inet_create(struct net *net, struct socket *sock, int protocol,
int try_loading_module = 0;
int err;
+ if (protocol < 0 || protocol >= IPPROTO_MAX)
+ return -EINVAL;
+
sock->state = SS_UNCONNECTED;
/* Look for the requested type/protocol pair. */
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index 90c0e8386116..574fad9cca05 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -20,7 +20,7 @@
#include <net/route.h>
#include <net/tcp_states.h>
-int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct inet_sock *inet = inet_sk(sk);
struct sockaddr_in *usin = (struct sockaddr_in *) uaddr;
@@ -39,8 +39,6 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
sk_dst_reset(sk);
- lock_sock(sk);
-
oif = sk->sk_bound_dev_if;
saddr = inet->inet_saddr;
if (ipv4_is_multicast(usin->sin_addr.s_addr)) {
@@ -82,9 +80,19 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
sk_dst_set(sk, &rt->dst);
err = 0;
out:
- release_sock(sk);
return err;
}
+EXPORT_SYMBOL(__ip4_datagram_connect);
+
+int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+ int res;
+
+ lock_sock(sk);
+ res = __ip4_datagram_connect(sk, uaddr, addr_len);
+ release_sock(sk);
+ return res;
+}
EXPORT_SYMBOL(ip4_datagram_connect);
/* Because UDP xmit path can manipulate sk_dst_cache without holding
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 214882e7d6de..464293c34b7a 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -334,6 +334,9 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
ASSERT_RTNL();
+ if (in_dev->dead)
+ goto no_promotions;
+
/* 1. Deleting primary ifaddr forces deletion all secondaries
* unless alias promotion is set
**/
@@ -380,6 +383,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
fib_del_ifaddr(ifa, ifa1);
}
+no_promotions:
/* 2. Unlink it */
*ifap = ifa1->ifa_next;
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 23104a3f2924..d4c698ce0838 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -814,6 +814,9 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
subnet = 1;
}
+ if (in_dev->dead)
+ goto no_promotions;
+
/* Deletion is more complicated than add.
* We should take care of not to delete too much :-)
*
@@ -889,6 +892,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim)
}
}
+no_promotions:
if (!(ok & BRD_OK))
fib_magic(RTM_DELROUTE, RTN_BROADCAST, ifa->ifa_broadcast, 32, prim);
if (subnet && ifa->ifa_prefixlen < 31) {
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 606c520ffd5a..8ce8e82d1abb 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -25,6 +25,7 @@ struct fou {
u16 port;
struct udp_offload udp_offloads;
struct list_head list;
+ struct rcu_head rcu;
};
struct fou_cfg {
@@ -287,7 +288,7 @@ static void fou_release(struct fou *fou)
sock_release(sock);
- kfree(fou);
+ kfree_rcu(fou, rcu);
}
static int fou_encap_init(struct sock *sk, struct fou *fou, struct fou_cfg *cfg)
diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c
index 51973ddc05a6..abc50b41bc39 100644
--- a/net/ipv4/gre_offload.c
+++ b/net/ipv4/gre_offload.c
@@ -36,7 +36,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb,
SKB_GSO_TCP_ECN |
SKB_GSO_GRE |
SKB_GSO_GRE_CSUM |
- SKB_GSO_IPIP)))
+ SKB_GSO_IPIP |
+ SKB_GSO_SIT)))
goto out;
if (!skb->encapsulation)
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index b48e03cd6656..9516031847f1 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -342,7 +342,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
ihl = ip_hdrlen(skb);
/* Determine the position of this fragment. */
- end = offset + skb->len - ihl;
+ end = offset + skb->len - skb_network_offset(skb) - ihl;
err = -EINVAL;
/* Is this the final fragment? */
@@ -372,7 +372,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
goto err;
err = -ENOMEM;
- if (pskb_pull(skb, ihl) == NULL)
+ if (!pskb_pull(skb, skb_network_offset(skb) + ihl))
goto err;
err = pskb_trim_rcsum(skb, end - offset);
@@ -612,6 +612,9 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
iph->frag_off = qp->q.max_size ? htons(IP_DF) : 0;
iph->tot_len = htons(len);
iph->tos |= ecn;
+
+ ip_send_check(iph);
+
IP_INC_STATS_BH(net, IPSTATS_MIB_REASMOKS);
qp->q.fragments = NULL;
qp->q.fragments_tail = NULL;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 0bb8e141eacc..682257242971 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -587,7 +587,8 @@ int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
EXPORT_SYMBOL(ip_tunnel_encap);
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
- struct rtable *rt, __be16 df)
+ struct rtable *rt, __be16 df,
+ const struct iphdr *inner_iph)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
@@ -604,7 +605,8 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
if (skb->protocol == htons(ETH_P_IP)) {
if (!skb_is_gso(skb) &&
- (df & htons(IP_DF)) && mtu < pkt_size) {
+ (inner_iph->frag_off & htons(IP_DF)) &&
+ mtu < pkt_size) {
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
return -E2BIG;
@@ -738,7 +740,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
- if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
+ if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
ip_rt_put(rt);
goto tx_error;
}
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 1a7e979e80ba..15046aec8484 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -65,7 +65,6 @@ static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
goto drop;
XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
- skb->mark = be32_to_cpu(tunnel->parms.i_key);
return xfrm_input(skb, nexthdr, spi, encap_type);
}
@@ -91,6 +90,8 @@ static int vti_rcv_cb(struct sk_buff *skb, int err)
struct pcpu_sw_netstats *tstats;
struct xfrm_state *x;
struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4;
+ u32 orig_mark = skb->mark;
+ int ret;
if (!tunnel)
return 1;
@@ -107,7 +108,11 @@ static int vti_rcv_cb(struct sk_buff *skb, int err)
x = xfrm_input_state(skb);
family = x->inner_mode->afinfo->family;
- if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
+ skb->mark = be32_to_cpu(tunnel->parms.i_key);
+ ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
+ skb->mark = orig_mark;
+
+ if (!ret)
return -EPERM;
skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(skb->dev)));
@@ -216,8 +221,6 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
memset(&fl, 0, sizeof(fl));
- skb->mark = be32_to_cpu(tunnel->parms.o_key);
-
switch (skb->protocol) {
case htons(ETH_P_IP):
xfrm_decode_session(skb, &fl, AF_INET);
@@ -233,6 +236,9 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
return NETDEV_TX_OK;
}
+ /* override mark with tunnel output key */
+ fl.flowi_mark = be32_to_cpu(tunnel->parms.o_key);
+
return vti_xmit(skb, dev, &fl);
}
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index c8034587859d..1b7f6da99ef4 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -136,7 +136,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
struct mfc_cache *c, struct rtmsg *rtm);
static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
int cmd);
-static void mroute_clean_tables(struct mr_table *mrt);
+static void mroute_clean_tables(struct mr_table *mrt, bool all);
static void ipmr_expire_process(unsigned long arg);
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
@@ -348,7 +348,7 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id)
static void ipmr_free_table(struct mr_table *mrt)
{
del_timer_sync(&mrt->ipmr_expire_timer);
- mroute_clean_tables(mrt);
+ mroute_clean_tables(mrt, true);
kfree(mrt);
}
@@ -1201,7 +1201,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
* Close the multicast socket, and clear the vif tables etc
*/
-static void mroute_clean_tables(struct mr_table *mrt)
+static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
int i;
LIST_HEAD(list);
@@ -1210,8 +1210,9 @@ static void mroute_clean_tables(struct mr_table *mrt)
/* Shut down all active vif entries */
for (i = 0; i < mrt->maxvif; i++) {
- if (!(mrt->vif_table[i].flags & VIFF_STATIC))
- vif_delete(mrt, i, 0, &list);
+ if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
+ continue;
+ vif_delete(mrt, i, 0, &list);
}
unregister_netdevice_many(&list);
@@ -1219,7 +1220,7 @@ static void mroute_clean_tables(struct mr_table *mrt)
for (i = 0; i < MFC_LINES; i++) {
list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
- if (c->mfc_flags & MFC_STATIC)
+ if (!all && (c->mfc_flags & MFC_STATIC))
continue;
list_del_rcu(&c->list);
mroute_netlink_event(mrt, c, RTM_DELROUTE);
@@ -1254,7 +1255,7 @@ static void mrtsock_destruct(struct sock *sk)
NETCONFA_IFINDEX_ALL,
net->ipv4.devconf_all);
RCU_INIT_POINTER(mrt->mroute_sk, NULL);
- mroute_clean_tables(mrt);
+ mroute_clean_tables(mrt, false);
}
}
rtnl_unlock();
@@ -1674,8 +1675,8 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
{
struct ip_options *opt = &(IPCB(skb)->opt);
- IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
- IP_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len);
+ IP_INC_STATS(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
+ IP_ADD_STATS(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTOCTETS, skb->len);
if (unlikely(opt->optlen))
ip_forward_options(skb);
@@ -1737,7 +1738,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
* to blackhole.
*/
- IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
+ IP_INC_STATS(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
ip_rt_put(rt);
goto out_free;
}
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index f95b6f93814b..b3bfa5111799 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -355,18 +355,20 @@ unsigned int arpt_do_table(struct sk_buff *skb,
}
/* All zeroes == unconditional rule. */
-static inline bool unconditional(const struct arpt_arp *arp)
+static inline bool unconditional(const struct arpt_entry *e)
{
static const struct arpt_arp uncond;
- return memcmp(arp, &uncond, sizeof(uncond)) == 0;
+ return e->target_offset == sizeof(struct arpt_entry) &&
+ memcmp(&e->arp, &uncond, sizeof(uncond)) == 0;
}
/* Figures out from what hook each rule can be called: returns 0 if
* there are loops. Puts hook bitmask in comefrom.
*/
static int mark_source_chains(const struct xt_table_info *newinfo,
- unsigned int valid_hooks, void *entry0)
+ unsigned int valid_hooks, void *entry0,
+ unsigned int *offsets)
{
unsigned int hook;
@@ -398,11 +400,10 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
|= ((1 << hook) | (1 << NF_ARP_NUMHOOKS));
/* Unconditional return/END. */
- if ((e->target_offset == sizeof(struct arpt_entry) &&
+ if ((unconditional(e) &&
(strcmp(t->target.u.user.name,
XT_STANDARD_TARGET) == 0) &&
- t->verdict < 0 && unconditional(&e->arp)) ||
- visited) {
+ t->verdict < 0) || visited) {
unsigned int oldpos, size;
if ((strcmp(t->target.u.user.name,
@@ -435,6 +436,8 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
size = e->next_offset;
e = (struct arpt_entry *)
(entry0 + pos + size);
+ if (pos + size >= newinfo->size)
+ return 0;
e->counters.pcnt = pos;
pos += size;
} else {
@@ -454,9 +457,16 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
/* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n",
pos, newpos);
+ if (!xt_find_jump_offset(offsets, newpos,
+ newinfo->number))
+ return 0;
+ e = (struct arpt_entry *)
+ (entry0 + newpos);
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
+ if (newpos >= newinfo->size)
+ return 0;
}
e = (struct arpt_entry *)
(entry0 + newpos);
@@ -470,25 +480,6 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
return 1;
}
-static inline int check_entry(const struct arpt_entry *e, const char *name)
-{
- const struct xt_entry_target *t;
-
- if (!arp_checkentry(&e->arp)) {
- duprintf("arp_tables: arp check failed %p %s.\n", e, name);
- return -EINVAL;
- }
-
- if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset)
- return -EINVAL;
-
- t = arpt_get_target_c(e);
- if (e->target_offset + t->u.target_size > e->next_offset)
- return -EINVAL;
-
- return 0;
-}
-
static inline int check_target(struct arpt_entry *e, const char *name)
{
struct xt_entry_target *t = arpt_get_target(e);
@@ -518,10 +509,6 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
struct xt_target *target;
int ret;
- ret = check_entry(e, name);
- if (ret)
- return ret;
-
t = arpt_get_target(e);
target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
t->u.user.revision);
@@ -547,7 +534,7 @@ static bool check_underflow(const struct arpt_entry *e)
const struct xt_entry_target *t;
unsigned int verdict;
- if (!unconditional(&e->arp))
+ if (!unconditional(e))
return false;
t = arpt_get_target_c(e);
if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
@@ -566,9 +553,11 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
unsigned int valid_hooks)
{
unsigned int h;
+ int err;
if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 ||
- (unsigned char *)e + sizeof(struct arpt_entry) >= limit) {
+ (unsigned char *)e + sizeof(struct arpt_entry) >= limit ||
+ (unsigned char *)e + e->next_offset > limit) {
duprintf("Bad offset %p\n", e);
return -EINVAL;
}
@@ -580,6 +569,14 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
return -EINVAL;
}
+ if (!arp_checkentry(&e->arp))
+ return -EINVAL;
+
+ err = xt_check_entry_offsets(e, e->elems, e->target_offset,
+ e->next_offset);
+ if (err)
+ return err;
+
/* Check hooks & underflows */
for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
if (!(valid_hooks & (1 << h)))
@@ -588,9 +585,9 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
newinfo->hook_entry[h] = hook_entries[h];
if ((unsigned char *)e - base == underflows[h]) {
if (!check_underflow(e)) {
- pr_err("Underflows must be unconditional and "
- "use the STANDARD target with "
- "ACCEPT/DROP\n");
+ pr_debug("Underflows must be unconditional and "
+ "use the STANDARD target with "
+ "ACCEPT/DROP\n");
return -EINVAL;
}
newinfo->underflow[h] = underflows[h];
@@ -624,6 +621,7 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
const struct arpt_replace *repl)
{
struct arpt_entry *iter;
+ unsigned int *offsets;
unsigned int i;
int ret = 0;
@@ -637,8 +635,10 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
}
duprintf("translate_table: size %u\n", newinfo->size);
+ offsets = xt_alloc_entry_offsets(newinfo->number);
+ if (!offsets)
+ return -ENOMEM;
i = 0;
-
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter, entry0, newinfo->size) {
ret = check_entry_size_and_hooks(iter, newinfo, entry0,
@@ -647,7 +647,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
repl->underflow,
repl->valid_hooks);
if (ret != 0)
- break;
+ goto out_free;
+ if (i < repl->num_entries)
+ offsets[i] = (void *)iter - entry0;
++i;
if (strcmp(arpt_get_target(iter)->u.user.name,
XT_ERROR_TARGET) == 0)
@@ -655,12 +657,13 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
}
duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
if (ret != 0)
- return ret;
+ goto out_free;
+ ret = -EINVAL;
if (i != repl->num_entries) {
duprintf("translate_table: %u not %u entries\n",
i, repl->num_entries);
- return -EINVAL;
+ goto out_free;
}
/* Check hooks all assigned */
@@ -671,19 +674,20 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
duprintf("Invalid hook entry %u %u\n",
i, repl->hook_entry[i]);
- return -EINVAL;
+ goto out_free;
}
if (newinfo->underflow[i] == 0xFFFFFFFF) {
duprintf("Invalid underflow %u %u\n",
i, repl->underflow[i]);
- return -EINVAL;
+ goto out_free;
}
}
- if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) {
- duprintf("Looping hook\n");
- return -ELOOP;
+ if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
+ ret = -ELOOP;
+ goto out_free;
}
+ kvfree(offsets);
/* Finally, each sanity check must pass */
i = 0;
@@ -710,6 +714,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
}
return ret;
+ out_free:
+ kvfree(offsets);
+ return ret;
}
static void get_counters(const struct xt_table_info *t,
@@ -1116,56 +1123,18 @@ static int do_add_counters(struct net *net, const void __user *user,
unsigned int i, curcpu;
struct xt_counters_info tmp;
struct xt_counters *paddc;
- unsigned int num_counters;
- const char *name;
- int size;
- void *ptmp;
struct xt_table *t;
const struct xt_table_info *private;
int ret = 0;
void *loc_cpu_entry;
struct arpt_entry *iter;
unsigned int addend;
-#ifdef CONFIG_COMPAT
- struct compat_xt_counters_info compat_tmp;
-
- if (compat) {
- ptmp = &compat_tmp;
- size = sizeof(struct compat_xt_counters_info);
- } else
-#endif
- {
- ptmp = &tmp;
- size = sizeof(struct xt_counters_info);
- }
-
- if (copy_from_user(ptmp, user, size) != 0)
- return -EFAULT;
-#ifdef CONFIG_COMPAT
- if (compat) {
- num_counters = compat_tmp.num_counters;
- name = compat_tmp.name;
- } else
-#endif
- {
- num_counters = tmp.num_counters;
- name = tmp.name;
- }
+ paddc = xt_copy_counters_from_user(user, len, &tmp, compat);
+ if (IS_ERR(paddc))
+ return PTR_ERR(paddc);
- if (len != size + num_counters * sizeof(struct xt_counters))
- return -EINVAL;
-
- paddc = vmalloc(len - size);
- if (!paddc)
- return -ENOMEM;
-
- if (copy_from_user(paddc, user + size, len - size) != 0) {
- ret = -EFAULT;
- goto free;
- }
-
- t = xt_find_table_lock(net, NFPROTO_ARP, name);
+ t = xt_find_table_lock(net, NFPROTO_ARP, tmp.name);
if (IS_ERR_OR_NULL(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free;
@@ -1173,7 +1142,7 @@ static int do_add_counters(struct net *net, const void __user *user,
local_bh_disable();
private = t->private;
- if (private->number != num_counters) {
+ if (private->number != tmp.num_counters) {
ret = -EINVAL;
goto unlock_up_free;
}
@@ -1199,6 +1168,18 @@ static int do_add_counters(struct net *net, const void __user *user,
}
#ifdef CONFIG_COMPAT
+struct compat_arpt_replace {
+ char name[XT_TABLE_MAXNAMELEN];
+ u32 valid_hooks;
+ u32 num_entries;
+ u32 size;
+ u32 hook_entry[NF_ARP_NUMHOOKS];
+ u32 underflow[NF_ARP_NUMHOOKS];
+ u32 num_counters;
+ compat_uptr_t counters;
+ struct compat_arpt_entry entries[0];
+};
+
static inline void compat_release_entry(struct compat_arpt_entry *e)
{
struct xt_entry_target *t;
@@ -1207,24 +1188,22 @@ static inline void compat_release_entry(struct compat_arpt_entry *e)
module_put(t->u.kernel.target->me);
}
-static inline int
+static int
check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
struct xt_table_info *newinfo,
unsigned int *size,
const unsigned char *base,
- const unsigned char *limit,
- const unsigned int *hook_entries,
- const unsigned int *underflows,
- const char *name)
+ const unsigned char *limit)
{
struct xt_entry_target *t;
struct xt_target *target;
unsigned int entry_offset;
- int ret, off, h;
+ int ret, off;
duprintf("check_compat_entry_size_and_hooks %p\n", e);
if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 ||
- (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit) {
+ (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit ||
+ (unsigned char *)e + e->next_offset > limit) {
duprintf("Bad offset %p, limit = %p\n", e, limit);
return -EINVAL;
}
@@ -1236,8 +1215,11 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
return -EINVAL;
}
- /* For purposes of check_entry casting the compat entry is fine */
- ret = check_entry((struct arpt_entry *)e, name);
+ if (!arp_checkentry(&e->arp))
+ return -EINVAL;
+
+ ret = xt_compat_check_entry_offsets(e, e->elems, e->target_offset,
+ e->next_offset);
if (ret)
return ret;
@@ -1261,17 +1243,6 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
if (ret)
goto release_target;
- /* Check hooks & underflows */
- for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
- if ((unsigned char *)e - base == hook_entries[h])
- newinfo->hook_entry[h] = hook_entries[h];
- if ((unsigned char *)e - base == underflows[h])
- newinfo->underflow[h] = underflows[h];
- }
-
- /* Clear counters and comefrom */
- memset(&e->counters, 0, sizeof(e->counters));
- e->comefrom = 0;
return 0;
release_target:
@@ -1280,18 +1251,17 @@ out:
return ret;
}
-static int
+static void
compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
- unsigned int *size, const char *name,
+ unsigned int *size,
struct xt_table_info *newinfo, unsigned char *base)
{
struct xt_entry_target *t;
struct xt_target *target;
struct arpt_entry *de;
unsigned int origsize;
- int ret, h;
+ int h;
- ret = 0;
origsize = *size;
de = (struct arpt_entry *)*dstptr;
memcpy(de, e, sizeof(struct arpt_entry));
@@ -1312,144 +1282,81 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
if ((unsigned char *)de - base < newinfo->underflow[h])
newinfo->underflow[h] -= origsize - *size;
}
- return ret;
}
-static int translate_compat_table(const char *name,
- unsigned int valid_hooks,
- struct xt_table_info **pinfo,
+static int translate_compat_table(struct xt_table_info **pinfo,
void **pentry0,
- unsigned int total_size,
- unsigned int number,
- unsigned int *hook_entries,
- unsigned int *underflows)
+ const struct compat_arpt_replace *compatr)
{
unsigned int i, j;
struct xt_table_info *newinfo, *info;
void *pos, *entry0, *entry1;
struct compat_arpt_entry *iter0;
- struct arpt_entry *iter1;
+ struct arpt_replace repl;
unsigned int size;
int ret = 0;
info = *pinfo;
entry0 = *pentry0;
- size = total_size;
- info->number = number;
-
- /* Init all hooks to impossible value. */
- for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
- info->hook_entry[i] = 0xFFFFFFFF;
- info->underflow[i] = 0xFFFFFFFF;
- }
+ size = compatr->size;
+ info->number = compatr->num_entries;
duprintf("translate_compat_table: size %u\n", info->size);
j = 0;
xt_compat_lock(NFPROTO_ARP);
- xt_compat_init_offsets(NFPROTO_ARP, number);
+ xt_compat_init_offsets(NFPROTO_ARP, compatr->num_entries);
/* Walk through entries, checking offsets. */
- xt_entry_foreach(iter0, entry0, total_size) {
+ xt_entry_foreach(iter0, entry0, compatr->size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,
entry0,
- entry0 + total_size,
- hook_entries,
- underflows,
- name);
+ entry0 + compatr->size);
if (ret != 0)
goto out_unlock;
++j;
}
ret = -EINVAL;
- if (j != number) {
+ if (j != compatr->num_entries) {
duprintf("translate_compat_table: %u not %u entries\n",
- j, number);
+ j, compatr->num_entries);
goto out_unlock;
}
- /* Check hooks all assigned */
- for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
- /* Only hooks which are valid */
- if (!(valid_hooks & (1 << i)))
- continue;
- if (info->hook_entry[i] == 0xFFFFFFFF) {
- duprintf("Invalid hook entry %u %u\n",
- i, hook_entries[i]);
- goto out_unlock;
- }
- if (info->underflow[i] == 0xFFFFFFFF) {
- duprintf("Invalid underflow %u %u\n",
- i, underflows[i]);
- goto out_unlock;
- }
- }
-
ret = -ENOMEM;
newinfo = xt_alloc_table_info(size);
if (!newinfo)
goto out_unlock;
- newinfo->number = number;
+ newinfo->number = compatr->num_entries;
for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
newinfo->hook_entry[i] = info->hook_entry[i];
newinfo->underflow[i] = info->underflow[i];
}
entry1 = newinfo->entries[raw_smp_processor_id()];
pos = entry1;
- size = total_size;
- xt_entry_foreach(iter0, entry0, total_size) {
- ret = compat_copy_entry_from_user(iter0, &pos, &size,
- name, newinfo, entry1);
- if (ret != 0)
- break;
- }
+ size = compatr->size;
+ xt_entry_foreach(iter0, entry0, compatr->size)
+ compat_copy_entry_from_user(iter0, &pos, &size,
+ newinfo, entry1);
+
+ /* all module references in entry0 are now gone */
+
xt_compat_flush_offsets(NFPROTO_ARP);
xt_compat_unlock(NFPROTO_ARP);
- if (ret)
- goto free_newinfo;
- ret = -ELOOP;
- if (!mark_source_chains(newinfo, valid_hooks, entry1))
- goto free_newinfo;
+ memcpy(&repl, compatr, sizeof(*compatr));
- i = 0;
- xt_entry_foreach(iter1, entry1, newinfo->size) {
- ret = check_target(iter1, name);
- if (ret != 0)
- break;
- ++i;
- if (strcmp(arpt_get_target(iter1)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
- }
- if (ret) {
- /*
- * The first i matches need cleanup_entry (calls ->destroy)
- * because they had called ->check already. The other j-i
- * entries need only release.
- */
- int skip = i;
- j -= i;
- xt_entry_foreach(iter0, entry0, newinfo->size) {
- if (skip-- > 0)
- continue;
- if (j-- == 0)
- break;
- compat_release_entry(iter0);
- }
- xt_entry_foreach(iter1, entry1, newinfo->size) {
- if (i-- == 0)
- break;
- cleanup_entry(iter1);
- }
- xt_free_table_info(newinfo);
- return ret;
+ for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+ repl.hook_entry[i] = newinfo->hook_entry[i];
+ repl.underflow[i] = newinfo->underflow[i];
}
- /* And one copy for every other CPU */
- for_each_possible_cpu(i)
- if (newinfo->entries[i] && newinfo->entries[i] != entry1)
- memcpy(newinfo->entries[i], entry1, newinfo->size);
+ repl.num_counters = 0;
+ repl.counters = NULL;
+ repl.size = newinfo->size;
+ ret = translate_table(newinfo, entry1, &repl);
+ if (ret)
+ goto free_newinfo;
*pinfo = newinfo;
*pentry0 = entry1;
@@ -1458,31 +1365,18 @@ static int translate_compat_table(const char *name,
free_newinfo:
xt_free_table_info(newinfo);
-out:
- xt_entry_foreach(iter0, entry0, total_size) {
+ return ret;
+out_unlock:
+ xt_compat_flush_offsets(NFPROTO_ARP);
+ xt_compat_unlock(NFPROTO_ARP);
+ xt_entry_foreach(iter0, entry0, compatr->size) {
if (j-- == 0)
break;
compat_release_entry(iter0);
}
return ret;
-out_unlock:
- xt_compat_flush_offsets(NFPROTO_ARP);
- xt_compat_unlock(NFPROTO_ARP);
- goto out;
}
-struct compat_arpt_replace {
- char name[XT_TABLE_MAXNAMELEN];
- u32 valid_hooks;
- u32 num_entries;
- u32 size;
- u32 hook_entry[NF_ARP_NUMHOOKS];
- u32 underflow[NF_ARP_NUMHOOKS];
- u32 num_counters;
- compat_uptr_t counters;
- struct compat_arpt_entry entries[0];
-};
-
static int compat_do_replace(struct net *net, void __user *user,
unsigned int len)
{
@@ -1513,10 +1407,7 @@ static int compat_do_replace(struct net *net, void __user *user,
goto free_newinfo;
}
- ret = translate_compat_table(tmp.name, tmp.valid_hooks,
- &newinfo, &loc_cpu_entry, tmp.size,
- tmp.num_entries, tmp.hook_entry,
- tmp.underflow);
+ ret = translate_compat_table(&newinfo, &loc_cpu_entry, &tmp);
if (ret != 0)
goto free_newinfo;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 99e810f84671..4e40f2ec2b68 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -168,11 +168,12 @@ get_entry(const void *base, unsigned int offset)
/* All zeroes == unconditional rule. */
/* Mildly perf critical (only if packet tracing is on) */
-static inline bool unconditional(const struct ipt_ip *ip)
+static inline bool unconditional(const struct ipt_entry *e)
{
static const struct ipt_ip uncond;
- return memcmp(ip, &uncond, sizeof(uncond)) == 0;
+ return e->target_offset == sizeof(struct ipt_entry) &&
+ memcmp(&e->ip, &uncond, sizeof(uncond)) == 0;
#undef FWINV
}
@@ -229,11 +230,10 @@ get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e,
} else if (s == e) {
(*rulenum)++;
- if (s->target_offset == sizeof(struct ipt_entry) &&
+ if (unconditional(s) &&
strcmp(t->target.u.kernel.target->name,
XT_STANDARD_TARGET) == 0 &&
- t->verdict < 0 &&
- unconditional(&s->ip)) {
+ t->verdict < 0) {
/* Tail of chains: STANDARD target (return/policy) */
*comment = *chainname == hookname
? comments[NF_IP_TRACE_COMMENT_POLICY]
@@ -443,7 +443,8 @@ ipt_do_table(struct sk_buff *skb,
there are loops. Puts hook bitmask in comefrom. */
static int
mark_source_chains(const struct xt_table_info *newinfo,
- unsigned int valid_hooks, void *entry0)
+ unsigned int valid_hooks, void *entry0,
+ unsigned int *offsets)
{
unsigned int hook;
@@ -472,11 +473,10 @@ mark_source_chains(const struct xt_table_info *newinfo,
e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
/* Unconditional return/END. */
- if ((e->target_offset == sizeof(struct ipt_entry) &&
+ if ((unconditional(e) &&
(strcmp(t->target.u.user.name,
XT_STANDARD_TARGET) == 0) &&
- t->verdict < 0 && unconditional(&e->ip)) ||
- visited) {
+ t->verdict < 0) || visited) {
unsigned int oldpos, size;
if ((strcmp(t->target.u.user.name,
@@ -517,6 +517,8 @@ mark_source_chains(const struct xt_table_info *newinfo,
size = e->next_offset;
e = (struct ipt_entry *)
(entry0 + pos + size);
+ if (pos + size >= newinfo->size)
+ return 0;
e->counters.pcnt = pos;
pos += size;
} else {
@@ -535,9 +537,16 @@ mark_source_chains(const struct xt_table_info *newinfo,
/* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n",
pos, newpos);
+ if (!xt_find_jump_offset(offsets, newpos,
+ newinfo->number))
+ return 0;
+ e = (struct ipt_entry *)
+ (entry0 + newpos);
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
+ if (newpos >= newinfo->size)
+ return 0;
}
e = (struct ipt_entry *)
(entry0 + newpos);
@@ -565,27 +574,6 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net)
}
static int
-check_entry(const struct ipt_entry *e, const char *name)
-{
- const struct xt_entry_target *t;
-
- if (!ip_checkentry(&e->ip)) {
- duprintf("ip check failed %p %s.\n", e, name);
- return -EINVAL;
- }
-
- if (e->target_offset + sizeof(struct xt_entry_target) >
- e->next_offset)
- return -EINVAL;
-
- t = ipt_get_target_c(e);
- if (e->target_offset + t->u.target_size > e->next_offset)
- return -EINVAL;
-
- return 0;
-}
-
-static int
check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
{
const struct ipt_ip *ip = par->entryinfo;
@@ -662,10 +650,6 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
struct xt_mtchk_param mtpar;
struct xt_entry_match *ematch;
- ret = check_entry(e, name);
- if (ret)
- return ret;
-
j = 0;
mtpar.net = net;
mtpar.table = name;
@@ -709,7 +693,7 @@ static bool check_underflow(const struct ipt_entry *e)
const struct xt_entry_target *t;
unsigned int verdict;
- if (!unconditional(&e->ip))
+ if (!unconditional(e))
return false;
t = ipt_get_target_c(e);
if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
@@ -729,9 +713,11 @@ check_entry_size_and_hooks(struct ipt_entry *e,
unsigned int valid_hooks)
{
unsigned int h;
+ int err;
if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 ||
- (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
+ (unsigned char *)e + sizeof(struct ipt_entry) >= limit ||
+ (unsigned char *)e + e->next_offset > limit) {
duprintf("Bad offset %p\n", e);
return -EINVAL;
}
@@ -743,6 +729,14 @@ check_entry_size_and_hooks(struct ipt_entry *e,
return -EINVAL;
}
+ if (!ip_checkentry(&e->ip))
+ return -EINVAL;
+
+ err = xt_check_entry_offsets(e, e->elems, e->target_offset,
+ e->next_offset);
+ if (err)
+ return err;
+
/* Check hooks & underflows */
for (h = 0; h < NF_INET_NUMHOOKS; h++) {
if (!(valid_hooks & (1 << h)))
@@ -751,9 +745,9 @@ check_entry_size_and_hooks(struct ipt_entry *e,
newinfo->hook_entry[h] = hook_entries[h];
if ((unsigned char *)e - base == underflows[h]) {
if (!check_underflow(e)) {
- pr_err("Underflows must be unconditional and "
- "use the STANDARD target with "
- "ACCEPT/DROP\n");
+ pr_debug("Underflows must be unconditional and "
+ "use the STANDARD target with "
+ "ACCEPT/DROP\n");
return -EINVAL;
}
newinfo->underflow[h] = underflows[h];
@@ -794,6 +788,7 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
const struct ipt_replace *repl)
{
struct ipt_entry *iter;
+ unsigned int *offsets;
unsigned int i;
int ret = 0;
@@ -807,6 +802,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
}
duprintf("translate_table: size %u\n", newinfo->size);
+ offsets = xt_alloc_entry_offsets(newinfo->number);
+ if (!offsets)
+ return -ENOMEM;
i = 0;
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter, entry0, newinfo->size) {
@@ -816,17 +814,20 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
repl->underflow,
repl->valid_hooks);
if (ret != 0)
- return ret;
+ goto out_free;
+ if (i < repl->num_entries)
+ offsets[i] = (void *)iter - entry0;
++i;
if (strcmp(ipt_get_target(iter)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
}
+ ret = -EINVAL;
if (i != repl->num_entries) {
duprintf("translate_table: %u not %u entries\n",
i, repl->num_entries);
- return -EINVAL;
+ goto out_free;
}
/* Check hooks all assigned */
@@ -837,17 +838,20 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
duprintf("Invalid hook entry %u %u\n",
i, repl->hook_entry[i]);
- return -EINVAL;
+ goto out_free;
}
if (newinfo->underflow[i] == 0xFFFFFFFF) {
duprintf("Invalid underflow %u %u\n",
i, repl->underflow[i]);
- return -EINVAL;
+ goto out_free;
}
}
- if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
- return -ELOOP;
+ if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
+ ret = -ELOOP;
+ goto out_free;
+ }
+ kvfree(offsets);
/* Finally, each sanity check must pass */
i = 0;
@@ -874,6 +878,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
}
return ret;
+ out_free:
+ kvfree(offsets);
+ return ret;
}
static void
@@ -1304,56 +1311,18 @@ do_add_counters(struct net *net, const void __user *user,
unsigned int i, curcpu;
struct xt_counters_info tmp;
struct xt_counters *paddc;
- unsigned int num_counters;
- const char *name;
- int size;
- void *ptmp;
struct xt_table *t;
const struct xt_table_info *private;
int ret = 0;
void *loc_cpu_entry;
struct ipt_entry *iter;
unsigned int addend;
-#ifdef CONFIG_COMPAT
- struct compat_xt_counters_info compat_tmp;
-
- if (compat) {
- ptmp = &compat_tmp;
- size = sizeof(struct compat_xt_counters_info);
- } else
-#endif
- {
- ptmp = &tmp;
- size = sizeof(struct xt_counters_info);
- }
- if (copy_from_user(ptmp, user, size) != 0)
- return -EFAULT;
+ paddc = xt_copy_counters_from_user(user, len, &tmp, compat);
+ if (IS_ERR(paddc))
+ return PTR_ERR(paddc);
-#ifdef CONFIG_COMPAT
- if (compat) {
- num_counters = compat_tmp.num_counters;
- name = compat_tmp.name;
- } else
-#endif
- {
- num_counters = tmp.num_counters;
- name = tmp.name;
- }
-
- if (len != size + num_counters * sizeof(struct xt_counters))
- return -EINVAL;
-
- paddc = vmalloc(len - size);
- if (!paddc)
- return -ENOMEM;
-
- if (copy_from_user(paddc, user + size, len - size) != 0) {
- ret = -EFAULT;
- goto free;
- }
-
- t = xt_find_table_lock(net, AF_INET, name);
+ t = xt_find_table_lock(net, AF_INET, tmp.name);
if (IS_ERR_OR_NULL(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free;
@@ -1361,7 +1330,7 @@ do_add_counters(struct net *net, const void __user *user,
local_bh_disable();
private = t->private;
- if (private->number != num_counters) {
+ if (private->number != tmp.num_counters) {
ret = -EINVAL;
goto unlock_up_free;
}
@@ -1440,7 +1409,6 @@ compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
static int
compat_find_calc_match(struct xt_entry_match *m,
- const char *name,
const struct ipt_ip *ip,
unsigned int hookmask,
int *size)
@@ -1476,21 +1444,19 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
struct xt_table_info *newinfo,
unsigned int *size,
const unsigned char *base,
- const unsigned char *limit,
- const unsigned int *hook_entries,
- const unsigned int *underflows,
- const char *name)
+ const unsigned char *limit)
{
struct xt_entry_match *ematch;
struct xt_entry_target *t;
struct xt_target *target;
unsigned int entry_offset;
unsigned int j;
- int ret, off, h;
+ int ret, off;
duprintf("check_compat_entry_size_and_hooks %p\n", e);
if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 ||
- (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) {
+ (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit ||
+ (unsigned char *)e + e->next_offset > limit) {
duprintf("Bad offset %p, limit = %p\n", e, limit);
return -EINVAL;
}
@@ -1502,8 +1468,11 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
return -EINVAL;
}
- /* For purposes of check_entry casting the compat entry is fine */
- ret = check_entry((struct ipt_entry *)e, name);
+ if (!ip_checkentry(&e->ip))
+ return -EINVAL;
+
+ ret = xt_compat_check_entry_offsets(e, e->elems,
+ e->target_offset, e->next_offset);
if (ret)
return ret;
@@ -1511,8 +1480,8 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
entry_offset = (void *)e - (void *)base;
j = 0;
xt_ematch_foreach(ematch, e) {
- ret = compat_find_calc_match(ematch, name,
- &e->ip, e->comefrom, &off);
+ ret = compat_find_calc_match(ematch, &e->ip, e->comefrom,
+ &off);
if (ret != 0)
goto release_matches;
++j;
@@ -1535,17 +1504,6 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
if (ret)
goto out;
- /* Check hooks & underflows */
- for (h = 0; h < NF_INET_NUMHOOKS; h++) {
- if ((unsigned char *)e - base == hook_entries[h])
- newinfo->hook_entry[h] = hook_entries[h];
- if ((unsigned char *)e - base == underflows[h])
- newinfo->underflow[h] = underflows[h];
- }
-
- /* Clear counters and comefrom */
- memset(&e->counters, 0, sizeof(e->counters));
- e->comefrom = 0;
return 0;
out:
@@ -1559,19 +1517,18 @@ release_matches:
return ret;
}
-static int
+static void
compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
- unsigned int *size, const char *name,
+ unsigned int *size,
struct xt_table_info *newinfo, unsigned char *base)
{
struct xt_entry_target *t;
struct xt_target *target;
struct ipt_entry *de;
unsigned int origsize;
- int ret, h;
+ int h;
struct xt_entry_match *ematch;
- ret = 0;
origsize = *size;
de = (struct ipt_entry *)*dstptr;
memcpy(de, e, sizeof(struct ipt_entry));
@@ -1580,198 +1537,104 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
*dstptr += sizeof(struct ipt_entry);
*size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
- xt_ematch_foreach(ematch, e) {
- ret = xt_compat_match_from_user(ematch, dstptr, size);
- if (ret != 0)
- return ret;
- }
+ xt_ematch_foreach(ematch, e)
+ xt_compat_match_from_user(ematch, dstptr, size);
+
de->target_offset = e->target_offset - (origsize - *size);
t = compat_ipt_get_target(e);
target = t->u.kernel.target;
xt_compat_target_from_user(t, dstptr, size);
de->next_offset = e->next_offset - (origsize - *size);
+
for (h = 0; h < NF_INET_NUMHOOKS; h++) {
if ((unsigned char *)de - base < newinfo->hook_entry[h])
newinfo->hook_entry[h] -= origsize - *size;
if ((unsigned char *)de - base < newinfo->underflow[h])
newinfo->underflow[h] -= origsize - *size;
}
- return ret;
-}
-
-static int
-compat_check_entry(struct ipt_entry *e, struct net *net, const char *name)
-{
- struct xt_entry_match *ematch;
- struct xt_mtchk_param mtpar;
- unsigned int j;
- int ret = 0;
-
- j = 0;
- mtpar.net = net;
- mtpar.table = name;
- mtpar.entryinfo = &e->ip;
- mtpar.hook_mask = e->comefrom;
- mtpar.family = NFPROTO_IPV4;
- xt_ematch_foreach(ematch, e) {
- ret = check_match(ematch, &mtpar);
- if (ret != 0)
- goto cleanup_matches;
- ++j;
- }
-
- ret = check_target(e, net, name);
- if (ret)
- goto cleanup_matches;
- return 0;
-
- cleanup_matches:
- xt_ematch_foreach(ematch, e) {
- if (j-- == 0)
- break;
- cleanup_match(ematch, net);
- }
- return ret;
}
static int
translate_compat_table(struct net *net,
- const char *name,
- unsigned int valid_hooks,
struct xt_table_info **pinfo,
void **pentry0,
- unsigned int total_size,
- unsigned int number,
- unsigned int *hook_entries,
- unsigned int *underflows)
+ const struct compat_ipt_replace *compatr)
{
unsigned int i, j;
struct xt_table_info *newinfo, *info;
void *pos, *entry0, *entry1;
struct compat_ipt_entry *iter0;
- struct ipt_entry *iter1;
+ struct ipt_replace repl;
unsigned int size;
int ret;
info = *pinfo;
entry0 = *pentry0;
- size = total_size;
- info->number = number;
-
- /* Init all hooks to impossible value. */
- for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- info->hook_entry[i] = 0xFFFFFFFF;
- info->underflow[i] = 0xFFFFFFFF;
- }
+ size = compatr->size;
+ info->number = compatr->num_entries;
duprintf("translate_compat_table: size %u\n", info->size);
j = 0;
xt_compat_lock(AF_INET);
- xt_compat_init_offsets(AF_INET, number);
+ xt_compat_init_offsets(AF_INET, compatr->num_entries);
/* Walk through entries, checking offsets. */
- xt_entry_foreach(iter0, entry0, total_size) {
+ xt_entry_foreach(iter0, entry0, compatr->size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,
entry0,
- entry0 + total_size,
- hook_entries,
- underflows,
- name);
+ entry0 + compatr->size);
if (ret != 0)
goto out_unlock;
++j;
}
ret = -EINVAL;
- if (j != number) {
+ if (j != compatr->num_entries) {
duprintf("translate_compat_table: %u not %u entries\n",
- j, number);
+ j, compatr->num_entries);
goto out_unlock;
}
- /* Check hooks all assigned */
- for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- /* Only hooks which are valid */
- if (!(valid_hooks & (1 << i)))
- continue;
- if (info->hook_entry[i] == 0xFFFFFFFF) {
- duprintf("Invalid hook entry %u %u\n",
- i, hook_entries[i]);
- goto out_unlock;
- }
- if (info->underflow[i] == 0xFFFFFFFF) {
- duprintf("Invalid underflow %u %u\n",
- i, underflows[i]);
- goto out_unlock;
- }
- }
-
ret = -ENOMEM;
newinfo = xt_alloc_table_info(size);
if (!newinfo)
goto out_unlock;
- newinfo->number = number;
+ newinfo->number = compatr->num_entries;
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- newinfo->hook_entry[i] = info->hook_entry[i];
- newinfo->underflow[i] = info->underflow[i];
+ newinfo->hook_entry[i] = compatr->hook_entry[i];
+ newinfo->underflow[i] = compatr->underflow[i];
}
entry1 = newinfo->entries[raw_smp_processor_id()];
pos = entry1;
- size = total_size;
- xt_entry_foreach(iter0, entry0, total_size) {
- ret = compat_copy_entry_from_user(iter0, &pos, &size,
- name, newinfo, entry1);
- if (ret != 0)
- break;
- }
+ size = compatr->size;
+ xt_entry_foreach(iter0, entry0, compatr->size)
+ compat_copy_entry_from_user(iter0, &pos, &size,
+ newinfo, entry1);
+
+ /* all module references in entry0 are now gone.
+ * entry1/newinfo contains a 64bit ruleset that looks exactly as
+ * generated by 64bit userspace.
+ *
+ * Call standard translate_table() to validate all hook_entrys,
+ * underflows, check for loops, etc.
+ */
xt_compat_flush_offsets(AF_INET);
xt_compat_unlock(AF_INET);
- if (ret)
- goto free_newinfo;
- ret = -ELOOP;
- if (!mark_source_chains(newinfo, valid_hooks, entry1))
- goto free_newinfo;
+ memcpy(&repl, compatr, sizeof(*compatr));
- i = 0;
- xt_entry_foreach(iter1, entry1, newinfo->size) {
- ret = compat_check_entry(iter1, net, name);
- if (ret != 0)
- break;
- ++i;
- if (strcmp(ipt_get_target(iter1)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
- }
- if (ret) {
- /*
- * The first i matches need cleanup_entry (calls ->destroy)
- * because they had called ->check already. The other j-i
- * entries need only release.
- */
- int skip = i;
- j -= i;
- xt_entry_foreach(iter0, entry0, newinfo->size) {
- if (skip-- > 0)
- continue;
- if (j-- == 0)
- break;
- compat_release_entry(iter0);
- }
- xt_entry_foreach(iter1, entry1, newinfo->size) {
- if (i-- == 0)
- break;
- cleanup_entry(iter1, net);
- }
- xt_free_table_info(newinfo);
- return ret;
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+ repl.hook_entry[i] = newinfo->hook_entry[i];
+ repl.underflow[i] = newinfo->underflow[i];
}
- /* And one copy for every other CPU */
- for_each_possible_cpu(i)
- if (newinfo->entries[i] && newinfo->entries[i] != entry1)
- memcpy(newinfo->entries[i], entry1, newinfo->size);
+ repl.num_counters = 0;
+ repl.counters = NULL;
+ repl.size = newinfo->size;
+ ret = translate_table(net, newinfo, entry1, &repl);
+ if (ret)
+ goto free_newinfo;
*pinfo = newinfo;
*pentry0 = entry1;
@@ -1780,17 +1643,16 @@ translate_compat_table(struct net *net,
free_newinfo:
xt_free_table_info(newinfo);
-out:
- xt_entry_foreach(iter0, entry0, total_size) {
+ return ret;
+out_unlock:
+ xt_compat_flush_offsets(AF_INET);
+ xt_compat_unlock(AF_INET);
+ xt_entry_foreach(iter0, entry0, compatr->size) {
if (j-- == 0)
break;
compat_release_entry(iter0);
}
return ret;
-out_unlock:
- xt_compat_flush_offsets(AF_INET);
- xt_compat_unlock(AF_INET);
- goto out;
}
static int
@@ -1824,10 +1686,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
goto free_newinfo;
}
- ret = translate_compat_table(net, tmp.name, tmp.valid_hooks,
- &newinfo, &loc_cpu_entry, tmp.size,
- tmp.num_entries, tmp.hook_entry,
- tmp.underflow);
+ ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp);
if (ret != 0)
goto free_newinfo;
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index c6eb42100e9a..ea91058b5f6f 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -108,10 +108,18 @@ static int masq_inet_event(struct notifier_block *this,
unsigned long event,
void *ptr)
{
- struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
+ struct in_device *idev = ((struct in_ifaddr *)ptr)->ifa_dev;
struct netdev_notifier_info info;
- netdev_notifier_info_init(&info, dev);
+ /* The masq_dev_notifier will catch the case of the device going
+ * down. So if the inetdev is dead and being destroyed we have
+ * no work to do. Otherwise this is an individual address removal
+ * and we have to perform the flush.
+ */
+ if (idev->dead)
+ return NOTIFY_DONE;
+
+ netdev_notifier_info_init(&info, idev->dev);
return masq_device_event(this, event, &info);
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d58dd0ec3e53..12c5df33c0b7 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -903,6 +903,10 @@ static int ip_error(struct sk_buff *skb)
bool send;
int code;
+ /* IP on this device is disabled. */
+ if (!in_dev)
+ goto out;
+
net = dev_net(rt->dst.dev);
if (!IN_DEV_FORWARD(in_dev)) {
switch (rt->dst.error) {
@@ -1920,6 +1924,18 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
*/
if (fi && res->prefixlen < 4)
fi = NULL;
+ } else if ((type == RTN_LOCAL) && (orig_oif != 0) &&
+ (orig_oif != dev_out->ifindex)) {
+ /* For local routes that require a particular output interface
+ * we do not want to cache the result. Caching the result
+ * causes incorrect behaviour when there are multiple source
+ * addresses on the interface, the end result being that if the
+ * intended recipient is waiting on that interface for the
+ * packet he won't receive it because it will be delivered on
+ * the loopback interface and the IP_PKTINFO ipi_ifindex will
+ * be set to the loopback interface as well.
+ */
+ fi = NULL;
}
fnhe = NULL;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 9096dbb067d1..034e4c0e2121 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -45,10 +45,10 @@ static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
/* Update system visible IP port range */
static void set_local_port_range(struct net *net, int range[2])
{
- write_seqlock(&net->ipv4.ip_local_ports.lock);
+ write_seqlock_bh(&net->ipv4.ip_local_ports.lock);
net->ipv4.ip_local_ports.range[0] = range[0];
net->ipv4.ip_local_ports.range[1] = range[1];
- write_sequnlock(&net->ipv4.ip_local_ports.lock);
+ write_sequnlock_bh(&net->ipv4.ip_local_ports.lock);
}
/* Validate changes from /proc interface. */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 38c2bcb8dd5d..32b25cc96fff 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2585,10 +2585,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_FASTOPEN:
if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE |
- TCPF_LISTEN)))
+ TCPF_LISTEN))) {
+ tcp_fastopen_init_key_once(true);
+
err = fastopen_init_queue(sk, val);
- else
+ } else {
err = -EINVAL;
+ }
break;
case TCP_TIMESTAMP:
if (!tp->repair)
@@ -2639,6 +2642,7 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info)
const struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 now = tcp_time_stamp;
+ u32 rate;
memset(info, 0, sizeof(*info));
@@ -2699,10 +2703,11 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info)
info->tcpi_total_retrans = tp->total_retrans;
- info->tcpi_pacing_rate = sk->sk_pacing_rate != ~0U ?
- sk->sk_pacing_rate : ~0ULL;
- info->tcpi_max_pacing_rate = sk->sk_max_pacing_rate != ~0U ?
- sk->sk_max_pacing_rate : ~0ULL;
+ rate = READ_ONCE(sk->sk_pacing_rate);
+ info->tcpi_pacing_rate = rate != ~0U ? rate : ~0ULL;
+
+ rate = READ_ONCE(sk->sk_max_pacing_rate);
+ info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL;
}
EXPORT_SYMBOL_GPL(tcp_get_info);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index b1c5970d47a1..b3316c8279a0 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -248,9 +248,10 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
ca = tcp_ca_find(name);
/* no change asking for existing value */
- if (ca == icsk->icsk_ca_ops)
+ if (ca == icsk->icsk_ca_ops) {
+ icsk->icsk_ca_setsockopt = 1;
goto out;
-
+ }
#ifdef CONFIG_MODULES
/* not found attempt to autoload module */
if (!ca && capable(CAP_NET_ADMIN)) {
@@ -273,6 +274,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
else {
tcp_cleanup_congestion_control(sk);
icsk->icsk_ca_ops = ca;
+ icsk->icsk_ca_setsockopt = 1;
if (sk->sk_state != TCP_CLOSE && icsk->icsk_ca_ops->init)
icsk->icsk_ca_ops->init(sk);
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 20de0118c98e..9332ec123a6c 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -154,6 +154,27 @@ static void bictcp_init(struct sock *sk)
tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
}
+static void bictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+{
+ if (event == CA_EVENT_TX_START) {
+ struct bictcp *ca = inet_csk_ca(sk);
+ u32 now = tcp_time_stamp;
+ s32 delta;
+
+ delta = now - tcp_sk(sk)->lsndtime;
+
+ /* We were application limited (idle) for a while.
+ * Shift epoch_start to keep cwnd growth to cubic curve.
+ */
+ if (ca->epoch_start && delta > 0) {
+ ca->epoch_start += delta;
+ if (after(ca->epoch_start, now))
+ ca->epoch_start = now;
+ }
+ return;
+ }
+}
+
/* calculate the cubic root of x using a table lookup followed by one
* Newton-Raphson iteration.
* Avg err ~= 0.195%
@@ -440,6 +461,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = {
.cong_avoid = bictcp_cong_avoid,
.set_state = bictcp_state,
.undo_cwnd = bictcp_undo_cwnd,
+ .cwnd_event = bictcp_cwnd_event,
.pkts_acked = bictcp_acked,
.owner = THIS_MODULE,
.name = "cubic",
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index c73077280ad4..b01d5bd6d6ca 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -78,8 +78,6 @@ static bool __tcp_fastopen_cookie_gen(const void *path,
struct tcp_fastopen_context *ctx;
bool ok = false;
- tcp_fastopen_init_key_once(true);
-
rcu_read_lock();
ctx = rcu_dereference(tcp_fastopen_ctx);
if (ctx) {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 6f46cde58e54..0c96055b2382 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4356,19 +4356,34 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
{
struct sk_buff *skb;
+ int err = -ENOMEM;
+ int data_len = 0;
bool fragstolen;
if (size == 0)
return 0;
- skb = alloc_skb(size, sk->sk_allocation);
+ if (size > PAGE_SIZE) {
+ int npages = min_t(size_t, size >> PAGE_SHIFT, MAX_SKB_FRAGS);
+
+ data_len = npages << PAGE_SHIFT;
+ size = data_len + (size & ~PAGE_MASK);
+ }
+ skb = alloc_skb_with_frags(size - data_len, data_len,
+ PAGE_ALLOC_COSTLY_ORDER,
+ &err, sk->sk_allocation);
if (!skb)
goto err;
+ skb_put(skb, size - data_len);
+ skb->data_len = data_len;
+ skb->len = size;
+
if (tcp_try_rmem_schedule(sk, skb, skb->truesize))
goto err_free;
- if (memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size))
+ err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
+ if (err)
goto err_free;
TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt;
@@ -4384,7 +4399,8 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size)
err_free:
kfree_skb(skb);
err:
- return -ENOMEM;
+ return err;
+
}
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
@@ -5524,6 +5540,7 @@ discard:
}
tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
+ tp->copied_seq = tp->rcv_nxt;
tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
/* RFC1323: The window in SYN & SYN/ACK segments is
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a5fdfe9fa542..5d5390299277 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -959,7 +959,8 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
}
md5sig = rcu_dereference_protected(tp->md5sig_info,
- sock_owned_by_user(sk));
+ sock_owned_by_user(sk) ||
+ lockdep_is_held(&sk->sk_lock.slock));
if (!md5sig) {
md5sig = kmalloc(sizeof(*md5sig), gfp);
if (!md5sig)
@@ -1552,7 +1553,7 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
if (likely(sk->sk_rx_dst))
skb_dst_drop(skb);
else
- skb_dst_force(skb);
+ skb_dst_force_safe(skb);
__skb_queue_tail(&tp->ucopy.prequeue, skb);
tp->ucopy.memory += skb->truesize;
@@ -1757,8 +1758,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- if (dst) {
- dst_hold(dst);
+ if (dst && dst_hold_safe(dst)) {
sk->sk_rx_dst = dst;
inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
}
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index ed9c9a91851c..c90e76c2eca0 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -550,7 +550,7 @@ reset:
*/
if (crtt > tp->srtt_us) {
/* Set RTO like tcp_rtt_estimator(), but from cached RTT. */
- crtt /= 8 * USEC_PER_MSEC;
+ crtt /= 8 * USEC_PER_SEC / HZ;
inet_csk(sk)->icsk_rto = crtt + max(2 * crtt, tcp_rto_min(sk));
} else if (tp->srtt_us == 0) {
/* RFC6298: 5.7 We've failed to get a valid RTT sample from
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 63d2680b65db..2f6667116e85 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -297,7 +297,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
tw->tw_v6_daddr = sk->sk_v6_daddr;
tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
tw->tw_tclass = np->tclass;
- tw->tw_flowlabel = np->flow_label >> 12;
+ tw->tw_flowlabel = be32_to_cpu(np->flow_label & IPV6_FLOWLABEL_MASK);
tw->tw_ipv6only = sk->sk_ipv6only;
}
#endif
@@ -451,7 +451,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
newtp->snd_cwnd = TCP_INIT_CWND;
newtp->snd_cwnd_cnt = 0;
- if (!try_module_get(newicsk->icsk_ca_ops->owner))
+ if (!newicsk->icsk_ca_setsockopt ||
+ !try_module_get(newicsk->icsk_ca_ops->owner))
tcp_assign_congestion_control(newsk);
tcp_set_ca_state(newsk, TCP_CA_Open);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index dc9f925b0cd5..af2f64eeb98f 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2532,8 +2532,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
*/
if (unlikely((NET_IP_ALIGN && ((unsigned long)skb->data & 3)) ||
skb_headroom(skb) >= 0xFFFF)) {
- struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
- GFP_ATOMIC);
+ struct sk_buff *nskb;
+
+ skb_mstamp_get(&skb->skb_mstamp);
+ nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
-ENOBUFS;
} else {
@@ -2798,6 +2800,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority)
skb_reserve(skb, MAX_TCP_HEADER);
tcp_init_nondata_skb(skb, tcp_acceptable_seq(sk),
TCPHDR_ACK | TCPHDR_RST);
+ skb_mstamp_get(&skb->skb_mstamp);
/* Send it off. */
if (tcp_transmit_skb(sk, skb, 0, priority))
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED);
diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c
index cd7273218598..25a229764e7f 100644
--- a/net/ipv4/tcp_yeah.c
+++ b/net/ipv4/tcp_yeah.c
@@ -219,7 +219,7 @@ static u32 tcp_yeah_ssthresh(struct sock *sk)
yeah->fast_count = 0;
yeah->reno_count = max(yeah->reno_count>>1, 2U);
- return tp->snd_cwnd - reduction;
+ return max_t(int, tp->snd_cwnd - reduction, 2);
}
static struct tcp_congestion_ops tcp_yeah __read_mostly = {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index cd0db5471bb5..2a5d388d76a4 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -90,6 +90,7 @@
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/igmp.h>
+#include <linux/inetdevice.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/timer.h>
@@ -1336,10 +1337,8 @@ csum_copy_err:
}
unlock_sock_fast(sk, slow);
- if (noblock)
- return -EAGAIN;
-
- /* starting over for a new packet */
+ /* starting over for a new packet, but check if we need to yield */
+ cond_resched();
msg->msg_flags &= ~MSG_TRUNC;
goto try_again;
}
@@ -1950,6 +1949,7 @@ void udp_v4_early_demux(struct sk_buff *skb)
struct sock *sk;
struct dst_entry *dst;
int dif = skb->dev->ifindex;
+ int ours;
/* validate the packet */
if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr)))
@@ -1959,26 +1959,47 @@ void udp_v4_early_demux(struct sk_buff *skb)
uh = udp_hdr(skb);
if (skb->pkt_type == PACKET_BROADCAST ||
- skb->pkt_type == PACKET_MULTICAST)
+ skb->pkt_type == PACKET_MULTICAST) {
+ struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
+
+ if (!in_dev)
+ return;
+
+ /* we are supposed to accept bcast packets */
+ if (skb->pkt_type == PACKET_MULTICAST) {
+ ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr,
+ iph->protocol);
+ if (!ours)
+ return;
+ }
+
sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr,
uh->source, iph->saddr, dif);
- else if (skb->pkt_type == PACKET_HOST)
+ } else if (skb->pkt_type == PACKET_HOST) {
sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr,
uh->source, iph->saddr, dif);
- else
+ } else {
return;
+ }
if (!sk)
return;
skb->sk = sk;
skb->destructor = sock_efree;
- dst = sk->sk_rx_dst;
+ dst = READ_ONCE(sk->sk_rx_dst);
if (dst)
dst = dst_check(dst, 0);
- if (dst)
- skb_dst_set_noref(skb, dst);
+ if (dst) {
+ /* DST_NOCACHE can not be used without taking a reference */
+ if (dst->flags & DST_NOCACHE) {
+ if (likely(atomic_inc_not_zero(&dst->__refcnt)))
+ skb_dst_set(skb, dst);
+ } else {
+ skb_dst_set_noref(skb, dst);
+ }
+ }
}
int udp_rcv(struct sk_buff *skb)
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
index 6156f68a1e90..94fc16dad6c6 100644
--- a/net/ipv4/xfrm4_policy.c
+++ b/net/ipv4/xfrm4_policy.c
@@ -230,7 +230,7 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
xfrm_dst_ifdown(dst, dev);
}
-static struct dst_ops xfrm4_dst_ops = {
+static struct dst_ops xfrm4_dst_ops_template = {
.family = AF_INET,
.protocol = cpu_to_be16(ETH_P_IP),
.gc = xfrm4_garbage_collect,
@@ -245,7 +245,7 @@ static struct dst_ops xfrm4_dst_ops = {
static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
.family = AF_INET,
- .dst_ops = &xfrm4_dst_ops,
+ .dst_ops = &xfrm4_dst_ops_template,
.dst_lookup = xfrm4_dst_lookup,
.get_saddr = xfrm4_get_saddr,
.decode_session = _decode_session4,
@@ -267,7 +267,7 @@ static struct ctl_table xfrm4_policy_table[] = {
{ }
};
-static int __net_init xfrm4_net_init(struct net *net)
+static int __net_init xfrm4_net_sysctl_init(struct net *net)
{
struct ctl_table *table;
struct ctl_table_header *hdr;
@@ -295,7 +295,7 @@ err_alloc:
return -ENOMEM;
}
-static void __net_exit xfrm4_net_exit(struct net *net)
+static void __net_exit xfrm4_net_sysctl_exit(struct net *net)
{
struct ctl_table *table;
@@ -307,12 +307,44 @@ static void __net_exit xfrm4_net_exit(struct net *net)
if (!net_eq(net, &init_net))
kfree(table);
}
+#else /* CONFIG_SYSCTL */
+static int inline xfrm4_net_sysctl_init(struct net *net)
+{
+ return 0;
+}
+
+static void inline xfrm4_net_sysctl_exit(struct net *net)
+{
+}
+#endif
+
+static int __net_init xfrm4_net_init(struct net *net)
+{
+ int ret;
+
+ memcpy(&net->xfrm.xfrm4_dst_ops, &xfrm4_dst_ops_template,
+ sizeof(xfrm4_dst_ops_template));
+ ret = dst_entries_init(&net->xfrm.xfrm4_dst_ops);
+ if (ret)
+ return ret;
+
+ ret = xfrm4_net_sysctl_init(net);
+ if (ret)
+ dst_entries_destroy(&net->xfrm.xfrm4_dst_ops);
+
+ return ret;
+}
+
+static void __net_exit xfrm4_net_exit(struct net *net)
+{
+ xfrm4_net_sysctl_exit(net);
+ dst_entries_destroy(&net->xfrm.xfrm4_dst_ops);
+}
static struct pernet_operations __net_initdata xfrm4_net_ops = {
.init = xfrm4_net_init,
.exit = xfrm4_net_exit,
};
-#endif
static void __init xfrm4_policy_init(void)
{
@@ -321,13 +353,9 @@ static void __init xfrm4_policy_init(void)
void __init xfrm4_init(void)
{
- dst_entries_init(&xfrm4_dst_ops);
-
xfrm4_state_init();
xfrm4_policy_init();
xfrm4_protocol_init();
-#ifdef CONFIG_SYSCTL
register_pernet_subsys(&xfrm4_net_ops);
-#endif
}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 17a025847999..e34efa766031 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -396,6 +396,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
if (err) {
ipv6_mc_destroy_dev(ndev);
del_timer(&ndev->regen_timer);
+ snmp6_unregister_dev(ndev);
goto err_release;
}
/* protected by rtnl_lock */
@@ -4843,6 +4844,21 @@ int addrconf_sysctl_forward(struct ctl_table *ctl, int write,
return ret;
}
+static
+int addrconf_sysctl_mtu(struct ctl_table *ctl, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct inet6_dev *idev = ctl->extra1;
+ int min_mtu = IPV6_MIN_MTU;
+ struct ctl_table lctl;
+
+ lctl = *ctl;
+ lctl.extra1 = &min_mtu;
+ lctl.extra2 = idev ? &idev->dev->mtu : NULL;
+
+ return proc_dointvec_minmax(&lctl, write, buffer, lenp, ppos);
+}
+
static void dev_disable_change(struct inet6_dev *idev)
{
struct netdev_notifier_info info;
@@ -4994,7 +5010,7 @@ static struct addrconf_sysctl_table
.data = &ipv6_devconf.mtu6,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = addrconf_sysctl_mtu,
},
{
.procname = "accept_ra",
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index fd0dc47f471d..70efbdd4d308 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -558,7 +558,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh)
rcu_read_lock();
p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index);
- if (p && ip6addrlbl_hold(p))
+ if (p && !ip6addrlbl_hold(p))
p = NULL;
lseq = ip6addrlbl_table.seq;
rcu_read_unlock();
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index e8c4400f23e9..ad95905e7a70 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -109,6 +109,9 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,
int try_loading_module = 0;
int err;
+ if (protocol < 0 || protocol >= IPPROTO_MAX)
+ return -EINVAL;
+
/* Look for the requested type/protocol pair. */
lookup_protocol:
err = -ESOCKTNOSUPPORT;
@@ -425,9 +428,11 @@ void inet6_destroy_sock(struct sock *sk)
/* Free tx options */
- opt = xchg(&np->opt, NULL);
- if (opt != NULL)
- sock_kfree_s(sk, opt, opt->tot_len);
+ opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
}
EXPORT_SYMBOL_GPL(inet6_destroy_sock);
@@ -656,7 +661,10 @@ int inet6_sk_rebuild_header(struct sock *sk)
fl6.fl6_sport = inet->inet_sport;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt),
+ &final);
+ rcu_read_unlock();
dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
if (IS_ERR(dst)) {
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 11e3945eeac7..9e3b0b66a4f3 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -40,7 +40,7 @@ static bool ipv6_mapped_addr_any(const struct in6_addr *a)
return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0);
}
-int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
struct inet_sock *inet = inet_sk(sk);
@@ -56,7 +56,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (usin->sin6_family == AF_INET) {
if (__ipv6_only_sock(sk))
return -EAFNOSUPPORT;
- err = ip4_datagram_connect(sk, uaddr, addr_len);
+ err = __ip4_datagram_connect(sk, uaddr, addr_len);
goto ipv4_connected;
}
@@ -98,9 +98,9 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
sin.sin_addr.s_addr = daddr->s6_addr32[3];
sin.sin_port = usin->sin6_port;
- err = ip4_datagram_connect(sk,
- (struct sockaddr *) &sin,
- sizeof(sin));
+ err = __ip4_datagram_connect(sk,
+ (struct sockaddr *) &sin,
+ sizeof(sin));
ipv4_connected:
if (err)
@@ -167,8 +167,10 @@ ipv4_connected:
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
- opt = flowlabel ? flowlabel->opt : np->opt;
+ rcu_read_lock();
+ opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt);
final_p = fl6_update_dst(&fl6, opt, &final);
+ rcu_read_unlock();
dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
err = 0;
@@ -204,6 +206,16 @@ out:
fl6_sock_release(flowlabel);
return err;
}
+
+int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+ int res;
+
+ lock_sock(sk);
+ res = __ip6_datagram_connect(sk, uaddr, addr_len);
+ release_sock(sk);
+ return res;
+}
EXPORT_SYMBOL_GPL(ip6_datagram_connect);
int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *uaddr,
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index bfde361b6134..4f08a0f452eb 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -727,6 +727,7 @@ ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt)
*((char **)&opt2->dst1opt) += dif;
if (opt2->srcrt)
*((char **)&opt2->srcrt) += dif;
+ atomic_set(&opt2->refcnt, 1);
}
return opt2;
}
@@ -790,7 +791,7 @@ ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt,
return ERR_PTR(-ENOBUFS);
memset(opt2, 0, tot_len);
-
+ atomic_set(&opt2->refcnt, 1);
opt2->tot_len = tot_len;
p = (char *)(opt2 + 1);
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 8af3eb57f438..c7c8f71d0d48 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -257,7 +257,11 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
*fragoff = _frag_off;
return hp->nexthdr;
}
- return -ENOENT;
+ if (!found)
+ return -ENOENT;
+ if (fragoff)
+ *fragoff = _frag_off;
+ break;
}
hdrlen = 8;
} else if (nexthdr == NEXTHDR_AUTH) {
diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c
index 447a7fbd1bb6..f5e2ba1c18bf 100644
--- a/net/ipv6/exthdrs_offload.c
+++ b/net/ipv6/exthdrs_offload.c
@@ -36,6 +36,6 @@ out:
return ret;
out_rt:
- inet_del_offload(&rthdr_offload, IPPROTO_ROUTING);
+ inet6_del_offload(&rthdr_offload, IPPROTO_ROUTING);
goto out;
}
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 29b32206e494..6cc516c825b6 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -77,7 +77,9 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
memset(fl6, 0, sizeof(*fl6));
fl6->flowi6_proto = IPPROTO_TCP;
fl6->daddr = ireq->ir_v6_rmt_addr;
- final_p = fl6_update_dst(fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+ rcu_read_unlock();
fl6->saddr = ireq->ir_v6_loc_addr;
fl6->flowi6_oif = ireq->ir_iif;
fl6->flowi6_mark = ireq->ir_mark;
@@ -208,7 +210,9 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,
fl6->fl6_dport = inet->inet_dport;
security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
- final_p = fl6_update_dst(fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(fl6, rcu_dereference(np->opt), &final);
+ rcu_read_unlock();
dst = __inet6_csk_dst_check(sk, np->dst_cookie);
if (!dst) {
@@ -241,7 +245,8 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused
/* Restore final destination back after routing done */
fl6.daddr = sk->sk_v6_daddr;
- res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
+ res = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt),
+ np->tclass);
rcu_read_unlock();
return res;
}
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 0e32d2e1bdbf..1a93a39b2aab 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -361,6 +361,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev)
struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id);
ip6gre_tunnel_unlink(ign, t);
+ ip6_tnl_dst_reset(netdev_priv(dev));
dev_put(dev);
}
@@ -1563,13 +1564,11 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],
return -EEXIST;
} else {
t = nt;
-
- ip6gre_tunnel_unlink(ign, t);
- ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
- ip6gre_tunnel_link(ign, t);
- netdev_state_change(dev);
}
+ ip6gre_tunnel_unlink(ign, t);
+ ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]);
+ ip6gre_tunnel_link(ign, t);
return 0;
}
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index a3084ab5df6c..ac5e973e9eb5 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -329,10 +329,10 @@ int ip6_mc_input(struct sk_buff *skb)
if (offset < 0)
goto out;
- if (!ipv6_is_mld(skb, nexthdr, offset))
- goto out;
+ if (ipv6_is_mld(skb, nexthdr, offset))
+ deliver = true;
- deliver = true;
+ goto out;
}
/* unknown RA - process it normally */
}
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 7b5cb003ee22..9524b4596bf5 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -374,6 +374,9 @@ int ip6_forward(struct sk_buff *skb)
if (skb->pkt_type != PACKET_HOST)
goto drop;
+ if (unlikely(skb->sk))
+ goto drop;
+
if (skb_warn_if_lro(skb))
goto drop;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 9cb94cfa0ae7..6987d3cb4163 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -273,12 +273,12 @@ static int ip6_tnl_create2(struct net_device *dev)
t = netdev_priv(dev);
+ dev->rtnl_link_ops = &ip6_link_ops;
err = register_netdevice(dev);
if (err < 0)
goto out;
strcpy(t->parms.name, dev->name);
- dev->rtnl_link_ops = &ip6_link_ops;
dev_hold(dev);
ip6_tnl_link(ip6n, t);
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index bcda14de7f84..1d67b37592d1 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -271,8 +271,7 @@ static struct ip6_tnl *vti6_locate(struct net *net, struct __ip6_tnl_parm *p,
static void vti6_dev_uninit(struct net_device *dev)
{
struct ip6_tnl *t = netdev_priv(dev);
- struct net *net = dev_net(dev);
- struct vti6_net *ip6n = net_generic(net, vti6_net_id);
+ struct vti6_net *ip6n = net_generic(t->net, vti6_net_id);
if (dev == ip6n->fb_tnl_dev)
RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL);
@@ -306,7 +305,6 @@ static int vti6_rcv(struct sk_buff *skb)
}
XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
- skb->mark = be32_to_cpu(t->parms.i_key);
rcu_read_unlock();
@@ -326,6 +324,8 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err)
struct pcpu_sw_netstats *tstats;
struct xfrm_state *x;
struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6;
+ u32 orig_mark = skb->mark;
+ int ret;
if (!t)
return 1;
@@ -342,7 +342,11 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err)
x = xfrm_input_state(skb);
family = x->inner_mode->afinfo->family;
- if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
+ skb->mark = be32_to_cpu(t->parms.i_key);
+ ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family);
+ skb->mark = orig_mark;
+
+ if (!ret)
return -EPERM;
skb_scrub_packet(skb, !net_eq(t->net, dev_net(skb->dev)));
@@ -473,7 +477,6 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
int ret;
memset(&fl, 0, sizeof(fl));
- skb->mark = be32_to_cpu(t->parms.o_key);
switch (skb->protocol) {
case htons(ETH_P_IPV6):
@@ -494,6 +497,9 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
goto tx_err;
}
+ /* override mark with tunnel output key */
+ fl.flowi_mark = be32_to_cpu(t->parms.o_key);
+
ret = vti6_xmit(skb, dev, &fl);
if (ret < 0)
goto tx_err;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 1a01d79b8698..4ca7cdd15aad 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -120,7 +120,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
int cmd);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
struct netlink_callback *cb);
-static void mroute_clean_tables(struct mr6_table *mrt);
+static void mroute_clean_tables(struct mr6_table *mrt, bool all);
static void ipmr_expire_process(unsigned long arg);
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
@@ -337,7 +337,7 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
static void ip6mr_free_table(struct mr6_table *mrt)
{
del_timer(&mrt->ipmr_expire_timer);
- mroute_clean_tables(mrt);
+ mroute_clean_tables(mrt, true);
kfree(mrt);
}
@@ -552,7 +552,7 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
if (it->cache == &mrt->mfc6_unres_queue)
spin_unlock_bh(&mfc_unres_lock);
- else if (it->cache == mrt->mfc6_cache_array)
+ else if (it->cache == &mrt->mfc6_cache_array[it->ct])
read_unlock(&mrt_lock);
}
@@ -1540,7 +1540,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
* Close the multicast socket, and clear the vif tables etc
*/
-static void mroute_clean_tables(struct mr6_table *mrt)
+static void mroute_clean_tables(struct mr6_table *mrt, bool all)
{
int i;
LIST_HEAD(list);
@@ -1550,8 +1550,9 @@ static void mroute_clean_tables(struct mr6_table *mrt)
* Shut down all active vif entries
*/
for (i = 0; i < mrt->maxvif; i++) {
- if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
- mif6_delete(mrt, i, &list);
+ if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
+ continue;
+ mif6_delete(mrt, i, &list);
}
unregister_netdevice_many(&list);
@@ -1560,7 +1561,7 @@ static void mroute_clean_tables(struct mr6_table *mrt)
*/
for (i = 0; i < MFC6_LINES; i++) {
list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
- if (c->mfc_flags & MFC_STATIC)
+ if (!all && (c->mfc_flags & MFC_STATIC))
continue;
write_lock_bh(&mrt_lock);
list_del(&c->list);
@@ -1623,7 +1624,7 @@ int ip6mr_sk_done(struct sock *sk)
net->ipv6.devconf_all);
write_unlock_bh(&mrt_lock);
- mroute_clean_tables(mrt);
+ mroute_clean_tables(mrt, false);
err = 0;
break;
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index e1a9583bb419..f81fcc09ea6c 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -110,10 +110,12 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk,
icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen;
icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
}
- opt = xchg(&inet6_sk(sk)->opt, opt);
+ opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt,
+ opt);
} else {
spin_lock(&sk->sk_dst_lock);
- opt = xchg(&inet6_sk(sk)->opt, opt);
+ opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt,
+ opt);
spin_unlock(&sk->sk_dst_lock);
}
sk_dst_reset(sk);
@@ -213,9 +215,12 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
sk->sk_socket->ops = &inet_dgram_ops;
sk->sk_family = PF_INET;
}
- opt = xchg(&np->opt, NULL);
- if (opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ opt = xchg((__force struct ipv6_txoptions **)&np->opt,
+ NULL);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
pktopt = xchg(&np->pktoptions, NULL);
kfree_skb(pktopt);
@@ -385,7 +390,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
if (optname != IPV6_RTHDR && !ns_capable(net->user_ns, CAP_NET_RAW))
break;
- opt = ipv6_renew_options(sk, np->opt, optname,
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ opt = ipv6_renew_options(sk, opt, optname,
(struct ipv6_opt_hdr __user *)optval,
optlen);
if (IS_ERR(opt)) {
@@ -414,8 +420,10 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
retv = 0;
opt = ipv6_update_options(sk, opt);
sticky_done:
- if (opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
break;
}
@@ -468,6 +476,7 @@ sticky_done:
break;
memset(opt, 0, sizeof(*opt));
+ atomic_set(&opt->refcnt, 1);
opt->tot_len = sizeof(*opt) + optlen;
retv = -EFAULT;
if (copy_from_user(opt+1, optval, optlen))
@@ -484,8 +493,10 @@ update:
retv = 0;
opt = ipv6_update_options(sk, opt);
done:
- if (opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
break;
}
case IPV6_UNICAST_HOPS:
@@ -1092,10 +1103,11 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
case IPV6_RTHDR:
case IPV6_DSTOPTS:
{
+ struct ipv6_txoptions *opt;
lock_sock(sk);
- len = ipv6_getsockopt_sticky(sk, np->opt,
- optname, optval, len);
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len);
release_sock(sk);
/* check if ipv6_getsockopt_sticky() returns err code */
if (len < 0)
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index ed2c4e400b46..5aedf76fe287 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1652,7 +1652,6 @@ out:
if (!err) {
ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT);
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
- IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
} else {
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
}
@@ -2015,7 +2014,6 @@ out:
if (!err) {
ICMP6MSGOUT_INC_STATS(net, idev, type);
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
- IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, full_len);
} else
IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index e080fbbbc0e5..cb3cc2a4426a 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -195,11 +195,12 @@ get_entry(const void *base, unsigned int offset)
/* All zeroes == unconditional rule. */
/* Mildly perf critical (only if packet tracing is on) */
-static inline bool unconditional(const struct ip6t_ip6 *ipv6)
+static inline bool unconditional(const struct ip6t_entry *e)
{
static const struct ip6t_ip6 uncond;
- return memcmp(ipv6, &uncond, sizeof(uncond)) == 0;
+ return e->target_offset == sizeof(struct ip6t_entry) &&
+ memcmp(&e->ipv6, &uncond, sizeof(uncond)) == 0;
}
static inline const struct xt_entry_target *
@@ -255,11 +256,10 @@ get_chainname_rulenum(const struct ip6t_entry *s, const struct ip6t_entry *e,
} else if (s == e) {
(*rulenum)++;
- if (s->target_offset == sizeof(struct ip6t_entry) &&
+ if (unconditional(s) &&
strcmp(t->target.u.kernel.target->name,
XT_STANDARD_TARGET) == 0 &&
- t->verdict < 0 &&
- unconditional(&s->ipv6)) {
+ t->verdict < 0) {
/* Tail of chains: STANDARD target (return/policy) */
*comment = *chainname == hookname
? comments[NF_IP6_TRACE_COMMENT_POLICY]
@@ -453,7 +453,8 @@ ip6t_do_table(struct sk_buff *skb,
there are loops. Puts hook bitmask in comefrom. */
static int
mark_source_chains(const struct xt_table_info *newinfo,
- unsigned int valid_hooks, void *entry0)
+ unsigned int valid_hooks, void *entry0,
+ unsigned int *offsets)
{
unsigned int hook;
@@ -482,11 +483,10 @@ mark_source_chains(const struct xt_table_info *newinfo,
e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
/* Unconditional return/END. */
- if ((e->target_offset == sizeof(struct ip6t_entry) &&
+ if ((unconditional(e) &&
(strcmp(t->target.u.user.name,
XT_STANDARD_TARGET) == 0) &&
- t->verdict < 0 &&
- unconditional(&e->ipv6)) || visited) {
+ t->verdict < 0) || visited) {
unsigned int oldpos, size;
if ((strcmp(t->target.u.user.name,
@@ -527,6 +527,8 @@ mark_source_chains(const struct xt_table_info *newinfo,
size = e->next_offset;
e = (struct ip6t_entry *)
(entry0 + pos + size);
+ if (pos + size >= newinfo->size)
+ return 0;
e->counters.pcnt = pos;
pos += size;
} else {
@@ -545,9 +547,16 @@ mark_source_chains(const struct xt_table_info *newinfo,
/* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n",
pos, newpos);
+ if (!xt_find_jump_offset(offsets, newpos,
+ newinfo->number))
+ return 0;
+ e = (struct ip6t_entry *)
+ (entry0 + newpos);
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
+ if (newpos >= newinfo->size)
+ return 0;
}
e = (struct ip6t_entry *)
(entry0 + newpos);
@@ -574,27 +583,6 @@ static void cleanup_match(struct xt_entry_match *m, struct net *net)
module_put(par.match->me);
}
-static int
-check_entry(const struct ip6t_entry *e, const char *name)
-{
- const struct xt_entry_target *t;
-
- if (!ip6_checkentry(&e->ipv6)) {
- duprintf("ip_tables: ip check failed %p %s.\n", e, name);
- return -EINVAL;
- }
-
- if (e->target_offset + sizeof(struct xt_entry_target) >
- e->next_offset)
- return -EINVAL;
-
- t = ip6t_get_target_c(e);
- if (e->target_offset + t->u.target_size > e->next_offset)
- return -EINVAL;
-
- return 0;
-}
-
static int check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
{
const struct ip6t_ip6 *ipv6 = par->entryinfo;
@@ -673,10 +661,6 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name,
struct xt_mtchk_param mtpar;
struct xt_entry_match *ematch;
- ret = check_entry(e, name);
- if (ret)
- return ret;
-
j = 0;
mtpar.net = net;
mtpar.table = name;
@@ -720,7 +704,7 @@ static bool check_underflow(const struct ip6t_entry *e)
const struct xt_entry_target *t;
unsigned int verdict;
- if (!unconditional(&e->ipv6))
+ if (!unconditional(e))
return false;
t = ip6t_get_target_c(e);
if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
@@ -740,9 +724,11 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
unsigned int valid_hooks)
{
unsigned int h;
+ int err;
if ((unsigned long)e % __alignof__(struct ip6t_entry) != 0 ||
- (unsigned char *)e + sizeof(struct ip6t_entry) >= limit) {
+ (unsigned char *)e + sizeof(struct ip6t_entry) >= limit ||
+ (unsigned char *)e + e->next_offset > limit) {
duprintf("Bad offset %p\n", e);
return -EINVAL;
}
@@ -754,6 +740,14 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
return -EINVAL;
}
+ if (!ip6_checkentry(&e->ipv6))
+ return -EINVAL;
+
+ err = xt_check_entry_offsets(e, e->elems, e->target_offset,
+ e->next_offset);
+ if (err)
+ return err;
+
/* Check hooks & underflows */
for (h = 0; h < NF_INET_NUMHOOKS; h++) {
if (!(valid_hooks & (1 << h)))
@@ -762,9 +756,9 @@ check_entry_size_and_hooks(struct ip6t_entry *e,
newinfo->hook_entry[h] = hook_entries[h];
if ((unsigned char *)e - base == underflows[h]) {
if (!check_underflow(e)) {
- pr_err("Underflows must be unconditional and "
- "use the STANDARD target with "
- "ACCEPT/DROP\n");
+ pr_debug("Underflows must be unconditional and "
+ "use the STANDARD target with "
+ "ACCEPT/DROP\n");
return -EINVAL;
}
newinfo->underflow[h] = underflows[h];
@@ -804,6 +798,7 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
const struct ip6t_replace *repl)
{
struct ip6t_entry *iter;
+ unsigned int *offsets;
unsigned int i;
int ret = 0;
@@ -817,6 +812,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
}
duprintf("translate_table: size %u\n", newinfo->size);
+ offsets = xt_alloc_entry_offsets(newinfo->number);
+ if (!offsets)
+ return -ENOMEM;
i = 0;
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter, entry0, newinfo->size) {
@@ -826,17 +824,20 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
repl->underflow,
repl->valid_hooks);
if (ret != 0)
- return ret;
+ goto out_free;
+ if (i < repl->num_entries)
+ offsets[i] = (void *)iter - entry0;
++i;
if (strcmp(ip6t_get_target(iter)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
}
+ ret = -EINVAL;
if (i != repl->num_entries) {
duprintf("translate_table: %u not %u entries\n",
i, repl->num_entries);
- return -EINVAL;
+ goto out_free;
}
/* Check hooks all assigned */
@@ -847,17 +848,20 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
duprintf("Invalid hook entry %u %u\n",
i, repl->hook_entry[i]);
- return -EINVAL;
+ goto out_free;
}
if (newinfo->underflow[i] == 0xFFFFFFFF) {
duprintf("Invalid underflow %u %u\n",
i, repl->underflow[i]);
- return -EINVAL;
+ goto out_free;
}
}
- if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
- return -ELOOP;
+ if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
+ ret = -ELOOP;
+ goto out_free;
+ }
+ kvfree(offsets);
/* Finally, each sanity check must pass */
i = 0;
@@ -884,6 +888,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
}
return ret;
+ out_free:
+ kvfree(offsets);
+ return ret;
}
static void
@@ -1314,56 +1321,17 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
unsigned int i, curcpu;
struct xt_counters_info tmp;
struct xt_counters *paddc;
- unsigned int num_counters;
- char *name;
- int size;
- void *ptmp;
struct xt_table *t;
const struct xt_table_info *private;
int ret = 0;
const void *loc_cpu_entry;
struct ip6t_entry *iter;
unsigned int addend;
-#ifdef CONFIG_COMPAT
- struct compat_xt_counters_info compat_tmp;
-
- if (compat) {
- ptmp = &compat_tmp;
- size = sizeof(struct compat_xt_counters_info);
- } else
-#endif
- {
- ptmp = &tmp;
- size = sizeof(struct xt_counters_info);
- }
-
- if (copy_from_user(ptmp, user, size) != 0)
- return -EFAULT;
-
-#ifdef CONFIG_COMPAT
- if (compat) {
- num_counters = compat_tmp.num_counters;
- name = compat_tmp.name;
- } else
-#endif
- {
- num_counters = tmp.num_counters;
- name = tmp.name;
- }
-
- if (len != size + num_counters * sizeof(struct xt_counters))
- return -EINVAL;
-
- paddc = vmalloc(len - size);
- if (!paddc)
- return -ENOMEM;
-
- if (copy_from_user(paddc, user + size, len - size) != 0) {
- ret = -EFAULT;
- goto free;
- }
- t = xt_find_table_lock(net, AF_INET6, name);
+ paddc = xt_copy_counters_from_user(user, len, &tmp, compat);
+ if (IS_ERR(paddc))
+ return PTR_ERR(paddc);
+ t = xt_find_table_lock(net, AF_INET6, tmp.name);
if (IS_ERR_OR_NULL(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free;
@@ -1372,7 +1340,7 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len,
local_bh_disable();
private = t->private;
- if (private->number != num_counters) {
+ if (private->number != tmp.num_counters) {
ret = -EINVAL;
goto unlock_up_free;
}
@@ -1452,7 +1420,6 @@ compat_copy_entry_to_user(struct ip6t_entry *e, void __user **dstptr,
static int
compat_find_calc_match(struct xt_entry_match *m,
- const char *name,
const struct ip6t_ip6 *ipv6,
unsigned int hookmask,
int *size)
@@ -1488,21 +1455,19 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
struct xt_table_info *newinfo,
unsigned int *size,
const unsigned char *base,
- const unsigned char *limit,
- const unsigned int *hook_entries,
- const unsigned int *underflows,
- const char *name)
+ const unsigned char *limit)
{
struct xt_entry_match *ematch;
struct xt_entry_target *t;
struct xt_target *target;
unsigned int entry_offset;
unsigned int j;
- int ret, off, h;
+ int ret, off;
duprintf("check_compat_entry_size_and_hooks %p\n", e);
if ((unsigned long)e % __alignof__(struct compat_ip6t_entry) != 0 ||
- (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit) {
+ (unsigned char *)e + sizeof(struct compat_ip6t_entry) >= limit ||
+ (unsigned char *)e + e->next_offset > limit) {
duprintf("Bad offset %p, limit = %p\n", e, limit);
return -EINVAL;
}
@@ -1514,8 +1479,11 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
return -EINVAL;
}
- /* For purposes of check_entry casting the compat entry is fine */
- ret = check_entry((struct ip6t_entry *)e, name);
+ if (!ip6_checkentry(&e->ipv6))
+ return -EINVAL;
+
+ ret = xt_compat_check_entry_offsets(e, e->elems,
+ e->target_offset, e->next_offset);
if (ret)
return ret;
@@ -1523,8 +1491,8 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
entry_offset = (void *)e - (void *)base;
j = 0;
xt_ematch_foreach(ematch, e) {
- ret = compat_find_calc_match(ematch, name,
- &e->ipv6, e->comefrom, &off);
+ ret = compat_find_calc_match(ematch, &e->ipv6, e->comefrom,
+ &off);
if (ret != 0)
goto release_matches;
++j;
@@ -1547,17 +1515,6 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e,
if (ret)
goto out;
- /* Check hooks & underflows */
- for (h = 0; h < NF_INET_NUMHOOKS; h++) {
- if ((unsigned char *)e - base == hook_entries[h])
- newinfo->hook_entry[h] = hook_entries[h];
- if ((unsigned char *)e - base == underflows[h])
- newinfo->underflow[h] = underflows[h];
- }
-
- /* Clear counters and comefrom */
- memset(&e->counters, 0, sizeof(e->counters));
- e->comefrom = 0;
return 0;
out:
@@ -1571,18 +1528,17 @@ release_matches:
return ret;
}
-static int
+static void
compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
- unsigned int *size, const char *name,
+ unsigned int *size,
struct xt_table_info *newinfo, unsigned char *base)
{
struct xt_entry_target *t;
struct ip6t_entry *de;
unsigned int origsize;
- int ret, h;
+ int h;
struct xt_entry_match *ematch;
- ret = 0;
origsize = *size;
de = (struct ip6t_entry *)*dstptr;
memcpy(de, e, sizeof(struct ip6t_entry));
@@ -1591,11 +1547,9 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
*dstptr += sizeof(struct ip6t_entry);
*size += sizeof(struct ip6t_entry) - sizeof(struct compat_ip6t_entry);
- xt_ematch_foreach(ematch, e) {
- ret = xt_compat_match_from_user(ematch, dstptr, size);
- if (ret != 0)
- return ret;
- }
+ xt_ematch_foreach(ematch, e)
+ xt_compat_match_from_user(ematch, dstptr, size);
+
de->target_offset = e->target_offset - (origsize - *size);
t = compat_ip6t_get_target(e);
xt_compat_target_from_user(t, dstptr, size);
@@ -1607,181 +1561,82 @@ compat_copy_entry_from_user(struct compat_ip6t_entry *e, void **dstptr,
if ((unsigned char *)de - base < newinfo->underflow[h])
newinfo->underflow[h] -= origsize - *size;
}
- return ret;
-}
-
-static int compat_check_entry(struct ip6t_entry *e, struct net *net,
- const char *name)
-{
- unsigned int j;
- int ret = 0;
- struct xt_mtchk_param mtpar;
- struct xt_entry_match *ematch;
-
- j = 0;
- mtpar.net = net;
- mtpar.table = name;
- mtpar.entryinfo = &e->ipv6;
- mtpar.hook_mask = e->comefrom;
- mtpar.family = NFPROTO_IPV6;
- xt_ematch_foreach(ematch, e) {
- ret = check_match(ematch, &mtpar);
- if (ret != 0)
- goto cleanup_matches;
- ++j;
- }
-
- ret = check_target(e, net, name);
- if (ret)
- goto cleanup_matches;
- return 0;
-
- cleanup_matches:
- xt_ematch_foreach(ematch, e) {
- if (j-- == 0)
- break;
- cleanup_match(ematch, net);
- }
- return ret;
}
static int
translate_compat_table(struct net *net,
- const char *name,
- unsigned int valid_hooks,
struct xt_table_info **pinfo,
void **pentry0,
- unsigned int total_size,
- unsigned int number,
- unsigned int *hook_entries,
- unsigned int *underflows)
+ const struct compat_ip6t_replace *compatr)
{
unsigned int i, j;
struct xt_table_info *newinfo, *info;
void *pos, *entry0, *entry1;
struct compat_ip6t_entry *iter0;
- struct ip6t_entry *iter1;
+ struct ip6t_replace repl;
unsigned int size;
int ret = 0;
info = *pinfo;
entry0 = *pentry0;
- size = total_size;
- info->number = number;
-
- /* Init all hooks to impossible value. */
- for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- info->hook_entry[i] = 0xFFFFFFFF;
- info->underflow[i] = 0xFFFFFFFF;
- }
+ size = compatr->size;
+ info->number = compatr->num_entries;
duprintf("translate_compat_table: size %u\n", info->size);
j = 0;
xt_compat_lock(AF_INET6);
- xt_compat_init_offsets(AF_INET6, number);
+ xt_compat_init_offsets(AF_INET6, compatr->num_entries);
/* Walk through entries, checking offsets. */
- xt_entry_foreach(iter0, entry0, total_size) {
+ xt_entry_foreach(iter0, entry0, compatr->size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,
entry0,
- entry0 + total_size,
- hook_entries,
- underflows,
- name);
+ entry0 + compatr->size);
if (ret != 0)
goto out_unlock;
++j;
}
ret = -EINVAL;
- if (j != number) {
+ if (j != compatr->num_entries) {
duprintf("translate_compat_table: %u not %u entries\n",
- j, number);
+ j, compatr->num_entries);
goto out_unlock;
}
- /* Check hooks all assigned */
- for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- /* Only hooks which are valid */
- if (!(valid_hooks & (1 << i)))
- continue;
- if (info->hook_entry[i] == 0xFFFFFFFF) {
- duprintf("Invalid hook entry %u %u\n",
- i, hook_entries[i]);
- goto out_unlock;
- }
- if (info->underflow[i] == 0xFFFFFFFF) {
- duprintf("Invalid underflow %u %u\n",
- i, underflows[i]);
- goto out_unlock;
- }
- }
-
ret = -ENOMEM;
newinfo = xt_alloc_table_info(size);
if (!newinfo)
goto out_unlock;
- newinfo->number = number;
+ newinfo->number = compatr->num_entries;
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- newinfo->hook_entry[i] = info->hook_entry[i];
- newinfo->underflow[i] = info->underflow[i];
+ newinfo->hook_entry[i] = compatr->hook_entry[i];
+ newinfo->underflow[i] = compatr->underflow[i];
}
entry1 = newinfo->entries[raw_smp_processor_id()];
pos = entry1;
- size = total_size;
- xt_entry_foreach(iter0, entry0, total_size) {
- ret = compat_copy_entry_from_user(iter0, &pos, &size,
- name, newinfo, entry1);
- if (ret != 0)
- break;
- }
+ size = compatr->size;
+ xt_entry_foreach(iter0, entry0, compatr->size)
+ compat_copy_entry_from_user(iter0, &pos, &size,
+ newinfo, entry1);
+
+ /* all module references in entry0 are now gone. */
xt_compat_flush_offsets(AF_INET6);
xt_compat_unlock(AF_INET6);
- if (ret)
- goto free_newinfo;
- ret = -ELOOP;
- if (!mark_source_chains(newinfo, valid_hooks, entry1))
- goto free_newinfo;
+ memcpy(&repl, compatr, sizeof(*compatr));
- i = 0;
- xt_entry_foreach(iter1, entry1, newinfo->size) {
- ret = compat_check_entry(iter1, net, name);
- if (ret != 0)
- break;
- ++i;
- if (strcmp(ip6t_get_target(iter1)->u.user.name,
- XT_ERROR_TARGET) == 0)
- ++newinfo->stacksize;
- }
- if (ret) {
- /*
- * The first i matches need cleanup_entry (calls ->destroy)
- * because they had called ->check already. The other j-i
- * entries need only release.
- */
- int skip = i;
- j -= i;
- xt_entry_foreach(iter0, entry0, newinfo->size) {
- if (skip-- > 0)
- continue;
- if (j-- == 0)
- break;
- compat_release_entry(iter0);
- }
- xt_entry_foreach(iter1, entry1, newinfo->size) {
- if (i-- == 0)
- break;
- cleanup_entry(iter1, net);
- }
- xt_free_table_info(newinfo);
- return ret;
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+ repl.hook_entry[i] = newinfo->hook_entry[i];
+ repl.underflow[i] = newinfo->underflow[i];
}
- /* And one copy for every other CPU */
- for_each_possible_cpu(i)
- if (newinfo->entries[i] && newinfo->entries[i] != entry1)
- memcpy(newinfo->entries[i], entry1, newinfo->size);
+ repl.num_counters = 0;
+ repl.counters = NULL;
+ repl.size = newinfo->size;
+ ret = translate_table(net, newinfo, entry1, &repl);
+ if (ret)
+ goto free_newinfo;
*pinfo = newinfo;
*pentry0 = entry1;
@@ -1790,17 +1645,16 @@ translate_compat_table(struct net *net,
free_newinfo:
xt_free_table_info(newinfo);
-out:
- xt_entry_foreach(iter0, entry0, total_size) {
+ return ret;
+out_unlock:
+ xt_compat_flush_offsets(AF_INET6);
+ xt_compat_unlock(AF_INET6);
+ xt_entry_foreach(iter0, entry0, compatr->size) {
if (j-- == 0)
break;
compat_release_entry(iter0);
}
return ret;
-out_unlock:
- xt_compat_flush_offsets(AF_INET6);
- xt_compat_unlock(AF_INET6);
- goto out;
}
static int
@@ -1834,10 +1688,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
goto free_newinfo;
}
- ret = translate_compat_table(net, tmp.name, tmp.valid_hooks,
- &newinfo, &loc_cpu_entry, tmp.size,
- tmp.num_entries, tmp.hook_entry,
- tmp.underflow);
+ ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp);
if (ret != 0)
goto free_newinfo;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 6f187c8d8a1b..d235ed7f47ab 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -190,7 +190,7 @@ static void nf_ct_frag6_expire(unsigned long data)
/* Creation primitives. */
static inline struct frag_queue *fq_find(struct net *net, __be32 id,
u32 user, struct in6_addr *src,
- struct in6_addr *dst, u8 ecn)
+ struct in6_addr *dst, int iif, u8 ecn)
{
struct inet_frag_queue *q;
struct ip6_create_arg arg;
@@ -200,6 +200,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,
arg.user = user;
arg.src = src;
arg.dst = dst;
+ arg.iif = iif;
arg.ecn = ecn;
local_bh_disable();
@@ -603,7 +604,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user)
fhdr = (struct frag_hdr *)skb_transport_header(clone);
fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
- ip6_frag_ecn(hdr));
+ skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
if (fq == NULL) {
pr_debug("Can't find and can't create new queue\n");
goto ret_orig;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 896af8807979..a66a67d17ed6 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -735,6 +735,7 @@ static int rawv6_probe_proto_opt(struct flowi6 *fl6, struct msghdr *msg)
static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t len)
{
+ struct ipv6_txoptions *opt_to_free = NULL;
struct ipv6_txoptions opt_space;
DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
struct in6_addr *daddr, *final_p, final;
@@ -840,8 +841,10 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
if (!(opt->opt_nflen|opt->opt_flen))
opt = NULL;
}
- if (opt == NULL)
- opt = np->opt;
+ if (!opt) {
+ opt = txopt_get(np);
+ opt_to_free = opt;
+ }
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
@@ -902,6 +905,7 @@ done:
dst_release(dst);
out:
fl6_sock_release(flowlabel);
+ txopt_put(opt_to_free);
return err < 0 ? err : len;
do_confirm:
dst_confirm(dst);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 1a157ca2ebc1..28e72f396fde 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -108,7 +108,10 @@ bool ip6_frag_match(const struct inet_frag_queue *q, const void *a)
return fq->id == arg->id &&
fq->user == arg->user &&
ipv6_addr_equal(&fq->saddr, arg->src) &&
- ipv6_addr_equal(&fq->daddr, arg->dst);
+ ipv6_addr_equal(&fq->daddr, arg->dst) &&
+ (arg->iif == fq->iif ||
+ !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST |
+ IPV6_ADDR_LINKLOCAL)));
}
EXPORT_SYMBOL(ip6_frag_match);
@@ -180,7 +183,7 @@ static void ip6_frag_expire(unsigned long data)
static __inline__ struct frag_queue *
fq_find(struct net *net, __be32 id, const struct in6_addr *src,
- const struct in6_addr *dst, u8 ecn)
+ const struct in6_addr *dst, int iif, u8 ecn)
{
struct inet_frag_queue *q;
struct ip6_create_arg arg;
@@ -190,6 +193,7 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src,
arg.user = IP6_DEFRAG_LOCAL_DELIVER;
arg.src = src;
arg.dst = dst;
+ arg.iif = iif;
arg.ecn = ecn;
hash = inet6_hash_frag(id, src, dst);
@@ -550,7 +554,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
}
fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
- ip6_frag_ecn(hdr));
+ skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
if (fq != NULL) {
int ret;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index a24557a1c1d8..45eae1e609d6 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1394,34 +1394,20 @@ static int ipip6_tunnel_init(struct net_device *dev)
return 0;
}
-static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
+static void __net_init ipip6_fb_tunnel_init(struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
struct iphdr *iph = &tunnel->parms.iph;
struct net *net = dev_net(dev);
struct sit_net *sitn = net_generic(net, sit_net_id);
- tunnel->dev = dev;
- tunnel->net = dev_net(dev);
-
iph->version = 4;
iph->protocol = IPPROTO_IPV6;
iph->ihl = 5;
iph->ttl = 64;
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
- tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
- if (!tunnel->dst_cache) {
- free_percpu(dev->tstats);
- return -ENOMEM;
- }
-
dev_hold(dev);
rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
- return 0;
}
static int ipip6_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -1831,23 +1817,18 @@ static int __net_init sit_init_net(struct net *net)
*/
sitn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
- err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev);
- if (err)
- goto err_dev_free;
-
- ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn);
-
if ((err = register_netdev(sitn->fb_tunnel_dev)))
goto err_reg_dev;
+ ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn);
+ ipip6_fb_tunnel_init(sitn->fb_tunnel_dev);
+
t = netdev_priv(sitn->fb_tunnel_dev);
strcpy(t->parms.name, sitn->fb_tunnel_dev->name);
return 0;
err_reg_dev:
- dev_put(sitn->fb_tunnel_dev);
-err_dev_free:
ipip6_dev_free(sitn->fb_tunnel_dev);
err_alloc_dev:
return err;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 2f25cb6347ca..aa9699301ea8 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -241,7 +241,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_TCP;
fl6.daddr = ireq->ir_v6_rmt_addr;
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
fl6.saddr = ireq->ir_v6_loc_addr;
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.flowi6_mark = ireq->ir_mark;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 79fe58510ee8..b8e14a5ae0b1 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -93,10 +93,9 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
- if (dst) {
+ if (dst && dst_hold_safe(dst)) {
const struct rt6_info *rt = (const struct rt6_info *)dst;
- dst_hold(dst);
sk->sk_rx_dst = dst;
inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
if (rt->rt6i_node)
@@ -134,6 +133,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
struct ipv6_pinfo *np = inet6_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct in6_addr *saddr = NULL, *final_p, final;
+ struct ipv6_txoptions *opt;
struct rt6_info *rt;
struct flowi6 fl6;
struct dst_entry *dst;
@@ -253,7 +253,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
fl6.fl6_dport = usin->sin6_port;
fl6.fl6_sport = inet->inet_sport;
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ final_p = fl6_update_dst(&fl6, opt, &final);
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
@@ -282,9 +283,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
tcp_fetch_timewait_stamp(sk, dst);
icsk->icsk_ext_hdr_len = 0;
- if (np->opt)
- icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
- np->opt->opt_nflen);
+ if (opt)
+ icsk->icsk_ext_hdr_len = opt->opt_flen +
+ opt->opt_nflen;
tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
@@ -501,7 +502,10 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
skb_set_queue_mapping(skb, queue_mapping);
- err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
+ rcu_read_lock();
+ err = ip6_xmit(sk, skb, fl6, rcu_dereference(np->opt),
+ np->tclass);
+ rcu_read_unlock();
err = net_xmit_eval(err);
}
@@ -975,7 +979,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
tcp_time_stamp + tcptw->tw_ts_offset,
tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
- tw->tw_tclass, (tw->tw_flowlabel << 12));
+ tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
inet_twsk_put(tw);
}
@@ -1052,6 +1056,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
struct inet_request_sock *ireq;
struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
struct tcp6_sock *newtcp6sk;
+ struct ipv6_txoptions *opt;
struct inet_sock *newinet;
struct tcp_sock *newtp;
struct sock *newsk;
@@ -1191,13 +1196,15 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
but we make one more one thing there: reattach optmem
to newsk.
*/
- if (np->opt)
- newnp->opt = ipv6_dup_options(newsk, np->opt);
-
+ opt = rcu_dereference(np->opt);
+ if (opt) {
+ opt = ipv6_dup_options(newsk, opt);
+ RCU_INIT_POINTER(newnp->opt, opt);
+ }
inet_csk(newsk)->icsk_ext_hdr_len = 0;
- if (newnp->opt)
- inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
- newnp->opt->opt_flen);
+ if (opt)
+ inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
+ opt->opt_flen;
tcp_sync_mss(newsk, dst_mtu(dst));
newtp->advmss = dst_metric_advmss(dst);
@@ -1742,7 +1749,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
destp = ntohs(inet->inet_dport);
srcp = ntohs(inet->inet_sport);
- if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
+ if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
+ icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS ||
+ icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
timer_expires = icsk->icsk_timeout;
} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index f6ba535b6feb..7d0111696190 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -514,10 +514,8 @@ csum_copy_err:
}
unlock_sock_fast(sk, slow);
- if (noblock)
- return -EAGAIN;
-
- /* starting over for a new packet */
+ /* starting over for a new packet, but check if we need to yield */
+ cond_resched();
msg->msg_flags &= ~MSG_TRUNC;
goto try_again;
}
@@ -721,7 +719,9 @@ static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk,
(inet->inet_dport && inet->inet_dport != rmt_port) ||
(!ipv6_addr_any(&sk->sk_v6_daddr) &&
!ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) ||
- (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+ (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) ||
+ (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) &&
+ !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)))
return false;
if (!inet6_mc_check(sk, loc_addr, rmt_addr))
return false;
@@ -898,11 +898,9 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
ret = udpv6_queue_rcv_skb(sk, skb);
sock_put(sk);
- /* a return value > 0 means to resubmit the input, but
- * it wants the return to be -protocol, or 0
- */
+ /* a return value > 0 means to resubmit the input */
if (ret > 0)
- return -ret;
+ return ret;
return 0;
}
@@ -1082,6 +1080,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
struct in6_addr *daddr, *final_p, final;
struct ipv6_txoptions *opt = NULL;
+ struct ipv6_txoptions *opt_to_free = NULL;
struct ip6_flowlabel *flowlabel = NULL;
struct flowi6 fl6;
struct dst_entry *dst;
@@ -1234,8 +1233,10 @@ do_udp_sendmsg:
opt = NULL;
connected = 0;
}
- if (opt == NULL)
- opt = np->opt;
+ if (!opt) {
+ opt = txopt_get(np);
+ opt_to_free = opt;
+ }
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
@@ -1329,6 +1330,7 @@ do_append_data:
out:
dst_release(dst);
fl6_sock_release(flowlabel);
+ txopt_put(opt_to_free);
if (!err)
return len;
/*
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 901ef6f8addc..5266ad2d6419 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -24,7 +24,7 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
struct ipv6hdr *inner_iph = ipipv6_hdr(skb);
if (INET_ECN_is_ce(ipv6_get_dsfield(outer_iph)))
- IP6_ECN_set_ce(inner_iph);
+ IP6_ECN_set_ce(skb, inner_iph);
}
/* Add encapsulation header.
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 5f983644373a..d2425cef8a38 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -284,7 +284,7 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
xfrm_dst_ifdown(dst, dev);
}
-static struct dst_ops xfrm6_dst_ops = {
+static struct dst_ops xfrm6_dst_ops_template = {
.family = AF_INET6,
.protocol = cpu_to_be16(ETH_P_IPV6),
.gc = xfrm6_garbage_collect,
@@ -299,7 +299,7 @@ static struct dst_ops xfrm6_dst_ops = {
static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
.family = AF_INET6,
- .dst_ops = &xfrm6_dst_ops,
+ .dst_ops = &xfrm6_dst_ops_template,
.dst_lookup = xfrm6_dst_lookup,
.get_saddr = xfrm6_get_saddr,
.decode_session = _decode_session6,
@@ -332,7 +332,7 @@ static struct ctl_table xfrm6_policy_table[] = {
{ }
};
-static int __net_init xfrm6_net_init(struct net *net)
+static int __net_init xfrm6_net_sysctl_init(struct net *net)
{
struct ctl_table *table;
struct ctl_table_header *hdr;
@@ -360,7 +360,7 @@ err_alloc:
return -ENOMEM;
}
-static void __net_exit xfrm6_net_exit(struct net *net)
+static void __net_exit xfrm6_net_sysctl_exit(struct net *net)
{
struct ctl_table *table;
@@ -372,24 +372,52 @@ static void __net_exit xfrm6_net_exit(struct net *net)
if (!net_eq(net, &init_net))
kfree(table);
}
+#else /* CONFIG_SYSCTL */
+static int inline xfrm6_net_sysctl_init(struct net *net)
+{
+ return 0;
+}
+
+static void inline xfrm6_net_sysctl_exit(struct net *net)
+{
+}
+#endif
+
+static int __net_init xfrm6_net_init(struct net *net)
+{
+ int ret;
+
+ memcpy(&net->xfrm.xfrm6_dst_ops, &xfrm6_dst_ops_template,
+ sizeof(xfrm6_dst_ops_template));
+ ret = dst_entries_init(&net->xfrm.xfrm6_dst_ops);
+ if (ret)
+ return ret;
+
+ ret = xfrm6_net_sysctl_init(net);
+ if (ret)
+ dst_entries_destroy(&net->xfrm.xfrm6_dst_ops);
+
+ return ret;
+}
+
+static void __net_exit xfrm6_net_exit(struct net *net)
+{
+ xfrm6_net_sysctl_exit(net);
+ dst_entries_destroy(&net->xfrm.xfrm6_dst_ops);
+}
static struct pernet_operations xfrm6_net_ops = {
.init = xfrm6_net_init,
.exit = xfrm6_net_exit,
};
-#endif
int __init xfrm6_init(void)
{
int ret;
- dst_entries_init(&xfrm6_dst_ops);
-
ret = xfrm6_policy_init();
- if (ret) {
- dst_entries_destroy(&xfrm6_dst_ops);
+ if (ret)
goto out;
- }
ret = xfrm6_state_init();
if (ret)
goto out_policy;
@@ -398,9 +426,7 @@ int __init xfrm6_init(void)
if (ret)
goto out_state;
-#ifdef CONFIG_SYSCTL
register_pernet_subsys(&xfrm6_net_ops);
-#endif
out:
return ret;
out_state:
@@ -412,11 +438,8 @@ out_policy:
void xfrm6_fini(void)
{
-#ifdef CONFIG_SYSCTL
unregister_pernet_subsys(&xfrm6_net_ops);
-#endif
xfrm6_protocol_fini();
xfrm6_policy_fini();
xfrm6_state_fini();
- dst_entries_destroy(&xfrm6_dst_ops);
}
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 3f3a6cbdceb7..b3f6ec0df426 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -1100,6 +1100,9 @@ static int irda_create(struct net *net, struct socket *sock, int protocol,
IRDA_DEBUG(2, "%s()\n", __func__);
+ if (protocol < 0 || protocol > SK_PROTOCOL_MAX)
+ return -EINVAL;
+
if (net != &init_net)
return -EAFNOSUPPORT;
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c
index a5f28d421ea8..c3a939d7848f 100644
--- a/net/irda/irlmp.c
+++ b/net/irda/irlmp.c
@@ -1877,7 +1877,7 @@ static void *irlmp_seq_hb_idx(struct irlmp_iter_state *iter, loff_t *off)
for (element = hashbin_get_first(iter->hashbin);
element != NULL;
element = hashbin_get_next(iter->hashbin)) {
- if (!off || *off-- == 0) {
+ if (!off || (*off)-- == 0) {
/* NB: hashbin left locked */
return element;
}
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 895348e44c7d..508154a04558 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1319,7 +1319,7 @@ static void l2tp_tunnel_del_work(struct work_struct *work)
tunnel = container_of(work, struct l2tp_tunnel, del_work);
sk = l2tp_tunnel_sock_lookup(tunnel);
if (!sk)
- return;
+ goto out;
sock = sk->sk_socket;
@@ -1340,6 +1340,8 @@ static void l2tp_tunnel_del_work(struct work_struct *work)
}
l2tp_tunnel_sock_put(sk);
+out:
+ l2tp_tunnel_dec_refcount(tunnel);
}
/* Create a socket for the tunnel, if one isn't set up by
@@ -1639,8 +1641,13 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
*/
int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
{
+ l2tp_tunnel_inc_refcount(tunnel);
l2tp_tunnel_closeall(tunnel);
- return (false == queue_work(l2tp_wq, &tunnel->del_work));
+ if (false == queue_work(l2tp_wq, &tunnel->del_work)) {
+ l2tp_tunnel_dec_refcount(tunnel);
+ return 1;
+ }
+ return 0;
}
EXPORT_SYMBOL_GPL(l2tp_tunnel_delete);
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 369a9822488c..e45d2b77bb42 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -123,12 +123,11 @@ static int l2tp_ip_recv(struct sk_buff *skb)
struct l2tp_tunnel *tunnel = NULL;
int length;
- /* Point to L2TP header */
- optr = ptr = skb->data;
-
if (!pskb_may_pull(skb, 4))
goto discard;
+ /* Point to L2TP header */
+ optr = ptr = skb->data;
session_id = ntohl(*((__be32 *) ptr));
ptr += 4;
@@ -156,6 +155,9 @@ static int l2tp_ip_recv(struct sk_buff *skb)
if (!pskb_may_pull(skb, length))
goto discard;
+ /* Point to L2TP header */
+ optr = ptr = skb->data;
+ ptr += 4;
pr_debug("%s: ip recv\n", tunnel->name);
print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
}
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 0edb263cc002..b8e469b832df 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -135,12 +135,11 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
struct l2tp_tunnel *tunnel = NULL;
int length;
- /* Point to L2TP header */
- optr = ptr = skb->data;
-
if (!pskb_may_pull(skb, 4))
goto discard;
+ /* Point to L2TP header */
+ optr = ptr = skb->data;
session_id = ntohl(*((__be32 *) ptr));
ptr += 4;
@@ -168,6 +167,9 @@ static int l2tp_ip6_recv(struct sk_buff *skb)
if (!pskb_may_pull(skb, length))
goto discard;
+ /* Point to L2TP header */
+ optr = ptr = skb->data;
+ ptr += 4;
pr_debug("%s: ip recv\n", tunnel->name);
print_hex_dump_bytes("", DUMP_PREFIX_OFFSET, ptr, length);
}
@@ -487,6 +489,7 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk,
DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name);
struct in6_addr *daddr, *final_p, final;
struct ipv6_pinfo *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt_to_free = NULL;
struct ipv6_txoptions *opt = NULL;
struct ip6_flowlabel *flowlabel = NULL;
struct dst_entry *dst = NULL;
@@ -576,8 +579,10 @@ static int l2tp_ip6_sendmsg(struct kiocb *iocb, struct sock *sk,
opt = NULL;
}
- if (opt == NULL)
- opt = np->opt;
+ if (!opt) {
+ opt = txopt_get(np);
+ opt_to_free = opt;
+ }
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
@@ -632,6 +637,7 @@ done:
dst_release(dst);
out:
fl6_sock_release(flowlabel);
+ txopt_put(opt_to_free);
return err < 0 ? err : len;
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index bb9cbc17d926..3e8691895385 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -626,6 +626,7 @@ static void llc_cmsg_rcv(struct msghdr *msg, struct sk_buff *skb)
if (llc->cmsg_flags & LLC_CMSG_PKTINFO) {
struct llc_pktinfo info;
+ memset(&info, 0, sizeof(info));
info.lpi_ifindex = llc_sk(skb->sk)->dev->ifindex;
llc_pdu_decode_dsap(skb, &info.lpi_sap);
llc_pdu_decode_da(skb, info.lpi_mac);
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 7702978a4c99..f73fddfa548c 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -281,7 +281,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
}
/* prepare A-MPDU MLME for Rx aggregation */
- tid_agg_rx = kmalloc(sizeof(struct tid_ampdu_rx), GFP_KERNEL);
+ tid_agg_rx = kzalloc(sizeof(*tid_agg_rx), GFP_KERNEL);
if (!tid_agg_rx)
goto end;
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index c68896adfa96..2d19963e6d97 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -725,6 +725,7 @@ void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata)
debugfs_remove_recursive(sdata->vif.debugfs_dir);
sdata->vif.debugfs_dir = NULL;
+ sdata->debugfs.subdir_stations = NULL;
}
void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 509bc157ce55..76de04be8900 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -1620,7 +1620,6 @@ void ieee80211_ibss_notify_scan_completed(struct ieee80211_local *local)
if (sdata->vif.type != NL80211_IFTYPE_ADHOC)
continue;
sdata->u.ibss.last_scan_completed = jiffies;
- ieee80211_queue_work(&local->hw, &sdata->work);
}
mutex_unlock(&local->iflist_mtx);
}
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 0de7c93bf62b..67943cb5df8a 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -249,6 +249,7 @@ static void ieee80211_restart_work(struct work_struct *work)
{
struct ieee80211_local *local =
container_of(work, struct ieee80211_local, restart_work);
+ struct ieee80211_sub_if_data *sdata;
/* wait for scan work complete */
flush_workqueue(local->workqueue);
@@ -257,6 +258,8 @@ static void ieee80211_restart_work(struct work_struct *work)
"%s called with hardware scan in progress\n", __func__);
rtnl_lock();
+ list_for_each_entry(sdata, &local->interfaces, list)
+ flush_delayed_work(&sdata->dec_tailroom_needed_wk);
ieee80211_scan_cancel(local);
ieee80211_reconfig(local);
rtnl_unlock();
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 0c8b2a77d312..ac04e3874af1 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -161,6 +161,10 @@ void mesh_sta_cleanup(struct sta_info *sta)
del_timer_sync(&sta->plink_timer);
}
+ /* make sure no readers can access nexthop sta from here on */
+ mesh_path_flush_by_nexthop(sta);
+ synchronize_net();
+
if (changed)
ieee80211_mbss_info_change_notify(sdata, changed);
}
@@ -1297,17 +1301,6 @@ out:
sdata_unlock(sdata);
}
-void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local)
-{
- struct ieee80211_sub_if_data *sdata;
-
- rcu_read_lock();
- list_for_each_entry_rcu(sdata, &local->interfaces, list)
- if (ieee80211_vif_is_mesh(&sdata->vif) &&
- ieee80211_sdata_running(sdata))
- ieee80211_queue_work(&local->hw, &sdata->work);
- rcu_read_unlock();
-}
void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
{
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index f39a19f9090f..1ca355b5b608 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -355,14 +355,10 @@ static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
return sdata->u.mesh.mesh_pp_id == IEEE80211_PATH_PROTOCOL_HWMP;
}
-void ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local);
-
void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata);
void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata);
void ieee80211s_stop(void);
#else
-static inline void
-ieee80211_mesh_notify_scan_completed(struct ieee80211_local *local) {}
static inline bool mesh_path_sel_is_hwmp(struct ieee80211_sub_if_data *sdata)
{ return false; }
static inline void mesh_path_flush_by_iface(struct ieee80211_sub_if_data *sdata)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index da1f639ecfb6..b13634c677f1 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3715,8 +3715,6 @@ static void ieee80211_restart_sta_timer(struct ieee80211_sub_if_data *sdata)
if (!(flags & IEEE80211_HW_CONNECTION_MONITOR))
ieee80211_queue_work(&sdata->local->hw,
&sdata->u.mgd.monitor_work);
- /* and do all the other regular work too */
- ieee80211_queue_work(&sdata->local->hw, &sdata->work);
}
}
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 408fd8ab4eef..0f632d6c6904 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -557,7 +557,7 @@ minstrel_aggr_check(struct ieee80211_sta *pubsta, struct sk_buff *skb)
if (skb_get_queue_mapping(skb) == IEEE80211_AC_VO)
return;
- ieee80211_start_tx_ba_session(pubsta, tid, 5000);
+ ieee80211_start_tx_ba_session(pubsta, tid, 0);
}
static void
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 86864c4e248f..911a972b93ef 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2090,7 +2090,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
struct ieee80211_sub_if_data *sdata = rx->sdata;
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
- u16 q, hdrlen;
+ u16 ac, q, hdrlen;
hdr = (struct ieee80211_hdr *) skb->data;
hdrlen = ieee80211_hdrlen(hdr->frame_control);
@@ -2160,7 +2160,8 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
ether_addr_equal(sdata->vif.addr, hdr->addr3))
return RX_CONTINUE;
- q = ieee80211_select_queue_80211(sdata, skb, hdr);
+ ac = ieee80211_select_queue_80211(sdata, skb, hdr);
+ q = sdata->vif.hw_queue[ac];
if (ieee80211_queue_stopped(&local->hw, q)) {
IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_congestion);
return RX_DROP_MONITOR;
@@ -3171,6 +3172,7 @@ static bool prepare_for_handlers(struct ieee80211_rx_data *rx,
return false;
/* ignore action frames to TDLS-peers */
if (ieee80211_is_action(hdr->frame_control) &&
+ !is_broadcast_ether_addr(bssid) &&
!ether_addr_equal(bssid, hdr->addr1))
return false;
}
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index af0d094b2f2f..4835db45e197 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -290,6 +290,7 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
struct ieee80211_local *local = hw_to_local(hw);
bool hw_scan = local->ops->hw_scan;
bool was_scanning = local->scanning;
+ struct ieee80211_sub_if_data *sdata;
lockdep_assert_held(&local->mtx);
@@ -343,7 +344,16 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
ieee80211_mlme_notify_scan_completed(local);
ieee80211_ibss_notify_scan_completed(local);
- ieee80211_mesh_notify_scan_completed(local);
+
+ /* Requeue all the work that might have been ignored while
+ * the scan was in progress; if there was none this will
+ * just be a no-op for the particular interface.
+ */
+ list_for_each_entry_rcu(sdata, &local->interfaces, list) {
+ if (ieee80211_sdata_running(sdata))
+ ieee80211_queue_work(&sdata->local->hw, &sdata->work);
+ }
+
if (was_scanning)
ieee80211_start_next_roc(local);
}
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 80ce44f6693d..45e782825567 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -299,9 +299,6 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx)
if (tx->sdata->vif.type == NL80211_IFTYPE_WDS)
return TX_CONTINUE;
- if (tx->sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
- return TX_CONTINUE;
-
if (tx->flags & IEEE80211_TX_PS_BUFFERED)
return TX_CONTINUE;
diff --git a/net/mac802154/mib.c b/net/mac802154/mib.c
index 868a040fd422..488d1f428edb 100644
--- a/net/mac802154/mib.c
+++ b/net/mac802154/mib.c
@@ -93,7 +93,7 @@ void mac802154_dev_set_short_addr(struct net_device *dev, __le16 val)
if ((priv->hw->ops->set_hw_addr_filt) &&
(priv->hw->hw.hw_filt.short_addr != priv->short_addr)) {
priv->hw->hw.hw_filt.short_addr = priv->short_addr;
- set_hw_addr_filt(dev, IEEE802515_AFILT_SADDR_CHANGED);
+ set_hw_addr_filt(dev, IEEE802154_AFILT_SADDR_CHANGED);
}
}
@@ -121,7 +121,7 @@ void mac802154_dev_set_ieee_addr(struct net_device *dev)
if (mac->ops->set_hw_addr_filt &&
mac->hw.hw_filt.ieee_addr != priv->extended_addr) {
mac->hw.hw_filt.ieee_addr = priv->extended_addr;
- set_hw_addr_filt(dev, IEEE802515_AFILT_IEEEADDR_CHANGED);
+ set_hw_addr_filt(dev, IEEE802154_AFILT_IEEEADDR_CHANGED);
}
}
@@ -152,7 +152,7 @@ void mac802154_dev_set_pan_id(struct net_device *dev, __le16 val)
if ((priv->hw->ops->set_hw_addr_filt) &&
(priv->hw->hw.hw_filt.pan_id != priv->pan_id)) {
priv->hw->hw.hw_filt.pan_id = priv->pan_id;
- set_hw_addr_filt(dev, IEEE802515_AFILT_PANID_CHANGED);
+ set_hw_addr_filt(dev, IEEE802154_AFILT_PANID_CHANGED);
}
}
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 656b5ebb0eff..942479a61b61 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -94,6 +94,7 @@ void nf_unregister_hook(struct nf_hook_ops *reg)
static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
synchronize_net();
+ nf_queue_nf_hook_drop(reg);
}
EXPORT_SYMBOL(nf_unregister_hook);
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 990decba1fe4..3a2fa9c044f8 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -313,7 +313,13 @@ ip_vs_sched_persist(struct ip_vs_service *svc,
* return *ignored=0 i.e. ICMP and NF_DROP
*/
sched = rcu_dereference(svc->scheduler);
- dest = sched->schedule(svc, skb, iph);
+ if (sched) {
+ /* read svc->sched_data after svc->scheduler */
+ smp_rmb();
+ dest = sched->schedule(svc, skb, iph);
+ } else {
+ dest = NULL;
+ }
if (!dest) {
IP_VS_DBG(1, "p-schedule: no dest found.\n");
kfree(param.pe_data);
@@ -461,7 +467,13 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
}
sched = rcu_dereference(svc->scheduler);
- dest = sched->schedule(svc, skb, iph);
+ if (sched) {
+ /* read svc->sched_data after svc->scheduler */
+ smp_rmb();
+ dest = sched->schedule(svc, skb, iph);
+ } else {
+ dest = NULL;
+ }
if (dest == NULL) {
IP_VS_DBG(1, "Schedule: no dest found.\n");
return NULL;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index ac7ba689efe7..9b1452e8e868 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -828,15 +828,16 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
__ip_vs_dst_cache_reset(dest);
spin_unlock_bh(&dest->dst_lock);
- sched = rcu_dereference_protected(svc->scheduler, 1);
if (add) {
ip_vs_start_estimator(svc->net, &dest->stats);
list_add_rcu(&dest->n_list, &svc->destinations);
svc->num_dests++;
- if (sched->add_dest)
+ sched = rcu_dereference_protected(svc->scheduler, 1);
+ if (sched && sched->add_dest)
sched->add_dest(svc, dest);
} else {
- if (sched->upd_dest)
+ sched = rcu_dereference_protected(svc->scheduler, 1);
+ if (sched && sched->upd_dest)
sched->upd_dest(svc, dest);
}
}
@@ -1070,7 +1071,7 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
struct ip_vs_scheduler *sched;
sched = rcu_dereference_protected(svc->scheduler, 1);
- if (sched->del_dest)
+ if (sched && sched->del_dest)
sched->del_dest(svc, dest);
}
}
@@ -1161,11 +1162,14 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
ip_vs_use_count_inc();
/* Lookup the scheduler by 'u->sched_name' */
- sched = ip_vs_scheduler_get(u->sched_name);
- if (sched == NULL) {
- pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
- ret = -ENOENT;
- goto out_err;
+ if (strcmp(u->sched_name, "none")) {
+ sched = ip_vs_scheduler_get(u->sched_name);
+ if (!sched) {
+ pr_info("Scheduler module ip_vs_%s not found\n",
+ u->sched_name);
+ ret = -ENOENT;
+ goto out_err;
+ }
}
if (u->pe_name && *u->pe_name) {
@@ -1226,10 +1230,12 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
spin_lock_init(&svc->stats.lock);
/* Bind the scheduler */
- ret = ip_vs_bind_scheduler(svc, sched);
- if (ret)
- goto out_err;
- sched = NULL;
+ if (sched) {
+ ret = ip_vs_bind_scheduler(svc, sched);
+ if (ret)
+ goto out_err;
+ sched = NULL;
+ }
/* Bind the ct retriever */
RCU_INIT_POINTER(svc->pe, pe);
@@ -1277,17 +1283,20 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
static int
ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
{
- struct ip_vs_scheduler *sched, *old_sched;
+ struct ip_vs_scheduler *sched = NULL, *old_sched;
struct ip_vs_pe *pe = NULL, *old_pe = NULL;
int ret = 0;
/*
* Lookup the scheduler, by 'u->sched_name'
*/
- sched = ip_vs_scheduler_get(u->sched_name);
- if (sched == NULL) {
- pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
- return -ENOENT;
+ if (strcmp(u->sched_name, "none")) {
+ sched = ip_vs_scheduler_get(u->sched_name);
+ if (!sched) {
+ pr_info("Scheduler module ip_vs_%s not found\n",
+ u->sched_name);
+ return -ENOENT;
+ }
}
old_sched = sched;
@@ -1315,14 +1324,20 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
old_sched = rcu_dereference_protected(svc->scheduler, 1);
if (sched != old_sched) {
+ if (old_sched) {
+ ip_vs_unbind_scheduler(svc, old_sched);
+ RCU_INIT_POINTER(svc->scheduler, NULL);
+ /* Wait all svc->sched_data users */
+ synchronize_rcu();
+ }
/* Bind the new scheduler */
- ret = ip_vs_bind_scheduler(svc, sched);
- if (ret) {
- old_sched = sched;
- goto out;
+ if (sched) {
+ ret = ip_vs_bind_scheduler(svc, sched);
+ if (ret) {
+ ip_vs_scheduler_put(sched);
+ goto out;
+ }
}
- /* Unbind the old scheduler on success */
- ip_vs_unbind_scheduler(svc, old_sched);
}
/*
@@ -1962,6 +1977,7 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
const struct ip_vs_iter *iter = seq->private;
const struct ip_vs_dest *dest;
struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
+ char *sched_name = sched ? sched->name : "none";
if (iter->table == ip_vs_svc_table) {
#ifdef CONFIG_IP_VS_IPV6
@@ -1970,18 +1986,18 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
ip_vs_proto_name(svc->protocol),
&svc->addr.in6,
ntohs(svc->port),
- sched->name);
+ sched_name);
else
#endif
seq_printf(seq, "%s %08X:%04X %s %s ",
ip_vs_proto_name(svc->protocol),
ntohl(svc->addr.ip),
ntohs(svc->port),
- sched->name,
+ sched_name,
(svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
} else {
seq_printf(seq, "FWM %08X %s %s",
- svc->fwmark, sched->name,
+ svc->fwmark, sched_name,
(svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
}
@@ -2401,13 +2417,15 @@ static void
ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
{
struct ip_vs_scheduler *sched;
+ char *sched_name;
sched = rcu_dereference_protected(src->scheduler, 1);
+ sched_name = sched ? sched->name : "none";
dst->protocol = src->protocol;
dst->addr = src->addr.ip;
dst->port = src->port;
dst->fwmark = src->fwmark;
- strlcpy(dst->sched_name, sched->name, sizeof(dst->sched_name));
+ strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name));
dst->flags = src->flags;
dst->timeout = src->timeout / HZ;
dst->netmask = src->netmask;
@@ -2836,6 +2854,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
struct nlattr *nl_service;
struct ip_vs_flags flags = { .flags = svc->flags,
.mask = ~0 };
+ char *sched_name;
nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
if (!nl_service)
@@ -2854,8 +2873,9 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
}
sched = rcu_dereference_protected(svc->scheduler, 1);
+ sched_name = sched ? sched->name : "none";
pe = rcu_dereference_protected(svc->pe, 1);
- if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched->name) ||
+ if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) ||
(pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) ||
nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) ||
nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index bed5f7042529..bb318e4623a3 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -88,7 +88,7 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
dptr = skb->data + dataoff;
datalen = skb->len - dataoff;
- if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen))
+ if (get_callid(dptr, 0, datalen, &matchoff, &matchlen))
return -EINVAL;
/* N.B: pe_data is only set on success,
diff --git a/net/netfilter/ipvs/ip_vs_sched.c b/net/netfilter/ipvs/ip_vs_sched.c
index 4dbcda6258bc..21b6b515a09c 100644
--- a/net/netfilter/ipvs/ip_vs_sched.c
+++ b/net/netfilter/ipvs/ip_vs_sched.c
@@ -74,7 +74,7 @@ void ip_vs_unbind_scheduler(struct ip_vs_service *svc,
if (sched->done_service)
sched->done_service(svc);
- /* svc->scheduler can not be set to NULL */
+ /* svc->scheduler can be set to NULL only by caller */
}
@@ -148,21 +148,21 @@ void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler)
void ip_vs_scheduler_err(struct ip_vs_service *svc, const char *msg)
{
- struct ip_vs_scheduler *sched;
+ struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler);
+ char *sched_name = sched ? sched->name : "none";
- sched = rcu_dereference(svc->scheduler);
if (svc->fwmark) {
IP_VS_ERR_RL("%s: FWM %u 0x%08X - %s\n",
- sched->name, svc->fwmark, svc->fwmark, msg);
+ sched_name, svc->fwmark, svc->fwmark, msg);
#ifdef CONFIG_IP_VS_IPV6
} else if (svc->af == AF_INET6) {
IP_VS_ERR_RL("%s: %s [%pI6c]:%d - %s\n",
- sched->name, ip_vs_proto_name(svc->protocol),
+ sched_name, ip_vs_proto_name(svc->protocol),
&svc->addr.in6, ntohs(svc->port), msg);
#endif
} else {
IP_VS_ERR_RL("%s: %s %pI4:%d - %s\n",
- sched->name, ip_vs_proto_name(svc->protocol),
+ sched_name, ip_vs_proto_name(svc->protocol),
&svc->addr.ip, ntohs(svc->port), msg);
}
}
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 7162c86fd50d..72fac696c85e 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -612,7 +612,7 @@ static void ip_vs_sync_conn_v0(struct net *net, struct ip_vs_conn *cp,
pkts = atomic_add_return(1, &cp->in_pkts);
else
pkts = sysctl_sync_threshold(ipvs);
- ip_vs_sync_conn(net, cp->control, pkts);
+ ip_vs_sync_conn(net, cp, pkts);
}
}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index bd90bf8107da..72f030878e7a 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -130,7 +130,6 @@ static struct rtable *do_output_route4(struct net *net, __be32 daddr,
memset(&fl4, 0, sizeof(fl4));
fl4.daddr = daddr;
- fl4.saddr = (rt_mode & IP_VS_RT_MODE_CONNECT) ? *saddr : 0;
fl4.flowi4_flags = (rt_mode & IP_VS_RT_MODE_KNOWN_NH) ?
FLOWI_FLAG_KNOWN_NH : 0;
@@ -524,6 +523,21 @@ static inline int ip_vs_tunnel_xmit_prepare(struct sk_buff *skb,
return ret;
}
+/* In the event of a remote destination, it's possible that we would have
+ * matches against an old socket (particularly a TIME-WAIT socket). This
+ * causes havoc down the line (ip_local_out et. al. expect regular sockets
+ * and invalid memory accesses will happen) so simply drop the association
+ * in this case.
+*/
+static inline void ip_vs_drop_early_demux_sk(struct sk_buff *skb)
+{
+ /* If dev is set, the packet came from the LOCAL_IN callback and
+ * not from a local TCP socket.
+ */
+ if (skb->dev)
+ skb_orphan(skb);
+}
+
/* return NF_STOLEN (sent) or NF_ACCEPT if local=1 (not sent) */
static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
struct ip_vs_conn *cp, int local)
@@ -535,12 +549,21 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
ip_vs_notrack(skb);
else
ip_vs_update_conntrack(skb, cp, 1);
+
+ /* Remove the early_demux association unless it's bound for the
+ * exact same port and address on this host after translation.
+ */
+ if (!local || cp->vport != cp->dport ||
+ !ip_vs_addr_equal(cp->af, &cp->vaddr, &cp->daddr))
+ ip_vs_drop_early_demux_sk(skb);
+
if (!local) {
skb_forward_csum(skb);
NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
dst_output);
} else
ret = NF_ACCEPT;
+
return ret;
}
@@ -554,6 +577,7 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
if (likely(!(cp->flags & IP_VS_CONN_F_NFCT)))
ip_vs_notrack(skb);
if (!local) {
+ ip_vs_drop_early_demux_sk(skb);
skb_forward_csum(skb);
NF_HOOK(pf, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
dst_output);
@@ -842,6 +866,8 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
struct ipv6hdr *old_ipv6h = NULL;
#endif
+ ip_vs_drop_early_demux_sk(skb);
+
if (skb_headroom(skb) < max_headroom || skb_cloned(skb)) {
new_skb = skb_realloc_headroom(skb, max_headroom);
if (!new_skb)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c5880124ec0d..98cd0e78c94c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1739,6 +1739,7 @@ void nf_conntrack_init_end(void)
int nf_conntrack_init_net(struct net *net)
{
+ static atomic64_t unique_id;
int ret = -ENOMEM;
int cpu;
@@ -1762,7 +1763,8 @@ int nf_conntrack_init_net(struct net *net)
if (!net->ct.stat)
goto err_pcpu_lists;
- net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net);
+ net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%llu",
+ (u64)atomic64_inc_return(&unique_id));
if (!net->ct.slabname)
goto err_slabname;
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 91a1837acd0e..26af45193ab7 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -219,7 +219,8 @@ static inline int expect_clash(const struct nf_conntrack_expect *a,
a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
}
- return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
+ return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask) &&
+ nf_ct_zone(a->master) == nf_ct_zone(b->master);
}
static inline int expect_matches(const struct nf_conntrack_expect *a,
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 1bd9ed9e62f6..d3ea2999d0dc 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2956,11 +2956,6 @@ ctnetlink_create_expect(struct net *net, u16 zone,
}
err = nf_ct_expect_related_report(exp, portid, report);
- if (err < 0)
- goto err_exp;
-
- return 0;
-err_exp:
nf_ct_expect_put(exp);
err_ct:
nf_ct_put(ct);
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 61a3c927e63c..aba1d7dac17c 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -24,6 +24,7 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, u_int8_t pf,
unsigned int hook, struct net_device *indev,
struct net_device *outdev, int (*okfn)(struct sk_buff *),
unsigned int queuenum);
+void nf_queue_nf_hook_drop(struct nf_hook_ops *ops);
int __init netfilter_queue_init(void);
/* nf_log.c */
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index d7197649dba6..cfe93c2227c5 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -19,6 +19,9 @@
static struct nf_logger __rcu *loggers[NFPROTO_NUMPROTO][NF_LOG_TYPE_MAX] __read_mostly;
static DEFINE_MUTEX(nf_log_mutex);
+#define nft_log_dereference(logger) \
+ rcu_dereference_protected(logger, lockdep_is_held(&nf_log_mutex))
+
static struct nf_logger *__find_logger(int pf, const char *str_logger)
{
struct nf_logger *log;
@@ -28,8 +31,7 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger)
if (loggers[pf][i] == NULL)
continue;
- log = rcu_dereference_protected(loggers[pf][i],
- lockdep_is_held(&nf_log_mutex));
+ log = nft_log_dereference(loggers[pf][i]);
if (!strncasecmp(str_logger, log->name, strlen(log->name)))
return log;
}
@@ -45,8 +47,7 @@ void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger)
return;
mutex_lock(&nf_log_mutex);
- log = rcu_dereference_protected(net->nf.nf_loggers[pf],
- lockdep_is_held(&nf_log_mutex));
+ log = nft_log_dereference(net->nf.nf_loggers[pf]);
if (log == NULL)
rcu_assign_pointer(net->nf.nf_loggers[pf], logger);
@@ -61,8 +62,7 @@ void nf_log_unset(struct net *net, const struct nf_logger *logger)
mutex_lock(&nf_log_mutex);
for (i = 0; i < NFPROTO_NUMPROTO; i++) {
- log = rcu_dereference_protected(net->nf.nf_loggers[i],
- lockdep_is_held(&nf_log_mutex));
+ log = nft_log_dereference(net->nf.nf_loggers[i]);
if (log == logger)
RCU_INIT_POINTER(net->nf.nf_loggers[i], NULL);
}
@@ -97,12 +97,17 @@ EXPORT_SYMBOL(nf_log_register);
void nf_log_unregister(struct nf_logger *logger)
{
+ const struct nf_logger *log;
int i;
mutex_lock(&nf_log_mutex);
- for (i = 0; i < NFPROTO_NUMPROTO; i++)
- RCU_INIT_POINTER(loggers[i][logger->type], NULL);
+ for (i = 0; i < NFPROTO_NUMPROTO; i++) {
+ log = nft_log_dereference(loggers[i][logger->type]);
+ if (log == logger)
+ RCU_INIT_POINTER(loggers[i][logger->type], NULL);
+ }
mutex_unlock(&nf_log_mutex);
+ synchronize_rcu();
}
EXPORT_SYMBOL(nf_log_unregister);
@@ -297,8 +302,7 @@ static int seq_show(struct seq_file *s, void *v)
int i, ret;
struct net *net = seq_file_net(s);
- logger = rcu_dereference_protected(net->nf.nf_loggers[*pos],
- lockdep_is_held(&nf_log_mutex));
+ logger = nft_log_dereference(net->nf.nf_loggers[*pos]);
if (!logger)
ret = seq_printf(s, "%2lld NONE (", *pos);
@@ -312,8 +316,7 @@ static int seq_show(struct seq_file *s, void *v)
if (loggers[*pos][i] == NULL)
continue;
- logger = rcu_dereference_protected(loggers[*pos][i],
- lockdep_is_held(&nf_log_mutex));
+ logger = nft_log_dereference(loggers[*pos][i]);
ret = seq_printf(s, "%s", logger->name);
if (ret < 0)
return ret;
@@ -385,8 +388,7 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write,
mutex_unlock(&nf_log_mutex);
} else {
mutex_lock(&nf_log_mutex);
- logger = rcu_dereference_protected(net->nf.nf_loggers[tindex],
- lockdep_is_held(&nf_log_mutex));
+ logger = nft_log_dereference(net->nf.nf_loggers[tindex]);
if (!logger)
table->data = "NONE";
else
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 4c8b68e5fa16..77ee2d4d5046 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -95,6 +95,23 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
}
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
+void nf_queue_nf_hook_drop(struct nf_hook_ops *ops)
+{
+ const struct nf_queue_handler *qh;
+ struct net *net;
+
+ rtnl_lock();
+ rcu_read_lock();
+ qh = rcu_dereference(queue_handler);
+ if (qh) {
+ for_each_net(net) {
+ qh->nf_hook_drop(net, ops);
+ }
+ }
+ rcu_read_unlock();
+ rtnl_unlock();
+}
+
/*
* Any packet that leaves via this function must come back
* through nf_reinject().
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 71b574c7bde9..9fe2baa01fbe 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1221,7 +1221,10 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
if (nla[NFTA_CHAIN_POLICY]) {
if ((chain != NULL &&
- !(chain->flags & NFT_BASE_CHAIN)) ||
+ !(chain->flags & NFT_BASE_CHAIN)))
+ return -EOPNOTSUPP;
+
+ if (chain == NULL &&
nla[NFTA_CHAIN_HOOK] == NULL)
return -EOPNOTSUPP;
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 1aa7049c93f5..e41bab38a3ca 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -433,6 +433,7 @@ done:
static void nfnetlink_rcv(struct sk_buff *skb)
{
struct nlmsghdr *nlh = nlmsg_hdr(skb);
+ u_int16_t res_id;
int msglen;
if (nlh->nlmsg_len < NLMSG_HDRLEN ||
@@ -457,7 +458,12 @@ static void nfnetlink_rcv(struct sk_buff *skb)
nfgenmsg = nlmsg_data(nlh);
skb_pull(skb, msglen);
- nfnetlink_rcv_batch(skb, nlh, nfgenmsg->res_id);
+ /* Work around old nft using host byte order */
+ if (nfgenmsg->res_id == NFNL_SUBSYS_NFTABLES)
+ res_id = NFNL_SUBSYS_NFTABLES;
+ else
+ res_id = ntohs(nfgenmsg->res_id);
+ nfnetlink_rcv_batch(skb, nlh, res_id);
} else {
netlink_rcv_skb(skb, &nfnetlink_rcv_msg);
}
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index 9e287cb56a04..54330fb5efaf 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -77,6 +77,9 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple,
if (!tb[NFCTH_TUPLE_L3PROTONUM] || !tb[NFCTH_TUPLE_L4PROTONUM])
return -EINVAL;
+ /* Not all fields are initialized so first zero the tuple */
+ memset(tuple, 0, sizeof(struct nf_conntrack_tuple));
+
tuple->src.l3num = ntohs(nla_get_be16(tb[NFCTH_TUPLE_L3PROTONUM]));
tuple->dst.protonum = nla_get_u8(tb[NFCTH_TUPLE_L4PROTONUM]);
@@ -86,7 +89,7 @@ nfnl_cthelper_parse_tuple(struct nf_conntrack_tuple *tuple,
static int
nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct)
{
- const struct nf_conn_help *help = nfct_help(ct);
+ struct nf_conn_help *help = nfct_help(ct);
if (attr == NULL)
return -EINVAL;
@@ -94,7 +97,7 @@ nfnl_cthelper_from_nlattr(struct nlattr *attr, struct nf_conn *ct)
if (help->helper->data_len == 0)
return -EINVAL;
- memcpy(&help->data, nla_data(attr), help->helper->data_len);
+ memcpy(help->data, nla_data(attr), help->helper->data_len);
return 0;
}
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 7c60ccd61a3e..8add272654f6 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -815,6 +815,27 @@ static struct notifier_block nfqnl_dev_notifier = {
.notifier_call = nfqnl_rcv_dev_event,
};
+static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long ops_ptr)
+{
+ return entry->elem == (struct nf_hook_ops *)ops_ptr;
+}
+
+static void nfqnl_nf_hook_drop(struct net *net, struct nf_hook_ops *hook)
+{
+ struct nfnl_queue_net *q = nfnl_queue_pernet(net);
+ int i;
+
+ rcu_read_lock();
+ for (i = 0; i < INSTANCE_BUCKETS; i++) {
+ struct nfqnl_instance *inst;
+ struct hlist_head *head = &q->instance_table[i];
+
+ hlist_for_each_entry_rcu(inst, head, hlist)
+ nfqnl_flush(inst, nf_hook_cmp, (unsigned long)hook);
+ }
+ rcu_read_unlock();
+}
+
static int
nfqnl_rcv_nl_event(struct notifier_block *this,
unsigned long event, void *ptr)
@@ -1022,7 +1043,8 @@ static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = {
};
static const struct nf_queue_handler nfqh = {
- .outfn = &nfqnl_enqueue_packet,
+ .outfn = &nfqnl_enqueue_packet,
+ .nf_hook_drop = &nfqnl_nf_hook_drop,
};
static int
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 265e190f2218..ff6f35971ea2 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -97,6 +97,9 @@ nft_target_set_tgchk_param(struct xt_tgchk_param *par,
entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
break;
case AF_INET6:
+ if (proto)
+ entry->e6.ipv6.flags |= IP6T_F_PROTO;
+
entry->e6.ipv6.proto = proto;
entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
break;
@@ -304,6 +307,9 @@ nft_match_set_mtchk_param(struct xt_mtchk_param *par, const struct nft_ctx *ctx,
entry->e4.ip.invflags = inv ? IPT_INV_PROTO : 0;
break;
case AF_INET6:
+ if (proto)
+ entry->e6.ipv6.flags |= IP6T_F_PROTO;
+
entry->e6.ipv6.proto = proto;
entry->e6.ipv6.invflags = inv ? IP6T_INV_PROTO : 0;
break;
@@ -555,6 +561,13 @@ struct nft_xt {
static struct nft_expr_type nft_match_type;
+static bool nft_match_cmp(const struct xt_match *match,
+ const char *name, u32 rev, u32 family)
+{
+ return strcmp(match->name, name) == 0 && match->revision == rev &&
+ (match->family == NFPROTO_UNSPEC || match->family == family);
+}
+
static const struct nft_expr_ops *
nft_match_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
@@ -562,7 +575,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
struct nft_xt *nft_match;
struct xt_match *match;
char *mt_name;
- __u32 rev, family;
+ u32 rev, family;
if (tb[NFTA_MATCH_NAME] == NULL ||
tb[NFTA_MATCH_REV] == NULL ||
@@ -577,9 +590,12 @@ nft_match_select_ops(const struct nft_ctx *ctx,
list_for_each_entry(nft_match, &nft_match_list, head) {
struct xt_match *match = nft_match->ops.data;
- if (strcmp(match->name, mt_name) == 0 &&
- match->revision == rev && match->family == family)
+ if (nft_match_cmp(match, mt_name, rev, family)) {
+ if (!try_module_get(match->me))
+ return ERR_PTR(-ENOENT);
+
return &nft_match->ops;
+ }
}
match = xt_request_find_match(family, mt_name, rev);
@@ -625,6 +641,13 @@ static LIST_HEAD(nft_target_list);
static struct nft_expr_type nft_target_type;
+static bool nft_target_cmp(const struct xt_target *tg,
+ const char *name, u32 rev, u32 family)
+{
+ return strcmp(tg->name, name) == 0 && tg->revision == rev &&
+ (tg->family == NFPROTO_UNSPEC || tg->family == family);
+}
+
static const struct nft_expr_ops *
nft_target_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
@@ -632,7 +655,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
struct nft_xt *nft_target;
struct xt_target *target;
char *tg_name;
- __u32 rev, family;
+ u32 rev, family;
if (tb[NFTA_TARGET_NAME] == NULL ||
tb[NFTA_TARGET_REV] == NULL ||
@@ -647,9 +670,12 @@ nft_target_select_ops(const struct nft_ctx *ctx,
list_for_each_entry(nft_target, &nft_target_list, head) {
struct xt_target *target = nft_target->ops.data;
- if (strcmp(target->name, tg_name) == 0 &&
- target->revision == rev && target->family == family)
+ if (nft_target_cmp(target, tg_name, rev, family)) {
+ if (!try_module_get(target->me))
+ return ERR_PTR(-ENOENT);
+
return &nft_target->ops;
+ }
}
target = xt_request_find_target(family, tg_name, rev);
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 133eb4772f12..489899325bf7 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -418,6 +418,47 @@ int xt_check_match(struct xt_mtchk_param *par,
}
EXPORT_SYMBOL_GPL(xt_check_match);
+/** xt_check_entry_match - check that matches end before start of target
+ *
+ * @match: beginning of xt_entry_match
+ * @target: beginning of this rules target (alleged end of matches)
+ * @alignment: alignment requirement of match structures
+ *
+ * Validates that all matches add up to the beginning of the target,
+ * and that each match covers at least the base structure size.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+static int xt_check_entry_match(const char *match, const char *target,
+ const size_t alignment)
+{
+ const struct xt_entry_match *pos;
+ int length = target - match;
+
+ if (length == 0) /* no matches */
+ return 0;
+
+ pos = (struct xt_entry_match *)match;
+ do {
+ if ((unsigned long)pos % alignment)
+ return -EINVAL;
+
+ if (length < (int)sizeof(struct xt_entry_match))
+ return -EINVAL;
+
+ if (pos->u.match_size < sizeof(struct xt_entry_match))
+ return -EINVAL;
+
+ if (pos->u.match_size > length)
+ return -EINVAL;
+
+ length -= pos->u.match_size;
+ pos = ((void *)((char *)(pos) + (pos)->u.match_size));
+ } while (length > 0);
+
+ return 0;
+}
+
#ifdef CONFIG_COMPAT
int xt_compat_add_offset(u_int8_t af, unsigned int offset, int delta)
{
@@ -487,13 +528,14 @@ int xt_compat_match_offset(const struct xt_match *match)
}
EXPORT_SYMBOL_GPL(xt_compat_match_offset);
-int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
- unsigned int *size)
+void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
+ unsigned int *size)
{
const struct xt_match *match = m->u.kernel.match;
struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m;
int pad, off = xt_compat_match_offset(match);
u_int16_t msize = cm->u.user.match_size;
+ char name[sizeof(m->u.user.name)];
m = *dstptr;
memcpy(m, cm, sizeof(*cm));
@@ -507,10 +549,12 @@ int xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr,
msize += off;
m->u.user.match_size = msize;
+ strlcpy(name, match->name, sizeof(name));
+ module_put(match->me);
+ strncpy(m->u.user.name, name, sizeof(m->u.user.name));
*size += off;
*dstptr += msize;
- return 0;
}
EXPORT_SYMBOL_GPL(xt_compat_match_from_user);
@@ -541,8 +585,175 @@ int xt_compat_match_to_user(const struct xt_entry_match *m,
return 0;
}
EXPORT_SYMBOL_GPL(xt_compat_match_to_user);
+
+/* non-compat version may have padding after verdict */
+struct compat_xt_standard_target {
+ struct compat_xt_entry_target t;
+ compat_uint_t verdict;
+};
+
+int xt_compat_check_entry_offsets(const void *base, const char *elems,
+ unsigned int target_offset,
+ unsigned int next_offset)
+{
+ long size_of_base_struct = elems - (const char *)base;
+ const struct compat_xt_entry_target *t;
+ const char *e = base;
+
+ if (target_offset < size_of_base_struct)
+ return -EINVAL;
+
+ if (target_offset + sizeof(*t) > next_offset)
+ return -EINVAL;
+
+ t = (void *)(e + target_offset);
+ if (t->u.target_size < sizeof(*t))
+ return -EINVAL;
+
+ if (target_offset + t->u.target_size > next_offset)
+ return -EINVAL;
+
+ if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 &&
+ COMPAT_XT_ALIGN(target_offset + sizeof(struct compat_xt_standard_target)) != next_offset)
+ return -EINVAL;
+
+ /* compat_xt_entry match has less strict aligment requirements,
+ * otherwise they are identical. In case of padding differences
+ * we need to add compat version of xt_check_entry_match.
+ */
+ BUILD_BUG_ON(sizeof(struct compat_xt_entry_match) != sizeof(struct xt_entry_match));
+
+ return xt_check_entry_match(elems, base + target_offset,
+ __alignof__(struct compat_xt_entry_match));
+}
+EXPORT_SYMBOL(xt_compat_check_entry_offsets);
#endif /* CONFIG_COMPAT */
+/**
+ * xt_check_entry_offsets - validate arp/ip/ip6t_entry
+ *
+ * @base: pointer to arp/ip/ip6t_entry
+ * @elems: pointer to first xt_entry_match, i.e. ip(6)t_entry->elems
+ * @target_offset: the arp/ip/ip6_t->target_offset
+ * @next_offset: the arp/ip/ip6_t->next_offset
+ *
+ * validates that target_offset and next_offset are sane and that all
+ * match sizes (if any) align with the target offset.
+ *
+ * This function does not validate the targets or matches themselves, it
+ * only tests that all the offsets and sizes are correct, that all
+ * match structures are aligned, and that the last structure ends where
+ * the target structure begins.
+ *
+ * Also see xt_compat_check_entry_offsets for CONFIG_COMPAT version.
+ *
+ * The arp/ip/ip6t_entry structure @base must have passed following tests:
+ * - it must point to a valid memory location
+ * - base to base + next_offset must be accessible, i.e. not exceed allocated
+ * length.
+ *
+ * A well-formed entry looks like this:
+ *
+ * ip(6)t_entry match [mtdata] match [mtdata] target [tgdata] ip(6)t_entry
+ * e->elems[]-----' | |
+ * matchsize | |
+ * matchsize | |
+ * | |
+ * target_offset---------------------------------' |
+ * next_offset---------------------------------------------------'
+ *
+ * elems[]: flexible array member at end of ip(6)/arpt_entry struct.
+ * This is where matches (if any) and the target reside.
+ * target_offset: beginning of target.
+ * next_offset: start of the next rule; also: size of this rule.
+ * Since targets have a minimum size, target_offset + minlen <= next_offset.
+ *
+ * Every match stores its size, sum of sizes must not exceed target_offset.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int xt_check_entry_offsets(const void *base,
+ const char *elems,
+ unsigned int target_offset,
+ unsigned int next_offset)
+{
+ long size_of_base_struct = elems - (const char *)base;
+ const struct xt_entry_target *t;
+ const char *e = base;
+
+ /* target start is within the ip/ip6/arpt_entry struct */
+ if (target_offset < size_of_base_struct)
+ return -EINVAL;
+
+ if (target_offset + sizeof(*t) > next_offset)
+ return -EINVAL;
+
+ t = (void *)(e + target_offset);
+ if (t->u.target_size < sizeof(*t))
+ return -EINVAL;
+
+ if (target_offset + t->u.target_size > next_offset)
+ return -EINVAL;
+
+ if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 &&
+ XT_ALIGN(target_offset + sizeof(struct xt_standard_target)) != next_offset)
+ return -EINVAL;
+
+ return xt_check_entry_match(elems, base + target_offset,
+ __alignof__(struct xt_entry_match));
+}
+EXPORT_SYMBOL(xt_check_entry_offsets);
+
+/**
+ * xt_alloc_entry_offsets - allocate array to store rule head offsets
+ *
+ * @size: number of entries
+ *
+ * Return: NULL or kmalloc'd or vmalloc'd array
+ */
+unsigned int *xt_alloc_entry_offsets(unsigned int size)
+{
+ unsigned int *off;
+
+ off = kcalloc(size, sizeof(unsigned int), GFP_KERNEL | __GFP_NOWARN);
+
+ if (off)
+ return off;
+
+ if (size < (SIZE_MAX / sizeof(unsigned int)))
+ off = vmalloc(size * sizeof(unsigned int));
+
+ return off;
+}
+EXPORT_SYMBOL(xt_alloc_entry_offsets);
+
+/**
+ * xt_find_jump_offset - check if target is a valid jump offset
+ *
+ * @offsets: array containing all valid rule start offsets of a rule blob
+ * @target: the jump target to search for
+ * @size: entries in @offset
+ */
+bool xt_find_jump_offset(const unsigned int *offsets,
+ unsigned int target, unsigned int size)
+{
+ int m, low = 0, hi = size;
+
+ while (hi > low) {
+ m = (low + hi) / 2u;
+
+ if (offsets[m] > target)
+ hi = m;
+ else if (offsets[m] < target)
+ low = m + 1;
+ else
+ return true;
+ }
+
+ return false;
+}
+EXPORT_SYMBOL(xt_find_jump_offset);
+
int xt_check_target(struct xt_tgchk_param *par,
unsigned int size, u_int8_t proto, bool inv_proto)
{
@@ -593,6 +804,80 @@ int xt_check_target(struct xt_tgchk_param *par,
}
EXPORT_SYMBOL_GPL(xt_check_target);
+/**
+ * xt_copy_counters_from_user - copy counters and metadata from userspace
+ *
+ * @user: src pointer to userspace memory
+ * @len: alleged size of userspace memory
+ * @info: where to store the xt_counters_info metadata
+ * @compat: true if we setsockopt call is done by 32bit task on 64bit kernel
+ *
+ * Copies counter meta data from @user and stores it in @info.
+ *
+ * vmallocs memory to hold the counters, then copies the counter data
+ * from @user to the new memory and returns a pointer to it.
+ *
+ * If @compat is true, @info gets converted automatically to the 64bit
+ * representation.
+ *
+ * The metadata associated with the counters is stored in @info.
+ *
+ * Return: returns pointer that caller has to test via IS_ERR().
+ * If IS_ERR is false, caller has to vfree the pointer.
+ */
+void *xt_copy_counters_from_user(const void __user *user, unsigned int len,
+ struct xt_counters_info *info, bool compat)
+{
+ void *mem;
+ u64 size;
+
+#ifdef CONFIG_COMPAT
+ if (compat) {
+ /* structures only differ in size due to alignment */
+ struct compat_xt_counters_info compat_tmp;
+
+ if (len <= sizeof(compat_tmp))
+ return ERR_PTR(-EINVAL);
+
+ len -= sizeof(compat_tmp);
+ if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0)
+ return ERR_PTR(-EFAULT);
+
+ strlcpy(info->name, compat_tmp.name, sizeof(info->name));
+ info->num_counters = compat_tmp.num_counters;
+ user += sizeof(compat_tmp);
+ } else
+#endif
+ {
+ if (len <= sizeof(*info))
+ return ERR_PTR(-EINVAL);
+
+ len -= sizeof(*info);
+ if (copy_from_user(info, user, sizeof(*info)) != 0)
+ return ERR_PTR(-EFAULT);
+
+ info->name[sizeof(info->name) - 1] = '\0';
+ user += sizeof(*info);
+ }
+
+ size = sizeof(struct xt_counters);
+ size *= info->num_counters;
+
+ if (size != (u64)len)
+ return ERR_PTR(-EINVAL);
+
+ mem = vmalloc(len);
+ if (!mem)
+ return ERR_PTR(-ENOMEM);
+
+ if (copy_from_user(mem, user, len) == 0)
+ return mem;
+
+ vfree(mem);
+ return ERR_PTR(-EFAULT);
+}
+EXPORT_SYMBOL_GPL(xt_copy_counters_from_user);
+
#ifdef CONFIG_COMPAT
int xt_compat_target_offset(const struct xt_target *target)
{
@@ -608,6 +893,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t;
int pad, off = xt_compat_target_offset(target);
u_int16_t tsize = ct->u.user.target_size;
+ char name[sizeof(t->u.user.name)];
t = *dstptr;
memcpy(t, ct, sizeof(*ct));
@@ -621,6 +907,9 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr,
tsize += off;
t->u.user.target_size = tsize;
+ strlcpy(name, target->name, sizeof(name));
+ module_put(target->me);
+ strncpy(t->u.user.name, name, sizeof(t->u.user.name));
*size += off;
*dstptr += tsize;
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index 7198d660b4de..a1d126f29463 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -39,7 +39,7 @@ cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_cgroup_info *info = par->matchinfo;
- if (skb->sk == NULL)
+ if (skb->sk == NULL || !sk_fullsock(skb->sk))
return false;
return (info->id == skb->sk->sk_classid) ^ info->invert;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index c0a418766f9c..15fc0938e1c4 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -133,6 +133,24 @@ static inline u32 netlink_group_mask(u32 group)
return group ? 1 << (group - 1) : 0;
}
+static struct sk_buff *netlink_to_full_skb(const struct sk_buff *skb,
+ gfp_t gfp_mask)
+{
+ unsigned int len = skb_end_offset(skb);
+ struct sk_buff *new;
+
+ new = alloc_skb(len, gfp_mask);
+ if (new == NULL)
+ return NULL;
+
+ NETLINK_CB(new).portid = NETLINK_CB(skb).portid;
+ NETLINK_CB(new).dst_group = NETLINK_CB(skb).dst_group;
+ NETLINK_CB(new).creds = NETLINK_CB(skb).creds;
+
+ memcpy(skb_put(new, len), skb->data, len);
+ return new;
+}
+
int netlink_add_tap(struct netlink_tap *nt)
{
if (unlikely(nt->dev->type != ARPHRD_NETLINK))
@@ -215,7 +233,11 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
int ret = -ENOMEM;
dev_hold(dev);
- nskb = skb_clone(skb, GFP_ATOMIC);
+
+ if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head))
+ nskb = netlink_to_full_skb(skb, GFP_ATOMIC);
+ else
+ nskb = skb_clone(skb, GFP_ATOMIC);
if (nskb) {
nskb->dev = dev;
nskb->protocol = htons((u16) sk->sk_protocol);
@@ -287,11 +309,6 @@ static void netlink_rcv_wake(struct sock *sk)
}
#ifdef CONFIG_NETLINK_MMAP
-static bool netlink_skb_is_mmaped(const struct sk_buff *skb)
-{
- return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
-}
-
static bool netlink_rx_is_mmaped(struct sock *sk)
{
return nlk_sk(sk)->rx_ring.pg_vec != NULL;
@@ -366,25 +383,52 @@ err1:
return NULL;
}
+
+static void
+__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec,
+ unsigned int order)
+{
+ struct netlink_sock *nlk = nlk_sk(sk);
+ struct sk_buff_head *queue;
+ struct netlink_ring *ring;
+
+ queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
+ ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
+
+ spin_lock_bh(&queue->lock);
+
+ ring->frame_max = req->nm_frame_nr - 1;
+ ring->head = 0;
+ ring->frame_size = req->nm_frame_size;
+ ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE;
+
+ swap(ring->pg_vec_len, req->nm_block_nr);
+ swap(ring->pg_vec_order, order);
+ swap(ring->pg_vec, pg_vec);
+
+ __skb_queue_purge(queue);
+ spin_unlock_bh(&queue->lock);
+
+ WARN_ON(atomic_read(&nlk->mapped));
+
+ if (pg_vec)
+ free_pg_vec(pg_vec, order, req->nm_block_nr);
+}
+
static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
- bool closing, bool tx_ring)
+ bool tx_ring)
{
struct netlink_sock *nlk = nlk_sk(sk);
struct netlink_ring *ring;
- struct sk_buff_head *queue;
void **pg_vec = NULL;
unsigned int order = 0;
- int err;
ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
- queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
- if (!closing) {
- if (atomic_read(&nlk->mapped))
- return -EBUSY;
- if (atomic_read(&ring->pending))
- return -EBUSY;
- }
+ if (atomic_read(&nlk->mapped))
+ return -EBUSY;
+ if (atomic_read(&ring->pending))
+ return -EBUSY;
if (req->nm_block_nr) {
if (ring->pg_vec != NULL)
@@ -416,31 +460,19 @@ static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
return -EINVAL;
}
- err = -EBUSY;
mutex_lock(&nlk->pg_vec_lock);
- if (closing || atomic_read(&nlk->mapped) == 0) {
- err = 0;
- spin_lock_bh(&queue->lock);
-
- ring->frame_max = req->nm_frame_nr - 1;
- ring->head = 0;
- ring->frame_size = req->nm_frame_size;
- ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE;
-
- swap(ring->pg_vec_len, req->nm_block_nr);
- swap(ring->pg_vec_order, order);
- swap(ring->pg_vec, pg_vec);
-
- __skb_queue_purge(queue);
- spin_unlock_bh(&queue->lock);
-
- WARN_ON(atomic_read(&nlk->mapped));
+ if (atomic_read(&nlk->mapped) == 0) {
+ __netlink_set_ring(sk, req, tx_ring, pg_vec, order);
+ mutex_unlock(&nlk->pg_vec_lock);
+ return 0;
}
+
mutex_unlock(&nlk->pg_vec_lock);
if (pg_vec)
free_pg_vec(pg_vec, order, req->nm_block_nr);
- return err;
+
+ return -EBUSY;
}
static void netlink_mm_open(struct vm_area_struct *vma)
@@ -828,7 +860,6 @@ static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
}
#else /* CONFIG_NETLINK_MMAP */
-#define netlink_skb_is_mmaped(skb) false
#define netlink_rx_is_mmaped(sk) false
#define netlink_tx_is_mmaped(sk) false
#define netlink_mmap sock_no_mmap
@@ -909,10 +940,10 @@ static void netlink_sock_destruct(struct sock *sk)
memset(&req, 0, sizeof(req));
if (nlk->rx_ring.pg_vec)
- netlink_set_ring(sk, &req, true, false);
+ __netlink_set_ring(sk, &req, false, NULL, 0);
memset(&req, 0, sizeof(req));
if (nlk->tx_ring.pg_vec)
- netlink_set_ring(sk, &req, true, true);
+ __netlink_set_ring(sk, &req, true, NULL, 0);
}
#endif /* CONFIG_NETLINK_MMAP */
@@ -1003,11 +1034,8 @@ static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid,
.net = net,
.portid = portid,
};
- u32 hash;
- hash = rhashtable_hashfn(&table->hash, &portid, sizeof(portid));
-
- return rhashtable_lookup_compare(&table->hash, hash,
+ return rhashtable_lookup_compare(&table->hash, &portid,
&netlink_compare, &arg);
}
@@ -2166,7 +2194,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
return -EINVAL;
if (copy_from_user(&req, optval, sizeof(req)))
return -EFAULT;
- err = netlink_set_ring(sk, &req, false,
+ err = netlink_set_ring(sk, &req,
optname == NETLINK_TX_RING);
break;
}
@@ -2602,6 +2630,7 @@ static int netlink_dump(struct sock *sk)
struct netlink_callback *cb;
struct sk_buff *skb = NULL;
struct nlmsghdr *nlh;
+ struct module *module;
int len, err = -ENOBUFS;
int alloc_size;
@@ -2671,9 +2700,11 @@ static int netlink_dump(struct sock *sk)
cb->done(cb);
nlk->cb_running = false;
+ module = cb->module;
+ skb = cb->skb;
mutex_unlock(nlk->cb_mutex);
- module_put(cb->module);
- consume_skb(cb->skb);
+ module_put(module);
+ consume_skb(skb);
return 0;
errout_skb:
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index b20a1731759b..3951874e715d 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -57,6 +57,15 @@ static inline struct netlink_sock *nlk_sk(struct sock *sk)
return container_of(sk, struct netlink_sock, sk);
}
+static inline bool netlink_skb_is_mmaped(const struct sk_buff *skb)
+{
+#ifdef CONFIG_NETLINK_MMAP
+ return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
+#else
+ return false;
+#endif /* CONFIG_NETLINK_MMAP */
+}
+
struct netlink_table {
struct rhashtable hash;
struct hlist_head mc_list;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 28213dff723d..acf6b2edba65 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -834,7 +834,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
if (error)
goto err_kfree_flow;
- ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask);
+ ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, true, &mask);
/* Validate actions. */
acts = ovs_nla_alloc_flow_actions(nla_len(a[OVS_FLOW_ATTR_ACTIONS]));
@@ -949,7 +949,7 @@ static struct sw_flow_actions *get_flow_actions(const struct nlattr *a,
if (IS_ERR(acts))
return acts;
- ovs_flow_mask_key(&masked_key, key, mask);
+ ovs_flow_mask_key(&masked_key, key, true, mask);
error = ovs_nla_copy_actions(a, &masked_key, 0, &acts);
if (error) {
OVS_NLERR("Flow actions may not be safe on all matching packets.\n");
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index cf2d853646f0..740041a09b9d 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -56,20 +56,21 @@ static u16 range_n_bytes(const struct sw_flow_key_range *range)
}
void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
- const struct sw_flow_mask *mask)
+ bool full, const struct sw_flow_mask *mask)
{
- const long *m = (const long *)((const u8 *)&mask->key +
- mask->range.start);
- const long *s = (const long *)((const u8 *)src +
- mask->range.start);
- long *d = (long *)((u8 *)dst + mask->range.start);
+ int start = full ? 0 : mask->range.start;
+ int len = full ? sizeof *dst : range_n_bytes(&mask->range);
+ const long *m = (const long *)((const u8 *)&mask->key + start);
+ const long *s = (const long *)((const u8 *)src + start);
+ long *d = (long *)((u8 *)dst + start);
int i;
- /* The memory outside of the 'mask->range' are not set since
- * further operations on 'dst' only uses contents within
- * 'mask->range'.
+ /* If 'full' is true then all of 'dst' is fully initialized. Otherwise,
+ * if 'full' is false the memory outside of the 'mask->range' is left
+ * uninitialized. This can be used as an optimization when further
+ * operations on 'dst' only use contents within 'mask->range'.
*/
- for (i = 0; i < range_n_bytes(&mask->range); i += sizeof(long))
+ for (i = 0; i < len; i += sizeof(long))
*d++ = *s++ & *m++;
}
@@ -418,7 +419,7 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
u32 hash;
struct sw_flow_key masked_key;
- ovs_flow_mask_key(&masked_key, unmasked, mask);
+ ovs_flow_mask_key(&masked_key, unmasked, false, mask);
hash = flow_hash(&masked_key, key_start, key_end);
head = find_bucket(ti, hash);
hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) {
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index 5918bff7f3f6..2f0cf200ede9 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -82,5 +82,5 @@ bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow,
struct sw_flow_match *match);
void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src,
- const struct sw_flow_mask *mask);
+ bool full, const struct sw_flow_mask *mask);
#endif /* flow_table.h */
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index b2b1dad71160..c7088d23a90d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1256,16 +1256,6 @@ static void packet_sock_destruct(struct sock *sk)
sk_refcnt_debug_dec(sk);
}
-static int fanout_rr_next(struct packet_fanout *f, unsigned int num)
-{
- int x = atomic_read(&f->rr_cur) + 1;
-
- if (x >= num)
- x = 0;
-
- return x;
-}
-
static unsigned int fanout_demux_hash(struct packet_fanout *f,
struct sk_buff *skb,
unsigned int num)
@@ -1277,13 +1267,9 @@ static unsigned int fanout_demux_lb(struct packet_fanout *f,
struct sk_buff *skb,
unsigned int num)
{
- int cur, old;
+ unsigned int val = atomic_inc_return(&f->rr_cur);
- cur = atomic_read(&f->rr_cur);
- while ((old = atomic_cmpxchg(&f->rr_cur, cur,
- fanout_rr_next(f, num))) != cur)
- cur = old;
- return cur;
+ return val % num;
}
static unsigned int fanout_demux_cpu(struct packet_fanout *f,
@@ -1337,7 +1323,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
struct packet_type *pt, struct net_device *orig_dev)
{
struct packet_fanout *f = pt->af_packet_priv;
- unsigned int num = f->num_members;
+ unsigned int num = READ_ONCE(f->num_members);
struct packet_sock *po;
unsigned int idx;
@@ -1522,6 +1508,20 @@ static void fanout_release(struct sock *sk)
mutex_unlock(&fanout_mutex);
}
+static bool packet_extra_vlan_len_allowed(const struct net_device *dev,
+ struct sk_buff *skb)
+{
+ /* Earlier code assumed this would be a VLAN pkt, double-check
+ * this now that we have the actual packet in hand. We can only
+ * do this check on Ethernet devices.
+ */
+ if (unlikely(dev->type != ARPHRD_ETHER))
+ return false;
+
+ skb_reset_mac_header(skb);
+ return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q));
+}
+
static const struct proto_ops packet_ops;
static const struct proto_ops packet_ops_spkt;
@@ -1683,18 +1683,10 @@ retry:
goto retry;
}
- if (len > (dev->mtu + dev->hard_header_len + extra_len)) {
- /* Earlier code assumed this would be a VLAN pkt,
- * double-check this now that we have the actual
- * packet in hand.
- */
- struct ethhdr *ehdr;
- skb_reset_mac_header(skb);
- ehdr = eth_hdr(skb);
- if (ehdr->h_proto != htons(ETH_P_8021Q)) {
- err = -EMSGSIZE;
- goto out_unlock;
- }
+ if (len > (dev->mtu + dev->hard_header_len + extra_len) &&
+ !packet_extra_vlan_len_allowed(dev, skb)) {
+ err = -EMSGSIZE;
+ goto out_unlock;
}
skb->protocol = proto;
@@ -2096,6 +2088,15 @@ static void tpacket_destruct_skb(struct sk_buff *skb)
sock_wfree(skb);
}
+static void tpacket_set_protocol(const struct net_device *dev,
+ struct sk_buff *skb)
+{
+ if (dev->type == ARPHRD_ETHER) {
+ skb_reset_mac_header(skb);
+ skb->protocol = eth_hdr(skb)->h_proto;
+ }
+}
+
static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
void *frame, struct net_device *dev, int size_max,
__be16 proto, unsigned char *addr, int hlen)
@@ -2132,8 +2133,6 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
skb_reserve(skb, hlen);
skb_reset_network_header(skb);
- if (!packet_use_direct_xmit(po))
- skb_probe_transport_header(skb, 0);
if (unlikely(po->tp_tx_has_off)) {
int off_min, off_max, off;
off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
@@ -2183,6 +2182,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
dev->hard_header_len);
if (unlikely(err))
return err;
+ if (!skb->protocol)
+ tpacket_set_protocol(dev, skb);
data += dev->hard_header_len;
to_write -= dev->hard_header_len;
@@ -2217,6 +2218,8 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
len = ((to_write > len_max) ? len_max : to_write);
}
+ skb_probe_transport_header(skb, 0);
+
return tp_len;
}
@@ -2261,12 +2264,13 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
if (unlikely(!(dev->flags & IFF_UP)))
goto out_put;
- reserve = dev->hard_header_len + VLAN_HLEN;
+ if (po->sk.sk_socket->type == SOCK_RAW)
+ reserve = dev->hard_header_len;
size_max = po->tx_ring.frame_size
- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
- if (size_max > dev->mtu + reserve)
- size_max = dev->mtu + reserve;
+ if (size_max > dev->mtu + reserve + VLAN_HLEN)
+ size_max = dev->mtu + reserve + VLAN_HLEN;
do {
ph = packet_current_frame(po, &po->tx_ring,
@@ -2289,18 +2293,11 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
addr, hlen);
- if (tp_len > dev->mtu + dev->hard_header_len) {
- struct ethhdr *ehdr;
- /* Earlier code assumed this would be a VLAN pkt,
- * double-check this now that we have the actual
- * packet in hand.
- */
+ if (likely(tp_len >= 0) &&
+ tp_len > dev->mtu + reserve &&
+ !packet_extra_vlan_len_allowed(dev, skb))
+ tp_len = -EMSGSIZE;
- skb_reset_mac_header(skb);
- ehdr = eth_hdr(skb);
- if (ehdr->h_proto != htons(ETH_P_8021Q))
- tp_len = -EMSGSIZE;
- }
if (unlikely(tp_len < 0)) {
if (po->tp_loss) {
__packet_set_status(po, ph,
@@ -2512,18 +2509,10 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
- if (!gso_type && (len > dev->mtu + reserve + extra_len)) {
- /* Earlier code assumed this would be a VLAN pkt,
- * double-check this now that we have the actual
- * packet in hand.
- */
- struct ethhdr *ehdr;
- skb_reset_mac_header(skb);
- ehdr = eth_hdr(skb);
- if (ehdr->h_proto != htons(ETH_P_8021Q)) {
- err = -EMSGSIZE;
- goto out_free;
- }
+ if (!gso_type && (len > dev->mtu + reserve + extra_len) &&
+ !packet_extra_vlan_len_allowed(dev, skb)) {
+ err = -EMSGSIZE;
+ goto out_free;
}
skb->protocol = proto;
@@ -2552,8 +2541,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
len += vnet_hdr_len;
}
- if (!packet_use_direct_xmit(po))
- skb_probe_transport_header(skb, reserve);
+ skb_probe_transport_header(skb, reserve);
+
if (unlikely(extra_len == 4))
skb->no_fcs = 1;
@@ -2657,22 +2646,40 @@ static int packet_release(struct socket *sock)
* Attach a packet hook.
*/
-static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto)
+static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
+ __be16 proto)
{
struct packet_sock *po = pkt_sk(sk);
- const struct net_device *dev_curr;
+ struct net_device *dev_curr;
__be16 proto_curr;
bool need_rehook;
+ struct net_device *dev = NULL;
+ int ret = 0;
+ bool unlisted = false;
- if (po->fanout) {
- if (dev)
- dev_put(dev);
-
+ if (po->fanout)
return -EINVAL;
- }
lock_sock(sk);
spin_lock(&po->bind_lock);
+ rcu_read_lock();
+
+ if (name) {
+ dev = dev_get_by_name_rcu(sock_net(sk), name);
+ if (!dev) {
+ ret = -ENODEV;
+ goto out_unlock;
+ }
+ } else if (ifindex) {
+ dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
+ if (!dev) {
+ ret = -ENODEV;
+ goto out_unlock;
+ }
+ }
+
+ if (dev)
+ dev_hold(dev);
proto_curr = po->prot_hook.type;
dev_curr = po->prot_hook.dev;
@@ -2680,24 +2687,37 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto)
need_rehook = proto_curr != proto || dev_curr != dev;
if (need_rehook) {
- unregister_prot_hook(sk, true);
+ if (po->running) {
+ rcu_read_unlock();
+ __unregister_prot_hook(sk, true);
+ rcu_read_lock();
+ dev_curr = po->prot_hook.dev;
+ if (dev)
+ unlisted = !dev_get_by_index_rcu(sock_net(sk),
+ dev->ifindex);
+ }
po->num = proto;
po->prot_hook.type = proto;
- if (po->prot_hook.dev)
- dev_put(po->prot_hook.dev);
-
- po->prot_hook.dev = dev;
-
- po->ifindex = dev ? dev->ifindex : 0;
- packet_cached_dev_assign(po, dev);
+ if (unlikely(unlisted)) {
+ dev_put(dev);
+ po->prot_hook.dev = NULL;
+ po->ifindex = -1;
+ packet_cached_dev_reset(po);
+ } else {
+ po->prot_hook.dev = dev;
+ po->ifindex = dev ? dev->ifindex : 0;
+ packet_cached_dev_assign(po, dev);
+ }
}
+ if (dev_curr)
+ dev_put(dev_curr);
if (proto == 0 || !need_rehook)
goto out_unlock;
- if (!dev || (dev->flags & IFF_UP)) {
+ if (!unlisted && (!dev || (dev->flags & IFF_UP))) {
register_prot_hook(sk);
} else {
sk->sk_err = ENETDOWN;
@@ -2706,9 +2726,10 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 proto)
}
out_unlock:
+ rcu_read_unlock();
spin_unlock(&po->bind_lock);
release_sock(sk);
- return 0;
+ return ret;
}
/*
@@ -2720,8 +2741,6 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
{
struct sock *sk = sock->sk;
char name[15];
- struct net_device *dev;
- int err = -ENODEV;
/*
* Check legality
@@ -2731,19 +2750,13 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
return -EINVAL;
strlcpy(name, uaddr->sa_data, sizeof(name));
- dev = dev_get_by_name(sock_net(sk), name);
- if (dev)
- err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
- return err;
+ return packet_do_bind(sk, name, 0, pkt_sk(sk)->num);
}
static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
struct sock *sk = sock->sk;
- struct net_device *dev = NULL;
- int err;
-
/*
* Check legality
@@ -2754,16 +2767,8 @@ static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len
if (sll->sll_family != AF_PACKET)
return -EINVAL;
- if (sll->sll_ifindex) {
- err = -ENODEV;
- dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
- if (dev == NULL)
- goto out;
- }
- err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
-
-out:
- return err;
+ return packet_do_bind(sk, NULL, sll->sll_ifindex,
+ sll->sll_protocol ? : pkt_sk(sk)->num);
}
static struct proto packet_proto = {
@@ -3139,6 +3144,7 @@ static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
i->ifindex = mreq->mr_ifindex;
i->alen = mreq->mr_alen;
memcpy(i->addr, mreq->mr_address, i->alen);
+ memset(i->addr + i->alen, 0, sizeof(i->addr) - i->alen);
i->count = 1;
i->next = po->mclist;
po->mclist = i;
diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c
index 5a940dbd74a3..f0229223bf91 100644
--- a/net/phonet/af_phonet.c
+++ b/net/phonet/af_phonet.c
@@ -377,6 +377,10 @@ static int phonet_rcv(struct sk_buff *skb, struct net_device *dev,
struct sockaddr_pn sa;
u16 len;
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (!skb)
+ return NET_RX_DROP;
+
/* check we have at least a full Phonet header */
if (!pskb_pull(skb, sizeof(struct phonethdr)))
goto out;
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 378c3a6acf84..f5fb7d6b7c41 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -183,6 +183,12 @@ static struct rds_connection *__rds_conn_create(__be32 laddr, __be32 faddr,
}
}
+ if (trans == NULL) {
+ kmem_cache_free(rds_conn_slab, conn);
+ conn = ERR_PTR(-ENODEV);
+ goto out;
+ }
+
conn->c_trans = trans;
ret = trans->conn_alloc(conn, gfp);
diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c
index f64b6e83ab4c..c8faaf36423a 100644
--- a/net/rds/ib_rdma.c
+++ b/net/rds/ib_rdma.c
@@ -760,8 +760,10 @@ void *rds_ib_get_mr(struct scatterlist *sg, unsigned long nents,
}
ibmr = rds_ib_alloc_fmr(rds_ibdev);
- if (IS_ERR(ibmr))
+ if (IS_ERR(ibmr)) {
+ rds_ib_dev_put(rds_ibdev);
return ibmr;
+ }
ret = rds_ib_map_fmr(rds_ibdev, ibmr, sg, nents);
if (ret == 0)
diff --git a/net/rds/info.c b/net/rds/info.c
index 9a6b4f66187c..140a44a5f7b7 100644
--- a/net/rds/info.c
+++ b/net/rds/info.c
@@ -176,7 +176,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
/* check for all kinds of wrapping and the like */
start = (unsigned long)optval;
- if (len < 0 || len + PAGE_SIZE - 1 < len || start + len < start) {
+ if (len < 0 || len > INT_MAX - PAGE_SIZE + 1 || start + len < start) {
ret = -EINVAL;
goto out;
}
diff --git a/net/rds/send.c b/net/rds/send.c
index 0a64541020b0..0bae8d43b012 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -958,11 +958,13 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
release_sock(sk);
}
- /* racing with another thread binding seems ok here */
+ lock_sock(sk);
if (daddr == 0 || rs->rs_bound_addr == 0) {
+ release_sock(sk);
ret = -ENOTCONN; /* XXX not a great errno */
goto out;
}
+ release_sock(sk);
/* size of rm including all sgs */
ret = rds_rm_size(msg, payload_len);
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index 9ae6e0a264ec..2dd88db4a1a2 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -234,8 +234,15 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb,
}
to_copy = min(tc->t_tinc_data_rem, left);
- pskb_pull(clone, offset);
- pskb_trim(clone, to_copy);
+ if (!pskb_pull(clone, offset) ||
+ pskb_trim(clone, to_copy)) {
+ pr_warn("rds_tcp_data_recv: pull/trim failed "
+ "left %zu data_rem %zu skb_len %d\n",
+ left, tc->t_tinc_data_rem, skb->len);
+ kfree_skb(clone);
+ desc->error = -ENOMEM;
+ goto out;
+ }
skb_queue_tail(&tinc->ti_skb_list, clone);
rdsdebug("skb %p data %p len %d off %u to_copy %zu -> "
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index fa7cd792791c..a97bb7332607 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -1081,17 +1081,6 @@ static unsigned int rfkill_fop_poll(struct file *file, poll_table *wait)
return res;
}
-static bool rfkill_readable(struct rfkill_data *data)
-{
- bool r;
-
- mutex_lock(&data->mtx);
- r = !list_empty(&data->events);
- mutex_unlock(&data->mtx);
-
- return r;
-}
-
static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
size_t count, loff_t *pos)
{
@@ -1108,8 +1097,11 @@ static ssize_t rfkill_fop_read(struct file *file, char __user *buf,
goto out;
}
mutex_unlock(&data->mtx);
+ /* since we re-check and it just compares pointers,
+ * using !list_empty() without locking isn't a problem
+ */
ret = wait_event_interruptible(data->read_wait,
- rfkill_readable(data));
+ !list_empty(&data->events));
mutex_lock(&data->mtx);
if (ret)
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index a85c1a086ae4..67984fb42f14 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -192,7 +192,8 @@ static void rose_kill_by_device(struct net_device *dev)
if (rose->device == dev) {
rose_disconnect(s, ENETUNREACH, ROSE_OUT_OF_ORDER, 0);
- rose->neighbour->use--;
+ if (rose->neighbour)
+ rose->neighbour->use--;
rose->device = NULL;
}
}
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index aad6a679fb13..fae88709aaa2 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -81,6 +81,11 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
struct tcf_proto_ops *t;
int rc = -ENOENT;
+ /* Wait for outstanding call_rcu()s, if any, from a
+ * tcf_proto_ops's destroy() handler.
+ */
+ rcu_barrier();
+
write_lock(&cls_mod_lock);
list_for_each_entry(t, &tcf_proto_base, head) {
if (t == ops) {
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 76f402e05bd6..a2a7a81b2b0b 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -740,14 +740,15 @@ static u32 qdisc_alloc_handle(struct net_device *dev)
return 0;
}
-void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
+void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n,
+ unsigned int len)
{
const struct Qdisc_class_ops *cops;
unsigned long cl;
u32 parentid;
int drops;
- if (n == 0)
+ if (n == 0 && len == 0)
return;
drops = max_t(int, n, 0);
while ((parentid = sch->parent)) {
@@ -766,10 +767,11 @@ void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
cops->put(sch, cl);
}
sch->q.qlen -= n;
+ sch->qstats.backlog -= len;
__qdisc_qstats_drop(sch, drops);
}
}
-EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
+EXPORT_SYMBOL(qdisc_tree_reduce_backlog);
static void notify_and_destroy(struct net *net, struct sk_buff *skb,
struct nlmsghdr *n, u32 clid,
@@ -815,10 +817,8 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
if (dev->flags & IFF_UP)
dev_deactivate(dev);
- if (new && new->ops->attach) {
- new->ops->attach(new);
- num_q = 0;
- }
+ if (new && new->ops->attach)
+ goto skip;
for (i = 0; i < num_q; i++) {
struct netdev_queue *dev_queue = dev_ingress_queue(dev);
@@ -834,12 +834,16 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
qdisc_destroy(old);
}
+skip:
if (!ingress) {
notify_and_destroy(net, skb, n, classid,
dev->qdisc, new);
if (new && !new->ops->attach)
atomic_inc(&new->refcnt);
dev->qdisc = new ? : &noop_qdisc;
+
+ if (new && new->ops->attach)
+ new->ops->attach(new);
} else {
notify_and_destroy(net, skb, n, classid, old, new);
}
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index beeb75f80fdb..f6e7a60012b1 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1624,13 +1624,8 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
new->reshape_fail = cbq_reshape_fail;
#endif
}
- sch_tree_lock(sch);
- *old = cl->q;
- cl->q = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &cl->q);
return 0;
}
@@ -1914,7 +1909,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
{
struct cbq_sched_data *q = qdisc_priv(sch);
struct cbq_class *cl = (struct cbq_class *)arg;
- unsigned int qlen;
+ unsigned int qlen, backlog;
if (cl->filters || cl->children || cl == &q->link)
return -EBUSY;
@@ -1922,8 +1917,9 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
sch_tree_lock(sch);
qlen = cl->q->q.qlen;
+ backlog = cl->q->qstats.backlog;
qdisc_reset(cl->q);
- qdisc_tree_decrease_qlen(cl->q, qlen);
+ qdisc_tree_reduce_backlog(cl->q, qlen, backlog);
if (cl->next_alive)
cbq_deactivate_class(cl);
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index c009eb9045ce..3f6437db9b0f 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -128,8 +128,8 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
choke_zap_tail_holes(q);
qdisc_qstats_backlog_dec(sch, skb);
+ qdisc_tree_reduce_backlog(sch, 1, qdisc_pkt_len(skb));
qdisc_drop(skb, sch);
- qdisc_tree_decrease_qlen(sch, 1);
--sch->q.qlen;
}
@@ -449,6 +449,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
old = q->tab;
if (old) {
unsigned int oqlen = sch->q.qlen, tail = 0;
+ unsigned dropped = 0;
while (q->head != q->tail) {
struct sk_buff *skb = q->tab[q->head];
@@ -460,11 +461,12 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
ntab[tail++] = skb;
continue;
}
+ dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
--sch->q.qlen;
qdisc_drop(skb, sch);
}
- qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen);
+ qdisc_tree_reduce_backlog(sch, oqlen - sch->q.qlen, dropped);
q->head = 0;
q->tail = tail;
}
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index de28f8e968e8..0d60ea5a5bb6 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -79,12 +79,13 @@ static struct sk_buff *codel_qdisc_dequeue(struct Qdisc *sch)
skb = codel_dequeue(sch, &q->params, &q->vars, &q->stats, dequeue);
- /* We cant call qdisc_tree_decrease_qlen() if our qlen is 0,
+ /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
* or HTB crashes. Defer it for next round.
*/
if (q->stats.drop_count && sch->q.qlen) {
- qdisc_tree_decrease_qlen(sch, q->stats.drop_count);
+ qdisc_tree_reduce_backlog(sch, q->stats.drop_count, q->stats.drop_len);
q->stats.drop_count = 0;
+ q->stats.drop_len = 0;
}
if (skb)
qdisc_bstats_update(sch, skb);
@@ -115,7 +116,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
{
struct codel_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_CODEL_MAX + 1];
- unsigned int qlen;
+ unsigned int qlen, dropped = 0;
int err;
if (!opt)
@@ -149,10 +150,11 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = __skb_dequeue(&sch->q);
+ dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
qdisc_drop(skb, sch);
}
- qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+ qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
sch_tree_unlock(sch);
return 0;
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 338706092c27..e599803caa1e 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -53,9 +53,10 @@ static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid)
static void drr_purge_queue(struct drr_class *cl)
{
unsigned int len = cl->qdisc->q.qlen;
+ unsigned int backlog = cl->qdisc->qstats.backlog;
qdisc_reset(cl->qdisc);
- qdisc_tree_decrease_qlen(cl->qdisc, len);
+ qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
}
static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = {
@@ -226,11 +227,7 @@ static int drr_graft_class(struct Qdisc *sch, unsigned long arg,
new = &noop_qdisc;
}
- sch_tree_lock(sch);
- drr_purge_queue(cl);
- *old = cl->qdisc;
- cl->qdisc = new;
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &cl->qdisc);
return 0;
}
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 227114f27f94..eb87a2a94d19 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -67,13 +67,7 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
new = &noop_qdisc;
}
- sch_tree_lock(sch);
- *old = p->q;
- p->q = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
-
+ *old = qdisc_replace(sch, new, &p->q);
return 0;
}
@@ -262,6 +256,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return err;
}
+ qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
return NET_XMIT_SUCCESS;
@@ -284,6 +279,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
return NULL;
qdisc_bstats_update(sch, skb);
+ qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
index = skb->tc_index & (p->indices - 1);
@@ -399,6 +395,7 @@ static void dsmark_reset(struct Qdisc *sch)
pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
qdisc_reset(p->q);
+ sch->qstats.backlog = 0;
sch->q.qlen = 0;
}
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index cbd7e1fd23b4..01fe9d532075 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -644,6 +644,7 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
struct fq_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_FQ_MAX + 1];
int err, drop_count = 0;
+ unsigned drop_len = 0;
u32 fq_log;
if (!opt)
@@ -709,10 +710,11 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt)
if (!skb)
break;
+ drop_len += qdisc_pkt_len(skb);
kfree_skb(skb);
drop_count++;
}
- qdisc_tree_decrease_qlen(sch, drop_count);
+ qdisc_tree_reduce_backlog(sch, drop_count, drop_len);
sch_tree_unlock(sch);
return err;
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 1e52decb7b59..8d21f1b5d5b9 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -173,7 +173,7 @@ static unsigned int fq_codel_drop(struct Qdisc *sch)
static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
- unsigned int idx;
+ unsigned int idx, prev_backlog;
struct fq_codel_flow *flow;
int uninitialized_var(ret);
@@ -201,6 +201,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (++sch->q.qlen <= sch->limit)
return NET_XMIT_SUCCESS;
+ prev_backlog = sch->qstats.backlog;
q->drop_overlimit++;
/* Return Congestion Notification only if we dropped a packet
* from this flow.
@@ -209,7 +210,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return NET_XMIT_CN;
/* As we dropped a packet, better let upper stack know this */
- qdisc_tree_decrease_qlen(sch, 1);
+ qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog);
return NET_XMIT_SUCCESS;
}
@@ -239,6 +240,7 @@ static struct sk_buff *fq_codel_dequeue(struct Qdisc *sch)
struct fq_codel_flow *flow;
struct list_head *head;
u32 prev_drop_count, prev_ecn_mark;
+ unsigned int prev_backlog;
begin:
head = &q->new_flows;
@@ -257,6 +259,7 @@ begin:
prev_drop_count = q->cstats.drop_count;
prev_ecn_mark = q->cstats.ecn_mark;
+ prev_backlog = sch->qstats.backlog;
skb = codel_dequeue(sch, &q->cparams, &flow->cvars, &q->cstats,
dequeue);
@@ -274,12 +277,14 @@ begin:
}
qdisc_bstats_update(sch, skb);
flow->deficit -= qdisc_pkt_len(skb);
- /* We cant call qdisc_tree_decrease_qlen() if our qlen is 0,
+ /* We cant call qdisc_tree_reduce_backlog() if our qlen is 0,
* or HTB crashes. Defer it for next round.
*/
if (q->cstats.drop_count && sch->q.qlen) {
- qdisc_tree_decrease_qlen(sch, q->cstats.drop_count);
+ qdisc_tree_reduce_backlog(sch, q->cstats.drop_count,
+ q->cstats.drop_len);
q->cstats.drop_count = 0;
+ q->cstats.drop_len = 0;
}
return skb;
}
@@ -347,11 +352,13 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = fq_codel_dequeue(sch);
+ q->cstats.drop_len += qdisc_pkt_len(skb);
kfree_skb(skb);
q->cstats.drop_count++;
}
- qdisc_tree_decrease_qlen(sch, q->cstats.drop_count);
+ qdisc_tree_reduce_backlog(sch, q->cstats.drop_count, q->cstats.drop_len);
q->cstats.drop_count = 0;
+ q->cstats.drop_len = 0;
sch_tree_unlock(sch);
return 0;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 1e346523fc50..0fd98bd9ea8d 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -159,12 +159,15 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
if (validate)
skb = validate_xmit_skb_list(skb, dev);
- if (skb) {
+ if (likely(skb)) {
HARD_TX_LOCK(dev, txq, smp_processor_id());
if (!netif_xmit_frozen_or_stopped(txq))
skb = dev_hard_start_xmit(skb, dev, txq, &ret);
HARD_TX_UNLOCK(dev, txq);
+ } else {
+ spin_lock(root_lock);
+ return qdisc_qlen(q);
}
spin_lock(root_lock);
@@ -666,8 +669,10 @@ static void qdisc_rcu_free(struct rcu_head *head)
{
struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head);
- if (qdisc_is_percpu_stats(qdisc))
+ if (qdisc_is_percpu_stats(qdisc)) {
free_percpu(qdisc->cpu_bstats);
+ free_percpu(qdisc->cpu_qstats);
+ }
kfree((char *) qdisc - qdisc->padded);
}
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index e6c7416d0332..d3e21dac8b40 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -895,9 +895,10 @@ static void
hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl)
{
unsigned int len = cl->qdisc->q.qlen;
+ unsigned int backlog = cl->qdisc->qstats.backlog;
qdisc_reset(cl->qdisc);
- qdisc_tree_decrease_qlen(cl->qdisc, len);
+ qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
}
static void
@@ -1215,11 +1216,7 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
new = &noop_qdisc;
}
- sch_tree_lock(sch);
- hfsc_purge_queue(sch, cl);
- *old = cl->qdisc;
- cl->qdisc = new;
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &cl->qdisc);
return 0;
}
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 15d3aabfe250..792c6f330f77 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -390,6 +390,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
struct hhf_sched_data *q = qdisc_priv(sch);
enum wdrr_bucket_idx idx;
struct wdrr_bucket *bucket;
+ unsigned int prev_backlog;
idx = hhf_classify(skb, sch);
@@ -417,6 +418,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
if (++sch->q.qlen <= sch->limit)
return NET_XMIT_SUCCESS;
+ prev_backlog = sch->qstats.backlog;
q->drop_overlimit++;
/* Return Congestion Notification only if we dropped a packet from this
* bucket.
@@ -425,7 +427,7 @@ static int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch)
return NET_XMIT_CN;
/* As we dropped a packet, better let upper stack know this. */
- qdisc_tree_decrease_qlen(sch, 1);
+ qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog);
return NET_XMIT_SUCCESS;
}
@@ -535,7 +537,7 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
{
struct hhf_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_HHF_MAX + 1];
- unsigned int qlen;
+ unsigned int qlen, prev_backlog;
int err;
u64 non_hh_quantum;
u32 new_quantum = q->quantum;
@@ -585,12 +587,14 @@ static int hhf_change(struct Qdisc *sch, struct nlattr *opt)
}
qlen = sch->q.qlen;
+ prev_backlog = sch->qstats.backlog;
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = hhf_dequeue(sch);
kfree_skb(skb);
}
- qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+ qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen,
+ prev_backlog - sch->qstats.backlog);
sch_tree_unlock(sch);
return 0;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index f1acb0f60dc3..ccff00640713 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -600,6 +600,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
htb_activate(q, cl);
}
+ qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
return NET_XMIT_SUCCESS;
}
@@ -889,6 +890,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch)
ok:
qdisc_bstats_update(sch, skb);
qdisc_unthrottled(sch);
+ qdisc_qstats_backlog_dec(sch, skb);
sch->q.qlen--;
return skb;
}
@@ -955,6 +957,7 @@ static unsigned int htb_drop(struct Qdisc *sch)
unsigned int len;
if (cl->un.leaf.q->ops->drop &&
(len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) {
+ sch->qstats.backlog -= len;
sch->q.qlen--;
if (!cl->un.leaf.q->q.qlen)
htb_deactivate(q, cl);
@@ -984,12 +987,12 @@ static void htb_reset(struct Qdisc *sch)
}
cl->prio_activity = 0;
cl->cmode = HTB_CAN_SEND;
-
}
}
qdisc_watchdog_cancel(&q->watchdog);
__skb_queue_purge(&q->direct_queue);
sch->q.qlen = 0;
+ sch->qstats.backlog = 0;
memset(q->hlevel, 0, sizeof(q->hlevel));
memset(q->row_mask, 0, sizeof(q->row_mask));
for (i = 0; i < TC_HTB_NUMPRIO; i++)
@@ -1165,14 +1168,7 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
cl->common.classid)) == NULL)
return -ENOBUFS;
- sch_tree_lock(sch);
- *old = cl->un.leaf.q;
- cl->un.leaf.q = new;
- if (*old != NULL) {
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- }
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &cl->un.leaf.q);
return 0;
}
@@ -1274,7 +1270,6 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
{
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl = (struct htb_class *)arg;
- unsigned int qlen;
struct Qdisc *new_q = NULL;
int last_child = 0;
@@ -1294,9 +1289,11 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
sch_tree_lock(sch);
if (!cl->level) {
- qlen = cl->un.leaf.q->q.qlen;
+ unsigned int qlen = cl->un.leaf.q->q.qlen;
+ unsigned int backlog = cl->un.leaf.q->qstats.backlog;
+
qdisc_reset(cl->un.leaf.q);
- qdisc_tree_decrease_qlen(cl->un.leaf.q, qlen);
+ qdisc_tree_reduce_backlog(cl->un.leaf.q, qlen, backlog);
}
/* delete from hash and active; remainder in destroy_class */
@@ -1430,10 +1427,11 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
sch_tree_lock(sch);
if (parent && !parent->level) {
unsigned int qlen = parent->un.leaf.q->q.qlen;
+ unsigned int backlog = parent->un.leaf.q->qstats.backlog;
/* turn parent into inner node */
qdisc_reset(parent->un.leaf.q);
- qdisc_tree_decrease_qlen(parent->un.leaf.q, qlen);
+ qdisc_tree_reduce_backlog(parent->un.leaf.q, qlen, backlog);
qdisc_destroy(parent->un.leaf.q);
if (parent->prio_activity)
htb_deactivate(q, parent);
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 42dd218871e0..23437d62a8db 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -218,7 +218,8 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
if (q->queues[i] != &noop_qdisc) {
struct Qdisc *child = q->queues[i];
q->queues[i] = &noop_qdisc;
- qdisc_tree_decrease_qlen(child, child->q.qlen);
+ qdisc_tree_reduce_backlog(child, child->q.qlen,
+ child->qstats.backlog);
qdisc_destroy(child);
}
}
@@ -238,8 +239,9 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt)
q->queues[i] = child;
if (old != &noop_qdisc) {
- qdisc_tree_decrease_qlen(old,
- old->q.qlen);
+ qdisc_tree_reduce_backlog(old,
+ old->q.qlen,
+ old->qstats.backlog);
qdisc_destroy(old);
}
sch_tree_unlock(sch);
@@ -303,13 +305,7 @@ static int multiq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (new == NULL)
new = &noop_qdisc;
- sch_tree_lock(sch);
- *old = q->queues[band];
- q->queues[band] = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
-
+ *old = qdisc_replace(sch, new, &q->queues[band]);
return 0;
}
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index b34331967e02..f60db2b3e5ef 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -408,6 +408,25 @@ static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch)
sch->q.qlen++;
}
+/* netem can't properly corrupt a megapacket (like we get from GSO), so instead
+ * when we statistically choose to corrupt one, we instead segment it, returning
+ * the first packet to be corrupted, and re-enqueue the remaining frames
+ */
+static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch)
+{
+ struct sk_buff *segs;
+ netdev_features_t features = netif_skb_features(skb);
+
+ segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+
+ if (IS_ERR_OR_NULL(segs)) {
+ qdisc_reshape_fail(skb, sch);
+ return NULL;
+ }
+ consume_skb(skb);
+ return segs;
+}
+
/*
* Insert one skb into qdisc.
* Note: parent depends on return value to account for queue length.
@@ -420,7 +439,11 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
/* We don't fill cb now as skb_unshare() may invalidate it */
struct netem_skb_cb *cb;
struct sk_buff *skb2;
+ struct sk_buff *segs = NULL;
+ unsigned int len = 0, last_len, prev_len = qdisc_pkt_len(skb);
+ int nb = 0;
int count = 1;
+ int rc = NET_XMIT_SUCCESS;
/* Random duplication */
if (q->duplicate && q->duplicate >= get_crandom(&q->dup_cor))
@@ -466,10 +489,23 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
* do it now in software before we mangle it.
*/
if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
+ if (skb_is_gso(skb)) {
+ segs = netem_segment(skb, sch);
+ if (!segs)
+ return NET_XMIT_DROP;
+ } else {
+ segs = skb;
+ }
+
+ skb = segs;
+ segs = segs->next;
+
if (!(skb = skb_unshare(skb, GFP_ATOMIC)) ||
(skb->ip_summed == CHECKSUM_PARTIAL &&
- skb_checksum_help(skb)))
- return qdisc_drop(skb, sch);
+ skb_checksum_help(skb))) {
+ rc = qdisc_drop(skb, sch);
+ goto finish_segs;
+ }
skb->data[prandom_u32() % skb_headlen(skb)] ^=
1<<(prandom_u32() % 8);
@@ -529,6 +565,27 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
sch->qstats.requeues++;
}
+finish_segs:
+ if (segs) {
+ while (segs) {
+ skb2 = segs->next;
+ segs->next = NULL;
+ qdisc_skb_cb(segs)->pkt_len = segs->len;
+ last_len = segs->len;
+ rc = qdisc_enqueue(segs, sch);
+ if (rc != NET_XMIT_SUCCESS) {
+ if (net_xmit_drop_count(rc))
+ qdisc_qstats_drop(sch);
+ } else {
+ nb++;
+ len += last_len;
+ }
+ segs = skb2;
+ }
+ sch->q.qlen += nb;
+ if (nb > 1)
+ qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
+ }
return NET_XMIT_SUCCESS;
}
@@ -610,7 +667,8 @@ deliver:
if (unlikely(err != NET_XMIT_SUCCESS)) {
if (net_xmit_drop_count(err)) {
qdisc_qstats_drop(sch);
- qdisc_tree_decrease_qlen(sch, 1);
+ qdisc_tree_reduce_backlog(sch, 1,
+ qdisc_pkt_len(skb));
}
}
goto tfifo_dequeue;
@@ -1049,15 +1107,7 @@ static int netem_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
{
struct netem_sched_data *q = qdisc_priv(sch);
- sch_tree_lock(sch);
- *old = q->qdisc;
- q->qdisc = new;
- if (*old) {
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- }
- sch_tree_unlock(sch);
-
+ *old = qdisc_replace(sch, new, &q->qdisc);
return 0;
}
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index b783a446d884..71ae3b9629f9 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -183,7 +183,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt)
{
struct pie_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_PIE_MAX + 1];
- unsigned int qlen;
+ unsigned int qlen, dropped = 0;
int err;
if (!opt)
@@ -232,10 +232,11 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt)
while (sch->q.qlen > sch->limit) {
struct sk_buff *skb = __skb_dequeue(&sch->q);
+ dropped += qdisc_pkt_len(skb);
qdisc_qstats_backlog_dec(sch, skb);
qdisc_drop(skb, sch);
}
- qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+ qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
sch_tree_unlock(sch);
return 0;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 8e5cd34aaa74..e671b1a4e815 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -191,7 +191,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
struct Qdisc *child = q->queues[i];
q->queues[i] = &noop_qdisc;
if (child != &noop_qdisc) {
- qdisc_tree_decrease_qlen(child, child->q.qlen);
+ qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog);
qdisc_destroy(child);
}
}
@@ -210,8 +210,9 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt)
q->queues[i] = child;
if (old != &noop_qdisc) {
- qdisc_tree_decrease_qlen(old,
- old->q.qlen);
+ qdisc_tree_reduce_backlog(old,
+ old->q.qlen,
+ old->qstats.backlog);
qdisc_destroy(old);
}
sch_tree_unlock(sch);
@@ -268,13 +269,7 @@ static int prio_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (new == NULL)
new = &noop_qdisc;
- sch_tree_lock(sch);
- *old = q->queues[band];
- q->queues[band] = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
-
+ *old = qdisc_replace(sch, new, &q->queues[band]);
return 0;
}
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 3ec7e88a43ca..e2b8fd47008b 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -221,9 +221,10 @@ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
static void qfq_purge_queue(struct qfq_class *cl)
{
unsigned int len = cl->qdisc->q.qlen;
+ unsigned int backlog = cl->qdisc->qstats.backlog;
qdisc_reset(cl->qdisc);
- qdisc_tree_decrease_qlen(cl->qdisc, len);
+ qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
}
static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
@@ -619,11 +620,7 @@ static int qfq_graft_class(struct Qdisc *sch, unsigned long arg,
new = &noop_qdisc;
}
- sch_tree_lock(sch);
- qfq_purge_queue(cl);
- *old = cl->qdisc;
- cl->qdisc = new;
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &cl->qdisc);
return 0;
}
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 6c0534cc7758..8c0508c0e287 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -210,7 +210,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
q->flags = ctl->flags;
q->limit = ctl->limit;
if (child) {
- qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
+ qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
+ q->qdisc->qstats.backlog);
qdisc_destroy(q->qdisc);
q->qdisc = child;
}
@@ -313,12 +314,7 @@ static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (new == NULL)
new = &noop_qdisc;
- sch_tree_lock(sch);
- *old = q->qdisc;
- q->qdisc = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &q->qdisc);
return 0;
}
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index 5819dd82630d..e1d634e3c255 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -518,7 +518,8 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt)
sch_tree_lock(sch);
- qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
+ qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
+ q->qdisc->qstats.backlog);
qdisc_destroy(q->qdisc);
q->qdisc = child;
@@ -614,12 +615,7 @@ static int sfb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (new == NULL)
new = &noop_qdisc;
- sch_tree_lock(sch);
- *old = q->qdisc;
- q->qdisc = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
+ *old = qdisc_replace(sch, new, &q->qdisc);
return 0;
}
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index b877140beda5..4417fb25166f 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -369,7 +369,7 @@ static int
sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct sfq_sched_data *q = qdisc_priv(sch);
- unsigned int hash;
+ unsigned int hash, dropped;
sfq_index x, qlen;
struct sfq_slot *slot;
int uninitialized_var(ret);
@@ -484,7 +484,7 @@ enqueue:
return NET_XMIT_SUCCESS;
qlen = slot->qlen;
- sfq_drop(sch);
+ dropped = sfq_drop(sch);
/* Return Congestion Notification only if we dropped a packet
* from this flow.
*/
@@ -492,7 +492,7 @@ enqueue:
return NET_XMIT_CN;
/* As we dropped a packet, better let upper stack know this */
- qdisc_tree_decrease_qlen(sch, 1);
+ qdisc_tree_reduce_backlog(sch, 1, dropped);
return NET_XMIT_SUCCESS;
}
@@ -560,6 +560,7 @@ static void sfq_rehash(struct Qdisc *sch)
struct sfq_slot *slot;
struct sk_buff_head list;
int dropped = 0;
+ unsigned int drop_len = 0;
__skb_queue_head_init(&list);
@@ -588,6 +589,7 @@ static void sfq_rehash(struct Qdisc *sch)
if (x >= SFQ_MAX_FLOWS) {
drop:
qdisc_qstats_backlog_dec(sch, skb);
+ drop_len += qdisc_pkt_len(skb);
kfree_skb(skb);
dropped++;
continue;
@@ -617,7 +619,7 @@ drop:
}
}
sch->q.qlen -= dropped;
- qdisc_tree_decrease_qlen(sch, dropped);
+ qdisc_tree_reduce_backlog(sch, dropped, drop_len);
}
static void sfq_perturbation(unsigned long arg)
@@ -641,7 +643,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
struct sfq_sched_data *q = qdisc_priv(sch);
struct tc_sfq_qopt *ctl = nla_data(opt);
struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
- unsigned int qlen;
+ unsigned int qlen, dropped = 0;
struct red_parms *p = NULL;
if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
@@ -690,8 +692,8 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
qlen = sch->q.qlen;
while (sch->q.qlen > q->limit)
- sfq_drop(sch);
- qdisc_tree_decrease_qlen(sch, qlen - sch->q.qlen);
+ dropped += sfq_drop(sch);
+ qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped);
del_timer(&q->perturb_timer);
if (q->perturb_period) {
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index a4afde14e865..c2fbde742f37 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -160,6 +160,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
struct tbf_sched_data *q = qdisc_priv(sch);
struct sk_buff *segs, *nskb;
netdev_features_t features = netif_skb_features(skb);
+ unsigned int len = 0, prev_len = qdisc_pkt_len(skb);
int ret, nb;
segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
@@ -172,6 +173,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
nskb = segs->next;
segs->next = NULL;
qdisc_skb_cb(segs)->pkt_len = segs->len;
+ len += segs->len;
ret = qdisc_enqueue(segs, q->qdisc);
if (ret != NET_XMIT_SUCCESS) {
if (net_xmit_drop_count(ret))
@@ -183,7 +185,7 @@ static int tbf_segment(struct sk_buff *skb, struct Qdisc *sch)
}
sch->q.qlen += nb;
if (nb > 1)
- qdisc_tree_decrease_qlen(sch, 1 - nb);
+ qdisc_tree_reduce_backlog(sch, 1 - nb, prev_len - len);
consume_skb(skb);
return nb > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
}
@@ -399,7 +401,8 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
sch_tree_lock(sch);
if (child) {
- qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
+ qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
+ q->qdisc->qstats.backlog);
qdisc_destroy(q->qdisc);
q->qdisc = child;
}
@@ -502,13 +505,7 @@ static int tbf_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
if (new == NULL)
new = &noop_qdisc;
- sch_tree_lock(sch);
- *old = q->qdisc;
- q->qdisc = new;
- qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
- qdisc_reset(*old);
- sch_tree_unlock(sch);
-
+ *old = qdisc_replace(sch, new, &q->qdisc);
return 0;
}
diff --git a/net/sctp/auth.c b/net/sctp/auth.c
index fb7976aee61c..603c3bbc5923 100644
--- a/net/sctp/auth.c
+++ b/net/sctp/auth.c
@@ -800,8 +800,8 @@ int sctp_auth_ep_set_hmacs(struct sctp_endpoint *ep,
if (!has_sha1)
return -EINVAL;
- memcpy(ep->auth_hmacs_list->hmac_ids, &hmacs->shmac_idents[0],
- hmacs->shmac_num_idents * sizeof(__u16));
+ for (i = 0; i < hmacs->shmac_num_idents; i++)
+ ep->auth_hmacs_list->hmac_ids[i] = htons(hmacs->shmac_idents[i]);
ep->auth_hmacs_list->param_hdr.length = htons(sizeof(sctp_paramhdr_t) +
hmacs->shmac_num_idents * sizeof(__u16));
return 0;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 0e4198ee2370..18361cbfc882 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -519,6 +519,8 @@ static int sctp_v6_cmp_addr(const union sctp_addr *addr1,
}
return 0;
}
+ if (addr1->v6.sin6_port != addr2->v6.sin6_port)
+ return 0;
if (!ipv6_addr_equal(&addr1->v6.sin6_addr, &addr2->v6.sin6_addr))
return 0;
/* If this is a linklocal address, compare the scope_id. */
@@ -634,6 +636,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
struct sock *newsk;
struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
struct sctp6_sock *newsctp6sk;
+ struct ipv6_txoptions *opt;
newsk = sk_alloc(sock_net(sk), PF_INET6, GFP_KERNEL, sk->sk_prot);
if (!newsk)
@@ -653,6 +656,13 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
memcpy(newnp, np, sizeof(struct ipv6_pinfo));
+ rcu_read_lock();
+ opt = rcu_dereference(np->opt);
+ if (opt)
+ opt = ipv6_dup_options(newsk, opt);
+ RCU_INIT_POINTER(newnp->opt, opt);
+ rcu_read_unlock();
+
/* Initialize sk's sport, dport, rcv_saddr and daddr for getsockname()
* and getpeername().
*/
diff --git a/net/sctp/output.c b/net/sctp/output.c
index fc5e45b8a832..abe7c2db2412 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -599,7 +599,9 @@ out:
return err;
no_route:
kfree_skb(nskb);
- IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
+
+ if (asoc)
+ IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES);
/* FIXME: Returning the 'err' will effect all the associations
* associated with a socket, although only one of the paths of the
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 8f34b27d5775..143c4ebd55fa 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1166,7 +1166,7 @@ static void sctp_v4_del_protocol(void)
unregister_inetaddr_notifier(&sctp_inetaddr_notifier);
}
-static int __net_init sctp_net_init(struct net *net)
+static int __net_init sctp_defaults_init(struct net *net)
{
int status;
@@ -1259,12 +1259,6 @@ static int __net_init sctp_net_init(struct net *net)
sctp_dbg_objcnt_init(net);
- /* Initialize the control inode/socket for handling OOTB packets. */
- if ((status = sctp_ctl_sock_init(net))) {
- pr_err("Failed to initialize the SCTP control sock\n");
- goto err_ctl_sock_init;
- }
-
/* Initialize the local address list. */
INIT_LIST_HEAD(&net->sctp.local_addr_list);
spin_lock_init(&net->sctp.local_addr_lock);
@@ -1280,9 +1274,6 @@ static int __net_init sctp_net_init(struct net *net)
return 0;
-err_ctl_sock_init:
- sctp_dbg_objcnt_exit(net);
- sctp_proc_exit(net);
err_init_proc:
cleanup_sctp_mibs(net);
err_init_mibs:
@@ -1291,15 +1282,12 @@ err_sysctl_register:
return status;
}
-static void __net_exit sctp_net_exit(struct net *net)
+static void __net_exit sctp_defaults_exit(struct net *net)
{
/* Free the local address list */
sctp_free_addr_wq(net);
sctp_free_local_addr_list(net);
- /* Free the control endpoint. */
- inet_ctl_sock_destroy(net->sctp.ctl_sock);
-
sctp_dbg_objcnt_exit(net);
sctp_proc_exit(net);
@@ -1307,9 +1295,32 @@ static void __net_exit sctp_net_exit(struct net *net)
sctp_sysctl_net_unregister(net);
}
-static struct pernet_operations sctp_net_ops = {
- .init = sctp_net_init,
- .exit = sctp_net_exit,
+static struct pernet_operations sctp_defaults_ops = {
+ .init = sctp_defaults_init,
+ .exit = sctp_defaults_exit,
+};
+
+static int __net_init sctp_ctrlsock_init(struct net *net)
+{
+ int status;
+
+ /* Initialize the control inode/socket for handling OOTB packets. */
+ status = sctp_ctl_sock_init(net);
+ if (status)
+ pr_err("Failed to initialize the SCTP control sock\n");
+
+ return status;
+}
+
+static void __net_init sctp_ctrlsock_exit(struct net *net)
+{
+ /* Free the control endpoint. */
+ inet_ctl_sock_destroy(net->sctp.ctl_sock);
+}
+
+static struct pernet_operations sctp_ctrlsock_ops = {
+ .init = sctp_ctrlsock_init,
+ .exit = sctp_ctrlsock_exit,
};
/* Initialize the universe into something sensible. */
@@ -1443,8 +1454,11 @@ static __init int sctp_init(void)
sctp_v4_pf_init();
sctp_v6_pf_init();
- status = sctp_v4_protosw_init();
+ status = register_pernet_subsys(&sctp_defaults_ops);
+ if (status)
+ goto err_register_defaults;
+ status = sctp_v4_protosw_init();
if (status)
goto err_protosw_init;
@@ -1452,9 +1466,9 @@ static __init int sctp_init(void)
if (status)
goto err_v6_protosw_init;
- status = register_pernet_subsys(&sctp_net_ops);
+ status = register_pernet_subsys(&sctp_ctrlsock_ops);
if (status)
- goto err_register_pernet_subsys;
+ goto err_register_ctrlsock;
status = sctp_v4_add_protocol();
if (status)
@@ -1470,12 +1484,14 @@ out:
err_v6_add_protocol:
sctp_v4_del_protocol();
err_add_protocol:
- unregister_pernet_subsys(&sctp_net_ops);
-err_register_pernet_subsys:
+ unregister_pernet_subsys(&sctp_ctrlsock_ops);
+err_register_ctrlsock:
sctp_v6_protosw_exit();
err_v6_protosw_init:
sctp_v4_protosw_exit();
err_protosw_init:
+ unregister_pernet_subsys(&sctp_defaults_ops);
+err_register_defaults:
sctp_v4_pf_exit();
sctp_v6_pf_exit();
sctp_sysctl_unregister();
@@ -1508,12 +1524,14 @@ static __exit void sctp_exit(void)
sctp_v6_del_protocol();
sctp_v4_del_protocol();
- unregister_pernet_subsys(&sctp_net_ops);
+ unregister_pernet_subsys(&sctp_ctrlsock_ops);
/* Free protosw registrations */
sctp_v6_protosw_exit();
sctp_v4_protosw_exit();
+ unregister_pernet_subsys(&sctp_defaults_ops);
+
/* Unregister with socket layer. */
sctp_v6_pf_exit();
sctp_v4_pf_exit();
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 371a152d9759..642c11570285 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1652,7 +1652,7 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep,
/* Set an expiration time for the cookie. */
cookie->c.expiration = ktime_add(asoc->cookie_life,
- ktime_get());
+ ktime_get_real());
/* Copy the peer's init packet. */
memcpy(&cookie->c.peer_init[0], init_chunk->chunk_hdr,
@@ -1780,7 +1780,7 @@ no_hmac:
if (sock_flag(ep->base.sk, SOCK_TIMESTAMP))
kt = skb_get_ktime(skb);
else
- kt = ktime_get();
+ kt = ktime_get_real();
if (!asoc && ktime_before(bear_cookie->expiration, kt)) {
/*
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 3ee27b7704ff..e6bb98e583fb 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -4829,7 +4829,8 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort(
retval = SCTP_DISPOSITION_CONSUME;
- sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+ if (abort)
+ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
/* Even if we can't send the ABORT due to low memory delete the
* TCB. This is a departure from our typical NOMEM handling.
@@ -4966,7 +4967,8 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort(
SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT));
retval = SCTP_DISPOSITION_CONSUME;
- sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+ if (abort)
+ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_CLOSED));
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 634a2abb5f3a..72da7d58ccca 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1518,8 +1518,7 @@ static void sctp_close(struct sock *sk, long timeout)
struct sctp_chunk *chunk;
chunk = sctp_make_abort_user(asoc, NULL, 0);
- if (chunk)
- sctp_primitive_ABORT(net, asoc, chunk);
+ sctp_primitive_ABORT(net, asoc, chunk);
} else
sctp_primitive_SHUTDOWN(net, asoc, NULL);
}
@@ -1533,8 +1532,10 @@ static void sctp_close(struct sock *sk, long timeout)
/* Supposedly, no process has access to the socket, but
* the net layers still may.
+ * Also, sctp_destroy_sock() needs to be called with addr_wq_lock
+ * held and that should be grabbed before socket lock.
*/
- local_bh_disable();
+ spin_lock_bh(&net->sctp.addr_wq_lock);
bh_lock_sock(sk);
/* Hold the sock, since sk_common_release() will put sock_put()
@@ -1544,7 +1545,7 @@ static void sctp_close(struct sock *sk, long timeout)
sk_common_release(sk);
bh_unlock_sock(sk);
- local_bh_enable();
+ spin_unlock_bh(&net->sctp.addr_wq_lock);
sock_put(sk);
@@ -3581,6 +3582,7 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval,
if ((val && sp->do_auto_asconf) || (!val && !sp->do_auto_asconf))
return 0;
+ spin_lock_bh(&sock_net(sk)->sctp.addr_wq_lock);
if (val == 0 && sp->do_auto_asconf) {
list_del(&sp->auto_asconf_list);
sp->do_auto_asconf = 0;
@@ -3589,6 +3591,7 @@ static int sctp_setsockopt_auto_asconf(struct sock *sk, char __user *optval,
&sock_net(sk)->sctp.auto_asconf_splist);
sp->do_auto_asconf = 1;
}
+ spin_unlock_bh(&sock_net(sk)->sctp.addr_wq_lock);
return 0;
}
@@ -4122,18 +4125,28 @@ static int sctp_init_sock(struct sock *sk)
local_bh_disable();
percpu_counter_inc(&sctp_sockets_allocated);
sock_prot_inuse_add(net, sk->sk_prot, 1);
+
+ /* Nothing can fail after this block, otherwise
+ * sctp_destroy_sock() will be called without addr_wq_lock held
+ */
if (net->sctp.default_auto_asconf) {
+ spin_lock(&sock_net(sk)->sctp.addr_wq_lock);
list_add_tail(&sp->auto_asconf_list,
&net->sctp.auto_asconf_splist);
sp->do_auto_asconf = 1;
- } else
+ spin_unlock(&sock_net(sk)->sctp.addr_wq_lock);
+ } else {
sp->do_auto_asconf = 0;
+ }
+
local_bh_enable();
return 0;
}
-/* Cleanup any SCTP per socket resources. */
+/* Cleanup any SCTP per socket resources. Must be called with
+ * sock_net(sk)->sctp.addr_wq_lock held if sp->do_auto_asconf is true
+ */
static void sctp_destroy_sock(struct sock *sk)
{
struct sctp_sock *sp;
@@ -7167,6 +7180,7 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
newsk->sk_type = sk->sk_type;
newsk->sk_bound_dev_if = sk->sk_bound_dev_if;
newsk->sk_flags = sk->sk_flags;
+ newsk->sk_tsflags = sk->sk_tsflags;
newsk->sk_no_check_tx = sk->sk_no_check_tx;
newsk->sk_no_check_rx = sk->sk_no_check_rx;
newsk->sk_reuse = sk->sk_reuse;
@@ -7199,6 +7213,22 @@ void sctp_copy_sock(struct sock *newsk, struct sock *sk,
newinet->mc_ttl = 1;
newinet->mc_index = 0;
newinet->mc_list = NULL;
+
+ if (newsk->sk_flags & SK_FLAGS_TIMESTAMP)
+ net_enable_timestamp();
+}
+
+static inline void sctp_copy_descendant(struct sock *sk_to,
+ const struct sock *sk_from)
+{
+ int ancestor_size = sizeof(struct inet_sock) +
+ sizeof(struct sctp_sock) -
+ offsetof(struct sctp_sock, auto_asconf_list);
+
+ if (sk_from->sk_family == PF_INET6)
+ ancestor_size += sizeof(struct ipv6_pinfo);
+
+ __inet_sk_copy_descendant(sk_to, sk_from, ancestor_size);
}
/* Populate the fields of the newsk from the oldsk and migrate the assoc
@@ -7215,7 +7245,6 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
struct sk_buff *skb, *tmp;
struct sctp_ulpevent *event;
struct sctp_bind_hashbucket *head;
- struct list_head tmplist;
/* Migrate socket buffer sizes and all the socket level options to the
* new socket.
@@ -7223,12 +7252,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
newsk->sk_sndbuf = oldsk->sk_sndbuf;
newsk->sk_rcvbuf = oldsk->sk_rcvbuf;
/* Brute force copy old sctp opt. */
- if (oldsp->do_auto_asconf) {
- memcpy(&tmplist, &newsp->auto_asconf_list, sizeof(tmplist));
- inet_sk_copy_descendant(newsk, oldsk);
- memcpy(&newsp->auto_asconf_list, &tmplist, sizeof(tmplist));
- } else
- inet_sk_copy_descendant(newsk, oldsk);
+ sctp_copy_descendant(newsk, oldsk);
/* Restore the ep value that was overwritten with the above structure
* copy.
@@ -7372,6 +7396,13 @@ struct proto sctp_prot = {
#if IS_ENABLED(CONFIG_IPV6)
+#include <net/transp_v6.h>
+static void sctp_v6_destroy_sock(struct sock *sk)
+{
+ sctp_destroy_sock(sk);
+ inet6_destroy_sock(sk);
+}
+
struct proto sctpv6_prot = {
.name = "SCTPv6",
.owner = THIS_MODULE,
@@ -7381,7 +7412,7 @@ struct proto sctpv6_prot = {
.accept = sctp_accept,
.ioctl = sctp_ioctl,
.init = sctp_init_sock,
- .destroy = sctp_destroy_sock,
+ .destroy = sctp_v6_destroy_sock,
.shutdown = sctp_shutdown,
.setsockopt = sctp_setsockopt,
.getsockopt = sctp_getsockopt,
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 2e9ada10fd84..5b6e7571081c 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -324,7 +324,7 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write,
struct ctl_table tbl;
bool changed = false;
char *none = "none";
- char tmp[8];
+ char tmp[8] = {0};
int ret;
memset(&tbl, 0, sizeof(struct ctl_table));
diff --git a/net/socket.c b/net/socket.c
index cf9ebf10c841..7f61789c78ff 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2017,14 +2017,12 @@ static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
int err, ctl_len, total_len;
err = -EFAULT;
- if (MSG_CMSG_COMPAT & flags) {
- if (get_compat_msghdr(msg_sys, msg_compat))
- return -EFAULT;
- } else {
+ if (MSG_CMSG_COMPAT & flags)
+ err = get_compat_msghdr(msg_sys, msg_compat);
+ else
err = copy_msghdr_from_user(msg_sys, msg);
- if (err)
- return err;
- }
+ if (err)
+ return err;
if (msg_sys->msg_iovlen > UIO_FASTIOV) {
err = -EMSGSIZE;
@@ -2229,14 +2227,12 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
struct sockaddr __user *uaddr;
int __user *uaddr_len;
- if (MSG_CMSG_COMPAT & flags) {
- if (get_compat_msghdr(msg_sys, msg_compat))
- return -EFAULT;
- } else {
+ if (MSG_CMSG_COMPAT & flags)
+ err = get_compat_msghdr(msg_sys, msg_compat);
+ else
err = copy_msghdr_from_user(msg_sys, msg);
- if (err)
- return err;
- }
+ if (err)
+ return err;
if (msg_sys->msg_iovlen > UIO_FASTIOV) {
err = -EMSGSIZE;
@@ -2414,31 +2410,31 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen,
break;
}
-out_put:
- fput_light(sock->file, fput_needed);
-
if (err == 0)
- return datagrams;
+ goto out_put;
- if (datagrams != 0) {
+ if (datagrams == 0) {
+ datagrams = err;
+ goto out_put;
+ }
+
+ /*
+ * We may return less entries than requested (vlen) if the
+ * sock is non block and there aren't enough datagrams...
+ */
+ if (err != -EAGAIN) {
/*
- * We may return less entries than requested (vlen) if the
- * sock is non block and there aren't enough datagrams...
+ * ... or if recvmsg returns an error after we
+ * received some datagrams, where we record the
+ * error to return on the next call or if the
+ * app asks about it using getsockopt(SO_ERROR).
*/
- if (err != -EAGAIN) {
- /*
- * ... or if recvmsg returns an error after we
- * received some datagrams, where we record the
- * error to return on the next call or if the
- * app asks about it using getsockopt(SO_ERROR).
- */
- sock->sk->sk_err = -err;
- }
-
- return datagrams;
+ sock->sk->sk_err = -err;
}
+out_put:
+ fput_light(sock->file, fput_needed);
- return err;
+ return datagrams;
}
SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg,
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index c548ab213f76..d08f75972c57 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -855,8 +855,8 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
goto out;
if (svc_getnl(&buf->head[0]) != seq)
goto out;
- /* trim off the mic at the end before returning */
- xdr_buf_trim(buf, mic.len + 4);
+ /* trim off the mic and padding at the end before returning */
+ xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4);
stat = 0;
out:
kfree(mic.data);
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 1bb4d26fbd6b..ca9ff71134f9 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -60,7 +60,7 @@ static void xprt_free_allocation(struct rpc_rqst *req)
dprintk("RPC: free allocations for req= %p\n", req);
WARN_ON_ONCE(test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state));
- xbufp = &req->rq_private_buf;
+ xbufp = &req->rq_rcv_buf;
free_page((unsigned long)xbufp->head[0].iov_base);
xbufp = &req->rq_snd_buf;
free_page((unsigned long)xbufp->head[0].iov_base);
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 48f14003af10..11e7b55f04e2 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -1187,14 +1187,14 @@ int sunrpc_cache_pipe_upcall(struct cache_detail *detail, struct cache_head *h)
}
crq->q.reader = 0;
- crq->item = cache_get(h);
crq->buf = buf;
crq->len = 0;
crq->readers = 0;
spin_lock(&queue_lock);
- if (test_bit(CACHE_PENDING, &h->flags))
+ if (test_bit(CACHE_PENDING, &h->flags)) {
+ crq->item = cache_get(h);
list_add_tail(&crq->q.list, &detail->queue);
- else
+ } else
/* Lost a race, no longer PENDING, so don't enqueue */
ret = -EAGAIN;
spin_unlock(&queue_lock);
@@ -1230,7 +1230,7 @@ int qword_get(char **bpp, char *dest, int bufsize)
if (bp[0] == '\\' && bp[1] == 'x') {
/* HEX STRING */
bp += 2;
- while (len < bufsize) {
+ while (len < bufsize - 1) {
int h, l;
h = hex_to_bin(bp[0]);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index ae46f0198608..7b738b53f061 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -439,7 +439,7 @@ out_no_rpciod:
return ERR_PTR(err);
}
-struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
+static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
struct rpc_xprt *xprt)
{
struct rpc_clnt *clnt = NULL;
@@ -471,7 +471,6 @@ struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
return clnt;
}
-EXPORT_SYMBOL_GPL(rpc_create_xprt);
/**
* rpc_create - create an RPC client and transport with one call
@@ -497,6 +496,15 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
};
char servername[48];
+ if (args->bc_xprt) {
+ WARN_ON(args->protocol != XPRT_TRANSPORT_BC_TCP);
+ xprt = args->bc_xprt->xpt_bc_xprt;
+ if (xprt) {
+ xprt_get(xprt);
+ return rpc_create_xprt(args, xprt);
+ }
+ }
+
if (args->flags & RPC_CLNT_CREATE_INFINITE_SLOTS)
xprtargs.flags |= XPRT_CREATE_INFINITE_SLOTS;
if (args->flags & RPC_CLNT_CREATE_NO_IDLE_TIMEOUT)
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 9f1b50689c0f..03252a652e4b 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -372,6 +372,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
int byte_count)
{
struct ib_send_wr send_wr;
+ u32 xdr_off;
int sge_no;
int sge_bytes;
int page_no;
@@ -406,8 +407,8 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->direction = DMA_TO_DEVICE;
/* Map the payload indicated by 'byte_count' */
+ xdr_off = 0;
for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
- int xdr_off = 0;
sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
byte_count -= sge_bytes;
ctxt->sge[sge_no].addr =
@@ -443,6 +444,14 @@ static int send_reply(struct svcxprt_rdma *rdma,
rqstp->rq_next_page = rqstp->rq_respages + 1;
BUG_ON(sge_no > rdma->sc_max_sge);
+
+ /* The loop above bumps sc_dma_used for each sge. The
+ * xdr_buf.tail gets a separate sge, but resides in the
+ * same page as xdr_buf.head. Don't count it twice.
+ */
+ if (sge_no > ctxt->count)
+ atomic_dec(&rdma->sc_dma_used);
+
memset(&send_wr, 0, sizeof send_wr);
ctxt->wr_op = IB_WR_SEND;
send_wr.wr_id = (unsigned long)ctxt;
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 61c41298b4ea..6e5d5034b0f8 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -511,8 +511,8 @@ int
rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
{
int rc, mem_priv;
- struct ib_device_attr devattr;
struct rpcrdma_ia *ia = &xprt->rx_ia;
+ struct ib_device_attr *devattr = &ia->ri_devattr;
ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
if (IS_ERR(ia->ri_id)) {
@@ -528,28 +528,24 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
goto out2;
}
- /*
- * Query the device to determine if the requested memory
- * registration strategy is supported. If it isn't, set the
- * strategy to a globally supported model.
- */
- rc = ib_query_device(ia->ri_id->device, &devattr);
+ rc = ib_query_device(ia->ri_id->device, devattr);
if (rc) {
dprintk("RPC: %s: ib_query_device failed %d\n",
__func__, rc);
goto out2;
}
- if (devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
+ if (devattr->device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) {
ia->ri_have_dma_lkey = 1;
ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
}
if (memreg == RPCRDMA_FRMR) {
/* Requires both frmr reg and local dma lkey */
- if ((devattr.device_cap_flags &
+ if (((devattr->device_cap_flags &
(IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
- (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
+ (IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) ||
+ (devattr->max_fast_reg_page_list_len == 0)) {
dprintk("RPC: %s: FRMR registration "
"not supported by HCA\n", __func__);
memreg = RPCRDMA_MTHCAFMR;
@@ -557,7 +553,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
/* Mind the ia limit on FRMR page list depth */
ia->ri_max_frmr_depth = min_t(unsigned int,
RPCRDMA_MAX_DATA_SEGS,
- devattr.max_fast_reg_page_list_len);
+ devattr->max_fast_reg_page_list_len);
}
}
if (memreg == RPCRDMA_MTHCAFMR) {
@@ -655,20 +651,13 @@ int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
struct rpcrdma_create_data_internal *cdata)
{
- struct ib_device_attr devattr;
+ struct ib_device_attr *devattr = &ia->ri_devattr;
struct ib_cq *sendcq, *recvcq;
int rc, err;
- rc = ib_query_device(ia->ri_id->device, &devattr);
- if (rc) {
- dprintk("RPC: %s: ib_query_device failed %d\n",
- __func__, rc);
- return rc;
- }
-
/* check provider's send/recv wr limits */
- if (cdata->max_requests > devattr.max_qp_wr)
- cdata->max_requests = devattr.max_qp_wr;
+ if (cdata->max_requests > devattr->max_qp_wr)
+ cdata->max_requests = devattr->max_qp_wr;
ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
ep->rep_attr.qp_context = ep;
@@ -703,8 +692,8 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
}
ep->rep_attr.cap.max_send_wr *= depth;
- if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
- cdata->max_requests = devattr.max_qp_wr / depth;
+ if (ep->rep_attr.cap.max_send_wr > devattr->max_qp_wr) {
+ cdata->max_requests = devattr->max_qp_wr / depth;
if (!cdata->max_requests)
return -EINVAL;
ep->rep_attr.cap.max_send_wr = cdata->max_requests *
@@ -786,10 +775,11 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
/* Client offers RDMA Read but does not initiate */
ep->rep_remote_cma.initiator_depth = 0;
- if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */
+ if (devattr->max_qp_rd_atom > 32) /* arbitrary but <= 255 */
ep->rep_remote_cma.responder_resources = 32;
else
- ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;
+ ep->rep_remote_cma.responder_resources =
+ devattr->max_qp_rd_atom;
ep->rep_remote_cma.retry_count = 7;
ep->rep_remote_cma.flow_control = 0;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index ac7fc9a31342..11f0e06f6623 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -70,6 +70,7 @@ struct rpcrdma_ia {
int ri_async_rc;
enum rpcrdma_memreg ri_memreg_strategy;
unsigned int ri_max_frmr_depth;
+ struct ib_device_attr ri_devattr;
};
/*
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 51bddc236a15..8224016ebd70 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1996,6 +1996,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1);
if (res)
goto exit;
+ security_sk_clone(sock->sk, new_sock->sk);
new_sk = new_sock->sk;
new_tsock = tipc_sk(new_sk);
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e96884380732..287087b10a7e 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -305,7 +305,7 @@ static struct sock *unix_find_socket_byinode(struct inode *i)
&unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
struct dentry *dentry = unix_sk(s)->path.dentry;
- if (dentry && dentry->d_inode == i) {
+ if (dentry && d_backing_inode(dentry) == i) {
sock_hold(s);
goto found;
}
@@ -316,6 +316,118 @@ found:
return s;
}
+/* Support code for asymmetrically connected dgram sockets
+ *
+ * If a datagram socket is connected to a socket not itself connected
+ * to the first socket (eg, /dev/log), clients may only enqueue more
+ * messages if the present receive queue of the server socket is not
+ * "too large". This means there's a second writeability condition
+ * poll and sendmsg need to test. The dgram recv code will do a wake
+ * up on the peer_wait wait queue of a socket upon reception of a
+ * datagram which needs to be propagated to sleeping would-be writers
+ * since these might not have sent anything so far. This can't be
+ * accomplished via poll_wait because the lifetime of the server
+ * socket might be less than that of its clients if these break their
+ * association with it or if the server socket is closed while clients
+ * are still connected to it and there's no way to inform "a polling
+ * implementation" that it should let go of a certain wait queue
+ *
+ * In order to propagate a wake up, a wait_queue_t of the client
+ * socket is enqueued on the peer_wait queue of the server socket
+ * whose wake function does a wake_up on the ordinary client socket
+ * wait queue. This connection is established whenever a write (or
+ * poll for write) hit the flow control condition and broken when the
+ * association to the server socket is dissolved or after a wake up
+ * was relayed.
+ */
+
+static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
+ void *key)
+{
+ struct unix_sock *u;
+ wait_queue_head_t *u_sleep;
+
+ u = container_of(q, struct unix_sock, peer_wake);
+
+ __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
+ q);
+ u->peer_wake.private = NULL;
+
+ /* relaying can only happen while the wq still exists */
+ u_sleep = sk_sleep(&u->sk);
+ if (u_sleep)
+ wake_up_interruptible_poll(u_sleep, key);
+
+ return 0;
+}
+
+static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
+{
+ struct unix_sock *u, *u_other;
+ int rc;
+
+ u = unix_sk(sk);
+ u_other = unix_sk(other);
+ rc = 0;
+ spin_lock(&u_other->peer_wait.lock);
+
+ if (!u->peer_wake.private) {
+ u->peer_wake.private = other;
+ __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
+
+ rc = 1;
+ }
+
+ spin_unlock(&u_other->peer_wait.lock);
+ return rc;
+}
+
+static void unix_dgram_peer_wake_disconnect(struct sock *sk,
+ struct sock *other)
+{
+ struct unix_sock *u, *u_other;
+
+ u = unix_sk(sk);
+ u_other = unix_sk(other);
+ spin_lock(&u_other->peer_wait.lock);
+
+ if (u->peer_wake.private == other) {
+ __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
+ u->peer_wake.private = NULL;
+ }
+
+ spin_unlock(&u_other->peer_wait.lock);
+}
+
+static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
+ struct sock *other)
+{
+ unix_dgram_peer_wake_disconnect(sk, other);
+ wake_up_interruptible_poll(sk_sleep(sk),
+ POLLOUT |
+ POLLWRNORM |
+ POLLWRBAND);
+}
+
+/* preconditions:
+ * - unix_peer(sk) == other
+ * - association is stable
+ */
+static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
+{
+ int connected;
+
+ connected = unix_dgram_peer_wake_connect(sk, other);
+
+ if (unix_recvq_full(other))
+ return 1;
+
+ if (connected)
+ unix_dgram_peer_wake_disconnect(sk, other);
+
+ return 0;
+}
+
static inline int unix_writable(struct sock *sk)
{
return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
@@ -420,6 +532,8 @@ static void unix_release_sock(struct sock *sk, int embrion)
skpair->sk_state_change(skpair);
sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
}
+
+ unix_dgram_peer_wake_disconnect(sk, skpair);
sock_put(skpair); /* It may now die */
unix_peer(sk) = NULL;
}
@@ -653,6 +767,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock)
INIT_LIST_HEAD(&u->link);
mutex_init(&u->readlock); /* single task reading lock */
init_waitqueue_head(&u->peer_wait);
+ init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
if (sk == NULL)
@@ -783,7 +898,7 @@ static struct sock *unix_find_other(struct net *net,
err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
if (err)
goto fail;
- inode = path.dentry->d_inode;
+ inode = d_backing_inode(path.dentry);
err = inode_permission(inode, MAY_WRITE);
if (err)
goto put_fail;
@@ -825,32 +940,20 @@ fail:
return NULL;
}
-static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
+static int unix_mknod(struct dentry *dentry, struct path *path, umode_t mode,
+ struct path *res)
{
- struct dentry *dentry;
- struct path path;
- int err = 0;
- /*
- * Get the parent directory, calculate the hash for last
- * component.
- */
- dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
- err = PTR_ERR(dentry);
- if (IS_ERR(dentry))
- return err;
+ int err;
- /*
- * All right, let's create it.
- */
- err = security_path_mknod(&path, dentry, mode, 0);
+ err = security_path_mknod(path, dentry, mode, 0);
if (!err) {
- err = vfs_mknod(path.dentry->d_inode, dentry, mode, 0);
+ err = vfs_mknod(d_inode(path->dentry), dentry, mode, 0);
if (!err) {
- res->mnt = mntget(path.mnt);
+ res->mnt = mntget(path->mnt);
res->dentry = dget(dentry);
}
}
- done_path_create(&path, dentry);
+
return err;
}
@@ -861,10 +964,12 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
struct unix_sock *u = unix_sk(sk);
struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
char *sun_path = sunaddr->sun_path;
- int err;
+ int err, name_err;
unsigned int hash;
struct unix_address *addr;
struct hlist_head *list;
+ struct path path;
+ struct dentry *dentry;
err = -EINVAL;
if (sunaddr->sun_family != AF_UNIX)
@@ -880,14 +985,34 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out;
addr_len = err;
+ name_err = 0;
+ dentry = NULL;
+ if (sun_path[0]) {
+ /* Get the parent directory, calculate the hash for last
+ * component.
+ */
+ dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
+
+ if (IS_ERR(dentry)) {
+ /* delay report until after 'already bound' check */
+ name_err = PTR_ERR(dentry);
+ dentry = NULL;
+ }
+ }
+
err = mutex_lock_interruptible(&u->readlock);
if (err)
- goto out;
+ goto out_path;
err = -EINVAL;
if (u->addr)
goto out_up;
+ if (name_err) {
+ err = name_err == -EEXIST ? -EADDRINUSE : name_err;
+ goto out_up;
+ }
+
err = -ENOMEM;
addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
if (!addr)
@@ -898,11 +1023,11 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
addr->hash = hash ^ sk->sk_type;
atomic_set(&addr->refcnt, 1);
- if (sun_path[0]) {
- struct path path;
+ if (dentry) {
+ struct path u_path;
umode_t mode = S_IFSOCK |
(SOCK_INODE(sock)->i_mode & ~current_umask());
- err = unix_mknod(sun_path, mode, &path);
+ err = unix_mknod(dentry, &path, mode, &u_path);
if (err) {
if (err == -EEXIST)
err = -EADDRINUSE;
@@ -910,9 +1035,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out_up;
}
addr->hash = UNIX_HASH_SIZE;
- hash = path.dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1);
+ hash = d_backing_inode(dentry)->i_ino & (UNIX_HASH_SIZE - 1);
spin_lock(&unix_table_lock);
- u->path = path;
+ u->path = u_path;
list = &unix_socket_table[hash];
} else {
spin_lock(&unix_table_lock);
@@ -935,6 +1060,10 @@ out_unlock:
spin_unlock(&unix_table_lock);
out_up:
mutex_unlock(&u->readlock);
+out_path:
+ if (dentry)
+ done_path_create(&path, dentry);
+
out:
return err;
}
@@ -1020,6 +1149,8 @@ restart:
if (unix_peer(sk)) {
struct sock *old_peer = unix_peer(sk);
unix_peer(sk) = other;
+ unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
+
unix_state_double_unlock(sk, other);
if (other != old_peer)
@@ -1369,6 +1500,21 @@ static void unix_destruct_scm(struct sk_buff *skb)
sock_wfree(skb);
}
+/*
+ * The "user->unix_inflight" variable is protected by the garbage
+ * collection lock, and we just read it locklessly here. If you go
+ * over the limit, there might be a tiny race in actually noticing
+ * it across threads. Tough.
+ */
+static inline bool too_many_unix_fds(struct task_struct *p)
+{
+ struct user_struct *user = current_user();
+
+ if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
+ return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
+ return false;
+}
+
#define MAX_RECURSION_LEVEL 4
static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
@@ -1377,6 +1523,9 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
unsigned char max_level = 0;
int unix_sock_count = 0;
+ if (too_many_unix_fds(current))
+ return -ETOOMANYREFS;
+
for (i = scm->fp->count - 1; i >= 0; i--) {
struct sock *sk = unix_get_socket(scm->fp->fp[i]);
@@ -1398,10 +1547,8 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
if (!UNIXCB(skb).fp)
return -ENOMEM;
- if (unix_sock_count) {
- for (i = scm->fp->count - 1; i >= 0; i--)
- unix_inflight(scm->fp->fp[i]);
- }
+ for (i = scm->fp->count - 1; i >= 0; i--)
+ unix_inflight(scm->fp->fp[i]);
return max_level;
}
@@ -1459,6 +1606,7 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
struct scm_cookie tmp_scm;
int max_level;
int data_len = 0;
+ int sk_locked;
if (NULL == siocb->scm)
siocb->scm = &tmp_scm;
@@ -1540,12 +1688,14 @@ restart:
goto out_free;
}
+ sk_locked = 0;
unix_state_lock(other);
+restart_locked:
err = -EPERM;
if (!unix_may_send(sk, other))
goto out_unlock;
- if (sock_flag(other, SOCK_DEAD)) {
+ if (unlikely(sock_flag(other, SOCK_DEAD))) {
/*
* Check with 1003.1g - what should
* datagram error
@@ -1553,10 +1703,14 @@ restart:
unix_state_unlock(other);
sock_put(other);
+ if (!sk_locked)
+ unix_state_lock(sk);
+
err = 0;
- unix_state_lock(sk);
if (unix_peer(sk) == other) {
unix_peer(sk) = NULL;
+ unix_dgram_peer_wake_disconnect_wakeup(sk, other);
+
unix_state_unlock(sk);
unix_dgram_disconnected(sk, other);
@@ -1582,21 +1736,38 @@ restart:
goto out_unlock;
}
- if (unix_peer(other) != sk && unix_recvq_full(other)) {
- if (!timeo) {
- err = -EAGAIN;
- goto out_unlock;
+ if (unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
+ if (timeo) {
+ timeo = unix_wait_for_peer(other, timeo);
+
+ err = sock_intr_errno(timeo);
+ if (signal_pending(current))
+ goto out_free;
+
+ goto restart;
}
- timeo = unix_wait_for_peer(other, timeo);
+ if (!sk_locked) {
+ unix_state_unlock(other);
+ unix_state_double_lock(sk, other);
+ }
- err = sock_intr_errno(timeo);
- if (signal_pending(current))
- goto out_free;
+ if (unix_peer(sk) != other ||
+ unix_dgram_peer_wake_me(sk, other)) {
+ err = -EAGAIN;
+ sk_locked = 1;
+ goto out_unlock;
+ }
- goto restart;
+ if (!sk_locked) {
+ sk_locked = 1;
+ goto restart_locked;
+ }
}
+ if (unlikely(sk_locked))
+ unix_state_unlock(sk);
+
if (sock_flag(other, SOCK_RCVTSTAMP))
__net_timestamp(skb);
maybe_add_creds(skb, sock, other);
@@ -1610,6 +1781,8 @@ restart:
return len;
out_unlock:
+ if (sk_locked)
+ unix_state_unlock(sk);
unix_state_unlock(other);
out_free:
kfree_skb(skb);
@@ -1899,6 +2072,10 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
unix_state_unlock(sk);
timeo = freezable_schedule_timeout(timeo);
unix_state_lock(sk);
+
+ if (sock_flag(sk, SOCK_DEAD))
+ break;
+
clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
}
@@ -1949,20 +2126,22 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
memset(&tmp_scm, 0, sizeof(tmp_scm));
}
- err = mutex_lock_interruptible(&u->readlock);
- if (unlikely(err)) {
- /* recvmsg() in non blocking mode is supposed to return -EAGAIN
- * sk_rcvtimeo is not honored by mutex_lock_interruptible()
- */
- err = noblock ? -EAGAIN : -ERESTARTSYS;
- goto out;
- }
+ mutex_lock(&u->readlock);
+
+ if (flags & MSG_PEEK)
+ skip = sk_peek_offset(sk, flags);
+ else
+ skip = 0;
do {
int chunk;
struct sk_buff *skb, *last;
unix_state_lock(sk);
+ if (sock_flag(sk, SOCK_DEAD)) {
+ err = -ECONNRESET;
+ goto unlock;
+ }
last = skb = skb_peek(&sk->sk_receive_queue);
again:
if (skb == NULL) {
@@ -1988,19 +2167,18 @@ again:
timeo = unix_stream_data_wait(sk, timeo, last);
- if (signal_pending(current)
- || mutex_lock_interruptible(&u->readlock)) {
+ if (signal_pending(current)) {
err = sock_intr_errno(timeo);
goto out;
}
+ mutex_lock(&u->readlock);
continue;
unlock:
unix_state_unlock(sk);
break;
}
- skip = sk_peek_offset(sk, flags);
while (skip >= unix_skb_len(skb)) {
skip -= unix_skb_len(skb);
last = skb;
@@ -2064,6 +2242,16 @@ again:
sk_peek_offset_fwd(sk, chunk);
+ if (UNIXCB(skb).fp)
+ break;
+
+ skip = 0;
+ last = skb;
+ unix_state_lock(sk);
+ skb = skb_peek_next(skb, &sk->sk_receive_queue);
+ if (skb)
+ goto again;
+ unix_state_unlock(sk);
break;
}
} while (size);
@@ -2247,14 +2435,16 @@ static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
return mask;
writable = unix_writable(sk);
- other = unix_peer_get(sk);
- if (other) {
- if (unix_peer(other) != sk) {
- sock_poll_wait(file, &unix_sk(other)->peer_wait, wait);
- if (unix_recvq_full(other))
- writable = 0;
- }
- sock_put(other);
+ if (writable) {
+ unix_state_lock(sk);
+
+ other = unix_peer(sk);
+ if (other && unix_peer(other) != sk &&
+ unix_recvq_full(other) &&
+ unix_dgram_peer_wake_me(sk, other))
+ writable = 0;
+
+ unix_state_unlock(sk);
}
if (writable)
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 86fa0f3b2caf..9d4218fc0a61 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -25,7 +25,7 @@ static int sk_diag_dump_vfs(struct sock *sk, struct sk_buff *nlskb)
if (dentry) {
struct unix_diag_vfs uv = {
- .udiag_vfs_ino = dentry->d_inode->i_ino,
+ .udiag_vfs_ino = d_backing_inode(dentry)->i_ino,
.udiag_vfs_dev = dentry->d_sb->s_dev,
};
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 99f7012b23b9..6247787e4760 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -125,9 +125,12 @@ struct sock *unix_get_socket(struct file *filp)
void unix_inflight(struct file *fp)
{
struct sock *s = unix_get_socket(fp);
+
+ spin_lock(&unix_gc_lock);
+
if (s) {
struct unix_sock *u = unix_sk(s);
- spin_lock(&unix_gc_lock);
+
if (atomic_long_inc_return(&u->inflight) == 1) {
BUG_ON(!list_empty(&u->link));
list_add_tail(&u->link, &gc_inflight_list);
@@ -135,22 +138,27 @@ void unix_inflight(struct file *fp)
BUG_ON(list_empty(&u->link));
}
unix_tot_inflight++;
- spin_unlock(&unix_gc_lock);
}
+ fp->f_cred->user->unix_inflight++;
+ spin_unlock(&unix_gc_lock);
}
void unix_notinflight(struct file *fp)
{
struct sock *s = unix_get_socket(fp);
+
+ spin_lock(&unix_gc_lock);
+
if (s) {
struct unix_sock *u = unix_sk(s);
- spin_lock(&unix_gc_lock);
+
BUG_ON(list_empty(&u->link));
if (atomic_long_dec_and_test(&u->inflight))
list_del_init(&u->link);
unix_tot_inflight--;
- spin_unlock(&unix_gc_lock);
}
+ fp->f_cred->user->unix_inflight--;
+ spin_unlock(&unix_gc_lock);
}
static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 85d232bed87d..e8d3313ea2c9 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1796,27 +1796,8 @@ vsock_stream_recvmsg(struct kiocb *kiocb,
else if (sk->sk_shutdown & RCV_SHUTDOWN)
err = 0;
- if (copied > 0) {
- /* We only do these additional bookkeeping/notification steps
- * if we actually copied something out of the queue pair
- * instead of just peeking ahead.
- */
-
- if (!(flags & MSG_PEEK)) {
- /* If the other side has shutdown for sending and there
- * is nothing more to read, then modify the socket
- * state.
- */
- if (vsk->peer_shutdown & SEND_SHUTDOWN) {
- if (vsock_stream_has_data(vsk) <= 0) {
- sk->sk_state = SS_UNCONNECTED;
- sock_set_flag(sk, SOCK_DONE);
- sk->sk_state_change(sk);
- }
- }
- }
+ if (copied > 0)
err = copied;
- }
out_wait:
finish_wait(sk_sleep(sk), &wait);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index f52a4cd7017c..6c23065490d3 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -1039,6 +1039,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb,
return NOTIFY_DONE;
}
+ wireless_nlevent_flush();
+
return NOTIFY_OK;
}
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 5fed79cfe45a..0286733f2e8c 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -11951,7 +11951,7 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
struct wireless_dev *wdev;
struct cfg80211_beacon_registration *reg, *tmp;
- if (state != NETLINK_URELEASE)
+ if (state != NETLINK_URELEASE || notify->protocol != NETLINK_GENERIC)
return NOTIFY_DONE;
rcu_read_lock();
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 0f47948c572f..d2c77d5707b2 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -1333,6 +1333,8 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev)
memcpy(bssid, wdev->current_bss->pub.bssid, ETH_ALEN);
wdev_unlock(wdev);
+ memset(&sinfo, 0, sizeof(sinfo));
+
if (rdev_get_station(rdev, dev, bssid, &sinfo))
return NULL;
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index c8717c1d082e..c753211cb83f 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -342,6 +342,40 @@ static const int compat_event_type_size[] = {
/* IW event code */
+void wireless_nlevent_flush(void)
+{
+ struct sk_buff *skb;
+ struct net *net;
+
+ ASSERT_RTNL();
+
+ for_each_net(net) {
+ while ((skb = skb_dequeue(&net->wext_nlevents)))
+ rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL,
+ GFP_KERNEL);
+ }
+}
+EXPORT_SYMBOL_GPL(wireless_nlevent_flush);
+
+static int wext_netdev_notifier_call(struct notifier_block *nb,
+ unsigned long state, void *ptr)
+{
+ /*
+ * When a netdev changes state in any way, flush all pending messages
+ * to avoid them going out in a strange order, e.g. RTM_NEWLINK after
+ * RTM_DELLINK, or with IFF_UP after without IFF_UP during dev_close()
+ * or similar - all of which could otherwise happen due to delays from
+ * schedule_work().
+ */
+ wireless_nlevent_flush();
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block wext_netdev_notifier = {
+ .notifier_call = wext_netdev_notifier_call,
+};
+
static int __net_init wext_pernet_init(struct net *net)
{
skb_queue_head_init(&net->wext_nlevents);
@@ -360,7 +394,12 @@ static struct pernet_operations wext_pernet_ops = {
static int __init wireless_nlevent_init(void)
{
- return register_pernet_subsys(&wext_pernet_ops);
+ int err = register_pernet_subsys(&wext_pernet_ops);
+
+ if (err)
+ return err;
+
+ return register_netdevice_notifier(&wext_netdev_notifier);
}
subsys_initcall(wireless_nlevent_init);
@@ -368,17 +407,8 @@ subsys_initcall(wireless_nlevent_init);
/* Process events generated by the wireless layer or the driver. */
static void wireless_nlevent_process(struct work_struct *work)
{
- struct sk_buff *skb;
- struct net *net;
-
rtnl_lock();
-
- for_each_net(net) {
- while ((skb = skb_dequeue(&net->wext_nlevents)))
- rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL,
- GFP_KERNEL);
- }
-
+ wireless_nlevent_flush();
rtnl_unlock();
}
@@ -925,8 +955,29 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr,
return private(dev, iwr, cmd, info, handler);
}
/* Old driver API : call driver ioctl handler */
- if (dev->netdev_ops->ndo_do_ioctl)
- return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd);
+ if (dev->netdev_ops->ndo_do_ioctl) {
+#ifdef CONFIG_COMPAT
+ if (info->flags & IW_REQUEST_FLAG_COMPAT) {
+ int ret = 0;
+ struct iwreq iwr_lcl;
+ struct compat_iw_point *iwp_compat = (void *) &iwr->u.data;
+
+ memcpy(&iwr_lcl, iwr, sizeof(struct iwreq));
+ iwr_lcl.u.data.pointer = compat_ptr(iwp_compat->pointer);
+ iwr_lcl.u.data.length = iwp_compat->length;
+ iwr_lcl.u.data.flags = iwp_compat->flags;
+
+ ret = dev->netdev_ops->ndo_do_ioctl(dev, (void *) &iwr_lcl, cmd);
+
+ iwp_compat->pointer = ptr_to_compat(iwr_lcl.u.data.pointer);
+ iwp_compat->length = iwr_lcl.u.data.length;
+ iwp_compat->flags = iwr_lcl.u.data.flags;
+
+ return ret;
+ } else
+#endif
+ return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd);
+ }
return -EOPNOTSUPP;
}
diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c
index 7ecd04c21360..997ff7b2509b 100644
--- a/net/x25/x25_facilities.c
+++ b/net/x25/x25_facilities.c
@@ -277,6 +277,7 @@ int x25_negotiate_facilities(struct sk_buff *skb, struct sock *sk,
memset(&theirs, 0, sizeof(theirs));
memcpy(new, ours, sizeof(*new));
+ memset(dte, 0, sizeof(*dte));
len = x25_parse_facilities(skb, &theirs, dte, &x25->vc_facil_mask);
if (len < 0)
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 85d1d4764612..d0e6d351aba8 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -13,6 +13,8 @@
#include <net/dst.h>
#include <net/ip.h>
#include <net/xfrm.h>
+#include <net/ip_tunnels.h>
+#include <net/ip6_tunnel.h>
static struct kmem_cache *secpath_cachep __read_mostly;
@@ -186,6 +188,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
struct xfrm_state *x = NULL;
xfrm_address_t *daddr;
struct xfrm_mode *inner_mode;
+ u32 mark = skb->mark;
unsigned int family;
int decaps = 0;
int async = 0;
@@ -203,6 +206,18 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
XFRM_SPI_SKB_CB(skb)->daddroff);
family = XFRM_SPI_SKB_CB(skb)->family;
+ /* if tunnel is present override skb->mark value with tunnel i_key */
+ if (XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4) {
+ switch (family) {
+ case AF_INET:
+ mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4->parms.i_key);
+ break;
+ case AF_INET6:
+ mark = be32_to_cpu(XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6->parms.i_key);
+ break;
+ }
+ }
+
/* Allocate new secpath or COW existing one. */
if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
struct sec_path *sp;
@@ -229,7 +244,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
goto drop;
}
- x = xfrm_state_lookup(net, skb->mark, daddr, spi, nexthdr, family);
+ x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family);
if (x == NULL) {
XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
xfrm_audit_state_notfound(skb, family, spi, seq);
@@ -277,12 +292,15 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
XFRM_SKB_CB(skb)->seq.input.hi = seq_hi;
skb_dst_force(skb);
+ dev_hold(skb->dev);
nexthdr = x->type->input(x, skb);
if (nexthdr == -EINPROGRESS)
return 0;
resume:
+ dev_put(skb->dev);
+
spin_lock(&x->lock);
if (nexthdr <= 0) {
if (nexthdr == -EBADMSG) {
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index f62fd2c5d836..4769382d9478 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2801,7 +2801,6 @@ static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
{
- struct net *net;
int err = 0;
if (unlikely(afinfo == NULL))
return -EINVAL;
@@ -2832,26 +2831,6 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
}
spin_unlock(&xfrm_policy_afinfo_lock);
- rtnl_lock();
- for_each_net(net) {
- struct dst_ops *xfrm_dst_ops;
-
- switch (afinfo->family) {
- case AF_INET:
- xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops;
- break;
-#if IS_ENABLED(CONFIG_IPV6)
- case AF_INET6:
- xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops;
- break;
-#endif
- default:
- BUG();
- }
- *xfrm_dst_ops = *afinfo->dst_ops;
- }
- rtnl_unlock();
-
return err;
}
EXPORT_SYMBOL(xfrm_policy_register_afinfo);
@@ -2887,22 +2866,6 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
}
EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
-static void __net_init xfrm_dst_ops_init(struct net *net)
-{
- struct xfrm_policy_afinfo *afinfo;
-
- rcu_read_lock();
- afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]);
- if (afinfo)
- net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops;
-#if IS_ENABLED(CONFIG_IPV6)
- afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]);
- if (afinfo)
- net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops;
-#endif
- rcu_read_unlock();
-}
-
static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
@@ -3050,7 +3013,6 @@ static int __net_init xfrm_net_init(struct net *net)
rv = xfrm_policy_init(net);
if (rv < 0)
goto out_policy;
- xfrm_dst_ops_init(net);
rv = xfrm_sysctl_init(net);
if (rv < 0)
goto out_sysctl;