aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/9p/client.c2
-rw-r--r--net/9p/trans_virtio.c5
-rw-r--r--net/batman-adv/bat_iv_ogm.c30
-rw-r--r--net/batman-adv/hard-interface.c22
-rw-r--r--net/batman-adv/originator.c36
-rw-r--r--net/batman-adv/originator.h4
-rw-r--r--net/batman-adv/routing.c4
-rw-r--r--net/batman-adv/send.c9
-rw-r--r--net/batman-adv/translation-table.c23
-rw-r--r--net/bluetooth/hidp/core.c16
-rw-r--r--net/bluetooth/hidp/hidp.h4
-rw-r--r--net/bridge/br_device.c54
-rw-r--r--net/bridge/br_fdb.c137
-rw-r--r--net/bridge/br_if.c6
-rw-r--r--net/bridge/br_input.c4
-rw-r--r--net/bridge/br_multicast.c6
-rw-r--r--net/bridge/br_private.h13
-rw-r--r--net/bridge/br_stp_if.c2
-rw-r--r--net/bridge/br_vlan.c27
-rw-r--r--net/caif/caif_dev.c1
-rw-r--r--net/caif/cfsrvl.c1
-rw-r--r--net/can/af_can.c3
-rw-r--r--net/can/bcm.c4
-rw-r--r--net/can/raw.c27
-rw-r--r--net/ceph/messenger.c8
-rw-r--r--net/ceph/osd_client.c84
-rw-r--r--net/compat.c9
-rw-r--r--net/core/dev.c28
-rw-r--r--net/core/fib_rules.c7
-rw-r--r--net/core/flow_dissector.c20
-rw-r--r--net/core/neighbour.c8
-rw-r--r--net/core/netpoll.c4
-rw-r--r--net/core/rtnetlink.c21
-rw-r--r--net/core/skbuff.c3
-rw-r--r--net/core/sock.c6
-rw-r--r--net/dccp/ccids/lib/tfrc.c2
-rw-r--r--net/dccp/ccids/lib/tfrc.h1
-rw-r--r--net/decnet/af_decnet.c5
-rw-r--r--net/hsr/hsr_framereg.c2
-rw-r--r--net/ieee802154/6lowpan.c23
-rw-r--r--net/ipv4/af_inet.c7
-rw-r--r--net/ipv4/devinet.c3
-rw-r--r--net/ipv4/inet_fragment.c5
-rw-r--r--net/ipv4/ip_forward.c71
-rw-r--r--net/ipv4/ip_output.c3
-rw-r--r--net/ipv4/ip_tunnel.c82
-rw-r--r--net/ipv4/ip_tunnel_core.c47
-rw-r--r--net/ipv4/ipconfig.c2
-rw-r--r--net/ipv4/netfilter/Kconfig5
-rw-r--r--net/ipv4/netfilter/Makefile1
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c5
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c4
-rw-r--r--net/ipv4/netfilter/nft_reject_ipv4.c75
-rw-r--r--net/ipv4/route.c13
-rw-r--r--net/ipv4/tcp.c10
-rw-r--r--net/ipv4/tcp_cong.c3
-rw-r--r--net/ipv4/tcp_input.c21
-rw-r--r--net/ipv4/tcp_output.c37
-rw-r--r--net/ipv4/udp_offload.c17
-rw-r--r--net/ipv6/Kconfig1
-rw-r--r--net/ipv6/addrconf.c2
-rw-r--r--net/ipv6/exthdrs_core.c2
-rw-r--r--net/ipv6/exthdrs_offload.c4
-rw-r--r--net/ipv6/icmp.c2
-rw-r--r--net/ipv6/ip6_offload.c20
-rw-r--r--net/ipv6/ip6_output.c20
-rw-r--r--net/ipv6/netfilter/Kconfig5
-rw-r--r--net/ipv6/netfilter/Makefile1
-rw-r--r--net/ipv6/netfilter/nft_reject_ipv6.c76
-rw-r--r--net/ipv6/ping.c1
-rw-r--r--net/ipv6/route.c2
-rw-r--r--net/ipv6/sit.c19
-rw-r--r--net/ipv6/udp_offload.c2
-rw-r--r--net/ipx/af_ipx.c22
-rw-r--r--net/ipx/ipx_route.c4
-rw-r--r--net/l2tp/l2tp_core.c4
-rw-r--r--net/l2tp/l2tp_core.h1
-rw-r--r--net/l2tp/l2tp_netlink.c4
-rw-r--r--net/l2tp/l2tp_ppp.c13
-rw-r--r--net/mac80211/iface.c6
-rw-r--r--net/netfilter/Kconfig6
-rw-r--r--net/netfilter/Makefile1
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c8
-rw-r--r--net/netfilter/nf_conntrack_core.c55
-rw-r--r--net/netfilter/nf_conntrack_netlink.c35
-rw-r--r--net/netfilter/nf_nat_core.c56
-rw-r--r--net/netfilter/nf_synproxy_core.c5
-rw-r--r--net/netfilter/nf_tables_api.c82
-rw-r--r--net/netfilter/nf_tables_core.c6
-rw-r--r--net/netfilter/nft_ct.c16
-rw-r--r--net/netfilter/nft_log.c5
-rw-r--r--net/netfilter/nft_lookup.c1
-rw-r--r--net/netfilter/nft_meta.c4
-rw-r--r--net/netfilter/nft_payload.c3
-rw-r--r--net/netfilter/nft_queue.c4
-rw-r--r--net/netfilter/nft_rbtree.c16
-rw-r--r--net/netfilter/nft_reject.c89
-rw-r--r--net/netfilter/nft_reject_inet.c63
-rw-r--r--net/netfilter/xt_CT.c7
-rw-r--r--net/netlink/af_netlink.c4
-rw-r--r--net/openvswitch/datapath.c23
-rw-r--r--net/openvswitch/flow_table.c88
-rw-r--r--net/openvswitch/flow_table.h2
-rw-r--r--net/packet/af_packet.c26
-rw-r--r--net/sched/sch_pie.c21
-rw-r--r--net/sched/sch_tbf.c24
-rw-r--r--net/sctp/associola.c211
-rw-r--r--net/sctp/ipv6.c2
-rw-r--r--net/sctp/sm_make_chunk.c4
-rw-r--r--net/sctp/sm_sideeffect.c7
-rw-r--r--net/sctp/sm_statefuns.c12
-rw-r--r--net/sctp/socket.c47
-rw-r--r--net/sctp/sysctl.c18
-rw-r--r--net/sctp/ulpevent.c8
-rw-r--r--net/sunrpc/auth_gss/auth_gss.c27
-rw-r--r--net/sunrpc/backchannel_rqst.c6
-rw-r--r--net/sunrpc/svc_xprt.c6
-rw-r--r--net/sunrpc/xprtsock.c6
-rw-r--r--net/tipc/bearer.c7
-rw-r--r--net/tipc/config.c11
-rw-r--r--net/tipc/core.c109
-rw-r--r--net/tipc/core.h2
-rw-r--r--net/tipc/handler.c1
-rw-r--r--net/tipc/link.c7
-rw-r--r--net/tipc/name_table.c40
-rw-r--r--net/tipc/netlink.c8
-rw-r--r--net/tipc/ref.c3
-rw-r--r--net/tipc/server.c19
-rw-r--r--net/tipc/server.h2
-rw-r--r--net/tipc/socket.c12
-rw-r--r--net/tipc/subscr.c19
-rw-r--r--net/unix/af_unix.c3
-rw-r--r--net/xfrm/xfrm_policy.c2
-rw-r--r--net/xfrm/xfrm_state.c23
-rw-r--r--net/xfrm/xfrm_user.c5
135 files changed, 1594 insertions, 976 deletions
diff --git a/net/9p/client.c b/net/9p/client.c
index a5e4d2dcb03e..9186550d77a6 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -204,7 +204,7 @@ free_and_return:
return ret;
}
-struct p9_fcall *p9_fcall_alloc(int alloc_msize)
+static struct p9_fcall *p9_fcall_alloc(int alloc_msize)
{
struct p9_fcall *fc;
fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index cd1e1ede73a4..ac2666c1d011 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -340,7 +340,10 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
int count = nr_pages;
while (nr_pages) {
s = rest_of_page(data);
- pages[index++] = kmap_to_page(data);
+ if (is_vmalloc_addr(data))
+ pages[index++] = vmalloc_to_page(data);
+ else
+ pages[index++] = kmap_to_page(data);
data += s;
nr_pages--;
}
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 512159bf607f..8323bced8e5b 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -241,19 +241,19 @@ batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const uint8_t *addr)
size = bat_priv->num_ifaces * sizeof(uint8_t);
orig_node->bat_iv.bcast_own_sum = kzalloc(size, GFP_ATOMIC);
if (!orig_node->bat_iv.bcast_own_sum)
- goto free_bcast_own;
+ goto free_orig_node;
hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig,
batadv_choose_orig, orig_node,
&orig_node->hash_entry);
if (hash_added != 0)
- goto free_bcast_own;
+ goto free_orig_node;
return orig_node;
-free_bcast_own:
- kfree(orig_node->bat_iv.bcast_own);
free_orig_node:
+ /* free twice, as batadv_orig_node_new sets refcount to 2 */
+ batadv_orig_node_free_ref(orig_node);
batadv_orig_node_free_ref(orig_node);
return NULL;
@@ -266,7 +266,7 @@ batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface,
struct batadv_orig_node *orig_neigh)
{
struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface);
- struct batadv_neigh_node *neigh_node;
+ struct batadv_neigh_node *neigh_node, *tmp_neigh_node;
neigh_node = batadv_neigh_node_new(hard_iface, neigh_addr, orig_node);
if (!neigh_node)
@@ -281,14 +281,24 @@ batadv_iv_ogm_neigh_new(struct batadv_hard_iface *hard_iface,
neigh_node->orig_node = orig_neigh;
neigh_node->if_incoming = hard_iface;
- batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
- "Creating new neighbor %pM for orig_node %pM on interface %s\n",
- neigh_addr, orig_node->orig, hard_iface->net_dev->name);
-
spin_lock_bh(&orig_node->neigh_list_lock);
- hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
+ tmp_neigh_node = batadv_neigh_node_get(orig_node, hard_iface,
+ neigh_addr);
+ if (!tmp_neigh_node) {
+ hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list);
+ } else {
+ kfree(neigh_node);
+ batadv_hardif_free_ref(hard_iface);
+ neigh_node = tmp_neigh_node;
+ }
spin_unlock_bh(&orig_node->neigh_list_lock);
+ if (!tmp_neigh_node)
+ batadv_dbg(BATADV_DBG_BATMAN, bat_priv,
+ "Creating new neighbor %pM for orig_node %pM on interface %s\n",
+ neigh_addr, orig_node->orig,
+ hard_iface->net_dev->name);
+
out:
return neigh_node;
}
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index 3d417d3641c6..b851cc580853 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -241,7 +241,7 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface)
{
struct batadv_priv *bat_priv = netdev_priv(soft_iface);
const struct batadv_hard_iface *hard_iface;
- int min_mtu = ETH_DATA_LEN;
+ int min_mtu = INT_MAX;
rcu_read_lock();
list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) {
@@ -256,8 +256,6 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface)
}
rcu_read_unlock();
- atomic_set(&bat_priv->packet_size_max, min_mtu);
-
if (atomic_read(&bat_priv->fragmentation) == 0)
goto out;
@@ -268,13 +266,21 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface)
min_mtu = min_t(int, min_mtu, BATADV_FRAG_MAX_FRAG_SIZE);
min_mtu -= sizeof(struct batadv_frag_packet);
min_mtu *= BATADV_FRAG_MAX_FRAGMENTS;
- atomic_set(&bat_priv->packet_size_max, min_mtu);
-
- /* with fragmentation enabled we can fragment external packets easily */
- min_mtu = min_t(int, min_mtu, ETH_DATA_LEN);
out:
- return min_mtu - batadv_max_header_len();
+ /* report to the other components the maximum amount of bytes that
+ * batman-adv can send over the wire (without considering the payload
+ * overhead). For example, this value is used by TT to compute the
+ * maximum local table table size
+ */
+ atomic_set(&bat_priv->packet_size_max, min_mtu);
+
+ /* the real soft-interface MTU is computed by removing the payload
+ * overhead from the maximum amount of bytes that was just computed.
+ *
+ * However batman-adv does not support MTUs bigger than ETH_DATA_LEN
+ */
+ return min_t(int, min_mtu - batadv_max_header_len(), ETH_DATA_LEN);
}
/* adjusts the MTU if a new interface with a smaller MTU appeared. */
diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c
index 6df12a2e3605..853941629dc1 100644
--- a/net/batman-adv/originator.c
+++ b/net/batman-adv/originator.c
@@ -458,6 +458,42 @@ out:
}
/**
+ * batadv_neigh_node_get - retrieve a neighbour from the list
+ * @orig_node: originator which the neighbour belongs to
+ * @hard_iface: the interface where this neighbour is connected to
+ * @addr: the address of the neighbour
+ *
+ * Looks for and possibly returns a neighbour belonging to this originator list
+ * which is connected through the provided hard interface.
+ * Returns NULL if the neighbour is not found.
+ */
+struct batadv_neigh_node *
+batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
+ const struct batadv_hard_iface *hard_iface,
+ const uint8_t *addr)
+{
+ struct batadv_neigh_node *tmp_neigh_node, *res = NULL;
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(tmp_neigh_node, &orig_node->neigh_list, list) {
+ if (!batadv_compare_eth(tmp_neigh_node->addr, addr))
+ continue;
+
+ if (tmp_neigh_node->if_incoming != hard_iface)
+ continue;
+
+ if (!atomic_inc_not_zero(&tmp_neigh_node->refcount))
+ continue;
+
+ res = tmp_neigh_node;
+ break;
+ }
+ rcu_read_unlock();
+
+ return res;
+}
+
+/**
* batadv_orig_ifinfo_free_rcu - free the orig_ifinfo object
* @rcu: rcu pointer of the orig_ifinfo object
*/
diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h
index 37be290f63f6..db3a9ed734cb 100644
--- a/net/batman-adv/originator.h
+++ b/net/batman-adv/originator.h
@@ -29,6 +29,10 @@ void batadv_orig_node_free_ref_now(struct batadv_orig_node *orig_node);
struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv,
const uint8_t *addr);
struct batadv_neigh_node *
+batadv_neigh_node_get(const struct batadv_orig_node *orig_node,
+ const struct batadv_hard_iface *hard_iface,
+ const uint8_t *addr);
+struct batadv_neigh_node *
batadv_neigh_node_new(struct batadv_hard_iface *hard_iface,
const uint8_t *neigh_addr,
struct batadv_orig_node *orig_node);
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 1ed9f7c9ecea..a953d5b196a3 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -688,7 +688,7 @@ static int batadv_check_unicast_ttvn(struct batadv_priv *bat_priv,
int is_old_ttvn;
/* check if there is enough data before accessing it */
- if (pskb_may_pull(skb, hdr_len + ETH_HLEN) < 0)
+ if (!pskb_may_pull(skb, hdr_len + ETH_HLEN))
return 0;
/* create a copy of the skb (in case of for re-routing) to modify it. */
@@ -918,6 +918,8 @@ int batadv_recv_unicast_tvlv(struct sk_buff *skb,
if (ret != NET_RX_SUCCESS)
ret = batadv_route_unicast_packet(skb, recv_if);
+ else
+ consume_skb(skb);
return ret;
}
diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c
index 579f5f00a385..843febd1e519 100644
--- a/net/batman-adv/send.c
+++ b/net/batman-adv/send.c
@@ -254,9 +254,9 @@ static int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
unsigned short vid)
{
- struct ethhdr *ethhdr = (struct ethhdr *)skb->data;
+ struct ethhdr *ethhdr;
struct batadv_unicast_packet *unicast_packet;
- int ret = NET_XMIT_DROP;
+ int ret = NET_XMIT_DROP, hdr_size;
if (!orig_node)
goto out;
@@ -265,12 +265,16 @@ static int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
case BATADV_UNICAST:
if (!batadv_send_skb_prepare_unicast(skb, orig_node))
goto out;
+
+ hdr_size = sizeof(*unicast_packet);
break;
case BATADV_UNICAST_4ADDR:
if (!batadv_send_skb_prepare_unicast_4addr(bat_priv, skb,
orig_node,
packet_subtype))
goto out;
+
+ hdr_size = sizeof(struct batadv_unicast_4addr_packet);
break;
default:
/* this function supports UNICAST and UNICAST_4ADDR only. It
@@ -279,6 +283,7 @@ static int batadv_send_skb_unicast(struct batadv_priv *bat_priv,
goto out;
}
+ ethhdr = (struct ethhdr *)(skb->data + hdr_size);
unicast_packet = (struct batadv_unicast_packet *)skb->data;
/* inform the destination node that we are still missing a correct route
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index b6071f675a3e..959dde721c46 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1975,6 +1975,7 @@ static uint32_t batadv_tt_global_crc(struct batadv_priv *bat_priv,
struct hlist_head *head;
uint32_t i, crc_tmp, crc = 0;
uint8_t flags;
+ __be16 tmp_vid;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -2011,8 +2012,11 @@ static uint32_t batadv_tt_global_crc(struct batadv_priv *bat_priv,
orig_node))
continue;
- crc_tmp = crc32c(0, &tt_common->vid,
- sizeof(tt_common->vid));
+ /* use network order to read the VID: this ensures that
+ * every node reads the bytes in the same order.
+ */
+ tmp_vid = htons(tt_common->vid);
+ crc_tmp = crc32c(0, &tmp_vid, sizeof(tmp_vid));
/* compute the CRC on flags that have to be kept in sync
* among nodes
@@ -2046,6 +2050,7 @@ static uint32_t batadv_tt_local_crc(struct batadv_priv *bat_priv,
struct hlist_head *head;
uint32_t i, crc_tmp, crc = 0;
uint8_t flags;
+ __be16 tmp_vid;
for (i = 0; i < hash->size; i++) {
head = &hash->table[i];
@@ -2064,8 +2069,11 @@ static uint32_t batadv_tt_local_crc(struct batadv_priv *bat_priv,
if (tt_common->flags & BATADV_TT_CLIENT_NEW)
continue;
- crc_tmp = crc32c(0, &tt_common->vid,
- sizeof(tt_common->vid));
+ /* use network order to read the VID: this ensures that
+ * every node reads the bytes in the same order.
+ */
+ tmp_vid = htons(tt_common->vid);
+ crc_tmp = crc32c(0, &tmp_vid, sizeof(tmp_vid));
/* compute the CRC on flags that have to be kept in sync
* among nodes
@@ -2262,6 +2270,7 @@ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node,
{
struct batadv_tvlv_tt_vlan_data *tt_vlan_tmp;
struct batadv_orig_node_vlan *vlan;
+ uint32_t crc;
int i;
/* check if each received CRC matches the locally stored one */
@@ -2281,7 +2290,10 @@ static bool batadv_tt_global_check_crc(struct batadv_orig_node *orig_node,
if (!vlan)
return false;
- if (vlan->tt.crc != ntohl(tt_vlan_tmp->crc))
+ crc = vlan->tt.crc;
+ batadv_orig_node_vlan_free_ref(vlan);
+
+ if (crc != ntohl(tt_vlan_tmp->crc))
return false;
}
@@ -3218,7 +3230,6 @@ static void batadv_tt_update_orig(struct batadv_priv *bat_priv,
spin_lock_bh(&orig_node->tt_lock);
- tt_change = (struct batadv_tvlv_tt_change *)tt_buff;
batadv_tt_update_changes(bat_priv, orig_node, tt_num_changes,
ttvn, tt_change);
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 292e619db896..d9fb93451442 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -430,6 +430,16 @@ static void hidp_del_timer(struct hidp_session *session)
del_timer(&session->timer);
}
+static void hidp_process_report(struct hidp_session *session,
+ int type, const u8 *data, int len, int intr)
+{
+ if (len > HID_MAX_BUFFER_SIZE)
+ len = HID_MAX_BUFFER_SIZE;
+
+ memcpy(session->input_buf, data, len);
+ hid_input_report(session->hid, type, session->input_buf, len, intr);
+}
+
static void hidp_process_handshake(struct hidp_session *session,
unsigned char param)
{
@@ -502,7 +512,8 @@ static int hidp_process_data(struct hidp_session *session, struct sk_buff *skb,
hidp_input_report(session, skb);
if (session->hid)
- hid_input_report(session->hid, HID_INPUT_REPORT, skb->data, skb->len, 0);
+ hidp_process_report(session, HID_INPUT_REPORT,
+ skb->data, skb->len, 0);
break;
case HIDP_DATA_RTYPE_OTHER:
@@ -584,7 +595,8 @@ static void hidp_recv_intr_frame(struct hidp_session *session,
hidp_input_report(session, skb);
if (session->hid) {
- hid_input_report(session->hid, HID_INPUT_REPORT, skb->data, skb->len, 1);
+ hidp_process_report(session, HID_INPUT_REPORT,
+ skb->data, skb->len, 1);
BT_DBG("report len %d", skb->len);
}
} else {
diff --git a/net/bluetooth/hidp/hidp.h b/net/bluetooth/hidp/hidp.h
index ab5241400cf7..8798492a6e99 100644
--- a/net/bluetooth/hidp/hidp.h
+++ b/net/bluetooth/hidp/hidp.h
@@ -24,6 +24,7 @@
#define __HIDP_H
#include <linux/types.h>
+#include <linux/hid.h>
#include <linux/kref.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/l2cap.h>
@@ -179,6 +180,9 @@ struct hidp_session {
/* Used in hidp_output_raw_report() */
int output_report_success; /* boolean */
+
+ /* temporary input buffer */
+ u8 input_buf[HID_MAX_BUFFER_SIZE];
};
/* HIDP init defines */
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index e4401a531afb..63f0455c0bc3 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -187,8 +187,7 @@ static int br_set_mac_address(struct net_device *dev, void *p)
spin_lock_bh(&br->lock);
if (!ether_addr_equal(dev->dev_addr, addr->sa_data)) {
- memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
- br_fdb_change_mac_address(br, addr->sa_data);
+ /* Mac address will be changed in br_stp_change_bridge_id(). */
br_stp_change_bridge_id(br, addr->sa_data);
}
spin_unlock_bh(&br->lock);
@@ -226,6 +225,33 @@ static void br_netpoll_cleanup(struct net_device *dev)
br_netpoll_disable(p);
}
+static int __br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
+{
+ struct netpoll *np;
+ int err;
+
+ np = kzalloc(sizeof(*p->np), gfp);
+ if (!np)
+ return -ENOMEM;
+
+ err = __netpoll_setup(np, p->dev, gfp);
+ if (err) {
+ kfree(np);
+ return err;
+ }
+
+ p->np = np;
+ return err;
+}
+
+int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
+{
+ if (!p->br->dev->npinfo)
+ return 0;
+
+ return __br_netpoll_enable(p, gfp);
+}
+
static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni,
gfp_t gfp)
{
@@ -236,7 +262,7 @@ static int br_netpoll_setup(struct net_device *dev, struct netpoll_info *ni,
list_for_each_entry(p, &br->port_list, list) {
if (!p->dev)
continue;
- err = br_netpoll_enable(p, gfp);
+ err = __br_netpoll_enable(p, gfp);
if (err)
goto fail;
}
@@ -249,28 +275,6 @@ fail:
goto out;
}
-int br_netpoll_enable(struct net_bridge_port *p, gfp_t gfp)
-{
- struct netpoll *np;
- int err;
-
- if (!p->br->dev->npinfo)
- return 0;
-
- np = kzalloc(sizeof(*p->np), gfp);
- if (!np)
- return -ENOMEM;
-
- err = __netpoll_setup(np, p->dev, gfp);
- if (err) {
- kfree(np);
- return err;
- }
-
- p->np = np;
- return err;
-}
-
void br_netpoll_disable(struct net_bridge_port *p)
{
struct netpoll *np = p->np;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index c5f5a4a933f4..9203d5a1943f 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -27,6 +27,9 @@
#include "br_private.h"
static struct kmem_cache *br_fdb_cache __read_mostly;
+static struct net_bridge_fdb_entry *fdb_find(struct hlist_head *head,
+ const unsigned char *addr,
+ __u16 vid);
static int fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid);
static void fdb_notify(struct net_bridge *br,
@@ -89,11 +92,57 @@ static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f)
call_rcu(&f->rcu, fdb_rcu_free);
}
+/* Delete a local entry if no other port had the same address. */
+static void fdb_delete_local(struct net_bridge *br,
+ const struct net_bridge_port *p,
+ struct net_bridge_fdb_entry *f)
+{
+ const unsigned char *addr = f->addr.addr;
+ u16 vid = f->vlan_id;
+ struct net_bridge_port *op;
+
+ /* Maybe another port has same hw addr? */
+ list_for_each_entry(op, &br->port_list, list) {
+ if (op != p && ether_addr_equal(op->dev->dev_addr, addr) &&
+ (!vid || nbp_vlan_find(op, vid))) {
+ f->dst = op;
+ f->added_by_user = 0;
+ return;
+ }
+ }
+
+ /* Maybe bridge device has same hw addr? */
+ if (p && ether_addr_equal(br->dev->dev_addr, addr) &&
+ (!vid || br_vlan_find(br, vid))) {
+ f->dst = NULL;
+ f->added_by_user = 0;
+ return;
+ }
+
+ fdb_delete(br, f);
+}
+
+void br_fdb_find_delete_local(struct net_bridge *br,
+ const struct net_bridge_port *p,
+ const unsigned char *addr, u16 vid)
+{
+ struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
+ struct net_bridge_fdb_entry *f;
+
+ spin_lock_bh(&br->hash_lock);
+ f = fdb_find(head, addr, vid);
+ if (f && f->is_local && !f->added_by_user && f->dst == p)
+ fdb_delete_local(br, p, f);
+ spin_unlock_bh(&br->hash_lock);
+}
+
void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
{
struct net_bridge *br = p->br;
- bool no_vlan = (nbp_get_vlan_info(p) == NULL) ? true : false;
+ struct net_port_vlans *pv = nbp_get_vlan_info(p);
+ bool no_vlan = !pv;
int i;
+ u16 vid;
spin_lock_bh(&br->hash_lock);
@@ -104,38 +153,34 @@ void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr)
struct net_bridge_fdb_entry *f;
f = hlist_entry(h, struct net_bridge_fdb_entry, hlist);
- if (f->dst == p && f->is_local) {
- /* maybe another port has same hw addr? */
- struct net_bridge_port *op;
- u16 vid = f->vlan_id;
- list_for_each_entry(op, &br->port_list, list) {
- if (op != p &&
- ether_addr_equal(op->dev->dev_addr,
- f->addr.addr) &&
- nbp_vlan_find(op, vid)) {
- f->dst = op;
- goto insert;
- }
- }
-
+ if (f->dst == p && f->is_local && !f->added_by_user) {
/* delete old one */
- fdb_delete(br, f);
-insert:
- /* insert new address, may fail if invalid
- * address or dup.
- */
- fdb_insert(br, p, newaddr, vid);
+ fdb_delete_local(br, p, f);
/* if this port has no vlan information
* configured, we can safely be done at
* this point.
*/
if (no_vlan)
- goto done;
+ goto insert;
}
}
}
+insert:
+ /* insert new address, may fail if invalid address or dup. */
+ fdb_insert(br, p, newaddr, 0);
+
+ if (no_vlan)
+ goto done;
+
+ /* Now add entries for every VLAN configured on the port.
+ * This function runs under RTNL so the bitmap will not change
+ * from under us.
+ */
+ for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID)
+ fdb_insert(br, p, newaddr, vid);
+
done:
spin_unlock_bh(&br->hash_lock);
}
@@ -146,10 +191,12 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
struct net_port_vlans *pv;
u16 vid = 0;
+ spin_lock_bh(&br->hash_lock);
+
/* If old entry was unassociated with any port, then delete it. */
f = __br_fdb_get(br, br->dev->dev_addr, 0);
if (f && f->is_local && !f->dst)
- fdb_delete(br, f);
+ fdb_delete_local(br, NULL, f);
fdb_insert(br, NULL, newaddr, 0);
@@ -159,14 +206,16 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
*/
pv = br_get_vlan_info(br);
if (!pv)
- return;
+ goto out;
for_each_set_bit_from(vid, pv->vlan_bitmap, VLAN_N_VID) {
f = __br_fdb_get(br, br->dev->dev_addr, vid);
if (f && f->is_local && !f->dst)
- fdb_delete(br, f);
+ fdb_delete_local(br, NULL, f);
fdb_insert(br, NULL, newaddr, vid);
}
+out:
+ spin_unlock_bh(&br->hash_lock);
}
void br_fdb_cleanup(unsigned long _data)
@@ -235,25 +284,11 @@ void br_fdb_delete_by_port(struct net_bridge *br,
if (f->is_static && !do_all)
continue;
- /*
- * if multiple ports all have the same device address
- * then when one port is deleted, assign
- * the local entry to other port
- */
- if (f->is_local) {
- struct net_bridge_port *op;
- list_for_each_entry(op, &br->port_list, list) {
- if (op != p &&
- ether_addr_equal(op->dev->dev_addr,
- f->addr.addr)) {
- f->dst = op;
- goto skip_delete;
- }
- }
- }
- fdb_delete(br, f);
- skip_delete: ;
+ if (f->is_local)
+ fdb_delete_local(br, p, f);
+ else
+ fdb_delete(br, f);
}
}
spin_unlock_bh(&br->hash_lock);
@@ -397,6 +432,7 @@ static struct net_bridge_fdb_entry *fdb_create(struct hlist_head *head,
fdb->vlan_id = vid;
fdb->is_local = 0;
fdb->is_static = 0;
+ fdb->added_by_user = 0;
fdb->updated = fdb->used = jiffies;
hlist_add_head_rcu(&fdb->hlist, head);
}
@@ -447,7 +483,7 @@ int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
}
void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
- const unsigned char *addr, u16 vid)
+ const unsigned char *addr, u16 vid, bool added_by_user)
{
struct hlist_head *head = &br->hash[br_mac_hash(addr, vid)];
struct net_bridge_fdb_entry *fdb;
@@ -473,13 +509,18 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
/* fastpath: update of existing entry */
fdb->dst = source;
fdb->updated = jiffies;
+ if (unlikely(added_by_user))
+ fdb->added_by_user = 1;
}
} else {
spin_lock(&br->hash_lock);
if (likely(!fdb_find(head, addr, vid))) {
fdb = fdb_create(head, source, addr, vid);
- if (fdb)
+ if (fdb) {
+ if (unlikely(added_by_user))
+ fdb->added_by_user = 1;
fdb_notify(br, fdb, RTM_NEWNEIGH);
+ }
}
/* else we lose race and someone else inserts
* it first, don't bother updating
@@ -647,6 +688,7 @@ static int fdb_add_entry(struct net_bridge_port *source, const __u8 *addr,
modified = true;
}
+ fdb->added_by_user = 1;
fdb->used = jiffies;
if (modified) {
@@ -664,7 +706,7 @@ static int __br_fdb_add(struct ndmsg *ndm, struct net_bridge_port *p,
if (ndm->ndm_flags & NTF_USE) {
rcu_read_lock();
- br_fdb_update(p->br, p, addr, vid);
+ br_fdb_update(p->br, p, addr, vid, true);
rcu_read_unlock();
} else {
spin_lock_bh(&p->br->hash_lock);
@@ -749,8 +791,7 @@ out:
return err;
}
-int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr,
- u16 vlan)
+static int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, u16 vlan)
{
struct hlist_head *head = &br->hash[br_mac_hash(addr, vlan)];
struct net_bridge_fdb_entry *fdb;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index cffe1d666ba1..54d207d3a31c 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -389,6 +389,9 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
if (br->dev->needed_headroom < dev->needed_headroom)
br->dev->needed_headroom = dev->needed_headroom;
+ if (br_fdb_insert(br, p, dev->dev_addr, 0))
+ netdev_err(dev, "failed insert local address bridge forwarding table\n");
+
spin_lock_bh(&br->lock);
changed_addr = br_stp_recalculate_bridge_id(br);
@@ -404,9 +407,6 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
dev_set_mtu(br->dev, br_min_mtu(br));
- if (br_fdb_insert(br, p, dev->dev_addr, 0))
- netdev_err(dev, "failed insert local address bridge forwarding table\n");
-
kobject_uevent(&p->kobj, KOBJ_ADD);
return 0;
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index bf8dc7d308d6..28d544627422 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -77,7 +77,7 @@ int br_handle_frame_finish(struct sk_buff *skb)
/* insert into forwarding database after filtering to avoid spoofing */
br = p->br;
if (p->flags & BR_LEARNING)
- br_fdb_update(br, p, eth_hdr(skb)->h_source, vid);
+ br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, false);
if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) &&
br_multicast_rcv(br, p, skb, vid))
@@ -148,7 +148,7 @@ static int br_handle_local_finish(struct sk_buff *skb)
br_vlan_get_tag(skb, &vid);
if (p->flags & BR_LEARNING)
- br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid);
+ br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false);
return 0; /* process further */
}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index ef66365b7354..fb0e36fac668 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1235,6 +1235,12 @@ static int br_ip6_multicast_query(struct net_bridge *br,
(port && port->state == BR_STATE_DISABLED))
goto out;
+ /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */
+ if (!(ipv6_addr_type(&ip6h->saddr) & IPV6_ADDR_LINKLOCAL)) {
+ err = -EINVAL;
+ goto out;
+ }
+
if (skb->len == sizeof(*mld)) {
if (!pskb_may_pull(skb, sizeof(*mld))) {
err = -EINVAL;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index fcd12333c59b..3ba11bc99b65 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -104,6 +104,7 @@ struct net_bridge_fdb_entry
mac_addr addr;
unsigned char is_local;
unsigned char is_static;
+ unsigned char added_by_user;
__u16 vlan_id;
};
@@ -370,6 +371,9 @@ static inline void br_netpoll_disable(struct net_bridge_port *p)
int br_fdb_init(void);
void br_fdb_fini(void);
void br_fdb_flush(struct net_bridge *br);
+void br_fdb_find_delete_local(struct net_bridge *br,
+ const struct net_bridge_port *p,
+ const unsigned char *addr, u16 vid);
void br_fdb_changeaddr(struct net_bridge_port *p, const unsigned char *newaddr);
void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr);
void br_fdb_cleanup(unsigned long arg);
@@ -383,8 +387,7 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, unsigned long count,
int br_fdb_insert(struct net_bridge *br, struct net_bridge_port *source,
const unsigned char *addr, u16 vid);
void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source,
- const unsigned char *addr, u16 vid);
-int fdb_delete_by_addr(struct net_bridge *br, const u8 *addr, u16 vid);
+ const unsigned char *addr, u16 vid, bool added_by_user);
int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
struct net_device *dev, const unsigned char *addr);
@@ -584,6 +587,7 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br,
int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags);
int br_vlan_delete(struct net_bridge *br, u16 vid);
void br_vlan_flush(struct net_bridge *br);
+bool br_vlan_find(struct net_bridge *br, u16 vid);
int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val);
int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags);
int nbp_vlan_delete(struct net_bridge_port *port, u16 vid);
@@ -665,6 +669,11 @@ static inline void br_vlan_flush(struct net_bridge *br)
{
}
+static inline bool br_vlan_find(struct net_bridge *br, u16 vid)
+{
+ return false;
+}
+
static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags)
{
return -EOPNOTSUPP;
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index 656a6f3e40de..189ba1e7d851 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -194,6 +194,8 @@ void br_stp_change_bridge_id(struct net_bridge *br, const unsigned char *addr)
wasroot = br_is_root_bridge(br);
+ br_fdb_change_mac_address(br, addr);
+
memcpy(oldaddr, br->bridge_id.addr, ETH_ALEN);
memcpy(br->bridge_id.addr, addr, ETH_ALEN);
memcpy(br->dev->dev_addr, addr, ETH_ALEN);
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 4ca4d0a0151c..8249ca764c79 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -275,9 +275,7 @@ int br_vlan_delete(struct net_bridge *br, u16 vid)
if (!pv)
return -EINVAL;
- spin_lock_bh(&br->hash_lock);
- fdb_delete_by_addr(br, br->dev->dev_addr, vid);
- spin_unlock_bh(&br->hash_lock);
+ br_fdb_find_delete_local(br, NULL, br->dev->dev_addr, vid);
__vlan_del(pv, vid);
return 0;
@@ -295,6 +293,25 @@ void br_vlan_flush(struct net_bridge *br)
__vlan_flush(pv);
}
+bool br_vlan_find(struct net_bridge *br, u16 vid)
+{
+ struct net_port_vlans *pv;
+ bool found = false;
+
+ rcu_read_lock();
+ pv = rcu_dereference(br->vlan_info);
+
+ if (!pv)
+ goto out;
+
+ if (test_bit(vid, pv->vlan_bitmap))
+ found = true;
+
+out:
+ rcu_read_unlock();
+ return found;
+}
+
int br_vlan_filter_toggle(struct net_bridge *br, unsigned long val)
{
if (!rtnl_trylock())
@@ -359,9 +376,7 @@ int nbp_vlan_delete(struct net_bridge_port *port, u16 vid)
if (!pv)
return -EINVAL;
- spin_lock_bh(&port->br->hash_lock);
- fdb_delete_by_addr(port->br, port->dev->dev_addr, vid);
- spin_unlock_bh(&port->br->hash_lock);
+ br_fdb_find_delete_local(port->br, port, port->dev->dev_addr, vid);
return __vlan_del(pv, vid);
}
diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 4dca159435cf..edbca468fa73 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -22,6 +22,7 @@
#include <net/pkt_sched.h>
#include <net/caif/caif_device.h>
#include <net/caif/caif_layer.h>
+#include <net/caif/caif_dev.h>
#include <net/caif/cfpkt.h>
#include <net/caif/cfcnfg.h>
#include <net/caif/cfserl.h>
diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c
index 353f793d1b3b..a6e115463052 100644
--- a/net/caif/cfsrvl.c
+++ b/net/caif/cfsrvl.c
@@ -15,6 +15,7 @@
#include <net/caif/caif_layer.h>
#include <net/caif/cfsrvl.h>
#include <net/caif/cfpkt.h>
+#include <net/caif/caif_dev.h>
#define SRVL_CTRL_PKT_SIZE 1
#define SRVL_FLOW_OFF 0x81
diff --git a/net/can/af_can.c b/net/can/af_can.c
index d249874a366d..a27f8aad9e99 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -57,6 +57,7 @@
#include <linux/skbuff.h>
#include <linux/can.h>
#include <linux/can/core.h>
+#include <linux/can/skb.h>
#include <linux/ratelimit.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -290,7 +291,7 @@ int can_send(struct sk_buff *skb, int loop)
return -ENOMEM;
}
- newskb->sk = skb->sk;
+ can_skb_set_owner(newskb, skb->sk);
newskb->ip_summed = CHECKSUM_UNNECESSARY;
newskb->pkt_type = PACKET_BROADCAST;
}
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 3fc737b214c7..dcb75c0e66c1 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -268,7 +268,7 @@ static void bcm_can_tx(struct bcm_op *op)
/* send with loopback */
skb->dev = dev;
- skb->sk = op->sk;
+ can_skb_set_owner(skb, op->sk);
can_send(skb, 1);
/* update statistics */
@@ -1223,7 +1223,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
can_skb_prv(skb)->ifindex = dev->ifindex;
skb->dev = dev;
- skb->sk = sk;
+ can_skb_set_owner(skb, sk);
err = can_send(skb, 1); /* send with loopback */
dev_put(dev);
diff --git a/net/can/raw.c b/net/can/raw.c
index 07d72d852324..081e81fd017f 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -121,13 +121,9 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
if (!ro->recv_own_msgs && oskb->sk == sk)
return;
- /* do not pass frames with DLC > 8 to a legacy socket */
- if (!ro->fd_frames) {
- struct canfd_frame *cfd = (struct canfd_frame *)oskb->data;
-
- if (unlikely(cfd->len > CAN_MAX_DLEN))
- return;
- }
+ /* do not pass non-CAN2.0 frames to a legacy socket */
+ if (!ro->fd_frames && oskb->len != CAN_MTU)
+ return;
/* clone the given skb to be able to enqueue it into the rcv queue */
skb = skb_clone(oskb, GFP_ATOMIC);
@@ -715,6 +711,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock,
skb->dev = dev;
skb->sk = sk;
+ skb->priority = sk->sk_priority;
err = can_send(skb, ro->loopback);
@@ -737,9 +734,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size, int flags)
{
struct sock *sk = sock->sk;
- struct raw_sock *ro = raw_sk(sk);
struct sk_buff *skb;
- int rxmtu;
int err = 0;
int noblock;
@@ -750,20 +745,10 @@ static int raw_recvmsg(struct kiocb *iocb, struct socket *sock,
if (!skb)
return err;
- /*
- * when serving a legacy socket the DLC <= 8 is already checked inside
- * raw_rcv(). Now check if we need to pass a canfd_frame to a legacy
- * socket and cut the possible CANFD_MTU/CAN_MTU length to CAN_MTU
- */
- if (!ro->fd_frames)
- rxmtu = CAN_MTU;
- else
- rxmtu = skb->len;
-
- if (size < rxmtu)
+ if (size < skb->len)
msg->msg_flags |= MSG_TRUNC;
else
- size = rxmtu;
+ size = skb->len;
err = memcpy_toiovec(msg->msg_iov, skb->data, size);
if (err < 0) {
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 0e478a0f4204..30efc5c18622 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -840,9 +840,13 @@ static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
if (!cursor->bvec_iter.bi_size) {
bio = bio->bi_next;
- cursor->bvec_iter = bio->bi_iter;
+ cursor->bio = bio;
+ if (bio)
+ cursor->bvec_iter = bio->bi_iter;
+ else
+ memset(&cursor->bvec_iter, 0,
+ sizeof(cursor->bvec_iter));
}
- cursor->bio = bio;
if (!cursor->last_piece) {
BUG_ON(!cursor->resid);
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 010ff3bd58ad..0676f2b199d6 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1427,6 +1427,40 @@ static void __send_queued(struct ceph_osd_client *osdc)
}
/*
+ * Caller should hold map_sem for read and request_mutex.
+ */
+static int __ceph_osdc_start_request(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req,
+ bool nofail)
+{
+ int rc;
+
+ __register_request(osdc, req);
+ req->r_sent = 0;
+ req->r_got_reply = 0;
+ rc = __map_request(osdc, req, 0);
+ if (rc < 0) {
+ if (nofail) {
+ dout("osdc_start_request failed map, "
+ " will retry %lld\n", req->r_tid);
+ rc = 0;
+ } else {
+ __unregister_request(osdc, req);
+ }
+ return rc;
+ }
+
+ if (req->r_osd == NULL) {
+ dout("send_request %p no up osds in pg\n", req);
+ ceph_monc_request_next_osdmap(&osdc->client->monc);
+ } else {
+ __send_queued(osdc);
+ }
+
+ return 0;
+}
+
+/*
* Timeout callback, called every N seconds when 1 or more osd
* requests has been active for more than N seconds. When this
* happens, we ping all OSDs with requests who have timed out to
@@ -1653,6 +1687,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
osdmap_epoch = ceph_decode_32(&p);
/* lookup */
+ down_read(&osdc->map_sem);
mutex_lock(&osdc->request_mutex);
req = __lookup_request(osdc, tid);
if (req == NULL) {
@@ -1709,7 +1744,6 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
dout("redirect pool %lld\n", redir.oloc.pool);
__unregister_request(osdc, req);
- mutex_unlock(&osdc->request_mutex);
req->r_target_oloc = redir.oloc; /* struct */
@@ -1721,10 +1755,10 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
* successfully. In the future we might want to follow
* original request's nofail setting here.
*/
- err = ceph_osdc_start_request(osdc, req, true);
+ err = __ceph_osdc_start_request(osdc, req, true);
BUG_ON(err);
- goto done;
+ goto out_unlock;
}
already_completed = req->r_got_reply;
@@ -1742,8 +1776,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
req->r_got_reply = 1;
} else if ((flags & CEPH_OSD_FLAG_ONDISK) == 0) {
dout("handle_reply tid %llu dup ack\n", tid);
- mutex_unlock(&osdc->request_mutex);
- goto done;
+ goto out_unlock;
}
dout("handle_reply tid %llu flags %d\n", tid, flags);
@@ -1758,6 +1791,7 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
__unregister_request(osdc, req);
mutex_unlock(&osdc->request_mutex);
+ up_read(&osdc->map_sem);
if (!already_completed) {
if (req->r_unsafe_callback &&
@@ -1775,10 +1809,14 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
complete_request(req);
}
-done:
+out:
dout("req=%p req->r_linger=%d\n", req, req->r_linger);
ceph_osdc_put_request(req);
return;
+out_unlock:
+ mutex_unlock(&osdc->request_mutex);
+ up_read(&osdc->map_sem);
+ goto out;
bad_put:
req->r_result = -EIO;
@@ -1791,6 +1829,7 @@ bad_put:
ceph_osdc_put_request(req);
bad_mutex:
mutex_unlock(&osdc->request_mutex);
+ up_read(&osdc->map_sem);
bad:
pr_err("corrupt osd_op_reply got %d %d\n",
(int)msg->front.iov_len, le32_to_cpu(msg->hdr.front_len));
@@ -2351,34 +2390,16 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
struct ceph_osd_request *req,
bool nofail)
{
- int rc = 0;
+ int rc;
down_read(&osdc->map_sem);
mutex_lock(&osdc->request_mutex);
- __register_request(osdc, req);
- req->r_sent = 0;
- req->r_got_reply = 0;
- rc = __map_request(osdc, req, 0);
- if (rc < 0) {
- if (nofail) {
- dout("osdc_start_request failed map, "
- " will retry %lld\n", req->r_tid);
- rc = 0;
- } else {
- __unregister_request(osdc, req);
- }
- goto out_unlock;
- }
- if (req->r_osd == NULL) {
- dout("send_request %p no up osds in pg\n", req);
- ceph_monc_request_next_osdmap(&osdc->client->monc);
- } else {
- __send_queued(osdc);
- }
- rc = 0;
-out_unlock:
+
+ rc = __ceph_osdc_start_request(osdc, req, nofail);
+
mutex_unlock(&osdc->request_mutex);
up_read(&osdc->map_sem);
+
return rc;
}
EXPORT_SYMBOL(ceph_osdc_start_request);
@@ -2504,9 +2525,12 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
err = -ENOMEM;
osdc->notify_wq = create_singlethread_workqueue("ceph-watch-notify");
if (!osdc->notify_wq)
- goto out_msgpool;
+ goto out_msgpool_reply;
+
return 0;
+out_msgpool_reply:
+ ceph_msgpool_destroy(&osdc->msgpool_op_reply);
out_msgpool:
ceph_msgpool_destroy(&osdc->msgpool_op);
out_mempool:
diff --git a/net/compat.c b/net/compat.c
index dd32e34c1e2c..f50161fb812e 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -780,21 +780,16 @@ asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
if (flags & MSG_CMSG_COMPAT)
return -EINVAL;
- if (COMPAT_USE_64BIT_TIME)
- return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
- flags | MSG_CMSG_COMPAT,
- (struct timespec *) timeout);
-
if (timeout == NULL)
return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
flags | MSG_CMSG_COMPAT, NULL);
- if (get_compat_timespec(&ktspec, timeout))
+ if (compat_get_timespec(&ktspec, timeout))
return -EFAULT;
datagrams = __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
flags | MSG_CMSG_COMPAT, &ktspec);
- if (datagrams > 0 && put_compat_timespec(&ktspec, timeout))
+ if (datagrams > 0 && compat_put_timespec(&ktspec, timeout))
datagrams = -EFAULT;
return datagrams;
diff --git a/net/core/dev.c b/net/core/dev.c
index 3721db716350..b1b0c8d4d7df 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2420,7 +2420,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
* 2. No high memory really exists on this machine.
*/
-static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
+static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
int i;
@@ -2495,34 +2495,36 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
}
static netdev_features_t harmonize_features(struct sk_buff *skb,
- netdev_features_t features)
+ const struct net_device *dev,
+ netdev_features_t features)
{
if (skb->ip_summed != CHECKSUM_NONE &&
!can_checksum_protocol(features, skb_network_protocol(skb))) {
features &= ~NETIF_F_ALL_CSUM;
- } else if (illegal_highdma(skb->dev, skb)) {
+ } else if (illegal_highdma(dev, skb)) {
features &= ~NETIF_F_SG;
}
return features;
}
-netdev_features_t netif_skb_features(struct sk_buff *skb)
+netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
+ const struct net_device *dev)
{
__be16 protocol = skb->protocol;
- netdev_features_t features = skb->dev->features;
+ netdev_features_t features = dev->features;
- if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
+ if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs)
features &= ~NETIF_F_GSO_MASK;
if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
protocol = veh->h_vlan_encapsulated_proto;
} else if (!vlan_tx_tag_present(skb)) {
- return harmonize_features(skb, features);
+ return harmonize_features(skb, dev, features);
}
- features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
+ features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX);
if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))
@@ -2530,9 +2532,9 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX;
- return harmonize_features(skb, features);
+ return harmonize_features(skb, dev, features);
}
-EXPORT_SYMBOL(netif_skb_features);
+EXPORT_SYMBOL(netif_skb_dev_features);
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq)
@@ -2803,7 +2805,7 @@ EXPORT_SYMBOL(dev_loopback_xmit);
* the BH enable code must have IRQs enabled so that it will not deadlock.
* --BLG
*/
-int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
+static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv)
{
struct net_device *dev = skb->dev;
struct netdev_queue *txq;
@@ -4637,7 +4639,7 @@ struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev)
}
EXPORT_SYMBOL(netdev_master_upper_dev_get_rcu);
-int netdev_adjacent_sysfs_add(struct net_device *dev,
+static int netdev_adjacent_sysfs_add(struct net_device *dev,
struct net_device *adj_dev,
struct list_head *dev_list)
{
@@ -4647,7 +4649,7 @@ int netdev_adjacent_sysfs_add(struct net_device *dev,
return sysfs_create_link(&(dev->dev.kobj), &(adj_dev->dev.kobj),
linkname);
}
-void netdev_adjacent_sysfs_del(struct net_device *dev,
+static void netdev_adjacent_sysfs_del(struct net_device *dev,
char *name,
struct list_head *dev_list)
{
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index f409e0bd35c0..185c341fafbd 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -745,6 +745,13 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
attach_rules(&ops->rules_list, dev);
break;
+ case NETDEV_CHANGENAME:
+ list_for_each_entry(ops, &net->rules_ops, list) {
+ detach_rules(&ops->rules_list, dev);
+ attach_rules(&ops->rules_list, dev);
+ }
+ break;
+
case NETDEV_UNREGISTER:
list_for_each_entry(ops, &net->rules_ops, list)
detach_rules(&ops->rules_list, dev);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 87577d447554..e29e810663d7 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -323,17 +323,6 @@ u32 __skb_get_poff(const struct sk_buff *skb)
return poff;
}
-static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
-{
- if (unlikely(queue_index >= dev->real_num_tx_queues)) {
- net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n",
- dev->name, queue_index,
- dev->real_num_tx_queues);
- return 0;
- }
- return queue_index;
-}
-
static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_XPS
@@ -372,7 +361,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
#endif
}
-u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
+static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
{
struct sock *sk = skb->sk;
int queue_index = sk_tx_queue_get(sk);
@@ -392,7 +381,6 @@ u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
return queue_index;
}
-EXPORT_SYMBOL(__netdev_pick_tx);
struct netdev_queue *netdev_pick_tx(struct net_device *dev,
struct sk_buff *skb,
@@ -403,13 +391,13 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
if (dev->real_num_tx_queues != 1) {
const struct net_device_ops *ops = dev->netdev_ops;
if (ops->ndo_select_queue)
- queue_index = ops->ndo_select_queue(dev, skb,
- accel_priv);
+ queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
+ __netdev_pick_tx);
else
queue_index = __netdev_pick_tx(dev, skb);
if (!accel_priv)
- queue_index = dev_cap_txqueue(dev, queue_index);
+ queue_index = netdev_cap_txqueue(dev, queue_index);
}
skb_set_queue_mapping(skb, queue_index);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index b9e9e0d38672..e16129019c66 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -766,9 +766,6 @@ static void neigh_periodic_work(struct work_struct *work)
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
- if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
- goto out;
-
/*
* periodically recompute ReachableTime from random function
*/
@@ -781,6 +778,9 @@ static void neigh_periodic_work(struct work_struct *work)
neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
}
+ if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
+ goto out;
+
for (i = 0 ; i < (1 << nht->hash_shift); i++) {
np = &nht->hash_buckets[i];
@@ -3046,7 +3046,7 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
if (!t)
goto err;
- for (i = 0; i < ARRAY_SIZE(t->neigh_vars); i++) {
+ for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
t->neigh_vars[i].data += (long) p;
t->neigh_vars[i].extra1 = dev;
t->neigh_vars[i].extra2 = p;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index c03f3dec4763..a664f7829a6d 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -948,6 +948,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
{
char *cur=opt, *delim;
int ipv6;
+ bool ipversion_set = false;
if (*cur != '@') {
if ((delim = strchr(cur, '@')) == NULL)
@@ -960,6 +961,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
cur++;
if (*cur != '/') {
+ ipversion_set = true;
if ((delim = strchr(cur, '/')) == NULL)
goto parse_failed;
*delim = 0;
@@ -1002,7 +1004,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
if (ipv6 < 0)
goto parse_failed;
- else if (np->ipv6 != (bool)ipv6)
+ else if (ipversion_set && np->ipv6 != (bool)ipv6)
goto parse_failed;
else
np->ipv6 = (bool)ipv6;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 393b1bc9a618..1a0dac2ef9ad 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -374,7 +374,7 @@ static size_t rtnl_link_get_slave_info_data_size(const struct net_device *dev)
if (!master_dev)
return 0;
ops = master_dev->rtnl_link_ops;
- if (!ops->get_slave_size)
+ if (!ops || !ops->get_slave_size)
return 0;
/* IFLA_INFO_SLAVE_DATA + nested data */
return nla_total_size(sizeof(struct nlattr)) +
@@ -1963,16 +1963,21 @@ replay:
dev->ifindex = ifm->ifi_index;
- if (ops->newlink)
+ if (ops->newlink) {
err = ops->newlink(net, dev, tb, data);
- else
+ /* Drivers should call free_netdev() in ->destructor
+ * and unregister it on failure so that device could be
+ * finally freed in rtnl_unlock.
+ */
+ if (err < 0)
+ goto out;
+ } else {
err = register_netdevice(dev);
-
- if (err < 0) {
- free_netdev(dev);
- goto out;
+ if (err < 0) {
+ free_netdev(dev);
+ goto out;
+ }
}
-
err = rtnl_configure_link(dev, ifm);
if (err < 0)
unregister_netdevice(dev);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 5976ef0846bd..5d6236d9fdce 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -707,9 +707,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->mark = old->mark;
new->skb_iif = old->skb_iif;
__nf_copy(new, old);
-#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
- new->nf_trace = old->nf_trace;
-#endif
#ifdef CONFIG_NET_SCHED
new->tc_index = old->tc_index;
#ifdef CONFIG_NET_CLS_ACT
diff --git a/net/core/sock.c b/net/core/sock.c
index 0c127dcdf6a8..5b6a9431b017 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1775,7 +1775,9 @@ struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
while (order) {
if (npages >= 1 << order) {
page = alloc_pages(sk->sk_allocation |
- __GFP_COMP | __GFP_NOWARN,
+ __GFP_COMP |
+ __GFP_NOWARN |
+ __GFP_NORETRY,
order);
if (page)
goto fill_page;
@@ -1845,7 +1847,7 @@ bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio)
gfp_t gfp = prio;
if (order)
- gfp |= __GFP_COMP | __GFP_NOWARN;
+ gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
pfrag->page = alloc_pages(gfp, order);
if (likely(pfrag->page)) {
pfrag->offset = 0;
diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c
index c073b81a1f3e..62b5828acde0 100644
--- a/net/dccp/ccids/lib/tfrc.c
+++ b/net/dccp/ccids/lib/tfrc.c
@@ -8,7 +8,7 @@
#include "tfrc.h"
#ifdef CONFIG_IP_DCCP_TFRC_DEBUG
-static bool tfrc_debug;
+bool tfrc_debug;
module_param(tfrc_debug, bool, 0644);
MODULE_PARM_DESC(tfrc_debug, "Enable TFRC debug messages");
#endif
diff --git a/net/dccp/ccids/lib/tfrc.h b/net/dccp/ccids/lib/tfrc.h
index a3d8f7c76ae0..40ee7d62b652 100644
--- a/net/dccp/ccids/lib/tfrc.h
+++ b/net/dccp/ccids/lib/tfrc.h
@@ -21,6 +21,7 @@
#include "packet_history.h"
#ifdef CONFIG_IP_DCCP_TFRC_DEBUG
+extern bool tfrc_debug;
#define tfrc_pr_debug(format, a...) DCCP_PR_DEBUG(tfrc_debug, format, ##a)
#else
#define tfrc_pr_debug(format, a...)
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 2954dcbca832..4c04848953bd 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -2104,8 +2104,6 @@ static struct notifier_block dn_dev_notifier = {
.notifier_call = dn_device_event,
};
-extern int dn_route_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *);
-
static struct packet_type dn_dix_packet_type __read_mostly = {
.type = cpu_to_be16(ETH_P_DNA_RT),
.func = dn_route_rcv,
@@ -2353,9 +2351,6 @@ static const struct proto_ops dn_proto_ops = {
.sendpage = sock_no_sendpage,
};
-void dn_register_sysctl(void);
-void dn_unregister_sysctl(void);
-
MODULE_DESCRIPTION("The Linux DECnet Network Protocol");
MODULE_AUTHOR("Linux DECnet Project Team");
MODULE_LICENSE("GPL");
diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c
index 327060c6c874..7ae0d7f6dbd0 100644
--- a/net/hsr/hsr_framereg.c
+++ b/net/hsr/hsr_framereg.c
@@ -297,7 +297,7 @@ static bool seq_nr_after(u16 a, u16 b)
void hsr_register_frame_in(struct node_entry *node, enum hsr_dev_idx dev_idx)
{
- if ((dev_idx < 0) || (dev_idx >= HSR_MAX_DEV)) {
+ if ((dev_idx < 0) || (dev_idx >= HSR_MAX_SLAVE)) {
WARN_ONCE(1, "%s: Invalid dev_idx (%d)\n", __func__, dev_idx);
return;
}
diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index 48b25c0af4d0..8edfea5da572 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -106,7 +106,6 @@ static int lowpan_header_create(struct sk_buff *skb,
unsigned short type, const void *_daddr,
const void *_saddr, unsigned int len)
{
- struct ipv6hdr *hdr;
const u8 *saddr = _saddr;
const u8 *daddr = _daddr;
struct ieee802154_addr sa, da;
@@ -117,8 +116,6 @@ static int lowpan_header_create(struct sk_buff *skb,
if (type != ETH_P_IPV6)
return 0;
- hdr = ipv6_hdr(skb);
-
if (!saddr)
saddr = dev->dev_addr;
@@ -533,7 +530,27 @@ static struct header_ops lowpan_header_ops = {
.create = lowpan_header_create,
};
+static struct lock_class_key lowpan_tx_busylock;
+static struct lock_class_key lowpan_netdev_xmit_lock_key;
+
+static void lowpan_set_lockdep_class_one(struct net_device *dev,
+ struct netdev_queue *txq,
+ void *_unused)
+{
+ lockdep_set_class(&txq->_xmit_lock,
+ &lowpan_netdev_xmit_lock_key);
+}
+
+
+static int lowpan_dev_init(struct net_device *dev)
+{
+ netdev_for_each_tx_queue(dev, lowpan_set_lockdep_class_one, NULL);
+ dev->qdisc_tx_busylock = &lowpan_tx_busylock;
+ return 0;
+}
+
static const struct net_device_ops lowpan_netdev_ops = {
+ .ndo_init = lowpan_dev_init,
.ndo_start_xmit = lowpan_xmit,
.ndo_set_mac_address = lowpan_set_address,
};
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index ecd2c3f245ce..19ab78aca547 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1296,8 +1296,11 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
segs = ERR_PTR(-EPROTONOSUPPORT);
- /* Note : following gso_segment() might change skb->encapsulation */
- udpfrag = !skb->encapsulation && proto == IPPROTO_UDP;
+ if (skb->encapsulation &&
+ skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP))
+ udpfrag = proto == IPPROTO_UDP && encap;
+ else
+ udpfrag = proto == IPPROTO_UDP && !skb->encapsulation;
ops = rcu_dereference(inet_offloads[proto]);
if (likely(ops && ops->callbacks.gso_segment))
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index ac2dff3c2c1c..bdbf68bb2e2d 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1443,7 +1443,8 @@ static size_t inet_nlmsg_size(void)
+ nla_total_size(4) /* IFA_LOCAL */
+ nla_total_size(4) /* IFA_BROADCAST */
+ nla_total_size(IFNAMSIZ) /* IFA_LABEL */
- + nla_total_size(4); /* IFA_FLAGS */
+ + nla_total_size(4) /* IFA_FLAGS */
+ + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
}
static inline u32 cstamp_delta(unsigned long cstamp)
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index bb075fc9a14f..3b01959bf4bb 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -208,7 +208,7 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force)
}
work = frag_mem_limit(nf) - nf->low_thresh;
- while (work > 0) {
+ while (work > 0 || force) {
spin_lock(&nf->lru_lock);
if (list_empty(&nf->lru_list)) {
@@ -278,9 +278,10 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
atomic_inc(&qp->refcnt);
hlist_add_head(&qp->list, &hb->chain);
+ inet_frag_lru_add(nf, qp);
spin_unlock(&hb->chain_lock);
read_unlock(&f->lock);
- inet_frag_lru_add(nf, qp);
+
return qp;
}
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index e9f1217a8afd..f3869c186d97 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -39,6 +39,71 @@
#include <net/route.h>
#include <net/xfrm.h>
+static bool ip_may_fragment(const struct sk_buff *skb)
+{
+ return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) ||
+ !skb->local_df;
+}
+
+static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+ if (skb->len <= mtu || skb->local_df)
+ return false;
+
+ if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+ return false;
+
+ return true;
+}
+
+static bool ip_gso_exceeds_dst_mtu(const struct sk_buff *skb)
+{
+ unsigned int mtu;
+
+ if (skb->local_df || !skb_is_gso(skb))
+ return false;
+
+ mtu = ip_dst_mtu_maybe_forward(skb_dst(skb), true);
+
+ /* if seglen > mtu, do software segmentation for IP fragmentation on
+ * output. DF bit cannot be set since ip_forward would have sent
+ * icmp error.
+ */
+ return skb_gso_network_seglen(skb) > mtu;
+}
+
+/* called if GSO skb needs to be fragmented on forward */
+static int ip_forward_finish_gso(struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ netdev_features_t features;
+ struct sk_buff *segs;
+ int ret = 0;
+
+ features = netif_skb_dev_features(skb, dst->dev);
+ segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+ if (IS_ERR(segs)) {
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+
+ consume_skb(skb);
+
+ do {
+ struct sk_buff *nskb = segs->next;
+ int err;
+
+ segs->next = NULL;
+ err = dst_output(segs);
+
+ if (err && ret == 0)
+ ret = err;
+ segs = nskb;
+ } while (segs);
+
+ return ret;
+}
+
static int ip_forward_finish(struct sk_buff *skb)
{
struct ip_options *opt = &(IPCB(skb)->opt);
@@ -49,6 +114,9 @@ static int ip_forward_finish(struct sk_buff *skb)
if (unlikely(opt->optlen))
ip_forward_options(skb);
+ if (ip_gso_exceeds_dst_mtu(skb))
+ return ip_forward_finish_gso(skb);
+
return dst_output(skb);
}
@@ -91,8 +159,7 @@ int ip_forward(struct sk_buff *skb)
IPCB(skb)->flags |= IPSKB_FORWARDED;
mtu = ip_dst_mtu_maybe_forward(&rt->dst, true);
- if (unlikely(skb->len > mtu && !skb_is_gso(skb) &&
- (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
+ if (!ip_may_fragment(skb) && ip_exceeds_mtu(skb, mtu)) {
IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(mtu));
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 8971780aec7c..73c6b63bba74 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -422,9 +422,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->tc_index = from->tc_index;
#endif
nf_copy(to, from);
-#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
- to->nf_trace = from->nf_trace;
-#endif
#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE)
to->ipvs_property = from->ipvs_property;
#endif
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index bd28f386bd02..78a89e61925d 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -93,83 +93,32 @@ static void tunnel_dst_reset(struct ip_tunnel *t)
tunnel_dst_set(t, NULL);
}
-static void tunnel_dst_reset_all(struct ip_tunnel *t)
+void ip_tunnel_dst_reset_all(struct ip_tunnel *t)
{
int i;
for_each_possible_cpu(i)
__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL);
}
+EXPORT_SYMBOL(ip_tunnel_dst_reset_all);
-static struct dst_entry *tunnel_dst_get(struct ip_tunnel *t)
+static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie)
{
struct dst_entry *dst;
rcu_read_lock();
dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst);
- if (dst)
+ if (dst) {
+ if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
+ rcu_read_unlock();
+ tunnel_dst_reset(t);
+ return NULL;
+ }
dst_hold(dst);
- rcu_read_unlock();
- return dst;
-}
-
-static struct dst_entry *tunnel_dst_check(struct ip_tunnel *t, u32 cookie)
-{
- struct dst_entry *dst = tunnel_dst_get(t);
-
- if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
- tunnel_dst_reset(t);
- return NULL;
- }
-
- return dst;
-}
-
-/* Often modified stats are per cpu, other are shared (netdev->stats) */
-struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
- struct rtnl_link_stats64 *tot)
-{
- int i;
-
- for_each_possible_cpu(i) {
- const struct pcpu_sw_netstats *tstats =
- per_cpu_ptr(dev->tstats, i);
- u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
- unsigned int start;
-
- do {
- start = u64_stats_fetch_begin_bh(&tstats->syncp);
- rx_packets = tstats->rx_packets;
- tx_packets = tstats->tx_packets;
- rx_bytes = tstats->rx_bytes;
- tx_bytes = tstats->tx_bytes;
- } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
-
- tot->rx_packets += rx_packets;
- tot->tx_packets += tx_packets;
- tot->rx_bytes += rx_bytes;
- tot->tx_bytes += tx_bytes;
}
-
- tot->multicast = dev->stats.multicast;
-
- tot->rx_crc_errors = dev->stats.rx_crc_errors;
- tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
- tot->rx_length_errors = dev->stats.rx_length_errors;
- tot->rx_frame_errors = dev->stats.rx_frame_errors;
- tot->rx_errors = dev->stats.rx_errors;
-
- tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
- tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
- tot->tx_dropped = dev->stats.tx_dropped;
- tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
- tot->tx_errors = dev->stats.tx_errors;
-
- tot->collisions = dev->stats.collisions;
-
- return tot;
+ rcu_read_unlock();
+ return (struct rtable *)dst;
}
-EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
__be16 flags, __be32 key)
@@ -584,7 +533,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
struct flowi4 fl4;
u8 tos, ttl;
__be16 df;
- struct rtable *rt = NULL; /* Route to the other host */
+ struct rtable *rt; /* Route to the other host */
unsigned int max_headroom; /* The extra header space needed */
__be32 dst;
int err;
@@ -657,8 +606,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr,
tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link);
- if (connected)
- rt = (struct rtable *)tunnel_dst_check(tunnel, 0);
+ rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL;
if (!rt) {
rt = ip_route_output_key(tunnel->net, &fl4);
@@ -766,7 +714,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
if (set_mtu)
dev->mtu = mtu;
}
- tunnel_dst_reset_all(t);
+ ip_tunnel_dst_reset_all(t);
netdev_state_change(dev);
}
@@ -1095,7 +1043,7 @@ void ip_tunnel_uninit(struct net_device *dev)
if (itn->fb_tunnel_dev != dev)
ip_tunnel_del(netdev_priv(dev));
- tunnel_dst_reset_all(tunnel);
+ ip_tunnel_dst_reset_all(tunnel);
}
EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 6156f4ef5e91..6f847dd56dbc 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -108,7 +108,6 @@ int iptunnel_pull_header(struct sk_buff *skb, int hdr_len, __be16 inner_proto)
nf_reset(skb);
secpath_reset(skb);
skb_clear_hash_if_not_l4(skb);
- skb_dst_drop(skb);
skb->vlan_tci = 0;
skb_set_queue_mapping(skb, 0);
skb->pkt_type = PACKET_HOST;
@@ -148,3 +147,49 @@ error:
return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(iptunnel_handle_offloads);
+
+/* Often modified stats are per cpu, other are shared (netdev->stats) */
+struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
+ struct rtnl_link_stats64 *tot)
+{
+ int i;
+
+ for_each_possible_cpu(i) {
+ const struct pcpu_sw_netstats *tstats =
+ per_cpu_ptr(dev->tstats, i);
+ u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
+ unsigned int start;
+
+ do {
+ start = u64_stats_fetch_begin_bh(&tstats->syncp);
+ rx_packets = tstats->rx_packets;
+ tx_packets = tstats->tx_packets;
+ rx_bytes = tstats->rx_bytes;
+ tx_bytes = tstats->tx_bytes;
+ } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
+
+ tot->rx_packets += rx_packets;
+ tot->tx_packets += tx_packets;
+ tot->rx_bytes += rx_bytes;
+ tot->tx_bytes += tx_bytes;
+ }
+
+ tot->multicast = dev->stats.multicast;
+
+ tot->rx_crc_errors = dev->stats.rx_crc_errors;
+ tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
+ tot->rx_length_errors = dev->stats.rx_length_errors;
+ tot->rx_frame_errors = dev->stats.rx_frame_errors;
+ tot->rx_errors = dev->stats.rx_errors;
+
+ tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
+ tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
+ tot->tx_dropped = dev->stats.tx_dropped;
+ tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
+ tot->tx_errors = dev->stats.tx_errors;
+
+ tot->collisions = dev->stats.collisions;
+
+ return tot;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index efa1138fa523..b3e86ea7b71b 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -273,7 +273,7 @@ static int __init ic_open_devs(void)
msleep(1);
- if time_before(jiffies, next_msg)
+ if (time_before(jiffies, next_msg))
continue;
elapsed = jiffies_to_msecs(jiffies - start);
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 81c6910cfa92..a26ce035e3fa 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -61,6 +61,11 @@ config NFT_CHAIN_NAT_IPV4
packet transformations such as the source, destination address and
source and destination ports.
+config NFT_REJECT_IPV4
+ depends on NF_TABLES_IPV4
+ default NFT_REJECT
+ tristate
+
config NF_TABLES_ARP
depends on NF_TABLES
tristate "ARP nf_tables support"
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index c16be9d58420..90b82405331e 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
+obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
# generic IP tables
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 9eea059dd621..574f7ebba0b6 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -229,7 +229,10 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
ret = nf_ct_expect_related(rtcp_exp);
if (ret == 0)
break;
- else if (ret != -EBUSY) {
+ else if (ret == -EBUSY) {
+ nf_ct_unexpect_related(rtp_exp);
+ continue;
+ } else if (ret < 0) {
nf_ct_unexpect_related(rtp_exp);
nated_port = 0;
break;
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index d551e31b416e..7c676671329d 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1198,8 +1198,8 @@ static int snmp_translate(struct nf_conn *ct,
map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip);
} else {
/* DNAT replies */
- map.from = NOCT1(&ct->tuplehash[dir].tuple.src.u3.ip);
- map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip);
+ map.from = NOCT1(&ct->tuplehash[!dir].tuple.src.u3.ip);
+ map.to = NOCT1(&ct->tuplehash[dir].tuple.dst.u3.ip);
}
if (map.from == map.to)
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
new file mode 100644
index 000000000000..e79718a382f2
--- /dev/null
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2013 Eric Leblond <eric@regit.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/icmp.h>
+#include <net/netfilter/ipv4/nf_reject.h>
+#include <net/netfilter/nft_reject.h>
+
+void nft_reject_ipv4_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_reject *priv = nft_expr_priv(expr);
+
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ nf_send_unreach(pkt->skb, priv->icmp_code);
+ break;
+ case NFT_REJECT_TCP_RST:
+ nf_send_reset(pkt->skb, pkt->ops->hooknum);
+ break;
+ }
+
+ data[NFT_REG_VERDICT].verdict = NF_DROP;
+}
+EXPORT_SYMBOL_GPL(nft_reject_ipv4_eval);
+
+static struct nft_expr_type nft_reject_ipv4_type;
+static const struct nft_expr_ops nft_reject_ipv4_ops = {
+ .type = &nft_reject_ipv4_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+ .eval = nft_reject_ipv4_eval,
+ .init = nft_reject_init,
+ .dump = nft_reject_dump,
+};
+
+static struct nft_expr_type nft_reject_ipv4_type __read_mostly = {
+ .family = NFPROTO_IPV4,
+ .name = "reject",
+ .ops = &nft_reject_ipv4_ops,
+ .policy = nft_reject_policy,
+ .maxattr = NFTA_REJECT_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_reject_ipv4_module_init(void)
+{
+ return nft_register_expr(&nft_reject_ipv4_type);
+}
+
+static void __exit nft_reject_ipv4_module_exit(void)
+{
+ nft_unregister_expr(&nft_reject_ipv4_type);
+}
+
+module_init(nft_reject_ipv4_module_init);
+module_exit(nft_reject_ipv4_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "reject");
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 25071b48921c..4c011ec69ed4 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1597,6 +1597,7 @@ static int __mkroute_input(struct sk_buff *skb,
rth->rt_gateway = 0;
rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
+ RT_CACHE_STAT_INC(in_slow_tot);
rth->dst.input = ip_forward;
rth->dst.output = ip_output;
@@ -1695,10 +1696,11 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
fl4.daddr = daddr;
fl4.saddr = saddr;
err = fib_lookup(net, &fl4, &res);
- if (err != 0)
+ if (err != 0) {
+ if (!IN_DEV_FORWARD(in_dev))
+ err = -EHOSTUNREACH;
goto no_route;
-
- RT_CACHE_STAT_INC(in_slow_tot);
+ }
if (res.type == RTN_BROADCAST)
goto brd_input;
@@ -1712,8 +1714,10 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
goto local_input;
}
- if (!IN_DEV_FORWARD(in_dev))
+ if (!IN_DEV_FORWARD(in_dev)) {
+ err = -EHOSTUNREACH;
goto no_route;
+ }
if (res.type != RTN_UNICAST)
goto martian_destination;
@@ -1768,6 +1772,7 @@ local_input:
rth->rt_gateway = 0;
rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
+ RT_CACHE_STAT_INC(in_slow_tot);
if (res.type == RTN_UNREACHABLE) {
rth->dst.input= ip_error;
rth->dst.error= -err;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4475b3bb494d..97c8f5620c43 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1044,7 +1044,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp)
}
}
-static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size)
+static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
+ int *copied, size_t size)
{
struct tcp_sock *tp = tcp_sk(sk);
int err, flags;
@@ -1059,11 +1060,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size)
if (unlikely(tp->fastopen_req == NULL))
return -ENOBUFS;
tp->fastopen_req->data = msg;
+ tp->fastopen_req->size = size;
flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
msg->msg_namelen, flags);
- *size = tp->fastopen_req->copied;
+ *copied = tp->fastopen_req->copied;
tcp_free_fastopen_req(tp);
return err;
}
@@ -1083,7 +1085,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
flags = msg->msg_flags;
if (flags & MSG_FASTOPEN) {
- err = tcp_sendmsg_fastopen(sk, msg, &copied_syn);
+ err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
if (err == -EINPROGRESS && copied_syn > 0)
goto out;
else if (err)
@@ -2229,7 +2231,7 @@ adjudge_to_death:
/* This is a (useful) BSD violating of the RFC. There is a
* problem with TCP as specified in that the other end could
* keep a socket open forever with no application left this end.
- * We use a 3 minute timeout (about the same as BSD) then kill
+ * We use a 1 minute timeout (about the same as BSD) then kill
* our end. If they send after that then tough - BUT: long enough
* that we won't make the old 4*rto = almost no time - whoops
* reset mistake.
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index ad37bf18ae4b..2388275adb9b 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -290,8 +290,7 @@ bool tcp_is_cwnd_limited(const struct sock *sk, u32 in_flight)
left = tp->snd_cwnd - in_flight;
if (sk_can_gso(sk) &&
left * sysctl_tcp_tso_win_divisor < tp->snd_cwnd &&
- left * tp->mss_cache < sk->sk_gso_max_size &&
- left < sk->sk_gso_max_segs)
+ left < tp->xmit_size_goal_segs)
return true;
return left <= tcp_max_tso_deferred_mss(tp);
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 65cf90e063d5..eeaac399420d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -671,6 +671,7 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
{
struct tcp_sock *tp = tcp_sk(sk);
long m = mrtt; /* RTT */
+ u32 srtt = tp->srtt;
/* The following amusing code comes from Jacobson's
* article in SIGCOMM '88. Note that rtt and mdev
@@ -688,11 +689,9 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
* does not matter how to _calculate_ it. Seems, it was trap
* that VJ failed to avoid. 8)
*/
- if (m == 0)
- m = 1;
- if (tp->srtt != 0) {
- m -= (tp->srtt >> 3); /* m is now error in rtt est */
- tp->srtt += m; /* rtt = 7/8 rtt + 1/8 new */
+ if (srtt != 0) {
+ m -= (srtt >> 3); /* m is now error in rtt est */
+ srtt += m; /* rtt = 7/8 rtt + 1/8 new */
if (m < 0) {
m = -m; /* m is now abs(error) */
m -= (tp->mdev >> 2); /* similar update on mdev */
@@ -723,11 +722,12 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
}
} else {
/* no previous measure. */
- tp->srtt = m << 3; /* take the measured time to be rtt */
+ srtt = m << 3; /* take the measured time to be rtt */
tp->mdev = m << 1; /* make sure rto = 3*rtt */
tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
tp->rtt_seq = tp->snd_nxt;
}
+ tp->srtt = max(1U, srtt);
}
/* Set the sk_pacing_rate to allow proper sizing of TSO packets.
@@ -746,8 +746,10 @@ static void tcp_update_pacing_rate(struct sock *sk)
rate *= max(tp->snd_cwnd, tp->packets_out);
- /* Correction for small srtt : minimum srtt being 8 (1 jiffy << 3),
- * be conservative and assume srtt = 1 (125 us instead of 1.25 ms)
+ /* Correction for small srtt and scheduling constraints.
+ * For small rtt, consider noise is too high, and use
+ * the minimal value (srtt = 1 -> 125 us for HZ=1000)
+ *
* We probably need usec resolution in the future.
* Note: This also takes care of possible srtt=0 case,
* when tcp_rtt_estimator() was not yet called.
@@ -1943,8 +1945,9 @@ void tcp_enter_loss(struct sock *sk, int how)
if (skb == tcp_send_head(sk))
break;
- if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
tp->undo_marker = 0;
+
TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 03d26b85eab8..f0eb4e337ec8 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -698,7 +698,8 @@ static void tcp_tsq_handler(struct sock *sk)
if ((1 << sk->sk_state) &
(TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
- tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC);
+ tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle,
+ 0, GFP_ATOMIC);
}
/*
* One tasklet per cpu tries to send more skbs.
@@ -863,8 +864,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
fclone->fclone == SKB_FCLONE_CLONE))
- NET_INC_STATS_BH(sock_net(sk),
- LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
+ NET_INC_STATS(sock_net(sk),
+ LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES);
if (unlikely(skb_cloned(skb)))
skb = pskb_copy(skb, gfp_mask);
@@ -1904,7 +1905,15 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
if (atomic_read(&sk->sk_wmem_alloc) > limit) {
set_bit(TSQ_THROTTLED, &tp->tsq_flags);
- break;
+ /* It is possible TX completion already happened
+ * before we set TSQ_THROTTLED, so we must
+ * test again the condition.
+ * We abuse smp_mb__after_clear_bit() because
+ * there is no smp_mb__after_set_bit() yet
+ */
+ smp_mb__after_clear_bit();
+ if (atomic_read(&sk->sk_wmem_alloc) > limit)
+ break;
}
limit = mss_now;
@@ -1977,7 +1986,7 @@ bool tcp_schedule_loss_probe(struct sock *sk)
/* Schedule a loss probe in 2*RTT for SACK capable connections
* in Open state, that are either limited by cwnd or application.
*/
- if (sysctl_tcp_early_retrans < 3 || !rtt || !tp->packets_out ||
+ if (sysctl_tcp_early_retrans < 3 || !tp->srtt || !tp->packets_out ||
!tcp_is_sack(tp) || inet_csk(sk)->icsk_ca_state != TCP_CA_Open)
return false;
@@ -2328,6 +2337,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
unsigned int cur_mss;
+ int err;
/* Inconslusive MTU probe */
if (icsk->icsk_mtup.probe_size) {
@@ -2391,11 +2401,15 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
skb_headroom(skb) >= 0xFFFF)) {
struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
GFP_ATOMIC);
- return nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
- -ENOBUFS;
+ err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
+ -ENOBUFS;
} else {
- return tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
+ err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
}
+
+ if (likely(!err))
+ TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS;
+ return err;
}
int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
@@ -2899,7 +2913,12 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
MAX_TCP_OPTION_SPACE;
- syn_data = skb_copy_expand(syn, skb_headroom(syn), space,
+ space = min_t(size_t, space, fo->size);
+
+ /* limit to order-0 allocations */
+ space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
+
+ syn_data = skb_copy_expand(syn, MAX_TCP_HEADER, space,
sk->sk_allocation);
if (syn_data == NULL)
goto fallback;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 25f5cee3a08a..88b4023ecfcf 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -17,6 +17,8 @@
static DEFINE_SPINLOCK(udp_offload_lock);
static struct udp_offload_priv __rcu *udp_offload_base __read_mostly;
+#define udp_deref_protected(X) rcu_dereference_protected(X, lockdep_is_held(&udp_offload_lock))
+
struct udp_offload_priv {
struct udp_offload *offload;
struct rcu_head rcu;
@@ -100,8 +102,7 @@ out:
int udp_add_offload(struct udp_offload *uo)
{
- struct udp_offload_priv __rcu **head = &udp_offload_base;
- struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_KERNEL);
+ struct udp_offload_priv *new_offload = kzalloc(sizeof(*new_offload), GFP_ATOMIC);
if (!new_offload)
return -ENOMEM;
@@ -109,8 +110,8 @@ int udp_add_offload(struct udp_offload *uo)
new_offload->offload = uo;
spin_lock(&udp_offload_lock);
- rcu_assign_pointer(new_offload->next, rcu_dereference(*head));
- rcu_assign_pointer(*head, new_offload);
+ new_offload->next = udp_offload_base;
+ rcu_assign_pointer(udp_offload_base, new_offload);
spin_unlock(&udp_offload_lock);
return 0;
@@ -130,12 +131,12 @@ void udp_del_offload(struct udp_offload *uo)
spin_lock(&udp_offload_lock);
- uo_priv = rcu_dereference(*head);
+ uo_priv = udp_deref_protected(*head);
for (; uo_priv != NULL;
- uo_priv = rcu_dereference(*head)) {
-
+ uo_priv = udp_deref_protected(*head)) {
if (uo_priv->offload == uo) {
- rcu_assign_pointer(*head, rcu_dereference(uo_priv->next));
+ rcu_assign_pointer(*head,
+ udp_deref_protected(uo_priv->next));
goto unlock;
}
head = &uo_priv->next;
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index d92e5586783e..438a73aa777c 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -138,6 +138,7 @@ config INET6_XFRM_MODE_ROUTEOPTIMIZATION
config IPV6_VTI
tristate "Virtual (secure) IPv6: tunneling"
select IPV6_TUNNEL
+ select NET_IP_TUNNEL
depends on INET6_XFRM_MODE_TUNNEL
---help---
Tunneling means encapsulating data of one protocol type within
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ad235690684c..fdbfeca36d63 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2783,6 +2783,8 @@ static void addrconf_gre_config(struct net_device *dev)
ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))
addrconf_add_linklocal(idev, &addr);
+ else
+ addrconf_prefix_route(&addr, 64, dev, 0, 0);
}
#endif
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 140748debc4a..8af3eb57f438 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -212,7 +212,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
found = (nexthdr == target);
if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) {
- if (target < 0)
+ if (target < 0 || found)
break;
return -ENOENT;
}
diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c
index cf77f3abfd06..447a7fbd1bb6 100644
--- a/net/ipv6/exthdrs_offload.c
+++ b/net/ipv6/exthdrs_offload.c
@@ -25,11 +25,11 @@ int __init ipv6_exthdrs_offload_init(void)
int ret;
ret = inet6_add_offload(&rthdr_offload, IPPROTO_ROUTING);
- if (!ret)
+ if (ret)
goto out;
ret = inet6_add_offload(&dstopt_offload, IPPROTO_DSTOPTS);
- if (!ret)
+ if (ret)
goto out_rt;
out:
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index f81f59686f21..f2610e157660 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -414,7 +414,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
addr_type = ipv6_addr_type(&hdr->daddr);
if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
- ipv6_anycast_destination(skb))
+ ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
saddr = &hdr->daddr;
/*
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index 1e8683b135bb..59f95affceb0 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -89,7 +89,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
unsigned int unfrag_ip6hlen;
u8 *prevhdr;
int offset = 0;
- bool tunnel;
+ bool encap, udpfrag;
int nhoff;
if (unlikely(skb_shinfo(skb)->gso_type &
@@ -110,8 +110,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))
goto out;
- tunnel = SKB_GSO_CB(skb)->encap_level > 0;
- if (tunnel)
+ encap = SKB_GSO_CB(skb)->encap_level > 0;
+ if (encap)
features = skb->dev->hw_enc_features & netif_skb_features(skb);
SKB_GSO_CB(skb)->encap_level += sizeof(*ipv6h);
@@ -121,6 +121,12 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr);
+ if (skb->encapsulation &&
+ skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP))
+ udpfrag = proto == IPPROTO_UDP && encap;
+ else
+ udpfrag = proto == IPPROTO_UDP && !skb->encapsulation;
+
ops = rcu_dereference(inet6_offloads[proto]);
if (likely(ops && ops->callbacks.gso_segment)) {
skb_reset_transport_header(skb);
@@ -133,13 +139,9 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
for (skb = segs; skb; skb = skb->next) {
ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff);
ipv6h->payload_len = htons(skb->len - nhoff - sizeof(*ipv6h));
- if (tunnel) {
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
- }
skb->network_header = (u8 *)ipv6h - skb->head;
- if (!tunnel && proto == IPPROTO_UDP) {
+ if (udpfrag) {
unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr);
fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen);
fptr->frag_off = htons(offset);
@@ -148,6 +150,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
offset += (ntohs(ipv6h->payload_len) -
sizeof(struct frag_hdr));
}
+ if (encap)
+ skb_reset_inner_headers(skb);
}
out:
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index ef02b26ccf81..16f91a2e7888 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -342,6 +342,20 @@ static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
return mtu;
}
+static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
+{
+ if (skb->len <= mtu || skb->local_df)
+ return false;
+
+ if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
+ return true;
+
+ if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+ return false;
+
+ return true;
+}
+
int ip6_forward(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@@ -466,8 +480,7 @@ int ip6_forward(struct sk_buff *skb)
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
- (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
+ if (ip6_pkt_too_big(skb, mtu)) {
/* Again, force OUTPUT device used as source address */
skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
@@ -517,9 +530,6 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->tc_index = from->tc_index;
#endif
nf_copy(to, from);
-#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
- to->nf_trace = from->nf_trace;
-#endif
skb_copy_secmark(to, from);
}
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 35750df744dc..4bff1f297e39 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -50,6 +50,11 @@ config NFT_CHAIN_NAT_IPV6
packet transformations such as the source, destination address and
source and destination ports.
+config NFT_REJECT_IPV6
+ depends on NF_TABLES_IPV6
+ default NFT_REJECT
+ tristate
+
config IP6_NF_IPTABLES
tristate "IP6 tables support (required for filtering)"
depends on INET && IPV6
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index d1b4928f34f7..70d3dd66f2cd 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o
obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
+obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
# matches
obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o
diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c
new file mode 100644
index 000000000000..0bc19fa87821
--- /dev/null
+++ b/net/ipv6/netfilter/nft_reject_ipv6.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2013 Eric Leblond <eric@regit.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_reject.h>
+#include <net/netfilter/ipv6/nf_reject.h>
+
+void nft_reject_ipv6_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_reject *priv = nft_expr_priv(expr);
+ struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
+
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ nf_send_unreach6(net, pkt->skb, priv->icmp_code,
+ pkt->ops->hooknum);
+ break;
+ case NFT_REJECT_TCP_RST:
+ nf_send_reset6(net, pkt->skb, pkt->ops->hooknum);
+ break;
+ }
+
+ data[NFT_REG_VERDICT].verdict = NF_DROP;
+}
+EXPORT_SYMBOL_GPL(nft_reject_ipv6_eval);
+
+static struct nft_expr_type nft_reject_ipv6_type;
+static const struct nft_expr_ops nft_reject_ipv6_ops = {
+ .type = &nft_reject_ipv6_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+ .eval = nft_reject_ipv6_eval,
+ .init = nft_reject_init,
+ .dump = nft_reject_dump,
+};
+
+static struct nft_expr_type nft_reject_ipv6_type __read_mostly = {
+ .family = NFPROTO_IPV6,
+ .name = "reject",
+ .ops = &nft_reject_ipv6_ops,
+ .policy = nft_reject_policy,
+ .maxattr = NFTA_REJECT_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_reject_ipv6_module_init(void)
+{
+ return nft_register_expr(&nft_reject_ipv6_type);
+}
+
+static void __exit nft_reject_ipv6_module_exit(void)
+{
+ nft_unregister_expr(&nft_reject_ipv6_type);
+}
+
+module_init(nft_reject_ipv6_module_init);
+module_exit(nft_reject_ipv6_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "reject");
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index fb9beb78f00b..587bbdcb22b4 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -135,6 +135,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
fl6.flowi6_proto = IPPROTO_ICMPV6;
fl6.saddr = np->saddr;
fl6.daddr = *daddr;
+ fl6.flowi6_mark = sk->sk_mark;
fl6.fl6_icmp_type = user_icmph.icmp6_type;
fl6.fl6_icmp_code = user_icmph.icmp6_code;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 11dac21e6586..fba54a407bb2 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1513,7 +1513,7 @@ int ip6_route_add(struct fib6_config *cfg)
if (!table)
goto out;
- rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
+ rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
if (!rt) {
err = -ENOMEM;
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 3dfbcf1dcb1c..b4d74c86586c 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -475,6 +475,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev)
ipip6_tunnel_unlink(sitn, tunnel);
ipip6_tunnel_del_prl(tunnel, NULL);
}
+ ip_tunnel_dst_reset_all(tunnel);
dev_put(dev);
}
@@ -1082,6 +1083,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)
t->parms.link = p->link;
ipip6_tunnel_bind_dev(t->dev);
}
+ ip_tunnel_dst_reset_all(t);
netdev_state_change(t->dev);
}
@@ -1112,6 +1114,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
t->ip6rd.relay_prefix = relay_prefix;
t->ip6rd.prefixlen = ip6rd->prefixlen;
t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen;
+ ip_tunnel_dst_reset_all(t);
netdev_state_change(t->dev);
return 0;
}
@@ -1271,6 +1274,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);
break;
}
+ ip_tunnel_dst_reset_all(t);
netdev_state_change(dev);
break;
@@ -1326,6 +1330,9 @@ static const struct net_device_ops ipip6_netdev_ops = {
static void ipip6_dev_free(struct net_device *dev)
{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+
+ free_percpu(tunnel->dst_cache);
free_percpu(dev->tstats);
free_netdev(dev);
}
@@ -1375,6 +1382,12 @@ static int ipip6_tunnel_init(struct net_device *dev)
u64_stats_init(&ipip6_tunnel_stats->syncp);
}
+ tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
+ if (!tunnel->dst_cache) {
+ free_percpu(dev->tstats);
+ return -ENOMEM;
+ }
+
return 0;
}
@@ -1405,6 +1418,12 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)
u64_stats_init(&ipip6_fb_stats->syncp);
}
+ tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst);
+ if (!tunnel->dst_cache) {
+ free_percpu(dev->tstats);
+ return -ENOMEM;
+ }
+
dev_hold(dev);
rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);
return 0;
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index e7359f9eaa8d..b261ee8b83fc 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -113,7 +113,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
fptr->nexthdr = nexthdr;
fptr->reserved = 0;
- ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb));
+ fptr->identification = skb_shinfo(skb)->ip6_frag_id;
/* Fragment the skb. ipv6 header and the remaining fields of the
* fragment header are updated in ipv6_gso_segment()
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 994e28bfb32e..00b2a6d1c009 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -52,18 +52,12 @@
#include <net/p8022.h>
#include <net/psnap.h>
#include <net/sock.h>
+#include <net/datalink.h>
#include <net/tcp_states.h>
+#include <net/net_namespace.h>
#include <asm/uaccess.h>
-#ifdef CONFIG_SYSCTL
-extern void ipx_register_sysctl(void);
-extern void ipx_unregister_sysctl(void);
-#else
-#define ipx_register_sysctl()
-#define ipx_unregister_sysctl()
-#endif
-
/* Configuration Variables */
static unsigned char ipxcfg_max_hops = 16;
static char ipxcfg_auto_select_primary;
@@ -84,15 +78,6 @@ DEFINE_SPINLOCK(ipx_interfaces_lock);
struct ipx_interface *ipx_primary_net;
struct ipx_interface *ipx_internal_net;
-extern int ipxrtr_add_route(__be32 network, struct ipx_interface *intrfc,
- unsigned char *node);
-extern void ipxrtr_del_routes(struct ipx_interface *intrfc);
-extern int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx,
- struct iovec *iov, size_t len, int noblock);
-extern int ipxrtr_route_skb(struct sk_buff *skb);
-extern struct ipx_route *ipxrtr_lookup(__be32 net);
-extern int ipxrtr_ioctl(unsigned int cmd, void __user *arg);
-
struct ipx_interface *ipx_interfaces_head(void)
{
struct ipx_interface *rc = NULL;
@@ -1986,9 +1971,6 @@ static struct notifier_block ipx_dev_notifier = {
.notifier_call = ipxitf_device_event,
};
-extern struct datalink_proto *make_EII_client(void);
-extern void destroy_EII_client(struct datalink_proto *);
-
static const unsigned char ipx_8022_type = 0xE0;
static const unsigned char ipx_snap_id[5] = { 0x0, 0x0, 0x0, 0x81, 0x37 };
static const char ipx_EII_err_msg[] __initconst =
diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c
index 30f4519b092f..c1f03185c5e1 100644
--- a/net/ipx/ipx_route.c
+++ b/net/ipx/ipx_route.c
@@ -20,15 +20,11 @@ DEFINE_RWLOCK(ipx_routes_lock);
extern struct ipx_interface *ipx_internal_net;
-extern __be16 ipx_cksum(struct ipxhdr *packet, int length);
extern struct ipx_interface *ipxitf_find_using_net(__be32 net);
extern int ipxitf_demux_socket(struct ipx_interface *intrfc,
struct sk_buff *skb, int copy);
extern int ipxitf_demux_socket(struct ipx_interface *intrfc,
struct sk_buff *skb, int copy);
-extern int ipxitf_send(struct ipx_interface *intrfc, struct sk_buff *skb,
- char *node);
-extern struct ipx_interface *ipxitf_find_using_net(__be32 net);
struct ipx_route *ipxrtr_lookup(__be32 net)
{
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 735d0f60c83a..85d9d94c0a3c 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -112,7 +112,6 @@ struct l2tp_net {
spinlock_t l2tp_session_hlist_lock;
};
-static void l2tp_session_set_header_len(struct l2tp_session *session, int version);
static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk)
@@ -1863,7 +1862,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_delete);
/* We come here whenever a session's send_seq, cookie_len or
* l2specific_len parameters are set.
*/
-static void l2tp_session_set_header_len(struct l2tp_session *session, int version)
+void l2tp_session_set_header_len(struct l2tp_session *session, int version)
{
if (version == L2TP_HDR_VER_2) {
session->hdr_len = 6;
@@ -1876,6 +1875,7 @@ static void l2tp_session_set_header_len(struct l2tp_session *session, int versio
}
}
+EXPORT_SYMBOL_GPL(l2tp_session_set_header_len);
struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
{
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 1f01ba3435bc..3f93ccd6ba97 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -263,6 +263,7 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
int length, int (*payload_hook)(struct sk_buff *skb));
int l2tp_session_queue_purge(struct l2tp_session *session);
int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb);
+void l2tp_session_set_header_len(struct l2tp_session *session, int version);
int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb,
int hdr_len);
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 4cfd722e9153..bd7387adea9e 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -578,8 +578,10 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf
if (info->attrs[L2TP_ATTR_RECV_SEQ])
session->recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]);
- if (info->attrs[L2TP_ATTR_SEND_SEQ])
+ if (info->attrs[L2TP_ATTR_SEND_SEQ]) {
session->send_seq = nla_get_u8(info->attrs[L2TP_ATTR_SEND_SEQ]);
+ l2tp_session_set_header_len(session, session->tunnel->version);
+ }
if (info->attrs[L2TP_ATTR_LNS_MODE])
session->lns_mode = nla_get_u8(info->attrs[L2TP_ATTR_LNS_MODE]);
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index be5fadf34739..5990919356a5 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -254,12 +254,14 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int
po = pppox_sk(sk);
ppp_input(&po->chan, skb);
} else {
- l2tp_info(session, PPPOL2TP_MSG_DATA, "%s: socket not bound\n",
- session->name);
+ l2tp_dbg(session, PPPOL2TP_MSG_DATA,
+ "%s: recv %d byte data frame, passing to L2TP socket\n",
+ session->name, data_len);
- /* Not bound. Nothing we can do, so discard. */
- atomic_long_inc(&session->stats.rx_errors);
- kfree_skb(skb);
+ if (sock_queue_rcv_skb(sk, skb) < 0) {
+ atomic_long_inc(&session->stats.rx_errors);
+ kfree_skb(skb);
+ }
}
return;
@@ -1312,6 +1314,7 @@ static int pppol2tp_session_setsockopt(struct sock *sk,
po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ :
PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
}
+ l2tp_session_set_header_len(session, session->tunnel->version);
l2tp_info(session, PPPOL2TP_MSG_CONTROL,
"%s: set send_seq=%d\n",
session->name, session->send_seq);
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index d6d1f1df9119..ce1c44370610 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1057,7 +1057,8 @@ static void ieee80211_uninit(struct net_device *dev)
static u16 ieee80211_netdev_select_queue(struct net_device *dev,
struct sk_buff *skb,
- void *accel_priv)
+ void *accel_priv,
+ select_queue_fallback_t fallback)
{
return ieee80211_select_queue(IEEE80211_DEV_TO_SUB_IF(dev), skb);
}
@@ -1075,7 +1076,8 @@ static const struct net_device_ops ieee80211_dataif_ops = {
static u16 ieee80211_monitor_select_queue(struct net_device *dev,
struct sk_buff *skb,
- void *accel_priv)
+ void *accel_priv,
+ select_queue_fallback_t fallback)
{
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = sdata->local;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index c37467562fd0..e9410d17619d 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -513,7 +513,6 @@ config NFT_QUEUE
config NFT_REJECT
depends on NF_TABLES
- depends on NF_TABLES_IPV6 || !NF_TABLES_IPV6
default m if NETFILTER_ADVANCED=n
tristate "Netfilter nf_tables reject support"
help
@@ -521,6 +520,11 @@ config NFT_REJECT
explicitly deny and notify via TCP reset/ICMP informational errors
unallowed traffic.
+config NFT_REJECT_INET
+ depends on NF_TABLES_INET
+ default NFT_REJECT
+ tristate
+
config NFT_COMPAT
depends on NF_TABLES
depends on NETFILTER_XTABLES
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index ee9c4de5f8ed..bffdad774da7 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -79,6 +79,7 @@ obj-$(CONFIG_NFT_LIMIT) += nft_limit.o
obj-$(CONFIG_NFT_NAT) += nft_nat.o
obj-$(CONFIG_NFT_QUEUE) += nft_queue.o
obj-$(CONFIG_NFT_REJECT) += nft_reject.o
+obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o
obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o
obj-$(CONFIG_NFT_HASH) += nft_hash.o
obj-$(CONFIG_NFT_COUNTER) += nft_counter.o
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 59a1a85bcb3e..a8eb0a89326a 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -871,11 +871,11 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p,
cp->protocol = p->protocol;
ip_vs_addr_set(p->af, &cp->caddr, p->caddr);
cp->cport = p->cport;
- ip_vs_addr_set(p->af, &cp->vaddr, p->vaddr);
- cp->vport = p->vport;
- /* proto should only be IPPROTO_IP if d_addr is a fwmark */
+ /* proto should only be IPPROTO_IP if p->vaddr is a fwmark */
ip_vs_addr_set(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af,
- &cp->daddr, daddr);
+ &cp->vaddr, p->vaddr);
+ cp->vport = p->vport;
+ ip_vs_addr_set(p->af, &cp->daddr, daddr);
cp->dport = dport;
cp->flags = flags;
cp->fwmark = fwmark;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 8824ed0ccc9c..356bef519fe5 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -312,6 +312,21 @@ static void death_by_timeout(unsigned long ul_conntrack)
nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0);
}
+static inline bool
+nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
+ const struct nf_conntrack_tuple *tuple,
+ u16 zone)
+{
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
+
+ /* A conntrack can be recreated with the equal tuple,
+ * so we need to check that the conntrack is confirmed
+ */
+ return nf_ct_tuple_equal(tuple, &h->tuple) &&
+ nf_ct_zone(ct) == zone &&
+ nf_ct_is_confirmed(ct);
+}
+
/*
* Warning :
* - Caller must take a reference on returned object
@@ -333,8 +348,7 @@ ____nf_conntrack_find(struct net *net, u16 zone,
local_bh_disable();
begin:
hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[bucket], hnnode) {
- if (nf_ct_tuple_equal(tuple, &h->tuple) &&
- nf_ct_zone(nf_ct_tuplehash_to_ctrack(h)) == zone) {
+ if (nf_ct_key_equal(h, tuple, zone)) {
NF_CT_STAT_INC(net, found);
local_bh_enable();
return h;
@@ -372,8 +386,7 @@ begin:
!atomic_inc_not_zero(&ct->ct_general.use)))
h = NULL;
else {
- if (unlikely(!nf_ct_tuple_equal(tuple, &h->tuple) ||
- nf_ct_zone(ct) != zone)) {
+ if (unlikely(!nf_ct_key_equal(h, tuple, zone))) {
nf_ct_put(ct);
goto begin;
}
@@ -435,7 +448,9 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
goto out;
add_timer(&ct->timeout);
- nf_conntrack_get(&ct->ct_general);
+ smp_wmb();
+ /* The caller holds a reference to this object */
+ atomic_set(&ct->ct_general.use, 2);
__nf_conntrack_hash_insert(ct, hash, repl_hash);
NF_CT_STAT_INC(net, insert);
spin_unlock_bh(&nf_conntrack_lock);
@@ -449,6 +464,21 @@ out:
}
EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
+/* deletion from this larval template list happens via nf_ct_put() */
+void nf_conntrack_tmpl_insert(struct net *net, struct nf_conn *tmpl)
+{
+ __set_bit(IPS_TEMPLATE_BIT, &tmpl->status);
+ __set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
+ nf_conntrack_get(&tmpl->ct_general);
+
+ spin_lock_bh(&nf_conntrack_lock);
+ /* Overload tuple linked list to put us in template list. */
+ hlist_nulls_add_head_rcu(&tmpl->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
+ &net->ct.tmpl);
+ spin_unlock_bh(&nf_conntrack_lock);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_tmpl_insert);
+
/* Confirm a connection given skb; places it in hash table */
int
__nf_conntrack_confirm(struct sk_buff *skb)
@@ -720,11 +750,10 @@ __nf_conntrack_alloc(struct net *net, u16 zone,
nf_ct_zone->id = zone;
}
#endif
- /*
- * changes to lookup keys must be done before setting refcnt to 1
+ /* Because we use RCU lookups, we set ct_general.use to zero before
+ * this is inserted in any list.
*/
- smp_wmb();
- atomic_set(&ct->ct_general.use, 1);
+ atomic_set(&ct->ct_general.use, 0);
return ct;
#ifdef CONFIG_NF_CONNTRACK_ZONES
@@ -748,6 +777,11 @@ void nf_conntrack_free(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
+ /* A freed object has refcnt == 0, that's
+ * the golden rule for SLAB_DESTROY_BY_RCU
+ */
+ NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0);
+
nf_ct_ext_destroy(ct);
nf_ct_ext_free(ct);
kmem_cache_free(net->ct.nf_conntrack_cachep, ct);
@@ -843,6 +877,9 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
NF_CT_STAT_INC(net, new);
}
+ /* Now it is inserted into the unconfirmed list, bump refcount */
+ nf_conntrack_get(&ct->ct_general);
+
/* Overload tuple linked list to put us in unconfirmed list. */
hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
&net->ct.unconfirmed);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index bb322d0beb48..b9f0e0374322 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1310,27 +1310,22 @@ ctnetlink_change_status(struct nf_conn *ct, const struct nlattr * const cda[])
}
static int
-ctnetlink_change_nat(struct nf_conn *ct, const struct nlattr * const cda[])
+ctnetlink_setup_nat(struct nf_conn *ct, const struct nlattr * const cda[])
{
#ifdef CONFIG_NF_NAT_NEEDED
int ret;
- if (cda[CTA_NAT_DST]) {
- ret = ctnetlink_parse_nat_setup(ct,
- NF_NAT_MANIP_DST,
- cda[CTA_NAT_DST]);
- if (ret < 0)
- return ret;
- }
- if (cda[CTA_NAT_SRC]) {
- ret = ctnetlink_parse_nat_setup(ct,
- NF_NAT_MANIP_SRC,
- cda[CTA_NAT_SRC]);
- if (ret < 0)
- return ret;
- }
- return 0;
+ ret = ctnetlink_parse_nat_setup(ct, NF_NAT_MANIP_DST,
+ cda[CTA_NAT_DST]);
+ if (ret < 0)
+ return ret;
+
+ ret = ctnetlink_parse_nat_setup(ct, NF_NAT_MANIP_SRC,
+ cda[CTA_NAT_SRC]);
+ return ret;
#else
+ if (!cda[CTA_NAT_DST] && !cda[CTA_NAT_SRC])
+ return 0;
return -EOPNOTSUPP;
#endif
}
@@ -1659,11 +1654,9 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,
goto err2;
}
- if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) {
- err = ctnetlink_change_nat(ct, cda);
- if (err < 0)
- goto err2;
- }
+ err = ctnetlink_setup_nat(ct, cda);
+ if (err < 0)
+ goto err2;
nf_ct_acct_ext_add(ct, GFP_ATOMIC);
nf_ct_tstamp_ext_add(ct, GFP_ATOMIC);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index d3f5cd6dd962..52ca952b802c 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -432,15 +432,15 @@ nf_nat_setup_info(struct nf_conn *ct,
}
EXPORT_SYMBOL(nf_nat_setup_info);
-unsigned int
-nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
+static unsigned int
+__nf_nat_alloc_null_binding(struct nf_conn *ct, enum nf_nat_manip_type manip)
{
/* Force range to this IP; let proto decide mapping for
* per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
* Use reply in case it's already been mangled (eg local packet).
*/
union nf_inet_addr ip =
- (HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
+ (manip == NF_NAT_MANIP_SRC ?
ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3 :
ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3);
struct nf_nat_range range = {
@@ -448,7 +448,13 @@ nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
.min_addr = ip,
.max_addr = ip,
};
- return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
+ return nf_nat_setup_info(ct, &range, manip);
+}
+
+unsigned int
+nf_nat_alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
+{
+ return __nf_nat_alloc_null_binding(ct, HOOK2MANIP(hooknum));
}
EXPORT_SYMBOL_GPL(nf_nat_alloc_null_binding);
@@ -702,9 +708,9 @@ static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
static int
nfnetlink_parse_nat(const struct nlattr *nat,
- const struct nf_conn *ct, struct nf_nat_range *range)
+ const struct nf_conn *ct, struct nf_nat_range *range,
+ const struct nf_nat_l3proto *l3proto)
{
- const struct nf_nat_l3proto *l3proto;
struct nlattr *tb[CTA_NAT_MAX+1];
int err;
@@ -714,38 +720,46 @@ nfnetlink_parse_nat(const struct nlattr *nat,
if (err < 0)
return err;
- rcu_read_lock();
- l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
- if (l3proto == NULL) {
- err = -EAGAIN;
- goto out;
- }
err = l3proto->nlattr_to_range(tb, range);
if (err < 0)
- goto out;
+ return err;
if (!tb[CTA_NAT_PROTO])
- goto out;
+ return 0;
- err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range);
-out:
- rcu_read_unlock();
- return err;
+ return nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range);
}
+/* This function is called under rcu_read_lock() */
static int
nfnetlink_parse_nat_setup(struct nf_conn *ct,
enum nf_nat_manip_type manip,
const struct nlattr *attr)
{
struct nf_nat_range range;
+ const struct nf_nat_l3proto *l3proto;
int err;
- err = nfnetlink_parse_nat(attr, ct, &range);
+ /* Should not happen, restricted to creating new conntracks
+ * via ctnetlink.
+ */
+ if (WARN_ON_ONCE(nf_nat_initialized(ct, manip)))
+ return -EEXIST;
+
+ /* Make sure that L3 NAT is there by when we call nf_nat_setup_info to
+ * attach the null binding, otherwise this may oops.
+ */
+ l3proto = __nf_nat_l3proto_find(nf_ct_l3num(ct));
+ if (l3proto == NULL)
+ return -EAGAIN;
+
+ /* No NAT information has been passed, allocate the null-binding */
+ if (attr == NULL)
+ return __nf_nat_alloc_null_binding(ct, manip);
+
+ err = nfnetlink_parse_nat(attr, ct, &range, l3proto);
if (err < 0)
return err;
- if (nf_nat_initialized(ct, manip))
- return -EEXIST;
return nf_nat_setup_info(ct, &range, manip);
}
diff --git a/net/netfilter/nf_synproxy_core.c b/net/netfilter/nf_synproxy_core.c
index 9858e3e51a3a..52e20c9a46a5 100644
--- a/net/netfilter/nf_synproxy_core.c
+++ b/net/netfilter/nf_synproxy_core.c
@@ -363,9 +363,8 @@ static int __net_init synproxy_net_init(struct net *net)
goto err2;
if (!nfct_synproxy_ext_add(ct))
goto err2;
- __set_bit(IPS_TEMPLATE_BIT, &ct->status);
- __set_bit(IPS_CONFIRMED_BIT, &ct->status);
+ nf_conntrack_tmpl_insert(net, ct);
snet->tmpl = ct;
snet->stats = alloc_percpu(struct synproxy_stats);
@@ -390,7 +389,7 @@ static void __net_exit synproxy_net_exit(struct net *net)
{
struct synproxy_net *snet = synproxy_pernet(net);
- nf_conntrack_free(snet->tmpl);
+ nf_ct_put(snet->tmpl);
synproxy_proc_exit(net);
free_percpu(snet->stats);
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 117bbaaddde6..adce01e8bb57 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1008,10 +1008,8 @@ notify:
return 0;
}
-static void nf_tables_rcu_chain_destroy(struct rcu_head *head)
+static void nf_tables_chain_destroy(struct nft_chain *chain)
{
- struct nft_chain *chain = container_of(head, struct nft_chain, rcu_head);
-
BUG_ON(chain->use > 0);
if (chain->flags & NFT_BASE_CHAIN) {
@@ -1045,7 +1043,7 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
if (IS_ERR(chain))
return PTR_ERR(chain);
- if (!list_empty(&chain->rules))
+ if (!list_empty(&chain->rules) || chain->use > 0)
return -EBUSY;
list_del(&chain->list);
@@ -1059,7 +1057,9 @@ static int nf_tables_delchain(struct sock *nlsk, struct sk_buff *skb,
family);
/* Make sure all rule references are gone before this is released */
- call_rcu(&chain->rcu_head, nf_tables_rcu_chain_destroy);
+ synchronize_rcu();
+
+ nf_tables_chain_destroy(chain);
return 0;
}
@@ -1114,35 +1114,45 @@ void nft_unregister_expr(struct nft_expr_type *type)
}
EXPORT_SYMBOL_GPL(nft_unregister_expr);
-static const struct nft_expr_type *__nft_expr_type_get(struct nlattr *nla)
+static const struct nft_expr_type *__nft_expr_type_get(u8 family,
+ struct nlattr *nla)
{
const struct nft_expr_type *type;
list_for_each_entry(type, &nf_tables_expressions, list) {
- if (!nla_strcmp(nla, type->name))
+ if (!nla_strcmp(nla, type->name) &&
+ (!type->family || type->family == family))
return type;
}
return NULL;
}
-static const struct nft_expr_type *nft_expr_type_get(struct nlattr *nla)
+static const struct nft_expr_type *nft_expr_type_get(u8 family,
+ struct nlattr *nla)
{
const struct nft_expr_type *type;
if (nla == NULL)
return ERR_PTR(-EINVAL);
- type = __nft_expr_type_get(nla);
+ type = __nft_expr_type_get(family, nla);
if (type != NULL && try_module_get(type->owner))
return type;
#ifdef CONFIG_MODULES
if (type == NULL) {
nfnl_unlock(NFNL_SUBSYS_NFTABLES);
+ request_module("nft-expr-%u-%.*s", family,
+ nla_len(nla), (char *)nla_data(nla));
+ nfnl_lock(NFNL_SUBSYS_NFTABLES);
+ if (__nft_expr_type_get(family, nla))
+ return ERR_PTR(-EAGAIN);
+
+ nfnl_unlock(NFNL_SUBSYS_NFTABLES);
request_module("nft-expr-%.*s",
nla_len(nla), (char *)nla_data(nla));
nfnl_lock(NFNL_SUBSYS_NFTABLES);
- if (__nft_expr_type_get(nla))
+ if (__nft_expr_type_get(family, nla))
return ERR_PTR(-EAGAIN);
}
#endif
@@ -1193,7 +1203,7 @@ static int nf_tables_expr_parse(const struct nft_ctx *ctx,
if (err < 0)
return err;
- type = nft_expr_type_get(tb[NFTA_EXPR_NAME]);
+ type = nft_expr_type_get(ctx->afi->family, tb[NFTA_EXPR_NAME]);
if (IS_ERR(type))
return PTR_ERR(type);
@@ -1521,9 +1531,8 @@ err:
return err;
}
-static void nf_tables_rcu_rule_destroy(struct rcu_head *head)
+static void nf_tables_rule_destroy(struct nft_rule *rule)
{
- struct nft_rule *rule = container_of(head, struct nft_rule, rcu_head);
struct nft_expr *expr;
/*
@@ -1538,11 +1547,6 @@ static void nf_tables_rcu_rule_destroy(struct rcu_head *head)
kfree(rule);
}
-static void nf_tables_rule_destroy(struct nft_rule *rule)
-{
- call_rcu(&rule->rcu_head, nf_tables_rcu_rule_destroy);
-}
-
#define NFT_RULE_MAXEXPRS 128
static struct nft_expr_info *info;
@@ -1809,9 +1813,6 @@ static int nf_tables_commit(struct sk_buff *skb)
synchronize_rcu();
list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
- /* Delete this rule from the dirty list */
- list_del(&rupd->list);
-
/* This rule was inactive in the past and just became active.
* Clear the next bit of the genmask since its meaning has
* changed, now it is the future.
@@ -1822,6 +1823,7 @@ static int nf_tables_commit(struct sk_buff *skb)
rupd->chain, rupd->rule,
NFT_MSG_NEWRULE, 0,
rupd->family);
+ list_del(&rupd->list);
kfree(rupd);
continue;
}
@@ -1831,7 +1833,15 @@ static int nf_tables_commit(struct sk_buff *skb)
nf_tables_rule_notify(skb, rupd->nlh, rupd->table, rupd->chain,
rupd->rule, NFT_MSG_DELRULE, 0,
rupd->family);
+ }
+
+ /* Make sure we don't see any packet traversing old rules */
+ synchronize_rcu();
+
+ /* Now we can safely release unused old rules */
+ list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
nf_tables_rule_destroy(rupd->rule);
+ list_del(&rupd->list);
kfree(rupd);
}
@@ -1844,20 +1854,26 @@ static int nf_tables_abort(struct sk_buff *skb)
struct nft_rule_trans *rupd, *tmp;
list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
- /* Delete all rules from the dirty list */
- list_del(&rupd->list);
-
if (!nft_rule_is_active_next(net, rupd->rule)) {
nft_rule_clear(net, rupd->rule);
+ list_del(&rupd->list);
kfree(rupd);
continue;
}
/* This rule is inactive, get rid of it */
list_del_rcu(&rupd->rule->list);
+ }
+
+ /* Make sure we don't see any packet accessing aborted rules */
+ synchronize_rcu();
+
+ list_for_each_entry_safe(rupd, tmp, &net->nft.commit_list, list) {
nf_tables_rule_destroy(rupd->rule);
+ list_del(&rupd->list);
kfree(rupd);
}
+
return 0;
}
@@ -1943,6 +1959,9 @@ static int nft_ctx_init_from_setattr(struct nft_ctx *ctx,
}
if (nla[NFTA_SET_TABLE] != NULL) {
+ if (afi == NULL)
+ return -EAFNOSUPPORT;
+
table = nf_tables_table_lookup(afi, nla[NFTA_SET_TABLE]);
if (IS_ERR(table))
return PTR_ERR(table);
@@ -1989,13 +2008,13 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
if (!sscanf(i->name, name, &tmp))
continue;
- if (tmp < 0 || tmp > BITS_PER_LONG * PAGE_SIZE)
+ if (tmp < 0 || tmp >= BITS_PER_BYTE * PAGE_SIZE)
continue;
set_bit(tmp, inuse);
}
- n = find_first_zero_bit(inuse, BITS_PER_LONG * PAGE_SIZE);
+ n = find_first_zero_bit(inuse, BITS_PER_BYTE * PAGE_SIZE);
free_page((unsigned long)inuse);
}
@@ -2428,6 +2447,8 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
struct nft_ctx ctx;
int err;
+ if (nfmsg->nfgen_family == NFPROTO_UNSPEC)
+ return -EAFNOSUPPORT;
if (nla[NFTA_SET_TABLE] == NULL)
return -EINVAL;
@@ -2435,9 +2456,6 @@ static int nf_tables_delset(struct sock *nlsk, struct sk_buff *skb,
if (err < 0)
return err;
- if (nfmsg->nfgen_family == NFPROTO_UNSPEC)
- return -EAFNOSUPPORT;
-
set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]);
if (IS_ERR(set))
return PTR_ERR(set);
@@ -2723,6 +2741,9 @@ static int nft_add_set_elem(const struct nft_ctx *ctx, struct nft_set *set,
if (nla[NFTA_SET_ELEM_DATA] == NULL &&
!(elem.flags & NFT_SET_ELEM_INTERVAL_END))
return -EINVAL;
+ if (nla[NFTA_SET_ELEM_DATA] != NULL &&
+ elem.flags & NFT_SET_ELEM_INTERVAL_END)
+ return -EINVAL;
} else {
if (nla[NFTA_SET_ELEM_DATA] != NULL)
return -EINVAL;
@@ -2977,6 +2998,9 @@ static int nf_tables_loop_check_setelem(const struct nft_ctx *ctx,
const struct nft_set_iter *iter,
const struct nft_set_elem *elem)
{
+ if (elem->flags & NFT_SET_ELEM_INTERVAL_END)
+ return 0;
+
switch (elem->data.verdict) {
case NFT_JUMP:
case NFT_GOTO:
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 0d879fcb8763..90998a6ff8b9 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -103,9 +103,9 @@ static struct nf_loginfo trace_loginfo = {
},
};
-static inline void nft_trace_packet(const struct nft_pktinfo *pkt,
- const struct nft_chain *chain,
- int rulenum, enum nft_trace type)
+static void nft_trace_packet(const struct nft_pktinfo *pkt,
+ const struct nft_chain *chain,
+ int rulenum, enum nft_trace type)
{
struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 917052e20602..46e275403838 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -226,6 +226,7 @@ static int nft_ct_init_validate_get(const struct nft_expr *expr,
if (tb[NFTA_CT_DIRECTION] != NULL)
return -EINVAL;
break;
+ case NFT_CT_L3PROTOCOL:
case NFT_CT_PROTOCOL:
case NFT_CT_SRC:
case NFT_CT_DST:
@@ -311,8 +312,19 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr)
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_CT_KEY, htonl(priv->key)))
goto nla_put_failure;
- if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
- goto nla_put_failure;
+
+ switch (priv->key) {
+ case NFT_CT_PROTOCOL:
+ case NFT_CT_SRC:
+ case NFT_CT_DST:
+ case NFT_CT_PROTO_SRC:
+ case NFT_CT_PROTO_DST:
+ if (nla_put_u8(skb, NFTA_CT_DIRECTION, priv->dir))
+ goto nla_put_failure;
+ default:
+ break;
+ }
+
return 0;
nla_put_failure:
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 5af790123ad8..26c5154e05f3 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -23,7 +23,6 @@ static const char *nft_log_null_prefix = "";
struct nft_log {
struct nf_loginfo loginfo;
char *prefix;
- int family;
};
static void nft_log_eval(const struct nft_expr *expr,
@@ -33,7 +32,7 @@ static void nft_log_eval(const struct nft_expr *expr,
const struct nft_log *priv = nft_expr_priv(expr);
struct net *net = dev_net(pkt->in ? pkt->in : pkt->out);
- nf_log_packet(net, priv->family, pkt->ops->hooknum, pkt->skb, pkt->in,
+ nf_log_packet(net, pkt->ops->pf, pkt->ops->hooknum, pkt->skb, pkt->in,
pkt->out, &priv->loginfo, "%s", priv->prefix);
}
@@ -52,8 +51,6 @@ static int nft_log_init(const struct nft_ctx *ctx,
struct nf_loginfo *li = &priv->loginfo;
const struct nlattr *nla;
- priv->family = ctx->afi->family;
-
nla = tb[NFTA_LOG_PREFIX];
if (nla != NULL) {
priv->prefix = kmalloc(nla_len(nla) + 1, GFP_KERNEL);
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 8a6116b75b5a..bb4ef4cccb6e 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -16,6 +16,7 @@
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
struct nft_lookup {
struct nft_set *set;
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index e8254ad2e5a9..425cf39af890 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -116,7 +116,7 @@ static void nft_meta_get_eval(const struct nft_expr *expr,
skb->sk->sk_socket->file->f_cred->fsgid);
read_unlock_bh(&skb->sk->sk_callback_lock);
break;
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
case NFT_META_RTCLASSID: {
const struct dst_entry *dst = skb_dst(skb);
@@ -199,7 +199,7 @@ static int nft_meta_init_validate_get(uint32_t key)
case NFT_META_OIFTYPE:
case NFT_META_SKUID:
case NFT_META_SKGID:
-#ifdef CONFIG_NET_CLS_ROUTE
+#ifdef CONFIG_IP_ROUTE_CLASSID
case NFT_META_RTCLASSID:
#endif
#ifdef CONFIG_NETWORK_SECMARK
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index a2aeb318678f..85daa84bfdfe 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -135,7 +135,8 @@ nft_payload_select_ops(const struct nft_ctx *ctx,
if (len == 0 || len > FIELD_SIZEOF(struct nft_data, data))
return ERR_PTR(-EINVAL);
- if (len <= 4 && IS_ALIGNED(offset, len) && base != NFT_PAYLOAD_LL_HEADER)
+ if (len <= 4 && is_power_of_2(len) && IS_ALIGNED(offset, len) &&
+ base != NFT_PAYLOAD_LL_HEADER)
return &nft_payload_fast_ops;
else
return &nft_payload_ops;
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index cbea473d69e9..e8ae2f6bf232 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -25,7 +25,6 @@ struct nft_queue {
u16 queuenum;
u16 queues_total;
u16 flags;
- u8 family;
};
static void nft_queue_eval(const struct nft_expr *expr,
@@ -43,7 +42,7 @@ static void nft_queue_eval(const struct nft_expr *expr,
queue = priv->queuenum + cpu % priv->queues_total;
} else {
queue = nfqueue_hash(pkt->skb, queue,
- priv->queues_total, priv->family,
+ priv->queues_total, pkt->ops->pf,
jhash_initval);
}
}
@@ -71,7 +70,6 @@ static int nft_queue_init(const struct nft_ctx *ctx,
return -EINVAL;
init_hashrandom(&jhash_initval);
- priv->family = ctx->afi->family;
priv->queuenum = ntohs(nla_get_be16(tb[NFTA_QUEUE_NUM]));
if (tb[NFTA_QUEUE_TOTAL] != NULL)
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index ca0c1b231bfe..e21d69d13506 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -69,8 +69,10 @@ static void nft_rbtree_elem_destroy(const struct nft_set *set,
struct nft_rbtree_elem *rbe)
{
nft_data_uninit(&rbe->key, NFT_DATA_VALUE);
- if (set->flags & NFT_SET_MAP)
+ if (set->flags & NFT_SET_MAP &&
+ !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
nft_data_uninit(rbe->data, set->dtype);
+
kfree(rbe);
}
@@ -108,7 +110,8 @@ static int nft_rbtree_insert(const struct nft_set *set,
int err;
size = sizeof(*rbe);
- if (set->flags & NFT_SET_MAP)
+ if (set->flags & NFT_SET_MAP &&
+ !(elem->flags & NFT_SET_ELEM_INTERVAL_END))
size += sizeof(rbe->data[0]);
rbe = kzalloc(size, GFP_KERNEL);
@@ -117,7 +120,8 @@ static int nft_rbtree_insert(const struct nft_set *set,
rbe->flags = elem->flags;
nft_data_copy(&rbe->key, &elem->key);
- if (set->flags & NFT_SET_MAP)
+ if (set->flags & NFT_SET_MAP &&
+ !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
nft_data_copy(rbe->data, &elem->data);
err = __nft_rbtree_insert(set, rbe);
@@ -153,7 +157,8 @@ static int nft_rbtree_get(const struct nft_set *set, struct nft_set_elem *elem)
parent = parent->rb_right;
else {
elem->cookie = rbe;
- if (set->flags & NFT_SET_MAP)
+ if (set->flags & NFT_SET_MAP &&
+ !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
nft_data_copy(&elem->data, rbe->data);
elem->flags = rbe->flags;
return 0;
@@ -177,7 +182,8 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx,
rbe = rb_entry(node, struct nft_rbtree_elem, node);
nft_data_copy(&elem.key, &rbe->key);
- if (set->flags & NFT_SET_MAP)
+ if (set->flags & NFT_SET_MAP &&
+ !(rbe->flags & NFT_SET_ELEM_INTERVAL_END))
nft_data_copy(&elem.data, rbe->data);
elem.flags = rbe->flags;
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index 5e204711d704..f3448c296446 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -16,65 +16,23 @@
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
-#include <net/icmp.h>
-#include <net/netfilter/ipv4/nf_reject.h>
+#include <net/netfilter/nft_reject.h>
-#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
-#include <net/netfilter/ipv6/nf_reject.h>
-#endif
-
-struct nft_reject {
- enum nft_reject_types type:8;
- u8 icmp_code;
- u8 family;
-};
-
-static void nft_reject_eval(const struct nft_expr *expr,
- struct nft_data data[NFT_REG_MAX + 1],
- const struct nft_pktinfo *pkt)
-{
- struct nft_reject *priv = nft_expr_priv(expr);
-#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
- struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out);
-#endif
- switch (priv->type) {
- case NFT_REJECT_ICMP_UNREACH:
- if (priv->family == NFPROTO_IPV4)
- nf_send_unreach(pkt->skb, priv->icmp_code);
-#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
- else if (priv->family == NFPROTO_IPV6)
- nf_send_unreach6(net, pkt->skb, priv->icmp_code,
- pkt->ops->hooknum);
-#endif
- break;
- case NFT_REJECT_TCP_RST:
- if (priv->family == NFPROTO_IPV4)
- nf_send_reset(pkt->skb, pkt->ops->hooknum);
-#if IS_ENABLED(CONFIG_NF_TABLES_IPV6)
- else if (priv->family == NFPROTO_IPV6)
- nf_send_reset6(net, pkt->skb, pkt->ops->hooknum);
-#endif
- break;
- }
-
- data[NFT_REG_VERDICT].verdict = NF_DROP;
-}
-
-static const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = {
+const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = {
[NFTA_REJECT_TYPE] = { .type = NLA_U32 },
[NFTA_REJECT_ICMP_CODE] = { .type = NLA_U8 },
};
+EXPORT_SYMBOL_GPL(nft_reject_policy);
-static int nft_reject_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
+int nft_reject_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
struct nft_reject *priv = nft_expr_priv(expr);
if (tb[NFTA_REJECT_TYPE] == NULL)
return -EINVAL;
- priv->family = ctx->afi->family;
priv->type = ntohl(nla_get_be32(tb[NFTA_REJECT_TYPE]));
switch (priv->type) {
case NFT_REJECT_ICMP_UNREACH:
@@ -89,8 +47,9 @@ static int nft_reject_init(const struct nft_ctx *ctx,
return 0;
}
+EXPORT_SYMBOL_GPL(nft_reject_init);
-static int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr)
+int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_reject *priv = nft_expr_priv(expr);
@@ -109,37 +68,7 @@ static int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr)
nla_put_failure:
return -1;
}
-
-static struct nft_expr_type nft_reject_type;
-static const struct nft_expr_ops nft_reject_ops = {
- .type = &nft_reject_type,
- .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
- .eval = nft_reject_eval,
- .init = nft_reject_init,
- .dump = nft_reject_dump,
-};
-
-static struct nft_expr_type nft_reject_type __read_mostly = {
- .name = "reject",
- .ops = &nft_reject_ops,
- .policy = nft_reject_policy,
- .maxattr = NFTA_REJECT_MAX,
- .owner = THIS_MODULE,
-};
-
-static int __init nft_reject_module_init(void)
-{
- return nft_register_expr(&nft_reject_type);
-}
-
-static void __exit nft_reject_module_exit(void)
-{
- nft_unregister_expr(&nft_reject_type);
-}
-
-module_init(nft_reject_module_init);
-module_exit(nft_reject_module_exit);
+EXPORT_SYMBOL_GPL(nft_reject_dump);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_EXPR("reject");
diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c
new file mode 100644
index 000000000000..b718a52a4654
--- /dev/null
+++ b/net/netfilter/nft_reject_inet.c
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2014 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nft_reject.h>
+
+static void nft_reject_inet_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ switch (pkt->ops->pf) {
+ case NFPROTO_IPV4:
+ return nft_reject_ipv4_eval(expr, data, pkt);
+ case NFPROTO_IPV6:
+ return nft_reject_ipv6_eval(expr, data, pkt);
+ }
+}
+
+static struct nft_expr_type nft_reject_inet_type;
+static const struct nft_expr_ops nft_reject_inet_ops = {
+ .type = &nft_reject_inet_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+ .eval = nft_reject_inet_eval,
+ .init = nft_reject_init,
+ .dump = nft_reject_dump,
+};
+
+static struct nft_expr_type nft_reject_inet_type __read_mostly = {
+ .family = NFPROTO_INET,
+ .name = "reject",
+ .ops = &nft_reject_inet_ops,
+ .policy = nft_reject_policy,
+ .maxattr = NFTA_REJECT_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_reject_inet_module_init(void)
+{
+ return nft_register_expr(&nft_reject_inet_type);
+}
+
+static void __exit nft_reject_inet_module_exit(void)
+{
+ nft_unregister_expr(&nft_reject_inet_type);
+}
+
+module_init(nft_reject_inet_module_init);
+module_exit(nft_reject_inet_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_AF_EXPR(1, "reject");
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 5929be622c5c..75747aecdebe 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -228,12 +228,7 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par,
goto err3;
}
- __set_bit(IPS_TEMPLATE_BIT, &ct->status);
- __set_bit(IPS_CONFIRMED_BIT, &ct->status);
-
- /* Overload tuple linked list to put us in template list. */
- hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
- &par->net->ct.tmpl);
+ nf_conntrack_tmpl_insert(par->net, ct);
out:
info->ct = ct;
return 0;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index fdf51353cf78..04748ab649c2 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -1489,8 +1489,8 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
if (addr->sa_family != AF_NETLINK)
return -EINVAL;
- /* Only superuser is allowed to send multicasts */
- if (nladdr->nl_groups && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
+ if ((nladdr->nl_groups || nladdr->nl_pid) &&
+ !netlink_capable(sock, NL_CFG_F_NONROOT_SEND))
return -EPERM;
if (!nlk->portid)
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index df4692826ead..e9a48baf8551 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -55,6 +55,7 @@
#include "datapath.h"
#include "flow.h"
+#include "flow_table.h"
#include "flow_netlink.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"
@@ -160,7 +161,6 @@ static void destroy_dp_rcu(struct rcu_head *rcu)
{
struct datapath *dp = container_of(rcu, struct datapath, rcu);
- ovs_flow_tbl_destroy(&dp->table);
free_percpu(dp->stats_percpu);
release_net(ovs_dp_get_net(dp));
kfree(dp->ports);
@@ -466,6 +466,14 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
skb_zerocopy(user_skb, skb, skb->len, hlen);
+ /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
+ if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
+ size_t plen = NLA_ALIGN(user_skb->len) - user_skb->len;
+
+ if (plen > 0)
+ memset(skb_put(user_skb, plen), 0, plen);
+ }
+
((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;
err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
@@ -852,11 +860,8 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
goto err_unlock_ovs;
/* The unmasked key has to be the same for flow updates. */
- error = -EINVAL;
- if (!ovs_flow_cmp_unmasked_key(flow, &match)) {
- OVS_NLERR("Flow modification message rejected, unmasked key does not match.\n");
+ if (!ovs_flow_cmp_unmasked_key(flow, &match))
goto err_unlock_ovs;
- }
/* Update actions. */
old_acts = ovsl_dereference(flow->sf_acts);
@@ -1079,6 +1084,7 @@ static size_t ovs_dp_cmd_msg_size(void)
msgsize += nla_total_size(IFNAMSIZ);
msgsize += nla_total_size(sizeof(struct ovs_dp_stats));
msgsize += nla_total_size(sizeof(struct ovs_dp_megaflow_stats));
+ msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
return msgsize;
}
@@ -1279,7 +1285,7 @@ err_destroy_ports_array:
err_destroy_percpu:
free_percpu(dp->stats_percpu);
err_destroy_table:
- ovs_flow_tbl_destroy(&dp->table);
+ ovs_flow_tbl_destroy(&dp->table, false);
err_free_dp:
release_net(ovs_dp_get_net(dp));
kfree(dp);
@@ -1306,10 +1312,13 @@ static void __dp_destroy(struct datapath *dp)
list_del_rcu(&dp->list_node);
/* OVSP_LOCAL is datapath internal port. We need to make sure that
- * all port in datapath are destroyed first before freeing datapath.
+ * all ports in datapath are destroyed first before freeing datapath.
*/
ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
+ /* RCU destroy the flow table */
+ ovs_flow_tbl_destroy(&dp->table, true);
+
call_rcu(&dp->rcu, destroy_dp_rcu);
}
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index c58a0fe3c889..3c268b3d71c3 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -153,29 +153,29 @@ static void rcu_free_flow_callback(struct rcu_head *rcu)
flow_free(flow);
}
-static void flow_mask_del_ref(struct sw_flow_mask *mask, bool deferred)
-{
- if (!mask)
- return;
-
- BUG_ON(!mask->ref_count);
- mask->ref_count--;
-
- if (!mask->ref_count) {
- list_del_rcu(&mask->list);
- if (deferred)
- kfree_rcu(mask, rcu);
- else
- kfree(mask);
- }
-}
-
void ovs_flow_free(struct sw_flow *flow, bool deferred)
{
if (!flow)
return;
- flow_mask_del_ref(flow->mask, deferred);
+ if (flow->mask) {
+ struct sw_flow_mask *mask = flow->mask;
+
+ /* ovs-lock is required to protect mask-refcount and
+ * mask list.
+ */
+ ASSERT_OVSL();
+ BUG_ON(!mask->ref_count);
+ mask->ref_count--;
+
+ if (!mask->ref_count) {
+ list_del_rcu(&mask->list);
+ if (deferred)
+ kfree_rcu(mask, rcu);
+ else
+ kfree(mask);
+ }
+ }
if (deferred)
call_rcu(&flow->rcu, rcu_free_flow_callback);
@@ -188,26 +188,9 @@ static void free_buckets(struct flex_array *buckets)
flex_array_free(buckets);
}
+
static void __table_instance_destroy(struct table_instance *ti)
{
- int i;
-
- if (ti->keep_flows)
- goto skip_flows;
-
- for (i = 0; i < ti->n_buckets; i++) {
- struct sw_flow *flow;
- struct hlist_head *head = flex_array_get(ti->buckets, i);
- struct hlist_node *n;
- int ver = ti->node_ver;
-
- hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
- hlist_del(&flow->hash_node[ver]);
- ovs_flow_free(flow, false);
- }
- }
-
-skip_flows:
free_buckets(ti->buckets);
kfree(ti);
}
@@ -258,20 +241,38 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
static void table_instance_destroy(struct table_instance *ti, bool deferred)
{
+ int i;
+
if (!ti)
return;
+ if (ti->keep_flows)
+ goto skip_flows;
+
+ for (i = 0; i < ti->n_buckets; i++) {
+ struct sw_flow *flow;
+ struct hlist_head *head = flex_array_get(ti->buckets, i);
+ struct hlist_node *n;
+ int ver = ti->node_ver;
+
+ hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) {
+ hlist_del_rcu(&flow->hash_node[ver]);
+ ovs_flow_free(flow, deferred);
+ }
+ }
+
+skip_flows:
if (deferred)
call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb);
else
__table_instance_destroy(ti);
}
-void ovs_flow_tbl_destroy(struct flow_table *table)
+void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred)
{
struct table_instance *ti = ovsl_dereference(table->ti);
- table_instance_destroy(ti, false);
+ table_instance_destroy(ti, deferred);
}
struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
@@ -504,16 +505,11 @@ static struct sw_flow_mask *mask_alloc(void)
mask = kmalloc(sizeof(*mask), GFP_KERNEL);
if (mask)
- mask->ref_count = 0;
+ mask->ref_count = 1;
return mask;
}
-static void mask_add_ref(struct sw_flow_mask *mask)
-{
- mask->ref_count++;
-}
-
static bool mask_equal(const struct sw_flow_mask *a,
const struct sw_flow_mask *b)
{
@@ -554,9 +550,11 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
mask->key = new->key;
mask->range = new->range;
list_add_rcu(&mask->list, &tbl->mask_list);
+ } else {
+ BUG_ON(!mask->ref_count);
+ mask->ref_count++;
}
- mask_add_ref(mask);
flow->mask = mask;
return 0;
}
diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h
index 1996e34c0fd8..baaeb101924d 100644
--- a/net/openvswitch/flow_table.h
+++ b/net/openvswitch/flow_table.h
@@ -60,7 +60,7 @@ void ovs_flow_free(struct sw_flow *, bool deferred);
int ovs_flow_tbl_init(struct flow_table *);
int ovs_flow_tbl_count(struct flow_table *table);
-void ovs_flow_tbl_destroy(struct flow_table *table);
+void ovs_flow_tbl_destroy(struct flow_table *table, bool deferred);
int ovs_flow_tbl_flush(struct flow_table *flow_table);
int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow,
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 6a2bb37506c5..48a6a93db296 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -308,11 +308,27 @@ static bool packet_use_direct_xmit(const struct packet_sock *po)
return po->xmit == packet_direct_xmit;
}
-static u16 packet_pick_tx_queue(struct net_device *dev)
+static u16 __packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
{
return (u16) raw_smp_processor_id() % dev->real_num_tx_queues;
}
+static void packet_pick_tx_queue(struct net_device *dev, struct sk_buff *skb)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+ u16 queue_index;
+
+ if (ops->ndo_select_queue) {
+ queue_index = ops->ndo_select_queue(dev, skb, NULL,
+ __packet_pick_tx_queue);
+ queue_index = netdev_cap_txqueue(dev, queue_index);
+ } else {
+ queue_index = __packet_pick_tx_queue(dev, skb);
+ }
+
+ skb_set_queue_mapping(skb, queue_index);
+}
+
/* register_prot_hook must be invoked with the po->bind_lock held,
* or from a context in which asynchronous accesses to the packet
* socket is not possible (packet_create()).
@@ -2285,7 +2301,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
}
}
- skb_set_queue_mapping(skb, packet_pick_tx_queue(dev));
+ packet_pick_tx_queue(dev, skb);
+
skb->destructor = tpacket_destruct_skb;
__packet_set_status(po, ph, TP_STATUS_SENDING);
packet_inc_pending(&po->tx_ring);
@@ -2499,7 +2516,8 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
skb->dev = dev;
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
- skb_set_queue_mapping(skb, packet_pick_tx_queue(dev));
+
+ packet_pick_tx_queue(dev, skb);
if (po->has_vnet_hdr) {
if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
@@ -3786,7 +3804,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
*/
if (!tx_ring)
init_prb_bdqc(po, rb, pg_vec, req_u, tx_ring);
- break;
+ break;
default:
break;
}
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index a255d0200a59..fefeeb73f15f 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -15,6 +15,11 @@
*
* ECN support is added by Naeem Khademi <naeemk@ifi.uio.no>
* University of Oslo, Norway.
+ *
+ * References:
+ * IETF draft submission: http://tools.ietf.org/html/draft-pan-aqm-pie-00
+ * IEEE Conference on High Performance Switching and Routing 2013 :
+ * "PIE: A * Lightweight Control Scheme to Address the Bufferbloat Problem"
*/
#include <linux/module.h>
@@ -36,7 +41,7 @@ struct pie_params {
psched_time_t target; /* user specified target delay in pschedtime */
u32 tupdate; /* timer frequency (in jiffies) */
u32 limit; /* number of packets that can be enqueued */
- u32 alpha; /* alpha and beta are between -4 and 4 */
+ u32 alpha; /* alpha and beta are between 0 and 32 */
u32 beta; /* and are used for shift relative to 1 */
bool ecn; /* true if ecn is enabled */
bool bytemode; /* to scale drop early prob based on pkt size */
@@ -326,10 +331,16 @@ static void calculate_probability(struct Qdisc *sch)
if (qdelay == 0 && qlen != 0)
update_prob = false;
- /* Add ranges for alpha and beta, more aggressive for high dropping
- * mode and gentle steps for light dropping mode
- * In light dropping mode, take gentle steps; in medium dropping mode,
- * take medium steps; in high dropping mode, take big steps.
+ /* In the algorithm, alpha and beta are between 0 and 2 with typical
+ * value for alpha as 0.125. In this implementation, we use values 0-32
+ * passed from user space to represent this. Also, alpha and beta have
+ * unit of HZ and need to be scaled before they can used to update
+ * probability. alpha/beta are updated locally below by 1) scaling them
+ * appropriately 2) scaling down by 16 to come to 0-2 range.
+ * Please see paper for details.
+ *
+ * We scale alpha and beta differently depending on whether we are in
+ * light, medium or high dropping mode.
*/
if (q->vars.prob < MAX_PROB / 100) {
alpha =
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 1cb413fead89..4f505a006896 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -334,18 +334,6 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
qdisc_put_rtab(qdisc_get_rtab(&qopt->peakrate,
tb[TCA_TBF_PTAB]));
- if (q->qdisc != &noop_qdisc) {
- err = fifo_set_limit(q->qdisc, qopt->limit);
- if (err)
- goto done;
- } else if (qopt->limit > 0) {
- child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
- if (IS_ERR(child)) {
- err = PTR_ERR(child);
- goto done;
- }
- }
-
buffer = min_t(u64, PSCHED_TICKS2NS(qopt->buffer), ~0U);
mtu = min_t(u64, PSCHED_TICKS2NS(qopt->mtu), ~0U);
@@ -390,6 +378,18 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt)
goto done;
}
+ if (q->qdisc != &noop_qdisc) {
+ err = fifo_set_limit(q->qdisc, qopt->limit);
+ if (err)
+ goto done;
+ } else if (qopt->limit > 0) {
+ child = fifo_create_dflt(sch, &bfifo_qdisc_ops, qopt->limit);
+ if (IS_ERR(child)) {
+ err = PTR_ERR(child);
+ goto done;
+ }
+ }
+
sch_tree_lock(sch);
if (child) {
qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 5ae609200674..ee13d28d39d1 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1239,78 +1239,107 @@ void sctp_assoc_update(struct sctp_association *asoc,
}
/* Update the retran path for sending a retransmitted packet.
- * Round-robin through the active transports, else round-robin
- * through the inactive transports as this is the next best thing
- * we can try.
+ * See also RFC4960, 6.4. Multi-Homed SCTP Endpoints:
+ *
+ * When there is outbound data to send and the primary path
+ * becomes inactive (e.g., due to failures), or where the
+ * SCTP user explicitly requests to send data to an
+ * inactive destination transport address, before reporting
+ * an error to its ULP, the SCTP endpoint should try to send
+ * the data to an alternate active destination transport
+ * address if one exists.
+ *
+ * When retransmitting data that timed out, if the endpoint
+ * is multihomed, it should consider each source-destination
+ * address pair in its retransmission selection policy.
+ * When retransmitting timed-out data, the endpoint should
+ * attempt to pick the most divergent source-destination
+ * pair from the original source-destination pair to which
+ * the packet was transmitted.
+ *
+ * Note: Rules for picking the most divergent source-destination
+ * pair are an implementation decision and are not specified
+ * within this document.
+ *
+ * Our basic strategy is to round-robin transports in priorities
+ * according to sctp_state_prio_map[] e.g., if no such
+ * transport with state SCTP_ACTIVE exists, round-robin through
+ * SCTP_UNKNOWN, etc. You get the picture.
*/
-void sctp_assoc_update_retran_path(struct sctp_association *asoc)
+static const u8 sctp_trans_state_to_prio_map[] = {
+ [SCTP_ACTIVE] = 3, /* best case */
+ [SCTP_UNKNOWN] = 2,
+ [SCTP_PF] = 1,
+ [SCTP_INACTIVE] = 0, /* worst case */
+};
+
+static u8 sctp_trans_score(const struct sctp_transport *trans)
{
- struct sctp_transport *t, *next;
- struct list_head *head = &asoc->peer.transport_addr_list;
- struct list_head *pos;
+ return sctp_trans_state_to_prio_map[trans->state];
+}
- if (asoc->peer.transport_count == 1)
- return;
+static struct sctp_transport *sctp_trans_elect_best(struct sctp_transport *curr,
+ struct sctp_transport *best)
+{
+ if (best == NULL)
+ return curr;
- /* Find the next transport in a round-robin fashion. */
- t = asoc->peer.retran_path;
- pos = &t->transports;
- next = NULL;
+ return sctp_trans_score(curr) > sctp_trans_score(best) ? curr : best;
+}
- while (1) {
- /* Skip the head. */
- if (pos->next == head)
- pos = head->next;
- else
- pos = pos->next;
+void sctp_assoc_update_retran_path(struct sctp_association *asoc)
+{
+ struct sctp_transport *trans = asoc->peer.retran_path;
+ struct sctp_transport *trans_next = NULL;
- t = list_entry(pos, struct sctp_transport, transports);
+ /* We're done as we only have the one and only path. */
+ if (asoc->peer.transport_count == 1)
+ return;
+ /* If active_path and retran_path are the same and active,
+ * then this is the only active path. Use it.
+ */
+ if (asoc->peer.active_path == asoc->peer.retran_path &&
+ asoc->peer.active_path->state == SCTP_ACTIVE)
+ return;
- /* We have exhausted the list, but didn't find any
- * other active transports. If so, use the next
- * transport.
- */
- if (t == asoc->peer.retran_path) {
- t = next;
+ /* Iterate from retran_path's successor back to retran_path. */
+ for (trans = list_next_entry(trans, transports); 1;
+ trans = list_next_entry(trans, transports)) {
+ /* Manually skip the head element. */
+ if (&trans->transports == &asoc->peer.transport_addr_list)
+ continue;
+ if (trans->state == SCTP_UNCONFIRMED)
+ continue;
+ trans_next = sctp_trans_elect_best(trans, trans_next);
+ /* Active is good enough for immediate return. */
+ if (trans_next->state == SCTP_ACTIVE)
break;
- }
-
- /* Try to find an active transport. */
-
- if ((t->state == SCTP_ACTIVE) ||
- (t->state == SCTP_UNKNOWN)) {
+ /* We've reached the end, time to update path. */
+ if (trans == asoc->peer.retran_path)
break;
- } else {
- /* Keep track of the next transport in case
- * we don't find any active transport.
- */
- if (t->state != SCTP_UNCONFIRMED && !next)
- next = t;
- }
}
- if (t)
- asoc->peer.retran_path = t;
- else
- t = asoc->peer.retran_path;
+ if (trans_next != NULL)
+ asoc->peer.retran_path = trans_next;
- pr_debug("%s: association:%p addr:%pISpc\n", __func__, asoc,
- &t->ipaddr.sa);
+ pr_debug("%s: association:%p updated new path to addr:%pISpc\n",
+ __func__, asoc, &asoc->peer.retran_path->ipaddr.sa);
}
-/* Choose the transport for sending retransmit packet. */
-struct sctp_transport *sctp_assoc_choose_alter_transport(
- struct sctp_association *asoc, struct sctp_transport *last_sent_to)
+struct sctp_transport *
+sctp_assoc_choose_alter_transport(struct sctp_association *asoc,
+ struct sctp_transport *last_sent_to)
{
/* If this is the first time packet is sent, use the active path,
* else use the retran path. If the last packet was sent over the
* retran path, update the retran path and use it.
*/
- if (!last_sent_to)
+ if (last_sent_to == NULL) {
return asoc->peer.active_path;
- else {
+ } else {
if (last_sent_to == asoc->peer.retran_path)
sctp_assoc_update_retran_path(asoc);
+
return asoc->peer.retran_path;
}
}
@@ -1367,44 +1396,35 @@ static inline bool sctp_peer_needs_update(struct sctp_association *asoc)
return false;
}
-/* Increase asoc's rwnd by len and send any window update SACK if needed. */
-void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len)
+/* Update asoc's rwnd for the approximated state in the buffer,
+ * and check whether SACK needs to be sent.
+ */
+void sctp_assoc_rwnd_update(struct sctp_association *asoc, bool update_peer)
{
+ int rx_count;
struct sctp_chunk *sack;
struct timer_list *timer;
- if (asoc->rwnd_over) {
- if (asoc->rwnd_over >= len) {
- asoc->rwnd_over -= len;
- } else {
- asoc->rwnd += (len - asoc->rwnd_over);
- asoc->rwnd_over = 0;
- }
- } else {
- asoc->rwnd += len;
- }
+ if (asoc->ep->rcvbuf_policy)
+ rx_count = atomic_read(&asoc->rmem_alloc);
+ else
+ rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc);
- /* If we had window pressure, start recovering it
- * once our rwnd had reached the accumulated pressure
- * threshold. The idea is to recover slowly, but up
- * to the initial advertised window.
- */
- if (asoc->rwnd_press && asoc->rwnd >= asoc->rwnd_press) {
- int change = min(asoc->pathmtu, asoc->rwnd_press);
- asoc->rwnd += change;
- asoc->rwnd_press -= change;
- }
+ if ((asoc->base.sk->sk_rcvbuf - rx_count) > 0)
+ asoc->rwnd = (asoc->base.sk->sk_rcvbuf - rx_count) >> 1;
+ else
+ asoc->rwnd = 0;
- pr_debug("%s: asoc:%p rwnd increased by %d to (%u, %u) - %u\n",
- __func__, asoc, len, asoc->rwnd, asoc->rwnd_over,
- asoc->a_rwnd);
+ pr_debug("%s: asoc:%p rwnd=%u, rx_count=%d, sk_rcvbuf=%d\n",
+ __func__, asoc, asoc->rwnd, rx_count,
+ asoc->base.sk->sk_rcvbuf);
/* Send a window update SACK if the rwnd has increased by at least the
* minimum of the association's PMTU and half of the receive buffer.
* The algorithm used is similar to the one described in
* Section 4.2.3.3 of RFC 1122.
*/
- if (sctp_peer_needs_update(asoc)) {
+ if (update_peer && sctp_peer_needs_update(asoc)) {
asoc->a_rwnd = asoc->rwnd;
pr_debug("%s: sending window update SACK- asoc:%p rwnd:%u "
@@ -1426,45 +1446,6 @@ void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len)
}
}
-/* Decrease asoc's rwnd by len. */
-void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len)
-{
- int rx_count;
- int over = 0;
-
- if (unlikely(!asoc->rwnd || asoc->rwnd_over))
- pr_debug("%s: association:%p has asoc->rwnd:%u, "
- "asoc->rwnd_over:%u!\n", __func__, asoc,
- asoc->rwnd, asoc->rwnd_over);
-
- if (asoc->ep->rcvbuf_policy)
- rx_count = atomic_read(&asoc->rmem_alloc);
- else
- rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc);
-
- /* If we've reached or overflowed our receive buffer, announce
- * a 0 rwnd if rwnd would still be positive. Store the
- * the potential pressure overflow so that the window can be restored
- * back to original value.
- */
- if (rx_count >= asoc->base.sk->sk_rcvbuf)
- over = 1;
-
- if (asoc->rwnd >= len) {
- asoc->rwnd -= len;
- if (over) {
- asoc->rwnd_press += asoc->rwnd;
- asoc->rwnd = 0;
- }
- } else {
- asoc->rwnd_over = len - asoc->rwnd;
- asoc->rwnd = 0;
- }
-
- pr_debug("%s: asoc:%p rwnd decreased by %d to (%u, %u, %u)\n",
- __func__, asoc, len, asoc->rwnd, asoc->rwnd_over,
- asoc->rwnd_press);
-}
/* Build the bind address list for the association based on info from the
* local endpoint and the remote peer.
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 0f6259a6a932..2b1738ef9394 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -662,6 +662,8 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
*/
sctp_v6_to_sk_daddr(&asoc->peer.primary_addr, newsk);
+ newsk->sk_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
+
sk_refcnt_debug_inc(newsk);
if (newsk->sk_prot->init(newsk)) {
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 632090b961c3..3a1767ef3201 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1421,8 +1421,8 @@ static void sctp_chunk_destroy(struct sctp_chunk *chunk)
BUG_ON(!list_empty(&chunk->list));
list_del_init(&chunk->transmitted_list);
- /* Free the chunk skb data and the SCTP_chunk stub itself. */
- dev_kfree_skb(chunk->skb);
+ consume_skb(chunk->skb);
+ consume_skb(chunk->auth_chunk);
SCTP_DBG_OBJCNT_DEC(chunk);
kmem_cache_free(sctp_chunk_cachep, chunk);
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index bd859154000e..5d6883ff00c3 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -495,11 +495,12 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands,
}
/* If the transport error count is greater than the pf_retrans
- * threshold, and less than pathmaxrtx, then mark this transport
- * as Partially Failed, ee SCTP Quick Failover Draft, secon 5.1,
- * point 1
+ * threshold, and less than pathmaxrtx, and if the current state
+ * is not SCTP_UNCONFIRMED, then mark this transport as Partially
+ * Failed, see SCTP Quick Failover Draft, section 5.1
*/
if ((transport->state != SCTP_PF) &&
+ (transport->state != SCTP_UNCONFIRMED) &&
(asoc->pf_retrans < transport->pathmaxrxt) &&
(transport->error_count > asoc->pf_retrans)) {
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 483dcd71b3c5..01e002430c85 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -758,6 +758,12 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
struct sctp_chunk auth;
sctp_ierror_t ret;
+ /* Make sure that we and the peer are AUTH capable */
+ if (!net->sctp.auth_enable || !new_asoc->peer.auth_capable) {
+ sctp_association_free(new_asoc);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+ }
+
/* set-up our fake chunk so that we can process it */
auth.skb = chunk->auth_chunk;
auth.asoc = chunk->asoc;
@@ -768,10 +774,6 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
auth.transport = chunk->transport;
ret = sctp_sf_authenticate(net, ep, new_asoc, type, &auth);
-
- /* We can now safely free the auth_chunk clone */
- kfree_skb(chunk->auth_chunk);
-
if (ret != SCTP_IERROR_NO_ERROR) {
sctp_association_free(new_asoc);
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
@@ -6176,7 +6178,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
* PMTU. In cases, such as loopback, this might be a rather
* large spill over.
*/
- if ((!chunk->data_accepted) && (!asoc->rwnd || asoc->rwnd_over ||
+ if ((!chunk->data_accepted) && (!asoc->rwnd ||
(datalen > asoc->rwnd + asoc->frag_point))) {
/* If this is the next TSN, consider reneging to make
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 9e91d6e5df63..981aaf8b6ace 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -64,6 +64,7 @@
#include <linux/crypto.h>
#include <linux/slab.h>
#include <linux/file.h>
+#include <linux/compat.h>
#include <net/ip.h>
#include <net/icmp.h>
@@ -1368,11 +1369,19 @@ static int sctp_setsockopt_connectx(struct sock *sk,
/*
* New (hopefully final) interface for the API.
* We use the sctp_getaddrs_old structure so that use-space library
- * can avoid any unnecessary allocations. The only defferent part
+ * can avoid any unnecessary allocations. The only different part
* is that we store the actual length of the address buffer into the
- * addrs_num structure member. That way we can re-use the existing
+ * addrs_num structure member. That way we can re-use the existing
* code.
*/
+#ifdef CONFIG_COMPAT
+struct compat_sctp_getaddrs_old {
+ sctp_assoc_t assoc_id;
+ s32 addr_num;
+ compat_uptr_t addrs; /* struct sockaddr * */
+};
+#endif
+
static int sctp_getsockopt_connectx3(struct sock *sk, int len,
char __user *optval,
int __user *optlen)
@@ -1381,16 +1390,30 @@ static int sctp_getsockopt_connectx3(struct sock *sk, int len,
sctp_assoc_t assoc_id = 0;
int err = 0;
- if (len < sizeof(param))
- return -EINVAL;
+#ifdef CONFIG_COMPAT
+ if (is_compat_task()) {
+ struct compat_sctp_getaddrs_old param32;
- if (copy_from_user(&param, optval, sizeof(param)))
- return -EFAULT;
+ if (len < sizeof(param32))
+ return -EINVAL;
+ if (copy_from_user(&param32, optval, sizeof(param32)))
+ return -EFAULT;
- err = __sctp_setsockopt_connectx(sk,
- (struct sockaddr __user *)param.addrs,
- param.addr_num, &assoc_id);
+ param.assoc_id = param32.assoc_id;
+ param.addr_num = param32.addr_num;
+ param.addrs = compat_ptr(param32.addrs);
+ } else
+#endif
+ {
+ if (len < sizeof(param))
+ return -EINVAL;
+ if (copy_from_user(&param, optval, sizeof(param)))
+ return -EFAULT;
+ }
+ err = __sctp_setsockopt_connectx(sk, (struct sockaddr __user *)
+ param.addrs, param.addr_num,
+ &assoc_id);
if (err == 0 || err == -EINPROGRESS) {
if (copy_to_user(optval, &assoc_id, sizeof(assoc_id)))
return -EFAULT;
@@ -2092,12 +2115,6 @@ static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk,
sctp_skb_pull(skb, copied);
skb_queue_head(&sk->sk_receive_queue, skb);
- /* When only partial message is copied to the user, increase
- * rwnd by that amount. If all the data in the skb is read,
- * rwnd is updated when the event is freed.
- */
- if (!sctp_ulpevent_is_notification(event))
- sctp_assoc_rwnd_increase(event->asoc, copied);
goto out;
} else if ((event->msg_flags & MSG_NOTIFICATION) ||
(event->msg_flags & MSG_EOR))
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 7135e617ab0f..35c8923b5554 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -151,6 +151,7 @@ static struct ctl_table sctp_net_table[] = {
},
{
.procname = "cookie_hmac_alg",
+ .data = &init_net.sctp.sctp_hmac_alg,
.maxlen = 8,
.mode = 0644,
.proc_handler = proc_sctp_do_hmac_alg,
@@ -401,15 +402,18 @@ static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write,
int sctp_sysctl_net_register(struct net *net)
{
- struct ctl_table *table;
- int i;
+ struct ctl_table *table = sctp_net_table;
+
+ if (!net_eq(net, &init_net)) {
+ int i;
- table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL);
- if (!table)
- return -ENOMEM;
+ table = kmemdup(sctp_net_table, sizeof(sctp_net_table), GFP_KERNEL);
+ if (!table)
+ return -ENOMEM;
- for (i = 0; table[i].data; i++)
- table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp;
+ for (i = 0; table[i].data; i++)
+ table[i].data += (char *)(&net->sctp) - (char *)&init_net.sctp;
+ }
net->sctp.sysctl_header = register_net_sysctl(net, "net/sctp", table);
return 0;
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index 85c64658bd0b..8d198ae03606 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -989,7 +989,7 @@ static void sctp_ulpevent_receive_data(struct sctp_ulpevent *event,
skb = sctp_event2skb(event);
/* Set the owner and charge rwnd for bytes received. */
sctp_ulpevent_set_owner(event, asoc);
- sctp_assoc_rwnd_decrease(asoc, skb_headlen(skb));
+ sctp_assoc_rwnd_update(asoc, false);
if (!skb->data_len)
return;
@@ -1011,6 +1011,7 @@ static void sctp_ulpevent_release_data(struct sctp_ulpevent *event)
{
struct sk_buff *skb, *frag;
unsigned int len;
+ struct sctp_association *asoc;
/* Current stack structures assume that the rcv buffer is
* per socket. For UDP style sockets this is not true as
@@ -1035,8 +1036,11 @@ static void sctp_ulpevent_release_data(struct sctp_ulpevent *event)
}
done:
- sctp_assoc_rwnd_increase(event->asoc, len);
+ asoc = event->asoc;
+ sctp_association_hold(asoc);
sctp_ulpevent_release_owner(event);
+ sctp_assoc_rwnd_update(asoc, true);
+ sctp_association_put(asoc);
}
static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event)
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 0a2aee060f9f..36e431ee1c90 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -108,6 +108,7 @@ struct gss_auth {
static DEFINE_SPINLOCK(pipe_version_lock);
static struct rpc_wait_queue pipe_version_rpc_waitqueue;
static DECLARE_WAIT_QUEUE_HEAD(pipe_version_waitqueue);
+static void gss_put_auth(struct gss_auth *gss_auth);
static void gss_free_ctx(struct gss_cl_ctx *);
static const struct rpc_pipe_ops gss_upcall_ops_v0;
@@ -320,6 +321,7 @@ gss_release_msg(struct gss_upcall_msg *gss_msg)
if (gss_msg->ctx != NULL)
gss_put_ctx(gss_msg->ctx);
rpc_destroy_wait_queue(&gss_msg->rpc_waitqueue);
+ gss_put_auth(gss_msg->auth);
kfree(gss_msg);
}
@@ -498,9 +500,12 @@ gss_alloc_msg(struct gss_auth *gss_auth,
default:
err = gss_encode_v1_msg(gss_msg, service_name, gss_auth->target_name);
if (err)
- goto err_free_msg;
+ goto err_put_pipe_version;
};
+ kref_get(&gss_auth->kref);
return gss_msg;
+err_put_pipe_version:
+ put_pipe_version(gss_auth->net);
err_free_msg:
kfree(gss_msg);
err:
@@ -532,13 +537,7 @@ gss_setup_upcall(struct gss_auth *gss_auth, struct rpc_cred *cred)
static void warn_gssd(void)
{
- static unsigned long ratelimit;
- unsigned long now = jiffies;
-
- if (time_after(now, ratelimit)) {
- pr_warn("RPC: AUTH_GSS upcall failed. Please check user daemon is running.\n");
- ratelimit = now + 15*HZ;
- }
+ dprintk("AUTH_GSS upcall failed. Please check user daemon is running.\n");
}
static inline int
@@ -997,6 +996,8 @@ gss_create_new(struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor);
if (gss_auth->service == 0)
goto err_put_mech;
+ if (!gssd_running(gss_auth->net))
+ goto err_put_mech;
auth = &gss_auth->rpc_auth;
auth->au_cslack = GSS_CRED_SLACK >> 2;
auth->au_rslack = GSS_VERF_SLACK >> 2;
@@ -1068,6 +1069,12 @@ gss_free_callback(struct kref *kref)
}
static void
+gss_put_auth(struct gss_auth *gss_auth)
+{
+ kref_put(&gss_auth->kref, gss_free_callback);
+}
+
+static void
gss_destroy(struct rpc_auth *auth)
{
struct gss_auth *gss_auth = container_of(auth,
@@ -1088,7 +1095,7 @@ gss_destroy(struct rpc_auth *auth)
gss_auth->gss_pipe[1] = NULL;
rpcauth_destroy_credcache(auth);
- kref_put(&gss_auth->kref, gss_free_callback);
+ gss_put_auth(gss_auth);
}
/*
@@ -1259,7 +1266,7 @@ gss_destroy_nullcred(struct rpc_cred *cred)
call_rcu(&cred->cr_rcu, gss_free_cred_callback);
if (ctx)
gss_put_ctx(ctx);
- kref_put(&gss_auth->kref, gss_free_callback);
+ gss_put_auth(gss_auth);
}
static void
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 890a29912d5a..e860d4f7ed2a 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -64,7 +64,6 @@ static void xprt_free_allocation(struct rpc_rqst *req)
free_page((unsigned long)xbufp->head[0].iov_base);
xbufp = &req->rq_snd_buf;
free_page((unsigned long)xbufp->head[0].iov_base);
- list_del(&req->rq_bc_pa_list);
kfree(req);
}
@@ -168,8 +167,10 @@ out_free:
/*
* Memory allocation failed, free the temporary list
*/
- list_for_each_entry_safe(req, tmp, &tmp_list, rq_bc_pa_list)
+ list_for_each_entry_safe(req, tmp, &tmp_list, rq_bc_pa_list) {
+ list_del(&req->rq_bc_pa_list);
xprt_free_allocation(req);
+ }
dprintk("RPC: setup backchannel transport failed\n");
return -ENOMEM;
@@ -198,6 +199,7 @@ void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs)
xprt_dec_alloc_count(xprt, max_reqs);
list_for_each_entry_safe(req, tmp, &xprt->bc_pa_list, rq_bc_pa_list) {
dprintk("RPC: req=%p\n", req);
+ list_del(&req->rq_bc_pa_list);
xprt_free_allocation(req);
if (--max_reqs == 0)
break;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 80a6640f329b..06c6ff0cb911 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -571,7 +571,7 @@ static void svc_check_conn_limits(struct svc_serv *serv)
}
}
-int svc_alloc_arg(struct svc_rqst *rqstp)
+static int svc_alloc_arg(struct svc_rqst *rqstp)
{
struct svc_serv *serv = rqstp->rq_server;
struct xdr_buf *arg;
@@ -612,7 +612,7 @@ int svc_alloc_arg(struct svc_rqst *rqstp)
return 0;
}
-struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
+static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
{
struct svc_xprt *xprt;
struct svc_pool *pool = rqstp->rq_pool;
@@ -691,7 +691,7 @@ struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
return xprt;
}
-void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
+static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
{
spin_lock_bh(&serv->sv_lock);
set_bit(XPT_TEMP, &newxpt->xpt_flags);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 817a1e523969..0addefca8e77 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -510,6 +510,7 @@ static int xs_nospace(struct rpc_task *task)
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ struct sock *sk = transport->inet;
int ret = -EAGAIN;
dprintk("RPC: %5u xmit incomplete (%u left of %u)\n",
@@ -527,7 +528,7 @@ static int xs_nospace(struct rpc_task *task)
* window size
*/
set_bit(SOCK_NOSPACE, &transport->sock->flags);
- transport->inet->sk_write_pending++;
+ sk->sk_write_pending++;
/* ...and wait for more buffer space */
xprt_wait_for_buffer_space(task, xs_nospace_callback);
}
@@ -537,6 +538,9 @@ static int xs_nospace(struct rpc_task *task)
}
spin_unlock_bh(&xprt->transport_lock);
+
+ /* Race breaker in case memory is freed before above code is called */
+ sk->sk_write_space(sk);
return ret;
}
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index a38c89969c68..574b86193b15 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -610,8 +610,13 @@ static struct notifier_block notifier = {
int tipc_bearer_setup(void)
{
+ int err;
+
+ err = register_netdevice_notifier(&notifier);
+ if (err)
+ return err;
dev_add_pack(&tipc_packet_type);
- return register_netdevice_notifier(&notifier);
+ return 0;
}
void tipc_bearer_cleanup(void)
diff --git a/net/tipc/config.c b/net/tipc/config.c
index c301a9a592d8..e6d721692ae0 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -181,7 +181,7 @@ static struct sk_buff *cfg_set_own_addr(void)
if (tipc_own_addr)
return tipc_cfg_reply_error_string(TIPC_CFG_NOT_SUPPORTED
" (cannot change node address once assigned)");
- tipc_core_start_net(addr);
+ tipc_net_start(addr);
return tipc_cfg_reply_none();
}
@@ -376,7 +376,6 @@ static void cfg_conn_msg_event(int conid, struct sockaddr_tipc *addr,
struct tipc_cfg_msg_hdr *req_hdr;
struct tipc_cfg_msg_hdr *rep_hdr;
struct sk_buff *rep_buf;
- int ret;
/* Validate configuration message header (ignore invalid message) */
req_hdr = (struct tipc_cfg_msg_hdr *)buf;
@@ -398,12 +397,8 @@ static void cfg_conn_msg_event(int conid, struct sockaddr_tipc *addr,
memcpy(rep_hdr, req_hdr, sizeof(*rep_hdr));
rep_hdr->tcm_len = htonl(rep_buf->len);
rep_hdr->tcm_flags &= htons(~TCM_F_REQUEST);
-
- ret = tipc_conn_sendmsg(&cfgsrv, conid, addr, rep_buf->data,
- rep_buf->len);
- if (ret < 0)
- pr_err("Sending cfg reply message failed, no memory\n");
-
+ tipc_conn_sendmsg(&cfgsrv, conid, addr, rep_buf->data,
+ rep_buf->len);
kfree_skb(rep_buf);
}
}
diff --git a/net/tipc/core.c b/net/tipc/core.c
index f9e88d8b04ca..80c20647b3d2 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -77,37 +77,13 @@ struct sk_buff *tipc_buf_acquire(u32 size)
}
/**
- * tipc_core_stop_net - shut down TIPC networking sub-systems
- */
-static void tipc_core_stop_net(void)
-{
- tipc_net_stop();
- tipc_bearer_cleanup();
-}
-
-/**
- * start_net - start TIPC networking sub-systems
- */
-int tipc_core_start_net(unsigned long addr)
-{
- int res;
-
- tipc_net_start(addr);
- res = tipc_bearer_setup();
- if (res < 0)
- goto err;
- return res;
-
-err:
- tipc_core_stop_net();
- return res;
-}
-
-/**
* tipc_core_stop - switch TIPC from SINGLE NODE to NOT RUNNING mode
*/
static void tipc_core_stop(void)
{
+ tipc_handler_stop();
+ tipc_net_stop();
+ tipc_bearer_cleanup();
tipc_netlink_stop();
tipc_cfg_stop();
tipc_subscr_stop();
@@ -122,30 +98,65 @@ static void tipc_core_stop(void)
*/
static int tipc_core_start(void)
{
- int res;
+ int err;
get_random_bytes(&tipc_random, sizeof(tipc_random));
- res = tipc_handler_start();
- if (!res)
- res = tipc_ref_table_init(tipc_max_ports, tipc_random);
- if (!res)
- res = tipc_nametbl_init();
- if (!res)
- res = tipc_netlink_start();
- if (!res)
- res = tipc_socket_init();
- if (!res)
- res = tipc_register_sysctl();
- if (!res)
- res = tipc_subscr_start();
- if (!res)
- res = tipc_cfg_init();
- if (res) {
- tipc_handler_stop();
- tipc_core_stop();
- }
- return res;
+ err = tipc_handler_start();
+ if (err)
+ goto out_handler;
+
+ err = tipc_ref_table_init(tipc_max_ports, tipc_random);
+ if (err)
+ goto out_reftbl;
+
+ err = tipc_nametbl_init();
+ if (err)
+ goto out_nametbl;
+
+ err = tipc_netlink_start();
+ if (err)
+ goto out_netlink;
+
+ err = tipc_socket_init();
+ if (err)
+ goto out_socket;
+
+ err = tipc_register_sysctl();
+ if (err)
+ goto out_sysctl;
+
+ err = tipc_subscr_start();
+ if (err)
+ goto out_subscr;
+
+ err = tipc_cfg_init();
+ if (err)
+ goto out_cfg;
+
+ err = tipc_bearer_setup();
+ if (err)
+ goto out_bearer;
+
+ return 0;
+out_bearer:
+ tipc_cfg_stop();
+out_cfg:
+ tipc_subscr_stop();
+out_subscr:
+ tipc_unregister_sysctl();
+out_sysctl:
+ tipc_socket_stop();
+out_socket:
+ tipc_netlink_stop();
+out_netlink:
+ tipc_nametbl_stop();
+out_nametbl:
+ tipc_ref_table_stop();
+out_reftbl:
+ tipc_handler_stop();
+out_handler:
+ return err;
}
static int __init tipc_init(void)
@@ -174,8 +185,6 @@ static int __init tipc_init(void)
static void __exit tipc_exit(void)
{
- tipc_handler_stop();
- tipc_core_stop_net();
tipc_core_stop();
pr_info("Deactivated\n");
}
diff --git a/net/tipc/core.h b/net/tipc/core.h
index 1ff477b0450d..4dfe137587bb 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -90,7 +90,6 @@ extern int tipc_random __read_mostly;
/*
* Routines available to privileged subsystems
*/
-int tipc_core_start_net(unsigned long);
int tipc_handler_start(void);
void tipc_handler_stop(void);
int tipc_netlink_start(void);
@@ -192,6 +191,7 @@ static inline void k_term_timer(struct timer_list *timer)
struct tipc_skb_cb {
void *handle;
+ bool deferred;
};
#define TIPC_SKB_CB(__skb) ((struct tipc_skb_cb *)&((__skb)->cb[0]))
diff --git a/net/tipc/handler.c b/net/tipc/handler.c
index e4bc8a296744..1fabf160501f 100644
--- a/net/tipc/handler.c
+++ b/net/tipc/handler.c
@@ -58,7 +58,6 @@ unsigned int tipc_k_signal(Handler routine, unsigned long argument)
spin_lock_bh(&qitem_lock);
if (!handler_enabled) {
- pr_err("Signal request ignored by handler\n");
spin_unlock_bh(&qitem_lock);
return -ENOPROTOOPT;
}
diff --git a/net/tipc/link.c b/net/tipc/link.c
index d4b5de41b682..da6018beb6eb 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1391,6 +1391,12 @@ static int link_recv_buf_validate(struct sk_buff *buf)
u32 hdr_size;
u32 min_hdr_size;
+ /* If this packet comes from the defer queue, the skb has already
+ * been validated
+ */
+ if (unlikely(TIPC_SKB_CB(buf)->deferred))
+ return 1;
+
if (unlikely(buf->len < MIN_H_SIZE))
return 0;
@@ -1703,6 +1709,7 @@ static void link_handle_out_of_seq_msg(struct tipc_link *l_ptr,
&l_ptr->newest_deferred_in, buf)) {
l_ptr->deferred_inqueue_sz++;
l_ptr->stats.deferred_recv++;
+ TIPC_SKB_CB(buf)->deferred = true;
if ((l_ptr->deferred_inqueue_sz % 16) == 1)
tipc_link_send_proto_msg(l_ptr, STATE_MSG, 0, 0, 0, 0, 0);
} else
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 92a1533af4e0..042e8e3cabc0 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -941,20 +941,48 @@ int tipc_nametbl_init(void)
return 0;
}
-void tipc_nametbl_stop(void)
+/**
+ * tipc_purge_publications - remove all publications for a given type
+ *
+ * tipc_nametbl_lock must be held when calling this function
+ */
+static void tipc_purge_publications(struct name_seq *seq)
{
- u32 i;
+ struct publication *publ, *safe;
+ struct sub_seq *sseq;
+ struct name_info *info;
- if (!table.types)
+ if (!seq->sseqs) {
+ nameseq_delete_empty(seq);
return;
+ }
+ sseq = seq->sseqs;
+ info = sseq->info;
+ list_for_each_entry_safe(publ, safe, &info->zone_list, zone_list) {
+ tipc_nametbl_remove_publ(publ->type, publ->lower, publ->node,
+ publ->ref, publ->key);
+ }
+}
+
+void tipc_nametbl_stop(void)
+{
+ u32 i;
+ struct name_seq *seq;
+ struct hlist_head *seq_head;
+ struct hlist_node *safe;
- /* Verify name table is empty, then release it */
+ /* Verify name table is empty and purge any lingering
+ * publications, then release the name table
+ */
write_lock_bh(&tipc_nametbl_lock);
for (i = 0; i < TIPC_NAMETBL_SIZE; i++) {
if (hlist_empty(&table.types[i]))
continue;
- pr_err("nametbl_stop(): orphaned hash chain detected\n");
- break;
+ seq_head = &table.types[i];
+ hlist_for_each_entry_safe(seq, safe, seq_head, ns_list) {
+ tipc_purge_publications(seq);
+ }
+ continue;
}
kfree(table.types);
table.types = NULL;
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index 9f72a6376362..3aaf73de9e2d 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -83,8 +83,6 @@ static struct genl_ops tipc_genl_ops[] = {
},
};
-static int tipc_genl_family_registered;
-
int tipc_netlink_start(void)
{
int res;
@@ -94,16 +92,10 @@ int tipc_netlink_start(void)
pr_err("Failed to register netlink interface\n");
return res;
}
-
- tipc_genl_family_registered = 1;
return 0;
}
void tipc_netlink_stop(void)
{
- if (!tipc_genl_family_registered)
- return;
-
genl_unregister_family(&tipc_genl_family);
- tipc_genl_family_registered = 0;
}
diff --git a/net/tipc/ref.c b/net/tipc/ref.c
index 2a2a938dc22c..de3d593e2fee 100644
--- a/net/tipc/ref.c
+++ b/net/tipc/ref.c
@@ -126,9 +126,6 @@ int tipc_ref_table_init(u32 requested_size, u32 start)
*/
void tipc_ref_table_stop(void)
{
- if (!tipc_ref_table.entries)
- return;
-
vfree(tipc_ref_table.entries);
tipc_ref_table.entries = NULL;
}
diff --git a/net/tipc/server.c b/net/tipc/server.c
index b635ca347a87..646a930eefbf 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -87,7 +87,6 @@ static void tipc_clean_outqueues(struct tipc_conn *con);
static void tipc_conn_kref_release(struct kref *kref)
{
struct tipc_conn *con = container_of(kref, struct tipc_conn, kref);
- struct tipc_server *s = con->server;
if (con->sock) {
tipc_sock_release_local(con->sock);
@@ -95,10 +94,6 @@ static void tipc_conn_kref_release(struct kref *kref)
}
tipc_clean_outqueues(con);
-
- if (con->conid)
- s->tipc_conn_shutdown(con->conid, con->usr_data);
-
kfree(con);
}
@@ -181,6 +176,9 @@ static void tipc_close_conn(struct tipc_conn *con)
struct tipc_server *s = con->server;
if (test_and_clear_bit(CF_CONNECTED, &con->flags)) {
+ if (con->conid)
+ s->tipc_conn_shutdown(con->conid, con->usr_data);
+
spin_lock_bh(&s->idr_lock);
idr_remove(&s->conn_idr, con->conid);
s->idr_in_use--;
@@ -429,10 +427,12 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid,
list_add_tail(&e->list, &con->outqueue);
spin_unlock_bh(&con->outqueue_lock);
- if (test_bit(CF_CONNECTED, &con->flags))
+ if (test_bit(CF_CONNECTED, &con->flags)) {
if (!queue_work(s->send_wq, &con->swork))
conn_put(con);
-
+ } else {
+ conn_put(con);
+ }
return 0;
}
@@ -573,7 +573,6 @@ int tipc_server_start(struct tipc_server *s)
kmem_cache_destroy(s->rcvbuf_cache);
return ret;
}
- s->enabled = 1;
return ret;
}
@@ -583,10 +582,6 @@ void tipc_server_stop(struct tipc_server *s)
int total = 0;
int id;
- if (!s->enabled)
- return;
-
- s->enabled = 0;
spin_lock_bh(&s->idr_lock);
for (id = 0; total < s->idr_in_use; id++) {
con = idr_find(&s->conn_idr, id);
diff --git a/net/tipc/server.h b/net/tipc/server.h
index 98b23f20bc0f..be817b0b547e 100644
--- a/net/tipc/server.h
+++ b/net/tipc/server.h
@@ -56,7 +56,6 @@
* @name: server name
* @imp: message importance
* @type: socket type
- * @enabled: identify whether server is launched or not
*/
struct tipc_server {
struct idr conn_idr;
@@ -74,7 +73,6 @@ struct tipc_server {
const char name[TIPC_SERVER_NAME_LEN];
int imp;
int type;
- int enabled;
};
int tipc_conn_sendmsg(struct tipc_server *s, int conid,
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index aab4948f0aff..0ed0eaa62f29 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -70,8 +70,6 @@ static const struct proto_ops msg_ops;
static struct proto tipc_proto;
static struct proto tipc_proto_kern;
-static int sockets_enabled;
-
/*
* Revised TIPC socket locking policy:
*
@@ -999,7 +997,7 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long timeo)
for (;;) {
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
- if (skb_queue_empty(&sk->sk_receive_queue)) {
+ if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
if (sock->state == SS_DISCONNECTING) {
err = -ENOTCONN;
break;
@@ -1625,7 +1623,7 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo)
for (;;) {
prepare_to_wait_exclusive(sk_sleep(sk), &wait,
TASK_INTERRUPTIBLE);
- if (skb_queue_empty(&sk->sk_receive_queue)) {
+ if (timeo && skb_queue_empty(&sk->sk_receive_queue)) {
release_sock(sk);
timeo = schedule_timeout(timeo);
lock_sock(sk);
@@ -2027,8 +2025,6 @@ int tipc_socket_init(void)
proto_unregister(&tipc_proto);
goto out;
}
-
- sockets_enabled = 1;
out:
return res;
}
@@ -2038,10 +2034,6 @@ int tipc_socket_init(void)
*/
void tipc_socket_stop(void)
{
- if (!sockets_enabled)
- return;
-
- sockets_enabled = 0;
sock_unregister(tipc_family_ops.family);
proto_unregister(&tipc_proto);
}
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 7cb0bd5b1176..11c9ae00837d 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -96,20 +96,16 @@ static void subscr_send_event(struct tipc_subscription *sub, u32 found_lower,
{
struct tipc_subscriber *subscriber = sub->subscriber;
struct kvec msg_sect;
- int ret;
msg_sect.iov_base = (void *)&sub->evt;
msg_sect.iov_len = sizeof(struct tipc_event);
-
sub->evt.event = htohl(event, sub->swap);
sub->evt.found_lower = htohl(found_lower, sub->swap);
sub->evt.found_upper = htohl(found_upper, sub->swap);
sub->evt.port.ref = htohl(port_ref, sub->swap);
sub->evt.port.node = htohl(node, sub->swap);
- ret = tipc_conn_sendmsg(&topsrv, subscriber->conid, NULL,
- msg_sect.iov_base, msg_sect.iov_len);
- if (ret < 0)
- pr_err("Sending subscription event failed, no memory\n");
+ tipc_conn_sendmsg(&topsrv, subscriber->conid, NULL, msg_sect.iov_base,
+ msg_sect.iov_len);
}
/**
@@ -153,14 +149,6 @@ static void subscr_timeout(struct tipc_subscription *sub)
/* The spin lock per subscriber is used to protect its members */
spin_lock_bh(&subscriber->lock);
- /* Validate if the connection related to the subscriber is
- * closed (in case subscriber is terminating)
- */
- if (subscriber->conid == 0) {
- spin_unlock_bh(&subscriber->lock);
- return;
- }
-
/* Validate timeout (in case subscription is being cancelled) */
if (sub->timeout == TIPC_WAIT_FOREVER) {
spin_unlock_bh(&subscriber->lock);
@@ -215,9 +203,6 @@ static void subscr_release(struct tipc_subscriber *subscriber)
spin_lock_bh(&subscriber->lock);
- /* Invalidate subscriber reference */
- subscriber->conid = 0;
-
/* Destroy any existing subscriptions for subscriber */
list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list,
subscription_list) {
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 29fc8bee9702..ce6ec6c2f4de 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -163,9 +163,8 @@ static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
static inline unsigned int unix_hash_fold(__wsum n)
{
- unsigned int hash = (__force unsigned int)n;
+ unsigned int hash = (__force unsigned int)csum_fold(n);
- hash ^= hash>>16;
hash ^= hash>>8;
return hash&(UNIX_HASH_SIZE-1);
}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 4b98b25793c5..1d5c7bf29938 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1158,7 +1158,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
if (hlist_unhashed(&pol->bydst))
return NULL;
- hlist_del(&pol->bydst);
+ hlist_del_init(&pol->bydst);
hlist_del(&pol->byidx);
list_del(&pol->walk.all);
net->xfrm.policy_count[dir]--;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index a26b7aa79475..40f1b3e92e78 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1159,6 +1159,11 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
}
x->props.aalgo = orig->props.aalgo;
+ if (orig->aead) {
+ x->aead = xfrm_algo_aead_clone(orig->aead);
+ if (!x->aead)
+ goto error;
+ }
if (orig->ealg) {
x->ealg = xfrm_algo_clone(orig->ealg);
if (!x->ealg)
@@ -1201,6 +1206,9 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
x->props.flags = orig->props.flags;
x->props.extra_flags = orig->props.extra_flags;
+ x->tfcpad = orig->tfcpad;
+ x->replay_maxdiff = orig->replay_maxdiff;
+ x->replay_maxage = orig->replay_maxage;
x->curlft.add_time = orig->curlft.add_time;
x->km.state = orig->km.state;
x->km.seq = orig->km.seq;
@@ -1215,11 +1223,12 @@ out:
return NULL;
}
-/* net->xfrm.xfrm_state_lock is held */
struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net)
{
unsigned int h;
- struct xfrm_state *x;
+ struct xfrm_state *x = NULL;
+
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
if (m->reqid) {
h = xfrm_dst_hash(net, &m->old_daddr, &m->old_saddr,
@@ -1236,7 +1245,7 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n
m->old_family))
continue;
xfrm_state_hold(x);
- return x;
+ break;
}
} else {
h = xfrm_src_hash(net, &m->old_daddr, &m->old_saddr,
@@ -1251,11 +1260,13 @@ struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *n
m->old_family))
continue;
xfrm_state_hold(x);
- return x;
+ break;
}
}
- return NULL;
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+
+ return x;
}
EXPORT_SYMBOL(xfrm_migrate_state_find);
@@ -1451,7 +1462,7 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
{
int err = 0;
struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
- struct net *net = xs_net(*dst);
+ struct net *net = xs_net(*src);
if (!afinfo)
return -EAFNOSUPPORT;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 1ae3ec7c18b0..c274179d60a2 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -32,11 +32,6 @@
#include <linux/in6.h>
#endif
-static inline int aead_len(struct xfrm_algo_aead *alg)
-{
- return sizeof(*alg) + ((alg->alg_key_len + 7) / 8);
-}
-
static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type)
{
struct nlattr *rt = attrs[type];