aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan.c4
-rw-r--r--net/8021q/vlan_core.c2
-rw-r--r--net/8021q/vlan_dev.c32
-rw-r--r--net/8021q/vlan_netlink.c2
-rw-r--r--net/9p/trans_common.c10
-rw-r--r--net/9p/trans_virtio.c10
-rw-r--r--net/appletalk/ddp.c16
-rw-r--r--net/atm/common.c2
-rw-r--r--net/ax25/af_ax25.c4
-rw-r--r--net/batman-adv/main.c5
-rw-r--r--net/batman-adv/network-coding.c28
-rw-r--r--net/batman-adv/network-coding.h14
-rw-r--r--net/bluetooth/af_bluetooth.c4
-rw-r--r--net/bluetooth/hci_core.c26
-rw-r--r--net/bluetooth/hci_event.c6
-rw-r--r--net/bluetooth/hci_sock.c2
-rw-r--r--net/bluetooth/hidp/core.c15
-rw-r--r--net/bluetooth/rfcomm/sock.c1
-rw-r--r--net/bluetooth/sco.c1
-rw-r--r--net/bridge/br_fdb.c10
-rw-r--r--net/bridge/br_if.c2
-rw-r--r--net/bridge/br_multicast.c15
-rw-r--r--net/bridge/br_netlink.c8
-rw-r--r--net/bridge/br_private.h8
-rw-r--r--net/bridge/br_stp.c23
-rw-r--r--net/bridge/br_stp_if.c12
-rw-r--r--net/bridge/br_vlan.c4
-rw-r--r--net/caif/caif_socket.c4
-rw-r--r--net/caif/cfctrl.c3
-rw-r--r--net/can/af_can.c3
-rw-r--r--net/can/bcm.c4
-rw-r--r--net/ceph/auth_none.c6
-rw-r--r--net/ceph/osd_client.c141
-rw-r--r--net/ceph/osdmap.c2
-rw-r--r--net/compat.c14
-rw-r--r--net/core/dev.c40
-rw-r--r--net/core/drop_monitor.c1
-rw-r--r--net/core/fib_rules.c10
-rw-r--r--net/core/filter.c30
-rw-r--r--net/core/flow_dissector.c17
-rw-r--r--net/core/iovec.c3
-rw-r--r--net/core/neighbour.c30
-rw-r--r--net/core/netpoll.c28
-rw-r--r--net/core/pktgen.c7
-rw-r--r--net/core/rtnetlink.c14
-rw-r--r--net/core/scm.c2
-rw-r--r--net/core/secure_seq.c29
-rw-r--r--net/core/skbuff.c63
-rw-r--r--net/core/sock.c10
-rw-r--r--net/core/sysctl_net_core.c6
-rw-r--r--net/dccp/ipv6.c1
-rw-r--r--net/ieee802154/6lowpan.c24
-rw-r--r--net/ieee802154/dgram.c3
-rw-r--r--net/ieee802154/nl-phy.c6
-rw-r--r--net/ipv4/af_inet.c4
-rw-r--r--net/ipv4/datagram.c2
-rw-r--r--net/ipv4/devinet.c7
-rw-r--r--net/ipv4/fib_frontend.c2
-rw-r--r--net/ipv4/fib_trie.c5
-rw-r--r--net/ipv4/igmp.c8
-rw-r--r--net/ipv4/inet_diag.c20
-rw-r--r--net/ipv4/inet_fragment.c5
-rw-r--r--net/ipv4/inet_hashtables.c2
-rw-r--r--net/ipv4/inetpeer.c4
-rw-r--r--net/ipv4/ip_forward.c71
-rw-r--r--net/ipv4/ip_gre.c14
-rw-r--r--net/ipv4/ip_input.c9
-rw-r--r--net/ipv4/ip_output.c10
-rw-r--r--net/ipv4/ip_sockglue.c3
-rw-r--r--net/ipv4/ip_tunnel.c116
-rw-r--r--net/ipv4/ip_vti.c22
-rw-r--r--net/ipv4/ipip.c14
-rw-r--r--net/ipv4/ipmr.c22
-rw-r--r--net/ipv4/ping.c13
-rw-r--r--net/ipv4/raw.c11
-rw-r--r--net/ipv4/route.c14
-rw-r--r--net/ipv4/sysctl_net_ipv4.c16
-rw-r--r--net/ipv4/tcp.c57
-rw-r--r--net/ipv4/tcp_cubic.c12
-rw-r--r--net/ipv4/tcp_input.c50
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv4/tcp_metrics.c56
-rw-r--r--net/ipv4/tcp_output.c68
-rw-r--r--net/ipv4/udp.c17
-rw-r--r--net/ipv4/xfrm4_mode_tunnel.c2
-rw-r--r--net/ipv6/addrconf.c70
-rw-r--r--net/ipv6/addrlabel.c48
-rw-r--r--net/ipv6/datagram.c8
-rw-r--r--net/ipv6/exthdrs.c6
-rw-r--r--net/ipv6/exthdrs_core.c2
-rw-r--r--net/ipv6/exthdrs_offload.c4
-rw-r--r--net/ipv6/icmp.c12
-rw-r--r--net/ipv6/inet6_hashtables.c2
-rw-r--r--net/ipv6/ip6_fib.c33
-rw-r--r--net/ipv6/ip6_flowlabel.c2
-rw-r--r--net/ipv6/ip6_gre.c4
-rw-r--r--net/ipv6/ip6_input.c2
-rw-r--r--net/ipv6/ip6_output.c101
-rw-r--r--net/ipv6/ip6_tunnel.c15
-rw-r--r--net/ipv6/ip6mr.c25
-rw-r--r--net/ipv6/mcast.c33
-rw-r--r--net/ipv6/ndisc.c14
-rw-r--r--net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c54
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c19
-rw-r--r--net/ipv6/netfilter/nf_defrag_ipv6_hooks.c7
-rw-r--r--net/ipv6/raw.c8
-rw-r--r--net/ipv6/reassembly.c5
-rw-r--r--net/ipv6/route.c168
-rw-r--r--net/ipv6/sit.c97
-rw-r--r--net/ipv6/tcp_ipv6.c2
-rw-r--r--net/ipv6/udp.c16
-rw-r--r--net/ipv6/udp_offload.c4
-rw-r--r--net/ipx/af_ipx.c3
-rw-r--r--net/irda/af_irda.c4
-rw-r--r--net/iucv/af_iucv.c2
-rw-r--r--net/key/af_key.c5
-rw-r--r--net/l2tp/l2tp_core.c36
-rw-r--r--net/l2tp/l2tp_core.h3
-rw-r--r--net/l2tp/l2tp_ip.c4
-rw-r--r--net/l2tp/l2tp_ip6.c2
-rw-r--r--net/l2tp/l2tp_ppp.c9
-rw-r--r--net/llc/af_llc.c7
-rw-r--r--net/mac80211/cfg.c48
-rw-r--r--net/mac80211/ibss.c1
-rw-r--r--net/mac80211/ieee80211_i.h3
-rw-r--r--net/mac80211/iface.c9
-rw-r--r--net/mac80211/mesh_ps.c1
-rw-r--r--net/mac80211/mlme.c77
-rw-r--r--net/mac80211/pm.c7
-rw-r--r--net/mac80211/rc80211_minstrel.c3
-rw-r--r--net/mac80211/rc80211_minstrel_ht.c13
-rw-r--r--net/mac80211/rx.c16
-rw-r--r--net/mac80211/scan.c19
-rw-r--r--net/mac80211/sta_info.c5
-rw-r--r--net/mac80211/sta_info.h7
-rw-r--r--net/mac80211/status.c3
-rw-r--r--net/mac80211/tx.c43
-rw-r--r--net/mac80211/util.c4
-rw-r--r--net/mac80211/wme.c5
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h28
-rw-r--r--net/netfilter/ipvs/ip_vs_core.c55
-rw-r--r--net/netfilter/ipvs/ip_vs_pe_sip.c8
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c2
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c6
-rw-r--r--net/netfilter/nf_nat_irc.c32
-rw-r--r--net/netlink/af_netlink.c2
-rw-r--r--net/netlink/genetlink.c69
-rw-r--r--net/netrom/af_netrom.c3
-rw-r--r--net/nfc/llcp.h1
-rw-r--r--net/nfc/llcp_sock.c10
-rw-r--r--net/nfc/rawsock.c2
-rw-r--r--net/packet/af_packet.c110
-rw-r--r--net/packet/internal.h1
-rw-r--r--net/phonet/datagram.c9
-rw-r--r--net/rds/ib.c3
-rw-r--r--net/rds/ib_recv.c7
-rw-r--r--net/rds/ib_send.c5
-rw-r--r--net/rds/iw.c3
-rw-r--r--net/rds/recv.c2
-rw-r--r--net/rose/af_rose.c24
-rw-r--r--net/rxrpc/ar-recvmsg.c9
-rw-r--r--net/sched/sch_api.c41
-rw-r--r--net/sched/sch_atm.c1
-rw-r--r--net/sched/sch_cbq.c1
-rw-r--r--net/sched/sch_generic.c1
-rw-r--r--net/sched/sch_htb.c17
-rw-r--r--net/sched/sch_qfq.c85
-rw-r--r--net/sctp/input.c3
-rw-r--r--net/sctp/ipv6.c44
-rw-r--r--net/sctp/output.c3
-rw-r--r--net/sctp/sm_make_chunk.c4
-rw-r--r--net/sctp/sm_statefuns.c10
-rw-r--r--net/sctp/socket.c46
-rw-r--r--net/socket.c50
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_upcall.c29
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_xdr.c50
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_xdr.h5
-rw-r--r--net/sunrpc/clnt.c14
-rw-r--r--net/sunrpc/netns.h1
-rw-r--r--net/sunrpc/rpcb_clnt.c48
-rw-r--r--net/sunrpc/svcauth_unix.c2
-rw-r--r--net/sunrpc/svcsock.c9
-rw-r--r--net/sunrpc/xdr.c9
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_marshal.c20
-rw-r--r--net/sunrpc/xprtsock.c34
-rw-r--r--net/sysctl_net.c4
-rw-r--r--net/tipc/socket.c10
-rw-r--r--net/unix/af_unix.c51
-rw-r--r--net/unix/diag.c1
-rw-r--r--net/vmw_vsock/af_vsock.c2
-rw-r--r--net/vmw_vsock/vmci_transport.c2
-rw-r--r--net/wireless/core.c1
-rw-r--r--net/wireless/ibss.c3
-rw-r--r--net/wireless/nl80211.c13
-rw-r--r--net/wireless/radiotap.c11
-rw-r--r--net/wireless/scan.c4
-rw-r--r--net/x25/af_x25.c18
198 files changed, 2291 insertions, 1276 deletions
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 9424f3718ea..86abb2e59ae 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -305,9 +305,11 @@ static void vlan_sync_address(struct net_device *dev,
static void vlan_transfer_features(struct net_device *dev,
struct net_device *vlandev)
{
+ struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev);
+
vlandev->gso_max_size = dev->gso_max_size;
- if (dev->features & NETIF_F_HW_VLAN_CTAG_TX)
+ if (vlan_hw_offload_capable(dev->features, vlan->vlan_proto))
vlandev->hard_header_len = dev->hard_header_len;
else
vlandev->hard_header_len = dev->hard_header_len + VLAN_HLEN;
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 8a15eaadc4b..4a78c4de9f2 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -9,7 +9,7 @@ bool vlan_do_receive(struct sk_buff **skbp)
{
struct sk_buff *skb = *skbp;
__be16 vlan_proto = skb->vlan_proto;
- u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK;
+ u16 vlan_id = vlan_tx_tag_get_id(skb);
struct net_device *vlan_dev;
struct vlan_pcpu_stats *rx_stats;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 3a8c8fd63c8..cf35f383db4 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -73,6 +73,8 @@ vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb)
{
struct vlan_priority_tci_mapping *mp;
+ smp_rmb(); /* coupled with smp_wmb() in vlan_dev_set_egress_priority() */
+
mp = vlan_dev_priv(dev)->egress_priority_map[(skb->priority & 0xF)];
while (mp) {
if (mp->priority == skb->priority) {
@@ -249,6 +251,11 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
np->next = mp;
np->priority = skb_prio;
np->vlan_qos = vlan_qos;
+ /* Before inserting this element in hash table, make sure all its fields
+ * are committed to memory.
+ * coupled with smp_rmb() in vlan_dev_get_egress_qos_mask()
+ */
+ smp_wmb();
vlan->egress_priority_map[skb_prio & 0xF] = np;
if (vlan_qos)
vlan->nr_egress_mappings++;
@@ -542,6 +549,26 @@ static const struct header_ops vlan_header_ops = {
.parse = eth_header_parse,
};
+static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev,
+ unsigned short type,
+ const void *daddr, const void *saddr,
+ unsigned int len)
+{
+ struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
+ struct net_device *real_dev = vlan->real_dev;
+
+ if (saddr == NULL)
+ saddr = dev->dev_addr;
+
+ return dev_hard_header(skb, real_dev, type, daddr, saddr, len);
+}
+
+static const struct header_ops vlan_passthru_header_ops = {
+ .create = vlan_passthru_hard_header,
+ .rebuild = dev_rebuild_header,
+ .parse = eth_header_parse,
+};
+
static struct device_type vlan_type = {
.name = "vlan",
};
@@ -584,8 +611,9 @@ static int vlan_dev_init(struct net_device *dev)
#endif
dev->needed_headroom = real_dev->needed_headroom;
- if (real_dev->features & NETIF_F_HW_VLAN_CTAG_TX) {
- dev->header_ops = real_dev->header_ops;
+ if (vlan_hw_offload_capable(real_dev->features,
+ vlan_dev_priv(dev)->vlan_proto)) {
+ dev->header_ops = &vlan_passthru_header_ops;
dev->hard_header_len = real_dev->hard_header_len;
} else {
dev->header_ops = &vlan_header_ops;
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 30912973228..c7e634af851 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -171,7 +171,7 @@ static size_t vlan_get_size(const struct net_device *dev)
return nla_total_size(2) + /* IFLA_VLAN_PROTOCOL */
nla_total_size(2) + /* IFLA_VLAN_ID */
- sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */
+ nla_total_size(sizeof(struct ifla_vlan_flags)) + /* IFLA_VLAN_FLAGS */
vlan_qos_map_size(vlan->nr_ingress_mappings) +
vlan_qos_map_size(vlan->nr_egress_mappings);
}
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
index de8df957867..2ee3879161b 100644
--- a/net/9p/trans_common.c
+++ b/net/9p/trans_common.c
@@ -24,11 +24,11 @@
*/
void p9_release_pages(struct page **pages, int nr_pages)
{
- int i = 0;
- while (pages[i] && nr_pages--) {
- put_page(pages[i]);
- i++;
- }
+ int i;
+
+ for (i = 0; i < nr_pages; i++)
+ if (pages[i])
+ put_page(pages[i]);
}
EXPORT_SYMBOL(p9_release_pages);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index e1c26b10183..c76a4388a5d 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -340,7 +340,10 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
int count = nr_pages;
while (nr_pages) {
s = rest_of_page(data);
- pages[index++] = kmap_to_page(data);
+ if (is_vmalloc_addr(data))
+ pages[index++] = vmalloc_to_page(data);
+ else
+ pages[index++] = kmap_to_page(data);
data += s;
nr_pages--;
}
@@ -577,6 +580,10 @@ static int p9_virtio_probe(struct virtio_device *vdev)
mutex_lock(&virtio_9p_lock);
list_add_tail(&chan->chan_list, &virtio_chan_list);
mutex_unlock(&virtio_9p_lock);
+
+ /* Let udev rules use the new mount_tag attribute. */
+ kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE);
+
return 0;
out_free_tag:
@@ -654,6 +661,7 @@ static void p9_virtio_remove(struct virtio_device *vdev)
list_del(&chan->chan_list);
mutex_unlock(&virtio_9p_lock);
sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
+ kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE);
kfree(chan->tag);
kfree(chan->vc_wq);
kfree(chan);
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index ef12839a7cf..0018daccdea 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1735,7 +1735,6 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
size_t size, int flags)
{
struct sock *sk = sock->sk;
- struct sockaddr_at *sat = (struct sockaddr_at *)msg->msg_name;
struct ddpehdr *ddp;
int copied = 0;
int offset = 0;
@@ -1764,14 +1763,13 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr
}
err = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copied);
- if (!err) {
- if (sat) {
- sat->sat_family = AF_APPLETALK;
- sat->sat_port = ddp->deh_sport;
- sat->sat_addr.s_node = ddp->deh_snode;
- sat->sat_addr.s_net = ddp->deh_snet;
- }
- msg->msg_namelen = sizeof(*sat);
+ if (!err && msg->msg_name) {
+ struct sockaddr_at *sat = msg->msg_name;
+ sat->sat_family = AF_APPLETALK;
+ sat->sat_port = ddp->deh_sport;
+ sat->sat_addr.s_node = ddp->deh_snode;
+ sat->sat_addr.s_net = ddp->deh_snet;
+ msg->msg_namelen = sizeof(*sat);
}
skb_free_datagram(sk, skb); /* Free the datagram. */
diff --git a/net/atm/common.c b/net/atm/common.c
index 737bef59ce8..7b491006eaf 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -531,8 +531,6 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
struct sk_buff *skb;
int copied, error = -EINVAL;
- msg->msg_namelen = 0;
-
if (sock->state != SS_CONNECTED)
return -ENOTCONN;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index e277e38f736..ba6db78a02b 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1636,11 +1636,11 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock,
skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
- if (msg->msg_namelen != 0) {
- struct sockaddr_ax25 *sax = (struct sockaddr_ax25 *)msg->msg_name;
+ if (msg->msg_name) {
ax25_digi digi;
ax25_address src;
const unsigned char *mac = skb_mac_header(skb);
+ struct sockaddr_ax25 *sax = msg->msg_name;
memset(sax, 0, sizeof(struct full_sockaddr_ax25));
ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL,
diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c
index 51aafd669cb..f1cb1f56cda 100644
--- a/net/batman-adv/main.c
+++ b/net/batman-adv/main.c
@@ -61,6 +61,7 @@ static int __init batadv_init(void)
batadv_recv_handler_init();
batadv_iv_init();
+ batadv_nc_init();
batadv_event_workqueue = create_singlethread_workqueue("bat_events");
@@ -138,7 +139,7 @@ int batadv_mesh_init(struct net_device *soft_iface)
if (ret < 0)
goto err;
- ret = batadv_nc_init(bat_priv);
+ ret = batadv_nc_mesh_init(bat_priv);
if (ret < 0)
goto err;
@@ -163,7 +164,7 @@ void batadv_mesh_free(struct net_device *soft_iface)
batadv_vis_quit(bat_priv);
batadv_gw_node_purge(bat_priv);
- batadv_nc_free(bat_priv);
+ batadv_nc_mesh_free(bat_priv);
batadv_dat_free(bat_priv);
batadv_bla_free(bat_priv);
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index e84629ece9b..f97aeee2201 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -35,6 +35,20 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb,
struct batadv_hard_iface *recv_if);
/**
+ * batadv_nc_init - one-time initialization for network coding
+ */
+int __init batadv_nc_init(void)
+{
+ int ret;
+
+ /* Register our packet type */
+ ret = batadv_recv_handler_register(BATADV_CODED,
+ batadv_nc_recv_coded_packet);
+
+ return ret;
+}
+
+/**
* batadv_nc_start_timer - initialise the nc periodic worker
* @bat_priv: the bat priv with all the soft interface information
*/
@@ -45,10 +59,10 @@ static void batadv_nc_start_timer(struct batadv_priv *bat_priv)
}
/**
- * batadv_nc_init - initialise coding hash table and start house keeping
+ * batadv_nc_mesh_init - initialise coding hash table and start house keeping
* @bat_priv: the bat priv with all the soft interface information
*/
-int batadv_nc_init(struct batadv_priv *bat_priv)
+int batadv_nc_mesh_init(struct batadv_priv *bat_priv)
{
bat_priv->nc.timestamp_fwd_flush = jiffies;
bat_priv->nc.timestamp_sniffed_purge = jiffies;
@@ -70,11 +84,6 @@ int batadv_nc_init(struct batadv_priv *bat_priv)
batadv_hash_set_lock_class(bat_priv->nc.coding_hash,
&batadv_nc_decoding_hash_lock_class_key);
- /* Register our packet type */
- if (batadv_recv_handler_register(BATADV_CODED,
- batadv_nc_recv_coded_packet) < 0)
- goto err;
-
INIT_DELAYED_WORK(&bat_priv->nc.work, batadv_nc_worker);
batadv_nc_start_timer(bat_priv);
@@ -1722,12 +1731,11 @@ free_nc_packet:
}
/**
- * batadv_nc_free - clean up network coding memory
+ * batadv_nc_mesh_free - clean up network coding memory
* @bat_priv: the bat priv with all the soft interface information
*/
-void batadv_nc_free(struct batadv_priv *bat_priv)
+void batadv_nc_mesh_free(struct batadv_priv *bat_priv)
{
- batadv_recv_handler_unregister(BATADV_CODED);
cancel_delayed_work_sync(&bat_priv->nc.work);
batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash, NULL);
diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h
index 4fa6d0caddb..bd4295fb960 100644
--- a/net/batman-adv/network-coding.h
+++ b/net/batman-adv/network-coding.h
@@ -22,8 +22,9 @@
#ifdef CONFIG_BATMAN_ADV_NC
-int batadv_nc_init(struct batadv_priv *bat_priv);
-void batadv_nc_free(struct batadv_priv *bat_priv);
+int batadv_nc_init(void);
+int batadv_nc_mesh_init(struct batadv_priv *bat_priv);
+void batadv_nc_mesh_free(struct batadv_priv *bat_priv);
void batadv_nc_update_nc_node(struct batadv_priv *bat_priv,
struct batadv_orig_node *orig_node,
struct batadv_orig_node *orig_neigh_node,
@@ -47,12 +48,17 @@ int batadv_nc_init_debugfs(struct batadv_priv *bat_priv);
#else /* ifdef CONFIG_BATMAN_ADV_NC */
-static inline int batadv_nc_init(struct batadv_priv *bat_priv)
+static inline int batadv_nc_init(void)
{
return 0;
}
-static inline void batadv_nc_free(struct batadv_priv *bat_priv)
+static inline int batadv_nc_mesh_init(struct batadv_priv *bat_priv)
+{
+ return 0;
+}
+
+static inline void batadv_nc_mesh_free(struct batadv_priv *bat_priv)
{
return;
}
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 9096137c889..6629cdc134d 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -221,8 +221,6 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
if (flags & (MSG_OOB))
return -EOPNOTSUPP;
- msg->msg_namelen = 0;
-
skb = skb_recv_datagram(sk, flags, noblock, &err);
if (!skb) {
if (sk->sk_shutdown & RCV_SHUTDOWN)
@@ -287,8 +285,6 @@ int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
if (flags & MSG_OOB)
return -EOPNOTSUPP;
- msg->msg_namelen = 0;
-
BT_DBG("sk %p size %zu", sk, size);
lock_sock(sk);
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index ace5e55fe5a..7c88f5f8359 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -1123,7 +1123,11 @@ int hci_dev_open(__u16 dev)
goto done;
}
- if (hdev->rfkill && rfkill_blocked(hdev->rfkill)) {
+ /* Check for rfkill but allow the HCI setup stage to proceed
+ * (which in itself doesn't cause any RF activity).
+ */
+ if (test_bit(HCI_RFKILLED, &hdev->dev_flags) &&
+ !test_bit(HCI_SETUP, &hdev->dev_flags)) {
ret = -ERFKILL;
goto done;
}
@@ -1545,10 +1549,13 @@ static int hci_rfkill_set_block(void *data, bool blocked)
BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked);
- if (!blocked)
- return 0;
-
- hci_dev_do_close(hdev);
+ if (blocked) {
+ set_bit(HCI_RFKILLED, &hdev->dev_flags);
+ if (!test_bit(HCI_SETUP, &hdev->dev_flags))
+ hci_dev_do_close(hdev);
+ } else {
+ clear_bit(HCI_RFKILLED, &hdev->dev_flags);
+}
return 0;
}
@@ -1570,9 +1577,13 @@ static void hci_power_on(struct work_struct *work)
return;
}
- if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags))
+ if (test_bit(HCI_RFKILLED, &hdev->dev_flags)) {
+ clear_bit(HCI_AUTO_OFF, &hdev->dev_flags);
+ hci_dev_do_close(hdev);
+ } else if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) {
queue_delayed_work(hdev->req_workqueue, &hdev->power_off,
HCI_AUTO_OFF_TIMEOUT);
+ }
if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags))
mgmt_index_added(hdev);
@@ -2241,6 +2252,9 @@ int hci_register_dev(struct hci_dev *hdev)
}
}
+ if (hdev->rfkill && rfkill_blocked(hdev->rfkill))
+ set_bit(HCI_RFKILLED, &hdev->dev_flags);
+
set_bit(HCI_SETUP, &hdev->dev_flags);
if (hdev->dev_type != HCI_AMP)
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index b93cd2eb5d5..dcaa6dbbab2 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3611,7 +3611,11 @@ static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
cp.handle = cpu_to_le16(conn->handle);
if (ltk->authenticated)
- conn->sec_level = BT_SECURITY_HIGH;
+ conn->pending_sec_level = BT_SECURITY_HIGH;
+ else
+ conn->pending_sec_level = BT_SECURITY_MEDIUM;
+
+ conn->enc_key_size = ltk->enc_size;
hci_send_cmd(hdev, HCI_OP_LE_LTK_REPLY, sizeof(cp), &cp);
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 9bd7d959e38..fa4bf663142 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -752,8 +752,6 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
if (!skb)
return err;
- msg->msg_namelen = 0;
-
copied = skb->len;
if (len < copied) {
msg->msg_flags |= MSG_TRUNC;
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index 940f5acb669..de030f50f72 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -231,17 +231,22 @@ static void hidp_input_report(struct hidp_session *session, struct sk_buff *skb)
static int hidp_send_report(struct hidp_session *session, struct hid_report *report)
{
- unsigned char buf[32], hdr;
- int rsize;
+ unsigned char hdr;
+ u8 *buf;
+ int rsize, ret;
- rsize = ((report->size - 1) >> 3) + 1 + (report->id > 0);
- if (rsize > sizeof(buf))
+ buf = hid_alloc_report_buf(report, GFP_ATOMIC);
+ if (!buf)
return -EIO;
hid_output_report(report, buf);
hdr = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT;
- return hidp_send_intr_message(session, hdr, buf, rsize);
+ rsize = ((report->size - 1) >> 3) + 1 + (report->id > 0);
+ ret = hidp_send_intr_message(session, hdr, buf, rsize);
+
+ kfree(buf);
+ return ret;
}
static int hidp_get_raw_report(struct hid_device *hid,
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 30b3721dc6d..c1c6028e389 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -608,7 +608,6 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) {
rfcomm_dlc_accept(d);
- msg->msg_namelen = 0;
return 0;
}
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index e7bd4eea575..2bb1d3a5e76 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -700,7 +700,6 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
sco_conn_defer_accept(pi->conn->hcon, 0);
sk->sk_state = BT_CONFIG;
- msg->msg_namelen = 0;
release_sock(sk);
return 0;
diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c
index ebfa4443c69..84dd783abe5 100644
--- a/net/bridge/br_fdb.c
+++ b/net/bridge/br_fdb.c
@@ -161,7 +161,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr)
if (!pv)
return;
- for_each_set_bit_from(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
+ for_each_set_bit_from(vid, pv->vlan_bitmap, VLAN_N_VID) {
f = __br_fdb_get(br, br->dev->dev_addr, vid);
if (f && f->is_local && !f->dst)
fdb_delete(br, f);
@@ -725,7 +725,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
/* VID was specified, so use it. */
err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
} else {
- if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) {
+ if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) {
err = __br_fdb_add(ndm, p, addr, nlh_flags, 0);
goto out;
}
@@ -734,7 +734,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
* specify a VLAN. To be nice, add/update entry for every
* vlan on this port.
*/
- for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
+ for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
err = __br_fdb_add(ndm, p, addr, nlh_flags, vid);
if (err)
goto out;
@@ -812,7 +812,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
err = __br_fdb_delete(p, addr, vid);
} else {
- if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) {
+ if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) {
err = __br_fdb_delete(p, addr, 0);
goto out;
}
@@ -822,7 +822,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
* vlan on this port.
*/
err = -ENOENT;
- for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
+ for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
err &= __br_fdb_delete(p, addr, vid);
}
}
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 4cdba60926f..32bd1e87f14 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -172,6 +172,8 @@ void br_dev_delete(struct net_device *dev, struct list_head *head)
del_nbp(p);
}
+ br_fdb_delete_by_port(br, NULL, 1);
+
del_timer_sync(&br->gc_timer);
br_sysfs_delbr(br->dev);
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index d6448e35e02..81de0106528 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1176,6 +1176,12 @@ static int br_ip6_multicast_query(struct net_bridge *br,
br_multicast_query_received(br, port, !ipv6_addr_any(&ip6h->saddr));
+ /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */
+ if (!(ipv6_addr_type(&ip6h->saddr) & IPV6_ADDR_LINKLOCAL)) {
+ err = -EINVAL;
+ goto out;
+ }
+
if (skb->len == sizeof(*mld)) {
if (!pskb_may_pull(skb, sizeof(*mld))) {
err = -EINVAL;
@@ -1185,7 +1191,7 @@ static int br_ip6_multicast_query(struct net_bridge *br,
max_delay = msecs_to_jiffies(ntohs(mld->mld_maxdelay));
if (max_delay)
group = &mld->mld_mca;
- } else if (skb->len >= sizeof(*mld2q)) {
+ } else {
if (!pskb_may_pull(skb, sizeof(*mld2q))) {
err = -EINVAL;
goto out;
@@ -1193,7 +1199,8 @@ static int br_ip6_multicast_query(struct net_bridge *br,
mld2q = (struct mld2_query *)icmp6_hdr(skb);
if (!mld2q->mld2q_nsrcs)
group = &mld2q->mld2q_mca;
- max_delay = mld2q->mld2q_mrc ? MLDV2_MRC(ntohs(mld2q->mld2q_mrc)) : 1;
+
+ max_delay = max(msecs_to_jiffies(MLDV2_MRC(ntohs(mld2q->mld2q_mrc))), 1UL);
}
if (!group)
@@ -1838,7 +1845,7 @@ int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val)
u32 old;
struct net_bridge_mdb_htable *mdb;
- spin_lock(&br->multicast_lock);
+ spin_lock_bh(&br->multicast_lock);
if (!netif_running(br->dev))
goto unlock;
@@ -1870,7 +1877,7 @@ rollback:
}
unlock:
- spin_unlock(&br->multicast_lock);
+ spin_unlock_bh(&br->multicast_lock);
return err;
}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 8e3abf56479..06873e80a43 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -128,7 +128,7 @@ static int br_fill_ifinfo(struct sk_buff *skb,
else
pv = br_get_vlan_info(br);
- if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN))
+ if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID))
goto done;
af = nla_nest_start(skb, IFLA_AF_SPEC);
@@ -136,7 +136,7 @@ static int br_fill_ifinfo(struct sk_buff *skb,
goto nla_put_failure;
pvid = br_get_pvid(pv);
- for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) {
+ for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) {
vinfo.vid = vid;
vinfo.flags = 0;
if (vid == pvid)
@@ -203,7 +203,7 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq,
struct net_device *dev, u32 filter_mask)
{
int err = 0;
- struct net_bridge_port *port = br_port_get_rcu(dev);
+ struct net_bridge_port *port = br_port_get_rtnl(dev);
/* not a bridge port and */
if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN))
@@ -443,7 +443,7 @@ static size_t br_get_link_af_size(const struct net_device *dev)
struct net_port_vlans *pv;
if (br_port_exists(dev))
- pv = nbp_get_vlan_info(br_port_get_rcu(dev));
+ pv = nbp_get_vlan_info(br_port_get_rtnl(dev));
else if (dev->priv_flags & IFF_EBRIDGE)
pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev));
else
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index d2c043a857b..e696833a31b 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -183,13 +183,10 @@ struct net_bridge_port
static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev)
{
- struct net_bridge_port *port =
- rcu_dereference_rtnl(dev->rx_handler_data);
-
- return br_port_exists(dev) ? port : NULL;
+ return rcu_dereference(dev->rx_handler_data);
}
-static inline struct net_bridge_port *br_port_get_rtnl(struct net_device *dev)
+static inline struct net_bridge_port *br_port_get_rtnl(const struct net_device *dev)
{
return br_port_exists(dev) ?
rtnl_dereference(dev->rx_handler_data) : NULL;
@@ -714,6 +711,7 @@ extern struct net_bridge_port *br_get_port(struct net_bridge *br,
extern void br_init_port(struct net_bridge_port *p);
extern void br_become_designated_port(struct net_bridge_port *p);
+extern void __br_set_forward_delay(struct net_bridge *br, unsigned long t);
extern int br_set_forward_delay(struct net_bridge *br, unsigned long x);
extern int br_set_hello_time(struct net_bridge *br, unsigned long x);
extern int br_set_max_age(struct net_bridge *br, unsigned long x);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 1c0a50f1322..3c86f0538cb 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -209,7 +209,7 @@ static void br_record_config_information(struct net_bridge_port *p,
p->designated_age = jiffies - bpdu->message_age;
mod_timer(&p->message_age_timer, jiffies
- + (p->br->max_age - bpdu->message_age));
+ + (bpdu->max_age - bpdu->message_age));
}
/* called under bridge lock */
@@ -544,18 +544,27 @@ int br_set_max_age(struct net_bridge *br, unsigned long val)
}
+void __br_set_forward_delay(struct net_bridge *br, unsigned long t)
+{
+ br->bridge_forward_delay = t;
+ if (br_is_root_bridge(br))
+ br->forward_delay = br->bridge_forward_delay;
+}
+
int br_set_forward_delay(struct net_bridge *br, unsigned long val)
{
unsigned long t = clock_t_to_jiffies(val);
+ int err = -ERANGE;
+ spin_lock_bh(&br->lock);
if (br->stp_enabled != BR_NO_STP &&
(t < BR_MIN_FORWARD_DELAY || t > BR_MAX_FORWARD_DELAY))
- return -ERANGE;
+ goto unlock;
- spin_lock_bh(&br->lock);
- br->bridge_forward_delay = t;
- if (br_is_root_bridge(br))
- br->forward_delay = br->bridge_forward_delay;
+ __br_set_forward_delay(br, t);
+ err = 0;
+
+unlock:
spin_unlock_bh(&br->lock);
- return 0;
+ return err;
}
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index d45e760141b..656a6f3e40d 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -129,6 +129,14 @@ static void br_stp_start(struct net_bridge *br)
char *envp[] = { NULL };
r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC);
+
+ spin_lock_bh(&br->lock);
+
+ if (br->bridge_forward_delay < BR_MIN_FORWARD_DELAY)
+ __br_set_forward_delay(br, BR_MIN_FORWARD_DELAY);
+ else if (br->bridge_forward_delay > BR_MAX_FORWARD_DELAY)
+ __br_set_forward_delay(br, BR_MAX_FORWARD_DELAY);
+
if (r == 0) {
br->stp_enabled = BR_USER_STP;
br_debug(br, "userspace STP started\n");
@@ -137,10 +145,10 @@ static void br_stp_start(struct net_bridge *br)
br_debug(br, "using kernel STP\n");
/* To start timers on any ports left in blocking */
- spin_lock_bh(&br->lock);
br_port_state_selection(br);
- spin_unlock_bh(&br->lock);
}
+
+ spin_unlock_bh(&br->lock);
}
static void br_stp_stop(struct net_bridge *br)
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index bd58b45f5f9..9a9ffe7e401 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -108,7 +108,7 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid)
clear_bit(vid, v->vlan_bitmap);
v->num_vlans--;
- if (bitmap_empty(v->vlan_bitmap, BR_VLAN_BITMAP_LEN)) {
+ if (bitmap_empty(v->vlan_bitmap, VLAN_N_VID)) {
if (v->port_idx)
rcu_assign_pointer(v->parent.port->vlan_info, NULL);
else
@@ -122,7 +122,7 @@ static void __vlan_flush(struct net_port_vlans *v)
{
smp_wmb();
v->pvid = 0;
- bitmap_zero(v->vlan_bitmap, BR_VLAN_BITMAP_LEN);
+ bitmap_zero(v->vlan_bitmap, VLAN_N_VID);
if (v->port_idx)
rcu_assign_pointer(v->parent.port->vlan_info, NULL);
else
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 05a41c7ec30..d6be3edb7a4 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -286,8 +286,6 @@ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock,
if (m->msg_flags&MSG_OOB)
goto read_error;
- m->msg_namelen = 0;
-
skb = skb_recv_datagram(sk, flags, 0 , &ret);
if (!skb)
goto read_error;
@@ -361,8 +359,6 @@ static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
if (flags&MSG_OOB)
goto out;
- msg->msg_namelen = 0;
-
/*
* Lock the socket to prevent queue disordering
* while sleeps in memcpy_tomsg
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index 2bd4b58f437..0f455227da8 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -293,9 +293,10 @@ int cfctrl_linkup_request(struct cflayer *layer,
count = cfctrl_cancel_req(&cfctrl->serv.layer,
user_layer);
- if (count != 1)
+ if (count != 1) {
pr_err("Could not remove request (%d)", count);
return -ENODEV;
+ }
}
return 0;
}
diff --git a/net/can/af_can.c b/net/can/af_can.c
index c4e50852c9f..f59859a3f56 100644
--- a/net/can/af_can.c
+++ b/net/can/af_can.c
@@ -57,6 +57,7 @@
#include <linux/skbuff.h>
#include <linux/can.h>
#include <linux/can/core.h>
+#include <linux/can/skb.h>
#include <linux/ratelimit.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -290,7 +291,7 @@ int can_send(struct sk_buff *skb, int loop)
return -ENOMEM;
}
- newskb->sk = skb->sk;
+ can_skb_set_owner(newskb, skb->sk);
newskb->ip_summed = CHECKSUM_UNNECESSARY;
newskb->pkt_type = PACKET_BROADCAST;
}
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 8f113e6ff32..35cf02d9276 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -268,7 +268,7 @@ static void bcm_can_tx(struct bcm_op *op)
/* send with loopback */
skb->dev = dev;
- skb->sk = op->sk;
+ can_skb_set_owner(skb, op->sk);
can_send(skb, 1);
/* update statistics */
@@ -1223,7 +1223,7 @@ static int bcm_tx_send(struct msghdr *msg, int ifindex, struct sock *sk)
can_skb_prv(skb)->ifindex = dev->ifindex;
skb->dev = dev;
- skb->sk = sk;
+ can_skb_set_owner(skb, sk);
err = can_send(skb, 1); /* send with loopback */
dev_put(dev);
diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c
index 925ca583c09..8c93fa8d81b 100644
--- a/net/ceph/auth_none.c
+++ b/net/ceph/auth_none.c
@@ -39,6 +39,11 @@ static int should_authenticate(struct ceph_auth_client *ac)
return xi->starting;
}
+static int build_request(struct ceph_auth_client *ac, void *buf, void *end)
+{
+ return 0;
+}
+
/*
* the generic auth code decode the global_id, and we carry no actual
* authenticate state, so nothing happens here.
@@ -106,6 +111,7 @@ static const struct ceph_auth_client_ops ceph_auth_none_ops = {
.destroy = destroy,
.is_authenticated = is_authenticated,
.should_authenticate = should_authenticate,
+ .build_request = build_request,
.handle_reply = handle_reply,
.create_authorizer = ceph_auth_none_create_authorizer,
.destroy_authorizer = ceph_auth_none_destroy_authorizer,
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 3a246a6cab4..3663a305daf 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -733,12 +733,14 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc,
object_size = le32_to_cpu(layout->fl_object_size);
object_base = off - objoff;
- if (truncate_size <= object_base) {
- truncate_size = 0;
- } else {
- truncate_size -= object_base;
- if (truncate_size > object_size)
- truncate_size = object_size;
+ if (!(truncate_seq == 1 && truncate_size == -1ULL)) {
+ if (truncate_size <= object_base) {
+ truncate_size = 0;
+ } else {
+ truncate_size -= object_base;
+ if (truncate_size > object_size)
+ truncate_size = object_size;
+ }
}
osd_req_op_extent_init(req, 0, opcode, objoff, objlen,
@@ -1174,6 +1176,7 @@ static void __register_linger_request(struct ceph_osd_client *osdc,
struct ceph_osd_request *req)
{
dout("__register_linger_request %p\n", req);
+ ceph_osdc_get_request(req);
list_add_tail(&req->r_linger_item, &osdc->req_linger);
if (req->r_osd)
list_add_tail(&req->r_linger_osd,
@@ -1196,6 +1199,7 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc,
if (list_empty(&req->r_osd_item))
req->r_osd = NULL;
}
+ ceph_osdc_put_request(req);
}
void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
@@ -1203,9 +1207,8 @@ void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc,
{
mutex_lock(&osdc->request_mutex);
if (req->r_linger) {
- __unregister_linger_request(osdc, req);
req->r_linger = 0;
- ceph_osdc_put_request(req);
+ __unregister_linger_request(osdc, req);
}
mutex_unlock(&osdc->request_mutex);
}
@@ -1217,16 +1220,27 @@ void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc,
if (!req->r_linger) {
dout("set_request_linger %p\n", req);
req->r_linger = 1;
- /*
- * caller is now responsible for calling
- * unregister_linger_request
- */
- ceph_osdc_get_request(req);
}
}
EXPORT_SYMBOL(ceph_osdc_set_request_linger);
/*
+ * Returns whether a request should be blocked from being sent
+ * based on the current osdmap and osd_client settings.
+ *
+ * Caller should hold map_sem for read.
+ */
+static bool __req_should_be_paused(struct ceph_osd_client *osdc,
+ struct ceph_osd_request *req)
+{
+ bool pauserd = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSERD);
+ bool pausewr = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSEWR) ||
+ ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL);
+ return (req->r_flags & CEPH_OSD_FLAG_READ && pauserd) ||
+ (req->r_flags & CEPH_OSD_FLAG_WRITE && pausewr);
+}
+
+/*
* Pick an osd (the first 'up' osd in the pg), allocate the osd struct
* (as needed), and set the request r_osd appropriately. If there is
* no up osd, set r_osd to NULL. Move the request to the appropriate list
@@ -1243,6 +1257,7 @@ static int __map_request(struct ceph_osd_client *osdc,
int acting[CEPH_PG_MAX_SIZE];
int o = -1, num = 0;
int err;
+ bool was_paused;
dout("map_request %p tid %lld\n", req, req->r_tid);
err = ceph_calc_ceph_pg(&pgid, req->r_oid, osdc->osdmap,
@@ -1259,12 +1274,18 @@ static int __map_request(struct ceph_osd_client *osdc,
num = err;
}
+ was_paused = req->r_paused;
+ req->r_paused = __req_should_be_paused(osdc, req);
+ if (was_paused && !req->r_paused)
+ force_resend = 1;
+
if ((!force_resend &&
req->r_osd && req->r_osd->o_osd == o &&
req->r_sent >= req->r_osd->o_incarnation &&
req->r_num_pg_osds == num &&
memcmp(req->r_pg_osds, acting, sizeof(acting[0])*num) == 0) ||
- (req->r_osd == NULL && o == -1))
+ (req->r_osd == NULL && o == -1) ||
+ req->r_paused)
return 0; /* no change */
dout("map_request tid %llu pgid %lld.%x osd%d (was osd%d)\n",
@@ -1339,10 +1360,6 @@ static void __send_request(struct ceph_osd_client *osdc,
ceph_msg_get(req->r_request); /* send consumes a ref */
- /* Mark the request unsafe if this is the first timet's being sent. */
-
- if (!req->r_sent && req->r_unsafe_callback)
- req->r_unsafe_callback(req, true);
req->r_sent = req->r_osd->o_incarnation;
ceph_con_send(&req->r_osd->o_con, req->r_request);
@@ -1433,8 +1450,6 @@ static void handle_osds_timeout(struct work_struct *work)
static void complete_request(struct ceph_osd_request *req)
{
- if (req->r_unsafe_callback)
- req->r_unsafe_callback(req, false);
complete_all(&req->r_safe_completion); /* fsync waiter */
}
@@ -1496,14 +1511,14 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
dout("handle_reply %p tid %llu req %p result %d\n", msg, tid,
req, result);
- ceph_decode_need(&p, end, 4, bad);
+ ceph_decode_need(&p, end, 4, bad_put);
numops = ceph_decode_32(&p);
if (numops > CEPH_OSD_MAX_OP)
goto bad_put;
if (numops != req->r_num_ops)
goto bad_put;
payload_len = 0;
- ceph_decode_need(&p, end, numops * sizeof(struct ceph_osd_op), bad);
+ ceph_decode_need(&p, end, numops * sizeof(struct ceph_osd_op), bad_put);
for (i = 0; i < numops; i++) {
struct ceph_osd_op *op = p;
int len;
@@ -1521,11 +1536,13 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
goto bad_put;
}
- ceph_decode_need(&p, end, 4 + numops * 4, bad);
+ ceph_decode_need(&p, end, 4 + numops * 4, bad_put);
retry_attempt = ceph_decode_32(&p);
for (i = 0; i < numops; i++)
req->r_reply_op_result[i] = ceph_decode_32(&p);
+ already_completed = req->r_got_reply;
+
if (!req->r_got_reply) {
req->r_result = result;
@@ -1556,19 +1573,23 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg,
((flags & CEPH_OSD_FLAG_WRITE) == 0))
__unregister_request(osdc, req);
- already_completed = req->r_completed;
- req->r_completed = 1;
mutex_unlock(&osdc->request_mutex);
- if (already_completed)
- goto done;
- if (req->r_callback)
- req->r_callback(req, msg);
- else
- complete_all(&req->r_completion);
+ if (!already_completed) {
+ if (req->r_unsafe_callback &&
+ result >= 0 && !(flags & CEPH_OSD_FLAG_ONDISK))
+ req->r_unsafe_callback(req, true);
+ if (req->r_callback)
+ req->r_callback(req, msg);
+ else
+ complete_all(&req->r_completion);
+ }
- if (flags & CEPH_OSD_FLAG_ONDISK)
+ if (flags & CEPH_OSD_FLAG_ONDISK) {
+ if (req->r_unsafe_callback && already_completed)
+ req->r_unsafe_callback(req, false);
complete_request(req);
+ }
done:
dout("req=%p req->r_linger=%d\n", req, req->r_linger);
@@ -1608,14 +1629,17 @@ static void reset_changed_osds(struct ceph_osd_client *osdc)
*
* Caller should hold map_sem for read.
*/
-static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
+static void kick_requests(struct ceph_osd_client *osdc, bool force_resend,
+ bool force_resend_writes)
{
struct ceph_osd_request *req, *nreq;
struct rb_node *p;
int needmap = 0;
int err;
+ bool force_resend_req;
- dout("kick_requests %s\n", force_resend ? " (force resend)" : "");
+ dout("kick_requests %s %s\n", force_resend ? " (force resend)" : "",
+ force_resend_writes ? " (force resend writes)" : "");
mutex_lock(&osdc->request_mutex);
for (p = rb_first(&osdc->requests); p; ) {
req = rb_entry(p, struct ceph_osd_request, r_node);
@@ -1633,12 +1657,17 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
dout("%p tid %llu restart on osd%d\n",
req, req->r_tid,
req->r_osd ? req->r_osd->o_osd : -1);
+ ceph_osdc_get_request(req);
__unregister_request(osdc, req);
__register_linger_request(osdc, req);
+ ceph_osdc_put_request(req);
continue;
}
- err = __map_request(osdc, req, force_resend);
+ force_resend_req = force_resend ||
+ (force_resend_writes &&
+ req->r_flags & CEPH_OSD_FLAG_WRITE);
+ err = __map_request(osdc, req, force_resend_req);
if (err < 0)
continue; /* error */
if (req->r_osd == NULL) {
@@ -1658,7 +1687,8 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend)
r_linger_item) {
dout("linger req=%p req->r_osd=%p\n", req, req->r_osd);
- err = __map_request(osdc, req, force_resend);
+ err = __map_request(osdc, req,
+ force_resend || force_resend_writes);
dout("__map_request returned %d\n", err);
if (err == 0)
continue; /* no change and no osd was specified */
@@ -1700,6 +1730,7 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
struct ceph_osdmap *newmap = NULL, *oldmap;
int err;
struct ceph_fsid fsid;
+ bool was_full;
dout("handle_map have %u\n", osdc->osdmap ? osdc->osdmap->epoch : 0);
p = msg->front.iov_base;
@@ -1713,6 +1744,8 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
down_write(&osdc->map_sem);
+ was_full = ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL);
+
/* incremental maps */
ceph_decode_32_safe(&p, end, nr_maps, bad);
dout(" %d inc maps\n", nr_maps);
@@ -1737,7 +1770,10 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
ceph_osdmap_destroy(osdc->osdmap);
osdc->osdmap = newmap;
}
- kick_requests(osdc, 0);
+ was_full = was_full ||
+ ceph_osdmap_flag(osdc->osdmap,
+ CEPH_OSDMAP_FULL);
+ kick_requests(osdc, 0, was_full);
} else {
dout("ignoring incremental map %u len %d\n",
epoch, maplen);
@@ -1780,12 +1816,17 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
skipped_map = 1;
ceph_osdmap_destroy(oldmap);
}
- kick_requests(osdc, skipped_map);
+ was_full = was_full ||
+ ceph_osdmap_flag(osdc->osdmap,
+ CEPH_OSDMAP_FULL);
+ kick_requests(osdc, skipped_map, was_full);
}
p += maplen;
nr_maps--;
}
+ if (!osdc->osdmap)
+ goto bad;
done:
downgrade_write(&osdc->map_sem);
ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch);
@@ -1795,7 +1836,9 @@ done:
* we find out when we are no longer full and stop returning
* ENOSPC.
*/
- if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL))
+ if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL) ||
+ ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSERD) ||
+ ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_PAUSEWR))
ceph_monc_request_next_osdmap(&osdc->client->monc);
mutex_lock(&osdc->request_mutex);
@@ -2123,13 +2166,14 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
__register_request(osdc, req);
req->r_sent = 0;
req->r_got_reply = 0;
- req->r_completed = 0;
rc = __map_request(osdc, req, 0);
if (rc < 0) {
if (nofail) {
dout("osdc_start_request failed map, "
" will retry %lld\n", req->r_tid);
rc = 0;
+ } else {
+ __unregister_request(osdc, req);
}
goto out_unlock;
}
@@ -2206,6 +2250,17 @@ void ceph_osdc_sync(struct ceph_osd_client *osdc)
EXPORT_SYMBOL(ceph_osdc_sync);
/*
+ * Call all pending notify callbacks - for use after a watch is
+ * unregistered, to make sure no more callbacks for it will be invoked
+ */
+extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc)
+{
+ flush_workqueue(osdc->notify_wq);
+}
+EXPORT_SYMBOL(ceph_osdc_flush_notifies);
+
+
+/*
* init, shutdown
*/
int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
@@ -2254,12 +2309,10 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client)
if (err < 0)
goto out_msgpool;
+ err = -ENOMEM;
osdc->notify_wq = create_singlethread_workqueue("ceph-watch-notify");
- if (IS_ERR(osdc->notify_wq)) {
- err = PTR_ERR(osdc->notify_wq);
- osdc->notify_wq = NULL;
+ if (!osdc->notify_wq)
goto out_msgpool;
- }
return 0;
out_msgpool:
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 603ddd92db1..dbd9a479242 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -1129,7 +1129,7 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid,
/* pg_temp? */
pgid.seed = ceph_stable_mod(pgid.seed, pool->pg_num,
- pool->pgp_num_mask);
+ pool->pg_num_mask);
pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid);
if (pg) {
*num = pg->len;
diff --git a/net/compat.c b/net/compat.c
index f0a1ba6c808..f50161fb812 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -71,6 +71,8 @@ int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg)
__get_user(kmsg->msg_controllen, &umsg->msg_controllen) ||
__get_user(kmsg->msg_flags, &umsg->msg_flags))
return -EFAULT;
+ if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
+ kmsg->msg_namelen = sizeof(struct sockaddr_storage);
kmsg->msg_name = compat_ptr(tmp1);
kmsg->msg_iov = compat_ptr(tmp2);
kmsg->msg_control = compat_ptr(tmp3);
@@ -91,7 +93,8 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov,
if (err < 0)
return err;
}
- kern_msg->msg_name = kern_address;
+ if (kern_msg->msg_name)
+ kern_msg->msg_name = kern_address;
} else
kern_msg->msg_name = NULL;
@@ -777,21 +780,16 @@ asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
if (flags & MSG_CMSG_COMPAT)
return -EINVAL;
- if (COMPAT_USE_64BIT_TIME)
- return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
- flags | MSG_CMSG_COMPAT,
- (struct timespec *) timeout);
-
if (timeout == NULL)
return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
flags | MSG_CMSG_COMPAT, NULL);
- if (get_compat_timespec(&ktspec, timeout))
+ if (compat_get_timespec(&ktspec, timeout))
return -EFAULT;
datagrams = __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
flags | MSG_CMSG_COMPAT, &ktspec);
- if (datagrams > 0 && put_compat_timespec(&ktspec, timeout))
+ if (datagrams > 0 && compat_put_timespec(&ktspec, timeout))
datagrams = -EFAULT;
return datagrams;
diff --git a/net/core/dev.c b/net/core/dev.c
index faebb398fb4..a0e55ffc03c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2374,7 +2374,7 @@ EXPORT_SYMBOL(netdev_rx_csum_fault);
* 2. No high memory really exists on this machine.
*/
-static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
+static int illegal_highdma(const struct net_device *dev, struct sk_buff *skb)
{
#ifdef CONFIG_HIGHMEM
int i;
@@ -2454,46 +2454,51 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
}
static netdev_features_t harmonize_features(struct sk_buff *skb,
- __be16 protocol, netdev_features_t features)
+ __be16 protocol,
+ const struct net_device *dev,
+ netdev_features_t features)
{
if (skb->ip_summed != CHECKSUM_NONE &&
!can_checksum_protocol(features, protocol)) {
features &= ~NETIF_F_ALL_CSUM;
- } else if (illegal_highdma(skb->dev, skb)) {
+ } else if (illegal_highdma(dev, skb)) {
features &= ~NETIF_F_SG;
}
return features;
}
-netdev_features_t netif_skb_features(struct sk_buff *skb)
+netdev_features_t netif_skb_dev_features(struct sk_buff *skb,
+ const struct net_device *dev)
{
__be16 protocol = skb->protocol;
- netdev_features_t features = skb->dev->features;
+ netdev_features_t features = dev->features;
- if (skb_shinfo(skb)->gso_segs > skb->dev->gso_max_segs)
+ if (skb_shinfo(skb)->gso_segs > dev->gso_max_segs)
features &= ~NETIF_F_GSO_MASK;
if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
protocol = veh->h_vlan_encapsulated_proto;
} else if (!vlan_tx_tag_present(skb)) {
- return harmonize_features(skb, protocol, features);
+ return harmonize_features(skb, protocol, dev, features);
}
- features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
+ features &= (dev->vlan_features | NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX);
if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD)) {
- return harmonize_features(skb, protocol, features);
+ return harmonize_features(skb, protocol, dev, features);
} else {
features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
NETIF_F_HW_VLAN_STAG_TX;
- return harmonize_features(skb, protocol, features);
+ return harmonize_features(skb, protocol, dev, features);
}
+
+ return harmonize_features(skb, protocol, dev, features);
}
-EXPORT_SYMBOL(netif_skb_features);
+EXPORT_SYMBOL(netif_skb_dev_features);
/*
* Returns true if either:
@@ -3513,8 +3518,15 @@ ncls:
}
}
- if (vlan_tx_nonzero_tag_present(skb))
- skb->pkt_type = PACKET_OTHERHOST;
+ if (unlikely(vlan_tx_tag_present(skb))) {
+ if (vlan_tx_tag_get_id(skb))
+ skb->pkt_type = PACKET_OTHERHOST;
+ /* Note: we might in the future use prio bits
+ * and set skb->priority like in vlan_do_receive()
+ * For the time being, just ignore Priority Code Point
+ */
+ skb->vlan_tci = 0;
+ }
/* deliver only exact match when indicated */
null_or_dev = deliver_exact ? skb->dev : NULL;
@@ -4471,7 +4483,7 @@ static void dev_change_rx_flags(struct net_device *dev, int flags)
{
const struct net_device_ops *ops = dev->netdev_ops;
- if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
+ if (ops->ndo_change_rx_flags)
ops->ndo_change_rx_flags(dev, flags);
}
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index d23b6682f4e..a974dfec4bf 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -64,7 +64,6 @@ static struct genl_family net_drop_monitor_family = {
.hdrsize = 0,
.name = "NET_DM",
.version = 2,
- .maxattr = NET_DM_CMD_MAX,
};
static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index d5a9f8ead0d..55e08e2de3a 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -445,7 +445,8 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh)
if (frh->action && (frh->action != rule->action))
continue;
- if (frh->table && (frh_get_table(frh, tb) != rule->table))
+ if (frh_get_table(frh, tb) &&
+ (frh_get_table(frh, tb) != rule->table))
continue;
if (tb[FRA_PRIORITY] &&
@@ -719,6 +720,13 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event,
attach_rules(&ops->rules_list, dev);
break;
+ case NETDEV_CHANGENAME:
+ list_for_each_entry(ops, &net->rules_ops, list) {
+ detach_rules(&ops->rules_list, dev);
+ attach_rules(&ops->rules_list, dev);
+ }
+ break;
+
case NETDEV_UNREGISTER:
list_for_each_entry(ops, &net->rules_ops, list)
detach_rules(&ops->rules_list, dev);
diff --git a/net/core/filter.c b/net/core/filter.c
index 6438f29ff26..52f01229ee0 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -36,7 +36,6 @@
#include <asm/uaccess.h>
#include <asm/unaligned.h>
#include <linux/filter.h>
-#include <linux/reciprocal_div.h>
#include <linux/ratelimit.h>
#include <linux/seccomp.h>
#include <linux/if_vlan.h>
@@ -166,7 +165,7 @@ unsigned int sk_run_filter(const struct sk_buff *skb,
A /= X;
continue;
case BPF_S_ALU_DIV_K:
- A = reciprocal_divide(A, K);
+ A /= K;
continue;
case BPF_S_ALU_MOD_X:
if (X == 0)
@@ -553,11 +552,6 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen)
/* Some instructions need special checks */
switch (code) {
case BPF_S_ALU_DIV_K:
- /* check for division by zero */
- if (ftest->k == 0)
- return -EINVAL;
- ftest->k = reciprocal_value(ftest->k);
- break;
case BPF_S_ALU_MOD_K:
/* check for division by zero */
if (ftest->k == 0)
@@ -853,27 +847,7 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to)
to->code = decodes[code];
to->jt = filt->jt;
to->jf = filt->jf;
-
- if (code == BPF_S_ALU_DIV_K) {
- /*
- * When loaded this rule user gave us X, which was
- * translated into R = r(X). Now we calculate the
- * RR = r(R) and report it back. If next time this
- * value is loaded and RRR = r(RR) is calculated
- * then the R == RRR will be true.
- *
- * One exception. X == 1 translates into R == 0 and
- * we can't calculate RR out of it with r().
- */
-
- if (filt->k == 0)
- to->k = 1;
- else
- to->k = reciprocal_value(filt->k);
-
- BUG_ON(reciprocal_value(to->k) != filt->k);
- } else
- to->k = filt->k;
+ to->k = filt->k;
}
int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len)
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 00ee068efc1..f97101b4d37 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -40,7 +40,7 @@ again:
struct iphdr _iph;
ip:
iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
- if (!iph)
+ if (!iph || iph->ihl < 5)
return false;
if (ip_is_fragment(iph))
@@ -149,8 +149,8 @@ ipv6:
if (poff >= 0) {
__be32 *ports, _ports;
- nhoff += poff;
- ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports);
+ ports = skb_header_pointer(skb, nhoff + poff,
+ sizeof(_ports), &_ports);
if (ports)
flow->ports = *ports;
}
@@ -345,14 +345,9 @@ u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
if (new_index < 0)
new_index = skb_tx_hash(dev, skb);
- if (queue_index != new_index && sk) {
- struct dst_entry *dst =
- rcu_dereference_check(sk->sk_dst_cache, 1);
-
- if (dst && skb_dst(skb) == dst)
- sk_tx_queue_set(sk, queue_index);
-
- }
+ if (queue_index != new_index && sk &&
+ rcu_access_pointer(sk->sk_dst_cache))
+ sk_tx_queue_set(sk, new_index);
queue_index = new_index;
}
diff --git a/net/core/iovec.c b/net/core/iovec.c
index de178e46268..9a31515fb8e 100644
--- a/net/core/iovec.c
+++ b/net/core/iovec.c
@@ -48,7 +48,8 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a
if (err < 0)
return err;
}
- m->msg_name = address;
+ if (m->msg_name)
+ m->msg_name = address;
} else {
m->msg_name = NULL;
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 5c56b217b99..b49e8bafab1 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -231,7 +231,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
we must kill timers etc. and move
it to safe state.
*/
- skb_queue_purge(&n->arp_queue);
+ __skb_queue_purge(&n->arp_queue);
n->arp_queue_len_bytes = 0;
n->output = neigh_blackhole;
if (n->nud_state & NUD_VALID)
@@ -286,7 +286,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device
if (!n)
goto out_entries;
- skb_queue_head_init(&n->arp_queue);
+ __skb_queue_head_init(&n->arp_queue);
rwlock_init(&n->lock);
seqlock_init(&n->ha_lock);
n->updated = n->used = now;
@@ -708,7 +708,9 @@ void neigh_destroy(struct neighbour *neigh)
if (neigh_del_timer(neigh))
pr_warn("Impossible event\n");
- skb_queue_purge(&neigh->arp_queue);
+ write_lock_bh(&neigh->lock);
+ __skb_queue_purge(&neigh->arp_queue);
+ write_unlock_bh(&neigh->lock);
neigh->arp_queue_len_bytes = 0;
if (dev->netdev_ops->ndo_neigh_destroy)
@@ -762,9 +764,6 @@ static void neigh_periodic_work(struct work_struct *work)
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
- if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
- goto out;
-
/*
* periodically recompute ReachableTime from random function
*/
@@ -777,6 +776,9 @@ static void neigh_periodic_work(struct work_struct *work)
neigh_rand_reach_time(p->base_reachable_time);
}
+ if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
+ goto out;
+
for (i = 0 ; i < (1 << nht->hash_shift); i++) {
np = &nht->hash_buckets[i];
@@ -858,7 +860,7 @@ static void neigh_invalidate(struct neighbour *neigh)
neigh->ops->error_report(neigh, skb);
write_lock(&neigh->lock);
}
- skb_queue_purge(&neigh->arp_queue);
+ __skb_queue_purge(&neigh->arp_queue);
neigh->arp_queue_len_bytes = 0;
}
@@ -1210,7 +1212,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
write_lock_bh(&neigh->lock);
}
- skb_queue_purge(&neigh->arp_queue);
+ __skb_queue_purge(&neigh->arp_queue);
neigh->arp_queue_len_bytes = 0;
}
out:
@@ -1272,7 +1274,7 @@ int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
skb->len) < 0 &&
- dev->header_ops->rebuild(skb))
+ dev_rebuild_header(skb))
return 0;
return dev_queue_xmit(skb);
@@ -1443,16 +1445,18 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
atomic_set(&p->refcnt, 1);
p->reachable_time =
neigh_rand_reach_time(p->base_reachable_time);
+ dev_hold(dev);
+ p->dev = dev;
+ write_pnet(&p->net, hold_net(net));
+ p->sysctl_table = NULL;
if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
+ release_net(net);
+ dev_put(dev);
kfree(p);
return NULL;
}
- dev_hold(dev);
- p->dev = dev;
- write_pnet(&p->net, hold_net(net));
- p->sysctl_table = NULL;
write_lock_bh(&tbl->lock);
p->next = tbl->parms.next;
tbl->parms.next = p;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index cec074be8c4..e861438d545 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -386,8 +386,14 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
!vlan_hw_offload_capable(netif_skb_features(skb),
skb->vlan_proto)) {
skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb));
- if (unlikely(!skb))
- break;
+ if (unlikely(!skb)) {
+ /* This is actually a packet drop, but we
+ * don't want the code at the end of this
+ * function to try and re-queue a NULL skb.
+ */
+ status = NETDEV_TX_OK;
+ goto unlock_txq;
+ }
skb->vlan_tci = 0;
}
@@ -395,6 +401,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
if (status == NETDEV_TX_OK)
txq_trans_update(txq);
}
+ unlock_txq:
__netif_tx_unlock(txq);
if (status == NETDEV_TX_OK)
@@ -550,7 +557,7 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo
return;
proto = ntohs(eth_hdr(skb)->h_proto);
- if (proto == ETH_P_IP) {
+ if (proto == ETH_P_ARP) {
struct arphdr *arp;
unsigned char *arp_ptr;
/* No arp on this interface */
@@ -738,7 +745,7 @@ static bool pkt_is_ns(struct sk_buff *skb)
struct nd_msg *msg;
struct ipv6hdr *hdr;
- if (skb->protocol != htons(ETH_P_ARP))
+ if (skb->protocol != htons(ETH_P_IPV6))
return false;
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg)))
return false;
@@ -941,6 +948,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
{
char *cur=opt, *delim;
int ipv6;
+ bool ipversion_set = false;
if (*cur != '@') {
if ((delim = strchr(cur, '@')) == NULL)
@@ -953,6 +961,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
cur++;
if (*cur != '/') {
+ ipversion_set = true;
if ((delim = strchr(cur, '/')) == NULL)
goto parse_failed;
*delim = 0;
@@ -995,7 +1004,7 @@ int netpoll_parse_options(struct netpoll *np, char *opt)
ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
if (ipv6 < 0)
goto parse_failed;
- else if (np->ipv6 != (bool)ipv6)
+ else if (ipversion_set && np->ipv6 != (bool)ipv6)
goto parse_failed;
else
np->ipv6 = (bool)ipv6;
@@ -1289,15 +1298,14 @@ EXPORT_SYMBOL_GPL(__netpoll_free_async);
void netpoll_cleanup(struct netpoll *np)
{
- if (!np->dev)
- return;
-
rtnl_lock();
+ if (!np->dev)
+ goto out;
__netpoll_cleanup(np);
- rtnl_unlock();
-
dev_put(np->dev);
np->dev = NULL;
+out:
+ rtnl_unlock();
}
EXPORT_SYMBOL(netpoll_cleanup);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 11f2704c381..ebbea537196 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2515,6 +2515,8 @@ static int process_ipsec(struct pktgen_dev *pkt_dev,
if (x) {
int ret;
__u8 *eth;
+ struct iphdr *iph;
+
nhead = x->props.header_len - skb_headroom(skb);
if (nhead > 0) {
ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
@@ -2536,6 +2538,11 @@ static int process_ipsec(struct pktgen_dev *pkt_dev,
eth = (__u8 *) skb_push(skb, ETH_HLEN);
memcpy(eth, pkt_dev->hh, 12);
*(u16 *) &eth[12] = protocol;
+
+ /* Update IPv4 header len as well as checksum value */
+ iph = ip_hdr(skb);
+ iph->tot_len = htons(skb->len - ETH_HLEN);
+ ip_send_check(iph);
}
}
return 1;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index a08bd2b7fe3..4c3087dffe7 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1973,12 +1973,13 @@ EXPORT_SYMBOL(rtmsg_ifinfo);
static int nlmsg_populate_fdb_fill(struct sk_buff *skb,
struct net_device *dev,
u8 *addr, u32 pid, u32 seq,
- int type, unsigned int flags)
+ int type, unsigned int flags,
+ int nlflags)
{
struct nlmsghdr *nlh;
struct ndmsg *ndm;
- nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), NLM_F_MULTI);
+ nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), nlflags);
if (!nlh)
return -EMSGSIZE;
@@ -2016,7 +2017,7 @@ static void rtnl_fdb_notify(struct net_device *dev, u8 *addr, int type)
if (!skb)
goto errout;
- err = nlmsg_populate_fdb_fill(skb, dev, addr, 0, 0, type, NTF_SELF);
+ err = nlmsg_populate_fdb_fill(skb, dev, addr, 0, 0, type, NTF_SELF, 0);
if (err < 0) {
kfree_skb(skb);
goto errout;
@@ -2142,7 +2143,7 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm,
/* If aging addresses are supported device will need to
* implement its own handler for this.
*/
- if (ndm->ndm_state & NUD_PERMANENT) {
+ if (!(ndm->ndm_state & NUD_PERMANENT)) {
pr_info("%s: FDB only supports static addresses\n", dev->name);
return -EINVAL;
}
@@ -2249,7 +2250,8 @@ static int nlmsg_populate_fdb(struct sk_buff *skb,
err = nlmsg_populate_fdb_fill(skb, dev, ha->addr,
portid, seq,
- RTM_NEWNEIGH, NTF_SELF);
+ RTM_NEWNEIGH, NTF_SELF,
+ NLM_F_MULTI);
if (err < 0)
return err;
skip:
@@ -2374,7 +2376,7 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
struct nlattr *extfilt;
u32 filter_mask = 0;
- extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct rtgenmsg),
+ extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct ifinfomsg),
IFLA_EXT_MASK);
if (extfilt)
filter_mask = nla_get_u32(extfilt);
diff --git a/net/core/scm.c b/net/core/scm.c
index 03795d0147f..b4da80b1cc0 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -54,7 +54,7 @@ static __inline__ int scm_check_creds(struct ucred *creds)
return -EINVAL;
if ((creds->pid == task_tgid_vnr(current) ||
- ns_capable(current->nsproxy->pid_ns->user_ns, CAP_SYS_ADMIN)) &&
+ ns_capable(task_active_pid_ns(current)->user_ns, CAP_SYS_ADMIN)) &&
((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) ||
uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) &&
((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) ||
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 6a2f13cee86..8d9d05edd2e 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -10,12 +10,27 @@
#include <net/secure_seq.h>
-static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned;
+#if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_INET)
+#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4)
-void net_secret_init(void)
+static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;
+
+static void net_secret_init(void)
{
- get_random_bytes(net_secret, sizeof(net_secret));
+ u32 tmp;
+ int i;
+
+ if (likely(net_secret[0]))
+ return;
+
+ for (i = NET_SECRET_SIZE; i > 0;) {
+ do {
+ get_random_bytes(&tmp, sizeof(tmp));
+ } while (!tmp);
+ cmpxchg(&net_secret[--i], 0, tmp);
+ }
}
+#endif
#ifdef CONFIG_INET
static u32 seq_scale(u32 seq)
@@ -42,6 +57,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
u32 hash[MD5_DIGEST_WORDS];
u32 i;
+ net_secret_init();
memcpy(hash, saddr, 16);
for (i = 0; i < 4; i++)
secret[i] = net_secret[i] + (__force u32)daddr[i];
@@ -63,6 +79,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
u32 hash[MD5_DIGEST_WORDS];
u32 i;
+ net_secret_init();
memcpy(hash, saddr, 16);
for (i = 0; i < 4; i++)
secret[i] = net_secret[i] + (__force u32) daddr[i];
@@ -82,6 +99,7 @@ __u32 secure_ip_id(__be32 daddr)
{
u32 hash[MD5_DIGEST_WORDS];
+ net_secret_init();
hash[0] = (__force __u32) daddr;
hash[1] = net_secret[13];
hash[2] = net_secret[14];
@@ -96,6 +114,7 @@ __u32 secure_ipv6_id(const __be32 daddr[4])
{
__u32 hash[4];
+ net_secret_init();
memcpy(hash, daddr, 16);
md5_transform(hash, net_secret);
@@ -107,6 +126,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,
{
u32 hash[MD5_DIGEST_WORDS];
+ net_secret_init();
hash[0] = (__force u32)saddr;
hash[1] = (__force u32)daddr;
hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
@@ -121,6 +141,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
{
u32 hash[MD5_DIGEST_WORDS];
+ net_secret_init();
hash[0] = (__force u32)saddr;
hash[1] = (__force u32)daddr;
hash[2] = (__force u32)dport ^ net_secret[14];
@@ -140,6 +161,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
u32 hash[MD5_DIGEST_WORDS];
u64 seq;
+ net_secret_init();
hash[0] = (__force u32)saddr;
hash[1] = (__force u32)daddr;
hash[2] = ((__force u16)sport << 16) + (__force u16)dport;
@@ -164,6 +186,7 @@ u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
u64 seq;
u32 i;
+ net_secret_init();
memcpy(hash, saddr, 16);
for (i = 0; i < 4; i++)
secret[i] = net_secret[i] + daddr[i];
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1c1738cc453..79143b7af7e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -47,6 +47,8 @@
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/slab.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
#include <linux/netdevice.h>
#ifdef CONFIG_NET_CLS_ACT
#include <net/pkt_sched.h>
@@ -74,36 +76,6 @@
struct kmem_cache *skbuff_head_cache __read_mostly;
static struct kmem_cache *skbuff_fclone_cache __read_mostly;
-static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
-{
- put_page(buf->page);
-}
-
-static void sock_pipe_buf_get(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
-{
- get_page(buf->page);
-}
-
-static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
- struct pipe_buffer *buf)
-{
- return 1;
-}
-
-
-/* Pipe buffer operations for a socket. */
-static const struct pipe_buf_operations sock_pipe_buf_ops = {
- .can_merge = 0,
- .map = generic_pipe_buf_map,
- .unmap = generic_pipe_buf_unmap,
- .confirm = generic_pipe_buf_confirm,
- .release = sock_pipe_buf_release,
- .steal = sock_pipe_buf_steal,
- .get = sock_pipe_buf_get,
-};
-
/**
* skb_panic - private function for out-of-line support
* @skb: buffer
@@ -585,9 +557,6 @@ static void skb_release_head_state(struct sk_buff *skb)
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
nf_conntrack_put(skb->nfct);
#endif
-#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
- nf_conntrack_put_reasm(skb->nfct_reasm);
-#endif
#ifdef CONFIG_BRIDGE_NETFILTER
nf_bridge_put(skb->nf_bridge);
#endif
@@ -1814,7 +1783,7 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset,
.partial = partial,
.nr_pages_max = MAX_SKB_FRAGS,
.flags = flags,
- .ops = &sock_pipe_buf_ops,
+ .ops = &nosteal_pipe_buf_ops,
.spd_release = sock_spd_release,
};
struct sk_buff *frag_iter;
@@ -2857,7 +2826,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
doffset + tnl_hlen);
if (fskb != skb_shinfo(skb)->frag_list)
- continue;
+ goto perform_csum_check;
if (!sg) {
nskb->ip_summed = CHECKSUM_NONE;
@@ -2921,6 +2890,7 @@ skip_fraglist:
nskb->len += nskb->data_len;
nskb->truesize += nskb->data_len;
+perform_csum_check:
if (!csum) {
nskb->csum = skb_checksum(nskb, doffset,
nskb->len - doffset, 0);
@@ -3503,3 +3473,26 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
return true;
}
EXPORT_SYMBOL(skb_try_coalesce);
+
+/**
+ * skb_gso_transport_seglen - Return length of individual segments of a gso packet
+ *
+ * @skb: GSO skb
+ *
+ * skb_gso_transport_seglen is used to determine the real size of the
+ * individual segments, including Layer4 headers (TCP/UDP).
+ *
+ * The MAC/L2 or network (IP, IPv6) headers are not accounted for.
+ */
+unsigned int skb_gso_transport_seglen(const struct sk_buff *skb)
+{
+ const struct skb_shared_info *shinfo = skb_shinfo(skb);
+ unsigned int hdr_len;
+
+ if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6)))
+ hdr_len = tcp_hdrlen(skb);
+ else
+ hdr_len = sizeof(struct udphdr);
+ return hdr_len + shinfo->gso_size;
+}
+EXPORT_SYMBOL_GPL(skb_gso_transport_seglen);
diff --git a/net/core/sock.c b/net/core/sock.c
index d6d024cfaaa..d743099250f 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -885,7 +885,7 @@ set_rcvbuf:
case SO_PEEK_OFF:
if (sock->ops->set_peek_off)
- sock->ops->set_peek_off(sk, val);
+ ret = sock->ops->set_peek_off(sk, val);
else
ret = -EOPNOTSUPP;
break;
@@ -1814,7 +1814,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
gfp_t gfp = sk->sk_allocation;
if (order)
- gfp |= __GFP_COMP | __GFP_NOWARN;
+ gfp |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY;
pfrag->page = alloc_pages(gfp, order);
if (likely(pfrag->page)) {
pfrag->offset = 0;
@@ -2271,6 +2271,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_stamp = ktime_set(-1L, 0);
+ sk->sk_pacing_rate = ~0U;
/*
* Before updating sk_refcnt, we must commit prior changes to memory
* (Documentation/RCU/rculist_nulls.txt for details)
@@ -2308,10 +2309,13 @@ void release_sock(struct sock *sk)
if (sk->sk_backlog.tail)
__release_sock(sk);
+ /* Warning : release_cb() might need to release sk ownership,
+ * ie call sock_release_ownership(sk) before us.
+ */
if (sk->sk_prot->release_cb)
sk->sk_prot->release_cb(sk);
- sk->sk_lock.owned = 0;
+ sock_release_ownership(sk);
if (waitqueue_active(&sk->sk_lock.wq))
wake_up(&sk->sk_lock.wq);
spin_unlock_bh(&sk->sk_lock.slock);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index cfdb46ab3a7..2ff093b7c45 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -20,7 +20,9 @@
#include <net/sock.h>
#include <net/net_ratelimit.h>
+static int zero = 0;
static int one = 1;
+static int ushort_max = USHRT_MAX;
#ifdef CONFIG_RPS
static int rps_sock_flow_sysctl(ctl_table *table, int write,
@@ -204,7 +206,9 @@ static struct ctl_table netns_core_table[] = {
.data = &init_net.core.sysctl_somaxconn,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .extra1 = &zero,
+ .extra2 = &ushort_max,
+ .proc_handler = proc_dointvec_minmax
},
{ }
};
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 9c61f9c02fd..6cf9f7782ad 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -135,6 +135,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
if (dst)
dst->ops->redirect(dst, sk, skb);
+ goto out;
}
if (type == ICMPV6_PKT_TOOBIG) {
diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c
index 55e1fd5b3e5..ca118e8cb14 100644
--- a/net/ieee802154/6lowpan.c
+++ b/net/ieee802154/6lowpan.c
@@ -862,7 +862,7 @@ lowpan_process_data(struct sk_buff *skb)
* Traffic class carried in-line
* ECN + DSCP (1 byte), Flow Label is elided
*/
- case 1: /* 10b */
+ case 2: /* 10b */
if (lowpan_fetch_skb_u8(skb, &tmp))
goto drop;
@@ -875,7 +875,7 @@ lowpan_process_data(struct sk_buff *skb)
* Flow Label carried in-line
* ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided
*/
- case 2: /* 01b */
+ case 1: /* 01b */
if (lowpan_fetch_skb_u8(skb, &tmp))
goto drop;
@@ -1173,7 +1173,27 @@ static struct header_ops lowpan_header_ops = {
.create = lowpan_header_create,
};
+static struct lock_class_key lowpan_tx_busylock;
+static struct lock_class_key lowpan_netdev_xmit_lock_key;
+
+static void lowpan_set_lockdep_class_one(struct net_device *dev,
+ struct netdev_queue *txq,
+ void *_unused)
+{
+ lockdep_set_class(&txq->_xmit_lock,
+ &lowpan_netdev_xmit_lock_key);
+}
+
+
+static int lowpan_dev_init(struct net_device *dev)
+{
+ netdev_for_each_tx_queue(dev, lowpan_set_lockdep_class_one, NULL);
+ dev->qdisc_tx_busylock = &lowpan_tx_busylock;
+ return 0;
+}
+
static const struct net_device_ops lowpan_netdev_ops = {
+ .ndo_init = lowpan_dev_init,
.ndo_start_xmit = lowpan_xmit,
.ndo_set_mac_address = lowpan_set_address,
};
diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c
index 581a59504bd..1865fdf5a5a 100644
--- a/net/ieee802154/dgram.c
+++ b/net/ieee802154/dgram.c
@@ -315,9 +315,8 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk,
if (saddr) {
saddr->family = AF_IEEE802154;
saddr->addr = mac_cb(skb)->sa;
- }
- if (addr_len)
*addr_len = sizeof(*saddr);
+ }
if (flags & MSG_TRUNC)
copied = skb->len;
diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c
index 22b1a7058fd..4efd2375d7e 100644
--- a/net/ieee802154/nl-phy.c
+++ b/net/ieee802154/nl-phy.c
@@ -224,8 +224,10 @@ static int ieee802154_add_iface(struct sk_buff *skb,
if (info->attrs[IEEE802154_ATTR_DEV_TYPE]) {
type = nla_get_u8(info->attrs[IEEE802154_ATTR_DEV_TYPE]);
- if (type >= __IEEE802154_DEV_MAX)
- return -EINVAL;
+ if (type >= __IEEE802154_DEV_MAX) {
+ rc = -EINVAL;
+ goto nla_put_failure;
+ }
}
dev = phy->add_iface(phy, devname, type);
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d01be2a3ae5..c4adc319cc2 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -263,10 +263,8 @@ void build_ehash_secret(void)
get_random_bytes(&rnd, sizeof(rnd));
} while (rnd == 0);
- if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) {
+ if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0)
get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret));
- net_secret_init();
- }
}
EXPORT_SYMBOL(build_ehash_secret);
diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index b28e863fe0a..19e36376d2a 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -57,7 +57,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
if (err == -ENETUNREACH)
- IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
+ IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
goto out;
}
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index dfc39d4d48b..e40eef4ac69 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -771,7 +771,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
ci = nla_data(tb[IFA_CACHEINFO]);
if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) {
err = -EINVAL;
- goto errout;
+ goto errout_free;
}
*pvalid_lft = ci->ifa_valid;
*pprefered_lft = ci->ifa_prefered;
@@ -779,6 +779,8 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh,
return ifa;
+errout_free:
+ inet_free_ifa(ifa);
errout:
return ERR_PTR(err);
}
@@ -1431,7 +1433,8 @@ static size_t inet_nlmsg_size(void)
+ nla_total_size(4) /* IFA_ADDRESS */
+ nla_total_size(4) /* IFA_LOCAL */
+ nla_total_size(4) /* IFA_BROADCAST */
- + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
+ + nla_total_size(IFNAMSIZ) /* IFA_LABEL */
+ + nla_total_size(sizeof(struct ifa_cacheinfo)); /* IFA_CACHEINFO */
}
static inline u32 cstamp_delta(unsigned long cstamp)
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index c7629a209f9..4556cd25acd 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1049,6 +1049,8 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
}
in_dev = __in_dev_get_rtnl(dev);
+ if (!in_dev)
+ return NOTIFY_DONE;
switch (event) {
case NETDEV_UP:
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 49616fed934..6e8a13da6cb 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -71,7 +71,6 @@
#include <linux/init.h>
#include <linux/list.h>
#include <linux/slab.h>
-#include <linux/prefetch.h>
#include <linux/export.h>
#include <net/net_namespace.h>
#include <net/ip.h>
@@ -1761,10 +1760,8 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c)
if (!c)
continue;
- if (IS_LEAF(c)) {
- prefetch(rcu_dereference_rtnl(p->child[idx]));
+ if (IS_LEAF(c))
return (struct leaf *) c;
- }
/* Rescan start scanning in new node */
p = (struct tnode *) c;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index d8c232794bc..089b4af4fec 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -343,7 +343,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size)
pip->saddr = fl4.saddr;
pip->protocol = IPPROTO_IGMP;
pip->tot_len = 0; /* filled in later */
- ip_select_ident(pip, &rt->dst, NULL);
+ ip_select_ident(skb, &rt->dst, NULL);
((u8 *)&pip[1])[0] = IPOPT_RA;
((u8 *)&pip[1])[1] = 4;
((u8 *)&pip[1])[2] = 0;
@@ -687,7 +687,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc,
iph->daddr = dst;
iph->saddr = fl4.saddr;
iph->protocol = IPPROTO_IGMP;
- ip_select_ident(iph, &rt->dst, NULL);
+ ip_select_ident(skb, &rt->dst, NULL);
((u8 *)&iph[1])[0] = IPOPT_RA;
((u8 *)&iph[1])[1] = 4;
((u8 *)&iph[1])[2] = 0;
@@ -709,7 +709,7 @@ static void igmp_gq_timer_expire(unsigned long data)
in_dev->mr_gq_running = 0;
igmpv3_send_report(in_dev, NULL);
- __in_dev_put(in_dev);
+ in_dev_put(in_dev);
}
static void igmp_ifc_timer_expire(unsigned long data)
@@ -721,7 +721,7 @@ static void igmp_ifc_timer_expire(unsigned long data)
in_dev->mr_ifc_count--;
igmp_ifc_start_timer(in_dev, IGMP_Unsolicited_Report_Interval);
}
- __in_dev_put(in_dev);
+ in_dev_put(in_dev);
}
static void igmp_ifc_event(struct in_device *in_dev)
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 5f648751fce..45dbdab915e 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -106,6 +106,10 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
r->id.idiag_sport = inet->inet_sport;
r->id.idiag_dport = inet->inet_dport;
+
+ memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
+ memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
+
r->id.idiag_src[0] = inet->inet_rcv_saddr;
r->id.idiag_dst[0] = inet->inet_daddr;
@@ -240,12 +244,19 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
r->idiag_family = tw->tw_family;
r->idiag_retrans = 0;
+
r->id.idiag_if = tw->tw_bound_dev_if;
sock_diag_save_cookie(tw, r->id.idiag_cookie);
+
r->id.idiag_sport = tw->tw_sport;
r->id.idiag_dport = tw->tw_dport;
+
+ memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
+ memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
+
r->id.idiag_src[0] = tw->tw_rcv_saddr;
r->id.idiag_dst[0] = tw->tw_daddr;
+
r->idiag_state = tw->tw_substate;
r->idiag_timer = 3;
r->idiag_expires = DIV_ROUND_UP(tmo * 1000, HZ);
@@ -732,8 +743,13 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
r->id.idiag_sport = inet->inet_sport;
r->id.idiag_dport = ireq->rmt_port;
+
+ memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src));
+ memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst));
+
r->id.idiag_src[0] = ireq->loc_addr;
r->id.idiag_dst[0] = ireq->rmt_addr;
+
r->idiag_expires = jiffies_to_msecs(tmo);
r->idiag_rqueue = 0;
r->idiag_wqueue = 0;
@@ -945,7 +961,7 @@ next_normal:
++num;
}
- if (r->idiag_states & TCPF_TIME_WAIT) {
+ if (r->idiag_states & (TCPF_TIME_WAIT | TCPF_FIN_WAIT2)) {
struct inet_timewait_sock *tw;
inet_twsk_for_each(tw, node,
@@ -955,6 +971,8 @@ next_normal:
if (num < s_num)
goto next_dying;
+ if (!(r->idiag_states & (1 << tw->tw_substate)))
+ goto next_dying;
if (r->sdiag_family != AF_UNSPEC &&
tw->tw_family != r->sdiag_family)
goto next_dying;
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 7e06641e36a..af02a39175e 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -211,7 +211,7 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force)
}
work = frag_mem_limit(nf) - nf->low_thresh;
- while (work > 0) {
+ while (work > 0 || force) {
spin_lock(&nf->lru_lock);
if (list_empty(&nf->lru_list)) {
@@ -283,9 +283,10 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
atomic_inc(&qp->refcnt);
hlist_add_head(&qp->list, &hb->chain);
+ inet_frag_lru_add(nf, qp);
spin_unlock(&hb->chain_lock);
read_unlock(&f->lock);
- inet_frag_lru_add(nf, qp);
+
return qp;
}
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 6af375afeee..c95848d0003 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -287,7 +287,7 @@ begintw:
if (unlikely(!INET_TW_MATCH(sk, net, acookie,
saddr, daddr, ports,
dif))) {
- sock_put(sk);
+ inet_twsk_put(inet_twsk(sk));
goto begintw;
}
goto out;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 000e3d239d6..33d5537881e 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -32,8 +32,8 @@
* At the moment of writing this notes identifier of IP packets is generated
* to be unpredictable using this code only for packets subjected
* (actually or potentially) to defragmentation. I.e. DF packets less than
- * PMTU in size uses a constant ID and do not use this code (see
- * ip_select_ident() in include/net/ip.h).
+ * PMTU in size when local fragmentation is disabled use a constant ID and do
+ * not use this code (see ip_select_ident() in include/net/ip.h).
*
* Route cache entries hold references to our nodes.
* New cache entries get references via lookup by destination IP address in
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 694de3b7aeb..98d7e53d2af 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -39,6 +39,71 @@
#include <net/route.h>
#include <net/xfrm.h>
+static bool ip_may_fragment(const struct sk_buff *skb)
+{
+ return unlikely((ip_hdr(skb)->frag_off & htons(IP_DF)) == 0) ||
+ !skb->local_df;
+}
+
+static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
+{
+ if (skb->len <= mtu || skb->local_df)
+ return false;
+
+ if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+ return false;
+
+ return true;
+}
+
+static bool ip_gso_exceeds_dst_mtu(const struct sk_buff *skb)
+{
+ unsigned int mtu;
+
+ if (skb->local_df || !skb_is_gso(skb))
+ return false;
+
+ mtu = dst_mtu(skb_dst(skb));
+
+ /* if seglen > mtu, do software segmentation for IP fragmentation on
+ * output. DF bit cannot be set since ip_forward would have sent
+ * icmp error.
+ */
+ return skb_gso_network_seglen(skb) > mtu;
+}
+
+/* called if GSO skb needs to be fragmented on forward */
+static int ip_forward_finish_gso(struct sk_buff *skb)
+{
+ struct dst_entry *dst = skb_dst(skb);
+ netdev_features_t features;
+ struct sk_buff *segs;
+ int ret = 0;
+
+ features = netif_skb_dev_features(skb, dst->dev);
+ segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+ if (IS_ERR(segs)) {
+ kfree_skb(skb);
+ return -ENOMEM;
+ }
+
+ consume_skb(skb);
+
+ do {
+ struct sk_buff *nskb = segs->next;
+ int err;
+
+ segs->next = NULL;
+ err = dst_output(segs);
+
+ if (err && ret == 0)
+ ret = err;
+ segs = nskb;
+ } while (segs);
+
+ return ret;
+}
+
static int ip_forward_finish(struct sk_buff *skb)
{
struct ip_options *opt = &(IPCB(skb)->opt);
@@ -49,6 +114,9 @@ static int ip_forward_finish(struct sk_buff *skb)
if (unlikely(opt->optlen))
ip_forward_options(skb);
+ if (ip_gso_exceeds_dst_mtu(skb))
+ return ip_forward_finish_gso(skb);
+
return dst_output(skb);
}
@@ -88,8 +156,7 @@ int ip_forward(struct sk_buff *skb)
if (opt->is_strictroute && rt->rt_uses_gateway)
goto sr_failed;
- if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) &&
- (ip_hdr(skb)->frag_off & htons(IP_DF))) && !skb->local_df) {
+ if (!ip_may_fragment(skb) && ip_exceeds_mtu(skb, dst_mtu(&rt->dst))) {
IP_INC_STATS(dev_net(rt->dst.dev), IPSTATS_MIB_FRAGFAILS);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
htonl(dst_mtu(&rt->dst)));
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 2a83591492d..828b2e8631e 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -335,7 +335,8 @@ static int ipgre_rcv(struct sk_buff *skb)
iph->saddr, iph->daddr, tpi.key);
if (tunnel) {
- ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
+ skb_pop_mac_header(skb);
+ ip_tunnel_rcv(tunnel, skb, &tpi, hdr_len, log_ecn_error);
return 0;
}
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
@@ -503,10 +504,11 @@ static int ipgre_tunnel_ioctl(struct net_device *dev,
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
return -EFAULT;
- if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
- p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
- ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) {
- return -EINVAL;
+ if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
+ if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
+ p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
+ ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
+ return -EINVAL;
}
p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
@@ -571,7 +573,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
if (daddr)
memcpy(&iph->daddr, daddr, 4);
if (iph->daddr)
- return t->hlen;
+ return t->hlen + sizeof(*iph);
return -(t->hlen + sizeof(*iph));
}
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 3da817b89e9..0a22bb0ce1a 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -190,10 +190,7 @@ static int ip_local_deliver_finish(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
- __skb_pull(skb, ip_hdrlen(skb));
-
- /* Point into the IP datagram, just past the header. */
- skb_reset_transport_header(skb);
+ __skb_pull(skb, skb_network_header_len(skb));
rcu_read_lock();
{
@@ -316,7 +313,7 @@ static int ip_rcv_finish(struct sk_buff *skb)
const struct iphdr *iph = ip_hdr(skb);
struct rtable *rt;
- if (sysctl_ip_early_demux && !skb_dst(skb)) {
+ if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
const struct net_protocol *ipprot;
int protocol = iph->protocol;
@@ -437,6 +434,8 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
goto drop;
}
+ skb->transport_header = skb->network_header + iph->ihl*4;
+
/* Remove any debris in the socket control block */
memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 4bcabf3ab4c..6ca5873d617 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -148,7 +148,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
iph->saddr = saddr;
iph->protocol = sk->sk_protocol;
- ip_select_ident(iph, &rt->dst, sk);
+ ip_select_ident(skb, &rt->dst, sk);
if (opt && opt->opt.optlen) {
iph->ihl += opt->opt.optlen>>2;
@@ -394,7 +394,7 @@ packet_routed:
ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
}
- ip_select_ident_more(iph, &rt->dst, sk,
+ ip_select_ident_more(skb, &rt->dst, sk,
(skb_shinfo(skb)->gso_segs ?: 1) - 1);
skb->priority = sk->sk_priority;
@@ -844,7 +844,7 @@ static int __ip_append_data(struct sock *sk,
csummode = CHECKSUM_PARTIAL;
cork->length += length;
- if (((length > mtu) || (skb && skb_is_gso(skb))) &&
+ if (((length > mtu) || (skb && skb_has_frags(skb))) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len) {
err = ip_ufo_append_data(sk, queue, getfrag, from, length,
@@ -1324,7 +1324,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
else
ttl = ip_select_ttl(inet, &rt->dst);
- iph = (struct iphdr *)skb->data;
+ iph = ip_hdr(skb);
iph->version = 4;
iph->ihl = 5;
iph->tos = inet->tos;
@@ -1332,7 +1332,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
iph->ttl = ttl;
iph->protocol = sk->sk_protocol;
ip_copy_addrs(iph, fl4);
- ip_select_ident(iph, &rt->dst, sk);
+ ip_select_ident(skb, &rt->dst, sk);
if (opt) {
iph->ihl += opt->optlen>>2;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index d9c4f113d70..23e6ab0a2dc 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -368,7 +368,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
/*
* Handle MSG_ERRQUEUE
*/
-int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
+int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
{
struct sock_exterr_skb *serr;
struct sk_buff *skb, *skb2;
@@ -405,6 +405,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len)
serr->addr_offset);
sin->sin_port = serr->port;
memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
+ *addr_len = sizeof(*sin);
}
memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 7fa8f08fa7a..fa6573264c8 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -402,7 +402,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
}
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
- const struct tnl_ptk_info *tpi, bool log_ecn_error)
+ const struct tnl_ptk_info *tpi, int hdr_len, bool log_ecn_error)
{
struct pcpu_tstats *tstats;
const struct iphdr *iph = ip_hdr(skb);
@@ -413,7 +413,7 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
skb->protocol = tpi->proto;
skb->mac_header = skb->network_header;
- __pskb_pull(skb, tunnel->hlen);
+ __pskb_pull(skb, hdr_len);
skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen);
#ifdef CONFIG_NET_IPGRE_BROADCAST
if (ipv4_is_multicast(iph->daddr)) {
@@ -486,6 +486,53 @@ drop:
}
EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
+static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
+ struct rtable *rt, __be16 df)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
+ int mtu;
+
+ if (df)
+ mtu = dst_mtu(&rt->dst) - dev->hard_header_len
+ - sizeof(struct iphdr) - tunnel->hlen;
+ else
+ mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
+
+ if (skb_dst(skb))
+ skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+
+ if (skb->protocol == htons(ETH_P_IP)) {
+ if (!skb_is_gso(skb) &&
+ (df & htons(IP_DF)) && mtu < pkt_size) {
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+ return -E2BIG;
+ }
+ }
+#if IS_ENABLED(CONFIG_IPV6)
+ else if (skb->protocol == htons(ETH_P_IPV6)) {
+ struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
+
+ if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
+ mtu >= IPV6_MIN_MTU) {
+ if ((tunnel->parms.iph.daddr &&
+ !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
+ rt6->rt6i_dst.plen == 128) {
+ rt6->rt6i_flags |= RTF_MODIFIED;
+ dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
+ }
+ }
+
+ if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
+ mtu < pkt_size) {
+ icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ return -E2BIG;
+ }
+ }
+#endif
+ return 0;
+}
+
void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params)
{
@@ -499,7 +546,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
struct net_device *tdev; /* Device to other host */
unsigned int max_headroom; /* The extra header space needed */
__be32 dst;
- int mtu;
inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
@@ -579,56 +625,18 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
goto tx_error;
}
- df = tnl_params->frag_off;
- if (df)
- mtu = dst_mtu(&rt->dst) - dev->hard_header_len
- - sizeof(struct iphdr);
- else
- mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
-
- if (skb_dst(skb))
- skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
-
- if (skb->protocol == htons(ETH_P_IP)) {
- df |= (inner_iph->frag_off&htons(IP_DF));
-
- if (!skb_is_gso(skb) &&
- (inner_iph->frag_off&htons(IP_DF)) &&
- mtu < ntohs(inner_iph->tot_len)) {
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
- ip_rt_put(rt);
- goto tx_error;
- }
- }
-#if IS_ENABLED(CONFIG_IPV6)
- else if (skb->protocol == htons(ETH_P_IPV6)) {
- struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
-
- if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
- mtu >= IPV6_MIN_MTU) {
- if ((tunnel->parms.iph.daddr &&
- !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
- rt6->rt6i_dst.plen == 128) {
- rt6->rt6i_flags |= RTF_MODIFIED;
- dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
- }
- }
-
- if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
- mtu < skb->len) {
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
- ip_rt_put(rt);
- goto tx_error;
- }
+ if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) {
+ ip_rt_put(rt);
+ goto tx_error;
}
-#endif
if (tunnel->err_count > 0) {
if (time_before(jiffies,
tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
tunnel->err_count--;
+ memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
dst_link_failure(skb);
} else
tunnel->err_count = 0;
@@ -646,15 +654,19 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
ttl = ip4_dst_hoplimit(&rt->dst);
}
+ df = tnl_params->frag_off;
+ if (skb->protocol == htons(ETH_P_IP))
+ df |= (inner_iph->frag_off&htons(IP_DF));
+
max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr)
+ rt->dst.header_len;
- if (max_headroom > dev->needed_headroom) {
+ if (max_headroom > dev->needed_headroom)
dev->needed_headroom = max_headroom;
- if (skb_cow_head(skb, dev->needed_headroom)) {
- dev->stats.tx_dropped++;
- dev_kfree_skb(skb);
- return;
- }
+
+ if (skb_cow_head(skb, dev->needed_headroom)) {
+ dev->stats.tx_dropped++;
+ dev_kfree_skb(skb);
+ return;
}
skb_dst_drop(skb);
@@ -675,7 +687,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
iph->daddr = fl4.daddr;
iph->saddr = fl4.saddr;
iph->ttl = ttl;
- tunnel_ip_select_ident(skb, inner_iph, &rt->dst);
+ __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1);
iptunnel_xmit(skb, dev);
return;
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index c118f6b576b..feb19db6235 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -285,8 +285,17 @@ static int vti_rcv(struct sk_buff *skb)
tunnel = vti_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
if (tunnel != NULL) {
struct pcpu_tstats *tstats;
+ u32 oldmark = skb->mark;
+ int ret;
- if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+
+ /* temporarily mark the skb with the tunnel o_key, to
+ * only match policies with this mark.
+ */
+ skb->mark = be32_to_cpu(tunnel->parms.o_key);
+ ret = xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb);
+ skb->mark = oldmark;
+ if (!ret)
return -1;
tstats = this_cpu_ptr(tunnel->dev->tstats);
@@ -295,7 +304,6 @@ static int vti_rcv(struct sk_buff *skb)
tstats->rx_bytes += skb->len;
u64_stats_update_end(&tstats->syncp);
- skb->mark = 0;
secpath_reset(skb);
skb->dev = tunnel->dev;
return 1;
@@ -327,7 +335,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
memset(&fl4, 0, sizeof(fl4));
flowi4_init_output(&fl4, tunnel->parms.link,
- be32_to_cpu(tunnel->parms.i_key), RT_TOS(tos),
+ be32_to_cpu(tunnel->parms.o_key), RT_TOS(tos),
RT_SCOPE_UNIVERSE,
IPPROTO_IPIP, 0,
dst, tiph->saddr, 0, 0);
@@ -342,6 +350,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
if (!rt->dst.xfrm ||
rt->dst.xfrm->props.mode != XFRM_MODE_TUNNEL) {
dev->stats.tx_carrier_errors++;
+ ip_rt_put(rt);
goto tx_error_icmp;
}
tdev = rt->dst.dev;
@@ -606,17 +615,10 @@ static int __net_init vti_fb_tunnel_init(struct net_device *dev)
struct iphdr *iph = &tunnel->parms.iph;
struct vti_net *ipn = net_generic(dev_net(dev), vti_net_id);
- tunnel->dev = dev;
- strcpy(tunnel->parms.name, dev->name);
-
iph->version = 4;
iph->protocol = IPPROTO_IPIP;
iph->ihl = 5;
- dev->tstats = alloc_percpu(struct pcpu_tstats);
- if (!dev->tstats)
- return -ENOMEM;
-
dev_hold(dev);
rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
return 0;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 77bfcce64fe..f5cc7b33151 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -195,7 +195,7 @@ static int ipip_rcv(struct sk_buff *skb)
if (tunnel) {
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
- return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);
+ return ip_tunnel_rcv(tunnel, skb, &tpi, 0, log_ecn_error);
}
return -1;
@@ -240,11 +240,13 @@ ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
return -EFAULT;
- if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
- p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
- return -EINVAL;
- if (p.i_key || p.o_key || p.i_flags || p.o_flags)
- return -EINVAL;
+ if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
+ if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
+ p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
+ return -EINVAL;
+ }
+
+ p.i_key = p.o_key = p.i_flags = p.o_flags = 0;
if (p.iph.ttl)
p.iph.frag_off |= htons(IP_DF);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9d9610ae785..49797ed0917 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -157,9 +157,12 @@ static struct mr_table *ipmr_get_table(struct net *net, u32 id)
static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4,
struct mr_table **mrt)
{
- struct ipmr_result res;
- struct fib_lookup_arg arg = { .result = &res, };
int err;
+ struct ipmr_result res;
+ struct fib_lookup_arg arg = {
+ .result = &res,
+ .flags = FIB_LOOKUP_NOREF,
+ };
err = fib_rules_lookup(net->ipv4.mr_rules_ops,
flowi4_to_flowi(flp4), 0, &arg);
@@ -1658,7 +1661,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
iph->protocol = IPPROTO_IPIP;
iph->ihl = 5;
iph->tot_len = htons(skb->len);
- ip_select_ident(iph, skb_dst(skb), NULL);
+ ip_select_ident(skb, skb_dst(skb), NULL);
ip_send_check(iph);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -2252,13 +2255,14 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb,
}
static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
- u32 portid, u32 seq, struct mfc_cache *c, int cmd)
+ u32 portid, u32 seq, struct mfc_cache *c, int cmd,
+ int flags)
{
struct nlmsghdr *nlh;
struct rtmsg *rtm;
int err;
- nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI);
+ nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
if (nlh == NULL)
return -EMSGSIZE;
@@ -2326,7 +2330,7 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc,
if (skb == NULL)
goto errout;
- err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd);
+ err = ipmr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
if (err < 0)
goto errout;
@@ -2365,7 +2369,8 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
if (ipmr_fill_mroute(mrt, skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE) < 0)
+ mfc, RTM_NEWROUTE,
+ NLM_F_MULTI) < 0)
goto done;
next_entry:
e++;
@@ -2379,7 +2384,8 @@ next_entry:
if (ipmr_fill_mroute(mrt, skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE) < 0) {
+ mfc, RTM_NEWROUTE,
+ NLM_F_MULTI) < 0) {
spin_unlock_bh(&mfc_unres_lock);
goto done;
}
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 7d93d62cd5f..8cae28f5c3c 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -570,7 +570,7 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
err = PTR_ERR(rt);
rt = NULL;
if (err == -ENETUNREACH)
- IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
+ IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
goto out;
}
@@ -626,7 +626,6 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t len, int noblock, int flags, int *addr_len)
{
struct inet_sock *isk = inet_sk(sk);
- struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
struct sk_buff *skb;
int copied, err;
@@ -636,11 +635,8 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (flags & MSG_OOB)
goto out;
- if (addr_len)
- *addr_len = sizeof(*sin);
-
if (flags & MSG_ERRQUEUE)
- return ip_recv_error(sk, msg, len);
+ return ip_recv_error(sk, msg, len, addr_len);
skb = skb_recv_datagram(sk, flags, noblock, &err);
if (!skb)
@@ -660,11 +656,14 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
sock_recv_timestamp(msg, sk, skb);
/* Copy the address. */
- if (sin) {
+ if (msg->msg_name) {
+ struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
+
sin->sin_family = AF_INET;
sin->sin_port = 0 /* skb->h.uh->source */;
sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+ *addr_len = sizeof(*sin);
}
if (isk->cmsg_flags)
ip_cmsg_recv(msg, skb);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index dd44e0ab600..402870fdfa0 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -387,7 +387,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
iph->check = 0;
iph->tot_len = htons(length);
if (!iph->id)
- ip_select_ident(iph, &rt->dst, NULL);
+ ip_select_ident(skb, &rt->dst, NULL);
iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
}
@@ -571,7 +571,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos,
RT_SCOPE_UNIVERSE,
inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol,
- inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP,
+ inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP |
+ (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0),
daddr, saddr, 0, 0);
if (!inet->hdrincl) {
@@ -691,11 +692,8 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (flags & MSG_OOB)
goto out;
- if (addr_len)
- *addr_len = sizeof(*sin);
-
if (flags & MSG_ERRQUEUE) {
- err = ip_recv_error(sk, msg, len);
+ err = ip_recv_error(sk, msg, len, addr_len);
goto out;
}
@@ -721,6 +719,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
sin->sin_port = 0;
memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
+ *addr_len = sizeof(*sin);
}
if (inet->cmsg_flags)
ip_cmsg_recv(msg, skb);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index d35bbf0cf40..1a362f375e6 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1544,6 +1544,7 @@ static int __mkroute_input(struct sk_buff *skb,
rth->rt_gateway = 0;
rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
+ RT_CACHE_STAT_INC(in_slow_tot);
rth->dst.input = ip_forward;
rth->dst.output = ip_output;
@@ -1645,8 +1646,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
if (err != 0)
goto no_route;
- RT_CACHE_STAT_INC(in_slow_tot);
-
if (res.type == RTN_BROADCAST)
goto brd_input;
@@ -1715,13 +1714,18 @@ local_input:
rth->rt_gateway = 0;
rth->rt_uses_gateway = 0;
INIT_LIST_HEAD(&rth->rt_uncached);
+ RT_CACHE_STAT_INC(in_slow_tot);
if (res.type == RTN_UNREACHABLE) {
rth->dst.input= ip_error;
rth->dst.error= -err;
rth->rt_flags &= ~RTCF_LOCAL;
}
- if (do_cache)
- rt_cache_route(&FIB_RES_NH(res), rth);
+ if (do_cache) {
+ if (unlikely(!rt_cache_route(&FIB_RES_NH(res), rth))) {
+ rth->dst.flags |= DST_NOCACHE;
+ rt_add_uncached_list(rth);
+ }
+ }
skb_dst_set(skb, &rth->dst);
err = 0;
goto out;
@@ -2020,7 +2024,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
RT_SCOPE_LINK);
goto make_route;
}
- if (fl4->saddr) {
+ if (!fl4->saddr) {
if (ipv4_is_multicast(fl4->daddr))
fl4->saddr = inet_select_addr(dev_out, 0,
fl4->flowi4_scope);
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index fa2f63fc453..90b26beb84d 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -29,6 +29,7 @@
static int zero;
static int one = 1;
static int four = 4;
+static int gso_max_segs = GSO_MAX_SEGS;
static int tcp_retr1_max = 255;
static int ip_local_port_range_min[] = { 1, 1 };
static int ip_local_port_range_max[] = { 65535, 65535 };
@@ -36,6 +37,8 @@ static int tcp_adv_win_scale_min = -31;
static int tcp_adv_win_scale_max = 31;
static int ip_ttl_min = 1;
static int ip_ttl_max = 255;
+static int tcp_syn_retries_min = 1;
+static int tcp_syn_retries_max = MAX_TCP_SYNCNT;
static int ip_ping_group_range_min[] = { 0, 0 };
static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX };
@@ -331,7 +334,9 @@ static struct ctl_table ipv4_table[] = {
.data = &sysctl_tcp_syn_retries,
.maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = proc_dointvec
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &tcp_syn_retries_min,
+ .extra2 = &tcp_syn_retries_max
},
{
.procname = "tcp_synack_retries",
@@ -749,6 +754,15 @@ static struct ctl_table ipv4_table[] = {
.extra2 = &four,
},
{
+ .procname = "tcp_min_tso_segs",
+ .data = &sysctl_tcp_min_tso_segs,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &gso_max_segs,
+ },
+ {
.procname = "udp_mem",
.data = &sysctl_udp_mem,
.maxlen = sizeof(sysctl_udp_mem),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ab450c099aa..39bdb14b321 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -282,6 +282,8 @@
int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
+int sysctl_tcp_min_tso_segs __read_mostly = 2;
+
struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);
@@ -786,14 +788,24 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
xmit_size_goal = mss_now;
if (large_allowed && sk_can_gso(sk)) {
- xmit_size_goal = ((sk->sk_gso_max_size - 1) -
- inet_csk(sk)->icsk_af_ops->net_header_len -
- inet_csk(sk)->icsk_ext_hdr_len -
- tp->tcp_header_len);
+ u32 gso_size, hlen;
+
+ /* Maybe we should/could use sk->sk_prot->max_header here ? */
+ hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
+ inet_csk(sk)->icsk_ext_hdr_len +
+ tp->tcp_header_len;
+
+ /* Goal is to send at least one packet per ms,
+ * not one big TSO packet every 100 ms.
+ * This preserves ACK clocking and is consistent
+ * with tcp_tso_should_defer() heuristic.
+ */
+ gso_size = sk->sk_pacing_rate / (2 * MSEC_PER_SEC);
+ gso_size = max_t(u32, gso_size,
+ sysctl_tcp_min_tso_segs * mss_now);
- /* TSQ : try to have two TSO segments in flight */
- xmit_size_goal = min_t(u32, xmit_size_goal,
- sysctl_tcp_limit_output_bytes >> 1);
+ xmit_size_goal = min_t(u32, gso_size,
+ sk->sk_gso_max_size - 1 - hlen);
xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal);
@@ -989,7 +1001,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp)
}
}
-static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size)
+static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
+ int *copied, size_t size)
{
struct tcp_sock *tp = tcp_sk(sk);
int err, flags;
@@ -1004,11 +1017,12 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *size)
if (unlikely(tp->fastopen_req == NULL))
return -ENOBUFS;
tp->fastopen_req->data = msg;
+ tp->fastopen_req->size = size;
flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
msg->msg_namelen, flags);
- *size = tp->fastopen_req->copied;
+ *copied = tp->fastopen_req->copied;
tcp_free_fastopen_req(tp);
return err;
}
@@ -1028,7 +1042,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
flags = msg->msg_flags;
if (flags & MSG_FASTOPEN) {
- err = tcp_sendmsg_fastopen(sk, msg, &copied_syn);
+ err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
if (err == -EINPROGRESS && copied_syn > 0)
goto out;
else if (err)
@@ -1118,6 +1132,13 @@ new_segment:
goto wait_for_memory;
/*
+ * All packets are restored as if they have
+ * already been sent.
+ */
+ if (tp->repair)
+ TCP_SKB_CB(skb)->when = tcp_time_stamp;
+
+ /*
* Check whether we can use HW checksum.
*/
if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
@@ -2440,10 +2461,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_THIN_DUPACK:
if (val < 0 || val > 1)
err = -EINVAL;
- else
+ else {
tp->thin_dupack = val;
if (tp->thin_dupack)
tcp_disable_early_retrans(tp);
+ }
break;
case TCP_REPAIR:
@@ -2879,6 +2901,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
+ unsigned int sum_truesize = 0;
struct tcphdr *th;
unsigned int thlen;
unsigned int seq;
@@ -2962,13 +2985,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
if (copy_destructor) {
skb->destructor = gso_skb->destructor;
skb->sk = gso_skb->sk;
- /* {tcp|sock}_wfree() use exact truesize accounting :
- * sum(skb->truesize) MUST be exactly be gso_skb->truesize
- * So we account mss bytes of 'true size' for each segment.
- * The last segment will contain the remaining.
- */
- skb->truesize = mss;
- gso_skb->truesize -= mss;
+ sum_truesize += skb->truesize;
}
skb = skb->next;
th = tcp_hdr(skb);
@@ -2985,7 +3002,9 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
if (copy_destructor) {
swap(gso_skb->sk, skb->sk);
swap(gso_skb->destructor, skb->destructor);
- swap(gso_skb->truesize, skb->truesize);
+ sum_truesize += skb->truesize;
+ atomic_add(sum_truesize - gso_skb->truesize,
+ &skb->sk->sk_wmem_alloc);
}
delta = htonl(oldlen + (skb->tail - skb->transport_header) +
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index a9077f441cb..b6ae92a51f5 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -206,8 +206,8 @@ static u32 cubic_root(u64 a)
*/
static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
{
- u64 offs;
- u32 delta, t, bic_target, max_cnt;
+ u32 delta, bic_target, max_cnt;
+ u64 offs, t;
ca->ack_cnt++; /* count the number of ACKs */
@@ -250,9 +250,11 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
* if the cwnd < 1 million packets !!!
*/
+ t = (s32)(tcp_time_stamp - ca->epoch_start);
+ t += msecs_to_jiffies(ca->delay_min >> 3);
/* change the unit from HZ to bictcp_HZ */
- t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3)
- - ca->epoch_start) << BICTCP_HZ) / HZ;
+ t <<= BICTCP_HZ;
+ do_div(t, HZ);
if (t < ca->bic_K) /* t - K */
offs = ca->bic_K - t;
@@ -414,7 +416,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
return;
/* Discard delay samples right after fast recovery */
- if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ)
+ if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ)
return;
delay = (rtt_us << 3) / USEC_PER_MSEC;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9c6225780bd..e15d330919a 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -699,6 +699,34 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt)
}
}
+/* Set the sk_pacing_rate to allow proper sizing of TSO packets.
+ * Note: TCP stack does not yet implement pacing.
+ * FQ packet scheduler can be used to implement cheap but effective
+ * TCP pacing, to smooth the burst on large writes when packets
+ * in flight is significantly lower than cwnd (or rwin)
+ */
+static void tcp_update_pacing_rate(struct sock *sk)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ u64 rate;
+
+ /* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
+ rate = (u64)tp->mss_cache * 2 * (HZ << 3);
+
+ rate *= max(tp->snd_cwnd, tp->packets_out);
+
+ /* Correction for small srtt : minimum srtt being 8 (1 jiffy << 3),
+ * be conservative and assume srtt = 1 (125 us instead of 1.25 ms)
+ * We probably need usec resolution in the future.
+ * Note: This also takes care of possible srtt=0 case,
+ * when tcp_rtt_estimator() was not yet called.
+ */
+ if (tp->srtt > 8 + 2)
+ do_div(rate, tp->srtt);
+
+ sk->sk_pacing_rate = min_t(u64, rate, ~0U);
+}
+
/* Calculate rto without backoff. This is the second half of Van Jacobson's
* routine referred to above.
*/
@@ -1264,7 +1292,10 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb,
tp->lost_cnt_hint -= tcp_skb_pcount(prev);
}
- TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags;
+ TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags;
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
+ TCP_SKB_CB(prev)->end_seq++;
+
if (skb == tcp_highest_sack(sk))
tcp_advance_highest_sack(sk, skb);
@@ -3314,7 +3345,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag)
tcp_init_cwnd_reduction(sk, true);
tcp_set_ca_state(sk, TCP_CA_CWR);
tcp_end_cwnd_reduction(sk);
- tcp_set_ca_state(sk, TCP_CA_Open);
+ tcp_try_keep_open(sk);
NET_INC_STATS_BH(sock_net(sk),
LINUX_MIB_TCPLOSSPROBERECOVERY);
}
@@ -3330,7 +3361,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
u32 ack_seq = TCP_SKB_CB(skb)->seq;
u32 ack = TCP_SKB_CB(skb)->ack_seq;
bool is_dupack = false;
- u32 prior_in_flight;
+ u32 prior_in_flight, prior_cwnd = tp->snd_cwnd, prior_rtt = tp->srtt;
u32 prior_fackets;
int prior_packets = tp->packets_out;
int prior_sacked = tp->sacked_out;
@@ -3438,6 +3469,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (icsk->icsk_pending == ICSK_TIME_RETRANS)
tcp_schedule_loss_probe(sk);
+ if (tp->srtt != prior_rtt || tp->snd_cwnd != prior_cwnd)
+ tcp_update_pacing_rate(sk);
return 1;
no_queue:
@@ -3598,7 +3631,10 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr
++ptr;
tp->rx_opt.rcv_tsval = ntohl(*ptr);
++ptr;
- tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset;
+ if (*ptr)
+ tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset;
+ else
+ tp->rx_opt.rcv_tsecr = 0;
return true;
}
return false;
@@ -3623,7 +3659,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb,
}
tcp_parse_options(skb, &tp->rx_opt, 1, NULL);
- if (tp->rx_opt.saw_tstamp)
+ if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
tp->rx_opt.rcv_tsecr -= tp->tsoffset;
return true;
@@ -5376,7 +5412,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
int saved_clamp = tp->rx_opt.mss_clamp;
tcp_parse_options(skb, &tp->rx_opt, 0, &foc);
- if (tp->rx_opt.saw_tstamp)
+ if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr)
tp->rx_opt.rcv_tsecr -= tp->tsoffset;
if (th->ack) {
@@ -5733,6 +5769,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
} else
tcp_init_metrics(sk);
+ tcp_update_pacing_rate(sk);
+
/* Prevent spurious tcp_cwnd_restart() on
* first data packet.
*/
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 7999fc55c83..5d87806d3ad 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -176,7 +176,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
if (err == -ENETUNREACH)
- IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
+ IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
return err;
}
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index f6a005c485a..b500d2ddc47 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -22,6 +22,9 @@
int sysctl_tcp_nometrics_save __read_mostly;
+static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *addr,
+ struct net *net, unsigned int hash);
+
struct tcp_fastopen_metrics {
u16 mss;
u16 syn_loss:10; /* Recurring Fast Open SYN losses */
@@ -130,16 +133,41 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst,
}
}
+#define TCP_METRICS_TIMEOUT (60 * 60 * HZ)
+
+static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst)
+{
+ if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT)))
+ tcpm_suck_dst(tm, dst, false);
+}
+
+#define TCP_METRICS_RECLAIM_DEPTH 5
+#define TCP_METRICS_RECLAIM_PTR (struct tcp_metrics_block *) 0x1UL
+
static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst,
struct inetpeer_addr *addr,
- unsigned int hash,
- bool reclaim)
+ unsigned int hash)
{
struct tcp_metrics_block *tm;
struct net *net;
+ bool reclaim = false;
spin_lock_bh(&tcp_metrics_lock);
net = dev_net(dst->dev);
+
+ /* While waiting for the spin-lock the cache might have been populated
+ * with this entry and so we have to check again.
+ */
+ tm = __tcp_get_metrics(addr, net, hash);
+ if (tm == TCP_METRICS_RECLAIM_PTR) {
+ reclaim = true;
+ tm = NULL;
+ }
+ if (tm) {
+ tcpm_check_stamp(tm, dst);
+ goto out_unlock;
+ }
+
if (unlikely(reclaim)) {
struct tcp_metrics_block *oldest;
@@ -169,17 +197,6 @@ out_unlock:
return tm;
}
-#define TCP_METRICS_TIMEOUT (60 * 60 * HZ)
-
-static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst)
-{
- if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT)))
- tcpm_suck_dst(tm, dst, false);
-}
-
-#define TCP_METRICS_RECLAIM_DEPTH 5
-#define TCP_METRICS_RECLAIM_PTR (struct tcp_metrics_block *) 0x1UL
-
static struct tcp_metrics_block *tcp_get_encode(struct tcp_metrics_block *tm, int depth)
{
if (tm)
@@ -280,7 +297,6 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
struct inetpeer_addr addr;
unsigned int hash;
struct net *net;
- bool reclaim;
addr.family = sk->sk_family;
switch (addr.family) {
@@ -300,13 +316,10 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk,
hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);
tm = __tcp_get_metrics(&addr, net, hash);
- reclaim = false;
- if (tm == TCP_METRICS_RECLAIM_PTR) {
- reclaim = true;
+ if (tm == TCP_METRICS_RECLAIM_PTR)
tm = NULL;
- }
if (!tm && create)
- tm = tcpm_new(dst, &addr, hash, reclaim);
+ tm = tcpm_new(dst, &addr, hash);
else
tcpm_check_stamp(tm, dst);
@@ -665,10 +678,13 @@ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss,
void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
struct tcp_fastopen_cookie *cookie, bool syn_lost)
{
+ struct dst_entry *dst = __sk_dst_get(sk);
struct tcp_metrics_block *tm;
+ if (!dst)
+ return;
rcu_read_lock();
- tm = tcp_get_metrics(sk, __sk_dst_get(sk), true);
+ tm = tcp_get_metrics(sk, dst, true);
if (tm) {
struct tcp_fastopen_metrics *tfom = &tm->tcpm_fastopen;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ec335fabd5c..4a4e8746d1b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -686,7 +686,8 @@ static void tcp_tsq_handler(struct sock *sk)
if ((1 << sk->sk_state) &
(TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING |
TCPF_CLOSE_WAIT | TCPF_LAST_ACK))
- tcp_write_xmit(sk, tcp_current_mss(sk), 0, 0, GFP_ATOMIC);
+ tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle,
+ 0, GFP_ATOMIC);
}
/*
* One tasklest per cpu tries to send more skbs.
@@ -754,6 +755,17 @@ void tcp_release_cb(struct sock *sk)
if (flags & (1UL << TCP_TSQ_DEFERRED))
tcp_tsq_handler(sk);
+ /* Here begins the tricky part :
+ * We are called from release_sock() with :
+ * 1) BH disabled
+ * 2) sk_lock.slock spinlock held
+ * 3) socket owned by us (sk->sk_lock.owned == 1)
+ *
+ * But following code is meant to be called from BH handlers,
+ * so we should keep BH disabled, but early release socket ownership
+ */
+ sock_release_ownership(sk);
+
if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) {
tcp_write_timer_handler(sk);
__sock_put(sk);
@@ -887,8 +899,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
skb_orphan(skb);
skb->sk = sk;
- skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ?
- tcp_wfree : sock_wfree;
+ skb->destructor = tcp_wfree;
atomic_add(skb->truesize, &sk->sk_wmem_alloc);
/* Build TCP header and checksum it. */
@@ -977,6 +988,9 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb,
unsigned int mss_now)
{
+ /* Make sure we own this skb before messing gso_size/gso_segs */
+ WARN_ON_ONCE(skb_cloned(skb));
+
if (skb->len <= mss_now || !sk_can_gso(sk) ||
skb->ip_summed == CHECKSUM_NONE) {
/* Avoid the costly divide in the normal
@@ -1058,9 +1072,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len,
if (nsize < 0)
nsize = 0;
- if (skb_cloned(skb) &&
- skb_is_nonlinear(skb) &&
- pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+ if (skb_unclone(skb, GFP_ATOMIC))
return -ENOMEM;
/* Get a new skb... force flag on. */
@@ -1623,7 +1635,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb)
/* If a full-sized TSO skb can be sent, do it. */
if (limit >= min_t(unsigned int, sk->sk_gso_max_size,
- sk->sk_gso_max_segs * tp->mss_cache))
+ tp->xmit_size_goal_segs * tp->mss_cache))
goto send_now;
/* Middle in queue won't get any more data, full sendable already? */
@@ -1832,7 +1844,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
while ((skb = tcp_send_head(sk))) {
unsigned int limit;
-
tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
BUG_ON(!tso_segs);
@@ -1861,13 +1872,32 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle,
break;
}
- /* TSQ : sk_wmem_alloc accounts skb truesize,
- * including skb overhead. But thats OK.
+ /* TCP Small Queues :
+ * Control number of packets in qdisc/devices to two packets / or ~1 ms.
+ * This allows for :
+ * - better RTT estimation and ACK scheduling
+ * - faster recovery
+ * - high rates
+ * Alas, some drivers / subsystems require a fair amount
+ * of queued bytes to ensure line rate.
+ * One example is wifi aggregation (802.11 AMPDU)
*/
- if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) {
+ limit = max_t(unsigned int, sysctl_tcp_limit_output_bytes,
+ sk->sk_pacing_rate >> 10);
+
+ if (atomic_read(&sk->sk_wmem_alloc) > limit) {
set_bit(TSQ_THROTTLED, &tp->tsq_flags);
- break;
+ /* It is possible TX completion already happened
+ * before we set TSQ_THROTTLED, so we must
+ * test again the condition.
+ * We abuse smp_mb__after_clear_bit() because
+ * there is no smp_mb__after_set_bit() yet
+ */
+ smp_mb__after_clear_bit();
+ if (atomic_read(&sk->sk_wmem_alloc) > limit)
+ break;
}
+
limit = mss_now;
if (tso_segs > 1 && !tcp_urg_mode(tp))
limit = tcp_mss_split_point(sk, skb, mss_now,
@@ -2329,6 +2359,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
int oldpcount = tcp_skb_pcount(skb);
if (unlikely(oldpcount > 1)) {
+ if (skb_unclone(skb, GFP_ATOMIC))
+ return -ENOMEM;
tcp_init_tso_segs(sk, skb, cur_mss);
tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb));
}
@@ -2664,7 +2696,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
int tcp_header_size;
int mss;
- skb = alloc_skb(MAX_TCP_HEADER + 15, sk_gfp_atomic(sk, GFP_ATOMIC));
+ skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC);
if (unlikely(!skb)) {
dst_release(dst);
return NULL;
@@ -2808,6 +2840,8 @@ void tcp_connect_init(struct sock *sk)
if (likely(!tp->repair))
tp->rcv_nxt = 0;
+ else
+ tp->rcv_tstamp = tcp_time_stamp;
tp->rcv_wup = tp->rcv_nxt;
tp->copied_seq = tp->rcv_nxt;
@@ -2869,7 +2903,12 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
space = __tcp_mtu_to_mss(sk, inet_csk(sk)->icsk_pmtu_cookie) -
MAX_TCP_OPTION_SPACE;
- syn_data = skb_copy_expand(syn, skb_headroom(syn), space,
+ space = min_t(size_t, space, fo->size);
+
+ /* limit to order-0 allocations */
+ space = min_t(size_t, space, SKB_MAX_HEAD(MAX_TCP_HEADER));
+
+ syn_data = skb_copy_expand(syn, MAX_TCP_HEADER, space,
sk->sk_allocation);
if (syn_data == NULL)
goto fallback;
@@ -3088,7 +3127,6 @@ void tcp_send_window_probe(struct sock *sk)
{
if (sk->sk_state == TCP_ESTABLISHED) {
tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
- tcp_sk(sk)->snd_nxt = tcp_sk(sk)->write_seq;
tcp_xmit_probe_skb(sk, 0);
}
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 0bf5d399a03..c3075b55224 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -799,7 +799,7 @@ send:
/*
* Push out all pending data as one UDP datagram. Socket is locked.
*/
-static int udp_push_pending_frames(struct sock *sk)
+int udp_push_pending_frames(struct sock *sk)
{
struct udp_sock *up = udp_sk(sk);
struct inet_sock *inet = inet_sk(sk);
@@ -818,6 +818,7 @@ out:
up->pending = 0;
return err;
}
+EXPORT_SYMBOL(udp_push_pending_frames);
int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t len)
@@ -970,7 +971,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
err = PTR_ERR(rt);
rt = NULL;
if (err == -ENETUNREACH)
- IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES);
+ IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
goto out;
}
@@ -1069,6 +1070,9 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset,
struct udp_sock *up = udp_sk(sk);
int ret;
+ if (flags & MSG_SENDPAGE_NOTLAST)
+ flags |= MSG_MORE;
+
if (!up->pending) {
struct msghdr msg = { .msg_flags = flags|MSG_MORE };
@@ -1206,14 +1210,8 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
int is_udplite = IS_UDPLITE(sk);
bool slow;
- /*
- * Check any passed addresses
- */
- if (addr_len)
- *addr_len = sizeof(*sin);
-
if (flags & MSG_ERRQUEUE)
- return ip_recv_error(sk, msg, len);
+ return ip_recv_error(sk, msg, len, addr_len);
try_again:
skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
@@ -1273,6 +1271,7 @@ try_again:
sin->sin_port = udp_hdr(skb)->source;
sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+ *addr_len = sizeof(*sin);
}
if (inet->cmsg_flags)
ip_cmsg_recv(msg, skb);
diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c
index eb1dd4d643f..b5663c37f08 100644
--- a/net/ipv4/xfrm4_mode_tunnel.c
+++ b/net/ipv4/xfrm4_mode_tunnel.c
@@ -117,7 +117,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF));
- ip_select_ident(top_iph, dst->child, NULL);
+ ip_select_ident(skb, dst->child, NULL);
top_iph->ttl = ip4_dst_hoplimit(dst->child);
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4ab4c38958c..7bcdd0df68d 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1111,8 +1111,11 @@ retry:
* Lifetime is greater than REGEN_ADVANCE time units. In particular,
* an implementation must not create a temporary address with a zero
* Preferred Lifetime.
+ * Use age calculation as in addrconf_verify to avoid unnecessary
+ * temporary addresses being generated.
*/
- if (tmp_prefered_lft <= regen_advance) {
+ age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ;
+ if (tmp_prefered_lft <= regen_advance + age) {
in6_ifa_put(ifp);
in6_dev_put(idev);
ret = -1;
@@ -1124,12 +1127,10 @@ retry:
if (ifp->flags & IFA_F_OPTIMISTIC)
addr_flags |= IFA_F_OPTIMISTIC;
- ift = !max_addresses ||
- ipv6_count_addresses(idev) < max_addresses ?
- ipv6_add_addr(idev, &addr, tmp_plen,
- ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK,
- addr_flags) : NULL;
- if (IS_ERR_OR_NULL(ift)) {
+ ift = ipv6_add_addr(idev, &addr, tmp_plen,
+ ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK,
+ addr_flags);
+ if (IS_ERR(ift)) {
in6_ifa_put(ifp);
in6_dev_put(idev);
pr_info("%s: retry temporary address regeneration\n", __func__);
@@ -1448,6 +1449,23 @@ try_nextdev:
}
EXPORT_SYMBOL(ipv6_dev_get_saddr);
+int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,
+ unsigned char banned_flags)
+{
+ struct inet6_ifaddr *ifp;
+ int err = -EADDRNOTAVAIL;
+
+ list_for_each_entry(ifp, &idev->addr_list, if_list) {
+ if (ifp->scope == IFA_LINK &&
+ !(ifp->flags & banned_flags)) {
+ *addr = ifp->addr;
+ err = 0;
+ break;
+ }
+ }
+ return err;
+}
+
int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
unsigned char banned_flags)
{
@@ -1457,17 +1475,8 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
rcu_read_lock();
idev = __in6_dev_get(dev);
if (idev) {
- struct inet6_ifaddr *ifp;
-
read_lock_bh(&idev->lock);
- list_for_each_entry(ifp, &idev->addr_list, if_list) {
- if (ifp->scope == IFA_LINK &&
- !(ifp->flags & banned_flags)) {
- *addr = ifp->addr;
- err = 0;
- break;
- }
- }
+ err = __ipv6_get_lladdr(idev, addr, banned_flags);
read_unlock_bh(&idev->lock);
}
rcu_read_unlock();
@@ -1527,6 +1536,33 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr,
return false;
}
+/* Compares an address/prefix_len with addresses on device @dev.
+ * If one is found it returns true.
+ */
+bool ipv6_chk_custom_prefix(const struct in6_addr *addr,
+ const unsigned int prefix_len, struct net_device *dev)
+{
+ struct inet6_dev *idev;
+ struct inet6_ifaddr *ifa;
+ bool ret = false;
+
+ rcu_read_lock();
+ idev = __in6_dev_get(dev);
+ if (idev) {
+ read_lock_bh(&idev->lock);
+ list_for_each_entry(ifa, &idev->addr_list, if_list) {
+ ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len);
+ if (ret)
+ break;
+ }
+ read_unlock_bh(&idev->lock);
+ }
+ rcu_read_unlock();
+
+ return ret;
+}
+EXPORT_SYMBOL(ipv6_chk_custom_prefix);
+
int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev)
{
struct inet6_dev *idev;
diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c
index f083a583a05..b30ad3741b4 100644
--- a/net/ipv6/addrlabel.c
+++ b/net/ipv6/addrlabel.c
@@ -251,38 +251,36 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net,
/* add a label */
static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace)
{
+ struct hlist_node *n;
+ struct ip6addrlbl_entry *last = NULL, *p = NULL;
int ret = 0;
- ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n",
- __func__,
- newp, replace);
+ ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp,
+ replace);
- if (hlist_empty(&ip6addrlbl_table.head)) {
- hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
- } else {
- struct hlist_node *n;
- struct ip6addrlbl_entry *p = NULL;
- hlist_for_each_entry_safe(p, n,
- &ip6addrlbl_table.head, list) {
- if (p->prefixlen == newp->prefixlen &&
- net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
- p->ifindex == newp->ifindex &&
- ipv6_addr_equal(&p->prefix, &newp->prefix)) {
- if (!replace) {
- ret = -EEXIST;
- goto out;
- }
- hlist_replace_rcu(&p->list, &newp->list);
- ip6addrlbl_put(p);
- goto out;
- } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
- (p->prefixlen < newp->prefixlen)) {
- hlist_add_before_rcu(&newp->list, &p->list);
+ hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) {
+ if (p->prefixlen == newp->prefixlen &&
+ net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) &&
+ p->ifindex == newp->ifindex &&
+ ipv6_addr_equal(&p->prefix, &newp->prefix)) {
+ if (!replace) {
+ ret = -EEXIST;
goto out;
}
+ hlist_replace_rcu(&p->list, &newp->list);
+ ip6addrlbl_put(p);
+ goto out;
+ } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) ||
+ (p->prefixlen < newp->prefixlen)) {
+ hlist_add_before_rcu(&newp->list, &p->list);
+ goto out;
}
- hlist_add_after_rcu(&p->list, &newp->list);
+ last = p;
}
+ if (last)
+ hlist_add_after_rcu(&last->list, &newp->list);
+ else
+ hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head);
out:
if (!ret)
ip6addrlbl_table.seq++;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 4b56cbbc789..8997340e374 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -318,7 +318,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
/*
* Handle MSG_ERRQUEUE
*/
-int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
+int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct sock_exterr_skb *serr;
@@ -369,6 +369,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
&sin->sin6_addr);
sin->sin6_scope_id = 0;
}
+ *addr_len = sizeof(*sin);
}
memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
@@ -377,6 +378,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) {
sin->sin6_family = AF_INET6;
sin->sin6_flowinfo = 0;
+ sin->sin6_port = 0;
if (skb->protocol == htons(ETH_P_IPV6)) {
sin->sin6_addr = ipv6_hdr(skb)->saddr;
if (np->rxopt.all)
@@ -423,7 +425,8 @@ EXPORT_SYMBOL_GPL(ipv6_recv_error);
/*
* Handle IPV6_RECVPATHMTU
*/
-int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len)
+int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len,
+ int *addr_len)
{
struct ipv6_pinfo *np = inet6_sk(sk);
struct sk_buff *skb;
@@ -457,6 +460,7 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len)
sin->sin6_port = 0;
sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id;
sin->sin6_addr = mtu_info.ip6m_addr.sin6_addr;
+ *addr_len = sizeof(*sin);
}
put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 07a7d65a7cb..8d67900aa00 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -162,12 +162,6 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb)
off += optlen;
len -= optlen;
}
- /* This case will not be caught by above check since its padding
- * length is smaller than 7:
- * 1 byte NH + 1 byte Length + 6 bytes Padding
- */
- if ((padlen == 6) && ((off - skb_network_header_len(skb)) == 8))
- goto bad;
if (len == 0)
return true;
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index c5e83fae4df..51af9d0d019 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -212,7 +212,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
found = (nexthdr == target);
if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) {
- if (target < 0)
+ if (target < 0 || found)
break;
return -ENOENT;
}
diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c
index cf77f3abfd0..447a7fbd1bb 100644
--- a/net/ipv6/exthdrs_offload.c
+++ b/net/ipv6/exthdrs_offload.c
@@ -25,11 +25,11 @@ int __init ipv6_exthdrs_offload_init(void)
int ret;
ret = inet6_add_offload(&rthdr_offload, IPPROTO_ROUTING);
- if (!ret)
+ if (ret)
goto out;
ret = inet6_add_offload(&dstopt_offload, IPPROTO_DSTOPTS);
- if (!ret)
+ if (ret)
goto out_rt;
out:
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index b4ff0a42b8c..2dee1d9d730 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -508,7 +508,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
np->tclass, NULL, &fl6, (struct rt6_info *)dst,
MSG_DONTWAIT, np->dontfrag);
if (err) {
- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
} else {
err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
@@ -931,6 +931,14 @@ static const struct icmp6_err {
.err = ECONNREFUSED,
.fatal = 1,
},
+ { /* POLICY_FAIL */
+ .err = EACCES,
+ .fatal = 1,
+ },
+ { /* REJECT_ROUTE */
+ .err = EACCES,
+ .fatal = 1,
+ },
};
int icmpv6_err_convert(u8 type, u8 code, int *err)
@@ -942,7 +950,7 @@ int icmpv6_err_convert(u8 type, u8 code, int *err)
switch (type) {
case ICMPV6_DEST_UNREACH:
fatal = 1;
- if (code <= ICMPV6_PORT_UNREACH) {
+ if (code < ARRAY_SIZE(tab_unreach)) {
*err = tab_unreach[code].err;
fatal = tab_unreach[code].fatal;
}
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 32b4a1675d8..066640e0ba8 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -116,7 +116,7 @@ begintw:
}
if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr,
ports, dif))) {
- sock_put(sk);
+ inet_twsk_put(inet_twsk(sk));
goto begintw;
}
goto out;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 192dd1a0e18..9c06ecb6556 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -632,6 +632,12 @@ insert_above:
return ln;
}
+static inline bool rt6_qualify_for_ecmp(struct rt6_info *rt)
+{
+ return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) ==
+ RTF_GATEWAY;
+}
+
/*
* Insert routing information in a node.
*/
@@ -646,6 +652,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
int add = (!info->nlh ||
(info->nlh->nlmsg_flags & NLM_F_CREATE));
int found = 0;
+ bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
ins = &fn->leaf;
@@ -691,9 +698,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
* To avoid long list, we only had siblings if the
* route have a gateway.
*/
- if (rt->rt6i_flags & RTF_GATEWAY &&
- !(rt->rt6i_flags & RTF_EXPIRES) &&
- !(iter->rt6i_flags & RTF_EXPIRES))
+ if (rt_can_ecmp &&
+ rt6_qualify_for_ecmp(iter))
rt->rt6i_nsiblings++;
}
@@ -715,7 +721,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
/* Find the first route that have the same metric */
sibling = fn->leaf;
while (sibling) {
- if (sibling->rt6i_metric == rt->rt6i_metric) {
+ if (sibling->rt6i_metric == rt->rt6i_metric &&
+ rt6_qualify_for_ecmp(sibling)) {
list_add_tail(&rt->rt6i_siblings,
&sibling->rt6i_siblings);
break;
@@ -818,9 +825,9 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr),
rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst),
allow_create, replace_required);
-
if (IS_ERR(fn)) {
err = PTR_ERR(fn);
+ fn = NULL;
goto out;
}
@@ -986,14 +993,22 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root,
if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) {
#ifdef CONFIG_IPV6_SUBTREES
- if (fn->subtree)
- fn = fib6_lookup_1(fn->subtree, args + 1);
+ if (fn->subtree) {
+ struct fib6_node *sfn;
+ sfn = fib6_lookup_1(fn->subtree,
+ args + 1);
+ if (!sfn)
+ goto backtrack;
+ fn = sfn;
+ }
#endif
- if (!fn || fn->fn_flags & RTN_RTINFO)
+ if (fn->fn_flags & RTN_RTINFO)
return fn;
}
}
-
+#ifdef CONFIG_IPV6_SUBTREES
+backtrack:
+#endif
if (fn->fn_flags & RTN_ROOT)
break;
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 46e88433ec7..f0ccdb78710 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -453,8 +453,10 @@ static int mem_check(struct sock *sk)
if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
return 0;
+ rcu_read_lock_bh();
for_each_sk_fl_rcu(np, sfl)
count++;
+ rcu_read_unlock_bh();
if (room <= 0 ||
((count >= FL_MAX_PER_SOCK ||
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index ecd60733e5e..1f9a1a5b61f 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -620,7 +620,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
struct ip6_tnl *tunnel = netdev_priv(dev);
struct net_device *tdev; /* Device to other host */
struct ipv6hdr *ipv6h; /* Our new IP header */
- unsigned int max_headroom; /* The extra header space needed */
+ unsigned int max_headroom = 0; /* The extra header space needed */
int gre_hlen;
struct ipv6_tel_txoption opt;
int mtu;
@@ -693,7 +693,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,
tunnel->err_count = 0;
}
- max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
+ max_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len;
if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
(skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 2bab2aa5974..774b09cb292 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -49,7 +49,7 @@
int ip6_rcv_finish(struct sk_buff *skb)
{
- if (sysctl_ip_early_demux && !skb_dst(skb)) {
+ if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
const struct inet6_protocol *ipprot;
ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index d5d20cde8d9..32fb114b86b 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -130,7 +130,7 @@ static int ip6_finish_output2(struct sk_buff *skb)
}
rcu_read_lock_bh();
- nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
+ nexthop = rt6_nexthop((struct rt6_info *)dst);
neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
if (unlikely(!neigh))
neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
@@ -141,8 +141,8 @@ static int ip6_finish_output2(struct sk_buff *skb)
}
rcu_read_unlock_bh();
- IP6_INC_STATS_BH(dev_net(dst->dev),
- ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+ IP6_INC_STATS(dev_net(dst->dev),
+ ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
kfree_skb(skb);
return -EINVAL;
}
@@ -150,7 +150,8 @@ static int ip6_finish_output2(struct sk_buff *skb)
static int ip6_finish_output(struct sk_buff *skb)
{
if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
- dst_allfrag(skb_dst(skb)))
+ dst_allfrag(skb_dst(skb)) ||
+ (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
return ip6_fragment(skb, ip6_finish_output2);
else
return ip6_finish_output2(skb);
@@ -344,6 +345,20 @@ static inline int ip6_forward_finish(struct sk_buff *skb)
return dst_output(skb);
}
+static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
+{
+ if (skb->len <= mtu || skb->local_df)
+ return false;
+
+ if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
+ return true;
+
+ if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
+ return false;
+
+ return true;
+}
+
int ip6_forward(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
@@ -466,8 +481,7 @@ int ip6_forward(struct sk_buff *skb)
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
- if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
- (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
+ if (ip6_pkt_too_big(skb, mtu)) {
/* Again, force OUTPUT device used as source address */
skb->dev = dst->dev;
icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
@@ -898,7 +912,7 @@ static int ip6_dst_lookup_tail(struct sock *sk,
*/
rt = (struct rt6_info *) *dst;
rcu_read_lock_bh();
- n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr));
+ n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));
err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
rcu_read_unlock_bh();
@@ -1039,6 +1053,8 @@ static inline int ip6_ufo_append_data(struct sock *sk,
* udp datagram
*/
if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
+ struct frag_hdr fhdr;
+
skb = sock_alloc_send_skb(sk,
hh_len + fragheaderlen + transhdrlen + 20,
(flags & MSG_DONTWAIT), &err);
@@ -1059,12 +1075,6 @@ static inline int ip6_ufo_append_data(struct sock *sk,
skb->ip_summed = CHECKSUM_PARTIAL;
skb->csum = 0;
- }
-
- err = skb_append_datato_frags(sk,skb, getfrag, from,
- (length - transhdrlen));
- if (!err) {
- struct frag_hdr fhdr;
/* Specify the length of each IPv6 datagram fragment.
* It has to be a multiple of 8.
@@ -1075,15 +1085,10 @@ static inline int ip6_ufo_append_data(struct sock *sk,
ipv6_select_ident(&fhdr, rt);
skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
__skb_queue_tail(&sk->sk_write_queue, skb);
-
- return 0;
}
- /* There is not enough support do UPD LSO,
- * so follow normal path
- */
- kfree_skb(skb);
- return err;
+ return skb_append_datato_frags(sk, skb, getfrag, from,
+ (length - transhdrlen));
}
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
@@ -1098,23 +1103,24 @@ static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}
-static void ip6_append_data_mtu(int *mtu,
+static void ip6_append_data_mtu(unsigned int *mtu,
int *maxfraglen,
unsigned int fragheaderlen,
struct sk_buff *skb,
- struct rt6_info *rt)
+ struct rt6_info *rt,
+ unsigned int orig_mtu)
{
if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
if (skb == NULL) {
/* first fragment, reserve header_len */
- *mtu = *mtu - rt->dst.header_len;
+ *mtu = orig_mtu - rt->dst.header_len;
} else {
/*
* this fragment is not first, the headers
* space is regarded as data space.
*/
- *mtu = dst_mtu(rt->dst.path);
+ *mtu = orig_mtu;
}
*maxfraglen = ((*mtu - fragheaderlen) & ~7)
+ fragheaderlen - sizeof(struct frag_hdr);
@@ -1131,11 +1137,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
struct ipv6_pinfo *np = inet6_sk(sk);
struct inet_cork *cork;
struct sk_buff *skb, *skb_prev = NULL;
- unsigned int maxfraglen, fragheaderlen;
+ unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
int exthdrlen;
int dst_exthdrlen;
int hh_len;
- int mtu;
int copy;
int err;
int offset = 0;
@@ -1214,6 +1219,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
dst_exthdrlen = 0;
mtu = cork->fragsize;
}
+ orig_mtu = mtu;
hh_len = LL_RESERVED_SPACE(rt->dst.dev);
@@ -1248,27 +1254,27 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
* --yoshfuji
*/
- cork->length += length;
- if (length > mtu) {
- int proto = sk->sk_protocol;
- if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
- ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
- return -EMSGSIZE;
- }
-
- if (proto == IPPROTO_UDP &&
- (rt->dst.dev->features & NETIF_F_UFO)) {
+ if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP ||
+ sk->sk_protocol == IPPROTO_RAW)) {
+ ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
+ return -EMSGSIZE;
+ }
- err = ip6_ufo_append_data(sk, getfrag, from, length,
- hh_len, fragheaderlen,
- transhdrlen, mtu, flags, rt);
- if (err)
- goto error;
- return 0;
- }
+ skb = skb_peek_tail(&sk->sk_write_queue);
+ cork->length += length;
+ if (((length > mtu) ||
+ (skb && skb_has_frags(skb))) &&
+ (sk->sk_protocol == IPPROTO_UDP) &&
+ (rt->dst.dev->features & NETIF_F_UFO)) {
+ err = ip6_ufo_append_data(sk, getfrag, from, length,
+ hh_len, fragheaderlen,
+ transhdrlen, mtu, flags, rt);
+ if (err)
+ goto error;
+ return 0;
}
- if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
+ if (!skb)
goto alloc_new_skb;
while (length > 0) {
@@ -1292,7 +1298,8 @@ alloc_new_skb:
/* update mtu and maxfraglen if necessary */
if (skb == NULL || skb_prev == NULL)
ip6_append_data_mtu(&mtu, &maxfraglen,
- fragheaderlen, skb, rt);
+ fragheaderlen, skb, rt,
+ orig_mtu);
skb_prev = skb;
@@ -1547,8 +1554,8 @@ int ip6_push_pending_frames(struct sock *sk)
if (proto == IPPROTO_ICMPV6) {
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
- ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
+ ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
}
err = ip6_local_out(skb);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 1e55866cead..f21cf476b00 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1617,6 +1617,15 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],
return ip6_tnl_update(t, &p);
}
+static void ip6_tnl_dellink(struct net_device *dev, struct list_head *head)
+{
+ struct net *net = dev_net(dev);
+ struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
+
+ if (dev != ip6n->fb_tnl_dev)
+ unregister_netdevice_queue(dev, head);
+}
+
static size_t ip6_tnl_get_size(const struct net_device *dev)
{
return
@@ -1646,9 +1655,9 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev)
if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) ||
nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr),
- &parm->raddr) ||
- nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr),
&parm->laddr) ||
+ nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr),
+ &parm->raddr) ||
nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) ||
nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) ||
nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) ||
@@ -1681,6 +1690,7 @@ static struct rtnl_link_ops ip6_link_ops __read_mostly = {
.validate = ip6_tnl_validate,
.newlink = ip6_tnl_newlink,
.changelink = ip6_tnl_changelink,
+ .dellink = ip6_tnl_dellink,
.get_size = ip6_tnl_get_size,
.fill_info = ip6_tnl_fill_info,
};
@@ -1732,6 +1742,7 @@ static int __net_init ip6_tnl_init_net(struct net *net)
if (!ip6n->fb_tnl_dev)
goto err_alloc_dev;
dev_net_set(ip6n->fb_tnl_dev, net);
+ ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops;
err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev);
if (err < 0)
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 241fb8ad9fc..2c84072b1da 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -141,9 +141,12 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
struct mr6_table **mrt)
{
- struct ip6mr_result res;
- struct fib_lookup_arg arg = { .result = &res, };
int err;
+ struct ip6mr_result res;
+ struct fib_lookup_arg arg = {
+ .result = &res,
+ .flags = FIB_LOOKUP_NOREF,
+ };
err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
flowi6_to_flowi(flp6), 0, &arg);
@@ -259,10 +262,12 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
{
struct mr6_table *mrt, *next;
+ rtnl_lock();
list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
list_del(&mrt->list);
ip6mr_free_table(mrt);
}
+ rtnl_unlock();
fib_rules_unregister(net->ipv6.mr6_rules_ops);
}
#else
@@ -289,7 +294,10 @@ static int __net_init ip6mr_rules_init(struct net *net)
static void __net_exit ip6mr_rules_exit(struct net *net)
{
+ rtnl_lock();
ip6mr_free_table(net->ipv6.mrt6);
+ net->ipv6.mrt6 = NULL;
+ rtnl_unlock();
}
#endif
@@ -2343,13 +2351,14 @@ int ip6mr_get_route(struct net *net,
}
static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
- u32 portid, u32 seq, struct mfc6_cache *c, int cmd)
+ u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
+ int flags)
{
struct nlmsghdr *nlh;
struct rtmsg *rtm;
int err;
- nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI);
+ nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
if (nlh == NULL)
return -EMSGSIZE;
@@ -2417,7 +2426,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
if (skb == NULL)
goto errout;
- err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd);
+ err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
if (err < 0)
goto errout;
@@ -2456,7 +2465,8 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
if (ip6mr_fill_mroute(mrt, skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE) < 0)
+ mfc, RTM_NEWROUTE,
+ NLM_F_MULTI) < 0)
goto done;
next_entry:
e++;
@@ -2470,7 +2480,8 @@ next_entry:
if (ip6mr_fill_mroute(mrt, skb,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE) < 0) {
+ mfc, RTM_NEWROUTE,
+ NLM_F_MULTI) < 0) {
spin_unlock_bh(&mfc_unres_lock);
goto done;
}
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index bfa6cc36ef2..734aec059ff 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1343,8 +1343,9 @@ static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb,
hdr->daddr = *daddr;
}
-static struct sk_buff *mld_newpack(struct net_device *dev, int size)
+static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size)
{
+ struct net_device *dev = idev->dev;
struct net *net = dev_net(dev);
struct sock *sk = net->ipv6.igmp_sk;
struct sk_buff *skb;
@@ -1369,7 +1370,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
skb_reserve(skb, hlen);
- if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) {
+ if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) {
/* <draft-ietf-magma-mld-source-05.txt>:
* use unspecified address as the source address
* when a valid link-local address is not available.
@@ -1438,11 +1439,12 @@ static void mld_sendpack(struct sk_buff *skb)
dst_output);
out:
if (!err) {
- ICMP6MSGOUT_INC_STATS_BH(net, idev, ICMPV6_MLD2_REPORT);
- ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
- IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
- } else
- IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS);
+ ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT);
+ ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
+ IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len);
+ } else {
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
+ }
rcu_read_unlock();
return;
@@ -1465,7 +1467,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
struct mld2_grec *pgr;
if (!skb)
- skb = mld_newpack(dev, dev->mtu);
+ skb = mld_newpack(pmc->idev, dev->mtu);
if (!skb)
return NULL;
pgr = (struct mld2_grec *)skb_put(skb, sizeof(struct mld2_grec));
@@ -1485,7 +1487,8 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,
static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
int type, int gdeleted, int sdeleted)
{
- struct net_device *dev = pmc->idev->dev;
+ struct inet6_dev *idev = pmc->idev;
+ struct net_device *dev = idev->dev;
struct mld2_report *pmr;
struct mld2_grec *pgr = NULL;
struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list;
@@ -1514,7 +1517,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) {
if (skb)
mld_sendpack(skb);
- skb = mld_newpack(dev, dev->mtu);
+ skb = mld_newpack(idev, dev->mtu);
}
}
first = 1;
@@ -1541,7 +1544,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc,
pgr->grec_nsrcs = htons(scount);
if (skb)
mld_sendpack(skb);
- skb = mld_newpack(dev, dev->mtu);
+ skb = mld_newpack(idev, dev->mtu);
first = 1;
scount = 0;
}
@@ -1596,8 +1599,8 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
struct sk_buff *skb = NULL;
int type;
+ read_lock_bh(&idev->lock);
if (!pmc) {
- read_lock_bh(&idev->lock);
for (pmc=idev->mc_list; pmc; pmc=pmc->next) {
if (pmc->mca_flags & MAF_NOREPORT)
continue;
@@ -1609,7 +1612,6 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
skb = add_grec(skb, pmc, type, 0, 0);
spin_unlock_bh(&pmc->mca_lock);
}
- read_unlock_bh(&idev->lock);
} else {
spin_lock_bh(&pmc->mca_lock);
if (pmc->mca_sfcount[MCAST_EXCLUDE])
@@ -1619,6 +1621,7 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)
skb = add_grec(skb, pmc, type, 0, 0);
spin_unlock_bh(&pmc->mca_lock);
}
+ read_unlock_bh(&idev->lock);
if (skb)
mld_sendpack(skb);
}
@@ -2156,7 +2159,7 @@ static void mld_gq_timer_expire(unsigned long data)
idev->mc_gq_running = 0;
mld_send_report(idev, NULL);
- __in6_dev_put(idev);
+ in6_dev_put(idev);
}
static void mld_ifc_timer_expire(unsigned long data)
@@ -2169,7 +2172,7 @@ static void mld_ifc_timer_expire(unsigned long data)
if (idev->mc_ifc_count)
mld_ifc_start_timer(idev, idev->mc_maxdelay);
}
- __in6_dev_put(idev);
+ in6_dev_put(idev);
}
static void mld_ifc_event(struct inet6_dev *idev)
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index ca4ffcc287f..060a0449aca 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -372,14 +372,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
int tlen = dev->needed_tailroom;
struct sock *sk = dev_net(dev)->ipv6.ndisc_sk;
struct sk_buff *skb;
- int err;
- skb = sock_alloc_send_skb(sk,
- hlen + sizeof(struct ipv6hdr) + len + tlen,
- 1, &err);
+ skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC);
if (!skb) {
- ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb, err=%d\n",
- __func__, err);
+ ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n",
+ __func__);
return NULL;
}
@@ -389,6 +386,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev,
skb_reserve(skb, hlen + sizeof(struct ipv6hdr));
skb_reset_transport_header(skb);
+ /* Manually assign socket ownership as we avoid calling
+ * sock_alloc_send_pskb() to bypass wmem buffer limits
+ */
+ skb_set_owner_w(skb, sk);
+
return skb;
}
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index c9b6a6e6a1e..97cd7507c1a 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -172,63 +172,13 @@ out:
return nf_conntrack_confirm(skb);
}
-static unsigned int __ipv6_conntrack_in(struct net *net,
- unsigned int hooknum,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- struct sk_buff *reasm = skb->nfct_reasm;
- const struct nf_conn_help *help;
- struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
-
- /* This packet is fragmented and has reassembled packet. */
- if (reasm) {
- /* Reassembled packet isn't parsed yet ? */
- if (!reasm->nfct) {
- unsigned int ret;
-
- ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm);
- if (ret != NF_ACCEPT)
- return ret;
- }
-
- /* Conntrack helpers need the entire reassembled packet in the
- * POST_ROUTING hook. In case of unconfirmed connections NAT
- * might reassign a helper, so the entire packet is also
- * required.
- */
- ct = nf_ct_get(reasm, &ctinfo);
- if (ct != NULL && !nf_ct_is_untracked(ct)) {
- help = nfct_help(ct);
- if ((help && help->helper) || !nf_ct_is_confirmed(ct)) {
- nf_conntrack_get_reasm(reasm);
- NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm,
- (struct net_device *)in,
- (struct net_device *)out,
- okfn, NF_IP6_PRI_CONNTRACK + 1);
- return NF_DROP_ERR(-ECANCELED);
- }
- }
-
- nf_conntrack_get(reasm->nfct);
- skb->nfct = reasm->nfct;
- skb->nfctinfo = reasm->nfctinfo;
- return NF_ACCEPT;
- }
-
- return nf_conntrack_in(net, PF_INET6, hooknum, skb);
-}
-
static unsigned int ipv6_conntrack_in(unsigned int hooknum,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn);
+ return nf_conntrack_in(dev_net(in), PF_INET6, hooknum, skb);
}
static unsigned int ipv6_conntrack_local(unsigned int hooknum,
@@ -242,7 +192,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,
net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");
return NF_ACCEPT;
}
- return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn);
+ return nf_conntrack_in(dev_net(out), PF_INET6, hooknum, skb);
}
static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = {
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index dffdc1a389c..253566a8d55 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -621,31 +621,16 @@ ret_orig:
return skb;
}
-void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
- struct net_device *in, struct net_device *out,
- int (*okfn)(struct sk_buff *))
+void nf_ct_frag6_consume_orig(struct sk_buff *skb)
{
struct sk_buff *s, *s2;
- unsigned int ret = 0;
for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
- nf_conntrack_put_reasm(s->nfct_reasm);
- nf_conntrack_get_reasm(skb);
- s->nfct_reasm = skb;
-
s2 = s->next;
s->next = NULL;
-
- if (ret != -ECANCELED)
- ret = NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s,
- in, out, okfn,
- NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
- else
- kfree_skb(s);
-
+ consume_skb(s);
s = s2;
}
- nf_conntrack_put_reasm(skb);
}
static int nf_ct_net_init(struct net *net)
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index aacd121fe8c..581dd9ede0d 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -75,8 +75,11 @@ static unsigned int ipv6_defrag(unsigned int hooknum,
if (reasm == skb)
return NF_ACCEPT;
- nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in,
- (struct net_device *)out, okfn);
+ nf_ct_frag6_consume_orig(reasm);
+
+ NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm,
+ (struct net_device *) in, (struct net_device *) out,
+ okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
return NF_STOLEN;
}
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index eedff8ccded..464b1c9c08e 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -459,14 +459,11 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
if (flags & MSG_OOB)
return -EOPNOTSUPP;
- if (addr_len)
- *addr_len=sizeof(*sin6);
-
if (flags & MSG_ERRQUEUE)
- return ipv6_recv_error(sk, msg, len);
+ return ipv6_recv_error(sk, msg, len, addr_len);
if (np->rxpmtu && np->rxopt.bits.rxpmtu)
- return ipv6_recv_rxpmtu(sk, msg, len);
+ return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
skb = skb_recv_datagram(sk, flags, noblock, &err);
if (!skb)
@@ -500,6 +497,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
sin6->sin6_flowinfo = 0;
sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr,
IP6CB(skb)->iif);
+ *addr_len = sizeof(*sin6);
}
sock_recv_ts_and_drops(msg, sk, skb);
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index 790d9f4b8b0..1aeb473b2cc 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -490,6 +490,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
ipv6_hdr(head)->payload_len = htons(payload_len);
ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn);
IP6CB(head)->nhoff = nhoff;
+ IP6CB(head)->flags |= IP6SKB_FRAGMENTED;
/* Yes, and fold redundant checksum back. 8) */
if (head->ip_summed == CHECKSUM_COMPLETE)
@@ -524,6 +525,9 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
struct net *net = dev_net(skb_dst(skb)->dev);
int evicted;
+ if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
+ goto fail_hdr;
+
IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS);
/* Jumbo payload inhibits frag. header */
@@ -544,6 +548,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS);
IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb);
+ IP6CB(skb)->flags |= IP6SKB_FRAGMENTED;
return 1;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index ad0aa6b0b86..3fde3e97786 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -65,6 +65,12 @@
#include <linux/sysctl.h>
#endif
+enum rt6_nud_state {
+ RT6_NUD_FAIL_HARD = -2,
+ RT6_NUD_FAIL_SOFT = -1,
+ RT6_NUD_SUCCEED = 1
+};
+
static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
const struct in6_addr *dest);
static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
@@ -78,6 +84,8 @@ static int ip6_dst_gc(struct dst_ops *ops);
static int ip6_pkt_discard(struct sk_buff *skb);
static int ip6_pkt_discard_out(struct sk_buff *skb);
+static int ip6_pkt_prohibit(struct sk_buff *skb);
+static int ip6_pkt_prohibit_out(struct sk_buff *skb);
static void ip6_link_failure(struct sk_buff *skb);
static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct sk_buff *skb, u32 mtu);
@@ -227,9 +235,6 @@ static const struct rt6_info ip6_null_entry_template = {
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
-static int ip6_pkt_prohibit(struct sk_buff *skb);
-static int ip6_pkt_prohibit_out(struct sk_buff *skb);
-
static const struct rt6_info ip6_prohibit_entry_template = {
.dst = {
.__refcnt = ATOMIC_INIT(1),
@@ -467,6 +472,24 @@ out:
}
#ifdef CONFIG_IPV6_ROUTER_PREF
+struct __rt6_probe_work {
+ struct work_struct work;
+ struct in6_addr target;
+ struct net_device *dev;
+};
+
+static void rt6_probe_deferred(struct work_struct *w)
+{
+ struct in6_addr mcaddr;
+ struct __rt6_probe_work *work =
+ container_of(w, struct __rt6_probe_work, work);
+
+ addrconf_addr_solict_mult(&work->target, &mcaddr);
+ ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL);
+ dev_put(work->dev);
+ kfree(w);
+}
+
static void rt6_probe(struct rt6_info *rt)
{
struct neighbour *neigh;
@@ -490,17 +513,23 @@ static void rt6_probe(struct rt6_info *rt)
if (!neigh ||
time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
- struct in6_addr mcaddr;
- struct in6_addr *target;
+ struct __rt6_probe_work *work;
+
+ work = kmalloc(sizeof(*work), GFP_ATOMIC);
- if (neigh) {
+ if (neigh && work)
neigh->updated = jiffies;
+
+ if (neigh)
write_unlock(&neigh->lock);
- }
- target = (struct in6_addr *)&rt->rt6i_gateway;
- addrconf_addr_solict_mult(target, &mcaddr);
- ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL);
+ if (work) {
+ INIT_WORK(&work->work, rt6_probe_deferred);
+ work->target = rt->rt6i_gateway;
+ dev_hold(rt->dst.dev);
+ work->dev = rt->dst.dev;
+ schedule_work(&work->work);
+ }
} else {
out:
write_unlock(&neigh->lock);
@@ -527,26 +556,29 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif)
return 0;
}
-static inline bool rt6_check_neigh(struct rt6_info *rt)
+static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)
{
struct neighbour *neigh;
- bool ret = false;
+ enum rt6_nud_state ret = RT6_NUD_FAIL_HARD;
if (rt->rt6i_flags & RTF_NONEXTHOP ||
!(rt->rt6i_flags & RTF_GATEWAY))
- return true;
+ return RT6_NUD_SUCCEED;
rcu_read_lock_bh();
neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway);
if (neigh) {
read_lock(&neigh->lock);
if (neigh->nud_state & NUD_VALID)
- ret = true;
+ ret = RT6_NUD_SUCCEED;
#ifdef CONFIG_IPV6_ROUTER_PREF
else if (!(neigh->nud_state & NUD_FAILED))
- ret = true;
+ ret = RT6_NUD_SUCCEED;
#endif
read_unlock(&neigh->lock);
+ } else {
+ ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ?
+ RT6_NUD_SUCCEED : RT6_NUD_FAIL_SOFT;
}
rcu_read_unlock_bh();
@@ -560,43 +592,52 @@ static int rt6_score_route(struct rt6_info *rt, int oif,
m = rt6_check_dev(rt, oif);
if (!m && (strict & RT6_LOOKUP_F_IFACE))
- return -1;
+ return RT6_NUD_FAIL_HARD;
#ifdef CONFIG_IPV6_ROUTER_PREF
m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
#endif
- if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE))
- return -1;
+ if (strict & RT6_LOOKUP_F_REACHABLE) {
+ int n = rt6_check_neigh(rt);
+ if (n < 0)
+ return n;
+ }
return m;
}
static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
- int *mpri, struct rt6_info *match)
+ int *mpri, struct rt6_info *match,
+ bool *do_rr)
{
int m;
+ bool match_do_rr = false;
if (rt6_check_expired(rt))
goto out;
m = rt6_score_route(rt, oif, strict);
- if (m < 0)
+ if (m == RT6_NUD_FAIL_SOFT && !IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) {
+ match_do_rr = true;
+ m = 0; /* lowest valid score */
+ } else if (m < 0) {
goto out;
+ }
+
+ if (strict & RT6_LOOKUP_F_REACHABLE)
+ rt6_probe(rt);
if (m > *mpri) {
- if (strict & RT6_LOOKUP_F_REACHABLE)
- rt6_probe(match);
+ *do_rr = match_do_rr;
*mpri = m;
match = rt;
- } else if (strict & RT6_LOOKUP_F_REACHABLE) {
- rt6_probe(rt);
}
-
out:
return match;
}
static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
struct rt6_info *rr_head,
- u32 metric, int oif, int strict)
+ u32 metric, int oif, int strict,
+ bool *do_rr)
{
struct rt6_info *rt, *match;
int mpri = -1;
@@ -604,10 +645,10 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
match = NULL;
for (rt = rr_head; rt && rt->rt6i_metric == metric;
rt = rt->dst.rt6_next)
- match = find_match(rt, oif, strict, &mpri, match);
+ match = find_match(rt, oif, strict, &mpri, match, do_rr);
for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
rt = rt->dst.rt6_next)
- match = find_match(rt, oif, strict, &mpri, match);
+ match = find_match(rt, oif, strict, &mpri, match, do_rr);
return match;
}
@@ -616,15 +657,16 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
{
struct rt6_info *match, *rt0;
struct net *net;
+ bool do_rr = false;
rt0 = fn->rr_ptr;
if (!rt0)
fn->rr_ptr = rt0 = fn->leaf;
- match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
+ match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict,
+ &do_rr);
- if (!match &&
- (strict & RT6_LOOKUP_F_REACHABLE)) {
+ if (do_rr) {
struct rt6_info *next = rt0->dst.rt6_next;
/* no entries matched; do round-robin */
@@ -685,8 +727,11 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
prefix = &prefix_buf;
}
- rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
- dev->ifindex);
+ if (rinfo->prefix_len == 0)
+ rt = rt6_get_dflt_router(gwaddr, dev);
+ else
+ rt = rt6_get_route_info(net, prefix, rinfo->prefix_len,
+ gwaddr, dev->ifindex);
if (rt && !lifetime) {
ip6_del_rt(rt);
@@ -829,7 +874,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,
if (ort->rt6i_dst.plen != 128 &&
ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
rt->rt6i_flags |= RTF_ANYCAST;
- rt->rt6i_gateway = *daddr;
}
rt->rt6i_flags |= RTF_CACHE;
@@ -1042,10 +1086,13 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev)))
return NULL;
- if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
- return dst;
+ if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
+ return NULL;
- return NULL;
+ if (rt6_check_expired(rt))
+ return NULL;
+
+ return dst;
}
static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
@@ -1074,10 +1121,13 @@ static void ip6_link_failure(struct sk_buff *skb)
rt = (struct rt6_info *) skb_dst(skb);
if (rt) {
- if (rt->rt6i_flags & RTF_CACHE)
- rt6_update_expires(rt, 0);
- else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
+ if (rt->rt6i_flags & RTF_CACHE) {
+ dst_hold(&rt->dst);
+ if (ip6_del_rt(rt))
+ dst_free(&rt->dst);
+ } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
rt->rt6i_node->fn_sernum = -1;
+ }
}
}
@@ -1223,6 +1273,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
rt->dst.flags |= DST_HOST;
rt->dst.output = ip6_output;
atomic_set(&rt->dst.__refcnt, 1);
+ rt->rt6i_gateway = fl6->daddr;
rt->rt6i_dst.addr = fl6->daddr;
rt->rt6i_dst.plen = 128;
rt->rt6i_idev = idev;
@@ -1377,7 +1428,7 @@ int ip6_route_add(struct fib6_config *cfg)
if (!table)
goto out;
- rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table);
+ rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);
if (!rt) {
err = -ENOMEM;
@@ -1446,21 +1497,24 @@ int ip6_route_add(struct fib6_config *cfg)
goto out;
}
}
- rt->dst.output = ip6_pkt_discard_out;
- rt->dst.input = ip6_pkt_discard;
rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
switch (cfg->fc_type) {
case RTN_BLACKHOLE:
rt->dst.error = -EINVAL;
+ rt->dst.output = dst_discard;
+ rt->dst.input = dst_discard;
break;
case RTN_PROHIBIT:
rt->dst.error = -EACCES;
+ rt->dst.output = ip6_pkt_prohibit_out;
+ rt->dst.input = ip6_pkt_prohibit;
break;
case RTN_THROW:
- rt->dst.error = -EAGAIN;
- break;
default:
- rt->dst.error = -ENETUNREACH;
+ rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN
+ : -ENETUNREACH;
+ rt->dst.output = ip6_pkt_discard_out;
+ rt->dst.input = ip6_pkt_discard;
break;
}
goto install_route;
@@ -1779,11 +1833,12 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,
in6_dev_hold(rt->rt6i_idev);
rt->dst.lastuse = jiffies;
- rt->rt6i_gateway = ort->rt6i_gateway;
+ if (ort->rt6i_flags & RTF_GATEWAY)
+ rt->rt6i_gateway = ort->rt6i_gateway;
+ else
+ rt->rt6i_gateway = *dest;
rt->rt6i_flags = ort->rt6i_flags;
- if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) ==
- (RTF_DEFAULT | RTF_ADDRCONF))
- rt6_set_from(rt, ort);
+ rt6_set_from(rt, ort);
rt->rt6i_metric = 0;
#ifdef CONFIG_IPV6_SUBTREES
@@ -2022,8 +2077,6 @@ static int ip6_pkt_discard_out(struct sk_buff *skb)
return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
}
-#ifdef CONFIG_IPV6_MULTIPLE_TABLES
-
static int ip6_pkt_prohibit(struct sk_buff *skb)
{
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
@@ -2035,8 +2088,6 @@ static int ip6_pkt_prohibit_out(struct sk_buff *skb)
return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
}
-#endif
-
/*
* Allocate a dst for local (unicast / anycast) address.
*/
@@ -2046,12 +2097,10 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
bool anycast)
{
struct net *net = dev_net(idev->dev);
- struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL);
-
- if (!rt) {
- net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n");
+ struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev,
+ DST_NOCOUNT, NULL);
+ if (!rt)
return ERR_PTR(-ENOMEM);
- }
in6_dev_hold(idev);
@@ -2066,6 +2115,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
else
rt->rt6i_flags |= RTF_LOCAL;
+ rt->rt6i_gateway = *addr;
rt->rt6i_dst.addr = *addr;
rt->rt6i_dst.plen = 128;
rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 335363478bb..620d326e8fd 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -566,6 +566,70 @@ static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr,
return false;
}
+/* Checks if an address matches an address on the tunnel interface.
+ * Used to detect the NAT of proto 41 packets and let them pass spoofing test.
+ * Long story:
+ * This function is called after we considered the packet as spoofed
+ * in is_spoofed_6rd.
+ * We may have a router that is doing NAT for proto 41 packets
+ * for an internal station. Destination a.a.a.a/PREFIX:bbbb:bbbb
+ * will be translated to n.n.n.n/PREFIX:bbbb:bbbb. And is_spoofed_6rd
+ * function will return true, dropping the packet.
+ * But, we can still check if is spoofed against the IP
+ * addresses associated with the interface.
+ */
+static bool only_dnatted(const struct ip_tunnel *tunnel,
+ const struct in6_addr *v6dst)
+{
+ int prefix_len;
+
+#ifdef CONFIG_IPV6_SIT_6RD
+ prefix_len = tunnel->ip6rd.prefixlen + 32
+ - tunnel->ip6rd.relay_prefixlen;
+#else
+ prefix_len = 48;
+#endif
+ return ipv6_chk_custom_prefix(v6dst, prefix_len, tunnel->dev);
+}
+
+/* Returns true if a packet is spoofed */
+static bool packet_is_spoofed(struct sk_buff *skb,
+ const struct iphdr *iph,
+ struct ip_tunnel *tunnel)
+{
+ const struct ipv6hdr *ipv6h;
+
+ if (tunnel->dev->priv_flags & IFF_ISATAP) {
+ if (!isatap_chksrc(skb, iph, tunnel))
+ return true;
+
+ return false;
+ }
+
+ if (tunnel->dev->flags & IFF_POINTOPOINT)
+ return false;
+
+ ipv6h = ipv6_hdr(skb);
+
+ if (unlikely(is_spoofed_6rd(tunnel, iph->saddr, &ipv6h->saddr))) {
+ net_warn_ratelimited("Src spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
+ &iph->saddr, &ipv6h->saddr,
+ &iph->daddr, &ipv6h->daddr);
+ return true;
+ }
+
+ if (likely(!is_spoofed_6rd(tunnel, iph->daddr, &ipv6h->daddr)))
+ return false;
+
+ if (only_dnatted(tunnel, &ipv6h->daddr))
+ return false;
+
+ net_warn_ratelimited("Dst spoofed %pI4/%pI6c -> %pI4/%pI6c\n",
+ &iph->saddr, &ipv6h->saddr,
+ &iph->daddr, &ipv6h->daddr);
+ return true;
+}
+
static int ipip6_rcv(struct sk_buff *skb)
{
const struct iphdr *iph = ip_hdr(skb);
@@ -584,19 +648,9 @@ static int ipip6_rcv(struct sk_buff *skb)
skb->protocol = htons(ETH_P_IPV6);
skb->pkt_type = PACKET_HOST;
- if (tunnel->dev->priv_flags & IFF_ISATAP) {
- if (!isatap_chksrc(skb, iph, tunnel)) {
- tunnel->dev->stats.rx_errors++;
- goto out;
- }
- } else {
- if (is_spoofed_6rd(tunnel, iph->saddr,
- &ipv6_hdr(skb)->saddr) ||
- is_spoofed_6rd(tunnel, iph->daddr,
- &ipv6_hdr(skb)->daddr)) {
- tunnel->dev->stats.rx_errors++;
- goto out;
- }
+ if (packet_is_spoofed(skb, iph, tunnel)) {
+ tunnel->dev->stats.rx_errors++;
+ goto out;
}
__skb_tunnel_rx(skb, tunnel->dev);
@@ -713,7 +767,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
if (neigh == NULL) {
- net_dbg_ratelimited("sit: nexthop == NULL\n");
+ net_dbg_ratelimited("nexthop == NULL\n");
goto tx_error;
}
@@ -742,7 +796,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr);
if (neigh == NULL) {
- net_dbg_ratelimited("sit: nexthop == NULL\n");
+ net_dbg_ratelimited("nexthop == NULL\n");
goto tx_error;
}
@@ -865,7 +919,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
iph->ttl = iph6->hop_limit;
skb->ip_summed = CHECKSUM_NONE;
- ip_select_ident(iph, skb_dst(skb), NULL);
+ ip_select_ident(skb, skb_dst(skb), NULL);
iptunnel_xmit(skb, dev);
return NETDEV_TX_OK;
@@ -1453,6 +1507,15 @@ static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = {
#endif
};
+static void ipip6_dellink(struct net_device *dev, struct list_head *head)
+{
+ struct net *net = dev_net(dev);
+ struct sit_net *sitn = net_generic(net, sit_net_id);
+
+ if (dev != sitn->fb_tunnel_dev)
+ unregister_netdevice_queue(dev, head);
+}
+
static struct rtnl_link_ops sit_link_ops __read_mostly = {
.kind = "sit",
.maxtype = IFLA_IPTUN_MAX,
@@ -1463,6 +1526,7 @@ static struct rtnl_link_ops sit_link_ops __read_mostly = {
.changelink = ipip6_changelink,
.get_size = ipip6_get_size,
.fill_info = ipip6_fill_info,
+ .dellink = ipip6_dellink,
};
static struct xfrm_tunnel sit_handler __read_mostly = {
@@ -1507,6 +1571,7 @@ static int __net_init sit_init_net(struct net *net)
goto err_alloc_dev;
}
dev_net_set(sitn->fb_tunnel_dev, net);
+ sitn->fb_tunnel_dev->rtnl_link_ops = &sit_link_ops;
err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev);
if (err)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 0a17ed9eaf3..66c718854e5 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1426,7 +1426,7 @@ ipv6_pktoptions:
if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
if (np->rxopt.bits.rxtclass)
- np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb));
+ np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(opt_skb));
if (ipv6_opt_accepted(sk, opt_skb)) {
skb_set_owner_r(opt_skb, sk);
opt_skb = xchg(&np->pktoptions, opt_skb);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 42923b14dfa..6b298dc614e 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -373,14 +373,11 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
int is_udp4;
bool slow;
- if (addr_len)
- *addr_len = sizeof(struct sockaddr_in6);
-
if (flags & MSG_ERRQUEUE)
- return ipv6_recv_error(sk, msg, len);
+ return ipv6_recv_error(sk, msg, len, addr_len);
if (np->rxpmtu && np->rxopt.bits.rxpmtu)
- return ipv6_recv_rxpmtu(sk, msg, len);
+ return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
try_again:
skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
@@ -461,7 +458,7 @@ try_again:
ipv6_iface_scope_id(&sin6->sin6_addr,
IP6CB(skb)->iif);
}
-
+ *addr_len = sizeof(*sin6);
}
if (is_udp4) {
if (inet->cmsg_flags)
@@ -955,11 +952,16 @@ static int udp_v6_push_pending_frames(struct sock *sk)
struct udphdr *uh;
struct udp_sock *up = udp_sk(sk);
struct inet_sock *inet = inet_sk(sk);
- struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
+ struct flowi6 *fl6;
int err = 0;
int is_udplite = IS_UDPLITE(sk);
__wsum csum = 0;
+ if (up->pending == AF_INET)
+ return udp_push_pending_frames(sk);
+
+ fl6 = &inet->cork.fl.u.ip6;
+
/* Grab the skbuff where UDP header space exists. */
if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
goto out;
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index d3cfaf9c7a0..2f65b022627 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -85,7 +85,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
/* Check if there is enough headroom to insert fragment header. */
tnl_hlen = skb_tnl_header_len(skb);
- if (skb_headroom(skb) < (tnl_hlen + frag_hdr_sz)) {
+ if (skb_mac_header(skb) < skb->head + tnl_hlen + frag_hdr_sz) {
if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz))
goto out;
}
@@ -108,7 +108,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);
fptr->nexthdr = nexthdr;
fptr->reserved = 0;
- ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb));
+ fptr->identification = skb_shinfo(skb)->ip6_frag_id;
/* Fragment the skb. ipv6 header and the remaining fields of the
* fragment header are updated in ipv6_gso_segment()
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index f547a47d381..e0897377b3b 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -1823,8 +1823,6 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
if (skb->tstamp.tv64)
sk->sk_stamp = skb->tstamp;
- msg->msg_namelen = sizeof(*sipx);
-
if (sipx) {
sipx->sipx_family = AF_IPX;
sipx->sipx_port = ipx->ipx_source.sock;
@@ -1832,6 +1830,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
sipx->sipx_network = IPX_SKB_CB(skb)->ipx_source_net;
sipx->sipx_type = ipx->ipx_type;
sipx->sipx_zero = 0;
+ msg->msg_namelen = sizeof(*sipx);
}
rc = copied;
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 0578d4fa00a..a5e62ef5715 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -1385,8 +1385,6 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock,
IRDA_DEBUG(4, "%s()\n", __func__);
- msg->msg_namelen = 0;
-
skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
flags & MSG_DONTWAIT, &err);
if (!skb)
@@ -1451,8 +1449,6 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock,
target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
timeo = sock_rcvtimeo(sk, noblock);
- msg->msg_namelen = 0;
-
do {
int chunk;
struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue);
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index ae691651b72..276aa86f366 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1324,8 +1324,6 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
int err = 0;
u32 offset;
- msg->msg_namelen = 0;
-
if ((sk->sk_state == IUCV_DISCONN) &&
skb_queue_empty(&iucv->backlog_skb_q) &&
skb_queue_empty(&sk->sk_receive_queue) &&
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 9da862070dd..66f51c5a8a3 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -2081,6 +2081,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy *
pol->sadb_x_policy_type = IPSEC_POLICY_NONE;
}
pol->sadb_x_policy_dir = dir+1;
+ pol->sadb_x_policy_reserved = 0;
pol->sadb_x_policy_id = xp->index;
pol->sadb_x_policy_priority = xp->priority;
@@ -3137,7 +3138,9 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC;
pol->sadb_x_policy_dir = XFRM_POLICY_OUT + 1;
+ pol->sadb_x_policy_reserved = 0;
pol->sadb_x_policy_id = xp->index;
+ pol->sadb_x_policy_priority = xp->priority;
/* Set sadb_comb's. */
if (x->id.proto == IPPROTO_AH)
@@ -3525,6 +3528,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC;
pol->sadb_x_policy_dir = dir + 1;
+ pol->sadb_x_policy_reserved = 0;
pol->sadb_x_policy_id = 0;
pol->sadb_x_policy_priority = 0;
@@ -3619,7 +3623,6 @@ static int pfkey_recvmsg(struct kiocb *kiocb,
if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
goto out;
- msg->msg_namelen = 0;
skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
if (skb == NULL)
goto out;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 6984c3a353c..8c27de2b4d5 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -115,6 +115,11 @@ struct l2tp_net {
static void l2tp_session_set_header_len(struct l2tp_session *session, int version);
static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
+static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk)
+{
+ return sk->sk_user_data;
+}
+
static inline struct l2tp_net *l2tp_pernet(struct net *net)
{
BUG_ON(!net);
@@ -507,7 +512,7 @@ static inline int l2tp_verify_udp_checksum(struct sock *sk,
return 0;
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == PF_INET6) {
+ if (sk->sk_family == PF_INET6 && !l2tp_tunnel(sk)->v4mapped) {
if (!uh->check) {
LIMIT_NETDEBUG(KERN_INFO "L2TP: IPv6: checksum is 0\n");
return 1;
@@ -1071,7 +1076,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
/* Queue the packet to IP for output */
skb->local_df = 1;
#if IS_ENABLED(CONFIG_IPV6)
- if (skb->sk->sk_family == PF_INET6)
+ if (skb->sk->sk_family == PF_INET6 && !tunnel->v4mapped)
error = inet6_csk_xmit(skb, NULL);
else
#endif
@@ -1198,7 +1203,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
/* Calculate UDP checksum if configured to do so */
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == PF_INET6)
+ if (sk->sk_family == PF_INET6 && !tunnel->v4mapped)
l2tp_xmit_ipv6_csum(sk, skb, udp_len);
else
#endif
@@ -1247,10 +1252,9 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb);
*/
static void l2tp_tunnel_destruct(struct sock *sk)
{
- struct l2tp_tunnel *tunnel;
+ struct l2tp_tunnel *tunnel = l2tp_tunnel(sk);
struct l2tp_net *pn;
- tunnel = sk->sk_user_data;
if (tunnel == NULL)
goto end;
@@ -1618,7 +1622,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
}
/* Check if this socket has already been prepped */
- tunnel = (struct l2tp_tunnel *)sk->sk_user_data;
+ tunnel = l2tp_tunnel(sk);
if (tunnel != NULL) {
/* This socket has already been prepped */
err = -EBUSY;
@@ -1647,6 +1651,24 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
if (cfg != NULL)
tunnel->debug = cfg->debug;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == PF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ if (ipv6_addr_v4mapped(&np->saddr) &&
+ ipv6_addr_v4mapped(&np->daddr)) {
+ struct inet_sock *inet = inet_sk(sk);
+
+ tunnel->v4mapped = true;
+ inet->inet_saddr = np->saddr.s6_addr32[3];
+ inet->inet_rcv_saddr = np->rcv_saddr.s6_addr32[3];
+ inet->inet_daddr = np->daddr.s6_addr32[3];
+ } else {
+ tunnel->v4mapped = false;
+ }
+ }
+#endif
+
/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
tunnel->encap = encap;
if (encap == L2TP_ENCAPTYPE_UDP) {
@@ -1655,7 +1677,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv;
udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy;
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == PF_INET6)
+ if (sk->sk_family == PF_INET6 && !tunnel->v4mapped)
udpv6_encap_enable();
else
#endif
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 485a490fd99..2f89d43877d 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -189,6 +189,9 @@ struct l2tp_tunnel {
struct sock *sock; /* Parent socket */
int fd; /* Parent fd, if tunnel socket
* was created by userspace */
+#if IS_ENABLED(CONFIG_IPV6)
+ bool v4mapped;
+#endif
struct work_struct del_work;
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index 571db8dd229..da1a1cee1a0 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -518,9 +518,6 @@ static int l2tp_ip_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
if (flags & MSG_OOB)
goto out;
- if (addr_len)
- *addr_len = sizeof(*sin);
-
skb = skb_recv_datagram(sk, flags, noblock, &err);
if (!skb)
goto out;
@@ -543,6 +540,7 @@ static int l2tp_ip_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
sin->sin_port = 0;
memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
+ *addr_len = sizeof(*sin);
}
if (inet->cmsg_flags)
ip_cmsg_recv(msg, skb);
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index b8a6039314e..e6e8408c9e3 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -665,7 +665,7 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk,
*addr_len = sizeof(*lsa);
if (flags & MSG_ERRQUEUE)
- return ipv6_recv_error(sk, msg, len);
+ return ipv6_recv_error(sk, msg, len, addr_len);
skb = skb_recv_datagram(sk, flags, noblock, &err);
if (!skb)
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 8dec6876dc5..44441c0c503 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -197,8 +197,6 @@ static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock,
if (sk->sk_state & PPPOX_BOUND)
goto end;
- msg->msg_namelen = 0;
-
err = 0;
skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
flags & MSG_DONTWAIT, &err);
@@ -353,7 +351,9 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh
goto error_put_sess_tun;
}
+ local_bh_disable();
l2tp_xmit_skb(session, skb, session->hdr_len);
+ local_bh_enable();
sock_put(ps->tunnel_sock);
sock_put(sk);
@@ -422,7 +422,9 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
skb->data[0] = ppph[0];
skb->data[1] = ppph[1];
+ local_bh_disable();
l2tp_xmit_skb(session, skb, session->hdr_len);
+ local_bh_enable();
sock_put(sk_tun);
sock_put(sk);
@@ -1793,7 +1795,8 @@ static const struct proto_ops pppol2tp_ops = {
static const struct pppox_proto pppol2tp_proto = {
.create = pppol2tp_create,
- .ioctl = pppol2tp_ioctl
+ .ioctl = pppol2tp_ioctl,
+ .owner = THIS_MODULE,
};
#ifdef CONFIG_L2TP_V3
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 48aaa89253e..c3ee8054706 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -715,13 +715,11 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
unsigned long cpu_flags;
size_t copied = 0;
u32 peek_seq = 0;
- u32 *seq;
+ u32 *seq, skb_len;
unsigned long used;
int target; /* Read at least this many bytes */
long timeo;
- msg->msg_namelen = 0;
-
lock_sock(sk);
copied = -ENOTCONN;
if (unlikely(sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN))
@@ -814,6 +812,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
}
continue;
found_ok_skb:
+ skb_len = skb->len;
/* Ok so how much can we use? */
used = skb->len - offset;
if (len < used)
@@ -846,7 +845,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock,
}
/* Partial read */
- if (used + offset < skb->len)
+ if (used + offset < skb_len)
continue;
} while (len > 0);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 4fdb306e42e..e922bf3f422 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -652,6 +652,8 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy,
if (sta->sdata->dev != dev)
continue;
+ sinfo.filled = 0;
+ sta_set_sinfo(sta, &sinfo);
i = 0;
ADD_STA_STATS(sta);
}
@@ -973,8 +975,10 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
IEEE80211_P2P_OPPPS_ENABLE_BIT;
err = ieee80211_assign_beacon(sdata, &params->beacon);
- if (err < 0)
+ if (err < 0) {
+ ieee80211_vif_release_channel(sdata);
return err;
+ }
changed |= err;
err = drv_start_ap(sdata->local, sdata);
@@ -983,6 +987,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
if (old)
kfree_rcu(old, rcu_head);
RCU_INIT_POINTER(sdata->u.ap.beacon, NULL);
+ ieee80211_vif_release_channel(sdata);
return err;
}
@@ -2354,8 +2359,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev,
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr);
- if (sdata->vif.type != NL80211_IFTYPE_STATION &&
- sdata->vif.type != NL80211_IFTYPE_MESH_POINT)
+ if (sdata->vif.type != NL80211_IFTYPE_STATION)
return -EOPNOTSUPP;
if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS))
@@ -2475,6 +2479,24 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work);
INIT_LIST_HEAD(&roc->dependents);
+ /*
+ * cookie is either the roc cookie (for normal roc)
+ * or the SKB (for mgmt TX)
+ */
+ if (!txskb) {
+ /* local->mtx protects this */
+ local->roc_cookie_counter++;
+ roc->cookie = local->roc_cookie_counter;
+ /* wow, you wrapped 64 bits ... more likely a bug */
+ if (WARN_ON(roc->cookie == 0)) {
+ roc->cookie = 1;
+ local->roc_cookie_counter++;
+ }
+ *cookie = roc->cookie;
+ } else {
+ *cookie = (unsigned long)txskb;
+ }
+
/* if there's one pending or we're scanning, queue this one */
if (!list_empty(&local->roc_list) ||
local->scanning || local->radar_detect_enabled)
@@ -2609,24 +2631,6 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
if (!queued)
list_add_tail(&roc->list, &local->roc_list);
- /*
- * cookie is either the roc cookie (for normal roc)
- * or the SKB (for mgmt TX)
- */
- if (!txskb) {
- /* local->mtx protects this */
- local->roc_cookie_counter++;
- roc->cookie = local->roc_cookie_counter;
- /* wow, you wrapped 64 bits ... more likely a bug */
- if (WARN_ON(roc->cookie == 0)) {
- roc->cookie = 1;
- local->roc_cookie_counter++;
- }
- *cookie = roc->cookie;
- } else {
- *cookie = (unsigned long)txskb;
- }
-
return 0;
}
@@ -3313,7 +3317,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev,
return -EINVAL;
}
band = chanctx_conf->def.chan->band;
- sta = sta_info_get(sdata, peer);
+ sta = sta_info_get_bss(sdata, peer);
if (sta) {
qos = test_sta_flag(sta, WLAN_STA_WME);
} else {
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 170f9a7fa31..3052672e37f 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -1166,6 +1166,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state);
ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED |
BSS_CHANGED_IBSS);
+ ieee80211_vif_release_channel(sdata);
synchronize_rcu();
kfree(presp);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 9ca8e3278cc..92ef04c72c5 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -842,6 +842,8 @@ struct tpt_led_trigger {
* that the scan completed.
* @SCAN_ABORTED: Set for our scan work function when the driver reported
* a scan complete for an aborted scan.
+ * @SCAN_HW_CANCELLED: Set for our scan work function when the scan is being
+ * cancelled.
*/
enum {
SCAN_SW_SCANNING,
@@ -849,6 +851,7 @@ enum {
SCAN_ONCHANNEL_SCANNING,
SCAN_COMPLETED,
SCAN_ABORTED,
+ SCAN_HW_CANCELLED,
};
/**
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index 98d20c0f6fe..514e90f470b 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1726,6 +1726,15 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local)
if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
dev_close(sdata->dev);
+ /*
+ * Close all AP_VLAN interfaces first, as otherwise they
+ * might be closed while the AP interface they belong to
+ * is closed, causing unregister_netdevice_many() to crash.
+ */
+ list_for_each_entry(sdata, &local->interfaces, list)
+ if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN)
+ dev_close(sdata->dev);
+
mutex_lock(&local->iflist_mtx);
list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) {
list_del(&sdata->list);
diff --git a/net/mac80211/mesh_ps.c b/net/mac80211/mesh_ps.c
index 3b7bfc01ee3..ddda201832b 100644
--- a/net/mac80211/mesh_ps.c
+++ b/net/mac80211/mesh_ps.c
@@ -36,6 +36,7 @@ static struct sk_buff *mps_qos_null_get(struct sta_info *sta)
sdata->vif.addr);
nullfunc->frame_control = fc;
nullfunc->duration_id = 0;
+ nullfunc->seq_ctrl = 0;
/* no address resolution for this frame -> set addr 1 immediately */
memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN);
memset(skb_put(skb, 2), 0, 2); /* append QoS control field */
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 741448b3082..49bc2246bd8 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -31,10 +31,12 @@
#include "led.h"
#define IEEE80211_AUTH_TIMEOUT (HZ / 5)
+#define IEEE80211_AUTH_TIMEOUT_LONG (HZ / 2)
#define IEEE80211_AUTH_TIMEOUT_SHORT (HZ / 10)
#define IEEE80211_AUTH_MAX_TRIES 3
#define IEEE80211_AUTH_WAIT_ASSOC (HZ * 5)
#define IEEE80211_ASSOC_TIMEOUT (HZ / 5)
+#define IEEE80211_ASSOC_TIMEOUT_LONG (HZ / 2)
#define IEEE80211_ASSOC_TIMEOUT_SHORT (HZ / 10)
#define IEEE80211_ASSOC_MAX_TRIES 3
@@ -237,8 +239,9 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
struct ieee80211_channel *channel,
const struct ieee80211_ht_operation *ht_oper,
const struct ieee80211_vht_operation *vht_oper,
- struct cfg80211_chan_def *chandef, bool verbose)
+ struct cfg80211_chan_def *chandef, bool tracking)
{
+ struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct cfg80211_chan_def vht_chandef;
u32 ht_cfreq, ret;
@@ -257,7 +260,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan,
channel->band);
/* check that channel matches the right operating channel */
- if (channel->center_freq != ht_cfreq) {
+ if (!tracking && channel->center_freq != ht_cfreq) {
/*
* It's possible that some APs are confused here;
* Netgear WNDR3700 sometimes reports 4 higher than
@@ -265,11 +268,10 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
* since we look at probe response/beacon data here
* it should be OK.
*/
- if (verbose)
- sdata_info(sdata,
- "Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n",
- channel->center_freq, ht_cfreq,
- ht_oper->primary_chan, channel->band);
+ sdata_info(sdata,
+ "Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n",
+ channel->center_freq, ht_cfreq,
+ ht_oper->primary_chan, channel->band);
ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT;
goto out;
}
@@ -308,6 +310,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
switch (vht_oper->chan_width) {
case IEEE80211_VHT_CHANWIDTH_USE_HT:
vht_chandef.width = chandef->width;
+ vht_chandef.center_freq1 = chandef->center_freq1;
break;
case IEEE80211_VHT_CHANWIDTH_80MHZ:
vht_chandef.width = NL80211_CHAN_WIDTH_80;
@@ -323,7 +326,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
channel->band);
break;
default:
- if (verbose)
+ if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
sdata_info(sdata,
"AP VHT operation IE has invalid channel width (%d), disable VHT\n",
vht_oper->chan_width);
@@ -332,7 +335,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
}
if (!cfg80211_chandef_valid(&vht_chandef)) {
- if (verbose)
+ if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
sdata_info(sdata,
"AP VHT information is invalid, disable VHT\n");
ret = IEEE80211_STA_DISABLE_VHT;
@@ -345,7 +348,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
}
if (!cfg80211_chandef_compatible(chandef, &vht_chandef)) {
- if (verbose)
+ if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
sdata_info(sdata,
"AP VHT information doesn't match HT, disable VHT\n");
ret = IEEE80211_STA_DISABLE_VHT;
@@ -357,22 +360,53 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
ret = 0;
out:
+ /*
+ * When tracking the current AP, don't do any further checks if the
+ * new chandef is identical to the one we're currently using for the
+ * connection. This keeps us from playing ping-pong with regulatory,
+ * without it the following can happen (for example):
+ * - connect to an AP with 80 MHz, world regdom allows 80 MHz
+ * - AP advertises regdom US
+ * - CRDA loads regdom US with 80 MHz prohibited (old database)
+ * - the code below detects an unsupported channel, downgrades, and
+ * we disconnect from the AP in the caller
+ * - disconnect causes CRDA to reload world regdomain and the game
+ * starts anew.
+ * (see https://bugzilla.kernel.org/show_bug.cgi?id=70881)
+ *
+ * It seems possible that there are still scenarios with CSA or real
+ * bandwidth changes where a this could happen, but those cases are
+ * less common and wouldn't completely prevent using the AP.
+ */
+ if (tracking &&
+ cfg80211_chandef_identical(chandef, &sdata->vif.bss_conf.chandef))
+ return ret;
+
/* don't print the message below for VHT mismatch if VHT is disabled */
if (ret & IEEE80211_STA_DISABLE_VHT)
vht_chandef = *chandef;
+ /*
+ * Ignore the DISABLED flag when we're already connected and only
+ * tracking the APs beacon for bandwidth changes - otherwise we
+ * might get disconnected here if we connect to an AP, update our
+ * regulatory information based on the AP's country IE and the
+ * information we have is wrong/outdated and disables the channel
+ * that we're actually using for the connection to the AP.
+ */
while (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef,
- IEEE80211_CHAN_DISABLED)) {
+ tracking ? 0 :
+ IEEE80211_CHAN_DISABLED)) {
if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) {
ret = IEEE80211_STA_DISABLE_HT |
IEEE80211_STA_DISABLE_VHT;
- goto out;
+ break;
}
ret |= chandef_downgrade(chandef);
}
- if (chandef->width != vht_chandef.width && verbose)
+ if (chandef->width != vht_chandef.width && !tracking)
sdata_info(sdata,
"capabilities/regulatory prevented using AP HT/VHT configuration, downgraded\n");
@@ -412,7 +446,7 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
/* calculate new channel (type) based on HT/VHT operation IEs */
flags = ieee80211_determine_chantype(sdata, sband, chan, ht_oper,
- vht_oper, &chandef, false);
+ vht_oper, &chandef, true);
/*
* Downgrade the new channel if we associated with restricted
@@ -3461,10 +3495,13 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata)
if (tx_flags == 0) {
auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT;
- ifmgd->auth_data->timeout_started = true;
+ auth_data->timeout_started = true;
run_again(ifmgd, auth_data->timeout);
} else {
- auth_data->timeout_started = false;
+ auth_data->timeout =
+ round_jiffies_up(jiffies + IEEE80211_AUTH_TIMEOUT_LONG);
+ auth_data->timeout_started = true;
+ run_again(ifmgd, auth_data->timeout);
}
return 0;
@@ -3501,7 +3538,11 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata)
assoc_data->timeout_started = true;
run_again(&sdata->u.mgd, assoc_data->timeout);
} else {
- assoc_data->timeout_started = false;
+ assoc_data->timeout =
+ round_jiffies_up(jiffies +
+ IEEE80211_ASSOC_TIMEOUT_LONG);
+ assoc_data->timeout_started = true;
+ run_again(&sdata->u.mgd, assoc_data->timeout);
}
return 0;
@@ -3906,7 +3947,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
ifmgd->flags |= ieee80211_determine_chantype(sdata, sband,
cbss->channel,
ht_oper, vht_oper,
- &chandef, true);
+ &chandef, false);
sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss),
local->rx_chains);
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index 7fc5d0d8149..34012620434 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -99,10 +99,13 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
}
mutex_unlock(&local->sta_mtx);
- /* remove all interfaces */
+ /* remove all interfaces that were created in the driver */
list_for_each_entry(sdata, &local->interfaces, list) {
- if (!ieee80211_sdata_running(sdata))
+ if (!ieee80211_sdata_running(sdata) ||
+ sdata->vif.type == NL80211_IFTYPE_AP_VLAN ||
+ sdata->vif.type == NL80211_IFTYPE_MONITOR)
continue;
+
drv_remove_interface(local, sdata);
}
diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c
index ac7ef5414bd..e6512e2ffd2 100644
--- a/net/mac80211/rc80211_minstrel.c
+++ b/net/mac80211/rc80211_minstrel.c
@@ -290,7 +290,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
struct minstrel_rate *msr, *mr;
unsigned int ndx;
bool mrr_capable;
- bool prev_sample = mi->prev_sample;
+ bool prev_sample;
int delta;
int sampling_ratio;
@@ -314,6 +314,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta,
(mi->sample_count + mi->sample_deferred / 2);
/* delta < 0: no sampling required */
+ prev_sample = mi->prev_sample;
mi->prev_sample = false;
if (delta < 0 || (!mrr_capable && prev_sample))
return;
diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 5b2d3012b98..f3bbea1eb9e 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -804,10 +804,18 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta,
sample_group = &minstrel_mcs_groups[sample_idx / MCS_GROUP_RATES];
info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE;
+ rate->count = 1;
+
+ if (sample_idx / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) {
+ int idx = sample_idx % ARRAY_SIZE(mp->cck_rates);
+ rate->idx = mp->cck_rates[idx];
+ rate->flags = 0;
+ return;
+ }
+
rate->idx = sample_idx % MCS_GROUP_RATES +
(sample_group->streams - 1) * MCS_GROUP_RATES;
rate->flags = IEEE80211_TX_RC_MCS | sample_group->flags;
- rate->count = 1;
}
static void
@@ -820,6 +828,9 @@ minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi,
if (sband->band != IEEE80211_BAND_2GHZ)
return;
+ if (!(mp->hw->flags & IEEE80211_HW_SUPPORTS_HT_CCK_RATES))
+ return;
+
mi->cck_supported = 0;
mi->cck_supported_short = 0;
for (i = 0; i < 4; i++) {
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 8e295262025..fae73b0ef14 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -864,7 +864,8 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
u16 sc;
u8 tid, ack_policy;
- if (!ieee80211_is_data_qos(hdr->frame_control))
+ if (!ieee80211_is_data_qos(hdr->frame_control) ||
+ is_multicast_ether_addr(hdr->addr1))
goto dont_reorder;
/*
@@ -932,8 +933,14 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx)
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb);
- /* Drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.2.9) */
- if (rx->sta && !is_multicast_ether_addr(hdr->addr1)) {
+ /*
+ * Drop duplicate 802.11 retransmissions
+ * (IEEE 802.11-2012: 9.3.2.10 "Duplicate detection and recovery")
+ */
+ if (rx->skb->len >= 24 && rx->sta &&
+ !ieee80211_is_ctl(hdr->frame_control) &&
+ !ieee80211_is_qos_nullfunc(hdr->frame_control) &&
+ !is_multicast_ether_addr(hdr->addr1)) {
if (unlikely(ieee80211_has_retry(hdr->frame_control) &&
rx->sta->last_seq_ctrl[rx->seqno_idx] ==
hdr->seq_ctrl)) {
@@ -2996,6 +3003,9 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx,
case NL80211_IFTYPE_ADHOC:
if (!bssid)
return 0;
+ if (ether_addr_equal(sdata->vif.addr, hdr->addr2) ||
+ ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2))
+ return 0;
if (ieee80211_is_beacon(hdr->frame_control)) {
return 1;
} else if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) {
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 99b103921a4..eb03337b654 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -202,6 +202,9 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
enum ieee80211_band band;
int i, ielen, n_chans;
+ if (test_bit(SCAN_HW_CANCELLED, &local->scanning))
+ return false;
+
do {
if (local->hw_scan_band == IEEE80211_NUM_BANDS)
return false;
@@ -878,7 +881,23 @@ void ieee80211_scan_cancel(struct ieee80211_local *local)
if (!local->scan_req)
goto out;
+ /*
+ * We have a scan running and the driver already reported completion,
+ * but the worker hasn't run yet or is stuck on the mutex - mark it as
+ * cancelled.
+ */
+ if (test_bit(SCAN_HW_SCANNING, &local->scanning) &&
+ test_bit(SCAN_COMPLETED, &local->scanning)) {
+ set_bit(SCAN_HW_CANCELLED, &local->scanning);
+ goto out;
+ }
+
if (test_bit(SCAN_HW_SCANNING, &local->scanning)) {
+ /*
+ * Make sure that __ieee80211_scan_completed doesn't trigger a
+ * scan on another band.
+ */
+ set_bit(SCAN_HW_CANCELLED, &local->scanning);
if (local->ops->cancel_hw_scan)
drv_cancel_hw_scan(local,
rcu_dereference_protected(local->scan_sdata,
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 11216bc13b2..0418777c361 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -339,6 +339,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
return NULL;
spin_lock_init(&sta->lock);
+ spin_lock_init(&sta->ps_lock);
INIT_WORK(&sta->drv_unblock_wk, sta_unblock);
INIT_WORK(&sta->ampdu_mlme.work, ieee80211_ba_session_work);
mutex_init(&sta->ampdu_mlme.mtx);
@@ -1045,6 +1046,8 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
skb_queue_head_init(&pending);
+ /* sync with ieee80211_tx_h_unicast_ps_buf */
+ spin_lock(&sta->ps_lock);
/* Send all buffered frames to the station */
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
int count = skb_queue_len(&pending), tmp;
@@ -1064,6 +1067,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
}
ieee80211_add_pending_skbs_fn(local, &pending, clear_sta_ps_flags, sta);
+ spin_unlock(&sta->ps_lock);
local->total_ps_buffered -= buffered;
@@ -1110,6 +1114,7 @@ static void ieee80211_send_null_response(struct ieee80211_sub_if_data *sdata,
memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN);
memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN);
memcpy(nullfunc->addr3, sdata->vif.addr, ETH_ALEN);
+ nullfunc->seq_ctrl = 0;
skb->priority = tid;
skb_set_queue_mapping(skb, ieee802_1d_to_ac[tid]);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index adc30045f99..3184b2b2853 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -244,6 +244,7 @@ struct sta_ampdu_mlme {
* @drv_unblock_wk: used for driver PS unblocking
* @listen_interval: listen interval of this station, when we're acting as AP
* @_flags: STA flags, see &enum ieee80211_sta_info_flags, do not use directly
+ * @ps_lock: used for powersave (when mac80211 is the AP) related locking
* @ps_tx_buf: buffers (per AC) of frames to transmit to this station
* when it leaves power saving state or polls
* @tx_filtered: buffers (per AC) of frames we already tried to
@@ -324,10 +325,8 @@ struct sta_info {
/* use the accessors defined below */
unsigned long _flags;
- /*
- * STA powersave frame queues, no more than the internal
- * locking required.
- */
+ /* STA powersave lock and frame queues */
+ spinlock_t ps_lock;
struct sk_buff_head ps_tx_buf[IEEE80211_NUM_ACS];
struct sk_buff_head tx_filtered[IEEE80211_NUM_ACS];
unsigned long driver_buffered_tids;
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 43439203f4e..9e78206bd9b 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -180,6 +180,9 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb)
struct ieee80211_local *local = sta->local;
struct ieee80211_sub_if_data *sdata = sta->sdata;
+ if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS)
+ sta->last_rx = jiffies;
+
if (ieee80211_is_data_qos(mgmt->frame_control)) {
struct ieee80211_hdr *hdr = (void *) skb->data;
u8 *qc = ieee80211_get_qos_ctl(hdr);
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 9972e07a2f9..6d5791d735f 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -447,7 +447,6 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
{
struct sta_info *sta = tx->sta;
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
- struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
struct ieee80211_local *local = tx->local;
if (unlikely(!sta))
@@ -458,19 +457,24 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
!(info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER))) {
int ac = skb_get_queue_mapping(tx->skb);
- /* only deauth, disassoc and action are bufferable MMPDUs */
- if (ieee80211_is_mgmt(hdr->frame_control) &&
- !ieee80211_is_deauth(hdr->frame_control) &&
- !ieee80211_is_disassoc(hdr->frame_control) &&
- !ieee80211_is_action(hdr->frame_control)) {
- info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER;
- return TX_CONTINUE;
- }
-
ps_dbg(sta->sdata, "STA %pM aid %d: PS buffer for AC %d\n",
sta->sta.addr, sta->sta.aid, ac);
if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER)
purge_old_ps_buffers(tx->local);
+
+ /* sync with ieee80211_sta_ps_deliver_wakeup */
+ spin_lock(&sta->ps_lock);
+ /*
+ * STA woke up the meantime and all the frames on ps_tx_buf have
+ * been queued to pending queue. No reordering can happen, go
+ * ahead and Tx the packet.
+ */
+ if (!test_sta_flag(sta, WLAN_STA_PS_STA) &&
+ !test_sta_flag(sta, WLAN_STA_PS_DRIVER)) {
+ spin_unlock(&sta->ps_lock);
+ return TX_CONTINUE;
+ }
+
if (skb_queue_len(&sta->ps_tx_buf[ac]) >= STA_MAX_TX_BUFFER) {
struct sk_buff *old = skb_dequeue(&sta->ps_tx_buf[ac]);
ps_dbg(tx->sdata,
@@ -484,6 +488,7 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
info->control.vif = &tx->sdata->vif;
info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING;
skb_queue_tail(&sta->ps_tx_buf[ac], tx->skb);
+ spin_unlock(&sta->ps_lock);
if (!timer_pending(&local->sta_cleanup))
mod_timer(&local->sta_cleanup,
@@ -509,9 +514,22 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx)
static ieee80211_tx_result debug_noinline
ieee80211_tx_h_ps_buf(struct ieee80211_tx_data *tx)
{
+ struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb);
+ struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data;
+
if (unlikely(tx->flags & IEEE80211_TX_PS_BUFFERED))
return TX_CONTINUE;
+ /* only deauth, disassoc and action are bufferable MMPDUs */
+ if (ieee80211_is_mgmt(hdr->frame_control) &&
+ !ieee80211_is_deauth(hdr->frame_control) &&
+ !ieee80211_is_disassoc(hdr->frame_control) &&
+ !ieee80211_is_action(hdr->frame_control)) {
+ if (tx->flags & IEEE80211_TX_UNICAST)
+ info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER;
+ return TX_CONTINUE;
+ }
+
if (tx->flags & IEEE80211_TX_UNICAST)
return ieee80211_tx_h_unicast_ps_buf(tx);
else
@@ -851,7 +869,7 @@ static int ieee80211_fragment(struct ieee80211_tx_data *tx,
}
/* adjust first fragment's length */
- skb->len = hdrlen + per_fragm;
+ skb_trim(skb, hdrlen + per_fragm);
return 0;
}
@@ -1100,7 +1118,8 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata,
tx->sta = rcu_dereference(sdata->u.vlan.sta);
if (!tx->sta && sdata->dev->ieee80211_ptr->use_4addr)
return TX_DROP;
- } else if (info->flags & IEEE80211_TX_CTL_INJECTED ||
+ } else if (info->flags & (IEEE80211_TX_CTL_INJECTED |
+ IEEE80211_TX_INTFL_NL80211_FRAME_TX) ||
tx->sdata->control_port_protocol == tx->skb->protocol) {
tx->sta = sta_info_get_bss(sdata, hdr->addr1);
}
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 72e6292955b..5db8eb5d56c 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -2174,6 +2174,10 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
}
rate = cfg80211_calculate_bitrate(&ri);
+ if (WARN_ONCE(!rate,
+ "Invalid bitrate: flags=0x%x, idx=%d, vht_nss=%d\n",
+ status->flag, status->rate_idx, status->vht_nss))
+ return 0;
/* rewind from end of MPDU */
if (status->flag & RX_FLAG_MACTIME_END)
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index afba19cb6f8..a282fddf8b0 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -153,6 +153,11 @@ u16 ieee80211_select_queue(struct ieee80211_sub_if_data *sdata,
return IEEE80211_AC_BE;
}
+ if (skb->protocol == sdata->control_port_protocol) {
+ skb->priority = 7;
+ return ieee80211_downgrade_queue(sdata, skb);
+ }
+
/* use the data classifier to determine what 802.1d tag the
* data frame has */
skb->priority = cfg80211_classify8021d(skb);
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 57beb1762b2..707bc520d62 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -325,18 +325,22 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length)
static void
mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length)
{
- u8 i, j;
-
- for (i = 0; i < nets_length - 1 && h->nets[i].cidr != cidr; i++)
- ;
- h->nets[i].nets--;
-
- if (h->nets[i].nets != 0)
- return;
-
- for (j = i; j < nets_length - 1 && h->nets[j].nets; j++) {
- h->nets[j].cidr = h->nets[j + 1].cidr;
- h->nets[j].nets = h->nets[j + 1].nets;
+ u8 i, j, net_end = nets_length - 1;
+
+ for (i = 0; i < nets_length; i++) {
+ if (h->nets[i].cidr != cidr)
+ continue;
+ if (h->nets[i].nets > 1 || i == net_end ||
+ h->nets[i + 1].nets == 0) {
+ h->nets[i].nets--;
+ return;
+ }
+ for (j = i; j < net_end && h->nets[j].nets; j++) {
+ h->nets[j].cidr = h->nets[j + 1].cidr;
+ h->nets[j].nets = h->nets[j + 1].nets;
+ }
+ h->nets[j].nets = 0;
+ return;
}
}
#endif
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 23b8eb53a56..21a3a475d7c 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -1131,12 +1131,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
ip_vs_fill_iph_skb(af, skb, &iph);
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
- if (!iph.fragoffs && skb_nfct_reasm(skb)) {
- struct sk_buff *reasm = skb_nfct_reasm(skb);
- /* Save fw mark for coming frags */
- reasm->ipvs_property = 1;
- reasm->mark = skb->mark;
- }
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
int related;
int verdict = ip_vs_out_icmp_v6(skb, &related,
@@ -1606,12 +1600,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
- if (!iph.fragoffs && skb_nfct_reasm(skb)) {
- struct sk_buff *reasm = skb_nfct_reasm(skb);
- /* Save fw mark for coming frags. */
- reasm->ipvs_property = 1;
- reasm->mark = skb->mark;
- }
if (unlikely(iph.protocol == IPPROTO_ICMPV6)) {
int related;
int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum,
@@ -1663,9 +1651,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
/* sorry, all this trouble for a no-hit :) */
IP_VS_DBG_PKT(12, af, pp, skb, 0,
"ip_vs_in: packet continues traversal as normal");
- if (iph.fragoffs && !skb_nfct_reasm(skb)) {
+ if (iph.fragoffs) {
/* Fragment that couldn't be mapped to a conn entry
- * and don't have any pointer to a reasm skb
* is missing module nf_defrag_ipv6
*/
IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n");
@@ -1748,38 +1735,6 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb,
#ifdef CONFIG_IP_VS_IPV6
/*
- * AF_INET6 fragment handling
- * Copy info from first fragment, to the rest of them.
- */
-static unsigned int
-ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- struct sk_buff *reasm = skb_nfct_reasm(skb);
- struct net *net;
-
- /* Skip if not a "replay" from nf_ct_frag6_output or first fragment.
- * ipvs_property is set when checking first fragment
- * in ip_vs_in() and ip_vs_out().
- */
- if (reasm)
- IP_VS_DBG(2, "Fragment recv prop:%d\n", reasm->ipvs_property);
- if (!reasm || !reasm->ipvs_property)
- return NF_ACCEPT;
-
- net = skb_net(skb);
- if (!net_ipvs(net)->enable)
- return NF_ACCEPT;
-
- /* Copy stored fw mark, saved in ip_vs_{in,out} */
- skb->mark = reasm->mark;
-
- return NF_ACCEPT;
-}
-
-/*
* AF_INET6 handler in NF_INET_LOCAL_IN chain
* Schedule and forward packets from remote clients
*/
@@ -1916,14 +1871,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
.priority = 100,
},
#ifdef CONFIG_IP_VS_IPV6
- /* After mangle & nat fetch 2:nd fragment and following */
- {
- .hook = ip_vs_preroute_frag6,
- .owner = THIS_MODULE,
- .pf = NFPROTO_IPV6,
- .hooknum = NF_INET_PRE_ROUTING,
- .priority = NF_IP6_PRI_NAT_DST + 1,
- },
/* After packet filtering, change source only for VS/NAT */
{
.hook = ip_vs_reply6,
diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c
index 9ef22bdce9f..bed5f704252 100644
--- a/net/netfilter/ipvs/ip_vs_pe_sip.c
+++ b/net/netfilter/ipvs/ip_vs_pe_sip.c
@@ -65,7 +65,6 @@ static int get_callid(const char *dptr, unsigned int dataoff,
static int
ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
{
- struct sk_buff *reasm = skb_nfct_reasm(skb);
struct ip_vs_iphdr iph;
unsigned int dataoff, datalen, matchoff, matchlen;
const char *dptr;
@@ -79,15 +78,10 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
/* todo: IPv6 fragments:
* I think this only should be done for the first fragment. /HS
*/
- if (reasm) {
- skb = reasm;
- dataoff = iph.thoff_reasm + sizeof(struct udphdr);
- } else
- dataoff = iph.len + sizeof(struct udphdr);
+ dataoff = iph.len + sizeof(struct udphdr);
if (dataoff >= skb->len)
return -EINVAL;
- /* todo: Check if this will mess-up the reasm skb !!! /HS */
retc = skb_linearize(skb);
if (retc < 0)
return retc;
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index b75ff6429a0..c47444e4cf8 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -883,7 +883,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
iph->daddr = cp->daddr.ip;
iph->saddr = saddr;
iph->ttl = old_iph->ttl;
- ip_select_ident(iph, &rt->dst, NULL);
+ ip_select_ident(skb, &rt->dst, NULL);
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index bdebd03bc8c..70866d192ef 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -778,8 +778,8 @@ static int callforward_do_filter(const union nf_inet_addr *src,
flowi6_to_flowi(&fl1), false)) {
if (!afinfo->route(&init_net, (struct dst_entry **)&rt2,
flowi6_to_flowi(&fl2), false)) {
- if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway,
- sizeof(rt1->rt6i_gateway)) &&
+ if (ipv6_addr_equal(rt6_nexthop(rt1),
+ rt6_nexthop(rt2)) &&
rt1->dst.dev == rt2->dst.dev)
ret = 1;
dst_release(&rt2->dst);
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index a99b6c3427b..59359bec328 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -428,7 +428,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb,
const char *msg;
u_int8_t state;
- dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &dh);
+ dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
BUG_ON(dh == NULL);
state = dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE];
@@ -486,7 +486,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb,
u_int8_t type, old_state, new_state;
enum ct_dccp_roles role;
- dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &dh);
+ dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
BUG_ON(dh == NULL);
type = dh->dccph_type;
@@ -577,7 +577,7 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl,
unsigned int cscov;
const char *msg;
- dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &dh);
+ dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
if (dh == NULL) {
msg = "nf_ct_dccp: short packet ";
goto out_invalid;
diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c
index f02b3605823..1fb2258c353 100644
--- a/net/netfilter/nf_nat_irc.c
+++ b/net/netfilter/nf_nat_irc.c
@@ -34,10 +34,14 @@ static unsigned int help(struct sk_buff *skb,
struct nf_conntrack_expect *exp)
{
char buffer[sizeof("4294967296 65635")];
+ struct nf_conn *ct = exp->master;
+ union nf_inet_addr newaddr;
u_int16_t port;
unsigned int ret;
/* Reply comes from server. */
+ newaddr = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3;
+
exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
exp->dir = IP_CT_DIR_REPLY;
exp->expectfn = nf_nat_follow_master;
@@ -57,17 +61,35 @@ static unsigned int help(struct sk_buff *skb,
}
if (port == 0) {
- nf_ct_helper_log(skb, exp->master, "all ports in use");
+ nf_ct_helper_log(skb, ct, "all ports in use");
return NF_DROP;
}
- ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo,
- protoff, matchoff, matchlen, buffer,
- strlen(buffer));
+ /* strlen("\1DCC CHAT chat AAAAAAAA P\1\n")=27
+ * strlen("\1DCC SCHAT chat AAAAAAAA P\1\n")=28
+ * strlen("\1DCC SEND F AAAAAAAA P S\1\n")=26
+ * strlen("\1DCC MOVE F AAAAAAAA P S\1\n")=26
+ * strlen("\1DCC TSEND F AAAAAAAA P S\1\n")=27
+ *
+ * AAAAAAAAA: bound addr (1.0.0.0==16777216, min 8 digits,
+ * 255.255.255.255==4294967296, 10 digits)
+ * P: bound port (min 1 d, max 5d (65635))
+ * F: filename (min 1 d )
+ * S: size (min 1 d )
+ * 0x01, \n: terminators
+ */
+ /* AAA = "us", ie. where server normally talks to. */
+ snprintf(buffer, sizeof(buffer), "%u %u", ntohl(newaddr.ip), port);
+ pr_debug("nf_nat_irc: inserting '%s' == %pI4, port %u\n",
+ buffer, &newaddr.ip, port);
+
+ ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, matchoff,
+ matchlen, buffer, strlen(buffer));
if (ret != NF_ACCEPT) {
- nf_ct_helper_log(skb, exp->master, "cannot mangle packet");
+ nf_ct_helper_log(skb, ct, "cannot mangle packet");
nf_ct_unexpect_related(exp);
}
+
return ret;
}
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 57ee84d2147..c9c2a8441d3 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2168,8 +2168,6 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
}
#endif
- msg->msg_namelen = 0;
-
copied = data_skb->len;
if (len < copied) {
msg->msg_flags |= MSG_TRUNC;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 2fd6dbea327..393f17eea1a 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -364,7 +364,7 @@ int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops)
EXPORT_SYMBOL(genl_unregister_ops);
/**
- * genl_register_family - register a generic netlink family
+ * __genl_register_family - register a generic netlink family
* @family: generic netlink family
*
* Registers the specified family after validating it first. Only one
@@ -374,7 +374,7 @@ EXPORT_SYMBOL(genl_unregister_ops);
*
* Return 0 on success or a negative error code.
*/
-int genl_register_family(struct genl_family *family)
+int __genl_register_family(struct genl_family *family)
{
int err = -EINVAL;
@@ -430,10 +430,10 @@ errout_locked:
errout:
return err;
}
-EXPORT_SYMBOL(genl_register_family);
+EXPORT_SYMBOL(__genl_register_family);
/**
- * genl_register_family_with_ops - register a generic netlink family
+ * __genl_register_family_with_ops - register a generic netlink family
* @family: generic netlink family
* @ops: operations to be registered
* @n_ops: number of elements to register
@@ -457,12 +457,12 @@ EXPORT_SYMBOL(genl_register_family);
*
* Return 0 on success or a negative error code.
*/
-int genl_register_family_with_ops(struct genl_family *family,
+int __genl_register_family_with_ops(struct genl_family *family,
struct genl_ops *ops, size_t n_ops)
{
int err, i;
- err = genl_register_family(family);
+ err = __genl_register_family(family);
if (err)
return err;
@@ -476,7 +476,7 @@ err_out:
genl_unregister_family(family);
return err;
}
-EXPORT_SYMBOL(genl_register_family_with_ops);
+EXPORT_SYMBOL(__genl_register_family_with_ops);
/**
* genl_unregister_family - unregister generic netlink family
@@ -544,6 +544,30 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq,
}
EXPORT_SYMBOL(genlmsg_put);
+static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct genl_ops *ops = cb->data;
+ int rc;
+
+ genl_lock();
+ rc = ops->dumpit(skb, cb);
+ genl_unlock();
+ return rc;
+}
+
+static int genl_lock_done(struct netlink_callback *cb)
+{
+ struct genl_ops *ops = cb->data;
+ int rc = 0;
+
+ if (ops->done) {
+ genl_lock();
+ rc = ops->done(cb);
+ genl_unlock();
+ }
+ return rc;
+}
+
static int genl_family_rcv_msg(struct genl_family *family,
struct sk_buff *skb,
struct nlmsghdr *nlh)
@@ -572,15 +596,34 @@ static int genl_family_rcv_msg(struct genl_family *family,
return -EPERM;
if (nlh->nlmsg_flags & NLM_F_DUMP) {
- struct netlink_dump_control c = {
- .dump = ops->dumpit,
- .done = ops->done,
- };
+ int rc;
if (ops->dumpit == NULL)
return -EOPNOTSUPP;
- return netlink_dump_start(net->genl_sock, skb, nlh, &c);
+ if (!family->parallel_ops) {
+ struct netlink_dump_control c = {
+ .module = family->module,
+ .data = ops,
+ .dump = genl_lock_dumpit,
+ .done = genl_lock_done,
+ };
+
+ genl_unlock();
+ rc = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
+ genl_lock();
+
+ } else {
+ struct netlink_dump_control c = {
+ .module = family->module,
+ .dump = ops->dumpit,
+ .done = ops->done,
+ };
+
+ rc = __netlink_dump_start(net->genl_sock, skb, nlh, &c);
+ }
+
+ return rc;
}
if (ops->doit == NULL)
@@ -877,8 +920,10 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
#ifdef CONFIG_MODULES
if (res == NULL) {
genl_unlock();
+ up_read(&cb_lock);
request_module("net-pf-%d-proto-%d-family-%s",
PF_NETLINK, NETLINK_GENERIC, name);
+ down_read(&cb_lock);
genl_lock();
res = genl_family_find_byname(name);
}
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index ec0c80fde69..13b92982a50 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1179,10 +1179,9 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock,
sax->sax25_family = AF_NETROM;
skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call,
AX25_ADDR_LEN);
+ msg->msg_namelen = sizeof(*sax);
}
- msg->msg_namelen = sizeof(*sax);
-
skb_free_datagram(sk, skb);
release_sock(sk);
diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h
index ff8c434f7df..f924dd209b3 100644
--- a/net/nfc/llcp.h
+++ b/net/nfc/llcp.h
@@ -19,6 +19,7 @@
enum llcp_state {
LLCP_CONNECTED = 1, /* wait_for_packet() wants that */
+ LLCP_CONNECTING,
LLCP_CLOSED,
LLCP_BOUND,
LLCP_LISTEN,
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index 380253eccb7..86470cf54ce 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -571,7 +571,7 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock,
if (sk->sk_shutdown == SHUTDOWN_MASK)
mask |= POLLHUP;
- if (sock_writeable(sk))
+ if (sock_writeable(sk) && sk->sk_state == LLCP_CONNECTED)
mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
else
set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
@@ -722,14 +722,16 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
if (ret)
goto sock_unlink;
+ sk->sk_state = LLCP_CONNECTING;
+
ret = sock_wait_state(sk, LLCP_CONNECTED,
sock_sndtimeo(sk, flags & O_NONBLOCK));
- if (ret)
+ if (ret && ret != -EINPROGRESS)
goto sock_unlink;
release_sock(sk);
- return 0;
+ return ret;
sock_unlink:
nfc_llcp_put_ssap(local, llcp_sock->ssap);
@@ -798,8 +800,6 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
pr_debug("%p %zu\n", sk, len);
- msg->msg_namelen = 0;
-
lock_sock(sk);
if (sk->sk_state == LLCP_CLOSED &&
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index 313bf1bc848..5d11f4ac3ec 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -241,8 +241,6 @@ static int rawsock_recvmsg(struct kiocb *iocb, struct socket *sock,
if (!skb)
return rc;
- msg->msg_namelen = 0;
-
copied = skb->len;
if (len < copied) {
msg->msg_flags |= MSG_TRUNC;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 20a1bd0e654..e8b5a0dfca2 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -237,6 +237,30 @@ struct packet_skb_cb {
static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
static void __fanout_link(struct sock *sk, struct packet_sock *po);
+static struct net_device *packet_cached_dev_get(struct packet_sock *po)
+{
+ struct net_device *dev;
+
+ rcu_read_lock();
+ dev = rcu_dereference(po->cached_dev);
+ if (likely(dev))
+ dev_hold(dev);
+ rcu_read_unlock();
+
+ return dev;
+}
+
+static void packet_cached_dev_assign(struct packet_sock *po,
+ struct net_device *dev)
+{
+ rcu_assign_pointer(po->cached_dev, dev);
+}
+
+static void packet_cached_dev_reset(struct packet_sock *po)
+{
+ RCU_INIT_POINTER(po->cached_dev, NULL);
+}
+
/* register_prot_hook must be invoked with the po->bind_lock held,
* or from a context in which asynchronous accesses to the packet
* socket is not possible (packet_create()).
@@ -244,11 +268,13 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po);
static void register_prot_hook(struct sock *sk)
{
struct packet_sock *po = pkt_sk(sk);
+
if (!po->running) {
if (po->fanout)
__fanout_link(sk, po);
else
dev_add_pack(&po->prot_hook);
+
sock_hold(sk);
po->running = 1;
}
@@ -266,10 +292,12 @@ static void __unregister_prot_hook(struct sock *sk, bool sync)
struct packet_sock *po = pkt_sk(sk);
po->running = 0;
+
if (po->fanout)
__fanout_unlink(sk, po);
else
__dev_remove_pack(&po->prot_hook);
+
__sock_put(sk);
if (sync) {
@@ -432,9 +460,9 @@ static void prb_shutdown_retire_blk_timer(struct packet_sock *po,
pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc;
- spin_lock(&rb_queue->lock);
+ spin_lock_bh(&rb_queue->lock);
pkc->delete_blk_timer = 1;
- spin_unlock(&rb_queue->lock);
+ spin_unlock_bh(&rb_queue->lock);
prb_del_retire_blk_timer(pkc);
}
@@ -2046,7 +2074,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
struct sk_buff *skb;
struct net_device *dev;
__be16 proto;
- bool need_rls_dev = false;
int err, reserve = 0;
void *ph;
struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
@@ -2058,8 +2085,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
mutex_lock(&po->pg_vec_lock);
- if (saddr == NULL) {
- dev = po->prot_hook.dev;
+ if (likely(saddr == NULL)) {
+ dev = packet_cached_dev_get(po);
proto = po->num;
addr = NULL;
} else {
@@ -2073,19 +2100,17 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
proto = saddr->sll_protocol;
addr = saddr->sll_addr;
dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
- need_rls_dev = true;
}
err = -ENXIO;
if (unlikely(dev == NULL))
goto out;
-
- reserve = dev->hard_header_len;
-
err = -ENETDOWN;
if (unlikely(!(dev->flags & IFF_UP)))
goto out_put;
+ reserve = dev->hard_header_len;
+
size_max = po->tx_ring.frame_size
- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
@@ -2162,8 +2187,7 @@ out_status:
__packet_set_status(po, ph, status);
kfree_skb(skb);
out_put:
- if (need_rls_dev)
- dev_put(dev);
+ dev_put(dev);
out:
mutex_unlock(&po->pg_vec_lock);
return err;
@@ -2201,7 +2225,6 @@ static int packet_snd(struct socket *sock,
struct sk_buff *skb;
struct net_device *dev;
__be16 proto;
- bool need_rls_dev = false;
unsigned char *addr;
int err, reserve = 0;
struct virtio_net_hdr vnet_hdr = { 0 };
@@ -2216,8 +2239,8 @@ static int packet_snd(struct socket *sock,
* Get and verify the address.
*/
- if (saddr == NULL) {
- dev = po->prot_hook.dev;
+ if (likely(saddr == NULL)) {
+ dev = packet_cached_dev_get(po);
proto = po->num;
addr = NULL;
} else {
@@ -2229,19 +2252,17 @@ static int packet_snd(struct socket *sock,
proto = saddr->sll_protocol;
addr = saddr->sll_addr;
dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
- need_rls_dev = true;
}
err = -ENXIO;
- if (dev == NULL)
+ if (unlikely(dev == NULL))
goto out_unlock;
- if (sock->type == SOCK_RAW)
- reserve = dev->hard_header_len;
-
err = -ENETDOWN;
- if (!(dev->flags & IFF_UP))
+ if (unlikely(!(dev->flags & IFF_UP)))
goto out_unlock;
+ if (sock->type == SOCK_RAW)
+ reserve = dev->hard_header_len;
if (po->has_vnet_hdr) {
vnet_hdr_len = sizeof(vnet_hdr);
@@ -2375,15 +2396,14 @@ static int packet_snd(struct socket *sock,
if (err > 0 && (err = net_xmit_errno(err)) != 0)
goto out_unlock;
- if (need_rls_dev)
- dev_put(dev);
+ dev_put(dev);
return len;
out_free:
kfree_skb(skb);
out_unlock:
- if (dev && need_rls_dev)
+ if (dev)
dev_put(dev);
out:
return err;
@@ -2428,6 +2448,8 @@ static int packet_release(struct socket *sock)
spin_lock(&po->bind_lock);
unregister_prot_hook(sk, false);
+ packet_cached_dev_reset(po);
+
if (po->prot_hook.dev) {
dev_put(po->prot_hook.dev);
po->prot_hook.dev = NULL;
@@ -2483,14 +2505,17 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protoc
spin_lock(&po->bind_lock);
unregister_prot_hook(sk, true);
+
po->num = protocol;
po->prot_hook.type = protocol;
if (po->prot_hook.dev)
dev_put(po->prot_hook.dev);
- po->prot_hook.dev = dev;
+ po->prot_hook.dev = dev;
po->ifindex = dev ? dev->ifindex : 0;
+ packet_cached_dev_assign(po, dev);
+
if (protocol == 0)
goto out_unlock;
@@ -2604,6 +2629,8 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
sk->sk_family = PF_PACKET;
po->num = proto;
+ packet_cached_dev_reset(po);
+
sk->sk_destruct = packet_sock_destruct;
sk_refcnt_debug_inc(sk);
@@ -2694,7 +2721,6 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
struct sock *sk = sock->sk;
struct sk_buff *skb;
int copied, err;
- struct sockaddr_ll *sll;
int vnet_hdr_len = 0;
err = -EINVAL;
@@ -2777,22 +2803,10 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
goto out_free;
}
- /*
- * If the address length field is there to be filled in, we fill
- * it in now.
+ /* You lose any data beyond the buffer you gave. If it worries
+ * a user program they can ask the device for its MTU
+ * anyway.
*/
-
- sll = &PACKET_SKB_CB(skb)->sa.ll;
- if (sock->type == SOCK_PACKET)
- msg->msg_namelen = sizeof(struct sockaddr_pkt);
- else
- msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
-
- /*
- * You lose any data beyond the buffer you gave. If it worries a
- * user program they can ask the device for its MTU anyway.
- */
-
copied = skb->len;
if (copied > len) {
copied = len;
@@ -2805,9 +2819,20 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
sock_recv_ts_and_drops(msg, sk, skb);
- if (msg->msg_name)
+ if (msg->msg_name) {
+ /* If the address length field is there to be filled
+ * in, we fill it in now.
+ */
+ if (sock->type == SOCK_PACKET) {
+ msg->msg_namelen = sizeof(struct sockaddr_pkt);
+ } else {
+ struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
+ msg->msg_namelen = sll->sll_halen +
+ offsetof(struct sockaddr_ll, sll_addr);
+ }
memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
msg->msg_namelen);
+ }
if (pkt_sk(sk)->auxdata) {
struct tpacket_auxdata aux;
@@ -3259,9 +3284,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
if (po->tp_version == TPACKET_V3) {
lv = sizeof(struct tpacket_stats_v3);
+ st.stats3.tp_packets += st.stats3.tp_drops;
data = &st.stats3;
} else {
lv = sizeof(struct tpacket_stats);
+ st.stats1.tp_packets += st.stats1.tp_drops;
data = &st.stats1;
}
@@ -3356,6 +3383,7 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void
sk->sk_error_report(sk);
}
if (msg == NETDEV_UNREGISTER) {
+ packet_cached_dev_reset(po);
po->ifindex = -1;
if (po->prot_hook.dev)
dev_put(po->prot_hook.dev);
diff --git a/net/packet/internal.h b/net/packet/internal.h
index c4e4b456120..1035fa2d909 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -113,6 +113,7 @@ struct packet_sock {
unsigned int tp_loss:1;
unsigned int tp_tx_has_off:1;
unsigned int tp_tstamp;
+ struct net_device __rcu *cached_dev;
struct packet_type prot_hook ____cacheline_aligned_in_smp;
};
diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c
index 12c30f3e643..38946b26e47 100644
--- a/net/phonet/datagram.c
+++ b/net/phonet/datagram.c
@@ -139,9 +139,6 @@ static int pn_recvmsg(struct kiocb *iocb, struct sock *sk,
MSG_CMSG_COMPAT))
goto out_nofree;
- if (addr_len)
- *addr_len = sizeof(sa);
-
skb = skb_recv_datagram(sk, flags, noblock, &rval);
if (skb == NULL)
goto out_nofree;
@@ -162,8 +159,10 @@ static int pn_recvmsg(struct kiocb *iocb, struct sock *sk,
rval = (flags & MSG_TRUNC) ? skb->len : copylen;
- if (msg->msg_name != NULL)
- memcpy(msg->msg_name, &sa, sizeof(struct sockaddr_pn));
+ if (msg->msg_name != NULL) {
+ memcpy(msg->msg_name, &sa, sizeof(sa));
+ *addr_len = sizeof(sa);
+ }
out:
skb_free_datagram(sk, skb);
diff --git a/net/rds/ib.c b/net/rds/ib.c
index b4c8b0022fe..ba2dffeff60 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -338,7 +338,8 @@ static int rds_ib_laddr_check(__be32 addr)
ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
/* due to this, we will claim to support iWARP devices unless we
check node_type. */
- if (ret || cm_id->device->node_type != RDMA_NODE_IB_CA)
+ if (ret || !cm_id->device ||
+ cm_id->device->node_type != RDMA_NODE_IB_CA)
ret = -EADDRNOTAVAIL;
rdsdebug("addr %pI4 ret %d node type %d\n",
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index 8eb9501e3d6..b7ebe23cded 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -421,8 +421,7 @@ static void rds_ib_recv_cache_put(struct list_head *new_item,
struct rds_ib_refill_cache *cache)
{
unsigned long flags;
- struct list_head *old;
- struct list_head __percpu *chpfirst;
+ struct list_head *old, *chpfirst;
local_irq_save(flags);
@@ -432,7 +431,7 @@ static void rds_ib_recv_cache_put(struct list_head *new_item,
else /* put on front */
list_add_tail(new_item, chpfirst);
- __this_cpu_write(chpfirst, new_item);
+ __this_cpu_write(cache->percpu->first, new_item);
__this_cpu_inc(cache->percpu->count);
if (__this_cpu_read(cache->percpu->count) < RDS_IB_RECYCLE_BATCH_COUNT)
@@ -452,7 +451,7 @@ static void rds_ib_recv_cache_put(struct list_head *new_item,
} while (old);
- __this_cpu_write(chpfirst, NULL);
+ __this_cpu_write(cache->percpu->first, NULL);
__this_cpu_write(cache->percpu->count, 0);
end:
local_irq_restore(flags);
diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c
index e5909498117..37be6e226d1 100644
--- a/net/rds/ib_send.c
+++ b/net/rds/ib_send.c
@@ -552,9 +552,8 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm,
&& rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) {
rds_cong_map_updated(conn->c_fcong, ~(u64) 0);
scat = &rm->data.op_sg[sg];
- ret = sizeof(struct rds_header) + RDS_CONG_MAP_BYTES;
- ret = min_t(int, ret, scat->length - conn->c_xmit_data_off);
- return ret;
+ ret = max_t(int, RDS_CONG_MAP_BYTES, scat->length);
+ return sizeof(struct rds_header) + ret;
}
/* FIXME we may overallocate here */
diff --git a/net/rds/iw.c b/net/rds/iw.c
index 7826d46baa7..589935661d6 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -239,7 +239,8 @@ static int rds_iw_laddr_check(__be32 addr)
ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
/* due to this, we will claim to support IB devices unless we
check node_type. */
- if (ret || cm_id->device->node_type != RDMA_NODE_RNIC)
+ if (ret || !cm_id->device ||
+ cm_id->device->node_type != RDMA_NODE_RNIC)
ret = -EADDRNOTAVAIL;
rdsdebug("addr %pI4 ret %d node type %d\n",
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 9f0f17cf6bf..de339b24ca1 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -410,8 +410,6 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
rdsdebug("size %zu flags 0x%x timeo %ld\n", size, msg_flags, timeo);
- msg->msg_namelen = 0;
-
if (msg_flags & MSG_OOB)
goto out;
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 9c834745159..27e6896705e 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1216,7 +1216,6 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock,
{
struct sock *sk = sock->sk;
struct rose_sock *rose = rose_sk(sk);
- struct sockaddr_rose *srose = (struct sockaddr_rose *)msg->msg_name;
size_t copied;
unsigned char *asmptr;
struct sk_buff *skb;
@@ -1252,24 +1251,19 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock,
skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
- if (srose != NULL) {
- memset(srose, 0, msg->msg_namelen);
+ if (msg->msg_name) {
+ struct sockaddr_rose *srose;
+ struct full_sockaddr_rose *full_srose = msg->msg_name;
+
+ memset(msg->msg_name, 0, sizeof(struct full_sockaddr_rose));
+ srose = msg->msg_name;
srose->srose_family = AF_ROSE;
srose->srose_addr = rose->dest_addr;
srose->srose_call = rose->dest_call;
srose->srose_ndigis = rose->dest_ndigis;
- if (msg->msg_namelen >= sizeof(struct full_sockaddr_rose)) {
- struct full_sockaddr_rose *full_srose = (struct full_sockaddr_rose *)msg->msg_name;
- for (n = 0 ; n < rose->dest_ndigis ; n++)
- full_srose->srose_digis[n] = rose->dest_digis[n];
- msg->msg_namelen = sizeof(struct full_sockaddr_rose);
- } else {
- if (rose->dest_ndigis >= 1) {
- srose->srose_ndigis = 1;
- srose->srose_digi = rose->dest_digis[0];
- }
- msg->msg_namelen = sizeof(struct sockaddr_rose);
- }
+ for (n = 0 ; n < rose->dest_ndigis ; n++)
+ full_srose->srose_digis[n] = rose->dest_digis[n];
+ msg->msg_namelen = sizeof(struct full_sockaddr_rose);
}
skb_free_datagram(sk, skb);
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
index 4b48687c389..898492a8d61 100644
--- a/net/rxrpc/ar-recvmsg.c
+++ b/net/rxrpc/ar-recvmsg.c
@@ -143,10 +143,13 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
/* copy the peer address and timestamp */
if (!continue_call) {
- if (msg->msg_name && msg->msg_namelen > 0)
+ if (msg->msg_name) {
+ size_t len =
+ sizeof(call->conn->trans->peer->srx);
memcpy(msg->msg_name,
- &call->conn->trans->peer->srx,
- sizeof(call->conn->trans->peer->srx));
+ &call->conn->trans->peer->srx, len);
+ msg->msg_namelen = len;
+ }
sock_recv_ts_and_drops(msg, &rx->sk, skb);
}
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 281c1bded1f..51b968d3feb 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -285,6 +285,45 @@ static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
return q;
}
+/* The linklayer setting were not transferred from iproute2, in older
+ * versions, and the rate tables lookup systems have been dropped in
+ * the kernel. To keep backward compatible with older iproute2 tc
+ * utils, we detect the linklayer setting by detecting if the rate
+ * table were modified.
+ *
+ * For linklayer ATM table entries, the rate table will be aligned to
+ * 48 bytes, thus some table entries will contain the same value. The
+ * mpu (min packet unit) is also encoded into the old rate table, thus
+ * starting from the mpu, we find low and high table entries for
+ * mapping this cell. If these entries contain the same value, when
+ * the rate tables have been modified for linklayer ATM.
+ *
+ * This is done by rounding mpu to the nearest 48 bytes cell/entry,
+ * and then roundup to the next cell, calc the table entry one below,
+ * and compare.
+ */
+static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab)
+{
+ int low = roundup(r->mpu, 48);
+ int high = roundup(low+1, 48);
+ int cell_low = low >> r->cell_log;
+ int cell_high = (high >> r->cell_log) - 1;
+
+ /* rtab is too inaccurate at rates > 100Mbit/s */
+ if ((r->rate > (100000000/8)) || (rtab[0] == 0)) {
+ pr_debug("TC linklayer: Giving up ATM detection\n");
+ return TC_LINKLAYER_ETHERNET;
+ }
+
+ if ((cell_high > cell_low) && (cell_high < 256)
+ && (rtab[cell_low] == rtab[cell_high])) {
+ pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n",
+ cell_low, cell_high, rtab[cell_high]);
+ return TC_LINKLAYER_ATM;
+ }
+ return TC_LINKLAYER_ETHERNET;
+}
+
static struct qdisc_rate_table *qdisc_rtab_list;
struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
@@ -308,6 +347,8 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *ta
rtab->rate = *r;
rtab->refcnt = 1;
memcpy(rtab->data, nla_data(tab), 1024);
+ if (r->linklayer == TC_LINKLAYER_UNAWARE)
+ r->linklayer = __detect_linklayer(r, rtab->data);
rtab->next = qdisc_rtab_list;
qdisc_rtab_list = rtab;
}
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index ca8e0a57d94..1f9c31411f1 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -605,6 +605,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
struct sockaddr_atmpvc pvc;
int state;
+ memset(&pvc, 0, sizeof(pvc));
pvc.sap_family = AF_ATMPVC;
pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1;
pvc.sap_addr.vpi = flow->vcc->vpi;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 1bc210ffcba..8ec15988b5f 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1465,6 +1465,7 @@ static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
unsigned char *b = skb_tail_pointer(skb);
struct tc_cbq_wrropt opt;
+ memset(&opt, 0, sizeof(opt));
opt.flags = 0;
opt.allot = cl->allot;
opt.priority = cl->priority + 1;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 20224086cc2..a7f838b45dc 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -908,6 +908,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r,
memset(r, 0, sizeof(*r));
r->overhead = conf->overhead;
r->rate_bps = (u64)conf->rate << 3;
+ r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
r->mult = 1;
/*
* Calibrate mult, shift so that token counting is accurate
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index adaedd79389..e09b074bb8a 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -87,7 +87,7 @@ struct htb_class {
unsigned int children;
struct htb_class *parent; /* parent class */
- int prio; /* these two are used only by leaves... */
+ u32 prio; /* these two are used only by leaves... */
int quantum; /* but stored for parent-to-leaf return */
union {
@@ -1312,6 +1312,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl = (struct htb_class *)*arg, *parent;
struct nlattr *opt = tca[TCA_OPTIONS];
+ struct qdisc_rate_table *rtab = NULL, *ctab = NULL;
struct nlattr *tb[TCA_HTB_MAX + 1];
struct tc_htb_opt *hopt;
@@ -1333,6 +1334,18 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
if (!hopt->rate.rate || !hopt->ceil.rate)
goto failure;
+ /* Keeping backward compatible with rate_table based iproute2 tc */
+ if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) {
+ rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]);
+ if (rtab)
+ qdisc_put_rtab(rtab);
+ }
+ if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE) {
+ ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]);
+ if (ctab)
+ qdisc_put_rtab(ctab);
+ }
+
if (!cl) { /* new class */
struct Qdisc *new_q;
int prio;
@@ -1463,7 +1476,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
psched_ratecfg_precompute(&cl->ceil, &hopt->ceil);
cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
- cl->cbuffer = PSCHED_TICKS2NS(hopt->buffer);
+ cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
sch_tree_unlock(sch);
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index d51852bba01..57922524f0c 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -113,7 +113,6 @@
#define FRAC_BITS 30 /* fixed point arithmetic */
#define ONE_FP (1UL << FRAC_BITS)
-#define IWSUM (ONE_FP/QFQ_MAX_WSUM)
#define QFQ_MTU_SHIFT 16 /* to support TSO/GSO */
#define QFQ_MIN_LMAX 512 /* see qfq_slot_insert */
@@ -189,6 +188,7 @@ struct qfq_sched {
struct qfq_aggregate *in_serv_agg; /* Aggregate being served. */
u32 num_active_agg; /* Num. of active aggregates */
u32 wsum; /* weight sum */
+ u32 iwsum; /* inverse weight sum */
unsigned long bitmaps[QFQ_MAX_STATE]; /* Group bitmaps. */
struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */
@@ -314,6 +314,7 @@ static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg,
q->wsum +=
(int) agg->class_weight * (new_num_classes - agg->num_classes);
+ q->iwsum = ONE_FP / q->wsum;
agg->num_classes = new_num_classes;
}
@@ -340,6 +341,10 @@ static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg)
{
if (!hlist_unhashed(&agg->nonfull_next))
hlist_del_init(&agg->nonfull_next);
+ q->wsum -= agg->class_weight;
+ if (q->wsum != 0)
+ q->iwsum = ONE_FP / q->wsum;
+
if (q->in_serv_agg == agg)
q->in_serv_agg = qfq_choose_next_agg(q);
kfree(agg);
@@ -827,38 +832,60 @@ static void qfq_make_eligible(struct qfq_sched *q)
}
}
-
/*
- * The index of the slot in which the aggregate is to be inserted must
- * not be higher than QFQ_MAX_SLOTS-2. There is a '-2' and not a '-1'
- * because the start time of the group may be moved backward by one
- * slot after the aggregate has been inserted, and this would cause
- * non-empty slots to be right-shifted by one position.
+ * The index of the slot in which the input aggregate agg is to be
+ * inserted must not be higher than QFQ_MAX_SLOTS-2. There is a '-2'
+ * and not a '-1' because the start time of the group may be moved
+ * backward by one slot after the aggregate has been inserted, and
+ * this would cause non-empty slots to be right-shifted by one
+ * position.
+ *
+ * QFQ+ fully satisfies this bound to the slot index if the parameters
+ * of the classes are not changed dynamically, and if QFQ+ never
+ * happens to postpone the service of agg unjustly, i.e., it never
+ * happens that the aggregate becomes backlogged and eligible, or just
+ * eligible, while an aggregate with a higher approximated finish time
+ * is being served. In particular, in this case QFQ+ guarantees that
+ * the timestamps of agg are low enough that the slot index is never
+ * higher than 2. Unfortunately, QFQ+ cannot provide the same
+ * guarantee if it happens to unjustly postpone the service of agg, or
+ * if the parameters of some class are changed.
+ *
+ * As for the first event, i.e., an out-of-order service, the
+ * upper bound to the slot index guaranteed by QFQ+ grows to
+ * 2 +
+ * QFQ_MAX_AGG_CLASSES * ((1<<QFQ_MTU_SHIFT)/QFQ_MIN_LMAX) *
+ * (current_max_weight/current_wsum) <= 2 + 8 * 128 * 1.
*
- * If the weight and lmax (max_pkt_size) of the classes do not change,
- * then QFQ+ does meet the above contraint according to the current
- * values of its parameters. In fact, if the weight and lmax of the
- * classes do not change, then, from the theory, QFQ+ guarantees that
- * the slot index is never higher than
- * 2 + QFQ_MAX_AGG_CLASSES * ((1<<QFQ_MTU_SHIFT)/QFQ_MIN_LMAX) *
- * (QFQ_MAX_WEIGHT/QFQ_MAX_WSUM) = 2 + 8 * 128 * (1 / 64) = 18
+ * The following function deals with this problem by backward-shifting
+ * the timestamps of agg, if needed, so as to guarantee that the slot
+ * index is never higher than QFQ_MAX_SLOTS-2. This backward-shift may
+ * cause the service of other aggregates to be postponed, yet the
+ * worst-case guarantees of these aggregates are not violated. In
+ * fact, in case of no out-of-order service, the timestamps of agg
+ * would have been even lower than they are after the backward shift,
+ * because QFQ+ would have guaranteed a maximum value equal to 2 for
+ * the slot index, and 2 < QFQ_MAX_SLOTS-2. Hence the aggregates whose
+ * service is postponed because of the backward-shift would have
+ * however waited for the service of agg before being served.
*
- * When the weight of a class is increased or the lmax of the class is
- * decreased, a new aggregate with smaller slot size than the original
- * parent aggregate of the class may happen to be activated. The
- * activation of this aggregate should be properly delayed to when the
- * service of the class has finished in the ideal system tracked by
- * QFQ+. If the activation of the aggregate is not delayed to this
- * reference time instant, then this aggregate may be unjustly served
- * before other aggregates waiting for service. This may cause the
- * above bound to the slot index to be violated for some of these
- * unlucky aggregates.
+ * The other event that may cause the slot index to be higher than 2
+ * for agg is a recent change of the parameters of some class. If the
+ * weight of a class is increased or the lmax (max_pkt_size) of the
+ * class is decreased, then a new aggregate with smaller slot size
+ * than the original parent aggregate of the class may happen to be
+ * activated. The activation of this aggregate should be properly
+ * delayed to when the service of the class has finished in the ideal
+ * system tracked by QFQ+. If the activation of the aggregate is not
+ * delayed to this reference time instant, then this aggregate may be
+ * unjustly served before other aggregates waiting for service. This
+ * may cause the above bound to the slot index to be violated for some
+ * of these unlucky aggregates.
*
* Instead of delaying the activation of the new aggregate, which is
- * quite complex, the following inaccurate but simple solution is used:
- * if the slot index is higher than QFQ_MAX_SLOTS-2, then the
- * timestamps of the aggregate are shifted backward so as to let the
- * slot index become equal to QFQ_MAX_SLOTS-2.
+ * quite complex, the above-discussed capping of the slot index is
+ * used to handle also the consequences of a change of the parameters
+ * of a class.
*/
static void qfq_slot_insert(struct qfq_group *grp, struct qfq_aggregate *agg,
u64 roundedS)
@@ -1077,7 +1104,7 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
else
in_serv_agg->budget -= len;
- q->V += (u64)len * IWSUM;
+ q->V += (u64)len * q->iwsum;
pr_debug("qfq dequeue: len %u F %lld now %lld\n",
len, (unsigned long long) in_serv_agg->F,
(unsigned long long) q->V);
diff --git a/net/sctp/input.c b/net/sctp/input.c
index 4b2c83146aa..bd4fb459c63 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -648,8 +648,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info)
break;
case ICMP_REDIRECT:
sctp_icmp_redirect(sk, transport, skb);
- err = 0;
- break;
+ /* Fall through to out_unlock. */
default:
goto out_unlock;
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 391a245d520..422d8bdacc0 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -189,7 +189,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
break;
case NDISC_REDIRECT:
sctp_icmp_redirect(sk, transport, skb);
- break;
+ goto out_unlock;
default:
break;
}
@@ -210,45 +210,24 @@ out:
in6_dev_put(idev);
}
-/* Based on tcp_v6_xmit() in tcp_ipv6.c. */
static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
{
struct sock *sk = skb->sk;
struct ipv6_pinfo *np = inet6_sk(sk);
- struct flowi6 fl6;
-
- memset(&fl6, 0, sizeof(fl6));
-
- fl6.flowi6_proto = sk->sk_protocol;
-
- /* Fill in the dest address from the route entry passed with the skb
- * and the source address from the transport.
- */
- fl6.daddr = transport->ipaddr.v6.sin6_addr;
- fl6.saddr = transport->saddr.v6.sin6_addr;
-
- fl6.flowlabel = np->flow_label;
- IP6_ECN_flow_xmit(sk, fl6.flowlabel);
- if (ipv6_addr_type(&fl6.saddr) & IPV6_ADDR_LINKLOCAL)
- fl6.flowi6_oif = transport->saddr.v6.sin6_scope_id;
- else
- fl6.flowi6_oif = sk->sk_bound_dev_if;
-
- if (np->opt && np->opt->srcrt) {
- struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
- fl6.daddr = *rt0->addr;
- }
+ struct flowi6 *fl6 = &transport->fl.u.ip6;
SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n",
__func__, skb, skb->len,
- &fl6.saddr, &fl6.daddr);
+ &fl6->saddr, &fl6->daddr);
- SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS);
+ IP6_ECN_flow_xmit(sk, fl6->flowlabel);
if (!(transport->param_flags & SPP_PMTUD_ENABLE))
skb->local_df = 1;
- return ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
+ SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS);
+
+ return ip6_xmit(sk, skb, fl6, np->opt, np->tclass);
}
/* Returns the dst cache entry for the given source and destination ip
@@ -261,10 +240,12 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
struct dst_entry *dst = NULL;
struct flowi6 *fl6 = &fl->u.ip6;
struct sctp_bind_addr *bp;
+ struct ipv6_pinfo *np = inet6_sk(sk);
struct sctp_sockaddr_entry *laddr;
union sctp_addr *baddr = NULL;
union sctp_addr *daddr = &t->ipaddr;
union sctp_addr dst_saddr;
+ struct in6_addr *final_p, final;
__u8 matchlen = 0;
__u8 bmatchlen;
sctp_scope_t scope;
@@ -287,7 +268,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl6->saddr);
}
- dst = ip6_dst_lookup_flow(sk, fl6, NULL, false);
+ final_p = fl6_update_dst(fl6, np->opt, &final);
+ dst = ip6_dst_lookup_flow(sk, fl6, final_p, false);
if (!asoc || saddr)
goto out;
@@ -339,10 +321,12 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
}
}
rcu_read_unlock();
+
if (baddr) {
fl6->saddr = baddr->v6.sin6_addr;
fl6->fl6_sport = baddr->v6.sin6_port;
- dst = ip6_dst_lookup_flow(sk, fl6, NULL, false);
+ final_p = fl6_update_dst(fl6, np->opt, &final);
+ dst = ip6_dst_lookup_flow(sk, fl6, final_p, false);
}
out:
diff --git a/net/sctp/output.c b/net/sctp/output.c
index bbef4a7a9b5..0beb2f9c8a7 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -547,7 +547,8 @@ int sctp_packet_transmit(struct sctp_packet *packet)
* by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>.
*/
if (!sctp_checksum_disable) {
- if (!(dst->dev->features & NETIF_F_SCTP_CSUM)) {
+ if (!(dst->dev->features & NETIF_F_SCTP_CSUM) ||
+ (dst_xfrm(dst) != NULL) || packet->ipfragok) {
__u32 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len);
/* 3) Put the resultant value into the checksum field in the
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index cf579e71cff..673921cfb97 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -1403,8 +1403,8 @@ static void sctp_chunk_destroy(struct sctp_chunk *chunk)
BUG_ON(!list_empty(&chunk->list));
list_del_init(&chunk->transmitted_list);
- /* Free the chunk skb data and the SCTP_chunk stub itself. */
- dev_kfree_skb(chunk->skb);
+ consume_skb(chunk->skb);
+ consume_skb(chunk->auth_chunk);
SCTP_DBG_OBJCNT_DEC(chunk);
kmem_cache_free(sctp_chunk_cachep, chunk);
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index de1a0138317..9973079401c 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -765,6 +765,12 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
struct sctp_chunk auth;
sctp_ierror_t ret;
+ /* Make sure that we and the peer are AUTH capable */
+ if (!net->sctp.auth_enable || !new_asoc->peer.auth_capable) {
+ sctp_association_free(new_asoc);
+ return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
+ }
+
/* set-up our fake chunk so that we can process it */
auth.skb = chunk->auth_chunk;
auth.asoc = chunk->asoc;
@@ -775,10 +781,6 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net,
auth.transport = chunk->transport;
ret = sctp_sf_authenticate(net, ep, new_asoc, type, &auth);
-
- /* We can now safely free the auth_chunk clone */
- kfree_skb(chunk->auth_chunk);
-
if (ret != SCTP_IERROR_NO_ERROR) {
sctp_association_free(new_asoc);
return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 6abb1caf983..8554e5eebae 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -71,6 +71,7 @@
#include <linux/crypto.h>
#include <linux/slab.h>
#include <linux/file.h>
+#include <linux/compat.h>
#include <net/ip.h>
#include <net/icmp.h>
@@ -820,6 +821,9 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
goto skip_mkasconf;
}
+ if (laddr == NULL)
+ return -EINVAL;
+
/* We do not need RCU protection throughout this loop
* because this is done under a socket lock from the
* setsockopt call.
@@ -1381,11 +1385,19 @@ SCTP_STATIC int sctp_setsockopt_connectx(struct sock* sk,
/*
* New (hopefully final) interface for the API.
* We use the sctp_getaddrs_old structure so that use-space library
- * can avoid any unnecessary allocations. The only defferent part
+ * can avoid any unnecessary allocations. The only different part
* is that we store the actual length of the address buffer into the
- * addrs_num structure member. That way we can re-use the existing
+ * addrs_num structure member. That way we can re-use the existing
* code.
*/
+#ifdef CONFIG_COMPAT
+struct compat_sctp_getaddrs_old {
+ sctp_assoc_t assoc_id;
+ s32 addr_num;
+ compat_uptr_t addrs; /* struct sockaddr * */
+};
+#endif
+
SCTP_STATIC int sctp_getsockopt_connectx3(struct sock* sk, int len,
char __user *optval,
int __user *optlen)
@@ -1394,16 +1406,30 @@ SCTP_STATIC int sctp_getsockopt_connectx3(struct sock* sk, int len,
sctp_assoc_t assoc_id = 0;
int err = 0;
- if (len < sizeof(param))
- return -EINVAL;
+#ifdef CONFIG_COMPAT
+ if (is_compat_task()) {
+ struct compat_sctp_getaddrs_old param32;
- if (copy_from_user(&param, optval, sizeof(param)))
- return -EFAULT;
+ if (len < sizeof(param32))
+ return -EINVAL;
+ if (copy_from_user(&param32, optval, sizeof(param32)))
+ return -EFAULT;
- err = __sctp_setsockopt_connectx(sk,
- (struct sockaddr __user *)param.addrs,
- param.addr_num, &assoc_id);
+ param.assoc_id = param32.assoc_id;
+ param.addr_num = param32.addr_num;
+ param.addrs = compat_ptr(param32.addrs);
+ } else
+#endif
+ {
+ if (len < sizeof(param))
+ return -EINVAL;
+ if (copy_from_user(&param, optval, sizeof(param)))
+ return -EFAULT;
+ }
+ err = __sctp_setsockopt_connectx(sk, (struct sockaddr __user *)
+ param.addrs, param.addr_num,
+ &assoc_id);
if (err == 0 || err == -EINPROGRESS) {
if (copy_to_user(optval, &assoc_id, sizeof(assoc_id)))
return -EFAULT;
@@ -6193,7 +6219,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
/* Is there any exceptional events? */
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
mask |= POLLERR |
- sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0;
+ (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
if (sk->sk_shutdown & RCV_SHUTDOWN)
mask |= POLLRDHUP | POLLIN | POLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
diff --git a/net/socket.c b/net/socket.c
index 4ca1526db75..fc90b4f0da3 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -215,12 +215,13 @@ static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen,
int err;
int len;
+ BUG_ON(klen > sizeof(struct sockaddr_storage));
err = get_user(len, ulen);
if (err)
return err;
if (len > klen)
len = klen;
- if (len < 0 || len > sizeof(struct sockaddr_storage))
+ if (len < 0)
return -EINVAL;
if (len) {
if (audit_sockaddr(klen, kaddr))
@@ -1832,8 +1833,10 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size,
msg.msg_iov = &iov;
iov.iov_len = size;
iov.iov_base = ubuf;
- msg.msg_name = (struct sockaddr *)&address;
- msg.msg_namelen = sizeof(address);
+ /* Save some cycles and don't copy the address if not needed */
+ msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
+ /* We assume all kernel code knows the size of sockaddr_storage */
+ msg.msg_namelen = 0;
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
err = sock_recvmsg(sock, &msg, size, flags);
@@ -1956,6 +1959,20 @@ struct used_address {
unsigned int name_len;
};
+static int copy_msghdr_from_user(struct msghdr *kmsg,
+ struct msghdr __user *umsg)
+{
+ if (copy_from_user(kmsg, umsg, sizeof(struct msghdr)))
+ return -EFAULT;
+
+ if (kmsg->msg_namelen < 0)
+ return -EINVAL;
+
+ if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
+ kmsg->msg_namelen = sizeof(struct sockaddr_storage);
+ return 0;
+}
+
static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
struct msghdr *msg_sys, unsigned int flags,
struct used_address *used_address)
@@ -1974,8 +1991,11 @@ static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg,
if (MSG_CMSG_COMPAT & flags) {
if (get_compat_msghdr(msg_sys, msg_compat))
return -EFAULT;
- } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
- return -EFAULT;
+ } else {
+ err = copy_msghdr_from_user(msg_sys, msg);
+ if (err)
+ return err;
+ }
if (msg_sys->msg_iovlen > UIO_FASTIOV) {
err = -EMSGSIZE;
@@ -2183,8 +2203,11 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
if (MSG_CMSG_COMPAT & flags) {
if (get_compat_msghdr(msg_sys, msg_compat))
return -EFAULT;
- } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr)))
- return -EFAULT;
+ } else {
+ err = copy_msghdr_from_user(msg_sys, msg);
+ if (err)
+ return err;
+ }
if (msg_sys->msg_iovlen > UIO_FASTIOV) {
err = -EMSGSIZE;
@@ -2197,16 +2220,14 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
goto out;
}
- /*
- * Save the user-mode address (verify_iovec will change the
- * kernel msghdr to use the kernel address space)
+ /* Save the user-mode address (verify_iovec will change the
+ * kernel msghdr to use the kernel address space)
*/
-
uaddr = (__force void __user *)msg_sys->msg_name;
uaddr_len = COMPAT_NAMELEN(msg);
- if (MSG_CMSG_COMPAT & flags) {
+ if (MSG_CMSG_COMPAT & flags)
err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
- } else
+ else
err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE);
if (err < 0)
goto out_freeiov;
@@ -2215,6 +2236,9 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg,
cmsg_ptr = (unsigned long)msg_sys->msg_control;
msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT);
+ /* We assume all kernel code knows the size of sockaddr_storage */
+ msg_sys->msg_namelen = 0;
+
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys,
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index d304f41260f..f1eb0d16666 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -120,7 +120,7 @@ static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt)
if (IS_ERR(clnt)) {
dprintk("RPC: failed to create AF_LOCAL gssproxy "
"client (errno %ld).\n", PTR_ERR(clnt));
- result = -PTR_ERR(clnt);
+ result = PTR_ERR(clnt);
*_clnt = NULL;
goto out;
}
@@ -213,6 +213,26 @@ static int gssp_call(struct net *net, struct rpc_message *msg)
return status;
}
+static void gssp_free_receive_pages(struct gssx_arg_accept_sec_context *arg)
+{
+ int i;
+
+ for (i = 0; i < arg->npages && arg->pages[i]; i++)
+ __free_page(arg->pages[i]);
+}
+
+static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg)
+{
+ arg->npages = DIV_ROUND_UP(NGROUPS_MAX * 4, PAGE_SIZE);
+ arg->pages = kzalloc(arg->npages * sizeof(struct page *), GFP_KERNEL);
+ /*
+ * XXX: actual pages are allocated by xdr layer in
+ * xdr_partial_copy_from_skb.
+ */
+ if (!arg->pages)
+ return -ENOMEM;
+ return 0;
+}
/*
* Public functions
@@ -261,10 +281,16 @@ int gssp_accept_sec_context_upcall(struct net *net,
arg.context_handle = &ctxh;
res.output_token->len = GSSX_max_output_token_sz;
+ ret = gssp_alloc_receive_pages(&arg);
+ if (ret)
+ return ret;
+
/* use nfs/ for targ_name ? */
ret = gssp_call(net, &msg);
+ gssp_free_receive_pages(&arg);
+
/* we need to fetch all data even in case of error so
* that we can free special strctures is they have been allocated */
data->major_status = res.status.major_status;
@@ -328,7 +354,6 @@ void gssp_free_upcall_data(struct gssp_upcall_data *data)
kfree(data->in_handle.data);
kfree(data->out_handle.data);
kfree(data->out_token.data);
- kfree(data->mech_oid.data);
free_svc_cred(&data->creds);
}
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index 357f613df7f..f0f78c5f1c7 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -166,14 +166,15 @@ static int dummy_dec_opt_array(struct xdr_stream *xdr,
return 0;
}
-static int get_s32(void **p, void *max, s32 *res)
+static int get_host_u32(struct xdr_stream *xdr, u32 *res)
{
- void *base = *p;
- void *next = (void *)((char *)base + sizeof(s32));
- if (unlikely(next > max || next < base))
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4);
+ if (!p)
return -EINVAL;
- memcpy(res, base, sizeof(s32));
- *p = next;
+ /* Contents of linux creds are all host-endian: */
+ memcpy(res, p, sizeof(u32));
return 0;
}
@@ -182,9 +183,9 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
{
u32 length;
__be32 *p;
- void *q, *end;
- s32 tmp;
- int N, i, err;
+ u32 tmp;
+ u32 N;
+ int i, err;
p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
@@ -192,33 +193,28 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
length = be32_to_cpup(p);
- /* FIXME: we do not want to use the scratch buffer for this one
- * may need to use functions that allows us to access an io vector
- * directly */
- p = xdr_inline_decode(xdr, length);
- if (unlikely(p == NULL))
+ if (length > (3 + NGROUPS_MAX) * sizeof(u32))
return -ENOSPC;
- q = p;
- end = q + length;
-
/* uid */
- err = get_s32(&q, end, &tmp);
+ err = get_host_u32(xdr, &tmp);
if (err)
return err;
creds->cr_uid = make_kuid(&init_user_ns, tmp);
/* gid */
- err = get_s32(&q, end, &tmp);
+ err = get_host_u32(xdr, &tmp);
if (err)
return err;
creds->cr_gid = make_kgid(&init_user_ns, tmp);
/* number of additional gid's */
- err = get_s32(&q, end, &tmp);
+ err = get_host_u32(xdr, &tmp);
if (err)
return err;
N = tmp;
+ if ((3 + N) * sizeof(u32) != length)
+ return -EINVAL;
creds->cr_group_info = groups_alloc(N);
if (creds->cr_group_info == NULL)
return -ENOMEM;
@@ -226,7 +222,7 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr,
/* gid's */
for (i = 0; i < N; i++) {
kgid_t kgid;
- err = get_s32(&q, end, &tmp);
+ err = get_host_u32(xdr, &tmp);
if (err)
goto out_free_groups;
err = -EINVAL;
@@ -430,7 +426,7 @@ static int dummy_enc_nameattr_array(struct xdr_stream *xdr,
static int dummy_dec_nameattr_array(struct xdr_stream *xdr,
struct gssx_name_attr_array *naa)
{
- struct gssx_name_attr dummy;
+ struct gssx_name_attr dummy = { .attr = {.len = 0} };
u32 count, i;
__be32 *p;
@@ -493,12 +489,13 @@ static int gssx_enc_name(struct xdr_stream *xdr,
return err;
}
+
static int gssx_dec_name(struct xdr_stream *xdr,
struct gssx_name *name)
{
- struct xdr_netobj dummy_netobj;
- struct gssx_name_attr_array dummy_name_attr_array;
- struct gssx_option_array dummy_option_array;
+ struct xdr_netobj dummy_netobj = { .len = 0 };
+ struct gssx_name_attr_array dummy_name_attr_array = { .count = 0 };
+ struct gssx_option_array dummy_option_array = { .count = 0 };
int err;
/* name->display_name */
@@ -783,6 +780,9 @@ void gssx_enc_accept_sec_context(struct rpc_rqst *req,
/* arg->options */
err = dummy_enc_opt_array(xdr, &arg->options);
+ xdr_inline_pages(&req->rq_rcv_buf,
+ PAGE_SIZE/2 /* pretty arbitrary */,
+ arg->pages, 0 /* page base */, arg->npages * PAGE_SIZE);
done:
if (err)
dprintk("RPC: gssx_enc_accept_sec_context: %d\n", err);
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.h b/net/sunrpc/auth_gss/gss_rpc_xdr.h
index 1c98b27d870..685a688f3d8 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.h
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.h
@@ -147,6 +147,8 @@ struct gssx_arg_accept_sec_context {
struct gssx_cb *input_cb;
u32 ret_deleg_cred;
struct gssx_option_array options;
+ struct page **pages;
+ unsigned int npages;
};
struct gssx_res_accept_sec_context {
@@ -240,7 +242,8 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
2 * GSSX_max_princ_sz + \
8 + 8 + 4 + 4 + 4)
#define GSSX_max_output_token_sz 1024
-#define GSSX_max_creds_sz (4 + 4 + 4 + NGROUPS_MAX * 4)
+/* grouplist not included; we allocate separate pages for that: */
+#define GSSX_max_creds_sz (4 + 4 + 4 /* + NGROUPS_MAX*4 */)
#define GSSX_RES_accept_sec_context_sz (GSSX_default_status_sz + \
GSSX_default_ctx_sz + \
GSSX_max_output_token_sz + \
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 5a750b9c364..3524a8b5004 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1402,14 +1402,18 @@ call_refreshresult(struct rpc_task *task)
task->tk_action = call_refresh;
switch (status) {
case 0:
- if (rpcauth_uptodatecred(task))
+ if (rpcauth_uptodatecred(task)) {
task->tk_action = call_allocate;
- return;
+ return;
+ }
+ /* Use rate-limiting and a max number of retries if refresh
+ * had status 0 but failed to update the cred.
+ */
case -ETIMEDOUT:
rpc_delay(task, 3*HZ);
- case -EKEYEXPIRED:
case -EAGAIN:
status = -EACCES;
+ case -EKEYEXPIRED:
if (!task->tk_cred_retry)
break;
task->tk_cred_retry--;
@@ -1644,6 +1648,10 @@ call_connect(struct rpc_task *task)
task->tk_action = call_connect_status;
if (task->tk_status < 0)
return;
+ if (task->tk_flags & RPC_TASK_NOCONNECT) {
+ rpc_exit(task, -ENOTCONN);
+ return;
+ }
xprt_connect(task);
}
}
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
index 74d948f5d5a..779742cfc1f 100644
--- a/net/sunrpc/netns.h
+++ b/net/sunrpc/netns.h
@@ -23,6 +23,7 @@ struct sunrpc_net {
struct rpc_clnt *rpcb_local_clnt4;
spinlock_t rpcb_clnt_lock;
unsigned int rpcb_users;
+ unsigned int rpcb_is_af_local : 1;
struct mutex gssp_lock;
wait_queue_head_t gssp_wq;
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 3df764dc330..1891a1022c1 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -204,13 +204,15 @@ void rpcb_put_local(struct net *net)
}
static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt,
- struct rpc_clnt *clnt4)
+ struct rpc_clnt *clnt4,
+ bool is_af_local)
{
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
/* Protected by rpcb_create_local_mutex */
sn->rpcb_local_clnt = clnt;
sn->rpcb_local_clnt4 = clnt4;
+ sn->rpcb_is_af_local = is_af_local ? 1 : 0;
smp_wmb();
sn->rpcb_users = 1;
dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: "
@@ -238,6 +240,14 @@ static int rpcb_create_local_unix(struct net *net)
.program = &rpcb_program,
.version = RPCBVERS_2,
.authflavor = RPC_AUTH_NULL,
+ /*
+ * We turn off the idle timeout to prevent the kernel
+ * from automatically disconnecting the socket.
+ * Otherwise, we'd have to cache the mount namespace
+ * of the caller and somehow pass that to the socket
+ * reconnect code.
+ */
+ .flags = RPC_CLNT_CREATE_NO_IDLE_TIMEOUT,
};
struct rpc_clnt *clnt, *clnt4;
int result = 0;
@@ -263,7 +273,7 @@ static int rpcb_create_local_unix(struct net *net)
clnt4 = NULL;
}
- rpcb_set_local(net, clnt, clnt4);
+ rpcb_set_local(net, clnt, clnt4, true);
out:
return result;
@@ -315,7 +325,7 @@ static int rpcb_create_local_net(struct net *net)
clnt4 = NULL;
}
- rpcb_set_local(net, clnt, clnt4);
+ rpcb_set_local(net, clnt, clnt4, false);
out:
return result;
@@ -376,13 +386,16 @@ static struct rpc_clnt *rpcb_create(struct net *net, const char *hostname,
return rpc_create(&args);
}
-static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg)
+static int rpcb_register_call(struct sunrpc_net *sn, struct rpc_clnt *clnt, struct rpc_message *msg, bool is_set)
{
- int result, error = 0;
+ int flags = RPC_TASK_NOCONNECT;
+ int error, result = 0;
+ if (is_set || !sn->rpcb_is_af_local)
+ flags = RPC_TASK_SOFTCONN;
msg->rpc_resp = &result;
- error = rpc_call_sync(clnt, msg, RPC_TASK_SOFTCONN);
+ error = rpc_call_sync(clnt, msg, flags);
if (error < 0) {
dprintk("RPC: failed to contact local rpcbind "
"server (errno %d).\n", -error);
@@ -439,16 +452,19 @@ int rpcb_register(struct net *net, u32 prog, u32 vers, int prot, unsigned short
.rpc_argp = &map,
};
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+ bool is_set = false;
dprintk("RPC: %sregistering (%u, %u, %d, %u) with local "
"rpcbind\n", (port ? "" : "un"),
prog, vers, prot, port);
msg.rpc_proc = &rpcb_procedures2[RPCBPROC_UNSET];
- if (port)
+ if (port != 0) {
msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET];
+ is_set = true;
+ }
- return rpcb_register_call(sn->rpcb_local_clnt, &msg);
+ return rpcb_register_call(sn, sn->rpcb_local_clnt, &msg, is_set);
}
/*
@@ -461,6 +477,7 @@ static int rpcb_register_inet4(struct sunrpc_net *sn,
const struct sockaddr_in *sin = (const struct sockaddr_in *)sap;
struct rpcbind_args *map = msg->rpc_argp;
unsigned short port = ntohs(sin->sin_port);
+ bool is_set = false;
int result;
map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL);
@@ -471,10 +488,12 @@ static int rpcb_register_inet4(struct sunrpc_net *sn,
map->r_addr, map->r_netid);
msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
- if (port)
+ if (port != 0) {
msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
+ is_set = true;
+ }
- result = rpcb_register_call(sn->rpcb_local_clnt4, msg);
+ result = rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, is_set);
kfree(map->r_addr);
return result;
}
@@ -489,6 +508,7 @@ static int rpcb_register_inet6(struct sunrpc_net *sn,
const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap;
struct rpcbind_args *map = msg->rpc_argp;
unsigned short port = ntohs(sin6->sin6_port);
+ bool is_set = false;
int result;
map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL);
@@ -499,10 +519,12 @@ static int rpcb_register_inet6(struct sunrpc_net *sn,
map->r_addr, map->r_netid);
msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
- if (port)
+ if (port != 0) {
msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET];
+ is_set = true;
+ }
- result = rpcb_register_call(sn->rpcb_local_clnt4, msg);
+ result = rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, is_set);
kfree(map->r_addr);
return result;
}
@@ -519,7 +541,7 @@ static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn,
map->r_addr = "";
msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET];
- return rpcb_register_call(sn->rpcb_local_clnt4, msg);
+ return rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, false);
}
/**
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 06bdf5a1082..1583c8a4eb7 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -493,8 +493,6 @@ static int unix_gid_parse(struct cache_detail *cd,
if (rv)
return -EINVAL;
uid = make_kuid(&init_user_ns, id);
- if (!uid_valid(uid))
- return -EINVAL;
ug.uid = uid;
expiry = get_expiry(&mesg);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 0f679df7d07..305374d4fb9 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -917,7 +917,10 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk)
len = svsk->sk_datalen;
npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
for (i = 0; i < npages; i++) {
- BUG_ON(svsk->sk_pages[i] == NULL);
+ if (svsk->sk_pages[i] == NULL) {
+ WARN_ON_ONCE(1);
+ continue;
+ }
put_page(svsk->sk_pages[i]);
svsk->sk_pages[i] = NULL;
}
@@ -1092,8 +1095,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
goto err_noclose;
}
- if (svc_sock_reclen(svsk) < 8)
+ if (svsk->sk_datalen < 8) {
+ svsk->sk_datalen = 0;
goto err_delete; /* client is nuts. */
+ }
rqstp->rq_arg.len = svsk->sk_datalen;
rqstp->rq_arg.page_base = 0;
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 75edcfad6e2..1504bb11e4f 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -207,10 +207,13 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base,
pgfrom_base -= copy;
vto = kmap_atomic(*pgto);
- vfrom = kmap_atomic(*pgfrom);
- memmove(vto + pgto_base, vfrom + pgfrom_base, copy);
+ if (*pgto != *pgfrom) {
+ vfrom = kmap_atomic(*pgfrom);
+ memcpy(vto + pgto_base, vfrom + pgfrom_base, copy);
+ kunmap_atomic(vfrom);
+ } else
+ memmove(vto + pgto_base, vto + pgfrom_base, copy);
flush_dcache_page(*pgto);
- kunmap_atomic(vfrom);
kunmap_atomic(vto);
} while ((len -= copy) != 0);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
index 8d2edddf48c..65b146297f5 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
@@ -98,6 +98,7 @@ void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch,
*/
static u32 *decode_write_list(u32 *va, u32 *vaend)
{
+ unsigned long start, end;
int nchunks;
struct rpcrdma_write_array *ary =
@@ -113,9 +114,12 @@ static u32 *decode_write_list(u32 *va, u32 *vaend)
return NULL;
}
nchunks = ntohl(ary->wc_nchunks);
- if (((unsigned long)&ary->wc_array[0] +
- (sizeof(struct rpcrdma_write_chunk) * nchunks)) >
- (unsigned long)vaend) {
+
+ start = (unsigned long)&ary->wc_array[0];
+ end = (unsigned long)vaend;
+ if (nchunks < 0 ||
+ nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
+ (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
ary, nchunks, vaend);
return NULL;
@@ -129,6 +133,7 @@ static u32 *decode_write_list(u32 *va, u32 *vaend)
static u32 *decode_reply_array(u32 *va, u32 *vaend)
{
+ unsigned long start, end;
int nchunks;
struct rpcrdma_write_array *ary =
(struct rpcrdma_write_array *)va;
@@ -143,9 +148,12 @@ static u32 *decode_reply_array(u32 *va, u32 *vaend)
return NULL;
}
nchunks = ntohl(ary->wc_nchunks);
- if (((unsigned long)&ary->wc_array[0] +
- (sizeof(struct rpcrdma_write_chunk) * nchunks)) >
- (unsigned long)vaend) {
+
+ start = (unsigned long)&ary->wc_array[0];
+ end = (unsigned long)vaend;
+ if (nchunks < 0 ||
+ nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
+ (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
ary, nchunks, vaend);
return NULL;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index ffd50348a50..fc47165dc25 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -391,8 +391,10 @@ static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen,
return kernel_sendmsg(sock, &msg, NULL, 0, 0);
}
-static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more)
+static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more, bool zerocopy)
{
+ ssize_t (*do_sendpage)(struct socket *sock, struct page *page,
+ int offset, size_t size, int flags);
struct page **ppage;
unsigned int remainder;
int err, sent = 0;
@@ -401,6 +403,9 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i
base += xdr->page_base;
ppage = xdr->pages + (base >> PAGE_SHIFT);
base &= ~PAGE_MASK;
+ do_sendpage = sock->ops->sendpage;
+ if (!zerocopy)
+ do_sendpage = sock_no_sendpage;
for(;;) {
unsigned int len = min_t(unsigned int, PAGE_SIZE - base, remainder);
int flags = XS_SENDMSG_FLAGS;
@@ -408,7 +413,7 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i
remainder -= len;
if (remainder != 0 || more)
flags |= MSG_MORE;
- err = sock->ops->sendpage(sock, *ppage, base, len, flags);
+ err = do_sendpage(sock, *ppage, base, len, flags);
if (remainder == 0 || err != len)
break;
sent += err;
@@ -429,9 +434,10 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i
* @addrlen: UDP only -- length of destination address
* @xdr: buffer containing this request
* @base: starting position in the buffer
+ * @zerocopy: true if it is safe to use sendpage()
*
*/
-static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base)
+static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, bool zerocopy)
{
unsigned int remainder = xdr->len - base;
int err, sent = 0;
@@ -459,7 +465,7 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
if (base < xdr->page_len) {
unsigned int len = xdr->page_len - base;
remainder -= len;
- err = xs_send_pagedata(sock, xdr, base, remainder != 0);
+ err = xs_send_pagedata(sock, xdr, base, remainder != 0, zerocopy);
if (remainder == 0 || err != len)
goto out;
sent += err;
@@ -496,6 +502,7 @@ static int xs_nospace(struct rpc_task *task)
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ struct sock *sk = transport->inet;
int ret = -EAGAIN;
dprintk("RPC: %5u xmit incomplete (%u left of %u)\n",
@@ -513,7 +520,7 @@ static int xs_nospace(struct rpc_task *task)
* window size
*/
set_bit(SOCK_NOSPACE, &transport->sock->flags);
- transport->inet->sk_write_pending++;
+ sk->sk_write_pending++;
/* ...and wait for more buffer space */
xprt_wait_for_buffer_space(task, xs_nospace_callback);
}
@@ -523,6 +530,9 @@ static int xs_nospace(struct rpc_task *task)
}
spin_unlock_bh(&xprt->transport_lock);
+
+ /* Race breaker in case memory is freed before above code is called */
+ sk->sk_write_space(sk);
return ret;
}
@@ -562,7 +572,7 @@ static int xs_local_send_request(struct rpc_task *task)
req->rq_svec->iov_base, req->rq_svec->iov_len);
status = xs_sendpages(transport->sock, NULL, 0,
- xdr, req->rq_bytes_sent);
+ xdr, req->rq_bytes_sent, true);
dprintk("RPC: %s(%u) = %d\n",
__func__, xdr->len - req->rq_bytes_sent, status);
if (likely(status >= 0)) {
@@ -618,7 +628,7 @@ static int xs_udp_send_request(struct rpc_task *task)
status = xs_sendpages(transport->sock,
xs_addr(xprt),
xprt->addrlen, xdr,
- req->rq_bytes_sent);
+ req->rq_bytes_sent, true);
dprintk("RPC: xs_udp_send_request(%u) = %d\n",
xdr->len - req->rq_bytes_sent, status);
@@ -689,6 +699,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct xdr_buf *xdr = &req->rq_snd_buf;
+ bool zerocopy = true;
int status;
xs_encode_stream_record_marker(&req->rq_snd_buf);
@@ -696,13 +707,20 @@ static int xs_tcp_send_request(struct rpc_task *task)
xs_pktdump("packet data:",
req->rq_svec->iov_base,
req->rq_svec->iov_len);
+ /* Don't use zero copy if this is a resend. If the RPC call
+ * completes while the socket holds a reference to the pages,
+ * then we may end up resending corrupted data.
+ */
+ if (task->tk_flags & RPC_TASK_SENT)
+ zerocopy = false;
/* Continue transmitting the packet/record. We must be careful
* to cope with writespace callbacks arriving _after_ we have
* called sendmsg(). */
while (1) {
status = xs_sendpages(transport->sock,
- NULL, 0, xdr, req->rq_bytes_sent);
+ NULL, 0, xdr, req->rq_bytes_sent,
+ zerocopy);
dprintk("RPC: xs_tcp_send_request(%u) = %d\n",
xdr->len - req->rq_bytes_sent, status);
diff --git a/net/sysctl_net.c b/net/sysctl_net.c
index 9bc6db04be3..e7000be321b 100644
--- a/net/sysctl_net.c
+++ b/net/sysctl_net.c
@@ -47,12 +47,12 @@ static int net_ctl_permissions(struct ctl_table_header *head,
/* Allow network administrator to have same access as root. */
if (ns_capable(net->user_ns, CAP_NET_ADMIN) ||
- uid_eq(root_uid, current_uid())) {
+ uid_eq(root_uid, current_euid())) {
int mode = (table->mode >> 6) & 7;
return (mode << 6) | (mode << 3) | mode;
}
/* Allow netns root group to have the same access as the root group */
- if (gid_eq(root_gid, current_gid())) {
+ if (in_egroup_p(root_gid)) {
int mode = (table->mode >> 3) & 7;
return (mode << 3) | mode;
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 515ce38e4f4..2b1d7c2d677 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -905,9 +905,6 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock,
goto exit;
}
- /* will be updated in set_orig_addr() if needed */
- m->msg_namelen = 0;
-
timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
restart:
@@ -1017,9 +1014,6 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock,
goto exit;
}
- /* will be updated in set_orig_addr() if needed */
- m->msg_namelen = 0;
-
target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len);
timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
@@ -1179,7 +1173,7 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf)
/* Accept only ACK or NACK message */
if (unlikely(msg_errcode(msg))) {
sock->state = SS_DISCONNECTING;
- sk->sk_err = -ECONNREFUSED;
+ sk->sk_err = ECONNREFUSED;
retval = TIPC_OK;
break;
}
@@ -1190,7 +1184,7 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf)
res = auto_connect(sock, msg);
if (res) {
sock->state = SS_DISCONNECTING;
- sk->sk_err = res;
+ sk->sk_err = -res;
retval = TIPC_OK;
break;
}
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 826e09938bf..75e198d029d 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -160,9 +160,8 @@ static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
static inline unsigned int unix_hash_fold(__wsum n)
{
- unsigned int hash = (__force unsigned int)n;
+ unsigned int hash = (__force unsigned int)csum_fold(n);
- hash ^= hash>>16;
hash ^= hash>>8;
return hash&(UNIX_HASH_SIZE-1);
}
@@ -529,13 +528,17 @@ static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *,
struct msghdr *, size_t, int);
-static void unix_set_peek_off(struct sock *sk, int val)
+static int unix_set_peek_off(struct sock *sk, int val)
{
struct unix_sock *u = unix_sk(sk);
- mutex_lock(&u->readlock);
+ if (mutex_lock_interruptible(&u->readlock))
+ return -EINTR;
+
sk->sk_peek_off = val;
mutex_unlock(&u->readlock);
+
+ return 0;
}
@@ -713,7 +716,9 @@ static int unix_autobind(struct socket *sock)
int err;
unsigned int retries = 0;
- mutex_lock(&u->readlock);
+ err = mutex_lock_interruptible(&u->readlock);
+ if (err)
+ return err;
err = 0;
if (u->addr)
@@ -872,7 +877,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
goto out;
addr_len = err;
- mutex_lock(&u->readlock);
+ err = mutex_lock_interruptible(&u->readlock);
+ if (err)
+ goto out;
err = -EINVAL;
if (u->addr)
@@ -1245,6 +1252,15 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb)
return 0;
}
+static void unix_sock_inherit_flags(const struct socket *old,
+ struct socket *new)
+{
+ if (test_bit(SOCK_PASSCRED, &old->flags))
+ set_bit(SOCK_PASSCRED, &new->flags);
+ if (test_bit(SOCK_PASSSEC, &old->flags))
+ set_bit(SOCK_PASSSEC, &new->flags);
+}
+
static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
{
struct sock *sk = sock->sk;
@@ -1279,6 +1295,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
/* attach accepted sock to socket */
unix_state_lock(tsk);
newsock->state = SS_CONNECTED;
+ unix_sock_inherit_flags(sock, newsock);
sock_graft(tsk, newsock);
unix_state_unlock(tsk);
return 0;
@@ -1751,7 +1768,6 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
{
struct unix_sock *u = unix_sk(sk);
- msg->msg_namelen = 0;
if (u->addr) {
msg->msg_namelen = u->addr->len;
memcpy(msg->msg_name, u->addr->name, u->addr->len);
@@ -1775,11 +1791,12 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
if (flags&MSG_OOB)
goto out;
- msg->msg_namelen = 0;
-
err = mutex_lock_interruptible(&u->readlock);
- if (err) {
- err = sock_intr_errno(sock_rcvtimeo(sk, noblock));
+ if (unlikely(err)) {
+ /* recvmsg() in non blocking mode is supposed to return -EAGAIN
+ * sk_rcvtimeo is not honored by mutex_lock_interruptible()
+ */
+ err = noblock ? -EAGAIN : -ERESTARTSYS;
goto out;
}
@@ -1899,6 +1916,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
struct unix_sock *u = unix_sk(sk);
struct sockaddr_un *sunaddr = msg->msg_name;
int copied = 0;
+ int noblock = flags & MSG_DONTWAIT;
int check_creds = 0;
int target;
int err = 0;
@@ -1914,9 +1932,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
goto out;
target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
- timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
-
- msg->msg_namelen = 0;
+ timeo = sock_rcvtimeo(sk, noblock);
/* Lock the socket to prevent queue disordering
* while sleeps in memcpy_tomsg
@@ -1928,8 +1944,11 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
}
err = mutex_lock_interruptible(&u->readlock);
- if (err) {
- err = sock_intr_errno(timeo);
+ if (unlikely(err)) {
+ /* recvmsg() in non blocking mode is supposed to return -EAGAIN
+ * sk_rcvtimeo is not honored by mutex_lock_interruptible()
+ */
+ err = noblock ? -EAGAIN : -ERESTARTSYS;
goto out;
}
diff --git a/net/unix/diag.c b/net/unix/diag.c
index d591091603b..86fa0f3b2ca 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -124,6 +124,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
rep->udiag_family = AF_UNIX;
rep->udiag_type = sk->sk_type;
rep->udiag_state = sk->sk_state;
+ rep->pad = 0;
rep->udiag_ino = sk_ino;
sock_diag_save_cookie(sk, rep->udiag_cookie);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 3f77f42a3b5..9b88693bcc9 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1670,8 +1670,6 @@ vsock_stream_recvmsg(struct kiocb *kiocb,
vsk = vsock_sk(sk);
err = 0;
- msg->msg_namelen = 0;
-
lock_sock(sk);
if (sk->sk_state != SS_CONNECTED) {
diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c
index daff75200e2..62bbf7d7398 100644
--- a/net/vmw_vsock/vmci_transport.c
+++ b/net/vmw_vsock/vmci_transport.c
@@ -1746,8 +1746,6 @@ static int vmci_transport_dgram_dequeue(struct kiocb *kiocb,
if (flags & MSG_OOB || flags & MSG_ERRQUEUE)
return -EOPNOTSUPP;
- msg->msg_namelen = 0;
-
/* Retrieve the head sk_buff from the socket's receive queue. */
err = 0;
skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err);
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 73405e00c80..64fcbae020d 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -876,6 +876,7 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev,
cfg80211_leave_mesh(rdev, dev);
break;
case NL80211_IFTYPE_AP:
+ case NL80211_IFTYPE_P2P_GO:
cfg80211_stop_ap(rdev, dev);
break;
default:
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index d80e47194d4..e62c1ad4e4c 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -269,6 +269,8 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev,
if (chan->flags & IEEE80211_CHAN_DISABLED)
continue;
wdev->wext.ibss.chandef.chan = chan;
+ wdev->wext.ibss.chandef.center_freq1 =
+ chan->center_freq;
break;
}
@@ -353,6 +355,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev,
if (chan) {
wdev->wext.ibss.chandef.chan = chan;
wdev->wext.ibss.chandef.width = NL80211_CHAN_WIDTH_20_NOHT;
+ wdev->wext.ibss.chandef.center_freq1 = freq;
wdev->wext.ibss.channel_fixed = true;
} else {
/* cfg80211_ibss_wext_join will pick one if needed */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b14b7e3cb6e..448c034184e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -471,10 +471,12 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb,
goto out_unlock;
}
*rdev = wiphy_to_dev((*wdev)->wiphy);
- cb->args[0] = (*rdev)->wiphy_idx;
+ /* 0 is the first index - add 1 to parse only once */
+ cb->args[0] = (*rdev)->wiphy_idx + 1;
cb->args[1] = (*wdev)->identifier;
} else {
- struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0]);
+ /* subtract the 1 again here */
+ struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1);
struct wireless_dev *tmp;
if (!wiphy) {
@@ -6588,12 +6590,14 @@ EXPORT_SYMBOL(cfg80211_testmode_alloc_event_skb);
void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp)
{
+ struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0];
void *hdr = ((void **)skb->cb)[1];
struct nlattr *data = ((void **)skb->cb)[2];
nla_nest_end(skb, data);
genlmsg_end(skb, hdr);
- genlmsg_multicast(skb, 0, nl80211_testmode_mcgrp.id, gfp);
+ genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), skb, 0,
+ nl80211_testmode_mcgrp.id, gfp);
}
EXPORT_SYMBOL(cfg80211_testmode_event);
#endif
@@ -10028,7 +10032,8 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
genlmsg_end(msg, hdr);
- genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp);
+ genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+ nl80211_mlme_mcgrp.id, gfp);
return;
nla_put_failure:
diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c
index 7d604c06c3d..722da616438 100644
--- a/net/wireless/radiotap.c
+++ b/net/wireless/radiotap.c
@@ -97,6 +97,10 @@ int ieee80211_radiotap_iterator_init(
struct ieee80211_radiotap_header *radiotap_header,
int max_length, const struct ieee80211_radiotap_vendor_namespaces *vns)
{
+ /* check the radiotap header can actually be present */
+ if (max_length < sizeof(struct ieee80211_radiotap_header))
+ return -EINVAL;
+
/* Linux only supports version 0 radiotap format */
if (radiotap_header->it_version)
return -EINVAL;
@@ -120,6 +124,10 @@ int ieee80211_radiotap_iterator_init(
/* find payload start allowing for extended bitmap(s) */
if (iterator->_bitmap_shifter & (1<<IEEE80211_RADIOTAP_EXT)) {
+ if ((unsigned long)iterator->_arg -
+ (unsigned long)iterator->_rtheader + sizeof(uint32_t) >
+ (unsigned long)iterator->_max_length)
+ return -EINVAL;
while (get_unaligned_le32(iterator->_arg) &
(1 << IEEE80211_RADIOTAP_EXT)) {
iterator->_arg += sizeof(uint32_t);
@@ -131,7 +139,8 @@ int ieee80211_radiotap_iterator_init(
*/
if ((unsigned long)iterator->_arg -
- (unsigned long)iterator->_rtheader >
+ (unsigned long)iterator->_rtheader +
+ sizeof(uint32_t) >
(unsigned long)iterator->_max_length)
return -EINVAL;
}
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index fd99ea495b7..81019ee3ddc 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -253,10 +253,10 @@ void __cfg80211_sched_scan_results(struct work_struct *wk)
rdev = container_of(wk, struct cfg80211_registered_device,
sched_scan_results_wk);
- request = rdev->sched_scan_req;
-
mutex_lock(&rdev->sched_scan_mtx);
+ request = rdev->sched_scan_req;
+
/* we don't have sched_scan_req anymore if the scan is stopping */
if (request) {
if (request->flags & NL80211_SCAN_FLAG_FLUSH) {
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 37ca9694aab..f96af3b9632 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1340,10 +1340,9 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock,
if (sx25) {
sx25->sx25_family = AF_X25;
sx25->sx25_addr = x25->dest_addr;
+ msg->msg_namelen = sizeof(*sx25);
}
- msg->msg_namelen = sizeof(struct sockaddr_x25);
-
x25_check_rbuf(sk);
rc = copied;
out_free_dgram:
@@ -1583,11 +1582,11 @@ out_cud_release:
case SIOCX25CALLACCPTAPPRV: {
rc = -EINVAL;
lock_sock(sk);
- if (sk->sk_state != TCP_CLOSE)
- break;
- clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags);
+ if (sk->sk_state == TCP_CLOSE) {
+ clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags);
+ rc = 0;
+ }
release_sock(sk);
- rc = 0;
break;
}
@@ -1595,14 +1594,15 @@ out_cud_release:
rc = -EINVAL;
lock_sock(sk);
if (sk->sk_state != TCP_ESTABLISHED)
- break;
+ goto out_sendcallaccpt_release;
/* must call accptapprv above */
if (test_bit(X25_ACCPT_APPRV_FLAG, &x25->flags))
- break;
+ goto out_sendcallaccpt_release;
x25_write_internal(sk, X25_CALL_ACCEPTED);
x25->state = X25_STATE_3;
- release_sock(sk);
rc = 0;
+out_sendcallaccpt_release:
+ release_sock(sk);
break;
}