diff options
Diffstat (limited to 'net')
183 files changed, 1858 insertions, 1169 deletions
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 8a15eaadc4bd..4a78c4de9f20 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -9,7 +9,7 @@ bool vlan_do_receive(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; __be16 vlan_proto = skb->vlan_proto; - u16 vlan_id = skb->vlan_tci & VLAN_VID_MASK; + u16 vlan_id = vlan_tx_tag_get_id(skb); struct net_device *vlan_dev; struct vlan_pcpu_stats *rx_stats; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 3a8c8fd63c88..4af64afc7022 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -73,6 +73,8 @@ vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb) { struct vlan_priority_tci_mapping *mp; + smp_rmb(); /* coupled with smp_wmb() in vlan_dev_set_egress_priority() */ + mp = vlan_dev_priv(dev)->egress_priority_map[(skb->priority & 0xF)]; while (mp) { if (mp->priority == skb->priority) { @@ -249,6 +251,11 @@ int vlan_dev_set_egress_priority(const struct net_device *dev, np->next = mp; np->priority = skb_prio; np->vlan_qos = vlan_qos; + /* Before inserting this element in hash table, make sure all its fields + * are committed to memory. + * coupled with smp_rmb() in vlan_dev_get_egress_qos_mask() + */ + smp_wmb(); vlan->egress_priority_map[skb_prio & 0xF] = np; if (vlan_qos) vlan->nr_egress_mappings++; @@ -542,6 +549,23 @@ static const struct header_ops vlan_header_ops = { .parse = eth_header_parse, }; +static int vlan_passthru_hard_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, + const void *daddr, const void *saddr, + unsigned int len) +{ + struct vlan_dev_priv *vlan = vlan_dev_priv(dev); + struct net_device *real_dev = vlan->real_dev; + + return dev_hard_header(skb, real_dev, type, daddr, saddr, len); +} + +static const struct header_ops vlan_passthru_header_ops = { + .create = vlan_passthru_hard_header, + .rebuild = dev_rebuild_header, + .parse = eth_header_parse, +}; + static struct device_type vlan_type = { .name = "vlan", }; @@ -585,7 +609,7 @@ static int vlan_dev_init(struct net_device *dev) dev->needed_headroom = real_dev->needed_headroom; if (real_dev->features & NETIF_F_HW_VLAN_CTAG_TX) { - dev->header_ops = real_dev->header_ops; + dev->header_ops = &vlan_passthru_header_ops; dev->hard_header_len = real_dev->hard_header_len; } else { dev->header_ops = &vlan_header_ops; diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 309129732285..c7e634af8516 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -171,7 +171,7 @@ static size_t vlan_get_size(const struct net_device *dev) return nla_total_size(2) + /* IFLA_VLAN_PROTOCOL */ nla_total_size(2) + /* IFLA_VLAN_ID */ - sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */ + nla_total_size(sizeof(struct ifla_vlan_flags)) + /* IFLA_VLAN_FLAGS */ vlan_qos_map_size(vlan->nr_ingress_mappings) + vlan_qos_map_size(vlan->nr_egress_mappings); } diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c index de8df957867d..2ee3879161b1 100644 --- a/net/9p/trans_common.c +++ b/net/9p/trans_common.c @@ -24,11 +24,11 @@ */ void p9_release_pages(struct page **pages, int nr_pages) { - int i = 0; - while (pages[i] && nr_pages--) { - put_page(pages[i]); - i++; - } + int i; + + for (i = 0; i < nr_pages; i++) + if (pages[i]) + put_page(pages[i]); } EXPORT_SYMBOL(p9_release_pages); diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index e1c26b101830..990afab2be1b 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -577,6 +577,10 @@ static int p9_virtio_probe(struct virtio_device *vdev) mutex_lock(&virtio_9p_lock); list_add_tail(&chan->chan_list, &virtio_chan_list); mutex_unlock(&virtio_9p_lock); + + /* Let udev rules use the new mount_tag attribute. */ + kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE); + return 0; out_free_tag: @@ -654,6 +658,7 @@ static void p9_virtio_remove(struct virtio_device *vdev) list_del(&chan->chan_list); mutex_unlock(&virtio_9p_lock); sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); + kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE); kfree(chan->tag); kfree(chan->vc_wq); kfree(chan); diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index ef12839a7cfe..0018daccdea9 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1735,7 +1735,6 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr size_t size, int flags) { struct sock *sk = sock->sk; - struct sockaddr_at *sat = (struct sockaddr_at *)msg->msg_name; struct ddpehdr *ddp; int copied = 0; int offset = 0; @@ -1764,14 +1763,13 @@ static int atalk_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr } err = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copied); - if (!err) { - if (sat) { - sat->sat_family = AF_APPLETALK; - sat->sat_port = ddp->deh_sport; - sat->sat_addr.s_node = ddp->deh_snode; - sat->sat_addr.s_net = ddp->deh_snet; - } - msg->msg_namelen = sizeof(*sat); + if (!err && msg->msg_name) { + struct sockaddr_at *sat = msg->msg_name; + sat->sat_family = AF_APPLETALK; + sat->sat_port = ddp->deh_sport; + sat->sat_addr.s_node = ddp->deh_snode; + sat->sat_addr.s_net = ddp->deh_snet; + msg->msg_namelen = sizeof(*sat); } skb_free_datagram(sk, skb); /* Free the datagram. */ diff --git a/net/atm/common.c b/net/atm/common.c index 737bef59ce89..7b491006eaf4 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -531,8 +531,6 @@ int vcc_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, struct sk_buff *skb; int copied, error = -EINVAL; - msg->msg_namelen = 0; - if (sock->state != SS_CONNECTED) return -ENOTCONN; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index e277e38f736b..ba6db78a02b1 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1636,11 +1636,11 @@ static int ax25_recvmsg(struct kiocb *iocb, struct socket *sock, skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); - if (msg->msg_namelen != 0) { - struct sockaddr_ax25 *sax = (struct sockaddr_ax25 *)msg->msg_name; + if (msg->msg_name) { ax25_digi digi; ax25_address src; const unsigned char *mac = skb_mac_header(skb); + struct sockaddr_ax25 *sax = msg->msg_name; memset(sax, 0, sizeof(struct full_sockaddr_ax25)); ax25_addr_parse(mac + 1, skb->data - mac - 1, &src, NULL, diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index 51aafd669cbb..f1cb1f56cda9 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -61,6 +61,7 @@ static int __init batadv_init(void) batadv_recv_handler_init(); batadv_iv_init(); + batadv_nc_init(); batadv_event_workqueue = create_singlethread_workqueue("bat_events"); @@ -138,7 +139,7 @@ int batadv_mesh_init(struct net_device *soft_iface) if (ret < 0) goto err; - ret = batadv_nc_init(bat_priv); + ret = batadv_nc_mesh_init(bat_priv); if (ret < 0) goto err; @@ -163,7 +164,7 @@ void batadv_mesh_free(struct net_device *soft_iface) batadv_vis_quit(bat_priv); batadv_gw_node_purge(bat_priv); - batadv_nc_free(bat_priv); + batadv_nc_mesh_free(bat_priv); batadv_dat_free(bat_priv); batadv_bla_free(bat_priv); diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index e84629ece9b7..f97aeee2201c 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -35,6 +35,20 @@ static int batadv_nc_recv_coded_packet(struct sk_buff *skb, struct batadv_hard_iface *recv_if); /** + * batadv_nc_init - one-time initialization for network coding + */ +int __init batadv_nc_init(void) +{ + int ret; + + /* Register our packet type */ + ret = batadv_recv_handler_register(BATADV_CODED, + batadv_nc_recv_coded_packet); + + return ret; +} + +/** * batadv_nc_start_timer - initialise the nc periodic worker * @bat_priv: the bat priv with all the soft interface information */ @@ -45,10 +59,10 @@ static void batadv_nc_start_timer(struct batadv_priv *bat_priv) } /** - * batadv_nc_init - initialise coding hash table and start house keeping + * batadv_nc_mesh_init - initialise coding hash table and start house keeping * @bat_priv: the bat priv with all the soft interface information */ -int batadv_nc_init(struct batadv_priv *bat_priv) +int batadv_nc_mesh_init(struct batadv_priv *bat_priv) { bat_priv->nc.timestamp_fwd_flush = jiffies; bat_priv->nc.timestamp_sniffed_purge = jiffies; @@ -70,11 +84,6 @@ int batadv_nc_init(struct batadv_priv *bat_priv) batadv_hash_set_lock_class(bat_priv->nc.coding_hash, &batadv_nc_decoding_hash_lock_class_key); - /* Register our packet type */ - if (batadv_recv_handler_register(BATADV_CODED, - batadv_nc_recv_coded_packet) < 0) - goto err; - INIT_DELAYED_WORK(&bat_priv->nc.work, batadv_nc_worker); batadv_nc_start_timer(bat_priv); @@ -1722,12 +1731,11 @@ free_nc_packet: } /** - * batadv_nc_free - clean up network coding memory + * batadv_nc_mesh_free - clean up network coding memory * @bat_priv: the bat priv with all the soft interface information */ -void batadv_nc_free(struct batadv_priv *bat_priv) +void batadv_nc_mesh_free(struct batadv_priv *bat_priv) { - batadv_recv_handler_unregister(BATADV_CODED); cancel_delayed_work_sync(&bat_priv->nc.work); batadv_nc_purge_paths(bat_priv, bat_priv->nc.coding_hash, NULL); diff --git a/net/batman-adv/network-coding.h b/net/batman-adv/network-coding.h index 4fa6d0caddbd..bd4295fb960f 100644 --- a/net/batman-adv/network-coding.h +++ b/net/batman-adv/network-coding.h @@ -22,8 +22,9 @@ #ifdef CONFIG_BATMAN_ADV_NC -int batadv_nc_init(struct batadv_priv *bat_priv); -void batadv_nc_free(struct batadv_priv *bat_priv); +int batadv_nc_init(void); +int batadv_nc_mesh_init(struct batadv_priv *bat_priv); +void batadv_nc_mesh_free(struct batadv_priv *bat_priv); void batadv_nc_update_nc_node(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, struct batadv_orig_node *orig_neigh_node, @@ -47,12 +48,17 @@ int batadv_nc_init_debugfs(struct batadv_priv *bat_priv); #else /* ifdef CONFIG_BATMAN_ADV_NC */ -static inline int batadv_nc_init(struct batadv_priv *bat_priv) +static inline int batadv_nc_init(void) { return 0; } -static inline void batadv_nc_free(struct batadv_priv *bat_priv) +static inline int batadv_nc_mesh_init(struct batadv_priv *bat_priv) +{ + return 0; +} + +static inline void batadv_nc_mesh_free(struct batadv_priv *bat_priv) { return; } diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 9096137c889c..6629cdc134dc 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -221,8 +221,6 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags & (MSG_OOB)) return -EOPNOTSUPP; - msg->msg_namelen = 0; - skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) { if (sk->sk_shutdown & RCV_SHUTDOWN) @@ -287,8 +285,6 @@ int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags & MSG_OOB) return -EOPNOTSUPP; - msg->msg_namelen = 0; - BT_DBG("sk %p size %zu", sk, size); lock_sock(sk); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ace5e55fe5a3..7c88f5f83598 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1123,7 +1123,11 @@ int hci_dev_open(__u16 dev) goto done; } - if (hdev->rfkill && rfkill_blocked(hdev->rfkill)) { + /* Check for rfkill but allow the HCI setup stage to proceed + * (which in itself doesn't cause any RF activity). + */ + if (test_bit(HCI_RFKILLED, &hdev->dev_flags) && + !test_bit(HCI_SETUP, &hdev->dev_flags)) { ret = -ERFKILL; goto done; } @@ -1545,10 +1549,13 @@ static int hci_rfkill_set_block(void *data, bool blocked) BT_DBG("%p name %s blocked %d", hdev, hdev->name, blocked); - if (!blocked) - return 0; - - hci_dev_do_close(hdev); + if (blocked) { + set_bit(HCI_RFKILLED, &hdev->dev_flags); + if (!test_bit(HCI_SETUP, &hdev->dev_flags)) + hci_dev_do_close(hdev); + } else { + clear_bit(HCI_RFKILLED, &hdev->dev_flags); +} return 0; } @@ -1570,9 +1577,13 @@ static void hci_power_on(struct work_struct *work) return; } - if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) + if (test_bit(HCI_RFKILLED, &hdev->dev_flags)) { + clear_bit(HCI_AUTO_OFF, &hdev->dev_flags); + hci_dev_do_close(hdev); + } else if (test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) { queue_delayed_work(hdev->req_workqueue, &hdev->power_off, HCI_AUTO_OFF_TIMEOUT); + } if (test_and_clear_bit(HCI_SETUP, &hdev->dev_flags)) mgmt_index_added(hdev); @@ -2241,6 +2252,9 @@ int hci_register_dev(struct hci_dev *hdev) } } + if (hdev->rfkill && rfkill_blocked(hdev->rfkill)) + set_bit(HCI_RFKILLED, &hdev->dev_flags); + set_bit(HCI_SETUP, &hdev->dev_flags); if (hdev->dev_type != HCI_AMP) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index b93cd2eb5d58..dcaa6dbbab2c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3611,7 +3611,11 @@ static void hci_le_ltk_request_evt(struct hci_dev *hdev, struct sk_buff *skb) cp.handle = cpu_to_le16(conn->handle); if (ltk->authenticated) - conn->sec_level = BT_SECURITY_HIGH; + conn->pending_sec_level = BT_SECURITY_HIGH; + else + conn->pending_sec_level = BT_SECURITY_MEDIUM; + + conn->enc_key_size = ltk->enc_size; hci_send_cmd(hdev, HCI_OP_LE_LTK_REPLY, sizeof(cp), &cp); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 9bd7d959e384..fa4bf6631425 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -752,8 +752,6 @@ static int hci_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) return err; - msg->msg_namelen = 0; - copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 940f5acb6694..de030f50f72b 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -231,17 +231,22 @@ static void hidp_input_report(struct hidp_session *session, struct sk_buff *skb) static int hidp_send_report(struct hidp_session *session, struct hid_report *report) { - unsigned char buf[32], hdr; - int rsize; + unsigned char hdr; + u8 *buf; + int rsize, ret; - rsize = ((report->size - 1) >> 3) + 1 + (report->id > 0); - if (rsize > sizeof(buf)) + buf = hid_alloc_report_buf(report, GFP_ATOMIC); + if (!buf) return -EIO; hid_output_report(report, buf); hdr = HIDP_TRANS_DATA | HIDP_DATA_RTYPE_OUPUT; - return hidp_send_intr_message(session, hdr, buf, rsize); + rsize = ((report->size - 1) >> 3) + 1 + (report->id > 0); + ret = hidp_send_intr_message(session, hdr, buf, rsize); + + kfree(buf); + return ret; } static int hidp_get_raw_report(struct hid_device *hid, diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 30b3721dc6d7..c1c6028e389a 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -608,7 +608,6 @@ static int rfcomm_sock_recvmsg(struct kiocb *iocb, struct socket *sock, if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { rfcomm_dlc_accept(d); - msg->msg_namelen = 0; return 0; } diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index e7bd4eea575c..2bb1d3a5e76b 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -700,7 +700,6 @@ static int sco_sock_recvmsg(struct kiocb *iocb, struct socket *sock, test_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) { sco_conn_defer_accept(pi->conn->hcon, 0); sk->sk_state = BT_CONFIG; - msg->msg_namelen = 0; release_sock(sk); return 0; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index ebfa4443c69b..84dd783abe5c 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -161,7 +161,7 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) if (!pv) return; - for_each_set_bit_from(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { + for_each_set_bit_from(vid, pv->vlan_bitmap, VLAN_N_VID) { f = __br_fdb_get(br, br->dev->dev_addr, vid); if (f && f->is_local && !f->dst) fdb_delete(br, f); @@ -725,7 +725,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], /* VID was specified, so use it. */ err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); } else { - if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) { err = __br_fdb_add(ndm, p, addr, nlh_flags, 0); goto out; } @@ -734,7 +734,7 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], * specify a VLAN. To be nice, add/update entry for every * vlan on this port. */ - for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { err = __br_fdb_add(ndm, p, addr, nlh_flags, vid); if (err) goto out; @@ -812,7 +812,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], err = __br_fdb_delete(p, addr, vid); } else { - if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) { err = __br_fdb_delete(p, addr, 0); goto out; } @@ -822,7 +822,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], * vlan on this port. */ err = -ENOENT; - for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { err &= __br_fdb_delete(p, addr, vid); } } diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index 4cdba60926ff..32bd1e87f149 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -172,6 +172,8 @@ void br_dev_delete(struct net_device *dev, struct list_head *head) del_nbp(p); } + br_fdb_delete_by_port(br, NULL, 1); + del_timer_sync(&br->gc_timer); br_sysfs_delbr(br->dev); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index d6448e35e027..2a180a380181 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1185,7 +1185,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, max_delay = msecs_to_jiffies(ntohs(mld->mld_maxdelay)); if (max_delay) group = &mld->mld_mca; - } else if (skb->len >= sizeof(*mld2q)) { + } else { if (!pskb_may_pull(skb, sizeof(*mld2q))) { err = -EINVAL; goto out; @@ -1193,7 +1193,8 @@ static int br_ip6_multicast_query(struct net_bridge *br, mld2q = (struct mld2_query *)icmp6_hdr(skb); if (!mld2q->mld2q_nsrcs) group = &mld2q->mld2q_mca; - max_delay = mld2q->mld2q_mrc ? MLDV2_MRC(ntohs(mld2q->mld2q_mrc)) : 1; + + max_delay = max(msecs_to_jiffies(MLDV2_MRC(ntohs(mld2q->mld2q_mrc))), 1UL); } if (!group) @@ -1838,7 +1839,7 @@ int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val) u32 old; struct net_bridge_mdb_htable *mdb; - spin_lock(&br->multicast_lock); + spin_lock_bh(&br->multicast_lock); if (!netif_running(br->dev)) goto unlock; @@ -1870,7 +1871,7 @@ rollback: } unlock: - spin_unlock(&br->multicast_lock); + spin_unlock_bh(&br->multicast_lock); return err; } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 8e3abf564798..06873e80a432 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -128,7 +128,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, else pv = br_get_vlan_info(br); - if (!pv || bitmap_empty(pv->vlan_bitmap, BR_VLAN_BITMAP_LEN)) + if (!pv || bitmap_empty(pv->vlan_bitmap, VLAN_N_VID)) goto done; af = nla_nest_start(skb, IFLA_AF_SPEC); @@ -136,7 +136,7 @@ static int br_fill_ifinfo(struct sk_buff *skb, goto nla_put_failure; pvid = br_get_pvid(pv); - for_each_set_bit(vid, pv->vlan_bitmap, BR_VLAN_BITMAP_LEN) { + for_each_set_bit(vid, pv->vlan_bitmap, VLAN_N_VID) { vinfo.vid = vid; vinfo.flags = 0; if (vid == pvid) @@ -203,7 +203,7 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask) { int err = 0; - struct net_bridge_port *port = br_port_get_rcu(dev); + struct net_bridge_port *port = br_port_get_rtnl(dev); /* not a bridge port and */ if (!port && !(filter_mask & RTEXT_FILTER_BRVLAN)) @@ -443,7 +443,7 @@ static size_t br_get_link_af_size(const struct net_device *dev) struct net_port_vlans *pv; if (br_port_exists(dev)) - pv = nbp_get_vlan_info(br_port_get_rcu(dev)); + pv = nbp_get_vlan_info(br_port_get_rtnl(dev)); else if (dev->priv_flags & IFF_EBRIDGE) pv = br_get_vlan_info((struct net_bridge *)netdev_priv(dev)); else diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index d2c043a857b6..e696833a31b5 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -183,13 +183,10 @@ struct net_bridge_port static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev) { - struct net_bridge_port *port = - rcu_dereference_rtnl(dev->rx_handler_data); - - return br_port_exists(dev) ? port : NULL; + return rcu_dereference(dev->rx_handler_data); } -static inline struct net_bridge_port *br_port_get_rtnl(struct net_device *dev) +static inline struct net_bridge_port *br_port_get_rtnl(const struct net_device *dev) { return br_port_exists(dev) ? rtnl_dereference(dev->rx_handler_data) : NULL; @@ -714,6 +711,7 @@ extern struct net_bridge_port *br_get_port(struct net_bridge *br, extern void br_init_port(struct net_bridge_port *p); extern void br_become_designated_port(struct net_bridge_port *p); +extern void __br_set_forward_delay(struct net_bridge *br, unsigned long t); extern int br_set_forward_delay(struct net_bridge *br, unsigned long x); extern int br_set_hello_time(struct net_bridge *br, unsigned long x); extern int br_set_max_age(struct net_bridge *br, unsigned long x); diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 1c0a50f13229..3c86f0538cbb 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -209,7 +209,7 @@ static void br_record_config_information(struct net_bridge_port *p, p->designated_age = jiffies - bpdu->message_age; mod_timer(&p->message_age_timer, jiffies - + (p->br->max_age - bpdu->message_age)); + + (bpdu->max_age - bpdu->message_age)); } /* called under bridge lock */ @@ -544,18 +544,27 @@ int br_set_max_age(struct net_bridge *br, unsigned long val) } +void __br_set_forward_delay(struct net_bridge *br, unsigned long t) +{ + br->bridge_forward_delay = t; + if (br_is_root_bridge(br)) + br->forward_delay = br->bridge_forward_delay; +} + int br_set_forward_delay(struct net_bridge *br, unsigned long val) { unsigned long t = clock_t_to_jiffies(val); + int err = -ERANGE; + spin_lock_bh(&br->lock); if (br->stp_enabled != BR_NO_STP && (t < BR_MIN_FORWARD_DELAY || t > BR_MAX_FORWARD_DELAY)) - return -ERANGE; + goto unlock; - spin_lock_bh(&br->lock); - br->bridge_forward_delay = t; - if (br_is_root_bridge(br)) - br->forward_delay = br->bridge_forward_delay; + __br_set_forward_delay(br, t); + err = 0; + +unlock: spin_unlock_bh(&br->lock); - return 0; + return err; } diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index d45e760141bb..656a6f3e40de 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -129,6 +129,14 @@ static void br_stp_start(struct net_bridge *br) char *envp[] = { NULL }; r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); + + spin_lock_bh(&br->lock); + + if (br->bridge_forward_delay < BR_MIN_FORWARD_DELAY) + __br_set_forward_delay(br, BR_MIN_FORWARD_DELAY); + else if (br->bridge_forward_delay > BR_MAX_FORWARD_DELAY) + __br_set_forward_delay(br, BR_MAX_FORWARD_DELAY); + if (r == 0) { br->stp_enabled = BR_USER_STP; br_debug(br, "userspace STP started\n"); @@ -137,10 +145,10 @@ static void br_stp_start(struct net_bridge *br) br_debug(br, "using kernel STP\n"); /* To start timers on any ports left in blocking */ - spin_lock_bh(&br->lock); br_port_state_selection(br); - spin_unlock_bh(&br->lock); } + + spin_unlock_bh(&br->lock); } static void br_stp_stop(struct net_bridge *br) diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index bd58b45f5f90..9a9ffe7e4019 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -108,7 +108,7 @@ static int __vlan_del(struct net_port_vlans *v, u16 vid) clear_bit(vid, v->vlan_bitmap); v->num_vlans--; - if (bitmap_empty(v->vlan_bitmap, BR_VLAN_BITMAP_LEN)) { + if (bitmap_empty(v->vlan_bitmap, VLAN_N_VID)) { if (v->port_idx) rcu_assign_pointer(v->parent.port->vlan_info, NULL); else @@ -122,7 +122,7 @@ static void __vlan_flush(struct net_port_vlans *v) { smp_wmb(); v->pvid = 0; - bitmap_zero(v->vlan_bitmap, BR_VLAN_BITMAP_LEN); + bitmap_zero(v->vlan_bitmap, VLAN_N_VID); if (v->port_idx) rcu_assign_pointer(v->parent.port->vlan_info, NULL); else diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 05a41c7ec304..d6be3edb7a43 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -286,8 +286,6 @@ static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock, if (m->msg_flags&MSG_OOB) goto read_error; - m->msg_namelen = 0; - skb = skb_recv_datagram(sk, flags, 0 , &ret); if (!skb) goto read_error; @@ -361,8 +359,6 @@ static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags&MSG_OOB) goto out; - msg->msg_namelen = 0; - /* * Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c index 2bd4b58f4372..0f455227da83 100644 --- a/net/caif/cfctrl.c +++ b/net/caif/cfctrl.c @@ -293,9 +293,10 @@ int cfctrl_linkup_request(struct cflayer *layer, count = cfctrl_cancel_req(&cfctrl->serv.layer, user_layer); - if (count != 1) + if (count != 1) { pr_err("Could not remove request (%d)", count); return -ENODEV; + } } return 0; } diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c index 925ca583c09c..8c93fa8d81bc 100644 --- a/net/ceph/auth_none.c +++ b/net/ceph/auth_none.c @@ -39,6 +39,11 @@ static int should_authenticate(struct ceph_auth_client *ac) return xi->starting; } +static int build_request(struct ceph_auth_client *ac, void *buf, void *end) +{ + return 0; +} + /* * the generic auth code decode the global_id, and we carry no actual * authenticate state, so nothing happens here. @@ -106,6 +111,7 @@ static const struct ceph_auth_client_ops ceph_auth_none_ops = { .destroy = destroy, .is_authenticated = is_authenticated, .should_authenticate = should_authenticate, + .build_request = build_request, .handle_reply = handle_reply, .create_authorizer = ceph_auth_none_create_authorizer, .destroy_authorizer = ceph_auth_none_destroy_authorizer, diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 3a246a6cab47..bc0016e3e5ac 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -733,12 +733,14 @@ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, object_size = le32_to_cpu(layout->fl_object_size); object_base = off - objoff; - if (truncate_size <= object_base) { - truncate_size = 0; - } else { - truncate_size -= object_base; - if (truncate_size > object_size) - truncate_size = object_size; + if (!(truncate_seq == 1 && truncate_size == -1ULL)) { + if (truncate_size <= object_base) { + truncate_size = 0; + } else { + truncate_size -= object_base; + if (truncate_size > object_size) + truncate_size = object_size; + } } osd_req_op_extent_init(req, 0, opcode, objoff, objlen, @@ -1174,6 +1176,7 @@ static void __register_linger_request(struct ceph_osd_client *osdc, struct ceph_osd_request *req) { dout("__register_linger_request %p\n", req); + ceph_osdc_get_request(req); list_add_tail(&req->r_linger_item, &osdc->req_linger); if (req->r_osd) list_add_tail(&req->r_linger_osd, @@ -1196,6 +1199,7 @@ static void __unregister_linger_request(struct ceph_osd_client *osdc, if (list_empty(&req->r_osd_item)) req->r_osd = NULL; } + ceph_osdc_put_request(req); } void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc, @@ -1203,9 +1207,8 @@ void ceph_osdc_unregister_linger_request(struct ceph_osd_client *osdc, { mutex_lock(&osdc->request_mutex); if (req->r_linger) { - __unregister_linger_request(osdc, req); req->r_linger = 0; - ceph_osdc_put_request(req); + __unregister_linger_request(osdc, req); } mutex_unlock(&osdc->request_mutex); } @@ -1217,11 +1220,6 @@ void ceph_osdc_set_request_linger(struct ceph_osd_client *osdc, if (!req->r_linger) { dout("set_request_linger %p\n", req); req->r_linger = 1; - /* - * caller is now responsible for calling - * unregister_linger_request - */ - ceph_osdc_get_request(req); } } EXPORT_SYMBOL(ceph_osdc_set_request_linger); @@ -1339,10 +1337,6 @@ static void __send_request(struct ceph_osd_client *osdc, ceph_msg_get(req->r_request); /* send consumes a ref */ - /* Mark the request unsafe if this is the first timet's being sent. */ - - if (!req->r_sent && req->r_unsafe_callback) - req->r_unsafe_callback(req, true); req->r_sent = req->r_osd->o_incarnation; ceph_con_send(&req->r_osd->o_con, req->r_request); @@ -1433,8 +1427,6 @@ static void handle_osds_timeout(struct work_struct *work) static void complete_request(struct ceph_osd_request *req) { - if (req->r_unsafe_callback) - req->r_unsafe_callback(req, false); complete_all(&req->r_safe_completion); /* fsync waiter */ } @@ -1496,14 +1488,14 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, dout("handle_reply %p tid %llu req %p result %d\n", msg, tid, req, result); - ceph_decode_need(&p, end, 4, bad); + ceph_decode_need(&p, end, 4, bad_put); numops = ceph_decode_32(&p); if (numops > CEPH_OSD_MAX_OP) goto bad_put; if (numops != req->r_num_ops) goto bad_put; payload_len = 0; - ceph_decode_need(&p, end, numops * sizeof(struct ceph_osd_op), bad); + ceph_decode_need(&p, end, numops * sizeof(struct ceph_osd_op), bad_put); for (i = 0; i < numops; i++) { struct ceph_osd_op *op = p; int len; @@ -1521,11 +1513,13 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, goto bad_put; } - ceph_decode_need(&p, end, 4 + numops * 4, bad); + ceph_decode_need(&p, end, 4 + numops * 4, bad_put); retry_attempt = ceph_decode_32(&p); for (i = 0; i < numops; i++) req->r_reply_op_result[i] = ceph_decode_32(&p); + already_completed = req->r_got_reply; + if (!req->r_got_reply) { req->r_result = result; @@ -1556,19 +1550,23 @@ static void handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg, ((flags & CEPH_OSD_FLAG_WRITE) == 0)) __unregister_request(osdc, req); - already_completed = req->r_completed; - req->r_completed = 1; mutex_unlock(&osdc->request_mutex); - if (already_completed) - goto done; - if (req->r_callback) - req->r_callback(req, msg); - else - complete_all(&req->r_completion); + if (!already_completed) { + if (req->r_unsafe_callback && + result >= 0 && !(flags & CEPH_OSD_FLAG_ONDISK)) + req->r_unsafe_callback(req, true); + if (req->r_callback) + req->r_callback(req, msg); + else + complete_all(&req->r_completion); + } - if (flags & CEPH_OSD_FLAG_ONDISK) + if (flags & CEPH_OSD_FLAG_ONDISK) { + if (req->r_unsafe_callback && already_completed) + req->r_unsafe_callback(req, false); complete_request(req); + } done: dout("req=%p req->r_linger=%d\n", req, req->r_linger); @@ -1633,8 +1631,10 @@ static void kick_requests(struct ceph_osd_client *osdc, int force_resend) dout("%p tid %llu restart on osd%d\n", req, req->r_tid, req->r_osd ? req->r_osd->o_osd : -1); + ceph_osdc_get_request(req); __unregister_request(osdc, req); __register_linger_request(osdc, req); + ceph_osdc_put_request(req); continue; } @@ -1786,6 +1786,8 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg) nr_maps--; } + if (!osdc->osdmap) + goto bad; done: downgrade_write(&osdc->map_sem); ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch); @@ -2123,13 +2125,14 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc, __register_request(osdc, req); req->r_sent = 0; req->r_got_reply = 0; - req->r_completed = 0; rc = __map_request(osdc, req, 0); if (rc < 0) { if (nofail) { dout("osdc_start_request failed map, " " will retry %lld\n", req->r_tid); rc = 0; + } else { + __unregister_request(osdc, req); } goto out_unlock; } @@ -2206,6 +2209,17 @@ void ceph_osdc_sync(struct ceph_osd_client *osdc) EXPORT_SYMBOL(ceph_osdc_sync); /* + * Call all pending notify callbacks - for use after a watch is + * unregistered, to make sure no more callbacks for it will be invoked + */ +extern void ceph_osdc_flush_notifies(struct ceph_osd_client *osdc) +{ + flush_workqueue(osdc->notify_wq); +} +EXPORT_SYMBOL(ceph_osdc_flush_notifies); + + +/* * init, shutdown */ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) @@ -2254,12 +2268,10 @@ int ceph_osdc_init(struct ceph_osd_client *osdc, struct ceph_client *client) if (err < 0) goto out_msgpool; + err = -ENOMEM; osdc->notify_wq = create_singlethread_workqueue("ceph-watch-notify"); - if (IS_ERR(osdc->notify_wq)) { - err = PTR_ERR(osdc->notify_wq); - osdc->notify_wq = NULL; + if (!osdc->notify_wq) goto out_msgpool; - } return 0; out_msgpool: diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 603ddd92db19..dbd9a4792427 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1129,7 +1129,7 @@ static int *calc_pg_raw(struct ceph_osdmap *osdmap, struct ceph_pg pgid, /* pg_temp? */ pgid.seed = ceph_stable_mod(pgid.seed, pool->pg_num, - pool->pgp_num_mask); + pool->pg_num_mask); pg = __lookup_pg_mapping(&osdmap->pg_temp, pgid); if (pg) { *num = pg->len; diff --git a/net/compat.c b/net/compat.c index f0a1ba6c8086..f50161fb812e 100644 --- a/net/compat.c +++ b/net/compat.c @@ -71,6 +71,8 @@ int get_compat_msghdr(struct msghdr *kmsg, struct compat_msghdr __user *umsg) __get_user(kmsg->msg_controllen, &umsg->msg_controllen) || __get_user(kmsg->msg_flags, &umsg->msg_flags)) return -EFAULT; + if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) + kmsg->msg_namelen = sizeof(struct sockaddr_storage); kmsg->msg_name = compat_ptr(tmp1); kmsg->msg_iov = compat_ptr(tmp2); kmsg->msg_control = compat_ptr(tmp3); @@ -91,7 +93,8 @@ int verify_compat_iovec(struct msghdr *kern_msg, struct iovec *kern_iov, if (err < 0) return err; } - kern_msg->msg_name = kern_address; + if (kern_msg->msg_name) + kern_msg->msg_name = kern_address; } else kern_msg->msg_name = NULL; @@ -777,21 +780,16 @@ asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg, if (flags & MSG_CMSG_COMPAT) return -EINVAL; - if (COMPAT_USE_64BIT_TIME) - return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, - flags | MSG_CMSG_COMPAT, - (struct timespec *) timeout); - if (timeout == NULL) return __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, flags | MSG_CMSG_COMPAT, NULL); - if (get_compat_timespec(&ktspec, timeout)) + if (compat_get_timespec(&ktspec, timeout)) return -EFAULT; datagrams = __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen, flags | MSG_CMSG_COMPAT, &ktspec); - if (datagrams > 0 && put_compat_timespec(&ktspec, timeout)) + if (datagrams > 0 && compat_put_timespec(&ktspec, timeout)) datagrams = -EFAULT; return datagrams; diff --git a/net/core/dev.c b/net/core/dev.c index faebb398fb46..1283c8442e99 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3513,8 +3513,15 @@ ncls: } } - if (vlan_tx_nonzero_tag_present(skb)) - skb->pkt_type = PACKET_OTHERHOST; + if (unlikely(vlan_tx_tag_present(skb))) { + if (vlan_tx_tag_get_id(skb)) + skb->pkt_type = PACKET_OTHERHOST; + /* Note: we might in the future use prio bits + * and set skb->priority like in vlan_do_receive() + * For the time being, just ignore Priority Code Point + */ + skb->vlan_tci = 0; + } /* deliver only exact match when indicated */ null_or_dev = deliver_exact ? skb->dev : NULL; @@ -4471,7 +4478,7 @@ static void dev_change_rx_flags(struct net_device *dev, int flags) { const struct net_device_ops *ops = dev->netdev_ops; - if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags) + if (ops->ndo_change_rx_flags) ops->ndo_change_rx_flags(dev, flags); } diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index d23b6682f4e9..a974dfec4bf1 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -64,7 +64,6 @@ static struct genl_family net_drop_monitor_family = { .hdrsize = 0, .name = "NET_DM", .version = 2, - .maxattr = NET_DM_CMD_MAX, }; static DEFINE_PER_CPU(struct per_cpu_dm_data, dm_cpu_data); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index d5a9f8ead0d8..0e9131195eb0 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -445,7 +445,8 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh) if (frh->action && (frh->action != rule->action)) continue; - if (frh->table && (frh_get_table(frh, tb) != rule->table)) + if (frh_get_table(frh, tb) && + (frh_get_table(frh, tb) != rule->table)) continue; if (tb[FRA_PRIORITY] && diff --git a/net/core/filter.c b/net/core/filter.c index 6438f29ff266..52f01229ee01 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -36,7 +36,6 @@ #include <asm/uaccess.h> #include <asm/unaligned.h> #include <linux/filter.h> -#include <linux/reciprocal_div.h> #include <linux/ratelimit.h> #include <linux/seccomp.h> #include <linux/if_vlan.h> @@ -166,7 +165,7 @@ unsigned int sk_run_filter(const struct sk_buff *skb, A /= X; continue; case BPF_S_ALU_DIV_K: - A = reciprocal_divide(A, K); + A /= K; continue; case BPF_S_ALU_MOD_X: if (X == 0) @@ -553,11 +552,6 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) /* Some instructions need special checks */ switch (code) { case BPF_S_ALU_DIV_K: - /* check for division by zero */ - if (ftest->k == 0) - return -EINVAL; - ftest->k = reciprocal_value(ftest->k); - break; case BPF_S_ALU_MOD_K: /* check for division by zero */ if (ftest->k == 0) @@ -853,27 +847,7 @@ void sk_decode_filter(struct sock_filter *filt, struct sock_filter *to) to->code = decodes[code]; to->jt = filt->jt; to->jf = filt->jf; - - if (code == BPF_S_ALU_DIV_K) { - /* - * When loaded this rule user gave us X, which was - * translated into R = r(X). Now we calculate the - * RR = r(R) and report it back. If next time this - * value is loaded and RRR = r(RR) is calculated - * then the R == RRR will be true. - * - * One exception. X == 1 translates into R == 0 and - * we can't calculate RR out of it with r(). - */ - - if (filt->k == 0) - to->k = 1; - else - to->k = reciprocal_value(filt->k); - - BUG_ON(reciprocal_value(to->k) != filt->k); - } else - to->k = filt->k; + to->k = filt->k; } int sk_get_filter(struct sock *sk, struct sock_filter __user *ubuf, unsigned int len) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 00ee068efc1c..f97101b4d373 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -40,7 +40,7 @@ again: struct iphdr _iph; ip: iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); - if (!iph) + if (!iph || iph->ihl < 5) return false; if (ip_is_fragment(iph)) @@ -149,8 +149,8 @@ ipv6: if (poff >= 0) { __be32 *ports, _ports; - nhoff += poff; - ports = skb_header_pointer(skb, nhoff, sizeof(_ports), &_ports); + ports = skb_header_pointer(skb, nhoff + poff, + sizeof(_ports), &_ports); if (ports) flow->ports = *ports; } @@ -345,14 +345,9 @@ u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb) if (new_index < 0) new_index = skb_tx_hash(dev, skb); - if (queue_index != new_index && sk) { - struct dst_entry *dst = - rcu_dereference_check(sk->sk_dst_cache, 1); - - if (dst && skb_dst(skb) == dst) - sk_tx_queue_set(sk, queue_index); - - } + if (queue_index != new_index && sk && + rcu_access_pointer(sk->sk_dst_cache)) + sk_tx_queue_set(sk, new_index); queue_index = new_index; } diff --git a/net/core/iovec.c b/net/core/iovec.c index de178e462682..9a31515fb8e3 100644 --- a/net/core/iovec.c +++ b/net/core/iovec.c @@ -48,7 +48,8 @@ int verify_iovec(struct msghdr *m, struct iovec *iov, struct sockaddr_storage *a if (err < 0) return err; } - m->msg_name = address; + if (m->msg_name) + m->msg_name = address; } else { m->msg_name = NULL; } diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 5c56b217b999..49aeab86f317 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -231,7 +231,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) we must kill timers etc. and move it to safe state. */ - skb_queue_purge(&n->arp_queue); + __skb_queue_purge(&n->arp_queue); n->arp_queue_len_bytes = 0; n->output = neigh_blackhole; if (n->nud_state & NUD_VALID) @@ -286,7 +286,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device if (!n) goto out_entries; - skb_queue_head_init(&n->arp_queue); + __skb_queue_head_init(&n->arp_queue); rwlock_init(&n->lock); seqlock_init(&n->ha_lock); n->updated = n->used = now; @@ -708,7 +708,9 @@ void neigh_destroy(struct neighbour *neigh) if (neigh_del_timer(neigh)) pr_warn("Impossible event\n"); - skb_queue_purge(&neigh->arp_queue); + write_lock_bh(&neigh->lock); + __skb_queue_purge(&neigh->arp_queue); + write_unlock_bh(&neigh->lock); neigh->arp_queue_len_bytes = 0; if (dev->netdev_ops->ndo_neigh_destroy) @@ -858,7 +860,7 @@ static void neigh_invalidate(struct neighbour *neigh) neigh->ops->error_report(neigh, skb); write_lock(&neigh->lock); } - skb_queue_purge(&neigh->arp_queue); + __skb_queue_purge(&neigh->arp_queue); neigh->arp_queue_len_bytes = 0; } @@ -1210,7 +1212,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, write_lock_bh(&neigh->lock); } - skb_queue_purge(&neigh->arp_queue); + __skb_queue_purge(&neigh->arp_queue); neigh->arp_queue_len_bytes = 0; } out: @@ -1272,7 +1274,7 @@ int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb) if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL, skb->len) < 0 && - dev->header_ops->rebuild(skb)) + dev_rebuild_header(skb)) return 0; return dev_queue_xmit(skb); @@ -1443,16 +1445,18 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, atomic_set(&p->refcnt, 1); p->reachable_time = neigh_rand_reach_time(p->base_reachable_time); + dev_hold(dev); + p->dev = dev; + write_pnet(&p->net, hold_net(net)); + p->sysctl_table = NULL; if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) { + release_net(net); + dev_put(dev); kfree(p); return NULL; } - dev_hold(dev); - p->dev = dev; - write_pnet(&p->net, hold_net(net)); - p->sysctl_table = NULL; write_lock_bh(&tbl->lock); p->next = tbl->parms.next; tbl->parms.next = p; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index cec074be8c43..27f33f25cda8 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -386,8 +386,14 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, !vlan_hw_offload_capable(netif_skb_features(skb), skb->vlan_proto)) { skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb)); - if (unlikely(!skb)) - break; + if (unlikely(!skb)) { + /* This is actually a packet drop, but we + * don't want the code at the end of this + * function to try and re-queue a NULL skb. + */ + status = NETDEV_TX_OK; + goto unlock_txq; + } skb->vlan_tci = 0; } @@ -395,6 +401,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, if (status == NETDEV_TX_OK) txq_trans_update(txq); } + unlock_txq: __netif_tx_unlock(txq); if (status == NETDEV_TX_OK) @@ -550,7 +557,7 @@ static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo return; proto = ntohs(eth_hdr(skb)->h_proto); - if (proto == ETH_P_IP) { + if (proto == ETH_P_ARP) { struct arphdr *arp; unsigned char *arp_ptr; /* No arp on this interface */ @@ -1289,15 +1296,14 @@ EXPORT_SYMBOL_GPL(__netpoll_free_async); void netpoll_cleanup(struct netpoll *np) { - if (!np->dev) - return; - rtnl_lock(); + if (!np->dev) + goto out; __netpoll_cleanup(np); - rtnl_unlock(); - dev_put(np->dev); np->dev = NULL; +out: + rtnl_unlock(); } EXPORT_SYMBOL(netpoll_cleanup); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 11f2704c3810..ebbea5371967 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2515,6 +2515,8 @@ static int process_ipsec(struct pktgen_dev *pkt_dev, if (x) { int ret; __u8 *eth; + struct iphdr *iph; + nhead = x->props.header_len - skb_headroom(skb); if (nhead > 0) { ret = pskb_expand_head(skb, nhead, 0, GFP_ATOMIC); @@ -2536,6 +2538,11 @@ static int process_ipsec(struct pktgen_dev *pkt_dev, eth = (__u8 *) skb_push(skb, ETH_HLEN); memcpy(eth, pkt_dev->hh, 12); *(u16 *) ð[12] = protocol; + + /* Update IPv4 header len as well as checksum value */ + iph = ip_hdr(skb); + iph->tot_len = htons(skb->len - ETH_HLEN); + ip_send_check(iph); } } return 1; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a08bd2b7fe3f..fd01eca52a13 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2142,7 +2142,7 @@ int ndo_dflt_fdb_del(struct ndmsg *ndm, /* If aging addresses are supported device will need to * implement its own handler for this. */ - if (ndm->ndm_state & NUD_PERMANENT) { + if (!(ndm->ndm_state & NUD_PERMANENT)) { pr_info("%s: FDB only supports static addresses\n", dev->name); return -EINVAL; } @@ -2374,7 +2374,7 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) struct nlattr *extfilt; u32 filter_mask = 0; - extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct rtgenmsg), + extfilt = nlmsg_find_attr(cb->nlh, sizeof(struct ifinfomsg), IFLA_EXT_MASK); if (extfilt) filter_mask = nla_get_u32(extfilt); diff --git a/net/core/scm.c b/net/core/scm.c index 03795d0147f2..b4da80b1cc07 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -54,7 +54,7 @@ static __inline__ int scm_check_creds(struct ucred *creds) return -EINVAL; if ((creds->pid == task_tgid_vnr(current) || - ns_capable(current->nsproxy->pid_ns->user_ns, CAP_SYS_ADMIN)) && + ns_capable(task_active_pid_ns(current)->user_ns, CAP_SYS_ADMIN)) && ((uid_eq(uid, cred->uid) || uid_eq(uid, cred->euid) || uid_eq(uid, cred->suid)) || nsown_capable(CAP_SETUID)) && ((gid_eq(gid, cred->gid) || gid_eq(gid, cred->egid) || diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 6a2f13cee86a..8d9d05edd2eb 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -10,12 +10,27 @@ #include <net/secure_seq.h> -static u32 net_secret[MD5_MESSAGE_BYTES / 4] ____cacheline_aligned; +#if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_INET) +#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4) -void net_secret_init(void) +static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned; + +static void net_secret_init(void) { - get_random_bytes(net_secret, sizeof(net_secret)); + u32 tmp; + int i; + + if (likely(net_secret[0])) + return; + + for (i = NET_SECRET_SIZE; i > 0;) { + do { + get_random_bytes(&tmp, sizeof(tmp)); + } while (!tmp); + cmpxchg(&net_secret[--i], 0, tmp); + } } +#endif #ifdef CONFIG_INET static u32 seq_scale(u32 seq) @@ -42,6 +57,7 @@ __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr, u32 hash[MD5_DIGEST_WORDS]; u32 i; + net_secret_init(); memcpy(hash, saddr, 16); for (i = 0; i < 4; i++) secret[i] = net_secret[i] + (__force u32)daddr[i]; @@ -63,6 +79,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, u32 hash[MD5_DIGEST_WORDS]; u32 i; + net_secret_init(); memcpy(hash, saddr, 16); for (i = 0; i < 4; i++) secret[i] = net_secret[i] + (__force u32) daddr[i]; @@ -82,6 +99,7 @@ __u32 secure_ip_id(__be32 daddr) { u32 hash[MD5_DIGEST_WORDS]; + net_secret_init(); hash[0] = (__force __u32) daddr; hash[1] = net_secret[13]; hash[2] = net_secret[14]; @@ -96,6 +114,7 @@ __u32 secure_ipv6_id(const __be32 daddr[4]) { __u32 hash[4]; + net_secret_init(); memcpy(hash, daddr, 16); md5_transform(hash, net_secret); @@ -107,6 +126,7 @@ __u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr, { u32 hash[MD5_DIGEST_WORDS]; + net_secret_init(); hash[0] = (__force u32)saddr; hash[1] = (__force u32)daddr; hash[2] = ((__force u16)sport << 16) + (__force u16)dport; @@ -121,6 +141,7 @@ u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport) { u32 hash[MD5_DIGEST_WORDS]; + net_secret_init(); hash[0] = (__force u32)saddr; hash[1] = (__force u32)daddr; hash[2] = (__force u32)dport ^ net_secret[14]; @@ -140,6 +161,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr, u32 hash[MD5_DIGEST_WORDS]; u64 seq; + net_secret_init(); hash[0] = (__force u32)saddr; hash[1] = (__force u32)daddr; hash[2] = ((__force u16)sport << 16) + (__force u16)dport; @@ -164,6 +186,7 @@ u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr, u64 seq; u32 i; + net_secret_init(); memcpy(hash, saddr, 16); for (i = 0; i < 4; i++) secret[i] = net_secret[i] + daddr[i]; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 1c1738cc4538..20ee14d0a8a9 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -74,36 +74,6 @@ struct kmem_cache *skbuff_head_cache __read_mostly; static struct kmem_cache *skbuff_fclone_cache __read_mostly; -static void sock_pipe_buf_release(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) -{ - put_page(buf->page); -} - -static void sock_pipe_buf_get(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) -{ - get_page(buf->page); -} - -static int sock_pipe_buf_steal(struct pipe_inode_info *pipe, - struct pipe_buffer *buf) -{ - return 1; -} - - -/* Pipe buffer operations for a socket. */ -static const struct pipe_buf_operations sock_pipe_buf_ops = { - .can_merge = 0, - .map = generic_pipe_buf_map, - .unmap = generic_pipe_buf_unmap, - .confirm = generic_pipe_buf_confirm, - .release = sock_pipe_buf_release, - .steal = sock_pipe_buf_steal, - .get = sock_pipe_buf_get, -}; - /** * skb_panic - private function for out-of-line support * @skb: buffer @@ -585,9 +555,6 @@ static void skb_release_head_state(struct sk_buff *skb) #if IS_ENABLED(CONFIG_NF_CONNTRACK) nf_conntrack_put(skb->nfct); #endif -#ifdef NET_SKBUFF_NF_DEFRAG_NEEDED - nf_conntrack_put_reasm(skb->nfct_reasm); -#endif #ifdef CONFIG_BRIDGE_NETFILTER nf_bridge_put(skb->nf_bridge); #endif @@ -1814,7 +1781,7 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, .partial = partial, .nr_pages_max = MAX_SKB_FRAGS, .flags = flags, - .ops = &sock_pipe_buf_ops, + .ops = &nosteal_pipe_buf_ops, .spd_release = sock_spd_release, }; struct sk_buff *frag_iter; @@ -2857,7 +2824,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features) doffset + tnl_hlen); if (fskb != skb_shinfo(skb)->frag_list) - continue; + goto perform_csum_check; if (!sg) { nskb->ip_summed = CHECKSUM_NONE; @@ -2921,6 +2888,7 @@ skip_fraglist: nskb->len += nskb->data_len; nskb->truesize += nskb->data_len; +perform_csum_check: if (!csum) { nskb->csum = skb_checksum(nskb, doffset, nskb->len - doffset, 0); diff --git a/net/core/sock.c b/net/core/sock.c index d6d024cfaaaf..50a345e5a26f 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -885,7 +885,7 @@ set_rcvbuf: case SO_PEEK_OFF: if (sock->ops->set_peek_off) - sock->ops->set_peek_off(sk, val); + ret = sock->ops->set_peek_off(sk, val); else ret = -EOPNOTSUPP; break; @@ -2271,6 +2271,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_stamp = ktime_set(-1L, 0); + sk->sk_pacing_rate = ~0U; /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index cfdb46ab3a7f..2ff093b7c45e 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -20,7 +20,9 @@ #include <net/sock.h> #include <net/net_ratelimit.h> +static int zero = 0; static int one = 1; +static int ushort_max = USHRT_MAX; #ifdef CONFIG_RPS static int rps_sock_flow_sysctl(ctl_table *table, int write, @@ -204,7 +206,9 @@ static struct ctl_table netns_core_table[] = { .data = &init_net.core.sysctl_somaxconn, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .extra1 = &zero, + .extra2 = &ushort_max, + .proc_handler = proc_dointvec_minmax }, { } }; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 9c61f9c02fdb..6cf9f7782ad4 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -135,6 +135,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (dst) dst->ops->redirect(dst, sk, skb); + goto out; } if (type == ICMPV6_PKT_TOOBIG) { diff --git a/net/ieee802154/6lowpan.c b/net/ieee802154/6lowpan.c index 55e1fd5b3e56..31b127e8086b 100644 --- a/net/ieee802154/6lowpan.c +++ b/net/ieee802154/6lowpan.c @@ -862,7 +862,7 @@ lowpan_process_data(struct sk_buff *skb) * Traffic class carried in-line * ECN + DSCP (1 byte), Flow Label is elided */ - case 1: /* 10b */ + case 2: /* 10b */ if (lowpan_fetch_skb_u8(skb, &tmp)) goto drop; @@ -875,7 +875,7 @@ lowpan_process_data(struct sk_buff *skb) * Flow Label carried in-line * ECN + 2-bit Pad + Flow Label (3 bytes), DSCP is elided */ - case 2: /* 01b */ + case 1: /* 01b */ if (lowpan_fetch_skb_u8(skb, &tmp)) goto drop; diff --git a/net/ieee802154/dgram.c b/net/ieee802154/dgram.c index 581a59504bd5..1865fdf5a5a5 100644 --- a/net/ieee802154/dgram.c +++ b/net/ieee802154/dgram.c @@ -315,9 +315,8 @@ static int dgram_recvmsg(struct kiocb *iocb, struct sock *sk, if (saddr) { saddr->family = AF_IEEE802154; saddr->addr = mac_cb(skb)->sa; - } - if (addr_len) *addr_len = sizeof(*saddr); + } if (flags & MSG_TRUNC) copied = skb->len; diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index 22b1a7058fd3..4efd2375d7e1 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -224,8 +224,10 @@ static int ieee802154_add_iface(struct sk_buff *skb, if (info->attrs[IEEE802154_ATTR_DEV_TYPE]) { type = nla_get_u8(info->attrs[IEEE802154_ATTR_DEV_TYPE]); - if (type >= __IEEE802154_DEV_MAX) - return -EINVAL; + if (type >= __IEEE802154_DEV_MAX) { + rc = -EINVAL; + goto nla_put_failure; + } } dev = phy->add_iface(phy, devname, type); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index d01be2a3ae53..c4adc319cc2e 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -263,10 +263,8 @@ void build_ehash_secret(void) get_random_bytes(&rnd, sizeof(rnd)); } while (rnd == 0); - if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) { + if (cmpxchg(&inet_ehash_secret, 0, rnd) == 0) get_random_bytes(&ipv6_hash_secret, sizeof(ipv6_hash_secret)); - net_secret_init(); - } } EXPORT_SYMBOL(build_ehash_secret); diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index b28e863fe0a7..19e36376d2a0 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -57,7 +57,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (IS_ERR(rt)) { err = PTR_ERR(rt); if (err == -ENETUNREACH) - IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); goto out; } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index dfc39d4d48b7..9e38217c3931 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -771,7 +771,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, ci = nla_data(tb[IFA_CACHEINFO]); if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { err = -EINVAL; - goto errout; + goto errout_free; } *pvalid_lft = ci->ifa_valid; *pprefered_lft = ci->ifa_prefered; @@ -779,6 +779,8 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, return ifa; +errout_free: + inet_free_ifa(ifa); errout: return ERR_PTR(err); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c7629a209f9d..4556cd25acde 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1049,6 +1049,8 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo } in_dev = __in_dev_get_rtnl(dev); + if (!in_dev) + return NOTIFY_DONE; switch (event) { case NETDEV_UP: diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 49616fed9340..6e8a13da6cbd 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -71,7 +71,6 @@ #include <linux/init.h> #include <linux/list.h> #include <linux/slab.h> -#include <linux/prefetch.h> #include <linux/export.h> #include <net/net_namespace.h> #include <net/ip.h> @@ -1761,10 +1760,8 @@ static struct leaf *leaf_walk_rcu(struct tnode *p, struct rt_trie_node *c) if (!c) continue; - if (IS_LEAF(c)) { - prefetch(rcu_dereference_rtnl(p->child[idx])); + if (IS_LEAF(c)) return (struct leaf *) c; - } /* Rescan start scanning in new node */ p = (struct tnode *) c; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index d8c232794bcb..089b4af4fecc 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -343,7 +343,7 @@ static struct sk_buff *igmpv3_newpack(struct net_device *dev, int size) pip->saddr = fl4.saddr; pip->protocol = IPPROTO_IGMP; pip->tot_len = 0; /* filled in later */ - ip_select_ident(pip, &rt->dst, NULL); + ip_select_ident(skb, &rt->dst, NULL); ((u8 *)&pip[1])[0] = IPOPT_RA; ((u8 *)&pip[1])[1] = 4; ((u8 *)&pip[1])[2] = 0; @@ -687,7 +687,7 @@ static int igmp_send_report(struct in_device *in_dev, struct ip_mc_list *pmc, iph->daddr = dst; iph->saddr = fl4.saddr; iph->protocol = IPPROTO_IGMP; - ip_select_ident(iph, &rt->dst, NULL); + ip_select_ident(skb, &rt->dst, NULL); ((u8 *)&iph[1])[0] = IPOPT_RA; ((u8 *)&iph[1])[1] = 4; ((u8 *)&iph[1])[2] = 0; @@ -709,7 +709,7 @@ static void igmp_gq_timer_expire(unsigned long data) in_dev->mr_gq_running = 0; igmpv3_send_report(in_dev, NULL); - __in_dev_put(in_dev); + in_dev_put(in_dev); } static void igmp_ifc_timer_expire(unsigned long data) @@ -721,7 +721,7 @@ static void igmp_ifc_timer_expire(unsigned long data) in_dev->mr_ifc_count--; igmp_ifc_start_timer(in_dev, IGMP_Unsolicited_Report_Interval); } - __in_dev_put(in_dev); + in_dev_put(in_dev); } static void igmp_ifc_event(struct in_device *in_dev) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 5f648751fce2..45dbdab915e2 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -106,6 +106,10 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->id.idiag_sport = inet->inet_sport; r->id.idiag_dport = inet->inet_dport; + + memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); + memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); + r->id.idiag_src[0] = inet->inet_rcv_saddr; r->id.idiag_dst[0] = inet->inet_daddr; @@ -240,12 +244,19 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, r->idiag_family = tw->tw_family; r->idiag_retrans = 0; + r->id.idiag_if = tw->tw_bound_dev_if; sock_diag_save_cookie(tw, r->id.idiag_cookie); + r->id.idiag_sport = tw->tw_sport; r->id.idiag_dport = tw->tw_dport; + + memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); + memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); + r->id.idiag_src[0] = tw->tw_rcv_saddr; r->id.idiag_dst[0] = tw->tw_daddr; + r->idiag_state = tw->tw_substate; r->idiag_timer = 3; r->idiag_expires = DIV_ROUND_UP(tmo * 1000, HZ); @@ -732,8 +743,13 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, r->id.idiag_sport = inet->inet_sport; r->id.idiag_dport = ireq->rmt_port; + + memset(&r->id.idiag_src, 0, sizeof(r->id.idiag_src)); + memset(&r->id.idiag_dst, 0, sizeof(r->id.idiag_dst)); + r->id.idiag_src[0] = ireq->loc_addr; r->id.idiag_dst[0] = ireq->rmt_addr; + r->idiag_expires = jiffies_to_msecs(tmo); r->idiag_rqueue = 0; r->idiag_wqueue = 0; @@ -945,7 +961,7 @@ next_normal: ++num; } - if (r->idiag_states & TCPF_TIME_WAIT) { + if (r->idiag_states & (TCPF_TIME_WAIT | TCPF_FIN_WAIT2)) { struct inet_timewait_sock *tw; inet_twsk_for_each(tw, node, @@ -955,6 +971,8 @@ next_normal: if (num < s_num) goto next_dying; + if (!(r->idiag_states & (1 << tw->tw_substate))) + goto next_dying; if (r->sdiag_family != AF_UNSPEC && tw->tw_family != r->sdiag_family) goto next_dying; diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 6af375afeeef..c95848d00039 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -287,7 +287,7 @@ begintw: if (unlikely(!INET_TW_MATCH(sk, net, acookie, saddr, daddr, ports, dif))) { - sock_put(sk); + inet_twsk_put(inet_twsk(sk)); goto begintw; } goto out; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 000e3d239d64..33d5537881ed 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -32,8 +32,8 @@ * At the moment of writing this notes identifier of IP packets is generated * to be unpredictable using this code only for packets subjected * (actually or potentially) to defragmentation. I.e. DF packets less than - * PMTU in size uses a constant ID and do not use this code (see - * ip_select_ident() in include/net/ip.h). + * PMTU in size when local fragmentation is disabled use a constant ID and do + * not use this code (see ip_select_ident() in include/net/ip.h). * * Route cache entries hold references to our nodes. * New cache entries get references via lookup by destination IP address in diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2a83591492dd..828b2e8631e7 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -335,7 +335,8 @@ static int ipgre_rcv(struct sk_buff *skb) iph->saddr, iph->daddr, tpi.key); if (tunnel) { - ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); + skb_pop_mac_header(skb); + ip_tunnel_rcv(tunnel, skb, &tpi, hdr_len, log_ecn_error); return 0; } icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); @@ -503,10 +504,11 @@ static int ipgre_tunnel_ioctl(struct net_device *dev, if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) return -EFAULT; - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || - ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) { - return -EINVAL; + if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || + p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || + ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) + return -EINVAL; } p.i_flags = gre_flags_to_tnl_flags(p.i_flags); p.o_flags = gre_flags_to_tnl_flags(p.o_flags); @@ -571,7 +573,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev, if (daddr) memcpy(&iph->daddr, daddr, 4); if (iph->daddr) - return t->hlen; + return t->hlen + sizeof(*iph); return -(t->hlen + sizeof(*iph)); } diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 3da817b89e9b..0a22bb0ce1a8 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -190,10 +190,7 @@ static int ip_local_deliver_finish(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); - __skb_pull(skb, ip_hdrlen(skb)); - - /* Point into the IP datagram, just past the header. */ - skb_reset_transport_header(skb); + __skb_pull(skb, skb_network_header_len(skb)); rcu_read_lock(); { @@ -316,7 +313,7 @@ static int ip_rcv_finish(struct sk_buff *skb) const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; - if (sysctl_ip_early_demux && !skb_dst(skb)) { + if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { const struct net_protocol *ipprot; int protocol = iph->protocol; @@ -437,6 +434,8 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, goto drop; } + skb->transport_header = skb->network_header + iph->ihl*4; + /* Remove any debris in the socket control block */ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 4bcabf3ab4ca..6ca5873d6175 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -148,7 +148,7 @@ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); iph->saddr = saddr; iph->protocol = sk->sk_protocol; - ip_select_ident(iph, &rt->dst, sk); + ip_select_ident(skb, &rt->dst, sk); if (opt && opt->opt.optlen) { iph->ihl += opt->opt.optlen>>2; @@ -394,7 +394,7 @@ packet_routed: ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0); } - ip_select_ident_more(iph, &rt->dst, sk, + ip_select_ident_more(skb, &rt->dst, sk, (skb_shinfo(skb)->gso_segs ?: 1) - 1); skb->priority = sk->sk_priority; @@ -844,7 +844,7 @@ static int __ip_append_data(struct sock *sk, csummode = CHECKSUM_PARTIAL; cork->length += length; - if (((length > mtu) || (skb && skb_is_gso(skb))) && + if (((length > mtu) || (skb && skb_has_frags(skb))) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len) { err = ip_ufo_append_data(sk, queue, getfrag, from, length, @@ -1324,7 +1324,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, else ttl = ip_select_ttl(inet, &rt->dst); - iph = (struct iphdr *)skb->data; + iph = ip_hdr(skb); iph->version = 4; iph->ihl = 5; iph->tos = inet->tos; @@ -1332,7 +1332,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk, iph->ttl = ttl; iph->protocol = sk->sk_protocol; ip_copy_addrs(iph, fl4); - ip_select_ident(iph, &rt->dst, sk); + ip_select_ident(skb, &rt->dst, sk); if (opt) { iph->ihl += opt->optlen>>2; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index d9c4f113d709..23e6ab0a2dc0 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -368,7 +368,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf /* * Handle MSG_ERRQUEUE */ -int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) +int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { struct sock_exterr_skb *serr; struct sk_buff *skb, *skb2; @@ -405,6 +405,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) serr->addr_offset); sin->sin_port = serr->port; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); + *addr_len = sizeof(*sin); } memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 7fa8f08fa7ae..fa6573264c8a 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -402,7 +402,7 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net, } int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, - const struct tnl_ptk_info *tpi, bool log_ecn_error) + const struct tnl_ptk_info *tpi, int hdr_len, bool log_ecn_error) { struct pcpu_tstats *tstats; const struct iphdr *iph = ip_hdr(skb); @@ -413,7 +413,7 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb, skb->protocol = tpi->proto; skb->mac_header = skb->network_header; - __pskb_pull(skb, tunnel->hlen); + __pskb_pull(skb, hdr_len); skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen); #ifdef CONFIG_NET_IPGRE_BROADCAST if (ipv4_is_multicast(iph->daddr)) { @@ -486,6 +486,53 @@ drop: } EXPORT_SYMBOL_GPL(ip_tunnel_rcv); +static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, + struct rtable *rt, __be16 df) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len; + int mtu; + + if (df) + mtu = dst_mtu(&rt->dst) - dev->hard_header_len + - sizeof(struct iphdr) - tunnel->hlen; + else + mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; + + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + + if (skb->protocol == htons(ETH_P_IP)) { + if (!skb_is_gso(skb) && + (df & htons(IP_DF)) && mtu < pkt_size) { + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); + return -E2BIG; + } + } +#if IS_ENABLED(CONFIG_IPV6) + else if (skb->protocol == htons(ETH_P_IPV6)) { + struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); + + if (rt6 && mtu < dst_mtu(skb_dst(skb)) && + mtu >= IPV6_MIN_MTU) { + if ((tunnel->parms.iph.daddr && + !ipv4_is_multicast(tunnel->parms.iph.daddr)) || + rt6->rt6i_dst.plen == 128) { + rt6->rt6i_flags |= RTF_MODIFIED; + dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); + } + } + + if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && + mtu < pkt_size) { + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + return -E2BIG; + } + } +#endif + return 0; +} + void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params) { @@ -499,7 +546,6 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, struct net_device *tdev; /* Device to other host */ unsigned int max_headroom; /* The extra header space needed */ __be32 dst; - int mtu; inner_iph = (const struct iphdr *)skb_inner_network_header(skb); @@ -579,56 +625,18 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } - df = tnl_params->frag_off; - if (df) - mtu = dst_mtu(&rt->dst) - dev->hard_header_len - - sizeof(struct iphdr); - else - mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu; - - if (skb_dst(skb)) - skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - - if (skb->protocol == htons(ETH_P_IP)) { - df |= (inner_iph->frag_off&htons(IP_DF)); - - if (!skb_is_gso(skb) && - (inner_iph->frag_off&htons(IP_DF)) && - mtu < ntohs(inner_iph->tot_len)) { - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); - ip_rt_put(rt); - goto tx_error; - } - } -#if IS_ENABLED(CONFIG_IPV6) - else if (skb->protocol == htons(ETH_P_IPV6)) { - struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb); - - if (rt6 && mtu < dst_mtu(skb_dst(skb)) && - mtu >= IPV6_MIN_MTU) { - if ((tunnel->parms.iph.daddr && - !ipv4_is_multicast(tunnel->parms.iph.daddr)) || - rt6->rt6i_dst.plen == 128) { - rt6->rt6i_flags |= RTF_MODIFIED; - dst_metric_set(skb_dst(skb), RTAX_MTU, mtu); - } - } - - if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU && - mtu < skb->len) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - ip_rt_put(rt); - goto tx_error; - } + if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off)) { + ip_rt_put(rt); + goto tx_error; } -#endif if (tunnel->err_count > 0) { if (time_before(jiffies, tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { tunnel->err_count--; + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); dst_link_failure(skb); } else tunnel->err_count = 0; @@ -646,15 +654,19 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, ttl = ip4_dst_hoplimit(&rt->dst); } + df = tnl_params->frag_off; + if (skb->protocol == htons(ETH_P_IP)) + df |= (inner_iph->frag_off&htons(IP_DF)); + max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr) + rt->dst.header_len; - if (max_headroom > dev->needed_headroom) { + if (max_headroom > dev->needed_headroom) dev->needed_headroom = max_headroom; - if (skb_cow_head(skb, dev->needed_headroom)) { - dev->stats.tx_dropped++; - dev_kfree_skb(skb); - return; - } + + if (skb_cow_head(skb, dev->needed_headroom)) { + dev->stats.tx_dropped++; + dev_kfree_skb(skb); + return; } skb_dst_drop(skb); @@ -675,7 +687,7 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, iph->daddr = fl4.daddr; iph->saddr = fl4.saddr; iph->ttl = ttl; - tunnel_ip_select_ident(skb, inner_iph, &rt->dst); + __ip_select_ident(iph, &rt->dst, (skb_shinfo(skb)->gso_segs ?: 1) - 1); iptunnel_xmit(skb, dev); return; diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index c118f6b576bb..feb19db62359 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -285,8 +285,17 @@ static int vti_rcv(struct sk_buff *skb) tunnel = vti_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr); if (tunnel != NULL) { struct pcpu_tstats *tstats; + u32 oldmark = skb->mark; + int ret; - if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + + /* temporarily mark the skb with the tunnel o_key, to + * only match policies with this mark. + */ + skb->mark = be32_to_cpu(tunnel->parms.o_key); + ret = xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb); + skb->mark = oldmark; + if (!ret) return -1; tstats = this_cpu_ptr(tunnel->dev->tstats); @@ -295,7 +304,6 @@ static int vti_rcv(struct sk_buff *skb) tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); - skb->mark = 0; secpath_reset(skb); skb->dev = tunnel->dev; return 1; @@ -327,7 +335,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) memset(&fl4, 0, sizeof(fl4)); flowi4_init_output(&fl4, tunnel->parms.link, - be32_to_cpu(tunnel->parms.i_key), RT_TOS(tos), + be32_to_cpu(tunnel->parms.o_key), RT_TOS(tos), RT_SCOPE_UNIVERSE, IPPROTO_IPIP, 0, dst, tiph->saddr, 0, 0); @@ -342,6 +350,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) if (!rt->dst.xfrm || rt->dst.xfrm->props.mode != XFRM_MODE_TUNNEL) { dev->stats.tx_carrier_errors++; + ip_rt_put(rt); goto tx_error_icmp; } tdev = rt->dst.dev; @@ -606,17 +615,10 @@ static int __net_init vti_fb_tunnel_init(struct net_device *dev) struct iphdr *iph = &tunnel->parms.iph; struct vti_net *ipn = net_generic(dev_net(dev), vti_net_id); - tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); - iph->version = 4; iph->protocol = IPPROTO_IPIP; iph->ihl = 5; - dev->tstats = alloc_percpu(struct pcpu_tstats); - if (!dev->tstats) - return -ENOMEM; - dev_hold(dev); rcu_assign_pointer(ipn->tunnels_wc[0], tunnel); return 0; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 77bfcce64fe5..f5cc7b331511 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -195,7 +195,7 @@ static int ipip_rcv(struct sk_buff *skb) if (tunnel) { if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; - return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error); + return ip_tunnel_rcv(tunnel, skb, &tpi, 0, log_ecn_error); } return -1; @@ -240,11 +240,13 @@ ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) return -EFAULT; - if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) - return -EINVAL; - if (p.i_key || p.o_key || p.i_flags || p.o_flags) - return -EINVAL; + if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { + if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP || + p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) + return -EINVAL; + } + + p.i_key = p.o_key = p.i_flags = p.o_flags = 0; if (p.iph.ttl) p.iph.frag_off |= htons(IP_DF); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9d9610ae7855..7dbad6835843 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -157,9 +157,12 @@ static struct mr_table *ipmr_get_table(struct net *net, u32 id) static int ipmr_fib_lookup(struct net *net, struct flowi4 *flp4, struct mr_table **mrt) { - struct ipmr_result res; - struct fib_lookup_arg arg = { .result = &res, }; int err; + struct ipmr_result res; + struct fib_lookup_arg arg = { + .result = &res, + .flags = FIB_LOOKUP_NOREF, + }; err = fib_rules_lookup(net->ipv4.mr_rules_ops, flowi4_to_flowi(flp4), 0, &arg); @@ -1658,7 +1661,7 @@ static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr) iph->protocol = IPPROTO_IPIP; iph->ihl = 5; iph->tot_len = htons(skb->len); - ip_select_ident(iph, skb_dst(skb), NULL); + ip_select_ident(skb, skb_dst(skb), NULL); ip_send_check(iph); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 7d93d62cd5fd..8cae28f5c3cf 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -570,7 +570,7 @@ static int ping_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, err = PTR_ERR(rt); rt = NULL; if (err == -ENETUNREACH) - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); goto out; } @@ -626,7 +626,6 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len) { struct inet_sock *isk = inet_sk(sk); - struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; struct sk_buff *skb; int copied, err; @@ -636,11 +635,8 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (flags & MSG_OOB) goto out; - if (addr_len) - *addr_len = sizeof(*sin); - if (flags & MSG_ERRQUEUE) - return ip_recv_error(sk, msg, len); + return ip_recv_error(sk, msg, len, addr_len); skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) @@ -660,11 +656,14 @@ static int ping_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sock_recv_timestamp(msg, sk, skb); /* Copy the address. */ - if (sin) { + if (msg->msg_name) { + struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; + sin->sin_family = AF_INET; sin->sin_port = 0 /* skb->h.uh->source */; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + *addr_len = sizeof(*sin); } if (isk->cmsg_flags) ip_cmsg_recv(msg, skb); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index dd44e0ab600c..402870fdfa0e 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -387,7 +387,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, iph->check = 0; iph->tot_len = htons(length); if (!iph->id) - ip_select_ident(iph, &rt->dst, NULL); + ip_select_ident(skb, &rt->dst, NULL); iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); } @@ -571,7 +571,8 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, flowi4_init_output(&fl4, ipc.oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE, inet->hdrincl ? IPPROTO_RAW : sk->sk_protocol, - inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP, + inet_sk_flowi_flags(sk) | FLOWI_FLAG_CAN_SLEEP | + (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0); if (!inet->hdrincl) { @@ -691,11 +692,8 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (flags & MSG_OOB) goto out; - if (addr_len) - *addr_len = sizeof(*sin); - if (flags & MSG_ERRQUEUE) { - err = ip_recv_error(sk, msg, len); + err = ip_recv_error(sk, msg, len, addr_len); goto out; } @@ -721,6 +719,7 @@ static int raw_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sin->sin_addr.s_addr = ip_hdr(skb)->saddr; sin->sin_port = 0; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); + *addr_len = sizeof(*sin); } if (inet->cmsg_flags) ip_cmsg_recv(msg, skb); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d35bbf0cf404..f6c6ab14da41 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1720,8 +1720,12 @@ local_input: rth->dst.error= -err; rth->rt_flags &= ~RTCF_LOCAL; } - if (do_cache) - rt_cache_route(&FIB_RES_NH(res), rth); + if (do_cache) { + if (unlikely(!rt_cache_route(&FIB_RES_NH(res), rth))) { + rth->dst.flags |= DST_NOCACHE; + rt_add_uncached_list(rth); + } + } skb_dst_set(skb, &rth->dst); err = 0; goto out; @@ -2020,7 +2024,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) RT_SCOPE_LINK); goto make_route; } - if (fl4->saddr) { + if (!fl4->saddr) { if (ipv4_is_multicast(fl4->daddr)) fl4->saddr = inet_select_addr(dev_out, 0, fl4->flowi4_scope); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index fa2f63fc453b..90b26beb84d4 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -29,6 +29,7 @@ static int zero; static int one = 1; static int four = 4; +static int gso_max_segs = GSO_MAX_SEGS; static int tcp_retr1_max = 255; static int ip_local_port_range_min[] = { 1, 1 }; static int ip_local_port_range_max[] = { 65535, 65535 }; @@ -36,6 +37,8 @@ static int tcp_adv_win_scale_min = -31; static int tcp_adv_win_scale_max = 31; static int ip_ttl_min = 1; static int ip_ttl_max = 255; +static int tcp_syn_retries_min = 1; +static int tcp_syn_retries_max = MAX_TCP_SYNCNT; static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; @@ -331,7 +334,9 @@ static struct ctl_table ipv4_table[] = { .data = &sysctl_tcp_syn_retries, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_dointvec_minmax, + .extra1 = &tcp_syn_retries_min, + .extra2 = &tcp_syn_retries_max }, { .procname = "tcp_synack_retries", @@ -749,6 +754,15 @@ static struct ctl_table ipv4_table[] = { .extra2 = &four, }, { + .procname = "tcp_min_tso_segs", + .data = &sysctl_tcp_min_tso_segs, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &gso_max_segs, + }, + { .procname = "udp_mem", .data = &sysctl_udp_mem, .maxlen = sizeof(sysctl_udp_mem), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ab450c099aa4..1a2e249cef49 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -282,6 +282,8 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; +int sysctl_tcp_min_tso_segs __read_mostly = 2; + struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); @@ -786,14 +788,24 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, xmit_size_goal = mss_now; if (large_allowed && sk_can_gso(sk)) { - xmit_size_goal = ((sk->sk_gso_max_size - 1) - - inet_csk(sk)->icsk_af_ops->net_header_len - - inet_csk(sk)->icsk_ext_hdr_len - - tp->tcp_header_len); + u32 gso_size, hlen; + + /* Maybe we should/could use sk->sk_prot->max_header here ? */ + hlen = inet_csk(sk)->icsk_af_ops->net_header_len + + inet_csk(sk)->icsk_ext_hdr_len + + tp->tcp_header_len; + + /* Goal is to send at least one packet per ms, + * not one big TSO packet every 100 ms. + * This preserves ACK clocking and is consistent + * with tcp_tso_should_defer() heuristic. + */ + gso_size = sk->sk_pacing_rate / (2 * MSEC_PER_SEC); + gso_size = max_t(u32, gso_size, + sysctl_tcp_min_tso_segs * mss_now); - /* TSQ : try to have two TSO segments in flight */ - xmit_size_goal = min_t(u32, xmit_size_goal, - sysctl_tcp_limit_output_bytes >> 1); + xmit_size_goal = min_t(u32, gso_size, + sk->sk_gso_max_size - 1 - hlen); xmit_size_goal = tcp_bound_to_half_wnd(tp, xmit_size_goal); @@ -1118,6 +1130,13 @@ new_segment: goto wait_for_memory; /* + * All packets are restored as if they have + * already been sent. + */ + if (tp->repair) + TCP_SKB_CB(skb)->when = tcp_time_stamp; + + /* * Check whether we can use HW checksum. */ if (sk->sk_route_caps & NETIF_F_ALL_CSUM) @@ -2440,10 +2459,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level, case TCP_THIN_DUPACK: if (val < 0 || val > 1) err = -EINVAL; - else + else { tp->thin_dupack = val; if (tp->thin_dupack) tcp_disable_early_retrans(tp); + } break; case TCP_REPAIR: @@ -2879,6 +2899,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); + unsigned int sum_truesize = 0; struct tcphdr *th; unsigned int thlen; unsigned int seq; @@ -2962,13 +2983,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, if (copy_destructor) { skb->destructor = gso_skb->destructor; skb->sk = gso_skb->sk; - /* {tcp|sock}_wfree() use exact truesize accounting : - * sum(skb->truesize) MUST be exactly be gso_skb->truesize - * So we account mss bytes of 'true size' for each segment. - * The last segment will contain the remaining. - */ - skb->truesize = mss; - gso_skb->truesize -= mss; + sum_truesize += skb->truesize; } skb = skb->next; th = tcp_hdr(skb); @@ -2985,7 +3000,9 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, if (copy_destructor) { swap(gso_skb->sk, skb->sk); swap(gso_skb->destructor, skb->destructor); - swap(gso_skb->truesize, skb->truesize); + sum_truesize += skb->truesize; + atomic_add(sum_truesize - gso_skb->truesize, + &skb->sk->sk_wmem_alloc); } delta = htonl(oldlen + (skb->tail - skb->transport_header) + diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index a9077f441cb2..b6ae92a51f58 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -206,8 +206,8 @@ static u32 cubic_root(u64 a) */ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) { - u64 offs; - u32 delta, t, bic_target, max_cnt; + u32 delta, bic_target, max_cnt; + u64 offs, t; ca->ack_cnt++; /* count the number of ACKs */ @@ -250,9 +250,11 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) * if the cwnd < 1 million packets !!! */ + t = (s32)(tcp_time_stamp - ca->epoch_start); + t += msecs_to_jiffies(ca->delay_min >> 3); /* change the unit from HZ to bictcp_HZ */ - t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3) - - ca->epoch_start) << BICTCP_HZ) / HZ; + t <<= BICTCP_HZ; + do_div(t, HZ); if (t < ca->bic_K) /* t - K */ offs = ca->bic_K - t; @@ -414,7 +416,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) return; /* Discard delay samples right after fast recovery */ - if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ) + if (ca->epoch_start && (s32)(tcp_time_stamp - ca->epoch_start) < HZ) return; delay = (rtt_us << 3) / USEC_PER_MSEC; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9c6225780bd5..e15d330919af 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -699,6 +699,34 @@ static void tcp_rtt_estimator(struct sock *sk, const __u32 mrtt) } } +/* Set the sk_pacing_rate to allow proper sizing of TSO packets. + * Note: TCP stack does not yet implement pacing. + * FQ packet scheduler can be used to implement cheap but effective + * TCP pacing, to smooth the burst on large writes when packets + * in flight is significantly lower than cwnd (or rwin) + */ +static void tcp_update_pacing_rate(struct sock *sk) +{ + const struct tcp_sock *tp = tcp_sk(sk); + u64 rate; + + /* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */ + rate = (u64)tp->mss_cache * 2 * (HZ << 3); + + rate *= max(tp->snd_cwnd, tp->packets_out); + + /* Correction for small srtt : minimum srtt being 8 (1 jiffy << 3), + * be conservative and assume srtt = 1 (125 us instead of 1.25 ms) + * We probably need usec resolution in the future. + * Note: This also takes care of possible srtt=0 case, + * when tcp_rtt_estimator() was not yet called. + */ + if (tp->srtt > 8 + 2) + do_div(rate, tp->srtt); + + sk->sk_pacing_rate = min_t(u64, rate, ~0U); +} + /* Calculate rto without backoff. This is the second half of Van Jacobson's * routine referred to above. */ @@ -1264,7 +1292,10 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, tp->lost_cnt_hint -= tcp_skb_pcount(prev); } - TCP_SKB_CB(skb)->tcp_flags |= TCP_SKB_CB(prev)->tcp_flags; + TCP_SKB_CB(prev)->tcp_flags |= TCP_SKB_CB(skb)->tcp_flags; + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) + TCP_SKB_CB(prev)->end_seq++; + if (skb == tcp_highest_sack(sk)) tcp_advance_highest_sack(sk, skb); @@ -3314,7 +3345,7 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) tcp_init_cwnd_reduction(sk, true); tcp_set_ca_state(sk, TCP_CA_CWR); tcp_end_cwnd_reduction(sk); - tcp_set_ca_state(sk, TCP_CA_Open); + tcp_try_keep_open(sk); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPLOSSPROBERECOVERY); } @@ -3330,7 +3361,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; bool is_dupack = false; - u32 prior_in_flight; + u32 prior_in_flight, prior_cwnd = tp->snd_cwnd, prior_rtt = tp->srtt; u32 prior_fackets; int prior_packets = tp->packets_out; int prior_sacked = tp->sacked_out; @@ -3438,6 +3469,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (icsk->icsk_pending == ICSK_TIME_RETRANS) tcp_schedule_loss_probe(sk); + if (tp->srtt != prior_rtt || tp->snd_cwnd != prior_cwnd) + tcp_update_pacing_rate(sk); return 1; no_queue: @@ -3598,7 +3631,10 @@ static bool tcp_parse_aligned_timestamp(struct tcp_sock *tp, const struct tcphdr ++ptr; tp->rx_opt.rcv_tsval = ntohl(*ptr); ++ptr; - tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset; + if (*ptr) + tp->rx_opt.rcv_tsecr = ntohl(*ptr) - tp->tsoffset; + else + tp->rx_opt.rcv_tsecr = 0; return true; } return false; @@ -3623,7 +3659,7 @@ static bool tcp_fast_parse_options(const struct sk_buff *skb, } tcp_parse_options(skb, &tp->rx_opt, 1, NULL); - if (tp->rx_opt.saw_tstamp) + if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) tp->rx_opt.rcv_tsecr -= tp->tsoffset; return true; @@ -5376,7 +5412,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, int saved_clamp = tp->rx_opt.mss_clamp; tcp_parse_options(skb, &tp->rx_opt, 0, &foc); - if (tp->rx_opt.saw_tstamp) + if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr) tp->rx_opt.rcv_tsecr -= tp->tsoffset; if (th->ack) { @@ -5733,6 +5769,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, } else tcp_init_metrics(sk); + tcp_update_pacing_rate(sk); + /* Prevent spurious tcp_cwnd_restart() on * first data packet. */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7999fc55c83b..5d87806d3ade 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -176,7 +176,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (IS_ERR(rt)) { err = PTR_ERR(rt); if (err == -ENETUNREACH) - IP_INC_STATS_BH(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); return err; } diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index f6a005c485a9..b500d2ddc476 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -22,6 +22,9 @@ int sysctl_tcp_nometrics_save __read_mostly; +static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *addr, + struct net *net, unsigned int hash); + struct tcp_fastopen_metrics { u16 mss; u16 syn_loss:10; /* Recurring Fast Open SYN losses */ @@ -130,16 +133,41 @@ static void tcpm_suck_dst(struct tcp_metrics_block *tm, struct dst_entry *dst, } } +#define TCP_METRICS_TIMEOUT (60 * 60 * HZ) + +static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst) +{ + if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT))) + tcpm_suck_dst(tm, dst, false); +} + +#define TCP_METRICS_RECLAIM_DEPTH 5 +#define TCP_METRICS_RECLAIM_PTR (struct tcp_metrics_block *) 0x1UL + static struct tcp_metrics_block *tcpm_new(struct dst_entry *dst, struct inetpeer_addr *addr, - unsigned int hash, - bool reclaim) + unsigned int hash) { struct tcp_metrics_block *tm; struct net *net; + bool reclaim = false; spin_lock_bh(&tcp_metrics_lock); net = dev_net(dst->dev); + + /* While waiting for the spin-lock the cache might have been populated + * with this entry and so we have to check again. + */ + tm = __tcp_get_metrics(addr, net, hash); + if (tm == TCP_METRICS_RECLAIM_PTR) { + reclaim = true; + tm = NULL; + } + if (tm) { + tcpm_check_stamp(tm, dst); + goto out_unlock; + } + if (unlikely(reclaim)) { struct tcp_metrics_block *oldest; @@ -169,17 +197,6 @@ out_unlock: return tm; } -#define TCP_METRICS_TIMEOUT (60 * 60 * HZ) - -static void tcpm_check_stamp(struct tcp_metrics_block *tm, struct dst_entry *dst) -{ - if (tm && unlikely(time_after(jiffies, tm->tcpm_stamp + TCP_METRICS_TIMEOUT))) - tcpm_suck_dst(tm, dst, false); -} - -#define TCP_METRICS_RECLAIM_DEPTH 5 -#define TCP_METRICS_RECLAIM_PTR (struct tcp_metrics_block *) 0x1UL - static struct tcp_metrics_block *tcp_get_encode(struct tcp_metrics_block *tm, int depth) { if (tm) @@ -280,7 +297,6 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, struct inetpeer_addr addr; unsigned int hash; struct net *net; - bool reclaim; addr.family = sk->sk_family; switch (addr.family) { @@ -300,13 +316,10 @@ static struct tcp_metrics_block *tcp_get_metrics(struct sock *sk, hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log); tm = __tcp_get_metrics(&addr, net, hash); - reclaim = false; - if (tm == TCP_METRICS_RECLAIM_PTR) { - reclaim = true; + if (tm == TCP_METRICS_RECLAIM_PTR) tm = NULL; - } if (!tm && create) - tm = tcpm_new(dst, &addr, hash, reclaim); + tm = tcpm_new(dst, &addr, hash); else tcpm_check_stamp(tm, dst); @@ -665,10 +678,13 @@ void tcp_fastopen_cache_get(struct sock *sk, u16 *mss, void tcp_fastopen_cache_set(struct sock *sk, u16 mss, struct tcp_fastopen_cookie *cookie, bool syn_lost) { + struct dst_entry *dst = __sk_dst_get(sk); struct tcp_metrics_block *tm; + if (!dst) + return; rcu_read_lock(); - tm = tcp_get_metrics(sk, __sk_dst_get(sk), true); + tm = tcp_get_metrics(sk, dst, true); if (tm) { struct tcp_fastopen_metrics *tfom = &tm->tcpm_fastopen; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ec335fabd5cc..5560abfe6d30 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -887,8 +887,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_orphan(skb); skb->sk = sk; - skb->destructor = (sysctl_tcp_limit_output_bytes > 0) ? - tcp_wfree : sock_wfree; + skb->destructor = tcp_wfree; atomic_add(skb->truesize, &sk->sk_wmem_alloc); /* Build TCP header and checksum it. */ @@ -977,6 +976,9 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) static void tcp_set_skb_tso_segs(const struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { + /* Make sure we own this skb before messing gso_size/gso_segs */ + WARN_ON_ONCE(skb_cloned(skb)); + if (skb->len <= mss_now || !sk_can_gso(sk) || skb->ip_summed == CHECKSUM_NONE) { /* Avoid the costly divide in the normal @@ -1058,9 +1060,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, if (nsize < 0) nsize = 0; - if (skb_cloned(skb) && - skb_is_nonlinear(skb) && - pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + if (skb_unclone(skb, GFP_ATOMIC)) return -ENOMEM; /* Get a new skb... force flag on. */ @@ -1623,7 +1623,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb) /* If a full-sized TSO skb can be sent, do it. */ if (limit >= min_t(unsigned int, sk->sk_gso_max_size, - sk->sk_gso_max_segs * tp->mss_cache)) + tp->xmit_size_goal_segs * tp->mss_cache)) goto send_now; /* Middle in queue won't get any more data, full sendable already? */ @@ -1832,7 +1832,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, while ((skb = tcp_send_head(sk))) { unsigned int limit; - tso_segs = tcp_init_tso_segs(sk, skb, mss_now); BUG_ON(!tso_segs); @@ -1861,13 +1860,24 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, break; } - /* TSQ : sk_wmem_alloc accounts skb truesize, - * including skb overhead. But thats OK. + /* TCP Small Queues : + * Control number of packets in qdisc/devices to two packets / or ~1 ms. + * This allows for : + * - better RTT estimation and ACK scheduling + * - faster recovery + * - high rates + * Alas, some drivers / subsystems require a fair amount + * of queued bytes to ensure line rate. + * One example is wifi aggregation (802.11 AMPDU) */ - if (atomic_read(&sk->sk_wmem_alloc) >= sysctl_tcp_limit_output_bytes) { + limit = max_t(unsigned int, sysctl_tcp_limit_output_bytes, + sk->sk_pacing_rate >> 10); + + if (atomic_read(&sk->sk_wmem_alloc) > limit) { set_bit(TSQ_THROTTLED, &tp->tsq_flags); break; } + limit = mss_now; if (tso_segs > 1 && !tcp_urg_mode(tp)) limit = tcp_mss_split_point(sk, skb, mss_now, @@ -2329,6 +2339,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) int oldpcount = tcp_skb_pcount(skb); if (unlikely(oldpcount > 1)) { + if (skb_unclone(skb, GFP_ATOMIC)) + return -ENOMEM; tcp_init_tso_segs(sk, skb, cur_mss); tcp_adjust_pcount(sk, skb, oldpcount - tcp_skb_pcount(skb)); } @@ -2664,7 +2676,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, int tcp_header_size; int mss; - skb = alloc_skb(MAX_TCP_HEADER + 15, sk_gfp_atomic(sk, GFP_ATOMIC)); + skb = sock_wmalloc(sk, MAX_TCP_HEADER + 15, 1, GFP_ATOMIC); if (unlikely(!skb)) { dst_release(dst); return NULL; @@ -2808,6 +2820,8 @@ void tcp_connect_init(struct sock *sk) if (likely(!tp->repair)) tp->rcv_nxt = 0; + else + tp->rcv_tstamp = tcp_time_stamp; tp->rcv_wup = tp->rcv_nxt; tp->copied_seq = tp->rcv_nxt; @@ -3088,7 +3102,6 @@ void tcp_send_window_probe(struct sock *sk) { if (sk->sk_state == TCP_ESTABLISHED) { tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1; - tcp_sk(sk)->snd_nxt = tcp_sk(sk)->write_seq; tcp_xmit_probe_skb(sk, 0); } } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0bf5d399a03c..c3075b552248 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -799,7 +799,7 @@ send: /* * Push out all pending data as one UDP datagram. Socket is locked. */ -static int udp_push_pending_frames(struct sock *sk) +int udp_push_pending_frames(struct sock *sk) { struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); @@ -818,6 +818,7 @@ out: up->pending = 0; return err; } +EXPORT_SYMBOL(udp_push_pending_frames); int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) @@ -970,7 +971,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, err = PTR_ERR(rt); rt = NULL; if (err == -ENETUNREACH) - IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); goto out; } @@ -1069,6 +1070,9 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, struct udp_sock *up = udp_sk(sk); int ret; + if (flags & MSG_SENDPAGE_NOTLAST) + flags |= MSG_MORE; + if (!up->pending) { struct msghdr msg = { .msg_flags = flags|MSG_MORE }; @@ -1206,14 +1210,8 @@ int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, int is_udplite = IS_UDPLITE(sk); bool slow; - /* - * Check any passed addresses - */ - if (addr_len) - *addr_len = sizeof(*sin); - if (flags & MSG_ERRQUEUE) - return ip_recv_error(sk, msg, len); + return ip_recv_error(sk, msg, len, addr_len); try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), @@ -1273,6 +1271,7 @@ try_again: sin->sin_port = udp_hdr(skb)->source; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + *addr_len = sizeof(*sin); } if (inet->cmsg_flags) ip_cmsg_recv(msg, skb); diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index eb1dd4d643f2..b5663c37f089 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -117,7 +117,7 @@ static int xfrm4_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ? 0 : (XFRM_MODE_SKB_CB(skb)->frag_off & htons(IP_DF)); - ip_select_ident(top_iph, dst->child, NULL); + ip_select_ident(skb, dst->child, NULL); top_iph->ttl = ip4_dst_hoplimit(dst->child); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 4ab4c38958c6..b78a3ee93d52 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1124,12 +1124,10 @@ retry: if (ifp->flags & IFA_F_OPTIMISTIC) addr_flags |= IFA_F_OPTIMISTIC; - ift = !max_addresses || - ipv6_count_addresses(idev) < max_addresses ? - ipv6_add_addr(idev, &addr, tmp_plen, - ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, - addr_flags) : NULL; - if (IS_ERR_OR_NULL(ift)) { + ift = ipv6_add_addr(idev, &addr, tmp_plen, + ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, + addr_flags); + if (IS_ERR(ift)) { in6_ifa_put(ifp); in6_dev_put(idev); pr_info("%s: retry temporary address regeneration\n", __func__); @@ -1448,6 +1446,23 @@ try_nextdev: } EXPORT_SYMBOL(ipv6_dev_get_saddr); +int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, + unsigned char banned_flags) +{ + struct inet6_ifaddr *ifp; + int err = -EADDRNOTAVAIL; + + list_for_each_entry(ifp, &idev->addr_list, if_list) { + if (ifp->scope == IFA_LINK && + !(ifp->flags & banned_flags)) { + *addr = ifp->addr; + err = 0; + break; + } + } + return err; +} + int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, unsigned char banned_flags) { @@ -1457,17 +1472,8 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { - struct inet6_ifaddr *ifp; - read_lock_bh(&idev->lock); - list_for_each_entry(ifp, &idev->addr_list, if_list) { - if (ifp->scope == IFA_LINK && - !(ifp->flags & banned_flags)) { - *addr = ifp->addr; - err = 0; - break; - } - } + err = __ipv6_get_lladdr(idev, addr, banned_flags); read_unlock_bh(&idev->lock); } rcu_read_unlock(); @@ -1527,6 +1533,33 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, return false; } +/* Compares an address/prefix_len with addresses on device @dev. + * If one is found it returns true. + */ +bool ipv6_chk_custom_prefix(const struct in6_addr *addr, + const unsigned int prefix_len, struct net_device *dev) +{ + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; + bool ret = false; + + rcu_read_lock(); + idev = __in6_dev_get(dev); + if (idev) { + read_lock_bh(&idev->lock); + list_for_each_entry(ifa, &idev->addr_list, if_list) { + ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len); + if (ret) + break; + } + read_unlock_bh(&idev->lock); + } + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL(ipv6_chk_custom_prefix); + int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev) { struct inet6_dev *idev; diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index f083a583a05c..b30ad3741b46 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -251,38 +251,36 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, /* add a label */ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) { + struct hlist_node *n; + struct ip6addrlbl_entry *last = NULL, *p = NULL; int ret = 0; - ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", - __func__, - newp, replace); + ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp, + replace); - if (hlist_empty(&ip6addrlbl_table.head)) { - hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); - } else { - struct hlist_node *n; - struct ip6addrlbl_entry *p = NULL; - hlist_for_each_entry_safe(p, n, - &ip6addrlbl_table.head, list) { - if (p->prefixlen == newp->prefixlen && - net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) && - p->ifindex == newp->ifindex && - ipv6_addr_equal(&p->prefix, &newp->prefix)) { - if (!replace) { - ret = -EEXIST; - goto out; - } - hlist_replace_rcu(&p->list, &newp->list); - ip6addrlbl_put(p); - goto out; - } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || - (p->prefixlen < newp->prefixlen)) { - hlist_add_before_rcu(&newp->list, &p->list); + hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { + if (p->prefixlen == newp->prefixlen && + net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) && + p->ifindex == newp->ifindex && + ipv6_addr_equal(&p->prefix, &newp->prefix)) { + if (!replace) { + ret = -EEXIST; goto out; } + hlist_replace_rcu(&p->list, &newp->list); + ip6addrlbl_put(p); + goto out; + } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || + (p->prefixlen < newp->prefixlen)) { + hlist_add_before_rcu(&newp->list, &p->list); + goto out; } - hlist_add_after_rcu(&p->list, &newp->list); + last = p; } + if (last) + hlist_add_after_rcu(&last->list, &newp->list); + else + hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); out: if (!ret) ip6addrlbl_table.seq++; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 4b56cbbc7890..8997340e3742 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -318,7 +318,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) /* * Handle MSG_ERRQUEUE */ -int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); struct sock_exterr_skb *serr; @@ -369,6 +369,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) &sin->sin6_addr); sin->sin6_scope_id = 0; } + *addr_len = sizeof(*sin); } memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); @@ -377,6 +378,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) { sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; + sin->sin6_port = 0; if (skb->protocol == htons(ETH_P_IPV6)) { sin->sin6_addr = ipv6_hdr(skb)->saddr; if (np->rxopt.all) @@ -423,7 +425,8 @@ EXPORT_SYMBOL_GPL(ipv6_recv_error); /* * Handle IPV6_RECVPATHMTU */ -int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len) +int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, + int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; @@ -457,6 +460,7 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len) sin->sin6_port = 0; sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id; sin->sin6_addr = mtu_info.ip6m_addr.sin6_addr; + *addr_len = sizeof(*sin); } put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 07a7d65a7cb6..8d67900aa003 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -162,12 +162,6 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb) off += optlen; len -= optlen; } - /* This case will not be caught by above check since its padding - * length is smaller than 7: - * 1 byte NH + 1 byte Length + 6 bytes Padding - */ - if ((padlen == 6) && ((off - skb_network_header_len(skb)) == 8)) - goto bad; if (len == 0) return true; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index b4ff0a42b8c7..70e704d49007 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -931,6 +931,14 @@ static const struct icmp6_err { .err = ECONNREFUSED, .fatal = 1, }, + { /* POLICY_FAIL */ + .err = EACCES, + .fatal = 1, + }, + { /* REJECT_ROUTE */ + .err = EACCES, + .fatal = 1, + }, }; int icmpv6_err_convert(u8 type, u8 code, int *err) @@ -942,7 +950,7 @@ int icmpv6_err_convert(u8 type, u8 code, int *err) switch (type) { case ICMPV6_DEST_UNREACH: fatal = 1; - if (code <= ICMPV6_PORT_UNREACH) { + if (code < ARRAY_SIZE(tab_unreach)) { *err = tab_unreach[code].err; fatal = tab_unreach[code].fatal; } diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 32b4a1675d82..066640e0ba8e 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -116,7 +116,7 @@ begintw: } if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr, ports, dif))) { - sock_put(sk); + inet_twsk_put(inet_twsk(sk)); goto begintw; } goto out; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 192dd1a0e188..9c06ecb6556e 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -632,6 +632,12 @@ insert_above: return ln; } +static inline bool rt6_qualify_for_ecmp(struct rt6_info *rt) +{ + return (rt->rt6i_flags & (RTF_GATEWAY|RTF_ADDRCONF|RTF_DYNAMIC)) == + RTF_GATEWAY; +} + /* * Insert routing information in a node. */ @@ -646,6 +652,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, int add = (!info->nlh || (info->nlh->nlmsg_flags & NLM_F_CREATE)); int found = 0; + bool rt_can_ecmp = rt6_qualify_for_ecmp(rt); ins = &fn->leaf; @@ -691,9 +698,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, * To avoid long list, we only had siblings if the * route have a gateway. */ - if (rt->rt6i_flags & RTF_GATEWAY && - !(rt->rt6i_flags & RTF_EXPIRES) && - !(iter->rt6i_flags & RTF_EXPIRES)) + if (rt_can_ecmp && + rt6_qualify_for_ecmp(iter)) rt->rt6i_nsiblings++; } @@ -715,7 +721,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, /* Find the first route that have the same metric */ sibling = fn->leaf; while (sibling) { - if (sibling->rt6i_metric == rt->rt6i_metric) { + if (sibling->rt6i_metric == rt->rt6i_metric && + rt6_qualify_for_ecmp(sibling)) { list_add_tail(&rt->rt6i_siblings, &sibling->rt6i_siblings); break; @@ -818,9 +825,9 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr), rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst), allow_create, replace_required); - if (IS_ERR(fn)) { err = PTR_ERR(fn); + fn = NULL; goto out; } @@ -986,14 +993,22 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root, if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) { #ifdef CONFIG_IPV6_SUBTREES - if (fn->subtree) - fn = fib6_lookup_1(fn->subtree, args + 1); + if (fn->subtree) { + struct fib6_node *sfn; + sfn = fib6_lookup_1(fn->subtree, + args + 1); + if (!sfn) + goto backtrack; + fn = sfn; + } #endif - if (!fn || fn->fn_flags & RTN_RTINFO) + if (fn->fn_flags & RTN_RTINFO) return fn; } } - +#ifdef CONFIG_IPV6_SUBTREES +backtrack: +#endif if (fn->fn_flags & RTN_ROOT) break; diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 46e88433ec7d..f0ccdb787100 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -453,8 +453,10 @@ static int mem_check(struct sock *sk) if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK) return 0; + rcu_read_lock_bh(); for_each_sk_fl_rcu(np, sfl) count++; + rcu_read_unlock_bh(); if (room <= 0 || ((count >= FL_MAX_PER_SOCK || diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index ecd60733e5e2..1f9a1a5b61f4 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -620,7 +620,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, struct ip6_tnl *tunnel = netdev_priv(dev); struct net_device *tdev; /* Device to other host */ struct ipv6hdr *ipv6h; /* Our new IP header */ - unsigned int max_headroom; /* The extra header space needed */ + unsigned int max_headroom = 0; /* The extra header space needed */ int gre_hlen; struct ipv6_tel_txoption opt; int mtu; @@ -693,7 +693,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, tunnel->err_count = 0; } - max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len; + max_headroom += LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len; if (skb_headroom(skb) < max_headroom || skb_shared(skb) || (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 2bab2aa59745..774b09cb2920 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -49,7 +49,7 @@ int ip6_rcv_finish(struct sk_buff *skb) { - if (sysctl_ip_early_demux && !skb_dst(skb)) { + if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { const struct inet6_protocol *ipprot; ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d5d20cde8d92..b98b8e06739e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -130,7 +130,7 @@ static int ip6_finish_output2(struct sk_buff *skb) } rcu_read_lock_bh(); - nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); + nexthop = rt6_nexthop((struct rt6_info *)dst); neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); @@ -141,8 +141,8 @@ static int ip6_finish_output2(struct sk_buff *skb) } rcu_read_unlock_bh(); - IP6_INC_STATS_BH(dev_net(dst->dev), - ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EINVAL; } @@ -150,7 +150,8 @@ static int ip6_finish_output2(struct sk_buff *skb) static int ip6_finish_output(struct sk_buff *skb) { if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || - dst_allfrag(skb_dst(skb))) + dst_allfrag(skb_dst(skb)) || + (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) return ip6_fragment(skb, ip6_finish_output2); else return ip6_finish_output2(skb); @@ -898,7 +899,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, */ rt = (struct rt6_info *) *dst; rcu_read_lock_bh(); - n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr)); + n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt)); err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0; rcu_read_unlock_bh(); @@ -1039,6 +1040,8 @@ static inline int ip6_ufo_append_data(struct sock *sk, * udp datagram */ if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { + struct frag_hdr fhdr; + skb = sock_alloc_send_skb(sk, hh_len + fragheaderlen + transhdrlen + 20, (flags & MSG_DONTWAIT), &err); @@ -1059,12 +1062,6 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; - } - - err = skb_append_datato_frags(sk,skb, getfrag, from, - (length - transhdrlen)); - if (!err) { - struct frag_hdr fhdr; /* Specify the length of each IPv6 datagram fragment. * It has to be a multiple of 8. @@ -1075,15 +1072,10 @@ static inline int ip6_ufo_append_data(struct sock *sk, ipv6_select_ident(&fhdr, rt); skb_shinfo(skb)->ip6_frag_id = fhdr.identification; __skb_queue_tail(&sk->sk_write_queue, skb); - - return 0; } - /* There is not enough support do UPD LSO, - * so follow normal path - */ - kfree_skb(skb); - return err; + return skb_append_datato_frags(sk, skb, getfrag, from, + (length - transhdrlen)); } static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, @@ -1098,11 +1090,12 @@ static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; } -static void ip6_append_data_mtu(int *mtu, +static void ip6_append_data_mtu(unsigned int *mtu, int *maxfraglen, unsigned int fragheaderlen, struct sk_buff *skb, - struct rt6_info *rt) + struct rt6_info *rt, + bool pmtuprobe) { if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { if (skb == NULL) { @@ -1114,7 +1107,9 @@ static void ip6_append_data_mtu(int *mtu, * this fragment is not first, the headers * space is regarded as data space. */ - *mtu = dst_mtu(rt->dst.path); + *mtu = min(*mtu, pmtuprobe ? + rt->dst.dev->mtu : + dst_mtu(rt->dst.path)); } *maxfraglen = ((*mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); @@ -1131,11 +1126,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, struct ipv6_pinfo *np = inet6_sk(sk); struct inet_cork *cork; struct sk_buff *skb, *skb_prev = NULL; - unsigned int maxfraglen, fragheaderlen; + unsigned int maxfraglen, fragheaderlen, mtu; int exthdrlen; int dst_exthdrlen; int hh_len; - int mtu; int copy; int err; int offset = 0; @@ -1248,27 +1242,27 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, * --yoshfuji */ - cork->length += length; - if (length > mtu) { - int proto = sk->sk_protocol; - if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){ - ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); - return -EMSGSIZE; - } - - if (proto == IPPROTO_UDP && - (rt->dst.dev->features & NETIF_F_UFO)) { + if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP || + sk->sk_protocol == IPPROTO_RAW)) { + ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); + return -EMSGSIZE; + } - err = ip6_ufo_append_data(sk, getfrag, from, length, - hh_len, fragheaderlen, - transhdrlen, mtu, flags, rt); - if (err) - goto error; - return 0; - } + skb = skb_peek_tail(&sk->sk_write_queue); + cork->length += length; + if (((length > mtu) || + (skb && skb_has_frags(skb))) && + (sk->sk_protocol == IPPROTO_UDP) && + (rt->dst.dev->features & NETIF_F_UFO)) { + err = ip6_ufo_append_data(sk, getfrag, from, length, + hh_len, fragheaderlen, + transhdrlen, mtu, flags, rt); + if (err) + goto error; + return 0; } - if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) + if (!skb) goto alloc_new_skb; while (length > 0) { @@ -1292,7 +1286,9 @@ alloc_new_skb: /* update mtu and maxfraglen if necessary */ if (skb == NULL || skb_prev == NULL) ip6_append_data_mtu(&mtu, &maxfraglen, - fragheaderlen, skb, rt); + fragheaderlen, skb, rt, + np->pmtudisc == + IPV6_PMTUDISC_PROBE); skb_prev = skb; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 1e55866cead7..f21cf476b00c 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1617,6 +1617,15 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], return ip6_tnl_update(t, &p); } +static void ip6_tnl_dellink(struct net_device *dev, struct list_head *head) +{ + struct net *net = dev_net(dev); + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + + if (dev != ip6n->fb_tnl_dev) + unregister_netdevice_queue(dev, head); +} + static size_t ip6_tnl_get_size(const struct net_device *dev) { return @@ -1646,9 +1655,9 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr), - &parm->raddr) || - nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr), &parm->laddr) || + nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr), + &parm->raddr) || nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) || nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) || nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) || @@ -1681,6 +1690,7 @@ static struct rtnl_link_ops ip6_link_ops __read_mostly = { .validate = ip6_tnl_validate, .newlink = ip6_tnl_newlink, .changelink = ip6_tnl_changelink, + .dellink = ip6_tnl_dellink, .get_size = ip6_tnl_get_size, .fill_info = ip6_tnl_fill_info, }; @@ -1732,6 +1742,7 @@ static int __net_init ip6_tnl_init_net(struct net *net) if (!ip6n->fb_tnl_dev) goto err_alloc_dev; dev_net_set(ip6n->fb_tnl_dev, net); + ip6n->fb_tnl_dev->rtnl_link_ops = &ip6_link_ops; err = ip6_fb_tnl_dev_init(ip6n->fb_tnl_dev); if (err < 0) diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 241fb8ad9fcf..9f44ebc17759 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -141,9 +141,12 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, struct mr6_table **mrt) { - struct ip6mr_result res; - struct fib_lookup_arg arg = { .result = &res, }; int err; + struct ip6mr_result res; + struct fib_lookup_arg arg = { + .result = &res, + .flags = FIB_LOOKUP_NOREF, + }; err = fib_rules_lookup(net->ipv6.mr6_rules_ops, flowi6_to_flowi(flp6), 0, &arg); @@ -259,10 +262,12 @@ static void __net_exit ip6mr_rules_exit(struct net *net) { struct mr6_table *mrt, *next; + rtnl_lock(); list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) { list_del(&mrt->list); ip6mr_free_table(mrt); } + rtnl_unlock(); fib_rules_unregister(net->ipv6.mr6_rules_ops); } #else @@ -289,7 +294,10 @@ static int __net_init ip6mr_rules_init(struct net *net) static void __net_exit ip6mr_rules_exit(struct net *net) { + rtnl_lock(); ip6mr_free_table(net->ipv6.mrt6); + net->ipv6.mrt6 = NULL; + rtnl_unlock(); } #endif diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index bfa6cc36ef2a..952eaed38808 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1343,8 +1343,9 @@ static void ip6_mc_hdr(struct sock *sk, struct sk_buff *skb, hdr->daddr = *daddr; } -static struct sk_buff *mld_newpack(struct net_device *dev, int size) +static struct sk_buff *mld_newpack(struct inet6_dev *idev, int size) { + struct net_device *dev = idev->dev; struct net *net = dev_net(dev); struct sock *sk = net->ipv6.igmp_sk; struct sk_buff *skb; @@ -1369,7 +1370,7 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size) skb_reserve(skb, hlen); - if (ipv6_get_lladdr(dev, &addr_buf, IFA_F_TENTATIVE)) { + if (__ipv6_get_lladdr(idev, &addr_buf, IFA_F_TENTATIVE)) { /* <draft-ietf-magma-mld-source-05.txt>: * use unspecified address as the source address * when a valid link-local address is not available. @@ -1465,7 +1466,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, struct mld2_grec *pgr; if (!skb) - skb = mld_newpack(dev, dev->mtu); + skb = mld_newpack(pmc->idev, dev->mtu); if (!skb) return NULL; pgr = (struct mld2_grec *)skb_put(skb, sizeof(struct mld2_grec)); @@ -1485,7 +1486,8 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, int type, int gdeleted, int sdeleted) { - struct net_device *dev = pmc->idev->dev; + struct inet6_dev *idev = pmc->idev; + struct net_device *dev = idev->dev; struct mld2_report *pmr; struct mld2_grec *pgr = NULL; struct ip6_sf_list *psf, *psf_next, *psf_prev, **psf_list; @@ -1514,7 +1516,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, AVAILABLE(skb) < grec_size(pmc, type, gdeleted, sdeleted)) { if (skb) mld_sendpack(skb); - skb = mld_newpack(dev, dev->mtu); + skb = mld_newpack(idev, dev->mtu); } } first = 1; @@ -1541,7 +1543,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, pgr->grec_nsrcs = htons(scount); if (skb) mld_sendpack(skb); - skb = mld_newpack(dev, dev->mtu); + skb = mld_newpack(idev, dev->mtu); first = 1; scount = 0; } @@ -1596,8 +1598,8 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) struct sk_buff *skb = NULL; int type; + read_lock_bh(&idev->lock); if (!pmc) { - read_lock_bh(&idev->lock); for (pmc=idev->mc_list; pmc; pmc=pmc->next) { if (pmc->mca_flags & MAF_NOREPORT) continue; @@ -1609,7 +1611,6 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) skb = add_grec(skb, pmc, type, 0, 0); spin_unlock_bh(&pmc->mca_lock); } - read_unlock_bh(&idev->lock); } else { spin_lock_bh(&pmc->mca_lock); if (pmc->mca_sfcount[MCAST_EXCLUDE]) @@ -1619,6 +1620,7 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) skb = add_grec(skb, pmc, type, 0, 0); spin_unlock_bh(&pmc->mca_lock); } + read_unlock_bh(&idev->lock); if (skb) mld_sendpack(skb); } @@ -2156,7 +2158,7 @@ static void mld_gq_timer_expire(unsigned long data) idev->mc_gq_running = 0; mld_send_report(idev, NULL); - __in6_dev_put(idev); + in6_dev_put(idev); } static void mld_ifc_timer_expire(unsigned long data) @@ -2169,7 +2171,7 @@ static void mld_ifc_timer_expire(unsigned long data) if (idev->mc_ifc_count) mld_ifc_start_timer(idev, idev->mc_maxdelay); } - __in6_dev_put(idev); + in6_dev_put(idev); } static void mld_ifc_event(struct inet6_dev *idev) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index ca4ffcc287f1..060a0449acaa 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -372,14 +372,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, int tlen = dev->needed_tailroom; struct sock *sk = dev_net(dev)->ipv6.ndisc_sk; struct sk_buff *skb; - int err; - skb = sock_alloc_send_skb(sk, - hlen + sizeof(struct ipv6hdr) + len + tlen, - 1, &err); + skb = alloc_skb(hlen + sizeof(struct ipv6hdr) + len + tlen, GFP_ATOMIC); if (!skb) { - ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb, err=%d\n", - __func__, err); + ND_PRINTK(0, err, "ndisc: %s failed to allocate an skb\n", + __func__); return NULL; } @@ -389,6 +386,11 @@ static struct sk_buff *ndisc_alloc_skb(struct net_device *dev, skb_reserve(skb, hlen + sizeof(struct ipv6hdr)); skb_reset_transport_header(skb); + /* Manually assign socket ownership as we avoid calling + * sock_alloc_send_pskb() to bypass wmem buffer limits + */ + skb_set_owner_w(skb, sk); + return skb; } diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index c9b6a6e6a1e8..97cd7507c1a4 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -172,63 +172,13 @@ out: return nf_conntrack_confirm(skb); } -static unsigned int __ipv6_conntrack_in(struct net *net, - unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct sk_buff *reasm = skb->nfct_reasm; - const struct nf_conn_help *help; - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - - /* This packet is fragmented and has reassembled packet. */ - if (reasm) { - /* Reassembled packet isn't parsed yet ? */ - if (!reasm->nfct) { - unsigned int ret; - - ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm); - if (ret != NF_ACCEPT) - return ret; - } - - /* Conntrack helpers need the entire reassembled packet in the - * POST_ROUTING hook. In case of unconfirmed connections NAT - * might reassign a helper, so the entire packet is also - * required. - */ - ct = nf_ct_get(reasm, &ctinfo); - if (ct != NULL && !nf_ct_is_untracked(ct)) { - help = nfct_help(ct); - if ((help && help->helper) || !nf_ct_is_confirmed(ct)) { - nf_conntrack_get_reasm(reasm); - NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm, - (struct net_device *)in, - (struct net_device *)out, - okfn, NF_IP6_PRI_CONNTRACK + 1); - return NF_DROP_ERR(-ECANCELED); - } - } - - nf_conntrack_get(reasm->nfct); - skb->nfct = reasm->nfct; - skb->nfctinfo = reasm->nfctinfo; - return NF_ACCEPT; - } - - return nf_conntrack_in(net, PF_INET6, hooknum, skb); -} - static unsigned int ipv6_conntrack_in(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn); + return nf_conntrack_in(dev_net(in), PF_INET6, hooknum, skb); } static unsigned int ipv6_conntrack_local(unsigned int hooknum, @@ -242,7 +192,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum, net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } - return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn); + return nf_conntrack_in(dev_net(out), PF_INET6, hooknum, skb); } static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index dffdc1a389c5..253566a8d55b 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -621,31 +621,16 @@ ret_orig: return skb; } -void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, - struct net_device *in, struct net_device *out, - int (*okfn)(struct sk_buff *)) +void nf_ct_frag6_consume_orig(struct sk_buff *skb) { struct sk_buff *s, *s2; - unsigned int ret = 0; for (s = NFCT_FRAG6_CB(skb)->orig; s;) { - nf_conntrack_put_reasm(s->nfct_reasm); - nf_conntrack_get_reasm(skb); - s->nfct_reasm = skb; - s2 = s->next; s->next = NULL; - - if (ret != -ECANCELED) - ret = NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, - in, out, okfn, - NF_IP6_PRI_CONNTRACK_DEFRAG + 1); - else - kfree_skb(s); - + consume_skb(s); s = s2; } - nf_conntrack_put_reasm(skb); } static int nf_ct_net_init(struct net *net) diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index aacd121fe8c5..581dd9ede0de 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -75,8 +75,11 @@ static unsigned int ipv6_defrag(unsigned int hooknum, if (reasm == skb) return NF_ACCEPT; - nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in, - (struct net_device *)out, okfn); + nf_ct_frag6_consume_orig(reasm); + + NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm, + (struct net_device *) in, (struct net_device *) out, + okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); return NF_STOLEN; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index eedff8ccded5..464b1c9c08e4 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -459,14 +459,11 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, if (flags & MSG_OOB) return -EOPNOTSUPP; - if (addr_len) - *addr_len=sizeof(*sin6); - if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); if (np->rxpmtu && np->rxopt.bits.rxpmtu) - return ipv6_recv_rxpmtu(sk, msg, len); + return ipv6_recv_rxpmtu(sk, msg, len, addr_len); skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) @@ -500,6 +497,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, IP6CB(skb)->iif); + *addr_len = sizeof(*sin6); } sock_recv_ts_and_drops(msg, sk, skb); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 790d9f4b8b0b..1aeb473b2cc6 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -490,6 +490,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, ipv6_hdr(head)->payload_len = htons(payload_len); ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); IP6CB(head)->nhoff = nhoff; + IP6CB(head)->flags |= IP6SKB_FRAGMENTED; /* Yes, and fold redundant checksum back. 8) */ if (head->ip_summed == CHECKSUM_COMPLETE) @@ -524,6 +525,9 @@ static int ipv6_frag_rcv(struct sk_buff *skb) struct net *net = dev_net(skb_dst(skb)->dev); int evicted; + if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED) + goto fail_hdr; + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS); /* Jumbo payload inhibits frag. header */ @@ -544,6 +548,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS); IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb); + IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; return 1; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ad0aa6b0b86a..6c389300f4e9 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -65,6 +65,12 @@ #include <linux/sysctl.h> #endif +enum rt6_nud_state { + RT6_NUD_FAIL_HARD = -2, + RT6_NUD_FAIL_SOFT = -1, + RT6_NUD_SUCCEED = 1 +}; + static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, const struct in6_addr *dest); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); @@ -78,6 +84,8 @@ static int ip6_dst_gc(struct dst_ops *ops); static int ip6_pkt_discard(struct sk_buff *skb); static int ip6_pkt_discard_out(struct sk_buff *skb); +static int ip6_pkt_prohibit(struct sk_buff *skb); +static int ip6_pkt_prohibit_out(struct sk_buff *skb); static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); @@ -227,9 +235,6 @@ static const struct rt6_info ip6_null_entry_template = { #ifdef CONFIG_IPV6_MULTIPLE_TABLES -static int ip6_pkt_prohibit(struct sk_buff *skb); -static int ip6_pkt_prohibit_out(struct sk_buff *skb); - static const struct rt6_info ip6_prohibit_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), @@ -467,6 +472,24 @@ out: } #ifdef CONFIG_IPV6_ROUTER_PREF +struct __rt6_probe_work { + struct work_struct work; + struct in6_addr target; + struct net_device *dev; +}; + +static void rt6_probe_deferred(struct work_struct *w) +{ + struct in6_addr mcaddr; + struct __rt6_probe_work *work = + container_of(w, struct __rt6_probe_work, work); + + addrconf_addr_solict_mult(&work->target, &mcaddr); + ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL); + dev_put(work->dev); + kfree(w); +} + static void rt6_probe(struct rt6_info *rt) { struct neighbour *neigh; @@ -490,17 +513,23 @@ static void rt6_probe(struct rt6_info *rt) if (!neigh || time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { - struct in6_addr mcaddr; - struct in6_addr *target; + struct __rt6_probe_work *work; + + work = kmalloc(sizeof(*work), GFP_ATOMIC); - if (neigh) { + if (neigh && work) neigh->updated = jiffies; + + if (neigh) write_unlock(&neigh->lock); - } - target = (struct in6_addr *)&rt->rt6i_gateway; - addrconf_addr_solict_mult(target, &mcaddr); - ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL); + if (work) { + INIT_WORK(&work->work, rt6_probe_deferred); + work->target = rt->rt6i_gateway; + dev_hold(rt->dst.dev); + work->dev = rt->dst.dev; + schedule_work(&work->work); + } } else { out: write_unlock(&neigh->lock); @@ -527,26 +556,29 @@ static inline int rt6_check_dev(struct rt6_info *rt, int oif) return 0; } -static inline bool rt6_check_neigh(struct rt6_info *rt) +static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt) { struct neighbour *neigh; - bool ret = false; + enum rt6_nud_state ret = RT6_NUD_FAIL_HARD; if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) - return true; + return RT6_NUD_SUCCEED; rcu_read_lock_bh(); neigh = __ipv6_neigh_lookup_noref(rt->dst.dev, &rt->rt6i_gateway); if (neigh) { read_lock(&neigh->lock); if (neigh->nud_state & NUD_VALID) - ret = true; + ret = RT6_NUD_SUCCEED; #ifdef CONFIG_IPV6_ROUTER_PREF else if (!(neigh->nud_state & NUD_FAILED)) - ret = true; + ret = RT6_NUD_SUCCEED; #endif read_unlock(&neigh->lock); + } else { + ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ? + RT6_NUD_SUCCEED : RT6_NUD_FAIL_SOFT; } rcu_read_unlock_bh(); @@ -560,43 +592,52 @@ static int rt6_score_route(struct rt6_info *rt, int oif, m = rt6_check_dev(rt, oif); if (!m && (strict & RT6_LOOKUP_F_IFACE)) - return -1; + return RT6_NUD_FAIL_HARD; #ifdef CONFIG_IPV6_ROUTER_PREF m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; #endif - if (!rt6_check_neigh(rt) && (strict & RT6_LOOKUP_F_REACHABLE)) - return -1; + if (strict & RT6_LOOKUP_F_REACHABLE) { + int n = rt6_check_neigh(rt); + if (n < 0) + return n; + } return m; } static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, - int *mpri, struct rt6_info *match) + int *mpri, struct rt6_info *match, + bool *do_rr) { int m; + bool match_do_rr = false; if (rt6_check_expired(rt)) goto out; m = rt6_score_route(rt, oif, strict); - if (m < 0) + if (m == RT6_NUD_FAIL_SOFT && !IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) { + match_do_rr = true; + m = 0; /* lowest valid score */ + } else if (m < 0) { goto out; + } + + if (strict & RT6_LOOKUP_F_REACHABLE) + rt6_probe(rt); if (m > *mpri) { - if (strict & RT6_LOOKUP_F_REACHABLE) - rt6_probe(match); + *do_rr = match_do_rr; *mpri = m; match = rt; - } else if (strict & RT6_LOOKUP_F_REACHABLE) { - rt6_probe(rt); } - out: return match; } static struct rt6_info *find_rr_leaf(struct fib6_node *fn, struct rt6_info *rr_head, - u32 metric, int oif, int strict) + u32 metric, int oif, int strict, + bool *do_rr) { struct rt6_info *rt, *match; int mpri = -1; @@ -604,10 +645,10 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, match = NULL; for (rt = rr_head; rt && rt->rt6i_metric == metric; rt = rt->dst.rt6_next) - match = find_match(rt, oif, strict, &mpri, match); + match = find_match(rt, oif, strict, &mpri, match, do_rr); for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric; rt = rt->dst.rt6_next) - match = find_match(rt, oif, strict, &mpri, match); + match = find_match(rt, oif, strict, &mpri, match, do_rr); return match; } @@ -616,15 +657,16 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) { struct rt6_info *match, *rt0; struct net *net; + bool do_rr = false; rt0 = fn->rr_ptr; if (!rt0) fn->rr_ptr = rt0 = fn->leaf; - match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict); + match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict, + &do_rr); - if (!match && - (strict & RT6_LOOKUP_F_REACHABLE)) { + if (do_rr) { struct rt6_info *next = rt0->dst.rt6_next; /* no entries matched; do round-robin */ @@ -685,8 +727,11 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, prefix = &prefix_buf; } - rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, - dev->ifindex); + if (rinfo->prefix_len == 0) + rt = rt6_get_dflt_router(gwaddr, dev); + else + rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, + gwaddr, dev->ifindex); if (rt && !lifetime) { ip6_del_rt(rt); @@ -829,7 +874,6 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, if (ort->rt6i_dst.plen != 128 && ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) rt->rt6i_flags |= RTF_ANYCAST; - rt->rt6i_gateway = *daddr; } rt->rt6i_flags |= RTF_CACHE; @@ -1042,10 +1086,13 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev))) return NULL; - if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) - return dst; + if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie)) + return NULL; - return NULL; + if (rt6_check_expired(rt)) + return NULL; + + return dst; } static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) @@ -1074,10 +1121,13 @@ static void ip6_link_failure(struct sk_buff *skb) rt = (struct rt6_info *) skb_dst(skb); if (rt) { - if (rt->rt6i_flags & RTF_CACHE) - rt6_update_expires(rt, 0); - else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) + if (rt->rt6i_flags & RTF_CACHE) { + dst_hold(&rt->dst); + if (ip6_del_rt(rt)) + dst_free(&rt->dst); + } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) { rt->rt6i_node->fn_sernum = -1; + } } } @@ -1223,6 +1273,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, rt->dst.flags |= DST_HOST; rt->dst.output = ip6_output; atomic_set(&rt->dst.__refcnt, 1); + rt->rt6i_gateway = fl6->daddr; rt->rt6i_dst.addr = fl6->daddr; rt->rt6i_dst.plen = 128; rt->rt6i_idev = idev; @@ -1446,21 +1497,24 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } } - rt->dst.output = ip6_pkt_discard_out; - rt->dst.input = ip6_pkt_discard; rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; switch (cfg->fc_type) { case RTN_BLACKHOLE: rt->dst.error = -EINVAL; + rt->dst.output = dst_discard; + rt->dst.input = dst_discard; break; case RTN_PROHIBIT: rt->dst.error = -EACCES; + rt->dst.output = ip6_pkt_prohibit_out; + rt->dst.input = ip6_pkt_prohibit; break; case RTN_THROW: - rt->dst.error = -EAGAIN; - break; default: - rt->dst.error = -ENETUNREACH; + rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN + : -ENETUNREACH; + rt->dst.output = ip6_pkt_discard_out; + rt->dst.input = ip6_pkt_discard; break; } goto install_route; @@ -1779,11 +1833,12 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, in6_dev_hold(rt->rt6i_idev); rt->dst.lastuse = jiffies; - rt->rt6i_gateway = ort->rt6i_gateway; + if (ort->rt6i_flags & RTF_GATEWAY) + rt->rt6i_gateway = ort->rt6i_gateway; + else + rt->rt6i_gateway = *dest; rt->rt6i_flags = ort->rt6i_flags; - if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == - (RTF_DEFAULT | RTF_ADDRCONF)) - rt6_set_from(rt, ort); + rt6_set_from(rt, ort); rt->rt6i_metric = 0; #ifdef CONFIG_IPV6_SUBTREES @@ -2022,8 +2077,6 @@ static int ip6_pkt_discard_out(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); } -#ifdef CONFIG_IPV6_MULTIPLE_TABLES - static int ip6_pkt_prohibit(struct sk_buff *skb) { return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); @@ -2035,8 +2088,6 @@ static int ip6_pkt_prohibit_out(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); } -#endif - /* * Allocate a dst for local (unicast / anycast) address. */ @@ -2046,12 +2097,10 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, bool anycast) { struct net *net = dev_net(idev->dev); - struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL); - - if (!rt) { - net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n"); + struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, + DST_NOCOUNT, NULL); + if (!rt) return ERR_PTR(-ENOMEM); - } in6_dev_hold(idev); @@ -2066,6 +2115,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, else rt->rt6i_flags |= RTF_LOCAL; + rt->rt6i_gateway = *addr; rt->rt6i_dst.addr = *addr; rt->rt6i_dst.plen = 128; rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 335363478bbf..620d326e8fdd 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -566,6 +566,70 @@ static inline bool is_spoofed_6rd(struct ip_tunnel *tunnel, const __be32 v4addr, return false; } +/* Checks if an address matches an address on the tunnel interface. + * Used to detect the NAT of proto 41 packets and let them pass spoofing test. + * Long story: + * This function is called after we considered the packet as spoofed + * in is_spoofed_6rd. + * We may have a router that is doing NAT for proto 41 packets + * for an internal station. Destination a.a.a.a/PREFIX:bbbb:bbbb + * will be translated to n.n.n.n/PREFIX:bbbb:bbbb. And is_spoofed_6rd + * function will return true, dropping the packet. + * But, we can still check if is spoofed against the IP + * addresses associated with the interface. + */ +static bool only_dnatted(const struct ip_tunnel *tunnel, + const struct in6_addr *v6dst) +{ + int prefix_len; + +#ifdef CONFIG_IPV6_SIT_6RD + prefix_len = tunnel->ip6rd.prefixlen + 32 + - tunnel->ip6rd.relay_prefixlen; +#else + prefix_len = 48; +#endif + return ipv6_chk_custom_prefix(v6dst, prefix_len, tunnel->dev); +} + +/* Returns true if a packet is spoofed */ +static bool packet_is_spoofed(struct sk_buff *skb, + const struct iphdr *iph, + struct ip_tunnel *tunnel) +{ + const struct ipv6hdr *ipv6h; + + if (tunnel->dev->priv_flags & IFF_ISATAP) { + if (!isatap_chksrc(skb, iph, tunnel)) + return true; + + return false; + } + + if (tunnel->dev->flags & IFF_POINTOPOINT) + return false; + + ipv6h = ipv6_hdr(skb); + + if (unlikely(is_spoofed_6rd(tunnel, iph->saddr, &ipv6h->saddr))) { + net_warn_ratelimited("Src spoofed %pI4/%pI6c -> %pI4/%pI6c\n", + &iph->saddr, &ipv6h->saddr, + &iph->daddr, &ipv6h->daddr); + return true; + } + + if (likely(!is_spoofed_6rd(tunnel, iph->daddr, &ipv6h->daddr))) + return false; + + if (only_dnatted(tunnel, &ipv6h->daddr)) + return false; + + net_warn_ratelimited("Dst spoofed %pI4/%pI6c -> %pI4/%pI6c\n", + &iph->saddr, &ipv6h->saddr, + &iph->daddr, &ipv6h->daddr); + return true; +} + static int ipip6_rcv(struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); @@ -584,19 +648,9 @@ static int ipip6_rcv(struct sk_buff *skb) skb->protocol = htons(ETH_P_IPV6); skb->pkt_type = PACKET_HOST; - if (tunnel->dev->priv_flags & IFF_ISATAP) { - if (!isatap_chksrc(skb, iph, tunnel)) { - tunnel->dev->stats.rx_errors++; - goto out; - } - } else { - if (is_spoofed_6rd(tunnel, iph->saddr, - &ipv6_hdr(skb)->saddr) || - is_spoofed_6rd(tunnel, iph->daddr, - &ipv6_hdr(skb)->daddr)) { - tunnel->dev->stats.rx_errors++; - goto out; - } + if (packet_is_spoofed(skb, iph, tunnel)) { + tunnel->dev->stats.rx_errors++; + goto out; } __skb_tunnel_rx(skb, tunnel->dev); @@ -713,7 +767,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); if (neigh == NULL) { - net_dbg_ratelimited("sit: nexthop == NULL\n"); + net_dbg_ratelimited("nexthop == NULL\n"); goto tx_error; } @@ -742,7 +796,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); if (neigh == NULL) { - net_dbg_ratelimited("sit: nexthop == NULL\n"); + net_dbg_ratelimited("nexthop == NULL\n"); goto tx_error; } @@ -865,7 +919,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, iph->ttl = iph6->hop_limit; skb->ip_summed = CHECKSUM_NONE; - ip_select_ident(iph, skb_dst(skb), NULL); + ip_select_ident(skb, skb_dst(skb), NULL); iptunnel_xmit(skb, dev); return NETDEV_TX_OK; @@ -1453,6 +1507,15 @@ static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = { #endif }; +static void ipip6_dellink(struct net_device *dev, struct list_head *head) +{ + struct net *net = dev_net(dev); + struct sit_net *sitn = net_generic(net, sit_net_id); + + if (dev != sitn->fb_tunnel_dev) + unregister_netdevice_queue(dev, head); +} + static struct rtnl_link_ops sit_link_ops __read_mostly = { .kind = "sit", .maxtype = IFLA_IPTUN_MAX, @@ -1463,6 +1526,7 @@ static struct rtnl_link_ops sit_link_ops __read_mostly = { .changelink = ipip6_changelink, .get_size = ipip6_get_size, .fill_info = ipip6_fill_info, + .dellink = ipip6_dellink, }; static struct xfrm_tunnel sit_handler __read_mostly = { @@ -1507,6 +1571,7 @@ static int __net_init sit_init_net(struct net *net) goto err_alloc_dev; } dev_net_set(sitn->fb_tunnel_dev, net); + sitn->fb_tunnel_dev->rtnl_link_ops = &sit_link_ops; err = ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); if (err) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0a17ed9eaf39..66c718854e5a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1426,7 +1426,7 @@ ipv6_pktoptions: if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; if (np->rxopt.bits.rxtclass) - np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); + np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb)) { skb_set_owner_r(opt_skb, sk); opt_skb = xchg(&np->pktoptions, opt_skb); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 42923b14dfa6..6b298dc614e3 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -373,14 +373,11 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, int is_udp4; bool slow; - if (addr_len) - *addr_len = sizeof(struct sockaddr_in6); - if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); if (np->rxpmtu && np->rxopt.bits.rxpmtu) - return ipv6_recv_rxpmtu(sk, msg, len); + return ipv6_recv_rxpmtu(sk, msg, len, addr_len); try_again: skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), @@ -461,7 +458,7 @@ try_again: ipv6_iface_scope_id(&sin6->sin6_addr, IP6CB(skb)->iif); } - + *addr_len = sizeof(*sin6); } if (is_udp4) { if (inet->cmsg_flags) @@ -955,11 +952,16 @@ static int udp_v6_push_pending_frames(struct sock *sk) struct udphdr *uh; struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); - struct flowi6 *fl6 = &inet->cork.fl.u.ip6; + struct flowi6 *fl6; int err = 0; int is_udplite = IS_UDPLITE(sk); __wsum csum = 0; + if (up->pending == AF_INET) + return udp_push_pending_frames(sk); + + fl6 = &inet->cork.fl.u.ip6; + /* Grab the skbuff where UDP header space exists. */ if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) goto out; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index d3cfaf9c7a08..3696aa28784a 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -85,7 +85,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, /* Check if there is enough headroom to insert fragment header. */ tnl_hlen = skb_tnl_header_len(skb); - if (skb_headroom(skb) < (tnl_hlen + frag_hdr_sz)) { + if (skb_mac_header(skb) < skb->head + tnl_hlen + frag_hdr_sz) { if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz)) goto out; } diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index f547a47d381c..e0897377b3b4 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1823,8 +1823,6 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, if (skb->tstamp.tv64) sk->sk_stamp = skb->tstamp; - msg->msg_namelen = sizeof(*sipx); - if (sipx) { sipx->sipx_family = AF_IPX; sipx->sipx_port = ipx->ipx_source.sock; @@ -1832,6 +1830,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock, sipx->sipx_network = IPX_SKB_CB(skb)->ipx_source_net; sipx->sipx_type = ipx->ipx_type; sipx->sipx_zero = 0; + msg->msg_namelen = sizeof(*sipx); } rc = copied; diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 0578d4fa00a9..a5e62ef57155 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -1385,8 +1385,6 @@ static int irda_recvmsg_dgram(struct kiocb *iocb, struct socket *sock, IRDA_DEBUG(4, "%s()\n", __func__); - msg->msg_namelen = 0; - skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &err); if (!skb) @@ -1451,8 +1449,6 @@ static int irda_recvmsg_stream(struct kiocb *iocb, struct socket *sock, target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); timeo = sock_rcvtimeo(sk, noblock); - msg->msg_namelen = 0; - do { int chunk; struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue); diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index ae691651b721..276aa86f366b 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1324,8 +1324,6 @@ static int iucv_sock_recvmsg(struct kiocb *iocb, struct socket *sock, int err = 0; u32 offset; - msg->msg_namelen = 0; - if ((sk->sk_state == IUCV_DISCONN) && skb_queue_empty(&iucv->backlog_skb_q) && skb_queue_empty(&sk->sk_receive_queue) && diff --git a/net/key/af_key.c b/net/key/af_key.c index 9da862070dd8..66f51c5a8a3a 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2081,6 +2081,7 @@ static int pfkey_xfrm_policy2msg(struct sk_buff *skb, const struct xfrm_policy * pol->sadb_x_policy_type = IPSEC_POLICY_NONE; } pol->sadb_x_policy_dir = dir+1; + pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = xp->index; pol->sadb_x_policy_priority = xp->priority; @@ -3137,7 +3138,9 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; pol->sadb_x_policy_dir = XFRM_POLICY_OUT + 1; + pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = xp->index; + pol->sadb_x_policy_priority = xp->priority; /* Set sadb_comb's. */ if (x->id.proto == IPPROTO_AH) @@ -3525,6 +3528,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY; pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC; pol->sadb_x_policy_dir = dir + 1; + pol->sadb_x_policy_reserved = 0; pol->sadb_x_policy_id = 0; pol->sadb_x_policy_priority = 0; @@ -3619,7 +3623,6 @@ static int pfkey_recvmsg(struct kiocb *kiocb, if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) goto out; - msg->msg_namelen = 0; skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err); if (skb == NULL) goto out; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 6984c3a353cd..8c27de2b4d5a 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -115,6 +115,11 @@ struct l2tp_net { static void l2tp_session_set_header_len(struct l2tp_session *session, int version); static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); +static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk) +{ + return sk->sk_user_data; +} + static inline struct l2tp_net *l2tp_pernet(struct net *net) { BUG_ON(!net); @@ -507,7 +512,7 @@ static inline int l2tp_verify_udp_checksum(struct sock *sk, return 0; #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6) { + if (sk->sk_family == PF_INET6 && !l2tp_tunnel(sk)->v4mapped) { if (!uh->check) { LIMIT_NETDEBUG(KERN_INFO "L2TP: IPv6: checksum is 0\n"); return 1; @@ -1071,7 +1076,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, /* Queue the packet to IP for output */ skb->local_df = 1; #if IS_ENABLED(CONFIG_IPV6) - if (skb->sk->sk_family == PF_INET6) + if (skb->sk->sk_family == PF_INET6 && !tunnel->v4mapped) error = inet6_csk_xmit(skb, NULL); else #endif @@ -1198,7 +1203,7 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len /* Calculate UDP checksum if configured to do so */ #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6) + if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) l2tp_xmit_ipv6_csum(sk, skb, udp_len); else #endif @@ -1247,10 +1252,9 @@ EXPORT_SYMBOL_GPL(l2tp_xmit_skb); */ static void l2tp_tunnel_destruct(struct sock *sk) { - struct l2tp_tunnel *tunnel; + struct l2tp_tunnel *tunnel = l2tp_tunnel(sk); struct l2tp_net *pn; - tunnel = sk->sk_user_data; if (tunnel == NULL) goto end; @@ -1618,7 +1622,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 } /* Check if this socket has already been prepped */ - tunnel = (struct l2tp_tunnel *)sk->sk_user_data; + tunnel = l2tp_tunnel(sk); if (tunnel != NULL) { /* This socket has already been prepped */ err = -EBUSY; @@ -1647,6 +1651,24 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 if (cfg != NULL) tunnel->debug = cfg->debug; +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == PF_INET6) { + struct ipv6_pinfo *np = inet6_sk(sk); + + if (ipv6_addr_v4mapped(&np->saddr) && + ipv6_addr_v4mapped(&np->daddr)) { + struct inet_sock *inet = inet_sk(sk); + + tunnel->v4mapped = true; + inet->inet_saddr = np->saddr.s6_addr32[3]; + inet->inet_rcv_saddr = np->rcv_saddr.s6_addr32[3]; + inet->inet_daddr = np->daddr.s6_addr32[3]; + } else { + tunnel->v4mapped = false; + } + } +#endif + /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ tunnel->encap = encap; if (encap == L2TP_ENCAPTYPE_UDP) { @@ -1655,7 +1677,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv; udp_sk(sk)->encap_destroy = l2tp_udp_encap_destroy; #if IS_ENABLED(CONFIG_IPV6) - if (sk->sk_family == PF_INET6) + if (sk->sk_family == PF_INET6 && !tunnel->v4mapped) udpv6_encap_enable(); else #endif diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 485a490fd990..2f89d43877d7 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -189,6 +189,9 @@ struct l2tp_tunnel { struct sock *sock; /* Parent socket */ int fd; /* Parent fd, if tunnel socket * was created by userspace */ +#if IS_ENABLED(CONFIG_IPV6) + bool v4mapped; +#endif struct work_struct del_work; diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 571db8dd2292..da1a1cee1a08 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -518,9 +518,6 @@ static int l2tp_ip_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m if (flags & MSG_OOB) goto out; - if (addr_len) - *addr_len = sizeof(*sin); - skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) goto out; @@ -543,6 +540,7 @@ static int l2tp_ip_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m sin->sin_addr.s_addr = ip_hdr(skb)->saddr; sin->sin_port = 0; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); + *addr_len = sizeof(*sin); } if (inet->cmsg_flags) ip_cmsg_recv(msg, skb); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index b8a6039314e8..e6e8408c9e36 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -665,7 +665,7 @@ static int l2tp_ip6_recvmsg(struct kiocb *iocb, struct sock *sk, *addr_len = sizeof(*lsa); if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len); + return ipv6_recv_error(sk, msg, len, addr_len); skb = skb_recv_datagram(sk, flags, noblock, &err); if (!skb) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 8dec6876dc50..44441c0c5037 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -197,8 +197,6 @@ static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock, if (sk->sk_state & PPPOX_BOUND) goto end; - msg->msg_namelen = 0; - err = 0; skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, flags & MSG_DONTWAIT, &err); @@ -353,7 +351,9 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh goto error_put_sess_tun; } + local_bh_disable(); l2tp_xmit_skb(session, skb, session->hdr_len); + local_bh_enable(); sock_put(ps->tunnel_sock); sock_put(sk); @@ -422,7 +422,9 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) skb->data[0] = ppph[0]; skb->data[1] = ppph[1]; + local_bh_disable(); l2tp_xmit_skb(session, skb, session->hdr_len); + local_bh_enable(); sock_put(sk_tun); sock_put(sk); @@ -1793,7 +1795,8 @@ static const struct proto_ops pppol2tp_ops = { static const struct pppox_proto pppol2tp_proto = { .create = pppol2tp_create, - .ioctl = pppol2tp_ioctl + .ioctl = pppol2tp_ioctl, + .owner = THIS_MODULE, }; #ifdef CONFIG_L2TP_V3 diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 48aaa89253e0..c3ee80547066 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -715,13 +715,11 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, unsigned long cpu_flags; size_t copied = 0; u32 peek_seq = 0; - u32 *seq; + u32 *seq, skb_len; unsigned long used; int target; /* Read at least this many bytes */ long timeo; - msg->msg_namelen = 0; - lock_sock(sk); copied = -ENOTCONN; if (unlikely(sk->sk_type == SOCK_STREAM && sk->sk_state == TCP_LISTEN)) @@ -814,6 +812,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, } continue; found_ok_skb: + skb_len = skb->len; /* Ok so how much can we use? */ used = skb->len - offset; if (len < used) @@ -846,7 +845,7 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, } /* Partial read */ - if (used + offset < skb->len) + if (used + offset < skb_len) continue; } while (len > 0); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 4fdb306e42e0..e922bf3f422c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -652,6 +652,8 @@ static void ieee80211_get_et_stats(struct wiphy *wiphy, if (sta->sdata->dev != dev) continue; + sinfo.filled = 0; + sta_set_sinfo(sta, &sinfo); i = 0; ADD_STA_STATS(sta); } @@ -973,8 +975,10 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, IEEE80211_P2P_OPPPS_ENABLE_BIT; err = ieee80211_assign_beacon(sdata, ¶ms->beacon); - if (err < 0) + if (err < 0) { + ieee80211_vif_release_channel(sdata); return err; + } changed |= err; err = drv_start_ap(sdata->local, sdata); @@ -983,6 +987,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev, if (old) kfree_rcu(old, rcu_head); RCU_INIT_POINTER(sdata->u.ap.beacon, NULL); + ieee80211_vif_release_channel(sdata); return err; } @@ -2354,8 +2359,7 @@ static int ieee80211_set_power_mgmt(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); - if (sdata->vif.type != NL80211_IFTYPE_STATION && - sdata->vif.type != NL80211_IFTYPE_MESH_POINT) + if (sdata->vif.type != NL80211_IFTYPE_STATION) return -EOPNOTSUPP; if (!(local->hw.flags & IEEE80211_HW_SUPPORTS_PS)) @@ -2475,6 +2479,24 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, INIT_DELAYED_WORK(&roc->work, ieee80211_sw_roc_work); INIT_LIST_HEAD(&roc->dependents); + /* + * cookie is either the roc cookie (for normal roc) + * or the SKB (for mgmt TX) + */ + if (!txskb) { + /* local->mtx protects this */ + local->roc_cookie_counter++; + roc->cookie = local->roc_cookie_counter; + /* wow, you wrapped 64 bits ... more likely a bug */ + if (WARN_ON(roc->cookie == 0)) { + roc->cookie = 1; + local->roc_cookie_counter++; + } + *cookie = roc->cookie; + } else { + *cookie = (unsigned long)txskb; + } + /* if there's one pending or we're scanning, queue this one */ if (!list_empty(&local->roc_list) || local->scanning || local->radar_detect_enabled) @@ -2609,24 +2631,6 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, if (!queued) list_add_tail(&roc->list, &local->roc_list); - /* - * cookie is either the roc cookie (for normal roc) - * or the SKB (for mgmt TX) - */ - if (!txskb) { - /* local->mtx protects this */ - local->roc_cookie_counter++; - roc->cookie = local->roc_cookie_counter; - /* wow, you wrapped 64 bits ... more likely a bug */ - if (WARN_ON(roc->cookie == 0)) { - roc->cookie = 1; - local->roc_cookie_counter++; - } - *cookie = roc->cookie; - } else { - *cookie = (unsigned long)txskb; - } - return 0; } @@ -3313,7 +3317,7 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, return -EINVAL; } band = chanctx_conf->def.chan->band; - sta = sta_info_get(sdata, peer); + sta = sta_info_get_bss(sdata, peer); if (sta) { qos = test_sta_flag(sta, WLAN_STA_WME); } else { diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 170f9a7fa319..3052672e37f7 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -1166,6 +1166,7 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state); ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED | BSS_CHANGED_IBSS); + ieee80211_vif_release_channel(sdata); synchronize_rcu(); kfree(presp); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 9ca8e3278cc0..92ef04c72c51 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -842,6 +842,8 @@ struct tpt_led_trigger { * that the scan completed. * @SCAN_ABORTED: Set for our scan work function when the driver reported * a scan complete for an aborted scan. + * @SCAN_HW_CANCELLED: Set for our scan work function when the scan is being + * cancelled. */ enum { SCAN_SW_SCANNING, @@ -849,6 +851,7 @@ enum { SCAN_ONCHANNEL_SCANNING, SCAN_COMPLETED, SCAN_ABORTED, + SCAN_HW_CANCELLED, }; /** diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 98d20c0f6fed..514e90f470bf 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -1726,6 +1726,15 @@ void ieee80211_remove_interfaces(struct ieee80211_local *local) if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) dev_close(sdata->dev); + /* + * Close all AP_VLAN interfaces first, as otherwise they + * might be closed while the AP interface they belong to + * is closed, causing unregister_netdevice_many() to crash. + */ + list_for_each_entry(sdata, &local->interfaces, list) + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + dev_close(sdata->dev); + mutex_lock(&local->iflist_mtx); list_for_each_entry_safe(sdata, tmp, &local->interfaces, list) { list_del(&sdata->list); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 741448b30825..5b4328dcbe4e 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -31,10 +31,12 @@ #include "led.h" #define IEEE80211_AUTH_TIMEOUT (HZ / 5) +#define IEEE80211_AUTH_TIMEOUT_LONG (HZ / 2) #define IEEE80211_AUTH_TIMEOUT_SHORT (HZ / 10) #define IEEE80211_AUTH_MAX_TRIES 3 #define IEEE80211_AUTH_WAIT_ASSOC (HZ * 5) #define IEEE80211_ASSOC_TIMEOUT (HZ / 5) +#define IEEE80211_ASSOC_TIMEOUT_LONG (HZ / 2) #define IEEE80211_ASSOC_TIMEOUT_SHORT (HZ / 10) #define IEEE80211_ASSOC_MAX_TRIES 3 @@ -237,8 +239,9 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel, const struct ieee80211_ht_operation *ht_oper, const struct ieee80211_vht_operation *vht_oper, - struct cfg80211_chan_def *chandef, bool verbose) + struct cfg80211_chan_def *chandef, bool tracking) { + struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; struct cfg80211_chan_def vht_chandef; u32 ht_cfreq, ret; @@ -257,7 +260,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan, channel->band); /* check that channel matches the right operating channel */ - if (channel->center_freq != ht_cfreq) { + if (!tracking && channel->center_freq != ht_cfreq) { /* * It's possible that some APs are confused here; * Netgear WNDR3700 sometimes reports 4 higher than @@ -265,11 +268,10 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, * since we look at probe response/beacon data here * it should be OK. */ - if (verbose) - sdata_info(sdata, - "Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n", - channel->center_freq, ht_cfreq, - ht_oper->primary_chan, channel->band); + sdata_info(sdata, + "Wrong control channel: center-freq: %d ht-cfreq: %d ht->primary_chan: %d band: %d - Disabling HT\n", + channel->center_freq, ht_cfreq, + ht_oper->primary_chan, channel->band); ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; goto out; } @@ -323,7 +325,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, channel->band); break; default: - if (verbose) + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) sdata_info(sdata, "AP VHT operation IE has invalid channel width (%d), disable VHT\n", vht_oper->chan_width); @@ -332,7 +334,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, } if (!cfg80211_chandef_valid(&vht_chandef)) { - if (verbose) + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) sdata_info(sdata, "AP VHT information is invalid, disable VHT\n"); ret = IEEE80211_STA_DISABLE_VHT; @@ -345,7 +347,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, } if (!cfg80211_chandef_compatible(chandef, &vht_chandef)) { - if (verbose) + if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) sdata_info(sdata, "AP VHT information doesn't match HT, disable VHT\n"); ret = IEEE80211_STA_DISABLE_VHT; @@ -361,18 +363,27 @@ out: if (ret & IEEE80211_STA_DISABLE_VHT) vht_chandef = *chandef; + /* + * Ignore the DISABLED flag when we're already connected and only + * tracking the APs beacon for bandwidth changes - otherwise we + * might get disconnected here if we connect to an AP, update our + * regulatory information based on the AP's country IE and the + * information we have is wrong/outdated and disables the channel + * that we're actually using for the connection to the AP. + */ while (!cfg80211_chandef_usable(sdata->local->hw.wiphy, chandef, - IEEE80211_CHAN_DISABLED)) { + tracking ? 0 : + IEEE80211_CHAN_DISABLED)) { if (WARN_ON(chandef->width == NL80211_CHAN_WIDTH_20_NOHT)) { ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; - goto out; + break; } ret |= chandef_downgrade(chandef); } - if (chandef->width != vht_chandef.width && verbose) + if (chandef->width != vht_chandef.width && !tracking) sdata_info(sdata, "capabilities/regulatory prevented using AP HT/VHT configuration, downgraded\n"); @@ -412,7 +423,7 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata, /* calculate new channel (type) based on HT/VHT operation IEs */ flags = ieee80211_determine_chantype(sdata, sband, chan, ht_oper, - vht_oper, &chandef, false); + vht_oper, &chandef, true); /* * Downgrade the new channel if we associated with restricted @@ -3461,10 +3472,13 @@ static int ieee80211_probe_auth(struct ieee80211_sub_if_data *sdata) if (tx_flags == 0) { auth_data->timeout = jiffies + IEEE80211_AUTH_TIMEOUT; - ifmgd->auth_data->timeout_started = true; + auth_data->timeout_started = true; run_again(ifmgd, auth_data->timeout); } else { - auth_data->timeout_started = false; + auth_data->timeout = + round_jiffies_up(jiffies + IEEE80211_AUTH_TIMEOUT_LONG); + auth_data->timeout_started = true; + run_again(ifmgd, auth_data->timeout); } return 0; @@ -3501,7 +3515,11 @@ static int ieee80211_do_assoc(struct ieee80211_sub_if_data *sdata) assoc_data->timeout_started = true; run_again(&sdata->u.mgd, assoc_data->timeout); } else { - assoc_data->timeout_started = false; + assoc_data->timeout = + round_jiffies_up(jiffies + + IEEE80211_ASSOC_TIMEOUT_LONG); + assoc_data->timeout_started = true; + run_again(&sdata->u.mgd, assoc_data->timeout); } return 0; @@ -3906,7 +3924,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= ieee80211_determine_chantype(sdata, sband, cbss->channel, ht_oper, vht_oper, - &chandef, true); + &chandef, false); sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss), local->rx_chains); diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 7fc5d0d8149a..340126204343 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -99,10 +99,13 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) } mutex_unlock(&local->sta_mtx); - /* remove all interfaces */ + /* remove all interfaces that were created in the driver */ list_for_each_entry(sdata, &local->interfaces, list) { - if (!ieee80211_sdata_running(sdata)) + if (!ieee80211_sdata_running(sdata) || + sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + sdata->vif.type == NL80211_IFTYPE_MONITOR) continue; + drv_remove_interface(local, sdata); } diff --git a/net/mac80211/rc80211_minstrel.c b/net/mac80211/rc80211_minstrel.c index ac7ef5414bde..e6512e2ffd20 100644 --- a/net/mac80211/rc80211_minstrel.c +++ b/net/mac80211/rc80211_minstrel.c @@ -290,7 +290,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, struct minstrel_rate *msr, *mr; unsigned int ndx; bool mrr_capable; - bool prev_sample = mi->prev_sample; + bool prev_sample; int delta; int sampling_ratio; @@ -314,6 +314,7 @@ minstrel_get_rate(void *priv, struct ieee80211_sta *sta, (mi->sample_count + mi->sample_deferred / 2); /* delta < 0: no sampling required */ + prev_sample = mi->prev_sample; mi->prev_sample = false; if (delta < 0 || (!mrr_capable && prev_sample)) return; diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 5b2d3012b983..f3bbea1eb9e7 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -804,10 +804,18 @@ minstrel_ht_get_rate(void *priv, struct ieee80211_sta *sta, void *priv_sta, sample_group = &minstrel_mcs_groups[sample_idx / MCS_GROUP_RATES]; info->flags |= IEEE80211_TX_CTL_RATE_CTRL_PROBE; + rate->count = 1; + + if (sample_idx / MCS_GROUP_RATES == MINSTREL_CCK_GROUP) { + int idx = sample_idx % ARRAY_SIZE(mp->cck_rates); + rate->idx = mp->cck_rates[idx]; + rate->flags = 0; + return; + } + rate->idx = sample_idx % MCS_GROUP_RATES + (sample_group->streams - 1) * MCS_GROUP_RATES; rate->flags = IEEE80211_TX_RC_MCS | sample_group->flags; - rate->count = 1; } static void @@ -820,6 +828,9 @@ minstrel_ht_update_cck(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, if (sband->band != IEEE80211_BAND_2GHZ) return; + if (!(mp->hw->flags & IEEE80211_HW_SUPPORTS_HT_CCK_RATES)) + return; + mi->cck_supported = 0; mi->cck_supported_short = 0; for (i = 0; i < 4; i++) { diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 8e2952620256..fae73b0ef14b 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -864,7 +864,8 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, u16 sc; u8 tid, ack_policy; - if (!ieee80211_is_data_qos(hdr->frame_control)) + if (!ieee80211_is_data_qos(hdr->frame_control) || + is_multicast_ether_addr(hdr->addr1)) goto dont_reorder; /* @@ -932,8 +933,14 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); - /* Drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.2.9) */ - if (rx->sta && !is_multicast_ether_addr(hdr->addr1)) { + /* + * Drop duplicate 802.11 retransmissions + * (IEEE 802.11-2012: 9.3.2.10 "Duplicate detection and recovery") + */ + if (rx->skb->len >= 24 && rx->sta && + !ieee80211_is_ctl(hdr->frame_control) && + !ieee80211_is_qos_nullfunc(hdr->frame_control) && + !is_multicast_ether_addr(hdr->addr1)) { if (unlikely(ieee80211_has_retry(hdr->frame_control) && rx->sta->last_seq_ctrl[rx->seqno_idx] == hdr->seq_ctrl)) { @@ -2996,6 +3003,9 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, case NL80211_IFTYPE_ADHOC: if (!bssid) return 0; + if (ether_addr_equal(sdata->vif.addr, hdr->addr2) || + ether_addr_equal(sdata->u.ibss.bssid, hdr->addr2)) + return 0; if (ieee80211_is_beacon(hdr->frame_control)) { return 1; } else if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) { diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 99b103921a4b..eb03337b6545 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -202,6 +202,9 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) enum ieee80211_band band; int i, ielen, n_chans; + if (test_bit(SCAN_HW_CANCELLED, &local->scanning)) + return false; + do { if (local->hw_scan_band == IEEE80211_NUM_BANDS) return false; @@ -878,7 +881,23 @@ void ieee80211_scan_cancel(struct ieee80211_local *local) if (!local->scan_req) goto out; + /* + * We have a scan running and the driver already reported completion, + * but the worker hasn't run yet or is stuck on the mutex - mark it as + * cancelled. + */ + if (test_bit(SCAN_HW_SCANNING, &local->scanning) && + test_bit(SCAN_COMPLETED, &local->scanning)) { + set_bit(SCAN_HW_CANCELLED, &local->scanning); + goto out; + } + if (test_bit(SCAN_HW_SCANNING, &local->scanning)) { + /* + * Make sure that __ieee80211_scan_completed doesn't trigger a + * scan on another band. + */ + set_bit(SCAN_HW_CANCELLED, &local->scanning); if (local->ops->cancel_hw_scan) drv_cancel_hw_scan(local, rcu_dereference_protected(local->scan_sdata, diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 43439203f4e4..9e78206bd9bb 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -180,6 +180,9 @@ static void ieee80211_frame_acked(struct sta_info *sta, struct sk_buff *skb) struct ieee80211_local *local = sta->local; struct ieee80211_sub_if_data *sdata = sta->sdata; + if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) + sta->last_rx = jiffies; + if (ieee80211_is_data_qos(mgmt->frame_control)) { struct ieee80211_hdr *hdr = (void *) skb->data; u8 *qc = ieee80211_get_qos_ctl(hdr); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 9972e07a2f96..fe9d6e7b904b 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -447,7 +447,6 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) { struct sta_info *sta = tx->sta; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; struct ieee80211_local *local = tx->local; if (unlikely(!sta)) @@ -458,15 +457,6 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) !(info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER))) { int ac = skb_get_queue_mapping(tx->skb); - /* only deauth, disassoc and action are bufferable MMPDUs */ - if (ieee80211_is_mgmt(hdr->frame_control) && - !ieee80211_is_deauth(hdr->frame_control) && - !ieee80211_is_disassoc(hdr->frame_control) && - !ieee80211_is_action(hdr->frame_control)) { - info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER; - return TX_CONTINUE; - } - ps_dbg(sta->sdata, "STA %pM aid %d: PS buffer for AC %d\n", sta->sta.addr, sta->sta.aid, ac); if (tx->local->total_ps_buffered >= TOTAL_MAX_TX_BUFFER) @@ -509,9 +499,22 @@ ieee80211_tx_h_unicast_ps_buf(struct ieee80211_tx_data *tx) static ieee80211_tx_result debug_noinline ieee80211_tx_h_ps_buf(struct ieee80211_tx_data *tx) { + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx->skb->data; + if (unlikely(tx->flags & IEEE80211_TX_PS_BUFFERED)) return TX_CONTINUE; + /* only deauth, disassoc and action are bufferable MMPDUs */ + if (ieee80211_is_mgmt(hdr->frame_control) && + !ieee80211_is_deauth(hdr->frame_control) && + !ieee80211_is_disassoc(hdr->frame_control) && + !ieee80211_is_action(hdr->frame_control)) { + if (tx->flags & IEEE80211_TX_UNICAST) + info->flags |= IEEE80211_TX_CTL_NO_PS_BUFFER; + return TX_CONTINUE; + } + if (tx->flags & IEEE80211_TX_UNICAST) return ieee80211_tx_h_unicast_ps_buf(tx); else @@ -851,7 +854,7 @@ static int ieee80211_fragment(struct ieee80211_tx_data *tx, } /* adjust first fragment's length */ - skb->len = hdrlen + per_fragm; + skb_trim(skb, hdrlen + per_fragm); return 0; } @@ -1100,7 +1103,8 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, tx->sta = rcu_dereference(sdata->u.vlan.sta); if (!tx->sta && sdata->dev->ieee80211_ptr->use_4addr) return TX_DROP; - } else if (info->flags & IEEE80211_TX_CTL_INJECTED || + } else if (info->flags & (IEEE80211_TX_CTL_INJECTED | + IEEE80211_TX_INTFL_NL80211_FRAME_TX) || tx->sdata->control_port_protocol == tx->skb->protocol) { tx->sta = sta_info_get_bss(sdata, hdr->addr1); } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 72e6292955bb..5db8eb5d56cf 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2174,6 +2174,10 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, } rate = cfg80211_calculate_bitrate(&ri); + if (WARN_ONCE(!rate, + "Invalid bitrate: flags=0x%x, idx=%d, vht_nss=%d\n", + status->flag, status->rate_idx, status->vht_nss)) + return 0; /* rewind from end of MPDU */ if (status->flag & RX_FLAG_MACTIME_END) diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index 57beb1762b2d..707bc520d629 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -325,18 +325,22 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length) static void mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length) { - u8 i, j; - - for (i = 0; i < nets_length - 1 && h->nets[i].cidr != cidr; i++) - ; - h->nets[i].nets--; - - if (h->nets[i].nets != 0) - return; - - for (j = i; j < nets_length - 1 && h->nets[j].nets; j++) { - h->nets[j].cidr = h->nets[j + 1].cidr; - h->nets[j].nets = h->nets[j + 1].nets; + u8 i, j, net_end = nets_length - 1; + + for (i = 0; i < nets_length; i++) { + if (h->nets[i].cidr != cidr) + continue; + if (h->nets[i].nets > 1 || i == net_end || + h->nets[i + 1].nets == 0) { + h->nets[i].nets--; + return; + } + for (j = i; j < net_end && h->nets[j].nets; j++) { + h->nets[j].cidr = h->nets[j + 1].cidr; + h->nets[j].nets = h->nets[j + 1].nets; + } + h->nets[j].nets = 0; + return; } } #endif diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 23b8eb53a569..21a3a475d7cd 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -1131,12 +1131,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) ip_vs_fill_iph_skb(af, skb, &iph); #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - if (!iph.fragoffs && skb_nfct_reasm(skb)) { - struct sk_buff *reasm = skb_nfct_reasm(skb); - /* Save fw mark for coming frags */ - reasm->ipvs_property = 1; - reasm->mark = skb->mark; - } if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { int related; int verdict = ip_vs_out_icmp_v6(skb, &related, @@ -1606,12 +1600,6 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { - if (!iph.fragoffs && skb_nfct_reasm(skb)) { - struct sk_buff *reasm = skb_nfct_reasm(skb); - /* Save fw mark for coming frags. */ - reasm->ipvs_property = 1; - reasm->mark = skb->mark; - } if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { int related; int verdict = ip_vs_in_icmp_v6(skb, &related, hooknum, @@ -1663,9 +1651,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) /* sorry, all this trouble for a no-hit :) */ IP_VS_DBG_PKT(12, af, pp, skb, 0, "ip_vs_in: packet continues traversal as normal"); - if (iph.fragoffs && !skb_nfct_reasm(skb)) { + if (iph.fragoffs) { /* Fragment that couldn't be mapped to a conn entry - * and don't have any pointer to a reasm skb * is missing module nf_defrag_ipv6 */ IP_VS_DBG_RL("Unhandled frag, load nf_defrag_ipv6\n"); @@ -1748,38 +1735,6 @@ ip_vs_local_request4(unsigned int hooknum, struct sk_buff *skb, #ifdef CONFIG_IP_VS_IPV6 /* - * AF_INET6 fragment handling - * Copy info from first fragment, to the rest of them. - */ -static unsigned int -ip_vs_preroute_frag6(unsigned int hooknum, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct sk_buff *reasm = skb_nfct_reasm(skb); - struct net *net; - - /* Skip if not a "replay" from nf_ct_frag6_output or first fragment. - * ipvs_property is set when checking first fragment - * in ip_vs_in() and ip_vs_out(). - */ - if (reasm) - IP_VS_DBG(2, "Fragment recv prop:%d\n", reasm->ipvs_property); - if (!reasm || !reasm->ipvs_property) - return NF_ACCEPT; - - net = skb_net(skb); - if (!net_ipvs(net)->enable) - return NF_ACCEPT; - - /* Copy stored fw mark, saved in ip_vs_{in,out} */ - skb->mark = reasm->mark; - - return NF_ACCEPT; -} - -/* * AF_INET6 handler in NF_INET_LOCAL_IN chain * Schedule and forward packets from remote clients */ @@ -1916,14 +1871,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .priority = 100, }, #ifdef CONFIG_IP_VS_IPV6 - /* After mangle & nat fetch 2:nd fragment and following */ - { - .hook = ip_vs_preroute_frag6, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP6_PRI_NAT_DST + 1, - }, /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_reply6, diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index 9ef22bdce9f1..bed5f7042529 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -65,7 +65,6 @@ static int get_callid(const char *dptr, unsigned int dataoff, static int ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) { - struct sk_buff *reasm = skb_nfct_reasm(skb); struct ip_vs_iphdr iph; unsigned int dataoff, datalen, matchoff, matchlen; const char *dptr; @@ -79,15 +78,10 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb) /* todo: IPv6 fragments: * I think this only should be done for the first fragment. /HS */ - if (reasm) { - skb = reasm; - dataoff = iph.thoff_reasm + sizeof(struct udphdr); - } else - dataoff = iph.len + sizeof(struct udphdr); + dataoff = iph.len + sizeof(struct udphdr); if (dataoff >= skb->len) return -EINVAL; - /* todo: Check if this will mess-up the reasm skb !!! /HS */ retc = skb_linearize(skb); if (retc < 0) return retc; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index b75ff6429a04..c47444e4cf8c 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -883,7 +883,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, iph->daddr = cp->daddr.ip; iph->saddr = saddr; iph->ttl = old_iph->ttl; - ip_select_ident(iph, &rt->dst, NULL); + ip_select_ident(skb, &rt->dst, NULL); /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index bdebd03bc8cd..70866d192efc 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -778,8 +778,8 @@ static int callforward_do_filter(const union nf_inet_addr *src, flowi6_to_flowi(&fl1), false)) { if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, flowi6_to_flowi(&fl2), false)) { - if (!memcmp(&rt1->rt6i_gateway, &rt2->rt6i_gateway, - sizeof(rt1->rt6i_gateway)) && + if (ipv6_addr_equal(rt6_nexthop(rt1), + rt6_nexthop(rt2)) && rt1->dst.dev == rt2->dst.dev) ret = 1; dst_release(&rt2->dst); diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c index f02b3605823e..1fb2258c3535 100644 --- a/net/netfilter/nf_nat_irc.c +++ b/net/netfilter/nf_nat_irc.c @@ -34,10 +34,14 @@ static unsigned int help(struct sk_buff *skb, struct nf_conntrack_expect *exp) { char buffer[sizeof("4294967296 65635")]; + struct nf_conn *ct = exp->master; + union nf_inet_addr newaddr; u_int16_t port; unsigned int ret; /* Reply comes from server. */ + newaddr = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3; + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; exp->dir = IP_CT_DIR_REPLY; exp->expectfn = nf_nat_follow_master; @@ -57,17 +61,35 @@ static unsigned int help(struct sk_buff *skb, } if (port == 0) { - nf_ct_helper_log(skb, exp->master, "all ports in use"); + nf_ct_helper_log(skb, ct, "all ports in use"); return NF_DROP; } - ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo, - protoff, matchoff, matchlen, buffer, - strlen(buffer)); + /* strlen("\1DCC CHAT chat AAAAAAAA P\1\n")=27 + * strlen("\1DCC SCHAT chat AAAAAAAA P\1\n")=28 + * strlen("\1DCC SEND F AAAAAAAA P S\1\n")=26 + * strlen("\1DCC MOVE F AAAAAAAA P S\1\n")=26 + * strlen("\1DCC TSEND F AAAAAAAA P S\1\n")=27 + * + * AAAAAAAAA: bound addr (1.0.0.0==16777216, min 8 digits, + * 255.255.255.255==4294967296, 10 digits) + * P: bound port (min 1 d, max 5d (65635)) + * F: filename (min 1 d ) + * S: size (min 1 d ) + * 0x01, \n: terminators + */ + /* AAA = "us", ie. where server normally talks to. */ + snprintf(buffer, sizeof(buffer), "%u %u", ntohl(newaddr.ip), port); + pr_debug("nf_nat_irc: inserting '%s' == %pI4, port %u\n", + buffer, &newaddr.ip, port); + + ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, matchoff, + matchlen, buffer, strlen(buffer)); if (ret != NF_ACCEPT) { - nf_ct_helper_log(skb, exp->master, "cannot mangle packet"); + nf_ct_helper_log(skb, ct, "cannot mangle packet"); nf_ct_unexpect_related(exp); } + return ret; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 57ee84d21470..c9c2a8441d32 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2168,8 +2168,6 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, } #endif - msg->msg_namelen = 0; - copied = data_skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 2fd6dbea327a..393f17eea1a2 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -364,7 +364,7 @@ int genl_unregister_ops(struct genl_family *family, struct genl_ops *ops) EXPORT_SYMBOL(genl_unregister_ops); /** - * genl_register_family - register a generic netlink family + * __genl_register_family - register a generic netlink family * @family: generic netlink family * * Registers the specified family after validating it first. Only one @@ -374,7 +374,7 @@ EXPORT_SYMBOL(genl_unregister_ops); * * Return 0 on success or a negative error code. */ -int genl_register_family(struct genl_family *family) +int __genl_register_family(struct genl_family *family) { int err = -EINVAL; @@ -430,10 +430,10 @@ errout_locked: errout: return err; } -EXPORT_SYMBOL(genl_register_family); +EXPORT_SYMBOL(__genl_register_family); /** - * genl_register_family_with_ops - register a generic netlink family + * __genl_register_family_with_ops - register a generic netlink family * @family: generic netlink family * @ops: operations to be registered * @n_ops: number of elements to register @@ -457,12 +457,12 @@ EXPORT_SYMBOL(genl_register_family); * * Return 0 on success or a negative error code. */ -int genl_register_family_with_ops(struct genl_family *family, +int __genl_register_family_with_ops(struct genl_family *family, struct genl_ops *ops, size_t n_ops) { int err, i; - err = genl_register_family(family); + err = __genl_register_family(family); if (err) return err; @@ -476,7 +476,7 @@ err_out: genl_unregister_family(family); return err; } -EXPORT_SYMBOL(genl_register_family_with_ops); +EXPORT_SYMBOL(__genl_register_family_with_ops); /** * genl_unregister_family - unregister generic netlink family @@ -544,6 +544,30 @@ void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, } EXPORT_SYMBOL(genlmsg_put); +static int genl_lock_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct genl_ops *ops = cb->data; + int rc; + + genl_lock(); + rc = ops->dumpit(skb, cb); + genl_unlock(); + return rc; +} + +static int genl_lock_done(struct netlink_callback *cb) +{ + struct genl_ops *ops = cb->data; + int rc = 0; + + if (ops->done) { + genl_lock(); + rc = ops->done(cb); + genl_unlock(); + } + return rc; +} + static int genl_family_rcv_msg(struct genl_family *family, struct sk_buff *skb, struct nlmsghdr *nlh) @@ -572,15 +596,34 @@ static int genl_family_rcv_msg(struct genl_family *family, return -EPERM; if (nlh->nlmsg_flags & NLM_F_DUMP) { - struct netlink_dump_control c = { - .dump = ops->dumpit, - .done = ops->done, - }; + int rc; if (ops->dumpit == NULL) return -EOPNOTSUPP; - return netlink_dump_start(net->genl_sock, skb, nlh, &c); + if (!family->parallel_ops) { + struct netlink_dump_control c = { + .module = family->module, + .data = ops, + .dump = genl_lock_dumpit, + .done = genl_lock_done, + }; + + genl_unlock(); + rc = __netlink_dump_start(net->genl_sock, skb, nlh, &c); + genl_lock(); + + } else { + struct netlink_dump_control c = { + .module = family->module, + .dump = ops->dumpit, + .done = ops->done, + }; + + rc = __netlink_dump_start(net->genl_sock, skb, nlh, &c); + } + + return rc; } if (ops->doit == NULL) @@ -877,8 +920,10 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) #ifdef CONFIG_MODULES if (res == NULL) { genl_unlock(); + up_read(&cb_lock); request_module("net-pf-%d-proto-%d-family-%s", PF_NETLINK, NETLINK_GENERIC, name); + down_read(&cb_lock); genl_lock(); res = genl_family_find_byname(name); } diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index ec0c80fde69f..13b92982a506 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -1179,10 +1179,9 @@ static int nr_recvmsg(struct kiocb *iocb, struct socket *sock, sax->sax25_family = AF_NETROM; skb_copy_from_linear_data_offset(skb, 7, sax->sax25_call.ax25_call, AX25_ADDR_LEN); + msg->msg_namelen = sizeof(*sax); } - msg->msg_namelen = sizeof(*sax); - skb_free_datagram(sk, skb); release_sock(sk); diff --git a/net/nfc/llcp.h b/net/nfc/llcp.h index ff8c434f7df8..f924dd209b31 100644 --- a/net/nfc/llcp.h +++ b/net/nfc/llcp.h @@ -19,6 +19,7 @@ enum llcp_state { LLCP_CONNECTED = 1, /* wait_for_packet() wants that */ + LLCP_CONNECTING, LLCP_CLOSED, LLCP_BOUND, LLCP_LISTEN, diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index 380253eccb74..86470cf54cee 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -571,7 +571,7 @@ static unsigned int llcp_sock_poll(struct file *file, struct socket *sock, if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= POLLHUP; - if (sock_writeable(sk)) + if (sock_writeable(sk) && sk->sk_state == LLCP_CONNECTED) mask |= POLLOUT | POLLWRNORM | POLLWRBAND; else set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); @@ -722,14 +722,16 @@ static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr, if (ret) goto sock_unlink; + sk->sk_state = LLCP_CONNECTING; + ret = sock_wait_state(sk, LLCP_CONNECTED, sock_sndtimeo(sk, flags & O_NONBLOCK)); - if (ret) + if (ret && ret != -EINPROGRESS) goto sock_unlink; release_sock(sk); - return 0; + return ret; sock_unlink: nfc_llcp_put_ssap(local, llcp_sock->ssap); @@ -798,8 +800,6 @@ static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock, pr_debug("%p %zu\n", sk, len); - msg->msg_namelen = 0; - lock_sock(sk); if (sk->sk_state == LLCP_CLOSED && diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c index 313bf1bc848a..5d11f4ac3ecb 100644 --- a/net/nfc/rawsock.c +++ b/net/nfc/rawsock.c @@ -241,8 +241,6 @@ static int rawsock_recvmsg(struct kiocb *iocb, struct socket *sock, if (!skb) return rc; - msg->msg_namelen = 0; - copied = skb->len; if (len < copied) { msg->msg_flags |= MSG_TRUNC; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 20a1bd0e6549..e8b5a0dfca21 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -237,6 +237,30 @@ struct packet_skb_cb { static void __fanout_unlink(struct sock *sk, struct packet_sock *po); static void __fanout_link(struct sock *sk, struct packet_sock *po); +static struct net_device *packet_cached_dev_get(struct packet_sock *po) +{ + struct net_device *dev; + + rcu_read_lock(); + dev = rcu_dereference(po->cached_dev); + if (likely(dev)) + dev_hold(dev); + rcu_read_unlock(); + + return dev; +} + +static void packet_cached_dev_assign(struct packet_sock *po, + struct net_device *dev) +{ + rcu_assign_pointer(po->cached_dev, dev); +} + +static void packet_cached_dev_reset(struct packet_sock *po) +{ + RCU_INIT_POINTER(po->cached_dev, NULL); +} + /* register_prot_hook must be invoked with the po->bind_lock held, * or from a context in which asynchronous accesses to the packet * socket is not possible (packet_create()). @@ -244,11 +268,13 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po); static void register_prot_hook(struct sock *sk) { struct packet_sock *po = pkt_sk(sk); + if (!po->running) { if (po->fanout) __fanout_link(sk, po); else dev_add_pack(&po->prot_hook); + sock_hold(sk); po->running = 1; } @@ -266,10 +292,12 @@ static void __unregister_prot_hook(struct sock *sk, bool sync) struct packet_sock *po = pkt_sk(sk); po->running = 0; + if (po->fanout) __fanout_unlink(sk, po); else __dev_remove_pack(&po->prot_hook); + __sock_put(sk); if (sync) { @@ -432,9 +460,9 @@ static void prb_shutdown_retire_blk_timer(struct packet_sock *po, pkc = tx_ring ? &po->tx_ring.prb_bdqc : &po->rx_ring.prb_bdqc; - spin_lock(&rb_queue->lock); + spin_lock_bh(&rb_queue->lock); pkc->delete_blk_timer = 1; - spin_unlock(&rb_queue->lock); + spin_unlock_bh(&rb_queue->lock); prb_del_retire_blk_timer(pkc); } @@ -2046,7 +2074,6 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) struct sk_buff *skb; struct net_device *dev; __be16 proto; - bool need_rls_dev = false; int err, reserve = 0; void *ph; struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name; @@ -2058,8 +2085,8 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) mutex_lock(&po->pg_vec_lock); - if (saddr == NULL) { - dev = po->prot_hook.dev; + if (likely(saddr == NULL)) { + dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; } else { @@ -2073,19 +2100,17 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) proto = saddr->sll_protocol; addr = saddr->sll_addr; dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); - need_rls_dev = true; } err = -ENXIO; if (unlikely(dev == NULL)) goto out; - - reserve = dev->hard_header_len; - err = -ENETDOWN; if (unlikely(!(dev->flags & IFF_UP))) goto out_put; + reserve = dev->hard_header_len; + size_max = po->tx_ring.frame_size - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); @@ -2162,8 +2187,7 @@ out_status: __packet_set_status(po, ph, status); kfree_skb(skb); out_put: - if (need_rls_dev) - dev_put(dev); + dev_put(dev); out: mutex_unlock(&po->pg_vec_lock); return err; @@ -2201,7 +2225,6 @@ static int packet_snd(struct socket *sock, struct sk_buff *skb; struct net_device *dev; __be16 proto; - bool need_rls_dev = false; unsigned char *addr; int err, reserve = 0; struct virtio_net_hdr vnet_hdr = { 0 }; @@ -2216,8 +2239,8 @@ static int packet_snd(struct socket *sock, * Get and verify the address. */ - if (saddr == NULL) { - dev = po->prot_hook.dev; + if (likely(saddr == NULL)) { + dev = packet_cached_dev_get(po); proto = po->num; addr = NULL; } else { @@ -2229,19 +2252,17 @@ static int packet_snd(struct socket *sock, proto = saddr->sll_protocol; addr = saddr->sll_addr; dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); - need_rls_dev = true; } err = -ENXIO; - if (dev == NULL) + if (unlikely(dev == NULL)) goto out_unlock; - if (sock->type == SOCK_RAW) - reserve = dev->hard_header_len; - err = -ENETDOWN; - if (!(dev->flags & IFF_UP)) + if (unlikely(!(dev->flags & IFF_UP))) goto out_unlock; + if (sock->type == SOCK_RAW) + reserve = dev->hard_header_len; if (po->has_vnet_hdr) { vnet_hdr_len = sizeof(vnet_hdr); @@ -2375,15 +2396,14 @@ static int packet_snd(struct socket *sock, if (err > 0 && (err = net_xmit_errno(err)) != 0) goto out_unlock; - if (need_rls_dev) - dev_put(dev); + dev_put(dev); return len; out_free: kfree_skb(skb); out_unlock: - if (dev && need_rls_dev) + if (dev) dev_put(dev); out: return err; @@ -2428,6 +2448,8 @@ static int packet_release(struct socket *sock) spin_lock(&po->bind_lock); unregister_prot_hook(sk, false); + packet_cached_dev_reset(po); + if (po->prot_hook.dev) { dev_put(po->prot_hook.dev); po->prot_hook.dev = NULL; @@ -2483,14 +2505,17 @@ static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protoc spin_lock(&po->bind_lock); unregister_prot_hook(sk, true); + po->num = protocol; po->prot_hook.type = protocol; if (po->prot_hook.dev) dev_put(po->prot_hook.dev); - po->prot_hook.dev = dev; + po->prot_hook.dev = dev; po->ifindex = dev ? dev->ifindex : 0; + packet_cached_dev_assign(po, dev); + if (protocol == 0) goto out_unlock; @@ -2604,6 +2629,8 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, sk->sk_family = PF_PACKET; po->num = proto; + packet_cached_dev_reset(po); + sk->sk_destruct = packet_sock_destruct; sk_refcnt_debug_inc(sk); @@ -2694,7 +2721,6 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, struct sock *sk = sock->sk; struct sk_buff *skb; int copied, err; - struct sockaddr_ll *sll; int vnet_hdr_len = 0; err = -EINVAL; @@ -2777,22 +2803,10 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, goto out_free; } - /* - * If the address length field is there to be filled in, we fill - * it in now. + /* You lose any data beyond the buffer you gave. If it worries + * a user program they can ask the device for its MTU + * anyway. */ - - sll = &PACKET_SKB_CB(skb)->sa.ll; - if (sock->type == SOCK_PACKET) - msg->msg_namelen = sizeof(struct sockaddr_pkt); - else - msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr); - - /* - * You lose any data beyond the buffer you gave. If it worries a - * user program they can ask the device for its MTU anyway. - */ - copied = skb->len; if (copied > len) { copied = len; @@ -2805,9 +2819,20 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, sock_recv_ts_and_drops(msg, sk, skb); - if (msg->msg_name) + if (msg->msg_name) { + /* If the address length field is there to be filled + * in, we fill it in now. + */ + if (sock->type == SOCK_PACKET) { + msg->msg_namelen = sizeof(struct sockaddr_pkt); + } else { + struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; + msg->msg_namelen = sll->sll_halen + + offsetof(struct sockaddr_ll, sll_addr); + } memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, msg->msg_namelen); + } if (pkt_sk(sk)->auxdata) { struct tpacket_auxdata aux; @@ -3259,9 +3284,11 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, if (po->tp_version == TPACKET_V3) { lv = sizeof(struct tpacket_stats_v3); + st.stats3.tp_packets += st.stats3.tp_drops; data = &st.stats3; } else { lv = sizeof(struct tpacket_stats); + st.stats1.tp_packets += st.stats1.tp_drops; data = &st.stats1; } @@ -3356,6 +3383,7 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void sk->sk_error_report(sk); } if (msg == NETDEV_UNREGISTER) { + packet_cached_dev_reset(po); po->ifindex = -1; if (po->prot_hook.dev) dev_put(po->prot_hook.dev); diff --git a/net/packet/internal.h b/net/packet/internal.h index c4e4b4561207..1035fa2d909c 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -113,6 +113,7 @@ struct packet_sock { unsigned int tp_loss:1; unsigned int tp_tx_has_off:1; unsigned int tp_tstamp; + struct net_device __rcu *cached_dev; struct packet_type prot_hook ____cacheline_aligned_in_smp; }; diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index 12c30f3e643e..38946b26e471 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -139,9 +139,6 @@ static int pn_recvmsg(struct kiocb *iocb, struct sock *sk, MSG_CMSG_COMPAT)) goto out_nofree; - if (addr_len) - *addr_len = sizeof(sa); - skb = skb_recv_datagram(sk, flags, noblock, &rval); if (skb == NULL) goto out_nofree; @@ -162,8 +159,10 @@ static int pn_recvmsg(struct kiocb *iocb, struct sock *sk, rval = (flags & MSG_TRUNC) ? skb->len : copylen; - if (msg->msg_name != NULL) - memcpy(msg->msg_name, &sa, sizeof(struct sockaddr_pn)); + if (msg->msg_name != NULL) { + memcpy(msg->msg_name, &sa, sizeof(sa)); + *addr_len = sizeof(sa); + } out: skb_free_datagram(sk, skb); diff --git a/net/rds/ib.c b/net/rds/ib.c index b4c8b0022fee..ba2dffeff608 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -338,7 +338,8 @@ static int rds_ib_laddr_check(__be32 addr) ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin); /* due to this, we will claim to support iWARP devices unless we check node_type. */ - if (ret || cm_id->device->node_type != RDMA_NODE_IB_CA) + if (ret || !cm_id->device || + cm_id->device->node_type != RDMA_NODE_IB_CA) ret = -EADDRNOTAVAIL; rdsdebug("addr %pI4 ret %d node type %d\n", diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 8eb9501e3d60..b7ebe23cdedf 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -421,8 +421,7 @@ static void rds_ib_recv_cache_put(struct list_head *new_item, struct rds_ib_refill_cache *cache) { unsigned long flags; - struct list_head *old; - struct list_head __percpu *chpfirst; + struct list_head *old, *chpfirst; local_irq_save(flags); @@ -432,7 +431,7 @@ static void rds_ib_recv_cache_put(struct list_head *new_item, else /* put on front */ list_add_tail(new_item, chpfirst); - __this_cpu_write(chpfirst, new_item); + __this_cpu_write(cache->percpu->first, new_item); __this_cpu_inc(cache->percpu->count); if (__this_cpu_read(cache->percpu->count) < RDS_IB_RECYCLE_BATCH_COUNT) @@ -452,7 +451,7 @@ static void rds_ib_recv_cache_put(struct list_head *new_item, } while (old); - __this_cpu_write(chpfirst, NULL); + __this_cpu_write(cache->percpu->first, NULL); __this_cpu_write(cache->percpu->count, 0); end: local_irq_restore(flags); diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index e59094981175..37be6e226d1b 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -552,9 +552,8 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, && rm->m_inc.i_hdr.h_flags & RDS_FLAG_CONG_BITMAP) { rds_cong_map_updated(conn->c_fcong, ~(u64) 0); scat = &rm->data.op_sg[sg]; - ret = sizeof(struct rds_header) + RDS_CONG_MAP_BYTES; - ret = min_t(int, ret, scat->length - conn->c_xmit_data_off); - return ret; + ret = max_t(int, RDS_CONG_MAP_BYTES, scat->length); + return sizeof(struct rds_header) + ret; } /* FIXME we may overallocate here */ diff --git a/net/rds/recv.c b/net/rds/recv.c index 9f0f17cf6bf9..de339b24ca14 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -410,8 +410,6 @@ int rds_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, rdsdebug("size %zu flags 0x%x timeo %ld\n", size, msg_flags, timeo); - msg->msg_namelen = 0; - if (msg_flags & MSG_OOB) goto out; diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 9c8347451597..27e6896705e6 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -1216,7 +1216,6 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, { struct sock *sk = sock->sk; struct rose_sock *rose = rose_sk(sk); - struct sockaddr_rose *srose = (struct sockaddr_rose *)msg->msg_name; size_t copied; unsigned char *asmptr; struct sk_buff *skb; @@ -1252,24 +1251,19 @@ static int rose_recvmsg(struct kiocb *iocb, struct socket *sock, skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); - if (srose != NULL) { - memset(srose, 0, msg->msg_namelen); + if (msg->msg_name) { + struct sockaddr_rose *srose; + struct full_sockaddr_rose *full_srose = msg->msg_name; + + memset(msg->msg_name, 0, sizeof(struct full_sockaddr_rose)); + srose = msg->msg_name; srose->srose_family = AF_ROSE; srose->srose_addr = rose->dest_addr; srose->srose_call = rose->dest_call; srose->srose_ndigis = rose->dest_ndigis; - if (msg->msg_namelen >= sizeof(struct full_sockaddr_rose)) { - struct full_sockaddr_rose *full_srose = (struct full_sockaddr_rose *)msg->msg_name; - for (n = 0 ; n < rose->dest_ndigis ; n++) - full_srose->srose_digis[n] = rose->dest_digis[n]; - msg->msg_namelen = sizeof(struct full_sockaddr_rose); - } else { - if (rose->dest_ndigis >= 1) { - srose->srose_ndigis = 1; - srose->srose_digi = rose->dest_digis[0]; - } - msg->msg_namelen = sizeof(struct sockaddr_rose); - } + for (n = 0 ; n < rose->dest_ndigis ; n++) + full_srose->srose_digis[n] = rose->dest_digis[n]; + msg->msg_namelen = sizeof(struct full_sockaddr_rose); } skb_free_datagram(sk, skb); diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c index 4b48687c3890..898492a8d61b 100644 --- a/net/rxrpc/ar-recvmsg.c +++ b/net/rxrpc/ar-recvmsg.c @@ -143,10 +143,13 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock, /* copy the peer address and timestamp */ if (!continue_call) { - if (msg->msg_name && msg->msg_namelen > 0) + if (msg->msg_name) { + size_t len = + sizeof(call->conn->trans->peer->srx); memcpy(msg->msg_name, - &call->conn->trans->peer->srx, - sizeof(call->conn->trans->peer->srx)); + &call->conn->trans->peer->srx, len); + msg->msg_namelen = len; + } sock_recv_ts_and_drops(msg, &rx->sk, skb); } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 281c1bded1f6..51b968d3febb 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -285,6 +285,45 @@ static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind) return q; } +/* The linklayer setting were not transferred from iproute2, in older + * versions, and the rate tables lookup systems have been dropped in + * the kernel. To keep backward compatible with older iproute2 tc + * utils, we detect the linklayer setting by detecting if the rate + * table were modified. + * + * For linklayer ATM table entries, the rate table will be aligned to + * 48 bytes, thus some table entries will contain the same value. The + * mpu (min packet unit) is also encoded into the old rate table, thus + * starting from the mpu, we find low and high table entries for + * mapping this cell. If these entries contain the same value, when + * the rate tables have been modified for linklayer ATM. + * + * This is done by rounding mpu to the nearest 48 bytes cell/entry, + * and then roundup to the next cell, calc the table entry one below, + * and compare. + */ +static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab) +{ + int low = roundup(r->mpu, 48); + int high = roundup(low+1, 48); + int cell_low = low >> r->cell_log; + int cell_high = (high >> r->cell_log) - 1; + + /* rtab is too inaccurate at rates > 100Mbit/s */ + if ((r->rate > (100000000/8)) || (rtab[0] == 0)) { + pr_debug("TC linklayer: Giving up ATM detection\n"); + return TC_LINKLAYER_ETHERNET; + } + + if ((cell_high > cell_low) && (cell_high < 256) + && (rtab[cell_low] == rtab[cell_high])) { + pr_debug("TC linklayer: Detected ATM, low(%d)=high(%d)=%u\n", + cell_low, cell_high, rtab[cell_high]); + return TC_LINKLAYER_ATM; + } + return TC_LINKLAYER_ETHERNET; +} + static struct qdisc_rate_table *qdisc_rtab_list; struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab) @@ -308,6 +347,8 @@ struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *ta rtab->rate = *r; rtab->refcnt = 1; memcpy(rtab->data, nla_data(tab), 1024); + if (r->linklayer == TC_LINKLAYER_UNAWARE) + r->linklayer = __detect_linklayer(r, rtab->data); rtab->next = qdisc_rtab_list; qdisc_rtab_list = rtab; } diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index ca8e0a57d945..1f9c31411f19 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -605,6 +605,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, struct sockaddr_atmpvc pvc; int state; + memset(&pvc, 0, sizeof(pvc)); pvc.sap_family = AF_ATMPVC; pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1; pvc.sap_addr.vpi = flow->vcc->vpi; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 1bc210ffcba2..8ec15988b5f3 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1465,6 +1465,7 @@ static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl) unsigned char *b = skb_tail_pointer(skb); struct tc_cbq_wrropt opt; + memset(&opt, 0, sizeof(opt)); opt.flags = 0; opt.allot = cl->allot; opt.priority = cl->priority + 1; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 20224086cc28..a7f838b45dc8 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -908,6 +908,7 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r, memset(r, 0, sizeof(*r)); r->overhead = conf->overhead; r->rate_bps = (u64)conf->rate << 3; + r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK); r->mult = 1; /* * Calibrate mult, shift so that token counting is accurate diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index adaedd79389c..e09b074bb8a7 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -87,7 +87,7 @@ struct htb_class { unsigned int children; struct htb_class *parent; /* parent class */ - int prio; /* these two are used only by leaves... */ + u32 prio; /* these two are used only by leaves... */ int quantum; /* but stored for parent-to-leaf return */ union { @@ -1312,6 +1312,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)*arg, *parent; struct nlattr *opt = tca[TCA_OPTIONS]; + struct qdisc_rate_table *rtab = NULL, *ctab = NULL; struct nlattr *tb[TCA_HTB_MAX + 1]; struct tc_htb_opt *hopt; @@ -1333,6 +1334,18 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!hopt->rate.rate || !hopt->ceil.rate) goto failure; + /* Keeping backward compatible with rate_table based iproute2 tc */ + if (hopt->rate.linklayer == TC_LINKLAYER_UNAWARE) { + rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB]); + if (rtab) + qdisc_put_rtab(rtab); + } + if (hopt->ceil.linklayer == TC_LINKLAYER_UNAWARE) { + ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB]); + if (ctab) + qdisc_put_rtab(ctab); + } + if (!cl) { /* new class */ struct Qdisc *new_q; int prio; @@ -1463,7 +1476,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, psched_ratecfg_precompute(&cl->ceil, &hopt->ceil); cl->buffer = PSCHED_TICKS2NS(hopt->buffer); - cl->cbuffer = PSCHED_TICKS2NS(hopt->buffer); + cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer); sch_tree_unlock(sch); diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index d51852bba01c..57922524f0c7 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -113,7 +113,6 @@ #define FRAC_BITS 30 /* fixed point arithmetic */ #define ONE_FP (1UL << FRAC_BITS) -#define IWSUM (ONE_FP/QFQ_MAX_WSUM) #define QFQ_MTU_SHIFT 16 /* to support TSO/GSO */ #define QFQ_MIN_LMAX 512 /* see qfq_slot_insert */ @@ -189,6 +188,7 @@ struct qfq_sched { struct qfq_aggregate *in_serv_agg; /* Aggregate being served. */ u32 num_active_agg; /* Num. of active aggregates */ u32 wsum; /* weight sum */ + u32 iwsum; /* inverse weight sum */ unsigned long bitmaps[QFQ_MAX_STATE]; /* Group bitmaps. */ struct qfq_group groups[QFQ_MAX_INDEX + 1]; /* The groups. */ @@ -314,6 +314,7 @@ static void qfq_update_agg(struct qfq_sched *q, struct qfq_aggregate *agg, q->wsum += (int) agg->class_weight * (new_num_classes - agg->num_classes); + q->iwsum = ONE_FP / q->wsum; agg->num_classes = new_num_classes; } @@ -340,6 +341,10 @@ static void qfq_destroy_agg(struct qfq_sched *q, struct qfq_aggregate *agg) { if (!hlist_unhashed(&agg->nonfull_next)) hlist_del_init(&agg->nonfull_next); + q->wsum -= agg->class_weight; + if (q->wsum != 0) + q->iwsum = ONE_FP / q->wsum; + if (q->in_serv_agg == agg) q->in_serv_agg = qfq_choose_next_agg(q); kfree(agg); @@ -827,38 +832,60 @@ static void qfq_make_eligible(struct qfq_sched *q) } } - /* - * The index of the slot in which the aggregate is to be inserted must - * not be higher than QFQ_MAX_SLOTS-2. There is a '-2' and not a '-1' - * because the start time of the group may be moved backward by one - * slot after the aggregate has been inserted, and this would cause - * non-empty slots to be right-shifted by one position. + * The index of the slot in which the input aggregate agg is to be + * inserted must not be higher than QFQ_MAX_SLOTS-2. There is a '-2' + * and not a '-1' because the start time of the group may be moved + * backward by one slot after the aggregate has been inserted, and + * this would cause non-empty slots to be right-shifted by one + * position. + * + * QFQ+ fully satisfies this bound to the slot index if the parameters + * of the classes are not changed dynamically, and if QFQ+ never + * happens to postpone the service of agg unjustly, i.e., it never + * happens that the aggregate becomes backlogged and eligible, or just + * eligible, while an aggregate with a higher approximated finish time + * is being served. In particular, in this case QFQ+ guarantees that + * the timestamps of agg are low enough that the slot index is never + * higher than 2. Unfortunately, QFQ+ cannot provide the same + * guarantee if it happens to unjustly postpone the service of agg, or + * if the parameters of some class are changed. + * + * As for the first event, i.e., an out-of-order service, the + * upper bound to the slot index guaranteed by QFQ+ grows to + * 2 + + * QFQ_MAX_AGG_CLASSES * ((1<<QFQ_MTU_SHIFT)/QFQ_MIN_LMAX) * + * (current_max_weight/current_wsum) <= 2 + 8 * 128 * 1. * - * If the weight and lmax (max_pkt_size) of the classes do not change, - * then QFQ+ does meet the above contraint according to the current - * values of its parameters. In fact, if the weight and lmax of the - * classes do not change, then, from the theory, QFQ+ guarantees that - * the slot index is never higher than - * 2 + QFQ_MAX_AGG_CLASSES * ((1<<QFQ_MTU_SHIFT)/QFQ_MIN_LMAX) * - * (QFQ_MAX_WEIGHT/QFQ_MAX_WSUM) = 2 + 8 * 128 * (1 / 64) = 18 + * The following function deals with this problem by backward-shifting + * the timestamps of agg, if needed, so as to guarantee that the slot + * index is never higher than QFQ_MAX_SLOTS-2. This backward-shift may + * cause the service of other aggregates to be postponed, yet the + * worst-case guarantees of these aggregates are not violated. In + * fact, in case of no out-of-order service, the timestamps of agg + * would have been even lower than they are after the backward shift, + * because QFQ+ would have guaranteed a maximum value equal to 2 for + * the slot index, and 2 < QFQ_MAX_SLOTS-2. Hence the aggregates whose + * service is postponed because of the backward-shift would have + * however waited for the service of agg before being served. * - * When the weight of a class is increased or the lmax of the class is - * decreased, a new aggregate with smaller slot size than the original - * parent aggregate of the class may happen to be activated. The - * activation of this aggregate should be properly delayed to when the - * service of the class has finished in the ideal system tracked by - * QFQ+. If the activation of the aggregate is not delayed to this - * reference time instant, then this aggregate may be unjustly served - * before other aggregates waiting for service. This may cause the - * above bound to the slot index to be violated for some of these - * unlucky aggregates. + * The other event that may cause the slot index to be higher than 2 + * for agg is a recent change of the parameters of some class. If the + * weight of a class is increased or the lmax (max_pkt_size) of the + * class is decreased, then a new aggregate with smaller slot size + * than the original parent aggregate of the class may happen to be + * activated. The activation of this aggregate should be properly + * delayed to when the service of the class has finished in the ideal + * system tracked by QFQ+. If the activation of the aggregate is not + * delayed to this reference time instant, then this aggregate may be + * unjustly served before other aggregates waiting for service. This + * may cause the above bound to the slot index to be violated for some + * of these unlucky aggregates. * * Instead of delaying the activation of the new aggregate, which is - * quite complex, the following inaccurate but simple solution is used: - * if the slot index is higher than QFQ_MAX_SLOTS-2, then the - * timestamps of the aggregate are shifted backward so as to let the - * slot index become equal to QFQ_MAX_SLOTS-2. + * quite complex, the above-discussed capping of the slot index is + * used to handle also the consequences of a change of the parameters + * of a class. */ static void qfq_slot_insert(struct qfq_group *grp, struct qfq_aggregate *agg, u64 roundedS) @@ -1077,7 +1104,7 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch) else in_serv_agg->budget -= len; - q->V += (u64)len * IWSUM; + q->V += (u64)len * q->iwsum; pr_debug("qfq dequeue: len %u F %lld now %lld\n", len, (unsigned long long) in_serv_agg->F, (unsigned long long) q->V); diff --git a/net/sctp/input.c b/net/sctp/input.c index 4b2c83146aa7..bd4fb459c63b 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -648,8 +648,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) break; case ICMP_REDIRECT: sctp_icmp_redirect(sk, transport, skb); - err = 0; - break; + /* Fall through to out_unlock. */ default: goto out_unlock; } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 391a245d5203..422d8bdacc0d 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -189,7 +189,7 @@ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, break; case NDISC_REDIRECT: sctp_icmp_redirect(sk, transport, skb); - break; + goto out_unlock; default: break; } @@ -210,45 +210,24 @@ out: in6_dev_put(idev); } -/* Based on tcp_v6_xmit() in tcp_ipv6.c. */ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport) { struct sock *sk = skb->sk; struct ipv6_pinfo *np = inet6_sk(sk); - struct flowi6 fl6; - - memset(&fl6, 0, sizeof(fl6)); - - fl6.flowi6_proto = sk->sk_protocol; - - /* Fill in the dest address from the route entry passed with the skb - * and the source address from the transport. - */ - fl6.daddr = transport->ipaddr.v6.sin6_addr; - fl6.saddr = transport->saddr.v6.sin6_addr; - - fl6.flowlabel = np->flow_label; - IP6_ECN_flow_xmit(sk, fl6.flowlabel); - if (ipv6_addr_type(&fl6.saddr) & IPV6_ADDR_LINKLOCAL) - fl6.flowi6_oif = transport->saddr.v6.sin6_scope_id; - else - fl6.flowi6_oif = sk->sk_bound_dev_if; - - if (np->opt && np->opt->srcrt) { - struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; - fl6.daddr = *rt0->addr; - } + struct flowi6 *fl6 = &transport->fl.u.ip6; SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n", __func__, skb, skb->len, - &fl6.saddr, &fl6.daddr); + &fl6->saddr, &fl6->daddr); - SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS); + IP6_ECN_flow_xmit(sk, fl6->flowlabel); if (!(transport->param_flags & SPP_PMTUD_ENABLE)) skb->local_df = 1; - return ip6_xmit(sk, skb, &fl6, np->opt, np->tclass); + SCTP_INC_STATS(sock_net(sk), SCTP_MIB_OUTSCTPPACKS); + + return ip6_xmit(sk, skb, fl6, np->opt, np->tclass); } /* Returns the dst cache entry for the given source and destination ip @@ -261,10 +240,12 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, struct dst_entry *dst = NULL; struct flowi6 *fl6 = &fl->u.ip6; struct sctp_bind_addr *bp; + struct ipv6_pinfo *np = inet6_sk(sk); struct sctp_sockaddr_entry *laddr; union sctp_addr *baddr = NULL; union sctp_addr *daddr = &t->ipaddr; union sctp_addr dst_saddr; + struct in6_addr *final_p, final; __u8 matchlen = 0; __u8 bmatchlen; sctp_scope_t scope; @@ -287,7 +268,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl6->saddr); } - dst = ip6_dst_lookup_flow(sk, fl6, NULL, false); + final_p = fl6_update_dst(fl6, np->opt, &final); + dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); if (!asoc || saddr) goto out; @@ -339,10 +321,12 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, } } rcu_read_unlock(); + if (baddr) { fl6->saddr = baddr->v6.sin6_addr; fl6->fl6_sport = baddr->v6.sin6_port; - dst = ip6_dst_lookup_flow(sk, fl6, NULL, false); + final_p = fl6_update_dst(fl6, np->opt, &final); + dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); } out: diff --git a/net/sctp/output.c b/net/sctp/output.c index bbef4a7a9b56..0beb2f9c8a7c 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -547,7 +547,8 @@ int sctp_packet_transmit(struct sctp_packet *packet) * by CRC32-C as described in <draft-ietf-tsvwg-sctpcsum-02.txt>. */ if (!sctp_checksum_disable) { - if (!(dst->dev->features & NETIF_F_SCTP_CSUM)) { + if (!(dst->dev->features & NETIF_F_SCTP_CSUM) || + (dst_xfrm(dst) != NULL) || packet->ipfragok) { __u32 crc32 = sctp_start_cksum((__u8 *)sh, cksum_buf_len); /* 3) Put the resultant value into the checksum field in the diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6abb1caf9836..79bc251042ba 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -820,6 +820,9 @@ static int sctp_send_asconf_del_ip(struct sock *sk, goto skip_mkasconf; } + if (laddr == NULL) + return -EINVAL; + /* We do not need RCU protection throughout this loop * because this is done under a socket lock from the * setsockopt call. @@ -6193,7 +6196,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) /* Is there any exceptional events? */ if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue)) mask |= POLLERR | - sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0; + (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0); if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLRDHUP | POLLIN | POLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) diff --git a/net/socket.c b/net/socket.c index 4ca1526db756..ac72efc3d965 100644 --- a/net/socket.c +++ b/net/socket.c @@ -215,12 +215,13 @@ static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen, int err; int len; + BUG_ON(klen > sizeof(struct sockaddr_storage)); err = get_user(len, ulen); if (err) return err; if (len > klen) len = klen; - if (len < 0 || len > sizeof(struct sockaddr_storage)) + if (len < 0) return -EINVAL; if (len) { if (audit_sockaddr(klen, kaddr)) @@ -1832,8 +1833,10 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, msg.msg_iov = &iov; iov.iov_len = size; iov.iov_base = ubuf; - msg.msg_name = (struct sockaddr *)&address; - msg.msg_namelen = sizeof(address); + /* Save some cycles and don't copy the address if not needed */ + msg.msg_name = addr ? (struct sockaddr *)&address : NULL; + /* We assume all kernel code knows the size of sockaddr_storage */ + msg.msg_namelen = 0; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = sock_recvmsg(sock, &msg, size, flags); @@ -1956,6 +1959,16 @@ struct used_address { unsigned int name_len; }; +static int copy_msghdr_from_user(struct msghdr *kmsg, + struct msghdr __user *umsg) +{ + if (copy_from_user(kmsg, umsg, sizeof(struct msghdr))) + return -EFAULT; + if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) + kmsg->msg_namelen = sizeof(struct sockaddr_storage); + return 0; +} + static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg, struct msghdr *msg_sys, unsigned int flags, struct used_address *used_address) @@ -1974,8 +1987,11 @@ static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg, if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(msg_sys, msg_compat)) return -EFAULT; - } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) - return -EFAULT; + } else { + err = copy_msghdr_from_user(msg_sys, msg); + if (err) + return err; + } if (msg_sys->msg_iovlen > UIO_FASTIOV) { err = -EMSGSIZE; @@ -2183,8 +2199,11 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg, if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(msg_sys, msg_compat)) return -EFAULT; - } else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) - return -EFAULT; + } else { + err = copy_msghdr_from_user(msg_sys, msg); + if (err) + return err; + } if (msg_sys->msg_iovlen > UIO_FASTIOV) { err = -EMSGSIZE; @@ -2197,16 +2216,14 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg, goto out; } - /* - * Save the user-mode address (verify_iovec will change the - * kernel msghdr to use the kernel address space) + /* Save the user-mode address (verify_iovec will change the + * kernel msghdr to use the kernel address space) */ - uaddr = (__force void __user *)msg_sys->msg_name; uaddr_len = COMPAT_NAMELEN(msg); - if (MSG_CMSG_COMPAT & flags) { + if (MSG_CMSG_COMPAT & flags) err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE); - } else + else err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE); if (err < 0) goto out_freeiov; @@ -2215,6 +2232,9 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg, cmsg_ptr = (unsigned long)msg_sys->msg_control; msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); + /* We assume all kernel code knows the size of sockaddr_storage */ + msg_sys->msg_namelen = 0; + if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index d304f41260f2..f1eb0d16666c 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -120,7 +120,7 @@ static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt) if (IS_ERR(clnt)) { dprintk("RPC: failed to create AF_LOCAL gssproxy " "client (errno %ld).\n", PTR_ERR(clnt)); - result = -PTR_ERR(clnt); + result = PTR_ERR(clnt); *_clnt = NULL; goto out; } @@ -213,6 +213,26 @@ static int gssp_call(struct net *net, struct rpc_message *msg) return status; } +static void gssp_free_receive_pages(struct gssx_arg_accept_sec_context *arg) +{ + int i; + + for (i = 0; i < arg->npages && arg->pages[i]; i++) + __free_page(arg->pages[i]); +} + +static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg) +{ + arg->npages = DIV_ROUND_UP(NGROUPS_MAX * 4, PAGE_SIZE); + arg->pages = kzalloc(arg->npages * sizeof(struct page *), GFP_KERNEL); + /* + * XXX: actual pages are allocated by xdr layer in + * xdr_partial_copy_from_skb. + */ + if (!arg->pages) + return -ENOMEM; + return 0; +} /* * Public functions @@ -261,10 +281,16 @@ int gssp_accept_sec_context_upcall(struct net *net, arg.context_handle = &ctxh; res.output_token->len = GSSX_max_output_token_sz; + ret = gssp_alloc_receive_pages(&arg); + if (ret) + return ret; + /* use nfs/ for targ_name ? */ ret = gssp_call(net, &msg); + gssp_free_receive_pages(&arg); + /* we need to fetch all data even in case of error so * that we can free special strctures is they have been allocated */ data->major_status = res.status.major_status; @@ -328,7 +354,6 @@ void gssp_free_upcall_data(struct gssp_upcall_data *data) kfree(data->in_handle.data); kfree(data->out_handle.data); kfree(data->out_token.data); - kfree(data->mech_oid.data); free_svc_cred(&data->creds); } diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index 357f613df7ff..f0f78c5f1c7d 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -166,14 +166,15 @@ static int dummy_dec_opt_array(struct xdr_stream *xdr, return 0; } -static int get_s32(void **p, void *max, s32 *res) +static int get_host_u32(struct xdr_stream *xdr, u32 *res) { - void *base = *p; - void *next = (void *)((char *)base + sizeof(s32)); - if (unlikely(next > max || next < base)) + __be32 *p; + + p = xdr_inline_decode(xdr, 4); + if (!p) return -EINVAL; - memcpy(res, base, sizeof(s32)); - *p = next; + /* Contents of linux creds are all host-endian: */ + memcpy(res, p, sizeof(u32)); return 0; } @@ -182,9 +183,9 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr, { u32 length; __be32 *p; - void *q, *end; - s32 tmp; - int N, i, err; + u32 tmp; + u32 N; + int i, err; p = xdr_inline_decode(xdr, 4); if (unlikely(p == NULL)) @@ -192,33 +193,28 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr, length = be32_to_cpup(p); - /* FIXME: we do not want to use the scratch buffer for this one - * may need to use functions that allows us to access an io vector - * directly */ - p = xdr_inline_decode(xdr, length); - if (unlikely(p == NULL)) + if (length > (3 + NGROUPS_MAX) * sizeof(u32)) return -ENOSPC; - q = p; - end = q + length; - /* uid */ - err = get_s32(&q, end, &tmp); + err = get_host_u32(xdr, &tmp); if (err) return err; creds->cr_uid = make_kuid(&init_user_ns, tmp); /* gid */ - err = get_s32(&q, end, &tmp); + err = get_host_u32(xdr, &tmp); if (err) return err; creds->cr_gid = make_kgid(&init_user_ns, tmp); /* number of additional gid's */ - err = get_s32(&q, end, &tmp); + err = get_host_u32(xdr, &tmp); if (err) return err; N = tmp; + if ((3 + N) * sizeof(u32) != length) + return -EINVAL; creds->cr_group_info = groups_alloc(N); if (creds->cr_group_info == NULL) return -ENOMEM; @@ -226,7 +222,7 @@ static int gssx_dec_linux_creds(struct xdr_stream *xdr, /* gid's */ for (i = 0; i < N; i++) { kgid_t kgid; - err = get_s32(&q, end, &tmp); + err = get_host_u32(xdr, &tmp); if (err) goto out_free_groups; err = -EINVAL; @@ -430,7 +426,7 @@ static int dummy_enc_nameattr_array(struct xdr_stream *xdr, static int dummy_dec_nameattr_array(struct xdr_stream *xdr, struct gssx_name_attr_array *naa) { - struct gssx_name_attr dummy; + struct gssx_name_attr dummy = { .attr = {.len = 0} }; u32 count, i; __be32 *p; @@ -493,12 +489,13 @@ static int gssx_enc_name(struct xdr_stream *xdr, return err; } + static int gssx_dec_name(struct xdr_stream *xdr, struct gssx_name *name) { - struct xdr_netobj dummy_netobj; - struct gssx_name_attr_array dummy_name_attr_array; - struct gssx_option_array dummy_option_array; + struct xdr_netobj dummy_netobj = { .len = 0 }; + struct gssx_name_attr_array dummy_name_attr_array = { .count = 0 }; + struct gssx_option_array dummy_option_array = { .count = 0 }; int err; /* name->display_name */ @@ -783,6 +780,9 @@ void gssx_enc_accept_sec_context(struct rpc_rqst *req, /* arg->options */ err = dummy_enc_opt_array(xdr, &arg->options); + xdr_inline_pages(&req->rq_rcv_buf, + PAGE_SIZE/2 /* pretty arbitrary */, + arg->pages, 0 /* page base */, arg->npages * PAGE_SIZE); done: if (err) dprintk("RPC: gssx_enc_accept_sec_context: %d\n", err); diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.h b/net/sunrpc/auth_gss/gss_rpc_xdr.h index 1c98b27d870c..685a688f3d8a 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.h +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.h @@ -147,6 +147,8 @@ struct gssx_arg_accept_sec_context { struct gssx_cb *input_cb; u32 ret_deleg_cred; struct gssx_option_array options; + struct page **pages; + unsigned int npages; }; struct gssx_res_accept_sec_context { @@ -240,7 +242,8 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp, 2 * GSSX_max_princ_sz + \ 8 + 8 + 4 + 4 + 4) #define GSSX_max_output_token_sz 1024 -#define GSSX_max_creds_sz (4 + 4 + 4 + NGROUPS_MAX * 4) +/* grouplist not included; we allocate separate pages for that: */ +#define GSSX_max_creds_sz (4 + 4 + 4 /* + NGROUPS_MAX*4 */) #define GSSX_RES_accept_sec_context_sz (GSSX_default_status_sz + \ GSSX_default_ctx_sz + \ GSSX_max_output_token_sz + \ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 5a750b9c3640..3524a8b50046 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1402,14 +1402,18 @@ call_refreshresult(struct rpc_task *task) task->tk_action = call_refresh; switch (status) { case 0: - if (rpcauth_uptodatecred(task)) + if (rpcauth_uptodatecred(task)) { task->tk_action = call_allocate; - return; + return; + } + /* Use rate-limiting and a max number of retries if refresh + * had status 0 but failed to update the cred. + */ case -ETIMEDOUT: rpc_delay(task, 3*HZ); - case -EKEYEXPIRED: case -EAGAIN: status = -EACCES; + case -EKEYEXPIRED: if (!task->tk_cred_retry) break; task->tk_cred_retry--; @@ -1644,6 +1648,10 @@ call_connect(struct rpc_task *task) task->tk_action = call_connect_status; if (task->tk_status < 0) return; + if (task->tk_flags & RPC_TASK_NOCONNECT) { + rpc_exit(task, -ENOTCONN); + return; + } xprt_connect(task); } } diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h index 74d948f5d5a1..779742cfc1ff 100644 --- a/net/sunrpc/netns.h +++ b/net/sunrpc/netns.h @@ -23,6 +23,7 @@ struct sunrpc_net { struct rpc_clnt *rpcb_local_clnt4; spinlock_t rpcb_clnt_lock; unsigned int rpcb_users; + unsigned int rpcb_is_af_local : 1; struct mutex gssp_lock; wait_queue_head_t gssp_wq; diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index 3df764dc330c..1891a1022c17 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -204,13 +204,15 @@ void rpcb_put_local(struct net *net) } static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt, - struct rpc_clnt *clnt4) + struct rpc_clnt *clnt4, + bool is_af_local) { struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); /* Protected by rpcb_create_local_mutex */ sn->rpcb_local_clnt = clnt; sn->rpcb_local_clnt4 = clnt4; + sn->rpcb_is_af_local = is_af_local ? 1 : 0; smp_wmb(); sn->rpcb_users = 1; dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: " @@ -238,6 +240,14 @@ static int rpcb_create_local_unix(struct net *net) .program = &rpcb_program, .version = RPCBVERS_2, .authflavor = RPC_AUTH_NULL, + /* + * We turn off the idle timeout to prevent the kernel + * from automatically disconnecting the socket. + * Otherwise, we'd have to cache the mount namespace + * of the caller and somehow pass that to the socket + * reconnect code. + */ + .flags = RPC_CLNT_CREATE_NO_IDLE_TIMEOUT, }; struct rpc_clnt *clnt, *clnt4; int result = 0; @@ -263,7 +273,7 @@ static int rpcb_create_local_unix(struct net *net) clnt4 = NULL; } - rpcb_set_local(net, clnt, clnt4); + rpcb_set_local(net, clnt, clnt4, true); out: return result; @@ -315,7 +325,7 @@ static int rpcb_create_local_net(struct net *net) clnt4 = NULL; } - rpcb_set_local(net, clnt, clnt4); + rpcb_set_local(net, clnt, clnt4, false); out: return result; @@ -376,13 +386,16 @@ static struct rpc_clnt *rpcb_create(struct net *net, const char *hostname, return rpc_create(&args); } -static int rpcb_register_call(struct rpc_clnt *clnt, struct rpc_message *msg) +static int rpcb_register_call(struct sunrpc_net *sn, struct rpc_clnt *clnt, struct rpc_message *msg, bool is_set) { - int result, error = 0; + int flags = RPC_TASK_NOCONNECT; + int error, result = 0; + if (is_set || !sn->rpcb_is_af_local) + flags = RPC_TASK_SOFTCONN; msg->rpc_resp = &result; - error = rpc_call_sync(clnt, msg, RPC_TASK_SOFTCONN); + error = rpc_call_sync(clnt, msg, flags); if (error < 0) { dprintk("RPC: failed to contact local rpcbind " "server (errno %d).\n", -error); @@ -439,16 +452,19 @@ int rpcb_register(struct net *net, u32 prog, u32 vers, int prot, unsigned short .rpc_argp = &map, }; struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + bool is_set = false; dprintk("RPC: %sregistering (%u, %u, %d, %u) with local " "rpcbind\n", (port ? "" : "un"), prog, vers, prot, port); msg.rpc_proc = &rpcb_procedures2[RPCBPROC_UNSET]; - if (port) + if (port != 0) { msg.rpc_proc = &rpcb_procedures2[RPCBPROC_SET]; + is_set = true; + } - return rpcb_register_call(sn->rpcb_local_clnt, &msg); + return rpcb_register_call(sn, sn->rpcb_local_clnt, &msg, is_set); } /* @@ -461,6 +477,7 @@ static int rpcb_register_inet4(struct sunrpc_net *sn, const struct sockaddr_in *sin = (const struct sockaddr_in *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin->sin_port); + bool is_set = false; int result; map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); @@ -471,10 +488,12 @@ static int rpcb_register_inet4(struct sunrpc_net *sn, map->r_addr, map->r_netid); msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - if (port) + if (port != 0) { msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; + is_set = true; + } - result = rpcb_register_call(sn->rpcb_local_clnt4, msg); + result = rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, is_set); kfree(map->r_addr); return result; } @@ -489,6 +508,7 @@ static int rpcb_register_inet6(struct sunrpc_net *sn, const struct sockaddr_in6 *sin6 = (const struct sockaddr_in6 *)sap; struct rpcbind_args *map = msg->rpc_argp; unsigned short port = ntohs(sin6->sin6_port); + bool is_set = false; int result; map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL); @@ -499,10 +519,12 @@ static int rpcb_register_inet6(struct sunrpc_net *sn, map->r_addr, map->r_netid); msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - if (port) + if (port != 0) { msg->rpc_proc = &rpcb_procedures4[RPCBPROC_SET]; + is_set = true; + } - result = rpcb_register_call(sn->rpcb_local_clnt4, msg); + result = rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, is_set); kfree(map->r_addr); return result; } @@ -519,7 +541,7 @@ static int rpcb_unregister_all_protofamilies(struct sunrpc_net *sn, map->r_addr = ""; msg->rpc_proc = &rpcb_procedures4[RPCBPROC_UNSET]; - return rpcb_register_call(sn->rpcb_local_clnt4, msg); + return rpcb_register_call(sn, sn->rpcb_local_clnt4, msg, false); } /** diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 06bdf5a1082c..1583c8a4eb7f 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -493,8 +493,6 @@ static int unix_gid_parse(struct cache_detail *cd, if (rv) return -EINVAL; uid = make_kuid(&init_user_ns, id); - if (!uid_valid(uid)) - return -EINVAL; ug.uid = uid; expiry = get_expiry(&mesg); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 0f679df7d072..305374d4fb98 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -917,7 +917,10 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk) len = svsk->sk_datalen; npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { - BUG_ON(svsk->sk_pages[i] == NULL); + if (svsk->sk_pages[i] == NULL) { + WARN_ON_ONCE(1); + continue; + } put_page(svsk->sk_pages[i]); svsk->sk_pages[i] = NULL; } @@ -1092,8 +1095,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) goto err_noclose; } - if (svc_sock_reclen(svsk) < 8) + if (svsk->sk_datalen < 8) { + svsk->sk_datalen = 0; goto err_delete; /* client is nuts. */ + } rqstp->rq_arg.len = svsk->sk_datalen; rqstp->rq_arg.page_base = 0; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 75edcfad6e26..1504bb11e4f3 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -207,10 +207,13 @@ _shift_data_right_pages(struct page **pages, size_t pgto_base, pgfrom_base -= copy; vto = kmap_atomic(*pgto); - vfrom = kmap_atomic(*pgfrom); - memmove(vto + pgto_base, vfrom + pgfrom_base, copy); + if (*pgto != *pgfrom) { + vfrom = kmap_atomic(*pgfrom); + memcpy(vto + pgto_base, vfrom + pgfrom_base, copy); + kunmap_atomic(vfrom); + } else + memmove(vto + pgto_base, vto + pgfrom_base, copy); flush_dcache_page(*pgto); - kunmap_atomic(vfrom); kunmap_atomic(vto); } while ((len -= copy) != 0); diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c index 8d2edddf48cf..65b146297f5a 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c +++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c @@ -98,6 +98,7 @@ void svc_rdma_rcl_chunk_counts(struct rpcrdma_read_chunk *ch, */ static u32 *decode_write_list(u32 *va, u32 *vaend) { + unsigned long start, end; int nchunks; struct rpcrdma_write_array *ary = @@ -113,9 +114,12 @@ static u32 *decode_write_list(u32 *va, u32 *vaend) return NULL; } nchunks = ntohl(ary->wc_nchunks); - if (((unsigned long)&ary->wc_array[0] + - (sizeof(struct rpcrdma_write_chunk) * nchunks)) > - (unsigned long)vaend) { + + start = (unsigned long)&ary->wc_array[0]; + end = (unsigned long)vaend; + if (nchunks < 0 || + nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) || + (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) { dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", ary, nchunks, vaend); return NULL; @@ -129,6 +133,7 @@ static u32 *decode_write_list(u32 *va, u32 *vaend) static u32 *decode_reply_array(u32 *va, u32 *vaend) { + unsigned long start, end; int nchunks; struct rpcrdma_write_array *ary = (struct rpcrdma_write_array *)va; @@ -143,9 +148,12 @@ static u32 *decode_reply_array(u32 *va, u32 *vaend) return NULL; } nchunks = ntohl(ary->wc_nchunks); - if (((unsigned long)&ary->wc_array[0] + - (sizeof(struct rpcrdma_write_chunk) * nchunks)) > - (unsigned long)vaend) { + + start = (unsigned long)&ary->wc_array[0]; + end = (unsigned long)vaend; + if (nchunks < 0 || + nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) || + (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) { dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n", ary, nchunks, vaend); return NULL; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ffd50348a509..8a0e04d0928a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -391,8 +391,10 @@ static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, return kernel_sendmsg(sock, &msg, NULL, 0, 0); } -static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more) +static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more, bool zerocopy) { + ssize_t (*do_sendpage)(struct socket *sock, struct page *page, + int offset, size_t size, int flags); struct page **ppage; unsigned int remainder; int err, sent = 0; @@ -401,6 +403,9 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i base += xdr->page_base; ppage = xdr->pages + (base >> PAGE_SHIFT); base &= ~PAGE_MASK; + do_sendpage = sock->ops->sendpage; + if (!zerocopy) + do_sendpage = sock_no_sendpage; for(;;) { unsigned int len = min_t(unsigned int, PAGE_SIZE - base, remainder); int flags = XS_SENDMSG_FLAGS; @@ -408,7 +413,7 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i remainder -= len; if (remainder != 0 || more) flags |= MSG_MORE; - err = sock->ops->sendpage(sock, *ppage, base, len, flags); + err = do_sendpage(sock, *ppage, base, len, flags); if (remainder == 0 || err != len) break; sent += err; @@ -429,9 +434,10 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i * @addrlen: UDP only -- length of destination address * @xdr: buffer containing this request * @base: starting position in the buffer + * @zerocopy: true if it is safe to use sendpage() * */ -static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base) +static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, bool zerocopy) { unsigned int remainder = xdr->len - base; int err, sent = 0; @@ -459,7 +465,7 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, if (base < xdr->page_len) { unsigned int len = xdr->page_len - base; remainder -= len; - err = xs_send_pagedata(sock, xdr, base, remainder != 0); + err = xs_send_pagedata(sock, xdr, base, remainder != 0, zerocopy); if (remainder == 0 || err != len) goto out; sent += err; @@ -562,7 +568,7 @@ static int xs_local_send_request(struct rpc_task *task) req->rq_svec->iov_base, req->rq_svec->iov_len); status = xs_sendpages(transport->sock, NULL, 0, - xdr, req->rq_bytes_sent); + xdr, req->rq_bytes_sent, true); dprintk("RPC: %s(%u) = %d\n", __func__, xdr->len - req->rq_bytes_sent, status); if (likely(status >= 0)) { @@ -618,7 +624,7 @@ static int xs_udp_send_request(struct rpc_task *task) status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen, xdr, - req->rq_bytes_sent); + req->rq_bytes_sent, true); dprintk("RPC: xs_udp_send_request(%u) = %d\n", xdr->len - req->rq_bytes_sent, status); @@ -689,6 +695,7 @@ static int xs_tcp_send_request(struct rpc_task *task) struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); struct xdr_buf *xdr = &req->rq_snd_buf; + bool zerocopy = true; int status; xs_encode_stream_record_marker(&req->rq_snd_buf); @@ -696,13 +703,20 @@ static int xs_tcp_send_request(struct rpc_task *task) xs_pktdump("packet data:", req->rq_svec->iov_base, req->rq_svec->iov_len); + /* Don't use zero copy if this is a resend. If the RPC call + * completes while the socket holds a reference to the pages, + * then we may end up resending corrupted data. + */ + if (task->tk_flags & RPC_TASK_SENT) + zerocopy = false; /* Continue transmitting the packet/record. We must be careful * to cope with writespace callbacks arriving _after_ we have * called sendmsg(). */ while (1) { status = xs_sendpages(transport->sock, - NULL, 0, xdr, req->rq_bytes_sent); + NULL, 0, xdr, req->rq_bytes_sent, + zerocopy); dprintk("RPC: xs_tcp_send_request(%u) = %d\n", xdr->len - req->rq_bytes_sent, status); diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 9bc6db04be3e..e7000be321b0 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -47,12 +47,12 @@ static int net_ctl_permissions(struct ctl_table_header *head, /* Allow network administrator to have same access as root. */ if (ns_capable(net->user_ns, CAP_NET_ADMIN) || - uid_eq(root_uid, current_uid())) { + uid_eq(root_uid, current_euid())) { int mode = (table->mode >> 6) & 7; return (mode << 6) | (mode << 3) | mode; } /* Allow netns root group to have the same access as the root group */ - if (gid_eq(root_gid, current_gid())) { + if (in_egroup_p(root_gid)) { int mode = (table->mode >> 3) & 7; return (mode << 3) | mode; } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 515ce38e4f4c..2b1d7c2d677d 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -905,9 +905,6 @@ static int recv_msg(struct kiocb *iocb, struct socket *sock, goto exit; } - /* will be updated in set_orig_addr() if needed */ - m->msg_namelen = 0; - timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); restart: @@ -1017,9 +1014,6 @@ static int recv_stream(struct kiocb *iocb, struct socket *sock, goto exit; } - /* will be updated in set_orig_addr() if needed */ - m->msg_namelen = 0; - target = sock_rcvlowat(sk, flags & MSG_WAITALL, buf_len); timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); @@ -1179,7 +1173,7 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf) /* Accept only ACK or NACK message */ if (unlikely(msg_errcode(msg))) { sock->state = SS_DISCONNECTING; - sk->sk_err = -ECONNREFUSED; + sk->sk_err = ECONNREFUSED; retval = TIPC_OK; break; } @@ -1190,7 +1184,7 @@ static u32 filter_connect(struct tipc_sock *tsock, struct sk_buff **buf) res = auto_connect(sock, msg); if (res) { sock->state = SS_DISCONNECTING; - sk->sk_err = res; + sk->sk_err = -res; retval = TIPC_OK; break; } diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 826e09938bff..3ca7927520b0 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -529,13 +529,17 @@ static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *, static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *, struct msghdr *, size_t, int); -static void unix_set_peek_off(struct sock *sk, int val) +static int unix_set_peek_off(struct sock *sk, int val) { struct unix_sock *u = unix_sk(sk); - mutex_lock(&u->readlock); + if (mutex_lock_interruptible(&u->readlock)) + return -EINTR; + sk->sk_peek_off = val; mutex_unlock(&u->readlock); + + return 0; } @@ -713,7 +717,9 @@ static int unix_autobind(struct socket *sock) int err; unsigned int retries = 0; - mutex_lock(&u->readlock); + err = mutex_lock_interruptible(&u->readlock); + if (err) + return err; err = 0; if (u->addr) @@ -872,7 +878,9 @@ static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) goto out; addr_len = err; - mutex_lock(&u->readlock); + err = mutex_lock_interruptible(&u->readlock); + if (err) + goto out; err = -EINVAL; if (u->addr) @@ -1245,6 +1253,15 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb) return 0; } +static void unix_sock_inherit_flags(const struct socket *old, + struct socket *new) +{ + if (test_bit(SOCK_PASSCRED, &old->flags)) + set_bit(SOCK_PASSCRED, &new->flags); + if (test_bit(SOCK_PASSSEC, &old->flags)) + set_bit(SOCK_PASSSEC, &new->flags); +} + static int unix_accept(struct socket *sock, struct socket *newsock, int flags) { struct sock *sk = sock->sk; @@ -1279,6 +1296,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags) /* attach accepted sock to socket */ unix_state_lock(tsk); newsock->state = SS_CONNECTED; + unix_sock_inherit_flags(sock, newsock); sock_graft(tsk, newsock); unix_state_unlock(tsk); return 0; @@ -1751,7 +1769,6 @@ static void unix_copy_addr(struct msghdr *msg, struct sock *sk) { struct unix_sock *u = unix_sk(sk); - msg->msg_namelen = 0; if (u->addr) { msg->msg_namelen = u->addr->len; memcpy(msg->msg_name, u->addr->name, u->addr->len); @@ -1775,8 +1792,6 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, if (flags&MSG_OOB) goto out; - msg->msg_namelen = 0; - err = mutex_lock_interruptible(&u->readlock); if (err) { err = sock_intr_errno(sock_rcvtimeo(sk, noblock)); @@ -1916,8 +1931,6 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, target = sock_rcvlowat(sk, flags&MSG_WAITALL, size); timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT); - msg->msg_namelen = 0; - /* Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg */ diff --git a/net/unix/diag.c b/net/unix/diag.c index d591091603bf..86fa0f3b2caf 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -124,6 +124,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r rep->udiag_family = AF_UNIX; rep->udiag_type = sk->sk_type; rep->udiag_state = sk->sk_state; + rep->pad = 0; rep->udiag_ino = sk_ino; sock_diag_save_cookie(sk, rep->udiag_cookie); diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 3f77f42a3b58..9b88693bcc99 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -1670,8 +1670,6 @@ vsock_stream_recvmsg(struct kiocb *kiocb, vsk = vsock_sk(sk); err = 0; - msg->msg_namelen = 0; - lock_sock(sk); if (sk->sk_state != SS_CONNECTED) { diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index daff75200e25..62bbf7d73980 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1746,8 +1746,6 @@ static int vmci_transport_dgram_dequeue(struct kiocb *kiocb, if (flags & MSG_OOB || flags & MSG_ERRQUEUE) return -EOPNOTSUPP; - msg->msg_namelen = 0; - /* Retrieve the head sk_buff from the socket's receive queue. */ err = 0; skb = skb_recv_datagram(&vsk->sk, flags, noblock, &err); diff --git a/net/wireless/core.c b/net/wireless/core.c index 73405e00c800..64fcbae020d2 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -876,6 +876,7 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, cfg80211_leave_mesh(rdev, dev); break; case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_P2P_GO: cfg80211_stop_ap(rdev, dev); break; default: diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index d80e47194d49..e62c1ad4e4c9 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -269,6 +269,8 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, if (chan->flags & IEEE80211_CHAN_DISABLED) continue; wdev->wext.ibss.chandef.chan = chan; + wdev->wext.ibss.chandef.center_freq1 = + chan->center_freq; break; } @@ -353,6 +355,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, if (chan) { wdev->wext.ibss.chandef.chan = chan; wdev->wext.ibss.chandef.width = NL80211_CHAN_WIDTH_20_NOHT; + wdev->wext.ibss.chandef.center_freq1 = freq; wdev->wext.ibss.channel_fixed = true; } else { /* cfg80211_ibss_wext_join will pick one if needed */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b14b7e3cb6e6..448c034184e2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -471,10 +471,12 @@ static int nl80211_prepare_wdev_dump(struct sk_buff *skb, goto out_unlock; } *rdev = wiphy_to_dev((*wdev)->wiphy); - cb->args[0] = (*rdev)->wiphy_idx; + /* 0 is the first index - add 1 to parse only once */ + cb->args[0] = (*rdev)->wiphy_idx + 1; cb->args[1] = (*wdev)->identifier; } else { - struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0]); + /* subtract the 1 again here */ + struct wiphy *wiphy = wiphy_idx_to_wiphy(cb->args[0] - 1); struct wireless_dev *tmp; if (!wiphy) { @@ -6588,12 +6590,14 @@ EXPORT_SYMBOL(cfg80211_testmode_alloc_event_skb); void cfg80211_testmode_event(struct sk_buff *skb, gfp_t gfp) { + struct cfg80211_registered_device *rdev = ((void **)skb->cb)[0]; void *hdr = ((void **)skb->cb)[1]; struct nlattr *data = ((void **)skb->cb)[2]; nla_nest_end(skb, data); genlmsg_end(skb, hdr); - genlmsg_multicast(skb, 0, nl80211_testmode_mcgrp.id, gfp); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), skb, 0, + nl80211_testmode_mcgrp.id, gfp); } EXPORT_SYMBOL(cfg80211_testmode_event); #endif @@ -10028,7 +10032,8 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie, genlmsg_end(msg, hdr); - genlmsg_multicast(msg, 0, nl80211_mlme_mcgrp.id, gfp); + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); return; nla_put_failure: diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c index 7d604c06c3dc..722da616438c 100644 --- a/net/wireless/radiotap.c +++ b/net/wireless/radiotap.c @@ -97,6 +97,10 @@ int ieee80211_radiotap_iterator_init( struct ieee80211_radiotap_header *radiotap_header, int max_length, const struct ieee80211_radiotap_vendor_namespaces *vns) { + /* check the radiotap header can actually be present */ + if (max_length < sizeof(struct ieee80211_radiotap_header)) + return -EINVAL; + /* Linux only supports version 0 radiotap format */ if (radiotap_header->it_version) return -EINVAL; @@ -120,6 +124,10 @@ int ieee80211_radiotap_iterator_init( /* find payload start allowing for extended bitmap(s) */ if (iterator->_bitmap_shifter & (1<<IEEE80211_RADIOTAP_EXT)) { + if ((unsigned long)iterator->_arg - + (unsigned long)iterator->_rtheader + sizeof(uint32_t) > + (unsigned long)iterator->_max_length) + return -EINVAL; while (get_unaligned_le32(iterator->_arg) & (1 << IEEE80211_RADIOTAP_EXT)) { iterator->_arg += sizeof(uint32_t); @@ -131,7 +139,8 @@ int ieee80211_radiotap_iterator_init( */ if ((unsigned long)iterator->_arg - - (unsigned long)iterator->_rtheader > + (unsigned long)iterator->_rtheader + + sizeof(uint32_t) > (unsigned long)iterator->_max_length) return -EINVAL; } diff --git a/net/wireless/scan.c b/net/wireless/scan.c index fd99ea495b7e..81019ee3ddc8 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -253,10 +253,10 @@ void __cfg80211_sched_scan_results(struct work_struct *wk) rdev = container_of(wk, struct cfg80211_registered_device, sched_scan_results_wk); - request = rdev->sched_scan_req; - mutex_lock(&rdev->sched_scan_mtx); + request = rdev->sched_scan_req; + /* we don't have sched_scan_req anymore if the scan is stopping */ if (request) { if (request->flags & NL80211_SCAN_FLAG_FLUSH) { diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 37ca9694aabe..f96af3b96322 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1340,10 +1340,9 @@ static int x25_recvmsg(struct kiocb *iocb, struct socket *sock, if (sx25) { sx25->sx25_family = AF_X25; sx25->sx25_addr = x25->dest_addr; + msg->msg_namelen = sizeof(*sx25); } - msg->msg_namelen = sizeof(struct sockaddr_x25); - x25_check_rbuf(sk); rc = copied; out_free_dgram: @@ -1583,11 +1582,11 @@ out_cud_release: case SIOCX25CALLACCPTAPPRV: { rc = -EINVAL; lock_sock(sk); - if (sk->sk_state != TCP_CLOSE) - break; - clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); + if (sk->sk_state == TCP_CLOSE) { + clear_bit(X25_ACCPT_APPRV_FLAG, &x25->flags); + rc = 0; + } release_sock(sk); - rc = 0; break; } @@ -1595,14 +1594,15 @@ out_cud_release: rc = -EINVAL; lock_sock(sk); if (sk->sk_state != TCP_ESTABLISHED) - break; + goto out_sendcallaccpt_release; /* must call accptapprv above */ if (test_bit(X25_ACCPT_APPRV_FLAG, &x25->flags)) - break; + goto out_sendcallaccpt_release; x25_write_internal(sk, X25_CALL_ACCEPTED); x25->state = X25_STATE_3; - release_sock(sk); rc = 0; +out_sendcallaccpt_release: + release_sock(sk); break; } |