summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c81
-rw-r--r--net/core/dev_ioctl.c1
-rw-r--r--net/core/dst.c37
-rw-r--r--net/core/ethtool.c9
-rw-r--r--net/core/rtnetlink.c44
-rw-r--r--net/core/sock.c12
6 files changed, 120 insertions, 64 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 48399d8ce614..24d243084aab 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -182,7 +182,7 @@ EXPORT_SYMBOL(dev_base_lock);
/* protects napi_hash addition/deletion and napi_gen_id */
static DEFINE_SPINLOCK(napi_hash_lock);
-static unsigned int napi_gen_id;
+static unsigned int napi_gen_id = NR_CPUS;
static DEFINE_HASHTABLE(napi_hash, 8);
static seqcount_t devnet_rename_seq;
@@ -1246,8 +1246,9 @@ int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
if (!new_ifalias)
return -ENOMEM;
dev->ifalias = new_ifalias;
+ memcpy(dev->ifalias, alias, len);
+ dev->ifalias[len] = 0;
- strlcpy(dev->ifalias, alias, len+1);
return len;
}
@@ -2549,9 +2550,10 @@ EXPORT_SYMBOL(skb_mac_gso_segment);
static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
{
if (tx_path)
- return skb->ip_summed != CHECKSUM_PARTIAL;
- else
- return skb->ip_summed == CHECKSUM_NONE;
+ return skb->ip_summed != CHECKSUM_PARTIAL &&
+ skb->ip_summed != CHECKSUM_UNNECESSARY;
+
+ return skb->ip_summed == CHECKSUM_NONE;
}
/**
@@ -2570,11 +2572,12 @@ static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
netdev_features_t features, bool tx_path)
{
+ struct sk_buff *segs;
+
if (unlikely(skb_needs_check(skb, tx_path))) {
int err;
- skb_warn_bad_offload(skb);
-
+ /* We're going to init ->check field in TCP or UDP header */
err = skb_cow_head(skb, 0);
if (err < 0)
return ERR_PTR(err);
@@ -2589,7 +2592,12 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
skb_reset_mac_header(skb);
skb_reset_mac_len(skb);
- return skb_mac_gso_segment(skb, features);
+ segs = skb_mac_gso_segment(skb, features);
+
+ if (unlikely(skb_needs_check(skb, tx_path)))
+ skb_warn_bad_offload(skb);
+
+ return segs;
}
EXPORT_SYMBOL(__skb_gso_segment);
@@ -3049,7 +3057,9 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
int queue_index = 0;
#ifdef CONFIG_XPS
- if (skb->sender_cpu == 0)
+ u32 sender_cpu = skb->sender_cpu - 1;
+
+ if (sender_cpu >= (u32)NR_CPUS)
skb->sender_cpu = raw_smp_processor_id() + 1;
#endif
@@ -4372,6 +4382,12 @@ struct packet_offload *gro_find_complete_by_type(__be16 type)
}
EXPORT_SYMBOL(gro_find_complete_by_type);
+static void napi_skb_free_stolen_head(struct sk_buff *skb)
+{
+ skb_dst_drop(skb);
+ kmem_cache_free(skbuff_head_cache, skb);
+}
+
static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
{
switch (ret) {
@@ -4385,12 +4401,10 @@ static gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
break;
case GRO_MERGED_FREE:
- if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD) {
- skb_dst_drop(skb);
- kmem_cache_free(skbuff_head_cache, skb);
- } else {
+ if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+ napi_skb_free_stolen_head(skb);
+ else
__kfree_skb(skb);
- }
break;
case GRO_HELD:
@@ -4456,10 +4470,16 @@ static gro_result_t napi_frags_finish(struct napi_struct *napi,
break;
case GRO_DROP:
- case GRO_MERGED_FREE:
napi_reuse_skb(napi, skb);
break;
+ case GRO_MERGED_FREE:
+ if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+ napi_skb_free_stolen_head(skb);
+ else
+ napi_reuse_skb(napi, skb);
+ break;
+
case GRO_MERGED:
break;
}
@@ -4726,25 +4746,22 @@ EXPORT_SYMBOL_GPL(napi_by_id);
void napi_hash_add(struct napi_struct *napi)
{
- if (!test_and_set_bit(NAPI_STATE_HASHED, &napi->state)) {
+ if (test_and_set_bit(NAPI_STATE_HASHED, &napi->state))
+ return;
- spin_lock(&napi_hash_lock);
+ spin_lock(&napi_hash_lock);
- /* 0 is not a valid id, we also skip an id that is taken
- * we expect both events to be extremely rare
- */
- napi->napi_id = 0;
- while (!napi->napi_id) {
- napi->napi_id = ++napi_gen_id;
- if (napi_by_id(napi->napi_id))
- napi->napi_id = 0;
- }
+ /* 0..NR_CPUS+1 range is reserved for sender_cpu use */
+ do {
+ if (unlikely(++napi_gen_id < NR_CPUS + 1))
+ napi_gen_id = NR_CPUS + 1;
+ } while (napi_by_id(napi_gen_id));
+ napi->napi_id = napi_gen_id;
- hlist_add_head_rcu(&napi->napi_hash_node,
- &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
+ hlist_add_head_rcu(&napi->napi_hash_node,
+ &napi_hash[napi->napi_id % HASH_SIZE(napi_hash)]);
- spin_unlock(&napi_hash_lock);
- }
+ spin_unlock(&napi_hash_lock);
}
EXPORT_SYMBOL_GPL(napi_hash_add);
@@ -7052,8 +7069,8 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
} else {
netdev_stats_to_stats64(storage, &dev->stats);
}
- storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
- storage->tx_dropped += atomic_long_read(&dev->tx_dropped);
+ storage->rx_dropped += (unsigned long)atomic_long_read(&dev->rx_dropped);
+ storage->tx_dropped += (unsigned long)atomic_long_read(&dev->tx_dropped);
return storage;
}
EXPORT_SYMBOL(dev_get_stats);
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index b94b1d293506..151e047ce072 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -28,6 +28,7 @@ static int dev_ifname(struct net *net, struct ifreq __user *arg)
if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
return -EFAULT;
+ ifr.ifr_name[IFNAMSIZ-1] = 0;
error = netdev_get_name(net, ifr.ifr_name, ifr.ifr_ifindex);
if (error)
diff --git a/net/core/dst.c b/net/core/dst.c
index a1656e3b8d72..e72d706f8d0c 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -151,13 +151,13 @@ int dst_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(dst_discard_out);
-const u32 dst_default_metrics[RTAX_MAX + 1] = {
+const struct dst_metrics dst_default_metrics = {
/* This initializer is needed to force linker to place this variable
* into const section. Otherwise it might end into bss section.
* We really want to avoid false sharing on this variable, and catch
* any writes on it.
*/
- [RTAX_MAX] = 0xdeadbeef,
+ .refcnt = ATOMIC_INIT(1),
};
void dst_init(struct dst_entry *dst, struct dst_ops *ops,
@@ -169,7 +169,7 @@ void dst_init(struct dst_entry *dst, struct dst_ops *ops,
if (dev)
dev_hold(dev);
dst->ops = ops;
- dst_init_metrics(dst, dst_default_metrics, true);
+ dst_init_metrics(dst, dst_default_metrics.metrics, true);
dst->expires = 0UL;
dst->path = dst;
dst->from = NULL;
@@ -315,25 +315,30 @@ EXPORT_SYMBOL(dst_release);
u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old)
{
- u32 *p = kmalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC);
+ struct dst_metrics *p = kmalloc(sizeof(*p), GFP_ATOMIC);
if (p) {
- u32 *old_p = __DST_METRICS_PTR(old);
+ struct dst_metrics *old_p = (struct dst_metrics *)__DST_METRICS_PTR(old);
unsigned long prev, new;
- memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+ atomic_set(&p->refcnt, 1);
+ memcpy(p->metrics, old_p->metrics, sizeof(p->metrics));
new = (unsigned long) p;
prev = cmpxchg(&dst->_metrics, old, new);
if (prev != old) {
kfree(p);
- p = __DST_METRICS_PTR(prev);
+ p = (struct dst_metrics *)__DST_METRICS_PTR(prev);
if (prev & DST_METRICS_READ_ONLY)
p = NULL;
+ } else if (prev & DST_METRICS_REFCOUNTED) {
+ if (atomic_dec_and_test(&old_p->refcnt))
+ kfree(old_p);
}
}
- return p;
+ BUILD_BUG_ON(offsetof(struct dst_metrics, metrics) != 0);
+ return (u32 *)p;
}
EXPORT_SYMBOL(dst_cow_metrics_generic);
@@ -342,7 +347,7 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
{
unsigned long prev, new;
- new = ((unsigned long) dst_default_metrics) | DST_METRICS_READ_ONLY;
+ new = ((unsigned long) &dst_default_metrics) | DST_METRICS_READ_ONLY;
prev = cmpxchg(&dst->_metrics, old, new);
if (prev == old)
kfree(__DST_METRICS_PTR(old));
@@ -457,6 +462,20 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event,
spin_lock_bh(&dst_garbage.lock);
dst = dst_garbage.list;
dst_garbage.list = NULL;
+ /* The code in dst_ifdown places a hold on the loopback device.
+ * If the gc entry processing is set to expire after a lengthy
+ * interval, this hold can cause netdev_wait_allrefs() to hang
+ * out and wait for a long time -- until the the loopback
+ * interface is released. If we're really unlucky, it'll emit
+ * pr_emerg messages to console too. Reset the interval here,
+ * so dst cleanups occur in a more timely fashion.
+ */
+ if (dst_garbage.timer_inc > DST_GC_INC) {
+ dst_garbage.timer_inc = DST_GC_INC;
+ dst_garbage.timer_expires = DST_GC_MIN;
+ mod_delayed_work(system_wq, &dst_gc_work,
+ dst_garbage.timer_expires);
+ }
spin_unlock_bh(&dst_garbage.lock);
if (last)
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 29edf74846fc..b6bca625b0d2 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -886,9 +886,12 @@ static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
if (regs.len > reglen)
regs.len = reglen;
- regbuf = vzalloc(reglen);
- if (reglen && !regbuf)
- return -ENOMEM;
+ regbuf = NULL;
+ if (reglen) {
+ regbuf = vzalloc(reglen);
+ if (!regbuf)
+ return -ENOMEM;
+ }
ops->get_regs(dev, &regs, regbuf);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index fe38ef58997c..5b3d611d8b5f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -897,6 +897,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(1) /* IFLA_LINKMODE */
+ nla_total_size(4) /* IFLA_CARRIER_CHANGES */
+ nla_total_size(4) /* IFLA_LINK_NETNSID */
+ + nla_total_size(4) /* IFLA_GROUP */
+ nla_total_size(ext_filter_mask
& RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */
+ rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */
@@ -1089,6 +1090,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
struct ifla_vf_mac vf_mac;
struct ifla_vf_info ivi;
+ memset(&ivi, 0, sizeof(ivi));
+
/* Not all SR-IOV capable drivers support the
* spoofcheck and "RSS query enable" query. Preset to
* -1 so the user space tool can detect that the driver
@@ -1097,7 +1100,6 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb,
ivi.spoofchk = -1;
ivi.rss_query_en = -1;
ivi.trusted = -1;
- memset(ivi.mac, 0, sizeof(ivi.mac));
/* The default value for VF link state is "auto"
* IFLA_VF_LINK_STATE_AUTO which equals zero
*/
@@ -1370,6 +1372,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN },
[IFLA_LINK_NETNSID] = { .type = NLA_S32 },
[IFLA_PROTO_DOWN] = { .type = NLA_U8 },
+ [IFLA_GROUP] = { .type = NLA_U32 },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -1458,13 +1461,13 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb)
cb->nlh->nlmsg_seq, 0,
NLM_F_MULTI,
ext_filter_mask);
- /* If we ran out of room on the first message,
- * we're in trouble
- */
- WARN_ON((err == -EMSGSIZE) && (skb->len == 0));
- if (err < 0)
- goto out;
+ if (err < 0) {
+ if (likely(skb->len))
+ goto out;
+
+ goto out_err;
+ }
nl_dump_check_consistent(cb, nlmsg_hdr(skb));
cont:
@@ -1472,10 +1475,12 @@ cont:
}
}
out:
+ err = skb->len;
+out_err:
cb->args[1] = idx;
cb->args[0] = h;
- return skb->len;
+ return err;
}
int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len)
@@ -1737,7 +1742,8 @@ static int do_setlink(const struct sk_buff *skb,
struct sockaddr *sa;
int len;
- len = sizeof(sa_family_t) + dev->addr_len;
+ len = sizeof(sa_family_t) + max_t(size_t, dev->addr_len,
+ sizeof(*sa));
sa = kmalloc(len, GFP_KERNEL);
if (!sa) {
err = -ENOMEM;
@@ -3127,8 +3133,12 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
err = br_dev->netdev_ops->ndo_bridge_getlink(
skb, portid, seq, dev,
filter_mask, NLM_F_MULTI);
- if (err < 0 && err != -EOPNOTSUPP)
- break;
+ if (err < 0 && err != -EOPNOTSUPP) {
+ if (likely(skb->len))
+ break;
+
+ goto out_err;
+ }
}
idx++;
}
@@ -3139,16 +3149,22 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb)
seq, dev,
filter_mask,
NLM_F_MULTI);
- if (err < 0 && err != -EOPNOTSUPP)
- break;
+ if (err < 0 && err != -EOPNOTSUPP) {
+ if (likely(skb->len))
+ break;
+
+ goto out_err;
+ }
}
idx++;
}
}
+ err = skb->len;
+out_err:
rcu_read_unlock();
cb->args[0] = idx;
- return skb->len;
+ return err;
}
static inline size_t bridge_nlmsg_size(void)
diff --git a/net/core/sock.c b/net/core/sock.c
index 9c708a5fb751..bd2fad27891e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1690,17 +1690,17 @@ EXPORT_SYMBOL(skb_set_owner_w);
void skb_orphan_partial(struct sk_buff *skb)
{
- /* TCP stack sets skb->ooo_okay based on sk_wmem_alloc,
- * so we do not completely orphan skb, but transfert all
- * accounted bytes but one, to avoid unexpected reorders.
- */
if (skb->destructor == sock_wfree
#ifdef CONFIG_INET
|| skb->destructor == tcp_wfree
#endif
) {
- atomic_sub(skb->truesize - 1, &skb->sk->sk_wmem_alloc);
- skb->truesize = 1;
+ struct sock *sk = skb->sk;
+
+ if (atomic_inc_not_zero(&sk->sk_refcnt)) {
+ atomic_sub(skb->truesize, &sk->sk_wmem_alloc);
+ skb->destructor = sock_efree;
+ }
} else {
skb_orphan(skb);
}