diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/dev.c | 157 | ||||
-rw-r--r-- | net/core/skbuff.c | 14 | ||||
-rw-r--r-- | net/core/sock.c | 3 |
3 files changed, 134 insertions, 40 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 0f9289ff0f2a..f1c1a198d4b7 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -184,6 +184,7 @@ static unsigned int napi_gen_id; static DEFINE_HASHTABLE(napi_hash, 8); static seqcount_t devnet_rename_seq; +static DEFINE_MUTEX(devnet_rename_mutex); static inline void dev_base_seq_inc(struct net *net) { @@ -205,14 +206,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex) static inline void rps_lock(struct softnet_data *sd) { #ifdef CONFIG_RPS - spin_lock(&sd->input_pkt_queue.lock); + raw_spin_lock(&sd->input_pkt_queue.raw_lock); #endif } static inline void rps_unlock(struct softnet_data *sd) { #ifdef CONFIG_RPS - spin_unlock(&sd->input_pkt_queue.lock); + raw_spin_unlock(&sd->input_pkt_queue.raw_lock); #endif } @@ -852,7 +853,8 @@ retry: strcpy(name, dev->name); rcu_read_unlock(); if (read_seqcount_retry(&devnet_rename_seq, seq)) { - cond_resched(); + mutex_lock(&devnet_rename_mutex); + mutex_unlock(&devnet_rename_mutex); goto retry; } @@ -1121,20 +1123,17 @@ int dev_change_name(struct net_device *dev, const char *newname) if (dev->flags & IFF_UP) return -EBUSY; - write_seqcount_begin(&devnet_rename_seq); + mutex_lock(&devnet_rename_mutex); + __raw_write_seqcount_begin(&devnet_rename_seq); - if (strncmp(newname, dev->name, IFNAMSIZ) == 0) { - write_seqcount_end(&devnet_rename_seq); - return 0; - } + if (strncmp(newname, dev->name, IFNAMSIZ) == 0) + goto outunlock; memcpy(oldname, dev->name, IFNAMSIZ); err = dev_get_valid_name(net, dev, newname); - if (err < 0) { - write_seqcount_end(&devnet_rename_seq); - return err; - } + if (err < 0) + goto outunlock; if (oldname[0] && !strchr(oldname, '%')) netdev_info(dev, "renamed from %s\n", oldname); @@ -1147,11 +1146,12 @@ rollback: if (ret) { memcpy(dev->name, oldname, IFNAMSIZ); dev->name_assign_type = old_assign_type; - write_seqcount_end(&devnet_rename_seq); - return ret; + err = ret; + goto outunlock; } - write_seqcount_end(&devnet_rename_seq); + __raw_write_seqcount_end(&devnet_rename_seq); + mutex_unlock(&devnet_rename_mutex); netdev_adjacent_rename_links(dev, oldname); @@ -1172,7 +1172,8 @@ rollback: /* err >= 0 after dev_alloc_name() or stores the first errno */ if (err >= 0) { err = ret; - write_seqcount_begin(&devnet_rename_seq); + mutex_lock(&devnet_rename_mutex); + __raw_write_seqcount_begin(&devnet_rename_seq); memcpy(dev->name, oldname, IFNAMSIZ); memcpy(oldname, newname, IFNAMSIZ); dev->name_assign_type = old_assign_type; @@ -1185,6 +1186,11 @@ rollback: } return err; + +outunlock: + __raw_write_seqcount_end(&devnet_rename_seq); + mutex_unlock(&devnet_rename_mutex); + return err; } /** @@ -2214,6 +2220,7 @@ static inline void __netif_reschedule(struct Qdisc *q) sd->output_queue_tailp = &q->next_sched; raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); + preempt_check_resched_rt(); } void __netif_schedule(struct Qdisc *q) @@ -2295,6 +2302,7 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) __this_cpu_write(softnet_data.completion_queue, skb); raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_restore(flags); + preempt_check_resched_rt(); } EXPORT_SYMBOL(__dev_kfree_skb_irq); @@ -2820,7 +2828,11 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, * This permits __QDISC___STATE_RUNNING owner to get the lock more * often and dequeue packets faster. */ +#ifdef CONFIG_PREEMPT_RT_FULL + contended = true; +#else contended = qdisc_is_running(q); +#endif if (unlikely(contended)) spin_lock(&q->busylock); @@ -2880,9 +2892,44 @@ static void skb_update_prio(struct sk_buff *skb) #define skb_update_prio(skb) #endif +#ifdef CONFIG_PREEMPT_RT_FULL + +static inline int xmit_rec_read(void) +{ + return current->xmit_recursion; +} + +static inline void xmit_rec_inc(void) +{ + current->xmit_recursion++; +} + +static inline void xmit_rec_dec(void) +{ + current->xmit_recursion--; +} + +#else + DEFINE_PER_CPU(int, xmit_recursion); EXPORT_SYMBOL(xmit_recursion); +static inline int xmit_rec_read(void) +{ + return __this_cpu_read(xmit_recursion); +} + +static inline void xmit_rec_inc(void) +{ + __this_cpu_inc(xmit_recursion); +} + +static inline int xmit_rec_dec(void) +{ + __this_cpu_dec(xmit_recursion); +} +#endif + #define RECURSION_LIMIT 10 /** @@ -2984,7 +3031,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) if (txq->xmit_lock_owner != cpu) { - if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT) + if (xmit_rec_read() > RECURSION_LIMIT) goto recursion_alert; skb = validate_xmit_skb(skb, dev); @@ -2994,9 +3041,9 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) HARD_TX_LOCK(dev, txq, cpu); if (!netif_xmit_stopped(txq)) { - __this_cpu_inc(xmit_recursion); + xmit_rec_inc(); skb = dev_hard_start_xmit(skb, dev, txq, &rc); - __this_cpu_dec(xmit_recursion); + xmit_rec_dec(); if (dev_xmit_complete(rc)) { HARD_TX_UNLOCK(dev, txq); goto out; @@ -3370,6 +3417,7 @@ drop: rps_unlock(sd); local_irq_restore(flags); + preempt_check_resched_rt(); atomic_long_inc(&skb->dev->rx_dropped); kfree_skb(skb); @@ -3388,7 +3436,7 @@ static int netif_rx_internal(struct sk_buff *skb) struct rps_dev_flow voidflow, *rflow = &voidflow; int cpu; - preempt_disable(); + migrate_disable(); rcu_read_lock(); cpu = get_rps_cpu(skb->dev, skb, &rflow); @@ -3398,13 +3446,13 @@ static int netif_rx_internal(struct sk_buff *skb) ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail); rcu_read_unlock(); - preempt_enable(); + migrate_enable(); } else #endif { unsigned int qtail; - ret = enqueue_to_backlog(skb, get_cpu(), &qtail); - put_cpu(); + ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail); + put_cpu_light(); } return ret; } @@ -3438,16 +3486,44 @@ int netif_rx_ni(struct sk_buff *skb) trace_netif_rx_ni_entry(skb); - preempt_disable(); + local_bh_disable(); err = netif_rx_internal(skb); - if (local_softirq_pending()) - do_softirq(); - preempt_enable(); + local_bh_enable(); return err; } EXPORT_SYMBOL(netif_rx_ni); +#ifdef CONFIG_PREEMPT_RT_FULL +/* + * RT runs ksoftirqd as a real time thread and the root_lock is a + * "sleeping spinlock". If the trylock fails then we can go into an + * infinite loop when ksoftirqd preempted the task which actually + * holds the lock, because we requeue q and raise NET_TX softirq + * causing ksoftirqd to loop forever. + * + * It's safe to use spin_lock on RT here as softirqs run in thread + * context and cannot deadlock against the thread which is holding + * root_lock. + * + * On !RT the trylock might fail, but there we bail out from the + * softirq loop after 10 attempts which we can't do on RT. And the + * task holding root_lock cannot be preempted, so the only downside of + * that trylock is that we need 10 loops to decide that we should have + * given up in the first one :) + */ +static inline int take_root_lock(spinlock_t *lock) +{ + spin_lock(lock); + return 1; +} +#else +static inline int take_root_lock(spinlock_t *lock) +{ + return spin_trylock(lock); +} +#endif + static void net_tx_action(struct softirq_action *h) { struct softnet_data *sd = this_cpu_ptr(&softnet_data); @@ -3489,7 +3565,7 @@ static void net_tx_action(struct softirq_action *h) head = head->next_sched; root_lock = qdisc_lock(q); - if (spin_trylock(root_lock)) { + if (take_root_lock(root_lock)) { smp_mb__before_atomic(); clear_bit(__QDISC_STATE_SCHED, &q->state); @@ -3886,7 +3962,7 @@ static void flush_backlog(void *arg) skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { if (skb->dev == dev) { __skb_unlink(skb, &sd->input_pkt_queue); - kfree_skb(skb); + __skb_queue_tail(&sd->tofree_queue, skb); input_queue_head_incr(sd); } } @@ -3895,10 +3971,13 @@ static void flush_backlog(void *arg) skb_queue_walk_safe(&sd->process_queue, skb, tmp) { if (skb->dev == dev) { __skb_unlink(skb, &sd->process_queue); - kfree_skb(skb); + __skb_queue_tail(&sd->tofree_queue, skb); input_queue_head_incr(sd); } } + + if (!skb_queue_empty(&sd->tofree_queue)) + raise_softirq_irqoff(NET_RX_SOFTIRQ); } static int napi_gro_complete(struct sk_buff *skb) @@ -4350,6 +4429,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) sd->rps_ipi_list = NULL; local_irq_enable(); + preempt_check_resched_rt(); /* Send pending IPI's to kick RPS processing on remote cpus. */ while (remsd) { @@ -4363,6 +4443,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd) } else #endif local_irq_enable(); + preempt_check_resched_rt(); } static bool sd_has_rps_ipi_waiting(struct softnet_data *sd) @@ -4444,6 +4525,7 @@ void __napi_schedule(struct napi_struct *n) local_irq_save(flags); ____napi_schedule(this_cpu_ptr(&softnet_data), n); local_irq_restore(flags); + preempt_check_resched_rt(); } EXPORT_SYMBOL(__napi_schedule); @@ -4718,7 +4800,7 @@ static void net_rx_action(struct softirq_action *h) list_splice_tail(&repoll, &list); list_splice(&list, &sd->poll_list); if (!list_empty(&sd->poll_list)) - __raise_softirq_irqoff(NET_RX_SOFTIRQ); + __raise_softirq_irqoff_ksoft(NET_RX_SOFTIRQ); net_rps_action_and_irq_enable(sd); } @@ -6932,7 +7014,7 @@ EXPORT_SYMBOL(free_netdev); void synchronize_net(void) { might_sleep(); - if (rtnl_is_locked()) + if (rtnl_is_locked() && !IS_ENABLED(CONFIG_PREEMPT_RT_FULL)) synchronize_rcu_expedited(); else synchronize_rcu(); @@ -7173,16 +7255,20 @@ static int dev_cpu_callback(struct notifier_block *nfb, raise_softirq_irqoff(NET_TX_SOFTIRQ); local_irq_enable(); + preempt_check_resched_rt(); /* Process offline CPU's input_pkt_queue */ while ((skb = __skb_dequeue(&oldsd->process_queue))) { netif_rx_ni(skb); input_queue_head_incr(oldsd); } - while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) { + while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) { netif_rx_ni(skb); input_queue_head_incr(oldsd); } + while ((skb = __skb_dequeue(&oldsd->tofree_queue))) { + kfree_skb(skb); + } return NOTIFY_OK; } @@ -7484,8 +7570,9 @@ static int __init net_dev_init(void) for_each_possible_cpu(i) { struct softnet_data *sd = &per_cpu(softnet_data, i); - skb_queue_head_init(&sd->input_pkt_queue); - skb_queue_head_init(&sd->process_queue); + skb_queue_head_init_raw(&sd->input_pkt_queue); + skb_queue_head_init_raw(&sd->process_queue); + skb_queue_head_init_raw(&sd->tofree_queue); INIT_LIST_HEAD(&sd->poll_list); sd->output_queue_tailp = &sd->output_queue; #ifdef CONFIG_RPS diff --git a/net/core/skbuff.c b/net/core/skbuff.c index c9793c6c5005..d4516d7d58aa 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -63,6 +63,7 @@ #include <linux/errqueue.h> #include <linux/prefetch.h> #include <linux/if_vlan.h> +#include <linux/locallock.h> #include <net/protocol.h> #include <net/dst.h> @@ -358,6 +359,8 @@ struct netdev_alloc_cache { }; static DEFINE_PER_CPU(struct netdev_alloc_cache, netdev_alloc_cache); static DEFINE_PER_CPU(struct netdev_alloc_cache, napi_alloc_cache); +static DEFINE_LOCAL_IRQ_LOCK(netdev_alloc_lock); +static DEFINE_LOCAL_IRQ_LOCK(napi_alloc_cache_lock); static struct page *__page_frag_refill(struct netdev_alloc_cache *nc, gfp_t gfp_mask) @@ -435,9 +438,9 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) unsigned long flags; void *data; - local_irq_save(flags); + local_lock_irqsave(netdev_alloc_lock, flags); data = __alloc_page_frag(&netdev_alloc_cache, fragsz, gfp_mask); - local_irq_restore(flags); + local_unlock_irqrestore(netdev_alloc_lock, flags); return data; } @@ -456,7 +459,12 @@ EXPORT_SYMBOL(netdev_alloc_frag); static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) { - return __alloc_page_frag(&napi_alloc_cache, fragsz, gfp_mask); + void *data; + + local_lock(napi_alloc_cache_lock); + data = __alloc_page_frag(&napi_alloc_cache, fragsz, gfp_mask); + local_unlock(napi_alloc_cache_lock); + return data; } void *napi_alloc_frag(unsigned int fragsz) diff --git a/net/core/sock.c b/net/core/sock.c index 47fc8bb3b946..23a1423f78ca 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2369,12 +2369,11 @@ void lock_sock_nested(struct sock *sk, int subclass) if (sk->sk_lock.owned) __lock_sock(sk); sk->sk_lock.owned = 1; - spin_unlock(&sk->sk_lock.slock); + spin_unlock_bh(&sk->sk_lock.slock); /* * The sk_lock has mutex_lock() semantics here: */ mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); - local_bh_enable(); } EXPORT_SYMBOL(lock_sock_nested); |