aboutsummaryrefslogtreecommitdiff
path: root/net/core/dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--net/core/dev.c112
1 files changed, 83 insertions, 29 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index f6d8d7fe29ab..60bb6cefcc2c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -181,6 +181,7 @@ static unsigned int napi_gen_id;
static DEFINE_HASHTABLE(napi_hash, 8);
static seqcount_t devnet_rename_seq;
+static DEFINE_MUTEX(devnet_rename_mutex);
static inline void dev_base_seq_inc(struct net *net)
{
@@ -202,14 +203,14 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
static inline void rps_lock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
- spin_lock(&sd->input_pkt_queue.lock);
+ raw_spin_lock(&sd->input_pkt_queue.raw_lock);
#endif
}
static inline void rps_unlock(struct softnet_data *sd)
{
#ifdef CONFIG_RPS
- spin_unlock(&sd->input_pkt_queue.lock);
+ raw_spin_unlock(&sd->input_pkt_queue.raw_lock);
#endif
}
@@ -831,7 +832,8 @@ retry:
strcpy(name, dev->name);
rcu_read_unlock();
if (read_seqcount_retry(&devnet_rename_seq, seq)) {
- cond_resched();
+ mutex_lock(&devnet_rename_mutex);
+ mutex_unlock(&devnet_rename_mutex);
goto retry;
}
@@ -1097,30 +1099,28 @@ int dev_change_name(struct net_device *dev, const char *newname)
if (dev->flags & IFF_UP)
return -EBUSY;
- write_seqcount_begin(&devnet_rename_seq);
+ mutex_lock(&devnet_rename_mutex);
+ __raw_write_seqcount_begin(&devnet_rename_seq);
- if (strncmp(newname, dev->name, IFNAMSIZ) == 0) {
- write_seqcount_end(&devnet_rename_seq);
- return 0;
- }
+ if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
+ goto outunlock;
memcpy(oldname, dev->name, IFNAMSIZ);
err = dev_get_valid_name(net, dev, newname);
- if (err < 0) {
- write_seqcount_end(&devnet_rename_seq);
- return err;
- }
+ if (err < 0)
+ goto outunlock;
rollback:
ret = device_rename(&dev->dev, dev->name);
if (ret) {
memcpy(dev->name, oldname, IFNAMSIZ);
- write_seqcount_end(&devnet_rename_seq);
- return ret;
+ err = ret;
+ goto outunlock;
}
- write_seqcount_end(&devnet_rename_seq);
+ __raw_write_seqcount_end(&devnet_rename_seq);
+ mutex_unlock(&devnet_rename_mutex);
netdev_adjacent_rename_links(dev, oldname);
@@ -1141,7 +1141,8 @@ rollback:
/* err >= 0 after dev_alloc_name() or stores the first errno */
if (err >= 0) {
err = ret;
- write_seqcount_begin(&devnet_rename_seq);
+ mutex_lock(&devnet_rename_mutex);
+ __raw_write_seqcount_begin(&devnet_rename_seq);
memcpy(dev->name, oldname, IFNAMSIZ);
memcpy(oldname, newname, IFNAMSIZ);
goto rollback;
@@ -1152,6 +1153,11 @@ rollback:
}
return err;
+
+outunlock:
+ __raw_write_seqcount_end(&devnet_rename_seq);
+ mutex_unlock(&devnet_rename_mutex);
+ return err;
}
/**
@@ -2148,6 +2154,7 @@ static inline void __netif_reschedule(struct Qdisc *q)
sd->output_queue_tailp = &q->next_sched;
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_restore(flags);
+ preempt_check_resched_rt();
}
void __netif_schedule(struct Qdisc *q)
@@ -2182,6 +2189,7 @@ void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason)
__this_cpu_write(softnet_data.completion_queue, skb);
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_restore(flags);
+ preempt_check_resched_rt();
}
EXPORT_SYMBOL(__dev_kfree_skb_irq);
@@ -3237,6 +3245,7 @@ enqueue:
rps_unlock(sd);
local_irq_restore(flags);
+ preempt_check_resched_rt();
atomic_long_inc(&skb->dev->rx_dropped);
kfree_skb(skb);
@@ -3259,7 +3268,7 @@ static int netif_rx_internal(struct sk_buff *skb)
struct rps_dev_flow voidflow, *rflow = &voidflow;
int cpu;
- preempt_disable();
+ migrate_disable();
rcu_read_lock();
cpu = get_rps_cpu(skb->dev, skb, &rflow);
@@ -3269,13 +3278,13 @@ static int netif_rx_internal(struct sk_buff *skb)
ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
rcu_read_unlock();
- preempt_enable();
+ migrate_enable();
} else
#endif
{
unsigned int qtail;
- ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
- put_cpu();
+ ret = enqueue_to_backlog(skb, get_cpu_light(), &qtail);
+ put_cpu_light();
}
return ret;
}
@@ -3309,16 +3318,44 @@ int netif_rx_ni(struct sk_buff *skb)
trace_netif_rx_ni_entry(skb);
- preempt_disable();
+ local_bh_disable();
err = netif_rx_internal(skb);
- if (local_softirq_pending())
- do_softirq();
- preempt_enable();
+ local_bh_enable();
return err;
}
EXPORT_SYMBOL(netif_rx_ni);
+#ifdef CONFIG_PREEMPT_RT_FULL
+/*
+ * RT runs ksoftirqd as a real time thread and the root_lock is a
+ * "sleeping spinlock". If the trylock fails then we can go into an
+ * infinite loop when ksoftirqd preempted the task which actually
+ * holds the lock, because we requeue q and raise NET_TX softirq
+ * causing ksoftirqd to loop forever.
+ *
+ * It's safe to use spin_lock on RT here as softirqs run in thread
+ * context and cannot deadlock against the thread which is holding
+ * root_lock.
+ *
+ * On !RT the trylock might fail, but there we bail out from the
+ * softirq loop after 10 attempts which we can't do on RT. And the
+ * task holding root_lock cannot be preempted, so the only downside of
+ * that trylock is that we need 10 loops to decide that we should have
+ * given up in the first one :)
+ */
+static inline int take_root_lock(spinlock_t *lock)
+{
+ spin_lock(lock);
+ return 1;
+}
+#else
+static inline int take_root_lock(spinlock_t *lock)
+{
+ return spin_trylock(lock);
+}
+#endif
+
static void net_tx_action(struct softirq_action *h)
{
struct softnet_data *sd = &__get_cpu_var(softnet_data);
@@ -3360,7 +3397,7 @@ static void net_tx_action(struct softirq_action *h)
head = head->next_sched;
root_lock = qdisc_lock(q);
- if (spin_trylock(root_lock)) {
+ if (take_root_lock(root_lock)) {
smp_mb__before_clear_bit();
clear_bit(__QDISC_STATE_SCHED,
&q->state);
@@ -3758,7 +3795,7 @@ static void flush_backlog(void *arg)
skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
if (skb->dev == dev) {
__skb_unlink(skb, &sd->input_pkt_queue);
- kfree_skb(skb);
+ __skb_queue_tail(&sd->tofree_queue, skb);
input_queue_head_incr(sd);
}
}
@@ -3767,10 +3804,13 @@ static void flush_backlog(void *arg)
skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
if (skb->dev == dev) {
__skb_unlink(skb, &sd->process_queue);
- kfree_skb(skb);
+ __skb_queue_tail(&sd->tofree_queue, skb);
input_queue_head_incr(sd);
}
}
+
+ if (!skb_queue_empty(&sd->tofree_queue))
+ raise_softirq_irqoff(NET_RX_SOFTIRQ);
}
static int napi_gro_complete(struct sk_buff *skb)
@@ -4157,6 +4197,7 @@ static void net_rps_action_and_irq_enable(struct softnet_data *sd)
} else
#endif
local_irq_enable();
+ preempt_check_resched_rt();
}
static int process_backlog(struct napi_struct *napi, int quota)
@@ -4229,6 +4270,7 @@ void __napi_schedule(struct napi_struct *n)
local_irq_save(flags);
____napi_schedule(&__get_cpu_var(softnet_data), n);
local_irq_restore(flags);
+ preempt_check_resched_rt();
}
EXPORT_SYMBOL(__napi_schedule);
@@ -4351,10 +4393,17 @@ static void net_rx_action(struct softirq_action *h)
struct softnet_data *sd = &__get_cpu_var(softnet_data);
unsigned long time_limit = jiffies + 2;
int budget = netdev_budget;
+ struct sk_buff *skb;
void *have;
local_irq_disable();
+ while ((skb = __skb_dequeue(&sd->tofree_queue))) {
+ local_irq_enable();
+ kfree_skb(skb);
+ local_irq_disable();
+ }
+
while (!list_empty(&sd->poll_list)) {
struct napi_struct *n;
int work, weight;
@@ -6830,6 +6879,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
raise_softirq_irqoff(NET_TX_SOFTIRQ);
local_irq_enable();
+ preempt_check_resched_rt();
/* Process offline CPU's input_pkt_queue */
while ((skb = __skb_dequeue(&oldsd->process_queue))) {
@@ -6840,6 +6890,9 @@ static int dev_cpu_callback(struct notifier_block *nfb,
netif_rx_internal(skb);
input_queue_head_incr(oldsd);
}
+ while ((skb = __skb_dequeue(&oldsd->tofree_queue))) {
+ kfree_skb(skb);
+ }
return NOTIFY_OK;
}
@@ -7149,8 +7202,9 @@ static int __init net_dev_init(void)
for_each_possible_cpu(i) {
struct softnet_data *sd = &per_cpu(softnet_data, i);
- skb_queue_head_init(&sd->input_pkt_queue);
- skb_queue_head_init(&sd->process_queue);
+ skb_queue_head_init_raw(&sd->input_pkt_queue);
+ skb_queue_head_init_raw(&sd->process_queue);
+ skb_queue_head_init_raw(&sd->tofree_queue);
INIT_LIST_HEAD(&sd->poll_list);
sd->output_queue_tailp = &sd->output_queue;
#ifdef CONFIG_RPS