From f8c141c3e915e3a040d4c1badde28e23f8cbe255 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 9 Dec 2011 09:35:39 +0300 Subject: nfc: signedness bug in __nci_request() wait_for_completion_interruptible_timeout() returns -ERESTARTSYS if interrupted so completion_rc needs to be signed. The current code probably returns -ETIMEDOUT if we hit this situation, but after this patch is applied it will return -ERESTARTSYS. Signed-off-by: Dan Carpenter Signed-off-by: John W. Linville --- net/nfc/nci/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 4047e29acb3b..25dae3f8f5c2 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -68,7 +68,7 @@ static int __nci_request(struct nci_dev *ndev, __u32 timeout) { int rc = 0; - unsigned long completion_rc; + long completion_rc; ndev->req_status = NCI_REQ_PEND; -- cgit v1.2.3 From 36e999a83a4a4badd389901eb6d23a30e199b8db Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Thu, 8 Dec 2011 17:23:21 -0800 Subject: Bluetooth: Prevent uninitialized data access in L2CAP configuration When configuring an ERTM or streaming mode connection, remote devices are expected to send an RFC option in a successful config response. A misbehaving remote device might not send an RFC option, and the L2CAP code should not access uninitialized data in this case. Signed-off-by: Mat Martineau Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- net/bluetooth/l2cap_core.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 5ea94a1eecf2..17b5b1cd9657 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -2152,7 +2152,7 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, voi void *ptr = req->data; int type, olen; unsigned long val; - struct l2cap_conf_rfc rfc; + struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC }; BT_DBG("chan %p, rsp %p, len %d, req %p", chan, rsp, len, data); @@ -2271,6 +2271,16 @@ static void l2cap_conf_rfc_get(struct l2cap_chan *chan, void *rsp, int len) } } + /* Use sane default values in case a misbehaving remote device + * did not send an RFC option. + */ + rfc.mode = chan->mode; + rfc.retrans_timeout = cpu_to_le16(L2CAP_DEFAULT_RETRANS_TO); + rfc.monitor_timeout = cpu_to_le16(L2CAP_DEFAULT_MONITOR_TO); + rfc.max_pdu_size = cpu_to_le16(chan->imtu); + + BT_ERR("Expected RFC option was not found, using defaults"); + done: switch (rfc.mode) { case L2CAP_MODE_ERTM: -- cgit v1.2.3 From 79e654787c67f6b05f73366ff8ccac72ba7249e6 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Tue, 6 Dec 2011 16:23:26 -0800 Subject: Bluetooth: Clear RFCOMM session timer when disconnecting last channel When the last RFCOMM data channel is closed, a timer is normally set up to disconnect the control channel at a later time. If the control channel disconnect command is sent with the timer pending, the timer needs to be cancelled. If the timer is not cancelled in this situation, the reference counting logic for the RFCOMM session does not work correctly when the remote device closes the L2CAP connection. The session is freed at the wrong time, leading to a kernel panic. Signed-off-by: Mat Martineau Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- net/bluetooth/rfcomm/core.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 4e32e18211f9..2d28dfe98389 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -1146,6 +1146,7 @@ static int rfcomm_recv_ua(struct rfcomm_session *s, u8 dlci) if (list_empty(&s->dlcs)) { s->state = BT_DISCONN; rfcomm_send_disc(s, 0); + rfcomm_session_clear_timer(s); } break; -- cgit v1.2.3 From d7660918fce210f421cc58c060ca3de71e4ffd37 Mon Sep 17 00:00:00 2001 From: "Gustavo F. Padovan" Date: Sun, 18 Dec 2011 22:33:30 -0200 Subject: Revert "Bluetooth: Revert: Fix L2CAP connection establishment" This reverts commit 4dff523a913197e3314c7b0d08734ab037709093. It was reported that this patch cause issues when trying to connect to legacy devices so reverting it. Reported-by: David Fries Signed-off-by: Gustavo F. Padovan --- net/bluetooth/hci_conn.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index e0af7237cd92..c1c597e3e198 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -673,7 +673,7 @@ int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type) goto encrypt; auth: - if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) + if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) return 0; if (!hci_conn_auth(conn, sec_level, auth_type)) -- cgit v1.2.3 From 9cef310fcdee12b49b8b4c96fd8f611c8873d284 Mon Sep 17 00:00:00 2001 From: Alex Juncu Date: Thu, 15 Dec 2011 23:01:25 +0000 Subject: llc: llc_cmsg_rcv was getting called after sk_eat_skb. Received non stream protocol packets were calling llc_cmsg_rcv that used a skb after that skb was released by sk_eat_skb. This caused received STP packets to generate kernel panics. Signed-off-by: Alexandru Juncu Signed-off-by: Kunjan Naik Signed-off-by: David S. Miller --- net/llc/af_llc.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index dfd3a648a551..a18e6c3d36e3 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -833,15 +833,15 @@ static int llc_ui_recvmsg(struct kiocb *iocb, struct socket *sock, copied += used; len -= used; + /* For non stream protcols we get one packet per recvmsg call */ + if (sk->sk_type != SOCK_STREAM) + goto copy_uaddr; + if (!(flags & MSG_PEEK)) { sk_eat_skb(sk, skb, 0); *seq = 0; } - /* For non stream protcols we get one packet per recvmsg call */ - if (sk->sk_type != SOCK_STREAM) - goto copy_uaddr; - /* Partial read */ if (used + offset < skb->len) continue; @@ -857,6 +857,12 @@ copy_uaddr: } if (llc_sk(sk)->cmsg_flags) llc_cmsg_rcv(msg, skb); + + if (!(flags & MSG_PEEK)) { + sk_eat_skb(sk, skb, 0); + *seq = 0; + } + goto out; } -- cgit v1.2.3 From 2692ba61a82203404abd7dd2a027bda962861f74 Mon Sep 17 00:00:00 2001 From: Xi Wang Date: Fri, 16 Dec 2011 12:44:15 +0000 Subject: sctp: fix incorrect overflow check on autoclose Commit 8ffd3208 voids the previous patches f6778aab and 810c0719 for limiting the autoclose value. If userspace passes in -1 on 32-bit platform, the overflow check didn't work and autoclose would be set to 0xffffffff. This patch defines a max_autoclose (in seconds) for limiting the value and exposes it through sysctl, with the following intentions. 1) Avoid overflowing autoclose * HZ. 2) Keep the default autoclose bound consistent across 32- and 64-bit platforms (INT_MAX / HZ in this patch). 3) Keep the autoclose value consistent between setsockopt() and getsockopt() calls. Suggested-by: Vlad Yasevich Signed-off-by: Xi Wang Signed-off-by: David S. Miller --- net/sctp/associola.c | 2 +- net/sctp/protocol.c | 3 +++ net/sctp/socket.c | 2 -- net/sctp/sysctl.c | 13 +++++++++++++ 4 files changed, 17 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 152b5b3c3fff..acd2edbc073e 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -173,7 +173,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a asoc->timeouts[SCTP_EVENT_TIMEOUT_HEARTBEAT] = 0; asoc->timeouts[SCTP_EVENT_TIMEOUT_SACK] = asoc->sackdelay; asoc->timeouts[SCTP_EVENT_TIMEOUT_AUTOCLOSE] = - (unsigned long)sp->autoclose * HZ; + min_t(unsigned long, sp->autoclose, sctp_max_autoclose) * HZ; /* Initializes the timers */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 61b9fca5a173..6f6ad8686833 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1285,6 +1285,9 @@ SCTP_STATIC __init int sctp_init(void) sctp_max_instreams = SCTP_DEFAULT_INSTREAMS; sctp_max_outstreams = SCTP_DEFAULT_OUTSTREAMS; + /* Initialize maximum autoclose timeout. */ + sctp_max_autoclose = INT_MAX / HZ; + /* Initialize handle used for association ids. */ idr_init(&sctp_assocs_id); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 13bf5fcdbff1..54a7cd2fdd7a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2200,8 +2200,6 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, return -EINVAL; if (copy_from_user(&sp->autoclose, optval, optlen)) return -EFAULT; - /* make sure it won't exceed MAX_SCHEDULE_TIMEOUT */ - sp->autoclose = min_t(long, sp->autoclose, MAX_SCHEDULE_TIMEOUT / HZ); return 0; } diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 6b3952961b85..60ffbd067ff7 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -53,6 +53,10 @@ static int sack_timer_min = 1; static int sack_timer_max = 500; static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */ static int rwnd_scale_max = 16; +static unsigned long max_autoclose_min = 0; +static unsigned long max_autoclose_max = + (MAX_SCHEDULE_TIMEOUT / HZ > UINT_MAX) + ? UINT_MAX : MAX_SCHEDULE_TIMEOUT / HZ; extern long sysctl_sctp_mem[3]; extern int sysctl_sctp_rmem[3]; @@ -258,6 +262,15 @@ static ctl_table sctp_table[] = { .extra1 = &one, .extra2 = &rwnd_scale_max, }, + { + .procname = "max_autoclose", + .data = &sctp_max_autoclose, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = &proc_doulongvec_minmax, + .extra1 = &max_autoclose_min, + .extra2 = &max_autoclose_max, + }, { /* sentinel */ } }; -- cgit v1.2.3 From a76c0adf60f6ca5ff3481992e4ea0383776b24d2 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 19 Dec 2011 04:11:40 +0000 Subject: sctp: Do not account for sizeof(struct sk_buff) in estimated rwnd When checking whether a DATA chunk fits into the estimated rwnd a full sizeof(struct sk_buff) is added to the needed chunk size. This quickly exhausts the available rwnd space and leads to packets being sent which are much below the PMTU limit. This can lead to much worse performance. The reason for this behaviour was to avoid putting too much memory pressure on the receiver. The concept is not completely irational because a Linux receiver does in fact clone an skb for each DATA chunk delivered. However, Linux also reserves half the available socket buffer space for data structures therefore usage of it is already accounted for. When proposing to change this the last time it was noted that this behaviour was introduced to solve a performance issue caused by rwnd overusage in combination with small DATA chunks. Trying to reproduce this I found that with the sk_buff overhead removed, the performance would improve significantly unless socket buffer limits are increased. The following numbers have been gathered using a patched iperf supporting SCTP over a live 1 Gbit ethernet network. The -l option was used to limit DATA chunk sizes. The numbers listed are based on the average of 3 test runs each. Default values have been used for sk_(r|w)mem. Chunk Size Unpatched No Overhead ------------------------------------- 4 15.2 Kbit [!] 12.2 Mbit [!] 8 35.8 Kbit [!] 26.0 Mbit [!] 16 95.5 Kbit [!] 54.4 Mbit [!] 32 106.7 Mbit 102.3 Mbit 64 189.2 Mbit 188.3 Mbit 128 331.2 Mbit 334.8 Mbit 256 537.7 Mbit 536.0 Mbit 512 766.9 Mbit 766.6 Mbit 1024 810.1 Mbit 808.6 Mbit Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- net/sctp/output.c | 8 +------- net/sctp/outqueue.c | 6 ++---- 2 files changed, 3 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/sctp/output.c b/net/sctp/output.c index 08b3cead6503..817174eb5f41 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -697,13 +697,7 @@ static void sctp_packet_append_data(struct sctp_packet *packet, /* Keep track of how many bytes are in flight to the receiver. */ asoc->outqueue.outstanding_bytes += datasize; - /* Update our view of the receiver's rwnd. Include sk_buff overhead - * while updating peer.rwnd so that it reduces the chances of a - * receiver running out of receive buffer space even when receive - * window is still open. This can happen when a sender is sending - * sending small messages. - */ - datasize += sizeof(struct sk_buff); + /* Update our view of the receiver's rwnd. */ if (datasize < rwnd) rwnd -= datasize; else diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 14c2b06028ff..cfeb1d4a1ee6 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -411,8 +411,7 @@ void sctp_retransmit_mark(struct sctp_outq *q, chunk->transport->flight_size -= sctp_data_size(chunk); q->outstanding_bytes -= sctp_data_size(chunk); - q->asoc->peer.rwnd += (sctp_data_size(chunk) + - sizeof(struct sk_buff)); + q->asoc->peer.rwnd += sctp_data_size(chunk); } continue; } @@ -432,8 +431,7 @@ void sctp_retransmit_mark(struct sctp_outq *q, * (Section 7.2.4)), add the data size of those * chunks to the rwnd. */ - q->asoc->peer.rwnd += (sctp_data_size(chunk) + - sizeof(struct sk_buff)); + q->asoc->peer.rwnd += sctp_data_size(chunk); q->outstanding_bytes -= sctp_data_size(chunk); if (chunk->transport) transport->flight_size -= sctp_data_size(chunk); -- cgit v1.2.3 From cd7816d14953c8af910af5bb92f488b0b277e29d Mon Sep 17 00:00:00 2001 From: Gerlando Falauto Date: Mon, 19 Dec 2011 22:58:04 +0000 Subject: net: have ipconfig not wait if no dev is available previous commit 3fb72f1e6e6165c5f495e8dc11c5bbd14c73385c makes IP-Config wait for carrier on at least one network device. Before waiting (predefined value 120s), check that at least one device was successfully brought up. Otherwise (e.g. buggy bootloader which does not set the MAC address) there is no point in waiting for carrier. Cc: Micha Nelissen Cc: Holger Brunck Signed-off-by: Gerlando Falauto Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 0da2afc97f32..99ec116bef14 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -253,6 +253,10 @@ static int __init ic_open_devs(void) } } + /* no point in waiting if we could not bring up at least one device */ + if (!ic_first_dev) + goto have_carrier; + /* wait for a carrier on at least one device */ start = jiffies; while (jiffies - start < msecs_to_jiffies(CONF_CARRIER_TIMEOUT)) { -- cgit v1.2.3 From 9f28a2fc0bd77511f649c0a788c7bf9a5fd04edb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 21 Dec 2011 15:47:16 -0500 Subject: ipv4: reintroduce route cache garbage collector Commit 2c8cec5c10b (ipv4: Cache learned PMTU information in inetpeer) removed IP route cache garbage collector a bit too soon, as this gc was responsible for expired routes cleanup, releasing their neighbour reference. As pointed out by Robert Gladewitz, recent kernels can fill and exhaust their neighbour cache. Reintroduce the garbage collection, since we'll have to wait our neighbour lookups become refcount-less to not depend on this stuff. Reported-by: Robert Gladewitz Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/route.c | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) (limited to 'net') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 46af62363b8c..252c512e8a81 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -120,6 +120,7 @@ static int ip_rt_max_size; static int ip_rt_gc_timeout __read_mostly = RT_GC_TIMEOUT; +static int ip_rt_gc_interval __read_mostly = 60 * HZ; static int ip_rt_gc_min_interval __read_mostly = HZ / 2; static int ip_rt_redirect_number __read_mostly = 9; static int ip_rt_redirect_load __read_mostly = HZ / 50; @@ -133,6 +134,9 @@ static int ip_rt_min_advmss __read_mostly = 256; static int rt_chain_length_max __read_mostly = 20; static int redirect_genid; +static struct delayed_work expires_work; +static unsigned long expires_ljiffies; + /* * Interface to generic destination cache. */ @@ -830,6 +834,97 @@ static int has_noalias(const struct rtable *head, const struct rtable *rth) return ONE; } +static void rt_check_expire(void) +{ + static unsigned int rover; + unsigned int i = rover, goal; + struct rtable *rth; + struct rtable __rcu **rthp; + unsigned long samples = 0; + unsigned long sum = 0, sum2 = 0; + unsigned long delta; + u64 mult; + + delta = jiffies - expires_ljiffies; + expires_ljiffies = jiffies; + mult = ((u64)delta) << rt_hash_log; + if (ip_rt_gc_timeout > 1) + do_div(mult, ip_rt_gc_timeout); + goal = (unsigned int)mult; + if (goal > rt_hash_mask) + goal = rt_hash_mask + 1; + for (; goal > 0; goal--) { + unsigned long tmo = ip_rt_gc_timeout; + unsigned long length; + + i = (i + 1) & rt_hash_mask; + rthp = &rt_hash_table[i].chain; + + if (need_resched()) + cond_resched(); + + samples++; + + if (rcu_dereference_raw(*rthp) == NULL) + continue; + length = 0; + spin_lock_bh(rt_hash_lock_addr(i)); + while ((rth = rcu_dereference_protected(*rthp, + lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) { + prefetch(rth->dst.rt_next); + if (rt_is_expired(rth)) { + *rthp = rth->dst.rt_next; + rt_free(rth); + continue; + } + if (rth->dst.expires) { + /* Entry is expired even if it is in use */ + if (time_before_eq(jiffies, rth->dst.expires)) { +nofree: + tmo >>= 1; + rthp = &rth->dst.rt_next; + /* + * We only count entries on + * a chain with equal hash inputs once + * so that entries for different QOS + * levels, and other non-hash input + * attributes don't unfairly skew + * the length computation + */ + length += has_noalias(rt_hash_table[i].chain, rth); + continue; + } + } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout)) + goto nofree; + + /* Cleanup aged off entries. */ + *rthp = rth->dst.rt_next; + rt_free(rth); + } + spin_unlock_bh(rt_hash_lock_addr(i)); + sum += length; + sum2 += length*length; + } + if (samples) { + unsigned long avg = sum / samples; + unsigned long sd = int_sqrt(sum2 / samples - avg*avg); + rt_chain_length_max = max_t(unsigned long, + ip_rt_gc_elasticity, + (avg + 4*sd) >> FRACT_BITS); + } + rover = i; +} + +/* + * rt_worker_func() is run in process context. + * we call rt_check_expire() to scan part of the hash table + */ +static void rt_worker_func(struct work_struct *work) +{ + rt_check_expire(); + schedule_delayed_work(&expires_work, ip_rt_gc_interval); +} + /* * Perturbation of rt_genid by a small quantity [1..256] * Using 8 bits of shuffling ensure we can call rt_cache_invalidate() @@ -3178,6 +3273,13 @@ static ctl_table ipv4_route_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "gc_interval", + .data = &ip_rt_gc_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, { .procname = "redirect_load", .data = &ip_rt_redirect_load, @@ -3388,6 +3490,11 @@ int __init ip_rt_init(void) devinet_init(); ip_fib_init(); + INIT_DELAYED_WORK_DEFERRABLE(&expires_work, rt_worker_func); + expires_ljiffies = jiffies; + schedule_delayed_work(&expires_work, + net_random() % ip_rt_gc_interval + ip_rt_gc_interval); + if (ip_rt_proc_init()) printk(KERN_ERR "Unable to create route proc files\n"); #ifdef CONFIG_XFRM -- cgit v1.2.3 From c0ed1c14a72ca9ebacd51fb94a8aca488b0d361e Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 21 Dec 2011 16:48:08 -0500 Subject: net: Add a flow_cache_flush_deferred function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit flow_cach_flush() might sleep but can be called from atomic context via the xfrm garbage collector. So add a flow_cache_flush_deferred() function and use this if the xfrm garbage colector is invoked from within the packet path. Signed-off-by: Steffen Klassert Acked-by: Timo Teräs Signed-off-by: David S. Miller --- net/core/flow.c | 12 ++++++++++++ net/xfrm/xfrm_policy.c | 18 ++++++++++++++---- 2 files changed, 26 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/flow.c b/net/core/flow.c index 8ae42de9c79e..e318c7e98042 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -358,6 +358,18 @@ void flow_cache_flush(void) put_online_cpus(); } +static void flow_cache_flush_task(struct work_struct *work) +{ + flow_cache_flush(); +} + +static DECLARE_WORK(flow_cache_flush_work, flow_cache_flush_task); + +void flow_cache_flush_deferred(void) +{ + schedule_work(&flow_cache_flush_work); +} + static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) { struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 2118d6446630..9049a5caeb25 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2276,8 +2276,6 @@ static void __xfrm_garbage_collect(struct net *net) { struct dst_entry *head, *next; - flow_cache_flush(); - spin_lock_bh(&xfrm_policy_sk_bundle_lock); head = xfrm_policy_sk_bundles; xfrm_policy_sk_bundles = NULL; @@ -2290,6 +2288,18 @@ static void __xfrm_garbage_collect(struct net *net) } } +static void xfrm_garbage_collect(struct net *net) +{ + flow_cache_flush(); + __xfrm_garbage_collect(net); +} + +static void xfrm_garbage_collect_deferred(struct net *net) +{ + flow_cache_flush_deferred(); + __xfrm_garbage_collect(net); +} + static void xfrm_init_pmtu(struct dst_entry *dst) { do { @@ -2422,7 +2432,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) if (likely(dst_ops->neigh_lookup == NULL)) dst_ops->neigh_lookup = xfrm_neigh_lookup; if (likely(afinfo->garbage_collect == NULL)) - afinfo->garbage_collect = __xfrm_garbage_collect; + afinfo->garbage_collect = xfrm_garbage_collect_deferred; xfrm_policy_afinfo[afinfo->family] = afinfo; } write_unlock_bh(&xfrm_policy_afinfo_lock); @@ -2516,7 +2526,7 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void switch (event) { case NETDEV_DOWN: - __xfrm_garbage_collect(dev_net(dev)); + xfrm_garbage_collect(dev_net(dev)); } return NOTIFY_DONE; } -- cgit v1.2.3