diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/ceph/osd_client.c | 31 | ||||
-rw-r--r-- | net/core/dev.c | 4 | ||||
-rw-r--r-- | net/core/skbuff.c | 30 | ||||
-rw-r--r-- | net/core/sock.c | 19 | ||||
-rw-r--r-- | net/ipv4/ip_forward.c | 3 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 7 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 66 | ||||
-rw-r--r-- | net/ipv6/ip6_output.c | 3 | ||||
-rw-r--r-- | net/ipv6/ndisc.c | 9 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 2 | ||||
-rw-r--r-- | net/mac80211/wep.c | 6 | ||||
-rw-r--r-- | net/netfilter/nf_conntrack_proto_generic.c | 26 | ||||
-rw-r--r-- | net/netlink/af_netlink.c | 6 | ||||
-rw-r--r-- | net/socket.c | 24 | ||||
-rw-r--r-- | net/sunrpc/auth_gss/gss_rpc_xdr.c | 23 |
16 files changed, 189 insertions, 72 deletions
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 45f077c60348..5b7ef5bb90ed 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1932,20 +1932,29 @@ static void kick_requests(struct ceph_osd_client *osdc, bool force_resend, err = __map_request(osdc, req, force_resend || force_resend_writes); dout("__map_request returned %d\n", err); - if (err == 0) - continue; /* no change and no osd was specified */ if (err < 0) continue; /* hrm! */ - if (req->r_osd == NULL) { - dout("tid %llu maps to no valid osd\n", req->r_tid); - needmap++; /* request a newer map */ - continue; - } + if (req->r_osd == NULL || err > 0) { + if (req->r_osd == NULL) { + dout("lingering %p tid %llu maps to no osd\n", + req, req->r_tid); + /* + * A homeless lingering request makes + * no sense, as it's job is to keep + * a particular OSD connection open. + * Request a newer map and kick the + * request, knowing that it won't be + * resent until we actually get a map + * that can tell us where to send it. + */ + needmap++; + } - dout("kicking lingering %p tid %llu osd%d\n", req, req->r_tid, - req->r_osd ? req->r_osd->o_osd : -1); - __register_request(osdc, req); - __unregister_linger_request(osdc, req); + dout("kicking lingering %p tid %llu osd%d\n", req, + req->r_tid, req->r_osd ? req->r_osd->o_osd : -1); + __register_request(osdc, req); + __unregister_linger_request(osdc, req); + } } reset_changed_osds(osdc); mutex_unlock(&osdc->request_mutex); diff --git a/net/core/dev.c b/net/core/dev.c index f6d8d7fe29ab..73abbd77d72c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2775,7 +2775,9 @@ static void skb_update_prio(struct sk_buff *skb) #define skb_update_prio(skb) #endif -static DEFINE_PER_CPU(int, xmit_recursion); +DEFINE_PER_CPU(int, xmit_recursion); +EXPORT_SYMBOL(xmit_recursion); + #define RECURSION_LIMIT 10 /** diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e2b1bba69882..69ec61abfb37 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -278,13 +278,14 @@ nodata: EXPORT_SYMBOL(__alloc_skb); /** - * build_skb - build a network buffer + * __build_skb - build a network buffer * @data: data buffer provided by caller - * @frag_size: size of fragment, or 0 if head was kmalloced + * @frag_size: size of data, or 0 if head was kmalloced * * Allocate a new &sk_buff. Caller provides space holding head and * skb_shared_info. @data must have been allocated by kmalloc() only if - * @frag_size is 0, otherwise data should come from the page allocator. + * @frag_size is 0, otherwise data should come from the page allocator + * or vmalloc() * The return is the new skb buffer. * On a failure the return is %NULL, and @data is not freed. * Notes : @@ -295,7 +296,7 @@ EXPORT_SYMBOL(__alloc_skb); * before giving packet to stack. * RX rings only contains data buffers, not full skbs. */ -struct sk_buff *build_skb(void *data, unsigned int frag_size) +struct sk_buff *__build_skb(void *data, unsigned int frag_size) { struct skb_shared_info *shinfo; struct sk_buff *skb; @@ -309,7 +310,6 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) memset(skb, 0, offsetof(struct sk_buff, tail)); skb->truesize = SKB_TRUESIZE(size); - skb->head_frag = frag_size != 0; atomic_set(&skb->users, 1); skb->head = data; skb->data = data; @@ -326,6 +326,23 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size) return skb; } + +/* build_skb() is wrapper over __build_skb(), that specifically + * takes care of skb->head and skb->pfmemalloc + * This means that if @frag_size is not zero, then @data must be backed + * by a page fragment, not kmalloc() or vmalloc() + */ +struct sk_buff *build_skb(void *data, unsigned int frag_size) +{ + struct sk_buff *skb = __build_skb(data, frag_size); + + if (skb && frag_size) { + skb->head_frag = 1; + if (virt_to_head_page(data)->pfmemalloc) + skb->pfmemalloc = 1; + } + return skb; +} EXPORT_SYMBOL(build_skb); struct netdev_alloc_cache { @@ -352,7 +369,8 @@ refill: gfp_t gfp = gfp_mask; if (order) - gfp |= __GFP_COMP | __GFP_NOWARN; + gfp |= __GFP_COMP | __GFP_NOWARN | + __GFP_NOMEMALLOC; nc->frag.page = alloc_pages(gfp, order); if (likely(nc->frag.page)) break; diff --git a/net/core/sock.c b/net/core/sock.c index c8069561bdb7..650dd58ebd05 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -659,6 +659,25 @@ static inline void sock_valbool_flag(struct sock *sk, int bit, int valbool) sock_reset_flag(sk, bit); } +bool sk_mc_loop(struct sock *sk) +{ + if (dev_recursion_level()) + return false; + if (!sk) + return true; + switch (sk->sk_family) { + case AF_INET: + return inet_sk(sk)->mc_loop; +#if IS_ENABLED(CONFIG_IPV6) + case AF_INET6: + return inet6_sk(sk)->mc_loop; +#endif + } + WARN_ON(1); + return true; +} +EXPORT_SYMBOL(sk_mc_loop); + /* * This is meant for all protocols to use and covers goings on * at the socket level. Everything here is generic. diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index ecb34b5ea42f..57075c4508f7 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -127,6 +127,9 @@ int ip_forward(struct sk_buff *skb) struct rtable *rt; /* Route we use */ struct ip_options *opt = &(IPCB(skb)->opt); + if (unlikely(skb->sk)) + goto drop; + if (skb_warn_if_lro(skb)) goto drop; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b4842383aa2d..d3d2c0abbad7 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3065,10 +3065,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (seq_rtt < 0) { seq_rtt = ca_seq_rtt; } - if (!(sacked & TCPCB_SACKED_ACKED)) + if (!(sacked & TCPCB_SACKED_ACKED)) { reord = min(pkts_acked, reord); - if (!after(scb->end_seq, tp->high_seq)) - flag |= FLAG_ORIG_SACK_ACKED; + if (!after(scb->end_seq, tp->high_seq)) + flag |= FLAG_ORIG_SACK_ACKED; + } } if (sacked & TCPCB_SACKED_ACKED) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3530ad85c47c..ed04620a35ed 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1876,7 +1876,7 @@ void tcp_v4_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_edemux; if (sk->sk_state != TCP_TIME_WAIT) { - struct dst_entry *dst = sk->sk_rx_dst; + struct dst_entry *dst = ACCESS_ONCE(sk->sk_rx_dst); if (dst) dst = dst_check(dst, 0); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bbc40bb84373..ecd60bd9d616 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2595,39 +2595,65 @@ begin_fwd: } } -/* Send a fin. The caller locks the socket for us. This cannot be - * allowed to fail queueing a FIN frame under any circumstances. +/* We allow to exceed memory limits for FIN packets to expedite + * connection tear down and (memory) recovery. + * Otherwise tcp_send_fin() could be tempted to either delay FIN + * or even be forced to close flow without any FIN. + */ +static void sk_forced_wmem_schedule(struct sock *sk, int size) +{ + int amt, status; + + if (size <= sk->sk_forward_alloc) + return; + amt = sk_mem_pages(size); + sk->sk_forward_alloc += amt * SK_MEM_QUANTUM; + sk_memory_allocated_add(sk, amt, &status); +} + +/* Send a FIN. The caller locks the socket for us. + * We should try to send a FIN packet really hard, but eventually give up. */ void tcp_send_fin(struct sock *sk) { + struct sk_buff *skb, *tskb = tcp_write_queue_tail(sk); struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb = tcp_write_queue_tail(sk); - int mss_now; - /* Optimization, tack on the FIN if we have a queue of - * unsent frames. But be careful about outgoing SACKS - * and IP options. + /* Optimization, tack on the FIN if we have one skb in write queue and + * this skb was not yet sent, or we are under memory pressure. + * Note: in the latter case, FIN packet will be sent after a timeout, + * as TCP stack thinks it has already been transmitted. */ - mss_now = tcp_current_mss(sk); - - if (tcp_send_head(sk) != NULL) { - TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_FIN; - TCP_SKB_CB(skb)->end_seq++; + if (tskb && (tcp_send_head(sk) || sk_under_memory_pressure(sk))) { +coalesce: + TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN; + TCP_SKB_CB(tskb)->end_seq++; tp->write_seq++; + if (!tcp_send_head(sk)) { + /* This means tskb was already sent. + * Pretend we included the FIN on previous transmit. + * We need to set tp->snd_nxt to the value it would have + * if FIN had been sent. This is because retransmit path + * does not change tp->snd_nxt. + */ + tp->snd_nxt++; + return; + } } else { - /* Socket is locked, keep trying until memory is available. */ - for (;;) { - skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); - if (skb) - break; - yield(); + skb = alloc_skb_fclone(MAX_TCP_HEADER, sk->sk_allocation); + if (unlikely(!skb)) { + if (tskb) + goto coalesce; + return; } + skb_reserve(skb, MAX_TCP_HEADER); + sk_forced_wmem_schedule(sk, skb->truesize); /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ tcp_init_nondata_skb(skb, tp->write_seq, TCPHDR_ACK | TCPHDR_FIN); tcp_queue_skb(sk, skb); } - __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_OFF); + __tcp_push_pending_frames(sk, tcp_current_mss(sk), TCP_NAGLE_OFF); } /* We get here when a process closes a file descriptor (either due to @@ -2796,6 +2822,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, } #endif + /* Do not fool tcpdump (if any), clean our debris */ + skb->tstamp.tv64 = 0; return skb; } EXPORT_SYMBOL(tcp_make_synack); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index d7907ecf0b75..066d0b03f2b8 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -555,7 +555,8 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; struct rt6_info *rt = (struct rt6_info*)skb_dst(skb); - struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; + struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ? + inet6_sk(skb->sk) : NULL; struct ipv6hdr *tmp_hdr; struct frag_hdr *fh; unsigned int mtu, hlen, left, len; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 09a22f4f36c9..bcd65186b497 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1193,7 +1193,14 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (rt) rt6_set_expires(rt, jiffies + (HZ * lifetime)); if (ra_msg->icmph.icmp6_hop_limit) { - in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; + /* Only set hop_limit on the interface if it is higher than + * the current hop_limit. + */ + if (in6_dev->cnf.hop_limit < ra_msg->icmph.icmp6_hop_limit) { + in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit; + } else { + ND_PRINTK(2, warn, "RA: Got route advertisement with lower hop_limit than current\n"); + } if (rt) dst_metric_set(&rt->dst, RTAX_HOPLIMIT, ra_msg->icmph.icmp6_hop_limit); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 3291857694b1..a9b6ea11da29 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1636,7 +1636,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_edemux; if (sk->sk_state != TCP_TIME_WAIT) { - struct dst_entry *dst = sk->sk_rx_dst; + struct dst_entry *dst = ACCESS_ONCE(sk->sk_rx_dst); if (dst) dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 6ee2b5863572..f21b142dee1f 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -98,8 +98,7 @@ static u8 *ieee80211_wep_add_iv(struct ieee80211_local *local, hdr->frame_control |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); - if (WARN_ON(skb_tailroom(skb) < IEEE80211_WEP_ICV_LEN || - skb_headroom(skb) < IEEE80211_WEP_IV_LEN)) + if (WARN_ON(skb_headroom(skb) < IEEE80211_WEP_IV_LEN)) return NULL; hdrlen = ieee80211_hdrlen(hdr->frame_control); @@ -169,6 +168,9 @@ int ieee80211_wep_encrypt(struct ieee80211_local *local, size_t len; u8 rc4key[3 + WLAN_KEY_LEN_WEP104]; + if (WARN_ON(skb_tailroom(skb) < IEEE80211_WEP_ICV_LEN)) + return -1; + iv = ieee80211_wep_add_iv(local, skb, keylen, keyidx); if (!iv) return -1; diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index d25f29377648..957c1db66652 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -14,6 +14,30 @@ static unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ; +static bool nf_generic_should_process(u8 proto) +{ + switch (proto) { +#ifdef CONFIG_NF_CT_PROTO_SCTP_MODULE + case IPPROTO_SCTP: + return false; +#endif +#ifdef CONFIG_NF_CT_PROTO_DCCP_MODULE + case IPPROTO_DCCP: + return false; +#endif +#ifdef CONFIG_NF_CT_PROTO_GRE_MODULE + case IPPROTO_GRE: + return false; +#endif +#ifdef CONFIG_NF_CT_PROTO_UDPLITE_MODULE + case IPPROTO_UDPLITE: + return false; +#endif + default: + return true; + } +} + static inline struct nf_generic_net *generic_pernet(struct net *net) { return &net->ct.nf_ct_proto.generic; @@ -67,7 +91,7 @@ static int generic_packet(struct nf_conn *ct, static bool generic_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, unsigned int *timeouts) { - return true; + return nf_generic_should_process(nf_ct_protonum(ct)); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 1d52506bda14..a0b0ea949192 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1624,13 +1624,11 @@ static struct sk_buff *netlink_alloc_large_skb(unsigned int size, if (data == NULL) return NULL; - skb = build_skb(data, size); + skb = __build_skb(data, size); if (skb == NULL) vfree(data); - else { - skb->head_frag = 0; + else skb->destructor = netlink_skb_destructor; - } return skb; } diff --git a/net/socket.c b/net/socket.c index 1b2c2d62ff20..b72fc137e1a6 100644 --- a/net/socket.c +++ b/net/socket.c @@ -2007,14 +2007,12 @@ static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg, int err, ctl_len, total_len; err = -EFAULT; - if (MSG_CMSG_COMPAT & flags) { - if (get_compat_msghdr(msg_sys, msg_compat)) - return -EFAULT; - } else { + if (MSG_CMSG_COMPAT & flags) + err = get_compat_msghdr(msg_sys, msg_compat); + else err = copy_msghdr_from_user(msg_sys, msg); - if (err) - return err; - } + if (err) + return err; if (msg_sys->msg_iovlen > UIO_FASTIOV) { err = -EMSGSIZE; @@ -2219,14 +2217,12 @@ static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg, struct sockaddr __user *uaddr; int __user *uaddr_len; - if (MSG_CMSG_COMPAT & flags) { - if (get_compat_msghdr(msg_sys, msg_compat)) - return -EFAULT; - } else { + if (MSG_CMSG_COMPAT & flags) + err = get_compat_msghdr(msg_sys, msg_compat); + else err = copy_msghdr_from_user(msg_sys, msg); - if (err) - return err; - } + if (err) + return err; if (msg_sys->msg_iovlen > UIO_FASTIOV) { err = -EMSGSIZE; diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c index 1ec19f6f0c2b..eeeba5adee6d 100644 --- a/net/sunrpc/auth_gss/gss_rpc_xdr.c +++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c @@ -793,20 +793,26 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp, { u32 value_follows; int err; + struct page *scratch; + + scratch = alloc_page(GFP_KERNEL); + if (!scratch) + return -ENOMEM; + xdr_set_scratch_buffer(xdr, page_address(scratch), PAGE_SIZE); /* res->status */ err = gssx_dec_status(xdr, &res->status); if (err) - return err; + goto out_free; /* res->context_handle */ err = gssx_dec_bool(xdr, &value_follows); if (err) - return err; + goto out_free; if (value_follows) { err = gssx_dec_ctx(xdr, res->context_handle); if (err) - return err; + goto out_free; } else { res->context_handle = NULL; } @@ -814,11 +820,11 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp, /* res->output_token */ err = gssx_dec_bool(xdr, &value_follows); if (err) - return err; + goto out_free; if (value_follows) { err = gssx_dec_buffer(xdr, res->output_token); if (err) - return err; + goto out_free; } else { res->output_token = NULL; } @@ -826,14 +832,17 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp, /* res->delegated_cred_handle */ err = gssx_dec_bool(xdr, &value_follows); if (err) - return err; + goto out_free; if (value_follows) { /* we do not support upcall servers sending this data. */ - return -EINVAL; + err = -EINVAL; + goto out_free; } /* res->options */ err = gssx_dec_option_array(xdr, &res->options); +out_free: + __free_page(scratch); return err; } |