diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/af_inet.c | 7 | ||||
-rw-r--r-- | net/ipv4/fib_frontend.c | 9 | ||||
-rw-r--r-- | net/ipv4/fib_semantics.c | 12 | ||||
-rw-r--r-- | net/ipv4/igmp.c | 6 | ||||
-rw-r--r-- | net/ipv4/inet_fragment.c | 4 | ||||
-rw-r--r-- | net/ipv4/ip_fragment.c | 12 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 7 | ||||
-rw-r--r-- | net/ipv4/ip_tunnel.c | 4 | ||||
-rw-r--r-- | net/ipv4/netfilter/nf_reject_ipv4.c | 2 | ||||
-rw-r--r-- | net/ipv4/route.c | 2 | ||||
-rw-r--r-- | net/ipv4/syncookies.c | 1 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp_bbr.c | 49 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 7 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 3 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 3 | ||||
-rw-r--r-- | net/ipv4/udp.c | 2 | ||||
-rw-r--r-- | net/ipv4/udp_offload.c | 2 |
19 files changed, 87 insertions, 51 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f60fe82c2c1e..b5116ec31757 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1693,6 +1693,13 @@ static __net_init int inet_init_net(struct net *net) net->ipv4.sysctl_ip_dynaddr = 0; net->ipv4.sysctl_ip_early_demux = 1; + /* Some igmp sysctl, whose values are always used */ + net->ipv4.sysctl_igmp_max_memberships = 20; + net->ipv4.sysctl_igmp_max_msf = 10; + /* IGMP reports for link-local multicast groups are enabled by default */ + net->ipv4.sysctl_igmp_llm_reports = 1; + net->ipv4.sysctl_igmp_qrv = 2; + return 0; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 3d92534c4450..968d8e165e3d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1319,13 +1319,14 @@ static struct pernet_operations fib_net_ops = { void __init ip_fib_init(void) { - rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL); - rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL); - rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL); + fib_trie_init(); register_pernet_subsys(&fib_net_ops); + register_netdevice_notifier(&fib_netdev_notifier); register_inetaddr_notifier(&fib_inetaddr_notifier); - fib_trie_init(); + rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL); + rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL); + rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL); } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 7563831fa432..38c1c979ecb1 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1044,15 +1044,17 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); if (!fi) goto failure; - fib_info_cnt++; if (cfg->fc_mx) { fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL); - if (!fi->fib_metrics) - goto failure; + if (unlikely(!fi->fib_metrics)) { + kfree(fi); + return ERR_PTR(err); + } atomic_set(&fi->fib_metrics->refcnt, 1); - } else + } else { fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics; - + } + fib_info_cnt++; fi->fib_net = net; fi->fib_protocol = cfg->fc_protocol; fi->fib_scope = cfg->fc_scope; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 19930da56b0a..08575e3bd135 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2974,12 +2974,6 @@ static int __net_init igmp_net_init(struct net *net) goto out_sock; } - /* Sysctl initialization */ - net->ipv4.sysctl_igmp_max_memberships = 20; - net->ipv4.sysctl_igmp_max_msf = 10; - /* IGMP reports for link-local multicast groups are enabled by default */ - net->ipv4.sysctl_igmp_llm_reports = 1; - net->ipv4.sysctl_igmp_qrv = 2; return 0; out_sock: diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index b5e9317eaf9e..631c0d0d7cf8 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -234,10 +234,8 @@ evict_again: cond_resched(); if (read_seqretry(&f->rnd_seqlock, seq) || - percpu_counter_sum(&nf->mem)) + sum_frag_mem_limit(nf)) goto evict_again; - - percpu_counter_destroy(&nf->mem); } EXPORT_SYMBOL(inet_frags_exit_net); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index bbe7f72db9c1..453db950dc9f 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -835,8 +835,6 @@ static void __init ip4_frags_ctl_register(void) static int __net_init ipv4_frags_init_net(struct net *net) { - int res; - /* Fragment cache limits. * * The fragment memory accounting code, (tries to) account for @@ -862,13 +860,9 @@ static int __net_init ipv4_frags_init_net(struct net *net) net->ipv4.frags.max_dist = 64; - res = inet_frags_init_net(&net->ipv4.frags); - if (res) - return res; - res = ip4_frags_ns_ctl_register(net); - if (res) - inet_frags_uninit_net(&net->ipv4.frags); - return res; + inet_frags_init_net(&net->ipv4.frags); + + return ip4_frags_ns_ctl_register(net); } static void __net_exit ipv4_frags_exit_net(struct net *net) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e5c1dbef3626..2c3c1a223df4 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -936,10 +936,12 @@ static int __ip_append_data(struct sock *sk, csummode = CHECKSUM_PARTIAL; cork->length += length; - if (((length > mtu) || (skb && skb_is_gso(skb))) && + if ((skb && skb_is_gso(skb)) || + ((length > mtu) && + (skb_queue_len(queue) <= 1) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && - (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) { + (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx)) { err = ip_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, maxfraglen, flags); @@ -1255,6 +1257,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, return -EINVAL; if ((size + skb->len > mtu) && + (skb_queue_len(&sk->sk_write_queue) == 1) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO)) { if (skb->ip_summed != CHECKSUM_PARTIAL) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 5719d6ba0824..bd7f1836bb70 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -609,8 +609,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto) ip_rt_put(rt); goto tx_dropped; } - iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, key->tos, - key->ttl, df, !net_eq(tunnel->net, dev_net(dev))); + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl, + df, !net_eq(tunnel->net, dev_net(dev))); return; tx_error: dev->stats.tx_errors++; diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index fd8220213afc..146d86105183 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -126,6 +126,8 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook) /* ip_route_me_harder expects skb->dst to be set */ skb_dst_set_noref(nskb, skb_dst(oldskb)); + nskb->mark = IP4_REPLY_MARK(net, oldskb->mark); + skb_reserve(nskb, LL_MAX_HEADER); niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, ip4_dst_hoplimit(skb_dst(nskb))); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6cd49fd17ac0..6a5b7783932e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1247,7 +1247,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) if (mtu) return mtu; - mtu = dst->dev->mtu; + mtu = READ_ONCE(dst->dev->mtu); if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { if (rt->rt_uses_gateway && mtu > 576) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index e3c4043c27de..b6f710d515d0 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -334,6 +334,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) treq = tcp_rsk(req); treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; + treq->txhash = net_tx_rndhash(); req->mss = mss; ireq->ir_num = ntohs(th->dest); ireq->ir_rmt_port = th->source; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 80bc36b25de2..566cfc50f7cf 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -958,7 +958,7 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_tcp_notsent_lowat, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = proc_douintvec, }, #ifdef CONFIG_IP_ROUTE_MULTIPATH { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 1a4db27f5833..6b3d27e50317 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2297,6 +2297,10 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_set_ca_state(sk, TCP_CA_Open); tcp_clear_retrans(tp); inet_csk_delack_init(sk); + /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0 + * issue in __tcp_select_window() + */ + icsk->icsk_ack.rcv_mss = TCP_MIN_MSS; tcp_init_send_head(sk); memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 0ea66c2c9344..cb8db347c680 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -83,7 +83,8 @@ struct bbr { cwnd_gain:10, /* current gain for setting cwnd */ full_bw_cnt:3, /* number of rounds without large bw gains */ cycle_idx:3, /* current index in pacing_gain cycle array */ - unused_b:6; + has_seen_rtt:1, /* have we seen an RTT sample yet? */ + unused_b:5; u32 prior_cwnd; /* prior cwnd upon entering loss recovery */ u32 full_bw; /* recent bw, to estimate if pipe is full */ }; @@ -182,6 +183,35 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain) return rate >> BW_SCALE; } +/* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */ +static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain) +{ + u64 rate = bw; + + rate = bbr_rate_bytes_per_sec(sk, rate, gain); + rate = min_t(u64, rate, sk->sk_max_pacing_rate); + return rate; +} + +/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */ +static void bbr_init_pacing_rate_from_rtt(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u64 bw; + u32 rtt_us; + + if (tp->srtt_us) { /* any RTT sample yet? */ + rtt_us = max(tp->srtt_us >> 3, 1U); + bbr->has_seen_rtt = 1; + } else { /* no RTT sample yet */ + rtt_us = USEC_PER_MSEC; /* use nominal default RTT */ + } + bw = (u64)tp->snd_cwnd * BW_UNIT; + do_div(bw, rtt_us); + sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain); +} + /* Pace using current bw estimate and a gain factor. In order to help drive the * network toward lower queues while maintaining high utilization and low * latency, the average pacing rate aims to be slightly (~1%) lower than the @@ -191,12 +221,13 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain) */ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) { + struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - u64 rate = bw; + u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain); - rate = bbr_rate_bytes_per_sec(sk, rate, gain); - rate = min_t(u64, rate, sk->sk_max_pacing_rate); - if (bbr->mode != BBR_STARTUP || rate > sk->sk_pacing_rate) + if (unlikely(!bbr->has_seen_rtt && tp->srtt_us)) + bbr_init_pacing_rate_from_rtt(sk); + if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate) sk->sk_pacing_rate = rate; } @@ -769,7 +800,6 @@ static void bbr_init(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - u64 bw; bbr->prior_cwnd = 0; bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */ @@ -785,11 +815,8 @@ static void bbr_init(struct sock *sk) minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */ - /* Initialize pacing rate to: high_gain * init_cwnd / RTT. */ - bw = (u64)tp->snd_cwnd * BW_UNIT; - do_div(bw, (tp->srtt_us >> 3) ? : USEC_PER_MSEC); - sk->sk_pacing_rate = 0; /* force an update of sk_pacing_rate */ - bbr_set_pacing_rate(sk, bw, bbr_high_gain); + bbr->has_seen_rtt = 0; + bbr_init_pacing_rate_from_rtt(sk); bbr->restore_cwnd = 0; bbr->round_start = 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 01336aa5f973..c03850771a4e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2560,8 +2560,8 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk) return; /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */ - if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || - (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) { + if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH && + (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) { tp->snd_cwnd = tp->snd_ssthresh; tp->snd_cwnd_stamp = tcp_time_stamp; } @@ -3036,8 +3036,7 @@ void tcp_rearm_rto(struct sock *sk) /* delta may not be positive if the socket is locked * when the retrans timer fires and is rescheduled. */ - if (delta > 0) - rto = delta; + rto = max(delta, 1); } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, TCP_RTO_MAX); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index dc4258fd15dc..5d836b037442 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3344,6 +3344,9 @@ int tcp_connect(struct sock *sk) struct sk_buff *buff; int err; + if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) + return -EHOSTUNREACH; /* Routing failure or similar. */ + tcp_connect_init(sk); if (unlikely(tp->repair)) { diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b1e65b3b4361..74db43b47917 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -654,7 +654,8 @@ static void tcp_keepalive_timer (unsigned long data) goto death; } - if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE) + if (!sock_flag(sk, SOCK_KEEPOPEN) || + ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT))) goto out; elapsed = keepalive_time_when(tp); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5bab6c3f7a2f..4363b1e89bdf 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -813,7 +813,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) if (is_udplite) /* UDP-Lite */ csum = udplite_csum(skb); - else if (sk->sk_no_check_tx) { /* UDP csum disabled */ + else if (sk->sk_no_check_tx && !skb_is_gso(skb)) { /* UDP csum off */ skb->ip_summed = CHECKSUM_NONE; goto send; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index b2be1d9757ef..6de016f80f17 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -232,7 +232,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, if (uh->check == 0) uh->check = CSUM_MANGLED_0; - skb->ip_summed = CHECKSUM_NONE; + skb->ip_summed = CHECKSUM_UNNECESSARY; /* If there is no outer header we can fake a checksum offload * due to the fact that we have already done the checksum in |