aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c7
-rw-r--r--net/ipv4/fib_frontend.c9
-rw-r--r--net/ipv4/fib_semantics.c12
-rw-r--r--net/ipv4/igmp.c6
-rw-r--r--net/ipv4/inet_fragment.c4
-rw-r--r--net/ipv4/ip_fragment.c12
-rw-r--r--net/ipv4/ip_output.c7
-rw-r--r--net/ipv4/ip_tunnel.c4
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c2
-rw-r--r--net/ipv4/route.c2
-rw-r--r--net/ipv4/syncookies.c1
-rw-r--r--net/ipv4/sysctl_net_ipv4.c2
-rw-r--r--net/ipv4/tcp.c4
-rw-r--r--net/ipv4/tcp_bbr.c49
-rw-r--r--net/ipv4/tcp_input.c7
-rw-r--r--net/ipv4/tcp_output.c3
-rw-r--r--net/ipv4/tcp_timer.c3
-rw-r--r--net/ipv4/udp.c2
-rw-r--r--net/ipv4/udp_offload.c2
19 files changed, 87 insertions, 51 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index f60fe82c2c1e..b5116ec31757 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1693,6 +1693,13 @@ static __net_init int inet_init_net(struct net *net)
net->ipv4.sysctl_ip_dynaddr = 0;
net->ipv4.sysctl_ip_early_demux = 1;
+ /* Some igmp sysctl, whose values are always used */
+ net->ipv4.sysctl_igmp_max_memberships = 20;
+ net->ipv4.sysctl_igmp_max_msf = 10;
+ /* IGMP reports for link-local multicast groups are enabled by default */
+ net->ipv4.sysctl_igmp_llm_reports = 1;
+ net->ipv4.sysctl_igmp_qrv = 2;
+
return 0;
}
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 3d92534c4450..968d8e165e3d 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1319,13 +1319,14 @@ static struct pernet_operations fib_net_ops = {
void __init ip_fib_init(void)
{
- rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
- rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
- rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
+ fib_trie_init();
register_pernet_subsys(&fib_net_ops);
+
register_netdevice_notifier(&fib_netdev_notifier);
register_inetaddr_notifier(&fib_inetaddr_notifier);
- fib_trie_init();
+ rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL);
+ rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL);
+ rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL);
}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 7563831fa432..38c1c979ecb1 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1044,15 +1044,17 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
if (!fi)
goto failure;
- fib_info_cnt++;
if (cfg->fc_mx) {
fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL);
- if (!fi->fib_metrics)
- goto failure;
+ if (unlikely(!fi->fib_metrics)) {
+ kfree(fi);
+ return ERR_PTR(err);
+ }
atomic_set(&fi->fib_metrics->refcnt, 1);
- } else
+ } else {
fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics;
-
+ }
+ fib_info_cnt++;
fi->fib_net = net;
fi->fib_protocol = cfg->fc_protocol;
fi->fib_scope = cfg->fc_scope;
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 19930da56b0a..08575e3bd135 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2974,12 +2974,6 @@ static int __net_init igmp_net_init(struct net *net)
goto out_sock;
}
- /* Sysctl initialization */
- net->ipv4.sysctl_igmp_max_memberships = 20;
- net->ipv4.sysctl_igmp_max_msf = 10;
- /* IGMP reports for link-local multicast groups are enabled by default */
- net->ipv4.sysctl_igmp_llm_reports = 1;
- net->ipv4.sysctl_igmp_qrv = 2;
return 0;
out_sock:
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index b5e9317eaf9e..631c0d0d7cf8 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -234,10 +234,8 @@ evict_again:
cond_resched();
if (read_seqretry(&f->rnd_seqlock, seq) ||
- percpu_counter_sum(&nf->mem))
+ sum_frag_mem_limit(nf))
goto evict_again;
-
- percpu_counter_destroy(&nf->mem);
}
EXPORT_SYMBOL(inet_frags_exit_net);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index bbe7f72db9c1..453db950dc9f 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -835,8 +835,6 @@ static void __init ip4_frags_ctl_register(void)
static int __net_init ipv4_frags_init_net(struct net *net)
{
- int res;
-
/* Fragment cache limits.
*
* The fragment memory accounting code, (tries to) account for
@@ -862,13 +860,9 @@ static int __net_init ipv4_frags_init_net(struct net *net)
net->ipv4.frags.max_dist = 64;
- res = inet_frags_init_net(&net->ipv4.frags);
- if (res)
- return res;
- res = ip4_frags_ns_ctl_register(net);
- if (res)
- inet_frags_uninit_net(&net->ipv4.frags);
- return res;
+ inet_frags_init_net(&net->ipv4.frags);
+
+ return ip4_frags_ns_ctl_register(net);
}
static void __net_exit ipv4_frags_exit_net(struct net *net)
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index e5c1dbef3626..2c3c1a223df4 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -936,10 +936,12 @@ static int __ip_append_data(struct sock *sk,
csummode = CHECKSUM_PARTIAL;
cork->length += length;
- if (((length > mtu) || (skb && skb_is_gso(skb))) &&
+ if ((skb && skb_is_gso(skb)) ||
+ ((length > mtu) &&
+ (skb_queue_len(queue) <= 1) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
- (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
+ (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx)) {
err = ip_ufo_append_data(sk, queue, getfrag, from, length,
hh_len, fragheaderlen, transhdrlen,
maxfraglen, flags);
@@ -1255,6 +1257,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
return -EINVAL;
if ((size + skb->len > mtu) &&
+ (skb_queue_len(&sk->sk_write_queue) == 1) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO)) {
if (skb->ip_summed != CHECKSUM_PARTIAL)
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 5719d6ba0824..bd7f1836bb70 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -609,8 +609,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
ip_rt_put(rt);
goto tx_dropped;
}
- iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, key->tos,
- key->ttl, df, !net_eq(tunnel->net, dev_net(dev)));
+ iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
+ df, !net_eq(tunnel->net, dev_net(dev)));
return;
tx_error:
dev->stats.tx_errors++;
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index fd8220213afc..146d86105183 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -126,6 +126,8 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook)
/* ip_route_me_harder expects skb->dst to be set */
skb_dst_set_noref(nskb, skb_dst(oldskb));
+ nskb->mark = IP4_REPLY_MARK(net, oldskb->mark);
+
skb_reserve(nskb, LL_MAX_HEADER);
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
ip4_dst_hoplimit(skb_dst(nskb)));
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 6cd49fd17ac0..6a5b7783932e 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1247,7 +1247,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
if (mtu)
return mtu;
- mtu = dst->dev->mtu;
+ mtu = READ_ONCE(dst->dev->mtu);
if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
if (rt->rt_uses_gateway && mtu > 576)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index e3c4043c27de..b6f710d515d0 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -334,6 +334,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
treq = tcp_rsk(req);
treq->rcv_isn = ntohl(th->seq) - 1;
treq->snt_isn = cookie;
+ treq->txhash = net_tx_rndhash();
req->mss = mss;
ireq->ir_num = ntohs(th->dest);
ireq->ir_rmt_port = th->source;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 80bc36b25de2..566cfc50f7cf 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -958,7 +958,7 @@ static struct ctl_table ipv4_net_table[] = {
.data = &init_net.ipv4.sysctl_tcp_notsent_lowat,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_douintvec,
},
#ifdef CONFIG_IP_ROUTE_MULTIPATH
{
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1a4db27f5833..6b3d27e50317 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2297,6 +2297,10 @@ int tcp_disconnect(struct sock *sk, int flags)
tcp_set_ca_state(sk, TCP_CA_Open);
tcp_clear_retrans(tp);
inet_csk_delack_init(sk);
+ /* Initialize rcv_mss to TCP_MIN_MSS to avoid division by 0
+ * issue in __tcp_select_window()
+ */
+ icsk->icsk_ack.rcv_mss = TCP_MIN_MSS;
tcp_init_send_head(sk);
memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
__sk_dst_reset(sk);
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index 0ea66c2c9344..cb8db347c680 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -83,7 +83,8 @@ struct bbr {
cwnd_gain:10, /* current gain for setting cwnd */
full_bw_cnt:3, /* number of rounds without large bw gains */
cycle_idx:3, /* current index in pacing_gain cycle array */
- unused_b:6;
+ has_seen_rtt:1, /* have we seen an RTT sample yet? */
+ unused_b:5;
u32 prior_cwnd; /* prior cwnd upon entering loss recovery */
u32 full_bw; /* recent bw, to estimate if pipe is full */
};
@@ -182,6 +183,35 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
return rate >> BW_SCALE;
}
+/* Convert a BBR bw and gain factor to a pacing rate in bytes per second. */
+static u32 bbr_bw_to_pacing_rate(struct sock *sk, u32 bw, int gain)
+{
+ u64 rate = bw;
+
+ rate = bbr_rate_bytes_per_sec(sk, rate, gain);
+ rate = min_t(u64, rate, sk->sk_max_pacing_rate);
+ return rate;
+}
+
+/* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
+static void bbr_init_pacing_rate_from_rtt(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ struct bbr *bbr = inet_csk_ca(sk);
+ u64 bw;
+ u32 rtt_us;
+
+ if (tp->srtt_us) { /* any RTT sample yet? */
+ rtt_us = max(tp->srtt_us >> 3, 1U);
+ bbr->has_seen_rtt = 1;
+ } else { /* no RTT sample yet */
+ rtt_us = USEC_PER_MSEC; /* use nominal default RTT */
+ }
+ bw = (u64)tp->snd_cwnd * BW_UNIT;
+ do_div(bw, rtt_us);
+ sk->sk_pacing_rate = bbr_bw_to_pacing_rate(sk, bw, bbr_high_gain);
+}
+
/* Pace using current bw estimate and a gain factor. In order to help drive the
* network toward lower queues while maintaining high utilization and low
* latency, the average pacing rate aims to be slightly (~1%) lower than the
@@ -191,12 +221,13 @@ static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain)
*/
static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
{
+ struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
- u64 rate = bw;
+ u32 rate = bbr_bw_to_pacing_rate(sk, bw, gain);
- rate = bbr_rate_bytes_per_sec(sk, rate, gain);
- rate = min_t(u64, rate, sk->sk_max_pacing_rate);
- if (bbr->mode != BBR_STARTUP || rate > sk->sk_pacing_rate)
+ if (unlikely(!bbr->has_seen_rtt && tp->srtt_us))
+ bbr_init_pacing_rate_from_rtt(sk);
+ if (bbr_full_bw_reached(sk) || rate > sk->sk_pacing_rate)
sk->sk_pacing_rate = rate;
}
@@ -769,7 +800,6 @@ static void bbr_init(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
- u64 bw;
bbr->prior_cwnd = 0;
bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */
@@ -785,11 +815,8 @@ static void bbr_init(struct sock *sk)
minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */
- /* Initialize pacing rate to: high_gain * init_cwnd / RTT. */
- bw = (u64)tp->snd_cwnd * BW_UNIT;
- do_div(bw, (tp->srtt_us >> 3) ? : USEC_PER_MSEC);
- sk->sk_pacing_rate = 0; /* force an update of sk_pacing_rate */
- bbr_set_pacing_rate(sk, bw, bbr_high_gain);
+ bbr->has_seen_rtt = 0;
+ bbr_init_pacing_rate_from_rtt(sk);
bbr->restore_cwnd = 0;
bbr->round_start = 0;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 01336aa5f973..c03850771a4e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2560,8 +2560,8 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk)
return;
/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
- if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
- (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
+ if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
+ (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) {
tp->snd_cwnd = tp->snd_ssthresh;
tp->snd_cwnd_stamp = tcp_time_stamp;
}
@@ -3036,8 +3036,7 @@ void tcp_rearm_rto(struct sock *sk)
/* delta may not be positive if the socket is locked
* when the retrans timer fires and is rescheduled.
*/
- if (delta > 0)
- rto = delta;
+ rto = max(delta, 1);
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
TCP_RTO_MAX);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index dc4258fd15dc..5d836b037442 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3344,6 +3344,9 @@ int tcp_connect(struct sock *sk)
struct sk_buff *buff;
int err;
+ if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
+ return -EHOSTUNREACH; /* Routing failure or similar. */
+
tcp_connect_init(sk);
if (unlikely(tp->repair)) {
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b1e65b3b4361..74db43b47917 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -654,7 +654,8 @@ static void tcp_keepalive_timer (unsigned long data)
goto death;
}
- if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
+ if (!sock_flag(sk, SOCK_KEEPOPEN) ||
+ ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)))
goto out;
elapsed = keepalive_time_when(tp);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 5bab6c3f7a2f..4363b1e89bdf 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -813,7 +813,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
if (is_udplite) /* UDP-Lite */
csum = udplite_csum(skb);
- else if (sk->sk_no_check_tx) { /* UDP csum disabled */
+ else if (sk->sk_no_check_tx && !skb_is_gso(skb)) { /* UDP csum off */
skb->ip_summed = CHECKSUM_NONE;
goto send;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index b2be1d9757ef..6de016f80f17 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -232,7 +232,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
- skb->ip_summed = CHECKSUM_NONE;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
/* If there is no outer header we can fake a checksum offload
* due to the fact that we have already done the checksum in