diff options
Diffstat (limited to 'net/ipv4')
| -rw-r--r-- | net/ipv4/devinet.c | 5 | ||||
| -rw-r--r-- | net/ipv4/ip_forward.c | 1 | ||||
| -rw-r--r-- | net/ipv4/ip_fragment.c | 25 | ||||
| -rw-r--r-- | net/ipv4/ip_input.c | 4 | ||||
| -rw-r--r-- | net/ipv4/ipconfig.c | 2 | ||||
| -rw-r--r-- | net/ipv4/ipmr.c | 4 | ||||
| -rw-r--r-- | net/ipv4/raw.c | 2 | ||||
| -rw-r--r-- | net/ipv4/tcp_output.c | 45 | ||||
| -rw-r--r-- | net/ipv4/tcp_timer.c | 10 |
9 files changed, 69 insertions, 29 deletions
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index a34602ae27de..608a6f4223fb 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -952,17 +952,18 @@ static int inet_abc_len(__be32 addr) { int rc = -1; /* Something else, probably a multicast. */ - if (ipv4_is_zeronet(addr)) + if (ipv4_is_zeronet(addr) || ipv4_is_lbcast(addr)) rc = 0; else { __u32 haddr = ntohl(addr); - if (IN_CLASSA(haddr)) rc = 8; else if (IN_CLASSB(haddr)) rc = 16; else if (IN_CLASSC(haddr)) rc = 24; + else if (IN_CLASSE(haddr)) + rc = 32; } return rc; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 32662e9e5d21..d5984d31ab93 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -72,6 +72,7 @@ static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *s if (unlikely(opt->optlen)) ip_forward_options(skb); + skb->tstamp = 0; return dst_output(net, sk, skb); } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index d6ee343fdb86..867be8f7f1fa 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -346,10 +346,10 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) struct net *net = container_of(qp->q.net, struct net, ipv4.frags); struct rb_node **rbn, *parent; struct sk_buff *skb1, *prev_tail; + int ihl, end, skb1_run_end; struct net_device *dev; unsigned int fragsize; int flags, offset; - int ihl, end; int err = -ENOENT; u8 ecn; @@ -419,7 +419,9 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) * overlapping fragment, the entire datagram (and any constituent * fragments) MUST be silently discarded. * - * We do the same here for IPv4 (and increment an snmp counter). + * We do the same here for IPv4 (and increment an snmp counter) but + * we do not want to drop the whole queue in response to a duplicate + * fragment. */ err = -EINVAL; @@ -444,13 +446,17 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb) do { parent = *rbn; skb1 = rb_to_skb(parent); + skb1_run_end = skb1->ip_defrag_offset + + FRAG_CB(skb1)->frag_run_len; if (end <= skb1->ip_defrag_offset) rbn = &parent->rb_left; - else if (offset >= skb1->ip_defrag_offset + - FRAG_CB(skb1)->frag_run_len) + else if (offset >= skb1_run_end) rbn = &parent->rb_right; - else /* Found an overlap with skb1. */ - goto overlap; + else if (offset >= skb1->ip_defrag_offset && + end <= skb1_run_end) + goto err; /* No new data, potential duplicate */ + else + goto overlap; /* Found an overlap */ } while (*rbn); /* Here we have parent properly set, and rbn pointing to * one of its NULL left/right children. Insert skb. @@ -515,6 +521,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, struct rb_node *rbn; int len; int ihlen; + int delta; int err; u8 ecn; @@ -556,10 +563,16 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb, if (len > 65535) goto out_oversize; + delta = - head->truesize; + /* Head of list must not be cloned. */ if (skb_unclone(head, GFP_ATOMIC)) goto out_nomem; + delta += head->truesize; + if (delta) + add_frag_mem_limit(qp->q.net, delta); + /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part * and the second, holding only fragments. */ diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 35a786c0aaa0..e609b08c9df4 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -547,7 +547,7 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk, list_for_each_entry_safe(skb, next, head, list) { struct dst_entry *dst; - list_del(&skb->list); + skb_list_del_init(skb); /* if ingress device is enslaved to an L3 master device pass the * skb to its handler for processing */ @@ -594,7 +594,7 @@ void ip_list_rcv(struct list_head *head, struct packet_type *pt, struct net_device *dev = skb->dev; struct net *net = dev_net(dev); - list_del(&skb->list); + skb_list_del_init(skb); skb = ip_rcv_core(skb, net); if (skb == NULL) continue; diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 88212615bf4c..2393e5c106bf 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -429,6 +429,8 @@ static int __init ic_defaults(void) ic_netmask = htonl(IN_CLASSB_NET); else if (IN_CLASSC(ntohl(ic_myaddr))) ic_netmask = htonl(IN_CLASSC_NET); + else if (IN_CLASSE(ntohl(ic_myaddr))) + ic_netmask = htonl(IN_CLASSE_NET); else { pr_err("IP-Config: Unable to guess netmask for address %pI4\n", &ic_myaddr); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index a6defbec4f1b..e7a3879cedd0 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -69,6 +69,8 @@ #include <net/nexthop.h> #include <net/switchdev.h> +#include <linux/nospec.h> + struct ipmr_rule { struct fib_rule common; }; @@ -1612,6 +1614,7 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg) return -EFAULT; if (vr.vifi >= mrt->maxvif) return -EINVAL; + vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif); read_lock(&mrt_lock); vif = &mrt->vif_table[vr.vifi]; if (VIF_EXISTS(mrt, vr.vifi)) { @@ -1686,6 +1689,7 @@ int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) return -EFAULT; if (vr.vifi >= mrt->maxvif) return -EINVAL; + vr.vifi = array_index_nospec(vr.vifi, mrt->maxvif); read_lock(&mrt_lock); vif = &mrt->vif_table[vr.vifi]; if (VIF_EXISTS(mrt, vr.vifi)) { diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 8ca3eb06ba04..169a652b3dd1 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -391,7 +391,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb->ip_summed = CHECKSUM_NONE; - sock_tx_timestamp(sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags); + skb_setup_tx_timestamp(skb, sockc->tsflags); if (flags & MSG_CONFIRM) skb_set_dst_pending_confirm(skb, 1); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 3f510cad0b3e..d1676d8a6ed7 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1904,7 +1904,9 @@ static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue, * This algorithm is from John Heffner. */ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, - bool *is_cwnd_limited, u32 max_segs) + bool *is_cwnd_limited, + bool *is_rwnd_limited, + u32 max_segs) { const struct inet_connection_sock *icsk = inet_csk(sk); u32 age, send_win, cong_win, limit, in_flight; @@ -1912,9 +1914,6 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, struct sk_buff *head; int win_divisor; - if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) - goto send_now; - if (icsk->icsk_ca_state >= TCP_CA_Recovery) goto send_now; @@ -1973,10 +1972,27 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, if (age < (tp->srtt_us >> 4)) goto send_now; - /* Ok, it looks like it is advisable to defer. */ + /* Ok, it looks like it is advisable to defer. + * Three cases are tracked : + * 1) We are cwnd-limited + * 2) We are rwnd-limited + * 3) We are application limited. + */ + if (cong_win < send_win) { + if (cong_win <= skb->len) { + *is_cwnd_limited = true; + return true; + } + } else { + if (send_win <= skb->len) { + *is_rwnd_limited = true; + return true; + } + } - if (cong_win < send_win && cong_win <= skb->len) - *is_cwnd_limited = true; + /* If this packet won't get more data, do not wait. */ + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) + goto send_now; return true; @@ -2356,7 +2372,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, } else { if (!push_one && tcp_tso_should_defer(sk, skb, &is_cwnd_limited, - max_segs)) + &is_rwnd_limited, max_segs)) break; } @@ -2494,15 +2510,18 @@ void tcp_send_loss_probe(struct sock *sk) goto rearm_timer; } skb = skb_rb_last(&sk->tcp_rtx_queue); + if (unlikely(!skb)) { + WARN_ONCE(tp->packets_out, + "invalid inflight: %u state %u cwnd %u mss %d\n", + tp->packets_out, sk->sk_state, tp->snd_cwnd, mss); + inet_csk(sk)->icsk_pending = 0; + return; + } /* At most one outstanding TLP retransmission. */ if (tp->tlp_high_seq) goto rearm_timer; - /* Retransmit last segment. */ - if (WARN_ON(!skb)) - goto rearm_timer; - if (skb_still_in_host_queue(sk, skb)) goto rearm_timer; @@ -2920,7 +2939,7 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; trace_tcp_retransmit_skb(sk, skb); } else if (err != -EBUSY) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); + NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL, segs); } return err; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 091c53925e4d..f87dbc78b6bc 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -378,7 +378,7 @@ static void tcp_probe_timer(struct sock *sk) return; } - if (icsk->icsk_probes_out > max_probes) { + if (icsk->icsk_probes_out >= max_probes) { abort: tcp_write_err(sk); } else { /* Only send another probe if we didn't close things up. */ @@ -484,11 +484,12 @@ void tcp_retransmit_timer(struct sock *sk) goto out_reset_timer; } + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTS); if (tcp_write_timeout(sk)) goto out; if (icsk->icsk_retransmits == 0) { - int mib_idx; + int mib_idx = 0; if (icsk->icsk_ca_state == TCP_CA_Recovery) { if (tcp_is_sack(tp)) @@ -503,10 +504,9 @@ void tcp_retransmit_timer(struct sock *sk) mib_idx = LINUX_MIB_TCPSACKFAILURES; else mib_idx = LINUX_MIB_TCPRENOFAILURES; - } else { - mib_idx = LINUX_MIB_TCPTIMEOUTS; } - __NET_INC_STATS(sock_net(sk), mib_idx); + if (mib_idx) + __NET_INC_STATS(sock_net(sk), mib_idx); } tcp_enter_loss(sk); |

