diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/af_inet.c | 1 | ||||
-rw-r--r-- | net/ipv4/devinet.c | 16 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 36 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 1 | ||||
-rw-r--r-- | net/ipv4/ip_sockglue.c | 13 | ||||
-rw-r--r-- | net/ipv4/ipmr.c | 2 | ||||
-rw-r--r-- | net/ipv4/raw.c | 6 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 69 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 4 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 11 | ||||
-rw-r--r-- | net/ipv4/udp.c | 80 | ||||
-rw-r--r-- | net/ipv4/udp_impl.h | 4 |
12 files changed, 162 insertions, 81 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 58c4b0f7c4aa..57737b8d1711 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1119,6 +1119,7 @@ int inet_sk_rebuild_header(struct sock *sk) { struct flowi fl = { .oif = sk->sk_bound_dev_if, + .mark = sk->sk_mark, .nl_u = { .ip4_u = { .daddr = daddr, diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index e92f1fd28aa5..5df2f6a0b0f0 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1077,12 +1077,16 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, ip_mc_up(in_dev); /* fall through */ case NETDEV_CHANGEADDR: - if (IN_DEV_ARP_NOTIFY(in_dev)) - arp_send(ARPOP_REQUEST, ETH_P_ARP, - in_dev->ifa_list->ifa_address, - dev, - in_dev->ifa_list->ifa_address, - NULL, dev->dev_addr, NULL); + /* Send gratuitous ARP to notify of link change */ + if (IN_DEV_ARP_NOTIFY(in_dev)) { + struct in_ifaddr *ifa = in_dev->ifa_list; + + if (ifa) + arp_send(ARPOP_REQUEST, ETH_P_ARP, + ifa->ifa_address, dev, + ifa->ifa_address, NULL, + dev->dev_addr, NULL); + } break; case NETDEV_DOWN: ip_mc_down(in_dev); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 22cd19ee44e5..537731b3bcb3 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -446,6 +446,28 @@ extern int sysctl_tcp_synack_retries; EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add); +/* Decide when to expire the request and when to resend SYN-ACK */ +static inline void syn_ack_recalc(struct request_sock *req, const int thresh, + const int max_retries, + const u8 rskq_defer_accept, + int *expire, int *resend) +{ + if (!rskq_defer_accept) { + *expire = req->retrans >= thresh; + *resend = 1; + return; + } + *expire = req->retrans >= thresh && + (!inet_rsk(req)->acked || req->retrans >= max_retries); + /* + * Do not resend while waiting for data after ACK, + * start to resend on end of deferring period to give + * last chance for data or ACK to create established socket. + */ + *resend = !inet_rsk(req)->acked || + req->retrans >= rskq_defer_accept - 1; +} + void inet_csk_reqsk_queue_prune(struct sock *parent, const unsigned long interval, const unsigned long timeout, @@ -501,9 +523,15 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, reqp=&lopt->syn_table[i]; while ((req = *reqp) != NULL) { if (time_after_eq(now, req->expires)) { - if ((req->retrans < thresh || - (inet_rsk(req)->acked && req->retrans < max_retries)) - && !req->rsk_ops->rtx_syn_ack(parent, req)) { + int expire = 0, resend = 0; + + syn_ack_recalc(req, thresh, max_retries, + queue->rskq_defer_accept, + &expire, &resend); + if (!expire && + (!resend || + !req->rsk_ops->rtx_syn_ack(parent, req) || + inet_rsk(req)->acked)) { unsigned long timeo; if (req->retrans++ == 0) @@ -714,7 +742,7 @@ int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt); int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) + char __user *optval, unsigned int optlen) { const struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 9fe5d7b81580..f9895180f481 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -335,6 +335,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) { struct flowi fl = { .oif = sk->sk_bound_dev_if, + .mark = sk->sk_mark, .nl_u = { .ip4_u = { .daddr = daddr, .saddr = inet->saddr, diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 5a0693576e82..e982b5c1ee17 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -440,7 +440,7 @@ out: */ static int do_ip_setsockopt(struct sock *sk, int level, - int optname, char __user *optval, int optlen) + int optname, char __user *optval, unsigned int optlen) { struct inet_sock *inet = inet_sk(sk); int val = 0, err; @@ -634,17 +634,16 @@ static int do_ip_setsockopt(struct sock *sk, int level, break; } dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr); - if (dev) { + if (dev) mreq.imr_ifindex = dev->ifindex; - dev_put(dev); - } } else - dev = __dev_get_by_index(sock_net(sk), mreq.imr_ifindex); + dev = dev_get_by_index(sock_net(sk), mreq.imr_ifindex); err = -EADDRNOTAVAIL; if (!dev) break; + dev_put(dev); err = -EINVAL; if (sk->sk_bound_dev_if && @@ -950,7 +949,7 @@ e_inval: } int ip_setsockopt(struct sock *sk, int level, - int optname, char __user *optval, int optlen) + int optname, char __user *optval, unsigned int optlen) { int err; @@ -975,7 +974,7 @@ EXPORT_SYMBOL(ip_setsockopt); #ifdef CONFIG_COMPAT int compat_ip_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) + char __user *optval, unsigned int optlen) { int err; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c43ec2d51ce2..630a56df7b47 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -931,7 +931,7 @@ static void mrtsock_destruct(struct sock *sk) * MOSPF/PIM router set up we can clean this up. */ -int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen) +int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen) { int ret; struct vifctl vif; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index ebb1e5848bc6..757c9171e7c2 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -741,7 +741,7 @@ out: return ret; } static int do_raw_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) + char __user *optval, unsigned int optlen) { if (optname == ICMP_FILTER) { if (inet_sk(sk)->num != IPPROTO_ICMP) @@ -753,7 +753,7 @@ static int do_raw_setsockopt(struct sock *sk, int level, int optname, } static int raw_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) + char __user *optval, unsigned int optlen) { if (level != SOL_RAW) return ip_setsockopt(sk, level, optname, optval, optlen); @@ -762,7 +762,7 @@ static int raw_setsockopt(struct sock *sk, int level, int optname, #ifdef CONFIG_COMPAT static int compat_raw_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) + char __user *optval, unsigned int optlen) { if (level != SOL_RAW) return compat_ip_setsockopt(sk, level, optname, optval, optlen); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 21387ebabf00..90b2e0649bfb 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -326,6 +326,43 @@ void tcp_enter_memory_pressure(struct sock *sk) EXPORT_SYMBOL(tcp_enter_memory_pressure); +/* Convert seconds to retransmits based on initial and max timeout */ +static u8 secs_to_retrans(int seconds, int timeout, int rto_max) +{ + u8 res = 0; + + if (seconds > 0) { + int period = timeout; + + res = 1; + while (seconds > period && res < 255) { + res++; + timeout <<= 1; + if (timeout > rto_max) + timeout = rto_max; + period += timeout; + } + } + return res; +} + +/* Convert retransmits to seconds based on initial and max timeout */ +static int retrans_to_secs(u8 retrans, int timeout, int rto_max) +{ + int period = 0; + + if (retrans > 0) { + period = timeout; + while (--retrans) { + timeout <<= 1; + if (timeout > rto_max) + timeout = rto_max; + period += timeout; + } + } + return period; +} + /* * Wait for a TCP event. * @@ -580,7 +617,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, lock_sock(sk); - timeo = sock_rcvtimeo(sk, flags & SPLICE_F_NONBLOCK); + timeo = sock_rcvtimeo(sk, sock->file->f_flags & O_NONBLOCK); while (tss.len) { ret = __tcp_splice_read(sk, &tss); if (ret < 0) @@ -1405,7 +1442,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto found_ok_skb; if (tcp_hdr(skb)->fin) goto found_fin_ok; - WARN_ON(!(flags & MSG_PEEK)); + if (WARN_ON(!(flags & MSG_PEEK))) + printk(KERN_INFO "recvmsg bug 2: copied %X " + "seq %X\n", *seq, TCP_SKB_CB(skb)->seq); } /* Well, if we have backlog, try to process it now yet. */ @@ -2032,7 +2071,7 @@ int tcp_disconnect(struct sock *sk, int flags) * Socket option code for TCP. */ static int do_tcp_setsockopt(struct sock *sk, int level, - int optname, char __user *optval, int optlen) + int optname, char __user *optval, unsigned int optlen) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); @@ -2047,7 +2086,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, return -EINVAL; val = strncpy_from_user(name, optval, - min(TCP_CA_NAME_MAX-1, optlen)); + min_t(long, TCP_CA_NAME_MAX-1, optlen)); if (val < 0) return -EFAULT; name[val] = 0; @@ -2163,16 +2202,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level, break; case TCP_DEFER_ACCEPT: - icsk->icsk_accept_queue.rskq_defer_accept = 0; - if (val > 0) { - /* Translate value in seconds to number of - * retransmits */ - while (icsk->icsk_accept_queue.rskq_defer_accept < 32 && - val > ((TCP_TIMEOUT_INIT / HZ) << - icsk->icsk_accept_queue.rskq_defer_accept)) - icsk->icsk_accept_queue.rskq_defer_accept++; - icsk->icsk_accept_queue.rskq_defer_accept++; - } + /* Translate value in seconds to number of retransmits */ + icsk->icsk_accept_queue.rskq_defer_accept = + secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ, + TCP_RTO_MAX / HZ); break; case TCP_WINDOW_CLAMP: @@ -2220,7 +2253,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, } int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, - int optlen) + unsigned int optlen) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -2232,7 +2265,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, #ifdef CONFIG_COMPAT int compat_tcp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) + char __user *optval, unsigned int optlen) { if (level != SOL_TCP) return inet_csk_compat_setsockopt(sk, level, optname, @@ -2353,8 +2386,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = (val ? : sysctl_tcp_fin_timeout) / HZ; break; case TCP_DEFER_ACCEPT: - val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 : - ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1)); + val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept, + TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ); break; case TCP_WINDOW_CLAMP: val = tp->window_clamp; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 624c3c9b3c2b..4c03598ed924 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -641,8 +641,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, if (!(flg & TCP_FLAG_ACK)) return NULL; - /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */ - if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && + /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */ + if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { inet_rsk(req)->acked = 1; return NULL; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5200aab0ca97..fcd278a7080e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -361,6 +361,7 @@ static inline int tcp_urg_mode(const struct tcp_sock *tp) #define OPTION_SACK_ADVERTISE (1 << 0) #define OPTION_TS (1 << 1) #define OPTION_MD5 (1 << 2) +#define OPTION_WSCALE (1 << 3) struct tcp_out_options { u8 options; /* bit field of OPTION_* */ @@ -427,7 +428,7 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, TCPOLEN_SACK_PERM); } - if (unlikely(opts->ws)) { + if (unlikely(OPTION_WSCALE & opts->options)) { *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_WINDOW << 16) | (TCPOLEN_WINDOW << 8) | @@ -494,8 +495,8 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb, } if (likely(sysctl_tcp_window_scaling)) { opts->ws = tp->rx_opt.rcv_wscale; - if (likely(opts->ws)) - size += TCPOLEN_WSCALE_ALIGNED; + opts->options |= OPTION_WSCALE; + size += TCPOLEN_WSCALE_ALIGNED; } if (likely(sysctl_tcp_sack)) { opts->options |= OPTION_SACK_ADVERTISE; @@ -537,8 +538,8 @@ static unsigned tcp_synack_options(struct sock *sk, if (likely(ireq->wscale_ok)) { opts->ws = ireq->rcv_wscale; - if (likely(opts->ws)) - size += TCPOLEN_WSCALE_ALIGNED; + opts->options |= OPTION_WSCALE; + size += TCPOLEN_WSCALE_ALIGNED; } if (likely(doing_ts)) { opts->options |= OPTION_TS; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ebaaa7f973d7..d0d436d6216c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -696,6 +696,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (rt == NULL) { struct flowi fl = { .oif = ipc.oif, + .mark = sk->sk_mark, .nl_u = { .ip4_u = { .daddr = faddr, .saddr = saddr, @@ -840,6 +841,42 @@ out: return ret; } + +/** + * first_packet_length - return length of first packet in receive queue + * @sk: socket + * + * Drops all bad checksum frames, until a valid one is found. + * Returns the length of found skb, or 0 if none is found. + */ +static unsigned int first_packet_length(struct sock *sk) +{ + struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue; + struct sk_buff *skb; + unsigned int res; + + __skb_queue_head_init(&list_kill); + + spin_lock_bh(&rcvq->lock); + while ((skb = skb_peek(rcvq)) != NULL && + udp_lib_checksum_complete(skb)) { + UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS, + IS_UDPLITE(sk)); + __skb_unlink(skb, rcvq); + __skb_queue_tail(&list_kill, skb); + } + res = skb ? skb->len : 0; + spin_unlock_bh(&rcvq->lock); + + if (!skb_queue_empty(&list_kill)) { + lock_sock(sk); + __skb_queue_purge(&list_kill); + sk_mem_reclaim_partial(sk); + release_sock(sk); + } + return res; +} + /* * IOCTL requests applicable to the UDP protocol */ @@ -856,21 +893,16 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) case SIOCINQ: { - struct sk_buff *skb; - unsigned long amount; + unsigned int amount = first_packet_length(sk); - amount = 0; - spin_lock_bh(&sk->sk_receive_queue.lock); - skb = skb_peek(&sk->sk_receive_queue); - if (skb != NULL) { + if (amount) /* * We will only return the amount * of this packet since that is all * that will be read. */ - amount = skb->len - sizeof(struct udphdr); - } - spin_unlock_bh(&sk->sk_receive_queue.lock); + amount -= sizeof(struct udphdr); + return put_user(amount, (int __user *)arg); } @@ -1359,7 +1391,7 @@ void udp_destroy_sock(struct sock *sk) * Socket option code for UDP */ int udp_lib_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen, + char __user *optval, unsigned int optlen, int (*push_pending_frames)(struct sock *)) { struct udp_sock *up = udp_sk(sk); @@ -1441,7 +1473,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL(udp_lib_setsockopt); int udp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) + char __user *optval, unsigned int optlen) { if (level == SOL_UDP || level == SOL_UDPLITE) return udp_lib_setsockopt(sk, level, optname, optval, optlen, @@ -1451,7 +1483,7 @@ int udp_setsockopt(struct sock *sk, int level, int optname, #ifdef CONFIG_COMPAT int compat_udp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) + char __user *optval, unsigned int optlen) { if (level == SOL_UDP || level == SOL_UDPLITE) return udp_lib_setsockopt(sk, level, optname, optval, optlen, @@ -1539,29 +1571,11 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) { unsigned int mask = datagram_poll(file, sock, wait); struct sock *sk = sock->sk; - int is_lite = IS_UDPLITE(sk); /* Check for false positives due to checksum errors */ - if ((mask & POLLRDNORM) && - !(file->f_flags & O_NONBLOCK) && - !(sk->sk_shutdown & RCV_SHUTDOWN)) { - struct sk_buff_head *rcvq = &sk->sk_receive_queue; - struct sk_buff *skb; - - spin_lock_bh(&rcvq->lock); - while ((skb = skb_peek(rcvq)) != NULL && - udp_lib_checksum_complete(skb)) { - UDP_INC_STATS_BH(sock_net(sk), - UDP_MIB_INERRORS, is_lite); - __skb_unlink(skb, rcvq); - kfree_skb(skb); - } - spin_unlock_bh(&rcvq->lock); - - /* nothing to see, move along */ - if (skb == NULL) - mask &= ~(POLLIN | POLLRDNORM); - } + if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) && + !(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk)) + mask &= ~(POLLIN | POLLRDNORM); return mask; diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index 9f4a6165f722..aaad650d47d9 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -11,13 +11,13 @@ extern void __udp4_lib_err(struct sk_buff *, u32, struct udp_table *); extern int udp_v4_get_port(struct sock *sk, unsigned short snum); extern int udp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen); + char __user *optval, unsigned int optlen); extern int udp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); #ifdef CONFIG_COMPAT extern int compat_udp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen); + char __user *optval, unsigned int optlen); extern int compat_udp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); #endif |