summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/af_inet.c1
-rw-r--r--net/ipv4/devinet.c16
-rw-r--r--net/ipv4/inet_connection_sock.c36
-rw-r--r--net/ipv4/ip_output.c1
-rw-r--r--net/ipv4/ip_sockglue.c13
-rw-r--r--net/ipv4/ipmr.c2
-rw-r--r--net/ipv4/raw.c6
-rw-r--r--net/ipv4/tcp.c69
-rw-r--r--net/ipv4/tcp_minisocks.c4
-rw-r--r--net/ipv4/tcp_output.c11
-rw-r--r--net/ipv4/udp.c80
-rw-r--r--net/ipv4/udp_impl.h4
12 files changed, 162 insertions, 81 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 58c4b0f7c4aa..57737b8d1711 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1119,6 +1119,7 @@ int inet_sk_rebuild_header(struct sock *sk)
{
struct flowi fl = {
.oif = sk->sk_bound_dev_if,
+ .mark = sk->sk_mark,
.nl_u = {
.ip4_u = {
.daddr = daddr,
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index e92f1fd28aa5..5df2f6a0b0f0 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1077,12 +1077,16 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
ip_mc_up(in_dev);
/* fall through */
case NETDEV_CHANGEADDR:
- if (IN_DEV_ARP_NOTIFY(in_dev))
- arp_send(ARPOP_REQUEST, ETH_P_ARP,
- in_dev->ifa_list->ifa_address,
- dev,
- in_dev->ifa_list->ifa_address,
- NULL, dev->dev_addr, NULL);
+ /* Send gratuitous ARP to notify of link change */
+ if (IN_DEV_ARP_NOTIFY(in_dev)) {
+ struct in_ifaddr *ifa = in_dev->ifa_list;
+
+ if (ifa)
+ arp_send(ARPOP_REQUEST, ETH_P_ARP,
+ ifa->ifa_address, dev,
+ ifa->ifa_address, NULL,
+ dev->dev_addr, NULL);
+ }
break;
case NETDEV_DOWN:
ip_mc_down(in_dev);
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 22cd19ee44e5..537731b3bcb3 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -446,6 +446,28 @@ extern int sysctl_tcp_synack_retries;
EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
+/* Decide when to expire the request and when to resend SYN-ACK */
+static inline void syn_ack_recalc(struct request_sock *req, const int thresh,
+ const int max_retries,
+ const u8 rskq_defer_accept,
+ int *expire, int *resend)
+{
+ if (!rskq_defer_accept) {
+ *expire = req->retrans >= thresh;
+ *resend = 1;
+ return;
+ }
+ *expire = req->retrans >= thresh &&
+ (!inet_rsk(req)->acked || req->retrans >= max_retries);
+ /*
+ * Do not resend while waiting for data after ACK,
+ * start to resend on end of deferring period to give
+ * last chance for data or ACK to create established socket.
+ */
+ *resend = !inet_rsk(req)->acked ||
+ req->retrans >= rskq_defer_accept - 1;
+}
+
void inet_csk_reqsk_queue_prune(struct sock *parent,
const unsigned long interval,
const unsigned long timeout,
@@ -501,9 +523,15 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
reqp=&lopt->syn_table[i];
while ((req = *reqp) != NULL) {
if (time_after_eq(now, req->expires)) {
- if ((req->retrans < thresh ||
- (inet_rsk(req)->acked && req->retrans < max_retries))
- && !req->rsk_ops->rtx_syn_ack(parent, req)) {
+ int expire = 0, resend = 0;
+
+ syn_ack_recalc(req, thresh, max_retries,
+ queue->rskq_defer_accept,
+ &expire, &resend);
+ if (!expire &&
+ (!resend ||
+ !req->rsk_ops->rtx_syn_ack(parent, req) ||
+ inet_rsk(req)->acked)) {
unsigned long timeo;
if (req->retrans++ == 0)
@@ -714,7 +742,7 @@ int inet_csk_compat_getsockopt(struct sock *sk, int level, int optname,
EXPORT_SYMBOL_GPL(inet_csk_compat_getsockopt);
int inet_csk_compat_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int optlen)
+ char __user *optval, unsigned int optlen)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 9fe5d7b81580..f9895180f481 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -335,6 +335,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
{
struct flowi fl = { .oif = sk->sk_bound_dev_if,
+ .mark = sk->sk_mark,
.nl_u = { .ip4_u =
{ .daddr = daddr,
.saddr = inet->saddr,
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 5a0693576e82..e982b5c1ee17 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -440,7 +440,7 @@ out:
*/
static int do_ip_setsockopt(struct sock *sk, int level,
- int optname, char __user *optval, int optlen)
+ int optname, char __user *optval, unsigned int optlen)
{
struct inet_sock *inet = inet_sk(sk);
int val = 0, err;
@@ -634,17 +634,16 @@ static int do_ip_setsockopt(struct sock *sk, int level,
break;
}
dev = ip_dev_find(sock_net(sk), mreq.imr_address.s_addr);
- if (dev) {
+ if (dev)
mreq.imr_ifindex = dev->ifindex;
- dev_put(dev);
- }
} else
- dev = __dev_get_by_index(sock_net(sk), mreq.imr_ifindex);
+ dev = dev_get_by_index(sock_net(sk), mreq.imr_ifindex);
err = -EADDRNOTAVAIL;
if (!dev)
break;
+ dev_put(dev);
err = -EINVAL;
if (sk->sk_bound_dev_if &&
@@ -950,7 +949,7 @@ e_inval:
}
int ip_setsockopt(struct sock *sk, int level,
- int optname, char __user *optval, int optlen)
+ int optname, char __user *optval, unsigned int optlen)
{
int err;
@@ -975,7 +974,7 @@ EXPORT_SYMBOL(ip_setsockopt);
#ifdef CONFIG_COMPAT
int compat_ip_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int optlen)
+ char __user *optval, unsigned int optlen)
{
int err;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index c43ec2d51ce2..630a56df7b47 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -931,7 +931,7 @@ static void mrtsock_destruct(struct sock *sk)
* MOSPF/PIM router set up we can clean this up.
*/
-int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, int optlen)
+int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
int ret;
struct vifctl vif;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index ebb1e5848bc6..757c9171e7c2 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -741,7 +741,7 @@ out: return ret;
}
static int do_raw_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int optlen)
+ char __user *optval, unsigned int optlen)
{
if (optname == ICMP_FILTER) {
if (inet_sk(sk)->num != IPPROTO_ICMP)
@@ -753,7 +753,7 @@ static int do_raw_setsockopt(struct sock *sk, int level, int optname,
}
static int raw_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int optlen)
+ char __user *optval, unsigned int optlen)
{
if (level != SOL_RAW)
return ip_setsockopt(sk, level, optname, optval, optlen);
@@ -762,7 +762,7 @@ static int raw_setsockopt(struct sock *sk, int level, int optname,
#ifdef CONFIG_COMPAT
static int compat_raw_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int optlen)
+ char __user *optval, unsigned int optlen)
{
if (level != SOL_RAW)
return compat_ip_setsockopt(sk, level, optname, optval, optlen);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 21387ebabf00..90b2e0649bfb 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -326,6 +326,43 @@ void tcp_enter_memory_pressure(struct sock *sk)
EXPORT_SYMBOL(tcp_enter_memory_pressure);
+/* Convert seconds to retransmits based on initial and max timeout */
+static u8 secs_to_retrans(int seconds, int timeout, int rto_max)
+{
+ u8 res = 0;
+
+ if (seconds > 0) {
+ int period = timeout;
+
+ res = 1;
+ while (seconds > period && res < 255) {
+ res++;
+ timeout <<= 1;
+ if (timeout > rto_max)
+ timeout = rto_max;
+ period += timeout;
+ }
+ }
+ return res;
+}
+
+/* Convert retransmits to seconds based on initial and max timeout */
+static int retrans_to_secs(u8 retrans, int timeout, int rto_max)
+{
+ int period = 0;
+
+ if (retrans > 0) {
+ period = timeout;
+ while (--retrans) {
+ timeout <<= 1;
+ if (timeout > rto_max)
+ timeout = rto_max;
+ period += timeout;
+ }
+ }
+ return period;
+}
+
/*
* Wait for a TCP event.
*
@@ -580,7 +617,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
lock_sock(sk);
- timeo = sock_rcvtimeo(sk, flags & SPLICE_F_NONBLOCK);
+ timeo = sock_rcvtimeo(sk, sock->file->f_flags & O_NONBLOCK);
while (tss.len) {
ret = __tcp_splice_read(sk, &tss);
if (ret < 0)
@@ -1405,7 +1442,9 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
goto found_ok_skb;
if (tcp_hdr(skb)->fin)
goto found_fin_ok;
- WARN_ON(!(flags & MSG_PEEK));
+ if (WARN_ON(!(flags & MSG_PEEK)))
+ printk(KERN_INFO "recvmsg bug 2: copied %X "
+ "seq %X\n", *seq, TCP_SKB_CB(skb)->seq);
}
/* Well, if we have backlog, try to process it now yet. */
@@ -2032,7 +2071,7 @@ int tcp_disconnect(struct sock *sk, int flags)
* Socket option code for TCP.
*/
static int do_tcp_setsockopt(struct sock *sk, int level,
- int optname, char __user *optval, int optlen)
+ int optname, char __user *optval, unsigned int optlen)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2047,7 +2086,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
return -EINVAL;
val = strncpy_from_user(name, optval,
- min(TCP_CA_NAME_MAX-1, optlen));
+ min_t(long, TCP_CA_NAME_MAX-1, optlen));
if (val < 0)
return -EFAULT;
name[val] = 0;
@@ -2163,16 +2202,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
break;
case TCP_DEFER_ACCEPT:
- icsk->icsk_accept_queue.rskq_defer_accept = 0;
- if (val > 0) {
- /* Translate value in seconds to number of
- * retransmits */
- while (icsk->icsk_accept_queue.rskq_defer_accept < 32 &&
- val > ((TCP_TIMEOUT_INIT / HZ) <<
- icsk->icsk_accept_queue.rskq_defer_accept))
- icsk->icsk_accept_queue.rskq_defer_accept++;
- icsk->icsk_accept_queue.rskq_defer_accept++;
- }
+ /* Translate value in seconds to number of retransmits */
+ icsk->icsk_accept_queue.rskq_defer_accept =
+ secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
+ TCP_RTO_MAX / HZ);
break;
case TCP_WINDOW_CLAMP:
@@ -2220,7 +2253,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
}
int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
- int optlen)
+ unsigned int optlen)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2232,7 +2265,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
#ifdef CONFIG_COMPAT
int compat_tcp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int optlen)
+ char __user *optval, unsigned int optlen)
{
if (level != SOL_TCP)
return inet_csk_compat_setsockopt(sk, level, optname,
@@ -2353,8 +2386,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
val = (val ? : sysctl_tcp_fin_timeout) / HZ;
break;
case TCP_DEFER_ACCEPT:
- val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 :
- ((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
+ val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
+ TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ);
break;
case TCP_WINDOW_CLAMP:
val = tp->window_clamp;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 624c3c9b3c2b..4c03598ed924 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -641,8 +641,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
if (!(flg & TCP_FLAG_ACK))
return NULL;
- /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
- if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
+ /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */
+ if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
inet_rsk(req)->acked = 1;
return NULL;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 5200aab0ca97..fcd278a7080e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -361,6 +361,7 @@ static inline int tcp_urg_mode(const struct tcp_sock *tp)
#define OPTION_SACK_ADVERTISE (1 << 0)
#define OPTION_TS (1 << 1)
#define OPTION_MD5 (1 << 2)
+#define OPTION_WSCALE (1 << 3)
struct tcp_out_options {
u8 options; /* bit field of OPTION_* */
@@ -427,7 +428,7 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp,
TCPOLEN_SACK_PERM);
}
- if (unlikely(opts->ws)) {
+ if (unlikely(OPTION_WSCALE & opts->options)) {
*ptr++ = htonl((TCPOPT_NOP << 24) |
(TCPOPT_WINDOW << 16) |
(TCPOLEN_WINDOW << 8) |
@@ -494,8 +495,8 @@ static unsigned tcp_syn_options(struct sock *sk, struct sk_buff *skb,
}
if (likely(sysctl_tcp_window_scaling)) {
opts->ws = tp->rx_opt.rcv_wscale;
- if (likely(opts->ws))
- size += TCPOLEN_WSCALE_ALIGNED;
+ opts->options |= OPTION_WSCALE;
+ size += TCPOLEN_WSCALE_ALIGNED;
}
if (likely(sysctl_tcp_sack)) {
opts->options |= OPTION_SACK_ADVERTISE;
@@ -537,8 +538,8 @@ static unsigned tcp_synack_options(struct sock *sk,
if (likely(ireq->wscale_ok)) {
opts->ws = ireq->rcv_wscale;
- if (likely(opts->ws))
- size += TCPOLEN_WSCALE_ALIGNED;
+ opts->options |= OPTION_WSCALE;
+ size += TCPOLEN_WSCALE_ALIGNED;
}
if (likely(doing_ts)) {
opts->options |= OPTION_TS;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index ebaaa7f973d7..d0d436d6216c 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -696,6 +696,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (rt == NULL) {
struct flowi fl = { .oif = ipc.oif,
+ .mark = sk->sk_mark,
.nl_u = { .ip4_u =
{ .daddr = faddr,
.saddr = saddr,
@@ -840,6 +841,42 @@ out:
return ret;
}
+
+/**
+ * first_packet_length - return length of first packet in receive queue
+ * @sk: socket
+ *
+ * Drops all bad checksum frames, until a valid one is found.
+ * Returns the length of found skb, or 0 if none is found.
+ */
+static unsigned int first_packet_length(struct sock *sk)
+{
+ struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue;
+ struct sk_buff *skb;
+ unsigned int res;
+
+ __skb_queue_head_init(&list_kill);
+
+ spin_lock_bh(&rcvq->lock);
+ while ((skb = skb_peek(rcvq)) != NULL &&
+ udp_lib_checksum_complete(skb)) {
+ UDP_INC_STATS_BH(sock_net(sk), UDP_MIB_INERRORS,
+ IS_UDPLITE(sk));
+ __skb_unlink(skb, rcvq);
+ __skb_queue_tail(&list_kill, skb);
+ }
+ res = skb ? skb->len : 0;
+ spin_unlock_bh(&rcvq->lock);
+
+ if (!skb_queue_empty(&list_kill)) {
+ lock_sock(sk);
+ __skb_queue_purge(&list_kill);
+ sk_mem_reclaim_partial(sk);
+ release_sock(sk);
+ }
+ return res;
+}
+
/*
* IOCTL requests applicable to the UDP protocol
*/
@@ -856,21 +893,16 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
case SIOCINQ:
{
- struct sk_buff *skb;
- unsigned long amount;
+ unsigned int amount = first_packet_length(sk);
- amount = 0;
- spin_lock_bh(&sk->sk_receive_queue.lock);
- skb = skb_peek(&sk->sk_receive_queue);
- if (skb != NULL) {
+ if (amount)
/*
* We will only return the amount
* of this packet since that is all
* that will be read.
*/
- amount = skb->len - sizeof(struct udphdr);
- }
- spin_unlock_bh(&sk->sk_receive_queue.lock);
+ amount -= sizeof(struct udphdr);
+
return put_user(amount, (int __user *)arg);
}
@@ -1359,7 +1391,7 @@ void udp_destroy_sock(struct sock *sk)
* Socket option code for UDP
*/
int udp_lib_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int optlen,
+ char __user *optval, unsigned int optlen,
int (*push_pending_frames)(struct sock *))
{
struct udp_sock *up = udp_sk(sk);
@@ -1441,7 +1473,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
EXPORT_SYMBOL(udp_lib_setsockopt);
int udp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int optlen)
+ char __user *optval, unsigned int optlen)
{
if (level == SOL_UDP || level == SOL_UDPLITE)
return udp_lib_setsockopt(sk, level, optname, optval, optlen,
@@ -1451,7 +1483,7 @@ int udp_setsockopt(struct sock *sk, int level, int optname,
#ifdef CONFIG_COMPAT
int compat_udp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int optlen)
+ char __user *optval, unsigned int optlen)
{
if (level == SOL_UDP || level == SOL_UDPLITE)
return udp_lib_setsockopt(sk, level, optname, optval, optlen,
@@ -1539,29 +1571,11 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
unsigned int mask = datagram_poll(file, sock, wait);
struct sock *sk = sock->sk;
- int is_lite = IS_UDPLITE(sk);
/* Check for false positives due to checksum errors */
- if ((mask & POLLRDNORM) &&
- !(file->f_flags & O_NONBLOCK) &&
- !(sk->sk_shutdown & RCV_SHUTDOWN)) {
- struct sk_buff_head *rcvq = &sk->sk_receive_queue;
- struct sk_buff *skb;
-
- spin_lock_bh(&rcvq->lock);
- while ((skb = skb_peek(rcvq)) != NULL &&
- udp_lib_checksum_complete(skb)) {
- UDP_INC_STATS_BH(sock_net(sk),
- UDP_MIB_INERRORS, is_lite);
- __skb_unlink(skb, rcvq);
- kfree_skb(skb);
- }
- spin_unlock_bh(&rcvq->lock);
-
- /* nothing to see, move along */
- if (skb == NULL)
- mask &= ~(POLLIN | POLLRDNORM);
- }
+ if ((mask & POLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
+ !(sk->sk_shutdown & RCV_SHUTDOWN) && !first_packet_length(sk))
+ mask &= ~(POLLIN | POLLRDNORM);
return mask;
diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h
index 9f4a6165f722..aaad650d47d9 100644
--- a/net/ipv4/udp_impl.h
+++ b/net/ipv4/udp_impl.h
@@ -11,13 +11,13 @@ extern void __udp4_lib_err(struct sk_buff *, u32, struct udp_table *);
extern int udp_v4_get_port(struct sock *sk, unsigned short snum);
extern int udp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int optlen);
+ char __user *optval, unsigned int optlen);
extern int udp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
#ifdef CONFIG_COMPAT
extern int compat_udp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int optlen);
+ char __user *optval, unsigned int optlen);
extern int compat_udp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
#endif
OpenPOWER on IntegriCloud