diff options
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r-- | net/ipv4/tcp.c | 152 |
1 files changed, 73 insertions, 79 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 46febcacb729..9bcdec3ad772 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -282,11 +282,9 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); -long sysctl_tcp_mem[3] __read_mostly; int sysctl_tcp_wmem[3] __read_mostly; int sysctl_tcp_rmem[3] __read_mostly; -EXPORT_SYMBOL(sysctl_tcp_mem); EXPORT_SYMBOL(sysctl_tcp_rmem); EXPORT_SYMBOL(sysctl_tcp_wmem); @@ -374,7 +372,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) { unsigned int mask; struct sock *sk = sock->sk; - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == TCP_LISTEN) @@ -524,11 +522,11 @@ EXPORT_SYMBOL(tcp_ioctl); static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) { - TCP_SKB_CB(skb)->flags |= TCPHDR_PSH; + TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; tp->pushed_seq = tp->write_seq; } -static inline int forced_push(struct tcp_sock *tp) +static inline int forced_push(const struct tcp_sock *tp) { return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1)); } @@ -540,7 +538,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb) skb->csum = 0; tcb->seq = tcb->end_seq = tp->write_seq; - tcb->flags = TCPHDR_ACK; + tcb->tcp_flags = TCPHDR_ACK; tcb->sacked = 0; skb_header_release(skb); tcp_add_write_queue_tail(sk, skb); @@ -813,7 +811,7 @@ new_segment: goto wait_for_memory; if (can_coalesce) { - skb_shinfo(skb)->frags[i - 1].size += copy; + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); } else { get_page(page); skb_fill_page_desc(skb, i, page, offset, copy); @@ -830,7 +828,7 @@ new_segment: skb_shinfo(skb)->gso_segs = 0; if (!copied) - TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH; + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; copied += copy; poffset += copy; @@ -888,18 +886,18 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, } EXPORT_SYMBOL(tcp_sendpage); -#define TCP_PAGE(sk) (sk->sk_sndmsg_page) -#define TCP_OFF(sk) (sk->sk_sndmsg_off) - -static inline int select_size(struct sock *sk, int sg) +static inline int select_size(const struct sock *sk, bool sg) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); int tmp = tp->mss_cache; if (sg) { - if (sk_can_gso(sk)) - tmp = 0; - else { + if (sk_can_gso(sk)) { + /* Small frames wont use a full page: + * Payload will immediately follow tcp header. + */ + tmp = SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER); + } else { int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER); if (tmp >= pgbreak && @@ -917,9 +915,9 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct iovec *iov; struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - int iovlen, flags; + int iovlen, flags, err, copied; int mss_now, size_goal; - int sg, err, copied; + bool sg; long timeo; lock_sock(sk); @@ -946,7 +944,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto out_err; - sg = sk->sk_route_caps & NETIF_F_SG; + sg = !!(sk->sk_route_caps & NETIF_F_SG); while (--iovlen >= 0) { size_t seglen = iov->iov_len; @@ -1005,8 +1003,13 @@ new_segment: } else { int merge = 0; int i = skb_shinfo(skb)->nr_frags; - struct page *page = TCP_PAGE(sk); - int off = TCP_OFF(sk); + struct page *page = sk->sk_sndmsg_page; + int off; + + if (page && page_count(page) == 1) + sk->sk_sndmsg_off = 0; + + off = sk->sk_sndmsg_off; if (skb_can_coalesce(skb, i, page, off) && off != PAGE_SIZE) { @@ -1023,7 +1026,7 @@ new_segment: } else if (page) { if (off == PAGE_SIZE) { put_page(page); - TCP_PAGE(sk) = page = NULL; + sk->sk_sndmsg_page = page = NULL; off = 0; } } else @@ -1049,32 +1052,31 @@ new_segment: /* If this page was new, give it to the * socket so it does not get leaked. */ - if (!TCP_PAGE(sk)) { - TCP_PAGE(sk) = page; - TCP_OFF(sk) = 0; + if (!sk->sk_sndmsg_page) { + sk->sk_sndmsg_page = page; + sk->sk_sndmsg_off = 0; } goto do_error; } /* Update the skb. */ if (merge) { - skb_shinfo(skb)->frags[i - 1].size += - copy; + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); } else { skb_fill_page_desc(skb, i, page, off, copy); - if (TCP_PAGE(sk)) { + if (sk->sk_sndmsg_page) { get_page(page); } else if (off + copy < PAGE_SIZE) { get_page(page); - TCP_PAGE(sk) = page; + sk->sk_sndmsg_page = page; } } - TCP_OFF(sk) = off + copy; + sk->sk_sndmsg_off = off + copy; } if (!copied) - TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH; + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; tp->write_seq += copy; TCP_SKB_CB(skb)->end_seq += copy; @@ -1194,13 +1196,11 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) struct tcp_sock *tp = tcp_sk(sk); int time_to_ack = 0; -#if TCP_DEBUG struct sk_buff *skb = skb_peek(&sk->sk_receive_queue); WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq), "cleanup rbuf bug: copied %X seq %X rcvnxt %X\n", tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt); -#endif if (inet_csk_ack_scheduled(sk)) { const struct inet_connection_sock *icsk = inet_csk(sk); @@ -2409,7 +2409,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { - struct inet_connection_sock *icsk = inet_csk(sk); + const struct inet_connection_sock *icsk = inet_csk(sk); if (level != SOL_TCP) return icsk->icsk_af_ops->setsockopt(sk, level, optname, @@ -2431,9 +2431,9 @@ EXPORT_SYMBOL(compat_tcp_setsockopt); #endif /* Return information about state of tcp endpoint in API format. */ -void tcp_get_info(struct sock *sk, struct tcp_info *info) +void tcp_get_info(const struct sock *sk, struct tcp_info *info) { - struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_sock *tp = tcp_sk(sk); const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp; @@ -2455,8 +2455,10 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale; } - if (tp->ecn_flags&TCP_ECN_OK) + if (tp->ecn_flags & TCP_ECN_OK) info->tcpi_options |= TCPI_OPT_ECN; + if (tp->ecn_flags & TCP_ECN_SEEN) + info->tcpi_options |= TCPI_OPT_ECN_SEEN; info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto); info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato); @@ -2654,7 +2656,8 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL(compat_tcp_getsockopt); #endif -struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features) +struct sk_buff *tcp_tso_segment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); struct tcphdr *th; @@ -2857,26 +2860,25 @@ EXPORT_SYMBOL(tcp_gro_complete); #ifdef CONFIG_TCP_MD5SIG static unsigned long tcp_md5sig_users; -static struct tcp_md5sig_pool * __percpu *tcp_md5sig_pool; +static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool; static DEFINE_SPINLOCK(tcp_md5sig_pool_lock); -static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool) +static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool) { int cpu; + for_each_possible_cpu(cpu) { - struct tcp_md5sig_pool *p = *per_cpu_ptr(pool, cpu); - if (p) { - if (p->md5_desc.tfm) - crypto_free_hash(p->md5_desc.tfm); - kfree(p); - } + struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu); + + if (p->md5_desc.tfm) + crypto_free_hash(p->md5_desc.tfm); } free_percpu(pool); } void tcp_free_md5sig_pool(void) { - struct tcp_md5sig_pool * __percpu *pool = NULL; + struct tcp_md5sig_pool __percpu *pool = NULL; spin_lock_bh(&tcp_md5sig_pool_lock); if (--tcp_md5sig_users == 0) { @@ -2889,30 +2891,24 @@ void tcp_free_md5sig_pool(void) } EXPORT_SYMBOL(tcp_free_md5sig_pool); -static struct tcp_md5sig_pool * __percpu * +static struct tcp_md5sig_pool __percpu * __tcp_alloc_md5sig_pool(struct sock *sk) { int cpu; - struct tcp_md5sig_pool * __percpu *pool; + struct tcp_md5sig_pool __percpu *pool; - pool = alloc_percpu(struct tcp_md5sig_pool *); + pool = alloc_percpu(struct tcp_md5sig_pool); if (!pool) return NULL; for_each_possible_cpu(cpu) { - struct tcp_md5sig_pool *p; struct crypto_hash *hash; - p = kzalloc(sizeof(*p), sk->sk_allocation); - if (!p) - goto out_free; - *per_cpu_ptr(pool, cpu) = p; - hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); if (!hash || IS_ERR(hash)) goto out_free; - p->md5_desc.tfm = hash; + per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash; } return pool; out_free: @@ -2920,9 +2916,9 @@ out_free: return NULL; } -struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *sk) +struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk) { - struct tcp_md5sig_pool * __percpu *pool; + struct tcp_md5sig_pool __percpu *pool; int alloc = 0; retry: @@ -2941,7 +2937,7 @@ retry: if (alloc) { /* we cannot hold spinlock here because this may sleep. */ - struct tcp_md5sig_pool * __percpu *p; + struct tcp_md5sig_pool __percpu *p; p = __tcp_alloc_md5sig_pool(sk); spin_lock_bh(&tcp_md5sig_pool_lock); @@ -2974,7 +2970,7 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool); */ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) { - struct tcp_md5sig_pool * __percpu *p; + struct tcp_md5sig_pool __percpu *p; local_bh_disable(); @@ -2985,7 +2981,7 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) spin_unlock(&tcp_md5sig_pool_lock); if (p) - return *this_cpu_ptr(p); + return this_cpu_ptr(p); local_bh_enable(); return NULL; @@ -3000,23 +2996,25 @@ void tcp_put_md5sig_pool(void) EXPORT_SYMBOL(tcp_put_md5sig_pool); int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, - struct tcphdr *th) + const struct tcphdr *th) { struct scatterlist sg; + struct tcphdr hdr; int err; - __sum16 old_checksum = th->check; - th->check = 0; + /* We are not allowed to change tcphdr, make a local copy */ + memcpy(&hdr, th, sizeof(hdr)); + hdr.check = 0; + /* options aren't included in the hash */ - sg_init_one(&sg, th, sizeof(struct tcphdr)); - err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(struct tcphdr)); - th->check = old_checksum; + sg_init_one(&sg, &hdr, sizeof(hdr)); + err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(hdr)); return err; } EXPORT_SYMBOL(tcp_md5_hash_header); int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, - struct sk_buff *skb, unsigned header_len) + const struct sk_buff *skb, unsigned int header_len) { struct scatterlist sg; const struct tcphdr *tp = tcp_hdr(skb); @@ -3035,8 +3033,9 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, for (i = 0; i < shi->nr_frags; ++i) { const struct skb_frag_struct *f = &shi->frags[i]; - sg_set_page(&sg, f->page, f->size, f->page_offset); - if (crypto_hash_update(desc, &sg, f->size)) + struct page *page = skb_frag_page(f); + sg_set_page(&sg, page, skb_frag_size(f), f->page_offset); + if (crypto_hash_update(desc, &sg, skb_frag_size(f))) return 1; } @@ -3048,7 +3047,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, } EXPORT_SYMBOL(tcp_md5_hash_skb_data); -int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key) +int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key) { struct scatterlist sg; @@ -3277,14 +3276,9 @@ void __init tcp_init(void) sysctl_tcp_max_orphans = cnt / 2; sysctl_max_syn_backlog = max(128, cnt / 256); - limit = nr_free_buffer_pages() / 8; - limit = max(limit, 128UL); - sysctl_tcp_mem[0] = limit / 4 * 3; - sysctl_tcp_mem[1] = limit; - sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2; - /* Set per-socket limits to no more than 1/128 the pressure threshold */ - limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7); + limit = ((unsigned long)init_net.ipv4.sysctl_tcp_mem[1]) + << (PAGE_SHIFT - 7); max_share = min(4UL*1024*1024, limit); sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; |