summaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--net/ipv4/tcp.c152
1 files changed, 73 insertions, 79 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 46febcacb729..9bcdec3ad772 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -282,11 +282,9 @@ int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);
-long sysctl_tcp_mem[3] __read_mostly;
int sysctl_tcp_wmem[3] __read_mostly;
int sysctl_tcp_rmem[3] __read_mostly;
-EXPORT_SYMBOL(sysctl_tcp_mem);
EXPORT_SYMBOL(sysctl_tcp_rmem);
EXPORT_SYMBOL(sysctl_tcp_wmem);
@@ -374,7 +372,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
unsigned int mask;
struct sock *sk = sock->sk;
- struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
sock_poll_wait(file, sk_sleep(sk), wait);
if (sk->sk_state == TCP_LISTEN)
@@ -524,11 +522,11 @@ EXPORT_SYMBOL(tcp_ioctl);
static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
{
- TCP_SKB_CB(skb)->flags |= TCPHDR_PSH;
+ TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH;
tp->pushed_seq = tp->write_seq;
}
-static inline int forced_push(struct tcp_sock *tp)
+static inline int forced_push(const struct tcp_sock *tp)
{
return after(tp->write_seq, tp->pushed_seq + (tp->max_window >> 1));
}
@@ -540,7 +538,7 @@ static inline void skb_entail(struct sock *sk, struct sk_buff *skb)
skb->csum = 0;
tcb->seq = tcb->end_seq = tp->write_seq;
- tcb->flags = TCPHDR_ACK;
+ tcb->tcp_flags = TCPHDR_ACK;
tcb->sacked = 0;
skb_header_release(skb);
tcp_add_write_queue_tail(sk, skb);
@@ -813,7 +811,7 @@ new_segment:
goto wait_for_memory;
if (can_coalesce) {
- skb_shinfo(skb)->frags[i - 1].size += copy;
+ skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
get_page(page);
skb_fill_page_desc(skb, i, page, offset, copy);
@@ -830,7 +828,7 @@ new_segment:
skb_shinfo(skb)->gso_segs = 0;
if (!copied)
- TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH;
+ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
copied += copy;
poffset += copy;
@@ -888,18 +886,18 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset,
}
EXPORT_SYMBOL(tcp_sendpage);
-#define TCP_PAGE(sk) (sk->sk_sndmsg_page)
-#define TCP_OFF(sk) (sk->sk_sndmsg_off)
-
-static inline int select_size(struct sock *sk, int sg)
+static inline int select_size(const struct sock *sk, bool sg)
{
- struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
int tmp = tp->mss_cache;
if (sg) {
- if (sk_can_gso(sk))
- tmp = 0;
- else {
+ if (sk_can_gso(sk)) {
+ /* Small frames wont use a full page:
+ * Payload will immediately follow tcp header.
+ */
+ tmp = SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER);
+ } else {
int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
if (tmp >= pgbreak &&
@@ -917,9 +915,9 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
struct iovec *iov;
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
- int iovlen, flags;
+ int iovlen, flags, err, copied;
int mss_now, size_goal;
- int sg, err, copied;
+ bool sg;
long timeo;
lock_sock(sk);
@@ -946,7 +944,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
goto out_err;
- sg = sk->sk_route_caps & NETIF_F_SG;
+ sg = !!(sk->sk_route_caps & NETIF_F_SG);
while (--iovlen >= 0) {
size_t seglen = iov->iov_len;
@@ -1005,8 +1003,13 @@ new_segment:
} else {
int merge = 0;
int i = skb_shinfo(skb)->nr_frags;
- struct page *page = TCP_PAGE(sk);
- int off = TCP_OFF(sk);
+ struct page *page = sk->sk_sndmsg_page;
+ int off;
+
+ if (page && page_count(page) == 1)
+ sk->sk_sndmsg_off = 0;
+
+ off = sk->sk_sndmsg_off;
if (skb_can_coalesce(skb, i, page, off) &&
off != PAGE_SIZE) {
@@ -1023,7 +1026,7 @@ new_segment:
} else if (page) {
if (off == PAGE_SIZE) {
put_page(page);
- TCP_PAGE(sk) = page = NULL;
+ sk->sk_sndmsg_page = page = NULL;
off = 0;
}
} else
@@ -1049,32 +1052,31 @@ new_segment:
/* If this page was new, give it to the
* socket so it does not get leaked.
*/
- if (!TCP_PAGE(sk)) {
- TCP_PAGE(sk) = page;
- TCP_OFF(sk) = 0;
+ if (!sk->sk_sndmsg_page) {
+ sk->sk_sndmsg_page = page;
+ sk->sk_sndmsg_off = 0;
}
goto do_error;
}
/* Update the skb. */
if (merge) {
- skb_shinfo(skb)->frags[i - 1].size +=
- copy;
+ skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
} else {
skb_fill_page_desc(skb, i, page, off, copy);
- if (TCP_PAGE(sk)) {
+ if (sk->sk_sndmsg_page) {
get_page(page);
} else if (off + copy < PAGE_SIZE) {
get_page(page);
- TCP_PAGE(sk) = page;
+ sk->sk_sndmsg_page = page;
}
}
- TCP_OFF(sk) = off + copy;
+ sk->sk_sndmsg_off = off + copy;
}
if (!copied)
- TCP_SKB_CB(skb)->flags &= ~TCPHDR_PSH;
+ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH;
tp->write_seq += copy;
TCP_SKB_CB(skb)->end_seq += copy;
@@ -1194,13 +1196,11 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied)
struct tcp_sock *tp = tcp_sk(sk);
int time_to_ack = 0;
-#if TCP_DEBUG
struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
WARN(skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq),
"cleanup rbuf bug: copied %X seq %X rcvnxt %X\n",
tp->copied_seq, TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt);
-#endif
if (inet_csk_ack_scheduled(sk)) {
const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2409,7 +2409,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
unsigned int optlen)
{
- struct inet_connection_sock *icsk = inet_csk(sk);
+ const struct inet_connection_sock *icsk = inet_csk(sk);
if (level != SOL_TCP)
return icsk->icsk_af_ops->setsockopt(sk, level, optname,
@@ -2431,9 +2431,9 @@ EXPORT_SYMBOL(compat_tcp_setsockopt);
#endif
/* Return information about state of tcp endpoint in API format. */
-void tcp_get_info(struct sock *sk, struct tcp_info *info)
+void tcp_get_info(const struct sock *sk, struct tcp_info *info)
{
- struct tcp_sock *tp = tcp_sk(sk);
+ const struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 now = tcp_time_stamp;
@@ -2455,8 +2455,10 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale;
}
- if (tp->ecn_flags&TCP_ECN_OK)
+ if (tp->ecn_flags & TCP_ECN_OK)
info->tcpi_options |= TCPI_OPT_ECN;
+ if (tp->ecn_flags & TCP_ECN_SEEN)
+ info->tcpi_options |= TCPI_OPT_ECN_SEEN;
info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto);
info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato);
@@ -2654,7 +2656,8 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
EXPORT_SYMBOL(compat_tcp_getsockopt);
#endif
-struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features)
+struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
+ netdev_features_t features)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
struct tcphdr *th;
@@ -2857,26 +2860,25 @@ EXPORT_SYMBOL(tcp_gro_complete);
#ifdef CONFIG_TCP_MD5SIG
static unsigned long tcp_md5sig_users;
-static struct tcp_md5sig_pool * __percpu *tcp_md5sig_pool;
+static struct tcp_md5sig_pool __percpu *tcp_md5sig_pool;
static DEFINE_SPINLOCK(tcp_md5sig_pool_lock);
-static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool * __percpu *pool)
+static void __tcp_free_md5sig_pool(struct tcp_md5sig_pool __percpu *pool)
{
int cpu;
+
for_each_possible_cpu(cpu) {
- struct tcp_md5sig_pool *p = *per_cpu_ptr(pool, cpu);
- if (p) {
- if (p->md5_desc.tfm)
- crypto_free_hash(p->md5_desc.tfm);
- kfree(p);
- }
+ struct tcp_md5sig_pool *p = per_cpu_ptr(pool, cpu);
+
+ if (p->md5_desc.tfm)
+ crypto_free_hash(p->md5_desc.tfm);
}
free_percpu(pool);
}
void tcp_free_md5sig_pool(void)
{
- struct tcp_md5sig_pool * __percpu *pool = NULL;
+ struct tcp_md5sig_pool __percpu *pool = NULL;
spin_lock_bh(&tcp_md5sig_pool_lock);
if (--tcp_md5sig_users == 0) {
@@ -2889,30 +2891,24 @@ void tcp_free_md5sig_pool(void)
}
EXPORT_SYMBOL(tcp_free_md5sig_pool);
-static struct tcp_md5sig_pool * __percpu *
+static struct tcp_md5sig_pool __percpu *
__tcp_alloc_md5sig_pool(struct sock *sk)
{
int cpu;
- struct tcp_md5sig_pool * __percpu *pool;
+ struct tcp_md5sig_pool __percpu *pool;
- pool = alloc_percpu(struct tcp_md5sig_pool *);
+ pool = alloc_percpu(struct tcp_md5sig_pool);
if (!pool)
return NULL;
for_each_possible_cpu(cpu) {
- struct tcp_md5sig_pool *p;
struct crypto_hash *hash;
- p = kzalloc(sizeof(*p), sk->sk_allocation);
- if (!p)
- goto out_free;
- *per_cpu_ptr(pool, cpu) = p;
-
hash = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
if (!hash || IS_ERR(hash))
goto out_free;
- p->md5_desc.tfm = hash;
+ per_cpu_ptr(pool, cpu)->md5_desc.tfm = hash;
}
return pool;
out_free:
@@ -2920,9 +2916,9 @@ out_free:
return NULL;
}
-struct tcp_md5sig_pool * __percpu *tcp_alloc_md5sig_pool(struct sock *sk)
+struct tcp_md5sig_pool __percpu *tcp_alloc_md5sig_pool(struct sock *sk)
{
- struct tcp_md5sig_pool * __percpu *pool;
+ struct tcp_md5sig_pool __percpu *pool;
int alloc = 0;
retry:
@@ -2941,7 +2937,7 @@ retry:
if (alloc) {
/* we cannot hold spinlock here because this may sleep. */
- struct tcp_md5sig_pool * __percpu *p;
+ struct tcp_md5sig_pool __percpu *p;
p = __tcp_alloc_md5sig_pool(sk);
spin_lock_bh(&tcp_md5sig_pool_lock);
@@ -2974,7 +2970,7 @@ EXPORT_SYMBOL(tcp_alloc_md5sig_pool);
*/
struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
{
- struct tcp_md5sig_pool * __percpu *p;
+ struct tcp_md5sig_pool __percpu *p;
local_bh_disable();
@@ -2985,7 +2981,7 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void)
spin_unlock(&tcp_md5sig_pool_lock);
if (p)
- return *this_cpu_ptr(p);
+ return this_cpu_ptr(p);
local_bh_enable();
return NULL;
@@ -3000,23 +2996,25 @@ void tcp_put_md5sig_pool(void)
EXPORT_SYMBOL(tcp_put_md5sig_pool);
int tcp_md5_hash_header(struct tcp_md5sig_pool *hp,
- struct tcphdr *th)
+ const struct tcphdr *th)
{
struct scatterlist sg;
+ struct tcphdr hdr;
int err;
- __sum16 old_checksum = th->check;
- th->check = 0;
+ /* We are not allowed to change tcphdr, make a local copy */
+ memcpy(&hdr, th, sizeof(hdr));
+ hdr.check = 0;
+
/* options aren't included in the hash */
- sg_init_one(&sg, th, sizeof(struct tcphdr));
- err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(struct tcphdr));
- th->check = old_checksum;
+ sg_init_one(&sg, &hdr, sizeof(hdr));
+ err = crypto_hash_update(&hp->md5_desc, &sg, sizeof(hdr));
return err;
}
EXPORT_SYMBOL(tcp_md5_hash_header);
int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
- struct sk_buff *skb, unsigned header_len)
+ const struct sk_buff *skb, unsigned int header_len)
{
struct scatterlist sg;
const struct tcphdr *tp = tcp_hdr(skb);
@@ -3035,8 +3033,9 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
for (i = 0; i < shi->nr_frags; ++i) {
const struct skb_frag_struct *f = &shi->frags[i];
- sg_set_page(&sg, f->page, f->size, f->page_offset);
- if (crypto_hash_update(desc, &sg, f->size))
+ struct page *page = skb_frag_page(f);
+ sg_set_page(&sg, page, skb_frag_size(f), f->page_offset);
+ if (crypto_hash_update(desc, &sg, skb_frag_size(f)))
return 1;
}
@@ -3048,7 +3047,7 @@ int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp,
}
EXPORT_SYMBOL(tcp_md5_hash_skb_data);
-int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, struct tcp_md5sig_key *key)
+int tcp_md5_hash_key(struct tcp_md5sig_pool *hp, const struct tcp_md5sig_key *key)
{
struct scatterlist sg;
@@ -3277,14 +3276,9 @@ void __init tcp_init(void)
sysctl_tcp_max_orphans = cnt / 2;
sysctl_max_syn_backlog = max(128, cnt / 256);
- limit = nr_free_buffer_pages() / 8;
- limit = max(limit, 128UL);
- sysctl_tcp_mem[0] = limit / 4 * 3;
- sysctl_tcp_mem[1] = limit;
- sysctl_tcp_mem[2] = sysctl_tcp_mem[0] * 2;
-
/* Set per-socket limits to no more than 1/128 the pressure threshold */
- limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7);
+ limit = ((unsigned long)init_net.ipv4.sysctl_tcp_mem[1])
+ << (PAGE_SHIFT - 7);
max_share = min(4UL*1024*1024, limit);
sysctl_tcp_wmem[0] = SK_MEM_QUANTUM;
OpenPOWER on IntegriCloud