diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/fib_semantics.c | 9 | ||||
-rw-r--r-- | net/ipv4/fib_trie.c | 14 | ||||
-rw-r--r-- | net/ipv4/icmp.c | 15 | ||||
-rw-r--r-- | net/ipv4/inetpeer.c | 11 | ||||
-rw-r--r-- | net/ipv4/ip_fragment.c | 8 | ||||
-rw-r--r-- | net/ipv4/ip_gre.c | 21 | ||||
-rw-r--r-- | net/ipv4/ip_sockglue.c | 3 | ||||
-rw-r--r-- | net/ipv4/ipcomp.c | 2 | ||||
-rw-r--r-- | net/ipv4/ipip.c | 20 | ||||
-rw-r--r-- | net/ipv4/ipmr.c | 6 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_conntrack_core.c | 5 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_nat_standalone.c | 4 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_queue.c | 7 | ||||
-rw-r--r-- | net/ipv4/netfilter/ipt_ECN.c | 17 | ||||
-rw-r--r-- | net/ipv4/netfilter/ipt_TCPMSS.c | 7 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 23 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 135 | ||||
-rw-r--r-- | net/ipv4/udp.c | 34 |
19 files changed, 195 insertions, 148 deletions
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c886b28ba9f5..e278cb9d0075 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -593,10 +593,13 @@ static void fib_hash_move(struct hlist_head *new_info_hash, struct hlist_head *new_laddrhash, unsigned int new_size) { + struct hlist_head *old_info_hash, *old_laddrhash; unsigned int old_size = fib_hash_size; - unsigned int i; + unsigned int i, bytes; write_lock(&fib_info_lock); + old_info_hash = fib_info_hash; + old_laddrhash = fib_info_laddrhash; fib_hash_size = new_size; for (i = 0; i < old_size; i++) { @@ -636,6 +639,10 @@ static void fib_hash_move(struct hlist_head *new_info_hash, fib_info_laddrhash = new_laddrhash; write_unlock(&fib_info_lock); + + bytes = old_size * sizeof(struct hlist_head *); + fib_hash_free(old_info_hash, bytes); + fib_hash_free(old_laddrhash, bytes); } struct fib_info * diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index a701405fab0b..45efd5f4741b 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1333,9 +1333,9 @@ err:; } static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp, - struct fib_result *res, int *err) + struct fib_result *res) { - int i; + int err, i; t_key mask; struct leaf_info *li; struct hlist_head *hhead = &l->list; @@ -1348,18 +1348,18 @@ static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *pl if (l->key != (key & mask)) continue; - if (((*err) = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) == 0) { + if ((err = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) <= 0) { *plen = i; #ifdef CONFIG_IP_FIB_TRIE_STATS t->stats.semantic_match_passed++; #endif - return 1; + return err; } #ifdef CONFIG_IP_FIB_TRIE_STATS t->stats.semantic_match_miss++; #endif } - return 0; + return 1; } static int @@ -1386,7 +1386,7 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result /* Just a leaf? */ if (IS_LEAF(n)) { - if (check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret)) + if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0) goto found; goto failed; } @@ -1508,7 +1508,7 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result continue; } if (IS_LEAF(n)) { - if (check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret)) + if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0) goto found; } backtrace: diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 279f57abfecb..badfc5849973 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -349,12 +349,12 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param, { struct sk_buff *skb; - ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param, - icmp_param->data_len+icmp_param->head_len, - icmp_param->head_len, - ipc, rt, MSG_DONTWAIT); - - if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) { + if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param, + icmp_param->data_len+icmp_param->head_len, + icmp_param->head_len, + ipc, rt, MSG_DONTWAIT) < 0) + ip_flush_pending_frames(icmp_socket->sk); + else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) { struct icmphdr *icmph = skb->h.icmph; unsigned int csum = 0; struct sk_buff *skb1; @@ -936,8 +936,7 @@ int icmp_rcv(struct sk_buff *skb) case CHECKSUM_HW: if (!(u16)csum_fold(skb->csum)) break; - NETDEBUG(if (net_ratelimit()) - printk(KERN_DEBUG "icmp v4 hw csum failure\n")); + LIMIT_NETDEBUG(printk(KERN_DEBUG "icmp v4 hw csum failure\n")); case CHECKSUM_NONE: if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) goto error; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 95473953c406..ab18a853d7ce 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -450,10 +450,13 @@ static void peer_check_expire(unsigned long dummy) /* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime * interval depending on the total number of entries (more entries, * less interval). */ - peer_periodic_timer.expires = jiffies - + inet_peer_gc_maxtime - - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ * - peer_total / inet_peer_threshold * HZ; + if (peer_total >= inet_peer_threshold) + peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime; + else + peer_periodic_timer.expires = jiffies + + inet_peer_gc_maxtime + - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ * + peer_total / inet_peer_threshold * HZ; add_timer(&peer_periodic_timer); } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 7f68e27eb4ea..eb377ae15305 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -377,7 +377,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user) return ip_frag_intern(hash, qp); out_nomem: - NETDEBUG(if (net_ratelimit()) printk(KERN_ERR "ip_frag_create: no memory left !\n")); + LIMIT_NETDEBUG(printk(KERN_ERR "ip_frag_create: no memory left !\n")); return NULL; } @@ -625,10 +625,8 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) return head; out_nomem: - NETDEBUG(if (net_ratelimit()) - printk(KERN_ERR - "IP: queue_glue: no memory for gluing queue %p\n", - qp)); + LIMIT_NETDEBUG(printk(KERN_ERR "IP: queue_glue: no memory for gluing " + "queue %p\n", qp)); goto out_fail; out_oversize: if (net_ratelimit()) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 884835522224..f0d5740d7e22 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -290,7 +290,6 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int dev_hold(dev); ipgre_tunnel_link(nt); - /* Do not decrement MOD_USE_COUNT here. */ return nt; failed: @@ -1277,12 +1276,28 @@ err1: goto out; } -static void ipgre_fini(void) +static void __exit ipgre_destroy_tunnels(void) +{ + int prio; + + for (prio = 0; prio < 4; prio++) { + int h; + for (h = 0; h < HASH_SIZE; h++) { + struct ip_tunnel *t; + while ((t = tunnels[prio][h]) != NULL) + unregister_netdevice(t->dev); + } + } +} + +static void __exit ipgre_fini(void) { if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) printk(KERN_INFO "ipgre close: can't remove protocol\n"); - unregister_netdev(ipgre_fb_tunnel_dev); + rtnl_lock(); + ipgre_destroy_tunnels(); + rtnl_unlock(); } module_init(ipgre_init); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index fc7c481d0d79..ff4bd067b397 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -848,6 +848,9 @@ mc_msf_out: case IP_IPSEC_POLICY: case IP_XFRM_POLICY: + err = -EPERM; + if (!capable(CAP_NET_ADMIN)) + break; err = xfrm_user_policy(sk, optname, optval, optlen); break; diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 2065944fd9e5..7ded6e60f43a 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -358,7 +358,7 @@ static struct crypto_tfm **ipcomp_alloc_tfms(const char *alg_name) int cpu; /* This can be any valid CPU ID so we don't need locking. */ - cpu = smp_processor_id(); + cpu = raw_smp_processor_id(); list_for_each_entry(pos, &ipcomp_tfms_list, list) { struct crypto_tfm *tfm; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index c3947cd566b7..c05c1df0bb04 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -255,7 +255,6 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c dev_hold(dev); ipip_tunnel_link(nt); - /* Do not decrement MOD_USE_COUNT here. */ return nt; failed: @@ -920,12 +919,29 @@ static int __init ipip_init(void) goto out; } +static void __exit ipip_destroy_tunnels(void) +{ + int prio; + + for (prio = 1; prio < 4; prio++) { + int h; + for (h = 0; h < HASH_SIZE; h++) { + struct ip_tunnel *t; + while ((t = tunnels[prio][h]) != NULL) + unregister_netdevice(t->dev); + } + } +} + static void __exit ipip_fini(void) { if (ipip_unregister() < 0) printk(KERN_INFO "ipip close: can't deregister tunnel\n"); - unregister_netdev(ipip_fb_tunnel_dev); + rtnl_lock(); + ipip_destroy_tunnels(); + unregister_netdevice(ipip_fb_tunnel_dev); + rtnl_unlock(); } module_init(ipip_init); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 7833d920bdba..dc806b578427 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -362,7 +362,7 @@ out: /* Fill oifs list. It is called under write locked mrt_lock. */ -static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls) +static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls) { int vifi; @@ -727,7 +727,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) if (c != NULL) { write_lock_bh(&mrt_lock); c->mfc_parent = mfc->mfcc_parent; - ipmr_update_threshoulds(c, mfc->mfcc_ttls); + ipmr_update_thresholds(c, mfc->mfcc_ttls); if (!mrtsock) c->mfc_flags |= MFC_STATIC; write_unlock_bh(&mrt_lock); @@ -744,7 +744,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock) c->mfc_origin=mfc->mfcc_origin.s_addr; c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr; c->mfc_parent=mfc->mfcc_parent; - ipmr_update_threshoulds(c, mfc->mfcc_ttls); + ipmr_update_thresholds(c, mfc->mfcc_ttls); if (!mrtsock) c->mfc_flags |= MFC_STATIC; diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 86f04e41dd8e..a7f0c821a9b2 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -513,6 +513,11 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, #ifdef CONFIG_IP_NF_CONNTRACK_MARK conntrack->mark = exp->master->mark; #endif +#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \ + defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) + /* this is ugly, but there is no other place where to put it */ + conntrack->nat.masq_index = exp->master->nat.masq_index; +#endif nf_conntrack_get(&conntrack->master->ct_general); CONNTRACK_STAT_INC(expect_new); } else { diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index bc59d0d6e89e..91d5ea1dbbc9 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -102,6 +102,10 @@ ip_nat_fn(unsigned int hooknum, return NF_ACCEPT; } + /* Don't try to NAT if this packet is not conntracked */ + if (ct == &ip_conntrack_untracked) + return NF_ACCEPT; + switch (ctinfo) { case IP_CT_RELATED: case IP_CT_RELATED+IP_CT_IS_REPLY: diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index eda1fba431a4..c6baa8174389 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -214,6 +214,12 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) break; case IPQ_COPY_PACKET: + if (entry->skb->ip_summed == CHECKSUM_HW && + (*errp = skb_checksum_help(entry->skb, + entry->info->outdev == NULL))) { + read_unlock_bh(&queue_lock); + return NULL; + } if (copy_range == 0 || copy_range > entry->skb->len) data_len = entry->skb->len; else @@ -385,6 +391,7 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) if (!skb_ip_make_writable(&e->skb, v->data_len)) return -ENOMEM; memcpy(e->skb->data, v->payload, v->data_len); + e->skb->ip_summed = CHECKSUM_NONE; e->skb->nfcache |= NFC_ALTERED; /* diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index ada9911118e9..94a0ce1c1c9d 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -61,16 +61,20 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) if (!tcph) return 0; - if (!(einfo->operation & IPT_ECN_OP_SET_ECE - || tcph->ece == einfo->proto.tcp.ece) - && (!(einfo->operation & IPT_ECN_OP_SET_CWR - || tcph->cwr == einfo->proto.tcp.cwr))) + if ((!(einfo->operation & IPT_ECN_OP_SET_ECE) || + tcph->ece == einfo->proto.tcp.ece) && + ((!(einfo->operation & IPT_ECN_OP_SET_CWR) || + tcph->cwr == einfo->proto.tcp.cwr))) return 1; if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) return 0; tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4; + if ((*pskb)->ip_summed == CHECKSUM_HW && + skb_checksum_help(*pskb, inward)) + return 0; + diffs[0] = ((u_int16_t *)tcph)[6]; if (einfo->operation & IPT_ECN_OP_SET_ECE) tcph->ece = einfo->proto.tcp.ece; @@ -79,13 +83,10 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) diffs[1] = ((u_int16_t *)tcph)[6]; diffs[0] = diffs[0] ^ 0xFFFF; - if ((*pskb)->ip_summed != CHECKSUM_HW) + if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) tcph->check = csum_fold(csum_partial((char *)diffs, sizeof(diffs), tcph->check^0xFFFF)); - else - if (skb_checksum_help(*pskb, inward)) - return 0; (*pskb)->nfcache |= NFC_ALTERED; return 1; } diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c index 1049050b2bfb..7b84a254440e 100644 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/net/ipv4/netfilter/ipt_TCPMSS.c @@ -61,6 +61,10 @@ ipt_tcpmss_target(struct sk_buff **pskb, if (!skb_ip_make_writable(pskb, (*pskb)->len)) return NF_DROP; + if ((*pskb)->ip_summed == CHECKSUM_HW && + skb_checksum_help(*pskb, out == NULL)) + return NF_DROP; + iph = (*pskb)->nh.iph; tcplen = (*pskb)->len - iph->ihl*4; @@ -186,9 +190,6 @@ ipt_tcpmss_target(struct sk_buff **pskb, newmss); retmodified: - /* We never hw checksum SYN packets. */ - BUG_ON((*pskb)->ip_summed == CHECKSUM_HW); - (*pskb)->nfcache |= NFC_UNKNOWN | NFC_ALTERED; return IPT_CONTINUE; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ddb6ce4ecff2..69b1fcf70077 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -584,7 +584,7 @@ static inline void skb_entail(struct sock *sk, struct tcp_sock *tp, sk_charge_skb(sk, skb); if (!sk->sk_send_head) sk->sk_send_head = skb; - else if (tp->nonagle&TCP_NAGLE_PUSH) + if (tp->nonagle & TCP_NAGLE_PUSH) tp->nonagle &= ~TCP_NAGLE_PUSH; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 62f62bb05c2a..67c670886c1f 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -242,9 +242,14 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum) tcp_port_rover = rover; spin_unlock(&tcp_portalloc_lock); - /* Exhausted local port range during search? */ + /* Exhausted local port range during search? It is not + * possible for us to be holding one of the bind hash + * locks if this test triggers, because if 'remaining' + * drops to zero, we broke out of the do/while loop at + * the top level, not from the 'break;' statement. + */ ret = 1; - if (remaining <= 0) + if (unlikely(remaining <= 0)) goto fail; /* OK, here is the one we will use. HEAD is @@ -1494,12 +1499,11 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) * to destinations, already remembered * to the moment of synflood. */ - NETDEBUG(if (net_ratelimit()) \ - printk(KERN_DEBUG "TCP: drop open " - "request from %u.%u." - "%u.%u/%u\n", \ - NIPQUAD(saddr), - ntohs(skb->h.th->source))); + LIMIT_NETDEBUG(printk(KERN_DEBUG "TCP: drop open " + "request from %u.%u." + "%u.%u/%u\n", + NIPQUAD(saddr), + ntohs(skb->h.th->source))); dst_release(dst); goto drop_and_free; } @@ -1627,8 +1631,7 @@ static int tcp_v4_checksum_init(struct sk_buff *skb) skb->nh.iph->daddr, skb->csum)) return 0; - NETDEBUG(if (net_ratelimit()) - printk(KERN_DEBUG "hw tcp v4 csum failed\n")); + LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v4 csum failed\n")); skb->ip_summed = CHECKSUM_NONE; } if (skb->len <= 76) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e3f8ea1bfa9c..dd30dd137b74 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -403,11 +403,9 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb) sk->sk_send_head = skb; } -static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb) +static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { - struct tcp_sock *tp = tcp_sk(sk); - - if (skb->len <= tp->mss_cache || + if (skb->len <= mss_now || !(sk->sk_route_caps & NETIF_F_TSO)) { /* Avoid the costly divide in the normal * non-TSO case. @@ -417,10 +415,10 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb) } else { unsigned int factor; - factor = skb->len + (tp->mss_cache - 1); - factor /= tp->mss_cache; + factor = skb->len + (mss_now - 1); + factor /= mss_now; skb_shinfo(skb)->tso_segs = factor; - skb_shinfo(skb)->tso_size = tp->mss_cache; + skb_shinfo(skb)->tso_size = mss_now; } } @@ -429,7 +427,7 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb) * packet to the list. This won't be called frequently, I hope. * Remember, these are still headerless SKBs at this point. */ -static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len) +static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *buff; @@ -492,8 +490,8 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len) } /* Fix up tso_factor for both original and new SKB. */ - tcp_set_skb_tso_segs(sk, skb); - tcp_set_skb_tso_segs(sk, buff); + tcp_set_skb_tso_segs(sk, skb, mss_now); + tcp_set_skb_tso_segs(sk, buff, mss_now); if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { tp->lost_out += tcp_skb_pcount(skb); @@ -569,7 +567,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) * factor and mss. */ if (tcp_skb_pcount(skb) > 1) - tcp_set_skb_tso_segs(sk, skb); + tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk, 1)); return 0; } @@ -734,12 +732,14 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk /* This must be invoked the first time we consider transmitting * SKB onto the wire. */ -static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb) +static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now) { int tso_segs = tcp_skb_pcount(skb); - if (!tso_segs) { - tcp_set_skb_tso_segs(sk, skb); + if (!tso_segs || + (tso_segs > 1 && + skb_shinfo(skb)->tso_size != mss_now)) { + tcp_set_skb_tso_segs(sk, skb, mss_now); tso_segs = tcp_skb_pcount(skb); } return tso_segs; @@ -817,7 +817,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb, struct tcp_sock *tp = tcp_sk(sk); unsigned int cwnd_quota; - tcp_init_tso_segs(sk, skb); + tcp_init_tso_segs(sk, skb, cur_mss); if (!tcp_nagle_test(tp, skb, cur_mss, nonagle)) return 0; @@ -854,14 +854,15 @@ int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp) * know that all the data is in scatter-gather pages, and that the * packet has never been sent out before (and thus is not cloned). */ -static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len) +static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, unsigned int mss_now) { struct sk_buff *buff; int nlen = skb->len - len; u16 flags; /* All of a TSO frame must be composed of paged data. */ - BUG_ON(skb->len != skb->data_len); + if (skb->len != skb->data_len) + return tcp_fragment(sk, skb, len, mss_now); buff = sk_stream_alloc_pskb(sk, 0, 0, GFP_ATOMIC); if (unlikely(buff == NULL)) @@ -887,8 +888,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len) skb_split(skb, buff, len); /* Fix up tso_factor for both original and new SKB. */ - tcp_set_skb_tso_segs(sk, skb); - tcp_set_skb_tso_segs(sk, buff); + tcp_set_skb_tso_segs(sk, skb, mss_now); + tcp_set_skb_tso_segs(sk, buff, mss_now); /* Link BUFF into the send queue. */ skb_header_release(buff); @@ -924,10 +925,6 @@ static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_ limit = min(send_win, cong_win); - /* If sk_send_head can be sent fully now, just do it. */ - if (skb->len <= limit) - return 0; - if (sysctl_tcp_tso_win_divisor) { u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); @@ -972,19 +969,20 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) if (unlikely(sk->sk_state == TCP_CLOSE)) return 0; - skb = sk->sk_send_head; - if (unlikely(!skb)) - return 0; - - tso_segs = tcp_init_tso_segs(sk, skb); - cwnd_quota = tcp_cwnd_test(tp, skb); - if (unlikely(!cwnd_quota)) - goto out; - sent_pkts = 0; - while (likely(tcp_snd_wnd_test(tp, skb, mss_now))) { + while ((skb = sk->sk_send_head)) { + unsigned int limit; + + tso_segs = tcp_init_tso_segs(sk, skb, mss_now); BUG_ON(!tso_segs); + cwnd_quota = tcp_cwnd_test(tp, skb); + if (!cwnd_quota) + break; + + if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now))) + break; + if (tso_segs == 1) { if (unlikely(!tcp_nagle_test(tp, skb, mss_now, (tcp_skb_is_last(sk, skb) ? @@ -995,9 +993,10 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) break; } + limit = mss_now; if (tso_segs > 1) { - u32 limit = tcp_window_allows(tp, skb, - mss_now, cwnd_quota); + limit = tcp_window_allows(tp, skb, + mss_now, cwnd_quota); if (skb->len < limit) { unsigned int trim = skb->len % mss_now; @@ -1005,15 +1004,12 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) if (trim) limit = skb->len - trim; } - if (skb->len > limit) { - if (tso_fragment(sk, skb, limit)) - break; - } - } else if (unlikely(skb->len > mss_now)) { - if (unlikely(tcp_fragment(sk, skb, mss_now))) - break; } + if (skb->len > limit && + unlikely(tso_fragment(sk, skb, limit, mss_now))) + break; + TCP_SKB_CB(skb)->when = tcp_time_stamp; if (unlikely(tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)))) @@ -1026,27 +1022,12 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle) tcp_minshall_update(tp, mss_now, skb); sent_pkts++; - - /* Do not optimize this to use tso_segs. If we chopped up - * the packet above, tso_segs will no longer be valid. - */ - cwnd_quota -= tcp_skb_pcount(skb); - - BUG_ON(cwnd_quota < 0); - if (!cwnd_quota) - break; - - skb = sk->sk_send_head; - if (!skb) - break; - tso_segs = tcp_init_tso_segs(sk, skb); } if (likely(sent_pkts)) { tcp_cwnd_validate(sk, tp); return 0; } -out: return !tp->packets_out && sk->sk_send_head; } @@ -1076,15 +1057,18 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) BUG_ON(!skb || skb->len < mss_now); - tso_segs = tcp_init_tso_segs(sk, skb); + tso_segs = tcp_init_tso_segs(sk, skb, mss_now); cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH); if (likely(cwnd_quota)) { + unsigned int limit; + BUG_ON(!tso_segs); + limit = mss_now; if (tso_segs > 1) { - u32 limit = tcp_window_allows(tp, skb, - mss_now, cwnd_quota); + limit = tcp_window_allows(tp, skb, + mss_now, cwnd_quota); if (skb->len < limit) { unsigned int trim = skb->len % mss_now; @@ -1092,15 +1076,12 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now) if (trim) limit = skb->len - trim; } - if (skb->len > limit) { - if (unlikely(tso_fragment(sk, skb, limit))) - return; - } - } else if (unlikely(skb->len > mss_now)) { - if (unlikely(tcp_fragment(sk, skb, mss_now))) - return; } + if (skb->len > limit && + unlikely(tso_fragment(sk, skb, limit, mss_now))) + return; + /* Send it out now. */ TCP_SKB_CB(skb)->when = tcp_time_stamp; @@ -1386,15 +1367,21 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) if (skb->len > cur_mss) { int old_factor = tcp_skb_pcount(skb); - int new_factor; + int diff; - if (tcp_fragment(sk, skb, cur_mss)) + if (tcp_fragment(sk, skb, cur_mss, cur_mss)) return -ENOMEM; /* We'll try again later. */ /* New SKB created, account for it. */ - new_factor = tcp_skb_pcount(skb); - tp->packets_out -= old_factor - new_factor; - tp->packets_out += tcp_skb_pcount(skb->next); + diff = old_factor - tcp_skb_pcount(skb) - + tcp_skb_pcount(skb->next); + tp->packets_out -= diff; + + if (diff > 0) { + tp->fackets_out -= diff; + if ((int)tp->fackets_out < 0) + tp->fackets_out = 0; + } } /* Collapse two adjacent packets if worthwhile and we can. */ @@ -1991,7 +1978,7 @@ int tcp_write_wakeup(struct sock *sk) skb->len > mss) { seg_size = min(seg_size, mss); TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; - if (tcp_fragment(sk, skb, seg_size)) + if (tcp_fragment(sk, skb, seg_size, mss)) return -1; /* SWS override triggered forced fragmentation. * Disable TSO, the connection is too sick. */ @@ -2000,7 +1987,7 @@ int tcp_write_wakeup(struct sock *sk) sk->sk_route_caps &= ~NETIF_F_TSO; } } else if (!tcp_skb_pcount(skb)) - tcp_set_skb_tso_segs(sk, skb); + tcp_set_skb_tso_segs(sk, skb, mss); TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; TCP_SKB_CB(skb)->when = tcp_time_stamp; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 7c24e64b443f..dc4d07357e3a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -628,7 +628,7 @@ back_from_confirm: /* ... which is an evident application bug. --ANK */ release_sock(sk); - NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp cork app bug 2\n")); + LIMIT_NETDEBUG(printk(KERN_DEBUG "udp cork app bug 2\n")); err = -EINVAL; goto out; } @@ -693,7 +693,7 @@ static int udp_sendpage(struct sock *sk, struct page *page, int offset, if (unlikely(!up->pending)) { release_sock(sk); - NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp cork app bug 3\n")); + LIMIT_NETDEBUG(printk(KERN_DEBUG "udp cork app bug 3\n")); return -EINVAL; } @@ -1102,7 +1102,7 @@ static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, skb->ip_summed = CHECKSUM_UNNECESSARY; if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) return 0; - NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp v4 hw csum failure.\n")); + LIMIT_NETDEBUG(printk(KERN_DEBUG "udp v4 hw csum failure.\n")); skb->ip_summed = CHECKSUM_NONE; } if (skb->ip_summed != CHECKSUM_UNNECESSARY) @@ -1181,14 +1181,13 @@ int udp_rcv(struct sk_buff *skb) return(0); short_packet: - NETDEBUG(if (net_ratelimit()) - printk(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", - NIPQUAD(saddr), - ntohs(uh->source), - ulen, - len, - NIPQUAD(daddr), - ntohs(uh->dest))); + LIMIT_NETDEBUG(printk(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", + NIPQUAD(saddr), + ntohs(uh->source), + ulen, + len, + NIPQUAD(daddr), + ntohs(uh->dest))); no_header: UDP_INC_STATS_BH(UDP_MIB_INERRORS); kfree_skb(skb); @@ -1199,13 +1198,12 @@ csum_error: * RFC1122: OK. Discards the bad packet silently (as far as * the network is concerned, anyway) as per 4.1.3.4 (MUST). */ - NETDEBUG(if (net_ratelimit()) - printk(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", - NIPQUAD(saddr), - ntohs(uh->source), - NIPQUAD(daddr), - ntohs(uh->dest), - ulen)); + LIMIT_NETDEBUG(printk(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", + NIPQUAD(saddr), + ntohs(uh->source), + NIPQUAD(daddr), + ntohs(uh->dest), + ulen)); drop: UDP_INC_STATS_BH(UDP_MIB_INERRORS); kfree_skb(skb); |