summaryrefslogtreecommitdiffstats
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/fib_semantics.c9
-rw-r--r--net/ipv4/fib_trie.c14
-rw-r--r--net/ipv4/icmp.c15
-rw-r--r--net/ipv4/inetpeer.c11
-rw-r--r--net/ipv4/ip_fragment.c8
-rw-r--r--net/ipv4/ip_gre.c21
-rw-r--r--net/ipv4/ip_sockglue.c3
-rw-r--r--net/ipv4/ipcomp.c2
-rw-r--r--net/ipv4/ipip.c20
-rw-r--r--net/ipv4/ipmr.c6
-rw-r--r--net/ipv4/netfilter/ip_conntrack_core.c5
-rw-r--r--net/ipv4/netfilter/ip_nat_standalone.c4
-rw-r--r--net/ipv4/netfilter/ip_queue.c7
-rw-r--r--net/ipv4/netfilter/ipt_ECN.c17
-rw-r--r--net/ipv4/netfilter/ipt_TCPMSS.c7
-rw-r--r--net/ipv4/tcp.c2
-rw-r--r--net/ipv4/tcp_ipv4.c23
-rw-r--r--net/ipv4/tcp_output.c135
-rw-r--r--net/ipv4/udp.c34
19 files changed, 195 insertions, 148 deletions
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index c886b28ba9f5..e278cb9d0075 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -593,10 +593,13 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
struct hlist_head *new_laddrhash,
unsigned int new_size)
{
+ struct hlist_head *old_info_hash, *old_laddrhash;
unsigned int old_size = fib_hash_size;
- unsigned int i;
+ unsigned int i, bytes;
write_lock(&fib_info_lock);
+ old_info_hash = fib_info_hash;
+ old_laddrhash = fib_info_laddrhash;
fib_hash_size = new_size;
for (i = 0; i < old_size; i++) {
@@ -636,6 +639,10 @@ static void fib_hash_move(struct hlist_head *new_info_hash,
fib_info_laddrhash = new_laddrhash;
write_unlock(&fib_info_lock);
+
+ bytes = old_size * sizeof(struct hlist_head *);
+ fib_hash_free(old_info_hash, bytes);
+ fib_hash_free(old_laddrhash, bytes);
}
struct fib_info *
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index a701405fab0b..45efd5f4741b 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1333,9 +1333,9 @@ err:;
}
static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp,
- struct fib_result *res, int *err)
+ struct fib_result *res)
{
- int i;
+ int err, i;
t_key mask;
struct leaf_info *li;
struct hlist_head *hhead = &l->list;
@@ -1348,18 +1348,18 @@ static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *pl
if (l->key != (key & mask))
continue;
- if (((*err) = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) == 0) {
+ if ((err = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) <= 0) {
*plen = i;
#ifdef CONFIG_IP_FIB_TRIE_STATS
t->stats.semantic_match_passed++;
#endif
- return 1;
+ return err;
}
#ifdef CONFIG_IP_FIB_TRIE_STATS
t->stats.semantic_match_miss++;
#endif
}
- return 0;
+ return 1;
}
static int
@@ -1386,7 +1386,7 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
/* Just a leaf? */
if (IS_LEAF(n)) {
- if (check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret))
+ if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
goto found;
goto failed;
}
@@ -1508,7 +1508,7 @@ fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result
continue;
}
if (IS_LEAF(n)) {
- if (check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret))
+ if ((ret = check_leaf(t, (struct leaf *)n, key, &plen, flp, res)) <= 0)
goto found;
}
backtrace:
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 279f57abfecb..badfc5849973 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -349,12 +349,12 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
{
struct sk_buff *skb;
- ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param,
- icmp_param->data_len+icmp_param->head_len,
- icmp_param->head_len,
- ipc, rt, MSG_DONTWAIT);
-
- if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) {
+ if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param,
+ icmp_param->data_len+icmp_param->head_len,
+ icmp_param->head_len,
+ ipc, rt, MSG_DONTWAIT) < 0)
+ ip_flush_pending_frames(icmp_socket->sk);
+ else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) {
struct icmphdr *icmph = skb->h.icmph;
unsigned int csum = 0;
struct sk_buff *skb1;
@@ -936,8 +936,7 @@ int icmp_rcv(struct sk_buff *skb)
case CHECKSUM_HW:
if (!(u16)csum_fold(skb->csum))
break;
- NETDEBUG(if (net_ratelimit())
- printk(KERN_DEBUG "icmp v4 hw csum failure\n"));
+ LIMIT_NETDEBUG(printk(KERN_DEBUG "icmp v4 hw csum failure\n"));
case CHECKSUM_NONE:
if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0)))
goto error;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 95473953c406..ab18a853d7ce 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -450,10 +450,13 @@ static void peer_check_expire(unsigned long dummy)
/* Trigger the timer after inet_peer_gc_mintime .. inet_peer_gc_maxtime
* interval depending on the total number of entries (more entries,
* less interval). */
- peer_periodic_timer.expires = jiffies
- + inet_peer_gc_maxtime
- - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ *
- peer_total / inet_peer_threshold * HZ;
+ if (peer_total >= inet_peer_threshold)
+ peer_periodic_timer.expires = jiffies + inet_peer_gc_mintime;
+ else
+ peer_periodic_timer.expires = jiffies
+ + inet_peer_gc_maxtime
+ - (inet_peer_gc_maxtime - inet_peer_gc_mintime) / HZ *
+ peer_total / inet_peer_threshold * HZ;
add_timer(&peer_periodic_timer);
}
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 7f68e27eb4ea..eb377ae15305 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -377,7 +377,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user)
return ip_frag_intern(hash, qp);
out_nomem:
- NETDEBUG(if (net_ratelimit()) printk(KERN_ERR "ip_frag_create: no memory left !\n"));
+ LIMIT_NETDEBUG(printk(KERN_ERR "ip_frag_create: no memory left !\n"));
return NULL;
}
@@ -625,10 +625,8 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
return head;
out_nomem:
- NETDEBUG(if (net_ratelimit())
- printk(KERN_ERR
- "IP: queue_glue: no memory for gluing queue %p\n",
- qp));
+ LIMIT_NETDEBUG(printk(KERN_ERR "IP: queue_glue: no memory for gluing "
+ "queue %p\n", qp));
goto out_fail;
out_oversize:
if (net_ratelimit())
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 884835522224..f0d5740d7e22 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -290,7 +290,6 @@ static struct ip_tunnel * ipgre_tunnel_locate(struct ip_tunnel_parm *parms, int
dev_hold(dev);
ipgre_tunnel_link(nt);
- /* Do not decrement MOD_USE_COUNT here. */
return nt;
failed:
@@ -1277,12 +1276,28 @@ err1:
goto out;
}
-static void ipgre_fini(void)
+static void __exit ipgre_destroy_tunnels(void)
+{
+ int prio;
+
+ for (prio = 0; prio < 4; prio++) {
+ int h;
+ for (h = 0; h < HASH_SIZE; h++) {
+ struct ip_tunnel *t;
+ while ((t = tunnels[prio][h]) != NULL)
+ unregister_netdevice(t->dev);
+ }
+ }
+}
+
+static void __exit ipgre_fini(void)
{
if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
printk(KERN_INFO "ipgre close: can't remove protocol\n");
- unregister_netdev(ipgre_fb_tunnel_dev);
+ rtnl_lock();
+ ipgre_destroy_tunnels();
+ rtnl_unlock();
}
module_init(ipgre_init);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index fc7c481d0d79..ff4bd067b397 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -848,6 +848,9 @@ mc_msf_out:
case IP_IPSEC_POLICY:
case IP_XFRM_POLICY:
+ err = -EPERM;
+ if (!capable(CAP_NET_ADMIN))
+ break;
err = xfrm_user_policy(sk, optname, optval, optlen);
break;
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index 2065944fd9e5..7ded6e60f43a 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -358,7 +358,7 @@ static struct crypto_tfm **ipcomp_alloc_tfms(const char *alg_name)
int cpu;
/* This can be any valid CPU ID so we don't need locking. */
- cpu = smp_processor_id();
+ cpu = raw_smp_processor_id();
list_for_each_entry(pos, &ipcomp_tfms_list, list) {
struct crypto_tfm *tfm;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index c3947cd566b7..c05c1df0bb04 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -255,7 +255,6 @@ static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int c
dev_hold(dev);
ipip_tunnel_link(nt);
- /* Do not decrement MOD_USE_COUNT here. */
return nt;
failed:
@@ -920,12 +919,29 @@ static int __init ipip_init(void)
goto out;
}
+static void __exit ipip_destroy_tunnels(void)
+{
+ int prio;
+
+ for (prio = 1; prio < 4; prio++) {
+ int h;
+ for (h = 0; h < HASH_SIZE; h++) {
+ struct ip_tunnel *t;
+ while ((t = tunnels[prio][h]) != NULL)
+ unregister_netdevice(t->dev);
+ }
+ }
+}
+
static void __exit ipip_fini(void)
{
if (ipip_unregister() < 0)
printk(KERN_INFO "ipip close: can't deregister tunnel\n");
- unregister_netdev(ipip_fb_tunnel_dev);
+ rtnl_lock();
+ ipip_destroy_tunnels();
+ unregister_netdevice(ipip_fb_tunnel_dev);
+ rtnl_unlock();
}
module_init(ipip_init);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 7833d920bdba..dc806b578427 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -362,7 +362,7 @@ out:
/* Fill oifs list. It is called under write locked mrt_lock. */
-static void ipmr_update_threshoulds(struct mfc_cache *cache, unsigned char *ttls)
+static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
{
int vifi;
@@ -727,7 +727,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
if (c != NULL) {
write_lock_bh(&mrt_lock);
c->mfc_parent = mfc->mfcc_parent;
- ipmr_update_threshoulds(c, mfc->mfcc_ttls);
+ ipmr_update_thresholds(c, mfc->mfcc_ttls);
if (!mrtsock)
c->mfc_flags |= MFC_STATIC;
write_unlock_bh(&mrt_lock);
@@ -744,7 +744,7 @@ static int ipmr_mfc_add(struct mfcctl *mfc, int mrtsock)
c->mfc_origin=mfc->mfcc_origin.s_addr;
c->mfc_mcastgrp=mfc->mfcc_mcastgrp.s_addr;
c->mfc_parent=mfc->mfcc_parent;
- ipmr_update_threshoulds(c, mfc->mfcc_ttls);
+ ipmr_update_thresholds(c, mfc->mfcc_ttls);
if (!mrtsock)
c->mfc_flags |= MFC_STATIC;
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 86f04e41dd8e..a7f0c821a9b2 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -513,6 +513,11 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
#ifdef CONFIG_IP_NF_CONNTRACK_MARK
conntrack->mark = exp->master->mark;
#endif
+#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
+ defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
+ /* this is ugly, but there is no other place where to put it */
+ conntrack->nat.masq_index = exp->master->nat.masq_index;
+#endif
nf_conntrack_get(&conntrack->master->ct_general);
CONNTRACK_STAT_INC(expect_new);
} else {
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index bc59d0d6e89e..91d5ea1dbbc9 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -102,6 +102,10 @@ ip_nat_fn(unsigned int hooknum,
return NF_ACCEPT;
}
+ /* Don't try to NAT if this packet is not conntracked */
+ if (ct == &ip_conntrack_untracked)
+ return NF_ACCEPT;
+
switch (ctinfo) {
case IP_CT_RELATED:
case IP_CT_RELATED+IP_CT_IS_REPLY:
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index eda1fba431a4..c6baa8174389 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -214,6 +214,12 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
break;
case IPQ_COPY_PACKET:
+ if (entry->skb->ip_summed == CHECKSUM_HW &&
+ (*errp = skb_checksum_help(entry->skb,
+ entry->info->outdev == NULL))) {
+ read_unlock_bh(&queue_lock);
+ return NULL;
+ }
if (copy_range == 0 || copy_range > entry->skb->len)
data_len = entry->skb->len;
else
@@ -385,6 +391,7 @@ ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
if (!skb_ip_make_writable(&e->skb, v->data_len))
return -ENOMEM;
memcpy(e->skb->data, v->payload, v->data_len);
+ e->skb->ip_summed = CHECKSUM_NONE;
e->skb->nfcache |= NFC_ALTERED;
/*
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index ada9911118e9..94a0ce1c1c9d 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -61,16 +61,20 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward)
if (!tcph)
return 0;
- if (!(einfo->operation & IPT_ECN_OP_SET_ECE
- || tcph->ece == einfo->proto.tcp.ece)
- && (!(einfo->operation & IPT_ECN_OP_SET_CWR
- || tcph->cwr == einfo->proto.tcp.cwr)))
+ if ((!(einfo->operation & IPT_ECN_OP_SET_ECE) ||
+ tcph->ece == einfo->proto.tcp.ece) &&
+ ((!(einfo->operation & IPT_ECN_OP_SET_CWR) ||
+ tcph->cwr == einfo->proto.tcp.cwr)))
return 1;
if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
return 0;
tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4;
+ if ((*pskb)->ip_summed == CHECKSUM_HW &&
+ skb_checksum_help(*pskb, inward))
+ return 0;
+
diffs[0] = ((u_int16_t *)tcph)[6];
if (einfo->operation & IPT_ECN_OP_SET_ECE)
tcph->ece = einfo->proto.tcp.ece;
@@ -79,13 +83,10 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward)
diffs[1] = ((u_int16_t *)tcph)[6];
diffs[0] = diffs[0] ^ 0xFFFF;
- if ((*pskb)->ip_summed != CHECKSUM_HW)
+ if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY)
tcph->check = csum_fold(csum_partial((char *)diffs,
sizeof(diffs),
tcph->check^0xFFFF));
- else
- if (skb_checksum_help(*pskb, inward))
- return 0;
(*pskb)->nfcache |= NFC_ALTERED;
return 1;
}
diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
index 1049050b2bfb..7b84a254440e 100644
--- a/net/ipv4/netfilter/ipt_TCPMSS.c
+++ b/net/ipv4/netfilter/ipt_TCPMSS.c
@@ -61,6 +61,10 @@ ipt_tcpmss_target(struct sk_buff **pskb,
if (!skb_ip_make_writable(pskb, (*pskb)->len))
return NF_DROP;
+ if ((*pskb)->ip_summed == CHECKSUM_HW &&
+ skb_checksum_help(*pskb, out == NULL))
+ return NF_DROP;
+
iph = (*pskb)->nh.iph;
tcplen = (*pskb)->len - iph->ihl*4;
@@ -186,9 +190,6 @@ ipt_tcpmss_target(struct sk_buff **pskb,
newmss);
retmodified:
- /* We never hw checksum SYN packets. */
- BUG_ON((*pskb)->ip_summed == CHECKSUM_HW);
-
(*pskb)->nfcache |= NFC_UNKNOWN | NFC_ALTERED;
return IPT_CONTINUE;
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ddb6ce4ecff2..69b1fcf70077 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -584,7 +584,7 @@ static inline void skb_entail(struct sock *sk, struct tcp_sock *tp,
sk_charge_skb(sk, skb);
if (!sk->sk_send_head)
sk->sk_send_head = skb;
- else if (tp->nonagle&TCP_NAGLE_PUSH)
+ if (tp->nonagle & TCP_NAGLE_PUSH)
tp->nonagle &= ~TCP_NAGLE_PUSH;
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 62f62bb05c2a..67c670886c1f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -242,9 +242,14 @@ static int tcp_v4_get_port(struct sock *sk, unsigned short snum)
tcp_port_rover = rover;
spin_unlock(&tcp_portalloc_lock);
- /* Exhausted local port range during search? */
+ /* Exhausted local port range during search? It is not
+ * possible for us to be holding one of the bind hash
+ * locks if this test triggers, because if 'remaining'
+ * drops to zero, we broke out of the do/while loop at
+ * the top level, not from the 'break;' statement.
+ */
ret = 1;
- if (remaining <= 0)
+ if (unlikely(remaining <= 0))
goto fail;
/* OK, here is the one we will use. HEAD is
@@ -1494,12 +1499,11 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
* to destinations, already remembered
* to the moment of synflood.
*/
- NETDEBUG(if (net_ratelimit()) \
- printk(KERN_DEBUG "TCP: drop open "
- "request from %u.%u."
- "%u.%u/%u\n", \
- NIPQUAD(saddr),
- ntohs(skb->h.th->source)));
+ LIMIT_NETDEBUG(printk(KERN_DEBUG "TCP: drop open "
+ "request from %u.%u."
+ "%u.%u/%u\n",
+ NIPQUAD(saddr),
+ ntohs(skb->h.th->source)));
dst_release(dst);
goto drop_and_free;
}
@@ -1627,8 +1631,7 @@ static int tcp_v4_checksum_init(struct sk_buff *skb)
skb->nh.iph->daddr, skb->csum))
return 0;
- NETDEBUG(if (net_ratelimit())
- printk(KERN_DEBUG "hw tcp v4 csum failed\n"));
+ LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v4 csum failed\n"));
skb->ip_summed = CHECKSUM_NONE;
}
if (skb->len <= 76) {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index e3f8ea1bfa9c..dd30dd137b74 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -403,11 +403,9 @@ static void tcp_queue_skb(struct sock *sk, struct sk_buff *skb)
sk->sk_send_head = skb;
}
-static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
+static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
{
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (skb->len <= tp->mss_cache ||
+ if (skb->len <= mss_now ||
!(sk->sk_route_caps & NETIF_F_TSO)) {
/* Avoid the costly divide in the normal
* non-TSO case.
@@ -417,10 +415,10 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
} else {
unsigned int factor;
- factor = skb->len + (tp->mss_cache - 1);
- factor /= tp->mss_cache;
+ factor = skb->len + (mss_now - 1);
+ factor /= mss_now;
skb_shinfo(skb)->tso_segs = factor;
- skb_shinfo(skb)->tso_size = tp->mss_cache;
+ skb_shinfo(skb)->tso_size = mss_now;
}
}
@@ -429,7 +427,7 @@ static void tcp_set_skb_tso_segs(struct sock *sk, struct sk_buff *skb)
* packet to the list. This won't be called frequently, I hope.
* Remember, these are still headerless SKBs at this point.
*/
-static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
+static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss_now)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *buff;
@@ -492,8 +490,8 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
}
/* Fix up tso_factor for both original and new SKB. */
- tcp_set_skb_tso_segs(sk, skb);
- tcp_set_skb_tso_segs(sk, buff);
+ tcp_set_skb_tso_segs(sk, skb, mss_now);
+ tcp_set_skb_tso_segs(sk, buff, mss_now);
if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
tp->lost_out += tcp_skb_pcount(skb);
@@ -569,7 +567,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
* factor and mss.
*/
if (tcp_skb_pcount(skb) > 1)
- tcp_set_skb_tso_segs(sk, skb);
+ tcp_set_skb_tso_segs(sk, skb, tcp_current_mss(sk, 1));
return 0;
}
@@ -734,12 +732,14 @@ static inline unsigned int tcp_cwnd_test(struct tcp_sock *tp, struct sk_buff *sk
/* This must be invoked the first time we consider transmitting
* SKB onto the wire.
*/
-static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb)
+static inline int tcp_init_tso_segs(struct sock *sk, struct sk_buff *skb, unsigned int mss_now)
{
int tso_segs = tcp_skb_pcount(skb);
- if (!tso_segs) {
- tcp_set_skb_tso_segs(sk, skb);
+ if (!tso_segs ||
+ (tso_segs > 1 &&
+ skb_shinfo(skb)->tso_size != mss_now)) {
+ tcp_set_skb_tso_segs(sk, skb, mss_now);
tso_segs = tcp_skb_pcount(skb);
}
return tso_segs;
@@ -817,7 +817,7 @@ static unsigned int tcp_snd_test(struct sock *sk, struct sk_buff *skb,
struct tcp_sock *tp = tcp_sk(sk);
unsigned int cwnd_quota;
- tcp_init_tso_segs(sk, skb);
+ tcp_init_tso_segs(sk, skb, cur_mss);
if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
return 0;
@@ -854,14 +854,15 @@ int tcp_may_send_now(struct sock *sk, struct tcp_sock *tp)
* know that all the data is in scatter-gather pages, and that the
* packet has never been sent out before (and thus is not cloned).
*/
-static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len)
+static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, unsigned int mss_now)
{
struct sk_buff *buff;
int nlen = skb->len - len;
u16 flags;
/* All of a TSO frame must be composed of paged data. */
- BUG_ON(skb->len != skb->data_len);
+ if (skb->len != skb->data_len)
+ return tcp_fragment(sk, skb, len, mss_now);
buff = sk_stream_alloc_pskb(sk, 0, 0, GFP_ATOMIC);
if (unlikely(buff == NULL))
@@ -887,8 +888,8 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len)
skb_split(skb, buff, len);
/* Fix up tso_factor for both original and new SKB. */
- tcp_set_skb_tso_segs(sk, skb);
- tcp_set_skb_tso_segs(sk, buff);
+ tcp_set_skb_tso_segs(sk, skb, mss_now);
+ tcp_set_skb_tso_segs(sk, buff, mss_now);
/* Link BUFF into the send queue. */
skb_header_release(buff);
@@ -924,10 +925,6 @@ static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_
limit = min(send_win, cong_win);
- /* If sk_send_head can be sent fully now, just do it. */
- if (skb->len <= limit)
- return 0;
-
if (sysctl_tcp_tso_win_divisor) {
u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache);
@@ -972,19 +969,20 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
if (unlikely(sk->sk_state == TCP_CLOSE))
return 0;
- skb = sk->sk_send_head;
- if (unlikely(!skb))
- return 0;
-
- tso_segs = tcp_init_tso_segs(sk, skb);
- cwnd_quota = tcp_cwnd_test(tp, skb);
- if (unlikely(!cwnd_quota))
- goto out;
-
sent_pkts = 0;
- while (likely(tcp_snd_wnd_test(tp, skb, mss_now))) {
+ while ((skb = sk->sk_send_head)) {
+ unsigned int limit;
+
+ tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
BUG_ON(!tso_segs);
+ cwnd_quota = tcp_cwnd_test(tp, skb);
+ if (!cwnd_quota)
+ break;
+
+ if (unlikely(!tcp_snd_wnd_test(tp, skb, mss_now)))
+ break;
+
if (tso_segs == 1) {
if (unlikely(!tcp_nagle_test(tp, skb, mss_now,
(tcp_skb_is_last(sk, skb) ?
@@ -995,9 +993,10 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
break;
}
+ limit = mss_now;
if (tso_segs > 1) {
- u32 limit = tcp_window_allows(tp, skb,
- mss_now, cwnd_quota);
+ limit = tcp_window_allows(tp, skb,
+ mss_now, cwnd_quota);
if (skb->len < limit) {
unsigned int trim = skb->len % mss_now;
@@ -1005,15 +1004,12 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
if (trim)
limit = skb->len - trim;
}
- if (skb->len > limit) {
- if (tso_fragment(sk, skb, limit))
- break;
- }
- } else if (unlikely(skb->len > mss_now)) {
- if (unlikely(tcp_fragment(sk, skb, mss_now)))
- break;
}
+ if (skb->len > limit &&
+ unlikely(tso_fragment(sk, skb, limit, mss_now)))
+ break;
+
TCP_SKB_CB(skb)->when = tcp_time_stamp;
if (unlikely(tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC))))
@@ -1026,27 +1022,12 @@ static int tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle)
tcp_minshall_update(tp, mss_now, skb);
sent_pkts++;
-
- /* Do not optimize this to use tso_segs. If we chopped up
- * the packet above, tso_segs will no longer be valid.
- */
- cwnd_quota -= tcp_skb_pcount(skb);
-
- BUG_ON(cwnd_quota < 0);
- if (!cwnd_quota)
- break;
-
- skb = sk->sk_send_head;
- if (!skb)
- break;
- tso_segs = tcp_init_tso_segs(sk, skb);
}
if (likely(sent_pkts)) {
tcp_cwnd_validate(sk, tp);
return 0;
}
-out:
return !tp->packets_out && sk->sk_send_head;
}
@@ -1076,15 +1057,18 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
BUG_ON(!skb || skb->len < mss_now);
- tso_segs = tcp_init_tso_segs(sk, skb);
+ tso_segs = tcp_init_tso_segs(sk, skb, mss_now);
cwnd_quota = tcp_snd_test(sk, skb, mss_now, TCP_NAGLE_PUSH);
if (likely(cwnd_quota)) {
+ unsigned int limit;
+
BUG_ON(!tso_segs);
+ limit = mss_now;
if (tso_segs > 1) {
- u32 limit = tcp_window_allows(tp, skb,
- mss_now, cwnd_quota);
+ limit = tcp_window_allows(tp, skb,
+ mss_now, cwnd_quota);
if (skb->len < limit) {
unsigned int trim = skb->len % mss_now;
@@ -1092,15 +1076,12 @@ void tcp_push_one(struct sock *sk, unsigned int mss_now)
if (trim)
limit = skb->len - trim;
}
- if (skb->len > limit) {
- if (unlikely(tso_fragment(sk, skb, limit)))
- return;
- }
- } else if (unlikely(skb->len > mss_now)) {
- if (unlikely(tcp_fragment(sk, skb, mss_now)))
- return;
}
+ if (skb->len > limit &&
+ unlikely(tso_fragment(sk, skb, limit, mss_now)))
+ return;
+
/* Send it out now. */
TCP_SKB_CB(skb)->when = tcp_time_stamp;
@@ -1386,15 +1367,21 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
if (skb->len > cur_mss) {
int old_factor = tcp_skb_pcount(skb);
- int new_factor;
+ int diff;
- if (tcp_fragment(sk, skb, cur_mss))
+ if (tcp_fragment(sk, skb, cur_mss, cur_mss))
return -ENOMEM; /* We'll try again later. */
/* New SKB created, account for it. */
- new_factor = tcp_skb_pcount(skb);
- tp->packets_out -= old_factor - new_factor;
- tp->packets_out += tcp_skb_pcount(skb->next);
+ diff = old_factor - tcp_skb_pcount(skb) -
+ tcp_skb_pcount(skb->next);
+ tp->packets_out -= diff;
+
+ if (diff > 0) {
+ tp->fackets_out -= diff;
+ if ((int)tp->fackets_out < 0)
+ tp->fackets_out = 0;
+ }
}
/* Collapse two adjacent packets if worthwhile and we can. */
@@ -1991,7 +1978,7 @@ int tcp_write_wakeup(struct sock *sk)
skb->len > mss) {
seg_size = min(seg_size, mss);
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
- if (tcp_fragment(sk, skb, seg_size))
+ if (tcp_fragment(sk, skb, seg_size, mss))
return -1;
/* SWS override triggered forced fragmentation.
* Disable TSO, the connection is too sick. */
@@ -2000,7 +1987,7 @@ int tcp_write_wakeup(struct sock *sk)
sk->sk_route_caps &= ~NETIF_F_TSO;
}
} else if (!tcp_skb_pcount(skb))
- tcp_set_skb_tso_segs(sk, skb);
+ tcp_set_skb_tso_segs(sk, skb, mss);
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
TCP_SKB_CB(skb)->when = tcp_time_stamp;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 7c24e64b443f..dc4d07357e3a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -628,7 +628,7 @@ back_from_confirm:
/* ... which is an evident application bug. --ANK */
release_sock(sk);
- NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp cork app bug 2\n"));
+ LIMIT_NETDEBUG(printk(KERN_DEBUG "udp cork app bug 2\n"));
err = -EINVAL;
goto out;
}
@@ -693,7 +693,7 @@ static int udp_sendpage(struct sock *sk, struct page *page, int offset,
if (unlikely(!up->pending)) {
release_sock(sk);
- NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp cork app bug 3\n"));
+ LIMIT_NETDEBUG(printk(KERN_DEBUG "udp cork app bug 3\n"));
return -EINVAL;
}
@@ -1102,7 +1102,7 @@ static int udp_checksum_init(struct sk_buff *skb, struct udphdr *uh,
skb->ip_summed = CHECKSUM_UNNECESSARY;
if (!udp_check(uh, ulen, saddr, daddr, skb->csum))
return 0;
- NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp v4 hw csum failure.\n"));
+ LIMIT_NETDEBUG(printk(KERN_DEBUG "udp v4 hw csum failure.\n"));
skb->ip_summed = CHECKSUM_NONE;
}
if (skb->ip_summed != CHECKSUM_UNNECESSARY)
@@ -1181,14 +1181,13 @@ int udp_rcv(struct sk_buff *skb)
return(0);
short_packet:
- NETDEBUG(if (net_ratelimit())
- printk(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
- NIPQUAD(saddr),
- ntohs(uh->source),
- ulen,
- len,
- NIPQUAD(daddr),
- ntohs(uh->dest)));
+ LIMIT_NETDEBUG(printk(KERN_DEBUG "UDP: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
+ NIPQUAD(saddr),
+ ntohs(uh->source),
+ ulen,
+ len,
+ NIPQUAD(daddr),
+ ntohs(uh->dest)));
no_header:
UDP_INC_STATS_BH(UDP_MIB_INERRORS);
kfree_skb(skb);
@@ -1199,13 +1198,12 @@ csum_error:
* RFC1122: OK. Discards the bad packet silently (as far as
* the network is concerned, anyway) as per 4.1.3.4 (MUST).
*/
- NETDEBUG(if (net_ratelimit())
- printk(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
- NIPQUAD(saddr),
- ntohs(uh->source),
- NIPQUAD(daddr),
- ntohs(uh->dest),
- ulen));
+ LIMIT_NETDEBUG(printk(KERN_DEBUG "UDP: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
+ NIPQUAD(saddr),
+ ntohs(uh->source),
+ NIPQUAD(daddr),
+ ntohs(uh->dest),
+ ulen));
drop:
UDP_INC_STATS_BH(UDP_MIB_INERRORS);
kfree_skb(skb);
OpenPOWER on IntegriCloud