diff options
author | David S. Miller <davem@davemloft.net> | 2013-11-04 19:46:58 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2013-11-04 19:46:58 -0500 |
commit | 72c39a0ade6229a938736fe1aa1d5e471fc7face (patch) | |
tree | 33e5e634d553750eba4bf876a23c4c051e0d9c26 /net/netfilter/ipvs | |
parent | 6fcf018ae4491dc11b080892fa9f3dbd928fdbb9 (diff) | |
parent | 4542fa4727f5f83faf9e1f28f35be0b9a2317aec (diff) | |
download | blackbird-op-linux-72c39a0ade6229a938736fe1aa1d5e471fc7face.tar.gz blackbird-op-linux-72c39a0ade6229a938736fe1aa1d5e471fc7face.zip |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says:
====================
This is another batch containing Netfilter/IPVS updates for your net-next
tree, they are:
* Six patches to make the ipt_CLUSTERIP target support netnamespace,
from Gao feng.
* Two cleanups for the nf_conntrack_acct infrastructure, introducing
a new structure to encapsulate conntrack counters, from Holger
Eitzenberger.
* Fix missing verdict in SCTP support for IPVS, from Daniel Borkmann.
* Skip checksum recalculation in SCTP support for IPVS, also from
Daniel Borkmann.
* Fix behavioural change in xt_socket after IP early demux, from
Florian Westphal.
* Fix bogus large memory allocation in the bitmap port set type in ipset,
from Jozsef Kadlecsik.
* Fix possible compilation issues in the hash netnet set type in ipset,
also from Jozsef Kadlecsik.
* Define constants to identify netlink callback data in ipset dumps,
again from Jozsef Kadlecsik.
* Use sock_gen_put() in xt_socket to replace xt_socket_put_sk,
from Eric Dumazet.
* Improvements for the SH scheduler in IPVS, from Alexander Frolkin.
* Remove extra delay due to unneeded rcu barrier in IPVS net namespace
cleanup path, from Julian Anastasov.
* Save some cycles in ip6t_REJECT by skipping checksum validation in
packets leaving from our stack, from Stanislav Fomichev.
* Fix IPVS_CMD_ATTR_MAX definition in IPVS, larger that required, from
Julian Anastasov.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/netfilter/ipvs')
-rw-r--r-- | net/netfilter/ipvs/ip_vs_ctl.c | 6 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_lblc.c | 2 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_lblcr.c | 2 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_proto_sctp.c | 48 | ||||
-rw-r--r-- | net/netfilter/ipvs/ip_vs_sh.c | 39 |
5 files changed, 72 insertions, 25 deletions
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index a3df9bddc4f7..62786a495cea 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -704,7 +704,7 @@ static void ip_vs_dest_free(struct ip_vs_dest *dest) __ip_vs_dst_cache_reset(dest); __ip_vs_svc_put(svc, false); free_percpu(dest->stats.cpustats); - kfree(dest); + ip_vs_dest_put_and_free(dest); } /* @@ -3820,10 +3820,6 @@ void __net_exit ip_vs_control_net_cleanup(struct net *net) { struct netns_ipvs *ipvs = net_ipvs(net); - /* Some dest can be in grace period even before cleanup, we have to - * defer ip_vs_trash_cleanup until ip_vs_dest_wait_readers is called. - */ - rcu_barrier(); ip_vs_trash_cleanup(net); ip_vs_stop_estimator(net, &ipvs->tot_stats); ip_vs_control_net_cleanup_sysctl(net); diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index eff13c94498e..ca056a331e60 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -136,7 +136,7 @@ static void ip_vs_lblc_rcu_free(struct rcu_head *head) struct ip_vs_lblc_entry, rcu_head); - ip_vs_dest_put(en->dest); + ip_vs_dest_put_and_free(en->dest); kfree(en); } diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index 0b8550089a2e..3f21a2f47de1 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -130,7 +130,7 @@ static void ip_vs_lblcr_elem_rcu_free(struct rcu_head *head) struct ip_vs_dest_set_elem *e; e = container_of(head, struct ip_vs_dest_set_elem, rcu_head); - ip_vs_dest_put(e->dest); + ip_vs_dest_put_and_free(e->dest); kfree(e); } diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 23e596e438b3..2f7ea7564044 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -20,13 +20,18 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd, sctp_sctphdr_t *sh, _sctph; sh = skb_header_pointer(skb, iph->len, sizeof(_sctph), &_sctph); - if (sh == NULL) + if (sh == NULL) { + *verdict = NF_DROP; return 0; + } sch = skb_header_pointer(skb, iph->len + sizeof(sctp_sctphdr_t), sizeof(_schunkh), &_schunkh); - if (sch == NULL) + if (sch == NULL) { + *verdict = NF_DROP; return 0; + } + net = skb_net(skb); ipvs = net_ipvs(net); rcu_read_lock(); @@ -76,6 +81,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, { sctp_sctphdr_t *sctph; unsigned int sctphoff = iph->len; + bool payload_csum = false; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6 && iph->fragoffs) @@ -87,19 +93,31 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, return 0; if (unlikely(cp->app != NULL)) { + int ret; + /* Some checks before mangling */ if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) return 0; /* Call application helper if needed */ - if (!ip_vs_app_pkt_out(cp, skb)) + ret = ip_vs_app_pkt_out(cp, skb); + if (ret == 0) return 0; + /* ret=2: csum update is needed after payload mangling */ + if (ret == 2) + payload_csum = true; } sctph = (void *) skb_network_header(skb) + sctphoff; - sctph->source = cp->vport; - sctp_nat_csum(skb, sctph, sctphoff); + /* Only update csum if we really have to */ + if (sctph->source != cp->vport || payload_csum || + skb->ip_summed == CHECKSUM_PARTIAL) { + sctph->source = cp->vport; + sctp_nat_csum(skb, sctph, sctphoff); + } else { + skb->ip_summed = CHECKSUM_UNNECESSARY; + } return 1; } @@ -110,6 +128,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, { sctp_sctphdr_t *sctph; unsigned int sctphoff = iph->len; + bool payload_csum = false; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6 && iph->fragoffs) @@ -121,19 +140,32 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, return 0; if (unlikely(cp->app != NULL)) { + int ret; + /* Some checks before mangling */ if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) return 0; /* Call application helper if needed */ - if (!ip_vs_app_pkt_in(cp, skb)) + ret = ip_vs_app_pkt_in(cp, skb); + if (ret == 0) return 0; + /* ret=2: csum update is needed after payload mangling */ + if (ret == 2) + payload_csum = true; } sctph = (void *) skb_network_header(skb) + sctphoff; - sctph->dest = cp->dport; - sctp_nat_csum(skb, sctph, sctphoff); + /* Only update csum if we really have to */ + if (sctph->dest != cp->dport || payload_csum || + (skb->ip_summed == CHECKSUM_PARTIAL && + !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CSUM))) { + sctph->dest = cp->dport; + sctp_nat_csum(skb, sctph, sctphoff); + } else if (skb->ip_summed != CHECKSUM_PARTIAL) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + } return 1; } diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 3588faebe529..cc65b2f42cd4 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -115,27 +115,46 @@ ip_vs_sh_get(struct ip_vs_service *svc, struct ip_vs_sh_state *s, } -/* As ip_vs_sh_get, but with fallback if selected server is unavailable */ +/* As ip_vs_sh_get, but with fallback if selected server is unavailable + * + * The fallback strategy loops around the table starting from a "random" + * point (in fact, it is chosen to be the original hash value to make the + * algorithm deterministic) to find a new server. + */ static inline struct ip_vs_dest * ip_vs_sh_get_fallback(struct ip_vs_service *svc, struct ip_vs_sh_state *s, const union nf_inet_addr *addr, __be16 port) { - unsigned int offset; - unsigned int hash; + unsigned int offset, roffset; + unsigned int hash, ihash; struct ip_vs_dest *dest; + /* first try the dest it's supposed to go to */ + ihash = ip_vs_sh_hashkey(svc->af, addr, port, 0); + dest = rcu_dereference(s->buckets[ihash].dest); + if (!dest) + return NULL; + if (!is_unavailable(dest)) + return dest; + + IP_VS_DBG_BUF(6, "SH: selected unavailable server %s:%d, reselecting", + IP_VS_DBG_ADDR(svc->af, &dest->addr), ntohs(dest->port)); + + /* if the original dest is unavailable, loop around the table + * starting from ihash to find a new dest + */ for (offset = 0; offset < IP_VS_SH_TAB_SIZE; offset++) { - hash = ip_vs_sh_hashkey(svc->af, addr, port, offset); + roffset = (offset + ihash) % IP_VS_SH_TAB_SIZE; + hash = ip_vs_sh_hashkey(svc->af, addr, port, roffset); dest = rcu_dereference(s->buckets[hash].dest); if (!dest) break; - if (is_unavailable(dest)) - IP_VS_DBG_BUF(6, "SH: selected unavailable server " - "%s:%d (offset %d)", - IP_VS_DBG_ADDR(svc->af, &dest->addr), - ntohs(dest->port), offset); - else + if (!is_unavailable(dest)) return dest; + IP_VS_DBG_BUF(6, "SH: selected unavailable " + "server %s:%d (offset %d), reselecting", + IP_VS_DBG_ADDR(svc->af, &dest->addr), + ntohs(dest->port), roffset); } return NULL; |