diff options
author | David S. Miller <davem@davemloft.net> | 2014-01-06 13:29:30 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-01-06 13:29:30 -0500 |
commit | 9aa28f2b71055d5ae17a2e1daee359d4174bb13e (patch) | |
tree | fbf4e0fd11eb924e0bece74a87f442bc54441b35 /include/net | |
parent | 6a8c4796df74045088a916581c736432d08c53c0 (diff) | |
parent | c9c8e485978a308c8a359140da187d55120f8fee (diff) | |
download | blackbird-op-linux-9aa28f2b71055d5ae17a2e1daee359d4174bb13e.tar.gz blackbird-op-linux-9aa28f2b71055d5ae17a2e1daee359d4174bb13e.zip |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nftables
Pablo Neira Ayuso says: <pablo@netfilter.org>
====================
nftables updates for net-next
The following patchset contains nftables updates for your net-next tree,
they are:
* Add set operation to the meta expression by means of the select_ops()
infrastructure, this allows us to set the packet mark among other things.
From Arturo Borrero Gonzalez.
* Fix wrong format in sscanf in nf_tables_set_alloc_name(), from Daniel
Borkmann.
* Add new queue expression to nf_tables. These comes with two previous patches
to prepare this new feature, one to add mask in nf_tables_core to
evaluate the queue verdict appropriately and another to refactor common
code with xt_NFQUEUE, from Eric Leblond.
* Do not hide nftables from Kconfig if nfnetlink is not enabled, also from
Eric Leblond.
* Add the reject expression to nf_tables, this adds the missing TCP RST
support. It comes with an initial patch to refactor common code with
xt_NFQUEUE, again from Eric Leblond.
* Remove an unused variable assignment in nf_tables_dump_set(), from Michal
Nazarewicz.
* Remove the nft_meta_target code, now that Arturo added the set operation
to the meta expression, from me.
* Add help information for nf_tables to Kconfig, also from me.
* Allow to dump all sets by specifying NFPROTO_UNSPEC, similar feature is
available to other nf_tables objects, requested by Arturo, from me.
* Expose the table usage counter, so we can know how many chains are using
this table without dumping the list of chains, from Tomasz Bursztyka.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include/net')
-rw-r--r-- | include/net/netfilter/ipv4/nf_reject.h | 128 | ||||
-rw-r--r-- | include/net/netfilter/ipv6/nf_reject.h | 171 | ||||
-rw-r--r-- | include/net/netfilter/nf_queue.h | 62 |
3 files changed, 361 insertions, 0 deletions
diff --git a/include/net/netfilter/ipv4/nf_reject.h b/include/net/netfilter/ipv4/nf_reject.h new file mode 100644 index 000000000000..931fbf812171 --- /dev/null +++ b/include/net/netfilter/ipv4/nf_reject.h @@ -0,0 +1,128 @@ +#ifndef _IPV4_NF_REJECT_H +#define _IPV4_NF_REJECT_H + +#include <net/ip.h> +#include <net/tcp.h> +#include <net/route.h> +#include <net/dst.h> + +static inline void nf_send_unreach(struct sk_buff *skb_in, int code) +{ + icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); +} + +/* Send RST reply */ +static void nf_send_reset(struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + const struct iphdr *oiph; + struct iphdr *niph; + const struct tcphdr *oth; + struct tcphdr _otcph, *tcph; + + /* IP header checks: fragment. */ + if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET)) + return; + + oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb), + sizeof(_otcph), &_otcph); + if (oth == NULL) + return; + + /* No RST for RST. */ + if (oth->rst) + return; + + if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) + return; + + /* Check checksum */ + if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP)) + return; + oiph = ip_hdr(oldskb); + + nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) + + LL_MAX_HEADER, GFP_ATOMIC); + if (!nskb) + return; + + skb_reserve(nskb, LL_MAX_HEADER); + + skb_reset_network_header(nskb); + niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr)); + niph->version = 4; + niph->ihl = sizeof(struct iphdr) / 4; + niph->tos = 0; + niph->id = 0; + niph->frag_off = htons(IP_DF); + niph->protocol = IPPROTO_TCP; + niph->check = 0; + niph->saddr = oiph->daddr; + niph->daddr = oiph->saddr; + + skb_reset_transport_header(nskb); + tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); + memset(tcph, 0, sizeof(*tcph)); + tcph->source = oth->dest; + tcph->dest = oth->source; + tcph->doff = sizeof(struct tcphdr) / 4; + + if (oth->ack) + tcph->seq = oth->ack_seq; + else { + tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin + + oldskb->len - ip_hdrlen(oldskb) - + (oth->doff << 2)); + tcph->ack = 1; + } + + tcph->rst = 1; + tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr, + niph->daddr, 0); + nskb->ip_summed = CHECKSUM_PARTIAL; + nskb->csum_start = (unsigned char *)tcph - nskb->head; + nskb->csum_offset = offsetof(struct tcphdr, check); + + /* ip_route_me_harder expects skb->dst to be set */ + skb_dst_set_noref(nskb, skb_dst(oldskb)); + + nskb->protocol = htons(ETH_P_IP); + if (ip_route_me_harder(nskb, RTN_UNSPEC)) + goto free_nskb; + + niph->ttl = ip4_dst_hoplimit(skb_dst(nskb)); + + /* "Never happens" */ + if (nskb->len > dst_mtu(skb_dst(nskb))) + goto free_nskb; + + nf_ct_attach(nskb, oldskb); + +#ifdef CONFIG_BRIDGE_NETFILTER + /* If we use ip_local_out for bridged traffic, the MAC source on + * the RST will be ours, instead of the destination's. This confuses + * some routers/firewalls, and they drop the packet. So we need to + * build the eth header using the original destination's MAC as the + * source, and send the RST packet directly. + */ + if (oldskb->nf_bridge) { + struct ethhdr *oeth = eth_hdr(oldskb); + nskb->dev = oldskb->nf_bridge->physindev; + niph->tot_len = htons(nskb->len); + ip_send_check(niph); + if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), + oeth->h_source, oeth->h_dest, nskb->len) < 0) + goto free_nskb; + dev_queue_xmit(nskb); + } else +#endif + ip_local_out(nskb); + + return; + + free_nskb: + kfree_skb(nskb); +} + + +#endif /* _IPV4_NF_REJECT_H */ diff --git a/include/net/netfilter/ipv6/nf_reject.h b/include/net/netfilter/ipv6/nf_reject.h new file mode 100644 index 000000000000..710d17ed70b4 --- /dev/null +++ b/include/net/netfilter/ipv6/nf_reject.h @@ -0,0 +1,171 @@ +#ifndef _IPV6_NF_REJECT_H +#define _IPV6_NF_REJECT_H + +#include <net/ipv6.h> +#include <net/ip6_route.h> +#include <net/ip6_fib.h> +#include <net/ip6_checksum.h> +#include <linux/netfilter_ipv6.h> + +static inline void +nf_send_unreach6(struct net *net, struct sk_buff *skb_in, unsigned char code, + unsigned int hooknum) +{ + if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL) + skb_in->dev = net->loopback_dev; + + icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0); +} + +/* Send RST reply */ +static void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) +{ + struct sk_buff *nskb; + struct tcphdr otcph, *tcph; + unsigned int otcplen, hh_len; + int tcphoff, needs_ack; + const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); + struct ipv6hdr *ip6h; +#define DEFAULT_TOS_VALUE 0x0U + const __u8 tclass = DEFAULT_TOS_VALUE; + struct dst_entry *dst = NULL; + u8 proto; + __be16 frag_off; + struct flowi6 fl6; + + if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || + (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { + pr_debug("addr is not unicast.\n"); + return; + } + + proto = oip6h->nexthdr; + tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off); + + if ((tcphoff < 0) || (tcphoff > oldskb->len)) { + pr_debug("Cannot get TCP header.\n"); + return; + } + + otcplen = oldskb->len - tcphoff; + + /* IP header checks: fragment, too short. */ + if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) { + pr_debug("proto(%d) != IPPROTO_TCP, " + "or too short. otcplen = %d\n", + proto, otcplen); + return; + } + + if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr))) + BUG(); + + /* No RST for RST. */ + if (otcph.rst) { + pr_debug("RST is set\n"); + return; + } + + /* Check checksum. */ + if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) { + pr_debug("TCP checksum is invalid\n"); + return; + } + + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_TCP; + fl6.saddr = oip6h->daddr; + fl6.daddr = oip6h->saddr; + fl6.fl6_sport = otcph.dest; + fl6.fl6_dport = otcph.source; + security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); + dst = ip6_route_output(net, NULL, &fl6); + if (dst == NULL || dst->error) { + dst_release(dst); + return; + } + dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); + if (IS_ERR(dst)) + return; + + hh_len = (dst->dev->hard_header_len + 15)&~15; + nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) + + sizeof(struct tcphdr) + dst->trailer_len, + GFP_ATOMIC); + + if (!nskb) { + net_dbg_ratelimited("cannot alloc skb\n"); + dst_release(dst); + return; + } + + skb_dst_set(nskb, dst); + + skb_reserve(nskb, hh_len + dst->header_len); + + skb_put(nskb, sizeof(struct ipv6hdr)); + skb_reset_network_header(nskb); + ip6h = ipv6_hdr(nskb); + ip6_flow_hdr(ip6h, tclass, 0); + ip6h->hop_limit = ip6_dst_hoplimit(dst); + ip6h->nexthdr = IPPROTO_TCP; + ip6h->saddr = oip6h->daddr; + ip6h->daddr = oip6h->saddr; + + skb_reset_transport_header(nskb); + tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); + /* Truncate to length (no data) */ + tcph->doff = sizeof(struct tcphdr)/4; + tcph->source = otcph.dest; + tcph->dest = otcph.source; + + if (otcph.ack) { + needs_ack = 0; + tcph->seq = otcph.ack_seq; + tcph->ack_seq = 0; + } else { + needs_ack = 1; + tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin + + otcplen - (otcph.doff<<2)); + tcph->seq = 0; + } + + /* Reset flags */ + ((u_int8_t *)tcph)[13] = 0; + tcph->rst = 1; + tcph->ack = needs_ack; + tcph->window = 0; + tcph->urg_ptr = 0; + tcph->check = 0; + + /* Adjust TCP checksum */ + tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr, + &ipv6_hdr(nskb)->daddr, + sizeof(struct tcphdr), IPPROTO_TCP, + csum_partial(tcph, + sizeof(struct tcphdr), 0)); + + nf_ct_attach(nskb, oldskb); + +#ifdef CONFIG_BRIDGE_NETFILTER + /* If we use ip6_local_out for bridged traffic, the MAC source on + * the RST will be ours, instead of the destination's. This confuses + * some routers/firewalls, and they drop the packet. So we need to + * build the eth header using the original destination's MAC as the + * source, and send the RST packet directly. + */ + if (oldskb->nf_bridge) { + struct ethhdr *oeth = eth_hdr(oldskb); + nskb->dev = oldskb->nf_bridge->physindev; + nskb->protocol = htons(ETH_P_IPV6); + ip6h->payload_len = htons(sizeof(struct tcphdr)); + if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), + oeth->h_source, oeth->h_dest, nskb->len) < 0) + return; + dev_queue_xmit(nskb); + } else +#endif + ip6_local_out(nskb); +} + +#endif /* _IPV6_NF_REJECT_H */ diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index c1d5b3e34a21..84a53d780306 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -1,6 +1,10 @@ #ifndef _NF_QUEUE_H #define _NF_QUEUE_H +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/jhash.h> + /* Each queued (to userspace) skbuff has one of these. */ struct nf_queue_entry { struct list_head list; @@ -33,4 +37,62 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); bool nf_queue_entry_get_refs(struct nf_queue_entry *entry); void nf_queue_entry_release_refs(struct nf_queue_entry *entry); +static inline void init_hashrandom(u32 *jhash_initval) +{ + while (*jhash_initval == 0) + *jhash_initval = prandom_u32(); +} + +static inline u32 hash_v4(const struct sk_buff *skb, u32 jhash_initval) +{ + const struct iphdr *iph = ip_hdr(skb); + + /* packets in either direction go into same queue */ + if ((__force u32)iph->saddr < (__force u32)iph->daddr) + return jhash_3words((__force u32)iph->saddr, + (__force u32)iph->daddr, iph->protocol, jhash_initval); + + return jhash_3words((__force u32)iph->daddr, + (__force u32)iph->saddr, iph->protocol, jhash_initval); +} + +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) +static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + u32 a, b, c; + + if ((__force u32)ip6h->saddr.s6_addr32[3] < + (__force u32)ip6h->daddr.s6_addr32[3]) { + a = (__force u32) ip6h->saddr.s6_addr32[3]; + b = (__force u32) ip6h->daddr.s6_addr32[3]; + } else { + b = (__force u32) ip6h->saddr.s6_addr32[3]; + a = (__force u32) ip6h->daddr.s6_addr32[3]; + } + + if ((__force u32)ip6h->saddr.s6_addr32[1] < + (__force u32)ip6h->daddr.s6_addr32[1]) + c = (__force u32) ip6h->saddr.s6_addr32[1]; + else + c = (__force u32) ip6h->daddr.s6_addr32[1]; + + return jhash_3words(a, b, c, jhash_initval); +} +#endif + +static inline u32 +nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family, + u32 jhash_initval) +{ + if (family == NFPROTO_IPV4) + queue += ((u64) hash_v4(skb, jhash_initval) * queues_total) >> 32; +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) + else if (family == NFPROTO_IPV6) + queue += ((u64) hash_v6(skb, jhash_initval) * queues_total) >> 32; +#endif + + return queue; +} + #endif /* _NF_QUEUE_H */ |