diff options
author | David S. Miller <davem@davemloft.net> | 2009-03-24 13:24:36 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2009-03-24 13:24:36 -0700 |
commit | b5bb14386eabcb4229ade2bc0a2b237ca166d37d (patch) | |
tree | 1966e65479f0d12cec0a204443a95b8eb57946db /net | |
parent | bb4f92b3a33bfc31f55098da85be44702bea2d16 (diff) | |
parent | 1d45209d89e647e9f27e4afa1f47338df73bc112 (diff) | |
download | talos-obmc-linux-b5bb14386eabcb4229ade2bc0a2b237ca166d37d.tar.gz talos-obmc-linux-b5bb14386eabcb4229ade2bc0a2b237ca166d37d.zip |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-next-2.6
Diffstat (limited to 'net')
58 files changed, 1555 insertions, 756 deletions
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 8604dfc1fc3b..c751111440f8 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -46,7 +46,6 @@ static struct ebt_table broute_table = .name = "broute", .table = &initial_table, .valid_hooks = 1 << NF_BR_BROUTING, - .lock = __RW_LOCK_UNLOCKED(broute_table.lock), .check = check, .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 2b2e8040a9c6..a5eea72938a6 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -55,7 +55,6 @@ static struct ebt_table frame_filter = .name = "filter", .table = &initial_table, .valid_hooks = FILTER_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(frame_filter.lock), .check = check, .me = THIS_MODULE, }; diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 3fe1ae87e35f..6024c551f9a9 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -55,7 +55,6 @@ static struct ebt_table frame_nat = .name = "nat", .table = &initial_table, .valid_hooks = NAT_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(frame_nat.lock), .check = check, .me = THIS_MODULE, }; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 3816e1dc9295..1833bdbf9805 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -31,7 +31,7 @@ config NF_CONNTRACK_PROC_COMPAT default y help This option enables /proc and sysctl compatibility with the old - layer 3 dependant connection tracking. This is needed to keep + layer 3 dependent connection tracking. This is needed to keep old programs that have not been adapted to the new names working. If unsure, say Y. 
@@ -95,11 +95,11 @@ config IP_NF_MATCH_ECN config IP_NF_MATCH_TTL tristate '"ttl" match support' depends on NETFILTER_ADVANCED - help - This adds CONFIG_IP_NF_MATCH_TTL option, which enabled the user - to match packets by their TTL value. - - To compile it as a module, choose M here. If unsure, say N. + select NETFILTER_XT_MATCH_HL + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_MATCH_HL. # `filter', generic and specific targets config IP_NF_FILTER @@ -323,19 +323,13 @@ config IP_NF_TARGET_ECN To compile it as a module, choose M here. If unsure, say N. config IP_NF_TARGET_TTL - tristate 'TTL target support' - depends on IP_NF_MANGLE + tristate '"TTL" target support' depends on NETFILTER_ADVANCED - help - This option adds a `TTL' target, which enables the user to modify - the TTL value of the IP header. - - While it is safe to decrement/lower the TTL, this target also enables - functionality to increment and set the TTL value of the IP header to - arbitrary values. This is EXTREMELY DANGEROUS since you can easily - create immortal packets that loop forever on the network. - - To compile it as a module, choose M here. If unsure, say N. + select NETFILTER_XT_TARGET_HL + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_TARGET_HL. 
# raw + specific targets config IP_NF_RAW diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 5f9b650d90fc..48111594ee9b 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -51,7 +51,6 @@ obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o -obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o # targets obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o @@ -61,7 +60,6 @@ obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o -obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o # generic ARP tables diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 7ea88b61cb0d..64a7c6ce0b98 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -73,6 +73,36 @@ static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, return (ret != 0); } +/* + * Unfortunately, _b and _mask are not aligned to an int (or long int) + * Some arches don't care, unrolling the loop is a win on them. 
+ */ +static unsigned long ifname_compare(const char *_a, const char *_b, const char *_mask) +{ +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + const unsigned long *a = (const unsigned long *)_a; + const unsigned long *b = (const unsigned long *)_b; + const unsigned long *mask = (const unsigned long *)_mask; + unsigned long ret; + + ret = (a[0] ^ b[0]) & mask[0]; + if (IFNAMSIZ > sizeof(unsigned long)) + ret |= (a[1] ^ b[1]) & mask[1]; + if (IFNAMSIZ > 2 * sizeof(unsigned long)) + ret |= (a[2] ^ b[2]) & mask[2]; + if (IFNAMSIZ > 3 * sizeof(unsigned long)) + ret |= (a[3] ^ b[3]) & mask[3]; + BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long)); +#else + unsigned long ret = 0; + int i; + + for (i = 0; i < IFNAMSIZ; i++) + ret |= (_a[i] ^ _b[i]) & _mask[i]; +#endif + return ret; +} + /* Returns whether packet matches rule or not. */ static inline int arp_packet_match(const struct arphdr *arphdr, struct net_device *dev, @@ -83,7 +113,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr, const char *arpptr = (char *)(arphdr + 1); const char *src_devaddr, *tgt_devaddr; __be32 src_ipaddr, tgt_ipaddr; - int i, ret; + long ret; #define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg))) @@ -156,10 +186,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr, } /* Look for ifname matches. 
*/ - for (i = 0, ret = 0; i < IFNAMSIZ; i++) { - ret |= (indev[i] ^ arpinfo->iniface[i]) - & arpinfo->iniface_mask[i]; - } + ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask); if (FWINV(ret != 0, ARPT_INV_VIA_IN)) { dprintf("VIA in mismatch (%s vs %s).%s\n", @@ -168,10 +195,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr, return 0; } - for (i = 0, ret = 0; i < IFNAMSIZ; i++) { - ret |= (outdev[i] ^ arpinfo->outiface[i]) - & arpinfo->outiface_mask[i]; - } + ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask); if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) { dprintf("VIA out mismatch (%s vs %s).%s\n", @@ -221,7 +245,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, const struct net_device *out, struct xt_table *table) { - static const char nulldevname[IFNAMSIZ]; + static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); unsigned int verdict = NF_DROP; const struct arphdr *arp; bool hotdrop = false; @@ -237,9 +261,10 @@ unsigned int arpt_do_table(struct sk_buff *skb, indev = in ? in->name : nulldevname; outdev = out ? out->name : nulldevname; - read_lock_bh(&table->lock); - private = table->private; - table_base = (void *)private->entries[smp_processor_id()]; + rcu_read_lock(); + private = rcu_dereference(table->private); + table_base = rcu_dereference(private->entries[smp_processor_id()]); + e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); @@ -311,7 +336,8 @@ unsigned int arpt_do_table(struct sk_buff *skb, e = (void *)e + e->next_offset; } } while (!hotdrop); - read_unlock_bh(&table->lock); + + rcu_read_unlock(); if (hotdrop) return NF_DROP; @@ -714,11 +740,65 @@ static void get_counters(const struct xt_table_info *t, } } -static inline struct xt_counters *alloc_counters(struct xt_table *table) + +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. 
*/ +static int +add_counter_to_entry(struct arpt_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} + +/* Take values from counters and add them back onto the current cpu */ +static void put_counters(struct xt_table_info *t, + const struct xt_counters counters[]) +{ + unsigned int i, cpu; + + local_bh_disable(); + cpu = smp_processor_id(); + i = 0; + ARPT_ENTRY_ITERATE(t->entries[cpu], + t->size, + add_counter_to_entry, + counters, + &i); + local_bh_enable(); +} + +static inline int +zero_entry_counter(struct arpt_entry *e, void *arg) +{ + e->counters.bcnt = 0; + e->counters.pcnt = 0; + return 0; +} + +static void +clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) +{ + unsigned int cpu; + const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; + + memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); + for_each_possible_cpu(cpu) { + memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); + ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, + zero_entry_counter, NULL); + } +} + +static struct xt_counters *alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + struct xt_table_info *private = table->private; + struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change * (other than comefrom, which userspace doesn't care @@ -728,14 +808,30 @@ static inline struct xt_counters *alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - return ERR_PTR(-ENOMEM); + goto nomem; + + info = xt_alloc_table_info(private->size); + if (!info) + goto free_counters; + + clone_counters(info, private); + + mutex_lock(&table->lock); + xt_table_entry_swap_rcu(private, info); + synchronize_net(); /* Wait until smoke has cleared */ + + 
get_counters(info, counters); + put_counters(private, counters); + mutex_unlock(&table->lock); - /* First, sum counters... */ - write_lock_bh(&table->lock); - get_counters(private, counters); - write_unlock_bh(&table->lock); + xt_free_table_info(info); return counters; + + free_counters: + vfree(counters); + nomem: + return ERR_PTR(-ENOMEM); } static int copy_entries_to_user(unsigned int total_size, @@ -1075,20 +1171,6 @@ static int do_replace(struct net *net, void __user *user, unsigned int len) return ret; } -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. - */ -static inline int add_counter_to_entry(struct arpt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ - - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) { @@ -1148,13 +1230,14 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, goto free; } - write_lock_bh(&t->lock); + mutex_lock(&t->lock); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } + preempt_disable(); i = 0; /* Choose the copy that is on our node */ loc_cpu_entry = private->entries[smp_processor_id()]; @@ -1163,8 +1246,10 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len, add_counter_to_entry, paddc, &i); + preempt_enable(); unlock_up_free: - write_unlock_bh(&t->lock); + mutex_unlock(&t->lock); + xt_table_unlock(t); module_put(t->me); free: diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index e091187e864f..6ecfdae7c589 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -48,8 +48,6 @@ static struct static struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(packet_filter.lock), - .private = 
NULL, .me = THIS_MODULE, .af = NFPROTO_ARP, }; diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 432ce9d1c11c..5f22c91c6e15 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -24,6 +24,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/security.h> +#include <linux/net.h> #include <linux/mutex.h> #include <net/net_namespace.h> #include <net/sock.h> @@ -640,6 +641,7 @@ static void __exit ip_queue_fini(void) MODULE_DESCRIPTION("IPv4 packet queue handler"); MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_FIREWALL); module_init(ip_queue_init); module_exit(ip_queue_fini); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index ef8b6ca068b2..e5294aec967d 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -74,6 +74,25 @@ do { \ Hence the start of any table is given by get_table() below. */ +static unsigned long ifname_compare(const char *_a, const char *_b, + const unsigned char *_mask) +{ + const unsigned long *a = (const unsigned long *)_a; + const unsigned long *b = (const unsigned long *)_b; + const unsigned long *mask = (const unsigned long *)_mask; + unsigned long ret; + + ret = (a[0] ^ b[0]) & mask[0]; + if (IFNAMSIZ > sizeof(unsigned long)) + ret |= (a[1] ^ b[1]) & mask[1]; + if (IFNAMSIZ > 2 * sizeof(unsigned long)) + ret |= (a[2] ^ b[2]) & mask[2]; + if (IFNAMSIZ > 3 * sizeof(unsigned long)) + ret |= (a[3] ^ b[3]) & mask[3]; + BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long)); + return ret; +} + /* Returns whether matches rule or not. 
*/ /* Performance critical - called for every packet */ static inline bool @@ -83,7 +102,6 @@ ip_packet_match(const struct iphdr *ip, const struct ipt_ip *ipinfo, int isfrag) { - size_t i; unsigned long ret; #define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg))) @@ -103,12 +121,7 @@ ip_packet_match(const struct iphdr *ip, return false; } - /* Look for ifname matches; this should unroll nicely. */ - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { - ret |= (((const unsigned long *)indev)[i] - ^ ((const unsigned long *)ipinfo->iniface)[i]) - & ((const unsigned long *)ipinfo->iniface_mask)[i]; - } + ret = ifname_compare(indev, ipinfo->iniface, ipinfo->iniface_mask); if (FWINV(ret != 0, IPT_INV_VIA_IN)) { dprintf("VIA in mismatch (%s vs %s).%s\n", @@ -117,11 +130,7 @@ ip_packet_match(const struct iphdr *ip, return false; } - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { - ret |= (((const unsigned long *)outdev)[i] - ^ ((const unsigned long *)ipinfo->outiface)[i]) - & ((const unsigned long *)ipinfo->outiface_mask)[i]; - } + ret = ifname_compare(outdev, ipinfo->outiface, ipinfo->outiface_mask); if (FWINV(ret != 0, IPT_INV_VIA_OUT)) { dprintf("VIA out mismatch (%s vs %s).%s\n", @@ -347,10 +356,12 @@ ipt_do_table(struct sk_buff *skb, mtpar.family = tgpar.family = NFPROTO_IPV4; tgpar.hooknum = hook; - read_lock_bh(&table->lock); IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - private = table->private; - table_base = (void *)private->entries[smp_processor_id()]; + + rcu_read_lock(); + private = rcu_dereference(table->private); + table_base = rcu_dereference(private->entries[smp_processor_id()]); + e = get_entry(table_base, private->hook_entry[hook]); /* For return from builtin chain */ @@ -445,7 +456,7 @@ ipt_do_table(struct sk_buff *skb, } } while (!hotdrop); - read_unlock_bh(&table->lock); + rcu_read_unlock(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -924,13 +935,68 @@ get_counters(const struct xt_table_info *t, 
counters, &i); } + +} + +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. */ +static int +add_counter_to_entry(struct ipt_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} + +/* Take values from counters and add them back onto the current cpu */ +static void put_counters(struct xt_table_info *t, + const struct xt_counters counters[]) +{ + unsigned int i, cpu; + + local_bh_disable(); + cpu = smp_processor_id(); + i = 0; + IPT_ENTRY_ITERATE(t->entries[cpu], + t->size, + add_counter_to_entry, + counters, + &i); + local_bh_enable(); +} + + +static inline int +zero_entry_counter(struct ipt_entry *e, void *arg) +{ + e->counters.bcnt = 0; + e->counters.pcnt = 0; + return 0; +} + +static void +clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) +{ + unsigned int cpu; + const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; + + memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); + for_each_possible_cpu(cpu) { + memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); + IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, + zero_entry_counter, NULL); + } } static struct xt_counters * alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + struct xt_table_info *private = table->private; + struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -939,14 +1005,30 @@ static struct xt_counters * alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - return ERR_PTR(-ENOMEM); + goto nomem; + + info = xt_alloc_table_info(private->size); + if (!info) + goto free_counters; - /* First, sum counters... 
*/ - write_lock_bh(&table->lock); - get_counters(private, counters); - write_unlock_bh(&table->lock); + clone_counters(info, private); + + mutex_lock(&table->lock); + xt_table_entry_swap_rcu(private, info); + synchronize_net(); /* Wait until smoke has cleared */ + + get_counters(info, counters); + put_counters(private, counters); + mutex_unlock(&table->lock); + + xt_free_table_info(info); return counters; + + free_counters: + vfree(counters); + nomem: + return ERR_PTR(-ENOMEM); } static int @@ -1312,27 +1394,6 @@ do_replace(struct net *net, void __user *user, unsigned int len) return ret; } -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ -static int -add_counter_to_entry(struct ipt_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ -#if 0 - duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n", - *i, - (long unsigned int)e->counters.pcnt, - (long unsigned int)e->counters.bcnt, - (long unsigned int)addme[*i].pcnt, - (long unsigned int)addme[*i].bcnt); -#endif - - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) @@ -1393,13 +1454,14 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat goto free; } - write_lock_bh(&t->lock); + mutex_lock(&t->lock); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } + preempt_disable(); i = 0; /* Choose the copy that is on our node */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; @@ -1408,8 +1470,9 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat add_counter_to_entry, paddc, &i); + preempt_enable(); unlock_up_free: - write_unlock_bh(&t->lock); + mutex_unlock(&t->lock); xt_table_unlock(t); module_put(t->me); free: diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c index 27a78fbbd92b..acc44c69eb68 
100644 --- a/net/ipv4/netfilter/ipt_LOG.c +++ b/net/ipv4/netfilter/ipt_LOG.c @@ -464,7 +464,7 @@ static struct xt_target log_tg_reg __read_mostly = { .me = THIS_MODULE, }; -static const struct nf_logger ipt_log_logger ={ +static struct nf_logger ipt_log_logger __read_mostly = { .name = "ipt_LOG", .logfn = &ipt_log_packet, .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c deleted file mode 100644 index 6d76aae90cc0..000000000000 --- a/net/ipv4/netfilter/ipt_TTL.c +++ /dev/null @@ -1,97 +0,0 @@ -/* TTL modification target for IP tables - * (C) 2000,2005 by Harald Welte <laforge@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ - -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/ip.h> -#include <net/checksum.h> - -#include <linux/netfilter/x_tables.h> -#include <linux/netfilter_ipv4/ipt_TTL.h> - -MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); -MODULE_DESCRIPTION("Xtables: IPv4 TTL field modification target"); -MODULE_LICENSE("GPL"); - -static unsigned int -ttl_tg(struct sk_buff *skb, const struct xt_target_param *par) -{ - struct iphdr *iph; - const struct ipt_TTL_info *info = par->targinfo; - int new_ttl; - - if (!skb_make_writable(skb, skb->len)) - return NF_DROP; - - iph = ip_hdr(skb); - - switch (info->mode) { - case IPT_TTL_SET: - new_ttl = info->ttl; - break; - case IPT_TTL_INC: - new_ttl = iph->ttl + info->ttl; - if (new_ttl > 255) - new_ttl = 255; - break; - case IPT_TTL_DEC: - new_ttl = iph->ttl - info->ttl; - if (new_ttl < 0) - new_ttl = 0; - break; - default: - new_ttl = iph->ttl; - break; - } - - if (new_ttl != iph->ttl) { - csum_replace2(&iph->check, htons(iph->ttl << 8), - htons(new_ttl << 8)); - iph->ttl = new_ttl; - } - - return XT_CONTINUE; -} - -static bool ttl_tg_check(const struct xt_tgchk_param *par) -{ - const 
struct ipt_TTL_info *info = par->targinfo; - - if (info->mode > IPT_TTL_MAXMODE) { - printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n", - info->mode); - return false; - } - if (info->mode != IPT_TTL_SET && info->ttl == 0) - return false; - return true; -} - -static struct xt_target ttl_tg_reg __read_mostly = { - .name = "TTL", - .family = NFPROTO_IPV4, - .target = ttl_tg, - .targetsize = sizeof(struct ipt_TTL_info), - .table = "mangle", - .checkentry = ttl_tg_check, - .me = THIS_MODULE, -}; - -static int __init ttl_tg_init(void) -{ - return xt_register_target(&ttl_tg_reg); -} - -static void __exit ttl_tg_exit(void) -{ - xt_unregister_target(&ttl_tg_reg); -} - -module_init(ttl_tg_init); -module_exit(ttl_tg_exit); diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 18a2826b57c6..d32cc4bb328a 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -379,7 +379,7 @@ static struct xt_target ulog_tg_reg __read_mostly = { .me = THIS_MODULE, }; -static struct nf_logger ipt_ulog_logger = { +static struct nf_logger ipt_ulog_logger __read_mostly = { .name = "ipt_ULOG", .logfn = ipt_logfn, .me = THIS_MODULE, diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c deleted file mode 100644 index 297f1cbf4ff5..000000000000 --- a/net/ipv4/netfilter/ipt_ttl.c +++ /dev/null @@ -1,63 +0,0 @@ -/* IP tables module for matching the value of the TTL - * - * (C) 2000,2001 by Harald Welte <laforge@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#include <linux/ip.h> -#include <linux/module.h> -#include <linux/skbuff.h> - -#include <linux/netfilter_ipv4/ipt_ttl.h> -#include <linux/netfilter/x_tables.h> - -MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); -MODULE_DESCRIPTION("Xtables: IPv4 TTL field match"); -MODULE_LICENSE("GPL"); - -static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par) -{ - const struct ipt_ttl_info *info = par->matchinfo; - const u8 ttl = ip_hdr(skb)->ttl; - - switch (info->mode) { - case IPT_TTL_EQ: - return ttl == info->ttl; - case IPT_TTL_NE: - return ttl != info->ttl; - case IPT_TTL_LT: - return ttl < info->ttl; - case IPT_TTL_GT: - return ttl > info->ttl; - default: - printk(KERN_WARNING "ipt_ttl: unknown mode %d\n", - info->mode); - return false; - } - - return false; -} - -static struct xt_match ttl_mt_reg __read_mostly = { - .name = "ttl", - .family = NFPROTO_IPV4, - .match = ttl_mt, - .matchsize = sizeof(struct ipt_ttl_info), - .me = THIS_MODULE, -}; - -static int __init ttl_mt_init(void) -{ - return xt_register_match(&ttl_mt_reg); -} - -static void __exit ttl_mt_exit(void) -{ - xt_unregister_match(&ttl_mt_reg); -} - -module_init(ttl_mt_init); -module_exit(ttl_mt_exit); diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 52cb6939d093..c30a969724f8 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -56,7 +56,6 @@ static struct static struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(packet_filter.lock), .me = THIS_MODULE, .af = AF_INET, }; diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 3929d20b9e45..4087614d9519 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -67,7 +67,6 @@ static struct static struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, - .lock = 
__RW_LOCK_UNLOCKED(packet_mangler.lock), .me = THIS_MODULE, .af = AF_INET, }; diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 7f65d18333e3..e5356da1fb54 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -39,7 +39,6 @@ static struct static struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(packet_raw.lock), .me = THIS_MODULE, .af = AF_INET, }; diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index a52a35f4a584..29ab630f240a 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -60,7 +60,6 @@ static struct static struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(security_table.lock), .me = THIS_MODULE, .af = AF_INET, }; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 4beb04fac588..8b681f24e271 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -120,8 +120,10 @@ static unsigned int ipv4_confirm(unsigned int hooknum, typeof(nf_nat_seq_adjust_hook) seq_adjust; seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); - if (!seq_adjust || !seq_adjust(skb, ct, ctinfo)) + if (!seq_adjust || !seq_adjust(skb, ct, ctinfo)) { + NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); return NF_DROP; + } } out: /* We've seen it coming out the other side: confirm it */ diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index a7eb04719044..6348a793936e 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -61,7 +61,6 @@ static struct static struct xt_table nat_table = { .name = "nat", .valid_hooks = NAT_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(nat_table.lock), .me = THIS_MODULE, .af = AF_INET, }; diff --git 
a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 53ea512c4608..625353a5fe18 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -95,13 +95,13 @@ config IP6_NF_MATCH_OPTS To compile it as a module, choose M here. If unsure, say N. config IP6_NF_MATCH_HL - tristate '"hl" match support' + tristate '"hl" hoplimit match support' depends on NETFILTER_ADVANCED - help - HL matching allows you to match packets based on the hop - limit of the packet. - - To compile it as a module, choose M here. If unsure, say N. + select NETFILTER_XT_MATCH_HL + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_MATCH_HL. config IP6_NF_MATCH_IPV6HEADER tristate '"ipv6header" IPv6 Extension Headers Match' @@ -130,6 +130,15 @@ config IP6_NF_MATCH_RT To compile it as a module, choose M here. If unsure, say N. # The targets +config IP6_NF_TARGET_HL + tristate '"HL" hoplimit target support' + depends on NETFILTER_ADVANCED + select NETFILTER_XT_TARGET_HL + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_TARGET_HL. + config IP6_NF_TARGET_LOG tristate "LOG target support" default m if NETFILTER_ADVANCED=n @@ -170,23 +179,6 @@ config IP6_NF_MANGLE To compile it as a module, choose M here. If unsure, say N. -config IP6_NF_TARGET_HL - tristate 'HL (hoplimit) target support' - depends on IP6_NF_MANGLE - depends on NETFILTER_ADVANCED - help - This option adds a `HL' target, which enables the user to decrement - the hoplimit value of the IPv6 header or set it to a given (lower) - value. - - While it is safe to decrement the hoplimit value, this option also - enables functionality to increment and set the hoplimit value of the - IPv6 header to arbitrary values. This is EXTREMELY DANGEROUS since - you can easily create immortal packets that loop forever on the - network. 
- - To compile it as a module, choose M here. If unsure, say N. - config IP6_NF_RAW tristate 'raw table support (required for TRACE)' depends on NETFILTER_ADVANCED diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 3f17c948eefb..aafbba30c899 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -20,13 +20,11 @@ obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o -obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o # targets -obj-$(CONFIG_IP6_NF_TARGET_HL) += ip6t_HL.o obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o diff --git a/net/ipv6/netfilter/ip6_queue.c b/net/ipv6/netfilter/ip6_queue.c index 5859c046cbc4..b693f841aeb4 100644 --- a/net/ipv6/netfilter/ip6_queue.c +++ b/net/ipv6/netfilter/ip6_queue.c @@ -643,6 +643,7 @@ static void __exit ip6_queue_fini(void) MODULE_DESCRIPTION("IPv6 packet queue handler"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_IP6_FW); module_init(ip6_queue_init); module_exit(ip6_queue_fini); diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index a33485dc81cb..34af7bb8df5f 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -89,6 +89,25 @@ ip6t_ext_hdr(u8 nexthdr) (nexthdr == IPPROTO_DSTOPTS) ); } +static unsigned long ifname_compare(const char *_a, const char *_b, + const unsigned char *_mask) +{ + const unsigned long *a = (const unsigned long *)_a; + const unsigned long *b = (const unsigned long *)_b; + const unsigned long *mask = (const unsigned long *)_mask; + unsigned long ret; + + ret = (a[0] ^ b[0]) & mask[0]; + if (IFNAMSIZ > 
sizeof(unsigned long)) + ret |= (a[1] ^ b[1]) & mask[1]; + if (IFNAMSIZ > 2 * sizeof(unsigned long)) + ret |= (a[2] ^ b[2]) & mask[2]; + if (IFNAMSIZ > 3 * sizeof(unsigned long)) + ret |= (a[3] ^ b[3]) & mask[3]; + BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long)); + return ret; +} + /* Returns whether matches rule or not. */ /* Performance critical - called for every packet */ static inline bool @@ -99,7 +118,6 @@ ip6_packet_match(const struct sk_buff *skb, unsigned int *protoff, int *fragoff, bool *hotdrop) { - size_t i; unsigned long ret; const struct ipv6hdr *ipv6 = ipv6_hdr(skb); @@ -120,12 +138,7 @@ ip6_packet_match(const struct sk_buff *skb, return false; } - /* Look for ifname matches; this should unroll nicely. */ - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { - ret |= (((const unsigned long *)indev)[i] - ^ ((const unsigned long *)ip6info->iniface)[i]) - & ((const unsigned long *)ip6info->iniface_mask)[i]; - } + ret = ifname_compare(indev, ip6info->iniface, ip6info->iniface_mask); if (FWINV(ret != 0, IP6T_INV_VIA_IN)) { dprintf("VIA in mismatch (%s vs %s).%s\n", @@ -134,11 +147,7 @@ ip6_packet_match(const struct sk_buff *skb, return false; } - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { - ret |= (((const unsigned long *)outdev)[i] - ^ ((const unsigned long *)ip6info->outiface)[i]) - & ((const unsigned long *)ip6info->outiface_mask)[i]; - } + ret = ifname_compare(outdev, ip6info->outiface, ip6info->outiface_mask); if (FWINV(ret != 0, IP6T_INV_VIA_OUT)) { dprintf("VIA out mismatch (%s vs %s).%s\n", @@ -373,10 +382,12 @@ ip6t_do_table(struct sk_buff *skb, mtpar.family = tgpar.family = NFPROTO_IPV6; tgpar.hooknum = hook; - read_lock_bh(&table->lock); IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - private = table->private; - table_base = (void *)private->entries[smp_processor_id()]; + + rcu_read_lock(); + private = rcu_dereference(table->private); + table_base = rcu_dereference(private->entries[smp_processor_id()]); + 
e = get_entry(table_base, private->hook_entry[hook]); /* For return from builtin chain */ @@ -474,7 +485,7 @@ ip6t_do_table(struct sk_buff *skb, #ifdef CONFIG_NETFILTER_DEBUG ((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON; #endif - read_unlock_bh(&table->lock); + rcu_read_unlock(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; @@ -955,11 +966,64 @@ get_counters(const struct xt_table_info *t, } } +/* We're lazy, and add to the first CPU; overflow works its fey magic + * and everything is OK. */ +static int +add_counter_to_entry(struct ip6t_entry *e, + const struct xt_counters addme[], + unsigned int *i) +{ + ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + + (*i)++; + return 0; +} + +/* Take values from counters and add them back onto the current cpu */ +static void put_counters(struct xt_table_info *t, + const struct xt_counters counters[]) +{ + unsigned int i, cpu; + + local_bh_disable(); + cpu = smp_processor_id(); + i = 0; + IP6T_ENTRY_ITERATE(t->entries[cpu], + t->size, + add_counter_to_entry, + counters, + &i); + local_bh_enable(); +} + +static inline int +zero_entry_counter(struct ip6t_entry *e, void *arg) +{ + e->counters.bcnt = 0; + e->counters.pcnt = 0; + return 0; +} + +static void +clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info) +{ + unsigned int cpu; + const void *loc_cpu_entry = info->entries[raw_smp_processor_id()]; + + memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); + for_each_possible_cpu(cpu) { + memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size); + IP6T_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size, + zero_entry_counter, NULL); + } +} + static struct xt_counters *alloc_counters(struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + struct xt_table_info *private = table->private; + struct xt_table_info *info; /* We need atomic snapshot of counters: rest doesn't change (other than 
comefrom, which userspace doesn't care @@ -968,14 +1032,30 @@ static struct xt_counters *alloc_counters(struct xt_table *table) counters = vmalloc_node(countersize, numa_node_id()); if (counters == NULL) - return ERR_PTR(-ENOMEM); + goto nomem; + + info = xt_alloc_table_info(private->size); + if (!info) + goto free_counters; + + clone_counters(info, private); - /* First, sum counters... */ - write_lock_bh(&table->lock); - get_counters(private, counters); - write_unlock_bh(&table->lock); + mutex_lock(&table->lock); + xt_table_entry_swap_rcu(private, info); + synchronize_net(); /* Wait until smoke has cleared */ - return counters; + get_counters(info, counters); + put_counters(private, counters); + mutex_unlock(&table->lock); + + xt_free_table_info(info); + + return counters; + + free_counters: + vfree(counters); + nomem: + return ERR_PTR(-ENOMEM); } static int @@ -1342,28 +1420,6 @@ do_replace(struct net *net, void __user *user, unsigned int len) return ret; } -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. 
*/ -static inline int -add_counter_to_entry(struct ip6t_entry *e, - const struct xt_counters addme[], - unsigned int *i) -{ -#if 0 - duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n", - *i, - (long unsigned int)e->counters.pcnt, - (long unsigned int)e->counters.bcnt, - (long unsigned int)addme[*i].pcnt, - (long unsigned int)addme[*i].bcnt); -#endif - - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); - - (*i)++; - return 0; -} - static int do_add_counters(struct net *net, void __user *user, unsigned int len, int compat) @@ -1424,13 +1480,14 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, goto free; } - write_lock_bh(&t->lock); + mutex_lock(&t->lock); private = t->private; if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } + preempt_disable(); i = 0; /* Choose the copy that is on our node */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; @@ -1439,8 +1496,9 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, add_counter_to_entry, paddc, &i); + preempt_enable(); unlock_up_free: - write_unlock_bh(&t->lock); + mutex_unlock(&t->lock); xt_table_unlock(t); module_put(t->me); free: diff --git a/net/ipv6/netfilter/ip6t_HL.c b/net/ipv6/netfilter/ip6t_HL.c deleted file mode 100644 index 27b5adf670a2..000000000000 --- a/net/ipv6/netfilter/ip6t_HL.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Hop Limit modification target for ip6tables - * Maciej Soltysiak <solt@dns.toxicfilms.tv> - * Based on HW's TTL module - * - * This software is distributed under the terms of GNU GPL - */ - -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/ip.h> -#include <linux/ipv6.h> - -#include <linux/netfilter/x_tables.h> -#include <linux/netfilter_ipv6/ip6t_HL.h> - -MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>"); -MODULE_DESCRIPTION("Xtables: IPv6 Hop Limit field modification target"); -MODULE_LICENSE("GPL"); - -static unsigned int -hl_tg6(struct sk_buff *skb, const struct 
xt_target_param *par) -{ - struct ipv6hdr *ip6h; - const struct ip6t_HL_info *info = par->targinfo; - int new_hl; - - if (!skb_make_writable(skb, skb->len)) - return NF_DROP; - - ip6h = ipv6_hdr(skb); - - switch (info->mode) { - case IP6T_HL_SET: - new_hl = info->hop_limit; - break; - case IP6T_HL_INC: - new_hl = ip6h->hop_limit + info->hop_limit; - if (new_hl > 255) - new_hl = 255; - break; - case IP6T_HL_DEC: - new_hl = ip6h->hop_limit - info->hop_limit; - if (new_hl < 0) - new_hl = 0; - break; - default: - new_hl = ip6h->hop_limit; - break; - } - - ip6h->hop_limit = new_hl; - - return XT_CONTINUE; -} - -static bool hl_tg6_check(const struct xt_tgchk_param *par) -{ - const struct ip6t_HL_info *info = par->targinfo; - - if (info->mode > IP6T_HL_MAXMODE) { - printk(KERN_WARNING "ip6t_HL: invalid or unknown Mode %u\n", - info->mode); - return false; - } - if (info->mode != IP6T_HL_SET && info->hop_limit == 0) { - printk(KERN_WARNING "ip6t_HL: increment/decrement doesn't " - "make sense with value 0\n"); - return false; - } - return true; -} - -static struct xt_target hl_tg6_reg __read_mostly = { - .name = "HL", - .family = NFPROTO_IPV6, - .target = hl_tg6, - .targetsize = sizeof(struct ip6t_HL_info), - .table = "mangle", - .checkentry = hl_tg6_check, - .me = THIS_MODULE -}; - -static int __init hl_tg6_init(void) -{ - return xt_register_target(&hl_tg6_reg); -} - -static void __exit hl_tg6_exit(void) -{ - xt_unregister_target(&hl_tg6_reg); -} - -module_init(hl_tg6_init); -module_exit(hl_tg6_exit); diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c index 37adf5abc51e..7018cac4fddc 100644 --- a/net/ipv6/netfilter/ip6t_LOG.c +++ b/net/ipv6/netfilter/ip6t_LOG.c @@ -477,7 +477,7 @@ static struct xt_target log_tg6_reg __read_mostly = { .me = THIS_MODULE, }; -static const struct nf_logger ip6t_logger = { +static struct nf_logger ip6t_logger __read_mostly = { .name = "ip6t_LOG", .logfn = &ip6t_log_packet, .me = THIS_MODULE, diff --git 
a/net/ipv6/netfilter/ip6t_hl.c b/net/ipv6/netfilter/ip6t_hl.c deleted file mode 100644 index c964dca1132d..000000000000 --- a/net/ipv6/netfilter/ip6t_hl.c +++ /dev/null @@ -1,68 +0,0 @@ -/* Hop Limit matching module */ - -/* (C) 2001-2002 Maciej Soltysiak <solt@dns.toxicfilms.tv> - * Based on HW's ttl module - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/ipv6.h> -#include <linux/module.h> -#include <linux/skbuff.h> - -#include <linux/netfilter_ipv6/ip6t_hl.h> -#include <linux/netfilter/x_tables.h> - -MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>"); -MODULE_DESCRIPTION("Xtables: IPv6 Hop Limit field match"); -MODULE_LICENSE("GPL"); - -static bool hl_mt6(const struct sk_buff *skb, const struct xt_match_param *par) -{ - const struct ip6t_hl_info *info = par->matchinfo; - const struct ipv6hdr *ip6h = ipv6_hdr(skb); - - switch (info->mode) { - case IP6T_HL_EQ: - return ip6h->hop_limit == info->hop_limit; - break; - case IP6T_HL_NE: - return ip6h->hop_limit != info->hop_limit; - break; - case IP6T_HL_LT: - return ip6h->hop_limit < info->hop_limit; - break; - case IP6T_HL_GT: - return ip6h->hop_limit > info->hop_limit; - break; - default: - printk(KERN_WARNING "ip6t_hl: unknown mode %d\n", - info->mode); - return false; - } - - return false; -} - -static struct xt_match hl_mt6_reg __read_mostly = { - .name = "hl", - .family = NFPROTO_IPV6, - .match = hl_mt6, - .matchsize = sizeof(struct ip6t_hl_info), - .me = THIS_MODULE, -}; - -static int __init hl_mt6_init(void) -{ - return xt_register_match(&hl_mt6_reg); -} - -static void __exit hl_mt6_exit(void) -{ - xt_unregister_match(&hl_mt6_reg); -} - -module_init(hl_mt6_init); -module_exit(hl_mt6_exit); diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 40d2e36d8fac..ef5a0a32bf8e 100644 --- 
a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -54,7 +54,6 @@ static struct static struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(packet_filter.lock), .me = THIS_MODULE, .af = AF_INET6, }; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index d0b31b259d4d..ab0d398a2ba7 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -60,7 +60,6 @@ static struct static struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(packet_mangler.lock), .me = THIS_MODULE, .af = AF_INET6, }; diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 109fab6f831a..4b792b6ca321 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -38,7 +38,6 @@ static struct static struct xt_table packet_raw = { .name = "raw", .valid_hooks = RAW_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(packet_raw.lock), .me = THIS_MODULE, .af = AF_INET6, }; diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 20bc52f13e43..0ea37ff15d56 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -59,7 +59,6 @@ static struct static struct xt_table security_table = { .name = "security", .valid_hooks = SECURITY_VALID_HOOKS, - .lock = __RW_LOCK_UNLOCKED(security_table.lock), .me = THIS_MODULE, .af = AF_INET6, }; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 727b9530448a..e6852f617217 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -26,6 +26,7 @@ #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_l3proto.h> #include <net/netfilter/nf_conntrack_core.h> +#include 
<net/netfilter/ipv6/nf_conntrack_ipv6.h> static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_tuple *tuple) diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 72dbb6d1a6b3..41b8a956e1be 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -126,6 +126,10 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, pr_debug("icmpv6: can't create new conn with type %u\n", type + 128); nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple); + if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6)) + nf_log_packet(PF_INET6, 0, skb, NULL, NULL, NULL, + "nf_ct_icmpv6: invalid new with type %d ", + type + 128); return false; } atomic_set(&ct->proto.icmp.count, 0); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index c2bac9cd0caf..2562d05dbaf5 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -357,6 +357,45 @@ config NETFILTER_XT_TARGET_DSCP To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_TARGET_HL + tristate '"HL" hoplimit target support' + depends on IP_NF_MANGLE || IP6_NF_MANGLE + depends on NETFILTER_ADVANCED + ---help--- + This option adds the "HL" (for IPv6) and "TTL" (for IPv4) + targets, which enable the user to change the + hoplimit/time-to-live value of the IP header. + + While it is safe to decrement the hoplimit/TTL value, the + modules also allow to increment and set the hoplimit value of + the header to arbitrary values. This is EXTREMELY DANGEROUS + since you can easily create immortal packets that loop + forever on the network. + +config NETFILTER_XT_TARGET_LED + tristate '"LED" target support' + depends on LEDS_CLASS + depends on NETFILTER_ADVANCED + help + This option adds a `LED' target, which allows you to blink LEDs in + response to particular packets passing through your machine. 
+ + This can be used to turn a spare LED into a network activity LED, + which only flashes in response to FTP transfers, for example. Or + you could have an LED which lights up for a minute or two every time + somebody connects to your machine via SSH. + + You will need support for the "led" class to make this work. + + To create an LED trigger for incoming SSH traffic: + iptables -A INPUT -p tcp --dport 22 -j LED --led-trigger-id ssh --led-delay 1000 + + Then attach the new trigger to an LED on your system: + echo netfilter-ssh > /sys/class/leds/<ledname>/trigger + + For more information on the LEDs available on your system, see + Documentation/leds-class.txt + config NETFILTER_XT_TARGET_MARK tristate '"MARK" target support' default m if NETFILTER_ADVANCED=n @@ -488,6 +527,22 @@ config NETFILTER_XT_TARGET_TCPOPTSTRIP This option adds a "TCPOPTSTRIP" target, which allows you to strip TCP options from TCP packets. +config NETFILTER_XT_MATCH_CLUSTER + tristate '"cluster" match support' + depends on NF_CONNTRACK + depends on NETFILTER_ADVANCED + ---help--- + This option allows you to build work-load-sharing clusters of + network servers/stateful firewalls without having a dedicated + load-balancing router/server/switch. Basically, this match returns + true when the packet must be handled by this cluster node. Thus, + all nodes see all packets and this match decides which node handles + what packets. The work-load sharing algorithm is based on source + address hashing. + + If you say Y or M here, try `iptables -m cluster --help` for + more information. + config NETFILTER_XT_MATCH_COMMENT tristate '"comment" match support' depends on NETFILTER_ADVANCED @@ -605,6 +660,14 @@ config NETFILTER_XT_MATCH_HELPER To compile it as a module, choose M here. If unsure, say Y. 
+config NETFILTER_XT_MATCH_HL + tristate '"hl" hoplimit/TTL match support' + depends on NETFILTER_ADVANCED + ---help--- + HL matching allows you to match packets based on the hoplimit + in the IPv6 header, or the time-to-live field in the IPv4 + header of the packet. + config NETFILTER_XT_MATCH_IPRANGE tristate '"iprange" address range match support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index da3d909e053f..6282060fbda9 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -45,6 +45,8 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o +obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o +obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o @@ -57,6 +59,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o # matches +obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o @@ -67,6 +70,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o +obj-$(CONFIG_NETFILTER_XT_MATCH_HL) += xt_hl.o obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index a90ac83c5918..5bb34737501f 100644 --- 
a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -174,7 +174,6 @@ next_hook: outdev, &elem, okfn, hook_thresh); if (verdict == NF_ACCEPT || verdict == NF_STOP) { ret = 1; - goto unlock; } else if (verdict == NF_DROP) { kfree_skb(skb); ret = -EPERM; @@ -183,7 +182,6 @@ next_hook: verdict >> NF_VERDICT_BITS)) goto next_hook; } -unlock: rcu_read_unlock(); return ret; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index f4935e344b61..dfb447b584da 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -54,7 +54,7 @@ EXPORT_SYMBOL_GPL(nf_conntrack_lock); unsigned int nf_conntrack_htable_size __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); -int nf_conntrack_max __read_mostly; +unsigned int nf_conntrack_max __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_max); struct nf_conn nf_conntrack_untracked __read_mostly; @@ -472,7 +472,8 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, struct nf_conn *ct; if (unlikely(!nf_conntrack_hash_rnd_initted)) { - get_random_bytes(&nf_conntrack_hash_rnd, 4); + get_random_bytes(&nf_conntrack_hash_rnd, + sizeof(nf_conntrack_hash_rnd)); nf_conntrack_hash_rnd_initted = 1; } @@ -516,16 +517,17 @@ EXPORT_SYMBOL_GPL(nf_conntrack_alloc); static void nf_conntrack_free_rcu(struct rcu_head *head) { struct nf_conn *ct = container_of(head, struct nf_conn, rcu); - struct net *net = nf_ct_net(ct); nf_ct_ext_free(ct); kmem_cache_free(nf_conntrack_cachep, ct); - atomic_dec(&net->ct.count); } void nf_conntrack_free(struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); + nf_ct_ext_destroy(ct); + atomic_dec(&net->ct.count); call_rcu(&ct->rcu, nf_conntrack_free_rcu); } EXPORT_SYMBOL_GPL(nf_conntrack_free); @@ -733,6 +735,8 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, nf_conntrack_put(skb->nfct); skb->nfct = NULL; NF_CT_STAT_INC_ATOMIC(net, invalid); + if (ret == -NF_DROP) + NF_CT_STAT_INC_ATOMIC(net, drop); return -ret; } @@ -1103,7 +1107,7 
@@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) /* We have to rehahs for the new table anyway, so we also can * use a newrandom seed */ - get_random_bytes(&rnd, 4); + get_random_bytes(&rnd, sizeof(rnd)); /* Lookups in the old hash might happen in parallel, which means we * might get false negatives during connection lookup. New connections diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 3a8a34a6d37c..357ba39d4c8d 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -72,7 +72,8 @@ static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple unsigned int hash; if (unlikely(!nf_ct_expect_hash_rnd_initted)) { - get_random_bytes(&nf_ct_expect_hash_rnd, 4); + get_random_bytes(&nf_ct_expect_hash_rnd, + sizeof(nf_ct_expect_hash_rnd)); nf_ct_expect_hash_rnd_initted = 1; } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index ed6d873ad384..7a16bd462f82 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -518,6 +518,7 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, nla_put_failure: rcu_read_unlock(); nlmsg_failure: + nfnetlink_set_err(0, group, -ENOBUFS); kfree_skb(skb); return NOTIFY_DONE; } @@ -599,7 +600,8 @@ ctnetlink_parse_tuple_ip(struct nlattr *attr, struct nf_conntrack_tuple *tuple) nla_parse_nested(tb, CTA_IP_MAX, attr, NULL); - l3proto = nf_ct_l3proto_find_get(tuple->src.l3num); + rcu_read_lock(); + l3proto = __nf_ct_l3proto_find(tuple->src.l3num); if (likely(l3proto->nlattr_to_tuple)) { ret = nla_validate_nested(attr, CTA_IP_MAX, @@ -608,7 +610,7 @@ ctnetlink_parse_tuple_ip(struct nlattr *attr, struct nf_conntrack_tuple *tuple) ret = l3proto->nlattr_to_tuple(tb, tuple); } - nf_ct_l3proto_put(l3proto); + rcu_read_unlock(); return ret; } @@ -633,7 +635,8 @@ ctnetlink_parse_tuple_proto(struct nlattr *attr, return -EINVAL; 
tuple->dst.protonum = nla_get_u8(tb[CTA_PROTO_NUM]); - l4proto = nf_ct_l4proto_find_get(tuple->src.l3num, tuple->dst.protonum); + rcu_read_lock(); + l4proto = __nf_ct_l4proto_find(tuple->src.l3num, tuple->dst.protonum); if (likely(l4proto->nlattr_to_tuple)) { ret = nla_validate_nested(attr, CTA_PROTO_MAX, @@ -642,7 +645,7 @@ ctnetlink_parse_tuple_proto(struct nlattr *attr, ret = l4proto->nlattr_to_tuple(tb, tuple); } - nf_ct_l4proto_put(l4proto); + rcu_read_unlock(); return ret; } @@ -989,10 +992,11 @@ ctnetlink_change_protoinfo(struct nf_conn *ct, struct nlattr *cda[]) nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, NULL); - l4proto = nf_ct_l4proto_find_get(nf_ct_l3num(ct), nf_ct_protonum(ct)); + rcu_read_lock(); + l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); if (l4proto->from_nlattr) err = l4proto->from_nlattr(tb, ct); - nf_ct_l4proto_put(l4proto); + rcu_read_unlock(); return err; } @@ -1062,6 +1066,10 @@ ctnetlink_change_conntrack(struct nf_conn *ct, struct nlattr *cda[]) { int err; + /* only allow NAT changes and master assignation for new conntracks */ + if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST] || cda[CTA_TUPLE_MASTER]) + return -EOPNOTSUPP; + if (cda[CTA_HELP]) { err = ctnetlink_change_helper(ct, cda); if (err < 0) @@ -1124,13 +1132,11 @@ ctnetlink_event_report(struct nf_conn *ct, u32 pid, int report) report); } -static int +static struct nf_conn * ctnetlink_create_conntrack(struct nlattr *cda[], struct nf_conntrack_tuple *otuple, struct nf_conntrack_tuple *rtuple, - struct nf_conn *master_ct, - u32 pid, - int report) + u8 u3) { struct nf_conn *ct; int err = -EINVAL; @@ -1138,10 +1144,10 @@ ctnetlink_create_conntrack(struct nlattr *cda[], ct = nf_conntrack_alloc(&init_net, otuple, rtuple, GFP_ATOMIC); if (IS_ERR(ct)) - return -ENOMEM; + return ERR_PTR(-ENOMEM); if (!cda[CTA_TIMEOUT]) - goto err; + goto err1; ct->timeout.expires = ntohl(nla_get_be32(cda[CTA_TIMEOUT])); ct->timeout.expires = jiffies + ct->timeout.expires * HZ; @@ -1152,10 
+1158,8 @@ ctnetlink_create_conntrack(struct nlattr *cda[], char *helpname; err = ctnetlink_parse_help(cda[CTA_HELP], &helpname); - if (err < 0) { - rcu_read_unlock(); - goto err; - } + if (err < 0) + goto err2; helper = __nf_conntrack_helper_find_byname(helpname); if (helper == NULL) { @@ -1163,28 +1167,26 @@ ctnetlink_create_conntrack(struct nlattr *cda[], #ifdef CONFIG_MODULES if (request_module("nfct-helper-%s", helpname) < 0) { err = -EOPNOTSUPP; - goto err; + goto err1; } rcu_read_lock(); helper = __nf_conntrack_helper_find_byname(helpname); if (helper) { - rcu_read_unlock(); err = -EAGAIN; - goto err; + goto err2; } rcu_read_unlock(); #endif err = -EOPNOTSUPP; - goto err; + goto err1; } else { struct nf_conn_help *help; help = nf_ct_helper_ext_add(ct, GFP_ATOMIC); if (help == NULL) { - rcu_read_unlock(); err = -ENOMEM; - goto err; + goto err2; } /* not in hash table yet so not strictly necessary */ @@ -1193,44 +1195,34 @@ ctnetlink_create_conntrack(struct nlattr *cda[], } else { /* try an implicit helper assignation */ err = __nf_ct_try_assign_helper(ct, GFP_ATOMIC); - if (err < 0) { - rcu_read_unlock(); - goto err; - } + if (err < 0) + goto err2; } if (cda[CTA_STATUS]) { err = ctnetlink_change_status(ct, cda); - if (err < 0) { - rcu_read_unlock(); - goto err; - } + if (err < 0) + goto err2; } if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { err = ctnetlink_change_nat(ct, cda); - if (err < 0) { - rcu_read_unlock(); - goto err; - } + if (err < 0) + goto err2; } #ifdef CONFIG_NF_NAT_NEEDED if (cda[CTA_NAT_SEQ_ADJ_ORIG] || cda[CTA_NAT_SEQ_ADJ_REPLY]) { err = ctnetlink_change_nat_seq_adj(ct, cda); - if (err < 0) { - rcu_read_unlock(); - goto err; - } + if (err < 0) + goto err2; } #endif if (cda[CTA_PROTOINFO]) { err = ctnetlink_change_protoinfo(ct, cda); - if (err < 0) { - rcu_read_unlock(); - goto err; - } + if (err < 0) + goto err2; } nf_ct_acct_ext_add(ct, GFP_ATOMIC); @@ -1241,23 +1233,37 @@ ctnetlink_create_conntrack(struct nlattr *cda[], #endif /* setup master 
conntrack: this is a confirmed expectation */ - if (master_ct) { + if (cda[CTA_TUPLE_MASTER]) { + struct nf_conntrack_tuple master; + struct nf_conntrack_tuple_hash *master_h; + struct nf_conn *master_ct; + + err = ctnetlink_parse_tuple(cda, &master, CTA_TUPLE_MASTER, u3); + if (err < 0) + goto err2; + + master_h = __nf_conntrack_find(&init_net, &master); + if (master_h == NULL) { + err = -ENOENT; + goto err2; + } + master_ct = nf_ct_tuplehash_to_ctrack(master_h); + nf_conntrack_get(&master_ct->ct_general); __set_bit(IPS_EXPECTED_BIT, &ct->status); ct->master = master_ct; } - nf_conntrack_get(&ct->ct_general); add_timer(&ct->timeout); nf_conntrack_hash_insert(ct); rcu_read_unlock(); - ctnetlink_event_report(ct, pid, report); - nf_ct_put(ct); - return 0; + return ct; -err: +err2: + rcu_read_unlock(); +err1: nf_conntrack_free(ct); - return err; + return ERR_PTR(err); } static int @@ -1289,38 +1295,25 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, h = __nf_conntrack_find(&init_net, &rtuple); if (h == NULL) { - struct nf_conntrack_tuple master; - struct nf_conntrack_tuple_hash *master_h = NULL; - struct nf_conn *master_ct = NULL; - - if (cda[CTA_TUPLE_MASTER]) { - err = ctnetlink_parse_tuple(cda, - &master, - CTA_TUPLE_MASTER, - u3); - if (err < 0) - goto out_unlock; + err = -ENOENT; + if (nlh->nlmsg_flags & NLM_F_CREATE) { + struct nf_conn *ct; - master_h = __nf_conntrack_find(&init_net, &master); - if (master_h == NULL) { - err = -ENOENT; + ct = ctnetlink_create_conntrack(cda, &otuple, + &rtuple, u3); + if (IS_ERR(ct)) { + err = PTR_ERR(ct); goto out_unlock; } - master_ct = nf_ct_tuplehash_to_ctrack(master_h); - nf_conntrack_get(&master_ct->ct_general); - } - - err = -ENOENT; - if (nlh->nlmsg_flags & NLM_F_CREATE) - err = ctnetlink_create_conntrack(cda, - &otuple, - &rtuple, - master_ct, - NETLINK_CB(skb).pid, - nlmsg_report(nlh)); - spin_unlock_bh(&nf_conntrack_lock); - if (err < 0 && master_ct) - nf_ct_put(master_ct); + err = 0; + 
nf_conntrack_get(&ct->ct_general); + spin_unlock_bh(&nf_conntrack_lock); + ctnetlink_event_report(ct, + NETLINK_CB(skb).pid, + nlmsg_report(nlh)); + nf_ct_put(ct); + } else + spin_unlock_bh(&nf_conntrack_lock); return err; } @@ -1332,17 +1325,6 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, if (!(nlh->nlmsg_flags & NLM_F_EXCL)) { struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); - /* we only allow nat config for new conntracks */ - if (cda[CTA_NAT_SRC] || cda[CTA_NAT_DST]) { - err = -EOPNOTSUPP; - goto out_unlock; - } - /* can't link an existing conntrack to a master */ - if (cda[CTA_TUPLE_MASTER]) { - err = -EOPNOTSUPP; - goto out_unlock; - } - err = ctnetlink_change_conntrack(ct, cda); if (err == 0) { nf_conntrack_get(&ct->ct_general); @@ -1533,6 +1515,7 @@ static int ctnetlink_expect_event(struct notifier_block *this, nla_put_failure: rcu_read_unlock(); nlmsg_failure: + nfnetlink_set_err(0, 0, -ENOBUFS); kfree_skb(skb); return NOTIFY_DONE; } diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 592d73344d46..9a62b4efa0e1 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -74,27 +74,6 @@ EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find); /* this is guaranteed to always return a valid protocol helper, since * it falls back to generic_protocol */ -struct nf_conntrack_l4proto * -nf_ct_l4proto_find_get(u_int16_t l3proto, u_int8_t l4proto) -{ - struct nf_conntrack_l4proto *p; - - rcu_read_lock(); - p = __nf_ct_l4proto_find(l3proto, l4proto); - if (!try_module_get(p->me)) - p = &nf_conntrack_l4proto_generic; - rcu_read_unlock(); - - return p; -} -EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get); - -void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p) -{ - module_put(p->me); -} -EXPORT_SYMBOL_GPL(nf_ct_l4proto_put); - struct nf_conntrack_l3proto * nf_ct_l3proto_find_get(u_int16_t l3proto) { diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c 
index 8fcf1762fabf..d3d5a7fd73ce 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -16,6 +16,9 @@ #include <linux/skbuff.h> #include <linux/dccp.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> + #include <linux/netfilter/nfnetlink_conntrack.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_l4proto.h> @@ -23,8 +26,6 @@ static DEFINE_RWLOCK(dccp_lock); -static int nf_ct_dccp_loose __read_mostly = 1; - /* Timeouts are based on values from RFC4340: * * - REQUEST: @@ -72,16 +73,6 @@ static int nf_ct_dccp_loose __read_mostly = 1; #define DCCP_MSL (2 * 60 * HZ) -static unsigned int dccp_timeout[CT_DCCP_MAX + 1] __read_mostly = { - [CT_DCCP_REQUEST] = 2 * DCCP_MSL, - [CT_DCCP_RESPOND] = 4 * DCCP_MSL, - [CT_DCCP_PARTOPEN] = 4 * DCCP_MSL, - [CT_DCCP_OPEN] = 12 * 3600 * HZ, - [CT_DCCP_CLOSEREQ] = 64 * HZ, - [CT_DCCP_CLOSING] = 64 * HZ, - [CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL, -}; - static const char * const dccp_state_names[] = { [CT_DCCP_NONE] = "NONE", [CT_DCCP_REQUEST] = "REQUEST", @@ -393,6 +384,22 @@ dccp_state_table[CT_DCCP_ROLE_MAX + 1][DCCP_PKT_SYNCACK + 1][CT_DCCP_MAX + 1] = }, }; +/* this module per-net specifics */ +static int dccp_net_id; +struct dccp_net { + int dccp_loose; + unsigned int dccp_timeout[CT_DCCP_MAX + 1]; +#ifdef CONFIG_SYSCTL + struct ctl_table_header *sysctl_header; + struct ctl_table *sysctl_table; +#endif +}; + +static inline struct dccp_net *dccp_pernet(struct net *net) +{ + return net_generic(net, dccp_net_id); +} + static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct nf_conntrack_tuple *tuple) { @@ -419,6 +426,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff) { struct net *net = nf_ct_net(ct); + struct dccp_net *dn; struct dccp_hdr _dh, *dh; const char *msg; u_int8_t state; @@ -429,7 +437,8 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, state = 
dccp_state_table[CT_DCCP_ROLE_CLIENT][dh->dccph_type][CT_DCCP_NONE]; switch (state) { default: - if (nf_ct_dccp_loose == 0) { + dn = dccp_pernet(net); + if (dn->dccp_loose == 0) { msg = "nf_ct_dccp: not picking up existing connection "; goto out_invalid; } @@ -465,6 +474,7 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, u_int8_t pf, unsigned int hooknum) { struct net *net = nf_ct_net(ct); + struct dccp_net *dn; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); struct dccp_hdr _dh, *dh; u_int8_t type, old_state, new_state; @@ -542,7 +552,9 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.dccp.last_pkt = type; ct->proto.dccp.state = new_state; write_unlock_bh(&dccp_lock); - nf_ct_refresh_acct(ct, ctinfo, skb, dccp_timeout[new_state]); + + dn = dccp_pernet(net); + nf_ct_refresh_acct(ct, ctinfo, skb, dn->dccp_timeout[new_state]); return NF_ACCEPT; } @@ -660,13 +672,11 @@ static int nlattr_to_dccp(struct nlattr *cda[], struct nf_conn *ct) #endif #ifdef CONFIG_SYSCTL -static unsigned int dccp_sysctl_table_users; -static struct ctl_table_header *dccp_sysctl_header; -static ctl_table dccp_sysctl_table[] = { +/* template, data assigned later */ +static struct ctl_table dccp_sysctl_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_request", - .data = &dccp_timeout[CT_DCCP_REQUEST], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -674,7 +684,6 @@ static ctl_table dccp_sysctl_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_respond", - .data = &dccp_timeout[CT_DCCP_RESPOND], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -682,7 +691,6 @@ static ctl_table dccp_sysctl_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_partopen", - .data = &dccp_timeout[CT_DCCP_PARTOPEN], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = 
proc_dointvec_jiffies, @@ -690,7 +698,6 @@ static ctl_table dccp_sysctl_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_open", - .data = &dccp_timeout[CT_DCCP_OPEN], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -698,7 +705,6 @@ static ctl_table dccp_sysctl_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_closereq", - .data = &dccp_timeout[CT_DCCP_CLOSEREQ], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -706,7 +712,6 @@ static ctl_table dccp_sysctl_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_closing", - .data = &dccp_timeout[CT_DCCP_CLOSING], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -714,7 +719,6 @@ static ctl_table dccp_sysctl_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_timeout_timewait", - .data = &dccp_timeout[CT_DCCP_TIMEWAIT], .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, @@ -722,8 +726,7 @@ static ctl_table dccp_sysctl_table[] = { { .ctl_name = CTL_UNNUMBERED, .procname = "nf_conntrack_dccp_loose", - .data = &nf_ct_dccp_loose, - .maxlen = sizeof(nf_ct_dccp_loose), + .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, }, @@ -751,11 +754,6 @@ static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = { .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif -#ifdef CONFIG_SYSCTL - .ctl_table_users = &dccp_sysctl_table_users, - .ctl_table_header = &dccp_sysctl_header, - .ctl_table = dccp_sysctl_table, -#endif }; static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { @@ -776,34 +774,107 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = { .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, .nla_policy = nf_ct_port_nla_policy, #endif +}; + +static __net_init int dccp_net_init(struct net *net) +{ 
+ struct dccp_net *dn; + int err; + + dn = kmalloc(sizeof(*dn), GFP_KERNEL); + if (!dn) + return -ENOMEM; + + /* default values */ + dn->dccp_loose = 1; + dn->dccp_timeout[CT_DCCP_REQUEST] = 2 * DCCP_MSL; + dn->dccp_timeout[CT_DCCP_RESPOND] = 4 * DCCP_MSL; + dn->dccp_timeout[CT_DCCP_PARTOPEN] = 4 * DCCP_MSL; + dn->dccp_timeout[CT_DCCP_OPEN] = 12 * 3600 * HZ; + dn->dccp_timeout[CT_DCCP_CLOSEREQ] = 64 * HZ; + dn->dccp_timeout[CT_DCCP_CLOSING] = 64 * HZ; + dn->dccp_timeout[CT_DCCP_TIMEWAIT] = 2 * DCCP_MSL; + + err = net_assign_generic(net, dccp_net_id, dn); + if (err) + goto out; + #ifdef CONFIG_SYSCTL - .ctl_table_users = &dccp_sysctl_table_users, - .ctl_table_header = &dccp_sysctl_header, - .ctl_table = dccp_sysctl_table, + err = -ENOMEM; + dn->sysctl_table = kmemdup(dccp_sysctl_table, + sizeof(dccp_sysctl_table), GFP_KERNEL); + if (!dn->sysctl_table) + goto out; + + dn->sysctl_table[0].data = &dn->dccp_timeout[CT_DCCP_REQUEST]; + dn->sysctl_table[1].data = &dn->dccp_timeout[CT_DCCP_RESPOND]; + dn->sysctl_table[2].data = &dn->dccp_timeout[CT_DCCP_PARTOPEN]; + dn->sysctl_table[3].data = &dn->dccp_timeout[CT_DCCP_OPEN]; + dn->sysctl_table[4].data = &dn->dccp_timeout[CT_DCCP_CLOSEREQ]; + dn->sysctl_table[5].data = &dn->dccp_timeout[CT_DCCP_CLOSING]; + dn->sysctl_table[6].data = &dn->dccp_timeout[CT_DCCP_TIMEWAIT]; + dn->sysctl_table[7].data = &dn->dccp_loose; + + dn->sysctl_header = register_net_sysctl_table(net, + nf_net_netfilter_sysctl_path, dn->sysctl_table); + if (!dn->sysctl_header) { + kfree(dn->sysctl_table); + goto out; + } #endif + + return 0; + +out: + kfree(dn); + return err; +} + +static __net_exit void dccp_net_exit(struct net *net) +{ + struct dccp_net *dn = dccp_pernet(net); +#ifdef CONFIG_SYSCTL + unregister_net_sysctl_table(dn->sysctl_header); + kfree(dn->sysctl_table); +#endif + kfree(dn); + + net_assign_generic(net, dccp_net_id, NULL); +} + +static struct pernet_operations dccp_net_ops = { + .init = dccp_net_init, + .exit = dccp_net_exit, }; static 
int __init nf_conntrack_proto_dccp_init(void) { int err; - err = nf_conntrack_l4proto_register(&dccp_proto4); + err = register_pernet_gen_subsys(&dccp_net_id, &dccp_net_ops); if (err < 0) goto err1; - err = nf_conntrack_l4proto_register(&dccp_proto6); + err = nf_conntrack_l4proto_register(&dccp_proto4); if (err < 0) goto err2; + + err = nf_conntrack_l4proto_register(&dccp_proto6); + if (err < 0) + goto err3; return 0; -err2: +err3: nf_conntrack_l4proto_unregister(&dccp_proto4); +err2: + unregister_pernet_gen_subsys(dccp_net_id, &dccp_net_ops); err1: return err; } static void __exit nf_conntrack_proto_dccp_fini(void) { + unregister_pernet_gen_subsys(dccp_net_id, &dccp_net_ops); nf_conntrack_l4proto_unregister(&dccp_proto6); nf_conntrack_l4proto_unregister(&dccp_proto4); } diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 4be80d7b8795..829374f426c4 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -92,7 +92,7 @@ static struct ctl_table generic_compat_sysctl_table[] = { struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = { .l3proto = PF_UNSPEC, - .l4proto = 0, + .l4proto = 255, .name = "unknown", .pkt_to_tuple = generic_pkt_to_tuple, .invert_tuple = generic_invert_tuple, diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index f3fd154d1ddd..e46f3b79adb3 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -25,6 +25,8 @@ #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_log.h> +#include <net/netfilter/ipv4/nf_conntrack_ipv4.h> +#include <net/netfilter/ipv6/nf_conntrack_ipv6.h> /* Protects ct->proto.tcp */ static DEFINE_RWLOCK(tcp_lock); diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 2b8b1f579f93..d4021179e24e 100644 --- 
a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -22,6 +22,8 @@ #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_log.h> +#include <net/netfilter/ipv4/nf_conntrack_ipv4.h> +#include <net/netfilter/ipv6/nf_conntrack_ipv6.h> static unsigned int nf_ct_udp_timeout __read_mostly = 30*HZ; static unsigned int nf_ct_udp_timeout_stream __read_mostly = 180*HZ; diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index fa8ae5d2659c..8bb998fe098b 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -14,58 +14,63 @@ LOG target modules */ #define NF_LOG_PREFIXLEN 128 +#define NFLOGGER_NAME_LEN 64 static const struct nf_logger *nf_loggers[NFPROTO_NUMPROTO] __read_mostly; +static struct list_head nf_loggers_l[NFPROTO_NUMPROTO] __read_mostly; static DEFINE_MUTEX(nf_log_mutex); -/* return EBUSY if somebody else is registered, EEXIST if the same logger - * is registred, 0 on success. */ -int nf_log_register(u_int8_t pf, const struct nf_logger *logger) +static struct nf_logger *__find_logger(int pf, const char *str_logger) { - int ret; + struct nf_logger *t; - if (pf >= ARRAY_SIZE(nf_loggers)) - return -EINVAL; - - /* Any setup of logging members must be done before - * substituting pointer. */ - ret = mutex_lock_interruptible(&nf_log_mutex); - if (ret < 0) - return ret; - - if (!nf_loggers[pf]) - rcu_assign_pointer(nf_loggers[pf], logger); - else if (nf_loggers[pf] == logger) - ret = -EEXIST; - else - ret = -EBUSY; + list_for_each_entry(t, &nf_loggers_l[pf], list[pf]) { + if (!strnicmp(str_logger, t->name, strlen(t->name))) + return t; + } - mutex_unlock(&nf_log_mutex); - return ret; + return NULL; } -EXPORT_SYMBOL(nf_log_register); -void nf_log_unregister_pf(u_int8_t pf) +/* return EEXIST if the same logger is registred, 0 on success. 
*/ +int nf_log_register(u_int8_t pf, struct nf_logger *logger) { + const struct nf_logger *llog; + if (pf >= ARRAY_SIZE(nf_loggers)) - return; + return -EINVAL; + mutex_lock(&nf_log_mutex); - rcu_assign_pointer(nf_loggers[pf], NULL); + + if (pf == NFPROTO_UNSPEC) { + int i; + for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) + list_add_tail(&(logger->list[i]), &(nf_loggers_l[i])); + } else { + /* register at end of list to honor first register win */ + list_add_tail(&logger->list[pf], &nf_loggers_l[pf]); + llog = rcu_dereference(nf_loggers[pf]); + if (llog == NULL) + rcu_assign_pointer(nf_loggers[pf], logger); + } + mutex_unlock(&nf_log_mutex); - /* Give time to concurrent readers. */ - synchronize_rcu(); + return 0; } -EXPORT_SYMBOL(nf_log_unregister_pf); +EXPORT_SYMBOL(nf_log_register); -void nf_log_unregister(const struct nf_logger *logger) +void nf_log_unregister(struct nf_logger *logger) { + const struct nf_logger *c_logger; int i; mutex_lock(&nf_log_mutex); for (i = 0; i < ARRAY_SIZE(nf_loggers); i++) { - if (nf_loggers[i] == logger) + c_logger = rcu_dereference(nf_loggers[i]); + if (c_logger == logger) rcu_assign_pointer(nf_loggers[i], NULL); + list_del(&logger->list[i]); } mutex_unlock(&nf_log_mutex); @@ -73,6 +78,27 @@ void nf_log_unregister(const struct nf_logger *logger) } EXPORT_SYMBOL(nf_log_unregister); +int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger) +{ + mutex_lock(&nf_log_mutex); + if (__find_logger(pf, logger->name) == NULL) { + mutex_unlock(&nf_log_mutex); + return -ENOENT; + } + rcu_assign_pointer(nf_loggers[pf], logger); + mutex_unlock(&nf_log_mutex); + return 0; +} +EXPORT_SYMBOL(nf_log_bind_pf); + +void nf_log_unbind_pf(u_int8_t pf) +{ + mutex_lock(&nf_log_mutex); + rcu_assign_pointer(nf_loggers[pf], NULL); + mutex_unlock(&nf_log_mutex); +} +EXPORT_SYMBOL(nf_log_unbind_pf); + void nf_log_packet(u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, @@ -129,13 +155,37 @@ static int seq_show(struct seq_file *s, void *v) 
{ loff_t *pos = v; const struct nf_logger *logger; + struct nf_logger *t; + int ret; logger = rcu_dereference(nf_loggers[*pos]); if (!logger) - return seq_printf(s, "%2lld NONE\n", *pos); + ret = seq_printf(s, "%2lld NONE (", *pos); + else + ret = seq_printf(s, "%2lld %s (", *pos, logger->name); + + if (ret < 0) + return ret; + + mutex_lock(&nf_log_mutex); + list_for_each_entry(t, &nf_loggers_l[*pos], list[*pos]) { + ret = seq_printf(s, "%s", t->name); + if (ret < 0) { + mutex_unlock(&nf_log_mutex); + return ret; + } + if (&t->list[*pos] != nf_loggers_l[*pos].prev) { + ret = seq_printf(s, ","); + if (ret < 0) { + mutex_unlock(&nf_log_mutex); + return ret; + } + } + } + mutex_unlock(&nf_log_mutex); - return seq_printf(s, "%2lld %s\n", *pos, logger->name); + return seq_printf(s, ")\n"); } static const struct seq_operations nflog_seq_ops = { @@ -158,15 +208,102 @@ static const struct file_operations nflog_file_ops = { .release = seq_release, }; + #endif /* PROC_FS */ +#ifdef CONFIG_SYSCTL +struct ctl_path nf_log_sysctl_path[] = { + { .procname = "net", .ctl_name = CTL_NET, }, + { .procname = "netfilter", .ctl_name = NET_NETFILTER, }, + { .procname = "nf_log", .ctl_name = CTL_UNNUMBERED, }, + { } +}; + +static char nf_log_sysctl_fnames[NFPROTO_NUMPROTO-NFPROTO_UNSPEC][3]; +static struct ctl_table nf_log_sysctl_table[NFPROTO_NUMPROTO+1]; +static struct ctl_table_header *nf_log_dir_header; + +static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp, + void *buffer, size_t *lenp, loff_t *ppos) +{ + const struct nf_logger *logger; + int r = 0; + int tindex = (unsigned long)table->extra1; + + if (write) { + if (!strcmp(buffer, "NONE")) { + nf_log_unbind_pf(tindex); + return 0; + } + mutex_lock(&nf_log_mutex); + logger = __find_logger(tindex, buffer); + if (logger == NULL) { + mutex_unlock(&nf_log_mutex); + return -ENOENT; + } + rcu_assign_pointer(nf_loggers[tindex], logger); + mutex_unlock(&nf_log_mutex); + } else { + rcu_read_lock(); + logger = 
rcu_dereference(nf_loggers[tindex]); + if (!logger) + table->data = "NONE"; + else + table->data = logger->name; + r = proc_dostring(table, write, filp, buffer, lenp, ppos); + rcu_read_unlock(); + } + + return r; +} + +static __init int netfilter_log_sysctl_init(void) +{ + int i; + + for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) { + snprintf(nf_log_sysctl_fnames[i-NFPROTO_UNSPEC], 3, "%d", i); + nf_log_sysctl_table[i].ctl_name = CTL_UNNUMBERED; + nf_log_sysctl_table[i].procname = + nf_log_sysctl_fnames[i-NFPROTO_UNSPEC]; + nf_log_sysctl_table[i].data = NULL; + nf_log_sysctl_table[i].maxlen = + NFLOGGER_NAME_LEN * sizeof(char); + nf_log_sysctl_table[i].mode = 0644; + nf_log_sysctl_table[i].proc_handler = nf_log_proc_dostring; + nf_log_sysctl_table[i].extra1 = (void *)(unsigned long) i; + } + + nf_log_dir_header = register_sysctl_paths(nf_log_sysctl_path, + nf_log_sysctl_table); + if (!nf_log_dir_header) + return -ENOMEM; + + return 0; +} +#else +static __init int netfilter_log_sysctl_init(void) +{ + return 0; +} +#endif /* CONFIG_SYSCTL */ int __init netfilter_log_init(void) { + int i, r; #ifdef CONFIG_PROC_FS if (!proc_create("nf_log", S_IRUGO, proc_net_netfilter, &nflog_file_ops)) return -1; #endif + + /* Errors will trigger panic, unroll on error is unnecessary. 
*/ + r = netfilter_log_sysctl_init(); + if (r < 0) + return r; + + for (i = NFPROTO_UNSPEC; i < NFPROTO_NUMPROTO; i++) + INIT_LIST_HEAD(&(nf_loggers_l[i])); + return 0; } diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index 9c0ba17a1ddb..2785d66a7e38 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -113,6 +113,12 @@ int nfnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) } EXPORT_SYMBOL_GPL(nfnetlink_send); +void nfnetlink_set_err(u32 pid, u32 group, int error) +{ + netlink_set_err(nfnl, pid, group, error); +} +EXPORT_SYMBOL_GPL(nfnetlink_set_err); + int nfnetlink_unicast(struct sk_buff *skb, u_int32_t pid, int flags) { return netlink_unicast(nfnl, skb, pid, flags); diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index c712e9fc6bba..fd326ac27ec8 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -693,7 +693,7 @@ nfulnl_recv_unsupp(struct sock *ctnl, struct sk_buff *skb, return -ENOTSUPP; } -static const struct nf_logger nfulnl_logger = { +static struct nf_logger nfulnl_logger __read_mostly = { .name = "nfnetlink_log", .logfn = &nfulnl_log_packet, .me = THIS_MODULE, @@ -725,9 +725,9 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, /* Commands without queue context */ switch (cmd->command) { case NFULNL_CFG_CMD_PF_BIND: - return nf_log_register(pf, &nfulnl_logger); + return nf_log_bind_pf(pf, &nfulnl_logger); case NFULNL_CFG_CMD_PF_UNBIND: - nf_log_unregister_pf(pf); + nf_log_unbind_pf(pf); return 0; } } @@ -952,17 +952,25 @@ static int __init nfnetlink_log_init(void) goto cleanup_netlink_notifier; } + status = nf_log_register(NFPROTO_UNSPEC, &nfulnl_logger); + if (status < 0) { + printk(KERN_ERR "log: failed to register logger\n"); + goto cleanup_subsys; + } + #ifdef CONFIG_PROC_FS if (!proc_create("nfnetlink_log", 0440, proc_net_netfilter, &nful_file_ops)) - goto cleanup_subsys; + goto cleanup_logger; #endif return status; #ifdef 
CONFIG_PROC_FS +cleanup_logger: + nf_log_unregister(&nfulnl_logger); +#endif cleanup_subsys: nfnetlink_subsys_unregister(&nfulnl_subsys); -#endif cleanup_netlink_notifier: netlink_unregister_notifier(&nfulnl_rtnl_notifier); return status; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 5baccfa5a0de..509a95621f9f 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -625,6 +625,20 @@ void xt_free_table_info(struct xt_table_info *info) } EXPORT_SYMBOL(xt_free_table_info); +void xt_table_entry_swap_rcu(struct xt_table_info *oldinfo, + struct xt_table_info *newinfo) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) { + void *p = oldinfo->entries[cpu]; + rcu_assign_pointer(oldinfo->entries[cpu], newinfo->entries[cpu]); + newinfo->entries[cpu] = p; + } + +} +EXPORT_SYMBOL_GPL(xt_table_entry_swap_rcu); + /* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */ struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af, const char *name) @@ -671,21 +685,22 @@ xt_replace_table(struct xt_table *table, struct xt_table_info *oldinfo, *private; /* Do the substitution. */ - write_lock_bh(&table->lock); + mutex_lock(&table->lock); private = table->private; /* Check inside lock: is the old number correct? */ if (num_counters != private->number) { duprintf("num_counters != table->private->number (%u/%u)\n", num_counters, private->number); - write_unlock_bh(&table->lock); + mutex_unlock(&table->lock); *error = -EAGAIN; return NULL; } oldinfo = private; - table->private = newinfo; + rcu_assign_pointer(table->private, newinfo); newinfo->initial_entries = oldinfo->initial_entries; - write_unlock_bh(&table->lock); + mutex_unlock(&table->lock); + synchronize_net(); return oldinfo; } EXPORT_SYMBOL_GPL(xt_replace_table); @@ -719,7 +734,8 @@ struct xt_table *xt_register_table(struct net *net, struct xt_table *table, /* Simplifies replace_table code. 
*/ table->private = bootstrap; - rwlock_init(&table->lock); + mutex_init(&table->lock); + if (!xt_replace_table(table, 0, newinfo, &ret)) goto unlock; diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c new file mode 100644 index 000000000000..10e789e2d12a --- /dev/null +++ b/net/netfilter/xt_HL.c @@ -0,0 +1,171 @@ +/* + * TTL modification target for IP tables + * (C) 2000,2005 by Harald Welte <laforge@netfilter.org> + * + * Hop Limit modification target for ip6tables + * Maciej Soltysiak <solt@dns.toxicfilms.tv> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <net/checksum.h> + +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_ipv4/ipt_TTL.h> +#include <linux/netfilter_ipv6/ip6t_HL.h> + +MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); +MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>"); +MODULE_DESCRIPTION("Xtables: Hoplimit/TTL Limit field modification target"); +MODULE_LICENSE("GPL"); + +static unsigned int +ttl_tg(struct sk_buff *skb, const struct xt_target_param *par) +{ + struct iphdr *iph; + const struct ipt_TTL_info *info = par->targinfo; + int new_ttl; + + if (!skb_make_writable(skb, skb->len)) + return NF_DROP; + + iph = ip_hdr(skb); + + switch (info->mode) { + case IPT_TTL_SET: + new_ttl = info->ttl; + break; + case IPT_TTL_INC: + new_ttl = iph->ttl + info->ttl; + if (new_ttl > 255) + new_ttl = 255; + break; + case IPT_TTL_DEC: + new_ttl = iph->ttl - info->ttl; + if (new_ttl < 0) + new_ttl = 0; + break; + default: + new_ttl = iph->ttl; + break; + } + + if (new_ttl != iph->ttl) { + csum_replace2(&iph->check, htons(iph->ttl << 8), + htons(new_ttl << 8)); + iph->ttl = new_ttl; + } + + return XT_CONTINUE; +} + +static unsigned int +hl_tg6(struct sk_buff *skb, 
const struct xt_target_param *par) +{ + struct ipv6hdr *ip6h; + const struct ip6t_HL_info *info = par->targinfo; + int new_hl; + + if (!skb_make_writable(skb, skb->len)) + return NF_DROP; + + ip6h = ipv6_hdr(skb); + + switch (info->mode) { + case IP6T_HL_SET: + new_hl = info->hop_limit; + break; + case IP6T_HL_INC: + new_hl = ip6h->hop_limit + info->hop_limit; + if (new_hl > 255) + new_hl = 255; + break; + case IP6T_HL_DEC: + new_hl = ip6h->hop_limit - info->hop_limit; + if (new_hl < 0) + new_hl = 0; + break; + default: + new_hl = ip6h->hop_limit; + break; + } + + ip6h->hop_limit = new_hl; + + return XT_CONTINUE; +} + +static bool ttl_tg_check(const struct xt_tgchk_param *par) +{ + const struct ipt_TTL_info *info = par->targinfo; + + if (info->mode > IPT_TTL_MAXMODE) { + printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n", + info->mode); + return false; + } + if (info->mode != IPT_TTL_SET && info->ttl == 0) + return false; + return true; +} + +static bool hl_tg6_check(const struct xt_tgchk_param *par) +{ + const struct ip6t_HL_info *info = par->targinfo; + + if (info->mode > IP6T_HL_MAXMODE) { + printk(KERN_WARNING "ip6t_HL: invalid or unknown Mode %u\n", + info->mode); + return false; + } + if (info->mode != IP6T_HL_SET && info->hop_limit == 0) { + printk(KERN_WARNING "ip6t_HL: increment/decrement doesn't " + "make sense with value 0\n"); + return false; + } + return true; +} + +static struct xt_target hl_tg_reg[] __read_mostly = { + { + .name = "TTL", + .revision = 0, + .family = NFPROTO_IPV4, + .target = ttl_tg, + .targetsize = sizeof(struct ipt_TTL_info), + .table = "mangle", + .checkentry = ttl_tg_check, + .me = THIS_MODULE, + }, + { + .name = "HL", + .revision = 0, + .family = NFPROTO_IPV6, + .target = hl_tg6, + .targetsize = sizeof(struct ip6t_HL_info), + .table = "mangle", + .checkentry = hl_tg6_check, + .me = THIS_MODULE, + }, +}; + +static int __init hl_tg_init(void) +{ + return xt_register_targets(hl_tg_reg, ARRAY_SIZE(hl_tg_reg)); +} + +static 
void __exit hl_tg_exit(void) +{ + xt_unregister_targets(hl_tg_reg, ARRAY_SIZE(hl_tg_reg)); +} + +module_init(hl_tg_init); +module_exit(hl_tg_exit); +MODULE_ALIAS("ipt_TTL"); +MODULE_ALIAS("ip6t_HL"); diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c new file mode 100644 index 000000000000..8ff7843bb921 --- /dev/null +++ b/net/netfilter/xt_LED.c @@ -0,0 +1,161 @@ +/* + * xt_LED.c - netfilter target to make LEDs blink upon packet matches + * + * Copyright (C) 2008 Adam Nielsen <a.nielsen@shikadi.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301 USA. + * + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/netfilter/x_tables.h> +#include <linux/leds.h> +#include <linux/mutex.h> + +#include <linux/netfilter/xt_LED.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Adam Nielsen <a.nielsen@shikadi.net>"); +MODULE_DESCRIPTION("Xtables: trigger LED devices on packet match"); + +/* + * This is declared in here (the kernel module) only, to avoid having these + * dependencies in userspace code. This is what xt_led_info.internal_data + * points to. 
+ */ +struct xt_led_info_internal { + struct led_trigger netfilter_led_trigger; + struct timer_list timer; +}; + +static unsigned int +led_tg(struct sk_buff *skb, const struct xt_target_param *par) +{ + const struct xt_led_info *ledinfo = par->targinfo; + struct xt_led_info_internal *ledinternal = ledinfo->internal_data; + + /* + * If "always blink" is enabled, and there's still some time until the + * LED will switch off, briefly switch it off now. + */ + if ((ledinfo->delay > 0) && ledinfo->always_blink && + timer_pending(&ledinternal->timer)) + led_trigger_event(&ledinternal->netfilter_led_trigger,LED_OFF); + + led_trigger_event(&ledinternal->netfilter_led_trigger, LED_FULL); + + /* If there's a positive delay, start/update the timer */ + if (ledinfo->delay > 0) { + mod_timer(&ledinternal->timer, + jiffies + msecs_to_jiffies(ledinfo->delay)); + + /* Otherwise if there was no delay given, blink as fast as possible */ + } else if (ledinfo->delay == 0) { + led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF); + } + + /* else the delay is negative, which means switch on and stay on */ + + return XT_CONTINUE; +} + +static void led_timeout_callback(unsigned long data) +{ + struct xt_led_info *ledinfo = (struct xt_led_info *)data; + struct xt_led_info_internal *ledinternal = ledinfo->internal_data; + + led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF); +} + +static bool led_tg_check(const struct xt_tgchk_param *par) +{ + struct xt_led_info *ledinfo = par->targinfo; + struct xt_led_info_internal *ledinternal; + int err; + + if (ledinfo->id[0] == '\0') { + printk(KERN_ERR KBUILD_MODNAME ": No 'id' parameter given.\n"); + return false; + } + + ledinternal = kzalloc(sizeof(struct xt_led_info_internal), GFP_KERNEL); + if (!ledinternal) { + printk(KERN_CRIT KBUILD_MODNAME ": out of memory\n"); + return false; + } + + ledinternal->netfilter_led_trigger.name = ledinfo->id; + + err = led_trigger_register(&ledinternal->netfilter_led_trigger); + if (err) 
{ + printk(KERN_CRIT KBUILD_MODNAME + ": led_trigger_register() failed\n"); + if (err == -EEXIST) + printk(KERN_ERR KBUILD_MODNAME + ": Trigger name is already in use.\n"); + goto exit_alloc; + } + + /* See if we need to set up a timer */ + if (ledinfo->delay > 0) + setup_timer(&ledinternal->timer, led_timeout_callback, + (unsigned long)ledinfo); + + ledinfo->internal_data = ledinternal; + + return true; + +exit_alloc: + kfree(ledinternal); + + return false; +} + +static void led_tg_destroy(const struct xt_tgdtor_param *par) +{ + const struct xt_led_info *ledinfo = par->targinfo; + struct xt_led_info_internal *ledinternal = ledinfo->internal_data; + + if (ledinfo->delay > 0) + del_timer_sync(&ledinternal->timer); + + led_trigger_unregister(&ledinternal->netfilter_led_trigger); + kfree(ledinternal); +} + +static struct xt_target led_tg_reg __read_mostly = { + .name = "LED", + .revision = 0, + .family = NFPROTO_UNSPEC, + .target = led_tg, + .targetsize = XT_ALIGN(sizeof(struct xt_led_info)), + .checkentry = led_tg_check, + .destroy = led_tg_destroy, + .me = THIS_MODULE, +}; + +static int __init led_tg_init(void) +{ + return xt_register_target(&led_tg_reg); +} + +static void __exit led_tg_exit(void) +{ + xt_unregister_target(&led_tg_reg); +} + +module_init(led_tg_init); +module_exit(led_tg_exit); diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c new file mode 100644 index 000000000000..ad5bd890e4e8 --- /dev/null +++ b/net/netfilter/xt_cluster.c @@ -0,0 +1,164 @@ +/* + * (C) 2008-2009 Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/jhash.h> +#include <linux/ip.h> +#include <net/ipv6.h> + +#include <linux/netfilter/x_tables.h> +#include <net/netfilter/nf_conntrack.h> +#include <linux/netfilter/xt_cluster.h> + +static inline u_int32_t nf_ct_orig_ipv4_src(const struct nf_conn *ct) +{ + return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; +} + +static inline const void *nf_ct_orig_ipv6_src(const struct nf_conn *ct) +{ + return ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip6; +} + +static inline u_int32_t +xt_cluster_hash_ipv4(u_int32_t ip, const struct xt_cluster_match_info *info) +{ + return jhash_1word(ip, info->hash_seed); +} + +static inline u_int32_t +xt_cluster_hash_ipv6(const void *ip, const struct xt_cluster_match_info *info) +{ + return jhash2(ip, NF_CT_TUPLE_L3SIZE / sizeof(__u32), info->hash_seed); +} + +static inline u_int32_t +xt_cluster_hash(const struct nf_conn *ct, + const struct xt_cluster_match_info *info) +{ + u_int32_t hash = 0; + + switch(nf_ct_l3num(ct)) { + case AF_INET: + hash = xt_cluster_hash_ipv4(nf_ct_orig_ipv4_src(ct), info); + break; + case AF_INET6: + hash = xt_cluster_hash_ipv6(nf_ct_orig_ipv6_src(ct), info); + break; + default: + WARN_ON(1); + break; + } + return (((u64)hash * info->total_nodes) >> 32); +} + +static inline bool +xt_cluster_is_multicast_addr(const struct sk_buff *skb, u_int8_t family) +{ + bool is_multicast = false; + + switch(family) { + case NFPROTO_IPV4: + is_multicast = ipv4_is_multicast(ip_hdr(skb)->daddr); + break; + case NFPROTO_IPV6: + is_multicast = ipv6_addr_type(&ipv6_hdr(skb)->daddr) & + IPV6_ADDR_MULTICAST; + break; + default: + WARN_ON(1); + break; + } + return is_multicast; +} + +static bool +xt_cluster_mt(const struct sk_buff *skb, const struct xt_match_param *par) +{ + struct sk_buff *pskb = (struct sk_buff *)skb; + const struct xt_cluster_match_info *info = par->matchinfo; + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + unsigned long 
hash; + + /* This match assumes that all nodes see the same packets. This can be + * achieved if the switch that connects the cluster nodes support some + * sort of 'port mirroring'. However, if your switch does not support + * this, your cluster nodes can reply ARP request using a multicast MAC + * address. Thus, your switch will flood the same packets to the + * cluster nodes with the same multicast MAC address. Using a multicast + * link address is a RFC 1812 (section 3.3.2) violation, but this works + * fine in practise. + * + * Unfortunately, if you use the multicast MAC address, the link layer + * sets skbuff's pkt_type to PACKET_MULTICAST, which is not accepted + * by TCP and others for packets coming to this node. For that reason, + * this match mangles skbuff's pkt_type if it detects a packet + * addressed to a unicast address but using PACKET_MULTICAST. Yes, I + * know, matches should not alter packets, but we are doing this here + * because we would need to add a PKTTYPE target for this sole purpose. 
+ */ + if (!xt_cluster_is_multicast_addr(skb, par->family) && + skb->pkt_type == PACKET_MULTICAST) { + pskb->pkt_type = PACKET_HOST; + } + + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL) + return false; + + if (ct == &nf_conntrack_untracked) + return false; + + if (ct->master) + hash = xt_cluster_hash(ct->master, info); + else + hash = xt_cluster_hash(ct, info); + + return !!((1 << hash) & info->node_mask) ^ + !!(info->flags & XT_CLUSTER_F_INV); +} + +static bool xt_cluster_mt_checkentry(const struct xt_mtchk_param *par) +{ + struct xt_cluster_match_info *info = par->matchinfo; + + if (info->node_mask >= (1 << info->total_nodes)) { + printk(KERN_ERR "xt_cluster: this node mask cannot be " + "higher than the total number of nodes\n"); + return false; + } + return true; +} + +static struct xt_match xt_cluster_match __read_mostly = { + .name = "cluster", + .family = NFPROTO_UNSPEC, + .match = xt_cluster_mt, + .checkentry = xt_cluster_mt_checkentry, + .matchsize = sizeof(struct xt_cluster_match_info), + .me = THIS_MODULE, +}; + +static int __init xt_cluster_mt_init(void) +{ + return xt_register_match(&xt_cluster_match); +} + +static void __exit xt_cluster_mt_fini(void) +{ + xt_unregister_match(&xt_cluster_match); +} + +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Xtables: hash-based cluster match"); +MODULE_ALIAS("ipt_cluster"); +MODULE_ALIAS("ip6t_cluster"); +module_init(xt_cluster_mt_init); +module_exit(xt_cluster_mt_fini); diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index f97fded024c4..a5b5369c30f9 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -149,7 +149,7 @@ dsthash_alloc_init(struct xt_hashlimit_htable *ht, /* initialize hash with random val at the time we allocate * the first hashtable entry */ if (!ht->rnd_initialized) { - get_random_bytes(&ht->rnd, 4); + get_random_bytes(&ht->rnd, sizeof(ht->rnd)); ht->rnd_initialized = 1; } @@ -565,8 
+565,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo, static bool hashlimit_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par) { - const struct xt_hashlimit_info *r = - ((const struct xt_hashlimit_info *)par->matchinfo)->u.master; + const struct xt_hashlimit_info *r = par->matchinfo; struct xt_hashlimit_htable *hinfo = r->hinfo; unsigned long now = jiffies; struct dsthash_ent *dh; @@ -702,8 +701,6 @@ static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par) } mutex_unlock(&hlimit_mutex); - /* Ugly hack: For SMP, we only want to use one set */ - r->u.master = r; return true; } diff --git a/net/netfilter/xt_hl.c b/net/netfilter/xt_hl.c new file mode 100644 index 000000000000..7726154c87b2 --- /dev/null +++ b/net/netfilter/xt_hl.c @@ -0,0 +1,108 @@ +/* + * IP tables module for matching the value of the TTL + * (C) 2000,2001 by Harald Welte <laforge@netfilter.org> + * + * Hop Limit matching module + * (C) 2001-2002 Maciej Soltysiak <solt@dns.toxicfilms.tv> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/module.h> +#include <linux/skbuff.h> + +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter_ipv4/ipt_ttl.h> +#include <linux/netfilter_ipv6/ip6t_hl.h> + +MODULE_AUTHOR("Maciej Soltysiak <solt@dns.toxicfilms.tv>"); +MODULE_DESCRIPTION("Xtables: Hoplimit/TTL field match"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_ttl"); +MODULE_ALIAS("ip6t_hl"); + +static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par) +{ + const struct ipt_ttl_info *info = par->matchinfo; + const u8 ttl = ip_hdr(skb)->ttl; + + switch (info->mode) { + case IPT_TTL_EQ: + return ttl == info->ttl; + case IPT_TTL_NE: + return ttl != info->ttl; + case IPT_TTL_LT: + return ttl < info->ttl; + case IPT_TTL_GT: + return ttl > info->ttl; + default: + printk(KERN_WARNING "ipt_ttl: unknown mode %d\n", + info->mode); + return false; + } + + return false; +} + +static bool hl_mt6(const struct sk_buff *skb, const struct xt_match_param *par) +{ + const struct ip6t_hl_info *info = par->matchinfo; + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + + switch (info->mode) { + case IP6T_HL_EQ: + return ip6h->hop_limit == info->hop_limit; + break; + case IP6T_HL_NE: + return ip6h->hop_limit != info->hop_limit; + break; + case IP6T_HL_LT: + return ip6h->hop_limit < info->hop_limit; + break; + case IP6T_HL_GT: + return ip6h->hop_limit > info->hop_limit; + break; + default: + printk(KERN_WARNING "ip6t_hl: unknown mode %d\n", + info->mode); + return false; + } + + return false; +} + +static struct xt_match hl_mt_reg[] __read_mostly = { + { + .name = "ttl", + .revision = 0, + .family = NFPROTO_IPV4, + .match = ttl_mt, + .matchsize = sizeof(struct ipt_ttl_info), + .me = THIS_MODULE, + }, + { + .name = "hl", + .revision = 0, + .family = NFPROTO_IPV6, + .match = hl_mt6, + .matchsize = sizeof(struct ip6t_hl_info), + .me = THIS_MODULE, + }, +}; + +static int __init hl_mt_init(void) +{ + return xt_register_matches(hl_mt_reg, 
ARRAY_SIZE(hl_mt_reg)); +} + +static void __exit hl_mt_exit(void) +{ + xt_unregister_matches(hl_mt_reg, ARRAY_SIZE(hl_mt_reg)); +} + +module_init(hl_mt_init); +module_exit(hl_mt_exit); diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index c908d69a5595..2e8089ecd0af 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -14,6 +14,11 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_limit.h> +struct xt_limit_priv { + unsigned long prev; + uint32_t credit; +}; + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Herve Eychenne <rv@wallfire.org>"); MODULE_DESCRIPTION("Xtables: rate-limit match"); @@ -60,18 +65,18 @@ static DEFINE_SPINLOCK(limit_lock); static bool limit_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - struct xt_rateinfo *r = - ((const struct xt_rateinfo *)par->matchinfo)->master; + const struct xt_rateinfo *r = par->matchinfo; + struct xt_limit_priv *priv = r->master; unsigned long now = jiffies; spin_lock_bh(&limit_lock); - r->credit += (now - xchg(&r->prev, now)) * CREDITS_PER_JIFFY; - if (r->credit > r->credit_cap) - r->credit = r->credit_cap; + priv->credit += (now - xchg(&priv->prev, now)) * CREDITS_PER_JIFFY; + if (priv->credit > r->credit_cap) + priv->credit = r->credit_cap; - if (r->credit >= r->cost) { + if (priv->credit >= r->cost) { /* We're not limited. */ - r->credit -= r->cost; + priv->credit -= r->cost; spin_unlock_bh(&limit_lock); return true; } @@ -95,6 +100,7 @@ user2credits(u_int32_t user) static bool limit_mt_check(const struct xt_mtchk_param *par) { struct xt_rateinfo *r = par->matchinfo; + struct xt_limit_priv *priv; /* Check for overflow. */ if (r->burst == 0 @@ -104,19 +110,30 @@ static bool limit_mt_check(const struct xt_mtchk_param *par) return false; } - /* For SMP, we only want to use one set of counters. */ - r->master = r; + priv = kmalloc(sizeof(*priv), GFP_KERNEL); + if (priv == NULL) + return -ENOMEM; + + /* For SMP, we only want to use one set of state. 
*/ + r->master = priv; if (r->cost == 0) { /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * 128. */ - r->prev = jiffies; - r->credit = user2credits(r->avg * r->burst); /* Credits full. */ + priv->prev = jiffies; + priv->credit = user2credits(r->avg * r->burst); /* Credits full. */ r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */ r->cost = user2credits(r->avg); } return true; } +static void limit_mt_destroy(const struct xt_mtdtor_param *par) +{ + const struct xt_rateinfo *info = par->matchinfo; + + kfree(info->master); +} + #ifdef CONFIG_COMPAT struct compat_xt_rateinfo { u_int32_t avg; @@ -167,6 +184,7 @@ static struct xt_match limit_mt_reg __read_mostly = { .family = NFPROTO_UNSPEC, .match = limit_mt, .checkentry = limit_mt_check, + .destroy = limit_mt_destroy, .matchsize = sizeof(struct xt_rateinfo), #ifdef CONFIG_COMPAT .compatsize = sizeof(struct compat_xt_rateinfo), diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 1bcdfc12cf59..44a234ef4439 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -20,13 +20,30 @@ MODULE_DESCRIPTION("Xtables: Bridge physical device match"); MODULE_ALIAS("ipt_physdev"); MODULE_ALIAS("ip6t_physdev"); +static unsigned long ifname_compare(const char *_a, const char *_b, const char *_mask) +{ + const unsigned long *a = (const unsigned long *)_a; + const unsigned long *b = (const unsigned long *)_b; + const unsigned long *mask = (const unsigned long *)_mask; + unsigned long ret; + + ret = (a[0] ^ b[0]) & mask[0]; + if (IFNAMSIZ > sizeof(unsigned long)) + ret |= (a[1] ^ b[1]) & mask[1]; + if (IFNAMSIZ > 2 * sizeof(unsigned long)) + ret |= (a[2] ^ b[2]) & mask[2]; + if (IFNAMSIZ > 3 * sizeof(unsigned long)) + ret |= (a[3] ^ b[3]) & mask[3]; + BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long)); + return ret; +} + static bool physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - int i; - static const char nulldevname[IFNAMSIZ]; + static 
const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); const struct xt_physdev_info *info = par->matchinfo; - bool ret; + unsigned long ret; const char *indev, *outdev; const struct nf_bridge_info *nf_bridge; @@ -68,11 +85,7 @@ physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par) if (!(info->bitmask & XT_PHYSDEV_OP_IN)) goto match_outdev; indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname; - for (i = 0, ret = false; i < IFNAMSIZ/sizeof(unsigned int); i++) { - ret |= (((const unsigned int *)indev)[i] - ^ ((const unsigned int *)info->physindev)[i]) - & ((const unsigned int *)info->in_mask)[i]; - } + ret = ifname_compare(indev, info->physindev, info->in_mask); if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN)) return false; @@ -82,13 +95,9 @@ match_outdev: return true; outdev = nf_bridge->physoutdev ? nf_bridge->physoutdev->name : nulldevname; - for (i = 0, ret = false; i < IFNAMSIZ/sizeof(unsigned int); i++) { - ret |= (((const unsigned int *)outdev)[i] - ^ ((const unsigned int *)info->physoutdev)[i]) - & ((const unsigned int *)info->out_mask)[i]; - } + ret = ifname_compare(outdev, info->physoutdev, info->out_mask); - return ret ^ !(info->invert & XT_PHYSDEV_OP_OUT); + return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT)); } static bool physdev_mt_check(const struct xt_mtchk_param *par) diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c index c84fce5e0f3e..01dd07b764ec 100644 --- a/net/netfilter/xt_quota.c +++ b/net/netfilter/xt_quota.c @@ -9,6 +9,10 @@ #include <linux/netfilter/x_tables.h> #include <linux/netfilter/xt_quota.h> +struct xt_quota_priv { + uint64_t quota; +}; + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Sam Johnston <samj@samj.net>"); MODULE_DESCRIPTION("Xtables: countdown quota match"); @@ -20,18 +24,20 @@ static DEFINE_SPINLOCK(quota_lock); static bool quota_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - struct xt_quota_info *q = - ((const struct xt_quota_info 
*)par->matchinfo)->master; + struct xt_quota_info *q = (void *)par->matchinfo; + struct xt_quota_priv *priv = q->master; bool ret = q->flags & XT_QUOTA_INVERT; spin_lock_bh(&quota_lock); - if (q->quota >= skb->len) { - q->quota -= skb->len; + if (priv->quota >= skb->len) { + priv->quota -= skb->len; ret = !ret; } else { /* we do not allow even small packets from now on */ - q->quota = 0; + priv->quota = 0; } + /* Copy quota back to matchinfo so that iptables can display it */ + q->quota = priv->quota; spin_unlock_bh(&quota_lock); return ret; @@ -43,17 +49,28 @@ static bool quota_mt_check(const struct xt_mtchk_param *par) if (q->flags & ~XT_QUOTA_MASK) return false; - /* For SMP, we only want to use one set of counters. */ - q->master = q; + + q->master = kmalloc(sizeof(*q->master), GFP_KERNEL); + if (q->master == NULL) + return -ENOMEM; + return true; } +static void quota_mt_destroy(const struct xt_mtdtor_param *par) +{ + const struct xt_quota_info *q = par->matchinfo; + + kfree(q->master); +} + static struct xt_match quota_mt_reg __read_mostly = { .name = "quota", .revision = 0, .family = NFPROTO_UNSPEC, .match = quota_mt, .checkentry = quota_mt_check, + .destroy = quota_mt_destroy, .matchsize = sizeof(struct xt_quota_info), .me = THIS_MODULE, }; diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c index 0d75141139d5..d8c0f8f1a78e 100644 --- a/net/netfilter/xt_statistic.c +++ b/net/netfilter/xt_statistic.c @@ -16,6 +16,10 @@ #include <linux/netfilter/xt_statistic.h> #include <linux/netfilter/x_tables.h> +struct xt_statistic_priv { + uint32_t count; +}; + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_DESCRIPTION("Xtables: statistics-based matching (\"Nth\", random)"); @@ -27,7 +31,7 @@ static DEFINE_SPINLOCK(nth_lock); static bool statistic_mt(const struct sk_buff *skb, const struct xt_match_param *par) { - struct xt_statistic_info *info = (void *)par->matchinfo; + const struct xt_statistic_info *info =
par->matchinfo; bool ret = info->flags & XT_STATISTIC_INVERT; switch (info->mode) { @@ -36,10 +40,9 @@ statistic_mt(const struct sk_buff *skb, const struct xt_match_param *par) ret = !ret; break; case XT_STATISTIC_MODE_NTH: - info = info->master; spin_lock_bh(&nth_lock); - if (info->u.nth.count++ == info->u.nth.every) { - info->u.nth.count = 0; + if (info->master->count++ == info->u.nth.every) { + info->master->count = 0; ret = !ret; } spin_unlock_bh(&nth_lock); @@ -56,16 +59,31 @@ static bool statistic_mt_check(const struct xt_mtchk_param *par) if (info->mode > XT_STATISTIC_MODE_MAX || info->flags & ~XT_STATISTIC_MASK) return false; - info->master = info; + + info->master = kzalloc(sizeof(*info->master), GFP_KERNEL); + if (info->master == NULL) { + printk(KERN_ERR KBUILD_MODNAME ": Out of memory\n"); + return false; + } + info->master->count = info->u.nth.count; + return true; } +static void statistic_mt_destroy(const struct xt_mtdtor_param *par) +{ + const struct xt_statistic_info *info = par->matchinfo; + + kfree(info->master); +} + static struct xt_match xt_statistic_mt_reg __read_mostly = { .name = "statistic", .revision = 0, .family = NFPROTO_UNSPEC, .match = statistic_mt, .checkentry = statistic_mt_check, + .destroy = statistic_mt_destroy, .matchsize = sizeof(struct xt_statistic_info), .me = THIS_MODULE, }; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index a007dbb4c9f1..b73d4e61c5ac 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1117,6 +1117,7 @@ void netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) read_unlock(&nl_table_lock); } +EXPORT_SYMBOL(netlink_set_err); /* must be called with netlink table grabbed */ static void netlink_update_socket_mc(struct netlink_sock *nlk, diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 972201cd5fa7..0b15d7250c40 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -61,7 +61,7 @@ static struct ctl_table_root net_sysctl_root = { static int 
net_ctl_ro_header_perms(struct ctl_table_root *root, struct nsproxy *namespaces, struct ctl_table *table) { - if (namespaces->net_ns == &init_net) + if (net_eq(namespaces->net_ns, &init_net)) return table->mode; else return table->mode & ~0222; |