summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2016-07-24 22:02:36 -0700
committerDavid S. Miller <davem@davemloft.net>2016-07-24 22:02:36 -0700
commitc42d7121fbee1ee30fd9221d594e9c5a4bc1fed6 (patch)
tree37ccf6ebfc28dc4aad92e9d3831d8825eafb7a5a
parentde0ba9a0d8909996f9e293d311c2cc459fa77d67 (diff)
parent4b512e1c1f8de6b9ceb796ecef8658e0a083cab7 (diff)
downloadtalos-obmc-linux-c42d7121fbee1ee30fd9221d594e9c5a4bc1fed6.tar.gz
talos-obmc-linux-c42d7121fbee1ee30fd9221d594e9c5a4bc1fed6.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says: ==================== Netfilter/IPVS updates for net-next The following patchset contains Netfilter/IPVS updates for net-next, they are: 1) Count pre-established connections as active in "least connection" schedulers such that pre-established connections to avoid overloading backend servers on peak demands, from Michal Kubecek via Simon Horman. 2) Address a race condition when resizing the conntrack table by caching the bucket size when fulling iterating over the hashtable in these three possible scenarios: 1) dump via /proc/net/nf_conntrack, 2) unlinking userspace helper and 3) unlinking custom conntrack timeout. From Liping Zhang. 3) Revisit early_drop() path to perform lockless traversal on conntrack eviction under stress, use del_timer() as synchronization point to avoid two CPUs evicting the same entry, from Florian Westphal. 4) Move NAT hlist_head to nf_conn object, this simplifies the existing NAT extension and it doesn't increase size since recent patches to align nf_conn, from Florian. 5) Use rhashtable for the by-source NAT hashtable, also from Florian. 6) Don't allow --physdev-is-out from OUTPUT chain, just like --physdev-out is not either, from Hangbin Liu. 7) Automagically set on nf_conntrack counters if the user tries to match ct bytes/packets from nftables, from Liping Zhang. 8) Remove possible_net_t fields in nf_tables set objects since we just simply pass the net pointer to the backend set type implementations. 9) Fix possible off-by-one in h323, from Toby DiPasquale. 10) early_drop() may be called from ctnetlink patch, so we must hold rcu read size lock from them too, this amends Florian's patch #3 coming in this batch, from Liping Zhang. 11) Use binary search to validate jump offset in x_tables, this addresses the O(n!) validation that was introduced recently resolve security issues with unpriviledge namespaces, from Florian. 12) Fix reference leak to connlabel in error path of nft_ct, from Zhang. 13) Three updates for nft_log: Fix log prefix leak in error path. Bail out on loglevel larger than debug in nft_log and set on the new NF_LOG_F_COPY_LEN flag when snaplen is specified. Again from Zhang. 14) Allow to filter rule dumps in nf_tables based on table and chain names. 15) Simplify connlabel to always use 128 bits to store labels and get rid of unused function in xt_connlabel, from Florian. 16) Replace set_expect_timeout() by mod_timer() from the h323 conntrack helper, by Gao Feng. 17) Put back x_tables module reference in nft_compat on error, from Liping Zhang. 18) Add a reference count to the x_tables extensions cache in nft_compat, so we can remove them when unused and avoid a crash if the extensions are rmmod, again from Zhang. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/linux/netfilter/x_tables.h4
-rw-r--r--include/net/netfilter/nf_conntrack.h9
-rw-r--r--include/net/netfilter/nf_conntrack_core.h2
-rw-r--r--include/net/netfilter/nf_conntrack_extend.h3
-rw-r--r--include/net/netfilter/nf_conntrack_helper.h15
-rw-r--r--include/net/netfilter/nf_conntrack_labels.h18
-rw-r--r--include/net/netfilter/nf_nat.h3
-rw-r--r--include/net/netfilter/nf_tables.h21
-rw-r--r--net/ipv4/netfilter/arp_tables.c47
-rw-r--r--net/ipv4/netfilter/ip_tables.c45
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c14
-rw-r--r--net/ipv6/netfilter/ip6_tables.c45
-rw-r--r--net/netfilter/ipvs/ip_vs_proto_tcp.c25
-rw-r--r--net/netfilter/nf_conntrack_core.c115
-rw-r--r--net/netfilter/nf_conntrack_extend.c15
-rw-r--r--net/netfilter/nf_conntrack_ftp.c58
-rw-r--r--net/netfilter/nf_conntrack_h323_asn1.c3
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c15
-rw-r--r--net/netfilter/nf_conntrack_helper.c76
-rw-r--r--net/netfilter/nf_conntrack_irc.c36
-rw-r--r--net/netfilter/nf_conntrack_labels.c28
-rw-r--r--net/netfilter/nf_conntrack_netlink.c10
-rw-r--r--net/netfilter/nf_conntrack_sane.c57
-rw-r--r--net/netfilter/nf_conntrack_sip.c75
-rw-r--r--net/netfilter/nf_conntrack_standalone.c14
-rw-r--r--net/netfilter/nf_conntrack_tftp.c48
-rw-r--r--net/netfilter/nf_nat_core.c149
-rw-r--r--net/netfilter/nf_tables_api.c48
-rw-r--r--net/netfilter/nfnetlink_cttimeout.c20
-rw-r--r--net/netfilter/nft_compat.c75
-rw-r--r--net/netfilter/nft_ct.c41
-rw-r--r--net/netfilter/nft_hash.c20
-rw-r--r--net/netfilter/nft_log.c34
-rw-r--r--net/netfilter/nft_lookup.c2
-rw-r--r--net/netfilter/nft_rbtree.c26
-rw-r--r--net/netfilter/x_tables.c50
-rw-r--r--net/netfilter/xt_connlabel.c29
-rw-r--r--net/netfilter/xt_physdev.c8
-rw-r--r--net/openvswitch/conntrack.c4
39 files changed, 718 insertions, 589 deletions
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index e94e81ab2b58..2ad1a2b289b5 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -250,6 +250,10 @@ int xt_check_entry_offsets(const void *base, const char *elems,
unsigned int target_offset,
unsigned int next_offset);
+unsigned int *xt_alloc_entry_offsets(unsigned int size);
+bool xt_find_jump_offset(const unsigned int *offsets,
+ unsigned int target, unsigned int size);
+
int xt_check_match(struct xt_mtchk_param *, unsigned int size, u_int8_t proto,
bool inv_proto);
int xt_check_target(struct xt_tgchk_param *, unsigned int size, u_int8_t proto,
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 092ca19fa044..445b019c2078 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -17,6 +17,7 @@
#include <linux/bitops.h>
#include <linux/compiler.h>
#include <linux/atomic.h>
+#include <linux/rhashtable.h>
#include <linux/netfilter/nf_conntrack_tcp.h>
#include <linux/netfilter/nf_conntrack_dccp.h>
@@ -117,6 +118,9 @@ struct nf_conn {
/* Extensions */
struct nf_ct_ext *ext;
+#if IS_ENABLED(CONFIG_NF_NAT)
+ struct rhash_head nat_bysource;
+#endif
/* Storage reserved for other modules, must be the last member */
union nf_conntrack_proto proto;
};
@@ -266,12 +270,12 @@ static inline int nf_ct_is_template(const struct nf_conn *ct)
}
/* It's confirmed if it is, or has been in the hash table. */
-static inline int nf_ct_is_confirmed(struct nf_conn *ct)
+static inline int nf_ct_is_confirmed(const struct nf_conn *ct)
{
return test_bit(IPS_CONFIRMED_BIT, &ct->status);
}
-static inline int nf_ct_is_dying(struct nf_conn *ct)
+static inline int nf_ct_is_dying(const struct nf_conn *ct)
{
return test_bit(IPS_DYING_BIT, &ct->status);
}
@@ -309,6 +313,7 @@ void nf_ct_tmpl_free(struct nf_conn *tmpl);
#define NF_CT_STAT_INC(net, count) __this_cpu_inc((net)->ct.stat->count)
#define NF_CT_STAT_INC_ATOMIC(net, count) this_cpu_inc((net)->ct.stat->count)
+#define NF_CT_STAT_ADD_ATOMIC(net, count, v) this_cpu_add((net)->ct.stat->count, (v))
#define MODULE_ALIAS_NFCT_HELPER(helper) \
MODULE_ALIAS("nfct-helper-" helper)
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 3e2f3328945c..79d7ac5c9740 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -51,6 +51,8 @@ bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *l4proto);
+void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize);
+
/* Find a connection corresponding to a tuple. */
struct nf_conntrack_tuple_hash *
nf_conntrack_find_get(struct net *net,
diff --git a/include/net/netfilter/nf_conntrack_extend.h b/include/net/netfilter/nf_conntrack_extend.h
index b925395fa5ed..1c3035dda31f 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -99,9 +99,6 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id,
struct nf_ct_ext_type {
/* Destroys relationships (can be NULL). */
void (*destroy)(struct nf_conn *ct);
- /* Called when realloacted (can be NULL).
- Contents has already been moved. */
- void (*move)(void *new, void *old);
enum nf_ct_ext_id id;
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index 6cf614bc0029..1eaac1f4cd6a 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -58,10 +58,25 @@ struct nf_conntrack_helper *__nf_conntrack_helper_find(const char *name,
struct nf_conntrack_helper *nf_conntrack_helper_try_module_get(const char *name,
u16 l3num,
u8 protonum);
+void nf_ct_helper_init(struct nf_conntrack_helper *helper,
+ u16 l3num, u16 protonum, const char *name,
+ u16 default_port, u16 spec_port, u32 id,
+ const struct nf_conntrack_expect_policy *exp_pol,
+ u32 expect_class_max, u32 data_len,
+ int (*help)(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo),
+ int (*from_nlattr)(struct nlattr *attr,
+ struct nf_conn *ct),
+ struct module *module);
int nf_conntrack_helper_register(struct nf_conntrack_helper *);
void nf_conntrack_helper_unregister(struct nf_conntrack_helper *);
+int nf_conntrack_helpers_register(struct nf_conntrack_helper *, unsigned int);
+void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *,
+ unsigned int);
+
struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct,
struct nf_conntrack_helper *helper,
gfp_t gfp);
diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h
index c5f8fc736b3d..498814626e28 100644
--- a/include/net/netfilter/nf_conntrack_labels.h
+++ b/include/net/netfilter/nf_conntrack_labels.h
@@ -10,8 +10,7 @@
#define NF_CT_LABELS_MAX_SIZE ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE)
struct nf_conn_labels {
- u8 words;
- unsigned long bits[];
+ unsigned long bits[NF_CT_LABELS_MAX_SIZE / sizeof(long)];
};
static inline struct nf_conn_labels *nf_ct_labels_find(const struct nf_conn *ct)
@@ -26,27 +25,18 @@ static inline struct nf_conn_labels *nf_ct_labels_find(const struct nf_conn *ct)
static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct)
{
#ifdef CONFIG_NF_CONNTRACK_LABELS
- struct nf_conn_labels *cl_ext;
struct net *net = nf_ct_net(ct);
- u8 words;
- words = ACCESS_ONCE(net->ct.label_words);
- if (words == 0)
+ if (net->ct.labels_used == 0)
return NULL;
- cl_ext = nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS,
- words * sizeof(long), GFP_ATOMIC);
- if (cl_ext != NULL)
- cl_ext->words = words;
-
- return cl_ext;
+ return nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS,
+ sizeof(struct nf_conn_labels), GFP_ATOMIC);
#else
return NULL;
#endif
}
-int nf_connlabel_set(struct nf_conn *ct, u16 bit);
-
int nf_connlabels_replace(struct nf_conn *ct,
const u32 *data, const u32 *mask, unsigned int words);
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index 344b1ab19220..c327a431a6f3 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -1,5 +1,6 @@
#ifndef _NF_NAT_H
#define _NF_NAT_H
+#include <linux/rhashtable.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter/nf_nat.h>
#include <net/netfilter/nf_conntrack_tuple.h>
@@ -29,8 +30,6 @@ struct nf_conn;
/* The structure embedded in the conntrack structure. */
struct nf_conn_nat {
- struct hlist_node bysource;
- struct nf_conn *ct;
union nf_conntrack_nat_help help;
#if IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV4) || \
IS_ENABLED(CONFIG_NF_NAT_MASQUERADE_IPV6)
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 30c1d9489ae2..f2f13399ce44 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -236,7 +236,8 @@ struct nft_expr;
* @features: features supported by the implementation
*/
struct nft_set_ops {
- bool (*lookup)(const struct nft_set *set,
+ bool (*lookup)(const struct net *net,
+ const struct nft_set *set,
const u32 *key,
const struct nft_set_ext **ext);
bool (*update)(struct nft_set *set,
@@ -248,11 +249,14 @@ struct nft_set_ops {
struct nft_regs *regs,
const struct nft_set_ext **ext);
- int (*insert)(const struct nft_set *set,
+ int (*insert)(const struct net *net,
+ const struct nft_set *set,
const struct nft_set_elem *elem);
- void (*activate)(const struct nft_set *set,
+ void (*activate)(const struct net *net,
+ const struct nft_set *set,
const struct nft_set_elem *elem);
- void * (*deactivate)(const struct nft_set *set,
+ void * (*deactivate)(const struct net *net,
+ const struct nft_set *set,
const struct nft_set_elem *elem);
void (*remove)(const struct nft_set *set,
const struct nft_set_elem *elem);
@@ -295,7 +299,6 @@ void nft_unregister_set(struct nft_set_ops *ops);
* @udlen: user data length
* @udata: user data
* @ops: set ops
- * @pnet: network namespace
* @flags: set flags
* @genmask: generation mask
* @klen: key length
@@ -318,7 +321,6 @@ struct nft_set {
unsigned char *udata;
/* runtime data below here */
const struct nft_set_ops *ops ____cacheline_aligned;
- possible_net_t pnet;
u16 flags:14,
genmask:2;
u8 klen;
@@ -804,7 +806,6 @@ struct nft_stats {
* struct nft_base_chain - nf_tables base chain
*
* @ops: netfilter hook ops
- * @pnet: net namespace that this chain belongs to
* @type: chain type
* @policy: default policy
* @stats: per-cpu chain stats
@@ -813,7 +814,6 @@ struct nft_stats {
*/
struct nft_base_chain {
struct nf_hook_ops ops[NFT_HOOK_OPS_MAX];
- possible_net_t pnet;
const struct nf_chain_type *type;
u8 policy;
u8 flags;
@@ -1009,10 +1009,11 @@ static inline bool nft_set_elem_active(const struct nft_set_ext *ext,
return !(ext->genmask & genmask);
}
-static inline void nft_set_elem_change_active(const struct nft_set *set,
+static inline void nft_set_elem_change_active(const struct net *net,
+ const struct nft_set *set,
struct nft_set_ext *ext)
{
- ext->genmask ^= nft_genmask_next(read_pnet(&set->pnet));
+ ext->genmask ^= nft_genmask_next(net);
}
/*
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index c8dd9e26b185..b31df597fd37 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -299,23 +299,12 @@ static inline bool unconditional(const struct arpt_entry *e)
memcmp(&e->arp, &uncond, sizeof(uncond)) == 0;
}
-static bool find_jump_target(const struct xt_table_info *t,
- const struct arpt_entry *target)
-{
- struct arpt_entry *iter;
-
- xt_entry_foreach(iter, t->entries, t->size) {
- if (iter == target)
- return true;
- }
- return false;
-}
-
/* Figures out from what hook each rule can be called: returns 0 if
* there are loops. Puts hook bitmask in comefrom.
*/
static int mark_source_chains(const struct xt_table_info *newinfo,
- unsigned int valid_hooks, void *entry0)
+ unsigned int valid_hooks, void *entry0,
+ unsigned int *offsets)
{
unsigned int hook;
@@ -388,10 +377,11 @@ static int mark_source_chains(const struct xt_table_info *newinfo,
XT_STANDARD_TARGET) == 0 &&
newpos >= 0) {
/* This a jump; chase it. */
+ if (!xt_find_jump_offset(offsets, newpos,
+ newinfo->number))
+ return 0;
e = (struct arpt_entry *)
(entry0 + newpos);
- if (!find_jump_target(newinfo, e))
- return 0;
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
@@ -543,6 +533,7 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
const struct arpt_replace *repl)
{
struct arpt_entry *iter;
+ unsigned int *offsets;
unsigned int i;
int ret = 0;
@@ -555,6 +546,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
newinfo->underflow[i] = 0xFFFFFFFF;
}
+ offsets = xt_alloc_entry_offsets(newinfo->number);
+ if (!offsets)
+ return -ENOMEM;
i = 0;
/* Walk through entries, checking offsets. */
@@ -565,17 +559,20 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
repl->underflow,
repl->valid_hooks);
if (ret != 0)
- break;
+ goto out_free;
+ if (i < repl->num_entries)
+ offsets[i] = (void *)iter - entry0;
++i;
if (strcmp(arpt_get_target(iter)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
}
if (ret != 0)
- return ret;
+ goto out_free;
+ ret = -EINVAL;
if (i != repl->num_entries)
- return -EINVAL;
+ goto out_free;
/* Check hooks all assigned */
for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
@@ -583,13 +580,16 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
if (!(repl->valid_hooks & (1 << i)))
continue;
if (newinfo->hook_entry[i] == 0xFFFFFFFF)
- return -EINVAL;
+ goto out_free;
if (newinfo->underflow[i] == 0xFFFFFFFF)
- return -EINVAL;
+ goto out_free;
}
- if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
- return -ELOOP;
+ if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
+ ret = -ELOOP;
+ goto out_free;
+ }
+ kvfree(offsets);
/* Finally, each sanity check must pass */
i = 0;
@@ -610,6 +610,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
}
return ret;
+ out_free:
+ kvfree(offsets);
+ return ret;
}
static void get_counters(const struct xt_table_info *t,
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index f0df66f54ce6..f993545a3373 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -373,23 +373,12 @@ ipt_do_table(struct sk_buff *skb,
else return verdict;
}
-static bool find_jump_target(const struct xt_table_info *t,
- const struct ipt_entry *target)
-{
- struct ipt_entry *iter;
-
- xt_entry_foreach(iter, t->entries, t->size) {
- if (iter == target)
- return true;
- }
- return false;
-}
-
/* Figures out from what hook each rule can be called: returns 0 if
there are loops. Puts hook bitmask in comefrom. */
static int
mark_source_chains(const struct xt_table_info *newinfo,
- unsigned int valid_hooks, void *entry0)
+ unsigned int valid_hooks, void *entry0,
+ unsigned int *offsets)
{
unsigned int hook;
@@ -458,10 +447,11 @@ mark_source_chains(const struct xt_table_info *newinfo,
XT_STANDARD_TARGET) == 0 &&
newpos >= 0) {
/* This a jump; chase it. */
+ if (!xt_find_jump_offset(offsets, newpos,
+ newinfo->number))
+ return 0;
e = (struct ipt_entry *)
(entry0 + newpos);
- if (!find_jump_target(newinfo, e))
- return 0;
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
@@ -694,6 +684,7 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
const struct ipt_replace *repl)
{
struct ipt_entry *iter;
+ unsigned int *offsets;
unsigned int i;
int ret = 0;
@@ -706,6 +697,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
newinfo->underflow[i] = 0xFFFFFFFF;
}
+ offsets = xt_alloc_entry_offsets(newinfo->number);
+ if (!offsets)
+ return -ENOMEM;
i = 0;
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter, entry0, newinfo->size) {
@@ -715,15 +709,18 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
repl->underflow,
repl->valid_hooks);
if (ret != 0)
- return ret;
+ goto out_free;
+ if (i < repl->num_entries)
+ offsets[i] = (void *)iter - entry0;
++i;
if (strcmp(ipt_get_target(iter)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
}
+ ret = -EINVAL;
if (i != repl->num_entries)
- return -EINVAL;
+ goto out_free;
/* Check hooks all assigned */
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
@@ -731,13 +728,16 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (!(repl->valid_hooks & (1 << i)))
continue;
if (newinfo->hook_entry[i] == 0xFFFFFFFF)
- return -EINVAL;
+ goto out_free;
if (newinfo->underflow[i] == 0xFFFFFFFF)
- return -EINVAL;
+ goto out_free;
}
- if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
- return -ELOOP;
+ if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
+ ret = -ELOOP;
+ goto out_free;
+ }
+ kvfree(offsets);
/* Finally, each sanity check must pass */
i = 0;
@@ -758,6 +758,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
}
return ret;
+ out_free:
+ kvfree(offsets);
+ return ret;
}
static void
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index c6f3c406f707..63923710f325 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -26,6 +26,8 @@
struct ct_iter_state {
struct seq_net_private p;
+ struct hlist_nulls_head *hash;
+ unsigned int htable_size;
unsigned int bucket;
};
@@ -35,10 +37,10 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
struct hlist_nulls_node *n;
for (st->bucket = 0;
- st->bucket < nf_conntrack_htable_size;
+ st->bucket < st->htable_size;
st->bucket++) {
n = rcu_dereference(
- hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
+ hlist_nulls_first_rcu(&st->hash[st->bucket]));
if (!is_a_nulls(n))
return n;
}
@@ -53,11 +55,11 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
head = rcu_dereference(hlist_nulls_next_rcu(head));
while (is_a_nulls(head)) {
if (likely(get_nulls_value(head) == st->bucket)) {
- if (++st->bucket >= nf_conntrack_htable_size)
+ if (++st->bucket >= st->htable_size)
return NULL;
}
head = rcu_dereference(
- hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
+ hlist_nulls_first_rcu(&st->hash[st->bucket]));
}
return head;
}
@@ -75,7 +77,11 @@ static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(RCU)
{
+ struct ct_iter_state *st = seq->private;
+
rcu_read_lock();
+
+ nf_conntrack_get_ht(&st->hash, &st->htable_size);
return ct_get_idx(seq, *pos);
}
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 61ed95054efa..552fac2f390a 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -402,23 +402,12 @@ ip6t_do_table(struct sk_buff *skb,
else return verdict;
}
-static bool find_jump_target(const struct xt_table_info *t,
- const struct ip6t_entry *target)
-{
- struct ip6t_entry *iter;
-
- xt_entry_foreach(iter, t->entries, t->size) {
- if (iter == target)
- return true;
- }
- return false;
-}
-
/* Figures out from what hook each rule can be called: returns 0 if
there are loops. Puts hook bitmask in comefrom. */
static int
mark_source_chains(const struct xt_table_info *newinfo,
- unsigned int valid_hooks, void *entry0)
+ unsigned int valid_hooks, void *entry0,
+ unsigned int *offsets)
{
unsigned int hook;
@@ -487,10 +476,11 @@ mark_source_chains(const struct xt_table_info *newinfo,
XT_STANDARD_TARGET) == 0 &&
newpos >= 0) {
/* This a jump; chase it. */
+ if (!xt_find_jump_offset(offsets, newpos,
+ newinfo->number))
+ return 0;
e = (struct ip6t_entry *)
(entry0 + newpos);
- if (!find_jump_target(newinfo, e))
- return 0;
} else {
/* ... this is a fallthru */
newpos = pos + e->next_offset;
@@ -724,6 +714,7 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
const struct ip6t_replace *repl)
{
struct ip6t_entry *iter;
+ unsigned int *offsets;
unsigned int i;
int ret = 0;
@@ -736,6 +727,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
newinfo->underflow[i] = 0xFFFFFFFF;
}
+ offsets = xt_alloc_entry_offsets(newinfo->number);
+ if (!offsets)
+ return -ENOMEM;
i = 0;
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter, entry0, newinfo->size) {
@@ -745,15 +739,18 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
repl->underflow,
repl->valid_hooks);
if (ret != 0)
- return ret;
+ goto out_free;
+ if (i < repl->num_entries)
+ offsets[i] = (void *)iter - entry0;
++i;
if (strcmp(ip6t_get_target(iter)->u.user.name,
XT_ERROR_TARGET) == 0)
++newinfo->stacksize;
}
+ ret = -EINVAL;
if (i != repl->num_entries)
- return -EINVAL;
+ goto out_free;
/* Check hooks all assigned */
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
@@ -761,13 +758,16 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (!(repl->valid_hooks & (1 << i)))
continue;
if (newinfo->hook_entry[i] == 0xFFFFFFFF)
- return -EINVAL;
+ goto out_free;
if (newinfo->underflow[i] == 0xFFFFFFFF)
- return -EINVAL;
+ goto out_free;
}
- if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
- return -ELOOP;
+ if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
+ ret = -ELOOP;
+ goto out_free;
+ }
+ kvfree(offsets);
/* Finally, each sanity check must pass */
i = 0;
@@ -788,6 +788,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
}
return ret;
+ out_free:
+ kvfree(offsets);
+ return ret;
}
static void
diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c
index d7024b2ed769..5117bcb7d2f0 100644
--- a/net/netfilter/ipvs/ip_vs_proto_tcp.c
+++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c
@@ -395,6 +395,20 @@ static const char *const tcp_state_name_table[IP_VS_TCP_S_LAST+1] = {
[IP_VS_TCP_S_LAST] = "BUG!",
};
+static const bool tcp_state_active_table[IP_VS_TCP_S_LAST] = {
+ [IP_VS_TCP_S_NONE] = false,
+ [IP_VS_TCP_S_ESTABLISHED] = true,
+ [IP_VS_TCP_S_SYN_SENT] = true,
+ [IP_VS_TCP_S_SYN_RECV] = true,
+ [IP_VS_TCP_S_FIN_WAIT] = false,
+ [IP_VS_TCP_S_TIME_WAIT] = false,
+ [IP_VS_TCP_S_CLOSE] = false,
+ [IP_VS_TCP_S_CLOSE_WAIT] = false,
+ [IP_VS_TCP_S_LAST_ACK] = false,
+ [IP_VS_TCP_S_LISTEN] = false,
+ [IP_VS_TCP_S_SYNACK] = true,
+};
+
#define sNO IP_VS_TCP_S_NONE
#define sES IP_VS_TCP_S_ESTABLISHED
#define sSS IP_VS_TCP_S_SYN_SENT
@@ -418,6 +432,13 @@ static const char * tcp_state_name(int state)
return tcp_state_name_table[state] ? tcp_state_name_table[state] : "?";
}
+static bool tcp_state_active(int state)
+{
+ if (state >= IP_VS_TCP_S_LAST)
+ return false;
+ return tcp_state_active_table[state];
+}
+
static struct tcp_states_t tcp_states [] = {
/* INPUT */
/* sNO, sES, sSS, sSR, sFW, sTW, sCL, sCW, sLA, sLI, sSA */
@@ -540,12 +561,12 @@ set_tcp_state(struct ip_vs_proto_data *pd, struct ip_vs_conn *cp,
if (dest) {
if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
- (new_state != IP_VS_TCP_S_ESTABLISHED)) {
+ !tcp_state_active(new_state)) {
atomic_dec(&dest->activeconns);
atomic_inc(&dest->inactconns);
cp->flags |= IP_VS_CONN_F_INACTIVE;
} else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
- (new_state == IP_VS_TCP_S_ESTABLISHED)) {
+ tcp_state_active(new_state)) {
atomic_inc(&dest->activeconns);
atomic_dec(&dest->inactconns);
cp->flags &= ~IP_VS_CONN_F_INACTIVE;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0ad936814fa2..9198e690e692 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -460,6 +460,23 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h,
net_eq(net, nf_ct_net(ct));
}
+/* must be called with rcu read lock held */
+void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize)
+{
+ struct hlist_nulls_head *hptr;
+ unsigned int sequence, hsz;
+
+ do {
+ sequence = read_seqcount_begin(&nf_conntrack_generation);
+ hsz = nf_conntrack_htable_size;
+ hptr = nf_conntrack_hash;
+ } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+
+ *hash = hptr;
+ *hsize = hsz;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_get_ht);
+
/*
* Warning :
* - Caller must take a reference on returned object
@@ -818,67 +835,69 @@ EXPORT_SYMBOL_GPL(nf_conntrack_tuple_taken);
/* There's a small race here where we may free a just-assured
connection. Too bad: we're in trouble anyway. */
-static noinline int early_drop(struct net *net, unsigned int _hash)
+static unsigned int early_drop_list(struct net *net,
+ struct hlist_nulls_head *head)
{
- /* Use oldest entry, which is roughly LRU */
struct nf_conntrack_tuple_hash *h;
- struct nf_conn *tmp;
struct hlist_nulls_node *n;
- unsigned int i, hash, sequence;
- struct nf_conn *ct = NULL;
- spinlock_t *lockp;
- bool ret = false;
+ unsigned int drops = 0;
+ struct nf_conn *tmp;
- i = 0;
+ hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
+ tmp = nf_ct_tuplehash_to_ctrack(h);
- local_bh_disable();
-restart:
- sequence = read_seqcount_begin(&nf_conntrack_generation);
- for (; i < NF_CT_EVICTION_RANGE; i++) {
- hash = scale_hash(_hash++);
- lockp = &nf_conntrack_locks[hash % CONNTRACK_LOCKS];
- nf_conntrack_lock(lockp);
- if (read_seqcount_retry(&nf_conntrack_generation, sequence)) {
- spin_unlock(lockp);
- goto restart;
- }
- hlist_nulls_for_each_entry_rcu(h, n, &nf_conntrack_hash[hash],
- hnnode) {
- tmp = nf_ct_tuplehash_to_ctrack(h);
-
- if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
- !net_eq(nf_ct_net(tmp), net) ||
- nf_ct_is_dying(tmp))
- continue;
-
- if (atomic_inc_not_zero(&tmp->ct_general.use)) {
- ct = tmp;
- break;
- }
- }
+ if (test_bit(IPS_ASSURED_BIT, &tmp->status) ||
+ !net_eq(nf_ct_net(tmp), net) ||
+ nf_ct_is_dying(tmp))
+ continue;
- spin_unlock(lockp);
- if (ct)
- break;
+ if (!atomic_inc_not_zero(&tmp->ct_general.use))
+ continue;
+
+ /* kill only if still in same netns -- might have moved due to
+ * SLAB_DESTROY_BY_RCU rules.
+ *
+ * We steal the timer reference. If that fails timer has
+ * already fired or someone else deleted it. Just drop ref
+ * and move to next entry.
+ */
+ if (net_eq(nf_ct_net(tmp), net) &&
+ nf_ct_is_confirmed(tmp) &&
+ del_timer(&tmp->timeout) &&
+ nf_ct_delete(tmp, 0, 0))
+ drops++;
+
+ nf_ct_put(tmp);
}
- local_bh_enable();
+ return drops;
+}
- if (!ct)
- return false;
+static noinline int early_drop(struct net *net, unsigned int _hash)
+{
+ unsigned int i;
- /* kill only if in same netns -- might have moved due to
- * SLAB_DESTROY_BY_RCU rules
- */
- if (net_eq(nf_ct_net(ct), net) && del_timer(&ct->timeout)) {
- if (nf_ct_delete(ct, 0, 0)) {
- NF_CT_STAT_INC_ATOMIC(net, early_drop);
- ret = true;
+ for (i = 0; i < NF_CT_EVICTION_RANGE; i++) {
+ struct hlist_nulls_head *ct_hash;
+ unsigned hash, sequence, drops;
+
+ rcu_read_lock();
+ do {
+ sequence = read_seqcount_begin(&nf_conntrack_generation);
+ hash = scale_hash(_hash++);
+ ct_hash = nf_conntrack_hash;
+ } while (read_seqcount_retry(&nf_conntrack_generation, sequence));
+
+ drops = early_drop_list(net, &ct_hash[hash]);
+ rcu_read_unlock();
+
+ if (drops) {
+ NF_CT_STAT_ADD_ATOMIC(net, early_drop, drops);
+ return true;
}
}
- nf_ct_put(ct);
- return ret;
+ return false;
}
static struct nf_conn *
diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c
index 1a9545965c0d..02bcf00c2492 100644
--- a/net/netfilter/nf_conntrack_extend.c
+++ b/net/netfilter/nf_conntrack_extend.c
@@ -73,7 +73,7 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id,
size_t var_alloc_len, gfp_t gfp)
{
struct nf_ct_ext *old, *new;
- int i, newlen, newoff;
+ int newlen, newoff;
struct nf_ct_ext_type *t;
/* Conntrack must not be confirmed to avoid races on reallocation. */
@@ -99,19 +99,8 @@ void *__nf_ct_ext_add_length(struct nf_conn *ct, enum nf_ct_ext_id id,
return NULL;
if (new != old) {
- for (i = 0; i < NF_CT_EXT_NUM; i++) {
- if (!__nf_ct_ext_exist(old, i))
- continue;
-
- rcu_read_lock();
- t = rcu_dereference(nf_ct_ext_types[i]);
- if (t && t->move)
- t->move((void *)new + new->offset[i],
- (void *)old + old->offset[i]);
- rcu_read_unlock();
- }
kfree_rcu(old, rcu);
- ct->ext = new;
+ rcu_assign_pointer(ct->ext, new);
}
new->offset[id] = newoff;
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index 19efeba02abb..43147005bea3 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -572,7 +572,7 @@ static int nf_ct_ftp_from_nlattr(struct nlattr *attr, struct nf_conn *ct)
return 0;
}
-static struct nf_conntrack_helper ftp[MAX_PORTS][2] __read_mostly;
+static struct nf_conntrack_helper ftp[MAX_PORTS * 2] __read_mostly;
static const struct nf_conntrack_expect_policy ftp_exp_policy = {
.max_expected = 1,
@@ -582,24 +582,13 @@ static const struct nf_conntrack_expect_policy ftp_exp_policy = {
/* don't make this __exit, since it's called from __init ! */
static void nf_conntrack_ftp_fini(void)
{
- int i, j;
- for (i = 0; i < ports_c; i++) {
- for (j = 0; j < 2; j++) {
- if (ftp[i][j].me == NULL)
- continue;
-
- pr_debug("unregistering helper for pf: %d port: %d\n",
- ftp[i][j].tuple.src.l3num, ports[i]);
- nf_conntrack_helper_unregister(&ftp[i][j]);
- }
- }
-
+ nf_conntrack_helpers_unregister(ftp, ports_c * 2);
kfree(ftp_buffer);
}
static int __init nf_conntrack_ftp_init(void)
{
- int i, j = -1, ret = 0;
+ int i, ret = 0;
ftp_buffer = kmalloc(65536, GFP_KERNEL);
if (!ftp_buffer)
@@ -611,32 +600,21 @@ static int __init nf_conntrack_ftp_init(void)
/* FIXME should be configurable whether IPv4 and IPv6 FTP connections
are tracked or not - YK */
for (i = 0; i < ports_c; i++) {
- ftp[i][0].tuple.src.l3num = PF_INET;
- ftp[i][1].tuple.src.l3num = PF_INET6;
- for (j = 0; j < 2; j++) {
- ftp[i][j].data_len = sizeof(struct nf_ct_ftp_master);
- ftp[i][j].tuple.src.u.tcp.port = htons(ports[i]);
- ftp[i][j].tuple.dst.protonum = IPPROTO_TCP;
- ftp[i][j].expect_policy = &ftp_exp_policy;
- ftp[i][j].me = THIS_MODULE;
- ftp[i][j].help = help;
- ftp[i][j].from_nlattr = nf_ct_ftp_from_nlattr;
- if (ports[i] == FTP_PORT)
- sprintf(ftp[i][j].name, "ftp");
- else
- sprintf(ftp[i][j].name, "ftp-%d", ports[i]);
-
- pr_debug("registering helper for pf: %d port: %d\n",
- ftp[i][j].tuple.src.l3num, ports[i]);
- ret = nf_conntrack_helper_register(&ftp[i][j]);
- if (ret) {
- pr_err("failed to register helper for pf: %d port: %d\n",
- ftp[i][j].tuple.src.l3num, ports[i]);
- ports_c = i;
- nf_conntrack_ftp_fini();
- return ret;
- }
- }
+ nf_ct_helper_init(&ftp[2 * i], AF_INET, IPPROTO_TCP, "ftp",
+ FTP_PORT, ports[i], ports[i], &ftp_exp_policy,
+ 0, sizeof(struct nf_ct_ftp_master), help,
+ nf_ct_ftp_from_nlattr, THIS_MODULE);
+ nf_ct_helper_init(&ftp[2 * i + 1], AF_INET6, IPPROTO_TCP, "ftp",
+ FTP_PORT, ports[i], ports[i], &ftp_exp_policy,
+ 0, sizeof(struct nf_ct_ftp_master), help,
+ nf_ct_ftp_from_nlattr, THIS_MODULE);
+ }
+
+ ret = nf_conntrack_helpers_register(ftp, ports_c * 2);
+ if (ret < 0) {
+ pr_err("failed to register helpers\n");
+ kfree(ftp_buffer);
+ return ret;
}
return 0;
diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c
index bcd5ed6b7130..89b2e46925c4 100644
--- a/net/netfilter/nf_conntrack_h323_asn1.c
+++ b/net/netfilter/nf_conntrack_h323_asn1.c
@@ -846,9 +846,10 @@ int DecodeQ931(unsigned char *buf, size_t sz, Q931 *q931)
sz -= len;
/* Message Type */
- if (sz < 1)
+ if (sz < 2)
return H323_ERROR_BOUND;
q931->MessageType = *p++;
+ sz--;
PRINT("MessageType = %02X\n", q931->MessageType);
if (*p & 0x80) {
p++;
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 9511af04dc81..bb77a97961bf 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -1273,19 +1273,6 @@ static struct nf_conntrack_expect *find_expect(struct nf_conn *ct,
}
/****************************************************************************/
-static int set_expect_timeout(struct nf_conntrack_expect *exp,
- unsigned int timeout)
-{
- if (!exp || !del_timer(&exp->timeout))
- return 0;
-
- exp->timeout.expires = jiffies + timeout * HZ;
- add_timer(&exp->timeout);
-
- return 1;
-}
-
-/****************************************************************************/
static int expect_q931(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
unsigned int protoff, unsigned char **data,
@@ -1486,7 +1473,7 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
"timeout to %u seconds for",
info->timeout);
nf_ct_dump_tuple(&exp->tuple);
- set_expect_timeout(exp, info->timeout);
+ mod_timer(&exp->timeout, jiffies + info->timeout * HZ);
}
spin_unlock_bh(&nf_conntrack_expect_lock);
}
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 3a1a88b9bafa..b989b81ac156 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -409,6 +409,8 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
struct nf_conntrack_expect *exp;
const struct hlist_node *next;
const struct hlist_nulls_node *nn;
+ unsigned int last_hsize;
+ spinlock_t *lock;
struct net *net;
unsigned int i;
@@ -446,18 +448,80 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me)
rtnl_unlock();
local_bh_disable();
- for (i = 0; i < nf_conntrack_htable_size; i++) {
- nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
- if (i < nf_conntrack_htable_size) {
- hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode)
- unhelp(h, me);
+restart:
+ last_hsize = nf_conntrack_htable_size;
+ for (i = 0; i < last_hsize; i++) {
+ lock = &nf_conntrack_locks[i % CONNTRACK_LOCKS];
+ nf_conntrack_lock(lock);
+ if (last_hsize != nf_conntrack_htable_size) {
+ spin_unlock(lock);
+ goto restart;
}
- spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+ hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode)
+ unhelp(h, me);
+ spin_unlock(lock);
}
local_bh_enable();
}
EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister);
+void nf_ct_helper_init(struct nf_conntrack_helper *helper,
+ u16 l3num, u16 protonum, const char *name,
+ u16 default_port, u16 spec_port, u32 id,
+ const struct nf_conntrack_expect_policy *exp_pol,
+ u32 expect_class_max, u32 data_len,
+ int (*help)(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo),
+ int (*from_nlattr)(struct nlattr *attr,
+ struct nf_conn *ct),
+ struct module *module)
+{
+ helper->tuple.src.l3num = l3num;
+ helper->tuple.dst.protonum = protonum;
+ helper->tuple.src.u.all = htons(spec_port);
+ helper->expect_policy = exp_pol;
+ helper->expect_class_max = expect_class_max;
+ helper->data_len = data_len;
+ helper->help = help;
+ helper->from_nlattr = from_nlattr;
+ helper->me = module;
+
+ if (spec_port == default_port)
+ snprintf(helper->name, sizeof(helper->name), "%s", name);
+ else
+ snprintf(helper->name, sizeof(helper->name), "%s-%u", name, id);
+}
+EXPORT_SYMBOL_GPL(nf_ct_helper_init);
+
+int nf_conntrack_helpers_register(struct nf_conntrack_helper *helper,
+ unsigned int n)
+{
+ unsigned int i;
+ int err = 0;
+
+ for (i = 0; i < n; i++) {
+ err = nf_conntrack_helper_register(&helper[i]);
+ if (err < 0)
+ goto err;
+ }
+
+ return err;
+err:
+ if (i > 0)
+ nf_conntrack_helpers_unregister(helper, i);
+ return err;
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_helpers_register);
+
+void nf_conntrack_helpers_unregister(struct nf_conntrack_helper *helper,
+ unsigned int n)
+{
+ while (n-- > 0)
+ nf_conntrack_helper_unregister(&helper[n]);
+}
+EXPORT_SYMBOL_GPL(nf_conntrack_helpers_unregister);
+
static struct nf_ct_ext_type helper_extend __read_mostly = {
.len = sizeof(struct nf_conn_help),
.align = __alignof__(struct nf_conn_help),
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index f97ac61d2536..1972a149f958 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -255,27 +255,18 @@ static int __init nf_conntrack_irc_init(void)
ports[ports_c++] = IRC_PORT;
for (i = 0; i < ports_c; i++) {
- irc[i].tuple.src.l3num = AF_INET;
- irc[i].tuple.src.u.tcp.port = htons(ports[i]);
- irc[i].tuple.dst.protonum = IPPROTO_TCP;
- irc[i].expect_policy = &irc_exp_policy;
- irc[i].me = THIS_MODULE;
- irc[i].help = help;
-
- if (ports[i] == IRC_PORT)
- sprintf(irc[i].name, "irc");
- else
- sprintf(irc[i].name, "irc-%u", i);
-
- ret = nf_conntrack_helper_register(&irc[i]);
- if (ret) {
- pr_err("failed to register helper for pf: %u port: %u\n",
- irc[i].tuple.src.l3num, ports[i]);
- ports_c = i;
- nf_conntrack_irc_fini();
- return ret;
- }
+ nf_ct_helper_init(&irc[i], AF_INET, IPPROTO_TCP, "irc",
+ IRC_PORT, ports[i], i, &irc_exp_policy,
+ 0, 0, help, NULL, THIS_MODULE);
+ }
+
+ ret = nf_conntrack_helpers_register(&irc[0], ports_c);
+ if (ret) {
+ pr_err("failed to register helpers\n");
+ kfree(irc_buffer);
+ return ret;
}
+
return 0;
}
@@ -283,10 +274,7 @@ static int __init nf_conntrack_irc_init(void)
* it is needed by the init function */
static void nf_conntrack_irc_fini(void)
{
- int i;
-
- for (i = 0; i < ports_c; i++)
- nf_conntrack_helper_unregister(&irc[i]);
+ nf_conntrack_helpers_unregister(irc, ports_c);
kfree(irc_buffer);
}
diff --git a/net/netfilter/nf_conntrack_labels.c b/net/netfilter/nf_conntrack_labels.c
index 252e6a7cd2f1..bcab8bde7312 100644
--- a/net/netfilter/nf_conntrack_labels.c
+++ b/net/netfilter/nf_conntrack_labels.c
@@ -16,23 +16,6 @@
static spinlock_t nf_connlabels_lock;
-int nf_connlabel_set(struct nf_conn *ct, u16 bit)
-{
- struct nf_conn_labels *labels = nf_ct_labels_find(ct);
-
- if (!labels || BIT_WORD(bit) >= labels->words)
- return -ENOSPC;
-
- if (test_bit(bit, labels->bits))
- return 0;
-
- if (!test_and_set_bit(bit, labels->bits))
- nf_conntrack_event_cache(IPCT_LABEL, ct);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(nf_connlabel_set);
-
static int replace_u32(u32 *address, u32 mask, u32 new)
{
u32 old, tmp;
@@ -60,7 +43,7 @@ int nf_connlabels_replace(struct nf_conn *ct,
if (!labels)
return -ENOSPC;
- size = labels->words * sizeof(long);
+ size = sizeof(labels->bits);
if (size < (words32 * sizeof(u32)))
words32 = size / sizeof(u32);
@@ -80,16 +63,11 @@ EXPORT_SYMBOL_GPL(nf_connlabels_replace);
int nf_connlabels_get(struct net *net, unsigned int bits)
{
- size_t words;
-
- words = BIT_WORD(bits) + 1;
- if (words > NF_CT_LABELS_MAX_SIZE / sizeof(long))
+ if (BIT_WORD(bits) >= NF_CT_LABELS_MAX_SIZE / sizeof(long))
return -ERANGE;
spin_lock(&nf_connlabels_lock);
net->ct.labels_used++;
- if (words > net->ct.label_words)
- net->ct.label_words = words;
spin_unlock(&nf_connlabels_lock);
return 0;
@@ -100,8 +78,6 @@ void nf_connlabels_put(struct net *net)
{
spin_lock(&nf_connlabels_lock);
net->ct.labels_used--;
- if (net->ct.labels_used == 0)
- net->ct.label_words = 0;
spin_unlock(&nf_connlabels_lock);
}
EXPORT_SYMBOL_GPL(nf_connlabels_put);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index a18d1ceabad5..050bb3420a6b 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -346,25 +346,25 @@ static inline int ctnetlink_label_size(const struct nf_conn *ct)
if (!labels)
return 0;
- return nla_total_size(labels->words * sizeof(long));
+ return nla_total_size(sizeof(labels->bits));
}
static int
ctnetlink_dump_labels(struct sk_buff *skb, const struct nf_conn *ct)
{
struct nf_conn_labels *labels = nf_ct_labels_find(ct);
- unsigned int len, i;
+ unsigned int i;
if (!labels)
return 0;
- len = labels->words * sizeof(long);
i = 0;
do {
if (labels->bits[i] != 0)
- return nla_put(skb, CTA_LABELS, len, labels->bits);
+ return nla_put(skb, CTA_LABELS, sizeof(labels->bits),
+ labels->bits);
i++;
- } while (i < labels->words);
+ } while (i < ARRAY_SIZE(labels->bits));
return 0;
}
diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c
index 3fcbaab83b3d..9dcb9ee9b97d 100644
--- a/net/netfilter/nf_conntrack_sane.c
+++ b/net/netfilter/nf_conntrack_sane.c
@@ -166,7 +166,7 @@ out:
return ret;
}
-static struct nf_conntrack_helper sane[MAX_PORTS][2] __read_mostly;
+static struct nf_conntrack_helper sane[MAX_PORTS * 2] __read_mostly;
static const struct nf_conntrack_expect_policy sane_exp_policy = {
.max_expected = 1,
@@ -176,22 +176,13 @@ static const struct nf_conntrack_expect_policy sane_exp_policy = {
/* don't make this __exit, since it's called from __init ! */
static void nf_conntrack_sane_fini(void)
{
- int i, j;
-
- for (i = 0; i < ports_c; i++) {
- for (j = 0; j < 2; j++) {
- pr_debug("unregistering helper for pf: %d port: %d\n",
- sane[i][j].tuple.src.l3num, ports[i]);
- nf_conntrack_helper_unregister(&sane[i][j]);
- }
- }
-
+ nf_conntrack_helpers_unregister(sane, ports_c * 2);
kfree(sane_buffer);
}
static int __init nf_conntrack_sane_init(void)
{
- int i, j = -1, ret = 0;
+ int i, ret = 0;
sane_buffer = kmalloc(65536, GFP_KERNEL);
if (!sane_buffer)
@@ -203,31 +194,23 @@ static int __init nf_conntrack_sane_init(void)
/* FIXME should be configurable whether IPv4 and IPv6 connections
are tracked or not - YK */
for (i = 0; i < ports_c; i++) {
- sane[i][0].tuple.src.l3num = PF_INET;
- sane[i][1].tuple.src.l3num = PF_INET6;
- for (j = 0; j < 2; j++) {
- sane[i][j].data_len = sizeof(struct nf_ct_sane_master);
- sane[i][j].tuple.src.u.tcp.port = htons(ports[i]);
- sane[i][j].tuple.dst.protonum = IPPROTO_TCP;
- sane[i][j].expect_policy = &sane_exp_policy;
- sane[i][j].me = THIS_MODULE;
- sane[i][j].help = help;
- if (ports[i] == SANE_PORT)
- sprintf(sane[i][j].name, "sane");
- else
- sprintf(sane[i][j].name, "sane-%d", ports[i]);
-
- pr_debug("registering helper for pf: %d port: %d\n",
- sane[i][j].tuple.src.l3num, ports[i]);
- ret = nf_conntrack_helper_register(&sane[i][j]);
- if (ret) {
- pr_err("failed to register helper for pf: %d port: %d\n",
- sane[i][j].tuple.src.l3num, ports[i]);
- ports_c = i;
- nf_conntrack_sane_fini();
- return ret;
- }
- }
+ nf_ct_helper_init(&sane[2 * i], AF_INET, IPPROTO_TCP, "sane",
+ SANE_PORT, ports[i], ports[i],
+ &sane_exp_policy, 0,
+ sizeof(struct nf_ct_sane_master), help, NULL,
+ THIS_MODULE);
+ nf_ct_helper_init(&sane[2 * i + 1], AF_INET6, IPPROTO_TCP, "sane",
+ SANE_PORT, ports[i], ports[i],
+ &sane_exp_policy, 0,
+ sizeof(struct nf_ct_sane_master), help, NULL,
+ THIS_MODULE);
+ }
+
+ ret = nf_conntrack_helpers_register(sane, ports_c * 2);
+ if (ret < 0) {
+ pr_err("failed to register helpers\n");
+ kfree(sane_buffer);
+ return ret;
}
return 0;
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index f72ba5587588..8d9db9d4702b 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1589,7 +1589,7 @@ static int sip_help_udp(struct sk_buff *skb, unsigned int protoff,
return process_sip_msg(skb, ct, protoff, dataoff, &dptr, &datalen);
}
-static struct nf_conntrack_helper sip[MAX_PORTS][4] __read_mostly;
+static struct nf_conntrack_helper sip[MAX_PORTS * 4] __read_mostly;
static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1] = {
[SIP_EXPECT_SIGNALLING] = {
@@ -1616,20 +1616,12 @@ static const struct nf_conntrack_expect_policy sip_exp_policy[SIP_EXPECT_MAX + 1
static void nf_conntrack_sip_fini(void)
{
- int i, j;
-
- for (i = 0; i < ports_c; i++) {
- for (j = 0; j < ARRAY_SIZE(sip[i]); j++) {
- if (sip[i][j].me == NULL)
- continue;
- nf_conntrack_helper_unregister(&sip[i][j]);
- }
- }
+ nf_conntrack_helpers_unregister(sip, ports_c * 4);
}
static int __init nf_conntrack_sip_init(void)
{
- int i, j, ret;
+ int i, ret;
if (ports_c == 0)
ports[ports_c++] = SIP_PORT;
@@ -1637,43 +1629,32 @@ static int __init nf_conntrack_sip_init(void)
for (i = 0; i < ports_c; i++) {
memset(&sip[i], 0, sizeof(sip[i]));
- sip[i][0].tuple.src.l3num = AF_INET;
- sip[i][0].tuple.dst.protonum = IPPROTO_UDP;
- sip[i][0].help = sip_help_udp;
- sip[i][1].tuple.src.l3num = AF_INET;
- sip[i][1].tuple.dst.protonum = IPPROTO_TCP;
- sip[i][1].help = sip_help_tcp;
-
- sip[i][2].tuple.src.l3num = AF_INET6;
- sip[i][2].tuple.dst.protonum = IPPROTO_UDP;
- sip[i][2].help = sip_help_udp;
- sip[i][3].tuple.src.l3num = AF_INET6;
- sip[i][3].tuple.dst.protonum = IPPROTO_TCP;
- sip[i][3].help = sip_help_tcp;
-
- for (j = 0; j < ARRAY_SIZE(sip[i]); j++) {
- sip[i][j].data_len = sizeof(struct nf_ct_sip_master);
- sip[i][j].tuple.src.u.udp.port = htons(ports[i]);
- sip[i][j].expect_policy = sip_exp_policy;
- sip[i][j].expect_class_max = SIP_EXPECT_MAX;
- sip[i][j].me = THIS_MODULE;
-
- if (ports[i] == SIP_PORT)
- sprintf(sip[i][j].name, "sip");
- else
- sprintf(sip[i][j].name, "sip-%u", i);
-
- pr_debug("port #%u: %u\n", i, ports[i]);
+ nf_ct_helper_init(&sip[4 * i], AF_INET, IPPROTO_UDP, "sip",
+ SIP_PORT, ports[i], i, sip_exp_policy,
+ SIP_EXPECT_MAX,
+ sizeof(struct nf_ct_sip_master), sip_help_udp,
+ NULL, THIS_MODULE);
+ nf_ct_helper_init(&sip[4 * i + 1], AF_INET, IPPROTO_TCP, "sip",
+ SIP_PORT, ports[i], i, sip_exp_policy,
+ SIP_EXPECT_MAX,
+ sizeof(struct nf_ct_sip_master), sip_help_tcp,
+ NULL, THIS_MODULE);
+ nf_ct_helper_init(&sip[4 * i + 2], AF_INET6, IPPROTO_UDP, "sip",
+ SIP_PORT, ports[i], i, sip_exp_policy,
+ SIP_EXPECT_MAX,
+ sizeof(struct nf_ct_sip_master), sip_help_udp,
+ NULL, THIS_MODULE);
+ nf_ct_helper_init(&sip[4 * i + 3], AF_INET6, IPPROTO_TCP, "sip",
+ SIP_PORT, ports[i], i, sip_exp_policy,
+ SIP_EXPECT_MAX,
+ sizeof(struct nf_ct_sip_master), sip_help_tcp,
+ NULL, THIS_MODULE);
+ }
- ret = nf_conntrack_helper_register(&sip[i][j]);
- if (ret) {
- pr_err("failed to register helper for pf: %u port: %u\n",
- sip[i][j].tuple.src.l3num, ports[i]);
- ports_c = i;
- nf_conntrack_sip_fini();
- return ret;
- }
- }
+ ret = nf_conntrack_helpers_register(sip, ports_c * 4);
+ if (ret < 0) {
+ pr_err("failed to register helpers\n");
+ return ret;
}
return 0;
}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 2aaa188ee961..958a1455ca7f 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -48,6 +48,8 @@ EXPORT_SYMBOL_GPL(print_tuple);
struct ct_iter_state {
struct seq_net_private p;
+ struct hlist_nulls_head *hash;
+ unsigned int htable_size;
unsigned int bucket;
u_int64_t time_now;
};
@@ -58,9 +60,10 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
struct hlist_nulls_node *n;
for (st->bucket = 0;
- st->bucket < nf_conntrack_htable_size;
+ st->bucket < st->htable_size;
st->bucket++) {
- n = rcu_dereference(hlist_nulls_first_rcu(&nf_conntrack_hash[st->bucket]));
+ n = rcu_dereference(
+ hlist_nulls_first_rcu(&st->hash[st->bucket]));
if (!is_a_nulls(n))
return n;
}
@@ -75,12 +78,11 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
head = rcu_dereference(hlist_nulls_next_rcu(head));
while (is_a_nulls(head)) {
if (likely(get_nulls_value(head) == st->bucket)) {
- if (++st->bucket >= nf_conntrack_htable_size)
+ if (++st->bucket >= st->htable_size)
return NULL;
}
head = rcu_dereference(
- hlist_nulls_first_rcu(
- &nf_conntrack_hash[st->bucket]));
+ hlist_nulls_first_rcu(&st->hash[st->bucket]));
}
return head;
}
@@ -102,6 +104,8 @@ static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
st->time_now = ktime_get_real_ns();
rcu_read_lock();
+
+ nf_conntrack_get_ht(&st->hash, &st->htable_size);
return ct_get_idx(seq, *pos);
}
diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c
index 2e65b5430fba..b1227dc6f75e 100644
--- a/net/netfilter/nf_conntrack_tftp.c
+++ b/net/netfilter/nf_conntrack_tftp.c
@@ -97,7 +97,7 @@ static int tftp_help(struct sk_buff *skb,
return ret;
}
-static struct nf_conntrack_helper tftp[MAX_PORTS][2] __read_mostly;
+static struct nf_conntrack_helper tftp[MAX_PORTS * 2] __read_mostly;
static const struct nf_conntrack_expect_policy tftp_exp_policy = {
.max_expected = 1,
@@ -106,47 +106,29 @@ static const struct nf_conntrack_expect_policy tftp_exp_policy = {
static void nf_conntrack_tftp_fini(void)
{
- int i, j;
-
- for (i = 0; i < ports_c; i++) {
- for (j = 0; j < 2; j++)
- nf_conntrack_helper_unregister(&tftp[i][j]);
- }
+ nf_conntrack_helpers_unregister(tftp, ports_c * 2);
}
static int __init nf_conntrack_tftp_init(void)
{
- int i, j, ret;
+ int i, ret;
if (ports_c == 0)
ports[ports_c++] = TFTP_PORT;
for (i = 0; i < ports_c; i++) {
- memset(&tftp[i], 0, sizeof(tftp[i]));
-
- tftp[i][0].tuple.src.l3num = AF_INET;
- tftp[i][1].tuple.src.l3num = AF_INET6;
- for (j = 0; j < 2; j++) {
- tftp[i][j].tuple.dst.protonum = IPPROTO_UDP;
- tftp[i][j].tuple.src.u.udp.port = htons(ports[i]);
- tftp[i][j].expect_policy = &tftp_exp_policy;
- tftp[i][j].me = THIS_MODULE;
- tftp[i][j].help = tftp_help;
-
- if (ports[i] == TFTP_PORT)
- sprintf(tftp[i][j].name, "tftp");
- else
- sprintf(tftp[i][j].name, "tftp-%u", i);
-
- ret = nf_conntrack_helper_register(&tftp[i][j]);
- if (ret) {
- pr_err("failed to register helper for pf: %u port: %u\n",
- tftp[i][j].tuple.src.l3num, ports[i]);
- ports_c = i;
- nf_conntrack_tftp_fini();
- return ret;
- }
- }
+ nf_ct_helper_init(&tftp[2 * i], AF_INET, IPPROTO_UDP, "tftp",
+ TFTP_PORT, ports[i], i, &tftp_exp_policy,
+ 0, 0, tftp_help, NULL, THIS_MODULE);
+ nf_ct_helper_init(&tftp[2 * i + 1], AF_INET6, IPPROTO_UDP, "tftp",
+ TFTP_PORT, ports[i], i, &tftp_exp_policy,
+ 0, 0, tftp_help, NULL, THIS_MODULE);
+ }
+
+ ret = nf_conntrack_helpers_register(tftp, ports_c * 2);
+ if (ret < 0) {
+ pr_err("failed to register helpers\n");
+ return ret;
}
return 0;
}
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 6877a396f8fc..de31818417b8 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -30,17 +30,19 @@
#include <net/netfilter/nf_conntrack_zones.h>
#include <linux/netfilter/nf_nat.h>
-static DEFINE_SPINLOCK(nf_nat_lock);
-
static DEFINE_MUTEX(nf_nat_proto_mutex);
static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
__read_mostly;
static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO]
__read_mostly;
-static struct hlist_head *nf_nat_bysource __read_mostly;
-static unsigned int nf_nat_htable_size __read_mostly;
-static unsigned int nf_nat_hash_rnd __read_mostly;
+struct nf_nat_conn_key {
+ const struct net *net;
+ const struct nf_conntrack_tuple *tuple;
+ const struct nf_conntrack_zone *zone;
+};
+
+static struct rhashtable nf_nat_bysource_table;
inline const struct nf_nat_l3proto *
__nf_nat_l3proto_find(u8 family)
@@ -119,19 +121,17 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family)
EXPORT_SYMBOL(nf_xfrm_me_harder);
#endif /* CONFIG_XFRM */
-/* We keep an extra hash for each conntrack, for fast searching. */
-static inline unsigned int
-hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
+static u32 nf_nat_bysource_hash(const void *data, u32 len, u32 seed)
{
- unsigned int hash;
-
- get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
+ const struct nf_conntrack_tuple *t;
+ const struct nf_conn *ct = data;
+ t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
/* Original src, to ensure we map it consistently if poss. */
- hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
- tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n));
- return reciprocal_scale(hash, nf_nat_htable_size);
+ seed ^= net_hash_mix(nf_ct_net(ct));
+ return jhash2((const u32 *)&t->src, sizeof(t->src) / sizeof(u32),
+ t->dst.protonum ^ seed);
}
/* Is this tuple already taken? (not by us) */
@@ -187,6 +187,26 @@ same_src(const struct nf_conn *ct,
t->src.u.all == tuple->src.u.all);
}
+static int nf_nat_bysource_cmp(struct rhashtable_compare_arg *arg,
+ const void *obj)
+{
+ const struct nf_nat_conn_key *key = arg->key;
+ const struct nf_conn *ct = obj;
+
+ return same_src(ct, key->tuple) &&
+ net_eq(nf_ct_net(ct), key->net) &&
+ nf_ct_zone_equal(ct, key->zone, IP_CT_DIR_ORIGINAL);
+}
+
+static struct rhashtable_params nf_nat_bysource_params = {
+ .head_offset = offsetof(struct nf_conn, nat_bysource),
+ .obj_hashfn = nf_nat_bysource_hash,
+ .obj_cmpfn = nf_nat_bysource_cmp,
+ .nelem_hint = 256,
+ .min_size = 1024,
+ .nulls_base = (1U << RHT_BASE_SHIFT),
+};
+
/* Only called for SRC manip */
static int
find_appropriate_src(struct net *net,
@@ -197,25 +217,23 @@ find_appropriate_src(struct net *net,
struct nf_conntrack_tuple *result,
const struct nf_nat_range *range)
{
- unsigned int h = hash_by_src(net, tuple);
- const struct nf_conn_nat *nat;
const struct nf_conn *ct;
+ struct nf_nat_conn_key key = {
+ .net = net,
+ .tuple = tuple,
+ .zone = zone
+ };
- hlist_for_each_entry_rcu(nat, &nf_nat_bysource[h], bysource) {
- ct = nat->ct;
- if (same_src(ct, tuple) &&
- net_eq(net, nf_ct_net(ct)) &&
- nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) {
- /* Copy source part from reply tuple. */
- nf_ct_invert_tuplepr(result,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
- result->dst = tuple->dst;
-
- if (in_range(l3proto, l4proto, result, range))
- return 1;
- }
- }
- return 0;
+ ct = rhashtable_lookup_fast(&nf_nat_bysource_table, &key,
+ nf_nat_bysource_params);
+ if (!ct)
+ return 0;
+
+ nf_ct_invert_tuplepr(result,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ result->dst = tuple->dst;
+
+ return in_range(l3proto, l4proto, result, range);
}
/* For [FUTURE] fragmentation handling, we want the least-used
@@ -387,7 +405,6 @@ nf_nat_setup_info(struct nf_conn *ct,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype)
{
- struct net *net = nf_ct_net(ct);
struct nf_conntrack_tuple curr_tuple, new_tuple;
struct nf_conn_nat *nat;
@@ -428,17 +445,13 @@ nf_nat_setup_info(struct nf_conn *ct,
}
if (maniptype == NF_NAT_MANIP_SRC) {
- unsigned int srchash;
-
- srchash = hash_by_src(net,
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- spin_lock_bh(&nf_nat_lock);
- /* nf_conntrack_alter_reply might re-allocate extension aera */
- nat = nfct_nat(ct);
- nat->ct = ct;
- hlist_add_head_rcu(&nat->bysource,
- &nf_nat_bysource[srchash]);
- spin_unlock_bh(&nf_nat_lock);
+ int err;
+
+ err = rhashtable_insert_fast(&nf_nat_bysource_table,
+ &ct->nat_bysource,
+ nf_nat_bysource_params);
+ if (err)
+ return NF_DROP;
}
/* It's done. */
@@ -543,7 +556,7 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
if (nf_nat_proto_remove(ct, data))
return 1;
- if (!nat || !nat->ct)
+ if (!nat)
return 0;
/* This netns is being destroyed, and conntrack has nat null binding.
@@ -555,11 +568,10 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
if (!del_timer(&ct->timeout))
return 1;
- spin_lock_bh(&nf_nat_lock);
- hlist_del_rcu(&nat->bysource);
ct->status &= ~IPS_NAT_DONE_MASK;
- nat->ct = NULL;
- spin_unlock_bh(&nf_nat_lock);
+
+ rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource,
+ nf_nat_bysource_params);
add_timer(&ct->timeout);
@@ -688,35 +700,17 @@ static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
{
struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT);
- if (nat == NULL || nat->ct == NULL)
- return;
-
- NF_CT_ASSERT(nat->ct->status & IPS_SRC_NAT_DONE);
-
- spin_lock_bh(&nf_nat_lock);
- hlist_del_rcu(&nat->bysource);
- spin_unlock_bh(&nf_nat_lock);
-}
-
-static void nf_nat_move_storage(void *new, void *old)
-{
- struct nf_conn_nat *new_nat = new;
- struct nf_conn_nat *old_nat = old;
- struct nf_conn *ct = old_nat->ct;
-
- if (!ct || !(ct->status & IPS_SRC_NAT_DONE))
+ if (!nat)
return;
- spin_lock_bh(&nf_nat_lock);
- hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
- spin_unlock_bh(&nf_nat_lock);
+ rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource,
+ nf_nat_bysource_params);
}
static struct nf_ct_ext_type nat_extend __read_mostly = {
.len = sizeof(struct nf_conn_nat),
.align = __alignof__(struct nf_conn_nat),
.destroy = nf_nat_cleanup_conntrack,
- .move = nf_nat_move_storage,
.id = NF_CT_EXT_NAT,
.flags = NF_CT_EXT_F_PREALLOC,
};
@@ -845,16 +839,13 @@ static int __init nf_nat_init(void)
{
int ret;
- /* Leave them the same for the moment. */
- nf_nat_htable_size = nf_conntrack_htable_size;
-
- nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0);
- if (!nf_nat_bysource)
- return -ENOMEM;
+ ret = rhashtable_init(&nf_nat_bysource_table, &nf_nat_bysource_params);
+ if (ret)
+ return ret;
ret = nf_ct_extend_register(&nat_extend);
if (ret < 0) {
- nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
+ rhashtable_destroy(&nf_nat_bysource_table);
printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
return ret;
}
@@ -878,7 +869,7 @@ static int __init nf_nat_init(void)
return 0;
cleanup_extend:
- nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
+ rhashtable_destroy(&nf_nat_bysource_table);
nf_ct_extend_unregister(&nat_extend);
return ret;
}
@@ -896,8 +887,8 @@ static void __exit nf_nat_cleanup(void)
#endif
for (i = 0; i < NFPROTO_NUMPROTO; i++)
kfree(nf_nat_l4protos[i]);
- synchronize_net();
- nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size);
+
+ rhashtable_destroy(&nf_nat_bysource_table);
}
MODULE_LICENSE("GPL");
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index f24bed04ccfc..7e1c876c7608 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1405,7 +1405,6 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk,
rcu_assign_pointer(basechain->stats, stats);
}
- write_pnet(&basechain->pnet, net);
basechain->type = type;
chain = &basechain->chain;
@@ -1860,10 +1859,16 @@ err:
return err;
}
+struct nft_rule_dump_ctx {
+ char table[NFT_TABLE_MAXNAMELEN];
+ char chain[NFT_CHAIN_MAXNAMELEN];
+};
+
static int nf_tables_dump_rules(struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
+ const struct nft_rule_dump_ctx *ctx = cb->data;
const struct nft_af_info *afi;
const struct nft_table *table;
const struct nft_chain *chain;
@@ -1880,7 +1885,15 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
continue;
list_for_each_entry_rcu(table, &afi->tables, list) {
+ if (ctx && ctx->table[0] &&
+ strcmp(ctx->table, table->name) != 0)
+ continue;
+
list_for_each_entry_rcu(chain, &table->chains, list) {
+ if (ctx && ctx->chain[0] &&
+ strcmp(ctx->chain, chain->name) != 0)
+ continue;
+
list_for_each_entry_rcu(rule, &chain->rules, list) {
if (!nft_is_active(net, rule))
goto cont;
@@ -1910,6 +1923,12 @@ done:
return skb->len;
}
+static int nf_tables_dump_rules_done(struct netlink_callback *cb)
+{
+ kfree(cb->data);
+ return 0;
+}
+
static int nf_tables_getrule(struct net *net, struct sock *nlsk,
struct sk_buff *skb, const struct nlmsghdr *nlh,
const struct nlattr * const nla[])
@@ -1927,7 +1946,25 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk,
if (nlh->nlmsg_flags & NLM_F_DUMP) {
struct netlink_dump_control c = {
.dump = nf_tables_dump_rules,
+ .done = nf_tables_dump_rules_done,
};
+
+ if (nla[NFTA_RULE_TABLE] || nla[NFTA_RULE_CHAIN]) {
+ struct nft_rule_dump_ctx *ctx;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ if (nla[NFTA_RULE_TABLE])
+ nla_strlcpy(ctx->table, nla[NFTA_RULE_TABLE],
+ sizeof(ctx->table));
+ if (nla[NFTA_RULE_CHAIN])
+ nla_strlcpy(ctx->chain, nla[NFTA_RULE_CHAIN],
+ sizeof(ctx->chain));
+ c.data = ctx;
+ }
+
return netlink_dump_start(nlsk, skb, nlh, &c);
}
@@ -2843,7 +2880,6 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
}
INIT_LIST_HEAD(&set->bindings);
- write_pnet(&set->pnet, net);
set->ops = ops;
set->ktype = ktype;
set->klen = desc.klen;
@@ -3522,7 +3558,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
goto err4;
ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
- err = set->ops->insert(set, &elem);
+ err = set->ops->insert(ctx->net, set, &elem);
if (err < 0)
goto err5;
@@ -3646,7 +3682,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
goto err3;
}
- priv = set->ops->deactivate(set, &elem);
+ priv = set->ops->deactivate(ctx->net, set, &elem);
if (priv == NULL) {
err = -ENOENT;
goto err4;
@@ -4020,7 +4056,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
case NFT_MSG_NEWSETELEM:
te = (struct nft_trans_elem *)trans->data;
- te->set->ops->activate(te->set, &te->elem);
+ te->set->ops->activate(net, te->set, &te->elem);
nf_tables_setelem_notify(&trans->ctx, te->set,
&te->elem,
NFT_MSG_NEWSETELEM, 0);
@@ -4145,7 +4181,7 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb)
case NFT_MSG_DELSETELEM:
te = (struct nft_trans_elem *)trans->data;
- te->set->ops->activate(te->set, &te->elem);
+ te->set->ops->activate(net, te->set, &te->elem);
te->set->ndeact--;
nft_trans_destroy(trans);
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index 3c84f14326f5..4cdcd969b64c 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -303,16 +303,24 @@ static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
{
struct nf_conntrack_tuple_hash *h;
const struct hlist_nulls_node *nn;
+ unsigned int last_hsize;
+ spinlock_t *lock;
int i;
local_bh_disable();
- for (i = 0; i < nf_conntrack_htable_size; i++) {
- nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
- if (i < nf_conntrack_htable_size) {
- hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode)
- untimeout(h, timeout);
+restart:
+ last_hsize = nf_conntrack_htable_size;
+ for (i = 0; i < last_hsize; i++) {
+ lock = &nf_conntrack_locks[i % CONNTRACK_LOCKS];
+ nf_conntrack_lock(lock);
+ if (last_hsize != nf_conntrack_htable_size) {
+ spin_unlock(lock);
+ goto restart;
}
- spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
+
+ hlist_nulls_for_each_entry(h, nn, &nf_conntrack_hash[i], hnnode)
+ untimeout(h, timeout);
+ spin_unlock(lock);
}
local_bh_enable();
}
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 6228c422c766..c21e7eb8dce0 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -23,6 +23,20 @@
#include <linux/netfilter_arp/arp_tables.h>
#include <net/netfilter/nf_tables.h>
+struct nft_xt {
+ struct list_head head;
+ struct nft_expr_ops ops;
+ unsigned int refcnt;
+};
+
+static void nft_xt_put(struct nft_xt *xt)
+{
+ if (--xt->refcnt == 0) {
+ list_del(&xt->head);
+ kfree(xt);
+ }
+}
+
static int nft_compat_chain_validate_dependency(const char *tablename,
const struct nft_chain *chain)
{
@@ -260,6 +274,7 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
if (par.target->destroy != NULL)
par.target->destroy(&par);
+ nft_xt_put(container_of(expr->ops, struct nft_xt, ops));
module_put(target->me);
}
@@ -442,6 +457,7 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr)
if (par.match->destroy != NULL)
par.match->destroy(&par);
+ nft_xt_put(container_of(expr->ops, struct nft_xt, ops));
module_put(match->me);
}
@@ -612,11 +628,6 @@ static const struct nfnetlink_subsystem nfnl_compat_subsys = {
static LIST_HEAD(nft_match_list);
-struct nft_xt {
- struct list_head head;
- struct nft_expr_ops ops;
-};
-
static struct nft_expr_type nft_match_type;
static bool nft_match_cmp(const struct xt_match *match,
@@ -634,6 +645,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
struct xt_match *match;
char *mt_name;
u32 rev, family;
+ int err;
if (tb[NFTA_MATCH_NAME] == NULL ||
tb[NFTA_MATCH_REV] == NULL ||
@@ -652,6 +664,7 @@ nft_match_select_ops(const struct nft_ctx *ctx,
if (!try_module_get(match->me))
return ERR_PTR(-ENOENT);
+ nft_match->refcnt++;
return &nft_match->ops;
}
}
@@ -660,14 +673,19 @@ nft_match_select_ops(const struct nft_ctx *ctx,
if (IS_ERR(match))
return ERR_PTR(-ENOENT);
- if (match->matchsize > nla_len(tb[NFTA_MATCH_INFO]))
- return ERR_PTR(-EINVAL);
+ if (match->matchsize > nla_len(tb[NFTA_MATCH_INFO])) {
+ err = -EINVAL;
+ goto err;
+ }
/* This is the first time we use this match, allocate operations */
nft_match = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
- if (nft_match == NULL)
- return ERR_PTR(-ENOMEM);
+ if (nft_match == NULL) {
+ err = -ENOMEM;
+ goto err;
+ }
+ nft_match->refcnt = 1;
nft_match->ops.type = &nft_match_type;
nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize));
nft_match->ops.eval = nft_match_eval;
@@ -680,14 +698,9 @@ nft_match_select_ops(const struct nft_ctx *ctx,
list_add(&nft_match->head, &nft_match_list);
return &nft_match->ops;
-}
-
-static void nft_match_release(void)
-{
- struct nft_xt *nft_match, *tmp;
-
- list_for_each_entry_safe(nft_match, tmp, &nft_match_list, head)
- kfree(nft_match);
+err:
+ module_put(match->me);
+ return ERR_PTR(err);
}
static struct nft_expr_type nft_match_type __read_mostly = {
@@ -717,6 +730,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
struct xt_target *target;
char *tg_name;
u32 rev, family;
+ int err;
if (tb[NFTA_TARGET_NAME] == NULL ||
tb[NFTA_TARGET_REV] == NULL ||
@@ -735,6 +749,7 @@ nft_target_select_ops(const struct nft_ctx *ctx,
if (!try_module_get(target->me))
return ERR_PTR(-ENOENT);
+ nft_target->refcnt++;
return &nft_target->ops;
}
}
@@ -743,14 +758,19 @@ nft_target_select_ops(const struct nft_ctx *ctx,
if (IS_ERR(target))
return ERR_PTR(-ENOENT);
- if (target->targetsize > nla_len(tb[NFTA_TARGET_INFO]))
- return ERR_PTR(-EINVAL);
+ if (target->targetsize > nla_len(tb[NFTA_TARGET_INFO])) {
+ err = -EINVAL;
+ goto err;
+ }
/* This is the first time we use this target, allocate operations */
nft_target = kzalloc(sizeof(struct nft_xt), GFP_KERNEL);
- if (nft_target == NULL)
- return ERR_PTR(-ENOMEM);
+ if (nft_target == NULL) {
+ err = -ENOMEM;
+ goto err;
+ }
+ nft_target->refcnt = 1;
nft_target->ops.type = &nft_target_type;
nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize));
nft_target->ops.init = nft_target_init;
@@ -767,14 +787,9 @@ nft_target_select_ops(const struct nft_ctx *ctx,
list_add(&nft_target->head, &nft_target_list);
return &nft_target->ops;
-}
-
-static void nft_target_release(void)
-{
- struct nft_xt *nft_target, *tmp;
-
- list_for_each_entry_safe(nft_target, tmp, &nft_target_list, head)
- kfree(nft_target);
+err:
+ module_put(target->me);
+ return ERR_PTR(err);
}
static struct nft_expr_type nft_target_type __read_mostly = {
@@ -819,8 +834,6 @@ static void __exit nft_compat_module_exit(void)
nfnetlink_subsys_unregister(&nfnl_compat_subsys);
nft_unregister_expr(&nft_target_type);
nft_unregister_expr(&nft_match_type);
- nft_match_release();
- nft_target_release();
}
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFT_COMPAT);
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 81fbb450783e..51e180f2a003 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -109,18 +109,11 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
#ifdef CONFIG_NF_CONNTRACK_LABELS
case NFT_CT_LABELS: {
struct nf_conn_labels *labels = nf_ct_labels_find(ct);
- unsigned int size;
- if (!labels) {
+ if (labels)
+ memcpy(dest, labels->bits, NF_CT_LABELS_MAX_SIZE);
+ else
memset(dest, 0, NF_CT_LABELS_MAX_SIZE);
- return;
- }
-
- size = labels->words * sizeof(long);
- memcpy(dest, labels->bits, size);
- if (size < NF_CT_LABELS_MAX_SIZE)
- memset(((char *) dest) + size, 0,
- NF_CT_LABELS_MAX_SIZE - size);
return;
}
#endif
@@ -351,6 +344,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
if (err < 0)
return err;
+ if (priv->key == NFT_CT_BYTES || priv->key == NFT_CT_PKTS)
+ nf_ct_set_acct(ctx->net, true);
+
return 0;
}
@@ -359,6 +355,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_ct *priv = nft_expr_priv(expr);
+ bool label_got = false;
unsigned int len;
int err;
@@ -377,6 +374,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
err = nf_connlabels_get(ctx->net, (len * BITS_PER_BYTE) - 1);
if (err)
return err;
+ label_got = true;
break;
#endif
default:
@@ -386,17 +384,28 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
priv->sreg = nft_parse_register(tb[NFTA_CT_SREG]);
err = nft_validate_register_load(priv->sreg, len);
if (err < 0)
- return err;
+ goto err1;
err = nft_ct_l3proto_try_module_get(ctx->afi->family);
if (err < 0)
- return err;
+ goto err1;
return 0;
+
+err1:
+ if (label_got)
+ nf_connlabels_put(ctx->net);
+ return err;
+}
+
+static void nft_ct_get_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ nft_ct_l3proto_module_put(ctx->afi->family);
}
-static void nft_ct_destroy(const struct nft_ctx *ctx,
- const struct nft_expr *expr)
+static void nft_ct_set_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
{
struct nft_ct *priv = nft_expr_priv(expr);
@@ -468,7 +477,7 @@ static const struct nft_expr_ops nft_ct_get_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
.eval = nft_ct_get_eval,
.init = nft_ct_get_init,
- .destroy = nft_ct_destroy,
+ .destroy = nft_ct_get_destroy,
.dump = nft_ct_get_dump,
};
@@ -477,7 +486,7 @@ static const struct nft_expr_ops nft_ct_set_ops = {
.size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
.eval = nft_ct_set_eval,
.init = nft_ct_set_init,
- .destroy = nft_ct_destroy,
+ .destroy = nft_ct_set_destroy,
.dump = nft_ct_set_dump,
};
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index ea924816b7b8..564fa7929ed5 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -71,13 +71,13 @@ static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
return 0;
}
-static bool nft_hash_lookup(const struct nft_set *set, const u32 *key,
- const struct nft_set_ext **ext)
+static bool nft_hash_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
{
struct nft_hash *priv = nft_set_priv(set);
const struct nft_hash_elem *he;
struct nft_hash_cmp_arg arg = {
- .genmask = nft_genmask_cur(read_pnet(&set->pnet)),
+ .genmask = nft_genmask_cur(net),
.set = set,
.key = key,
};
@@ -125,13 +125,13 @@ err1:
return false;
}
-static int nft_hash_insert(const struct nft_set *set,
+static int nft_hash_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_hash *priv = nft_set_priv(set);
struct nft_hash_elem *he = elem->priv;
struct nft_hash_cmp_arg arg = {
- .genmask = nft_genmask_next(read_pnet(&set->pnet)),
+ .genmask = nft_genmask_next(net),
.set = set,
.key = elem->key.val.data,
};
@@ -140,20 +140,20 @@ static int nft_hash_insert(const struct nft_set *set,
nft_hash_params);
}
-static void nft_hash_activate(const struct nft_set *set,
+static void nft_hash_activate(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_hash_elem *he = elem->priv;
- nft_set_elem_change_active(set, &he->ext);
+ nft_set_elem_change_active(net, set, &he->ext);
nft_set_elem_clear_busy(&he->ext);
}
-static void *nft_hash_deactivate(const struct nft_set *set,
+static void *nft_hash_deactivate(const struct net *net,
+ const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_hash *priv = nft_set_priv(set);
- struct net *net = read_pnet(&set->pnet);
struct nft_hash_elem *he;
struct nft_hash_cmp_arg arg = {
.genmask = nft_genmask_next(net),
@@ -166,7 +166,7 @@ static void *nft_hash_deactivate(const struct nft_set *set,
if (he != NULL) {
if (!nft_set_elem_mark_busy(&he->ext) ||
!nft_is_active(net, &he->ext))
- nft_set_elem_change_active(set, &he->ext);
+ nft_set_elem_change_active(net, set, &he->ext);
else
he = NULL;
}
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 713d66837705..24a73bb26e94 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -52,6 +52,14 @@ static int nft_log_init(const struct nft_ctx *ctx,
struct nft_log *priv = nft_expr_priv(expr);
struct nf_loginfo *li = &priv->loginfo;
const struct nlattr *nla;
+ int err;
+
+ li->type = NF_LOG_TYPE_LOG;
+ if (tb[NFTA_LOG_LEVEL] != NULL &&
+ tb[NFTA_LOG_GROUP] != NULL)
+ return -EINVAL;
+ if (tb[NFTA_LOG_GROUP] != NULL)
+ li->type = NF_LOG_TYPE_ULOG;
nla = tb[NFTA_LOG_PREFIX];
if (nla != NULL) {
@@ -63,13 +71,6 @@ static int nft_log_init(const struct nft_ctx *ctx,
priv->prefix = (char *)nft_log_null_prefix;
}
- li->type = NF_LOG_TYPE_LOG;
- if (tb[NFTA_LOG_LEVEL] != NULL &&
- tb[NFTA_LOG_GROUP] != NULL)
- return -EINVAL;
- if (tb[NFTA_LOG_GROUP] != NULL)
- li->type = NF_LOG_TYPE_ULOG;
-
switch (li->type) {
case NF_LOG_TYPE_LOG:
if (tb[NFTA_LOG_LEVEL] != NULL) {
@@ -78,6 +79,11 @@ static int nft_log_init(const struct nft_ctx *ctx,
} else {
li->u.log.level = LOGLEVEL_WARNING;
}
+ if (li->u.log.level > LOGLEVEL_DEBUG) {
+ err = -EINVAL;
+ goto err1;
+ }
+
if (tb[NFTA_LOG_FLAGS] != NULL) {
li->u.log.logflags =
ntohl(nla_get_be32(tb[NFTA_LOG_FLAGS]));
@@ -86,6 +92,7 @@ static int nft_log_init(const struct nft_ctx *ctx,
case NF_LOG_TYPE_ULOG:
li->u.ulog.group = ntohs(nla_get_be16(tb[NFTA_LOG_GROUP]));
if (tb[NFTA_LOG_SNAPLEN] != NULL) {
+ li->u.ulog.flags |= NF_LOG_F_COPY_LEN;
li->u.ulog.copy_len =
ntohl(nla_get_be32(tb[NFTA_LOG_SNAPLEN]));
}
@@ -96,7 +103,16 @@ static int nft_log_init(const struct nft_ctx *ctx,
break;
}
- return nf_logger_find_get(ctx->afi->family, li->type);
+ err = nf_logger_find_get(ctx->afi->family, li->type);
+ if (err < 0)
+ goto err1;
+
+ return 0;
+
+err1:
+ if (priv->prefix != nft_log_null_prefix)
+ kfree(priv->prefix);
+ return err;
}
static void nft_log_destroy(const struct nft_ctx *ctx,
@@ -134,7 +150,7 @@ static int nft_log_dump(struct sk_buff *skb, const struct nft_expr *expr)
if (nla_put_be16(skb, NFTA_LOG_GROUP, htons(li->u.ulog.group)))
goto nla_put_failure;
- if (li->u.ulog.copy_len) {
+ if (li->u.ulog.flags & NF_LOG_F_COPY_LEN) {
if (nla_put_be32(skb, NFTA_LOG_SNAPLEN,
htonl(li->u.ulog.copy_len)))
goto nla_put_failure;
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index b8d18f598569..e164325d1bc0 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -35,7 +35,7 @@ static void nft_lookup_eval(const struct nft_expr *expr,
const struct nft_set_ext *ext;
bool found;
- found = set->ops->lookup(set, &regs->data[priv->sreg], &ext) ^
+ found = set->ops->lookup(pkt->net, set, &regs->data[priv->sreg], &ext) ^
priv->invert;
if (!found) {
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index c0f638745adc..6473936d05c6 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -41,13 +41,13 @@ static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
}
-static bool nft_rbtree_lookup(const struct nft_set *set, const u32 *key,
- const struct nft_set_ext **ext)
+static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
+ const u32 *key, const struct nft_set_ext **ext)
{
const struct nft_rbtree *priv = nft_set_priv(set);
const struct nft_rbtree_elem *rbe, *interval = NULL;
+ u8 genmask = nft_genmask_cur(net);
const struct rb_node *parent;
- u8 genmask = nft_genmask_cur(read_pnet(&set->pnet));
const void *this;
int d;
@@ -93,13 +93,13 @@ out:
return false;
}
-static int __nft_rbtree_insert(const struct nft_set *set,
+static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *new)
{
struct nft_rbtree *priv = nft_set_priv(set);
+ u8 genmask = nft_genmask_next(net);
struct nft_rbtree_elem *rbe;
struct rb_node *parent, **p;
- u8 genmask = nft_genmask_next(read_pnet(&set->pnet));
int d;
parent = NULL;
@@ -132,14 +132,14 @@ static int __nft_rbtree_insert(const struct nft_set *set,
return 0;
}
-static int nft_rbtree_insert(const struct nft_set *set,
+static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_rbtree_elem *rbe = elem->priv;
int err;
spin_lock_bh(&nft_rbtree_lock);
- err = __nft_rbtree_insert(set, rbe);
+ err = __nft_rbtree_insert(net, set, rbe);
spin_unlock_bh(&nft_rbtree_lock);
return err;
@@ -156,21 +156,23 @@ static void nft_rbtree_remove(const struct nft_set *set,
spin_unlock_bh(&nft_rbtree_lock);
}
-static void nft_rbtree_activate(const struct nft_set *set,
+static void nft_rbtree_activate(const struct net *net,
+ const struct nft_set *set,
const struct nft_set_elem *elem)
{
struct nft_rbtree_elem *rbe = elem->priv;
- nft_set_elem_change_active(set, &rbe->ext);
+ nft_set_elem_change_active(net, set, &rbe->ext);
}
-static void *nft_rbtree_deactivate(const struct nft_set *set,
+static void *nft_rbtree_deactivate(const struct net *net,
+ const struct nft_set *set,
const struct nft_set_elem *elem)
{
const struct nft_rbtree *priv = nft_set_priv(set);
const struct rb_node *parent = priv->root.rb_node;
struct nft_rbtree_elem *rbe, *this = elem->priv;
- u8 genmask = nft_genmask_next(read_pnet(&set->pnet));
+ u8 genmask = nft_genmask_next(net);
int d;
while (parent != NULL) {
@@ -196,7 +198,7 @@ static void *nft_rbtree_deactivate(const struct nft_set *set,
parent = parent->rb_right;
continue;
}
- nft_set_elem_change_active(set, &rbe->ext);
+ nft_set_elem_change_active(net, set, &rbe->ext);
return rbe;
}
}
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index fe0e2db632c7..e0aa7c1d0224 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -702,6 +702,56 @@ int xt_check_entry_offsets(const void *base,
}
EXPORT_SYMBOL(xt_check_entry_offsets);
+/**
+ * xt_alloc_entry_offsets - allocate array to store rule head offsets
+ *
+ * @size: number of entries
+ *
+ * Return: NULL or kmalloc'd or vmalloc'd array
+ */
+unsigned int *xt_alloc_entry_offsets(unsigned int size)
+{
+ unsigned int *off;
+
+ off = kcalloc(size, sizeof(unsigned int), GFP_KERNEL | __GFP_NOWARN);
+
+ if (off)
+ return off;
+
+ if (size < (SIZE_MAX / sizeof(unsigned int)))
+ off = vmalloc(size * sizeof(unsigned int));
+
+ return off;
+}
+EXPORT_SYMBOL(xt_alloc_entry_offsets);
+
+/**
+ * xt_find_jump_offset - check if target is a valid jump offset
+ *
+ * @offsets: array containing all valid rule start offsets of a rule blob
+ * @target: the jump target to search for
+ * @size: entries in @offset
+ */
+bool xt_find_jump_offset(const unsigned int *offsets,
+ unsigned int target, unsigned int size)
+{
+ int m, low = 0, hi = size;
+
+ while (hi > low) {
+ m = (low + hi) / 2u;
+
+ if (offsets[m] > target)
+ hi = m;
+ else if (offsets[m] < target)
+ low = m + 1;
+ else
+ return true;
+ }
+
+ return false;
+}
+EXPORT_SYMBOL(xt_find_jump_offset);
+
int xt_check_target(struct xt_tgchk_param *par,
unsigned int size, u_int8_t proto, bool inv_proto)
{
diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c
index a79af255561a..03d66f1c5e69 100644
--- a/net/netfilter/xt_connlabel.c
+++ b/net/netfilter/xt_connlabel.c
@@ -9,6 +9,7 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_labels.h>
#include <linux/netfilter/x_tables.h>
@@ -18,21 +19,12 @@ MODULE_DESCRIPTION("Xtables: add/match connection trackling labels");
MODULE_ALIAS("ipt_connlabel");
MODULE_ALIAS("ip6t_connlabel");
-static bool connlabel_match(const struct nf_conn *ct, u16 bit)
-{
- struct nf_conn_labels *labels = nf_ct_labels_find(ct);
-
- if (!labels)
- return false;
-
- return BIT_WORD(bit) < labels->words && test_bit(bit, labels->bits);
-}
-
static bool
connlabel_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_connlabel_mtinfo *info = par->matchinfo;
enum ip_conntrack_info ctinfo;
+ struct nf_conn_labels *labels;
struct nf_conn *ct;
bool invert = info->options & XT_CONNLABEL_OP_INVERT;
@@ -40,10 +32,21 @@ connlabel_mt(const struct sk_buff *skb, struct xt_action_param *par)
if (ct == NULL || nf_ct_is_untracked(ct))
return invert;
- if (info->options & XT_CONNLABEL_OP_SET)
- return (nf_connlabel_set(ct, info->bit) == 0) ^ invert;
+ labels = nf_ct_labels_find(ct);
+ if (!labels)
+ return invert;
+
+ if (test_bit(info->bit, labels->bits))
+ return !invert;
+
+ if (info->options & XT_CONNLABEL_OP_SET) {
+ if (!test_and_set_bit(info->bit, labels->bits))
+ nf_conntrack_event_cache(IPCT_LABEL, ct);
+
+ return !invert;
+ }
- return connlabel_match(ct, info->bit) ^ invert;
+ return invert;
}
static int connlabel_mt_check(const struct xt_mtchk_param *par)
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 1caaccbc306c..e5f18988aee0 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -102,14 +102,14 @@ static int physdev_mt_check(const struct xt_mtchk_param *par)
if (!(info->bitmask & XT_PHYSDEV_OP_MASK) ||
info->bitmask & ~XT_PHYSDEV_OP_MASK)
return -EINVAL;
- if (info->bitmask & XT_PHYSDEV_OP_OUT &&
+ if (info->bitmask & (XT_PHYSDEV_OP_OUT | XT_PHYSDEV_OP_ISOUT) &&
(!(info->bitmask & XT_PHYSDEV_OP_BRIDGED) ||
info->invert & XT_PHYSDEV_OP_BRIDGED) &&
par->hook_mask & ((1 << NF_INET_LOCAL_OUT) |
(1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) {
- pr_info("using --physdev-out in the OUTPUT, FORWARD and "
- "POSTROUTING chains for non-bridged traffic is not "
- "supported anymore.\n");
+ pr_info("using --physdev-out and --physdev-is-out are only"
+ "supported in the FORWARD and POSTROUTING chains with"
+ "bridged traffic.\n");
if (par->hook_mask & (1 << NF_INET_LOCAL_OUT))
return -EINVAL;
}
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index b4069a90e375..c644c78ed485 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -135,7 +135,7 @@ static void ovs_ct_get_labels(const struct nf_conn *ct,
struct nf_conn_labels *cl = ct ? nf_ct_labels_find(ct) : NULL;
if (cl) {
- size_t len = cl->words * sizeof(long);
+ size_t len = sizeof(cl->bits);
if (len > OVS_CT_LABELS_LEN)
len = OVS_CT_LABELS_LEN;
@@ -274,7 +274,7 @@ static int ovs_ct_set_labels(struct sk_buff *skb, struct sw_flow_key *key,
nf_ct_labels_ext_add(ct);
cl = nf_ct_labels_find(ct);
}
- if (!cl || cl->words * sizeof(long) < OVS_CT_LABELS_LEN)
+ if (!cl || sizeof(cl->bits) < OVS_CT_LABELS_LEN)
return -ENOSPC;
err = nf_connlabels_replace(ct, (u32 *)labels, (u32 *)mask,
OpenPOWER on IntegriCloud