summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2018-12-20 18:20:26 -0800
committerDavid S. Miller <davem@davemloft.net>2018-12-20 18:20:26 -0800
commitc3e533692527046fb55020e7fac8c4272644ba45 (patch)
treeee442571421f406bf12dd16629f36e5ed91b3bd6
parent339bbff2d6e005a5586adeffc3d69a0eea50a764 (diff)
parent8527f9df04a8b5f6ee24ae7bdda5a94d73c7d243 (diff)
downloadtalos-obmc-linux-c3e533692527046fb55020e7fac8c4272644ba45.tar.gz
talos-obmc-linux-c3e533692527046fb55020e7fac8c4272644ba45.zip
Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says: ==================== Netfilter updates for net-next The following patchset contains Netfilter updates for net-next: 1) Support for destination MAC in ipset, from Stefano Brivio. 2) Disallow all-zeroes MAC address in ipset, also from Stefano. 3) Add IPSET_CMD_GET_BYNAME and IPSET_CMD_GET_BYINDEX commands, introduce protocol version number 7, from Jozsef Kadlecsik. A follow up patch to fix ip_set_byindex() is also included in this batch. 4) Honor CTA_MARK_MASK from ctnetlink, from Andreas Jaggi. 5) Statify nf_flow_table_iterate(), from Taehee Yoo. 6) Use nf_flow_table_iterate() to simplify garbage collection in nf_flow_table logic, also from Taehee Yoo. 7) Don't use _bh variants of call_rcu(), rcu_barrier() and synchronize_rcu_bh() in Netfilter, from Paul E. McKenney. 8) Remove NFC_* cache definition from the old caching infrastructure. 9) Remove layer 4 port rover in NAT helpers, use random port instead, from Florian Westphal. 10) Use strscpy() in ipset, from Qian Cai. 11) Remove NF_NAT_RANGE_PROTO_RANDOM_FULLY branch now that random port is allocated by default, from Xiaozhou Liu. 12) Ignore NF_NAT_RANGE_PROTO_RANDOM too, from Florian Westphal. 13) Limit port allocation selection routine in NAT to avoid softlockup splats when most ports are in use, from Florian. 14) Remove unused parameters in nf_ct_l4proto_unregister_sysctl() from Yafang Shao. 15) Direct call to nf_nat_l4proto_unique_tuple() instead of indirection, from Florian Westphal. 16) Several patches to remove all layer 4 NAT indirections, remove nf_nat_l4proto struct, from Florian Westphal. 17) Fix RTP/RTCP source port translation when SNAT is in place, from Alin Nastac. 18) Selective rule dump per chain, from Phil Sutter. 19) Revisit CLUSTERIP target, this includes a deadlock fix from netns path, sleep in atomic, remove bogus WARN_ON_ONCE() and disallow mismatching IP address and MAC address. Patchset from Taehee Yoo. 20) Update UDP timeout to stream after 2 seconds, from Florian. 21) Shrink UDP established timeout to 120 seconds like TCP timewait. 22) Sysctl knobs to set GRE timeouts, from Yafang Shao. 23) Move seq_print_acct() to conntrack core file, from Florian. 24) Add enum for conntrack sysctl knobs, also from Florian. 25) Place nf_conntrack_acct, nf_conntrack_helper, nf_conntrack_events and nf_conntrack_timestamp knobs in the core, from Florian Westphal. As a side effect, shrink netns_ct structure by removing obsolete sysctl anchors, also from Florian. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--Documentation/networking/nf_conntrack-sysctl.txt11
-rw-r--r--include/linux/netfilter/ipset/ip_set.h2
-rw-r--r--include/linux/netfilter/nf_conntrack_proto_gre.h2
-rw-r--r--include/net/netfilter/nf_conntrack.h5
-rw-r--r--include/net/netfilter/nf_conntrack_acct.h6
-rw-r--r--include/net/netfilter/nf_conntrack_ecache.h7
-rw-r--r--include/net/netfilter/nf_conntrack_helper.h3
-rw-r--r--include/net/netfilter/nf_conntrack_timestamp.h13
-rw-r--r--include/net/netfilter/nf_flow_table.h4
-rw-r--r--include/net/netfilter/nf_nat_l3proto.h7
-rw-r--r--include/net/netfilter/nf_nat_l4proto.h78
-rw-r--r--include/net/netns/conntrack.h6
-rw-r--r--include/uapi/linux/netfilter.h4
-rw-r--r--include/uapi/linux/netfilter/ipset/ip_set.h19
-rw-r--r--include/uapi/linux/netfilter_decnet.h10
-rw-r--r--include/uapi/linux/netfilter_ipv4.h28
-rw-r--r--include/uapi/linux/netfilter_ipv6.h29
-rw-r--r--net/ipv4/netfilter/Kconfig5
-rw-r--r--net/ipv4/netfilter/Makefile5
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c184
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c43
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c2
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_gre.c150
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_icmp.c83
-rw-r--r--net/ipv6/netfilter/Makefile2
-rw-r--r--net/ipv6/netfilter/nf_nat_l3proto_ipv6.c43
-rw-r--r--net/ipv6/netfilter/nf_nat_proto_icmpv6.c90
-rw-r--r--net/netfilter/Kconfig15
-rw-r--r--net/netfilter/Makefile7
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c13
-rw-r--r--net/netfilter/ipset/ip_set_core.c170
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h4
-rw-r--r--net/netfilter/ipset/ip_set_hash_ipmac.c27
-rw-r--r--net/netfilter/ipset/ip_set_hash_mac.c10
-rw-r--r--net/netfilter/nf_conntrack_acct.c89
-rw-r--r--net/netfilter/nf_conntrack_core.c28
-rw-r--r--net/netfilter/nf_conntrack_ecache.c66
-rw-r--r--net/netfilter/nf_conntrack_helper.c69
-rw-r--r--net/netfilter/nf_conntrack_netlink.c30
-rw-r--r--net/netfilter/nf_conntrack_proto.c21
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c42
-rw-r--r--net/netfilter/nf_conntrack_proto_udp.c18
-rw-r--r--net/netfilter/nf_conntrack_standalone.c103
-rw-r--r--net/netfilter/nf_conntrack_timestamp.c70
-rw-r--r--net/netfilter/nf_flow_table_core.c42
-rw-r--r--net/netfilter/nf_nat_core.c327
-rw-r--r--net/netfilter/nf_nat_proto.c343
-rw-r--r--net/netfilter/nf_nat_proto_common.c120
-rw-r--r--net/netfilter/nf_nat_proto_dccp.c82
-rw-r--r--net/netfilter/nf_nat_proto_sctp.c77
-rw-r--r--net/netfilter/nf_nat_proto_tcp.c85
-rw-r--r--net/netfilter/nf_nat_proto_udp.c130
-rw-r--r--net/netfilter/nf_nat_proto_unknown.c54
-rw-r--r--net/netfilter/nf_nat_sip.c39
-rw-r--r--net/netfilter/nf_tables_api.c90
-rw-r--r--net/netfilter/nfnetlink_log.c2
-rw-r--r--net/netfilter/xt_hashlimit.c4
57 files changed, 1166 insertions, 1852 deletions
diff --git a/Documentation/networking/nf_conntrack-sysctl.txt b/Documentation/networking/nf_conntrack-sysctl.txt
index 1669dc2419fd..f75c2ce6e136 100644
--- a/Documentation/networking/nf_conntrack-sysctl.txt
+++ b/Documentation/networking/nf_conntrack-sysctl.txt
@@ -157,7 +157,16 @@ nf_conntrack_udp_timeout - INTEGER (seconds)
default 30
nf_conntrack_udp_timeout_stream - INTEGER (seconds)
- default 180
+ default 120
This extended timeout will be used in case there is an UDP stream
detected.
+
+nf_conntrack_gre_timeout - INTEGER (seconds)
+ default 30
+
+nf_conntrack_gre_timeout_stream - INTEGER (seconds)
+ default 180
+
+ This extended timeout will be used in case there is an GRE stream
+ detected.
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 1d100efe74ec..f2e1e6b13ca4 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -303,11 +303,11 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
/* Netlink CB args */
enum {
IPSET_CB_NET = 0, /* net namespace */
+ IPSET_CB_PROTO, /* ipset protocol */
IPSET_CB_DUMP, /* dump single set/all sets */
IPSET_CB_INDEX, /* set index */
IPSET_CB_PRIVATE, /* set private data */
IPSET_CB_ARG0, /* type specific */
- IPSET_CB_ARG1,
};
/* register and unregister set references */
diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h
index 14edb795ab43..6989e2e4eabf 100644
--- a/include/linux/netfilter/nf_conntrack_proto_gre.h
+++ b/include/linux/netfilter/nf_conntrack_proto_gre.h
@@ -41,7 +41,5 @@ int nf_ct_gre_keymap_add(struct nf_conn *ct, enum ip_conntrack_dir dir,
/* delete keymap entries */
void nf_ct_gre_keymap_destroy(struct nf_conn *ct);
-void nf_nat_need_gre(void);
-
#endif /* __KERNEL__ */
#endif /* _CONNTRACK_PROTO_GRE_H */
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index 7e012312cd61..249d0a5b12b8 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -27,12 +27,17 @@
#include <net/netfilter/nf_conntrack_tuple.h>
+struct nf_ct_udp {
+ unsigned long stream_ts;
+};
+
/* per conntrack: protocol private data */
union nf_conntrack_proto {
/* insert conntrack proto private data here */
struct nf_ct_dccp dccp;
struct ip_ct_sctp sctp;
struct ip_ct_tcp tcp;
+ struct nf_ct_udp udp;
struct nf_ct_gre gre;
unsigned int tmpl_padto;
};
diff --git a/include/net/netfilter/nf_conntrack_acct.h b/include/net/netfilter/nf_conntrack_acct.h
index 79d8d16732b4..bc6745d3010e 100644
--- a/include/net/netfilter/nf_conntrack_acct.h
+++ b/include/net/netfilter/nf_conntrack_acct.h
@@ -46,9 +46,6 @@ struct nf_conn_acct *nf_ct_acct_ext_add(struct nf_conn *ct, gfp_t gfp)
return acct;
};
-unsigned int seq_print_acct(struct seq_file *s, const struct nf_conn *ct,
- int dir);
-
/* Check if connection tracking accounting is enabled */
static inline bool nf_ct_acct_enabled(struct net *net)
{
@@ -61,8 +58,7 @@ static inline void nf_ct_set_acct(struct net *net, bool enable)
net->ct.sysctl_acct = enable;
}
-int nf_conntrack_acct_pernet_init(struct net *net);
-void nf_conntrack_acct_pernet_fini(struct net *net);
+void nf_conntrack_acct_pernet_init(struct net *net);
int nf_conntrack_acct_init(void);
void nf_conntrack_acct_fini(void);
diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h
index 3f1ce9a8776e..52b44192b43f 100644
--- a/include/net/netfilter/nf_conntrack_ecache.h
+++ b/include/net/netfilter/nf_conntrack_ecache.h
@@ -142,7 +142,7 @@ void nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
struct nf_conntrack_expect *exp,
u32 portid, int report);
-int nf_conntrack_ecache_pernet_init(struct net *net);
+void nf_conntrack_ecache_pernet_init(struct net *net);
void nf_conntrack_ecache_pernet_fini(struct net *net);
int nf_conntrack_ecache_init(void);
@@ -182,10 +182,7 @@ static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e,
u32 portid,
int report) {}
-static inline int nf_conntrack_ecache_pernet_init(struct net *net)
-{
- return 0;
-}
+static inline void nf_conntrack_ecache_pernet_init(struct net *net) {}
static inline void nf_conntrack_ecache_pernet_fini(struct net *net)
{
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index 2492120b8097..ec52a8dc32fd 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -124,8 +124,7 @@ static inline void *nfct_help_data(const struct nf_conn *ct)
return (void *)help->data;
}
-int nf_conntrack_helper_pernet_init(struct net *net);
-void nf_conntrack_helper_pernet_fini(struct net *net);
+void nf_conntrack_helper_pernet_init(struct net *net);
int nf_conntrack_helper_init(void);
void nf_conntrack_helper_fini(void);
diff --git a/include/net/netfilter/nf_conntrack_timestamp.h b/include/net/netfilter/nf_conntrack_timestamp.h
index 3b661986be8f..0ed617bf0a3d 100644
--- a/include/net/netfilter/nf_conntrack_timestamp.h
+++ b/include/net/netfilter/nf_conntrack_timestamp.h
@@ -49,21 +49,12 @@ static inline void nf_ct_set_tstamp(struct net *net, bool enable)
}
#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
-int nf_conntrack_tstamp_pernet_init(struct net *net);
-void nf_conntrack_tstamp_pernet_fini(struct net *net);
+void nf_conntrack_tstamp_pernet_init(struct net *net);
int nf_conntrack_tstamp_init(void);
void nf_conntrack_tstamp_fini(void);
#else
-static inline int nf_conntrack_tstamp_pernet_init(struct net *net)
-{
- return 0;
-}
-
-static inline void nf_conntrack_tstamp_pernet_fini(struct net *net)
-{
- return;
-}
+static inline void nf_conntrack_tstamp_pernet_init(struct net *net) {}
static inline int nf_conntrack_tstamp_init(void)
{
diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index 77e2761d4f2f..7d5cda7ce32a 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -95,10 +95,6 @@ void flow_offload_free(struct flow_offload *flow);
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
struct flow_offload_tuple *tuple);
-int nf_flow_table_iterate(struct nf_flowtable *flow_table,
- void (*iter)(struct flow_offload *flow, void *data),
- void *data);
-
void nf_flow_table_cleanup(struct net_device *dev);
int nf_flow_table_init(struct nf_flowtable *flow_table);
diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h
index d300b8f03972..d774ca0c4c5e 100644
--- a/include/net/netfilter/nf_nat_l3proto.h
+++ b/include/net/netfilter/nf_nat_l3proto.h
@@ -2,18 +2,11 @@
#ifndef _NF_NAT_L3PROTO_H
#define _NF_NAT_L3PROTO_H
-struct nf_nat_l4proto;
struct nf_nat_l3proto {
u8 l3proto;
- bool (*in_range)(const struct nf_conntrack_tuple *t,
- const struct nf_nat_range2 *range);
-
- u32 (*secure_port)(const struct nf_conntrack_tuple *t, __be16);
-
bool (*manip_pkt)(struct sk_buff *skb,
unsigned int iphdroff,
- const struct nf_nat_l4proto *l4proto,
const struct nf_conntrack_tuple *target,
enum nf_nat_manip_type maniptype);
diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h
index b4d6b29bca62..95a4655bd1ad 100644
--- a/include/net/netfilter/nf_nat_l4proto.h
+++ b/include/net/netfilter/nf_nat_l4proto.h
@@ -5,78 +5,12 @@
#include <net/netfilter/nf_nat.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
-struct nf_nat_range;
struct nf_nat_l3proto;
-struct nf_nat_l4proto {
- /* Protocol number. */
- u8 l4proto;
-
- /* Translate a packet to the target according to manip type.
- * Return true if succeeded.
- */
- bool (*manip_pkt)(struct sk_buff *skb,
- const struct nf_nat_l3proto *l3proto,
- unsigned int iphdroff, unsigned int hdroff,
- const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype);
-
- /* Is the manipable part of the tuple between min and max incl? */
- bool (*in_range)(const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype,
- const union nf_conntrack_man_proto *min,
- const union nf_conntrack_man_proto *max);
-
- /* Alter the per-proto part of the tuple (depending on
- * maniptype), to give a unique tuple in the given range if
- * possible. Per-protocol part of tuple is initialized to the
- * incoming packet.
- */
- void (*unique_tuple)(const struct nf_nat_l3proto *l3proto,
- struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct);
-
- int (*nlattr_to_range)(struct nlattr *tb[],
- struct nf_nat_range2 *range);
-};
-
-/* Protocol registration. */
-int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto);
-void nf_nat_l4proto_unregister(u8 l3proto,
- const struct nf_nat_l4proto *l4proto);
-
-const struct nf_nat_l4proto *__nf_nat_l4proto_find(u8 l3proto, u8 l4proto);
-
-/* Built-in protocols. */
-extern const struct nf_nat_l4proto nf_nat_l4proto_tcp;
-extern const struct nf_nat_l4proto nf_nat_l4proto_udp;
-extern const struct nf_nat_l4proto nf_nat_l4proto_icmp;
-extern const struct nf_nat_l4proto nf_nat_l4proto_icmpv6;
-extern const struct nf_nat_l4proto nf_nat_l4proto_unknown;
-#ifdef CONFIG_NF_NAT_PROTO_DCCP
-extern const struct nf_nat_l4proto nf_nat_l4proto_dccp;
-#endif
-#ifdef CONFIG_NF_NAT_PROTO_SCTP
-extern const struct nf_nat_l4proto nf_nat_l4proto_sctp;
-#endif
-#ifdef CONFIG_NF_NAT_PROTO_UDPLITE
-extern const struct nf_nat_l4proto nf_nat_l4proto_udplite;
-#endif
-
-bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype,
- const union nf_conntrack_man_proto *min,
- const union nf_conntrack_man_proto *max);
-
-void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
- struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct, u16 *rover);
-
-int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[],
- struct nf_nat_range2 *range);
-
+/* Translate a packet to the target according to manip type. Return on success. */
+bool nf_nat_l4proto_manip_pkt(struct sk_buff *skb,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, unsigned int hdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype);
#endif /*_NF_NAT_L4PROTO_H*/
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index 9795d628a127..51cba0b8adf5 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -97,18 +97,14 @@ struct netns_ct {
struct delayed_work ecache_dwork;
bool ecache_dwork_pending;
#endif
+ bool auto_assign_helper_warned;
#ifdef CONFIG_SYSCTL
struct ctl_table_header *sysctl_header;
- struct ctl_table_header *acct_sysctl_header;
- struct ctl_table_header *tstamp_sysctl_header;
- struct ctl_table_header *event_sysctl_header;
- struct ctl_table_header *helper_sysctl_header;
#endif
unsigned int sysctl_log_invalid; /* Log invalid packets */
int sysctl_events;
int sysctl_acct;
int sysctl_auto_assign_helper;
- bool auto_assign_helper_warned;
int sysctl_tstamp;
int sysctl_checksum;
diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h
index cca10e767cd8..ca9e63d6e0e4 100644
--- a/include/uapi/linux/netfilter.h
+++ b/include/uapi/linux/netfilter.h
@@ -34,10 +34,6 @@
/* only for userspace compatibility */
#ifndef __KERNEL__
-/* Generic cache responses from hook functions.
- <= 0x2000 is used for protocol-flags. */
-#define NFC_UNKNOWN 0x4000
-#define NFC_ALTERED 0x8000
/* NF_VERDICT_BITS should be 8 now, but userspace might break if this changes */
#define NF_VERDICT_BITS 16
diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
index 60236f694143..ea69ca21ff23 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -13,8 +13,9 @@
#include <linux/types.h>
-/* The protocol version */
-#define IPSET_PROTOCOL 6
+/* The protocol versions */
+#define IPSET_PROTOCOL 7
+#define IPSET_PROTOCOL_MIN 6
/* The max length of strings including NUL: set and type identifiers */
#define IPSET_MAXNAMELEN 32
@@ -38,17 +39,19 @@ enum ipset_cmd {
IPSET_CMD_TEST, /* 11: Test an element in a set */
IPSET_CMD_HEADER, /* 12: Get set header data only */
IPSET_CMD_TYPE, /* 13: Get set type */
+ IPSET_CMD_GET_BYNAME, /* 14: Get set index by name */
+ IPSET_CMD_GET_BYINDEX, /* 15: Get set name by index */
IPSET_MSG_MAX, /* Netlink message commands */
/* Commands in userspace: */
- IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* 14: Enter restore mode */
- IPSET_CMD_HELP, /* 15: Get help */
- IPSET_CMD_VERSION, /* 16: Get program version */
- IPSET_CMD_QUIT, /* 17: Quit from interactive mode */
+ IPSET_CMD_RESTORE = IPSET_MSG_MAX, /* 16: Enter restore mode */
+ IPSET_CMD_HELP, /* 17: Get help */
+ IPSET_CMD_VERSION, /* 18: Get program version */
+ IPSET_CMD_QUIT, /* 19: Quit from interactive mode */
IPSET_CMD_MAX,
- IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* 18: Commit buffered commands */
+ IPSET_CMD_COMMIT = IPSET_CMD_MAX, /* 20: Commit buffered commands */
};
/* Attributes at command level */
@@ -66,6 +69,7 @@ enum {
IPSET_ATTR_LINENO, /* 9: Restore lineno */
IPSET_ATTR_PROTOCOL_MIN, /* 10: Minimal supported version number */
IPSET_ATTR_REVISION_MIN = IPSET_ATTR_PROTOCOL_MIN, /* type rev min */
+ IPSET_ATTR_INDEX, /* 11: Kernel index of set */
__IPSET_ATTR_CMD_MAX,
};
#define IPSET_ATTR_CMD_MAX (__IPSET_ATTR_CMD_MAX - 1)
@@ -223,6 +227,7 @@ enum ipset_adt {
/* Sets are identified by an index in kernel space. Tweak with ip_set_id_t
* and IPSET_INVALID_ID if you want to increase the max number of sets.
+ * Also, IPSET_ATTR_INDEX must be changed.
*/
typedef __u16 ip_set_id_t;
diff --git a/include/uapi/linux/netfilter_decnet.h b/include/uapi/linux/netfilter_decnet.h
index 61f1c7dfd033..3c77f54560f2 100644
--- a/include/uapi/linux/netfilter_decnet.h
+++ b/include/uapi/linux/netfilter_decnet.h
@@ -15,16 +15,6 @@
#include <limits.h> /* for INT_MIN, INT_MAX */
-/* IP Cache bits. */
-/* Src IP address. */
-#define NFC_DN_SRC 0x0001
-/* Dest IP address. */
-#define NFC_DN_DST 0x0002
-/* Input device. */
-#define NFC_DN_IF_IN 0x0004
-/* Output device. */
-#define NFC_DN_IF_OUT 0x0008
-
/* kernel define is in netfilter_defs.h */
#define NF_DN_NUMHOOKS 7
#endif /* ! __KERNEL__ */
diff --git a/include/uapi/linux/netfilter_ipv4.h b/include/uapi/linux/netfilter_ipv4.h
index c3b060775e13..155e77d6a42d 100644
--- a/include/uapi/linux/netfilter_ipv4.h
+++ b/include/uapi/linux/netfilter_ipv4.h
@@ -13,34 +13,6 @@
#include <limits.h> /* for INT_MIN, INT_MAX */
-/* IP Cache bits. */
-/* Src IP address. */
-#define NFC_IP_SRC 0x0001
-/* Dest IP address. */
-#define NFC_IP_DST 0x0002
-/* Input device. */
-#define NFC_IP_IF_IN 0x0004
-/* Output device. */
-#define NFC_IP_IF_OUT 0x0008
-/* TOS. */
-#define NFC_IP_TOS 0x0010
-/* Protocol. */
-#define NFC_IP_PROTO 0x0020
-/* IP options. */
-#define NFC_IP_OPTIONS 0x0040
-/* Frag & flags. */
-#define NFC_IP_FRAG 0x0080
-
-/* Per-protocol information: only matters if proto match. */
-/* TCP flags. */
-#define NFC_IP_TCPFLAGS 0x0100
-/* Source port. */
-#define NFC_IP_SRC_PT 0x0200
-/* Dest port. */
-#define NFC_IP_DST_PT 0x0400
-/* Something else about the proto */
-#define NFC_IP_PROTO_UNKNOWN 0x2000
-
/* IP Hooks */
/* After promisc drops, checksum checks. */
#define NF_IP_PRE_ROUTING 0
diff --git a/include/uapi/linux/netfilter_ipv6.h b/include/uapi/linux/netfilter_ipv6.h
index dc624fd24d25..80aa9b0799af 100644
--- a/include/uapi/linux/netfilter_ipv6.h
+++ b/include/uapi/linux/netfilter_ipv6.h
@@ -16,35 +16,6 @@
#include <limits.h> /* for INT_MIN, INT_MAX */
-/* IP Cache bits. */
-/* Src IP address. */
-#define NFC_IP6_SRC 0x0001
-/* Dest IP address. */
-#define NFC_IP6_DST 0x0002
-/* Input device. */
-#define NFC_IP6_IF_IN 0x0004
-/* Output device. */
-#define NFC_IP6_IF_OUT 0x0008
-/* TOS. */
-#define NFC_IP6_TOS 0x0010
-/* Protocol. */
-#define NFC_IP6_PROTO 0x0020
-/* IP options. */
-#define NFC_IP6_OPTIONS 0x0040
-/* Frag & flags. */
-#define NFC_IP6_FRAG 0x0080
-
-
-/* Per-protocol information: only matters if proto match. */
-/* TCP flags. */
-#define NFC_IP6_TCPFLAGS 0x0100
-/* Source port. */
-#define NFC_IP6_SRC_PT 0x0200
-/* Dest port. */
-#define NFC_IP6_DST_PT 0x0400
-/* Something else about the proto */
-#define NFC_IP6_PROTO_UNKNOWN 0x2000
-
/* IP6 Hooks */
/* After promisc drops, checksum checks. */
#define NF_IP6_PRE_ROUTING 0
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 184bf2e0a1ed..80f72cc5ca8d 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -156,15 +156,10 @@ config NF_NAT_SNMP_BASIC
To compile it as a module, choose M here. If unsure, say N.
-config NF_NAT_PROTO_GRE
- tristate
- depends on NF_CT_PROTO_GRE
-
config NF_NAT_PPTP
tristate
depends on NF_CONNTRACK
default NF_CONNTRACK_PPTP
- select NF_NAT_PROTO_GRE
config NF_NAT_H323
tristate
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 367993adf4d3..fd7122e0e2c9 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -3,7 +3,7 @@
# Makefile for the netfilter modules on top of IPv4.
#
-nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o
+nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o
nf_nat_ipv4-$(CONFIG_NF_NAT_MASQUERADE_IPV4) += nf_nat_masquerade_ipv4.o
obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
@@ -28,9 +28,6 @@ nf_nat_snmp_basic-y := nf_nat_snmp_basic.asn1.o nf_nat_snmp_basic_main.o
$(obj)/nf_nat_snmp_basic_main.o: $(obj)/nf_nat_snmp_basic.asn1.h
obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
-# NAT protocols (nf_nat)
-obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
-
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 2c8d313ae216..b61977db9b7f 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -56,18 +56,15 @@ struct clusterip_config {
#endif
enum clusterip_hashmode hash_mode; /* which hashing mode */
u_int32_t hash_initval; /* hash initialization */
- struct rcu_head rcu;
-
+ struct rcu_head rcu; /* for call_rcu_bh */
+ struct net *net; /* netns for pernet list */
char ifname[IFNAMSIZ]; /* device ifname */
- struct notifier_block notifier; /* refresh c->ifindex in it */
};
#ifdef CONFIG_PROC_FS
static const struct file_operations clusterip_proc_fops;
#endif
-static unsigned int clusterip_net_id __read_mostly;
-
struct clusterip_net {
struct list_head configs;
/* lock protects the configs list */
@@ -75,51 +72,66 @@ struct clusterip_net {
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *procdir;
+ /* mutex protects the config->pde*/
+ struct mutex mutex;
#endif
};
+static unsigned int clusterip_net_id __read_mostly;
+static inline struct clusterip_net *clusterip_pernet(struct net *net)
+{
+ return net_generic(net, clusterip_net_id);
+}
+
static inline void
clusterip_config_get(struct clusterip_config *c)
{
refcount_inc(&c->refcount);
}
-
static void clusterip_config_rcu_free(struct rcu_head *head)
{
- kfree(container_of(head, struct clusterip_config, rcu));
+ struct clusterip_config *config;
+ struct net_device *dev;
+
+ config = container_of(head, struct clusterip_config, rcu);
+ dev = dev_get_by_name(config->net, config->ifname);
+ if (dev) {
+ dev_mc_del(dev, config->clustermac);
+ dev_put(dev);
+ }
+ kfree(config);
}
static inline void
clusterip_config_put(struct clusterip_config *c)
{
if (refcount_dec_and_test(&c->refcount))
- call_rcu_bh(&c->rcu, clusterip_config_rcu_free);
+ call_rcu(&c->rcu, clusterip_config_rcu_free);
}
/* decrease the count of entries using/referencing this config. If last
* entry(rule) is removed, remove the config from lists, but don't free it
* yet, since proc-files could still be holding references */
static inline void
-clusterip_config_entry_put(struct net *net, struct clusterip_config *c)
+clusterip_config_entry_put(struct clusterip_config *c)
{
- struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+ struct clusterip_net *cn = clusterip_pernet(c->net);
local_bh_disable();
if (refcount_dec_and_lock(&c->entries, &cn->lock)) {
+ list_del_rcu(&c->list);
+ spin_unlock(&cn->lock);
+ local_bh_enable();
/* In case anyone still accesses the file, the open/close
* functions are also incrementing the refcount on their own,
* so it's safe to remove the entry even if it's in use. */
#ifdef CONFIG_PROC_FS
+ mutex_lock(&cn->mutex);
if (cn->procdir)
proc_remove(c->pde);
+ mutex_unlock(&cn->mutex);
#endif
- list_del_rcu(&c->list);
- spin_unlock(&cn->lock);
- local_bh_enable();
-
- unregister_netdevice_notifier(&c->notifier);
-
return;
}
local_bh_enable();
@@ -129,7 +141,7 @@ static struct clusterip_config *
__clusterip_config_find(struct net *net, __be32 clusterip)
{
struct clusterip_config *c;
- struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+ struct clusterip_net *cn = clusterip_pernet(net);
list_for_each_entry_rcu(c, &cn->configs, list) {
if (c->clusterip == clusterip)
@@ -181,32 +193,37 @@ clusterip_netdev_event(struct notifier_block *this, unsigned long event,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net *net = dev_net(dev);
+ struct clusterip_net *cn = clusterip_pernet(net);
struct clusterip_config *c;
- c = container_of(this, struct clusterip_config, notifier);
- switch (event) {
- case NETDEV_REGISTER:
- if (!strcmp(dev->name, c->ifname)) {
- c->ifindex = dev->ifindex;
- dev_mc_add(dev, c->clustermac);
- }
- break;
- case NETDEV_UNREGISTER:
- if (dev->ifindex == c->ifindex) {
- dev_mc_del(dev, c->clustermac);
- c->ifindex = -1;
- }
- break;
- case NETDEV_CHANGENAME:
- if (!strcmp(dev->name, c->ifname)) {
- c->ifindex = dev->ifindex;
- dev_mc_add(dev, c->clustermac);
- } else if (dev->ifindex == c->ifindex) {
- dev_mc_del(dev, c->clustermac);
- c->ifindex = -1;
+ spin_lock_bh(&cn->lock);
+ list_for_each_entry_rcu(c, &cn->configs, list) {
+ switch (event) {
+ case NETDEV_REGISTER:
+ if (!strcmp(dev->name, c->ifname)) {
+ c->ifindex = dev->ifindex;
+ dev_mc_add(dev, c->clustermac);
+ }
+ break;
+ case NETDEV_UNREGISTER:
+ if (dev->ifindex == c->ifindex) {
+ dev_mc_del(dev, c->clustermac);
+ c->ifindex = -1;
+ }
+ break;
+ case NETDEV_CHANGENAME:
+ if (!strcmp(dev->name, c->ifname)) {
+ c->ifindex = dev->ifindex;
+ dev_mc_add(dev, c->clustermac);
+ } else if (dev->ifindex == c->ifindex) {
+ dev_mc_del(dev, c->clustermac);
+ c->ifindex = -1;
+ }
+ break;
}
- break;
}
+ spin_unlock_bh(&cn->lock);
return NOTIFY_DONE;
}
@@ -215,30 +232,44 @@ static struct clusterip_config *
clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
__be32 ip, const char *iniface)
{
- struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+ struct clusterip_net *cn = clusterip_pernet(net);
struct clusterip_config *c;
+ struct net_device *dev;
int err;
+ if (iniface[0] == '\0') {
+ pr_info("Please specify an interface name\n");
+ return ERR_PTR(-EINVAL);
+ }
+
c = kzalloc(sizeof(*c), GFP_ATOMIC);
if (!c)
return ERR_PTR(-ENOMEM);
- strcpy(c->ifname, iniface);
- c->ifindex = -1;
- c->clusterip = ip;
+ dev = dev_get_by_name(net, iniface);
+ if (!dev) {
+ pr_info("no such interface %s\n", iniface);
+ kfree(c);
+ return ERR_PTR(-ENOENT);
+ }
+ c->ifindex = dev->ifindex;
+ strcpy(c->ifname, dev->name);
memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
+ dev_mc_add(dev, c->clustermac);
+ dev_put(dev);
+
+ c->clusterip = ip;
c->num_total_nodes = i->num_total_nodes;
clusterip_config_init_nodelist(c, i);
c->hash_mode = i->hash_mode;
c->hash_initval = i->hash_initval;
+ c->net = net;
refcount_set(&c->refcount, 1);
spin_lock_bh(&cn->lock);
if (__clusterip_config_find(net, ip)) {
- spin_unlock_bh(&cn->lock);
- kfree(c);
-
- return ERR_PTR(-EBUSY);
+ err = -EBUSY;
+ goto out_config_put;
}
list_add_rcu(&c->list, &cn->configs);
@@ -250,9 +281,11 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
/* create proc dir entry */
sprintf(buffer, "%pI4", &ip);
+ mutex_lock(&cn->mutex);
c->pde = proc_create_data(buffer, 0600,
cn->procdir,
&clusterip_proc_fops, c);
+ mutex_unlock(&cn->mutex);
if (!c->pde) {
err = -ENOMEM;
goto err;
@@ -260,22 +293,17 @@ clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
}
#endif
- c->notifier.notifier_call = clusterip_netdev_event;
- err = register_netdevice_notifier(&c->notifier);
- if (!err) {
- refcount_set(&c->entries, 1);
- return c;
- }
+ refcount_set(&c->entries, 1);
+ return c;
#ifdef CONFIG_PROC_FS
- proc_remove(c->pde);
err:
#endif
spin_lock_bh(&cn->lock);
list_del_rcu(&c->list);
+out_config_put:
spin_unlock_bh(&cn->lock);
clusterip_config_put(c);
-
return ERR_PTR(err);
}
@@ -475,34 +503,20 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
&e->ip.dst.s_addr);
return -EINVAL;
} else {
- struct net_device *dev;
-
- if (e->ip.iniface[0] == '\0') {
- pr_info("Please specify an interface name\n");
- return -EINVAL;
- }
-
- dev = dev_get_by_name(par->net, e->ip.iniface);
- if (!dev) {
- pr_info("no such interface %s\n",
- e->ip.iniface);
- return -ENOENT;
- }
- dev_put(dev);
-
config = clusterip_config_init(par->net, cipinfo,
e->ip.dst.s_addr,
e->ip.iniface);
if (IS_ERR(config))
return PTR_ERR(config);
}
- }
+ } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN))
+ return -EINVAL;
ret = nf_ct_netns_get(par->net, par->family);
if (ret < 0) {
pr_info("cannot load conntrack support for proto=%u\n",
par->family);
- clusterip_config_entry_put(par->net, config);
+ clusterip_config_entry_put(config);
clusterip_config_put(config);
return ret;
}
@@ -524,7 +538,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
/* if no more entries are referencing the config, remove it
* from the list and destroy the proc entry */
- clusterip_config_entry_put(par->net, cipinfo->config);
+ clusterip_config_entry_put(cipinfo->config);
clusterip_config_put(cipinfo->config);
@@ -806,7 +820,7 @@ static const struct file_operations clusterip_proc_fops = {
static int clusterip_net_init(struct net *net)
{
- struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+ struct clusterip_net *cn = clusterip_pernet(net);
int ret;
INIT_LIST_HEAD(&cn->configs);
@@ -824,6 +838,7 @@ static int clusterip_net_init(struct net *net)
pr_err("Unable to proc dir entry\n");
return -ENOMEM;
}
+ mutex_init(&cn->mutex);
#endif /* CONFIG_PROC_FS */
return 0;
@@ -831,13 +846,15 @@ static int clusterip_net_init(struct net *net)
static void clusterip_net_exit(struct net *net)
{
- struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+ struct clusterip_net *cn = clusterip_pernet(net);
+
#ifdef CONFIG_PROC_FS
+ mutex_lock(&cn->mutex);
proc_remove(cn->procdir);
cn->procdir = NULL;
+ mutex_unlock(&cn->mutex);
#endif
nf_unregister_net_hook(net, &cip_arp_ops);
- WARN_ON_ONCE(!list_empty(&cn->configs));
}
static struct pernet_operations clusterip_net_ops = {
@@ -847,6 +864,10 @@ static struct pernet_operations clusterip_net_ops = {
.size = sizeof(struct clusterip_net),
};
+struct notifier_block cip_netdev_notifier = {
+ .notifier_call = clusterip_netdev_event
+};
+
static int __init clusterip_tg_init(void)
{
int ret;
@@ -859,11 +880,17 @@ static int __init clusterip_tg_init(void)
if (ret < 0)
goto cleanup_subsys;
+ ret = register_netdevice_notifier(&cip_netdev_notifier);
+ if (ret < 0)
+ goto unregister_target;
+
pr_info("ClusterIP Version %s loaded successfully\n",
CLUSTERIP_VERSION);
return 0;
+unregister_target:
+ xt_unregister_target(&clusterip_tg_reg);
cleanup_subsys:
unregister_pernet_subsys(&clusterip_net_ops);
return ret;
@@ -873,11 +900,12 @@ static void __exit clusterip_tg_exit(void)
{
pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
+ unregister_netdevice_notifier(&cip_netdev_notifier);
xt_unregister_target(&clusterip_tg_reg);
unregister_pernet_subsys(&clusterip_net_ops);
- /* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */
- rcu_barrier_bh();
+ /* Wait for completion of call_rcu()'s (clusterip_config_rcu_free) */
+ rcu_barrier();
}
module_init(clusterip_tg_init);
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
index 78a67f961d86..2687db015b6f 100644
--- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -62,22 +62,8 @@ static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
}
#endif /* CONFIG_XFRM */
-static bool nf_nat_ipv4_in_range(const struct nf_conntrack_tuple *t,
- const struct nf_nat_range2 *range)
-{
- return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) &&
- ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip);
-}
-
-static u32 nf_nat_ipv4_secure_port(const struct nf_conntrack_tuple *t,
- __be16 dport)
-{
- return secure_ipv4_port_ephemeral(t->src.u3.ip, t->dst.u3.ip, dport);
-}
-
static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff,
- const struct nf_nat_l4proto *l4proto,
const struct nf_conntrack_tuple *target,
enum nf_nat_manip_type maniptype)
{
@@ -90,8 +76,8 @@ static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
iph = (void *)skb->data + iphdroff;
hdroff = iphdroff + iph->ihl * 4;
- if (!l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff, hdroff,
- target, maniptype))
+ if (!nf_nat_l4proto_manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff,
+ hdroff, target, maniptype))
return false;
iph = (void *)skb->data + iphdroff;
@@ -161,8 +147,6 @@ static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = {
.l3proto = NFPROTO_IPV4,
- .in_range = nf_nat_ipv4_in_range,
- .secure_port = nf_nat_ipv4_secure_port,
.manip_pkt = nf_nat_ipv4_manip_pkt,
.csum_update = nf_nat_ipv4_csum_update,
.csum_recalc = nf_nat_ipv4_csum_recalc,
@@ -186,7 +170,6 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb,
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
unsigned int hdrlen = ip_hdrlen(skb);
- const struct nf_nat_l4proto *l4proto;
struct nf_conntrack_tuple target;
unsigned long statusbit;
@@ -217,9 +200,8 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb,
if (!(ct->status & statusbit))
return 1;
- l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, inside->ip.protocol);
if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp),
- l4proto, &ct->tuplehash[!dir].tuple, !manip))
+ &ct->tuplehash[!dir].tuple, !manip))
return 0;
if (skb->ip_summed != CHECKSUM_PARTIAL) {
@@ -233,8 +215,7 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb,
/* Change outer to look like the reply to an incoming packet */
nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
- l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, 0);
- if (!nf_nat_ipv4_manip_pkt(skb, 0, l4proto, &target, manip))
+ if (!nf_nat_ipv4_manip_pkt(skb, 0, &target, manip))
return 0;
return 1;
@@ -391,26 +372,12 @@ EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv4_unregister_fn);
static int __init nf_nat_l3proto_ipv4_init(void)
{
- int err;
-
- err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
- if (err < 0)
- goto err1;
- err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv4);
- if (err < 0)
- goto err2;
- return err;
-
-err2:
- nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
-err1:
- return err;
+ return nf_nat_l3proto_register(&nf_nat_l3proto_ipv4);
}
static void __exit nf_nat_l3proto_ipv4_exit(void)
{
nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4);
- nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
}
MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 5d259a12e25f..68b4d450391b 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -299,8 +299,6 @@ pptp_inbound_pkt(struct sk_buff *skb,
static int __init nf_nat_helper_pptp_init(void)
{
- nf_nat_need_gre();
-
BUG_ON(nf_nat_pptp_hook_outbound != NULL);
RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, pptp_outbound_pkt);
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
deleted file mode 100644
index 00fda6331ce5..000000000000
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ /dev/null
@@ -1,150 +0,0 @@
-/*
- * nf_nat_proto_gre.c
- *
- * NAT protocol helper module for GRE.
- *
- * GRE is a generic encapsulation protocol, which is generally not very
- * suited for NAT, as it has no protocol-specific part as port numbers.
- *
- * It has an optional key field, which may help us distinguishing two
- * connections between the same two hosts.
- *
- * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
- *
- * PPTP is built on top of a modified version of GRE, and has a mandatory
- * field called "CallID", which serves us for the same purpose as the key
- * field in plain GRE.
- *
- * Documentation about PPTP can be found in RFC 2637
- *
- * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
- *
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-#include <linux/netfilter/nf_conntrack_proto_gre.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
-
-/* generate unique tuple ... */
-static void
-gre_unique_tuple(const struct nf_nat_l3proto *l3proto,
- struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct)
-{
- static u_int16_t key;
- __be16 *keyptr;
- unsigned int min, i, range_size;
-
- /* If there is no master conntrack we are not PPTP,
- do not change tuples */
- if (!ct->master)
- return;
-
- if (maniptype == NF_NAT_MANIP_SRC)
- keyptr = &tuple->src.u.gre.key;
- else
- keyptr = &tuple->dst.u.gre.key;
-
- if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
- pr_debug("%p: NATing GRE PPTP\n", ct);
- min = 1;
- range_size = 0xffff;
- } else {
- min = ntohs(range->min_proto.gre.key);
- range_size = ntohs(range->max_proto.gre.key) - min + 1;
- }
-
- pr_debug("min = %u, range_size = %u\n", min, range_size);
-
- for (i = 0; ; ++key) {
- *keyptr = htons(min + key % range_size);
- if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
- return;
- }
-
- pr_debug("%p: no NAT mapping\n", ct);
- return;
-}
-
-/* manipulate a GRE packet according to maniptype */
-static bool
-gre_manip_pkt(struct sk_buff *skb,
- const struct nf_nat_l3proto *l3proto,
- unsigned int iphdroff, unsigned int hdroff,
- const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype)
-{
- const struct gre_base_hdr *greh;
- struct pptp_gre_header *pgreh;
-
- /* pgreh includes two optional 32bit fields which are not required
- * to be there. That's where the magic '8' comes from */
- if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8))
- return false;
-
- greh = (void *)skb->data + hdroff;
- pgreh = (struct pptp_gre_header *)greh;
-
- /* we only have destination manip of a packet, since 'source key'
- * is not present in the packet itself */
- if (maniptype != NF_NAT_MANIP_DST)
- return true;
-
- switch (greh->flags & GRE_VERSION) {
- case GRE_VERSION_0:
- /* We do not currently NAT any GREv0 packets.
- * Try to behave like "nf_nat_proto_unknown" */
- break;
- case GRE_VERSION_1:
- pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
- pgreh->call_id = tuple->dst.u.gre.key;
- break;
- default:
- pr_debug("can't nat unknown GRE version\n");
- return false;
- }
- return true;
-}
-
-static const struct nf_nat_l4proto gre = {
- .l4proto = IPPROTO_GRE,
- .manip_pkt = gre_manip_pkt,
- .in_range = nf_nat_l4proto_in_range,
- .unique_tuple = gre_unique_tuple,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
- .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
-#endif
-};
-
-static int __init nf_nat_proto_gre_init(void)
-{
- return nf_nat_l4proto_register(NFPROTO_IPV4, &gre);
-}
-
-static void __exit nf_nat_proto_gre_fini(void)
-{
- nf_nat_l4proto_unregister(NFPROTO_IPV4, &gre);
-}
-
-module_init(nf_nat_proto_gre_init);
-module_exit(nf_nat_proto_gre_fini);
-
-void nf_nat_need_gre(void)
-{
- return;
-}
-EXPORT_SYMBOL_GPL(nf_nat_need_gre);
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
deleted file mode 100644
index 6d7cf1d79baf..000000000000
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ /dev/null
@@ -1,83 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/export.h>
-#include <linux/ip.h>
-#include <linux/icmp.h>
-
-#include <linux/netfilter.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-
-static bool
-icmp_in_range(const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype,
- const union nf_conntrack_man_proto *min,
- const union nf_conntrack_man_proto *max)
-{
- return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
- ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
-}
-
-static void
-icmp_unique_tuple(const struct nf_nat_l3proto *l3proto,
- struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct)
-{
- static u_int16_t id;
- unsigned int range_size;
- unsigned int i;
-
- range_size = ntohs(range->max_proto.icmp.id) -
- ntohs(range->min_proto.icmp.id) + 1;
- /* If no range specified... */
- if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
- range_size = 0xFFFF;
-
- for (i = 0; ; ++id) {
- tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) +
- (id % range_size));
- if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
- return;
- }
- return;
-}
-
-static bool
-icmp_manip_pkt(struct sk_buff *skb,
- const struct nf_nat_l3proto *l3proto,
- unsigned int iphdroff, unsigned int hdroff,
- const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype)
-{
- struct icmphdr *hdr;
-
- if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
- return false;
-
- hdr = (struct icmphdr *)(skb->data + hdroff);
- inet_proto_csum_replace2(&hdr->checksum, skb,
- hdr->un.echo.id, tuple->src.u.icmp.id, false);
- hdr->un.echo.id = tuple->src.u.icmp.id;
- return true;
-}
-
-const struct nf_nat_l4proto nf_nat_l4proto_icmp = {
- .l4proto = IPPROTO_ICMP,
- .manip_pkt = icmp_manip_pkt,
- .in_range = icmp_in_range,
- .unique_tuple = icmp_unique_tuple,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
- .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
-#endif
-};
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index 200c0c235565..9ea43d5256e0 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -11,7 +11,7 @@ obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o
obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o
obj-$(CONFIG_IP6_NF_NAT) += ip6table_nat.o
-nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o
+nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o
nf_nat_ipv6-$(CONFIG_NF_NAT_MASQUERADE_IPV6) += nf_nat_masquerade_ipv6.o
obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o
diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
index ca6d38698b1a..23022447eb49 100644
--- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c
@@ -61,22 +61,8 @@ static void nf_nat_ipv6_decode_session(struct sk_buff *skb,
}
#endif
-static bool nf_nat_ipv6_in_range(const struct nf_conntrack_tuple *t,
- const struct nf_nat_range2 *range)
-{
- return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 &&
- ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0;
-}
-
-static u32 nf_nat_ipv6_secure_port(const struct nf_conntrack_tuple *t,
- __be16 dport)
-{
- return secure_ipv6_port_ephemeral(t->src.u3.ip6, t->dst.u3.ip6, dport);
-}
-
static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
unsigned int iphdroff,
- const struct nf_nat_l4proto *l4proto,
const struct nf_conntrack_tuple *target,
enum nf_nat_manip_type maniptype)
{
@@ -96,8 +82,8 @@ static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb,
goto manip_addr;
if ((frag_off & htons(~0x7)) == 0 &&
- !l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff,
- target, maniptype))
+ !nf_nat_l4proto_manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff,
+ target, maniptype))
return false;
/* must reload, offset might have changed */
@@ -171,8 +157,6 @@ static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[],
static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = {
.l3proto = NFPROTO_IPV6,
- .secure_port = nf_nat_ipv6_secure_port,
- .in_range = nf_nat_ipv6_in_range,
.manip_pkt = nf_nat_ipv6_manip_pkt,
.csum_update = nf_nat_ipv6_csum_update,
.csum_recalc = nf_nat_ipv6_csum_recalc,
@@ -196,7 +180,6 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
} *inside;
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
- const struct nf_nat_l4proto *l4proto;
struct nf_conntrack_tuple target;
unsigned long statusbit;
@@ -227,9 +210,8 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
if (!(ct->status & statusbit))
return 1;
- l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, inside->ip6.nexthdr);
if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6),
- l4proto, &ct->tuplehash[!dir].tuple, !manip))
+ &ct->tuplehash[!dir].tuple, !manip))
return 0;
if (skb->ip_summed != CHECKSUM_PARTIAL) {
@@ -244,8 +226,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb,
}
nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
- l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, IPPROTO_ICMPV6);
- if (!nf_nat_ipv6_manip_pkt(skb, 0, l4proto, &target, manip))
+ if (!nf_nat_ipv6_manip_pkt(skb, 0, &target, manip))
return 0;
return 1;
@@ -415,26 +396,12 @@ EXPORT_SYMBOL_GPL(nf_nat_l3proto_ipv6_unregister_fn);
static int __init nf_nat_l3proto_ipv6_init(void)
{
- int err;
-
- err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
- if (err < 0)
- goto err1;
- err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv6);
- if (err < 0)
- goto err2;
- return err;
-
-err2:
- nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
-err1:
- return err;
+ return nf_nat_l3proto_register(&nf_nat_l3proto_ipv6);
}
static void __exit nf_nat_l3proto_ipv6_exit(void)
{
nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6);
- nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6);
}
MODULE_LICENSE("GPL");
diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
deleted file mode 100644
index d9bf42ba44fa..000000000000
--- a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2011 Patrick Mchardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Based on Rusty Russell's IPv4 ICMP NAT code. Development of IPv6
- * NAT funded by Astaro.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/icmpv6.h>
-
-#include <linux/netfilter.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-
-static bool
-icmpv6_in_range(const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype,
- const union nf_conntrack_man_proto *min,
- const union nf_conntrack_man_proto *max)
-{
- return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
- ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
-}
-
-static void
-icmpv6_unique_tuple(const struct nf_nat_l3proto *l3proto,
- struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct)
-{
- static u16 id;
- unsigned int range_size;
- unsigned int i;
-
- range_size = ntohs(range->max_proto.icmp.id) -
- ntohs(range->min_proto.icmp.id) + 1;
-
- if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
- range_size = 0xffff;
-
- for (i = 0; ; ++id) {
- tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) +
- (id % range_size));
- if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
- return;
- }
-}
-
-static bool
-icmpv6_manip_pkt(struct sk_buff *skb,
- const struct nf_nat_l3proto *l3proto,
- unsigned int iphdroff, unsigned int hdroff,
- const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype)
-{
- struct icmp6hdr *hdr;
-
- if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
- return false;
-
- hdr = (struct icmp6hdr *)(skb->data + hdroff);
- l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum,
- tuple, maniptype);
- if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
- hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
- inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
- hdr->icmp6_identifier,
- tuple->src.u.icmp.id, false);
- hdr->icmp6_identifier = tuple->src.u.icmp.id;
- }
- return true;
-}
-
-const struct nf_nat_l4proto nf_nat_l4proto_icmpv6 = {
- .l4proto = IPPROTO_ICMPV6,
- .manip_pkt = icmpv6_manip_pkt,
- .in_range = icmpv6_in_range,
- .unique_tuple = icmpv6_unique_tuple,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
- .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
-#endif
-};
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 2ab870ef233a..beb3a69ce1d4 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -403,21 +403,6 @@ config NF_NAT_NEEDED
depends on NF_NAT
default y
-config NF_NAT_PROTO_DCCP
- bool
- depends on NF_NAT && NF_CT_PROTO_DCCP
- default NF_NAT && NF_CT_PROTO_DCCP
-
-config NF_NAT_PROTO_UDPLITE
- bool
- depends on NF_NAT && NF_CT_PROTO_UDPLITE
- default NF_NAT && NF_CT_PROTO_UDPLITE
-
-config NF_NAT_PROTO_SCTP
- bool
- default NF_NAT && NF_CT_PROTO_SCTP
- depends on NF_NAT && NF_CT_PROTO_SCTP
-
config NF_NAT_AMANDA
tristate
depends on NF_CONNTRACK && NF_NAT
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 4ddf3ef51ece..1ae65a314d7a 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -47,12 +47,7 @@ obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o
obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o
obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o
-nf_nat-y := nf_nat_core.o nf_nat_proto_unknown.o nf_nat_proto_common.o \
- nf_nat_proto_udp.o nf_nat_proto_tcp.o nf_nat_helper.o
-
-# NAT protocols (nf_nat)
-nf_nat-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o
-nf_nat-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
+nf_nat-y := nf_nat_core.o nf_nat_proto.o nf_nat_helper.o
# generic transport layer logging
obj-$(CONFIG_NF_LOG_COMMON) += nf_log_common.o
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index c00b6a2e8e3c..980000fc3b50 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -219,10 +219,6 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
u32 ip;
- /* MAC can be src only */
- if (!(opt->flags & IPSET_DIM_TWO_SRC))
- return 0;
-
ip = ntohl(ip4addr(skb, opt->flags & IPSET_DIM_ONE_SRC));
if (ip < map->first_ip || ip > map->last_ip)
return -IPSET_ERR_BITMAP_RANGE;
@@ -233,7 +229,14 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb,
return -EINVAL;
e.id = ip_to_id(map, ip);
- memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
+
+ if (opt->flags & IPSET_DIM_ONE_SRC)
+ ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
+ else
+ ether_addr_copy(e.ether, eth_hdr(skb)->h_dest);
+
+ if (is_zero_ether_addr(e.ether))
+ return -EINVAL;
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 1577f2f76060..45a257695bef 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -771,11 +771,21 @@ EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
* The commands are serialized by the nfnl mutex.
*/
+static inline u8 protocol(const struct nlattr * const tb[])
+{
+ return nla_get_u8(tb[IPSET_ATTR_PROTOCOL]);
+}
+
static inline bool
protocol_failed(const struct nlattr * const tb[])
{
- return !tb[IPSET_ATTR_PROTOCOL] ||
- nla_get_u8(tb[IPSET_ATTR_PROTOCOL]) != IPSET_PROTOCOL;
+ return !tb[IPSET_ATTR_PROTOCOL] || protocol(tb) != IPSET_PROTOCOL;
+}
+
+static inline bool
+protocol_min_failed(const struct nlattr * const tb[])
+{
+ return !tb[IPSET_ATTR_PROTOCOL] || protocol(tb) < IPSET_PROTOCOL_MIN;
}
static inline u32
@@ -889,7 +899,7 @@ static int ip_set_create(struct net *net, struct sock *ctnl,
u32 flags = flag_exist(nlh);
int ret = 0;
- if (unlikely(protocol_failed(attr) ||
+ if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_SETNAME] ||
!attr[IPSET_ATTR_TYPENAME] ||
!attr[IPSET_ATTR_REVISION] ||
@@ -1027,7 +1037,7 @@ static int ip_set_destroy(struct net *net, struct sock *ctnl,
ip_set_id_t i;
int ret = 0;
- if (unlikely(protocol_failed(attr)))
+ if (unlikely(protocol_min_failed(attr)))
return -IPSET_ERR_PROTOCOL;
/* Must wait for flush to be really finished in list:set */
@@ -1105,7 +1115,7 @@ static int ip_set_flush(struct net *net, struct sock *ctnl, struct sk_buff *skb,
struct ip_set *s;
ip_set_id_t i;
- if (unlikely(protocol_failed(attr)))
+ if (unlikely(protocol_min_failed(attr)))
return -IPSET_ERR_PROTOCOL;
if (!attr[IPSET_ATTR_SETNAME]) {
@@ -1147,7 +1157,7 @@ static int ip_set_rename(struct net *net, struct sock *ctnl,
ip_set_id_t i;
int ret = 0;
- if (unlikely(protocol_failed(attr) ||
+ if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_SETNAME] ||
!attr[IPSET_ATTR_SETNAME2]))
return -IPSET_ERR_PROTOCOL;
@@ -1196,7 +1206,7 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb,
ip_set_id_t from_id, to_id;
char from_name[IPSET_MAXNAMELEN];
- if (unlikely(protocol_failed(attr) ||
+ if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_SETNAME] ||
!attr[IPSET_ATTR_SETNAME2]))
return -IPSET_ERR_PROTOCOL;
@@ -1291,6 +1301,7 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
nla_parse(cda, IPSET_ATTR_CMD_MAX, attr, nlh->nlmsg_len - min_len,
ip_set_setname_policy, NULL);
+ cb->args[IPSET_CB_PROTO] = nla_get_u8(cda[IPSET_ATTR_PROTOCOL]);
if (cda[IPSET_ATTR_SETNAME]) {
struct ip_set *set;
@@ -1392,7 +1403,8 @@ dump_last:
ret = -EMSGSIZE;
goto release_refcount;
}
- if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
+ if (nla_put_u8(skb, IPSET_ATTR_PROTOCOL,
+ cb->args[IPSET_CB_PROTO]) ||
nla_put_string(skb, IPSET_ATTR_SETNAME, set->name))
goto nla_put_failure;
if (dump_flags & IPSET_FLAG_LIST_SETNAME)
@@ -1407,6 +1419,9 @@ dump_last:
nla_put_u8(skb, IPSET_ATTR_REVISION,
set->revision))
goto nla_put_failure;
+ if (cb->args[IPSET_CB_PROTO] > IPSET_PROTOCOL_MIN &&
+ nla_put_net16(skb, IPSET_ATTR_INDEX, htons(index)))
+ goto nla_put_failure;
ret = set->variant->head(set, skb);
if (ret < 0)
goto release_refcount;
@@ -1466,7 +1481,7 @@ static int ip_set_dump(struct net *net, struct sock *ctnl, struct sk_buff *skb,
const struct nlattr * const attr[],
struct netlink_ext_ack *extack)
{
- if (unlikely(protocol_failed(attr)))
+ if (unlikely(protocol_min_failed(attr)))
return -IPSET_ERR_PROTOCOL;
{
@@ -1560,7 +1575,7 @@ static int ip_set_uadd(struct net *net, struct sock *ctnl, struct sk_buff *skb,
bool use_lineno;
int ret = 0;
- if (unlikely(protocol_failed(attr) ||
+ if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_SETNAME] ||
!((attr[IPSET_ATTR_DATA] != NULL) ^
(attr[IPSET_ATTR_ADT] != NULL)) ||
@@ -1615,7 +1630,7 @@ static int ip_set_udel(struct net *net, struct sock *ctnl, struct sk_buff *skb,
bool use_lineno;
int ret = 0;
- if (unlikely(protocol_failed(attr) ||
+ if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_SETNAME] ||
!((attr[IPSET_ATTR_DATA] != NULL) ^
(attr[IPSET_ATTR_ADT] != NULL)) ||
@@ -1667,7 +1682,7 @@ static int ip_set_utest(struct net *net, struct sock *ctnl, struct sk_buff *skb,
struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
int ret = 0;
- if (unlikely(protocol_failed(attr) ||
+ if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_SETNAME] ||
!attr[IPSET_ATTR_DATA] ||
!flag_nested(attr[IPSET_ATTR_DATA])))
@@ -1704,7 +1719,7 @@ static int ip_set_header(struct net *net, struct sock *ctnl,
struct nlmsghdr *nlh2;
int ret = 0;
- if (unlikely(protocol_failed(attr) ||
+ if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_SETNAME]))
return -IPSET_ERR_PROTOCOL;
@@ -1720,7 +1735,7 @@ static int ip_set_header(struct net *net, struct sock *ctnl,
IPSET_CMD_HEADER);
if (!nlh2)
goto nlmsg_failure;
- if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
+ if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name) ||
nla_put_string(skb2, IPSET_ATTR_TYPENAME, set->type->name) ||
nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
@@ -1761,7 +1776,7 @@ static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb,
const char *typename;
int ret = 0;
- if (unlikely(protocol_failed(attr) ||
+ if (unlikely(protocol_min_failed(attr) ||
!attr[IPSET_ATTR_TYPENAME] ||
!attr[IPSET_ATTR_FAMILY]))
return -IPSET_ERR_PROTOCOL;
@@ -1780,7 +1795,7 @@ static int ip_set_type(struct net *net, struct sock *ctnl, struct sk_buff *skb,
IPSET_CMD_TYPE);
if (!nlh2)
goto nlmsg_failure;
- if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL) ||
+ if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
nla_put_string(skb2, IPSET_ATTR_TYPENAME, typename) ||
nla_put_u8(skb2, IPSET_ATTR_FAMILY, family) ||
nla_put_u8(skb2, IPSET_ATTR_REVISION, max) ||
@@ -1831,6 +1846,111 @@ static int ip_set_protocol(struct net *net, struct sock *ctnl,
goto nlmsg_failure;
if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, IPSET_PROTOCOL))
goto nla_put_failure;
+ if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL_MIN, IPSET_PROTOCOL_MIN))
+ goto nla_put_failure;
+ nlmsg_end(skb2, nlh2);
+
+ ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+
+nla_put_failure:
+ nlmsg_cancel(skb2, nlh2);
+nlmsg_failure:
+ kfree_skb(skb2);
+ return -EMSGSIZE;
+}
+
+/* Get set by name or index, from userspace */
+
+static int ip_set_byname(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[],
+ struct netlink_ext_ack *extack)
+{
+ struct ip_set_net *inst = ip_set_pernet(net);
+ struct sk_buff *skb2;
+ struct nlmsghdr *nlh2;
+ ip_set_id_t id = IPSET_INVALID_ID;
+ const struct ip_set *set;
+ int ret = 0;
+
+ if (unlikely(protocol_failed(attr) ||
+ !attr[IPSET_ATTR_SETNAME]))
+ return -IPSET_ERR_PROTOCOL;
+
+ set = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]), &id);
+ if (id == IPSET_INVALID_ID)
+ return -ENOENT;
+
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb2)
+ return -ENOMEM;
+
+ nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
+ IPSET_CMD_GET_BYNAME);
+ if (!nlh2)
+ goto nlmsg_failure;
+ if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
+ nla_put_u8(skb2, IPSET_ATTR_FAMILY, set->family) ||
+ nla_put_net16(skb2, IPSET_ATTR_INDEX, htons(id)))
+ goto nla_put_failure;
+ nlmsg_end(skb2, nlh2);
+
+ ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+
+nla_put_failure:
+ nlmsg_cancel(skb2, nlh2);
+nlmsg_failure:
+ kfree_skb(skb2);
+ return -EMSGSIZE;
+}
+
+static const struct nla_policy ip_set_index_policy[IPSET_ATTR_CMD_MAX + 1] = {
+ [IPSET_ATTR_PROTOCOL] = { .type = NLA_U8 },
+ [IPSET_ATTR_INDEX] = { .type = NLA_U16 },
+};
+
+static int ip_set_byindex(struct net *net, struct sock *ctnl,
+ struct sk_buff *skb, const struct nlmsghdr *nlh,
+ const struct nlattr * const attr[],
+ struct netlink_ext_ack *extack)
+{
+ struct ip_set_net *inst = ip_set_pernet(net);
+ struct sk_buff *skb2;
+ struct nlmsghdr *nlh2;
+ ip_set_id_t id = IPSET_INVALID_ID;
+ const struct ip_set *set;
+ int ret = 0;
+
+ if (unlikely(protocol_failed(attr) ||
+ !attr[IPSET_ATTR_INDEX]))
+ return -IPSET_ERR_PROTOCOL;
+
+ id = ip_set_get_h16(attr[IPSET_ATTR_INDEX]);
+ if (id >= inst->ip_set_max)
+ return -ENOENT;
+ set = ip_set(inst, id);
+ if (set == NULL)
+ return -ENOENT;
+
+ skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb2)
+ return -ENOMEM;
+
+ nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
+ IPSET_CMD_GET_BYINDEX);
+ if (!nlh2)
+ goto nlmsg_failure;
+ if (nla_put_u8(skb2, IPSET_ATTR_PROTOCOL, protocol(attr)) ||
+ nla_put_string(skb2, IPSET_ATTR_SETNAME, set->name))
+ goto nla_put_failure;
nlmsg_end(skb2, nlh2);
ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
@@ -1916,6 +2036,16 @@ static const struct nfnl_callback ip_set_netlink_subsys_cb[IPSET_MSG_MAX] = {
.attr_count = IPSET_ATTR_CMD_MAX,
.policy = ip_set_protocol_policy,
},
+ [IPSET_CMD_GET_BYNAME] = {
+ .call = ip_set_byname,
+ .attr_count = IPSET_ATTR_CMD_MAX,
+ .policy = ip_set_setname_policy,
+ },
+ [IPSET_CMD_GET_BYINDEX] = {
+ .call = ip_set_byindex,
+ .attr_count = IPSET_ATTR_CMD_MAX,
+ .policy = ip_set_index_policy,
+ },
};
static struct nfnetlink_subsystem ip_set_netlink_subsys __read_mostly = {
@@ -1961,7 +2091,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
goto done;
}
- if (req_version->version != IPSET_PROTOCOL) {
+ if (req_version->version < IPSET_PROTOCOL_MIN) {
ret = -EPROTO;
goto done;
}
@@ -2024,9 +2154,11 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
}
nfnl_lock(NFNL_SUBSYS_IPSET);
set = ip_set(inst, req_get->set.index);
- strncpy(req_get->set.name, set ? set->name : "",
- IPSET_MAXNAMELEN);
+ ret = strscpy(req_get->set.name, set ? set->name : "",
+ IPSET_MAXNAMELEN);
nfnl_unlock(NFNL_SUBSYS_IPSET);
+ if (ret < 0)
+ goto done;
goto copy;
}
default:
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index e287da68d5fa..2c9609929c71 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -67,7 +67,7 @@ tune_ahash_max(u8 curr, u32 multi)
/* A hash bucket */
struct hbucket {
- struct rcu_head rcu; /* for call_rcu_bh */
+ struct rcu_head rcu; /* for call_rcu */
/* Which positions are used in the array */
DECLARE_BITMAP(used, AHASH_MAX_TUNED);
u8 size; /* size of the array */
@@ -664,7 +664,7 @@ retry:
spin_unlock_bh(&set->lock);
/* Give time to other readers of the set */
- synchronize_rcu_bh();
+ synchronize_rcu();
pr_debug("set %s resized from %u (%p) to %u (%p)\n", set->name,
orig->htable_bits, orig, t->htable_bits, t);
diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c
index 1ab5ed2f6839..c830c68142ff 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmac.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmac.c
@@ -36,9 +36,6 @@ MODULE_ALIAS("ip_set_hash:ip,mac");
/* Type specific function prefix */
#define HTYPE hash_ipmac
-/* Zero valued element is not supported */
-static const unsigned char invalid_ether[ETH_ALEN] = { 0 };
-
/* IPv4 variant */
/* Member elements */
@@ -103,8 +100,12 @@ hash_ipmac4_kadt(struct ip_set *set, const struct sk_buff *skb,
(skb_mac_header(skb) + ETH_HLEN) > skb->data)
return -EINVAL;
- memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
- if (ether_addr_equal(e.ether, invalid_ether))
+ if (opt->flags & IPSET_DIM_ONE_SRC)
+ ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
+ else
+ ether_addr_copy(e.ether, eth_hdr(skb)->h_dest);
+
+ if (is_zero_ether_addr(e.ether))
return -EINVAL;
ip4addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip);
@@ -140,7 +141,7 @@ hash_ipmac4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ret)
return ret;
memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
- if (ether_addr_equal(e.ether, invalid_ether))
+ if (is_zero_ether_addr(e.ether))
return -IPSET_ERR_HASH_ELEM;
return adtfn(set, &e, &ext, &ext, flags);
@@ -211,16 +212,16 @@ hash_ipmac6_kadt(struct ip_set *set, const struct sk_buff *skb,
};
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- /* MAC can be src only */
- if (!(opt->flags & IPSET_DIM_TWO_SRC))
- return 0;
-
if (skb_mac_header(skb) < skb->head ||
(skb_mac_header(skb) + ETH_HLEN) > skb->data)
return -EINVAL;
- memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
- if (ether_addr_equal(e.ether, invalid_ether))
+ if (opt->flags & IPSET_DIM_ONE_SRC)
+ ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
+ else
+ ether_addr_copy(e.ether, eth_hdr(skb)->h_dest);
+
+ if (is_zero_ether_addr(e.ether))
return -EINVAL;
ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip.in6);
@@ -260,7 +261,7 @@ hash_ipmac6_uadt(struct ip_set *set, struct nlattr *tb[],
return ret;
memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
- if (ether_addr_equal(e.ether, invalid_ether))
+ if (is_zero_ether_addr(e.ether))
return -IPSET_ERR_HASH_ELEM;
return adtfn(set, &e, &ext, &ext, flags);
diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c
index f9d5a2a1e3d0..4fe5f243d0a3 100644
--- a/net/netfilter/ipset/ip_set_hash_mac.c
+++ b/net/netfilter/ipset/ip_set_hash_mac.c
@@ -81,15 +81,15 @@ hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb,
struct hash_mac4_elem e = { { .foo[0] = 0, .foo[1] = 0 } };
struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set);
- /* MAC can be src only */
- if (!(opt->flags & IPSET_DIM_ONE_SRC))
- return 0;
-
if (skb_mac_header(skb) < skb->head ||
(skb_mac_header(skb) + ETH_HLEN) > skb->data)
return -EINVAL;
- ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
+ if (opt->flags & IPSET_DIM_ONE_SRC)
+ ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
+ else
+ ether_addr_copy(e.ether, eth_hdr(skb)->h_dest);
+
if (is_zero_ether_addr(e.ether))
return -EINVAL;
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c
index 1d66de5151b2..49e523cc49d0 100644
--- a/net/netfilter/nf_conntrack_acct.c
+++ b/net/netfilter/nf_conntrack_acct.c
@@ -25,102 +25,15 @@ static bool nf_ct_acct __read_mostly;
module_param_named(acct, nf_ct_acct, bool, 0644);
MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting.");
-#ifdef CONFIG_SYSCTL
-static struct ctl_table acct_sysctl_table[] = {
- {
- .procname = "nf_conntrack_acct",
- .data = &init_net.ct.sysctl_acct,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {}
-};
-#endif /* CONFIG_SYSCTL */
-
-unsigned int
-seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir)
-{
- struct nf_conn_acct *acct;
- struct nf_conn_counter *counter;
-
- acct = nf_conn_acct_find(ct);
- if (!acct)
- return 0;
-
- counter = acct->counter;
- seq_printf(s, "packets=%llu bytes=%llu ",
- (unsigned long long)atomic64_read(&counter[dir].packets),
- (unsigned long long)atomic64_read(&counter[dir].bytes));
-
- return 0;
-};
-EXPORT_SYMBOL_GPL(seq_print_acct);
-
static const struct nf_ct_ext_type acct_extend = {
.len = sizeof(struct nf_conn_acct),
.align = __alignof__(struct nf_conn_acct),
.id = NF_CT_EXT_ACCT,
};
-#ifdef CONFIG_SYSCTL
-static int nf_conntrack_acct_init_sysctl(struct net *net)
-{
- struct ctl_table *table;
-
- table = kmemdup(acct_sysctl_table, sizeof(acct_sysctl_table),
- GFP_KERNEL);
- if (!table)
- goto out;
-
- table[0].data = &net->ct.sysctl_acct;
-
- /* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
- table[0].procname = NULL;
-
- net->ct.acct_sysctl_header = register_net_sysctl(net, "net/netfilter",
- table);
- if (!net->ct.acct_sysctl_header) {
- pr_err("can't register to sysctl\n");
- goto out_register;
- }
- return 0;
-
-out_register:
- kfree(table);
-out:
- return -ENOMEM;
-}
-
-static void nf_conntrack_acct_fini_sysctl(struct net *net)
-{
- struct ctl_table *table;
-
- table = net->ct.acct_sysctl_header->ctl_table_arg;
- unregister_net_sysctl_table(net->ct.acct_sysctl_header);
- kfree(table);
-}
-#else
-static int nf_conntrack_acct_init_sysctl(struct net *net)
-{
- return 0;
-}
-
-static void nf_conntrack_acct_fini_sysctl(struct net *net)
-{
-}
-#endif
-
-int nf_conntrack_acct_pernet_init(struct net *net)
+void nf_conntrack_acct_pernet_init(struct net *net)
{
net->ct.sysctl_acct = nf_ct_acct;
- return nf_conntrack_acct_init_sysctl(net);
-}
-
-void nf_conntrack_acct_pernet_fini(struct net *net)
-{
- nf_conntrack_acct_fini_sysctl(net);
}
int nf_conntrack_acct_init(void)
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index e92e749aff53..e87c21e47efe 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -2110,10 +2110,7 @@ i_see_dead_people:
list_for_each_entry(net, net_exit_list, exit_list) {
nf_conntrack_proto_pernet_fini(net);
- nf_conntrack_helper_pernet_fini(net);
nf_conntrack_ecache_pernet_fini(net);
- nf_conntrack_tstamp_pernet_fini(net);
- nf_conntrack_acct_pernet_fini(net);
nf_conntrack_expect_pernet_fini(net);
free_percpu(net->ct.stat);
free_percpu(net->ct.pcpu_lists);
@@ -2410,32 +2407,19 @@ int nf_conntrack_init_net(struct net *net)
ret = nf_conntrack_expect_pernet_init(net);
if (ret < 0)
goto err_expect;
- ret = nf_conntrack_acct_pernet_init(net);
- if (ret < 0)
- goto err_acct;
- ret = nf_conntrack_tstamp_pernet_init(net);
- if (ret < 0)
- goto err_tstamp;
- ret = nf_conntrack_ecache_pernet_init(net);
- if (ret < 0)
- goto err_ecache;
- ret = nf_conntrack_helper_pernet_init(net);
- if (ret < 0)
- goto err_helper;
+
+ nf_conntrack_acct_pernet_init(net);
+ nf_conntrack_tstamp_pernet_init(net);
+ nf_conntrack_ecache_pernet_init(net);
+ nf_conntrack_helper_pernet_init(net);
+
ret = nf_conntrack_proto_pernet_init(net);
if (ret < 0)
goto err_proto;
return 0;
err_proto:
- nf_conntrack_helper_pernet_fini(net);
-err_helper:
nf_conntrack_ecache_pernet_fini(net);
-err_ecache:
- nf_conntrack_tstamp_pernet_fini(net);
-err_tstamp:
- nf_conntrack_acct_pernet_fini(net);
-err_acct:
nf_conntrack_expect_pernet_fini(net);
err_expect:
free_percpu(net->ct.stat);
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index c11822a7d2bf..3d042f8ff183 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -336,85 +336,21 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
#define NF_CT_EVENTS_DEFAULT 1
static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;
-#ifdef CONFIG_SYSCTL
-static struct ctl_table event_sysctl_table[] = {
- {
- .procname = "nf_conntrack_events",
- .data = &init_net.ct.sysctl_events,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {}
-};
-#endif /* CONFIG_SYSCTL */
-
static const struct nf_ct_ext_type event_extend = {
.len = sizeof(struct nf_conntrack_ecache),
.align = __alignof__(struct nf_conntrack_ecache),
.id = NF_CT_EXT_ECACHE,
};
-#ifdef CONFIG_SYSCTL
-static int nf_conntrack_event_init_sysctl(struct net *net)
-{
- struct ctl_table *table;
-
- table = kmemdup(event_sysctl_table, sizeof(event_sysctl_table),
- GFP_KERNEL);
- if (!table)
- goto out;
-
- table[0].data = &net->ct.sysctl_events;
-
- /* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
- table[0].procname = NULL;
-
- net->ct.event_sysctl_header =
- register_net_sysctl(net, "net/netfilter", table);
- if (!net->ct.event_sysctl_header) {
- pr_err("can't register to sysctl\n");
- goto out_register;
- }
- return 0;
-
-out_register:
- kfree(table);
-out:
- return -ENOMEM;
-}
-
-static void nf_conntrack_event_fini_sysctl(struct net *net)
-{
- struct ctl_table *table;
-
- table = net->ct.event_sysctl_header->ctl_table_arg;
- unregister_net_sysctl_table(net->ct.event_sysctl_header);
- kfree(table);
-}
-#else
-static int nf_conntrack_event_init_sysctl(struct net *net)
-{
- return 0;
-}
-
-static void nf_conntrack_event_fini_sysctl(struct net *net)
-{
-}
-#endif /* CONFIG_SYSCTL */
-
-int nf_conntrack_ecache_pernet_init(struct net *net)
+void nf_conntrack_ecache_pernet_init(struct net *net)
{
net->ct.sysctl_events = nf_ct_events;
INIT_DELAYED_WORK(&net->ct.ecache_dwork, ecache_work);
- return nf_conntrack_event_init_sysctl(net);
}
void nf_conntrack_ecache_pernet_fini(struct net *net)
{
cancel_delayed_work_sync(&net->ct.ecache_dwork);
- nf_conntrack_event_fini_sysctl(net);
}
int nf_conntrack_ecache_init(void)
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index e24b762ffa1d..274baf1dab87 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -42,67 +42,6 @@ module_param_named(nf_conntrack_helper, nf_ct_auto_assign_helper, bool, 0644);
MODULE_PARM_DESC(nf_conntrack_helper,
"Enable automatic conntrack helper assignment (default 0)");
-#ifdef CONFIG_SYSCTL
-static struct ctl_table helper_sysctl_table[] = {
- {
- .procname = "nf_conntrack_helper",
- .data = &init_net.ct.sysctl_auto_assign_helper,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {}
-};
-
-static int nf_conntrack_helper_init_sysctl(struct net *net)
-{
- struct ctl_table *table;
-
- table = kmemdup(helper_sysctl_table, sizeof(helper_sysctl_table),
- GFP_KERNEL);
- if (!table)
- goto out;
-
- table[0].data = &net->ct.sysctl_auto_assign_helper;
-
- /* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
- table[0].procname = NULL;
-
- net->ct.helper_sysctl_header =
- register_net_sysctl(net, "net/netfilter", table);
-
- if (!net->ct.helper_sysctl_header) {
- pr_err("nf_conntrack_helper: can't register to sysctl.\n");
- goto out_register;
- }
- return 0;
-
-out_register:
- kfree(table);
-out:
- return -ENOMEM;
-}
-
-static void nf_conntrack_helper_fini_sysctl(struct net *net)
-{
- struct ctl_table *table;
-
- table = net->ct.helper_sysctl_header->ctl_table_arg;
- unregister_net_sysctl_table(net->ct.helper_sysctl_header);
- kfree(table);
-}
-#else
-static int nf_conntrack_helper_init_sysctl(struct net *net)
-{
- return 0;
-}
-
-static void nf_conntrack_helper_fini_sysctl(struct net *net)
-{
-}
-#endif /* CONFIG_SYSCTL */
-
/* Stupid hash, but collision free for the default registrations of the
* helpers currently in the kernel. */
static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple)
@@ -533,16 +472,10 @@ static const struct nf_ct_ext_type helper_extend = {
.id = NF_CT_EXT_HELPER,
};
-int nf_conntrack_helper_pernet_init(struct net *net)
+void nf_conntrack_helper_pernet_init(struct net *net)
{
net->ct.auto_assign_helper_warned = false;
net->ct.sysctl_auto_assign_helper = nf_ct_auto_assign_helper;
- return nf_conntrack_helper_init_sysctl(net);
-}
-
-void nf_conntrack_helper_pernet_fini(struct net *net)
-{
- nf_conntrack_helper_fini_sysctl(net);
}
int nf_conntrack_helper_init(void)
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 4ae8e528943a..1213beb5a714 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -47,7 +47,6 @@
#include <net/netfilter/nf_conntrack_synproxy.h>
#ifdef CONFIG_NF_NAT_NEEDED
#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l4proto.h>
#include <net/netfilter/nf_nat_helper.h>
#endif
@@ -1688,6 +1687,22 @@ static int ctnetlink_change_timeout(struct nf_conn *ct,
return 0;
}
+#if defined(CONFIG_NF_CONNTRACK_MARK)
+static void ctnetlink_change_mark(struct nf_conn *ct,
+ const struct nlattr * const cda[])
+{
+ u32 mark, newmark, mask = 0;
+
+ if (cda[CTA_MARK_MASK])
+ mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
+
+ mark = ntohl(nla_get_be32(cda[CTA_MARK]));
+ newmark = (ct->mark & mask) ^ mark;
+ if (newmark != ct->mark)
+ ct->mark = newmark;
+}
+#endif
+
static const struct nla_policy protoinfo_policy[CTA_PROTOINFO_MAX+1] = {
[CTA_PROTOINFO_TCP] = { .type = NLA_NESTED },
[CTA_PROTOINFO_DCCP] = { .type = NLA_NESTED },
@@ -1883,7 +1898,7 @@ ctnetlink_change_conntrack(struct nf_conn *ct,
#if defined(CONFIG_NF_CONNTRACK_MARK)
if (cda[CTA_MARK])
- ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
+ ctnetlink_change_mark(ct, cda);
#endif
if (cda[CTA_SEQ_ADJ_ORIG] || cda[CTA_SEQ_ADJ_REPLY]) {
@@ -2027,7 +2042,7 @@ ctnetlink_create_conntrack(struct net *net,
#if defined(CONFIG_NF_CONNTRACK_MARK)
if (cda[CTA_MARK])
- ct->mark = ntohl(nla_get_be32(cda[CTA_MARK]));
+ ctnetlink_change_mark(ct, cda);
#endif
/* setup master conntrack: this is a confirmed expectation */
@@ -2524,14 +2539,7 @@ ctnetlink_glue_parse_ct(const struct nlattr *cda[], struct nf_conn *ct)
}
#if defined(CONFIG_NF_CONNTRACK_MARK)
if (cda[CTA_MARK]) {
- u32 mask = 0, mark, newmark;
- if (cda[CTA_MARK_MASK])
- mask = ~ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
-
- mark = ntohl(nla_get_be32(cda[CTA_MARK]));
- newmark = (ct->mark & mask) ^ mark;
- if (newmark != ct->mark)
- ct->mark = newmark;
+ ctnetlink_change_mark(ct, cda);
}
#endif
return 0;
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 40643af7137e..859f5d07a915 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -175,8 +175,7 @@ static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
static
int nf_ct_l4proto_register_sysctl(struct net *net,
- struct nf_proto_net *pn,
- const struct nf_conntrack_l4proto *l4proto)
+ struct nf_proto_net *pn)
{
int err = 0;
@@ -198,9 +197,7 @@ int nf_ct_l4proto_register_sysctl(struct net *net,
}
static
-void nf_ct_l4proto_unregister_sysctl(struct net *net,
- struct nf_proto_net *pn,
- const struct nf_conntrack_l4proto *l4proto)
+void nf_ct_l4proto_unregister_sysctl(struct nf_proto_net *pn)
{
#ifdef CONFIG_SYSCTL
if (pn->ctl_table_header != NULL)
@@ -252,7 +249,7 @@ int nf_ct_l4proto_pernet_register_one(struct net *net,
if (pn == NULL)
goto out;
- ret = nf_ct_l4proto_register_sysctl(net, pn, l4proto);
+ ret = nf_ct_l4proto_register_sysctl(net, pn);
if (ret < 0)
goto out;
@@ -296,7 +293,7 @@ void nf_ct_l4proto_pernet_unregister_one(struct net *net,
return;
pn->users--;
- nf_ct_l4proto_unregister_sysctl(net, pn, l4proto);
+ nf_ct_l4proto_unregister_sysctl(pn);
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one);
@@ -946,16 +943,14 @@ int nf_conntrack_proto_pernet_init(struct net *net)
if (err < 0)
return err;
err = nf_ct_l4proto_register_sysctl(net,
- pn,
- &nf_conntrack_l4proto_generic);
+ pn);
if (err < 0)
return err;
err = nf_ct_l4proto_pernet_register(net, builtin_l4proto,
ARRAY_SIZE(builtin_l4proto));
if (err < 0) {
- nf_ct_l4proto_unregister_sysctl(net, pn,
- &nf_conntrack_l4proto_generic);
+ nf_ct_l4proto_unregister_sysctl(pn);
return err;
}
@@ -971,9 +966,7 @@ void nf_conntrack_proto_pernet_fini(struct net *net)
nf_ct_l4proto_pernet_unregister(net, builtin_l4proto,
ARRAY_SIZE(builtin_l4proto));
pn->users--;
- nf_ct_l4proto_unregister_sysctl(net,
- pn,
- &nf_conntrack_l4proto_generic);
+ nf_ct_l4proto_unregister_sysctl(pn);
}
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 2a5e56c6d8d9..8899b51aad44 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -320,9 +320,49 @@ gre_timeout_nla_policy[CTA_TIMEOUT_GRE_MAX+1] = {
};
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
+#ifdef CONFIG_SYSCTL
+static struct ctl_table gre_sysctl_table[] = {
+ {
+ .procname = "nf_conntrack_gre_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {
+ .procname = "nf_conntrack_gre_timeout_stream",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ {}
+};
+#endif
+
+static int gre_kmemdup_sysctl_table(struct net *net, struct nf_proto_net *nf,
+ struct netns_proto_gre *net_gre)
+{
+#ifdef CONFIG_SYSCTL
+ int i;
+
+ if (nf->ctl_table)
+ return 0;
+
+ nf->ctl_table = kmemdup(gre_sysctl_table,
+ sizeof(gre_sysctl_table),
+ GFP_KERNEL);
+ if (!nf->ctl_table)
+ return -ENOMEM;
+
+ for (i = 0; i < GRE_CT_MAX; i++)
+ nf->ctl_table[i].data = &net_gre->gre_timeouts[i];
+#endif
+ return 0;
+}
+
static int gre_init_net(struct net *net)
{
struct netns_proto_gre *net_gre = gre_pernet(net);
+ struct nf_proto_net *nf = &net_gre->nf;
int i;
rwlock_init(&net_gre->keymap_lock);
@@ -330,7 +370,7 @@ static int gre_init_net(struct net *net)
for (i = 0; i < GRE_CT_MAX; i++)
net_gre->gre_timeouts[i] = gre_timeouts[i];
- return 0;
+ return gre_kmemdup_sysctl_table(net, nf, net_gre);
}
/* protocol helper struct */
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index c879d8d78cfd..b4f5d5e82031 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -29,7 +29,7 @@
static const unsigned int udp_timeouts[UDP_CT_MAX] = {
[UDP_CT_UNREPLIED] = 30*HZ,
- [UDP_CT_REPLIED] = 180*HZ,
+ [UDP_CT_REPLIED] = 120*HZ,
};
static unsigned int *udp_get_timeouts(struct net *net)
@@ -100,11 +100,21 @@ static int udp_packet(struct nf_conn *ct,
if (!timeouts)
timeouts = udp_get_timeouts(nf_ct_net(ct));
+ if (!nf_ct_is_confirmed(ct))
+ ct->proto.udp.stream_ts = 2 * HZ + jiffies;
+
/* If we've seen traffic both ways, this is some kind of UDP
- stream. Extend timeout. */
+ * stream. Set Assured.
+ */
if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
- nf_ct_refresh_acct(ct, ctinfo, skb,
- timeouts[UDP_CT_REPLIED]);
+ unsigned long extra = timeouts[UDP_CT_UNREPLIED];
+
+ /* Still active after two seconds? Extend timeout. */
+ if (time_after(jiffies, ct->proto.udp.stream_ts))
+ extra = timeouts[UDP_CT_REPLIED];
+
+ nf_ct_refresh_acct(ct, ctinfo, skb, extra);
+
/* Also, more likely to be important, and not a probe */
if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
nf_conntrack_event_cache(IPCT_ASSURED, ct);
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 463d17d349c1..b6177fd73304 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -267,6 +267,24 @@ static const char* l4proto_name(u16 proto)
return "unknown";
}
+static unsigned int
+seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir)
+{
+ struct nf_conn_acct *acct;
+ struct nf_conn_counter *counter;
+
+ acct = nf_conn_acct_find(ct);
+ if (!acct)
+ return 0;
+
+ counter = acct->counter;
+ seq_printf(s, "packets=%llu bytes=%llu ",
+ (unsigned long long)atomic64_read(&counter[dir].packets),
+ (unsigned long long)atomic64_read(&counter[dir].bytes));
+
+ return 0;
+}
+
/* return 0 on success, 1 in case of error */
static int ct_seq_show(struct seq_file *s, void *v)
{
@@ -514,36 +532,53 @@ nf_conntrack_hash_sysctl(struct ctl_table *table, int write,
static struct ctl_table_header *nf_ct_netfilter_header;
+enum nf_ct_sysctl_index {
+ NF_SYSCTL_CT_MAX,
+ NF_SYSCTL_CT_COUNT,
+ NF_SYSCTL_CT_BUCKETS,
+ NF_SYSCTL_CT_CHECKSUM,
+ NF_SYSCTL_CT_LOG_INVALID,
+ NF_SYSCTL_CT_EXPECT_MAX,
+ NF_SYSCTL_CT_ACCT,
+ NF_SYSCTL_CT_HELPER,
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ NF_SYSCTL_CT_EVENTS,
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ NF_SYSCTL_CT_TIMESTAMP,
+#endif
+};
+
static struct ctl_table nf_ct_sysctl_table[] = {
- {
+ [NF_SYSCTL_CT_MAX] = {
.procname = "nf_conntrack_max",
.data = &nf_conntrack_max,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
+ [NF_SYSCTL_CT_COUNT] = {
.procname = "nf_conntrack_count",
.data = &init_net.ct.count,
.maxlen = sizeof(int),
.mode = 0444,
.proc_handler = proc_dointvec,
},
- {
+ [NF_SYSCTL_CT_BUCKETS] = {
.procname = "nf_conntrack_buckets",
.data = &nf_conntrack_htable_size_user,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = nf_conntrack_hash_sysctl,
},
- {
+ [NF_SYSCTL_CT_CHECKSUM] = {
.procname = "nf_conntrack_checksum",
.data = &init_net.ct.sysctl_checksum,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
- {
+ [NF_SYSCTL_CT_LOG_INVALID] = {
.procname = "nf_conntrack_log_invalid",
.data = &init_net.ct.sysctl_log_invalid,
.maxlen = sizeof(unsigned int),
@@ -552,13 +587,45 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.extra1 = &log_invalid_proto_min,
.extra2 = &log_invalid_proto_max,
},
- {
+ [NF_SYSCTL_CT_EXPECT_MAX] = {
.procname = "nf_conntrack_expect_max",
.data = &nf_ct_expect_max,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ [NF_SYSCTL_CT_ACCT] = {
+ .procname = "nf_conntrack_acct",
+ .data = &init_net.ct.sysctl_acct,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ [NF_SYSCTL_CT_HELPER] = {
+ .procname = "nf_conntrack_helper",
+ .data = &init_net.ct.sysctl_auto_assign_helper,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ [NF_SYSCTL_CT_EVENTS] = {
+ .procname = "nf_conntrack_events",
+ .data = &init_net.ct.sysctl_events,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ [NF_SYSCTL_CT_TIMESTAMP] = {
+ .procname = "nf_conntrack_timestamp",
+ .data = &init_net.ct.sysctl_tstamp,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+#endif
{ }
};
@@ -582,16 +649,28 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net)
if (!table)
goto out_kmemdup;
- table[1].data = &net->ct.count;
- table[3].data = &net->ct.sysctl_checksum;
- table[4].data = &net->ct.sysctl_log_invalid;
+ table[NF_SYSCTL_CT_COUNT].data = &net->ct.count;
+ table[NF_SYSCTL_CT_CHECKSUM].data = &net->ct.sysctl_checksum;
+ table[NF_SYSCTL_CT_LOG_INVALID].data = &net->ct.sysctl_log_invalid;
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ table[NF_SYSCTL_CT_EVENTS].data = &net->ct.sysctl_events;
+#endif
/* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
- table[0].procname = NULL;
+ if (net->user_ns != &init_user_ns) {
+ table[NF_SYSCTL_CT_MAX].procname = NULL;
+ table[NF_SYSCTL_CT_ACCT].procname = NULL;
+ table[NF_SYSCTL_CT_HELPER].procname = NULL;
+#ifdef CONFIG_NF_CONNTRACK_TIMESTAMP
+ table[NF_SYSCTL_CT_TIMESTAMP].procname = NULL;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+ table[NF_SYSCTL_CT_EVENTS].procname = NULL;
+#endif
+ }
if (!net_eq(&init_net, net))
- table[2].mode = 0444;
+ table[NF_SYSCTL_CT_BUCKETS].mode = 0444;
net->ct.sysctl_header = register_net_sysctl(net, "net/netfilter", table);
if (!net->ct.sysctl_header)
diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c
index 56766cb26e40..705b912bd91f 100644
--- a/net/netfilter/nf_conntrack_timestamp.c
+++ b/net/netfilter/nf_conntrack_timestamp.c
@@ -22,83 +22,15 @@ static bool nf_ct_tstamp __read_mostly;
module_param_named(tstamp, nf_ct_tstamp, bool, 0644);
MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping.");
-#ifdef CONFIG_SYSCTL
-static struct ctl_table tstamp_sysctl_table[] = {
- {
- .procname = "nf_conntrack_timestamp",
- .data = &init_net.ct.sysctl_tstamp,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {}
-};
-#endif /* CONFIG_SYSCTL */
-
static const struct nf_ct_ext_type tstamp_extend = {
.len = sizeof(struct nf_conn_tstamp),
.align = __alignof__(struct nf_conn_tstamp),
.id = NF_CT_EXT_TSTAMP,
};
-#ifdef CONFIG_SYSCTL
-static int nf_conntrack_tstamp_init_sysctl(struct net *net)
-{
- struct ctl_table *table;
-
- table = kmemdup(tstamp_sysctl_table, sizeof(tstamp_sysctl_table),
- GFP_KERNEL);
- if (!table)
- goto out;
-
- table[0].data = &net->ct.sysctl_tstamp;
-
- /* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
- table[0].procname = NULL;
-
- net->ct.tstamp_sysctl_header = register_net_sysctl(net, "net/netfilter",
- table);
- if (!net->ct.tstamp_sysctl_header) {
- pr_err("can't register to sysctl\n");
- goto out_register;
- }
- return 0;
-
-out_register:
- kfree(table);
-out:
- return -ENOMEM;
-}
-
-static void nf_conntrack_tstamp_fini_sysctl(struct net *net)
-{
- struct ctl_table *table;
-
- table = net->ct.tstamp_sysctl_header->ctl_table_arg;
- unregister_net_sysctl_table(net->ct.tstamp_sysctl_header);
- kfree(table);
-}
-#else
-static int nf_conntrack_tstamp_init_sysctl(struct net *net)
-{
- return 0;
-}
-
-static void nf_conntrack_tstamp_fini_sysctl(struct net *net)
-{
-}
-#endif
-
-int nf_conntrack_tstamp_pernet_init(struct net *net)
+void nf_conntrack_tstamp_pernet_init(struct net *net)
{
net->ct.sysctl_tstamp = nf_ct_tstamp;
- return nf_conntrack_tstamp_init_sysctl(net);
-}
-
-void nf_conntrack_tstamp_pernet_fini(struct net *net)
-{
- nf_conntrack_tstamp_fini_sysctl(net);
}
int nf_conntrack_tstamp_init(void)
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index b7a4816add76..fa0844e2a68d 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -247,9 +247,10 @@ flow_offload_lookup(struct nf_flowtable *flow_table,
}
EXPORT_SYMBOL_GPL(flow_offload_lookup);
-int nf_flow_table_iterate(struct nf_flowtable *flow_table,
- void (*iter)(struct flow_offload *flow, void *data),
- void *data)
+static int
+nf_flow_table_iterate(struct nf_flowtable *flow_table,
+ void (*iter)(struct flow_offload *flow, void *data),
+ void *data)
{
struct flow_offload_tuple_rhash *tuplehash;
struct rhashtable_iter hti;
@@ -279,40 +280,19 @@ int nf_flow_table_iterate(struct nf_flowtable *flow_table,
return err;
}
-EXPORT_SYMBOL_GPL(nf_flow_table_iterate);
static inline bool nf_flow_has_expired(const struct flow_offload *flow)
{
return (__s32)(flow->timeout - (u32)jiffies) <= 0;
}
-static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table)
+static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data)
{
- struct flow_offload_tuple_rhash *tuplehash;
- struct rhashtable_iter hti;
- struct flow_offload *flow;
-
- rhashtable_walk_enter(&flow_table->rhashtable, &hti);
- rhashtable_walk_start(&hti);
+ struct nf_flowtable *flow_table = data;
- while ((tuplehash = rhashtable_walk_next(&hti))) {
- if (IS_ERR(tuplehash)) {
- if (PTR_ERR(tuplehash) != -EAGAIN)
- break;
- continue;
- }
- if (tuplehash->tuple.dir)
- continue;
-
- flow = container_of(tuplehash, struct flow_offload, tuplehash[0]);
-
- if (nf_flow_has_expired(flow) ||
- (flow->flags & (FLOW_OFFLOAD_DYING |
- FLOW_OFFLOAD_TEARDOWN)))
- flow_offload_del(flow_table, flow);
- }
- rhashtable_walk_stop(&hti);
- rhashtable_walk_exit(&hti);
+ if (nf_flow_has_expired(flow) ||
+ (flow->flags & (FLOW_OFFLOAD_DYING | FLOW_OFFLOAD_TEARDOWN)))
+ flow_offload_del(flow_table, flow);
}
static void nf_flow_offload_work_gc(struct work_struct *work)
@@ -320,7 +300,7 @@ static void nf_flow_offload_work_gc(struct work_struct *work)
struct nf_flowtable *flow_table;
flow_table = container_of(work, struct nf_flowtable, gc_work.work);
- nf_flow_offload_gc_step(flow_table);
+ nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
queue_delayed_work(system_power_efficient_wq, &flow_table->gc_work, HZ);
}
@@ -504,7 +484,7 @@ void nf_flow_table_free(struct nf_flowtable *flow_table)
mutex_unlock(&flowtable_lock);
cancel_delayed_work_sync(&flow_table->gc_work);
nf_flow_table_iterate(flow_table, nf_flow_table_do_cleanup, NULL);
- nf_flow_offload_gc_step(flow_table);
+ nf_flow_table_iterate(flow_table, nf_flow_offload_gc_step, flow_table);
rhashtable_destroy(&flow_table->rhashtable);
}
EXPORT_SYMBOL_GPL(nf_flow_table_free);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 2268b10a9dcf..d159e9e7835b 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -23,7 +23,6 @@
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/netfilter/nf_nat_l4proto.h>
#include <net/netfilter/nf_nat_core.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/nf_conntrack_helper.h>
@@ -38,8 +37,6 @@ static spinlock_t nf_nat_locks[CONNTRACK_LOCKS];
static DEFINE_MUTEX(nf_nat_proto_mutex);
static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO]
__read_mostly;
-static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO]
- __read_mostly;
static unsigned int nat_net_id __read_mostly;
static struct hlist_head *nf_nat_bysource __read_mostly;
@@ -67,13 +64,6 @@ __nf_nat_l3proto_find(u8 family)
return rcu_dereference(nf_nat_l3protos[family]);
}
-inline const struct nf_nat_l4proto *
-__nf_nat_l4proto_find(u8 family, u8 protonum)
-{
- return rcu_dereference(nf_nat_l4protos[family][protonum]);
-}
-EXPORT_SYMBOL_GPL(__nf_nat_l4proto_find);
-
#ifdef CONFIG_XFRM
static void __nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl)
{
@@ -173,27 +163,66 @@ nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
}
EXPORT_SYMBOL(nf_nat_used_tuple);
+static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t,
+ const struct nf_nat_range2 *range)
+{
+ if (t->src.l3num == NFPROTO_IPV4)
+ return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) &&
+ ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip);
+
+ return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 &&
+ ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0;
+}
+
+/* Is the manipable part of the tuple between min and max incl? */
+static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype,
+ const union nf_conntrack_man_proto *min,
+ const union nf_conntrack_man_proto *max)
+{
+ __be16 port;
+
+ switch (tuple->dst.protonum) {
+ case IPPROTO_ICMP: /* fallthrough */
+ case IPPROTO_ICMPV6:
+ return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
+ ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
+ case IPPROTO_GRE: /* all fall though */
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ case IPPROTO_DCCP:
+ case IPPROTO_SCTP:
+ if (maniptype == NF_NAT_MANIP_SRC)
+ port = tuple->src.u.all;
+ else
+ port = tuple->dst.u.all;
+
+ return ntohs(port) >= ntohs(min->all) &&
+ ntohs(port) <= ntohs(max->all);
+ default:
+ return true;
+ }
+}
+
/* If we source map this tuple so reply looks like reply_tuple, will
* that meet the constraints of range.
*/
-static int in_range(const struct nf_nat_l3proto *l3proto,
- const struct nf_nat_l4proto *l4proto,
- const struct nf_conntrack_tuple *tuple,
+static int in_range(const struct nf_conntrack_tuple *tuple,
const struct nf_nat_range2 *range)
{
/* If we are supposed to map IPs, then we must be in the
* range specified, otherwise let this drag us onto a new src IP.
*/
if (range->flags & NF_NAT_RANGE_MAP_IPS &&
- !l3proto->in_range(tuple, range))
+ !nf_nat_inet_in_range(tuple, range))
return 0;
- if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) ||
- l4proto->in_range(tuple, NF_NAT_MANIP_SRC,
- &range->min_proto, &range->max_proto))
+ if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
return 1;
- return 0;
+ return l4proto_in_range(tuple, NF_NAT_MANIP_SRC,
+ &range->min_proto, &range->max_proto);
}
static inline int
@@ -212,8 +241,6 @@ same_src(const struct nf_conn *ct,
static int
find_appropriate_src(struct net *net,
const struct nf_conntrack_zone *zone,
- const struct nf_nat_l3proto *l3proto,
- const struct nf_nat_l4proto *l4proto,
const struct nf_conntrack_tuple *tuple,
struct nf_conntrack_tuple *result,
const struct nf_nat_range2 *range)
@@ -230,7 +257,7 @@ find_appropriate_src(struct net *net,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
result->dst = tuple->dst;
- if (in_range(l3proto, l4proto, result, range))
+ if (in_range(result, range))
return 1;
}
}
@@ -311,6 +338,123 @@ find_best_ips_proto(const struct nf_conntrack_zone *zone,
}
}
+/* Alter the per-proto part of the tuple (depending on maniptype), to
+ * give a unique tuple in the given range if possible.
+ *
+ * Per-protocol part of tuple is initialized to the incoming packet.
+ */
+static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range2 *range,
+ enum nf_nat_manip_type maniptype,
+ const struct nf_conn *ct)
+{
+ unsigned int range_size, min, max, i, attempts;
+ __be16 *keyptr;
+ u16 off;
+ static const unsigned int max_attempts = 128;
+
+ switch (tuple->dst.protonum) {
+ case IPPROTO_ICMP: /* fallthrough */
+ case IPPROTO_ICMPV6:
+ /* id is same for either direction... */
+ keyptr = &tuple->src.u.icmp.id;
+ min = range->min_proto.icmp.id;
+ range_size = ntohs(range->max_proto.icmp.id) -
+ ntohs(range->min_proto.icmp.id) + 1;
+ goto find_free_id;
+#if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
+ case IPPROTO_GRE:
+ /* If there is no master conntrack we are not PPTP,
+ do not change tuples */
+ if (!ct->master)
+ return;
+
+ if (maniptype == NF_NAT_MANIP_SRC)
+ keyptr = &tuple->src.u.gre.key;
+ else
+ keyptr = &tuple->dst.u.gre.key;
+
+ if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
+ min = 1;
+ range_size = 65535;
+ } else {
+ min = ntohs(range->min_proto.gre.key);
+ range_size = ntohs(range->max_proto.gre.key) - min + 1;
+ }
+ goto find_free_id;
+#endif
+ case IPPROTO_UDP: /* fallthrough */
+ case IPPROTO_UDPLITE: /* fallthrough */
+ case IPPROTO_TCP: /* fallthrough */
+ case IPPROTO_SCTP: /* fallthrough */
+ case IPPROTO_DCCP: /* fallthrough */
+ if (maniptype == NF_NAT_MANIP_SRC)
+ keyptr = &tuple->src.u.all;
+ else
+ keyptr = &tuple->dst.u.all;
+
+ break;
+ default:
+ return;
+ }
+
+ /* If no range specified... */
+ if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
+ /* If it's dst rewrite, can't change port */
+ if (maniptype == NF_NAT_MANIP_DST)
+ return;
+
+ if (ntohs(*keyptr) < 1024) {
+ /* Loose convention: >> 512 is credential passing */
+ if (ntohs(*keyptr) < 512) {
+ min = 1;
+ range_size = 511 - min + 1;
+ } else {
+ min = 600;
+ range_size = 1023 - min + 1;
+ }
+ } else {
+ min = 1024;
+ range_size = 65535 - 1024 + 1;
+ }
+ } else {
+ min = ntohs(range->min_proto.all);
+ max = ntohs(range->max_proto.all);
+ if (unlikely(max < min))
+ swap(max, min);
+ range_size = max - min + 1;
+ }
+
+find_free_id:
+ if (range->flags & NF_NAT_RANGE_PROTO_OFFSET)
+ off = (ntohs(*keyptr) - ntohs(range->base_proto.all));
+ else
+ off = prandom_u32();
+
+ attempts = range_size;
+ if (attempts > max_attempts)
+ attempts = max_attempts;
+
+ /* We are in softirq; doing a search of the entire range risks
+ * soft lockup when all tuples are already used.
+ *
+ * If we can't find any free port from first offset, pick a new
+ * one and try again, with ever smaller search window.
+ */
+another_round:
+ for (i = 0; i < attempts; i++, off++) {
+ *keyptr = htons(min + off % range_size);
+ if (!nf_nat_used_tuple(tuple, ct))
+ return;
+ }
+
+ if (attempts >= range_size || attempts < 16)
+ return;
+ attempts /= 2;
+ off = prandom_u32();
+ goto another_round;
+}
+
/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING,
* we change the source to map into the range. For NF_INET_PRE_ROUTING
* and NF_INET_LOCAL_OUT, we change the destination to map into the
@@ -325,17 +469,10 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
const struct nf_conntrack_zone *zone;
- const struct nf_nat_l3proto *l3proto;
- const struct nf_nat_l4proto *l4proto;
struct net *net = nf_ct_net(ct);
zone = nf_ct_zone(ct);
- rcu_read_lock();
- l3proto = __nf_nat_l3proto_find(orig_tuple->src.l3num);
- l4proto = __nf_nat_l4proto_find(orig_tuple->src.l3num,
- orig_tuple->dst.protonum);
-
/* 1) If this srcip/proto/src-proto-part is currently mapped,
* and that same mapping gives a unique tuple within the given
* range, use that.
@@ -347,16 +484,16 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
if (maniptype == NF_NAT_MANIP_SRC &&
!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
/* try the original tuple first */
- if (in_range(l3proto, l4proto, orig_tuple, range)) {
+ if (in_range(orig_tuple, range)) {
if (!nf_nat_used_tuple(orig_tuple, ct)) {
*tuple = *orig_tuple;
- goto out;
+ return;
}
- } else if (find_appropriate_src(net, zone, l3proto, l4proto,
+ } else if (find_appropriate_src(net, zone,
orig_tuple, tuple, range)) {
pr_debug("get_unique_tuple: Found current src map\n");
if (!nf_nat_used_tuple(tuple, ct))
- goto out;
+ return;
}
}
@@ -372,21 +509,19 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM_ALL)) {
if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
if (!(range->flags & NF_NAT_RANGE_PROTO_OFFSET) &&
- l4proto->in_range(tuple, maniptype,
+ l4proto_in_range(tuple, maniptype,
&range->min_proto,
&range->max_proto) &&
(range->min_proto.all == range->max_proto.all ||
!nf_nat_used_tuple(tuple, ct)))
- goto out;
+ return;
} else if (!nf_nat_used_tuple(tuple, ct)) {
- goto out;
+ return;
}
}
/* Last chance: get protocol to try to obtain unique tuple. */
- l4proto->unique_tuple(l3proto, tuple, range, maniptype, ct);
-out:
- rcu_read_unlock();
+ nf_nat_l4proto_unique_tuple(tuple, range, maniptype, ct);
}
struct nf_conn_nat *nf_ct_nat_ext_add(struct nf_conn *ct)
@@ -502,16 +637,13 @@ static unsigned int nf_nat_manip_pkt(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_dir dir)
{
const struct nf_nat_l3proto *l3proto;
- const struct nf_nat_l4proto *l4proto;
struct nf_conntrack_tuple target;
/* We are aiming to look like inverse of other direction. */
nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
l3proto = __nf_nat_l3proto_find(target.src.l3num);
- l4proto = __nf_nat_l4proto_find(target.src.l3num,
- target.dst.protonum);
- if (!l3proto->manip_pkt(skb, 0, l4proto, &target, mtype))
+ if (!l3proto->manip_pkt(skb, 0, &target, mtype))
return NF_DROP;
return NF_ACCEPT;
@@ -667,16 +799,6 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data)
return 0;
}
-static void nf_nat_l4proto_clean(u8 l3proto, u8 l4proto)
-{
- struct nf_nat_proto_clean clean = {
- .l3proto = l3proto,
- .l4proto = l4proto,
- };
-
- nf_ct_iterate_destroy(nf_nat_proto_remove, &clean);
-}
-
static void nf_nat_l3proto_clean(u8 l3proto)
{
struct nf_nat_proto_clean clean = {
@@ -686,82 +808,8 @@ static void nf_nat_l3proto_clean(u8 l3proto)
nf_ct_iterate_destroy(nf_nat_proto_remove, &clean);
}
-/* Protocol registration. */
-int nf_nat_l4proto_register(u8 l3proto, const struct nf_nat_l4proto *l4proto)
-{
- const struct nf_nat_l4proto **l4protos;
- unsigned int i;
- int ret = 0;
-
- mutex_lock(&nf_nat_proto_mutex);
- if (nf_nat_l4protos[l3proto] == NULL) {
- l4protos = kmalloc_array(IPPROTO_MAX,
- sizeof(struct nf_nat_l4proto *),
- GFP_KERNEL);
- if (l4protos == NULL) {
- ret = -ENOMEM;
- goto out;
- }
-
- for (i = 0; i < IPPROTO_MAX; i++)
- RCU_INIT_POINTER(l4protos[i], &nf_nat_l4proto_unknown);
-
- /* Before making proto_array visible to lockless readers,
- * we must make sure its content is committed to memory.
- */
- smp_wmb();
-
- nf_nat_l4protos[l3proto] = l4protos;
- }
-
- if (rcu_dereference_protected(
- nf_nat_l4protos[l3proto][l4proto->l4proto],
- lockdep_is_held(&nf_nat_proto_mutex)
- ) != &nf_nat_l4proto_unknown) {
- ret = -EBUSY;
- goto out;
- }
- RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto], l4proto);
- out:
- mutex_unlock(&nf_nat_proto_mutex);
- return ret;
-}
-EXPORT_SYMBOL_GPL(nf_nat_l4proto_register);
-
-/* No one stores the protocol anywhere; simply delete it. */
-void nf_nat_l4proto_unregister(u8 l3proto, const struct nf_nat_l4proto *l4proto)
-{
- mutex_lock(&nf_nat_proto_mutex);
- RCU_INIT_POINTER(nf_nat_l4protos[l3proto][l4proto->l4proto],
- &nf_nat_l4proto_unknown);
- mutex_unlock(&nf_nat_proto_mutex);
- synchronize_rcu();
-
- nf_nat_l4proto_clean(l3proto, l4proto->l4proto);
-}
-EXPORT_SYMBOL_GPL(nf_nat_l4proto_unregister);
-
int nf_nat_l3proto_register(const struct nf_nat_l3proto *l3proto)
{
- mutex_lock(&nf_nat_proto_mutex);
- RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_TCP],
- &nf_nat_l4proto_tcp);
- RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDP],
- &nf_nat_l4proto_udp);
-#ifdef CONFIG_NF_NAT_PROTO_DCCP
- RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_DCCP],
- &nf_nat_l4proto_dccp);
-#endif
-#ifdef CONFIG_NF_NAT_PROTO_SCTP
- RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_SCTP],
- &nf_nat_l4proto_sctp);
-#endif
-#ifdef CONFIG_NF_NAT_PROTO_UDPLITE
- RCU_INIT_POINTER(nf_nat_l4protos[l3proto->l3proto][IPPROTO_UDPLITE],
- &nf_nat_l4proto_udplite);
-#endif
- mutex_unlock(&nf_nat_proto_mutex);
-
RCU_INIT_POINTER(nf_nat_l3protos[l3proto->l3proto], l3proto);
return 0;
}
@@ -802,12 +850,26 @@ static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
[CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 },
};
+static int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[],
+ struct nf_nat_range2 *range)
+{
+ if (tb[CTA_PROTONAT_PORT_MIN]) {
+ range->min_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
+ range->max_proto.all = range->min_proto.all;
+ range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+ }
+ if (tb[CTA_PROTONAT_PORT_MAX]) {
+ range->max_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
+ range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+ }
+ return 0;
+}
+
static int nfnetlink_parse_nat_proto(struct nlattr *attr,
const struct nf_conn *ct,
struct nf_nat_range2 *range)
{
struct nlattr *tb[CTA_PROTONAT_MAX+1];
- const struct nf_nat_l4proto *l4proto;
int err;
err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr,
@@ -815,11 +877,7 @@ static int nfnetlink_parse_nat_proto(struct nlattr *attr,
if (err < 0)
return err;
- l4proto = __nf_nat_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
- if (l4proto->nlattr_to_range)
- err = l4proto->nlattr_to_range(tb, range);
-
- return err;
+ return nf_nat_l4proto_nlattr_to_range(tb, range);
}
static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
@@ -1082,7 +1140,6 @@ static int __init nf_nat_init(void)
static void __exit nf_nat_cleanup(void)
{
struct nf_nat_proto_clean clean = {};
- unsigned int i;
nf_ct_iterate_destroy(nf_nat_proto_clean, &clean);
@@ -1090,10 +1147,6 @@ static void __exit nf_nat_cleanup(void)
nf_ct_helper_expectfn_unregister(&follow_master_nat);
RCU_INIT_POINTER(nf_nat_hook, NULL);
- synchronize_rcu();
-
- for (i = 0; i < NFPROTO_NUMPROTO; i++)
- kfree(nf_nat_l4protos[i]);
synchronize_net();
kvfree(nf_nat_bysource);
unregister_pernet_subsys(&nat_net_ops);
diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c
new file mode 100644
index 000000000000..f83bf9d8c9f5
--- /dev/null
+++ b/net/netfilter/nf_nat_proto.c
@@ -0,0 +1,343 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+
+#include <linux/dccp.h>
+#include <linux/sctp.h>
+#include <net/sctp/checksum.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+
+static void
+__udp_manip_pkt(struct sk_buff *skb,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, struct udphdr *hdr,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype, bool do_csum)
+{
+ __be16 *portptr, newport;
+
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ /* Get rid of src port */
+ newport = tuple->src.u.udp.port;
+ portptr = &hdr->source;
+ } else {
+ /* Get rid of dst port */
+ newport = tuple->dst.u.udp.port;
+ portptr = &hdr->dest;
+ }
+ if (do_csum) {
+ l3proto->csum_update(skb, iphdroff, &hdr->check,
+ tuple, maniptype);
+ inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
+ false);
+ if (!hdr->check)
+ hdr->check = CSUM_MANGLED_0;
+ }
+ *portptr = newport;
+}
+
+static bool udp_manip_pkt(struct sk_buff *skb,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, unsigned int hdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+ struct udphdr *hdr;
+ bool do_csum;
+
+ if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+ return false;
+
+ hdr = (struct udphdr *)(skb->data + hdroff);
+ do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL;
+
+ __udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, do_csum);
+ return true;
+}
+
+static bool udplite_manip_pkt(struct sk_buff *skb,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, unsigned int hdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+#ifdef CONFIG_NF_CT_PROTO_UDPLITE
+ struct udphdr *hdr;
+
+ if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+ return false;
+
+ hdr = (struct udphdr *)(skb->data + hdroff);
+ __udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, true);
+#endif
+ return true;
+}
+
+static bool
+sctp_manip_pkt(struct sk_buff *skb,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, unsigned int hdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+#ifdef CONFIG_NF_CT_PROTO_SCTP
+ struct sctphdr *hdr;
+ int hdrsize = 8;
+
+ /* This could be an inner header returned in imcp packet; in such
+ * cases we cannot update the checksum field since it is outside
+ * of the 8 bytes of transport layer headers we are guaranteed.
+ */
+ if (skb->len >= hdroff + sizeof(*hdr))
+ hdrsize = sizeof(*hdr);
+
+ if (!skb_make_writable(skb, hdroff + hdrsize))
+ return false;
+
+ hdr = (struct sctphdr *)(skb->data + hdroff);
+
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ /* Get rid of src port */
+ hdr->source = tuple->src.u.sctp.port;
+ } else {
+ /* Get rid of dst port */
+ hdr->dest = tuple->dst.u.sctp.port;
+ }
+
+ if (hdrsize < sizeof(*hdr))
+ return true;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ hdr->checksum = sctp_compute_cksum(skb, hdroff);
+ skb->ip_summed = CHECKSUM_NONE;
+ }
+
+#endif
+ return true;
+}
+
+static bool
+tcp_manip_pkt(struct sk_buff *skb,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, unsigned int hdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+ struct tcphdr *hdr;
+ __be16 *portptr, newport, oldport;
+ int hdrsize = 8; /* TCP connection tracking guarantees this much */
+
+ /* this could be a inner header returned in icmp packet; in such
+ cases we cannot update the checksum field since it is outside of
+ the 8 bytes of transport layer headers we are guaranteed */
+ if (skb->len >= hdroff + sizeof(struct tcphdr))
+ hdrsize = sizeof(struct tcphdr);
+
+ if (!skb_make_writable(skb, hdroff + hdrsize))
+ return false;
+
+ hdr = (struct tcphdr *)(skb->data + hdroff);
+
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ /* Get rid of src port */
+ newport = tuple->src.u.tcp.port;
+ portptr = &hdr->source;
+ } else {
+ /* Get rid of dst port */
+ newport = tuple->dst.u.tcp.port;
+ portptr = &hdr->dest;
+ }
+
+ oldport = *portptr;
+ *portptr = newport;
+
+ if (hdrsize < sizeof(*hdr))
+ return true;
+
+ l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
+ inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false);
+ return true;
+}
+
+static bool
+dccp_manip_pkt(struct sk_buff *skb,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, unsigned int hdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+#ifdef CONFIG_NF_CT_PROTO_DCCP
+ struct dccp_hdr *hdr;
+ __be16 *portptr, oldport, newport;
+ int hdrsize = 8; /* DCCP connection tracking guarantees this much */
+
+ if (skb->len >= hdroff + sizeof(struct dccp_hdr))
+ hdrsize = sizeof(struct dccp_hdr);
+
+ if (!skb_make_writable(skb, hdroff + hdrsize))
+ return false;
+
+ hdr = (struct dccp_hdr *)(skb->data + hdroff);
+
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ newport = tuple->src.u.dccp.port;
+ portptr = &hdr->dccph_sport;
+ } else {
+ newport = tuple->dst.u.dccp.port;
+ portptr = &hdr->dccph_dport;
+ }
+
+ oldport = *portptr;
+ *portptr = newport;
+
+ if (hdrsize < sizeof(*hdr))
+ return true;
+
+ l3proto->csum_update(skb, iphdroff, &hdr->dccph_checksum,
+ tuple, maniptype);
+ inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
+ false);
+#endif
+ return true;
+}
+
+static bool
+icmp_manip_pkt(struct sk_buff *skb,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, unsigned int hdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+ struct icmphdr *hdr;
+
+ if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+ return false;
+
+ hdr = (struct icmphdr *)(skb->data + hdroff);
+ inet_proto_csum_replace2(&hdr->checksum, skb,
+ hdr->un.echo.id, tuple->src.u.icmp.id, false);
+ hdr->un.echo.id = tuple->src.u.icmp.id;
+ return true;
+}
+
+static bool
+icmpv6_manip_pkt(struct sk_buff *skb,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, unsigned int hdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+ struct icmp6hdr *hdr;
+
+ if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+ return false;
+
+ hdr = (struct icmp6hdr *)(skb->data + hdroff);
+ l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum,
+ tuple, maniptype);
+ if (hdr->icmp6_type == ICMPV6_ECHO_REQUEST ||
+ hdr->icmp6_type == ICMPV6_ECHO_REPLY) {
+ inet_proto_csum_replace2(&hdr->icmp6_cksum, skb,
+ hdr->icmp6_identifier,
+ tuple->src.u.icmp.id, false);
+ hdr->icmp6_identifier = tuple->src.u.icmp.id;
+ }
+ return true;
+}
+
+/* manipulate a GRE packet according to maniptype */
+static bool
+gre_manip_pkt(struct sk_buff *skb,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, unsigned int hdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+#if IS_ENABLED(CONFIG_NF_CT_PROTO_GRE)
+ const struct gre_base_hdr *greh;
+ struct pptp_gre_header *pgreh;
+
+ /* pgreh includes two optional 32bit fields which are not required
+ * to be there. That's where the magic '8' comes from */
+ if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8))
+ return false;
+
+ greh = (void *)skb->data + hdroff;
+ pgreh = (struct pptp_gre_header *)greh;
+
+ /* we only have destination manip of a packet, since 'source key'
+ * is not present in the packet itself */
+ if (maniptype != NF_NAT_MANIP_DST)
+ return true;
+
+ switch (greh->flags & GRE_VERSION) {
+ case GRE_VERSION_0:
+ /* We do not currently NAT any GREv0 packets.
+ * Try to behave like "nf_nat_proto_unknown" */
+ break;
+ case GRE_VERSION_1:
+ pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
+ pgreh->call_id = tuple->dst.u.gre.key;
+ break;
+ default:
+ pr_debug("can't nat unknown GRE version\n");
+ return false;
+ }
+#endif
+ return true;
+}
+
+bool nf_nat_l4proto_manip_pkt(struct sk_buff *skb,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, unsigned int hdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+ switch (tuple->dst.protonum) {
+ case IPPROTO_TCP:
+ return tcp_manip_pkt(skb, l3proto, iphdroff, hdroff,
+ tuple, maniptype);
+ case IPPROTO_UDP:
+ return udp_manip_pkt(skb, l3proto, iphdroff, hdroff,
+ tuple, maniptype);
+ case IPPROTO_UDPLITE:
+ return udplite_manip_pkt(skb, l3proto, iphdroff, hdroff,
+ tuple, maniptype);
+ case IPPROTO_SCTP:
+ return sctp_manip_pkt(skb, l3proto, iphdroff, hdroff,
+ tuple, maniptype);
+ case IPPROTO_ICMP:
+ return icmp_manip_pkt(skb, l3proto, iphdroff, hdroff,
+ tuple, maniptype);
+ case IPPROTO_ICMPV6:
+ return icmpv6_manip_pkt(skb, l3proto, iphdroff, hdroff,
+ tuple, maniptype);
+ case IPPROTO_DCCP:
+ return dccp_manip_pkt(skb, l3proto, iphdroff, hdroff,
+ tuple, maniptype);
+ case IPPROTO_GRE:
+ return gre_manip_pkt(skb, l3proto, iphdroff, hdroff,
+ tuple, maniptype);
+ }
+
+ /* If we don't know protocol -- no error, pass it unmodified. */
+ return true;
+}
+EXPORT_SYMBOL_GPL(nf_nat_l4proto_manip_pkt);
diff --git a/net/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c
deleted file mode 100644
index 5d849d835561..000000000000
--- a/net/netfilter/nf_nat_proto_common.c
+++ /dev/null
@@ -1,120 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2008 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/random.h>
-#include <linux/netfilter.h>
-#include <linux/export.h>
-
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-
-bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype,
- const union nf_conntrack_man_proto *min,
- const union nf_conntrack_man_proto *max)
-{
- __be16 port;
-
- if (maniptype == NF_NAT_MANIP_SRC)
- port = tuple->src.u.all;
- else
- port = tuple->dst.u.all;
-
- return ntohs(port) >= ntohs(min->all) &&
- ntohs(port) <= ntohs(max->all);
-}
-EXPORT_SYMBOL_GPL(nf_nat_l4proto_in_range);
-
-void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto,
- struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct,
- u16 *rover)
-{
- unsigned int range_size, min, max, i;
- __be16 *portptr;
- u_int16_t off;
-
- if (maniptype == NF_NAT_MANIP_SRC)
- portptr = &tuple->src.u.all;
- else
- portptr = &tuple->dst.u.all;
-
- /* If no range specified... */
- if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
- /* If it's dst rewrite, can't change port */
- if (maniptype == NF_NAT_MANIP_DST)
- return;
-
- if (ntohs(*portptr) < 1024) {
- /* Loose convention: >> 512 is credential passing */
- if (ntohs(*portptr) < 512) {
- min = 1;
- range_size = 511 - min + 1;
- } else {
- min = 600;
- range_size = 1023 - min + 1;
- }
- } else {
- min = 1024;
- range_size = 65535 - 1024 + 1;
- }
- } else {
- min = ntohs(range->min_proto.all);
- max = ntohs(range->max_proto.all);
- if (unlikely(max < min))
- swap(max, min);
- range_size = max - min + 1;
- }
-
- if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) {
- off = l3proto->secure_port(tuple, maniptype == NF_NAT_MANIP_SRC
- ? tuple->dst.u.all
- : tuple->src.u.all);
- } else if (range->flags & NF_NAT_RANGE_PROTO_RANDOM_FULLY) {
- off = prandom_u32();
- } else if (range->flags & NF_NAT_RANGE_PROTO_OFFSET) {
- off = (ntohs(*portptr) - ntohs(range->base_proto.all));
- } else {
- off = *rover;
- }
-
- for (i = 0; ; ++off) {
- *portptr = htons(min + off % range_size);
- if (++i != range_size && nf_nat_used_tuple(tuple, ct))
- continue;
- if (!(range->flags & (NF_NAT_RANGE_PROTO_RANDOM_ALL|
- NF_NAT_RANGE_PROTO_OFFSET)))
- *rover = off;
- return;
- }
-}
-EXPORT_SYMBOL_GPL(nf_nat_l4proto_unique_tuple);
-
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
-int nf_nat_l4proto_nlattr_to_range(struct nlattr *tb[],
- struct nf_nat_range2 *range)
-{
- if (tb[CTA_PROTONAT_PORT_MIN]) {
- range->min_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
- range->max_proto.all = range->min_proto.all;
- range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
- }
- if (tb[CTA_PROTONAT_PORT_MAX]) {
- range->max_proto.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
- range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
- }
- return 0;
-}
-EXPORT_SYMBOL_GPL(nf_nat_l4proto_nlattr_to_range);
-#endif
diff --git a/net/netfilter/nf_nat_proto_dccp.c b/net/netfilter/nf_nat_proto_dccp.c
deleted file mode 100644
index 67ea0d83aa5a..000000000000
--- a/net/netfilter/nf_nat_proto_dccp.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * DCCP NAT protocol helper
- *
- * Copyright (c) 2005, 2006, 2008 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/skbuff.h>
-#include <linux/dccp.h>
-
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-
-static u_int16_t dccp_port_rover;
-
-static void
-dccp_unique_tuple(const struct nf_nat_l3proto *l3proto,
- struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct)
-{
- nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
- &dccp_port_rover);
-}
-
-static bool
-dccp_manip_pkt(struct sk_buff *skb,
- const struct nf_nat_l3proto *l3proto,
- unsigned int iphdroff, unsigned int hdroff,
- const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype)
-{
- struct dccp_hdr *hdr;
- __be16 *portptr, oldport, newport;
- int hdrsize = 8; /* DCCP connection tracking guarantees this much */
-
- if (skb->len >= hdroff + sizeof(struct dccp_hdr))
- hdrsize = sizeof(struct dccp_hdr);
-
- if (!skb_make_writable(skb, hdroff + hdrsize))
- return false;
-
- hdr = (struct dccp_hdr *)(skb->data + hdroff);
-
- if (maniptype == NF_NAT_MANIP_SRC) {
- newport = tuple->src.u.dccp.port;
- portptr = &hdr->dccph_sport;
- } else {
- newport = tuple->dst.u.dccp.port;
- portptr = &hdr->dccph_dport;
- }
-
- oldport = *portptr;
- *portptr = newport;
-
- if (hdrsize < sizeof(*hdr))
- return true;
-
- l3proto->csum_update(skb, iphdroff, &hdr->dccph_checksum,
- tuple, maniptype);
- inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
- false);
- return true;
-}
-
-const struct nf_nat_l4proto nf_nat_l4proto_dccp = {
- .l4proto = IPPROTO_DCCP,
- .manip_pkt = dccp_manip_pkt,
- .in_range = nf_nat_l4proto_in_range,
- .unique_tuple = dccp_unique_tuple,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
- .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
-#endif
-};
diff --git a/net/netfilter/nf_nat_proto_sctp.c b/net/netfilter/nf_nat_proto_sctp.c
deleted file mode 100644
index 1c5d9b65fbba..000000000000
--- a/net/netfilter/nf_nat_proto_sctp.c
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/sctp.h>
-#include <net/sctp/checksum.h>
-
-#include <net/netfilter/nf_nat_l4proto.h>
-
-static u_int16_t nf_sctp_port_rover;
-
-static void
-sctp_unique_tuple(const struct nf_nat_l3proto *l3proto,
- struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct)
-{
- nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
- &nf_sctp_port_rover);
-}
-
-static bool
-sctp_manip_pkt(struct sk_buff *skb,
- const struct nf_nat_l3proto *l3proto,
- unsigned int iphdroff, unsigned int hdroff,
- const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype)
-{
- struct sctphdr *hdr;
- int hdrsize = 8;
-
- /* This could be an inner header returned in imcp packet; in such
- * cases we cannot update the checksum field since it is outside
- * of the 8 bytes of transport layer headers we are guaranteed.
- */
- if (skb->len >= hdroff + sizeof(*hdr))
- hdrsize = sizeof(*hdr);
-
- if (!skb_make_writable(skb, hdroff + hdrsize))
- return false;
-
- hdr = (struct sctphdr *)(skb->data + hdroff);
-
- if (maniptype == NF_NAT_MANIP_SRC) {
- /* Get rid of src port */
- hdr->source = tuple->src.u.sctp.port;
- } else {
- /* Get rid of dst port */
- hdr->dest = tuple->dst.u.sctp.port;
- }
-
- if (hdrsize < sizeof(*hdr))
- return true;
-
- if (skb->ip_summed != CHECKSUM_PARTIAL) {
- hdr->checksum = sctp_compute_cksum(skb, hdroff);
- skb->ip_summed = CHECKSUM_NONE;
- }
-
- return true;
-}
-
-const struct nf_nat_l4proto nf_nat_l4proto_sctp = {
- .l4proto = IPPROTO_SCTP,
- .manip_pkt = sctp_manip_pkt,
- .in_range = nf_nat_l4proto_in_range,
- .unique_tuple = sctp_unique_tuple,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
- .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
-#endif
-};
diff --git a/net/netfilter/nf_nat_proto_tcp.c b/net/netfilter/nf_nat_proto_tcp.c
deleted file mode 100644
index f15fcd475f98..000000000000
--- a/net/netfilter/nf_nat_proto_tcp.c
+++ /dev/null
@@ -1,85 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/export.h>
-#include <linux/tcp.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-#include <net/netfilter/nf_nat_core.h>
-
-static u16 tcp_port_rover;
-
-static void
-tcp_unique_tuple(const struct nf_nat_l3proto *l3proto,
- struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct)
-{
- nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
- &tcp_port_rover);
-}
-
-static bool
-tcp_manip_pkt(struct sk_buff *skb,
- const struct nf_nat_l3proto *l3proto,
- unsigned int iphdroff, unsigned int hdroff,
- const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype)
-{
- struct tcphdr *hdr;
- __be16 *portptr, newport, oldport;
- int hdrsize = 8; /* TCP connection tracking guarantees this much */
-
- /* this could be a inner header returned in icmp packet; in such
- cases we cannot update the checksum field since it is outside of
- the 8 bytes of transport layer headers we are guaranteed */
- if (skb->len >= hdroff + sizeof(struct tcphdr))
- hdrsize = sizeof(struct tcphdr);
-
- if (!skb_make_writable(skb, hdroff + hdrsize))
- return false;
-
- hdr = (struct tcphdr *)(skb->data + hdroff);
-
- if (maniptype == NF_NAT_MANIP_SRC) {
- /* Get rid of src port */
- newport = tuple->src.u.tcp.port;
- portptr = &hdr->source;
- } else {
- /* Get rid of dst port */
- newport = tuple->dst.u.tcp.port;
- portptr = &hdr->dest;
- }
-
- oldport = *portptr;
- *portptr = newport;
-
- if (hdrsize < sizeof(*hdr))
- return true;
-
- l3proto->csum_update(skb, iphdroff, &hdr->check, tuple, maniptype);
- inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, false);
- return true;
-}
-
-const struct nf_nat_l4proto nf_nat_l4proto_tcp = {
- .l4proto = IPPROTO_TCP,
- .manip_pkt = tcp_manip_pkt,
- .in_range = nf_nat_l4proto_in_range,
- .unique_tuple = tcp_unique_tuple,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
- .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
-#endif
-};
diff --git a/net/netfilter/nf_nat_proto_udp.c b/net/netfilter/nf_nat_proto_udp.c
deleted file mode 100644
index 5790f70a83b2..000000000000
--- a/net/netfilter/nf_nat_proto_udp.c
+++ /dev/null
@@ -1,130 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/export.h>
-#include <linux/init.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_l3proto.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-
-static u16 udp_port_rover;
-
-static void
-udp_unique_tuple(const struct nf_nat_l3proto *l3proto,
- struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct)
-{
- nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
- &udp_port_rover);
-}
-
-static void
-__udp_manip_pkt(struct sk_buff *skb,
- const struct nf_nat_l3proto *l3proto,
- unsigned int iphdroff, struct udphdr *hdr,
- const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype, bool do_csum)
-{
- __be16 *portptr, newport;
-
- if (maniptype == NF_NAT_MANIP_SRC) {
- /* Get rid of src port */
- newport = tuple->src.u.udp.port;
- portptr = &hdr->source;
- } else {
- /* Get rid of dst port */
- newport = tuple->dst.u.udp.port;
- portptr = &hdr->dest;
- }
- if (do_csum) {
- l3proto->csum_update(skb, iphdroff, &hdr->check,
- tuple, maniptype);
- inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
- false);
- if (!hdr->check)
- hdr->check = CSUM_MANGLED_0;
- }
- *portptr = newport;
-}
-
-static bool udp_manip_pkt(struct sk_buff *skb,
- const struct nf_nat_l3proto *l3proto,
- unsigned int iphdroff, unsigned int hdroff,
- const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype)
-{
- struct udphdr *hdr;
- bool do_csum;
-
- if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
- return false;
-
- hdr = (struct udphdr *)(skb->data + hdroff);
- do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL;
-
- __udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, do_csum);
- return true;
-}
-
-#ifdef CONFIG_NF_NAT_PROTO_UDPLITE
-static u16 udplite_port_rover;
-
-static bool udplite_manip_pkt(struct sk_buff *skb,
- const struct nf_nat_l3proto *l3proto,
- unsigned int iphdroff, unsigned int hdroff,
- const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype)
-{
- struct udphdr *hdr;
-
- if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
- return false;
-
- hdr = (struct udphdr *)(skb->data + hdroff);
- __udp_manip_pkt(skb, l3proto, iphdroff, hdr, tuple, maniptype, true);
- return true;
-}
-
-static void
-udplite_unique_tuple(const struct nf_nat_l3proto *l3proto,
- struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct)
-{
- nf_nat_l4proto_unique_tuple(l3proto, tuple, range, maniptype, ct,
- &udplite_port_rover);
-}
-
-const struct nf_nat_l4proto nf_nat_l4proto_udplite = {
- .l4proto = IPPROTO_UDPLITE,
- .manip_pkt = udplite_manip_pkt,
- .in_range = nf_nat_l4proto_in_range,
- .unique_tuple = udplite_unique_tuple,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
- .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
-#endif
-};
-#endif /* CONFIG_NF_NAT_PROTO_UDPLITE */
-
-const struct nf_nat_l4proto nf_nat_l4proto_udp = {
- .l4proto = IPPROTO_UDP,
- .manip_pkt = udp_manip_pkt,
- .in_range = nf_nat_l4proto_in_range,
- .unique_tuple = udp_unique_tuple,
-#if IS_ENABLED(CONFIG_NF_CT_NETLINK)
- .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
-#endif
-};
diff --git a/net/netfilter/nf_nat_proto_unknown.c b/net/netfilter/nf_nat_proto_unknown.c
deleted file mode 100644
index c5db3e251232..000000000000
--- a/net/netfilter/nf_nat_proto_unknown.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/* The "unknown" protocol. This is what is used for protocols we
- * don't understand. It's returned by ip_ct_find_proto().
- */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-
-#include <linux/netfilter.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_l4proto.h>
-
-static bool unknown_in_range(const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type manip_type,
- const union nf_conntrack_man_proto *min,
- const union nf_conntrack_man_proto *max)
-{
- return true;
-}
-
-static void unknown_unique_tuple(const struct nf_nat_l3proto *l3proto,
- struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range2 *range,
- enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct)
-{
- /* Sorry: we can't help you; if it's not unique, we can't frob
- * anything.
- */
- return;
-}
-
-static bool
-unknown_manip_pkt(struct sk_buff *skb,
- const struct nf_nat_l3proto *l3proto,
- unsigned int iphdroff, unsigned int hdroff,
- const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype)
-{
- return true;
-}
-
-const struct nf_nat_l4proto nf_nat_l4proto_unknown = {
- .manip_pkt = unknown_manip_pkt,
- .in_range = unknown_in_range,
- .unique_tuple = unknown_unique_tuple,
-};
diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c
index 1f3086074981..aa1be643d7a0 100644
--- a/net/netfilter/nf_nat_sip.c
+++ b/net/netfilter/nf_nat_sip.c
@@ -18,6 +18,7 @@
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
@@ -316,6 +317,9 @@ static void nf_nat_sip_seq_adjust(struct sk_buff *skb, unsigned int protoff,
static void nf_nat_sip_expected(struct nf_conn *ct,
struct nf_conntrack_expect *exp)
{
+ struct nf_conn_help *help = nfct_help(ct->master);
+ struct nf_conntrack_expect *pair_exp;
+ int range_set_for_snat = 0;
struct nf_nat_range2 range;
/* This must be a fresh one. */
@@ -327,15 +331,42 @@ static void nf_nat_sip_expected(struct nf_conn *ct,
range.min_addr = range.max_addr = exp->saved_addr;
nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
- /* Change src to where master sends to, but only if the connection
- * actually came from the same source. */
- if (nf_inet_addr_cmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3,
+ /* Do media streams SRC manip according with the parameters
+ * found in the paired expectation.
+ */
+ if (exp->class != SIP_EXPECT_SIGNALLING) {
+ spin_lock_bh(&nf_conntrack_expect_lock);
+ hlist_for_each_entry(pair_exp, &help->expectations, lnode) {
+ if (pair_exp->tuple.src.l3num == nf_ct_l3num(ct) &&
+ pair_exp->tuple.dst.protonum == ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum &&
+ nf_inet_addr_cmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3, &pair_exp->saved_addr) &&
+ ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all == pair_exp->saved_proto.all) {
+ range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
+ range.min_proto.all = range.max_proto.all = pair_exp->tuple.dst.u.all;
+ range.min_addr = range.max_addr = pair_exp->tuple.dst.u3;
+ range_set_for_snat = 1;
+ break;
+ }
+ }
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+ }
+
+ /* When no paired expectation has been found, change src to
+ * where master sends to, but only if the connection actually came
+ * from the same source.
+ */
+ if (!range_set_for_snat &&
+ nf_inet_addr_cmp(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3,
&ct->master->tuplehash[exp->dir].tuple.src.u3)) {
range.flags = NF_NAT_RANGE_MAP_IPS;
range.min_addr = range.max_addr
= ct->master->tuplehash[!exp->dir].tuple.dst.u3;
- nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
+ range_set_for_snat = 1;
}
+
+ /* Perform SRC manip. */
+ if (range_set_for_snat)
+ nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
}
static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 6e548d7c9f67..fec814dace5a 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2295,15 +2295,52 @@ struct nft_rule_dump_ctx {
char *chain;
};
+static int __nf_tables_dump_rules(struct sk_buff *skb,
+ unsigned int *idx,
+ struct netlink_callback *cb,
+ const struct nft_table *table,
+ const struct nft_chain *chain)
+{
+ struct net *net = sock_net(skb->sk);
+ unsigned int s_idx = cb->args[0];
+ const struct nft_rule *rule;
+ int rc = 1;
+
+ list_for_each_entry_rcu(rule, &chain->rules, list) {
+ if (!nft_is_active(net, rule))
+ goto cont;
+ if (*idx < s_idx)
+ goto cont;
+ if (*idx > s_idx) {
+ memset(&cb->args[1], 0,
+ sizeof(cb->args) - sizeof(cb->args[0]));
+ }
+ if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NFT_MSG_NEWRULE,
+ NLM_F_MULTI | NLM_F_APPEND,
+ table->family,
+ table, chain, rule) < 0)
+ goto out_unfinished;
+
+ nl_dump_check_consistent(cb, nlmsg_hdr(skb));
+cont:
+ (*idx)++;
+ }
+ rc = 0;
+out_unfinished:
+ cb->args[0] = *idx;
+ return rc;
+}
+
static int nf_tables_dump_rules(struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
const struct nft_rule_dump_ctx *ctx = cb->data;
- const struct nft_table *table;
+ struct nft_table *table;
const struct nft_chain *chain;
- const struct nft_rule *rule;
- unsigned int idx = 0, s_idx = cb->args[0];
+ unsigned int idx = 0;
struct net *net = sock_net(skb->sk);
int family = nfmsg->nfgen_family;
@@ -2317,37 +2354,34 @@ static int nf_tables_dump_rules(struct sk_buff *skb,
if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0)
continue;
- list_for_each_entry_rcu(chain, &table->chains, list) {
- if (ctx && ctx->chain &&
- strcmp(ctx->chain, chain->name) != 0)
- continue;
+ if (ctx && ctx->chain) {
+ struct rhlist_head *list, *tmp;
- list_for_each_entry_rcu(rule, &chain->rules, list) {
- if (!nft_is_active(net, rule))
- goto cont;
- if (idx < s_idx)
- goto cont;
- if (idx > s_idx)
- memset(&cb->args[1], 0,
- sizeof(cb->args) - sizeof(cb->args[0]));
- if (nf_tables_fill_rule_info(skb, net, NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NFT_MSG_NEWRULE,
- NLM_F_MULTI | NLM_F_APPEND,
- table->family,
- table, chain, rule) < 0)
- goto done;
-
- nl_dump_check_consistent(cb, nlmsg_hdr(skb));
-cont:
- idx++;
+ list = rhltable_lookup(&table->chains_ht, ctx->chain,
+ nft_chain_ht_params);
+ if (!list)
+ goto done;
+
+ rhl_for_each_entry_rcu(chain, tmp, list, rhlhead) {
+ if (!nft_is_active(net, chain))
+ continue;
+ __nf_tables_dump_rules(skb, &idx,
+ cb, table, chain);
+ break;
}
+ goto done;
}
+
+ list_for_each_entry_rcu(chain, &table->chains, list) {
+ if (__nf_tables_dump_rules(skb, &idx, cb, table, chain))
+ goto done;
+ }
+
+ if (ctx && ctx->table)
+ break;
}
done:
rcu_read_unlock();
-
- cb->args[0] = idx;
return skb->len;
}
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 332c69d27b47..b1f9c5303f02 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -148,7 +148,7 @@ static void
instance_put(struct nfulnl_instance *inst)
{
if (inst && refcount_dec_and_test(&inst->use))
- call_rcu_bh(&inst->rcu, nfulnl_instance_free_rcu);
+ call_rcu(&inst->rcu, nfulnl_instance_free_rcu);
}
static void nfulnl_timer(struct timer_list *t);
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 1ad4017f9b73..28e27a32d9b9 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -260,7 +260,7 @@ static inline void
dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
{
hlist_del_rcu(&ent->node);
- call_rcu_bh(&ent->rcu, dsthash_free_rcu);
+ call_rcu(&ent->rcu, dsthash_free_rcu);
ht->count--;
}
static void htable_gc(struct work_struct *work);
@@ -1326,7 +1326,7 @@ static void __exit hashlimit_mt_exit(void)
xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg));
unregister_pernet_subsys(&hashlimit_net_ops);
- rcu_barrier_bh();
+ rcu_barrier();
kmem_cache_destroy(hashlimit_cachep);
}
OpenPOWER on IntegriCloud