From 31875d4970baa02e08b719fdfea6f43e9e2f7e77 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Thu, 24 May 2018 23:40:12 +0300 Subject: ipvs: register conntrack hooks for ftp ip_vs_ftp requires conntrack modules for mangling of FTP command responses in passive mode. Make sure the conntrack hooks are registered when real servers use NAT method in FTP virtual service. The hooks will be registered while the service is present. Fixes: 0c66dc1ea3f0 ("netfilter: conntrack: register hooks in netns when needed by ruleset") Signed-off-by: Julian Anastasov Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- include/net/ip_vs.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index eb0bec043c96..ae72d9057eda 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -643,6 +643,7 @@ struct ip_vs_service { /* alternate persistence engine */ struct ip_vs_pe __rcu *pe; + int conntrack_afmask; struct rcu_head rcu_head; }; @@ -1620,6 +1621,35 @@ static inline bool ip_vs_conn_uses_conntrack(struct ip_vs_conn *cp, return false; } +static inline int ip_vs_register_conntrack(struct ip_vs_service *svc) +{ +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + int afmask = (svc->af == AF_INET6) ? 2 : 1; + int ret = 0; + + if (!(svc->conntrack_afmask & afmask)) { + ret = nf_ct_netns_get(svc->ipvs->net, svc->af); + if (ret >= 0) + svc->conntrack_afmask |= afmask; + } + return ret; +#else + return 0; +#endif +} + +static inline void ip_vs_unregister_conntrack(struct ip_vs_service *svc) +{ +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + int afmask = (svc->af == AF_INET6) ? 2 : 1; + + if (svc->conntrack_afmask & afmask) { + nf_ct_netns_put(svc->ipvs->net, svc->af); + svc->conntrack_afmask &= ~afmask; + } +#endif +} + static inline int ip_vs_dest_conn_overhead(struct ip_vs_dest *dest) { -- cgit v1.2.3 From bd975e691486ba52790ba23cc9b4fecab7bc0d31 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Thu, 31 May 2018 18:45:21 +0200 Subject: netfilter: ipset: List timing out entries with "timeout 1" instead of zero When listing sets with timeout support, there's a probability that just timing out entries with "0" timeout value is listed/saved. However when restoring the saved list, the zero timeout value means permanent elelements. The new behaviour is that timing out entries are listed with "timeout 1" instead of zero. Fixes netfilter bugzilla #1258. Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set_timeout.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h index bfb3531fd88a..7ad8ddf9ca8a 100644 --- a/include/linux/netfilter/ipset/ip_set_timeout.h +++ b/include/linux/netfilter/ipset/ip_set_timeout.h @@ -65,8 +65,14 @@ ip_set_timeout_set(unsigned long *timeout, u32 value) static inline u32 ip_set_timeout_get(const unsigned long *timeout) { - return *timeout == IPSET_ELEM_PERMANENT ? 0 : - jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC; + u32 t; + + if (*timeout == IPSET_ELEM_PERMANENT) + return 0; + + t = jiffies_to_msecs(*timeout - jiffies)/MSEC_PER_SEC; + /* Zero value in userspace means no timeout */ + return t == 0 ? 1 : t; } #endif /* __KERNEL__ */ -- cgit v1.2.3 From 30a2e107108c66cbcb7776b58cbcd7db223a1cc9 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 5 Jun 2018 11:53:35 +0200 Subject: netfilter: ipset: Limit max timeout value Due to the negative value condition in msecs_to_jiffies(), the real max possible timeout value must be set to (UINT_MAX >> 1)/MSEC_PER_SEC. Neutron Soutmun proposed the proper fix, but an insufficient one was applied, see https://patchwork.ozlabs.org/patch/400405/. Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set_timeout.h | 10 ++++++---- net/netfilter/xt_set.c | 8 ++++---- 2 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/netfilter/ipset/ip_set_timeout.h b/include/linux/netfilter/ipset/ip_set_timeout.h index 7ad8ddf9ca8a..8ce271e187b6 100644 --- a/include/linux/netfilter/ipset/ip_set_timeout.h +++ b/include/linux/netfilter/ipset/ip_set_timeout.h @@ -23,6 +23,9 @@ /* Set is defined with timeout support: timeout value may be 0 */ #define IPSET_NO_TIMEOUT UINT_MAX +/* Max timeout value, see msecs_to_jiffies() in jiffies.h */ +#define IPSET_MAX_TIMEOUT (UINT_MAX >> 1)/MSEC_PER_SEC + #define ip_set_adt_opt_timeout(opt, set) \ ((opt)->ext.timeout != IPSET_NO_TIMEOUT ? (opt)->ext.timeout : (set)->timeout) @@ -32,11 +35,10 @@ ip_set_timeout_uget(struct nlattr *tb) unsigned int timeout = ip_set_get_h32(tb); /* Normalize to fit into jiffies */ - if (timeout > UINT_MAX/MSEC_PER_SEC) - timeout = UINT_MAX/MSEC_PER_SEC; + if (timeout > IPSET_MAX_TIMEOUT) + timeout = IPSET_MAX_TIMEOUT; - /* Userspace supplied TIMEOUT parameter: adjust crazy size */ - return timeout == IPSET_NO_TIMEOUT ? IPSET_NO_TIMEOUT - 1 : timeout; + return timeout; } static inline bool diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 07af7dbf7a30..bf2890b13212 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -372,8 +372,8 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par) /* Normalize to fit into jiffies */ if (add_opt.ext.timeout != IPSET_NO_TIMEOUT && - add_opt.ext.timeout > UINT_MAX / MSEC_PER_SEC) - add_opt.ext.timeout = UINT_MAX / MSEC_PER_SEC; + add_opt.ext.timeout > IPSET_MAX_TIMEOUT) + add_opt.ext.timeout = IPSET_MAX_TIMEOUT; if (info->add_set.index != IPSET_INVALID_ID) ip_set_add(info->add_set.index, skb, par, &add_opt); if (info->del_set.index != IPSET_INVALID_ID) @@ -407,8 +407,8 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par) /* Normalize to fit into jiffies */ if (add_opt.ext.timeout != IPSET_NO_TIMEOUT && - add_opt.ext.timeout > UINT_MAX / MSEC_PER_SEC) - add_opt.ext.timeout = UINT_MAX / MSEC_PER_SEC; + add_opt.ext.timeout > IPSET_MAX_TIMEOUT) + add_opt.ext.timeout = IPSET_MAX_TIMEOUT; if (info->add_set.index != IPSET_INVALID_ID) ip_set_add(info->add_set.index, skb, par, &add_opt); if (info->del_set.index != IPSET_INVALID_ID) -- cgit v1.2.3 From 64e6dd1fb2f1ed799f317dc34aa6e251c64f4981 Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Thu, 7 Jun 2018 18:15:14 +0800 Subject: netfilter: nf_conntrack: Increase __IPS_MAX_BIT with new bit IPS_OFFLOAD_BIT The __IPS_MAX_BIT is used in __ctnetlink_change_status as the max bit value. When add new bit IPS_OFFLOAD_BIT whose value is 14, we should increase the __IPS_MAX_BIT too, from 14 to 15. There is no any bug in current codes, although it lost one loop in __ctnetlink_change_status. Because the new bit IPS_OFFLOAD_BIT belongs the IPS_UNCHANGEABLE_MASK. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index c712eb6879f1..336014bf8868 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -112,7 +112,7 @@ enum ip_conntrack_status { IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING | IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_OFFLOAD), - __IPS_MAX_BIT = 14, + __IPS_MAX_BIT = 15, }; /* Connection tracking event types */ -- cgit v1.2.3 From d8e87fc6d11c31525430a388317b52f4a98a5328 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Thu, 7 Jun 2018 19:38:09 +0000 Subject: netfilter: remove include/net/netfilter/nft_dup.h include/net/netfilter/nft_dup.h was introduced in d877f07112f1 ("netfilter: nf_tables: add nft_dup expression") but was never user since this date. Furthermore, the only struct in this file is unused elsewhere. Signed-off-by: Corentin Labbe Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nft_dup.h | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 include/net/netfilter/nft_dup.h (limited to 'include') diff --git a/include/net/netfilter/nft_dup.h b/include/net/netfilter/nft_dup.h deleted file mode 100644 index 4d9d512984b2..000000000000 --- a/include/net/netfilter/nft_dup.h +++ /dev/null @@ -1,10 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _NFT_DUP_H_ -#define _NFT_DUP_H_ - -struct nft_dup_inet { - enum nft_registers sreg_addr:8; - enum nft_registers sreg_dev:8; -}; - -#endif /* _NFT_DUP_H_ */ -- cgit v1.2.3 From f6fadff33e8b09373eedf99822b89d9dd84545b8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 11 Jun 2018 23:22:04 +0200 Subject: tls: fix NULL pointer dereference on poll While hacking on kTLS, I ran into the following panic from an unprivileged netserver / netperf TCP session: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 PGD 800000037f378067 P4D 800000037f378067 PUD 3c0e61067 PMD 0 Oops: 0010 [#1] SMP KASAN PTI CPU: 1 PID: 2289 Comm: netserver Not tainted 4.17.0+ #139 Hardware name: LENOVO 20FBCTO1WW/20FBCTO1WW, BIOS N1FET47W (1.21 ) 11/28/2016 RIP: 0010: (null) Code: Bad RIP value. RSP: 0018:ffff88036abcf740 EFLAGS: 00010246 RAX: dffffc0000000000 RBX: ffff88036f5f6800 RCX: 1ffff1006debed26 RDX: ffff88036abcf920 RSI: ffff8803cb1a4f00 RDI: ffff8803c258c280 RBP: ffff8803c258c280 R08: ffff8803c258c280 R09: ffffed006f559d48 R10: ffff88037aacea43 R11: ffffed006f559d49 R12: ffff8803c258c280 R13: ffff8803cb1a4f20 R14: 00000000000000db R15: ffffffffc168a350 FS: 00007f7e631f4700(0000) GS:ffff8803d1c80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffffffffffffffd6 CR3: 00000003ccf64005 CR4: 00000000003606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? tls_sw_poll+0xa4/0x160 [tls] ? sock_poll+0x20a/0x680 ? do_select+0x77b/0x11a0 ? poll_schedule_timeout.constprop.12+0x130/0x130 ? pick_link+0xb00/0xb00 ? read_word_at_a_time+0x13/0x20 ? vfs_poll+0x270/0x270 ? deref_stack_reg+0xad/0xe0 ? __read_once_size_nocheck.constprop.6+0x10/0x10 [...] Debugging further, it turns out that calling into ctx->sk_poll() is invalid since sk_poll itself is NULL which was saved from the original TCP socket in order for tls_sw_poll() to invoke it. Looks like the recent conversion from poll to poll_mask callback started in 152524231023 ("net: add support for ->poll_mask in proto_ops") missed to eventually convert kTLS, too: TCP's ->poll was converted over to the ->poll_mask in commit 2c7d3dacebd4 ("net/tcp: convert to ->poll_mask") and therefore kTLS wrongly saved the ->poll old one which is now NULL. Convert kTLS over to use ->poll_mask instead. Also instead of POLLIN | POLLRDNORM use the proper EPOLLIN | EPOLLRDNORM bits as the case in tcp_poll_mask() as well that is mangled here. Fixes: 2c7d3dacebd4 ("net/tcp: convert to ->poll_mask") Signed-off-by: Daniel Borkmann Cc: Christoph Hellwig Cc: Dave Watson Tested-by: Dave Watson Signed-off-by: David S. Miller --- include/net/tls.h | 6 ++---- net/tls/tls_main.c | 2 +- net/tls/tls_sw.c | 19 +++++++++---------- 3 files changed, 12 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index 70c273777fe9..7f84ea3e217c 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -109,8 +109,7 @@ struct tls_sw_context_rx { struct strparser strp; void (*saved_data_ready)(struct sock *sk); - unsigned int (*sk_poll)(struct file *file, struct socket *sock, - struct poll_table_struct *wait); + __poll_t (*sk_poll_mask)(struct socket *sock, __poll_t events); struct sk_buff *recv_pkt; u8 control; bool decrypted; @@ -225,8 +224,7 @@ void tls_sw_free_resources_tx(struct sock *sk); void tls_sw_free_resources_rx(struct sock *sk); int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len); -unsigned int tls_sw_poll(struct file *file, struct socket *sock, - struct poll_table_struct *wait); +__poll_t tls_sw_poll_mask(struct socket *sock, __poll_t events); ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, size_t len, unsigned int flags); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 301f22430469..a127d61e8af9 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -712,7 +712,7 @@ static int __init tls_register(void) build_protos(tls_prots[TLSV4], &tcp_prot); tls_sw_proto_ops = inet_stream_ops; - tls_sw_proto_ops.poll = tls_sw_poll; + tls_sw_proto_ops.poll_mask = tls_sw_poll_mask; tls_sw_proto_ops.splice_read = tls_sw_splice_read; #ifdef CONFIG_TLS_DEVICE diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 8ca57d01b18f..34895b7c132d 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -915,23 +915,22 @@ splice_read_end: return copied ? : err; } -unsigned int tls_sw_poll(struct file *file, struct socket *sock, - struct poll_table_struct *wait) +__poll_t tls_sw_poll_mask(struct socket *sock, __poll_t events) { - unsigned int ret; struct sock *sk = sock->sk; struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); + __poll_t mask; - /* Grab POLLOUT and POLLHUP from the underlying socket */ - ret = ctx->sk_poll(file, sock, wait); + /* Grab EPOLLOUT and EPOLLHUP from the underlying socket */ + mask = ctx->sk_poll_mask(sock, events); - /* Clear POLLIN bits, and set based on recv_pkt */ - ret &= ~(POLLIN | POLLRDNORM); + /* Clear EPOLLIN bits, and set based on recv_pkt */ + mask &= ~(EPOLLIN | EPOLLRDNORM); if (ctx->recv_pkt) - ret |= POLLIN | POLLRDNORM; + mask |= EPOLLIN | EPOLLRDNORM; - return ret; + return mask; } static int tls_read_size(struct strparser *strp, struct sk_buff *skb) @@ -1188,7 +1187,7 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) sk->sk_data_ready = tls_data_ready; write_unlock_bh(&sk->sk_callback_lock); - sw_ctx_rx->sk_poll = sk->sk_socket->ops->poll; + sw_ctx_rx->sk_poll_mask = sk->sk_socket->ops->poll_mask; strp_check_rcv(&sw_ctx_rx->strp); } -- cgit v1.2.3 From 155fb5c5fae72d1faa2067d6fa0a5be12279c689 Mon Sep 17 00:00:00 2001 From: Prashant Bhole Date: Mon, 28 May 2018 18:14:49 +0900 Subject: netfilter: fix null-ptr-deref in nf_nat_decode_session Add null check for nat_hook in nf_nat_decode_session() [ 195.648098] UBSAN: Undefined behaviour in ./include/linux/netfilter.h:348:14 [ 195.651366] BUG: KASAN: null-ptr-deref in __xfrm_policy_check+0x208/0x1d70 [ 195.653888] member access within null pointer of type 'struct nf_nat_hook' [ 195.653896] CPU: 3 PID: 0 Comm: swapper/3 Not tainted 4.17.0-rc6+ #5 [ 195.656320] Read of size 8 at addr 0000000000000008 by task ping/2469 [ 195.658715] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 [ 195.658721] Call Trace: [ 195.661087] [ 195.669341] [ 195.670574] dump_stack+0xc6/0x150 [ 195.672156] ? dump_stack_print_info.cold.0+0x1b/0x1b [ 195.674121] ? ubsan_prologue+0x31/0x92 [ 195.676546] ubsan_epilogue+0x9/0x49 [ 195.678159] handle_null_ptr_deref+0x11a/0x130 [ 195.679800] ? sprint_OID+0x1a0/0x1a0 [ 195.681322] __ubsan_handle_type_mismatch_v1+0xd5/0x11d [ 195.683146] ? ubsan_prologue+0x92/0x92 [ 195.684642] __xfrm_policy_check+0x18ef/0x1d70 [ 195.686294] ? rt_cache_valid+0x118/0x180 [ 195.687804] ? __xfrm_route_forward+0x410/0x410 [ 195.689463] ? fib_multipath_hash+0x700/0x700 [ 195.691109] ? kvm_sched_clock_read+0x23/0x40 [ 195.692805] ? pvclock_clocksource_read+0xf6/0x280 [ 195.694409] ? graph_lock+0xa0/0xa0 [ 195.695824] ? pvclock_clocksource_read+0xf6/0x280 [ 195.697508] ? pvclock_read_flags+0x80/0x80 [ 195.698981] ? kvm_sched_clock_read+0x23/0x40 [ 195.700347] ? sched_clock+0x5/0x10 [ 195.701525] ? sched_clock_cpu+0x18/0x1a0 [ 195.702846] tcp_v4_rcv+0x1d32/0x1de0 [ 195.704115] ? lock_repin_lock+0x70/0x270 [ 195.707072] ? pvclock_read_flags+0x80/0x80 [ 195.709302] ? tcp_v4_early_demux+0x4b0/0x4b0 [ 195.711833] ? lock_acquire+0x195/0x380 [ 195.714222] ? ip_local_deliver_finish+0xfc/0x770 [ 195.716967] ? raw_rcv+0x2b0/0x2b0 [ 195.718856] ? lock_release+0xa00/0xa00 [ 195.720938] ip_local_deliver_finish+0x1b9/0x770 [...] Fixes: 2c205dd3981f ("netfilter: add struct nf_nat_hook and use it") Signed-off-by: Prashant Bhole Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 04551af2ff23..dd2052f0efb7 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -345,7 +345,7 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) rcu_read_lock(); nat_hook = rcu_dereference(nf_nat_hook); - if (nat_hook->decode_session) + if (nat_hook && nat_hook->decode_session) nat_hook->decode_session(skb, fl); rcu_read_unlock(); #endif -- cgit v1.2.3 From 215a31f19dedd4e92a67cf5a9717ee898d012b3a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 11 Jun 2018 17:18:29 +0200 Subject: netfilter: nft_dynset: do not reject set updates with NFT_SET_EVAL NFT_SET_EVAL is signalling the kernel that this sets can be updated from the evaluation path, even if there are no expressions attached to the element. Otherwise, set updates with no expressions fail. Update description to describe the right semantics. Fixes: 22fe54d5fefc ("netfilter: nf_tables: add support for dynamic set updates") Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_tables.h | 2 +- net/netfilter/nft_dynset.c | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index c9bf74b94f37..89438e68dc03 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -266,7 +266,7 @@ enum nft_rule_compat_attributes { * @NFT_SET_INTERVAL: set contains intervals * @NFT_SET_MAP: set is used as a dictionary * @NFT_SET_TIMEOUT: set uses timeouts - * @NFT_SET_EVAL: set contains expressions for evaluation + * @NFT_SET_EVAL: set can be updated from the evaluation path * @NFT_SET_OBJECT: set contains stateful objects */ enum nft_set_flags { diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 4d49529cff61..27d7e4598ab6 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -203,9 +203,7 @@ static int nft_dynset_init(const struct nft_ctx *ctx, goto err1; set->ops->gc_init(set); } - - } else if (set->flags & NFT_SET_EVAL) - return -EINVAL; + } nft_set_ext_prepare(&priv->tmpl); nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_KEY, set->klen); -- cgit v1.2.3 From 21ba8847f857028dc83a0f341e16ecc616e34740 Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Tue, 12 Jun 2018 10:51:34 -0700 Subject: netfilter: nf_conncount: Fix garbage collection with zones MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, we use check_hlist() for garbage colleciton. However, we use the ‘zone’ from the counted entry to query the existence of existing entries in the hlist. This could be wrong when they are in different zones, and this patch fixes this issue. Fixes: e59ea3df3fc2 ("netfilter: xt_connlimit: honor conntrack zone if available") Signed-off-by: Yi-Hung Wei Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack_count.h | 3 ++- net/netfilter/nf_conncount.c | 13 +++++++++---- net/netfilter/nft_connlimit.c | 2 +- 3 files changed, 12 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack_count.h b/include/net/netfilter/nf_conntrack_count.h index 1910b6572430..3a188a0923a3 100644 --- a/include/net/netfilter/nf_conntrack_count.h +++ b/include/net/netfilter/nf_conntrack_count.h @@ -20,7 +20,8 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head, bool *addit); bool nf_conncount_add(struct hlist_head *head, - const struct nf_conntrack_tuple *tuple); + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone); void nf_conncount_cache_free(struct hlist_head *hhead); diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 3b5059a8dcdd..d8383609fe28 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -46,6 +46,7 @@ struct nf_conncount_tuple { struct hlist_node node; struct nf_conntrack_tuple tuple; + struct nf_conntrack_zone zone; }; struct nf_conncount_rb { @@ -80,7 +81,8 @@ static int key_diff(const u32 *a, const u32 *b, unsigned int klen) } bool nf_conncount_add(struct hlist_head *head, - const struct nf_conntrack_tuple *tuple) + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_zone *zone) { struct nf_conncount_tuple *conn; @@ -88,6 +90,7 @@ bool nf_conncount_add(struct hlist_head *head, if (conn == NULL) return false; conn->tuple = *tuple; + conn->zone = *zone; hlist_add_head(&conn->node, head); return true; } @@ -108,7 +111,7 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head, /* check the saved connections */ hlist_for_each_entry_safe(conn, n, head, node) { - found = nf_conntrack_find_get(net, zone, &conn->tuple); + found = nf_conntrack_find_get(net, &conn->zone, &conn->tuple); if (found == NULL) { hlist_del(&conn->node); kmem_cache_free(conncount_conn_cachep, conn); @@ -117,7 +120,8 @@ unsigned int nf_conncount_lookup(struct net *net, struct hlist_head *head, found_ct = nf_ct_tuplehash_to_ctrack(found); - if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple)) { + if (tuple && nf_ct_tuple_equal(&conn->tuple, tuple) && + nf_ct_zone_equal(found_ct, zone, zone->dir)) { /* * Just to be sure we have it only once in the list. * We should not see tuples twice unless someone hooks @@ -196,7 +200,7 @@ count_tree(struct net *net, struct rb_root *root, if (!addit) return count; - if (!nf_conncount_add(&rbconn->hhead, tuple)) + if (!nf_conncount_add(&rbconn->hhead, tuple, zone)) return 0; /* hotdrop */ return count + 1; @@ -238,6 +242,7 @@ count_tree(struct net *net, struct rb_root *root, } conn->tuple = *tuple; + conn->zone = *zone; memcpy(rbconn->key, key, sizeof(u32) * keylen); INIT_HLIST_HEAD(&rbconn->hhead); diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c index 50c068d660e5..a832c59f0a9c 100644 --- a/net/netfilter/nft_connlimit.c +++ b/net/netfilter/nft_connlimit.c @@ -52,7 +52,7 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv, if (!addit) goto out; - if (!nf_conncount_add(&priv->hhead, tuple_ptr)) { + if (!nf_conncount_add(&priv->hhead, tuple_ptr, zone)) { regs->verdict.code = NF_DROP; spin_unlock_bh(&priv->lock); return; -- cgit v1.2.3 From 995191220056300c51ed870a5d5321f91f3eef89 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 14 Jun 2018 07:37:02 +0800 Subject: sctp: define sctp_packet_gso_append to build GSO frames Now sctp GSO uses skb_gro_receive() to append the data into head skb frag_list. However it actually only needs very few code from skb_gro_receive(). Besides, NAPI_GRO_CB has to be set while most of its members are not needed here. This patch is to add sctp_packet_gso_append() to build GSO frames instead of skb_gro_receive(), and it would avoid many unnecessary checks and make the code clearer. Note that sctp will use page frags instead of frag_list to build GSO frames in another patch. But it may take time, as sctp's GSO frames may have different size. skb_segment() can only split it into the frags with the same size, which would break the border of sctp chunks. Signed-off-by: Xin Long Reviewed-by: Marcelo Ricardo Leitner Acked-by: Neil Horman Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 5 +++++ net/sctp/output.c | 28 ++++++++++++++++++---------- 2 files changed, 23 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index ebf809eed33a..dbe1b911a24d 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1133,6 +1133,11 @@ struct sctp_input_cb { }; #define SCTP_INPUT_CB(__skb) ((struct sctp_input_cb *)&((__skb)->cb[0])) +struct sctp_output_cb { + struct sk_buff *last; +}; +#define SCTP_OUTPUT_CB(__skb) ((struct sctp_output_cb *)&((__skb)->cb[0])) + static inline const struct sk_buff *sctp_gso_headskb(const struct sk_buff *skb) { const struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; diff --git a/net/sctp/output.c b/net/sctp/output.c index e672dee302c7..7f849b01ec8e 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -409,6 +409,21 @@ static void sctp_packet_set_owner_w(struct sk_buff *skb, struct sock *sk) refcount_inc(&sk->sk_wmem_alloc); } +static void sctp_packet_gso_append(struct sk_buff *head, struct sk_buff *skb) +{ + if (SCTP_OUTPUT_CB(head)->last == head) + skb_shinfo(head)->frag_list = skb; + else + SCTP_OUTPUT_CB(head)->last->next = skb; + SCTP_OUTPUT_CB(head)->last = skb; + + head->truesize += skb->truesize; + head->data_len += skb->len; + head->len += skb->len; + + __skb_header_release(skb); +} + static int sctp_packet_pack(struct sctp_packet *packet, struct sk_buff *head, int gso, gfp_t gfp) { @@ -422,7 +437,7 @@ static int sctp_packet_pack(struct sctp_packet *packet, if (gso) { skb_shinfo(head)->gso_type = sk->sk_gso_type; - NAPI_GRO_CB(head)->last = head; + SCTP_OUTPUT_CB(head)->last = head; } else { nskb = head; pkt_size = packet->size; @@ -503,15 +518,8 @@ merge: &packet->chunk_list); } - if (gso) { - if (skb_gro_receive(&head, nskb)) { - kfree_skb(nskb); - return 0; - } - if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >= - sk->sk_gso_max_segs)) - return 0; - } + if (gso) + sctp_packet_gso_append(head, nskb); pkt_count++; } while (!list_empty(&packet->chunk_list)); -- cgit v1.2.3 From badbc27df3a934e0025be238754f9ca6a852c006 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Fri, 8 Jun 2018 10:04:47 +0300 Subject: nl80211: fix some kernel doc tag mistakes There is a bunch of tags marking constants with &, which means struct or enum name. Replace them with %, which is the correct tag for constants. Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 28b36545de24..27e4e441caac 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -981,18 +981,18 @@ * only the %NL80211_ATTR_IE data is used and updated with this command. * * @NL80211_CMD_SET_PMK: For offloaded 4-Way handshake, set the PMK or PMK-R0 - * for the given authenticator address (specified with &NL80211_ATTR_MAC). - * When &NL80211_ATTR_PMKR0_NAME is set, &NL80211_ATTR_PMK specifies the + * for the given authenticator address (specified with %NL80211_ATTR_MAC). + * When %NL80211_ATTR_PMKR0_NAME is set, %NL80211_ATTR_PMK specifies the * PMK-R0, otherwise it specifies the PMK. * @NL80211_CMD_DEL_PMK: For offloaded 4-Way handshake, delete the previously * configured PMK for the authenticator address identified by - * &NL80211_ATTR_MAC. + * %NL80211_ATTR_MAC. * @NL80211_CMD_PORT_AUTHORIZED: An event that indicates that the 4 way * handshake was completed successfully by the driver. The BSSID is - * specified with &NL80211_ATTR_MAC. Drivers that support 4 way handshake + * specified with %NL80211_ATTR_MAC. Drivers that support 4 way handshake * offload should send this event after indicating 802.11 association with - * &NL80211_CMD_CONNECT or &NL80211_CMD_ROAM. If the 4 way handshake failed - * &NL80211_CMD_DISCONNECT should be indicated instead. + * %NL80211_CMD_CONNECT or %NL80211_CMD_ROAM. If the 4 way handshake failed + * %NL80211_CMD_DISCONNECT should be indicated instead. * * @NL80211_CMD_CONTROL_PORT_FRAME: Control Port (e.g. PAE) frame TX request * and RX notification. This command is used both as a request to transmit @@ -1029,9 +1029,9 @@ * initiated the connection through the connect request. * * @NL80211_CMD_STA_OPMODE_CHANGED: An event that notify station's - * ht opmode or vht opmode changes using any of &NL80211_ATTR_SMPS_MODE, - * &NL80211_ATTR_CHANNEL_WIDTH,&NL80211_ATTR_NSS attributes with its - * address(specified in &NL80211_ATTR_MAC). + * ht opmode or vht opmode changes using any of %NL80211_ATTR_SMPS_MODE, + * %NL80211_ATTR_CHANNEL_WIDTH,%NL80211_ATTR_NSS attributes with its + * address(specified in %NL80211_ATTR_MAC). * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use @@ -2218,7 +2218,7 @@ enum nl80211_commands { * @NL80211_ATTR_EXTERNAL_AUTH_ACTION: Identify the requested external * authentication operation (u32 attribute with an * &enum nl80211_external_auth_action value). This is used with the - * &NL80211_CMD_EXTERNAL_AUTH request event. + * %NL80211_CMD_EXTERNAL_AUTH request event. * @NL80211_ATTR_EXTERNAL_AUTH_SUPPORT: Flag attribute indicating that the user * space supports external authentication. This attribute shall be used * only with %NL80211_CMD_CONNECT request. The driver may offload @@ -3491,7 +3491,7 @@ enum nl80211_sched_scan_match_attr { * @NL80211_RRF_AUTO_BW: maximum available bandwidth should be calculated * base on contiguous rules and wider channels will be allowed to cross * multiple contiguous/overlapping frequency ranges. - * @NL80211_RRF_IR_CONCURRENT: See &NL80211_FREQUENCY_ATTR_IR_CONCURRENT + * @NL80211_RRF_IR_CONCURRENT: See %NL80211_FREQUENCY_ATTR_IR_CONCURRENT * @NL80211_RRF_NO_HT40MINUS: channels can't be used in HT40- operation * @NL80211_RRF_NO_HT40PLUS: channels can't be used in HT40+ operation * @NL80211_RRF_NO_80MHZ: 80MHz operation not allowed @@ -5643,11 +5643,11 @@ enum nl80211_nan_func_attributes { * @NL80211_NAN_SRF_INCLUDE: present if the include bit of the SRF set. * This is a flag. * @NL80211_NAN_SRF_BF: Bloom Filter. Present if and only if - * &NL80211_NAN_SRF_MAC_ADDRS isn't present. This attribute is binary. + * %NL80211_NAN_SRF_MAC_ADDRS isn't present. This attribute is binary. * @NL80211_NAN_SRF_BF_IDX: index of the Bloom Filter. Mandatory if - * &NL80211_NAN_SRF_BF is present. This is a u8. + * %NL80211_NAN_SRF_BF is present. This is a u8. * @NL80211_NAN_SRF_MAC_ADDRS: list of MAC addresses for the SRF. Present if - * and only if &NL80211_NAN_SRF_BF isn't present. This is a nested + * and only if %NL80211_NAN_SRF_BF isn't present. This is a nested * attribute. Each nested attribute is a MAC address. * @NUM_NL80211_NAN_SRF_ATTR: internal * @NL80211_NAN_SRF_ATTR_MAX: highest NAN SRF attribute -- cgit v1.2.3