summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/Kconfig26
-rw-r--r--net/atm/clip.c6
-rw-r--r--net/bpf/test_run.c43
-rw-r--r--net/core/bpf_sk_storage.c2
-rw-r--r--net/core/filter.c12
-rw-r--r--net/core/net-sysfs.c25
-rw-r--r--net/core/page_pool.c71
-rw-r--r--net/core/rtnetlink.c23
-rw-r--r--net/core/skmsg.c13
-rw-r--r--net/core/xdp.c5
-rw-r--r--net/dsa/tag_ocelot.c14
-rw-r--r--net/ipv4/Kconfig218
-rw-r--r--net/ipv4/fib_frontend.c10
-rw-r--r--net/ipv4/ip_gre.c6
-rw-r--r--net/ipv4/ip_input.c35
-rw-r--r--net/ipv4/ip_output.c4
-rw-r--r--net/ipv4/ip_tunnel_core.c140
-rw-r--r--net/ipv4/ipconfig.c3
-rw-r--r--net/ipv4/netfilter/nf_flow_table_ipv4.c2
-rw-r--r--net/ipv4/nexthop.c1
-rw-r--r--net/ipv4/route.c42
-rw-r--r--net/ipv4/sysctl_net_ipv4.c6
-rw-r--r--net/ipv4/tcp_cong.c6
-rw-r--r--net/ipv4/tcp_ulp.c3
-rw-r--r--net/ipv4/udp.c27
-rw-r--r--net/ipv4/xfrm4_output.c2
-rw-r--r--net/ipv6/ip6_fib.c4
-rw-r--r--net/ipv6/ip6_input.c26
-rw-r--r--net/ipv6/ip6_output.c4
-rw-r--r--net/ipv6/ipv6_sockglue.c4
-rw-r--r--net/ipv6/netfilter/Kconfig28
-rw-r--r--net/ipv6/netfilter/nf_flow_table_ipv6.c2
-rw-r--r--net/ipv6/route.c5
-rw-r--r--net/ipv6/seg6_local.c22
-rw-r--r--net/ipv6/xfrm6_output.c2
-rw-r--r--net/mac80211/Makefile3
-rw-r--r--net/mac80211/airtime.c597
-rw-r--r--net/mac80211/debugfs.c88
-rw-r--r--net/mac80211/debugfs_sta.c43
-rw-r--r--net/mac80211/ieee80211_i.h8
-rw-r--r--net/mac80211/main.c10
-rw-r--r--net/mac80211/sta_info.c52
-rw-r--r--net/mac80211/sta_info.h12
-rw-r--r--net/mac80211/status.c39
-rw-r--r--net/mac80211/tx.c72
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c23
-rw-r--r--net/netfilter/nf_flow_table_core.c8
-rw-r--r--net/netfilter/nf_flow_table_inet.c25
-rw-r--r--net/netfilter/nf_flow_table_offload.c179
-rw-r--r--net/netfilter/nf_tables_api.c49
-rw-r--r--net/netfilter/nf_tables_offload.c95
-rw-r--r--net/netfilter/nft_cmp.c6
-rw-r--r--net/netfilter/nft_meta.c18
-rw-r--r--net/netfilter/nft_payload.c94
-rw-r--r--net/netfilter/xt_time.c19
-rw-r--r--net/nfc/hci/Kconfig14
-rw-r--r--net/sched/act_ct.c1
-rw-r--r--net/sched/act_mpls.c1
-rw-r--r--net/sched/act_pedit.c12
-rw-r--r--net/sched/act_tunnel_key.c207
-rw-r--r--net/sched/cls_flower.c254
-rw-r--r--net/sched/sch_pie.c120
-rw-r--r--net/sched/sch_taprio.c28
-rw-r--r--net/tipc/bcast.c4
-rw-r--r--net/tipc/bcast.h2
-rw-r--r--net/tipc/link.c2
-rw-r--r--net/tipc/name_table.c51
-rw-r--r--net/tipc/name_table.h4
-rw-r--r--net/tipc/node.c8
-rw-r--r--net/tls/tls_main.c1
-rw-r--r--net/tls/tls_sw.c11
-rw-r--r--net/vmw_vsock/af_vsock.c9
-rw-r--r--net/xfrm/Kconfig10
73 files changed, 2560 insertions, 461 deletions
diff --git a/net/Kconfig b/net/Kconfig
index 3101bfcbdd7a..bd191f978a23 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -258,7 +258,7 @@ config XPS
default y
config HWBM
- bool
+ bool
config CGROUP_NET_PRIO
bool "Network priority cgroup"
@@ -309,12 +309,12 @@ config BPF_STREAM_PARSER
select STREAM_PARSER
select NET_SOCK_MSG
---help---
- Enabling this allows a stream parser to be used with
- BPF_MAP_TYPE_SOCKMAP.
+ Enabling this allows a stream parser to be used with
+ BPF_MAP_TYPE_SOCKMAP.
- BPF_MAP_TYPE_SOCKMAP provides a map type to use with network sockets.
- It can be used to enforce socket policy, implement socket redirects,
- etc.
+ BPF_MAP_TYPE_SOCKMAP provides a map type to use with network sockets.
+ It can be used to enforce socket policy, implement socket redirects,
+ etc.
config NET_FLOW_LIMIT
bool
@@ -349,12 +349,12 @@ config NET_DROP_MONITOR
tristate "Network packet drop alerting service"
depends on INET && TRACEPOINTS
---help---
- This feature provides an alerting service to userspace in the
- event that packets are discarded in the network stack. Alerts
- are broadcast via netlink socket to any listening user space
- process. If you don't need network drop alerts, or if you are ok
- just checking the various proc files and other utilities for
- drop statistics, say N here.
+ This feature provides an alerting service to userspace in the
+ event that packets are discarded in the network stack. Alerts
+ are broadcast via netlink socket to any listening user space
+ process. If you don't need network drop alerts, or if you are ok
+ just checking the various proc files and other utilities for
+ drop statistics, say N here.
endmenu
@@ -433,7 +433,7 @@ config NET_DEVLINK
imply NET_DROP_MONITOR
config PAGE_POOL
- bool
+ bool
config FAILOVER
tristate "Generic failover module"
diff --git a/net/atm/clip.c b/net/atm/clip.c
index a7972da7235d..294cb9efe3d3 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -89,7 +89,7 @@ static void unlink_clip_vcc(struct clip_vcc *clip_vcc)
struct clip_vcc **walk;
if (!entry) {
- pr_crit("!clip_vcc->entry (clip_vcc %p)\n", clip_vcc);
+ pr_err("!clip_vcc->entry (clip_vcc %p)\n", clip_vcc);
return;
}
netif_tx_lock_bh(entry->neigh->dev); /* block clip_start_xmit() */
@@ -109,10 +109,10 @@ static void unlink_clip_vcc(struct clip_vcc *clip_vcc)
error = neigh_update(entry->neigh, NULL, NUD_NONE,
NEIGH_UPDATE_F_ADMIN, 0);
if (error)
- pr_crit("neigh_update failed with %d\n", error);
+ pr_err("neigh_update failed with %d\n", error);
goto out;
}
- pr_crit("ATMARP: failed (entry %p, vcc 0x%p)\n", entry, clip_vcc);
+ pr_err("ATMARP: failed (entry %p, vcc 0x%p)\n", entry, clip_vcc);
out:
netif_tx_unlock_bh(entry->neigh->dev);
}
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 0be4497cb832..915c2d6f7fb9 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -105,6 +105,40 @@ out:
return err;
}
+/* Integer types of various sizes and pointer combinations cover variety of
+ * architecture dependent calling conventions. 7+ can be supported in the
+ * future.
+ */
+int noinline bpf_fentry_test1(int a)
+{
+ return a + 1;
+}
+
+int noinline bpf_fentry_test2(int a, u64 b)
+{
+ return a + b;
+}
+
+int noinline bpf_fentry_test3(char a, int b, u64 c)
+{
+ return a + b + c;
+}
+
+int noinline bpf_fentry_test4(void *a, char b, int c, u64 d)
+{
+ return (long)a + b + c + d;
+}
+
+int noinline bpf_fentry_test5(u64 a, void *b, short c, int d, u64 e)
+{
+ return a + (long)b + c + d + e;
+}
+
+int noinline bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f)
+{
+ return a + (long)b + c + d + (long)e + f;
+}
+
static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
u32 headroom, u32 tailroom)
{
@@ -122,6 +156,15 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
kfree(data);
return ERR_PTR(-EFAULT);
}
+ if (bpf_fentry_test1(1) != 2 ||
+ bpf_fentry_test2(2, 3) != 5 ||
+ bpf_fentry_test3(4, 5, 6) != 15 ||
+ bpf_fentry_test4((void *)7, 8, 9, 10) != 34 ||
+ bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 ||
+ bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111) {
+ kfree(data);
+ return ERR_PTR(-EFAULT);
+ }
return data;
}
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index da5639a5bd3b..458be6b3eda9 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -798,7 +798,7 @@ int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
* Try to grab map refcnt to make sure that it's still
* alive and prevent concurrent removal.
*/
- map = bpf_map_inc_not_zero(&smap->map, false);
+ map = bpf_map_inc_not_zero(&smap->map);
if (IS_ERR(map))
continue;
diff --git a/net/core/filter.c b/net/core/filter.c
index fc303abec8fa..49ded4a7588a 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3816,7 +3816,7 @@ static const struct bpf_func_proto bpf_skb_event_output_proto = {
.arg5_type = ARG_CONST_SIZE_OR_ZERO,
};
-static u32 bpf_skb_output_btf_ids[5];
+static int bpf_skb_output_btf_ids[5];
const struct bpf_func_proto bpf_skb_output_proto = {
.func = bpf_skb_event_output,
.gpl_only = true,
@@ -8684,16 +8684,6 @@ out:
}
#ifdef CONFIG_INET
-struct sk_reuseport_kern {
- struct sk_buff *skb;
- struct sock *sk;
- struct sock *selected_sk;
- void *data_end;
- u32 hash;
- u32 reuseport_id;
- bool bind_inany;
-};
-
static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
struct sock_reuseport *reuse,
struct sock *sk, struct sk_buff *skb,
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 865ba6ca16eb..ae3bcb1540ec 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -923,21 +923,23 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
"rx-%u", index);
if (error)
- return error;
+ goto err;
dev_hold(queue->dev);
if (dev->sysfs_rx_queue_group) {
error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group);
- if (error) {
- kobject_put(kobj);
- return error;
- }
+ if (error)
+ goto err;
}
kobject_uevent(kobj, KOBJ_ADD);
return error;
+
+err:
+ kobject_put(kobj);
+ return error;
}
#endif /* CONFIG_SYSFS */
@@ -1461,21 +1463,22 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index)
error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
"tx-%u", index);
if (error)
- return error;
+ goto err;
dev_hold(queue->dev);
#ifdef CONFIG_BQL
error = sysfs_create_group(kobj, &dql_group);
- if (error) {
- kobject_put(kobj);
- return error;
- }
+ if (error)
+ goto err;
#endif
kobject_uevent(kobj, KOBJ_ADD);
-
return 0;
+
+err:
+ kobject_put(kobj);
+ return error;
}
#endif /* CONFIG_SYSFS */
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index dfc2501c35d9..a6aefe989043 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -47,6 +47,21 @@ static int page_pool_init(struct page_pool *pool,
(pool->p.dma_dir != DMA_BIDIRECTIONAL))
return -EINVAL;
+ if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV) {
+ /* In order to request DMA-sync-for-device the page
+ * needs to be mapped
+ */
+ if (!(pool->p.flags & PP_FLAG_DMA_MAP))
+ return -EINVAL;
+
+ if (!pool->p.max_len)
+ return -EINVAL;
+
+ /* pool->p.offset has to be set according to the address
+ * offset used by the DMA engine to start copying rx data
+ */
+ }
+
if (ptr_ring_init(&pool->ring, ring_qsize, GFP_KERNEL) < 0)
return -ENOMEM;
@@ -115,6 +130,16 @@ static struct page *__page_pool_get_cached(struct page_pool *pool)
return page;
}
+static void page_pool_dma_sync_for_device(struct page_pool *pool,
+ struct page *page,
+ unsigned int dma_sync_size)
+{
+ dma_sync_size = min(dma_sync_size, pool->p.max_len);
+ dma_sync_single_range_for_device(pool->p.dev, page->dma_addr,
+ pool->p.offset, dma_sync_size,
+ pool->p.dma_dir);
+}
+
/* slow path */
noinline
static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
@@ -159,6 +184,9 @@ static struct page *__page_pool_alloc_pages_slow(struct page_pool *pool,
}
page->dma_addr = dma;
+ if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
+ page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
+
skip_dma_map:
/* Track how many pages are held 'in-flight' */
pool->pages_state_hold_cnt++;
@@ -200,7 +228,7 @@ static s32 page_pool_inflight(struct page_pool *pool)
inflight = _distance(hold_cnt, release_cnt);
- trace_page_pool_inflight(pool, inflight, hold_cnt, release_cnt);
+ trace_page_pool_release(pool, inflight, hold_cnt, release_cnt);
WARN(inflight < 0, "Negative(%d) inflight packet-pages", inflight);
return inflight;
@@ -281,8 +309,19 @@ static bool __page_pool_recycle_direct(struct page *page,
return true;
}
-void __page_pool_put_page(struct page_pool *pool,
- struct page *page, bool allow_direct)
+/* page is NOT reusable when:
+ * 1) allocated when system is under some pressure. (page_is_pfmemalloc)
+ * 2) belongs to a different NUMA node than pool->p.nid.
+ *
+ * To update pool->p.nid users must call page_pool_update_nid.
+ */
+static bool pool_page_reusable(struct page_pool *pool, struct page *page)
+{
+ return !page_is_pfmemalloc(page) && page_to_nid(page) == pool->p.nid;
+}
+
+void __page_pool_put_page(struct page_pool *pool, struct page *page,
+ unsigned int dma_sync_size, bool allow_direct)
{
/* This allocator is optimized for the XDP mode that uses
* one-frame-per-page, but have fallbacks that act like the
@@ -290,9 +329,14 @@ void __page_pool_put_page(struct page_pool *pool,
*
* refcnt == 1 means page_pool owns page, and can recycle it.
*/
- if (likely(page_ref_count(page) == 1)) {
+ if (likely(page_ref_count(page) == 1 &&
+ pool_page_reusable(pool, page))) {
/* Read barrier done in page_ref_count / READ_ONCE */
+ if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
+ page_pool_dma_sync_for_device(pool, page,
+ dma_sync_size);
+
if (allow_direct && in_serving_softirq())
if (__page_pool_recycle_direct(page, pool))
return;
@@ -349,10 +393,13 @@ static void page_pool_free(struct page_pool *pool)
kfree(pool);
}
-static void page_pool_scrub(struct page_pool *pool)
+static void page_pool_empty_alloc_cache_once(struct page_pool *pool)
{
struct page *page;
+ if (pool->destroy_cnt)
+ return;
+
/* Empty alloc cache, assume caller made sure this is
* no-longer in use, and page_pool_alloc_pages() cannot be
* call concurrently.
@@ -361,6 +408,12 @@ static void page_pool_scrub(struct page_pool *pool)
page = pool->alloc.cache[--pool->alloc.count];
__page_pool_return_page(pool, page);
}
+}
+
+static void page_pool_scrub(struct page_pool *pool)
+{
+ page_pool_empty_alloc_cache_once(pool);
+ pool->destroy_cnt++;
/* No more consumers should exist, but producers could still
* be in-flight.
@@ -427,3 +480,11 @@ void page_pool_destroy(struct page_pool *pool)
schedule_delayed_work(&pool->release_dw, DEFER_TIME);
}
EXPORT_SYMBOL(page_pool_destroy);
+
+/* Caller must provide appropriate safe context, e.g. NAPI. */
+void page_pool_update_nid(struct page_pool *pool, int new_nid)
+{
+ trace_page_pool_update_nid(pool, new_nid);
+ pool->p.nid = new_nid;
+}
+EXPORT_SYMBOL(page_pool_update_nid);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 000eddb1207d..9f7aa448bd11 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2251,6 +2251,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
if (tb[IFLA_VF_MAC]) {
struct ifla_vf_mac *ivm = nla_data(tb[IFLA_VF_MAC]);
+ if (ivm->vf >= INT_MAX)
+ return -EINVAL;
err = -EOPNOTSUPP;
if (ops->ndo_set_vf_mac)
err = ops->ndo_set_vf_mac(dev, ivm->vf,
@@ -2262,6 +2264,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
if (tb[IFLA_VF_VLAN]) {
struct ifla_vf_vlan *ivv = nla_data(tb[IFLA_VF_VLAN]);
+ if (ivv->vf >= INT_MAX)
+ return -EINVAL;
err = -EOPNOTSUPP;
if (ops->ndo_set_vf_vlan)
err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan,
@@ -2294,6 +2298,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
if (len == 0)
return -EINVAL;
+ if (ivvl[0]->vf >= INT_MAX)
+ return -EINVAL;
err = ops->ndo_set_vf_vlan(dev, ivvl[0]->vf, ivvl[0]->vlan,
ivvl[0]->qos, ivvl[0]->vlan_proto);
if (err < 0)
@@ -2304,6 +2310,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
struct ifla_vf_tx_rate *ivt = nla_data(tb[IFLA_VF_TX_RATE]);
struct ifla_vf_info ivf;
+ if (ivt->vf >= INT_MAX)
+ return -EINVAL;
err = -EOPNOTSUPP;
if (ops->ndo_get_vf_config)
err = ops->ndo_get_vf_config(dev, ivt->vf, &ivf);
@@ -2322,6 +2330,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
if (tb[IFLA_VF_RATE]) {
struct ifla_vf_rate *ivt = nla_data(tb[IFLA_VF_RATE]);
+ if (ivt->vf >= INT_MAX)
+ return -EINVAL;
err = -EOPNOTSUPP;
if (ops->ndo_set_vf_rate)
err = ops->ndo_set_vf_rate(dev, ivt->vf,
@@ -2334,6 +2344,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
if (tb[IFLA_VF_SPOOFCHK]) {
struct ifla_vf_spoofchk *ivs = nla_data(tb[IFLA_VF_SPOOFCHK]);
+ if (ivs->vf >= INT_MAX)
+ return -EINVAL;
err = -EOPNOTSUPP;
if (ops->ndo_set_vf_spoofchk)
err = ops->ndo_set_vf_spoofchk(dev, ivs->vf,
@@ -2345,6 +2357,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
if (tb[IFLA_VF_LINK_STATE]) {
struct ifla_vf_link_state *ivl = nla_data(tb[IFLA_VF_LINK_STATE]);
+ if (ivl->vf >= INT_MAX)
+ return -EINVAL;
err = -EOPNOTSUPP;
if (ops->ndo_set_vf_link_state)
err = ops->ndo_set_vf_link_state(dev, ivl->vf,
@@ -2358,6 +2372,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
err = -EOPNOTSUPP;
ivrssq_en = nla_data(tb[IFLA_VF_RSS_QUERY_EN]);
+ if (ivrssq_en->vf >= INT_MAX)
+ return -EINVAL;
if (ops->ndo_set_vf_rss_query_en)
err = ops->ndo_set_vf_rss_query_en(dev, ivrssq_en->vf,
ivrssq_en->setting);
@@ -2368,6 +2384,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
if (tb[IFLA_VF_TRUST]) {
struct ifla_vf_trust *ivt = nla_data(tb[IFLA_VF_TRUST]);
+ if (ivt->vf >= INT_MAX)
+ return -EINVAL;
err = -EOPNOTSUPP;
if (ops->ndo_set_vf_trust)
err = ops->ndo_set_vf_trust(dev, ivt->vf, ivt->setting);
@@ -2378,15 +2396,18 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
if (tb[IFLA_VF_IB_NODE_GUID]) {
struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_NODE_GUID]);
+ if (ivt->vf >= INT_MAX)
+ return -EINVAL;
if (!ops->ndo_set_vf_guid)
return -EOPNOTSUPP;
-
return handle_vf_guid(dev, ivt, IFLA_VF_IB_NODE_GUID);
}
if (tb[IFLA_VF_IB_PORT_GUID]) {
struct ifla_vf_guid *ivt = nla_data(tb[IFLA_VF_IB_PORT_GUID]);
+ if (ivt->vf >= INT_MAX)
+ return -EINVAL;
if (!ops->ndo_set_vf_guid)
return -EOPNOTSUPP;
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index ad31e4e53d0a..a469d2124f3f 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -793,15 +793,18 @@ static void sk_psock_strp_data_ready(struct sock *sk)
static void sk_psock_write_space(struct sock *sk)
{
struct sk_psock *psock;
- void (*write_space)(struct sock *sk);
+ void (*write_space)(struct sock *sk) = NULL;
rcu_read_lock();
psock = sk_psock(sk);
- if (likely(psock && sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED)))
- schedule_work(&psock->work);
- write_space = psock->saved_write_space;
+ if (likely(psock)) {
+ if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+ schedule_work(&psock->work);
+ write_space = psock->saved_write_space;
+ }
rcu_read_unlock();
- write_space(sk);
+ if (write_space)
+ write_space(sk);
}
int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock)
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 8e405abaf05a..e334fad0a6b8 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -73,11 +73,6 @@ static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
/* Allow this ID to be reused */
ida_simple_remove(&mem_id_pool, xa->mem.id);
- /* Poison memory */
- xa->mem.id = 0xFFFF;
- xa->mem.type = 0xF0F0;
- xa->allocator = (void *)0xDEAD9001;
-
kfree(xa);
}
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index 078d4790669d..8e3e7283d430 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -137,9 +137,11 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
struct net_device *netdev)
{
struct dsa_port *dp = dsa_slave_to_port(netdev);
- u64 bypass, dest, src, qos_class;
+ u64 bypass, dest, src, qos_class, rew_op;
struct dsa_switch *ds = dp->ds;
int port = dp->index;
+ struct ocelot *ocelot = ds->priv;
+ struct ocelot_port *ocelot_port = ocelot->ports[port];
u8 *injection;
if (unlikely(skb_cow_head(skb, OCELOT_TAG_LEN) < 0)) {
@@ -161,6 +163,16 @@ static struct sk_buff *ocelot_xmit(struct sk_buff *skb,
packing(injection, &src, 46, 43, OCELOT_TAG_LEN, PACK, 0);
packing(injection, &qos_class, 19, 17, OCELOT_TAG_LEN, PACK, 0);
+ if (ocelot->ptp && (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
+ rew_op = ocelot_port->ptp_cmd;
+ if (ocelot_port->ptp_cmd == IFH_REW_OP_TWO_STEP_PTP) {
+ rew_op |= (ocelot_port->ts_id % 4) << 3;
+ ocelot_port->ts_id++;
+ }
+
+ packing(injection, &rew_op, 125, 117, OCELOT_TAG_LEN, PACK, 0);
+ }
+
return skb;
}
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 03381f3e12ba..fc816b187170 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -180,8 +180,8 @@ config NET_IPIP
config NET_IPGRE_DEMUX
tristate "IP: GRE demultiplexer"
help
- This is helper module to demultiplex GRE packets on GRE version field criteria.
- Required by ip_gre and pptp modules.
+ This is helper module to demultiplex GRE packets on GRE version field criteria.
+ Required by ip_gre and pptp modules.
config NET_IP_TUNNEL
tristate
@@ -459,200 +459,200 @@ config TCP_CONG_BIC
tristate "Binary Increase Congestion (BIC) control"
default m
---help---
- BIC-TCP is a sender-side only change that ensures a linear RTT
- fairness under large windows while offering both scalability and
- bounded TCP-friendliness. The protocol combines two schemes
- called additive increase and binary search increase. When the
- congestion window is large, additive increase with a large
- increment ensures linear RTT fairness as well as good
- scalability. Under small congestion windows, binary search
- increase provides TCP friendliness.
- See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/
+ BIC-TCP is a sender-side only change that ensures a linear RTT
+ fairness under large windows while offering both scalability and
+ bounded TCP-friendliness. The protocol combines two schemes
+ called additive increase and binary search increase. When the
+ congestion window is large, additive increase with a large
+ increment ensures linear RTT fairness as well as good
+ scalability. Under small congestion windows, binary search
+ increase provides TCP friendliness.
+ See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/
config TCP_CONG_CUBIC
tristate "CUBIC TCP"
default y
---help---
- This is version 2.0 of BIC-TCP which uses a cubic growth function
- among other techniques.
- See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf
+ This is version 2.0 of BIC-TCP which uses a cubic growth function
+ among other techniques.
+ See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf
config TCP_CONG_WESTWOOD
tristate "TCP Westwood+"
default m
---help---
- TCP Westwood+ is a sender-side only modification of the TCP Reno
- protocol stack that optimizes the performance of TCP congestion
- control. It is based on end-to-end bandwidth estimation to set
- congestion window and slow start threshold after a congestion
- episode. Using this estimation, TCP Westwood+ adaptively sets a
- slow start threshold and a congestion window which takes into
- account the bandwidth used at the time congestion is experienced.
- TCP Westwood+ significantly increases fairness wrt TCP Reno in
- wired networks and throughput over wireless links.
+ TCP Westwood+ is a sender-side only modification of the TCP Reno
+ protocol stack that optimizes the performance of TCP congestion
+ control. It is based on end-to-end bandwidth estimation to set
+ congestion window and slow start threshold after a congestion
+ episode. Using this estimation, TCP Westwood+ adaptively sets a
+ slow start threshold and a congestion window which takes into
+ account the bandwidth used at the time congestion is experienced.
+ TCP Westwood+ significantly increases fairness wrt TCP Reno in
+ wired networks and throughput over wireless links.
config TCP_CONG_HTCP
tristate "H-TCP"
default m
---help---
- H-TCP is a send-side only modifications of the TCP Reno
- protocol stack that optimizes the performance of TCP
- congestion control for high speed network links. It uses a
- modeswitch to change the alpha and beta parameters of TCP Reno
- based on network conditions and in a way so as to be fair with
- other Reno and H-TCP flows.
+ H-TCP is a send-side only modifications of the TCP Reno
+ protocol stack that optimizes the performance of TCP
+ congestion control for high speed network links. It uses a
+ modeswitch to change the alpha and beta parameters of TCP Reno
+ based on network conditions and in a way so as to be fair with
+ other Reno and H-TCP flows.
config TCP_CONG_HSTCP
tristate "High Speed TCP"
default n
---help---
- Sally Floyd's High Speed TCP (RFC 3649) congestion control.
- A modification to TCP's congestion control mechanism for use
- with large congestion windows. A table indicates how much to
- increase the congestion window by when an ACK is received.
- For more detail see http://www.icir.org/floyd/hstcp.html
+ Sally Floyd's High Speed TCP (RFC 3649) congestion control.
+ A modification to TCP's congestion control mechanism for use
+ with large congestion windows. A table indicates how much to
+ increase the congestion window by when an ACK is received.
+ For more detail see http://www.icir.org/floyd/hstcp.html
config TCP_CONG_HYBLA
tristate "TCP-Hybla congestion control algorithm"
default n
---help---
- TCP-Hybla is a sender-side only change that eliminates penalization of
- long-RTT, large-bandwidth connections, like when satellite legs are
- involved, especially when sharing a common bottleneck with normal
- terrestrial connections.
+ TCP-Hybla is a sender-side only change that eliminates penalization of
+ long-RTT, large-bandwidth connections, like when satellite legs are
+ involved, especially when sharing a common bottleneck with normal
+ terrestrial connections.
config TCP_CONG_VEGAS
tristate "TCP Vegas"
default n
---help---
- TCP Vegas is a sender-side only change to TCP that anticipates
- the onset of congestion by estimating the bandwidth. TCP Vegas
- adjusts the sending rate by modifying the congestion
- window. TCP Vegas should provide less packet loss, but it is
- not as aggressive as TCP Reno.
+ TCP Vegas is a sender-side only change to TCP that anticipates
+ the onset of congestion by estimating the bandwidth. TCP Vegas
+ adjusts the sending rate by modifying the congestion
+ window. TCP Vegas should provide less packet loss, but it is
+ not as aggressive as TCP Reno.
config TCP_CONG_NV
- tristate "TCP NV"
- default n
- ---help---
- TCP NV is a follow up to TCP Vegas. It has been modified to deal with
- 10G networks, measurement noise introduced by LRO, GRO and interrupt
- coalescence. In addition, it will decrease its cwnd multiplicatively
- instead of linearly.
+ tristate "TCP NV"
+ default n
+ ---help---
+ TCP NV is a follow up to TCP Vegas. It has been modified to deal with
+ 10G networks, measurement noise introduced by LRO, GRO and interrupt
+ coalescence. In addition, it will decrease its cwnd multiplicatively
+ instead of linearly.
- Note that in general congestion avoidance (cwnd decreased when # packets
- queued grows) cannot coexist with congestion control (cwnd decreased only
- when there is packet loss) due to fairness issues. One scenario when they
- can coexist safely is when the CA flows have RTTs << CC flows RTTs.
+ Note that in general congestion avoidance (cwnd decreased when # packets
+ queued grows) cannot coexist with congestion control (cwnd decreased only
+ when there is packet loss) due to fairness issues. One scenario when they
+ can coexist safely is when the CA flows have RTTs << CC flows RTTs.
- For further details see http://www.brakmo.org/networking/tcp-nv/
+ For further details see http://www.brakmo.org/networking/tcp-nv/
config TCP_CONG_SCALABLE
tristate "Scalable TCP"
default n
---help---
- Scalable TCP is a sender-side only change to TCP which uses a
- MIMD congestion control algorithm which has some nice scaling
- properties, though is known to have fairness issues.
- See http://www.deneholme.net/tom/scalable/
+ Scalable TCP is a sender-side only change to TCP which uses a
+ MIMD congestion control algorithm which has some nice scaling
+ properties, though is known to have fairness issues.
+ See http://www.deneholme.net/tom/scalable/
config TCP_CONG_LP
tristate "TCP Low Priority"
default n
---help---
- TCP Low Priority (TCP-LP), a distributed algorithm whose goal is
- to utilize only the excess network bandwidth as compared to the
- ``fair share`` of bandwidth as targeted by TCP.
- See http://www-ece.rice.edu/networks/TCP-LP/
+ TCP Low Priority (TCP-LP), a distributed algorithm whose goal is
+ to utilize only the excess network bandwidth as compared to the
+ ``fair share`` of bandwidth as targeted by TCP.
+ See http://www-ece.rice.edu/networks/TCP-LP/
config TCP_CONG_VENO
tristate "TCP Veno"
default n
---help---
- TCP Veno is a sender-side only enhancement of TCP to obtain better
- throughput over wireless networks. TCP Veno makes use of state
- distinguishing to circumvent the difficult judgment of the packet loss
- type. TCP Veno cuts down less congestion window in response to random
- loss packets.
- See <http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=1177186>
+ TCP Veno is a sender-side only enhancement of TCP to obtain better
+ throughput over wireless networks. TCP Veno makes use of state
+ distinguishing to circumvent the difficult judgment of the packet loss
+ type. TCP Veno cuts down less congestion window in response to random
+ loss packets.
+ See <http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=1177186>
config TCP_CONG_YEAH
tristate "YeAH TCP"
select TCP_CONG_VEGAS
default n
---help---
- YeAH-TCP is a sender-side high-speed enabled TCP congestion control
- algorithm, which uses a mixed loss/delay approach to compute the
- congestion window. It's design goals target high efficiency,
- internal, RTT and Reno fairness, resilience to link loss while
- keeping network elements load as low as possible.
+ YeAH-TCP is a sender-side high-speed enabled TCP congestion control
+ algorithm, which uses a mixed loss/delay approach to compute the
+ congestion window. It's design goals target high efficiency,
+ internal, RTT and Reno fairness, resilience to link loss while
+ keeping network elements load as low as possible.
- For further details look here:
- http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf
+ For further details look here:
+ http://wil.cs.caltech.edu/pfldnet2007/paper/YeAH_TCP.pdf
config TCP_CONG_ILLINOIS
tristate "TCP Illinois"
default n
---help---
- TCP-Illinois is a sender-side modification of TCP Reno for
- high speed long delay links. It uses round-trip-time to
- adjust the alpha and beta parameters to achieve a higher average
- throughput and maintain fairness.
+ TCP-Illinois is a sender-side modification of TCP Reno for
+ high speed long delay links. It uses round-trip-time to
+ adjust the alpha and beta parameters to achieve a higher average
+ throughput and maintain fairness.
- For further details see:
- http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html
+ For further details see:
+ http://www.ews.uiuc.edu/~shaoliu/tcpillinois/index.html
config TCP_CONG_DCTCP
tristate "DataCenter TCP (DCTCP)"
default n
---help---
- DCTCP leverages Explicit Congestion Notification (ECN) in the network to
- provide multi-bit feedback to the end hosts. It is designed to provide:
+ DCTCP leverages Explicit Congestion Notification (ECN) in the network to
+ provide multi-bit feedback to the end hosts. It is designed to provide:
- - High burst tolerance (incast due to partition/aggregate),
- - Low latency (short flows, queries),
- - High throughput (continuous data updates, large file transfers) with
- commodity, shallow-buffered switches.
+ - High burst tolerance (incast due to partition/aggregate),
+ - Low latency (short flows, queries),
+ - High throughput (continuous data updates, large file transfers) with
+ commodity, shallow-buffered switches.
- All switches in the data center network running DCTCP must support
- ECN marking and be configured for marking when reaching defined switch
- buffer thresholds. The default ECN marking threshold heuristic for
- DCTCP on switches is 20 packets (30KB) at 1Gbps, and 65 packets
- (~100KB) at 10Gbps, but might need further careful tweaking.
+ All switches in the data center network running DCTCP must support
+ ECN marking and be configured for marking when reaching defined switch
+ buffer thresholds. The default ECN marking threshold heuristic for
+ DCTCP on switches is 20 packets (30KB) at 1Gbps, and 65 packets
+ (~100KB) at 10Gbps, but might need further careful tweaking.
- For further details see:
- http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp-final.pdf
+ For further details see:
+ http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp-final.pdf
config TCP_CONG_CDG
tristate "CAIA Delay-Gradient (CDG)"
default n
---help---
- CAIA Delay-Gradient (CDG) is a TCP congestion control that modifies
- the TCP sender in order to:
+ CAIA Delay-Gradient (CDG) is a TCP congestion control that modifies
+ the TCP sender in order to:
o Use the delay gradient as a congestion signal.
o Back off with an average probability that is independent of the RTT.
o Coexist with flows that use loss-based congestion control.
o Tolerate packet loss unrelated to congestion.
- For further details see:
- D.A. Hayes and G. Armitage. "Revisiting TCP congestion control using
- delay gradients." In Networking 2011. Preprint: http://goo.gl/No3vdg
+ For further details see:
+ D.A. Hayes and G. Armitage. "Revisiting TCP congestion control using
+ delay gradients." In Networking 2011. Preprint: http://goo.gl/No3vdg
config TCP_CONG_BBR
tristate "BBR TCP"
default n
---help---
- BBR (Bottleneck Bandwidth and RTT) TCP congestion control aims to
- maximize network utilization and minimize queues. It builds an explicit
- model of the the bottleneck delivery rate and path round-trip
- propagation delay. It tolerates packet loss and delay unrelated to
- congestion. It can operate over LAN, WAN, cellular, wifi, or cable
- modem links. It can coexist with flows that use loss-based congestion
- control, and can operate with shallow buffers, deep buffers,
- bufferbloat, policers, or AQM schemes that do not provide a delay
- signal. It requires the fq ("Fair Queue") pacing packet scheduler.
+ BBR (Bottleneck Bandwidth and RTT) TCP congestion control aims to
+ maximize network utilization and minimize queues. It builds an explicit
+ model of the the bottleneck delivery rate and path round-trip
+ propagation delay. It tolerates packet loss and delay unrelated to
+ congestion. It can operate over LAN, WAN, cellular, wifi, or cable
+ modem links. It can coexist with flows that use loss-based congestion
+ control, and can operate with shallow buffers, deep buffers,
+ bufferbloat, policers, or AQM schemes that do not provide a delay
+ signal. It requires the fq ("Fair Queue") pacing packet scheduler.
choice
prompt "Default TCP congestion control"
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 71c78d223dfd..577db1d50a24 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -70,11 +70,6 @@ fail:
fib_free_table(main_table);
return -ENOMEM;
}
-
-static bool fib4_has_custom_rules(struct net *net)
-{
- return false;
-}
#else
struct fib_table *fib_new_table(struct net *net, u32 id)
@@ -131,11 +126,6 @@ struct fib_table *fib_get_table(struct net *net, u32 id)
}
return NULL;
}
-
-static bool fib4_has_custom_rules(struct net *net)
-{
- return net->ipv4.fib_has_custom_rules;
-}
#endif /* CONFIG_IP_MULTIPLE_TABLES */
static void fib_replace_table(struct net *net, struct fib_table *old,
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 10636fb6093e..572b6307a2df 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -340,6 +340,8 @@ static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
iph->saddr, iph->daddr, tpi->key);
if (tunnel) {
+ const struct iphdr *tnl_params;
+
if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
raw_proto, false) < 0)
goto drop;
@@ -348,7 +350,9 @@ static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
skb_pop_mac_header(skb);
else
skb_reset_mac_header(skb);
- if (tunnel->collect_md) {
+
+ tnl_params = &tunnel->parms.iph;
+ if (tunnel->collect_md || tnl_params->daddr == 0) {
__be16 flags;
__be64 tun_id;
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 24a95126e698..aa438c6758a7 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -302,16 +302,31 @@ drop:
return true;
}
+static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph,
+ const struct sk_buff *hint)
+{
+ return hint && !skb_dst(skb) && ip_hdr(hint)->daddr == iph->daddr &&
+ ip_hdr(hint)->tos == iph->tos;
+}
+
INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *));
INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *));
static int ip_rcv_finish_core(struct net *net, struct sock *sk,
- struct sk_buff *skb, struct net_device *dev)
+ struct sk_buff *skb, struct net_device *dev,
+ const struct sk_buff *hint)
{
const struct iphdr *iph = ip_hdr(skb);
int (*edemux)(struct sk_buff *skb);
struct rtable *rt;
int err;
+ if (ip_can_use_hint(skb, iph, hint)) {
+ err = ip_route_use_hint(skb, iph->daddr, iph->saddr, iph->tos,
+ dev, hint);
+ if (unlikely(err))
+ goto drop_error;
+ }
+
if (net->ipv4.sysctl_ip_early_demux &&
!skb_dst(skb) &&
!skb->sk &&
@@ -408,7 +423,7 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
if (!skb)
return NET_RX_SUCCESS;
- ret = ip_rcv_finish_core(net, sk, skb, dev);
+ ret = ip_rcv_finish_core(net, sk, skb, dev, NULL);
if (ret != NET_RX_DROP)
ret = dst_input(skb);
return ret;
@@ -535,11 +550,20 @@ static void ip_sublist_rcv_finish(struct list_head *head)
}
}
+static struct sk_buff *ip_extract_route_hint(const struct net *net,
+ struct sk_buff *skb, int rt_type)
+{
+ if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST)
+ return NULL;
+
+ return skb;
+}
+
static void ip_list_rcv_finish(struct net *net, struct sock *sk,
struct list_head *head)
{
+ struct sk_buff *skb, *next, *hint = NULL;
struct dst_entry *curr_dst = NULL;
- struct sk_buff *skb, *next;
struct list_head sublist;
INIT_LIST_HEAD(&sublist);
@@ -554,11 +578,14 @@ static void ip_list_rcv_finish(struct net *net, struct sock *sk,
skb = l3mdev_ip_rcv(skb);
if (!skb)
continue;
- if (ip_rcv_finish_core(net, sk, skb, dev) == NET_RX_DROP)
+ if (ip_rcv_finish_core(net, sk, skb, dev, hint) == NET_RX_DROP)
continue;
dst = skb_dst(skb);
if (curr_dst != dst) {
+ hint = ip_extract_route_hint(net, skb,
+ ((struct rtable *)dst)->rt_type);
+
/* dispatch old sublist */
if (!list_empty(&sublist))
ip_sublist_rcv_finish(&sublist);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 3d8baaaf7086..9d83cb320dcb 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -422,7 +422,7 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct net_device *dev = skb_dst(skb)->dev;
+ struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
@@ -430,7 +430,7 @@ int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
skb->protocol = htons(ETH_P_IP);
return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
- net, sk, skb, NULL, dev,
+ net, sk, skb, indev, dev,
ip_finish_output,
!(IPCB(skb)->flags & IPSKB_REROUTED));
}
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index c724fb30d048..47f8b947eef1 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -215,6 +215,7 @@ void ip_tunnel_get_stats64(struct net_device *dev,
EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
static const struct nla_policy ip_tun_policy[LWTUNNEL_IP_MAX + 1] = {
+ [LWTUNNEL_IP_UNSPEC] = { .strict_start_type = LWTUNNEL_IP_OPTS },
[LWTUNNEL_IP_ID] = { .type = NLA_U64 },
[LWTUNNEL_IP_DST] = { .type = NLA_U32 },
[LWTUNNEL_IP_SRC] = { .type = NLA_U32 },
@@ -251,7 +252,7 @@ erspan_opt_policy[LWTUNNEL_IP_OPT_ERSPAN_MAX + 1] = {
};
static int ip_tun_parse_opts_geneve(struct nlattr *attr,
- struct ip_tunnel_info *info,
+ struct ip_tunnel_info *info, int opts_len,
struct netlink_ext_ack *extack)
{
struct nlattr *tb[LWTUNNEL_IP_OPT_GENEVE_MAX + 1];
@@ -273,7 +274,7 @@ static int ip_tun_parse_opts_geneve(struct nlattr *attr,
return -EINVAL;
if (info) {
- struct geneve_opt *opt = ip_tunnel_info_opts(info);
+ struct geneve_opt *opt = ip_tunnel_info_opts(info) + opts_len;
memcpy(opt->opt_data, nla_data(attr), data_len);
opt->length = data_len / 4;
@@ -288,7 +289,7 @@ static int ip_tun_parse_opts_geneve(struct nlattr *attr,
}
static int ip_tun_parse_opts_vxlan(struct nlattr *attr,
- struct ip_tunnel_info *info,
+ struct ip_tunnel_info *info, int opts_len,
struct netlink_ext_ack *extack)
{
struct nlattr *tb[LWTUNNEL_IP_OPT_VXLAN_MAX + 1];
@@ -303,7 +304,8 @@ static int ip_tun_parse_opts_vxlan(struct nlattr *attr,
return -EINVAL;
if (info) {
- struct vxlan_metadata *md = ip_tunnel_info_opts(info);
+ struct vxlan_metadata *md =
+ ip_tunnel_info_opts(info) + opts_len;
attr = tb[LWTUNNEL_IP_OPT_VXLAN_GBP];
md->gbp = nla_get_u32(attr);
@@ -314,11 +316,12 @@ static int ip_tun_parse_opts_vxlan(struct nlattr *attr,
}
static int ip_tun_parse_opts_erspan(struct nlattr *attr,
- struct ip_tunnel_info *info,
+ struct ip_tunnel_info *info, int opts_len,
struct netlink_ext_ack *extack)
{
struct nlattr *tb[LWTUNNEL_IP_OPT_ERSPAN_MAX + 1];
int err;
+ u8 ver;
err = nla_parse_nested(tb, LWTUNNEL_IP_OPT_ERSPAN_MAX, attr,
erspan_opt_policy, extack);
@@ -328,23 +331,31 @@ static int ip_tun_parse_opts_erspan(struct nlattr *attr,
if (!tb[LWTUNNEL_IP_OPT_ERSPAN_VER])
return -EINVAL;
- if (info) {
- struct erspan_metadata *md = ip_tunnel_info_opts(info);
+ ver = nla_get_u8(tb[LWTUNNEL_IP_OPT_ERSPAN_VER]);
+ if (ver == 1) {
+ if (!tb[LWTUNNEL_IP_OPT_ERSPAN_INDEX])
+ return -EINVAL;
+ } else if (ver == 2) {
+ if (!tb[LWTUNNEL_IP_OPT_ERSPAN_DIR] ||
+ !tb[LWTUNNEL_IP_OPT_ERSPAN_HWID])
+ return -EINVAL;
+ } else {
+ return -EINVAL;
+ }
- attr = tb[LWTUNNEL_IP_OPT_ERSPAN_VER];
- md->version = nla_get_u8(attr);
+ if (info) {
+ struct erspan_metadata *md =
+ ip_tunnel_info_opts(info) + opts_len;
- if (md->version == 1 && tb[LWTUNNEL_IP_OPT_ERSPAN_INDEX]) {
+ md->version = ver;
+ if (ver == 1) {
attr = tb[LWTUNNEL_IP_OPT_ERSPAN_INDEX];
md->u.index = nla_get_be32(attr);
- } else if (md->version == 2 && tb[LWTUNNEL_IP_OPT_ERSPAN_DIR] &&
- tb[LWTUNNEL_IP_OPT_ERSPAN_HWID]) {
+ } else {
attr = tb[LWTUNNEL_IP_OPT_ERSPAN_DIR];
md->u.md2.dir = nla_get_u8(attr);
attr = tb[LWTUNNEL_IP_OPT_ERSPAN_HWID];
set_hwid(&md->u.md2, nla_get_u8(attr));
- } else {
- return -EINVAL;
}
info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
@@ -356,30 +367,57 @@ static int ip_tun_parse_opts_erspan(struct nlattr *attr,
static int ip_tun_parse_opts(struct nlattr *attr, struct ip_tunnel_info *info,
struct netlink_ext_ack *extack)
{
- struct nlattr *tb[LWTUNNEL_IP_OPTS_MAX + 1];
- int err;
+ int err, rem, opt_len, opts_len = 0, type = 0;
+ struct nlattr *nla;
if (!attr)
return 0;
- err = nla_parse_nested(tb, LWTUNNEL_IP_OPTS_MAX, attr,
- ip_opts_policy, extack);
+ err = nla_validate(nla_data(attr), nla_len(attr), LWTUNNEL_IP_OPTS_MAX,
+ ip_opts_policy, extack);
if (err)
return err;
- if (tb[LWTUNNEL_IP_OPTS_GENEVE])
- err = ip_tun_parse_opts_geneve(tb[LWTUNNEL_IP_OPTS_GENEVE],
- info, extack);
- else if (tb[LWTUNNEL_IP_OPTS_VXLAN])
- err = ip_tun_parse_opts_vxlan(tb[LWTUNNEL_IP_OPTS_VXLAN],
- info, extack);
- else if (tb[LWTUNNEL_IP_OPTS_ERSPAN])
- err = ip_tun_parse_opts_erspan(tb[LWTUNNEL_IP_OPTS_ERSPAN],
- info, extack);
- else
- err = -EINVAL;
+ nla_for_each_attr(nla, nla_data(attr), nla_len(attr), rem) {
+ switch (nla_type(nla)) {
+ case LWTUNNEL_IP_OPTS_GENEVE:
+ if (type && type != TUNNEL_GENEVE_OPT)
+ return -EINVAL;
+ opt_len = ip_tun_parse_opts_geneve(nla, info, opts_len,
+ extack);
+ if (opt_len < 0)
+ return opt_len;
+ opts_len += opt_len;
+ if (opts_len > IP_TUNNEL_OPTS_MAX)
+ return -EINVAL;
+ type = TUNNEL_GENEVE_OPT;
+ break;
+ case LWTUNNEL_IP_OPTS_VXLAN:
+ if (type)
+ return -EINVAL;
+ opt_len = ip_tun_parse_opts_vxlan(nla, info, opts_len,
+ extack);
+ if (opt_len < 0)
+ return opt_len;
+ opts_len += opt_len;
+ type = TUNNEL_VXLAN_OPT;
+ break;
+ case LWTUNNEL_IP_OPTS_ERSPAN:
+ if (type)
+ return -EINVAL;
+ opt_len = ip_tun_parse_opts_erspan(nla, info, opts_len,
+ extack);
+ if (opt_len < 0)
+ return opt_len;
+ opts_len += opt_len;
+ type = TUNNEL_ERSPAN_OPT;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
- return err;
+ return opts_len;
}
static int ip_tun_get_optlen(struct nlattr *attr,
@@ -477,18 +515,23 @@ static int ip_tun_fill_encap_opts_geneve(struct sk_buff *skb,
{
struct geneve_opt *opt;
struct nlattr *nest;
+ int offset = 0;
nest = nla_nest_start_noflag(skb, LWTUNNEL_IP_OPTS_GENEVE);
if (!nest)
return -ENOMEM;
- opt = ip_tunnel_info_opts(tun_info);
- if (nla_put_be16(skb, LWTUNNEL_IP_OPT_GENEVE_CLASS, opt->opt_class) ||
- nla_put_u8(skb, LWTUNNEL_IP_OPT_GENEVE_TYPE, opt->type) ||
- nla_put(skb, LWTUNNEL_IP_OPT_GENEVE_DATA, opt->length * 4,
- opt->opt_data)) {
- nla_nest_cancel(skb, nest);
- return -ENOMEM;
+ while (tun_info->options_len > offset) {
+ opt = ip_tunnel_info_opts(tun_info) + offset;
+ if (nla_put_be16(skb, LWTUNNEL_IP_OPT_GENEVE_CLASS,
+ opt->opt_class) ||
+ nla_put_u8(skb, LWTUNNEL_IP_OPT_GENEVE_TYPE, opt->type) ||
+ nla_put(skb, LWTUNNEL_IP_OPT_GENEVE_DATA, opt->length * 4,
+ opt->opt_data)) {
+ nla_nest_cancel(skb, nest);
+ return -ENOMEM;
+ }
+ offset += sizeof(*opt) + opt->length * 4;
}
nla_nest_end(skb, nest);
@@ -526,7 +569,7 @@ static int ip_tun_fill_encap_opts_erspan(struct sk_buff *skb,
return -ENOMEM;
md = ip_tunnel_info_opts(tun_info);
- if (nla_put_u32(skb, LWTUNNEL_IP_OPT_ERSPAN_VER, md->version))
+ if (nla_put_u8(skb, LWTUNNEL_IP_OPT_ERSPAN_VER, md->version))
goto err;
if (md->version == 1 &&
@@ -602,13 +645,18 @@ static int ip_tun_opts_nlsize(struct ip_tunnel_info *info)
opt_len = nla_total_size(0); /* LWTUNNEL_IP_OPTS */
if (info->key.tun_flags & TUNNEL_GENEVE_OPT) {
- struct geneve_opt *opt = ip_tunnel_info_opts(info);
-
- opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_GENEVE */
- + nla_total_size(2) /* OPT_GENEVE_CLASS */
- + nla_total_size(1) /* OPT_GENEVE_TYPE */
- + nla_total_size(opt->length * 4);
- /* OPT_GENEVE_DATA */
+ struct geneve_opt *opt;
+ int offset = 0;
+
+ opt_len += nla_total_size(0); /* LWTUNNEL_IP_OPTS_GENEVE */
+ while (info->options_len > offset) {
+ opt = ip_tunnel_info_opts(info) + offset;
+ opt_len += nla_total_size(2) /* OPT_GENEVE_CLASS */
+ + nla_total_size(1) /* OPT_GENEVE_TYPE */
+ + nla_total_size(opt->length * 4);
+ /* OPT_GENEVE_DATA */
+ offset += sizeof(*opt) + opt->length * 4;
+ }
} else if (info->key.tun_flags & TUNNEL_VXLAN_OPT) {
opt_len += nla_total_size(0) /* LWTUNNEL_IP_OPTS_VXLAN */
+ nla_total_size(4); /* OPT_VXLAN_GBP */
@@ -661,12 +709,14 @@ static const struct lwtunnel_encap_ops ip_tun_lwt_ops = {
};
static const struct nla_policy ip6_tun_policy[LWTUNNEL_IP6_MAX + 1] = {
+ [LWTUNNEL_IP6_UNSPEC] = { .strict_start_type = LWTUNNEL_IP6_OPTS },
[LWTUNNEL_IP6_ID] = { .type = NLA_U64 },
[LWTUNNEL_IP6_DST] = { .len = sizeof(struct in6_addr) },
[LWTUNNEL_IP6_SRC] = { .len = sizeof(struct in6_addr) },
[LWTUNNEL_IP6_HOPLIMIT] = { .type = NLA_U8 },
[LWTUNNEL_IP6_TC] = { .type = NLA_U8 },
[LWTUNNEL_IP6_FLAGS] = { .type = NLA_U16 },
+ [LWTUNNEL_IP6_OPTS] = { .type = NLA_NESTED },
};
static int ip6_tun_build_state(struct nlattr *attr,
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index 32e20b758b68..f35308ff84c3 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -1412,6 +1412,9 @@ static int __init wait_for_devices(void)
struct net_device *dev;
int found = 0;
+ /* make sure deferred device probes are finished */
+ wait_for_device_probe();
+
rtnl_lock();
for_each_netdev(&init_net, dev) {
if (ic_is_init_dev(dev)) {
diff --git a/net/ipv4/netfilter/nf_flow_table_ipv4.c b/net/ipv4/netfilter/nf_flow_table_ipv4.c
index 168b72e18be0..e32e41b99f0f 100644
--- a/net/ipv4/netfilter/nf_flow_table_ipv4.c
+++ b/net/ipv4/netfilter/nf_flow_table_ipv4.c
@@ -10,7 +10,7 @@ static struct nf_flowtable_type flowtable_ipv4 = {
.family = NFPROTO_IPV4,
.init = nf_flow_table_init,
.setup = nf_flow_table_offload_setup,
- .action = nf_flow_rule_route,
+ .action = nf_flow_rule_route_ipv4,
.free = nf_flow_table_free,
.hook = nf_flow_offload_ip_hook,
.owner = THIS_MODULE,
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index fc34fd1668d6..511eaa94e2d1 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -23,7 +23,6 @@ static void remove_nexthop(struct net *net, struct nexthop *nh,
#define NH_DEV_HASHSIZE (1U << NH_DEV_HASHBITS)
static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = {
- [NHA_UNSPEC] = { .strict_start_type = NHA_UNSPEC + 1 },
[NHA_ID] = { .type = NLA_U32 },
[NHA_GROUP] = { .type = NLA_BINARY },
[NHA_GROUP_TYPE] = { .type = NLA_U16 },
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index dcc4fa10138d..f88c93c38f11 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2019,10 +2019,52 @@ static int ip_mkroute_input(struct sk_buff *skb,
return __mkroute_input(skb, res, in_dev, daddr, saddr, tos);
}
+/* Implements all the saddr-related checks as ip_route_input_slow(),
+ * assuming daddr is valid and the destination is not a local broadcast one.
+ * Uses the provided hint instead of performing a route lookup.
+ */
+int ip_route_use_hint(struct sk_buff *skb, __be32 daddr, __be32 saddr,
+ u8 tos, struct net_device *dev,
+ const struct sk_buff *hint)
+{
+ struct in_device *in_dev = __in_dev_get_rcu(dev);
+ struct rtable *rt = (struct rtable *)hint;
+ struct net *net = dev_net(dev);
+ int err = -EINVAL;
+ u32 tag = 0;
+
+ if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr))
+ goto martian_source;
+
+ if (ipv4_is_zeronet(saddr))
+ goto martian_source;
+
+ if (ipv4_is_loopback(saddr) && !IN_DEV_NET_ROUTE_LOCALNET(in_dev, net))
+ goto martian_source;
+
+ if (rt->rt_type != RTN_LOCAL)
+ goto skip_validate_source;
+
+ tos &= IPTOS_RT_MASK;
+ err = fib_validate_source(skb, saddr, daddr, tos, 0, dev, in_dev, &tag);
+ if (err < 0)
+ goto martian_source;
+
+skip_validate_source:
+ skb_dst_copy(skb, hint);
+ return 0;
+
+martian_source:
+ ip_handle_martian_source(dev, in_dev, skb, daddr, saddr);
+ return err;
+}
+
/*
* NOTE. We drop all the packets that has local source
* addresses, because every properly looped back packet
* must have correct destination already attached by output routine.
+ * Changes in the enforced policies must be applied also to
+ * ip_route_use_hint().
*
* Such approach solves two big problems:
* 1. Not simplex devices are handled properly.
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 59ded25acd04..fcb2cd167f64 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -340,6 +340,10 @@ static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
user_key[i * 4 + 1],
user_key[i * 4 + 2],
user_key[i * 4 + 3]);
+
+ if (WARN_ON_ONCE(off >= tbl.maxlen - 1))
+ break;
+
if (i + 1 < n_keys)
off += snprintf(tbl.data + off, tbl.maxlen - off, ",");
}
@@ -1037,7 +1041,7 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_fib_multipath_hash_policy,
.extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
+ .extra2 = &two,
},
#endif
{
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index c445a81d144e..3737ec096650 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -256,6 +256,9 @@ void tcp_get_available_congestion_control(char *buf, size_t maxlen)
offs += snprintf(buf + offs, maxlen - offs,
"%s%s",
offs == 0 ? "" : " ", ca->name);
+
+ if (WARN_ON_ONCE(offs >= maxlen))
+ break;
}
rcu_read_unlock();
}
@@ -285,6 +288,9 @@ void tcp_get_allowed_congestion_control(char *buf, size_t maxlen)
offs += snprintf(buf + offs, maxlen - offs,
"%s%s",
offs == 0 ? "" : " ", ca->name);
+
+ if (WARN_ON_ONCE(offs >= maxlen))
+ break;
}
rcu_read_unlock();
}
diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
index 4849edb62d52..12ab5db2b71c 100644
--- a/net/ipv4/tcp_ulp.c
+++ b/net/ipv4/tcp_ulp.c
@@ -92,6 +92,9 @@ void tcp_get_available_ulp(char *buf, size_t maxlen)
offs += snprintf(buf + offs, maxlen - offs,
"%s%s",
offs == 0 ? "" : " ", ulp_ops->name);
+
+ if (WARN_ON_ONCE(offs >= maxlen))
+ break;
}
rcu_read_unlock();
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index da805ee7558b..4da5758cc718 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1297,6 +1297,27 @@ out:
#define UDP_SKB_IS_STATELESS 0x80000000
+/* all head states (dst, sk, nf conntrack) except skb extensions are
+ * cleared by udp_rcv().
+ *
+ * We need to preserve secpath, if present, to eventually process
+ * IP_CMSG_PASSSEC at recvmsg() time.
+ *
+ * Other extensions can be cleared.
+ */
+static bool udp_try_make_stateless(struct sk_buff *skb)
+{
+ if (!skb_has_extensions(skb))
+ return true;
+
+ if (!secpath_exists(skb)) {
+ skb_ext_reset(skb);
+ return true;
+ }
+
+ return false;
+}
+
static void udp_set_dev_scratch(struct sk_buff *skb)
{
struct udp_dev_scratch *scratch = udp_skb_scratch(skb);
@@ -1308,11 +1329,7 @@ static void udp_set_dev_scratch(struct sk_buff *skb)
scratch->csum_unnecessary = !!skb_csum_unnecessary(skb);
scratch->is_linear = !skb_is_nonlinear(skb);
#endif
- /* all head states execept sp (dst, sk, nf) are always cleared by
- * udp_rcv() and we need to preserve secpath, if present, to eventually
- * process IP_CMSG_PASSSEC at recvmsg() time
- */
- if (likely(!skb_sec_path(skb)))
+ if (udp_try_make_stateless(skb))
scratch->_tsize_state |= UDP_SKB_IS_STATELESS;
}
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index ecff3fce9807..89ba7c87de5d 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -92,7 +92,7 @@ static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
- net, sk, skb, NULL, skb_dst(skb)->dev,
+ net, sk, skb, skb->dev, skb_dst(skb)->dev,
__xfrm4_output,
!(IPCB(skb)->flags & IPSKB_REROUTED));
}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index f66bc2af4e9d..7bae6a91b487 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1461,6 +1461,8 @@ out:
}
#endif
goto failure;
+ } else if (fib6_requires_src(rt)) {
+ fib6_routes_require_src_inc(info->nl_net);
}
return err;
@@ -1933,6 +1935,8 @@ int fib6_del(struct fib6_info *rt, struct nl_info *info)
struct fib6_info *cur = rcu_dereference_protected(*rtp,
lockdep_is_held(&table->tb6_lock));
if (rt == cur) {
+ if (fib6_requires_src(cur))
+ fib6_routes_require_src_dec(info->nl_net);
fib6_del_route(table, fn, rtp, info);
return 0;
}
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index ef7f707d9ae3..7b089d0ac8cd 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -86,11 +86,27 @@ static void ip6_sublist_rcv_finish(struct list_head *head)
}
}
+static bool ip6_can_use_hint(const struct sk_buff *skb,
+ const struct sk_buff *hint)
+{
+ return hint && !skb_dst(skb) &&
+ ipv6_addr_equal(&ipv6_hdr(hint)->daddr, &ipv6_hdr(skb)->daddr);
+}
+
+static struct sk_buff *ip6_extract_route_hint(const struct net *net,
+ struct sk_buff *skb)
+{
+ if (fib6_routes_require_src(net) || fib6_has_custom_rules(net))
+ return NULL;
+
+ return skb;
+}
+
static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
struct list_head *head)
{
+ struct sk_buff *skb, *next, *hint = NULL;
struct dst_entry *curr_dst = NULL;
- struct sk_buff *skb, *next;
struct list_head sublist;
INIT_LIST_HEAD(&sublist);
@@ -104,9 +120,15 @@ static void ip6_list_rcv_finish(struct net *net, struct sock *sk,
skb = l3mdev_ip6_rcv(skb);
if (!skb)
continue;
- ip6_rcv_finish_core(net, sk, skb);
+
+ if (ip6_can_use_hint(skb, hint))
+ skb_dst_copy(skb, hint);
+ else
+ ip6_rcv_finish_core(net, sk, skb);
dst = skb_dst(skb);
if (curr_dst != dst) {
+ hint = ip6_extract_route_hint(net, skb);
+
/* dispatch old sublist */
if (!list_empty(&sublist))
ip6_sublist_rcv_finish(&sublist);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 71827b56c006..945508a7cb0f 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -160,7 +160,7 @@ static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *s
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct net_device *dev = skb_dst(skb)->dev;
+ struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
skb->protocol = htons(ETH_P_IPV6);
@@ -173,7 +173,7 @@ int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
}
return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
- net, sk, skb, NULL, dev,
+ net, sk, skb, indev, dev,
ip6_finish_output,
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 264c292e7dcc..79fc012dd2ca 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -363,8 +363,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
break;
case IPV6_TRANSPARENT:
- if (valbool && !ns_capable(net->user_ns, CAP_NET_ADMIN) &&
- !ns_capable(net->user_ns, CAP_NET_RAW)) {
+ if (valbool && !ns_capable(net->user_ns, CAP_NET_RAW) &&
+ !ns_capable(net->user_ns, CAP_NET_ADMIN)) {
retv = -EPERM;
break;
}
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index 69443e9a3aa5..0594131fa46d 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -128,9 +128,9 @@ config IP6_NF_MATCH_HL
depends on NETFILTER_ADVANCED
select NETFILTER_XT_MATCH_HL
---help---
- This is a backwards-compat option for the user's convenience
- (e.g. when running oldconfig). It selects
- CONFIG_NETFILTER_XT_MATCH_HL.
+ This is a backwards-compat option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_MATCH_HL.
config IP6_NF_MATCH_IPV6HEADER
tristate '"ipv6header" IPv6 Extension Headers Match'
@@ -184,9 +184,9 @@ config IP6_NF_TARGET_HL
depends on NETFILTER_ADVANCED && IP6_NF_MANGLE
select NETFILTER_XT_TARGET_HL
---help---
- This is a backwards-compatible option for the user's convenience
- (e.g. when running oldconfig). It selects
- CONFIG_NETFILTER_XT_TARGET_HL.
+ This is a backwards-compatible option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_TARGET_HL.
config IP6_NF_FILTER
tristate "Packet filtering"
@@ -245,14 +245,14 @@ config IP6_NF_RAW
# security table for MAC policy
config IP6_NF_SECURITY
- tristate "Security table"
- depends on SECURITY
- depends on NETFILTER_ADVANCED
- help
- This option adds a `security' table to iptables, for use
- with Mandatory Access Control (MAC) policy.
-
- If unsure, say N.
+ tristate "Security table"
+ depends on SECURITY
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `security' table to iptables, for use
+ with Mandatory Access Control (MAC) policy.
+
+ If unsure, say N.
config IP6_NF_NAT
tristate "ip6tables NAT support"
diff --git a/net/ipv6/netfilter/nf_flow_table_ipv6.c b/net/ipv6/netfilter/nf_flow_table_ipv6.c
index f069bc0dc056..a8566ee12e83 100644
--- a/net/ipv6/netfilter/nf_flow_table_ipv6.c
+++ b/net/ipv6/netfilter/nf_flow_table_ipv6.c
@@ -11,7 +11,7 @@ static struct nf_flowtable_type flowtable_ipv6 = {
.family = NFPROTO_IPV6,
.init = nf_flow_table_init,
.setup = nf_flow_table_offload_setup,
- .action = nf_flow_rule_route,
+ .action = nf_flow_rule_route_ipv6,
.free = nf_flow_table_free,
.hook = nf_flow_offload_ipv6_hook,
.owner = THIS_MODULE,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index edcb52543518..b59940416cb5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -634,7 +634,7 @@ static void rt6_probe(struct fib6_nh *fib6_nh)
* Router Reachability Probe MUST be rate-limited
* to no more than one per minute.
*/
- if (fib6_nh->fib_nh_gw_family)
+ if (!fib6_nh->fib_nh_gw_family)
return;
nh_gw = &fib6_nh->fib_nh_gw6;
@@ -6199,6 +6199,9 @@ static int __net_init ip6_route_net_init(struct net *net)
dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
ip6_template_metrics, true);
INIT_LIST_HEAD(&net->ipv6.ip6_blk_hole_entry->rt6i_uncached);
+#ifdef CONFIG_IPV6_SUBTREES
+ net->ipv6.fib6_routes_require_src = 0;
+#endif
#endif
net->ipv6.sysctl.flush_delay = 0;
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index e70567446f28..85a5447a3e8d 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -149,8 +149,9 @@ static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
*daddr = *addr;
}
-int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
- u32 tbl_id)
+static int
+seg6_lookup_any_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
+ u32 tbl_id, bool local_delivery)
{
struct net *net = dev_net(skb->dev);
struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -158,6 +159,7 @@ int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
struct dst_entry *dst = NULL;
struct rt6_info *rt;
struct flowi6 fl6;
+ int dev_flags = 0;
fl6.flowi6_iif = skb->dev->ifindex;
fl6.daddr = nhaddr ? *nhaddr : hdr->daddr;
@@ -182,7 +184,13 @@ int seg6_lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
dst = &rt->dst;
}
- if (dst && dst->dev->flags & IFF_LOOPBACK && !dst->error) {
+ /* we want to discard traffic destined for local packet processing,
+ * if @local_delivery is set to false.
+ */
+ if (!local_delivery)
+ dev_flags |= IFF_LOOPBACK;
+
+ if (dst && (dst->dev->flags & dev_flags) && !dst->error) {
dst_release(dst);
dst = NULL;
}
@@ -199,6 +207,12 @@ out:
return dst->error;
}
+int seg6_lookup_nexthop(struct sk_buff *skb,
+ struct in6_addr *nhaddr, u32 tbl_id)
+{
+ return seg6_lookup_any_nexthop(skb, nhaddr, tbl_id, false);
+}
+
/* regular endpoint function */
static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
@@ -396,7 +410,7 @@ static int input_action_end_dt6(struct sk_buff *skb,
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
- seg6_lookup_nexthop(skb, NULL, slwt->table);
+ seg6_lookup_any_nexthop(skb, NULL, slwt->table, true);
return dst_input(skb);
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index eecac1b7148e..fbe51d40bd7e 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -187,7 +187,7 @@ skip_frag:
int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
- net, sk, skb, NULL, skb_dst(skb)->dev,
+ net, sk, skb, skb->dev, skb_dst(skb)->dev,
__xfrm6_output,
!(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 4f03ebe732fa..6cbb1286d6c0 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -32,7 +32,8 @@ mac80211-y := \
chan.o \
trace.o mlme.o \
tdls.o \
- ocb.o
+ ocb.o \
+ airtime.o
mac80211-$(CONFIG_MAC80211_LEDS) += led.o
mac80211-$(CONFIG_MAC80211_DEBUGFS) += \
diff --git a/net/mac80211/airtime.c b/net/mac80211/airtime.c
new file mode 100644
index 000000000000..63cb0028b02d
--- /dev/null
+++ b/net/mac80211/airtime.c
@@ -0,0 +1,597 @@
+// SPDX-License-Identifier: ISC
+/*
+ * Copyright (C) 2019 Felix Fietkau <nbd@nbd.name>
+ */
+
+#include <net/mac80211.h>
+#include "ieee80211_i.h"
+#include "sta_info.h"
+
+#define AVG_PKT_SIZE 1024
+
+/* Number of bits for an average sized packet */
+#define MCS_NBITS (AVG_PKT_SIZE << 3)
+
+/* Number of kilo-symbols (symbols * 1024) for a packet with (bps) bits per
+ * symbol. We use k-symbols to avoid rounding in the _TIME macros below.
+ */
+#define MCS_N_KSYMS(bps) DIV_ROUND_UP(MCS_NBITS << 10, (bps))
+
+/* Transmission time (in 1024 * usec) for a packet containing (ksyms) * 1024
+ * symbols.
+ */
+#define MCS_SYMBOL_TIME(sgi, ksyms) \
+ (sgi ? \
+ ((ksyms) * 4 * 18) / 20 : /* 3.6 us per sym */ \
+ ((ksyms) * 4) /* 4.0 us per sym */ \
+ )
+
+/* Transmit duration for the raw data part of an average sized packet */
+#define MCS_DURATION(streams, sgi, bps) \
+ ((u32)MCS_SYMBOL_TIME(sgi, MCS_N_KSYMS((streams) * (bps))))
+
+#define MCS_DURATION_S(shift, streams, sgi, bps) \
+ ((u16)((MCS_DURATION(streams, sgi, bps) >> shift)))
+
+/* These should match the values in enum nl80211_he_gi */
+#define HE_GI_08 0
+#define HE_GI_16 1
+#define HE_GI_32 2
+
+/* Transmission time (1024 usec) for a packet containing (ksyms) * k-symbols */
+#define HE_SYMBOL_TIME(gi, ksyms) \
+ (gi == HE_GI_08 ? \
+ ((ksyms) * 16 * 17) / 20 : /* 13.6 us per sym */ \
+ (gi == HE_GI_16 ? \
+ ((ksyms) * 16 * 18) / 20 : /* 14.4 us per sym */ \
+ ((ksyms) * 16) /* 16.0 us per sym */ \
+ ))
+
+/* Transmit duration for the raw data part of an average sized packet */
+#define HE_DURATION(streams, gi, bps) \
+ ((u32)HE_SYMBOL_TIME(gi, MCS_N_KSYMS((streams) * (bps))))
+
+#define HE_DURATION_S(shift, streams, gi, bps) \
+ (HE_DURATION(streams, gi, bps) >> shift)
+
+#define BW_20 0
+#define BW_40 1
+#define BW_80 2
+#define BW_160 3
+
+/*
+ * Define group sort order: HT40 -> SGI -> #streams
+ */
+#define IEEE80211_MAX_STREAMS 4
+#define IEEE80211_HT_STREAM_GROUPS 4 /* BW(=2) * SGI(=2) */
+#define IEEE80211_VHT_STREAM_GROUPS 8 /* BW(=4) * SGI(=2) */
+
+#define IEEE80211_HE_MAX_STREAMS 8
+#define IEEE80211_HE_STREAM_GROUPS 12 /* BW(=4) * GI(=3) */
+
+#define IEEE80211_HT_GROUPS_NB (IEEE80211_MAX_STREAMS * \
+ IEEE80211_HT_STREAM_GROUPS)
+#define IEEE80211_VHT_GROUPS_NB (IEEE80211_MAX_STREAMS * \
+ IEEE80211_VHT_STREAM_GROUPS)
+#define IEEE80211_HE_GROUPS_NB (IEEE80211_HE_MAX_STREAMS * \
+ IEEE80211_HE_STREAM_GROUPS)
+#define IEEE80211_GROUPS_NB (IEEE80211_HT_GROUPS_NB + \
+ IEEE80211_VHT_GROUPS_NB + \
+ IEEE80211_HE_GROUPS_NB)
+
+#define IEEE80211_HT_GROUP_0 0
+#define IEEE80211_VHT_GROUP_0 (IEEE80211_HT_GROUP_0 + IEEE80211_HT_GROUPS_NB)
+#define IEEE80211_HE_GROUP_0 (IEEE80211_VHT_GROUP_0 + IEEE80211_VHT_GROUPS_NB)
+
+#define MCS_GROUP_RATES 12
+
+#define HT_GROUP_IDX(_streams, _sgi, _ht40) \
+ IEEE80211_HT_GROUP_0 + \
+ IEEE80211_MAX_STREAMS * 2 * _ht40 + \
+ IEEE80211_MAX_STREAMS * _sgi + \
+ _streams - 1
+
+#define _MAX(a, b) (((a)>(b))?(a):(b))
+
+#define GROUP_SHIFT(duration) \
+ _MAX(0, 16 - __builtin_clz(duration))
+
+/* MCS rate information for an MCS group */
+#define __MCS_GROUP(_streams, _sgi, _ht40, _s) \
+ [HT_GROUP_IDX(_streams, _sgi, _ht40)] = { \
+ .shift = _s, \
+ .duration = { \
+ MCS_DURATION_S(_s, _streams, _sgi, _ht40 ? 54 : 26), \
+ MCS_DURATION_S(_s, _streams, _sgi, _ht40 ? 108 : 52), \
+ MCS_DURATION_S(_s, _streams, _sgi, _ht40 ? 162 : 78), \
+ MCS_DURATION_S(_s, _streams, _sgi, _ht40 ? 216 : 104), \
+ MCS_DURATION_S(_s, _streams, _sgi, _ht40 ? 324 : 156), \
+ MCS_DURATION_S(_s, _streams, _sgi, _ht40 ? 432 : 208), \
+ MCS_DURATION_S(_s, _streams, _sgi, _ht40 ? 486 : 234), \
+ MCS_DURATION_S(_s, _streams, _sgi, _ht40 ? 540 : 260) \
+ } \
+}
+
+#define MCS_GROUP_SHIFT(_streams, _sgi, _ht40) \
+ GROUP_SHIFT(MCS_DURATION(_streams, _sgi, _ht40 ? 54 : 26))
+
+#define MCS_GROUP(_streams, _sgi, _ht40) \
+ __MCS_GROUP(_streams, _sgi, _ht40, \
+ MCS_GROUP_SHIFT(_streams, _sgi, _ht40))
+
+#define VHT_GROUP_IDX(_streams, _sgi, _bw) \
+ (IEEE80211_VHT_GROUP_0 + \
+ IEEE80211_MAX_STREAMS * 2 * (_bw) + \
+ IEEE80211_MAX_STREAMS * (_sgi) + \
+ (_streams) - 1)
+
+#define BW2VBPS(_bw, r4, r3, r2, r1) \
+ (_bw == BW_160 ? r4 : _bw == BW_80 ? r3 : _bw == BW_40 ? r2 : r1)
+
+#define __VHT_GROUP(_streams, _sgi, _bw, _s) \
+ [VHT_GROUP_IDX(_streams, _sgi, _bw)] = { \
+ .shift = _s, \
+ .duration = { \
+ MCS_DURATION_S(_s, _streams, _sgi, \
+ BW2VBPS(_bw, 234, 117, 54, 26)), \
+ MCS_DURATION_S(_s, _streams, _sgi, \
+ BW2VBPS(_bw, 468, 234, 108, 52)), \
+ MCS_DURATION_S(_s, _streams, _sgi, \
+ BW2VBPS(_bw, 702, 351, 162, 78)), \
+ MCS_DURATION_S(_s, _streams, _sgi, \
+ BW2VBPS(_bw, 936, 468, 216, 104)), \
+ MCS_DURATION_S(_s, _streams, _sgi, \
+ BW2VBPS(_bw, 1404, 702, 324, 156)), \
+ MCS_DURATION_S(_s, _streams, _sgi, \
+ BW2VBPS(_bw, 1872, 936, 432, 208)), \
+ MCS_DURATION_S(_s, _streams, _sgi, \
+ BW2VBPS(_bw, 2106, 1053, 486, 234)), \
+ MCS_DURATION_S(_s, _streams, _sgi, \
+ BW2VBPS(_bw, 2340, 1170, 540, 260)), \
+ MCS_DURATION_S(_s, _streams, _sgi, \
+ BW2VBPS(_bw, 2808, 1404, 648, 312)), \
+ MCS_DURATION_S(_s, _streams, _sgi, \
+ BW2VBPS(_bw, 3120, 1560, 720, 346)) \
+ } \
+}
+
+#define VHT_GROUP_SHIFT(_streams, _sgi, _bw) \
+ GROUP_SHIFT(MCS_DURATION(_streams, _sgi, \
+ BW2VBPS(_bw, 243, 117, 54, 26)))
+
+#define VHT_GROUP(_streams, _sgi, _bw) \
+ __VHT_GROUP(_streams, _sgi, _bw, \
+ VHT_GROUP_SHIFT(_streams, _sgi, _bw))
+
+
+#define HE_GROUP_IDX(_streams, _gi, _bw) \
+ (IEEE80211_HE_GROUP_0 + \
+ IEEE80211_HE_MAX_STREAMS * 3 * (_bw) + \
+ IEEE80211_HE_MAX_STREAMS * (_gi) + \
+ (_streams) - 1)
+
+#define __HE_GROUP(_streams, _gi, _bw, _s) \
+ [HE_GROUP_IDX(_streams, _gi, _bw)] = { \
+ .shift = _s, \
+ .duration = { \
+ HE_DURATION_S(_s, _streams, _gi, \
+ BW2VBPS(_bw, 979, 489, 230, 115)), \
+ HE_DURATION_S(_s, _streams, _gi, \
+ BW2VBPS(_bw, 1958, 979, 475, 230)), \
+ HE_DURATION_S(_s, _streams, _gi, \
+ BW2VBPS(_bw, 2937, 1468, 705, 345)), \
+ HE_DURATION_S(_s, _streams, _gi, \
+ BW2VBPS(_bw, 3916, 1958, 936, 475)), \
+ HE_DURATION_S(_s, _streams, _gi, \
+ BW2VBPS(_bw, 5875, 2937, 1411, 705)), \
+ HE_DURATION_S(_s, _streams, _gi, \
+ BW2VBPS(_bw, 7833, 3916, 1872, 936)), \
+ HE_DURATION_S(_s, _streams, _gi, \
+ BW2VBPS(_bw, 8827, 4406, 2102, 1051)), \
+ HE_DURATION_S(_s, _streams, _gi, \
+ BW2VBPS(_bw, 9806, 4896, 2347, 1166)), \
+ HE_DURATION_S(_s, _streams, _gi, \
+ BW2VBPS(_bw, 11764, 5875, 2808, 1411)), \
+ HE_DURATION_S(_s, _streams, _gi, \
+ BW2VBPS(_bw, 13060, 6523, 3124, 1555)), \
+ HE_DURATION_S(_s, _streams, _gi, \
+ BW2VBPS(_bw, 14702, 7344, 3513, 1756)), \
+ HE_DURATION_S(_s, _streams, _gi, \
+ BW2VBPS(_bw, 16329, 8164, 3902, 1944)) \
+ } \
+}
+
+#define HE_GROUP_SHIFT(_streams, _gi, _bw) \
+ GROUP_SHIFT(HE_DURATION(_streams, _gi, \
+ BW2VBPS(_bw, 979, 489, 230, 115)))
+
+#define HE_GROUP(_streams, _gi, _bw) \
+ __HE_GROUP(_streams, _gi, _bw, \
+ HE_GROUP_SHIFT(_streams, _gi, _bw))
+struct mcs_group {
+ u8 shift;
+ u16 duration[MCS_GROUP_RATES];
+};
+
+static const struct mcs_group airtime_mcs_groups[] = {
+ MCS_GROUP(1, 0, BW_20),
+ MCS_GROUP(2, 0, BW_20),
+ MCS_GROUP(3, 0, BW_20),
+ MCS_GROUP(4, 0, BW_20),
+
+ MCS_GROUP(1, 1, BW_20),
+ MCS_GROUP(2, 1, BW_20),
+ MCS_GROUP(3, 1, BW_20),
+ MCS_GROUP(4, 1, BW_20),
+
+ MCS_GROUP(1, 0, BW_40),
+ MCS_GROUP(2, 0, BW_40),
+ MCS_GROUP(3, 0, BW_40),
+ MCS_GROUP(4, 0, BW_40),
+
+ MCS_GROUP(1, 1, BW_40),
+ MCS_GROUP(2, 1, BW_40),
+ MCS_GROUP(3, 1, BW_40),
+ MCS_GROUP(4, 1, BW_40),
+
+ VHT_GROUP(1, 0, BW_20),
+ VHT_GROUP(2, 0, BW_20),
+ VHT_GROUP(3, 0, BW_20),
+ VHT_GROUP(4, 0, BW_20),
+
+ VHT_GROUP(1, 1, BW_20),
+ VHT_GROUP(2, 1, BW_20),
+ VHT_GROUP(3, 1, BW_20),
+ VHT_GROUP(4, 1, BW_20),
+
+ VHT_GROUP(1, 0, BW_40),
+ VHT_GROUP(2, 0, BW_40),
+ VHT_GROUP(3, 0, BW_40),
+ VHT_GROUP(4, 0, BW_40),
+
+ VHT_GROUP(1, 1, BW_40),
+ VHT_GROUP(2, 1, BW_40),
+ VHT_GROUP(3, 1, BW_40),
+ VHT_GROUP(4, 1, BW_40),
+
+ VHT_GROUP(1, 0, BW_80),
+ VHT_GROUP(2, 0, BW_80),
+ VHT_GROUP(3, 0, BW_80),
+ VHT_GROUP(4, 0, BW_80),
+
+ VHT_GROUP(1, 1, BW_80),
+ VHT_GROUP(2, 1, BW_80),
+ VHT_GROUP(3, 1, BW_80),
+ VHT_GROUP(4, 1, BW_80),
+
+ VHT_GROUP(1, 0, BW_160),
+ VHT_GROUP(2, 0, BW_160),
+ VHT_GROUP(3, 0, BW_160),
+ VHT_GROUP(4, 0, BW_160),
+
+ VHT_GROUP(1, 1, BW_160),
+ VHT_GROUP(2, 1, BW_160),
+ VHT_GROUP(3, 1, BW_160),
+ VHT_GROUP(4, 1, BW_160),
+
+ HE_GROUP(1, HE_GI_08, BW_20),
+ HE_GROUP(2, HE_GI_08, BW_20),
+ HE_GROUP(3, HE_GI_08, BW_20),
+ HE_GROUP(4, HE_GI_08, BW_20),
+ HE_GROUP(5, HE_GI_08, BW_20),
+ HE_GROUP(6, HE_GI_08, BW_20),
+ HE_GROUP(7, HE_GI_08, BW_20),
+ HE_GROUP(8, HE_GI_08, BW_20),
+
+ HE_GROUP(1, HE_GI_16, BW_20),
+ HE_GROUP(2, HE_GI_16, BW_20),
+ HE_GROUP(3, HE_GI_16, BW_20),
+ HE_GROUP(4, HE_GI_16, BW_20),
+ HE_GROUP(5, HE_GI_16, BW_20),
+ HE_GROUP(6, HE_GI_16, BW_20),
+ HE_GROUP(7, HE_GI_16, BW_20),
+ HE_GROUP(8, HE_GI_16, BW_20),
+
+ HE_GROUP(1, HE_GI_32, BW_20),
+ HE_GROUP(2, HE_GI_32, BW_20),
+ HE_GROUP(3, HE_GI_32, BW_20),
+ HE_GROUP(4, HE_GI_32, BW_20),
+ HE_GROUP(5, HE_GI_32, BW_20),
+ HE_GROUP(6, HE_GI_32, BW_20),
+ HE_GROUP(7, HE_GI_32, BW_20),
+ HE_GROUP(8, HE_GI_32, BW_20),
+
+ HE_GROUP(1, HE_GI_08, BW_40),
+ HE_GROUP(2, HE_GI_08, BW_40),
+ HE_GROUP(3, HE_GI_08, BW_40),
+ HE_GROUP(4, HE_GI_08, BW_40),
+ HE_GROUP(5, HE_GI_08, BW_40),
+ HE_GROUP(6, HE_GI_08, BW_40),
+ HE_GROUP(7, HE_GI_08, BW_40),
+ HE_GROUP(8, HE_GI_08, BW_40),
+
+ HE_GROUP(1, HE_GI_16, BW_40),
+ HE_GROUP(2, HE_GI_16, BW_40),
+ HE_GROUP(3, HE_GI_16, BW_40),
+ HE_GROUP(4, HE_GI_16, BW_40),
+ HE_GROUP(5, HE_GI_16, BW_40),
+ HE_GROUP(6, HE_GI_16, BW_40),
+ HE_GROUP(7, HE_GI_16, BW_40),
+ HE_GROUP(8, HE_GI_16, BW_40),
+
+ HE_GROUP(1, HE_GI_32, BW_40),
+ HE_GROUP(2, HE_GI_32, BW_40),
+ HE_GROUP(3, HE_GI_32, BW_40),
+ HE_GROUP(4, HE_GI_32, BW_40),
+ HE_GROUP(5, HE_GI_32, BW_40),
+ HE_GROUP(6, HE_GI_32, BW_40),
+ HE_GROUP(7, HE_GI_32, BW_40),
+ HE_GROUP(8, HE_GI_32, BW_40),
+
+ HE_GROUP(1, HE_GI_08, BW_80),
+ HE_GROUP(2, HE_GI_08, BW_80),
+ HE_GROUP(3, HE_GI_08, BW_80),
+ HE_GROUP(4, HE_GI_08, BW_80),
+ HE_GROUP(5, HE_GI_08, BW_80),
+ HE_GROUP(6, HE_GI_08, BW_80),
+ HE_GROUP(7, HE_GI_08, BW_80),
+ HE_GROUP(8, HE_GI_08, BW_80),
+
+ HE_GROUP(1, HE_GI_16, BW_80),
+ HE_GROUP(2, HE_GI_16, BW_80),
+ HE_GROUP(3, HE_GI_16, BW_80),
+ HE_GROUP(4, HE_GI_16, BW_80),
+ HE_GROUP(5, HE_GI_16, BW_80),
+ HE_GROUP(6, HE_GI_16, BW_80),
+ HE_GROUP(7, HE_GI_16, BW_80),
+ HE_GROUP(8, HE_GI_16, BW_80),
+
+ HE_GROUP(1, HE_GI_32, BW_80),
+ HE_GROUP(2, HE_GI_32, BW_80),
+ HE_GROUP(3, HE_GI_32, BW_80),
+ HE_GROUP(4, HE_GI_32, BW_80),
+ HE_GROUP(5, HE_GI_32, BW_80),
+ HE_GROUP(6, HE_GI_32, BW_80),
+ HE_GROUP(7, HE_GI_32, BW_80),
+ HE_GROUP(8, HE_GI_32, BW_80),
+
+ HE_GROUP(1, HE_GI_08, BW_160),
+ HE_GROUP(2, HE_GI_08, BW_160),
+ HE_GROUP(3, HE_GI_08, BW_160),
+ HE_GROUP(4, HE_GI_08, BW_160),
+ HE_GROUP(5, HE_GI_08, BW_160),
+ HE_GROUP(6, HE_GI_08, BW_160),
+ HE_GROUP(7, HE_GI_08, BW_160),
+ HE_GROUP(8, HE_GI_08, BW_160),
+
+ HE_GROUP(1, HE_GI_16, BW_160),
+ HE_GROUP(2, HE_GI_16, BW_160),
+ HE_GROUP(3, HE_GI_16, BW_160),
+ HE_GROUP(4, HE_GI_16, BW_160),
+ HE_GROUP(5, HE_GI_16, BW_160),
+ HE_GROUP(6, HE_GI_16, BW_160),
+ HE_GROUP(7, HE_GI_16, BW_160),
+ HE_GROUP(8, HE_GI_16, BW_160),
+
+ HE_GROUP(1, HE_GI_32, BW_160),
+ HE_GROUP(2, HE_GI_32, BW_160),
+ HE_GROUP(3, HE_GI_32, BW_160),
+ HE_GROUP(4, HE_GI_32, BW_160),
+ HE_GROUP(5, HE_GI_32, BW_160),
+ HE_GROUP(6, HE_GI_32, BW_160),
+ HE_GROUP(7, HE_GI_32, BW_160),
+ HE_GROUP(8, HE_GI_32, BW_160),
+};
+
+static u32
+ieee80211_calc_legacy_rate_duration(u16 bitrate, bool short_pre,
+ bool cck, int len)
+{
+ u32 duration;
+
+ if (cck) {
+ duration = 144 + 48; /* preamble + PLCP */
+ if (short_pre)
+ duration >>= 1;
+
+ duration += 10; /* SIFS */
+ } else {
+ duration = 20 + 16; /* premable + SIFS */
+ }
+
+ len <<= 3;
+ duration += (len * 10) / bitrate;
+
+ return duration;
+}
+
+u32 ieee80211_calc_rx_airtime(struct ieee80211_hw *hw,
+ struct ieee80211_rx_status *status,
+ int len)
+{
+ struct ieee80211_supported_band *sband;
+ const struct ieee80211_rate *rate;
+ bool sgi = status->enc_flags & RX_ENC_FLAG_SHORT_GI;
+ bool sp = status->enc_flags & RX_ENC_FLAG_SHORTPRE;
+ int bw, streams;
+ int group, idx;
+ u32 duration;
+ bool cck;
+
+ switch (status->bw) {
+ case RATE_INFO_BW_20:
+ bw = BW_20;
+ break;
+ case RATE_INFO_BW_40:
+ bw = BW_40;
+ break;
+ case RATE_INFO_BW_80:
+ bw = BW_80;
+ break;
+ case RATE_INFO_BW_160:
+ bw = BW_160;
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+
+ switch (status->encoding) {
+ case RX_ENC_LEGACY:
+ if (WARN_ON_ONCE(status->band > NL80211_BAND_5GHZ))
+ return 0;
+
+ sband = hw->wiphy->bands[status->band];
+ if (!sband || status->rate_idx > sband->n_bitrates)
+ return 0;
+
+ rate = &sband->bitrates[status->rate_idx];
+ cck = rate->flags & IEEE80211_RATE_MANDATORY_B;
+
+ return ieee80211_calc_legacy_rate_duration(rate->bitrate, sp,
+ cck, len);
+
+ case RX_ENC_VHT:
+ streams = status->nss;
+ idx = status->rate_idx;
+ group = VHT_GROUP_IDX(streams, sgi, bw);
+ break;
+ case RX_ENC_HT:
+ streams = ((status->rate_idx >> 3) & 3) + 1;
+ idx = status->rate_idx & 7;
+ group = HT_GROUP_IDX(streams, sgi, bw);
+ break;
+ case RX_ENC_HE:
+ streams = status->nss;
+ idx = status->rate_idx;
+ group = HE_GROUP_IDX(streams, status->he_gi, bw);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return 0;
+ }
+
+ if (WARN_ON_ONCE((status->encoding != RX_ENC_HE && streams > 4) ||
+ (status->encoding == RX_ENC_HE && streams > 8)))
+ return 0;
+
+ duration = airtime_mcs_groups[group].duration[idx];
+ duration <<= airtime_mcs_groups[group].shift;
+ duration *= len;
+ duration /= AVG_PKT_SIZE;
+ duration /= 1024;
+
+ duration += 36 + (streams << 2);
+
+ return duration;
+}
+EXPORT_SYMBOL_GPL(ieee80211_calc_rx_airtime);
+
+static u32 ieee80211_calc_tx_airtime_rate(struct ieee80211_hw *hw,
+ struct ieee80211_tx_rate *rate,
+ u8 band, int len)
+{
+ struct ieee80211_rx_status stat = {
+ .band = band,
+ };
+
+ if (rate->idx < 0 || !rate->count)
+ return 0;
+
+ if (rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH)
+ stat.bw = RATE_INFO_BW_80;
+ else if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH)
+ stat.bw = RATE_INFO_BW_40;
+ else
+ stat.bw = RATE_INFO_BW_20;
+
+ stat.enc_flags = 0;
+ if (rate->flags & IEEE80211_TX_RC_USE_SHORT_PREAMBLE)
+ stat.enc_flags |= RX_ENC_FLAG_SHORTPRE;
+ if (rate->flags & IEEE80211_TX_RC_SHORT_GI)
+ stat.enc_flags |= RX_ENC_FLAG_SHORT_GI;
+
+ stat.rate_idx = rate->idx;
+ if (rate->flags & IEEE80211_TX_RC_VHT_MCS) {
+ stat.encoding = RX_ENC_VHT;
+ stat.rate_idx = ieee80211_rate_get_vht_mcs(rate);
+ stat.nss = ieee80211_rate_get_vht_nss(rate);
+ } else if (rate->flags & IEEE80211_TX_RC_MCS) {
+ stat.encoding = RX_ENC_HT;
+ } else {
+ stat.encoding = RX_ENC_LEGACY;
+ }
+
+ return ieee80211_calc_rx_airtime(hw, &stat, len);
+}
+
+u32 ieee80211_calc_tx_airtime(struct ieee80211_hw *hw,
+ struct ieee80211_tx_info *info,
+ int len)
+{
+ u32 duration = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(info->status.rates); i++) {
+ struct ieee80211_tx_rate *rate = &info->status.rates[i];
+ u32 cur_duration;
+
+ cur_duration = ieee80211_calc_tx_airtime_rate(hw, rate,
+ info->band, len);
+ if (!cur_duration)
+ break;
+
+ duration += cur_duration * rate->count;
+ }
+
+ return duration;
+}
+EXPORT_SYMBOL_GPL(ieee80211_calc_tx_airtime);
+
+u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *pubsta,
+ int len)
+{
+ struct ieee80211_supported_band *sband;
+ struct ieee80211_chanctx_conf *conf;
+ int rateidx, shift = 0;
+ bool cck, short_pream;
+ u32 basic_rates;
+ u8 band = 0;
+ u16 rate;
+
+ len += 38; /* Ethernet header length */
+
+ conf = rcu_dereference(vif->chanctx_conf);
+ if (conf) {
+ band = conf->def.chan->band;
+ shift = ieee80211_chandef_get_shift(&conf->def);
+ }
+
+ if (pubsta) {
+ struct sta_info *sta = container_of(pubsta, struct sta_info,
+ sta);
+
+ return ieee80211_calc_tx_airtime_rate(hw,
+ &sta->tx_stats.last_rate,
+ band, len);
+ }
+
+ if (!conf)
+ return 0;
+
+ /* No station to get latest rate from, so calculate the worst-case
+ * duration using the lowest configured basic rate.
+ */
+ sband = hw->wiphy->bands[band];
+
+ basic_rates = vif->bss_conf.basic_rates;
+ short_pream = vif->bss_conf.use_short_preamble;
+
+ rateidx = basic_rates ? ffs(basic_rates) - 1 : 0;
+ rate = sband->bitrates[rateidx].bitrate << shift;
+ cck = sband->bitrates[rateidx].flags & IEEE80211_RATE_MANDATORY_B;
+
+ return ieee80211_calc_legacy_rate_duration(rate, short_pream, cck, len);
+}
diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c
index 568b3b276931..ad41d74530c6 100644
--- a/net/mac80211/debugfs.c
+++ b/net/mac80211/debugfs.c
@@ -59,6 +59,8 @@ static const struct file_operations name## _ops = { \
debugfs_create_file(#name, mode, phyd, local, &name## _ops);
+DEBUGFS_READONLY_FILE(hw_conf, "%x",
+ local->hw.conf.flags);
DEBUGFS_READONLY_FILE(user_power, "%d",
local->user_power_level);
DEBUGFS_READONLY_FILE(power, "%d",
@@ -148,6 +150,87 @@ static const struct file_operations aqm_ops = {
.llseek = default_llseek,
};
+static ssize_t aql_txq_limit_read(struct file *file,
+ char __user *user_buf,
+ size_t count,
+ loff_t *ppos)
+{
+ struct ieee80211_local *local = file->private_data;
+ char buf[400];
+ int len = 0;
+
+ len = scnprintf(buf, sizeof(buf),
+ "AC AQL limit low AQL limit high\n"
+ "VO %u %u\n"
+ "VI %u %u\n"
+ "BE %u %u\n"
+ "BK %u %u\n",
+ local->aql_txq_limit_low[IEEE80211_AC_VO],
+ local->aql_txq_limit_high[IEEE80211_AC_VO],
+ local->aql_txq_limit_low[IEEE80211_AC_VI],
+ local->aql_txq_limit_high[IEEE80211_AC_VI],
+ local->aql_txq_limit_low[IEEE80211_AC_BE],
+ local->aql_txq_limit_high[IEEE80211_AC_BE],
+ local->aql_txq_limit_low[IEEE80211_AC_BK],
+ local->aql_txq_limit_high[IEEE80211_AC_BK]);
+ return simple_read_from_buffer(user_buf, count, ppos,
+ buf, len);
+}
+
+static ssize_t aql_txq_limit_write(struct file *file,
+ const char __user *user_buf,
+ size_t count,
+ loff_t *ppos)
+{
+ struct ieee80211_local *local = file->private_data;
+ char buf[100];
+ size_t len;
+ u32 ac, q_limit_low, q_limit_high, q_limit_low_old, q_limit_high_old;
+ struct sta_info *sta;
+
+ if (count > sizeof(buf))
+ return -EINVAL;
+
+ if (copy_from_user(buf, user_buf, count))
+ return -EFAULT;
+
+ buf[sizeof(buf) - 1] = 0;
+ len = strlen(buf);
+ if (len > 0 && buf[len - 1] == '\n')
+ buf[len - 1] = 0;
+
+ if (sscanf(buf, "%u %u %u", &ac, &q_limit_low, &q_limit_high) != 3)
+ return -EINVAL;
+
+ if (ac >= IEEE80211_NUM_ACS)
+ return -EINVAL;
+
+ q_limit_low_old = local->aql_txq_limit_low[ac];
+ q_limit_high_old = local->aql_txq_limit_high[ac];
+
+ local->aql_txq_limit_low[ac] = q_limit_low;
+ local->aql_txq_limit_high[ac] = q_limit_high;
+
+ mutex_lock(&local->sta_mtx);
+ list_for_each_entry(sta, &local->sta_list, list) {
+ /* If a sta has customized queue limits, keep it */
+ if (sta->airtime[ac].aql_limit_low == q_limit_low_old &&
+ sta->airtime[ac].aql_limit_high == q_limit_high_old) {
+ sta->airtime[ac].aql_limit_low = q_limit_low;
+ sta->airtime[ac].aql_limit_high = q_limit_high;
+ }
+ }
+ mutex_unlock(&local->sta_mtx);
+ return count;
+}
+
+static const struct file_operations aql_txq_limit_ops = {
+ .write = aql_txq_limit_write,
+ .read = aql_txq_limit_read,
+ .open = simple_open,
+ .llseek = default_llseek,
+};
+
static ssize_t force_tx_status_read(struct file *file,
char __user *user_buf,
size_t count,
@@ -433,6 +516,7 @@ void debugfs_hw_add(struct ieee80211_local *local)
DEBUGFS_ADD(hwflags);
DEBUGFS_ADD(user_power);
DEBUGFS_ADD(power);
+ DEBUGFS_ADD(hw_conf);
DEBUGFS_ADD_MODE(force_tx_status, 0600);
if (local->ops->wake_tx_queue)
@@ -441,6 +525,10 @@ void debugfs_hw_add(struct ieee80211_local *local)
debugfs_create_u16("airtime_flags", 0600,
phyd, &local->airtime_flags);
+ DEBUGFS_ADD(aql_txq_limit);
+ debugfs_create_u32("aql_threshold", 0600,
+ phyd, &local->aql_threshold);
+
statsd = debugfs_create_dir("statistics", phyd);
/* if the dir failed, don't put all the other things into the root! */
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index c8ad20c28c43..0185e6e5e5d1 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -197,10 +197,12 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
{
struct sta_info *sta = file->private_data;
struct ieee80211_local *local = sta->sdata->local;
- size_t bufsz = 200;
+ size_t bufsz = 400;
char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf;
u64 rx_airtime = 0, tx_airtime = 0;
s64 deficit[IEEE80211_NUM_ACS];
+ u32 q_depth[IEEE80211_NUM_ACS];
+ u32 q_limit_l[IEEE80211_NUM_ACS], q_limit_h[IEEE80211_NUM_ACS];
ssize_t rv;
int ac;
@@ -212,19 +214,22 @@ static ssize_t sta_airtime_read(struct file *file, char __user *userbuf,
rx_airtime += sta->airtime[ac].rx_airtime;
tx_airtime += sta->airtime[ac].tx_airtime;
deficit[ac] = sta->airtime[ac].deficit;
+ q_limit_l[ac] = sta->airtime[ac].aql_limit_low;
+ q_limit_h[ac] = sta->airtime[ac].aql_limit_high;
spin_unlock_bh(&local->active_txq_lock[ac]);
+ q_depth[ac] = atomic_read(&sta->airtime[ac].aql_tx_pending);
}
p += scnprintf(p, bufsz + buf - p,
"RX: %llu us\nTX: %llu us\nWeight: %u\n"
- "Deficit: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n",
- rx_airtime,
- tx_airtime,
- sta->airtime_weight,
- deficit[0],
- deficit[1],
- deficit[2],
- deficit[3]);
+ "Deficit: VO: %lld us VI: %lld us BE: %lld us BK: %lld us\n"
+ "Q depth: VO: %u us VI: %u us BE: %u us BK: %u us\n"
+ "Q limit[low/high]: VO: %u/%u VI: %u/%u BE: %u/%u BK: %u/%u\n",
+ rx_airtime, tx_airtime, sta->airtime_weight,
+ deficit[0], deficit[1], deficit[2], deficit[3],
+ q_depth[0], q_depth[1], q_depth[2], q_depth[3],
+ q_limit_l[0], q_limit_h[0], q_limit_l[1], q_limit_h[1],
+ q_limit_l[2], q_limit_h[2], q_limit_l[3], q_limit_h[3]),
rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf);
kfree(buf);
@@ -236,7 +241,25 @@ static ssize_t sta_airtime_write(struct file *file, const char __user *userbuf,
{
struct sta_info *sta = file->private_data;
struct ieee80211_local *local = sta->sdata->local;
- int ac;
+ u32 ac, q_limit_l, q_limit_h;
+ char _buf[100] = {}, *buf = _buf;
+
+ if (count > sizeof(_buf))
+ return -EINVAL;
+
+ if (copy_from_user(buf, userbuf, count))
+ return -EFAULT;
+
+ buf[sizeof(_buf) - 1] = '\0';
+ if (sscanf(buf, "queue limit %u %u %u", &ac, &q_limit_l, &q_limit_h)
+ != 3)
+ return -EINVAL;
+
+ if (ac >= IEEE80211_NUM_ACS)
+ return -EINVAL;
+
+ sta->airtime[ac].aql_limit_low = q_limit_l;
+ sta->airtime[ac].aql_limit_high = q_limit_h;
for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
spin_lock_bh(&local->active_txq_lock[ac]);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 05406e9c05b3..ad15b3be8bb3 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1142,6 +1142,10 @@ struct ieee80211_local {
u16 schedule_round[IEEE80211_NUM_ACS];
u16 airtime_flags;
+ u32 aql_txq_limit_low[IEEE80211_NUM_ACS];
+ u32 aql_txq_limit_high[IEEE80211_NUM_ACS];
+ u32 aql_threshold;
+ atomic_t aql_total_pending_airtime;
const struct ieee80211_ops *ops;
@@ -2249,6 +2253,10 @@ const char *ieee80211_get_reason_code_string(u16 reason_code);
extern const struct ethtool_ops ieee80211_ethtool_ops;
+u32 ieee80211_calc_expected_tx_airtime(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_sta *pubsta,
+ int len);
#ifdef CONFIG_MAC80211_NOINLINE
#define debug_noinline noinline
#else
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 2d05c4cfaf6d..6cca0853f183 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -667,8 +667,16 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
for (i = 0; i < IEEE80211_NUM_ACS; i++) {
INIT_LIST_HEAD(&local->active_txqs[i]);
spin_lock_init(&local->active_txq_lock[i]);
+ local->aql_txq_limit_low[i] = IEEE80211_DEFAULT_AQL_TXQ_LIMIT_L;
+ local->aql_txq_limit_high[i] =
+ IEEE80211_DEFAULT_AQL_TXQ_LIMIT_H;
}
- local->airtime_flags = AIRTIME_USE_TX | AIRTIME_USE_RX;
+
+ local->airtime_flags = AIRTIME_USE_TX |
+ AIRTIME_USE_RX |
+ AIRTIME_USE_AQL;
+ local->aql_threshold = IEEE80211_AQL_THRESHOLD;
+ atomic_set(&local->aql_total_pending_airtime, 0);
INIT_LIST_HEAD(&local->chanctx_list);
mutex_init(&local->chanctx_mtx);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 8d3a2389b055..8eafd81e97b4 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -210,6 +210,20 @@ struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata,
return NULL;
}
+struct sta_info *sta_info_get_by_addrs(struct ieee80211_local *local,
+ const u8 *sta_addr, const u8 *vif_addr)
+{
+ struct rhlist_head *tmp;
+ struct sta_info *sta;
+
+ for_each_sta_info(local, sta_addr, sta, tmp) {
+ if (ether_addr_equal(vif_addr, sta->sdata->vif.addr))
+ return sta;
+ }
+
+ return NULL;
+}
+
struct sta_info *sta_info_get_by_idx(struct ieee80211_sub_if_data *sdata,
int idx)
{
@@ -396,6 +410,9 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
skb_queue_head_init(&sta->ps_tx_buf[i]);
skb_queue_head_init(&sta->tx_filtered[i]);
sta->airtime[i].deficit = sta->airtime_weight;
+ atomic_set(&sta->airtime[i].aql_tx_pending, 0);
+ sta->airtime[i].aql_limit_low = local->aql_txq_limit_low[i];
+ sta->airtime[i].aql_limit_high = local->aql_txq_limit_high[i];
}
for (i = 0; i < IEEE80211_NUM_TIDS; i++)
@@ -1893,6 +1910,41 @@ void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid,
}
EXPORT_SYMBOL(ieee80211_sta_register_airtime);
+void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local,
+ struct sta_info *sta, u8 ac,
+ u16 tx_airtime, bool tx_completed)
+{
+ int tx_pending;
+
+ if (!tx_completed) {
+ if (sta)
+ atomic_add(tx_airtime,
+ &sta->airtime[ac].aql_tx_pending);
+
+ atomic_add(tx_airtime, &local->aql_total_pending_airtime);
+ return;
+ }
+
+ if (sta) {
+ tx_pending = atomic_sub_return(tx_airtime,
+ &sta->airtime[ac].aql_tx_pending);
+ if (WARN_ONCE(tx_pending < 0,
+ "STA %pM AC %d txq pending airtime underflow: %u, %u",
+ sta->addr, ac, tx_pending, tx_airtime))
+ atomic_cmpxchg(&sta->airtime[ac].aql_tx_pending,
+ tx_pending, 0);
+ }
+
+ tx_pending = atomic_sub_return(tx_airtime,
+ &local->aql_total_pending_airtime);
+ if (WARN_ONCE(tx_pending < 0,
+ "Device %s AC %d pending airtime underflow: %u, %u",
+ wiphy_name(local->hw.wiphy), ac, tx_pending,
+ tx_airtime))
+ atomic_cmpxchg(&local->aql_total_pending_airtime,
+ tx_pending, 0);
+}
+
int sta_info_move_state(struct sta_info *sta,
enum ieee80211_sta_state new_state)
{
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 369c2dddce52..ad5d8a4ae56d 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -127,13 +127,21 @@ enum ieee80211_agg_stop_reason {
/* Debugfs flags to enable/disable use of RX/TX airtime in scheduler */
#define AIRTIME_USE_TX BIT(0)
#define AIRTIME_USE_RX BIT(1)
+#define AIRTIME_USE_AQL BIT(2)
struct airtime_info {
u64 rx_airtime;
u64 tx_airtime;
s64 deficit;
+ atomic_t aql_tx_pending; /* Estimated airtime for frames pending */
+ u32 aql_limit_low;
+ u32 aql_limit_high;
};
+void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local,
+ struct sta_info *sta, u8 ac,
+ u16 tx_airtime, bool tx_completed);
+
struct sta_info;
/**
@@ -725,6 +733,10 @@ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata,
struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata,
const u8 *addr);
+/* user must hold sta_mtx or be in RCU critical section */
+struct sta_info *sta_info_get_by_addrs(struct ieee80211_local *local,
+ const u8 *sta_addr, const u8 *vif_addr);
+
#define for_each_sta_info(local, _addr, _sta, _tmp) \
rhl_for_each_entry_rcu(_sta, _tmp, \
sta_info_hash_lookup(local, _addr), hash_node)
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index ab8ba5835ca0..b720feaf9a74 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -670,12 +670,26 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local,
struct sk_buff *skb, bool dropped)
{
struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ u16 tx_time_est = ieee80211_info_get_tx_time_est(info);
struct ieee80211_hdr *hdr = (void *)skb->data;
bool acked = info->flags & IEEE80211_TX_STAT_ACK;
if (dropped)
acked = false;
+ if (tx_time_est) {
+ struct sta_info *sta;
+
+ rcu_read_lock();
+
+ sta = sta_info_get_by_addrs(local, hdr->addr1, hdr->addr2);
+ ieee80211_sta_update_pending_airtime(local, sta,
+ skb_get_queue_mapping(skb),
+ tx_time_est,
+ true);
+ rcu_read_unlock();
+ }
+
if (info->flags & IEEE80211_TX_INTFL_MLME_CONN_TX) {
struct ieee80211_sub_if_data *sdata;
@@ -877,6 +891,7 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
struct ieee80211_bar *bar;
int shift = 0;
int tid = IEEE80211_NUM_TIDS;
+ u16 tx_time_est;
rates_idx = ieee80211_tx_get_rates(hw, info, &retry_count);
@@ -986,6 +1001,17 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
ieee80211_sta_register_airtime(&sta->sta, tid,
info->status.tx_time, 0);
+ if ((tx_time_est = ieee80211_info_get_tx_time_est(info)) > 0) {
+ /* Do this here to avoid the expensive lookup of the sta
+ * in ieee80211_report_used_skb().
+ */
+ ieee80211_sta_update_pending_airtime(local, sta,
+ skb_get_queue_mapping(skb),
+ tx_time_est,
+ true);
+ ieee80211_info_set_tx_time_est(info, 0);
+ }
+
if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) {
if (info->flags & IEEE80211_TX_STAT_ACK) {
if (sta->status_stats.lost_packets)
@@ -1030,7 +1056,8 @@ static void __ieee80211_tx_status(struct ieee80211_hw *hw,
I802_DEBUG_INC(local->dot11FailedCount);
}
- if (ieee80211_is_nullfunc(fc) && ieee80211_has_pm(fc) &&
+ if ((ieee80211_is_nullfunc(fc) || ieee80211_is_qos_nullfunc(fc)) &&
+ ieee80211_has_pm(fc) &&
ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS) &&
!(info->flags & IEEE80211_TX_CTL_INJECTED) &&
local->ps_sdata && !(local->scanning)) {
@@ -1073,19 +1100,13 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb)
.skb = skb,
.info = IEEE80211_SKB_CB(skb),
};
- struct rhlist_head *tmp;
struct sta_info *sta;
rcu_read_lock();
- for_each_sta_info(local, hdr->addr1, sta, tmp) {
- /* skip wrong virtual interface */
- if (!ether_addr_equal(hdr->addr2, sta->sdata->vif.addr))
- continue;
-
+ sta = sta_info_get_by_addrs(local, hdr->addr1, hdr->addr2);
+ if (sta)
status.sta = &sta->sta;
- break;
- }
__ieee80211_tx_status(hw, &status);
rcu_read_unlock();
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index db38be1b75fa..b696b9136f4c 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2270,6 +2270,9 @@ netdev_tx_t ieee80211_monitor_start_xmit(struct sk_buff *skb,
* isn't always enough to find the interface to use; for proper
* VLAN/WDS support we will need a different mechanism (which
* likely isn't going to be monitor interfaces).
+ *
+ * This is necessary, for example, for old hostapd versions that
+ * don't use nl80211-based management TX/RX.
*/
sdata = IEEE80211_DEV_TO_SUB_IF(dev);
@@ -3551,6 +3554,9 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
WARN_ON_ONCE(softirq_count() == 0);
+ if (!ieee80211_txq_airtime_check(hw, txq))
+ return NULL;
+
begin:
spin_lock_bh(&fq->lock);
@@ -3661,6 +3667,21 @@ begin:
}
IEEE80211_SKB_CB(skb)->control.vif = vif;
+
+ if (local->airtime_flags & AIRTIME_USE_AQL) {
+ u32 airtime;
+
+ airtime = ieee80211_calc_expected_tx_airtime(hw, vif, txq->sta,
+ skb->len);
+ if (airtime) {
+ airtime = ieee80211_info_set_tx_time_est(info, airtime);
+ ieee80211_sta_update_pending_airtime(local, tx.sta,
+ txq->ac,
+ airtime,
+ false);
+ }
+ }
+
return skb;
out:
@@ -3674,7 +3695,8 @@ struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
{
struct ieee80211_local *local = hw_to_local(hw);
struct ieee80211_txq *ret = NULL;
- struct txq_info *txqi = NULL;
+ struct txq_info *txqi = NULL, *head = NULL;
+ bool found_eligible_txq = false;
spin_lock_bh(&local->active_txq_lock[ac]);
@@ -3685,13 +3707,30 @@ struct ieee80211_txq *ieee80211_next_txq(struct ieee80211_hw *hw, u8 ac)
if (!txqi)
goto out;
+ if (txqi == head) {
+ if (!found_eligible_txq)
+ goto out;
+ else
+ found_eligible_txq = false;
+ }
+
+ if (!head)
+ head = txqi;
+
if (txqi->txq.sta) {
struct sta_info *sta = container_of(txqi->txq.sta,
- struct sta_info, sta);
+ struct sta_info, sta);
+ bool aql_check = ieee80211_txq_airtime_check(hw, &txqi->txq);
+ s64 deficit = sta->airtime[txqi->txq.ac].deficit;
+
+ if (aql_check)
+ found_eligible_txq = true;
- if (sta->airtime[txqi->txq.ac].deficit < 0) {
+ if (deficit < 0)
sta->airtime[txqi->txq.ac].deficit +=
sta->airtime_weight;
+
+ if (deficit < 0 || !aql_check) {
list_move_tail(&txqi->schedule_order,
&local->active_txqs[txqi->txq.ac]);
goto begin;
@@ -3745,6 +3784,33 @@ void __ieee80211_schedule_txq(struct ieee80211_hw *hw,
}
EXPORT_SYMBOL(__ieee80211_schedule_txq);
+bool ieee80211_txq_airtime_check(struct ieee80211_hw *hw,
+ struct ieee80211_txq *txq)
+{
+ struct sta_info *sta;
+ struct ieee80211_local *local = hw_to_local(hw);
+
+ if (!(local->airtime_flags & AIRTIME_USE_AQL))
+ return true;
+
+ if (!txq->sta)
+ return true;
+
+ sta = container_of(txq->sta, struct sta_info, sta);
+ if (atomic_read(&sta->airtime[txq->ac].aql_tx_pending) <
+ sta->airtime[txq->ac].aql_limit_low)
+ return true;
+
+ if (atomic_read(&local->aql_total_pending_airtime) <
+ local->aql_threshold &&
+ atomic_read(&sta->airtime[txq->ac].aql_tx_pending) <
+ sta->airtime[txq->ac].aql_limit_high)
+ return true;
+
+ return false;
+}
+EXPORT_SYMBOL(ieee80211_txq_airtime_check);
+
bool ieee80211_txq_may_transmit(struct ieee80211_hw *hw,
struct ieee80211_txq *txq)
{
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 1a04e0929738..be5e95a0d876 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -25,7 +25,8 @@
/* 3 Counters support added */
/* 4 Comments support added */
/* 5 Forceadd support added */
-#define IPSET_TYPE_REV_MAX 6 /* skbinfo support added */
+/* 6 skbinfo support added */
+#define IPSET_TYPE_REV_MAX 7 /* interface wildcard support added */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@netfilter.org>");
@@ -57,6 +58,7 @@ struct hash_netiface4_elem {
u8 cidr;
u8 nomatch;
u8 elem;
+ u8 wildcard;
char iface[IFNAMSIZ];
};
@@ -71,7 +73,9 @@ hash_netiface4_data_equal(const struct hash_netiface4_elem *ip1,
ip1->cidr == ip2->cidr &&
(++*multi) &&
ip1->physdev == ip2->physdev &&
- strcmp(ip1->iface, ip2->iface) == 0;
+ (ip1->wildcard ?
+ strncmp(ip1->iface, ip2->iface, strlen(ip1->iface)) == 0 :
+ strcmp(ip1->iface, ip2->iface) == 0);
}
static int
@@ -103,7 +107,8 @@ static bool
hash_netiface4_data_list(struct sk_buff *skb,
const struct hash_netiface4_elem *data)
{
- u32 flags = data->physdev ? IPSET_FLAG_PHYSDEV : 0;
+ u32 flags = (data->physdev ? IPSET_FLAG_PHYSDEV : 0) |
+ (data->wildcard ? IPSET_FLAG_IFACE_WILDCARD : 0);
if (data->nomatch)
flags |= IPSET_FLAG_NOMATCH;
@@ -229,6 +234,8 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
e.physdev = 1;
if (cadt_flags & IPSET_FLAG_NOMATCH)
flags |= (IPSET_FLAG_NOMATCH << 16);
+ if (cadt_flags & IPSET_FLAG_IFACE_WILDCARD)
+ e.wildcard = 1;
}
if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) {
e.ip = htonl(ip & ip_set_hostmask(e.cidr));
@@ -280,6 +287,7 @@ struct hash_netiface6_elem {
u8 cidr;
u8 nomatch;
u8 elem;
+ u8 wildcard;
char iface[IFNAMSIZ];
};
@@ -294,7 +302,9 @@ hash_netiface6_data_equal(const struct hash_netiface6_elem *ip1,
ip1->cidr == ip2->cidr &&
(++*multi) &&
ip1->physdev == ip2->physdev &&
- strcmp(ip1->iface, ip2->iface) == 0;
+ (ip1->wildcard ?
+ strncmp(ip1->iface, ip2->iface, strlen(ip1->iface)) == 0 :
+ strcmp(ip1->iface, ip2->iface) == 0);
}
static int
@@ -326,7 +336,8 @@ static bool
hash_netiface6_data_list(struct sk_buff *skb,
const struct hash_netiface6_elem *data)
{
- u32 flags = data->physdev ? IPSET_FLAG_PHYSDEV : 0;
+ u32 flags = (data->physdev ? IPSET_FLAG_PHYSDEV : 0) |
+ (data->wildcard ? IPSET_FLAG_IFACE_WILDCARD : 0);
if (data->nomatch)
flags |= IPSET_FLAG_NOMATCH;
@@ -440,6 +451,8 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
e.physdev = 1;
if (cadt_flags & IPSET_FLAG_NOMATCH)
flags |= (IPSET_FLAG_NOMATCH << 16);
+ if (cadt_flags & IPSET_FLAG_IFACE_WILDCARD)
+ e.wildcard = 1;
}
ret = adtfn(set, &e, &ext, &ext, flags);
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 8468d2d02284..9889d52eda82 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -18,11 +18,11 @@ static DEFINE_MUTEX(flowtable_lock);
static LIST_HEAD(flowtables);
static void
-flow_offload_fill_dir(struct flow_offload *flow, struct nf_conn *ct,
+flow_offload_fill_dir(struct flow_offload *flow,
enum flow_offload_tuple_dir dir)
{
struct flow_offload_tuple *ft = &flow->tuplehash[dir].tuple;
- struct nf_conntrack_tuple *ctt = &ct->tuplehash[dir].tuple;
+ struct nf_conntrack_tuple *ctt = &flow->ct->tuplehash[dir].tuple;
ft->dir = dir;
@@ -57,8 +57,8 @@ struct flow_offload *flow_offload_alloc(struct nf_conn *ct)
flow->ct = ct;
- flow_offload_fill_dir(flow, ct, FLOW_OFFLOAD_DIR_ORIGINAL);
- flow_offload_fill_dir(flow, ct, FLOW_OFFLOAD_DIR_REPLY);
+ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
+ flow_offload_fill_dir(flow, FLOW_OFFLOAD_DIR_REPLY);
if (ct->status & IPS_SRC_NAT)
flow->flags |= FLOW_OFFLOAD_SNAT;
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
index bfb910b874ce..88bedf1ff1ae 100644
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -21,11 +21,34 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
return NF_ACCEPT;
}
+static int nf_flow_rule_route_inet(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ const struct flow_offload_tuple *flow_tuple = &flow->tuplehash[dir].tuple;
+ int err;
+
+ switch (flow_tuple->l3proto) {
+ case NFPROTO_IPV4:
+ err = nf_flow_rule_route_ipv4(net, flow, dir, flow_rule);
+ break;
+ case NFPROTO_IPV6:
+ err = nf_flow_rule_route_ipv6(net, flow, dir, flow_rule);
+ break;
+ default:
+ err = -1;
+ break;
+ }
+
+ return err;
+}
+
static struct nf_flowtable_type flowtable_inet = {
.family = NFPROTO_INET,
.init = nf_flow_table_init,
.setup = nf_flow_table_offload_setup,
- .action = nf_flow_rule_route,
+ .action = nf_flow_rule_route_inet,
.free = nf_flow_table_free,
.hook = nf_flow_offload_inet_hook,
.owner = THIS_MODULE,
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 9be61f47303a..c54c9a6cc981 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -112,13 +112,22 @@ static void flow_offload_mangle(struct flow_action_entry *entry,
memcpy(&entry->mangle.val, value, sizeof(u32));
}
+static inline struct flow_action_entry *
+flow_action_entry_next(struct nf_flow_rule *flow_rule)
+{
+ int i = flow_rule->rule->action.num_entries++;
+
+ return &flow_rule->rule->action.entries[i];
+}
+
static int flow_offload_eth_src(struct net *net,
const struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
- struct flow_action_entry *entry0,
- struct flow_action_entry *entry1)
+ struct nf_flow_rule *flow_rule)
{
const struct flow_offload_tuple *tuple = &flow->tuplehash[!dir].tuple;
+ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
+ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
struct net_device *dev;
u32 mask, val;
u16 val16;
@@ -145,10 +154,11 @@ static int flow_offload_eth_src(struct net *net,
static int flow_offload_eth_dst(struct net *net,
const struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
- struct flow_action_entry *entry0,
- struct flow_action_entry *entry1)
+ struct nf_flow_rule *flow_rule)
{
const struct flow_offload_tuple *tuple = &flow->tuplehash[dir].tuple;
+ struct flow_action_entry *entry0 = flow_action_entry_next(flow_rule);
+ struct flow_action_entry *entry1 = flow_action_entry_next(flow_rule);
struct neighbour *n;
u32 mask, val;
u16 val16;
@@ -175,8 +185,9 @@ static int flow_offload_eth_dst(struct net *net,
static void flow_offload_ipv4_snat(struct net *net,
const struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
- struct flow_action_entry *entry)
+ struct nf_flow_rule *flow_rule)
{
+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
u32 mask = ~htonl(0xffffffff);
__be32 addr;
u32 offset;
@@ -201,8 +212,9 @@ static void flow_offload_ipv4_snat(struct net *net,
static void flow_offload_ipv4_dnat(struct net *net,
const struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
- struct flow_action_entry *entry)
+ struct nf_flow_rule *flow_rule)
{
+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
u32 mask = ~htonl(0xffffffff);
__be32 addr;
u32 offset;
@@ -224,6 +236,71 @@ static void flow_offload_ipv4_dnat(struct net *net,
(u8 *)&addr, (u8 *)&mask);
}
+static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
+ unsigned int offset,
+ u8 *addr, u8 *mask)
+{
+ struct flow_action_entry *entry;
+ int i;
+
+ for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32)) {
+ entry = flow_action_entry_next(flow_rule);
+ flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
+ offset + i,
+ &addr[i], mask);
+ }
+}
+
+static void flow_offload_ipv6_snat(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ u32 mask = ~htonl(0xffffffff);
+ const u8 *addr;
+ u32 offset;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.dst_v6.s6_addr;
+ offset = offsetof(struct ipv6hdr, saddr);
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.src_v6.s6_addr;
+ offset = offsetof(struct ipv6hdr, daddr);
+ break;
+ default:
+ return;
+ }
+
+ flow_offload_ipv6_mangle(flow_rule, offset, (u8 *)addr, (u8 *)&mask);
+}
+
+static void flow_offload_ipv6_dnat(struct net *net,
+ const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ u32 mask = ~htonl(0xffffffff);
+ const u8 *addr;
+ u32 offset;
+
+ switch (dir) {
+ case FLOW_OFFLOAD_DIR_ORIGINAL:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.src_v6.s6_addr;
+ offset = offsetof(struct ipv6hdr, daddr);
+ break;
+ case FLOW_OFFLOAD_DIR_REPLY:
+ addr = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.dst_v6.s6_addr;
+ offset = offsetof(struct ipv6hdr, saddr);
+ break;
+ default:
+ return;
+ }
+
+ flow_offload_ipv6_mangle(flow_rule, offset, (u8 *)addr, (u8 *)&mask);
+}
+
static int flow_offload_l4proto(const struct flow_offload *flow)
{
u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
@@ -246,8 +323,9 @@ static int flow_offload_l4proto(const struct flow_offload *flow)
static void flow_offload_port_snat(struct net *net,
const struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
- struct flow_action_entry *entry)
+ struct nf_flow_rule *flow_rule)
{
+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
u32 mask = ~htonl(0xffff0000);
__be16 port;
u32 offset;
@@ -272,8 +350,9 @@ static void flow_offload_port_snat(struct net *net,
static void flow_offload_port_dnat(struct net *net,
const struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
- struct flow_action_entry *entry)
+ struct nf_flow_rule *flow_rule)
{
+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
u32 mask = ~htonl(0xffff);
__be16 port;
u32 offset;
@@ -297,9 +376,10 @@ static void flow_offload_port_dnat(struct net *net,
static void flow_offload_ipv4_checksum(struct net *net,
const struct flow_offload *flow,
- struct flow_action_entry *entry)
+ struct nf_flow_rule *flow_rule)
{
u8 protonum = flow->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.l4proto;
+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
entry->id = FLOW_ACTION_CSUM;
entry->csum_flags = TCA_CSUM_UPDATE_FLAG_IPV4HDR;
@@ -316,8 +396,9 @@ static void flow_offload_ipv4_checksum(struct net *net,
static void flow_offload_redirect(const struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
- struct flow_action_entry *entry)
+ struct nf_flow_rule *flow_rule)
{
+ struct flow_action_entry *entry = flow_action_entry_next(flow_rule);
struct rtable *rt;
rt = (struct rtable *)flow->tuplehash[dir].tuple.dst_cache;
@@ -326,45 +407,56 @@ static void flow_offload_redirect(const struct flow_offload *flow,
dev_hold(rt->dst.dev);
}
-int nf_flow_rule_route(struct net *net, const struct flow_offload *flow,
- enum flow_offload_tuple_dir dir,
- struct nf_flow_rule *flow_rule)
+int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
{
- int i;
-
- if (flow_offload_eth_src(net, flow, dir,
- &flow_rule->rule->action.entries[0],
- &flow_rule->rule->action.entries[1]) < 0)
+ if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
+ flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
return -1;
- if (flow_offload_eth_dst(net, flow, dir,
- &flow_rule->rule->action.entries[2],
- &flow_rule->rule->action.entries[3]) < 0)
- return -1;
-
- i = 4;
if (flow->flags & FLOW_OFFLOAD_SNAT) {
- flow_offload_ipv4_snat(net, flow, dir,
- &flow_rule->rule->action.entries[i++]);
- flow_offload_port_snat(net, flow, dir,
- &flow_rule->rule->action.entries[i++]);
+ flow_offload_ipv4_snat(net, flow, dir, flow_rule);
+ flow_offload_port_snat(net, flow, dir, flow_rule);
}
if (flow->flags & FLOW_OFFLOAD_DNAT) {
- flow_offload_ipv4_dnat(net, flow, dir,
- &flow_rule->rule->action.entries[i++]);
- flow_offload_port_dnat(net, flow, dir,
- &flow_rule->rule->action.entries[i++]);
+ flow_offload_ipv4_dnat(net, flow, dir, flow_rule);
+ flow_offload_port_dnat(net, flow, dir, flow_rule);
}
if (flow->flags & FLOW_OFFLOAD_SNAT ||
flow->flags & FLOW_OFFLOAD_DNAT)
- flow_offload_ipv4_checksum(net, flow,
- &flow_rule->rule->action.entries[i++]);
+ flow_offload_ipv4_checksum(net, flow, flow_rule);
- flow_offload_redirect(flow, dir, &flow_rule->rule->action.entries[i++]);
+ flow_offload_redirect(flow, dir, flow_rule);
- return i;
+ return 0;
}
-EXPORT_SYMBOL_GPL(nf_flow_rule_route);
+EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
+
+int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
+ enum flow_offload_tuple_dir dir,
+ struct nf_flow_rule *flow_rule)
+{
+ if (flow_offload_eth_src(net, flow, dir, flow_rule) < 0 ||
+ flow_offload_eth_dst(net, flow, dir, flow_rule) < 0)
+ return -1;
+
+ if (flow->flags & FLOW_OFFLOAD_SNAT) {
+ flow_offload_ipv6_snat(net, flow, dir, flow_rule);
+ flow_offload_port_snat(net, flow, dir, flow_rule);
+ }
+ if (flow->flags & FLOW_OFFLOAD_DNAT) {
+ flow_offload_ipv6_dnat(net, flow, dir, flow_rule);
+ flow_offload_port_dnat(net, flow, dir, flow_rule);
+ }
+
+ flow_offload_redirect(flow, dir, flow_rule);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv6);
+
+#define NF_FLOW_RULE_ACTION_MAX 16
static struct nf_flow_rule *
nf_flow_offload_rule_alloc(struct net *net,
@@ -375,13 +467,13 @@ nf_flow_offload_rule_alloc(struct net *net,
const struct flow_offload *flow = offload->flow;
const struct flow_offload_tuple *tuple;
struct nf_flow_rule *flow_rule;
- int err = -ENOMEM, num_actions;
+ int err = -ENOMEM;
flow_rule = kzalloc(sizeof(*flow_rule), GFP_KERNEL);
if (!flow_rule)
goto err_flow;
- flow_rule->rule = flow_rule_alloc(10);
+ flow_rule->rule = flow_rule_alloc(NF_FLOW_RULE_ACTION_MAX);
if (!flow_rule->rule)
goto err_flow_rule;
@@ -394,12 +486,10 @@ nf_flow_offload_rule_alloc(struct net *net,
if (err < 0)
goto err_flow_match;
- num_actions = flowtable->type->action(net, flow, dir, flow_rule);
- if (num_actions < 0)
+ flow_rule->rule->action.num_entries = 0;
+ if (flowtable->type->action(net, flow, dir, flow_rule) < 0)
goto err_flow_match;
- flow_rule->rule->action.num_entries = num_actions;
-
return flow_rule;
err_flow_match:
@@ -722,6 +812,9 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
if (!(flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD))
return 0;
+ if (!dev->netdev_ops->ndo_setup_tc)
+ return -EOPNOTSUPP;
+
bo.net = dev_net(dev);
bo.block = &flowtable->flow_block;
bo.command = cmd;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 2dc636faa322..ff04cdc87f76 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -361,6 +361,7 @@ static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type,
static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule)
{
+ struct nft_flow_rule *flow;
struct nft_trans *trans;
int err;
@@ -368,6 +369,16 @@ static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule)
if (trans == NULL)
return -ENOMEM;
+ if (ctx->chain->flags & NFT_CHAIN_HW_OFFLOAD) {
+ flow = nft_flow_rule_create(ctx->net, rule);
+ if (IS_ERR(flow)) {
+ nft_trans_destroy(trans);
+ return PTR_ERR(flow);
+ }
+
+ nft_trans_flow_rule(trans) = flow;
+ }
+
err = nf_tables_delrule_deactivate(ctx, rule);
if (err < 0) {
nft_trans_destroy(trans);
@@ -5964,16 +5975,22 @@ nft_flowtable_type_get(struct net *net, u8 family)
return ERR_PTR(-ENOENT);
}
+static void nft_unregister_flowtable_hook(struct net *net,
+ struct nft_flowtable *flowtable,
+ struct nft_hook *hook)
+{
+ nf_unregister_net_hook(net, &hook->ops);
+ flowtable->data.type->setup(&flowtable->data, hook->ops.dev,
+ FLOW_BLOCK_UNBIND);
+}
+
static void nft_unregister_flowtable_net_hooks(struct net *net,
struct nft_flowtable *flowtable)
{
struct nft_hook *hook;
- list_for_each_entry(hook, &flowtable->hook_list, list) {
- nf_unregister_net_hook(net, &hook->ops);
- flowtable->data.type->setup(&flowtable->data, hook->ops.dev,
- FLOW_BLOCK_UNBIND);
- }
+ list_for_each_entry(hook, &flowtable->hook_list, list)
+ nft_unregister_flowtable_hook(net, flowtable, hook);
}
static int nft_register_flowtable_net_hooks(struct net *net,
@@ -5995,12 +6012,20 @@ static int nft_register_flowtable_net_hooks(struct net *net,
}
}
- flowtable->data.type->setup(&flowtable->data, hook->ops.dev,
- FLOW_BLOCK_BIND);
- err = nf_register_net_hook(net, &hook->ops);
+ err = flowtable->data.type->setup(&flowtable->data,
+ hook->ops.dev,
+ FLOW_BLOCK_BIND);
if (err < 0)
goto err_unregister_net_hooks;
+ err = nf_register_net_hook(net, &hook->ops);
+ if (err < 0) {
+ flowtable->data.type->setup(&flowtable->data,
+ hook->ops.dev,
+ FLOW_BLOCK_UNBIND);
+ goto err_unregister_net_hooks;
+ }
+
i++;
}
@@ -6011,9 +6036,7 @@ err_unregister_net_hooks:
if (i-- <= 0)
break;
- nf_unregister_net_hook(net, &hook->ops);
- flowtable->data.type->setup(&flowtable->data, hook->ops.dev,
- FLOW_BLOCK_UNBIND);
+ nft_unregister_flowtable_hook(net, flowtable, hook);
list_del_rcu(&hook->list);
kfree_rcu(hook, rcu);
}
@@ -6120,7 +6143,7 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
return 0;
err5:
list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
- nf_unregister_net_hook(net, &hook->ops);
+ nft_unregister_flowtable_hook(net, flowtable, hook);
list_del_rcu(&hook->list);
kfree_rcu(hook, rcu);
}
@@ -6465,7 +6488,7 @@ static void nft_flowtable_event(unsigned long event, struct net_device *dev,
if (hook->ops.dev != dev)
continue;
- nf_unregister_net_hook(dev_net(dev), &hook->ops);
+ nft_unregister_flowtable_hook(dev_net(dev), flowtable, hook);
list_del_rcu(&hook->list);
kfree_rcu(hook, rcu);
break;
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index cdea3010c7a0..68f17a6921d8 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -159,9 +159,9 @@ static void nft_flow_cls_offload_setup(struct flow_cls_offload *cls_flow,
const struct nft_base_chain *basechain,
const struct nft_rule *rule,
const struct nft_flow_rule *flow,
+ struct netlink_ext_ack *extack,
enum flow_cls_command command)
{
- struct netlink_ext_ack extack;
__be16 proto = ETH_P_ALL;
memset(cls_flow, 0, sizeof(*cls_flow));
@@ -170,7 +170,7 @@ static void nft_flow_cls_offload_setup(struct flow_cls_offload *cls_flow,
proto = flow->proto;
nft_flow_offload_common_init(&cls_flow->common, proto,
- basechain->ops.priority, &extack);
+ basechain->ops.priority, extack);
cls_flow->command = command;
cls_flow->cookie = (unsigned long) rule;
if (flow)
@@ -182,6 +182,7 @@ static int nft_flow_offload_rule(struct nft_chain *chain,
struct nft_flow_rule *flow,
enum flow_cls_command command)
{
+ struct netlink_ext_ack extack = {};
struct flow_cls_offload cls_flow;
struct nft_base_chain *basechain;
@@ -189,7 +190,8 @@ static int nft_flow_offload_rule(struct nft_chain *chain,
return -EOPNOTSUPP;
basechain = nft_base_chain(chain);
- nft_flow_cls_offload_setup(&cls_flow, basechain, rule, flow, command);
+ nft_flow_cls_offload_setup(&cls_flow, basechain, rule, flow, &extack,
+ command);
return nft_setup_cb_call(TC_SETUP_CLSFLOWER, &cls_flow,
&basechain->flow_block.cb_list);
@@ -207,13 +209,15 @@ static int nft_flow_offload_unbind(struct flow_block_offload *bo,
{
struct flow_block_cb *block_cb, *next;
struct flow_cls_offload cls_flow;
+ struct netlink_ext_ack extack;
struct nft_chain *chain;
struct nft_rule *rule;
chain = &basechain->chain;
list_for_each_entry(rule, &chain->rules, list) {
+ memset(&extack, 0, sizeof(extack));
nft_flow_cls_offload_setup(&cls_flow, basechain, rule, NULL,
- FLOW_CLS_DESTROY);
+ &extack, FLOW_CLS_DESTROY);
nft_setup_cb_call(TC_SETUP_CLSFLOWER, &cls_flow, &bo->cb_list);
}
@@ -385,6 +389,55 @@ static int nft_flow_offload_chain(struct nft_chain *chain, u8 *ppolicy,
return nft_flow_block_chain(basechain, NULL, cmd);
}
+static void nft_flow_rule_offload_abort(struct net *net,
+ struct nft_trans *trans)
+{
+ int err = 0;
+
+ list_for_each_entry_continue_reverse(trans, &net->nft.commit_list, list) {
+ if (trans->ctx.family != NFPROTO_NETDEV)
+ continue;
+
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWCHAIN:
+ if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) ||
+ nft_trans_chain_update(trans))
+ continue;
+
+ err = nft_flow_offload_chain(trans->ctx.chain, NULL,
+ FLOW_BLOCK_UNBIND);
+ break;
+ case NFT_MSG_DELCHAIN:
+ if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ continue;
+
+ err = nft_flow_offload_chain(trans->ctx.chain, NULL,
+ FLOW_BLOCK_BIND);
+ break;
+ case NFT_MSG_NEWRULE:
+ if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ continue;
+
+ err = nft_flow_offload_rule(trans->ctx.chain,
+ nft_trans_rule(trans),
+ NULL, FLOW_CLS_DESTROY);
+ break;
+ case NFT_MSG_DELRULE:
+ if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ continue;
+
+ err = nft_flow_offload_rule(trans->ctx.chain,
+ nft_trans_rule(trans),
+ nft_trans_flow_rule(trans),
+ FLOW_CLS_REPLACE);
+ break;
+ }
+
+ if (WARN_ON_ONCE(err))
+ break;
+ }
+}
+
int nft_flow_rule_offload_commit(struct net *net)
{
struct nft_trans *trans;
@@ -418,14 +471,14 @@ int nft_flow_rule_offload_commit(struct net *net)
continue;
if (trans->ctx.flags & NLM_F_REPLACE ||
- !(trans->ctx.flags & NLM_F_APPEND))
- return -EOPNOTSUPP;
-
+ !(trans->ctx.flags & NLM_F_APPEND)) {
+ err = -EOPNOTSUPP;
+ break;
+ }
err = nft_flow_offload_rule(trans->ctx.chain,
nft_trans_rule(trans),
nft_trans_flow_rule(trans),
FLOW_CLS_REPLACE);
- nft_flow_rule_destroy(nft_trans_flow_rule(trans));
break;
case NFT_MSG_DELRULE:
if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
@@ -433,13 +486,31 @@ int nft_flow_rule_offload_commit(struct net *net)
err = nft_flow_offload_rule(trans->ctx.chain,
nft_trans_rule(trans),
- nft_trans_flow_rule(trans),
- FLOW_CLS_DESTROY);
+ NULL, FLOW_CLS_DESTROY);
break;
}
- if (err)
- return err;
+ if (err) {
+ nft_flow_rule_offload_abort(net, trans);
+ break;
+ }
+ }
+
+ list_for_each_entry(trans, &net->nft.commit_list, list) {
+ if (trans->ctx.family != NFPROTO_NETDEV)
+ continue;
+
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWRULE:
+ case NFT_MSG_DELRULE:
+ if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD))
+ continue;
+
+ nft_flow_rule_destroy(nft_trans_flow_rule(trans));
+ break;
+ default:
+ break;
+ }
}
return err;
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index 0744b2bb46da..b8092069f868 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -10,6 +10,7 @@
#include <linux/module.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
+#include <linux/if_arp.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables_offload.h>
@@ -125,6 +126,11 @@ static int __nft_cmp_offload(struct nft_offload_ctx *ctx,
flow->match.dissector.used_keys |= BIT(reg->key);
flow->match.dissector.offset[reg->key] = reg->base_offset;
+ if (reg->key == FLOW_DISSECTOR_KEY_META &&
+ reg->offset == offsetof(struct nft_flow_key, meta.ingress_iftype) &&
+ nft_reg_load16(priv->data.data) != ARPHRD_ETHER)
+ return -EOPNOTSUPP;
+
nft_offload_update_dependency(ctx, &priv->data, priv->len);
return 0;
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 317e3a9e8c5b..9740b554fdb3 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -33,19 +33,19 @@
static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state);
-static u8 nft_meta_weekday(unsigned long secs)
+static u8 nft_meta_weekday(time64_t secs)
{
unsigned int dse;
u8 wday;
secs -= NFT_META_SECS_PER_MINUTE * sys_tz.tz_minuteswest;
- dse = secs / NFT_META_SECS_PER_DAY;
+ dse = div_u64(secs, NFT_META_SECS_PER_DAY);
wday = (4 + dse) % NFT_META_DAYS_PER_WEEK;
return wday;
}
-static u32 nft_meta_hour(unsigned long secs)
+static u32 nft_meta_hour(time64_t secs)
{
struct tm tm;
@@ -250,10 +250,10 @@ void nft_meta_get_eval(const struct nft_expr *expr,
nft_reg_store64(dest, ktime_get_real_ns());
break;
case NFT_META_TIME_DAY:
- nft_reg_store8(dest, nft_meta_weekday(get_seconds()));
+ nft_reg_store8(dest, nft_meta_weekday(ktime_get_real_seconds()));
break;
case NFT_META_TIME_HOUR:
- *dest = nft_meta_hour(get_seconds());
+ *dest = nft_meta_hour(ktime_get_real_seconds());
break;
default:
WARN_ON(1);
@@ -547,6 +547,14 @@ static int nft_meta_get_offload(struct nft_offload_ctx *ctx,
sizeof(__u8), reg);
nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_TRANSPORT);
break;
+ case NFT_META_IIF:
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_META, meta,
+ ingress_ifindex, sizeof(__u32), reg);
+ break;
+ case NFT_META_IIFTYPE:
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_META, meta,
+ ingress_iftype, sizeof(__u16), reg);
+ break;
default:
return -EOPNOTSUPP;
}
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 5cb2d8908d2a..1993af3a2979 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -23,50 +23,58 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
+static bool nft_payload_rebuild_vlan_hdr(const struct sk_buff *skb, int mac_off,
+ struct vlan_ethhdr *veth)
+{
+ if (skb_copy_bits(skb, mac_off, veth, ETH_HLEN))
+ return false;
+
+ veth->h_vlan_proto = skb->vlan_proto;
+ veth->h_vlan_TCI = htons(skb_vlan_tag_get(skb));
+ veth->h_vlan_encapsulated_proto = skb->protocol;
+
+ return true;
+}
+
/* add vlan header into the user buffer for if tag was removed by offloads */
static bool
nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len)
{
int mac_off = skb_mac_header(skb) - skb->data;
- u8 vlan_len, *vlanh, *dst_u8 = (u8 *) d;
+ u8 *vlanh, *dst_u8 = (u8 *) d;
struct vlan_ethhdr veth;
+ u8 vlan_hlen = 0;
+
+ if ((skb->protocol == htons(ETH_P_8021AD) ||
+ skb->protocol == htons(ETH_P_8021Q)) &&
+ offset >= VLAN_ETH_HLEN && offset < VLAN_ETH_HLEN + VLAN_HLEN)
+ vlan_hlen += VLAN_HLEN;
vlanh = (u8 *) &veth;
- if (offset < ETH_HLEN) {
- u8 ethlen = min_t(u8, len, ETH_HLEN - offset);
+ if (offset < VLAN_ETH_HLEN + vlan_hlen) {
+ u8 ethlen = len;
- if (skb_copy_bits(skb, mac_off, &veth, ETH_HLEN))
+ if (vlan_hlen &&
+ skb_copy_bits(skb, mac_off, &veth, VLAN_ETH_HLEN) < 0)
+ return false;
+ else if (!nft_payload_rebuild_vlan_hdr(skb, mac_off, &veth))
return false;
- veth.h_vlan_proto = skb->vlan_proto;
+ if (offset + len > VLAN_ETH_HLEN + vlan_hlen)
+ ethlen -= offset + len - VLAN_ETH_HLEN + vlan_hlen;
- memcpy(dst_u8, vlanh + offset, ethlen);
+ memcpy(dst_u8, vlanh + offset - vlan_hlen, ethlen);
len -= ethlen;
if (len == 0)
return true;
dst_u8 += ethlen;
- offset = ETH_HLEN;
- } else if (offset >= VLAN_ETH_HLEN) {
- offset -= VLAN_HLEN;
- goto skip;
+ offset = ETH_HLEN + vlan_hlen;
+ } else {
+ offset -= VLAN_HLEN + vlan_hlen;
}
- veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb));
- veth.h_vlan_encapsulated_proto = skb->protocol;
-
- vlanh += offset;
-
- vlan_len = min_t(u8, len, VLAN_ETH_HLEN - offset);
- memcpy(dst_u8, vlanh, vlan_len);
-
- len -= vlan_len;
- if (!len)
- return true;
-
- dst_u8 += vlan_len;
- skip:
return skb_copy_bits(skb, offset + mac_off, dst_u8, len) == 0;
}
@@ -174,6 +182,44 @@ static int nft_payload_offload_ll(struct nft_offload_ctx *ctx,
NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_ETH_ADDRS, eth_addrs,
dst, ETH_ALEN, reg);
break;
+ case offsetof(struct ethhdr, h_proto):
+ if (priv->len != sizeof(__be16))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_BASIC, basic,
+ n_proto, sizeof(__be16), reg);
+ nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK);
+ break;
+ case offsetof(struct vlan_ethhdr, h_vlan_TCI):
+ if (priv->len != sizeof(__be16))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_VLAN, vlan,
+ vlan_tci, sizeof(__be16), reg);
+ break;
+ case offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto):
+ if (priv->len != sizeof(__be16))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_VLAN, vlan,
+ vlan_tpid, sizeof(__be16), reg);
+ nft_offload_set_dependency(ctx, NFT_OFFLOAD_DEP_NETWORK);
+ break;
+ case offsetof(struct vlan_ethhdr, h_vlan_TCI) + sizeof(struct vlan_hdr):
+ if (priv->len != sizeof(__be16))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, vlan,
+ vlan_tci, sizeof(__be16), reg);
+ break;
+ case offsetof(struct vlan_ethhdr, h_vlan_encapsulated_proto) +
+ sizeof(struct vlan_hdr):
+ if (priv->len != sizeof(__be16))
+ return -EOPNOTSUPP;
+
+ NFT_OFFLOAD_MATCH(FLOW_DISSECTOR_KEY_CVLAN, vlan,
+ vlan_tpid, sizeof(__be16), reg);
+ break;
default:
return -EOPNOTSUPP;
}
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index 8dbb4d48f2ed..67cb98489415 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -77,12 +77,12 @@ static inline bool is_leap(unsigned int y)
* This is done in three separate functions so that the most expensive
* calculations are done last, in case a "simple match" can be found earlier.
*/
-static inline unsigned int localtime_1(struct xtm *r, time_t time)
+static inline unsigned int localtime_1(struct xtm *r, time64_t time)
{
unsigned int v, w;
/* Each day has 86400s, so finding the hour/minute is actually easy. */
- v = time % SECONDS_PER_DAY;
+ div_u64_rem(time, SECONDS_PER_DAY, &v);
r->second = v % 60;
w = v / 60;
r->minute = w % 60;
@@ -90,13 +90,13 @@ static inline unsigned int localtime_1(struct xtm *r, time_t time)
return v;
}
-static inline void localtime_2(struct xtm *r, time_t time)
+static inline void localtime_2(struct xtm *r, time64_t time)
{
/*
* Here comes the rest (weekday, monthday). First, divide the SSTE
* by seconds-per-day to get the number of _days_ since the epoch.
*/
- r->dse = time / 86400;
+ r->dse = div_u64(time, SECONDS_PER_DAY);
/*
* 1970-01-01 (w=0) was a Thursday (4).
@@ -105,7 +105,7 @@ static inline void localtime_2(struct xtm *r, time_t time)
r->weekday = (4 + r->dse - 1) % 7 + 1;
}
-static void localtime_3(struct xtm *r, time_t time)
+static void localtime_3(struct xtm *r, time64_t time)
{
unsigned int year, i, w = r->dse;
@@ -160,7 +160,7 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par)
const struct xt_time_info *info = par->matchinfo;
unsigned int packet_time;
struct xtm current_time;
- s64 stamp;
+ time64_t stamp;
/*
* We need real time here, but we can neither use skb->tstamp
@@ -173,14 +173,14 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par)
* 1. match before 13:00
* 2. match after 13:00
*
- * If you match against processing time (get_seconds) it
+ * If you match against processing time (ktime_get_real_seconds) it
* may happen that the same packet matches both rules if
* it arrived at the right moment before 13:00, so it would be
* better to check skb->tstamp and set it via __net_timestamp()
* if needed. This however breaks outgoing packets tx timestamp,
* and causes them to get delayed forever by fq packet scheduler.
*/
- stamp = get_seconds();
+ stamp = ktime_get_real_seconds();
if (info->flags & XT_TIME_LOCAL_TZ)
/* Adjust for local timezone */
@@ -193,6 +193,9 @@ time_mt(const struct sk_buff *skb, struct xt_action_param *par)
* - 'now' is in the weekday mask
* - 'now' is in the daytime range time_start..time_end
* (and by default, libxt_time will set these so as to match)
+ *
+ * note: info->date_start/stop are unsigned 32-bit values that
+ * can hold values beyond y2038, but not after y2106.
*/
if (stamp < info->date_start || stamp > info->date_stop)
diff --git a/net/nfc/hci/Kconfig b/net/nfc/hci/Kconfig
index 97bd3a2c5c98..4822d6f46947 100644
--- a/net/nfc/hci/Kconfig
+++ b/net/nfc/hci/Kconfig
@@ -1,12 +1,12 @@
# SPDX-License-Identifier: GPL-2.0-only
config NFC_HCI
- depends on NFC
- tristate "NFC HCI implementation"
- default n
- help
- Say Y here if you want to build support for a kernel NFC HCI
- implementation. This is mostly needed for devices that only process
- HCI frames, like for example the NXP pn544.
+ depends on NFC
+ tristate "NFC HCI implementation"
+ default n
+ help
+ Say Y here if you want to build support for a kernel NFC HCI
+ implementation. This is mostly needed for devices that only process
+ HCI frames, like for example the NXP pn544.
config NFC_SHDLC
depends on NFC_HCI
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 68d6af56b243..c13638aeef46 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -474,7 +474,6 @@ drop:
}
static const struct nla_policy ct_policy[TCA_CT_MAX + 1] = {
- [TCA_CT_UNSPEC] = { .strict_start_type = TCA_CT_UNSPEC + 1 },
[TCA_CT_ACTION] = { .type = NLA_U16 },
[TCA_CT_PARMS] = { .type = NLA_EXACT_LEN, .len = sizeof(struct tc_ct) },
[TCA_CT_ZONE] = { .type = NLA_U16 },
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index 4d8c822b6aca..c7d5e12ee919 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -119,7 +119,6 @@ static int valid_label(const struct nlattr *attr,
}
static const struct nla_policy mpls_policy[TCA_MPLS_MAX + 1] = {
- [TCA_MPLS_UNSPEC] = { .strict_start_type = TCA_MPLS_UNSPEC + 1 },
[TCA_MPLS_PARMS] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_mpls)),
[TCA_MPLS_PROTO] = { .type = NLA_U16 },
[TCA_MPLS_LABEL] = NLA_POLICY_VALIDATE_FN(NLA_U32, valid_label),
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index d5eff6ac17a9..3ad718576304 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -43,7 +43,7 @@ static struct tcf_pedit_key_ex *tcf_pedit_keys_ex_parse(struct nlattr *nla,
int err = -EINVAL;
int rem;
- if (!nla || !n)
+ if (!nla)
return NULL;
keys_ex = kcalloc(n, sizeof(*k), GFP_KERNEL);
@@ -171,6 +171,10 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
}
parm = nla_data(pattr);
+ if (!parm->nkeys) {
+ NL_SET_ERR_MSG_MOD(extack, "Pedit requires keys to be passed");
+ return -EINVAL;
+ }
ksize = parm->nkeys * sizeof(struct tc_pedit_key);
if (nla_len(pattr) < sizeof(*parm) + ksize) {
NL_SET_ERR_MSG_ATTR(extack, pattr, "Length of TCA_PEDIT_PARMS or TCA_PEDIT_PARMS_EX pedit attribute is invalid");
@@ -184,12 +188,6 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
index = parm->index;
err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
- if (!parm->nkeys) {
- tcf_idr_cleanup(tn, index);
- NL_SET_ERR_MSG_MOD(extack, "Pedit requires keys to be passed");
- ret = -EINVAL;
- goto out_free;
- }
ret = tcf_idr_create(tn, index, est, a,
&act_pedit_ops, bind, false, 0);
if (ret) {
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index cb34e5d57aaa..6379f9568ab8 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -10,6 +10,8 @@
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <net/geneve.h>
+#include <net/vxlan.h>
+#include <net/erspan.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
#include <net/dst.h>
@@ -53,7 +55,11 @@ static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a,
static const struct nla_policy
enc_opts_policy[TCA_TUNNEL_KEY_ENC_OPTS_MAX + 1] = {
+ [TCA_TUNNEL_KEY_ENC_OPTS_UNSPEC] = {
+ .strict_start_type = TCA_TUNNEL_KEY_ENC_OPTS_VXLAN },
[TCA_TUNNEL_KEY_ENC_OPTS_GENEVE] = { .type = NLA_NESTED },
+ [TCA_TUNNEL_KEY_ENC_OPTS_VXLAN] = { .type = NLA_NESTED },
+ [TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN] = { .type = NLA_NESTED },
};
static const struct nla_policy
@@ -64,6 +70,19 @@ geneve_opt_policy[TCA_TUNNEL_KEY_ENC_OPT_GENEVE_MAX + 1] = {
.len = 128 },
};
+static const struct nla_policy
+vxlan_opt_policy[TCA_TUNNEL_KEY_ENC_OPT_VXLAN_MAX + 1] = {
+ [TCA_TUNNEL_KEY_ENC_OPT_VXLAN_GBP] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy
+erspan_opt_policy[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_MAX + 1] = {
+ [TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_VER] = { .type = NLA_U8 },
+ [TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_INDEX] = { .type = NLA_U32 },
+ [TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_DIR] = { .type = NLA_U8 },
+ [TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_HWID] = { .type = NLA_U8 },
+};
+
static int
tunnel_key_copy_geneve_opt(const struct nlattr *nla, void *dst, int dst_len,
struct netlink_ext_ack *extack)
@@ -116,10 +135,89 @@ tunnel_key_copy_geneve_opt(const struct nlattr *nla, void *dst, int dst_len,
return opt_len;
}
+static int
+tunnel_key_copy_vxlan_opt(const struct nlattr *nla, void *dst, int dst_len,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[TCA_TUNNEL_KEY_ENC_OPT_VXLAN_MAX + 1];
+ int err;
+
+ err = nla_parse_nested(tb, TCA_TUNNEL_KEY_ENC_OPT_VXLAN_MAX, nla,
+ vxlan_opt_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (!tb[TCA_TUNNEL_KEY_ENC_OPT_VXLAN_GBP]) {
+ NL_SET_ERR_MSG(extack, "Missing tunnel key vxlan option gbp");
+ return -EINVAL;
+ }
+
+ if (dst) {
+ struct vxlan_metadata *md = dst;
+
+ md->gbp = nla_get_u32(tb[TCA_TUNNEL_KEY_ENC_OPT_VXLAN_GBP]);
+ }
+
+ return sizeof(struct vxlan_metadata);
+}
+
+static int
+tunnel_key_copy_erspan_opt(const struct nlattr *nla, void *dst, int dst_len,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_MAX + 1];
+ int err;
+ u8 ver;
+
+ err = nla_parse_nested(tb, TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_MAX, nla,
+ erspan_opt_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (!tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_VER]) {
+ NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option ver");
+ return -EINVAL;
+ }
+
+ ver = nla_get_u8(tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_VER]);
+ if (ver == 1) {
+ if (!tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_INDEX]) {
+ NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option index");
+ return -EINVAL;
+ }
+ } else if (ver == 2) {
+ if (!tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_DIR] ||
+ !tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_HWID]) {
+ NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option dir or hwid");
+ return -EINVAL;
+ }
+ } else {
+ NL_SET_ERR_MSG(extack, "Tunnel key erspan option ver is incorrect");
+ return -EINVAL;
+ }
+
+ if (dst) {
+ struct erspan_metadata *md = dst;
+
+ md->version = ver;
+ if (ver == 1) {
+ nla = tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_INDEX];
+ md->u.index = nla_get_be32(nla);
+ } else {
+ nla = tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_DIR];
+ md->u.md2.dir = nla_get_u8(nla);
+ nla = tb[TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_HWID];
+ set_hwid(&md->u.md2, nla_get_u8(nla));
+ }
+ }
+
+ return sizeof(struct erspan_metadata);
+}
+
static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst,
int dst_len, struct netlink_ext_ack *extack)
{
- int err, rem, opt_len, len = nla_len(nla), opts_len = 0;
+ int err, rem, opt_len, len = nla_len(nla), opts_len = 0, type = 0;
const struct nlattr *attr, *head = nla_data(nla);
err = nla_validate_deprecated(head, len, TCA_TUNNEL_KEY_ENC_OPTS_MAX,
@@ -130,15 +228,48 @@ static int tunnel_key_copy_opts(const struct nlattr *nla, u8 *dst,
nla_for_each_attr(attr, head, len, rem) {
switch (nla_type(attr)) {
case TCA_TUNNEL_KEY_ENC_OPTS_GENEVE:
+ if (type && type != TUNNEL_GENEVE_OPT) {
+ NL_SET_ERR_MSG(extack, "Duplicate type for geneve options");
+ return -EINVAL;
+ }
opt_len = tunnel_key_copy_geneve_opt(attr, dst,
dst_len, extack);
if (opt_len < 0)
return opt_len;
opts_len += opt_len;
+ if (opts_len > IP_TUNNEL_OPTS_MAX) {
+ NL_SET_ERR_MSG(extack, "Tunnel options exceeds max size");
+ return -EINVAL;
+ }
if (dst) {
dst_len -= opt_len;
dst += opt_len;
}
+ type = TUNNEL_GENEVE_OPT;
+ break;
+ case TCA_TUNNEL_KEY_ENC_OPTS_VXLAN:
+ if (type) {
+ NL_SET_ERR_MSG(extack, "Duplicate type for vxlan options");
+ return -EINVAL;
+ }
+ opt_len = tunnel_key_copy_vxlan_opt(attr, dst,
+ dst_len, extack);
+ if (opt_len < 0)
+ return opt_len;
+ opts_len += opt_len;
+ type = TUNNEL_VXLAN_OPT;
+ break;
+ case TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN:
+ if (type) {
+ NL_SET_ERR_MSG(extack, "Duplicate type for erspan options");
+ return -EINVAL;
+ }
+ opt_len = tunnel_key_copy_erspan_opt(attr, dst,
+ dst_len, extack);
+ if (opt_len < 0)
+ return opt_len;
+ opts_len += opt_len;
+ type = TUNNEL_ERSPAN_OPT;
break;
}
}
@@ -175,6 +306,22 @@ static int tunnel_key_opts_set(struct nlattr *nla, struct ip_tunnel_info *info,
#else
return -EAFNOSUPPORT;
#endif
+ case TCA_TUNNEL_KEY_ENC_OPTS_VXLAN:
+#if IS_ENABLED(CONFIG_INET)
+ info->key.tun_flags |= TUNNEL_VXLAN_OPT;
+ return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info),
+ opts_len, extack);
+#else
+ return -EAFNOSUPPORT;
+#endif
+ case TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN:
+#if IS_ENABLED(CONFIG_INET)
+ info->key.tun_flags |= TUNNEL_ERSPAN_OPT;
+ return tunnel_key_copy_opts(nla, ip_tunnel_info_opts(info),
+ opts_len, extack);
+#else
+ return -EAFNOSUPPORT;
+#endif
default:
NL_SET_ERR_MSG(extack, "Cannot set tunnel options for unknown tunnel type");
return -EINVAL;
@@ -451,6 +598,56 @@ static int tunnel_key_geneve_opts_dump(struct sk_buff *skb,
return 0;
}
+static int tunnel_key_vxlan_opts_dump(struct sk_buff *skb,
+ const struct ip_tunnel_info *info)
+{
+ struct vxlan_metadata *md = (struct vxlan_metadata *)(info + 1);
+ struct nlattr *start;
+
+ start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_VXLAN);
+ if (!start)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, TCA_TUNNEL_KEY_ENC_OPT_VXLAN_GBP, md->gbp)) {
+ nla_nest_cancel(skb, start);
+ return -EMSGSIZE;
+ }
+
+ nla_nest_end(skb, start);
+ return 0;
+}
+
+static int tunnel_key_erspan_opts_dump(struct sk_buff *skb,
+ const struct ip_tunnel_info *info)
+{
+ struct erspan_metadata *md = (struct erspan_metadata *)(info + 1);
+ struct nlattr *start;
+
+ start = nla_nest_start_noflag(skb, TCA_TUNNEL_KEY_ENC_OPTS_ERSPAN);
+ if (!start)
+ return -EMSGSIZE;
+
+ if (nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_VER, md->version))
+ goto err;
+
+ if (md->version == 1 &&
+ nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_INDEX, md->u.index))
+ goto err;
+
+ if (md->version == 2 &&
+ (nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_DIR,
+ md->u.md2.dir) ||
+ nla_put_u8(skb, TCA_TUNNEL_KEY_ENC_OPT_ERSPAN_HWID,
+ get_hwid(&md->u.md2))))
+ goto err;
+
+ nla_nest_end(skb, start);
+ return 0;
+err:
+ nla_nest_cancel(skb, start);
+ return -EMSGSIZE;
+}
+
static int tunnel_key_opts_dump(struct sk_buff *skb,
const struct ip_tunnel_info *info)
{
@@ -468,6 +665,14 @@ static int tunnel_key_opts_dump(struct sk_buff *skb,
err = tunnel_key_geneve_opts_dump(skb, info);
if (err)
goto err_out;
+ } else if (info->key.tun_flags & TUNNEL_VXLAN_OPT) {
+ err = tunnel_key_vxlan_opts_dump(skb, info);
+ if (err)
+ goto err_out;
+ } else if (info->key.tun_flags & TUNNEL_ERSPAN_OPT) {
+ err = tunnel_key_erspan_opts_dump(skb, info);
+ if (err)
+ goto err_out;
} else {
err_out:
nla_nest_cancel(skb, start);
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 74221e3351c3..c307ee1d6ca6 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -22,6 +22,8 @@
#include <net/ip.h>
#include <net/flow_dissector.h>
#include <net/geneve.h>
+#include <net/vxlan.h>
+#include <net/erspan.h>
#include <net/dst.h>
#include <net/dst_metadata.h>
@@ -688,7 +690,11 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
static const struct nla_policy
enc_opts_policy[TCA_FLOWER_KEY_ENC_OPTS_MAX + 1] = {
+ [TCA_FLOWER_KEY_ENC_OPTS_UNSPEC] = {
+ .strict_start_type = TCA_FLOWER_KEY_ENC_OPTS_VXLAN },
[TCA_FLOWER_KEY_ENC_OPTS_GENEVE] = { .type = NLA_NESTED },
+ [TCA_FLOWER_KEY_ENC_OPTS_VXLAN] = { .type = NLA_NESTED },
+ [TCA_FLOWER_KEY_ENC_OPTS_ERSPAN] = { .type = NLA_NESTED },
};
static const struct nla_policy
@@ -699,6 +705,19 @@ geneve_opt_policy[TCA_FLOWER_KEY_ENC_OPT_GENEVE_MAX + 1] = {
.len = 128 },
};
+static const struct nla_policy
+vxlan_opt_policy[TCA_FLOWER_KEY_ENC_OPT_VXLAN_MAX + 1] = {
+ [TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy
+erspan_opt_policy[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX + 1] = {
+ [TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX] = { .type = NLA_U32 },
+ [TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR] = { .type = NLA_U8 },
+ [TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID] = { .type = NLA_U8 },
+};
+
static void fl_set_key_val(struct nlattr **tb,
void *val, int val_type,
void *mask, int mask_type, int len)
@@ -928,6 +947,105 @@ static int fl_set_geneve_opt(const struct nlattr *nla, struct fl_flow_key *key,
return sizeof(struct geneve_opt) + data_len;
}
+static int fl_set_vxlan_opt(const struct nlattr *nla, struct fl_flow_key *key,
+ int depth, int option_len,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_MAX + 1];
+ struct vxlan_metadata *md;
+ int err;
+
+ md = (struct vxlan_metadata *)&key->enc_opts.data[key->enc_opts.len];
+ memset(md, 0xff, sizeof(*md));
+
+ if (!depth)
+ return sizeof(*md);
+
+ if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_VXLAN) {
+ NL_SET_ERR_MSG(extack, "Non-vxlan option type for mask");
+ return -EINVAL;
+ }
+
+ err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_VXLAN_MAX, nla,
+ vxlan_opt_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (!option_len && !tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]) {
+ NL_SET_ERR_MSG(extack, "Missing tunnel key vxlan option gbp");
+ return -EINVAL;
+ }
+
+ if (tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP])
+ md->gbp = nla_get_u32(tb[TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP]);
+
+ return sizeof(*md);
+}
+
+static int fl_set_erspan_opt(const struct nlattr *nla, struct fl_flow_key *key,
+ int depth, int option_len,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX + 1];
+ struct erspan_metadata *md;
+ int err;
+
+ md = (struct erspan_metadata *)&key->enc_opts.data[key->enc_opts.len];
+ memset(md, 0xff, sizeof(*md));
+ md->version = 1;
+
+ if (!depth)
+ return sizeof(*md);
+
+ if (nla_type(nla) != TCA_FLOWER_KEY_ENC_OPTS_ERSPAN) {
+ NL_SET_ERR_MSG(extack, "Non-erspan option type for mask");
+ return -EINVAL;
+ }
+
+ err = nla_parse_nested(tb, TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX, nla,
+ erspan_opt_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (!option_len && !tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER]) {
+ NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option ver");
+ return -EINVAL;
+ }
+
+ if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER])
+ md->version = nla_get_u8(tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER]);
+
+ if (md->version == 1) {
+ if (!option_len && !tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX]) {
+ NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option index");
+ return -EINVAL;
+ }
+ if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX]) {
+ nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX];
+ md->u.index = nla_get_be32(nla);
+ }
+ } else if (md->version == 2) {
+ if (!option_len && (!tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR] ||
+ !tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID])) {
+ NL_SET_ERR_MSG(extack, "Missing tunnel key erspan option dir or hwid");
+ return -EINVAL;
+ }
+ if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR]) {
+ nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR];
+ md->u.md2.dir = nla_get_u8(nla);
+ }
+ if (tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID]) {
+ nla = tb[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID];
+ set_hwid(&md->u.md2, nla_get_u8(nla));
+ }
+ } else {
+ NL_SET_ERR_MSG(extack, "Tunnel key erspan option ver is incorrect");
+ return -EINVAL;
+ }
+
+ return sizeof(*md);
+}
+
static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
struct fl_flow_key *mask,
struct netlink_ext_ack *extack)
@@ -958,6 +1076,11 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
nla_len(tb[TCA_FLOWER_KEY_ENC_OPTS]), key_depth) {
switch (nla_type(nla_opt_key)) {
case TCA_FLOWER_KEY_ENC_OPTS_GENEVE:
+ if (key->enc_opts.dst_opt_type &&
+ key->enc_opts.dst_opt_type != TUNNEL_GENEVE_OPT) {
+ NL_SET_ERR_MSG(extack, "Duplicate type for geneve options");
+ return -EINVAL;
+ }
option_len = 0;
key->enc_opts.dst_opt_type = TUNNEL_GENEVE_OPT;
option_len = fl_set_geneve_opt(nla_opt_key, key,
@@ -986,6 +1109,72 @@ static int fl_set_enc_opt(struct nlattr **tb, struct fl_flow_key *key,
if (msk_depth)
nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
break;
+ case TCA_FLOWER_KEY_ENC_OPTS_VXLAN:
+ if (key->enc_opts.dst_opt_type) {
+ NL_SET_ERR_MSG(extack, "Duplicate type for vxlan options");
+ return -EINVAL;
+ }
+ option_len = 0;
+ key->enc_opts.dst_opt_type = TUNNEL_VXLAN_OPT;
+ option_len = fl_set_vxlan_opt(nla_opt_key, key,
+ key_depth, option_len,
+ extack);
+ if (option_len < 0)
+ return option_len;
+
+ key->enc_opts.len += option_len;
+ /* At the same time we need to parse through the mask
+ * in order to verify exact and mask attribute lengths.
+ */
+ mask->enc_opts.dst_opt_type = TUNNEL_VXLAN_OPT;
+ option_len = fl_set_vxlan_opt(nla_opt_msk, mask,
+ msk_depth, option_len,
+ extack);
+ if (option_len < 0)
+ return option_len;
+
+ mask->enc_opts.len += option_len;
+ if (key->enc_opts.len != mask->enc_opts.len) {
+ NL_SET_ERR_MSG(extack, "Key and mask miss aligned");
+ return -EINVAL;
+ }
+
+ if (msk_depth)
+ nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
+ break;
+ case TCA_FLOWER_KEY_ENC_OPTS_ERSPAN:
+ if (key->enc_opts.dst_opt_type) {
+ NL_SET_ERR_MSG(extack, "Duplicate type for erspan options");
+ return -EINVAL;
+ }
+ option_len = 0;
+ key->enc_opts.dst_opt_type = TUNNEL_ERSPAN_OPT;
+ option_len = fl_set_erspan_opt(nla_opt_key, key,
+ key_depth, option_len,
+ extack);
+ if (option_len < 0)
+ return option_len;
+
+ key->enc_opts.len += option_len;
+ /* At the same time we need to parse through the mask
+ * in order to verify exact and mask attribute lengths.
+ */
+ mask->enc_opts.dst_opt_type = TUNNEL_ERSPAN_OPT;
+ option_len = fl_set_erspan_opt(nla_opt_msk, mask,
+ msk_depth, option_len,
+ extack);
+ if (option_len < 0)
+ return option_len;
+
+ mask->enc_opts.len += option_len;
+ if (key->enc_opts.len != mask->enc_opts.len) {
+ NL_SET_ERR_MSG(extack, "Key and mask miss aligned");
+ return -EINVAL;
+ }
+
+ if (msk_depth)
+ nla_opt_msk = nla_next(nla_opt_msk, &msk_depth);
+ break;
default:
NL_SET_ERR_MSG(extack, "Unknown tunnel option type");
return -EINVAL;
@@ -2135,6 +2324,61 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int fl_dump_key_vxlan_opt(struct sk_buff *skb,
+ struct flow_dissector_key_enc_opts *enc_opts)
+{
+ struct vxlan_metadata *md;
+ struct nlattr *nest;
+
+ nest = nla_nest_start_noflag(skb, TCA_FLOWER_KEY_ENC_OPTS_VXLAN);
+ if (!nest)
+ goto nla_put_failure;
+
+ md = (struct vxlan_metadata *)&enc_opts->data[0];
+ if (nla_put_u32(skb, TCA_FLOWER_KEY_ENC_OPT_VXLAN_GBP, md->gbp))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static int fl_dump_key_erspan_opt(struct sk_buff *skb,
+ struct flow_dissector_key_enc_opts *enc_opts)
+{
+ struct erspan_metadata *md;
+ struct nlattr *nest;
+
+ nest = nla_nest_start_noflag(skb, TCA_FLOWER_KEY_ENC_OPTS_ERSPAN);
+ if (!nest)
+ goto nla_put_failure;
+
+ md = (struct erspan_metadata *)&enc_opts->data[0];
+ if (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_ERSPAN_VER, md->version))
+ goto nla_put_failure;
+
+ if (md->version == 1 &&
+ nla_put_be32(skb, TCA_FLOWER_KEY_ENC_OPT_ERSPAN_INDEX, md->u.index))
+ goto nla_put_failure;
+
+ if (md->version == 2 &&
+ (nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_ERSPAN_DIR,
+ md->u.md2.dir) ||
+ nla_put_u8(skb, TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID,
+ get_hwid(&md->u.md2))))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
static int fl_dump_key_ct(struct sk_buff *skb,
struct flow_dissector_key_ct *key,
struct flow_dissector_key_ct *mask)
@@ -2188,6 +2432,16 @@ static int fl_dump_key_options(struct sk_buff *skb, int enc_opt_type,
if (err)
goto nla_put_failure;
break;
+ case TUNNEL_VXLAN_OPT:
+ err = fl_dump_key_vxlan_opt(skb, enc_opts);
+ if (err)
+ goto nla_put_failure;
+ break;
+ case TUNNEL_ERSPAN_OPT:
+ err = fl_dump_key_erspan_opt(skb, enc_opts);
+ if (err)
+ goto nla_put_failure;
+ break;
default:
goto nla_put_failure;
}
diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c
index df98a887eb89..b0b0dc46af61 100644
--- a/net/sched/sch_pie.c
+++ b/net/sched/sch_pie.c
@@ -22,6 +22,7 @@
#define QUEUE_THRESHOLD 16384
#define DQCOUNT_INVALID -1
+#define DTIME_INVALID 0xffffffffffffffff
#define MAX_PROB 0xffffffffffffffff
#define PIE_SCALE 8
@@ -34,6 +35,7 @@ struct pie_params {
u32 beta; /* and are used for shift relative to 1 */
bool ecn; /* true if ecn is enabled */
bool bytemode; /* to scale drop early prob based on pkt size */
+ u8 dq_rate_estimator; /* to calculate delay using Little's law */
};
/* variables used */
@@ -77,11 +79,34 @@ static void pie_params_init(struct pie_params *params)
params->target = PSCHED_NS2TICKS(15 * NSEC_PER_MSEC); /* 15 ms */
params->ecn = false;
params->bytemode = false;
+ params->dq_rate_estimator = false;
+}
+
+/* private skb vars */
+struct pie_skb_cb {
+ psched_time_t enqueue_time;
+};
+
+static struct pie_skb_cb *get_pie_cb(const struct sk_buff *skb)
+{
+ qdisc_cb_private_validate(skb, sizeof(struct pie_skb_cb));
+ return (struct pie_skb_cb *)qdisc_skb_cb(skb)->data;
+}
+
+static psched_time_t pie_get_enqueue_time(const struct sk_buff *skb)
+{
+ return get_pie_cb(skb)->enqueue_time;
+}
+
+static void pie_set_enqueue_time(struct sk_buff *skb)
+{
+ get_pie_cb(skb)->enqueue_time = psched_get_time();
}
static void pie_vars_init(struct pie_vars *vars)
{
vars->dq_count = DQCOUNT_INVALID;
+ vars->dq_tstamp = DTIME_INVALID;
vars->accu_prob = 0;
vars->avg_dq_rate = 0;
/* default of 150 ms in pschedtime */
@@ -172,6 +197,10 @@ static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
/* we can enqueue the packet */
if (enqueue) {
+ /* Set enqueue time only when dq_rate_estimator is disabled. */
+ if (!q->params.dq_rate_estimator)
+ pie_set_enqueue_time(skb);
+
q->stats.packets_in++;
if (qdisc_qlen(sch) > q->stats.maxq)
q->stats.maxq = qdisc_qlen(sch);
@@ -194,6 +223,7 @@ static const struct nla_policy pie_policy[TCA_PIE_MAX + 1] = {
[TCA_PIE_BETA] = {.type = NLA_U32},
[TCA_PIE_ECN] = {.type = NLA_U32},
[TCA_PIE_BYTEMODE] = {.type = NLA_U32},
+ [TCA_PIE_DQ_RATE_ESTIMATOR] = {.type = NLA_U32},
};
static int pie_change(struct Qdisc *sch, struct nlattr *opt,
@@ -247,6 +277,10 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_PIE_BYTEMODE])
q->params.bytemode = nla_get_u32(tb[TCA_PIE_BYTEMODE]);
+ if (tb[TCA_PIE_DQ_RATE_ESTIMATOR])
+ q->params.dq_rate_estimator =
+ nla_get_u32(tb[TCA_PIE_DQ_RATE_ESTIMATOR]);
+
/* Drop excess packets if new limit is lower */
qlen = sch->q.qlen;
while (sch->q.qlen > sch->limit) {
@@ -266,6 +300,28 @@ static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb)
{
struct pie_sched_data *q = qdisc_priv(sch);
int qlen = sch->qstats.backlog; /* current queue size in bytes */
+ psched_time_t now = psched_get_time();
+ u32 dtime = 0;
+
+ /* If dq_rate_estimator is disabled, calculate qdelay using the
+ * packet timestamp.
+ */
+ if (!q->params.dq_rate_estimator) {
+ q->vars.qdelay = now - pie_get_enqueue_time(skb);
+
+ if (q->vars.dq_tstamp != DTIME_INVALID)
+ dtime = now - q->vars.dq_tstamp;
+
+ q->vars.dq_tstamp = now;
+
+ if (qlen == 0)
+ q->vars.qdelay = 0;
+
+ if (dtime == 0)
+ return;
+
+ goto burst_allowance_reduction;
+ }
/* If current queue is about 10 packets or more and dq_count is unset
* we have enough packets to calculate the drain rate. Save
@@ -289,10 +345,10 @@ static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb)
q->vars.dq_count += skb->len;
if (q->vars.dq_count >= QUEUE_THRESHOLD) {
- psched_time_t now = psched_get_time();
- u32 dtime = now - q->vars.dq_tstamp;
u32 count = q->vars.dq_count << PIE_SCALE;
+ dtime = now - q->vars.dq_tstamp;
+
if (dtime == 0)
return;
@@ -317,14 +373,19 @@ static void pie_process_dequeue(struct Qdisc *sch, struct sk_buff *skb)
q->vars.dq_tstamp = psched_get_time();
}
- if (q->vars.burst_time > 0) {
- if (q->vars.burst_time > dtime)
- q->vars.burst_time -= dtime;
- else
- q->vars.burst_time = 0;
- }
+ goto burst_allowance_reduction;
}
}
+
+ return;
+
+burst_allowance_reduction:
+ if (q->vars.burst_time > 0) {
+ if (q->vars.burst_time > dtime)
+ q->vars.burst_time -= dtime;
+ else
+ q->vars.burst_time = 0;
+ }
}
static void calculate_probability(struct Qdisc *sch)
@@ -332,19 +393,25 @@ static void calculate_probability(struct Qdisc *sch)
struct pie_sched_data *q = qdisc_priv(sch);
u32 qlen = sch->qstats.backlog; /* queue size in bytes */
psched_time_t qdelay = 0; /* in pschedtime */
- psched_time_t qdelay_old = q->vars.qdelay; /* in pschedtime */
+ psched_time_t qdelay_old = 0; /* in pschedtime */
s64 delta = 0; /* determines the change in probability */
u64 oldprob;
u64 alpha, beta;
u32 power;
bool update_prob = true;
- q->vars.qdelay_old = q->vars.qdelay;
+ if (q->params.dq_rate_estimator) {
+ qdelay_old = q->vars.qdelay;
+ q->vars.qdelay_old = q->vars.qdelay;
- if (q->vars.avg_dq_rate > 0)
- qdelay = (qlen << PIE_SCALE) / q->vars.avg_dq_rate;
- else
- qdelay = 0;
+ if (q->vars.avg_dq_rate > 0)
+ qdelay = (qlen << PIE_SCALE) / q->vars.avg_dq_rate;
+ else
+ qdelay = 0;
+ } else {
+ qdelay = q->vars.qdelay;
+ qdelay_old = q->vars.qdelay_old;
+ }
/* If qdelay is zero and qlen is not, it means qlen is very small, less
* than dequeue_rate, so we do not update probabilty in this round
@@ -430,14 +497,18 @@ static void calculate_probability(struct Qdisc *sch)
/* We restart the measurement cycle if the following conditions are met
* 1. If the delay has been low for 2 consecutive Tupdate periods
* 2. Calculated drop probability is zero
- * 3. We have atleast one estimate for the avg_dq_rate ie.,
- * is a non-zero value
+ * 3. If average dq_rate_estimator is enabled, we have atleast one
+ * estimate for the avg_dq_rate ie., is a non-zero value
*/
if ((q->vars.qdelay < q->params.target / 2) &&
(q->vars.qdelay_old < q->params.target / 2) &&
q->vars.prob == 0 &&
- q->vars.avg_dq_rate > 0)
+ (!q->params.dq_rate_estimator || q->vars.avg_dq_rate > 0)) {
pie_vars_init(&q->vars);
+ }
+
+ if (!q->params.dq_rate_estimator)
+ q->vars.qdelay_old = qdelay;
}
static void pie_timer(struct timer_list *t)
@@ -497,7 +568,9 @@ static int pie_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_put_u32(skb, TCA_PIE_ALPHA, q->params.alpha) ||
nla_put_u32(skb, TCA_PIE_BETA, q->params.beta) ||
nla_put_u32(skb, TCA_PIE_ECN, q->params.ecn) ||
- nla_put_u32(skb, TCA_PIE_BYTEMODE, q->params.bytemode))
+ nla_put_u32(skb, TCA_PIE_BYTEMODE, q->params.bytemode) ||
+ nla_put_u32(skb, TCA_PIE_DQ_RATE_ESTIMATOR,
+ q->params.dq_rate_estimator))
goto nla_put_failure;
return nla_nest_end(skb, opts);
@@ -514,9 +587,6 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
.prob = q->vars.prob,
.delay = ((u32)PSCHED_TICKS2NS(q->vars.qdelay)) /
NSEC_PER_USEC,
- /* unscale and return dq_rate in bytes per sec */
- .avg_dq_rate = q->vars.avg_dq_rate *
- (PSCHED_TICKS_PER_SEC) >> PIE_SCALE,
.packets_in = q->stats.packets_in,
.overlimit = q->stats.overlimit,
.maxq = q->stats.maxq,
@@ -524,6 +594,14 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
.ecn_mark = q->stats.ecn_mark,
};
+ /* avg_dq_rate is only valid if dq_rate_estimator is enabled */
+ st.dq_rate_estimating = q->params.dq_rate_estimator;
+
+ /* unscale and return dq_rate in bytes per sec */
+ if (q->params.dq_rate_estimator)
+ st.avg_dq_rate = q->vars.avg_dq_rate *
+ (PSCHED_TICKS_PER_SEC) >> PIE_SCALE;
+
return gnet_stats_copy_app(d, &st, sizeof(st));
}
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 7cd68628c637..c609373c8661 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -922,7 +922,7 @@ static int taprio_parse_mqprio_opt(struct net_device *dev,
}
/* Verify priority mapping uses valid tcs */
- for (i = 0; i < TC_BITMASK + 1; i++) {
+ for (i = 0; i <= TC_BITMASK; i++) {
if (qopt->prio_tc_map[i] >= qopt->num_tc) {
NL_SET_ERR_MSG(extack, "Invalid traffic class in priority to traffic class mapping");
return -EINVAL;
@@ -1347,6 +1347,26 @@ out:
return err;
}
+static int taprio_mqprio_cmp(const struct net_device *dev,
+ const struct tc_mqprio_qopt *mqprio)
+{
+ int i;
+
+ if (!mqprio || mqprio->num_tc != dev->num_tc)
+ return -1;
+
+ for (i = 0; i < mqprio->num_tc; i++)
+ if (dev->tc_to_txq[i].count != mqprio->count[i] ||
+ dev->tc_to_txq[i].offset != mqprio->offset[i])
+ return -1;
+
+ for (i = 0; i <= TC_BITMASK; i++)
+ if (dev->prio_tc_map[i] != mqprio->prio_tc_map[i])
+ return -1;
+
+ return 0;
+}
+
static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
@@ -1398,6 +1418,10 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
admin = rcu_dereference(q->admin_sched);
rcu_read_unlock();
+ /* no changes - no new mqprio settings */
+ if (!taprio_mqprio_cmp(dev, mqprio))
+ mqprio = NULL;
+
if (mqprio && (oper || admin)) {
NL_SET_ERR_MSG(extack, "Changing the traffic mapping of a running schedule is not supported");
err = -ENOTSUPP;
@@ -1455,7 +1479,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
mqprio->offset[i]);
/* Always use supplied priority mappings */
- for (i = 0; i < TC_BITMASK + 1; i++)
+ for (i = 0; i <= TC_BITMASK; i++)
netdev_set_prio_tc_map(dev, i,
mqprio->prio_tc_map[i]);
}
diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index f41096a759fa..55aeba681cf4 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -87,9 +87,9 @@ int tipc_bcast_get_mtu(struct net *net)
return tipc_link_mss(tipc_bc_sndlink(net));
}
-void tipc_bcast_disable_rcast(struct net *net)
+void tipc_bcast_toggle_rcast(struct net *net, bool supp)
{
- tipc_bc_base(net)->rcast_support = false;
+ tipc_bc_base(net)->rcast_support = supp;
}
static void tipc_bcbase_calc_bc_threshold(struct net *net)
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index dadad953e2be..9e847d9617d3 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -85,7 +85,7 @@ void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_bcl);
void tipc_bcast_inc_bearer_dst_cnt(struct net *net, int bearer_id);
void tipc_bcast_dec_bearer_dst_cnt(struct net *net, int bearer_id);
int tipc_bcast_get_mtu(struct net *net);
-void tipc_bcast_disable_rcast(struct net *net);
+void tipc_bcast_toggle_rcast(struct net *net, bool supp);
int tipc_mcast_xmit(struct net *net, struct sk_buff_head *pkts,
struct tipc_mc_method *method, struct tipc_nlist *dests,
u16 *cong_link_cnt);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index fb72031228c9..24d4d10756d3 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -550,7 +550,7 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
/* Disable replicast if even a single peer doesn't support it */
if (link_is_bc_rcvlink(l) && !(peer_caps & TIPC_BCAST_RCAST))
- tipc_bcast_disable_rcast(net);
+ tipc_bcast_toggle_rcast(net, false);
return true;
}
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index 66a65c2cdb23..92d04dc2a44b 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -35,6 +35,7 @@
*/
#include <net/sock.h>
+#include <linux/list_sort.h>
#include "core.h"
#include "netlink.h"
#include "name_table.h"
@@ -66,6 +67,7 @@ struct service_range {
/**
* struct tipc_service - container for all published instances of a service type
* @type: 32 bit 'type' value for service
+ * @publ_cnt: increasing counter for publications in this service
* @ranges: rb tree containing all service ranges for this service
* @service_list: links to adjacent name ranges in hash chain
* @subscriptions: list of subscriptions for this service type
@@ -74,6 +76,7 @@ struct service_range {
*/
struct tipc_service {
u32 type;
+ u32 publ_cnt;
struct rb_root ranges;
struct hlist_node service_list;
struct list_head subscriptions;
@@ -109,6 +112,7 @@ static struct publication *tipc_publ_create(u32 type, u32 lower, u32 upper,
INIT_LIST_HEAD(&publ->binding_node);
INIT_LIST_HEAD(&publ->local_publ);
INIT_LIST_HEAD(&publ->all_publ);
+ INIT_LIST_HEAD(&publ->list);
return publ;
}
@@ -244,6 +248,8 @@ static struct publication *tipc_service_insert_publ(struct net *net,
p = tipc_publ_create(type, lower, upper, scope, node, port, key);
if (!p)
goto err;
+ /* Suppose there shouldn't be a huge gap btw publs i.e. >INT_MAX */
+ p->id = sc->publ_cnt++;
if (in_own_node(net, node))
list_add(&p->local_publ, &sr->local_publ);
list_add(&p->all_publ, &sr->all_publ);
@@ -278,6 +284,20 @@ static struct publication *tipc_service_remove_publ(struct service_range *sr,
}
/**
+ * Code reused: time_after32() for the same purpose
+ */
+#define publication_after(pa, pb) time_after32((pa)->id, (pb)->id)
+static int tipc_publ_sort(void *priv, struct list_head *a,
+ struct list_head *b)
+{
+ struct publication *pa, *pb;
+
+ pa = container_of(a, struct publication, list);
+ pb = container_of(b, struct publication, list);
+ return publication_after(pa, pb);
+}
+
+/**
* tipc_service_subscribe - attach a subscription, and optionally
* issue the prescribed number of events if there is any service
* range overlapping with the requested range
@@ -286,36 +306,51 @@ static void tipc_service_subscribe(struct tipc_service *service,
struct tipc_subscription *sub)
{
struct tipc_subscr *sb = &sub->evt.s;
+ struct publication *p, *first, *tmp;
+ struct list_head publ_list;
struct service_range *sr;
struct tipc_name_seq ns;
- struct publication *p;
struct rb_node *n;
- bool first;
+ u32 filter;
ns.type = tipc_sub_read(sb, seq.type);
ns.lower = tipc_sub_read(sb, seq.lower);
ns.upper = tipc_sub_read(sb, seq.upper);
+ filter = tipc_sub_read(sb, filter);
tipc_sub_get(sub);
list_add(&sub->service_list, &service->subscriptions);
- if (tipc_sub_read(sb, filter) & TIPC_SUB_NO_STATUS)
+ if (filter & TIPC_SUB_NO_STATUS)
return;
+ INIT_LIST_HEAD(&publ_list);
for (n = rb_first(&service->ranges); n; n = rb_next(n)) {
sr = container_of(n, struct service_range, tree_node);
if (sr->lower > ns.upper)
break;
if (!tipc_sub_check_overlap(&ns, sr->lower, sr->upper))
continue;
- first = true;
+ first = NULL;
list_for_each_entry(p, &sr->all_publ, all_publ) {
- tipc_sub_report_overlap(sub, sr->lower, sr->upper,
- TIPC_PUBLISHED, p->port,
- p->node, p->scope, first);
- first = false;
+ if (filter & TIPC_SUB_PORTS)
+ list_add_tail(&p->list, &publ_list);
+ else if (!first || publication_after(first, p))
+ /* Pick this range's *first* publication */
+ first = p;
}
+ if (first)
+ list_add_tail(&first->list, &publ_list);
+ }
+
+ /* Sort the publications before reporting */
+ list_sort(NULL, &publ_list, tipc_publ_sort);
+ list_for_each_entry_safe(p, tmp, &publ_list, list) {
+ tipc_sub_report_overlap(sub, p->lower, p->upper,
+ TIPC_PUBLISHED, p->port, p->node,
+ p->scope, true);
+ list_del_init(&p->list);
}
}
diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h
index f79066334cc8..728bc7016c38 100644
--- a/net/tipc/name_table.h
+++ b/net/tipc/name_table.h
@@ -58,6 +58,7 @@ struct tipc_group;
* @node: network address of publishing socket's node
* @port: publishing port
* @key: publication key, unique across the cluster
+ * @id: publication id
* @binding_node: all publications from the same node which bound this one
* - Remote publications: in node->publ_list
* Used by node/name distr to withdraw publications when node is lost
@@ -69,6 +70,7 @@ struct tipc_group;
* Used by closest_first and multicast receive lookup algorithms
* @all_publ: all publications identical to this one, whatever node and scope
* Used by round-robin lookup algorithm
+ * @list: to form a list of publications in temporal order
* @rcu: RCU callback head used for deferred freeing
*/
struct publication {
@@ -79,10 +81,12 @@ struct publication {
u32 node;
u32 port;
u32 key;
+ u32 id;
struct list_head binding_node;
struct list_head binding_sock;
struct list_head local_publ;
struct list_head all_publ;
+ struct list_head list;
struct rcu_head rcu;
};
diff --git a/net/tipc/node.c b/net/tipc/node.c
index aaf595613e6e..ab04e00cb95b 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -496,6 +496,9 @@ update:
tn->capabilities &= temp_node->capabilities;
}
+ tipc_bcast_toggle_rcast(net,
+ (tn->capabilities & TIPC_BCAST_RCAST));
+
goto exit;
}
n = kzalloc(sizeof(*n), GFP_ATOMIC);
@@ -557,6 +560,7 @@ update:
list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
tn->capabilities &= temp_node->capabilities;
}
+ tipc_bcast_toggle_rcast(net, (tn->capabilities & TIPC_BCAST_RCAST));
trace_tipc_node_create(n, true, " ");
exit:
spin_unlock_bh(&tn->node_list_lock);
@@ -740,7 +744,8 @@ static bool tipc_node_cleanup(struct tipc_node *peer)
list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
tn->capabilities &= temp_node->capabilities;
}
-
+ tipc_bcast_toggle_rcast(peer->net,
+ (tn->capabilities & TIPC_BCAST_RCAST));
spin_unlock_bh(&tn->node_list_lock);
return deleted;
}
@@ -2198,6 +2203,7 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info)
list_for_each_entry_rcu(temp_node, &tn->node_list, list) {
tn->capabilities &= temp_node->capabilities;
}
+ tipc_bcast_toggle_rcast(net, (tn->capabilities & TIPC_BCAST_RCAST));
err = 0;
err_out:
tipc_node_put(peer);
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index 5bb93dd5762b..bdca31ffe6da 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -861,6 +861,7 @@ static int __init tls_register(void)
tls_sw_proto_ops = inet_stream_ops;
tls_sw_proto_ops.splice_read = tls_sw_splice_read;
+ tls_sw_proto_ops.sendpage_locked = tls_sw_sendpage_locked,
tls_device_init();
tcp_register_ulp(&tcp_tls_ulp_ops);
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 141da093ff04..da9f9ce51e7b 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1209,6 +1209,17 @@ sendpage_end:
return copied ? copied : ret;
}
+int tls_sw_sendpage_locked(struct sock *sk, struct page *page,
+ int offset, size_t size, int flags)
+{
+ if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
+ MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY |
+ MSG_NO_SHARED_FRAGS))
+ return -ENOTSUPP;
+
+ return tls_sw_do_sendpage(sk, page, offset, size, flags);
+}
+
int tls_sw_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags)
{
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index cc8659838bf2..74db4cd637a7 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -412,6 +412,7 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
const struct vsock_transport *new_transport;
struct sock *sk = sk_vsock(vsk);
unsigned int remote_cid = vsk->remote_addr.svm_cid;
+ int ret;
switch (sk->sk_type) {
case SOCK_DGRAM:
@@ -443,9 +444,15 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
if (!new_transport || !try_module_get(new_transport->module))
return -ENODEV;
+ ret = new_transport->init(vsk, psk);
+ if (ret) {
+ module_put(new_transport->module);
+ return ret;
+ }
+
vsk->transport = new_transport;
- return vsk->transport->init(vsk, psk);
+ return 0;
}
EXPORT_SYMBOL_GPL(vsock_assign_transport);
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 51bb6018f3bf..17b8a7d4b71b 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -3,13 +3,13 @@
# XFRM configuration
#
config XFRM
- bool
- depends on INET
- select GRO_CELLS
- select SKB_EXTENSIONS
+ bool
+ depends on INET
+ select GRO_CELLS
+ select SKB_EXTENSIONS
config XFRM_OFFLOAD
- bool
+ bool
config XFRM_ALGO
tristate
OpenPOWER on IntegriCloud