diff options
Diffstat (limited to 'net/sched/cls_api.c')
-rw-r--r-- | net/sched/cls_api.c | 847 |
1 files changed, 575 insertions, 272 deletions
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index efd3cfb80a2a..c2cdd0fc2e70 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -21,6 +21,7 @@ #include <linux/slab.h> #include <linux/idr.h> #include <linux/rhashtable.h> +#include <linux/jhash.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/netlink.h> @@ -36,6 +37,8 @@ #include <net/tc_act/tc_sample.h> #include <net/tc_act/tc_skbedit.h> #include <net/tc_act/tc_ct.h> +#include <net/tc_act/tc_mpls.h> +#include <net/flow_offload.h> extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1]; @@ -45,6 +48,62 @@ static LIST_HEAD(tcf_proto_base); /* Protects list of registered TC modules. It is pure SMP lock. */ static DEFINE_RWLOCK(cls_mod_lock); +static u32 destroy_obj_hashfn(const struct tcf_proto *tp) +{ + return jhash_3words(tp->chain->index, tp->prio, + (__force __u32)tp->protocol, 0); +} + +static void tcf_proto_signal_destroying(struct tcf_chain *chain, + struct tcf_proto *tp) +{ + struct tcf_block *block = chain->block; + + mutex_lock(&block->proto_destroy_lock); + hash_add_rcu(block->proto_destroy_ht, &tp->destroy_ht_node, + destroy_obj_hashfn(tp)); + mutex_unlock(&block->proto_destroy_lock); +} + +static bool tcf_proto_cmp(const struct tcf_proto *tp1, + const struct tcf_proto *tp2) +{ + return tp1->chain->index == tp2->chain->index && + tp1->prio == tp2->prio && + tp1->protocol == tp2->protocol; +} + +static bool tcf_proto_exists_destroying(struct tcf_chain *chain, + struct tcf_proto *tp) +{ + u32 hash = destroy_obj_hashfn(tp); + struct tcf_proto *iter; + bool found = false; + + rcu_read_lock(); + hash_for_each_possible_rcu(chain->block->proto_destroy_ht, iter, + destroy_ht_node, hash) { + if (tcf_proto_cmp(tp, iter)) { + found = true; + break; + } + } + rcu_read_unlock(); + + return found; +} + +static void +tcf_proto_signal_destroyed(struct tcf_chain *chain, struct tcf_proto *tp) +{ + struct tcf_block *block = chain->block; + + mutex_lock(&block->proto_destroy_lock); + if (hash_hashed(&tp->destroy_ht_node)) + hash_del_rcu(&tp->destroy_ht_node); + mutex_unlock(&block->proto_destroy_lock); +} + /* Find classifier type by string name */ static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind) @@ -160,11 +219,22 @@ static inline u32 tcf_auto_prio(struct tcf_proto *tp) return TC_H_MAJ(first); } +static bool tcf_proto_check_kind(struct nlattr *kind, char *name) +{ + if (kind) + return nla_strlcpy(name, kind, IFNAMSIZ) >= IFNAMSIZ; + memset(name, 0, IFNAMSIZ); + return false; +} + static bool tcf_proto_is_unlocked(const char *kind) { const struct tcf_proto_ops *ops; bool ret; + if (strlen(kind) == 0) + return false; + ops = tcf_proto_lookup_ops(kind, false, NULL); /* On error return false to take rtnl lock. Proto lookup/create * functions will perform lookup again and properly handle errors. @@ -221,9 +291,11 @@ static void tcf_proto_get(struct tcf_proto *tp) static void tcf_chain_put(struct tcf_chain *chain); static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held, - struct netlink_ext_ack *extack) + bool sig_destroy, struct netlink_ext_ack *extack) { tp->ops->destroy(tp, rtnl_held, extack); + if (sig_destroy) + tcf_proto_signal_destroyed(tp->chain, tp); tcf_chain_put(tp->chain); module_put(tp->ops->owner); kfree_rcu(tp, rcu); @@ -233,36 +305,15 @@ static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { if (refcount_dec_and_test(&tp->refcnt)) - tcf_proto_destroy(tp, rtnl_held, extack); -} - -static int walker_check_empty(struct tcf_proto *tp, void *fh, - struct tcf_walker *arg) -{ - if (fh) { - arg->nonempty = true; - return -1; - } - return 0; + tcf_proto_destroy(tp, rtnl_held, true, extack); } -static bool tcf_proto_is_empty(struct tcf_proto *tp, bool rtnl_held) +static bool tcf_proto_check_delete(struct tcf_proto *tp) { - struct tcf_walker walker = { .fn = walker_check_empty, }; + if (tp->ops->delete_empty) + return tp->ops->delete_empty(tp); - if (tp->ops->walk) { - tp->ops->walk(tp, &walker, rtnl_held); - return !walker.nonempty; - } - return true; -} - -static bool tcf_proto_check_delete(struct tcf_proto *tp, bool rtnl_held) -{ - spin_lock(&tp->lock); - if (tcf_proto_is_empty(tp, rtnl_held)) - tp->deleting = true; - spin_unlock(&tp->lock); + tp->deleting = true; return tp->deleting; } @@ -357,6 +408,7 @@ static bool tcf_chain_detach(struct tcf_chain *chain) static void tcf_block_destroy(struct tcf_block *block) { mutex_destroy(&block->lock); + mutex_destroy(&block->proto_destroy_lock); kfree_rcu(block, rcu); } @@ -532,6 +584,12 @@ static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held) mutex_lock(&chain->filter_chain_lock); tp = tcf_chain_dereference(chain->filter_chain, chain); + while (tp) { + tp_next = rcu_dereference_protected(tp->next, 1); + tcf_proto_signal_destroying(chain, tp); + tp = tp_next; + } + tp = tcf_chain_dereference(chain->filter_chain, chain); RCU_INIT_POINTER(chain->filter_chain, NULL); tcf_chain0_head_change(chain, NULL); chain->flushing = true; @@ -544,235 +602,87 @@ static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held) } } -static struct tcf_block *tc_dev_ingress_block(struct net_device *dev) -{ - const struct Qdisc_class_ops *cops; - struct Qdisc *qdisc; - - if (!dev_ingress_queue(dev)) - return NULL; - - qdisc = dev_ingress_queue(dev)->qdisc_sleeping; - if (!qdisc) - return NULL; - - cops = qdisc->ops->cl_ops; - if (!cops) - return NULL; - - if (!cops->tcf_block) - return NULL; - - return cops->tcf_block(qdisc, TC_H_MIN_INGRESS, NULL); -} - -static struct rhashtable indr_setup_block_ht; - -struct tc_indr_block_dev { - struct rhash_head ht_node; - struct net_device *dev; - unsigned int refcnt; - struct list_head cb_list; - struct tcf_block *block; -}; - -struct tc_indr_block_cb { - struct list_head list; - void *cb_priv; - tc_indr_block_bind_cb_t *cb; - void *cb_ident; -}; - -static const struct rhashtable_params tc_indr_setup_block_ht_params = { - .key_offset = offsetof(struct tc_indr_block_dev, dev), - .head_offset = offsetof(struct tc_indr_block_dev, ht_node), - .key_len = sizeof(struct net_device *), -}; - -static struct tc_indr_block_dev * -tc_indr_block_dev_lookup(struct net_device *dev) -{ - return rhashtable_lookup_fast(&indr_setup_block_ht, &dev, - tc_indr_setup_block_ht_params); -} - -static struct tc_indr_block_dev *tc_indr_block_dev_get(struct net_device *dev) -{ - struct tc_indr_block_dev *indr_dev; - - indr_dev = tc_indr_block_dev_lookup(dev); - if (indr_dev) - goto inc_ref; - - indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL); - if (!indr_dev) - return NULL; - - INIT_LIST_HEAD(&indr_dev->cb_list); - indr_dev->dev = dev; - indr_dev->block = tc_dev_ingress_block(dev); - if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node, - tc_indr_setup_block_ht_params)) { - kfree(indr_dev); - return NULL; - } - -inc_ref: - indr_dev->refcnt++; - return indr_dev; -} - -static void tc_indr_block_dev_put(struct tc_indr_block_dev *indr_dev) -{ - if (--indr_dev->refcnt) - return; - - rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node, - tc_indr_setup_block_ht_params); - kfree(indr_dev); -} - -static struct tc_indr_block_cb * -tc_indr_block_cb_lookup(struct tc_indr_block_dev *indr_dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - struct tc_indr_block_cb *indr_block_cb; - - list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list) - if (indr_block_cb->cb == cb && - indr_block_cb->cb_ident == cb_ident) - return indr_block_cb; - return NULL; -} - -static struct tc_indr_block_cb * -tc_indr_block_cb_add(struct tc_indr_block_dev *indr_dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - struct tc_indr_block_cb *indr_block_cb; - - indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident); - if (indr_block_cb) - return ERR_PTR(-EEXIST); - - indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL); - if (!indr_block_cb) - return ERR_PTR(-ENOMEM); - - indr_block_cb->cb_priv = cb_priv; - indr_block_cb->cb = cb; - indr_block_cb->cb_ident = cb_ident; - list_add(&indr_block_cb->list, &indr_dev->cb_list); - - return indr_block_cb; -} - -static void tc_indr_block_cb_del(struct tc_indr_block_cb *indr_block_cb) -{ - list_del(&indr_block_cb->list); - kfree(indr_block_cb); -} - static int tcf_block_setup(struct tcf_block *block, struct flow_block_offload *bo); -static void tc_indr_block_ing_cmd(struct tc_indr_block_dev *indr_dev, - struct tc_indr_block_cb *indr_block_cb, - enum flow_block_command command) +static void tc_indr_block_cmd(struct net_device *dev, struct tcf_block *block, + flow_indr_block_bind_cb_t *cb, void *cb_priv, + enum flow_block_command command, bool ingress) { struct flow_block_offload bo = { .command = command, - .binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS, - .net = dev_net(indr_dev->dev), - .block_shared = tcf_block_non_null_shared(indr_dev->block), + .binder_type = ingress ? + FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS : + FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS, + .net = dev_net(dev), + .block_shared = tcf_block_non_null_shared(block), }; INIT_LIST_HEAD(&bo.cb_list); - if (!indr_dev->block) + if (!block) return; - bo.block = &indr_dev->block->flow_block; + bo.block = &block->flow_block; + + down_write(&block->cb_lock); + cb(dev, cb_priv, TC_SETUP_BLOCK, &bo); - indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK, - &bo); - tcf_block_setup(indr_dev->block, &bo); + tcf_block_setup(block, &bo); + up_write(&block->cb_lock); } -int __tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident) +static struct tcf_block *tc_dev_block(struct net_device *dev, bool ingress) { - struct tc_indr_block_cb *indr_block_cb; - struct tc_indr_block_dev *indr_dev; - int err; + const struct Qdisc_class_ops *cops; + const struct Qdisc_ops *ops; + struct Qdisc *qdisc; - indr_dev = tc_indr_block_dev_get(dev); - if (!indr_dev) - return -ENOMEM; + if (!dev_ingress_queue(dev)) + return NULL; - indr_block_cb = tc_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident); - err = PTR_ERR_OR_ZERO(indr_block_cb); - if (err) - goto err_dev_put; + qdisc = dev_ingress_queue(dev)->qdisc_sleeping; + if (!qdisc) + return NULL; - tc_indr_block_ing_cmd(indr_dev, indr_block_cb, FLOW_BLOCK_BIND); - return 0; + ops = qdisc->ops; + if (!ops) + return NULL; -err_dev_put: - tc_indr_block_dev_put(indr_dev); - return err; -} -EXPORT_SYMBOL_GPL(__tc_indr_block_cb_register); + if (!ingress && !strcmp("ingress", ops->id)) + return NULL; -int tc_indr_block_cb_register(struct net_device *dev, void *cb_priv, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - int err; + cops = ops->cl_ops; + if (!cops) + return NULL; - rtnl_lock(); - err = __tc_indr_block_cb_register(dev, cb_priv, cb, cb_ident); - rtnl_unlock(); + if (!cops->tcf_block) + return NULL; - return err; + return cops->tcf_block(qdisc, + ingress ? TC_H_MIN_INGRESS : TC_H_MIN_EGRESS, + NULL); } -EXPORT_SYMBOL_GPL(tc_indr_block_cb_register); -void __tc_indr_block_cb_unregister(struct net_device *dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident) +static void tc_indr_block_get_and_cmd(struct net_device *dev, + flow_indr_block_bind_cb_t *cb, + void *cb_priv, + enum flow_block_command command) { - struct tc_indr_block_cb *indr_block_cb; - struct tc_indr_block_dev *indr_dev; - - indr_dev = tc_indr_block_dev_lookup(dev); - if (!indr_dev) - return; + struct tcf_block *block; - indr_block_cb = tc_indr_block_cb_lookup(indr_dev, cb, cb_ident); - if (!indr_block_cb) - return; + block = tc_dev_block(dev, true); + tc_indr_block_cmd(dev, block, cb, cb_priv, command, true); - /* Send unbind message if required to free any block cbs. */ - tc_indr_block_ing_cmd(indr_dev, indr_block_cb, FLOW_BLOCK_UNBIND); - tc_indr_block_cb_del(indr_block_cb); - tc_indr_block_dev_put(indr_dev); -} -EXPORT_SYMBOL_GPL(__tc_indr_block_cb_unregister); - -void tc_indr_block_cb_unregister(struct net_device *dev, - tc_indr_block_bind_cb_t *cb, void *cb_ident) -{ - rtnl_lock(); - __tc_indr_block_cb_unregister(dev, cb, cb_ident); - rtnl_unlock(); + block = tc_dev_block(dev, false); + tc_indr_block_cmd(dev, block, cb, cb_priv, command, false); } -EXPORT_SYMBOL_GPL(tc_indr_block_cb_unregister); -static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev, +static void tc_indr_block_call(struct tcf_block *block, + struct net_device *dev, struct tcf_block_ext_info *ei, enum flow_block_command command, struct netlink_ext_ack *extack) { - struct tc_indr_block_cb *indr_block_cb; - struct tc_indr_block_dev *indr_dev; struct flow_block_offload bo = { .command = command, .binder_type = ei->binder_type, @@ -783,22 +693,13 @@ static void tc_indr_block_call(struct tcf_block *block, struct net_device *dev, }; INIT_LIST_HEAD(&bo.cb_list); - indr_dev = tc_indr_block_dev_lookup(dev); - if (!indr_dev) - return; - - indr_dev->block = command == FLOW_BLOCK_BIND ? block : NULL; - - list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list) - indr_block_cb->cb(dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK, - &bo); - + flow_indr_block_call(dev, &bo, command); tcf_block_setup(block, &bo); } static bool tcf_block_offload_in_use(struct tcf_block *block) { - return block->offloadcnt; + return atomic_read(&block->offloadcnt); } static int tcf_block_offload_cmd(struct tcf_block *block, @@ -832,6 +733,7 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, struct net_device *dev = q->dev_queue->dev; int err; + down_write(&block->cb_lock); if (!dev->netdev_ops->ndo_setup_tc) goto no_offload_dev_inc; @@ -840,24 +742,31 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, */ if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) { NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled"); - return -EOPNOTSUPP; + err = -EOPNOTSUPP; + goto err_unlock; } err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_BIND, extack); if (err == -EOPNOTSUPP) goto no_offload_dev_inc; if (err) - return err; + goto err_unlock; tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack); + up_write(&block->cb_lock); return 0; no_offload_dev_inc: - if (tcf_block_offload_in_use(block)) - return -EOPNOTSUPP; + if (tcf_block_offload_in_use(block)) { + err = -EOPNOTSUPP; + goto err_unlock; + } + err = 0; block->nooffloaddevcnt++; tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack); - return 0; +err_unlock: + up_write(&block->cb_lock); + return err; } static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, @@ -866,6 +775,7 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, struct net_device *dev = q->dev_queue->dev; int err; + down_write(&block->cb_lock); tc_indr_block_call(block, dev, ei, FLOW_BLOCK_UNBIND, NULL); if (!dev->netdev_ops->ndo_setup_tc) @@ -873,10 +783,12 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_UNBIND, NULL); if (err == -EOPNOTSUPP) goto no_offload_dev_dec; + up_write(&block->cb_lock); return; no_offload_dev_dec: WARN_ON(block->nooffloaddevcnt-- == 0); + up_write(&block->cb_lock); } static int @@ -991,6 +903,8 @@ static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q, return ERR_PTR(-ENOMEM); } mutex_init(&block->lock); + mutex_init(&block->proto_destroy_lock); + init_rwsem(&block->cb_lock); flow_block_init(&block->flow_block); INIT_LIST_HEAD(&block->chain_list); INIT_LIST_HEAD(&block->owner_list); @@ -1526,6 +1440,8 @@ tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb, struct tcf_proto *tp, *tp_prev; int err; + lockdep_assert_held(&block->cb_lock); + for (chain = __tcf_get_next_chain(block, NULL); chain; chain_prev = chain, @@ -1564,6 +1480,8 @@ static int tcf_block_bind(struct tcf_block *block, struct flow_block_cb *block_cb, *next; int err, i = 0; + lockdep_assert_held(&block->cb_lock); + list_for_each_entry(block_cb, &bo->cb_list, list) { err = tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv, true, @@ -1571,6 +1489,8 @@ static int tcf_block_bind(struct tcf_block *block, bo->extack); if (err) goto err_unroll; + if (!bo->unlocked_driver_cb) + block->lockeddevcnt++; i++; } @@ -1586,6 +1506,8 @@ err_unroll: block_cb->cb_priv, false, tcf_block_offload_in_use(block), NULL); + if (!bo->unlocked_driver_cb) + block->lockeddevcnt--; } flow_block_cb_free(block_cb); } @@ -1598,6 +1520,8 @@ static void tcf_block_unbind(struct tcf_block *block, { struct flow_block_cb *block_cb, *next; + lockdep_assert_held(&block->cb_lock); + list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) { tcf_block_playback_offloads(block, block_cb->cb, block_cb->cb_priv, false, @@ -1605,6 +1529,8 @@ static void tcf_block_unbind(struct tcf_block *block, NULL); list_del(&block_cb->list); flow_block_cb_free(block_cb); + if (!bo->unlocked_driver_cb) + block->lockeddevcnt--; } } @@ -1659,6 +1585,18 @@ reclassify: goto reset; } else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) { first_tp = res->goto_tp; + +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) + { + struct tc_skb_ext *ext; + + ext = skb_ext_add(skb, TC_SKB_EXT); + if (WARN_ON_ONCE(!ext)) + return TC_ACT_SHOT; + + ext->chain = err & TC_ACT_EXT_VAL_MASK; + } +#endif goto reset; } #endif @@ -1743,6 +1681,12 @@ static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain, mutex_lock(&chain->filter_chain_lock); + if (tcf_proto_exists_destroying(chain, tp_new)) { + mutex_unlock(&chain->filter_chain_lock); + tcf_proto_destroy(tp_new, rtnl_held, false, NULL); + return ERR_PTR(-EAGAIN); + } + tp = tcf_chain_tp_find(chain, &chain_info, protocol, prio, false); if (!tp) @@ -1750,10 +1694,10 @@ static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain, mutex_unlock(&chain->filter_chain_lock); if (tp) { - tcf_proto_destroy(tp_new, rtnl_held, NULL); + tcf_proto_destroy(tp_new, rtnl_held, false, NULL); tp_new = tp; } else if (err) { - tcf_proto_destroy(tp_new, rtnl_held, NULL); + tcf_proto_destroy(tp_new, rtnl_held, false, NULL); tp_new = ERR_PTR(err); } @@ -1786,11 +1730,12 @@ static void tcf_chain_tp_delete_empty(struct tcf_chain *chain, * concurrently. * Mark tp for deletion if it is empty. */ - if (!tp_iter || !tcf_proto_check_delete(tp, rtnl_held)) { + if (!tp_iter || !tcf_proto_check_delete(tp)) { mutex_unlock(&chain->filter_chain_lock); return; } + tcf_proto_signal_destroying(chain, tp); next = tcf_chain_dereference(chain_info.next, chain); if (tp == chain->filter_chain) tcf_chain0_head_change(chain, next); @@ -1976,6 +1921,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; + char name[IFNAMSIZ]; struct tcmsg *t; u32 protocol; u32 prio; @@ -2032,13 +1978,19 @@ replay: if (err) return err; + if (tcf_proto_check_kind(tca[TCA_KIND], name)) { + NL_SET_ERR_MSG(extack, "Specified TC filter name too long"); + err = -EINVAL; + goto errout; + } + /* Take rtnl mutex if rtnl_held was set to true on previous iteration, * block is shared (no qdisc found), qdisc is not unlocked, classifier * type is not specified, classifier is not unlocked. */ if (rtnl_held || (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || - !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) { + !tcf_proto_is_unlocked(name)) { rtnl_held = true; rtnl_lock(); } @@ -2103,9 +2055,8 @@ replay: &chain_info)); mutex_unlock(&chain->filter_chain_lock); - tp_new = tcf_proto_create(nla_data(tca[TCA_KIND]), - protocol, prio, chain, rtnl_held, - extack); + tp_new = tcf_proto_create(name, protocol, prio, chain, + rtnl_held, extack); if (IS_ERR(tp_new)) { err = PTR_ERR(tp_new); goto errout_tp; @@ -2196,6 +2147,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; + char name[IFNAMSIZ]; struct tcmsg *t; u32 protocol; u32 prio; @@ -2235,13 +2187,18 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, if (err) return err; + if (tcf_proto_check_kind(tca[TCA_KIND], name)) { + NL_SET_ERR_MSG(extack, "Specified TC filter name too long"); + err = -EINVAL; + goto errout; + } /* Take rtnl mutex if flushing whole chain, block is shared (no qdisc * found), qdisc is not unlocked, classifier type is not specified, * classifier is not unlocked. */ if (!prio || (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || - !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) { + !tcf_proto_is_unlocked(name)) { rtnl_held = true; rtnl_lock(); } @@ -2297,6 +2254,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, err = -EINVAL; goto errout_locked; } else if (t->tcm_handle == 0) { + tcf_proto_signal_destroying(chain, tp); tcf_chain_tp_remove(chain, &chain_info, tp); mutex_unlock(&chain->filter_chain_lock); @@ -2349,6 +2307,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_MAX + 1]; + char name[IFNAMSIZ]; struct tcmsg *t; u32 protocol; u32 prio; @@ -2385,12 +2344,17 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, if (err) return err; + if (tcf_proto_check_kind(tca[TCA_KIND], name)) { + NL_SET_ERR_MSG(extack, "Specified TC filter name too long"); + err = -EINVAL; + goto errout; + } /* Take rtnl mutex if block is shared (no qdisc found), qdisc is not * unlocked, classifier type is not specified, classifier is not * unlocked. */ if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) || - !tca[TCA_KIND] || !tcf_proto_is_unlocked(nla_data(tca[TCA_KIND]))) { + !tcf_proto_is_unlocked(name)) { rtnl_held = true; rtnl_lock(); } @@ -2749,13 +2713,19 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net, struct netlink_ext_ack *extack) { const struct tcf_proto_ops *ops; + char name[IFNAMSIZ]; void *tmplt_priv; /* If kind is not set, user did not specify template. */ if (!tca[TCA_KIND]) return 0; - ops = tcf_proto_lookup_ops(nla_data(tca[TCA_KIND]), true, extack); + if (tcf_proto_check_kind(tca[TCA_KIND], name)) { + NL_SET_ERR_MSG(extack, "Specified TC chain template name too long"); + return -EINVAL; + } + + ops = tcf_proto_lookup_ops(name, true, extack); if (IS_ERR(ops)) return PTR_ERR(ops); if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) { @@ -3027,8 +2997,10 @@ out: void tcf_exts_destroy(struct tcf_exts *exts) { #ifdef CONFIG_NET_CLS_ACT - tcf_action_destroy(exts->actions, TCA_ACT_UNBIND); - kfree(exts->actions); + if (exts->actions) { + tcf_action_destroy(exts->actions, TCA_ACT_UNBIND); + kfree(exts->actions); + } exts->nr_actions = 0; #endif } @@ -3151,17 +3123,61 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts) } EXPORT_SYMBOL(tcf_exts_dump_stats); -int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, - void *type_data, bool err_stop) +static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags) +{ + if (*flags & TCA_CLS_FLAGS_IN_HW) + return; + *flags |= TCA_CLS_FLAGS_IN_HW; + atomic_inc(&block->offloadcnt); +} + +static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags) +{ + if (!(*flags & TCA_CLS_FLAGS_IN_HW)) + return; + *flags &= ~TCA_CLS_FLAGS_IN_HW; + atomic_dec(&block->offloadcnt); +} + +static void tc_cls_offload_cnt_update(struct tcf_block *block, + struct tcf_proto *tp, u32 *cnt, + u32 *flags, u32 diff, bool add) +{ + lockdep_assert_held(&block->cb_lock); + + spin_lock(&tp->lock); + if (add) { + if (!*cnt) + tcf_block_offload_inc(block, flags); + *cnt += diff; + } else { + *cnt -= diff; + if (!*cnt) + tcf_block_offload_dec(block, flags); + } + spin_unlock(&tp->lock); +} + +static void +tc_cls_offload_cnt_reset(struct tcf_block *block, struct tcf_proto *tp, + u32 *cnt, u32 *flags) +{ + lockdep_assert_held(&block->cb_lock); + + spin_lock(&tp->lock); + tcf_block_offload_dec(block, flags); + *cnt = 0; + spin_unlock(&tp->lock); +} + +static int +__tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, + void *type_data, bool err_stop) { struct flow_block_cb *block_cb; int ok_count = 0; int err; - /* Make sure all netdevs sharing this block are offload-capable. */ - if (block->nooffloaddevcnt && err_stop) - return -EOPNOTSUPP; - list_for_each_entry(block_cb, &block->flow_block.cb_list, list) { err = block_cb->cb(type, type_data, block_cb->cb_priv); if (err) { @@ -3173,17 +3189,261 @@ int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, } return ok_count; } + +int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type, + void *type_data, bool err_stop, bool rtnl_held) +{ + bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; + int ok_count; + +retry: + if (take_rtnl) + rtnl_lock(); + down_read(&block->cb_lock); + /* Need to obtain rtnl lock if block is bound to devs that require it. + * In block bind code cb_lock is obtained while holding rtnl, so we must + * obtain the locks in same order here. + */ + if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { + up_read(&block->cb_lock); + take_rtnl = true; + goto retry; + } + + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + + up_read(&block->cb_lock); + if (take_rtnl) + rtnl_unlock(); + return ok_count; +} EXPORT_SYMBOL(tc_setup_cb_call); +/* Non-destructive filter add. If filter that wasn't already in hardware is + * successfully offloaded, increment block offloads counter. On failure, + * previously offloaded filter is considered to be intact and offloads counter + * is not decremented. + */ + +int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *flags, unsigned int *in_hw_count, bool rtnl_held) +{ + bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; + int ok_count; + +retry: + if (take_rtnl) + rtnl_lock(); + down_read(&block->cb_lock); + /* Need to obtain rtnl lock if block is bound to devs that require it. + * In block bind code cb_lock is obtained while holding rtnl, so we must + * obtain the locks in same order here. + */ + if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { + up_read(&block->cb_lock); + take_rtnl = true; + goto retry; + } + + /* Make sure all netdevs sharing this block are offload-capable. */ + if (block->nooffloaddevcnt && err_stop) { + ok_count = -EOPNOTSUPP; + goto err_unlock; + } + + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + if (ok_count < 0) + goto err_unlock; + + if (tp->ops->hw_add) + tp->ops->hw_add(tp, type_data); + if (ok_count > 0) + tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, + ok_count, true); +err_unlock: + up_read(&block->cb_lock); + if (take_rtnl) + rtnl_unlock(); + return ok_count < 0 ? ok_count : 0; +} +EXPORT_SYMBOL(tc_setup_cb_add); + +/* Destructive filter replace. If filter that wasn't already in hardware is + * successfully offloaded, increment block offload counter. On failure, + * previously offloaded filter is considered to be destroyed and offload counter + * is decremented. + */ + +int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *old_flags, unsigned int *old_in_hw_count, + u32 *new_flags, unsigned int *new_in_hw_count, + bool rtnl_held) +{ + bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; + int ok_count; + +retry: + if (take_rtnl) + rtnl_lock(); + down_read(&block->cb_lock); + /* Need to obtain rtnl lock if block is bound to devs that require it. + * In block bind code cb_lock is obtained while holding rtnl, so we must + * obtain the locks in same order here. + */ + if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { + up_read(&block->cb_lock); + take_rtnl = true; + goto retry; + } + + /* Make sure all netdevs sharing this block are offload-capable. */ + if (block->nooffloaddevcnt && err_stop) { + ok_count = -EOPNOTSUPP; + goto err_unlock; + } + + tc_cls_offload_cnt_reset(block, tp, old_in_hw_count, old_flags); + if (tp->ops->hw_del) + tp->ops->hw_del(tp, type_data); + + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + if (ok_count < 0) + goto err_unlock; + + if (tp->ops->hw_add) + tp->ops->hw_add(tp, type_data); + if (ok_count > 0) + tc_cls_offload_cnt_update(block, tp, new_in_hw_count, + new_flags, ok_count, true); +err_unlock: + up_read(&block->cb_lock); + if (take_rtnl) + rtnl_unlock(); + return ok_count < 0 ? ok_count : 0; +} +EXPORT_SYMBOL(tc_setup_cb_replace); + +/* Destroy filter and decrement block offload counter, if filter was previously + * offloaded. + */ + +int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp, + enum tc_setup_type type, void *type_data, bool err_stop, + u32 *flags, unsigned int *in_hw_count, bool rtnl_held) +{ + bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held; + int ok_count; + +retry: + if (take_rtnl) + rtnl_lock(); + down_read(&block->cb_lock); + /* Need to obtain rtnl lock if block is bound to devs that require it. + * In block bind code cb_lock is obtained while holding rtnl, so we must + * obtain the locks in same order here. + */ + if (!rtnl_held && !take_rtnl && block->lockeddevcnt) { + up_read(&block->cb_lock); + take_rtnl = true; + goto retry; + } + + ok_count = __tc_setup_cb_call(block, type, type_data, err_stop); + + tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags); + if (tp->ops->hw_del) + tp->ops->hw_del(tp, type_data); + + up_read(&block->cb_lock); + if (take_rtnl) + rtnl_unlock(); + return ok_count < 0 ? ok_count : 0; +} +EXPORT_SYMBOL(tc_setup_cb_destroy); + +int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp, + bool add, flow_setup_cb_t *cb, + enum tc_setup_type type, void *type_data, + void *cb_priv, u32 *flags, unsigned int *in_hw_count) +{ + int err = cb(type, type_data, cb_priv); + + if (err) { + if (add && tc_skip_sw(*flags)) + return err; + } else { + tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, 1, + add); + } + + return 0; +} +EXPORT_SYMBOL(tc_setup_cb_reoffload); + +void tc_cleanup_flow_action(struct flow_action *flow_action) +{ + struct flow_action_entry *entry; + int i; + + flow_action_for_each(i, entry, flow_action) + if (entry->destructor) + entry->destructor(entry->destructor_priv); +} +EXPORT_SYMBOL(tc_cleanup_flow_action); + +static void tcf_mirred_get_dev(struct flow_action_entry *entry, + const struct tc_action *act) +{ +#ifdef CONFIG_NET_CLS_ACT + entry->dev = act->ops->get_dev(act, &entry->destructor); + if (!entry->dev) + return; + entry->destructor_priv = entry->dev; +#endif +} + +static void tcf_tunnel_encap_put_tunnel(void *priv) +{ + struct ip_tunnel_info *tunnel = priv; + + kfree(tunnel); +} + +static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry, + const struct tc_action *act) +{ + entry->tunnel = tcf_tunnel_info_copy(act); + if (!entry->tunnel) + return -ENOMEM; + entry->destructor = tcf_tunnel_encap_put_tunnel; + entry->destructor_priv = entry->tunnel; + return 0; +} + +static void tcf_sample_get_group(struct flow_action_entry *entry, + const struct tc_action *act) +{ +#ifdef CONFIG_NET_CLS_ACT + entry->sample.psample_group = + act->ops->get_psample_group(act, &entry->destructor); + entry->destructor_priv = entry->sample.psample_group; +#endif +} + int tc_setup_flow_action(struct flow_action *flow_action, - const struct tcf_exts *exts) + const struct tcf_exts *exts, bool rtnl_held) { const struct tc_action *act; - int i, j, k; + int i, j, k, err = 0; if (!exts) return 0; + if (!rtnl_held) + rtnl_lock(); + j = 0; tcf_exts_for_each_action(i, act, exts) { struct flow_action_entry *entry; @@ -3200,10 +3460,16 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->chain_index = tcf_gact_goto_chain_index(act); } else if (is_tcf_mirred_egress_redirect(act)) { entry->id = FLOW_ACTION_REDIRECT; - entry->dev = tcf_mirred_dev(act); + tcf_mirred_get_dev(entry, act); } else if (is_tcf_mirred_egress_mirror(act)) { entry->id = FLOW_ACTION_MIRRED; - entry->dev = tcf_mirred_dev(act); + tcf_mirred_get_dev(entry, act); + } else if (is_tcf_mirred_ingress_redirect(act)) { + entry->id = FLOW_ACTION_REDIRECT_INGRESS; + tcf_mirred_get_dev(entry, act); + } else if (is_tcf_mirred_ingress_mirror(act)) { + entry->id = FLOW_ACTION_MIRRED_INGRESS; + tcf_mirred_get_dev(entry, act); } else if (is_tcf_vlan(act)) { switch (tcf_vlan_action(act)) { case TCA_VLAN_ACT_PUSH: @@ -3222,11 +3488,14 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->vlan.prio = tcf_vlan_push_prio(act); break; default: + err = -EOPNOTSUPP; goto err_out; } } else if (is_tcf_tunnel_set(act)) { entry->id = FLOW_ACTION_TUNNEL_ENCAP; - entry->tunnel = tcf_tunnel_info(act); + err = tcf_tunnel_encap_get_tunnel(entry, act); + if (err) + goto err_out; } else if (is_tcf_tunnel_release(act)) { entry->id = FLOW_ACTION_TUNNEL_DECAP; } else if (is_tcf_pedit(act)) { @@ -3239,6 +3508,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->id = FLOW_ACTION_ADD; break; default: + err = -EOPNOTSUPP; goto err_out; } entry->mangle.htype = tcf_pedit_htype(act, k); @@ -3255,11 +3525,10 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->mark = tcf_skbedit_mark(act); } else if (is_tcf_sample(act)) { entry->id = FLOW_ACTION_SAMPLE; - entry->sample.psample_group = - tcf_sample_psample_group(act); entry->sample.trunc_size = tcf_sample_trunc_size(act); entry->sample.truncate = tcf_sample_truncate(act); entry->sample.rate = tcf_sample_rate(act); + tcf_sample_get_group(entry, act); } else if (is_tcf_police(act)) { entry->id = FLOW_ACTION_POLICE; entry->police.burst = tcf_police_tcfp_burst(act); @@ -3269,16 +3538,50 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->id = FLOW_ACTION_CT; entry->ct.action = tcf_ct_action(act); entry->ct.zone = tcf_ct_zone(act); + } else if (is_tcf_mpls(act)) { + switch (tcf_mpls_action(act)) { + case TCA_MPLS_ACT_PUSH: + entry->id = FLOW_ACTION_MPLS_PUSH; + entry->mpls_push.proto = tcf_mpls_proto(act); + entry->mpls_push.label = tcf_mpls_label(act); + entry->mpls_push.tc = tcf_mpls_tc(act); + entry->mpls_push.bos = tcf_mpls_bos(act); + entry->mpls_push.ttl = tcf_mpls_ttl(act); + break; + case TCA_MPLS_ACT_POP: + entry->id = FLOW_ACTION_MPLS_POP; + entry->mpls_pop.proto = tcf_mpls_proto(act); + break; + case TCA_MPLS_ACT_MODIFY: + entry->id = FLOW_ACTION_MPLS_MANGLE; + entry->mpls_mangle.label = tcf_mpls_label(act); + entry->mpls_mangle.tc = tcf_mpls_tc(act); + entry->mpls_mangle.bos = tcf_mpls_bos(act); + entry->mpls_mangle.ttl = tcf_mpls_ttl(act); + break; + default: + goto err_out; + } + } else if (is_tcf_skbedit_ptype(act)) { + entry->id = FLOW_ACTION_PTYPE; + entry->ptype = tcf_skbedit_ptype(act); } else { + err = -EOPNOTSUPP; goto err_out; } if (!is_tcf_pedit(act)) j++; } - return 0; + err_out: - return -EOPNOTSUPP; + if (!rtnl_held) + rtnl_unlock(); + + if (err) + tc_cleanup_flow_action(flow_action); + + return err; } EXPORT_SYMBOL(tc_setup_flow_action); @@ -3321,6 +3624,11 @@ static struct pernet_operations tcf_net_ops = { .size = sizeof(struct tcf_net), }; +static struct flow_indr_block_entry block_entry = { + .cb = tc_indr_block_get_and_cmd, + .list = LIST_HEAD_INIT(block_entry.list), +}; + static int __init tc_filter_init(void) { int err; @@ -3333,10 +3641,7 @@ static int __init tc_filter_init(void) if (err) goto err_register_pernet_subsys; - err = rhashtable_init(&indr_setup_block_ht, - &tc_indr_setup_block_ht_params); - if (err) - goto err_rhash_setup_block_ht; + flow_indr_add_block_cb(&block_entry); rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL, RTNL_FLAG_DOIT_UNLOCKED); @@ -3351,8 +3656,6 @@ static int __init tc_filter_init(void) return 0; -err_rhash_setup_block_ht: - unregister_pernet_subsys(&tcf_net_ops); err_register_pernet_subsys: destroy_workqueue(tc_filter_wq); return err; |