From c9f1d0389b962521af1e2b699c8ee5e299d77b85 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 4 Sep 2009 06:41:13 +0000 Subject: net_sched: fix class grafting errno codes If the parent qdisc doesn't support classes, use EOPNOTSUPP. If the parent class doesn't exist, use ENOENT. Currently EINVAL is returned in both cases. Additionally check whether grafting is supported and remove a now unnecessary graft function from sch_ingress. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_api.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 24d17ce9c294..bef2d645a366 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -728,14 +728,14 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, } else { const struct Qdisc_class_ops *cops = parent->ops->cl_ops; - err = -EINVAL; - - if (cops) { + err = -EOPNOTSUPP; + if (cops && cops->graft) { unsigned long cl = cops->get(parent, classid); if (cl) { err = cops->graft(parent, cl, new, &old); cops->put(parent, cl); - } + } else + err = -ENOENT; } if (!err) notify_and_destroy(skb, n, classid, old, new); -- cgit v1.2.3 From de6d5cdf881353f83006d5f3e28ac4fffd42145e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 4 Sep 2009 06:41:16 +0000 Subject: net_sched: make cls_ops->change and cls_ops->delete optional Some schedulers don't support creating, changing or deleting classes. Make the respective callbacks optional and consistently return -EOPNOTSUPP for unsupported operations, instead of currently either -EOPNOTSUPP, -ENOSYS or no error. In case of sch_prio and sch_multiq, the removed operations additionally checked for an invalid class. This is not necessary since the class argument can only originate from ->get() or in case of ->change is 0 for creation of new classes, in which case ->change() incorrectly returned -ENOENT. 
As a side-effect, this patch fixes a possible (root-only) NULL pointer function call in sch_ingress, which didn't implement a so far mandatory ->delete() operation. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_api.c | 8 ++++++-- net/sched/sch_ingress.c | 7 ------- net/sched/sch_multiq.c | 22 ---------------------- net/sched/sch_prio.c | 21 --------------------- net/sched/sch_red.c | 13 ------------- net/sched/sch_sfq.c | 7 ------- net/sched/sch_tbf.c | 13 ------------- 7 files changed, 6 insertions(+), 85 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index bef2d645a366..166fcca86e7a 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1417,7 +1417,9 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) goto out; break; case RTM_DELTCLASS: - err = cops->delete(q, cl); + err = -EOPNOTSUPP; + if (cops->delete) + err = cops->delete(q, cl); if (err == 0) tclass_notify(skb, n, q, cl, RTM_DELTCLASS); goto out; @@ -1431,7 +1433,9 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) } new_cl = cl; - err = cops->change(q, clid, pid, tca, &new_cl); + err = -EOPNOTSUPP; + if (cops->change) + err = cops->change(q, clid, pid, tca, &new_cl); if (err == 0) tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS); diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index ace7902b5097..a9e646bdb605 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -42,12 +42,6 @@ static void ingress_put(struct Qdisc *sch, unsigned long cl) { } -static int ingress_change(struct Qdisc *sch, u32 classid, u32 parent, - struct nlattr **tca, unsigned long *arg) -{ - return 0; -} - static void ingress_walk(struct Qdisc *sch, struct qdisc_walker *walker) { return; @@ -120,7 +114,6 @@ static const struct Qdisc_class_ops ingress_class_ops = { .leaf = ingress_leaf, .get = ingress_get, .put = ingress_put, - .change = ingress_change, 
.walk = ingress_walk, .tcf_chain = ingress_find_tcf, .bind_tcf = ingress_bind_filter, diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 912731203047..a0ffe7158ff3 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -348,26 +348,6 @@ static void multiq_put(struct Qdisc *q, unsigned long cl) return; } -static int multiq_change(struct Qdisc *sch, u32 handle, u32 parent, - struct nlattr **tca, unsigned long *arg) -{ - unsigned long cl = *arg; - struct multiq_sched_data *q = qdisc_priv(sch); - - if (cl - 1 > q->bands) - return -ENOENT; - return 0; -} - -static int multiq_delete(struct Qdisc *sch, unsigned long cl) -{ - struct multiq_sched_data *q = qdisc_priv(sch); - if (cl - 1 > q->bands) - return -ENOENT; - return 0; -} - - static int multiq_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) { @@ -430,8 +410,6 @@ static const struct Qdisc_class_ops multiq_class_ops = { .leaf = multiq_leaf, .get = multiq_get, .put = multiq_put, - .change = multiq_change, - .delete = multiq_delete, .walk = multiq_walk, .tcf_chain = multiq_find_tcf, .bind_tcf = multiq_bind, diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 94cecef70145..209a4ca4b98d 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -311,25 +311,6 @@ static void prio_put(struct Qdisc *q, unsigned long cl) return; } -static int prio_change(struct Qdisc *sch, u32 handle, u32 parent, struct nlattr **tca, unsigned long *arg) -{ - unsigned long cl = *arg; - struct prio_sched_data *q = qdisc_priv(sch); - - if (cl - 1 > q->bands) - return -ENOENT; - return 0; -} - -static int prio_delete(struct Qdisc *sch, unsigned long cl) -{ - struct prio_sched_data *q = qdisc_priv(sch); - if (cl - 1 > q->bands) - return -ENOENT; - return 0; -} - - static int prio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) { @@ -392,8 +373,6 @@ static const struct Qdisc_class_ops prio_class_ops = { .leaf = prio_leaf, 
.get = prio_get, .put = prio_put, - .change = prio_change, - .delete = prio_delete, .walk = prio_walk, .tcf_chain = prio_find_tcf, .bind_tcf = prio_bind, diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index c27b8023f079..a2c4d1aa3cb1 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -308,17 +308,6 @@ static void red_put(struct Qdisc *sch, unsigned long arg) return; } -static int red_change_class(struct Qdisc *sch, u32 classid, u32 parentid, - struct nlattr **tca, unsigned long *arg) -{ - return -ENOSYS; -} - -static int red_delete(struct Qdisc *sch, unsigned long cl) -{ - return -ENOSYS; -} - static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker) { if (!walker->stop) { @@ -336,8 +325,6 @@ static const struct Qdisc_class_ops red_class_ops = { .leaf = red_leaf, .get = red_get, .put = red_put, - .change = red_change_class, - .delete = red_delete, .walk = red_walk, .dump = red_dump_class, }; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 8706920a6d45..cb21380c0605 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -496,12 +496,6 @@ nla_put_failure: return -1; } -static int sfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, - struct nlattr **tca, unsigned long *arg) -{ - return -EOPNOTSUPP; -} - static unsigned long sfq_get(struct Qdisc *sch, u32 classid) { return 0; @@ -560,7 +554,6 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) static const struct Qdisc_class_ops sfq_class_ops = { .get = sfq_get, - .change = sfq_change_class, .tcf_chain = sfq_find_tcf, .dump = sfq_dump_class, .dump_stats = sfq_dump_class_stats, diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 28909699d24d..d904167e73b3 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -410,17 +410,6 @@ static void tbf_put(struct Qdisc *sch, unsigned long arg) { } -static int tbf_change_class(struct Qdisc *sch, u32 classid, u32 parentid, - struct nlattr **tca, unsigned long *arg) -{ - return -ENOSYS; -} 
- -static int tbf_delete(struct Qdisc *sch, unsigned long arg) -{ - return -ENOSYS; -} - static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker) { if (!walker->stop) { @@ -439,8 +428,6 @@ static const struct Qdisc_class_ops tbf_class_ops = .leaf = tbf_leaf, .get = tbf_get, .put = tbf_put, - .change = tbf_change_class, - .delete = tbf_delete, .walk = tbf_walk, .dump = tbf_dump_class, }; -- cgit v1.2.3 From af356afa010f3cd2c8b8fcc3bce90f7a7b7ec02a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 4 Sep 2009 06:41:18 +0000 Subject: net_sched: reintroduce dev->qdisc for use by sch_api Currently the multiqueue integration with the qdisc API suffers from a few problems: - with multiple queues, all root qdiscs use the same handle. This means they can't be exposed to userspace in a backwards compatible fashion. - all API operations always refer to queue number 0. Newly created qdiscs are automatically shared between all queues, it's not possible to address individual queues or restore multiqueue behaviour once a shared qdisc has been attached. - Dumps only contain the root qdisc of queue 0, in case of non-shared qdiscs this means the statistics are incomplete. This patch reintroduces dev->qdisc, which points to the (single) root qdisc from userspace's point of view. Currently it either points to the first (non-shared) default qdisc, or a qdisc shared between all queues. The following patches will introduce a classful dummy qdisc, which will be used as root qdisc and contain the per-queue qdiscs as children. Signed-off-by: Patrick McHardy Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 3 +++ net/core/rtnetlink.c | 6 ++---- net/sched/cls_api.c | 7 ++----- net/sched/sch_api.c | 41 ++++++++++++++++------------------------- net/sched/sch_generic.c | 25 +++++++++++-------------- 5 files changed, 34 insertions(+), 48 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 121cbad0aae5..a44118b1b56c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -832,6 +832,9 @@ struct net_device /* Number of TX queues currently active in device */ unsigned int real_num_tx_queues; + /* root qdisc from userspace point of view */ + struct Qdisc *qdisc; + unsigned long tx_queue_len; /* Max frames per queue allowed */ spinlock_t tx_global_lock; /* diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index bbcba2a41018..eb42873f2a3a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -606,7 +606,6 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, unsigned int flags) { - struct netdev_queue *txq; struct ifinfomsg *ifm; struct nlmsghdr *nlh; const struct net_device_stats *stats; @@ -637,9 +636,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (dev->master) NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex); - txq = netdev_get_tx_queue(dev, 0); - if (txq->qdisc_sleeping) - NLA_PUT_STRING(skb, IFLA_QDISC, txq->qdisc_sleeping->ops->id); + if (dev->qdisc) + NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc->ops->id); if (dev->ifalias) NLA_PUT_STRING(skb, IFLA_IFALIAS, dev->ifalias); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index bcfbdb4758c9..6a536949cdc0 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -168,8 +168,7 @@ replay: /* Find qdisc */ if (!parent) { - struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0); - q = dev_queue->qdisc_sleeping; + q = dev->qdisc; parent = q->handle; } else { q = 
qdisc_lookup(dev, TC_H_MAJ(t->tcm_parent)); @@ -408,7 +407,6 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - struct netdev_queue *dev_queue; int t; int s_t; struct net_device *dev; @@ -427,9 +425,8 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL) return skb->len; - dev_queue = netdev_get_tx_queue(dev, 0); if (!tcm->tcm_parent) - q = dev_queue->qdisc_sleeping; + q = dev->qdisc; else q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); if (!q) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 166fcca86e7a..8aa9a0c5a9eb 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -207,7 +207,7 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) static void qdisc_list_add(struct Qdisc *q) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) - list_add_tail(&q->list, &qdisc_root_sleeping(q)->list); + list_add_tail(&q->list, &qdisc_dev(q)->qdisc->list); } void qdisc_list_del(struct Qdisc *q) @@ -219,17 +219,11 @@ EXPORT_SYMBOL(qdisc_list_del); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { - unsigned int i; struct Qdisc *q; - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - struct Qdisc *txq_root = txq->qdisc_sleeping; - - q = qdisc_match_from_root(txq_root, handle); - if (q) - goto out; - } + q = qdisc_match_from_root(dev->qdisc, handle); + if (q) + goto out; q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle); out: @@ -720,9 +714,14 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, if (new && i > 0) atomic_inc(&new->refcnt); - notify_and_destroy(skb, n, classid, old, new); + qdisc_destroy(old); } + notify_and_destroy(skb, n, classid, dev->qdisc, new); + if (new) + atomic_inc(&new->refcnt); 
+ dev->qdisc = new ? : &noop_qdisc; + if (dev->flags & IFF_UP) dev_activate(dev); } else { @@ -974,9 +973,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) q = dev->rx_queue.qdisc_sleeping; } } else { - struct netdev_queue *dev_queue; - dev_queue = netdev_get_tx_queue(dev, 0); - q = dev_queue->qdisc_sleeping; + q = dev->qdisc; } if (!q) return -ENOENT; @@ -1044,9 +1041,7 @@ replay: q = dev->rx_queue.qdisc_sleeping; } } else { - struct netdev_queue *dev_queue; - dev_queue = netdev_get_tx_queue(dev, 0); - q = dev_queue->qdisc_sleeping; + q = dev->qdisc; } /* It may be default qdisc, ignore it */ @@ -1291,8 +1286,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) s_q_idx = 0; q_idx = 0; - dev_queue = netdev_get_tx_queue(dev, 0); - if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0) + if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0) goto done; dev_queue = &dev->rx_queue; @@ -1323,7 +1317,6 @@ done: static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { struct net *net = sock_net(skb->sk); - struct netdev_queue *dev_queue; struct tcmsg *tcm = NLMSG_DATA(n); struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; @@ -1361,7 +1354,6 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Step 1. Determine qdisc handle X:0 */ - dev_queue = netdev_get_tx_queue(dev, 0); if (pid != TC_H_ROOT) { u32 qid1 = TC_H_MAJ(pid); @@ -1372,7 +1364,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) } else if (qid1) { qid = qid1; } else if (qid == 0) - qid = dev_queue->qdisc_sleeping->handle; + qid = dev->qdisc->handle; /* Now qid is genuine qdisc handle consistent both with parent and child. 
@@ -1383,7 +1375,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) pid = TC_H_MAKE(qid, pid); } else { if (qid == 0) - qid = dev_queue->qdisc_sleeping->handle; + qid = dev->qdisc->handle; } /* OK. Locate qdisc */ @@ -1588,8 +1580,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; t = 0; - dev_queue = netdev_get_tx_queue(dev, 0); - if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0) + if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0) goto done; dev_queue = &dev->rx_queue; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 6128e6f24589..a91f079fb47a 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -623,19 +623,6 @@ void qdisc_destroy(struct Qdisc *qdisc) } EXPORT_SYMBOL(qdisc_destroy); -static bool dev_all_qdisc_sleeping_noop(struct net_device *dev) -{ - unsigned int i; - - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - - if (txq->qdisc_sleeping != &noop_qdisc) - return false; - } - return true; -} - static void attach_one_default_qdisc(struct net_device *dev, struct netdev_queue *dev_queue, void *_unused) @@ -677,6 +664,7 @@ static void transition_one_qdisc(struct net_device *dev, void dev_activate(struct net_device *dev) { + struct netdev_queue *txq; int need_watchdog; /* No queueing discipline is attached to device; @@ -685,9 +673,14 @@ void dev_activate(struct net_device *dev) virtual interfaces */ - if (dev_all_qdisc_sleeping_noop(dev)) + if (dev->qdisc == &noop_qdisc) { netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); + txq = netdev_get_tx_queue(dev, 0); + dev->qdisc = txq->qdisc_sleeping; + atomic_inc(&dev->qdisc->refcnt); + } + if (!netif_carrier_ok(dev)) /* Delay activation until next carrier-on event */ return; @@ -777,6 +770,7 @@ static void dev_init_scheduler_queue(struct net_device *dev, void dev_init_scheduler(struct 
net_device *dev) { + dev->qdisc = &noop_qdisc; netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc); dev_init_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc); @@ -802,5 +796,8 @@ void dev_shutdown(struct net_device *dev) { netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc); shutdown_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc); + qdisc_destroy(dev->qdisc); + dev->qdisc = &noop_qdisc; + WARN_ON(timer_pending(&dev->watchdog_timer)); } -- cgit v1.2.3 From 589983cd21f4a2e4ed74a958805a90fa676845c5 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 4 Sep 2009 06:41:20 +0000 Subject: net_sched: move dev_graft_qdisc() to sch_generic.c It will be used in a following patch by the multiqueue qdisc. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/sch_generic.h | 2 ++ net/sched/sch_api.c | 26 -------------------------- net/sched/sch_generic.c | 25 +++++++++++++++++++++++++ 3 files changed, 27 insertions(+), 26 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index a48a4cc7258b..a92dc6208eff 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -302,6 +302,8 @@ extern void dev_init_scheduler(struct net_device *dev); extern void dev_shutdown(struct net_device *dev); extern void dev_activate(struct net_device *dev); extern void dev_deactivate(struct net_device *dev); +extern struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, + struct Qdisc *qdisc); extern void qdisc_reset(struct Qdisc *qdisc); extern void qdisc_destroy(struct Qdisc *qdisc); extern void qdisc_tree_decrease_qlen(struct Qdisc *qdisc, unsigned int n); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 8aa9a0c5a9eb..d71f12be6e29 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -610,32 +610,6 @@ static u32 qdisc_alloc_handle(struct net_device *dev) return i>0 ? autohandle : 0; } -/* Attach toplevel qdisc to device queue. 
*/ - -static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, - struct Qdisc *qdisc) -{ - struct Qdisc *oqdisc = dev_queue->qdisc_sleeping; - spinlock_t *root_lock; - - root_lock = qdisc_lock(oqdisc); - spin_lock_bh(root_lock); - - /* Prune old scheduler */ - if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) - qdisc_reset(oqdisc); - - /* ... and graft new one */ - if (qdisc == NULL) - qdisc = &noop_qdisc; - dev_queue->qdisc_sleeping = qdisc; - rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc); - - spin_unlock_bh(root_lock); - - return oqdisc; -} - void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) { const struct Qdisc_class_ops *cops; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index a91f079fb47a..e7c47ceb0098 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -623,6 +623,31 @@ void qdisc_destroy(struct Qdisc *qdisc) } EXPORT_SYMBOL(qdisc_destroy); +/* Attach toplevel qdisc to device queue. */ +struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, + struct Qdisc *qdisc) +{ + struct Qdisc *oqdisc = dev_queue->qdisc_sleeping; + spinlock_t *root_lock; + + root_lock = qdisc_lock(oqdisc); + spin_lock_bh(root_lock); + + /* Prune old scheduler */ + if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) + qdisc_reset(oqdisc); + + /* ... and graft new one */ + if (qdisc == NULL) + qdisc = &noop_qdisc; + dev_queue->qdisc_sleeping = qdisc; + rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc); + + spin_unlock_bh(root_lock); + + return oqdisc; +} + static void attach_one_default_qdisc(struct net_device *dev, struct netdev_queue *dev_queue, void *_unused) -- cgit v1.2.3 From 6ec1c69a8f6492fd25722f4762721921da074c12 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 6 Sep 2009 01:58:51 -0700 Subject: net_sched: add classful multiqueue dummy scheduler This patch adds a classful dummy scheduler which can be used as root qdisc for multiqueue devices and exposes each device queue as a child class. 
This allows to address queues individually and graft them similar to regular classes. Additionally it presents an accumulated view of the statistics of all real root qdiscs in the dummy root. Two new callbacks are added to the qdisc_ops and qdisc_class_ops: - cl_ops->select_queue selects the tx queue number for new child classes. - qdisc_ops->attach() overrides root qdisc device grafting to attach non-shared qdiscs to the queues. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/sch_generic.h | 4 + net/sched/Makefile | 2 +- net/sched/sch_api.c | 18 +++- net/sched/sch_generic.c | 32 +++++-- net/sched/sch_mq.c | 234 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 277 insertions(+), 13 deletions(-) create mode 100644 net/sched/sch_mq.c (limited to 'net/sched/sch_api.c') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index a92dc6208eff..9c69585a1be8 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -80,6 +80,7 @@ struct Qdisc struct Qdisc_class_ops { /* Child qdisc manipulation */ + unsigned int (*select_queue)(struct Qdisc *, struct tcmsg *); int (*graft)(struct Qdisc *, unsigned long cl, struct Qdisc *, struct Qdisc **); struct Qdisc * (*leaf)(struct Qdisc *, unsigned long cl); @@ -122,6 +123,7 @@ struct Qdisc_ops void (*reset)(struct Qdisc *); void (*destroy)(struct Qdisc *); int (*change)(struct Qdisc *, struct nlattr *arg); + void (*attach)(struct Qdisc *); int (*dump)(struct Qdisc *, struct sk_buff *); int (*dump_stats)(struct Qdisc *, struct gnet_dump *); @@ -255,6 +257,8 @@ static inline void sch_tree_unlock(struct Qdisc *q) extern struct Qdisc noop_qdisc; extern struct Qdisc_ops noop_qdisc_ops; +extern struct Qdisc_ops pfifo_fast_ops; +extern struct Qdisc_ops mq_qdisc_ops; struct Qdisc_class_common { diff --git a/net/sched/Makefile b/net/sched/Makefile index 54d950cd4b8d..f14e71bfa58f 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -2,7 +2,7 @@ # Makefile for 
the Linux Traffic Control Unit. # -obj-y := sch_generic.o +obj-y := sch_generic.o sch_mq.o obj-$(CONFIG_NET_SCHED) += sch_api.o sch_blackhole.o obj-$(CONFIG_NET_CLS) += cls_api.o diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index d71f12be6e29..2a78d5410154 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -678,6 +678,11 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, if (dev->flags & IFF_UP) dev_deactivate(dev); + if (new && new->ops->attach) { + new->ops->attach(new); + num_q = 0; + } + for (i = 0; i < num_q; i++) { struct netdev_queue *dev_queue = &dev->rx_queue; @@ -692,7 +697,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, } notify_and_destroy(skb, n, classid, dev->qdisc, new); - if (new) + if (new && !new->ops->attach) atomic_inc(&new->refcnt); dev->qdisc = new ? : &noop_qdisc; @@ -1095,10 +1100,16 @@ create_n_graft: q = qdisc_create(dev, &dev->rx_queue, tcm->tcm_parent, tcm->tcm_parent, tca, &err); - else - q = qdisc_create(dev, netdev_get_tx_queue(dev, 0), + else { + unsigned int ntx = 0; + + if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue) + ntx = p->ops->cl_ops->select_queue(p, tcm); + + q = qdisc_create(dev, netdev_get_tx_queue(dev, ntx), tcm->tcm_parent, tcm->tcm_handle, tca, &err); + } if (q == NULL) { if (err == -EAGAIN) goto replay; @@ -1674,6 +1685,7 @@ static int __init pktsched_init(void) { register_qdisc(&pfifo_qdisc_ops); register_qdisc(&bfifo_qdisc_ops); + register_qdisc(&mq_qdisc_ops); proc_net_fops_create(&init_net, "psched", 0, &psched_fops); rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index e7c47ceb0098..4ae6aa562f2b 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -514,7 +514,7 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt) return 0; } -static struct Qdisc_ops pfifo_fast_ops __read_mostly = { +struct Qdisc_ops pfifo_fast_ops 
__read_mostly = { .id = "pfifo_fast", .priv_size = sizeof(struct pfifo_fast_priv), .enqueue = pfifo_fast_enqueue, @@ -670,6 +670,26 @@ static void attach_one_default_qdisc(struct net_device *dev, dev_queue->qdisc_sleeping = qdisc; } +static void attach_default_qdiscs(struct net_device *dev) +{ + struct netdev_queue *txq; + struct Qdisc *qdisc; + + txq = netdev_get_tx_queue(dev, 0); + + if (!netif_is_multiqueue(dev) || dev->tx_queue_len == 0) { + netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); + dev->qdisc = txq->qdisc_sleeping; + atomic_inc(&dev->qdisc->refcnt); + } else { + qdisc = qdisc_create_dflt(dev, txq, &mq_qdisc_ops, TC_H_ROOT); + if (qdisc) { + qdisc->ops->attach(qdisc); + dev->qdisc = qdisc; + } + } +} + static void transition_one_qdisc(struct net_device *dev, struct netdev_queue *dev_queue, void *_need_watchdog) @@ -689,7 +709,6 @@ static void transition_one_qdisc(struct net_device *dev, void dev_activate(struct net_device *dev) { - struct netdev_queue *txq; int need_watchdog; /* No queueing discipline is attached to device; @@ -698,13 +717,8 @@ void dev_activate(struct net_device *dev) virtual interfaces */ - if (dev->qdisc == &noop_qdisc) { - netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); - - txq = netdev_get_tx_queue(dev, 0); - dev->qdisc = txq->qdisc_sleeping; - atomic_inc(&dev->qdisc->refcnt); - } + if (dev->qdisc == &noop_qdisc) + attach_default_qdiscs(dev); if (!netif_carrier_ok(dev)) /* Delay activation until next carrier-on event */ diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c new file mode 100644 index 000000000000..c84dec9c8c7d --- /dev/null +++ b/net/sched/sch_mq.c @@ -0,0 +1,234 @@ +/* + * net/sched/sch_mq.c Classful multiqueue dummy scheduler + * + * Copyright (c) 2009 Patrick McHardy + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +struct mq_sched { + struct Qdisc **qdiscs; +}; + +static void mq_destroy(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + struct mq_sched *priv = qdisc_priv(sch); + unsigned int ntx; + + if (!priv->qdiscs) + return; + for (ntx = 0; ntx < dev->num_tx_queues && priv->qdiscs[ntx]; ntx++) + qdisc_destroy(priv->qdiscs[ntx]); + kfree(priv->qdiscs); +} + +static int mq_init(struct Qdisc *sch, struct nlattr *opt) +{ + struct net_device *dev = qdisc_dev(sch); + struct mq_sched *priv = qdisc_priv(sch); + struct netdev_queue *dev_queue; + struct Qdisc *qdisc; + unsigned int ntx; + + if (sch->parent != TC_H_ROOT) + return -EOPNOTSUPP; + + if (!netif_is_multiqueue(dev)) + return -EOPNOTSUPP; + + /* pre-allocate qdiscs, attachment can't fail */ + priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), + GFP_KERNEL); + if (priv->qdiscs == NULL) + return -ENOMEM; + + for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { + dev_queue = netdev_get_tx_queue(dev, ntx); + qdisc = qdisc_create_dflt(dev, dev_queue, &pfifo_fast_ops, + TC_H_MAKE(TC_H_MAJ(sch->handle), + TC_H_MIN(ntx + 1))); + if (qdisc == NULL) + goto err; + qdisc->flags |= TCQ_F_CAN_BYPASS; + priv->qdiscs[ntx] = qdisc; + } + + return 0; + +err: + mq_destroy(sch); + return -ENOMEM; +} + +static void mq_attach(struct Qdisc *sch) +{ + struct net_device *dev = qdisc_dev(sch); + struct mq_sched *priv = qdisc_priv(sch); + struct Qdisc *qdisc; + unsigned int ntx; + + for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { + qdisc = priv->qdiscs[ntx]; + qdisc = dev_graft_qdisc(qdisc->dev_queue, qdisc); + if (qdisc) + qdisc_destroy(qdisc); + } + kfree(priv->qdiscs); + priv->qdiscs = NULL; +} + +static int mq_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct net_device *dev = qdisc_dev(sch); + struct Qdisc *qdisc; + unsigned int ntx; + + sch->q.qlen = 0; + memset(&sch->bstats, 0, sizeof(sch->bstats)); + memset(&sch->qstats, 0, 
sizeof(sch->qstats)); + + for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { + qdisc = netdev_get_tx_queue(dev, ntx)->qdisc_sleeping; + spin_lock_bh(qdisc_lock(qdisc)); + sch->q.qlen += qdisc->q.qlen; + sch->bstats.bytes += qdisc->bstats.bytes; + sch->bstats.packets += qdisc->bstats.packets; + sch->qstats.qlen += qdisc->qstats.qlen; + sch->qstats.backlog += qdisc->qstats.backlog; + sch->qstats.drops += qdisc->qstats.drops; + sch->qstats.requeues += qdisc->qstats.requeues; + sch->qstats.overlimits += qdisc->qstats.overlimits; + spin_unlock_bh(qdisc_lock(qdisc)); + } + return 0; +} + +static struct netdev_queue *mq_queue_get(struct Qdisc *sch, unsigned long cl) +{ + struct net_device *dev = qdisc_dev(sch); + unsigned long ntx = cl - 1; + + if (ntx >= dev->num_tx_queues) + return NULL; + return netdev_get_tx_queue(dev, ntx); +} + +static unsigned int mq_select_queue(struct Qdisc *sch, struct tcmsg *tcm) +{ + unsigned int ntx = TC_H_MIN(tcm->tcm_parent); + + if (!mq_queue_get(sch, ntx)) + return 0; + return ntx - 1; +} + +static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, + struct Qdisc **old) +{ + struct netdev_queue *dev_queue = mq_queue_get(sch, cl); + struct net_device *dev = qdisc_dev(sch); + + if (dev->flags & IFF_UP) + dev_deactivate(dev); + + *old = dev_graft_qdisc(dev_queue, new); + + if (dev->flags & IFF_UP) + dev_activate(dev); + return 0; +} + +static struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl) +{ + struct netdev_queue *dev_queue = mq_queue_get(sch, cl); + + return dev_queue->qdisc_sleeping; +} + +static unsigned long mq_get(struct Qdisc *sch, u32 classid) +{ + unsigned int ntx = TC_H_MIN(classid); + + if (!mq_queue_get(sch, ntx)) + return 0; + return ntx; +} + +static void mq_put(struct Qdisc *sch, unsigned long cl) +{ + return; +} + +static int mq_dump_class(struct Qdisc *sch, unsigned long cl, + struct sk_buff *skb, struct tcmsg *tcm) +{ + struct netdev_queue *dev_queue = mq_queue_get(sch, cl); + + tcm->tcm_parent = 
TC_H_ROOT; + tcm->tcm_handle |= TC_H_MIN(cl); + tcm->tcm_info = dev_queue->qdisc_sleeping->handle; + return 0; +} + +static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl, + struct gnet_dump *d) +{ + struct netdev_queue *dev_queue = mq_queue_get(sch, cl); + + sch = dev_queue->qdisc_sleeping; + if (gnet_stats_copy_basic(d, &sch->bstats) < 0 || + gnet_stats_copy_queue(d, &sch->qstats) < 0) + return -1; + return 0; +} + +static void mq_walk(struct Qdisc *sch, struct qdisc_walker *arg) +{ + struct net_device *dev = qdisc_dev(sch); + unsigned int ntx; + + if (arg->stop) + return; + + arg->count = arg->skip; + for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) { + if (arg->fn(sch, ntx + 1, arg) < 0) { + arg->stop = 1; + break; + } + arg->count++; + } +} + +static const struct Qdisc_class_ops mq_class_ops = { + .select_queue = mq_select_queue, + .graft = mq_graft, + .leaf = mq_leaf, + .get = mq_get, + .put = mq_put, + .walk = mq_walk, + .dump = mq_dump_class, + .dump_stats = mq_dump_class_stats, +}; + +struct Qdisc_ops mq_qdisc_ops __read_mostly = { + .cl_ops = &mq_class_ops, + .id = "mq", + .priv_size = sizeof(struct mq_sched), + .init = mq_init, + .destroy = mq_destroy, + .attach = mq_attach, + .dump = mq_dump, + .owner = THIS_MODULE, +}; -- cgit v1.2.3 From 23bcf634c8bc0d84607a5b863333191d58baee4c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 9 Sep 2009 18:11:23 -0700 Subject: net_sched: fix estimator lock selection for mq child qdiscs When new child qdiscs are attached to the mq qdisc, they are actually attached as root qdiscs to the device queues. The lock selection for new estimators incorrectly picks the root lock of the existing and to be replaced qdisc, which results in a use-after-free once the old qdisc has been destroyed. Mark mq qdisc instances with a new flag and treat qdiscs attached to mq as children similar to regular root qdiscs. 
Additionally prevent estimators from being attached to the mq qdisc itself since it only updates its byte and packet counters during dumps. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/sch_generic.h | 1 + net/sched/sch_api.c | 42 ++++++++++++++++++++++++++---------------- net/sched/sch_mq.c | 1 + 3 files changed, 28 insertions(+), 16 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 9c69585a1be8..88eb9de095de 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -46,6 +46,7 @@ struct Qdisc #define TCQ_F_THROTTLED 2 #define TCQ_F_INGRESS 4 #define TCQ_F_CAN_BYPASS 8 +#define TCQ_F_MQROOT 16 #define TCQ_F_WARN_NONWC (1 << 16) int padded; struct Qdisc_ops *ops; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 2a78d5410154..3af106140f35 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -733,7 +733,8 @@ static struct lock_class_key qdisc_rx_lock; static struct Qdisc * qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, - u32 parent, u32 handle, struct nlattr **tca, int *errp) + struct Qdisc *p, u32 parent, u32 handle, + struct nlattr **tca, int *errp) { int err; struct nlattr *kind = tca[TCA_KIND]; @@ -810,24 +811,21 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, if (tca[TCA_RATE]) { spinlock_t *root_lock; + err = -EOPNOTSUPP; + if (sch->flags & TCQ_F_MQROOT) + goto err_out4; + if ((sch->parent != TC_H_ROOT) && - !(sch->flags & TCQ_F_INGRESS)) + !(sch->flags & TCQ_F_INGRESS) && + (!p || !(p->flags & TCQ_F_MQROOT))) root_lock = qdisc_root_sleeping_lock(sch); else root_lock = qdisc_lock(sch); err = gen_new_estimator(&sch->bstats, &sch->rate_est, root_lock, tca[TCA_RATE]); - if (err) { - /* - * Any broken qdiscs that would require - * a ops->reset() here? The qdisc was never - * in action so it shouldn't be necessary. 
- */ - if (ops->destroy) - ops->destroy(sch); - goto err_out3; - } + if (err) + goto err_out4; } qdisc_list_add(sch); @@ -843,6 +841,15 @@ err_out2: err_out: *errp = err; return NULL; + +err_out4: + /* + * Any broken qdiscs that would require a ops->reset() here? + * The qdisc was never in action so it shouldn't be necessary. + */ + if (ops->destroy) + ops->destroy(sch); + goto err_out3; } static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) @@ -867,13 +874,16 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) qdisc_put_stab(sch->stab); sch->stab = stab; - if (tca[TCA_RATE]) + if (tca[TCA_RATE]) { /* NB: ignores errors from replace_estimator because change can't be undone. */ + if (sch->flags & TCQ_F_MQROOT) + goto out; gen_replace_estimator(&sch->bstats, &sch->rate_est, qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); - + } +out: return 0; } @@ -1097,7 +1107,7 @@ create_n_graft: if (!(n->nlmsg_flags&NLM_F_CREATE)) return -ENOENT; if (clid == TC_H_INGRESS) - q = qdisc_create(dev, &dev->rx_queue, + q = qdisc_create(dev, &dev->rx_queue, p, tcm->tcm_parent, tcm->tcm_parent, tca, &err); else { @@ -1106,7 +1116,7 @@ create_n_graft: if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue) ntx = p->ops->cl_ops->select_queue(p, tcm); - q = qdisc_create(dev, netdev_get_tx_queue(dev, ntx), + q = qdisc_create(dev, netdev_get_tx_queue(dev, ntx), p, tcm->tcm_parent, tcm->tcm_handle, tca, &err); } diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index c84dec9c8c7d..dd5ee022f1f7 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -64,6 +64,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt) priv->qdiscs[ntx] = qdisc; } + sch->flags |= TCQ_F_MQROOT; return 0; err: -- cgit v1.2.3