diff options
Diffstat (limited to 'net/sched')
39 files changed, 327 insertions, 571 deletions
diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 475df8449be9..b4662888bdbd 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -111,6 +111,17 @@ config NET_SCH_PRIO To compile this code as a module, choose M here: the module will be called sch_prio. +config NET_SCH_RR + tristate "Multi Band Round Robin Queuing (RR)" + select NET_SCH_PRIO + ---help--- + Say Y here if you want to use an n-band round robin packet + scheduler. + + The module uses sch_prio for its framework and is aliased as + sch_rr, so it will load sch_prio, although it is referred + to using sch_rr. + config NET_SCH_RED tristate "Random Early Detection (RED)" ---help--- @@ -275,7 +286,6 @@ config CLS_U32_MARK config NET_CLS_RSVP tristate "IPv4 Resource Reservation Protocol (RSVP)" select NET_CLS - select NET_ESTIMATOR ---help--- The Resource Reservation Protocol (RSVP) permits end systems to request a minimum and maximum data flow rate for a connection; this @@ -290,7 +300,6 @@ config NET_CLS_RSVP config NET_CLS_RSVP6 tristate "IPv6 Resource Reservation Protocol (RSVP6)" select NET_CLS - select NET_ESTIMATOR ---help--- The Resource Reservation Protocol (RSVP) permits end systems to request a minimum and maximum data flow rate for a connection; this @@ -382,7 +391,6 @@ config NET_EMATCH_TEXT config NET_CLS_ACT bool "Actions" - select NET_ESTIMATOR ---help--- Say Y here if you want to use traffic control actions. Actions get attached to classifiers and are invoked after a successful @@ -465,7 +473,6 @@ config NET_ACT_SIMP config NET_CLS_POLICE bool "Traffic Policing (obsolete)" depends on NET_CLS_ACT!=y - select NET_ESTIMATOR ---help--- Say Y here if you want to do traffic policing, i.e. strict bandwidth limiting. This option is obsoleted by the traffic @@ -480,14 +487,6 @@ config NET_CLS_IND classification based on the incoming device. This option is likely to disappear in favour of the metadata ematch. -config NET_ESTIMATOR - bool "Rate estimator" - ---help--- - Say Y here to allow using rate estimators to estimate the current - rate-of-flow for network devices, queues, etc. This module is - automatically selected if needed but can be selected manually for - statistical purposes. - endif # NET_SCHED endmenu diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 711dd26c95c3..feef366cad5d 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -11,23 +11,13 @@ * */ -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/kmod.h> -#include <net/sock.h> #include <net/sch_generic.h> #include <net/act_api.h> #include <net/netlink.h> @@ -42,10 +32,8 @@ void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) write_lock_bh(hinfo->lock); *p1p = p->tcfc_next; write_unlock_bh(hinfo->lock); -#ifdef CONFIG_NET_ESTIMATOR gen_kill_estimator(&p->tcfc_bstats, &p->tcfc_rate_est); -#endif kfree(p); return; } @@ -232,15 +220,12 @@ struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est, struct tc_acti p->tcfc_bindcnt = 1; spin_lock_init(&p->tcfc_lock); - p->tcfc_stats_lock = &p->tcfc_lock; p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo); p->tcfc_tm.install = jiffies; p->tcfc_tm.lastuse = jiffies; -#ifdef CONFIG_NET_ESTIMATOR if (est) gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est, - p->tcfc_stats_lock, est); -#endif + &p->tcfc_lock, est); a->priv = (void *) p; return p; } @@ -599,12 +584,12 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, if (compat_mode) { if (a->type == TCA_OLD_COMPAT) err = gnet_stats_start_copy_compat(skb, 0, - TCA_STATS, TCA_XSTATS, h->tcf_stats_lock, &d); + TCA_STATS, TCA_XSTATS, &h->tcf_lock, &d); else return 0; } else err = gnet_stats_start_copy(skb, TCA_ACT_STATS, - h->tcf_stats_lock, &d); + &h->tcf_lock, &d); if (err < 0) goto errout; @@ -614,9 +599,7 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, goto errout; if (gnet_stats_copy_basic(&d, &h->tcf_bstats) < 0 || -#ifdef CONFIG_NET_ESTIMATOR gnet_stats_copy_rate_est(&d, &h->tcf_rate_est) < 0 || -#endif gnet_stats_copy_queue(&d, &h->tcf_qstats) < 0) goto errout; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 7517f3791541..a9631e426d91 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -10,26 +10,15 @@ * */ -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/module.h> #include <linux/init.h> -#include <linux/proc_fs.h> #include <net/netlink.h> -#include <net/sock.h> #include <net/pkt_sched.h> #include <linux/tc_act/tc_gact.h> #include <net/tc_act/tc_gact.h> diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 00b05f422d45..6b407ece953c 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -11,27 +11,15 @@ * Copyright: Jamal Hadi Salim (2002-4) */ -#include <asm/uaccess.h> -#include <asm/system.h> -#include <asm/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/module.h> #include <linux/init.h> -#include <linux/proc_fs.h> -#include <linux/kmod.h> #include <net/netlink.h> -#include <net/sock.h> #include <net/pkt_sched.h> #include <linux/tc_act/tc_ipt.h> #include <net/tc_act/tc_ipt.h> diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index de21c92faaa2..579578944ae7 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -12,31 +12,19 @@ * */ -#include <asm/uaccess.h> -#include <asm/system.h> -#include <asm/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/module.h> #include <linux/init.h> -#include <linux/proc_fs.h> #include <net/netlink.h> -#include <net/sock.h> #include <net/pkt_sched.h> #include <linux/tc_act/tc_mirred.h> #include <net/tc_act/tc_mirred.h> -#include <linux/etherdevice.h> #include <linux/if_arp.h> #define MIRRED_TAB_MASK 7 diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 6f8684b5617e..b46fab5fb323 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -9,26 +9,15 @@ * Authors: Jamal Hadi Salim (2002-4) */ -#include <asm/uaccess.h> -#include <asm/system.h> -#include <asm/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/module.h> #include <linux/init.h> -#include <linux/proc_fs.h> #include <net/netlink.h> -#include <net/sock.h> #include <net/pkt_sched.h> #include <linux/tc_act/tc_pedit.h> #include <net/tc_act/tc_pedit.h> diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 616f465f407e..d20403890877 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -10,25 +10,15 @@ * J Hadi Salim (action changes) */ -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/module.h> #include <linux/rtnetlink.h> #include <linux/init.h> -#include <net/sock.h> #include <net/act_api.h> #include <net/netlink.h> @@ -118,10 +108,8 @@ void tcf_police_destroy(struct tcf_police *p) write_lock_bh(&police_lock); *p1p = p->tcf_next; write_unlock_bh(&police_lock); -#ifdef CONFIG_NET_ESTIMATOR gen_kill_estimator(&p->tcf_bstats, &p->tcf_rate_est); -#endif if (p->tcfp_R_tab) qdisc_put_rtab(p->tcfp_R_tab); if (p->tcfp_P_tab) @@ -185,7 +173,6 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est, ret = ACT_P_CREATED; police->tcf_refcnt = 1; spin_lock_init(&police->tcf_lock); - police->tcf_stats_lock = &police->tcf_lock; if (bind) police->tcf_bindcnt = 1; override: @@ -227,15 +214,13 @@ override: police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); police->tcf_action = parm->action; -#ifdef CONFIG_NET_ESTIMATOR if (tb[TCA_POLICE_AVRATE-1]) police->tcfp_ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); if (est) gen_replace_estimator(&police->tcf_bstats, &police->tcf_rate_est, - police->tcf_stats_lock, est); -#endif + &police->tcf_lock, est); spin_unlock_bh(&police->tcf_lock); if (ret != ACT_P_CREATED) @@ -281,14 +266,12 @@ static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, police->tcf_bstats.bytes += skb->len; police->tcf_bstats.packets++; -#ifdef CONFIG_NET_ESTIMATOR if (police->tcfp_ewma_rate && police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { police->tcf_qstats.overlimits++; spin_unlock(&police->tcf_lock); return police->tcf_action; } -#endif if (skb->len <= police->tcfp_mtu) { if (police->tcfp_R_tab == NULL) { @@ -348,10 +331,8 @@ tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) if (police->tcfp_result) RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &police->tcfp_result); -#ifdef CONFIG_NET_ESTIMATOR if (police->tcfp_ewma_rate) RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate); -#endif return skb->len; rtattr_failure: @@ -458,7 +439,6 @@ struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est) police->tcf_refcnt = 1; spin_lock_init(&police->tcf_lock); - police->tcf_stats_lock = &police->tcf_lock; if (parm->rate.rate) { police->tcfp_R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); @@ -477,14 +457,12 @@ struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est) goto failure; police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); } -#ifdef CONFIG_NET_ESTIMATOR if (tb[TCA_POLICE_AVRATE-1]) { if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32)) goto failure; police->tcfp_ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); } -#endif police->tcfp_toks = police->tcfp_burst = parm->burst; police->tcfp_mtu = parm->mtu; if (police->tcfp_mtu == 0) { @@ -498,11 +476,9 @@ struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est) police->tcf_index = parm->index ? parm->index : tcf_police_new_index(); police->tcf_action = parm->action; -#ifdef CONFIG_NET_ESTIMATOR if (est) gen_new_estimator(&police->tcf_bstats, &police->tcf_rate_est, - police->tcf_stats_lock, est); -#endif + &police->tcf_lock, est); h = tcf_hash(police->tcf_index, POL_TAB_MASK); write_lock_bh(&police_lock); police->tcf_next = tcf_police_ht[h]; @@ -528,14 +504,12 @@ int tcf_police(struct sk_buff *skb, struct tcf_police *police) police->tcf_bstats.bytes += skb->len; police->tcf_bstats.packets++; -#ifdef CONFIG_NET_ESTIMATOR if (police->tcfp_ewma_rate && police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { police->tcf_qstats.overlimits++; spin_unlock(&police->tcf_lock); return police->tcf_action; } -#endif if (skb->len <= police->tcfp_mtu) { if (police->tcfp_R_tab == NULL) { spin_unlock(&police->tcf_lock); @@ -591,10 +565,8 @@ int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police) if (police->tcfp_result) RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &police->tcfp_result); -#ifdef CONFIG_NET_ESTIMATOR if (police->tcfp_ewma_rate) RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate); -#endif return skb->len; rtattr_failure: @@ -607,14 +579,12 @@ int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *police) struct gnet_dump d; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, police->tcf_stats_lock, + TCA_XSTATS, &police->tcf_lock, &d) < 0) goto errout; if (gnet_stats_copy_basic(&d, &police->tcf_bstats) < 0 || -#ifdef CONFIG_NET_ESTIMATOR gnet_stats_copy_rate_est(&d, &police->tcf_rate_est) < 0 || -#endif gnet_stats_copy_queue(&d, &police->tcf_qstats) < 0) goto errout; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 36e1edad5990..fb84ef33d14f 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -13,7 +13,6 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/kernel.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <net/netlink.h> diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index ebf94edf0478..36b72aab1bde 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -14,26 +14,16 @@ * */ -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/kmod.h> #include <linux/netlink.h> #include <net/netlink.h> -#include <net/sock.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index c885412d79d5..8dbcf2771a46 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -13,7 +13,6 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> #include <linux/errno.h> #include <linux/rtnetlink.h> #include <linux/skbuff.h> diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index bbec4a0d4dcb..8adbd6a37d14 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -19,29 +19,12 @@ */ #include <linux/module.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/if_ether.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/notifier.h> -#include <linux/netfilter.h> -#include <net/ip.h> -#include <net/netlink.h> -#include <net/route.h> #include <linux/skbuff.h> -#include <net/sock.h> +#include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index cc941d0ee3a5..0a8409c1d28a 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -10,28 +10,14 @@ */ #include <linux/module.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/if_ether.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/notifier.h> -#include <net/ip.h> -#include <net/netlink.h> -#include <net/route.h> #include <linux/skbuff.h> -#include <net/sock.h> +#include <net/dst.h> +#include <net/route.h> +#include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c index 0a683c07c648..cbb5e0d600f3 100644 --- a/net/sched/cls_rsvp.c +++ b/net/sched/cls_rsvp.c @@ -10,27 +10,12 @@ */ #include <linux/module.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/if_ether.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/notifier.h> -#include <net/ip.h> -#include <net/route.h> #include <linux/skbuff.h> -#include <net/sock.h> +#include <net/ip.h> #include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c index 93b6abed57db..dd08aea2aee5 100644 --- a/net/sched/cls_rsvp6.c +++ b/net/sched/cls_rsvp6.c @@ -10,28 +10,12 @@ */ #include <linux/module.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/if_ether.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/notifier.h> -#include <net/ip.h> #include <linux/ipv6.h> -#include <net/route.h> #include <linux/skbuff.h> -#include <net/sock.h> #include <net/act_api.h> #include <net/pkt_cls.h> #include <net/netlink.h> diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 47ac0c556429..2314820a080a 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -9,12 +9,9 @@ #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/errno.h> -#include <linux/netdevice.h> -#include <net/ip.h> #include <net/act_api.h> #include <net/netlink.h> #include <net/pkt_cls.h> -#include <net/route.h> /* diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index c7a347bd6d70..77961e2314dc 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -30,30 +30,14 @@ * nfmark match added by Catalin(ux aka Dino) BOIE <catab at umbrella.ro> */ -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/if_ether.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/notifier.h> #include <linux/rtnetlink.h> -#include <net/ip.h> -#include <net/netlink.h> -#include <net/route.h> #include <linux/skbuff.h> -#include <net/sock.h> +#include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> diff --git a/net/sched/em_cmp.c b/net/sched/em_cmp.c index 8d6dacd81900..cc49c932641d 100644 --- a/net/sched/em_cmp.c +++ b/net/sched/em_cmp.c @@ -98,3 +98,4 @@ MODULE_LICENSE("GPL"); module_init(init_em_cmp); module_exit(exit_em_cmp); +MODULE_ALIAS_TCF_EMATCH(TCF_EM_CMP); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 60acf8cdb27b..650f09c8bd6a 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -848,3 +848,5 @@ MODULE_LICENSE("GPL"); module_init(init_em_meta); module_exit(exit_em_meta); + +MODULE_ALIAS_TCF_EMATCH(TCF_EM_META); diff --git a/net/sched/em_nbyte.c b/net/sched/em_nbyte.c index b4b36efce292..370a1b2ea317 100644 --- a/net/sched/em_nbyte.c +++ b/net/sched/em_nbyte.c @@ -76,3 +76,5 @@ MODULE_LICENSE("GPL"); module_init(init_em_nbyte); module_exit(exit_em_nbyte); + +MODULE_ALIAS_TCF_EMATCH(TCF_EM_NBYTE); diff --git a/net/sched/em_text.c b/net/sched/em_text.c index e8f46169449d..d5cd86efb7d0 100644 --- a/net/sched/em_text.c +++ b/net/sched/em_text.c @@ -150,3 +150,5 @@ MODULE_LICENSE("GPL"); module_init(init_em_text); module_exit(exit_em_text); + +MODULE_ALIAS_TCF_EMATCH(TCF_EM_TEXT); diff --git a/net/sched/em_u32.c b/net/sched/em_u32.c index 0a2a7fe08de3..112796e4a7c4 100644 --- a/net/sched/em_u32.c +++ b/net/sched/em_u32.c @@ -60,3 +60,5 @@ MODULE_LICENSE("GPL"); module_init(init_em_u32); module_exit(exit_em_u32); + +MODULE_ALIAS_TCF_EMATCH(TCF_EM_U32); diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 63146d339d81..f3a104e323bd 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -84,9 +84,7 @@ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> -#include <linux/mm.h> #include <linux/errno.h> -#include <linux/interrupt.h> #include <linux/rtnetlink.h> #include <linux/skbuff.h> #include <net/pkt_cls.h> @@ -224,6 +222,19 @@ static int tcf_em_validate(struct tcf_proto *tp, if (em->ops == NULL) { err = -ENOENT; +#ifdef CONFIG_KMOD + __rtnl_unlock(); + request_module("ematch-kind-%u", em_hdr->kind); + rtnl_lock(); + em->ops = tcf_em_lookup(em_hdr->kind); + if (em->ops) { + /* We dropped the RTNL mutex in order to + * perform the module load. Tell the caller + * to replay the request. */ + module_put(em->ops->owner); + err = -EAGAIN; + } +#endif goto errout; } diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index bec600af03ca..d92ea26982c5 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -19,30 +19,18 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/init.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/kmod.h> #include <linux/list.h> -#include <linux/bitops.h> #include <linux/hrtimer.h> #include <net/netlink.h> -#include <net/sock.h> #include <net/pkt_sched.h> -#include <asm/processor.h> -#include <asm/uaccess.h> -#include <asm/system.h> - static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid, struct Qdisc *old, struct Qdisc *new); static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n, @@ -515,7 +503,6 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) sch->handle = handle; if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) { -#ifdef CONFIG_NET_ESTIMATOR if (tca[TCA_RATE-1]) { err = gen_new_estimator(&sch->bstats, &sch->rate_est, sch->stats_lock, @@ -531,7 +518,6 @@ qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp) goto err_out3; } } -#endif qdisc_lock_tree(dev); list_add_tail(&sch->list, &dev->qdisc_list); qdisc_unlock_tree(dev); @@ -559,11 +545,9 @@ static int qdisc_change(struct Qdisc *sch, struct rtattr **tca) if (err) return err; } -#ifdef CONFIG_NET_ESTIMATOR if (tca[TCA_RATE-1]) gen_replace_estimator(&sch->bstats, &sch->rate_est, sch->stats_lock, tca[TCA_RATE-1]); -#endif return 0; } @@ -839,9 +823,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, goto rtattr_failure; if (gnet_stats_copy_basic(&d, &q->bstats) < 0 || -#ifdef CONFIG_NET_ESTIMATOR gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 || -#endif gnet_stats_copy_queue(&d, &q->qstats) < 0) goto rtattr_failure; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index d1c383fca82c..54b92d22796c 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -8,15 +8,12 @@ #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> -#include <linux/interrupt.h> #include <linux/atmdev.h> #include <linux/atmclip.h> -#include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <linux/file.h> /* for fput */ #include <net/netlink.h> #include <net/pkt_sched.h> -#include <net/sock.h> extern struct socket *sockfd_lookup(int fd, int *err); /* @@@ fix this */ @@ -71,7 +68,6 @@ struct atm_flow_data { int ref; /* reference count */ struct gnet_stats_basic bstats; struct gnet_stats_queue qstats; - spinlock_t *stats_lock; struct atm_flow_data *next; struct atm_flow_data *excess; /* flow for excess traffic; NULL to set CLP instead */ diff --git a/net/sched/sch_blackhole.c b/net/sched/sch_blackhole.c index cb0c456aa349..f914fc43a124 100644 --- a/net/sched/sch_blackhole.c +++ b/net/sched/sch_blackhole.c @@ -14,7 +14,6 @@ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <net/pkt_sched.h> diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index ee2d5967d109..b184c3545145 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -11,28 +11,12 @@ */ #include <linux/module.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/if_ether.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/notifier.h> -#include <net/ip.h> -#include <net/netlink.h> -#include <net/route.h> #include <linux/skbuff.h> -#include <net/sock.h> +#include <net/netlink.h> #include <net/pkt_sched.h> @@ -148,7 +132,6 @@ struct cbq_class struct gnet_stats_basic bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; - spinlock_t *stats_lock; struct tc_cbq_xstats xstats; struct tcf_proto *filter_list; @@ -1442,7 +1425,6 @@ static int cbq_init(struct Qdisc *sch, struct rtattr *opt) q->link.ewma_log = TC_CBQ_DEF_EWMA; q->link.avpkt = q->link.allot/2; q->link.minidle = -0x7FFFFFFF; - q->link.stats_lock = &sch->dev->queue_lock; qdisc_watchdog_init(&q->watchdog, sch); hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); @@ -1653,9 +1635,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg, cl->xstats.undertime = cl->undertime - q->now; if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || -#ifdef CONFIG_NET_ESTIMATOR gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || -#endif gnet_stats_copy_queue(d, &cl->qstats) < 0) return -1; @@ -1726,9 +1706,7 @@ static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) tcf_destroy_chain(cl->filter_list); qdisc_destroy(cl->q); qdisc_put_rtab(cl->R_tab); -#ifdef CONFIG_NET_ESTIMATOR gen_kill_estimator(&cl->bstats, &cl->rate_est); -#endif if (cl != &q->link) kfree(cl); } @@ -1873,11 +1851,10 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t sch_tree_unlock(sch); -#ifdef CONFIG_NET_ESTIMATOR if (tca[TCA_RATE-1]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - cl->stats_lock, tca[TCA_RATE-1]); -#endif + &sch->dev->queue_lock, + tca[TCA_RATE-1]); return 0; } @@ -1935,7 +1912,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t cl->allot = parent->allot; cl->quantum = cl->allot; cl->weight = cl->R_tab->rate.rate; - cl->stats_lock = &sch->dev->queue_lock; sch_tree_lock(sch); cbq_link_class(cl); @@ -1963,11 +1939,9 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **t cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1])); sch_tree_unlock(sch); -#ifdef CONFIG_NET_ESTIMATOR if (tca[TCA_RATE-1]) gen_new_estimator(&cl->bstats, &cl->rate_est, - cl->stats_lock, tca[TCA_RATE-1]); -#endif + &sch->dev->queue_lock, tca[TCA_RATE-1]); *arg = (unsigned long)cl; return 0; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 3c6fd181263f..4d2c233a8611 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -9,7 +9,6 @@ #include <linux/string.h> #include <linux/errno.h> #include <linux/skbuff.h> -#include <linux/netdevice.h> /* for pkt_sched */ #include <linux/rtnetlink.h> #include <net/pkt_sched.h> #include <net/dsfield.h> diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index c2689f4ba8de..c264308f17c1 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -13,7 +13,6 @@ #include <linux/types.h> #include <linux/kernel.h> #include <linux/errno.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <net/pkt_sched.h> diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index f4d34480a093..c81649cf0b9e 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -11,27 +11,19 @@ * - Ingress support */ -#include <asm/uaccess.h> -#include <asm/system.h> #include <linux/bitops.h> #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> #include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> #include <linux/init.h> #include <linux/rcupdate.h> #include <linux/list.h> -#include <net/sock.h> #include <net/pkt_sched.h> /* Main transmission queue. */ @@ -59,122 +51,143 @@ void qdisc_unlock_tree(struct net_device *dev) spin_unlock_bh(&dev->queue_lock); } -/* - dev->queue_lock serializes queue accesses for this device - AND dev->qdisc pointer itself. +static inline int qdisc_qlen(struct Qdisc *q) +{ + return q->q.qlen; +} - netif_tx_lock serializes accesses to device driver. +static inline int dev_requeue_skb(struct sk_buff *skb, struct net_device *dev, + struct Qdisc *q) +{ + if (unlikely(skb->next)) + dev->gso_skb = skb; + else + q->ops->requeue(skb, q); - dev->queue_lock and netif_tx_lock are mutually exclusive, - if one is grabbed, another must be free. - */ + netif_schedule(dev); + return 0; +} +static inline struct sk_buff *dev_dequeue_skb(struct net_device *dev, + struct Qdisc *q) +{ + struct sk_buff *skb; -/* Kick device. + if ((skb = dev->gso_skb)) + dev->gso_skb = NULL; + else + skb = q->dequeue(q); - Returns: 0 - queue is empty or throttled. - >0 - queue is not empty. + return skb; +} - NOTE: Called under dev->queue_lock with locally disabled BH. -*/ +static inline int handle_dev_cpu_collision(struct sk_buff *skb, + struct net_device *dev, + struct Qdisc *q) +{ + int ret; + if (unlikely(dev->xmit_lock_owner == smp_processor_id())) { + /* + * Same CPU holding the lock. It may be a transient + * configuration error, when hard_start_xmit() recurses. We + * detect it by checking xmit owner and drop the packet when + * deadloop is detected. Return OK to try the next skb. + */ + kfree_skb(skb); + if (net_ratelimit()) + printk(KERN_WARNING "Dead loop on netdevice %s, " + "fix it urgently!\n", dev->name); + ret = qdisc_qlen(q); + } else { + /* + * Another cpu is holding lock, requeue & delay xmits for + * some time. + */ + __get_cpu_var(netdev_rx_stat).cpu_collision++; + ret = dev_requeue_skb(skb, dev, q); + } + + return ret; +} + +/* + * NOTE: Called under dev->queue_lock with locally disabled BH. + * + * __LINK_STATE_QDISC_RUNNING guarantees only one CPU can process this + * device at a time. dev->queue_lock serializes queue accesses for + * this device AND dev->qdisc pointer itself. + * + * netif_tx_lock serializes accesses to device driver. + * + * dev->queue_lock and netif_tx_lock are mutually exclusive, + * if one is grabbed, another must be free. + * + * Note, that this procedure can be called by a watchdog timer + * + * Returns to the caller: + * 0 - queue is empty or throttled. + * >0 - queue is not empty. + * + */ static inline int qdisc_restart(struct net_device *dev) { struct Qdisc *q = dev->qdisc; struct sk_buff *skb; + unsigned lockless; + int ret; /* Dequeue packet */ - if (((skb = dev->gso_skb)) || ((skb = q->dequeue(q)))) { - unsigned nolock = (dev->features & NETIF_F_LLTX); + if (unlikely((skb = dev_dequeue_skb(dev, q)) == NULL)) + return 0; + + /* + * When the driver has LLTX set, it does its own locking in + * start_xmit. These checks are worth it because even uncongested + * locks can be quite expensive. The driver can do a trylock, as + * is being done here; in case of lock contention it should return + * NETDEV_TX_LOCKED and the packet will be requeued. + */ + lockless = (dev->features & NETIF_F_LLTX); - dev->gso_skb = NULL; + if (!lockless && !netif_tx_trylock(dev)) { + /* Another CPU grabbed the driver tx lock */ + return handle_dev_cpu_collision(skb, dev, q); + } - /* - * When the driver has LLTX set it does its own locking - * in start_xmit. No need to add additional overhead by - * locking again. These checks are worth it because - * even uncongested locks can be quite expensive. - * The driver can do trylock like here too, in case - * of lock congestion it should return -1 and the packet - * will be requeued. - */ - if (!nolock) { - if (!netif_tx_trylock(dev)) { - collision: - /* So, someone grabbed the driver. */ - - /* It may be transient configuration error, - when hard_start_xmit() recurses. We detect - it by checking xmit owner and drop the - packet when deadloop is detected. - */ - if (dev->xmit_lock_owner == smp_processor_id()) { - kfree_skb(skb); - if (net_ratelimit()) - printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name); - goto out; - } - __get_cpu_var(netdev_rx_stat).cpu_collision++; - goto requeue; - } - } + /* And release queue */ + spin_unlock(&dev->queue_lock); - { - /* And release queue */ - spin_unlock(&dev->queue_lock); - - if (!netif_queue_stopped(dev)) { - int ret; - - ret = dev_hard_start_xmit(skb, dev); - if (ret == NETDEV_TX_OK) { - if (!nolock) { - netif_tx_unlock(dev); - } - spin_lock(&dev->queue_lock); - q = dev->qdisc; - goto out; - } - if (ret == NETDEV_TX_LOCKED && nolock) { - spin_lock(&dev->queue_lock); - q = dev->qdisc; - goto collision; - } - } + ret = dev_hard_start_xmit(skb, dev); - /* NETDEV_TX_BUSY - we need to requeue */ - /* Release the driver */ - if (!nolock) { - netif_tx_unlock(dev); - } - spin_lock(&dev->queue_lock); - q = dev->qdisc; - } + if (!lockless) + netif_tx_unlock(dev); - /* Device kicked us out :( - This is possible in three cases: + spin_lock(&dev->queue_lock); + q = dev->qdisc; - 0. driver is locked - 1. fastroute is enabled - 2. device cannot determine busy state - before start of transmission (f.e. dialout) - 3. device is buggy (ppp) - */ + switch (ret) { + case NETDEV_TX_OK: + /* Driver sent out skb successfully */ + ret = qdisc_qlen(q); + break; -requeue: - if (unlikely(q == &noop_qdisc)) - kfree_skb(skb); - else if (skb->next) - dev->gso_skb = skb; - else - q->ops->requeue(skb, q); - netif_schedule(dev); + case NETDEV_TX_LOCKED: + /* Driver try lock failed */ + ret = handle_dev_cpu_collision(skb, dev, q); + break; + + default: + /* Driver returned NETDEV_TX_BUSY - requeue skb */ + if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit())) + printk(KERN_WARNING "BUG %s code %d qlen %d\n", + dev->name, ret, q->q.qlen); + + ret = dev_requeue_skb(skb, dev, q); + break; } - return 0; -out: - BUG_ON((int) q->q.qlen < 0); - return q->q.qlen; + return ret; } void __qdisc_run(struct net_device *dev) @@ -493,9 +506,7 @@ void qdisc_destroy(struct Qdisc *qdisc) return; list_del(&qdisc->list); -#ifdef CONFIG_NET_ESTIMATOR gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est); -#endif if (ops->reset) ops->reset(qdisc); if (ops->destroy) diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index fa1b4fe7a5fd..3cc6dda02e2e 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -21,7 +21,6 @@ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <net/pkt_sched.h> #include <net/red.h> diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 9d124c4ee3a7..874452c41a01 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -53,7 +53,6 @@ #include <linux/module.h> #include <linux/types.h> #include <linux/errno.h> -#include <linux/jiffies.h> #include <linux/compiler.h> #include <linux/spinlock.h> #include <linux/skbuff.h> @@ -62,13 +61,11 @@ #include <linux/list.h> #include <linux/rbtree.h> #include <linux/init.h> -#include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <linux/pkt_sched.h> #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/pkt_cls.h> -#include <asm/system.h> #include <asm/div64.h> /* @@ -122,7 +119,6 @@ struct hfsc_class struct gnet_stats_basic bstats; struct gnet_stats_queue qstats; struct gnet_stats_rate_est rate_est; - spinlock_t *stats_lock; unsigned int level; /* class level in hierarchy */ struct tcf_proto *filter_list; /* filter list */ unsigned int filter_cnt; /* filter count */ @@ -1054,11 +1050,10 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, } sch_tree_unlock(sch); -#ifdef CONFIG_NET_ESTIMATOR if (tca[TCA_RATE-1]) gen_replace_estimator(&cl->bstats, &cl->rate_est, - cl->stats_lock, tca[TCA_RATE-1]); -#endif + &sch->dev->queue_lock, + tca[TCA_RATE-1]); return 0; } @@ -1098,7 +1093,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid); if (cl->qdisc == NULL) cl->qdisc = &noop_qdisc; - cl->stats_lock = &sch->dev->queue_lock; INIT_LIST_HEAD(&cl->children); cl->vt_tree = RB_ROOT; cl->cf_tree = RB_ROOT; @@ -1112,11 +1106,9 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->cl_pcvtoff = parent->cl_cvtoff; sch_tree_unlock(sch); -#ifdef CONFIG_NET_ESTIMATOR if (tca[TCA_RATE-1]) gen_new_estimator(&cl->bstats, &cl->rate_est, - cl->stats_lock, tca[TCA_RATE-1]); -#endif + &sch->dev->queue_lock, tca[TCA_RATE-1]); *arg = (unsigned long)cl; return 0; } @@ -1128,9 +1120,7 @@ hfsc_destroy_class(struct Qdisc *sch, struct hfsc_class *cl) tcf_destroy_chain(cl->filter_list); qdisc_destroy(cl->qdisc); -#ifdef CONFIG_NET_ESTIMATOR gen_kill_estimator(&cl->bstats, &cl->rate_est); -#endif if (cl != &q->root) kfree(cl); } @@ -1384,9 +1374,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg, xstats.rtwork = cl->cl_cumul; if (gnet_stats_copy_basic(d, &cl->bstats) < 0 || -#ifdef CONFIG_NET_ESTIMATOR gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 || -#endif gnet_stats_copy_queue(d, &cl->qstats) < 0) return -1; @@ -1448,8 +1436,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt) return -EINVAL; qopt = RTA_DATA(opt); - sch->stats_lock = &sch->dev->queue_lock; - q->defcls = qopt->defcls; for (i = 0; i < HFSC_HSIZE; i++) INIT_LIST_HEAD(&q->clhash[i]); @@ -1464,7 +1450,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct rtattr *opt) sch->handle); if (q->root.qdisc == NULL) q->root.qdisc = &noop_qdisc; - q->root.stats_lock = &sch->dev->queue_lock; INIT_LIST_HEAD(&q->root.children); q->root.vt_tree = RB_ROOT; q->root.cf_tree = RB_ROOT; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 035788c5b7f8..b417a95df322 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -28,32 +28,16 @@ * $Id: sch_htb.c,v 1.25 2003/12/07 11:08:25 devik Exp devik $ */ #include <linux/module.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/if_ether.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/notifier.h> -#include <net/ip.h> -#include <net/route.h> #include <linux/skbuff.h> #include <linux/list.h> #include <linux/compiler.h> +#include <linux/rbtree.h> #include <net/netlink.h> -#include <net/sock.h> #include <net/pkt_sched.h> -#include <linux/rbtree.h> /* HTB algorithm. Author: devik@cdi.cz @@ -69,8 +53,6 @@ */ #define HTB_HSIZE 16 /* classid hash size */ -#define HTB_EWMAC 2 /* rate average over HTB_EWMAC*HTB_HSIZE sec */ -#define HTB_RATECM 1 /* whether to use rate computer */ #define HTB_HYSTERESIS 1 /* whether to use mode hysteresis for speedup */ #define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */ @@ -95,12 +77,6 @@ struct htb_class { struct tc_htb_xstats xstats; /* our special stats */ int refcnt; /* usage count of this class */ -#ifdef HTB_RATECM - /* rate measurement counters */ - unsigned long rate_bytes, sum_bytes; - unsigned long rate_packets, sum_packets; -#endif - /* topology */ int level; /* our level (see above) */ struct htb_class *parent; /* parent class */ @@ -153,15 +129,12 @@ struct htb_class { /* of un.leaf originals should be done. */ }; -/* TODO: maybe compute rate when size is too large .. or drop ? */ static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate, int size) { int slot = size >> rate->rate.cell_log; - if (slot > 255) { - cl->xstats.giants++; - slot = 255; - } + if (slot > 255) + return (rate->data[255]*(slot >> 8) + rate->data[slot & 0xFF]); return rate->data[slot]; } @@ -194,10 +167,6 @@ struct htb_sched { int rate2quantum; /* quant = rate / rate2quantum */ psched_time_t now; /* cached dequeue time */ struct qdisc_watchdog watchdog; -#ifdef HTB_RATECM - struct timer_list rttim; /* rate computer timer */ - int recmp_bucket; /* which hash bucket to recompute next */ -#endif /* non shaped skbs; let them go directly thru */ struct sk_buff_head direct_queue; @@ -634,13 +603,14 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) cl->qstats.drops++; return NET_XMIT_DROP; } else { - cl->bstats.packets++; + cl->bstats.packets += + skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1; cl->bstats.bytes += skb->len; htb_activate(q, cl); } sch->q.qlen++; - sch->bstats.packets++; + sch->bstats.packets += skb_is_gso(skb)?skb_shinfo(skb)->gso_segs:1; sch->bstats.bytes += skb->len; return NET_XMIT_SUCCESS; } @@ -677,34 +647,6 @@ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) return NET_XMIT_SUCCESS; } -#ifdef HTB_RATECM -#define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0 -static void htb_rate_timer(unsigned long arg) -{ - struct Qdisc *sch = (struct Qdisc *)arg; - struct htb_sched *q = qdisc_priv(sch); - struct hlist_node *p; - struct htb_class *cl; - - - /* lock queue so that we can muck with it */ - spin_lock_bh(&sch->dev->queue_lock); - - q->rttim.expires = jiffies + HZ; - add_timer(&q->rttim); - - /* scan and recompute one bucket at time */ - if (++q->recmp_bucket >= HTB_HSIZE) - q->recmp_bucket = 0; - - hlist_for_each_entry(cl,p, q->hash + q->recmp_bucket, hlist) { - RT_GEN(cl->sum_bytes, cl->rate_bytes); - RT_GEN(cl->sum_packets, cl->rate_packets); - } - spin_unlock_bh(&sch->dev->queue_lock); -} -#endif - /** * htb_charge_class - charges amount "bytes" to leaf and ancestors * @@ -717,8 +659,9 @@ static void htb_rate_timer(unsigned long arg) * In such case we remove class from event queue first. */ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, - int level, int bytes) + int level, struct sk_buff *skb) { + int bytes = skb->len; long toks, diff; enum htb_cmode old_mode; @@ -750,16 +693,12 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, if (cl->cmode != HTB_CAN_SEND) htb_add_to_wait_tree(q, cl, diff); } -#ifdef HTB_RATECM - /* update rate counters */ - cl->sum_bytes += bytes; - cl->sum_packets++; -#endif /* update byte stats except for leaves which are already updated */ if (cl->level) { cl->bstats.bytes += bytes; - cl->bstats.packets++; + cl->bstats.packets += skb_is_gso(skb)? + skb_shinfo(skb)->gso_segs:1; } cl = cl->parent; } @@ -943,7 +882,7 @@ next: gives us slightly better performance */ if (!cl->un.leaf.q->q.qlen) htb_deactivate(q, cl); - htb_charge_class(q, cl, level, skb->len); + htb_charge_class(q, cl, level, skb); } return skb; } @@ -1095,13 +1034,6 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt) if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ q->direct_qlen = 2; -#ifdef HTB_RATECM - init_timer(&q->rttim); - q->rttim.function = htb_rate_timer; - q->rttim.data = (unsigned long)sch; - q->rttim.expires = jiffies + HZ; - add_timer(&q->rttim); -#endif if ((q->rate2quantum = gopt->rate2quantum) < 1) q->rate2quantum = 1; q->defcls = gopt->defcls; @@ -1175,11 +1107,6 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) { struct htb_class *cl = (struct htb_class *)arg; -#ifdef HTB_RATECM - cl->rate_est.bps = cl->rate_bytes / (HTB_EWMAC * HTB_HSIZE); - cl->rate_est.pps = cl->rate_packets / (HTB_EWMAC * HTB_HSIZE); -#endif - if (!cl->level && cl->un.leaf.q) cl->qstats.qlen = cl->un.leaf.q->q.qlen; cl->xstats.tokens = cl->tokens; @@ -1277,6 +1204,7 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) BUG_TRAP(cl->un.leaf.q); qdisc_destroy(cl->un.leaf.q); } + gen_kill_estimator(&cl->bstats, &cl->rate_est); qdisc_put_rtab(cl->rate); qdisc_put_rtab(cl->ceil); @@ -1305,9 +1233,6 @@ static void htb_destroy(struct Qdisc *sch) struct htb_sched *q = qdisc_priv(sch); qdisc_watchdog_cancel(&q->watchdog); -#ifdef HTB_RATECM - del_timer_sync(&q->rttim); -#endif /* This line used to be after htb_destroy_class call below and surprisingly it worked in 2.4. But it must precede it because filter need its target class alive to be able to call @@ -1403,6 +1328,20 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!cl) { /* new class */ struct Qdisc *new_q; int prio; + struct { + struct rtattr rta; + struct gnet_estimator opt; + } est = { + .rta = { + .rta_len = RTA_LENGTH(sizeof(est.opt)), + .rta_type = TCA_RATE, + }, + .opt = { + /* 4s interval, 16s averaging constant */ + .interval = 2, + .ewma_log = 2, + }, + }; /* check for valid classid */ if (!classid || TC_H_MAJ(classid ^ sch->handle) @@ -1418,6 +1357,9 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL) goto failure; + gen_new_estimator(&cl->bstats, &cl->rate_est, + &sch->dev->queue_lock, + tca[TCA_RATE-1] ? : &est.rta); cl->refcnt = 1; INIT_LIST_HEAD(&cl->sibling); INIT_HLIST_NODE(&cl->hlist); @@ -1469,8 +1411,13 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, hlist_add_head(&cl->hlist, q->hash + htb_hash(classid)); list_add_tail(&cl->sibling, parent ? &parent->children : &q->root); - } else + } else { + if (tca[TCA_RATE-1]) + gen_replace_estimator(&cl->bstats, &cl->rate_est, + &sch->dev->queue_lock, + tca[TCA_RATE-1]); sch_tree_lock(sch); + } /* it used to be a nasty bug here, we have to check that node is really leaf before changing cl->un.leaf ! */ diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index f8b9f1cdf738..cd0aab6a2a7c 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -9,21 +9,14 @@ #include <linux/module.h> #include <linux/types.h> +#include <linux/list.h> #include <linux/skbuff.h> -#include <linux/netdevice.h> #include <linux/rtnetlink.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv6.h> #include <linux/netfilter.h> -#include <linux/smp.h> #include <net/netlink.h> #include <net/pkt_sched.h> -#include <asm/byteorder.h> -#include <asm/uaccess.h> -#include <linux/kmod.h> -#include <linux/stat.h> -#include <linux/interrupt.h> -#include <linux/list.h> #undef DEBUG_INGRESS diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 5d9d8bc9cc3a..9e5e87e81f00 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -14,11 +14,9 @@ */ #include <linux/module.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/errno.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/rtnetlink.h> diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 6d7542c26e47..2d8c08493d6e 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -12,37 +12,23 @@ */ #include <linux/module.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/if_ether.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/notifier.h> -#include <net/ip.h> -#include <net/route.h> #include <linux/skbuff.h> #include <net/netlink.h> -#include <net/sock.h> #include <net/pkt_sched.h> struct prio_sched_data { int bands; + int curband; /* for round-robin */ struct tcf_proto *filter_list; u8 prio2band[TC_PRIO_MAX+1]; struct Qdisc *queues[TCQ_PRIO_BANDS]; + int mq; }; @@ -70,14 +56,17 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) #endif if (TC_H_MAJ(band)) band = 0; - return q->queues[q->prio2band[band&TC_PRIO_MAX]]; + band = q->prio2band[band&TC_PRIO_MAX]; + goto out; } band = res.classid; } band = TC_H_MIN(band) - 1; if (band >= q->bands) - return q->queues[q->prio2band[0]]; - + band = q->prio2band[0]; +out: + if (q->mq) + skb_set_queue_mapping(skb, band); return q->queues[band]; } @@ -144,17 +133,58 @@ prio_dequeue(struct Qdisc* sch) struct Qdisc *qdisc; for (prio = 0; prio < q->bands; prio++) { - qdisc = q->queues[prio]; - skb = qdisc->dequeue(qdisc); - if (skb) { - sch->q.qlen--; - return skb; + /* Check if the target subqueue is available before + * pulling an skb. This way we avoid excessive requeues + * for slower queues. + */ + if (!netif_subqueue_stopped(sch->dev, (q->mq ? prio : 0))) { + qdisc = q->queues[prio]; + skb = qdisc->dequeue(qdisc); + if (skb) { + sch->q.qlen--; + return skb; + } } } return NULL; } +static struct sk_buff *rr_dequeue(struct Qdisc* sch) +{ + struct sk_buff *skb; + struct prio_sched_data *q = qdisc_priv(sch); + struct Qdisc *qdisc; + int bandcount; + + /* Only take one pass through the queues. If nothing is available, + * return nothing. + */ + for (bandcount = 0; bandcount < q->bands; bandcount++) { + /* Check if the target subqueue is available before + * pulling an skb. This way we avoid excessive requeues + * for slower queues. If the queue is stopped, try the + * next queue. + */ + if (!netif_subqueue_stopped(sch->dev, + (q->mq ? q->curband : 0))) { + qdisc = q->queues[q->curband]; + skb = qdisc->dequeue(qdisc); + if (skb) { + sch->q.qlen--; + q->curband++; + if (q->curband >= q->bands) + q->curband = 0; + return skb; + } + } + q->curband++; + if (q->curband >= q->bands) + q->curband = 0; + } + return NULL; +} + static unsigned int prio_drop(struct Qdisc* sch) { struct prio_sched_data *q = qdisc_priv(sch); @@ -198,21 +228,41 @@ prio_destroy(struct Qdisc* sch) static int prio_tune(struct Qdisc *sch, struct rtattr *opt) { struct prio_sched_data *q = qdisc_priv(sch); - struct tc_prio_qopt *qopt = RTA_DATA(opt); + struct tc_prio_qopt *qopt; + struct rtattr *tb[TCA_PRIO_MAX]; int i; - if (opt->rta_len < RTA_LENGTH(sizeof(*qopt))) + if (rtattr_parse_nested_compat(tb, TCA_PRIO_MAX, opt, qopt, + sizeof(*qopt))) return -EINVAL; - if (qopt->bands > TCQ_PRIO_BANDS || qopt->bands < 2) + q->bands = qopt->bands; + /* If we're multiqueue, make sure the number of incoming bands + * matches the number of queues on the device we're associating with. + * If the number of bands requested is zero, then set q->bands to + * dev->egress_subqueue_count. + */ + q->mq = RTA_GET_FLAG(tb[TCA_PRIO_MQ - 1]); + if (q->mq) { + if (sch->handle != TC_H_ROOT) + return -EINVAL; + if (netif_is_multiqueue(sch->dev)) { + if (q->bands == 0) + q->bands = sch->dev->egress_subqueue_count; + else if (q->bands != sch->dev->egress_subqueue_count) + return -EINVAL; + } else + return -EOPNOTSUPP; + } + + if (q->bands > TCQ_PRIO_BANDS || q->bands < 2) return -EINVAL; for (i=0; i<=TC_PRIO_MAX; i++) { - if (qopt->priomap[i] >= qopt->bands) + if (qopt->priomap[i] >= q->bands) return -EINVAL; } sch_tree_lock(sch); - q->bands = qopt->bands; memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); for (i=q->bands; i<TCQ_PRIO_BANDS; i++) { @@ -268,11 +318,17 @@ static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) { struct prio_sched_data *q = qdisc_priv(sch); unsigned char *b = skb_tail_pointer(skb); + struct rtattr *nest; struct tc_prio_qopt opt; opt.bands = q->bands; memcpy(&opt.priomap, q->prio2band, TC_PRIO_MAX+1); - RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); + + nest = RTA_NEST_COMPAT(skb, TCA_OPTIONS, sizeof(opt), &opt); + if (q->mq) + RTA_PUT_FLAG(skb, TCA_PRIO_MQ); + RTA_NEST_COMPAT_END(skb, nest); + return skb->len; rtattr_failure: @@ -443,17 +499,44 @@ static struct Qdisc_ops prio_qdisc_ops = { .owner = THIS_MODULE, }; +static struct Qdisc_ops rr_qdisc_ops = { + .next = NULL, + .cl_ops = &prio_class_ops, + .id = "rr", + .priv_size = sizeof(struct prio_sched_data), + .enqueue = prio_enqueue, + .dequeue = rr_dequeue, + .requeue = prio_requeue, + .drop = prio_drop, + .init = prio_init, + .reset = prio_reset, + .destroy = prio_destroy, + .change = prio_tune, + .dump = prio_dump, + .owner = THIS_MODULE, +}; + static int __init prio_module_init(void) { - return register_qdisc(&prio_qdisc_ops); + int err; + + err = register_qdisc(&prio_qdisc_ops); + if (err < 0) + return err; + err = register_qdisc(&rr_qdisc_ops); + if (err < 0) + unregister_qdisc(&prio_qdisc_ops); + return err; } static void __exit prio_module_exit(void) { unregister_qdisc(&prio_qdisc_ops); + unregister_qdisc(&rr_qdisc_ops); } module_init(prio_module_init) module_exit(prio_module_exit) MODULE_LICENSE("GPL"); +MODULE_ALIAS("sch_rr"); diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 00db53eb8159..9b95fefb70f4 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -17,7 +17,6 @@ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <net/pkt_sched.h> #include <net/inet_ecn.h> diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 96dfdf78d32c..957957309859 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -10,31 +10,17 @@ */ #include <linux/module.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> #include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/if_ether.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/notifier.h> #include <linux/init.h> -#include <net/ip.h> -#include <net/netlink.h> #include <linux/ipv6.h> -#include <net/route.h> #include <linux/skbuff.h> -#include <net/sock.h> +#include <net/ip.h> +#include <net/netlink.h> #include <net/pkt_sched.h> diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 53862953baaf..22e431dace54 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -13,29 +13,12 @@ */ #include <linux/module.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> -#include <linux/jiffies.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> -#include <linux/if_ether.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/notifier.h> -#include <net/ip.h> -#include <net/netlink.h> -#include <net/route.h> #include <linux/skbuff.h> -#include <net/sock.h> +#include <net/netlink.h> #include <net/pkt_sched.h> diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index f05ad9a30b4c..0968184ea6be 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -9,30 +9,17 @@ */ #include <linux/module.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <linux/bitops.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> -#include <linux/mm.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/in.h> #include <linux/errno.h> -#include <linux/interrupt.h> #include <linux/if_arp.h> -#include <linux/if_ether.h> -#include <linux/inet.h> #include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/notifier.h> #include <linux/init.h> -#include <net/ip.h> -#include <net/route.h> #include <linux/skbuff.h> #include <linux/moduleparam.h> -#include <net/sock.h> +#include <net/dst.h> +#include <net/neighbour.h> #include <net/pkt_sched.h> /* @@ -225,7 +212,6 @@ static int teql_qdisc_init(struct Qdisc *sch, struct rtattr *opt) return 0; } -/* "teql*" netdevice routines */ static int __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev) @@ -277,6 +263,7 @@ static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev) int busy; int nores; int len = skb->len; + int subq = skb->queue_mapping; struct sk_buff *skb_res = NULL; start = master->slaves; @@ -293,7 +280,9 @@ restart: if (slave->qdisc_sleeping != q) continue; - if (netif_queue_stopped(slave) || ! netif_running(slave)) { + if (netif_queue_stopped(slave) || + netif_subqueue_stopped(slave, subq) || + !netif_running(slave)) { busy = 1; continue; } @@ -302,6 +291,7 @@ restart: case 0: if (netif_tx_trylock(slave)) { if (!netif_queue_stopped(slave) && + !netif_subqueue_stopped(slave, subq) && slave->hard_start_xmit(skb, slave) == 0) { netif_tx_unlock(slave); master->slaves = NEXT_SLAVE(q); |