From c4266263249f22479eb1abb1a1709c38240b1597 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 15 Dec 2010 08:18:36 +0000
Subject: net_sched: sch_sfq: add backlog info in sfq_dump_class_stats()

We currently return, for each active SFQ slot, the number of packets in
its queue. We can also report the number of bytes accounted for by these
packets.

tc -s class show dev ifb0

Before patch:

class sfq 11:3d9 parent 11:
 (dropped 0, overlimits 0 requeues 0)
 backlog 0b 3p requeues 0
 allot 1266

After patch:

class sfq 11:3e4 parent 11:
 (dropped 0, overlimits 0 requeues 0)
 backlog 4380b 3p requeues 0
 allot 1212

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/sched/sch_sfq.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'net/sched/sch_sfq.c')

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 3cf478d012dd..cb331dea7fe0 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -548,8 +548,13 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	sfq_index idx = q->ht[cl-1];
-	struct gnet_stats_queue qs = { .qlen = q->qs[idx].qlen };
+	struct sk_buff_head *list = &q->qs[idx];
+	struct gnet_stats_queue qs = { .qlen = list->qlen };
 	struct tc_sfq_xstats xstats = { .allot = q->allot[idx] };
+	struct sk_buff *skb;
+
+	skb_queue_walk(list, skb)
+		qs.backlog += qdisc_pkt_len(skb);
 
 	if (gnet_stats_copy_queue(d, &qs) < 0)
 		return -1;
--
cgit v1.2.1
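The statistic added above is nothing more than a byte sum over the packets
queued in one slot. Below is a minimal user-space sketch of that
accumulation; pkt and slot_backlog_bytes are illustrative names, not kernel
API. In the kernel the walk is done with skb_queue_walk() and
qdisc_pkt_len(), under the qdisc lock.

struct pkt {
	struct pkt *next;
	unsigned int len;		/* what qdisc_pkt_len() reports per skb */
};

/* sum queued bytes the way the patch fills qs.backlog */
static unsigned int slot_backlog_bytes(const struct pkt *head)
{
	unsigned int bytes = 0;
	const struct pkt *p;

	for (p = head; p != NULL; p = p->next)
		bytes += p->len;	/* qs.backlog += qdisc_pkt_len(skb) */
	return bytes;
}

This is O(qlen) per dump, which is acceptable here because a slot holds at
most SFQ_DEPTH - 1 = 127 packets.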
From aa3e219997e4b949be4199660936099ded0b401f Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Mon, 20 Dec 2010 13:18:16 -0800
Subject: net_sched: sch_sfq: fix allot handling

When deploying SFQ/IFB here at work, I found the allot management in sfq
was pretty wrong, and even changing allot from short to int would not fix
it. We should initialize allot for each new flow instead of reusing a
stale value found in the slot.

Before the patch, I saw bursts of several packets per flow, apparently
defeating the default "quantum 1514" limit I had on my SFQ class.

class sfq 11:1 parent 11:
 (dropped 0, overlimits 0 requeues 0)
 backlog 0b 7p requeues 0
 allot 11546

class sfq 11:46 parent 11:
 (dropped 0, overlimits 0 requeues 0)
 backlog 0b 1p requeues 0
 allot -23873

class sfq 11:78 parent 11:
 (dropped 0, overlimits 0 requeues 0)
 backlog 0b 5p requeues 0
 allot 11393

After the patch, fairness among flows is better, the allot limit is
respected, and allot stays positive:

class sfq 11:e parent 11:
 (dropped 0, overlimits 0 requeues 86)
 backlog 0b 3p requeues 86
 allot 596

class sfq 11:94 parent 11:
 (dropped 0, overlimits 0 requeues 0)
 backlog 0b 3p requeues 0
 allot 1468

class sfq 11:a4 parent 11:
 (dropped 0, overlimits 0 requeues 0)
 backlog 0b 4p requeues 0
 allot 650

class sfq 11:bb parent 11:
 (dropped 0, overlimits 0 requeues 0)
 backlog 0b 3p requeues 0
 allot 596

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/sched/sch_sfq.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

(limited to 'net/sched/sch_sfq.c')

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 3cf478d012dd..7150705f1d0b 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -270,7 +270,6 @@ static unsigned int sfq_drop(struct Qdisc *sch)
 	/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
 		d = q->next[q->tail];
 		q->next[q->tail] = q->next[d];
-		q->allot[q->next[d]] += q->quantum;
 		skb = q->qs[d].prev;
 		len = qdisc_pkt_len(skb);
 		__skb_unlink(skb, &q->qs[d]);
@@ -321,14 +320,13 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	sfq_inc(q, x);
 	if (q->qs[x].qlen == 1) {		/* The flow is new */
 		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
-			q->tail = x;
 			q->next[x] = x;
-			q->allot[x] = q->quantum;
 		} else {
 			q->next[x] = q->next[q->tail];
 			q->next[q->tail] = x;
-			q->tail = x;
 		}
+		q->tail = x;
+		q->allot[x] = q->quantum;
 	}
 	if (++sch->q.qlen <= q->limit) {
 		sch->bstats.bytes += qdisc_pkt_len(skb);
@@ -359,13 +357,13 @@ sfq_dequeue(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
-	sfq_index a, old_a;
+	sfq_index a, next_a;
 
 	/* No active slots */
 	if (q->tail == SFQ_DEPTH)
 		return NULL;
 
-	a = old_a = q->next[q->tail];
+	a = q->next[q->tail];
 
 	/* Grab packet */
 	skb = __skb_dequeue(&q->qs[a]);
@@ -376,17 +374,15 @@ sfq_dequeue(struct Qdisc *sch)
 	/* Is the slot empty? */
 	if (q->qs[a].qlen == 0) {
 		q->ht[q->hash[a]] = SFQ_DEPTH;
-		a = q->next[a];
-		if (a == old_a) {
+		next_a = q->next[a];
+		if (a == next_a) {
 			q->tail = SFQ_DEPTH;
 			return skb;
 		}
-		q->next[q->tail] = a;
-		q->allot[a] += q->quantum;
+		q->next[q->tail] = next_a;
 	} else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) {
-		q->tail = a;
-		a = q->next[a];
 		q->allot[a] += q->quantum;
+		q->tail = a;
 	}
 	return skb;
 }
--
cgit v1.2.1
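The essence of this fix is an invariant on the round-robin list: a flow
that becomes active is spliced in right after the current tail and starts
with a fresh credit of one quantum, instead of inheriting whatever stale
allot was left in the reused slot. A minimal sketch of that activation
step, using illustrative structures rather than the kernel's:

struct flow {
	int allot;			/* byte credit for the current round */
	int next;			/* next active flow (circular list) */
};

/* activate flow x: splice it after the tail and reset its credit */
static void flow_activate(struct flow *flows, int x, int *tail, int quantum)
{
	if (*tail < 0) {		/* x is the only active flow */
		flows[x].next = x;	/* circular list of one */
	} else {
		flows[x].next = flows[*tail].next;
		flows[*tail].next = x;
	}
	*tail = x;
	flows[x].allot = quantum;	/* fresh credit, never a stale value */
}

int main(void)
{
	struct flow flows[4] = { { 0, 0 } };
	int tail = -1;			/* -1 plays the role of SFQ_DEPTH */

	flow_activate(flows, 2, &tail, 1514);
	flow_activate(flows, 0, &tail, 1514);
	return !(flows[2].allot == 1514 && flows[0].allot == 1514);
}

Resetting the credit at activation is what keeps each allot bounded by
roughly one quantum, matching the small positive values in the "After the
patch" output above.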
From eda83e3b63e88351310c13c99178eb4634f137b2 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Mon, 20 Dec 2010 12:54:58 +0000
Subject: net_sched: sch_sfq: better struct layouts

Here is a respin of the patch. I'll send a short patch to make SFQ fairer
in the presence of large packets as well.

Thanks

[PATCH v3 net-next-2.6] net_sched: sch_sfq: better struct layouts

This patch shrinks sizeof(struct sfq_sched_data) from 0x14f8 (or more if
spinlocks are bigger) to 0x1180 bytes, and reduces text size as well.

   text	   data	    bss	    dec	    hex	filename
   4821	    152	      0	   4973	   136d	old/net/sched/sch_sfq.o
   4627	    136	      0	   4763	   129b	new/net/sched/sch_sfq.o

All data for a slot/flow is now grouped in a compact, cache-friendly
structure, instead of being spread across many different locations:

struct sfq_slot {
	struct sk_buff	*skblist_next;
	struct sk_buff	*skblist_prev;
	sfq_index	qlen; /* number of skbs in skblist */
	sfq_index	next; /* next slot in sfq chain */
	struct sfq_head dep; /* anchor in dep[] chains */
	unsigned short	hash; /* hash value (index in ht[]) */
	short		allot; /* credit for this slot */
};

Signed-off-by: Eric Dumazet
Cc: Jarek Poplawski
Cc: Patrick McHardy
Signed-off-by: David S. Miller
---
 net/sched/sch_sfq.c | 260 ++++++++++++++++++++++++++++++++--------------------
 1 file changed, 162 insertions(+), 98 deletions(-)

(limited to 'net/sched/sch_sfq.c')

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 42396c965dd6..13322e8a0456 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -67,27 +67,42 @@
 
 	IMPLEMENTATION:
 	This implementation limits maximal queue length to 128;
-	maximal mtu to 2^15-1; number of hash buckets to 1024.
+	maximal mtu to 2^15-1; max 128 flows, number of hash buckets to 1024.
 	The only goal of this restrictions was that all data
-	fit into one 4K page :-). Struct sfq_sched_data is
-	organized in anti-cache manner: all the data for a bucket
-	are scattered over different locations. This is not good,
-	but it allowed me to put it into 4K.
+	fit into one 4K page on 32bit arches.
 
 	It is easy to increase these values, but not in flight.  */
 
-#define SFQ_DEPTH		128
+#define SFQ_DEPTH		128 /* max number of packets per flow */
+#define SFQ_SLOTS		128 /* max number of flows */
+#define SFQ_EMPTY_SLOT		255
 #define SFQ_HASH_DIVISOR	1024
 
-/* This type should contain at least SFQ_DEPTH*2 values */
+/* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */
 typedef unsigned char sfq_index;
 
+/*
+ * We dont use pointers to save space.
+ * Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array
+ * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1]
+ * are 'pointers' to dep[] array
+ */
 struct sfq_head
 {
 	sfq_index	next;
 	sfq_index	prev;
 };
 
+struct sfq_slot {
+	struct sk_buff	*skblist_next;
+	struct sk_buff	*skblist_prev;
+	sfq_index	qlen; /* number of skbs in skblist */
+	sfq_index	next; /* next slot in sfq chain */
+	struct sfq_head dep; /* anchor in dep[] chains */
+	unsigned short	hash; /* hash value (index in ht[]) */
+	short		allot; /* credit for this slot */
+};
+
 struct sfq_sched_data
 {
 /* Parameters */
@@ -99,17 +114,24 @@ struct sfq_sched_data
 	struct tcf_proto *filter_list;
 	struct timer_list perturb_timer;
 	u32		perturbation;
-	sfq_index	tail;		/* Index of current slot in round */
-	sfq_index	max_depth;	/* Maximal depth */
+	sfq_index	cur_depth;	/* depth of longest slot */
+	struct sfq_slot *tail;		/* current slot in round */
 
 	sfq_index	ht[SFQ_HASH_DIVISOR];	/* Hash table */
-	sfq_index	next[SFQ_DEPTH];	/* Active slots link */
-	short		allot[SFQ_DEPTH];	/* Current allotment per slot */
-	unsigned short	hash[SFQ_DEPTH];	/* Hash value indexed by slots */
-	struct sk_buff_head	qs[SFQ_DEPTH];	/* Slot queue */
-	struct sfq_head	dep[SFQ_DEPTH*2];	/* Linked list of slots, indexed by depth */
+	struct sfq_slot	slots[SFQ_SLOTS];
+	struct sfq_head	dep[SFQ_DEPTH];	/* Linked list of slots, indexed by depth */
 };
 
+/*
+ * sfq_head are either in a sfq_slot or in dep[] array
+ */
+static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
+{
+	if (val < SFQ_SLOTS)
+		return &q->slots[val].dep;
+	return &q->dep[val - SFQ_SLOTS];
+}
+
 static __inline__ unsigned sfq_fold_hash(struct sfq_sched_data *q, u32 h, u32 h1)
 {
 	return jhash_2words(h, h1, q->perturbation) & (SFQ_HASH_DIVISOR - 1);
}
@@ -200,30 +222,41 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 	return 0;
 }
 
+/*
+ * x : slot number [0 .. SFQ_SLOTS - 1]
+ */
 static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
 {
 	sfq_index p, n;
-	int d = q->qs[x].qlen + SFQ_DEPTH;
+	int qlen = q->slots[x].qlen;
+
+	p = qlen + SFQ_SLOTS;
+	n = q->dep[qlen].next;
 
-	p = d;
-	n = q->dep[d].next;
-	q->dep[x].next = n;
-	q->dep[x].prev = p;
-	q->dep[p].next = q->dep[n].prev = x;
+	q->slots[x].dep.next = n;
+	q->slots[x].dep.prev = p;
+
+	q->dep[qlen].next = x; /* sfq_dep_head(q, p)->next = x */
+	sfq_dep_head(q, n)->prev = x;
 }
 
+#define sfq_unlink(q, x, n, p)		\
+	n = q->slots[x].dep.next;	\
+	p = q->slots[x].dep.prev;	\
+	sfq_dep_head(q, p)->next = n;	\
+	sfq_dep_head(q, n)->prev = p
+
+
 static inline void sfq_dec(struct sfq_sched_data *q, sfq_index x)
 {
 	sfq_index p, n;
+	int d;
 
-	n = q->dep[x].next;
-	p = q->dep[x].prev;
-	q->dep[p].next = n;
-	q->dep[n].prev = p;
-
-	if (n == p && q->max_depth == q->qs[x].qlen + 1)
-		q->max_depth--;
+	sfq_unlink(q, x, n, p);
+	d = q->slots[x].qlen--;
+	if (n == p && q->cur_depth == d)
+		q->cur_depth--;
 	sfq_link(q, x);
 }
 
@@ -232,34 +265,72 @@ static inline void sfq_inc(struct sfq_sched_data *q, sfq_index x)
 	sfq_index p, n;
 	int d;
 
-	n = q->dep[x].next;
-	p = q->dep[x].prev;
-	q->dep[p].next = n;
-	q->dep[n].prev = p;
-	d = q->qs[x].qlen;
-	if (q->max_depth < d)
-		q->max_depth = d;
+	sfq_unlink(q, x, n, p);
+	d = ++q->slots[x].qlen;
+	if (q->cur_depth < d)
+		q->cur_depth = d;
 	sfq_link(q, x);
 }
 
+/* helper functions : might be changed when/if skb use a standard list_head */
+
+/* remove one skb from tail of slot queue */
+static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
+{
+	struct sk_buff *skb = slot->skblist_prev;
+
+	slot->skblist_prev = skb->prev;
+	skb->next = skb->prev = NULL;
+	return skb;
+}
+
+/* remove one skb from head of slot queue */
+static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
+{
+	struct sk_buff *skb = slot->skblist_next;
+
+	slot->skblist_next = skb->next;
+	skb->next = skb->prev = NULL;
+	return skb;
+}
+
+static inline void slot_queue_init(struct sfq_slot *slot)
+{
+	slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
+}
+
+/* add skb to slot queue (tail add) */
+static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb)
+{
+	skb->prev = slot->skblist_prev;
+	skb->next = (struct sk_buff *)slot;
+	slot->skblist_prev->next = skb;
+	slot->skblist_prev = skb;
+}
+
+#define slot_queue_walk(slot, skb)		\
+	for (skb = slot->skblist_next;		\
+	     skb != (struct sk_buff *)slot;	\
+	     skb = skb->next)
+
 static unsigned int sfq_drop(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	sfq_index d = q->max_depth;
+	sfq_index x, d = q->cur_depth;
 	struct sk_buff *skb;
 	unsigned int len;
+	struct sfq_slot *slot;
 
-	/* Queue is full! Find the longest slot and
-	   drop a packet from it */
-
+	/* Queue is full! Find the longest slot and drop tail packet from it */
 	if (d > 1) {
-		sfq_index x = q->dep[d + SFQ_DEPTH].next;
-		skb = q->qs[x].prev;
+		x = q->dep[d].next;
+		slot = &q->slots[x];
+drop:
+		skb = slot_dequeue_tail(slot);
 		len = qdisc_pkt_len(skb);
-		__skb_unlink(skb, &q->qs[x]);
-		kfree_skb(skb);
 		sfq_dec(q, x);
+		kfree_skb(skb);
 		sch->q.qlen--;
 		sch->qstats.drops++;
 		sch->qstats.backlog -= len;
@@ -268,18 +339,11 @@ static unsigned int sfq_drop(struct Qdisc *sch)
 	if (d == 1) {
 		/* It is difficult to believe, but ALL THE SLOTS HAVE LENGTH 1. */
-		d = q->next[q->tail];
-		q->next[q->tail] = q->next[d];
-		skb = q->qs[d].prev;
-		len = qdisc_pkt_len(skb);
-		__skb_unlink(skb, &q->qs[d]);
-		kfree_skb(skb);
-		sfq_dec(q, d);
-		sch->q.qlen--;
-		q->ht[q->hash[d]] = SFQ_DEPTH;
-		sch->qstats.drops++;
-		sch->qstats.backlog -= len;
-		return len;
+		x = q->tail->next;
+		slot = &q->slots[x];
+		q->tail->next = slot->next;
+		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
+		goto drop;
 	}
 
 	return 0;
@@ -291,6 +355,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	unsigned int hash;
 	sfq_index x;
+	struct sfq_slot *slot;
 	int uninitialized_var(ret);
 
 	hash = sfq_classify(skb, sch, &ret);
@@ -303,30 +368,33 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	hash--;
 
 	x = q->ht[hash];
-	if (x == SFQ_DEPTH) {
-		q->ht[hash] = x = q->dep[SFQ_DEPTH].next;
-		q->hash[x] = hash;
+	slot = &q->slots[x];
+	if (x == SFQ_EMPTY_SLOT) {
+		x = q->dep[0].next; /* get a free slot */
+		q->ht[hash] = x;
+		slot = &q->slots[x];
+		slot->hash = hash;
+		slot_queue_init(slot);
 	}
 
-	/* If selected queue has length q->limit, this means that
-	 * all another queues are empty and that we do simple tail drop,
+	/* If selected queue has length q->limit, do simple tail drop,
 	 * i.e. drop _this_ packet.
 	 */
-	if (q->qs[x].qlen >= q->limit)
+	if (slot->qlen >= q->limit)
 		return qdisc_drop(skb, sch);
 
 	sch->qstats.backlog += qdisc_pkt_len(skb);
-	__skb_queue_tail(&q->qs[x], skb);
+	slot_queue_add(slot, skb);
 	sfq_inc(q, x);
-	if (q->qs[x].qlen == 1) {		/* The flow is new */
-		if (q->tail == SFQ_DEPTH) {	/* It is the first flow */
-			q->next[x] = x;
+	if (slot->qlen == 1) {		/* The flow is new */
+		if (q->tail == NULL) {	/* It is the first flow */
+			slot->next = x;
 		} else {
-			q->next[x] = q->next[q->tail];
-			q->next[q->tail] = x;
+			slot->next = q->tail->next;
+			q->tail->next = x;
 		}
-		q->tail = x;
-		q->allot[x] = q->quantum;
+		q->tail = slot;
+		slot->allot = q->quantum;
 	}
 	if (++sch->q.qlen <= q->limit) {
 		sch->bstats.bytes += qdisc_pkt_len(skb);
@@ -342,14 +410,12 @@ static struct sk_buff *
 sfq_peek(struct Qdisc *sch)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	sfq_index a;
 
 	/* No active slots */
-	if (q->tail == SFQ_DEPTH)
+	if (q->tail == NULL)
 		return NULL;
 
-	a = q->next[q->tail];
-	return skb_peek(&q->qs[a]);
+	return q->slots[q->tail->next].skblist_next;
 }
 
 static struct sk_buff *
@@ -358,31 +424,31 @@ sfq_dequeue(struct Qdisc *sch)
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
 	sfq_index a, next_a;
+	struct sfq_slot *slot;
 
 	/* No active slots */
-	if (q->tail == SFQ_DEPTH)
+	if (q->tail == NULL)
 		return NULL;
 
-	a = q->next[q->tail];
-
-	/* Grab packet */
-	skb = __skb_dequeue(&q->qs[a]);
+	a = q->tail->next;
+	slot = &q->slots[a];
+	skb = slot_dequeue_head(slot);
 	sfq_dec(q, a);
 	sch->q.qlen--;
 	sch->qstats.backlog -= qdisc_pkt_len(skb);
 
 	/* Is the slot empty? */
-	if (q->qs[a].qlen == 0) {
-		q->ht[q->hash[a]] = SFQ_DEPTH;
-		next_a = q->next[a];
+	if (slot->qlen == 0) {
+		q->ht[slot->hash] = SFQ_EMPTY_SLOT;
+		next_a = slot->next;
 		if (a == next_a) {
-			q->tail = SFQ_DEPTH;
+			q->tail = NULL; /* no more active slots */
 			return skb;
 		}
-		q->next[q->tail] = next_a;
+		q->tail->next = next_a;
-	} else if ((q->allot[a] -= qdisc_pkt_len(skb)) <= 0) {
-		q->allot[a] += q->quantum;
-		q->tail = a;
+	} else if ((slot->allot -= qdisc_pkt_len(skb)) <= 0) {
+		q->tail = slot;
+		slot->allot += q->quantum;
 	}
 	return skb;
 }
@@ -446,17 +512,16 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 	init_timer_deferrable(&q->perturb_timer);
 
 	for (i = 0; i < SFQ_HASH_DIVISOR; i++)
-		q->ht[i] = SFQ_DEPTH;
+		q->ht[i] = SFQ_EMPTY_SLOT;
 
 	for (i = 0; i < SFQ_DEPTH; i++) {
-		skb_queue_head_init(&q->qs[i]);
-		q->dep[i + SFQ_DEPTH].next = i + SFQ_DEPTH;
-		q->dep[i + SFQ_DEPTH].prev = i + SFQ_DEPTH;
+		q->dep[i].next = i + SFQ_SLOTS;
+		q->dep[i].prev = i + SFQ_SLOTS;
 	}
 
 	q->limit = SFQ_DEPTH - 1;
-	q->max_depth = 0;
-	q->tail = SFQ_DEPTH;
+	q->cur_depth = 0;
+	q->tail = NULL;
 	if (opt == NULL) {
 		q->quantum = psched_mtu(qdisc_dev(sch));
 		q->perturb_period = 0;
@@ -467,7 +532,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 			return err;
 	}
 
-	for (i = 0; i < SFQ_DEPTH; i++)
+	for (i = 0; i < SFQ_SLOTS; i++)
 		sfq_link(q, i);
 	return 0;
 }
@@ -543,13 +608,12 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 			       struct gnet_dump *d)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	sfq_index idx = q->ht[cl-1];
-	struct sk_buff_head *list = &q->qs[idx];
-	struct gnet_stats_queue qs = { .qlen = list->qlen };
-	struct tc_sfq_xstats xstats = { .allot = q->allot[idx] };
+	const struct sfq_slot *slot = &q->slots[q->ht[cl - 1]];
+	struct gnet_stats_queue qs = { .qlen = slot->qlen };
+	struct tc_sfq_xstats xstats = { .allot = slot->allot };
 	struct sk_buff *skb;
 
-	skb_queue_walk(list, skb)
+	slot_queue_walk(slot, skb)
 		qs.backlog += qdisc_pkt_len(skb);
 
 	if (gnet_stats_copy_queue(d, &qs) < 0)
@@ -566,7 +630,7 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
 		return;
 
 	for (i = 0; i < SFQ_HASH_DIVISOR; i++) {
-		if (q->ht[i] == SFQ_DEPTH ||
+		if (q->ht[i] == SFQ_EMPTY_SLOT ||
 		    arg->count < arg->skip) {
 			arg->count++;
 			continue;
--
cgit v1.2.1
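A short aside on the indexing scheme this layout relies on: a single
byte-wide sfq_index addresses two arrays, so the depth-ordered lists need
no pointers at all. A standalone mock-up of the sfq_dep_head() helper
follows, with illustrative names; the sizes match the constants above.

#include <assert.h>

#define SLOTS 128
#define DEPTH 128

struct head { unsigned char next, prev; };
struct slot { struct head dep; };

static struct slot slots[SLOTS];	/* indexes 0 .. SLOTS-1 */
static struct head dep[DEPTH];		/* indexes SLOTS .. SLOTS+DEPTH-1 */

static struct head *dep_head(unsigned int val)
{
	if (val < SLOTS)
		return &slots[val].dep;	/* a real flow slot */
	return &dep[val - SLOTS];	/* a per-depth list anchor */
}

int main(void)
{
	assert(dep_head(0) == &slots[0].dep);
	assert(dep_head(SLOTS) == &dep[0]);	/* anchor of depth-0 list */
	return 0;
}

Since SLOTS + DEPTH = 256, every valid 'pointer' fits in one unsigned
char, which is exactly why sfq_index can stay a byte while the whole
structure shrinks.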
From ee09b3c1cff0335137dc1b146488e4352f640f13 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Wed, 22 Dec 2010 11:39:59 -0800
Subject: sfq: fix sfq class stats handling

sfq_walk() runs without the qdisc lock. By the time it selects a
non-empty hash slot and sfq_dump_class_stats() is run (with the lock
held), the slot might have been freed: we then access
q->slots[SFQ_EMPTY_SLOT], out of bounds, and crash in slot_queue_walk().

On previous kernels the bug exists as well, but the out-of-bounds
qs[SFQ_DEPTH] and allot[SFQ_DEPTH] accesses land inside struct
sfq_sched_data, so no illegal memory access happens; only possibly wrong
data is reported to the user.

Also, slot_dequeue_tail() should make sure the slot's skb chain is
correctly terminated, or sfq_dump_class_stats() can access freed skbs.

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/sched/sch_sfq.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'net/sched/sch_sfq.c')

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 13322e8a0456..6a2f88fea6d8 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -281,6 +281,7 @@ static inline struct sk_buff *slot_dequeue_tail(struct sfq_slot *slot)
 	struct sk_buff *skb = slot->skblist_prev;
 
 	slot->skblist_prev = skb->prev;
+	skb->prev->next = (struct sk_buff *)slot;
 	skb->next = skb->prev = NULL;
 	return skb;
 }
@@ -608,14 +609,19 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 			       struct gnet_dump *d)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
-	const struct sfq_slot *slot = &q->slots[q->ht[cl - 1]];
-	struct gnet_stats_queue qs = { .qlen = slot->qlen };
-	struct tc_sfq_xstats xstats = { .allot = slot->allot };
+	sfq_index idx = q->ht[cl - 1];
+	struct gnet_stats_queue qs = { 0 };
+	struct tc_sfq_xstats xstats = { 0 };
 	struct sk_buff *skb;
 
-	slot_queue_walk(slot, skb)
-		qs.backlog += qdisc_pkt_len(skb);
+	if (idx != SFQ_EMPTY_SLOT) {
+		const struct sfq_slot *slot = &q->slots[idx];
+		xstats.allot = slot->allot;
+		qs.qlen = slot->qlen;
+		slot_queue_walk(slot, skb)
+			qs.backlog += qdisc_pkt_len(skb);
+	}
 
 	if (gnet_stats_copy_queue(d, &qs) < 0)
 		return -1;
 	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
--
cgit v1.2.1
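The shape of this fix is a classic re-validation: an index read during an
unlocked walk must be checked again under the lock before it is
dereferenced. A minimal sketch with illustrative names (the kernel's
sentinel is SFQ_EMPTY_SLOT, i.e. 255, and the results go into gnet_stats
structures rather than this mock type):

#define EMPTY 255

struct slotinfo {
	unsigned int qlen;
	unsigned int backlog;
};

/* dump stats for class cl; ht[] may have changed since the walk */
static void dump_class_stats(const unsigned char *ht, unsigned int cl,
			     const struct slotinfo *slots,
			     struct slotinfo *out)
{
	unsigned char idx = ht[cl];

	out->qlen = 0;
	out->backlog = 0;
	if (idx == EMPTY)	/* slot was freed since the unlocked walk */
		return;		/* report zeroes instead of indexing out of bounds */
	*out = slots[idx];
}

Reporting zeroes for a vanished flow is harmless; dereferencing slots[255]
is not, which is the whole point of the guard.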
From eeaeb068f1393b4db4861481bf594bcd1c3eda7a Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Tue, 28 Dec 2010 21:53:33 +0000
Subject: sch_sfq: allow big packets and be fair

SFQ is currently 'limited' to small packets, because it uses a 15-bit
allotment number per flow. Introduce a scale factor of 8, so that we can
handle full-size TSO/GRO packets.

Use appropriate handling to make sure allot is positive before a new
packet is dequeued, so that fairness is respected.

Signed-off-by: Eric Dumazet
Acked-by: Jarek Poplawski
Cc: Patrick McHardy
Signed-off-by: David S. Miller
---
 net/sched/sch_sfq.c | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

(limited to 'net/sched/sch_sfq.c')

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 6a2f88fea6d8..b76d46b71466 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -67,7 +67,7 @@
 
 	IMPLEMENTATION:
 	This implementation limits maximal queue length to 128;
-	maximal mtu to 2^15-1; max 128 flows, number of hash buckets to 1024.
+	max mtu to 2^18-1; max 128 flows, number of hash buckets to 1024.
 	The only goal of this restrictions was that all data
 	fit into one 4K page on 32bit arches.
 
@@ -77,6 +77,11 @@
 #define SFQ_SLOTS		128 /* max number of flows */
 #define SFQ_EMPTY_SLOT		255
 #define SFQ_HASH_DIVISOR	1024
+/* We use 16 bits to store allot, and want to handle packets up to 64K
+ * Scale allot by 8 (1<<3) so that no overflow occurs.
+ */
+#define SFQ_ALLOT_SHIFT		3
+#define SFQ_ALLOT_SIZE(X)	DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
 
 /* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */
 typedef unsigned char sfq_index;
@@ -115,7 +120,7 @@ struct sfq_sched_data
 	struct timer_list perturb_timer;
 	u32		perturbation;
 	sfq_index	cur_depth;	/* depth of longest slot */
-
+	unsigned short	scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
 	struct sfq_slot *tail;		/* current slot in round */
 	sfq_index	ht[SFQ_HASH_DIVISOR];	/* Hash table */
 	struct sfq_slot	slots[SFQ_SLOTS];
@@ -395,7 +400,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 			q->tail->next = x;
 		}
 		q->tail = slot;
-		slot->allot = q->quantum;
+		slot->allot = q->scaled_quantum;
 	}
 	if (++sch->q.qlen <= q->limit) {
 		sch->bstats.bytes += qdisc_pkt_len(skb);
@@ -431,8 +436,14 @@ sfq_dequeue(struct Qdisc *sch)
 	if (q->tail == NULL)
 		return NULL;
 
+next_slot:
 	a = q->tail->next;
 	slot = &q->slots[a];
+	if (slot->allot <= 0) {
+		q->tail = slot;
+		slot->allot += q->scaled_quantum;
+		goto next_slot;
+	}
 	skb = slot_dequeue_head(slot);
 	sfq_dec(q, a);
 	sch->q.qlen--;
@@ -447,9 +458,8 @@ sfq_dequeue(struct Qdisc *sch)
 			return skb;
 		}
 		q->tail->next = next_a;
-	} else if ((slot->allot -= qdisc_pkt_len(skb)) <= 0) {
-		q->tail = slot;
-		slot->allot += q->quantum;
+	} else {
+		slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb));
 	}
 	return skb;
 }
@@ -485,6 +495,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 
 	sch_tree_lock(sch);
 	q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch));
+	q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
 	q->perturb_period = ctl->perturb_period * HZ;
 	if (ctl->limit)
 		q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
@@ -525,6 +536,7 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 	q->tail = NULL;
 	if (opt == NULL) {
 		q->quantum = psched_mtu(qdisc_dev(sch));
+		q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
 		q->perturb_period = 0;
 		q->perturbation = net_random();
 	} else {
@@ -617,7 +629,7 @@ static int sfq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 
 	if (idx != SFQ_EMPTY_SLOT) {
 		const struct sfq_slot *slot = &q->slots[idx];
-		xstats.allot = slot->allot;
+		xstats.allot = slot->allot << SFQ_ALLOT_SHIFT;
 		qs.qlen = slot->qlen;
 		slot_queue_walk(slot, skb)
 			qs.backlog += qdisc_pkt_len(skb);
--
cgit v1.2.1
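To see the arithmetic, here is a small standalone program using the same
rounding trick as the patch; ALLOT_SHIFT and ALLOT_SIZE mirror
SFQ_ALLOT_SHIFT and SFQ_ALLOT_SIZE, and the printed values are easy to
check by hand:

#include <stdio.h>

#define ALLOT_SHIFT		3
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
#define ALLOT_SIZE(x)		DIV_ROUND_UP((x), 1 << ALLOT_SHIFT)

int main(void)
{
	/* a full 64K TSO packet costs 8192 units, well inside a short */
	printf("65536 bytes -> %u units\n", ALLOT_SIZE(65536));
	/* the default 1514-byte quantum becomes 190 units */
	printf("1514 bytes  -> %u units\n", ALLOT_SIZE(1514));
	/* user space still sees bytes: the dump shifts allot back */
	printf("190 units   -> %u bytes\n", 190u << ALLOT_SHIFT);
	return 0;
}

Note also the deficit check moved to the head of sfq_dequeue(): a slot
whose credit is exhausted is recharged and the round advances to the next
slot before any packet is taken, so fairness holds even when single
packets are larger than a quantum.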
From 18c8d82ae5b802c5d82e0dfbcc08b1b568955f46 Mon Sep 17 00:00:00 2001
From: Eric Dumazet
Date: Fri, 31 Dec 2010 12:48:55 -0800
Subject: sfq: fix slot_dequeue_head()

slot_dequeue_head() should make sure the slot's skb chain is consistent
in both directions, or we can crash if all possible flows are in use.

Jarek pointed out that slot_queue_init() can now be done once in
sfq_init(), instead of each time a flow is set up.

Signed-off-by: Eric Dumazet
Signed-off-by: David S. Miller
---
 net/sched/sch_sfq.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'net/sched/sch_sfq.c')

diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index b76d46b71466..d54ac94066c2 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -297,6 +297,7 @@ static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
 	struct sk_buff *skb = slot->skblist_next;
 
 	slot->skblist_next = skb->next;
+	skb->next->prev = (struct sk_buff *)slot;
 	skb->next = skb->prev = NULL;
 	return skb;
 }
@@ -380,7 +381,6 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		q->ht[hash] = x;
 		slot = &q->slots[x];
 		slot->hash = hash;
-		slot_queue_init(slot);
 	}
 
 	/* If selected queue has length q->limit, do simple tail drop,
@@ -545,8 +545,10 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 			return err;
 	}
 
-	for (i = 0; i < SFQ_SLOTS; i++)
+	for (i = 0; i < SFQ_SLOTS; i++) {
+		slot_queue_init(&q->slots[i]);
 		sfq_link(q, i);
+	}
 	return 0;
 }
 
--
cgit v1.2.1
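The bug class here is a half-repaired doubly linked list. The sketch below
mirrors slot_dequeue_head() on mock types: the slot itself doubles as the
circular list head (as sfq_slot does via the struct sk_buff cast), so
after unlinking the first node, the new first node's prev link must be
pointed back at the head. That back-link is exactly what the added line
restores.

#include <stddef.h>
#include <assert.h>

struct node {
	struct node *next, *prev;
};

/* remove the first element of a circular list whose head is 'head' */
static struct node *dequeue_head(struct node *head)
{
	struct node *first = head->next;

	head->next = first->next;
	first->next->prev = head;	/* the link the fix repairs */
	first->next = first->prev = NULL;
	return first;
}

int main(void)
{
	struct node head, a, b;

	/* circular list: head <-> a <-> b <-> head */
	head.next = &a; a.prev = &head;
	a.next = &b;    b.prev = &a;
	b.next = &head; head.prev = &b;

	assert(dequeue_head(&head) == &a);
	assert(head.next == &b && b.prev == &head);	/* both directions intact */
	return 0;
}

Without the repaired prev link, a later traversal from the tail side can
follow a stale pointer into a freed skb, which is the kind of crash the
commit message describes.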