Linux/net/sched/sch_choke.c

/*
 * net/sched/sch_choke.c        CHOKE scheduler
 *
 * Copyright (c) 2011 Stephen Hemminger <shemminger@vyatta.com>
 * Copyright (c) 2011 Eric Dumazet <eric.dumazet@gmail.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * version 2 as published by the Free Software Foundation.
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/vmalloc.h>
#include <net/pkt_sched.h>
#include <net/inet_ecn.h>
#include <net/red.h>
#include <net/flow_keys.h>

/*
   CHOKe stateless AQM for fair bandwidth allocation
   =================================================

   CHOKe (CHOose and Keep for responsive flows, CHOose and Kill for
   unresponsive flows) is a variant of RED that penalizes misbehaving flows but
   maintains no flow state. The difference from RED is an additional step
   during the enqueuing process. If average queue size is over the
   low threshold (qmin), a packet is chosen at random from the queue.
   If both the new and chosen packet are from the same flow, both
   are dropped. Unlike RED, CHOKe is not really a "classful" qdisc because it
   needs to access packets in queue randomly. It has a minimal class
   interface to allow overriding the builtin flow classifier with
   filters.

   Source:
   R. Pan, B. Prabhakar, and K. Psounis, "CHOKe, A Stateless
   Active Queue Management Scheme for Approximating Fair Bandwidth Allocation",
   IEEE INFOCOM, 2000.

   A. Tang, J. Wang, S. Low, "Understanding CHOKe: Throughput and Spatial
   Characteristics", IEEE/ACM Transactions on Networking, 2004

 */
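/*
 * In outline, the per-packet decision made in choke_enqueue() below is
 * (informal sketch, not literal code):
 *
 *   qavg = EWMA of the queue length (see include/net/red.h)
 *   if qavg <= qth_min:
 *           accept
 *   else:
 *           pick a random queued packet; if it belongs to the same flow
 *           as the new packet, drop both
 *           if qavg > qth_max:
 *                   ECN-mark or drop the new packet
 *           else:
 *                   ECN-mark or drop with the usual RED probability
 */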

/* Upper bound on size of sk_buff table (packets) */
#define CHOKE_MAX_QUEUE (128*1024 - 1)
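/* The packet table is sized to the next power of two above limit + 1
 * (see choke_change() below), so this bound keeps the table at or below
 * 128K slots.
 */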

struct choke_sched_data {
/* Parameters */
        u32              limit;
        unsigned char    flags;

        struct red_parms parms;

/* Variables */
        struct red_vars  vars;
        struct tcf_proto *filter_list;
        struct {
                u32     prob_drop;      /* Early probability drops */
                u32     prob_mark;      /* Early probability marks */
                u32     forced_drop;    /* Forced drops, qavg > max_thresh */
                u32     forced_mark;    /* Forced marks, qavg > max_thresh */
                u32     pdrop;          /* Drops due to queue limits */
                u32     other;          /* Drops due to drop() calls */
                u32     matched;        /* Drops to flow match */
        } stats;

        unsigned int     head;
        unsigned int     tail;

        unsigned int     tab_mask; /* size - 1 */

        struct sk_buff **tab;
};
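
/*
 * tab is used as a ring buffer: head and tail are free-running indexes
 * masked by tab_mask (table size minus one, the size being a power of
 * two), and head == tail means the queue is empty.  For example, with an
 * 8-slot table (tab_mask = 7), head = 6 and tail = 2 give
 * choke_len() = (2 - 6) & 7 = 4 occupied slots (holes included).
 */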

/* number of elements in queue including holes */
static unsigned int choke_len(const struct choke_sched_data *q)
{
        return (q->tail - q->head) & q->tab_mask;
}

/* Is ECN parameter configured */
static int use_ecn(const struct choke_sched_data *q)
{
        return q->flags & TC_RED_ECN;
}

/* Should packets over max just be dropped (versus marked) */
static int use_harddrop(const struct choke_sched_data *q)
{
        return q->flags & TC_RED_HARDDROP;
}

/* Move head pointer forward to skip over holes */
static void choke_zap_head_holes(struct choke_sched_data *q)
{
        do {
                q->head = (q->head + 1) & q->tab_mask;
                if (q->head == q->tail)
                        break;
        } while (q->tab[q->head] == NULL);
}

/* Move tail pointer backwards to reuse holes */
static void choke_zap_tail_holes(struct choke_sched_data *q)
{
        do {
                q->tail = (q->tail - 1) & q->tab_mask;
                if (q->head == q->tail)
                        break;
        } while (q->tab[q->tail] == NULL);
}

/* Drop packet from queue array by creating a "hole" */
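/* The hole is reclaimed lazily: the zap helpers above are only invoked
 * when the dropped entry happened to sit at the head or tail; interior
 * holes are skipped later as choke_dequeue() drains the ring.
 */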
static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb = q->tab[idx];

        q->tab[idx] = NULL;

        if (idx == q->head)
                choke_zap_head_holes(q);
        if (idx == q->tail)
                choke_zap_tail_holes(q);

        sch->qstats.backlog -= qdisc_pkt_len(skb);
        qdisc_drop(skb, sch);
        qdisc_tree_decrease_qlen(sch, 1);
        --sch->q.qlen;
}

/* private part of skb->cb[] that a qdisc is allowed to use
 * is limited to QDISC_CB_PRIV_LEN bytes.
 * As a flow key might be too large, we store a part of it only.
 */
#define CHOKE_K_LEN min_t(u32, sizeof(struct flow_keys), QDISC_CB_PRIV_LEN - 3)

struct choke_skb_cb {
        u16                     classid;
        u8                      keys_valid;
        u8                      keys[QDISC_CB_PRIV_LEN - 3];
};
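
/*
 * The truncated flow key and the keys_valid flag live in the per-skb
 * qdisc cb area, so choke_match_flow() dissects each packet at most once
 * and later comparisons reduce to a memcmp() of the cached bytes.
 */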

static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
{
        qdisc_cb_private_validate(skb, sizeof(struct choke_skb_cb));
        return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
}

static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
{
        choke_skb_cb(skb)->classid = classid;
}

static u16 choke_get_classid(const struct sk_buff *skb)
{
        return choke_skb_cb(skb)->classid;
}

/*
 * Compare flow of two packets
 *  Returns true only if source and destination address and port match.
 *          false for special cases
 */
static bool choke_match_flow(struct sk_buff *skb1,
                             struct sk_buff *skb2)
{
        struct flow_keys temp;

        if (skb1->protocol != skb2->protocol)
                return false;

        if (!choke_skb_cb(skb1)->keys_valid) {
                choke_skb_cb(skb1)->keys_valid = 1;
                skb_flow_dissect(skb1, &temp);
                memcpy(&choke_skb_cb(skb1)->keys, &temp, CHOKE_K_LEN);
        }

        if (!choke_skb_cb(skb2)->keys_valid) {
                choke_skb_cb(skb2)->keys_valid = 1;
                skb_flow_dissect(skb2, &temp);
                memcpy(&choke_skb_cb(skb2)->keys, &temp, CHOKE_K_LEN);
        }

        return !memcmp(&choke_skb_cb(skb1)->keys,
                       &choke_skb_cb(skb2)->keys,
                       CHOKE_K_LEN);
}

/*
 * Classify flow using either:
 *  1. pre-existing classification result in skb
 *  2. fast internal classification
 *  3. use TC filter based classification
 */
static bool choke_classify(struct sk_buff *skb,
                           struct Qdisc *sch, int *qerr)

{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct tcf_result res;
        int result;

        result = tc_classify(skb, q->filter_list, &res);
        if (result >= 0) {
#ifdef CONFIG_NET_CLS_ACT
                switch (result) {
                case TC_ACT_STOLEN:
                case TC_ACT_QUEUED:
                        *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
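                        /* fall through: stolen/queued packets are not
                         * enqueued here either
                         */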
                case TC_ACT_SHOT:
                        return false;
                }
#endif
                choke_set_classid(skb, TC_H_MIN(res.classid));
                return true;
        }

        return false;
}

/*
 * Select a packet at random from queue
 * HACK: since queue can have holes from previous deletion; retry several
 *   times to find a random skb but then just give up and return the head
 * Will return NULL if queue is empty (q->head == q->tail)
 */
static struct sk_buff *choke_peek_random(const struct choke_sched_data *q,
                                         unsigned int *pidx)
{
        struct sk_buff *skb;
        int retrys = 3;

        do {
                *pidx = (q->head + prandom_u32_max(choke_len(q))) & q->tab_mask;
                skb = q->tab[*pidx];
                if (skb)
                        return skb;
        } while (--retrys > 0);

        return q->tab[*pidx = q->head];
}

/*
 * Compare new packet with random packet in queue
 * returns true if matched and sets *pidx
 */
static bool choke_match_random(const struct choke_sched_data *q,
                               struct sk_buff *nskb,
                               unsigned int *pidx)
{
        struct sk_buff *oskb;

        if (q->head == q->tail)
                return false;

        oskb = choke_peek_random(q, pidx);
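        /* With tc filters attached, flows are compared by the classid that
         * choke_classify() recorded; otherwise fall back to the dissected
         * flow keys cached in the skb cb.
         */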
        if (q->filter_list)
                return choke_get_classid(nskb) == choke_get_classid(oskb);

        return choke_match_flow(oskb, nskb);
}

static int choke_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        const struct red_parms *p = &q->parms;
        int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;

        if (q->filter_list) {
                /* If using external classifiers, get result and record it. */
                if (!choke_classify(skb, sch, &ret))
                        goto other_drop;        /* Packet was eaten by filter */
        }

        choke_skb_cb(skb)->keys_valid = 0;
        /* Compute average queue usage (see RED) */
        q->vars.qavg = red_calc_qavg(p, &q->vars, sch->q.qlen);
        if (red_is_idling(&q->vars))
                red_end_of_idle_period(&q->vars);

        /* Is queue small? */
        if (q->vars.qavg <= p->qth_min)
                q->vars.qcount = -1;
        else {
                unsigned int idx;

                /* Draw a packet at random from queue and compare flow */
                if (choke_match_random(q, skb, &idx)) {
                        q->stats.matched++;
                        choke_drop_by_idx(sch, idx);
                        goto congestion_drop;
                }

                /* Queue is large, always mark/drop */
                if (q->vars.qavg > p->qth_max) {
                        q->vars.qcount = -1;

                        sch->qstats.overlimits++;
                        if (use_harddrop(q) || !use_ecn(q) ||
                            !INET_ECN_set_ce(skb)) {
                                q->stats.forced_drop++;
                                goto congestion_drop;
                        }

                        q->stats.forced_mark++;
                } else if (++q->vars.qcount) {
                        if (red_mark_probability(p, &q->vars, q->vars.qavg)) {
                                q->vars.qcount = 0;
                                q->vars.qR = red_random(p);

                                sch->qstats.overlimits++;
                                if (!use_ecn(q) || !INET_ECN_set_ce(skb)) {
                                        q->stats.prob_drop++;
                                        goto congestion_drop;
                                }

                                q->stats.prob_mark++;
                        }
                } else
                        q->vars.qR = red_random(p);
        }

        /* Admit new packet */
        if (sch->q.qlen < q->limit) {
                q->tab[q->tail] = skb;
                q->tail = (q->tail + 1) & q->tab_mask;
                ++sch->q.qlen;
                sch->qstats.backlog += qdisc_pkt_len(skb);
                return NET_XMIT_SUCCESS;
        }

        q->stats.pdrop++;
        return qdisc_drop(skb, sch);

congestion_drop:
        qdisc_drop(skb, sch);
        return NET_XMIT_CN;

other_drop:
        if (ret & __NET_XMIT_BYPASS)
                sch->qstats.drops++;
        kfree_skb(skb);
        return ret;
}

static struct sk_buff *choke_dequeue(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct sk_buff *skb;

        if (q->head == q->tail) {
                if (!red_is_idling(&q->vars))
                        red_start_of_idle_period(&q->vars);
                return NULL;
        }

        skb = q->tab[q->head];
        q->tab[q->head] = NULL;
        choke_zap_head_holes(q);
        --sch->q.qlen;
        sch->qstats.backlog -= qdisc_pkt_len(skb);
        qdisc_bstats_update(sch, skb);

        return skb;
}

static unsigned int choke_drop(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        unsigned int len;

        len = qdisc_queue_drop(sch);
        if (len > 0)
                q->stats.other++;
        else {
                if (!red_is_idling(&q->vars))
                        red_start_of_idle_period(&q->vars);
        }

        return len;
}

static void choke_reset(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);

        red_restart(&q->vars);
}

static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
        [TCA_CHOKE_PARMS]       = { .len = sizeof(struct tc_red_qopt) },
        [TCA_CHOKE_STAB]        = { .len = RED_STAB_SIZE },
        [TCA_CHOKE_MAX_P]       = { .type = NLA_U32 },
};


static void choke_free(void *addr)
{
        kvfree(addr);
}

static int choke_change(struct Qdisc *sch, struct nlattr *opt)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct nlattr *tb[TCA_CHOKE_MAX + 1];
        const struct tc_red_qopt *ctl;
        int err;
        struct sk_buff **old = NULL;
        unsigned int mask;
        u32 max_P;

        if (opt == NULL)
                return -EINVAL;

        err = nla_parse_nested(tb, TCA_CHOKE_MAX, opt, choke_policy);
        if (err < 0)
                return err;

        if (tb[TCA_CHOKE_PARMS] == NULL ||
            tb[TCA_CHOKE_STAB] == NULL)
                return -EINVAL;

        max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;

        ctl = nla_data(tb[TCA_CHOKE_PARMS]);

        if (ctl->limit > CHOKE_MAX_QUEUE)
                return -EINVAL;

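        /* Size the packet table to the next power of two that can hold
         * limit packets, e.g. limit = 1000 gives a 1024-slot table and
         * tab_mask = 1023.
         */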
        mask = roundup_pow_of_two(ctl->limit + 1) - 1;
        if (mask != q->tab_mask) {
                struct sk_buff **ntab;

                ntab = kcalloc(mask + 1, sizeof(struct sk_buff *),
                               GFP_KERNEL | __GFP_NOWARN);
                if (!ntab)
                        ntab = vzalloc((mask + 1) * sizeof(struct sk_buff *));
                if (!ntab)
                        return -ENOMEM;

                sch_tree_lock(sch);
                old = q->tab;
                if (old) {
                        unsigned int oqlen = sch->q.qlen, tail = 0;

                        while (q->head != q->tail) {
                                struct sk_buff *skb = q->tab[q->head];

                                q->head = (q->head + 1) & q->tab_mask;
                                if (!skb)
                                        continue;
                                if (tail < mask) {
                                        ntab[tail++] = skb;
                                        continue;
                                }
                                sch->qstats.backlog -= qdisc_pkt_len(skb);
                                --sch->q.qlen;
                                qdisc_drop(skb, sch);
                        }
                        qdisc_tree_decrease_qlen(sch, oqlen - sch->q.qlen);
                        q->head = 0;
                        q->tail = tail;
                }
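                /* Any packets beyond what a smaller table can hold were
                 * dropped above; head/tail now index the repacked table.
                 */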

                q->tab_mask = mask;
                q->tab = ntab;
        } else
                sch_tree_lock(sch);

        q->flags = ctl->flags;
        q->limit = ctl->limit;

        red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
                      ctl->Plog, ctl->Scell_log,
                      nla_data(tb[TCA_CHOKE_STAB]),
                      max_P);
        red_set_vars(&q->vars);

        if (q->head == q->tail)
                red_end_of_idle_period(&q->vars);

        sch_tree_unlock(sch);
        choke_free(old);
        return 0;
}

static int choke_init(struct Qdisc *sch, struct nlattr *opt)
{
        return choke_change(sch, opt);
}

static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct nlattr *opts = NULL;
        struct tc_red_qopt opt = {
                .limit          = q->limit,
                .flags          = q->flags,
                .qth_min        = q->parms.qth_min >> q->parms.Wlog,
                .qth_max        = q->parms.qth_max >> q->parms.Wlog,
                .Wlog           = q->parms.Wlog,
                .Plog           = q->parms.Plog,
                .Scell_log      = q->parms.Scell_log,
        };

        opts = nla_nest_start(skb, TCA_OPTIONS);
        if (opts == NULL)
                goto nla_put_failure;

        if (nla_put(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt) ||
            nla_put_u32(skb, TCA_CHOKE_MAX_P, q->parms.max_P))
                goto nla_put_failure;
        return nla_nest_end(skb, opts);

nla_put_failure:
        nla_nest_cancel(skb, opts);
        return -EMSGSIZE;
}

static int choke_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{
        struct choke_sched_data *q = qdisc_priv(sch);
        struct tc_choke_xstats st = {
                .early  = q->stats.prob_drop + q->stats.forced_drop,
                .marked = q->stats.prob_mark + q->stats.forced_mark,
                .pdrop  = q->stats.pdrop,
                .other  = q->stats.other,
                .matched = q->stats.matched,
        };

        return gnet_stats_copy_app(d, &st, sizeof(st));
}

static void choke_destroy(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);

        tcf_destroy_chain(&q->filter_list);
        choke_free(q->tab);
}

static struct Qdisc *choke_leaf(struct Qdisc *sch, unsigned long arg)
{
        return NULL;
}

static unsigned long choke_get(struct Qdisc *sch, u32 classid)
{
        return 0;
}

static void choke_put(struct Qdisc *q, unsigned long cl)
{
}

static unsigned long choke_bind(struct Qdisc *sch, unsigned long parent,
                                u32 classid)
{
        return 0;
}

static struct tcf_proto **choke_find_tcf(struct Qdisc *sch, unsigned long cl)
{
        struct choke_sched_data *q = qdisc_priv(sch);

        if (cl)
                return NULL;
        return &q->filter_list;
}

static int choke_dump_class(struct Qdisc *sch, unsigned long cl,
                          struct sk_buff *skb, struct tcmsg *tcm)
{
        tcm->tcm_handle |= TC_H_MIN(cl);
        return 0;
}

static void choke_walk(struct Qdisc *sch, struct qdisc_walker *arg)
{
        if (!arg->stop) {
                if (arg->fn(sch, 1, arg) < 0) {
                        arg->stop = 1;
                        return;
                }
                arg->count++;
        }
}

static const struct Qdisc_class_ops choke_class_ops = {
        .leaf           =       choke_leaf,
        .get            =       choke_get,
        .put            =       choke_put,
        .tcf_chain      =       choke_find_tcf,
        .bind_tcf       =       choke_bind,
        .unbind_tcf     =       choke_put,
        .dump           =       choke_dump_class,
        .walk           =       choke_walk,
};

static struct sk_buff *choke_peek_head(struct Qdisc *sch)
{
        struct choke_sched_data *q = qdisc_priv(sch);

        return (q->head != q->tail) ? q->tab[q->head] : NULL;
}

static struct Qdisc_ops choke_qdisc_ops __read_mostly = {
        .id             =       "choke",
        .priv_size      =       sizeof(struct choke_sched_data),

        .enqueue        =       choke_enqueue,
        .dequeue        =       choke_dequeue,
        .peek           =       choke_peek_head,
        .drop           =       choke_drop,
        .init           =       choke_init,
        .destroy        =       choke_destroy,
        .reset          =       choke_reset,
        .change         =       choke_change,
        .dump           =       choke_dump,
        .dump_stats     =       choke_dump_stats,
        .owner          =       THIS_MODULE,
};

static int __init choke_module_init(void)
{
        return register_qdisc(&choke_qdisc_ops);
}

static void __exit choke_module_exit(void)
{
        unregister_qdisc(&choke_qdisc_ops);
}

module_init(choke_module_init)
module_exit(choke_module_exit)

MODULE_LICENSE("GPL");

