Version:  2.6.32 2.6.33 2.6.34 2.6.35 2.6.36 2.6.37 2.6.38 2.6.39 3.0 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8 3.9

Architecture:  x86 arm avr32 blackfin m68k m68knommu microblaze mips powerpc sh

Linux/net/ipv6/ip6mr.c

  1 /*
  2  *      Linux IPv6 multicast routing support for BSD pim6sd
  3  *      Based on net/ipv4/ipmr.c.
  4  *
  5  *      (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
  6  *              LSIIT Laboratory, Strasbourg, France
  7  *      (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
  8  *              6WIND, Paris, France
  9  *      Copyright (C)2007,2008 USAGI/WIDE Project
 10  *              YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 11  *
 12  *      This program is free software; you can redistribute it and/or
 13  *      modify it under the terms of the GNU General Public License
 14  *      as published by the Free Software Foundation; either version
 15  *      2 of the License, or (at your option) any later version.
 16  *
 17  */
 18 
 19 #include <asm/uaccess.h>
 20 #include <linux/types.h>
 21 #include <linux/sched.h>
 22 #include <linux/errno.h>
 23 #include <linux/timer.h>
 24 #include <linux/mm.h>
 25 #include <linux/kernel.h>
 26 #include <linux/fcntl.h>
 27 #include <linux/stat.h>
 28 #include <linux/socket.h>
 29 #include <linux/inet.h>
 30 #include <linux/netdevice.h>
 31 #include <linux/inetdevice.h>
 32 #include <linux/proc_fs.h>
 33 #include <linux/seq_file.h>
 34 #include <linux/init.h>
 35 #include <linux/slab.h>
 36 #include <linux/compat.h>
 37 #include <net/protocol.h>
 38 #include <linux/skbuff.h>
 39 #include <net/sock.h>
 40 #include <net/raw.h>
 41 #include <linux/notifier.h>
 42 #include <linux/if_arp.h>
 43 #include <net/checksum.h>
 44 #include <net/netlink.h>
 45 #include <net/fib_rules.h>
 46 
 47 #include <net/ipv6.h>
 48 #include <net/ip6_route.h>
 49 #include <linux/mroute6.h>
 50 #include <linux/pim.h>
 51 #include <net/addrconf.h>
 52 #include <linux/netfilter_ipv6.h>
 53 #include <linux/export.h>
 54 #include <net/ip6_checksum.h>
 55 #include <linux/netconf.h>
 56 
 57 struct mr6_table { /* state of one IPv6 multicast routing table */
 58         struct list_head        list; /* link in net->ipv6.mr6_tables (multiple-tables build) */
 59 #ifdef CONFIG_NET_NS
 60         struct net              *net; /* owning namespace; set with write_pnet(), read with read_pnet() */
 61 #endif
 62         u32                     id; /* table identifier, e.g. RT6_TABLE_DFLT */
 63         struct sock             *mroute6_sk; /* presumably the routing daemon's socket; not assigned in this part of the file */
 64         struct timer_list       ipmr_expire_timer; /* fires ipmr_expire_process() for this table */
 65         struct list_head        mfc6_unres_queue; /* unresolved entries, guarded by mfc_unres_lock */
 66         struct list_head        mfc6_cache_array[MFC6_LINES]; /* buckets of resolved entries */
 67         struct mif_device       vif6_table[MAXMIFS]; /* multicast interfaces, indexed by mif number */
 68         int                     maxvif; /* one past the highest mif index in use */
 69         atomic_t                cache_resolve_queue_len; /* decremented whenever an unresolved entry is destroyed */
 70         bool                    mroute_do_assert;
 71         bool                    mroute_do_pim;
 72 #ifdef CONFIG_IPV6_PIMSM_V2
 73         int                     mroute_reg_vif_num; /* index of the PIM register mif, -1 when there is none */
 74 #endif
 75 };
 76 
 77 struct ip6mr_rule { /* ip6mr policy rule: nothing beyond the generic fib_rule part */
 78         struct fib_rule         common;
 79 };
 80 
 81 struct ip6mr_result { /* lookup result passed through fib_lookup_arg.result */
 82         struct mr6_table        *mrt; /* table selected by ip6mr_rule_action() */
 83 };
 84 
 85 /* Big lock, protecting the vif table, the mrt cache and the mroute socket state.
 86    Note that the changes are serialized via rtnl_lock.
 87  */
 88 
 89 static DEFINE_RWLOCK(mrt_lock);
 90 
 91 /*
 92  *      Multicast router control variables
 93  */
 94 
 95 #define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL) /* true when mif slot _idx has a device bound */
 96 
 97 /* Special spinlock for the queue of unresolved entries */
 98 static DEFINE_SPINLOCK(mfc_unres_lock);
 99 
100 /* We return to Alan's original scheme. The hash table of resolved
101    entries is changed only in process context and is protected
102    by the weak lock mrt_lock. The queue of unresolved entries is protected
103    by the strong spinlock mfc_unres_lock.
104 
105    In this case the data path is free of exclusive locks.
106  */
107 
108 static struct kmem_cache *mrt_cachep __read_mostly; /* slab cache that mfc6_cache entries are freed back to */
109 
110 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
111 static void ip6mr_free_table(struct mr6_table *mrt);
112 
113 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
114                           struct sk_buff *skb, struct mfc6_cache *cache);
115 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
116                               mifi_t mifi, int assert);
117 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
118                                struct mfc6_cache *c, struct rtmsg *rtm);
119 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
120                               int cmd);
121 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
122                                struct netlink_callback *cb);
123 static void mroute_clean_tables(struct mr6_table *mrt);
124 static void ipmr_expire_process(unsigned long arg); /* timer callback; arg is the struct mr6_table pointer */
125 
126 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
127 #define ip6mr_for_each_table(mrt, net) \
128         list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
129 
130 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
131 {
132         struct mr6_table *mrt;
133 
134         ip6mr_for_each_table(mrt, net) {
135                 if (mrt->id == id)
136                         return mrt;
137         }
138         return NULL;
139 }
140 
141 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
142                             struct mr6_table **mrt)
143 {
144         struct ip6mr_result res;
145         struct fib_lookup_arg arg = { .result = &res, };
146         int err;
147 
148         err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
149                                flowi6_to_flowi(flp6), 0, &arg);
150         if (err < 0)
151                 return err;
152         *mrt = res.mrt;
153         return 0;
154 }
155 
156 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
157                              int flags, struct fib_lookup_arg *arg)
158 {
159         struct ip6mr_result *res = arg->result;
160         struct mr6_table *mrt;
161 
162         switch (rule->action) {
163         case FR_ACT_TO_TBL:
164                 break;
165         case FR_ACT_UNREACHABLE:
166                 return -ENETUNREACH;
167         case FR_ACT_PROHIBIT:
168                 return -EACCES;
169         case FR_ACT_BLACKHOLE:
170         default:
171                 return -EINVAL;
172         }
173 
174         mrt = ip6mr_get_table(rule->fr_net, rule->table);
175         if (mrt == NULL)
176                 return -EAGAIN;
177         res->mrt = mrt;
178         return 0;
179 }
180 
/* fib_rules "match" callback: every flow matches every ip6mr rule. */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	return 1;
}
185 
186 static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = { /* netlink attribute policy: generic FRA_* attributes only */
187         FRA_GENERIC_POLICY,
188 };
189 
/*
 * fib_rules "configure" callback.  ip6mr rules carry no family-specific
 * attributes, so there is nothing to parse; always returns 0.
 */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb)
{
	return 0;
}
195 
/*
 * fib_rules "compare" callback.  With no family-specific fields to
 * compare, every rule is reported as equal (returns 1).
 */
static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	return 1;
}
201 
202 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
203                            struct fib_rule_hdr *frh)
204 {
205         frh->dst_len = 0;
206         frh->src_len = 0;
207         frh->tos     = 0;
208         return 0;
209 }
210 
211 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = { /* template handed to fib_rules_register() per namespace */
212         .family         = RTNL_FAMILY_IP6MR,
213         .rule_size      = sizeof(struct ip6mr_rule),
214         .addr_size      = sizeof(struct in6_addr),
215         .action         = ip6mr_rule_action, /* picks the table for a matched rule */
216         .match          = ip6mr_rule_match, /* always matches */
217         .configure      = ip6mr_rule_configure, /* no-op */
218         .compare        = ip6mr_rule_compare, /* always equal */
219         .default_pref   = fib_default_rule_pref,
220         .fill           = ip6mr_rule_fill, /* zeroes dst_len/src_len/tos */
221         .nlgroup        = RTNLGRP_IPV6_RULE,
222         .policy         = ip6mr_rule_policy,
223         .owner          = THIS_MODULE,
224 };
225 
226 static int __net_init ip6mr_rules_init(struct net *net)
227 {
228         struct fib_rules_ops *ops;
229         struct mr6_table *mrt;
230         int err;
231 
232         ops = fib_rules_register(&ip6mr_rules_ops_template, net);
233         if (IS_ERR(ops))
234                 return PTR_ERR(ops);
235 
236         INIT_LIST_HEAD(&net->ipv6.mr6_tables);
237 
238         mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
239         if (mrt == NULL) {
240                 err = -ENOMEM;
241                 goto err1;
242         }
243 
244         err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
245         if (err < 0)
246                 goto err2;
247 
248         net->ipv6.mr6_rules_ops = ops;
249         return 0;
250 
251 err2:
252         kfree(mrt);
253 err1:
254         fib_rules_unregister(ops);
255         return err;
256 }
257 
258 static void __net_exit ip6mr_rules_exit(struct net *net)
259 {
260         struct mr6_table *mrt, *next;
261 
262         list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
263                 list_del(&mrt->list);
264                 ip6mr_free_table(mrt);
265         }
266         fib_rules_unregister(net->ipv6.mr6_rules_ops);
267 }
268 #else
269 #define ip6mr_for_each_table(mrt, net) \
270         for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
271 
272 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
273 {
274         return net->ipv6.mrt6;
275 }
276 
277 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
278                             struct mr6_table **mrt)
279 {
280         *mrt = net->ipv6.mrt6;
281         return 0;
282 }
283 
284 static int __net_init ip6mr_rules_init(struct net *net)
285 {
286         net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
287         return net->ipv6.mrt6 ? 0 : -ENOMEM;
288 }
289 
290 static void __net_exit ip6mr_rules_exit(struct net *net)
291 {
292         ip6mr_free_table(net->ipv6.mrt6);
293 }
294 #endif
295 
296 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
297 {
298         struct mr6_table *mrt;
299         unsigned int i;
300 
301         mrt = ip6mr_get_table(net, id);
302         if (mrt != NULL)
303                 return mrt;
304 
305         mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
306         if (mrt == NULL)
307                 return NULL;
308         mrt->id = id;
309         write_pnet(&mrt->net, net);
310 
311         /* Forwarding cache */
312         for (i = 0; i < MFC6_LINES; i++)
313                 INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
314 
315         INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
316 
317         setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
318                     (unsigned long)mrt);
319 
320 #ifdef CONFIG_IPV6_PIMSM_V2
321         mrt->mroute_reg_vif_num = -1;
322 #endif
323 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
324         list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
325 #endif
326         return mrt;
327 }
328 
329 static void ip6mr_free_table(struct mr6_table *mrt)
330 {
331         del_timer(&mrt->ipmr_expire_timer);
332         mroute_clean_tables(mrt);
333         kfree(mrt);
334 }
335 
336 #ifdef CONFIG_PROC_FS
337 
338 struct ipmr_mfc_iter { /* seq_file cursor for the mfc cache listing */
339         struct seq_net_private p;
340         struct mr6_table *mrt; /* table being dumped */
341         struct list_head *cache; /* list currently walked: a cache bucket, the unresolved queue, or NULL */
342         int ct; /* index of the current bucket in mfc6_cache_array */
343 };
344 
345 
346 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
347                                            struct ipmr_mfc_iter *it, loff_t pos)
348 {
349         struct mr6_table *mrt = it->mrt;
350         struct mfc6_cache *mfc;
351 
352         read_lock(&mrt_lock);
353         for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
354                 it->cache = &mrt->mfc6_cache_array[it->ct];
355                 list_for_each_entry(mfc, it->cache, list)
356                         if (pos-- == 0)
357                                 return mfc;
358         }
359         read_unlock(&mrt_lock);
360 
361         spin_lock_bh(&mfc_unres_lock);
362         it->cache = &mrt->mfc6_unres_queue;
363         list_for_each_entry(mfc, it->cache, list)
364                 if (pos-- == 0)
365                         return mfc;
366         spin_unlock_bh(&mfc_unres_lock);
367 
368         it->cache = NULL;
369         return NULL;
370 }
371 
372 /*
373  *      The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
374  */
375 
376 struct ipmr_vif_iter { /* seq_file cursor for the mif listing */
377         struct seq_net_private p;
378         struct mr6_table *mrt; /* table being dumped */
379         int ct; /* index of the current slot in vif6_table */
380 };
381 
382 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
383                                             struct ipmr_vif_iter *iter,
384                                             loff_t pos)
385 {
386         struct mr6_table *mrt = iter->mrt;
387 
388         for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
389                 if (!MIF_EXISTS(mrt, iter->ct))
390                         continue;
391                 if (pos-- == 0)
392                         return &mrt->vif6_table[iter->ct];
393         }
394         return NULL;
395 }
396 
397 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
398         __acquires(mrt_lock)
399 {
400         struct ipmr_vif_iter *iter = seq->private;
401         struct net *net = seq_file_net(seq);
402         struct mr6_table *mrt;
403 
404         mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
405         if (mrt == NULL)
406                 return ERR_PTR(-ENOENT);
407 
408         iter->mrt = mrt;
409 
410         read_lock(&mrt_lock);
411         return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
412                 : SEQ_START_TOKEN;
413 }
414 
415 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
416 {
417         struct ipmr_vif_iter *iter = seq->private;
418         struct net *net = seq_file_net(seq);
419         struct mr6_table *mrt = iter->mrt;
420 
421         ++*pos;
422         if (v == SEQ_START_TOKEN)
423                 return ip6mr_vif_seq_idx(net, iter, 0);
424 
425         while (++iter->ct < mrt->maxvif) {
426                 if (!MIF_EXISTS(mrt, iter->ct))
427                         continue;
428                 return &mrt->vif6_table[iter->ct];
429         }
430         return NULL;
431 }
432 
433 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
434         __releases(mrt_lock)
435 {
436         read_unlock(&mrt_lock);
437 }
438 
439 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
440 {
441         struct ipmr_vif_iter *iter = seq->private;
442         struct mr6_table *mrt = iter->mrt;
443 
444         if (v == SEQ_START_TOKEN) {
445                 seq_puts(seq,
446                          "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
447         } else {
448                 const struct mif_device *vif = v;
449                 const char *name = vif->dev ? vif->dev->name : "none";
450 
451                 seq_printf(seq,
452                            "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
453                            vif - mrt->vif6_table,
454                            name, vif->bytes_in, vif->pkt_in,
455                            vif->bytes_out, vif->pkt_out,
456                            vif->flags);
457         }
458         return 0;
459 }
460 
461 static const struct seq_operations ip6mr_vif_seq_ops = { /* iterator callbacks for the mif listing */
462         .start = ip6mr_vif_seq_start, /* takes mrt_lock for reading */
463         .next  = ip6mr_vif_seq_next,
464         .stop  = ip6mr_vif_seq_stop, /* releases mrt_lock */
465         .show  = ip6mr_vif_seq_show,
466 };
467 
468 static int ip6mr_vif_open(struct inode *inode, struct file *file)
469 {
470         return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
471                             sizeof(struct ipmr_vif_iter));
472 }
473 
474 static const struct file_operations ip6mr_vif_fops = { /* file operations of the mif listing */
475         .owner   = THIS_MODULE,
476         .open    = ip6mr_vif_open,
477         .read    = seq_read,
478         .llseek  = seq_lseek,
479         .release = seq_release_net, /* counterpart of seq_open_net() used in open */
480 };
481 
482 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
483 {
484         struct ipmr_mfc_iter *it = seq->private;
485         struct net *net = seq_file_net(seq);
486         struct mr6_table *mrt;
487 
488         mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
489         if (mrt == NULL)
490                 return ERR_PTR(-ENOENT);
491 
492         it->mrt = mrt;
493         return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
494                 : SEQ_START_TOKEN;
495 }
496 
/*
 * seq_file "next" for the mfc cache listing: advance *pos and return the
 * entry following v, or NULL at the end.
 *
 * The walk covers the resolved cache buckets first (mrt_lock read-held)
 * and then the unresolved queue (mfc_unres_lock held).  The lock
 * hand-over between the two happens in here; it->cache always names the
 * list whose lock is currently held, or is NULL when none is.
 */
497 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
498 {
499         struct mfc6_cache *mfc = v;
500         struct ipmr_mfc_iter *it = seq->private;
501         struct net *net = seq_file_net(seq);
502         struct mr6_table *mrt = it->mrt;
503 
504         ++*pos;
505 
506         if (v == SEQ_START_TOKEN) /* header was just shown: fetch the first real entry */
507                 return ipmr_mfc_seq_idx(net, seq->private, 0);
508 
509         if (mfc->list.next != it->cache) /* more entries on the current list */
510                 return list_entry(mfc->list.next, struct mfc6_cache, list);
511 
512         if (it->cache == &mrt->mfc6_unres_queue) /* end of the unresolved queue: done */
513                 goto end_of_list;
514 
515         BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]); /* must be inside the bucket recorded in it->ct */
516 
517         while (++it->ct < MFC6_LINES) { /* find the next non-empty bucket */
518                 it->cache = &mrt->mfc6_cache_array[it->ct];
519                 if (list_empty(it->cache))
520                         continue;
521                 return list_first_entry(it->cache, struct mfc6_cache, list);
522         }
523 
524         /* exhausted cache_array, show unresolved */
525         read_unlock(&mrt_lock); /* swap mrt_lock for mfc_unres_lock */
526         it->cache = &mrt->mfc6_unres_queue;
527         it->ct = 0;
528 
529         spin_lock_bh(&mfc_unres_lock);
530         if (!list_empty(it->cache))
531                 return list_first_entry(it->cache, struct mfc6_cache, list);
532 
533  end_of_list:
534         spin_unlock_bh(&mfc_unres_lock); /* both paths reaching here hold mfc_unres_lock */
535         it->cache = NULL; /* tells the stop callback that no lock is held */
536 
537         return NULL;
538 }
539 
540 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
541 {
542         struct ipmr_mfc_iter *it = seq->private;
543         struct mr6_table *mrt = it->mrt;
544 
545         if (it->cache == &mrt->mfc6_unres_queue)
546                 spin_unlock_bh(&mfc_unres_lock);
547         else if (it->cache == mrt->mfc6_cache_array)
548                 read_unlock(&mrt_lock);
549 }
550 
551 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
552 {
553         int n;
554 
555         if (v == SEQ_START_TOKEN) {
556                 seq_puts(seq,
557                          "Group                            "
558                          "Origin                           "
559                          "Iif      Pkts  Bytes     Wrong  Oifs\n");
560         } else {
561                 const struct mfc6_cache *mfc = v;
562                 const struct ipmr_mfc_iter *it = seq->private;
563                 struct mr6_table *mrt = it->mrt;
564 
565                 seq_printf(seq, "%pI6 %pI6 %-3hd",
566                            &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
567                            mfc->mf6c_parent);
568 
569                 if (it->cache != &mrt->mfc6_unres_queue) {
570                         seq_printf(seq, " %8lu %8lu %8lu",
571                                    mfc->mfc_un.res.pkt,
572                                    mfc->mfc_un.res.bytes,
573                                    mfc->mfc_un.res.wrong_if);
574                         for (n = mfc->mfc_un.res.minvif;
575                              n < mfc->mfc_un.res.maxvif; n++) {
576                                 if (MIF_EXISTS(mrt, n) &&
577                                     mfc->mfc_un.res.ttls[n] < 255)
578                                         seq_printf(seq,
579                                                    " %2d:%-3d",
580                                                    n, mfc->mfc_un.res.ttls[n]);
581                         }
582                 } else {
583                         /* unresolved mfc_caches don't contain
584                          * pkt, bytes and wrong_if values
585                          */
586                         seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
587                 }
588                 seq_putc(seq, '\n');
589         }
590         return 0;
591 }
592 
593 static const struct seq_operations ipmr_mfc_seq_ops = { /* iterator callbacks for the mfc cache listing */
594         .start = ipmr_mfc_seq_start,
595         .next  = ipmr_mfc_seq_next, /* switches from mrt_lock to mfc_unres_lock mid-walk */
596         .stop  = ipmr_mfc_seq_stop, /* drops whichever lock is still held */
597         .show  = ipmr_mfc_seq_show,
598 };
599 
600 static int ipmr_mfc_open(struct inode *inode, struct file *file)
601 {
602         return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
603                             sizeof(struct ipmr_mfc_iter));
604 }
605 
606 static const struct file_operations ip6mr_mfc_fops = { /* file operations of the mfc cache listing */
607         .owner   = THIS_MODULE,
608         .open    = ipmr_mfc_open,
609         .read    = seq_read,
610         .llseek  = seq_lseek,
611         .release = seq_release_net, /* counterpart of seq_open_net() used in open */
612 };
613 #endif
614 
615 #ifdef CONFIG_IPV6_PIMSM_V2
616 
617 static int pim6_rcv(struct sk_buff *skb)
618 {
619         struct pimreghdr *pim;
620         struct ipv6hdr   *encap;
621         struct net_device  *reg_dev = NULL;
622         struct net *net = dev_net(skb->dev);
623         struct mr6_table *mrt;
624         struct flowi6 fl6 = {
625                 .flowi6_iif     = skb->dev->ifindex,
626                 .flowi6_mark    = skb->mark,
627         };
628         int reg_vif_num;
629 
630         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
631                 goto drop;
632 
633         pim = (struct pimreghdr *)skb_transport_header(skb);
634         if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
635             (pim->flags & PIM_NULL_REGISTER) ||
636             (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
637                              sizeof(*pim), IPPROTO_PIM,
638                              csum_partial((void *)pim, sizeof(*pim), 0)) &&
639              csum_fold(skb_checksum(skb, 0, skb->len, 0))))
640                 goto drop;
641 
642         /* check if the inner packet is destined to mcast group */
643         encap = (struct ipv6hdr *)(skb_transport_header(skb) +
644                                    sizeof(*pim));
645 
646         if (!ipv6_addr_is_multicast(&encap->daddr) ||
647             encap->payload_len == 0 ||
648             ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
649                 goto drop;
650 
651         if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
652                 goto drop;
653         reg_vif_num = mrt->mroute_reg_vif_num;
654 
655         read_lock(&mrt_lock);
656         if (reg_vif_num >= 0)
657                 reg_dev = mrt->vif6_table[reg_vif_num].dev;
658         if (reg_dev)
659                 dev_hold(reg_dev);
660         read_unlock(&mrt_lock);
661 
662         if (reg_dev == NULL)
663                 goto drop;
664 
665         skb->mac_header = skb->network_header;
666         skb_pull(skb, (u8 *)encap - skb->data);
667         skb_reset_network_header(skb);
668         skb->protocol = htons(ETH_P_IPV6);
669         skb->ip_summed = CHECKSUM_NONE;
670         skb->pkt_type = PACKET_HOST;
671 
672         skb_tunnel_rx(skb, reg_dev);
673 
674         netif_rx(skb);
675 
676         dev_put(reg_dev);
677         return 0;
678  drop:
679         kfree_skb(skb);
680         return 0;
681 }
682 
683 static const struct inet6_protocol pim6_protocol = { /* IPv6 protocol hook for PIM packets */
684         .handler        =       pim6_rcv,
685 };
686 
687 /* Service routines creating virtual interfaces: PIMREG */
688 
689 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
690                                       struct net_device *dev)
691 {
692         struct net *net = dev_net(dev);
693         struct mr6_table *mrt;
694         struct flowi6 fl6 = {
695                 .flowi6_oif     = dev->ifindex,
696                 .flowi6_iif     = skb->skb_iif,
697                 .flowi6_mark    = skb->mark,
698         };
699         int err;
700 
701         err = ip6mr_fib_lookup(net, &fl6, &mrt);
702         if (err < 0) {
703                 kfree_skb(skb);
704                 return err;
705         }
706 
707         read_lock(&mrt_lock);
708         dev->stats.tx_bytes += skb->len;
709         dev->stats.tx_packets++;
710         ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
711         read_unlock(&mrt_lock);
712         kfree_skb(skb);
713         return NETDEV_TX_OK;
714 }
715 
716 static const struct net_device_ops reg_vif_netdev_ops = { /* device ops of the PIM register interface */
717         .ndo_start_xmit = reg_vif_xmit,
718 };
719 
720 static void reg_vif_setup(struct net_device *dev)
721 {
722         dev->type               = ARPHRD_PIMREG;
723         dev->mtu                = 1500 - sizeof(struct ipv6hdr) - 8;
724         dev->flags              = IFF_NOARP;
725         dev->netdev_ops         = &reg_vif_netdev_ops;
726         dev->destructor         = free_netdev;
727         dev->features           |= NETIF_F_NETNS_LOCAL;
728 }
729 
730 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
731 {
732         struct net_device *dev;
733         char name[IFNAMSIZ];
734 
735         if (mrt->id == RT6_TABLE_DFLT)
736                 sprintf(name, "pim6reg");
737         else
738                 sprintf(name, "pim6reg%u", mrt->id);
739 
740         dev = alloc_netdev(0, name, reg_vif_setup);
741         if (dev == NULL)
742                 return NULL;
743 
744         dev_net_set(dev, net);
745 
746         if (register_netdevice(dev)) {
747                 free_netdev(dev);
748                 return NULL;
749         }
750         dev->iflink = 0;
751 
752         if (dev_open(dev))
753                 goto failure;
754 
755         dev_hold(dev);
756         return dev;
757 
758 failure:
759         /* allow the register to be completed before unregistering. */
760         rtnl_unlock();
761         rtnl_lock();
762 
763         unregister_netdevice(dev);
764         return NULL;
765 }
766 #endif
767 
768 /*
769  *      Delete a VIF entry
770  */
771 
772 static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
773 {
774         struct mif_device *v;
775         struct net_device *dev;
776         struct inet6_dev *in6_dev;
777 
778         if (vifi < 0 || vifi >= mrt->maxvif)
779                 return -EADDRNOTAVAIL;
780 
781         v = &mrt->vif6_table[vifi];
782 
783         write_lock_bh(&mrt_lock);
784         dev = v->dev;
785         v->dev = NULL;
786 
787         if (!dev) {
788                 write_unlock_bh(&mrt_lock);
789                 return -EADDRNOTAVAIL;
790         }
791 
792 #ifdef CONFIG_IPV6_PIMSM_V2
793         if (vifi == mrt->mroute_reg_vif_num)
794                 mrt->mroute_reg_vif_num = -1;
795 #endif
796 
797         if (vifi + 1 == mrt->maxvif) {
798                 int tmp;
799                 for (tmp = vifi - 1; tmp >= 0; tmp--) {
800                         if (MIF_EXISTS(mrt, tmp))
801                                 break;
802                 }
803                 mrt->maxvif = tmp + 1;
804         }
805 
806         write_unlock_bh(&mrt_lock);
807 
808         dev_set_allmulti(dev, -1);
809 
810         in6_dev = __in6_dev_get(dev);
811         if (in6_dev) {
812                 in6_dev->cnf.mc_forwarding--;
813                 inet6_netconf_notify_devconf(dev_net(dev),
814                                              NETCONFA_MC_FORWARDING,
815                                              dev->ifindex, &in6_dev->cnf);
816         }
817 
818         if (v->flags & MIFF_REGISTER)
819                 unregister_netdevice_queue(dev, head);
820 
821         dev_put(dev);
822         return 0;
823 }
824 
825 static inline void ip6mr_cache_free(struct mfc6_cache *c)
826 {
827         kmem_cache_free(mrt_cachep, c);
828 }
829 
830 /* Destroy an unresolved cache entry, killing queued skbs
831    and reporting error to netlink readers.
832  */
833 
834 static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
835 {
836         struct net *net = read_pnet(&mrt->net);
837         struct sk_buff *skb;
838 
839         atomic_dec(&mrt->cache_resolve_queue_len);
840 
841         while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
842                 if (ipv6_hdr(skb)->version == 0) {
843                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
844                         nlh->nlmsg_type = NLMSG_ERROR;
845                         nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
846                         skb_trim(skb, nlh->nlmsg_len);
847                         ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT;
848                         rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
849                 } else
850                         kfree_skb(skb);
851         }
852 
853         ip6mr_cache_free(c);
854 }
855 
856 
857 /* Timer process for all the unresolved queue. */
858 
859 static void ipmr_do_expire_process(struct mr6_table *mrt)
860 {
861         unsigned long now = jiffies;
862         unsigned long expires = 10 * HZ;
863         struct mfc6_cache *c, *next;
864 
865         list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
866                 if (time_after(c->mfc_un.unres.expires, now)) {
867                         /* not yet... */
868                         unsigned long interval = c->mfc_un.unres.expires - now;
869                         if (interval < expires)
870                                 expires = interval;
871                         continue;
872                 }
873 
874                 list_del(&c->list);
875                 mr6_netlink_event(mrt, c, RTM_DELROUTE);
876                 ip6mr_destroy_unres(mrt, c);
877         }
878 
879         if (!list_empty(&mrt->mfc6_unres_queue))
880                 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
881 }
882 
883 static void ipmr_expire_process(unsigned long arg)
884 {
885         struct mr6_table *mrt = (struct mr6_table *)arg;
886 
887         if (!spin_trylock(&mfc_unres_lock)) {
888                 mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
889                 return;
890         }
891 
892         if (!list_empty(&mrt->mfc6_unres_queue))
893                 ipmr_do_expire_process(mrt);
894 
895         spin_unlock(&mfc_unres_lock);
896 }
897 
898 /* Fill oifs list. It is called under write locked mrt_lock. */
899 
900 static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
901                                     unsigned char *ttls)
902 {
903         int vifi;
904 
905         cache->mfc_un.res.minvif = MAXMIFS;
906         cache->mfc_un.res.maxvif = 0;
907         memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
908 
909         for (vifi = 0; vifi < mrt->maxvif; vifi++) {
910                 if (MIF_EXISTS(mrt, vifi) &&
911                     ttls[vifi] && ttls[vifi] < 255) {
912                         cache->mfc_un.res.ttls[vifi] = ttls[vifi];
913                         if (cache->mfc_un.res.minvif > vifi)
914                                 cache->mfc_un.res.minvif = vifi;
915                         if (cache->mfc_un.res.maxvif <= vifi)
916                                 cache->mfc_un.res.maxvif = vifi + 1;
917                 }
918         }
919 }
920 
921 static int mif6_add(struct net *net, struct mr6_table *mrt,
922                     struct mif6ctl *vifc, int mrtsock)
923 {
924         int vifi = vifc->mif6c_mifi;
925         struct mif_device *v = &mrt->vif6_table[vifi];
926         struct net_device *dev;
927         struct inet6_dev *in6_dev;
928         int err;
929 
930         /* Is vif busy ? */
931         if (MIF_EXISTS(mrt, vifi))
932                 return -EADDRINUSE;
933 
934         switch (vifc->mif6c_flags) {
935 #ifdef CONFIG_IPV6_PIMSM_V2
936         case MIFF_REGISTER:
937                 /*
938                  * Special Purpose VIF in PIM
939                  * All the packets will be sent to the daemon
940                  */
941                 if (mrt->mroute_reg_vif_num >= 0)
942                         return -EADDRINUSE;
943                 dev = ip6mr_reg_vif(net, mrt);
944                 if (!dev)
945                         return -ENOBUFS;
946                 err = dev_set_allmulti(dev, 1);
947                 if (err) {
948                         unregister_netdevice(dev);
949                         dev_put(dev);
950                         return err;
951                 }
952                 break;
953 #endif
954         case 0:
955                 dev = dev_get_by_index(net, vifc->mif6c_pifi);
956                 if (!dev)
957                         return -EADDRNOTAVAIL;
958                 err = dev_set_allmulti(dev, 1);
959                 if (err) {
960                         dev_put(dev);
961                         return err;
962                 }
963                 break;
964         default:
965                 return -EINVAL;
966         }
967 
968         in6_dev = __in6_dev_get(dev);
969         if (in6_dev) {
970                 in6_dev->cnf.mc_forwarding++;
971                 inet6_netconf_notify_devconf(dev_net(dev),
972                                              NETCONFA_MC_FORWARDING,
973                                              dev->ifindex, &in6_dev->cnf);
974         }
975 
976         /*
977          *      Fill in the VIF structures
978          */
979         v->rate_limit = vifc->vifc_rate_limit;
980         v->flags = vifc->mif6c_flags;
981         if (!mrtsock)
982                 v->flags |= VIFF_STATIC;
983         v->threshold = vifc->vifc_threshold;
984         v->bytes_in = 0;
985         v->bytes_out = 0;
986         v->pkt_in = 0;
987         v->pkt_out = 0;
988         v->link = dev->ifindex;
989         if (v->flags & MIFF_REGISTER)
990                 v->link = dev->iflink;
991 
992         /* And finish update writing critical data */
993         write_lock_bh(&mrt_lock);
994         v->dev = dev;
995 #ifdef CONFIG_IPV6_PIMSM_V2
996         if (v->flags & MIFF_REGISTER)
997                 mrt->mroute_reg_vif_num = vifi;
998 #endif
999         if (vifi + 1 > mrt->maxvif)
1000                 mrt->maxvif = vifi + 1;
1001         write_unlock_bh(&mrt_lock);
1002         return 0;
1003 }
1004 
1005 static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
1006                                            const struct in6_addr *origin,
1007                                            const struct in6_addr *mcastgrp)
1008 {
1009         int line = MFC6_HASH(mcastgrp, origin);
1010         struct mfc6_cache *c;
1011 
1012         list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1013                 if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
1014                     ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1015                         return c;
1016         }
1017         return NULL;
1018 }
1019 
1020 /* Look for a (*,*,oif) entry */
1021 static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
1022                                                       mifi_t mifi)
1023 {
1024         int line = MFC6_HASH(&in6addr_any, &in6addr_any);
1025         struct mfc6_cache *c;
1026 
1027         list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1028                 if (ipv6_addr_any(&c->mf6c_origin) &&
1029                     ipv6_addr_any(&c->mf6c_mcastgrp) &&
1030                     (c->mfc_un.res.ttls[mifi] < 255))
1031                         return c;
1032 
1033         return NULL;
1034 }
1035 
1036 /* Look for a (*,G) entry */
1037 static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
1038                                                struct in6_addr *mcastgrp,
1039                                                mifi_t mifi)
1040 {
1041         int line = MFC6_HASH(mcastgrp, &in6addr_any);
1042         struct mfc6_cache *c, *proxy;
1043 
1044         if (ipv6_addr_any(mcastgrp))
1045                 goto skip;
1046 
1047         list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1048                 if (ipv6_addr_any(&c->mf6c_origin) &&
1049                     ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
1050                         if (c->mfc_un.res.ttls[mifi] < 255)
1051                                 return c;
1052 
1053                         /* It's ok if the mifi is part of the static tree */
1054                         proxy = ip6mr_cache_find_any_parent(mrt,
1055                                                             c->mf6c_parent);
1056                         if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
1057                                 return c;
1058                 }
1059 
1060 skip:
1061         return ip6mr_cache_find_any_parent(mrt, mifi);
1062 }
1063 
1064 /*
1065  *      Allocate a multicast cache entry
1066  */
1067 static struct mfc6_cache *ip6mr_cache_alloc(void)
1068 {
1069         struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1070         if (c == NULL)
1071                 return NULL;
1072         c->mfc_un.res.minvif = MAXMIFS;
1073         return c;
1074 }
1075 
1076 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1077 {
1078         struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1079         if (c == NULL)
1080                 return NULL;
1081         skb_queue_head_init(&c->mfc_un.unres.unresolved);
1082         c->mfc_un.unres.expires = jiffies + 10 * HZ;
1083         return c;
1084 }
1085 
1086 /*
1087  *      A cache entry has gone into a resolved state from queued
1088  */
1089 
1090 static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1091                                 struct mfc6_cache *uc, struct mfc6_cache *c)
1092 {
1093         struct sk_buff *skb;
1094 
1095         /*
1096          *      Play the pending entries through our router
1097          */
1098 
1099         while((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1100                 if (ipv6_hdr(skb)->version == 0) {
1101                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1102 
1103                         if (__ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
1104                                 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1105                         } else {
1106                                 nlh->nlmsg_type = NLMSG_ERROR;
1107                                 nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
1108                                 skb_trim(skb, nlh->nlmsg_len);
1109                                 ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
1110                         }
1111                         rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1112                 } else
1113                         ip6_mr_forward(net, mrt, skb, c);
1114         }
1115 }
1116 
1117 /*
1118  *      Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
1119  *      expects the following bizarre scheme.
1120  *
1121  *      Called under mrt_lock.
1122  */
1123 
1124 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
1125                               mifi_t mifi, int assert)
1126 {
1127         struct sk_buff *skb;
1128         struct mrt6msg *msg;
1129         int ret;
1130 
1131 #ifdef CONFIG_IPV6_PIMSM_V2
1132         if (assert == MRT6MSG_WHOLEPKT)
1133                 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1134                                                 +sizeof(*msg));
1135         else
1136 #endif
1137                 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1138 
1139         if (!skb)
1140                 return -ENOBUFS;
1141 
1142         /* I suppose that internal messages
1143          * do not require checksums */
1144 
1145         skb->ip_summed = CHECKSUM_UNNECESSARY;
1146 
1147 #ifdef CONFIG_IPV6_PIMSM_V2
1148         if (assert == MRT6MSG_WHOLEPKT) {
1149                 /* Ugly, but we have no choice with this interface.
1150                    Duplicate old header, fix length etc.
1151                    And all this only to mangle msg->im6_msgtype and
1152                    to set msg->im6_mbz to "mbz" :-)
1153                  */
1154                 skb_push(skb, -skb_network_offset(pkt));
1155 
1156                 skb_push(skb, sizeof(*msg));
1157                 skb_reset_transport_header(skb);
1158                 msg = (struct mrt6msg *)skb_transport_header(skb);
1159                 msg->im6_mbz = 0;
1160                 msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1161                 msg->im6_mif = mrt->mroute_reg_vif_num;
1162                 msg->im6_pad = 0;
1163                 msg->im6_src = ipv6_hdr(pkt)->saddr;
1164                 msg->im6_dst = ipv6_hdr(pkt)->daddr;
1165 
1166                 skb->ip_summed = CHECKSUM_UNNECESSARY;
1167         } else
1168 #endif
1169         {
1170         /*
1171          *      Copy the IP header
1172          */
1173 
1174         skb_put(skb, sizeof(struct ipv6hdr));
1175         skb_reset_network_header(skb);
1176         skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1177 
1178         /*
1179          *      Add our header
1180          */
1181         skb_put(skb, sizeof(*msg));
1182         skb_reset_transport_header(skb);
1183         msg = (struct mrt6msg *)skb_transport_header(skb);
1184 
1185         msg->im6_mbz = 0;
1186         msg->im6_msgtype = assert;
1187         msg->im6_mif = mifi;
1188         msg->im6_pad = 0;
1189         msg->im6_src = ipv6_hdr(pkt)->saddr;
1190         msg->im6_dst = ipv6_hdr(pkt)->daddr;
1191 
1192         skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1193         skb->ip_summed = CHECKSUM_UNNECESSARY;
1194         }
1195 
1196         if (mrt->mroute6_sk == NULL) {
1197                 kfree_skb(skb);
1198                 return -EINVAL;
1199         }
1200 
1201         /*
1202          *      Deliver to user space multicast routing algorithms
1203          */
1204         ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
1205         if (ret < 0) {
1206                 net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1207                 kfree_skb(skb);
1208         }
1209 
1210         return ret;
1211 }
1212 
1213 /*
1214  *      Queue a packet for resolution. It gets locked cache entry!
1215  */
1216 
1217 static int
1218 ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1219 {
1220         bool found = false;
1221         int err;
1222         struct mfc6_cache *c;
1223 
1224         spin_lock_bh(&mfc_unres_lock);
1225         list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1226                 if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1227                     ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1228                         found = true;
1229                         break;
1230                 }
1231         }
1232 
1233         if (!found) {
1234                 /*
1235                  *      Create a new entry if allowable
1236                  */
1237 
1238                 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1239                     (c = ip6mr_cache_alloc_unres()) == NULL) {
1240                         spin_unlock_bh(&mfc_unres_lock);
1241 
1242                         kfree_skb(skb);
1243                         return -ENOBUFS;
1244                 }
1245 
1246                 /*
1247                  *      Fill in the new cache entry
1248                  */
1249                 c->mf6c_parent = -1;
1250                 c->mf6c_origin = ipv6_hdr(skb)->saddr;
1251                 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1252 
1253                 /*
1254                  *      Reflect first query at pim6sd
1255                  */
1256                 err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1257                 if (err < 0) {
1258                         /* If the report failed throw the cache entry
1259                            out - Brad Parker
1260                          */
1261                         spin_unlock_bh(&mfc_unres_lock);
1262 
1263                         ip6mr_cache_free(c);
1264                         kfree_skb(skb);
1265                         return err;
1266                 }
1267 
1268                 atomic_inc(&mrt->cache_resolve_queue_len);
1269                 list_add(&c->list, &mrt->mfc6_unres_queue);
1270                 mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1271 
1272                 ipmr_do_expire_process(mrt);
1273         }
1274 
1275         /*
1276          *      See if we can append the packet
1277          */
1278         if (c->mfc_un.unres.unresolved.qlen > 3) {
1279                 kfree_skb(skb);
1280                 err = -ENOBUFS;
1281         } else {
1282                 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1283                 err = 0;
1284         }
1285 
1286         spin_unlock_bh(&mfc_unres_lock);
1287         return err;
1288 }
1289 
1290 /*
1291  *      MFC6 cache manipulation by user space
1292  */
1293 
1294 static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
1295                             int parent)
1296 {
1297         int line;
1298         struct mfc6_cache *c, *next;
1299 
1300         line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1301 
1302         list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1303                 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1304                     ipv6_addr_equal(&c->mf6c_mcastgrp,
1305                                     &mfc->mf6cc_mcastgrp.sin6_addr) &&
1306                     (parent == -1 || parent == c->mf6c_parent)) {
1307                         write_lock_bh(&mrt_lock);
1308                         list_del(&c->list);
1309                         write_unlock_bh(&mrt_lock);
1310 
1311                         mr6_netlink_event(mrt, c, RTM_DELROUTE);
1312                         ip6mr_cache_free(c);
1313                         return 0;
1314                 }
1315         }
1316         return -ENOENT;
1317 }
1318 
1319 static int ip6mr_device_event(struct notifier_block *this,
1320                               unsigned long event, void *ptr)
1321 {
1322         struct net_device *dev = ptr;
1323         struct net *net = dev_net(dev);
1324         struct mr6_table *mrt;
1325         struct mif_device *v;
1326         int ct;
1327         LIST_HEAD(list);
1328 
1329         if (event != NETDEV_UNREGISTER)
1330                 return NOTIFY_DONE;
1331 
1332         ip6mr_for_each_table(mrt, net) {
1333                 v = &mrt->vif6_table[0];
1334                 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1335                         if (v->dev == dev)
1336                                 mif6_delete(mrt, ct, &list);
1337                 }
1338         }
1339         unregister_netdevice_many(&list);
1340 
1341         return NOTIFY_DONE;
1342 }
1343 
1344 static struct notifier_block ip6_mr_notifier = {
1345         .notifier_call = ip6mr_device_event
1346 };
1347 
1348 /*
1349  *      Setup for IP multicast routing
1350  */
1351 
1352 static int __net_init ip6mr_net_init(struct net *net)
1353 {
1354         int err;
1355 
1356         err = ip6mr_rules_init(net);
1357         if (err < 0)
1358                 goto fail;
1359 
1360 #ifdef CONFIG_PROC_FS
1361         err = -ENOMEM;
1362         if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
1363                 goto proc_vif_fail;
1364         if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
1365                 goto proc_cache_fail;
1366 #endif
1367 
1368         return 0;
1369 
1370 #ifdef CONFIG_PROC_FS
1371 proc_cache_fail:
1372         remove_proc_entry("ip6_mr_vif", net->proc_net);
1373 proc_vif_fail:
1374         ip6mr_rules_exit(net);
1375 #endif
1376 fail:
1377         return err;
1378 }
1379 
1380 static void __net_exit ip6mr_net_exit(struct net *net)
1381 {
1382 #ifdef CONFIG_PROC_FS
1383         remove_proc_entry("ip6_mr_cache", net->proc_net);
1384         remove_proc_entry("ip6_mr_vif", net->proc_net);
1385 #endif
1386         ip6mr_rules_exit(net);
1387 }
1388 
1389 static struct pernet_operations ip6mr_net_ops = {
1390         .init = ip6mr_net_init,
1391         .exit = ip6mr_net_exit,
1392 };
1393 
1394 int __init ip6_mr_init(void)
1395 {
1396         int err;
1397 
1398         mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1399                                        sizeof(struct mfc6_cache),
1400                                        0, SLAB_HWCACHE_ALIGN,
1401                                        NULL);
1402         if (!mrt_cachep)
1403                 return -ENOMEM;
1404 
1405         err = register_pernet_subsys(&ip6mr_net_ops);
1406         if (err)
1407                 goto reg_pernet_fail;
1408 
1409         err = register_netdevice_notifier(&ip6_mr_notifier);
1410         if (err)
1411                 goto reg_notif_fail;
1412 #ifdef CONFIG_IPV6_PIMSM_V2
1413         if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1414                 pr_err("%s: can't add PIM protocol\n", __func__);
1415                 err = -EAGAIN;
1416                 goto add_proto_fail;
1417         }
1418 #endif
1419         rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
1420                       ip6mr_rtm_dumproute, NULL);
1421         return 0;
1422 #ifdef CONFIG_IPV6_PIMSM_V2
1423 add_proto_fail:
1424         unregister_netdevice_notifier(&ip6_mr_notifier);
1425 #endif
1426 reg_notif_fail:
1427         unregister_pernet_subsys(&ip6mr_net_ops);
1428 reg_pernet_fail:
1429         kmem_cache_destroy(mrt_cachep);
1430         return err;
1431 }
1432 
1433 void ip6_mr_cleanup(void)
1434 {
1435         unregister_netdevice_notifier(&ip6_mr_notifier);
1436         unregister_pernet_subsys(&ip6mr_net_ops);
1437         kmem_cache_destroy(mrt_cachep);
1438 }
1439 
1440 static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1441                          struct mf6cctl *mfc, int mrtsock, int parent)
1442 {
1443         bool found = false;
1444         int line;
1445         struct mfc6_cache *uc, *c;
1446         unsigned char ttls[MAXMIFS];
1447         int i;
1448 
1449         if (mfc->mf6cc_parent >= MAXMIFS)
1450                 return -ENFILE;
1451 
1452         memset(ttls, 255, MAXMIFS);
1453         for (i = 0; i < MAXMIFS; i++) {
1454                 if (IF_ISSET(i, &mfc->mf6cc_ifset))
1455                         ttls[i] = 1;
1456 
1457         }
1458 
1459         line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1460 
1461         list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1462                 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1463                     ipv6_addr_equal(&c->mf6c_mcastgrp,
1464                                     &mfc->mf6cc_mcastgrp.sin6_addr) &&
1465                     (parent == -1 || parent == mfc->mf6cc_parent)) {
1466                         found = true;
1467                         break;
1468                 }
1469         }
1470 
1471         if (found) {
1472                 write_lock_bh(&mrt_lock);
1473                 c->mf6c_parent = mfc->mf6cc_parent;
1474                 ip6mr_update_thresholds(mrt, c, ttls);
1475                 if (!mrtsock)
1476                         c->mfc_flags |= MFC_STATIC;
1477                 write_unlock_bh(&mrt_lock);
1478                 mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1479                 return 0;
1480         }
1481 
1482         if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1483             !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1484                 return -EINVAL;
1485 
1486         c = ip6mr_cache_alloc();
1487         if (c == NULL)
1488                 return -ENOMEM;
1489 
1490         c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1491         c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1492         c->mf6c_parent = mfc->mf6cc_parent;
1493         ip6mr_update_thresholds(mrt, c, ttls);
1494         if (!mrtsock)
1495                 c->mfc_flags |= MFC_STATIC;
1496 
1497         write_lock_bh(&mrt_lock);
1498         list_add(&c->list, &mrt->mfc6_cache_array[line]);
1499         write_unlock_bh(&mrt_lock);
1500 
1501         /*
1502          *      Check to see if we resolved a queued list. If so we
1503          *      need to send on the frames and tidy up.
1504          */
1505         found = false;
1506         spin_lock_bh(&mfc_unres_lock);
1507         list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1508                 if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1509                     ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1510                         list_del(&uc->list);
1511                         atomic_dec(&mrt->cache_resolve_queue_len);
1512                         found = true;
1513                         break;
1514                 }
1515         }
1516         if (list_empty(&mrt->mfc6_unres_queue))
1517                 del_timer(&mrt->ipmr_expire_timer);
1518         spin_unlock_bh(&mfc_unres_lock);
1519 
1520         if (found) {
1521                 ip6mr_cache_resolve(net, mrt, uc, c);
1522                 ip6mr_cache_free(uc);
1523         }
1524         mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1525         return 0;
1526 }
1527 
1528 /*
1529  *      Close the multicast socket, and clear the vif tables etc
1530  */
1531 
1532 static void mroute_clean_tables(struct mr6_table *mrt)
1533 {
1534         int i;
1535         LIST_HEAD(list);
1536         struct mfc6_cache *c, *next;
1537 
1538         /*
1539          *      Shut down all active vif entries
1540          */
1541         for (i = 0; i < mrt->maxvif; i++) {
1542                 if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
1543                         mif6_delete(mrt, i, &list);
1544         }
1545         unregister_netdevice_many(&list);
1546 
1547         /*
1548          *      Wipe the cache
1549          */
1550         for (i = 0; i < MFC6_LINES; i++) {
1551                 list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1552                         if (c->mfc_flags & MFC_STATIC)
1553                                 continue;
1554                         write_lock_bh(&mrt_lock);
1555                         list_del(&c->list);
1556                         write_unlock_bh(&mrt_lock);
1557 
1558                         mr6_netlink_event(mrt, c, RTM_DELROUTE);
1559                         ip6mr_cache_free(c);
1560                 }
1561         }
1562 
1563         if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1564                 spin_lock_bh(&mfc_unres_lock);
1565                 list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1566                         list_del(&c->list);
1567                         mr6_netlink_event(mrt, c, RTM_DELROUTE);
1568                         ip6mr_destroy_unres(mrt, c);
1569                 }
1570                 spin_unlock_bh(&mfc_unres_lock);
1571         }
1572 }
1573 
1574 static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1575 {
1576         int err = 0;
1577         struct net *net = sock_net(sk);
1578 
1579         rtnl_lock();
1580         write_lock_bh(&mrt_lock);
1581         if (likely(mrt->mroute6_sk == NULL)) {
1582                 mrt->mroute6_sk = sk;
1583                 net->ipv6.devconf_all->mc_forwarding++;
1584                 inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
1585                                              NETCONFA_IFINDEX_ALL,
1586                                              net->ipv6.devconf_all);
1587         }
1588         else
1589                 err = -EADDRINUSE;
1590         write_unlock_bh(&mrt_lock);
1591 
1592         rtnl_unlock();
1593 
1594         return err;
1595 }
1596 
1597 int ip6mr_sk_done(struct sock *sk)
1598 {
1599         int err = -EACCES;
1600         struct net *net = sock_net(sk);
1601         struct mr6_table *mrt;
1602 
1603         rtnl_lock();
1604         ip6mr_for_each_table(mrt, net) {
1605                 if (sk == mrt->mroute6_sk) {
1606                         write_lock_bh(&mrt_lock);
1607                         mrt->mroute6_sk = NULL;
1608                         net->ipv6.devconf_all->mc_forwarding--;
1609                         inet6_netconf_notify_devconf(net,
1610                                                      NETCONFA_MC_FORWARDING,
1611                                                      NETCONFA_IFINDEX_ALL,
1612                                                      net->ipv6.devconf_all);
1613                         write_unlock_bh(&mrt_lock);
1614 
1615                         mroute_clean_tables(mrt);
1616                         err = 0;
1617                         break;
1618                 }
1619         }
1620         rtnl_unlock();
1621 
1622         return err;
1623 }
1624 
1625 struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1626 {
1627         struct mr6_table *mrt;
1628         struct flowi6 fl6 = {
1629                 .flowi6_iif     = skb->skb_iif,
1630                 .flowi6_oif     = skb->dev->ifindex,
1631                 .flowi6_mark    = skb->mark,
1632         };
1633 
1634         if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1635                 return NULL;
1636 
1637         return mrt->mroute6_sk;
1638 }
1639 
1640 /*
1641  *      Socket options and virtual interface manipulation. The whole
1642  *      virtual interface system is a complete heap, but unfortunately
1643  *      that's how BSD mrouted happens to think. Maybe one day with a proper
1644  *      MOSPF/PIM router set up we can clean this up.
1645  */
1646 
1647 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1648 {
1649         int ret, parent = 0;
1650         struct mif6ctl vif;
1651         struct mf6cctl mfc;
1652         mifi_t mifi;
1653         struct net *net = sock_net(sk);
1654         struct mr6_table *mrt;
1655 
1656         mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1657         if (mrt == NULL)
1658                 return -ENOENT;
1659 
1660         if (optname != MRT6_INIT) {
1661                 if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
1662                         return -EACCES;
1663         }
1664 
1665         switch (optname) {
1666         case MRT6_INIT:
1667                 if (sk->sk_type != SOCK_RAW ||
1668                     inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1669                         return -EOPNOTSUPP;
1670                 if (optlen < sizeof(int))
1671                         return -EINVAL;
1672 
1673                 return ip6mr_sk_init(mrt, sk);
1674 
1675         case MRT6_DONE:
1676                 return ip6mr_sk_done(sk);
1677 
1678         case MRT6_ADD_MIF:
1679                 if (optlen < sizeof(vif))
1680                         return -EINVAL;
1681                 if (copy_from_user(&vif, optval, sizeof(vif)))
1682                         return -EFAULT;
1683                 if (vif.mif6c_mifi >= MAXMIFS)
1684                         return -ENFILE;
1685                 rtnl_lock();
1686                 ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1687                 rtnl_unlock();
1688                 return ret;
1689 
1690         case MRT6_DEL_MIF:
1691                 if (optlen < sizeof(mifi_t))
1692                         return -EINVAL;
1693                 if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1694                         return -EFAULT;
1695                 rtnl_lock();
1696                 ret = mif6_delete(mrt, mifi, NULL);
1697                 rtnl_unlock();
1698                 return ret;
1699 
1700         /*
1701          *      Manipulate the forwarding caches. These live
1702          *      in a sort of kernel/user symbiosis.
1703          */
1704         case MRT6_ADD_MFC:
1705         case MRT6_DEL_MFC:
1706                 parent = -1;
1707         case MRT6_ADD_MFC_PROXY:
1708         case MRT6_DEL_MFC_PROXY:
1709                 if (optlen < sizeof(mfc))
1710                         return -EINVAL;
1711                 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1712                         return -EFAULT;
1713                 if (parent == 0)
1714                         parent = mfc.mf6cc_parent;
1715                 rtnl_lock();
1716                 if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1717                         ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1718                 else
1719                         ret = ip6mr_mfc_add(net, mrt, &mfc,
1720                                             sk == mrt->mroute6_sk, parent);
1721                 rtnl_unlock();
1722                 return ret;
1723 
1724         /*
1725          *      Control PIM assert (to activate pim will activate assert)
1726          */
1727         case MRT6_ASSERT:
1728         {
1729                 int v;
1730 
1731                 if (optlen != sizeof(v))
1732                         return -EINVAL;
1733                 if (get_user(v, (int __user *)optval))
1734                         return -EFAULT;
1735                 mrt->mroute_do_assert = v;
1736                 return 0;
1737         }
1738 
1739 #ifdef CONFIG_IPV6_PIMSM_V2
1740         case MRT6_PIM:
1741         {
1742                 int v;
1743 
1744                 if (optlen != sizeof(v))
1745                         return -EINVAL;
1746                 if (get_user(v, (int __user *)optval))
1747                         return -EFAULT;
1748                 v = !!v;
1749                 rtnl_lock();
1750                 ret = 0;
1751                 if (v != mrt->mroute_do_pim) {
1752                         mrt->mroute_do_pim = v;
1753                         mrt->mroute_do_assert = v;
1754                 }
1755                 rtnl_unlock();
1756                 return ret;
1757         }
1758 
1759 #endif
1760 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1761         case MRT6_TABLE:
1762         {
1763                 u32 v;
1764 
1765                 if (optlen != sizeof(u32))
1766                         return -EINVAL;
1767                 if (get_user(v, (u32 __user *)optval))
1768                         return -EFAULT;
1769                 /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1770                 if (v != RT_TABLE_DEFAULT && v >= 100000000)
1771                         return -EINVAL;
1772                 if (sk == mrt->mroute6_sk)
1773                         return -EBUSY;
1774 
1775                 rtnl_lock();
1776                 ret = 0;
1777                 if (!ip6mr_new_table(net, v))
1778                         ret = -ENOMEM;
1779                 raw6_sk(sk)->ip6mr_table = v;
1780                 rtnl_unlock();
1781                 return ret;
1782         }
1783 #endif
1784         /*
1785          *      Spurious command, or MRT6_VERSION which you cannot
1786          *      set.
1787          */
1788         default:
1789                 return -ENOPROTOOPT;
1790         }
1791 }
1792 
1793 /*
1794  *      Getsock opt support for the multicast routing system.
1795  */
1796 
1797 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1798                           int __user *optlen)
1799 {
1800         int olr;
1801         int val;
1802         struct net *net = sock_net(sk);
1803         struct mr6_table *mrt;
1804 
1805         mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1806         if (mrt == NULL)
1807                 return -ENOENT;
1808 
1809         switch (optname) {
1810         case MRT6_VERSION:
1811                 val = 0x0305;
1812                 break;
1813 #ifdef CONFIG_IPV6_PIMSM_V2
1814         case MRT6_PIM:
1815                 val = mrt->mroute_do_pim;
1816                 break;
1817 #endif
1818         case MRT6_ASSERT:
1819                 val = mrt->mroute_do_assert;
1820                 break;
1821         default:
1822                 return -ENOPROTOOPT;
1823         }
1824 
1825         if (get_user(olr, optlen))
1826                 return -EFAULT;
1827 
1828         olr = min_t(int, olr, sizeof(int));
1829         if (olr < 0)
1830                 return -EINVAL;
1831 
1832         if (put_user(olr, optlen))
1833                 return -EFAULT;
1834         if (copy_to_user(optval, &val, olr))
1835                 return -EFAULT;
1836         return 0;
1837 }
1838 
1839 /*
1840  *      The IP multicast ioctl support routines.
1841  */
1842 
1843 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1844 {
1845         struct sioc_sg_req6 sr;
1846         struct sioc_mif_req6 vr;
1847         struct mif_device *vif;
1848         struct mfc6_cache *c;
1849         struct net *net = sock_net(sk);
1850         struct mr6_table *mrt;
1851 
1852         mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1853         if (mrt == NULL)
1854                 return -ENOENT;
1855 
1856         switch (cmd) {
1857         case SIOCGETMIFCNT_IN6:
1858                 if (copy_from_user(&vr, arg, sizeof(vr)))
1859                         return -EFAULT;
1860                 if (vr.mifi >= mrt->maxvif)
1861                         return -EINVAL;
1862                 read_lock(&mrt_lock);
1863                 vif = &mrt->vif6_table[vr.mifi];
1864                 if (MIF_EXISTS(mrt, vr.mifi)) {
1865                         vr.icount = vif->pkt_in;
1866                         vr.ocount = vif->pkt_out;
1867                         vr.ibytes = vif->bytes_in;
1868                         vr.obytes = vif->bytes_out;
1869                         read_unlock(&mrt_lock);
1870 
1871                         if (copy_to_user(arg, &vr, sizeof(vr)))
1872                                 return -EFAULT;
1873                         return 0;
1874                 }
1875                 read_unlock(&mrt_lock);
1876                 return -EADDRNOTAVAIL;
1877         case SIOCGETSGCNT_IN6:
1878                 if (copy_from_user(&sr, arg, sizeof(sr)))
1879                         return -EFAULT;
1880 
1881                 read_lock(&mrt_lock);
1882                 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1883                 if (c) {
1884                         sr.pktcnt = c->mfc_un.res.pkt;
1885                         sr.bytecnt = c->mfc_un.res.bytes;
1886                         sr.wrong_if = c->mfc_un.res.wrong_if;
1887                         read_unlock(&mrt_lock);
1888 
1889                         if (copy_to_user(arg, &sr, sizeof(sr)))
1890                                 return -EFAULT;
1891                         return 0;
1892                 }
1893                 read_unlock(&mrt_lock);
1894                 return -EADDRNOTAVAIL;
1895         default:
1896                 return -ENOIOCTLCMD;
1897         }
1898 }
1899 
#ifdef CONFIG_COMPAT
/* Compat layout of the SIOCGETSGCNT_IN6 argument: counters are compat_ulong_t. */
struct compat_sioc_sg_req6 {
	struct sockaddr_in6 src;
	struct sockaddr_in6 grp;
	compat_ulong_t pktcnt;
	compat_ulong_t bytecnt;
	compat_ulong_t wrong_if;
};

/* Compat layout of the SIOCGETMIFCNT_IN6 argument: counters are compat_ulong_t. */
struct compat_sioc_mif_req6 {
	mifi_t	mifi;
	compat_ulong_t icount;
	compat_ulong_t ocount;
	compat_ulong_t ibytes;
	compat_ulong_t obytes;
};

/*
 *	Compat counterpart of ip6mr_ioctl(): same commands and same error
 *	codes, but the user structures use the compat layouts declared above.
 */
int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
	struct net *net = sock_net(sk);
	struct compat_sioc_mif_req6 mif_req;
	struct compat_sioc_sg_req6 sg_req;
	struct mif_device *mif;
	struct mfc6_cache *entry;
	struct mr6_table *mrt;

	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
	if (!mrt)
		return -ENOENT;

	switch (cmd) {
	case SIOCGETMIFCNT_IN6:
		if (copy_from_user(&mif_req, arg, sizeof(mif_req)))
			return -EFAULT;
		if (mif_req.mifi >= mrt->maxvif)
			return -EINVAL;

		read_lock(&mrt_lock);
		mif = &mrt->vif6_table[mif_req.mifi];
		if (!MIF_EXISTS(mrt, mif_req.mifi)) {
			read_unlock(&mrt_lock);
			return -EADDRNOTAVAIL;
		}
		/* Snapshot the counters under the lock, copy out after. */
		mif_req.icount = mif->pkt_in;
		mif_req.ocount = mif->pkt_out;
		mif_req.ibytes = mif->bytes_in;
		mif_req.obytes = mif->bytes_out;
		read_unlock(&mrt_lock);

		if (copy_to_user(arg, &mif_req, sizeof(mif_req)))
			return -EFAULT;
		return 0;

	case SIOCGETSGCNT_IN6:
		if (copy_from_user(&sg_req, arg, sizeof(sg_req)))
			return -EFAULT;

		read_lock(&mrt_lock);
		entry = ip6mr_cache_find(mrt, &sg_req.src.sin6_addr,
					 &sg_req.grp.sin6_addr);
		if (!entry) {
			read_unlock(&mrt_lock);
			return -EADDRNOTAVAIL;
		}
		sg_req.pktcnt = entry->mfc_un.res.pkt;
		sg_req.bytecnt = entry->mfc_un.res.bytes;
		sg_req.wrong_if = entry->mfc_un.res.wrong_if;
		read_unlock(&mrt_lock);

		if (copy_to_user(arg, &sg_req, sizeof(sg_req)))
			return -EFAULT;
		return 0;

	default:
		return -ENOIOCTLCMD;
	}
}
#endif
1974 
1975 static inline int ip6mr_forward2_finish(struct sk_buff *skb)
1976 {
1977         IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1978                          IPSTATS_MIB_OUTFORWDATAGRAMS);
1979         IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)),
1980                          IPSTATS_MIB_OUTOCTETS, skb->len);
1981         return dst_output(skb);
1982 }
1983 
1984 /*
1985  *      Processing handlers for ip6mr_forward
1986  */
1987 
1988 static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1989                           struct sk_buff *skb, struct mfc6_cache *c, int vifi)
1990 {
1991         struct ipv6hdr *ipv6h;
1992         struct mif_device *vif = &mrt->vif6_table[vifi];
1993         struct net_device *dev;
1994         struct dst_entry *dst;
1995         struct flowi6 fl6;
1996 
1997         if (vif->dev == NULL)
1998                 goto out_free;
1999 
2000 #ifdef CONFIG_IPV6_PIMSM_V2
2001         if (vif->flags & MIFF_REGISTER) {
2002                 vif->pkt_out++;
2003                 vif->bytes_out += skb->len;
2004                 vif->dev->stats.tx_bytes += skb->len;
2005                 vif->dev->stats.tx_packets++;
2006                 ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
2007                 goto out_free;
2008         }
2009 #endif
2010 
2011         ipv6h = ipv6_hdr(skb);
2012 
2013         fl6 = (struct flowi6) {
2014                 .flowi6_oif = vif->link,
2015                 .daddr = ipv6h->daddr,
2016         };
2017 
2018         dst = ip6_route_output(net, NULL, &fl6);
2019         if (dst->error) {
2020                 dst_release(dst);
2021                 goto out_free;
2022         }
2023 
2024         skb_dst_drop(skb);
2025         skb_dst_set(skb, dst);
2026 
2027         /*
2028          * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
2029          * not only before forwarding, but after forwarding on all output
2030          * interfaces. It is clear, if mrouter runs a multicasting
2031          * program, it should receive packets not depending to what interface
2032          * program is joined.
2033          * If we will not make it, the program will have to join on all
2034          * interfaces. On the other hand, multihoming host (or router, but
2035          * not mrouter) cannot join to more than one interface - it will
2036          * result in receiving multiple packets.
2037          */
2038         dev = vif->dev;
2039         skb->dev = dev;
2040         vif->pkt_out++;
2041         vif->bytes_out += skb->len;
2042 
2043         /* We are about to write */
2044         /* XXX: extension headers? */
2045         if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
2046                 goto out_free;
2047 
2048         ipv6h = ipv6_hdr(skb);
2049         ipv6h->hop_limit--;
2050 
2051         IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2052 
2053         return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev,
2054                        ip6mr_forward2_finish);
2055 
2056 out_free:
2057         kfree_skb(skb);
2058         return 0;
2059 }
2060 
2061 static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
2062 {
2063         int ct;
2064 
2065         for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2066                 if (mrt->vif6_table[ct].dev == dev)
2067                         break;
2068         }
2069         return ct;
2070 }
2071 
2072 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
2073                           struct sk_buff *skb, struct mfc6_cache *cache)
2074 {
2075         int psend = -1;
2076         int vif, ct;
2077         int true_vifi = ip6mr_find_vif(mrt, skb->dev);
2078 
2079         vif = cache->mf6c_parent;
2080         cache->mfc_un.res.pkt++;
2081         cache->mfc_un.res.bytes += skb->len;
2082 
2083         if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
2084                 struct mfc6_cache *cache_proxy;
2085 
2086                 /* For an (*,G) entry, we only check that the incomming
2087                  * interface is part of the static tree.
2088                  */
2089                 cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
2090                 if (cache_proxy &&
2091                     cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
2092                         goto forward;
2093         }
2094 
2095         /*
2096          * Wrong interface: drop packet and (maybe) send PIM assert.
2097          */
2098         if (mrt->vif6_table[vif].dev != skb->dev) {
2099                 cache->mfc_un.res.wrong_if++;
2100 
2101                 if (true_vifi >= 0 && mrt->mroute_do_assert &&
2102                     /* pimsm uses asserts, when switching from RPT to SPT,
2103                        so that we cannot check that packet arrived on an oif.
2104                        It is bad, but otherwise we would need to move pretty
2105                        large chunk of pimd to kernel. Ough... --ANK
2106                      */
2107                     (mrt->mroute_do_pim ||
2108                      cache->mfc_un.res.ttls[true_vifi] < 255) &&
2109                     time_after(jiffies,
2110                                cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
2111                         cache->mfc_un.res.last_assert = jiffies;
2112                         ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2113                 }
2114                 goto dont_forward;
2115         }
2116 
2117 forward:
2118         mrt->vif6_table[vif].pkt_in++;
2119         mrt->vif6_table[vif].bytes_in += skb->len;
2120 
2121         /*
2122          *      Forward the frame
2123          */
2124         if (ipv6_addr_any(&cache->mf6c_origin) &&
2125             ipv6_addr_any(&cache->mf6c_mcastgrp)) {
2126                 if (true_vifi >= 0 &&
2127                     true_vifi != cache->mf6c_parent &&
2128                     ipv6_hdr(skb)->hop_limit >
2129                                 cache->mfc_un.res.ttls[cache->mf6c_parent]) {
2130                         /* It's an (*,*) entry and the packet is not coming from
2131                          * the upstream: forward the packet to the upstream
2132                          * only.
2133                          */
2134                         psend = cache->mf6c_parent;
2135                         goto last_forward;
2136                 }
2137                 goto dont_forward;
2138         }
2139         for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
2140                 /* For (*,G) entry, don't forward to the incoming interface */
2141                 if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
2142                     ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
2143                         if (psend != -1) {
2144                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2145                                 if (skb2)
2146                                         ip6mr_forward2(net, mrt, skb2, cache, psend);
2147                         }
2148                         psend = ct;
2149                 }
2150         }
2151 last_forward:
2152         if (psend != -1) {
2153                 ip6mr_forward2(net, mrt, skb, cache, psend);
2154                 return 0;
2155         }
2156 
2157 dont_forward:
2158         kfree_skb(skb);
2159         return 0;
2160 }
2161 
2162 
2163 /*
2164  *      Multicast packets for forwarding arrive here
2165  */
2166 
2167 int ip6_mr_input(struct sk_buff *skb)
2168 {
2169         struct mfc6_cache *cache;
2170         struct net *net = dev_net(skb->dev);
2171         struct mr6_table *mrt;
2172         struct flowi6 fl6 = {
2173                 .flowi6_iif     = skb->dev->ifindex,
2174                 .flowi6_mark    = skb->mark,
2175         };
2176         int err;
2177 
2178         err = ip6mr_fib_lookup(net, &fl6, &mrt);
2179         if (err < 0) {
2180                 kfree_skb(skb);
2181                 return err;
2182         }
2183 
2184         read_lock(&mrt_lock);
2185         cache = ip6mr_cache_find(mrt,
2186                                  &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2187         if (cache == NULL) {
2188                 int vif = ip6mr_find_vif(mrt, skb->dev);
2189 
2190                 if (vif >= 0)
2191                         cache = ip6mr_cache_find_any(mrt,
2192                                                      &ipv6_hdr(skb)->daddr,
2193                                                      vif);
2194         }
2195 
2196         /*
2197          *      No usable cache entry
2198          */
2199         if (cache == NULL) {
2200                 int vif;
2201 
2202                 vif = ip6mr_find_vif(mrt, skb->dev);
2203                 if (vif >= 0) {
2204                         int err = ip6mr_cache_unresolved(mrt, vif, skb);
2205                         read_unlock(&mrt_lock);
2206 
2207                         return err;
2208                 }
2209                 read_unlock(&mrt_lock);
2210                 kfree_skb(skb);
2211                 return -ENODEV;
2212         }
2213 
2214         ip6_mr_forward(net, mrt, skb, cache);
2215 
2216         read_unlock(&mrt_lock);
2217 
2218         return 0;
2219 }
2220 
2221 
2222 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2223                                struct mfc6_cache *c, struct rtmsg *rtm)
2224 {
2225         int ct;
2226         struct rtnexthop *nhp;
2227         struct nlattr *mp_attr;
2228         struct rta_mfc_stats mfcs;
2229 
2230         /* If cache is unresolved, don't try to parse IIF and OIF */
2231         if (c->mf6c_parent >= MAXMIFS)
2232                 return -ENOENT;
2233 
2234         if (MIF_EXISTS(mrt, c->mf6c_parent) &&
2235             nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
2236                 return -EMSGSIZE;
2237         mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
2238         if (mp_attr == NULL)
2239                 return -EMSGSIZE;
2240 
2241         for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2242                 if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2243                         nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
2244                         if (nhp == NULL) {
2245                                 nla_nest_cancel(skb, mp_attr);
2246                                 return -EMSGSIZE;
2247                         }
2248 
2249                         nhp->rtnh_flags = 0;
2250                         nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2251                         nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2252                         nhp->rtnh_len = sizeof(*nhp);
2253                 }
2254         }
2255 
2256         nla_nest_end(skb, mp_attr);
2257 
2258         mfcs.mfcs_packets = c->mfc_un.res.pkt;
2259         mfcs.mfcs_bytes = c->mfc_un.res.bytes;
2260         mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
2261         if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
2262                 return -EMSGSIZE;
2263 
2264         rtm->rtm_type = RTN_MULTICAST;
2265         return 1;
2266 }
2267 
2268 int ip6mr_get_route(struct net *net,
2269                     struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2270 {
2271         int err;
2272         struct mr6_table *mrt;
2273         struct mfc6_cache *cache;
2274         struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2275 
2276         mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2277         if (mrt == NULL)
2278                 return -ENOENT;
2279 
2280         read_lock(&mrt_lock);
2281         cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2282         if (!cache && skb->dev) {
2283                 int vif = ip6mr_find_vif(mrt, skb->dev);
2284 
2285                 if (vif >= 0)
2286                         cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2287                                                      vif);
2288         }
2289 
2290         if (!cache) {
2291                 struct sk_buff *skb2;
2292                 struct ipv6hdr *iph;
2293                 struct net_device *dev;
2294                 int vif;
2295 
2296                 if (nowait) {
2297                         read_unlock(&mrt_lock);
2298                         return -EAGAIN;
2299                 }
2300 
2301                 dev = skb->dev;
2302                 if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2303                         read_unlock(&mrt_lock);
2304                         return -ENODEV;
2305                 }
2306 
2307                 /* really correct? */
2308                 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2309                 if (!skb2) {
2310                         read_unlock(&mrt_lock);
2311                         return -ENOMEM;
2312                 }
2313 
2314                 skb_reset_transport_header(skb2);
2315 
2316                 skb_put(skb2, sizeof(struct ipv6hdr));
2317                 skb_reset_network_header(skb2);
2318 
2319                 iph = ipv6_hdr(skb2);
2320                 iph->version = 0;
2321                 iph->priority = 0;
2322                 iph->flow_lbl[0] = 0;
2323                 iph->flow_lbl[1] = 0;
2324                 iph->flow_lbl[2] = 0;
2325                 iph->payload_len = 0;
2326                 iph->nexthdr = IPPROTO_NONE;
2327                 iph->hop_limit = 0;
2328                 iph->saddr = rt->rt6i_src.addr;
2329                 iph->daddr = rt->rt6i_dst.addr;
2330 
2331                 err = ip6mr_cache_unresolved(mrt, vif, skb2);
2332                 read_unlock(&mrt_lock);
2333 
2334                 return err;
2335         }
2336 
2337         if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
2338                 cache->mfc_flags |= MFC_NOTIFY;
2339 
2340         err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2341         read_unlock(&mrt_lock);
2342         return err;
2343 }
2344 
2345 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2346                              u32 portid, u32 seq, struct mfc6_cache *c, int cmd)
2347 {
2348         struct nlmsghdr *nlh;
2349         struct rtmsg *rtm;
2350         int err;
2351 
2352         nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI);
2353         if (nlh == NULL)
2354                 return -EMSGSIZE;
2355 
2356         rtm = nlmsg_data(nlh);
2357         rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2358         rtm->rtm_dst_len  = 128;
2359         rtm->rtm_src_len  = 128;
2360         rtm->rtm_tos      = 0;
2361         rtm->rtm_table    = mrt->id;
2362         if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2363                 goto nla_put_failure;
2364         rtm->rtm_type = RTN_MULTICAST;
2365         rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2366         if (c->mfc_flags & MFC_STATIC)
2367                 rtm->rtm_protocol = RTPROT_STATIC;
2368         else
2369                 rtm->rtm_protocol = RTPROT_MROUTED;
2370         rtm->rtm_flags    = 0;
2371 
2372         if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) ||
2373             nla_put(skb, RTA_DST, 16, &c->mf6c_mcastgrp))
2374                 goto nla_put_failure;
2375         err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
2376         /* do not break the dump if cache is unresolved */
2377         if (err < 0 && err != -ENOENT)
2378                 goto nla_put_failure;
2379 
2380         return nlmsg_end(skb, nlh);
2381 
2382 nla_put_failure:
2383         nlmsg_cancel(skb, nlh);
2384         return -EMSGSIZE;
2385 }
2386 
2387 static int mr6_msgsize(bool unresolved, int maxvif)
2388 {
2389         size_t len =
2390                 NLMSG_ALIGN(sizeof(struct rtmsg))
2391                 + nla_total_size(4)     /* RTA_TABLE */
2392                 + nla_total_size(sizeof(struct in6_addr))       /* RTA_SRC */
2393                 + nla_total_size(sizeof(struct in6_addr))       /* RTA_DST */
2394                 ;
2395 
2396         if (!unresolved)
2397                 len = len
2398                       + nla_total_size(4)       /* RTA_IIF */
2399                       + nla_total_size(0)       /* RTA_MULTIPATH */
2400                       + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2401                                                 /* RTA_MFC_STATS */
2402                       + nla_total_size(sizeof(struct rta_mfc_stats))
2403                 ;
2404 
2405         return len;
2406 }
2407 
2408 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
2409                               int cmd)
2410 {
2411         struct net *net = read_pnet(&mrt->net);
2412         struct sk_buff *skb;
2413         int err = -ENOBUFS;
2414 
2415         skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
2416                         GFP_ATOMIC);
2417         if (skb == NULL)
2418                 goto errout;
2419 
2420         err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd);
2421         if (err < 0)
2422                 goto errout;
2423 
2424         rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2425         return;
2426 
2427 errout:
2428         kfree_skb(skb);
2429         if (err < 0)
2430                 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2431 }
2432 
2433 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2434 {
2435         struct net *net = sock_net(skb->sk);
2436         struct mr6_table *mrt;
2437         struct mfc6_cache *mfc;
2438         unsigned int t = 0, s_t;
2439         unsigned int h = 0, s_h;
2440         unsigned int e = 0, s_e;
2441 
2442         s_t = cb->args[0];
2443         s_h = cb->args[1];
2444         s_e = cb->args[2];
2445 
2446         read_lock(&mrt_lock);
2447         ip6mr_for_each_table(mrt, net) {
2448                 if (t < s_t)
2449                         goto next_table;
2450                 if (t > s_t)
2451                         s_h = 0;
2452                 for (h = s_h; h < MFC6_LINES; h++) {
2453                         list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2454                                 if (e < s_e)
2455                                         goto next_entry;
2456                                 if (ip6mr_fill_mroute(mrt, skb,
2457                                                       NETLINK_CB(cb->skb).portid,
2458                                                       cb->nlh->nlmsg_seq,
2459                                                       mfc, RTM_NEWROUTE) < 0)
2460                                         goto done;
2461 next_entry:
2462                                 e++;
2463                         }
2464                         e = s_e = 0;
2465                 }
2466                 spin_lock_bh(&mfc_unres_lock);
2467                 list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
2468                         if (e < s_e)
2469                                 goto next_entry2;
2470                         if (ip6mr_fill_mroute(mrt, skb,
2471                                               NETLINK_CB(cb->skb).portid,
2472                                               cb->nlh->nlmsg_seq,
2473                                               mfc, RTM_NEWROUTE) < 0) {
2474                                 spin_unlock_bh(&mfc_unres_lock);
2475                                 goto done;
2476                         }
2477 next_entry2:
2478                         e++;
2479                 }
2480                 spin_unlock_bh(&mfc_unres_lock);
2481                 e = s_e = 0;
2482                 s_h = 0;
2483 next_table:
2484                 t++;
2485         }
2486 done:
2487         read_unlock(&mrt_lock);
2488 
2489         cb->args[2] = e;
2490         cb->args[1] = h;
2491         cb->args[0] = t;
2492 
2493         return skb->len;
2494 }
2495 

This page was automatically generated by LXR 0.3.1 (source).  •  Linux is a registered trademark of Linus Torvalds