Version:  2.0.40 2.2.26 2.4.37 3.8 3.9 3.10 3.11 3.12 3.13 3.14 3.15 3.16 3.17 3.18 3.19 4.0 4.1 4.2 4.3 4.4

Linux/net/ipv6/ip6mr.c

  1 /*
  2  *      Linux IPv6 multicast routing support for BSD pim6sd
  3  *      Based on net/ipv4/ipmr.c.
  4  *
  5  *      (c) 2004 Mickael Hoerdt, <hoerdt@clarinet.u-strasbg.fr>
  6  *              LSIIT Laboratory, Strasbourg, France
  7  *      (c) 2004 Jean-Philippe Andriot, <jean-philippe.andriot@6WIND.com>
  8  *              6WIND, Paris, France
  9  *      Copyright (C)2007,2008 USAGI/WIDE Project
 10  *              YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
 11  *
 12  *      This program is free software; you can redistribute it and/or
 13  *      modify it under the terms of the GNU General Public License
 14  *      as published by the Free Software Foundation; either version
 15  *      2 of the License, or (at your option) any later version.
 16  *
 17  */
 18 
 19 #include <asm/uaccess.h>
 20 #include <linux/types.h>
 21 #include <linux/sched.h>
 22 #include <linux/errno.h>
 23 #include <linux/timer.h>
 24 #include <linux/mm.h>
 25 #include <linux/kernel.h>
 26 #include <linux/fcntl.h>
 27 #include <linux/stat.h>
 28 #include <linux/socket.h>
 29 #include <linux/inet.h>
 30 #include <linux/netdevice.h>
 31 #include <linux/inetdevice.h>
 32 #include <linux/proc_fs.h>
 33 #include <linux/seq_file.h>
 34 #include <linux/init.h>
 35 #include <linux/slab.h>
 36 #include <linux/compat.h>
 37 #include <net/protocol.h>
 38 #include <linux/skbuff.h>
 39 #include <net/sock.h>
 40 #include <net/raw.h>
 41 #include <linux/notifier.h>
 42 #include <linux/if_arp.h>
 43 #include <net/checksum.h>
 44 #include <net/netlink.h>
 45 #include <net/fib_rules.h>
 46 
 47 #include <net/ipv6.h>
 48 #include <net/ip6_route.h>
 49 #include <linux/mroute6.h>
 50 #include <linux/pim.h>
 51 #include <net/addrconf.h>
 52 #include <linux/netfilter_ipv6.h>
 53 #include <linux/export.h>
 54 #include <net/ip6_checksum.h>
 55 #include <linux/netconf.h>
 56 
 57 struct mr6_table {
 58         struct list_head        list;
 59         possible_net_t          net;
 60         u32                     id;
 61         struct sock             *mroute6_sk;
 62         struct timer_list       ipmr_expire_timer;
 63         struct list_head        mfc6_unres_queue;
 64         struct list_head        mfc6_cache_array[MFC6_LINES];
 65         struct mif_device       vif6_table[MAXMIFS];
 66         int                     maxvif;
 67         atomic_t                cache_resolve_queue_len;
 68         bool                    mroute_do_assert;
 69         bool                    mroute_do_pim;
 70 #ifdef CONFIG_IPV6_PIMSM_V2
 71         int                     mroute_reg_vif_num;
 72 #endif
 73 };
 74 
 75 struct ip6mr_rule {
 76         struct fib_rule         common;
 77 };
 78 
 79 struct ip6mr_result {
 80         struct mr6_table        *mrt;
 81 };
 82 
 83 /* Big lock, protecting vif table, mrt cache and mroute socket state.
 84    Note that the changes are semaphored via rtnl_lock.
 85  */
 86 
 87 static DEFINE_RWLOCK(mrt_lock);
 88 
 89 /*
 90  *      Multicast router control variables
 91  */
 92 
 93 #define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
 94 
 95 /* Special spinlock for queue of unresolved entries */
 96 static DEFINE_SPINLOCK(mfc_unres_lock);
 97 
 98 /* We return to original Alan's scheme. Hash table of resolved
 99    entries is changed only in process context and protected
100    with weak lock mrt_lock. Queue of unresolved entries is protected
101    with strong spinlock mfc_unres_lock.
102 
103    In this case data path is free of exclusive locks at all.
104  */
105 
106 static struct kmem_cache *mrt_cachep __read_mostly;
107 
108 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
109 static void ip6mr_free_table(struct mr6_table *mrt);
110 
111 static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
112                            struct sk_buff *skb, struct mfc6_cache *cache);
113 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
114                               mifi_t mifi, int assert);
115 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
116                                struct mfc6_cache *c, struct rtmsg *rtm);
117 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
118                               int cmd);
119 static int ip6mr_rtm_dumproute(struct sk_buff *skb,
120                                struct netlink_callback *cb);
121 static void mroute_clean_tables(struct mr6_table *mrt, bool all);
122 static void ipmr_expire_process(unsigned long arg);
123 
124 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
125 #define ip6mr_for_each_table(mrt, net) \
126         list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
127 
128 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
129 {
130         struct mr6_table *mrt;
131 
132         ip6mr_for_each_table(mrt, net) {
133                 if (mrt->id == id)
134                         return mrt;
135         }
136         return NULL;
137 }
138 
139 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
140                             struct mr6_table **mrt)
141 {
142         int err;
143         struct ip6mr_result res;
144         struct fib_lookup_arg arg = {
145                 .result = &res,
146                 .flags = FIB_LOOKUP_NOREF,
147         };
148 
149         err = fib_rules_lookup(net->ipv6.mr6_rules_ops,
150                                flowi6_to_flowi(flp6), 0, &arg);
151         if (err < 0)
152                 return err;
153         *mrt = res.mrt;
154         return 0;
155 }
156 
157 static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
158                              int flags, struct fib_lookup_arg *arg)
159 {
160         struct ip6mr_result *res = arg->result;
161         struct mr6_table *mrt;
162 
163         switch (rule->action) {
164         case FR_ACT_TO_TBL:
165                 break;
166         case FR_ACT_UNREACHABLE:
167                 return -ENETUNREACH;
168         case FR_ACT_PROHIBIT:
169                 return -EACCES;
170         case FR_ACT_BLACKHOLE:
171         default:
172                 return -EINVAL;
173         }
174 
175         mrt = ip6mr_get_table(rule->fr_net, rule->table);
176         if (!mrt)
177                 return -EAGAIN;
178         res->mrt = mrt;
179         return 0;
180 }
181 
/* fib_rules ->match callback: every flow matches, so always returns 1. */
static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
{
	const int matched = 1;

	return matched;
}
186 
187 static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
188         FRA_GENERIC_POLICY,
189 };
190 
/*
 * fib_rules ->configure callback: there is nothing rule-specific to
 * parse, so this always reports success (0).
 */
static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
				struct fib_rule_hdr *frh, struct nlattr **tb)
{
	const int ok = 0;

	return ok;
}
196 
/*
 * fib_rules ->compare callback: no rule-specific fields are compared,
 * so this always returns 1.
 */
static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
			      struct nlattr **tb)
{
	const int same = 1;

	return same;
}
202 
203 static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
204                            struct fib_rule_hdr *frh)
205 {
206         frh->dst_len = 0;
207         frh->src_len = 0;
208         frh->tos     = 0;
209         return 0;
210 }
211 
212 static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
213         .family         = RTNL_FAMILY_IP6MR,
214         .rule_size      = sizeof(struct ip6mr_rule),
215         .addr_size      = sizeof(struct in6_addr),
216         .action         = ip6mr_rule_action,
217         .match          = ip6mr_rule_match,
218         .configure      = ip6mr_rule_configure,
219         .compare        = ip6mr_rule_compare,
220         .fill           = ip6mr_rule_fill,
221         .nlgroup        = RTNLGRP_IPV6_RULE,
222         .policy         = ip6mr_rule_policy,
223         .owner          = THIS_MODULE,
224 };
225 
226 static int __net_init ip6mr_rules_init(struct net *net)
227 {
228         struct fib_rules_ops *ops;
229         struct mr6_table *mrt;
230         int err;
231 
232         ops = fib_rules_register(&ip6mr_rules_ops_template, net);
233         if (IS_ERR(ops))
234                 return PTR_ERR(ops);
235 
236         INIT_LIST_HEAD(&net->ipv6.mr6_tables);
237 
238         mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
239         if (!mrt) {
240                 err = -ENOMEM;
241                 goto err1;
242         }
243 
244         err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
245         if (err < 0)
246                 goto err2;
247 
248         net->ipv6.mr6_rules_ops = ops;
249         return 0;
250 
251 err2:
252         ip6mr_free_table(mrt);
253 err1:
254         fib_rules_unregister(ops);
255         return err;
256 }
257 
258 static void __net_exit ip6mr_rules_exit(struct net *net)
259 {
260         struct mr6_table *mrt, *next;
261 
262         rtnl_lock();
263         list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
264                 list_del(&mrt->list);
265                 ip6mr_free_table(mrt);
266         }
267         fib_rules_unregister(net->ipv6.mr6_rules_ops);
268         rtnl_unlock();
269 }
270 #else
271 #define ip6mr_for_each_table(mrt, net) \
272         for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
273 
274 static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
275 {
276         return net->ipv6.mrt6;
277 }
278 
279 static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
280                             struct mr6_table **mrt)
281 {
282         *mrt = net->ipv6.mrt6;
283         return 0;
284 }
285 
286 static int __net_init ip6mr_rules_init(struct net *net)
287 {
288         net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
289         return net->ipv6.mrt6 ? 0 : -ENOMEM;
290 }
291 
292 static void __net_exit ip6mr_rules_exit(struct net *net)
293 {
294         rtnl_lock();
295         ip6mr_free_table(net->ipv6.mrt6);
296         net->ipv6.mrt6 = NULL;
297         rtnl_unlock();
298 }
299 #endif
300 
301 static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
302 {
303         struct mr6_table *mrt;
304         unsigned int i;
305 
306         mrt = ip6mr_get_table(net, id);
307         if (mrt)
308                 return mrt;
309 
310         mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
311         if (!mrt)
312                 return NULL;
313         mrt->id = id;
314         write_pnet(&mrt->net, net);
315 
316         /* Forwarding cache */
317         for (i = 0; i < MFC6_LINES; i++)
318                 INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
319 
320         INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
321 
322         setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
323                     (unsigned long)mrt);
324 
325 #ifdef CONFIG_IPV6_PIMSM_V2
326         mrt->mroute_reg_vif_num = -1;
327 #endif
328 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
329         list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
330 #endif
331         return mrt;
332 }
333 
334 static void ip6mr_free_table(struct mr6_table *mrt)
335 {
336         del_timer_sync(&mrt->ipmr_expire_timer);
337         mroute_clean_tables(mrt, true);
338         kfree(mrt);
339 }
340 
341 #ifdef CONFIG_PROC_FS
342 
343 struct ipmr_mfc_iter {
344         struct seq_net_private p;
345         struct mr6_table *mrt;
346         struct list_head *cache;
347         int ct;
348 };
349 
350 
351 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
352                                            struct ipmr_mfc_iter *it, loff_t pos)
353 {
354         struct mr6_table *mrt = it->mrt;
355         struct mfc6_cache *mfc;
356 
357         read_lock(&mrt_lock);
358         for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
359                 it->cache = &mrt->mfc6_cache_array[it->ct];
360                 list_for_each_entry(mfc, it->cache, list)
361                         if (pos-- == 0)
362                                 return mfc;
363         }
364         read_unlock(&mrt_lock);
365 
366         spin_lock_bh(&mfc_unres_lock);
367         it->cache = &mrt->mfc6_unres_queue;
368         list_for_each_entry(mfc, it->cache, list)
369                 if (pos-- == 0)
370                         return mfc;
371         spin_unlock_bh(&mfc_unres_lock);
372 
373         it->cache = NULL;
374         return NULL;
375 }
376 
/*
 *	The /proc interfaces to multicast routing:
 *	/proc/net/ip6_mr_cache and /proc/net/ip6_mr_vif
 */
380 
381 struct ipmr_vif_iter {
382         struct seq_net_private p;
383         struct mr6_table *mrt;
384         int ct;
385 };
386 
387 static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
388                                             struct ipmr_vif_iter *iter,
389                                             loff_t pos)
390 {
391         struct mr6_table *mrt = iter->mrt;
392 
393         for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
394                 if (!MIF_EXISTS(mrt, iter->ct))
395                         continue;
396                 if (pos-- == 0)
397                         return &mrt->vif6_table[iter->ct];
398         }
399         return NULL;
400 }
401 
402 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
403         __acquires(mrt_lock)
404 {
405         struct ipmr_vif_iter *iter = seq->private;
406         struct net *net = seq_file_net(seq);
407         struct mr6_table *mrt;
408 
409         mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
410         if (!mrt)
411                 return ERR_PTR(-ENOENT);
412 
413         iter->mrt = mrt;
414 
415         read_lock(&mrt_lock);
416         return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
417                 : SEQ_START_TOKEN;
418 }
419 
420 static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
421 {
422         struct ipmr_vif_iter *iter = seq->private;
423         struct net *net = seq_file_net(seq);
424         struct mr6_table *mrt = iter->mrt;
425 
426         ++*pos;
427         if (v == SEQ_START_TOKEN)
428                 return ip6mr_vif_seq_idx(net, iter, 0);
429 
430         while (++iter->ct < mrt->maxvif) {
431                 if (!MIF_EXISTS(mrt, iter->ct))
432                         continue;
433                 return &mrt->vif6_table[iter->ct];
434         }
435         return NULL;
436 }
437 
438 static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
439         __releases(mrt_lock)
440 {
441         read_unlock(&mrt_lock);
442 }
443 
444 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
445 {
446         struct ipmr_vif_iter *iter = seq->private;
447         struct mr6_table *mrt = iter->mrt;
448 
449         if (v == SEQ_START_TOKEN) {
450                 seq_puts(seq,
451                          "Interface      BytesIn  PktsIn  BytesOut PktsOut Flags\n");
452         } else {
453                 const struct mif_device *vif = v;
454                 const char *name = vif->dev ? vif->dev->name : "none";
455 
456                 seq_printf(seq,
457                            "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
458                            vif - mrt->vif6_table,
459                            name, vif->bytes_in, vif->pkt_in,
460                            vif->bytes_out, vif->pkt_out,
461                            vif->flags);
462         }
463         return 0;
464 }
465 
466 static const struct seq_operations ip6mr_vif_seq_ops = {
467         .start = ip6mr_vif_seq_start,
468         .next  = ip6mr_vif_seq_next,
469         .stop  = ip6mr_vif_seq_stop,
470         .show  = ip6mr_vif_seq_show,
471 };
472 
473 static int ip6mr_vif_open(struct inode *inode, struct file *file)
474 {
475         return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
476                             sizeof(struct ipmr_vif_iter));
477 }
478 
479 static const struct file_operations ip6mr_vif_fops = {
480         .owner   = THIS_MODULE,
481         .open    = ip6mr_vif_open,
482         .read    = seq_read,
483         .llseek  = seq_lseek,
484         .release = seq_release_net,
485 };
486 
487 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
488 {
489         struct ipmr_mfc_iter *it = seq->private;
490         struct net *net = seq_file_net(seq);
491         struct mr6_table *mrt;
492 
493         mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
494         if (!mrt)
495                 return ERR_PTR(-ENOENT);
496 
497         it->mrt = mrt;
498         return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
499                 : SEQ_START_TOKEN;
500 }
501 
502 static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
503 {
504         struct mfc6_cache *mfc = v;
505         struct ipmr_mfc_iter *it = seq->private;
506         struct net *net = seq_file_net(seq);
507         struct mr6_table *mrt = it->mrt;
508 
509         ++*pos;
510 
511         if (v == SEQ_START_TOKEN)
512                 return ipmr_mfc_seq_idx(net, seq->private, 0);
513 
514         if (mfc->list.next != it->cache)
515                 return list_entry(mfc->list.next, struct mfc6_cache, list);
516 
517         if (it->cache == &mrt->mfc6_unres_queue)
518                 goto end_of_list;
519 
520         BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
521 
522         while (++it->ct < MFC6_LINES) {
523                 it->cache = &mrt->mfc6_cache_array[it->ct];
524                 if (list_empty(it->cache))
525                         continue;
526                 return list_first_entry(it->cache, struct mfc6_cache, list);
527         }
528 
529         /* exhausted cache_array, show unresolved */
530         read_unlock(&mrt_lock);
531         it->cache = &mrt->mfc6_unres_queue;
532         it->ct = 0;
533 
534         spin_lock_bh(&mfc_unres_lock);
535         if (!list_empty(it->cache))
536                 return list_first_entry(it->cache, struct mfc6_cache, list);
537 
538  end_of_list:
539         spin_unlock_bh(&mfc_unres_lock);
540         it->cache = NULL;
541 
542         return NULL;
543 }
544 
545 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
546 {
547         struct ipmr_mfc_iter *it = seq->private;
548         struct mr6_table *mrt = it->mrt;
549 
550         if (it->cache == &mrt->mfc6_unres_queue)
551                 spin_unlock_bh(&mfc_unres_lock);
552         else if (it->cache == &mrt->mfc6_cache_array[it->ct])
553                 read_unlock(&mrt_lock);
554 }
555 
556 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
557 {
558         int n;
559 
560         if (v == SEQ_START_TOKEN) {
561                 seq_puts(seq,
562                          "Group                            "
563                          "Origin                           "
564                          "Iif      Pkts  Bytes     Wrong  Oifs\n");
565         } else {
566                 const struct mfc6_cache *mfc = v;
567                 const struct ipmr_mfc_iter *it = seq->private;
568                 struct mr6_table *mrt = it->mrt;
569 
570                 seq_printf(seq, "%pI6 %pI6 %-3hd",
571                            &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
572                            mfc->mf6c_parent);
573 
574                 if (it->cache != &mrt->mfc6_unres_queue) {
575                         seq_printf(seq, " %8lu %8lu %8lu",
576                                    mfc->mfc_un.res.pkt,
577                                    mfc->mfc_un.res.bytes,
578                                    mfc->mfc_un.res.wrong_if);
579                         for (n = mfc->mfc_un.res.minvif;
580                              n < mfc->mfc_un.res.maxvif; n++) {
581                                 if (MIF_EXISTS(mrt, n) &&
582                                     mfc->mfc_un.res.ttls[n] < 255)
583                                         seq_printf(seq,
584                                                    " %2d:%-3d",
585                                                    n, mfc->mfc_un.res.ttls[n]);
586                         }
587                 } else {
588                         /* unresolved mfc_caches don't contain
589                          * pkt, bytes and wrong_if values
590                          */
591                         seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
592                 }
593                 seq_putc(seq, '\n');
594         }
595         return 0;
596 }
597 
598 static const struct seq_operations ipmr_mfc_seq_ops = {
599         .start = ipmr_mfc_seq_start,
600         .next  = ipmr_mfc_seq_next,
601         .stop  = ipmr_mfc_seq_stop,
602         .show  = ipmr_mfc_seq_show,
603 };
604 
605 static int ipmr_mfc_open(struct inode *inode, struct file *file)
606 {
607         return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
608                             sizeof(struct ipmr_mfc_iter));
609 }
610 
611 static const struct file_operations ip6mr_mfc_fops = {
612         .owner   = THIS_MODULE,
613         .open    = ipmr_mfc_open,
614         .read    = seq_read,
615         .llseek  = seq_lseek,
616         .release = seq_release_net,
617 };
618 #endif
619 
620 #ifdef CONFIG_IPV6_PIMSM_V2
621 
622 static int pim6_rcv(struct sk_buff *skb)
623 {
624         struct pimreghdr *pim;
625         struct ipv6hdr   *encap;
626         struct net_device  *reg_dev = NULL;
627         struct net *net = dev_net(skb->dev);
628         struct mr6_table *mrt;
629         struct flowi6 fl6 = {
630                 .flowi6_iif     = skb->dev->ifindex,
631                 .flowi6_mark    = skb->mark,
632         };
633         int reg_vif_num;
634 
635         if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
636                 goto drop;
637 
638         pim = (struct pimreghdr *)skb_transport_header(skb);
639         if (pim->type != ((PIM_VERSION << 4) | PIM_REGISTER) ||
640             (pim->flags & PIM_NULL_REGISTER) ||
641             (csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr,
642                              sizeof(*pim), IPPROTO_PIM,
643                              csum_partial((void *)pim, sizeof(*pim), 0)) &&
644              csum_fold(skb_checksum(skb, 0, skb->len, 0))))
645                 goto drop;
646 
647         /* check if the inner packet is destined to mcast group */
648         encap = (struct ipv6hdr *)(skb_transport_header(skb) +
649                                    sizeof(*pim));
650 
651         if (!ipv6_addr_is_multicast(&encap->daddr) ||
652             encap->payload_len == 0 ||
653             ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
654                 goto drop;
655 
656         if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
657                 goto drop;
658         reg_vif_num = mrt->mroute_reg_vif_num;
659 
660         read_lock(&mrt_lock);
661         if (reg_vif_num >= 0)
662                 reg_dev = mrt->vif6_table[reg_vif_num].dev;
663         if (reg_dev)
664                 dev_hold(reg_dev);
665         read_unlock(&mrt_lock);
666 
667         if (!reg_dev)
668                 goto drop;
669 
670         skb->mac_header = skb->network_header;
671         skb_pull(skb, (u8 *)encap - skb->data);
672         skb_reset_network_header(skb);
673         skb->protocol = htons(ETH_P_IPV6);
674         skb->ip_summed = CHECKSUM_NONE;
675 
676         skb_tunnel_rx(skb, reg_dev, dev_net(reg_dev));
677 
678         netif_rx(skb);
679 
680         dev_put(reg_dev);
681         return 0;
682  drop:
683         kfree_skb(skb);
684         return 0;
685 }
686 
687 static const struct inet6_protocol pim6_protocol = {
688         .handler        =       pim6_rcv,
689 };
690 
691 /* Service routines creating virtual interfaces: PIMREG */
692 
693 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
694                                       struct net_device *dev)
695 {
696         struct net *net = dev_net(dev);
697         struct mr6_table *mrt;
698         struct flowi6 fl6 = {
699                 .flowi6_oif     = dev->ifindex,
700                 .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
701                 .flowi6_mark    = skb->mark,
702         };
703         int err;
704 
705         err = ip6mr_fib_lookup(net, &fl6, &mrt);
706         if (err < 0) {
707                 kfree_skb(skb);
708                 return err;
709         }
710 
711         read_lock(&mrt_lock);
712         dev->stats.tx_bytes += skb->len;
713         dev->stats.tx_packets++;
714         ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
715         read_unlock(&mrt_lock);
716         kfree_skb(skb);
717         return NETDEV_TX_OK;
718 }
719 
/* ->ndo_get_iflink of the register device: always reports 0. */
static int reg_vif_get_iflink(const struct net_device *dev)
{
	const int iflink = 0;

	return iflink;
}
724 
725 static const struct net_device_ops reg_vif_netdev_ops = {
726         .ndo_start_xmit = reg_vif_xmit,
727         .ndo_get_iflink = reg_vif_get_iflink,
728 };
729 
730 static void reg_vif_setup(struct net_device *dev)
731 {
732         dev->type               = ARPHRD_PIMREG;
733         dev->mtu                = 1500 - sizeof(struct ipv6hdr) - 8;
734         dev->flags              = IFF_NOARP;
735         dev->netdev_ops         = &reg_vif_netdev_ops;
736         dev->destructor         = free_netdev;
737         dev->features           |= NETIF_F_NETNS_LOCAL;
738 }
739 
740 static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
741 {
742         struct net_device *dev;
743         char name[IFNAMSIZ];
744 
745         if (mrt->id == RT6_TABLE_DFLT)
746                 sprintf(name, "pim6reg");
747         else
748                 sprintf(name, "pim6reg%u", mrt->id);
749 
750         dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup);
751         if (!dev)
752                 return NULL;
753 
754         dev_net_set(dev, net);
755 
756         if (register_netdevice(dev)) {
757                 free_netdev(dev);
758                 return NULL;
759         }
760 
761         if (dev_open(dev))
762                 goto failure;
763 
764         dev_hold(dev);
765         return dev;
766 
767 failure:
768         unregister_netdevice(dev);
769         return NULL;
770 }
771 #endif
772 
773 /*
774  *      Delete a VIF entry
775  */
776 
777 static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
778 {
779         struct mif_device *v;
780         struct net_device *dev;
781         struct inet6_dev *in6_dev;
782 
783         if (vifi < 0 || vifi >= mrt->maxvif)
784                 return -EADDRNOTAVAIL;
785 
786         v = &mrt->vif6_table[vifi];
787 
788         write_lock_bh(&mrt_lock);
789         dev = v->dev;
790         v->dev = NULL;
791 
792         if (!dev) {
793                 write_unlock_bh(&mrt_lock);
794                 return -EADDRNOTAVAIL;
795         }
796 
797 #ifdef CONFIG_IPV6_PIMSM_V2
798         if (vifi == mrt->mroute_reg_vif_num)
799                 mrt->mroute_reg_vif_num = -1;
800 #endif
801 
802         if (vifi + 1 == mrt->maxvif) {
803                 int tmp;
804                 for (tmp = vifi - 1; tmp >= 0; tmp--) {
805                         if (MIF_EXISTS(mrt, tmp))
806                                 break;
807                 }
808                 mrt->maxvif = tmp + 1;
809         }
810 
811         write_unlock_bh(&mrt_lock);
812 
813         dev_set_allmulti(dev, -1);
814 
815         in6_dev = __in6_dev_get(dev);
816         if (in6_dev) {
817                 in6_dev->cnf.mc_forwarding--;
818                 inet6_netconf_notify_devconf(dev_net(dev),
819                                              NETCONFA_MC_FORWARDING,
820                                              dev->ifindex, &in6_dev->cnf);
821         }
822 
823         if (v->flags & MIFF_REGISTER)
824                 unregister_netdevice_queue(dev, head);
825 
826         dev_put(dev);
827         return 0;
828 }
829 
830 static inline void ip6mr_cache_free(struct mfc6_cache *c)
831 {
832         kmem_cache_free(mrt_cachep, c);
833 }
834 
835 /* Destroy an unresolved cache entry, killing queued skbs
836    and reporting error to netlink readers.
837  */
838 
839 static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
840 {
841         struct net *net = read_pnet(&mrt->net);
842         struct sk_buff *skb;
843 
844         atomic_dec(&mrt->cache_resolve_queue_len);
845 
846         while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
847                 if (ipv6_hdr(skb)->version == 0) {
848                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
849                         nlh->nlmsg_type = NLMSG_ERROR;
850                         nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
851                         skb_trim(skb, nlh->nlmsg_len);
852                         ((struct nlmsgerr *)nlmsg_data(nlh))->error = -ETIMEDOUT;
853                         rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
854                 } else
855                         kfree_skb(skb);
856         }
857 
858         ip6mr_cache_free(c);
859 }
860 
861 
862 /* Timer process for all the unresolved queue. */
863 
864 static void ipmr_do_expire_process(struct mr6_table *mrt)
865 {
866         unsigned long now = jiffies;
867         unsigned long expires = 10 * HZ;
868         struct mfc6_cache *c, *next;
869 
870         list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
871                 if (time_after(c->mfc_un.unres.expires, now)) {
872                         /* not yet... */
873                         unsigned long interval = c->mfc_un.unres.expires - now;
874                         if (interval < expires)
875                                 expires = interval;
876                         continue;
877                 }
878 
879                 list_del(&c->list);
880                 mr6_netlink_event(mrt, c, RTM_DELROUTE);
881                 ip6mr_destroy_unres(mrt, c);
882         }
883 
884         if (!list_empty(&mrt->mfc6_unres_queue))
885                 mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
886 }
887 
888 static void ipmr_expire_process(unsigned long arg)
889 {
890         struct mr6_table *mrt = (struct mr6_table *)arg;
891 
892         if (!spin_trylock(&mfc_unres_lock)) {
893                 mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
894                 return;
895         }
896 
897         if (!list_empty(&mrt->mfc6_unres_queue))
898                 ipmr_do_expire_process(mrt);
899 
900         spin_unlock(&mfc_unres_lock);
901 }
902 
903 /* Fill oifs list. It is called under write locked mrt_lock. */
904 
905 static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
906                                     unsigned char *ttls)
907 {
908         int vifi;
909 
910         cache->mfc_un.res.minvif = MAXMIFS;
911         cache->mfc_un.res.maxvif = 0;
912         memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
913 
914         for (vifi = 0; vifi < mrt->maxvif; vifi++) {
915                 if (MIF_EXISTS(mrt, vifi) &&
916                     ttls[vifi] && ttls[vifi] < 255) {
917                         cache->mfc_un.res.ttls[vifi] = ttls[vifi];
918                         if (cache->mfc_un.res.minvif > vifi)
919                                 cache->mfc_un.res.minvif = vifi;
920                         if (cache->mfc_un.res.maxvif <= vifi)
921                                 cache->mfc_un.res.maxvif = vifi + 1;
922                 }
923         }
924 }
925 
926 static int mif6_add(struct net *net, struct mr6_table *mrt,
927                     struct mif6ctl *vifc, int mrtsock)
928 {
929         int vifi = vifc->mif6c_mifi;
930         struct mif_device *v = &mrt->vif6_table[vifi];
931         struct net_device *dev;
932         struct inet6_dev *in6_dev;
933         int err;
934 
935         /* Is vif busy ? */
936         if (MIF_EXISTS(mrt, vifi))
937                 return -EADDRINUSE;
938 
939         switch (vifc->mif6c_flags) {
940 #ifdef CONFIG_IPV6_PIMSM_V2
941         case MIFF_REGISTER:
942                 /*
943                  * Special Purpose VIF in PIM
944                  * All the packets will be sent to the daemon
945                  */
946                 if (mrt->mroute_reg_vif_num >= 0)
947                         return -EADDRINUSE;
948                 dev = ip6mr_reg_vif(net, mrt);
949                 if (!dev)
950                         return -ENOBUFS;
951                 err = dev_set_allmulti(dev, 1);
952                 if (err) {
953                         unregister_netdevice(dev);
954                         dev_put(dev);
955                         return err;
956                 }
957                 break;
958 #endif
959         case 0:
960                 dev = dev_get_by_index(net, vifc->mif6c_pifi);
961                 if (!dev)
962                         return -EADDRNOTAVAIL;
963                 err = dev_set_allmulti(dev, 1);
964                 if (err) {
965                         dev_put(dev);
966                         return err;
967                 }
968                 break;
969         default:
970                 return -EINVAL;
971         }
972 
973         in6_dev = __in6_dev_get(dev);
974         if (in6_dev) {
975                 in6_dev->cnf.mc_forwarding++;
976                 inet6_netconf_notify_devconf(dev_net(dev),
977                                              NETCONFA_MC_FORWARDING,
978                                              dev->ifindex, &in6_dev->cnf);
979         }
980 
981         /*
982          *      Fill in the VIF structures
983          */
984         v->rate_limit = vifc->vifc_rate_limit;
985         v->flags = vifc->mif6c_flags;
986         if (!mrtsock)
987                 v->flags |= VIFF_STATIC;
988         v->threshold = vifc->vifc_threshold;
989         v->bytes_in = 0;
990         v->bytes_out = 0;
991         v->pkt_in = 0;
992         v->pkt_out = 0;
993         v->link = dev->ifindex;
994         if (v->flags & MIFF_REGISTER)
995                 v->link = dev_get_iflink(dev);
996 
997         /* And finish update writing critical data */
998         write_lock_bh(&mrt_lock);
999         v->dev = dev;
1000 #ifdef CONFIG_IPV6_PIMSM_V2
1001         if (v->flags & MIFF_REGISTER)
1002                 mrt->mroute_reg_vif_num = vifi;
1003 #endif
1004         if (vifi + 1 > mrt->maxvif)
1005                 mrt->maxvif = vifi + 1;
1006         write_unlock_bh(&mrt_lock);
1007         return 0;
1008 }
1009 
1010 static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
1011                                            const struct in6_addr *origin,
1012                                            const struct in6_addr *mcastgrp)
1013 {
1014         int line = MFC6_HASH(mcastgrp, origin);
1015         struct mfc6_cache *c;
1016 
1017         list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1018                 if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
1019                     ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
1020                         return c;
1021         }
1022         return NULL;
1023 }
1024 
1025 /* Look for a (*,*,oif) entry */
1026 static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
1027                                                       mifi_t mifi)
1028 {
1029         int line = MFC6_HASH(&in6addr_any, &in6addr_any);
1030         struct mfc6_cache *c;
1031 
1032         list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1033                 if (ipv6_addr_any(&c->mf6c_origin) &&
1034                     ipv6_addr_any(&c->mf6c_mcastgrp) &&
1035                     (c->mfc_un.res.ttls[mifi] < 255))
1036                         return c;
1037 
1038         return NULL;
1039 }
1040 
1041 /* Look for a (*,G) entry */
1042 static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
1043                                                struct in6_addr *mcastgrp,
1044                                                mifi_t mifi)
1045 {
1046         int line = MFC6_HASH(mcastgrp, &in6addr_any);
1047         struct mfc6_cache *c, *proxy;
1048 
1049         if (ipv6_addr_any(mcastgrp))
1050                 goto skip;
1051 
1052         list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
1053                 if (ipv6_addr_any(&c->mf6c_origin) &&
1054                     ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
1055                         if (c->mfc_un.res.ttls[mifi] < 255)
1056                                 return c;
1057 
1058                         /* It's ok if the mifi is part of the static tree */
1059                         proxy = ip6mr_cache_find_any_parent(mrt,
1060                                                             c->mf6c_parent);
1061                         if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
1062                                 return c;
1063                 }
1064 
1065 skip:
1066         return ip6mr_cache_find_any_parent(mrt, mifi);
1067 }
1068 
1069 /*
1070  *      Allocate a multicast cache entry
1071  */
1072 static struct mfc6_cache *ip6mr_cache_alloc(void)
1073 {
1074         struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
1075         if (!c)
1076                 return NULL;
1077         c->mfc_un.res.minvif = MAXMIFS;
1078         return c;
1079 }
1080 
1081 static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
1082 {
1083         struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
1084         if (!c)
1085                 return NULL;
1086         skb_queue_head_init(&c->mfc_un.unres.unresolved);
1087         c->mfc_un.unres.expires = jiffies + 10 * HZ;
1088         return c;
1089 }
1090 
1091 /*
1092  *      A cache entry has gone into a resolved state from queued
1093  */
1094 
1095 static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
1096                                 struct mfc6_cache *uc, struct mfc6_cache *c)
1097 {
1098         struct sk_buff *skb;
1099 
1100         /*
1101          *      Play the pending entries through our router
1102          */
1103 
1104         while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
1105                 if (ipv6_hdr(skb)->version == 0) {
1106                         struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
1107 
1108                         if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
1109                                 nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
1110                         } else {
1111                                 nlh->nlmsg_type = NLMSG_ERROR;
1112                                 nlh->nlmsg_len = nlmsg_msg_size(sizeof(struct nlmsgerr));
1113                                 skb_trim(skb, nlh->nlmsg_len);
1114                                 ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE;
1115                         }
1116                         rtnl_unicast(skb, net, NETLINK_CB(skb).portid);
1117                 } else
1118                         ip6_mr_forward(net, mrt, skb, c);
1119         }
1120 }
1121 
1122 /*
1123  *      Bounce a cache query up to pim6sd. We could use netlink for this but pim6sd
1124  *      expects the following bizarre scheme.
1125  *
1126  *      Called under mrt_lock.
1127  */
1128 
1129 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
1130                               mifi_t mifi, int assert)
1131 {
1132         struct sk_buff *skb;
1133         struct mrt6msg *msg;
1134         int ret;
1135 
1136 #ifdef CONFIG_IPV6_PIMSM_V2
1137         if (assert == MRT6MSG_WHOLEPKT)
1138                 skb = skb_realloc_headroom(pkt, -skb_network_offset(pkt)
1139                                                 +sizeof(*msg));
1140         else
1141 #endif
1142                 skb = alloc_skb(sizeof(struct ipv6hdr) + sizeof(*msg), GFP_ATOMIC);
1143 
1144         if (!skb)
1145                 return -ENOBUFS;
1146 
1147         /* I suppose that internal messages
1148          * do not require checksums */
1149 
1150         skb->ip_summed = CHECKSUM_UNNECESSARY;
1151 
1152 #ifdef CONFIG_IPV6_PIMSM_V2
1153         if (assert == MRT6MSG_WHOLEPKT) {
1154                 /* Ugly, but we have no choice with this interface.
1155                    Duplicate old header, fix length etc.
1156                    And all this only to mangle msg->im6_msgtype and
1157                    to set msg->im6_mbz to "mbz" :-)
1158                  */
1159                 skb_push(skb, -skb_network_offset(pkt));
1160 
1161                 skb_push(skb, sizeof(*msg));
1162                 skb_reset_transport_header(skb);
1163                 msg = (struct mrt6msg *)skb_transport_header(skb);
1164                 msg->im6_mbz = 0;
1165                 msg->im6_msgtype = MRT6MSG_WHOLEPKT;
1166                 msg->im6_mif = mrt->mroute_reg_vif_num;
1167                 msg->im6_pad = 0;
1168                 msg->im6_src = ipv6_hdr(pkt)->saddr;
1169                 msg->im6_dst = ipv6_hdr(pkt)->daddr;
1170 
1171                 skb->ip_summed = CHECKSUM_UNNECESSARY;
1172         } else
1173 #endif
1174         {
1175         /*
1176          *      Copy the IP header
1177          */
1178 
1179         skb_put(skb, sizeof(struct ipv6hdr));
1180         skb_reset_network_header(skb);
1181         skb_copy_to_linear_data(skb, ipv6_hdr(pkt), sizeof(struct ipv6hdr));
1182 
1183         /*
1184          *      Add our header
1185          */
1186         skb_put(skb, sizeof(*msg));
1187         skb_reset_transport_header(skb);
1188         msg = (struct mrt6msg *)skb_transport_header(skb);
1189 
1190         msg->im6_mbz = 0;
1191         msg->im6_msgtype = assert;
1192         msg->im6_mif = mifi;
1193         msg->im6_pad = 0;
1194         msg->im6_src = ipv6_hdr(pkt)->saddr;
1195         msg->im6_dst = ipv6_hdr(pkt)->daddr;
1196 
1197         skb_dst_set(skb, dst_clone(skb_dst(pkt)));
1198         skb->ip_summed = CHECKSUM_UNNECESSARY;
1199         }
1200 
1201         if (!mrt->mroute6_sk) {
1202                 kfree_skb(skb);
1203                 return -EINVAL;
1204         }
1205 
1206         /*
1207          *      Deliver to user space multicast routing algorithms
1208          */
1209         ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
1210         if (ret < 0) {
1211                 net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
1212                 kfree_skb(skb);
1213         }
1214 
1215         return ret;
1216 }
1217 
1218 /*
1219  *      Queue a packet for resolution. It gets locked cache entry!
1220  */
1221 
1222 static int
1223 ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
1224 {
1225         bool found = false;
1226         int err;
1227         struct mfc6_cache *c;
1228 
1229         spin_lock_bh(&mfc_unres_lock);
1230         list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
1231                 if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
1232                     ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
1233                         found = true;
1234                         break;
1235                 }
1236         }
1237 
1238         if (!found) {
1239                 /*
1240                  *      Create a new entry if allowable
1241                  */
1242 
1243                 if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
1244                     (c = ip6mr_cache_alloc_unres()) == NULL) {
1245                         spin_unlock_bh(&mfc_unres_lock);
1246 
1247                         kfree_skb(skb);
1248                         return -ENOBUFS;
1249                 }
1250 
1251                 /*
1252                  *      Fill in the new cache entry
1253                  */
1254                 c->mf6c_parent = -1;
1255                 c->mf6c_origin = ipv6_hdr(skb)->saddr;
1256                 c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
1257 
1258                 /*
1259                  *      Reflect first query at pim6sd
1260                  */
1261                 err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
1262                 if (err < 0) {
1263                         /* If the report failed throw the cache entry
1264                            out - Brad Parker
1265                          */
1266                         spin_unlock_bh(&mfc_unres_lock);
1267 
1268                         ip6mr_cache_free(c);
1269                         kfree_skb(skb);
1270                         return err;
1271                 }
1272 
1273                 atomic_inc(&mrt->cache_resolve_queue_len);
1274                 list_add(&c->list, &mrt->mfc6_unres_queue);
1275                 mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1276 
1277                 ipmr_do_expire_process(mrt);
1278         }
1279 
1280         /*
1281          *      See if we can append the packet
1282          */
1283         if (c->mfc_un.unres.unresolved.qlen > 3) {
1284                 kfree_skb(skb);
1285                 err = -ENOBUFS;
1286         } else {
1287                 skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
1288                 err = 0;
1289         }
1290 
1291         spin_unlock_bh(&mfc_unres_lock);
1292         return err;
1293 }
1294 
1295 /*
1296  *      MFC6 cache manipulation by user space
1297  */
1298 
1299 static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
1300                             int parent)
1301 {
1302         int line;
1303         struct mfc6_cache *c, *next;
1304 
1305         line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1306 
1307         list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
1308                 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1309                     ipv6_addr_equal(&c->mf6c_mcastgrp,
1310                                     &mfc->mf6cc_mcastgrp.sin6_addr) &&
1311                     (parent == -1 || parent == c->mf6c_parent)) {
1312                         write_lock_bh(&mrt_lock);
1313                         list_del(&c->list);
1314                         write_unlock_bh(&mrt_lock);
1315 
1316                         mr6_netlink_event(mrt, c, RTM_DELROUTE);
1317                         ip6mr_cache_free(c);
1318                         return 0;
1319                 }
1320         }
1321         return -ENOENT;
1322 }
1323 
1324 static int ip6mr_device_event(struct notifier_block *this,
1325                               unsigned long event, void *ptr)
1326 {
1327         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
1328         struct net *net = dev_net(dev);
1329         struct mr6_table *mrt;
1330         struct mif_device *v;
1331         int ct;
1332         LIST_HEAD(list);
1333 
1334         if (event != NETDEV_UNREGISTER)
1335                 return NOTIFY_DONE;
1336 
1337         ip6mr_for_each_table(mrt, net) {
1338                 v = &mrt->vif6_table[0];
1339                 for (ct = 0; ct < mrt->maxvif; ct++, v++) {
1340                         if (v->dev == dev)
1341                                 mif6_delete(mrt, ct, &list);
1342                 }
1343         }
1344         unregister_netdevice_many(&list);
1345 
1346         return NOTIFY_DONE;
1347 }
1348 
1349 static struct notifier_block ip6_mr_notifier = {
1350         .notifier_call = ip6mr_device_event
1351 };
1352 
1353 /*
1354  *      Setup for IP multicast routing
1355  */
1356 
1357 static int __net_init ip6mr_net_init(struct net *net)
1358 {
1359         int err;
1360 
1361         err = ip6mr_rules_init(net);
1362         if (err < 0)
1363                 goto fail;
1364 
1365 #ifdef CONFIG_PROC_FS
1366         err = -ENOMEM;
1367         if (!proc_create("ip6_mr_vif", 0, net->proc_net, &ip6mr_vif_fops))
1368                 goto proc_vif_fail;
1369         if (!proc_create("ip6_mr_cache", 0, net->proc_net, &ip6mr_mfc_fops))
1370                 goto proc_cache_fail;
1371 #endif
1372 
1373         return 0;
1374 
1375 #ifdef CONFIG_PROC_FS
1376 proc_cache_fail:
1377         remove_proc_entry("ip6_mr_vif", net->proc_net);
1378 proc_vif_fail:
1379         ip6mr_rules_exit(net);
1380 #endif
1381 fail:
1382         return err;
1383 }
1384 
1385 static void __net_exit ip6mr_net_exit(struct net *net)
1386 {
1387 #ifdef CONFIG_PROC_FS
1388         remove_proc_entry("ip6_mr_cache", net->proc_net);
1389         remove_proc_entry("ip6_mr_vif", net->proc_net);
1390 #endif
1391         ip6mr_rules_exit(net);
1392 }
1393 
1394 static struct pernet_operations ip6mr_net_ops = {
1395         .init = ip6mr_net_init,
1396         .exit = ip6mr_net_exit,
1397 };
1398 
1399 int __init ip6_mr_init(void)
1400 {
1401         int err;
1402 
1403         mrt_cachep = kmem_cache_create("ip6_mrt_cache",
1404                                        sizeof(struct mfc6_cache),
1405                                        0, SLAB_HWCACHE_ALIGN,
1406                                        NULL);
1407         if (!mrt_cachep)
1408                 return -ENOMEM;
1409 
1410         err = register_pernet_subsys(&ip6mr_net_ops);
1411         if (err)
1412                 goto reg_pernet_fail;
1413 
1414         err = register_netdevice_notifier(&ip6_mr_notifier);
1415         if (err)
1416                 goto reg_notif_fail;
1417 #ifdef CONFIG_IPV6_PIMSM_V2
1418         if (inet6_add_protocol(&pim6_protocol, IPPROTO_PIM) < 0) {
1419                 pr_err("%s: can't add PIM protocol\n", __func__);
1420                 err = -EAGAIN;
1421                 goto add_proto_fail;
1422         }
1423 #endif
1424         rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL,
1425                       ip6mr_rtm_dumproute, NULL);
1426         return 0;
1427 #ifdef CONFIG_IPV6_PIMSM_V2
1428 add_proto_fail:
1429         unregister_netdevice_notifier(&ip6_mr_notifier);
1430 #endif
1431 reg_notif_fail:
1432         unregister_pernet_subsys(&ip6mr_net_ops);
1433 reg_pernet_fail:
1434         kmem_cache_destroy(mrt_cachep);
1435         return err;
1436 }
1437 
1438 void ip6_mr_cleanup(void)
1439 {
1440         rtnl_unregister(RTNL_FAMILY_IP6MR, RTM_GETROUTE);
1441 #ifdef CONFIG_IPV6_PIMSM_V2
1442         inet6_del_protocol(&pim6_protocol, IPPROTO_PIM);
1443 #endif
1444         unregister_netdevice_notifier(&ip6_mr_notifier);
1445         unregister_pernet_subsys(&ip6mr_net_ops);
1446         kmem_cache_destroy(mrt_cachep);
1447 }
1448 
1449 static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
1450                          struct mf6cctl *mfc, int mrtsock, int parent)
1451 {
1452         bool found = false;
1453         int line;
1454         struct mfc6_cache *uc, *c;
1455         unsigned char ttls[MAXMIFS];
1456         int i;
1457 
1458         if (mfc->mf6cc_parent >= MAXMIFS)
1459                 return -ENFILE;
1460 
1461         memset(ttls, 255, MAXMIFS);
1462         for (i = 0; i < MAXMIFS; i++) {
1463                 if (IF_ISSET(i, &mfc->mf6cc_ifset))
1464                         ttls[i] = 1;
1465 
1466         }
1467 
1468         line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
1469 
1470         list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
1471                 if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
1472                     ipv6_addr_equal(&c->mf6c_mcastgrp,
1473                                     &mfc->mf6cc_mcastgrp.sin6_addr) &&
1474                     (parent == -1 || parent == mfc->mf6cc_parent)) {
1475                         found = true;
1476                         break;
1477                 }
1478         }
1479 
1480         if (found) {
1481                 write_lock_bh(&mrt_lock);
1482                 c->mf6c_parent = mfc->mf6cc_parent;
1483                 ip6mr_update_thresholds(mrt, c, ttls);
1484                 if (!mrtsock)
1485                         c->mfc_flags |= MFC_STATIC;
1486                 write_unlock_bh(&mrt_lock);
1487                 mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1488                 return 0;
1489         }
1490 
1491         if (!ipv6_addr_any(&mfc->mf6cc_mcastgrp.sin6_addr) &&
1492             !ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
1493                 return -EINVAL;
1494 
1495         c = ip6mr_cache_alloc();
1496         if (!c)
1497                 return -ENOMEM;
1498 
1499         c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
1500         c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
1501         c->mf6c_parent = mfc->mf6cc_parent;
1502         ip6mr_update_thresholds(mrt, c, ttls);
1503         if (!mrtsock)
1504                 c->mfc_flags |= MFC_STATIC;
1505 
1506         write_lock_bh(&mrt_lock);
1507         list_add(&c->list, &mrt->mfc6_cache_array[line]);
1508         write_unlock_bh(&mrt_lock);
1509 
1510         /*
1511          *      Check to see if we resolved a queued list. If so we
1512          *      need to send on the frames and tidy up.
1513          */
1514         found = false;
1515         spin_lock_bh(&mfc_unres_lock);
1516         list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
1517                 if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
1518                     ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
1519                         list_del(&uc->list);
1520                         atomic_dec(&mrt->cache_resolve_queue_len);
1521                         found = true;
1522                         break;
1523                 }
1524         }
1525         if (list_empty(&mrt->mfc6_unres_queue))
1526                 del_timer(&mrt->ipmr_expire_timer);
1527         spin_unlock_bh(&mfc_unres_lock);
1528 
1529         if (found) {
1530                 ip6mr_cache_resolve(net, mrt, uc, c);
1531                 ip6mr_cache_free(uc);
1532         }
1533         mr6_netlink_event(mrt, c, RTM_NEWROUTE);
1534         return 0;
1535 }
1536 
1537 /*
1538  *      Close the multicast socket, and clear the vif tables etc
1539  */
1540 
1541 static void mroute_clean_tables(struct mr6_table *mrt, bool all)
1542 {
1543         int i;
1544         LIST_HEAD(list);
1545         struct mfc6_cache *c, *next;
1546 
1547         /*
1548          *      Shut down all active vif entries
1549          */
1550         for (i = 0; i < mrt->maxvif; i++) {
1551                 if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
1552                         continue;
1553                 mif6_delete(mrt, i, &list);
1554         }
1555         unregister_netdevice_many(&list);
1556 
1557         /*
1558          *      Wipe the cache
1559          */
1560         for (i = 0; i < MFC6_LINES; i++) {
1561                 list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
1562                         if (!all && (c->mfc_flags & MFC_STATIC))
1563                                 continue;
1564                         write_lock_bh(&mrt_lock);
1565                         list_del(&c->list);
1566                         write_unlock_bh(&mrt_lock);
1567 
1568                         mr6_netlink_event(mrt, c, RTM_DELROUTE);
1569                         ip6mr_cache_free(c);
1570                 }
1571         }
1572 
1573         if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
1574                 spin_lock_bh(&mfc_unres_lock);
1575                 list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
1576                         list_del(&c->list);
1577                         mr6_netlink_event(mrt, c, RTM_DELROUTE);
1578                         ip6mr_destroy_unres(mrt, c);
1579                 }
1580                 spin_unlock_bh(&mfc_unres_lock);
1581         }
1582 }
1583 
1584 static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
1585 {
1586         int err = 0;
1587         struct net *net = sock_net(sk);
1588 
1589         rtnl_lock();
1590         write_lock_bh(&mrt_lock);
1591         if (likely(mrt->mroute6_sk == NULL)) {
1592                 mrt->mroute6_sk = sk;
1593                 net->ipv6.devconf_all->mc_forwarding++;
1594                 inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING,
1595                                              NETCONFA_IFINDEX_ALL,
1596                                              net->ipv6.devconf_all);
1597         }
1598         else
1599                 err = -EADDRINUSE;
1600         write_unlock_bh(&mrt_lock);
1601 
1602         rtnl_unlock();
1603 
1604         return err;
1605 }
1606 
1607 int ip6mr_sk_done(struct sock *sk)
1608 {
1609         int err = -EACCES;
1610         struct net *net = sock_net(sk);
1611         struct mr6_table *mrt;
1612 
1613         rtnl_lock();
1614         ip6mr_for_each_table(mrt, net) {
1615                 if (sk == mrt->mroute6_sk) {
1616                         write_lock_bh(&mrt_lock);
1617                         mrt->mroute6_sk = NULL;
1618                         net->ipv6.devconf_all->mc_forwarding--;
1619                         inet6_netconf_notify_devconf(net,
1620                                                      NETCONFA_MC_FORWARDING,
1621                                                      NETCONFA_IFINDEX_ALL,
1622                                                      net->ipv6.devconf_all);
1623                         write_unlock_bh(&mrt_lock);
1624 
1625                         mroute_clean_tables(mrt, false);
1626                         err = 0;
1627                         break;
1628                 }
1629         }
1630         rtnl_unlock();
1631 
1632         return err;
1633 }
1634 
1635 struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
1636 {
1637         struct mr6_table *mrt;
1638         struct flowi6 fl6 = {
1639                 .flowi6_iif     = skb->skb_iif ? : LOOPBACK_IFINDEX,
1640                 .flowi6_oif     = skb->dev->ifindex,
1641                 .flowi6_mark    = skb->mark,
1642         };
1643 
1644         if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
1645                 return NULL;
1646 
1647         return mrt->mroute6_sk;
1648 }
1649 
1650 /*
1651  *      Socket options and virtual interface manipulation. The whole
1652  *      virtual interface system is a complete heap, but unfortunately
1653  *      that's how BSD mrouted happens to think. Maybe one day with a proper
1654  *      MOSPF/PIM router set up we can clean this up.
1655  */
1656 
1657 int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
1658 {
1659         int ret, parent = 0;
1660         struct mif6ctl vif;
1661         struct mf6cctl mfc;
1662         mifi_t mifi;
1663         struct net *net = sock_net(sk);
1664         struct mr6_table *mrt;
1665 
1666         mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1667         if (!mrt)
1668                 return -ENOENT;
1669 
1670         if (optname != MRT6_INIT) {
1671                 if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
1672                         return -EACCES;
1673         }
1674 
1675         switch (optname) {
1676         case MRT6_INIT:
1677                 if (sk->sk_type != SOCK_RAW ||
1678                     inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
1679                         return -EOPNOTSUPP;
1680                 if (optlen < sizeof(int))
1681                         return -EINVAL;
1682 
1683                 return ip6mr_sk_init(mrt, sk);
1684 
1685         case MRT6_DONE:
1686                 return ip6mr_sk_done(sk);
1687 
1688         case MRT6_ADD_MIF:
1689                 if (optlen < sizeof(vif))
1690                         return -EINVAL;
1691                 if (copy_from_user(&vif, optval, sizeof(vif)))
1692                         return -EFAULT;
1693                 if (vif.mif6c_mifi >= MAXMIFS)
1694                         return -ENFILE;
1695                 rtnl_lock();
1696                 ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
1697                 rtnl_unlock();
1698                 return ret;
1699 
1700         case MRT6_DEL_MIF:
1701                 if (optlen < sizeof(mifi_t))
1702                         return -EINVAL;
1703                 if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
1704                         return -EFAULT;
1705                 rtnl_lock();
1706                 ret = mif6_delete(mrt, mifi, NULL);
1707                 rtnl_unlock();
1708                 return ret;
1709 
1710         /*
1711          *      Manipulate the forwarding caches. These live
1712          *      in a sort of kernel/user symbiosis.
1713          */
1714         case MRT6_ADD_MFC:
1715         case MRT6_DEL_MFC:
1716                 parent = -1;
1717         case MRT6_ADD_MFC_PROXY:
1718         case MRT6_DEL_MFC_PROXY:
1719                 if (optlen < sizeof(mfc))
1720                         return -EINVAL;
1721                 if (copy_from_user(&mfc, optval, sizeof(mfc)))
1722                         return -EFAULT;
1723                 if (parent == 0)
1724                         parent = mfc.mf6cc_parent;
1725                 rtnl_lock();
1726                 if (optname == MRT6_DEL_MFC || optname == MRT6_DEL_MFC_PROXY)
1727                         ret = ip6mr_mfc_delete(mrt, &mfc, parent);
1728                 else
1729                         ret = ip6mr_mfc_add(net, mrt, &mfc,
1730                                             sk == mrt->mroute6_sk, parent);
1731                 rtnl_unlock();
1732                 return ret;
1733 
1734         /*
1735          *      Control PIM assert (to activate pim will activate assert)
1736          */
1737         case MRT6_ASSERT:
1738         {
1739                 int v;
1740 
1741                 if (optlen != sizeof(v))
1742                         return -EINVAL;
1743                 if (get_user(v, (int __user *)optval))
1744                         return -EFAULT;
1745                 mrt->mroute_do_assert = v;
1746                 return 0;
1747         }
1748 
1749 #ifdef CONFIG_IPV6_PIMSM_V2
1750         case MRT6_PIM:
1751         {
1752                 int v;
1753 
1754                 if (optlen != sizeof(v))
1755                         return -EINVAL;
1756                 if (get_user(v, (int __user *)optval))
1757                         return -EFAULT;
1758                 v = !!v;
1759                 rtnl_lock();
1760                 ret = 0;
1761                 if (v != mrt->mroute_do_pim) {
1762                         mrt->mroute_do_pim = v;
1763                         mrt->mroute_do_assert = v;
1764                 }
1765                 rtnl_unlock();
1766                 return ret;
1767         }
1768 
1769 #endif
1770 #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
1771         case MRT6_TABLE:
1772         {
1773                 u32 v;
1774 
1775                 if (optlen != sizeof(u32))
1776                         return -EINVAL;
1777                 if (get_user(v, (u32 __user *)optval))
1778                         return -EFAULT;
1779                 /* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
1780                 if (v != RT_TABLE_DEFAULT && v >= 100000000)
1781                         return -EINVAL;
1782                 if (sk == mrt->mroute6_sk)
1783                         return -EBUSY;
1784 
1785                 rtnl_lock();
1786                 ret = 0;
1787                 if (!ip6mr_new_table(net, v))
1788                         ret = -ENOMEM;
1789                 raw6_sk(sk)->ip6mr_table = v;
1790                 rtnl_unlock();
1791                 return ret;
1792         }
1793 #endif
1794         /*
1795          *      Spurious command, or MRT6_VERSION which you cannot
1796          *      set.
1797          */
1798         default:
1799                 return -ENOPROTOOPT;
1800         }
1801 }
1802 
1803 /*
1804  *      Getsock opt support for the multicast routing system.
1805  */
1806 
1807 int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
1808                           int __user *optlen)
1809 {
1810         int olr;
1811         int val;
1812         struct net *net = sock_net(sk);
1813         struct mr6_table *mrt;
1814 
1815         mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1816         if (!mrt)
1817                 return -ENOENT;
1818 
1819         switch (optname) {
1820         case MRT6_VERSION:
1821                 val = 0x0305;
1822                 break;
1823 #ifdef CONFIG_IPV6_PIMSM_V2
1824         case MRT6_PIM:
1825                 val = mrt->mroute_do_pim;
1826                 break;
1827 #endif
1828         case MRT6_ASSERT:
1829                 val = mrt->mroute_do_assert;
1830                 break;
1831         default:
1832                 return -ENOPROTOOPT;
1833         }
1834 
1835         if (get_user(olr, optlen))
1836                 return -EFAULT;
1837 
1838         olr = min_t(int, olr, sizeof(int));
1839         if (olr < 0)
1840                 return -EINVAL;
1841 
1842         if (put_user(olr, optlen))
1843                 return -EFAULT;
1844         if (copy_to_user(optval, &val, olr))
1845                 return -EFAULT;
1846         return 0;
1847 }
1848 
1849 /*
1850  *      The IP multicast ioctl support routines.
1851  */
1852 
1853 int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
1854 {
1855         struct sioc_sg_req6 sr;
1856         struct sioc_mif_req6 vr;
1857         struct mif_device *vif;
1858         struct mfc6_cache *c;
1859         struct net *net = sock_net(sk);
1860         struct mr6_table *mrt;
1861 
1862         mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1863         if (!mrt)
1864                 return -ENOENT;
1865 
1866         switch (cmd) {
1867         case SIOCGETMIFCNT_IN6:
1868                 if (copy_from_user(&vr, arg, sizeof(vr)))
1869                         return -EFAULT;
1870                 if (vr.mifi >= mrt->maxvif)
1871                         return -EINVAL;
1872                 read_lock(&mrt_lock);
1873                 vif = &mrt->vif6_table[vr.mifi];
1874                 if (MIF_EXISTS(mrt, vr.mifi)) {
1875                         vr.icount = vif->pkt_in;
1876                         vr.ocount = vif->pkt_out;
1877                         vr.ibytes = vif->bytes_in;
1878                         vr.obytes = vif->bytes_out;
1879                         read_unlock(&mrt_lock);
1880 
1881                         if (copy_to_user(arg, &vr, sizeof(vr)))
1882                                 return -EFAULT;
1883                         return 0;
1884                 }
1885                 read_unlock(&mrt_lock);
1886                 return -EADDRNOTAVAIL;
1887         case SIOCGETSGCNT_IN6:
1888                 if (copy_from_user(&sr, arg, sizeof(sr)))
1889                         return -EFAULT;
1890 
1891                 read_lock(&mrt_lock);
1892                 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1893                 if (c) {
1894                         sr.pktcnt = c->mfc_un.res.pkt;
1895                         sr.bytecnt = c->mfc_un.res.bytes;
1896                         sr.wrong_if = c->mfc_un.res.wrong_if;
1897                         read_unlock(&mrt_lock);
1898 
1899                         if (copy_to_user(arg, &sr, sizeof(sr)))
1900                                 return -EFAULT;
1901                         return 0;
1902                 }
1903                 read_unlock(&mrt_lock);
1904                 return -EADDRNOTAVAIL;
1905         default:
1906                 return -ENOIOCTLCMD;
1907         }
1908 }
1909 
1910 #ifdef CONFIG_COMPAT
1911 struct compat_sioc_sg_req6 {
1912         struct sockaddr_in6 src;
1913         struct sockaddr_in6 grp;
1914         compat_ulong_t pktcnt;
1915         compat_ulong_t bytecnt;
1916         compat_ulong_t wrong_if;
1917 };
1918 
1919 struct compat_sioc_mif_req6 {
1920         mifi_t  mifi;
1921         compat_ulong_t icount;
1922         compat_ulong_t ocount;
1923         compat_ulong_t ibytes;
1924         compat_ulong_t obytes;
1925 };
1926 
1927 int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
1928 {
1929         struct compat_sioc_sg_req6 sr;
1930         struct compat_sioc_mif_req6 vr;
1931         struct mif_device *vif;
1932         struct mfc6_cache *c;
1933         struct net *net = sock_net(sk);
1934         struct mr6_table *mrt;
1935 
1936         mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
1937         if (!mrt)
1938                 return -ENOENT;
1939 
1940         switch (cmd) {
1941         case SIOCGETMIFCNT_IN6:
1942                 if (copy_from_user(&vr, arg, sizeof(vr)))
1943                         return -EFAULT;
1944                 if (vr.mifi >= mrt->maxvif)
1945                         return -EINVAL;
1946                 read_lock(&mrt_lock);
1947                 vif = &mrt->vif6_table[vr.mifi];
1948                 if (MIF_EXISTS(mrt, vr.mifi)) {
1949                         vr.icount = vif->pkt_in;
1950                         vr.ocount = vif->pkt_out;
1951                         vr.ibytes = vif->bytes_in;
1952                         vr.obytes = vif->bytes_out;
1953                         read_unlock(&mrt_lock);
1954 
1955                         if (copy_to_user(arg, &vr, sizeof(vr)))
1956                                 return -EFAULT;
1957                         return 0;
1958                 }
1959                 read_unlock(&mrt_lock);
1960                 return -EADDRNOTAVAIL;
1961         case SIOCGETSGCNT_IN6:
1962                 if (copy_from_user(&sr, arg, sizeof(sr)))
1963                         return -EFAULT;
1964 
1965                 read_lock(&mrt_lock);
1966                 c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
1967                 if (c) {
1968                         sr.pktcnt = c->mfc_un.res.pkt;
1969                         sr.bytecnt = c->mfc_un.res.bytes;
1970                         sr.wrong_if = c->mfc_un.res.wrong_if;
1971                         read_unlock(&mrt_lock);
1972 
1973                         if (copy_to_user(arg, &sr, sizeof(sr)))
1974                                 return -EFAULT;
1975                         return 0;
1976                 }
1977                 read_unlock(&mrt_lock);
1978                 return -EADDRNOTAVAIL;
1979         default:
1980                 return -ENOIOCTLCMD;
1981         }
1982 }
1983 #endif
1984 
1985 static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
1986 {
1987         IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
1988                          IPSTATS_MIB_OUTFORWDATAGRAMS);
1989         IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
1990                          IPSTATS_MIB_OUTOCTETS, skb->len);
1991         return dst_output(net, sk, skb);
1992 }
1993 
1994 /*
1995  *      Processing handlers for ip6mr_forward
1996  */
1997 
1998 static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
1999                           struct sk_buff *skb, struct mfc6_cache *c, int vifi)
2000 {
2001         struct ipv6hdr *ipv6h;
2002         struct mif_device *vif = &mrt->vif6_table[vifi];
2003         struct net_device *dev;
2004         struct dst_entry *dst;
2005         struct flowi6 fl6;
2006 
2007         if (!vif->dev)
2008                 goto out_free;
2009 
2010 #ifdef CONFIG_IPV6_PIMSM_V2
2011         if (vif->flags & MIFF_REGISTER) {
2012                 vif->pkt_out++;
2013                 vif->bytes_out += skb->len;
2014                 vif->dev->stats.tx_bytes += skb->len;
2015                 vif->dev->stats.tx_packets++;
2016                 ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
2017                 goto out_free;
2018         }
2019 #endif
2020 
2021         ipv6h = ipv6_hdr(skb);
2022 
2023         fl6 = (struct flowi6) {
2024                 .flowi6_oif = vif->link,
2025                 .daddr = ipv6h->daddr,
2026         };
2027 
2028         dst = ip6_route_output(net, NULL, &fl6);
2029         if (dst->error) {
2030                 dst_release(dst);
2031                 goto out_free;
2032         }
2033 
2034         skb_dst_drop(skb);
2035         skb_dst_set(skb, dst);
2036 
2037         /*
2038          * RFC1584 teaches, that DVMRP/PIM router must deliver packets locally
2039          * not only before forwarding, but after forwarding on all output
2040          * interfaces. It is clear, if mrouter runs a multicasting
2041          * program, it should receive packets not depending to what interface
2042          * program is joined.
2043          * If we will not make it, the program will have to join on all
2044          * interfaces. On the other hand, multihoming host (or router, but
2045          * not mrouter) cannot join to more than one interface - it will
2046          * result in receiving multiple packets.
2047          */
2048         dev = vif->dev;
2049         skb->dev = dev;
2050         vif->pkt_out++;
2051         vif->bytes_out += skb->len;
2052 
2053         /* We are about to write */
2054         /* XXX: extension headers? */
2055         if (skb_cow(skb, sizeof(*ipv6h) + LL_RESERVED_SPACE(dev)))
2056                 goto out_free;
2057 
2058         ipv6h = ipv6_hdr(skb);
2059         ipv6h->hop_limit--;
2060 
2061         IP6CB(skb)->flags |= IP6SKB_FORWARDED;
2062 
2063         return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
2064                        net, NULL, skb, skb->dev, dev,
2065                        ip6mr_forward2_finish);
2066 
2067 out_free:
2068         kfree_skb(skb);
2069         return 0;
2070 }
2071 
2072 static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
2073 {
2074         int ct;
2075 
2076         for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
2077                 if (mrt->vif6_table[ct].dev == dev)
2078                         break;
2079         }
2080         return ct;
2081 }
2082 
2083 static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
2084                            struct sk_buff *skb, struct mfc6_cache *cache)
2085 {
2086         int psend = -1;
2087         int vif, ct;
2088         int true_vifi = ip6mr_find_vif(mrt, skb->dev);
2089 
2090         vif = cache->mf6c_parent;
2091         cache->mfc_un.res.pkt++;
2092         cache->mfc_un.res.bytes += skb->len;
2093 
2094         if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
2095                 struct mfc6_cache *cache_proxy;
2096 
2097                 /* For an (*,G) entry, we only check that the incoming
2098                  * interface is part of the static tree.
2099                  */
2100                 cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
2101                 if (cache_proxy &&
2102                     cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
2103                         goto forward;
2104         }
2105 
2106         /*
2107          * Wrong interface: drop packet and (maybe) send PIM assert.
2108          */
2109         if (mrt->vif6_table[vif].dev != skb->dev) {
2110                 cache->mfc_un.res.wrong_if++;
2111 
2112                 if (true_vifi >= 0 && mrt->mroute_do_assert &&
2113                     /* pimsm uses asserts, when switching from RPT to SPT,
2114                        so that we cannot check that packet arrived on an oif.
2115                        It is bad, but otherwise we would need to move pretty
2116                        large chunk of pimd to kernel. Ough... --ANK
2117                      */
2118                     (mrt->mroute_do_pim ||
2119                      cache->mfc_un.res.ttls[true_vifi] < 255) &&
2120                     time_after(jiffies,
2121                                cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
2122                         cache->mfc_un.res.last_assert = jiffies;
2123                         ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
2124                 }
2125                 goto dont_forward;
2126         }
2127 
2128 forward:
2129         mrt->vif6_table[vif].pkt_in++;
2130         mrt->vif6_table[vif].bytes_in += skb->len;
2131 
2132         /*
2133          *      Forward the frame
2134          */
2135         if (ipv6_addr_any(&cache->mf6c_origin) &&
2136             ipv6_addr_any(&cache->mf6c_mcastgrp)) {
2137                 if (true_vifi >= 0 &&
2138                     true_vifi != cache->mf6c_parent &&
2139                     ipv6_hdr(skb)->hop_limit >
2140                                 cache->mfc_un.res.ttls[cache->mf6c_parent]) {
2141                         /* It's an (*,*) entry and the packet is not coming from
2142                          * the upstream: forward the packet to the upstream
2143                          * only.
2144                          */
2145                         psend = cache->mf6c_parent;
2146                         goto last_forward;
2147                 }
2148                 goto dont_forward;
2149         }
2150         for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
2151                 /* For (*,G) entry, don't forward to the incoming interface */
2152                 if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
2153                     ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
2154                         if (psend != -1) {
2155                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2156                                 if (skb2)
2157                                         ip6mr_forward2(net, mrt, skb2, cache, psend);
2158                         }
2159                         psend = ct;
2160                 }
2161         }
2162 last_forward:
2163         if (psend != -1) {
2164                 ip6mr_forward2(net, mrt, skb, cache, psend);
2165                 return;
2166         }
2167 
2168 dont_forward:
2169         kfree_skb(skb);
2170 }
2171 
2172 
2173 /*
2174  *      Multicast packets for forwarding arrive here
2175  */
2176 
2177 int ip6_mr_input(struct sk_buff *skb)
2178 {
2179         struct mfc6_cache *cache;
2180         struct net *net = dev_net(skb->dev);
2181         struct mr6_table *mrt;
2182         struct flowi6 fl6 = {
2183                 .flowi6_iif     = skb->dev->ifindex,
2184                 .flowi6_mark    = skb->mark,
2185         };
2186         int err;
2187 
2188         err = ip6mr_fib_lookup(net, &fl6, &mrt);
2189         if (err < 0) {
2190                 kfree_skb(skb);
2191                 return err;
2192         }
2193 
2194         read_lock(&mrt_lock);
2195         cache = ip6mr_cache_find(mrt,
2196                                  &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
2197         if (!cache) {
2198                 int vif = ip6mr_find_vif(mrt, skb->dev);
2199 
2200                 if (vif >= 0)
2201                         cache = ip6mr_cache_find_any(mrt,
2202                                                      &ipv6_hdr(skb)->daddr,
2203                                                      vif);
2204         }
2205 
2206         /*
2207          *      No usable cache entry
2208          */
2209         if (!cache) {
2210                 int vif;
2211 
2212                 vif = ip6mr_find_vif(mrt, skb->dev);
2213                 if (vif >= 0) {
2214                         int err = ip6mr_cache_unresolved(mrt, vif, skb);
2215                         read_unlock(&mrt_lock);
2216 
2217                         return err;
2218                 }
2219                 read_unlock(&mrt_lock);
2220                 kfree_skb(skb);
2221                 return -ENODEV;
2222         }
2223 
2224         ip6_mr_forward(net, mrt, skb, cache);
2225 
2226         read_unlock(&mrt_lock);
2227 
2228         return 0;
2229 }
2230 
2231 
2232 static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2233                                struct mfc6_cache *c, struct rtmsg *rtm)
2234 {
2235         int ct;
2236         struct rtnexthop *nhp;
2237         struct nlattr *mp_attr;
2238         struct rta_mfc_stats mfcs;
2239 
2240         /* If cache is unresolved, don't try to parse IIF and OIF */
2241         if (c->mf6c_parent >= MAXMIFS)
2242                 return -ENOENT;
2243 
2244         if (MIF_EXISTS(mrt, c->mf6c_parent) &&
2245             nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
2246                 return -EMSGSIZE;
2247         mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
2248         if (!mp_attr)
2249                 return -EMSGSIZE;
2250 
2251         for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
2252                 if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
2253                         nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
2254                         if (!nhp) {
2255                                 nla_nest_cancel(skb, mp_attr);
2256                                 return -EMSGSIZE;
2257                         }
2258 
2259                         nhp->rtnh_flags = 0;
2260                         nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
2261                         nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
2262                         nhp->rtnh_len = sizeof(*nhp);
2263                 }
2264         }
2265 
2266         nla_nest_end(skb, mp_attr);
2267 
2268         mfcs.mfcs_packets = c->mfc_un.res.pkt;
2269         mfcs.mfcs_bytes = c->mfc_un.res.bytes;
2270         mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
2271         if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0)
2272                 return -EMSGSIZE;
2273 
2274         rtm->rtm_type = RTN_MULTICAST;
2275         return 1;
2276 }
2277 
2278 int ip6mr_get_route(struct net *net,
2279                     struct sk_buff *skb, struct rtmsg *rtm, int nowait)
2280 {
2281         int err;
2282         struct mr6_table *mrt;
2283         struct mfc6_cache *cache;
2284         struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
2285 
2286         mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
2287         if (!mrt)
2288                 return -ENOENT;
2289 
2290         read_lock(&mrt_lock);
2291         cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
2292         if (!cache && skb->dev) {
2293                 int vif = ip6mr_find_vif(mrt, skb->dev);
2294 
2295                 if (vif >= 0)
2296                         cache = ip6mr_cache_find_any(mrt, &rt->rt6i_dst.addr,
2297                                                      vif);
2298         }
2299 
2300         if (!cache) {
2301                 struct sk_buff *skb2;
2302                 struct ipv6hdr *iph;
2303                 struct net_device *dev;
2304                 int vif;
2305 
2306                 if (nowait) {
2307                         read_unlock(&mrt_lock);
2308                         return -EAGAIN;
2309                 }
2310 
2311                 dev = skb->dev;
2312                 if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
2313                         read_unlock(&mrt_lock);
2314                         return -ENODEV;
2315                 }
2316 
2317                 /* really correct? */
2318                 skb2 = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
2319                 if (!skb2) {
2320                         read_unlock(&mrt_lock);
2321                         return -ENOMEM;
2322                 }
2323 
2324                 skb_reset_transport_header(skb2);
2325 
2326                 skb_put(skb2, sizeof(struct ipv6hdr));
2327                 skb_reset_network_header(skb2);
2328 
2329                 iph = ipv6_hdr(skb2);
2330                 iph->version = 0;
2331                 iph->priority = 0;
2332                 iph->flow_lbl[0] = 0;
2333                 iph->flow_lbl[1] = 0;
2334                 iph->flow_lbl[2] = 0;
2335                 iph->payload_len = 0;
2336                 iph->nexthdr = IPPROTO_NONE;
2337                 iph->hop_limit = 0;
2338                 iph->saddr = rt->rt6i_src.addr;
2339                 iph->daddr = rt->rt6i_dst.addr;
2340 
2341                 err = ip6mr_cache_unresolved(mrt, vif, skb2);
2342                 read_unlock(&mrt_lock);
2343 
2344                 return err;
2345         }
2346 
2347         if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
2348                 cache->mfc_flags |= MFC_NOTIFY;
2349 
2350         err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
2351         read_unlock(&mrt_lock);
2352         return err;
2353 }
2354 
2355 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
2356                              u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
2357                              int flags)
2358 {
2359         struct nlmsghdr *nlh;
2360         struct rtmsg *rtm;
2361         int err;
2362 
2363         nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);
2364         if (!nlh)
2365                 return -EMSGSIZE;
2366 
2367         rtm = nlmsg_data(nlh);
2368         rtm->rtm_family   = RTNL_FAMILY_IP6MR;
2369         rtm->rtm_dst_len  = 128;
2370         rtm->rtm_src_len  = 128;
2371         rtm->rtm_tos      = 0;
2372         rtm->rtm_table    = mrt->id;
2373         if (nla_put_u32(skb, RTA_TABLE, mrt->id))
2374                 goto nla_put_failure;
2375         rtm->rtm_type = RTN_MULTICAST;
2376         rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
2377         if (c->mfc_flags & MFC_STATIC)
2378                 rtm->rtm_protocol = RTPROT_STATIC;
2379         else
2380                 rtm->rtm_protocol = RTPROT_MROUTED;
2381         rtm->rtm_flags    = 0;
2382 
2383         if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
2384             nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
2385                 goto nla_put_failure;
2386         err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
2387         /* do not break the dump if cache is unresolved */
2388         if (err < 0 && err != -ENOENT)
2389                 goto nla_put_failure;
2390 
2391         nlmsg_end(skb, nlh);
2392         return 0;
2393 
2394 nla_put_failure:
2395         nlmsg_cancel(skb, nlh);
2396         return -EMSGSIZE;
2397 }
2398 
2399 static int mr6_msgsize(bool unresolved, int maxvif)
2400 {
2401         size_t len =
2402                 NLMSG_ALIGN(sizeof(struct rtmsg))
2403                 + nla_total_size(4)     /* RTA_TABLE */
2404                 + nla_total_size(sizeof(struct in6_addr))       /* RTA_SRC */
2405                 + nla_total_size(sizeof(struct in6_addr))       /* RTA_DST */
2406                 ;
2407 
2408         if (!unresolved)
2409                 len = len
2410                       + nla_total_size(4)       /* RTA_IIF */
2411                       + nla_total_size(0)       /* RTA_MULTIPATH */
2412                       + maxvif * NLA_ALIGN(sizeof(struct rtnexthop))
2413                                                 /* RTA_MFC_STATS */
2414                       + nla_total_size(sizeof(struct rta_mfc_stats))
2415                 ;
2416 
2417         return len;
2418 }
2419 
2420 static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
2421                               int cmd)
2422 {
2423         struct net *net = read_pnet(&mrt->net);
2424         struct sk_buff *skb;
2425         int err = -ENOBUFS;
2426 
2427         skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
2428                         GFP_ATOMIC);
2429         if (!skb)
2430                 goto errout;
2431 
2432         err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);
2433         if (err < 0)
2434                 goto errout;
2435 
2436         rtnl_notify(skb, net, 0, RTNLGRP_IPV6_MROUTE, NULL, GFP_ATOMIC);
2437         return;
2438 
2439 errout:
2440         kfree_skb(skb);
2441         if (err < 0)
2442                 rtnl_set_sk_err(net, RTNLGRP_IPV6_MROUTE, err);
2443 }
2444 
2445 static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
2446 {
2447         struct net *net = sock_net(skb->sk);
2448         struct mr6_table *mrt;
2449         struct mfc6_cache *mfc;
2450         unsigned int t = 0, s_t;
2451         unsigned int h = 0, s_h;
2452         unsigned int e = 0, s_e;
2453 
2454         s_t = cb->args[0];
2455         s_h = cb->args[1];
2456         s_e = cb->args[2];
2457 
2458         read_lock(&mrt_lock);
2459         ip6mr_for_each_table(mrt, net) {
2460                 if (t < s_t)
2461                         goto next_table;
2462                 if (t > s_t)
2463                         s_h = 0;
2464                 for (h = s_h; h < MFC6_LINES; h++) {
2465                         list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
2466                                 if (e < s_e)
2467                                         goto next_entry;
2468                                 if (ip6mr_fill_mroute(mrt, skb,
2469                                                       NETLINK_CB(cb->skb).portid,
2470                                                       cb->nlh->nlmsg_seq,
2471                                                       mfc, RTM_NEWROUTE,
2472                                                       NLM_F_MULTI) < 0)
2473                                         goto done;
2474 next_entry:
2475                                 e++;
2476                         }
2477                         e = s_e = 0;
2478                 }
2479                 spin_lock_bh(&mfc_unres_lock);
2480                 list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
2481                         if (e < s_e)
2482                                 goto next_entry2;
2483                         if (ip6mr_fill_mroute(mrt, skb,
2484                                               NETLINK_CB(cb->skb).portid,
2485                                               cb->nlh->nlmsg_seq,
2486                                               mfc, RTM_NEWROUTE,
2487                                               NLM_F_MULTI) < 0) {
2488                                 spin_unlock_bh(&mfc_unres_lock);
2489                                 goto done;
2490                         }
2491 next_entry2:
2492                         e++;
2493                 }
2494                 spin_unlock_bh(&mfc_unres_lock);
2495                 e = s_e = 0;
2496                 s_h = 0;
2497 next_table:
2498                 t++;
2499         }
2500 done:
2501         read_unlock(&mrt_lock);
2502 
2503         cb->args[2] = e;
2504         cb->args[1] = h;
2505         cb->args[0] = t;
2506 
2507         return skb->len;
2508 }
2509 

This page was automatically generated by LXR 0.3.1 (source).  •  Linux is a registered trademark of Linus Torvalds  •  Contact us