Version:  2.0.40 2.2.26 2.4.37 3.9 3.10 3.11 3.12 3.13 3.14 3.15 3.16 3.17 3.18 3.19 4.0 4.1 4.2 4.3 4.4 4.5 4.6

Linux/drivers/staging/lustre/lustre/obdclass/obd_mount.c

  1 /*
  2  * GPL HEADER START
  3  *
  4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  5  *
  6  * This program is free software; you can redistribute it and/or modify
  7  * it under the terms of the GNU General Public License version 2 only,
  8  * as published by the Free Software Foundation.
  9  *
 10  * This program is distributed in the hope that it will be useful, but
 11  * WITHOUT ANY WARRANTY; without even the implied warranty of
 12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 13  * General Public License version 2 for more details (a copy is included
 14  * in the LICENSE file that accompanied this code).
 15  *
 16  * You should have received a copy of the GNU General Public License
 17  * version 2 along with this program; If not, see
 18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 19  *
 20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 21  * CA 95054 USA or visit www.sun.com if you need additional information or
 22  * have any questions.
 23  *
 24  * GPL HEADER END
 25  */
 26 /*
 27  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 28  * Use is subject to license terms.
 29  *
 30  * Copyright (c) 2011, 2015, Intel Corporation.
 31  */
 32 /*
 33  * This file is part of Lustre, http://www.lustre.org/
 34  * Lustre is a trademark of Sun Microsystems, Inc.
 35  *
 36  * lustre/obdclass/obd_mount.c
 37  *
 38  * Client mount routines
 39  *
 40  * Author: Nathan Rutman <nathan@clusterfs.com>
 41  */
 42 
 43 #define DEBUG_SUBSYSTEM S_CLASS
 44 #define D_MOUNT (D_SUPER|D_CONFIG/*|D_WARNING */)
 45 #define PRINT_CMD CDEBUG
 46 
 47 #include "../include/obd.h"
 48 #include "../include/linux/lustre_compat25.h"
 49 #include "../include/obd_class.h"
 50 #include "../include/lustre/lustre_user.h"
 51 #include "../include/lustre_log.h"
 52 #include "../include/lustre_disk.h"
 53 #include "../include/lustre_param.h"
 54 
 55 static int (*client_fill_super)(struct super_block *sb,
 56                                 struct vfsmount *mnt);
 57 
 58 static void (*kill_super_cb)(struct super_block *sb);
 59 
 60 /**************** config llog ********************/
 61 
 62 /** Get a config log from the MGS and process it.
 63  * This func is called for both clients and servers.
 64  * Continue to process new statements appended to the logs
 65  * (whenever the config lock is revoked) until lustre_end_log
 66  * is called.
 67  * @param sb The superblock is used by the MGC to write to the local copy of
 68  *   the config log
 69  * @param logname The name of the llog to replicate from the MGS
 70  * @param cfg Since the same mgc may be used to follow multiple config logs
 71  *   (e.g. ost1, ost2, client), the config_llog_instance keeps the state for
 72  *   this log, and is added to the mgc's list of logs to follow.
 73  */
 74 int lustre_process_log(struct super_block *sb, char *logname,
 75                       struct config_llog_instance *cfg)
 76 {
 77         struct lustre_cfg *lcfg;
 78         struct lustre_cfg_bufs *bufs;
 79         struct lustre_sb_info *lsi = s2lsi(sb);
 80         struct obd_device *mgc = lsi->lsi_mgc;
 81         int rc;
 82 
 83         LASSERT(mgc);
 84         LASSERT(cfg);
 85 
 86         bufs = kzalloc(sizeof(*bufs), GFP_NOFS);
 87         if (!bufs)
 88                 return -ENOMEM;
 89 
 90         /* mgc_process_config */
 91         lustre_cfg_bufs_reset(bufs, mgc->obd_name);
 92         lustre_cfg_bufs_set_string(bufs, 1, logname);
 93         lustre_cfg_bufs_set(bufs, 2, cfg, sizeof(*cfg));
 94         lustre_cfg_bufs_set(bufs, 3, &sb, sizeof(sb));
 95         lcfg = lustre_cfg_new(LCFG_LOG_START, bufs);
 96         rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
 97         lustre_cfg_free(lcfg);
 98 
 99         kfree(bufs);
100 
101         if (rc == -EINVAL)
102                 LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s' failed from the MGS (%d).  Make sure this client and the MGS are running compatible versions of Lustre.\n",
103                                    mgc->obd_name, logname, rc);
104 
105         if (rc)
106                 LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' failed (%d). This may be the result of communication errors between this node and the MGS, a bad configuration, or other errors. See the syslog for more information.\n",
107                                    mgc->obd_name, logname,
108                                    rc);
109 
110         /* class_obd_list(); */
111         return rc;
112 }
113 EXPORT_SYMBOL(lustre_process_log);
114 
115 /* Stop watching this config log for updates */
116 int lustre_end_log(struct super_block *sb, char *logname,
117                    struct config_llog_instance *cfg)
118 {
119         struct lustre_cfg *lcfg;
120         struct lustre_cfg_bufs bufs;
121         struct lustre_sb_info *lsi = s2lsi(sb);
122         struct obd_device *mgc = lsi->lsi_mgc;
123         int rc;
124 
125         if (!mgc)
126                 return -ENOENT;
127 
128         /* mgc_process_config */
129         lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
130         lustre_cfg_bufs_set_string(&bufs, 1, logname);
131         if (cfg)
132                 lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
133         lcfg = lustre_cfg_new(LCFG_LOG_END, &bufs);
134         rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
135         lustre_cfg_free(lcfg);
136         return rc;
137 }
138 EXPORT_SYMBOL(lustre_end_log);
139 
140 /**************** obd start *******************/
141 
142 /** lustre_cfg_bufs are a holdover from 1.4; we can still set these up from
143  * lctl (and do for echo cli/srv.
144  */
145 static int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
146                    char *s1, char *s2, char *s3, char *s4)
147 {
148         struct lustre_cfg_bufs bufs;
149         struct lustre_cfg     *lcfg = NULL;
150         int rc;
151 
152         CDEBUG(D_TRACE, "lcfg %s %#x %s %s %s %s\n", cfgname,
153                cmd, s1, s2, s3, s4);
154 
155         lustre_cfg_bufs_reset(&bufs, cfgname);
156         if (s1)
157                 lustre_cfg_bufs_set_string(&bufs, 1, s1);
158         if (s2)
159                 lustre_cfg_bufs_set_string(&bufs, 2, s2);
160         if (s3)
161                 lustre_cfg_bufs_set_string(&bufs, 3, s3);
162         if (s4)
163                 lustre_cfg_bufs_set_string(&bufs, 4, s4);
164 
165         lcfg = lustre_cfg_new(cmd, &bufs);
166         lcfg->lcfg_nid = nid;
167         rc = class_process_config(lcfg);
168         lustre_cfg_free(lcfg);
169         return rc;
170 }
171 
172 /** Call class_attach and class_setup.  These methods in turn call
173  * obd type-specific methods.
174  */
175 static int lustre_start_simple(char *obdname, char *type, char *uuid,
176                                char *s1, char *s2, char *s3, char *s4)
177 {
178         int rc;
179 
180         CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);
181 
182         rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, NULL, NULL);
183         if (rc) {
184                 CERROR("%s attach error %d\n", obdname, rc);
185                 return rc;
186         }
187         rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, s3, s4);
188         if (rc) {
189                 CERROR("%s setup error %d\n", obdname, rc);
190                 do_lcfg(obdname, 0, LCFG_DETACH, NULL, NULL, NULL, NULL);
191         }
192         return rc;
193 }
194 
195 DEFINE_MUTEX(mgc_start_lock);
196 
197 /** Set up a mgc obd to process startup logs
198  *
199  * \param sb [in] super block of the mgc obd
200  *
201  * \retval 0 success, otherwise error code
202  */
203 int lustre_start_mgc(struct super_block *sb)
204 {
205         struct obd_connect_data *data = NULL;
206         struct lustre_sb_info *lsi = s2lsi(sb);
207         struct obd_device *obd;
208         struct obd_export *exp;
209         struct obd_uuid *uuid;
210         class_uuid_t uuidc;
211         lnet_nid_t nid;
212         char nidstr[LNET_NIDSTR_SIZE];
213         char *mgcname = NULL, *niduuid = NULL, *mgssec = NULL;
214         char *ptr;
215         int rc = 0, i = 0, j;
216 
217         LASSERT(lsi->lsi_lmd);
218 
219         /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
220         ptr = lsi->lsi_lmd->lmd_dev;
221         if (class_parse_nid(ptr, &nid, &ptr) == 0)
222                 i++;
223         if (i == 0) {
224                 CERROR("No valid MGS nids found.\n");
225                 return -EINVAL;
226         }
227 
228         mutex_lock(&mgc_start_lock);
229 
230         libcfs_nid2str_r(nid, nidstr, sizeof(nidstr));
231         mgcname = kasprintf(GFP_NOFS,
232                             "%s%s", LUSTRE_MGC_OBDNAME, nidstr);
233         niduuid = kasprintf(GFP_NOFS, "%s_%x", mgcname, i);
234         if (!mgcname || !niduuid) {
235                 rc = -ENOMEM;
236                 goto out_free;
237         }
238 
239         mgssec = lsi->lsi_lmd->lmd_mgssec ? lsi->lsi_lmd->lmd_mgssec : "";
240 
241         data = kzalloc(sizeof(*data), GFP_NOFS);
242         if (!data) {
243                 rc = -ENOMEM;
244                 goto out_free;
245         }
246 
247         obd = class_name2obd(mgcname);
248         if (obd && !obd->obd_stopping) {
249                 int recov_bk;
250 
251                 rc = obd_set_info_async(NULL, obd->obd_self_export,
252                                         strlen(KEY_MGSSEC), KEY_MGSSEC,
253                                         strlen(mgssec), mgssec, NULL);
254                 if (rc)
255                         goto out_free;
256 
257                 /* Re-using an existing MGC */
258                 atomic_inc(&obd->u.cli.cl_mgc_refcount);
259 
260                 /* IR compatibility check, only for clients */
261                 if (lmd_is_client(lsi->lsi_lmd)) {
262                         int has_ir;
263                         int vallen = sizeof(*data);
264                         __u32 *flags = &lsi->lsi_lmd->lmd_flags;
265 
266                         rc = obd_get_info(NULL, obd->obd_self_export,
267                                           strlen(KEY_CONN_DATA), KEY_CONN_DATA,
268                                           &vallen, data, NULL);
269                         LASSERT(rc == 0);
270                         has_ir = OCD_HAS_FLAG(data, IMP_RECOV);
271                         if (has_ir ^ !(*flags & LMD_FLG_NOIR)) {
272                                 /* LMD_FLG_NOIR is for test purpose only */
273                                 LCONSOLE_WARN(
274                                         "Trying to mount a client with IR setting not compatible with current mgc. Force to use current mgc setting that is IR %s.\n",
275                                         has_ir ? "enabled" : "disabled");
276                                 if (has_ir)
277                                         *flags &= ~LMD_FLG_NOIR;
278                                 else
279                                         *flags |= LMD_FLG_NOIR;
280                         }
281                 }
282 
283                 recov_bk = 0;
284 
285                 /* Try all connections, but only once (again).
286                  * We don't want to block another target from starting
287                  * (using its local copy of the log), but we do want to connect
288                  * if at all possible.
289                  */
290                 recov_bk++;
291                 CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname,
292                        recov_bk);
293                 rc = obd_set_info_async(NULL, obd->obd_self_export,
294                                         sizeof(KEY_INIT_RECOV_BACKUP),
295                                         KEY_INIT_RECOV_BACKUP,
296                                         sizeof(recov_bk), &recov_bk, NULL);
297                 rc = 0;
298                 goto out;
299         }
300 
301         CDEBUG(D_MOUNT, "Start MGC '%s'\n", mgcname);
302 
303         /* Add the primary nids for the MGS */
304         i = 0;
305         /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
306         ptr = lsi->lsi_lmd->lmd_dev;
307         while (class_parse_nid(ptr, &nid, &ptr) == 0) {
308                 rc = do_lcfg(mgcname, nid,
309                              LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
310                 i++;
311                 /* Stop at the first failover nid */
312                 if (*ptr == ':')
313                         break;
314         }
315         if (i == 0) {
316                 CERROR("No valid MGS nids found.\n");
317                 rc = -EINVAL;
318                 goto out_free;
319         }
320         lsi->lsi_lmd->lmd_mgs_failnodes = 1;
321 
322         /* Random uuid for MGC allows easier reconnects */
323         uuid = kzalloc(sizeof(*uuid), GFP_NOFS);
324         if (!uuid) {
325                 rc = -ENOMEM;
326                 goto out_free;
327         }
328 
329         ll_generate_random_uuid(uuidc);
330         class_uuid_unparse(uuidc, uuid);
331 
332         /* Start the MGC */
333         rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME,
334                                  (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
335                                  niduuid, NULL, NULL);
336         kfree(uuid);
337         if (rc)
338                 goto out_free;
339 
340         /* Add any failover MGS nids */
341         i = 1;
342         while (ptr && ((*ptr == ':' ||
343                         class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0))) {
344                 /* New failover node */
345                 sprintf(niduuid, "%s_%x", mgcname, i);
346                 j = 0;
347                 while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) {
348                         j++;
349                         rc = do_lcfg(mgcname, nid,
350                                      LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
351                         if (*ptr == ':')
352                                 break;
353                 }
354                 if (j > 0) {
355                         rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
356                                      niduuid, NULL, NULL, NULL);
357                         i++;
358                 } else {
359                         /* at ":/fsname" */
360                         break;
361                 }
362         }
363         lsi->lsi_lmd->lmd_mgs_failnodes = i;
364 
365         obd = class_name2obd(mgcname);
366         if (!obd) {
367                 CERROR("Can't find mgcobd %s\n", mgcname);
368                 rc = -ENOTCONN;
369                 goto out_free;
370         }
371 
372         rc = obd_set_info_async(NULL, obd->obd_self_export,
373                                 strlen(KEY_MGSSEC), KEY_MGSSEC,
374                                 strlen(mgssec), mgssec, NULL);
375         if (rc)
376                 goto out_free;
377 
378         /* Keep a refcount of servers/clients who started with "mount",
379          * so we know when we can get rid of the mgc.
380          */
381         atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
382 
383         /* We connect to the MGS at setup, and don't disconnect until cleanup */
384         data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_AT |
385                                   OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV |
386                                   OBD_CONNECT_LVB_TYPE;
387 
388 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
389         data->ocd_connect_flags |= OBD_CONNECT_MNE_SWAB;
390 #else
391 #warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab"
392 #endif
393 
394         if (lmd_is_client(lsi->lsi_lmd) &&
395             lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)
396                 data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV;
397         data->ocd_version = LUSTRE_VERSION_CODE;
398         rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL);
399         if (rc) {
400                 CERROR("connect failed %d\n", rc);
401                 goto out;
402         }
403 
404         obd->u.cli.cl_mgc_mgsexp = exp;
405 
406 out:
407         /* Keep the mgc info in the sb. Note that many lsi's can point
408          * to the same mgc.
409          */
410         lsi->lsi_mgc = obd;
411 out_free:
412         mutex_unlock(&mgc_start_lock);
413 
414         kfree(data);
415         kfree(mgcname);
416         kfree(niduuid);
417         return rc;
418 }
419 
420 static int lustre_stop_mgc(struct super_block *sb)
421 {
422         struct lustre_sb_info *lsi = s2lsi(sb);
423         struct obd_device *obd;
424         char *niduuid = NULL, *ptr = NULL;
425         int i, rc = 0, len = 0;
426 
427         if (!lsi)
428                 return -ENOENT;
429         obd = lsi->lsi_mgc;
430         if (!obd)
431                 return -ENOENT;
432         lsi->lsi_mgc = NULL;
433 
434         mutex_lock(&mgc_start_lock);
435         LASSERT(atomic_read(&obd->u.cli.cl_mgc_refcount) > 0);
436         if (!atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
437                 /* This is not fatal, every client that stops
438                  * will call in here.
439                  */
440                 CDEBUG(D_MOUNT, "mgc still has %d references.\n",
441                        atomic_read(&obd->u.cli.cl_mgc_refcount));
442                 rc = -EBUSY;
443                 goto out;
444         }
445 
446         /* The MGC has no recoverable data in any case.
447          * force shutdown set in umount_begin
448          */
449         obd->obd_no_recov = 1;
450 
451         if (obd->u.cli.cl_mgc_mgsexp) {
452                 /* An error is not fatal, if we are unable to send the
453                  * disconnect mgs ping evictor cleans up the export
454                  */
455                 rc = obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
456                 if (rc)
457                         CDEBUG(D_MOUNT, "disconnect failed %d\n", rc);
458         }
459 
460         /* Save the obdname for cleaning the nid uuids, which are obdname_XX */
461         len = strlen(obd->obd_name) + 6;
462         niduuid = kzalloc(len, GFP_NOFS);
463         if (niduuid) {
464                 strcpy(niduuid, obd->obd_name);
465                 ptr = niduuid + strlen(niduuid);
466         }
467 
468         rc = class_manual_cleanup(obd);
469         if (rc)
470                 goto out;
471 
472         /* Clean the nid uuids */
473         if (!niduuid) {
474                 rc = -ENOMEM;
475                 goto out;
476         }
477 
478         for (i = 0; i < lsi->lsi_lmd->lmd_mgs_failnodes; i++) {
479                 sprintf(ptr, "_%x", i);
480                 rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_DEL_UUID,
481                              niduuid, NULL, NULL, NULL);
482                 if (rc)
483                         CERROR("del MDC UUID %s failed: rc = %d\n",
484                                niduuid, rc);
485         }
486 out:
487         kfree(niduuid);
488 
489         /* class_import_put will get rid of the additional connections */
490         mutex_unlock(&mgc_start_lock);
491         return rc;
492 }
493 
494 /***************** lustre superblock **************/
495 
496 static struct lustre_sb_info *lustre_init_lsi(struct super_block *sb)
497 {
498         struct lustre_sb_info *lsi;
499 
500         lsi = kzalloc(sizeof(*lsi), GFP_NOFS);
501         if (!lsi)
502                 return NULL;
503         lsi->lsi_lmd = kzalloc(sizeof(*lsi->lsi_lmd), GFP_NOFS);
504         if (!lsi->lsi_lmd) {
505                 kfree(lsi);
506                 return NULL;
507         }
508 
509         lsi->lsi_lmd->lmd_exclude_count = 0;
510         lsi->lsi_lmd->lmd_recovery_time_soft = 0;
511         lsi->lsi_lmd->lmd_recovery_time_hard = 0;
512         s2lsi_nocast(sb) = lsi;
513         /* we take 1 extra ref for our setup */
514         atomic_set(&lsi->lsi_mounts, 1);
515 
516         /* Default umount style */
517         lsi->lsi_flags = LSI_UMOUNT_FAILOVER;
518 
519         return lsi;
520 }
521 
522 static int lustre_free_lsi(struct super_block *sb)
523 {
524         struct lustre_sb_info *lsi = s2lsi(sb);
525 
526         CDEBUG(D_MOUNT, "Freeing lsi %p\n", lsi);
527 
528         /* someone didn't call server_put_mount. */
529         LASSERT(atomic_read(&lsi->lsi_mounts) == 0);
530 
531         if (lsi->lsi_lmd) {
532                 kfree(lsi->lsi_lmd->lmd_dev);
533                 kfree(lsi->lsi_lmd->lmd_profile);
534                 kfree(lsi->lsi_lmd->lmd_mgssec);
535                 kfree(lsi->lsi_lmd->lmd_opts);
536                 if (lsi->lsi_lmd->lmd_exclude_count)
537                         kfree(lsi->lsi_lmd->lmd_exclude);
538                 kfree(lsi->lsi_lmd->lmd_mgs);
539                 kfree(lsi->lsi_lmd->lmd_osd_type);
540                 kfree(lsi->lsi_lmd->lmd_params);
541 
542                 kfree(lsi->lsi_lmd);
543         }
544 
545         LASSERT(!lsi->lsi_llsbi);
546         kfree(lsi);
547         s2lsi_nocast(sb) = NULL;
548 
549         return 0;
550 }
551 
552 /* The lsi has one reference for every server that is using the disk -
553  * e.g. MDT, MGS, and potentially MGC
554  */
555 static int lustre_put_lsi(struct super_block *sb)
556 {
557         struct lustre_sb_info *lsi = s2lsi(sb);
558 
559         CDEBUG(D_MOUNT, "put %p %d\n", sb, atomic_read(&lsi->lsi_mounts));
560         if (atomic_dec_and_test(&lsi->lsi_mounts)) {
561                 lustre_free_lsi(sb);
562                 return 1;
563         }
564         return 0;
565 }
566 
567 /*** SERVER NAME ***
568  * <FSNAME><SEPARATOR><TYPE><INDEX>
569  * FSNAME is between 1 and 8 characters (inclusive).
570  *      Excluded characters are '/' and ':'
571  * SEPARATOR is either ':' or '-'
572  * TYPE: "OST", "MDT", etc.
573  * INDEX: Hex representation of the index
574  */
575 
/** Extract the fsname ("lustre") from a server name ("lustre-OST003F").
 *
 * Only the first nine characters of svname are examined: the separator
 * ('-' or ':') is searched backwards starting at offset 8 (or at the end
 * of a shorter string), and a separator at offset 0 is never accepted, so
 * the fsname is between 1 and 8 characters long.
 *
 * @param [in] svname server name including type and index
 * @param [out] fsname Buffer to copy filesystem name prefix into, or NULL
 *  to skip the copy.  Must have room for the prefix plus a terminator.
 * @param [out] endptr if endptr isn't NULL it is set to the separator that
 *  ends the fsname
 *
 * @return 0 on success, -EINVAL if no usable separator was found
 */
static int server_name2fsname(const char *svname, char *fsname,
			      const char **endptr)
{
	const char *sep;
	size_t span = 0;

	/* max fsname length is 8: never look past offset 8 */
	while (span < 8 && svname[span] != '\0')
		span++;

	/* walk backwards to the last '-' or ':' after offset 0 */
	sep = svname + span;
	while (sep > svname) {
		if (*sep == '-' || *sep == ':')
			break;
		sep--;
	}
	if (sep == svname)
		return -EINVAL;

	if (fsname) {
		memcpy(fsname, svname, sep - svname);
		fsname[sep - svname] = '\0';
	}

	if (endptr)
		*endptr = sep;

	return 0;
}
604 
605 /* Get the index from the obd name.
606  *  rc = server type, or
607  * rc < 0  on error
608  * if endptr isn't NULL it is set to end of name
609  */
610 static int server_name2index(const char *svname, __u32 *idx,
611                              const char **endptr)
612 {
613         unsigned long index;
614         int rc;
615         const char *dash;
616 
617         /* We use server_name2fsname() just for parsing */
618         rc = server_name2fsname(svname, NULL, &dash);
619         if (rc != 0)
620                 return rc;
621 
622         dash++;
623 
624         if (strncmp(dash, "MDT", 3) == 0)
625                 rc = LDD_F_SV_TYPE_MDT;
626         else if (strncmp(dash, "OST", 3) == 0)
627                 rc = LDD_F_SV_TYPE_OST;
628         else
629                 return -EINVAL;
630 
631         dash += 3;
632 
633         if (strncmp(dash, "all", 3) == 0) {
634                 if (endptr)
635                         *endptr = dash + 3;
636                 return rc | LDD_F_SV_ALL;
637         }
638 
639         index = simple_strtoul(dash, (char **)endptr, 16);
640         if (idx)
641                 *idx = index;
642 
643         /* Account for -mdc after index that is possible when specifying mdt */
644         if (endptr && strncmp(LUSTRE_MDC_NAME, *endptr + 1,
645                               sizeof(LUSTRE_MDC_NAME) - 1) == 0)
646                 *endptr += sizeof(LUSTRE_MDC_NAME);
647 
648         return rc;
649 }
650 
651 /*************** mount common between server and client ***************/
652 
653 /* Common umount */
654 int lustre_common_put_super(struct super_block *sb)
655 {
656         int rc;
657 
658         CDEBUG(D_MOUNT, "dropping sb %p\n", sb);
659 
660         /* Drop a ref to the MGC */
661         rc = lustre_stop_mgc(sb);
662         if (rc && (rc != -ENOENT)) {
663                 if (rc != -EBUSY) {
664                         CERROR("Can't stop MGC: %d\n", rc);
665                         return rc;
666                 }
667                 /* BUSY just means that there's some other obd that
668                  * needs the mgc.  Let him clean it up.
669                  */
670                 CDEBUG(D_MOUNT, "MGC still in use\n");
671         }
672         /* Drop a ref to the mounted disk */
673         lustre_put_lsi(sb);
674         lu_types_stop();
675         return rc;
676 }
677 EXPORT_SYMBOL(lustre_common_put_super);
678 
679 static void lmd_print(struct lustre_mount_data *lmd)
680 {
681         int i;
682 
683         PRINT_CMD(D_MOUNT, "  mount data:\n");
684         if (lmd_is_client(lmd))
685                 PRINT_CMD(D_MOUNT, "profile: %s\n", lmd->lmd_profile);
686         PRINT_CMD(D_MOUNT, "device:  %s\n", lmd->lmd_dev);
687         PRINT_CMD(D_MOUNT, "flags:   %x\n", lmd->lmd_flags);
688 
689         if (lmd->lmd_opts)
690                 PRINT_CMD(D_MOUNT, "options: %s\n", lmd->lmd_opts);
691 
692         if (lmd->lmd_recovery_time_soft)
693                 PRINT_CMD(D_MOUNT, "recovery time soft: %d\n",
694                           lmd->lmd_recovery_time_soft);
695 
696         if (lmd->lmd_recovery_time_hard)
697                 PRINT_CMD(D_MOUNT, "recovery time hard: %d\n",
698                           lmd->lmd_recovery_time_hard);
699 
700         for (i = 0; i < lmd->lmd_exclude_count; i++) {
701                 PRINT_CMD(D_MOUNT, "exclude %d:  OST%04x\n", i,
702                           lmd->lmd_exclude[i]);
703         }
704 }
705 
706 /* Is this server on the exclusion list */
707 int lustre_check_exclusion(struct super_block *sb, char *svname)
708 {
709         struct lustre_sb_info *lsi = s2lsi(sb);
710         struct lustre_mount_data *lmd = lsi->lsi_lmd;
711         __u32 index;
712         int i, rc;
713 
714         rc = server_name2index(svname, &index, NULL);
715         if (rc != LDD_F_SV_TYPE_OST)
716                 /* Only exclude OSTs */
717                 return 0;
718 
719         CDEBUG(D_MOUNT, "Check exclusion %s (%d) in %d of %s\n", svname,
720                index, lmd->lmd_exclude_count, lmd->lmd_dev);
721 
722         for (i = 0; i < lmd->lmd_exclude_count; i++) {
723                 if (index == lmd->lmd_exclude[i]) {
724                         CWARN("Excluding %s (on exclusion list)\n", svname);
725                         return 1;
726                 }
727         }
728         return 0;
729 }
730 
731 /* mount -v  -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */
732 static int lmd_make_exclusion(struct lustre_mount_data *lmd, const char *ptr)
733 {
734         const char *s1 = ptr, *s2;
735         __u32 index, *exclude_list;
736         int rc = 0, devmax;
737 
738         /* The shortest an ost name can be is 8 chars: -OST0000.
739          * We don't actually know the fsname at this time, so in fact
740          * a user could specify any fsname.
741          */
742         devmax = strlen(ptr) / 8 + 1;
743 
744         /* temp storage until we figure out how many we have */
745         exclude_list = kcalloc(devmax, sizeof(index), GFP_NOFS);
746         if (!exclude_list)
747                 return -ENOMEM;
748 
749         /* we enter this fn pointing at the '=' */
750         while (*s1 && *s1 != ' ' && *s1 != ',') {
751                 s1++;
752                 rc = server_name2index(s1, &index, &s2);
753                 if (rc < 0) {
754                         CERROR("Can't parse server name '%s': rc = %d\n",
755                                s1, rc);
756                         break;
757                 }
758                 if (rc == LDD_F_SV_TYPE_OST)
759                         exclude_list[lmd->lmd_exclude_count++] = index;
760                 else
761                         CDEBUG(D_MOUNT, "ignoring exclude %.*s: type = %#x\n",
762                                (uint)(s2-s1), s1, rc);
763                 s1 = s2;
764                 /* now we are pointing at ':' (next exclude)
765                  * or ',' (end of excludes)
766                  */
767                 if (lmd->lmd_exclude_count >= devmax)
768                         break;
769         }
770         if (rc >= 0) /* non-err */
771                 rc = 0;
772 
773         if (lmd->lmd_exclude_count) {
774                 /* permanent, freed in lustre_free_lsi */
775                 lmd->lmd_exclude = kcalloc(lmd->lmd_exclude_count,
776                                            sizeof(index), GFP_NOFS);
777                 if (lmd->lmd_exclude) {
778                         memcpy(lmd->lmd_exclude, exclude_list,
779                                sizeof(index) * lmd->lmd_exclude_count);
780                 } else {
781                         rc = -ENOMEM;
782                         lmd->lmd_exclude_count = 0;
783                 }
784         }
785         kfree(exclude_list);
786         return rc;
787 }
788 
789 static int lmd_parse_mgssec(struct lustre_mount_data *lmd, char *ptr)
790 {
791         char   *tail;
792         int     length;
793 
794         kfree(lmd->lmd_mgssec);
795         lmd->lmd_mgssec = NULL;
796 
797         tail = strchr(ptr, ',');
798         if (!tail)
799                 length = strlen(ptr);
800         else
801                 length = tail - ptr;
802 
803         lmd->lmd_mgssec = kzalloc(length + 1, GFP_NOFS);
804         if (!lmd->lmd_mgssec)
805                 return -ENOMEM;
806 
807         memcpy(lmd->lmd_mgssec, ptr, length);
808         lmd->lmd_mgssec[length] = '\0';
809         return 0;
810 }
811 
812 static int lmd_parse_string(char **handle, char *ptr)
813 {
814         char   *tail;
815         int     length;
816 
817         if (!handle || !ptr)
818                 return -EINVAL;
819 
820         kfree(*handle);
821         *handle = NULL;
822 
823         tail = strchr(ptr, ',');
824         if (!tail)
825                 length = strlen(ptr);
826         else
827                 length = tail - ptr;
828 
829         *handle = kzalloc(length + 1, GFP_NOFS);
830         if (!*handle)
831                 return -ENOMEM;
832 
833         memcpy(*handle, ptr, length);
834         (*handle)[length] = '\0';
835 
836         return 0;
837 }
838 
839 /* Collect multiple values for mgsnid specifiers */
840 static int lmd_parse_mgs(struct lustre_mount_data *lmd, char **ptr)
841 {
842         lnet_nid_t nid;
843         char *tail = *ptr;
844         char *mgsnid;
845         int   length;
846         int   oldlen = 0;
847 
848         /* Find end of nidlist */
849         while (class_parse_nid_quiet(tail, &nid, &tail) == 0)
850                 ;
851         length = tail - *ptr;
852         if (length == 0) {
853                 LCONSOLE_ERROR_MSG(0x159, "Can't parse NID '%s'\n", *ptr);
854                 return -EINVAL;
855         }
856 
857         if (lmd->lmd_mgs)
858                 oldlen = strlen(lmd->lmd_mgs) + 1;
859 
860         mgsnid = kzalloc(oldlen + length + 1, GFP_NOFS);
861         if (!mgsnid)
862                 return -ENOMEM;
863 
864         if (lmd->lmd_mgs) {
865                 /* Multiple mgsnid= are taken to mean failover locations */
866                 memcpy(mgsnid, lmd->lmd_mgs, oldlen);
867                 mgsnid[oldlen - 1] = ':';
868                 kfree(lmd->lmd_mgs);
869         }
870         memcpy(mgsnid + oldlen, *ptr, length);
871         mgsnid[oldlen + length] = '\0';
872         lmd->lmd_mgs = mgsnid;
873         *ptr = tail;
874 
875         return 0;
876 }
877 
878 /** Parse mount line options
879  * e.g. mount -v -t lustre -o abort_recov uml1:uml2:/lustre-client /mnt/lustre
880  * dev is passed as device=uml1:/lustre by mount.lustre
881  */
882 static int lmd_parse(char *options, struct lustre_mount_data *lmd)
883 {
884         char *s1, *s2, *devname = NULL;
885         struct lustre_mount_data *raw = (struct lustre_mount_data *)options;
886         int rc = 0;
887 
888         LASSERT(lmd);
889         if (!options) {
890                 LCONSOLE_ERROR_MSG(0x162, "Missing mount data: check that /sbin/mount.lustre is installed.\n");
891                 return -EINVAL;
892         }
893 
894         /* Options should be a string - try to detect old lmd data */
895         if ((raw->lmd_magic & 0xffffff00) == (LMD_MAGIC & 0xffffff00)) {
896                 LCONSOLE_ERROR_MSG(0x163, "You're using an old version of /sbin/mount.lustre.  Please install version %s\n",
897                                    LUSTRE_VERSION_STRING);
898                 return -EINVAL;
899         }
900         lmd->lmd_magic = LMD_MAGIC;
901 
902         lmd->lmd_params = kzalloc(LMD_PARAMS_MAXLEN, GFP_NOFS);
903         if (!lmd->lmd_params)
904                 return -ENOMEM;
905         lmd->lmd_params[0] = '\0';
906 
907         /* Set default flags here */
908 
909         s1 = options;
910         while (*s1) {
911                 int clear = 0;
912                 int time_min = OBD_RECOVERY_TIME_MIN;
913 
914                 /* Skip whitespace and extra commas */
915                 while (*s1 == ' ' || *s1 == ',')
916                         s1++;
917 
918                 /* Client options are parsed in ll_options: eg. flock,
919                  * user_xattr, acl
920                  */
921 
922                 /* Parse non-ldiskfs options here. Rather than modifying
923                  * ldiskfs, we just zero these out here
924                  */
925                 if (strncmp(s1, "abort_recov", 11) == 0) {
926                         lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
927                         clear++;
928                 } else if (strncmp(s1, "recovery_time_soft=", 19) == 0) {
929                         lmd->lmd_recovery_time_soft = max_t(int,
930                                 simple_strtoul(s1 + 19, NULL, 10), time_min);
931                         clear++;
932                 } else if (strncmp(s1, "recovery_time_hard=", 19) == 0) {
933                         lmd->lmd_recovery_time_hard = max_t(int,
934                                 simple_strtoul(s1 + 19, NULL, 10), time_min);
935                         clear++;
936                 } else if (strncmp(s1, "noir", 4) == 0) {
937                         lmd->lmd_flags |= LMD_FLG_NOIR; /* test purpose only. */
938                         clear++;
939                 } else if (strncmp(s1, "nosvc", 5) == 0) {
940                         lmd->lmd_flags |= LMD_FLG_NOSVC;
941                         clear++;
942                 } else if (strncmp(s1, "nomgs", 5) == 0) {
943                         lmd->lmd_flags |= LMD_FLG_NOMGS;
944                         clear++;
945                 } else if (strncmp(s1, "noscrub", 7) == 0) {
946                         lmd->lmd_flags |= LMD_FLG_NOSCRUB;
947                         clear++;
948                 } else if (strncmp(s1, PARAM_MGSNODE,
949                                    sizeof(PARAM_MGSNODE) - 1) == 0) {
950                         s2 = s1 + sizeof(PARAM_MGSNODE) - 1;
951                         /* Assume the next mount opt is the first
952                          * invalid nid we get to.
953                          */
954                         rc = lmd_parse_mgs(lmd, &s2);
955                         if (rc)
956                                 goto invalid;
957                         clear++;
958                 } else if (strncmp(s1, "writeconf", 9) == 0) {
959                         lmd->lmd_flags |= LMD_FLG_WRITECONF;
960                         clear++;
961                 } else if (strncmp(s1, "update", 6) == 0) {
962                         lmd->lmd_flags |= LMD_FLG_UPDATE;
963                         clear++;
964                 } else if (strncmp(s1, "virgin", 6) == 0) {
965                         lmd->lmd_flags |= LMD_FLG_VIRGIN;
966                         clear++;
967                 } else if (strncmp(s1, "noprimnode", 10) == 0) {
968                         lmd->lmd_flags |= LMD_FLG_NO_PRIMNODE;
969                         clear++;
970                 } else if (strncmp(s1, "mgssec=", 7) == 0) {
971                         rc = lmd_parse_mgssec(lmd, s1 + 7);
972                         if (rc)
973                                 goto invalid;
974                         clear++;
975                 /* ost exclusion list */
976                 } else if (strncmp(s1, "exclude=", 8) == 0) {
977                         rc = lmd_make_exclusion(lmd, s1 + 7);
978                         if (rc)
979                                 goto invalid;
980                         clear++;
981                 } else if (strncmp(s1, "mgs", 3) == 0) {
982                         /* We are an MGS */
983                         lmd->lmd_flags |= LMD_FLG_MGS;
984                         clear++;
985                 } else if (strncmp(s1, "svname=", 7) == 0) {
986                         rc = lmd_parse_string(&lmd->lmd_profile, s1 + 7);
987                         if (rc)
988                                 goto invalid;
989                         clear++;
990                 } else if (strncmp(s1, "param=", 6) == 0) {
991                         size_t length, params_length;
992                         char *tail = strchr(s1 + 6, ',');
993 
994                         if (!tail)
995                                 length = strlen(s1);
996                         else
997                                 length = tail - s1;
998                         length -= 6;
999                         params_length = strlen(lmd->lmd_params);
1000                         if (params_length + length + 1 >= LMD_PARAMS_MAXLEN)
1001                                 return -E2BIG;
1002                         strncat(lmd->lmd_params, s1 + 6, length);
1003                         lmd->lmd_params[params_length + length] = '\0';
1004                         strlcat(lmd->lmd_params, " ", LMD_PARAMS_MAXLEN);
1005                         clear++;
1006                 } else if (strncmp(s1, "osd=", 4) == 0) {
1007                         rc = lmd_parse_string(&lmd->lmd_osd_type, s1 + 4);
1008                         if (rc)
1009                                 goto invalid;
1010                         clear++;
1011                 }
1012                 /* Linux 2.4 doesn't pass the device, so we stuck it at the
1013                  * end of the options.
1014                  */
1015                 else if (strncmp(s1, "device=", 7) == 0) {
1016                         devname = s1 + 7;
1017                         /* terminate options right before device.  device
1018                          * must be the last one.
1019                          */
1020                         *s1 = '\0';
1021                         break;
1022                 }
1023 
1024                 /* Find next opt */
1025                 s2 = strchr(s1, ',');
1026                 if (!s2) {
1027                         if (clear)
1028                                 *s1 = '\0';
1029                         break;
1030                 }
1031                 s2++;
1032                 if (clear)
1033                         memmove(s1, s2, strlen(s2) + 1);
1034                 else
1035                         s1 = s2;
1036         }
1037 
1038         if (!devname) {
1039                 LCONSOLE_ERROR_MSG(0x164, "Can't find the device name (need mount option 'device=...')\n");
1040                 goto invalid;
1041         }
1042 
1043         s1 = strstr(devname, ":/");
1044         if (s1) {
1045                 ++s1;
1046                 lmd->lmd_flags |= LMD_FLG_CLIENT;
1047                 /* Remove leading /s from fsname */
1048                 while (*++s1 == '/')
1049                         ;
1050                 /* Freed in lustre_free_lsi */
1051                 lmd->lmd_profile = kasprintf(GFP_NOFS, "%s-client", s1);
1052                 if (!lmd->lmd_profile)
1053                         return -ENOMEM;
1054         }
1055 
1056         /* Freed in lustre_free_lsi */
1057         lmd->lmd_dev = kzalloc(strlen(devname) + 1, GFP_NOFS);
1058         if (!lmd->lmd_dev)
1059                 return -ENOMEM;
1060         strcpy(lmd->lmd_dev, devname);
1061 
1062         /* Save mount options */
1063         s1 = options + strlen(options) - 1;
1064         while (s1 >= options && (*s1 == ',' || *s1 == ' '))
1065                 *s1-- = 0;
1066         if (*options != 0) {
1067                 /* Freed in lustre_free_lsi */
1068                 lmd->lmd_opts = kzalloc(strlen(options) + 1, GFP_NOFS);
1069                 if (!lmd->lmd_opts)
1070                         return -ENOMEM;
1071                 strcpy(lmd->lmd_opts, options);
1072         }
1073 
1074         lmd_print(lmd);
1075         lmd->lmd_magic = LMD_MAGIC;
1076 
1077         return rc;
1078 
1079 invalid:
1080         CERROR("Bad mount options %s\n", options);
1081         return -EINVAL;
1082 }
1083 
/*
 * Argument bundle handed to lustre_fill_super() through the opaque
 * "data" pointer of the mount call.
 */
struct lustre_mount_data2 {
	/* raw mount data as received by the ->mount handler */
	void		*lmd2_data;
	/* vfsmount forwarded to the client fill_super callback */
	struct vfsmount	*lmd2_mnt;
};
1088 
1089 /** This is the entry point for the mount call into Lustre.
1090  * This is called when a server or client is mounted,
1091  * and this is where we start setting things up.
1092  * @param data Mount options (e.g. -o flock,abort_recov)
1093  */
1094 static int lustre_fill_super(struct super_block *sb, void *data, int silent)
1095 {
1096         struct lustre_mount_data *lmd;
1097         struct lustre_mount_data2 *lmd2 = data;
1098         struct lustre_sb_info *lsi;
1099         int rc;
1100 
1101         CDEBUG(D_MOUNT|D_VFSTRACE, "VFS Op: sb %p\n", sb);
1102 
1103         lsi = lustre_init_lsi(sb);
1104         if (!lsi)
1105                 return -ENOMEM;
1106         lmd = lsi->lsi_lmd;
1107 
1108         /*
1109          * Disable lockdep during mount, because mount locking patterns are
1110          * `special'.
1111          */
1112         lockdep_off();
1113 
1114         /*
1115          * LU-639: the obd cleanup of last mount may not finish yet, wait here.
1116          */
1117         obd_zombie_barrier();
1118 
1119         /* Figure out the lmd from the mount options */
1120         if (lmd_parse((lmd2->lmd2_data), lmd)) {
1121                 lustre_put_lsi(sb);
1122                 rc = -EINVAL;
1123                 goto out;
1124         }
1125 
1126         if (lmd_is_client(lmd)) {
1127                 CDEBUG(D_MOUNT, "Mounting client %s\n", lmd->lmd_profile);
1128                 if (!client_fill_super)
1129                         request_module("lustre");
1130                 if (!client_fill_super) {
1131                         LCONSOLE_ERROR_MSG(0x165, "Nothing registered for client mount! Is the 'lustre' module loaded?\n");
1132                         lustre_put_lsi(sb);
1133                         rc = -ENODEV;
1134                 } else {
1135                         rc = lustre_start_mgc(sb);
1136                         if (rc) {
1137                                 lustre_put_lsi(sb);
1138                                 goto out;
1139                         }
1140                         /* Connect and start */
1141                         /* (should always be ll_fill_super) */
1142                         rc = (*client_fill_super)(sb, lmd2->lmd2_mnt);
1143                         /* c_f_s will call lustre_common_put_super on failure */
1144                 }
1145         } else {
1146                 CERROR("This is client-side-only module, cannot handle server mount.\n");
1147                 rc = -EINVAL;
1148         }
1149 
1150         /* If error happens in fill_super() call, @lsi will be killed there.
1151          * This is why we do not put it here.
1152          */
1153         goto out;
1154 out:
1155         if (rc) {
1156                 CERROR("Unable to mount %s (%d)\n",
1157                        s2lsi(sb) ? lmd->lmd_dev : "", rc);
1158         } else {
1159                 CDEBUG(D_SUPER, "Mount %s complete\n",
1160                        lmd->lmd_dev);
1161         }
1162         lockdep_on();
1163         return rc;
1164 }
1165 
1166 /* We can't call ll_fill_super by name because it lives in a module that
1167  * must be loaded after this one.
1168  */
1169 void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb,
1170                                                   struct vfsmount *mnt))
1171 {
1172         client_fill_super = cfs;
1173 }
1174 EXPORT_SYMBOL(lustre_register_client_fill_super);
1175 
1176 void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb))
1177 {
1178         kill_super_cb = cfs;
1179 }
1180 EXPORT_SYMBOL(lustre_register_kill_super_cb);
1181 
1182 /***************** FS registration ******************/
1183 static struct dentry *lustre_mount(struct file_system_type *fs_type, int flags,
1184                                    const char *devname, void *data)
1185 {
1186         struct lustre_mount_data2 lmd2 = {
1187                 .lmd2_data = data,
1188                 .lmd2_mnt = NULL
1189         };
1190 
1191         return mount_nodev(fs_type, flags, &lmd2, lustre_fill_super);
1192 }
1193 
1194 static void lustre_kill_super(struct super_block *sb)
1195 {
1196         struct lustre_sb_info *lsi = s2lsi(sb);
1197 
1198         if (kill_super_cb && lsi)
1199                 (*kill_super_cb)(sb);
1200 
1201         kill_anon_super(sb);
1202 }
1203 
1204 /** Register the "lustre" fs type
1205  */
1206 static struct file_system_type lustre_fs_type = {
1207         .owner  = THIS_MODULE,
1208         .name    = "lustre",
1209         .mount  = lustre_mount,
1210         .kill_sb      = lustre_kill_super,
1211         .fs_flags     = FS_BINARY_MOUNTDATA | FS_REQUIRES_DEV |
1212                         FS_RENAME_DOES_D_MOVE,
1213 };
1214 MODULE_ALIAS_FS("lustre");
1215 
1216 int lustre_register_fs(void)
1217 {
1218         return register_filesystem(&lustre_fs_type);
1219 }
1220 
1221 int lustre_unregister_fs(void)
1222 {
1223         return unregister_filesystem(&lustre_fs_type);
1224 }
1225 

This page was automatically generated by LXR 0.3.1 (source).  •  Linux is a registered trademark of Linus Torvalds  •  Contact us