Version:  2.0.40 2.2.26 2.4.37 3.8 3.9 3.10 3.11 3.12 3.13 3.14 3.15 3.16 3.17 3.18 3.19 4.0 4.1 4.2 4.3 4.4 4.5

Linux/drivers/staging/lustre/lustre/obdclass/obd_mount.c

  1 /*
  2  * GPL HEADER START
  3  *
  4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  5  *
  6  * This program is free software; you can redistribute it and/or modify
  7  * it under the terms of the GNU General Public License version 2 only,
  8  * as published by the Free Software Foundation.
  9  *
 10  * This program is distributed in the hope that it will be useful, but
 11  * WITHOUT ANY WARRANTY; without even the implied warranty of
 12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 13  * General Public License version 2 for more details (a copy is included
 14  * in the LICENSE file that accompanied this code).
 15  *
 16  * You should have received a copy of the GNU General Public License
 17  * version 2 along with this program; If not, see
 18  * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
 19  *
 20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 21  * CA 95054 USA or visit www.sun.com if you need additional information or
 22  * have any questions.
 23  *
 24  * GPL HEADER END
 25  */
 26 /*
 27  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 28  * Use is subject to license terms.
 29  *
 30  * Copyright (c) 2011, 2015, Intel Corporation.
 31  */
 32 /*
 33  * This file is part of Lustre, http://www.lustre.org/
 34  * Lustre is a trademark of Sun Microsystems, Inc.
 35  *
 36  * lustre/obdclass/obd_mount.c
 37  *
 38  * Client mount routines
 39  *
 40  * Author: Nathan Rutman <nathan@clusterfs.com>
 41  */
 42 
 43 #define DEBUG_SUBSYSTEM S_CLASS
 44 #define D_MOUNT (D_SUPER|D_CONFIG/*|D_WARNING */)
 45 #define PRINT_CMD CDEBUG
 46 
 47 #include "../include/obd.h"
 48 #include "../include/linux/lustre_compat25.h"
 49 #include "../include/obd_class.h"
 50 #include "../include/lustre/lustre_user.h"
 51 #include "../include/lustre_log.h"
 52 #include "../include/lustre_disk.h"
 53 #include "../include/lustre_param.h"
 54 
 /*
  * File-local function pointers used as hooks into the client filesystem
  * code.  Nothing in the part of the file visible here assigns or calls
  * them.  NOTE(review): presumably they are installed by a registration
  * routine further down in this file -- confirm.
  */
static int (*client_fill_super)(struct super_block *sb, struct vfsmount *mnt);
static void (*kill_super_cb)(struct super_block *sb);
 59 
 60 /**************** config llog ********************/
 61 
 62 /** Get a config log from the MGS and process it.
 63  * This func is called for both clients and servers.
 64  * Continue to process new statements appended to the logs
 65  * (whenever the config lock is revoked) until lustre_end_log
 66  * is called.
 67  * @param sb The superblock is used by the MGC to write to the local copy of
 68  *   the config log
 69  * @param logname The name of the llog to replicate from the MGS
 70  * @param cfg Since the same mgc may be used to follow multiple config logs
 71  *   (e.g. ost1, ost2, client), the config_llog_instance keeps the state for
 72  *   this log, and is added to the mgc's list of logs to follow.
 73  */
 74 int lustre_process_log(struct super_block *sb, char *logname,
 75                      struct config_llog_instance *cfg)
 76 {
 77         struct lustre_cfg *lcfg;
 78         struct lustre_cfg_bufs *bufs;
 79         struct lustre_sb_info *lsi = s2lsi(sb);
 80         struct obd_device *mgc = lsi->lsi_mgc;
 81         int rc;
 82 
 83         LASSERT(mgc);
 84         LASSERT(cfg);
 85 
 86         bufs = kzalloc(sizeof(*bufs), GFP_NOFS);
 87         if (!bufs)
 88                 return -ENOMEM;
 89 
 90         /* mgc_process_config */
 91         lustre_cfg_bufs_reset(bufs, mgc->obd_name);
 92         lustre_cfg_bufs_set_string(bufs, 1, logname);
 93         lustre_cfg_bufs_set(bufs, 2, cfg, sizeof(*cfg));
 94         lustre_cfg_bufs_set(bufs, 3, &sb, sizeof(sb));
 95         lcfg = lustre_cfg_new(LCFG_LOG_START, bufs);
 96         rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
 97         lustre_cfg_free(lcfg);
 98 
 99         kfree(bufs);
100 
101         if (rc == -EINVAL)
102                 LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s' failed from the MGS (%d).  Make sure this client and the MGS are running compatible versions of Lustre.\n",
103                                    mgc->obd_name, logname, rc);
104 
105         if (rc)
106                 LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' failed (%d). This may be the result of communication errors between this node and the MGS, a bad configuration, or other errors. See the syslog for more information.\n",
107                                    mgc->obd_name, logname,
108                                    rc);
109 
110         /* class_obd_list(); */
111         return rc;
112 }
113 EXPORT_SYMBOL(lustre_process_log);
114 
115 /* Stop watching this config log for updates */
116 int lustre_end_log(struct super_block *sb, char *logname,
117                        struct config_llog_instance *cfg)
118 {
119         struct lustre_cfg *lcfg;
120         struct lustre_cfg_bufs bufs;
121         struct lustre_sb_info *lsi = s2lsi(sb);
122         struct obd_device *mgc = lsi->lsi_mgc;
123         int rc;
124 
125         if (!mgc)
126                 return -ENOENT;
127 
128         /* mgc_process_config */
129         lustre_cfg_bufs_reset(&bufs, mgc->obd_name);
130         lustre_cfg_bufs_set_string(&bufs, 1, logname);
131         if (cfg)
132                 lustre_cfg_bufs_set(&bufs, 2, cfg, sizeof(*cfg));
133         lcfg = lustre_cfg_new(LCFG_LOG_END, &bufs);
134         rc = obd_process_config(mgc, sizeof(*lcfg), lcfg);
135         lustre_cfg_free(lcfg);
136         return rc;
137 }
138 EXPORT_SYMBOL(lustre_end_log);
139 
140 /**************** obd start *******************/
141 
142 /** lustre_cfg_bufs are a holdover from 1.4; we can still set these up from
143  * lctl (and do for echo cli/srv.
144  */
145 static int do_lcfg(char *cfgname, lnet_nid_t nid, int cmd,
146                    char *s1, char *s2, char *s3, char *s4)
147 {
148         struct lustre_cfg_bufs bufs;
149         struct lustre_cfg     *lcfg = NULL;
150         int rc;
151 
152         CDEBUG(D_TRACE, "lcfg %s %#x %s %s %s %s\n", cfgname,
153                cmd, s1, s2, s3, s4);
154 
155         lustre_cfg_bufs_reset(&bufs, cfgname);
156         if (s1)
157                 lustre_cfg_bufs_set_string(&bufs, 1, s1);
158         if (s2)
159                 lustre_cfg_bufs_set_string(&bufs, 2, s2);
160         if (s3)
161                 lustre_cfg_bufs_set_string(&bufs, 3, s3);
162         if (s4)
163                 lustre_cfg_bufs_set_string(&bufs, 4, s4);
164 
165         lcfg = lustre_cfg_new(cmd, &bufs);
166         lcfg->lcfg_nid = nid;
167         rc = class_process_config(lcfg);
168         lustre_cfg_free(lcfg);
169         return rc;
170 }
171 
172 /** Call class_attach and class_setup.  These methods in turn call
173  * obd type-specific methods.
174  */
175 static int lustre_start_simple(char *obdname, char *type, char *uuid,
176                                char *s1, char *s2, char *s3, char *s4)
177 {
178         int rc;
179 
180         CDEBUG(D_MOUNT, "Starting obd %s (typ=%s)\n", obdname, type);
181 
182         rc = do_lcfg(obdname, 0, LCFG_ATTACH, type, uuid, NULL, NULL);
183         if (rc) {
184                 CERROR("%s attach error %d\n", obdname, rc);
185                 return rc;
186         }
187         rc = do_lcfg(obdname, 0, LCFG_SETUP, s1, s2, s3, s4);
188         if (rc) {
189                 CERROR("%s setup error %d\n", obdname, rc);
190                 do_lcfg(obdname, 0, LCFG_DETACH, NULL, NULL, NULL, NULL);
191         }
192         return rc;
193 }
194 
195 DEFINE_MUTEX(mgc_start_lock);
196 
197 /** Set up a mgc obd to process startup logs
198  *
199  * \param sb [in] super block of the mgc obd
200  *
201  * \retval 0 success, otherwise error code
202  */
203 int lustre_start_mgc(struct super_block *sb)
204 {
205         struct obd_connect_data *data = NULL;
206         struct lustre_sb_info *lsi = s2lsi(sb);
207         struct obd_device *obd;
208         struct obd_export *exp;
209         struct obd_uuid *uuid;
210         class_uuid_t uuidc;
211         lnet_nid_t nid;
212         char nidstr[LNET_NIDSTR_SIZE];
213         char *mgcname = NULL, *niduuid = NULL, *mgssec = NULL;
214         char *ptr;
215         int rc = 0, i = 0, j;
216 
217         LASSERT(lsi->lsi_lmd);
218 
219         /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
220         ptr = lsi->lsi_lmd->lmd_dev;
221         if (class_parse_nid(ptr, &nid, &ptr) == 0)
222                 i++;
223         if (i == 0) {
224                 CERROR("No valid MGS nids found.\n");
225                 return -EINVAL;
226         }
227 
228         mutex_lock(&mgc_start_lock);
229 
230         libcfs_nid2str_r(nid, nidstr, sizeof(nidstr));
231         mgcname = kasprintf(GFP_NOFS,
232                             "%s%s", LUSTRE_MGC_OBDNAME, nidstr);
233         niduuid = kasprintf(GFP_NOFS, "%s_%x", mgcname, i);
234         if (!mgcname || !niduuid) {
235                 rc = -ENOMEM;
236                 goto out_free;
237         }
238 
239         mgssec = lsi->lsi_lmd->lmd_mgssec ? lsi->lsi_lmd->lmd_mgssec : "";
240 
241         data = kzalloc(sizeof(*data), GFP_NOFS);
242         if (!data) {
243                 rc = -ENOMEM;
244                 goto out_free;
245         }
246 
247         obd = class_name2obd(mgcname);
248         if (obd && !obd->obd_stopping) {
249                 int recov_bk;
250 
251                 rc = obd_set_info_async(NULL, obd->obd_self_export,
252                                         strlen(KEY_MGSSEC), KEY_MGSSEC,
253                                         strlen(mgssec), mgssec, NULL);
254                 if (rc)
255                         goto out_free;
256 
257                 /* Re-using an existing MGC */
258                 atomic_inc(&obd->u.cli.cl_mgc_refcount);
259 
260                 /* IR compatibility check, only for clients */
261                 if (lmd_is_client(lsi->lsi_lmd)) {
262                         int has_ir;
263                         int vallen = sizeof(*data);
264                         __u32 *flags = &lsi->lsi_lmd->lmd_flags;
265 
266                         rc = obd_get_info(NULL, obd->obd_self_export,
267                                           strlen(KEY_CONN_DATA), KEY_CONN_DATA,
268                                           &vallen, data, NULL);
269                         LASSERT(rc == 0);
270                         has_ir = OCD_HAS_FLAG(data, IMP_RECOV);
271                         if (has_ir ^ !(*flags & LMD_FLG_NOIR)) {
272                                 /* LMD_FLG_NOIR is for test purpose only */
273                                 LCONSOLE_WARN(
274                                         "Trying to mount a client with IR setting not compatible with current mgc. Force to use current mgc setting that is IR %s.\n",
275                                         has_ir ? "enabled" : "disabled");
276                                 if (has_ir)
277                                         *flags &= ~LMD_FLG_NOIR;
278                                 else
279                                         *flags |= LMD_FLG_NOIR;
280                         }
281                 }
282 
283                 recov_bk = 0;
284 
285                 /* Try all connections, but only once (again).
286                    We don't want to block another target from starting
287                    (using its local copy of the log), but we do want to connect
288                    if at all possible. */
289                 recov_bk++;
290                 CDEBUG(D_MOUNT, "%s: Set MGC reconnect %d\n", mgcname,
291                        recov_bk);
292                 rc = obd_set_info_async(NULL, obd->obd_self_export,
293                                         sizeof(KEY_INIT_RECOV_BACKUP),
294                                         KEY_INIT_RECOV_BACKUP,
295                                         sizeof(recov_bk), &recov_bk, NULL);
296                 rc = 0;
297                 goto out;
298         }
299 
300         CDEBUG(D_MOUNT, "Start MGC '%s'\n", mgcname);
301 
302         /* Add the primary nids for the MGS */
303         i = 0;
304         /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
305         ptr = lsi->lsi_lmd->lmd_dev;
306         while (class_parse_nid(ptr, &nid, &ptr) == 0) {
307                 rc = do_lcfg(mgcname, nid,
308                              LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
309                 i++;
310                 /* Stop at the first failover nid */
311                 if (*ptr == ':')
312                         break;
313         }
314         if (i == 0) {
315                 CERROR("No valid MGS nids found.\n");
316                 rc = -EINVAL;
317                 goto out_free;
318         }
319         lsi->lsi_lmd->lmd_mgs_failnodes = 1;
320 
321         /* Random uuid for MGC allows easier reconnects */
322         uuid = kzalloc(sizeof(*uuid), GFP_NOFS);
323         if (!uuid) {
324                 rc = -ENOMEM;
325                 goto out_free;
326         }
327 
328         ll_generate_random_uuid(uuidc);
329         class_uuid_unparse(uuidc, uuid);
330 
331         /* Start the MGC */
332         rc = lustre_start_simple(mgcname, LUSTRE_MGC_NAME,
333                                  (char *)uuid->uuid, LUSTRE_MGS_OBDNAME,
334                                  niduuid, NULL, NULL);
335         kfree(uuid);
336         if (rc)
337                 goto out_free;
338 
339         /* Add any failover MGS nids */
340         i = 1;
341         while (ptr && ((*ptr == ':' ||
342                class_find_param(ptr, PARAM_MGSNODE, &ptr) == 0))) {
343                 /* New failover node */
344                 sprintf(niduuid, "%s_%x", mgcname, i);
345                 j = 0;
346                 while (class_parse_nid_quiet(ptr, &nid, &ptr) == 0) {
347                         j++;
348                         rc = do_lcfg(mgcname, nid,
349                                      LCFG_ADD_UUID, niduuid, NULL, NULL, NULL);
350                         if (*ptr == ':')
351                                 break;
352                 }
353                 if (j > 0) {
354                         rc = do_lcfg(mgcname, 0, LCFG_ADD_CONN,
355                                      niduuid, NULL, NULL, NULL);
356                         i++;
357                 } else {
358                         /* at ":/fsname" */
359                         break;
360                 }
361         }
362         lsi->lsi_lmd->lmd_mgs_failnodes = i;
363 
364         obd = class_name2obd(mgcname);
365         if (!obd) {
366                 CERROR("Can't find mgcobd %s\n", mgcname);
367                 rc = -ENOTCONN;
368                 goto out_free;
369         }
370 
371         rc = obd_set_info_async(NULL, obd->obd_self_export,
372                                 strlen(KEY_MGSSEC), KEY_MGSSEC,
373                                 strlen(mgssec), mgssec, NULL);
374         if (rc)
375                 goto out_free;
376 
377         /* Keep a refcount of servers/clients who started with "mount",
378            so we know when we can get rid of the mgc. */
379         atomic_set(&obd->u.cli.cl_mgc_refcount, 1);
380 
381         /* We connect to the MGS at setup, and don't disconnect until cleanup */
382         data->ocd_connect_flags = OBD_CONNECT_VERSION | OBD_CONNECT_AT |
383                                   OBD_CONNECT_FULL20 | OBD_CONNECT_IMP_RECOV |
384                                   OBD_CONNECT_LVB_TYPE;
385 
386 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
387         data->ocd_connect_flags |= OBD_CONNECT_MNE_SWAB;
388 #else
389 #warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab"
390 #endif
391 
392         if (lmd_is_client(lsi->lsi_lmd) &&
393             lsi->lsi_lmd->lmd_flags & LMD_FLG_NOIR)
394                 data->ocd_connect_flags &= ~OBD_CONNECT_IMP_RECOV;
395         data->ocd_version = LUSTRE_VERSION_CODE;
396         rc = obd_connect(NULL, &exp, obd, &(obd->obd_uuid), data, NULL);
397         if (rc) {
398                 CERROR("connect failed %d\n", rc);
399                 goto out;
400         }
401 
402         obd->u.cli.cl_mgc_mgsexp = exp;
403 
404 out:
405         /* Keep the mgc info in the sb. Note that many lsi's can point
406            to the same mgc.*/
407         lsi->lsi_mgc = obd;
408 out_free:
409         mutex_unlock(&mgc_start_lock);
410 
411         kfree(data);
412         kfree(mgcname);
413         kfree(niduuid);
414         return rc;
415 }
416 
417 static int lustre_stop_mgc(struct super_block *sb)
418 {
419         struct lustre_sb_info *lsi = s2lsi(sb);
420         struct obd_device *obd;
421         char *niduuid = NULL, *ptr = NULL;
422         int i, rc = 0, len = 0;
423 
424         if (!lsi)
425                 return -ENOENT;
426         obd = lsi->lsi_mgc;
427         if (!obd)
428                 return -ENOENT;
429         lsi->lsi_mgc = NULL;
430 
431         mutex_lock(&mgc_start_lock);
432         LASSERT(atomic_read(&obd->u.cli.cl_mgc_refcount) > 0);
433         if (!atomic_dec_and_test(&obd->u.cli.cl_mgc_refcount)) {
434                 /* This is not fatal, every client that stops
435                    will call in here. */
436                 CDEBUG(D_MOUNT, "mgc still has %d references.\n",
437                        atomic_read(&obd->u.cli.cl_mgc_refcount));
438                 rc = -EBUSY;
439                 goto out;
440         }
441 
442         /* The MGC has no recoverable data in any case.
443          * force shutdown set in umount_begin */
444         obd->obd_no_recov = 1;
445 
446         if (obd->u.cli.cl_mgc_mgsexp) {
447                 /* An error is not fatal, if we are unable to send the
448                    disconnect mgs ping evictor cleans up the export */
449                 rc = obd_disconnect(obd->u.cli.cl_mgc_mgsexp);
450                 if (rc)
451                         CDEBUG(D_MOUNT, "disconnect failed %d\n", rc);
452         }
453 
454         /* Save the obdname for cleaning the nid uuids, which are
455            obdname_XX */
456         len = strlen(obd->obd_name) + 6;
457         niduuid = kzalloc(len, GFP_NOFS);
458         if (niduuid) {
459                 strcpy(niduuid, obd->obd_name);
460                 ptr = niduuid + strlen(niduuid);
461         }
462 
463         rc = class_manual_cleanup(obd);
464         if (rc)
465                 goto out;
466 
467         /* Clean the nid uuids */
468         if (!niduuid) {
469                 rc = -ENOMEM;
470                 goto out;
471         }
472 
473         for (i = 0; i < lsi->lsi_lmd->lmd_mgs_failnodes; i++) {
474                 sprintf(ptr, "_%x", i);
475                 rc = do_lcfg(LUSTRE_MGC_OBDNAME, 0, LCFG_DEL_UUID,
476                              niduuid, NULL, NULL, NULL);
477                 if (rc)
478                         CERROR("del MDC UUID %s failed: rc = %d\n",
479                                niduuid, rc);
480         }
481 out:
482         kfree(niduuid);
483 
484         /* class_import_put will get rid of the additional connections */
485         mutex_unlock(&mgc_start_lock);
486         return rc;
487 }
488 
489 /***************** lustre superblock **************/
490 
491 static struct lustre_sb_info *lustre_init_lsi(struct super_block *sb)
492 {
493         struct lustre_sb_info *lsi;
494 
495         lsi = kzalloc(sizeof(*lsi), GFP_NOFS);
496         if (!lsi)
497                 return NULL;
498         lsi->lsi_lmd = kzalloc(sizeof(*lsi->lsi_lmd), GFP_NOFS);
499         if (!lsi->lsi_lmd) {
500                 kfree(lsi);
501                 return NULL;
502         }
503 
504         lsi->lsi_lmd->lmd_exclude_count = 0;
505         lsi->lsi_lmd->lmd_recovery_time_soft = 0;
506         lsi->lsi_lmd->lmd_recovery_time_hard = 0;
507         s2lsi_nocast(sb) = lsi;
508         /* we take 1 extra ref for our setup */
509         atomic_set(&lsi->lsi_mounts, 1);
510 
511         /* Default umount style */
512         lsi->lsi_flags = LSI_UMOUNT_FAILOVER;
513 
514         return lsi;
515 }
516 
517 static int lustre_free_lsi(struct super_block *sb)
518 {
519         struct lustre_sb_info *lsi = s2lsi(sb);
520 
521         LASSERT(lsi != NULL);
522         CDEBUG(D_MOUNT, "Freeing lsi %p\n", lsi);
523 
524         /* someone didn't call server_put_mount. */
525         LASSERT(atomic_read(&lsi->lsi_mounts) == 0);
526 
527         if (lsi->lsi_lmd != NULL) {
528                 kfree(lsi->lsi_lmd->lmd_dev);
529                 kfree(lsi->lsi_lmd->lmd_profile);
530                 kfree(lsi->lsi_lmd->lmd_mgssec);
531                 kfree(lsi->lsi_lmd->lmd_opts);
532                 if (lsi->lsi_lmd->lmd_exclude_count)
533                         kfree(lsi->lsi_lmd->lmd_exclude);
534                 kfree(lsi->lsi_lmd->lmd_mgs);
535                 kfree(lsi->lsi_lmd->lmd_osd_type);
536                 kfree(lsi->lsi_lmd->lmd_params);
537 
538                 kfree(lsi->lsi_lmd);
539         }
540 
541         LASSERT(lsi->lsi_llsbi == NULL);
542         kfree(lsi);
543         s2lsi_nocast(sb) = NULL;
544 
545         return 0;
546 }
547 
548 /* The lsi has one reference for every server that is using the disk -
549    e.g. MDT, MGS, and potentially MGC */
550 static int lustre_put_lsi(struct super_block *sb)
551 {
552         struct lustre_sb_info *lsi = s2lsi(sb);
553 
554         LASSERT(lsi != NULL);
555 
556         CDEBUG(D_MOUNT, "put %p %d\n", sb, atomic_read(&lsi->lsi_mounts));
557         if (atomic_dec_and_test(&lsi->lsi_mounts)) {
558                 lustre_free_lsi(sb);
559                 return 1;
560         }
561         return 0;
562 }
563 
564 /*** SERVER NAME ***
565  * <FSNAME><SEPARATOR><TYPE><INDEX>
566  * FSNAME is between 1 and 8 characters (inclusive).
567  *      Excluded characters are '/' and ':'
568  * SEPARATOR is either ':' or '-'
569  * TYPE: "OST", "MDT", etc.
570  * INDEX: Hex representation of the index
571  */
572 
/**
 * Extract the fsname ("lustre") from a server name ("lustre-OST003F").
 *
 * The separator is the last '-' or ':' found within the first nine
 * characters of @svname, so the fsname is between 1 and 8 characters long.
 *
 * @param [in] svname server name including type and index
 * @param [out] fsname optional buffer receiving the NUL-terminated fsname;
 *  it must have room for the fsname plus the terminator (9 bytes always
 *  suffice)
 * @param [out] endptr if not NULL, set to the separator that ends the fsname
 * @return 0 on success, -EINVAL if no separator is found or the fsname
 *  would be empty
 */
static int server_name2fsname(const char *svname, char *fsname,
			      const char **endptr)
{
	/* max fsname length is 8, so start the backwards scan from there */
	const char *sep = svname + strnlen(svname, 8);
	size_t fslen;

	while (sep > svname && *sep != '-' && *sep != ':')
		sep--;

	if (sep == svname)
		return -EINVAL;

	fslen = sep - svname;
	if (fsname) {
		memcpy(fsname, svname, fslen);
		fsname[fslen] = '\0';
	}

	if (endptr)
		*endptr = sep;

	return 0;
}
601 
602 /* Get the index from the obd name.
603    rc = server type, or
604    rc < 0  on error
605    if endptr isn't NULL it is set to end of name */
606 static int server_name2index(const char *svname, __u32 *idx,
607                              const char **endptr)
608 {
609         unsigned long index;
610         int rc;
611         const char *dash;
612 
613         /* We use server_name2fsname() just for parsing */
614         rc = server_name2fsname(svname, NULL, &dash);
615         if (rc != 0)
616                 return rc;
617 
618         dash++;
619 
620         if (strncmp(dash, "MDT", 3) == 0)
621                 rc = LDD_F_SV_TYPE_MDT;
622         else if (strncmp(dash, "OST", 3) == 0)
623                 rc = LDD_F_SV_TYPE_OST;
624         else
625                 return -EINVAL;
626 
627         dash += 3;
628 
629         if (strncmp(dash, "all", 3) == 0) {
630                 if (endptr != NULL)
631                         *endptr = dash + 3;
632                 return rc | LDD_F_SV_ALL;
633         }
634 
635         index = simple_strtoul(dash, (char **)endptr, 16);
636         if (idx != NULL)
637                 *idx = index;
638 
639         /* Account for -mdc after index that is possible when specifying mdt */
640         if (endptr != NULL && strncmp(LUSTRE_MDC_NAME, *endptr + 1,
641                                       sizeof(LUSTRE_MDC_NAME)-1) == 0)
642                 *endptr += sizeof(LUSTRE_MDC_NAME);
643 
644         return rc;
645 }
646 
647 /*************** mount common between server and client ***************/
648 
649 /* Common umount */
650 int lustre_common_put_super(struct super_block *sb)
651 {
652         int rc;
653 
654         CDEBUG(D_MOUNT, "dropping sb %p\n", sb);
655 
656         /* Drop a ref to the MGC */
657         rc = lustre_stop_mgc(sb);
658         if (rc && (rc != -ENOENT)) {
659                 if (rc != -EBUSY) {
660                         CERROR("Can't stop MGC: %d\n", rc);
661                         return rc;
662                 }
663                 /* BUSY just means that there's some other obd that
664                    needs the mgc.  Let him clean it up. */
665                 CDEBUG(D_MOUNT, "MGC still in use\n");
666         }
667         /* Drop a ref to the mounted disk */
668         lustre_put_lsi(sb);
669         lu_types_stop();
670         return rc;
671 }
672 EXPORT_SYMBOL(lustre_common_put_super);
673 
674 static void lmd_print(struct lustre_mount_data *lmd)
675 {
676         int i;
677 
678         PRINT_CMD(D_MOUNT, "  mount data:\n");
679         if (lmd_is_client(lmd))
680                 PRINT_CMD(D_MOUNT, "profile: %s\n", lmd->lmd_profile);
681         PRINT_CMD(D_MOUNT, "device:  %s\n", lmd->lmd_dev);
682         PRINT_CMD(D_MOUNT, "flags:   %x\n", lmd->lmd_flags);
683 
684         if (lmd->lmd_opts)
685                 PRINT_CMD(D_MOUNT, "options: %s\n", lmd->lmd_opts);
686 
687         if (lmd->lmd_recovery_time_soft)
688                 PRINT_CMD(D_MOUNT, "recovery time soft: %d\n",
689                           lmd->lmd_recovery_time_soft);
690 
691         if (lmd->lmd_recovery_time_hard)
692                 PRINT_CMD(D_MOUNT, "recovery time hard: %d\n",
693                           lmd->lmd_recovery_time_hard);
694 
695         for (i = 0; i < lmd->lmd_exclude_count; i++) {
696                 PRINT_CMD(D_MOUNT, "exclude %d:  OST%04x\n", i,
697                           lmd->lmd_exclude[i]);
698         }
699 }
700 
701 /* Is this server on the exclusion list */
702 int lustre_check_exclusion(struct super_block *sb, char *svname)
703 {
704         struct lustre_sb_info *lsi = s2lsi(sb);
705         struct lustre_mount_data *lmd = lsi->lsi_lmd;
706         __u32 index;
707         int i, rc;
708 
709         rc = server_name2index(svname, &index, NULL);
710         if (rc != LDD_F_SV_TYPE_OST)
711                 /* Only exclude OSTs */
712                 return 0;
713 
714         CDEBUG(D_MOUNT, "Check exclusion %s (%d) in %d of %s\n", svname,
715                index, lmd->lmd_exclude_count, lmd->lmd_dev);
716 
717         for (i = 0; i < lmd->lmd_exclude_count; i++) {
718                 if (index == lmd->lmd_exclude[i]) {
719                         CWARN("Excluding %s (on exclusion list)\n", svname);
720                         return 1;
721                 }
722         }
723         return 0;
724 }
725 
726 /* mount -v  -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */
727 static int lmd_make_exclusion(struct lustre_mount_data *lmd, const char *ptr)
728 {
729         const char *s1 = ptr, *s2;
730         __u32 index, *exclude_list;
731         int rc = 0, devmax;
732 
733         /* The shortest an ost name can be is 8 chars: -OST0000.
734            We don't actually know the fsname at this time, so in fact
735            a user could specify any fsname. */
736         devmax = strlen(ptr) / 8 + 1;
737 
738         /* temp storage until we figure out how many we have */
739         exclude_list = kcalloc(devmax, sizeof(index), GFP_NOFS);
740         if (!exclude_list)
741                 return -ENOMEM;
742 
743         /* we enter this fn pointing at the '=' */
744         while (*s1 && *s1 != ' ' && *s1 != ',') {
745                 s1++;
746                 rc = server_name2index(s1, &index, &s2);
747                 if (rc < 0) {
748                         CERROR("Can't parse server name '%s': rc = %d\n",
749                                s1, rc);
750                         break;
751                 }
752                 if (rc == LDD_F_SV_TYPE_OST)
753                         exclude_list[lmd->lmd_exclude_count++] = index;
754                 else
755                         CDEBUG(D_MOUNT, "ignoring exclude %.*s: type = %#x\n",
756                                (uint)(s2-s1), s1, rc);
757                 s1 = s2;
758                 /* now we are pointing at ':' (next exclude)
759                    or ',' (end of excludes) */
760                 if (lmd->lmd_exclude_count >= devmax)
761                         break;
762         }
763         if (rc >= 0) /* non-err */
764                 rc = 0;
765 
766         if (lmd->lmd_exclude_count) {
767                 /* permanent, freed in lustre_free_lsi */
768                 lmd->lmd_exclude = kcalloc(lmd->lmd_exclude_count,
769                                            sizeof(index), GFP_NOFS);
770                 if (lmd->lmd_exclude) {
771                         memcpy(lmd->lmd_exclude, exclude_list,
772                                sizeof(index) * lmd->lmd_exclude_count);
773                 } else {
774                         rc = -ENOMEM;
775                         lmd->lmd_exclude_count = 0;
776                 }
777         }
778         kfree(exclude_list);
779         return rc;
780 }
781 
782 static int lmd_parse_mgssec(struct lustre_mount_data *lmd, char *ptr)
783 {
784         char   *tail;
785         int     length;
786 
787         kfree(lmd->lmd_mgssec);
788         lmd->lmd_mgssec = NULL;
789 
790         tail = strchr(ptr, ',');
791         if (tail == NULL)
792                 length = strlen(ptr);
793         else
794                 length = tail - ptr;
795 
796         lmd->lmd_mgssec = kzalloc(length + 1, GFP_NOFS);
797         if (!lmd->lmd_mgssec)
798                 return -ENOMEM;
799 
800         memcpy(lmd->lmd_mgssec, ptr, length);
801         lmd->lmd_mgssec[length] = '\0';
802         return 0;
803 }
804 
805 static int lmd_parse_string(char **handle, char *ptr)
806 {
807         char   *tail;
808         int     length;
809 
810         if ((handle == NULL) || (ptr == NULL))
811                 return -EINVAL;
812 
813         kfree(*handle);
814         *handle = NULL;
815 
816         tail = strchr(ptr, ',');
817         if (tail == NULL)
818                 length = strlen(ptr);
819         else
820                 length = tail - ptr;
821 
822         *handle = kzalloc(length + 1, GFP_NOFS);
823         if (!*handle)
824                 return -ENOMEM;
825 
826         memcpy(*handle, ptr, length);
827         (*handle)[length] = '\0';
828 
829         return 0;
830 }
831 
832 /* Collect multiple values for mgsnid specifiers */
833 static int lmd_parse_mgs(struct lustre_mount_data *lmd, char **ptr)
834 {
835         lnet_nid_t nid;
836         char *tail = *ptr;
837         char *mgsnid;
838         int   length;
839         int   oldlen = 0;
840 
841         /* Find end of nidlist */
842         while (class_parse_nid_quiet(tail, &nid, &tail) == 0)
843                 ;
844         length = tail - *ptr;
845         if (length == 0) {
846                 LCONSOLE_ERROR_MSG(0x159, "Can't parse NID '%s'\n", *ptr);
847                 return -EINVAL;
848         }
849 
850         if (lmd->lmd_mgs != NULL)
851                 oldlen = strlen(lmd->lmd_mgs) + 1;
852 
853         mgsnid = kzalloc(oldlen + length + 1, GFP_NOFS);
854         if (!mgsnid)
855                 return -ENOMEM;
856 
857         if (lmd->lmd_mgs != NULL) {
858                 /* Multiple mgsnid= are taken to mean failover locations */
859                 memcpy(mgsnid, lmd->lmd_mgs, oldlen);
860                 mgsnid[oldlen - 1] = ':';
861                 kfree(lmd->lmd_mgs);
862         }
863         memcpy(mgsnid + oldlen, *ptr, length);
864         mgsnid[oldlen + length] = '\0';
865         lmd->lmd_mgs = mgsnid;
866         *ptr = tail;
867 
868         return 0;
869 }
870 
871 /** Parse mount line options
872  * e.g. mount -v -t lustre -o abort_recov uml1:uml2:/lustre-client /mnt/lustre
873  * dev is passed as device=uml1:/lustre by mount.lustre
874  */
875 static int lmd_parse(char *options, struct lustre_mount_data *lmd)
876 {
877         char *s1, *s2, *devname = NULL;
878         struct lustre_mount_data *raw = (struct lustre_mount_data *)options;
879         int rc = 0;
880 
881         LASSERT(lmd);
882         if (!options) {
883                 LCONSOLE_ERROR_MSG(0x162, "Missing mount data: check that /sbin/mount.lustre is installed.\n");
884                 return -EINVAL;
885         }
886 
887         /* Options should be a string - try to detect old lmd data */
888         if ((raw->lmd_magic & 0xffffff00) == (LMD_MAGIC & 0xffffff00)) {
889                 LCONSOLE_ERROR_MSG(0x163, "You're using an old version of /sbin/mount.lustre.  Please install version %s\n",
890                                    LUSTRE_VERSION_STRING);
891                 return -EINVAL;
892         }
893         lmd->lmd_magic = LMD_MAGIC;
894 
895         lmd->lmd_params = kzalloc(LMD_PARAMS_MAXLEN, GFP_NOFS);
896         if (!lmd->lmd_params)
897                 return -ENOMEM;
898         lmd->lmd_params[0] = '\0';
899 
900         /* Set default flags here */
901 
902         s1 = options;
903         while (*s1) {
904                 int clear = 0;
905                 int time_min = OBD_RECOVERY_TIME_MIN;
906 
907                 /* Skip whitespace and extra commas */
908                 while (*s1 == ' ' || *s1 == ',')
909                         s1++;
910 
911                 /* Client options are parsed in ll_options: eg. flock,
912                    user_xattr, acl */
913 
914                 /* Parse non-ldiskfs options here. Rather than modifying
915                    ldiskfs, we just zero these out here */
916                 if (strncmp(s1, "abort_recov", 11) == 0) {
917                         lmd->lmd_flags |= LMD_FLG_ABORT_RECOV;
918                         clear++;
919                 } else if (strncmp(s1, "recovery_time_soft=", 19) == 0) {
920                         lmd->lmd_recovery_time_soft = max_t(int,
921                                 simple_strtoul(s1 + 19, NULL, 10), time_min);
922                         clear++;
923                 } else if (strncmp(s1, "recovery_time_hard=", 19) == 0) {
924                         lmd->lmd_recovery_time_hard = max_t(int,
925                                 simple_strtoul(s1 + 19, NULL, 10), time_min);
926                         clear++;
927                 } else if (strncmp(s1, "noir", 4) == 0) {
928                         lmd->lmd_flags |= LMD_FLG_NOIR; /* test purpose only. */
929                         clear++;
930                 } else if (strncmp(s1, "nosvc", 5) == 0) {
931                         lmd->lmd_flags |= LMD_FLG_NOSVC;
932                         clear++;
933                 } else if (strncmp(s1, "nomgs", 5) == 0) {
934                         lmd->lmd_flags |= LMD_FLG_NOMGS;
935                         clear++;
936                 } else if (strncmp(s1, "noscrub", 7) == 0) {
937                         lmd->lmd_flags |= LMD_FLG_NOSCRUB;
938                         clear++;
939                 } else if (strncmp(s1, PARAM_MGSNODE,
940                                    sizeof(PARAM_MGSNODE) - 1) == 0) {
941                         s2 = s1 + sizeof(PARAM_MGSNODE) - 1;
942                         /* Assume the next mount opt is the first
943                            invalid nid we get to. */
944                         rc = lmd_parse_mgs(lmd, &s2);
945                         if (rc)
946                                 goto invalid;
947                         clear++;
948                 } else if (strncmp(s1, "writeconf", 9) == 0) {
949                         lmd->lmd_flags |= LMD_FLG_WRITECONF;
950                         clear++;
951                 } else if (strncmp(s1, "update", 6) == 0) {
952                         lmd->lmd_flags |= LMD_FLG_UPDATE;
953                         clear++;
954                 } else if (strncmp(s1, "virgin", 6) == 0) {
955                         lmd->lmd_flags |= LMD_FLG_VIRGIN;
956                         clear++;
957                 } else if (strncmp(s1, "noprimnode", 10) == 0) {
958                         lmd->lmd_flags |= LMD_FLG_NO_PRIMNODE;
959                         clear++;
960                 } else if (strncmp(s1, "mgssec=", 7) == 0) {
961                         rc = lmd_parse_mgssec(lmd, s1 + 7);
962                         if (rc)
963                                 goto invalid;
964                         clear++;
965                 /* ost exclusion list */
966                 } else if (strncmp(s1, "exclude=", 8) == 0) {
967                         rc = lmd_make_exclusion(lmd, s1 + 7);
968                         if (rc)
969                                 goto invalid;
970                         clear++;
971                 } else if (strncmp(s1, "mgs", 3) == 0) {
972                         /* We are an MGS */
973                         lmd->lmd_flags |= LMD_FLG_MGS;
974                         clear++;
975                 } else if (strncmp(s1, "svname=", 7) == 0) {
976                         rc = lmd_parse_string(&lmd->lmd_profile, s1 + 7);
977                         if (rc)
978                                 goto invalid;
979                         clear++;
980                 } else if (strncmp(s1, "param=", 6) == 0) {
981                         size_t length, params_length;
982                         char *tail = strchr(s1 + 6, ',');
983 
984                         if (tail == NULL)
985                                 length = strlen(s1);
986                         else
987                                 length = tail - s1;
988                         length -= 6;
989                         params_length = strlen(lmd->lmd_params);
990                         if (params_length + length + 1 >= LMD_PARAMS_MAXLEN)
991                                 return -E2BIG;
992                         strncat(lmd->lmd_params, s1 + 6, length);
993                         lmd->lmd_params[params_length + length] = '\0';
994                         strlcat(lmd->lmd_params, " ", LMD_PARAMS_MAXLEN);
995                         clear++;
996                 } else if (strncmp(s1, "osd=", 4) == 0) {
997                         rc = lmd_parse_string(&lmd->lmd_osd_type, s1 + 4);
998                         if (rc)
999                                 goto invalid;
1000                         clear++;
1001                 }
1002                 /* Linux 2.4 doesn't pass the device, so we stuck it at the
1003                    end of the options. */
1004                 else if (strncmp(s1, "device=", 7) == 0) {
1005                         devname = s1 + 7;
1006                         /* terminate options right before device.  device
1007                            must be the last one. */
1008                         *s1 = '\0';
1009                         break;
1010                 }
1011 
1012                 /* Find next opt */
1013                 s2 = strchr(s1, ',');
1014                 if (s2 == NULL) {
1015                         if (clear)
1016                                 *s1 = '\0';
1017                         break;
1018                 }
1019                 s2++;
1020                 if (clear)
1021                         memmove(s1, s2, strlen(s2) + 1);
1022                 else
1023                         s1 = s2;
1024         }
1025 
1026         if (!devname) {
1027                 LCONSOLE_ERROR_MSG(0x164, "Can't find the device name (need mount option 'device=...')\n");
1028                 goto invalid;
1029         }
1030 
1031         s1 = strstr(devname, ":/");
1032         if (s1) {
1033                 ++s1;
1034                 lmd->lmd_flags |= LMD_FLG_CLIENT;
1035                 /* Remove leading /s from fsname */
1036                 while (*++s1 == '/')
1037                         ;
1038                 /* Freed in lustre_free_lsi */
1039                 lmd->lmd_profile = kasprintf(GFP_NOFS, "%s-client", s1);
1040                 if (!lmd->lmd_profile)
1041                         return -ENOMEM;
1042         }
1043 
1044         /* Freed in lustre_free_lsi */
1045         lmd->lmd_dev = kzalloc(strlen(devname) + 1, GFP_NOFS);
1046         if (!lmd->lmd_dev)
1047                 return -ENOMEM;
1048         strcpy(lmd->lmd_dev, devname);
1049 
1050         /* Save mount options */
1051         s1 = options + strlen(options) - 1;
1052         while (s1 >= options && (*s1 == ',' || *s1 == ' '))
1053                 *s1-- = 0;
1054         if (*options != 0) {
1055                 /* Freed in lustre_free_lsi */
1056                 lmd->lmd_opts = kzalloc(strlen(options) + 1, GFP_NOFS);
1057                 if (!lmd->lmd_opts)
1058                         return -ENOMEM;
1059                 strcpy(lmd->lmd_opts, options);
1060         }
1061 
1062         lmd_print(lmd);
1063         lmd->lmd_magic = LMD_MAGIC;
1064 
1065         return rc;
1066 
1067 invalid:
1068         CERROR("Bad mount options %s\n", options);
1069         return -EINVAL;
1070 }
1071 
/*
 * Argument bundle handed from lustre_mount() through mount_nodev() to
 * lustre_fill_super().
 */
struct lustre_mount_data2 {
	/* the 'data' pointer of the mount call; given to lmd_parse() */
	void *lmd2_data;
	/* passed on to client_fill_super(); lustre_mount() sets it NULL */
	struct vfsmount *lmd2_mnt;
};
1076 
1077 /** This is the entry point for the mount call into Lustre.
1078  * This is called when a server or client is mounted,
1079  * and this is where we start setting things up.
1080  * @param data Mount options (e.g. -o flock,abort_recov)
1081  */
1082 static int lustre_fill_super(struct super_block *sb, void *data, int silent)
1083 {
1084         struct lustre_mount_data *lmd;
1085         struct lustre_mount_data2 *lmd2 = data;
1086         struct lustre_sb_info *lsi;
1087         int rc;
1088 
1089         CDEBUG(D_MOUNT|D_VFSTRACE, "VFS Op: sb %p\n", sb);
1090 
1091         lsi = lustre_init_lsi(sb);
1092         if (!lsi)
1093                 return -ENOMEM;
1094         lmd = lsi->lsi_lmd;
1095 
1096         /*
1097          * Disable lockdep during mount, because mount locking patterns are
1098          * `special'.
1099          */
1100         lockdep_off();
1101 
1102         /*
1103          * LU-639: the obd cleanup of last mount may not finish yet, wait here.
1104          */
1105         obd_zombie_barrier();
1106 
1107         /* Figure out the lmd from the mount options */
1108         if (lmd_parse((lmd2->lmd2_data), lmd)) {
1109                 lustre_put_lsi(sb);
1110                 rc = -EINVAL;
1111                 goto out;
1112         }
1113 
1114         if (lmd_is_client(lmd)) {
1115                 CDEBUG(D_MOUNT, "Mounting client %s\n", lmd->lmd_profile);
1116                 if (client_fill_super == NULL)
1117                         request_module("lustre");
1118                 if (client_fill_super == NULL) {
1119                         LCONSOLE_ERROR_MSG(0x165, "Nothing registered for client mount! Is the 'lustre' module loaded?\n");
1120                         lustre_put_lsi(sb);
1121                         rc = -ENODEV;
1122                 } else {
1123                         rc = lustre_start_mgc(sb);
1124                         if (rc) {
1125                                 lustre_put_lsi(sb);
1126                                 goto out;
1127                         }
1128                         /* Connect and start */
1129                         /* (should always be ll_fill_super) */
1130                         rc = (*client_fill_super)(sb, lmd2->lmd2_mnt);
1131                         /* c_f_s will call lustre_common_put_super on failure */
1132                 }
1133         } else {
1134                 CERROR("This is client-side-only module, cannot handle server mount.\n");
1135                 rc = -EINVAL;
1136         }
1137 
1138         /* If error happens in fill_super() call, @lsi will be killed there.
1139          * This is why we do not put it here. */
1140         goto out;
1141 out:
1142         if (rc) {
1143                 CERROR("Unable to mount %s (%d)\n",
1144                        s2lsi(sb) ? lmd->lmd_dev : "", rc);
1145         } else {
1146                 CDEBUG(D_SUPER, "Mount %s complete\n",
1147                        lmd->lmd_dev);
1148         }
1149         lockdep_on();
1150         return rc;
1151 }
1152 
1153 /* We can't call ll_fill_super by name because it lives in a module that
1154    must be loaded after this one. */
1155 void lustre_register_client_fill_super(int (*cfs)(struct super_block *sb,
1156                                                   struct vfsmount *mnt))
1157 {
1158         client_fill_super = cfs;
1159 }
1160 EXPORT_SYMBOL(lustre_register_client_fill_super);
1161 
1162 void lustre_register_kill_super_cb(void (*cfs)(struct super_block *sb))
1163 {
1164         kill_super_cb = cfs;
1165 }
1166 EXPORT_SYMBOL(lustre_register_kill_super_cb);
1167 
1168 /***************** FS registration ******************/
1169 struct dentry *lustre_mount(struct file_system_type *fs_type, int flags,
1170                                 const char *devname, void *data)
1171 {
1172         struct lustre_mount_data2 lmd2 = {
1173                 .lmd2_data = data,
1174                 .lmd2_mnt = NULL
1175         };
1176 
1177         return mount_nodev(fs_type, flags, &lmd2, lustre_fill_super);
1178 }
1179 
1180 static void lustre_kill_super(struct super_block *sb)
1181 {
1182         struct lustre_sb_info *lsi = s2lsi(sb);
1183 
1184         if (kill_super_cb && lsi)
1185                 (*kill_super_cb)(sb);
1186 
1187         kill_anon_super(sb);
1188 }
1189 
1190 /** Register the "lustre" fs type
1191  */
1192 static struct file_system_type lustre_fs_type = {
1193         .owner  = THIS_MODULE,
1194         .name    = "lustre",
1195         .mount  = lustre_mount,
1196         .kill_sb      = lustre_kill_super,
1197         .fs_flags     = FS_BINARY_MOUNTDATA | FS_REQUIRES_DEV |
1198                         FS_RENAME_DOES_D_MOVE,
1199 };
1200 MODULE_ALIAS_FS("lustre");
1201 
1202 int lustre_register_fs(void)
1203 {
1204         return register_filesystem(&lustre_fs_type);
1205 }
1206 
1207 int lustre_unregister_fs(void)
1208 {
1209         return unregister_filesystem(&lustre_fs_type);
1210 }
1211 

This page was automatically generated by LXR 0.3.1 (source).  •  Linux is a registered trademark of Linus Torvalds  •  Contact us