svn commit: r331395 - in stable/11: cddl/contrib/opensolaris/cmd/zpool cddl/contrib/opensolaris/lib/libzfs/common cddl/usr.sbin/zfsd sys/cddl/contrib/opensolaris/common/zfs sys/cddl/contrib/opensol...

Alexander Motin mav at FreeBSD.org
Thu Mar 22 23:54:16 UTC 2018


Author: mav
Date: Thu Mar 22 23:54:14 2018
New Revision: 331395
URL: https://svnweb.freebsd.org/changeset/base/331395

Log:
  MFC r329681: MFV r318941: 7446 zpool create should support efi system partition
  
  illumos/illumos-gate at 7855d95b30fd903e3918bad5a29b777e765db821
  https://github.com/illumos/illumos-gate/commit/7855d95b30fd903e3918bad5a29b777e765db821
  
  https://www.illumos.org/issues/7446
    Since we support whole-disk configuration for the boot pool, we will also
    need whole-disk support with UEFI boot, and for this zpool create should
    create an EFI system partition.
    I have borrowed the idea from Oracle Solaris and am introducing the
    zpool create -B switch to provide a way to specify that a boot partition
    should be created.
    However, there is still a question of how big the system partition should be.
    For the time being, I have set the default size to 256MB (that's the minimum
    size for FAT32 with 4k blocks). To support a custom size, the set-on-creation
    "bootsize" property is created, so the custom size can be set as:
    zpool create -B -o bootsize=34MB rpool c0t0d0
    After the pool is created, the "bootsize" property is read-only. When the -B
    switch is not used, bootsize defaults to 0 and is shown in zpool get output
    with value '-'. Older zfs/zpool implementations ignore this property.
    https://www.illumos.org/rb/r/219/
  
  Reviewed by: Andrew Stormont <andyjstormont at gmail.com>
  Reviewed by: Yuri Pankov <yuri.pankov at gmail.com>
  Approved by: Dan McDonald <danmcd at kebe.com>
  Author: Toomas Soome <tsoome at me.com>
  
  This commit makes no sense for FreeBSD, which is why I blocked the option,
  but it is still good to stay closer to upstream.

Modified:
  stable/11/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
  stable/11/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h
  stable/11/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c
  stable/11/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
  stable/11/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
  stable/11/cddl/usr.sbin/zfsd/case_file.cc
  stable/11/sys/cddl/contrib/opensolaris/common/zfs/zpool_prop.c
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
  stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
  stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h
Directory Properties:
  stable/11/   (props changed)

Modified: stable/11/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c
==============================================================================
--- stable/11/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c	Thu Mar 22 23:53:18 2018	(r331394)
+++ stable/11/cddl/contrib/opensolaris/cmd/zpool/zpool_main.c	Thu Mar 22 23:54:14 2018	(r331395)
@@ -212,7 +212,8 @@ get_usage(zpool_help_t idx)
 	case HELP_CLEAR:
 		return (gettext("\tclear [-nF] <pool> [device]\n"));
 	case HELP_CREATE:
-		return (gettext("\tcreate [-fnd] [-o property=value] ... \n"
+		return (gettext("\tcreate [-fnd] [-B] "
+		    "[-o property=value] ... \n"
 		    "\t    [-O file-system-property=value] ... \n"
 		    "\t    [-m mountpoint] [-R root] <pool> <vdev> ...\n"));
 	case HELP_DESTROY:
@@ -499,6 +500,8 @@ zpool_do_add(int argc, char **argv)
 	int c;
 	nvlist_t *nvroot;
 	char *poolname;
+	zpool_boot_label_t boot_type;
+	uint64_t boot_size;
 	int ret;
 	zpool_handle_t *zhp;
 	nvlist_t *config;
@@ -547,9 +550,15 @@ zpool_do_add(int argc, char **argv)
 		return (1);
 	}
 
+	if (zpool_is_bootable(zhp))
+		boot_type = ZPOOL_COPY_BOOT_LABEL;
+	else
+		boot_type = ZPOOL_NO_BOOT_LABEL;
+
 	/* pass off to get_vdev_spec for processing */
+	boot_size = zpool_get_prop_int(zhp, ZPOOL_PROP_BOOTSIZE, NULL);
 	nvroot = make_root_vdev(zhp, force, !force, B_FALSE, dryrun,
-	    argc, argv);
+	    boot_type, boot_size, argc, argv);
 	if (nvroot == NULL) {
 		zpool_close(zhp);
 		return (1);
@@ -774,10 +783,11 @@ errout:
 }
 
 /*
- * zpool create [-fnd] [-o property=value] ...
+ * zpool create [-fnd] [-B] [-o property=value] ...
  *		[-O file-system-property=value] ...
  *		[-R root] [-m mountpoint] <pool> <dev> ...
  *
+ *	-B	Create boot partition.
  *	-f	Force creation, even if devices appear in use
  *	-n	Do not create the pool, but display the resulting layout if it
  *		were to be created.
@@ -794,12 +804,16 @@ errout:
  * we get the nvlist back from get_vdev_spec(), we either print out the contents
  * (if '-n' was specified), or pass it to libzfs to do the creation.
  */
+
+#define	SYSTEM256	(256 * 1024 * 1024)
 int
 zpool_do_create(int argc, char **argv)
 {
 	boolean_t force = B_FALSE;
 	boolean_t dryrun = B_FALSE;
 	boolean_t enable_all_pool_feat = B_TRUE;
+	zpool_boot_label_t boot_type = ZPOOL_NO_BOOT_LABEL;
+	uint64_t boot_size = 0;
 	int c;
 	nvlist_t *nvroot = NULL;
 	char *poolname;
@@ -811,7 +825,7 @@ zpool_do_create(int argc, char **argv)
 	char *propval;
 
 	/* check options */
-	while ((c = getopt(argc, argv, ":fndR:m:o:O:")) != -1) {
+	while ((c = getopt(argc, argv, ":fndBR:m:o:O:")) != -1) {
 		switch (c) {
 		case 'f':
 			force = B_TRUE;
@@ -822,6 +836,22 @@ zpool_do_create(int argc, char **argv)
 		case 'd':
 			enable_all_pool_feat = B_FALSE;
 			break;
+		case 'B':
+#ifdef illumos
+			/*
+			 * We should create the system partition.
+			 * Also make sure the size is set.
+			 */
+			boot_type = ZPOOL_CREATE_BOOT_LABEL;
+			if (boot_size == 0)
+				boot_size = SYSTEM256;
+			break;
+#else
+			(void) fprintf(stderr,
+			    gettext("option '%c' is not supported\n"),
+			    optopt);
+			goto badusage;
+#endif
 		case 'R':
 			altroot = optarg;
 			if (add_prop_list(zpool_prop_to_name(
@@ -852,6 +882,20 @@ zpool_do_create(int argc, char **argv)
 				goto errout;
 
 			/*
+			 * Get bootsize value for make_root_vdev().
+			 */
+			if (zpool_name_to_prop(optarg) == ZPOOL_PROP_BOOTSIZE) {
+				if (zfs_nicestrtonum(g_zfs, propval,
+				    &boot_size) < 0 || boot_size == 0) {
+					(void) fprintf(stderr,
+					    gettext("bad boot partition size "
+					    "'%s': %s\n"),  propval,
+					    libzfs_error_description(g_zfs));
+					goto errout;
+				}
+			}
+
+			/*
 			 * If the user is creating a pool that doesn't support
 			 * feature flags, don't enable any features.
 			 */
@@ -928,9 +972,43 @@ zpool_do_create(int argc, char **argv)
 		goto errout;
 	}
 
+	/*
+	 * Make sure the bootsize is set when ZPOOL_CREATE_BOOT_LABEL is used,
+	 * and not set otherwise.
+	 */
+	if (boot_type == ZPOOL_CREATE_BOOT_LABEL) {
+		const char *propname;
+		char *strptr, *buf = NULL;
+		int rv;
+
+		propname = zpool_prop_to_name(ZPOOL_PROP_BOOTSIZE);
+		if (nvlist_lookup_string(props, propname, &strptr) != 0) {
+			(void) asprintf(&buf, "%" PRIu64, boot_size);
+			if (buf == NULL) {
+				(void) fprintf(stderr,
+				    gettext("internal error: out of memory\n"));
+				goto errout;
+			}
+			rv = add_prop_list(propname, buf, &props, B_TRUE);
+			free(buf);
+			if (rv != 0)
+				goto errout;
+		}
+	} else {
+		const char *propname;
+		char *strptr;
+
+		propname = zpool_prop_to_name(ZPOOL_PROP_BOOTSIZE);
+		if (nvlist_lookup_string(props, propname, &strptr) == 0) {
+			(void) fprintf(stderr, gettext("error: setting boot "
+			    "partition size requires option '-B'\n"));
+			goto errout;
+		}
+	}
+
 	/* pass off to get_vdev_spec for bulk processing */
 	nvroot = make_root_vdev(NULL, force, !force, B_FALSE, dryrun,
-	    argc - 1, argv + 1);
+	    boot_type, boot_size, argc - 1, argv + 1);
 	if (nvroot == NULL)
 		goto errout;
 
@@ -3209,6 +3287,8 @@ zpool_do_attach_or_replace(int argc, char **argv, int 
 	nvlist_t *nvroot;
 	char *poolname, *old_disk, *new_disk;
 	zpool_handle_t *zhp;
+	zpool_boot_label_t boot_type;
+	uint64_t boot_size;
 	int ret;
 
 	/* check options */
@@ -3273,8 +3353,14 @@ zpool_do_attach_or_replace(int argc, char **argv, int 
 		return (1);
 	}
 
+	if (zpool_is_bootable(zhp))
+		boot_type = ZPOOL_COPY_BOOT_LABEL;
+	else
+		boot_type = ZPOOL_NO_BOOT_LABEL;
+
+	boot_size = zpool_get_prop_int(zhp, ZPOOL_PROP_BOOTSIZE, NULL);
 	nvroot = make_root_vdev(zhp, force, B_FALSE, replacing, B_FALSE,
-	    argc, argv);
+	    boot_type, boot_size, argc, argv);
 	if (nvroot == NULL) {
 		zpool_close(zhp);
 		return (1);

Modified: stable/11/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h
==============================================================================
--- stable/11/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h	Thu Mar 22 23:53:18 2018	(r331394)
+++ stable/11/cddl/contrib/opensolaris/cmd/zpool/zpool_util.h	Thu Mar 22 23:54:14 2018	(r331395)
@@ -44,7 +44,8 @@ uint_t num_logs(nvlist_t *nv);
  */
 
 nvlist_t *make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
-    boolean_t replacing, boolean_t dryrun, int argc, char **argv);
+    boolean_t replacing, boolean_t dryrun, zpool_boot_label_t boot_type,
+    uint64_t boot_size, int argc, char **argv);
 nvlist_t *split_mirror_vdev(zpool_handle_t *zhp, char *newname,
     nvlist_t *props, splitflags_t flags, int argc, char **argv);
 

Modified: stable/11/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c
==============================================================================
--- stable/11/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c	Thu Mar 22 23:53:18 2018	(r331394)
+++ stable/11/cddl/contrib/opensolaris/cmd/zpool/zpool_vdev.c	Thu Mar 22 23:54:14 2018	(r331395)
@@ -935,14 +935,15 @@ check_replication(nvlist_t *config, nvlist_t *newroot)
  * Go through and find any whole disks in the vdev specification, labelling them
  * as appropriate.  When constructing the vdev spec, we were unable to open this
  * device in order to provide a devid.  Now that we have labelled the disk and
- * know that slice 0 is valid, we can construct the devid now.
+ * know the pool slice is valid, we can construct the devid now.
  *
  * If the disk was already labeled with an EFI label, we will have gotten the
  * devid already (because we were able to open the whole disk).  Otherwise, we
  * need to get the devid after we label the disk.
  */
 static int
-make_disks(zpool_handle_t *zhp, nvlist_t *nv)
+make_disks(zpool_handle_t *zhp, nvlist_t *nv, zpool_boot_label_t boot_type,
+    uint64_t boot_size)
 {
 	nvlist_t **child;
 	uint_t c, children;
@@ -951,6 +952,7 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
 	uint64_t wholedisk;
 	int fd;
 	int ret;
+	int slice;
 	ddi_devid_t devid;
 	char *minor = NULL, *devid_str = NULL;
 
@@ -968,20 +970,36 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
 		 * slice and stat()ing the device.
 		 */
 		verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
-		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
-		    &wholedisk) != 0 || !wholedisk)
-			return (0);
 
 		diskname = strrchr(path, '/');
 		assert(diskname != NULL);
 		diskname++;
-		if (zpool_label_disk(g_zfs, zhp, diskname) == -1)
-			return (-1);
 
+		if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
+		    &wholedisk) != 0 || !wholedisk) {
+			/*
+			 * This is not whole disk, return error if
+			 * boot partition creation was requested
+			 */
+			if (boot_type == ZPOOL_CREATE_BOOT_LABEL) {
+				(void) fprintf(stderr,
+				    gettext("creating boot partition is only "
+				    "supported on whole disk vdevs: %s\n"),
+				    diskname);
+				return (-1);
+			}
+			return (0);
+		}
+
+		ret = zpool_label_disk(g_zfs, zhp, diskname, boot_type,
+		    boot_size, &slice);
+		if (ret == -1)
+			return (ret);
+
 		/*
 		 * Fill in the devid, now that we've labeled the disk.
 		 */
-		(void) snprintf(buf, sizeof (buf), "%ss0", path);
+		(void) snprintf(buf, sizeof (buf), "%ss%d", path, slice);
 		if ((fd = open(buf, O_RDONLY)) < 0) {
 			(void) fprintf(stderr,
 			    gettext("cannot open '%s': %s\n"),
@@ -1004,7 +1022,7 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
 		}
 
 		/*
-		 * Update the path to refer to the 's0' slice.  The presence of
+		 * Update the path to refer to the pool slice.  The presence of
 		 * the 'whole_disk' field indicates to the CLI that we should
 		 * chop off the slice number when displaying the device in
 		 * future output.
@@ -1016,21 +1034,36 @@ make_disks(zpool_handle_t *zhp, nvlist_t *nv)
 		return (0);
 	}
 
-	for (c = 0; c < children; c++)
-		if ((ret = make_disks(zhp, child[c])) != 0)
+	/* illumos kernel does not support booting from multi-vdev pools. */
+	if ((boot_type == ZPOOL_CREATE_BOOT_LABEL)) {
+		if ((strcmp(type, VDEV_TYPE_ROOT) == 0) && children > 1) {
+			(void) fprintf(stderr, gettext("boot pool "
+			    "can not have more than one vdev\n"));
+			return (-1);
+		}
+	}
+
+	for (c = 0; c < children; c++) {
+		ret = make_disks(zhp, child[c], boot_type, boot_size);
+		if (ret != 0)
 			return (ret);
+	}
 
 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
 	    &child, &children) == 0)
-		for (c = 0; c < children; c++)
-			if ((ret = make_disks(zhp, child[c])) != 0)
+		for (c = 0; c < children; c++) {
+			ret = make_disks(zhp, child[c], boot_type, boot_size);
+			if (ret != 0)
 				return (ret);
+		}
 
 	if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
 	    &child, &children) == 0)
-		for (c = 0; c < children; c++)
-			if ((ret = make_disks(zhp, child[c])) != 0)
+		for (c = 0; c < children; c++) {
+			ret = make_disks(zhp, child[c], boot_type, boot_size);
+			if (ret != 0)
 				return (ret);
+		}
 
 	return (0);
 }
@@ -1429,6 +1462,9 @@ split_mirror_vdev(zpool_handle_t *zhp, char *newname, 
 {
 	nvlist_t *newroot = NULL, **child;
 	uint_t c, children;
+#ifdef illumos
+	zpool_boot_label_t boot_type;
+#endif
 
 	if (argc > 0) {
 		if ((newroot = construct_spec(argc, argv)) == NULL) {
@@ -1438,7 +1474,13 @@ split_mirror_vdev(zpool_handle_t *zhp, char *newname, 
 		}
 
 #ifdef illumos
-		if (!flags.dryrun && make_disks(zhp, newroot) != 0) {
+		if (zpool_is_bootable(zhp))
+			boot_type = ZPOOL_COPY_BOOT_LABEL;
+		else
+			boot_type = ZPOOL_NO_BOOT_LABEL;
+
+		if (!flags.dryrun &&
+		    make_disks(zhp, newroot, boot_type, 0) != 0) {
 			nvlist_free(newroot);
 			return (NULL);
 		}
@@ -1483,7 +1525,8 @@ split_mirror_vdev(zpool_handle_t *zhp, char *newname, 
  */
 nvlist_t *
 make_root_vdev(zpool_handle_t *zhp, int force, int check_rep,
-    boolean_t replacing, boolean_t dryrun, int argc, char **argv)
+    boolean_t replacing, boolean_t dryrun, zpool_boot_label_t boot_type,
+    uint64_t boot_size, int argc, char **argv)
 {
 	nvlist_t *newroot;
 	nvlist_t *poolconfig = NULL;
@@ -1525,7 +1568,7 @@ make_root_vdev(zpool_handle_t *zhp, int force, int che
 	/*
 	 * Run through the vdev specification and label any whole disks found.
 	 */
-	if (!dryrun && make_disks(zhp, newroot) != 0) {
+	if (!dryrun && make_disks(zhp, newroot, boot_type, boot_size) != 0) {
 		nvlist_free(newroot);
 		return (NULL);
 	}

Modified: stable/11/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
==============================================================================
--- stable/11/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h	Thu Mar 22 23:53:18 2018	(r331394)
+++ stable/11/cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h	Thu Mar 22 23:54:14 2018	(r331395)
@@ -135,6 +135,18 @@ typedef enum zfs_error {
 } zfs_error_t;
 
 /*
+ * UEFI boot support parameters. When creating whole disk boot pool,
+ * zpool create should allow to create EFI System partition for UEFI boot
+ * program. In case of BIOS, the EFI System partition is not used
+ * even if it does exist.
+ */
+typedef enum zpool_boot_label {
+	ZPOOL_NO_BOOT_LABEL = 0,
+	ZPOOL_CREATE_BOOT_LABEL,
+	ZPOOL_COPY_BOOT_LABEL
+} zpool_boot_label_t;
+
+/*
  * The following data structures are all part
  * of the zfs_allow_t data structure which is
  * used for printing 'allow' permissions.
@@ -266,7 +278,8 @@ extern nvlist_t *zpool_find_vdev(zpool_handle_t *, con
     boolean_t *, boolean_t *);
 extern nvlist_t *zpool_find_vdev_by_physpath(zpool_handle_t *, const char *,
     boolean_t *, boolean_t *, boolean_t *);
-extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, const char *);
+extern int zpool_label_disk(libzfs_handle_t *, zpool_handle_t *, const char *,
+    zpool_boot_label_t, uint64_t, int *);
 
 /*
  * Functions to manage pool properties
@@ -349,6 +362,7 @@ extern nvlist_t *zpool_get_config(zpool_handle_t *, nv
 extern nvlist_t *zpool_get_features(zpool_handle_t *);
 extern int zpool_refresh_stats(zpool_handle_t *, boolean_t *);
 extern int zpool_get_errlog(zpool_handle_t *, nvlist_t **);
+extern boolean_t zpool_is_bootable(zpool_handle_t *);
 
 /*
  * Import and export functions

Modified: stable/11/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c
==============================================================================
--- stable/11/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c	Thu Mar 22 23:53:18 2018	(r331394)
+++ stable/11/cddl/contrib/opensolaris/lib/libzfs/common/libzfs_pool.c	Thu Mar 22 23:54:14 2018	(r331395)
@@ -49,7 +49,7 @@
 #include "zfs_comutil.h"
 #include "zfeature_common.h"
 
-static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
+static int read_efi_label(nvlist_t *, diskaddr_t *, boolean_t *);
 static boolean_t zpool_vdev_is_interior(const char *name);
 
 #define	BACKUP_SLICE	"s2"
@@ -316,6 +316,7 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop,
 				(void) zfs_nicenum(intval, buf, len);
 			}
 			break;
+		case ZPOOL_PROP_BOOTSIZE:
 		case ZPOOL_PROP_EXPANDSZ:
 			if (intval == 0) {
 				(void) strlcpy(buf, "-", len);
@@ -517,6 +518,16 @@ zpool_valid_proplist(libzfs_handle_t *hdl, const char 
 			}
 			break;
 
+		case ZPOOL_PROP_BOOTSIZE:
+			if (!flags.create) {
+				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+				    "property '%s' can only be set during pool "
+				    "creation"), propname);
+				(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
+				goto error;
+			}
+			break;
+
 		case ZPOOL_PROP_BOOTFS:
 			if (flags.create || flags.import) {
 				zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
@@ -1990,8 +2001,9 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, bo
 		/*
 		 * Search for the requested value. Special cases:
 		 *
-		 * - ZPOOL_CONFIG_PATH for whole disk entries.  These end in
-		 *   "s0" or "s0/old".  The "s0" part is hidden from the user,
+		 * - ZPOOL_CONFIG_PATH for whole disk entries. To support
+		 *   UEFI boot, these end in "s0" or "s0/old" or "s1" or
+		 *   "s1/old".   The "s0" or "s1" part is hidden from the user,
 		 *   but included in the string, so this matches around it.
 		 * - looking for a top-level vdev name (i.e. ZPOOL_CONFIG_TYPE).
 		 *
@@ -2022,14 +2034,16 @@ vdev_to_nvlist_iter(nvlist_t *nv, nvlist_t *search, bo
 				/*
 				 * strings identical except trailing "s0"
 				 */
-				if (strcmp(&val[vlen - 2], "s0") == 0 &&
+				if ((strcmp(&val[vlen - 2], "s0") == 0 ||
+				    strcmp(&val[vlen - 2], "s1") == 0) &&
 				    strncmp(srchval, val, slen) == 0)
 					return (nv);
 
 				/*
 				 * strings identical except trailing "s0/old"
 				 */
-				if (strcmp(&val[vlen - 6], "s0/old") == 0 &&
+				if ((strcmp(&val[vlen - 6], "s0/old") == 0 ||
+				    strcmp(&val[vlen - 6], "s1/old") == 0) &&
 				    strcmp(&srchval[slen - 4], "/old") == 0 &&
 				    strncmp(srchval, val, slen - 4) == 0)
 					return (nv);
@@ -3460,15 +3474,17 @@ zpool_vdev_name(libzfs_handle_t *hdl, zpool_handle_t *
 			char *tmp = zfs_strdup(hdl, path);
 
 			/*
-			 * If it starts with c#, and ends with "s0", chop
-			 * the "s0" off, or if it ends with "s0/old", remove
-			 * the "s0" from the middle.
+			 * If it starts with c#, and ends with "s0" or "s1",
+			 * chop the slice off, or if it ends with "s0/old" or
+			 * "s1/old", remove the slice from the middle.
 			 */
 			if (CTD_CHECK(tmp)) {
-				if (strcmp(&tmp[pathlen - 2], "s0") == 0) {
+				if (strcmp(&tmp[pathlen - 2], "s0") == 0 ||
+				    strcmp(&tmp[pathlen - 2], "s1") == 0) {
 					tmp[pathlen - 2] = '\0';
 				} else if (pathlen > 6 &&
-				    strcmp(&tmp[pathlen - 6], "s0/old") == 0) {
+				    (strcmp(&tmp[pathlen - 6], "s0/old") == 0 ||
+				    strcmp(&tmp[pathlen - 6], "s1/old") == 0)) {
 					(void) strcpy(&tmp[pathlen - 6],
 					    "/old");
 				}
@@ -3873,15 +3889,18 @@ zpool_obj_to_path(zpool_handle_t *zhp, uint64_t dsobj,
  * Read the EFI label from the config, if a label does not exist then
  * pass back the error to the caller. If the caller has passed a non-NULL
  * diskaddr argument then we set it to the starting address of the EFI
- * partition.
+ * partition. If the caller has passed a non-NULL boolean argument, then
+ * we set it to indicate if the disk does have efi system partition.
  */
 static int
-read_efi_label(nvlist_t *config, diskaddr_t *sb)
+read_efi_label(nvlist_t *config, diskaddr_t *sb, boolean_t *system)
 {
 	char *path;
 	int fd;
 	char diskname[MAXPATHLEN];
+	boolean_t boot = B_FALSE;
 	int err = -1;
+	int slice;
 
 	if (nvlist_lookup_string(config, ZPOOL_CONFIG_PATH, &path) != 0)
 		return (err);
@@ -3892,8 +3911,16 @@ read_efi_label(nvlist_t *config, diskaddr_t *sb)
 		struct dk_gpt *vtoc;
 
 		if ((err = efi_alloc_and_read(fd, &vtoc)) >= 0) {
-			if (sb != NULL)
-				*sb = vtoc->efi_parts[0].p_start;
+			for (slice = 0; slice < vtoc->efi_nparts; slice++) {
+				if (vtoc->efi_parts[slice].p_tag == V_SYSTEM)
+					boot = B_TRUE;
+				if (vtoc->efi_parts[slice].p_tag == V_USR)
+					break;
+			}
+			if (sb != NULL && vtoc->efi_parts[slice].p_tag == V_USR)
+				*sb = vtoc->efi_parts[slice].p_start;
+			if (system != NULL)
+				*system = boot;
 			efi_free(vtoc);
 		}
 		(void) close(fd);
@@ -3920,7 +3947,7 @@ find_start_block(nvlist_t *config)
 		    &wholedisk) != 0 || !wholedisk) {
 			return (MAXOFFSET_T);
 		}
-		if (read_efi_label(config, &sb) < 0)
+		if (read_efi_label(config, &sb, NULL) < 0)
 			sb = MAXOFFSET_T;
 		return (sb);
 	}
@@ -3940,7 +3967,8 @@ find_start_block(nvlist_t *config)
  * stripped of any leading /dev path.
  */
 int
-zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name)
+zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name,
+    zpool_boot_label_t boot_type, uint64_t boot_size, int *slice)
 {
 #ifdef illumos
 	char path[MAXPATHLEN];
@@ -3999,15 +4027,6 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t 
 		return (zfs_error(hdl, EZFS_NOCAP, errbuf));
 	}
 
-	slice_size = vtoc->efi_last_u_lba + 1;
-	slice_size -= EFI_MIN_RESV_SIZE;
-	if (start_block == MAXOFFSET_T)
-		start_block = NEW_START_BLOCK;
-	slice_size -= start_block;
-
-	vtoc->efi_parts[0].p_start = start_block;
-	vtoc->efi_parts[0].p_size = slice_size;
-
 	/*
 	 * Why we use V_USR: V_BACKUP confuses users, and is considered
 	 * disposable by some EFI utilities (since EFI doesn't have a backup
@@ -4016,12 +4035,103 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t 
 	 * etc. were all pretty specific.  V_USR is as close to reality as we
 	 * can get, in the absence of V_OTHER.
 	 */
-	vtoc->efi_parts[0].p_tag = V_USR;
-	(void) strcpy(vtoc->efi_parts[0].p_name, "zfs");
+	/* first fix the partition start block */
+	if (start_block == MAXOFFSET_T)
+		start_block = NEW_START_BLOCK;
 
-	vtoc->efi_parts[8].p_start = slice_size + start_block;
-	vtoc->efi_parts[8].p_size = resv;
-	vtoc->efi_parts[8].p_tag = V_RESERVED;
+	/*
+	 * EFI System partition is using slice 0.
+	 * ZFS is on slice 1 and slice 8 is reserved.
+	 * We assume the GPT partition table without system
+	 * partition has zfs p_start == NEW_START_BLOCK.
+	 * If start_block != NEW_START_BLOCK, it means we have
+	 * system partition. Correct solution would be to query/cache vtoc
+	 * from existing vdev member.
+	 */
+	if (boot_type == ZPOOL_CREATE_BOOT_LABEL) {
+		if (boot_size % vtoc->efi_lbasize != 0) {
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "boot partition size must be a multiple of %d"),
+			    vtoc->efi_lbasize);
+			(void) close(fd);
+			efi_free(vtoc);
+			return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
+		}
+		/*
+		 * System partition size checks.
+		 * Note the 1MB is quite arbitrary value, since we
+		 * are creating dedicated pool, it should be enough
+		 * to hold fat + efi bootloader. May need to be
+		 * adjusted if the bootloader size will grow.
+		 */
+		if (boot_size < 1024 * 1024) {
+			char buf[64];
+			zfs_nicenum(boot_size, buf, sizeof (buf));
+			zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
+			    "Specified size %s for EFI System partition is too "
+			    "small, the minimum size is 1MB."), buf);
+			(void) close(fd);
+			efi_free(vtoc);
+			return (zfs_error(hdl, EZFS_LABELFAILED, errbuf));
+		}
+		/* 33MB is tested with mkfs -F pcfs */
+		if (hdl->libzfs_printerr &&
+		    ((vtoc->efi_lbasize == 512 &&
+		    boot_size < 33 * 1024 * 1024) ||
+		    (vtoc->efi_lbasize == 4096 &&
+		    boot_size < 256 * 1024 * 1024)))  {
+			char buf[64];
+			zfs_nicenum(boot_size, buf, sizeof (buf));
+			(void) fprintf(stderr, dgettext(TEXT_DOMAIN,
+			    "Warning: EFI System partition size %s is "
+			    "not allowing to create FAT32 file\nsystem, which "
+			    "may result in unbootable system.\n"), buf);
+		}
+		/* Adjust zfs partition start by size of system partition. */
+		start_block += boot_size / vtoc->efi_lbasize;
+	}
+
+	if (start_block == NEW_START_BLOCK) {
+		/*
+		 * Use default layout.
+		 * ZFS is on slice 0 and slice 8 is reserved.
+		 */
+		slice_size = vtoc->efi_last_u_lba + 1;
+		slice_size -= EFI_MIN_RESV_SIZE;
+		slice_size -= start_block;
+		if (slice != NULL)
+			*slice = 0;
+
+		vtoc->efi_parts[0].p_start = start_block;
+		vtoc->efi_parts[0].p_size = slice_size;
+
+		vtoc->efi_parts[0].p_tag = V_USR;
+		(void) strcpy(vtoc->efi_parts[0].p_name, "zfs");
+
+		vtoc->efi_parts[8].p_start = slice_size + start_block;
+		vtoc->efi_parts[8].p_size = resv;
+		vtoc->efi_parts[8].p_tag = V_RESERVED;
+	} else {
+		slice_size = start_block - NEW_START_BLOCK;
+		vtoc->efi_parts[0].p_start = NEW_START_BLOCK;
+		vtoc->efi_parts[0].p_size = slice_size;
+		vtoc->efi_parts[0].p_tag = V_SYSTEM;
+		(void) strcpy(vtoc->efi_parts[0].p_name, "loader");
+		if (slice != NULL)
+			*slice = 1;
+		/* prepare slice 1 */
+		slice_size = vtoc->efi_last_u_lba + 1 - slice_size;
+		slice_size -= resv;
+		slice_size -= NEW_START_BLOCK;
+		vtoc->efi_parts[1].p_start = start_block;
+		vtoc->efi_parts[1].p_size = slice_size;
+		vtoc->efi_parts[1].p_tag = V_USR;
+		(void) strcpy(vtoc->efi_parts[1].p_name, "zfs");
+
+		vtoc->efi_parts[8].p_start = slice_size + start_block;
+		vtoc->efi_parts[8].p_size = resv;
+		vtoc->efi_parts[8].p_tag = V_RESERVED;
+	}
 
 	if (efi_write(fd, vtoc) != 0) {
 		/*

Modified: stable/11/cddl/usr.sbin/zfsd/case_file.cc
==============================================================================
--- stable/11/cddl/usr.sbin/zfsd/case_file.cc	Thu Mar 22 23:53:18 2018	(r331394)
+++ stable/11/cddl/usr.sbin/zfsd/case_file.cc	Thu Mar 22 23:54:14 2018	(r331395)
@@ -239,6 +239,8 @@ CaseFile::ReEvaluate(const string &devPath, const stri
 {
 	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
 	zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front());
+	zpool_boot_label_t boot_type;
+	uint64_t boot_size;
 
 	if (pool == NULL || !RefreshVdevState()) {
 		/*
@@ -331,7 +333,13 @@ CaseFile::ReEvaluate(const string &devPath, const stri
 	}
 
 	/* Write a label on the newly inserted disk. */
-	if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) {
+	if (zpool_is_bootable(pool))
+		boot_type = ZPOOL_COPY_BOOT_LABEL;
+	else
+		boot_type = ZPOOL_NO_BOOT_LABEL;
+	boot_size = zpool_get_prop_int(pool, ZPOOL_PROP_BOOTSIZE, NULL);
+	if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str(),
+	    boot_type, boot_size, NULL) != 0) {
 		syslog(LOG_ERR,
 		       "Replace vdev(%s/%s) by physical path (label): %s: %s\n",
 		       zpool_get_name(pool), VdevGUIDString().c_str(),

Modified: stable/11/sys/cddl/contrib/opensolaris/common/zfs/zpool_prop.c
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/common/zfs/zpool_prop.c	Thu Mar 22 23:53:18 2018	(r331394)
+++ stable/11/sys/cddl/contrib/opensolaris/common/zfs/zpool_prop.c	Thu Mar 22 23:54:14 2018	(r331395)
@@ -100,6 +100,10 @@ zpool_prop_init(void)
 	    PROP_READONLY, ZFS_TYPE_POOL, "<1.00x or higher if deduped>",
 	    "DEDUP");
 
+	/* system partition size */
+	zprop_register_number(ZPOOL_PROP_BOOTSIZE, "bootsize", 0, PROP_ONETIME,
+	    ZFS_TYPE_POOL, "<size>", "BOOTSIZE");
+
 	/* default number properties */
 	zprop_register_number(ZPOOL_PROP_VERSION, "version", SPA_VERSION,
 	    PROP_DEFAULT, ZFS_TYPE_POOL, "<version>", "VERSION");

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c	Thu Mar 22 23:53:18 2018	(r331394)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c	Thu Mar 22 23:54:14 2018	(r331395)
@@ -481,6 +481,7 @@ metaslab_class_expandable_space(metaslab_class_t *mc)
 
 	spa_config_enter(mc->mc_spa, SCL_VDEV, FTAG, RW_READER);
 	for (int c = 0; c < rvd->vdev_children; c++) {
+		uint64_t tspace;
 		vdev_t *tvd = rvd->vdev_child[c];
 		metaslab_group_t *mg = tvd->vdev_mg;
 
@@ -493,9 +494,13 @@ metaslab_class_expandable_space(metaslab_class_t *mc)
 		 * Calculate if we have enough space to add additional
 		 * metaslabs. We report the expandable space in terms
 		 * of the metaslab size since that's the unit of expansion.
+		 * Adjust by efi system partition size.
 		 */
-		space += P2ALIGN(tvd->vdev_max_asize - tvd->vdev_asize,
-		    1ULL << tvd->vdev_ms_shift);
+		tspace = tvd->vdev_max_asize - tvd->vdev_asize;
+		if (tspace > mc->mc_spa->spa_bootsize) {
+			tspace -= mc->mc_spa->spa_bootsize;
+		}
+		space += P2ALIGN(tspace, 1ULL << tvd->vdev_ms_shift);
 	}
 	spa_config_exit(mc->mc_spa, SCL_VDEV, FTAG);
 	return (space);

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c	Thu Mar 22 23:53:18 2018	(r331394)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa.c	Thu Mar 22 23:54:14 2018	(r331395)
@@ -2818,6 +2818,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t
 		spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation);
 		spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode);
 		spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand);
+		spa_prop_find(spa, ZPOOL_PROP_BOOTSIZE, &spa->spa_bootsize);
 		spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO,
 		    &spa->spa_dedup_ditto);
 

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h	Thu Mar 22 23:53:18 2018	(r331394)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/spa_impl.h	Thu Mar 22 23:54:14 2018	(r331395)
@@ -242,6 +242,7 @@ struct spa {
 	int		spa_mode;		/* FREAD | FWRITE */
 	spa_log_state_t spa_log_state;		/* log state */
 	uint64_t	spa_autoexpand;		/* lun expansion on/off */
+	uint64_t	spa_bootsize;		/* efi system partition size */
 	ddt_t		*spa_ddt[ZIO_CHECKSUM_FUNCTIONS]; /* in-core DDTs */
 	uint64_t	spa_ddt_stat_object;	/* DDT statistics */
 	uint64_t	spa_dedup_ditto;	/* dedup ditto threshold */

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c	Thu Mar 22 23:53:18 2018	(r331394)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c	Thu Mar 22 23:54:14 2018	(r331395)
@@ -2922,8 +2922,8 @@ vdev_get_stats(vdev_t *vd, vdev_stat_t *vs)
 	 * since that determines how much space the pool can expand.
 	 */
 	if (vd->vdev_aux == NULL && tvd != NULL && vd->vdev_max_asize != 0) {
-		vs->vs_esize = P2ALIGN(vd->vdev_max_asize - vd->vdev_asize,
-		    1ULL << tvd->vdev_ms_shift);
+		vs->vs_esize = P2ALIGN(vd->vdev_max_asize - vd->vdev_asize -
+		    spa->spa_bootsize, 1ULL << tvd->vdev_ms_shift);
 	}
 	vs->vs_configured_ashift = vd->vdev_top != NULL
 	    ? vd->vdev_top->vdev_ashift : vd->vdev_ashift;

Modified: stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h
==============================================================================
--- stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h	Thu Mar 22 23:53:18 2018	(r331394)
+++ stable/11/sys/cddl/contrib/opensolaris/uts/common/sys/fs/zfs.h	Thu Mar 22 23:54:14 2018	(r331395)
@@ -211,6 +211,7 @@ typedef enum {
 	ZPOOL_PROP_FRAGMENTATION,
 	ZPOOL_PROP_LEAKED,
 	ZPOOL_PROP_MAXBLOCKSIZE,
+	ZPOOL_PROP_BOOTSIZE,
 	ZPOOL_NUM_PROPS
 } zpool_prop_t;
 


More information about the svn-src-all mailing list