git: c7996ddf8000 - main - Create a new GEOM utility, gunion(8).

From: Kirk McKusick <mckusick_at_FreeBSD.org>
Date: Tue, 01 Mar 2022 00:37:13 UTC
The branch main has been updated by mckusick:

URL: https://cgit.FreeBSD.org/src/commit/?id=c7996ddf8000cfb19a9e91a636f56747860d03d7

commit c7996ddf8000cfb19a9e91a636f56747860d03d7
Author:     Kirk McKusick <mckusick@FreeBSD.org>
AuthorDate: 2022-03-01 00:36:08 +0000
Commit:     Kirk McKusick <mckusick@FreeBSD.org>
CommitDate: 2022-03-01 00:36:08 +0000

    Create a new GEOM utility, gunion(8).
    
    The gunion(8) utility is used to track changes to a read-only disk on
    a writable disk. Logically, a writable disk is placed over a read-only
    disk. Write requests are intercepted and stored on the writable
    disk. Read requests are first checked to see if they have been
    written on the top (writable disk) and if found are returned. If
    they have not been written on the top disk, then they are read from
    the lower disk.
    
    The gunion(8) utility can be especially useful if you have a large
    disk with a corrupted filesystem that you are unsure of how to
    repair. You can use gunion(8) to place another disk over the corrupted
    disk and then attempt to repair the filesystem. If the repair fails,
    you can revert all the changes in the upper disk and be back to the
    unchanged state of the lower disk thus allowing you to try another
    approach to repairing it. If the repair is successful you can commit
    all the writes recorded on the top disk to the lower disk.
    
    Another use of the gunion(8) utility is to try out upgrades to your
    system. Place the upper disk over the disk holding your filesystem
    that is to be upgraded and then run the upgrade on it. If it works,
    commit it; if it fails, revert the upgrade.
    
    Further details can be found in the gunion(8) manual page.
    
    Reviewed by: Chuck Silvers, kib (earlier version)
    tested by:   Peter Holm
    Differential Revision: https://reviews.freebsd.org/D32697
---
 etc/mtree/BSD.include.dist           |    2 +
 include/Makefile                     |    2 +-
 lib/geom/Makefile.classes            |    1 +
 lib/geom/union/Makefile              |    8 +
 lib/geom/union/Makefile.depend       |   19 +
 lib/geom/union/geom_union.c          |   83 ++
 lib/geom/union/gunion.8              |  320 ++++++++
 sbin/geom/core/geom.8                |    5 +-
 sys/conf/files                       |    1 +
 sys/geom/union/g_union.c             | 1389 ++++++++++++++++++++++++++++++++++
 sys/geom/union/g_union.h             |  144 ++++
 sys/modules/geom/Makefile            |    1 +
 sys/modules/geom/geom_union/Makefile |    8 +
 13 files changed, 1981 insertions(+), 2 deletions(-)

diff --git a/etc/mtree/BSD.include.dist b/etc/mtree/BSD.include.dist
index 833618b190fd..7679fd528918 100644
--- a/etc/mtree/BSD.include.dist
+++ b/etc/mtree/BSD.include.dist
@@ -182,6 +182,8 @@
         ..
         stripe
         ..
+	union
+	..
         virstor
         ..
     ..
diff --git a/include/Makefile b/include/Makefile
index 76f713daf62c..42590b900442 100644
--- a/include/Makefile
+++ b/include/Makefile
@@ -51,7 +51,7 @@ LSUBDIRS=	dev/acpica dev/agp dev/ciss dev/filemon dev/firewire \
 	fs/procfs fs/smbfs fs/udf fs/unionfs \
 	geom/cache geom/concat geom/eli geom/gate geom/journal geom/label \
 	geom/mirror geom/mountver geom/multipath geom/nop \
-	geom/raid geom/raid3 geom/shsec geom/stripe geom/virstor \
+	geom/raid geom/raid3 geom/shsec geom/stripe geom/union geom/virstor \
 	net/altq \
 	net/route \
 	netgraph/atm netgraph/netflow \
diff --git a/lib/geom/Makefile.classes b/lib/geom/Makefile.classes
index fcaa748825ee..d4e6f52e65ae 100644
--- a/lib/geom/Makefile.classes
+++ b/lib/geom/Makefile.classes
@@ -22,4 +22,5 @@ GEOM_CLASSES+=	raid
 GEOM_CLASSES+=	raid3
 GEOM_CLASSES+=	shsec
 GEOM_CLASSES+=	stripe
+GEOM_CLASSES+=	union
 GEOM_CLASSES+=	virstor
diff --git a/lib/geom/union/Makefile b/lib/geom/union/Makefile
new file mode 100644
index 000000000000..cb8b09dc7eca
--- /dev/null
+++ b/lib/geom/union/Makefile
@@ -0,0 +1,8 @@
+# $FreeBSD$
+
+PACKAGE=runtime
+.PATH: ${.CURDIR:H:H}/misc
+
+GEOM_CLASS=	union
+
+.include <bsd.lib.mk>
diff --git a/lib/geom/union/Makefile.depend b/lib/geom/union/Makefile.depend
new file mode 100644
index 000000000000..fb5f86e931fb
--- /dev/null
+++ b/lib/geom/union/Makefile.depend
@@ -0,0 +1,19 @@
+# $FreeBSD$
+# Autogenerated - do NOT edit!
+
+DIRDEPS = \
+	gnu/lib/csu \
+	include \
+	include/xlocale \
+	lib/${CSU_DIR} \
+	lib/libc \
+	lib/libcompiler_rt \
+	lib/libgeom \
+	sbin/geom/core \
+
+
+.include <dirdeps.mk>
+
+.if ${DEP_RELDIR} == ${_DEP_RELDIR}
+# local dependencies - needed for -jN in clean tree
+.endif
diff --git a/lib/geom/union/geom_union.c b/lib/geom/union/geom_union.c
new file mode 100644
index 000000000000..2e0843d35899
--- /dev/null
+++ b/lib/geom/union/geom_union.c
@@ -0,0 +1,83 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2022 Marshall Kirk McKusick <mckusick@mckusick.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <libgeom.h>
+#include <geom/union/g_union.h>
+
+#include "core/geom.h"
+
+/*
+ * Version stamps exported by this class library.  The kernel side of the
+ * class rejects any request whose "version" parameter differs from
+ * G_UNION_VERSION; presumably the geom(8) core sends the value below as
+ * that parameter and checks lib_version itself -- confirm in core/geom.h.
+ */
+uint32_t lib_version = G_LIB_VERSION;
+uint32_t version = G_UNION_VERSION;
+
+/*
+ * Subcommands understood by gunion(8): create, destroy, reset, commit and
+ * revert.  Each entry lists the verb name, a flags word, a NULL third
+ * member (presumably the userland handler, so the verb is passed straight
+ * to the kernel -- confirm in core/geom.h), the option table, and the usage
+ * text.  Each option row appears to be { letter, long name, default value,
+ * type }.  The usage strings mirror the SYNOPSIS of gunion(8), so keep the
+ * two in sync when options are added or removed.
+ */
+struct g_command class_commands[] = {
+	/* Only "create" carries G_FLAG_LOADKLD. */
+	{ "create", G_FLAG_LOADKLD, NULL,
+	    {
+		{ 'o', "offset", "0", G_TYPE_NUMBER },
+		{ 's', "size", "0", G_TYPE_NUMBER },
+		{ 'S', "secsize", "0", G_TYPE_NUMBER },
+		{ 'v', "verbose", NULL, G_TYPE_BOOL },
+		{ 'Z', "gunionname", G_VAL_OPTIONAL, G_TYPE_STRING },
+		G_OPT_SENTINEL
+	    },
+	    "[-v] [-o offset] [-s size] [-S secsize] [-Z gunionname] "
+	    "upperdev lowerdev"
+	},
+	{ "destroy", 0, NULL,
+	    {
+		{ 'f', "force", NULL, G_TYPE_BOOL },
+		{ 'v', "verbose", NULL, G_TYPE_BOOL },
+		G_OPT_SENTINEL
+	    },
+	    "[-fv] prov ..."
+	},
+	{ "reset", 0, NULL,
+	    {
+		{ 'v', "verbose", NULL, G_TYPE_BOOL },
+		G_OPT_SENTINEL
+	    },
+	    "[-v] prov ..."
+	},
+	{ "commit", 0, NULL,
+	    {
+		{ 'f', "force", NULL, G_TYPE_BOOL },
+		{ 'r', "reboot", NULL, G_TYPE_BOOL },
+		{ 'v', "verbose", NULL, G_TYPE_BOOL },
+		G_OPT_SENTINEL
+	    },
+	    "[-frv] prov ..."
+	},
+	{ "revert", 0, NULL,
+	    {
+		{ 'v', "verbose", NULL, G_TYPE_BOOL },
+		G_OPT_SENTINEL
+	    },
+	    "[-v] prov ..."
+	},
+	/* Terminates the command table. */
+	G_CMD_SENTINEL
+};
diff --git a/lib/geom/union/gunion.8 b/lib/geom/union/gunion.8
new file mode 100644
index 000000000000..732a803657d8
--- /dev/null
+++ b/lib/geom/union/gunion.8
@@ -0,0 +1,320 @@
+.\"
+.\" Copyright (c) 2022 Marshall Kirk McKusick <mckusick@mckusick.com>
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd January 19, 2022
+.Dt GUNION 8
+.Os
+.Sh NAME
+.Nm gunion
+.Nd "control utility for UNION GEOM class"
+.Sh SYNOPSIS
+.Nm
+.Cm create
+.Op Fl v
+.Op Fl o Ar offset
+.Op Fl s Ar size
+.Op Fl S Ar secsize
+.Op Fl Z Ar gunionname
+.Ar upperdev lowerdev
+.Nm
+.Cm destroy
+.Op Fl fv
+.Ar prov ...
+.Nm
+.Cm reset
+.Op Fl v
+.Ar prov ...
+.Nm
+.Cm revert
+.Op Fl v
+.Ar prov ...
+.Nm
+.Cm commit
+.Op Fl frv
+.Ar prov ...
+.Nm
+.Cm list
+.Nm
+.Cm status
+.Nm
+.Cm load
+.Nm
+.Cm unload
+.Sh DESCRIPTION
+The
+.Nm
+utility is used to track changes to a read-only disk on a writable disk.
+Logically, a writable disk is placed over a read-only disk.
+Write requests are intercepted and stored on the writable disk.
+Read requests are first checked to see if they have been written
+on the top (writable disk) and if found are returned.
+If they have not been written on the top disk,
+then they are read from the lower disk.
+.Pp
+The
+.Nm
+utility can be especially useful if you have a large disk with a
+corrupted filesystem that you are unsure of how to repair.
+You can use
+.Nm
+to place another disk over the corrupted disk and then attempt
+to repair the filesystem.
+If the repair fails, you can revert all the changes in the upper disk
+and be back to the unchanged state of the lower disk thus allowing you
+to try another approach to repairing it.
+If the repair is successful you can request that all the writes recorded
+on the top disk be written to the lower disk.
+.Pp
+Another use of the
+.Nm
+utility is to try out upgrades to your system.
+Place the upper disk over the disk holding your filesystem that
+is to be upgraded and then run the upgrade on it.
+If it works, commit it;
+if it fails, revert the upgrade.
+An example is given below.
+.Pp
+The upper disk must be at least the size of the disk that it covers.
+The union metadata exists only for the
+period of time that the union is instantiated,
+so it is important to commit the updates before destroying the union.
+If the top disk is about 2.5 percent larger for 512 byte sector disks
+(or 0.5 percent larger for 4K sector disks) than the disk that it covers,
+it is possible (though not currently implemented) to save the union
+metadata between instantiations of the union device.
+.Pp
+If you do not have physical media available to use for the upper layer, the
+.Xr md 4
+disk can be used instead.
+When used in
+.Cm swap
+mode the changes are all held in buffer memory.
+Pages get pushed out to the swap when the system is under memory pressure,
+otherwise they stay in the operating memory.
+If long-term persistence is desired,
+.Cm vnode
+mode can be used in which a regular file is used as backing store.
+The disk space used by the file is based on the amount of data that
+is written to the top device.
+.Pp
+The first argument to
+.Nm
+indicates an action to be performed:
+.Bl -tag -width "destroy"
+.It Cm create
+Set up a union provider on the two given devices.
+The first device given is used as the top device and must be writable.
+The second device given is used as the bottom device and need only be readable.
+The second device may be mounted read-only but it is recommended
+that it be unmounted and accessed only through a mount of the union device.
+If the operation succeeds, the new provider should appear with name
+.Pa /dev/ Ns Ao Ar upperdev Ac Ns - Ns Ao Ar lowerdev Ac Ns Pa .union .
+An alternate name can be specified with the
+.Fl Z
+flag.
+The kernel module
+.Pa geom_union.ko
+will be loaded if it is not loaded already.
+.Pp
+Additional options include:
+.Bl -tag -width "-Z gunionname"
+.It Fl o Ar offset
+Where to begin on the original provider.
+The default is to start at the beginning of the disk (i.e., at offset 0).
+This option may be used to skip over partitioning information stored
+at the beginning of a disk.
+The offset must be a multiple of the sector size.
+.It Fl s Ar size
+Size of the transparent provider.
+The default is to be the same size as the lower disk.
+Any extra space at the end of the upper disk may be used to store
+union metadata.
+.It Fl S Ar secsize
+Sector size of the transparent provider.
+The default is to be the same sector size as the lower disk.
+.It Fl v
+Be more verbose.
+.It Fl Z Ar gunionname
+The name of the new provider.
+The suffix
+.Dq .union
+will be appended to the provider name.
+.El
+.It Cm destroy
+Turn off the given union providers.
+.Pp
+Additional options include:
+.Bl -tag -width "-f"
+.It Fl f
+Force the removal of the specified provider.
+.It Fl v
+Be more verbose.
+.El
+.It Cm revert
+Discard all the changes made in the top layer thus reverting to the
+original state of the lower device.
+The union device may not be mounted or otherwise in use when a
+.Cm revert
+operation is being done.
+.It Cm commit
+Write all the changes made in the top device to the lower device
+thus committing the lower device to have the same data as the union.
+.Pp
+Additional options include:
+.Bl -tag -width "-f"
+.It Fl f
+The
+.Cm commit
+command will not allow the lower device to be mounted
+or otherwise in use while the
+.Cm commit
+operation is being done.
+However, the
+.Fl f
+flag may be specified to allow the lower device to be mounted read-only.
+To prevent a filesystem panic on the mounted lower-device filesystem,
+immediately after the
+.Cm commit
+operation finishes the lower-device filesystem should be unmounted
+and then remounted to update its metadata state.
+If the lower-device filesystem is currently being used as the root
+filesystem then the
+.Fl r
+flag should be specified to reboot the system at the completion of the
+.Cm commit
+operation.
+.It Fl r
+Reboot the system at the completion of the
+.Cm commit
+operation.
+.It Fl v
+Be more verbose.
+.El
+.It Cm reset
+Reset statistics for the given union providers.
+.It Cm list
+See
+.Xr geom 8 .
+.It Cm status
+See
+.Xr geom 8 .
+.It Cm load
+See
+.Xr geom 8 .
+.It Cm unload
+See
+.Xr geom 8 .
+.El
+.Sh EXIT STATUS
+Exit status is 0 on success, and 1 if the command fails.
+.Sh EXAMPLES
+The following example shows how to create and destroy a
+union provider with disks
+.Pa /dev/da0p1
+as the read-only disk on the bottom and
+.Pa /dev/md0
+as the writable disk on the top.
+.Bd -literal -offset indent
+gunion create -v md0 da0p1
+mount /dev/md0-da0p1.union /mnt
+.Ed
+.Pp
+Proceed to make changes in the /mnt filesystem.
+If they are successful and you want to keep them:
+.Bd -literal -offset indent
+umount /mnt
+gunion commit -v md0-da0p1.union
+.Ed
+.Pp
+If they are unsuccessful and you want to roll back:
+.Bd -literal -offset indent
+umount /mnt
+gunion revert -v md0-da0p1.union
+.Ed
+.Pp
+When done, eliminate the union:
+.Bd -literal -offset indent
+umount /mnt
+gunion destroy -v md0-da0p1.union
+.Ed
+.Pp
+All uncommitted changes will be discarded when the union is destroyed.
+.Pp
+If you use the name of the full disk, for example
+.Pa da0
+and it is labelled,
+then a union name will appear for the disk as
+.Pa md0-da0.union
+as well as for each partition on the disk as
+.Pa md0-da0p1.union ,
+.Pa md0-da0p2.union ,
+etc.
+A commit operation can be done only on
+.Pa md0-da0.union
+and will commit changes to all the partitions.
+If partition level commits are desired,
+then a union must be created for each partition.
+.Pp
+The traffic statistics for the given
+union providers can be obtained with the
+.Cm list
+command.
+The example below shows the number of bytes written with
+.Xr newfs 8 :
+.Bd -literal -offset indent
+gunion create md0 da0p1
+newfs /dev/md0-da0p1.union
+gunion list
+.Ed
+.Sh SYSCTL VARIABLES
+The following
+.Xr sysctl 8
+variables can be used to control the behavior of the
+.Nm UNION
+GEOM class.
+The default value is shown next to each variable.
+.Bl -tag -width indent
+.It Va kern.geom.union.debug : No 0
+Debug level of the
+.Nm UNION
+GEOM class.
+This can be set to a number between 0 and 4 inclusive.
+If set to 0, no debug information is printed.
+If set to 1, all the verbose messages are logged.
+If set to 2, additional error-related information is logged.
+If set to 3, mapping operations are logged.
+If set to 4, the maximum amount of debug information is printed.
+.El
+.Sh SEE ALSO
+.Xr geom 4 ,
+.Xr geom 8
+.Sh HISTORY
+The
+.Nm
+utility appeared in
+.Fx 14.0 .
+.Sh AUTHORS
+.An Marshall Kirk McKusick Aq Mt mckusick@mckusick.com
diff --git a/sbin/geom/core/geom.8 b/sbin/geom/core/geom.8
index 298fc2b1d4fd..db0556fb9505 100644
--- a/sbin/geom/core/geom.8
+++ b/sbin/geom/core/geom.8
@@ -24,7 +24,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd September 14, 2018
+.Dd January 19, 2022
 .Dt GEOM 8
 .Os
 .Sh NAME
@@ -162,6 +162,8 @@ SHSEC
 .It
 STRIPE
 .It
+UNION
+.It
 VIRSTOR
 .El
 .Sh ENVIRONMENT
@@ -210,6 +212,7 @@ geom md unload
 .Xr gsched 8 ,
 .Xr gshsec 8 ,
 .Xr gstripe 8 ,
+.Xr gunion 8 ,
 .Xr gvirstor 8
 .Sh HISTORY
 The
diff --git a/sys/conf/files b/sys/conf/files
index 663441d3adfb..74f15f867213 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -3689,6 +3689,7 @@ geom/raid3/g_raid3.c		optional geom_raid3
 geom/raid3/g_raid3_ctl.c	optional geom_raid3
 geom/shsec/g_shsec.c		optional geom_shsec
 geom/stripe/g_stripe.c		optional geom_stripe
+geom/union/g_union.c		optional geom_union
 geom/uzip/g_uzip.c		optional geom_uzip
 geom/uzip/g_uzip_lzma.c		optional geom_uzip
 geom/uzip/g_uzip_wrkthr.c	optional geom_uzip
diff --git a/sys/geom/union/g_union.c b/sys/geom/union/g_union.c
new file mode 100644
index 000000000000..a5702d175264
--- /dev/null
+++ b/sys/geom/union/g_union.c
@@ -0,0 +1,1389 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2022 Marshall Kirk McKusick <mckusick@mckusick.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/param.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/ctype.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/reboot.h>
+#include <sys/rwlock.h>
+#include <sys/sbuf.h>
+#include <sys/sysctl.h>
+
+#include <geom/geom.h>
+#include <geom/geom_dbg.h>
+#include <geom/union/g_union.h>
+
+/*
+ * kern.geom.union sysctl subtree.  The only knob defined here is
+ * kern.geom.union.debug, a read/write unsigned debug level that defaults
+ * to 0.
+ */
+SYSCTL_DECL(_kern_geom);
+static SYSCTL_NODE(_kern_geom, OID_AUTO, union, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
+    "GEOM_UNION stuff");
+static u_int g_union_debug = 0;
+SYSCTL_UINT(_kern_geom_union, OID_AUTO, debug, CTLFLAG_RW, &g_union_debug, 0,
+    "Debug level");
+
+/* Forward declarations of the class methods installed in g_union_class. */
+static void g_union_config(struct gctl_req *req, struct g_class *mp,
+    const char *verb);
+static g_access_t g_union_access;
+static g_start_t g_union_start;
+static g_dumpconf_t g_union_dumpconf;
+static g_orphan_t g_union_orphan;
+static int g_union_destroy_geom(struct gctl_req *req, struct g_class *mp,
+    struct g_geom *gp);
+static g_provgone_t g_union_providergone;
+static g_resize_t g_union_resize;
+
+/*
+ * Method table for the union class.  Control requests (create, destroy,
+ * reset, revert, commit) arrive through .ctlreq; the remaining members
+ * hook this class into the GEOM framework.
+ */
+struct g_class g_union_class = {
+	.name = G_UNION_CLASS_NAME,
+	.version = G_VERSION,
+	.ctlreq = g_union_config,
+	.access = g_union_access,
+	.start = g_union_start,
+	.dumpconf = g_union_dumpconf,
+	.orphan = g_union_orphan,
+	.destroy_geom = g_union_destroy_geom,
+	.providergone = g_union_providergone,
+	.resize = g_union_resize,
+};
+
+/*
+ * Prototypes for the file-local helpers.  The unnamed bool parameter of
+ * the g_union_ctl_*() handlers is the "verbose" flag passed in by
+ * g_union_config().
+ */
+static void g_union_ctl_create(struct gctl_req *req, struct g_class *mp, bool);
+static intmax_t g_union_fetcharg(struct gctl_req *req, const char *name);
+static bool g_union_verify_nprefix(const char *name);
+static void g_union_ctl_destroy(struct gctl_req *req, struct g_class *mp, bool);
+static struct g_geom *g_union_find_geom(struct g_class *mp, const char *name);
+static void g_union_ctl_reset(struct gctl_req *req, struct g_class *mp, bool);
+static void g_union_ctl_revert(struct gctl_req *req, struct g_class *mp, bool);
+static void g_union_revert(struct g_union_softc *sc);
+static void g_union_doio(struct g_union_wip *wip);
+static void g_union_ctl_commit(struct gctl_req *req, struct g_class *mp, bool);
+static void g_union_setmap(struct bio *bp, struct g_union_softc *sc);
+static bool g_union_getmap(struct bio *bp, struct g_union_softc *sc,
+	off_t *len2read);
+static void g_union_done(struct bio *bp);
+static void g_union_kerneldump(struct bio *bp, struct g_union_softc *sc);
+static int g_union_dumper(void *, void *, vm_offset_t, off_t, size_t);
+static int g_union_destroy(struct gctl_req *req, struct g_geom *gp, bool force);
+
+/*
+ * Entry point for union control requests.
+ *
+ * Checks that the request carries a "version" matching G_UNION_VERSION
+ * and a "verbose" flag, then hands the request to the handler for the
+ * named verb.  Any problem, including an unrecognized verb, is reported
+ * on the request with gctl_error().
+ */
+static void
+g_union_config(struct gctl_req *req, struct g_class *mp, const char *verb)
+{
+	uint32_t *versionp, *verbosep;
+	bool chatty;
+
+	g_topology_assert();
+
+	versionp = gctl_get_paraml(req, "version", sizeof(*versionp));
+	if (versionp == NULL) {
+		gctl_error(req, "No '%s' argument.", "version");
+		return;
+	}
+	if (*versionp != G_UNION_VERSION) {
+		gctl_error(req, "Userland and kernel parts are out of sync.");
+		return;
+	}
+	verbosep = gctl_get_paraml(req, "verbose", sizeof(*verbosep));
+	if (verbosep == NULL) {
+		gctl_error(req, "No '%s' argument.", "verbose");
+		return;
+	}
+	/* The handlers take the flag as a bool; any nonzero value is true. */
+	chatty = (*verbosep != 0);
+
+	if (strcmp(verb, "create") == 0)
+		g_union_ctl_create(req, mp, chatty);
+	else if (strcmp(verb, "destroy") == 0)
+		g_union_ctl_destroy(req, mp, chatty);
+	else if (strcmp(verb, "reset") == 0)
+		g_union_ctl_reset(req, mp, chatty);
+	else if (strcmp(verb, "revert") == 0)
+		g_union_ctl_revert(req, mp, chatty);
+	else if (strcmp(verb, "commit") == 0)
+		g_union_ctl_commit(req, mp, chatty);
+	else
+		gctl_error(req, "Unknown verb.");
+}
+
+/*
+ * Create a union device.
+ *
+ * Handles the "create" verb: validates the request (exactly two device
+ * arguments plus the optional offset, size, secsize and gunionname
+ * parameters), creates the geom and its provider, attaches a consumer to
+ * the lower provider (read + exclusive) and to the upper provider (read +
+ * write + exclusive), and finally allocates the two-level write map.
+ * Every failure is reported on the request with gctl_error(); nothing is
+ * returned.
+ *
+ * req:     control request carrying the parameters and receiving errors
+ * mp:      the union class whose geom list is searched and extended
+ * verbose: when true, the creation summary is also posted on the request
+ */
+static void
+g_union_ctl_create(struct gctl_req *req, struct g_class *mp, bool verbose)
+{
+	struct g_provider *upperpp, *lowerpp, *newpp;
+	struct g_consumer *uppercp, *lowercp;
+	struct g_union_softc *sc;
+	struct g_geom_alias *gap;
+	struct g_geom *gp;
+	intmax_t offset, secsize, size, needed;
+	const char *gunionname;
+	int *nargs, error, i, n;
+	char name[64];
+
+	g_topology_assert();
+
+	/* Exactly two positional arguments are accepted: upper, then lower. */
+	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
+	if (nargs == NULL) {
+		gctl_error(req, "No '%s' argument.", "nargs");
+		return;
+	}
+	if (*nargs < 2) {
+		gctl_error(req, "Missing device(s).");
+		return;
+	}
+	if (*nargs > 2) {
+		gctl_error(req, "Extra device(s).");
+		return;
+	}
+
+	/* A value of 0 for any of these means "use the default". */
+	offset = g_union_fetcharg(req, "offset");
+	size = g_union_fetcharg(req, "size");
+	secsize = g_union_fetcharg(req, "secsize");
+	gunionname = gctl_get_asciiparam(req, "gunionname");
+
+	upperpp = gctl_get_provider(req, "arg0");
+	lowerpp = gctl_get_provider(req, "arg1");
+	if (upperpp == NULL || lowerpp == NULL)
+		/* error message provided by gctl_get_provider() */
+		return;
+	/* Create the union */
+	/*
+	 * The union's sector size defaults to that of the lower provider
+	 * and must be a multiple of both providers' sector sizes.
+	 */
+	if (secsize == 0)
+		secsize = lowerpp->sectorsize;
+	else if ((secsize % lowerpp->sectorsize) != 0) {
+		gctl_error(req, "Sector size %jd is not a multiple of lower "
+		    "provider %s's %jd sector size.", (intmax_t)secsize,
+		    lowerpp->name, (intmax_t)lowerpp->sectorsize);
+		return;
+	}
+	if (secsize > maxphys) {
+		gctl_error(req, "Too big secsize %jd for lower provider %s.",
+		    (intmax_t)secsize, lowerpp->name);
+		return;
+	}
+	if (secsize % upperpp->sectorsize != 0) {
+		gctl_error(req, "Sector size %jd is not a multiple of upper "
+		    "provider %s's %jd sector size.", (intmax_t)secsize,
+		    upperpp->name, (intmax_t)upperpp->sectorsize);
+		return;
+	}
+	/*
+	 * The offset is checked against the union's sector size, so that is
+	 * the value reported (it differs from the lower provider's sector
+	 * size whenever a secsize was requested explicitly).
+	 */
+	if ((offset % secsize) != 0) {
+		gctl_error(req, "Offset %jd is not a multiple of sector size "
+		    "%jd.", (intmax_t)offset, (intmax_t)secsize);
+		return;
+	}
+	/* From here on, size is the length of the union past the offset. */
+	if (size == 0)
+		size = lowerpp->mediasize - offset;
+	else
+		size -= offset;
+	if ((size % secsize) != 0) {
+		gctl_error(req, "Size %jd is not a multiple of sector size "
+		    "%jd.", (intmax_t)size, (intmax_t)secsize);
+		return;
+	}
+	if (offset + size < lowerpp->mediasize) {
+		gctl_error(req, "Size %jd is too small for lower provider %s, "
+		    "needs %jd.", (intmax_t)(offset + size), lowerpp->name,
+		    (intmax_t)lowerpp->mediasize);
+		return;
+	}
+	if (size > upperpp->mediasize) {
+		gctl_error(req, "Upper provider %s size (%jd) is too small, "
+		    "needs %jd.", upperpp->name, (intmax_t)upperpp->mediasize,
+		    (intmax_t)size);
+		return;
+	}
+	if (gunionname != NULL && !g_union_verify_nprefix(gunionname)) {
+		gctl_error(req, "Gunion name %s must be alphanumeric.",
+		    gunionname);
+		return;
+	}
+	/* Default name is <upper>-<lower> followed by the union suffix. */
+	if (gunionname != NULL) {
+		n = snprintf(name, sizeof(name), "%s%s", gunionname,
+		    G_UNION_SUFFIX);
+	} else {
+		n = snprintf(name, sizeof(name), "%s-%s%s", upperpp->name,
+		    lowerpp->name, G_UNION_SUFFIX);
+	}
+	/* Reject empty, failed, or truncated names. */
+	if (n <= 0 || (size_t)n >= sizeof(name)) {
+		gctl_error(req, "Invalid provider name.");
+		return;
+	}
+	LIST_FOREACH(gp, &mp->geom, geom) {
+		if (strcmp(gp->name, name) == 0) {
+			gctl_error(req, "Provider %s already exists.", name);
+			return;
+		}
+	}
+	gp = g_new_geomf(mp, "%s", name);
+	sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO);
+	rw_init(&sc->sc_rwlock, "gunion");
+	TAILQ_INIT(&sc->sc_wiplist);
+	sc->sc_offset = offset;
+	sc->sc_size = size;
+	sc->sc_sectorsize = secsize;
+	sc->sc_reads = 0;
+	sc->sc_writes = 0;
+	sc->sc_deletes = 0;
+	sc->sc_getattrs = 0;
+	sc->sc_flushes = 0;
+	sc->sc_speedups = 0;
+	sc->sc_cmd0s = 0;
+	sc->sc_cmd1s = 0;
+	sc->sc_cmd2s = 0;
+	sc->sc_readbytes = 0;
+	sc->sc_wrotebytes = 0;
+	sc->sc_writemap_memory = 0;
+	gp->softc = sc;
+
+	/* The new provider inherits the aliases of both underlying ones. */
+	newpp = g_new_providerf(gp, "%s", gp->name);
+	newpp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
+	newpp->mediasize = size;
+	newpp->sectorsize = secsize;
+	LIST_FOREACH(gap, &upperpp->aliases, ga_next)
+		g_provider_add_alias(newpp, "%s%s", gap->ga_alias,
+		    G_UNION_SUFFIX);
+	LIST_FOREACH(gap, &lowerpp->aliases, ga_next)
+		g_provider_add_alias(newpp, "%s%s", gap->ga_alias,
+		    G_UNION_SUFFIX);
+	lowercp = g_new_consumer(gp);
+	lowercp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
+	if ((error = g_attach(lowercp, lowerpp)) != 0) {
+		gctl_error(req, "Error %d: cannot attach to provider %s.",
+		    error, lowerpp->name);
+		goto fail1;
+	}
+	/* request read and exclusive access for lower */
+	if ((error = g_access(lowercp, 1, 0, 1)) != 0) {
+		gctl_error(req, "Error %d: cannot obtain exclusive access to "
+		    "%s.\n\tMust be unmounted or mounted read-only.", error,
+		    lowerpp->name);
+		goto fail2;
+	}
+	uppercp = g_new_consumer(gp);
+	uppercp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
+	if ((error = g_attach(uppercp, upperpp)) != 0) {
+		gctl_error(req, "Error %d: cannot attach to provider %s.",
+		    error, upperpp->name);
+		goto fail3;
+	}
+	/* request read, write, and exclusive access for upper */
+	if ((error = g_access(uppercp, 1, 1, 1)) != 0) {
+		gctl_error(req, "Error %d: cannot obtain write access to %s.",
+		    error, upperpp->name);
+		goto fail4;
+	}
+	sc->sc_uppercp = uppercp;
+	sc->sc_lowercp = lowercp;
+
+	/* Accept unmapped I/O only if both underlying providers do. */
+	newpp->flags |= (upperpp->flags & G_PF_ACCEPT_UNMAPPED) &
+	    (lowerpp->flags & G_PF_ACCEPT_UNMAPPED);
+	g_error_provider(newpp, 0);
+	/*
+	 * Allocate the map that tracks the sectors that have been written
+	 * to the top layer. We use a 2-level hierarchy as that lets us
+	 * map up to 1 petabyte using allocations of less than 33 Mb
+	 * when using 4K byte sectors (or 268 Mb with 512 byte sectors).
+	 *
+	 * We totally populate the leaf nodes rather than allocating them
+	 * as they are first used because their usage occurs in the
+	 * g_union_start() routine that may be running in the g_down
+	 * thread which cannot sleep.
+	 */
+	sc->sc_map_size = roundup(size / secsize, BITS_PER_ENTRY);
+	needed = sc->sc_map_size / BITS_PER_ENTRY;
+	/* Smallest root size whose square covers the needed map entries. */
+	for (sc->sc_root_size = 1;
+	     sc->sc_root_size * sc->sc_root_size < needed;
+	     sc->sc_root_size++)
+		continue;
+	sc->sc_writemap_root = g_malloc(sc->sc_root_size * sizeof(uint64_t *),
+	    M_WAITOK | M_ZERO);
+	sc->sc_leaf_size = sc->sc_root_size;
+	sc->sc_bits_per_leaf = sc->sc_leaf_size * BITS_PER_ENTRY;
+	sc->sc_leafused = g_malloc(roundup(sc->sc_root_size, BITS_PER_ENTRY),
+	    M_WAITOK | M_ZERO);
+	for (i = 0; i < sc->sc_root_size; i++)
+		sc->sc_writemap_root[i] =
+		    g_malloc(sc->sc_leaf_size * sizeof(uint64_t),
+		    M_WAITOK | M_ZERO);
+	sc->sc_writemap_memory =
+	    (sc->sc_root_size + sc->sc_root_size * sc->sc_leaf_size) *
+	    sizeof(uint64_t) + roundup(sc->sc_root_size, BITS_PER_ENTRY);
+	/* The cast keeps the %jd argument correct whatever the field type. */
+	if (verbose)
+		gctl_error(req, "Device %s created with memory map size %jd.",
+		    gp->name, (intmax_t)sc->sc_writemap_memory);
+	G_UNION_DEBUG(1, "Device %s created with memory map size %jd.",
+	    gp->name, (intmax_t)sc->sc_writemap_memory);
+	return;
+
+	/* Unwind in the reverse order of setup. */
+fail4:
+	g_detach(uppercp);
+fail3:
+	g_destroy_consumer(uppercp);
+	g_access(lowercp, -1, 0, -1);
+fail2:
+	g_detach(lowercp);
+fail1:
+	g_destroy_consumer(lowercp);
+	g_destroy_provider(newpp);
+	g_destroy_geom(gp);
+}
+
+/*
+ * Fetch a named optional numeric parameter and verify that it is not
+ * negative.
+ *
+ * Returns the parameter's value, or 0 when the parameter is absent.  A
+ * negative value is reported on the request with gctl_error() and 0 is
+ * returned in its place.
+ */
+static intmax_t
+g_union_fetcharg(struct gctl_req *req, const char *name)
+{
+	intmax_t *valp;
+
+	valp = gctl_get_paraml_opt(req, name, sizeof(*valp));
+	if (valp != NULL && *valp < 0) {
+		gctl_error(req,
+		    "Invalid '%s': negative value, using default.", name);
+		return (0);
+	}
+	return (valp != NULL ? *valp : 0);
+}
+
+/*
+ * Verify that a name is alphanumeric.
+ *
+ * Returns true when every character of the NUL-terminated string is a
+ * letter or a digit (an empty string therefore passes), false otherwise.
+ * The string is walked once up to its terminator rather than calling
+ * strlen() on every iteration, and each character is converted to
+ * unsigned char before classification so that bytes with the high bit
+ * set are never passed on as negative values.
+ */
+static bool
+g_union_verify_nprefix(const char *name)
+{
+	const char *cp;
+
+	for (cp = name; *cp != '\0'; cp++) {
+		if (isalpha((unsigned char)*cp) == 0 &&
+		    isdigit((unsigned char)*cp) == 0)
+			return (false);
+	}
+	return (true);
+}
*** 1171 LINES SKIPPED ***