git: bc9154a20824 - stable/13 - loader: implement mount/unmount rootfs

From: Emmanuel Vadot <manu_at_FreeBSD.org>
Date: Thu, 13 Jan 2022 09:49:04 UTC
The branch stable/13 has been updated by manu:

URL: https://cgit.FreeBSD.org/src/commit/?id=bc9154a208248a2ea0ca00177407caef87fec87e

commit bc9154a208248a2ea0ca00177407caef87fec87e
Author:     Toomas Soome <tsoome@FreeBSD.org>
AuthorDate: 2021-08-12 05:45:52 +0000
Commit:     Emmanuel Vadot <manu@FreeBSD.org>
CommitDate: 2022-01-13 07:58:21 +0000

    loader: implement mount/unmount rootfs
    
    We want to keep our root file system open to preserve bcache segment
    between file accesses, thus reducing physical disk IO.
    
    Reviewed by:    imp, allanjude, kevans (previous version)
    Differential Revision:  https://reviews.freebsd.org/D30848
    MFC after:      1 month
    
    (cherry picked from commit b4cb3fe0e39a31e60292018dc9fb7510b3de92bf)
---
 stand/common/bootstrap.h             |   4 +
 stand/common/misc.c                  |  22 ++++
 stand/efi/boot1/zfs_module.c         |   2 +-
 stand/efi/libefi/devicename.c        |   5 +-
 stand/efi/loader/main.c              |  14 +--
 stand/i386/gptzfsboot/Makefile       |   2 +-
 stand/i386/libi386/devicename.c      |  14 +--
 stand/i386/loader/main.c             |   4 +
 stand/i386/zfsboot/Makefile          |   2 +-
 stand/i386/zfsboot/zfsboot.c         |   4 +
 stand/libofw/devicename.c            |  14 +--
 stand/libsa/Makefile                 |   2 +-
 stand/libsa/cd9660.c                 | 158 +++++++++++++++++++++-----
 stand/libsa/dosfs.c                  | 134 ++++++++++++++++++----
 stand/libsa/mount.c                  | 163 +++++++++++++++++++++++++++
 stand/libsa/netif.c                  |   1 -
 stand/libsa/stand.h                  |   4 +
 stand/libsa/ufs.c                    | 210 +++++++++++++++++++++++------------
 stand/libsa/zfs/zfs.c                | 137 ++++++++++++++++++-----
 stand/libsa/zfs/zfsimpl.c            |  14 ++-
 stand/mips/beri/loader/devicename.c  |  14 +--
 stand/powerpc/kboot/main.c           |   1 -
 stand/powerpc/ofw/main.c             |   3 +
 stand/uboot/common/main.c            |   3 +
 stand/uboot/lib/devicename.c         |   4 +-
 stand/userboot/userboot/devicename.c |   4 +-
 stand/userboot/userboot/main.c       |   4 +
 27 files changed, 752 insertions(+), 191 deletions(-)

diff --git a/stand/common/bootstrap.h b/stand/common/bootstrap.h
index eb4e50203133..ea03519f5b39 100644
--- a/stand/common/bootstrap.h
+++ b/stand/common/bootstrap.h
@@ -29,6 +29,7 @@
 #ifndef _BOOTSTRAP_H_
 #define	_BOOTSTRAP_H_
 
+#include <stand.h>
 #include <sys/types.h>
 #include <sys/queue.h>
 #include <sys/linker_set.h>
@@ -400,6 +401,9 @@ int nvstore_set_var_from_string(void *, const char *, const char *,
     const char *);
 int nvstore_unset_var(void *, const char *);
 
+/* common code to set currdev variable. */
+extern int mount_currdev(struct env_var *, int, const void *);
+
 #ifndef CTASSERT
 #define	CTASSERT(x)	_Static_assert(x, "compile-time assertion failed")
 #endif
diff --git a/stand/common/misc.c b/stand/common/misc.c
index 9e6bea1e244c..9cb5550344ca 100644
--- a/stand/common/misc.c
+++ b/stand/common/misc.c
@@ -179,3 +179,25 @@ dev_cleanup(void)
 	if (devsw[i]->dv_cleanup != NULL)
 	    (devsw[i]->dv_cleanup)();
 }
+
+/*
+ * mount new rootfs and unmount old, set "currdev" environment variable.
+ */
+int mount_currdev(struct env_var *ev, int flags, const void *value)
+{
+	int rv;
+
+	/* mount new rootfs */
+	rv = mount(value, "/", 0, NULL);
+	if (rv == 0) {
+		/*
+		 * Note we unmount any previously mounted fs only after
+		 * successfully mounting the new because we do not want to
+		 * end up with unmounted rootfs.
+		 */
+		if (ev->ev_value != NULL)
+			unmount(ev->ev_value, 0);
+		env_setenv(ev->ev_name, flags | EV_NOHOOK, value, NULL, NULL);
+	}
+	return (rv);
+}
diff --git a/stand/efi/boot1/zfs_module.c b/stand/efi/boot1/zfs_module.c
index 583d6b1d101f..7aaca72e30f1 100644
--- a/stand/efi/boot1/zfs_module.c
+++ b/stand/efi/boot1/zfs_module.c
@@ -160,7 +160,7 @@ load(const char *filepath, dev_info_t *devinfo, void **bufp, size_t *bufsize)
 		return (EFI_NOT_FOUND);
 	}
 
-	if ((err = zfs_mount(spa, 0, &zmount)) != 0) {
+	if ((err = zfs_mount_impl(spa, 0, &zmount)) != 0) {
 		DPRINTF("Failed to mount pool '%s' (%d)\n", spa->spa_name, err);
 		return (EFI_NOT_FOUND);
 	}
diff --git a/stand/efi/libefi/devicename.c b/stand/efi/libefi/devicename.c
index fbce0016338c..cdf4830697fd 100644
--- a/stand/efi/libefi/devicename.c
+++ b/stand/efi/libefi/devicename.c
@@ -209,8 +209,7 @@ efi_setcurrdev(struct env_var *ev, int flags, const void *value)
 	rv = efi_parsedev(&ncurr, value, NULL);
 	if (rv != 0)
 		return (rv);
-
 	free(ncurr);
-	env_setenv(ev->ev_name, flags | EV_NOHOOK, value, NULL, NULL);
-	return (0);
+
+	return (mount_currdev(ev, flags, value));
 }
diff --git a/stand/efi/loader/main.c b/stand/efi/loader/main.c
index 32b278950745..97bdb7f31e97 100644
--- a/stand/efi/loader/main.c
+++ b/stand/efi/loader/main.c
@@ -187,15 +187,12 @@ static void
 set_currdev(const char *devname)
 {
 
+	env_setenv("currdev", EV_VOLATILE, devname, efi_setcurrdev,
+	    env_nounset);
 	/*
-	 * Don't execute hooks here; we may need to try setting these more than
-	 * once here if we're probing for the ZFS pool we're supposed to boot.
-	 * The currdev hook is intended to just validate user input anyways,
-	 * while the loaddev hook makes it immutable once we've determined what
-	 * the proper currdev is.
+	 * Don't execute hook here; the loaddev hook makes it immutable
+	 * once we've determined what the proper currdev is.
 	 */
-	env_setenv("currdev", EV_VOLATILE | EV_NOHOOK, devname, efi_setcurrdev,
-	    env_nounset);
 	env_setenv("loaddev", EV_VOLATILE | EV_NOHOOK, devname, env_noset,
 	    env_nounset);
 }
@@ -932,6 +929,9 @@ main(int argc, CHAR16 *argv[])
 #endif
 	cons_probe();
 
+	/* Set up currdev variable to have hooks in place. */
+	env_setenv("currdev", EV_VOLATILE, "", efi_setcurrdev, env_nounset);
+
 	/* Init the time source */
 	efi_time_init();
 
diff --git a/stand/i386/gptzfsboot/Makefile b/stand/i386/gptzfsboot/Makefile
index aa8b497f8f3a..09615c5f97c1 100644
--- a/stand/i386/gptzfsboot/Makefile
+++ b/stand/i386/gptzfsboot/Makefile
@@ -66,7 +66,7 @@ gptldr.bin: gptldr.out
 gptldr.out: gptldr.o
 	${LD} ${LD_FLAGS} -e start --defsym ORG=${ORG1} -T ${LDSCRIPT} -o ${.TARGET} gptldr.o
 
-OBJS=	zfsboot.o sio.o cons.o bcache.o devopen.o disk.o part.o zfs_cmd.o
+OBJS=	zfsboot.o sio.o cons.o bcache.o devopen.o disk.o part.o zfs_cmd.o misc.o
 CLEANFILES+=	gptzfsboot.bin gptzfsboot.out ${OBJS} ${OPENCRYPTO_XTS}
 
 # i386 standalone support library
diff --git a/stand/i386/libi386/devicename.c b/stand/i386/libi386/devicename.c
index 926a2c5a850e..061f2ba9ce9f 100644
--- a/stand/i386/libi386/devicename.c
+++ b/stand/i386/libi386/devicename.c
@@ -204,12 +204,12 @@ i386_fmtdev(void *vdev)
 int
 i386_setcurrdev(struct env_var *ev, int flags, const void *value)
 {
-    struct i386_devdesc	*ncurr;
-    int			rv;
+	struct i386_devdesc	*ncurr;
+	int			rv;
 
-    if ((rv = i386_parsedev(&ncurr, value, NULL)) != 0)
-	return(rv);
-    free(ncurr);
-    env_setenv(ev->ev_name, flags | EV_NOHOOK, value, NULL, NULL);
-    return(0);
+	if ((rv = i386_parsedev(&ncurr, value, NULL)) != 0)
+		return (rv);
+	free(ncurr);
+
+	return (mount_currdev(ev, flags, value));
 }
diff --git a/stand/i386/loader/main.c b/stand/i386/loader/main.c
index 6b81ef411f7f..cb716d7a9f21 100644
--- a/stand/i386/loader/main.c
+++ b/stand/i386/loader/main.c
@@ -164,6 +164,10 @@ main(void)
 	}
 	cons_probe();
 
+	/* Set up currdev variable to have hooks in place. */
+	env_setenv("currdev", EV_VOLATILE | EV_NOHOOK, "",
+	    i386_setcurrdev, env_nounset);
+
 	/*
 	 * Initialise the block cache. Set the upper limit.
 	 */
diff --git a/stand/i386/zfsboot/Makefile b/stand/i386/zfsboot/Makefile
index 7e362b43a39c..243b5e99287c 100644
--- a/stand/i386/zfsboot/Makefile
+++ b/stand/i386/zfsboot/Makefile
@@ -59,7 +59,7 @@ zfsboot1: zfsldr.out
 zfsldr.out: zfsldr.o
 	${LD} ${LD_FLAGS} -e start --defsym ORG=${ORG1} -T ${LDSCRIPT} -o ${.TARGET} zfsldr.o
 
-OBJS=	zfsboot.o sio.o cons.o bcache.o devopen.o disk.o part.o zfs_cmd.o
+OBJS=	zfsboot.o sio.o cons.o bcache.o devopen.o disk.o part.o zfs_cmd.o misc.o
 CLEANFILES+=	zfsboot2 zfsboot.ld zfsboot.ldr zfsboot.bin zfsboot.out \
 		${OBJS}
 
diff --git a/stand/i386/zfsboot/zfsboot.c b/stand/i386/zfsboot/zfsboot.c
index 1067c4c6cd42..8dd3066f15e5 100644
--- a/stand/i386/zfsboot/zfsboot.c
+++ b/stand/i386/zfsboot/zfsboot.c
@@ -207,6 +207,10 @@ main(void)
 	snprintf(boot_devname, sizeof (boot_devname), "disk%d:",
 	    bd_bios2unit(bootinfo.bi_bios_dev));
 
+	/* Set up currdev variable to have hooks in place. */
+	env_setenv("currdev", EV_VOLATILE, "", i386_setcurrdev,
+	    env_nounset);
+
 	for (i = 0; devsw[i] != NULL; i++)
 		if (devsw[i]->dv_init != NULL)
 			(devsw[i]->dv_init)();
diff --git a/stand/libofw/devicename.c b/stand/libofw/devicename.c
index 786e5549c0fc..11c0a1719ff3 100644
--- a/stand/libofw/devicename.c
+++ b/stand/libofw/devicename.c
@@ -134,13 +134,13 @@ found:
 int
 ofw_setcurrdev(struct env_var *ev, int flags, const void *value)
 {
-    struct ofw_devdesc	*ncurr;
-    int			rv;
+	struct ofw_devdesc	*ncurr;
+	int			rv;
 
-    if ((rv = ofw_parsedev(&ncurr, value, NULL)) != 0)
-	return rv;
+	if ((rv = ofw_parsedev(&ncurr, value, NULL)) != 0)
+		return (rv);
 
-    free(ncurr);
-    env_setenv(ev->ev_name, flags | EV_NOHOOK, value, NULL, NULL);
-    return 0;
+	free(ncurr);
+
+	return (mount_currdev(ev, flags, value));
 }
diff --git a/stand/libsa/Makefile b/stand/libsa/Makefile
index 815f479b5a0a..6fe145d681db 100644
--- a/stand/libsa/Makefile
+++ b/stand/libsa/Makefile
@@ -131,7 +131,7 @@ CLEANDIRS+=${FAKE_DIRS}
 CLEANFILES+= ${SAFE_INCS} ${STAND_H_INC} ${OTHER_INC}
 
 # io routines
-SRCS+=	closeall.c dev.c ioctl.c nullfs.c stat.c \
+SRCS+=	closeall.c dev.c ioctl.c nullfs.c stat.c mount.c \
 	fstat.c close.c lseek.c open.c read.c write.c readdir.c
 
 # SMBios routines
diff --git a/stand/libsa/cd9660.c b/stand/libsa/cd9660.c
index c339678d96fe..57e4916c0fb5 100644
--- a/stand/libsa/cd9660.c
+++ b/stand/libsa/cd9660.c
@@ -70,6 +70,8 @@ static int	cd9660_read(struct open_file *f, void *buf, size_t size,
 static off_t	cd9660_seek(struct open_file *f, off_t offset, int where);
 static int	cd9660_stat(struct open_file *f, struct stat *sb);
 static int	cd9660_readdir(struct open_file *f, struct dirent *d);
+static int	cd9660_mount(const char *, const char *, void **);
+static int	cd9660_unmount(const char *, void *);
 static int	dirmatch(struct open_file *f, const char *path,
 		    struct iso_directory_record *dp, int use_rrip, int lenskip);
 static int	rrip_check(struct open_file *f, struct iso_directory_record *dp,
@@ -81,16 +83,28 @@ static ISO_SUSP_HEADER *susp_lookup_record(struct open_file *f,
 		    int lenskip);
 
 struct fs_ops cd9660_fsops = {
-	"cd9660",
-	cd9660_open,
-	cd9660_close,
-	cd9660_read,
-	null_write,
-	cd9660_seek,
-	cd9660_stat,
-	cd9660_readdir
+	.fs_name = "cd9660",
+	.fo_open = cd9660_open,
+	.fo_close = cd9660_close,
+	.fo_read = cd9660_read,
+	.fo_write = null_write,
+	.fo_seek = cd9660_seek,
+	.fo_stat = cd9660_stat,
+	.fo_readdir = cd9660_readdir,
+	.fo_mount = cd9660_mount,
+	.fo_unmount = cd9660_unmount
 };
 
+typedef struct cd9660_mnt {
+	struct devdesc			*cd_dev;
+	int				cd_fd;
+	struct iso_directory_record	cd_rec;
+	STAILQ_ENTRY(cd9660_mnt)	cd_link;
+} cd9660_mnt_t;
+
+typedef STAILQ_HEAD(cd9660_mnt_list, cd9660_mnt) cd9660_mnt_list_t;
+static cd9660_mnt_list_t mnt_list = STAILQ_HEAD_INITIALIZER(mnt_list);
+
 #define	F_ISDIR		0x0001		/* Directory */
 #define	F_ROOTDIR	0x0002		/* Root directory */
 #define	F_RR		0x0004		/* Rock Ridge on this volume */
@@ -281,26 +295,23 @@ dirmatch(struct open_file *f, const char *path, struct iso_directory_record *dp,
 }
 
 static int
-cd9660_open(const char *path, struct open_file *f)
+cd9660_read_dr(struct open_file *f, struct iso_directory_record *rec)
 {
-	struct file *fp = NULL;
-	void *buf;
 	struct iso_primary_descriptor *vd;
-	size_t read, dsize, off;
-	daddr_t bno, boff;
-	struct iso_directory_record rec;
-	struct iso_directory_record *dp = NULL;
-	int rc, first, use_rrip, lenskip;
-	bool isdir = false;
+	size_t read;
+	daddr_t bno;
+	int rc;
 
-	/* First find the volume descriptor */
-	buf = malloc(MAX(ISO_DEFAULT_BLOCK_SIZE,
+	errno = 0;
+	vd = malloc(MAX(ISO_DEFAULT_BLOCK_SIZE,
 	    sizeof(struct iso_primary_descriptor)));
-	vd = buf;
+	if (vd == NULL)
+		return (errno);
+
 	for (bno = 16;; bno++) {
 		twiddle(1);
 		rc = f->f_dev->dv_strategy(f->f_devdata, F_READ, cdb2devb(bno),
-					ISO_DEFAULT_BLOCK_SIZE, buf, &read);
+		    ISO_DEFAULT_BLOCK_SIZE, (char *)vd, &read);
 		if (rc)
 			goto out;
 		if (read != ISO_DEFAULT_BLOCK_SIZE) {
@@ -308,18 +319,61 @@ cd9660_open(const char *path, struct open_file *f)
 			goto out;
 		}
 		rc = EINVAL;
-		if (bcmp(vd->id, ISO_STANDARD_ID, sizeof vd->id) != 0)
+		if (bcmp(vd->id, ISO_STANDARD_ID, sizeof(vd->id)) != 0)
 			goto out;
 		if (isonum_711(vd->type) == ISO_VD_END)
 			goto out;
 		if (isonum_711(vd->type) == ISO_VD_PRIMARY)
 			break;
 	}
-	if (isonum_723(vd->logical_block_size) != ISO_DEFAULT_BLOCK_SIZE)
+	if (isonum_723(vd->logical_block_size) == ISO_DEFAULT_BLOCK_SIZE) {
+		bcopy(vd->root_directory_record, rec, sizeof(*rec));
+		rc = 0;
+	}
+out:
+	free(vd);
+	return (rc);
+}
+
+static int
+cd9660_open(const char *path, struct open_file *f)
+{
+	struct file *fp = NULL;
+	void *buf;
+	size_t read, dsize, off;
+	daddr_t bno, boff;
+	struct iso_directory_record rec;
+	struct iso_directory_record *dp = NULL;
+	int rc, first, use_rrip, lenskip;
+	bool isdir = false;
+	struct devdesc *dev;
+	cd9660_mnt_t *mnt;
+
+	/* First find the volume descriptor */
+	errno = 0;
+	buf = malloc(MAX(ISO_DEFAULT_BLOCK_SIZE,
+	    sizeof(struct iso_primary_descriptor)));
+	if (buf == NULL)
+		return (errno);
+
+	dev = f->f_devdata;
+	STAILQ_FOREACH(mnt, &mnt_list, cd_link) {
+		if (dev->d_dev->dv_type == mnt->cd_dev->d_dev->dv_type &&
+		    dev->d_unit == mnt->cd_dev->d_unit)
+			break;
+	}
+
+	rc = 0;
+	if (mnt == NULL)
+		rc = cd9660_read_dr(f, &rec);
+	else
+		rec = mnt->cd_rec;
+
+	if (rc != 0)
 		goto out;
 
-	bcopy(vd->root_directory_record, &rec, sizeof(rec));
-	if (*path == '/') path++; /* eat leading '/' */
+	if (*path == '/')
+		path++; /* eat leading '/' */
 
 	first = 1;
 	use_rrip = 0;
@@ -621,3 +675,57 @@ cd9660_stat(struct open_file *f, struct stat *sb)
 	sb->st_size = fp->f_size;
 	return 0;
 }
+
+static int
+cd9660_mount(const char *dev, const char *path, void **data)
+{
+	cd9660_mnt_t *mnt;
+	struct open_file *f;
+	char *fs;
+
+	errno = 0;
+	mnt = calloc(1, sizeof(*mnt));
+	if (mnt == NULL)
+		return (errno);
+	mnt->cd_fd = -1;
+
+	if (asprintf(&fs, "%s%s", dev, path) < 0)
+		goto done;
+
+	mnt->cd_fd = open(fs, O_RDONLY);
+	free(fs);
+	if (mnt->cd_fd == -1)
+		goto done;
+
+	f = fd2open_file(mnt->cd_fd);
+	/* Is it cd9660 file system? */
+	if (strcmp(f->f_ops->fs_name, "cd9660") == 0) {
+		mnt->cd_dev = f->f_devdata;	/* borrowed from f, not owned */
+		errno = cd9660_read_dr(f, &mnt->cd_rec);
+		/* Link the mount only after its root record was read. */
+		if (errno == 0)
+			STAILQ_INSERT_TAIL(&mnt_list, mnt, cd_link);
+	} else {
+		errno = ENXIO;
+	}
+done:
+	if (errno != 0) {
+		if (mnt->cd_fd >= 0)
+			close(mnt->cd_fd);
+		free(mnt);
+	} else {
+		*data = mnt;
+	}
+	return (errno);
+}
+
+static int
+cd9660_unmount(const char *dev __unused, void *data)
+{
+	cd9660_mnt_t *mnt = data;
+
+	STAILQ_REMOVE(&mnt_list, mnt, cd9660_mnt, cd_link);
+	close(mnt->cd_fd);
+	free(mnt);
+	return (0);
+}
diff --git a/stand/libsa/dosfs.c b/stand/libsa/dosfs.c
index 656af3642c88..452a79ae12dc 100644
--- a/stand/libsa/dosfs.c
+++ b/stand/libsa/dosfs.c
@@ -38,9 +38,19 @@ __FBSDID("$FreeBSD$");
 #include <stddef.h>
 
 #include "stand.h"
+#include "disk.h"
 
 #include "dosfs.h"
 
+typedef struct dos_mnt {
+	char			*dos_dev;
+	DOS_FS			*dos_fs;
+	int			dos_fd;
+	STAILQ_ENTRY(dos_mnt)	dos_link;
+} dos_mnt_t;
+
+typedef STAILQ_HEAD(dos_mnt_list, dos_mnt) dos_mnt_list_t;
+static dos_mnt_list_t mnt_list = STAILQ_HEAD_INITIALIZER(mnt_list);
 
 static int	dos_open(const char *path, struct open_file *fd);
 static int	dos_close(struct open_file *fd);
@@ -48,16 +58,20 @@ static int	dos_read(struct open_file *fd, void *buf, size_t size, size_t *resid)
 static off_t	dos_seek(struct open_file *fd, off_t offset, int whence);
 static int	dos_stat(struct open_file *fd, struct stat *sb);
 static int	dos_readdir(struct open_file *fd, struct dirent *d);
+static int	dos_mount(const char *dev, const char *path, void **data);
+static int	dos_unmount(const char *dev, void *data);
 
 struct fs_ops dosfs_fsops = {
-	"dosfs",
-	dos_open,
-	dos_close,
-	dos_read,
-	null_write,
-	dos_seek,
-	dos_stat,
-	dos_readdir
+	.fs_name = "dosfs",
+	.fo_open = dos_open,
+	.fo_close = dos_close,
+	.fo_read = dos_read,
+	.fo_write = null_write,
+	.fo_seek = dos_seek,
+	.fo_stat = dos_stat,
+	.fo_readdir = dos_readdir,
+	.fo_mount = dos_mount,
+	.fo_unmount = dos_unmount
 };
 
 #define SECSIZ  512             /* sector size */
@@ -179,12 +193,11 @@ dos_read_fatblk(DOS_FS *fs, struct open_file *fd, u_int blknum)
  * Mount DOS filesystem
  */
 static int
-dos_mount(DOS_FS *fs, struct open_file *fd)
+dos_mount_impl(DOS_FS *fs, struct open_file *fd)
 {
 	int err;
 	u_char *buf;
 
-	bzero(fs, sizeof(DOS_FS));
 	fs->fd = fd;
 
 	if ((buf = malloc(secbyt(1))) == NULL)
@@ -215,11 +228,70 @@ dos_mount(DOS_FS *fs, struct open_file *fd)
 	return (0);
 }
 
+static int
+dos_mount(const char *dev, const char *path, void **data)
+{
+	char *fs;
+	dos_mnt_t *mnt;
+	struct open_file *f;
+	DOS_FILE *df;
+
+	errno = 0;
+	mnt = calloc(1, sizeof(*mnt));
+	if (mnt == NULL)
+		return (errno);
+	mnt->dos_fd = -1;
+	mnt->dos_dev = strdup(dev);
+	if (mnt->dos_dev == NULL)
+		goto done;
+
+	if (asprintf(&fs, "%s%s", dev, path) < 0)
+		goto done;
+
+	mnt->dos_fd = open(fs, O_RDONLY);
+	free(fs);
+	if (mnt->dos_fd == -1)
+		goto done;
+
+	f = fd2open_file(mnt->dos_fd);
+	if (strcmp(f->f_ops->fs_name, "dosfs") == 0) {
+		df = f->f_fsdata;
+		mnt->dos_fs = df->fs;
+		STAILQ_INSERT_TAIL(&mnt_list, mnt, dos_link);
+	} else {
+                errno = ENXIO;
+	}
+
+done:
+	if (errno != 0) {
+		free(mnt->dos_dev);
+		if (mnt->dos_fd >= 0)
+			close(mnt->dos_fd);
+		free(mnt);
+	} else {
+		*data = mnt;
+	}
+
+	return (errno);
+}
+
+static int
+dos_unmount(const char *dev __unused, void *data)
+{
+	dos_mnt_t *mnt = data;
+
+	STAILQ_REMOVE(&mnt_list, mnt, dos_mnt, dos_link);
+	free(mnt->dos_dev);
+	close(mnt->dos_fd);
+	free(mnt);
+	return (0);
+}
+
 /*
  * Unmount mounted filesystem
  */
 static int
-dos_unmount(DOS_FS *fs)
+dos_unmount_impl(DOS_FS *fs)
 {
 	if (fs->links)
 		return (EBUSY);
@@ -237,19 +309,32 @@ dos_open(const char *path, struct open_file *fd)
 	DOS_DE *de;
 	DOS_FILE *f;
 	DOS_FS *fs;
+	dos_mnt_t *mnt;
+	const char *dev;
 	u_int size, clus;
 	int err;
 
-	/* Allocate mount structure, associate with open */
-	if ((fs = malloc(sizeof(DOS_FS))) == NULL)
-		return (errno);
-	if ((err = dos_mount(fs, fd))) {
-		free(fs);
-		return (err);
+	dev = disk_fmtdev(fd->f_devdata);
+	STAILQ_FOREACH(mnt, &mnt_list, dos_link) {
+		if (strcmp(dev, mnt->dos_dev) == 0)
+			break;
+	}
+
+	if (mnt == NULL) {
+		/* Allocate mount structure, associate with open */
+		if ((fs = malloc(sizeof(DOS_FS))) == NULL)
+			return (errno);
+		if ((err = dos_mount_impl(fs, fd))) {
+			free(fs);
+			return (err);
+		}
+	} else {
+		fs = mnt->dos_fs;
 	}
 
 	if ((err = namede(fs, path, &de))) {
-		dos_unmount(fs);
+		if (mnt == NULL)
+			dos_unmount_impl(fs);
 		return (err);
 	}
 
@@ -259,19 +344,20 @@ dos_open(const char *path, struct open_file *fd)
 	if ((!(de->attr & FA_DIR) && (!clus != !size)) ||
 	    ((de->attr & FA_DIR) && size) ||
 	    (clus && !okclus(fs, clus))) {
-		dos_unmount(fs);
+		if (mnt == NULL)
+			dos_unmount_impl(fs);
 		return (EINVAL);
 	}
-	if ((f = malloc(sizeof(DOS_FILE))) == NULL) {
+	if ((f = calloc(1, sizeof(DOS_FILE))) == NULL) {
 		err = errno;
-		dos_unmount(fs);
+		if (mnt == NULL)
+			dos_unmount_impl(fs);
 		return (err);
 	}
-	bzero(f, sizeof(DOS_FILE));
 	f->fs = fs;
 	fs->links++;
 	f->de = *de;
-	fd->f_fsdata = (void *)f;
+	fd->f_fsdata = f;
 	return (0);
 }
 
@@ -381,7 +467,7 @@ dos_close(struct open_file *fd)
 
 	f->fs->links--;
 	free(f);
-	dos_unmount(fs);
+	dos_unmount_impl(fs);
 	return (0);
 }
 
diff --git a/stand/libsa/mount.c b/stand/libsa/mount.c
new file mode 100644
index 000000000000..c9abe6b945fe
--- /dev/null
+++ b/stand/libsa/mount.c
@@ -0,0 +1,163 @@
+/*-
+ * Copyright 2021 Toomas Soome <tsoome@me.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <stand.h>
+#include <sys/queue.h>
+
+/*
+ * While setting "currdev" environment variable, also "mount" the
+ * new root file system. This is done to hold disk device open
+ * in between file accesses, and thus preserve block cache for
+ * this device. Additionally, this allows us to optimize filesystem
+ * access by sharing filesystem metadata (like superblock).
+ */
+
+typedef STAILQ_HEAD(mnt_info_list, mnt_info) mnt_info_list_t;
+
+typedef struct mnt_info {
+	STAILQ_ENTRY(mnt_info)	mnt_link;	/* link in mount list */
+	const struct fs_ops	*mnt_fs;
+	char			*mnt_dev;
+	char			*mnt_path;
+	unsigned		mnt_refcount;
+	void			*mnt_data;	/* Private state */
+} mnt_info_t;
+
+/* list of mounted filesystems. */
+static mnt_info_list_t mnt_list = STAILQ_HEAD_INITIALIZER(mnt_list);
+
+static void
+free_mnt(mnt_info_t *mnt)
+{
+	free(mnt->mnt_dev);
+	free(mnt->mnt_path);
+	free(mnt);
+}
+
+static int
+add_mnt_info(struct fs_ops *fs, const char *dev, const char *path, void *data)
+{
+	mnt_info_t *mnt;
+
+	mnt = malloc(sizeof(*mnt));
+	if (mnt == NULL)
+		return (ENOMEM);
+
+	mnt->mnt_fs = fs;
+	mnt->mnt_dev = strdup(dev);
+	mnt->mnt_path = strdup(path);
+	mnt->mnt_data = data;
+	mnt->mnt_refcount = 1;
+
+	if (mnt->mnt_dev == NULL || mnt->mnt_path == NULL) {
+		free_mnt(mnt);
+		return (ENOMEM);
+	}
+	STAILQ_INSERT_TAIL(&mnt_list, mnt, mnt_link);
+	return (0);
+}
+
+static void
+delete_mnt_info(mnt_info_t *mnt)
+{
+	STAILQ_REMOVE(&mnt_list, mnt, mnt_info, mnt_link);
+	free_mnt(mnt);
+}
+
+int
+mount(const char *dev, const char *path, int flags __unused, void *data)
+{
+	mnt_info_t *mnt;
+	int rc = -1;
+
+	/* Is it already mounted? */
+	STAILQ_FOREACH(mnt, &mnt_list, mnt_link) {
+		if (strcmp(dev, mnt->mnt_dev) == 0 &&
+		    strcmp(path, mnt->mnt_path) == 0) {
+			mnt->mnt_refcount++;
+			return (0);
+		}
+	}
+
+	/* Not mounted yet: the first fo_mount() to succeed claims it. */
+	for (int i = 0; file_system[i] != NULL; i++) {
+		struct fs_ops *fs = file_system[i];
+
+		if (fs->fo_mount == NULL)
+			continue;
+
+		if (fs->fo_mount(dev, path, &data) != 0)
+			continue;
+
+		/* mnt is NULL once the lookup above falls through; use fs. */
+		rc = add_mnt_info(fs, dev, path, data);
+		if (rc != 0 && fs->fo_unmount != NULL) {
+			printf("failed to mount %s: %s\n", dev,
+			    strerror(rc));
+			(void)fs->fo_unmount(dev, data);
+		}
+		break;
+	}
+
+	/*
+	 * if rc is -1, it means we have no file system with fo_mount()
+	 * callback, or all fo_mount() calls failed. As long as we
+	 * have missing fo_mount() callbacks, we allow mount() to return 0.
+	 */
+	if (rc == -1)
+		rc = 0;
+
+	return (rc);
+}
+
+int
+unmount(const char *dev, int flags __unused)
+{
+	mnt_info_t *mnt;
+	int rv;
+
+	rv = 0;
+	STAILQ_FOREACH(mnt, &mnt_list, mnt_link) {
+		if (strcmp(dev, mnt->mnt_dev) == 0) {
+			if (mnt->mnt_refcount > 1) {
+				mnt->mnt_refcount--;
+				break;
+			}
+
+			if (mnt->mnt_fs->fo_unmount != NULL)
+				rv = mnt->mnt_fs->fo_unmount(dev,
+				    mnt->mnt_data);
+			delete_mnt_info(mnt);
+			break;
+		}
+	}
+
+	if (rv != 0)
+		printf("failed to unmount %s: %d\n", dev, rv);
+	return (0);
+}
diff --git a/stand/libsa/netif.c b/stand/libsa/netif.c
index 2d32ccd9de7e..f199da3b6e95 100644
--- a/stand/libsa/netif.c
+++ b/stand/libsa/netif.c
@@ -37,7 +37,6 @@ __FBSDID("$FreeBSD$");
 #include <sys/param.h>
 #include <sys/types.h>
 #include <sys/cdefs.h>
-#include <sys/mount.h>
 #include <string.h>
 
 #include <netinet/in.h>
diff --git a/stand/libsa/stand.h b/stand/libsa/stand.h
index 535fee31d586..ab0961b77086 100644
--- a/stand/libsa/stand.h
+++ b/stand/libsa/stand.h
@@ -111,6 +111,8 @@ struct fs_ops {
     off_t	(*fo_seek)(struct open_file *f, off_t offset, int where);
     int		(*fo_stat)(struct open_file *f, struct stat *sb);
     int		(*fo_readdir)(struct open_file *f, struct dirent *d);
+    int		(*fo_mount)(const char *, const char *, void **);
+    int		(*fo_unmount)(const char *, void *);
 };
 
 /*
@@ -283,6 +285,8 @@ extern void	ngets(char *, int);
 #define gets(x)	ngets((x), 0)
 extern int	fgetstr(char *buf, int size, int fd);
 
+extern int	mount(const char *dev, const char *path, int flags, void *data);
+extern int	unmount(const char *dev, int flags);
 extern int	open(const char *, int);
 #define	O_RDONLY	0x0
 #define O_WRONLY	0x1
diff --git a/stand/libsa/ufs.c b/stand/libsa/ufs.c
index 02d13aa29722..a4015dea74c2 100644
--- a/stand/libsa/ufs.c
+++ b/stand/libsa/ufs.c
@@ -81,6 +81,7 @@ __FBSDID("$FreeBSD$");
 #include <ufs/ufs/dir.h>
 #include <ufs/ffs/fs.h>
 #include "stand.h"
+#include "disk.h"
 #include "string.h"
 
 static int	ufs_open(const char *path, struct open_file *f);
@@ -91,16 +92,20 @@ static int	ufs_read(struct open_file *f, void *buf, size_t size, size_t *resid);
 static off_t	ufs_seek(struct open_file *f, off_t offset, int where);
 static int	ufs_stat(struct open_file *f, struct stat *sb);
 static int	ufs_readdir(struct open_file *f, struct dirent *d);
+static int	ufs_mount(const char *dev, const char *path, void **data);
+static int	ufs_unmount(const char *dev, void *data);
 
 struct fs_ops ufs_fsops = {
-	"ufs",
-	ufs_open,
-	ufs_close,
-	ufs_read,
-	ufs_write,
-	ufs_seek,
-	ufs_stat,
-	ufs_readdir
+	.fs_name = "ufs",
+	.fo_open = ufs_open,
+	.fo_close = ufs_close,
+	.fo_read = ufs_read,
+	.fo_write = ufs_write,
+	.fo_seek = ufs_seek,
+	.fo_stat = ufs_stat,
+	.fo_readdir = ufs_readdir,
+	.fo_mount = ufs_mount,
+	.fo_unmount = ufs_unmount
 };
 
 /*
@@ -130,6 +135,15 @@ struct file {
 	((fp)->f_fs->fs_magic == FS_UFS1_MAGIC ? \
 	(fp)->f_di.di1.field : (fp)->f_di.di2.field)
 
+typedef struct ufs_mnt {
+	char			*um_dev;
+	int			um_fd;
+	STAILQ_ENTRY(ufs_mnt)	um_link;
+} ufs_mnt_t;
+
+typedef STAILQ_HEAD(ufs_mnt_list, ufs_mnt) ufs_mnt_list_t;
+static ufs_mnt_list_t mnt_list = STAILQ_HEAD_INITIALIZER(mnt_list);
+
 static int	read_inode(ino_t, struct open_file *);
 static int	block_map(struct open_file *, ufs2_daddr_t, ufs2_daddr_t *);
 static int	buf_read_file(struct open_file *, char **, size_t *);
@@ -150,9 +164,7 @@ int	ffs_sbget(void *, struct fs **, off_t, char *,
  * Read a new inode into a file structure.
  */
 static int
-read_inode(inumber, f)
-	ino_t inumber;
-	struct open_file *f;
+read_inode(ino_t inumber, struct open_file *f)
 {
 	struct file *fp = (struct file *)f->f_fsdata;
 	struct fs *fs = fp->f_fs;
@@ -207,10 +219,8 @@ out:
  * contains that block.
  */
 static int
-block_map(f, file_block, disk_block_p)
-	struct open_file *f;
-	ufs2_daddr_t file_block;
-	ufs2_daddr_t *disk_block_p;	/* out */
+block_map(struct open_file *f, ufs2_daddr_t file_block,
*** 588 LINES SKIPPED ***