git: 51ab5e0d82c1 - stable/13 - stand/multiboot: adjust the protocol between loader and kernel

Roger Pau Monné royger at FreeBSD.org
Wed Feb 3 14:11:41 UTC 2021


The branch stable/13 has been updated by royger:

URL: https://cgit.FreeBSD.org/src/commit/?id=51ab5e0d82c1e9607c060976259bb9ae0d656a80

commit 51ab5e0d82c1e9607c060976259bb9ae0d656a80
Author:     Roger Pau Monné <royger at FreeBSD.org>
AuthorDate: 2021-01-27 10:12:07 +0000
Commit:     Roger Pau Monné <royger at FreeBSD.org>
CommitDate: 2021-02-03 13:07:43 +0000

    stand/multiboot: adjust the protocol between loader and kernel
    
    There's a currently ad-hoc protocol to hand off the FreeBSD kernel
    payload between the loader and the kernel itself when Xen is in the
    middle of the picture. Such protocol wasn't very resilient to changes
    to the loader itself, because it relied on moving metadata around to
    package it using a certain layout. This has proven to be fragile, so
    replace it with a more robust version.
    
    The new protocol requires using a xen_header structure that will be
    used to pass data between the FreeBSD loader and the FreeBSD kernel
    when booting in dom0 mode. At the moment the only data conveyed is the
    offset of the start of the module metadata relative to the start of the
    module itself.
    
    This is a slightly disruptive change since it also requires a change
    to the kernel which is contained in this patch. In order to update
    with this change the kernel must be updated before updating the
    loader, as described in the handbook. Note this is only required when
    booting a FreeBSD/Xen dom0. This change doesn't affect the normal
    FreeBSD boot protocol.
    
    This fixes booting FreeBSD/Xen in dom0 mode after
    3630506b9daec9167a89bc4525638ea45a00769e.
    
    (cherry picked from commit b6d85a5f51e4147452b35d76478fb9e191b7734b)
---
 stand/i386/libi386/multiboot.c | 156 +++++++++++------------------------------
 sys/x86/include/metadata.h     |  21 ++++++
 sys/x86/xen/pv.c               |  67 +++++++++++++-----
 3 files changed, 111 insertions(+), 133 deletions(-)

diff --git a/stand/i386/libi386/multiboot.c b/stand/i386/libi386/multiboot.c
index a29696e4baec..71fd63bb4c88 100644
--- a/stand/i386/libi386/multiboot.c
+++ b/stand/i386/libi386/multiboot.c
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/stdint.h>
 #define _MACHINE_ELF_WANT_32BIT
 #include <machine/elf.h>
+#include <machine/metadata.h>
 #include <string.h>
 #include <stand.h>
 
@@ -55,11 +56,6 @@ __FBSDID("$FreeBSD$");
 #define MULTIBOOT_SUPPORTED_FLAGS \
 				(MULTIBOOT_PAGE_ALIGN|MULTIBOOT_MEMORY_INFO)
 #define NUM_MODULES		2
-#define METADATA_FIXED_SIZE	(PAGE_SIZE*4)
-#define METADATA_MODULE_SIZE	PAGE_SIZE
-
-#define METADATA_RESV_SIZE(mod_num) \
-	roundup(METADATA_FIXED_SIZE + METADATA_MODULE_SIZE * mod_num, PAGE_SIZE)
 
 extern int elf32_loadfile_raw(char *filename, uint64_t dest,
     struct preloaded_file **result, int multiboot);
@@ -81,32 +77,6 @@ extern void multiboot_tramp();
 
 static const char mbl_name[] = "FreeBSD Loader";
 
-static int
-num_modules(struct preloaded_file *kfp)
-{
-	struct kernel_module	*kmp;
-	int			 mod_num = 0;
-
-	for (kmp = kfp->f_modules; kmp != NULL; kmp = kmp->m_next)
-		mod_num++;
-
-	return (mod_num);
-}
-
-static vm_offset_t
-max_addr(void)
-{
-	struct preloaded_file	*fp;
-	vm_offset_t		 addr = 0;
-
-	for (fp = file_findfile(NULL, NULL); fp != NULL; fp = fp->f_next) {
-		if (addr < (fp->f_addr + fp->f_size))
-			addr = fp->f_addr + fp->f_size;
-	}
-
-	return (addr);
-}
-
 static int
 multiboot_loadfile(char *filename, uint64_t dest,
     struct preloaded_file **result)
@@ -188,7 +158,6 @@ out:
 static int
 multiboot_exec(struct preloaded_file *fp)
 {
-	vm_offset_t			 module_start, last_addr, metadata_size;
 	vm_offset_t			 modulep, kernend, entry;
 	struct file_metadata		*md;
 	Elf_Ehdr			*ehdr;
@@ -197,6 +166,9 @@ multiboot_exec(struct preloaded_file *fp)
 	char				*cmdline = NULL;
 	size_t				 len;
 	int				 error, mod_num;
+	struct xen_header		 header;
+
+	CTASSERT(sizeof(header) <= PAGE_SIZE);
 
 	/*
 	 * Don't pass the memory size found by the bootloader, the memory
@@ -260,13 +232,11 @@ multiboot_exec(struct preloaded_file *fp)
 	 * FreeBSD kernel loaded as a raw file. The second module is going
 	 * to contain the metadata info and the loaded modules.
 	 *
-	 * On native FreeBSD loads all the modules and then places the
-	 * metadata info at the end, but this is painful when running on Xen,
-	 * because it relocates the second multiboot module wherever it
-	 * likes. In order to workaround this limitation the metadata
-	 * information is placed at the start of the second module and
-	 * the original modulep value is saved together with the other
-	 * metadata, so we can relocate everything.
+	 * There's a small header prefixed in the second module that contains
+	 * some information required to calculate the relocated address of
+	 * modulep based on the original offset of modulep from the start of
+	 * the module address. Note other fields might be added to this header
+	 * if required.
 	 *
 	 * Native layout:
 	 *           fp->f_addr + fp->f_size
@@ -277,27 +247,16 @@ multiboot_exec(struct preloaded_file *fp)
 	 * +---------+----------------+------------+
 	 * fp->f_addr                 modulep      kernend
 	 *
-	 * Xen layout:
-	 *
-	 * Initial:
-	 *                      fp->f_addr + fp->f_size
-	 * +---------+----------+----------------+------------+
-	 * |         |          |                |            |
-	 * | Kernel  | Reserved |    Modules     |  Metadata  |
-	 * |         |          |                |  dry run   |
-	 * +---------+----------+----------------+------------+
-	 * fp->f_addr
-	 *
-	 * After metadata polacement (ie: final):
-	 *                                  fp->f_addr + fp->f_size
-	 * +-----------+---------+----------+----------------+
-	 * |           |         |          |                |
-	 * |  Kernel   |  Free   | Metadata |    Modules     |
-	 * |           |         |          |                |
-	 * +-----------+---------+----------+----------------+
-	 * fp->f_addr            modulep                     kernend
-	 * \__________/          \__________________________/
-	 *  Multiboot module 0    Multiboot module 1
+	 * Xen dom0 layout:
+	 * fp->f_addr             fp->f_addr + fp->f_size
+	 * +---------+------------+----------------+------------+
+	 * |         |            |                |            |
+	 * | Kernel  | xen_header |    Modules     |  Metadata  |
+	 * |         |            |                |            |
+	 * +---------+------------+----------------+------------+
+	 * 	                                   modulep      kernend
+	 * \________/\__________________________________________/
+	 *  module 0                 module 1
 	 */
 
 	fp = file_findfile(NULL, "elf kernel");
@@ -307,13 +266,6 @@ multiboot_exec(struct preloaded_file *fp)
 		goto error;
 	}
 
-	if (fp->f_metadata != NULL) {
-		printf("FreeBSD kernel already contains metadata, aborting\n");
-		error = EINVAL;
-		goto error;
-	}
-
-
 	mb_mod = malloc(sizeof(struct multiboot_mod_list) * NUM_MODULES);
 	if (mb_mod == NULL) {
 		error = ENOMEM;
@@ -322,61 +274,34 @@ multiboot_exec(struct preloaded_file *fp)
 
 	bzero(mb_mod, sizeof(struct multiboot_mod_list) * NUM_MODULES);
 
-	/*
-	 * Calculate how much memory is needed for the metatdata. We did
-	 * an approximation of the maximum size when loading the kernel,
-	 * but now we know the exact size, so we can release some of this
-	 * preallocated memory if not needed.
-	 */
-	last_addr = roundup(max_addr(), PAGE_SIZE);
-	mod_num = num_modules(fp);
-
-	/*
-	 * Place the metadata after the last used address in order to
-	 * calculate it's size, this will not be used.
-	 */
-	error = bi_load64(fp->f_args, last_addr, &modulep, &kernend, 0);
+	error = bi_load64(fp->f_args, 0, &modulep, &kernend, 0);
 	if (error != 0) {
 		printf("bi_load64 failed: %d\n", error);
 		goto error;
 	}
-	metadata_size = roundup(kernend - last_addr, PAGE_SIZE);
 
-	/* Check that the size is not greater than what we have reserved */
-	if (metadata_size > METADATA_RESV_SIZE(mod_num)) {
-		printf("Required memory for metadata is greater than reserved "
-		    "space, please increase METADATA_FIXED_SIZE and "
-		    "METADATA_MODULE_SIZE and rebuild the loader\n");
+	mb_mod[0].mod_start = fp->f_addr;
+	mb_mod[0].mod_end = fp->f_addr + fp->f_size - PAGE_SIZE;
+
+	mb_mod[1].mod_start = mb_mod[0].mod_end;
+	mb_mod[1].mod_end = kernend;
+	/* Indicate the kernel metadata is prefixed with a xen_header. */
+	cmdline = strdup("header");
+	if (cmdline == NULL) {
+		printf("Out of memory, aborting\n");
 		error = ENOMEM;
 		goto error;
 	}
-
-	/* Clean the metadata added to the kernel in the bi_load64 dry run */
-	file_removemetadata(fp);
-
-	/*
-	 * This is the position where the second multiboot module
-	 * will be placed.
-	 */
-	module_start = fp->f_addr + fp->f_size - metadata_size;
-
-	error = bi_load64(fp->f_args, module_start, &modulep, &kernend, 0);
-	if (error != 0) {
-		printf("bi_load64 failed: %d\n", error);
-		goto error;
-	}
-
-	mb_mod[0].mod_start = fp->f_addr;
-	mb_mod[0].mod_end = fp->f_addr + fp->f_size;
-	mb_mod[0].mod_end -= METADATA_RESV_SIZE(mod_num);
-
-	mb_mod[1].mod_start = module_start;
-	mb_mod[1].mod_end = last_addr;
+	mb_mod[1].cmdline = VTOP(cmdline);
 
 	mb_info->mods_count = NUM_MODULES;
 	mb_info->mods_addr = VTOP(mb_mod);
 	mb_info->flags |= MULTIBOOT_INFO_MODS;
 
+	header.flags = XENHEADER_HAS_MODULEP_OFFSET;
+	header.modulep_offset = modulep - mb_mod[1].mod_start;
+	archsw.arch_copyin(&header, mb_mod[1].mod_start, sizeof(header));
+
 	dev_cleanup();
 	__exec((void *)VTOP(multiboot_tramp), (void *)entry,
 	    (void *)VTOP(mb_info));
@@ -434,17 +359,14 @@ multiboot_obj_loadfile(char *filename, uint64_t dest,
 			return (EINVAL);
 		}
 
+
 		/*
-		 * Save space at the end of the kernel in order to place
-		 * the metadata information. We do an approximation of the
-		 * max metadata size, this is not optimal but it's probably
-		 * the best we can do at this point. Once all modules are
-		 * loaded and the size of the metadata is known this
-		 * space will be recovered if not used.
+		 * Reserve one page at the end of the kernel to place some
+		 * metadata in order to cope for Xen relocating the modules and
+		 * the metadata information.
 		 */
-		mod_num = num_modules(rfp);
 		rfp->f_size = roundup(rfp->f_size, PAGE_SIZE);
-		rfp->f_size += METADATA_RESV_SIZE(mod_num);
+		rfp->f_size += PAGE_SIZE;
 		*result = rfp;
 	} else {
 		/* The rest should be loaded as regular modules */
diff --git a/sys/x86/include/metadata.h b/sys/x86/include/metadata.h
index d157df0307fd..be0eae43d34c 100644
--- a/sys/x86/include/metadata.h
+++ b/sys/x86/include/metadata.h
@@ -68,4 +68,25 @@ struct vbe_fb {
 	uint32_t	fb_bpp;
 };
 
+/*
+ * The structure below is used when FreeBSD kernel is booted as a dom0 kernel
+ * from Xen. In such scenario we need to accommodate the modules and the
+ * metadata as a contiguous memory region, so it can be passed as a multiboot
+ * module, and some extra information is required which is conveyed from the
+ * loader to the kernel using the xen_header structure below.
+ *
+ * See the comment in multiboot.c about how the structure below is packaged
+ * together with the rest of the kernel payload data.
+ */
+struct xen_header {
+	uint64_t	flags;
+#define XENHEADER_HAS_MODULEP_OFFSET (1ull << 0)
+
+	/*
+	 * Offset of the modulep location from the start of the multiboot
+	 * module blob.
+	 */
+	uint64_t	modulep_offset;
+};
+
 #endif /* !_MACHINE_METADATA_H_ */
diff --git a/sys/x86/xen/pv.c b/sys/x86/xen/pv.c
index 34fcbd990491..2fd698772f9d 100644
--- a/sys/x86/xen/pv.c
+++ b/sys/x86/xen/pv.c
@@ -568,25 +568,60 @@ xen_pvh_parse_preload_data(uint64_t modulep)
 
 	if (start_info->modlist_paddr != 0) {
 		struct hvm_modlist_entry *mod;
+		const char *cmdline;
 
 		mod = (struct hvm_modlist_entry *)
 		    (start_info->modlist_paddr + KERNBASE);
-		preload_metadata = (caddr_t)(mod[0].paddr + KERNBASE);
-
-		kmdp = preload_search_by_type("elf kernel");
-		if (kmdp == NULL)
-			kmdp = preload_search_by_type("elf64 kernel");
-		KASSERT(kmdp != NULL, ("unable to find kernel"));
-
-		/*
-		 * Xen has relocated the metadata and the modules,
-		 * so we need to recalculate it's position. This is
-		 * done by saving the original modulep address and
-		 * then calculating the offset with mod_start,
-		 * which contains the relocated modulep address.
-		 */
-		metadata = MD_FETCH(kmdp, MODINFOMD_MODULEP, vm_paddr_t);
-		off = mod[0].paddr + KERNBASE - metadata;
+		cmdline = mod[0].cmdline_paddr ?
+		    (const char *)(mod[0].cmdline_paddr + KERNBASE) : NULL;
+
+		if (strcmp(cmdline, "header") == 0) {
+			struct xen_header *header;
+
+			header = (struct xen_header *)(mod[0].paddr + KERNBASE);
+
+			if ((header->flags & XENHEADER_HAS_MODULEP_OFFSET) !=
+			    XENHEADER_HAS_MODULEP_OFFSET) {
+				xc_printf("Unable to load module metadata\n");
+				HYPERVISOR_shutdown(SHUTDOWN_crash);
+			}
+
+			preload_metadata = (caddr_t)(mod[0].paddr +
+			    header->modulep_offset + KERNBASE);
+
+			kmdp = preload_search_by_type("elf kernel");
+			if (kmdp == NULL)
+				kmdp = preload_search_by_type("elf64 kernel");
+			if (kmdp == NULL) {
+				xc_printf("Unable to find kernel\n");
+				HYPERVISOR_shutdown(SHUTDOWN_crash);
+			}
+
+			/*
+			 * Xen has relocated the metadata and the modules, so
+			 * we need to recalculate it's position. This is done
+			 * by saving the original modulep address and then
+			 * calculating the offset from the real modulep
+			 * position.
+			 */
+			metadata = MD_FETCH(kmdp, MODINFOMD_MODULEP,
+			    vm_paddr_t);
+			off = mod[0].paddr + header->modulep_offset - metadata +
+			    KERNBASE;
+		} else {
+			preload_metadata = (caddr_t)(mod[0].paddr + KERNBASE);
+
+			kmdp = preload_search_by_type("elf kernel");
+			if (kmdp == NULL)
+				kmdp = preload_search_by_type("elf64 kernel");
+			if (kmdp == NULL) {
+				xc_printf("Unable to find kernel\n");
+				HYPERVISOR_shutdown(SHUTDOWN_crash);
+			}
+
+			metadata = MD_FETCH(kmdp, MODINFOMD_MODULEP, vm_paddr_t);
+			off = mod[0].paddr + KERNBASE - metadata;
+		}
 
 		preload_bootstrap_relocate(off);
 


More information about the dev-commits-src-branches mailing list