git: d1ca8cc638c4 - main - x86: Add MPTABLE_LINUX_BUG_COMPAT option

From: Colin Percival <cperciva_at_FreeBSD.org>
Date: Tue, 18 Oct 2022 06:03:13 UTC
The branch main has been updated by cperciva:

URL: https://cgit.FreeBSD.org/src/commit/?id=d1ca8cc638c4abc8a968ebacd862ebb0f7b73b91

commit d1ca8cc638c4abc8a968ebacd862ebb0f7b73b91
Author:     Colin Percival <cperciva@FreeBSD.org>
AuthorDate: 2022-07-13 00:45:18 +0000
Commit:     Colin Percival <cperciva@FreeBSD.org>
CommitDate: 2022-10-18 06:02:22 +0000

    x86: Add MPTABLE_LINUX_BUG_COMPAT option
    
    Linux has two bugs in its handling of the x86 MP table:
    1. It assumes that there is always 640 kB of base memory, and looks for
    the MP table in the top kB of this even if the memory map indicates
    that memory location does not exist.
    2. It ignores the entry_count field and instead iterates through the
    MP table by scanning until it runs out of bytes in the table.
    
    The Firecracker VM (and probably other related VMs) relies on both of
    these bugs.  With the MPTABLE_LINUX_BUG_COMPAT option, we search for
    the MP table at address 639k even if that isn't in the memory map; and
    replace a zeroed entry_count with a value computed from scanning the
    table until we run out of table bytes.
    
    Reviewed by:    imp
    Sponsored by:   https://www.patreon.com/cperciva
    Differential Revision:  https://reviews.freebsd.org/D35799
---
 sys/conf/options.amd64 |  3 +++
 sys/conf/options.i386  |  3 +++
 sys/x86/x86/mptable.c  | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+)

diff --git a/sys/conf/options.amd64 b/sys/conf/options.amd64
index 66ccfb7bbddb..a91cddb892fe 100644
--- a/sys/conf/options.amd64
+++ b/sys/conf/options.amd64
@@ -63,3 +63,6 @@ EFIRT			opt_efirt.h
 
 # Don't create a "legacy" PCI bridge if none is found.
 NO_LEGACY_PCIB		opt_cpu.h
+
+# Compatibility with Linux MP table bugs.
+MPTABLE_LINUX_BUG_COMPAT
diff --git a/sys/conf/options.i386 b/sys/conf/options.i386
index 03e88b985107..e9e0fa8bffc2 100644
--- a/sys/conf/options.i386
+++ b/sys/conf/options.i386
@@ -107,3 +107,6 @@ ISCI_LOGGING	opt_isci.h
 
 # Don't create a "legacy" PCI bridge if none is found.
 NO_LEGACY_PCIB		opt_cpu.h
+
+# Compatibility with Linux MP table bugs.
+MPTABLE_LINUX_BUG_COMPAT
diff --git a/sys/x86/x86/mptable.c b/sys/x86/x86/mptable.c
index ec746b756b4f..173ffaa09b45 100644
--- a/sys/x86/x86/mptable.c
+++ b/sys/x86/x86/mptable.c
@@ -30,6 +30,7 @@
 __FBSDID("$FreeBSD$");
 
 #include "opt_mptable_force_htt.h"
+#include "opt_mptable_linux_bug_compat.h"
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
@@ -245,6 +246,34 @@ lookup_bus_type(char *name)
 	return (UNKNOWN_BUSTYPE);
 }
 
+#ifdef MPTABLE_LINUX_BUG_COMPAT
+/* Recompute mpct->entry_count by walking the base table to its end. */
+static void
+compute_entry_count(void)
+{
+	u_char *end = (u_char *)(mpct) + mpct->base_table_length;
+	u_char *entry = (u_char *)(mpct + 1);	/* first byte after header */
+	size_t nentries = 0;
+
+	while (entry < end) {
+		switch (*entry) {	/* leading byte selects entry type */
+		case MPCT_ENTRY_PROCESSOR:
+		case MPCT_ENTRY_IOAPIC:
+		case MPCT_ENTRY_BUS:
+		case MPCT_ENTRY_INT:
+		case MPCT_ENTRY_LOCAL_INT:
+			break;		/* recognized; size is looked up below */
+		default:		/* unrecognized type byte: give up */
+			panic("%s: Unknown MP Config Entry %d\n", __func__,
+			    (int)*entry);
+		}
+		entry += basetable_entry_types[*entry].length;
+		nentries++;
+	}
+	mpct->entry_count = (uint16_t)(nentries);
+}
+#endif
+
 /*
  * Look for an Intel MP spec table (ie, SMP capable hardware).
  */
@@ -273,6 +302,17 @@ mptable_probe(void)
 	if ((x = search_for_sig(target, BIOS_COUNT)) >= 0)
 		goto found;
 
+#ifdef MPTABLE_LINUX_BUG_COMPAT
+	/*
+	 * Linux assumes that it always has 640 kB of base memory and
+	 * searches for the MP table at 639k regardless of whether that
+	 * address is present in the system memory map.  Some VM systems
+	 * rely on this buggy behaviour.
+	 */
+	if ((x = search_for_sig(639 * 1024, 1024 / 4)) >= 0)
+		goto found;
+#endif
+
 	/* nothing found */
 	return (ENXIO);
 
@@ -321,6 +361,16 @@ found:
 			printf(
 			"MP Configuration Table version 1.%d found at %p\n",
 			    mpct->spec_rev, mpct);
+#ifdef MPTABLE_LINUX_BUG_COMPAT
+		/*
+		 * Linux ignores entry_count and instead scans the MP table
+		 * until it runs out of bytes of table (as specified by the
+		 * base_table_length field).  Some VM systems rely on this
+		 * buggy behaviour and record an entry_count of zero.
+		 */
+		if (mpct->entry_count == 0)
+			compute_entry_count();
+#endif
 	}
 
 	return (-100);