git: 701923e2a410 - main - riscv: improve parsing of riscv,isa property strings

From: Mitchell Horne <mhorne_at_FreeBSD.org>
Date: Fri, 28 Oct 2022 16:37:10 UTC
The branch main has been updated by mhorne:

URL: https://cgit.FreeBSD.org/src/commit/?id=701923e2a4105be606c5263181b6eb6f546f1a84

commit 701923e2a4105be606c5263181b6eb6f546f1a84
Author:     Mitchell Horne <mhorne@FreeBSD.org>
AuthorDate: 2022-10-28 16:28:08 +0000
Commit:     Mitchell Horne <mhorne@FreeBSD.org>
CommitDate: 2022-10-28 16:28:08 +0000

    riscv: improve parsing of riscv,isa property strings
    
    This code was originally written under the assumption that the ISA
    string would only contain single-letter extensions. The RISC-V
    specification has extended its description of the format quite a bit,
    allowing for much longer ISA strings containing multi-letter extension
    names.
    
    Newer versions of QEMU (7.1.0) will append to the riscv,isa property
    indicating the presence of multi-letter standard extensions such as
    Zfencei. This triggers a KASSERT about the expected length of the
    string, preventing boot.
    
    Increase the size of the isa array significantly, and teach the code
    to parse (skip over) multi-letter extensions, and optional extension
    version numbers. We currently ignore them completely, but this will
    change in the future as we start supporting supervisor-level extensions.
    
    MFC after:      2 weeks
    Sponsored by:   The FreeBSD Foundation
    Differential Revision:  https://reviews.freebsd.org/D36601
---
 sys/riscv/include/elf.h    |  14 ++--
 sys/riscv/riscv/identcpu.c | 182 ++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 169 insertions(+), 27 deletions(-)

diff --git a/sys/riscv/include/elf.h b/sys/riscv/include/elf.h
index 671e2d2617c0..c0817d8fb47b 100644
--- a/sys/riscv/include/elf.h
+++ b/sys/riscv/include/elf.h
@@ -75,13 +75,13 @@ __ElfType(Auxinfo);
 #define	ET_DYN_LOAD_ADDR 0x100000
 
 /* Flags passed in AT_HWCAP */
-#define	HWCAP_ISA_BIT(c)	(1 << ((c) - 'A'))
-#define	HWCAP_ISA_I		HWCAP_ISA_BIT('I')
-#define	HWCAP_ISA_M		HWCAP_ISA_BIT('M')
-#define	HWCAP_ISA_A		HWCAP_ISA_BIT('A')
-#define	HWCAP_ISA_F		HWCAP_ISA_BIT('F')
-#define	HWCAP_ISA_D		HWCAP_ISA_BIT('D')
-#define	HWCAP_ISA_C		HWCAP_ISA_BIT('C')
+#define	HWCAP_ISA_BIT(c)	(1 << ((c) - 'a'))
+#define	HWCAP_ISA_I		HWCAP_ISA_BIT('i')
+#define	HWCAP_ISA_M		HWCAP_ISA_BIT('m')
+#define	HWCAP_ISA_A		HWCAP_ISA_BIT('a')
+#define	HWCAP_ISA_F		HWCAP_ISA_BIT('f')
+#define	HWCAP_ISA_D		HWCAP_ISA_BIT('d')
+#define	HWCAP_ISA_C		HWCAP_ISA_BIT('c')
 #define	HWCAP_ISA_G		\
     (HWCAP_ISA_I | HWCAP_ISA_M | HWCAP_ISA_A | HWCAP_ISA_F | HWCAP_ISA_D)
 
diff --git a/sys/riscv/riscv/identcpu.c b/sys/riscv/riscv/identcpu.c
index f3afa9b8c7ea..4c151eb47939 100644
--- a/sys/riscv/riscv/identcpu.c
+++ b/sys/riscv/riscv/identcpu.c
@@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/ctype.h>
 #include <sys/kernel.h>
 #include <sys/pcpu.h>
 #include <sys/sysctl.h>
@@ -104,34 +105,171 @@ const struct cpu_implementers cpu_implementers[] = {
 	CPU_IMPLEMENTER_NONE,
 };
 
-#ifdef FDT
 /*
- * The ISA string is made up of a small prefix (e.g. rv64) and up to 26 letters
- * indicating the presence of the 26 possible standard extensions. Therefore 32
- * characters will be sufficient.
+ * The ISA string describes the complete set of instructions supported by a
+ * RISC-V CPU. The string begins with a small prefix (e.g. rv64) indicating the
+ * base ISA. It is followed first by single-letter ISA extensions, and then
+ * multi-letter ISA extensions.
+ *
+ * Underscores are used mainly to separate consecutive multi-letter extensions,
+ * but may optionally appear between any two extensions. An extension may be
+ * followed by a version number, in the form of 'Mpm', where M is the
+ * extension's major version number, and 'm' is the minor version number.
+ *
+ * The format is described in detail by the "ISA Extension Naming Conventions"
+ * chapter of the unprivileged spec.
  */
-#define	ISA_NAME_MAXLEN		32
 #define	ISA_PREFIX		("rv" __XSTRING(__riscv_xlen))
 #define	ISA_PREFIX_LEN		(sizeof(ISA_PREFIX) - 1)
 
+static __inline int
+parse_ext_s(char *isa, int idx, int len)
+{
+	/*
+	 * Proceed to the next multi-letter extension or the end of the
+	 * string.
+	 *
+	 * TODO: parse these once we gain support
+	 */
+	while (isa[idx] != '_' && idx < len) {
+		idx++;
+	}
+
+	return (idx);
+}
+
+static __inline int
+parse_ext_x(char *isa, int idx, int len)
+{
+	/*
+	 * Proceed to the next multi-letter extension or the end of the
+	 * string.
+	 */
+	while (isa[idx] != '_' && idx < len) {
+		idx++;
+	}
+
+	return (idx);
+}
+
+static __inline int
+parse_ext_z(char *isa, int idx, int len)
+{
+	/*
+	 * Proceed to the next multi-letter extension or the end of the
+	 * string.
+	 *
+	 * TODO: parse some of these.
+	 */
+	while (isa[idx] != '_' && idx < len) {
+		idx++;
+	}
+
+	return (idx);
+}
+
+static __inline int
+parse_ext_version(char *isa, int idx, u_int *majorp __unused,
+    u_int *minorp __unused)
+{
+	/* Major version. */
+	while (isdigit(isa[idx]))
+		idx++;
+
+	if (isa[idx] != 'p')
+		return (idx);
+	else
+		idx++;
+
+	/* Minor version. */
+	while (isdigit(isa[idx]))
+		idx++;
+
+	return (idx);
+}
+
+/*
+ * Parse the ISA string, building up the set of HWCAP bits as they are found.
+ */
 static void
-fill_elf_hwcap(void *dummy __unused)
+parse_riscv_isa(char *isa, int len, u_long *hwcapp)
 {
-	u_long caps[256] = {0};
-	char isa[ISA_NAME_MAXLEN];
 	u_long hwcap;
-	phandle_t node;
-	ssize_t len;
 	int i;
 
-	caps['i'] = caps['I'] = HWCAP_ISA_I;
-	caps['m'] = caps['M'] = HWCAP_ISA_M;
-	caps['a'] = caps['A'] = HWCAP_ISA_A;
+	hwcap = 0;
+	i = ISA_PREFIX_LEN;
+	while (i < len) {
+		switch(isa[i]) {
+		case 'a':
+		case 'c':
 #ifdef FPE
-	caps['f'] = caps['F'] = HWCAP_ISA_F;
-	caps['d'] = caps['D'] = HWCAP_ISA_D;
+		case 'd':
+		case 'f':
 #endif
-	caps['c'] = caps['C'] = HWCAP_ISA_C;
+		case 'i':
+		case 'm':
+			hwcap |= HWCAP_ISA_BIT(isa[i]);
+			i++;
+			break;
+		case 'g':
+			hwcap |= HWCAP_ISA_G;
+			i++;
+			break;
+		case 's':
+			/*
+			 * XXX: older versions of this string erroneously
+			 * indicated supervisor and user mode support as
+			 * single-letter extensions. Detect and skip both 's'
+			 * and 'u'.
+			 */
+			if (isa[i - 1] != '_' && isa[i + 1] == 'u') {
+				i += 2;
+				continue;
+			}
+
+			/*
+			 * Supervisor-level extension namespace.
+			 */
+			i = parse_ext_s(isa, i, len);
+			break;
+		case 'x':
+			/*
+			 * Custom extension namespace. For now, we ignore
+			 * these.
+			 */
+			i = parse_ext_x(isa, i, len);
+			break;
+		case 'z':
+			/*
+			 * Multi-letter standard extension namespace.
+			 */
+			i = parse_ext_z(isa, i, len);
+			break;
+		case '_':
+			i++;
+			continue;
+		default:
+			/* Unrecognized/unsupported. */
+			i++;
+			break;
+		}
+
+		i = parse_ext_version(isa, i, NULL, NULL);
+	}
+
+	if (hwcapp != NULL)
+		*hwcapp = hwcap;
+}
+
+#ifdef FDT
+static void
+fill_elf_hwcap(void *dummy __unused)
+{
+	char isa[1024];
+	u_long hwcap;
+	phandle_t node;
+	ssize_t len;
 
 	node = OF_finddevice("/cpus");
 	if (node == -1) {
@@ -152,7 +290,7 @@ fill_elf_hwcap(void *dummy __unused)
 			continue;
 
 		len = OF_getprop(node, "riscv,isa", isa, sizeof(isa));
-		KASSERT(len <= ISA_NAME_MAXLEN, ("ISA string truncated"));
+		KASSERT(len <= sizeof(isa), ("ISA string truncated"));
 		if (len == -1) {
 			if (bootverbose)
 				printf("fill_elf_hwcap: "
@@ -165,9 +303,13 @@ fill_elf_hwcap(void *dummy __unused)
 			return;
 		}
 
-		hwcap = 0;
-		for (i = ISA_PREFIX_LEN; i < len; i++)
-			hwcap |= caps[(unsigned char)isa[i]];
+		/*
+		 * The string is specified to be lowercase, but let's be
+		 * certain.
+		 */
+		for (int i = 0; i < len; i++)
+			isa[i] = tolower(isa[i]);
+		parse_riscv_isa(isa, len, &hwcap);
 
 		if (elf_hwcap != 0)
 			elf_hwcap &= hwcap;