git: 0be13a45f7cb - main - xregs_sig: Add the ability to handle multiple register banks

From: Dmitry Chagin <dchagin_at_FreeBSD.org>
Date: Sun, 04 Jun 2023 09:08:36 UTC
The branch main has been updated by dchagin:

URL: https://cgit.FreeBSD.org/src/commit/?id=0be13a45f7cb18f8b86c800a28457bf918e130f2

commit 0be13a45f7cb18f8b86c800a28457bf918e130f2
Author:     Dmitry Chagin <dchagin@FreeBSD.org>
AuthorDate: 2023-06-04 09:06:25 +0000
Commit:     Dmitry Chagin <dchagin@FreeBSD.org>
CommitDate: 2023-06-04 09:06:25 +0000

    xregs_sig: Add the ability to handle multiple register banks
    
    While here, replace the .global directive with the traditionally used .globl.
    
    Reviewed by:            kib
    Differential Revision:  https://reviews.freebsd.org/D40410
---
 tools/test/xregs_sig/c2x2c_aarch64.S |  18 ++---
 tools/test/xregs_sig/c2x2c_amd64.S   |  18 ++---
 tools/test/xregs_sig/xregs_sig.c     | 128 ++++++++++++++++++++++-------------
 3 files changed, 100 insertions(+), 64 deletions(-)

diff --git a/tools/test/xregs_sig/c2x2c_aarch64.S b/tools/test/xregs_sig/c2x2c_aarch64.S
index 6f1637f5bcdf..f53550b867fd 100644
--- a/tools/test/xregs_sig/c2x2c_aarch64.S
+++ b/tools/test/xregs_sig/c2x2c_aarch64.S
@@ -5,9 +5,11 @@
  * $FreeBSD$
  */
 
-	.global cpu_to_xregs
-	.type cpu_to_xregs, @function
-cpu_to_xregs:
+	.text
+
+	.globl cpu_to_vfp
+	.type cpu_to_vfp, @function
+cpu_to_vfp:
 	stp  q0,  q1, [x0, #( 0 * 16)]
 	stp  q2,  q3, [x0, #( 2 * 16)]
 	stp  q4,  q5, [x0, #( 4 * 16)]
@@ -26,12 +28,12 @@ cpu_to_xregs:
 	stp q30, q31, [x0, #(30 * 16)]
 	ret
 
-	.size cpu_to_xregs, . - cpu_to_xregs
+	.size cpu_to_vfp, . - cpu_to_vfp
 
 
-	.global xregs_to_cpu
-	.type xregs_to_cpu, @function
-xregs_to_cpu:
+	.globl vfp_to_cpu
+	.type vfp_to_cpu, @function
+vfp_to_cpu:
 	ldp  q0,  q1, [x0, #( 0 * 16)]
 	ldp  q2,  q3, [x0, #( 2 * 16)]
 	ldp  q4,  q5, [x0, #( 4 * 16)]
@@ -50,6 +52,6 @@ xregs_to_cpu:
 	ldp q30, q31, [x0, #(30 * 16)]
 	ret
 
-	.size xregs_to_cpu, . - xregs_to_cpu
+	.size vfp_to_cpu, . - vfp_to_cpu
 
 	.section        .note.GNU-stack,"",@progbits
diff --git a/tools/test/xregs_sig/c2x2c_amd64.S b/tools/test/xregs_sig/c2x2c_amd64.S
index e5ae5af99e97..7348e4871d53 100644
--- a/tools/test/xregs_sig/c2x2c_amd64.S
+++ b/tools/test/xregs_sig/c2x2c_amd64.S
@@ -5,9 +5,11 @@
  * $FreeBSD$
  */
 
-	.global cpu_to_xregs
-	.type cpu_to_xregs, @function
-cpu_to_xregs:
+	.text
+
+	.globl cpu_to_xmm
+	.type cpu_to_xmm, @function
+cpu_to_xmm:
 	movdqu	%xmm0, (%rdi)
 	movdqu	%xmm1, 1 * 16(%rdi)
 	movdqu	%xmm2, 2 * 16(%rdi)
@@ -26,12 +28,12 @@ cpu_to_xregs:
 	movdqu	%xmm15, 15 * 16(%rdi)
 	retq
 
-	.size cpu_to_xregs, . - cpu_to_xregs
+	.size cpu_to_xmm, . - cpu_to_xmm
 
 
-	.global xregs_to_cpu
-	.type xregs_to_cpu, @function
-xregs_to_cpu:
+	.globl xmm_to_cpu
+	.type xmm_to_cpu, @function
+xmm_to_cpu:
 	movdqu	(%rdi), %xmm0
 	movdqu	1 * 16(%rdi), %xmm1
 	movdqu	2 * 16(%rdi), %xmm2
@@ -50,6 +52,6 @@ xregs_to_cpu:
 	movdqu	15 * 16(%rdi), %xmm15
 	retq
 
-	.size xregs_to_cpu, . - xregs_to_cpu
+	.size xmm_to_cpu, . - xmm_to_cpu
 
 	.section        .note.GNU-stack,"",@progbits
diff --git a/tools/test/xregs_sig/xregs_sig.c b/tools/test/xregs_sig/xregs_sig.c
index a67379a0080e..4e29165d39e2 100644
--- a/tools/test/xregs_sig/xregs_sig.c
+++ b/tools/test/xregs_sig/xregs_sig.c
@@ -44,30 +44,49 @@
 #ifndef __unused
 #define	__unused	__attribute__((__unused__))
 #endif
-#ifndef nitems
-#define	nitems(x)	(sizeof((x)) / sizeof((x)[0]))
-#endif
 
-#if defined(__amd64__)
-#define	XREGSRNAM	"xmm"
-#define	NREGS		16
-#elif defined(__aarch64__)
-#define	XREGSRNAM	"q"
-#define	NREGS		32
-#endif
-
-struct xregsreg {
-	uint8_t xregs_bytes[16];
+struct xregs_bank {
+	const char	*b_name;
+	const char	*r_name;
+	uint32_t	regs;
+	uint32_t	bytes;
+	void		(*x2c)(uint8_t *);
+	void		(*c2x)(uint8_t *);
 };
 
-struct xregs {
-	struct xregsreg xregsreg[NREGS];
+#if defined(__amd64__)
+void cpu_to_xmm(uint8_t *);
+void xmm_to_cpu(uint8_t *);
+
+static const struct xregs_bank xregs_banks[] = {
+	{
+		.b_name	= "SSE",
+		.r_name	= "xmm",
+		.regs	= 16,
+		.bytes	= 16,
+		.x2c	= xmm_to_cpu,
+		.c2x	= cpu_to_xmm,
+	},
 };
-
-void cpu_to_xregs(struct xregs *xregs);
-void xregs_to_cpu(struct xregs *xregs);
+#elif defined(__aarch64__)
+void cpu_to_vfp(uint8_t *);
+void vfp_to_cpu(uint8_t *);
+
+static const struct xregs_bank xregs_banks[] = {
+	{
+		.b_name	= "VFP",
+		.r_name	= "q",
+		.regs	= 32,
+		.bytes	= 16,
+		.x2c	= vfp_to_cpu,
+		.c2x	= cpu_to_vfp,
+	},
+};
+#endif
 
 static atomic_uint sigs;
+static int max_bank_idx;
+
 
 static void
 sigusr1_handler(int sig __unused, siginfo_t *si __unused, void *m __unused)
@@ -87,23 +106,22 @@ sigalrm_handler(int sig __unused)
 	alarm(TIMO);
 }
 
-static struct xregs zero_xregs = {};
 
 static void
-fill_xregs(struct xregs *xregs)
+fill_xregs(uint8_t *xregs, int bank)
 {
-	arc4random_buf(xregs, sizeof(*xregs));
+	arc4random_buf(xregs, xregs_banks[bank].regs * xregs_banks[bank].bytes);
 }
 
 static void
-dump_xregs(const struct xregsreg *r)
+dump_xregs(const uint8_t *r, int bank)
 {
 	unsigned k;
 
-	for (k = 0; k < nitems(r->xregs_bytes); k++) {
+	for (k = 0; k < xregs_banks[bank].bytes; k++) {
 		if (k != 0)
 			printf(" ");
-		printf("%02x", r->xregs_bytes[k]);
+		printf("%02x", r[k]);
 	}
 	printf("\n");
 }
@@ -111,9 +129,9 @@ dump_xregs(const struct xregsreg *r)
 static pthread_mutex_t show_lock;
 
 static void
-show_diff(const struct xregs *xregs1, const struct xregs *xregs2)
+show_diff(const uint8_t *xregs1, const uint8_t *xregs2, int bank)
 {
-	const struct xregsreg *r1, *r2;
+	const uint8_t *r1, *r2;
 	unsigned i, j;
 
 #if defined(__FreeBSD__)
@@ -121,14 +139,14 @@ show_diff(const struct xregs *xregs1, const struct xregs *xregs2)
 #elif defined(__linux__)
 	printf("thr %ld\n", syscall(SYS_gettid));
 #endif
-	for (i = 0; i < nitems(xregs1->xregsreg); i++) {
-		r1 = &xregs1->xregsreg[i];
-		r2 = &xregs2->xregsreg[i];
-		for (j = 0; j < nitems(r1->xregs_bytes); j++) {
-			if (r1->xregs_bytes[j] != r2->xregs_bytes[j]) {
-				printf("%%%s%u\n", XREGSRNAM, i);
-				dump_xregs(r1);
-				dump_xregs(r2);
+	for (i = 0; i < xregs_banks[bank].regs; i++) {
+		r1 = xregs1 + i * xregs_banks[bank].bytes;
+		r2 = xregs2 + i * xregs_banks[bank].bytes;
+		for (j = 0; j < xregs_banks[bank].bytes; j++) {
+			if (r1[j] != r2[j]) {
+				printf("%%%s%u\n", xregs_banks[bank].r_name, i);
+				dump_xregs(r1, bank);
+				dump_xregs(r2, bank);
 				break;
 			}
 		}
@@ -142,28 +160,32 @@ my_pause(void)
 }
 
 static void *
-worker_thread(void *arg __unused)
+worker_thread(void *arg)
 {
-	struct xregs xregs, xregs_cpu;
+	int bank = (uintptr_t)arg;
+	int sz = xregs_banks[bank].regs * xregs_banks[bank].bytes;
+	uint8_t xregs[sz], xregs_cpu[sz], zero_xregs[sz];
+
+	memset(zero_xregs, 0, sz);
 
-	fill_xregs(&xregs);
+	fill_xregs(xregs, bank);
 	for (;;) {
-		xregs_to_cpu(&xregs);
+		xregs_banks[bank].x2c(xregs);
 		my_pause();
-		cpu_to_xregs(&xregs_cpu);
-		if (memcmp(&xregs, &xregs_cpu, sizeof(struct xregs)) != 0) {
+		xregs_banks[bank].c2x(xregs_cpu);
+		if (memcmp(xregs, xregs_cpu, sz) != 0) {
 			pthread_mutex_lock(&show_lock);
-			show_diff(&xregs, &xregs_cpu);
+			show_diff(xregs, xregs_cpu, bank);
 			abort();
 			pthread_mutex_unlock(&show_lock);
 		}
 
-		xregs_to_cpu(&zero_xregs);
+		xregs_banks[bank].x2c(zero_xregs);
 		my_pause();
-		cpu_to_xregs(&xregs_cpu);
-		if (memcmp(&zero_xregs, &xregs_cpu, sizeof(struct xregs)) != 0) {
+		xregs_banks[bank].c2x(xregs_cpu);
+		if (memcmp(zero_xregs, xregs_cpu, sz) != 0) {
 			pthread_mutex_lock(&show_lock);
-			show_diff(&zero_xregs, &xregs_cpu);
+			show_diff(zero_xregs, xregs_cpu, bank);
 			abort();
 			pthread_mutex_unlock(&show_lock);
 		}
@@ -175,7 +197,9 @@ int
 main(void)
 {
 	struct sigaction sa;
-	int error, i, ncpu;
+	int error, i, ncpu, bank;
+
+	max_bank_idx = 0;
 
 	bzero(&sa, sizeof(sa));
 	sa.sa_handler = sigalrm_handler;
@@ -199,14 +223,22 @@ main(void)
 	}
 
 	ncpu = sysconf(_SC_NPROCESSORS_ONLN);
-	ncpu *= 2;
+	if (max_bank_idx == 0)
+		ncpu *= 2;
+	bank = 0;
 	pthread_t wt[ncpu];
+nextbank:
+	printf("Starting %d threads for registers bank %s sized [%d][%d]\n", ncpu,
+	    xregs_banks[bank].b_name, xregs_banks[bank].regs, xregs_banks[bank].bytes);
 	for (i = 0; i < ncpu; i++) {
-		error = pthread_create(&wt[i], NULL, worker_thread, NULL);
+		error = pthread_create(&wt[i], NULL, worker_thread,
+		    (void *)(uintptr_t)bank);
 		if (error != 0) {
 			fprintf(stderr, "pthread_create %s\n", strerror(error));
 		}
 	}
+	if (++bank <= max_bank_idx)
+		goto nextbank;
 
 	alarm(TIMO);
 	for (;;) {