git: 2ccbf06c0285 - main - arm64: Add MOPS implementations of memset(), memcpy() and memmove()

From: Andrew Turner <andrew_at_FreeBSD.org>
Date: Tue, 10 Feb 2026 15:43:32 UTC
The branch main has been updated by andrew:

URL: https://cgit.FreeBSD.org/src/commit/?id=2ccbf06c0285ca1c06681e7212da8e7d1e87fe19

commit 2ccbf06c0285ca1c06681e7212da8e7d1e87fe19
Author:     Sarah Walker <sarah.walker2@arm.com>
AuthorDate: 2026-01-28 16:22:50 +0000
Commit:     Andrew Turner <andrew@FreeBSD.org>
CommitDate: 2026-02-10 15:39:56 +0000

    arm64: Add MOPS implementations of memset(), memcpy() and memmove()
    
    Enable the use of MOPS implementations of memset, memcpy and memmove within
    the kernel, selected through ifuncs. Switch the callers that run before
    ifunc resolution to the *_early variants of these functions.
    
    Reported by:    andrew
    Sponsored by:   Arm Ltd
    Differential Revision:  https://reviews.freebsd.org/D55051
---
 sys/arm64/arm64/identcpu.c     |  5 +++--
 sys/arm64/arm64/machdep.c      | 38 ++++++++++++++++++++++++++++++++++++--
 sys/arm64/arm64/machdep_boot.c |  2 +-
 sys/arm64/arm64/memcpy.S       | 24 ++++++++++++++++++++----
 sys/arm64/arm64/memset.S       | 12 ++++++++++--
 sys/arm64/arm64/pmap.c         | 10 +++++-----
 sys/arm64/include/cpu.h        |  7 ++++++-
 7 files changed, 81 insertions(+), 17 deletions(-)

diff --git a/sys/arm64/arm64/identcpu.c b/sys/arm64/arm64/identcpu.c
index 91078a411b88..e2f09fcb7f52 100644
--- a/sys/arm64/arm64/identcpu.c
+++ b/sys/arm64/arm64/identcpu.c
@@ -2675,14 +2675,15 @@ update_special_regs(u_int cpu)
 
 	if (cpu == 0) {
 		/* Create a user visible cpu description with safe values */
-		memset(&user_cpu_desc, 0, sizeof(user_cpu_desc));
+		memset_early(&user_cpu_desc, 0, sizeof(user_cpu_desc));
 		/* Safe values for these registers */
 		user_cpu_desc.id_aa64pfr0 = ID_AA64PFR0_AdvSIMD_NONE |
 		    ID_AA64PFR0_FP_NONE | ID_AA64PFR0_EL1_64 |
 		    ID_AA64PFR0_EL0_64;
 		user_cpu_desc.id_aa64dfr0 = ID_AA64DFR0_DebugVer_8;
 		/* Create the Linux user visible cpu description */
-		memcpy(&l_user_cpu_desc, &user_cpu_desc, sizeof(user_cpu_desc));
+		memcpy_early(&l_user_cpu_desc, &user_cpu_desc,
+		    sizeof(user_cpu_desc));
 	}
 
 	desc = get_cpu_desc(cpu);
diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c
index 5e6a39381e84..ffe9acb0cfa4 100644
--- a/sys/arm64/arm64/machdep.c
+++ b/sys/arm64/arm64/machdep.c
@@ -80,6 +80,7 @@
 #include <machine/cpu_feat.h>
 #include <machine/debug_monitor.h>
 #include <machine/hypervisor.h>
+#include <machine/ifunc.h>
 #include <machine/kdb.h>
 #include <machine/machdep.h>
 #include <machine/metadata.h>
@@ -807,6 +808,9 @@ initarm(struct arm64_bootparams *abp)
 
 	update_special_regs(0);
 
+	sched_instance_select();
+	link_elf_ireloc();
+
 	/* Set the pcpu data, this is needed by pmap_bootstrap */
 	pcpup = &pcpu0;
 	pcpu_init(pcpup, 0, sizeof(struct pcpu));
@@ -823,8 +827,6 @@ initarm(struct arm64_bootparams *abp)
 	PCPU_SET(curthread, &thread0);
 	PCPU_SET(midr, get_midr());
 
-	sched_instance_select();
-	link_elf_ireloc();
 #ifdef FDT
 	try_load_dtb();
 #endif
@@ -1076,3 +1078,35 @@ DB_SHOW_COMMAND(vtop, db_show_vtop)
 		db_printf("show vtop <virt_addr>\n");
 }
 #endif
+
+#undef memset
+#undef memmove
+#undef memcpy
+
+void	*memset_std(void *buf, int c, size_t len);	/* memset.S */
+void	*memset_mops(void *buf, int c, size_t len);	/* memset.S */
+void	*memmove_std(void * _Nonnull dst, const void * _Nonnull src,
+	    size_t len);				/* memcpy.S */
+void	*memmove_mops(void * _Nonnull dst, const void * _Nonnull src,
+	    size_t len);				/* memcpy.S */
+void	*memcpy_std(void * _Nonnull dst, const void * _Nonnull src,
+	    size_t len);				/* memcpy.S */
+void	*memcpy_mops(void * _Nonnull dst, const void * _Nonnull src,
+	    size_t len);				/* memcpy.S */
+
+DEFINE_IFUNC(, void *, memset, (void *, int, size_t))
+{	/* Use memset_mops if elf_hwcap2 has HWCAP2_MOPS, else memset_std. */
+	return ((elf_hwcap2 & HWCAP2_MOPS) != 0 ? memset_mops : memset_std);
+}
+
+DEFINE_IFUNC(, void *, memmove, (void * _Nonnull, const void * _Nonnull,
+    size_t))
+{	/* Use memmove_mops if elf_hwcap2 has HWCAP2_MOPS, else memmove_std. */
+	return ((elf_hwcap2 & HWCAP2_MOPS) != 0 ? memmove_mops : memmove_std);
+}
+
+DEFINE_IFUNC(, void *, memcpy, (void * _Nonnull, const void * _Nonnull,
+    size_t))
+{	/* Use memcpy_mops if elf_hwcap2 has HWCAP2_MOPS, else memcpy_std. */
+	return ((elf_hwcap2 & HWCAP2_MOPS) != 0 ? memcpy_mops : memcpy_std);
+}
diff --git a/sys/arm64/arm64/machdep_boot.c b/sys/arm64/arm64/machdep_boot.c
index 1c5e8189e436..0ccfd1b67a39 100644
--- a/sys/arm64/arm64/machdep_boot.c
+++ b/sys/arm64/arm64/machdep_boot.c
@@ -115,7 +115,7 @@ fake_preload_metadata(void *dtb_ptr, size_t dtb_size)
 		PRELOAD_PUSH_VALUE(uint32_t, MODINFO_METADATA | MODINFOMD_DTBP);
 		PRELOAD_PUSH_VALUE(uint32_t, sizeof(uint64_t));
 		PRELOAD_PUSH_VALUE(uint64_t, (uint64_t)lastaddr);
-		memmove((void *)lastaddr, dtb_ptr, dtb_size);
+		memmove_early((void *)lastaddr, dtb_ptr, dtb_size);
 		lastaddr += dtb_size;
 		lastaddr = roundup(lastaddr, sizeof(int));
 	}
diff --git a/sys/arm64/arm64/memcpy.S b/sys/arm64/arm64/memcpy.S
index 01daa8e1c228..3c408d2836aa 100644
--- a/sys/arm64/arm64/memcpy.S
+++ b/sys/arm64/arm64/memcpy.S
@@ -57,8 +57,8 @@
    The loop tail is handled by always copying 64 bytes from the end.
 */
 
-EENTRY(memmove)
-ENTRY(memcpy)
+EENTRY(memmove_std)
+ENTRY(memcpy_std)
 	add	srcend, src, count
 	add	dstend, dstin, count
 	cmp	count, 128
@@ -239,7 +239,23 @@ L(copy64_from_start):
 	stp	B_l, B_h, [dstin, 16]
 	stp	C_l, C_h, [dstin]
 	ret
-END(memcpy)
-EEND(memmove)
+END(memcpy_std)
+EEND(memmove_std)
+
+ENTRY(memcpy_mops)	/* memcpy() built from the MOPS cpyf* instructions */
+	mov	x3, x0		/* Copy dst to x3; x0 stays the return value */
+	.inst	0x19010443	/* cpyfp [x3]!, [x1]!, x2!  (prologue) */
+	.inst	0x19410443	/* cpyfm [x3]!, [x1]!, x2!  (main) */
+	.inst	0x19810443	/* cpyfe [x3]!, [x1]!, x2!  (epilogue) */
+	ret
+END(memcpy_mops)
+
+ENTRY(memmove_mops)	/* memmove() built from the MOPS cpy* instructions */
+	mov	x3, x0		/* Copy dst to x3; x0 stays the return value */
+	.inst	0x1d010443	/* cpyp [x3]!, [x1]!, x2!  (prologue) */
+	.inst	0x1d410443	/* cpym [x3]!, [x1]!, x2!  (main) */
+	.inst	0x1d810443	/* cpye [x3]!, [x1]!, x2!  (epilogue) */
+	ret
+END(memmove_mops)
 
 GNU_PROPERTY_AARCH64_FEATURE_1_NOTE(GNU_PROPERTY_AARCH64_FEATURE_1_VAL)
diff --git a/sys/arm64/arm64/memset.S b/sys/arm64/arm64/memset.S
index f52bfd62cc54..f226e8de1e95 100644
--- a/sys/arm64/arm64/memset.S
+++ b/sys/arm64/arm64/memset.S
@@ -51,7 +51,7 @@
 #define dst		x8
 #define tmp3w		w9
 
-ENTRY(memset)
+ENTRY(memset_std)
 
 	mov	dst, dstin		/* Preserve return value.  */
 	ands	A_lw, val, #255
@@ -196,6 +196,14 @@ ENTRY(memset)
 	ands	count, count, zva_bits_x
 	b.ne	.Ltail_maybe_long
 	ret
-END(memset)
+END(memset_std)
+
+ENTRY(memset_mops)	/* memset() built from the MOPS set* instructions */
+	mov	x3, x0		/* Copy buf to x3; x0 stays the return value */
+	.inst	0x19c10443	/* setp [x3]!, x2!, x1  (prologue) */
+	.inst	0x19c14443	/* setm [x3]!, x2!, x1  (main) */
+	.inst	0x19c18443	/* sete [x3]!, x2!, x1  (epilogue) */
+	ret
+END(memset_mops)
 
 GNU_PROPERTY_AARCH64_FEATURE_1_NOTE(GNU_PROPERTY_AARCH64_FEATURE_1_VAL)
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 680209efd881..e865569ac377 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -1015,7 +1015,7 @@ pmap_bootstrap_l0_table(struct pmap_bootstrap_state *state)
 
 		/* Create a new L0 table entry */
 		state->l1 = (pt_entry_t *)state->freemempos;
-		memset(state->l1, 0, PAGE_SIZE);
+		memset_early(state->l1, 0, PAGE_SIZE);
 		state->freemempos += PAGE_SIZE;
 
 		l1_pa = pmap_early_vtophys((vm_offset_t)state->l1);
@@ -1063,7 +1063,7 @@ pmap_bootstrap_l1_table(struct pmap_bootstrap_state *state)
 
 		/* Create a new L1 table entry */
 		state->l2 = (pt_entry_t *)state->freemempos;
-		memset(state->l2, 0, PAGE_SIZE);
+		memset_early(state->l2, 0, PAGE_SIZE);
 		state->freemempos += PAGE_SIZE;
 
 		l2_pa = pmap_early_vtophys((vm_offset_t)state->l2);
@@ -1107,7 +1107,7 @@ pmap_bootstrap_l2_table(struct pmap_bootstrap_state *state)
 
 		/* Create a new L2 table entry */
 		state->l3 = (pt_entry_t *)state->freemempos;
-		memset(state->l3, 0, PAGE_SIZE);
+		memset_early(state->l3, 0, PAGE_SIZE);
 		state->freemempos += PAGE_SIZE;
 
 		l3_pa = pmap_early_vtophys((vm_offset_t)state->l3);
@@ -1406,7 +1406,7 @@ pmap_bootstrap(void)
 #define alloc_pages(var, np)						\
 	(var) = bs_state.freemempos;					\
 	bs_state.freemempos += (np * PAGE_SIZE);			\
-	memset((char *)(var), 0, ((np) * PAGE_SIZE));
+	memset_early((char *)(var), 0, ((np) * PAGE_SIZE));
 
 	/* Allocate dynamic per-cpu area. */
 	alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
@@ -1444,7 +1444,7 @@ pmap_bootstrap_allocate_san_l2(vm_paddr_t start_pa, vm_paddr_t end_pa,
 			continue;
 		}
 
-		bzero((void *)PHYS_TO_DMAP(pa), L2_SIZE);
+		bzero_early((void *)PHYS_TO_DMAP(pa), L2_SIZE);
 		physmem_exclude_region(pa, L2_SIZE, EXFLAG_NOALLOC);
 		pmap_store(l2, PHYS_TO_PTE(pa) | PMAP_SAN_PTE_BITS | L2_BLOCK);
 	}
diff --git a/sys/arm64/include/cpu.h b/sys/arm64/include/cpu.h
index 9f1db23744d4..05844ad63036 100644
--- a/sys/arm64/include/cpu.h
+++ b/sys/arm64/include/cpu.h
@@ -328,7 +328,12 @@ ADDRESS_TRANSLATE_FUNC(s1e1r)
 ADDRESS_TRANSLATE_FUNC(s1e1w)
 
 #endif /* !__ASSEMBLER__ */
-#endif
+
+#define MEMSET_EARLY_FUNC	memset_std
+#define MEMCPY_EARLY_FUNC	memcpy_std
+#define MEMMOVE_EARLY_FUNC	memmove_std
+
+#endif /* _KERNEL */
 
 #endif /* !_MACHINE_CPU_H_ */