git: 2ccbf06c0285 - main - arm64: Add MOPS implementations of memset(), memcpy() and memmove()
Date: Tue, 10 Feb 2026 15:43:32 UTC
The branch main has been updated by andrew:
URL: https://cgit.FreeBSD.org/src/commit/?id=2ccbf06c0285ca1c06681e7212da8e7d1e87fe19
commit 2ccbf06c0285ca1c06681e7212da8e7d1e87fe19
Author: Sarah Walker <sarah.walker2@arm.com>
AuthorDate: 2026-01-28 16:22:50 +0000
Commit: Andrew Turner <andrew@FreeBSD.org>
CommitDate: 2026-02-10 15:39:56 +0000
arm64: Add MOPS implementations of memset(), memcpy() and memmove()
Enable the use of MOPS implementations of memset, memcpy and memmove within
the kernel. Fix pre-ifunc resolution uses of these functions.
Reported by: andrew
Sponsored by: Arm Ltd
Differential Revision: https://reviews.freebsd.org/D55051
---
sys/arm64/arm64/identcpu.c | 5 +++--
sys/arm64/arm64/machdep.c | 38 ++++++++++++++++++++++++++++++++++++--
sys/arm64/arm64/machdep_boot.c | 2 +-
sys/arm64/arm64/memcpy.S | 24 ++++++++++++++++++++----
sys/arm64/arm64/memset.S | 12 ++++++++++--
sys/arm64/arm64/pmap.c | 10 +++++-----
sys/arm64/include/cpu.h | 7 ++++++-
7 files changed, 81 insertions(+), 17 deletions(-)
diff --git a/sys/arm64/arm64/identcpu.c b/sys/arm64/arm64/identcpu.c
index 91078a411b88..e2f09fcb7f52 100644
--- a/sys/arm64/arm64/identcpu.c
+++ b/sys/arm64/arm64/identcpu.c
@@ -2675,14 +2675,15 @@ update_special_regs(u_int cpu)
if (cpu == 0) {
/* Create a user visible cpu description with safe values */
- memset(&user_cpu_desc, 0, sizeof(user_cpu_desc));
+ memset_early(&user_cpu_desc, 0, sizeof(user_cpu_desc));
/* Safe values for these registers */
user_cpu_desc.id_aa64pfr0 = ID_AA64PFR0_AdvSIMD_NONE |
ID_AA64PFR0_FP_NONE | ID_AA64PFR0_EL1_64 |
ID_AA64PFR0_EL0_64;
user_cpu_desc.id_aa64dfr0 = ID_AA64DFR0_DebugVer_8;
/* Create the Linux user visible cpu description */
- memcpy(&l_user_cpu_desc, &user_cpu_desc, sizeof(user_cpu_desc));
+ memcpy_early(&l_user_cpu_desc, &user_cpu_desc,
+ sizeof(user_cpu_desc));
}
desc = get_cpu_desc(cpu);
diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c
index 5e6a39381e84..ffe9acb0cfa4 100644
--- a/sys/arm64/arm64/machdep.c
+++ b/sys/arm64/arm64/machdep.c
@@ -80,6 +80,7 @@
#include <machine/cpu_feat.h>
#include <machine/debug_monitor.h>
#include <machine/hypervisor.h>
+#include <machine/ifunc.h>
#include <machine/kdb.h>
#include <machine/machdep.h>
#include <machine/metadata.h>
@@ -807,6 +808,9 @@ initarm(struct arm64_bootparams *abp)
update_special_regs(0);
+ sched_instance_select();
+ link_elf_ireloc();
+
/* Set the pcpu data, this is needed by pmap_bootstrap */
pcpup = &pcpu0;
pcpu_init(pcpup, 0, sizeof(struct pcpu));
@@ -823,8 +827,6 @@ initarm(struct arm64_bootparams *abp)
PCPU_SET(curthread, &thread0);
PCPU_SET(midr, get_midr());
- sched_instance_select();
- link_elf_ireloc();
#ifdef FDT
try_load_dtb();
#endif
@@ -1076,3 +1078,35 @@ DB_SHOW_COMMAND(vtop, db_show_vtop)
db_printf("show vtop <virt_addr>\n");
}
#endif
+
+#undef memset
+#undef memmove
+#undef memcpy
+
+void *memset_std(void *buf, int c, size_t len);
+void *memset_mops(void *buf, int c, size_t len);
+void *memmove_std(void * _Nonnull dst, const void * _Nonnull src,
+ size_t len);
+void *memmove_mops(void * _Nonnull dst, const void * _Nonnull src,
+ size_t len);
+void *memcpy_std(void * _Nonnull dst, const void * _Nonnull src,
+ size_t len);
+void *memcpy_mops(void * _Nonnull dst, const void * _Nonnull src,
+ size_t len);
+
+DEFINE_IFUNC(, void *, memset, (void *, int, size_t))
+{
+ return ((elf_hwcap2 & HWCAP2_MOPS) != 0 ? memset_mops : memset_std);
+}
+
+DEFINE_IFUNC(, void *, memmove, (void * _Nonnull, const void * _Nonnull,
+ size_t))
+{
+ return ((elf_hwcap2 & HWCAP2_MOPS) != 0 ? memmove_mops : memmove_std);
+}
+
+DEFINE_IFUNC(, void *, memcpy, (void * _Nonnull, const void * _Nonnull,
+ size_t))
+{
+ return ((elf_hwcap2 & HWCAP2_MOPS) != 0 ? memcpy_mops : memcpy_std);
+}
diff --git a/sys/arm64/arm64/machdep_boot.c b/sys/arm64/arm64/machdep_boot.c
index 1c5e8189e436..0ccfd1b67a39 100644
--- a/sys/arm64/arm64/machdep_boot.c
+++ b/sys/arm64/arm64/machdep_boot.c
@@ -115,7 +115,7 @@ fake_preload_metadata(void *dtb_ptr, size_t dtb_size)
PRELOAD_PUSH_VALUE(uint32_t, MODINFO_METADATA | MODINFOMD_DTBP);
PRELOAD_PUSH_VALUE(uint32_t, sizeof(uint64_t));
PRELOAD_PUSH_VALUE(uint64_t, (uint64_t)lastaddr);
- memmove((void *)lastaddr, dtb_ptr, dtb_size);
+ memmove_early((void *)lastaddr, dtb_ptr, dtb_size);
lastaddr += dtb_size;
lastaddr = roundup(lastaddr, sizeof(int));
}
diff --git a/sys/arm64/arm64/memcpy.S b/sys/arm64/arm64/memcpy.S
index 01daa8e1c228..3c408d2836aa 100644
--- a/sys/arm64/arm64/memcpy.S
+++ b/sys/arm64/arm64/memcpy.S
@@ -57,8 +57,8 @@
The loop tail is handled by always copying 64 bytes from the end.
*/
-EENTRY(memmove)
-ENTRY(memcpy)
+EENTRY(memmove_std)
+ENTRY(memcpy_std)
add srcend, src, count
add dstend, dstin, count
cmp count, 128
@@ -239,7 +239,23 @@ L(copy64_from_start):
stp B_l, B_h, [dstin, 16]
stp C_l, C_h, [dstin]
ret
-END(memcpy)
-EEND(memmove)
+END(memcpy_std)
+EEND(memmove_std)
+
+ENTRY(memcpy_mops)
+ mov x3, x0
+ .inst 0x19010443 /* cpyfp [x3]!, [x1]!, x2! */
+ .inst 0x19410443 /* cpyfm [x3]!, [x1]!, x2! */
+ .inst 0x19810443 /* cpyfe [x3]!, [x1]!, x2! */
+ ret
+END(memcpy_mops)
+
+ENTRY(memmove_mops)
+ mov x3, x0
+ .inst 0x1d010443 /* cpyp [x3]!, [x1]!, x2! */
+ .inst 0x1d410443 /* cpym [x3]!, [x1]!, x2! */
+ .inst 0x1d810443 /* cpye [x3]!, [x1]!, x2! */
+ ret
+END(memmove_mops)
GNU_PROPERTY_AARCH64_FEATURE_1_NOTE(GNU_PROPERTY_AARCH64_FEATURE_1_VAL)
diff --git a/sys/arm64/arm64/memset.S b/sys/arm64/arm64/memset.S
index f52bfd62cc54..f226e8de1e95 100644
--- a/sys/arm64/arm64/memset.S
+++ b/sys/arm64/arm64/memset.S
@@ -51,7 +51,7 @@
#define dst x8
#define tmp3w w9
-ENTRY(memset)
+ENTRY(memset_std)
mov dst, dstin /* Preserve return value. */
ands A_lw, val, #255
@@ -196,6 +196,14 @@ ENTRY(memset)
ands count, count, zva_bits_x
b.ne .Ltail_maybe_long
ret
-END(memset)
+END(memset_std)
+
+ENTRY(memset_mops)
+ mov x3, x0
+ .inst 0x19c10443 /* setp [x3]!, x2!, x1 */
+ .inst 0x19c14443 /* setm [x3]!, x2!, x1 */
+ .inst 0x19c18443 /* sete [x3]!, x2!, x1 */
+ ret
+END(memset_mops)
GNU_PROPERTY_AARCH64_FEATURE_1_NOTE(GNU_PROPERTY_AARCH64_FEATURE_1_VAL)
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 680209efd881..e865569ac377 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -1015,7 +1015,7 @@ pmap_bootstrap_l0_table(struct pmap_bootstrap_state *state)
/* Create a new L0 table entry */
state->l1 = (pt_entry_t *)state->freemempos;
- memset(state->l1, 0, PAGE_SIZE);
+ memset_early(state->l1, 0, PAGE_SIZE);
state->freemempos += PAGE_SIZE;
l1_pa = pmap_early_vtophys((vm_offset_t)state->l1);
@@ -1063,7 +1063,7 @@ pmap_bootstrap_l1_table(struct pmap_bootstrap_state *state)
/* Create a new L1 table entry */
state->l2 = (pt_entry_t *)state->freemempos;
- memset(state->l2, 0, PAGE_SIZE);
+ memset_early(state->l2, 0, PAGE_SIZE);
state->freemempos += PAGE_SIZE;
l2_pa = pmap_early_vtophys((vm_offset_t)state->l2);
@@ -1107,7 +1107,7 @@ pmap_bootstrap_l2_table(struct pmap_bootstrap_state *state)
/* Create a new L2 table entry */
state->l3 = (pt_entry_t *)state->freemempos;
- memset(state->l3, 0, PAGE_SIZE);
+ memset_early(state->l3, 0, PAGE_SIZE);
state->freemempos += PAGE_SIZE;
l3_pa = pmap_early_vtophys((vm_offset_t)state->l3);
@@ -1406,7 +1406,7 @@ pmap_bootstrap(void)
#define alloc_pages(var, np) \
(var) = bs_state.freemempos; \
bs_state.freemempos += (np * PAGE_SIZE); \
- memset((char *)(var), 0, ((np) * PAGE_SIZE));
+ memset_early((char *)(var), 0, ((np) * PAGE_SIZE));
/* Allocate dynamic per-cpu area. */
alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
@@ -1444,7 +1444,7 @@ pmap_bootstrap_allocate_san_l2(vm_paddr_t start_pa, vm_paddr_t end_pa,
continue;
}
- bzero((void *)PHYS_TO_DMAP(pa), L2_SIZE);
+ bzero_early((void *)PHYS_TO_DMAP(pa), L2_SIZE);
physmem_exclude_region(pa, L2_SIZE, EXFLAG_NOALLOC);
pmap_store(l2, PHYS_TO_PTE(pa) | PMAP_SAN_PTE_BITS | L2_BLOCK);
}
diff --git a/sys/arm64/include/cpu.h b/sys/arm64/include/cpu.h
index 9f1db23744d4..05844ad63036 100644
--- a/sys/arm64/include/cpu.h
+++ b/sys/arm64/include/cpu.h
@@ -328,7 +328,12 @@ ADDRESS_TRANSLATE_FUNC(s1e1r)
ADDRESS_TRANSLATE_FUNC(s1e1w)
#endif /* !__ASSEMBLER__ */
-#endif
+
+#define MEMSET_EARLY_FUNC memset_std
+#define MEMCPY_EARLY_FUNC memcpy_std
+#define MEMMOVE_EARLY_FUNC memmove_std
+
+#endif /* _KERNEL */
#endif /* !_MACHINE_CPU_H_ */