git: b5346307ed34 - stable/13 - Per-thread stack canary on arm64

From: Andrew Turner <andrew_at_FreeBSD.org>
Date: Mon, 20 Dec 2021 14:02:19 UTC
The branch stable/13 has been updated by andrew:

URL: https://cgit.FreeBSD.org/src/commit/?id=b5346307ed34f9a7cb8c37b495283df0bdba852a

commit b5346307ed34f9a7cb8c37b495283df0bdba852a
Author:     Andrew Turner <andrew@FreeBSD.org>
AuthorDate: 2021-11-22 15:20:51 +0000
Commit:     Andrew Turner <andrew@FreeBSD.org>
CommitDate: 2021-12-20 11:09:27 +0000

    Per-thread stack canary on arm64
    
    With the update to llvm 13 we are able to tell the compiler it can find
    the SSP canary relative to the register that holds the userspace stack
    pointer. As this is unused in most of the kernel it can be used here
    to point to a per-thread SSP canary.
    
    As the kernel could be built with an old toolchain, e.g. when upgrading
    from 13, add a warning to both the build and kernel boot that the option
    was enabled but the compiler doesn't support it.
    
    Discussed with: emaste
    Sponsored by:   The FreeBSD Foundation
    Differential Revision: https://reviews.freebsd.org/D33079
    
    (cherry picked from commit ae92ace05fd4fcf64e3bb787951578f655b1fa5f)
---
 sys/arm64/arm64/exception.S  |  7 +++++++
 sys/arm64/arm64/genassym.c   |  1 +
 sys/arm64/arm64/locore.S     | 14 ++++++++++++++
 sys/arm64/arm64/machdep.c    | 22 ++++++++++++++++++++++
 sys/arm64/arm64/pmap.c       |  4 ++++
 sys/arm64/arm64/vm_machdep.c | 10 ++++++++++
 sys/arm64/conf/GENERIC       |  1 +
 sys/arm64/include/proc.h     |  1 +
 sys/conf/Makefile.arm64      | 14 ++++++++++++++
 sys/conf/options.arm64       |  4 ++++
 10 files changed, 78 insertions(+)

diff --git a/sys/arm64/arm64/exception.S b/sys/arm64/arm64/exception.S
index 4fcf2ea6ece6..629385c659be 100644
--- a/sys/arm64/arm64/exception.S
+++ b/sys/arm64/arm64/exception.S
@@ -67,6 +67,13 @@ __FBSDID("$FreeBSD$");
 	mrs	x18, tpidr_el1
 	add	x29, sp, #(TF_SIZE)
 .if \el == 0
+#if defined(PERTHREAD_SSP)
+	/* Load the SSP canary to sp_el0 */
+	ldr	x1, [x18, #(PC_CURTHREAD)]
+	add	x1, x1, #(TD_MD_CANARY)
+	msr	sp_el0, x1
+#endif
+
 	/* Apply the SSBD (CVE-2018-3639) workaround if needed */
 	ldr	x1, [x18, #PC_SSBD]
 	cbz	x1, 1f
diff --git a/sys/arm64/arm64/genassym.c b/sys/arm64/arm64/genassym.c
index 1575a0158dec..8e3ddc48317b 100644
--- a/sys/arm64/arm64/genassym.c
+++ b/sys/arm64/arm64/genassym.c
@@ -73,6 +73,7 @@ ASSYM(TD_PCB, offsetof(struct thread, td_pcb));
 ASSYM(TD_FLAGS, offsetof(struct thread, td_flags));
 ASSYM(TD_FRAME, offsetof(struct thread, td_frame));
 ASSYM(TD_LOCK, offsetof(struct thread, td_lock));
+ASSYM(TD_MD_CANARY, offsetof(struct thread, td_md.md_canary));
 
 ASSYM(TF_SIZE, sizeof(struct trapframe));
 ASSYM(TF_SP, offsetof(struct trapframe, tf_sp));
diff --git a/sys/arm64/arm64/locore.S b/sys/arm64/arm64/locore.S
index 06dcfd11404b..c858b3cedc51 100644
--- a/sys/arm64/arm64/locore.S
+++ b/sys/arm64/arm64/locore.S
@@ -116,6 +116,13 @@ virtdone:
 	cmp	x15, x14
 	b.lo	1b
 
+#if defined(PERTHREAD_SSP)
+	/* Set sp_el0 to the boot canary for early per-thread SSP to work */
+	adrp	x15, boot_canary
+	add	x15, x15, :lo12:boot_canary
+	msr	sp_el0, x15
+#endif
+
 	/* Backup the module pointer */
 	mov	x1, x0
 
@@ -200,6 +207,13 @@ mp_virtdone:
 	ldr	x4, [x4]
 	mov	sp, x4
 
+#if defined(PERTHREAD_SSP)
+	/* Set sp_el0 to the boot canary for early per-thread SSP to work */
+	adrp	x15, boot_canary
+	add	x15, x15, :lo12:boot_canary
+	msr	sp_el0, x15
+#endif
+
 	/* Load the kernel ttbr0 pagetable */
 	msr	ttbr0_el1, x27
 	isb
diff --git a/sys/arm64/arm64/machdep.c b/sys/arm64/arm64/machdep.c
index b4a2a8cdd8e5..3ea7e850d70e 100644
--- a/sys/arm64/arm64/machdep.c
+++ b/sys/arm64/arm64/machdep.c
@@ -109,6 +109,14 @@ enum arm64_bus arm64_bus_method = ARM64_BUS_NONE;
  */
 struct pcpu pcpu0;
 
+#if defined(PERTHREAD_SSP)
+/*
+ * The boot SSP canary. Will be replaced with a per-thread canary when
+ * scheduling has started.
+ */
+uintptr_t boot_canary = 0x49a2d892bc05a0b1ul;
+#endif
+
 static struct trapframe proc0_tf;
 
 int early_boot = 1;
@@ -136,6 +144,16 @@ void (*pagezero)(void *p) = pagezero_simple;
 
 int (*apei_nmi)(void);
 
+#if defined(PERTHREAD_SSP_WARNING)
+static void
+print_ssp_warning(void *data __unused)
+{
+	printf("WARNING: Per-thread SSP is enabled but the compiler is too old to support it\n");
+}
+SYSINIT(ssp_warn, SI_SUB_COPYRIGHT, SI_ORDER_ANY, print_ssp_warning, NULL);
+SYSINIT(ssp_warn2, SI_SUB_LAST, SI_ORDER_ANY, print_ssp_warning, NULL);
+#endif
+
 static void
 pan_setup(void)
 {
@@ -347,6 +365,9 @@ init_proc0(vm_offset_t kstack)
 	proc_linkup0(&proc0, &thread0);
 	thread0.td_kstack = kstack;
 	thread0.td_kstack_pages = KSTACK_PAGES;
+#if defined(PERTHREAD_SSP)
+	thread0.td_md.md_canary = boot_canary;
+#endif
 	thread0.td_pcb = (struct pcb *)(thread0.td_kstack +
 	    thread0.td_kstack_pages * PAGE_SIZE) - 1;
 	thread0.td_pcb->pcb_fpflags = 0;
@@ -741,6 +762,7 @@ initarm(struct arm64_bootparams *abp)
 	    "mov x18, %0 \n"
 	    "msr tpidr_el1, %0" :: "r"(pcpup));
 
+	/* locore.S sets sp_el0 to &thread0 so no need to set it here. */
 	PCPU_SET(curthread, &thread0);
 	PCPU_SET(midr, get_midr());
 
diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index 1b9c6e96286a..f30a180612ef 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -6797,6 +6797,10 @@ pmap_switch(struct thread *old __unused, struct thread *new)
 
 	/* Store the new curthread */
 	PCPU_SET(curthread, new);
+#if defined(PERTHREAD_SSP)
+	/* Set the new threads SSP canary */
+	__asm("msr	sp_el0, %0" :: "r"(&new->td_md.md_canary));
+#endif
 
 	/* And the new pcb */
 	pcb = new->td_pcb;
diff --git a/sys/arm64/arm64/vm_machdep.c b/sys/arm64/arm64/vm_machdep.c
index a8690eeb67da..9c97a2848200 100644
--- a/sys/arm64/arm64/vm_machdep.c
+++ b/sys/arm64/arm64/vm_machdep.c
@@ -115,6 +115,11 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
 	/* Setup to release spin count in fork_exit(). */
 	td2->td_md.md_spinlock_count = 1;
 	td2->td_md.md_saved_daif = PSR_DAIF_DEFAULT;
+
+#if defined(PERTHREAD_SSP)
+	/* Set the new canary */
+	arc4random_buf(&td2->td_md.md_canary, sizeof(td2->td_md.md_canary));
+#endif
 }
 
 void
@@ -187,6 +192,11 @@ cpu_copy_thread(struct thread *td, struct thread *td0)
 	/* Setup to release spin count in fork_exit(). */
 	td->td_md.md_spinlock_count = 1;
 	td->td_md.md_saved_daif = PSR_DAIF_DEFAULT;
+
+#if defined(PERTHREAD_SSP)
+	/* Set the new canary */
+	arc4random_buf(&td->td_md.md_canary, sizeof(td->td_md.md_canary));
+#endif
 }
 
 /*
diff --git a/sys/arm64/conf/GENERIC b/sys/arm64/conf/GENERIC
index a37d78332276..b14776d4d979 100644
--- a/sys/arm64/conf/GENERIC
+++ b/sys/arm64/conf/GENERIC
@@ -81,6 +81,7 @@ options 	RACCT_DEFAULT_TO_DISABLED # Set kern.racct.enable=0 by default
 options 	RCTL			# Resource limits
 options 	INTRNG
 options 	LINUX_BOOT_ABI		# Boot using booti command from U-Boot
+options 	PERTHREAD_SSP		# Per-thread SSP canary
 
 # Debugging support.  Always need this:
 options 	KDB			# Enable kernel debugger support.
diff --git a/sys/arm64/include/proc.h b/sys/arm64/include/proc.h
index b34581f0bd8b..24f967e560f6 100644
--- a/sys/arm64/include/proc.h
+++ b/sys/arm64/include/proc.h
@@ -37,6 +37,7 @@
 struct mdthread {
 	int	md_spinlock_count;	/* (k) */
 	register_t md_saved_daif;	/* (k) */
+	uintptr_t md_canary;
 };
 
 struct mdproc {
diff --git a/sys/conf/Makefile.arm64 b/sys/conf/Makefile.arm64
index 2e404664708c..e75062115121 100644
--- a/sys/conf/Makefile.arm64
+++ b/sys/conf/Makefile.arm64
@@ -30,6 +30,20 @@ INCLUDES+= -I$S/contrib/libfdt -I$S/contrib/device-tree/include
 LINUX_DTS_VERSION!=	awk '/freebsd,dts-version/ { sub(/;$$/,"", $$NF); print $$NF }' $S/dts/freebsd-compatible.dts
 CFLAGS += -DLINUX_DTS_VERSION=\"${LINUX_DTS_VERSION}\"
 
+PERTHREAD_SSP_ENABLED!=	grep PERTHREAD_SSP opt_global.h || true ; echo
+.if !empty(PERTHREAD_SSP_ENABLED)
+. if ${COMPILER_TYPE} == "clang" && ${COMPILER_VERSION} >= 130000
+ARM64_SSP_CFLAGS = -mstack-protector-guard=sysreg
+ARM64_SSP_CFLAGS += -mstack-protector-guard-reg=sp_el0
+ARM64_SSP_CFLAGS += -mstack-protector-guard-offset=0
+. else
+ARM64_SSP_CFLAGS += -DPERTHREAD_SSP_WARNING
+.  warning "Compiler is too old to support PERTHREAD_SSP"
+. endif
+CFLAGS += ${ARM64_SSP_CFLAGS}
+ARCH_FLAGS += ${ARM64_SSP_CFLAGS}
+.endif
+
 # Use a custom SYSTEM_LD command to generate the elf kernel, so we can
 # set the text segment start address, and also strip the "arm mapping
 # symbols" which have names like $a.0 and $d.2; see the document
diff --git a/sys/conf/options.arm64 b/sys/conf/options.arm64
index 5a97fd6b3ef6..34ca3e221f27 100644
--- a/sys/conf/options.arm64
+++ b/sys/conf/options.arm64
@@ -9,6 +9,10 @@ VFP				opt_global.h
 LINUX_BOOT_ABI			opt_global.h
 LSE_ATOMICS			opt_global.h
 
+# Per-thread stack smashing protection support
+# Needs clang >= 13
+PERTHREAD_SSP			opt_global.h
+
 # Binary compatibility
 COMPAT_FREEBSD32		opt_global.h