git: d3eae160b21f - main - arm64: Add BTI support to pmap

From: Andrew Turner <andrew@FreeBSD.org>
Date: Thu, 22 Feb 2024 16:28:03 UTC
The branch main has been updated by andrew:

URL: https://cgit.FreeBSD.org/src/commit/?id=d3eae160b21f12c769f1e275901ad8ef18901744

commit d3eae160b21f12c769f1e275901ad8ef18901744
Author:     Andrew Turner <andrew@FreeBSD.org>
AuthorDate: 2023-04-05 12:31:41 +0000
Commit:     Andrew Turner <andrew@FreeBSD.org>
CommitDate: 2024-02-22 16:27:47 +0000

    arm64: Add BTI support to pmap
    
    Add a rangeset to the arm64 pmap to describe which parts of the address
    space need the Branch Target Identification (BTI) Guard Page flag set in
    the page tables.
    
    On hardware that supports BTI, the Guard Page flag tells the hardware
    to raise an exception if the target of a BR* or BLR* instruction is
    not an appropriate landing pad instruction.
    
    To support this in userspace, we need to know which parts of the address
    space should be guarded. For this, add a rangeset to the arm64 pmap when
    the hardware supports BTI. The kernel can then use pmap_bti_set and
    pmap_bti_clear to mark and unmark which address space is guarded.
    
    Sponsored by:   Arm Ltd
    Sponsored by:   The FreeBSD Foundation
    Differential Revision:  https://reviews.freebsd.org/D42328
---
 sys/arm64/arm64/pmap.c   | 270 ++++++++++++++++++++++++++++++++++++++++++++---
 sys/arm64/include/pmap.h |  13 +--
 2 files changed, 263 insertions(+), 20 deletions(-)

diff --git a/sys/arm64/arm64/pmap.c b/sys/arm64/arm64/pmap.c
index d9b6425cc20e..21912535bb6a 100644
--- a/sys/arm64/arm64/pmap.c
+++ b/sys/arm64/arm64/pmap.c
@@ -119,6 +119,7 @@
 #include <sys/mutex.h>
 #include <sys/physmem.h>
 #include <sys/proc.h>
+#include <sys/rangeset.h>
 #include <sys/rwlock.h>
 #include <sys/sbuf.h>
 #include <sys/sx.h>
@@ -432,6 +433,12 @@ SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled,
     CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &superpages_enabled, 0,
     "Are large page mappings enabled?");
 
+/*
+ * True when Branch Target Identification should be used by userspace. This
+ * allows pmap to mark pages as guarded with ATTR_S1_GP.
+ */
+__read_mostly static bool pmap_bti_support = false;
+
 /*
  * Internal flags for pmap_enter()'s helper functions.
  */
@@ -478,7 +485,14 @@ static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m,
 static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);
 static __inline vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va);
 
+static uma_zone_t pmap_bti_ranges_zone;
+static bool pmap_bti_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
 static pt_entry_t pmap_pte_bti(pmap_t pmap, vm_offset_t va);
+static void pmap_bti_on_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva);
+static void *bti_dup_range(void *ctx, void *data);
+static void bti_free_range(void *ctx, void *node);
+static int pmap_bti_copy(pmap_t dst_pmap, pmap_t src_pmap);
+static void pmap_bti_deassign_all(pmap_t pmap);
 
 /*
  * These load the old table data and store the new value.
@@ -2320,6 +2334,7 @@ pmap_pinit0(pmap_t pmap)
 	pmap->pm_levels = 4;
 	pmap->pm_ttbr = pmap->pm_l0_paddr;
 	pmap->pm_asid_set = &asids;
+	pmap->pm_bti = NULL;
 
 	PCPU_SET(curpmap, pmap);
 }
@@ -2345,9 +2360,16 @@ pmap_pinit_stage(pmap_t pmap, enum pmap_stage stage, int levels)
 	MPASS(levels == 3 || levels == 4);
 	pmap->pm_levels = levels;
 	pmap->pm_stage = stage;
+	pmap->pm_bti = NULL;
 	switch (stage) {
 	case PM_STAGE1:
 		pmap->pm_asid_set = &asids;
+		if (pmap_bti_support) {
+			pmap->pm_bti = malloc(sizeof(struct rangeset), M_DEVBUF,
+			    M_ZERO | M_WAITOK);
+			rangeset_init(pmap->pm_bti, bti_dup_range,
+			    bti_free_range, pmap, M_NOWAIT);
+		}
 		break;
 	case PM_STAGE2:
 		pmap->pm_asid_set = &vmids;
@@ -2652,7 +2674,7 @@ void
 pmap_release(pmap_t pmap)
 {
 	bool rv __diagused;
-	struct spglist free;
+	struct spglist freelist;
 	struct asid_set *set;
 	vm_page_t m;
 	int asid;
@@ -2665,13 +2687,13 @@ pmap_release(pmap_t pmap)
 		KASSERT((pmap->pm_l0[0] & ATTR_DESCR_VALID) == ATTR_DESCR_VALID,
 		    ("pmap_release: Invalid l0 entry: %lx", pmap->pm_l0[0]));
 
-		SLIST_INIT(&free);
+		SLIST_INIT(&freelist);
 		m = PHYS_TO_VM_PAGE(pmap->pm_ttbr);
 		PMAP_LOCK(pmap);
-		rv = pmap_unwire_l3(pmap, 0, m, &free);
+		rv = pmap_unwire_l3(pmap, 0, m, &freelist);
 		PMAP_UNLOCK(pmap);
 		MPASS(rv == true);
-		vm_page_free_pages_toq(&free, true);
+		vm_page_free_pages_toq(&freelist, true);
 	}
 
 	KASSERT(pmap->pm_stats.resident_count == 0,
@@ -2699,6 +2721,11 @@ pmap_release(pmap_t pmap)
 			bit_clear(set->asid_set, asid);
 		}
 		mtx_unlock_spin(&set->asid_set_mutex);
+
+		if (pmap->pm_bti != NULL) {
+			rangeset_fini(pmap->pm_bti);
+			free(pmap->pm_bti, M_DEVBUF);
+		}
 	}
 
 	m = PHYS_TO_VM_PAGE(pmap->pm_l0_paddr);
@@ -3631,14 +3658,8 @@ pmap_remove_l3_range(pmap_t pmap, pd_entry_t l2e, vm_offset_t sva,
 		pmap_invalidate_range(pmap, va, sva, true);
 }
 
-/*
- *	Remove the given range of addresses from the specified map.
- *
- *	It is assumed that the start and end are properly
- *	rounded to the page size.
- */
-void
-pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+static void
+pmap_remove1(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, bool map_delete)
 {
 	struct rwlock *lock;
 	vm_offset_t va_next;
@@ -3655,6 +3676,8 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 	SLIST_INIT(&free);
 
 	PMAP_LOCK(pmap);
+	if (map_delete)
+		pmap_bti_on_remove(pmap, sva, eva);
 
 	lock = NULL;
 	for (; sva < eva; sva = va_next) {
@@ -3737,6 +3760,18 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 	vm_page_free_pages_toq(&free, true);
 }
 
+/*
+ *	Remove the given range of addresses from the specified map.
+ *
+ *	It is assumed that the start and end are properly
+ *	rounded to the page size.
+ */
+void
+pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+	pmap_remove1(pmap, sva, eva, false);
+}
+
 /*
  *	Remove the given range of addresses as part of a logical unmap
  *	operation. This has the effect of calling pmap_remove(), but
@@ -3746,7 +3781,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 void
 pmap_map_delete(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
-	pmap_remove(pmap, sva, eva);
+	pmap_remove1(pmap, sva, eva, true);
 }
 
 /*
@@ -4355,6 +4390,8 @@ pmap_enter_largepage(pmap_t pmap, vm_offset_t va, pt_entry_t newpte, int flags,
 	    PTE_TO_PHYS(newpte), newpte, psind));
 
 restart:
+	if (!pmap_bti_same(pmap, va, va + pagesizes[psind]))
+		return (KERN_PROTECTION_FAILURE);
 	if (psind == 2) {
 		PMAP_ASSERT_L1_BLOCKS_SUPPORTED;
 
@@ -4848,6 +4885,14 @@ pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags,
 		return (KERN_RESOURCE_SHORTAGE);
 	}
 
+	/*
+	 * If bti is not the same for the whole l2 range, return failure
+	 * and let vm_fault() cope.  Check after l2 allocation, since
+	 * it could sleep.
+	 */
+	if (!pmap_bti_same(pmap, va, va + L2_SIZE))
+		return (KERN_PROTECTION_FAILURE);
+
 	/*
 	 * If there are existing mappings, either abort or remove them.
 	 */
@@ -5496,6 +5541,38 @@ out:
 	PMAP_UNLOCK(dst_pmap);
 }
 
+int
+pmap_vmspace_copy(pmap_t dst_pmap, pmap_t src_pmap)
+{
+	int error;
+
+	if (dst_pmap->pm_stage != src_pmap->pm_stage)
+		return (EINVAL);
+
+	if (dst_pmap->pm_stage != PM_STAGE1 || src_pmap->pm_bti == NULL)
+		return (0);
+
+	for (;;) {
+		if (dst_pmap < src_pmap) {
+			PMAP_LOCK(dst_pmap);
+			PMAP_LOCK(src_pmap);
+		} else {
+			PMAP_LOCK(src_pmap);
+			PMAP_LOCK(dst_pmap);
+		}
+		error = pmap_bti_copy(dst_pmap, src_pmap);
+		/* Clean up partial copy on failure due to no memory. */
+		if (error == ENOMEM)
+			pmap_bti_deassign_all(dst_pmap);
+		PMAP_UNLOCK(src_pmap);
+		PMAP_UNLOCK(dst_pmap);
+		if (error != ENOMEM)
+			break;
+		vm_wait(NULL);
+	}
+	return (error);
+}
+
 /*
  *	pmap_zero_page zeros the specified hardware page by mapping
  *	the page into KVM and using bzero to clear its contents.
@@ -5907,6 +5984,7 @@ pmap_remove_pages(pmap_t pmap)
 	if (lock != NULL)
 		rw_wunlock(lock);
 	pmap_invalidate_all(pmap);
+	pmap_bti_deassign_all(pmap);
 	free_pv_chunk_batch(free_chunks);
 	PMAP_UNLOCK(pmap);
 	vm_page_free_pages_toq(&free, true);
@@ -7436,6 +7514,29 @@ pmap_set_cnp(void *arg)
 	isb();
 }
 
+/*
+ * Defer enabling some features until we have read the ID registers to know
+ * if they are supported on all CPUs.
+ */
+static void
+pmap_init_mp(void *dummy __unused)
+{
+	uint64_t reg;
+
+	if (get_kernel_reg(ID_AA64PFR1_EL1, &reg)) {
+		if (ID_AA64PFR1_BT_VAL(reg) != ID_AA64PFR1_BT_NONE) {
+			if (bootverbose)
+				printf("Enabling BTI\n");
+			pmap_bti_support = true;
+
+			pmap_bti_ranges_zone = uma_zcreate("BTI ranges",
+			    sizeof(struct rs_el), NULL, NULL, NULL, NULL,
+			    UMA_ALIGN_PTR, 0);
+		}
+	}
+}
+SYSINIT(pmap_init_mp, SI_SUB_CPU, SI_ORDER_ANY, pmap_init_mp, NULL);
+
 /*
  * Defer enabling CnP until we have read the ID registers to know if it's
  * supported on all CPUs.
@@ -7871,8 +7972,85 @@ pmap_is_valid_memattr(pmap_t pmap __unused, vm_memattr_t mode)
 	return (mode >= VM_MEMATTR_DEVICE && mode <= VM_MEMATTR_WRITE_THROUGH);
 }
 
+static void *
+bti_dup_range(void *ctx __unused, void *data)
+{
+	struct rs_el *node, *new_node;
+
+	new_node = uma_zalloc(pmap_bti_ranges_zone, M_NOWAIT);
+	if (new_node == NULL)
+		return (NULL);
+	node = data;
+	memcpy(new_node, node, sizeof(*node));
+	return (new_node);
+}
+
+static void
+bti_free_range(void *ctx __unused, void *node)
+{
+
+	uma_zfree(pmap_bti_ranges_zone, node);
+}
+
+static int
+pmap_bti_assign(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+	struct rs_el *rs;
+	int error;
+
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	PMAP_ASSERT_STAGE1(pmap);
+	MPASS(pmap->pm_bti != NULL);
+	rs = uma_zalloc(pmap_bti_ranges_zone, M_NOWAIT);
+	if (rs == NULL)
+		return (ENOMEM);
+	error = rangeset_insert(pmap->pm_bti, sva, eva, rs);
+	if (error != 0)
+		uma_zfree(pmap_bti_ranges_zone, rs);
+	return (error);
+}
+
+static void
+pmap_bti_deassign_all(pmap_t pmap)
+{
+
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	if (pmap->pm_bti != NULL)
+		rangeset_remove_all(pmap->pm_bti);
+}
+
+static bool
+pmap_bti_same(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+	struct rs_el *prev_rs, *rs;
+	vm_offset_t va;
+
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	KASSERT(ADDR_IS_CANONICAL(sva),
+	    ("%s: Start address not in canonical form: %lx", __func__, sva));
+	KASSERT(ADDR_IS_CANONICAL(eva),
+	    ("%s: End address not in canonical form: %lx", __func__, eva));
+
+	if (pmap->pm_bti == NULL || ADDR_IS_KERNEL(sva))
+		return (true);
+	MPASS(!ADDR_IS_KERNEL(eva));
+	for (va = sva; va < eva; prev_rs = rs) {
+		rs = rangeset_lookup(pmap->pm_bti, va);
+		if (va == sva)
+			prev_rs = rs;
+		else if ((rs == NULL) ^ (prev_rs == NULL))
+			return (false);
+		if (rs == NULL) {
+			va += PAGE_SIZE;
+			continue;
+		}
+		va = rs->re_end;
+	}
+	return (true);
+}
+
 static pt_entry_t
-pmap_pte_bti(pmap_t pmap, vm_offset_t va __diagused)
+pmap_pte_bti(pmap_t pmap, vm_offset_t va)
 {
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 	MPASS(ADDR_IS_CANONICAL(va));
@@ -7881,9 +8059,73 @@ pmap_pte_bti(pmap_t pmap, vm_offset_t va __diagused)
 		return (0);
 	if (pmap == kernel_pmap)
 		return (ATTR_KERN_GP);
+	if (pmap->pm_bti != NULL && rangeset_lookup(pmap->pm_bti, va) != NULL)
+		return (ATTR_S1_GP);
 	return (0);
 }
 
+static void
+pmap_bti_on_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	if (pmap->pm_bti != NULL)
+		rangeset_remove(pmap->pm_bti, sva, eva);
+}
+
+static int
+pmap_bti_copy(pmap_t dst_pmap, pmap_t src_pmap)
+{
+
+	PMAP_LOCK_ASSERT(dst_pmap, MA_OWNED);
+	PMAP_LOCK_ASSERT(src_pmap, MA_OWNED);
+	MPASS(src_pmap->pm_stage == dst_pmap->pm_stage);
+	MPASS(src_pmap->pm_bti != NULL);
+	MPASS(dst_pmap->pm_bti != NULL);
+	if (src_pmap->pm_bti->rs_data_ctx == NULL)
+		return (0);
+	return (rangeset_copy(dst_pmap->pm_bti, src_pmap->pm_bti));
+}
+
+static void
+pmap_bti_update_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, bool set)
+{
+	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
+	PMAP_ASSERT_STAGE1(pmap);
+
+	pmap_mask_set_locked(pmap, sva, eva, ATTR_S1_GP, set ? ATTR_S1_GP : 0,
+	    true);
+}
+
+int
+pmap_bti_set(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
+{
+	int error;
+
+	if (pmap->pm_bti == NULL)
+		return (0);
+	if (!ADDR_IS_CANONICAL(sva) || !ADDR_IS_CANONICAL(eva))
+		return (EINVAL);
+	if (pmap->pm_stage != PM_STAGE1)
+		return (EINVAL);
+	if (eva <= sva || ADDR_IS_KERNEL(eva))
+		return (EFAULT);
+
+	sva = trunc_page(sva);
+	eva = round_page(eva);
+	for (;;) {
+		PMAP_LOCK(pmap);
+		error = pmap_bti_assign(pmap, sva, eva);
+		if (error == 0)
+			pmap_bti_update_range(pmap, sva, eva, true);
+		PMAP_UNLOCK(pmap);
+		if (error != ENOMEM)
+			break;
+		vm_wait(NULL);
+	}
+	return (error);
+}
+
 #if defined(KASAN) || defined(KMSAN)
 static pd_entry_t	*pmap_san_early_l2;
 
diff --git a/sys/arm64/include/pmap.h b/sys/arm64/include/pmap.h
index 4b4d47ccc7af..d69924080610 100644
--- a/sys/arm64/include/pmap.h
+++ b/sys/arm64/include/pmap.h
@@ -63,6 +63,8 @@ void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma);
  * Pmap stuff
  */
 
+struct rangeset;
+
 struct md_page {
 	TAILQ_HEAD(,pv_entry)	pv_list;
 	int			pv_gen;
@@ -97,7 +99,8 @@ struct pmap {
 	struct asid_set		*pm_asid_set;	/* The ASID/VMID set to use */
 	enum pmap_stage		pm_stage;
 	int			pm_levels;
-	uint64_t		pm_reserved[4];
+	struct rangeset		*pm_bti;
+	uint64_t		pm_reserved[3];
 };
 typedef struct pmap *pmap_t;
 
@@ -182,12 +185,10 @@ extern void (*pmap_stage2_invalidate_range)(uint64_t, vm_offset_t, vm_offset_t,
     bool);
 extern void (*pmap_stage2_invalidate_all)(uint64_t);
 
-static inline int
-pmap_vmspace_copy(pmap_t dst_pmap __unused, pmap_t src_pmap __unused)
-{
+int pmap_vmspace_copy(pmap_t, pmap_t);
 
-	return (0);
-}
+int pmap_bti_set(pmap_t, vm_offset_t, vm_offset_t);
+int pmap_bti_clear(pmap_t, vm_offset_t, vm_offset_t);
 
 #if defined(KASAN) || defined(KMSAN)
 struct arm64_bootparams;
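
For illustration, a minimal sketch of how a kernel consumer might drive the
new pmap_bti_set() interface once it has decided that a range of a process'
address space should be BTI-guarded. The helper mark_range_guarded() and its
call site are hypothetical and not part of this commit; only pmap_bti_set()
and vmspace_pmap() are existing interfaces.

#include <sys/param.h>
#include <sys/proc.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>

/*
 * Hypothetical helper: request that [sva, eva) in the given process be
 * mapped with the BTI Guard Page attribute.
 */
static int
mark_range_guarded(struct proc *p, vm_offset_t sva, vm_offset_t eva)
{
	pmap_t pmap;

	pmap = vmspace_pmap(p->p_vmspace);
	/*
	 * pmap_bti_set() rounds the range to page boundaries itself, is a
	 * no-op returning 0 when the pmap has no BTI rangeset (the CPU
	 * lacks BTI), rejects non-canonical or kernel addresses, and
	 * retries internally on transient ENOMEM, so the caller only
	 * needs to propagate the error.
	 */
	return (pmap_bti_set(pmap, sva, eva));
}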