svn commit: r352807 - in head/sys: amd64/amd64 amd64/vmm arm/arm arm64/arm64 i386/i386 kern mips/mips powerpc/powerpc riscv/riscv sparc64/sparc64 sys vm

Konstantin Belousov kib at FreeBSD.org
Fri Sep 27 18:43:40 UTC 2019


Author: kib
Date: Fri Sep 27 18:43:36 2019
New Revision: 352807
URL: https://svnweb.freebsd.org/changeset/base/352807

Log:
  Improve MD page fault handlers.
  
  Centralize calculation of signal and ucode delivered on unhandled page
  fault in new function vm_fault_trap().  MD trap_pfault() now almost
  always uses the signal numbers and error codes calculated in
  consistent MI way.
  
  This introduces the protection fault compatibility sysctls to all
  non-x86 architectures which did not have that bug, but apparently they
  were already much more wrong in selecting delivered signals on
  protection violations.
  
  Change the delivered signal for accesses to mapped area after the
  backing object was truncated.  According to POSIX description for
  mmap(2):
     The system shall always zero-fill any partial page at the end of an
     object. Further, the system shall never write out any modified
     portions of the last page of an object which are beyond its
     end. References within the address range starting at pa and
     continuing for len bytes to whole pages following the end of an
     object shall result in delivery of a SIGBUS signal.
  
     An implementation may generate SIGBUS signals when a reference
     would cause an error in the mapped object, such as out-of-space
     condition.
  Adjust according to the description, keeping the existing
  compatibility code for SIGSEGV/SIGBUS on protection failures.
  
  For situations where kernel cannot handle page fault due to resource
  limit enforcement, SIGBUS with a new error code BUS_OOMERR is
  delivered.  Also, provide a new error code SEGV_PKUERR for SIGSEGV on
  amd64 due to protection key access violation.
  
  vm_fault_hold() is renamed to vm_fault().  Fixed some nits in
  trap_pfault()s like mis-interpreting Mach errors as errnos.  Removed
  unneeded truncations of the fault addresses reported by hardware.
  
  PR:	211924
  Reviewed by:	alc
  Discussed with:	jilles, markj
  Sponsored by:	The FreeBSD Foundation
  MFC after:	1 week
  Differential revision:	https://reviews.freebsd.org/D21566

Modified:
  head/sys/amd64/amd64/trap.c
  head/sys/amd64/vmm/vmm.c
  head/sys/arm/arm/trap-v4.c
  head/sys/arm/arm/trap-v6.c
  head/sys/arm64/arm64/trap.c
  head/sys/i386/i386/trap.c
  head/sys/kern/sys_process.c
  head/sys/mips/mips/trap.c
  head/sys/powerpc/powerpc/trap.c
  head/sys/riscv/riscv/trap.c
  head/sys/sparc64/sparc64/trap.c
  head/sys/sys/signal.h
  head/sys/vm/vm_extern.h
  head/sys/vm/vm_fault.c
  head/sys/vm/vm_map.c
  head/sys/vm/vm_param.h

Modified: head/sys/amd64/amd64/trap.c
==============================================================================
--- head/sys/amd64/amd64/trap.c	Fri Sep 27 18:39:05 2019	(r352806)
+++ head/sys/amd64/amd64/trap.c	Fri Sep 27 18:43:36 2019	(r352807)
@@ -111,7 +111,7 @@ void __noinline trap(struct trapframe *frame);
 void trap_check(struct trapframe *frame);
 void dblfault_handler(struct trapframe *frame);
 
-static int trap_pfault(struct trapframe *, int);
+static int trap_pfault(struct trapframe *, bool, int *, int *);
 static void trap_fatal(struct trapframe *, vm_offset_t);
 #ifdef KDTRACE_HOOKS
 static bool trap_user_dtrace(struct trapframe *,
@@ -155,10 +155,6 @@ static const char *const trap_msg[] = {
 	[T_DTRACE_RET] =	"DTrace pid return trap",
 };
 
-static int prot_fault_translation;
-SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RWTUN,
-    &prot_fault_translation, 0,
-    "Select signal to deliver on protection fault");
 static int uprintf_signal;
 SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RWTUN,
     &uprintf_signal, 0,
@@ -192,14 +188,11 @@ trap(struct trapframe *frame)
 	struct thread *td;
 	struct proc *p;
 	register_t addr, dr6;
-	int signo, ucode;
+	int pf, signo, ucode;
 	u_int type;
 
 	td = curthread;
 	p = td->td_proc;
-	signo = 0;
-	ucode = 0;
-	addr = 0;
 	dr6 = 0;
 
 	VM_CNT_INC(v_trap);
@@ -345,47 +338,18 @@ trap(struct trapframe *frame)
 
 		case T_PAGEFLT:		/* page fault */
 			/*
-			 * Emulator can take care about this trap?
+			 * Can emulator handle this trap?
 			 */
 			if (*p->p_sysent->sv_trap != NULL &&
 			    (*p->p_sysent->sv_trap)(td) == 0)
 				return;
 
-			addr = frame->tf_addr;
-			signo = trap_pfault(frame, TRUE);
-			if (signo == -1)
+			pf = trap_pfault(frame, true, &signo, &ucode);
+			if (pf == -1)
 				return;
-			if (signo == 0)
+			if (pf == 0)
 				goto userret;
-			if (signo == SIGSEGV) {
-				ucode = SEGV_MAPERR;
-			} else if (prot_fault_translation == 0) {
-				/*
-				 * Autodetect.  This check also covers
-				 * the images without the ABI-tag ELF
-				 * note.
-				 */
-				if (SV_CURPROC_ABI() == SV_ABI_FREEBSD &&
-				    p->p_osrel >= P_OSREL_SIGSEGV) {
-					signo = SIGSEGV;
-					ucode = SEGV_ACCERR;
-				} else {
-					signo = SIGBUS;
-					ucode = T_PAGEFLT;
-				}
-			} else if (prot_fault_translation == 1) {
-				/*
-				 * Always compat mode.
-				 */
-				signo = SIGBUS;
-				ucode = T_PAGEFLT;
-			} else {
-				/*
-				 * Always SIGSEGV mode.
-				 */
-				signo = SIGSEGV;
-				ucode = SEGV_ACCERR;
-			}
+			addr = frame->tf_addr;
 			break;
 
 		case T_DIVIDE:		/* integer divide fault */
@@ -440,7 +404,7 @@ trap(struct trapframe *frame)
 		    ("kernel trap doesn't have ucred"));
 		switch (type) {
 		case T_PAGEFLT:			/* page fault */
-			(void) trap_pfault(frame, FALSE);
+			(void)trap_pfault(frame, false, NULL, NULL);
 			return;
 
 		case T_DNA:
@@ -712,17 +676,29 @@ trap_is_pti(struct trapframe *frame)
 	    (PCPU_GET(curpmap)->pm_cr3 & ~CR3_PCID_MASK));
 }
 
+/*
+ * Handle all details of a page fault.
+ * Returns:
+ * -1 if this fault was fatal, typically from kernel mode
+ *    (cannot happen, but we need to return something).
+ * 0  if this fault was handled by updating either the user or kernel
+ *    page table, execution can continue.
+ * 1  if this fault was from usermode and it was not handled, a synchronous
+ *    signal should be delivered to the thread.  *signo returns the signal
+ *    number, *ucode gives si_code.
+ */
 static int
-trap_pfault(struct trapframe *frame, int usermode)
+trap_pfault(struct trapframe *frame, bool usermode, int *signo, int *ucode)
 {
 	struct thread *td;
 	struct proc *p;
 	vm_map_t map;
-	vm_offset_t va;
+	vm_offset_t eva;
 	int rv;
 	vm_prot_t ftype;
-	vm_offset_t eva;
 
+	MPASS(!usermode || (signo != NULL && ucode != NULL));
+
 	td = curthread;
 	p = td->td_proc;
 	eva = frame->tf_addr;
@@ -771,13 +747,15 @@ trap_pfault(struct trapframe *frame, int usermode)
 			return (-1);
 		}
 	}
-	va = trunc_page(eva);
-	if (va >= VM_MIN_KERNEL_ADDRESS) {
+	if (eva >= VM_MIN_KERNEL_ADDRESS) {
 		/*
 		 * Don't allow user-mode faults in kernel address space.
 		 */
-		if (usermode)
-			return (SIGSEGV);
+		if (usermode) {
+			*signo = SIGSEGV;
+			*ucode = SEGV_MAPERR;
+			return (1);
+		}
 
 		map = kernel_map;
 	} else {
@@ -819,7 +797,11 @@ trap_pfault(struct trapframe *frame, int usermode)
 			trap_fatal(frame, eva);
 			return (-1);
 		}
-		rv = KERN_PROTECTION_FAILURE;
+		if (usermode) {
+			*signo = SIGSEGV;
+			*ucode = SEGV_PKUERR;
+			return (1);
+		}
 		goto after_vmfault;
 	}
 
@@ -843,7 +825,7 @@ trap_pfault(struct trapframe *frame, int usermode)
 		ftype = VM_PROT_READ;
 
 	/* Fault in the page. */
-	rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
+	rv = vm_fault_trap(map, eva, ftype, VM_FAULT_NORMAL, signo, ucode);
 	if (rv == KERN_SUCCESS) {
 #ifdef HWPMC_HOOKS
 		if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
@@ -858,17 +840,17 @@ trap_pfault(struct trapframe *frame, int usermode)
 #endif
 		return (0);
 	}
+
+	if (usermode)
+		return (1);
 after_vmfault:
-	if (!usermode) {
-		if (td->td_intr_nesting_level == 0 &&
-		    curpcb->pcb_onfault != NULL) {
-			frame->tf_rip = (long)curpcb->pcb_onfault;
-			return (0);
-		}
-		trap_fatal(frame, eva);
-		return (-1);
+	if (td->td_intr_nesting_level == 0 &&
+	    curpcb->pcb_onfault != NULL) {
+		frame->tf_rip = (long)curpcb->pcb_onfault;
+		return (0);
 	}
-	return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
+	trap_fatal(frame, eva);
+	return (-1);
 }
 
 static void

Modified: head/sys/amd64/vmm/vmm.c
==============================================================================
--- head/sys/amd64/vmm/vmm.c	Fri Sep 27 18:39:05 2019	(r352806)
+++ head/sys/amd64/vmm/vmm.c	Fri Sep 27 18:43:36 2019	(r352807)
@@ -1411,7 +1411,7 @@ vm_handle_paging(struct vm *vm, int vcpuid, bool *retu
 	}
 
 	map = &vm->vmspace->vm_map;
-	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL);
+	rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL, NULL);
 
 	VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %#lx, "
 	    "ftype = %d", rv, vme->u.paging.gpa, ftype);

Modified: head/sys/arm/arm/trap-v4.c
==============================================================================
--- head/sys/arm/arm/trap-v4.c	Fri Sep 27 18:39:05 2019	(r352806)
+++ head/sys/arm/arm/trap-v4.c	Fri Sep 27 18:43:36 2019	(r352807)
@@ -94,12 +94,12 @@ __FBSDID("$FreeBSD$");
 #include <vm/vm_kern.h>
 #include <vm/vm_map.h>
 #include <vm/vm_extern.h>
+#include <vm/vm_param.h>
 
 #include <machine/cpu.h>
 #include <machine/frame.h>
 #include <machine/machdep.h>
 #include <machine/pcb.h>
-#include <machine/vmparam.h>
 
 #ifdef KDB
 #include <sys/kdb.h>
@@ -181,7 +181,7 @@ abort_handler(struct trapframe *tf, int type)
 	vm_prot_t ftype;
 	void *onfault;
 	vm_offset_t va;
-	int error = 0;
+	int error = 0, signo, ucode;
 	struct ksig ksig;
 	struct proc *p;
 
@@ -230,6 +230,8 @@ abort_handler(struct trapframe *tf, int type)
 	if (__predict_false(data_aborts[fsr & FAULT_TYPE_MASK].func != NULL)) {
 		if ((data_aborts[fsr & FAULT_TYPE_MASK].func)(tf, fsr, far,
 		    td, &ksig)) {
+			signo = ksig.signb;
+			ucode = ksig.code;
 			goto do_trapsignal;
 		}
 		goto out;
@@ -262,8 +264,8 @@ abort_handler(struct trapframe *tf, int type)
 			 * Give the user an illegal instruction signal.
 			 */
 			/* Deliver a SIGILL to the process */
-			ksig.signb = SIGILL;
-			ksig.code = 0;
+			signo = SIGILL;
+			ucode = 0;
 			goto do_trapsignal;
 		}
 
@@ -299,8 +301,8 @@ abort_handler(struct trapframe *tf, int type)
 			 * but uses USR mode permissions for its accesses.
 			 */
 			user = 1;
-			ksig.signb = SIGSEGV;
-			ksig.code = 0;
+			signo = SIGSEGV;
+			ucode = 0;
 			goto do_trapsignal;
 		}
 	} else {
@@ -350,9 +352,9 @@ abort_handler(struct trapframe *tf, int type)
 
 	onfault = pcb->pcb_onfault;
 	pcb->pcb_onfault = NULL;
-	error = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
+	error = vm_fault_trap(map, va, ftype, VM_FAULT_NORMAL, &signo, &ucode);
 	pcb->pcb_onfault = onfault;
-	if (__predict_true(error == 0))
+	if (__predict_true(error == KERN_SUCCESS))
 		goto out;
 fatal_pagefault:
 	if (user == 0) {
@@ -368,18 +370,8 @@ fatal_pagefault:
 	}
 
 
-	if (error == ENOMEM) {
-		printf("VM: pid %d (%s), uid %d killed: "
-		    "out of swap\n", td->td_proc->p_pid, td->td_name,
-		    (td->td_proc->p_ucred) ?
-		     td->td_proc->p_ucred->cr_uid : -1);
-		ksig.signb = SIGKILL;
-	} else {
-		ksig.signb = SIGSEGV;
-	}
-	ksig.code = 0;
 do_trapsignal:
-	call_trapsignal(td, ksig.signb, ksig.code);
+	call_trapsignal(td, signo, ucode);
 out:
 	/* If returning to user mode, make sure to invoke userret() */
 	if (user)
@@ -613,10 +605,9 @@ prefetch_abort_handler(struct trapframe *tf)
 	struct proc * p;
 	struct vm_map *map;
 	vm_offset_t fault_pc, va;
-	int error = 0;
+	int error = 0, signo, ucode;
 	struct ksig ksig;
 
-
 #if 0
 	/* Update vmmeter statistics */
 	uvmexp.traps++;
@@ -652,8 +643,8 @@ prefetch_abort_handler(struct trapframe *tf)
 	/* Ok validate the address, can only execute in USER space */
 	if (__predict_false(fault_pc >= VM_MAXUSER_ADDRESS ||
 	    (fault_pc < VM_MIN_ADDRESS && vector_page == ARM_VECTORS_LOW))) {
-		ksig.signb = SIGSEGV;
-		ksig.code = 0;
+		signo = SIGSEGV;
+		ucode = 0;
 		goto do_trapsignal;
 	}
 
@@ -669,24 +660,13 @@ prefetch_abort_handler(struct trapframe *tf)
 	if (pmap_fault_fixup(map->pmap, va, VM_PROT_READ, 1))
 		goto out;
 
-	error = vm_fault(map, va, VM_PROT_READ | VM_PROT_EXECUTE,
-	    VM_FAULT_NORMAL);
-	if (__predict_true(error == 0))
+	error = vm_fault_trap(map, va, VM_PROT_READ | VM_PROT_EXECUTE,
+	    VM_FAULT_NORMAL, &signo, &ucode);
+	if (__predict_true(error == KERN_SUCCESS))
 		goto out;
 
-	if (error == ENOMEM) {
-		printf("VM: pid %d (%s), uid %d killed: "
-		    "out of swap\n", td->td_proc->p_pid, td->td_name,
-		    (td->td_proc->p_ucred) ?
-		     td->td_proc->p_ucred->cr_uid : -1);
-		ksig.signb = SIGKILL;
-	} else {
-		ksig.signb = SIGSEGV;
-	}
-	ksig.code = 0;
-
 do_trapsignal:
-	call_trapsignal(td, ksig.signb, ksig.code);
+	call_trapsignal(td, signo, ucode);
 
 out:
 	userret(td, tf);

Modified: head/sys/arm/arm/trap-v6.c
==============================================================================
--- head/sys/arm/arm/trap-v6.c	Fri Sep 27 18:39:05 2019	(r352806)
+++ head/sys/arm/arm/trap-v6.c	Fri Sep 27 18:43:36 2019	(r352807)
@@ -287,7 +287,7 @@ abort_handler(struct trapframe *tf, int prefetch)
 	struct vmspace *vm;
 	vm_prot_t ftype;
 	bool usermode;
-	int bp_harden;
+	int bp_harden, ucode;
 #ifdef INVARIANTS
 	void *onfault;
 #endif
@@ -497,7 +497,9 @@ abort_handler(struct trapframe *tf, int prefetch)
 #endif
 
 	/* Fault in the page. */
-	rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
+	rv = vm_fault_trap(map, va, ftype, VM_FAULT_NORMAL, &ksig.sig,
+	    &ucode);
+	ksig.code = ucode;
 
 #ifdef INVARIANTS
 	pcb->pcb_onfault = onfault;
@@ -518,8 +520,6 @@ nogo:
 		return;
 	}
 
-	ksig.sig = SIGSEGV;
-	ksig.code = (rv == KERN_PROTECTION_FAILURE) ? SEGV_ACCERR : SEGV_MAPERR;
 	ksig.addr = far;
 
 do_trapsignal:

Modified: head/sys/arm64/arm64/trap.c
==============================================================================
--- head/sys/arm64/arm64/trap.c	Fri Sep 27 18:39:05 2019	(r352806)
+++ head/sys/arm64/arm64/trap.c	Fri Sep 27 18:43:36 2019	(r352807)
@@ -155,7 +155,6 @@ data_abort(struct thread *td, struct trapframe *frame,
 	struct proc *p;
 	struct pcb *pcb;
 	vm_prot_t ftype;
-	vm_offset_t va;
 	int error, sig, ucode;
 #ifdef KDB
 	bool handled;
@@ -211,7 +210,6 @@ data_abort(struct thread *td, struct trapframe *frame,
 		panic("data abort in critical section or under mutex");
 	}
 
-	va = trunc_page(far);
 	if (exec)
 		ftype = VM_PROT_EXECUTE;
 	else
@@ -219,14 +217,9 @@ data_abort(struct thread *td, struct trapframe *frame,
 		    VM_PROT_READ | VM_PROT_WRITE;
 
 	/* Fault in the page. */
-	error = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
+	error = vm_fault_trap(map, far, ftype, VM_FAULT_NORMAL, &sig, &ucode);
 	if (error != KERN_SUCCESS) {
 		if (lower) {
-			sig = SIGSEGV;
-			if (error == KERN_PROTECTION_FAILURE)
-				ucode = SEGV_ACCERR;
-			else
-				ucode = SEGV_MAPERR;
 			call_trapsignal(td, sig, ucode, (void *)far);
 		} else {
 			if (td->td_intr_nesting_level == 0 &&

Modified: head/sys/i386/i386/trap.c
==============================================================================
--- head/sys/i386/i386/trap.c	Fri Sep 27 18:39:05 2019	(r352806)
+++ head/sys/i386/i386/trap.c	Fri Sep 27 18:43:36 2019	(r352807)
@@ -114,7 +114,7 @@ PMC_SOFT_DEFINE( , , page_fault, write);
 void trap(struct trapframe *frame);
 void syscall(struct trapframe *frame);
 
-static int trap_pfault(struct trapframe *, int, vm_offset_t);
+static int trap_pfault(struct trapframe *, bool, vm_offset_t, int *, int *);
 static void trap_fatal(struct trapframe *, vm_offset_t);
 #ifdef KDTRACE_HOOKS
 static bool trap_user_dtrace(struct trapframe *,
@@ -181,9 +181,6 @@ trap_msg(int trapno)
 int has_f00f_bug = 0;		/* Initialized so that it can be patched. */
 #endif
 
-static int prot_fault_translation = 0;
-SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RW,
-	&prot_fault_translation, 0, "Select signal to deliver on protection fault");
 static int uprintf_signal;
 SYSCTL_INT(_machdep, OID_AUTO, uprintf_signal, CTLFLAG_RW,
     &uprintf_signal, 0,
@@ -202,7 +199,7 @@ trap(struct trapframe *frame)
 	ksiginfo_t ksi;
 	struct thread *td;
 	struct proc *p;
-	int signo, ucode;
+	int pf, signo, ucode;
 	u_int type;
 	register_t addr, dr6;
 	vm_offset_t eva;
@@ -212,9 +209,6 @@ trap(struct trapframe *frame)
 
 	td = curthread;
 	p = td->td_proc;
-	signo = 0;
-	ucode = 0;
-	addr = 0;
 	dr6 = 0;
 
 	VM_CNT_INC(v_trap);
@@ -365,6 +359,7 @@ user_trctrap_out:
 		case T_STKFLT:		/* stack fault */
 			if (frame->tf_eflags & PSL_VM) {
 				signo = vm86_emulate((struct vm86frame *)frame);
+				ucode = 0;	/* XXXKIB: better code ? */
 				if (signo == SIGTRAP) {
 					load_dr6(rdr6() | 0x4000);
 					goto user_trctrap_out;
@@ -395,57 +390,23 @@ user_trctrap_out:
 			break;
 
 		case T_PAGEFLT:		/* page fault */
-			signo = trap_pfault(frame, TRUE, eva);
+			addr = eva;
+			pf = trap_pfault(frame, true, eva, &signo, &ucode);
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
-			if (signo == -2) {
+			if (pf == -2) {
 				/*
 				 * The f00f hack workaround has triggered, so
 				 * treat the fault as an illegal instruction 
 				 * (T_PRIVINFLT) instead of a page fault.
 				 */
 				type = frame->tf_trapno = T_PRIVINFLT;
-
-				/* Proceed as in that case. */
-				ucode = ILL_PRVOPC;
-				signo = SIGILL;
 				break;
 			}
 #endif
-			if (signo == -1)
+			if (pf == -1)
 				return;
-			if (signo == 0)
+			if (pf == 0)
 				goto user;
-
-			if (signo == SIGSEGV)
-				ucode = SEGV_MAPERR;
-			else if (prot_fault_translation == 0) {
-				/*
-				 * Autodetect.  This check also covers
-				 * the images without the ABI-tag ELF
-				 * note.
-				 */
-				if (SV_CURPROC_ABI() == SV_ABI_FREEBSD &&
-				    p->p_osrel >= P_OSREL_SIGSEGV) {
-					signo = SIGSEGV;
-					ucode = SEGV_ACCERR;
-				} else {
-					signo = SIGBUS;
-					ucode = T_PAGEFLT;
-				}
-			} else if (prot_fault_translation == 1) {
-				/*
-				 * Always compat mode.
-				 */
-				signo = SIGBUS;
-				ucode = T_PAGEFLT;
-			} else {
-				/*
-				 * Always SIGSEGV mode.
-				 */
-				signo = SIGSEGV;
-				ucode = SEGV_ACCERR;
-			}
-			addr = eva;
 			break;
 
 		case T_DIVIDE:		/* integer divide fault */
@@ -517,7 +478,7 @@ user_trctrap_out:
 		    ("kernel trap doesn't have ucred"));
 		switch (type) {
 		case T_PAGEFLT:			/* page fault */
-			(void) trap_pfault(frame, FALSE, eva);
+			(void)trap_pfault(frame, false, eva, NULL, NULL);
 			return;
 
 		case T_DNA:
@@ -769,16 +730,31 @@ user:
 	    ("Return from trap with kernel FPU ctx leaked"));
 }
 
+/*
+ * Handle all details of a page fault.
+ * Returns:
+ * -2 if the fault was caused by triggered workaround for Intel Pentium
+ *    0xf00f bug.
+ * -1 if this fault was fatal, typically from kernel mode
+ *    (cannot happen, but we need to return something).
+ * 0  if this fault was handled by updating either the user or kernel
+ *    page table, execution can continue.
+ * 1  if this fault was from usermode and it was not handled, a synchronous
+ *    signal should be delivered to the thread.  *signo returns the signal
+ *    number, *ucode gives si_code.
+ */
 static int
-trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva)
+trap_pfault(struct trapframe *frame, bool usermode, vm_offset_t eva,
+    int *signo, int *ucode)
 {
 	struct thread *td;
 	struct proc *p;
-	vm_offset_t va;
 	vm_map_t map;
 	int rv;
 	vm_prot_t ftype;
 
+	MPASS(!usermode || (signo != NULL && ucode != NULL));
+
 	td = curthread;
 	p = td->td_proc;
 
@@ -826,8 +802,7 @@ trap_pfault(struct trapframe *frame, int usermode, vm_
 			return (-1);
 		}
 	}
-	va = trunc_page(eva);
-	if (va >= PMAP_TRM_MIN_ADDRESS) {
+	if (eva >= PMAP_TRM_MIN_ADDRESS) {
 		/*
 		 * Don't allow user-mode faults in kernel address space.
 		 * An exception:  if the faulting address is the invalid
@@ -837,11 +812,17 @@ trap_pfault(struct trapframe *frame, int usermode, vm_
 		 * fault.
 		 */
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
-		if ((eva == (unsigned int)&idt[6]) && has_f00f_bug)
+		if ((eva == (unsigned int)&idt[6]) && has_f00f_bug) {
+			*ucode = ILL_PRVOPC;
+			*signo = SIGILL;
 			return (-2);
+		}
 #endif
-		if (usermode)
-			return (SIGSEGV);
+		if (usermode) {
+			*signo = SIGSEGV;
+			*ucode = SEGV_MAPERR;
+			return (1);
+		}
 		trap_fatal(frame, eva);
 		return (-1);
 	} else {
@@ -878,7 +859,7 @@ trap_pfault(struct trapframe *frame, int usermode, vm_
 		ftype = VM_PROT_READ;
 
 	/* Fault in the page. */
-	rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
+	rv = vm_fault_trap(map, eva, ftype, VM_FAULT_NORMAL, signo, ucode);
 	if (rv == KERN_SUCCESS) {
 #ifdef HWPMC_HOOKS
 		if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
@@ -893,16 +874,15 @@ trap_pfault(struct trapframe *frame, int usermode, vm_
 #endif
 		return (0);
 	}
-	if (!usermode) {
-		if (td->td_intr_nesting_level == 0 &&
-		    curpcb->pcb_onfault != NULL) {
-			frame->tf_eip = (int)curpcb->pcb_onfault;
-			return (0);
-		}
-		trap_fatal(frame, eva);
-		return (-1);
+	if (usermode)
+		return (1);
+	if (td->td_intr_nesting_level == 0 &&
+	    curpcb->pcb_onfault != NULL) {
+		frame->tf_eip = (int)curpcb->pcb_onfault;
+		return (0);
 	}
-	return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
+	trap_fatal(frame, eva);
+	return (-1);
 }
 
 static void

Modified: head/sys/kern/sys_process.c
==============================================================================
--- head/sys/kern/sys_process.c	Fri Sep 27 18:39:05 2019	(r352806)
+++ head/sys/kern/sys_process.c	Fri Sep 27 18:43:36 2019	(r352807)
@@ -286,7 +286,7 @@ proc_rwmem(struct proc *p, struct uio *uio)
 		/*
 		 * Fault and hold the page on behalf of the process.
 		 */
-		error = vm_fault_hold(map, pageno, reqprot, fault_flags, &m);
+		error = vm_fault(map, pageno, reqprot, fault_flags, &m);
 		if (error != KERN_SUCCESS) {
 			if (error == KERN_RESOURCE_SHORTAGE)
 				error = ENOMEM;

Modified: head/sys/mips/mips/trap.c
==============================================================================
--- head/sys/mips/mips/trap.c	Fri Sep 27 18:39:05 2019	(r352806)
+++ head/sys/mips/mips/trap.c	Fri Sep 27 18:43:36 2019	(r352807)
@@ -669,8 +669,9 @@ trap(struct trapframe *trapframe)
 			int rv;
 
 	kernel_fault:
-			va = trunc_page((vm_offset_t)trapframe->badvaddr);
-			rv = vm_fault(kernel_map, va, ftype, VM_FAULT_NORMAL);
+			va = (vm_offset_t)trapframe->badvaddr;
+			rv = vm_fault_trap(kernel_map, va, ftype,
+			    VM_FAULT_NORMAL, NULL, NULL);
 			if (rv == KERN_SUCCESS)
 				return (trapframe->pc);
 			if (td->td_pcb->pcb_onfault != NULL) {
@@ -705,7 +706,7 @@ dofault:
 
 			vm = p->p_vmspace;
 			map = &vm->vm_map;
-			va = trunc_page((vm_offset_t)trapframe->badvaddr);
+			va = (vm_offset_t)trapframe->badvaddr;
 			if (KERNLAND(trapframe->badvaddr)) {
 				/*
 				 * Don't allow user-mode faults in kernel
@@ -714,7 +715,8 @@ dofault:
 				goto nogo;
 			}
 
-			rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
+			rv = vm_fault_trap(map, va, ftype, VM_FAULT_NORMAL,
+			    &i, &ucode);
 			/*
 			 * XXXDTRACE: add dtrace_doubletrap_func here?
 			 */
@@ -739,11 +741,6 @@ dofault:
 				}
 				goto err;
 			}
-			i = SIGSEGV;
-			if (rv == KERN_PROTECTION_FAILURE)
-				ucode = SEGV_ACCERR;
-			else
-				ucode = SEGV_MAPERR;
 			addr = trapframe->pc;
 
 			msg = "BAD_PAGE_FAULT";

Modified: head/sys/powerpc/powerpc/trap.c
==============================================================================
--- head/sys/powerpc/powerpc/trap.c	Fri Sep 27 18:39:05 2019	(r352806)
+++ head/sys/powerpc/powerpc/trap.c	Fri Sep 27 18:43:36 2019	(r352807)
@@ -87,7 +87,8 @@ __FBSDID("$FreeBSD$");
 static void	trap_fatal(struct trapframe *frame);
 static void	printtrap(u_int vector, struct trapframe *frame, int isfatal,
 		    int user);
-static int	trap_pfault(struct trapframe *frame, int user);
+static bool	trap_pfault(struct trapframe *frame, bool user, int *signo,
+		    int *ucode);
 static int	fix_unaligned(struct thread *td, struct trapframe *frame);
 static int	handle_onfault(struct trapframe *frame);
 static void	syscall(struct trapframe *frame);
@@ -269,9 +270,8 @@ trap(struct trapframe *frame)
 #endif
 		case EXC_DSI:
 		case EXC_ISI:
-			sig = trap_pfault(frame, 1);
-			if (sig == SIGSEGV)
-				ucode = SEGV_MAPERR;
+			if (trap_pfault(frame, true, &sig, &ucode))
+				sig = 0;
 			break;
 
 		case EXC_SC:
@@ -460,7 +460,7 @@ trap(struct trapframe *frame)
 			break;
 #endif
 		case EXC_DSI:
-			if (trap_pfault(frame, 0) == 0)
+			if (trap_pfault(frame, false, NULL, NULL))
  				return;
 			break;
 		case EXC_MCHK:
@@ -718,10 +718,10 @@ syscall(struct trapframe *frame)
 	syscallret(td);
 }
 
-static int
-trap_pfault(struct trapframe *frame, int user)
+static bool
+trap_pfault(struct trapframe *frame, bool user, int *signo, int *ucode)
 {
-	vm_offset_t	eva, va;
+	vm_offset_t	eva;
 	struct		thread *td;
 	struct		proc *p;
 	vm_map_t	map;
@@ -753,28 +753,27 @@ trap_pfault(struct trapframe *frame, int user)
 	} else {
 		rv = pmap_decode_kernel_ptr(eva, &is_user, &eva);
 		if (rv != 0)
-			return (SIGSEGV);
+			return (false);
 
 		if (is_user)
 			map = &p->p_vmspace->vm_map;
 		else
 			map = kernel_map;
 	}
-	va = trunc_page(eva);
 
 	/* Fault in the page. */
-	rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
+	rv = vm_fault_trap(map, eva, ftype, VM_FAULT_NORMAL, signo, ucode);
 	/*
 	 * XXXDTRACE: add dtrace_doubletrap_func here?
 	 */
 
 	if (rv == KERN_SUCCESS)
-		return (0);
+		return (true);
 
 	if (!user && handle_onfault(frame))
-		return (0);
+		return (true);
 
-	return (SIGSEGV);
+	return (false);
 }
 
 /*

Modified: head/sys/riscv/riscv/trap.c
==============================================================================
--- head/sys/riscv/riscv/trap.c	Fri Sep 27 18:39:05 2019	(r352806)
+++ head/sys/riscv/riscv/trap.c	Fri Sep 27 18:43:36 2019	(r352807)
@@ -217,14 +217,9 @@ data_abort(struct trapframe *frame, int usermode)
 	if (pmap_fault_fixup(map->pmap, va, ftype))
 		goto done;
 
-	error = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
+	error = vm_fault_trap(map, va, ftype, VM_FAULT_NORMAL, &sig, &ucode);
 	if (error != KERN_SUCCESS) {
 		if (usermode) {
-			sig = SIGSEGV;
-			if (error == KERN_PROTECTION_FAILURE)
-				ucode = SEGV_ACCERR;
-			else
-				ucode = SEGV_MAPERR;
 			call_trapsignal(td, sig, ucode, (void *)stval);
 		} else {
 			if (pcb->pcb_onfault != 0) {

Modified: head/sys/sparc64/sparc64/trap.c
==============================================================================
--- head/sys/sparc64/sparc64/trap.c	Fri Sep 27 18:39:05 2019	(r352806)
+++ head/sys/sparc64/sparc64/trap.c	Fri Sep 27 18:43:36 2019	(r352807)
@@ -91,7 +91,8 @@ void trap(struct trapframe *tf);
 void syscall(struct trapframe *tf);
 
 static int trap_cecc(void);
-static int trap_pfault(struct thread *td, struct trapframe *tf);
+static bool trap_pfault(struct thread *td, struct trapframe *tf, int *signo,
+    int *ucode);
 
 extern char copy_fault[];
 extern char copy_nofault_begin[];
@@ -287,7 +288,8 @@ trap(struct trapframe *tf)
 			addr = tf->tf_sfar;
 			/* FALLTHROUGH */
 		case T_INSTRUCTION_MISS:
-			sig = trap_pfault(td, tf);
+			if (trap_pfault(td, tf, &sig, &ucode))
+				sig = 0;
 			break;
 		case T_FILL:
 			sig = rwindow_load(td, tf, 2);
@@ -358,7 +360,7 @@ trap(struct trapframe *tf)
 		case T_DATA_MISS:
 		case T_DATA_PROTECTION:
 		case T_INSTRUCTION_MISS:
-			error = trap_pfault(td, tf);
+			error = !trap_pfault(td, tf, &sig, &ucode);
 			break;
 		case T_DATA_EXCEPTION:
 		case T_MEM_ADDRESS_NOT_ALIGNED:
@@ -443,8 +445,8 @@ trap_cecc(void)
 	return (0);
 }
 
-static int
-trap_pfault(struct thread *td, struct trapframe *tf)
+static bool
+trap_pfault(struct thread *td, struct trapframe *tf, int *signo, int *ucode)
 {
 	vm_map_t map;
 	struct proc *p;
@@ -508,27 +510,27 @@ trap_pfault(struct thread *td, struct trapframe *tf)
 	}
 
 	/* Fault in the page. */
-	rv = vm_fault(map, va, prot, VM_FAULT_NORMAL);
+	rv = vm_fault_trap(map, va, prot, VM_FAULT_NORMAL, signo, ucode);
 
 	CTR3(KTR_TRAP, "trap_pfault: return td=%p va=%#lx rv=%d",
 	    td, va, rv);
 	if (rv == KERN_SUCCESS)
-		return (0);
+		return (true);
 	if (ctx != TLB_CTX_KERNEL && (tf->tf_tstate & TSTATE_PRIV) != 0) {
 		if (tf->tf_tpc >= (u_long)fs_nofault_begin &&
 		    tf->tf_tpc <= (u_long)fs_nofault_end) {
 			tf->tf_tpc = (u_long)fs_fault;
 			tf->tf_tnpc = tf->tf_tpc + 4;
-			return (0);
+			return (true);
 		}
 		if (tf->tf_tpc >= (u_long)copy_nofault_begin &&
 		    tf->tf_tpc <= (u_long)copy_nofault_end) {
 			tf->tf_tpc = (u_long)copy_fault;
 			tf->tf_tnpc = tf->tf_tpc + 4;
-			return (0);
+			return (true);
 		}
 	}
-	return ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
+	return (false);
 }
 
 /* Maximum number of arguments that can be passed via the out registers. */

Modified: head/sys/sys/signal.h
==============================================================================
--- head/sys/sys/signal.h	Fri Sep 27 18:39:05 2019	(r352806)
+++ head/sys/sys/signal.h	Fri Sep 27 18:43:36 2019	(r352807)
@@ -315,11 +315,13 @@ struct siginfo32 {
 #define BUS_ADRALN	1	/* Invalid address alignment.		*/
 #define BUS_ADRERR	2	/* Nonexistent physical address.	*/
 #define BUS_OBJERR	3	/* Object-specific hardware error.	*/
+#define BUS_OOMERR	100	/* Non-standard: No memory.		*/
 
 /* codes for SIGSEGV */
 #define SEGV_MAPERR	1	/* Address not mapped to object.	*/
 #define SEGV_ACCERR	2	/* Invalid permissions for mapped	*/
 				/* object.				*/
+#define	SEGV_PKUERR	100	/* x86: PKU violation			*/
 
 /* codes for SIGFPE */
 #define FPE_INTOVF	1	/* Integer overflow.			*/

Modified: head/sys/vm/vm_extern.h
==============================================================================
--- head/sys/vm/vm_extern.h	Fri Sep 27 18:39:05 2019	(r352806)
+++ head/sys/vm/vm_extern.h	Fri Sep 27 18:43:36 2019	(r352807)
@@ -85,15 +85,16 @@ void kmeminit(void);
 
 int kernacc(void *, int, int);
 int useracc(void *, int, int);
-int vm_fault(vm_map_t, vm_offset_t, vm_prot_t, int);
+int vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+    int fault_flags, vm_page_t *m_hold);
 void vm_fault_copy_entry(vm_map_t, vm_map_t, vm_map_entry_t, vm_map_entry_t,
     vm_ooffset_t *);
 int vm_fault_disable_pagefaults(void);
 void vm_fault_enable_pagefaults(int save);
-int vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
-    int fault_flags, vm_page_t *m_hold);
 int vm_fault_quick_hold_pages(vm_map_t map, vm_offset_t addr, vm_size_t len,
     vm_prot_t prot, vm_page_t *ma, int max_count);
+int vm_fault_trap(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+    int fault_flags, int *signo, int *ucode);
 int vm_forkproc(struct thread *, struct proc *, struct thread *,
     struct vmspace *, int);
 void vm_waitproc(struct proc *);

Modified: head/sys/vm/vm_fault.c
==============================================================================
--- head/sys/vm/vm_fault.c	Fri Sep 27 18:39:05 2019	(r352806)
+++ head/sys/vm/vm_fault.c	Fri Sep 27 18:43:36 2019	(r352807)
@@ -90,7 +90,9 @@ __FBSDID("$FreeBSD$");
 #include <sys/refcount.h>
 #include <sys/resourcevar.h>
 #include <sys/rwlock.h>
+#include <sys/signalvar.h>
 #include <sys/sysctl.h>
+#include <sys/sysent.h>
 #include <sys/vmmeter.h>
 #include <sys/vnode.h>
 #ifdef KTRACE
@@ -520,8 +522,19 @@ vm_fault_populate(struct faultstate *fs, vm_prot_t pro
 	return (KERN_SUCCESS);
 }
 
+static int prot_fault_translation;
+SYSCTL_INT(_machdep, OID_AUTO, prot_fault_translation, CTLFLAG_RWTUN,
+    &prot_fault_translation, 0,
+    "Control signal to deliver on protection fault");
+
+/* compat definition to keep common code for signal translation */
+#define	UCODE_PAGEFLT	12
+#ifdef T_PAGEFLT
+_Static_assert(UCODE_PAGEFLT == T_PAGEFLT, "T_PAGEFLT");
+#endif
+
 /*
- *	vm_fault:
+ *	vm_fault_trap:
  *
  *	Handle a page fault occurring at the given address,
  *	requiring the given permissions, in the map specified.
@@ -538,12 +551,13 @@ vm_fault_populate(struct faultstate *fs, vm_prot_t pro
  *	Caller may hold no locks.
  */
 int
-vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
-    int fault_flags)
+vm_fault_trap(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+    int fault_flags, int *signo, int *ucode)
 {
 	struct thread *td;
 	int result;
 
+	MPASS(signo == NULL || ucode != NULL);
 	td = curthread;
 	if ((td->td_pflags & TDP_NOFAULTING) != 0)
 		return (KERN_PROTECTION_FAILURE);
@@ -551,17 +565,69 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fa
 	if (map != kernel_map && KTRPOINT(td, KTR_FAULT))
 		ktrfault(vaddr, fault_type);
 #endif
-	result = vm_fault_hold(map, trunc_page(vaddr), fault_type, fault_flags,
+	result = vm_fault(map, trunc_page(vaddr), fault_type, fault_flags,
 	    NULL);
+	KASSERT(result == KERN_SUCCESS || result == KERN_FAILURE ||
+	    result == KERN_INVALID_ADDRESS ||
+	    result == KERN_RESOURCE_SHORTAGE ||
+	    result == KERN_PROTECTION_FAILURE ||
+	    result == KERN_OUT_OF_BOUNDS,
+	    ("Unexpected Mach error %d from vm_fault()", result));
 #ifdef KTRACE
 	if (map != kernel_map && KTRPOINT(td, KTR_FAULTEND))
 		ktrfaultend(result);
 #endif
+	if (result != KERN_SUCCESS && signo != NULL) {
+		switch (result) {
+		case KERN_FAILURE:
+		case KERN_INVALID_ADDRESS:
+			*signo = SIGSEGV;
+			*ucode = SEGV_MAPERR;
+			break;
+		case KERN_RESOURCE_SHORTAGE:
+			*signo = SIGBUS;
+			*ucode = BUS_OOMERR;
+			break;
+		case KERN_OUT_OF_BOUNDS:
+			*signo = SIGBUS;
+			*ucode = BUS_OBJERR;
+			break;
+		case KERN_PROTECTION_FAILURE:
+			if (prot_fault_translation == 0) {
+				/*
+				 * Autodetect.  This check also covers
+				 * the images without the ABI-tag ELF
+				 * note.
+				 */
+				if (SV_CURPROC_ABI() == SV_ABI_FREEBSD &&
+				    curproc->p_osrel >= P_OSREL_SIGSEGV) {
+					*signo = SIGSEGV;
+					*ucode = SEGV_ACCERR;
+				} else {
+					*signo = SIGBUS;
+					*ucode = UCODE_PAGEFLT;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***


More information about the svn-src-head mailing list