[PATCH] Add ktrace records for user page faults
John Baldwin
jhb at freebsd.org
Tue May 3 13:59:45 UTC 2011
On Monday, May 02, 2011 4:16:02 pm Kostik Belousov wrote:
> On Mon, May 02, 2011 at 04:02:02PM -0400, John Baldwin wrote:
> > On Monday, May 02, 2011 3:55:55 pm Kostik Belousov wrote:
> > > On Mon, May 02, 2011 at 03:37:19PM -0400, John Baldwin wrote:
> > > > One thing I have found useful is knowing when processes are in the kernel
> > > > instead of in userland. ktrace already provides records for syscall
> > > > entry/exit. The other major source of time spent in the kernel that I've seen
> > > > is page fault handling. To that end, I have a patch that adds ktrace records
> > > > to the beginning and end of VM faults. This gives a pair of records so a user
> > > > can see how long a fault took (similar to how one can see how long a syscall
> > > > takes now). Sample output from kdump is below:
> > > >
> > > > 47565 echo CALL mmap(0x800a87000,0x179000,PROT_READ|
> > > > PROT_WRITE,MAP_PRIVATE|MAP_ANON,0xffffffff,0)
> > > > 47565 echo RET mmap 34370777088/0x800a87000
> > > > 47565 echo PFLT 0x800723000 VM_PROT_EXECUTE
> > > > 47565 echo RET KERN_SUCCESS
> > > > 47565 echo CALL munmap(0x800887000,0x179000)
> > > > 47565 echo RET munmap 0
> > > > 47565 echo PFLT 0x800a00000 VM_PROT_WRITE
> > > > 47565 echo RET KERN_SUCCESS
> > > >
> > > > The patch is available at www.freebsd.org/~jhb/patches/ktrace_fault.patch and
> > > > included below.
> > >
> > > One immediate detail is that trap() truncates the fault address to the
> > > page address, that arguably looses useful information.
> >
> > It is true that it would be nice to have the exact faulting address, though
> > having page granularity has been sufficient for the few times I've actually
> > used the address itself (e.g. I could figure out which page of libstdc++ a
> > fault occurred on and narrow down from there as to which of the routines most
> > likely was executed given what the app was doing at the time). In my case
> > knowing how much time was spent handling a page fault has been useful.
> >
> > Would we have to push this logic out of vm_fault and into every trap() routine
> > to get the original address? That would make the patch quite a bit bigger
> > (touching N MD files vs 1 MI file).
>
> Or do the reverse, making vm_fault() do trunc_page() [if doing this
> change at all].
Hmm, so I have a new version of the patch that is 1) against 9 rather than 8,
and 2) pushes the trunc_page() down into vm_fault(). One caveat here is that
faults on sparc64 and sun4v only have the page address, never a sub-page
address. I haven't tested this, but it gives you an idea of what such a change
would look like if we want to do it:
--- //depot/user/jhb/ktrace/amd64/amd64/trap.c 2011-03-07 20:37:47.000000000 0000
+++ /home/jhb/work/p4/ktrace/amd64/amd64/trap.c 2011-03-07 20:37:47.000000000 0000
@@ -641,16 +641,14 @@
struct trapframe *frame;
int usermode;
{
- vm_offset_t va;
struct vmspace *vm = NULL;
vm_map_t map;
int rv = 0;
vm_prot_t ftype;
struct thread *td = curthread;
struct proc *p = td->td_proc;
- vm_offset_t eva = frame->tf_addr;
+ vm_offset_t va = frame->tf_addr;
- va = trunc_page(eva);
if (va >= VM_MIN_KERNEL_ADDRESS) {
/*
* Don't allow user-mode faults in kernel address space.
@@ -716,7 +714,7 @@
frame->tf_rip = (long)PCPU_GET(curpcb)->pcb_onfault;
return (0);
}
- trap_fatal(frame, eva);
+ trap_fatal(frame, va);
return (-1);
}
--- //depot/user/jhb/ktrace/arm/arm/trap.c 2010-06-01 21:27:15.000000000 0000
+++ /home/jhb/work/p4/ktrace/arm/arm/trap.c 2010-06-01 21:27:15.000000000 0000
@@ -343,7 +343,7 @@
break;
}
- va = trunc_page((vm_offset_t)far);
+ va = (vm_offset_t)far;
/*
* It is only a kernel address space fault iff:
@@ -412,8 +412,8 @@
#ifdef DEBUG
last_fault_code = fsr;
#endif
- if (pmap_fault_fixup(vmspace_pmap(td->td_proc->p_vmspace), va, ftype,
- user)) {
+ if (pmap_fault_fixup(vmspace_pmap(td->td_proc->p_vmspace),
+ trunc_page(va), ftype, user)) {
goto out;
}
@@ -704,7 +704,7 @@
struct thread *td;
struct proc * p;
struct vm_map *map;
- vm_offset_t fault_pc, va;
+ vm_offset_t fault_pc;
int error = 0;
struct ksig ksig;
@@ -766,7 +766,6 @@
}
map = &td->td_proc->p_vmspace->vm_map;
- va = trunc_page(fault_pc);
/*
* See if the pmap can handle this fault on its own...
@@ -774,7 +773,7 @@
#ifdef DEBUG
last_fault_code = -1;
#endif
- if (pmap_fault_fixup(map->pmap, va, VM_PROT_READ, 1))
+ if (pmap_fault_fixup(map->pmap, trunc_page(fault_pc), VM_PROT_READ, 1))
goto out;
if (map != kernel_map) {
@@ -783,7 +782,7 @@
PROC_UNLOCK(p);
}
- error = vm_fault(map, va, VM_PROT_READ | VM_PROT_EXECUTE,
+ error = vm_fault(map, fault_pc, VM_PROT_READ | VM_PROT_EXECUTE,
VM_FAULT_NORMAL);
if (map != kernel_map) {
PROC_LOCK(p);
--- //depot/user/jhb/ktrace/i386/i386/trap.c 2011-03-07 20:37:47.000000000 0000
+++ /home/jhb/work/p4/ktrace/i386/i386/trap.c 2011-03-07 20:37:47.000000000 0000
@@ -788,9 +788,8 @@
trap_pfault(frame, usermode, eva)
struct trapframe *frame;
int usermode;
- vm_offset_t eva;
+ vm_offset_t va;
{
- vm_offset_t va;
struct vmspace *vm = NULL;
vm_map_t map;
int rv = 0;
@@ -798,7 +797,6 @@
struct thread *td = curthread;
struct proc *p = td->td_proc;
- va = trunc_page(eva);
if (va >= KERNBASE) {
/*
* Don't allow user-mode faults in kernel address space.
@@ -809,7 +807,7 @@
* fault.
*/
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
- if ((eva == (unsigned int)&idt[6]) && has_f00f_bug)
+ if ((va == (unsigned int)&idt[6]) && has_f00f_bug)
return -2;
#endif
if (usermode)
@@ -875,7 +873,7 @@
frame->tf_eip = (int)PCPU_GET(curpcb)->pcb_onfault;
return (0);
}
- trap_fatal(frame, eva);
+ trap_fatal(frame, va);
return (-1);
}
--- //depot/user/jhb/ktrace/ia64/ia64/trap.c 2010-09-22 15:07:20.000000000 0000
+++ /home/jhb/work/p4/ktrace/ia64/ia64/trap.c 2010-09-22 15:07:20.000000000 0000
@@ -530,7 +530,7 @@
int rv;
rv = 0;
- va = trunc_page(tf->tf_special.ifa);
+ va = tf->tf_special.ifa;
if (va >= VM_MAX_ADDRESS) {
/*
@@ -592,6 +592,7 @@
}
trap_panic(vector, tf);
}
+ /* XXX: ksi_addr should be 'va', 'ucode' should be fault type. */
ucode = va;
sig = (rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV;
break;
--- //depot/user/jhb/ktrace/kern/kern_ktrace.c 2011-03-07 20:37:47.000000000 0000
+++ /home/jhb/work/p4/ktrace/kern/kern_ktrace.c 2011-03-07 20:37:47.000000000 0000
@@ -100,6 +100,8 @@
struct ktr_genio ktr_genio;
struct ktr_psig ktr_psig;
struct ktr_csw ktr_csw;
+ struct ktr_fault ktr_fault;
+ struct ktr_faultend ktr_faultend;
} ktr_data;
STAILQ_ENTRY(ktr_request) ktr_list;
};
@@ -117,6 +119,8 @@
0, /* KTR_SYSCTL */
sizeof(struct ktr_proc_ctor), /* KTR_PROCCTOR */
0, /* KTR_PROCDTOR */
+ sizeof(struct ktr_fault), /* KTR_FAULT */
+ sizeof(struct ktr_faultend), /* KTR_FAULTEND */
};
static STAILQ_HEAD(, ktr_request) ktr_free;
@@ -768,6 +772,38 @@
req->ktr_header.ktr_len = buflen;
ktr_submitrequest(curthread, req);
}
+
+void
+ktrfault(vaddr, type)
+ vm_offset_t vaddr;
+ int type;
+{
+ struct ktr_request *req;
+ struct ktr_fault *kf;
+
+ req = ktr_getrequest(KTR_FAULT);
+ if (req == NULL)
+ return;
+ kf = &req->ktr_data.ktr_fault;
+ kf->vaddr = vaddr;
+ kf->type = type;
+ ktr_enqueuerequest(curthread, req);
+}
+
+void
+ktrfaultend(result)
+ int result;
+{
+ struct ktr_request *req;
+ struct ktr_faultend *kf;
+
+ req = ktr_getrequest(KTR_FAULTEND);
+ if (req == NULL)
+ return;
+ kf = &req->ktr_data.ktr_faultend;
+ kf->result = result;
+ ktr_enqueuerequest(curthread, req);
+}
#endif /* KTRACE */
/* Interface and common routines */
--- //depot/user/jhb/ktrace/mips/mips/trap.c 2011-01-13 18:03:58.000000000 0000
+++ /home/jhb/work/p4/ktrace/mips/mips/trap.c 2011-01-13 18:03:58.000000000 0000
@@ -395,12 +395,11 @@
ftype = (type == T_TLB_ST_MISS) ? VM_PROT_WRITE : VM_PROT_READ;
/* check for kernel address */
if (KERNLAND(trapframe->badvaddr)) {
- vm_offset_t va;
int rv;
kernel_fault:
- va = trunc_page((vm_offset_t)trapframe->badvaddr);
- rv = vm_fault(kernel_map, va, ftype, VM_FAULT_NORMAL);
+ rv = vm_fault(kernel_map, trapframe->badvaddr, ftype,
+ VM_FAULT_NORMAL);
if (rv == KERN_SUCCESS)
return (trapframe->pc);
if (td->td_pcb->pcb_onfault != NULL) {
@@ -436,14 +435,12 @@
ftype = VM_PROT_WRITE;
dofault:
{
- vm_offset_t va;
struct vmspace *vm;
vm_map_t map;
int rv = 0;
vm = p->p_vmspace;
map = &vm->vm_map;
- va = trunc_page((vm_offset_t)trapframe->badvaddr);
if (KERNLAND(trapframe->badvaddr)) {
/*
* Don't allow user-mode faults in kernel
@@ -460,14 +457,15 @@
++p->p_lock;
PROC_UNLOCK(p);
- rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
+ rv = vm_fault(map, trapframe->badvaddr, ftype,
+ VM_FAULT_NORMAL);
PROC_LOCK(p);
--p->p_lock;
PROC_UNLOCK(p);
#ifdef VMFAULT_TRACE
- printf("vm_fault(%p (pmap %p), %p (%p), %x, %d) -> %x at pc %p\n",
- map, &vm->vm_pmap, (void *)va, (void *)(intptr_t)trapframe->badvaddr,
+ printf("vm_fault(%p (pmap %p), %p, %x, %d) -> %x at pc %p\n",
+ map, &vm->vm_pmap, (void *)(intptr_t)trapframe->badvaddr,
ftype, VM_FAULT_NORMAL, rv, (void *)(intptr_t)trapframe->pc);
#endif
@@ -488,6 +486,7 @@
}
ucode = ftype;
i = ((rv == KERN_PROTECTION_FAILURE) ? SIGBUS : SIGSEGV);
+ /* XXX: Should be badvaddr */
addr = trapframe->pc;
msg = "BAD_PAGE_FAULT";
--- //depot/user/jhb/ktrace/powerpc/aim/trap.c 2011-03-07 20:37:47.000000000 0000
+++ /home/jhb/work/p4/ktrace/powerpc/aim/trap.c 2011-03-07 20:37:47.000000000 0000
@@ -511,7 +511,7 @@
static int
trap_pfault(struct trapframe *frame, int user)
{
- vm_offset_t eva, va;
+ vm_offset_t eva;
struct thread *td;
struct proc *p;
vm_map_t map;
@@ -550,7 +550,6 @@
map = kernel_map;
}
}
- va = trunc_page(eva);
if (map != kernel_map) {
/*
@@ -562,7 +561,7 @@
PROC_UNLOCK(p);
/* Fault in the user page: */
- rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
+ rv = vm_fault(map, eva, ftype, VM_FAULT_NORMAL);
PROC_LOCK(p);
--p->p_lock;
@@ -572,7 +571,7 @@
* Don't have to worry about process locking or stacks in the
* kernel.
*/
- rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
+ rv = vm_fault(map, eva, ftype, VM_FAULT_NORMAL);
}
if (rv == KERN_SUCCESS)
--- //depot/user/jhb/ktrace/powerpc/booke/trap.c 2010-09-22 15:07:20.000000000 0000
+++ /home/jhb/work/p4/ktrace/powerpc/booke/trap.c 2010-09-22 15:07:20.000000000 0000
@@ -392,7 +392,7 @@
static int
trap_pfault(struct trapframe *frame, int user)
{
- vm_offset_t eva, va;
+ vm_offset_t eva;
struct thread *td;
struct proc *p;
vm_map_t map;
@@ -429,7 +429,6 @@
map = kernel_map;
}
}
- va = trunc_page(eva);
if (map != kernel_map) {
/*
@@ -441,7 +440,7 @@
PROC_UNLOCK(p);
/* Fault in the user page: */
- rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
+ rv = vm_fault(map, eva, ftype, VM_FAULT_NORMAL);
PROC_LOCK(p);
--p->p_lock;
@@ -451,7 +450,7 @@
* Don't have to worry about process locking or stacks in the
* kernel.
*/
- rv = vm_fault(map, va, ftype, VM_FAULT_NORMAL);
+ rv = vm_fault(map, eva, ftype, VM_FAULT_NORMAL);
}
if (rv == KERN_SUCCESS)
--- //depot/user/jhb/ktrace/sys/ktrace.h 2011-03-07 20:37:47.000000000 0000
+++ /home/jhb/work/p4/ktrace/sys/ktrace.h 2011-03-07 20:37:47.000000000 0000
@@ -178,6 +178,23 @@
#define KTR_PROCDTOR 11
/*
+ * KTR_FAULT - page fault record
+ */
+#define KTR_FAULT 12
+struct ktr_fault {
+ vm_offset_t vaddr;
+ int type;
+};
+
+/*
+ * KTR_FAULTEND - end of page fault record
+ */
+#define KTR_FAULTEND 13
+struct ktr_faultend {
+ int result;
+};
+
+/*
* KTR_DROP - If this bit is set in ktr_type, then at least one event
* between the previous record and this record was dropped.
*/
@@ -198,6 +215,8 @@
#define KTRFAC_SYSCTL (1<<KTR_SYSCTL)
#define KTRFAC_PROCCTOR (1<<KTR_PROCCTOR)
#define KTRFAC_PROCDTOR (1<<KTR_PROCDTOR)
+#define KTRFAC_FAULT (1<<KTR_FAULT)
+#define KTRFAC_FAULTEND (1<<KTR_FAULTEND)
/*
* trace flags (also in p_traceflags)
@@ -210,6 +229,8 @@
void ktrnamei(char *);
void ktrcsw(int, int);
void ktrpsig(int, sig_t, sigset_t *, int);
+void ktrfault(vm_offset_t, int);
+void ktrfaultend(int);
void ktrgenio(int, enum uio_rw, struct uio *, int);
void ktrsyscall(int, int narg, register_t args[]);
void ktrsysctl(int *name, u_int namelen);
--- //depot/user/jhb/ktrace/vm/vm_fault.c 2011-03-07 20:37:47.000000000 0000
+++ /home/jhb/work/p4/ktrace/vm/vm_fault.c 2011-03-07 20:37:47.000000000 0000
@@ -74,6 +74,7 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: src/sys/vm/vm_fault.c,v 1.288 2011/01/15 19:21:28 alc Exp $");
+#include "opt_ktrace.h"
#include "opt_vm.h"
#include <sys/param.h>
@@ -86,6 +87,9 @@
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
+#ifdef KTRACE
+#include <sys/ktrace.h>
+#endif
#include <vm/vm.h>
#include <vm/vm_param.h>
@@ -208,8 +212,23 @@
vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
int fault_flags)
{
+#ifdef KTRACE
+ struct thread *td;
+#endif
+ int result;
- return (vm_fault_hold(map, vaddr, fault_type, fault_flags, NULL));
+#ifdef KTRACE
+ td = curthread;
+ if (map != kernel_map && KTRPOINT(td, KTR_FAULT))
+ ktrfault(vaddr, fault_type);
+#endif
+ result = vm_fault_hold(map, trunc_page(vaddr), fault_type, fault_flags,
+ NULL);
+#ifdef KTRACE
+ if (map != kernel_map && KTRPOINT(td, KTR_FAULTEND))
+ ktrfaultend(result);
+#endif
+ return (result);
}
int
--
John Baldwin
More information about the freebsd-arch
mailing list