Proof-of-concept patch for ktrace output to all file types
Howard Su
howard0su at gmail.com
Wed Jun 27 16:34:31 UTC 2007
This is a proof-of-concept patch for ktrace. In order to make ktrace write to
stdout, /dev/xxx, a FIFO, or a pipe, I changed the ktrace syscall to accept a
file descriptor instead of a file name.
So far I only have a working kernel patch, and I made the smallest possible
changes to the userland code. Now you can do something like:
#mkfifo foo
in session A: #kdump -f foo
in session B: #ktrace -f foo ls
Then you can see real-time output, like truss produces.
Before moving forward (hacking userland), I would like some feedback on whether
my changes to the kernel part are going in the right direction. I appreciate your time.
PS: The patch applies cleanly to today's -CURRENT.
--
-Howard
-------------- next part --------------
--- sys/kern/kern_exec.c.orig
+++ sys/kern/kern_exec.c
@@ -290,7 +290,7 @@
struct pargs *oldargs = NULL, *newargs = NULL;
struct sigacts *oldsigacts, *newsigacts;
#ifdef KTRACE
- struct vnode *tracevp = NULL;
+ struct file *tracefp = NULL;
struct ucred *tracecred = NULL;
#endif
struct vnode *textvp = NULL;
@@ -566,12 +566,12 @@
setsugid(p);
#ifdef KTRACE
- if (p->p_tracevp != NULL &&
+ if (p->p_tracefp != NULL &&
priv_check_cred(oldcred, PRIV_DEBUG_DIFFCRED, 0)) {
mtx_lock(&ktrace_mtx);
p->p_traceflag = 0;
- tracevp = p->p_tracevp;
- p->p_tracevp = NULL;
+ tracefp = p->p_tracefp;
+ p->p_tracefp = NULL;
tracecred = p->p_tracecred;
p->p_tracecred = NULL;
mtx_unlock(&ktrace_mtx);
@@ -736,12 +736,8 @@
if (ndp->ni_vp && error != 0)
vrele(ndp->ni_vp);
#ifdef KTRACE
- if (tracevp != NULL) {
- int tvfslocked;
-
- tvfslocked = VFS_LOCK_GIANT(tracevp->v_mount);
- vrele(tracevp);
- VFS_UNLOCK_GIANT(tvfslocked);
+ if (tracefp != NULL) {
+ fdrop(tracefp, td);
}
if (tracecred != NULL)
crfree(tracecred);
--- sys/kern/kern_exit.c.orig
+++ sys/kern/kern_exit.c
@@ -112,7 +112,7 @@
struct vnode *ttyvp;
struct vnode *vtmp;
#ifdef KTRACE
- struct vnode *tracevp;
+ struct file *tracefp;
struct ucred *tracecred;
#endif
struct plimit *plim;
@@ -368,16 +368,14 @@
ktrprocexit(td);
PROC_LOCK(p);
mtx_lock(&ktrace_mtx);
- tracevp = p->p_tracevp;
- p->p_tracevp = NULL;
+ tracefp = p->p_tracefp;
+ p->p_tracefp = NULL;
tracecred = p->p_tracecred;
p->p_tracecred = NULL;
mtx_unlock(&ktrace_mtx);
PROC_UNLOCK(p);
- if (tracevp != NULL) {
- locked = VFS_LOCK_GIANT(tracevp->v_mount);
- vrele(tracevp);
- VFS_UNLOCK_GIANT(locked);
+ if (tracefp != NULL) {
+ fdrop(tracefp, td);
}
if (tracecred != NULL)
crfree(tracecred);
--- sys/kern/kern_fork.c.orig
+++ sys/kern/kern_fork.c
@@ -44,6 +44,7 @@
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/eventhandler.h>
+#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
@@ -601,11 +602,11 @@
* Copy traceflag and tracefile if enabled.
*/
mtx_lock(&ktrace_mtx);
- KASSERT(p2->p_tracevp == NULL, ("new process has a ktrace vnode"));
+ KASSERT(p2->p_tracefp == NULL, ("new process has a ktrace fp"));
if (p1->p_traceflag & KTRFAC_INHERIT) {
p2->p_traceflag = p1->p_traceflag;
- if ((p2->p_tracevp = p1->p_tracevp) != NULL) {
- VREF(p2->p_tracevp);
+ if ((p2->p_tracefp = p1->p_tracefp) != NULL) {
+ fhold(p2->p_tracefp);
KASSERT(p1->p_tracecred != NULL,
("ktrace vnode with no cred"));
p2->p_tracecred = crhold(p1->p_tracecred);
--- sys/kern/kern_ktrace.c.orig
+++ sys/kern/kern_ktrace.c
@@ -40,6 +40,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
+#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
@@ -134,8 +135,8 @@
static void ktr_freerequest(struct ktr_request *req);
static void ktr_writerequest(struct thread *td, struct ktr_request *req);
static int ktrcanset(struct thread *,struct proc *);
-static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *);
-static int ktrops(struct thread *,struct proc *,int,int,struct vnode *);
+static int ktrsetchildren(struct thread *,struct proc *,int,int,struct file *);
+static int ktrops(struct thread *,struct proc *,int,int,struct file *);
/*
* ktrace itself generates events, such as context switches, which we do not
@@ -573,15 +574,14 @@
register struct ktrace_args *uap;
{
#ifdef KTRACE
- register struct vnode *vp = NULL;
register struct proc *p;
+ struct file *fp = NULL;
struct pgrp *pg;
int facs = uap->facs & ~KTRFAC_ROOT;
int ops = KTROP(uap->ops);
int descend = uap->ops & KTRFLAG_DESCEND;
int nfound, ret = 0;
- int flags, error = 0, vfslocked;
- struct nameidata nd;
+ int error = 0;
struct ucred *cred;
/*
@@ -592,28 +592,11 @@
ktrace_enter(td);
if (ops != KTROP_CLEAR) {
- /*
- * an operation which requires a file argument.
- */
- NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE,
- uap->fname, td);
- flags = FREAD | FWRITE | O_NOFOLLOW;
- error = vn_open(&nd, &flags, 0, NULL);
+ error = fget(td, uap->fd, &fp);
if (error) {
ktrace_exit(td);
return (error);
}
- vfslocked = NDHASGIANT(&nd);
- NDFREE(&nd, NDF_ONLY_PNBUF);
- vp = nd.ni_vp;
- VOP_UNLOCK(vp, 0, td);
- if (vp->v_type != VREG) {
- (void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
- VFS_UNLOCK_GIANT(vfslocked);
- ktrace_exit(td);
- return (EACCES);
- }
- VFS_UNLOCK_GIANT(vfslocked);
}
/*
* Clear all uses of the tracefile.
@@ -625,12 +608,12 @@
sx_slock(&allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) {
PROC_LOCK(p);
- if (p->p_tracevp == vp) {
+ if (p->p_tracefp == fp) {
if (ktrcanset(td, p)) {
mtx_lock(&ktrace_mtx);
cred = p->p_tracecred;
p->p_tracecred = NULL;
- p->p_tracevp = NULL;
+ p->p_tracefp = NULL;
p->p_traceflag = 0;
mtx_unlock(&ktrace_mtx);
vrele_count++;
@@ -642,10 +625,8 @@
}
sx_sunlock(&allproc_lock);
if (vrele_count > 0) {
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
while (vrele_count-- > 0)
- vrele(vp);
- VFS_UNLOCK_GIANT(vfslocked);
+ fdrop(fp, td);
}
goto done;
}
@@ -678,9 +659,9 @@
PROC_UNLOCK(p);
nfound++;
if (descend)
- ret |= ktrsetchildren(td, p, ops, facs, vp);
+ ret |= ktrsetchildren(td, p, ops, facs, fp);
else
- ret |= ktrops(td, p, ops, facs, vp);
+ ret |= ktrops(td, p, ops, facs, fp);
}
if (nfound == 0) {
sx_sunlock(&proctree_lock);
@@ -708,19 +689,14 @@
goto done;
}
if (descend)
- ret |= ktrsetchildren(td, p, ops, facs, vp);
+ ret |= ktrsetchildren(td, p, ops, facs, fp);
else
- ret |= ktrops(td, p, ops, facs, vp);
+ ret |= ktrops(td, p, ops, facs, fp);
}
sx_sunlock(&proctree_lock);
if (!ret)
error = EPERM;
done:
- if (vp != NULL) {
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
- (void) vn_close(vp, FWRITE, td->td_ucred, td);
- VFS_UNLOCK_GIANT(vfslocked);
- }
ktrace_exit(td);
return (error);
#else /* !KTRACE */
@@ -766,13 +742,13 @@
#ifdef KTRACE
static int
-ktrops(td, p, ops, facs, vp)
+ktrops(td, p, ops, facs, fp)
struct thread *td;
struct proc *p;
int ops, facs;
- struct vnode *vp;
+ struct file *fp;
{
- struct vnode *tracevp = NULL;
+ struct file *tracefp = NULL;
struct ucred *tracecred = NULL;
PROC_LOCK(p);
@@ -782,13 +758,13 @@
}
mtx_lock(&ktrace_mtx);
if (ops == KTROP_SET) {
- if (p->p_tracevp != vp) {
+ if (p->p_tracefp != fp) {
/*
* if trace file already in use, relinquish below
*/
- tracevp = p->p_tracevp;
- VREF(vp);
- p->p_tracevp = vp;
+ tracefp = p->p_tracefp;
+ fhold(fp);
+ p->p_tracefp = fp;
}
if (p->p_tracecred != td->td_ucred) {
tracecred = p->p_tracecred;
@@ -802,20 +778,16 @@
if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) {
/* no more tracing */
p->p_traceflag = 0;
- tracevp = p->p_tracevp;
- p->p_tracevp = NULL;
+ tracefp = p->p_tracefp;
+ p->p_tracefp = NULL;
tracecred = p->p_tracecred;
p->p_tracecred = NULL;
}
}
mtx_unlock(&ktrace_mtx);
PROC_UNLOCK(p);
- if (tracevp != NULL) {
- int vfslocked;
-
- vfslocked = VFS_LOCK_GIANT(tracevp->v_mount);
- vrele(tracevp);
- VFS_UNLOCK_GIANT(vfslocked);
+ if (tracefp != NULL) {
+ fdrop(tracefp, td);
}
if (tracecred != NULL)
crfree(tracecred);
@@ -824,11 +796,11 @@
}
static int
-ktrsetchildren(td, top, ops, facs, vp)
+ktrsetchildren(td, top, ops, facs, fp)
struct thread *td;
struct proc *top;
int ops, facs;
- struct vnode *vp;
+ struct file *fp;
{
register struct proc *p;
register int ret = 0;
@@ -836,7 +808,7 @@
p = top;
sx_assert(&proctree_lock, SX_LOCKED);
for (;;) {
- ret |= ktrops(td, p, ops, facs, vp);
+ ret |= ktrops(td, p, ops, facs, fp);
/*
* If this process has children, descend to them next,
* otherwise do any siblings, and if done with this level,
@@ -861,14 +833,13 @@
ktr_writerequest(struct thread *td, struct ktr_request *req)
{
struct ktr_header *kth;
- struct vnode *vp;
+ struct file *fp;
struct proc *p;
struct ucred *cred;
struct uio auio;
struct iovec aiov[3];
- struct mount *mp;
int datalen, buflen, vrele_count;
- int error, vfslocked;
+ int error;
/*
* We hold the vnode and credential for use in I/O in case ktrace is
@@ -878,20 +849,20 @@
* the vnode has been closed.
*/
mtx_lock(&ktrace_mtx);
- vp = td->td_proc->p_tracevp;
- if (vp != NULL)
- VREF(vp);
+ fp = td->td_proc->p_tracefp;
+ if (fp != NULL)
+ fhold(fp);
cred = td->td_proc->p_tracecred;
if (cred != NULL)
crhold(cred);
mtx_unlock(&ktrace_mtx);
/*
- * If vp is NULL, the vp has been cleared out from under this
+ * If fp is NULL, the fp has been cleared out from under this
* request, so just drop it. Make sure the credential and vnode are
* in sync: we should have both or neither.
*/
- if (vp == NULL) {
+ if (fp == NULL) {
KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL"));
return;
}
@@ -924,19 +895,11 @@
auio.uio_iovcnt++;
}
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
- vn_start_write(vp, &mp, V_WAIT);
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
- (void)VOP_LEASE(vp, td, cred, LEASE_WRITE);
#ifdef MAC
- error = mac_check_vnode_write(cred, NOCRED, vp);
+ error = mac_check_vnode_write(cred, NOCRED, fp->v_node);
if (error == 0)
#endif
- error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred);
- VOP_UNLOCK(vp, 0, td);
- vn_finished_write(mp);
- vrele(vp);
- VFS_UNLOCK_GIANT(vfslocked);
+ error = fo_write(fp, &auio, cred, 0, td);
if (!error)
return;
/*
@@ -958,9 +921,9 @@
sx_slock(&allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) {
PROC_LOCK(p);
- if (p->p_tracevp == vp) {
+ if (p->p_tracefp == fp) {
mtx_lock(&ktrace_mtx);
- p->p_tracevp = NULL;
+ p->p_tracefp = NULL;
p->p_traceflag = 0;
cred = p->p_tracecred;
p->p_tracecred = NULL;
@@ -980,10 +943,8 @@
* them but not yet committed them, as those are per-thread. The
* thread will have to clear it itself on system call return.
*/
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
while (vrele_count-- > 0)
- vrele(vp);
- VFS_UNLOCK_GIANT(vfslocked);
+ fdrop(fp, td);
}
/*
--- sys/kern/kern_proc.c.orig
+++ sys/kern/kern_proc.c
@@ -47,6 +47,7 @@
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
+#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/tty.h>
#include <sys/signalvar.h>
@@ -636,7 +637,8 @@
kp->ki_args = p->p_args;
kp->ki_textvp = p->p_textvp;
#ifdef KTRACE
- kp->ki_tracep = p->p_tracevp;
+ if (p->p_tracefp != NULL)
+ kp->ki_tracep = p->p_tracefp->f_vnode;
mtx_lock(&ktrace_mtx);
kp->ki_traceflag = p->p_traceflag;
mtx_unlock(&ktrace_mtx);
--- sys/kern/syscalls.master.orig
+++ sys/kern/syscalls.master
@@ -117,7 +117,7 @@
43 AUE_GETEGID STD { gid_t getegid(void); }
44 AUE_PROFILE STD { int profil(caddr_t samples, size_t size, \
size_t offset, u_int scale); }
-45 AUE_KTRACE STD { int ktrace(const char *fname, int ops, \
+45 AUE_KTRACE STD { int ktrace(int fd, int ops, \
int facs, int pid); }
46 AUE_SIGACTION COMPAT { int sigaction(int signum, \
struct osigaction *nsa, \
--- sys/kern/systrace_args.c.orig
+++ sys/kern/systrace_args.c
@@ -2,7 +2,7 @@
* System call argument to DTrace register array converstion.
*
* DO NOT EDIT-- this file is automatically generated.
- * $FreeBSD: src/sys/kern/systrace_args.c,v 1.12 2006/11/11 22:01:25 ru Exp $
+ * $FreeBSD$
* This file is part of the DTrace syscall provider.
*/
@@ -313,7 +313,7 @@
/* ktrace */
case 45: {
struct ktrace_args *p = params;
- uarg[0] = (intptr_t) p->fname; /* const char * */
+ iarg[0] = p->fd; /* int */
iarg[1] = p->ops; /* int */
iarg[2] = p->facs; /* int */
iarg[3] = p->pid; /* int */
--- sys/sys/proc.h.orig
+++ sys/sys/proc.h
@@ -531,7 +531,7 @@
int p_profthreads; /* (c) Num threads in addupc_task. */
volatile int p_exitthreads; /* (j) Number of threads exiting */
int p_traceflag; /* (o) Kernel trace points. */
- struct vnode *p_tracevp; /* (c + o) Trace to vnode. */
+ struct file *p_tracefp; /* (c + o) Trace to vnode. */
struct ucred *p_tracecred; /* (o) Credentials to trace with. */
struct vnode *p_textvp; /* (b) Vnode of executable. */
char p_lock; /* (c) Proclock (prevent swap) count. */
--- sys/sys/sysproto.h.orig
+++ sys/sys/sysproto.h
@@ -2,7 +2,7 @@
* System call prototypes.
*
* DO NOT EDIT-- this file is automatically generated.
- * $FreeBSD: src/sys/sys/sysproto.h,v 1.213 2006/11/11 22:01:24 ru Exp $
+ * $FreeBSD$
* created from FreeBSD: src/sys/kern/syscalls.master,v 1.231 2006/11/03 15:23:14 rrs Exp
*/
@@ -193,7 +193,7 @@
char scale_l_[PADL_(u_int)]; u_int scale; char scale_r_[PADR_(u_int)];
};
struct ktrace_args {
- char fname_l_[PADL_(const char *)]; const char * fname; char fname_r_[PADR_(const char *)];
+ char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
char ops_l_[PADL_(int)]; int ops; char ops_r_[PADR_(int)];
char facs_l_[PADL_(int)]; int facs; char facs_r_[PADR_(int)];
char pid_l_[PADL_(int)]; int pid; char pid_r_[PADR_(int)];
--- usr.bin/ktrace/ktrace.c.orig
+++ usr.bin/ktrace/ktrace.c
@@ -137,9 +137,13 @@
pid = 1;
} else
ops |= pidset ? KTROP_CLEAR : KTROP_CLEARFILE;
-
- if (ktrace(tracefile, ops, trpoints, pid) < 0)
+ if ((fd = open(tracefile, O_RDWR,
+ DEFFILEMODE)) < 0)
+ err(1, "%s", tracefile);
+
+ if (ktrace(fd, ops, trpoints, pid) < 0)
err(1, "%s", tracefile);
+ close(fd);
exit(0);
}
@@ -154,23 +158,21 @@
if (!(S_ISREG(sb.st_mode)))
errx(1, "%s not regular file", tracefile);
} else {
- if (unlink(tracefile) == -1 && errno != ENOENT)
- err(1, "unlink %s", tracefile);
- if ((fd = open(tracefile, O_CREAT | O_EXCL | O_WRONLY,
+ if ((fd = open(tracefile, O_CREAT | O_WRONLY | O_APPEND,
DEFFILEMODE)) < 0)
err(1, "%s", tracefile);
}
(void)umask(omask);
- (void)close(fd);
- if (*argv) {
- if (ktrace(tracefile, ops, trpoints, getpid()) < 0)
+ if (*argv) {
+ if (ktrace(fd, ops, trpoints, getpid()) < 0)
err(1, "%s", tracefile);
execvp(argv[0], &argv[0]);
err(1, "exec of '%s' failed", argv[0]);
}
- else if (ktrace(tracefile, ops, trpoints, pid) < 0)
+ else if (ktrace(fd, ops, trpoints, pid) < 0)
err(1, "%s", tracefile);
+ close(fd);
exit(0);
}
More information about the freebsd-current
mailing list