Proof-of-concept patch for ktrace output to all file types

Howard Su howard0su at gmail.com
Wed Jun 27 16:34:31 UTC 2007


This is a proof-of-concept patch for ktrace. In order to let ktrace write to
stdout, /dev/xxx, a FIFO, or a pipe, I changed the ktrace syscall to accept a
file descriptor instead of a file name.

So far I only have a working kernel patch, and I have kept the changes to the
userland code as small as possible. Now you can do something like:
#mkfifo foo
in session A: #kdump -f foo
in session B: #ktrace -f foo ls

Then you can see real-time output, like truss provides.

Before moving forward (hacking on userland), I would like some feedback on
whether my changes to the kernel part are going in the right direction. I
appreciate your time.

PS: The patch applies cleanly to today's -CURRENT.

-- 
-Howard
-------------- next part --------------
---  sys/kern/kern_exec.c.orig
+++  sys/kern/kern_exec.c
@@ -290,7 +290,7 @@
 	struct pargs *oldargs = NULL, *newargs = NULL;
 	struct sigacts *oldsigacts, *newsigacts;
 #ifdef KTRACE
-	struct vnode *tracevp = NULL;
+	struct file *tracefp = NULL;
 	struct ucred *tracecred = NULL;
 #endif
 	struct vnode *textvp = NULL;
@@ -566,12 +566,12 @@
 		setsugid(p);
 
 #ifdef KTRACE
-		if (p->p_tracevp != NULL &&
+		if (p->p_tracefp != NULL &&
 		    priv_check_cred(oldcred, PRIV_DEBUG_DIFFCRED, 0)) {
 			mtx_lock(&ktrace_mtx);
 			p->p_traceflag = 0;
-			tracevp = p->p_tracevp;
-			p->p_tracevp = NULL;
+			tracefp = p->p_tracefp;
+			p->p_tracefp = NULL;
 			tracecred = p->p_tracecred;
 			p->p_tracecred = NULL;
 			mtx_unlock(&ktrace_mtx);
@@ -736,12 +736,8 @@
 	if (ndp->ni_vp && error != 0)
 		vrele(ndp->ni_vp);
 #ifdef KTRACE
-	if (tracevp != NULL) {
-		int tvfslocked;
-
-		tvfslocked = VFS_LOCK_GIANT(tracevp->v_mount);
-		vrele(tracevp);
-		VFS_UNLOCK_GIANT(tvfslocked);
+	if (tracefp != NULL) {
+		fdrop(tracefp, td);
 	}
 	if (tracecred != NULL)
 		crfree(tracecred);
---  sys/kern/kern_exit.c.orig
+++  sys/kern/kern_exit.c
@@ -112,7 +112,7 @@
 	struct vnode *ttyvp;
 	struct vnode *vtmp;
 #ifdef KTRACE
-	struct vnode *tracevp;
+	struct file *tracefp;
 	struct ucred *tracecred;
 #endif
 	struct plimit *plim;
@@ -368,16 +368,14 @@
 		ktrprocexit(td);
 		PROC_LOCK(p);
 		mtx_lock(&ktrace_mtx);
-		tracevp = p->p_tracevp;
-		p->p_tracevp = NULL;
+		tracefp = p->p_tracefp;
+		p->p_tracefp = NULL;
 		tracecred = p->p_tracecred;
 		p->p_tracecred = NULL;
 		mtx_unlock(&ktrace_mtx);
 		PROC_UNLOCK(p);
-		if (tracevp != NULL) {
-			locked = VFS_LOCK_GIANT(tracevp->v_mount);
-			vrele(tracevp);
-			VFS_UNLOCK_GIANT(locked);
+		if (tracefp != NULL) {
+			fdrop(tracefp, td);
 		}
 		if (tracecred != NULL)
 			crfree(tracecred);
---  sys/kern/kern_fork.c.orig
+++  sys/kern/kern_fork.c
@@ -44,6 +44,7 @@
 #include <sys/systm.h>
 #include <sys/sysproto.h>
 #include <sys/eventhandler.h>
+#include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
@@ -601,11 +602,11 @@
 	 * Copy traceflag and tracefile if enabled.
 	 */
 	mtx_lock(&ktrace_mtx);
-	KASSERT(p2->p_tracevp == NULL, ("new process has a ktrace vnode"));
+	KASSERT(p2->p_tracefp == NULL, ("new process has a ktrace fp"));
 	if (p1->p_traceflag & KTRFAC_INHERIT) {
 		p2->p_traceflag = p1->p_traceflag;
-		if ((p2->p_tracevp = p1->p_tracevp) != NULL) {
-			VREF(p2->p_tracevp);
+		if ((p2->p_tracefp = p1->p_tracefp) != NULL) {
+			fhold(p2->p_tracefp);
 			KASSERT(p1->p_tracecred != NULL,
 			    ("ktrace vnode with no cred"));
 			p2->p_tracecred = crhold(p1->p_tracecred);
---  sys/kern/kern_ktrace.c.orig
+++  sys/kern/kern_ktrace.c
@@ -40,6 +40,7 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/fcntl.h>
+#include <sys/file.h>
 #include <sys/kernel.h>
 #include <sys/kthread.h>
 #include <sys/lock.h>
@@ -134,8 +135,8 @@
 static void ktr_freerequest(struct ktr_request *req);
 static void ktr_writerequest(struct thread *td, struct ktr_request *req);
 static int ktrcanset(struct thread *,struct proc *);
-static int ktrsetchildren(struct thread *,struct proc *,int,int,struct vnode *);
-static int ktrops(struct thread *,struct proc *,int,int,struct vnode *);
+static int ktrsetchildren(struct thread *,struct proc *,int,int,struct file *);
+static int ktrops(struct thread *,struct proc *,int,int,struct file *);
 
 /*
  * ktrace itself generates events, such as context switches, which we do not
@@ -573,15 +574,14 @@
 	register struct ktrace_args *uap;
 {
 #ifdef KTRACE
-	register struct vnode *vp = NULL;
 	register struct proc *p;
+	struct file *fp = NULL;
 	struct pgrp *pg;
 	int facs = uap->facs & ~KTRFAC_ROOT;
 	int ops = KTROP(uap->ops);
 	int descend = uap->ops & KTRFLAG_DESCEND;
 	int nfound, ret = 0;
-	int flags, error = 0, vfslocked;
-	struct nameidata nd;
+	int error = 0;
 	struct ucred *cred;
 
 	/*
@@ -592,28 +592,11 @@
 
 	ktrace_enter(td);
 	if (ops != KTROP_CLEAR) {
-		/*
-		 * an operation which requires a file argument.
-		 */
-		NDINIT(&nd, LOOKUP, NOFOLLOW | MPSAFE, UIO_USERSPACE,
-		    uap->fname, td);
-		flags = FREAD | FWRITE | O_NOFOLLOW;
-		error = vn_open(&nd, &flags, 0, NULL);
+		error = fget(td, uap->fd, &fp);
 		if (error) {
 			ktrace_exit(td);
 			return (error);
 		}
-		vfslocked = NDHASGIANT(&nd);
-		NDFREE(&nd, NDF_ONLY_PNBUF);
-		vp = nd.ni_vp;
-		VOP_UNLOCK(vp, 0, td);
-		if (vp->v_type != VREG) {
-			(void) vn_close(vp, FREAD|FWRITE, td->td_ucred, td);
-			VFS_UNLOCK_GIANT(vfslocked);
-			ktrace_exit(td);
-			return (EACCES);
-		}
-		VFS_UNLOCK_GIANT(vfslocked);
 	}
 	/*
 	 * Clear all uses of the tracefile.
@@ -625,12 +608,12 @@
 		sx_slock(&allproc_lock);
 		FOREACH_PROC_IN_SYSTEM(p) {
 			PROC_LOCK(p);
-			if (p->p_tracevp == vp) {
+			if (p->p_tracefp == fp) {
 				if (ktrcanset(td, p)) {
 					mtx_lock(&ktrace_mtx);
 					cred = p->p_tracecred;
 					p->p_tracecred = NULL;
-					p->p_tracevp = NULL;
+					p->p_tracefp = NULL;
 					p->p_traceflag = 0;
 					mtx_unlock(&ktrace_mtx);
 					vrele_count++;
@@ -642,10 +625,8 @@
 		}
 		sx_sunlock(&allproc_lock);
 		if (vrele_count > 0) {
-			vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 			while (vrele_count-- > 0)
-				vrele(vp);
-			VFS_UNLOCK_GIANT(vfslocked);
+				fdrop(fp, td);
 		}
 		goto done;
 	}
@@ -678,9 +659,9 @@
 			PROC_UNLOCK(p); 
 			nfound++;
 			if (descend)
-				ret |= ktrsetchildren(td, p, ops, facs, vp);
+				ret |= ktrsetchildren(td, p, ops, facs, fp);
 			else
-				ret |= ktrops(td, p, ops, facs, vp);
+				ret |= ktrops(td, p, ops, facs, fp);
 		}
 		if (nfound == 0) {
 			sx_sunlock(&proctree_lock);
@@ -708,19 +689,14 @@
 			goto done;
 		}
 		if (descend)
-			ret |= ktrsetchildren(td, p, ops, facs, vp);
+			ret |= ktrsetchildren(td, p, ops, facs, fp);
 		else
-			ret |= ktrops(td, p, ops, facs, vp);
+			ret |= ktrops(td, p, ops, facs, fp);
 	}
 	sx_sunlock(&proctree_lock);
 	if (!ret)
 		error = EPERM;
 done:
-	if (vp != NULL) {
-		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
-		(void) vn_close(vp, FWRITE, td->td_ucred, td);
-		VFS_UNLOCK_GIANT(vfslocked);
-	}
 	ktrace_exit(td);
 	return (error);
 #else /* !KTRACE */
@@ -766,13 +742,13 @@
 
 #ifdef KTRACE
 static int
-ktrops(td, p, ops, facs, vp)
+ktrops(td, p, ops, facs, fp)
 	struct thread *td;
 	struct proc *p;
 	int ops, facs;
-	struct vnode *vp;
+	struct file *fp;
 {
-	struct vnode *tracevp = NULL;
+	struct file *tracefp = NULL;
 	struct ucred *tracecred = NULL;
 
 	PROC_LOCK(p);
@@ -782,13 +758,13 @@
 	}
 	mtx_lock(&ktrace_mtx);
 	if (ops == KTROP_SET) {
-		if (p->p_tracevp != vp) {
+		if (p->p_tracefp != fp) {
 			/*
 			 * if trace file already in use, relinquish below
 			 */
-			tracevp = p->p_tracevp;
-			VREF(vp);
-			p->p_tracevp = vp;
+			tracefp = p->p_tracefp;
+			fhold(fp);
+			p->p_tracefp = fp;
 		}
 		if (p->p_tracecred != td->td_ucred) {
 			tracecred = p->p_tracecred;
@@ -802,20 +778,16 @@
 		if (((p->p_traceflag &= ~facs) & KTRFAC_MASK) == 0) {
 			/* no more tracing */
 			p->p_traceflag = 0;
-			tracevp = p->p_tracevp;
-			p->p_tracevp = NULL;
+			tracefp = p->p_tracefp;
+			p->p_tracefp = NULL;
 			tracecred = p->p_tracecred;
 			p->p_tracecred = NULL;
 		}
 	}
 	mtx_unlock(&ktrace_mtx);
 	PROC_UNLOCK(p);
-	if (tracevp != NULL) {
-		int vfslocked;
-
-		vfslocked = VFS_LOCK_GIANT(tracevp->v_mount);
-		vrele(tracevp);
-		VFS_UNLOCK_GIANT(vfslocked);
+	if (tracefp != NULL) {
+		fdrop(tracefp, td);
 	}
 	if (tracecred != NULL)
 		crfree(tracecred);
@@ -824,11 +796,11 @@
 }
 
 static int
-ktrsetchildren(td, top, ops, facs, vp)
+ktrsetchildren(td, top, ops, facs, fp)
 	struct thread *td;
 	struct proc *top;
 	int ops, facs;
-	struct vnode *vp;
+	struct file *fp;
 {
 	register struct proc *p;
 	register int ret = 0;
@@ -836,7 +808,7 @@
 	p = top;
 	sx_assert(&proctree_lock, SX_LOCKED);
 	for (;;) {
-		ret |= ktrops(td, p, ops, facs, vp);
+		ret |= ktrops(td, p, ops, facs, fp);
 		/*
 		 * If this process has children, descend to them next,
 		 * otherwise do any siblings, and if done with this level,
@@ -861,14 +833,13 @@
 ktr_writerequest(struct thread *td, struct ktr_request *req)
 {
 	struct ktr_header *kth;
-	struct vnode *vp;
+	struct file *fp;
 	struct proc *p;
 	struct ucred *cred;
 	struct uio auio;
 	struct iovec aiov[3];
-	struct mount *mp;
 	int datalen, buflen, vrele_count;
-	int error, vfslocked;
+	int error;
 
 	/*
 	 * We hold the vnode and credential for use in I/O in case ktrace is
@@ -878,20 +849,20 @@
 	 * the vnode has been closed.
 	 */
 	mtx_lock(&ktrace_mtx);
-	vp = td->td_proc->p_tracevp;
-	if (vp != NULL)
-		VREF(vp);
+	fp = td->td_proc->p_tracefp;
+	if (fp != NULL)
+		fhold(fp);
 	cred = td->td_proc->p_tracecred;
 	if (cred != NULL)
 		crhold(cred);
 	mtx_unlock(&ktrace_mtx);
 
 	/*
-	 * If vp is NULL, the vp has been cleared out from under this
+	 * If fp is NULL, the fp has been cleared out from under this
 	 * request, so just drop it.  Make sure the credential and vnode are
 	 * in sync: we should have both or neither.
 	 */
-	if (vp == NULL) {
+	if (fp == NULL) {
 		KASSERT(cred == NULL, ("ktr_writerequest: cred != NULL"));
 		return;
 	}
@@ -924,19 +895,11 @@
 		auio.uio_iovcnt++;
 	}
 
-	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
-	vn_start_write(vp, &mp, V_WAIT);
-	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
-	(void)VOP_LEASE(vp, td, cred, LEASE_WRITE);
 #ifdef MAC
-	error = mac_check_vnode_write(cred, NOCRED, vp);
+	error = mac_check_vnode_write(cred, NOCRED, fp->v_node);
 	if (error == 0)
 #endif
-		error = VOP_WRITE(vp, &auio, IO_UNIT | IO_APPEND, cred);
-	VOP_UNLOCK(vp, 0, td);
-	vn_finished_write(mp);
-	vrele(vp);
-	VFS_UNLOCK_GIANT(vfslocked);
+		error = fo_write(fp, &auio, cred, 0, td);
 	if (!error)
 		return;
 	/*
@@ -958,9 +921,9 @@
 	sx_slock(&allproc_lock);
 	FOREACH_PROC_IN_SYSTEM(p) {
 		PROC_LOCK(p);
-		if (p->p_tracevp == vp) {
+		if (p->p_tracefp == fp) {
 			mtx_lock(&ktrace_mtx);
-			p->p_tracevp = NULL;
+			p->p_tracefp = NULL;
 			p->p_traceflag = 0;
 			cred = p->p_tracecred;
 			p->p_tracecred = NULL;
@@ -980,10 +943,8 @@
 	 * them but not yet committed them, as those are per-thread.  The
 	 * thread will have to clear it itself on system call return.
 	 */
-	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
 	while (vrele_count-- > 0)
-		vrele(vp);
-	VFS_UNLOCK_GIANT(vfslocked);
+		fdrop(fp, td);
 }
 
 /*
---  sys/kern/kern_proc.c.orig
+++  sys/kern/kern_proc.c
@@ -47,6 +47,7 @@
 #include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
+#include <sys/file.h>
 #include <sys/filedesc.h>
 #include <sys/tty.h>
 #include <sys/signalvar.h>
@@ -636,7 +637,8 @@
 	kp->ki_args = p->p_args;
 	kp->ki_textvp = p->p_textvp;
 #ifdef KTRACE
-	kp->ki_tracep = p->p_tracevp;
+	if (p->p_tracefp != NULL)
+		kp->ki_tracep = p->p_tracefp->f_vnode;
 	mtx_lock(&ktrace_mtx);
 	kp->ki_traceflag = p->p_traceflag;
 	mtx_unlock(&ktrace_mtx);
---  sys/kern/syscalls.master.orig
+++  sys/kern/syscalls.master
@@ -117,7 +117,7 @@
 43	AUE_GETEGID	STD	{ gid_t getegid(void); }
 44	AUE_PROFILE	STD	{ int profil(caddr_t samples, size_t size, \
 				    size_t offset, u_int scale); }
-45	AUE_KTRACE	STD	{ int ktrace(const char *fname, int ops, \
+45	AUE_KTRACE	STD	{ int ktrace(int fd, int ops, \
 				    int facs, int pid); }
 46	AUE_SIGACTION	COMPAT	{ int sigaction(int signum, \
 				    struct osigaction *nsa, \
---  sys/kern/systrace_args.c.orig
+++  sys/kern/systrace_args.c
@@ -2,7 +2,7 @@
  * System call argument to DTrace register array converstion.
  *
  * DO NOT EDIT-- this file is automatically generated.
- * $FreeBSD: src/sys/kern/systrace_args.c,v 1.12 2006/11/11 22:01:25 ru Exp $
+ * $FreeBSD$
  * This file is part of the DTrace syscall provider.
  */
 
@@ -313,7 +313,7 @@
 	/* ktrace */
 	case 45: {
 		struct ktrace_args *p = params;
-		uarg[0] = (intptr_t) p->fname; /* const char * */
+		iarg[0] = p->fd; /* int */
 		iarg[1] = p->ops; /* int */
 		iarg[2] = p->facs; /* int */
 		iarg[3] = p->pid; /* int */
---  sys/sys/proc.h.orig
+++  sys/sys/proc.h
@@ -531,7 +531,7 @@
 	int		p_profthreads;	/* (c) Num threads in addupc_task. */
 	volatile int	p_exitthreads;	/* (j) Number of threads exiting */
 	int		p_traceflag;	/* (o) Kernel trace points. */
-	struct vnode	*p_tracevp;	/* (c + o) Trace to vnode. */
+	struct file	*p_tracefp;	/* (c + o) Trace to vnode. */
 	struct ucred	*p_tracecred;	/* (o) Credentials to trace with. */
 	struct vnode	*p_textvp;	/* (b) Vnode of executable. */
 	char		p_lock;		/* (c) Proclock (prevent swap) count. */
---  sys/sys/sysproto.h.orig
+++  sys/sys/sysproto.h
@@ -2,7 +2,7 @@
  * System call prototypes.
  *
  * DO NOT EDIT-- this file is automatically generated.
- * $FreeBSD: src/sys/sys/sysproto.h,v 1.213 2006/11/11 22:01:24 ru Exp $
+ * $FreeBSD$
  * created from FreeBSD: src/sys/kern/syscalls.master,v 1.231 2006/11/03 15:23:14 rrs Exp 
  */
 
@@ -193,7 +193,7 @@
 	char scale_l_[PADL_(u_int)]; u_int scale; char scale_r_[PADR_(u_int)];
 };
 struct ktrace_args {
-	char fname_l_[PADL_(const char *)]; const char * fname; char fname_r_[PADR_(const char *)];
+	char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)];
 	char ops_l_[PADL_(int)]; int ops; char ops_r_[PADR_(int)];
 	char facs_l_[PADL_(int)]; int facs; char facs_r_[PADR_(int)];
 	char pid_l_[PADL_(int)]; int pid; char pid_r_[PADR_(int)];
---  usr.bin/ktrace/ktrace.c.orig
+++  usr.bin/ktrace/ktrace.c
@@ -137,9 +137,13 @@
 			pid = 1;
 		} else
 			ops |= pidset ? KTROP_CLEAR : KTROP_CLEARFILE;
-
-		if (ktrace(tracefile, ops, trpoints, pid) < 0)
+		if ((fd = open(tracefile, O_RDWR, 
+                    DEFFILEMODE)) < 0)
+                        err(1, "%s", tracefile);
+		
+		if (ktrace(fd, ops, trpoints, pid) < 0)
 			err(1, "%s", tracefile);
+		close(fd);
 		exit(0);
 	}
 
@@ -154,23 +158,21 @@
 		if (!(S_ISREG(sb.st_mode)))
 			errx(1, "%s not regular file", tracefile);
 	} else {
-		if (unlink(tracefile) == -1 && errno != ENOENT)
-			err(1, "unlink %s", tracefile);
-		if ((fd = open(tracefile, O_CREAT | O_EXCL | O_WRONLY,
+		if ((fd = open(tracefile, O_CREAT | O_WRONLY | O_APPEND,
 		    DEFFILEMODE)) < 0)
 			err(1, "%s", tracefile);
 	}
 	(void)umask(omask);
-	(void)close(fd);
 
-	if (*argv) { 
-		if (ktrace(tracefile, ops, trpoints, getpid()) < 0)
+	if (*argv) {
+		if (ktrace(fd, ops, trpoints, getpid()) < 0)
 			err(1, "%s", tracefile);
 		execvp(argv[0], &argv[0]);
 		err(1, "exec of '%s' failed", argv[0]);
 	}
-	else if (ktrace(tracefile, ops, trpoints, pid) < 0)
+	else if (ktrace(fd, ops, trpoints, pid) < 0)
 		err(1, "%s", tracefile);
+	close(fd);
 	exit(0);
 }
 


More information about the freebsd-current mailing list