filedesc_sx patch (20070401a)

Robert Watson rwatson at FreeBSD.org
Sun Apr 1 20:03:04 UTC 2007


Dear all,

The attached patch moves file descriptor locks from being a custom mutex/sleep 
lock implemented using msleep() to an sx lock.  With the new sx lock 
optimizations in place, this is now sensible, both avoiding a custom lock type 
and significantly improving performance.  Kris has reported 2x-4x improvement 
in transactions/sec with MySQL using this patch, as it greatly reduces the 
cost of lock contention during file descriptor lookup for threaded 
applications, and also moves to shared locking to avoid exclusive acquisition 
for read-only operations (the vast majority in most workloads).  Patch is 
below, but you can also download from:

   http://www.watson.org/~robert/freebsd/netperf/20070401a-filedesc-sx.diff

I'm currently waiting for the sx lock changes to settle for a few days before 
committing, so I plan to commit this around Wednesday/Thursday of this week 
(unless serious problems arise).

Robert N M Watson
Computer Laboratory
University of Cambridge

--- //depot/vendor/freebsd/src/sys/compat/linux/linux_file.c	2007/03/29 02:17:34
+++ //depot/user/rwatson/filedesc/src/sys/compat/linux/linux_file.c	2007/04/01 15:10:26
@@ -193,7 +193,7 @@
  linux_at(struct thread *td, int dirfd, char *filename, char **newpath, char **freebuf)
  {
     	struct file *fp;
-	int error = 0;
+	int error = 0, vfslocked;
  	struct vnode *dvp;
  	struct filedesc *fdp = td->td_proc->p_fd;
  	char *fullpath = "unknown";
@@ -207,9 +207,10 @@

  	/* check for AT_FDWCD */
  	if (dirfd == LINUX_AT_FDCWD) {
-	   	FILEDESC_LOCK(fdp);
+	   	FILEDESC_SLOCK(fdp);
  		dvp = fdp->fd_cdir;
-	   	FILEDESC_UNLOCK(fdp);
+		vref(dvp);
+	   	FILEDESC_SUNLOCK(fdp);
  	} else {
  	   	error = fget(td, dirfd, &fp);
  		if (error)
@@ -220,16 +221,28 @@
  		   	fdrop(fp, td);
  			return (ENOTDIR);
  		}
+		vref(dvp);
  		fdrop(fp, td);
  	}

+	/*
+	 * XXXRW: This is bogus, as vn_fullpath() returns only an advisory
+	 * file path, and may fail in several common situations, including
+	 * for file systems that don't use the name cache, and if the entry
+	 * for the file falls out of the name cache.  We should implement
+	 * openat() in the FreeBSD native system call layer properly (using a
+	 * requested starting directory), and have Linux and other ABIs wrap
+	 * the native implementation.
+	 */
  	error = vn_fullpath(td, dvp, &fullpath, &freepath);
  	if (!error) {
  	   	*newpath = malloc(strlen(fullpath) + strlen(filename) + 2, M_TEMP, M_WAITOK | M_ZERO);
  		*freebuf = freepath;
  		sprintf(*newpath, "%s/%s", fullpath, filename);
  	}
-
+	vfslocked = VFS_LOCK_GIANT(dvp->v_mount);
+	vrele(dvp);
+	VFS_UNLOCK_GIANT(vfslocked);
  	return (error);
  }

--- //depot/vendor/freebsd/src/sys/compat/svr4/svr4_filio.c	2005/01/05 22:36:13
+++ //depot/user/rwatson/filedesc/src/sys/compat/svr4/svr4_filio.c	2007/03/03 22:39:43
@@ -211,15 +211,15 @@

  	switch (cmd) {
  	case SVR4_FIOCLEX:
-		FILEDESC_LOCK_FAST(fdp);
+		FILEDESC_XLOCK(fdp);
  		fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
-		FILEDESC_UNLOCK_FAST(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		return 0;

  	case SVR4_FIONCLEX:
-		FILEDESC_LOCK_FAST(fdp);
+		FILEDESC_XLOCK(fdp);
  		fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE;
-		FILEDESC_UNLOCK_FAST(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		return 0;

  	case SVR4_FIOGETOWN:
--- //depot/vendor/freebsd/src/sys/dev/streams/streams.c	2006/07/21 20:40:58
+++ //depot/user/rwatson/filedesc/src/sys/dev/streams/streams.c	2007/03/03 22:39:43
@@ -253,12 +253,15 @@
  	   return error;
  	}

-	FILEDESC_LOCK_FAST(fdp);
+	/*
+	 * XXXRW: Should be locking fp?
+	 */
+	FILEDESC_XLOCK(fdp);
  	fp->f_data = so;
  	fp->f_flag = FREAD|FWRITE;
  	fp->f_ops = &svr4_netops;
  	fp->f_type = DTYPE_SOCKET;
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_XUNLOCK(fdp);

  	/*
  	 * Allocate a stream structure and attach it to this socket.
--- //depot/vendor/freebsd/src/sys/fs/fdescfs/fdesc_vfsops.c	2006/05/15 19:46:09
+++ //depot/user/rwatson/filedesc/src/sys/fs/fdescfs/fdesc_vfsops.c	2007/03/03 22:39:43
@@ -176,7 +176,7 @@
  	lim = lim_cur(td->td_proc, RLIMIT_NOFILE);
  	PROC_UNLOCK(td->td_proc);
  	fdp = td->td_proc->p_fd;
-	FILEDESC_LOCK_FAST(fdp);
+	FILEDESC_SLOCK(fdp);
  	last = min(fdp->fd_nfiles, lim);
  	freefd = 0;
  	for (i = fdp->fd_freefile; i < last; i++)
@@ -189,7 +189,7 @@
  	 */
  	if (fdp->fd_nfiles < lim)
  		freefd += (lim - fdp->fd_nfiles);
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_SUNLOCK(fdp);

  	sbp->f_flags = 0;
  	sbp->f_bsize = DEV_BSIZE;
--- //depot/vendor/freebsd/src/sys/fs/fdescfs/fdesc_vnops.c	2007/03/13 01:54:24
+++ //depot/user/rwatson/filedesc/src/sys/fs/fdescfs/fdesc_vnops.c	2007/03/17 21:03:04
@@ -457,7 +457,7 @@

  	fcnt = i - 2;		/* The first two nodes are `.' and `..' */

-	FILEDESC_LOCK_FAST(fdp);
+	FILEDESC_SLOCK(fdp);
  	while (i < fdp->fd_nfiles + 2 && uio->uio_resid >= UIO_MX) {
  		switch (i) {
  		case 0:	/* `.' */
@@ -473,7 +473,7 @@
  			break;
  		default:
  			if (fdp->fd_ofiles[fcnt] == NULL) {
-				FILEDESC_UNLOCK_FAST(fdp);
+				FILEDESC_SUNLOCK(fdp);
  				goto done;
  			}

@@ -487,15 +487,15 @@
  		/*
  		 * And ship to userland
  		 */
-		FILEDESC_UNLOCK_FAST(fdp);
+		FILEDESC_SUNLOCK(fdp);
  		error = uiomove(dp, UIO_MX, uio);
  		if (error)
  			goto done;
-		FILEDESC_LOCK_FAST(fdp);
+		FILEDESC_SLOCK(fdp);
  		i++;
  		fcnt++;
  	}
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_SUNLOCK(fdp);

  done:
  	uio->uio_offset = i * UIO_MX;
--- //depot/vendor/freebsd/src/sys/fs/unionfs/union_subr.c	2007/03/13 01:54:24
+++ //depot/user/rwatson/filedesc/src/sys/fs/unionfs/union_subr.c	2007/03/17 21:03:04
@@ -450,9 +450,9 @@
  		}
  		break;
  	default:		/* UNIONFS_TRADITIONAL */
-		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
+		FILEDESC_SLOCK(td->td_proc->p_fd);
  		uva->va_mode = 0777 & ~td->td_proc->p_fd->fd_cmask;
-		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
+		FILEDESC_SUNLOCK(td->td_proc->p_fd);
  		uva->va_uid = ump->um_uid;
  		uva->va_gid = ump->um_gid;
  		break;
--- //depot/vendor/freebsd/src/sys/kern/kern_descrip.c	2007/03/15 21:21:17
+++ //depot/user/rwatson/filedesc/src/sys/kern/kern_descrip.c	2007/04/01 17:49:49
@@ -211,9 +211,11 @@
  static void
  fdused(struct filedesc *fdp, int fd)
  {
-	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
+
+	FILEDESC_XLOCK_ASSERT(fdp);
  	KASSERT(!fdisused(fdp, fd),
  	    ("fd already used"));
+
  	fdp->fd_map[NDSLOT(fd)] |= NDBIT(fd);
  	if (fd > fdp->fd_lastfile)
  		fdp->fd_lastfile = fd;
@@ -227,11 +229,13 @@
  static void
  fdunused(struct filedesc *fdp, int fd)
  {
-	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
+
+	FILEDESC_XLOCK_ASSERT(fdp);
  	KASSERT(fdisused(fdp, fd),
  	    ("fd is already unused"));
  	KASSERT(fdp->fd_ofiles[fd] == NULL,
  	    ("fd is still in use"));
+
  	fdp->fd_map[NDSLOT(fd)] &= ~NDBIT(fd);
  	if (fd < fdp->fd_freefile)
  		fdp->fd_freefile = fd;
@@ -371,10 +375,14 @@
  	flg = F_POSIX;
  	p = td->td_proc;
  	fdp = p->p_fd;
-	FILEDESC_LOCK(fdp);
+
+	/*
+	 * XXXRW: It could be an exclusive lock is not [always] needed here.
+	 */
+	FILEDESC_XLOCK(fdp);
  	if ((unsigned)fd >= fdp->fd_nfiles ||
  	    (fp = fdp->fd_ofiles[fd]) == NULL) {
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		error = EBADF;
  		goto done2;
  	}
@@ -383,7 +391,7 @@
  	switch (cmd) {
  	case F_DUPFD:
  		/* mtx_assert(&Giant, MA_NOTOWNED); */
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		newmin = arg;
  		PROC_LOCK(p);
  		if (newmin >= lim_cur(p, RLIMIT_NOFILE) ||
@@ -399,14 +407,14 @@
  	case F_GETFD:
  		/* mtx_assert(&Giant, MA_NOTOWNED); */
  		td->td_retval[0] = (*pop & UF_EXCLOSE) ? FD_CLOEXEC : 0;
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		break;

  	case F_SETFD:
  		/* mtx_assert(&Giant, MA_NOTOWNED); */
  		*pop = (*pop &~ UF_EXCLOSE) |
  		    (arg & FD_CLOEXEC ? UF_EXCLOSE : 0);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		break;

  	case F_GETFL:
@@ -414,7 +422,7 @@
  		FILE_LOCK(fp);
  		td->td_retval[0] = OFLAGS(fp->f_flag);
  		FILE_UNLOCK(fp);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		break;

  	case F_SETFL:
@@ -424,7 +432,7 @@
  		fp->f_flag &= ~FCNTLFLAGS;
  		fp->f_flag |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
  		FILE_UNLOCK(fp);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		tmp = fp->f_flag & FNONBLOCK;
  		error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
  		if (error) {
@@ -448,7 +456,7 @@
  	case F_GETOWN:
  		mtx_assert(&Giant, MA_OWNED);
  		fhold(fp);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		error = fo_ioctl(fp, FIOGETOWN, &tmp, td->td_ucred, td);
  		if (error == 0)
  			td->td_retval[0] = tmp;
@@ -458,7 +466,7 @@
  	case F_SETOWN:
  		mtx_assert(&Giant, MA_OWNED);
  		fhold(fp);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		tmp = arg;
  		error = fo_ioctl(fp, FIOSETOWN, &tmp, td->td_ucred, td);
  		fdrop(fp, td);
@@ -472,7 +480,7 @@
  	case F_SETLK:
  		mtx_assert(&Giant, MA_OWNED);
  		if (fp->f_type != DTYPE_VNODE) {
-			FILEDESC_UNLOCK(fdp);
+			FILEDESC_XUNLOCK(fdp);
  			error = EBADF;
  			break;
  		}
@@ -482,7 +490,7 @@
  			if (fp->f_offset < 0 ||
  			    (flp->l_start > 0 &&
  			     fp->f_offset > OFF_MAX - flp->l_start)) {
-				FILEDESC_UNLOCK(fdp);
+				FILEDESC_XUNLOCK(fdp);
  				error = EOVERFLOW;
  				break;
  			}
@@ -493,7 +501,7 @@
  		 * VOP_ADVLOCK() may block.
  		 */
  		fhold(fp);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		vp = fp->f_vnode;

  		switch (flp->l_type) {
@@ -528,10 +536,10 @@
  			break;
  		}
  		/* Check for race with close */
-		FILEDESC_LOCK_FAST(fdp);
+		FILEDESC_XLOCK(fdp);
  		if ((unsigned) fd >= fdp->fd_nfiles ||
  		    fp != fdp->fd_ofiles[fd]) {
-			FILEDESC_UNLOCK_FAST(fdp);
+			FILEDESC_XUNLOCK(fdp);
  			flp->l_whence = SEEK_SET;
  			flp->l_start = 0;
  			flp->l_len = 0;
@@ -539,21 +547,21 @@
  			(void) VOP_ADVLOCK(vp, (caddr_t)p->p_leader,
  					   F_UNLCK, flp, F_POSIX);
  		} else
-			FILEDESC_UNLOCK_FAST(fdp);
+			FILEDESC_XUNLOCK(fdp);
  		fdrop(fp, td);
  		break;

  	case F_GETLK:
  		mtx_assert(&Giant, MA_OWNED);
  		if (fp->f_type != DTYPE_VNODE) {
-			FILEDESC_UNLOCK(fdp);
+			FILEDESC_XUNLOCK(fdp);
  			error = EBADF;
  			break;
  		}
  		flp = (struct flock *)arg;
  		if (flp->l_type != F_RDLCK && flp->l_type != F_WRLCK &&
  		    flp->l_type != F_UNLCK) {
-			FILEDESC_UNLOCK(fdp);
+			FILEDESC_XUNLOCK(fdp);
  			error = EINVAL;
  			break;
  		}
@@ -562,7 +570,7 @@
  			    fp->f_offset > OFF_MAX - flp->l_start) ||
  			    (flp->l_start < 0 &&
  			     fp->f_offset < OFF_MIN - flp->l_start)) {
-				FILEDESC_UNLOCK(fdp);
+				FILEDESC_XUNLOCK(fdp);
  				error = EOVERFLOW;
  				break;
  			}
@@ -572,14 +580,14 @@
  		 * VOP_ADVLOCK() may block.
  		 */
  		fhold(fp);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		vp = fp->f_vnode;
  		error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, flp,
  		    F_POSIX);
  		fdrop(fp, td);
  		break;
  	default:
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		error = EINVAL;
  		break;
  	}
@@ -593,7 +601,8 @@
   * Common code for dup, dup2, and fcntl(F_DUPFD).
   */
  static int
-do_dup(struct thread *td, enum dup_type type, int old, int new, register_t *retval)
+do_dup(struct thread *td, enum dup_type type, int old, int new,
+    register_t *retval)
  {
  	struct filedesc *fdp;
  	struct proc *p;
@@ -619,14 +628,14 @@
  	if (new >= maxfd)
  		return (EMFILE);

-	FILEDESC_LOCK(fdp);
+	FILEDESC_XLOCK(fdp);
  	if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) {
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		return (EBADF);
  	}
  	if (type == DUP_FIXED && old == new) {
  		*retval = new;
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		return (0);
  	}
  	fp = fdp->fd_ofiles[old];
@@ -646,7 +655,7 @@
  			fdused(fdp, new);
  	} else {
  		if ((error = fdalloc(td, new, &new)) != 0) {
-			FILEDESC_UNLOCK(fdp);
+			FILEDESC_XUNLOCK(fdp);
  			fdrop(fp, td);
  			return (error);
  		}
@@ -661,7 +670,7 @@
  		/* we've allocated a descriptor which we won't use */
  		if (fdp->fd_ofiles[new] == NULL)
  			fdunused(fdp, new);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		fdrop(fp, td);
  		return (EBADF);
  	}
@@ -708,20 +717,20 @@
  		knote_fdclose(td, new);
  		if (delfp->f_type == DTYPE_MQUEUE)
  			mq_fdclose(td, new, delfp);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		(void) closef(delfp, td);
  		if (holdleaders) {
-			FILEDESC_LOCK_FAST(fdp);
+			FILEDESC_XLOCK(fdp);
  			fdp->fd_holdleaderscount--;
  			if (fdp->fd_holdleaderscount == 0 &&
  			    fdp->fd_holdleaderswakeup != 0) {
  				fdp->fd_holdleaderswakeup = 0;
  				wakeup(&fdp->fd_holdleaderscount);
  			}
-			FILEDESC_UNLOCK_FAST(fdp);
+			FILEDESC_XUNLOCK(fdp);
  		}
  	} else {
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  	}
  	return (0);
  }
@@ -979,10 +988,10 @@

  	AUDIT_SYSCLOSE(td, fd);

-	FILEDESC_LOCK(fdp);
+	FILEDESC_XLOCK(fdp);
  	if ((unsigned)fd >= fdp->fd_nfiles ||
  	    (fp = fdp->fd_ofiles[fd]) == NULL) {
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		return (EBADF);
  	}
  	fdp->fd_ofiles[fd] = NULL;
@@ -998,27 +1007,26 @@
  	}

  	/*
-	 * We now hold the fp reference that used to be owned by the descriptor
-	 * array.
-	 * We have to unlock the FILEDESC *AFTER* knote_fdclose to prevent a
-	 * race of the fd getting opened, a knote added, and deleteing a knote
-	 * for the new fd.
+	 * We now hold the fp reference that used to be owned by the
+	 * descriptor array.  We have to unlock the FILEDESC *AFTER*
+	 * knote_fdclose to prevent a race of the fd getting opened, a knote
+	 * added, and deleting a knote for the new fd.
  	 */
  	knote_fdclose(td, fd);
  	if (fp->f_type == DTYPE_MQUEUE)
  		mq_fdclose(td, fd, fp);
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_XUNLOCK(fdp);

  	error = closef(fp, td);
  	if (holdleaders) {
-		FILEDESC_LOCK_FAST(fdp);
+		FILEDESC_XLOCK(fdp);
  		fdp->fd_holdleaderscount--;
  		if (fdp->fd_holdleaderscount == 0 &&
  		    fdp->fd_holdleaderswakeup != 0) {
  			fdp->fd_holdleaderswakeup = 0;
  			wakeup(&fdp->fd_holdleaderscount);
  		}
-		FILEDESC_UNLOCK_FAST(fdp);
+		FILEDESC_XUNLOCK(fdp);
  	}
  	return (error);
  }
@@ -1176,7 +1184,7 @@
  	int nnfiles, onfiles;
  	NDSLOTTYPE *nmap;

-	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
+	FILEDESC_XLOCK_ASSERT(fdp);

  	KASSERT(fdp->fd_nfiles > 0,
  	    ("zero-length file table"));
@@ -1189,7 +1197,7 @@
  		return;

  	/* allocate a new table and (if required) new bitmaps */
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_XUNLOCK(fdp);
  	MALLOC(ntable, struct file **, nnfiles * OFILESIZE,
  	    M_FILEDESC, M_ZERO | M_WAITOK);
  	nfileflags = (char *)&ntable[nnfiles];
@@ -1198,7 +1206,7 @@
  		    M_FILEDESC, M_ZERO | M_WAITOK);
  	else
  		nmap = NULL;
-	FILEDESC_LOCK(fdp);
+	FILEDESC_XLOCK(fdp);

  	/*
  	 * We now have new tables ready to go.  Since we dropped the
@@ -1237,7 +1245,7 @@
  	struct filedesc *fdp = p->p_fd;
  	int fd = -1, maxfd;

-	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
+	FILEDESC_XLOCK_ASSERT(fdp);

  	if (fdp->fd_freefile > minfd)
  		minfd = fdp->fd_freefile; 
@@ -1276,8 +1284,8 @@
  }

  /*
- * Check to see whether n user file descriptors
- * are available to the process p.
+ * Check to see whether n user file descriptors are available to the process
+ * p.
   */
  int
  fdavail(struct thread *td, int n)
@@ -1287,7 +1295,7 @@
  	struct file **fpp;
  	int i, lim, last;

-	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
+	FILEDESC_LOCK_ASSERT(fdp);

  	PROC_LOCK(p);
  	lim = min((int)lim_cur(p, RLIMIT_NOFILE), maxfilesperproc);
@@ -1304,12 +1312,11 @@
  }

  /*
- * Create a new open file structure and allocate
- * a file decriptor for the process that refers to it.
- * We add one reference to the file for the descriptor table
- * and one reference for resultfp. This is to prevent us being
- * preempted and the entry in the descriptor table closed after
- * we release the FILEDESC lock.
+ * Create a new open file structure and allocate a file descriptor for the
+ * process that refers to it.  We add one reference to the file for the
+ * descriptor table and one reference for resultfp. This is to prevent us
+ * being preempted and the entry in the descriptor table closed after we
+ * release the FILEDESC lock.
   */
  int
  falloc(struct thread *td, struct file **resultfp, int *resultfd)
@@ -1350,7 +1357,7 @@
  	fp->f_ops = &badfileops;
  	fp->f_data = NULL;
  	fp->f_vnode = NULL;
-	FILEDESC_LOCK(p->p_fd);
+	FILEDESC_XLOCK(p->p_fd);
  	if ((fq = p->p_fd->fd_ofiles[0])) {
  		LIST_INSERT_AFTER(fq, fp, f_list);
  	} else {
@@ -1358,14 +1365,14 @@
  	}
  	sx_xunlock(&filelist_lock);
  	if ((error = fdalloc(td, 0, &i))) {
-		FILEDESC_UNLOCK(p->p_fd);
+		FILEDESC_XUNLOCK(p->p_fd);
  		fdrop(fp, td);
  		if (resultfp)
  			fdrop(fp, td);
  		return (error);
  	}
  	p->p_fd->fd_ofiles[i] = fp;
-	FILEDESC_UNLOCK(p->p_fd);
+	FILEDESC_XUNLOCK(p->p_fd);
  	if (resultfp)
  		*resultfp = fp;
  	if (resultfd)
@@ -1383,9 +1390,9 @@
  	struct filedesc0 *newfdp;

  	newfdp = malloc(sizeof *newfdp, M_FILEDESC, M_WAITOK | M_ZERO);
-	mtx_init(&newfdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF);
+	FILEDESC_LOCK_INIT(&newfdp->fd_fd);
  	if (fdp != NULL) {
-		FILEDESC_LOCK(fdp);
+		FILEDESC_XLOCK(fdp);
  		newfdp->fd_fd.fd_cdir = fdp->fd_cdir;
  		if (newfdp->fd_fd.fd_cdir)
  			VREF(newfdp->fd_fd.fd_cdir);
@@ -1395,7 +1402,7 @@
  		newfdp->fd_fd.fd_jdir = fdp->fd_jdir;
  		if (newfdp->fd_fd.fd_jdir)
  			VREF(newfdp->fd_fd.fd_jdir);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  	}

  	/* Create the file descriptor table. */
@@ -1434,7 +1441,7 @@
  	if (i > 0)
  		return;

-	mtx_destroy(&fdp->fd_mtx);
+	FILEDESC_LOCK_DESTROY(fdp);
  	FREE(fdp, M_FILEDESC);
  }

@@ -1444,9 +1451,10 @@
  struct filedesc *
  fdshare(struct filedesc *fdp)
  {
-	FILEDESC_LOCK_FAST(fdp);
+
+	FILEDESC_XLOCK(fdp);
  	fdp->fd_refcnt++;
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_XUNLOCK(fdp);
  	return (fdp);
  }

@@ -1457,22 +1465,21 @@
  fdunshare(struct proc *p, struct thread *td)
  {

-	FILEDESC_LOCK_FAST(p->p_fd);
+	FILEDESC_XLOCK(p->p_fd);
  	if (p->p_fd->fd_refcnt > 1) {
  		struct filedesc *tmp;

-		FILEDESC_UNLOCK_FAST(p->p_fd);
+		FILEDESC_XUNLOCK(p->p_fd);
  		tmp = fdcopy(p->p_fd);
  		fdfree(td);
  		p->p_fd = tmp;
  	} else
-		FILEDESC_UNLOCK_FAST(p->p_fd);
+		FILEDESC_XUNLOCK(p->p_fd);
  }

  /*
- * Copy a filedesc structure.
- * A NULL pointer in returns a NULL reference, this is to ease callers,
- * not catch errors.
+ * Copy a filedesc structure.  A NULL pointer in returns a NULL reference,
+ * this is to ease callers, not catch errors.
   */
  struct filedesc *
  fdcopy(struct filedesc *fdp)
@@ -1485,13 +1492,13 @@
  		return (NULL);

  	newfdp = fdinit(fdp);
-	FILEDESC_LOCK_FAST(fdp);
+	FILEDESC_SLOCK(fdp);
  	while (fdp->fd_lastfile >= newfdp->fd_nfiles) {
-		FILEDESC_UNLOCK_FAST(fdp);
-		FILEDESC_LOCK(newfdp);
+		FILEDESC_SUNLOCK(fdp);
+		FILEDESC_XLOCK(newfdp);
  		fdgrowtable(newfdp, fdp->fd_lastfile + 1);
-		FILEDESC_UNLOCK(newfdp);
-		FILEDESC_LOCK_FAST(fdp);
+		FILEDESC_XUNLOCK(newfdp);
+		FILEDESC_SLOCK(fdp);
  	}
  	/* copy everything except kqueue descriptors */
  	newfdp->fd_freefile = -1;
@@ -1507,17 +1514,17 @@
  				newfdp->fd_freefile = i;
  		}
  	}
-	FILEDESC_UNLOCK_FAST(fdp);
-	FILEDESC_LOCK(newfdp);
+	FILEDESC_SUNLOCK(fdp);
+	FILEDESC_XLOCK(newfdp);
  	for (i = 0; i <= newfdp->fd_lastfile; ++i)
  		if (newfdp->fd_ofiles[i] != NULL)
  			fdused(newfdp, i);
-	FILEDESC_UNLOCK(newfdp);
-	FILEDESC_LOCK_FAST(fdp);
+	FILEDESC_XUNLOCK(newfdp);
+	FILEDESC_SLOCK(fdp);
  	if (newfdp->fd_freefile == -1)
  		newfdp->fd_freefile = i;
  	newfdp->fd_cmask = fdp->fd_cmask;
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_SUNLOCK(fdp);
  	return (newfdp);
  }

@@ -1543,7 +1550,7 @@
  	/* Check for special need to clear POSIX style locks */
  	fdtol = td->td_proc->p_fdtol;
  	if (fdtol != NULL) {
-		FILEDESC_LOCK(fdp);
+		FILEDESC_XLOCK(fdp);
  		KASSERT(fdtol->fdl_refcount > 0,
  			("filedesc_to_refcount botch: fdl_refcount=%d",
  			 fdtol->fdl_refcount));
@@ -1557,7 +1564,7 @@
  					continue;
  				fp = *fpp;
  				fhold(fp);
-				FILEDESC_UNLOCK(fdp);
+				FILEDESC_XUNLOCK(fdp);
  				lf.l_whence = SEEK_SET;
  				lf.l_start = 0;
  				lf.l_len = 0;
@@ -1571,7 +1578,7 @@
  						   &lf,
  						   F_POSIX);
  				VFS_UNLOCK_GIANT(locked);
-				FILEDESC_LOCK(fdp);
+				FILEDESC_XLOCK(fdp);
  				fdrop(fp, td);
  				fpp = fdp->fd_ofiles + i;
  			}
@@ -1585,18 +1592,18 @@
  				 * in a shared file descriptor table.
  				 */
  				fdp->fd_holdleaderswakeup = 1;
-				msleep(&fdp->fd_holdleaderscount, &fdp->fd_mtx,
-				       PLOCK, "fdlhold", 0);
+				sx_sleep(&fdp->fd_holdleaderscount,
+				    FILEDESC_LOCK(fdp), PLOCK, "fdlhold", 0);
  				goto retry;
  			}
  			if (fdtol->fdl_holdcount > 0) {
  				/*
-				 * Ensure that fdtol->fdl_leader
-				 * remains valid in closef().
+				 * Ensure that fdtol->fdl_leader remains
+				 * valid in closef().
  				 */
  				fdtol->fdl_wakeup = 1;
-				msleep(fdtol, &fdp->fd_mtx,
-				       PLOCK, "fdlhold", 0);
+				sx_sleep(fdtol, FILEDESC_LOCK(fdp), PLOCK,
+				    "fdlhold", 0);
  				goto retry;
  			}
  		}
@@ -1608,13 +1615,13 @@
  		} else
  			fdtol = NULL;
  		td->td_proc->p_fdtol = NULL;
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		if (fdtol != NULL)
  			FREE(fdtol, M_FILEDESC_TO_LEADER);
  	}
-	FILEDESC_LOCK_FAST(fdp);
+	FILEDESC_XLOCK(fdp);
  	i = --fdp->fd_refcnt;
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_XUNLOCK(fdp);
  	if (i > 0)
  		return;
  	/*
@@ -1626,7 +1633,7 @@
  		if (*fpp)
  			(void) closef(*fpp, td);
  	}
-	FILEDESC_LOCK(fdp);
+	FILEDESC_XLOCK(fdp);

  	/* XXX This should happen earlier. */
  	mtx_lock(&fdesc_mtx);
@@ -1646,7 +1653,7 @@
  	fdp->fd_rdir = NULL;
  	jdir = fdp->fd_jdir;
  	fdp->fd_jdir = NULL;
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_XUNLOCK(fdp);

  	if (cdir) {
  		locked = VFS_LOCK_GIANT(cdir->v_mount);
@@ -1706,7 +1713,7 @@
  	 * Note: fdp->fd_ofiles may be reallocated out from under us while
  	 * we are blocked in a close.  Be careful!
  	 */
-	FILEDESC_LOCK(fdp);
+	FILEDESC_XLOCK(fdp);
  	for (i = 0; i <= fdp->fd_lastfile; i++) {
  		if (i > 2)
  			break;
@@ -1722,35 +1729,33 @@
  			fdp->fd_ofiles[i] = NULL;
  			fdp->fd_ofileflags[i] = 0;
  			fdunused(fdp, i);
-			FILEDESC_UNLOCK(fdp);
+			FILEDESC_XUNLOCK(fdp);
  			(void) closef(fp, td);
-			FILEDESC_LOCK(fdp);
+			FILEDESC_XLOCK(fdp);
  		}
  	}
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_XUNLOCK(fdp);
  }

  /*
- * If a specific file object occupies a specific file descriptor,
- * close the file descriptor entry and drop a reference on the file
- * object.  This is a convenience function to handle a subsequent
- * error in a function that calls falloc() that handles the race that
- * another thread might have closed the file descriptor out from under
- * the thread creating the file object.
+ * If a specific file object occupies a specific file descriptor, close the
+ * file descriptor entry and drop a reference on the file object.  This is a
+ * convenience function to handle a subsequent error in a function that calls
+ * falloc() that handles the race that another thread might have closed the
+ * file descriptor out from under the thread creating the file object.
   */
  void
  fdclose(struct filedesc *fdp, struct file *fp, int idx, struct thread *td)
  {

-	FILEDESC_LOCK(fdp);
+	FILEDESC_XLOCK(fdp);
  	if (fdp->fd_ofiles[idx] == fp) {
  		fdp->fd_ofiles[idx] = NULL;
  		fdunused(fdp, idx);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		fdrop(fp, td);
-	} else {
-		FILEDESC_UNLOCK(fdp);
-	}
+	} else
+		FILEDESC_XUNLOCK(fdp);
  }

  /*
@@ -1767,7 +1772,7 @@
  	if (fdp == NULL)
  		return;

-	FILEDESC_LOCK(fdp);
+	FILEDESC_XLOCK(fdp);

  	/*
  	 * We cannot cache fd_ofiles or fd_ofileflags since operations
@@ -1790,12 +1795,12 @@
  			fdunused(fdp, i);
  			if (fp->f_type == DTYPE_MQUEUE)
  				mq_fdclose(td, i, fp);
-			FILEDESC_UNLOCK(fdp);
+			FILEDESC_XUNLOCK(fdp);
  			(void) closef(fp, td);
-			FILEDESC_LOCK(fdp);
+			FILEDESC_XLOCK(fdp);
  		}
  	}
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_XUNLOCK(fdp);
  }

  /*
@@ -1838,14 +1843,15 @@
  				/*
  				 * Someone may have closed the entry in the
  				 * file descriptor table, so check it hasn't
-				 * changed before dropping the reference count.
+				 * changed before dropping the reference
+				 * count.
  				 */
-				FILEDESC_LOCK(fdp);
+				FILEDESC_XLOCK(fdp);
  				KASSERT(fdp->fd_ofiles[fd] == fp,
  				    ("table not shared, how did it change?"));
  				fdp->fd_ofiles[fd] = NULL;
  				fdunused(fdp, fd);
-				FILEDESC_UNLOCK(fdp);
+				FILEDESC_XUNLOCK(fdp);
  				fdrop(fp, td);
  				fdrop(fp, td);
  				break;
@@ -1873,8 +1879,7 @@
  }

  /*
- * Internal form of close.
- * Decrement reference count on file structure.
+ * Internal form of close.  Decrement reference count on file structure.
   * Note: td may be NULL when closing a file that was being passed in a
   * message.
   *
@@ -1917,11 +1922,11 @@
  		fdtol = td->td_proc->p_fdtol;
  		if (fdtol != NULL) {
  			/*
-			 * Handle special case where file descriptor table
-			 * is shared between multiple process leaders.
+			 * Handle special case where file descriptor table is
+			 * shared between multiple process leaders.
  			 */
  			fdp = td->td_proc->p_fd;
-			FILEDESC_LOCK(fdp);
+			FILEDESC_XLOCK(fdp);
  			for (fdtol = fdtol->fdl_next;
  			     fdtol != td->td_proc->p_fdtol;
  			     fdtol = fdtol->fdl_next) {
@@ -1929,7 +1934,7 @@
  				     P_ADVLOCK) == 0)
  					continue;
  				fdtol->fdl_holdcount++;
-				FILEDESC_UNLOCK(fdp);
+				FILEDESC_XUNLOCK(fdp);
  				lf.l_whence = SEEK_SET;
  				lf.l_start = 0;
  				lf.l_len = 0;
@@ -1938,7 +1943,7 @@
  				(void) VOP_ADVLOCK(vp,
  						   (caddr_t)fdtol->fdl_leader,
  						   F_UNLCK, &lf, F_POSIX);
-				FILEDESC_LOCK(fdp);
+				FILEDESC_XLOCK(fdp);
  				fdtol->fdl_holdcount--;
  				if (fdtol->fdl_holdcount == 0 &&
  				    fdtol->fdl_wakeup != 0) {
@@ -1946,7 +1951,7 @@
  					wakeup(fdtol);
  				}
  			}
-			FILEDESC_UNLOCK(fdp);
+			FILEDESC_XUNLOCK(fdp);
  		}
  		VFS_UNLOCK_GIANT(vfslocked);
  	}
@@ -1954,21 +1959,21 @@
  }

  /*
- * Extract the file pointer associated with the specified descriptor for
- * the current user process.
+ * Extract the file pointer associated with the specified descriptor for the
+ * current user process.
   *
   * If the descriptor doesn't exist, EBADF is returned.
   *
- * If the descriptor exists but doesn't match 'flags' then
- * return EBADF for read attempts and EINVAL for write attempts.
+ * If the descriptor exists but doesn't match 'flags' then return EBADF for
+ * read attempts and EINVAL for write attempts.
   *
   * If 'hold' is set (non-zero) the file's refcount will be bumped on return.
- * It should be dropped with fdrop().
- * If it is not set, then the refcount will not be bumped however the
- * thread's filedesc struct will be returned locked (for fgetsock).
+ * It should be dropped with fdrop().  If it is not set, then the refcount
+ * will not be bumped however the thread's filedesc struct will be returned
+ * locked (for fgetsock).
   *
- * If an error occured the non-zero error is returned and *fpp is set to NULL.
- * Otherwise *fpp is set and zero is returned.
+ * If an error occurred the non-zero error is returned and *fpp is set to
+ * NULL.  Otherwise *fpp is set and zero is returned.
   */
  static __inline int
  _fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
@@ -1979,9 +1984,9 @@
  	*fpp = NULL;
  	if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
  		return (EBADF);
-	FILEDESC_LOCK(fdp);
+	FILEDESC_SLOCK(fdp);
  	if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_SUNLOCK(fdp);
  		return (EBADF);
  	}

@@ -1991,16 +1996,16 @@
  	 * Only one flag, or 0, may be specified.
  	 */
  	if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_SUNLOCK(fdp);
  		return (EBADF);
  	}
  	if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_SUNLOCK(fdp);
  		return (EBADF);
  	}
  	if (hold) {
  		fhold(fp);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_SUNLOCK(fdp);
  	}
  	*fpp = fp;
  	return (0);
@@ -2028,9 +2033,9 @@
  }

  /*
- * Like fget() but loads the underlying vnode, or returns an error if
- * the descriptor does not represent a vnode.  Note that pipes use vnodes
- * but never have VM objects.  The returned vnode will be vref()d.
+ * Like fget() but loads the underlying vnode, or returns an error if the
+ * descriptor does not represent a vnode.  Note that pipes use vnodes but
+ * never have VM objects.  The returned vnode will be vref()'d.
   *
   * XXX: what about the unused flags ?
   */
@@ -2049,7 +2054,7 @@
  		*vpp = fp->f_vnode;
  		vref(*vpp);
  	}
-	FILEDESC_UNLOCK(td->td_proc->p_fd);
+	FILEDESC_SUNLOCK(td->td_proc->p_fd);
  	return (error);
  }

@@ -2077,15 +2082,15 @@
  #endif

  /*
- * Like fget() but loads the underlying socket, or returns an error if
- * the descriptor does not represent a socket.
+ * Like fget() but loads the underlying socket, or returns an error if the
+ * descriptor does not represent a socket.
   *
- * We bump the ref count on the returned socket.  XXX Also obtain the SX
- * lock in the future.
+ * We bump the ref count on the returned socket.  XXX Also obtain the SX lock
+ * in the future.
   *
   * XXXRW: fgetsock() and fputsock() are deprecated, as consumers should rely
- * on their file descriptor reference to prevent the socket from being
- * freed during use.
+ * on their file descriptor reference to prevent the socket from being free'd
+ * during use.
   */
  int
  fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
@@ -2110,7 +2115,7 @@
  		soref(*spp);
  		SOCK_UNLOCK(*spp);
  	}
-	FILEDESC_UNLOCK(td->td_proc->p_fd);
+	FILEDESC_SUNLOCK(td->td_proc->p_fd);
  	return (error);
  }

@@ -2257,22 +2262,20 @@
  	 * of file descriptors, or the fd to be dup'd has already been
  	 * closed, then reject.
  	 */
-	FILEDESC_LOCK(fdp);
+	FILEDESC_XLOCK(fdp);
  	if (dfd < 0 || dfd >= fdp->fd_nfiles ||
  	    (wfp = fdp->fd_ofiles[dfd]) == NULL) {
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		return (EBADF);
  	}

  	/*
  	 * There are two cases of interest here.
  	 *
-	 * For ENODEV simply dup (dfd) to file descriptor
-	 * (indx) and return.
+	 * For ENODEV simply dup (dfd) to file descriptor (indx) and return.
  	 *
-	 * For ENXIO steal away the file structure from (dfd) and
-	 * store it in (indx).  (dfd) is effectively closed by
-	 * this operation.
+	 * For ENXIO steal away the file structure from (dfd) and store it in
+	 * (indx).  (dfd) is effectively closed by this operation.
  	 *
  	 * Any other error code is just returned.
  	 */
@@ -2285,7 +2288,7 @@
  		FILE_LOCK(wfp);
  		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
  			FILE_UNLOCK(wfp);
-			FILEDESC_UNLOCK(fdp);
+			FILEDESC_XUNLOCK(fdp);
  			return (EACCES);
  		}
  		fp = fdp->fd_ofiles[indx];
@@ -2295,7 +2298,7 @@
  			fdused(fdp, indx);
  		fhold_locked(wfp);
  		FILE_UNLOCK(wfp);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		if (fp != NULL)
  			/*
  			 * We now own the reference to fp that the ofiles[]
@@ -2316,7 +2319,7 @@
  		fdunused(fdp, dfd);
  		if (fp == NULL)
  			fdused(fdp, indx);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);

  		/*
  		 * We now own the reference to fp that the ofiles[] array
@@ -2327,16 +2330,15 @@
  		return (0);

  	default:
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		return (error);
  	}
  	/* NOTREACHED */
  }

  /*
- * Scan all active processes to see if any of them have a current
- * or root directory of `olddp'. If so, replace them with the new
- * mount point.
+ * Scan all active processes to see if any of them have a current or root
+ * directory of `olddp'. If so, replace them with the new mount point.
   */
  void
  mountcheckdirs(struct vnode *olddp, struct vnode *newdp)
@@ -2353,7 +2355,7 @@
  		if (fdp == NULL)
  			continue;
  		nrele = 0;
-		FILEDESC_LOCK_FAST(fdp);
+		FILEDESC_XLOCK(fdp);
  		if (fdp->fd_cdir == olddp) {
  			vref(newdp);
  			fdp->fd_cdir = newdp;
@@ -2364,7 +2366,7 @@
  			fdp->fd_rdir = newdp;
  			nrele++;
  		}
-		FILEDESC_UNLOCK_FAST(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		fddrop(fdp);
  		while (nrele--)
  			vrele(olddp);
@@ -2391,12 +2393,12 @@
  	fdtol->fdl_wakeup = 0;
  	fdtol->fdl_leader = leader;
  	if (old != NULL) {
-		FILEDESC_LOCK(fdp);
+		FILEDESC_XLOCK(fdp);
  		fdtol->fdl_next = old->fdl_next;
  		fdtol->fdl_prev = old;
  		old->fdl_next = fdtol;
  		fdtol->fdl_next->fdl_prev = fdtol;
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_XUNLOCK(fdp);
  	} else {
  		fdtol->fdl_next = fdtol;
  		fdtol->fdl_prev = fdtol;
@@ -2459,7 +2461,7 @@
  		fdp = fdhold(p);
  		if (fdp == NULL)
  			continue;
-		FILEDESC_LOCK_FAST(fdp);
+		FILEDESC_SLOCK(fdp);
  		for (n = 0; fdp->fd_refcnt > 0 && n < fdp->fd_nfiles; ++n) {
  			if ((fp = fdp->fd_ofiles[n]) == NULL)
  				continue;
@@ -2476,7 +2478,7 @@
  			if (error)
  				break;
  		}
-		FILEDESC_UNLOCK_FAST(fdp);
+		FILEDESC_SUNLOCK(fdp);
  		fddrop(fdp);
  		if (error)
  			break;
--- //depot/vendor/freebsd/src/sys/kern/kern_event.c	2007/03/04 22:41:05
+++ //depot/user/rwatson/filedesc/src/sys/kern/kern_event.c	2007/03/05 16:48:38
@@ -527,9 +527,9 @@
  	knlist_init(&kq->kq_sel.si_note, &kq->kq_lock, NULL, NULL, NULL);
  	TASK_INIT(&kq->kq_task, 0, kqueue_task, kq);

-	FILEDESC_LOCK_FAST(fdp);
+	FILEDESC_XLOCK(fdp);
  	SLIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list);
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_XUNLOCK(fdp);

  	FILE_LOCK(fp);
  	fp->f_flag = FREAD | FWRITE;
@@ -1493,9 +1493,9 @@

  	KQ_UNLOCK(kq);

-	FILEDESC_LOCK_FAST(fdp);
+	FILEDESC_XLOCK(fdp);
  	SLIST_REMOVE(&fdp->fd_kqlist, kq, kqueue, kq_list);
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_XUNLOCK(fdp);

  	knlist_destroy(&kq->kq_sel.si_note);
  	mtx_destroy(&kq->kq_lock);
@@ -1781,9 +1781,9 @@
  }

  /*
- * remove all knotes referencing a specified fd
- * must be called with FILEDESC lock.  This prevents a race where a new fd
- * comes along and occupies the entry and we attach a knote to the fd.
+ * Remove all knotes referencing a specified fd.  Must be called with the
+ * FILEDESC lock held.  This prevents a race where a new fd comes along and
+ * occupies the entry and we attach a knote to the fd.
   */
  void
  knote_fdclose(struct thread *td, int fd)
@@ -1793,7 +1793,7 @@
  	struct knote *kn;
  	int influx;

-	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
+	FILEDESC_XLOCK_ASSERT(fdp);

  	/*
  	 * We shouldn't have to worry about new kevents appearing on fd
--- //depot/vendor/freebsd/src/sys/kern/kern_fork.c	2007/03/04 22:41:05
+++ //depot/user/rwatson/filedesc/src/sys/kern/kern_fork.c	2007/03/05 16:48:38
@@ -458,9 +458,9 @@
  			 * shared process leaders.
  			 */
  			fdtol = p1->p_fdtol;
-			FILEDESC_LOCK_FAST(p1->p_fd);
+			FILEDESC_XLOCK(p1->p_fd);
  			fdtol->fdl_refcount++;
-			FILEDESC_UNLOCK_FAST(p1->p_fd);
+			FILEDESC_XUNLOCK(p1->p_fd);
  		} else {
  			/*
  			 * Shared file descriptor table, and
--- //depot/vendor/freebsd/src/sys/kern/subr_witness.c	2007/04/01 15:52:48
+++ //depot/user/rwatson/filedesc/src/sys/kern/subr_witness.c	2007/04/01 18:01:27
@@ -281,7 +281,6 @@
  	 * Various mutexes
  	 */
  	{ "Giant", &lock_class_mtx_sleep },
-	{ "filedesc structure", &lock_class_mtx_sleep },
  	{ "pipe mutex", &lock_class_mtx_sleep },
  	{ "sigio lock", &lock_class_mtx_sleep },
  	{ "process group", &lock_class_mtx_sleep },
@@ -294,7 +293,6 @@
  	/*
  	 * Sockets
  	 */
-	{ "filedesc structure", &lock_class_mtx_sleep },
  	{ "accept", &lock_class_mtx_sleep },
  	{ "so_snd", &lock_class_mtx_sleep },
  	{ "so_rcv", &lock_class_mtx_sleep },
--- //depot/vendor/freebsd/src/sys/kern/sys_generic.c	2007/03/05 13:12:17
+++ //depot/user/rwatson/filedesc/src/sys/kern/sys_generic.c	2007/03/05 16:48:38
@@ -568,14 +568,14 @@
  	fdp = td->td_proc->p_fd;
  	switch (com) {
  	case FIONCLEX:
-		FILEDESC_LOCK_FAST(fdp);
+		FILEDESC_XLOCK(fdp);
  		fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE;
-		FILEDESC_UNLOCK_FAST(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		goto out;
  	case FIOCLEX:
-		FILEDESC_LOCK_FAST(fdp);
+		FILEDESC_XLOCK(fdp);
  		fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
-		FILEDESC_UNLOCK_FAST(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		goto out;
  	case FIONBIO:
  		FILE_LOCK(fp);
@@ -658,11 +658,10 @@
  		return (EINVAL);
  	fdp = td->td_proc->p_fd;

-	FILEDESC_LOCK_FAST(fdp);
-
+	FILEDESC_SLOCK(fdp);
  	if (nd > td->td_proc->p_fd->fd_nfiles)
  		nd = td->td_proc->p_fd->fd_nfiles;   /* forgiving; slightly wrong */
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_SUNLOCK(fdp);

  	/*
  	 * Allocate just enough bits for the non-null fd_sets.  Use the
@@ -809,7 +808,7 @@
  	static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND };
  	struct filedesc *fdp = td->td_proc->p_fd;

-	FILEDESC_LOCK(fdp);
+	FILEDESC_SLOCK(fdp);
  	for (msk = 0; msk < 3; msk++) {
  		if (ibits[msk] == NULL)
  			continue;
@@ -820,7 +819,7 @@
  				if (!(bits & 1))
  					continue;
  				if ((fp = fget_locked(fdp, fd)) == NULL) {
-					FILEDESC_UNLOCK(fdp);
+					FILEDESC_SUNLOCK(fdp);
  					return (EBADF);
  				}
  				if (fo_poll(fp, flag[msk], td->td_ucred,
@@ -832,7 +831,7 @@
  			}
  		}
  	}
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_SUNLOCK(fdp);
  	td->td_retval[0] = n;
  	return (0);
  }
@@ -973,7 +972,7 @@
  	struct file *fp;
  	int n = 0;

-	FILEDESC_LOCK(fdp);
+	FILEDESC_SLOCK(fdp);
  	for (i = 0; i < nfd; i++, fds++) {
  		if (fds->fd >= fdp->fd_nfiles) {
  			fds->revents = POLLNVAL;
@@ -997,7 +996,7 @@
  			}
  		}
  	}
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_SUNLOCK(fdp);
  	td->td_retval[0] = n;
  	return (0);
  }
--- //depot/vendor/freebsd/src/sys/kern/uipc_mqueue.c	2007/03/13 01:54:24
+++ //depot/user/rwatson/filedesc/src/sys/kern/uipc_mqueue.c	2007/03/17 21:03:04
@@ -2013,10 +2013,10 @@
  	fp->f_data = pn;
  	FILE_UNLOCK(fp);

-	FILEDESC_LOCK_FAST(fdp);
+	FILEDESC_XLOCK(fdp);
  	if (fdp->fd_ofiles[fd] == fp)
  		fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_XUNLOCK(fdp);
  	td->td_retval[0] = fd;
  	fdrop(fp, td);
  	return (0);
@@ -2197,14 +2197,14 @@
  	if (error)
  		return (error);
  again:
-	FILEDESC_LOCK_FAST(fdp);
+	FILEDESC_SLOCK(fdp);
  	if (fget_locked(fdp, uap->mqd) != fp) {
-		FILEDESC_UNLOCK_FAST(fdp);
+		FILEDESC_SUNLOCK(fdp);
  		error = EBADF;
  		goto out;
  	}
  	mtx_lock(&mq->mq_mutex);
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_SUNLOCK(fdp);
  	if (uap->sigev != NULL) {
  		if (mq->mq_notifier != NULL) {
  			error = EBUSY;
@@ -2267,7 +2267,8 @@
  	struct mqueue *mq;

  	fdp = td->td_proc->p_fd;
-	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
+	FILEDESC_LOCK_ASSERT(fdp);
+
  	if (fp->f_ops == &mqueueops) {
  		mq = FPTOMQ(fp);
  		mtx_lock(&mq->mq_mutex);
@@ -2295,7 +2296,7 @@
  	int i;

  	fdp = p->p_fd;
-	FILEDESC_LOCK_FAST(fdp);
+	FILEDESC_SLOCK(fdp);
  	for (i = 0; i < fdp->fd_nfiles; ++i) {
  		fp = fget_locked(fdp, i);
  		if (fp != NULL && fp->f_ops == &mqueueops) {
@@ -2305,7 +2306,7 @@
  			mtx_unlock(&mq->mq_mutex);
  		}
  	}
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_SUNLOCK(fdp);
  	KASSERT(LIST_EMPTY(&p->p_mqnotifier), ("mq notifiers left"));
  }

--- //depot/vendor/freebsd/src/sys/kern/uipc_syscalls.c	2007/03/05 13:12:17
+++ //depot/user/rwatson/filedesc/src/sys/kern/uipc_syscalls.c	2007/03/05 16:48:38
@@ -124,7 +124,7 @@
  	if (fdp == NULL)
  		error = EBADF;
  	else {
-		FILEDESC_LOCK_FAST(fdp);
+		FILEDESC_SLOCK(fdp);
  		fp = fget_locked(fdp, fd);
  		if (fp == NULL)
  			error = EBADF;
@@ -137,7 +137,7 @@
  				*fflagp = fp->f_flag;
  			error = 0;
  		}
-		FILEDESC_UNLOCK_FAST(fdp);
+		FILEDESC_SUNLOCK(fdp);
  	}
  	*fpp = fp;
  	return (error);
@@ -182,12 +182,17 @@
  	if (error) {
  		fdclose(fdp, fp, fd, td);
  	} else {
-		FILEDESC_LOCK_FAST(fdp);
+		/*
+		 * XXXRW: The logic here seems wrong -- shouldn't it be
+		 * locking the file, not the filedesc?  Other threads could
+		 * already have a reference to the socket by now.
+		 */
+		FILEDESC_XLOCK(fdp);
  		fp->f_data = so;	/* already has ref count */
  		fp->f_flag = FREAD|FWRITE;
  		fp->f_ops = &socketops;
  		fp->f_type = DTYPE_SOCKET;
-		FILEDESC_UNLOCK_FAST(fdp);
+		FILEDESC_XUNLOCK(fdp);
  		td->td_retval[0] = fd;
  	}
  	fdrop(fp, td);
--- //depot/vendor/freebsd/src/sys/kern/uipc_usrreq.c	2007/03/12 14:57:57
+++ //depot/user/rwatson/filedesc/src/sys/kern/uipc_usrreq.c	2007/03/17 21:03:04
@@ -1579,10 +1579,10 @@
  				unp_freerights(rp, newfds);
  				goto next;
  			}
-			FILEDESC_LOCK(td->td_proc->p_fd);
+			FILEDESC_XLOCK(td->td_proc->p_fd);
  			/* if the new FD's will not fit free them.  */
  			if (!fdavail(td, newfds)) {
-				FILEDESC_UNLOCK(td->td_proc->p_fd);
+				FILEDESC_XUNLOCK(td->td_proc->p_fd);
  				error = EMSGSIZE;
  				unp_freerights(rp, newfds);
  				goto next;
@@ -1597,7 +1597,7 @@
  			*controlp = sbcreatecontrol(NULL, newlen,
  			    SCM_RIGHTS, SOL_SOCKET);
  			if (*controlp == NULL) {
-				FILEDESC_UNLOCK(td->td_proc->p_fd);
+				FILEDESC_XUNLOCK(td->td_proc->p_fd);
  				error = E2BIG;
  				unp_freerights(rp, newfds);
  				goto next;
@@ -1616,7 +1616,7 @@
  				unp_rights--;
  				*fdp++ = f;
  			}
-			FILEDESC_UNLOCK(td->td_proc->p_fd);
+			FILEDESC_XUNLOCK(td->td_proc->p_fd);
  		} else {
  			/* We can just copy anything else across. */
  			if (error || controlp == NULL)
@@ -1738,23 +1738,24 @@
  			 * files.  If not, reject the entire operation.
  			 */
  			fdp = data;
-			FILEDESC_LOCK(fdescp);
+			FILEDESC_SLOCK(fdescp);
  			for (i = 0; i < oldfds; i++) {
  				fd = *fdp++;
  				if ((unsigned)fd >= fdescp->fd_nfiles ||
  				    fdescp->fd_ofiles[fd] == NULL) {
-					FILEDESC_UNLOCK(fdescp);
+					FILEDESC_SUNLOCK(fdescp);
  					error = EBADF;
  					goto out;
  				}
  				fp = fdescp->fd_ofiles[fd];
  				if (!(fp->f_ops->fo_flags & DFLAG_PASSABLE)) {
-					FILEDESC_UNLOCK(fdescp);
+					FILEDESC_SUNLOCK(fdescp);
  					error = EOPNOTSUPP;
  					goto out;
  				}

  			}
+
  			/*
  			 * Now replace the integer FDs with pointers to
  			 * the associated global file table entry..
@@ -1763,7 +1764,7 @@
  			*controlp = sbcreatecontrol(NULL, newlen,
  			    SCM_RIGHTS, SOL_SOCKET);
  			if (*controlp == NULL) {
-				FILEDESC_UNLOCK(fdescp);
+				FILEDESC_SUNLOCK(fdescp);
  				error = E2BIG;
  				goto out;
  			}
@@ -1780,7 +1781,7 @@
  				FILE_UNLOCK(fp);
  				unp_rights++;
  			}
-			FILEDESC_UNLOCK(fdescp);
+			FILEDESC_SUNLOCK(fdescp);
  			break;

  		case SCM_TIMESTAMP:
--- //depot/vendor/freebsd/src/sys/kern/vfs_cache.c	2007/03/05 13:12:17
+++ //depot/user/rwatson/filedesc/src/sys/kern/vfs_cache.c	2007/03/05 16:48:38
@@ -717,10 +717,10 @@
  	tmpbuf = malloc(buflen, M_TEMP, M_WAITOK);
  	fdp = td->td_proc->p_fd;
  	mtx_lock(&Giant);
-	FILEDESC_LOCK(fdp);
+	FILEDESC_SLOCK(fdp);
  	error = vn_fullpath1(td, fdp->fd_cdir, fdp->fd_rdir, tmpbuf,
  	    &bp, buflen);
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_SUNLOCK(fdp);
  	mtx_unlock(&Giant);

  	if (!error) {
@@ -771,9 +771,9 @@

  	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
  	fdp = td->td_proc->p_fd;
-	FILEDESC_LOCK(fdp);
+	FILEDESC_SLOCK(fdp);
  	error = vn_fullpath1(td, vn, fdp->fd_rdir, buf, retbuf, MAXPATHLEN);
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_SUNLOCK(fdp);

  	if (!error)
  		*freebuf = buf;
--- //depot/vendor/freebsd/src/sys/kern/vfs_lookup.c	2007/03/31 16:11:57
+++ //depot/user/rwatson/filedesc/src/sys/kern/vfs_lookup.c	2007/04/01 13:10:13
@@ -188,14 +188,14 @@
  	/*
  	 * Get starting point for the translation.
  	 */
-	FILEDESC_LOCK(fdp);
+	FILEDESC_SLOCK(fdp);
  	ndp->ni_rootdir = fdp->fd_rdir;
  	ndp->ni_topdir = fdp->fd_jdir;

  	dp = fdp->fd_cdir;
  	vfslocked = VFS_LOCK_GIANT(dp->v_mount);
  	VREF(dp);
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_SUNLOCK(fdp);
  	for (;;) {
  		/*
  		 * Check if root directory should replace current directory.
--- //depot/vendor/freebsd/src/sys/kern/vfs_mount.c	2007/04/01 13:12:37
+++ //depot/user/rwatson/filedesc/src/sys/kern/vfs_mount.c	2007/04/01 13:21:17
@@ -1361,7 +1361,7 @@
  		panic("Cannot find root vnode");

  	p = td->td_proc;
-	FILEDESC_LOCK(p->p_fd);
+	FILEDESC_SLOCK(p->p_fd);

  	if (p->p_fd->fd_cdir != NULL)
  		vrele(p->p_fd->fd_cdir);
@@ -1373,7 +1373,7 @@
  	p->p_fd->fd_rdir = rootvnode;
  	VREF(rootvnode);

-	FILEDESC_UNLOCK(p->p_fd);
+	FILEDESC_SUNLOCK(p->p_fd);

  	VOP_UNLOCK(rootvnode, 0, td);
  }
--- //depot/vendor/freebsd/src/sys/kern/vfs_syscalls.c	2007/03/21 19:36:52
+++ //depot/user/rwatson/filedesc/src/sys/kern/vfs_syscalls.c	2007/04/01 13:10:13
@@ -715,10 +715,10 @@
  	}
  	VOP_UNLOCK(vp, 0, td);
  	VFS_UNLOCK_GIANT(vfslocked);
-	FILEDESC_LOCK_FAST(fdp);
+	FILEDESC_XLOCK(fdp);
  	vpold = fdp->fd_cdir;
  	fdp->fd_cdir = vp;
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_XUNLOCK(fdp);
  	vfslocked = VFS_LOCK_GIANT(vpold->v_mount);
  	vrele(vpold);
  	VFS_UNLOCK_GIANT(vfslocked);
@@ -767,10 +767,10 @@
  	VOP_UNLOCK(nd.ni_vp, 0, td);
  	VFS_UNLOCK_GIANT(vfslocked);
  	NDFREE(&nd, NDF_ONLY_PNBUF);
-	FILEDESC_LOCK_FAST(fdp);
+	FILEDESC_XLOCK(fdp);
  	vp = fdp->fd_cdir;
  	fdp->fd_cdir = nd.ni_vp;
-	FILEDESC_UNLOCK_FAST(fdp);
+	FILEDESC_XUNLOCK(fdp);
  	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
  	vrele(vp);
  	VFS_UNLOCK_GIANT(vfslocked);
@@ -789,7 +789,8 @@
  	struct file *fp;
  	int fd;

-	FILEDESC_LOCK_ASSERT(fdp, MA_OWNED);
+	FILEDESC_LOCK_ASSERT(fdp);
+
  	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
  		fp = fget_locked(fdp, fd);
  		if (fp == NULL)
@@ -905,12 +906,12 @@

  	VFS_ASSERT_GIANT(vp->v_mount);
  	fdp = td->td_proc->p_fd;
-	FILEDESC_LOCK(fdp);
+	FILEDESC_XLOCK(fdp);
  	if (chroot_allow_open_directories == 0 ||
  	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
  		error = chroot_refuse_vdir_fds(fdp);
  		if (error) {
-			FILEDESC_UNLOCK(fdp);
+			FILEDESC_XUNLOCK(fdp);
  			return (error);
  		}
  	}
@@ -921,7 +922,7 @@
  		fdp->fd_jdir = vp;
  		VREF(fdp->fd_jdir);
  	}
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_XUNLOCK(fdp);
  	vfslocked = VFS_LOCK_GIANT(oldvp->v_mount);
  	vrele(oldvp);
  	VFS_UNLOCK_GIANT(vfslocked);
@@ -1030,18 +1031,18 @@
  	 *
  	 * Handle the case where someone closed the file (via its file
  	 * descriptor) while we were blocked.  The end result should look
-	 * like opening the file succeeded but it was immediately closed.
-	 * We call vn_close() manually because we haven't yet hooked up
-	 * the various 'struct file' fields.
+	 * like opening the file succeeded but it was immediately closed.  We
+	 * call vn_close() manually because we haven't yet hooked up the
+	 * various 'struct file' fields.
  	 */
-	FILEDESC_LOCK(fdp);
+	FILEDESC_SLOCK(fdp);
  	FILE_LOCK(fp);
  	if (fp->f_count == 1) {
  		mp = vp->v_mount;
  		KASSERT(fdp->fd_ofiles[indx] != fp,
  		    ("Open file descriptor lost all refs"));
  		FILE_UNLOCK(fp);
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_SUNLOCK(fdp);
  		VOP_UNLOCK(vp, 0, td);
  		vn_close(vp, flags & FMASK, fp->f_cred, td);
  		VFS_UNLOCK_GIANT(vfslocked);
@@ -1058,7 +1059,7 @@
  	fp->f_seqcount = 1;
  	fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE);
  	FILE_UNLOCK(fp);
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_SUNLOCK(fdp);

  	VOP_UNLOCK(vp, 0, td);
  	if (flags & (O_EXLOCK | O_SHLOCK)) {
@@ -1206,10 +1207,10 @@
  		return (EEXIST);
  	} else {
  		VATTR_NULL(&vattr);
-		FILEDESC_LOCK_FAST(td->td_proc->p_fd);
+		FILEDESC_SLOCK(td->td_proc->p_fd);
  		vattr.va_mode = (mode & ALLPERMS) &
  		    ~td->td_proc->p_fd->fd_cmask;
-		FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
+		FILEDESC_SUNLOCK(td->td_proc->p_fd);
  		vattr.va_rdev = dev;
  		whiteout = 0;

@@ -1319,9 +1320,9 @@
  	}
  	VATTR_NULL(&vattr);
  	vattr.va_type = VFIFO;
-	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
+	FILEDESC_SLOCK(td->td_proc->p_fd);
  	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
-	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
+	FILEDESC_SUNLOCK(td->td_proc->p_fd);
  #ifdef MAC
  	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
  	    &vattr);
@@ -1534,9 +1535,9 @@
  		goto restart;
  	}
  	VATTR_NULL(&vattr);
-	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
+	FILEDESC_SLOCK(td->td_proc->p_fd);
  	vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask;
-	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
+	FILEDESC_SUNLOCK(td->td_proc->p_fd);
  #ifdef MAC
  	vattr.va_type = VLNK;
  	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
@@ -3418,9 +3419,9 @@
  	}
  	VATTR_NULL(&vattr);
  	vattr.va_type = VDIR;
-	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
+	FILEDESC_SLOCK(td->td_proc->p_fd);
  	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
-	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
+	FILEDESC_SUNLOCK(td->td_proc->p_fd);
  #ifdef MAC
  	error = mac_check_vnode_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
  	    &vattr);
@@ -3807,11 +3808,11 @@
  {
  	register struct filedesc *fdp;

-	FILEDESC_LOCK_FAST(td->td_proc->p_fd);
+	FILEDESC_XLOCK(td->td_proc->p_fd);
  	fdp = td->td_proc->p_fd;
  	td->td_retval[0] = fdp->fd_cmask;
  	fdp->fd_cmask = uap->newmask & ALLPERMS;
-	FILEDESC_UNLOCK_FAST(td->td_proc->p_fd);
+	FILEDESC_XUNLOCK(td->td_proc->p_fd);
  	return (0);
  }

@@ -3887,7 +3888,7 @@
  	if (fdp == NULL)
  		error = EBADF;
  	else {
-		FILEDESC_LOCK(fdp);
+		FILEDESC_SLOCK(fdp);
  		if ((u_int)fd >= fdp->fd_nfiles ||
  		    (fp = fdp->fd_ofiles[fd]) == NULL)
  			error = EBADF;
@@ -3898,7 +3899,7 @@
  			fhold(fp);
  			error = 0;
  		}
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_SUNLOCK(fdp);
  	}
  	*fpp = fp;
  	return (error);
--- //depot/vendor/freebsd/src/sys/netsmb/smb_dev.c	2007/02/09 17:22:48
+++ //depot/user/rwatson/filedesc/src/sys/netsmb/smb_dev.c	2007/03/03 22:39:43
@@ -368,15 +368,15 @@
  {
  	struct file* fp;

-	FILEDESC_LOCK(fdp);
+	FILEDESC_SLOCK(fdp);
  	if (((u_int)fd) >= fdp->fd_nfiles ||
  	    (fp = fdp->fd_ofiles[fd]) == NULL ||
  	    (fp->f_flag & flag) == 0) {
-		FILEDESC_UNLOCK(fdp);
+		FILEDESC_SUNLOCK(fdp);
  		return (NULL);
  	}
  	fhold(fp);
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_SUNLOCK(fdp);
  	return (fp);
  }

--- //depot/vendor/freebsd/src/sys/security/audit/audit_bsm_klib.c	2006/12/29 12:22:04
+++ //depot/user/rwatson/filedesc/src/sys/security/audit/audit_bsm_klib.c	2007/03/03 22:39:43
@@ -494,7 +494,7 @@
  	fdp = td->td_proc->p_fd;
  	bufp = path;
  	cisr = 0;
-	FILEDESC_LOCK(fdp);
+	FILEDESC_SLOCK(fdp);
  	if (*(path) == '/') {
  		while (*(bufp) == '/')
  			bufp++;			/* Skip leading '/'s. */
@@ -516,7 +516,7 @@
  		vref(vnp);
  		bufp = path;
  	}
-	FILEDESC_UNLOCK(fdp);
+	FILEDESC_SUNLOCK(fdp);
  	if (vnp != NULL) {
  		/*
  		 * XXX: vn_fullpath() on FreeBSD is "less reliable" than
--- //depot/vendor/freebsd/src/sys/sys/filedesc.h	2006/04/07 05:20:46
+++ //depot/user/rwatson/filedesc/src/sys/sys/filedesc.h	2007/04/01 19:46:08
@@ -35,9 +35,9 @@

  #include <sys/queue.h>
  #include <sys/event.h>
+#include <sys/lock.h>
  #include <sys/priority.h>
-#include <sys/_lock.h>
-#include <sys/_mutex.h>
+#include <sys/sx.h>

  #include <machine/_limits.h>

@@ -60,10 +60,7 @@
  	u_short	fd_cmask;		/* mask for file creation */
  	u_short	fd_refcnt;		/* thread reference count */
  	u_short	fd_holdcnt;		/* hold count on structure + mutex */
-
-	struct	mtx fd_mtx;		/* protects members of this struct */
-	int	fd_locked;		/* long lock flag */
-	int	fd_wanted;		/* "" */
+	struct	sx fd_sx;		/* protects members of this struct */
  	struct	kqlist fd_kqlist;	/* list of kqueues on this filedesc */
  	int	fd_holdleaderscount;	/* block fdfree() for shared close() */
  	int	fd_holdleaderswakeup;	/* fdfree() needs wakeup */
@@ -96,61 +93,18 @@
  #ifdef _KERNEL

  /* Lock a file descriptor table. */
-#define	FILEDESC_LOCK(fd)								\
-	do {										\
-		mtx_lock(&(fd)->fd_mtx);						\
-		(fd)->fd_wanted++;							\
-		while ((fd)->fd_locked)							\
-			msleep(&(fd)->fd_locked, &(fd)->fd_mtx, PLOCK, "fdesc", 0);	\
-		(fd)->fd_locked = 2;							\
-		(fd)->fd_wanted--;							\
-		mtx_unlock(&(fd)->fd_mtx);						\
-	} while (0)
+#define	FILEDESC_LOCK_INIT(fdp)	sx_init(&(fdp)->fd_sx, "filedesc structure")
+#define	FILEDESC_LOCK_DESTROY(fdp)	sx_destroy(&(fdp)->fd_sx)
+#define	FILEDESC_LOCK(fdp)	(&(fdp)->fd_sx)
+#define	FILEDESC_XLOCK(fdp)	sx_xlock(&(fdp)->fd_sx)
+#define	FILEDESC_XUNLOCK(fdp)	sx_xunlock(&(fdp)->fd_sx)
+#define	FILEDESC_SLOCK(fdp)	sx_slock(&(fdp)->fd_sx)
+#define	FILEDESC_SUNLOCK(fdp)	sx_sunlock(&(fdp)->fd_sx)

-#define	FILEDESC_UNLOCK(fd)								\
-	do {										\
-		mtx_lock(&(fd)->fd_mtx);						\
-		KASSERT((fd)->fd_locked == 2,						\
-		    ("fdesc locking mistake %d should be %d", (fd)->fd_locked, 2));	\
-		(fd)->fd_locked = 0;							\
-		if ((fd)->fd_wanted)							\
-			wakeup(&(fd)->fd_locked);					\
-		mtx_unlock(&(fd)->fd_mtx);						\
-	} while (0)
-
-#define	FILEDESC_LOCK_FAST(fd)								\
-	do {										\
-		mtx_lock(&(fd)->fd_mtx);						\
-		(fd)->fd_wanted++;							\
-		while ((fd)->fd_locked)							\
-			msleep(&(fd)->fd_locked, &(fd)->fd_mtx, PLOCK, "fdesc", 0);	\
-		(fd)->fd_locked = 1;							\
-		(fd)->fd_wanted--;							\
-	} while (0)
-
-#define	FILEDESC_UNLOCK_FAST(fd)							\
-	do {										\
-		KASSERT((fd)->fd_locked == 1,						\
-		    ("fdesc locking mistake %d should be %d", (fd)->fd_locked, 1));	\
-		(fd)->fd_locked = 0;							\
-		if ((fd)->fd_wanted)							\
-			wakeup(&(fd)->fd_locked);					\
-		mtx_unlock(&(fd)->fd_mtx);						\
-	} while (0)
-
-#ifdef INVARIANT_SUPPORT
-#define	FILEDESC_LOCK_ASSERT(fd, arg)							\
-	do {										\
-		if ((arg) == MA_OWNED)							\
-			KASSERT((fd)->fd_locked != 0, ("fdesc locking mistake"));	\
-		else									\
-			KASSERT((fd)->fd_locked == 0, ("fdesc locking mistake"));	\
-	} while (0)
-#else
-#define	FILEDESC_LOCK_ASSERT(fd, arg)
-#endif
-
-#define	FILEDESC_LOCK_DESC	"filedesc structure"
+#define	FILEDESC_LOCK_ASSERT(fdp)	sx_assert(&(fdp)->fd_sx, SX_LOCKED | \
+					    SX_NOTRECURSED)
+#define	FILEDESC_XLOCK_ASSERT(fdp)	sx_assert(&(fdp)->fd_sx, SX_XLOCKED | \
+					    SX_NOTRECURSED)

  struct thread;



More information about the freebsd-performance mailing list