svn commit: r192369 - in user/kmacy/releng_7_2_fcs/sys: compat/svr4 dev/streams fs/devfs fs/fifofs kern netgraph opencrypto sys

Kip Macy kmacy at FreeBSD.org
Tue May 19 04:43:02 UTC 2009


Author: kmacy
Date: Tue May 19 04:43:00 2009
New Revision: 192369
URL: http://svn.freebsd.org/changeset/base/192369

Log:
  merge 174988
  Remove explicit locking of struct file.
   - Introduce a finit() which is used to initialize the fields of struct file
     in such a way that the ops vector is only valid after the data, type,
     and flags are valid.
   - Protect f_flag and f_count with atomic operations.
   - Remove the global list of all files and associated accounting.
   - Rewrite the unp garbage collection such that it no longer requires
     the global list of all files and instead uses a list of all unp sockets.
   - Mark sockets in the accept queue so we don't incorrectly gc them.
  
  Tested by:	kris, pho
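
  For reference, a minimal userspace sketch of the two patterns this merge
  introduces: publishing a struct file with the ops vector stored last
  (release semantics, as finit() does via atomic_store_rel_ptr()), and
  updating flag bits with a compare-and-swap loop (as the F_SETFL handling
  in kern_fcntl() now does via atomic_cmpset_int()).  C11 atomics stand in
  for the kernel primitives, and every name below (file_like, file_init,
  file_set_flags) is illustrative rather than kernel API:

	/*
	 * Sketch only: mirrors the ordering guarantees of finit() and the
	 * lock-free f_flag updates, outside the kernel.
	 */
	#include <stdatomic.h>
	#include <stdio.h>

	struct ops { void (*close)(void *); };

	struct file_like {
		void *data;
		_Atomic unsigned flag;
		short type;
		_Atomic(struct ops *) ops;	/* published last */
	};

	static void
	file_init(struct file_like *fp, unsigned flag, short type, void *data,
	    struct ops *ops)
	{
		/* Plain stores first: data, flag, and type must be visible ... */
		fp->data = data;
		atomic_store_explicit(&fp->flag, flag, memory_order_relaxed);
		fp->type = type;
		/* ... before any reader can observe a non-NULL ops vector. */
		atomic_store_explicit(&fp->ops, ops, memory_order_release);
	}

	static void
	file_set_flags(struct file_like *fp, unsigned set, unsigned clear)
	{
		unsigned oflag, nflag;

		/* CAS loop, like the F_SETFL path in the kern_fcntl() hunk below. */
		oflag = atomic_load_explicit(&fp->flag, memory_order_relaxed);
		do {
			nflag = (oflag & ~clear) | set;
		} while (!atomic_compare_exchange_weak(&fp->flag, &oflag, nflag));
	}

	static void closeit(void *p) { (void)p; puts("closed"); }

	int
	main(void)
	{
		static struct ops myops = { closeit };
		struct file_like f = { 0 };

		file_init(&f, 0x1, 1, NULL, &myops);	/* FREAD-like, DTYPE-like */
		file_set_flags(&f, 0x4, 0);		/* set an FNONBLOCK-like bit */
		atomic_load_explicit(&f.ops, memory_order_acquire)->close(f.data);
		return (0);
	}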

Modified:
  user/kmacy/releng_7_2_fcs/sys/compat/svr4/svr4_stream.c
  user/kmacy/releng_7_2_fcs/sys/dev/streams/streams.c
  user/kmacy/releng_7_2_fcs/sys/fs/devfs/devfs_vnops.c
  user/kmacy/releng_7_2_fcs/sys/fs/fifofs/fifo_vnops.c
  user/kmacy/releng_7_2_fcs/sys/kern/kern_descrip.c
  user/kmacy/releng_7_2_fcs/sys/kern/kern_event.c
  user/kmacy/releng_7_2_fcs/sys/kern/sys_generic.c
  user/kmacy/releng_7_2_fcs/sys/kern/sys_pipe.c
  user/kmacy/releng_7_2_fcs/sys/kern/uipc_mqueue.c
  user/kmacy/releng_7_2_fcs/sys/kern/uipc_syscalls.c
  user/kmacy/releng_7_2_fcs/sys/kern/uipc_usrreq.c
  user/kmacy/releng_7_2_fcs/sys/kern/vfs_syscalls.c
  user/kmacy/releng_7_2_fcs/sys/kern/vfs_vnops.c
  user/kmacy/releng_7_2_fcs/sys/netgraph/ng_socket.c
  user/kmacy/releng_7_2_fcs/sys/opencrypto/cryptodev.c
  user/kmacy/releng_7_2_fcs/sys/sys/file.h
  user/kmacy/releng_7_2_fcs/sys/sys/unpcb.h

Modified: user/kmacy/releng_7_2_fcs/sys/compat/svr4/svr4_stream.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/compat/svr4/svr4_stream.c	Tue May 19 03:45:36 2009	(r192368)
+++ user/kmacy/releng_7_2_fcs/sys/compat/svr4/svr4_stream.c	Tue May 19 04:43:00 2009	(r192369)
@@ -1481,8 +1481,6 @@ svr4_do_putmsg(td, uap, fp)
 		 uap->dat, uap->flags);
 #endif /* DEBUG_SVR4 */
 
-	FILE_LOCK_ASSERT(fp, MA_NOTOWNED);
-
 	if (uap->ctl != NULL) {
 	  if ((error = copyin(uap->ctl, &ctl, sizeof(ctl))) != 0) {
 #ifdef DEBUG_SVR4
@@ -1656,8 +1654,6 @@ svr4_do_getmsg(td, uap, fp)
 	error = 0;
 	afp = NULL;
 
-	FILE_LOCK_ASSERT(fp, MA_NOTOWNED);
-
 	memset(&sc, 0, sizeof(sc));
 
 #ifdef DEBUG_SVR4

Modified: user/kmacy/releng_7_2_fcs/sys/dev/streams/streams.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/dev/streams/streams.c	Tue May 19 03:45:36 2009	(r192368)
+++ user/kmacy/releng_7_2_fcs/sys/dev/streams/streams.c	Tue May 19 04:43:00 2009	(r192369)
@@ -251,12 +251,7 @@ streamsopen(struct cdev *dev, int oflags
 	   return error;
 	}
 
-	FILE_LOCK(fp);
-	fp->f_data = so;
-	fp->f_flag = FREAD|FWRITE;
-	fp->f_ops = &svr4_netops;
-	fp->f_type = DTYPE_SOCKET;
-	FILE_UNLOCK(fp);
+	finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &svr4_netops);
 
 	/*
 	 * Allocate a stream structure and attach it to this socket.

Modified: user/kmacy/releng_7_2_fcs/sys/fs/devfs/devfs_vnops.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/fs/devfs/devfs_vnops.c	Tue May 19 03:45:36 2009	(r192368)
+++ user/kmacy/releng_7_2_fcs/sys/fs/devfs/devfs_vnops.c	Tue May 19 04:43:00 2009	(r192369)
@@ -887,13 +887,11 @@ devfs_open(struct vop_open_args *ap)
 
 	VOP_UNLOCK(vp, 0, td);
 
+	fpop = td->td_fpop;
+	td->td_fpop = fp;
 	if (fp != NULL) {
-		FILE_LOCK(fp);
 		fp->f_data = dev;
-		FILE_UNLOCK(fp);
 	}
-	fpop = td->td_fpop;
-	td->td_fpop = fp;
 	if(!(dsw->d_flags & D_NEEDGIANT)) {
 		DROP_GIANT();
 		if (dsw->d_fdopen != NULL)
@@ -923,11 +921,9 @@ devfs_open(struct vop_open_args *ap)
 	if(fp == NULL)
 		return (error);
 #endif
-	FILE_LOCK(fp);
 	KASSERT(fp->f_ops == &badfileops,
 	     ("Could not vnode bypass device on fdops %p", fp->f_ops));
-	fp->f_ops = &devfs_ops_f;
-	FILE_UNLOCK(fp);
+	finit(fp, fp->f_flag, DTYPE_VNODE, dev, &devfs_ops_f);
 	return (error);
 }
 

Modified: user/kmacy/releng_7_2_fcs/sys/fs/fifofs/fifo_vnops.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/fs/fifofs/fifo_vnops.c	Tue May 19 03:45:36 2009	(r192368)
+++ user/kmacy/releng_7_2_fcs/sys/fs/fifofs/fifo_vnops.c	Tue May 19 04:43:00 2009	(r192369)
@@ -294,11 +294,8 @@ fail1:
 	}
 	mtx_unlock(&fifo_mtx);
 	KASSERT(fp != NULL, ("can't fifo/vnode bypass"));
-	FILE_LOCK(fp);
 	KASSERT(fp->f_ops == &badfileops, ("not badfileops in fifo_open"));
-	fp->f_data = fip;
-	fp->f_ops = &fifo_ops_f;
-	FILE_UNLOCK(fp);
+	finit(fp, fp->f_flag, DTYPE_FIFO, fip, &fifo_ops_f);
 	return (0);
 }
 

Modified: user/kmacy/releng_7_2_fcs/sys/kern/kern_descrip.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/kern/kern_descrip.c	Tue May 19 03:45:36 2009	(r192368)
+++ user/kmacy/releng_7_2_fcs/sys/kern/kern_descrip.c	Tue May 19 04:43:00 2009	(r192369)
@@ -99,7 +99,6 @@ static int do_dup(struct thread *td, enu
 static int	fd_first_free(struct filedesc *, int, int);
 static int	fd_last_used(struct filedesc *, int, int);
 static void	fdgrowtable(struct filedesc *, int);
-static int	fdrop_locked(struct file *fp, struct thread *td);
 static void	fdunused(struct filedesc *fdp, int fd);
 static void	fdused(struct filedesc *fdp, int fd);
 
@@ -141,9 +140,7 @@ struct filedesc0 {
 /*
  * Descriptor management.
  */
-struct filelist filehead;	/* head of list of open files */
-int openfiles;			/* actual number of open files */
-struct sx filelist_lock;	/* sx to protect filelist */
+volatile int openfiles;			/* actual number of open files */
 struct mtx sigio_lock;		/* mtx to protect pointers to sigio */
 void	(*mq_fdclose)(struct thread *td, int fd, struct file *fp);
 
@@ -476,9 +473,7 @@ kern_fcntl(struct thread *td, int fd, in
 			error = EBADF;
 			break;
 		}
-		FILE_LOCK(fp);
 		td->td_retval[0] = OFLAGS(fp->f_flag);
-		FILE_UNLOCK(fp);
 		FILEDESC_SUNLOCK(fdp);
 		break;
 
@@ -489,12 +484,13 @@ kern_fcntl(struct thread *td, int fd, in
 			error = EBADF;
 			break;
 		}
-		FILE_LOCK(fp);
-		fhold_locked(fp);
-		fp->f_flag &= ~FCNTLFLAGS;
-		fp->f_flag |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
-		FILE_UNLOCK(fp);
+		fhold(fp);
 		FILEDESC_SUNLOCK(fdp);
+		do {
+			tmp = flg = fp->f_flag;
+			tmp &= ~FCNTLFLAGS;
+			tmp |= FFLAGS(arg & ~O_ACCMODE) & FCNTLFLAGS;
+		} while(atomic_cmpset_int(&fp->f_flag, flg, tmp) == 0);
 		tmp = fp->f_flag & FNONBLOCK;
 		error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
 		if (error) {
@@ -507,9 +503,7 @@ kern_fcntl(struct thread *td, int fd, in
 			fdrop(fp, td);
 			break;
 		}
-		FILE_LOCK(fp);
-		fp->f_flag &= ~FNONBLOCK;
-		FILE_UNLOCK(fp);
+		atomic_clear_int(&fp->f_flag, FNONBLOCK);
 		tmp = 0;
 		(void)fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
 		fdrop(fp, td);
@@ -1431,15 +1425,13 @@ int
 falloc(struct thread *td, struct file **resultfp, int *resultfd)
 {
 	struct proc *p = td->td_proc;
-	struct file *fp, *fq;
+	struct file *fp;
 	int error, i;
 	int maxuserfiles = maxfiles - (maxfiles / 20);
 	static struct timeval lastfail;
 	static int curfail;
 
 	fp = uma_zalloc(file_zone, M_WAITOK | M_ZERO);
-	sx_xlock(&filelist_lock);
-
 	if ((openfiles >= maxuserfiles &&
 	    priv_check(td, PRIV_MAXFILES) != 0) ||
 	    openfiles >= maxfiles) {
@@ -1447,18 +1439,16 @@ falloc(struct thread *td, struct file **
 			printf("kern.maxfiles limit exceeded by uid %i, please see tuning(7).\n",
 				td->td_ucred->cr_ruid);
 		}
-		sx_xunlock(&filelist_lock);
 		uma_zfree(file_zone, fp);
 		return (ENFILE);
 	}
-	openfiles++;
+	atomic_add_int(&openfiles, 1);
 
 	/*
 	 * If the process has file descriptor zero open, add the new file
 	 * descriptor to the list of open files at that point, otherwise
 	 * put it at the front of the list of open files.
 	 */
-	fp->f_mtxp = mtx_pool_alloc(mtxpool_sleep);
 	fp->f_count = 1;
 	if (resultfp)
 		fp->f_count++;
@@ -1467,12 +1457,6 @@ falloc(struct thread *td, struct file **
 	fp->f_data = NULL;
 	fp->f_vnode = NULL;
 	FILEDESC_XLOCK(p->p_fd);
-	if ((fq = p->p_fd->fd_ofiles[0])) {
-		LIST_INSERT_AFTER(fq, fp, f_list);
-	} else {
-		LIST_INSERT_HEAD(&filehead, fp, f_list);
-	}
-	sx_xunlock(&filelist_lock);
 	if ((error = fdalloc(td, 0, &i))) {
 		FILEDESC_XUNLOCK(p->p_fd);
 		fdrop(fp, td);
@@ -2037,6 +2021,23 @@ closef(struct file *fp, struct thread *t
 }
 
 /*
+ * Initialize the file pointer with the specified properties.
+ * 
+ * The ops are set with release semantics to be certain that the flags, type,
+ * and data are visible when ops is.  This is to prevent ops methods from being
+ * called with bad data.
+ */
+void
+finit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops)
+{
+	fp->f_data = data;
+	fp->f_flag = flag;
+	fp->f_type = type;
+	atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops);
+}
+
+
+/*
  * Extract the file pointer associated with the specified descriptor for the
  * current user process.
  *
@@ -2210,54 +2211,20 @@ fputsock(struct socket *so)
 	sorele(so);
 }
 
-int
-fdrop(struct file *fp, struct thread *td)
-{
-
-	FILE_LOCK(fp);
-	return (fdrop_locked(fp, td));
-}
-
 /*
- * Drop reference on struct file passed in, may call closef if the
- * reference hits zero.
- * Expects struct file locked, and will unlock it.
+ * Handle the last reference to a file being closed.
  */
-static int
-fdrop_locked(struct file *fp, struct thread *td)
+int
+_fdrop(struct file *fp, struct thread *td)
 {
 	int error;
 
-	FILE_LOCK_ASSERT(fp, MA_OWNED);
-
-	if (--fp->f_count > 0) {
-		FILE_UNLOCK(fp);
-		return (0);
-	}
-
-	/*
-	 * We might have just dropped the last reference to a file
-	 * object that is for a UNIX domain socket whose message
-	 * buffers are being examined in unp_gc().  If that is the
-	 * case, FWAIT will be set in f_gcflag and we need to wait for
-	 * unp_gc() to finish its scan.
-	 */
-	while (fp->f_gcflag & FWAIT)
-		msleep(&fp->f_gcflag, fp->f_mtxp, 0, "fpdrop", 0);
-
-	/* We have the last ref so we can proceed without the file lock. */
-	FILE_UNLOCK(fp);
-	if (fp->f_count < 0)
-		panic("fdrop: count < 0");
+	error = 0;
+	if (fp->f_count != 0)
+		panic("fdrop: count %d", fp->f_count);
 	if (fp->f_ops != &badfileops)
 		error = fo_close(fp, td);
-	else
-		error = 0;
-
-	sx_xlock(&filelist_lock);
-	LIST_REMOVE(fp, f_list);
-	openfiles--;
-	sx_xunlock(&filelist_lock);
+	atomic_subtract_int(&openfiles, 1);
 
 	/*
 	 * The f_cdevpriv cannot be assigned non-NULL value while we
@@ -2307,9 +2274,7 @@ flock(struct thread *td, struct flock_ar
 	lf.l_len = 0;
 	if (uap->how & LOCK_UN) {
 		lf.l_type = F_UNLCK;
-		FILE_LOCK(fp);
-		fp->f_flag &= ~FHASLOCK;
-		FILE_UNLOCK(fp);
+		atomic_clear_int(&fp->f_flag, FHASLOCK);
 		error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK);
 		goto done2;
 	}
@@ -2321,9 +2286,7 @@ flock(struct thread *td, struct flock_ar
 		error = EBADF;
 		goto done2;
 	}
-	FILE_LOCK(fp);
-	fp->f_flag |= FHASLOCK;
-	FILE_UNLOCK(fp);
+	atomic_set_int(&fp->f_flag, FHASLOCK);
 	error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
 	    (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT);
 done2:
@@ -2368,9 +2331,7 @@ dupfdopen(struct thread *td, struct file
 		 * Check that the mode the file is being opened for is a
 		 * subset of the mode of the existing descriptor.
 		 */
-		FILE_LOCK(wfp);
 		if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) {
-			FILE_UNLOCK(wfp);
 			FILEDESC_XUNLOCK(fdp);
 			return (EACCES);
 		}
@@ -2379,8 +2340,7 @@ dupfdopen(struct thread *td, struct file
 		fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd];
 		if (fp == NULL)
 			fdused(fdp, indx);
-		fhold_locked(wfp);
-		FILE_UNLOCK(wfp);
+		fhold(wfp);
 		FILEDESC_XUNLOCK(fdp);
 		if (fp != NULL)
 			/*
@@ -2501,29 +2461,23 @@ sysctl_kern_file(SYSCTL_HANDLER_ARGS)
 	struct proc *p;
 	int error, n;
 
-	/*
-	 * Note: because the number of file descriptors is calculated
-	 * in different ways for sizing vs returning the data,
-	 * there is information leakage from the first loop.  However,
-	 * it is of a similar order of magnitude to the leakage from
-	 * global system statistics such as kern.openfiles.
-	 */
 	error = sysctl_wire_old_buffer(req, 0);
 	if (error != 0)
 		return (error);
 	if (req->oldptr == NULL) {
-		n = 16;		/* A slight overestimate. */
-		sx_slock(&filelist_lock);
-		LIST_FOREACH(fp, &filehead, f_list) {
-			/*
-			 * We should grab the lock, but this is an
-			 * estimate, so does it really matter?
-			 */
-			/* mtx_lock(fp->f_mtxp); */
-			n += fp->f_count;
-			/* mtx_unlock(f->f_mtxp); */
+		n = 0;
+		sx_slock(&allproc_lock);
+		FOREACH_PROC_IN_SYSTEM(p) {
+			if (p->p_state == PRS_NEW)
+				continue;
+			fdp = fdhold(p);
+			if (fdp == NULL)
+				continue;
+			/* overestimates sparse tables. */
+			n += fdp->fd_lastfile;
+			fddrop(fdp);
 		}
-		sx_sunlock(&filelist_lock);
+		sx_sunlock(&allproc_lock);
 		return (SYSCTL_OUT(req, 0, n * sizeof(xf)));
 	}
 	error = 0;
@@ -2554,7 +2508,7 @@ sysctl_kern_file(SYSCTL_HANDLER_ARGS)
 			xf.xf_vnode = fp->f_vnode;
 			xf.xf_type = fp->f_type;
 			xf.xf_count = fp->f_count;
-			xf.xf_msgcount = fp->f_msgcount;
+			xf.xf_msgcount = 0;
 			xf.xf_offset = fp->f_offset;
 			xf.xf_flag = fp->f_flag;
 			error = SYSCTL_OUT(req, &xf, sizeof(xf));
@@ -2662,7 +2616,6 @@ sysctl_kern_proc_ofiledesc(SYSCTL_HANDLE
 			continue;
 		bzero(kif, sizeof(*kif));
 		kif->kf_structsize = sizeof(*kif);
-		FILE_LOCK(fp);
 		vp = NULL;
 		so = NULL;
 		kif->kf_fd = i;
@@ -2670,7 +2623,6 @@ sysctl_kern_proc_ofiledesc(SYSCTL_HANDLE
 		case DTYPE_VNODE:
 			kif->kf_type = KF_TYPE_VNODE;
 			vp = fp->f_vnode;
-			vref(vp);
 			break;
 
 		case DTYPE_SOCKET:
@@ -2722,8 +2674,8 @@ sysctl_kern_proc_ofiledesc(SYSCTL_HANDLE
 		if (fp->f_flag & FHASLOCK)
 			kif->kf_flags |= KF_FLAG_HASLOCK;
 		kif->kf_offset = fp->f_offset;
-		FILE_UNLOCK(fp);
 		if (vp != NULL) {
+			vref(vp);
 			switch (vp->v_type) {
 			case VNON:
 				kif->kf_vnode_type = KF_VTYPE_VNON;
@@ -2895,7 +2847,6 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER
 		if ((fp = fdp->fd_ofiles[i]) == NULL)
 			continue;
 		bzero(kif, sizeof(*kif));
-		FILE_LOCK(fp);
 		vp = NULL;
 		so = NULL;
 		kif->kf_fd = i;
@@ -2959,7 +2910,6 @@ sysctl_kern_proc_filedesc(SYSCTL_HANDLER
 		if (fp->f_flag & FHASLOCK)
 			kif->kf_flags |= KF_FLAG_HASLOCK;
 		kif->kf_offset = fp->f_offset;
-		FILE_UNLOCK(fp);
 		if (vp != NULL) {
 			switch (vp->v_type) {
 			case VNON:
@@ -3118,7 +3068,7 @@ db_print_file(struct file *fp, int heade
 	p = file_to_first_proc(fp);
 	db_printf("%8p %4s %8p %08x %04x %5d %6d %8p %5d %12s\n", fp,
 	    file_type_to_name(fp->f_type), fp->f_data, fp->f_flag,
-	    fp->f_gcflag, fp->f_count, fp->f_msgcount, fp->f_vnode,
+	    0, fp->f_count, 0, fp->f_vnode,
 	    p != NULL ? p->p_pid : -1, p != NULL ? p->p_comm : "-");
 }
 
@@ -3136,13 +3086,24 @@ DB_SHOW_COMMAND(file, db_show_file)
 
 DB_SHOW_COMMAND(files, db_show_files)
 {
+	struct filedesc *fdp;
 	struct file *fp;
+	struct proc *p;
 	int header;
+	int n;
 
 	header = 1;
-	LIST_FOREACH(fp, &filehead, f_list) {
-		db_print_file(fp, header);
-		header = 0;
+	FOREACH_PROC_IN_SYSTEM(p) {
+		if (p->p_state == PRS_NEW)
+			continue;
+		if ((fdp = p->p_fd) == NULL)
+			continue;
+		for (n = 0; n < fdp->fd_nfiles; ++n) {
+			if ((fp = fdp->fd_ofiles[n]) == NULL)
+				continue;
+			db_print_file(fp, header);
+			header = 0;
+		}
 	}
 }
 #endif
@@ -3154,7 +3115,7 @@ SYSCTL_INT(_kern, KERN_MAXFILES, maxfile
     &maxfiles, 0, "Maximum number of files");
 
 SYSCTL_INT(_kern, OID_AUTO, openfiles, CTLFLAG_RD,
-    &openfiles, 0, "System-wide number of open files");
+    __DEVOLATILE(int *, &openfiles), 0, "System-wide number of open files");
 
 /* ARGSUSED*/
 static void
@@ -3163,7 +3124,6 @@ filelistinit(void *dummy)
 
 	file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
 	    NULL, NULL, UMA_ALIGN_PTR, 0);
-	sx_init(&filelist_lock, "filelist lock");
 	mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
 	mtx_init(&fdesc_mtx, "fdesc", NULL, MTX_DEF);
 }

Modified: user/kmacy/releng_7_2_fcs/sys/kern/kern_event.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/kern/kern_event.c	Tue May 19 03:45:36 2009	(r192368)
+++ user/kmacy/releng_7_2_fcs/sys/kern/kern_event.c	Tue May 19 04:43:00 2009	(r192369)
@@ -583,12 +583,7 @@ kqueue(struct thread *td, struct kqueue_
 	SLIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list);
 	FILEDESC_XUNLOCK(fdp);
 
-	FILE_LOCK(fp);
-	fp->f_flag = FREAD | FWRITE;
-	fp->f_type = DTYPE_KQUEUE;
-	fp->f_data = kq;
-	fp->f_ops = &kqueueops;
-	FILE_UNLOCK(fp);
+	finit(fp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops);
 	fdrop(fp, td);
 
 	td->td_retval[0] = fd;
@@ -1042,24 +1037,17 @@ kqueue_acquire(struct file *fp, struct k
 
 	error = 0;
 
-	FILE_LOCK(fp);
-	do {
-		kq = fp->f_data;
-		if (fp->f_type != DTYPE_KQUEUE || kq == NULL) {
-			error = EBADF;
-			break;
-		}
-		*kqp = kq;
-		KQ_LOCK(kq);
-		if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) {
-			KQ_UNLOCK(kq);
-			error = EBADF;
-			break;
-		}
-		kq->kq_refcnt++;
+	kq = fp->f_data;
+	if (fp->f_type != DTYPE_KQUEUE || kq == NULL)
+		return (EBADF);
+	*kqp = kq;
+	KQ_LOCK(kq);
+	if ((kq->kq_state & KQ_CLOSING) == KQ_CLOSING) {
 		KQ_UNLOCK(kq);
-	} while (0);
-	FILE_UNLOCK(fp);
+		return (EBADF);
+	}
+	kq->kq_refcnt++;
+	KQ_UNLOCK(kq);
 
 	return error;
 }

Modified: user/kmacy/releng_7_2_fcs/sys/kern/sys_generic.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/kern/sys_generic.c	Tue May 19 03:45:36 2009	(r192368)
+++ user/kmacy/releng_7_2_fcs/sys/kern/sys_generic.c	Tue May 19 04:43:00 2009	(r192369)
@@ -647,21 +647,17 @@ kern_ioctl(struct thread *td, int fd, u_
 		FILEDESC_XUNLOCK(fdp);
 		goto out;
 	case FIONBIO:
-		FILE_LOCK(fp);
 		if ((tmp = *(int *)data))
-			fp->f_flag |= FNONBLOCK;
+			atomic_set_int(&fp->f_flag, FNONBLOCK);
 		else
-			fp->f_flag &= ~FNONBLOCK;
-		FILE_UNLOCK(fp);
+			atomic_clear_int(&fp->f_flag, FNONBLOCK);
 		data = (void *)&tmp;
 		break;
 	case FIOASYNC:
-		FILE_LOCK(fp);
 		if ((tmp = *(int *)data))
-			fp->f_flag |= FASYNC;
+			atomic_set_int(&fp->f_flag, FASYNC);
 		else
-			fp->f_flag &= ~FASYNC;
-		FILE_UNLOCK(fp);
+			atomic_clear_int(&fp->f_flag, FASYNC);
 		data = (void *)&tmp;
 		break;
 	}

Modified: user/kmacy/releng_7_2_fcs/sys/kern/sys_pipe.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/kern/sys_pipe.c	Tue May 19 03:45:36 2009	(r192368)
+++ user/kmacy/releng_7_2_fcs/sys/kern/sys_pipe.c	Tue May 19 04:43:00 2009	(r192369)
@@ -363,12 +363,7 @@ pipe(td, uap)
 	 * to avoid races against processes which manage to dup() the read
 	 * side while we are blocked trying to allocate the write side.
 	 */
-	FILE_LOCK(rf);
-	rf->f_flag = FREAD | FWRITE;
-	rf->f_type = DTYPE_PIPE;
-	rf->f_data = rpipe;
-	rf->f_ops = &pipeops;
-	FILE_UNLOCK(rf);
+	finit(rf, FREAD | FWRITE, DTYPE_PIPE, rpipe, &pipeops);
 	error = falloc(td, &wf, &fd);
 	if (error) {
 		fdclose(fdp, rf, td->td_retval[0], td);
@@ -378,12 +373,7 @@ pipe(td, uap)
 		return (error);
 	}
 	/* An extra reference on `wf' has been held for us by falloc(). */
-	FILE_LOCK(wf);
-	wf->f_flag = FREAD | FWRITE;
-	wf->f_type = DTYPE_PIPE;
-	wf->f_data = wpipe;
-	wf->f_ops = &pipeops;
-	FILE_UNLOCK(wf);
+	finit(wf, FREAD | FWRITE, DTYPE_PIPE, wpipe, &pipeops);
 	fdrop(wf, td);
 	td->td_retval[1] = fd;
 	fdrop(rf, td);

Modified: user/kmacy/releng_7_2_fcs/sys/kern/uipc_mqueue.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/kern/uipc_mqueue.c	Tue May 19 03:45:36 2009	(r192368)
+++ user/kmacy/releng_7_2_fcs/sys/kern/uipc_mqueue.c	Tue May 19 04:43:00 2009	(r192369)
@@ -1997,12 +1997,8 @@ kmq_open(struct thread *td, struct kmq_o
 	mqnode_addref(pn);
 	sx_xunlock(&mqfs_data.mi_lock);
 
-	FILE_LOCK(fp);
-	fp->f_flag = (flags & (FREAD | FWRITE | O_NONBLOCK));
-	fp->f_type = DTYPE_MQUEUE;
-	fp->f_data = pn;
-	fp->f_ops = &mqueueops;
-	FILE_UNLOCK(fp);
+	finit(fp, flags & (FREAD | FWRITE | O_NONBLOCK), DTYPE_MQUEUE, pn,
+	    &mqueueops);
 
 	FILEDESC_XLOCK(fdp);
 	if (fdp->fd_ofiles[fd] == fp)
@@ -2095,6 +2091,7 @@ kmq_setattr(struct thread *td, struct km
 	struct mqueue *mq;
 	struct file *fp;
 	struct mq_attr attr, oattr;
+	u_int oflag, flag;
 	int error;
 
 	if (uap->attr) {
@@ -2110,13 +2107,15 @@ kmq_setattr(struct thread *td, struct km
 	oattr.mq_maxmsg  = mq->mq_maxmsg;
 	oattr.mq_msgsize = mq->mq_msgsize;
 	oattr.mq_curmsgs = mq->mq_curmsgs;
-	FILE_LOCK(fp);
-	oattr.mq_flags = (O_NONBLOCK & fp->f_flag);
 	if (uap->attr) {
-		fp->f_flag &= ~O_NONBLOCK;
-		fp->f_flag |= (attr.mq_flags & O_NONBLOCK);
-	}
-	FILE_UNLOCK(fp);
+		do {
+			oflag = flag = fp->f_flag;
+			flag &= ~O_NONBLOCK;
+			flag |= (attr.mq_flags & O_NONBLOCK);
+		} while (atomic_cmpset_int(&fp->f_flag, oflag, flag) == 0);
+	} else
+		oflag = fp->f_flag;
+	oattr.mq_flags = (O_NONBLOCK & oflag);
 	fdrop(fp, td);
 	if (uap->oattr)
 		error = copyout(&oattr, uap->oattr, sizeof(oattr));

Modified: user/kmacy/releng_7_2_fcs/sys/kern/uipc_syscalls.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/kern/uipc_syscalls.c	Tue May 19 03:45:36 2009	(r192368)
+++ user/kmacy/releng_7_2_fcs/sys/kern/uipc_syscalls.c	Tue May 19 04:43:00 2009	(r192369)
@@ -180,12 +180,7 @@ socket(td, uap)
 	if (error) {
 		fdclose(fdp, fp, fd, td);
 	} else {
-		FILE_LOCK(fp);
-		fp->f_data = so;	/* already has ref count */
-		fp->f_flag = FREAD|FWRITE;
-		fp->f_type = DTYPE_SOCKET;
-		fp->f_ops = &socketops;
-		FILE_UNLOCK(fp);
+		finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops);
 		td->td_retval[0] = fd;
 	}
 	fdrop(fp, td);
@@ -427,12 +422,7 @@ kern_accept(struct thread *td, int s, st
 	if (pgid != 0)
 		fsetown(pgid, &so->so_sigio);
 
-	FILE_LOCK(nfp);
-	nfp->f_data = so;	/* nfp has ref count from falloc */
-	nfp->f_flag = fflag;
-	nfp->f_type = DTYPE_SOCKET;
-	nfp->f_ops = &socketops;
-	FILE_UNLOCK(nfp);
+	finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
 	/* Sync socket nonblocking/async state with file flags */
 	tmp = fflag & FNONBLOCK;
 	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
@@ -652,16 +642,8 @@ socketpair(td, uap)
 		 if (error)
 			goto free4;
 	}
-	FILE_LOCK(fp1);
-	fp1->f_flag = FREAD|FWRITE;
-	fp1->f_type = DTYPE_SOCKET;
-	fp1->f_ops = &socketops;
-	FILE_UNLOCK(fp1);
-	FILE_LOCK(fp2);
-	fp2->f_flag = FREAD|FWRITE;
-	fp2->f_type = DTYPE_SOCKET;
-	fp2->f_ops = &socketops;
-	FILE_UNLOCK(fp2);
+	finit(fp1, FREAD | FWRITE, DTYPE_SOCKET, fp1->f_data, &socketops);
+	finit(fp2, FREAD | FWRITE, DTYPE_SOCKET, fp2->f_data, &socketops);
 	so1 = so2 = NULL;
 	error = copyout(sv, uap->rsv, 2 * sizeof (int));
 	if (error)
@@ -2309,12 +2291,7 @@ sctp_peeloff(td, uap)
 	so->so_qstate &= ~SQ_COMP;
 	so->so_head = NULL;
 	ACCEPT_UNLOCK();
-	FILE_LOCK(nfp);
-	nfp->f_data = so;
-	nfp->f_flag = fflag;
-	nfp->f_type = DTYPE_SOCKET;
-	nfp->f_ops = &socketops;
-	FILE_UNLOCK(nfp);
+	finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
 	error = sctp_do_peeloff(head, so, (sctp_assoc_t)uap->name);
 	if (error)
 		goto noconnection;

Modified: user/kmacy/releng_7_2_fcs/sys/kern/uipc_usrreq.c
==============================================================================
--- user/kmacy/releng_7_2_fcs/sys/kern/uipc_usrreq.c	Tue May 19 03:45:36 2009	(r192368)
+++ user/kmacy/releng_7_2_fcs/sys/kern/uipc_usrreq.c	Tue May 19 04:43:00 2009	(r192369)
@@ -235,12 +235,13 @@ static void	unp_shutdown(struct unpcb *)
 static void	unp_drop(struct unpcb *, int);
 static void	unp_gc(__unused void *, int);
 static void	unp_scan(struct mbuf *, void (*)(struct file *));
-static void	unp_mark(struct file *);
 static void	unp_discard(struct file *);
 static void	unp_freerights(struct file **, int);
 static void	unp_init(void);
 static int	unp_internalize(struct mbuf **, struct thread *);
 static int	unp_externalize(struct mbuf *, struct mbuf **);
+static void	unp_internalize_fp(struct file *);
+static void	unp_externalize_fp(struct file *);
 static struct mbuf	*unp_addsockcred(struct thread *, struct mbuf *);
 
 /*
@@ -1606,12 +1607,7 @@ unp_externalize(struct mbuf *control, st
 					panic("unp_externalize fdalloc failed");
 				fp = *rp++;
 				td->td_proc->p_fd->fd_ofiles[f] = fp;
-				FILE_LOCK(fp);
-				fp->f_msgcount--;
-				FILE_UNLOCK(fp);
-				UNP_GLOBAL_WLOCK();
-				unp_rights--;
-				UNP_GLOBAL_WUNLOCK();
+				unp_externalize_fp(fp);
 				*fdp++ = f;
 			}
 			FILEDESC_XUNLOCK(td->td_proc->p_fd);
@@ -1767,13 +1763,8 @@ unp_internalize(struct mbuf **controlp, 
 			for (i = 0; i < oldfds; i++) {
 				fp = fdescp->fd_ofiles[*fdp++];
 				*rp++ = fp;
-				FILE_LOCK(fp);
-				fp->f_count++;
-				fp->f_msgcount++;
-				FILE_UNLOCK(fp);
-				UNP_GLOBAL_WLOCK();
-				unp_rights++;
-				UNP_GLOBAL_WUNLOCK();
+				fhold(fp);
+				unp_internalize_fp(fp);
 			}
 			FILEDESC_SUNLOCK(fdescp);
 			break;
@@ -1860,255 +1851,201 @@ unp_addsockcred(struct thread *td, struc
 	return (m);
 }
 
+static struct unpcb *
+fptounp(struct file *fp)
+{
+	struct socket *so;
+
+	if (fp->f_type != DTYPE_SOCKET)
+		return (NULL);
+	if ((so = fp->f_data) == NULL)
+		return (NULL);
+	if (so->so_proto->pr_domain != &localdomain)
+		return (NULL);
+	return sotounpcb(so);
+}
+
+static void
+unp_discard(struct file *fp)
+{
+
+	unp_externalize_fp(fp);
+	(void) closef(fp, (struct thread *)NULL);
+}
+
+static void
+unp_internalize_fp(struct file *fp)
+{
+	struct unpcb *unp;
+
+	UNP_GLOBAL_WLOCK();
+	if ((unp = fptounp(fp)) != NULL) {
+		unp->unp_file = fp;
+		unp->unp_msgcount++;
+	}
+	unp_rights++;
+	UNP_GLOBAL_WUNLOCK();
+}
+
+static void
+unp_externalize_fp(struct file *fp)
+{
+	struct unpcb *unp;
+
+	UNP_GLOBAL_WLOCK();
+	if ((unp = fptounp(fp)) != NULL)
+		unp->unp_msgcount--;
+	unp_rights--;
+	UNP_GLOBAL_WUNLOCK();
+}
+
 /*
  * unp_defer indicates whether additional work has been defered for a future
  * pass through unp_gc().  It is thread local and does not require explicit
  * synchronization.
  */
-static int	unp_defer;
+static int	unp_marked;
+static int	unp_unreachable;
 
-static int unp_taskcount;
-SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, "");
+static void
+unp_accessable(struct file *fp)
+{
+	struct unpcb *unp;
 
-static int unp_recycled;
-SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, "");
+	unp = fptounp(fp);
+	if (fp == NULL)
+		return;
+	if (unp->unp_gcflag & UNPGC_REF)
+		return;
+	unp->unp_gcflag &= ~UNPGC_DEAD;
+	unp->unp_gcflag |= UNPGC_REF;
+	unp_marked++;
+}
 
 static void
-unp_gc(__unused void *arg, int pending)
+unp_gc_process(struct unpcb *unp)
 {
-	struct file *fp, *nextfp;
-	struct socket *so;
 	struct socket *soa;
-	struct file **extra_ref, **fpp;
-	int nunref, i;
-	int nfiles_snap;
-	int nfiles_slack = 20;
-
-	unp_taskcount++;
-	unp_defer = 0;
+	struct socket *so;
+	struct file *fp;
 
+	/* Already processed. */
+	if (unp->unp_gcflag & UNPGC_SCANNED)
+		return;
+	fp = unp->unp_file;
 	/*
-	 * Before going through all this, set all FDs to be NOT deferred and
-	 * NOT externally accessible.
+	 * Check for a socket potentially in a cycle.  It must be in a
+	 * queue as indicated by msgcount, and this must equal the file
+	 * reference count.  Note that when msgcount is 0 the file is NULL.
 	 */
-	sx_slock(&filelist_lock);
-	LIST_FOREACH(fp, &filehead, f_list)
-		fp->f_gcflag &= ~(FMARK|FDEFER);
-	do {
-		KASSERT(unp_defer >= 0, ("unp_gc: unp_defer %d", unp_defer));
-		LIST_FOREACH(fp, &filehead, f_list) {
-			FILE_LOCK(fp);
-			/*
-			 * If the file is not open, skip it -- could be a
-			 * file in the process of being opened, or in the
-			 * process of being closed.  If the file is
-			 * "closing", it may have been marked for deferred
-			 * consideration.  Clear the flag now if so.
-			 */
-			if (fp->f_count == 0) {
-				if (fp->f_gcflag & FDEFER)
-					unp_defer--;
-				fp->f_gcflag &= ~(FMARK|FDEFER);
-				FILE_UNLOCK(fp);
-				continue;
-			}
-
-			/*
-			 * If we already marked it as 'defer' in a
-			 * previous pass, then try to process it this
-			 * time and un-mark it.
-			 */
-			if (fp->f_gcflag & FDEFER) {
-				fp->f_gcflag &= ~FDEFER;
-				unp_defer--;
-			} else {
-				/*
-				 * If it's not deferred, then check if it's
-				 * already marked.. if so skip it
-				 */
-				if (fp->f_gcflag & FMARK) {
-					FILE_UNLOCK(fp);
-					continue;
-				}
-
-				/*
-				 * If all references are from messages in
-				 * transit, then skip it. it's not externally
-				 * accessible.
-				 */
-				if (fp->f_count == fp->f_msgcount) {
-					FILE_UNLOCK(fp);
-					continue;
-				}
-
-				/*
-				 * If it got this far then it must be
-				 * externally accessible.
-				 */
-				fp->f_gcflag |= FMARK;
-			}
+	if (unp->unp_msgcount != 0 && fp->f_count != 0 &&
+	    fp->f_count == unp->unp_msgcount) {
+		unp->unp_gcflag |= UNPGC_DEAD;
+		unp_unreachable++;
+		return;
+	}
+	/*
+	 * Mark all sockets we reference with RIGHTS.
+	 */
+	so = unp->unp_socket;
+	SOCKBUF_LOCK(&so->so_rcv);
+	unp_scan(so->so_rcv.sb_mb, unp_accessable);
+	SOCKBUF_UNLOCK(&so->so_rcv);
+	/*
+	 * Mark all sockets in our accept queue.
+	 */
+	ACCEPT_LOCK();
+	TAILQ_FOREACH(soa, &so->so_comp, so_list) {
+		SOCKBUF_LOCK(&soa->so_rcv);
+		unp_scan(soa->so_rcv.sb_mb, unp_accessable);
+		SOCKBUF_UNLOCK(&soa->so_rcv);
+	}
+	ACCEPT_UNLOCK();
+	unp->unp_gcflag |= UNPGC_SCANNED;
+}
 
-			/*
-			 * Either it was deferred, or it is externally
-			 * accessible and not already marked so.  Now check
-			 * if it is possibly one of OUR sockets.
-			 */
-			if (fp->f_type != DTYPE_SOCKET ||
-			    (so = fp->f_data) == NULL) {
-				FILE_UNLOCK(fp);
-				continue;
-			}
+static int unp_recycled;
+SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, "");
 
-			if (so->so_proto->pr_domain != &localdomain ||
-			    (so->so_proto->pr_flags & PR_RIGHTS) == 0) {
-				FILE_UNLOCK(fp);				
-				continue;
-			}
+static int unp_taskcount;
+SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, "");
 
-			/*
-			 * Tell any other threads that do a subsequent
-			 * fdrop() that we are scanning the message
-			 * buffers.
-			 */
-			fp->f_gcflag |= FWAIT;
-			FILE_UNLOCK(fp);
+static void
+unp_gc(__unused void *arg, int pending)
+{
+	struct unp_head *heads[] = { &unp_dhead, &unp_shead, NULL };
+	struct unp_head **head;
+	struct file **unref;
+	struct unpcb *unp;
+	int i;
 
-			/*
-			 * So, Ok, it's one of our sockets and it IS
-			 * externally accessible (or was deferred).  Now we
-			 * look to see if we hold any file descriptors in its
-			 * message buffers. Follow those links and mark them
-			 * as accessible too.
-			 */
-			SOCKBUF_LOCK(&so->so_rcv);
-			unp_scan(so->so_rcv.sb_mb, unp_mark);
-			SOCKBUF_UNLOCK(&so->so_rcv);
+	unp_taskcount++;
+	UNP_GLOBAL_RLOCK();
 
-			/*
-			 * If socket is in listening state, then sockets
-			 * in its accept queue are accessible, and so
-			 * are any descriptors in those sockets' receive
-			 * queues.
-			 */
-			ACCEPT_LOCK();
-			TAILQ_FOREACH(soa, &so->so_comp, so_list) {
-			    SOCKBUF_LOCK(&soa->so_rcv);
-			    unp_scan(soa->so_rcv.sb_mb, unp_mark);
-			    SOCKBUF_UNLOCK(&soa->so_rcv);
+	/*
+	 * First clear all gc flags from previous runs.
+	 */
+	for (head = heads; *head != NULL; head++)
+		LIST_FOREACH(unp, *head, unp_link)
+			unp->unp_gcflag &= ~(UNPGC_REF|UNPGC_DEAD);
+	/*
+	 * Scan marking all reachable sockets with UNPGC_REF.  Once a socket
+	 * is reachable all of the sockets it references are reachable.
+	 * Stop the scan once we do a complete loop without discovering
+	 * a new reachable socket.
+	 */
+	do {
+		unp_unreachable = 0;
+		unp_marked = 0;

*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
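
The truncated portion above completes the unp_gc() rewrite described in the
log: instead of walking a global file list, it clears the UNPGC_* flags on
every unix-domain socket, then repeatedly marks sockets reachable from outside
in-flight messages (including everything on a reachable socket's accept queue)
until a pass discovers nothing new, and finally closes the files whose only
references were in-flight.  A standalone sketch of that fixed-point marking,
with purely illustrative names (node, MARK_REF, MARK_DEAD) and without the
kernel's SCANNED optimization or locking:

	#include <stdio.h>

	#define MARK_REF	0x1	/* reachable from outside in-flight messages */
	#define MARK_DEAD	0x2	/* candidate: every reference is in-flight */

	struct node {
		int refcount;		/* analog of fp->f_count */
		int msgcount;		/* analog of unp->unp_msgcount */
		int flags;
		int nedges;
		struct node **edges;	/* sockets referenced from this rcv buffer */
	};

	static int marked;		/* new nodes marked this pass (cf. unp_marked) */

	static void
	mark_reachable(struct node *n)
	{
		if (n->flags & MARK_REF)
			return;
		n->flags &= ~MARK_DEAD;
		n->flags |= MARK_REF;
		marked++;
	}

	static void
	process(struct node *n)
	{
		int i;

		/* Possibly in a cycle: every reference comes from a message. */
		if (n->msgcount != 0 && n->refcount == n->msgcount) {
			n->flags |= MARK_DEAD;
			return;
		}
		/* Externally reachable: everything it references is reachable too. */
		for (i = 0; i < n->nedges; i++)
			mark_reachable(n->edges[i]);
	}

	int
	main(void)
	{
		struct node b = { 1, 1, 0, 0, NULL };	/* referenced only in-flight */
		struct node *aedges[] = { &b };
		struct node a = { 2, 1, 0, 1, aedges };	/* also held by a descriptor */
		struct node *all[] = { &a, &b };
		int i, n = 2;

		/* Iterate to a fixed point, as unp_gc() does over its unpcb lists. */
		do {
			marked = 0;
			for (i = 0; i < n; i++)
				process(all[i]);
		} while (marked != 0);

		for (i = 0; i < n; i++)
			printf("node %d: %s\n", i,
			    (all[i]->flags & (MARK_REF | MARK_DEAD)) == MARK_DEAD ?
			    "unreachable, would be closed" : "reachable");
		return (0);
	}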

