svn commit: r342628 - in head/sys/compat/linuxkpi/common: include/linux src

Konstantin Belousov kib at FreeBSD.org
Sun Dec 30 15:46:47 UTC 2018


Author: kib
Date: Sun Dec 30 15:46:45 2018
New Revision: 342628
URL: https://svnweb.freebsd.org/changeset/base/342628

Log:
  Fix linux_destroy_dev() behaviour when there are still files open from
  the destroying cdev.
  
  Currently linux_destroy_dev() waits for the reference count on the
  linux cdev to drain, and each open file hold the reference.
  Practically it means that linux_destroy_dev() is blocked until all
  userspace processes that have the cdev open, exit.  FreeBSD devfs does
  not have such problem, because device refcount only prevents freeing
  of the cdev memory, and separate 'active methods' counter blocks
  destroy_dev() until all threads leave the cdevsw methods.  After that,
  attempts to enter cdevsw methods are refused with an error.
  
  Implement somewhat similar mechanism for LinuxKPI cdevs.  Demote cdev
  refcount to only mean a hold on the linux cdev memory.  Add sirefs
  count to track both number of threads inside the cdev methods, and for
  single-bit indicator that cdev is being destroyed.  In the later case,
  the call is redirected to the dummy cdev.
  
  Reviewed by:	markj
  Discussed with:	hselasky
  Tested by:	zeising
  MFC after:	1 week
  Sponsored by:	Mellanox Technologies
  Differential revision:	https://reviews.freebsd.org/D18606

Modified:
  head/sys/compat/linuxkpi/common/include/linux/cdev.h
  head/sys/compat/linuxkpi/common/src/linux_compat.c

Modified: head/sys/compat/linuxkpi/common/include/linux/cdev.h
==============================================================================
--- head/sys/compat/linuxkpi/common/include/linux/cdev.h	Sun Dec 30 15:38:07 2018	(r342627)
+++ head/sys/compat/linuxkpi/common/include/linux/cdev.h	Sun Dec 30 15:46:45 2018	(r342628)
@@ -52,7 +52,8 @@ struct linux_cdev {
 	struct cdev	*cdev;
 	dev_t		dev;
 	const struct file_operations *ops;
-	atomic_long_t	refs;
+	u_int		refs;
+	u_int		siref;
 };
 
 static inline void
@@ -61,7 +62,7 @@ cdev_init(struct linux_cdev *cdev, const struct file_o
 
 	kobject_init(&cdev->kobj, &linux_cdev_static_ktype);
 	cdev->ops = ops;
-	atomic_long_set(&cdev->refs, 0);
+	cdev->refs = 1;
 }
 
 static inline struct linux_cdev *
@@ -70,8 +71,9 @@ cdev_alloc(void)
 	struct linux_cdev *cdev;
 
 	cdev = kzalloc(sizeof(struct linux_cdev), M_WAITOK);
-	if (cdev)
+	if (cdev != NULL)
 		kobject_init(&cdev->kobj, &linux_cdev_ktype);
+	cdev->refs = 1;
 	return (cdev);
 }
 

Modified: head/sys/compat/linuxkpi/common/src/linux_compat.c
==============================================================================
--- head/sys/compat/linuxkpi/common/src/linux_compat.c	Sun Dec 30 15:38:07 2018	(r342627)
+++ head/sys/compat/linuxkpi/common/src/linux_compat.c	Sun Dec 30 15:46:45 2018	(r342628)
@@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/proc.h>
 #include <sys/sglist.h>
 #include <sys/sleepqueue.h>
+#include <sys/refcount.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/bus.h>
@@ -100,6 +101,7 @@ MALLOC_DEFINE(M_KMALLOC, "linux", "Linux kmalloc compa
 #undef cdev
 #define	RB_ROOT(head)	(head)->rbh_root
 
+static void linux_cdev_deref(struct linux_cdev *ldev);
 static struct vm_area_struct *linux_cdev_handle_find(void *handle);
 
 struct kobject linux_class_root;
@@ -691,6 +693,52 @@ zap_vma_ptes(struct vm_area_struct *vma, unsigned long
 	return (0);
 }
 
+static struct file_operations dummy_ldev_ops = {
+	/* XXXKIB */
+};
+
+static struct linux_cdev dummy_ldev = {
+	.ops = &dummy_ldev_ops,
+};
+
+#define	LDEV_SI_DTR	0x0001
+#define	LDEV_SI_REF	0x0002
+
+static void
+linux_get_fop(struct linux_file *filp, const struct file_operations **fop,
+    struct linux_cdev **dev)
+{
+	struct linux_cdev *ldev;
+	u_int siref;
+
+	ldev = filp->f_cdev;
+	*fop = filp->f_op;
+	if (ldev != NULL) {
+		for (siref = ldev->siref;;) {
+			if ((siref & LDEV_SI_DTR) != 0) {
+				ldev = &dummy_ldev;
+				siref = ldev->siref;
+				*fop = ldev->ops;
+				MPASS((ldev->siref & LDEV_SI_DTR) == 0);
+			} else if (atomic_fcmpset_int(&ldev->siref, &siref,
+			    siref + LDEV_SI_REF)) {
+				break;
+			}
+		}
+	}
+	*dev = ldev;
+}
+
+static void
+linux_drop_fop(struct linux_cdev *ldev)
+{
+
+	if (ldev == NULL)
+		return;
+	MPASS((ldev->siref & ~LDEV_SI_DTR) != 0);
+	atomic_subtract_int(&ldev->siref, LDEV_SI_REF);
+}
+
 #define	OPW(fp,td,code) ({			\
 	struct file *__fpop;			\
 	__typeof(code) __retval;		\
@@ -703,10 +751,12 @@ zap_vma_ptes(struct vm_area_struct *vma, unsigned long
 })
 
 static int
-linux_dev_fdopen(struct cdev *dev, int fflags, struct thread *td, struct file *file)
+linux_dev_fdopen(struct cdev *dev, int fflags, struct thread *td,
+    struct file *file)
 {
 	struct linux_cdev *ldev;
 	struct linux_file *filp;
+	const struct file_operations *fop;
 	int error;
 
 	ldev = dev->si_drv1;
@@ -718,20 +768,17 @@ linux_dev_fdopen(struct cdev *dev, int fflags, struct 
 	filp->f_flags = file->f_flag;
 	filp->f_vnode = file->f_vnode;
 	filp->_file = file;
+	refcount_acquire(&ldev->refs);
 	filp->f_cdev = ldev;
 
 	linux_set_current(td);
+	linux_get_fop(filp, &fop, &ldev);
 
-	/* get a reference on the Linux character device */
-	if (atomic_long_add_unless(&ldev->refs, 1, -1L) == 0) {
-		kfree(filp);
-		return (EINVAL);
-	}
-
-	if (filp->f_op->open) {
-		error = -filp->f_op->open(file->f_vnode, filp);
-		if (error) {
-			atomic_long_dec(&ldev->refs);
+	if (fop->open != NULL) {
+		error = -fop->open(file->f_vnode, filp);
+		if (error != 0) {
+			linux_drop_fop(ldev);
+			linux_cdev_deref(filp->f_cdev);
 			kfree(filp);
 			return (error);
 		}
@@ -742,6 +789,7 @@ linux_dev_fdopen(struct cdev *dev, int fflags, struct 
 
 	/* release the file from devfs */
 	finit(file, filp->f_mode, DTYPE_DEV, filp, &linuxfileops);
+	linux_drop_fop(ldev);
 	return (ENXIO);
 }
 
@@ -877,7 +925,8 @@ linux_get_error(struct task_struct *task, int error)
 
 static int
 linux_file_ioctl_sub(struct file *fp, struct linux_file *filp,
-    u_long cmd, caddr_t data, struct thread *td)
+    const struct file_operations *fop, u_long cmd, caddr_t data,
+    struct thread *td)
 {
 	struct task_struct *task = current;
 	unsigned size;
@@ -902,20 +951,28 @@ linux_file_ioctl_sub(struct file *fp, struct linux_fil
 #if defined(__amd64__)
 	if (td->td_proc->p_elf_machine == EM_386) {
 		/* try the compat IOCTL handler first */
-		if (filp->f_op->compat_ioctl != NULL)
-			error = -OPW(fp, td, filp->f_op->compat_ioctl(filp, cmd, (u_long)data));
-		else
+		if (fop->compat_ioctl != NULL) {
+			error = -OPW(fp, td, fop->compat_ioctl(filp,
+			    cmd, (u_long)data));
+		} else {
 			error = ENOTTY;
+		}
 
 		/* fallback to the regular IOCTL handler, if any */
-		if (error == ENOTTY && filp->f_op->unlocked_ioctl != NULL)
-			error = -OPW(fp, td, filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data));
+		if (error == ENOTTY && fop->unlocked_ioctl != NULL) {
+			error = -OPW(fp, td, fop->unlocked_ioctl(filp,
+			    cmd, (u_long)data));
+		}
 	} else
 #endif
-	if (filp->f_op->unlocked_ioctl != NULL)
-		error = -OPW(fp, td, filp->f_op->unlocked_ioctl(filp, cmd, (u_long)data));
-	else
-		error = ENOTTY;
+	{
+		if (fop->unlocked_ioctl != NULL) {
+			error = -OPW(fp, td, fop->unlocked_ioctl(filp,
+			    cmd, (u_long)data));
+		} else {
+			error = ENOTTY;
+		}
+	}
 	if (size > 0) {
 		task->bsd_ioctl_data = NULL;
 		task->bsd_ioctl_len = 0;
@@ -1084,30 +1141,36 @@ static struct filterops linux_dev_kqfiltops_write = {
 static void
 linux_file_kqfilter_poll(struct linux_file *filp, int kqflags)
 {
+	struct thread *td;
+	const struct file_operations *fop;
+	struct linux_cdev *ldev;
 	int temp;
 
-	if (filp->f_kqflags & kqflags) {
-		struct thread *td = curthread;
+	if ((filp->f_kqflags & kqflags) == 0)
+		return;
 
-		/* get the latest polling state */
-		temp = OPW(filp->_file, td, filp->f_op->poll(filp, NULL));
+	td = curthread;
 
-		spin_lock(&filp->f_kqlock);
-		/* clear kqflags */
-		filp->f_kqflags &= ~(LINUX_KQ_FLAG_NEED_READ |
-		    LINUX_KQ_FLAG_NEED_WRITE);
-		/* update kqflags */
-		if (temp & (POLLIN | POLLOUT)) {
-			if (temp & POLLIN)
-				filp->f_kqflags |= LINUX_KQ_FLAG_NEED_READ;
-			if (temp & POLLOUT)
-				filp->f_kqflags |= LINUX_KQ_FLAG_NEED_WRITE;
+	linux_get_fop(filp, &fop, &ldev);
+	/* get the latest polling state */
+	temp = OPW(filp->_file, td, fop->poll(filp, NULL));
+	linux_drop_fop(ldev);
 
-			/* make sure the "knote" gets woken up */
-			KNOTE_LOCKED(&filp->f_selinfo.si_note, 0);
-		}
-		spin_unlock(&filp->f_kqlock);
+	spin_lock(&filp->f_kqlock);
+	/* clear kqflags */
+	filp->f_kqflags &= ~(LINUX_KQ_FLAG_NEED_READ |
+	    LINUX_KQ_FLAG_NEED_WRITE);
+	/* update kqflags */
+	if ((temp & (POLLIN | POLLOUT)) != 0) {
+		if ((temp & POLLIN) != 0)
+			filp->f_kqflags |= LINUX_KQ_FLAG_NEED_READ;
+		if ((temp & POLLOUT) != 0)
+			filp->f_kqflags |= LINUX_KQ_FLAG_NEED_WRITE;
+
+		/* make sure the "knote" gets woken up */
+		KNOTE_LOCKED(&filp->f_selinfo.si_note, 0);
 	}
+	spin_unlock(&filp->f_kqlock);
 }
 
 static int
@@ -1156,9 +1219,9 @@ linux_file_kqfilter(struct file *file, struct knote *k
 }
 
 static int
-linux_file_mmap_single(struct file *fp, vm_ooffset_t *offset,
-    vm_size_t size, struct vm_object **object, int nprot,
-    struct thread *td)
+linux_file_mmap_single(struct file *fp, const struct file_operations *fop,
+    vm_ooffset_t *offset, vm_size_t size, struct vm_object **object,
+    int nprot, struct thread *td)
 {
 	struct task_struct *task;
 	struct vm_area_struct *vmap;
@@ -1170,7 +1233,7 @@ linux_file_mmap_single(struct file *fp, vm_ooffset_t *
 	filp = (struct linux_file *)fp->f_data;
 	filp->f_flags = fp->f_flag;
 
-	if (filp->f_op->mmap == NULL)
+	if (fop->mmap == NULL)
 		return (EOPNOTSUPP);
 
 	linux_set_current(td);
@@ -1200,7 +1263,7 @@ linux_file_mmap_single(struct file *fp, vm_ooffset_t *
 	if (unlikely(down_write_killable(&vmap->vm_mm->mmap_sem))) {
 		error = linux_get_error(task, EINTR);
 	} else {
-		error = -OPW(fp, td, filp->f_op->mmap(filp, vmap));
+		error = -OPW(fp, td, fop->mmap(filp, vmap));
 		error = linux_get_error(task, error);
 		up_write(&vmap->vm_mm->mmap_sem);
 	}
@@ -1319,6 +1382,8 @@ linux_file_read(struct file *file, struct uio *uio, st
     int flags, struct thread *td)
 {
 	struct linux_file *filp;
+	const struct file_operations *fop;
+	struct linux_cdev *ldev;
 	ssize_t bytes;
 	int error;
 
@@ -1331,8 +1396,10 @@ linux_file_read(struct file *file, struct uio *uio, st
 	if (uio->uio_resid > DEVFS_IOSIZE_MAX)
 		return (EINVAL);
 	linux_set_current(td);
-	if (filp->f_op->read) {
-		bytes = OPW(file, td, filp->f_op->read(filp, uio->uio_iov->iov_base,
+	linux_get_fop(filp, &fop, &ldev);
+	if (fop->read != NULL) {
+		bytes = OPW(file, td, fop->read(filp,
+		    uio->uio_iov->iov_base,
 		    uio->uio_iov->iov_len, &uio->uio_offset));
 		if (bytes >= 0) {
 			uio->uio_iov->iov_base =
@@ -1347,6 +1414,7 @@ linux_file_read(struct file *file, struct uio *uio, st
 
 	/* update kqfilter status, if any */
 	linux_file_kqfilter_poll(filp, LINUX_KQ_FLAG_HAS_READ);
+	linux_drop_fop(ldev);
 
 	return (error);
 }
@@ -1356,10 +1424,11 @@ linux_file_write(struct file *file, struct uio *uio, s
     int flags, struct thread *td)
 {
 	struct linux_file *filp;
+	const struct file_operations *fop;
+	struct linux_cdev *ldev;
 	ssize_t bytes;
 	int error;
 
-	error = 0;
 	filp = (struct linux_file *)file->f_data;
 	filp->f_flags = file->f_flag;
 	/* XXX no support for I/O vectors currently */
@@ -1368,14 +1437,17 @@ linux_file_write(struct file *file, struct uio *uio, s
 	if (uio->uio_resid > DEVFS_IOSIZE_MAX)
 		return (EINVAL);
 	linux_set_current(td);
-	if (filp->f_op->write) {
-		bytes = OPW(file, td, filp->f_op->write(filp, uio->uio_iov->iov_base,
+	linux_get_fop(filp, &fop, &ldev);
+	if (fop->write != NULL) {
+		bytes = OPW(file, td, fop->write(filp,
+		    uio->uio_iov->iov_base,
 		    uio->uio_iov->iov_len, &uio->uio_offset));
 		if (bytes >= 0) {
 			uio->uio_iov->iov_base =
 			    ((uint8_t *)uio->uio_iov->iov_base) + bytes;
 			uio->uio_iov->iov_len -= bytes;
 			uio->uio_resid -= bytes;
+			error = 0;
 		} else {
 			error = linux_get_error(current, -bytes);
 		}
@@ -1385,6 +1457,8 @@ linux_file_write(struct file *file, struct uio *uio, s
 	/* update kqfilter status, if any */
 	linux_file_kqfilter_poll(filp, LINUX_KQ_FLAG_HAS_WRITE);
 
+	linux_drop_fop(ldev);
+
 	return (error);
 }
 
@@ -1393,16 +1467,21 @@ linux_file_poll(struct file *file, int events, struct 
     struct thread *td)
 {
 	struct linux_file *filp;
+	const struct file_operations *fop;
+	struct linux_cdev *ldev;
 	int revents;
 
 	filp = (struct linux_file *)file->f_data;
 	filp->f_flags = file->f_flag;
 	linux_set_current(td);
-	if (filp->f_op->poll != NULL)
-		revents = OPW(file, td, filp->f_op->poll(filp, LINUX_POLL_TABLE_NORMAL)) & events;
-	else
+	linux_get_fop(filp, &fop, &ldev);
+	if (fop->poll != NULL) {
+		revents = OPW(file, td, fop->poll(filp,
+		    LINUX_POLL_TABLE_NORMAL)) & events;
+	} else {
 		revents = 0;
-
+	}
+	linux_drop_fop(ldev);
 	return (revents);
 }
 
@@ -1410,23 +1489,28 @@ static int
 linux_file_close(struct file *file, struct thread *td)
 {
 	struct linux_file *filp;
+	const struct file_operations *fop;
+	struct linux_cdev *ldev;
 	int error;
 
 	filp = (struct linux_file *)file->f_data;
 
-	KASSERT(file_count(filp) == 0, ("File refcount(%d) is not zero", file_count(filp)));
+	KASSERT(file_count(filp) == 0,
+	    ("File refcount(%d) is not zero", file_count(filp)));
 
+	error = 0;
 	filp->f_flags = file->f_flag;
 	linux_set_current(td);
 	linux_poll_wait_dequeue(filp);
-	error = -OPW(file, td, filp->f_op->release(filp->f_vnode, filp));
+	linux_get_fop(filp, &fop, &ldev);
+	if (fop->release != NULL)
+		error = -OPW(file, td, fop->release(filp->f_vnode, filp));
 	funsetown(&filp->f_sigio);
 	if (filp->f_vnode != NULL)
 		vdrop(filp->f_vnode);
-	if (filp->f_cdev != NULL) {
-		/* put a reference on the Linux character device */
-		atomic_long_dec(&filp->f_cdev->refs);
-	}
+	linux_drop_fop(ldev);
+	if (filp->f_cdev != NULL)
+		linux_cdev_deref(filp->f_cdev);
 	kfree(filp);
 
 	return (error);
@@ -1437,27 +1521,30 @@ linux_file_ioctl(struct file *fp, u_long cmd, void *da
     struct thread *td)
 {
 	struct linux_file *filp;
+	const struct file_operations *fop;
+	struct linux_cdev *ldev;
 	int error;
 
+	error = 0;
 	filp = (struct linux_file *)fp->f_data;
 	filp->f_flags = fp->f_flag;
-	error = 0;
+	linux_get_fop(filp, &fop, &ldev);
 
 	linux_set_current(td);
 	switch (cmd) {
 	case FIONBIO:
 		break;
 	case FIOASYNC:
-		if (filp->f_op->fasync == NULL)
+		if (fop->fasync == NULL)
 			break;
-		error = -OPW(fp, td, filp->f_op->fasync(0, filp, fp->f_flag & FASYNC));
+		error = -OPW(fp, td, fop->fasync(0, filp, fp->f_flag & FASYNC));
 		break;
 	case FIOSETOWN:
 		error = fsetown(*(int *)data, &filp->f_sigio);
 		if (error == 0) {
-			if (filp->f_op->fasync == NULL)
+			if (fop->fasync == NULL)
 				break;
-			error = -OPW(fp, td, filp->f_op->fasync(0, filp,
+			error = -OPW(fp, td, fop->fasync(0, filp,
 			    fp->f_flag & FASYNC));
 		}
 		break;
@@ -1465,16 +1552,17 @@ linux_file_ioctl(struct file *fp, u_long cmd, void *da
 		*(int *)data = fgetown(&filp->f_sigio);
 		break;
 	default:
-		error = linux_file_ioctl_sub(fp, filp, cmd, data, td);
+		error = linux_file_ioctl_sub(fp, filp, fop, cmd, data, td);
 		break;
 	}
+	linux_drop_fop(ldev);
 	return (error);
 }
 
 static int
 linux_file_mmap_sub(struct thread *td, vm_size_t objsize, vm_prot_t prot,
     vm_prot_t *maxprotp, int *flagsp, struct file *fp,
-    vm_ooffset_t *foff, vm_object_t *objp)
+    vm_ooffset_t *foff, const struct file_operations *fop, vm_object_t *objp)
 {
 	/*
 	 * Character devices do not provide private mappings
@@ -1486,7 +1574,8 @@ linux_file_mmap_sub(struct thread *td, vm_size_t objsi
 	if ((*flagsp & (MAP_PRIVATE | MAP_COPY)) != 0)
 		return (EINVAL);
 
-	return (linux_file_mmap_single(fp, foff, objsize, objp, (int)prot, td));
+	return (linux_file_mmap_single(fp, fop, foff, objsize, objp,
+	    (int)prot, td));
 }
 
 static int
@@ -1495,6 +1584,8 @@ linux_file_mmap(struct file *fp, vm_map_t map, vm_offs
     struct thread *td)
 {
 	struct linux_file *filp;
+	const struct file_operations *fop;
+	struct linux_cdev *ldev;
 	struct mount *mp;
 	struct vnode *vp;
 	vm_object_t object;
@@ -1541,15 +1632,18 @@ linux_file_mmap(struct file *fp, vm_map_t map, vm_offs
 	}
 	maxprot &= cap_maxprot;
 
-	error = linux_file_mmap_sub(td, size, prot, &maxprot, &flags, fp, &foff,
-	    &object);
+	linux_get_fop(filp, &fop, &ldev);
+	error = linux_file_mmap_sub(td, size, prot, &maxprot, &flags, fp,
+	    &foff, fop, &object);
 	if (error != 0)
-		return (error);
+		goto out;
 
 	error = vm_mmap_object(map, addr, size, prot, maxprot, flags, object,
 	    foff, FALSE, td);
 	if (error != 0)
 		vm_object_deallocate(object);
+out:
+	linux_drop_fop(ldev);
 	return (error);
 }
 
@@ -1971,6 +2065,14 @@ linux_completion_done(struct completion *c)
 }
 
 static void
+linux_cdev_deref(struct linux_cdev *ldev)
+{
+
+	if (refcount_release(&ldev->refs))
+		kfree(ldev);
+}
+
+static void
 linux_cdev_release(struct kobject *kobj)
 {
 	struct linux_cdev *cdev;
@@ -1979,7 +2081,7 @@ linux_cdev_release(struct kobject *kobj)
 	cdev = container_of(kobj, struct linux_cdev, kobj);
 	parent = kobj->parent;
 	linux_destroy_dev(cdev);
-	kfree(cdev);
+	linux_cdev_deref(cdev);
 	kobject_put(parent);
 }
 
@@ -1996,20 +2098,19 @@ linux_cdev_static_release(struct kobject *kobj)
 }
 
 void
-linux_destroy_dev(struct linux_cdev *cdev)
+linux_destroy_dev(struct linux_cdev *ldev)
 {
 
-	if (cdev->cdev == NULL)
+	if (ldev->cdev == NULL)
 		return;
 
-	atomic_long_dec(&cdev->refs);
+	MPASS((ldev->siref & LDEV_SI_DTR) == 0);
+	atomic_set_int(&ldev->siref, LDEV_SI_DTR);
+	while ((atomic_load_int(&ldev->siref) & ~LDEV_SI_DTR) != 0)
+		pause("ldevdtr", hz / 4);
 
-	/* wait for all open files to be closed */
-	while (atomic_long_read(&cdev->refs) != -1L)
-		pause("ldevdrn", hz);
-
-	destroy_dev(cdev->cdev);
-	cdev->cdev = NULL;
+	destroy_dev(ldev->cdev);
+	ldev->cdev = NULL;
 }
 
 const struct kobj_type linux_cdev_ktype = {


More information about the svn-src-head mailing list