Bug: devfs is sure to have the bug.
Kostik Belousov
kostikbel at gmail.com
Wed Aug 3 13:50:49 UTC 2011
On Wed, Aug 03, 2011 at 02:44:23PM +0900, Kohji Okuno wrote:
> Hello,
>
> > Hello,
> >
> > I think that devfs is sure to have the bug.
> > I found that I couldn't open "/dev/XXX" though the kernel detected XXX
> > device.
> >
> >
> > "dm->dm_generation" is updated with "devfs_generation" in
> > devfs_populate(), and the context holds only "dm->dm_lock" in
> > devfs_populate().
> >
> > On the other hand, "devfs_generation" is incremented in devfs_create()
> > and devfs_destroy() the context holds only "devmtx" in devfs_create()
> > and devfs_destroy().
> >
> > If a context executes devfs_create() when other context is executing
> > (***), then "dm->dm_generation" is updated incorrect value.
> > As a result, we can not open the last detected device (we receive ENOENT).
> >
> > I think that we should change the lock method.
> > May I have advice?
> >
> > void
> > devfs_populate(struct devfs_mount *dm)
> > {
> >
> > sx_assert(&dm->dm_lock, SX_XLOCKED);
> > if (dm->dm_generation == devfs_generation)
> > return;
> > while (devfs_populate_loop(dm, 0))
> > continue;
> > (***)
> > dm->dm_generation = devfs_generation;
> > }
> >
> > void
> > devfs_create(struct cdev *dev)
> > {
> > struct cdev_priv *cdp;
> >
> > mtx_assert(&devmtx, MA_OWNED);
> > cdp = cdev2priv(dev);
> > cdp->cdp_flags |= CDP_ACTIVE;
> > cdp->cdp_inode = alloc_unrl(devfs_inos);
> > dev_refl(dev);
> > TAILQ_INSERT_TAIL(&cdevp_list, cdp, cdp_list);
> > devfs_generation++;
> > }
> >
> > void
> > devfs_destroy(struct cdev *dev)
> > {
> > struct cdev_priv *cdp;
> >
> > mtx_assert(&devmtx, MA_OWNED);
> > cdp = cdev2priv(dev);
> > cdp->cdp_flags &= ~CDP_ACTIVE;
> > devfs_generation++;
> > }
> >
> > Thanks.
>
> I propose the solution. May I have advice?
>
> void
> devfs_populate(struct devfs_mount *dm)
> {
>
> sx_assert(&dm->dm_lock, SX_XLOCKED);
> #if 1 /* I propose... */
> int tmp_generation;
> retry:
> tmp_generation = devfs_generation;
> if (dm->dm_generation == tmp_generation)
> return;
> while (devfs_populate_loop(dm, 0))
> continue;
> if (tmp_generation != devfs_generation)
> goto retry;
> dm->dm_generation = tmp_generation;
> #else /* Original */
> if (dm->dm_generation == devfs_generation)
> return;
> while (devfs_populate_loop(dm, 0))
> continue;
> dm->dm_generation = devfs_generation;
> #endif
> }
I think the problem you described is real, and suggested change is right.
Initially, I thought that we should work with devfs_generation as with
the atomic type due to unlocked access in the devfs_populate(), but then
convinced myself that this is not needed.
But also, I think there is another half of the problem. Namely,
devfs_lookup() calls devfs_populate_vp(), and then does lookup with the
help of devfs_lookupx(). We will miss the generation update
happen after the drop of the dm_lock in devfs_populate_vp() to reacquire
the directory vnode lock.
I propose the change below, consisting of your fix and also retry of
population and lookup in case generations do not match for ENOENT case.
diff --git a/sys/fs/devfs/devfs_devs.c b/sys/fs/devfs/devfs_devs.c
index d72ada0..8ff9bc2 100644
--- a/sys/fs/devfs/devfs_devs.c
+++ b/sys/fs/devfs/devfs_devs.c
@@ -63,7 +63,7 @@ static MALLOC_DEFINE(M_CDEVP, "DEVFS1", "DEVFS cdev_priv storage");
static SYSCTL_NODE(_vfs, OID_AUTO, devfs, CTLFLAG_RW, 0, "DEVFS filesystem");
-static unsigned devfs_generation;
+unsigned devfs_generation;
SYSCTL_UINT(_vfs_devfs, OID_AUTO, generation, CTLFLAG_RD,
&devfs_generation, 0, "DEVFS generation number");
@@ -630,13 +630,15 @@ devfs_populate_loop(struct devfs_mount *dm, int cleanup)
void
devfs_populate(struct devfs_mount *dm)
{
+ unsigned gen;
sx_assert(&dm->dm_lock, SX_XLOCKED);
- if (dm->dm_generation == devfs_generation)
+ gen = devfs_generation;
+ if (dm->dm_generation == gen)
return;
while (devfs_populate_loop(dm, 0))
continue;
- dm->dm_generation = devfs_generation;
+ dm->dm_generation = gen;
}
/*
diff --git a/sys/fs/devfs/devfs_int.h b/sys/fs/devfs/devfs_int.h
index cdc6aba..cb01ad1 100644
--- a/sys/fs/devfs/devfs_int.h
+++ b/sys/fs/devfs/devfs_int.h
@@ -71,6 +71,8 @@ struct cdev_priv {
#define cdev2priv(c) member2struct(cdev_priv, cdp_c, c)
+extern unsigned devfs_generation;
+
struct cdev *devfs_alloc(int);
int devfs_dev_exists(const char *);
void devfs_free(struct cdev *);
diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c
index 955bd8b..2603caa 100644
--- a/sys/fs/devfs/devfs_vnops.c
+++ b/sys/fs/devfs/devfs_vnops.c
@@ -188,7 +188,7 @@ devfs_clear_cdevpriv(void)
* On success devfs_populate_vp() returns with dmp->dm_lock held.
*/
static int
-devfs_populate_vp(struct vnode *vp)
+devfs_populate_vp(struct vnode *vp, int dm_locked)
{
struct devfs_dirent *de;
struct devfs_mount *dmp;
@@ -199,7 +199,8 @@ devfs_populate_vp(struct vnode *vp)
dmp = VFSTODEVFS(vp->v_mount);
locked = VOP_ISLOCKED(vp);
- sx_xlock(&dmp->dm_lock);
+ if (!dm_locked)
+ sx_xlock(&dmp->dm_lock);
DEVFS_DMP_HOLD(dmp);
/* Can't call devfs_populate() with the vnode lock held. */
@@ -242,7 +243,7 @@ devfs_vptocnp(struct vop_vptocnp_args *ap)
dmp = VFSTODEVFS(vp->v_mount);
- error = devfs_populate_vp(vp);
+ error = devfs_populate_vp(vp, 0);
if (error != 0)
return (error);
@@ -643,7 +644,7 @@ devfs_getattr(struct vop_getattr_args *ap)
struct devfs_mount *dmp;
struct cdev *dev;
- error = devfs_populate_vp(vp);
+ error = devfs_populate_vp(vp, 0);
if (error != 0)
return (error);
@@ -903,7 +904,7 @@ devfs_lookupx(struct vop_lookup_args *ap, int *dm_unlock)
if (cdev == NULL)
sx_xlock(&dmp->dm_lock);
- else if (devfs_populate_vp(dvp) != 0) {
+ else if (devfs_populate_vp(dvp, 0) != 0) {
*dm_unlock = 0;
sx_xlock(&dmp->dm_lock);
if (DEVFS_DMP_DROP(dmp)) {
@@ -966,19 +967,30 @@ devfs_lookupx(struct vop_lookup_args *ap, int *dm_unlock)
static int
devfs_lookup(struct vop_lookup_args *ap)
{
- int j;
struct devfs_mount *dmp;
- int dm_unlock;
+ int error, dm_unlock;
- if (devfs_populate_vp(ap->a_dvp) != 0)
+ dm_unlock = 0;
+retry:
+ if (devfs_populate_vp(ap->a_dvp, dm_unlock) != 0)
return (ENOTDIR);
dmp = VFSTODEVFS(ap->a_dvp->v_mount);
dm_unlock = 1;
- j = devfs_lookupx(ap, &dm_unlock);
- if (dm_unlock == 1)
+ error = devfs_lookupx(ap, &dm_unlock);
+ if (error == ENOENT) {
+ if (dm_unlock)
+ sx_assert(&dmp->dm_lock, SA_XLOCKED);
+ else {
+ sx_xlock(&dmp->dm_lock);
+ dm_unlock = 1;
+ }
+ if (devfs_generation != dmp->dm_generation)
+ goto retry;
+ }
+ if (dm_unlock)
sx_xunlock(&dmp->dm_lock);
- return (j);
+ return (error);
}
static int
@@ -1202,7 +1214,7 @@ devfs_readdir(struct vop_readdir_args *ap)
}
dmp = VFSTODEVFS(ap->a_vp->v_mount);
- if (devfs_populate_vp(ap->a_vp) != 0) {
+ if (devfs_populate_vp(ap->a_vp, 0) != 0) {
if (tmp_ncookies != NULL)
ap->a_ncookies = tmp_ncookies;
return (EIO);
@@ -1565,7 +1577,7 @@ devfs_symlink(struct vop_symlink_args *ap)
if (error)
return(error);
dmp = VFSTODEVFS(ap->a_dvp->v_mount);
- if (devfs_populate_vp(ap->a_dvp) != 0)
+ if (devfs_populate_vp(ap->a_dvp, 0) != 0)
return (ENOENT);
dd = ap->a_dvp->v_data;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 196 bytes
Desc: not available
Url : http://lists.freebsd.org/pipermail/freebsd-current/attachments/20110803/810a0b87/attachment.pgp
More information about the freebsd-current
mailing list