[TEST(/review)] major/minor/devname fix
Poul-Henning Kamp
phk at phk.freebsd.dk
Wed Mar 9 13:40:20 PST 2005
For the people with devname(3) and ssh trouble: Please try out
this patch. It is not quite commit ready yet, but it should
make your system usable again.
Part I: Divorce the userland notion of "major/minor" from the kernel.
Change dev2udev() to DTRT, so that kvm/sysctl grubbing programs see
the userland values. Make sysctl_devname() do the right thing as
well. Move both to devfs where they belong (now).
Add a small frustation in case anybody tries to shortcut us again.
Part II: Reclaim and clean up kernel notion of major/minor.
Change the kernel side of the minor concept into cdev->si_drv0 and
make things work accordingly.
Remove the kernel side notion of major in toto, the real handle is
the cdevsw{} of which any given driver can have multiple.
Incomplete: only drivers using SI_CHEAPCLONE should be required to
use unique values in si_drv0.
Index: conf/kern.post.mk
===================================================================
RCS file: /home/ncvs/src/sys/conf/kern.post.mk,v
retrieving revision 1.76
diff -u -r1.76 kern.post.mk
--- conf/kern.post.mk 25 Feb 2005 05:34:45 -0000 1.76
+++ conf/kern.post.mk 9 Mar 2005 21:31:00 -0000
@@ -110,7 +110,7 @@
kernel-clean:
rm -f *.o *.so *.So *.ko *.s eddep errs \
${FULLKERNEL} ${KERNEL_KO} linterrs makelinks tags \
- vers.c vnode_if.c vnode_if.h majors.c \
+ vers.c vnode_if.c vnode_if.h \
${MFILES:T:S/.m$/.c/} ${MFILES:T:S/.m$/.h/} \
${CLEAN}
@@ -227,15 +227,12 @@
${INSTALL} -p -m 555 -o root -g wheel ${KERNEL_KO} ${DESTDIR}${KODIR}
.endif
-config.o env.o hints.o majors.o vers.o vnode_if.o:
+config.o env.o hints.o vers.o vnode_if.o:
${NORMAL_C}
-config.ln env.ln hints.ln majors.ln vers.ln vnode_if.ln:
+config.ln env.ln hints.ln vers.ln vnode_if.ln:
${NORMAL_LINT}
-majors.c: $S/conf/majors $S/conf/majors.awk
- ${AWK} -f $S/conf/majors.awk $S/conf/majors > ${.TARGET}
-
vers.c: $S/conf/newvers.sh $S/sys/param.h ${SYSTEM_DEP}
MAKE=${MAKE} sh $S/conf/newvers.sh ${KERN_IDENT}
Index: conf/kern.pre.mk
===================================================================
RCS file: /home/ncvs/src/sys/conf/kern.pre.mk,v
retrieving revision 1.63
diff -u -r1.63 kern.pre.mk
--- conf/kern.pre.mk 13 Feb 2005 05:58:40 -0000 1.63
+++ conf/kern.pre.mk 9 Mar 2005 21:31:00 -0000
@@ -121,7 +121,7 @@
NORMAL_LINT= ${LINT} ${LINTFLAGS} ${CFLAGS:M-[DIU]*} ${.IMPSRC}
GEN_CFILES= $S/$M/$M/genassym.c ${MFILES:T:S/.m$/.c/}
-SYSTEM_CFILES= config.c env.c hints.c majors.c vnode_if.c
+SYSTEM_CFILES= config.c env.c hints.c vnode_if.c
SYSTEM_DEP= Makefile ${SYSTEM_OBJS}
SYSTEM_OBJS= locore.o ${MDOBJS} ${OBJS}
SYSTEM_OBJS+= ${SYSTEM_CFILES:.c=.o}
Index: fs/devfs/devfs_devs.c
===================================================================
RCS file: /home/ncvs/src/sys/fs/devfs/devfs_devs.c,v
retrieving revision 1.33
diff -u -r1.33 devfs_devs.c
--- fs/devfs/devfs_devs.c 10 Feb 2005 12:22:17 -0000 1.33
+++ fs/devfs/devfs_devs.c 9 Mar 2005 21:31:00 -0000
@@ -114,6 +114,8 @@
devfs_itode (struct devfs_mount *dm, int inode)
{
+ if (inode < 0)
+ return (NULL);
if (inode < NDEVFSINO)
return (&dm->dm_dirent[inode]);
if (devfs_overflow == NULL)
@@ -127,6 +129,8 @@
devfs_itod (int inode)
{
+ if (inode < 0)
+ return (NULL);
if (inode < NDEVFSINO)
return (&devfs_inot[inode]);
if (devfs_overflow == NULL)
@@ -270,10 +274,7 @@
if (dev == NULL && de != NULL) {
dd = de->de_dir;
*dep = NULL;
- TAILQ_REMOVE(&dd->de_dlist, de, de_list);
- if (de->de_vnode)
- de->de_vnode->v_data = NULL;
- FREE(de, M_DEVFS);
+ devfs_delete(dd, de);
devfs_dropref(i);
continue;
}
Index: fs/devfs/devfs_vnops.c
===================================================================
RCS file: /home/ncvs/src/sys/fs/devfs/devfs_vnops.c,v
retrieving revision 1.105
diff -u -r1.105 devfs_vnops.c
--- fs/devfs/devfs_vnops.c 22 Feb 2005 18:17:31 -0000 1.105
+++ fs/devfs/devfs_vnops.c 9 Mar 2005 21:31:00 -0000
@@ -61,6 +61,7 @@
#include <sys/proc.h>
#include <sys/stat.h>
#include <sys/sx.h>
+#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/ttycom.h>
#include <sys/unistd.h>
@@ -113,6 +114,25 @@
extern struct vop_vector devfs_vnodeops;
extern struct vop_vector devfs_specops;
+static u_int
+devfs_random(void)
+{
+ static u_int devfs_seed;
+
+ while (devfs_seed == 0) {
+ /*
+ * Make sure people don't make stupid assumptions
+ * about device major/minor numbers in userspace.
+ * We do this late to get entropy and for the same
+ * reason we force a reseed, but it may not be
+ * late enough for entropy to be available.
+ */
+ arc4rand(&devfs_seed, sizeof devfs_seed, 1);
+ devfs_seed &= 0xf0f;
+ }
+ return (devfs_seed);
+}
+
static int
devfs_fp_check(struct file *fp, struct cdev **devp, struct cdevsw **dswp)
{
@@ -394,8 +414,10 @@
int error = 0;
struct devfs_dirent *de;
struct cdev *dev;
+ struct devfs_mount *dmp;
de = vp->v_data;
+ dmp = VFSTODEVFS(vp->v_mount);
KASSERT(de != NULL, ("Null dirent in devfs_getattr vp=%p", vp));
if (vp->v_type == VDIR) {
de = de->de_dir;
@@ -441,7 +463,8 @@
vap->va_mtime = dev->si_mtime;
fix(dev->si_ctime);
vap->va_ctime = dev->si_ctime;
- vap->va_rdev = dev->si_udev;
+
+ vap->va_rdev = de->de_inode ^ devfs_random();
}
vap->va_gen = 0;
vap->va_flags = 0;
@@ -462,7 +485,9 @@
struct cdevsw *dsw;
struct vnode *vp;
struct vnode *vpold;
- int error;
+ int error, i;
+ const char *p;
+ struct fiodgname_arg *fgn;
error = devfs_fp_check(fp, &dev, &dsw);
if (error)
@@ -472,6 +497,13 @@
*(int *)data = dsw->d_flags & D_TYPEMASK;
dev_relthread(dev);
return (0);
+ } else if (com == FIODGNAME) {
+ fgn = data;
+ p = devtoname(dev);
+ i = strlen(p) + 1;
+ if (i > fgn->len)
+ return (EINVAL);
+ return (copyout(p, fgn->buf, i));
}
if (dsw->d_flags & D_NEEDGIANT)
mtx_lock(&Giant);
@@ -1432,6 +1464,43 @@
.vop_write = VOP_PANIC,
};
+dev_t
+dev2udev(struct cdev *x)
+{
+ if (x == NULL)
+ return (NODEV);
+ return (x->si_inode ^ devfs_random());
+}
+
+/*
+ * Helper sysctl for devname(3). We're given a struct cdev * and return
+ * the name, if any, registered by the device driver.
+ */
+static int
+sysctl_devname(SYSCTL_HANDLER_ARGS)
+{
+ int error;
+ dev_t ud;
+ struct cdev *dev, **dp;
+
+ error = SYSCTL_IN(req, &ud, sizeof (ud));
+ if (error)
+ return (error);
+ if (ud == NODEV)
+ return(EINVAL);
+ dp = devfs_itod(ud ^ devfs_random());
+ if (dp == NULL)
+ return(ENOENT);
+ dev = *dp;
+ if (dev == NULL)
+ return(ENOENT);
+ return(SYSCTL_OUT(req, dev->si_name, strlen(dev->si_name) + 1));
+ return (error);
+}
+
+SYSCTL_PROC(_kern, OID_AUTO, devname, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_ANYBODY,
+ NULL, 0, sysctl_devname, "", "devname(3) handler");
+
/*
* Our calling convention to the device drivers used to be that we passed
* vnode.h IO_* flags to read()/write(), but we're moving to fcntl.h O_
Index: kern/kern_conf.c
===================================================================
RCS file: /home/ncvs/src/sys/kern/kern_conf.c,v
retrieving revision 1.176
diff -u -r1.176 kern_conf.c
--- kern/kern_conf.c 8 Mar 2005 10:40:03 -0000 1.176
+++ kern/kern_conf.c 9 Mar 2005 21:31:00 -0000
@@ -46,21 +46,8 @@
static MALLOC_DEFINE(M_DEVT, "cdev", "cdev storage");
-/* Built at compile time from sys/conf/majors */
-extern unsigned char reserved_majors[256];
-
-/*
- * This is the number of hash-buckets. Experiments with 'real-life'
- * dev_t's show that a prime halfway between two powers of two works
- * best.
- */
-#define DEVT_HASH 83
-
-static LIST_HEAD(, cdev) dev_hash[DEVT_HASH];
-
static struct mtx devmtx;
static void freedev(struct cdev *dev);
-static struct cdev *newdev(int x, int y, struct cdev *);
static void destroy_devl(struct cdev *dev);
void
@@ -189,7 +176,6 @@
.d_mmap = dead_mmap,
.d_strategy = dead_strategy,
.d_name = "dead",
- .d_maj = 255,
.d_dump = dead_dump,
.d_kqfilter = dead_kqfilter
};
@@ -230,16 +216,12 @@
#define no_dump (dumper_t *)enodev
-/*
- * struct cdev * and u_dev_t primitives
- */
-
int
major(struct cdev *x)
{
if (x == NULL)
return NODEV;
- return((x->si_udev >> 8) & 0xff);
+ return((x->si_drv0 >> 8) & 0xff);
}
int
@@ -247,7 +229,7 @@
{
if (x == NULL)
return NODEV;
- return(x->si_udev & MAXMINOR);
+ return(x->si_drv0 & MAXMINOR);
}
int
@@ -287,29 +269,6 @@
return (si);
}
-static struct cdev *
-newdev(int x, int y, struct cdev *si)
-{
- struct cdev *si2;
- dev_t udev;
- int hash;
-
- mtx_assert(&devmtx, MA_OWNED);
- if (x == umajor(NODEV) && y == uminor(NODEV))
- panic("newdev of NODEV");
- udev = (x << 8) | y;
- hash = udev % DEVT_HASH;
- LIST_FOREACH(si2, &dev_hash[hash], si_hash) {
- if (si2->si_udev == udev) {
- freedev(si);
- return (si2);
- }
- }
- si->si_udev = udev;
- LIST_INSERT_HEAD(&dev_hash[hash], si, si_hash);
- return (si);
-}
-
static void
freedev(struct cdev *dev)
{
@@ -317,31 +276,10 @@
free(dev, M_DEVT);
}
-dev_t
-dev2udev(struct cdev *x)
-{
- if (x == NULL)
- return (NODEV);
- return (x->si_udev);
-}
-
struct cdev *
findcdev(dev_t udev)
{
- struct cdev *si;
- int hash;
-
- mtx_assert(&devmtx, MA_NOTOWNED);
- if (udev == NODEV)
- return (NULL);
- dev_lock();
- hash = udev % DEVT_HASH;
- LIST_FOREACH(si, &dev_hash[hash], si_hash) {
- if (si->si_udev == udev)
- break;
- }
- dev_unlock();
- return (si);
+ return (NULL);
}
int
@@ -357,55 +295,27 @@
}
static void
-find_major(struct cdevsw *devsw)
-{
- int i;
-
- if (devsw->d_maj != MAJOR_AUTO) {
- printf("NOTICE: Ignoring d_maj hint from driver \"%s\", %s",
- devsw->d_name, "driver should be updated/fixed\n");
- devsw->d_maj = MAJOR_AUTO;
- }
- for (i = NUMCDEVSW - 1; i > 0; i--)
- if (reserved_majors[i] != i)
- break;
- KASSERT(i > 0, ("Out of major numbers (%s)", devsw->d_name));
- devsw->d_maj = i;
- reserved_majors[i] = i;
- devsw->d_flags |= D_ALLOCMAJ;
-}
-
-static void
fini_cdevsw(struct cdevsw *devsw)
{
- if (devsw->d_flags & D_ALLOCMAJ) {
- reserved_majors[devsw->d_maj] = 0;
- devsw->d_maj = MAJOR_AUTO;
- devsw->d_flags &= ~D_ALLOCMAJ;
- }
+
+ mtx_assert(&devmtx, MA_OWNED);
devsw->d_flags &= ~D_INIT;
}
-static void
+static struct cdevsw *
prep_cdevsw(struct cdevsw *devsw)
{
+ if (devsw->d_flags & D_INIT)
+ return (devsw);
dev_lock();
if (devsw->d_version != D_VERSION_00) {
printf(
"WARNING: Device driver \"%s\" has wrong version %s\n",
devsw->d_name, "and is disabled. Recompile KLD module.");
- devsw->d_open = dead_open;
- devsw->d_close = dead_close;
- devsw->d_read = dead_read;
- devsw->d_write = dead_write;
- devsw->d_ioctl = dead_ioctl;
- devsw->d_poll = dead_poll;
- devsw->d_mmap = dead_mmap;
- devsw->d_strategy = dead_strategy;
- devsw->d_dump = dead_dump;
- devsw->d_kqfilter = dead_kqfilter;
+ dev_unlock();
+ return (&dead_cdevsw);
}
if (devsw->d_flags & D_TTY) {
@@ -431,57 +341,64 @@
devsw->d_flags |= D_INIT;
- if (!(devsw->d_flags & D_ALLOCMAJ))
- find_major(devsw);
dev_unlock();
+ return (devsw);
}
-struct cdev *
-make_dev(struct cdevsw *devsw, int minornr, uid_t uid, gid_t gid, int perms, const char *fmt, ...)
+static void
+imake_dev(struct cdev *dev, u_int drv0, uid_t uid, gid_t gid, int perms, const char *fmt, va_list ap)
{
- struct cdev *dev;
- va_list ap;
int i;
- KASSERT((minornr & ~MAXMINOR) == 0,
- ("Invalid minor (0x%x) in make_dev", minornr));
-
- if (!(devsw->d_flags & D_INIT))
- prep_cdevsw(devsw);
- dev = allocdev();
- dev_lock();
- dev = newdev(devsw->d_maj, minornr, dev);
+ dev->si_drv0 = drv0;
if (dev->si_flags & SI_CHEAPCLONE &&
- dev->si_flags & SI_NAMED &&
- dev->si_devsw == devsw) {
+ dev->si_flags & SI_NAMED) {
/*
* This is allowed as it removes races and generally
* simplifies cloning devices.
* XXX: still ??
*/
- dev_unlock();
- return (dev);
+ return;
}
KASSERT(!(dev->si_flags & SI_NAMED),
("make_dev() by driver %s on pre-existing device (maj=%d, min=%x, name=%s)",
- devsw->d_name, major(dev), minor(dev), devtoname(dev)));
+ dev->si_devsw->d_name, major(dev), minor(dev), devtoname(dev)));
- va_start(ap, fmt);
i = vsnrprintf(dev->__si_namebuf, sizeof dev->__si_namebuf, 32, fmt, ap);
if (i > (sizeof dev->__si_namebuf - 1)) {
printf("WARNING: Device name truncated! (%s)\n",
dev->__si_namebuf);
}
- va_end(ap);
- dev->si_devsw = devsw;
dev->si_uid = uid;
dev->si_gid = gid;
dev->si_mode = perms;
dev->si_flags |= SI_NAMED;
- LIST_INSERT_HEAD(&devsw->d_devs, dev, si_list);
devfs_create(dev);
+}
+
+
+struct cdev *
+make_dev(struct cdevsw *devsw, u_int drv0, uid_t uid, gid_t gid, int perms, const char *fmt, ...)
+{
+ struct cdev *dev;
+ va_list ap;
+
+ devsw = prep_cdevsw(devsw);
+ LIST_FOREACH(dev, &devsw->d_devs, si_list) {
+ if (dev->si_drv0 == drv0)
+ break;
+ }
+ if (dev == NULL) {
+ dev = allocdev();
+ LIST_INSERT_HEAD(&devsw->d_devs, dev, si_list);
+ }
+ dev->si_devsw = devsw;
+ dev_lock();
+ va_start(ap, fmt);
+ imake_dev(dev, drv0, uid, gid, perms, fmt, ap);
+ va_end(ap);
dev_unlock();
return (dev);
}
@@ -589,7 +506,6 @@
if (LIST_EMPTY(&csw->d_devs))
fini_cdevsw(csw);
- LIST_REMOVE(dev, si_hash);
}
dev->si_flags &= ~SI_ALIAS;
@@ -705,8 +621,7 @@
KASSERT(*up <= CLONE_UNITMASK,
("Too high unit (0x%x) in clone_create", *up));
- if (!(csw->d_flags & D_INIT))
- prep_cdevsw(csw);
+ csw = prep_cdevsw(csw);
/*
* Search the list for a lot of things in one go:
@@ -744,7 +659,8 @@
}
if (unit == -1)
unit = low & CLONE_UNITMASK;
- dev = newdev(csw->d_maj, unit2minor(unit | extra), ndev);
+ dev = ndev;
+ dev->si_drv0 = unit2minor(unit | extra);
if (dev->si_flags & SI_CLONELIST) {
printf("dev %p (%s) is on clonelist\n", dev, dev->si_name);
printf("unit=%d\n", unit);
@@ -755,6 +671,8 @@
}
KASSERT(!(dev->si_flags & SI_CLONELIST),
("Dev %p(%s) should not be on clonelist", dev, dev->si_name));
+ dev->si_devsw = csw;
+ LIST_INSERT_HEAD(&csw->d_devs, dev, si_list);
if (dl != NULL)
LIST_INSERT_BEFORE(dl, dev, si_clone);
else if (de != NULL)
@@ -785,37 +703,11 @@
KASSERT(dev->si_flags & SI_CLONELIST,
("Dev %p(%s) should be on clonelist", dev, dev->si_name));
KASSERT(dev->si_flags & SI_NAMED,
- ("Driver has goofed in cloning underways udev %x", dev->si_udev));
+ ("Driver has goofed in cloning underways drv0 %x %s",
+ dev->si_drv0, devtoname(dev)));
destroy_devl(dev);
}
dev_unlock();
free(cd, M_DEVBUF);
*cdp = NULL;
}
-
-/*
- * Helper sysctl for devname(3). We're given a struct cdev * and return
- * the name, if any, registered by the device driver.
- */
-static int
-sysctl_devname(SYSCTL_HANDLER_ARGS)
-{
- int error;
- dev_t ud;
- struct cdev *dev;
-
- error = SYSCTL_IN(req, &ud, sizeof (ud));
- if (error)
- return (error);
- if (ud == NODEV)
- return(EINVAL);
- dev = findcdev(ud);
- if (dev == NULL)
- error = ENOENT;
- else
- error = SYSCTL_OUT(req, dev->si_name, strlen(dev->si_name) + 1);
- return (error);
-}
-
-SYSCTL_PROC(_kern, OID_AUTO, devname, CTLTYPE_OPAQUE|CTLFLAG_RW|CTLFLAG_ANYBODY,
- NULL, 0, sysctl_devname, "", "devname(3) handler");
Index: sys/conf.h
===================================================================
RCS file: /home/ncvs/src/sys/sys/conf.h,v
retrieving revision 1.212
diff -u -r1.212 conf.h
--- sys/conf.h 8 Mar 2005 10:40:03 -0000 1.212
+++ sys/conf.h 9 Mar 2005 21:31:00 -0000
@@ -67,7 +67,6 @@
struct timespec si_atime;
struct timespec si_ctime;
struct timespec si_mtime;
- dev_t si_udev;
int si_refcount;
LIST_ENTRY(cdev) si_list;
LIST_ENTRY(cdev) si_clone;
@@ -78,6 +77,7 @@
struct cdev *si_parent;
u_int si_inode;
char *si_name;
+ u_int si_drv0;
void *si_drv1, *si_drv2;
struct cdevsw *si_devsw;
int si_iosize_max; /* maximum I/O size (for physio &al) */
@@ -222,12 +222,6 @@
#define MAXMINOR 0xffff00ffU
-/*
- * XXX: do not use MAJOR_AUTO unless you have no choice. In general drivers
- * should just not initialize .d_maj and that will DTRT.
- */
-#define MAJOR_AUTO 0 /* XXX: Not GM */
-
struct module;
struct devsw_module_data {
@@ -262,7 +256,7 @@
void dev_rel(struct cdev *dev);
void dev_strategy(struct cdev *dev, struct buf *bp);
struct cdev *makebdev(int _maj, int _min);
-struct cdev *make_dev(struct cdevsw *_devsw, int _minor, uid_t _uid, gid_t _gid,
+struct cdev *make_dev(struct cdevsw *_devsw, u_int drv0, uid_t _uid, gid_t _gid,
int _perms, const char *_fmt, ...) __printflike(6, 7);
struct cdev *make_dev_alias(struct cdev *_pdev, const char *_fmt, ...) __printflike(2, 3);
int dev2unit(struct cdev *_dev);
--
Poul-Henning Kamp | UNIX since Zilog Zeus 3.20
phk at FreeBSD.ORG | TCP/IP since RFC 956
FreeBSD committer | BSD since 4.3-tahoe
Never attribute to malice what can adequately be explained by incompetence.
More information about the freebsd-current
mailing list