kernel panic at giant_poll (kern_conf.c:385)
Kostik Belousov
kostikbel at gmail.com
Tue Mar 4 09:43:18 PST 2008
On Tue, Mar 04, 2008 at 12:27:04PM +0100, Pietro Cerutti wrote:
> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA512
>
> Dear all,
>
> FreeBSD gahrtop.localhost 8.0-CURRENT FreeBSD 8.0-CURRENT #8: Mon Mar 3
> 14:07:47 CET 2008
> root at gahrtop.localhost:/usr/obj/usr/src/sys/MSI1034 i386
>
> vmcore and kernel.symbols available at:
> http://www.gahr.ch/FreeBSD/misc/crash17/vmcore.17
> http://www.gahr.ch/FreeBSD/misc/crash17/kernel.symbols
>
> (if you get a page not found, 's|www.gahr.ch|213.142.182.66|', DNS are
> still refreshing after relocation...)
>
> Fatal trap 12: page fault while in kernel mode
> cpuid = 1; apic id = 01
> fault virtual address = 0x60
> fault code = supervisor read, page not present
> instruction pointer = 0x20:0xc055d43b
> stack pointer = 0x28:0xe757ca68
> frame pointer = 0x28:0xe757ca84
> code segment = base 0x0, limit 0xfffff, type 0x1b
> = DPL 0, pres 1, def32 1, gran 1
> processor eflags = interrupt enabled, resume, IOPL = 0
> current process = 399 (moused)
> trap number = 12
> panic: page fault
> cpuid = 1
> Uptime: 54m21s
> Physical memory: 2031 MB
> Dumping 83 MB: 68 52 36 20 4
>
> (kgdb) bt
> #0 doadump () at pcpu.h:195
> #1 0xc0592a86 in boot (howto=260) at /usr/src/sys/kern/kern_shutdown.c:417
> #2 0xc0592ec8 in panic (fmt=0x104 <Address 0x104 out of bounds>) at
> /usr/src/sys/kern/kern_shutdown.c:571
> #3 0xc07f5b8a in trap_fatal (frame=0xe757ca28, eva=40) at
> /usr/src/sys/i386/i386/trap.c:898
> #4 0xc07f5ef9 in trap_pfault (frame=0xe757ca28, usermode=0, eva=96) at
> /usr/src/sys/i386/i386/trap.c:811
> #5 0xc07f68db in trap (frame=0xe757ca28) at
> /usr/src/sys/i386/i386/trap.c:489
> #6 0xc07dd1fb in calltrap () at /usr/src/sys/i386/i386/exception.s:146
> #7 0xc055d43b in giant_poll (dev=0xc4c09e00, events=64, td=0xc4e6b220)
> at /usr/src/sys/kern/kern_conf.c:385
> #8 0xc05161d5 in devfs_poll_f (fp=0xc4e715e4, events=64,
> cred=0xc4ace500, td=0xc4e6b220) at /usr/src/sys/fs/devfs/devfs_vnops.c:842
> #9 0xc05c9fca in kern_select (td=0xc4e6b220, nd=20, fd_in=0xbfbfea70,
> fd_ou=0x0, fd_ex=0x0, tvp=0xe757cc6c) at file.h:265
> #10 0xc05ca41c in select (td=0xc4e6b220, uap=0xe757ccf8) at
> /usr/src/sys/kern/sys_generic.c:758
> #11 0xc07f6142 in syscall (frame=0xe757cd38) at
> /usr/src/sys/i386/i386/trap.c:1034
> #12 0xc07dd260 in Xint0x80_syscall () at
> /usr/src/sys/i386/i386/exception.s:203
> #13 0x00000033 in ?? ()
> (kgdb) up 7
> #7 0xc055d43b in giant_poll (dev=0xc4c09e00, events=64, td=0xc4e6b220)
> at /usr/src/sys/kern/kern_conf.c:385
> 385 retval = dev->si_devsw->d_gianttrick->
> (kgdb) list
> 380 giant_poll(struct cdev *dev, int events, struct thread *td)
> 381 {
> 382 int retval;
> 383
> 384 mtx_lock(&Giant);
> 385 retval = dev->si_devsw->d_gianttrick->
> 386 d_poll(dev, events, td);
> 387 mtx_unlock(&Giant);
> 388 return (retval);
> 389 }
> (kgdb) set print pretty
> (kgdb) print *dev
> $2 = {
> si_priv = 0xc4c09e00,
> si_flags = 0,
> si_atime = {
> tv_sec = 1204568256,
> tv_nsec = 0
> },
> si_ctime = {
> tv_sec = 0,
> tv_nsec = 0
> },
> si_mtime = {
> tv_sec = 0,
> tv_nsec = 0
> },
> si_uid = 0,
> si_gid = 5,
> si_mode = 420,
> si_cred = 0x0,
> si_drv0 = 0,
> si_refcount = 3,
> si_list = {
> le_next = 0x0,
> le_prev = 0xc0871a58
> },
> si_clone = {
> le_next = 0x0,
> le_prev = 0x0
> },
> si_children = {
> lh_first = 0x0
> },
> si_siblings = {
> le_next = 0x0,
> le_prev = 0x0
> },
> si_parent = 0x0,
> si_name = 0xc4c09e78 "ums0",
> si_drv1 = 0x0,
> si_drv2 = 0x0,
> si_devsw = 0x0,
> si_iosize_max = 65536,
> si_usecount = 1,
> si_threadcount = 1,
> __si_u = {
> - ---Type <return> to continue, or q <return> to quit---
> __sit_tty = 0x0,
> __sid_snapdata = 0x0
> },
> __si_namebuf = "ums0", '\0' <repeats 59 times>
> }
>
> Please note that dev->si_devsw is NULL, and kern_conf.c:385 tries to
> dereference it.
I remember this issue was already reported by M. Warner Losh.
The reason for the problem is that si_refcount does not protect
struct cdev, it only keeps the cdev structure from going away (as
well as protecting against finalizing the cdevsw). The callers of
devvn_refthread() are assumed to only use the returned dsw instead of
dereferencing dev again.
Giant_trick explicitely breaks the rule.
Besides the problem I described above, there is another race in
make_dev_credv(), where cdevsw may be finalized just before new device
is created, since dev_mtx is dropped between prep_cdevsw and newdev()
and later. This requires rapid device creation/destruction, and was
discovered by Peter Holm.
You may try the following patch. Besides removing giant_trick and
directly checking the Giant, it puts the prep_cdevsw() into the common
dev_mtx-protected region.
diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c
index d9565ee..58e943a 100644
--- a/sys/fs/devfs/devfs_vnops.c
+++ b/sys/fs/devfs/devfs_vnops.c
@@ -369,7 +369,9 @@ devfs_close(struct vop_close_args *ap)
error = dsw->d_close(dev, ap->a_fflag, S_IFCHR, td);
PICKUP_GIANT();
} else {
+ mtx_lock(&Giant);
error = dsw->d_close(dev, ap->a_fflag, S_IFCHR, td);
+ mtx_unlock(&Giant);
}
dev_relthread(dev);
vn_lock(vp, vp_locked | LK_RETRY);
@@ -491,7 +493,11 @@ devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struc
dev_relthread(dev);
return (error);
}
+ if (dsw->d_flags & D_NEEDGIANT)
+ mtx_lock(&Giant);
error = dsw->d_ioctl(dev, com, data, fp->f_flag, td);
+ if (dsw->d_flags & D_NEEDGIANT)
+ mtx_unlock(&Giant);
dev_relthread(dev);
if (error == ENOIOCTL)
error = ENOTTY;
@@ -534,7 +540,11 @@ devfs_kqfilter_f(struct file *fp, struct knote *kn)
error = devfs_fp_check(fp, &dev, &dsw);
if (error)
return (error);
+ if (dsw->d_flags & D_NEEDGIANT)
+ mtx_lock(&Giant);
error = dsw->d_kqfilter(dev, kn);
+ if (dsw->d_flags & D_NEEDGIANT)
+ mtx_unlock(&Giant);
dev_relthread(dev);
return (error);
}
@@ -780,10 +790,12 @@ devfs_open(struct vop_open_args *ap)
error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td);
PICKUP_GIANT();
} else {
+ mtx_lock(&Giant);
if (dsw->d_fdopen != NULL)
error = dsw->d_fdopen(dev, ap->a_mode, td, fp);
else
error = dsw->d_open(dev, ap->a_mode, S_IFCHR, td);
+ mtx_unlock(&Giant);
}
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
@@ -839,7 +851,11 @@ devfs_poll_f(struct file *fp, int events, struct ucred *cred, struct thread *td)
error = devfs_fp_check(fp, &dev, &dsw);
if (error)
return (error);
+ if (dsw->d_flags & D_NEEDGIANT)
+ mtx_lock(&Giant);
error = dsw->d_poll(dev, events, td);
+ if (dsw->d_flags & D_NEEDGIANT)
+ mtx_unlock(&Giant);
dev_relthread(dev);
return(error);
}
@@ -874,7 +890,11 @@ devfs_read_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, st
if ((flags & FOF_OFFSET) == 0)
uio->uio_offset = fp->f_offset;
+ if (dsw->d_flags & D_NEEDGIANT)
+ mtx_lock(&Giant);
error = dsw->d_read(dev, uio, ioflag);
+ if (dsw->d_flags & D_NEEDGIANT)
+ mtx_unlock(&Giant);
if (uio->uio_resid != resid || (error == 0 && resid != 0))
vfs_timestamp(&dev->si_atime);
dev_relthread(dev);
@@ -1308,7 +1328,11 @@ devfs_write_f(struct file *fp, struct uio *uio, struct ucred *cred, int flags, s
resid = uio->uio_resid;
+ if (dsw->d_flags & D_NEEDGIANT)
+ mtx_lock(&Giant);
error = dsw->d_write(dev, uio, ioflag);
+ if (dsw->d_flags & D_NEEDGIANT)
+ mtx_unlock(&Giant);
if (uio->uio_resid != resid || (error == 0 && resid != 0)) {
vfs_timestamp(&dev->si_ctime);
dev->si_mtime = dev->si_ctime;
diff --git a/sys/kern/kern_conf.c b/sys/kern/kern_conf.c
index 33285b4..910a41a 100644
--- a/sys/kern/kern_conf.c
+++ b/sys/kern/kern_conf.c
@@ -294,125 +294,6 @@ no_poll(struct cdev *dev __unused, int events, struct thread *td __unused)
#define no_dump (dumper_t *)enodev
-static int
-giant_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
-{
- int retval;
-
- mtx_lock(&Giant);
- retval = dev->si_devsw->d_gianttrick->
- d_open(dev, oflags, devtype, td);
- mtx_unlock(&Giant);
- return (retval);
-}
-
-static int
-giant_fdopen(struct cdev *dev, int oflags, struct thread *td, struct file *fp)
-{
- int retval;
-
- mtx_lock(&Giant);
- retval = dev->si_devsw->d_gianttrick->
- d_fdopen(dev, oflags, td, fp);
- mtx_unlock(&Giant);
- return (retval);
-}
-
-static int
-giant_close(struct cdev *dev, int fflag, int devtype, struct thread *td)
-{
- int retval;
-
- mtx_lock(&Giant);
- retval = dev->si_devsw->d_gianttrick->
- d_close(dev, fflag, devtype, td);
- mtx_unlock(&Giant);
- return (retval);
-}
-
-static void
-giant_strategy(struct bio *bp)
-{
-
- mtx_lock(&Giant);
- bp->bio_dev->si_devsw->d_gianttrick->
- d_strategy(bp);
- mtx_unlock(&Giant);
-}
-
-static int
-giant_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, struct thread *td)
-{
- int retval;
-
- mtx_lock(&Giant);
- retval = dev->si_devsw->d_gianttrick->
- d_ioctl(dev, cmd, data, fflag, td);
- mtx_unlock(&Giant);
- return (retval);
-}
-
-static int
-giant_read(struct cdev *dev, struct uio *uio, int ioflag)
-{
- int retval;
-
- mtx_lock(&Giant);
- retval = dev->si_devsw->d_gianttrick->
- d_read(dev, uio, ioflag);
- mtx_unlock(&Giant);
- return (retval);
-}
-
-static int
-giant_write(struct cdev *dev, struct uio *uio, int ioflag)
-{
- int retval;
-
- mtx_lock(&Giant);
- retval = dev->si_devsw->d_gianttrick->
- d_write(dev, uio, ioflag);
- mtx_unlock(&Giant);
- return (retval);
-}
-
-static int
-giant_poll(struct cdev *dev, int events, struct thread *td)
-{
- int retval;
-
- mtx_lock(&Giant);
- retval = dev->si_devsw->d_gianttrick->
- d_poll(dev, events, td);
- mtx_unlock(&Giant);
- return (retval);
-}
-
-static int
-giant_kqfilter(struct cdev *dev, struct knote *kn)
-{
- int retval;
-
- mtx_lock(&Giant);
- retval = dev->si_devsw->d_gianttrick->
- d_kqfilter(dev, kn);
- mtx_unlock(&Giant);
- return (retval);
-}
-
-static int
-giant_mmap(struct cdev *dev, vm_offset_t offset, vm_paddr_t *paddr, int nprot)
-{
- int retval;
-
- mtx_lock(&Giant);
- retval = dev->si_devsw->d_gianttrick->
- d_mmap(dev, offset, paddr, nprot);
- mtx_unlock(&Giant);
- return (retval);
-}
-
-
/*
* struct cdev * and u_dev_t primitives
*/
@@ -485,27 +366,16 @@ umajor(dev_t dev)
static void
fini_cdevsw(struct cdevsw *devsw)
{
- struct cdevsw *gt;
+ mtx_assert(&devmtx, MA_OWNED);
- if (devsw->d_gianttrick != NULL) {
- gt = devsw->d_gianttrick;
- memcpy(devsw, gt, sizeof *devsw);
- free(gt, M_DEVT);
- devsw->d_gianttrick = NULL;
- }
devsw->d_flags &= ~D_INIT;
}
static void
prep_cdevsw(struct cdevsw *devsw)
{
- struct cdevsw *dsw2;
- if (devsw->d_flags & D_NEEDGIANT)
- dsw2 = malloc(sizeof *dsw2, M_DEVT, M_WAITOK);
- else
- dsw2 = NULL;
- dev_lock();
+ mtx_assert(&devmtx, MA_OWNED);
if (devsw->d_version != D_VERSION_01) {
printf(
@@ -532,41 +402,29 @@ prep_cdevsw(struct cdevsw *devsw)
if (devsw->d_poll == NULL) devsw->d_poll = ttypoll;
}
- if (devsw->d_flags & D_NEEDGIANT) {
- if (devsw->d_gianttrick == NULL) {
- memcpy(dsw2, devsw, sizeof *dsw2);
- devsw->d_gianttrick = dsw2;
- } else
- free(dsw2, M_DEVT);
- }
-
-#define FIXUP(member, noop, giant) \
+#define FIXUP(member, noop) \
do { \
if (devsw->member == NULL) { \
devsw->member = noop; \
- } else if (devsw->d_flags & D_NEEDGIANT) \
- devsw->member = giant; \
} \
- while (0)
-
- FIXUP(d_open, null_open, giant_open);
- FIXUP(d_fdopen, NULL, giant_fdopen);
- FIXUP(d_close, null_close, giant_close);
- FIXUP(d_read, no_read, giant_read);
- FIXUP(d_write, no_write, giant_write);
- FIXUP(d_ioctl, no_ioctl, giant_ioctl);
- FIXUP(d_poll, no_poll, giant_poll);
- FIXUP(d_mmap, no_mmap, giant_mmap);
- FIXUP(d_strategy, no_strategy, giant_strategy);
- FIXUP(d_kqfilter, no_kqfilter, giant_kqfilter);
+ } while (0)
+
+ FIXUP(d_open, null_open);
+ FIXUP(d_fdopen, NULL);
+ FIXUP(d_close, null_close);
+ FIXUP(d_read, no_read);
+ FIXUP(d_write, no_write);
+ FIXUP(d_ioctl, no_ioctl);
+ FIXUP(d_poll, no_poll);
+ FIXUP(d_mmap, no_mmap);
+ FIXUP(d_strategy, no_strategy);
+ FIXUP(d_kqfilter, no_kqfilter);
if (devsw->d_dump == NULL) devsw->d_dump = no_dump;
LIST_INIT(&devsw->d_devs);
devsw->d_flags |= D_INIT;
-
- dev_unlock();
}
struct cdev *
@@ -580,10 +438,10 @@ make_dev_credv(int flags, struct cdevsw *devsw, int minornr,
KASSERT((minornr & ~MAXMINOR) == 0,
("Invalid minor (0x%x) in make_dev", minornr));
- if (!(devsw->d_flags & D_INIT))
- prep_cdevsw(devsw);
dev = devfs_alloc();
dev_lock();
+ if (!(devsw->d_flags & D_INIT))
+ prep_cdevsw(devsw);
dev = newdev(devsw, minornr, dev);
if (flags & MAKEDEV_REF)
dev_refl(dev);
@@ -884,9 +742,6 @@ clone_create(struct clonedevs **cdp, struct cdevsw *csw, int *up, struct cdev **
KASSERT(*up <= CLONE_UNITMASK,
("Too high unit (0x%x) in clone_create", *up));
- if (!(csw->d_flags & D_INIT))
- prep_cdevsw(csw);
-
/*
* Search the list for a lot of things in one go:
* A preexisting match is returned immediately.
@@ -898,6 +753,8 @@ clone_create(struct clonedevs **cdp, struct cdevsw *csw, int *up, struct cdev **
unit = *up;
ndev = devfs_alloc();
dev_lock();
+ if (!(csw->d_flags & D_INIT))
+ prep_cdevsw(csw);
low = extra;
de = dl = NULL;
cd = *cdp;
diff --git a/sys/kern/tty_cons.c b/sys/kern/tty_cons.c
index 35638c4..758d225 100644
--- a/sys/kern/tty_cons.c
+++ b/sys/kern/tty_cons.c
@@ -393,7 +393,11 @@ cn_devopen(struct cn_device *cnd, struct thread *td, int forceopen)
csw = dev_refthread(dev);
if (csw == NULL)
return (ENXIO);
+ if (csw->d_flags & D_NEEDGIANT)
+ mtx_lock(&Giant);
error = (*csw->d_open)(dev, openflag, 0, td);
+ if (csw->d_flags & D_NEEDGIANT)
+ mtx_unlock(&Giant);
dev_relthread(dev);
return (error);
}
@@ -458,7 +462,11 @@ cnread(struct cdev *dev, struct uio *uio, int flag)
csw = dev_refthread(dev);
if (csw == NULL)
return (ENXIO);
+ if (csw->d_flags & D_NEEDGIANT)
+ mtx_lock(&Giant);
error = (csw->d_read)(dev, uio, flag);
+ if (csw->d_flags & D_NEEDGIANT)
+ mtx_unlock(&Giant);
dev_relthread(dev);
return (error);
}
@@ -482,7 +490,11 @@ cnwrite(struct cdev *dev, struct uio *uio, int flag)
csw = dev_refthread(dev);
if (csw == NULL)
return (ENXIO);
+ if (csw->d_flags & D_NEEDGIANT)
+ mtx_lock(&Giant);
error = (csw->d_write)(dev, uio, flag);
+ if (csw->d_flags & D_NEEDGIANT)
+ mtx_unlock(&Giant);
dev_relthread(dev);
return (error);
}
@@ -518,7 +530,11 @@ cnioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, struct thread *td)
csw = dev_refthread(dev);
if (csw == NULL)
return (ENXIO);
+ if (csw->d_flags & D_NEEDGIANT)
+ mtx_lock(&Giant);
error = (csw->d_ioctl)(dev, cmd, data, flag, td);
+ if (csw->d_flags & D_NEEDGIANT)
+ mtx_unlock(&Giant);
dev_relthread(dev);
return (error);
}
@@ -543,7 +559,11 @@ cnpoll(struct cdev *dev, int events, struct thread *td)
csw = dev_refthread(dev);
if (csw == NULL)
return (ENXIO);
+ if (csw->d_flags & D_NEEDGIANT)
+ mtx_lock(&Giant);
error = (csw->d_poll)(dev, events, td);
+ if (csw->d_flags & D_NEEDGIANT)
+ mtx_unlock(&Giant);
dev_relthread(dev);
return (error);
}
@@ -564,7 +584,11 @@ cnkqfilter(struct cdev *dev, struct knote *kn)
csw = dev_refthread(dev);
if (csw == NULL)
return (ENXIO);
+ if (csw->d_flags & D_NEEDGIANT)
+ mtx_lock(&Giant);
error = (csw->d_kqfilter)(dev, kn);
+ if (csw->d_flags & D_NEEDGIANT)
+ mtx_unlock(&Giant);
dev_relthread(dev);
return (error);
}
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 00ff023..94f5986 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -3118,7 +3118,11 @@ dev_strategy(struct cdev *dev, struct buf *bp)
bufdone(bp);
return;
}
+ if (csw->d_flags & D_NEEDGIANT)
+ mtx_lock(&Giant);
(*csw->d_strategy)(bip);
+ if (csw->d_flags & D_NEEDGIANT)
+ mtx_unlock(&Giant);
dev_relthread(dev);
}
diff --git a/sys/sys/conf.h b/sys/sys/conf.h
index c36ea8c..fdf9d4f 100644
--- a/sys/sys/conf.h
+++ b/sys/sys/conf.h
@@ -213,7 +213,7 @@ struct cdevsw {
LIST_ENTRY(cdevsw) d_list;
LIST_HEAD(, cdev) d_devs;
int d_spare3;
- struct cdevsw *d_gianttrick;
+ void *d_spare4;
};
#define NUMCDEVSW 256
diff --git a/sys/vm/device_pager.c b/sys/vm/device_pager.c
index f0d661f..4f67112 100644
--- a/sys/vm/device_pager.c
+++ b/sys/vm/device_pager.c
@@ -131,11 +131,17 @@ dev_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot, vm_ooffset_t fo
* XXX assumes VM_PROT_* == PROT_*
*/
npages = OFF_TO_IDX(size);
+ if (csw->d_flags & D_NEEDGIANT)
+ mtx_lock(&Giant);
for (off = foff; npages--; off += PAGE_SIZE)
if ((*csw->d_mmap)(dev, off, &paddr, (int)prot) != 0) {
+ if (csw->d_flags & D_NEEDGIANT)
+ mtx_unlock(&Giant);
dev_relthread(dev);
return (NULL);
}
+ if (csw->d_flags & D_NEEDGIANT)
+ mtx_unlock(&Giant);
mtx_lock(&dev_pager_mtx);
@@ -220,7 +226,11 @@ dev_pager_getpages(object, m, count, reqpage)
panic("dev_pager_getpage: no cdevsw");
prot = PROT_READ; /* XXX should pass in? */
+ if (csw->d_flags & D_NEEDGIANT)
+ mtx_lock(&Giant);
ret = (*csw->d_mmap)(dev, (vm_offset_t)offset << PAGE_SHIFT, &paddr, prot);
+ if (csw->d_flags & D_NEEDGIANT)
+ mtx_unlock(&Giant);
KASSERT(ret == 0, ("dev_pager_getpage: map function returns error"));
dev_relthread(dev);
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 195 bytes
Desc: not available
Url : http://lists.freebsd.org/pipermail/freebsd-current/attachments/20080304/7e19c49b/attachment.pgp
More information about the freebsd-current
mailing list