r244036 kernel hangs under load.
Konstantin Belousov
kostikbel at gmail.com
Fri Dec 14 04:03:45 UTC 2012
On Thu, Dec 13, 2012 at 10:14:29PM -0500, Rick Macklem wrote:
> Good work. This patch seems to have done the trick. I've run
> quite a few kernel build cycles without a hang. I'll keep running
> them, but I would have expected to see a hang by now.
>
> Maybe Tim can test the patch as well?
> (I needed to add #include <sys/smp.h> btw.)
Below is the current version of the patch, it is being tested by Peter Holm.
Compared to what I sent you earlier, it contain a bug fix only relevant if
you use quotas.
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 67e078d..64d75fb 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -69,6 +69,7 @@ __FBSDID("$FreeBSD$");
#include <sys/reboot.h>
#include <sys/sched.h>
#include <sys/sleepqueue.h>
+#include <sys/smp.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
@@ -4710,32 +4711,54 @@ __mnt_vnode_markerfree_all(struct vnode **mvp, struct mount *mp)
* These are helper functions for filesystems to traverse their
* active vnodes. See MNT_VNODE_FOREACH_ACTIVE() in sys/mount.h
*/
-struct vnode *
-__mnt_vnode_next_active(struct vnode **mvp, struct mount *mp)
+static void
+mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *mp)
+{
+
+ KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
+
+ MNT_ILOCK(mp);
+ MNT_REL(mp);
+ MNT_IUNLOCK(mp);
+ free(*mvp, M_VNODE_MARKER);
+ *mvp = NULL;
+}
+
+#ifdef SMP
+#define ALWAYS_YIELD (mp_ncpus == 1)
+#else
+#define ALWAYS_YIELD 1
+#endif
+
+static struct vnode *
+mnt_vnode_next_active(struct vnode **mvp, struct mount *mp)
{
struct vnode *vp, *nvp;
- if (should_yield())
- kern_yield(PRI_UNCHANGED);
- mtx_lock(&vnode_free_list_mtx);
-restart:
+ mtx_assert(&vnode_free_list_mtx, MA_OWNED);
KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
+restart:
vp = TAILQ_NEXT(*mvp, v_actfreelist);
+ TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
while (vp != NULL) {
if (vp->v_type == VMARKER) {
vp = TAILQ_NEXT(vp, v_actfreelist);
continue;
}
if (!VI_TRYLOCK(vp)) {
- if (should_yield()) {
+ if (ALWAYS_YIELD || should_yield()) {
+ TAILQ_INSERT_BEFORE(vp, *mvp, v_actfreelist);
mtx_unlock(&vnode_free_list_mtx);
- kern_yield(PRI_UNCHANGED);
+ kern_yield(PRI_USER);
mtx_lock(&vnode_free_list_mtx);
+ goto restart;
}
- goto restart;
+ continue;
}
- if (vp->v_mount == mp && vp->v_type != VMARKER &&
- (vp->v_iflag & VI_DOOMED) == 0)
+ KASSERT(vp->v_type != VMARKER, ("locked marker %p", vp));
+ KASSERT(vp->v_mount == mp || vp->v_mount == NULL,
+ ("alien vnode on the active list %p %p", vp, mp));
+ if (vp->v_mount == mp && (vp->v_iflag & VI_DOOMED) == 0)
break;
nvp = TAILQ_NEXT(vp, v_actfreelist);
VI_UNLOCK(vp);
@@ -4745,22 +4768,31 @@ restart:
/* Check if we are done */
if (vp == NULL) {
mtx_unlock(&vnode_free_list_mtx);
- __mnt_vnode_markerfree_active(mvp, mp);
- mtx_assert(MNT_MTX(mp), MA_NOTOWNED);
+ mnt_vnode_markerfree_active(mvp, mp);
return (NULL);
}
- TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
TAILQ_INSERT_AFTER(&mp->mnt_activevnodelist, vp, *mvp, v_actfreelist);
mtx_unlock(&vnode_free_list_mtx);
ASSERT_VI_LOCKED(vp, "active iter");
KASSERT((vp->v_iflag & VI_ACTIVE) != 0, ("Non-active vp %p", vp));
return (vp);
}
+#undef ALWAYS_YIELD
+
+struct vnode *
+__mnt_vnode_next_active(struct vnode **mvp, struct mount *mp)
+{
+
+ if (should_yield())
+ kern_yield(PRI_UNCHANGED);
+ mtx_lock(&vnode_free_list_mtx);
+ return (mnt_vnode_next_active(mvp, mp));
+}
struct vnode *
__mnt_vnode_first_active(struct vnode **mvp, struct mount *mp)
{
- struct vnode *vp, *nvp;
+ struct vnode *vp;
*mvp = malloc(sizeof(struct vnode), M_VNODE_MARKER, M_WAITOK | M_ZERO);
MNT_ILOCK(mp);
@@ -4770,44 +4802,14 @@ __mnt_vnode_first_active(struct vnode **mvp, struct mount *mp)
(*mvp)->v_mount = mp;
mtx_lock(&vnode_free_list_mtx);
-restart:
vp = TAILQ_FIRST(&mp->mnt_activevnodelist);
- while (vp != NULL) {
- if (vp->v_type == VMARKER) {
- vp = TAILQ_NEXT(vp, v_actfreelist);
- continue;
- }
- if (!VI_TRYLOCK(vp)) {
- if (should_yield()) {
- mtx_unlock(&vnode_free_list_mtx);
- kern_yield(PRI_UNCHANGED);
- mtx_lock(&vnode_free_list_mtx);
- }
- goto restart;
- }
- if (vp->v_mount == mp && vp->v_type != VMARKER &&
- (vp->v_iflag & VI_DOOMED) == 0)
- break;
- nvp = TAILQ_NEXT(vp, v_actfreelist);
- VI_UNLOCK(vp);
- vp = nvp;
- }
-
- /* Check if we are done */
if (vp == NULL) {
mtx_unlock(&vnode_free_list_mtx);
- MNT_ILOCK(mp);
- MNT_REL(mp);
- MNT_IUNLOCK(mp);
- free(*mvp, M_VNODE_MARKER);
- *mvp = NULL;
+ mnt_vnode_markerfree_active(mvp, mp);
return (NULL);
}
- TAILQ_INSERT_AFTER(&mp->mnt_activevnodelist, vp, *mvp, v_actfreelist);
- mtx_unlock(&vnode_free_list_mtx);
- ASSERT_VI_LOCKED(vp, "active iter first");
- KASSERT((vp->v_iflag & VI_ACTIVE) != 0, ("Non-active vp %p", vp));
- return (vp);
+ TAILQ_INSERT_BEFORE(vp, *mvp, v_actfreelist);
+ return (mnt_vnode_next_active(mvp, mp));
}
void
@@ -4817,14 +4819,8 @@ __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *mp)
if (*mvp == NULL)
return;
- KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
-
mtx_lock(&vnode_free_list_mtx);
TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
mtx_unlock(&vnode_free_list_mtx);
- MNT_ILOCK(mp);
- MNT_REL(mp);
- MNT_IUNLOCK(mp);
- free(*mvp, M_VNODE_MARKER);
- *mvp = NULL;
+ mnt_vnode_markerfree_active(mvp, mp);
}
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
index 4d010ec..ed2b002 100644
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -199,8 +199,8 @@ struct vnode *__mnt_vnode_next_all(struct vnode **mvp, struct mount *mp);
struct vnode *__mnt_vnode_first_all(struct vnode **mvp, struct mount *mp);
void __mnt_vnode_markerfree_all(struct vnode **mvp, struct mount *mp);
-#define MNT_VNODE_FOREACH_ALL(vp, mp, mvp) \
- for (vp = __mnt_vnode_first_all(&(mvp), (mp)); \
+#define MNT_VNODE_FOREACH_ALL(vp, mp, mvp) \
+ for (vp = __mnt_vnode_first_all(&(mvp), (mp)); \
(vp) != NULL; vp = __mnt_vnode_next_all(&(mvp), (mp)))
#define MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp) \
@@ -218,17 +218,12 @@ struct vnode *__mnt_vnode_next_active(struct vnode **mvp, struct mount *mp);
struct vnode *__mnt_vnode_first_active(struct vnode **mvp, struct mount *mp);
void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *);
-#define MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) \
- for (vp = __mnt_vnode_first_active(&(mvp), (mp)); \
+#define MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) \
+ for (vp = __mnt_vnode_first_active(&(mvp), (mp)); \
(vp) != NULL; vp = __mnt_vnode_next_active(&(mvp), (mp)))
#define MNT_VNODE_FOREACH_ACTIVE_ABORT(mp, mvp) \
- do { \
- MNT_ILOCK(mp); \
- __mnt_vnode_markerfree_active(&(mvp), (mp)); \
- /* MNT_IUNLOCK(mp); -- done in above function */ \
- mtx_assert(MNT_MTX(mp), MA_NOTOWNED); \
- } while (0)
+ __mnt_vnode_markerfree_active(&(mvp), (mp))
/*
* Definitions for MNT_VNODE_FOREACH.
diff --git a/sys/ufs/ufs/ufs_quota.c b/sys/ufs/ufs/ufs_quota.c
index 330f449..87ac9a1 100644
--- a/sys/ufs/ufs/ufs_quota.c
+++ b/sys/ufs/ufs/ufs_quota.c
@@ -1044,7 +1044,7 @@ again:
error = vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td);
if (error) {
if (error == ENOENT) {
- MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
+ MNT_VNODE_FOREACH_ACTIVE_ABORT(mp, mvp);
goto again;
}
continue;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 196 bytes
Desc: not available
URL: <http://lists.freebsd.org/pipermail/freebsd-current/attachments/20121214/6da363fb/attachment.sig>
More information about the freebsd-current
mailing list