USB-related problem
Adrian Chadd
adrian at freebsd.org
Tue Nov 10 18:57:07 UTC 2015
+ benno, jeff, kib
cool, so now we know where syncer is hanging. nice work!
That syncer loop that calls it while waiting for the list to be empty is
also a bit suspicious; it looks like it could also get stuck in a loop and
never yield.
-a
On 10 November 2015 at 10:54, Eugene Grosbein <eugen at grosbein.net> wrote:
> On 11.11.2015 01:05, Eugene Grosbein wrote:
>> On 10.11.2015 04:43, Adrian Chadd wrote:
>>> Hi,
>>>
>>> How much C do you know? Are you able to add some debugging to
>>> sched_sync() (which IIRC is the syncer thread) and work out where it's
>>> getting stuck and spinning?
>>
>> I did. I've added new global kernel symbol, variable schedstat
>> so I can check its value using ddb. And it is always equal to 5
>> when the deadlock occurs, revealing the loop
>
> I've dug into it a bit more. I tried to make doubly sure it loops
> and added a loop counter "schedloop", hoping to see some insanely large value.
> But it does not loop this way and schedloop is always 1.
>
> It just sits inside sync_vnode(). So I've added another variable "svnodestate"
> to trace sync_vnode() and svnodestate is 9.
>
> That is, VOP_FSYNC(vp, MNT_LAZY, td) does not return:
>
> --- sys/kern/vfs_subr.c.orig 2015-10-08 23:33:53.935440000 +0700
> +++ /sys/kern/vfs_subr.c 2015-11-11 01:39:09.450390000 +0700
> @@ -1738,217 +1738,272 @@ sysctl_vfs_worklist_len(SYSCTL_HANDLER_A
> return (error);
> }
>
> SYSCTL_PROC(_vfs, OID_AUTO, worklist_len, CTLTYPE_INT | CTLFLAG_RD, NULL, 0,
> sysctl_vfs_worklist_len, "I", "Syncer thread worklist length");
>
> static struct proc *updateproc;
> static void sched_sync(void);
> static struct kproc_desc up_kp = {
> "syncer",
> sched_sync,
> &updateproc
> };
> SYSINIT(syncer, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &up_kp);
>
> +volatile unsigned svnodestate = 0;
> +
> static int
> sync_vnode(struct synclist *slp, struct bufobj **bo, struct thread *td)
> {
> struct vnode *vp;
> struct mount *mp;
>
> + svnodestate = 0;
> *bo = LIST_FIRST(slp);
> if (*bo == NULL)
> return (0);
> +
> + svnodestate = 1;
> vp = (*bo)->__bo_vnode; /* XXX */
> if (VOP_ISLOCKED(vp) != 0 || VI_TRYLOCK(vp) == 0)
> return (1);
> /*
> * We use vhold in case the vnode does not
> * successfully sync. vhold prevents the vnode from
> * going away when we unlock the sync_mtx so that
> * we can acquire the vnode interlock.
> */
> + svnodestate = 2;
> vholdl(vp);
> + svnodestate = 3;
> mtx_unlock(&sync_mtx);
> VI_UNLOCK(vp);
> + svnodestate = 4;
> if (vn_start_write(vp, &mp, V_NOWAIT) != 0) {
> + svnodestate = 5;
> vdrop(vp);
> + svnodestate = 6;
> mtx_lock(&sync_mtx);
> + svnodestate = 7;
> return (*bo == LIST_FIRST(slp));
> }
> + svnodestate = 8;
> vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
> + svnodestate = 9;
> (void) VOP_FSYNC(vp, MNT_LAZY, td);
> + svnodestate = 10;
> VOP_UNLOCK(vp, 0);
> + svnodestate = 11;
> vn_finished_write(mp);
> + svnodestate = 12;
> BO_LOCK(*bo);
> + svnodestate = 13;
> if (((*bo)->bo_flag & BO_ONWORKLST) != 0) {
> /*
> * Put us back on the worklist. The worklist
> * routine will remove us from our current
> * position and then add us back in at a later
> * position.
> */
> + svnodestate = 14;
> vn_syncer_add_to_worklist(*bo, syncdelay);
> }
> + svnodestate = 15;
> BO_UNLOCK(*bo);
> + svnodestate = 16;
> vdrop(vp);
> + svnodestate = 17;
> mtx_lock(&sync_mtx);
> + svnodestate = 18;
> return (0);
> }
>
> static int first_printf = 1;
>
> +volatile unsigned schedstate = 0;
> +volatile unsigned schedloop = 0;
> +
> /*
> * System filesystem synchronizer daemon.
> */
> static void
> sched_sync(void)
> {
> struct synclist *next, *slp;
> struct bufobj *bo;
> long starttime;
> struct thread *td = curthread;
> int last_work_seen;
> int net_worklist_len;
> int syncer_final_iter;
> int error;
>
> last_work_seen = 0;
> syncer_final_iter = 0;
> syncer_state = SYNCER_RUNNING;
> starttime = time_uptime;
> td->td_pflags |= TDP_NORUNNINGBUF;
>
> EVENTHANDLER_REGISTER(shutdown_pre_sync, syncer_shutdown, td->td_proc,
> SHUTDOWN_PRI_LAST);
>
> + schedstate = 0;
> mtx_lock(&sync_mtx);
> for (;;) {
> + schedstate = 1;
> +
> if (syncer_state == SYNCER_FINAL_DELAY &&
> syncer_final_iter == 0) {
> mtx_unlock(&sync_mtx);
> kproc_suspend_check(td->td_proc);
> mtx_lock(&sync_mtx);
> }
> + schedstate = 2;
> +
> net_worklist_len = syncer_worklist_len - sync_vnode_count;
> if (syncer_state != SYNCER_RUNNING &&
> starttime != time_uptime) {
> if (first_printf) {
> printf("\nSyncing disks, vnodes remaining...");
> first_printf = 0;
> }
> printf("%d ", net_worklist_len);
> }
> starttime = time_uptime;
>
> /*
> * Push files whose dirty time has expired. Be careful
> * of interrupt race on slp queue.
> *
> * Skip over empty worklist slots when shutting down.
> */
> do {
> + schedstate = 3;
> slp = &syncer_workitem_pending[syncer_delayno];
> syncer_delayno += 1;
> if (syncer_delayno == syncer_maxdelay)
> syncer_delayno = 0;
> next = &syncer_workitem_pending[syncer_delayno];
> /*
> * If the worklist has wrapped since the
> * it was emptied of all but syncer vnodes,
> * switch to the FINAL_DELAY state and run
> * for one more second.
> */
> if (syncer_state == SYNCER_SHUTTING_DOWN &&
> net_worklist_len == 0 &&
> last_work_seen == syncer_delayno) {
> syncer_state = SYNCER_FINAL_DELAY;
> syncer_final_iter = SYNCER_SHUTDOWN_SPEEDUP;
> }
> } while (syncer_state != SYNCER_RUNNING && LIST_EMPTY(slp) &&
> syncer_worklist_len > 0);
>
> + schedstate = 4;
> +
> /*
> * Keep track of the last time there was anything
> * on the worklist other than syncer vnodes.
> * Return to the SHUTTING_DOWN state if any
> * new work appears.
> */
> if (net_worklist_len > 0 || syncer_state == SYNCER_RUNNING)
> last_work_seen = syncer_delayno;
> if (net_worklist_len > 0 && syncer_state == SYNCER_FINAL_DELAY)
> syncer_state = SYNCER_SHUTTING_DOWN;
> + schedloop=0;
> while (!LIST_EMPTY(slp)) {
> + schedstate = 5;
> + schedloop++;
> +
> error = sync_vnode(slp, &bo, td);
> if (error == 1) {
> LIST_REMOVE(bo, bo_synclist);
> LIST_INSERT_HEAD(next, bo, bo_synclist);
> continue;
> }
> + schedstate = 6;
>
> if (first_printf == 0) {
> + schedstate = 7;
> /*
> * Drop the sync mutex, because some watchdog
> * drivers need to sleep while patting
> */
> mtx_unlock(&sync_mtx);
> wdog_kern_pat(WD_LASTVAL);
> mtx_lock(&sync_mtx);
> }
>
> + schedstate = 8;
> }
> + schedstate = 9;
> +
> if (syncer_state == SYNCER_FINAL_DELAY && syncer_final_iter > 0)
> syncer_final_iter--;
> /*
> * The variable rushjob allows the kernel to speed up the
> * processing of the filesystem syncer process. A rushjob
> * value of N tells the filesystem syncer to process the next
> * N seconds worth of work on its queue ASAP. Currently rushjob
> * is used by the soft update code to speed up the filesystem
> * syncer process when the incore state is getting so far
> * ahead of the disk that the kernel memory pool is being
> * threatened with exhaustion.
> */
> if (rushjob > 0) {
> rushjob -= 1;
> continue;
> }
> /*
> * Just sleep for a short period of time between
> * iterations when shutting down to allow some I/O
> * to happen.
> *
> * If it has taken us less than a second to process the
> * current work, then wait. Otherwise start right over
> * again. We can still lose time if any single round
> * takes more than two seconds, but it does not really
> * matter as we are just trying to generally pace the
> * filesystem activity.
> */
> +
> + schedstate = 10;
> +
> if (syncer_state != SYNCER_RUNNING ||
> time_uptime == starttime) {
> + schedstate = 11;
> thread_lock(td);
> sched_prio(td, PPAUSE);
> thread_unlock(td);
> }
> - if (syncer_state != SYNCER_RUNNING)
> + schedstate = 12;
> +
> + if (syncer_state != SYNCER_RUNNING) {
> + schedstate = 13;
> +
> cv_timedwait(&sync_wakeup, &sync_mtx,
> hz / SYNCER_SHUTDOWN_SPEEDUP);
> - else if (time_uptime == starttime)
> + }
> + else if (time_uptime == starttime) {
> + schedstate = 14;
> cv_timedwait(&sync_wakeup, &sync_mtx, hz);
> + }
> + schedstate = 15;
> }
> + schedstate = 16;
> }
>
> /*
> * Request the syncer daemon to speed up its work.
> * We never push it to speed up more than half of its
> * normal turn time, otherwise it could take over the cpu.
> */
> int
> speedup_syncer(void)
> {
> int ret = 0;
>
> mtx_lock(&sync_mtx);
> if (rushjob < syncdelay / 2) {
> rushjob += 1;
>
More information about the freebsd-mips
mailing list