kern/118093: [firewire] firewire bus reset hogs CPU, causing data to be lost

Sean Bruno sean.bruno at dsl-only.net
Mon Jan 5 11:00:12 PST 2009


The following reply was made to PR kern/118093; it has been noted by GNATS.

From: Sean Bruno <sean.bruno at dsl-only.net>
To: bug-followup at FreeBSD.org, freebsd at sopwith.solgatos.com,  freebsd-firewire at freebsd.org
Cc: scottl at freebsd.org
Subject: Re: kern/118093: [firewire] firewire bus reset hogs CPU, causing
 data to be lost
Date: Mon, 05 Jan 2009 10:56:37 -0800

 --=-bdk1tSakYWl4EHnChn2C
 Content-Type: text/plain
 Content-Transfer-Encoding: 7bit
 
 Well ... this has really sent me down the rabbit hole the last couple of
 days.
 
 There is a need to audit all locking in the firewire stack right now and
 I have started that task.
 
 Essentially, threads, callouts, interrupts and task queues are all
 jumping around causing context to be switched from one thread to
 another.  It's kind of bad in there and I need to sort it out.
 
 I'm working with a stable/7 tree, so I've started a patch for it.  This
 patch has quite a few printf -> device_printf changes in it, so try to 
 ignore thost for now.
 
 The meat of the patch is a judicious implementation of FW_GLOCK() in
 certain code areas.  Note that sometimes the code is just trying to get
 the lock and then drops it immediately.  This is not very optimal, but
 it does the trick.
 
 I'm still seeing a high level of broken log messages
 in /var/log/messages but this may help the issue you were seeing.  Give
 it a spin.
 
 Sean
 
 --=-bdk1tSakYWl4EHnChn2C
 Content-Disposition: attachment; filename="firewire.diff"
 Content-Type: text/x-patch; name="firewire.diff"; charset="UTF-8"
 Content-Transfer-Encoding: 7bit
 
 Index: fwohci_pci.c
 ===================================================================
 --- fwohci_pci.c	(revision 186781)
 +++ fwohci_pci.c	(working copy)
 @@ -337,11 +337,7 @@
  
  	err = bus_setup_intr(self, sc->irq_res,
  			INTR_TYPE_NET | INTR_MPSAFE,
 -#if FWOHCI_INTFILT
 -		     fwohci_filt, NULL, sc, &sc->ih);
 -#else
  		     NULL, (driver_intr_t *) fwohci_intr, sc, &sc->ih);
 -#endif
  #if defined(__DragonFly__) || __FreeBSD_version < 500000
  	/* XXX splcam() should mask this irq for sbp.c*/
  	err = bus_setup_intr(self, sc->irq_res, INTR_TYPE_CAM,
 Index: firewire.c
 ===================================================================
 --- firewire.c	(revision 186781)
 +++ firewire.c	(working copy)
 @@ -1240,8 +1240,8 @@
  	fp->mode.common.tcode |= FWTCODE_PHY;
  
  	if (firewire_debug)
 -		printf("send phy_config root_node=%d gap_count=%d\n",
 -						root_node, gap_count);
 +		device_printf(fc->dev, "%s: phy_config root_node=%d gap_count=%d\n",
 +				__func__, root_node, gap_count);
  	fw_asyreq(fc, -1, xfer);
  }
  
 @@ -1285,7 +1285,7 @@
  	self_id = &fc->topology_map->self_id[0];
  	for(i = 0; i < fc->sid_cnt; i ++){
  		if (sid[1] != ~sid[0]) {
 -			printf("fw_sidrcv: invalid self-id packet\n");
 +			device_printf(fc->bdev, "%s: invalid self-id packet\n", __func__);
  			sid += 2;
  			continue;
  		}
 @@ -1331,7 +1331,6 @@
  		self_id++;
  		fc->topology_map->self_id_count ++;
  	}
 -	device_printf(fc->bdev, "%d nodes", fc->max_node + 1);
  	/* CRC */
  	fc->topology_map->crc = fw_crc16(
  			(uint32_t *)&fc->topology_map->generation,
 @@ -1350,16 +1349,16 @@
  	bcopy(p, &CSRARC(fc, SPED_MAP + 8), (fc->speed_map->crc_len - 1)*4);
  
  	fc->max_hop = fc->max_node - i_branch;
 -	printf(", maxhop <= %d", fc->max_hop);
 +	device_printf(fc->bdev, "%s: %d nodes, maxhop <= %d\n",
 +			__func__, fc->max_node + 1, fc->max_hop);
  		
  	if(fc->irm == -1 ){
 -		printf(", Not found IRM capable node");
 +		device_printf(fc->bdev, "%s: No IRM capable node", __func__);
  	}else{
 -		printf(", cable IRM = %d", fc->irm);
 -		if (fc->irm == fc->nodeid)
 -			printf(" (me)");
 +		device_printf(fc->bdev, "%s: IRM capable node = %d, %s\n",
 +				 __func__, fc->irm,
 +				(fc->irm == fc->nodeid) ? " (me)" : "");
  	}
 -	printf("\n");
  
  	if (try_bmr && (fc->irm != -1) && (CSRARC(fc, BUS_MGR_ID) == 0x3f)) {
  		if (fc->irm == fc->nodeid) {
 @@ -1388,6 +1387,7 @@
  	struct fw_device *fwdev;
  
  	s = splfw();
 +	FW_GLOCK(fc);
  	fc->status = FWBUSEXPLORE;
  
  	/* Invalidate all devices, just after bus reset. */
 @@ -1396,6 +1396,7 @@
  			fwdev->status = FWDEVINVAL;
  			fwdev->rcnt = 0;
  		}
 +	FW_GUNLOCK(fc);
  	splx(s);
  
  	wakeup((void *)fc);
 @@ -1647,20 +1648,27 @@
  
  	fc = (struct firewire_comm *)arg;
  
 -	mtx_lock(&fc->wait_lock);
 +	/*mtx_lock(&fc->wait_lock);*/
 +	FW_GLOCK(fc);
  	while (fc->status != FWBUSDETACH) {
 +		FW_GUNLOCK(fc);
  		if (fc->status == FWBUSEXPLORE) {
 -			mtx_unlock(&fc->wait_lock);
 +			/*mtx_unlock(&fc->wait_lock);*/
  			fw_explore(fc);
  			fc->status = FWBUSEXPDONE;
 -			if (firewire_debug)
 -				printf("bus_explore done\n");
 +			if (firewire_debug) {
 +				device_printf(fc->dev,"%s: bus_explore done\n",
 +						__func__);
 +			}
  			fw_attach_dev(fc);
 -			mtx_lock(&fc->wait_lock);
 +			/*mtx_lock(&fc->wait_lock);*/
  		}
 -		msleep((void *)fc, &fc->wait_lock, PWAIT|PCATCH, "-", 0);
 +		FW_GLOCK(fc);
 +		/*msleep((void *)fc, &fc->wait_lock, PWAIT|PCATCH, "-", 0);*/
 +		msleep((void *)fc, FW_GMTX(fc), PWAIT|PCATCH, "-", 0);
  	}
 -	mtx_unlock(&fc->wait_lock);
 +	/*mtx_unlock(&fc->wait_lock);*/
 +	FW_GUNLOCK(fc);
  	kthread_exit(0);
  }
  
 Index: fwohci.c
 ===================================================================
 --- fwohci.c	(revision 186781)
 +++ fwohci.c	(working copy)
 @@ -1003,7 +1003,8 @@
  	if (maxdesc < db_tr->dbcnt) {
  		maxdesc = db_tr->dbcnt;
  		if (firewire_debug)
 -			device_printf(sc->fc.dev, "maxdesc: %d\n", maxdesc);
 +			device_printf(sc->fc.dev, "%s: maxdesc: %d\n",
 +					__func__, maxdesc);
  	}
  	/* last db */
  	LAST_DB(db_tr, db);
 @@ -1024,7 +1025,8 @@
  	if(db_tr != dbch->bottom){
  		goto txloop;
  	} else {
 -		device_printf(sc->fc.dev, "fwohci_start: lack of db_trq\n");
 +		device_printf(sc->fc.dev, "%s: lack of db_trq\n",
 +				__func__);
  		dbch->flags |= FWOHCI_DBCH_FULL;
  	}
  kick:
 @@ -1036,8 +1038,8 @@
  		OWRITE(sc, OHCI_DMACTL(off), OHCI_CNTL_DMA_WAKE);
  	} else {
  		if (firewire_debug)
 -			device_printf(sc->fc.dev, "start AT DMA status=%x\n",
 -					OREAD(sc, OHCI_DMACTL(off)));
 +			device_printf(sc->fc.dev, "%s: start AT DMA status=%x\n",
 +					__func__, OREAD(sc, OHCI_DMACTL(off)));
  		OWRITE(sc, OHCI_DMACMD(off), dbch->top->bus_addr | fsegment);
  		OWRITE(sc, OHCI_DMACTL(off), OHCI_CNTL_DMA_RUN);
  		dbch->xferq.flag |= FWXFERQ_RUNNING;
 @@ -1848,7 +1850,7 @@
  		/* Disable bus reset interrupt until sid recv. */
  		OWRITE(sc, FWOHCI_INTMASKCLR,  OHCI_INT_PHY_BUS_R);
  	
 -		device_printf(fc->dev, "BUS reset\n");
 +		device_printf(fc->dev, "%s: BUS reset\n", __func__);
  		OWRITE(sc, FWOHCI_INTMASKCLR,  OHCI_INT_CYC_LOST);
  		OWRITE(sc, OHCI_LNKCTLCLR, OHCI_CNTL_CYCSRC);
  
 @@ -1885,10 +1887,10 @@
  		plen = OREAD(sc, OHCI_SID_CNT);
  
  		fc->nodeid = node_id & 0x3f;
 -		device_printf(fc->dev, "node_id=0x%08x, gen=%d, ",
 -			node_id, (plen >> 16) & 0xff);
 +		device_printf(fc->dev, "%s: node_id=0x%08x, gen=%d\n",
 +			__func__, node_id, (plen >> 16) & 0xff);
  		if (!(node_id & OHCI_NODE_VALID)) {
 -			printf("Bus reset failure\n");
 +			device_printf(fc->dev, "%s: Bus reset failure\n", __func__);
  			goto sidout;
  		}
  
 @@ -1896,11 +1898,11 @@
  		sc->cycle_lost = 0;
  		OWRITE(sc, FWOHCI_INTMASK,  OHCI_INT_CYC_LOST);
  		if ((node_id & OHCI_NODE_ROOT) && !nocyclemaster) {
 -			printf("CYCLEMASTER mode\n");
 +			device_printf(fc->dev, "%s: CYCLEMASTER mode\n", __func__);
  			OWRITE(sc, OHCI_LNKCTL,
  				OHCI_CNTL_CYCMTR | OHCI_CNTL_CYCTIMER);
  		} else {
 -			printf("non CYCLEMASTER mode\n");
 +			device_printf(fc->dev, "%s: non CYCLEMASTER mode\n", __func__);
  			OWRITE(sc, OHCI_LNKCTLCLR, OHCI_CNTL_CYCMTR);
  			OWRITE(sc, OHCI_LNKCTL, OHCI_CNTL_CYCTIMER);
  		}
 @@ -1922,6 +1924,8 @@
  	u_int i;
  	struct firewire_comm *fc = (struct firewire_comm *)sc;
  
 +	FW_GLOCK(fc);
 +	FW_GUNLOCK(fc);
  	if (stat & OHCI_INT_DMA_IR) {
  		irstat = atomic_readandclear_int(&sc->irstat);
  		for(i = 0; i < fc->nisodma ; i++){
 @@ -1969,8 +1973,8 @@
  			OWRITE(sc, OHCI_LNKCTLCLR, OHCI_CNTL_CYCTIMER);
  #endif
  			OWRITE(sc, FWOHCI_INTMASKCLR,  OHCI_INT_CYC_LOST);
 -			device_printf(fc->dev, "too many cycle lost, "
 -			 "no cycle master presents?\n");
 +			device_printf(fc->dev, "%s: too many cycle lost, "
 +			 "no cycle master presents?\n", __func__);
  		}
  	}
  	if (stat & OHCI_INT_DMA_ATRQ) {
 @@ -1986,7 +1990,7 @@
  		device_printf(fc->dev, "unrecoverable error\n");
  	}
  	if (stat & OHCI_INT_PHY_INT) {
 -		device_printf(fc->dev, "phy int\n");
 +		device_printf(fc->dev, "%s: phy int\n", __func__);
  	}
  
  	return;
 @@ -1997,6 +2001,8 @@
  {
  	struct fwohci_softc *sc = (struct fwohci_softc *)arg;
  
 +	FW_GLOCK(&sc->fc);
 +	FW_GUNLOCK(&sc->fc);
  	fw_busreset(&sc->fc, FWBUSRESET);
  	OWRITE(sc, OHCI_CROMHDR, ntohl(sc->fc.config_rom[0]));
  	OWRITE(sc, OHCI_BUS_OPT, ntohl(sc->fc.config_rom[2]));
 @@ -2011,6 +2017,8 @@
  	int i, plen;
  
  
 +	FW_GLOCK(fc);
 +	FW_GUNLOCK(fc);
  	plen = OREAD(sc, OHCI_SID_CNT);
  
  	if (plen & OHCI_SID_ERR) {
 @@ -2062,7 +2070,16 @@
  {
  	uint32_t stat, irstat, itstat;
  
 +	/*
 +	 * Try and grab the global lock
 +	 * for a moment to keep us from
 +	 * firing while we are still 
 +	 * setting the interrupt condition
 +	 * on the bus
 +	 */
 +	FW_GLOCK(&sc->fc);
  	stat = OREAD(sc, FWOHCI_INTSTAT);
 +	FW_GUNLOCK(&sc->fc);
  	if (stat == 0xffffffff) {
  		device_printf(sc->fc.dev, 
  			"device physically ejected?\n");
 @@ -2091,25 +2108,15 @@
  	return (FILTER_HANDLED);
  }
  
 -int
 -fwohci_filt(void *arg)
 +void
 +fwohci_intr(void *arg)
  {
  	struct fwohci_softc *sc = (struct fwohci_softc *)arg;
  
 -	if (!(sc->intmask & OHCI_INT_EN)) {
 -		/* polling mode */
 -		return (FILTER_STRAY);
 -	}
 -	return (fwohci_check_stat(sc));
 +	fwohci_check_stat(sc);
  }
  
  void
 -fwohci_intr(void *arg)
 -{
 -	fwohci_filt(arg);
 -}
 -
 -void
  fwohci_poll(struct firewire_comm *fc, int quick, int count)
  {
  	struct fwohci_softc *sc = (struct fwohci_softc *)fc;
 @@ -2464,6 +2471,7 @@
  	struct fwohci_softc *sc;
  	uint32_t fun;
  
 +	FW_GLOCK(fc);
  	device_printf(fc->dev, "Initiate bus reset\n");
  	sc = (struct fwohci_softc *)fc;
  
 @@ -2487,6 +2495,7 @@
  	fun |= FW_PHY_ISBR | FW_PHY_RHB;
  	fun = fwphy_wrdata(sc, FW_PHY_ISBR_REG, fun);
  #endif
 +	FW_GUNLOCK(fc);
  }
  
  void
 Index: fwohcivar.h
 ===================================================================
 --- fwohcivar.h	(revision 186781)
 +++ fwohcivar.h	(working copy)
 @@ -37,12 +37,6 @@
  
  #include <sys/taskqueue.h>
  
 -#if defined(__DragonFly__) ||  __FreeBSD_version < 700043
 -#define FWOHCI_INTFILT	0
 -#else
 -#define FWOHCI_INTFILT	1
 -#endif
 -
  typedef struct fwohci_softc {
  	struct firewire_comm fc;
  	bus_space_tag_t bst;
 @@ -84,7 +78,6 @@
  } fwohci_softc_t;
  
  void fwohci_intr (void *arg);
 -int fwohci_filt (void *arg);
  int fwohci_init (struct fwohci_softc *, device_t);
  void fwohci_poll (struct firewire_comm *, int, int);
  void fwohci_reset (struct fwohci_softc *, device_t);
 
 --=-bdk1tSakYWl4EHnChn2C--
 


More information about the freebsd-bugs mailing list